RubyGems - ripper_ruby_parser - Versions diffs - 1.7.0 → 1.9.0 - Mend

ripper_ruby_parser 1.7.0 → 1.9.0

Files changed (48) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +76 -0
data/README.md +6 -4
data/lib/ripper_ruby_parser/commenting_ripper_parser.rb +24 -12
data/lib/ripper_ruby_parser/sexp_handlers.rb +2 -0
data/lib/ripper_ruby_parser/sexp_handlers/assignment.rb +9 -4
data/lib/ripper_ruby_parser/sexp_handlers/blocks.rb +40 -52
data/lib/ripper_ruby_parser/sexp_handlers/conditionals.rb +17 -19
data/lib/ripper_ruby_parser/sexp_handlers/helper_methods.rb +35 -2
data/lib/ripper_ruby_parser/sexp_handlers/literals.rb +15 -242
data/lib/ripper_ruby_parser/sexp_handlers/method_calls.rb +9 -5
data/lib/ripper_ruby_parser/sexp_handlers/methods.rb +22 -17
data/lib/ripper_ruby_parser/sexp_handlers/operators.rb +3 -3
data/lib/ripper_ruby_parser/sexp_handlers/string_literals.rb +256 -0
data/lib/ripper_ruby_parser/sexp_processor.rb +12 -56
data/lib/ripper_ruby_parser/unescape.rb +89 -43
data/lib/ripper_ruby_parser/version.rb +1 -1
metadata +125 -76
data/Rakefile +0 -33
data/test/end_to_end/comments_test.rb +0 -59
data/test/end_to_end/comparison_test.rb +0 -104
data/test/end_to_end/lib_comparison_test.rb +0 -29
data/test/end_to_end/line_numbering_test.rb +0 -31
data/test/end_to_end/samples_comparison_test.rb +0 -13
data/test/end_to_end/test_comparison_test.rb +0 -32
data/test/pt_testcase/pt_test.rb +0 -44
data/test/ripper_ruby_parser/commenting_ripper_parser_test.rb +0 -200
data/test/ripper_ruby_parser/parser_test.rb +0 -553
data/test/ripper_ruby_parser/sexp_handlers/assignment_test.rb +0 -613
data/test/ripper_ruby_parser/sexp_handlers/blocks_test.rb +0 -679
data/test/ripper_ruby_parser/sexp_handlers/conditionals_test.rb +0 -536
data/test/ripper_ruby_parser/sexp_handlers/literals_test.rb +0 -1106
data/test/ripper_ruby_parser/sexp_handlers/loops_test.rb +0 -209
data/test/ripper_ruby_parser/sexp_handlers/method_calls_test.rb +0 -267
data/test/ripper_ruby_parser/sexp_handlers/methods_test.rb +0 -421
data/test/ripper_ruby_parser/sexp_handlers/operators_test.rb +0 -399
data/test/ripper_ruby_parser/sexp_processor_test.rb +0 -303
data/test/ripper_ruby_parser/version_test.rb +0 -7
data/test/samples/assignment.rb +0 -17
data/test/samples/comments.rb +0 -13
data/test/samples/conditionals.rb +0 -23
data/test/samples/lambdas.rb +0 -5
data/test/samples/loops.rb +0 -36
data/test/samples/misc.rb +0 -281
data/test/samples/number.rb +0 -7
data/test/samples/operators.rb +0 -18
data/test/samples/strings.rb +0 -147
data/test/test_helper.rb +0 -107

data/lib/ripper_ruby_parser/sexp_handlers/string_literals.rb ADDED Viewed

@@ -0,0 +1,256 @@
+# frozen_string_literal: true
+module RipperRubyParser
+  module SexpHandlers
+    # Sexp handlers for string and stringlike literals
+    module StringLiterals
+      def process_string_literal(exp)
+        _, content = exp.shift 2
+        process(content)
+      end
+      def process_string_content(exp)
+        _, *rest = shift_all exp
+        line, string, rest = extract_string_parts(rest)
+        if rest.empty?
+          with_line_number(line, s(:str, string))
+        else
+          s(:dstr, string, *rest)
+        end
+      end
+      alias process_word process_string_content
+      def process_string_embexpr(exp)
+        _, list = exp.shift 2
+        val = process(list.sexp_body.first)
+        case val.sexp_type
+        when :str, :dstr
+          val
+        when :void_stmt
+          s(:dstr, "", s(:evstr))
+        else
+          s(:dstr, "", s(:evstr, val))
+        end
+      end
+      def process_string_dvar(exp)
+        _, list = exp.shift 2
+        val = process(list)
+        s(:dstr, "", s(:evstr, val))
+      end
+      def process_string_concat(exp)
+        _, left, right = exp.shift 3
+        left = process(left)
+        right = process(right)
+        if left.sexp_type == :str
+          merge_left_into_right(left, right)
+        else
+          merge_right_into_left(left, right)
+        end
+      end
+      def process_xstring_literal(exp)
+        _, content = exp.shift 2
+        process(content)
+      end
+      def process_xstring(exp)
+        _, *rest = shift_all exp
+        line, string, rest = extract_string_parts(rest)
+        result = if rest.empty?
+                   s(:xstr, string)
+                 else
+                   s(:dxstr, string, *rest)
+                 end
+        result.line = line
+        result
+      end
+      def process_regexp_literal(exp)
+        _, content, (_, flags,) = exp.shift 3
+        content = process(content)
+        numflags = character_flags_to_numerical flags
+        if content.length == 2
+          return with_line_number(content.line, s(:lit, Regexp.new(content.last, numflags)))
+        end
+        content.sexp_type = :dregx_once if /o/.match?(flags)
+        content << numflags unless numflags == 0
+        content
+      end
+      def process_regexp(exp)
+        _, *rest = shift_all exp
+        line, string, rest = extract_string_parts(rest)
+        with_line_number(line, s(:dregx, string, *rest))
+      end
+      def process_symbol_literal(exp)
+        _, symbol = exp.shift 2
+        handle_symbol_content(symbol)
+      end
+      def process_symbol(exp)
+        _, node = exp.shift 2
+        handle_symbol_content(node)
+      end
+      def process_dyna_symbol(exp)
+        _, node = exp.shift 2
+        handle_dyna_symbol_content(node)
+      end
+      def process_qsymbols(exp)
+        _, *items = shift_all(exp)
+        items = items.map { |item| handle_symbol_content(item) }
+        s(:qsymbols, *items)
+      end
+      def process_symbols(exp)
+        _, *items = shift_all(exp)
+        items = items.map { |item| handle_dyna_symbol_content(item) }
+        s(:symbols, *items)
+      end
+      def process_at_tstring_content(exp)
+        _, content, pos, delim = exp.shift 4
+        string = fix_encoding handle_string_unescaping(content, delim)
+        with_position(pos, s(:str, string))
+      end
+      private
+      def extract_string_parts(list)
+        return nil, "", [] if list.empty?
+        list = merge_raw_string_literals list
+        list = map_process_list list
+        parts = unpack_dstr list
+        merge_initial_string_literals(parts)
+      end
+      def merge_raw_string_literals(list)
+        chunks = list.chunk { |it| it.sexp_type == :@tstring_content }
+        chunks.flat_map do |is_simple, items|
+          if is_simple && items.count > 1
+            head = items.first
+            contents = items.map { |it| it[1] }.join
+            [s(:@tstring_content, contents, head[2], head[3])]
+          else
+            items
+          end
+        end
+      end
+      def unpack_dstr(list)
+        list.flat_map do |item|
+          type, val, *rest = item
+          if type == :dstr
+            if val.empty?
+              rest
+            else
+              [s(:str, val), *rest]
+            end
+          else
+            [item]
+          end
+        end
+      end
+      def merge_initial_string_literals(parts)
+        string = ""
+        while parts.first&.sexp_type == :str
+          str = parts.shift
+          line ||= str.line
+          string += str.last
+        end
+        return line, string, parts
+      end
+      def character_flags_to_numerical(flags)
+        numflags = 0
+        numflags = Regexp::MULTILINE if /m/.match?(flags)
+        numflags |= Regexp::EXTENDED if /x/.match?(flags)
+        numflags |= Regexp::IGNORECASE if /i/.match?(flags)
+        numflags |= Regexp::NOENCODING if /n/.match?(flags)
+        numflags |= Regexp::FIXEDENCODING if /[ues]/.match?(flags)
+        numflags
+      end
+      def handle_dyna_symbol_content(node)
+        type, *body = *process(node)
+        case type
+        when :str, :xstr
+          s(:lit, body.first.to_sym)
+        when :dstr, :dxstr
+          s(:dsym, *body)
+        end
+      end
+      def handle_symbol_content(node)
+        if node.sexp_type == :@kw
+          symbol, position = extract_node_symbol_with_position(node)
+          with_position(position, s(:lit, symbol))
+        else
+          processed = process(node)
+          symbol = processed.last.to_sym
+          line = processed.line
+          with_line_number(line, s(:lit, symbol))
+        end
+      end
+      def merge_left_into_right(left, right)
+        right[1] = left.last + right[1]
+        right
+      end
+      def merge_right_into_left(left, right)
+        if right.sexp_type == :str
+          left.push right
+        else
+          _, first, *rest = right
+          left.push s(:str, first) unless first.empty?
+          left.push(*rest)
+        end
+      end
+      INTERPOLATING_HEREDOC = /^<<[-~]?[^-~']/.freeze
+      NON_INTERPOLATING_HEREDOC = /^<<[-~]?'/.freeze
+      INTERPOLATING_STRINGS = ['"', "`", ':"', /^%Q.$/, /^%.$/].freeze
+      NON_INTERPOLATING_STRINGS = ["'", ":'", /^%q.$/].freeze
+      INTERPOLATING_WORD_LIST = /^%[WI].$/.freeze
+      NON_INTERPOLATING_WORD_LIST = /^%[wi].$/.freeze
+      REGEXP_LITERALS = ["/", /^%r.$/].freeze
+      def handle_string_unescaping(content, delim)
+        case delim
+        when INTERPOLATING_HEREDOC, *INTERPOLATING_STRINGS
+          unescape(content)
+        when INTERPOLATING_WORD_LIST
+          unescape_wordlist_word(content)
+        when *NON_INTERPOLATING_STRINGS
+          simple_unescape(content, delim)
+        when *REGEXP_LITERALS
+          unescape_regexp(content)
+        when NON_INTERPOLATING_WORD_LIST
+          simple_unescape_wordlist_word(content, delim)
+        else
+          content
+        end
+      end
+    end
+  end
+end

data/lib/ripper_ruby_parser/sexp_processor.rb CHANGED Viewed

@@ -11,31 +11,16 @@ module RipperRubyParser
   class SexpProcessor < ::SexpProcessor
     include Unescape
-    attr_reader :filename
-    attr_reader :extra_compatible
+    attr_reader :filename, :extra_compatible
     def initialize(filename: nil, extra_compatible: nil)
       super()
-      @processors[:@int] = :process_at_int
-      @processors[:@float] = :process_at_float
-      @processors[:@rational] = :process_at_rational
-      @processors[:@CHAR] = :process_at_CHAR
-      @processors[:@label] = :process_at_label
-      @processors[:@const] = :process_at_const
-      @processors[:@ident] = :process_at_ident
-      @processors[:@cvar] = :process_at_cvar
-      @processors[:@gvar] = :process_at_gvar
-      @processors[:@ivar] = :process_at_ivar
-      @processors[:@kw] = :process_at_kw
-      @processors[:@op] = :process_at_op
-      @processors[:@backref] = :process_at_backref
-      @processors[:@backtick] = :process_at_backtick
-      @processors[:@period] = :process_at_period
-      @processors[:@tstring_content] = :process_at_tstring_content
+      public_methods.each do |name|
+        if name =~ /^process_at_(.*)/
+          @processors["@#{Regexp.last_match(1)}".to_sym] = name.to_sym
+        end
+      end
       @filename = filename
       @extra_compatible = extra_compatible
@@ -79,16 +64,10 @@ module RipperRubyParser
     def process_stmts(exp)
       _, *statements = shift_all(exp)
-      statements = map_process_list_compact statements
-      case statements.count
-      when 0
-        s(:void_stmt)
-      when 1
-        statements.first
-      else
-        first = statements.shift
-        s(:block, *unwrap_block(first), *statements)
-      end
+      statements = map_unwrap_begin_list map_process_list statements
+      line = statements.first.line
+      statements = reject_void_stmt statements
+      wrap_in_block(statements, line)
     end
     def process_var_ref(exp)
@@ -137,11 +116,7 @@ module RipperRubyParser
     def process_paren(exp)
       _, body = exp.shift 2
       result = process body
-      if result.sexp_type == :void_stmt
-        s(:nil)
-      else
-        result
-      end
+      convert_void_stmt_to_nil_symbol result
     end
     def process_comment(exp)
@@ -170,25 +145,6 @@ module RipperRubyParser
       with_position pos, s(:iter, s(:postexe), 0, *body)
     end
-    # number literals
-    def process_at_int(exp)
-      make_literal(exp) { |val| Integer(val) }
-    end
-    def process_at_float(exp)
-      make_literal(exp, &:to_f)
-    end
-    def process_at_rational(exp)
-      make_literal(exp, &:to_r)
-    end
-    # character literals
-    def process_at_CHAR(exp)
-      _, val, pos = exp.shift 3
-      with_position(pos, s(:str, unescape(val[1..-1])))
-    end
     def process_at_label(exp)
       make_literal(exp) { |val| val.chop.to_sym }
     end
@@ -265,7 +221,7 @@ module RipperRubyParser
     def class_or_module_body(exp)
       body = process(exp)
-      return body if body.empty?
+      return [] if body.sexp_type == :void_stmt
       unwrap_block body
     end

data/lib/ripper_ruby_parser/unescape.rb CHANGED Viewed

@@ -7,19 +7,19 @@ module RipperRubyParser
   module Unescape
     ESCAPE_SEQUENCE_REGEXP =
       /\\(
-        [0-7]{1,3}        | # octal character
-        x[0-9a-fA-F]{1,2} | # hex byte
-        u[0-9a-fA-F]+     | # unicode character
-        u{[0-9a-fA-F]{4}} | # unicode character
-        M-\\C-.           | # meta-ctrl
-        C-\\M-.           | # ctrl-meta
-        M-\\c.            | # meta-ctrl (shorthand)
-        c\\M-.            | # ctrl-meta (shorthand)
-        C-.               | # control (regular)
-        c.                | # control (shorthand)
-        M-.               | # meta
-        \n                | # line continuation
-        .                   # single-character
+        [0-7]{1,3}          | # octal character
+        x[0-9a-fA-F]{1,2}   | # hex byte
+        u[0-9a-fA-F]{4}     | # unicode character
+        u{[0-9a-fA-F]{4,6}} | # unicode character
+        M-\\C-.             | # meta-ctrl
+        C-\\M-.             | # ctrl-meta
+        M-\\c.              | # meta-ctrl (shorthand)
+        c\\M-.              | # ctrl-meta (shorthand)
+        C-.                 | # control (regular)
+        c.                  | # control (shorthand)
+        M-.                 | # meta
+        \n                  | # line break
+        .                     # other single character
       )/x.freeze
     SINGLE_LETTER_ESCAPES = {
@@ -37,33 +37,49 @@ module RipperRubyParser
     SINGLE_LETTER_ESCAPES_REGEXP =
       Regexp.new("^[#{SINGLE_LETTER_ESCAPES.keys.join}]$")
-    def simple_unescape(string)
-      string.gsub(/\\(
-        '   | # single quote
-        \\    # backslash
-      )/x) do
-        Regexp.last_match[1]
-      end
+    DELIMITER_PAIRS = {
+      "(" => "()",
+      "<" => "<>",
+      "[" => "[]",
+      "{" => "{}"
+    }.freeze
+    def simple_unescape(string, delimiter)
+      delimiters = delimiter_regexp_pattern(delimiter)
+      string.gsub(/
+                  \\ # a backslash
+                  (  # followed by a
+                    #{delimiters} | # delimiter or
+                    \\              # backslash
+                  )/x) do
+                    Regexp.last_match[1]
+                  end
     end
-    def simple_unescape_wordlist_word(string)
-      string.gsub(/\\(
-        '   | # single quote
-        \\  | # backslash
-        [ ] | # space
-        \n    # newline
-      )/x) do
-        Regexp.last_match[1]
-      end
+    def simple_unescape_wordlist_word(string, delimiter)
+      delimiters = delimiter_regexp_pattern(delimiter)
+      string.gsub(/
+                  \\ # a backslash
+                  (  # followed by a
+                    #{delimiters} | # delimiter or
+                    \\            | # backslash or
+                    [ ]           | # space or
+                    \n              # newline
+                  )
+                  /x) do
+                    Regexp.last_match[1]
+                  end
     end
     def unescape(string)
+      string = string.dup if string.frozen?
+      string.force_encoding("ASCII-8BIT")
       string.gsub(ESCAPE_SEQUENCE_REGEXP) do
         bare = Regexp.last_match[1]
         if bare == "\n"
           ""
         else
-          unescaped_value(bare)
+          unescaped_value(bare).force_encoding("ASCII-8BIT")
         end
       end
     end
@@ -90,7 +106,7 @@ module RipperRubyParser
         when "\n"
           ""
         else
-          '\\\\'
+          "\\\\"
         end
       end
     end
@@ -100,26 +116,50 @@ module RipperRubyParser
     def unescaped_value(bare)
       case bare
       when SINGLE_LETTER_ESCAPES_REGEXP
-        SINGLE_LETTER_ESCAPES[bare]
+        SINGLE_LETTER_ESCAPES[bare].dup
       when /^x/
-        hex_to_char(bare[1..-1])
-      when /^u\{/
-        hex_to_unicode_char(bare[2..-2])
+        unescape_hex_char bare
       when /^u/
-        hex_to_unicode_char(bare[1..4]) + bare[5..-1]
-      when /^(c|C-).$/
-        control(bare[-1].ord).chr
-      when /^M-.$/
-        meta(bare[-1].ord).chr
-      when /^(M-\\C-|C-\\M-|M-\\c|c\\M-).$/
-        meta(control(bare[-1].ord)).chr
+        unescape_unicode_char bare
+      when /^(c|C-|M-|M-\\C-|C-\\M-|M-\\c|c\\M-).$/
+        unescape_meta_control bare
       when /^[0-7]+/
-        bare.to_i(8).chr
+        unescape_octal bare
       else
         bare
       end
     end
+    def unescape_hex_char(bare)
+      hex_to_char(bare[1..-1])
+    end
+    def unescape_unicode_char(bare)
+      hex_chars = if bare.start_with? "u{"
+                    bare[2..-2]
+                  else
+                    bare[1..4]
+                  end
+      hex_to_unicode_char(hex_chars)
+    end
+    def unescape_meta_control(bare)
+      base_value = bare[-1].ord
+      value = case bare
+              when /^(c|C-).$/
+                control(base_value)
+              when /^M-.$/
+                meta(base_value)
+              when /^(M-\\C-|C-\\M-|M-\\c|c\\M-).$/
+                meta(control(base_value))
+              end
+      value.chr
+    end
+    def unescape_octal(bare)
+      bare.to_i(8).chr
+    end
     def hex_to_unicode_char(str)
       str.to_i(16).chr(Encoding::UTF_8)
     end
@@ -135,5 +175,11 @@ module RipperRubyParser
     def meta(val)
       val | 0b1000_0000
     end
+    def delimiter_regexp_pattern(delimiter)
+      delimiter = delimiter[-1]
+      delimiters = DELIMITER_PAIRS.fetch(delimiter, delimiter)
+      delimiters.each_char.map { |it| Regexp.escape it }.join(" | ")
+    end
   end
 end