RubyGems - ruby_parser - Versions diffs - 3.0.0.a8 → 3.0.0.a9 - Mend

ruby_parser 3.0.0.a8 → 3.0.0.a9

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.

This version of ruby_parser might be problematic. Click here for more details.

Files changed (17)

data.tar.gz.sig +0 -0
data/.autotest +6 -0
data/History.txt +40 -0
data/README.txt +6 -11
data/Rakefile +5 -0
data/bin/ruby_parse_extract_error +23 -6
data/lib/ruby18_parser.rb +18 -8
data/lib/ruby18_parser.y +18 -8
data/lib/ruby19_parser.rb +67 -35
data/lib/ruby19_parser.y +62 -32
data/lib/ruby_lexer.rb +84 -95
data/lib/ruby_parser_extras.rb +140 -20
data/test/test_ruby_lexer.rb +58 -3
data/test/test_ruby_parser.rb +193 -2
data/test/test_ruby_parser_extras.rb +109 -0
metadata +17 -22
metadata.gz.sig +5 -2

data/lib/ruby19_parser.y CHANGED

@@ -239,7 +239,7 @@ rule
                 | block_command
    block_command: block_call
-                | block_call tDOT operation2 command_args
+                | block_call tDOT operation2 command_args # TODO: dot_or_colon
                     {
                       result = new_call val[0], val[2], val[3]
                     }
@@ -271,9 +271,8 @@ rule
                     {
                       result = new_call nil, val[0].to_sym, val[1]
                       if val[2] then
-                        if result[0] == :block_pass then
-                          raise "both block arg and actual block given"
-                        end
+                        block_dup_check result, val[2]
                         result, operation = val[2], result
                         result.insert 1, operation
                       end
@@ -284,7 +283,13 @@ rule
                     }
                 | primary_value tDOT operation2 command_args cmd_brace_block
                     {
-                      result = new_call val[0], val[2].to_sym, val[3]
+                      recv, _, msg, args, block = val
+                      call = new_call recv, msg.to_sym, args
+                      block_dup_check call, block
+                      block.insert 1, call
+                      result = block
                     }
                 | primary_value tCOLON2 operation2 command_args =tLOWEST
                     {
@@ -292,14 +297,13 @@ rule
                     }
                 | primary_value tCOLON2 operation2 command_args cmd_brace_block
                     {
-                      result = new_call val[0], val[2].to_sym, val[3]
-                      if val[4] then
-                        if result[0] == :block_pass then # REFACTOR
-                          raise "both block arg and actual block given"
-                        end
-                        val[2] << result
-                        result = val[2]
-                      end
+                      recv, _, msg, args, block = val
+                      call = new_call recv, msg.to_sym, args
+                      block_dup_check call, block
+                      block.insert 1, call
+                      result = block
                     }
                 | kSUPER command_args
                     {
@@ -838,7 +842,7 @@ rule
                 | block_arg
     command_args:   {
-                      result = lexer.cmdarg.stack.dup
+                      result = lexer.cmdarg.stack.dup # TODO: smell?
                       lexer.cmdarg.push true
                     }
                       call_args
@@ -970,7 +974,7 @@ rule
                     }
                 | kNOT tLPAREN2 rparen
                     {
-                      raise "no3: #{val.inspect}"
+                      raise "no3\non#{val.inspect}"
                     }
                 | operation brace_block
                     {
@@ -984,7 +988,8 @@ rule
                 | method_call brace_block
                     {
                       call, iter = val[0], val[1]
-                      iter.insert 1, call
+                      block_dup_check call, iter
+                      iter.insert 1, call # FIX
                       result = iter
                     }
                 | tLAMBDA lambda
@@ -1212,35 +1217,35 @@ rule
                     }
                 | f_marg_list tCOMMA tSTAR f_norm_arg
                     {
-                      raise "no9: #{val.inspect}"
+                      result = block_var val[0], val[3], nil
                     }
                 | f_marg_list tCOMMA tSTAR f_norm_arg tCOMMA f_marg_list
                     {
-                      raise "no10: #{val.inspect}"
+                      raise "no10\non: #{val.inspect}"
                     }
                 | f_marg_list tCOMMA tSTAR
                     {
-                      raise "no11: #{val.inspect}"
+                      raise "no11\non: #{val.inspect}"
                     }
                 | f_marg_list tCOMMA tSTAR tCOMMA f_marg_list
                     {
-                      raise "no12: #{val.inspect}"
+                      raise "no12\non: #{val.inspect}"
                     }
                 | tSTAR f_norm_arg
                     {
-                      raise "no13: #{val.inspect}"
+                      raise "no13\non: #{val.inspect}"
                     }
                 | tSTAR f_norm_arg tCOMMA f_marg_list
                     {
-                      raise "no14: #{val.inspect}"
+                      raise "no14\non: #{val.inspect}"
                     }
                 | tSTAR
                     {
-                      raise "no15: #{val.inspect}"
+                      raise "no15\non: #{val.inspect}"
                     }
                 | tSTAR tCOMMA f_marg_list
                     {
-                      raise "no16: #{val.inspect}"
+                      raise "no16\non: #{val.inspect}"
                     }
      block_param: f_arg tCOMMA f_block_optarg tCOMMA f_rest_arg opt_f_block_arg
@@ -1253,7 +1258,12 @@ rule
                     }
                 | f_arg tCOMMA f_block_optarg opt_f_block_arg
                     {
-                      result = block_args19 val, "3"
+                      arg, _, opt, block = val
+                      result = arg
+                      result.concat opt[1..-1].map { |s| s[1] }
+                      result << "&#{block.last}".to_sym if block
+                      result << opt
                     }
                 | f_arg tCOMMA f_block_optarg tCOMMA f_arg opt_f_block_arg
                     {
@@ -1285,7 +1295,12 @@ rule
                     }
                 | f_block_optarg opt_f_block_arg
                     {
-                      result = block_args19 val, "11"
+                      opt, block = val
+                      result = s(:args)
+                      result.concat opt[1..-1].map { |s| s[1] }
+                      result << "&#{block.last}".to_sym if block
+                      result << opt
                     }
                 | f_block_optarg tCOMMA f_arg opt_f_block_arg
                     {
@@ -1297,7 +1312,11 @@ rule
                     }
                 | f_rest_arg tCOMMA f_arg opt_f_block_arg
                     {
-                      result = block_args19 val, "14"
+                      rest, _, args, block = val
+                      result = args
+                      result[1,0] = rest
+                      result << "&#{block.last}".to_sym if block
                     }
                 | f_block_arg
                     {
@@ -1329,10 +1348,13 @@ rule
                     }
         bv_decls: bvar
+                    {
+                      result = [val[0]]
+                    }
                 | bv_decls tCOMMA bvar
                     {
-                      result = val[0] << val[2]
-                      raise "no18: #{val.inspect}"
+                      result = val[0].concat val[2]
+                      raise "no18\non: #{val.inspect}"
                     }
             bvar: tIDENTIFIER
@@ -1393,8 +1415,12 @@ rule
       block_call: command do_block
                     {
-                      raise SyntaxError, "Both block arg and actual block given." if
-                        val[0] && val[0][0] == :blockpass
+                      # TODO:
+                      # if (nd_type($1) == NODE_YIELD) {
+                      #     compile_error(PARSER_ARG "block given to yield");
+                      syntax_error "Both block arg and actual block given." if
+                        val[0].block_pass?
                       result = val[1]
                       result.insert 1, val[0]
@@ -1924,9 +1950,13 @@ keyword_variable: kNIL      { result = s(:nil)   }
                     }
   f_block_optarg: f_block_opt
+                    {
+                      result = s(:block, val[0])
+                    }
                 | f_block_optarg tCOMMA f_block_opt
                     {
-                      raise "no22: #{val.inspect}"
+                      result = val[0]
+                      result << val[2]
                     }
         f_optarg: f_opt

data/lib/ruby_lexer.rb CHANGED

@@ -2,6 +2,8 @@
 class RubyLexer
+  RUBY19 = "".respond_to? :encoding
   IDENT_CHAR_RE = case RUBY_VERSION
                   when /^1\.8/ then
                     /[\w\x80-\xFF]/
@@ -115,7 +117,7 @@ class RubyLexer
   end
   def fix_arg_lex_state
-    self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
+    self.lex_state = if in_lex_state? :expr_fname, :expr_dot then
                        :expr_arg
                      else
                        :expr_beg
@@ -235,6 +237,10 @@ class RubyLexer
     end
   end
+  def in_lex_state?(*states)
+    states.include? lex_state
+  end
   def initialize v = 18
     self.version = v
     self.cond = RubyParserStuff::StackState.new(:cond)
@@ -273,13 +279,13 @@ class RubyLexer
     self.lex_state = :expr_end
     case
-    when src.scan(/[+-]?0[xbd]\b/) then
+    when src.scan(/[+-]?0[xXbBdD]\b/) then
       rb_compile_error "Invalid numeric format"
     when src.scan(/[+-]?0x[a-f0-9_]+/i) then
       int_with_base(16)
-    when src.scan(/[+-]?0b[01_]+/) then
+    when src.scan(/[+-]?0[Bb][01_]+/) then
       int_with_base(2)
-    when src.scan(/[+-]?0d[0-9_]+/) then
+    when src.scan(/[+-]?0[Dd][0-9_]+/) then
       int_with_base(10)
     when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
       rb_compile_error "Illegal octal digit."
@@ -344,7 +350,7 @@ class RubyLexer
                                 [:tSYMBEG,       STR_SSYM]
                               end
-    rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
+    rb_compile_error "Bad %string type. Expected [Qq\Wwxrs], found '#{c}'." if
       token_type.nil?
     self.lex_strterm = [:strterm, string_type, nnd, beg]
@@ -414,7 +420,7 @@ class RubyLexer
   def rb_compile_error msg
     msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
-    raise SyntaxError, msg
+    raise RubyParser::SyntaxError, msg
   end
   def read_escape # 51 lines
@@ -614,23 +620,24 @@ class RubyLexer
     return c
   end
-  def unescape s
+  ESCAPES = {
+    "a"    => "\007",
+    "b"    => "\010",
+    "e"    => "\033",
+    "f"    => "\f",
+    "n"    => "\n",
+    "r"    => "\r",
+    "s"    => " ",
+    "t"    => "\t",
+    "v"    => "\13",
+    "\\"   => '\\',
+    "\n"   => "",
+    "C-\?" => 127.chr,
+    "c\?"  => 127.chr,
+  }
-    r = {
-      "a"    => "\007",
-      "b"    => "\010",
-      "e"    => "\033",
-      "f"    => "\f",
-      "n"    => "\n",
-      "r"    => "\r",
-      "s"    => " ",
-      "t"    => "\t",
-      "v"    => "\13",
-      "\\"   => '\\',
-      "\n"   => "",
-      "C-\?" => 127.chr,
-      "c\?"  => 127.chr,
-    }[s]
+  def unescape s
+    r = ESCAPES[s]
     return r if r
@@ -684,25 +691,26 @@ class RubyLexer
           self.lineno = nil
           c = src.matched
           if c == '#' then
-            # TODO: add magic comment handling?
             src.pos -= 1
             while src.scan(/\s*#.*(\n+|\z)/) do
               @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
             end
-            if src.eos? then
-              return RubyLexer::EOF
-            end
+            return RubyLexer::EOF if src.eos?
           end
           # Replace a string of newlines with a single one
           src.scan(/\n+/)
-          if [:expr_beg, :expr_fname,
-              :expr_dot, :expr_class, :expr_value].include? lex_state then
-            next
+          next if in_lex_state?(:expr_beg, :expr_fname, :expr_dot, :expr_class,
+                                :expr_value)
+          if src.scan(/([\ \t\r\f\v]*)\./) then
+            self.space_seen = true unless src[1].empty?
+            src.pos -= 1
+            next unless src.check(/\.\./)
           end
           self.command_start = true
@@ -778,7 +786,7 @@ class RubyLexer
           return process_token(command_state)
         elsif src.scan(/\:\:/) then
-          if is_beg? || lex_state == :expr_class || is_space_arg? then
+          if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
             self.lex_state = :expr_beg
             self.yacc_value = "::"
             return :tCOLON3
@@ -787,7 +795,7 @@ class RubyLexer
           self.lex_state = :expr_dot
           self.yacc_value = "::"
           return :tCOLON2
-        elsif ! is_end? && src.scan(/:([a-zA-Z_]#{IDENT_CHAR_RE}*(?:[?!]|=(?!>))?)/) then
+        elsif ! is_end? && src.scan(/:([a-zA-Z_]#{IDENT_CHAR_RE}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
           # scanning shortcut to symbols
           self.yacc_value = src[1]
           self.lex_state = :expr_end
@@ -816,7 +824,7 @@ class RubyLexer
         elsif src.scan(/\[/) then
           result = src.matched
-          if lex_state == :expr_fname || lex_state == :expr_dot then
+          if in_lex_state? :expr_fname, :expr_dot then
             self.lex_state = :expr_arg
             case
             when src.scan(/\]\=/) then
@@ -831,7 +839,7 @@ class RubyLexer
           elsif is_beg? then
             self.tern.push false
             result = :tLBRACK
-          elsif lex_state.is_argument && space_seen then
+          elsif is_arg? && space_seen then
             self.tern.push false
             result = :tLBRACK
           else
@@ -870,9 +878,9 @@ class RubyLexer
             return :tLAMBEG
           end
-          result = if lex_state.is_argument || lex_state == :expr_end then
+          result = if is_arg? || in_lex_state?(:expr_end) then
                      :tLCURLY      #  block (primary)
-                   elsif lex_state == :expr_endarg then
+                   elsif in_lex_state?(:expr_endarg) then
                      :tLBRACE_ARG  #  block (expr)
                    else
                      self.tern.push false
@@ -895,7 +903,7 @@ class RubyLexer
                           [:tUMINUS, :tMINUS]
                         end
-          if lex_state == :expr_fname || lex_state == :expr_dot then
+          if in_lex_state? :expr_fname, :expr_dot then
             self.lex_state = :expr_arg
             if src.scan(/@/) then
               self.yacc_value = "#{sign}@"
@@ -913,8 +921,8 @@ class RubyLexer
           end
           if (is_beg? ||
-              (lex_state.is_argument && space_seen && !src.check(/\s/))) then
-            if lex_state.is_argument then
+              (is_arg? && space_seen && !src.check(/\s/))) then
+            if is_arg? then
               arg_ambiguous
             end
@@ -949,7 +957,7 @@ class RubyLexer
             self.yacc_value = "*"
             return :tOP_ASGN
           elsif src.scan(/\*/) then
-            result = if lex_state.is_argument && space_seen && src.check(/\S/) then
+            result = if is_arg? && space_seen && src.check(/\S/) then
                        warning("`*' interpreted as argument prefix")
                        :tSTAR
                      elsif is_beg? then
@@ -977,9 +985,9 @@ class RubyLexer
             self.yacc_value = "\<\<"
             return :tOP_ASGN
           elsif src.scan(/\<\</) then
-            if (! [:expr_end,    :expr_dot,
-                   :expr_endarg, :expr_class].include?(lex_state) &&
-                (!lex_state.is_argument || space_seen)) then
+            if (! in_lex_state?(:expr_end, :expr_dot,
+                                :expr_endarg, :expr_class) &&
+                (!is_arg? || space_seen)) then
               tok = self.heredoc_identifier
               if tok then
                 return tok
@@ -1030,8 +1038,9 @@ class RubyLexer
           self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
           return :tXSTRING_BEG
         elsif src.scan(/\?/) then
-          if lex_state == :expr_end || lex_state == :expr_endarg then
-            self.lex_state = :expr_beg
+          if is_end? then
+            self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
             self.tern.push true
             self.yacc_value = "?"
             return :tEH
@@ -1042,7 +1051,7 @@ class RubyLexer
           end
           if src.check(/\s|\v/) then
-            unless lex_state.is_argument then
+            unless is_arg? then
               c2 = { " " => 's',
                     "\n" => 'n',
                     "\t" => 't',
@@ -1056,7 +1065,7 @@ class RubyLexer
             end
             # ternary
-            self.lex_state = :expr_beg
+            self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
             self.tern.push true
             self.yacc_value = "?"
             return :tEH
@@ -1095,11 +1104,11 @@ class RubyLexer
             self.lex_state = :expr_beg
             return :tOP_ASGN
           elsif src.scan(/&/) then
-            result = if lex_state.is_argument && space_seen &&
+            result = if is_arg? && space_seen &&
                          !src.check(/\s/) then
                        warning("`&' interpreted as argument prefix")
                        :tAMPER
-                     elsif lex_state == :expr_beg || lex_state == :expr_mid then
+                     elsif in_lex_state? :expr_beg, :expr_mid then
                        :tAMPER
                      else
                        :tAMPER2
@@ -1122,7 +1131,7 @@ class RubyLexer
             return :tOP_ASGN
           end
-          if lex_state.is_argument && space_seen then
+          if is_arg? && space_seen then
             unless src.scan(/\s/) then
               arg_ambiguous
               self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
@@ -1149,7 +1158,7 @@ class RubyLexer
           self.yacc_value = ";"
           return :tSEMI
         elsif src.scan(/\~/) then
-          if lex_state == :expr_fname || lex_state == :expr_dot then
+          if in_lex_state? :expr_fname, :expr_dot then
             src.scan(/@/)
           end
@@ -1175,9 +1184,7 @@ class RubyLexer
             return :tOP_ASGN
           end
-          if lex_state.is_argument && space_seen && ! src.check(/\s/) then
-            return parse_quote
-          end
+          return parse_quote if is_arg? && space_seen && ! src.check(/\s/)
           self.fix_arg_lex_state
           self.yacc_value = "%"
@@ -1258,12 +1265,12 @@ class RubyLexer
     self.command_start = true
     result = :tLPAREN2
-    if lex_state == :expr_beg || lex_state == :expr_mid then
+    if in_lex_state? :expr_beg, :expr_mid then
       result = :tLPAREN
     elsif space_seen then
-      if lex_state == :expr_cmdarg then
+      if in_lex_state? :expr_cmdarg then
         result = :tLPAREN_ARG
-      elsif lex_state == :expr_arg then
+      elsif in_lex_state? :expr_arg then
         self.tern.push false
         warning "don't put space before argument parentheses"
       end
@@ -1275,20 +1282,15 @@ class RubyLexer
   end
   def is_end?
-    (lex_state == :expr_end    ||
-     lex_state == :expr_endarg ||
-     lex_state == :expr_endfn)
+    in_lex_state? :expr_end, :expr_endarg, :expr_endfn
   end
   def is_arg?
-    lex_state == :expr_arg || lex_state == :expr_cmdarg
+    in_lex_state? :expr_arg, :expr_cmdarg
   end
   def is_beg?
-    (lex_state == :expr_beg   ||
-     lex_state == :expr_mid   ||
-     lex_state == :expr_value ||
-     lex_state == :expr_class)
+    in_lex_state? :expr_beg, :expr_mid, :expr_value, :expr_class
   end
   def is_space_arg? c = "x"
@@ -1296,34 +1298,21 @@ class RubyLexer
   end
   def is_label_possible? command_state
-    (lex_state == :expr_beg && !command_state) || is_arg?
+    (in_lex_state?(:expr_beg) && !command_state) || is_arg?
   end
-  def yylex_paren19
-    if is_beg? then
-      result = :tLPAREN
-    elsif is_space_arg? then
-      result = :tLPAREN_ARG
-    else
-      self.tern.push false
-      result = :tLPAREN2
-    end
-    # p :wtf_paren => [lex_state, space_seen, result]
+  def yylex_paren19 # TODO: move or remove
+    result =
+      if is_beg? then
+        :tLPAREN
+      elsif is_space_arg? then
+        :tLPAREN_ARG
+      else
+        :tLPAREN2 # plain '(' in parse.y
+      end
-    # HACK paren_nest++;
+    # paren_nest++; # TODO
-    # HACK: this is a mess, but it makes the tests pass, so suck it
-    # (stolen from the 1.8 side)
-    if lex_state == :expr_beg || lex_state == :expr_mid then
-      # do nothing
-    elsif space_seen then
-      if lex_state == :expr_arg then
-        self.tern.push false
-      end
-    else
-      self.tern.push false
-    end
     result
   end
@@ -1345,7 +1334,7 @@ class RubyLexer
       if token =~ /[!?]$/ then
         result = :tFID
       else
-        if lex_state == :expr_fname then
+        if in_lex_state? :expr_fname then
           # ident=, not =~ => == or followed by =>
           # TODO test lexing of a=>b vs a==>b
           if src.scan(/=(?:(?![~>=])|(?==>))/) then
@@ -1361,7 +1350,7 @@ class RubyLexer
                    end
       end
-      unless self.tern.is_in_state
+      unless ruby18
         if is_label_possible? command_state then
           colon = src.scan(/:/)
@@ -1373,9 +1362,9 @@ class RubyLexer
           src.unscan if colon
         end
-      end unless ruby18
+      end
-      unless lex_state == :expr_dot then
+      unless in_lex_state? :expr_dot then
         # See if it is a reserved word.
         keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
                     RubyParserStuff::Keyword.keyword18 token
@@ -1417,13 +1406,13 @@ class RubyLexer
       # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
       self.lex_state =
-        if is_beg? || lex_state == :expr_dot || is_arg? then
+        if is_beg? || in_lex_state?(:expr_dot) || is_arg? then
           if command_state then
             :expr_cmdarg
           else
             :expr_arg
           end
-        elsif ruby19 && lex_state == :expr_fname then
+        elsif ruby19 && in_lex_state?(:expr_fname) then
           :expr_endfn
         else
           :expr_end