RubyGems - ruby_parser - Versions diffs - 3.1.3 → 3.2.0 - Mend

ruby_parser 3.1.3 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

data.tar.gz.sig +0 -0
data/.autotest +16 -3
data/History.txt +66 -0
data/Manifest.txt +2 -0
data/Rakefile +25 -15
data/bin/ruby_parse_extract_error +22 -2
data/lib/ruby18_parser.rb +27 -15
data/lib/ruby18_parser.y +27 -16
data/lib/ruby19_parser.rb +2296 -2265
data/lib/ruby19_parser.y +54 -35
data/lib/ruby20_parser.rb +6593 -0
data/lib/ruby20_parser.y +2290 -0
data/lib/ruby_lexer.rb +161 -93
data/lib/ruby_parser.rb +1 -1
data/lib/ruby_parser_extras.rb +95 -27
data/test/test_ruby_lexer.rb +476 -29
data/test/test_ruby_parser.rb +1141 -147
data/test/test_ruby_parser_extras.rb +2 -3
metadata +30 -14
metadata.gz.sig +1 -1

data/lib/ruby_parser.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 require 'ruby18_parser'
 require 'ruby19_parser'
+require 'ruby20_parser'
 require 'ruby_parser_extras'

data/lib/ruby_parser_extras.rb CHANGED Viewed

@@ -99,7 +99,8 @@ class RPStringScanner < StringScanner
     alias :old_scan :scan
     def scan re
       s = old_scan re
-      d :scan => [s, caller.first] if s
+      where = caller.first.split(/:/).first(2).join(":")
+      d :scan => [s, where] if s
       s
     end
   end
@@ -110,11 +111,39 @@ class RPStringScanner < StringScanner
 end
 module RubyParserStuff
-  VERSION = "3.1.3" unless constants.include? "VERSION" # SIGH
+  VERSION = "3.2.0" unless constants.include? "VERSION" # SIGH
   attr_accessor :lexer, :in_def, :in_single, :file
   attr_reader :env, :comments
+  $good20 = []
+  %w[
+  ].map(&:to_i).each do |n|
+    $good20[n] = n
+  end
+  def debug20 n, v = nil, r = nil
+    raise "not yet #{n} #{v.inspect} => #{r.inspect}" unless $good20[n]
+  end
+  ruby19 = "".respond_to? :encoding
+  # This is in sorted order of occurrence according to
+  # charlock_holmes against 500k files, with UTF_8 forced
+  # to the top.
+  #
+  # Overwrite this constant if you need something different.
+  ENCODING_ORDER = [
+    Encoding::UTF_8, # moved to top to reflect default in 2.0
+    Encoding::ISO_8859_1,
+    Encoding::ISO_8859_2,
+    Encoding::ISO_8859_9,
+    Encoding::SHIFT_JIS,
+    Encoding::WINDOWS_1252,
+    Encoding::EUC_JP
+  ] if ruby19
   def syntax_error msg
     raise RubyParser::SyntaxError, msg
   end
@@ -129,7 +158,7 @@ module RubyParserStuff
   end
   def arg_blk_pass node1, node2 # TODO: nuke
-    node1 = s(:arglist, node1) unless [:arglist, :array].include? node1.first
+    node1 = s(:arglist, node1) unless [:arglist, :call_args, :array, :args].include? node1.first
     node1 << node2 if node2
     node1
   end
@@ -146,12 +175,14 @@ module RubyParserStuff
       if sexp.size == 2 and sexp[1].sexp_type == :array then
         s(:masgn, *sexp[1][1..-1].map { |sub| clean_mlhs sub })
       else
+        debug20 5
         sexp
       end
     when :gasgn, :iasgn, :lasgn, :cvasgn then
       if sexp.size == 2 then
         sexp.last
       else
+        debug20 7
         sexp # optional value
       end
     else
@@ -182,6 +213,34 @@ module RubyParserStuff
     end
   end
+  def array_to_hash array
+    s(:hash, *array[1..-1])
+  end
+  def call_args args
+    result = s(:call_args)
+    args.each do |arg|
+      case arg
+      when Sexp then
+        case arg.sexp_type
+        when :array, :args, :call_args then # HACK? remove array at some point
+          result.concat arg[1..-1]
+        else
+          result << arg
+        end
+      when Symbol then
+        result << arg
+      when ",", nil then
+        # ignore
+      else
+        raise "unhandled: #{arg.inspect} in #{args.inspect}"
+      end
+    end
+    result
+  end
   def args args
     result = s(:args)
@@ -189,21 +248,27 @@ module RubyParserStuff
       case arg
       when Sexp then
         case arg.sexp_type
-        when :args, :block, :array then
+        when :args, :block, :array, :call_args then # HACK call_args mismatch
           result.concat arg[1..-1]
         when :block_arg then
           result << :"&#{arg.last}"
-        when :masgn then
+        when :shadow then
+          if Sexp === result.last and result.last.sexp_type == :shadow then
+            result.last << arg.last
+          else
+            result << arg
+          end
+        when :masgn, :block_pass, :hash then # HACK: remove. prolly call_args
           result << arg
         else
-          raise "unhandled: #{arg.inspect}"
+          raise "unhandled: #{arg.sexp_type} in #{args.inspect}"
         end
       when Symbol then
         result << arg
-      when ",", nil then
+      when ",", "|", ";", "(", ")", nil then
         # ignore
       else
-        raise "unhandled: #{arg.inspect}"
+        raise "unhandled: #{arg.inspect} in #{args.inspect}"
       end
     end
@@ -216,7 +281,7 @@ module RubyParserStuff
   end
   def assignable(lhs, value = nil)
-    id = lhs.to_sym
+    id = lhs.to_sym unless Sexp === lhs
     id = id.to_sym if Sexp === id
     raise "write a test 1" if id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
@@ -254,7 +319,7 @@ module RubyParserStuff
                end
              end
-    self.env[id] ||= :lvar
+    self.env[id] ||= :lvar unless result.sexp_type == :cdecl # HACK? cdecl
     result << value if value
@@ -526,7 +591,7 @@ module RubyParserStuff
     # TODO: need a test with f(&b) { } to produce warning
     args ||= s(:arglist)
-    args[0] = :arglist if args.first == :array
+    args[0] = :arglist if [:array, :call_args].include? args.first
     args = s(:arglist, args) unless args.first == :arglist
     # HACK quick hack to make this work quickly... easy to clean up above
@@ -932,18 +997,8 @@ module RubyParserStuff
   end
   def hack_encoding str, extra = nil
-    # this is in sorted order of occurrence according to
-    # charlock_holmes against 500k files
-    encodings = [
-                 extra,
-                 Encoding::ISO_8859_1,
-                 Encoding::UTF_8,
-                 Encoding::ISO_8859_2,
-                 Encoding::ISO_8859_9,
-                 Encoding::SHIFT_JIS,
-                 Encoding::WINDOWS_1252,
-                 Encoding::EUC_JP,
-                ].compact
+    encodings = ENCODING_ORDER.dup
+    encodings.unshift(extra) unless extra.nil?
     # terrible, horrible, no good, very bad, last ditch effort.
     encodings.each do |enc|
@@ -1134,6 +1189,8 @@ module RubyParserStuff
     WORDLIST18 = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
     WORDLIST19 = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
+    WORDLIST18.delete "__ENCODING__"
     %w[and case elsif for if in module or unless until when while].each do |k|
       WORDLIST19[k] = WORDLIST19[k].dup
       WORDLIST19[k].state = :expr_value
@@ -1267,6 +1324,10 @@ module RubyParserStuff
   end
 end
+class Ruby20Parser < Racc::Parser
+  include RubyParserStuff
+end
 class Ruby19Parser < Racc::Parser
   include RubyParserStuff
 end
@@ -1286,12 +1347,17 @@ class RubyParser
   def initialize
     @p18 = Ruby18Parser.new
     @p19 = Ruby19Parser.new
+    @p20 = Ruby20Parser.new
   end
   def process(s, f = "(string)", t = 10) # parens for emacs *sigh*
-    @p19.process s, f, t
-  rescue Racc::ParseError
-    @p18.process s, f, t
+    @p20.process s, f, t
+  rescue Racc::ParseError, RubyParser::SyntaxError
+    begin
+      @p19.process s, f, t
+    rescue Racc::ParseError, RubyParser::SyntaxError
+      @p18.process s, f, t
+    end
   end
   alias :parse :process
@@ -1307,6 +1373,8 @@ class RubyParser
       Ruby18Parser.new
     when /^1\.9/ then
       Ruby19Parser.new
+    when /^2.0/ then
+      Ruby20Parser.new
     else
       raise "unrecognized RUBY_VERSION #{RUBY_VERSION}"
     end
@@ -1337,7 +1405,7 @@ class Sexp
   end
   def to_sym
-    raise "no"
+    raise "no: #{self.inspect}.to_sym is a bug"
     self.value.to_sym
   end

data/test/test_ruby_lexer.rb CHANGED Viewed

@@ -2,30 +2,49 @@
 # encoding: US-ASCII
 require 'rubygems'
-gem "minitest"
 require 'minitest/autorun'
 require 'ruby_lexer'
 require 'ruby18_parser'
+require 'ruby20_parser'
+class TestRubyLexer < Minitest::Test
+  attr_accessor :processor, :lex, :parser_class
-class TestRubyLexer < MiniTest::Unit::TestCase
-  alias :deny :refute
+  alias :lexer :lex # lets me copy/paste code from parser
+  alias :lexer= :lex=
   def setup
-    setup_lexer Ruby18Parser
+    setup_lexer_class Ruby20Parser
   end
-  def setup_lexer parser_class
-    p = parser_class.new
-    @lex = p.lexer
-    @lex.src = "blah blah"
-    @lex.lex_state = :expr_beg
+  def setup_lexer input, exp_sexp = nil
+    setup_new_parser
+    lex.src = input
+    lex.lex_state = :expr_beg
+    assert_equal exp_sexp, processor.class.new.parse(input) if exp_sexp
+  end
+  def setup_new_parser
+    self.processor = parser_class.new
+    self.lex = processor.lexer
+  end
+  def setup_lexer_class parser_class
+    self.parser_class = parser_class
+    setup_new_parser
+    setup_lexer "blah blah"
   end
   def test_advance
     assert @lex.advance # blah
     assert @lex.advance # blah
-    deny   @lex.advance # nada
+    refute @lex.advance # nada
+  end
+  def test_unicode_ident
+    s = "@\u1088\u1077\u1093\u1072"
+    util_lex_token(s.dup,
+                   :tIVAR, s.dup)
   end
   def test_read_escape
@@ -44,6 +63,11 @@ class TestRubyLexer < MiniTest::Unit::TestCase
     util_escape "\010", "b"
     util_escape " ",    "s"
     util_escape "q",    "q" # plain vanilla escape
+    util_escape "8", "8" # ugh... mri... WHY?!?
+    util_escape "9", "9" # ugh... mri... WHY?!?
+    util_escape "$",    "444" # ugh
   end
   def test_read_escape_c
@@ -133,6 +157,8 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   end
   def test_yylex_label__18
+    setup_lexer_class Ruby18Parser
     util_lex_token("{a:",
                    :tLBRACE,     "{",
                    :tIDENTIFIER, "a",
@@ -140,6 +166,8 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   end
   def test_yylex_label_in_params__18
+    setup_lexer_class Ruby18Parser
     util_lex_token("foo(a:",
                    :tIDENTIFIER, "foo",
                    :tLPAREN2,    "(",
@@ -148,7 +176,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   end
   def test_yylex_label__19
-    setup_lexer Ruby19Parser
+    setup_lexer_class Ruby19Parser
     util_lex_token("{a:",
                    :tLBRACE, "{",
@@ -156,7 +184,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   end
   def test_yylex_label_in_params__19
-    setup_lexer Ruby19Parser
+    setup_lexer_class Ruby19Parser
     util_lex_token("foo(a:",
                    :tIDENTIFIER, "foo",
@@ -164,6 +192,345 @@ class TestRubyLexer < MiniTest::Unit::TestCase
                    :tLABEL,      "a")
   end
+  def assert_next_lexeme token=nil, value=nil, state=nil, paren=nil, brace=nil
+    assert @lex.advance, "no more tokens"
+    msg = message {
+      act = [@lex.token, @lex.yacc_value, @lex.lex_state,
+             @lex.paren_nest, @lex.brace_nest]
+      exp = [token, value, state, paren, brace]
+      "#{exp.inspect} vs #{act.inspect}"
+    }
+    act_value = @lex.yacc_value
+    act_value = act_value.first if Array === act_value
+    assert_equal token, @lex.token,      msg
+    assert_equal value, act_value,       msg
+    assert_equal state, @lex.lex_state,  msg
+    assert_equal paren, @lex.paren_nest, msg if paren
+    assert_equal brace, @lex.brace_nest, msg if brace
+  end
+  def refute_lexeme
+    refute @lex.advance, "not empty: #{[@lex.token, @lex.yacc_value].inspect}"
+  end
+  def assert_lex input, exp_sexp, *args
+    setup_lexer input, exp_sexp
+    args.each_slice(5) do |token, value, state, paren, brace|
+      assert_next_lexeme token, value, state, paren, brace
+    end
+    refute_lexeme
+  end
+  def emulate_string_interpolation
+    lex_strterm = lexer.lex_strterm
+    string_nest = lexer.string_nest
+    brace_nest  = lexer.brace_nest
+    lexer.string_nest = 0
+    lexer.brace_nest  = 0
+    lexer.cond.push false
+    lexer.cmdarg.push false
+    lexer.lex_strterm = nil
+    lexer.lex_state = :expr_beg
+    yield
+    lexer.lex_state = :expr_endarg
+    assert_next_lexeme :tRCURLY,     "}",  :expr_endarg, 0
+    lexer.lex_strterm = lex_strterm
+    lexer.lex_state   = :expr_beg
+    lexer.string_nest = string_nest
+    lexer.brace_nest  = brace_nest
+    lexer.cond.lexpop
+    lexer.cmdarg.lexpop
+  end
+  def test_yylex_paren_string_parens_interpolated
+    setup_lexer('%((#{b}#{d}))',
+                s(:dstr,
+                  "(",
+                  s(:evstr, s(:call, nil, :b)),
+                  s(:evstr, s(:call, nil, :d)),
+                  s(:str, ")")))
+    assert_next_lexeme :tSTRING_BEG,     "%)", :expr_beg, 0, 0
+    assert_next_lexeme :tSTRING_CONTENT, "(",  :expr_beg, 0, 0
+    assert_next_lexeme :tSTRING_DBEG,    nil,  :expr_beg, 0, 0
+    emulate_string_interpolation do
+      assert_next_lexeme :tIDENTIFIER,   "b",  :expr_arg, 0, 0
+    end
+    assert_next_lexeme :tSTRING_DBEG,    nil,  :expr_beg, 0, 0
+    emulate_string_interpolation do
+      assert_next_lexeme :tIDENTIFIER,   "d",  :expr_arg, 0, 0
+    end
+    assert_next_lexeme :tSTRING_CONTENT, ")",  :expr_beg, 0, 0
+    assert_next_lexeme :tSTRING_END,     ")",  :expr_end, 0, 0
+    refute_lexeme
+  end
+  def test_yylex_paren_string_interpolated_regexp
+    setup_lexer('%( #{(/abcd/)} )',
+                s(:dstr, " ", s(:evstr, s(:lit, /abcd/)), s(:str, " ")))
+    assert_next_lexeme :tSTRING_BEG,       "%)",   :expr_beg, 0, 0
+    assert_next_lexeme :tSTRING_CONTENT,   " ",    :expr_beg, 0, 0
+    assert_next_lexeme :tSTRING_DBEG,      nil,    :expr_beg, 0, 0
+    emulate_string_interpolation do
+      assert_next_lexeme :tLPAREN,         "(",    :expr_beg, 1, 0
+      assert_next_lexeme :tREGEXP_BEG,     "/",    :expr_beg, 1, 0
+      assert_next_lexeme :tSTRING_CONTENT, "abcd", :expr_beg, 1, 0
+      assert_next_lexeme :tREGEXP_END,     "",     :expr_end, 1, 0
+      assert_next_lexeme :tRPAREN,         ")",    :expr_endfn, 0, 0
+    end
+    assert_next_lexeme :tSTRING_CONTENT,   " ",    :expr_beg, 0, 0
+    assert_next_lexeme :tSTRING_END,       ")",    :expr_end, 0, 0
+    refute_lexeme
+  end
+  def test_yylex_not_at_defn
+    assert_lex("def +@; end",
+               s(:defn, :+@, s(:args), s(:nil)),
+               :kDEF,   "def", :expr_fname, 0, 0,
+               :tUPLUS, "+@",  :expr_arg,   0, 0,
+               :tSEMI,  ";",   :expr_beg,   0, 0,
+               :kEND,   "end", :expr_end,   0, 0)
+    assert_lex("def !@; end",
+               s(:defn, :"!@", s(:args), s(:nil)),
+               :kDEF,   "def", :expr_fname, 0, 0,
+               :tUBANG, "!@",  :expr_arg,   0, 0,
+               :tSEMI,  ";",   :expr_beg,   0, 0,
+               :kEND,   "end", :expr_end,   0, 0)
+  end
+  def test_yylex_not_at_ivar
+    assert_lex("!@ivar",
+               s(:call, s(:ivar, :@ivar), :"!"),
+               :tBANG, "!",     :expr_beg, 0, 0,
+               :tIVAR, "@ivar", :expr_end, 0, 0)
+  end
+  def test_yylex_number_times_ident_times_return_number
+    assert_lex("1 * b * 3",
+               s(:call,
+                 s(:call, s(:lit, 1), :*, s(:call, nil, :b)),
+                 :*, s(:lit, 3)),
+               :tINTEGER,      1, :expr_end, 0, 0,
+               :tSTAR2,      "*", :expr_beg, 0, 0,
+               :tIDENTIFIER, "b", :expr_arg, 0, 0,
+               :tSTAR2,      "*", :expr_beg, 0, 0,
+               :tINTEGER,      3, :expr_end, 0, 0)
+    assert_lex("1 * b *\n 3",
+               s(:call,
+                 s(:call, s(:lit, 1), :*, s(:call, nil, :b)),
+                 :*, s(:lit, 3)),
+               :tINTEGER,      1, :expr_end, 0, 0,
+               :tSTAR2,      "*", :expr_beg, 0, 0,
+               :tIDENTIFIER, "b", :expr_arg, 0, 0,
+               :tSTAR2,      "*", :expr_beg, 0, 0,
+               :tINTEGER,      3, :expr_end, 0, 0)
+  end
+  def test_yylex_paren_string_parens_interpolated_regexp
+    setup_lexer('%((#{(/abcd/)}))',
+                s(:dstr, "(", s(:evstr, s(:lit, /abcd/)), s(:str, ")")))
+    assert_next_lexeme :tSTRING_BEG,       "%)",   :expr_beg, 0, 0
+    assert_next_lexeme :tSTRING_CONTENT,   "(",    :expr_beg, 0, 0
+    assert_next_lexeme :tSTRING_DBEG,       nil,   :expr_beg, 0, 0
+    emulate_string_interpolation do
+      assert_next_lexeme :tLPAREN,         "(",    :expr_beg, 1, 0
+      assert_next_lexeme :tREGEXP_BEG,     "/",    :expr_beg, 1, 0
+      assert_next_lexeme :tSTRING_CONTENT, "abcd", :expr_beg, 1, 0
+      assert_next_lexeme :tREGEXP_END,     "",     :expr_end, 1, 0
+      assert_next_lexeme :tRPAREN,         ")",    :expr_endfn, 0, 0
+    end
+    assert_next_lexeme :tSTRING_CONTENT,   ")",    :expr_beg, 0, 0
+    assert_next_lexeme :tSTRING_END,       ")",    :expr_end, 0, 0
+    refute_lexeme
+  end
+  def test_yylex_method_parens_chevron
+    assert_lex("a()<<1",
+               s(:call, s(:call, nil, :a), :<<, s(:lit, 1)),
+               :tIDENTIFIER, "a",   :expr_cmdarg, 0, 0,
+               :tLPAREN2,    "(",   :expr_beg,    1, 0,
+               :tRPAREN,     ")",   :expr_endfn,  0, 0,
+               :tLSHFT,      "<<" , :expr_beg,    0, 0,
+               :tINTEGER,    1,     :expr_end,    0, 0)
+  end
+  def test_yylex_lambda_args__20
+    setup_lexer_class Ruby20Parser
+    assert_lex("-> (a) { }",
+               s(:iter, s(:call, nil, :lambda),
+                 s(:args, :a)),
+               :tLAMBDA,     nil, :expr_endfn,  0, 0,
+               :tLPAREN2,    "(", :expr_beg,    1, 0,
+               :tIDENTIFIER, "a", :expr_arg,    1, 0,
+               :tRPAREN,     ")", :expr_endfn,  0, 0,
+               :tLCURLY,     "{", :expr_beg,    0, 1,
+               :tRCURLY,     "}", :expr_endarg, 0, 0)
+  end
+  def test_yylex_lambda_args_opt__20
+    setup_lexer_class Ruby20Parser
+    assert_lex("-> (a=nil) { }",
+               s(:iter, s(:call, nil, :lambda),
+                 s(:args, s(:lasgn, :a, s(:nil)))),
+               :tLAMBDA,     nil, :expr_endfn,  0, 0,
+               :tLPAREN2,    "(", :expr_beg,    1, 0,
+               :tIDENTIFIER, "a", :expr_arg,    1, 0,
+               :tEQL,        "=", :expr_beg,    1, 0,
+               :kNIL,        "nil", :expr_end,    1, 0,
+               :tRPAREN,     ")", :expr_endfn,  0, 0,
+               :tLCURLY,     "{", :expr_beg,    0, 1,
+               :tRCURLY,     "}", :expr_endarg, 0, 0)
+  end
+  def test_yylex_lambda_hash__20
+    setup_lexer_class Ruby20Parser
+    assert_lex("-> (a={}) { }",
+               s(:iter, s(:call, nil, :lambda),
+                 s(:args, s(:lasgn, :a, s(:hash)))),
+               :tLAMBDA,     nil, :expr_endfn,  0, 0,
+               :tLPAREN2,    "(", :expr_beg,    1, 0,
+               :tIDENTIFIER, "a", :expr_arg,    1, 0,
+               :tEQL,        "=", :expr_beg,    1, 0,
+               :tLBRACE,     "{", :expr_beg,    1, 1,
+               :tRCURLY,     "}", :expr_endarg, 1, 0,
+               :tRPAREN,     ")", :expr_endfn,  0, 0,
+               :tLCURLY,     "{", :expr_beg,    0, 1,
+               :tRCURLY,     "}", :expr_endarg, 0, 0)
+  end
+  def test_yylex_iter_array_curly
+    assert_lex("f :a, [:b] { |c, d| }", # yes, this is bad code
+               s(:iter,
+                 s(:call, nil, :f, s(:lit, :a), s(:array, s(:lit, :b))),
+                 s(:args, :c, :d)),
+               :tIDENTIFIER, "f", :expr_cmdarg, 0, 0,
+               :tSYMBOL,     "a", :expr_end,    0, 0,
+               :tCOMMA,      ",", :expr_beg,    0, 0,
+               :tLBRACK,     "[", :expr_beg,    1, 0,
+               :tSYMBOL,     "b", :expr_end,    1, 0,
+               :tRBRACK,     "]", :expr_endarg, 0, 0,
+               :tLBRACE_ARG, "{", :expr_beg,    0, 1,
+               :tPIPE,       "|", :expr_beg,    0, 1,
+               :tIDENTIFIER, "c", :expr_arg,    0, 1,
+               :tCOMMA,      ",", :expr_beg,    0, 1,
+               :tIDENTIFIER, "d", :expr_arg,    0, 1,
+               :tPIPE,       "|", :expr_beg,    0, 1,
+               :tRCURLY,     "}", :expr_endarg, 0, 0)
+  end
+  def test_yylex_const_call_same_name
+    assert_lex("X = a { }; b { f :c }",
+               s(:block,
+                 s(:cdecl, :X, s(:iter, s(:call, nil, :a), s(:args))),
+                 s(:iter,
+                   s(:call, nil, :b),
+                   s(:args),
+                   s(:call, nil, :f, s(:lit, :c)))),
+               :tCONSTANT,   "X", :expr_cmdarg, 0, 0,
+               :tEQL,        "=", :expr_beg,    0, 0,
+               :tIDENTIFIER, "a", :expr_arg,    0, 0,
+               :tLCURLY,     "{", :expr_beg,    0, 1,
+               :tRCURLY,     "}", :expr_endarg, 0, 0,
+               :tSEMI,       ";", :expr_beg,    0, 0,
+               :tIDENTIFIER, "b", :expr_cmdarg, 0, 0,
+               :tLCURLY,     "{", :expr_beg,    0, 1,
+               :tIDENTIFIER, "f", :expr_cmdarg, 0, 1, # different
+               :tSYMBOL,     "c", :expr_end,    0, 1,
+               :tRCURLY,     "}", :expr_endarg, 0, 0)
+    assert_lex("X = a { }; b { X :c }",
+               s(:block,
+                 s(:cdecl, :X, s(:iter, s(:call, nil, :a), s(:args))),
+                 s(:iter,
+                   s(:call, nil, :b),
+                   s(:args),
+                   s(:call, nil, :X, s(:lit, :c)))),
+               :tCONSTANT,   "X", :expr_cmdarg, 0, 0,
+               :tEQL,        "=", :expr_beg,    0, 0,
+               :tIDENTIFIER, "a", :expr_arg,    0, 0,
+               :tLCURLY,     "{", :expr_beg,    0, 1,
+               :tRCURLY,     "}", :expr_endarg, 0, 0,
+               :tSEMI,       ";", :expr_beg,    0, 0,
+               :tIDENTIFIER, "b", :expr_cmdarg, 0, 0,
+               :tLCURLY,     "{", :expr_beg,    0, 1,
+               :tCONSTANT,   "X", :expr_cmdarg, 0, 1, # same
+               :tSYMBOL,     "c", :expr_end,    0, 1,
+               :tRCURLY,     "}", :expr_endarg, 0, 0)
+  end
+  def test_yylex_lasgn_call_same_name
+    assert_lex("a = b.c :d => 1",
+               s(:lasgn, :a,
+                 s(:call, s(:call, nil, :b), :c,
+                   s(:hash, s(:lit, :d), s(:lit, 1)))),
+               :tIDENTIFIER, "a", :expr_cmdarg, 0, 0,
+               :tEQL,        "=", :expr_beg,    0, 0,
+               :tIDENTIFIER, "b", :expr_arg,    0, 0,
+               :tDOT,        ".", :expr_dot,    0, 0,
+               :tIDENTIFIER, "c", :expr_arg,    0, 0, # different
+               :tSYMBOL,     "d", :expr_end,    0, 0,
+               :tASSOC,      "=>", :expr_beg,   0, 0,
+               :tINTEGER,      1, :expr_end,    0, 0)
+    assert_lex("a = b.a :d => 1",
+               s(:lasgn, :a,
+                 s(:call, s(:call, nil, :b), :a,
+                   s(:hash, s(:lit, :d), s(:lit, 1)))),
+               :tIDENTIFIER, "a", :expr_cmdarg, 0, 0,
+               :tEQL,        "=", :expr_beg,    0, 0,
+               :tIDENTIFIER, "b", :expr_arg,    0, 0,
+               :tDOT,        ".", :expr_dot,    0, 0,
+               :tIDENTIFIER, "a", :expr_arg,    0, 0, # same as lvar
+               :tSYMBOL,     "d", :expr_end,    0, 0,
+               :tASSOC,      "=>", :expr_beg,   0, 0,
+               :tINTEGER,      1, :expr_end,    0, 0)
+  end
   def test_yylex_back_ref
     util_lex_token("[$&, $`, $', $+]",
                    :tLBRACK,   "[",
@@ -628,6 +995,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   end
   def test_yylex_heredoc_double_interp
+    # TODO: convert to assert_lex
     util_lex_token("a = <<\"EOF\"\n#x a \#@a b \#$b c \#{3} \nEOF\n",
                    :tIDENTIFIER,     "a",
                    :tEQL,              "=",
@@ -739,10 +1107,18 @@ class TestRubyLexer < MiniTest::Unit::TestCase
     util_lex_fname "<=>", :tCMP
   end
-  def test_yylex_identifier_def
+  def test_yylex_identifier_def__18
+    setup_lexer_class Ruby18Parser
     util_lex_fname "identifier", :tIDENTIFIER, :expr_end
   end
+  def test_yylex_identifier_def__1920
+    setup_lexer_class Ruby19Parser
+    util_lex_fname "identifier", :tIDENTIFIER, :expr_endfn
+  end
   def test_yylex_identifier_eh
     util_lex_token("identifier?", :tFID, "identifier?")
   end
@@ -774,10 +1150,18 @@ class TestRubyLexer < MiniTest::Unit::TestCase
     util_lex_fname "^", :tCARET
   end
-  def test_yylex_identifier_equals_def
+  def test_yylex_identifier_equals_def__18
+    setup_lexer_class Ruby18Parser
     util_lex_fname "identifier=", :tIDENTIFIER, :expr_end
   end
+  def test_yylex_identifier_equals_def__1920
+    setup_lexer_class Ruby19Parser
+    util_lex_fname "identifier=", :tIDENTIFIER, :expr_endfn
+  end
   def test_yylex_identifier_equals_def2
     util_lex_fname "==", :tEQ
   end
@@ -868,25 +1252,25 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   end
   def test_yylex_question_eh_a__18
-    @lex = RubyLexer.new 18
+    setup_lexer_class Ruby18Parser
     util_lex_token "?a", :tINTEGER, 97
   end
   def test_yylex_question_eh_a__19
-    @lex = RubyLexer.new 19
+    setup_lexer_class Ruby19Parser
     util_lex_token '?a', :tSTRING, "a"
   end
   def test_yylex_question_eh_escape_M_escape_C__18
-    @lex = RubyLexer.new 18
+    setup_lexer_class Ruby18Parser
     util_lex_token '?\M-\C-a', :tINTEGER, 129
   end
   def test_yylex_question_eh_escape_M_escape_C__19
-    @lex = RubyLexer.new 19
+    setup_lexer_class Ruby19Parser
     util_lex_token '?\M-\C-a', :tSTRING, "\M-\C-a"
   end
@@ -911,6 +1295,10 @@ class TestRubyLexer < MiniTest::Unit::TestCase
     util_bad_token "08"
   end
+  def test_yylex_integer_oct_bad_range2
+    util_bad_token "08"
+  end
   def test_yylex_integer_oct_bad_underscores
     util_bad_token "01__23"
   end
@@ -1060,11 +1448,20 @@ class TestRubyLexer < MiniTest::Unit::TestCase
     util_lex_token(" (", :tLPAREN_ARG, "(")
   end
-  def test_yylex_open_bracket_exprarg
+  def test_yylex_open_bracket_exprarg__18
+    setup_lexer_class Ruby18Parser
     @lex.lex_state = :expr_arg
     util_lex_token(" (", :tLPAREN2, "(")
   end
+  def test_yylex_open_bracket_exprarg__19
+    setup_lexer_class Ruby19Parser
+    @lex.lex_state = :expr_arg
+    util_lex_token(" (", :tLPAREN_ARG, "(")
+  end
   def test_yylex_open_curly_bracket
     util_lex_token("{",
                    :tLBRACE, "{")
@@ -1166,6 +1563,13 @@ class TestRubyLexer < MiniTest::Unit::TestCase
     util_lex_token "+@", :tUPLUS, "+@"
   end
+  def test_yylex_not_unary_method
+    skip "not yet"
+    @lex.lex_state = :expr_fname
+    util_lex_token "!@", :tUBANG, "!@"
+  end
   def test_yylex_numbers
     util_lex_token "0b10", :tINTEGER, 2
     util_lex_token "0B10", :tINTEGER, 2
@@ -1211,13 +1615,13 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   end
   def test_yylex_question__18
-    @lex = RubyLexer.new 18
+    setup_lexer_class Ruby18Parser
     util_lex_token "?*", :tINTEGER, 42
   end
   def test_yylex_question__19
-    @lex = RubyLexer.new 19
+    setup_lexer_class Ruby19Parser
     util_lex_token "?*", :tSTRING, "*"
   end
@@ -1236,7 +1640,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   end
   def test_yylex_question_ws_backslashed__18
-    @lex = RubyLexer.new 18
+    setup_lexer_class Ruby18Parser
     @lex.lex_state = :expr_beg
     util_lex_token "?\\ ", :tINTEGER, 32
@@ -1253,7 +1657,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   end
   def test_yylex_question_ws_backslashed__19
-    @lex = RubyLexer.new 19
+    setup_lexer_class Ruby19Parser
     @lex.lex_state = :expr_beg
     util_lex_token "?\\ ", :tSTRING, " "
@@ -1617,8 +2021,17 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   end
   def test_yylex_string_double_escape_M
+    chr = "\341"
+    chr.force_encoding("UTF-8") if RubyLexer::RUBY19
     util_lex_token('"\\M-a"',
-                   :tSTRING, "\341")
+                   :tSTRING, chr)
+  end
+  def test_why_does_ruby_hate_me?
+    util_lex_token('"Nl%\000\000A\000\999"', # you should be ashamed
+                   :tSTRING,
+                   ["Nl%", "\x00", "\x00", "A", "\x00", "999"].join)
   end
   def test_yylex_string_double_escape_M_backslash
@@ -1684,6 +2097,11 @@ class TestRubyLexer < MiniTest::Unit::TestCase
                    :tSTRING, "n = ABC")
   end
+  def test_yylex_string_double_escape_octal_fucked
+    util_lex_token('"n = \\444"',
+                   :tSTRING, "n = $")
+  end
   def test_yylex_string_double_interp
     util_lex_token("\"blah #x a \#@a b \#$b c \#{3} # \"",
                    :tSTRING_BEG,     "\"",
@@ -1717,6 +2135,30 @@ class TestRubyLexer < MiniTest::Unit::TestCase
                    :tSTRING, "\000")
   end
+  def test_yylex_string_pct_i
+    util_lex_token("%i[s1 s2\ns3]",
+                   :tQSYMBOLS_BEG,   "%i[",
+                   :tSTRING_CONTENT, "s1",
+                   :tSPACE,              nil,
+                   :tSTRING_CONTENT, "s2",
+                   :tSPACE,              nil,
+                   :tSTRING_CONTENT, "s3",
+                   :tSPACE,              nil,
+                   :tSTRING_END,     nil)
+  end
+  def test_yylex_string_pct_I
+    util_lex_token("%I[s1 s2\ns3]",
+                   :tSYMBOLS_BEG,    "%I[",
+                   :tSTRING_CONTENT, "s1",
+                   :tSPACE,              nil,
+                   :tSTRING_CONTENT, "s2",
+                   :tSPACE,              nil,
+                   :tSTRING_CONTENT, "s3",
+                   :tSPACE,              nil,
+                   :tSTRING_END,     nil)
+  end
   def test_yylex_string_pct_Q
     util_lex_token("%Q[s1 s2]",
                    :tSTRING_BEG,     "%Q[",
@@ -1877,7 +2319,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   def test_yylex_underscore_end
     @lex.src = "__END__\n"
-    deny @lex.advance
+    refute @lex.advance
   end
   def test_yylex_uplus
@@ -1943,7 +2385,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   def util_escape expected, input
     @lex.src = input
-    assert_equal expected, @lex.read_escape
+    assert_equal expected, @lex.read_escape, input
   end
   def util_escape_bad input
@@ -1956,7 +2398,11 @@ class TestRubyLexer < MiniTest::Unit::TestCase
   def util_lex_fname name, type, end_state = :expr_arg
     @lex.lex_state = :expr_fname # can only set via parser's defs
-    util_lex_token("def #{name} ", :kDEF, "def", type, name)
+    assert_lex("def #{name} ",
+               nil,
+               :kDEF, "def", :expr_fname, 0, 0,
+               type, name, end_state, 0, 0)
     assert_equal end_state, @lex.lex_state
   end
@@ -1968,9 +2414,10 @@ class TestRubyLexer < MiniTest::Unit::TestCase
       token = args.shift
       value = args.shift
       assert @lex.advance, "no more tokens"
+      # assert_equal [token, value].map(&:encoding), [@lex.token, [@lex.yacc_value].flatten.first].map(&:encoding), input # TODO
       assert_equal [token, value], [@lex.token, [@lex.yacc_value].flatten.first], input
     end
-    deny @lex.advance, "must be empty, but had #{[@lex.token, @lex.yacc_value].inspect}"
+    refute @lex.advance, "must be empty, but had #{[@lex.token, @lex.yacc_value].inspect}"
   end
 end