RubyGems - ruby_parser - Versions diffs - 3.0.0 → 3.19.1 - Mend

ruby_parser 3.0.0 → 3.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

checksums.yaml +7 -0
checksums.yaml.gz.sig +0 -0
data/.autotest +36 -19
data/History.rdoc +1297 -0
data/Manifest.txt +35 -7
data/{README.txt → README.rdoc} +44 -14
data/Rakefile +308 -110
data/bin/ruby_parse +3 -1
data/bin/ruby_parse_extract_error +36 -16
data/compare/normalize.rb +218 -0
data/debugging.md +190 -0
data/gauntlet.md +107 -0
data/lib/.document +1 -0
data/lib/rp_extensions.rb +53 -0
data/lib/rp_stringscanner.rb +33 -0
data/lib/ruby20_parser.rb +10973 -0
data/lib/ruby20_parser.y +2683 -0
data/lib/ruby21_parser.rb +10980 -0
data/lib/ruby21_parser.y +2700 -0
data/lib/ruby22_parser.rb +11123 -0
data/lib/ruby22_parser.y +2711 -0
data/lib/ruby23_parser.rb +11132 -0
data/lib/ruby23_parser.y +2713 -0
data/lib/ruby24_parser.rb +11231 -0
data/lib/ruby24_parser.y +2721 -0
data/lib/ruby25_parser.rb +11231 -0
data/lib/ruby25_parser.y +2721 -0
data/lib/ruby26_parser.rb +11253 -0
data/lib/ruby26_parser.y +2736 -0
data/lib/ruby27_parser.rb +12980 -0
data/lib/ruby27_parser.y +3324 -0
data/lib/ruby30_parser.rb +13242 -0
data/lib/ruby30_parser.y +3447 -0
data/lib/ruby31_parser.rb +13622 -0
data/lib/ruby31_parser.y +3481 -0
data/lib/ruby3_parser.yy +3536 -0
data/lib/ruby_lexer.rb +933 -1232
data/lib/ruby_lexer.rex +185 -0
data/lib/ruby_lexer.rex.rb +399 -0
data/lib/ruby_lexer_strings.rb +638 -0
data/lib/ruby_parser.rb +97 -3
data/lib/ruby_parser.yy +3465 -0
data/lib/ruby_parser_extras.rb +1216 -687
data/test/test_ruby_lexer.rb +2249 -1092
data/test/test_ruby_parser.rb +5156 -975
data/test/test_ruby_parser_extras.rb +47 -77
data/tools/munge.rb +250 -0
data/tools/ripper.rb +44 -0
data.tar.gz.sig +1 -1
metadata +200 -155
metadata.gz.sig +0 -0
data/.gemtest +0 -0
data/History.txt +0 -482
data/lib/gauntlet_rubyparser.rb +0 -120
data/lib/ruby18_parser.rb +0 -5747
data/lib/ruby18_parser.y +0 -1873
data/lib/ruby19_parser.rb +0 -6110
data/lib/ruby19_parser.y +0 -2078

data/lib/ruby_lexer.rb CHANGED

@@ -1,1443 +1,1144 @@
-# encoding: US-ASCII
+# frozen_string_literal: true
+# encoding: UTF-8
+$DEBUG = true if ENV["DEBUG"]
 class RubyLexer
+  # :stopdoc:
+  EOF = :eof_haha!
-  RUBY19 = "".respond_to? :encoding
+  ESCAPES = {
+    "a"    => "\007",
+    "b"    => "\010",
+    "e"    => "\033",
+    "f"    => "\f",
+    "n"    => "\n",
+    "r"    => "\r",
+    "s"    => " ",
+    "t"    => "\t",
+    "v"    => "\13",
+    "\\"   => '\\',
+    "\n"   => "",
+    "C-\?" => 127.chr,
+    "c\?"  => 127.chr,
+  }
-  IDENT_CHAR_RE = case RUBY_VERSION
-                  when /^1\.8/ then
-                    /[\w\x80-\xFF]/
-                  when /^(1\.9|2\.0)/ then # HACK - matching 2.0 for now
-                    /[\w\u0080-\uFFFF]/u
-                  else
-                    raise "bork"
-                  end
+  HAS_ENC = "".respond_to? :encoding
-  IDENT_RE = /^#{IDENT_CHAR_RE}+/
+  BTOKENS = {
+    ".."  => :tBDOT2,
+    "..." => :tBDOT3,
+  }
-  attr_accessor :command_start
-  attr_accessor :cmdarg
-  attr_accessor :cond
-  attr_accessor :tern # TODO: rename ternary damnit... wtf
-  attr_accessor :nest
+  TOKENS = {
+    "!"   => :tBANG,
+    "!="  => :tNEQ,
+    "!@"  => :tBANG,
+    "!~"  => :tNMATCH,
+    ","   => :tCOMMA,
+    ".."  => :tDOT2,
+    "..." => :tDOT3,
+    "="   => :tEQL,
+    "=="  => :tEQ,
+    "===" => :tEQQ,
+    "=>"  => :tASSOC,
+    "=~"  => :tMATCH,
+    "->"  => :tLAMBDA,
+  }
-  ESC_RE = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc]))/
+  PERCENT_END = {
+    "(" => ")",
+    "[" => "]",
+    "{" => "}",
+    "<" => ">",
+  }
-  ##
-  # What version of ruby to parse. 18 and 19 are the only valid values
-  # currently supported.
+  SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
-  attr_accessor :version
+  @@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
+  @@regexp_cache[nil] = nil
-  # Additional context surrounding tokens that both the lexer and
-  # grammar use.
-  attr_reader :lex_state
+  def regexp_cache
+    @@regexp_cache
+  end
-  attr_accessor :lex_strterm
+  if $DEBUG then
+    attr_reader :lex_state
-  attr_accessor :parser # HACK for very end of lexer... *sigh*
+    def lex_state= o
+      return if @lex_state == o
-  # Stream of data that yylex examines.
-  attr_reader :src
+      from = ""
+      if ENV["VERBOSE"]
+        path = caller[0]
+        path = caller[1] if path =~ /result/
+        path, line, *_ = path.split(/:/)
+        path.delete_prefix! File.dirname File.dirname __FILE__
+        from = " at .%s:%s" % [path, line]
+      end
-  # Last token read via yylex.
-  attr_accessor :token
+      warn "lex_state: %p -> %p%s" % [lex_state, o, from]
-  attr_accessor :string_buffer
+      @lex_state = o
+    end
+  end
-  # Value of last token which had a value associated with it.
-  attr_accessor :yacc_value
+  # :startdoc:
-  # What handles warnings
-  attr_accessor :warnings
+  attr_accessor :lex_state unless $DEBUG
-  attr_accessor :space_seen
+  attr_accessor :brace_nest
+  attr_accessor :cmdarg
+  attr_accessor :command_start
+  attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
+  attr_accessor :last_state
+  attr_accessor :cond
+  attr_accessor :old_ss
+  attr_accessor :old_lineno
-  EOF = :eof_haha!
+  # these are generated via ruby_lexer.rex: ss, lineno
-  # ruby constants for strings (should this be moved somewhere else?)
-  STR_FUNC_BORING = 0x00
-  STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
-  STR_FUNC_EXPAND = 0x02
-  STR_FUNC_REGEXP = 0x04
-  STR_FUNC_QWORDS = 0x08
-  STR_FUNC_SYMBOL = 0x10
-  STR_FUNC_INDENT = 0x20 # <<-HEREDOC
-  STR_SQUOTE = STR_FUNC_BORING
-  STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
-  STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
-  STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
-  STR_SSYM   = STR_FUNC_SYMBOL
-  STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
+  ##
+  # Additional context surrounding tokens that both the lexer and
+  # grammar use.
-  TOKENS = {
-    "!"   => :tBANG,
-    "!="  => :tNEQ,
-    "!~"  => :tNMATCH,
-    ","   => :tCOMMA,
-    ".."  => :tDOT2,
-    "..." => :tDOT3,
-    "="   => :tEQL,
-    "=="  => :tEQ,
-    "===" => :tEQQ,
-    "=>"  => :tASSOC,
-    "=~"  => :tMATCH,
-    "->"  => :tLAMBDA,
-  }
+  attr_accessor :lex_strterm
+  attr_accessor :lpar_beg
+  attr_accessor :paren_nest
+  attr_accessor :parser # HACK for very end of lexer... *sigh*
+  attr_accessor :space_seen
+  attr_accessor :string_buffer
+  attr_accessor :string_nest
+  # Last token read via next_token.
+  attr_accessor :token
-  # How the parser advances to the next token.
-  #
-  # @return true if not at end of file (EOF).
+  attr_writer :comments
-  def advance
-    r = yylex
-    self.token = r
+  def initialize _ = nil
+    @lex_state = nil # remove one warning under $DEBUG
+    self.lex_state = EXPR_NONE
-    raise "yylex returned nil" unless r
+    self.cond   = RubyParserStuff::StackState.new(:cond, $DEBUG)
+    self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
+    self.ss     = RPStringScanner.new ""
-    return RubyLexer::EOF != r
+    reset
   end
   def arg_ambiguous
-    self.warning("Ambiguous first argument. make sure.")
+    self.warning "Ambiguous first argument. make sure."
+  end
+  def arg_state
+    is_after_operator? ? EXPR_ARG : EXPR_BEG
+  end
+  def ignore_body_comments
+    @comments.clear
   end
-  def comments
+  def comments # TODO: remove this... maybe comment_string + attr_accessor
     c = @comments.join
     @comments.clear
     c
   end
-  def expr_beg_push val
+  def debug n
+    raise "debug #{n}"
+  end
+  def expr_dot?
+    lex_state =~ EXPR_DOT
+  end
+  def expr_fname? # REFACTOR
+    lex_state =~ EXPR_FNAME
+  end
+  def expr_result token, text
     cond.push false
     cmdarg.push false
-    self.lex_state = :expr_beg
-    self.yacc_value = val
+    result EXPR_BEG, token, text
   end
-  def fix_arg_lex_state
-    self.lex_state = if in_lex_state? :expr_fname, :expr_dot then
-                       :expr_arg
-                     else
-                       :expr_beg
-                     end
+  def in_fname? # REFACTOR
+    lex_state =~ EXPR_FNAME
   end
-  def heredoc here # 63 lines
-    _, eos, func, last_line = here
+  def int_with_base base
+    rb_compile_error "Invalid numeric format" if matched =~ /__/
-    indent  = (func & STR_FUNC_INDENT) != 0
-    expand  = (func & STR_FUNC_EXPAND) != 0
-    eos_re  = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
-    err_msg = "can't match #{eos_re.inspect} anywhere in "
+    text = matched
+    case
+    when text.end_with?("ri")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
+    when text.end_with?("r")
+      result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
+    when text.end_with?("i")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
+    else
+      result EXPR_NUM, :tINTEGER, text.to_i(base)
+    end
+  end
-    rb_compile_error err_msg if
-      src.eos?
+  def is_after_operator?
+    lex_state =~ EXPR_FNAME|EXPR_DOT
+  end
-    if src.beginning_of_line? && src.scan(eos_re) then
-      src.unread_many last_line # TODO: figure out how to remove this
-      self.yacc_value = eos
-      return :tSTRING_END
-    end
+  def is_arg?
+    lex_state =~ EXPR_ARG_ANY
+  end
-    self.string_buffer = []
+  def is_beg?
+    lex_state =~ EXPR_BEG_ANY || lex_state == EXPR_LAB # yes, == EXPR_LAB
+  end
-    if expand then
-      case
-      when src.scan(/#[$@]/) then
-        src.pos -= 1 # FIX omg stupid
-        self.yacc_value = src.matched
-        return :tSTRING_DVAR
-      when src.scan(/#[{]/) then
-        self.yacc_value = src.matched
-        return :tSTRING_DBEG
-      when src.scan(/#/) then
-        string_buffer << '#'
-      end
+  def is_end?
+    lex_state =~ EXPR_END_ANY
+  end
-      begin
-        c = tokadd_string func, "\n", nil
+  def is_label_possible?
+    (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
+  end
-        rb_compile_error err_msg if
-          c == RubyLexer::EOF
+  def is_label_suffix?
+    check(/:(?!:)/)
+  end
-        if c != "\n" then
-          self.yacc_value = string_buffer.join.delete("\r")
-          return :tSTRING_CONTENT
-        else
-          string_buffer << src.scan(/\n/)
-        end
+  def is_space_arg? c = "x"
+    is_arg? and space_seen and c !~ /\s/
+  end
+  def lambda_beginning?
+    lpar_beg && lpar_beg == paren_nest
+  end
-        rb_compile_error err_msg if
-          src.eos?
-      end until src.check(eos_re)
+  def is_local_id id
+    # maybe just make this false for now
+    self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
+  end
+  def lvar_defined? id
+    # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
+    self.parser.env[id.to_sym] == :lvar
+  end
+  def not_end?
+    not is_end?
+  end
+  def possibly_escape_string text, check
+    content = match[1]
+    if text =~ check then
+      content.gsub(ESC) { unescape $1 }
     else
-      until src.check(eos_re) do
-        string_buffer << src.scan(/.*(\n|\z)/)
-        rb_compile_error err_msg if
-          src.eos?
-      end
+      content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
     end
+  end
-    self.lex_strterm = [:heredoc, eos, func, last_line]
-    self.yacc_value = string_buffer.join.delete("\r")
+  def process_amper text
+    token = if is_arg? && space_seen && !check(/\s/) then
+               warning("`&' interpreted as argument prefix")
+               :tAMPER
+             elsif lex_state =~ EXPR_BEG|EXPR_MID then
+               :tAMPER
+             else
+               :tAMPER2
+             end
-    return :tSTRING_CONTENT
+    result :arg_state, token, "&"
   end
-  def heredoc_identifier # 51 lines
-    term, func = nil, STR_FUNC_BORING
-    self.string_buffer = []
+  def process_backref text
+    token = match[1].to_sym
+    # TODO: can't do lineno hack w/ symbol
+    result EXPR_END, :tBACK_REF, token
+  end
-    case
-    when src.scan(/(-?)(['"`])(.*?)\2/) then
-      term = src[2]
-      func |= STR_FUNC_INDENT unless src[1].empty?
-      func |= case term
-              when "\'" then
-                STR_SQUOTE
-              when '"' then
-                STR_DQUOTE
-              else
-                STR_XQUOTE
-              end
-      string_buffer << src[3]
-    when src.scan(/-?(['"`])(?!\1*\Z)/) then
-      rb_compile_error "unterminated here document identifier"
-    when src.scan(/(-?)(\w+)/) then
-      term = '"'
-      func |= STR_DQUOTE
-      unless src[1].empty? then
-        func |= STR_FUNC_INDENT
-      end
-      string_buffer << src[2]
-    else
-      return nil
+  def process_begin text
+    @comments << matched
+    unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
+      @comments.clear
+      rb_compile_error("embedded document meets end of file")
     end
-    if src.scan(/.*\n/) then
-      # TODO: think about storing off the char range instead
-      line = src.matched
-      src.extra_lines_added += 1
+    @comments << matched
+    self.lineno += matched.count("\n") # HACK?
+    nil # TODO
+  end
+  def process_brace_close text
+    case matched
+    when "}" then
+      self.brace_nest -= 1
+      return :tSTRING_DEND, matched if brace_nest < 0
+    end
+    # matching compare/parse26.y:8099
+    cond.pop
+    cmdarg.pop
+    case matched
+    when "}" then
+      self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
+      return :tRCURLY, matched
+    when "]" then
+      self.paren_nest -= 1
+      self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
+      return :tRBRACK, matched
+    when ")" then
+      self.paren_nest -= 1
+      self.lex_state   = EXPR_ENDFN
+      return :tRPAREN, matched
     else
-      line = nil
+      raise "Unknown bracing: #{matched.inspect}"
+    end
+  end
+  def process_brace_open text
+    # matching compare/parse23.y:8694
+    self.brace_nest += 1
+    if lambda_beginning? then
+      self.lpar_beg = nil
+      self.paren_nest -= 1 # close arg list when lambda opens body
+      return expr_result(:tLAMBEG, "{")
+    end
+    token = case
+            when lex_state =~ EXPR_LABELED then
+              :tLBRACE     # hash
+            when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
+              :tLCURLY     # block (primary) "{" in parse.y
+            when lex_state =~ EXPR_ENDARG then
+              :tLBRACE_ARG # block (expr)
+            else
+              :tLBRACE     # hash
+            end
+    state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
+    self.command_start = true if token != :tLBRACE
+    cond.push false
+    cmdarg.push false
+    result state, token, text
+  end
+  def process_colon1 text
+    # ?: / then / when
+    if is_end? || check(/\s/) then
+      return result EXPR_BEG, :tCOLON, text
     end
-    self.lex_strterm = [:heredoc, string_buffer.join, func, line]
+    case
+    when scan(/\'/) then
+      string STR_SSYM, matched
+    when scan(/\"/) then
+      string STR_DSYM, matched
+    end
+    result EXPR_FNAME, :tSYMBEG, text
+  end
-    if term == '`' then
-      self.yacc_value = "`"
-      return :tXSTRING_BEG
+  def process_colon2 text
+    if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
+      result EXPR_BEG, :tCOLON3, text
     else
-      self.yacc_value = "\""
-      return :tSTRING_BEG
+      result EXPR_DOT, :tCOLON2, text
     end
   end
-  def in_lex_state?(*states)
-    states.include? lex_state
+  def process_dots text
+    tokens = ruby27plus? && is_beg? ? BTOKENS : TOKENS
+    result EXPR_BEG, tokens[text], text
   end
-  def initialize v = 18
-    self.version = v
-    self.cond = RubyParserStuff::StackState.new(:cond)
-    self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
-    self.tern = RubyParserStuff::StackState.new(:tern)
-    self.nest = 0
-    @comments = []
+  def process_float text
+    rb_compile_error "Invalid numeric format" if text =~ /__/
-    reset
+    case
+    when text.end_with?("ri")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
+    when text.end_with?("i")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
+    when text.end_with?("r")
+      result EXPR_NUM, :tRATIONAL,  Rational(text.chop)
+    else
+      result EXPR_NUM, :tFLOAT, text.to_f
+    end
   end
-  def int_with_base base
-    rb_compile_error "Invalid numeric format" if src.matched =~ /__/
-    self.yacc_value = src.matched.to_i(base)
-    return :tINTEGER
+  def process_gvar text
+    if parser.class.version > 20 && text == "$-" then
+      rb_compile_error "unexpected $undefined"
+    end
+    result EXPR_END, :tGVAR, text
   end
-  def lex_state= o
-    # warn "wtf lex_state = #{o.inspect} from #{caller.first}"
-    raise "wtf\?" unless Symbol === o
-    @lex_state = o
+  def process_gvar_oddity text
+    rb_compile_error "#{text.inspect} is not allowed as a global variable name"
   end
-  attr_writer :lineno
-  def lineno
-    @lineno ||= src.lineno
+  def process_ivar text
+    tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
+    result EXPR_END, tok_id, text
   end
-  ##
-  #  Parse a number from the input stream.
-  #
-  # @param c The first character of the number.
-  # @return A int constant wich represents a token.
+  def process_label text
+    symbol = possibly_escape_string text, /^\"/
-  def parse_number
-    self.lex_state = :expr_end
+    result EXPR_LAB, :tLABEL, symbol
+  end
-    case
-    when src.scan(/[+-]?0[xXbBdD]\b/) then
-      rb_compile_error "Invalid numeric format"
-    when src.scan(/[+-]?0x[a-f0-9_]+/i) then
-      int_with_base(16)
-    when src.scan(/[+-]?0[Bb][01_]+/) then
-      int_with_base(2)
-    when src.scan(/[+-]?0[Dd][0-9_]+/) then
-      int_with_base(10)
-    when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
-      rb_compile_error "Illegal octal digit."
-    when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
-      int_with_base(8)
-    when src.scan(/[+-]?[\d_]+_(e|\.)/) then
-      rb_compile_error "Trailing '_' in number."
-    when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
-      number = src.matched
-      if number =~ /__/ then
-        rb_compile_error "Invalid numeric format"
-      end
-      self.yacc_value = number.to_f
-      :tFLOAT
-    when src.scan(/[+-]?0\b/) then
-      int_with_base(10)
-    when src.scan(/[+-]?[\d_]+\b/) then
-      int_with_base(10)
+  def process_label_or_string text
+    if @was_label && text =~ /:\Z/ then
+      @was_label = nil
+      return process_label text
+    elsif text =~ /:\Z/ then
+      self.pos -= 1 # put back ":"
+      text = text[0..-2]
+    end
+    orig_line = lineno
+    str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
+    self.lineno += str.count("\n")
+    result EXPR_END, :tSTRING, str, orig_line
+  end
+  def process_lchevron text
+    if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
+        !is_end? &&
+        (!is_arg? || lex_state =~ EXPR_LABELED || space_seen)) then
+      tok = self.heredoc_identifier
+      return tok if tok
+    end
+    if is_after_operator? then
+      self.lex_state = EXPR_ARG
     else
-      rb_compile_error "Bad number format"
+      self.command_start = true if lex_state =~ EXPR_CLASS
+      self.lex_state = EXPR_BEG
     end
+    result lex_state, :tLSHFT, "\<\<"
   end
-  def parse_quote # 58 lines
-    beg, nnd, short_hand, c = nil, nil, false, nil
+  def process_newline_or_comment text    # ../compare/parse30.y:9126 ish
+    c = matched
+    if c == "#" then
+      self.pos -= 1
-    if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
-      rb_compile_error "unknown type of %string" if src.matched_size == 2
-      c, beg, short_hand = src.matched, src.getch, false
-    else                               # Short-hand (e.g. %{, %., %!, etc)
-      c, beg, short_hand = 'Q', src.getch, true
+      while scan(/\s*\#.*(\n+|\z)/) do
+        self.lineno += matched.count "\n"
+        @comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
+      end
+      return nil if end_of_stream?
     end
-    if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
-      rb_compile_error "unterminated quoted string meets end of file"
+    c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
+         lex_state !~ EXPR_LABELED)
+    if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
+      # ignore if !fallthrough?
+      if !c && parser.in_kwarg then
+        # normal newline
+        self.command_start = true
+        return result EXPR_BEG, :tNL, nil
+      else
+        maybe_pop_stack
+        return # goto retry
+      end
     end
-    # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
-    nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
-    nnd, beg = beg, "\0" if nnd.nil?
+    if scan(/[\ \t\r\f\v]+/) then
+      self.space_seen = true
+    end
-    token_type, self.yacc_value = nil, "%#{c}#{beg}"
-    token_type, string_type = case c
-                              when 'Q' then
-                                ch = short_hand ? nnd : c + beg
-                                self.yacc_value = "%#{ch}"
-                                [:tSTRING_BEG,   STR_DQUOTE]
-                              when 'q' then
-                                [:tSTRING_BEG,   STR_SQUOTE]
-                              when 'W' then
-                                src.scan(/\s*/)
-                                [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
-                              when 'w' then
-                                src.scan(/\s*/)
-                                [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
-                              when 'x' then
-                                [:tXSTRING_BEG,  STR_XQUOTE]
-                              when 'r' then
-                                [:tREGEXP_BEG,   STR_REGEXP]
-                              when 's' then
-                                self.lex_state  = :expr_fname
-                                [:tSYMBEG,       STR_SSYM]
-                              end
+    if check(/#/) then
+      return # goto retry
+    elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
+      return # goto retry
+    end
-    rb_compile_error "Bad %string type. Expected [Qq\Wwxrs], found '#{c}'." if
-      token_type.nil?
+    self.command_start = true
-    self.lex_strterm = [:strterm, string_type, nnd, beg]
+    result EXPR_BEG, :tNL, nil
+  end
-    return token_type
+  def process_nthref text
+    # TODO: can't do lineno hack w/ number
+    result EXPR_END, :tNTH_REF, match[1].to_i
   end
-  def parse_string(quote) # 65 lines
-    _, string_type, term, open = quote
+  def process_paren text
+    token = if is_beg? then
+              :tLPAREN
+            elsif !space_seen then
+              # foo( ... ) => method call, no ambiguity
+              :tLPAREN2
+            elsif is_space_arg? then
+              :tLPAREN_ARG
+            elsif lex_state =~ EXPR_ENDFN && !lambda_beginning? then
+              # TODO:
+              # warn("parentheses after method name is interpreted as " \
+              #      "an argument list, not a decomposed argument")
+              :tLPAREN2
+            else
+              :tLPAREN2 # plain "(" in parse.y
+            end
-    space = false # FIX: remove these
-    func = string_type
-    paren = open
-    term_re = Regexp.escape term
+    self.paren_nest += 1
-    qwords = (func & STR_FUNC_QWORDS) != 0
-    regexp = (func & STR_FUNC_REGEXP) != 0
-    expand = (func & STR_FUNC_EXPAND) != 0
+    cond.push false
+    cmdarg.push false
+    result EXPR_PAR, token, text
+  end
-    unless func then # FIX: impossible, prolly needs == 0
-      self.lineno = nil
-      return :tSTRING_END
+  def process_percent text
+    case
+    when is_beg? then
+      process_percent_quote
+    when scan(/\=/)
+      result EXPR_BEG, :tOP_ASGN, "%"
+    when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
+      process_percent_quote
+    else
+      result :arg_state, :tPERCENT, "%"
     end
+  end
-    space = true if qwords and src.scan(/\s+/)
+  def process_plus_minus text
+    sign = matched
+    utype, type = if sign == "+" then
+                    [:tUPLUS, :tPLUS]
+                  else
+                    [:tUMINUS, :tMINUS]
+                  end
-    if self.nest == 0 && src.scan(/#{term_re}/) then
-      if qwords then
-        quote[1] = nil
-        return :tSPACE
-      elsif regexp then
-        self.yacc_value = self.regx_options
-        self.lineno = nil
-        return :tREGEXP_END
+    if is_after_operator? then
+      if scan(/@/) then
+        return result(EXPR_ARG, utype, "#{sign}@")
       else
-        self.yacc_value = term
-        self.lineno = nil
-        return :tSTRING_END
+        return result(EXPR_ARG, type, sign)
       end
     end
-    if space then
-      return :tSPACE
-    end
+    return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
-    self.string_buffer = []
+    if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
+      arg_ambiguous if is_arg?
-    if expand
-      case
-      when src.scan(/#(?=[$@])/) then
-        return :tSTRING_DVAR
-      when src.scan(/#[{]/) then
-        return :tSTRING_DBEG
-      when src.scan(/#/) then
-        string_buffer << '#'
+      if check(/\d/) then
+        return nil if utype == :tUPLUS
+        return result EXPR_BEG, :tUMINUS_NUM, sign
       end
+      return result EXPR_BEG, utype, sign
     end
-    if tokadd_string(func, term, paren) == RubyLexer::EOF then
-      rb_compile_error "unterminated string meets end of file"
+    result EXPR_BEG, type, sign
+  end
+  def process_questionmark text
+    if is_end? then
+      return result EXPR_BEG, :tEH, "?"
     end
-    self.yacc_value = string_buffer.join
+    if end_of_stream? then
+      rb_compile_error "incomplete character syntax: parsed #{text.inspect}"
+    end
-    return :tSTRING_CONTENT
-  end
+    if check(/\s|\v/) then
+      unless is_arg? then
+        c2 = { " " => "s",
+              "\n" => "n",
+              "\t" => "t",
+              "\v" => "v",
+              "\r" => "r",
+              "\f" => "f" }[matched]
+        if c2 then
+          warning("invalid character syntax; use ?\\" + c2)
+        end
+      end
-  def rb_compile_error msg
-    msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
-    raise RubyParser::SyntaxError, msg
+      # ternary
+      return result EXPR_BEG, :tEH, "?"
+    elsif check(/\w(?=\w)/) then # ternary, also
+      return result EXPR_BEG, :tEH, "?"
+    end
+    c = if scan(/\\/) then
+          self.read_escape
+        else
+          getch
+        end
+    result EXPR_END, :tSTRING, c
   end
-  def read_escape # 51 lines
-    case
-    when src.scan(/\\/) then                  # Backslash
-      '\\'
-    when src.scan(/n/) then                   # newline
-      "\n"
-    when src.scan(/t/) then                   # horizontal tab
-      "\t"
-    when src.scan(/r/) then                   # carriage-return
-      "\r"
-    when src.scan(/f/) then                   # form-feed
-      "\f"
-    when src.scan(/v/) then                   # vertical tab
-      "\13"
-    when src.scan(/a/) then                   # alarm(bell)
-      "\007"
-    when src.scan(/e/) then                   # escape
-      "\033"
-    when src.scan(/b/) then                   # backspace
-      "\010"
-    when src.scan(/s/) then                   # space
-      " "
-    when src.scan(/[0-7]{1,3}/) then          # octal constant
-      src.matched.to_i(8).chr
-    when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
-      src[1].to_i(16).chr
-    when src.check(/M-\\[\\MCc]/) then
-      src.scan(/M-\\/) # eat it
-      c = self.read_escape
-      c[0] = (c[0].ord | 0x80).chr
-      c
-    when src.scan(/M-(.)/) then
-      c = src[1]
-      c[0] = (c[0].ord | 0x80).chr
-      c
-    when src.check(/(C-|c)\\[\\MCc]/) then
-      src.scan(/(C-|c)\\/) # eat it
-      c = self.read_escape
-      c[0] = (c[0].ord & 0x9f).chr
-      c
-    when src.scan(/C-\?|c\?/) then
-      127.chr
-    when src.scan(/(C-|c)(.)/) then
-      c = src[2]
-      c[0] = (c[0].ord & 0x9f).chr
-      c
-    when src.scan(/[McCx0-9]/) || src.eos? then
-      rb_compile_error("Invalid escape character syntax")
-    else
-      src.getch
-    end
+  def process_simple_string text
+    orig_line = lineno
+    self.lineno += text.count("\n")
+    str = text[1..-2]
+      .gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
+    str = str.b unless str.valid_encoding?
+    result EXPR_END, :tSTRING, str, orig_line
   end
-  def regx_options # 15 lines
-    good, bad = [], []
+  def process_slash text
+    if is_beg? then
+      string STR_REGEXP, matched
-    if src.scan(/[a-z]+/) then
-      good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
+      return result nil, :tREGEXP_BEG, "/"
     end
-    unless bad.empty? then
-      rb_compile_error("unknown regexp option%s - %s" %
-                       [(bad.size > 1 ? "s" : ""), bad.join.inspect])
+    if scan(/\=/) then
+      return result(EXPR_BEG, :tOP_ASGN, "/")
     end
-    return good.join
+    if is_arg? && space_seen then
+      unless scan(/\s/) then
+        arg_ambiguous
+        string STR_REGEXP, "/"
+        return result(nil, :tREGEXP_BEG, "/")
+      end
+    end
+    result :arg_state, :tDIVIDE, "/"
   end
-  def reset
-    self.command_start = true
-    self.lex_strterm   = nil
-    self.token         = nil
-    self.yacc_value    = nil
+  def process_square_bracket text
+    self.paren_nest += 1
-    @src       = nil
-    @lex_state = nil
-  end
+    token = nil
-  def ruby18
-    Ruby18Parser === parser
+    if is_after_operator? then
+      case
+      when scan(/\]\=/) then
+        self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
+        return result EXPR_ARG, :tASET, "[]="
+      when scan(/\]/) then
+        self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
+        return result EXPR_ARG, :tAREF, "[]"
+      else
+        rb_compile_error "unexpected '['"
+      end
+    elsif is_beg? then
+      token = :tLBRACK
+    elsif is_arg? && (space_seen || lex_state =~ EXPR_LABELED) then
+      token = :tLBRACK
+    else
+      token = :tLBRACK2
+    end
+    cond.push false
+    cmdarg.push false
+    result EXPR_PAR, token, text
   end
-  def ruby19
-    Ruby19Parser === parser
+  def process_symbol text
+    symbol = possibly_escape_string text, /^:\"/ # stupid emacs
+    result EXPR_LIT, :tSYMBOL, symbol
   end
-  def src= src
-    raise "bad src: #{src.inspect}" unless String === src
-    @src = RPStringScanner.new(src)
+  def process_token text
+    # matching: parse_ident in compare/parse23.y:7989
+    # FIX: remove: self.last_state = lex_state
+    token = self.token = text
+    token << matched if scan(/[\!\?](?!=)/)
+    tok_id =
+      case
+      when token =~ /[!?]$/ then
+        :tFID
+      when lex_state =~ EXPR_FNAME && scan(/=(?:(?![~>=])|(?==>))/) then
+        # ident=, not =~ => == or followed by =>
+        # TODO test lexing of a=>b vs a==>b
+        token << matched
+        :tIDENTIFIER
+      when token =~ /^[A-Z]/ then
+        :tCONSTANT
+      else
+        :tIDENTIFIER
+      end
+    if is_label_possible? and is_label_suffix? then
+      scan(/:/)
+      return result EXPR_LAB, :tLABEL, token
+    end
+    # TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
+    if lex_state !~ EXPR_DOT then
+      # See if it is a reserved word.
+      keyword = RubyParserStuff::Keyword.keyword token
+      return process_token_keyword keyword if keyword
+    end
+    # matching: compare/parse30.y:9039
+    state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
+              cmd_state ? EXPR_CMDARG : EXPR_ARG
+            elsif lex_state =~ EXPR_FNAME then
+              EXPR_ENDFN
+            else
+              EXPR_END
+            end
+    self.lex_state = state
+    tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
+    if last_state !~ EXPR_DOT|EXPR_FNAME and
+        (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
+        lvar_defined?(token) then
+      state = EXPR_END|EXPR_LABEL
+    end
+    result state, tok_id, token
   end
-  def tokadd_escape term # 20 lines
+  def process_token_keyword keyword
+    # matching MIDDLE of parse_ident in compare/parse23.y:8046
+    state = lex_state
+    return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
+    self.lex_state = keyword.state
+    self.command_start = true if lex_state =~ EXPR_BEG
     case
-    when src.scan(/\\\n/) then
-      # just ignore
-    when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
-      self.string_buffer << src.matched
-    when src.scan(/\\([MC]-|c)(?=\\)/) then
-      self.string_buffer << src.matched
-      self.tokadd_escape term
-    when src.scan(/\\([MC]-|c)(.)/) then
-      self.string_buffer << src.matched
-    when src.scan(/\\[McCx]/) then
-      rb_compile_error "Invalid escape character syntax"
-    when src.scan(/\\(.)/m) then
-      self.string_buffer << src.matched
+    when keyword.id0 == :kDO then # parse26.y line 7591
+      case
+      when lambda_beginning? then
+        self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
+        self.paren_nest -= 1 # TODO: question this?
+        result lex_state, :kDO_LAMBDA, token
+      when cond.is_in_state then
+        result lex_state, :kDO_COND, token
+      when cmdarg.is_in_state && state != EXPR_CMDARG then
+        result lex_state, :kDO_BLOCK, token
+      else
+        result lex_state, :kDO, token
+      end
+    when state =~ EXPR_PAD then
+      result lex_state, keyword.id0, token
+    when keyword.id0 != keyword.id1 then
+      result EXPR_PAR, keyword.id1, token
     else
-      rb_compile_error "Invalid escape character syntax"
+      result lex_state, keyword.id1, token
     end
   end
-  def tokadd_string(func, term, paren) # 105 lines
-    qwords = (func & STR_FUNC_QWORDS) != 0
-    escape = (func & STR_FUNC_ESCAPE) != 0
-    expand = (func & STR_FUNC_EXPAND) != 0
-    regexp = (func & STR_FUNC_REGEXP) != 0
-    symbol = (func & STR_FUNC_SYMBOL) != 0
+  def process_underscore text
+    self.unscan # put back "_"
-    paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
-    term_re  = Regexp.new(Regexp.escape(term))
+    if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
+      ss.terminate
+      [RubyLexer::EOF, RubyLexer::EOF]
+    elsif scan(/#{IDENT_CHAR}+/) then
+      process_token matched
+    end
+  end
-    until src.eos? do
-      c = nil
-      handled = true
-      case
-      when self.nest == 0 && src.scan(term_re) then
-        src.pos -= 1
-        break
-      when paren_re && src.scan(paren_re) then
-        self.nest += 1
-      when src.scan(term_re) then
-        self.nest -= 1
-      when qwords && src.scan(/\s/) then
-        src.pos -= 1
-        break
-      when expand && src.scan(/#(?=[\$\@\{])/) then
-        src.pos -= 1
-        break
-      when expand && src.scan(/#(?!\n)/) then
-        # do nothing
-      when src.check(/\\/) then
-        case
-        when qwords && src.scan(/\\\n/) then
-          string_buffer << "\n"
-          next
-        when qwords && src.scan(/\\\s/) then
-          c = ' '
-        when expand && src.scan(/\\\n/) then
-          next
-        when regexp && src.check(/\\/) then
-          self.tokadd_escape term
-          next
-        when expand && src.scan(/\\/) then
-          c = self.read_escape
-        when src.scan(/\\\n/) then
-          # do nothing
-        when src.scan(/\\\\/) then
-          string_buffer << '\\' if escape
-          c = '\\'
-        when src.scan(/\\/) then
-          unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
-            string_buffer << "\\"
-          end
-        else
-          handled = false
-        end
-      else
-        handled = false
-      end # case
+  def rb_compile_error msg
+    msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
+    raise RubyParser::SyntaxError, msg
+  end
-      unless handled then
+  def reset
+    self.lineno        = 1
+    self.brace_nest    = 0
+    self.command_start = true
+    self.comments      = []
+    self.lex_state     = EXPR_NONE
+    self.lex_strterm   = nil
+    self.lpar_beg      = nil
+    self.paren_nest    = 0
+    self.space_seen    = false
+    self.string_nest   = 0
+    self.token         = nil
+    self.string_buffer = []
+    self.old_ss        = nil
+    self.old_lineno    = nil
-        t = Regexp.escape term
-        x = Regexp.escape(paren) if paren && paren != "\000"
-        re = if qwords then
-               /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
-             else
-               /[^#{t}#{x}\#\0\\]+|./
-             end
+    self.cond.reset
+    self.cmdarg.reset
+  end
-        src.scan re
-        c = src.matched
+  def result new_state, token, text, line = self.lineno # :nodoc:
+    new_state = self.arg_state if new_state == :arg_state
+    self.lex_state = new_state if new_state
-        rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
-      end # unless handled
+    [token, [text, line]]
+  end
-      c ||= src.matched
-      string_buffer << c
-    end # until
+  def ruby22_label?
+    ruby22plus? and is_label_possible?
+  end
-    c ||= src.matched
-    c = RubyLexer::EOF if src.eos?
+  def ruby22plus?
+    parser.class.version >= 22
+  end
+  def ruby23plus?
+    parser.class.version >= 23
+  end
-    return c
+  def ruby24minus?
+    parser.class.version <= 24
   end
-  ESCAPES = {
-    "a"    => "\007",
-    "b"    => "\010",
-    "e"    => "\033",
-    "f"    => "\f",
-    "n"    => "\n",
-    "r"    => "\r",
-    "s"    => " ",
-    "t"    => "\t",
-    "v"    => "\13",
-    "\\"   => '\\',
-    "\n"   => "",
-    "C-\?" => 127.chr,
-    "c\?"  => 127.chr,
-  }
+  def ruby27plus?
+    parser.class.version >= 27
+  end
+  def space_vs_beginning space_type, beg_type, fallback
+    if is_space_arg? check(/./m) then
+      warning "`**' interpreted as argument prefix"
+      space_type
+    elsif is_beg? then
+      beg_type
+    else
+      # TODO: warn_balanced("**", "argument prefix");
+      fallback
+    end
+  end
   def unescape s
     r = ESCAPES[s]
     return r if r
-    case s
-    when /^[0-7]{1,3}/ then
-      $&.to_i(8).chr
-    when /^x([0-9a-fA-F]{1,2})/ then
-      $1.to_i(16).chr
-    when /^M-(.)/ then
-      ($1[0].ord | 0x80).chr
-    when /^(C-|c)(.)/ then
-      ($2[0].ord & 0x9f).chr
-    when /^[McCx0-9]/ then
-      rb_compile_error("Invalid escape character syntax")
-    else
-      s
-    end
+    x = case s
+        when /^[0-7]{1,3}/ then
+          ($&.to_i(8) & 0xFF).chr
+        when /^x([0-9a-fA-F]{1,2})/ then
+          $1.to_i(16).chr
+        when /^M-(.)/ then
+          ($1[0].ord | 0x80).chr
+        when /^(C-|c)(.)/ then
+          ($2[0].ord & 0x9f).chr
+        when /^[89a-f]/i then # bad octal or hex... ignore? that's what MRI does :(
+          s
+        when /^[McCx0-9]/ then
+          rb_compile_error("Invalid escape character syntax")
+        when /u(\h{4})/ then
+          [$1.delete("{}").to_i(16)].pack("U")
+        when /u(\h{1,3})/ then
+          rb_compile_error("Invalid escape character syntax")
+        when /u\{(\h+(?:\s+\h+)*)\}/ then
+          $1.split.map { |cp| cp.to_i(16) }.pack("U*")
+        else
+          s
+        end
+    x
   end
   def warning s
     # do nothing for now
   end
-  ##
-  # Returns the next token. Also sets yy_val is needed.
-  #
-  # @return Description of the Returned Value
-  def yylex # 826 lines
-    c = ''
-    self.space_seen = false
-    command_state = false
-    src = self.src
-    self.token = nil
-    self.yacc_value = nil
-    return yylex_string if lex_strterm
-    command_state = self.command_start
-    self.command_start = false
-    last_state = lex_state
-    loop do # START OF CASE
-      if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
-        self.space_seen = true
-        next
-      elsif src.check(/[^a-zA-Z]/) then
-        if src.scan(/\n|#/) then
-          self.lineno = nil
-          c = src.matched
-          if c == '#' then
-            src.pos -= 1
-            while src.scan(/\s*#.*(\n+|\z)/) do
-              @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
-            end
-            return RubyLexer::EOF if src.eos?
-          end
-          # Replace a string of newlines with a single one
-          src.scan(/\n+/)
-          next if in_lex_state?(:expr_beg, :expr_fname, :expr_dot, :expr_class,
-                                :expr_value)
-          if src.scan(/([\ \t\r\f\v]*)\./) then
-            self.space_seen = true unless src[1].empty?
-            src.pos -= 1
-            next unless src.check(/\.\./)
-          end
-          self.command_start = true
-          self.lex_state = :expr_beg
-          return :tNL
-        elsif src.scan(/[\]\)\}]/) then
-          cond.lexpop
-          cmdarg.lexpop
-          tern.lexpop
-          self.lex_state = :expr_end
-          self.yacc_value = src.matched
-          result = {
-            ")" => :tRPAREN,
-            "]" => :tRBRACK,
-            "}" => :tRCURLY
-          }[src.matched]
-          return result
-        elsif src.scan(/\.\.\.?|,|![=~]?/) then
-          self.lex_state = :expr_beg
-          tok = self.yacc_value = src.matched
-          return TOKENS[tok]
-        elsif src.check(/\./) then
-          if src.scan(/\.\d/) then
-            rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
-          elsif src.scan(/\./) then
-            self.lex_state = :expr_dot
-            self.yacc_value = "."
-            return :tDOT
-          end
-        elsif src.scan(/\(/) then
-          result = if ruby18 then
-                     yylex_paren18
-                   else
-                     yylex_paren19
-                   end
-          self.expr_beg_push "("
-          return result
-        elsif src.check(/\=/) then
-          if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
-            self.fix_arg_lex_state
-            tok = self.yacc_value = src.matched
-            return TOKENS[tok]
-          elsif src.scan(/\=begin(?=\s)/) then
-            # @comments << '=' << src.matched
-            @comments << src.matched
-            unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
-              @comments.clear
-              rb_compile_error("embedded document meets end of file")
-            end
-            @comments << src.matched
-            next
-          else
-            raise "you shouldn't be able to get here"
-          end
-        elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
-          self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
-          self.lex_state = :expr_end
-          return :tSTRING
-        elsif src.scan(/\"/) then # FALLBACK
-          self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
-          self.yacc_value = "\""
-          return :tSTRING_BEG
-        elsif src.scan(/\@\@?\w*/) then
-          self.token = src.matched
-          rb_compile_error "`#{token}` is not allowed as a variable name" if
-            token =~ /\@\d/
-          return process_token(command_state)
-        elsif src.scan(/\:\:/) then
-          if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
-            self.lex_state = :expr_beg
-            self.yacc_value = "::"
-            return :tCOLON3
-          end
-          self.lex_state = :expr_dot
-          self.yacc_value = "::"
-          return :tCOLON2
-        elsif ! is_end? && src.scan(/:([a-zA-Z_]#{IDENT_CHAR_RE}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
-          # scanning shortcut to symbols
-          self.yacc_value = src[1]
-          self.lex_state = :expr_end
-          return :tSYMBOL
-        elsif src.scan(/\:/) then
-          # ?: / then / when
-          if is_end? || src.check(/\s/) then
-            self.lex_state = :expr_beg
-            # TODO warn_balanced(":", "symbol literal");
-            self.yacc_value = ":"
-            return :tCOLON
-          end
-          case
-          when src.scan(/\'/) then
-            self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
-          when src.scan(/\"/) then
-            self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
-          end
-          self.lex_state = :expr_fname
-          self.yacc_value = ":"
-          return :tSYMBEG
-        elsif src.check(/[0-9]/) then
-          return parse_number
-        elsif src.scan(/\[/) then
-          result = src.matched
-          if in_lex_state? :expr_fname, :expr_dot then
-            self.lex_state = :expr_arg
-            case
-            when src.scan(/\]\=/) then
-              self.yacc_value = "[]="
-              return :tASET
-            when src.scan(/\]/) then
-              self.yacc_value = "[]"
-              return :tAREF
-            else
-              rb_compile_error "unexpected '['"
-            end
-          elsif is_beg? then
-            self.tern.push false
-            result = :tLBRACK
-          elsif is_arg? && space_seen then
-            self.tern.push false
-            result = :tLBRACK
-          else
-            result = :tLBRACK2
-          end
-          self.expr_beg_push "["
-          return result
-        elsif src.scan(/\'(\\.|[^\'])*\'/) then
-          self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
-          self.lex_state = :expr_end
-          return :tSTRING
-        elsif src.check(/\|/) then
-          if src.scan(/\|\|\=/) then
-            self.lex_state = :expr_beg
-            self.yacc_value = "||"
-            return :tOP_ASGN
-          elsif src.scan(/\|\|/) then
-            self.lex_state = :expr_beg
-            self.yacc_value = "||"
-            return :tOROP
-          elsif src.scan(/\|\=/) then
-            self.lex_state = :expr_beg
-            self.yacc_value = "|"
-            return :tOP_ASGN
-          elsif src.scan(/\|/) then
-            self.fix_arg_lex_state
-            self.yacc_value = "|"
-            return :tPIPE
-          end
-        elsif src.scan(/\{/) then
-          if defined?(@hack_expects_lambda) && @hack_expects_lambda
-            @hack_expects_lambda = false
-            self.lex_state = :expr_beg
-            return :tLAMBEG
-          end
-          result = if is_arg? || in_lex_state?(:expr_end) then
-                     :tLCURLY      #  block (primary)
-                   elsif in_lex_state?(:expr_endarg) then
-                     :tLBRACE_ARG  #  block (expr)
-                   else
-                     self.tern.push false
-                     :tLBRACE      #  hash
-                   end
-          self.expr_beg_push "{"
-          self.command_start = true unless result == :tLBRACE
-          return result
-        elsif src.scan(/->/) then
-          @hack_expects_lambda = true
-          self.lex_state = :expr_arg
-          return :tLAMBDA
-        elsif src.scan(/[+-]/) then
-          sign = src.matched
-          utype, type = if sign == "+" then
-                          [:tUPLUS, :tPLUS]
-                        else
-                          [:tUMINUS, :tMINUS]
-                        end
-          if in_lex_state? :expr_fname, :expr_dot then
-            self.lex_state = :expr_arg
-            if src.scan(/@/) then
-              self.yacc_value = "#{sign}@"
-              return utype
-            else
-              self.yacc_value = sign
-              return type
-            end
-          end
-          if src.scan(/\=/) then
-            self.lex_state = :expr_beg
-            self.yacc_value = sign
-            return :tOP_ASGN
-          end
-          if (is_beg? ||
-              (is_arg? && space_seen && !src.check(/\s/))) then
-            if is_arg? then
-              arg_ambiguous
-            end
+  def was_label?
+    @was_label = ruby22_label?
+    true
+  end
-            self.lex_state = :expr_beg
-            self.yacc_value = sign
+  class State
+    attr_accessor :n
+    attr_accessor :names
-            if src.check(/\d/) then
-              if utype == :tUPLUS then
-                return self.parse_number
-              else
-                return :tUMINUS_NUM
-              end
-            end
+    # TODO: take a shared hash of strings for inspect/to_s
+    def initialize o, names
+      raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
-            return utype
-          end
-          self.lex_state = :expr_beg
-          self.yacc_value = sign
-          return type
-        elsif src.check(/\*/) then
-          if src.scan(/\*\*=/) then
-            self.lex_state = :expr_beg
-            self.yacc_value = "**"
-            return :tOP_ASGN
-          elsif src.scan(/\*\*/) then
-            self.yacc_value = "**"
-            self.fix_arg_lex_state
-            return :tPOW
-          elsif src.scan(/\*\=/) then
-            self.lex_state = :expr_beg
-            self.yacc_value = "*"
-            return :tOP_ASGN
-          elsif src.scan(/\*/) then
-            result = if is_arg? && space_seen && src.check(/\S/) then
-                       warning("`*' interpreted as argument prefix")
-                       :tSTAR
-                     elsif is_beg? then
-                       :tSTAR
-                     else
-                       :tSTAR2
-                     end
-            self.yacc_value = "*"
-            self.fix_arg_lex_state
-            return result
-          end
-        elsif src.check(/\</) then
-          if src.scan(/\<\=\>/) then
-            self.fix_arg_lex_state
-            self.yacc_value = "<=>"
-            return :tCMP
-          elsif src.scan(/\<\=/) then
-            self.fix_arg_lex_state
-            self.yacc_value = "<="
-            return :tLEQ
-          elsif src.scan(/\<\<\=/) then
-            self.fix_arg_lex_state
-            self.lex_state = :expr_beg
-            self.yacc_value = "\<\<"
-            return :tOP_ASGN
-          elsif src.scan(/\<\</) then
-            if (! in_lex_state?(:expr_end, :expr_dot,
-                                :expr_endarg, :expr_class) &&
-                (!is_arg? || space_seen)) then
-              tok = self.heredoc_identifier
-              return tok if tok
-            end
+      self.n = o
+      self.names = names
+    end
-            self.fix_arg_lex_state
-            self.yacc_value = "\<\<"
-            return :tLSHFT
-          elsif src.scan(/\</) then
-            self.fix_arg_lex_state
-            self.yacc_value = "<"
-            return :tLT
-          end
-        elsif src.check(/\>/) then
-          if src.scan(/\>\=/) then
-            self.fix_arg_lex_state
-            self.yacc_value = ">="
-            return :tGEQ
-          elsif src.scan(/\>\>=/) then
-            self.fix_arg_lex_state
-            self.lex_state = :expr_beg
-            self.yacc_value = ">>"
-            return :tOP_ASGN
-          elsif src.scan(/\>\>/) then
-            self.fix_arg_lex_state
-            self.yacc_value = ">>"
-            return :tRSHFT
-          elsif src.scan(/\>/) then
-            self.fix_arg_lex_state
-            self.yacc_value = ">"
-            return :tGT
-          end
-        elsif src.scan(/\`/) then
-          self.yacc_value = "`"
-          case lex_state
-          when :expr_fname then
-            self.lex_state = :expr_end
-            return :tBACK_REF2
-          when :expr_dot then
-            self.lex_state = if command_state then
-                               :expr_cmdarg
-                             else
-                               :expr_arg
-                             end
-            return :tBACK_REF2
-          end
-          self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
-          return :tXSTRING_BEG
-        elsif src.scan(/\?/) then
-          if is_end? then
-            self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
-            self.tern.push true
-            self.yacc_value = "?"
-            return :tEH
-          end
-          if src.eos? then
-            rb_compile_error "incomplete character syntax"
-          end
-          if src.check(/\s|\v/) then
-            unless is_arg? then
-              c2 = { " " => 's',
-                    "\n" => 'n',
-                    "\t" => 't',
-                    "\v" => 'v',
-                    "\r" => 'r',
-                    "\f" => 'f' }[src.matched]
-              if c2 then
-                warning("invalid character syntax; use ?\\" + c2)
-              end
-            end
+    def == o
+      self.equal?(o) || (o.class == self.class && o.n == self.n)
+    end
-            # ternary
-            self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
-            self.tern.push true
-            self.yacc_value = "?"
-            return :tEH
-          elsif src.check(/\w(?=\w)/) then # ternary, also
-            self.lex_state = :expr_beg
-            self.tern.push true
-            self.yacc_value = "?"
-            return :tEH
-          end
-          c = if src.scan(/\\/) then
-                self.read_escape
-              else
-                src.getch
-              end
-          self.lex_state = :expr_end
-          if version == 18 then
-            self.yacc_value = c[0].ord & 0xff
-            return :tINTEGER
-          else
-            self.yacc_value = c
-            return :tSTRING
-          end
-        elsif src.check(/\&/) then
-          if src.scan(/\&\&\=/) then
-            self.yacc_value = "&&"
-            self.lex_state = :expr_beg
-            return :tOP_ASGN
-          elsif src.scan(/\&\&/) then
-            self.lex_state = :expr_beg
-            self.yacc_value = "&&"
-            return :tANDOP
-          elsif src.scan(/\&\=/) then
-            self.yacc_value = "&"
-            self.lex_state = :expr_beg
-            return :tOP_ASGN
-          elsif src.scan(/&/) then
-            result = if is_arg? && space_seen &&
-                         !src.check(/\s/) then
-                       warning("`&' interpreted as argument prefix")
-                       :tAMPER
-                     elsif in_lex_state? :expr_beg, :expr_mid then
-                       :tAMPER
-                     else
-                       :tAMPER2
-                     end
-            self.fix_arg_lex_state
-            self.yacc_value = "&"
-            return result
-          end
-        elsif src.scan(/\//) then
-          if is_beg? then
-            self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
-            self.yacc_value = "/"
-            return :tREGEXP_BEG
-          end
-          if src.scan(/\=/) then
-            self.yacc_value = "/"
-            self.lex_state = :expr_beg
-            return :tOP_ASGN
-          end
-          if is_arg? && space_seen then
-            unless src.scan(/\s/) then
-              arg_ambiguous
-              self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
-              self.yacc_value = "/"
-              return :tREGEXP_BEG
-            end
-          end
-          self.fix_arg_lex_state
-          self.yacc_value = "/"
-          return :tDIVIDE
-        elsif src.scan(/\^=/) then
-          self.lex_state = :expr_beg
-          self.yacc_value = "^"
-          return :tOP_ASGN
-        elsif src.scan(/\^/) then
-          self.fix_arg_lex_state
-          self.yacc_value = "^"
-          return :tCARET
-        elsif src.scan(/\;/) then
-          self.command_start = true
-          self.lex_state = :expr_beg
-          self.yacc_value = ";"
-          return :tSEMI
-        elsif src.scan(/\~/) then
-          if in_lex_state? :expr_fname, :expr_dot then
-            src.scan(/@/)
-          end
-          self.fix_arg_lex_state
-          self.yacc_value = "~"
-          return :tTILDE
-        elsif src.scan(/\\/) then
-          if src.scan(/\r?\n/) then
-            self.lineno = nil
-            self.space_seen = true
-            next
-          end
-          rb_compile_error "bare backslash only allowed before newline"
-        elsif src.scan(/\%/) then
-          if is_beg? then
-            return parse_quote
-          end
-          if src.scan(/\=/) then
-            self.lex_state = :expr_beg
-            self.yacc_value = "%"
-            return :tOP_ASGN
-          end
-          return parse_quote if is_arg? && space_seen && ! src.check(/\s/)
-          self.fix_arg_lex_state
-          self.yacc_value = "%"
-          return :tPERCENT
-        elsif src.check(/\$/) then
-          if src.scan(/(\$_)(\w+)/) then
-            self.lex_state = :expr_end
-            self.token = src.matched
-            return process_token(command_state)
-          elsif src.scan(/\$_/) then
-            self.lex_state = :expr_end
-            self.token = src.matched
-            self.yacc_value = src.matched
-            return :tGVAR
-          elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
-            self.lex_state = :expr_end
-            self.yacc_value = src.matched
-            return :tGVAR
-          elsif src.scan(/\$([\&\`\'\+])/) then
-            self.lex_state = :expr_end
-            # Explicit reference to these vars as symbols...
-            if last_state == :expr_fname then
-              self.yacc_value = src.matched
-              return :tGVAR
-            else
-              self.yacc_value = src[1].to_sym
-              return :tBACK_REF
-            end
-          elsif src.scan(/\$([1-9]\d*)/) then
-            self.lex_state = :expr_end
-            if last_state == :expr_fname then
-              self.yacc_value = src.matched
-              return :tGVAR
-            else
-              self.yacc_value = src[1].to_i
-              return :tNTH_REF
-            end
-          elsif src.scan(/\$0/) then
-            self.lex_state = :expr_end
-            self.token = src.matched
-            return process_token(command_state)
-          elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
-            self.lex_state = :expr_end
-            self.yacc_value = "$"
-            return "$"
-          elsif src.scan(/\$\w+/)
-            self.lex_state = :expr_end
-            self.token = src.matched
-            return process_token(command_state)
-          end
-        elsif src.check(/\_/) then
-          if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
-            self.lineno = nil
-            return RubyLexer::EOF
-          elsif src.scan(/\_\w*/) then
-            self.token = src.matched
-            return process_token(command_state)
-          end
-        end
-      end # END OF CASE
+    def =~ v
+      (self.n & v.n) != 0
+    end
-      if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
-        return RubyLexer::EOF
-      else # alpha check
-        unless src.check IDENT_RE then
-          rb_compile_error "Invalid char #{src.matched.inspect} in expression"
-        end
-      end
+    def | v
+      raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
+        self.names == v.names
+      self.class.new(self.n | v.n, self.names)
+    end
-      self.token = src.matched if self.src.scan IDENT_RE
+    def inspect
+      return "Value(0)" if n.zero? # HACK?
-      return process_token(command_state)
+      names.map { |v, k| k if self =~ v }.
+        compact.
+        join("|").
+        gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
     end
-  end
-  def yylex_paren18
-    self.command_start = true
-    result = :tLPAREN2
-    if in_lex_state? :expr_beg, :expr_mid then
-      result = :tLPAREN
-    elsif space_seen then
-      if in_lex_state? :expr_cmdarg then
-        result = :tLPAREN_ARG
-      elsif in_lex_state? :expr_arg then
-        self.tern.push false
-        warning "don't put space before argument parentheses"
-      end
-    else
-      self.tern.push false
+    alias to_s inspect
+    module Values
+      expr_names = {}
+      EXPR_NONE    = State.new    0x0, expr_names
+      EXPR_BEG     = State.new    0x1, expr_names
+      EXPR_END     = State.new    0x2, expr_names
+      EXPR_ENDARG  = State.new    0x4, expr_names
+      EXPR_ENDFN   = State.new    0x8, expr_names
+      EXPR_ARG     = State.new   0x10, expr_names
+      EXPR_CMDARG  = State.new   0x20, expr_names
+      EXPR_MID     = State.new   0x40, expr_names
+      EXPR_FNAME   = State.new   0x80, expr_names
+      EXPR_DOT     = State.new  0x100, expr_names
+      EXPR_CLASS   = State.new  0x200, expr_names
+      EXPR_LABEL   = State.new  0x400, expr_names
+      EXPR_LABELED = State.new  0x800, expr_names
+      EXPR_FITEM   = State.new 0x1000, expr_names
+      EXPR_BEG_ANY = EXPR_BEG | EXPR_MID    | EXPR_CLASS
+      EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
+      EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
+      # extra fake lex_state names to make things a bit cleaner
+      EXPR_LAB = EXPR_ARG|EXPR_LABELED
+      EXPR_LIT = EXPR_END|EXPR_ENDARG
+      EXPR_PAR = EXPR_BEG|EXPR_LABEL
+      EXPR_PAD = EXPR_BEG|EXPR_LABELED
+      EXPR_NUM = EXPR_LIT
+      expr_names.merge!(EXPR_NONE    => "EXPR_NONE",
+                        EXPR_BEG     => "EXPR_BEG",
+                        EXPR_END     => "EXPR_END",
+                        EXPR_ENDARG  => "EXPR_ENDARG",
+                        EXPR_ENDFN   => "EXPR_ENDFN",
+                        EXPR_ARG     => "EXPR_ARG",
+                        EXPR_CMDARG  => "EXPR_CMDARG",
+                        EXPR_MID     => "EXPR_MID",
+                        EXPR_FNAME   => "EXPR_FNAME",
+                        EXPR_DOT     => "EXPR_DOT",
+                        EXPR_CLASS   => "EXPR_CLASS",
+                        EXPR_LABEL   => "EXPR_LABEL",
+                        EXPR_LABELED => "EXPR_LABELED",
+                        EXPR_FITEM   => "EXPR_FITEM")
+      # ruby constants for strings: each STR_FUNC_* flag is built by State.new from a numeric value and the shared str_func_names hash
+      str_func_names = {} # starts empty; populated by the merge! call below, once the constants exist
+      STR_FUNC_BORING = State.new 0x00,    str_func_names
+      STR_FUNC_ESCAPE = State.new 0x01,    str_func_names
+      STR_FUNC_EXPAND = State.new 0x02,    str_func_names
+      STR_FUNC_REGEXP = State.new 0x04,    str_func_names
+      STR_FUNC_QWORDS = State.new 0x08,    str_func_names
+      STR_FUNC_SYMBOL = State.new 0x10,    str_func_names
+      STR_FUNC_INDENT = State.new 0x20,    str_func_names # <<-HEREDOC
+      STR_FUNC_LABEL  = State.new 0x40,    str_func_names
+      STR_FUNC_LIST   = State.new 0x4000,  str_func_names
+      STR_FUNC_TERM   = State.new 0x8000,  str_func_names
+      STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
+      # TODO: check parser25.y on how they do STR_FUNC_INDENT
+      STR_SQUOTE = STR_FUNC_BORING # same object as the 0x00 flag
+      STR_DQUOTE = STR_FUNC_EXPAND
+      STR_XQUOTE = STR_FUNC_EXPAND # same flag as STR_DQUOTE
+      STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
+      STR_SWORD  = STR_FUNC_QWORDS | STR_FUNC_LIST
+      STR_DWORD  = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
+      STR_SSYM   = STR_FUNC_SYMBOL
+      STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
+      STR_LABEL  = STR_FUNC_LABEL
+      str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
+                            STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
+                            STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
+                            STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
+                            STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
+                            STR_FUNC_INDENT => "STR_FUNC_INDENT",
+                            STR_FUNC_LABEL  => "STR_FUNC_LABEL",
+                            STR_FUNC_LIST   => "STR_FUNC_LIST",
+                            STR_FUNC_TERM   => "STR_FUNC_TERM",
+                            STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
+                            STR_SQUOTE      => "STR_SQUOTE") # STR_SQUOTE is the STR_FUNC_BORING object, which has no entry under its own name
     end
-    result
+    include Values
   end
-  def is_end?
-    in_lex_state? :expr_end, :expr_endarg, :expr_endfn
-  end
+  include State::Values
+end
-  def is_arg?
-    in_lex_state? :expr_arg, :expr_cmdarg
-  end
+class RubyLexer
+  module SSWrapper
+    def string= s
+      ss.string= s
+    end
-  def is_beg?
-    in_lex_state? :expr_beg, :expr_mid, :expr_value, :expr_class
-  end
+    def beginning_of_line?
+      ss.bol?
+    end
-  def is_space_arg? c = "x"
-    is_arg? and space_seen and c !~ /\s/
-  end
+    alias bol? beginning_of_line? # to make .rex file more readable
-  def is_label_possible? command_state
-    (in_lex_state?(:expr_beg) && !command_state) || is_arg?
-  end
+    def check re # delegates to ss.check, after giving maybe_pop_stack a chance to swap scanners
+      maybe_pop_stack
-  def yylex_paren19 # TODO: move or remove
-    result =
-      if is_beg? then
-        :tLPAREN
-      elsif is_space_arg? then
-        :tLPAREN_ARG
-      else
-        :tLPAREN2 # plain '(' in parse.y
-      end
+      ss.check re
+    end
-    # paren_nest++; # TODO
+    def end_of_stream?
+      ss.eos?
+    end
-    result
-  end
+    alias eos? end_of_stream?
-  def process_token(command_state)
+    def getch # reads one character from ss and returns it
+      c = ss.getch
+      c = ss.getch if c == "\r" && ss.peek(1) == "\n" # on "\r\n", read again so the "\n" is what gets returned
+      c
+    end
-    token << src.matched if token =~ IDENT_RE && src.scan(/[\!\?](?!=)/)
+    def match # returns the scanner object itself (ss), not a separate match result
+      ss
+    end
-    result = nil
-    last_state = lex_state
+    def matched
+      ss.matched
+    end
-    case token
-    when /^\$/ then
-      self.lex_state, result = :expr_end, :tGVAR
-    when /^@@/ then
-      self.lex_state, result = :expr_end, :tCVAR
-    when /^@/ then
-      self.lex_state, result = :expr_end, :tIVAR
-    else
-      if token =~ /[!?]$/ then
-        result = :tFID
-      else
-        if in_lex_state? :expr_fname then
-          # ident=, not =~ => == or followed by =>
-          # TODO test lexing of a=>b vs a==>b
-          if src.scan(/=(?:(?![~>=])|(?==>))/) then
-            result = :tIDENTIFIER
-            token << src.matched
-          end
-        end
+    def in_heredoc? # true or false, according to whether old_ss is currently set
+      !!self.old_ss # !! coerces the value to a strict boolean
+    end
-        result ||= if token =~ /^[A-Z]/ then
-                     :tCONSTANT
-                   else
-                     :tIDENTIFIER
-                   end
+    def maybe_pop_stack # pops the saved scanner and line number, but only in the case checked below
+      if ss.eos? && in_heredoc? then # current scanner is exhausted and an old_ss is set
+        self.ss_pop
+        self.lineno_pop
       end
+    end
-      unless ruby18
-        if is_label_possible? command_state then
-          colon = src.scan(/:/)
+    def pos
+      ss.pos
+    end
-          if colon && src.peek(1) != ":" then
-            self.lex_state = :expr_beg
-            self.yacc_value = [token, src.lineno]
-            return :tLABEL
-          end
+    def pos= n
+      ss.pos = n
+    end
-          src.unscan if colon
-        end
-      end
+    def rest
+      ss.rest
+    end
-      unless in_lex_state? :expr_dot then
-        # See if it is a reserved word.
-        keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
-                    RubyParserStuff::Keyword.keyword18 token
-                  else
-                    RubyParserStuff::Keyword.keyword19 token
-                  end
+    def scan re # delegates to ss.scan, after giving maybe_pop_stack a chance to swap scanners
+      maybe_pop_stack
-        if keyword then
-          state           = lex_state
-          self.lex_state  = keyword.state
-          self.yacc_value = [token, src.lineno]
-          if state == :expr_fname then
-            self.yacc_value = keyword.name
-            return keyword.id0
-          end
-          if keyword.id0 == :kDO then
-            self.command_start = true
-            return :kDO_COND  if cond.is_in_state
-            return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
-            return :kDO_BLOCK if state == :expr_endarg
-            if defined?(@hack_expects_lambda) && @hack_expects_lambda
-              @hack_expects_lambda = false
-              return :kDO_LAMBDA
-            end
-            return :kDO
-          end
+      ss.scan re
+    end
-          return keyword.id0 if state == :expr_beg or state == :expr_value
+    def scanner_class # TODO: design this out of oedipus_lex. or something.
+      RPStringScanner
+    end
-          self.lex_state = :expr_beg if keyword.id0 != keyword.id1
+    def ss_string
+      ss.string
+    end
-          return keyword.id1
-        end
-      end
+    def ss_string= s # always raises; callers cannot replace the scanner's string through this writer
+      raise "Probably not" # unconditional, so the assignment below is never reached
+      ss.string = s
+    end
-      # TODO:
-      # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
-      self.lex_state =
-        if is_beg? || in_lex_state?(:expr_dot) || is_arg? then
-          if command_state then
-            :expr_cmdarg
-          else
-            :expr_arg
-          end
-        elsif ruby19 && in_lex_state?(:expr_fname) then
-          :expr_endfn
-        else
-          :expr_end
-        end
+    def unscan
+      ss.unscan
+    end
+  end
+  include SSWrapper
+end
+class RubyLexer
+  module SSStackish
+    def lineno_push new_lineno # saves the current lineno in old_lineno, then switches to new_lineno
+      self.old_lineno = self.lineno # single slot: any previously saved value is overwritten
+      self.lineno     = new_lineno
     end
-    self.yacc_value = token
+    def lineno_pop # restores lineno from old_lineno and clears the saved slot
+      self.lineno     = self.old_lineno
+      self.old_lineno = nil
+    end
+    def ss= o # guarded writer: refuses to replace the scanner while in_heredoc? is true
+      raise "Clearing ss while in heredoc!?!" if in_heredoc?
+      @old_ss = nil
+      super # this module is prepended to RubyLexer, so super reaches the next ss= in the ancestor chain
+    end
-    self.lex_state = :expr_end if
-      last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
+    def ss_push new_ss # saves the current scanner in @old_ss and installs new_ss
+      @old_ss = self.ss # single slot: any previously saved scanner is overwritten
+      @ss     = new_ss # assigns the ivar directly, so the ss= guard in this module is not invoked
+    end
-    return result
+    def ss_pop # restores the scanner saved in old_ss and clears the saved slot
+      @ss     = self.old_ss
+      @old_ss = nil
+    end
   end
-  def yylex_string # 23 lines
-    token = if lex_strterm[0] == :heredoc then
-              self.heredoc lex_strterm
-            else
-              self.parse_string lex_strterm
-            end
+  prepend SSStackish
+end
+if ENV["RP_STRTERM_DEBUG"] then # opt-in tracing: only patches RubyLexer when this env var is set
+  class RubyLexer
+    def d o # debug helper: writes o.inspect to $stderr
+      $stderr.puts o.inspect
+    end
+    alias old_lex_strterm= lex_strterm= # keep the original writer reachable under another name
+    def lex_strterm= o # performs the original assignment, then logs the value and the call site
+      self.old_lex_strterm= o
+      where = caller.first.split(/:/).first(2).join(":") # first two ":"-separated fields of the caller entry (normally file:line)
+      $stderr.puts
+      d :lex_strterm => [o, where]
+    end
+  end
+end
-    if token == :tSTRING_END || token == :tREGEXP_END then
-      self.lineno      = nil
-      self.lex_strterm = nil
-      self.lex_state   = :expr_end
+require_relative "./ruby_lexer.rex.rb"
+require_relative "./ruby_lexer_strings.rb"
+if ENV["RP_LINENO_DEBUG"] then
+  class RubyLexer
+    def d o
+      $stderr.puts o.inspect
     end
-    return token
+    alias old_lineno= lineno=
+    def lineno= n
+      self.old_lineno= n
+      where = caller.first.split(/:/).first(2).join(":")
+      $stderr.puts
+      d :lineno => [n, where]
+    end
   end
 end