RubyGems - ruby_parser - Versions diffs - 3.12.0 → 3.18.1 - Mend

ruby_parser 3.12.0 → 3.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/.autotest +18 -29
data/History.rdoc +283 -0
data/Manifest.txt +12 -4
data/README.rdoc +4 -3
data/Rakefile +189 -51
data/bin/ruby_parse +3 -1
data/bin/ruby_parse_extract_error +19 -36
data/compare/normalize.rb +76 -4
data/debugging.md +190 -0
data/gauntlet.md +106 -0
data/lib/rp_extensions.rb +14 -42
data/lib/rp_stringscanner.rb +20 -51
data/lib/ruby20_parser.rb +4659 -4218
data/lib/ruby20_parser.y +953 -602
data/lib/ruby21_parser.rb +4723 -4308
data/lib/ruby21_parser.y +956 -605
data/lib/ruby22_parser.rb +4762 -4337
data/lib/ruby22_parser.y +960 -612
data/lib/ruby23_parser.rb +4761 -4342
data/lib/ruby23_parser.y +961 -613
data/lib/ruby24_parser.rb +4791 -4341
data/lib/ruby24_parser.y +968 -612
data/lib/ruby25_parser.rb +4791 -4341
data/lib/ruby25_parser.y +968 -612
data/lib/ruby26_parser.rb +7287 -0
data/lib/ruby26_parser.y +2749 -0
data/lib/ruby27_parser.rb +8517 -0
data/lib/ruby27_parser.y +3346 -0
data/lib/ruby30_parser.rb +8751 -0
data/lib/ruby30_parser.y +3472 -0
data/lib/ruby3_parser.yy +3476 -0
data/lib/ruby_lexer.rb +611 -826
data/lib/ruby_lexer.rex +48 -40
data/lib/ruby_lexer.rex.rb +122 -46
data/lib/ruby_lexer_strings.rb +638 -0
data/lib/ruby_parser.rb +38 -34
data/lib/ruby_parser.yy +1710 -704
data/lib/ruby_parser_extras.rb +987 -553
data/test/test_ruby_lexer.rb +1718 -1539
data/test/test_ruby_parser.rb +3957 -2164
data/test/test_ruby_parser_extras.rb +39 -4
data/tools/munge.rb +250 -0
data/tools/ripper.rb +44 -0
data.tar.gz.sig +0 -0
metadata +68 -47
metadata.gz.sig +0 -0
data/lib/ruby18_parser.rb +0 -5793
data/lib/ruby18_parser.y +0 -1908
data/lib/ruby19_parser.rb +0 -6185
data/lib/ruby19_parser.y +0 -2116

data/lib/ruby_lexer.rb (changed)

@@ -4,36 +4,9 @@
 $DEBUG = true if ENV["DEBUG"]
 class RubyLexer
   # :stopdoc:
-  HAS_ENC = "".respond_to? :encoding
-  IDENT_CHAR = if HAS_ENC then
-                 /[\w\u0080-\u{10ffff}]/u
-               else
-                 /[\w\x80-\xFF]/n
-               end
   EOF = :eof_haha!
-  # ruby constants for strings (should this be moved somewhere else?)
-  STR_FUNC_BORING = 0x00
-  STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
-  STR_FUNC_EXPAND = 0x02
-  STR_FUNC_REGEXP = 0x04
-  STR_FUNC_QWORDS = 0x08
-  STR_FUNC_SYMBOL = 0x10
-  STR_FUNC_INDENT = 0x20 # <<-HEREDOC
-  STR_FUNC_ICNTNT = 0x40 # <<~HEREDOC
-  STR_SQUOTE = STR_FUNC_BORING
-  STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
-  STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
-  STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
-  STR_SSYM   = STR_FUNC_SYMBOL
-  STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
   ESCAPES = {
     "a"    => "\007",
     "b"    => "\010",
@@ -50,10 +23,17 @@ class RubyLexer
     "c\?"  => 127.chr,
   }
+  HAS_ENC = "".respond_to? :encoding
+  BTOKENS = {
+    ".."  => :tBDOT2,
+    "..." => :tBDOT3,
+  }
   TOKENS = {
     "!"   => :tBANG,
     "!="  => :tNEQ,
-    # "!@"  => :tUBANG,
+    "!@"  => :tBANG,
     "!~"  => :tNMATCH,
     ","   => :tCOMMA,
     ".."  => :tDOT2,
@@ -66,27 +46,62 @@ class RubyLexer
     "->"  => :tLAMBDA,
   }
-  TAB_WIDTH = 8
+  PERCENT_END = {
+    "(" => ")",
+    "[" => "]",
+    "{" => "}",
+    "<" => ">",
+  }
-  @@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
+  SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
+  @@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
   @@regexp_cache[nil] = nil
+  def regexp_cache
+    @@regexp_cache
+  end
+  if $DEBUG then
+    attr_reader :lex_state
+    def lex_state= o
+      return if @lex_state == o
+      from = ""
+      if ENV["VERBOSE"]
+        path = caller[0]
+        path = caller[1] if path =~ /result/
+        path, line, *_ = path.split(/:/)
+        path.delete_prefix! File.dirname File.dirname __FILE__
+        from = " at .%s:%s" % [path, line]
+      end
+      warn "lex_state: %p -> %p%s" % [lex_state, o, from]
+      @lex_state = o
+    end
+  end
   # :startdoc:
-  attr_accessor :lineno # we're bypassing oedipus' lineno handling.
+  attr_accessor :lex_state unless $DEBUG
   attr_accessor :brace_nest
   attr_accessor :cmdarg
   attr_accessor :command_start
-  attr_accessor :command_state
+  attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
   attr_accessor :last_state
   attr_accessor :cond
-  attr_accessor :extra_lineno
+  attr_accessor :old_ss
+  attr_accessor :old_lineno
+  # these are generated via ruby_lexer.rex: ss, lineno
   ##
   # Additional context surrounding tokens that both the lexer and
   # grammar use.
-  attr_accessor :lex_state
   attr_accessor :lex_strterm
   attr_accessor :lpar_beg
   attr_accessor :paren_nest
@@ -95,50 +110,33 @@ class RubyLexer
   attr_accessor :string_buffer
   attr_accessor :string_nest
-  if $DEBUG then
-    alias lex_state= lex_state=
-    def lex_state=o
-      return if @lex_state == o
-      c = caller.first
-      c = caller[1] if c =~ /\bresult\b/
-      warn "lex_state: %p -> %p from %s" % [@lex_state, o, c.clean_caller]
-      @lex_state = o
-    end
-  end
   # Last token read via next_token.
   attr_accessor :token
-  ##
-  # What version of ruby to parse. 18 and 19 are the only valid values
-  # currently supported.
-  attr_accessor :version
   attr_writer :comments
-  def initialize v = 18
-    self.version = v
-    @lex_state = :expr_none
+  def initialize _ = nil
+    @lex_state = nil # remove one warning under $DEBUG
+    self.lex_state = EXPR_NONE
-    self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
     self.cond   = RubyParserStuff::StackState.new(:cond, $DEBUG)
+    self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
+    self.ss     = RPStringScanner.new ""
     reset
   end
   def arg_ambiguous
-    self.warning("Ambiguous first argument. make sure.")
+    self.warning "Ambiguous first argument. make sure."
   end
   def arg_state
-    in_arg_state? ? :expr_arg : :expr_beg
+    is_after_operator? ? EXPR_ARG : EXPR_BEG
   end
-  def beginning_of_line?
-    ss.bol?
+  def ignore_body_comments
+    @comments.clear
   end
-  alias :bol? :beginning_of_line? # to make .rex file more readable
   def comments # TODO: remove this... maybe comment_string + attr_accessor
     c = @comments.join
@@ -146,184 +144,26 @@ class RubyLexer
     c
   end
-  def end_of_stream?
-    ss.eos?
+  def debug n
+    raise "debug #{n}"
   end
   def expr_dot?
-    lex_state == :expr_dot
+    lex_state =~ EXPR_DOT
   end
-  def expr_fname?
-    lex_state == :expr_fname
+  def expr_fname? # REFACTOR
+    lex_state =~ EXPR_FNAME
   end
   def expr_result token, text
     cond.push false
     cmdarg.push false
-    result :expr_beg, token, text
-  end
-  def heredoc here # TODO: rewrite / remove
-    _, eos, func, last_line = here
-    indent         = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
-    content_indent = (func & STR_FUNC_ICNTNT) != 0
-    expand         = (func & STR_FUNC_EXPAND) != 0
-    eos_re         = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
-    err_msg        = "can't match #{eos_re.inspect} anywhere in "
-    rb_compile_error err_msg if end_of_stream?
-    if beginning_of_line? && scan(eos_re) then
-      self.lineno += 1
-      ss.unread_many last_line # TODO: figure out how to remove this
-      return :tSTRING_END, eos
-    end
-    self.string_buffer = []
-    if expand then
-      case
-      when scan(/#[$@]/) then
-        ss.pos -= 1 # FIX omg stupid
-        return :tSTRING_DVAR, matched
-      when scan(/#[{]/) then
-        return :tSTRING_DBEG, matched
-      when scan(/#/) then
-        string_buffer << '#'
-      end
-      begin
-        c = tokadd_string func, "\n", nil
-        rb_compile_error err_msg if
-          c == RubyLexer::EOF
-        if c != "\n" then
-          return :tSTRING_CONTENT, string_buffer.join.delete("\r")
-        else
-          string_buffer << scan(/\n/)
-        end
-        rb_compile_error err_msg if end_of_stream?
-      end until check(eos_re)
-    else
-      until check(eos_re) do
-        string_buffer << scan(/.*(\n|\z)/)
-        rb_compile_error err_msg if end_of_stream?
-      end
-    end
-    self.lex_strterm = [:heredoc, eos, func, last_line]
-    string_content = string_buffer.join.delete("\r")
-    string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?
-    return :tSTRING_CONTENT, string_content
+    result EXPR_BEG, token, text
   end
-  def heredoc_dedent(string_content)
-    width = string_content.scan(/^[ \t]*(?=\S)/).map do |whitespace|
-      heredoc_whitespace_indent_size whitespace
-    end.min || 0
-    string_content.split("\n", -1).map do |line|
-      dedent_string line, width
-    end.join "\n"
-  end
-  def dedent_string(string, width)
-    characters_skipped = 0
-    indentation_skipped = 0
-    string.chars.each do |char|
-      break if indentation_skipped >= width
-      if char == ' '
-        characters_skipped += 1
-        indentation_skipped += 1
-      elsif char == "\t"
-        proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
-        break if (proposed > width)
-        characters_skipped += 1
-        indentation_skipped = proposed
-      end
-    end
-    string[characters_skipped..-1]
-  end
-  def heredoc_whitespace_indent_size(whitespace)
-    whitespace.chars.inject 0 do |size, char|
-      if char == "\t"
-        size + TAB_WIDTH
-      else
-        size + 1
-      end
-    end
-  end
-  def heredoc_identifier # TODO: remove / rewrite
-    term, func = nil, STR_FUNC_BORING
-    self.string_buffer = []
-    heredoc_indent_mods = '-'
-    heredoc_indent_mods += '\~' if ruby23plus?
-    case
-    when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
-      term = ss[2]
-      func |= STR_FUNC_INDENT unless ss[1].empty?
-      func |= STR_FUNC_ICNTNT if ss[1] == '~'
-      func |= case term
-              when "\'" then
-                STR_SQUOTE
-              when '"' then
-                STR_DQUOTE
-              else
-                STR_XQUOTE
-              end
-      string_buffer << ss[3]
-    when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
-      rb_compile_error "unterminated here document identifier"
-    when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
-      term = '"'
-      func |= STR_DQUOTE
-      unless ss[1].empty? then
-        func |= STR_FUNC_INDENT
-        func |= STR_FUNC_ICNTNT if ss[1] == '~'
-      end
-      string_buffer << ss[2]
-    else
-      return nil
-    end
-    if scan(/.*\n/) then
-      # TODO: think about storing off the char range instead
-      line = matched
-    else
-      line = nil
-    end
-    self.lex_strterm = [:heredoc, string_buffer.join, func, line]
-    if term == '`' then
-      result nil, :tXSTRING_BEG, "`"
-    else
-      result nil, :tSTRING_BEG, "\""
-    end
-  end
-  def in_fname?
-    in_lex_state? :expr_fname
-  end
-  def in_arg_state? # TODO: rename is_after_operator?
-    in_lex_state? :expr_fname, :expr_dot
-  end
-  def in_lex_state?(*states)
-    states.include? lex_state
+  def in_fname? # REFACTOR
+    lex_state =~ EXPR_FNAME
   end
   def int_with_base base
@@ -331,35 +171,35 @@ class RubyLexer
     text = matched
     case
-    when text.end_with?('ri')
-      return result(:expr_end, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
-    when text.end_with?('r')
-      return result(:expr_end, :tRATIONAL, Rational(text.chop.to_i(base)))
-    when text.end_with?('i')
-      return result(:expr_end, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
+    when text.end_with?("ri")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
+    when text.end_with?("r")
+      result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
+    when text.end_with?("i")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
     else
-      return result(:expr_end, :tINTEGER, text.to_i(base))
+      result EXPR_NUM, :tINTEGER, text.to_i(base)
     end
   end
+  def is_after_operator?
+    lex_state =~ EXPR_FNAME|EXPR_DOT
+  end
   def is_arg?
-    in_lex_state? :expr_arg, :expr_cmdarg
+    lex_state =~ EXPR_ARG_ANY
   end
   def is_beg?
-    in_lex_state? :expr_beg, :expr_value, :expr_mid, :expr_class, :expr_labelarg
+    lex_state =~ EXPR_BEG_ANY || lex_state == EXPR_LAB # yes, == EXPR_LAB
   end
   def is_end?
-    in_lex_state? :expr_end, :expr_endarg, :expr_endfn
-  end
-  def ruby22_label?
-    ruby22plus? and is_label_possible?
+    lex_state =~ EXPR_END_ANY
   end
   def is_label_possible?
-    (in_lex_state?(:expr_beg, :expr_endfn) && !command_state) || is_arg?
+    (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
   end
   def is_label_suffix?
@@ -370,31 +210,51 @@ class RubyLexer
     is_arg? and space_seen and c !~ /\s/
   end
-  def matched
-    ss.matched
+  def lambda_beginning?
+    lpar_beg && lpar_beg == paren_nest
+  end
+  def is_local_id id
+    # maybe just make this false for now
+    self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
+  end
+  def lvar_defined? id
+    # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
+    self.parser.env[id.to_sym] == :lvar
   end
   def not_end?
     not is_end?
   end
+  def possibly_escape_string text, check
+    content = match[1]
+    if text =~ check then
+      content.gsub(ESC) { unescape $1 }
+    else
+      content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
+    end
+  end
   def process_amper text
     token = if is_arg? && space_seen && !check(/\s/) then
                warning("`&' interpreted as argument prefix")
                :tAMPER
-             elsif in_lex_state? :expr_beg, :expr_mid then
+             elsif lex_state =~ EXPR_BEG|EXPR_MID then
                :tAMPER
              else
                :tAMPER2
              end
-    return result(:arg_state, token, "&")
+    result :arg_state, token, "&"
   end
   def process_backref text
-    token = ss[1].to_sym
+    token = match[1].to_sym
     # TODO: can't do lineno hack w/ symbol
-    result :expr_end, :tBACK_REF, token
+    result EXPR_END, :tBACK_REF, token
   end
   def process_begin text
@@ -406,220 +266,256 @@ class RubyLexer
     end
     @comments << matched
-    self.lineno += matched.count("\n")
+    self.lineno += matched.count("\n") # HACK?
     nil # TODO
   end
-  def process_bracing text
-    cond.lexpop
-    cmdarg.lexpop
+  def process_brace_close text
     case matched
     when "}" then
       self.brace_nest -= 1
-      self.lex_state   = :expr_endarg
+      return :tSTRING_DEND, matched if brace_nest < 0
+    end
-      # TODO
-      # if (c == '}') {
-      #     if (!brace_nest--) c = tSTRING_DEND;
-      # }
+    # matching compare/parse26.y:8099
+    cond.pop
+    cmdarg.pop
+    case matched
+    when "}" then
+      self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
       return :tRCURLY, matched
     when "]" then
       self.paren_nest -= 1
-      self.lex_state   = :expr_endarg
+      self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
       return :tRBRACK, matched
     when ")" then
       self.paren_nest -= 1
-      self.lex_state   = :expr_endfn
+      self.lex_state   = EXPR_ENDFN
       return :tRPAREN, matched
     else
       raise "Unknown bracing: #{matched.inspect}"
     end
   end
+  def process_brace_open text
+    # matching compare/parse23.y:8694
+    self.brace_nest += 1
+    if lambda_beginning? then
+      self.lpar_beg = nil
+      self.paren_nest -= 1 # close arg list when lambda opens body
+      return expr_result(:tLAMBEG, "{")
+    end
+    token = case
+            when lex_state =~ EXPR_LABELED then
+              :tLBRACE     # hash
+            when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
+              :tLCURLY     # block (primary) "{" in parse.y
+            when lex_state =~ EXPR_ENDARG then
+              :tLBRACE_ARG # block (expr)
+            else
+              :tLBRACE     # hash
+            end
+    state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
+    self.command_start = true if token != :tLBRACE
+    cond.push false
+    cmdarg.push false
+    result state, token, text
+  end
   def process_colon1 text
     # ?: / then / when
     if is_end? || check(/\s/) then
-      return result :expr_beg, :tCOLON, text
+      return result EXPR_BEG, :tCOLON, text
     end
     case
     when scan(/\'/) then
-      string STR_SSYM
+      string STR_SSYM, matched
     when scan(/\"/) then
-      string STR_DSYM
+      string STR_DSYM, matched
     end
-    result :expr_fname, :tSYMBEG, text
+    result EXPR_FNAME, :tSYMBEG, text
   end
   def process_colon2 text
-    if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
-      result :expr_beg, :tCOLON3, text
+    if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
+      result EXPR_BEG, :tCOLON3, text
     else
-      result :expr_dot, :tCOLON2, text
+      result EXPR_DOT, :tCOLON2, text
     end
   end
-  def process_curly_brace text
-    self.brace_nest += 1
-    if lpar_beg && lpar_beg == paren_nest then
-      self.lpar_beg = nil
-      self.paren_nest -= 1
-      return expr_result(:tLAMBEG, "{")
-    end
+  def process_dots text
+    tokens = ruby27plus? && is_beg? ? BTOKENS : TOKENS
-    token = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
-               :tLCURLY      #  block (primary)
-             elsif in_lex_state?(:expr_endarg) then
-               :tLBRACE_ARG  #  block (expr)
-             else
-               :tLBRACE      #  hash
-             end
-    self.command_start = true unless token == :tLBRACE
-    return expr_result(token, "{")
+    result EXPR_BEG, tokens[text], text
   end
   def process_float text
     rb_compile_error "Invalid numeric format" if text =~ /__/
     case
-    when text.end_with?('ri')
-      return result(:expr_end, :tIMAGINARY, Complex(0, Rational(text.chop.chop)))
-    when text.end_with?('r')
-      return result(:expr_end, :tRATIONAL, Rational(text.chop))
-    when text.end_with?('i')
-      return result(:expr_end, :tIMAGINARY, Complex(0, text.chop.to_f))
+    when text.end_with?("ri")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
+    when text.end_with?("i")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
+    when text.end_with?("r")
+      result EXPR_NUM, :tRATIONAL,  Rational(text.chop)
     else
-      return result(:expr_end, :tFLOAT, text.to_f)
+      result EXPR_NUM, :tFLOAT, text.to_f
     end
   end
   def process_gvar text
-    text.lineno = self.lineno
-    result(:expr_end, :tGVAR, text)
+    if parser.class.version > 20 && text == "$-" then
+      rb_compile_error "unexpected $undefined"
+    end
+    result EXPR_END, :tGVAR, text
   end
   def process_gvar_oddity text
-    return result :expr_end, "$", "$" if text == "$" # TODO: wtf is this?
     rb_compile_error "#{text.inspect} is not allowed as a global variable name"
   end
   def process_ivar text
     tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
-    text.lineno = self.lineno
-    return result(:expr_end, tok_id, text)
+    result EXPR_END, tok_id, text
+  end
+  def process_label text
+    symbol = possibly_escape_string text, /^\"/
+    result EXPR_LAB, :tLABEL, symbol
+  end
+  def process_label_or_string text
+    if @was_label && text =~ /:\Z/ then
+      @was_label = nil
+      return process_label text
+    elsif text =~ /:\Z/ then
+      self.pos -= 1 # put back ":"
+      text = text[0..-2]
+    end
+    orig_line = lineno
+    str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
+    self.lineno += str.count("\n")
+    result EXPR_END, :tSTRING, str, orig_line
   end
   def process_lchevron text
-    if (!in_lex_state?(:expr_dot, :expr_class) &&
+    if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
         !is_end? &&
-        (!is_arg? || space_seen)) then
+        (!is_arg? || lex_state =~ EXPR_LABELED || space_seen)) then
       tok = self.heredoc_identifier
       return tok if tok
     end
-    return result(:arg_state, :tLSHFT, "\<\<")
+    if is_after_operator? then
+      self.lex_state = EXPR_ARG
+    else
+      self.command_start = true if lex_state =~ EXPR_CLASS
+      self.lex_state = EXPR_BEG
+    end
+    result lex_state, :tLSHFT, "\<\<"
   end
-  def process_newline_or_comment text
+  def process_newline_or_comment text    # ../compare/parse30.y:9126 ish
     c = matched
-    hit = false
-    if c == '#' then
-      ss.pos -= 1
+    if c == "#" then
+      self.pos -= 1
       while scan(/\s*\#.*(\n+|\z)/) do
-        hit = true
-        self.lineno += matched.lines.to_a.size
-        @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
+        self.lineno += matched.count "\n"
+        @comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
       end
       return nil if end_of_stream?
     end
-    self.lineno += 1 unless hit
-    # Replace a string of newlines with a single one
-    self.lineno += matched.lines.to_a.size if scan(/\n+/)
-    return if in_lex_state?(:expr_beg, :expr_value, :expr_class,
-                            :expr_fname, :expr_dot)
+    c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
+         lex_state !~ EXPR_LABELED)
+    if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
+      # ignore if !fallthrough?
+      if !c && parser.in_kwarg then
+        # normal newline
+        self.command_start = true
+        return result EXPR_BEG, :tNL, nil
+      else
+        maybe_pop_stack
+        return # goto retry
+      end
+    end
-    if scan(/([\ \t\r\f\v]*)(\.|&)/) then
-      self.space_seen = true unless ss[1].empty?
+    if scan(/[\ \t\r\f\v]+/) then
+      self.space_seen = true
+    end
-      ss.pos -= 1
-      return unless check(/\.\./)
+    if check(/#/) then
+      return # goto retry
+    elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
+      return # goto retry
     end
     self.command_start = true
-    return result(:expr_beg, :tNL, nil)
+    result EXPR_BEG, :tNL, nil
   end
   def process_nthref text
     # TODO: can't do lineno hack w/ number
-    result :expr_end, :tNTH_REF, ss[1].to_i
+    result EXPR_END, :tNTH_REF, match[1].to_i
   end
   def process_paren text
-    token = if ruby18 then
-              process_paren18
+    token = if is_beg? then
+              :tLPAREN
+            elsif !space_seen then
+              # foo( ... ) => method call, no ambiguity
+              :tLPAREN2
+            elsif is_space_arg? then
+              :tLPAREN_ARG
+            elsif lex_state =~ EXPR_ENDFN && !lambda_beginning? then
+              # TODO:
+              # warn("parentheses after method name is interpreted as " \
+              #      "an argument list, not a decomposed argument")
+              :tLPAREN2
             else
-              process_paren19
+              :tLPAREN2 # plain "(" in parse.y
             end
     self.paren_nest += 1
-    # TODO: add :expr_label to :expr_beg (set in expr_result below)
-    return expr_result(token, "(")
-  end
-  def process_paren18
-    self.command_start = true
-    token = :tLPAREN2
-    if in_lex_state? :expr_beg, :expr_mid then
-      token = :tLPAREN
-    elsif space_seen then
-      if in_lex_state? :expr_cmdarg then
-        token = :tLPAREN_ARG
-      elsif in_lex_state? :expr_arg then
-        warning "don't put space before argument parentheses"
-      end
-    else
-      # not a ternary -- do nothing?
-    end
-    token
+    cond.push false
+    cmdarg.push false
+    result EXPR_PAR, token, text
   end
-  def process_paren19
-    if is_beg? then
-      :tLPAREN
-    elsif is_space_arg? then
-      :tLPAREN_ARG
+  def process_percent text
+    case
+    when is_beg? then
+      process_percent_quote
+    when scan(/\=/)
+      result EXPR_BEG, :tOP_ASGN, "%"
+    when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
+      process_percent_quote
     else
-      :tLPAREN2 # plain '(' in parse.y
+      result :arg_state, :tPERCENT, "%"
     end
   end
-  def process_percent text
-    return parse_quote if is_beg?
-    return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
-    return parse_quote if is_arg? && space_seen && ! check(/\s/)
-    return result(:arg_state, :tPERCENT, "%")
-  end
   def process_plus_minus text
     sign = matched
     utype, type = if sign == "+" then
@@ -628,34 +524,33 @@ class RubyLexer
                     [:tUMINUS, :tMINUS]
                   end
-    if in_arg_state? then
+    if is_after_operator? then
       if scan(/@/) then
-        return result(:expr_arg, utype, "#{sign}@")
+        return result(EXPR_ARG, utype, "#{sign}@")
       else
-        return result(:expr_arg, type, sign)
+        return result(EXPR_ARG, type, sign)
       end
     end
-    return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
+    return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
-    if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
+    if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
       arg_ambiguous if is_arg?
       if check(/\d/) then
         return nil if utype == :tUPLUS
-        return result(:expr_beg, :tUMINUS_NUM, sign)
+        return result EXPR_BEG, :tUMINUS_NUM, sign
       end
-      return result(:expr_beg, utype, sign)
+      return result EXPR_BEG, utype, sign
     end
-    return result(:expr_beg, type, sign)
+    result EXPR_BEG, type, sign
   end
   def process_questionmark text
     if is_end? then
-      state = ruby18 ? :expr_beg : :expr_value # HACK?
-      return result(state, :tEH, "?")
+      return result EXPR_BEG, :tEH, "?"
     end
     if end_of_stream? then
@@ -664,12 +559,12 @@ class RubyLexer
     if check(/\s|\v/) then
       unless is_arg? then
-        c2 = { " " => 's',
-              "\n" => 'n',
-              "\t" => 't',
-              "\v" => 'v',
-              "\r" => 'r',
-              "\f" => 'f' }[matched]
+        c2 = { " " => "s",
+              "\n" => "n",
+              "\t" => "t",
+              "\v" => "v",
+              "\r" => "r",
+              "\f" => "f" }[matched]
         if c2 then
           warning("invalid character syntax; use ?\\" + c2)
@@ -677,34 +572,40 @@ class RubyLexer
       end
       # ternary
-      state = ruby18 ? :expr_beg : :expr_value # HACK?
-      return result(state, :tEH, "?")
+      return result EXPR_BEG, :tEH, "?"
     elsif check(/\w(?=\w)/) then # ternary, also
-      return result(:expr_beg, :tEH, "?")
+      return result EXPR_BEG, :tEH, "?"
     end
     c = if scan(/\\/) then
           self.read_escape
         else
-          ss.getch
+          getch
         end
-    if version == 18 then
-      return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
-    else
-      return result(:expr_end, :tSTRING, c)
-    end
+    result EXPR_END, :tSTRING, c
+  end
+  def process_simple_string text
+    orig_line = lineno
+    self.lineno += text.count("\n")
+    str = text[1..-2]
+      .gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
+    str = str.b unless str.valid_encoding?
+    result EXPR_END, :tSTRING, str, orig_line
   end
   def process_slash text
     if is_beg? then
-      string STR_REGEXP
+      string STR_REGEXP, matched
-      return result(nil, :tREGEXP_BEG, "/")
+      return result nil, :tREGEXP_BEG, "/"
     end
     if scan(/\=/) then
-      return result(:expr_beg, :tOP_ASGN, "/")
+      return result(EXPR_BEG, :tOP_ASGN, "/")
     end
     if is_arg? && space_seen then
@@ -715,7 +616,7 @@ class RubyLexer
       end
     end
-    return result(:arg_state, :tDIVIDE, "/")
+    result :arg_state, :tDIVIDE, "/"
   end
   def process_square_bracket text
@@ -723,72 +624,40 @@ class RubyLexer
     token = nil
-    if in_arg_state? then
+    if is_after_operator? then
       case
       when scan(/\]\=/) then
         self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
-        return result(:expr_arg, :tASET, "[]=")
+        return result EXPR_ARG, :tASET, "[]="
       when scan(/\]/) then
         self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
-        return result(:expr_arg, :tAREF, "[]")
+        return result EXPR_ARG, :tAREF, "[]"
       else
         rb_compile_error "unexpected '['"
       end
     elsif is_beg? then
       token = :tLBRACK
-    elsif is_arg? && space_seen then
+    elsif is_arg? && (space_seen || lex_state =~ EXPR_LABELED) then
       token = :tLBRACK
     else
       token = :tLBRACK2
     end
-    return expr_result(token, "[")
-  end
-  def possibly_escape_string text, check
-    content = match[1]
-    if text =~ check then
-      content.gsub(ESC) { unescape $1 }
-    else
-      content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
-    end
+    cond.push false
+    cmdarg.push false
+    result EXPR_PAR, token, text
   end
   def process_symbol text
-    symbol = possibly_escape_string text, /^:"/
-    rb_compile_error "symbol cannot contain '\\0'" if
-      ruby18 && symbol =~ /\0/
-    return result(:expr_end, :tSYMBOL, symbol)
-  end
-  def was_label?
-    @was_label = ruby22_label?
-    true
-  end
+    symbol = possibly_escape_string text, /^:\"/ # stupid emacs
-  def process_label_or_string text
-    if @was_label && text =~ /:\Z/ then
-      @was_label = nil
-      return process_label text
-    elsif text =~ /:\Z/ then
-      ss.pos -= 1 # put back ":"
-      text = text[0..-2]
-    end
-    result :expr_end, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
-  end
-  def process_label text
-    symbol = possibly_escape_string text, /^"/
-    result(:expr_labelarg, :tLABEL, [symbol, self.lineno])
+    result EXPR_LIT, :tSYMBOL, symbol
   end
   def process_token text
-    # TODO: make this always return [token, lineno]
+    # matching: parse_ident in compare/parse23.y:7989
+    # FIX: remove: self.last_state = lex_state
     token = self.token = text
     token << matched if scan(/[\!\?](?!=)/)
@@ -796,7 +665,7 @@ class RubyLexer
       case
       when token =~ /[!?]$/ then
         :tFID
-      when in_lex_state?(:expr_fname) && scan(/=(?:(?![~>=])|(?==>))/) then
+      when lex_state =~ EXPR_FNAME && scan(/=(?:(?![~>=])|(?==>))/) then
         # ident=, not =~ => == or followed by =>
         # TODO test lexing of a=>b vs a==>b
         token << matched
@@ -807,216 +676,133 @@ class RubyLexer
         :tIDENTIFIER
       end
-    if !ruby18 and is_label_possible? and is_label_suffix? then
+    if is_label_possible? and is_label_suffix? then
       scan(/:/)
-      return result(:expr_labelarg, :tLABEL, [token, self.lineno])
+      return result EXPR_LAB, :tLABEL, token
     end
-    unless in_lex_state? :expr_dot then
+    # TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
+    if lex_state !~ EXPR_DOT then
       # See if it is a reserved word.
-      keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
-                  RubyParserStuff::Keyword.keyword18 token
-                else
-                  RubyParserStuff::Keyword.keyword19 token
-                end
+      keyword = RubyParserStuff::Keyword.keyword token
       return process_token_keyword keyword if keyword
-    end # unless in_lex_state? :expr_dot
-    # TODO:
-    # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
+    end
-    state = if is_beg? or is_arg? or in_lex_state? :expr_dot then
-              command_state ? :expr_cmdarg : :expr_arg
-            elsif not ruby18 and in_lex_state? :expr_fname then
-              :expr_endfn
+    # matching: compare/parse30.y:9039
+    state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
+              cmd_state ? EXPR_CMDARG : EXPR_ARG
+            elsif lex_state =~ EXPR_FNAME then
+              EXPR_ENDFN
             else
-              :expr_end
+              EXPR_END
             end
+    self.lex_state = state
-    if not [:expr_dot, :expr_fname].include? last_state and
-        self.parser.env[token.to_sym] == :lvar then
-      state = :expr_end
-    end
+    tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
-    token.lineno = self.lineno # yes, on a string. I know... I know...
+    if last_state !~ EXPR_DOT|EXPR_FNAME and
+        (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
+        lvar_defined?(token) then
+      state = EXPR_END|EXPR_LABEL
+    end
-    return result(state, tok_id, token)
+    result state, tok_id, token
   end
   def process_token_keyword keyword
-    state = keyword.state
+    # matching MIDDLE of parse_ident in compare/parse23.y:8046
+    state = lex_state
-    value = [token, self.lineno]
+    return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
-    self.command_start = true if state == :expr_beg and lex_state != :expr_fname
+    self.lex_state = keyword.state
+    self.command_start = true if lex_state =~ EXPR_BEG
     case
-    when lex_state == :expr_fname then
-      result(state, keyword.id0, keyword.name)
-    when keyword.id0 == :kDO then
+    when keyword.id0 == :kDO then # parse26.y line 7591
       case
-      when lpar_beg && lpar_beg == paren_nest then
-        self.lpar_beg = nil
-        self.paren_nest -= 1
-        expr_result(:kDO_LAMBDA, value)
+      when lambda_beginning? then
+        self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
+        self.paren_nest -= 1 # TODO: question this?
+        result lex_state, :kDO_LAMBDA, token
       when cond.is_in_state then
-        result(state, :kDO_COND, value)
-      when cmdarg.is_in_state && lex_state != :expr_cmdarg then
-        result(state, :kDO_BLOCK, value)
-      when in_lex_state?(:expr_beg, :expr_endarg) then
-        result(state, :kDO_BLOCK, value)
-      when lex_state == :expr_end # eg: a -> do end do end
-        result(state, :kDO_BLOCK, value)
+        result lex_state, :kDO_COND, token
+      when cmdarg.is_in_state && state != EXPR_CMDARG then
+        result lex_state, :kDO_BLOCK, token
       else
-        result(state, :kDO, value)
+        result lex_state, :kDO, token
       end
-    when in_lex_state?(:expr_beg, :expr_value, :expr_labelarg) then
-      result(state, keyword.id0, value)
+    when state =~ EXPR_PAD then
+      result lex_state, keyword.id0, token
     when keyword.id0 != keyword.id1 then
-      result(:expr_beg, keyword.id1, value)
+      result EXPR_PAR, keyword.id1, token
     else
-      result(state, keyword.id1, value)
+      result lex_state, keyword.id1, token
     end
   end
   def process_underscore text
-    ss.unscan # put back "_"
+    self.unscan # put back "_"
     if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
-      return [RubyLexer::EOF, RubyLexer::EOF]
-    elsif scan(/\_\w*/) then
-      return process_token matched
+      ss.terminate
+      [RubyLexer::EOF, RubyLexer::EOF]
+    elsif scan(/#{IDENT_CHAR}+/) then
+      process_token matched
     end
   end
   def rb_compile_error msg
-    msg += ". near line #{self.lineno}: #{ss.rest[/^.*/].inspect}"
+    msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
     raise RubyParser::SyntaxError, msg
   end
-  def read_escape # TODO: remove / rewrite
-    case
-    when scan(/\\/) then                  # Backslash
-      '\\'
-    when scan(/n/) then                   # newline
-      self.extra_lineno -= 1
-      "\n"
-    when scan(/t/) then                   # horizontal tab
-      "\t"
-    when scan(/r/) then                   # carriage-return
-      "\r"
-    when scan(/f/) then                   # form-feed
-      "\f"
-    when scan(/v/) then                   # vertical tab
-      "\13"
-    when scan(/a/) then                   # alarm(bell)
-      "\007"
-    when scan(/e/) then                   # escape
-      "\033"
-    when scan(/b/) then                   # backspace
-      "\010"
-    when scan(/s/) then                   # space
-      " "
-    when scan(/[0-7]{1,3}/) then          # octal constant
-      (matched.to_i(8) & 0xFF).chr
-    when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
-      ss[1].to_i(16).chr
-    when check(/M-\\[\\MCc]/) then
-      scan(/M-\\/) # eat it
-      c = self.read_escape
-      c[0] = (c[0].ord | 0x80).chr
-      c
-    when scan(/M-(.)/) then
-      c = ss[1]
-      c[0] = (c[0].ord | 0x80).chr
-      c
-    when check(/(C-|c)\\[\\MCc]/) then
-      scan(/(C-|c)\\/) # eat it
-      c = self.read_escape
-      c[0] = (c[0].ord & 0x9f).chr
-      c
-    when scan(/C-\?|c\?/) then
-      127.chr
-    when scan(/(C-|c)(.)/) then
-      c = ss[2]
-      c[0] = (c[0].ord & 0x9f).chr
-      c
-    when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
-      matched
-    when scan(/u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/) then
-      [ss[1].delete("{}").to_i(16)].pack("U")
-    when scan(/[McCx0-9]/) || end_of_stream? then
-      rb_compile_error("Invalid escape character syntax")
-    else
-      ss.getch
-    end.dup
-  end
-  def regx_options # TODO: rewrite / remove
-    good, bad = [], []
-    if scan(/[a-z]+/) then
-      good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
-    end
-    unless bad.empty? then
-      rb_compile_error("unknown regexp option%s - %s" %
-                       [(bad.size > 1 ? "s" : ""), bad.join.inspect])
-    end
-    return good.join
-  end
   def reset
+    self.lineno        = 1
     self.brace_nest    = 0
     self.command_start = true
     self.comments      = []
-    self.lex_state     = :expr_none
+    self.lex_state     = EXPR_NONE
     self.lex_strterm   = nil
-    self.lineno        = 1
     self.lpar_beg      = nil
     self.paren_nest    = 0
     self.space_seen    = false
     self.string_nest   = 0
     self.token         = nil
-    self.extra_lineno  = 0
+    self.string_buffer = []
+    self.old_ss        = nil
+    self.old_lineno    = nil
-    self.cmdarg.reset
     self.cond.reset
+    self.cmdarg.reset
   end
-  def result lex_state, token, text # :nodoc:
-    lex_state = self.arg_state if lex_state == :arg_state
-    self.lex_state = lex_state if lex_state
-    [token, text]
-  end
+  def result new_state, token, text, line = self.lineno # :nodoc:
+    new_state = self.arg_state if new_state == :arg_state
+    self.lex_state = new_state if new_state
-  def ruby18
-    RubyParser::V18 === parser
+    [token, [text, line]]
   end
-  def scan re
-    ss.scan re
+  def ruby22_label?
+    ruby22plus? and is_label_possible?
   end
-  def check re
-    ss.check re
+  def ruby22plus?
+    parser.class.version >= 22
   end
-  def eat_whitespace
-    r = scan(/\s+/)
-    self.extra_lineno += r.count("\n") if r
-    r
+  def ruby23plus?
+    parser.class.version >= 23
   end
-  def fixup_lineno extra = 0
-    self.lineno += self.extra_lineno + extra
-    self.extra_lineno = 0
+  def ruby24minus?
+    parser.class.version <= 24
   end
-  def scanner_class # TODO: design this out of oedipus_lex. or something.
-    RPStringScanner
+  def ruby27plus?
+    parser.class.version >= 27
   end
   def space_vs_beginning space_type, beg_type, fallback
@@ -1031,139 +817,9 @@ class RubyLexer
     end
   end
-  def string type, beg = matched, nnd = "\0"
-    self.lex_strterm = [:strterm, type, beg, nnd]
-  end
-  # TODO: consider
-  # def src= src
-  #   raise "bad src: #{src.inspect}" unless String === src
-  #   @src = RPStringScanner.new(src)
-  # end
-  def tokadd_escape term # TODO: rewrite / remove
-    case
-    when scan(/\\\n/) then
-      # just ignore
-    when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
-      self.string_buffer << matched
-    when scan(/\\([MC]-|c)(?=\\)/) then
-      self.string_buffer << matched
-      self.tokadd_escape term
-    when scan(/\\([MC]-|c)(.)/) then
-      self.string_buffer << matched
-    when scan(/\\[McCx]/) then
-      rb_compile_error "Invalid escape character syntax"
-    when scan(/\\(.)/m) then
-      chr = ss[1]
-      prev = self.string_buffer.last
-      if term == chr && prev && prev.end_with?("(?") then
-        self.string_buffer << chr
-      else
-        self.string_buffer << matched
-      end
-    else
-      rb_compile_error "Invalid escape character syntax"
-    end
-  end
-  def tokadd_string(func, term, paren) # TODO: rewrite / remove
-    qwords = (func & STR_FUNC_QWORDS) != 0
-    escape = (func & STR_FUNC_ESCAPE) != 0
-    expand = (func & STR_FUNC_EXPAND) != 0
-    regexp = (func & STR_FUNC_REGEXP) != 0
-    symbol = (func & STR_FUNC_SYMBOL) != 0
-    paren_re = @@regexp_cache[paren]
-    term_re  = @@regexp_cache[term]
-    until end_of_stream? do
-      c = nil
-      handled = true
-      case
-      when paren_re && scan(paren_re) then
-        self.string_nest += 1
-      when scan(term_re) then
-        if self.string_nest == 0 then
-          ss.pos -= 1
-          break
-        else
-          self.string_nest -= 1
-        end
-      when expand && scan(/#(?=[\$\@\{])/) then
-        ss.pos -= 1
-        break
-      when qwords && scan(/\s/) then
-        ss.pos -= 1
-        break
-      when expand && scan(/#(?!\n)/) then
-        # do nothing
-      when check(/\\/) then
-        case
-        when qwords && scan(/\\\n/) then
-          string_buffer << "\n"
-          next
-        when qwords && scan(/\\\s/) then
-          c = ' '
-        when expand && scan(/\\\n/) then
-          next
-        when regexp && check(/\\/) then
-          self.tokadd_escape term
-          next
-        when expand && scan(/\\/) then
-          c = self.read_escape
-        when scan(/\\\n/) then
-          # do nothing
-        when scan(/\\\\/) then
-          string_buffer << '\\' if escape
-          c = '\\'
-        when scan(/\\/) then
-          unless scan(term_re) || paren.nil? || scan(paren_re) then
-            string_buffer << "\\"
-          end
-        else
-          handled = false
-        end # inner /\\/ case
-      else
-        handled = false
-      end # top case
-      unless handled then
-        t = Regexp.escape term
-        x = Regexp.escape(paren) if paren && paren != "\000"
-        re = if qwords then
-               if HAS_ENC then
-                 /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
-               else
-                 /[^#{t}#{x}\#\0\\\s\v]+|./ # argh. 1.8's \s doesn't pick up \v
-               end
-             else
-               /[^#{t}#{x}\#\0\\]+|./
-             end
-        scan re
-        c = matched
-        rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
-      end # unless handled
-      c ||= matched
-      string_buffer << c
-    end # until
-    c ||= matched
-    c = RubyLexer::EOF if end_of_stream?
-    return c
-  end
   def unescape s
     r = ESCAPES[s]
-    self.extra_lineno += 1 if s == "\n"     # eg backslash newline strings
-    self.extra_lineno -= 1 if r && s == "n" # literal \n, not newline
     return r if r
     x = case s
@@ -1179,12 +835,15 @@ class RubyLexer
           s
         when /^[McCx0-9]/ then
           rb_compile_error("Invalid escape character syntax")
-        when /u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/ then
+        when /u(\h{4})/ then
           [$1.delete("{}").to_i(16)].pack("U")
+        when /u(\h{1,3})/ then
+          rb_compile_error("Invalid escape character syntax")
+        when /u\{(\h+(?:\s+\h+)*)\}/ then
+          $1.split.map { |cp| cp.to_i(16) }.pack("U*")
         else
           s
         end
-    x.force_encoding "UTF-8" if HAS_ENC
     x
   end
@@ -1192,168 +851,294 @@ class RubyLexer
     # do nothing for now
   end
-  def ruby22plus?
-    parser.class.version >= 22
+  def was_label?
+    @was_label = ruby22_label?
+    true
   end
-  def ruby23plus?
-    parser.class.version >= 23
-  end
+  class State
+    attr_accessor :n
+    attr_accessor :names
-  def process_string # TODO: rewrite / remove
-    token = if lex_strterm[0] == :heredoc then
-              self.heredoc lex_strterm
-            else
-              self.parse_string lex_strterm
-            end
+    # TODO: take a shared hash of strings for inspect/to_s
+    def initialize o, names
+      raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
-    token_type, c = token
+      self.n = o
+      self.names = names
+    end
-    if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
-      if (([:expr_beg, :expr_endfn].include?(lex_state) &&
-           !cond.is_in_state) || is_arg?) &&
-          is_label_suffix? then
-        scan(/:/)
-        token_type = token[0] = :tLABEL_END
-      end
+    def == o
+      self.equal?(o) || (o.class == self.class && o.n == self.n)
     end
-    if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
-      self.lex_strterm = nil
-      self.lex_state   = (token_type == :tLABEL_END) ? :expr_labelarg : :expr_end
+    def =~ v
+      (self.n & v.n) != 0
     end
-    return token
-  end
+    def | v
+      raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
+        self.names == v.names
+      self.class.new(self.n | v.n, self.names)
+    end
-  def parse_quote # TODO: remove / rewrite
-    beg, nnd, short_hand, c = nil, nil, false, nil
+    def inspect
+      return "Value(0)" if n.zero? # HACK?
-    if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
-      rb_compile_error "unknown type of %string" if ss.matched_size == 2
-      c, beg, short_hand = matched, ss.getch, false
-    else                               # Short-hand (e.g. %{, %., %!, etc)
-      c, beg, short_hand = 'Q', ss.getch, true
+      names.map { |v, k| k if self =~ v }.
+        compact.
+        join("|").
+        gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
     end
-    if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
-      rb_compile_error "unterminated quoted string meets end of file"
+    alias to_s inspect
+    module Values
+      expr_names = {}
+      EXPR_NONE    = State.new    0x0, expr_names
+      EXPR_BEG     = State.new    0x1, expr_names
+      EXPR_END     = State.new    0x2, expr_names
+      EXPR_ENDARG  = State.new    0x4, expr_names
+      EXPR_ENDFN   = State.new    0x8, expr_names
+      EXPR_ARG     = State.new   0x10, expr_names
+      EXPR_CMDARG  = State.new   0x20, expr_names
+      EXPR_MID     = State.new   0x40, expr_names
+      EXPR_FNAME   = State.new   0x80, expr_names
+      EXPR_DOT     = State.new  0x100, expr_names
+      EXPR_CLASS   = State.new  0x200, expr_names
+      EXPR_LABEL   = State.new  0x400, expr_names
+      EXPR_LABELED = State.new  0x800, expr_names
+      EXPR_FITEM   = State.new 0x1000, expr_names
+      EXPR_BEG_ANY = EXPR_BEG | EXPR_MID    | EXPR_CLASS
+      EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
+      EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
+      # extra fake lex_state names to make things a bit cleaner
+      EXPR_LAB = EXPR_ARG|EXPR_LABELED
+      EXPR_LIT = EXPR_END|EXPR_ENDARG
+      EXPR_PAR = EXPR_BEG|EXPR_LABEL
+      EXPR_PAD = EXPR_BEG|EXPR_LABELED
+      EXPR_NUM = EXPR_LIT
+      expr_names.merge!(EXPR_NONE    => "EXPR_NONE",
+                        EXPR_BEG     => "EXPR_BEG",
+                        EXPR_END     => "EXPR_END",
+                        EXPR_ENDARG  => "EXPR_ENDARG",
+                        EXPR_ENDFN   => "EXPR_ENDFN",
+                        EXPR_ARG     => "EXPR_ARG",
+                        EXPR_CMDARG  => "EXPR_CMDARG",
+                        EXPR_MID     => "EXPR_MID",
+                        EXPR_FNAME   => "EXPR_FNAME",
+                        EXPR_DOT     => "EXPR_DOT",
+                        EXPR_CLASS   => "EXPR_CLASS",
+                        EXPR_LABEL   => "EXPR_LABEL",
+                        EXPR_LABELED => "EXPR_LABELED",
+                        EXPR_FITEM   => "EXPR_FITEM")
+      # ruby constants for strings
+      str_func_names = {}
+      STR_FUNC_BORING = State.new 0x00,    str_func_names
+      STR_FUNC_ESCAPE = State.new 0x01,    str_func_names
+      STR_FUNC_EXPAND = State.new 0x02,    str_func_names
+      STR_FUNC_REGEXP = State.new 0x04,    str_func_names
+      STR_FUNC_QWORDS = State.new 0x08,    str_func_names
+      STR_FUNC_SYMBOL = State.new 0x10,    str_func_names
+      STR_FUNC_INDENT = State.new 0x20,    str_func_names # <<-HEREDOC
+      STR_FUNC_LABEL  = State.new 0x40,    str_func_names
+      STR_FUNC_LIST   = State.new 0x4000,  str_func_names
+      STR_FUNC_TERM   = State.new 0x8000,  str_func_names
+      STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
+      # TODO: check parser25.y on how they do STR_FUNC_INDENT
+      STR_SQUOTE = STR_FUNC_BORING
+      STR_DQUOTE = STR_FUNC_EXPAND
+      STR_XQUOTE = STR_FUNC_EXPAND
+      STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
+      STR_SWORD  = STR_FUNC_QWORDS | STR_FUNC_LIST
+      STR_DWORD  = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
+      STR_SSYM   = STR_FUNC_SYMBOL
+      STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
+      STR_LABEL  = STR_FUNC_LABEL
+      str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
+                            STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
+                            STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
+                            STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
+                            STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
+                            STR_FUNC_INDENT => "STR_FUNC_INDENT",
+                            STR_FUNC_LABEL  => "STR_FUNC_LABEL",
+                            STR_FUNC_LIST   => "STR_FUNC_LIST",
+                            STR_FUNC_TERM   => "STR_FUNC_TERM",
+                            STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
+                            STR_SQUOTE      => "STR_SQUOTE")
     end
-    # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
-    nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
-    nnd, beg = beg, "\0" if nnd.nil?
+    include Values
+  end
-    token_type, text = nil, "%#{c}#{beg}"
-    token_type, string_type = case c
-                              when 'Q' then
-                                ch = short_hand ? nnd : c + beg
-                                text = "%#{ch}"
-                                [:tSTRING_BEG,   STR_DQUOTE]
-                              when 'q' then
-                                [:tSTRING_BEG,   STR_SQUOTE]
-                              when 'W' then
-                                eat_whitespace
-                                [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
-                              when 'w' then
-                                eat_whitespace
-                                [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
-                              when 'x' then
-                                [:tXSTRING_BEG,  STR_XQUOTE]
-                              when 'r' then
-                                [:tREGEXP_BEG,   STR_REGEXP]
-                              when 's' then
-                                self.lex_state  = :expr_fname
-                                [:tSYMBEG,       STR_SSYM]
-                              when 'I' then
-                                eat_whitespace
-                                [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
-                              when 'i' then
-                                eat_whitespace
-                                [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
-                              end
+  include State::Values
+end
-    rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
-      token_type.nil?
+class RubyLexer
+  module SSWrapper
+    def string= s
+      ss.string= s
+    end
-    raise "huh" unless string_type
+    def beginning_of_line?
+      ss.bol?
+    end
-    string string_type, nnd, beg
+    alias bol? beginning_of_line? # to make .rex file more readable
-    return token_type, text
-  end
+    def check re
+      maybe_pop_stack
-  def parse_string quote # TODO: rewrite / remove
-    _, string_type, term, open = quote
+      ss.check re
+    end
-    space = false # FIX: remove these
-    func = string_type
-    paren = open
-    term_re = @@regexp_cache[term]
+    def end_of_stream?
+      ss.eos?
+    end
-    qwords = (func & STR_FUNC_QWORDS) != 0
-    regexp = (func & STR_FUNC_REGEXP) != 0
-    expand = (func & STR_FUNC_EXPAND) != 0
+    alias eos? end_of_stream?
-    unless func then # nil'ed from qwords below. *sigh*
-      return :tSTRING_END, nil
+    def getch
+      c = ss.getch
+      c = ss.getch if c == "\r" && ss.peek(1) == "\n"
+      c
     end
-    space = true if qwords and eat_whitespace
+    def match
+      ss
+    end
-    if self.string_nest == 0 && scan(/#{term_re}/) then
-      if qwords then
-        quote[1] = nil
-        return :tSPACE, nil
-      elsif regexp then
-        return :tREGEXP_END, self.regx_options
-      else
-        return :tSTRING_END, term
+    def matched
+      ss.matched
+    end
+    def in_heredoc?
+      !!self.old_ss
+    end
+    def maybe_pop_stack
+      if ss.eos? && in_heredoc? then
+        self.ss_pop
+        self.lineno_pop
       end
     end
-    return :tSPACE, nil if space
+    def pos
+      ss.pos
+    end
-    self.string_buffer = []
+    def pos= n
+      ss.pos = n
+    end
-    if expand
-      case
-      when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
-        # TODO: !ISASCII
-        # ?! see parser_peek_variable_name
-        return :tSTRING_DVAR, nil
-      when scan(/#(?=\@\@?[a-zA-Z_])/) then
-        # TODO: !ISASCII
-        return :tSTRING_DVAR, nil
-      when scan(/#[{]/) then
-        return :tSTRING_DBEG, nil
-      when scan(/#/) then
-        string_buffer << '#'
-      end
+    def rest
+      ss.rest
+    end
+    def scan re
+      maybe_pop_stack
+      ss.scan re
     end
-    if tokadd_string(func, term, paren) == RubyLexer::EOF then
-      rb_compile_error "unterminated string meets end of file"
+    def scanner_class # TODO: design this out of oedipus_lex. or something.
+      RPStringScanner
     end
-    return :tSTRING_CONTENT, string_buffer.join
+    def ss_string
+      ss.string
+    end
+    def ss_string= s
+      raise "Probably not"
+      ss.string = s
+    end
+    def unscan
+      ss.unscan
+    end
   end
+  include SSWrapper
 end
-require "ruby_lexer.rex"
+class RubyLexer
+  module SSStackish
+    def lineno_push new_lineno
+      self.old_lineno = self.lineno
+      self.lineno     = new_lineno
+    end
-if ENV["RP_LINENO_DEBUG"] then
+    def lineno_pop
+      self.lineno     = self.old_lineno
+      self.old_lineno = nil
+    end
+    def ss= o
+      raise "Clearing ss while in heredoc!?!" if in_heredoc?
+      @old_ss = nil
+      super
+    end
+    def ss_push new_ss
+      @old_ss = self.ss
+      @ss     = new_ss
+    end
+    def ss_pop
+      @ss     = self.old_ss
+      @old_ss = nil
+    end
+  end
+  prepend SSStackish
+end
+if ENV["RP_STRTERM_DEBUG"] then
   class RubyLexer
-    alias :old_lineno= :lineno=
+    def d o
+      $stderr.puts o.inspect
+    end
+    alias old_lex_strterm= lex_strterm=
+    def lex_strterm= o
+      self.old_lex_strterm= o
+      where = caller.first.split(/:/).first(2).join(":")
+      $stderr.puts
+      d :lex_strterm => [o, where]
+    end
+  end
+end
+require_relative "./ruby_lexer.rex.rb"
+require_relative "./ruby_lexer_strings.rb"
+if ENV["RP_LINENO_DEBUG"] then
+  class RubyLexer
     def d o
       $stderr.puts o.inspect
     end
+    alias old_lineno= lineno=
     def lineno= n
       self.old_lineno= n
       where = caller.first.split(/:/).first(2).join(":")
-      d :lineno => [n, where, ss && ss.rest[0,40]]
+      $stderr.puts
+      d :lineno => [n, where]
     end
   end
 end