RubyGems - ruby_parser - Versions diffs - 3.13.1 → 3.21.0 - Mend

ruby_parser 3.13.1 → 3.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/.autotest +18 -29
data/History.rdoc +312 -0
data/Manifest.txt +16 -15
data/README.rdoc +13 -9
data/Rakefile +237 -106
data/bin/ruby_parse +3 -1
data/bin/ruby_parse_extract_error +9 -4
data/compare/normalize.rb +54 -6
data/debugging.md +172 -0
data/gauntlet.md +107 -0
data/lib/rp_extensions.rb +15 -36
data/lib/rp_stringscanner.rb +20 -51
data/lib/ruby_lexer.rb +515 -812
data/lib/ruby_lexer.rex +33 -27
data/lib/ruby_lexer.rex.rb +64 -31
data/lib/ruby_lexer_strings.rb +638 -0
data/lib/ruby_parser.rb +46 -36
data/lib/{ruby_parser.yy → ruby_parser2.yy} +1400 -488
data/lib/ruby_parser20.rb +10953 -0
data/lib/ruby_parser21.rb +10978 -0
data/lib/ruby_parser22.rb +11119 -0
data/lib/ruby_parser23.rb +11160 -0
data/lib/ruby_parser24.rb +11209 -0
data/lib/ruby_parser25.rb +11209 -0
data/lib/ruby_parser26.rb +11231 -0
data/lib/ruby_parser27.rb +12960 -0
data/lib/{ruby26_parser.y → ruby_parser3.yy} +1652 -521
data/lib/ruby_parser30.rb +13292 -0
data/lib/ruby_parser31.rb +13625 -0
data/lib/ruby_parser32.rb +13577 -0
data/lib/ruby_parser33.rb +13577 -0
data/lib/ruby_parser_extras.rb +988 -474
data/test/test_ruby_lexer.rb +1339 -1155
data/test/test_ruby_parser.rb +4255 -2103
data/test/test_ruby_parser_extras.rb +39 -4
data/tools/munge.rb +52 -13
data/tools/ripper.rb +24 -6
data.tar.gz.sig +0 -0
metadata +73 -56
metadata.gz.sig +0 -0
data/lib/ruby20_parser.rb +0 -6869
data/lib/ruby20_parser.y +0 -2431
data/lib/ruby21_parser.rb +0 -6944
data/lib/ruby21_parser.y +0 -2449
data/lib/ruby22_parser.rb +0 -6968
data/lib/ruby22_parser.y +0 -2458
data/lib/ruby23_parser.rb +0 -6987
data/lib/ruby23_parser.y +0 -2460
data/lib/ruby24_parser.rb +0 -6994
data/lib/ruby24_parser.y +0 -2466
data/lib/ruby25_parser.rb +0 -6994
data/lib/ruby25_parser.y +0 -2466
data/lib/ruby26_parser.rb +0 -7012

data/lib/ruby_lexer.rb CHANGED Viewed

@@ -4,135 +4,9 @@
 $DEBUG = true if ENV["DEBUG"]
 class RubyLexer
   # :stopdoc:
-  HAS_ENC = "".respond_to? :encoding
-  IDENT_CHAR = if HAS_ENC then
-                 /[\w\u0080-\u{10ffff}]/u
-               else
-                 /[\w\x80-\xFF]/n
-               end
   EOF = :eof_haha!
-  # ruby constants for strings (should this be moved somewhere else?)
-  STR_FUNC_BORING = 0x00
-  STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
-  STR_FUNC_EXPAND = 0x02
-  STR_FUNC_REGEXP = 0x04
-  STR_FUNC_QWORDS = 0x08
-  STR_FUNC_SYMBOL = 0x10
-  STR_FUNC_INDENT = 0x20 # <<-HEREDOC
-  STR_FUNC_ICNTNT = 0x40 # <<~HEREDOC
-  STR_SQUOTE = STR_FUNC_BORING
-  STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
-  STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
-  STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
-  STR_SSYM   = STR_FUNC_SYMBOL
-  STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
-  class State
-    attr_accessor :n
-    def initialize o
-      raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
-      self.n = o
-    end
-    def == o
-      o.class == self.class && o.n == self.n
-    end
-    def =~ v
-      (self.n & v.n) != 0
-    end
-    def | v
-      self.class.new(self.n | v.n)
-    end
-    def inspect
-      return "EXPR_NONE" if n.zero?
-      NAMES.map { |v,k| k if self =~ v }.compact.join "|"
-    end
-    module Values
-      EXPR_NONE    = State.new    0x0
-      EXPR_BEG     = State.new    0x1
-      EXPR_END     = State.new    0x2
-      EXPR_ENDARG  = State.new    0x4
-      EXPR_ENDFN   = State.new    0x8
-      EXPR_ARG     = State.new   0x10
-      EXPR_CMDARG  = State.new   0x20
-      EXPR_MID     = State.new   0x40
-      EXPR_FNAME   = State.new   0x80
-      EXPR_DOT     = State.new  0x100
-      EXPR_CLASS   = State.new  0x200
-      EXPR_LABEL   = State.new  0x400
-      EXPR_LABELED = State.new  0x800
-      EXPR_FITEM   = State.new 0x1000
-      EXPR_BEG_ANY = EXPR_BEG | EXPR_MID    | EXPR_CLASS
-      EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
-      EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
-      # extra fake lex_state names to make things a bit cleaner
-      EXPR_LAB = EXPR_ARG|EXPR_LABELED
-      EXPR_NUM = EXPR_END|EXPR_ENDARG
-      EXPR_PAR = EXPR_BEG|EXPR_LABEL
-      EXPR_PAD = EXPR_BEG|EXPR_LABELED
-    end
-    include Values
-    NAMES = {
-      EXPR_NONE    => "EXPR_NONE",
-      EXPR_BEG     => "EXPR_BEG",
-      EXPR_END     => "EXPR_END",
-      EXPR_ENDARG  => "EXPR_ENDARG",
-      EXPR_ENDFN   => "EXPR_ENDFN",
-      EXPR_ARG     => "EXPR_ARG",
-      EXPR_CMDARG  => "EXPR_CMDARG",
-      EXPR_MID     => "EXPR_MID",
-      EXPR_FNAME   => "EXPR_FNAME",
-      EXPR_DOT     => "EXPR_DOT",
-      EXPR_CLASS   => "EXPR_CLASS",
-      EXPR_LABEL   => "EXPR_LABEL",
-      EXPR_LABELED => "EXPR_LABELED",
-      EXPR_FITEM   => "EXPR_FITEM",
-    }
-  end
-  include State::Values
-  if $DEBUG then
-    def lex_state= o
-      return if @lex_state == o
-      raise ArgumentError, "bad state: %p" % [o] unless State === o
-      if ENV["V"] then
-        c = caller[0]
-        c = caller[1] if c =~ /\b(expr_)?result\b/
-        c = caller[2] if c =~ /\b(expr_)?result\b/
-        warn "lex_state: %p -> %p from %s" % [lex_state, o, c.clean_caller]
-      else
-        warn "lex_state: %p -> %p" % [lex_state, o]
-      end
-      @lex_state = o
-    end
-  else
-    def lex_state= o
-      raise ArgumentError, "bad state: %p" % [o] unless State === o
-      @lex_state = o
-    end
-  end
-  attr_reader :lex_state
   ESCAPES = {
     "a"    => "\007",
     "b"    => "\010",
@@ -149,10 +23,17 @@ class RubyLexer
     "c\?"  => 127.chr,
   }
+  HAS_ENC = "".respond_to? :encoding
+  BTOKENS = {
+    ".."  => :tBDOT2,
+    "..." => :tBDOT3,
+  }
   TOKENS = {
     "!"   => :tBANG,
     "!="  => :tNEQ,
-    # "!@"  => :tUBANG,
+    "!@"  => :tBANG,
     "!~"  => :tNMATCH,
     ","   => :tCOMMA,
     ".."  => :tDOT2,
@@ -165,21 +46,57 @@ class RubyLexer
     "->"  => :tLAMBDA,
   }
-  TAB_WIDTH = 8
+  PERCENT_END = {
+    "(" => ")",
+    "[" => "]",
+    "{" => "}",
+    "<" => ">",
+  }
-  @@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
+  SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
+  @@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
   @@regexp_cache[nil] = nil
+  def regexp_cache
+    @@regexp_cache
+  end
+  if $DEBUG then
+    attr_reader :lex_state
+    def lex_state= o
+      return if @lex_state == o
+      from = ""
+      if ENV["VERBOSE"]
+        path = caller[0]
+        path = caller[1] if path =~ /result/
+        path, line, *_ = path.split(/:/)
+        path.delete_prefix! File.dirname File.dirname __FILE__
+        from = " at .%s:%s" % [path, line]
+      end
+      warn "lex_state: %p -> %p%s" % [lex_state, o, from]
+      @lex_state = o
+    end
+  end
   # :startdoc:
-  attr_accessor :lineno # we're bypassing oedipus' lineno handling.
+  attr_accessor :lex_state unless $DEBUG
   attr_accessor :brace_nest
   attr_accessor :cmdarg
   attr_accessor :command_start
   attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
   attr_accessor :last_state
   attr_accessor :cond
-  attr_accessor :extra_lineno
+  attr_accessor :old_ss
+  attr_accessor :old_lineno
+  # these are generated via ruby_lexer.rex: ss, lineno
   ##
   # Additional context surrounding tokens that both the lexer and
@@ -196,39 +113,30 @@ class RubyLexer
   # Last token read via next_token.
   attr_accessor :token
-  attr_writer :comments
+  # Last comment lexed, or nil
+  attr_accessor :comment
   def initialize _ = nil
     @lex_state = nil # remove one warning under $DEBUG
-    self.lex_state = EXPR_NONE
+    @lex_state = EXPR_NONE
     self.cond   = RubyParserStuff::StackState.new(:cond, $DEBUG)
     self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
+    self.ss     = RPStringScanner.new ""
     reset
   end
   def arg_ambiguous
-    self.warning("Ambiguous first argument. make sure.")
+    self.warning "Ambiguous first argument. make sure."
   end
   def arg_state
     is_after_operator? ? EXPR_ARG : EXPR_BEG
   end
-  def beginning_of_line?
-    ss.bol?
-  end
-  alias :bol? :beginning_of_line? # to make .rex file more readable
-  def comments # TODO: remove this... maybe comment_string + attr_accessor
-    c = @comments.join
-    @comments.clear
-    c
-  end
-  def end_of_stream?
-    ss.eos?
+  def debug n
+    raise "debug #{n}"
   end
   def expr_dot?
@@ -245,185 +153,30 @@ class RubyLexer
     result EXPR_BEG, token, text
   end
-  def heredoc here # TODO: rewrite / remove
-    _, eos, func, last_line = here
-    indent         = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
-    content_indent = (func & STR_FUNC_ICNTNT) != 0
-    expand         = (func & STR_FUNC_EXPAND) != 0
-    eos_re         = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
-    err_msg        = "can't match #{eos_re.inspect} anywhere in "
-    rb_compile_error err_msg if end_of_stream?
-    if beginning_of_line? && scan(eos_re) then
-      self.lineno += 1
-      ss.unread_many last_line # TODO: figure out how to remove this
-      return :tSTRING_END, eos
-    end
-    self.string_buffer = []
-    if expand then
-      case
-      when scan(/#[$@]/) then
-        ss.pos -= 1 # FIX omg stupid
-        return :tSTRING_DVAR, matched
-      when scan(/#[{]/) then
-        return :tSTRING_DBEG, matched
-      when scan(/#/) then
-        string_buffer << '#'
-      end
-      begin
-        c = tokadd_string func, "\n", nil
-        rb_compile_error err_msg if
-          c == RubyLexer::EOF
-        if c != "\n" then
-          return :tSTRING_CONTENT, string_buffer.join.delete("\r")
-        else
-          string_buffer << scan(/\n/)
-        end
-        rb_compile_error err_msg if end_of_stream?
-      end until check(eos_re)
-    else
-      until check(eos_re) do
-        string_buffer << scan(/.*(\n|\z)/)
-        rb_compile_error err_msg if end_of_stream?
-      end
-    end
-    self.lex_strterm = [:heredoc, eos, func, last_line]
-    string_content = begin
-                       s = string_buffer.join
-                       s.delete "\r"
-                     rescue ArgumentError
-                       s.b.delete("\r").force_encoding Encoding::UTF_8
-                     end
-    string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?
-    return :tSTRING_CONTENT, string_content
-  end
-  def heredoc_dedent(string_content)
-    width = string_content.scan(/^[ \t]*(?=\S)/).map do |whitespace|
-      heredoc_whitespace_indent_size whitespace
-    end.min || 0
-    string_content.split("\n", -1).map do |line|
-      dedent_string line, width
-    end.join "\n"
-  end
-  def dedent_string(string, width)
-    characters_skipped = 0
-    indentation_skipped = 0
-    string.chars.each do |char|
-      break if indentation_skipped >= width
-      if char == ' '
-        characters_skipped += 1
-        indentation_skipped += 1
-      elsif char == "\t"
-        proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
-        break if (proposed > width)
-        characters_skipped += 1
-        indentation_skipped = proposed
-      end
-    end
-    string[characters_skipped..-1]
-  end
-  def heredoc_whitespace_indent_size(whitespace)
-    whitespace.chars.inject 0 do |size, char|
-      if char == "\t"
-        size + TAB_WIDTH
-      else
-        size + 1
-      end
-    end
-  end
-  def heredoc_identifier # TODO: remove / rewrite
-    term, func = nil, STR_FUNC_BORING
-    self.string_buffer = []
-    heredoc_indent_mods = '-'
-    heredoc_indent_mods += '\~' if ruby23plus?
-    case
-    when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
-      term = ss[2]
-      func |= STR_FUNC_INDENT unless ss[1].empty?
-      func |= STR_FUNC_ICNTNT if ss[1] == '~'
-      func |= case term
-              when "\'" then
-                STR_SQUOTE
-              when '"' then
-                STR_DQUOTE
-              else
-                STR_XQUOTE
-              end
-      string_buffer << ss[3]
-    when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
-      rb_compile_error "unterminated here document identifier"
-    when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
-      term = '"'
-      func |= STR_DQUOTE
-      unless ss[1].empty? then
-        func |= STR_FUNC_INDENT
-        func |= STR_FUNC_ICNTNT if ss[1] == '~'
-      end
-      string_buffer << ss[2]
-    else
-      return nil
-    end
-    if scan(/.*\n/) then
-      # TODO: think about storing off the char range instead
-      line = matched
-    else
-      line = nil
-    end
-    self.lex_strterm = [:heredoc, string_buffer.join, func, line]
-    if term == '`' then
-      result nil, :tXSTRING_BEG, "`"
-    else
-      result nil, :tSTRING_BEG, "\""
-    end
-  end
   def in_fname? # REFACTOR
     lex_state =~ EXPR_FNAME
   end
-  def is_after_operator?
-    lex_state =~ EXPR_FNAME|EXPR_DOT
-  end
   def int_with_base base
     rb_compile_error "Invalid numeric format" if matched =~ /__/
     text = matched
     case
-    when text.end_with?('ri')
-      return result(EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
-    when text.end_with?('r')
-      return result(EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base)))
-    when text.end_with?('i')
-      return result(EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
+    when text.end_with?("ri")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
+    when text.end_with?("r")
+      result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
+    when text.end_with?("i")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
     else
-      return result(EXPR_NUM, :tINTEGER, text.to_i(base))
+      result EXPR_NUM, :tINTEGER, text.to_i(base)
     end
   end
+  def is_after_operator?
+    lex_state =~ EXPR_FNAME|EXPR_DOT
+  end
   def is_arg?
     lex_state =~ EXPR_ARG_ANY
   end
@@ -436,15 +189,6 @@ class RubyLexer
     lex_state =~ EXPR_END_ANY
   end
-  def lvar_defined? id
-    # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
-    self.parser.env[id.to_sym] == :lvar
-  end
-  def ruby22_label?
-    ruby22plus? and is_label_possible?
-  end
   def is_label_possible?
     (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
   end
@@ -461,14 +205,30 @@ class RubyLexer
     lpar_beg && lpar_beg == paren_nest
   end
-  def matched
-    ss.matched
+  def is_local_id id
+    # maybe just make this false for now
+    self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
+  end
+  def lvar_defined? id
+    # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
+    self.parser.env[id.to_sym] == :lvar
   end
   def not_end?
     not is_end?
   end
+  def possibly_escape_string text, check
+    content = match[1]
+    if text =~ check then
+      unescape_string content
+    else
+      content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
+    end
+  end
   def process_amper text
     token = if is_arg? && space_seen && !check(/\s/) then
                warning("`&' interpreted as argument prefix")
@@ -479,44 +239,50 @@ class RubyLexer
                :tAMPER2
              end
-    return result(:arg_state, token, "&")
+    result :arg_state, token, "&"
   end
   def process_backref text
-    token = ss[1].to_sym
+    token = match[1].to_sym
     # TODO: can't do lineno hack w/ symbol
     result EXPR_END, :tBACK_REF, token
   end
   def process_begin text
-    @comments << matched
+    self.comment ||= +""
+    self.comment << matched
     unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
-      @comments.clear
+      self.comment = nil
       rb_compile_error("embedded document meets end of file")
     end
-    @comments << matched
-    self.lineno += matched.count("\n")
+    self.comment << matched
+    self.lineno += matched.count("\n") # HACK?
     nil # TODO
   end
-  def process_brace_close text
-    # matching compare/parse23.y:8561
-    cond.lexpop
-    cmdarg.lexpop
+  # TODO: make all tXXXX terminals include lexer.lineno ... enforce it somehow?
+  def process_brace_close text
     case matched
     when "}" then
       self.brace_nest -= 1
-      self.lex_state   = EXPR_ENDARG # TODO: EXPR_END ? Look at 2.6
       return :tSTRING_DEND, matched if brace_nest < 0
+    end
+    # matching compare/parse26.y:8099
+    cond.pop
+    cmdarg.pop
+    case matched
+    when "}" then
+      self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
       return :tRCURLY, matched
     when "]" then
       self.paren_nest -= 1
-      self.lex_state   = EXPR_ENDARG
+      self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
       return :tRBRACK, matched
     when ")" then
       self.paren_nest -= 1
@@ -527,30 +293,6 @@ class RubyLexer
     end
   end
-  def process_colon1 text
-    # ?: / then / when
-    if is_end? || check(/\s/) then
-      return result EXPR_BEG, :tCOLON, text
-    end
-    case
-    when scan(/\'/) then
-      string STR_SSYM
-    when scan(/\"/) then
-      string STR_DSYM
-    end
-    result EXPR_FNAME, :tSYMBEG, text
-  end
-  def process_colon2 text
-    if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
-      result EXPR_BEG, :tCOLON3, text
-    else
-      result EXPR_DOT, :tCOLON2, text
-    end
-  end
   def process_brace_open text
     # matching compare/parse23.y:8694
     self.brace_nest += 1
@@ -566,7 +308,7 @@ class RubyLexer
             when lex_state =~ EXPR_LABELED then
               :tLBRACE     # hash
             when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
-              :tLCURLY     # block (primary) '{' in parse.y
+              :tLCURLY     # block (primary) "{" in parse.y
             when lex_state =~ EXPR_ENDARG then
               :tLBRACE_ARG # block (expr)
             else
@@ -581,37 +323,96 @@ class RubyLexer
     result state, token, text
   end
+  def process_colon1 text
+    # ?: / then / when
+    if is_end? || check(/\s/) then
+      return result EXPR_BEG, :tCOLON, text
+    end
+    case
+    when scan(/\'/) then
+      string STR_SSYM, matched
+    when scan(/\"/) then
+      string STR_DSYM, matched
+    end
+    result EXPR_FNAME, :tSYMBEG, text
+  end
+  def process_colon2 text
+    if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
+      result EXPR_BEG, :tCOLON3, text
+    else
+      result EXPR_DOT, :tCOLON2, text
+    end
+  end
+  def process_dots text # parse32.y:10216
+    is_beg = self.is_beg?
+    self.lex_state = EXPR_BEG
+    return result EXPR_ENDARG, :tBDOT3, text if
+      parser.in_argdef && text == "..." # TODO: version check?
+    tokens = ruby27plus? && is_beg ? BTOKENS : TOKENS
+    result EXPR_BEG, tokens[text], text
+  end
   def process_float text
     rb_compile_error "Invalid numeric format" if text =~ /__/
     case
-    when text.end_with?('ri')
-      return result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
-    when text.end_with?('i')
-      return result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
-    when text.end_with?('r')
-      return result EXPR_NUM, :tRATIONAL,  Rational(text.chop)
+    when text.end_with?("ri")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
+    when text.end_with?("i")
+      result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
+    when text.end_with?("r")
+      result EXPR_NUM, :tRATIONAL,  Rational(text.chop)
     else
-      return result EXPR_NUM, :tFLOAT, text.to_f
+      result EXPR_NUM, :tFLOAT, text.to_f
     end
   end
   def process_gvar text
-    text.lineno = self.lineno
+    if parser.class.version > 20 && text == "$-" then
+      rb_compile_error "unexpected $undefined"
+    end
     result EXPR_END, :tGVAR, text
   end
   def process_gvar_oddity text
-    return result EXPR_END, "$", "$" if text == "$" # TODO: wtf is this?
     rb_compile_error "#{text.inspect} is not allowed as a global variable name"
   end
   def process_ivar text
     tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
-    text.lineno = self.lineno
     result EXPR_END, tok_id, text
   end
+  def process_label text
+    symbol = possibly_escape_string text, /^\"/
+    result EXPR_LAB, :tLABEL, symbol
+  end
+  def process_label_or_string text
+    if @was_label && text =~ /:\Z/ then
+      @was_label = nil
+      return process_label text
+    elsif text =~ /:\Z/ then
+      self.pos -= 1 # put back ":"
+      text = text[0..-2]
+    end
+    orig_line = lineno
+    str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
+    self.lineno += str.count("\n")
+    result EXPR_END, :tSTRING, str, orig_line
+  end
   def process_lchevron text
     if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
         !is_end? &&
@@ -627,34 +428,26 @@ class RubyLexer
       self.lex_state = EXPR_BEG
     end
-    return result(lex_state, :tLSHFT, "\<\<")
+    result lex_state, :tLSHFT, "\<\<"
   end
-  def process_newline_or_comment text
+  def process_newline_or_comment text    # ../compare/parse30.y:9126 ish
     c = matched
-    hit = false
-    if c == '#' then
-      ss.pos -= 1
+    if c == "#" then
+      self.pos -= 1
-      # TODO: handle magic comments
       while scan(/\s*\#.*(\n+|\z)/) do
-        hit = true
-        self.lineno += matched.lines.to_a.size
-        @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
+        self.lineno += matched.count "\n"
+        self.comment ||= +""
+        self.comment << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
       end
       return nil if end_of_stream?
     end
-    self.lineno += 1 unless hit
-    # Replace a string of newlines with a single one
-    self.lineno += matched.lines.to_a.size if scan(/\n+/)
     c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
          lex_state !~ EXPR_LABELED)
-    # TODO: figure out what token_seen is for
     if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
       # ignore if !fallthrough?
       if !c && parser.in_kwarg then
@@ -662,25 +455,29 @@ class RubyLexer
         self.command_start = true
         return result EXPR_BEG, :tNL, nil
       else
-        return # skip
+        maybe_pop_stack
+        return # goto retry
       end
     end
-    if scan(/([\ \t\r\f\v]*)(\.|&)/) then
-      self.space_seen = true unless ss[1].empty?
+    if scan(/[\ \t\r\f\v]+/) then
+      self.space_seen = true
+    end
-      ss.pos -= 1
-      return unless check(/\.\./)
+    if check(/#/) then
+      return # goto retry
+    elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
+      return # goto retry
     end
     self.command_start = true
-    return result(EXPR_BEG, :tNL, nil)
+    result EXPR_BEG, :tNL, nil
   end
   def process_nthref text
     # TODO: can't do lineno hack w/ number
-    result EXPR_END, :tNTH_REF, ss[1].to_i
+    result EXPR_END, :tNTH_REF, match[1].to_i
   end
   def process_paren text
@@ -697,7 +494,7 @@ class RubyLexer
               #      "an argument list, not a decomposed argument")
               :tLPAREN2
             else
-              :tLPAREN2 # plain '(' in parse.y
+              :tLPAREN2 # plain "(" in parse.y
             end
     self.paren_nest += 1
@@ -708,13 +505,16 @@ class RubyLexer
   end
   def process_percent text
-    return parse_quote if is_beg?
-    return result EXPR_BEG, :tOP_ASGN, "%" if scan(/\=/)
-    return parse_quote if is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
-    return result :arg_state, :tPERCENT, "%"
+    case
+    when is_beg? then
+      process_percent_quote
+    when scan(/\=/)
+      result EXPR_BEG, :tOP_ASGN, "%"
+    when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
+      process_percent_quote
+    else
+      result :arg_state, :tPERCENT, "%"
+    end
   end
   def process_plus_minus text
@@ -735,7 +535,7 @@ class RubyLexer
     return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
-    if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
+    if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
       arg_ambiguous if is_arg?
       if check(/\d/) then
@@ -760,12 +560,12 @@ class RubyLexer
     if check(/\s|\v/) then
       unless is_arg? then
-        c2 = { " " => 's',
-              "\n" => 'n',
-              "\t" => 't',
-              "\v" => 'v',
-              "\r" => 'r',
-              "\f" => 'f' }[matched]
+        c2 = { " " => "s",
+              "\n" => "n",
+              "\t" => "t",
+              "\v" => "v",
+              "\r" => "r",
+              "\f" => "f" }[matched]
         if c2 then
           warning("invalid character syntax; use ?\\" + c2)
@@ -781,17 +581,26 @@ class RubyLexer
     c = if scan(/\\/) then
           self.read_escape
         else
-          ss.getch
+          getch
         end
     result EXPR_END, :tSTRING, c
   end
+  def process_simple_string text
+    orig_line = lineno
+    self.lineno += text.count("\n")
+    str = unescape_string text[1..-2]
+    result EXPR_END, :tSTRING, str, orig_line
+  end
   def process_slash text
     if is_beg? then
-      string STR_REGEXP
+      string STR_REGEXP, matched
-      return result(nil, :tREGEXP_BEG, "/")
+      return result nil, :tREGEXP_BEG, "/"
     end
     if scan(/\=/) then
@@ -806,7 +615,7 @@ class RubyLexer
       end
     end
-    return result(:arg_state, :tDIVIDE, "/")
+    result :arg_state, :tDIVIDE, "/"
   end
   def process_square_bracket text
@@ -838,48 +647,14 @@ class RubyLexer
     result EXPR_PAR, token, text
   end
-  def possibly_escape_string text, check
-    content = match[1]
-    if text =~ check then
-      content.gsub(ESC) { unescape $1 }
-    else
-      content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
-    end
-  end
   def process_symbol text
-    symbol = possibly_escape_string text, /^:"/
-    result EXPR_END, :tSYMBOL, symbol
-  end
-  def was_label?
-    @was_label = ruby22_label?
-    true
-  end
-  def process_label_or_string text
-    if @was_label && text =~ /:\Z/ then
-      @was_label = nil
-      return process_label text
-    elsif text =~ /:\Z/ then
-      ss.pos -= 1 # put back ":"
-      text = text[0..-2]
-    end
+    symbol = possibly_escape_string text, /^:\"/ # stupid emacs
-    result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
-  end
-  def process_label text
-    symbol = possibly_escape_string text, /^"/
-    result EXPR_LAB, :tLABEL, [symbol, self.lineno]
+    result EXPR_LIT, :tSYMBOL, symbol
   end
   def process_token text
     # matching: parse_ident in compare/parse23.y:7989
-    # TODO: make this always return [token, lineno]
     # FIX: remove: self.last_state = lex_state
     token = self.token = text
@@ -902,7 +677,7 @@ class RubyLexer
     if is_label_possible? and is_label_suffix? then
       scan(/:/)
-      return result EXPR_LAB, :tLABEL, [token, self.lineno]
+      return result EXPR_LAB, :tLABEL, token
     end
     # TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
@@ -913,14 +688,17 @@ class RubyLexer
       return process_token_keyword keyword if keyword
     end
-    # matching: compare/parse23.y:8079
-    state = if is_beg? or is_arg? or lex_state =~ EXPR_DOT then
+    # matching: compare/parse32.y:9031
+    state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
               cmd_state ? EXPR_CMDARG : EXPR_ARG
             elsif lex_state =~ EXPR_FNAME then
               EXPR_ENDFN
             else
               EXPR_END
             end
+    self.lex_state = state
+    tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
     if last_state !~ EXPR_DOT|EXPR_FNAME and
         (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
@@ -928,183 +706,102 @@ class RubyLexer
       state = EXPR_END|EXPR_LABEL
     end
-    token.lineno = self.lineno # yes, on a string. I know... I know...
-    return result(state, tok_id, token)
+    result state, tok_id, token
   end
   def process_token_keyword keyword
-    # matching MIDDLE of parse_ident in compare/parse23.y:8046
+    # matching MIDDLE of parse_ident in compare/parse32.y:9695
     state = lex_state
-    self.lex_state = keyword.state
-    value = [token, self.lineno]
-    return result(lex_state, keyword.id0, value) if state =~ EXPR_FNAME
+    return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
+    self.lex_state = keyword.state
     self.command_start = true if lex_state =~ EXPR_BEG
     case
-    when keyword.id0 == :kDO then
+    when keyword.id0 == :kDO then # parse32.y line 9712
       case
       when lambda_beginning? then
         self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
-        self.paren_nest -= 1
-        result lex_state, :kDO_LAMBDA, value
+        self.paren_nest -= 1 # TODO: question this?
+        result lex_state, :kDO_LAMBDA, token
       when cond.is_in_state then
-        result lex_state, :kDO_COND, value
+        result lex_state, :kDO_COND, token
       when cmdarg.is_in_state && state != EXPR_CMDARG then
-        result lex_state, :kDO_BLOCK, value
-      when state =~ EXPR_BEG|EXPR_ENDARG then
-        result lex_state, :kDO_BLOCK, value
+        result lex_state, :kDO_BLOCK, token
       else
-        result lex_state, :kDO, value
+        result lex_state, :kDO, token
       end
     when state =~ EXPR_PAD then
-      result lex_state, keyword.id0, value
+      result lex_state, keyword.id0, token
     when keyword.id0 != keyword.id1 then
-      result EXPR_PAR, keyword.id1, value
+      result EXPR_PAR, keyword.id1, token
     else
-      result lex_state, keyword.id1, value
+      result lex_state, keyword.id1, token
     end
   end
   def process_underscore text
-    ss.unscan # put back "_"
+    self.unscan # put back "_"
     if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
-      return [RubyLexer::EOF, RubyLexer::EOF]
-    elsif scan(/\_\w*/) then
-      return process_token matched
+      ss.terminate
+      [RubyLexer::EOF, RubyLexer::EOF]
+    elsif scan(/#{IDENT_CHAR}+/) then
+      process_token matched
     end
   end
   def rb_compile_error msg
-    msg += ". near line #{self.lineno}: #{ss.rest[/^.*/].inspect}"
+    msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
     raise RubyParser::SyntaxError, msg
   end
-  def read_escape # TODO: remove / rewrite
-    case
-    when scan(/\\/) then                  # Backslash
-      '\\'
-    when scan(/n/) then                   # newline
-      self.extra_lineno -= 1
-      "\n"
-    when scan(/t/) then                   # horizontal tab
-      "\t"
-    when scan(/r/) then                   # carriage-return
-      "\r"
-    when scan(/f/) then                   # form-feed
-      "\f"
-    when scan(/v/) then                   # vertical tab
-      "\13"
-    when scan(/a/) then                   # alarm(bell)
-      "\007"
-    when scan(/e/) then                   # escape
-      "\033"
-    when scan(/b/) then                   # backspace
-      "\010"
-    when scan(/s/) then                   # space
-      " "
-    when scan(/[0-7]{1,3}/) then          # octal constant
-      (matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
-    when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
-      # TODO: force encode everything to UTF-8?
-      ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
-    when check(/M-\\[\\MCc]/) then
-      scan(/M-\\/) # eat it
-      c = self.read_escape
-      c[0] = (c[0].ord | 0x80).chr
-      c
-    when scan(/M-(.)/) then
-      c = ss[1]
-      c[0] = (c[0].ord | 0x80).chr
-      c
-    when check(/(C-|c)\\[\\MCc]/) then
-      scan(/(C-|c)\\/) # eat it
-      c = self.read_escape
-      c[0] = (c[0].ord & 0x9f).chr
-      c
-    when scan(/C-\?|c\?/) then
-      127.chr
-    when scan(/(C-|c)(.)/) then
-      c = ss[2]
-      c[0] = (c[0].ord & 0x9f).chr
-      c
-    when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
-      matched
-    when scan(/u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/) then
-      [ss[1].delete("{}").to_i(16)].pack("U")
-    when scan(/u([0-9a-fA-F]{1,3})/) then
-      rb_compile_error "Invalid escape character syntax"
-    when scan(/[McCx0-9]/) || end_of_stream? then
-      rb_compile_error("Invalid escape character syntax")
-    else
-      ss.getch
-    end.dup
-  end
-  def regx_options # TODO: rewrite / remove
-    good, bad = [], []
-    if scan(/[a-z]+/) then
-      good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
-    end
-    unless bad.empty? then
-      rb_compile_error("unknown regexp option%s - %s" %
-                       [(bad.size > 1 ? "s" : ""), bad.join.inspect])
-    end
-    return good.join
-  end
   def reset
+    self.lineno        = 1
     self.brace_nest    = 0
     self.command_start = true
-    self.comments      = []
+    self.comment       = nil
     self.lex_state     = EXPR_NONE
     self.lex_strterm   = nil
-    self.lineno        = 1
     self.lpar_beg      = nil
     self.paren_nest    = 0
     self.space_seen    = false
     self.string_nest   = 0
     self.token         = nil
-    self.extra_lineno  = 0
+    self.string_buffer = []
+    self.old_ss        = nil
+    self.old_lineno    = nil
     self.cond.reset
     self.cmdarg.reset
   end
-  def result new_state, token, text # :nodoc:
+  def result new_state, token, text, line = self.lineno # :nodoc:
     new_state = self.arg_state if new_state == :arg_state
     self.lex_state = new_state if new_state
-    [token, text]
+    [token, [text, line]]
   end
-  def scan re
-    ss.scan re
+  def ruby22_label?
+    ruby22plus? and is_label_possible?
   end
-  def check re
-    ss.check re
+  def ruby22plus?
+    parser.class.version >= 22
   end
-  def eat_whitespace
-    r = scan(/\s+/)
-    self.extra_lineno += r.count("\n") if r
-    r
+  def ruby23plus?
+    parser.class.version >= 23
   end
-  def fixup_lineno extra = 0
-    self.lineno += self.extra_lineno + extra
-    self.extra_lineno = 0
+  def ruby24minus?
+    parser.class.version <= 24
   end
-  def scanner_class # TODO: design this out of oedipus_lex. or something.
-    RPStringScanner
+  def ruby27plus?
+    parser.class.version >= 27
   end
   def space_vs_beginning space_type, beg_type, fallback
@@ -1119,137 +816,18 @@ class RubyLexer
     end
   end
-  def string type, beg = matched, nnd = "\0"
-    self.lex_strterm = [:strterm, type, beg, nnd]
-  end
-  # TODO: consider
-  # def src= src
-  #   raise "bad src: #{src.inspect}" unless String === src
-  #   @src = RPStringScanner.new(src)
-  # end
-  def tokadd_escape term # TODO: rewrite / remove
-    case
-    when scan(/\\\n/) then
-      # just ignore
-    when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
-      self.string_buffer << matched
-    when scan(/\\([MC]-|c)(?=\\)/) then
-      self.string_buffer << matched
-      self.tokadd_escape term
-    when scan(/\\([MC]-|c)(.)/) then
-      self.string_buffer << matched
-    when scan(/\\[McCx]/) then
-      rb_compile_error "Invalid escape character syntax"
-    when scan(/\\(.)/m) then
-      chr = ss[1]
-      prev = self.string_buffer.last
-      if term == chr && prev && prev.end_with?("(?") then
-        self.string_buffer << chr
-      elsif term == chr || chr.ascii_only? then
-        self.string_buffer << matched # dunno why we keep them for ascii
-      else
-        self.string_buffer << chr # HACK? this is such a rat's nest
-      end
+  def unescape_string str
+    str = str.gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
+    if str.valid_encoding?
+      str
     else
-      rb_compile_error "Invalid escape character syntax"
+      str.b
     end
   end
-  def tokadd_string(func, term, paren) # TODO: rewrite / remove
-    qwords = (func & STR_FUNC_QWORDS) != 0
-    escape = (func & STR_FUNC_ESCAPE) != 0
-    expand = (func & STR_FUNC_EXPAND) != 0
-    regexp = (func & STR_FUNC_REGEXP) != 0
-    symbol = (func & STR_FUNC_SYMBOL) != 0
-    paren_re = @@regexp_cache[paren]
-    term_re  = @@regexp_cache[term]
-    until end_of_stream? do
-      c = nil
-      handled = true
-      case
-      when paren_re && scan(paren_re) then
-        self.string_nest += 1
-      when scan(term_re) then
-        if self.string_nest == 0 then
-          ss.pos -= 1
-          break
-        else
-          self.string_nest -= 1
-        end
-      when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
-        ss.pos -= 1
-        break
-      when qwords && scan(/\s/) then
-        ss.pos -= 1
-        break
-      when expand && scan(/#(?!\n)/) then
-        # do nothing
-      when check(/\\/) then
-        case
-        when qwords && scan(/\\\n/) then
-          string_buffer << "\n"
-          next
-        when qwords && scan(/\\\s/) then
-          c = ' '
-        when expand && scan(/\\\n/) then
-          next
-        when regexp && check(/\\/) then
-          self.tokadd_escape term
-          next
-        when expand && scan(/\\/) then
-          c = self.read_escape
-        when scan(/\\\n/) then
-          # do nothing
-        when scan(/\\\\/) then
-          string_buffer << '\\' if escape
-          c = '\\'
-        when scan(/\\/) then
-          unless scan(term_re) || paren.nil? || scan(paren_re) then
-            string_buffer << "\\"
-          end
-        else
-          handled = false
-        end # inner /\\/ case
-      else
-        handled = false
-      end # top case
-      unless handled then
-        t = Regexp.escape term
-        x = Regexp.escape(paren) if paren && paren != "\000"
-        re = if qwords then
-               /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
-             else
-               /[^#{t}#{x}\#\0\\]+|./
-             end
-        scan re
-        c = matched
-        rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
-      end # unless handled
-      c ||= matched
-      string_buffer << c
-    end # until
-    c ||= matched
-    c = RubyLexer::EOF if end_of_stream?
-    return c
-  end
   def unescape s
     r = ESCAPES[s]
-    self.extra_lineno += 1 if s == "\n"     # eg backslash newline strings
-    self.extra_lineno -= 1 if r && s == "n" # literal \n, not newline
     return r if r
     x = case s
@@ -1265,10 +843,12 @@ class RubyLexer
           s
         when /^[McCx0-9]/ then
           rb_compile_error("Invalid escape character syntax")
-        when /u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/ then
+        when /u(\h{4})/ then
           [$1.delete("{}").to_i(16)].pack("U")
-        when /u([0-9a-fA-F]{1,3})/ then
+        when /u(\h{1,3})/ then
           rb_compile_error("Invalid escape character syntax")
+        when /u\{(\h+(?:\s+\h+)*)\}/ then
+          $1.split.map { |cp| cp.to_i(16) }.pack("U*")
         else
           s
         end
@@ -1279,171 +859,294 @@ class RubyLexer
     # do nothing for now
   end
-  def ruby22plus?
-    parser.class.version >= 22
+  def was_label?
+    @was_label = ruby22_label?
+    true
   end
-  def ruby23plus?
-    parser.class.version >= 23
-  end
+  class State
+    attr_accessor :n
+    attr_accessor :names
-  def process_string # TODO: rewrite / remove
-    # matches top of parser_yylex in compare/parse23.y:8113
-    token = if lex_strterm[0] == :heredoc then
-              self.heredoc lex_strterm
-            else
-              self.parse_string lex_strterm
-            end
+    # TODO: take a shared hash of strings for inspect/to_s
+    def initialize o, names
+      raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
-    token_type, c = token
+      self.n = o
+      self.names = names
+    end
-    # matches parser_string_term
-    if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
-      if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
-           !cond.is_in_state) || is_arg?) &&
-          is_label_suffix? then
-        scan(/:/)
-        token_type = token[0] = :tLABEL_END
-      end
+    def == o
+      self.equal?(o) || (o.class == self.class && o.n == self.n)
     end
-    if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
-      self.lex_strterm = nil
-      self.lex_state   = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END
+    def =~ v
+      (self.n & v.n) != 0
     end
-    return token
-  end
+    def | v
+      raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
+        self.names == v.names
+      self.class.new(self.n | v.n, self.names)
+    end
-  def parse_quote # TODO: remove / rewrite
-    beg, nnd, short_hand, c = nil, nil, false, nil
+    def inspect
+      return "EXPR_NONE" if n.zero? # HACK?
-    if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
-      rb_compile_error "unknown type of %string" if ss.matched_size == 2
-      c, beg, short_hand = matched, ss.getch, false
-    else                               # Short-hand (e.g. %{, %., %!, etc)
-      c, beg, short_hand = 'Q', ss.getch, true
+      names.map { |v, k| k if self =~ v }.
+        compact.
+        join("|").
+        gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
     end
-    if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
-      rb_compile_error "unterminated quoted string meets end of file"
-    end
+    alias to_s inspect
-    # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
-    nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
-    nnd, beg = beg, "\0" if nnd.nil?
+    module Values
+      expr_names = {}
+      EXPR_NONE    = State.new    0x0, expr_names
+      EXPR_BEG     = State.new    0x1, expr_names
+      EXPR_END     = State.new    0x2, expr_names
+      EXPR_ENDARG  = State.new    0x4, expr_names
+      EXPR_ENDFN   = State.new    0x8, expr_names
+      EXPR_ARG     = State.new   0x10, expr_names
+      EXPR_CMDARG  = State.new   0x20, expr_names
+      EXPR_MID     = State.new   0x40, expr_names
+      EXPR_FNAME   = State.new   0x80, expr_names
+      EXPR_DOT     = State.new  0x100, expr_names
+      EXPR_CLASS   = State.new  0x200, expr_names
+      EXPR_LABEL   = State.new  0x400, expr_names
+      EXPR_LABELED = State.new  0x800, expr_names
+      EXPR_FITEM   = State.new 0x1000, expr_names
-    token_type, text = nil, "%#{c}#{beg}"
-    token_type, string_type = case c
-                              when 'Q' then
-                                ch = short_hand ? nnd : c + beg
-                                text = "%#{ch}"
-                                [:tSTRING_BEG,   STR_DQUOTE]
-                              when 'q' then
-                                [:tSTRING_BEG,   STR_SQUOTE]
-                              when 'W' then
-                                eat_whitespace
-                                [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
-                              when 'w' then
-                                eat_whitespace
-                                [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
-                              when 'x' then
-                                [:tXSTRING_BEG,  STR_XQUOTE]
-                              when 'r' then
-                                [:tREGEXP_BEG,   STR_REGEXP]
-                              when 's' then
-                                self.lex_state = EXPR_FNAME
-                                [:tSYMBEG,       STR_SSYM]
-                              when 'I' then
-                                eat_whitespace
-                                [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
-                              when 'i' then
-                                eat_whitespace
-                                [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
-                              end
+      EXPR_BEG_ANY = EXPR_BEG | EXPR_MID    | EXPR_CLASS
+      EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
+      EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
-    rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
-      token_type.nil?
+      # extra fake lex_state names to make things a bit cleaner
-    raise "huh" unless string_type
+      EXPR_LAB = EXPR_ARG|EXPR_LABELED
+      EXPR_LIT = EXPR_END|EXPR_ENDARG
+      EXPR_PAR = EXPR_BEG|EXPR_LABEL
+      EXPR_PAD = EXPR_BEG|EXPR_LABELED
-    string string_type, nnd, beg
+      EXPR_NUM = EXPR_LIT
+      expr_names.merge!(EXPR_NONE    => "EXPR_NONE",
+                        EXPR_BEG     => "EXPR_BEG",
+                        EXPR_END     => "EXPR_END",
+                        EXPR_ENDARG  => "EXPR_ENDARG",
+                        EXPR_ENDFN   => "EXPR_ENDFN",
+                        EXPR_ARG     => "EXPR_ARG",
+                        EXPR_CMDARG  => "EXPR_CMDARG",
+                        EXPR_MID     => "EXPR_MID",
+                        EXPR_FNAME   => "EXPR_FNAME",
+                        EXPR_DOT     => "EXPR_DOT",
+                        EXPR_CLASS   => "EXPR_CLASS",
+                        EXPR_LABEL   => "EXPR_LABEL",
+                        EXPR_LABELED => "EXPR_LABELED",
+                        EXPR_FITEM   => "EXPR_FITEM")
+      # ruby constants for strings
+      str_func_names = {}
+      STR_FUNC_BORING = State.new 0x00,    str_func_names
+      STR_FUNC_ESCAPE = State.new 0x01,    str_func_names
+      STR_FUNC_EXPAND = State.new 0x02,    str_func_names
+      STR_FUNC_REGEXP = State.new 0x04,    str_func_names
+      STR_FUNC_QWORDS = State.new 0x08,    str_func_names
+      STR_FUNC_SYMBOL = State.new 0x10,    str_func_names
+      STR_FUNC_INDENT = State.new 0x20,    str_func_names # <<-HEREDOC
+      STR_FUNC_LABEL  = State.new 0x40,    str_func_names
+      STR_FUNC_LIST   = State.new 0x4000,  str_func_names
+      STR_FUNC_TERM   = State.new 0x8000,  str_func_names
+      STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
+      # TODO: check parser25.y on how they do STR_FUNC_INDENT
+      STR_SQUOTE = STR_FUNC_BORING
+      STR_DQUOTE = STR_FUNC_EXPAND
+      STR_XQUOTE = STR_FUNC_EXPAND
+      STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
+      STR_SWORD  = STR_FUNC_QWORDS | STR_FUNC_LIST
+      STR_DWORD  = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
+      STR_SSYM   = STR_FUNC_SYMBOL
+      STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
+      STR_LABEL  = STR_FUNC_LABEL
+      str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
+                            STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
+                            STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
+                            STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
+                            STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
+                            STR_FUNC_INDENT => "STR_FUNC_INDENT",
+                            STR_FUNC_LABEL  => "STR_FUNC_LABEL",
+                            STR_FUNC_LIST   => "STR_FUNC_LIST",
+                            STR_FUNC_TERM   => "STR_FUNC_TERM",
+                            STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
+                            STR_SQUOTE      => "STR_SQUOTE")
+    end
-    return token_type, text
+    include Values
   end
-  def parse_string quote # TODO: rewrite / remove
-    _, string_type, term, open = quote
+  include State::Values
+end
+class RubyLexer
+  module SSWrapper
+    def string= s
+      ss.string= s
+    end
+    def beginning_of_line?
+      ss.bol?
+    end
+    alias bol? beginning_of_line? # to make .rex file more readable
-    space = false # FIX: remove these
-    func = string_type
-    paren = open
-    term_re = @@regexp_cache[term]
+    def check re
+      maybe_pop_stack
-    qwords = (func & STR_FUNC_QWORDS) != 0
-    regexp = (func & STR_FUNC_REGEXP) != 0
-    expand = (func & STR_FUNC_EXPAND) != 0
+      ss.check re
+    end
-    unless func then # nil'ed from qwords below. *sigh*
-      return :tSTRING_END, nil
+    def end_of_stream?
+      ss.eos?
     end
-    space = true if qwords and eat_whitespace
+    alias eos? end_of_stream?
-    if self.string_nest == 0 && scan(/#{term_re}/) then
-      if qwords then
-        quote[1] = nil
-        return :tSPACE, nil
-      elsif regexp then
-        return :tREGEXP_END, self.regx_options
-      else
-        return :tSTRING_END, term
-      end
+    def getch
+      c = ss.getch
+      c = ss.getch if c == "\r" && ss.peek(1) == "\n"
+      c
     end
-    return :tSPACE, nil if space
+    def match
+      ss
+    end
-    self.string_buffer = []
+    def matched
+      ss.matched
+    end
-    if expand
-      case
-      when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
-        # TODO: !ISASCII
-        # ?! see parser_peek_variable_name
-        return :tSTRING_DVAR, nil
-      when scan(/#(?=\@\@?[a-zA-Z_])/) then
-        # TODO: !ISASCII
-        return :tSTRING_DVAR, nil
-      when scan(/#[{]/) then
-        self.command_start = true
-        return :tSTRING_DBEG, nil
-      when scan(/#/) then
-        string_buffer << '#'
+    def in_heredoc?
+      !!self.old_ss
+    end
+    def maybe_pop_stack
+      if ss.eos? && in_heredoc? then
+        self.ss_pop
+        self.lineno_pop
       end
     end
-    if tokadd_string(func, term, paren) == RubyLexer::EOF then
-      rb_compile_error "unterminated string meets end of file"
+    def pos
+      ss.pos
+    end
+    def pos= n
+      ss.pos = n
+    end
+    def rest
+      ss.rest
+    end
+    def scan re
+      maybe_pop_stack
+      ss.scan re
+    end
+    def scanner_class # TODO: design this out of oedipus_lex. or something.
+      RPStringScanner
+    end
+    def ss_string
+      ss.string
+    end
+    def ss_string= s
+      raise "Probably not"
+      ss.string = s
+    end
+    def unscan
+      ss.unscan
+    end
+  end
+  include SSWrapper
+end
+class RubyLexer
+  module SSStackish
+    def lineno_push new_lineno
+      self.old_lineno = self.lineno
+      self.lineno     = new_lineno
+    end
+    def lineno_pop
+      self.lineno     = self.old_lineno
+      self.old_lineno = nil
+    end
+    def ss= o
+      raise "Clearing ss while in heredoc!?!" if in_heredoc?
+      @old_ss = nil
+      super
     end
-    return :tSTRING_CONTENT, string_buffer.join
+    def ss_push new_ss
+      @old_ss = self.ss
+      @ss     = new_ss
+    end
+    def ss_pop
+      @ss     = self.old_ss
+      @old_ss = nil
+    end
   end
+  prepend SSStackish
 end
-require "ruby_lexer.rex"
+if ENV["RP_STRTERM_DEBUG"] then
+  class RubyLexer
+    def d o
+      $stderr.puts o.inspect
+    end
+    alias old_lex_strterm= lex_strterm=
+    def lex_strterm= o
+      self.old_lex_strterm= o
+      where = caller.first.split(/:/).first(2).join(":")
+      $stderr.puts
+      d :lex_strterm => [o, where]
+    end
+  end
+end
+require_relative "./ruby_lexer.rex.rb"
+require_relative "./ruby_lexer_strings.rb"
 if ENV["RP_LINENO_DEBUG"] then
   class RubyLexer
-    alias :old_lineno= :lineno=
     def d o
       $stderr.puts o.inspect
     end
+    alias old_lineno= lineno=
     def lineno= n
       self.old_lineno= n
       where = caller.first.split(/:/).first(2).join(":")
-      d :lineno => [n, where, ss && ss.rest[0,40]]
+      $stderr.puts
+      d :lineno => [n, where]
     end
   end
 end