RubyGems - ruby_parser - Versions diffs - 3.17.0 → 3.19.0 - Mend

ruby_parser 3.17.0 → 3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/History.rdoc +109 -0
data/Manifest.txt +5 -0
data/README.rdoc +9 -6
data/Rakefile +85 -24
data/bin/ruby_parse_extract_error +1 -1
data/compare/normalize.rb +6 -1
data/gauntlet.md +108 -0
data/lib/rp_extensions.rb +15 -36
data/lib/rp_stringscanner.rb +20 -51
data/lib/ruby20_parser.rb +7430 -3528
data/lib/ruby20_parser.y +328 -257
data/lib/ruby21_parser.rb +7408 -3572
data/lib/ruby21_parser.y +323 -254
data/lib/ruby22_parser.rb +7543 -3601
data/lib/ruby22_parser.y +327 -256
data/lib/ruby23_parser.rb +7549 -3612
data/lib/ruby23_parser.y +327 -256
data/lib/ruby24_parser.rb +7640 -3624
data/lib/ruby24_parser.y +327 -256
data/lib/ruby25_parser.rb +7640 -3623
data/lib/ruby25_parser.y +327 -256
data/lib/ruby26_parser.rb +7649 -3632
data/lib/ruby26_parser.y +326 -255
data/lib/ruby27_parser.rb +10132 -4545
data/lib/ruby27_parser.y +871 -262
data/lib/ruby30_parser.rb +10504 -4655
data/lib/ruby30_parser.y +1065 -333
data/lib/ruby31_parser.rb +13622 -0
data/lib/ruby31_parser.y +3481 -0
data/lib/ruby3_parser.yy +3536 -0
data/lib/ruby_lexer.rb +261 -609
data/lib/ruby_lexer.rex +27 -20
data/lib/ruby_lexer.rex.rb +59 -23
data/lib/ruby_lexer_strings.rb +638 -0
data/lib/ruby_parser.rb +2 -0
data/lib/ruby_parser.yy +903 -272
data/lib/ruby_parser_extras.rb +333 -113
data/test/test_ruby_lexer.rb +181 -129
data/test/test_ruby_parser.rb +1529 -288
data/tools/munge.rb +34 -6
data/tools/ripper.rb +15 -10
data.tar.gz.sig +0 -0
metadata +27 -23
metadata.gz.sig +0 -0

data/lib/ruby_lexer.rb CHANGED Viewed

@@ -33,7 +33,7 @@ class RubyLexer
   TOKENS = {
     "!"   => :tBANG,
     "!="  => :tNEQ,
-    # "!@"  => :tUBANG,
+    "!@"  => :tBANG,
     "!~"  => :tNMATCH,
     ","   => :tCOMMA,
     ".."  => :tDOT2,
@@ -46,17 +46,38 @@ class RubyLexer
     "->"  => :tLAMBDA,
   }
+  PERCENT_END = {
+    "(" => ")",
+    "[" => "]",
+    "{" => "}",
+    "<" => ">",
+  }
+  SIMPLE_RE_META = /[\$\*\+\.\?\^\|\)\]\}\>]/
   @@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
   @@regexp_cache[nil] = nil
+  def regexp_cache
+    @@regexp_cache
+  end
   if $DEBUG then
     attr_reader :lex_state
     def lex_state= o
       return if @lex_state == o
-      raise ArgumentError, "bad state: %p" % [o] unless State === o
-      warn "lex_state: %p -> %p" % [lex_state, o]
+      from = ""
+      if ENV["VERBOSE"]
+        path = caller[0]
+        path = caller[1] if path =~ /result/
+        path, line, *_ = path.split(/:/)
+        path.delete_prefix! File.dirname File.dirname __FILE__
+        from = " at .%s:%s" % [path, line]
+      end
+      warn "lex_state: %p -> %p%s" % [lex_state, o, from]
       @lex_state = o
     end
@@ -66,14 +87,16 @@ class RubyLexer
   attr_accessor :lex_state unless $DEBUG
-  attr_accessor :lineno # we're bypassing oedipus' lineno handling.
   attr_accessor :brace_nest
   attr_accessor :cmdarg
   attr_accessor :command_start
   attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
   attr_accessor :last_state
   attr_accessor :cond
-  attr_accessor :extra_lineno
+  attr_accessor :old_ss
+  attr_accessor :old_lineno
+  # these are generated via ruby_lexer.rex: ss, lineno
   ##
   # Additional context surrounding tokens that both the lexer and
@@ -98,6 +121,7 @@ class RubyLexer
     self.cond   = RubyParserStuff::StackState.new(:cond, $DEBUG)
     self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
+    self.ss     = RPStringScanner.new ""
     reset
   end
@@ -110,14 +134,8 @@ class RubyLexer
     is_after_operator? ? EXPR_ARG : EXPR_BEG
   end
-  def beginning_of_line?
-    ss.bol?
-  end
-  alias bol? beginning_of_line? # to make .rex file more readable
-  def check re
-    ss.check re
+  def ignore_body_comments
+    @comments.clear
   end
   def comments # TODO: remove this... maybe comment_string + attr_accessor
@@ -126,18 +144,8 @@ class RubyLexer
     c
   end
-  def eat_whitespace
-    r = scan(/\s+/)
-    self.extra_lineno += r.count("\n") if r
-    r
-  end
-  def end_of_stream?
-    ss.eos?
-  end
-  def expr_beg?
-    lex_state =~ EXPR_BEG
+  def debug n
+    raise "debug #{n}"
   end
   def expr_dot?
@@ -154,128 +162,6 @@ class RubyLexer
     result EXPR_BEG, token, text
   end
-  def fixup_lineno extra = 0
-    self.lineno += self.extra_lineno + extra
-    self.extra_lineno = 0
-  end
-  def heredoc here # TODO: rewrite / remove
-    _, eos, func, last_line = here
-    indent         = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
-    expand         = func =~ STR_FUNC_EXPAND
-    eol            = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
-    eos_re         = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
-    err_msg        = "can't match #{eos_re.inspect} anywhere in "
-    rb_compile_error err_msg if end_of_stream?
-    if beginning_of_line? && scan(eos_re) then
-      self.lineno += 1
-      ss.unread_many last_line # TODO: figure out how to remove this
-      return :tSTRING_END, [eos, func] # TODO: calculate squiggle width at lex?
-    end
-    self.string_buffer = []
-    if expand then
-      case
-      when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
-        # TODO: !ISASCII
-        # ?! see parser_peek_variable_name
-        return :tSTRING_DVAR, matched
-      when scan(/#(?=\@\@?[a-zA-Z_])/) then
-        # TODO: !ISASCII
-        return :tSTRING_DVAR, matched
-      when scan(/#[{]/) then
-        self.command_start = true
-        return :tSTRING_DBEG, matched
-      when scan(/#/) then
-        string_buffer << "#"
-      end
-      begin
-        c = tokadd_string func, eol, nil
-        rb_compile_error err_msg if
-          c == RubyLexer::EOF
-        if c != eol then
-          return :tSTRING_CONTENT, string_buffer.join
-        else
-          string_buffer << scan(/\n/)
-        end
-        rb_compile_error err_msg if end_of_stream?
-      end until check(eos_re)
-    else
-      until check(eos_re) do
-        string_buffer << scan(/.*(\n|\z)/)
-        rb_compile_error err_msg if end_of_stream?
-      end
-    end
-    self.lex_strterm = [:heredoc, eos, func, last_line]
-    string_content = begin
-                       s = string_buffer.join
-                       s.b.force_encoding Encoding::UTF_8
-                     end
-    return :tSTRING_CONTENT, string_content
-  end
-  def heredoc_identifier # TODO: remove / rewrite
-    term, func = nil, STR_FUNC_BORING
-    self.string_buffer = []
-    heredoc_indent_mods = "-"
-    heredoc_indent_mods += '\~' if ruby23plus?
-    case
-    when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
-      term = ss[2]
-      func |= STR_FUNC_INDENT unless ss[1].empty? # TODO: this seems wrong
-      func |= STR_FUNC_ICNTNT if ss[1] == "~"
-      func |= case term
-              when "\'" then
-                STR_SQUOTE
-              when '"' then
-                STR_DQUOTE
-              else
-                STR_XQUOTE
-              end
-      string_buffer << ss[3]
-    when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
-      rb_compile_error "unterminated here document identifier"
-    when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
-      term = '"'
-      func |= STR_DQUOTE
-      unless ss[1].empty? then
-        func |= STR_FUNC_INDENT
-        func |= STR_FUNC_ICNTNT if ss[1] == "~"
-      end
-      string_buffer << ss[2]
-    else
-      return nil
-    end
-    if scan(/.*\n/) then
-      # TODO: think about storing off the char range instead
-      line = matched
-    else
-      line = nil
-    end
-    self.lex_strterm = [:heredoc, string_buffer.join, func, line]
-    if term == "`" then
-      result nil, :tXSTRING_BEG, "`"
-    else
-      result nil, :tSTRING_BEG, "\""
-    end
-  end
   def in_fname? # REFACTOR
     lex_state =~ EXPR_FNAME
   end
@@ -286,13 +172,13 @@ class RubyLexer
     text = matched
     case
     when text.end_with?("ri")
-      return result(EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
+      result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base)))
     when text.end_with?("r")
-      return result(EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base)))
+      result EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base))
     when text.end_with?("i")
-      return result(EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
+      result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base))
     else
-      return result(EXPR_NUM, :tINTEGER, text.to_i(base))
+      result EXPR_NUM, :tINTEGER, text.to_i(base)
     end
   end
@@ -338,132 +224,10 @@ class RubyLexer
     self.parser.env[id.to_sym] == :lvar
   end
-  def matched
-    ss.matched
-  end
   def not_end?
     not is_end?
   end
-  def parse_quote # TODO: remove / rewrite
-    beg, nnd, short_hand, c = nil, nil, false, nil
-    if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
-      rb_compile_error "unknown type of %string" if ss.matched_size == 2
-      c, beg, short_hand = matched, getch, false
-    else                               # Short-hand (e.g. %{, %., %!, etc)
-      c, beg, short_hand = "Q", getch, true
-    end
-    if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
-      rb_compile_error "unterminated quoted string meets end of file"
-    end
-    # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
-    nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
-    nnd, beg = beg, "\0" if nnd.nil?
-    token_type, text = nil, "%#{c}#{beg}"
-    token_type, string_type = case c
-                              when "Q" then
-                                ch = short_hand ? nnd : c + beg
-                                text = "%#{ch}"
-                                [:tSTRING_BEG,   STR_DQUOTE]
-                              when "q" then
-                                [:tSTRING_BEG,   STR_SQUOTE]
-                              when "W" then
-                                eat_whitespace
-                                [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
-                              when "w" then
-                                eat_whitespace
-                                [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
-                              when "x" then
-                                [:tXSTRING_BEG,  STR_XQUOTE]
-                              when "r" then
-                                [:tREGEXP_BEG,   STR_REGEXP]
-                              when "s" then
-                                self.lex_state = EXPR_FNAME
-                                [:tSYMBEG,       STR_SSYM]
-                              when "I" then
-                                eat_whitespace
-                                [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
-                              when "i" then
-                                eat_whitespace
-                                [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
-                              end
-    rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
-      token_type.nil?
-    raise "huh" unless string_type
-    string string_type, nnd, beg
-    return token_type, text
-  end
-  def parse_string quote # TODO: rewrite / remove
-    _, string_type, term, open = quote
-    space = false # FIX: remove these
-    func = string_type
-    paren = open
-    term_re = @@regexp_cache[term]
-    qwords = func =~ STR_FUNC_QWORDS
-    regexp = func =~ STR_FUNC_REGEXP
-    expand = func =~ STR_FUNC_EXPAND
-    unless func then # nil'ed from qwords below. *sigh*
-      return :tSTRING_END, nil
-    end
-    space = true if qwords and eat_whitespace
-    if self.string_nest == 0 && scan(/#{term_re}/) then
-      if qwords then
-        quote[1] = nil
-        return :tSPACE, nil
-      elsif regexp then
-        return :tREGEXP_END, self.regx_options
-      else
-        return :tSTRING_END, term
-      end
-    end
-    return :tSPACE, nil if space
-    self.string_buffer = []
-    if expand
-      case
-      when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
-        # TODO: !ISASCII
-        # ?! see parser_peek_variable_name
-        return :tSTRING_DVAR, nil
-      when scan(/#(?=\@\@?[a-zA-Z_])/) then
-        # TODO: !ISASCII
-        return :tSTRING_DVAR, nil
-      when scan(/#[{]/) then
-        self.command_start = true
-        return :tSTRING_DBEG, nil
-      when scan(/#/) then
-        string_buffer << "#"
-      end
-    end
-    if tokadd_string(func, term, paren) == RubyLexer::EOF then
-      if func =~ STR_FUNC_REGEXP then
-        rb_compile_error "unterminated regexp meets end of file"
-      else
-        rb_compile_error "unterminated string meets end of file"
-      end
-    end
-    return :tSTRING_CONTENT, string_buffer.join
-  end
   def possibly_escape_string text, check
     content = match[1]
@@ -484,11 +248,11 @@ class RubyLexer
                :tAMPER2
              end
-    return result(:arg_state, token, "&")
+    result :arg_state, token, "&"
   end
   def process_backref text
-    token = ss[1].to_sym
+    token = match[1].to_sym
     # TODO: can't do lineno hack w/ symbol
     result EXPR_END, :tBACK_REF, token
   end
@@ -502,7 +266,7 @@ class RubyLexer
     end
     @comments << matched
-    self.lineno += matched.count("\n")
+    self.lineno += matched.count("\n") # HACK?
     nil # TODO
   end
@@ -573,9 +337,9 @@ class RubyLexer
     case
     when scan(/\'/) then
-      string STR_SSYM
+      string STR_SSYM, matched
     when scan(/\"/) then
-      string STR_DSYM
+      string STR_DSYM, matched
     end
     result EXPR_FNAME, :tSYMBEG, text
@@ -590,7 +354,7 @@ class RubyLexer
   end
   def process_dots text
-    tokens = ruby27plus? && expr_beg? ? BTOKENS : TOKENS
+    tokens = ruby27plus? && is_beg? ? BTOKENS : TOKENS
     result EXPR_BEG, tokens[text], text
   end
@@ -600,36 +364,37 @@ class RubyLexer
     case
     when text.end_with?("ri")
-      return result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
+      result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
     when text.end_with?("i")
-      return result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
+      result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
     when text.end_with?("r")
-      return result EXPR_NUM, :tRATIONAL,  Rational(text.chop)
+      result EXPR_NUM, :tRATIONAL,  Rational(text.chop)
     else
-      return result EXPR_NUM, :tFLOAT, text.to_f
+      result EXPR_NUM, :tFLOAT, text.to_f
     end
   end
   def process_gvar text
-    text.lineno = self.lineno
+    if parser.class.version > 20 && text == "$-" then
+      rb_compile_error "unexpected $undefined"
+    end
     result EXPR_END, :tGVAR, text
   end
   def process_gvar_oddity text
-    return result EXPR_END, "$", "$" if text == "$" # TODO: wtf is this?
     rb_compile_error "#{text.inspect} is not allowed as a global variable name"
   end
   def process_ivar text
     tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
-    text.lineno = self.lineno
     result EXPR_END, tok_id, text
   end
   def process_label text
     symbol = possibly_escape_string text, /^\"/
-    result EXPR_LAB, :tLABEL, [symbol, self.lineno]
+    result EXPR_LAB, :tLABEL, symbol
   end
   def process_label_or_string text
@@ -637,11 +402,15 @@ class RubyLexer
       @was_label = nil
       return process_label text
     elsif text =~ /:\Z/ then
-      ss.pos -= 1 # put back ":"
+      self.pos -= 1 # put back ":"
       text = text[0..-2]
     end
-    result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
+    orig_line = lineno
+    str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
+    self.lineno += str.count("\n")
+    result EXPR_END, :tSTRING, str, orig_line
   end
   def process_lchevron text
@@ -659,34 +428,25 @@ class RubyLexer
       self.lex_state = EXPR_BEG
     end
-    return result(lex_state, :tLSHFT, "\<\<")
+    result lex_state, :tLSHFT, "\<\<"
   end
-  def process_newline_or_comment text
+  def process_newline_or_comment text    # ../compare/parse30.y:9126 ish
     c = matched
-    hit = false
     if c == "#" then
-      ss.pos -= 1
+      self.pos -= 1
-      # TODO: handle magic comments
       while scan(/\s*\#.*(\n+|\z)/) do
-        hit = true
-        self.lineno += matched.lines.to_a.size
+        self.lineno += matched.count "\n"
         @comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
       end
       return nil if end_of_stream?
     end
-    self.lineno += 1 unless hit
-    # Replace a string of newlines with a single one
-    self.lineno += matched.lines.to_a.size if scan(/\n+/)
     c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
          lex_state !~ EXPR_LABELED)
-    # TODO: figure out what token_seen is for
     if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
       # ignore if !fallthrough?
       if !c && parser.in_kwarg then
@@ -694,25 +454,29 @@ class RubyLexer
         self.command_start = true
         return result EXPR_BEG, :tNL, nil
       else
-        return # skip
+        maybe_pop_stack
+        return # goto retry
       end
     end
-    if scan(/([\ \t\r\f\v]*)(\.|&)/) then
-      self.space_seen = true unless ss[1].empty?
+    if scan(/[\ \t\r\f\v]+/) then
+      self.space_seen = true
+    end
-      ss.pos -= 1
-      return unless check(/\.\./)
+    if check(/#/) then
+      return # goto retry
+    elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
+      return # goto retry
     end
     self.command_start = true
-    return result(EXPR_BEG, :tNL, nil)
+    result EXPR_BEG, :tNL, nil
   end
   def process_nthref text
     # TODO: can't do lineno hack w/ number
-    result EXPR_END, :tNTH_REF, ss[1].to_i
+    result EXPR_END, :tNTH_REF, match[1].to_i
   end
   def process_paren text
@@ -740,13 +504,16 @@ class RubyLexer
   end
   def process_percent text
-    return parse_quote if is_beg?
-    return result EXPR_BEG, :tOP_ASGN, "%" if scan(/\=/)
-    return parse_quote if is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
-    return result :arg_state, :tPERCENT, "%"
+    case
+    when is_beg? then
+      process_percent_quote
+    when scan(/\=/)
+      result EXPR_BEG, :tOP_ASGN, "%"
+    when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
+      process_percent_quote
+    else
+      result :arg_state, :tPERCENT, "%"
+    end
   end
   def process_plus_minus text
@@ -820,20 +587,21 @@ class RubyLexer
   end
   def process_simple_string text
-    replacement = text[1..-2].gsub(ESC) {
-      unescape($1).b.force_encoding Encoding::UTF_8
-    }
+    orig_line = lineno
+    self.lineno += text.count("\n")
-    replacement = replacement.b unless replacement.valid_encoding?
+    str = text[1..-2]
+      .gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }
+    str = str.b unless str.valid_encoding?
-    result EXPR_END, :tSTRING, replacement
+    result EXPR_END, :tSTRING, str, orig_line
   end
   def process_slash text
     if is_beg? then
-      string STR_REGEXP
+      string STR_REGEXP, matched
-      return result(nil, :tREGEXP_BEG, "/")
+      return result nil, :tREGEXP_BEG, "/"
     end
     if scan(/\=/) then
@@ -848,7 +616,7 @@ class RubyLexer
       end
     end
-    return result(:arg_state, :tDIVIDE, "/")
+    result :arg_state, :tDIVIDE, "/"
   end
   def process_square_bracket text
@@ -880,34 +648,6 @@ class RubyLexer
     result EXPR_PAR, token, text
   end
-  def process_string # TODO: rewrite / remove
-    # matches top of parser_yylex in compare/parse23.y:8113
-    token = if lex_strterm[0] == :heredoc then
-              self.heredoc lex_strterm
-            else
-              self.parse_string lex_strterm
-            end
-    token_type, c = token
-    # matches parser_string_term from 2.3, but way off from 2.5
-    if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
-      if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
-           !cond.is_in_state) || is_arg?) &&
-          is_label_suffix? then
-        scan(/:/)
-        token_type = token[0] = :tLABEL_END
-      end
-    end
-    if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
-      self.lex_strterm = nil
-      self.lex_state   = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_LIT
-    end
-    return token
-  end
   def process_symbol text
     symbol = possibly_escape_string text, /^:\"/ # stupid emacs
@@ -916,7 +656,6 @@ class RubyLexer
   def process_token text
     # matching: parse_ident in compare/parse23.y:7989
-    # TODO: make this always return [token, lineno]
     # FIX: remove: self.last_state = lex_state
     token = self.token = text
@@ -939,8 +678,7 @@ class RubyLexer
     if is_label_possible? and is_label_suffix? then
       scan(/:/)
-      # TODO: propagate the lineno to ALL results
-      return result EXPR_LAB, :tLABEL, [token, self.lineno]
+      return result EXPR_LAB, :tLABEL, token
     end
     # TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
@@ -951,14 +689,15 @@ class RubyLexer
       return process_token_keyword keyword if keyword
     end
-    # matching: compare/parse23.y:8079
-    state = if is_beg? or is_arg? or lex_state =~ EXPR_DOT then
+    # matching: compare/parse30.y:9039
+    state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
               cmd_state ? EXPR_CMDARG : EXPR_ARG
             elsif lex_state =~ EXPR_FNAME then
               EXPR_ENDFN
             else
               EXPR_END
             end
+    self.lex_state = state
     tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
@@ -968,20 +707,16 @@ class RubyLexer
       state = EXPR_END|EXPR_LABEL
     end
-    token.lineno = self.lineno # yes, on a string. I know... I know...
-    return result(state, tok_id, token)
+    result state, tok_id, token
   end
   def process_token_keyword keyword
     # matching MIDDLE of parse_ident in compare/parse23.y:8046
     state = lex_state
-    self.lex_state = keyword.state
-    value = [token, self.lineno]
-    return result(lex_state, keyword.id0, value) if state =~ EXPR_FNAME
+    return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME
+    self.lex_state = keyword.state
     self.command_start = true if lex_state =~ EXPR_BEG
     case
@@ -990,27 +725,28 @@ class RubyLexer
       when lambda_beginning? then
         self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
         self.paren_nest -= 1 # TODO: question this?
-        result lex_state, :kDO_LAMBDA, value
+        result lex_state, :kDO_LAMBDA, token
       when cond.is_in_state then
-        result lex_state, :kDO_COND, value
+        result lex_state, :kDO_COND, token
       when cmdarg.is_in_state && state != EXPR_CMDARG then
-        result lex_state, :kDO_BLOCK, value
+        result lex_state, :kDO_BLOCK, token
       else
-        result lex_state, :kDO, value
+        result lex_state, :kDO, token
       end
     when state =~ EXPR_PAD then
-      result lex_state, keyword.id0, value
+      result lex_state, keyword.id0, token
     when keyword.id0 != keyword.id1 then
-      result EXPR_PAR, keyword.id1, value
+      result EXPR_PAR, keyword.id1, token
     else
-      result lex_state, keyword.id1, value
+      result lex_state, keyword.id1, token
     end
   end
   def process_underscore text
-    ss.unscan # put back "_"
+    self.unscan # put back "_"
     if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
+      ss.terminate
       [RubyLexer::EOF, RubyLexer::EOF]
     elsif scan(/#{IDENT_CHAR}+/) then
       process_token matched
@@ -1018,121 +754,35 @@ class RubyLexer
   end
   def rb_compile_error msg
-    msg += ". near line #{self.lineno}: #{ss.rest[/^.*/].inspect}"
+    msg += ". near line #{self.lineno}: #{self.rest[/^.*/].inspect}"
     raise RubyParser::SyntaxError, msg
   end
-  def read_escape # TODO: remove / rewrite
-    case
-    when scan(/\\/) then                  # Backslash
-      '\\'
-    when scan(/n/) then                   # newline
-      self.extra_lineno -= 1
-      "\n"
-    when scan(/t/) then                   # horizontal tab
-      "\t"
-    when scan(/r/) then                   # carriage-return
-      "\r"
-    when scan(/f/) then                   # form-feed
-      "\f"
-    when scan(/v/) then                   # vertical tab
-      "\13"
-    when scan(/a/) then                   # alarm(bell)
-      "\007"
-    when scan(/e/) then                   # escape
-      "\033"
-    when scan(/b/) then                   # backspace
-      "\010"
-    when scan(/s/) then                   # space
-      " "
-    when scan(/[0-7]{1,3}/) then          # octal constant
-      (matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
-    when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
-      # TODO: force encode everything to UTF-8?
-      ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
-    when check(/M-\\./) then
-      scan(/M-\\/) # eat it
-      c = self.read_escape
-      c[0] = (c[0].ord | 0x80).chr
-      c
-    when scan(/M-(.)/) then
-      c = ss[1]
-      c[0] = (c[0].ord | 0x80).chr
-      c
-    when check(/(C-|c)\\[\\MCc]/) then
-      scan(/(C-|c)\\/) # eat it
-      c = self.read_escape
-      c[0] = (c[0].ord & 0x9f).chr
-      c
-    when check(/(C-|c)\\(?!u|\\)/) then
-      scan(/(C-|c)\\/) # eat it
-      c = read_escape
-      c[0] = (c[0].ord & 0x9f).chr
-      c
-    when scan(/C-\?|c\?/) then
-      127.chr
-    when scan(/(C-|c)(.)/) then
-      c = ss[2]
-      c[0] = (c[0].ord & 0x9f).chr
-      c
-    when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
-      matched
-    when scan(/u(\h{4})/) then
-      [ss[1].to_i(16)].pack("U")
-    when scan(/u(\h{1,3})/) then
-      rb_compile_error "Invalid escape character syntax"
-    when scan(/u\{(\h+(?:\s+\h+)*)\}/) then
-      ss[1].split.map { |s| s.to_i(16) }.pack("U*")
-    when scan(/[McCx0-9]/) || end_of_stream? then
-      rb_compile_error("Invalid escape character syntax")
-    else
-      getch
-    end.dup
-  end
-  def getch
-    c = ss.getch
-    c = ss.getch if c == "\r" && ss.peek(1) == "\n"
-    c
-  end
-  def regx_options # TODO: rewrite / remove
-    good, bad = [], []
-    if scan(/[a-z]+/) then
-      good, bad = matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
-    end
-    unless bad.empty? then
-      rb_compile_error("unknown regexp option%s - %s" %
-                       [(bad.size > 1 ? "s" : ""), bad.join.inspect])
-    end
-    return good.join
-  end
   def reset
+    self.lineno        = 1
     self.brace_nest    = 0
     self.command_start = true
     self.comments      = []
     self.lex_state     = EXPR_NONE
     self.lex_strterm   = nil
-    self.lineno        = 1
     self.lpar_beg      = nil
     self.paren_nest    = 0
     self.space_seen    = false
     self.string_nest   = 0
     self.token         = nil
-    self.extra_lineno  = 0
+    self.string_buffer = []
+    self.old_ss        = nil
+    self.old_lineno    = nil
     self.cond.reset
     self.cmdarg.reset
   end
-  def result new_state, token, text # :nodoc:
+  def result new_state, token, text, line = self.lineno # :nodoc:
     new_state = self.arg_state if new_state == :arg_state
     self.lex_state = new_state if new_state
-    [token, text]
+    [token, [text, line]]
   end
   def ruby22_label?
@@ -1155,14 +805,6 @@ class RubyLexer
     parser.class.version >= 27
   end
-  def scan re
-    ss.scan re
-  end
-  def scanner_class # TODO: design this out of oedipus_lex. or something.
-    RPStringScanner
-  end
   def space_vs_beginning space_type, beg_type, fallback
     if is_space_arg? check(/./m) then
       warning "`**' interpreted as argument prefix"
@@ -1175,139 +817,9 @@ class RubyLexer
     end
   end
-  def string type, beg = matched, nnd = "\0"
-    self.lex_strterm = [:strterm, type, beg, nnd]
-  end
-  def tokadd_escape term # TODO: rewrite / remove
-    case
-    when scan(/\\\n/) then
-      # just ignore
-    when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
-      self.string_buffer << matched
-    when scan(/\\([MC]-|c)(?=\\)/) then
-      self.string_buffer << matched
-      self.tokadd_escape term
-    when scan(/\\([MC]-|c)(.)/) then
-      self.string_buffer << matched
-    when scan(/\\[McCx]/) then
-      rb_compile_error "Invalid escape character syntax"
-    when scan(/\\(.)/m) then
-      chr = ss[1]
-      prev = self.string_buffer.last
-      if term == chr && prev && prev.end_with?("(?") then
-        self.string_buffer << chr
-      elsif term == chr || chr.ascii_only? then
-        self.string_buffer << matched # dunno why we keep them for ascii
-      else
-        self.string_buffer << chr # HACK? this is such a rat's nest
-      end
-    else
-      rb_compile_error "Invalid escape character syntax"
-    end
-  end
-  def tokadd_string(func, term, paren) # TODO: rewrite / remove
-    qwords = func =~ STR_FUNC_QWORDS
-    escape = func =~ STR_FUNC_ESCAPE
-    expand = func =~ STR_FUNC_EXPAND
-    regexp = func =~ STR_FUNC_REGEXP
-    symbol = func =~ STR_FUNC_SYMBOL
-    paren_re = @@regexp_cache[paren]
-    term_re  = if term == "\n"
-                 /#{Regexp.escape "\r"}?#{Regexp.escape "\n"}/
-               else
-                 @@regexp_cache[term]
-               end
-    until end_of_stream? do
-      c = nil
-      handled = true
-      case
-      when scan(term_re) then
-        if self.string_nest == 0 then
-          ss.pos -= 1
-          break
-        else
-          self.string_nest -= 1
-        end
-      when paren_re && scan(paren_re) then
-        self.string_nest += 1
-      when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
-        ss.pos -= 1
-        break
-      when qwords && scan(/\s/) then
-        ss.pos -= 1
-        break
-      when expand && scan(/#(?!\n)/) then
-        # do nothing
-      when check(/\\/) then
-        case
-        when qwords && scan(/\\\n/) then
-          string_buffer << "\n"
-          next
-        when qwords && scan(/\\\s/) then
-          c = " "
-        when expand && scan(/\\\n/) then
-          next
-        when regexp && check(/\\/) then
-          self.tokadd_escape term
-          next
-        when expand && scan(/\\/) then
-          c = self.read_escape
-        when scan(/\\\n/) then
-          # do nothing
-        when scan(/\\\\/) then
-          string_buffer << '\\' if escape
-          c = '\\'
-        when scan(/\\/) then
-          unless scan(term_re) || paren.nil? || scan(paren_re) then
-            string_buffer << "\\"
-          end
-        else
-          handled = false
-        end # inner /\\/ case
-      else
-        handled = false
-      end # top case
-      unless handled then
-        t = if term == "\n"
-              Regexp.escape "\r\n"
-            else
-              Regexp.escape term
-            end
-        x = Regexp.escape paren if paren && paren != "\000"
-        re = if qwords then
-               /[^#{t}#{x}\#\\\s]+|./ # |. to pick up whatever
-             else
-               /[^#{t}#{x}\#\\]+|./
-             end
-        scan re
-        c = matched
-        rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
-      end # unless handled
-      c ||= matched
-      string_buffer << c
-    end # until
-    c ||= matched
-    c = RubyLexer::EOF if end_of_stream?
-    return c
-  end
   def unescape s
     r = ESCAPES[s]
-    self.extra_lineno += 1 if s == "\n"     # eg backslash newline strings
-    self.extra_lineno -= 1 if r && s == "n" # literal \n, not newline
     return r if r
     x = case s
@@ -1328,7 +840,7 @@ class RubyLexer
         when /u(\h{1,3})/ then
           rb_compile_error("Invalid escape character syntax")
         when /u\{(\h+(?:\s+\h+)*)\}/ then
-          $1.split.map { |s| s.to_i(16) }.pack("U*")
+          $1.split.map { |cp| cp.to_i(16) }.pack("U*")
         else
           s
         end
@@ -1441,7 +953,7 @@ class RubyLexer
       STR_FUNC_LABEL  = State.new 0x40,    str_func_names
       STR_FUNC_LIST   = State.new 0x4000,  str_func_names
       STR_FUNC_TERM   = State.new 0x8000,  str_func_names
-      STR_FUNC_ICNTNT = State.new 0x10000, str_func_names # <<~HEREDOC -- TODO: remove?
+      STR_FUNC_DEDENT = State.new 0x10000, str_func_names # <<~HEREDOC
       # TODO: check parser25.y on how they do STR_FUNC_INDENT
@@ -1453,6 +965,7 @@ class RubyLexer
       STR_DWORD  = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
       STR_SSYM   = STR_FUNC_SYMBOL
       STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
+      STR_LABEL  = STR_FUNC_LABEL
       str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
                             STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
@@ -1463,7 +976,7 @@ class RubyLexer
                             STR_FUNC_LABEL  => "STR_FUNC_LABEL",
                             STR_FUNC_LIST   => "STR_FUNC_LIST",
                             STR_FUNC_TERM   => "STR_FUNC_TERM",
-                            STR_FUNC_ICNTNT => "STR_FUNC_ICNTNT",
+                            STR_FUNC_DEDENT => "STR_FUNC_DEDENT",
                             STR_SQUOTE      => "STR_SQUOTE")
     end
@@ -1473,7 +986,145 @@ class RubyLexer
   include State::Values
 end
-require "ruby_lexer.rex"
+class RubyLexer
+  module SSWrapper
+    def string= s
+      ss.string= s
+    end
+    def beginning_of_line?
+      ss.bol?
+    end
+    alias bol? beginning_of_line? # to make .rex file more readable
+    def check re
+      maybe_pop_stack
+      ss.check re
+    end
+    def end_of_stream?
+      ss.eos?
+    end
+    alias eos? end_of_stream?
+    def getch
+      c = ss.getch
+      c = ss.getch if c == "\r" && ss.peek(1) == "\n"
+      c
+    end
+    def match
+      ss
+    end
+    def matched
+      ss.matched
+    end
+    def in_heredoc?
+      !!self.old_ss
+    end
+    def maybe_pop_stack
+      if ss.eos? && in_heredoc? then
+        self.ss_pop
+        self.lineno_pop
+      end
+    end
+    def pos
+      ss.pos
+    end
+    def pos= n
+      ss.pos = n
+    end
+    def rest
+      ss.rest
+    end
+    def scan re
+      maybe_pop_stack
+      ss.scan re
+    end
+    def scanner_class # TODO: design this out of oedipus_lex. or something.
+      RPStringScanner
+    end
+    def ss_string
+      ss.string
+    end
+    def ss_string= s
+      raise "Probably not"
+      ss.string = s
+    end
+    def unscan
+      ss.unscan
+    end
+  end
+  include SSWrapper
+end
+class RubyLexer
+  module SSStackish
+    def lineno_push new_lineno
+      self.old_lineno = self.lineno
+      self.lineno     = new_lineno
+    end
+    def lineno_pop
+      self.lineno     = self.old_lineno
+      self.old_lineno = nil
+    end
+    def ss= o
+      raise "Clearing ss while in heredoc!?!" if in_heredoc?
+      @old_ss = nil
+      super
+    end
+    def ss_push new_ss
+      @old_ss = self.ss
+      @ss     = new_ss
+    end
+    def ss_pop
+      @ss     = self.old_ss
+      @old_ss = nil
+    end
+  end
+  prepend SSStackish
+end
+if ENV["RP_STRTERM_DEBUG"] then
+  class RubyLexer
+    def d o
+      $stderr.puts o.inspect
+    end
+    alias old_lex_strterm= lex_strterm=
+    def lex_strterm= o
+      self.old_lex_strterm= o
+      where = caller.first.split(/:/).first(2).join(":")
+      $stderr.puts
+      d :lex_strterm => [o, where]
+    end
+  end
+end
+require_relative "./ruby_lexer.rex.rb"
+require_relative "./ruby_lexer_strings.rb"
 if ENV["RP_LINENO_DEBUG"] then
   class RubyLexer
@@ -1486,7 +1137,8 @@ if ENV["RP_LINENO_DEBUG"] then
     def lineno= n
       self.old_lineno= n
       where = caller.first.split(/:/).first(2).join(":")
-      d :lineno => [n, where, ss && ss.rest[0, 40]]
+      $stderr.puts
+      d :lineno => [n, where]
     end
   end
 end