RubyGems - ruby_parser - Versions diffs - 3.13.1 → 3.14.0 - Mend

ruby_parser 3.13.1 → 3.14.0

Files changed (34) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data.tar.gz.sig +0 -0
data/.autotest +18 -29
data/History.rdoc +38 -0
data/README.rdoc +3 -3
data/Rakefile +10 -13
data/bin/ruby_parse +3 -1
data/lib/ruby20_parser.rb +3042 -2866
data/lib/ruby20_parser.y +391 -247
data/lib/ruby21_parser.rb +3088 -2916
data/lib/ruby21_parser.y +399 -254
data/lib/ruby22_parser.rb +3118 -2937
data/lib/ruby22_parser.y +400 -255
data/lib/ruby23_parser.rb +3119 -2940
data/lib/ruby23_parser.y +400 -255
data/lib/ruby24_parser.rb +3089 -2905
data/lib/ruby24_parser.y +404 -257
data/lib/ruby25_parser.rb +3089 -2905
data/lib/ruby25_parser.y +404 -257
data/lib/ruby26_parser.rb +3095 -2909
data/lib/ruby26_parser.y +410 -261
data/lib/ruby_lexer.rb +424 -432
data/lib/ruby_lexer.rex.rb +1 -1
data/lib/ruby_parser.rb +27 -27
data/lib/ruby_parser.yy +412 -262
data/lib/ruby_parser_extras.rb +627 -406
data/test/test_ruby_lexer.rb +1148 -1093
data/test/test_ruby_parser.rb +2259 -1915
data/test/test_ruby_parser_extras.rb +39 -4
data/tools/munge.rb +1 -1
data/tools/ripper.rb +13 -2
metadata +8 -8
metadata.gz.sig +0 -0

data/lib/ruby_lexer.rb CHANGED Viewed

@@ -4,135 +4,9 @@
 $DEBUG = true if ENV["DEBUG"]
 class RubyLexer
   # :stopdoc:
-  HAS_ENC = "".respond_to? :encoding
-  IDENT_CHAR = if HAS_ENC then
-                 /[\w\u0080-\u{10ffff}]/u
-               else
-                 /[\w\x80-\xFF]/n
-               end
   EOF = :eof_haha!
-  # ruby constants for strings (should this be moved somewhere else?)
-  STR_FUNC_BORING = 0x00
-  STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
-  STR_FUNC_EXPAND = 0x02
-  STR_FUNC_REGEXP = 0x04
-  STR_FUNC_QWORDS = 0x08
-  STR_FUNC_SYMBOL = 0x10
-  STR_FUNC_INDENT = 0x20 # <<-HEREDOC
-  STR_FUNC_ICNTNT = 0x40 # <<~HEREDOC
-  STR_SQUOTE = STR_FUNC_BORING
-  STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
-  STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
-  STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
-  STR_SSYM   = STR_FUNC_SYMBOL
-  STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
-  class State
-    attr_accessor :n
-    def initialize o
-      raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
-      self.n = o
-    end
-    def == o
-      o.class == self.class && o.n == self.n
-    end
-    def =~ v
-      (self.n & v.n) != 0
-    end
-    def | v
-      self.class.new(self.n | v.n)
-    end
-    def inspect
-      return "EXPR_NONE" if n.zero?
-      NAMES.map { |v,k| k if self =~ v }.compact.join "|"
-    end
-    module Values
-      EXPR_NONE    = State.new    0x0
-      EXPR_BEG     = State.new    0x1
-      EXPR_END     = State.new    0x2
-      EXPR_ENDARG  = State.new    0x4
-      EXPR_ENDFN   = State.new    0x8
-      EXPR_ARG     = State.new   0x10
-      EXPR_CMDARG  = State.new   0x20
-      EXPR_MID     = State.new   0x40
-      EXPR_FNAME   = State.new   0x80
-      EXPR_DOT     = State.new  0x100
-      EXPR_CLASS   = State.new  0x200
-      EXPR_LABEL   = State.new  0x400
-      EXPR_LABELED = State.new  0x800
-      EXPR_FITEM   = State.new 0x1000
-      EXPR_BEG_ANY = EXPR_BEG | EXPR_MID    | EXPR_CLASS
-      EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
-      EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
-      # extra fake lex_state names to make things a bit cleaner
-      EXPR_LAB = EXPR_ARG|EXPR_LABELED
-      EXPR_NUM = EXPR_END|EXPR_ENDARG
-      EXPR_PAR = EXPR_BEG|EXPR_LABEL
-      EXPR_PAD = EXPR_BEG|EXPR_LABELED
-    end
-    include Values
-    NAMES = {
-      EXPR_NONE    => "EXPR_NONE",
-      EXPR_BEG     => "EXPR_BEG",
-      EXPR_END     => "EXPR_END",
-      EXPR_ENDARG  => "EXPR_ENDARG",
-      EXPR_ENDFN   => "EXPR_ENDFN",
-      EXPR_ARG     => "EXPR_ARG",
-      EXPR_CMDARG  => "EXPR_CMDARG",
-      EXPR_MID     => "EXPR_MID",
-      EXPR_FNAME   => "EXPR_FNAME",
-      EXPR_DOT     => "EXPR_DOT",
-      EXPR_CLASS   => "EXPR_CLASS",
-      EXPR_LABEL   => "EXPR_LABEL",
-      EXPR_LABELED => "EXPR_LABELED",
-      EXPR_FITEM   => "EXPR_FITEM",
-    }
-  end
-  include State::Values
-  if $DEBUG then
-    def lex_state= o
-      return if @lex_state == o
-      raise ArgumentError, "bad state: %p" % [o] unless State === o
-      if ENV["V"] then
-        c = caller[0]
-        c = caller[1] if c =~ /\b(expr_)?result\b/
-        c = caller[2] if c =~ /\b(expr_)?result\b/
-        warn "lex_state: %p -> %p from %s" % [lex_state, o, c.clean_caller]
-      else
-        warn "lex_state: %p -> %p" % [lex_state, o]
-      end
-      @lex_state = o
-    end
-  else
-    def lex_state= o
-      raise ArgumentError, "bad state: %p" % [o] unless State === o
-      @lex_state = o
-    end
-  end
-  attr_reader :lex_state
   ESCAPES = {
     "a"    => "\007",
     "b"    => "\010",
@@ -149,6 +23,14 @@ class RubyLexer
     "c\?"  => 127.chr,
   }
+  HAS_ENC = "".respond_to? :encoding
+  IDENT_CHAR = if HAS_ENC then
+                 /[\w\u0080-\u{10ffff}]/u
+               else
+                 /[\w\x80-\xFF]/n
+               end
   TOKENS = {
     "!"   => :tBANG,
     "!="  => :tNEQ,
@@ -165,13 +47,26 @@ class RubyLexer
     "->"  => :tLAMBDA,
   }
-  TAB_WIDTH = 8
-  @@regexp_cache = Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }
+  @@regexp_cache = Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }
   @@regexp_cache[nil] = nil
+  if $DEBUG then
+    attr_reader :lex_state
+    def lex_state= o
+      return if @lex_state == o
+      raise ArgumentError, "bad state: %p" % [o] unless State === o
+      warn "lex_state: %p -> %p" % [lex_state, o]
+      @lex_state = o
+    end
+  end
   # :startdoc:
+  attr_accessor :lex_state unless $DEBUG
   attr_accessor :lineno # we're bypassing oedipus' lineno handling.
   attr_accessor :brace_nest
   attr_accessor :cmdarg
@@ -209,7 +104,7 @@ class RubyLexer
   end
   def arg_ambiguous
-    self.warning("Ambiguous first argument. make sure.")
+    self.warning "Ambiguous first argument. make sure."
   end
   def arg_state
@@ -219,7 +114,12 @@ class RubyLexer
   def beginning_of_line?
     ss.bol?
   end
-  alias :bol? :beginning_of_line? # to make .rex file more readable
+  alias bol? beginning_of_line? # to make .rex file more readable
+  def check re
+    ss.check re
+  end
   def comments # TODO: remove this... maybe comment_string + attr_accessor
     c = @comments.join
@@ -227,6 +127,12 @@ class RubyLexer
     c
   end
+  def eat_whitespace
+    r = scan(/\s+/)
+    self.extra_lineno += r.count("\n") if r
+    r
+  end
   def end_of_stream?
     ss.eos?
   end
@@ -245,13 +151,18 @@ class RubyLexer
     result EXPR_BEG, token, text
   end
+  def fixup_lineno extra = 0
+    self.lineno += self.extra_lineno + extra
+    self.extra_lineno = 0
+  end
   def heredoc here # TODO: rewrite / remove
     _, eos, func, last_line = here
-    indent         = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
-    content_indent = (func & STR_FUNC_ICNTNT) != 0
-    expand         = (func & STR_FUNC_EXPAND) != 0
-    eos_re         = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
+    indent         = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
+    expand         = func =~ STR_FUNC_EXPAND
+    eol            = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
+    eos_re         = /#{indent}#{Regexp.escape eos}(#{eol}|\z)/
     err_msg        = "can't match #{eos_re.inspect} anywhere in "
     rb_compile_error err_msg if end_of_stream?
@@ -259,7 +170,7 @@ class RubyLexer
     if beginning_of_line? && scan(eos_re) then
       self.lineno += 1
       ss.unread_many last_line # TODO: figure out how to remove this
-      return :tSTRING_END, eos
+      return :tSTRING_END, [eos, func] # TODO: calculate squiggle width at lex?
     end
     self.string_buffer = []
@@ -272,17 +183,17 @@ class RubyLexer
       when scan(/#[{]/) then
         return :tSTRING_DBEG, matched
       when scan(/#/) then
-        string_buffer << '#'
+        string_buffer << "#"
       end
       begin
-        c = tokadd_string func, "\n", nil
+        c = tokadd_string func, eol, nil
         rb_compile_error err_msg if
           c == RubyLexer::EOF
-        if c != "\n" then
-          return :tSTRING_CONTENT, string_buffer.join.delete("\r")
+        if c != eol then
+          return :tSTRING_CONTENT, string_buffer.join
         else
           string_buffer << scan(/\n/)
         end
@@ -300,67 +211,24 @@ class RubyLexer
     string_content = begin
                        s = string_buffer.join
-                       s.delete "\r"
-                     rescue ArgumentError
-                       s.b.delete("\r").force_encoding Encoding::UTF_8
+                       s.b.force_encoding Encoding::UTF_8
                      end
-    string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?
     return :tSTRING_CONTENT, string_content
   end
-  def heredoc_dedent(string_content)
-    width = string_content.scan(/^[ \t]*(?=\S)/).map do |whitespace|
-      heredoc_whitespace_indent_size whitespace
-    end.min || 0
-    string_content.split("\n", -1).map do |line|
-      dedent_string line, width
-    end.join "\n"
-  end
-  def dedent_string(string, width)
-    characters_skipped = 0
-    indentation_skipped = 0
-    string.chars.each do |char|
-      break if indentation_skipped >= width
-      if char == ' '
-        characters_skipped += 1
-        indentation_skipped += 1
-      elsif char == "\t"
-        proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
-        break if (proposed > width)
-        characters_skipped += 1
-        indentation_skipped = proposed
-      end
-    end
-    string[characters_skipped..-1]
-  end
-  def heredoc_whitespace_indent_size(whitespace)
-    whitespace.chars.inject 0 do |size, char|
-      if char == "\t"
-        size + TAB_WIDTH
-      else
-        size + 1
-      end
-    end
-  end
   def heredoc_identifier # TODO: remove / rewrite
     term, func = nil, STR_FUNC_BORING
     self.string_buffer = []
-    heredoc_indent_mods = '-'
+    heredoc_indent_mods = "-"
     heredoc_indent_mods += '\~' if ruby23plus?
     case
     when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
       term = ss[2]
-      func |= STR_FUNC_INDENT unless ss[1].empty?
-      func |= STR_FUNC_ICNTNT if ss[1] == '~'
+      func |= STR_FUNC_INDENT unless ss[1].empty? # TODO: this seems wrong
+      func |= STR_FUNC_ICNTNT if ss[1] == "~"
       func |= case term
               when "\'" then
                 STR_SQUOTE
@@ -377,7 +245,7 @@ class RubyLexer
       func |= STR_DQUOTE
       unless ss[1].empty? then
         func |= STR_FUNC_INDENT
-        func |= STR_FUNC_ICNTNT if ss[1] == '~'
+        func |= STR_FUNC_ICNTNT if ss[1] == "~"
       end
       string_buffer << ss[2]
     else
@@ -393,7 +261,7 @@ class RubyLexer
     self.lex_strterm = [:heredoc, string_buffer.join, func, line]
-    if term == '`' then
+    if term == "`" then
       result nil, :tXSTRING_BEG, "`"
     else
       result nil, :tSTRING_BEG, "\""
@@ -404,26 +272,26 @@ class RubyLexer
     lex_state =~ EXPR_FNAME
   end
-  def is_after_operator?
-    lex_state =~ EXPR_FNAME|EXPR_DOT
-  end
   def int_with_base base
     rb_compile_error "Invalid numeric format" if matched =~ /__/
     text = matched
     case
-    when text.end_with?('ri')
+    when text.end_with?("ri")
       return result(EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
-    when text.end_with?('r')
+    when text.end_with?("r")
       return result(EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base)))
-    when text.end_with?('i')
+    when text.end_with?("i")
       return result(EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
     else
       return result(EXPR_NUM, :tINTEGER, text.to_i(base))
     end
   end
+  def is_after_operator?
+    lex_state =~ EXPR_FNAME|EXPR_DOT
+  end
   def is_arg?
     lex_state =~ EXPR_ARG_ANY
   end
@@ -436,15 +304,6 @@ class RubyLexer
     lex_state =~ EXPR_END_ANY
   end
-  def lvar_defined? id
-    # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
-    self.parser.env[id.to_sym] == :lvar
-  end
-  def ruby22_label?
-    ruby22plus? and is_label_possible?
-  end
   def is_label_possible?
     (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
   end
@@ -461,6 +320,11 @@ class RubyLexer
     lpar_beg && lpar_beg == paren_nest
   end
+  def lvar_defined? id
+    # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
+    self.parser.env[id.to_sym] == :lvar
+  end
   def matched
     ss.matched
   end
@@ -469,6 +333,134 @@ class RubyLexer
     not is_end?
   end
+  def parse_quote # TODO: remove / rewrite
+    beg, nnd, short_hand, c = nil, nil, false, nil
+    if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
+      rb_compile_error "unknown type of %string" if ss.matched_size == 2
+      c, beg, short_hand = matched, ss.getch, false
+    else                               # Short-hand (e.g. %{, %., %!, etc)
+      c, beg, short_hand = "Q", ss.getch, true
+    end
+    if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
+      rb_compile_error "unterminated quoted string meets end of file"
+    end
+    # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
+    nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
+    nnd, beg = beg, "\0" if nnd.nil?
+    token_type, text = nil, "%#{c}#{beg}"
+    token_type, string_type = case c
+                              when "Q" then
+                                ch = short_hand ? nnd : c + beg
+                                text = "%#{ch}"
+                                [:tSTRING_BEG,   STR_DQUOTE]
+                              when "q" then
+                                [:tSTRING_BEG,   STR_SQUOTE]
+                              when "W" then
+                                eat_whitespace
+                                [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
+                              when "w" then
+                                eat_whitespace
+                                [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
+                              when "x" then
+                                [:tXSTRING_BEG,  STR_XQUOTE]
+                              when "r" then
+                                [:tREGEXP_BEG,   STR_REGEXP]
+                              when "s" then
+                                self.lex_state = EXPR_FNAME
+                                [:tSYMBEG,       STR_SSYM]
+                              when "I" then
+                                eat_whitespace
+                                [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
+                              when "i" then
+                                eat_whitespace
+                                [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
+                              end
+    rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
+      token_type.nil?
+    raise "huh" unless string_type
+    string string_type, nnd, beg
+    return token_type, text
+  end
+  def parse_string quote # TODO: rewrite / remove
+    _, string_type, term, open = quote
+    space = false # FIX: remove these
+    func = string_type
+    paren = open
+    term_re = @@regexp_cache[term]
+    qwords = func =~ STR_FUNC_QWORDS
+    regexp = func =~ STR_FUNC_REGEXP
+    expand = func =~ STR_FUNC_EXPAND
+    unless func then # nil'ed from qwords below. *sigh*
+      return :tSTRING_END, nil
+    end
+    space = true if qwords and eat_whitespace
+    if self.string_nest == 0 && scan(/#{term_re}/) then
+      if qwords then
+        quote[1] = nil
+        return :tSPACE, nil
+      elsif regexp then
+        return :tREGEXP_END, self.regx_options
+      else
+        return :tSTRING_END, term
+      end
+    end
+    return :tSPACE, nil if space
+    self.string_buffer = []
+    if expand
+      case
+      when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
+        # TODO: !ISASCII
+        # ?! see parser_peek_variable_name
+        return :tSTRING_DVAR, nil
+      when scan(/#(?=\@\@?[a-zA-Z_])/) then
+        # TODO: !ISASCII
+        return :tSTRING_DVAR, nil
+      when scan(/#[{]/) then
+        self.command_start = true
+        return :tSTRING_DBEG, nil
+      when scan(/#/) then
+        string_buffer << "#"
+      end
+    end
+    if tokadd_string(func, term, paren) == RubyLexer::EOF then
+      if func =~ STR_FUNC_REGEXP then
+        rb_compile_error "unterminated regexp meets end of file"
+      else
+        rb_compile_error "unterminated string meets end of file"
+      end
+    end
+    return :tSTRING_CONTENT, string_buffer.join
+  end
+  def possibly_escape_string text, check
+    content = match[1]
+    if text =~ check then
+      content.gsub(ESC) { unescape $1 }
+    else
+      content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
+    end
+  end
   def process_amper text
     token = if is_arg? && space_seen && !check(/\s/) then
                warning("`&' interpreted as argument prefix")
@@ -510,44 +502,20 @@ class RubyLexer
     case matched
     when "}" then
       self.brace_nest -= 1
-      self.lex_state   = EXPR_ENDARG # TODO: EXPR_END ? Look at 2.6
+      self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
       return :tSTRING_DEND, matched if brace_nest < 0
       return :tRCURLY, matched
     when "]" then
       self.paren_nest -= 1
-      self.lex_state   = EXPR_ENDARG
+      self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
       return :tRBRACK, matched
     when ")" then
       self.paren_nest -= 1
-      self.lex_state   = EXPR_ENDFN
-      return :tRPAREN, matched
-    else
-      raise "Unknown bracing: #{matched.inspect}"
-    end
-  end
-  def process_colon1 text
-    # ?: / then / when
-    if is_end? || check(/\s/) then
-      return result EXPR_BEG, :tCOLON, text
-    end
-    case
-    when scan(/\'/) then
-      string STR_SSYM
-    when scan(/\"/) then
-      string STR_DSYM
-    end
-    result EXPR_FNAME, :tSYMBEG, text
-  end
-  def process_colon2 text
-    if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
-      result EXPR_BEG, :tCOLON3, text
+      self.lex_state   = EXPR_ENDFN
+      return :tRPAREN, matched
     else
-      result EXPR_DOT, :tCOLON2, text
+      raise "Unknown bracing: #{matched.inspect}"
     end
   end
@@ -566,7 +534,7 @@ class RubyLexer
             when lex_state =~ EXPR_LABELED then
               :tLBRACE     # hash
             when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
-              :tLCURLY     # block (primary) '{' in parse.y
+              :tLCURLY     # block (primary) "{" in parse.y
             when lex_state =~ EXPR_ENDARG then
               :tLBRACE_ARG # block (expr)
             else
@@ -581,15 +549,39 @@ class RubyLexer
     result state, token, text
   end
+  def process_colon1 text
+    # ?: / then / when
+    if is_end? || check(/\s/) then
+      return result EXPR_BEG, :tCOLON, text
+    end
+    case
+    when scan(/\'/) then
+      string STR_SSYM
+    when scan(/\"/) then
+      string STR_DSYM
+    end
+    result EXPR_FNAME, :tSYMBEG, text
+  end
+  def process_colon2 text
+    if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
+      result EXPR_BEG, :tCOLON3, text
+    else
+      result EXPR_DOT, :tCOLON2, text
+    end
+  end
   def process_float text
     rb_compile_error "Invalid numeric format" if text =~ /__/
     case
-    when text.end_with?('ri')
+    when text.end_with?("ri")
       return result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
-    when text.end_with?('i')
+    when text.end_with?("i")
       return result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
-    when text.end_with?('r')
+    when text.end_with?("r")
       return result EXPR_NUM, :tRATIONAL,  Rational(text.chop)
     else
       return result EXPR_NUM, :tFLOAT, text.to_f
@@ -612,6 +604,24 @@ class RubyLexer
     result EXPR_END, tok_id, text
   end
+  def process_label text
+    symbol = possibly_escape_string text, /^"/
+    result EXPR_LAB, :tLABEL, [symbol, self.lineno]
+  end
+  def process_label_or_string text
+    if @was_label && text =~ /:\Z/ then
+      @was_label = nil
+      return process_label text
+    elsif text =~ /:\Z/ then
+      ss.pos -= 1 # put back ":"
+      text = text[0..-2]
+    end
+    result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
+  end
   def process_lchevron text
     if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
         !is_end? &&
@@ -634,14 +644,14 @@ class RubyLexer
     c = matched
     hit = false
-    if c == '#' then
+    if c == "#" then
       ss.pos -= 1
       # TODO: handle magic comments
       while scan(/\s*\#.*(\n+|\z)/) do
         hit = true
         self.lineno += matched.lines.to_a.size
-        @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
+        @comments << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
       end
       return nil if end_of_stream?
@@ -697,7 +707,7 @@ class RubyLexer
               #      "an argument list, not a decomposed argument")
               :tLPAREN2
             else
-              :tLPAREN2 # plain '(' in parse.y
+              :tLPAREN2 # plain "(" in parse.y
             end
     self.paren_nest += 1
@@ -735,7 +745,7 @@ class RubyLexer
     return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
-    if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
+    if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
       arg_ambiguous if is_arg?
       if check(/\d/) then
@@ -760,12 +770,12 @@ class RubyLexer
     if check(/\s|\v/) then
       unless is_arg? then
-        c2 = { " " => 's',
-              "\n" => 'n',
-              "\t" => 't',
-              "\v" => 'v',
-              "\r" => 'r',
-              "\f" => 'f' }[matched]
+        c2 = { " " => "s",
+              "\n" => "n",
+              "\t" => "t",
+              "\v" => "v",
+              "\r" => "r",
+              "\f" => "f" }[matched]
         if c2 then
           warning("invalid character syntax; use ?\\" + c2)
@@ -838,43 +848,38 @@ class RubyLexer
     result EXPR_PAR, token, text
   end
-  def possibly_escape_string text, check
-    content = match[1]
-    if text =~ check then
-      content.gsub(ESC) { unescape $1 }
-    else
-      content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
-    end
-  end
-  def process_symbol text
-    symbol = possibly_escape_string text, /^:"/
+  def process_string # TODO: rewrite / remove
+    # matches top of parser_yylex in compare/parse23.y:8113
+    token = if lex_strterm[0] == :heredoc then
+              self.heredoc lex_strterm
+            else
+              self.parse_string lex_strterm
+            end
-    result EXPR_END, :tSYMBOL, symbol
-  end
+    token_type, c = token
-  def was_label?
-    @was_label = ruby22_label?
-    true
-  end
+    # matches parser_string_term from 2.3, but way off from 2.5
+    if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
+      if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
+           !cond.is_in_state) || is_arg?) &&
+          is_label_suffix? then
+        scan(/:/)
+        token_type = token[0] = :tLABEL_END
+      end
+    end
-  def process_label_or_string text
-    if @was_label && text =~ /:\Z/ then
-      @was_label = nil
-      return process_label text
-    elsif text =~ /:\Z/ then
-      ss.pos -= 1 # put back ":"
-      text = text[0..-2]
+    if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
+      self.lex_strterm = nil
+      self.lex_state   = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END|EXPR_ENDARG
     end
-    result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
+    return token
   end
-  def process_label text
-    symbol = possibly_escape_string text, /^"/
+  def process_symbol text
+    symbol = possibly_escape_string text, /^:"/
-    result EXPR_LAB, :tLABEL, [symbol, self.lineno]
+    result EXPR_END|EXPR_ENDARG, :tSYMBOL, symbol
   end
   def process_token text
@@ -902,6 +907,7 @@ class RubyLexer
     if is_label_possible? and is_label_suffix? then
       scan(/:/)
+      # TODO: propagate the lineno to ALL results
       return result EXPR_LAB, :tLABEL, [token, self.lineno]
     end
@@ -1084,23 +1090,24 @@ class RubyLexer
     [token, text]
   end
-  def scan re
-    ss.scan re
+  def ruby22_label?
+    ruby22plus? and is_label_possible?
   end
-  def check re
-    ss.check re
+  def ruby22plus?
+    parser.class.version >= 22
   end
-  def eat_whitespace
-    r = scan(/\s+/)
-    self.extra_lineno += r.count("\n") if r
-    r
+  def ruby23plus?
+    parser.class.version >= 23
   end
-  def fixup_lineno extra = 0
-    self.lineno += self.extra_lineno + extra
-    self.extra_lineno = 0
+  def ruby24minus?
+    parser.class.version <= 24
+  end
+  def scan re
+    ss.scan re
   end
   def scanner_class # TODO: design this out of oedipus_lex. or something.
@@ -1123,12 +1130,6 @@ class RubyLexer
     self.lex_strterm = [:strterm, type, beg, nnd]
   end
-  # TODO: consider
-  # def src= src
-  #   raise "bad src: #{src.inspect}" unless String === src
-  #   @src = RPStringScanner.new(src)
-  # end
   def tokadd_escape term # TODO: rewrite / remove
     case
     when scan(/\\\n/) then
@@ -1158,14 +1159,18 @@ class RubyLexer
   end
   def tokadd_string(func, term, paren) # TODO: rewrite / remove
-    qwords = (func & STR_FUNC_QWORDS) != 0
-    escape = (func & STR_FUNC_ESCAPE) != 0
-    expand = (func & STR_FUNC_EXPAND) != 0
-    regexp = (func & STR_FUNC_REGEXP) != 0
-    symbol = (func & STR_FUNC_SYMBOL) != 0
+    qwords = func =~ STR_FUNC_QWORDS
+    escape = func =~ STR_FUNC_ESCAPE
+    expand = func =~ STR_FUNC_EXPAND
+    regexp = func =~ STR_FUNC_REGEXP
+    symbol = func =~ STR_FUNC_SYMBOL
     paren_re = @@regexp_cache[paren]
-    term_re  = @@regexp_cache[term]
+    term_re  = if term == "\n"
+                 /#{Regexp.escape "\r"}?#{Regexp.escape "\n"}/
+               else
+                 @@regexp_cache[term]
+               end
     until end_of_stream? do
       c = nil
@@ -1195,7 +1200,7 @@ class RubyLexer
           string_buffer << "\n"
           next
         when qwords && scan(/\\\s/) then
-          c = ' '
+          c = " "
         when expand && scan(/\\\n/) then
           next
         when regexp && check(/\\/) then
@@ -1220,8 +1225,12 @@ class RubyLexer
       end # top case
       unless handled then
-        t = Regexp.escape term
-        x = Regexp.escape(paren) if paren && paren != "\000"
+        t = if term == "\n"
+              Regexp.escape "\r\n"
+            else
+              Regexp.escape term
+            end
+        x = Regexp.escape paren if paren && paren != "\000"
         re = if qwords then
                /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
              else
@@ -1279,171 +1288,154 @@ class RubyLexer
     # do nothing for now
   end
-  def ruby22plus?
-    parser.class.version >= 22
-  end
-  def ruby23plus?
-    parser.class.version >= 23
+  def was_label?
+    @was_label = ruby22_label?
+    true
   end
-  def process_string # TODO: rewrite / remove
-    # matches top of parser_yylex in compare/parse23.y:8113
-    token = if lex_strterm[0] == :heredoc then
-              self.heredoc lex_strterm
-            else
-              self.parse_string lex_strterm
-            end
+  class State
+    attr_accessor :n
+    attr_accessor :names
-    token_type, c = token
+    # TODO: take a shared hash of strings for inspect/to_s
+    def initialize o, names
+      raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
-    # matches parser_string_term
-    if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
-      if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
-           !cond.is_in_state) || is_arg?) &&
-          is_label_suffix? then
-        scan(/:/)
-        token_type = token[0] = :tLABEL_END
-      end
+      self.n = o
+      self.names = names
     end
-    if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
-      self.lex_strterm = nil
-      self.lex_state   = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END
+    def == o
+      self.equal?(o) || (o.class == self.class && o.n == self.n)
     end
-    return token
-  end
-  def parse_quote # TODO: remove / rewrite
-    beg, nnd, short_hand, c = nil, nil, false, nil
-    if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
-      rb_compile_error "unknown type of %string" if ss.matched_size == 2
-      c, beg, short_hand = matched, ss.getch, false
-    else                               # Short-hand (e.g. %{, %., %!, etc)
-      c, beg, short_hand = 'Q', ss.getch, true
+    def =~ v
+      (self.n & v.n) != 0
     end
-    if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
-      rb_compile_error "unterminated quoted string meets end of file"
+    def | v
+      raise ArgumentError, "Incompatible State: %p vs %p" % [self, v] unless
+        self.names == v.names
+      self.class.new(self.n | v.n, self.names)
     end
-    # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
-    nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
-    nnd, beg = beg, "\0" if nnd.nil?
-    token_type, text = nil, "%#{c}#{beg}"
-    token_type, string_type = case c
-                              when 'Q' then
-                                ch = short_hand ? nnd : c + beg
-                                text = "%#{ch}"
-                                [:tSTRING_BEG,   STR_DQUOTE]
-                              when 'q' then
-                                [:tSTRING_BEG,   STR_SQUOTE]
-                              when 'W' then
-                                eat_whitespace
-                                [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
-                              when 'w' then
-                                eat_whitespace
-                                [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
-                              when 'x' then
-                                [:tXSTRING_BEG,  STR_XQUOTE]
-                              when 'r' then
-                                [:tREGEXP_BEG,   STR_REGEXP]
-                              when 's' then
-                                self.lex_state = EXPR_FNAME
-                                [:tSYMBEG,       STR_SSYM]
-                              when 'I' then
-                                eat_whitespace
-                                [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
-                              when 'i' then
-                                eat_whitespace
-                                [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
-                              end
-    rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
-      token_type.nil?
-    raise "huh" unless string_type
-    string string_type, nnd, beg
-    return token_type, text
-  end
-  def parse_string quote # TODO: rewrite / remove
-    _, string_type, term, open = quote
-    space = false # FIX: remove these
-    func = string_type
-    paren = open
-    term_re = @@regexp_cache[term]
-    qwords = (func & STR_FUNC_QWORDS) != 0
-    regexp = (func & STR_FUNC_REGEXP) != 0
-    expand = (func & STR_FUNC_EXPAND) != 0
+    def inspect
+      return "Value(0)" if n.zero? # HACK?
-    unless func then # nil'ed from qwords below. *sigh*
-      return :tSTRING_END, nil
+      names.map { |v, k| k if self =~ v }.
+        compact.
+        join("|").
+        gsub(/(?:EXPR_|STR_(?:FUNC_)?)/, "")
     end
-    space = true if qwords and eat_whitespace
+    alias to_s inspect
-    if self.string_nest == 0 && scan(/#{term_re}/) then
-      if qwords then
-        quote[1] = nil
-        return :tSPACE, nil
-      elsif regexp then
-        return :tREGEXP_END, self.regx_options
-      else
-        return :tSTRING_END, term
-      end
-    end
+    module Values
+      expr_names = {}
+      EXPR_NONE    = State.new    0x0, expr_names
+      EXPR_BEG     = State.new    0x1, expr_names
+      EXPR_END     = State.new    0x2, expr_names
+      EXPR_ENDARG  = State.new    0x4, expr_names
+      EXPR_ENDFN   = State.new    0x8, expr_names
+      EXPR_ARG     = State.new   0x10, expr_names
+      EXPR_CMDARG  = State.new   0x20, expr_names
+      EXPR_MID     = State.new   0x40, expr_names
+      EXPR_FNAME   = State.new   0x80, expr_names
+      EXPR_DOT     = State.new  0x100, expr_names
+      EXPR_CLASS   = State.new  0x200, expr_names
+      EXPR_LABEL   = State.new  0x400, expr_names
+      EXPR_LABELED = State.new  0x800, expr_names
+      EXPR_FITEM   = State.new 0x1000, expr_names
-    return :tSPACE, nil if space
+      EXPR_BEG_ANY = EXPR_BEG | EXPR_MID    | EXPR_CLASS
+      EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
+      EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
-    self.string_buffer = []
+      # extra fake lex_state names to make things a bit cleaner
-    if expand
-      case
-      when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
-        # TODO: !ISASCII
-        # ?! see parser_peek_variable_name
-        return :tSTRING_DVAR, nil
-      when scan(/#(?=\@\@?[a-zA-Z_])/) then
-        # TODO: !ISASCII
-        return :tSTRING_DVAR, nil
-      when scan(/#[{]/) then
-        self.command_start = true
-        return :tSTRING_DBEG, nil
-      when scan(/#/) then
-        string_buffer << '#'
-      end
-    end
+      EXPR_LAB = EXPR_ARG|EXPR_LABELED
+      EXPR_NUM = EXPR_END|EXPR_ENDARG
+      EXPR_PAR = EXPR_BEG|EXPR_LABEL
+      EXPR_PAD = EXPR_BEG|EXPR_LABELED
-    if tokadd_string(func, term, paren) == RubyLexer::EOF then
-      rb_compile_error "unterminated string meets end of file"
+      EXPR_LIT = EXPR_NUM # TODO: migrate to EXPR_LIT
+      expr_names.merge!(EXPR_NONE    => "EXPR_NONE",
+                        EXPR_BEG     => "EXPR_BEG",
+                        EXPR_END     => "EXPR_END",
+                        EXPR_ENDARG  => "EXPR_ENDARG",
+                        EXPR_ENDFN   => "EXPR_ENDFN",
+                        EXPR_ARG     => "EXPR_ARG",
+                        EXPR_CMDARG  => "EXPR_CMDARG",
+                        EXPR_MID     => "EXPR_MID",
+                        EXPR_FNAME   => "EXPR_FNAME",
+                        EXPR_DOT     => "EXPR_DOT",
+                        EXPR_CLASS   => "EXPR_CLASS",
+                        EXPR_LABEL   => "EXPR_LABEL",
+                        EXPR_LABELED => "EXPR_LABELED",
+                        EXPR_FITEM   => "EXPR_FITEM")
+      # ruby constants for strings
+      str_func_names = {}
+      STR_FUNC_BORING = State.new 0x00,    str_func_names
+      STR_FUNC_ESCAPE = State.new 0x01,    str_func_names
+      STR_FUNC_EXPAND = State.new 0x02,    str_func_names
+      STR_FUNC_REGEXP = State.new 0x04,    str_func_names
+      STR_FUNC_QWORDS = State.new 0x08,    str_func_names
+      STR_FUNC_SYMBOL = State.new 0x10,    str_func_names
+      STR_FUNC_INDENT = State.new 0x20,    str_func_names # <<-HEREDOC
+      STR_FUNC_LABEL  = State.new 0x40,    str_func_names
+      STR_FUNC_LIST   = State.new 0x4000,  str_func_names
+      STR_FUNC_TERM   = State.new 0x8000,  str_func_names
+      STR_FUNC_ICNTNT = State.new 0x10000, str_func_names # <<~HEREDOC -- TODO: remove?
+      # TODO: check parser25.y on how they do STR_FUNC_INDENT
+      STR_SQUOTE = STR_FUNC_BORING
+      STR_DQUOTE = STR_FUNC_EXPAND
+      STR_XQUOTE = STR_FUNC_EXPAND
+      STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
+      STR_SWORD  = STR_FUNC_QWORDS | STR_FUNC_LIST
+      STR_DWORD  = STR_FUNC_QWORDS | STR_FUNC_EXPAND | STR_FUNC_LIST
+      STR_SSYM   = STR_FUNC_SYMBOL
+      STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
+      str_func_names.merge!(STR_FUNC_ESCAPE => "STR_FUNC_ESCAPE",
+                            STR_FUNC_EXPAND => "STR_FUNC_EXPAND",
+                            STR_FUNC_REGEXP => "STR_FUNC_REGEXP",
+                            STR_FUNC_QWORDS => "STR_FUNC_QWORDS",
+                            STR_FUNC_SYMBOL => "STR_FUNC_SYMBOL",
+                            STR_FUNC_INDENT => "STR_FUNC_INDENT",
+                            STR_FUNC_LABEL  => "STR_FUNC_LABEL",
+                            STR_FUNC_LIST   => "STR_FUNC_LIST",
+                            STR_FUNC_TERM   => "STR_FUNC_TERM",
+                            STR_FUNC_ICNTNT => "STR_FUNC_ICNTNT",
+                            STR_SQUOTE      => "STR_SQUOTE")
     end
-    return :tSTRING_CONTENT, string_buffer.join
+    include Values
   end
+  include State::Values
 end
 require "ruby_lexer.rex"
 if ENV["RP_LINENO_DEBUG"] then
   class RubyLexer
-    alias :old_lineno= :lineno=
     def d o
       $stderr.puts o.inspect
     end
+    alias old_lineno= lineno=
     def lineno= n
       self.old_lineno= n
       where = caller.first.split(/:/).first(2).join(":")
-      d :lineno => [n, where, ss && ss.rest[0,40]]
+      d :lineno => [n, where, ss && ss.rest[0, 40]]
     end
   end
 end