RubyGems - ruby-next-parser - Versions diffs - 3.1.1.3 → 3.2.2.0 - Mend

ruby-next-parser 3.1.1.3 → 3.2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +4 -4
data/lib/parser/ruby-next/lexer-F0.rb +12957 -0
data/lib/parser/ruby-next/lexer-F1.rb +14968 -0
data/lib/parser/ruby-next/lexer.rl +201 -739
data/lib/parser/ruby-next/parser_ext.rb +6 -1
data/lib/parser/ruby-next/version.rb +1 -1
data/lib/parser/rubynext.rb +8151 -3791
data/lib/parser/rubynext.y +36 -15
metadata +5 -4
data/lib/parser/ruby-next/lexer.rb +0 -25177

data/lib/parser/ruby-next/lexer.rl CHANGED Viewed

@@ -82,14 +82,6 @@ class Next
   %% write data nofinal;
   # %
-  ESCAPES = {
-    ?a.ord => "\a", ?b.ord  => "\b", ?e.ord => "\e", ?f.ord => "\f",
-    ?n.ord => "\n", ?r.ord  => "\r", ?s.ord => "\s", ?t.ord => "\t",
-    ?v.ord => "\v", ?\\.ord => "\\"
-  }.freeze
-  REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
   attr_reader   :source_buffer
   attr_accessor :diagnostics
@@ -100,7 +92,7 @@ class Next
   attr_accessor :tokens, :comments
-  attr_reader :paren_nest, :cmdarg_stack, :cond_stack, :lambda_stack
+  attr_reader :paren_nest, :cmdarg_stack, :cond_stack, :lambda_stack, :version
   def initialize(version)
     @version    = version
@@ -110,6 +102,26 @@ class Next
     @tokens     = nil
     @comments   = nil
+    @_lex_actions =
+      if self.class.respond_to?(:_lex_actions, true)
+        self.class.send :_lex_actions
+      else
+        []
+      end
+    @emit_integer = lambda { |chars, p| emit(:tINTEGER,   chars); p }
+    @emit_rational = lambda { |chars, p| emit(:tRATIONAL,  Rational(chars)); p }
+    @emit_imaginary = lambda { |chars, p| emit(:tIMAGINARY, Complex(0, chars)); p }
+    @emit_imaginary_rational = lambda { |chars, p| emit(:tIMAGINARY, Complex(0, Rational(chars))); p }
+    @emit_integer_re = lambda { |chars, p| emit(:tINTEGER,   chars, @ts, @te - 2); p - 2 }
+    @emit_integer_if = lambda { |chars, p| emit(:tINTEGER,   chars, @ts, @te - 2); p - 2 }
+    @emit_integer_rescue = lambda { |chars, p| emit(:tINTEGER,   chars, @ts, @te - 6); p - 6 }
+    @emit_float = lambda { |chars, p| emit(:tFLOAT,     Float(chars)); p }
+    @emit_imaginary_float = lambda { |chars, p| emit(:tIMAGINARY, Complex(0, Float(chars))); p }
+    @emit_float_if =     lambda { |chars, p| emit(:tFLOAT,     Float(chars), @ts, @te - 2); p - 2 }
+    @emit_float_rescue = lambda { |chars, p| emit(:tFLOAT,     Float(chars), @ts, @te - 6); p - 6 }
     reset
   end
@@ -139,7 +151,6 @@ class Next
     # Lexer state:
     @token_queue   = []
-    @literal_stack = []
     @eq_begin_s    = nil # location of last encountered =begin
     @sharp_s       = nil # location of last encountered #
@@ -151,34 +162,20 @@ class Next
     @num_suffix_s  = nil # starting position of numeric suffix
     @num_xfrm      = nil # numeric suffix-induced transformation
-    @escape_s      = nil # starting position of current sequence
-    @escape        = nil # last escaped sequence, as string
-    @herebody_s    = nil # starting position of current heredoc line
     # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
     # encountered after a matching closing parenthesis.
     @paren_nest    = 0
     @lambda_stack  = []
-    # After encountering the closing line of <<~SQUIGGLY_HEREDOC,
-    # we store the indentation level and give it out to the parser
-    # on request. It is not possible to infer indentation level just
-    # from the AST because escape sequences such as `\ ` or `\t` are
-    # expanded inside the lexer, but count as non-whitespace for
-    # indentation purposes.
-    @dedent_level  = nil
     # If the lexer is in `command state' (aka expr_value)
     # at the entry to #advance, it will transition to expr_cmdarg
     # instead of expr_arg at certain points.
     @command_start = true
-    # True at the end of "def foo a:"
-    @in_kwarg      = false
     # State before =begin / =end block comment
     @cs_before_block_comment = self.class.lex_en_line_begin
+    @strings = Parser::LexerStrings.new(self, @version)
   end
   def source_buffer=(source_buffer)
@@ -200,6 +197,9 @@ class Next
     else
       @source_pts = nil
     end
+    @strings.source_buffer = @source_buffer
+    @strings.source_pts = @source_pts
   end
   def encoding
@@ -220,10 +220,7 @@ class Next
     :expr_endfn    => lex_en_expr_endfn,
     :expr_labelarg => lex_en_expr_labelarg,
-    :interp_string => lex_en_interp_string,
-    :interp_words  => lex_en_interp_words,
-    :plain_string  => lex_en_plain_string,
-    :plain_words   => lex_en_plain_string,
+    :inside_string => lex_en_inside_string
   }
   def state
@@ -253,15 +250,12 @@ class Next
   end
   def dedent_level
-    # We erase @dedent_level as a precaution to avoid accidentally
-    # using a stale value.
-    dedent_level, @dedent_level = @dedent_level, nil
-    dedent_level
+    @strings.dedent_level
   end
   # Return next token: [type, value].
   def advance
-    if @token_queue.any?
+    unless @token_queue.empty?
       return @token_queue.shift
     end
@@ -276,6 +270,7 @@ class Next
     _lex_to_state_actions   = klass.send :_lex_to_state_actions
     _lex_from_state_actions = klass.send :_lex_from_state_actions
     _lex_eof_trans          = klass.send :_lex_eof_trans
+    _lex_actions            = @_lex_actions
     pe = @source_pts.size + 2
     p, eof = @p, pe
@@ -307,10 +302,6 @@ class Next
   protected
-  def eof_codepoint?(point)
-    [0x04, 0x1a, 0x00].include? point
-  end
   def version?(*versions)
     versions.include?(@version)
   end
@@ -320,12 +311,8 @@ class Next
     @stack[@top]
   end
-  def encode_escape(ord)
-    ord.chr.force_encoding(@source_buffer.source.encoding)
-  end
   def tok(s = @ts, e = @te)
-    @source_buffer.slice(s...e)
+    @source_buffer.slice(s, e - s)
   end
   def range(s = @ts, e = @te)
@@ -378,64 +365,110 @@ class Next
     nil
   end
+  def emit_comment_from_range(p, pe)
+    emit_comment(@sharp_s, p == pe ? p - 2 : p)
+  end
   def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
     @diagnostics.process(
         Parser::Diagnostic.new(type, reason, arguments, location, highlights))
   end
-  #
-  # === LITERAL STACK ===
-  #
-  def push_literal(*args)
-    new_literal = Literal.new(self, *args)
-    @literal_stack.push(new_literal)
-    next_state_for_literal(new_literal)
+  def e_lbrace
+    @cond.push(false); @cmdarg.push(false)
+    current_literal = @strings.literal
+    if current_literal
+      current_literal.start_interp_brace
+    end
   end
-  def next_state_for_literal(literal)
-    if literal.words? && literal.backslash_delimited?
-      if literal.interpolate?
-        self.class.lex_en_interp_backslash_delimited_words
-      else
-        self.class.lex_en_plain_backslash_delimited_words
-      end
-    elsif literal.words? && !literal.backslash_delimited?
-      if literal.interpolate?
-        self.class.lex_en_interp_words
-      else
-        self.class.lex_en_plain_words
-      end
-    elsif !literal.words? && literal.backslash_delimited?
-      if literal.interpolate?
-        self.class.lex_en_interp_backslash_delimited
+  def numeric_literal_int
+    digits = tok(@num_digits_s, @num_suffix_s)
+    if digits.end_with? '_'.freeze
+      diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
+                 range(@te - 1, @te)
+    elsif digits.empty? && @num_base == 8 && version?(18)
+      # 1.8 did not raise an error on 0o.
+      digits = '0'.freeze
+    elsif digits.empty?
+      diagnostic :error, :empty_numeric
+    elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
+      invalid_s = @num_digits_s + invalid_idx
+      diagnostic :error, :invalid_octal, nil,
+                 range(invalid_s, invalid_s + 1)
+    end
+    digits
+  end
+  def on_newline(p)
+    @strings.on_newline(p)
+  end
+  def check_ambiguous_slash(tm)
+    if tok(tm, tm + 1) == '/'.freeze
+      # Ambiguous regexp literal.
+      if @version < 30
+        diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
       else
-        self.class.lex_en_plain_backslash_delimited
+        diagnostic :warning, :ambiguous_regexp, nil, range(tm, tm + 1)
       end
+    end
+  end
+  def emit_global_var(ts = @ts, te = @te)
+    if tok(ts, te) =~ /^\$([1-9][0-9]*)$/
+      emit(:tNTH_REF, tok(ts + 1, te).to_i, ts, te)
+    elsif tok =~ /^\$([&`'+])$/
+      emit(:tBACK_REF, tok(ts, te), ts, te)
     else
-      if literal.interpolate?
-        self.class.lex_en_interp_string
-      else
-        self.class.lex_en_plain_string
-      end
+      emit(:tGVAR, tok(ts, te), ts, te)
     end
   end
-  def literal
-    @literal_stack.last
+  def emit_class_var(ts = @ts, te = @te)
+    if tok(ts, te) =~ /^@@[0-9]/
+      diagnostic :error, :cvar_name, { :name => tok(ts, te) }
+    end
+    emit(:tCVAR, tok(ts, te), ts, te)
   end
-  def pop_literal
-    old_literal = @literal_stack.pop
+  def emit_instance_var(ts = @ts, te = @te)
+    if tok(ts, te) =~ /^@[0-9]/
+      diagnostic :error, :ivar_name, { :name => tok(ts, te) }
+    end
+    emit(:tIVAR, tok(ts, te), ts, te)
+  end
+  def emit_rbrace_rparen_rbrack
+    emit_table(PUNCTUATION)
-    @dedent_level = old_literal.dedent_level
+    if @version < 24
+      @cond.lexpop
+      @cmdarg.lexpop
+    else
+      @cond.pop
+      @cmdarg.pop
+    end
+  end
-    if old_literal.type == :tREGEXP_BEG
-      # Fetch modifiers.
-      self.class.lex_en_regexp_modifiers
+  def emit_colon_with_digits(p, tm, diag_msg)
+    if @version >= 27
+      diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
     else
-      self.class.lex_en_expr_end
+      emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
+      p = @ts
     end
+    p
+  end
+  def emit_singleton_class
+    emit(:kCLASS, 'class'.freeze, @ts, @ts + 5)
+    emit(:tLSHFT, '<<'.freeze,    @te - 2, @te)
   end
   # Mapping of strings to parser tokens.
@@ -457,7 +490,7 @@ class Next
     '=>'  => :tASSOC,   '::'  => :tCOLON2,  '===' => :tEQQ,
     '<=>' => :tCMP,     '[]'  => :tAREF,    '[]=' => :tASET,
     '{'   => :tLCURLY,  '}'   => :tRCURLY,  '`'   => :tBACK_REF2,
-    '!@'  => :tBANG,    '&.'  => :tANDDOT,  '.:'  => :tMETHREF
+    '!@'  => :tBANG,    '&.'  => :tANDDOT, '.:'  => :tMETHREF,
   }
   PUNCTUATION_BEGIN = {
@@ -480,6 +513,11 @@ class Next
     'BEGIN'  => :klBEGIN,      'END'      => :klEND,
   }
+  ESCAPE_WHITESPACE = {
+    " "  => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
+    "\v" => '\v', "\f" => '\f'
+  }
   %w(class module def undef begin end then elsif else ensure case when
      for break next redo retry in do return yield super self nil true
      false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
@@ -531,7 +569,7 @@ class Next
     # This allows to feed the lexer more data if needed; this is only used
     # in tests.
     #
-    # Note that this action is not embedded into e_eof like e_heredoc_nl and e_bs
+    # Note that this action is not embedded into e_eof like e_nl and e_bs
     # below. This is due to the fact that scanner state at EOF is observed
     # by tests, and encapsulating it in a rule would break the introspection.
     fhold; fbreak;
@@ -633,581 +671,43 @@ class Next
   flo_pow  = [eE] [+\-]? ( digit+ '_' )* digit+;
   int_suffix =
-    ''       % { @num_xfrm = lambda { |chars| emit(:tINTEGER,   chars) } }
-  | 'r'      % { @num_xfrm = lambda { |chars| emit(:tRATIONAL,  Rational(chars)) } }
-  | 'i'      % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, chars)) } }
-  | 'ri'     % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
-  | 're'     % { @num_xfrm = lambda { |chars| emit(:tINTEGER,   chars, @ts, @te - 2); p -= 2 } }
-  | 'if'     % { @num_xfrm = lambda { |chars| emit(:tINTEGER,   chars, @ts, @te - 2); p -= 2 } }
-  | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tINTEGER,   chars, @ts, @te - 6); p -= 6 } };
+    ''       % { @num_xfrm = @emit_integer }
+  | 'r'      % { @num_xfrm = @emit_rational }
+  | 'i'      % { @num_xfrm = @emit_imaginary }
+  | 'ri'     % { @num_xfrm = @emit_imaginary_rational }
+  | 're'     % { @num_xfrm = @emit_integer_re }
+  | 'if'     % { @num_xfrm = @emit_integer_if }
+  | 'rescue' % { @num_xfrm = @emit_integer_rescue };
   flo_pow_suffix =
-    ''   % { @num_xfrm = lambda { |chars| emit(:tFLOAT,     Float(chars)) } }
-  | 'i'  % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Float(chars))) } }
-  | 'if' % { @num_xfrm = lambda { |chars| emit(:tFLOAT,     Float(chars), @ts, @te - 2); p -= 2 } };
+    ''   % { @num_xfrm = @emit_float }
+  | 'i'  % { @num_xfrm = @emit_imaginary_float }
+  | 'if' % { @num_xfrm = @emit_float_if };
   flo_suffix =
     flo_pow_suffix
-  | 'r'      % { @num_xfrm = lambda { |chars| emit(:tRATIONAL,  Rational(chars)) } }
-  | 'ri'     % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
-  | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tFLOAT,     Float(chars), @ts, @te - 6); p -= 6 } };
-  #
-  # === ESCAPE SEQUENCE PARSING ===
-  #
-  # Escape parsing code is a Ragel pattern, not a scanner, and therefore
-  # it shouldn't directly raise errors or perform other actions with side effects.
-  # In reality this would probably just mess up error reporting in pathological
-  # cases, through.
-  # The amount of code required to parse \M\C stuff correctly is ridiculous.
-  escaped_nl = "\\" c_nl;
-  action unicode_points {
-    @escape = ""
-    codepoints  = tok(@escape_s + 2, p - 1)
-    codepoint_s = @escape_s + 2
-    if @version < 24
-      if codepoints.start_with?(" ") || codepoints.start_with?("\t")
-        diagnostic :fatal, :invalid_unicode_escape, nil,
-          range(@escape_s + 2, @escape_s + 3)
-      end
-      if spaces_p = codepoints.index(/[ \t]{2}/)
-        diagnostic :fatal, :invalid_unicode_escape, nil,
-          range(codepoint_s + spaces_p + 1, codepoint_s + spaces_p + 2)
-      end
-      if codepoints.end_with?(" ") || codepoints.end_with?("\t")
-        diagnostic :fatal, :invalid_unicode_escape, nil, range(p - 1, p)
-      end
-    end
-    codepoints.scan(/([0-9a-fA-F]+)|([ \t]+)/).each do |(codepoint_str, spaces)|
-      if spaces
-        codepoint_s += spaces.length
-      else
-        codepoint = codepoint_str.to_i(16)
-        if codepoint >= 0x110000
-          diagnostic :error, :unicode_point_too_large, nil,
-                     range(codepoint_s, codepoint_s + codepoint_str.length)
-          break
-        end
-        @escape     += codepoint.chr(Encoding::UTF_8)
-        codepoint_s += codepoint_str.length
-      end
-    end
-  }
-  action unescape_char {
-    codepoint = @source_pts[p - 1]
-    if @version >= 30 && (codepoint == 117 || codepoint == 85) # 'u' or 'U'
-      diagnostic :fatal, :invalid_escape
-    end
-    if (@escape = ESCAPES[codepoint]).nil?
-      @escape = encode_escape(@source_buffer.slice(p - 1))
-    end
-  }
-  action invalid_complex_escape {
-    diagnostic :fatal, :invalid_escape
-  }
-  action read_post_meta_or_ctrl_char {
-    @escape = @source_buffer.slice(p - 1).chr
-    if @version >= 27 && ((0..8).include?(@escape.ord) || (14..31).include?(@escape.ord))
-      diagnostic :fatal, :invalid_escape
-    end
-  }
-  action slash_c_char {
-    @escape = encode_escape(@escape[0].ord & 0x9f)
-  }
-  action slash_m_char {
-    @escape = encode_escape(@escape[0].ord | 0x80)
-  }
-  maybe_escaped_char = (
-        '\\' c_any      %unescape_char
-    |   '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
-    | ( c_any - [\\] )  %read_post_meta_or_ctrl_char
-  );
-  maybe_escaped_ctrl_char = ( # why?!
-        '\\' c_any      %unescape_char %slash_c_char
-    |   '?'             % { @escape = "\x7f" }
-    |   '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
-    | ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
-  );
-  escape = (
-      # \377
-      [0-7]{1,3}
-      % { @escape = encode_escape(tok(@escape_s, p).to_i(8) % 0x100) }
-      # \xff
-    | 'x' xdigit{1,2}
-        % { @escape = encode_escape(tok(@escape_s + 1, p).to_i(16)) }
-      # %q[\x]
-    | 'x' ( c_any - xdigit )
-      % {
-        diagnostic :fatal, :invalid_hex_escape, nil, range(@escape_s - 1, p + 2)
-      }
-      # \u263a
-    | 'u' xdigit{4}
-      % { @escape = tok(@escape_s + 1, p).to_i(16).chr(Encoding::UTF_8) }
-      # \u123
-    | 'u' xdigit{0,3}
-      % {
-        diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
-      }
-      # u{not hex} or u{}
-    | 'u{' ( c_any - xdigit - [ \t}] )* '}'
-      % {
-        diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
-      }
-      # \u{  \t  123  \t 456   \t\t }
-    | 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
-      (
-        ( xdigit{1,6} [ \t]* '}'
-          %unicode_points
-        )
-        |
-        ( xdigit* ( c_any - xdigit - [ \t}] )+ '}'
-          | ( c_any - [ \t}] )* c_eof
-          | xdigit{7,}
-        ) % {
-          diagnostic :fatal, :unterminated_unicode, nil, range(p - 1, p)
-        }
-      )
-      # \C-\a \cx
-    | ( 'C-' | 'c' ) escaped_nl?
-      maybe_escaped_ctrl_char
-      # \M-a
-    | 'M-' escaped_nl?
-      maybe_escaped_char
-      %slash_m_char
-      # \C-\M-f \M-\cf \c\M-f
-    | ( ( 'C-'   | 'c' ) escaped_nl?   '\\M-'
-      |   'M-\\'         escaped_nl? ( 'C-'   | 'c' ) ) escaped_nl?
-      maybe_escaped_ctrl_char
-      %slash_m_char
-    | 'C' c_any %invalid_complex_escape
-    | 'M' c_any %invalid_complex_escape
-    | ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
-    | ( c_any - [0-7xuCMc] ) %unescape_char
-    | c_eof % {
-      diagnostic :fatal, :escape_eof, nil, range(p - 1, p)
-    }
-  );
-  # Use rules in form of `e_bs escape' when you need to parse a sequence.
-  e_bs = '\\' % {
-    @escape_s = p
-    @escape   = nil
-  };
-  #
-  # === STRING AND HEREDOC PARSING ===
-  #
-  # Heredoc parsing is quite a complex topic. First, consider that heredocs
-  # can be arbitrarily nested. For example:
-  #
-  #     puts <<CODE
-  #     the result is: #{<<RESULT.inspect
-  #       i am a heredoc
-  #     RESULT
-  #     }
-  #     CODE
-  #
-  # which, incidentally, evaluates to:
-  #
-  #     the result is: "  i am a heredoc\n"
-  #
-  # To parse them, lexer refers to two kinds (remember, nested heredocs)
-  # of positions in the input stream, namely heredoc_e
-  # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
-  #
-  # heredoc_e is simply contained inside the corresponding Literal, and
-  # when the heredoc is closed, the lexing is restarted from that position.
-  #
-  # @herebody_s is quite more complex. First, @herebody_s changes after each
-  # heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
-  # contains the current line, and also when a heredoc is started, @herebody_s
-  # contains the position from which the heredoc will be lexed.
-  #
-  # Second, as (insanity) there are nested heredocs, we need to maintain a
-  # stack of these positions. Each time #push_literal is called, it saves current
-  # @heredoc_s to literal.saved_herebody_s, and after an interpolation (possibly
-  # containing another heredocs) is closed, the previous value is restored.
-  e_heredoc_nl = c_nl % {
-    # After every heredoc was parsed, @herebody_s contains the
-    # position of next token after all heredocs.
-    if @herebody_s
-      p = @herebody_s
-      @herebody_s = nil
-    end
-  };
-  action extend_string {
-    string = tok
-    # tLABEL_END is only possible in non-cond context on >= 2.2
-    if @version >= 22 && !@cond.active?
-      lookahead = @source_buffer.slice(@te...@te+2)
-    end
-    current_literal = literal
-    if !current_literal.heredoc? &&
-          (token = current_literal.nest_and_try_closing(string, @ts, @te, lookahead))
-      if token[0] == :tLABEL_END
-        p += 1
-        pop_literal
-        fnext expr_labelarg;
-      else
-        fnext *pop_literal;
-      end
-      fbreak;
-    else
-      current_literal.extend_string(string, @ts, @te)
-    end
-  }
-  action extend_string_escaped {
-    current_literal = literal
-    # Get the first character after the backslash.
-    escaped_char = @source_buffer.slice(@escape_s).chr
-    if current_literal.munge_escape? escaped_char
-      # If this particular literal uses this character as an opening
-      # or closing delimiter, it is an escape sequence for that
-      # particular character. Write it without the backslash.
-      if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
-        # Regular expressions should include escaped delimiters in their
-        # escaped form, except when the escaped character is
-        # a closing delimiter but not a regexp metacharacter.
-        #
-        # The backslash itself cannot be used as a closing delimiter
-        # at the same time as an escape symbol, but it is always munged,
-        # so this branch also executes for the non-closing-delimiter case
-        # for the backslash.
-        current_literal.extend_string(tok, @ts, @te)
-      else
-        current_literal.extend_string(escaped_char, @ts, @te)
-      end
-    else
-      # It does not. So this is an actual escape sequence, yay!
-      if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
-        # Squiggly heredocs like
-        #   <<~-HERE
-        #     1\
-        #     2
-        #   HERE
-        # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
-        # This information is emitted as is, without escaping,
-        # later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
-        current_literal.extend_string(tok, @ts, @te)
-      elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
-        # Heredocs, regexp and a few other types of literals support line
-        # continuation via \\\n sequence. The code like
-        #   "a\
-        #   b"
-        # must be parsed as "ab"
-        current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
-      elsif current_literal.regexp? && @version >= 31 && %w[c C m M].include?(escaped_char)
-        # Ruby >= 3.1 escapes \c- and \m chars, that's the only escape sequence
-        # supported by regexes so far, so it needs a separate branch.
-        current_literal.extend_string(@escape, @ts, @te)
-      elsif current_literal.regexp?
-        # Regular expressions should include escape sequences in their
-        # escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
-        current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
-      else
-        current_literal.extend_string(@escape || tok, @ts, @te)
-      end
-    end
-  }
-  # Extend a string with a newline or a EOF character.
-  # As heredoc closing line can immediately precede EOF, this action
-  # has to handle such case specially.
-  action extend_string_eol {
-    current_literal = literal
-    if @te == pe
-      diagnostic :fatal, :string_eof, nil,
-                 range(current_literal.str_s, current_literal.str_s + 1)
-    end
-    if current_literal.heredoc?
-      line = tok(@herebody_s, @ts).gsub(/\r+$/, ''.freeze)
-      if version?(18, 19, 20)
-        # See ruby:c48b4209c
-        line = line.gsub(/\r.*$/, ''.freeze)
-      end
-      # Try ending the heredoc with the complete most recently
-      # scanned line. @herebody_s always refers to the start of such line.
-      if current_literal.nest_and_try_closing(line, @herebody_s, @ts)
-        # Adjust @herebody_s to point to the next line.
-        @herebody_s = @te
-        # Continue regular lexing after the heredoc reference (<<END).
-        p = current_literal.heredoc_e - 1
-        fnext *pop_literal; fbreak;
-      else
-        # Calculate indentation level for <<~HEREDOCs.
-        current_literal.infer_indent_level(line)
-        # Ditto.
-        @herebody_s = @te
-      end
-    else
-      # Try ending the literal with a newline.
-      if current_literal.nest_and_try_closing(tok, @ts, @te)
-        fnext *pop_literal; fbreak;
-      end
-      if @herebody_s
-        # This is a regular literal intertwined with a heredoc. Like:
-        #
-        #     p <<-foo+"1
-        #     bar
-        #     foo
-        #     2"
-        #
-        # which, incidentally, evaluates to "bar\n1\n2".
-        p = @herebody_s - 1
-        @herebody_s = nil
-      end
-    end
-    if current_literal.words? && !eof_codepoint?(@source_pts[p])
-      current_literal.extend_space @ts, @te
-    else
-      # A literal newline is appended if the heredoc was _not_ closed
-      # this time (see fbreak above). See also Literal#nest_and_try_closing
-      # for rationale of calling #flush_string here.
-      current_literal.extend_string tok, @ts, @te
-      current_literal.flush_string
-    end
-  }
-  action extend_string_space {
-    literal.extend_space @ts, @te
-  }
+  | 'r'      % { @num_xfrm = @emit_rational }
+  | 'ri'     % { @num_xfrm = @emit_imaginary_rational }
+  | 'rescue' % { @num_xfrm = @emit_float_rescue };
   #
   # === INTERPOLATION PARSING ===
   #
-  # Interpolations with immediate variable names simply call into
-  # the corresponding machine.
-  interp_var = '#' ( global_var | class_var_v | instance_var_v );
-  action extend_interp_var {
-    current_literal = literal
-    current_literal.flush_string
-    current_literal.extend_content
-    emit(:tSTRING_DVAR, nil, @ts, @ts + 1)
-    p = @ts
-    fcall expr_variable;
-  }
-  # Special case for Ruby > 2.7
-  # If interpolated instance/class variable starts with a digit we parse it as a plain substring
-  # However, "#$1" is still a regular interpolation
-  interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
-  action extend_interp_digit_var {
-    if @version >= 27
-      literal.extend_string(tok, @ts, @te)
-    else
-      message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
-      diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
-    end
-  }
-  # Interpolations with code blocks must match nested curly braces, as
-  # interpolation ending is ambiguous with a block ending. So, every
-  # opening and closing brace should be matched with e_[lr]brace rules,
-  # which automatically perform the counting.
-  #
-  # Note that interpolations can themselves be nested, so brace balance
-  # is tied to the innermost literal.
-  #
-  # Also note that literals themselves should not use e_[lr]brace rules
-  # when matching their opening and closing delimiters, as the amount of
-  # braces inside the characters of a string literal is independent.
-  interp_code = '#{';
   e_lbrace = '{' % {
-    @cond.push(false); @cmdarg.push(false)
-    current_literal = literal
-    if current_literal
-      current_literal.start_interp_brace
-    end
+    e_lbrace
   };
   e_rbrace = '}' % {
-    current_literal = literal
-    if current_literal
-      if current_literal.end_interp_brace_and_try_closing
-        if version?(18, 19)
-          emit(:tRCURLY, '}'.freeze, p - 1, p)
-          @cond.lexpop
-          @cmdarg.lexpop
-        else
-          emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
-        end
-        if current_literal.saved_herebody_s
-          @herebody_s = current_literal.saved_herebody_s
-        end
-        fhold;
-        fnext *next_state_for_literal(current_literal);
-        fbreak;
-      end
+    if @strings.close_interp_on_current_literal(p)
+      fhold;
+      fnext inside_string;
+      fbreak;
     end
     @paren_nest -= 1
   };
-  action extend_interp_code {
-    current_literal = literal
-    current_literal.flush_string
-    current_literal.extend_content
-    emit(:tSTRING_DBEG, '#{'.freeze)
-    if current_literal.heredoc?
-      current_literal.saved_herebody_s = @herebody_s
-      @herebody_s = nil
-    end
-    current_literal.start_interp_brace
-    @command_start = true
-    fnext expr_value;
-    fbreak;
-  }
-  # Actual string parsers are simply combined from the primitives defined
-  # above.
-  interp_words := |*
-      interp_code      => extend_interp_code;
-      interp_digit_var => extend_interp_digit_var;
-      interp_var       => extend_interp_var;
-      e_bs escape      => extend_string_escaped;
-      c_space+         => extend_string_space;
-      c_eol            => extend_string_eol;
-      c_any            => extend_string;
-  *|;
-  interp_string := |*
-      interp_code      => extend_interp_code;
-      interp_digit_var => extend_interp_digit_var;
-      interp_var       => extend_interp_var;
-      e_bs escape      => extend_string_escaped;
-      c_eol            => extend_string_eol;
-      c_any            => extend_string;
-  *|;
-  plain_words := |*
-      e_bs c_any       => extend_string_escaped;
-      c_space+         => extend_string_space;
-      c_eol            => extend_string_eol;
-      c_any            => extend_string;
-  *|;
-  plain_string := |*
-      '\\' c_nl        => extend_string_eol;
-      e_bs c_any       => extend_string_escaped;
-      c_eol            => extend_string_eol;
-      c_any            => extend_string;
-  *|;
-  interp_backslash_delimited := |*
-      interp_code      => extend_interp_code;
-      interp_digit_var => extend_interp_digit_var;
-      interp_var       => extend_interp_var;
-      c_eol            => extend_string_eol;
-      c_any            => extend_string;
-  *|;
-  plain_backslash_delimited := |*
-      c_eol            => extend_string_eol;
-      c_any            => extend_string;
-  *|;
-  interp_backslash_delimited_words := |*
-      interp_code      => extend_interp_code;
-      interp_digit_var => extend_interp_digit_var;
-      interp_var       => extend_interp_var;
-      c_space+         => extend_string_space;
-      c_eol            => extend_string_eol;
-      c_any            => extend_string;
-  *|;
-  plain_backslash_delimited_words := |*
-      c_space+         => extend_string_space;
-      c_eol            => extend_string_eol;
-      c_any            => extend_string;
-  *|;
-  regexp_modifiers := |*
-      [A-Za-z]+
-      => {
-        unknown_options = tok.scan(/[^imxouesn]/)
-        if unknown_options.any?
-          diagnostic :error, :regexp_options,
-                     { :options => unknown_options.join }
-        end
-        emit(:tREGEXP_OPT)
-        fnext expr_end;
-        fbreak;
-      };
-      any
-      => {
-        emit(:tREGEXP_OPT, tok(@ts, @te - 1), @ts, @te - 1)
-        fhold;
-        fgoto expr_end;
-      };
-  *|;
   #
   # === WHITESPACE HANDLING ===
   #
@@ -1221,16 +721,20 @@ class Next
   # comment is deemed equivalent to non-newline whitespace
   # (c_space character class).
+  e_nl = c_nl % {
+    p = on_newline(p)
+  };
   w_space =
       c_space+
-    | '\\' e_heredoc_nl
+    | '\\' e_nl
     ;
   w_comment =
       '#'     %{ @sharp_s = p - 1 }
       # The (p == pe) condition compensates for added "\0" and
       # the way Ragel handles EOF.
-      c_line* %{ emit_comment(@sharp_s, p == pe ? p - 2 : p) }
+      c_line* %{ emit_comment_from_range(p, pe) }
     ;
   w_space_comment =
@@ -1253,7 +757,7 @@ class Next
   # is equivalent to `foo = "bar\n" + 2`.
   w_newline =
-      e_heredoc_nl;
+      e_nl;
   w_any =
       w_space
@@ -1341,34 +845,22 @@ class Next
   expr_variable := |*
       global_var
       => {
-        if    tok =~ /^\$([1-9][0-9]*)$/
-          emit(:tNTH_REF, tok(@ts + 1).to_i)
-        elsif tok =~ /^\$([&`'+])$/
-          emit(:tBACK_REF)
-        else
-          emit(:tGVAR)
-        end
+        emit_global_var
         fnext *stack_pop; fbreak;
       };
       class_var_v
       => {
-        if tok =~ /^@@[0-9]/
-          diagnostic :error, :cvar_name, { :name => tok }
-        end
+        emit_class_var
-        emit(:tCVAR)
         fnext *stack_pop; fbreak;
       };
       instance_var_v
       => {
-        if tok =~ /^@[0-9]/
-          diagnostic :error, :ivar_name, { :name => tok }
-        end
+        emit_instance_var
-        emit(:tIVAR)
         fnext *stack_pop; fbreak;
       };
   *|;
@@ -1418,7 +910,8 @@ class Next
       => {
         if version?(23)
           type, delimiter = tok[0..-2], tok[-1].chr
-          fgoto *push_literal(type, delimiter, @ts);
+          @strings.push_literal(type, delimiter, @ts)
+          fgoto inside_string;
         else
           p = @ts - 1
           fgoto expr_end;
@@ -1569,14 +1062,7 @@ class Next
       | '<<'
       )
       => {
-        if tok(tm, tm + 1) == '/'.freeze
-          # Ambiguous regexp literal.
-          if @version < 30
-            diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
-          else
-            diagnostic :warning, :ambiguous_regexp, nil, range(tm, tm + 1)
-          end
-        end
+        check_ambiguous_slash(tm)
         p = tm - 1
         fgoto expr_beg;
@@ -1774,21 +1260,26 @@ class Next
       '/' c_any
       => {
         type = delimiter = tok[0].chr
-        fhold; fgoto *push_literal(type, delimiter, @ts);
+        @strings.push_literal(type, delimiter, @ts)
+        fhold;
+        fgoto inside_string;
       };
       # %<string>
       '%' ( c_ascii - [A-Za-z0-9] )
       => {
-        type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
-        fgoto *push_literal(type, delimiter, @ts);
+        type, delimiter = @source_buffer.slice(@ts, 1).chr, tok[-1].chr
+        @strings.push_literal(type, delimiter, @ts)
+        fgoto inside_string;
       };
       # %w(we are the people)
       '%' [A-Za-z] (c_ascii - [A-Za-z0-9])
       => {
         type, delimiter = tok[0..-2], tok[-1].chr
-        fgoto *push_literal(type, delimiter, @ts);
+        @strings.push_literal(type, delimiter, @ts)
+        fgoto inside_string;
       };
       '%' c_eof
@@ -1834,10 +1325,11 @@ class Next
           p = @ts + 1
           fnext expr_beg; fbreak;
         else
-          fnext *push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
+          @strings.push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
+          @strings.herebody_s ||= new_herebody_s
-          @herebody_s ||= new_herebody_s
-          p = @herebody_s - 1
+          p = @strings.herebody_s - 1
+          fnext inside_string;
         end
       };
@@ -1871,7 +1363,9 @@ class Next
       ':' ['"] # '
       => {
         type, delimiter = tok, tok[-1].chr
-        fgoto *push_literal(type, delimiter, @ts);
+        @strings.push_literal(type, delimiter, @ts);
+        fgoto inside_string;
       };
       # :!@ is :!
@@ -1900,12 +1394,7 @@ class Next
           | '@@' %{ tm = p - 2; diag_msg = :cvar_name }
           ) [0-9]*
       => {
-        if @version >= 27
-          diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
-        else
-          emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
-          p = @ts
-        end
+        emit_colon_with_digits(p, tm, diag_msg)
         fnext expr_end; fbreak;
       };
@@ -1916,29 +1405,19 @@ class Next
       # Character constant, like ?a, ?\n, ?\u1000, and so on
       # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
-      '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
-          | (c_any - c_space_nl - e_bs) % { @escape = nil }
-          )
+      '?' c_any
       => {
-        value = @escape || tok(@ts + 1)
+        p, next_state = @strings.read_character_constant(@ts)
+        fhold; # Ragel will do `p += 1` to consume input, prevent it
-        if version?(18)
-          emit(:tINTEGER, value.getbyte(0))
+        # If strings lexer founds a character constant (?a) emit it,
+        # otherwise read ternary operator
+        if @token_queue.empty?
+          fgoto *next_state;
         else
-          emit(:tCHARACTER, value)
+          fnext *next_state;
+          fbreak;
         end
-        fnext expr_end; fbreak;
-      };
-      '?' c_space_nl
-      => {
-        escape = { " "  => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
-                   "\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
-        diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
-        p = @ts - 1
-        fgoto expr_end;
       };
       '?' c_eof
@@ -1946,13 +1425,6 @@ class Next
         diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
       };
-      # f ?aa : b: Disambiguate with a character literal.
-      '?' [A-Za-z_] bareword
-      => {
-        p = @ts - 1
-        fgoto expr_end;
-      };
       #
       # AMBIGUOUS EMPTY BLOCK ARGUMENTS
       #
@@ -2028,7 +1500,7 @@ class Next
         if version?(18)
           ident = tok(@ts, @te - 2)
-          emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
+          emit((@source_buffer.slice(@ts, 1) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
                ident, @ts, @te - 2)
           fhold; # continue as a symbol
@@ -2133,7 +1605,7 @@ class Next
       w_any;
-      e_heredoc_nl '=begin' ( c_space | c_nl_zlen )
+      e_nl '=begin' ( c_space | c_nl_zlen )
       => {
         p = @ts - 1
         @cs_before_block_comment = @cs
@@ -2186,7 +1658,8 @@ class Next
       # "bar", 'baz'
       ['"] # '
       => {
-        fgoto *push_literal(tok, tok, @ts);
+        @strings.push_literal(tok, tok, @ts)
+        fgoto inside_string;
       };
       w_space_comment;
@@ -2247,8 +1720,7 @@ class Next
            fnext expr_fname; fbreak; };
       'class' w_any* '<<'
-      => { emit(:kCLASS, 'class'.freeze, @ts, @ts + 5)
-           emit(:tLSHFT, '<<'.freeze,    @te - 2, @te)
+      => { emit_singleton_class
            fnext expr_value; fbreak; };
       # a if b:c: Syntax error.
@@ -2307,27 +1779,13 @@ class Next
       | '0'   digit* '_'? %{ @num_base = 8;  @num_digits_s = @ts } int_dec
       ) %{ @num_suffix_s = p } int_suffix
       => {
-        digits = tok(@num_digits_s, @num_suffix_s)
-        if digits.end_with? '_'.freeze
-          diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
-                     range(@te - 1, @te)
-        elsif digits.empty? && @num_base == 8 && version?(18)
-          # 1.8 did not raise an error on 0o.
-          digits = '0'.freeze
-        elsif digits.empty?
-          diagnostic :error, :empty_numeric
-        elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
-          invalid_s = @num_digits_s + invalid_idx
-          diagnostic :error, :invalid_octal, nil,
-                     range(invalid_s, invalid_s + 1)
-        end
+        digits = numeric_literal_int
         if version?(18, 19, 20)
           emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s)
           p = @num_suffix_s - 1
         else
-          @num_xfrm.call(digits.to_i(@num_base))
+          p = @num_xfrm.call(digits.to_i(@num_base), p)
         end
         fbreak;
       };
@@ -2372,7 +1830,7 @@ class Next
           emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
           p = @num_suffix_s - 1
         else
-          @num_xfrm.call(digits)
+          p = @num_xfrm.call(digits, p)
         end
         fbreak;
       };
@@ -2385,7 +1843,8 @@ class Next
       '`' | ['"] # '
       => {
         type, delimiter = tok, tok[-1].chr
-        fgoto *push_literal(type, delimiter, @ts, nil, false, false, true);
+        @strings.push_literal(type, delimiter, @ts, nil, false, false, true);
+        fgoto inside_string;
       };
       #
@@ -2470,15 +1929,7 @@ class Next
       e_rbrace | e_rparen | e_rbrack
       => {
-        emit_table(PUNCTUATION)
-        if @version < 24
-          @cond.lexpop
-          @cmdarg.lexpop
-        else
-          @cond.pop
-          @cmdarg.pop
-        end
+        emit_rbrace_rparen_rbrack
         if tok == '}'.freeze || tok == ']'.freeze
           if @version >= 25
@@ -2635,6 +2086,17 @@ class Next
       c_eof => do_eof;
   *|;
+  inside_string := |*
+      any
+      => {
+        p, next_state = @strings.advance(p)
+        fhold; # Ragel will do `p += 1` to consume input, prevent it
+        fnext *next_state;
+        fbreak;
+      };
+  *|;
   }%%
   # %
 end