ruby-next-parser 3.1.1.3 → 3.2.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -82,14 +82,6 @@ class Next
82
82
  %% write data nofinal;
83
83
  # %
84
84
 
85
- ESCAPES = {
86
- ?a.ord => "\a", ?b.ord => "\b", ?e.ord => "\e", ?f.ord => "\f",
87
- ?n.ord => "\n", ?r.ord => "\r", ?s.ord => "\s", ?t.ord => "\t",
88
- ?v.ord => "\v", ?\\.ord => "\\"
89
- }.freeze
90
-
91
- REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
92
-
93
85
  attr_reader :source_buffer
94
86
 
95
87
  attr_accessor :diagnostics
@@ -100,7 +92,7 @@ class Next
100
92
 
101
93
  attr_accessor :tokens, :comments
102
94
 
103
- attr_reader :paren_nest, :cmdarg_stack, :cond_stack, :lambda_stack
95
+ attr_reader :paren_nest, :cmdarg_stack, :cond_stack, :lambda_stack, :version
104
96
 
105
97
  def initialize(version)
106
98
  @version = version
@@ -110,6 +102,26 @@ class Next
110
102
  @tokens = nil
111
103
  @comments = nil
112
104
 
105
+ @_lex_actions =
106
+ if self.class.respond_to?(:_lex_actions, true)
107
+ self.class.send :_lex_actions
108
+ else
109
+ []
110
+ end
111
+
112
+ @emit_integer = lambda { |chars, p| emit(:tINTEGER, chars); p }
113
+ @emit_rational = lambda { |chars, p| emit(:tRATIONAL, Rational(chars)); p }
114
+ @emit_imaginary = lambda { |chars, p| emit(:tIMAGINARY, Complex(0, chars)); p }
115
+ @emit_imaginary_rational = lambda { |chars, p| emit(:tIMAGINARY, Complex(0, Rational(chars))); p }
116
+ @emit_integer_re = lambda { |chars, p| emit(:tINTEGER, chars, @ts, @te - 2); p - 2 }
117
+ @emit_integer_if = lambda { |chars, p| emit(:tINTEGER, chars, @ts, @te - 2); p - 2 }
118
+ @emit_integer_rescue = lambda { |chars, p| emit(:tINTEGER, chars, @ts, @te - 6); p - 6 }
119
+
120
+ @emit_float = lambda { |chars, p| emit(:tFLOAT, Float(chars)); p }
121
+ @emit_imaginary_float = lambda { |chars, p| emit(:tIMAGINARY, Complex(0, Float(chars))); p }
122
+ @emit_float_if = lambda { |chars, p| emit(:tFLOAT, Float(chars), @ts, @te - 2); p - 2 }
123
+ @emit_float_rescue = lambda { |chars, p| emit(:tFLOAT, Float(chars), @ts, @te - 6); p - 6 }
124
+
113
125
  reset
114
126
  end
115
127
 
@@ -139,7 +151,6 @@ class Next
139
151
 
140
152
  # Lexer state:
141
153
  @token_queue = []
142
- @literal_stack = []
143
154
 
144
155
  @eq_begin_s = nil # location of last encountered =begin
145
156
  @sharp_s = nil # location of last encountered #
@@ -151,34 +162,20 @@ class Next
151
162
  @num_suffix_s = nil # starting position of numeric suffix
152
163
  @num_xfrm = nil # numeric suffix-induced transformation
153
164
 
154
- @escape_s = nil # starting position of current sequence
155
- @escape = nil # last escaped sequence, as string
156
-
157
- @herebody_s = nil # starting position of current heredoc line
158
-
159
165
  # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
160
166
  # encountered after a matching closing parenthesis.
161
167
  @paren_nest = 0
162
168
  @lambda_stack = []
163
169
 
164
- # After encountering the closing line of <<~SQUIGGLY_HEREDOC,
165
- # we store the indentation level and give it out to the parser
166
- # on request. It is not possible to infer indentation level just
167
- # from the AST because escape sequences such as `\ ` or `\t` are
168
- # expanded inside the lexer, but count as non-whitespace for
169
- # indentation purposes.
170
- @dedent_level = nil
171
-
172
170
  # If the lexer is in `command state' (aka expr_value)
173
171
  # at the entry to #advance, it will transition to expr_cmdarg
174
172
  # instead of expr_arg at certain points.
175
173
  @command_start = true
176
174
 
177
- # True at the end of "def foo a:"
178
- @in_kwarg = false
179
-
180
175
  # State before =begin / =end block comment
181
176
  @cs_before_block_comment = self.class.lex_en_line_begin
177
+
178
+ @strings = Parser::LexerStrings.new(self, @version)
182
179
  end
183
180
 
184
181
  def source_buffer=(source_buffer)
@@ -200,6 +197,9 @@ class Next
200
197
  else
201
198
  @source_pts = nil
202
199
  end
200
+
201
+ @strings.source_buffer = @source_buffer
202
+ @strings.source_pts = @source_pts
203
203
  end
204
204
 
205
205
  def encoding
@@ -220,10 +220,7 @@ class Next
220
220
  :expr_endfn => lex_en_expr_endfn,
221
221
  :expr_labelarg => lex_en_expr_labelarg,
222
222
 
223
- :interp_string => lex_en_interp_string,
224
- :interp_words => lex_en_interp_words,
225
- :plain_string => lex_en_plain_string,
226
- :plain_words => lex_en_plain_string,
223
+ :inside_string => lex_en_inside_string
227
224
  }
228
225
 
229
226
  def state
@@ -253,15 +250,12 @@ class Next
253
250
  end
254
251
 
255
252
  def dedent_level
256
- # We erase @dedent_level as a precaution to avoid accidentally
257
- # using a stale value.
258
- dedent_level, @dedent_level = @dedent_level, nil
259
- dedent_level
253
+ @strings.dedent_level
260
254
  end
261
255
 
262
256
  # Return next token: [type, value].
263
257
  def advance
264
- if @token_queue.any?
258
+ unless @token_queue.empty?
265
259
  return @token_queue.shift
266
260
  end
267
261
 
@@ -276,6 +270,7 @@ class Next
276
270
  _lex_to_state_actions = klass.send :_lex_to_state_actions
277
271
  _lex_from_state_actions = klass.send :_lex_from_state_actions
278
272
  _lex_eof_trans = klass.send :_lex_eof_trans
273
+ _lex_actions = @_lex_actions
279
274
 
280
275
  pe = @source_pts.size + 2
281
276
  p, eof = @p, pe
@@ -307,10 +302,6 @@ class Next
307
302
 
308
303
  protected
309
304
 
310
- def eof_codepoint?(point)
311
- [0x04, 0x1a, 0x00].include? point
312
- end
313
-
314
305
  def version?(*versions)
315
306
  versions.include?(@version)
316
307
  end
@@ -320,12 +311,8 @@ class Next
320
311
  @stack[@top]
321
312
  end
322
313
 
323
- def encode_escape(ord)
324
- ord.chr.force_encoding(@source_buffer.source.encoding)
325
- end
326
-
327
314
  def tok(s = @ts, e = @te)
328
- @source_buffer.slice(s...e)
315
+ @source_buffer.slice(s, e - s)
329
316
  end
330
317
 
331
318
  def range(s = @ts, e = @te)
@@ -378,64 +365,110 @@ class Next
378
365
  nil
379
366
  end
380
367
 
368
+ def emit_comment_from_range(p, pe)
369
+ emit_comment(@sharp_s, p == pe ? p - 2 : p)
370
+ end
371
+
381
372
  def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
382
373
  @diagnostics.process(
383
374
  Parser::Diagnostic.new(type, reason, arguments, location, highlights))
384
375
  end
385
376
 
386
- #
387
- # === LITERAL STACK ===
388
- #
389
377
 
390
- def push_literal(*args)
391
- new_literal = Literal.new(self, *args)
392
- @literal_stack.push(new_literal)
393
- next_state_for_literal(new_literal)
378
+ def e_lbrace
379
+ @cond.push(false); @cmdarg.push(false)
380
+
381
+ current_literal = @strings.literal
382
+ if current_literal
383
+ current_literal.start_interp_brace
384
+ end
394
385
  end
395
386
 
396
- def next_state_for_literal(literal)
397
- if literal.words? && literal.backslash_delimited?
398
- if literal.interpolate?
399
- self.class.lex_en_interp_backslash_delimited_words
400
- else
401
- self.class.lex_en_plain_backslash_delimited_words
402
- end
403
- elsif literal.words? && !literal.backslash_delimited?
404
- if literal.interpolate?
405
- self.class.lex_en_interp_words
406
- else
407
- self.class.lex_en_plain_words
408
- end
409
- elsif !literal.words? && literal.backslash_delimited?
410
- if literal.interpolate?
411
- self.class.lex_en_interp_backslash_delimited
387
+ def numeric_literal_int
388
+ digits = tok(@num_digits_s, @num_suffix_s)
389
+
390
+ if digits.end_with? '_'.freeze
391
+ diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
392
+ range(@te - 1, @te)
393
+ elsif digits.empty? && @num_base == 8 && version?(18)
394
+ # 1.8 did not raise an error on 0o.
395
+ digits = '0'.freeze
396
+ elsif digits.empty?
397
+ diagnostic :error, :empty_numeric
398
+ elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
399
+ invalid_s = @num_digits_s + invalid_idx
400
+ diagnostic :error, :invalid_octal, nil,
401
+ range(invalid_s, invalid_s + 1)
402
+ end
403
+ digits
404
+ end
405
+
406
+ def on_newline(p)
407
+ @strings.on_newline(p)
408
+ end
409
+
410
+ def check_ambiguous_slash(tm)
411
+ if tok(tm, tm + 1) == '/'.freeze
412
+ # Ambiguous regexp literal.
413
+ if @version < 30
414
+ diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
412
415
  else
413
- self.class.lex_en_plain_backslash_delimited
416
+ diagnostic :warning, :ambiguous_regexp, nil, range(tm, tm + 1)
414
417
  end
418
+ end
419
+ end
420
+
421
# Emits the token for a global-variable reference located at ts...te.
#
#   $1, $2, ...     -> :tNTH_REF, value is the group number as an Integer
#   $&, $`, $', $+  -> :tBACK_REF, value is the source text
#   anything else   -> :tGVAR, value is the source text
#
# ts and te default to the current token bounds (@ts / @te). Every
# classification is made on the text at ts...te, so callers that pass
# explicit bounds get the same result as if that span were the current
# token.
def emit_global_var(ts = @ts, te = @te)
  name = tok(ts, te)

  case name
  when /^\$([1-9][0-9]*)$/
    # Skip the leading "$" and convert the digits to the group number.
    emit(:tNTH_REF, tok(ts + 1, te).to_i, ts, te)
  when /^\$([&`'+])$/
    emit(:tBACK_REF, name, ts, te)
  else
    emit(:tGVAR, name, ts, te)
  end
end
423
430
 
424
- def literal
425
- @literal_stack.last
431
# Emits a :tCVAR token for the class-variable reference at ts...te.
#
# If the name has a digit right after "@@", a :cvar_name error diagnostic
# is reported first; the token is emitted in either case. The diagnostic
# is located at ts...te, the same span as the token, rather than at the
# default current-token range.
#
# ts and te default to the current token bounds (@ts / @te).
def emit_class_var(ts = @ts, te = @te)
  name = tok(ts, te)

  if name =~ /^@@[0-9]/
    diagnostic :error, :cvar_name, { :name => name }, range(ts, te)
  end

  emit(:tCVAR, name, ts, te)
end
427
438
 
428
- def pop_literal
429
- old_literal = @literal_stack.pop
439
# Emits a :tIVAR token for the instance-variable reference at ts...te.
#
# If the name has a digit right after "@", an :ivar_name error diagnostic
# is reported first; the token is emitted in either case. The diagnostic
# is located at ts...te, the same span as the token, rather than at the
# default current-token range.
#
# ts and te default to the current token bounds (@ts / @te).
def emit_instance_var(ts = @ts, te = @te)
  name = tok(ts, te)

  if name =~ /^@[0-9]/
    diagnostic :error, :ivar_name, { :name => name }, range(ts, te)
  end

  emit(:tIVAR, name, ts, te)
end
446
+
447
+ def emit_rbrace_rparen_rbrack
448
+ emit_table(PUNCTUATION)
430
449
 
431
- @dedent_level = old_literal.dedent_level
450
+ if @version < 24
451
+ @cond.lexpop
452
+ @cmdarg.lexpop
453
+ else
454
+ @cond.pop
455
+ @cmdarg.pop
456
+ end
457
+ end
432
458
 
433
- if old_literal.type == :tREGEXP_BEG
434
- # Fetch modifiers.
435
- self.class.lex_en_regexp_modifiers
459
+ def emit_colon_with_digits(p, tm, diag_msg)
460
+ if @version >= 27
461
+ diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
436
462
  else
437
- self.class.lex_en_expr_end
463
+ emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
464
+ p = @ts
438
465
  end
466
+ p
467
+ end
468
+
469
+ def emit_singleton_class
470
+ emit(:kCLASS, 'class'.freeze, @ts, @ts + 5)
471
+ emit(:tLSHFT, '<<'.freeze, @te - 2, @te)
439
472
  end
440
473
 
441
474
  # Mapping of strings to parser tokens.
@@ -457,7 +490,7 @@ class Next
457
490
  '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
458
491
  '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
459
492
  '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
460
- '!@' => :tBANG, '&.' => :tANDDOT, '.:' => :tMETHREF
493
+ '!@' => :tBANG, '&.' => :tANDDOT, '.:' => :tMETHREF,
461
494
  }
462
495
 
463
496
  PUNCTUATION_BEGIN = {
@@ -480,6 +513,11 @@ class Next
480
513
  'BEGIN' => :klBEGIN, 'END' => :klEND,
481
514
  }
482
515
 
516
+ ESCAPE_WHITESPACE = {
517
+ " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
518
+ "\v" => '\v', "\f" => '\f'
519
+ }
520
+
483
521
  %w(class module def undef begin end then elsif else ensure case when
484
522
  for break next redo retry in do return yield super self nil true
485
523
  false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
@@ -531,7 +569,7 @@ class Next
531
569
  # This allows feeding the lexer more data if needed; this is only used
532
570
  # in tests.
533
571
  #
534
- # Note that this action is not embedded into e_eof like e_heredoc_nl and e_bs
572
+ # Note that this action is not embedded into e_eof like e_nl and e_bs
535
573
  # below. This is due to the fact that scanner state at EOF is observed
536
574
  # by tests, and encapsulating it in a rule would break the introspection.
537
575
  fhold; fbreak;
@@ -633,581 +671,43 @@ class Next
633
671
  flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
634
672
 
635
673
  int_suffix =
636
- '' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars) } }
637
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
638
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, chars)) } }
639
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
640
- | 're' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
641
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
642
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 6); p -= 6 } };
674
+ '' % { @num_xfrm = @emit_integer }
675
+ | 'r' % { @num_xfrm = @emit_rational }
676
+ | 'i' % { @num_xfrm = @emit_imaginary }
677
+ | 'ri' % { @num_xfrm = @emit_imaginary_rational }
678
+ | 're' % { @num_xfrm = @emit_integer_re }
679
+ | 'if' % { @num_xfrm = @emit_integer_if }
680
+ | 'rescue' % { @num_xfrm = @emit_integer_rescue };
643
681
 
644
682
  flo_pow_suffix =
645
- '' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars)) } }
646
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Float(chars))) } }
647
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 2); p -= 2 } };
683
+ '' % { @num_xfrm = @emit_float }
684
+ | 'i' % { @num_xfrm = @emit_imaginary_float }
685
+ | 'if' % { @num_xfrm = @emit_float_if };
648
686
 
649
687
  flo_suffix =
650
688
  flo_pow_suffix
651
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
652
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
653
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 6); p -= 6 } };
654
-
655
- #
656
- # === ESCAPE SEQUENCE PARSING ===
657
- #
658
-
659
- # Escape parsing code is a Ragel pattern, not a scanner, and therefore
660
- # it shouldn't directly raise errors or perform other actions with side effects.
661
- # In reality this would probably just mess up error reporting in pathological
662
- # cases, though.
663
-
664
- # The amount of code required to parse \M\C stuff correctly is ridiculous.
665
-
666
- escaped_nl = "\\" c_nl;
667
-
668
- action unicode_points {
669
- @escape = ""
670
-
671
- codepoints = tok(@escape_s + 2, p - 1)
672
- codepoint_s = @escape_s + 2
673
-
674
- if @version < 24
675
- if codepoints.start_with?(" ") || codepoints.start_with?("\t")
676
- diagnostic :fatal, :invalid_unicode_escape, nil,
677
- range(@escape_s + 2, @escape_s + 3)
678
- end
679
-
680
- if spaces_p = codepoints.index(/[ \t]{2}/)
681
- diagnostic :fatal, :invalid_unicode_escape, nil,
682
- range(codepoint_s + spaces_p + 1, codepoint_s + spaces_p + 2)
683
- end
684
-
685
- if codepoints.end_with?(" ") || codepoints.end_with?("\t")
686
- diagnostic :fatal, :invalid_unicode_escape, nil, range(p - 1, p)
687
- end
688
- end
689
-
690
- codepoints.scan(/([0-9a-fA-F]+)|([ \t]+)/).each do |(codepoint_str, spaces)|
691
- if spaces
692
- codepoint_s += spaces.length
693
- else
694
- codepoint = codepoint_str.to_i(16)
695
-
696
- if codepoint >= 0x110000
697
- diagnostic :error, :unicode_point_too_large, nil,
698
- range(codepoint_s, codepoint_s + codepoint_str.length)
699
- break
700
- end
701
-
702
- @escape += codepoint.chr(Encoding::UTF_8)
703
- codepoint_s += codepoint_str.length
704
- end
705
- end
706
- }
707
-
708
- action unescape_char {
709
- codepoint = @source_pts[p - 1]
710
-
711
- if @version >= 30 && (codepoint == 117 || codepoint == 85) # 'u' or 'U'
712
- diagnostic :fatal, :invalid_escape
713
- end
714
-
715
- if (@escape = ESCAPES[codepoint]).nil?
716
- @escape = encode_escape(@source_buffer.slice(p - 1))
717
- end
718
- }
719
-
720
- action invalid_complex_escape {
721
- diagnostic :fatal, :invalid_escape
722
- }
723
-
724
- action read_post_meta_or_ctrl_char {
725
- @escape = @source_buffer.slice(p - 1).chr
726
-
727
- if @version >= 27 && ((0..8).include?(@escape.ord) || (14..31).include?(@escape.ord))
728
- diagnostic :fatal, :invalid_escape
729
- end
730
- }
731
-
732
- action slash_c_char {
733
- @escape = encode_escape(@escape[0].ord & 0x9f)
734
- }
735
-
736
- action slash_m_char {
737
- @escape = encode_escape(@escape[0].ord | 0x80)
738
- }
739
-
740
- maybe_escaped_char = (
741
- '\\' c_any %unescape_char
742
- | '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
743
- | ( c_any - [\\] ) %read_post_meta_or_ctrl_char
744
- );
745
-
746
- maybe_escaped_ctrl_char = ( # why?!
747
- '\\' c_any %unescape_char %slash_c_char
748
- | '?' % { @escape = "\x7f" }
749
- | '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
750
- | ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
751
- );
752
-
753
- escape = (
754
- # \377
755
- [0-7]{1,3}
756
- % { @escape = encode_escape(tok(@escape_s, p).to_i(8) % 0x100) }
757
-
758
- # \xff
759
- | 'x' xdigit{1,2}
760
- % { @escape = encode_escape(tok(@escape_s + 1, p).to_i(16)) }
761
-
762
- # %q[\x]
763
- | 'x' ( c_any - xdigit )
764
- % {
765
- diagnostic :fatal, :invalid_hex_escape, nil, range(@escape_s - 1, p + 2)
766
- }
767
-
768
- # \u263a
769
- | 'u' xdigit{4}
770
- % { @escape = tok(@escape_s + 1, p).to_i(16).chr(Encoding::UTF_8) }
771
-
772
- # \u123
773
- | 'u' xdigit{0,3}
774
- % {
775
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
776
- }
777
-
778
- # u{not hex} or u{}
779
- | 'u{' ( c_any - xdigit - [ \t}] )* '}'
780
- % {
781
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
782
- }
783
-
784
- # \u{ \t 123 \t 456 \t\t }
785
- | 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
786
- (
787
- ( xdigit{1,6} [ \t]* '}'
788
- %unicode_points
789
- )
790
- |
791
- ( xdigit* ( c_any - xdigit - [ \t}] )+ '}'
792
- | ( c_any - [ \t}] )* c_eof
793
- | xdigit{7,}
794
- ) % {
795
- diagnostic :fatal, :unterminated_unicode, nil, range(p - 1, p)
796
- }
797
- )
798
-
799
- # \C-\a \cx
800
- | ( 'C-' | 'c' ) escaped_nl?
801
- maybe_escaped_ctrl_char
802
-
803
- # \M-a
804
- | 'M-' escaped_nl?
805
- maybe_escaped_char
806
- %slash_m_char
807
-
808
- # \C-\M-f \M-\cf \c\M-f
809
- | ( ( 'C-' | 'c' ) escaped_nl? '\\M-'
810
- | 'M-\\' escaped_nl? ( 'C-' | 'c' ) ) escaped_nl?
811
- maybe_escaped_ctrl_char
812
- %slash_m_char
813
-
814
- | 'C' c_any %invalid_complex_escape
815
- | 'M' c_any %invalid_complex_escape
816
- | ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
817
-
818
- | ( c_any - [0-7xuCMc] ) %unescape_char
819
-
820
- | c_eof % {
821
- diagnostic :fatal, :escape_eof, nil, range(p - 1, p)
822
- }
823
- );
824
-
825
- # Use rules in form of `e_bs escape' when you need to parse a sequence.
826
- e_bs = '\\' % {
827
- @escape_s = p
828
- @escape = nil
829
- };
830
-
831
- #
832
- # === STRING AND HEREDOC PARSING ===
833
- #
834
-
835
- # Heredoc parsing is quite a complex topic. First, consider that heredocs
836
- # can be arbitrarily nested. For example:
837
- #
838
- # puts <<CODE
839
- # the result is: #{<<RESULT.inspect
840
- # i am a heredoc
841
- # RESULT
842
- # }
843
- # CODE
844
- #
845
- # which, incidentally, evaluates to:
846
- #
847
- # the result is: " i am a heredoc\n"
848
- #
849
- # To parse them, lexer refers to two kinds (remember, nested heredocs)
850
- # of positions in the input stream, namely heredoc_e
851
- # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
852
- #
853
- # heredoc_e is simply contained inside the corresponding Literal, and
854
- # when the heredoc is closed, the lexing is restarted from that position.
855
- #
856
- # @herebody_s is quite more complex. First, @herebody_s changes after each
857
- # heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
858
- # contains the current line, and also when a heredoc is started, @herebody_s
859
- # contains the position from which the heredoc will be lexed.
860
- #
861
- # Second, as (insanity) there are nested heredocs, we need to maintain a
862
- # stack of these positions. Each time #push_literal is called, it saves current
863
- # @herebody_s to literal.saved_herebody_s, and after an interpolation (possibly
864
- # containing other heredocs) is closed, the previous value is restored.
865
-
866
- e_heredoc_nl = c_nl % {
867
- # After every heredoc was parsed, @herebody_s contains the
868
- # position of next token after all heredocs.
869
- if @herebody_s
870
- p = @herebody_s
871
- @herebody_s = nil
872
- end
873
- };
874
-
875
- action extend_string {
876
- string = tok
877
-
878
- # tLABEL_END is only possible in non-cond context on >= 2.2
879
- if @version >= 22 && !@cond.active?
880
- lookahead = @source_buffer.slice(@te...@te+2)
881
- end
882
-
883
- current_literal = literal
884
- if !current_literal.heredoc? &&
885
- (token = current_literal.nest_and_try_closing(string, @ts, @te, lookahead))
886
- if token[0] == :tLABEL_END
887
- p += 1
888
- pop_literal
889
- fnext expr_labelarg;
890
- else
891
- fnext *pop_literal;
892
- end
893
- fbreak;
894
- else
895
- current_literal.extend_string(string, @ts, @te)
896
- end
897
- }
898
-
899
- action extend_string_escaped {
900
- current_literal = literal
901
- # Get the first character after the backslash.
902
- escaped_char = @source_buffer.slice(@escape_s).chr
903
-
904
- if current_literal.munge_escape? escaped_char
905
- # If this particular literal uses this character as an opening
906
- # or closing delimiter, it is an escape sequence for that
907
- # particular character. Write it without the backslash.
908
-
909
- if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
910
- # Regular expressions should include escaped delimiters in their
911
- # escaped form, except when the escaped character is
912
- # a closing delimiter but not a regexp metacharacter.
913
- #
914
- # The backslash itself cannot be used as a closing delimiter
915
- # at the same time as an escape symbol, but it is always munged,
916
- # so this branch also executes for the non-closing-delimiter case
917
- # for the backslash.
918
- current_literal.extend_string(tok, @ts, @te)
919
- else
920
- current_literal.extend_string(escaped_char, @ts, @te)
921
- end
922
- else
923
- # It does not. So this is an actual escape sequence, yay!
924
- if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
925
- # Squiggly heredocs like
926
- # <<~-HERE
927
- # 1\
928
- # 2
929
- # HERE
930
- # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
931
- # This information is emitted as is, without escaping,
932
- # later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
933
- current_literal.extend_string(tok, @ts, @te)
934
- elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
935
- # Heredocs, regexp and a few other types of literals support line
936
- # continuation via \\\n sequence. The code like
937
- # "a\
938
- # b"
939
- # must be parsed as "ab"
940
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
941
- elsif current_literal.regexp? && @version >= 31 && %w[c C m M].include?(escaped_char)
942
- # Ruby >= 3.1 escapes \c- and \m chars, that's the only escape sequence
943
- # supported by regexes so far, so it needs a separate branch.
944
- current_literal.extend_string(@escape, @ts, @te)
945
- elsif current_literal.regexp?
946
- # Regular expressions should include escape sequences in their
947
- # escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
948
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
949
- else
950
- current_literal.extend_string(@escape || tok, @ts, @te)
951
- end
952
- end
953
- }
954
-
955
- # Extend a string with a newline or an EOF character.
956
- # As heredoc closing line can immediately precede EOF, this action
957
- # has to handle such case specially.
958
- action extend_string_eol {
959
- current_literal = literal
960
- if @te == pe
961
- diagnostic :fatal, :string_eof, nil,
962
- range(current_literal.str_s, current_literal.str_s + 1)
963
- end
964
-
965
- if current_literal.heredoc?
966
- line = tok(@herebody_s, @ts).gsub(/\r+$/, ''.freeze)
967
-
968
- if version?(18, 19, 20)
969
- # See ruby:c48b4209c
970
- line = line.gsub(/\r.*$/, ''.freeze)
971
- end
972
-
973
- # Try ending the heredoc with the complete most recently
974
- # scanned line. @herebody_s always refers to the start of such line.
975
- if current_literal.nest_and_try_closing(line, @herebody_s, @ts)
976
- # Adjust @herebody_s to point to the next line.
977
- @herebody_s = @te
978
-
979
- # Continue regular lexing after the heredoc reference (<<END).
980
- p = current_literal.heredoc_e - 1
981
- fnext *pop_literal; fbreak;
982
- else
983
- # Calculate indentation level for <<~HEREDOCs.
984
- current_literal.infer_indent_level(line)
985
-
986
- # Ditto.
987
- @herebody_s = @te
988
- end
989
- else
990
- # Try ending the literal with a newline.
991
- if current_literal.nest_and_try_closing(tok, @ts, @te)
992
- fnext *pop_literal; fbreak;
993
- end
994
-
995
- if @herebody_s
996
- # This is a regular literal intertwined with a heredoc. Like:
997
- #
998
- # p <<-foo+"1
999
- # bar
1000
- # foo
1001
- # 2"
1002
- #
1003
- # which, incidentally, evaluates to "bar\n1\n2".
1004
- p = @herebody_s - 1
1005
- @herebody_s = nil
1006
- end
1007
- end
1008
-
1009
- if current_literal.words? && !eof_codepoint?(@source_pts[p])
1010
- current_literal.extend_space @ts, @te
1011
- else
1012
- # A literal newline is appended if the heredoc was _not_ closed
1013
- # this time (see fbreak above). See also Literal#nest_and_try_closing
1014
- # for rationale of calling #flush_string here.
1015
- current_literal.extend_string tok, @ts, @te
1016
- current_literal.flush_string
1017
- end
1018
- }
1019
-
1020
- action extend_string_space {
1021
- literal.extend_space @ts, @te
1022
- }
689
+ | 'r' % { @num_xfrm = @emit_rational }
690
+ | 'ri' % { @num_xfrm = @emit_imaginary_rational }
691
+ | 'rescue' % { @num_xfrm = @emit_float_rescue };
1023
692
 
1024
693
  #
1025
694
  # === INTERPOLATION PARSING ===
1026
695
  #
1027
696
 
1028
- # Interpolations with immediate variable names simply call into
1029
- # the corresponding machine.
1030
-
1031
- interp_var = '#' ( global_var | class_var_v | instance_var_v );
1032
-
1033
- action extend_interp_var {
1034
- current_literal = literal
1035
- current_literal.flush_string
1036
- current_literal.extend_content
1037
-
1038
- emit(:tSTRING_DVAR, nil, @ts, @ts + 1)
1039
-
1040
- p = @ts
1041
- fcall expr_variable;
1042
- }
1043
-
1044
- # Special case for Ruby > 2.7
1045
- # If interpolated instance/class variable starts with a digit we parse it as a plain substring
1046
- # However, "#$1" is still a regular interpolation
1047
- interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
1048
-
1049
- action extend_interp_digit_var {
1050
- if @version >= 27
1051
- literal.extend_string(tok, @ts, @te)
1052
- else
1053
- message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
1054
- diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
1055
- end
1056
- }
1057
-
1058
- # Interpolations with code blocks must match nested curly braces, as
1059
- # interpolation ending is ambiguous with a block ending. So, every
1060
- # opening and closing brace should be matched with e_[lr]brace rules,
1061
- # which automatically perform the counting.
1062
- #
1063
- # Note that interpolations can themselves be nested, so brace balance
1064
- # is tied to the innermost literal.
1065
- #
1066
- # Also note that literals themselves should not use e_[lr]brace rules
1067
- # when matching their opening and closing delimiters, as the amount of
1068
- # braces inside the characters of a string literal is independent.
1069
-
1070
- interp_code = '#{';
1071
-
1072
697
  e_lbrace = '{' % {
1073
- @cond.push(false); @cmdarg.push(false)
1074
-
1075
- current_literal = literal
1076
- if current_literal
1077
- current_literal.start_interp_brace
1078
- end
698
+ e_lbrace
1079
699
  };
1080
700
 
1081
701
  e_rbrace = '}' % {
1082
- current_literal = literal
1083
- if current_literal
1084
- if current_literal.end_interp_brace_and_try_closing
1085
- if version?(18, 19)
1086
- emit(:tRCURLY, '}'.freeze, p - 1, p)
1087
- @cond.lexpop
1088
- @cmdarg.lexpop
1089
- else
1090
- emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
1091
- end
1092
-
1093
- if current_literal.saved_herebody_s
1094
- @herebody_s = current_literal.saved_herebody_s
1095
- end
1096
-
1097
-
1098
- fhold;
1099
- fnext *next_state_for_literal(current_literal);
1100
- fbreak;
1101
- end
702
+ if @strings.close_interp_on_current_literal(p)
703
+ fhold;
704
+ fnext inside_string;
705
+ fbreak;
1102
706
  end
1103
707
 
1104
708
  @paren_nest -= 1
1105
709
  };
1106
710
 
1107
- action extend_interp_code {
1108
- current_literal = literal
1109
- current_literal.flush_string
1110
- current_literal.extend_content
1111
-
1112
- emit(:tSTRING_DBEG, '#{'.freeze)
1113
-
1114
- if current_literal.heredoc?
1115
- current_literal.saved_herebody_s = @herebody_s
1116
- @herebody_s = nil
1117
- end
1118
-
1119
- current_literal.start_interp_brace
1120
- @command_start = true
1121
- fnext expr_value;
1122
- fbreak;
1123
- }
1124
-
1125
- # Actual string parsers are simply combined from the primitives defined
1126
- # above.
1127
-
1128
- interp_words := |*
1129
- interp_code => extend_interp_code;
1130
- interp_digit_var => extend_interp_digit_var;
1131
- interp_var => extend_interp_var;
1132
- e_bs escape => extend_string_escaped;
1133
- c_space+ => extend_string_space;
1134
- c_eol => extend_string_eol;
1135
- c_any => extend_string;
1136
- *|;
1137
-
1138
- interp_string := |*
1139
- interp_code => extend_interp_code;
1140
- interp_digit_var => extend_interp_digit_var;
1141
- interp_var => extend_interp_var;
1142
- e_bs escape => extend_string_escaped;
1143
- c_eol => extend_string_eol;
1144
- c_any => extend_string;
1145
- *|;
1146
-
1147
- plain_words := |*
1148
- e_bs c_any => extend_string_escaped;
1149
- c_space+ => extend_string_space;
1150
- c_eol => extend_string_eol;
1151
- c_any => extend_string;
1152
- *|;
1153
-
1154
- plain_string := |*
1155
- '\\' c_nl => extend_string_eol;
1156
- e_bs c_any => extend_string_escaped;
1157
- c_eol => extend_string_eol;
1158
- c_any => extend_string;
1159
- *|;
1160
-
1161
- interp_backslash_delimited := |*
1162
- interp_code => extend_interp_code;
1163
- interp_digit_var => extend_interp_digit_var;
1164
- interp_var => extend_interp_var;
1165
- c_eol => extend_string_eol;
1166
- c_any => extend_string;
1167
- *|;
1168
-
1169
- plain_backslash_delimited := |*
1170
- c_eol => extend_string_eol;
1171
- c_any => extend_string;
1172
- *|;
1173
-
1174
- interp_backslash_delimited_words := |*
1175
- interp_code => extend_interp_code;
1176
- interp_digit_var => extend_interp_digit_var;
1177
- interp_var => extend_interp_var;
1178
- c_space+ => extend_string_space;
1179
- c_eol => extend_string_eol;
1180
- c_any => extend_string;
1181
- *|;
1182
-
1183
- plain_backslash_delimited_words := |*
1184
- c_space+ => extend_string_space;
1185
- c_eol => extend_string_eol;
1186
- c_any => extend_string;
1187
- *|;
1188
-
1189
- regexp_modifiers := |*
1190
- [A-Za-z]+
1191
- => {
1192
- unknown_options = tok.scan(/[^imxouesn]/)
1193
- if unknown_options.any?
1194
- diagnostic :error, :regexp_options,
1195
- { :options => unknown_options.join }
1196
- end
1197
-
1198
- emit(:tREGEXP_OPT)
1199
- fnext expr_end;
1200
- fbreak;
1201
- };
1202
-
1203
- any
1204
- => {
1205
- emit(:tREGEXP_OPT, tok(@ts, @te - 1), @ts, @te - 1)
1206
- fhold;
1207
- fgoto expr_end;
1208
- };
1209
- *|;
1210
-
1211
711
  #
1212
712
  # === WHITESPACE HANDLING ===
1213
713
  #
@@ -1221,16 +721,20 @@ class Next
1221
721
  # comment is deemed equivalent to non-newline whitespace
1222
722
  # (c_space character class).
1223
723
 
724
+ e_nl = c_nl % {
725
+ p = on_newline(p)
726
+ };
727
+
1224
728
  w_space =
1225
729
  c_space+
1226
- | '\\' e_heredoc_nl
730
+ | '\\' e_nl
1227
731
  ;
1228
732
 
1229
733
  w_comment =
1230
734
  '#' %{ @sharp_s = p - 1 }
1231
735
  # The (p == pe) condition compensates for added "\0" and
1232
736
  # the way Ragel handles EOF.
1233
- c_line* %{ emit_comment(@sharp_s, p == pe ? p - 2 : p) }
737
+ c_line* %{ emit_comment_from_range(p, pe) }
1234
738
  ;
1235
739
 
1236
740
  w_space_comment =
@@ -1253,7 +757,7 @@ class Next
1253
757
  # is equivalent to `foo = "bar\n" + 2`.
1254
758
 
1255
759
  w_newline =
1256
- e_heredoc_nl;
760
+ e_nl;
1257
761
 
1258
762
  w_any =
1259
763
  w_space
@@ -1341,34 +845,22 @@ class Next
1341
845
  expr_variable := |*
1342
846
  global_var
1343
847
  => {
1344
- if tok =~ /^\$([1-9][0-9]*)$/
1345
- emit(:tNTH_REF, tok(@ts + 1).to_i)
1346
- elsif tok =~ /^\$([&`'+])$/
1347
- emit(:tBACK_REF)
1348
- else
1349
- emit(:tGVAR)
1350
- end
848
+ emit_global_var
1351
849
 
1352
850
  fnext *stack_pop; fbreak;
1353
851
  };
1354
852
 
1355
853
  class_var_v
1356
854
  => {
1357
- if tok =~ /^@@[0-9]/
1358
- diagnostic :error, :cvar_name, { :name => tok }
1359
- end
855
+ emit_class_var
1360
856
 
1361
- emit(:tCVAR)
1362
857
  fnext *stack_pop; fbreak;
1363
858
  };
1364
859
 
1365
860
  instance_var_v
1366
861
  => {
1367
- if tok =~ /^@[0-9]/
1368
- diagnostic :error, :ivar_name, { :name => tok }
1369
- end
862
+ emit_instance_var
1370
863
 
1371
- emit(:tIVAR)
1372
864
  fnext *stack_pop; fbreak;
1373
865
  };
1374
866
  *|;
@@ -1418,7 +910,8 @@ class Next
1418
910
  => {
1419
911
  if version?(23)
1420
912
  type, delimiter = tok[0..-2], tok[-1].chr
1421
- fgoto *push_literal(type, delimiter, @ts);
913
+ @strings.push_literal(type, delimiter, @ts)
914
+ fgoto inside_string;
1422
915
  else
1423
916
  p = @ts - 1
1424
917
  fgoto expr_end;
@@ -1569,14 +1062,7 @@ class Next
1569
1062
  | '<<'
1570
1063
  )
1571
1064
  => {
1572
- if tok(tm, tm + 1) == '/'.freeze
1573
- # Ambiguous regexp literal.
1574
- if @version < 30
1575
- diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
1576
- else
1577
- diagnostic :warning, :ambiguous_regexp, nil, range(tm, tm + 1)
1578
- end
1579
- end
1065
+ check_ambiguous_slash(tm)
1580
1066
 
1581
1067
  p = tm - 1
1582
1068
  fgoto expr_beg;
@@ -1774,21 +1260,26 @@ class Next
1774
1260
  '/' c_any
1775
1261
  => {
1776
1262
  type = delimiter = tok[0].chr
1777
- fhold; fgoto *push_literal(type, delimiter, @ts);
1263
+ @strings.push_literal(type, delimiter, @ts)
1264
+
1265
+ fhold;
1266
+ fgoto inside_string;
1778
1267
  };
1779
1268
 
1780
1269
  # %<string>
1781
1270
  '%' ( c_ascii - [A-Za-z0-9] )
1782
1271
  => {
1783
- type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
1784
- fgoto *push_literal(type, delimiter, @ts);
1272
+ type, delimiter = @source_buffer.slice(@ts, 1).chr, tok[-1].chr
1273
+ @strings.push_literal(type, delimiter, @ts)
1274
+ fgoto inside_string;
1785
1275
  };
1786
1276
 
1787
1277
  # %w(we are the people)
1788
1278
  '%' [A-Za-z] (c_ascii - [A-Za-z0-9])
1789
1279
  => {
1790
1280
  type, delimiter = tok[0..-2], tok[-1].chr
1791
- fgoto *push_literal(type, delimiter, @ts);
1281
+ @strings.push_literal(type, delimiter, @ts)
1282
+ fgoto inside_string;
1792
1283
  };
1793
1284
 
1794
1285
  '%' c_eof
@@ -1834,10 +1325,11 @@ class Next
1834
1325
  p = @ts + 1
1835
1326
  fnext expr_beg; fbreak;
1836
1327
  else
1837
- fnext *push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
1328
+ @strings.push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
1329
+ @strings.herebody_s ||= new_herebody_s
1838
1330
 
1839
- @herebody_s ||= new_herebody_s
1840
- p = @herebody_s - 1
1331
+ p = @strings.herebody_s - 1
1332
+ fnext inside_string;
1841
1333
  end
1842
1334
  };
1843
1335
 
@@ -1871,7 +1363,9 @@ class Next
1871
1363
  ':' ['"] # '
1872
1364
  => {
1873
1365
  type, delimiter = tok, tok[-1].chr
1874
- fgoto *push_literal(type, delimiter, @ts);
1366
+ @strings.push_literal(type, delimiter, @ts);
1367
+
1368
+ fgoto inside_string;
1875
1369
  };
1876
1370
 
1877
1371
  # :!@ is :!
@@ -1900,12 +1394,7 @@ class Next
1900
1394
  | '@@' %{ tm = p - 2; diag_msg = :cvar_name }
1901
1395
  ) [0-9]*
1902
1396
  => {
1903
- if @version >= 27
1904
- diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
1905
- else
1906
- emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
1907
- p = @ts
1908
- end
1397
+ emit_colon_with_digits(p, tm, diag_msg)
1909
1398
 
1910
1399
  fnext expr_end; fbreak;
1911
1400
  };
@@ -1916,29 +1405,19 @@ class Next
1916
1405
 
1917
1406
  # Character constant, like ?a, ?\n, ?\u1000, and so on
1918
1407
  # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
1919
- '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
1920
- | (c_any - c_space_nl - e_bs) % { @escape = nil }
1921
- )
1408
+ '?' c_any
1922
1409
  => {
1923
- value = @escape || tok(@ts + 1)
1410
+ p, next_state = @strings.read_character_constant(@ts)
1411
+ fhold; # Ragel will do `p += 1` to consume input, prevent it
1924
1412
 
1925
- if version?(18)
1926
- emit(:tINTEGER, value.getbyte(0))
1413
+ # If the strings lexer finds a character constant (?a), emit it,
1414
+ # otherwise read ternary operator
1415
+ if @token_queue.empty?
1416
+ fgoto *next_state;
1927
1417
  else
1928
- emit(:tCHARACTER, value)
1418
+ fnext *next_state;
1419
+ fbreak;
1929
1420
  end
1930
-
1931
- fnext expr_end; fbreak;
1932
- };
1933
-
1934
- '?' c_space_nl
1935
- => {
1936
- escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
1937
- "\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
1938
- diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
1939
-
1940
- p = @ts - 1
1941
- fgoto expr_end;
1942
1421
  };
1943
1422
 
1944
1423
  '?' c_eof
@@ -1946,13 +1425,6 @@ class Next
1946
1425
  diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
1947
1426
  };
1948
1427
 
1949
- # f ?aa : b: Disambiguate with a character literal.
1950
- '?' [A-Za-z_] bareword
1951
- => {
1952
- p = @ts - 1
1953
- fgoto expr_end;
1954
- };
1955
-
1956
1428
  #
1957
1429
  # AMBIGUOUS EMPTY BLOCK ARGUMENTS
1958
1430
  #
@@ -2028,7 +1500,7 @@ class Next
2028
1500
  if version?(18)
2029
1501
  ident = tok(@ts, @te - 2)
2030
1502
 
2031
- emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
1503
+ emit((@source_buffer.slice(@ts, 1) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
2032
1504
  ident, @ts, @te - 2)
2033
1505
  fhold; # continue as a symbol
2034
1506
 
@@ -2133,7 +1605,7 @@ class Next
2133
1605
 
2134
1606
  w_any;
2135
1607
 
2136
- e_heredoc_nl '=begin' ( c_space | c_nl_zlen )
1608
+ e_nl '=begin' ( c_space | c_nl_zlen )
2137
1609
  => {
2138
1610
  p = @ts - 1
2139
1611
  @cs_before_block_comment = @cs
@@ -2186,7 +1658,8 @@ class Next
2186
1658
  # "bar", 'baz'
2187
1659
  ['"] # '
2188
1660
  => {
2189
- fgoto *push_literal(tok, tok, @ts);
1661
+ @strings.push_literal(tok, tok, @ts)
1662
+ fgoto inside_string;
2190
1663
  };
2191
1664
 
2192
1665
  w_space_comment;
@@ -2247,8 +1720,7 @@ class Next
2247
1720
  fnext expr_fname; fbreak; };
2248
1721
 
2249
1722
  'class' w_any* '<<'
2250
- => { emit(:kCLASS, 'class'.freeze, @ts, @ts + 5)
2251
- emit(:tLSHFT, '<<'.freeze, @te - 2, @te)
1723
+ => { emit_singleton_class
2252
1724
  fnext expr_value; fbreak; };
2253
1725
 
2254
1726
  # a if b:c: Syntax error.
@@ -2307,27 +1779,13 @@ class Next
2307
1779
  | '0' digit* '_'? %{ @num_base = 8; @num_digits_s = @ts } int_dec
2308
1780
  ) %{ @num_suffix_s = p } int_suffix
2309
1781
  => {
2310
- digits = tok(@num_digits_s, @num_suffix_s)
2311
-
2312
- if digits.end_with? '_'.freeze
2313
- diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
2314
- range(@te - 1, @te)
2315
- elsif digits.empty? && @num_base == 8 && version?(18)
2316
- # 1.8 did not raise an error on 0o.
2317
- digits = '0'.freeze
2318
- elsif digits.empty?
2319
- diagnostic :error, :empty_numeric
2320
- elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
2321
- invalid_s = @num_digits_s + invalid_idx
2322
- diagnostic :error, :invalid_octal, nil,
2323
- range(invalid_s, invalid_s + 1)
2324
- end
1782
+ digits = numeric_literal_int
2325
1783
 
2326
1784
  if version?(18, 19, 20)
2327
1785
  emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s)
2328
1786
  p = @num_suffix_s - 1
2329
1787
  else
2330
- @num_xfrm.call(digits.to_i(@num_base))
1788
+ p = @num_xfrm.call(digits.to_i(@num_base), p)
2331
1789
  end
2332
1790
  fbreak;
2333
1791
  };
@@ -2372,7 +1830,7 @@ class Next
2372
1830
  emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
2373
1831
  p = @num_suffix_s - 1
2374
1832
  else
2375
- @num_xfrm.call(digits)
1833
+ p = @num_xfrm.call(digits, p)
2376
1834
  end
2377
1835
  fbreak;
2378
1836
  };
@@ -2385,7 +1843,8 @@ class Next
2385
1843
  '`' | ['"] # '
2386
1844
  => {
2387
1845
  type, delimiter = tok, tok[-1].chr
2388
- fgoto *push_literal(type, delimiter, @ts, nil, false, false, true);
1846
+ @strings.push_literal(type, delimiter, @ts, nil, false, false, true);
1847
+ fgoto inside_string;
2389
1848
  };
2390
1849
 
2391
1850
  #
@@ -2470,15 +1929,7 @@ class Next
2470
1929
 
2471
1930
  e_rbrace | e_rparen | e_rbrack
2472
1931
  => {
2473
- emit_table(PUNCTUATION)
2474
-
2475
- if @version < 24
2476
- @cond.lexpop
2477
- @cmdarg.lexpop
2478
- else
2479
- @cond.pop
2480
- @cmdarg.pop
2481
- end
1932
+ emit_rbrace_rparen_rbrack
2482
1933
 
2483
1934
  if tok == '}'.freeze || tok == ']'.freeze
2484
1935
  if @version >= 25
@@ -2635,6 +2086,17 @@ class Next
2635
2086
  c_eof => do_eof;
2636
2087
  *|;
2637
2088
 
2089
+ inside_string := |*
2090
+ any
2091
+ => {
2092
+ p, next_state = @strings.advance(p)
2093
+
2094
+ fhold; # Ragel will do `p += 1` to consume input, prevent it
2095
+ fnext *next_state;
2096
+ fbreak;
2097
+ };
2098
+ *|;
2099
+
2638
2100
  }%%
2639
2101
  # %
2640
2102
  end