ruby-next-parser 3.1.1.3 → 3.2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/parser/ruby-next/lexer-F0.rb +12957 -0
- data/lib/parser/ruby-next/lexer-F1.rb +14968 -0
- data/lib/parser/ruby-next/lexer.rl +201 -739
- data/lib/parser/ruby-next/parser_ext.rb +6 -1
- data/lib/parser/ruby-next/version.rb +1 -1
- data/lib/parser/rubynext.rb +8151 -3791
- data/lib/parser/rubynext.y +36 -15
- metadata +5 -4
- data/lib/parser/ruby-next/lexer.rb +0 -25177
@@ -82,14 +82,6 @@ class Next
|
|
82
82
|
%% write data nofinal;
|
83
83
|
# %
|
84
84
|
|
85
|
-
ESCAPES = {
|
86
|
-
?a.ord => "\a", ?b.ord => "\b", ?e.ord => "\e", ?f.ord => "\f",
|
87
|
-
?n.ord => "\n", ?r.ord => "\r", ?s.ord => "\s", ?t.ord => "\t",
|
88
|
-
?v.ord => "\v", ?\\.ord => "\\"
|
89
|
-
}.freeze
|
90
|
-
|
91
|
-
REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
|
92
|
-
|
93
85
|
attr_reader :source_buffer
|
94
86
|
|
95
87
|
attr_accessor :diagnostics
|
@@ -100,7 +92,7 @@ class Next
|
|
100
92
|
|
101
93
|
attr_accessor :tokens, :comments
|
102
94
|
|
103
|
-
attr_reader :paren_nest, :cmdarg_stack, :cond_stack, :lambda_stack
|
95
|
+
attr_reader :paren_nest, :cmdarg_stack, :cond_stack, :lambda_stack, :version
|
104
96
|
|
105
97
|
def initialize(version)
|
106
98
|
@version = version
|
@@ -110,6 +102,26 @@ class Next
|
|
110
102
|
@tokens = nil
|
111
103
|
@comments = nil
|
112
104
|
|
105
|
+
@_lex_actions =
|
106
|
+
if self.class.respond_to?(:_lex_actions, true)
|
107
|
+
self.class.send :_lex_actions
|
108
|
+
else
|
109
|
+
[]
|
110
|
+
end
|
111
|
+
|
112
|
+
@emit_integer = lambda { |chars, p| emit(:tINTEGER, chars); p }
|
113
|
+
@emit_rational = lambda { |chars, p| emit(:tRATIONAL, Rational(chars)); p }
|
114
|
+
@emit_imaginary = lambda { |chars, p| emit(:tIMAGINARY, Complex(0, chars)); p }
|
115
|
+
@emit_imaginary_rational = lambda { |chars, p| emit(:tIMAGINARY, Complex(0, Rational(chars))); p }
|
116
|
+
@emit_integer_re = lambda { |chars, p| emit(:tINTEGER, chars, @ts, @te - 2); p - 2 }
|
117
|
+
@emit_integer_if = lambda { |chars, p| emit(:tINTEGER, chars, @ts, @te - 2); p - 2 }
|
118
|
+
@emit_integer_rescue = lambda { |chars, p| emit(:tINTEGER, chars, @ts, @te - 6); p - 6 }
|
119
|
+
|
120
|
+
@emit_float = lambda { |chars, p| emit(:tFLOAT, Float(chars)); p }
|
121
|
+
@emit_imaginary_float = lambda { |chars, p| emit(:tIMAGINARY, Complex(0, Float(chars))); p }
|
122
|
+
@emit_float_if = lambda { |chars, p| emit(:tFLOAT, Float(chars), @ts, @te - 2); p - 2 }
|
123
|
+
@emit_float_rescue = lambda { |chars, p| emit(:tFLOAT, Float(chars), @ts, @te - 6); p - 6 }
|
124
|
+
|
113
125
|
reset
|
114
126
|
end
|
115
127
|
|
@@ -139,7 +151,6 @@ class Next
|
|
139
151
|
|
140
152
|
# Lexer state:
|
141
153
|
@token_queue = []
|
142
|
-
@literal_stack = []
|
143
154
|
|
144
155
|
@eq_begin_s = nil # location of last encountered =begin
|
145
156
|
@sharp_s = nil # location of last encountered #
|
@@ -151,34 +162,20 @@ class Next
|
|
151
162
|
@num_suffix_s = nil # starting position of numeric suffix
|
152
163
|
@num_xfrm = nil # numeric suffix-induced transformation
|
153
164
|
|
154
|
-
@escape_s = nil # starting position of current sequence
|
155
|
-
@escape = nil # last escaped sequence, as string
|
156
|
-
|
157
|
-
@herebody_s = nil # starting position of current heredoc line
|
158
|
-
|
159
165
|
# Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
|
160
166
|
# encountered after a matching closing parenthesis.
|
161
167
|
@paren_nest = 0
|
162
168
|
@lambda_stack = []
|
163
169
|
|
164
|
-
# After encountering the closing line of <<~SQUIGGLY_HEREDOC,
|
165
|
-
# we store the indentation level and give it out to the parser
|
166
|
-
# on request. It is not possible to infer indentation level just
|
167
|
-
# from the AST because escape sequences such as `\ ` or `\t` are
|
168
|
-
# expanded inside the lexer, but count as non-whitespace for
|
169
|
-
# indentation purposes.
|
170
|
-
@dedent_level = nil
|
171
|
-
|
172
170
|
# If the lexer is in `command state' (aka expr_value)
|
173
171
|
# at the entry to #advance, it will transition to expr_cmdarg
|
174
172
|
# instead of expr_arg at certain points.
|
175
173
|
@command_start = true
|
176
174
|
|
177
|
-
# True at the end of "def foo a:"
|
178
|
-
@in_kwarg = false
|
179
|
-
|
180
175
|
# State before =begin / =end block comment
|
181
176
|
@cs_before_block_comment = self.class.lex_en_line_begin
|
177
|
+
|
178
|
+
@strings = Parser::LexerStrings.new(self, @version)
|
182
179
|
end
|
183
180
|
|
184
181
|
def source_buffer=(source_buffer)
|
@@ -200,6 +197,9 @@ class Next
|
|
200
197
|
else
|
201
198
|
@source_pts = nil
|
202
199
|
end
|
200
|
+
|
201
|
+
@strings.source_buffer = @source_buffer
|
202
|
+
@strings.source_pts = @source_pts
|
203
203
|
end
|
204
204
|
|
205
205
|
def encoding
|
@@ -220,10 +220,7 @@ class Next
|
|
220
220
|
:expr_endfn => lex_en_expr_endfn,
|
221
221
|
:expr_labelarg => lex_en_expr_labelarg,
|
222
222
|
|
223
|
-
:
|
224
|
-
:interp_words => lex_en_interp_words,
|
225
|
-
:plain_string => lex_en_plain_string,
|
226
|
-
:plain_words => lex_en_plain_string,
|
223
|
+
:inside_string => lex_en_inside_string
|
227
224
|
}
|
228
225
|
|
229
226
|
def state
|
@@ -253,15 +250,12 @@ class Next
|
|
253
250
|
end
|
254
251
|
|
255
252
|
def dedent_level
|
256
|
-
|
257
|
-
# using a stale value.
|
258
|
-
dedent_level, @dedent_level = @dedent_level, nil
|
259
|
-
dedent_level
|
253
|
+
@strings.dedent_level
|
260
254
|
end
|
261
255
|
|
262
256
|
# Return next token: [type, value].
|
263
257
|
def advance
|
264
|
-
|
258
|
+
unless @token_queue.empty?
|
265
259
|
return @token_queue.shift
|
266
260
|
end
|
267
261
|
|
@@ -276,6 +270,7 @@ class Next
|
|
276
270
|
_lex_to_state_actions = klass.send :_lex_to_state_actions
|
277
271
|
_lex_from_state_actions = klass.send :_lex_from_state_actions
|
278
272
|
_lex_eof_trans = klass.send :_lex_eof_trans
|
273
|
+
_lex_actions = @_lex_actions
|
279
274
|
|
280
275
|
pe = @source_pts.size + 2
|
281
276
|
p, eof = @p, pe
|
@@ -307,10 +302,6 @@ class Next
|
|
307
302
|
|
308
303
|
protected
|
309
304
|
|
310
|
-
def eof_codepoint?(point)
|
311
|
-
[0x04, 0x1a, 0x00].include? point
|
312
|
-
end
|
313
|
-
|
314
305
|
def version?(*versions)
|
315
306
|
versions.include?(@version)
|
316
307
|
end
|
@@ -320,12 +311,8 @@ class Next
|
|
320
311
|
@stack[@top]
|
321
312
|
end
|
322
313
|
|
323
|
-
def encode_escape(ord)
|
324
|
-
ord.chr.force_encoding(@source_buffer.source.encoding)
|
325
|
-
end
|
326
|
-
|
327
314
|
def tok(s = @ts, e = @te)
|
328
|
-
@source_buffer.slice(s
|
315
|
+
@source_buffer.slice(s, e - s)
|
329
316
|
end
|
330
317
|
|
331
318
|
def range(s = @ts, e = @te)
|
@@ -378,64 +365,110 @@ class Next
|
|
378
365
|
nil
|
379
366
|
end
|
380
367
|
|
368
|
+
def emit_comment_from_range(p, pe)
|
369
|
+
emit_comment(@sharp_s, p == pe ? p - 2 : p)
|
370
|
+
end
|
371
|
+
|
381
372
|
def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
|
382
373
|
@diagnostics.process(
|
383
374
|
Parser::Diagnostic.new(type, reason, arguments, location, highlights))
|
384
375
|
end
|
385
376
|
|
386
|
-
#
|
387
|
-
# === LITERAL STACK ===
|
388
|
-
#
|
389
377
|
|
390
|
-
def
|
391
|
-
|
392
|
-
|
393
|
-
|
378
|
+
def e_lbrace
|
379
|
+
@cond.push(false); @cmdarg.push(false)
|
380
|
+
|
381
|
+
current_literal = @strings.literal
|
382
|
+
if current_literal
|
383
|
+
current_literal.start_interp_brace
|
384
|
+
end
|
394
385
|
end
|
395
386
|
|
396
|
-
def
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
387
|
+
def numeric_literal_int
|
388
|
+
digits = tok(@num_digits_s, @num_suffix_s)
|
389
|
+
|
390
|
+
if digits.end_with? '_'.freeze
|
391
|
+
diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
|
392
|
+
range(@te - 1, @te)
|
393
|
+
elsif digits.empty? && @num_base == 8 && version?(18)
|
394
|
+
# 1.8 did not raise an error on 0o.
|
395
|
+
digits = '0'.freeze
|
396
|
+
elsif digits.empty?
|
397
|
+
diagnostic :error, :empty_numeric
|
398
|
+
elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
|
399
|
+
invalid_s = @num_digits_s + invalid_idx
|
400
|
+
diagnostic :error, :invalid_octal, nil,
|
401
|
+
range(invalid_s, invalid_s + 1)
|
402
|
+
end
|
403
|
+
digits
|
404
|
+
end
|
405
|
+
|
406
|
+
def on_newline(p)
|
407
|
+
@strings.on_newline(p)
|
408
|
+
end
|
409
|
+
|
410
|
+
def check_ambiguous_slash(tm)
|
411
|
+
if tok(tm, tm + 1) == '/'.freeze
|
412
|
+
# Ambiguous regexp literal.
|
413
|
+
if @version < 30
|
414
|
+
diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
|
412
415
|
else
|
413
|
-
|
416
|
+
diagnostic :warning, :ambiguous_regexp, nil, range(tm, tm + 1)
|
414
417
|
end
|
418
|
+
end
|
419
|
+
end
|
420
|
+
|
421
|
+
def emit_global_var(ts = @ts, te = @te)
|
422
|
+
if tok(ts, te) =~ /^\$([1-9][0-9]*)$/
|
423
|
+
emit(:tNTH_REF, tok(ts + 1, te).to_i, ts, te)
|
424
|
+
elsif tok =~ /^\$([&`'+])$/
|
425
|
+
emit(:tBACK_REF, tok(ts, te), ts, te)
|
415
426
|
else
|
416
|
-
|
417
|
-
self.class.lex_en_interp_string
|
418
|
-
else
|
419
|
-
self.class.lex_en_plain_string
|
420
|
-
end
|
427
|
+
emit(:tGVAR, tok(ts, te), ts, te)
|
421
428
|
end
|
422
429
|
end
|
423
430
|
|
424
|
-
def
|
425
|
-
|
431
|
+
def emit_class_var(ts = @ts, te = @te)
|
432
|
+
if tok(ts, te) =~ /^@@[0-9]/
|
433
|
+
diagnostic :error, :cvar_name, { :name => tok(ts, te) }
|
434
|
+
end
|
435
|
+
|
436
|
+
emit(:tCVAR, tok(ts, te), ts, te)
|
426
437
|
end
|
427
438
|
|
428
|
-
def
|
429
|
-
|
439
|
+
def emit_instance_var(ts = @ts, te = @te)
|
440
|
+
if tok(ts, te) =~ /^@[0-9]/
|
441
|
+
diagnostic :error, :ivar_name, { :name => tok(ts, te) }
|
442
|
+
end
|
443
|
+
|
444
|
+
emit(:tIVAR, tok(ts, te), ts, te)
|
445
|
+
end
|
446
|
+
|
447
|
+
def emit_rbrace_rparen_rbrack
|
448
|
+
emit_table(PUNCTUATION)
|
430
449
|
|
431
|
-
@
|
450
|
+
if @version < 24
|
451
|
+
@cond.lexpop
|
452
|
+
@cmdarg.lexpop
|
453
|
+
else
|
454
|
+
@cond.pop
|
455
|
+
@cmdarg.pop
|
456
|
+
end
|
457
|
+
end
|
432
458
|
|
433
|
-
|
434
|
-
|
435
|
-
|
459
|
+
def emit_colon_with_digits(p, tm, diag_msg)
|
460
|
+
if @version >= 27
|
461
|
+
diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
|
436
462
|
else
|
437
|
-
|
463
|
+
emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
|
464
|
+
p = @ts
|
438
465
|
end
|
466
|
+
p
|
467
|
+
end
|
468
|
+
|
469
|
+
def emit_singleton_class
|
470
|
+
emit(:kCLASS, 'class'.freeze, @ts, @ts + 5)
|
471
|
+
emit(:tLSHFT, '<<'.freeze, @te - 2, @te)
|
439
472
|
end
|
440
473
|
|
441
474
|
# Mapping of strings to parser tokens.
|
@@ -457,7 +490,7 @@ class Next
|
|
457
490
|
'=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
|
458
491
|
'<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
|
459
492
|
'{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
|
460
|
-
'!@' => :tBANG, '&.' => :tANDDOT,
|
493
|
+
'!@' => :tBANG, '&.' => :tANDDOT, '.:' => :tMETHREF,
|
461
494
|
}
|
462
495
|
|
463
496
|
PUNCTUATION_BEGIN = {
|
@@ -480,6 +513,11 @@ class Next
|
|
480
513
|
'BEGIN' => :klBEGIN, 'END' => :klEND,
|
481
514
|
}
|
482
515
|
|
516
|
+
ESCAPE_WHITESPACE = {
|
517
|
+
" " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
|
518
|
+
"\v" => '\v', "\f" => '\f'
|
519
|
+
}
|
520
|
+
|
483
521
|
%w(class module def undef begin end then elsif else ensure case when
|
484
522
|
for break next redo retry in do return yield super self nil true
|
485
523
|
false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
|
@@ -531,7 +569,7 @@ class Next
|
|
531
569
|
# This allows to feed the lexer more data if needed; this is only used
|
532
570
|
# in tests.
|
533
571
|
#
|
534
|
-
# Note that this action is not embedded into e_eof like
|
572
|
+
# Note that this action is not embedded into e_eof like e_nl and e_bs
|
535
573
|
# below. This is due to the fact that scanner state at EOF is observed
|
536
574
|
# by tests, and encapsulating it in a rule would break the introspection.
|
537
575
|
fhold; fbreak;
|
@@ -633,581 +671,43 @@ class Next
|
|
633
671
|
flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
|
634
672
|
|
635
673
|
int_suffix =
|
636
|
-
'' % { @num_xfrm =
|
637
|
-
| 'r' % { @num_xfrm =
|
638
|
-
| 'i' % { @num_xfrm =
|
639
|
-
| 'ri' % { @num_xfrm =
|
640
|
-
| 're' % { @num_xfrm =
|
641
|
-
| 'if' % { @num_xfrm =
|
642
|
-
| 'rescue' % { @num_xfrm =
|
674
|
+
'' % { @num_xfrm = @emit_integer }
|
675
|
+
| 'r' % { @num_xfrm = @emit_rational }
|
676
|
+
| 'i' % { @num_xfrm = @emit_imaginary }
|
677
|
+
| 'ri' % { @num_xfrm = @emit_imaginary_rational }
|
678
|
+
| 're' % { @num_xfrm = @emit_integer_re }
|
679
|
+
| 'if' % { @num_xfrm = @emit_integer_if }
|
680
|
+
| 'rescue' % { @num_xfrm = @emit_integer_rescue };
|
643
681
|
|
644
682
|
flo_pow_suffix =
|
645
|
-
'' % { @num_xfrm =
|
646
|
-
| 'i' % { @num_xfrm =
|
647
|
-
| 'if' % { @num_xfrm =
|
683
|
+
'' % { @num_xfrm = @emit_float }
|
684
|
+
| 'i' % { @num_xfrm = @emit_imaginary_float }
|
685
|
+
| 'if' % { @num_xfrm = @emit_float_if };
|
648
686
|
|
649
687
|
flo_suffix =
|
650
688
|
flo_pow_suffix
|
651
|
-
| 'r' % { @num_xfrm =
|
652
|
-
| 'ri' % { @num_xfrm =
|
653
|
-
| 'rescue' % { @num_xfrm =
|
654
|
-
|
655
|
-
#
|
656
|
-
# === ESCAPE SEQUENCE PARSING ===
|
657
|
-
#
|
658
|
-
|
659
|
-
# Escape parsing code is a Ragel pattern, not a scanner, and therefore
|
660
|
-
# it shouldn't directly raise errors or perform other actions with side effects.
|
661
|
-
# In reality this would probably just mess up error reporting in pathological
|
662
|
-
# cases, through.
|
663
|
-
|
664
|
-
# The amount of code required to parse \M\C stuff correctly is ridiculous.
|
665
|
-
|
666
|
-
escaped_nl = "\\" c_nl;
|
667
|
-
|
668
|
-
action unicode_points {
|
669
|
-
@escape = ""
|
670
|
-
|
671
|
-
codepoints = tok(@escape_s + 2, p - 1)
|
672
|
-
codepoint_s = @escape_s + 2
|
673
|
-
|
674
|
-
if @version < 24
|
675
|
-
if codepoints.start_with?(" ") || codepoints.start_with?("\t")
|
676
|
-
diagnostic :fatal, :invalid_unicode_escape, nil,
|
677
|
-
range(@escape_s + 2, @escape_s + 3)
|
678
|
-
end
|
679
|
-
|
680
|
-
if spaces_p = codepoints.index(/[ \t]{2}/)
|
681
|
-
diagnostic :fatal, :invalid_unicode_escape, nil,
|
682
|
-
range(codepoint_s + spaces_p + 1, codepoint_s + spaces_p + 2)
|
683
|
-
end
|
684
|
-
|
685
|
-
if codepoints.end_with?(" ") || codepoints.end_with?("\t")
|
686
|
-
diagnostic :fatal, :invalid_unicode_escape, nil, range(p - 1, p)
|
687
|
-
end
|
688
|
-
end
|
689
|
-
|
690
|
-
codepoints.scan(/([0-9a-fA-F]+)|([ \t]+)/).each do |(codepoint_str, spaces)|
|
691
|
-
if spaces
|
692
|
-
codepoint_s += spaces.length
|
693
|
-
else
|
694
|
-
codepoint = codepoint_str.to_i(16)
|
695
|
-
|
696
|
-
if codepoint >= 0x110000
|
697
|
-
diagnostic :error, :unicode_point_too_large, nil,
|
698
|
-
range(codepoint_s, codepoint_s + codepoint_str.length)
|
699
|
-
break
|
700
|
-
end
|
701
|
-
|
702
|
-
@escape += codepoint.chr(Encoding::UTF_8)
|
703
|
-
codepoint_s += codepoint_str.length
|
704
|
-
end
|
705
|
-
end
|
706
|
-
}
|
707
|
-
|
708
|
-
action unescape_char {
|
709
|
-
codepoint = @source_pts[p - 1]
|
710
|
-
|
711
|
-
if @version >= 30 && (codepoint == 117 || codepoint == 85) # 'u' or 'U'
|
712
|
-
diagnostic :fatal, :invalid_escape
|
713
|
-
end
|
714
|
-
|
715
|
-
if (@escape = ESCAPES[codepoint]).nil?
|
716
|
-
@escape = encode_escape(@source_buffer.slice(p - 1))
|
717
|
-
end
|
718
|
-
}
|
719
|
-
|
720
|
-
action invalid_complex_escape {
|
721
|
-
diagnostic :fatal, :invalid_escape
|
722
|
-
}
|
723
|
-
|
724
|
-
action read_post_meta_or_ctrl_char {
|
725
|
-
@escape = @source_buffer.slice(p - 1).chr
|
726
|
-
|
727
|
-
if @version >= 27 && ((0..8).include?(@escape.ord) || (14..31).include?(@escape.ord))
|
728
|
-
diagnostic :fatal, :invalid_escape
|
729
|
-
end
|
730
|
-
}
|
731
|
-
|
732
|
-
action slash_c_char {
|
733
|
-
@escape = encode_escape(@escape[0].ord & 0x9f)
|
734
|
-
}
|
735
|
-
|
736
|
-
action slash_m_char {
|
737
|
-
@escape = encode_escape(@escape[0].ord | 0x80)
|
738
|
-
}
|
739
|
-
|
740
|
-
maybe_escaped_char = (
|
741
|
-
'\\' c_any %unescape_char
|
742
|
-
| '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
|
743
|
-
| ( c_any - [\\] ) %read_post_meta_or_ctrl_char
|
744
|
-
);
|
745
|
-
|
746
|
-
maybe_escaped_ctrl_char = ( # why?!
|
747
|
-
'\\' c_any %unescape_char %slash_c_char
|
748
|
-
| '?' % { @escape = "\x7f" }
|
749
|
-
| '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
|
750
|
-
| ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
|
751
|
-
);
|
752
|
-
|
753
|
-
escape = (
|
754
|
-
# \377
|
755
|
-
[0-7]{1,3}
|
756
|
-
% { @escape = encode_escape(tok(@escape_s, p).to_i(8) % 0x100) }
|
757
|
-
|
758
|
-
# \xff
|
759
|
-
| 'x' xdigit{1,2}
|
760
|
-
% { @escape = encode_escape(tok(@escape_s + 1, p).to_i(16)) }
|
761
|
-
|
762
|
-
# %q[\x]
|
763
|
-
| 'x' ( c_any - xdigit )
|
764
|
-
% {
|
765
|
-
diagnostic :fatal, :invalid_hex_escape, nil, range(@escape_s - 1, p + 2)
|
766
|
-
}
|
767
|
-
|
768
|
-
# \u263a
|
769
|
-
| 'u' xdigit{4}
|
770
|
-
% { @escape = tok(@escape_s + 1, p).to_i(16).chr(Encoding::UTF_8) }
|
771
|
-
|
772
|
-
# \u123
|
773
|
-
| 'u' xdigit{0,3}
|
774
|
-
% {
|
775
|
-
diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
|
776
|
-
}
|
777
|
-
|
778
|
-
# u{not hex} or u{}
|
779
|
-
| 'u{' ( c_any - xdigit - [ \t}] )* '}'
|
780
|
-
% {
|
781
|
-
diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
|
782
|
-
}
|
783
|
-
|
784
|
-
# \u{ \t 123 \t 456 \t\t }
|
785
|
-
| 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
|
786
|
-
(
|
787
|
-
( xdigit{1,6} [ \t]* '}'
|
788
|
-
%unicode_points
|
789
|
-
)
|
790
|
-
|
|
791
|
-
( xdigit* ( c_any - xdigit - [ \t}] )+ '}'
|
792
|
-
| ( c_any - [ \t}] )* c_eof
|
793
|
-
| xdigit{7,}
|
794
|
-
) % {
|
795
|
-
diagnostic :fatal, :unterminated_unicode, nil, range(p - 1, p)
|
796
|
-
}
|
797
|
-
)
|
798
|
-
|
799
|
-
# \C-\a \cx
|
800
|
-
| ( 'C-' | 'c' ) escaped_nl?
|
801
|
-
maybe_escaped_ctrl_char
|
802
|
-
|
803
|
-
# \M-a
|
804
|
-
| 'M-' escaped_nl?
|
805
|
-
maybe_escaped_char
|
806
|
-
%slash_m_char
|
807
|
-
|
808
|
-
# \C-\M-f \M-\cf \c\M-f
|
809
|
-
| ( ( 'C-' | 'c' ) escaped_nl? '\\M-'
|
810
|
-
| 'M-\\' escaped_nl? ( 'C-' | 'c' ) ) escaped_nl?
|
811
|
-
maybe_escaped_ctrl_char
|
812
|
-
%slash_m_char
|
813
|
-
|
814
|
-
| 'C' c_any %invalid_complex_escape
|
815
|
-
| 'M' c_any %invalid_complex_escape
|
816
|
-
| ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
|
817
|
-
|
818
|
-
| ( c_any - [0-7xuCMc] ) %unescape_char
|
819
|
-
|
820
|
-
| c_eof % {
|
821
|
-
diagnostic :fatal, :escape_eof, nil, range(p - 1, p)
|
822
|
-
}
|
823
|
-
);
|
824
|
-
|
825
|
-
# Use rules in form of `e_bs escape' when you need to parse a sequence.
|
826
|
-
e_bs = '\\' % {
|
827
|
-
@escape_s = p
|
828
|
-
@escape = nil
|
829
|
-
};
|
830
|
-
|
831
|
-
#
|
832
|
-
# === STRING AND HEREDOC PARSING ===
|
833
|
-
#
|
834
|
-
|
835
|
-
# Heredoc parsing is quite a complex topic. First, consider that heredocs
|
836
|
-
# can be arbitrarily nested. For example:
|
837
|
-
#
|
838
|
-
# puts <<CODE
|
839
|
-
# the result is: #{<<RESULT.inspect
|
840
|
-
# i am a heredoc
|
841
|
-
# RESULT
|
842
|
-
# }
|
843
|
-
# CODE
|
844
|
-
#
|
845
|
-
# which, incidentally, evaluates to:
|
846
|
-
#
|
847
|
-
# the result is: " i am a heredoc\n"
|
848
|
-
#
|
849
|
-
# To parse them, lexer refers to two kinds (remember, nested heredocs)
|
850
|
-
# of positions in the input stream, namely heredoc_e
|
851
|
-
# (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
|
852
|
-
#
|
853
|
-
# heredoc_e is simply contained inside the corresponding Literal, and
|
854
|
-
# when the heredoc is closed, the lexing is restarted from that position.
|
855
|
-
#
|
856
|
-
# @herebody_s is quite more complex. First, @herebody_s changes after each
|
857
|
-
# heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
|
858
|
-
# contains the current line, and also when a heredoc is started, @herebody_s
|
859
|
-
# contains the position from which the heredoc will be lexed.
|
860
|
-
#
|
861
|
-
# Second, as (insanity) there are nested heredocs, we need to maintain a
|
862
|
-
# stack of these positions. Each time #push_literal is called, it saves current
|
863
|
-
# @heredoc_s to literal.saved_herebody_s, and after an interpolation (possibly
|
864
|
-
# containing another heredocs) is closed, the previous value is restored.
|
865
|
-
|
866
|
-
e_heredoc_nl = c_nl % {
|
867
|
-
# After every heredoc was parsed, @herebody_s contains the
|
868
|
-
# position of next token after all heredocs.
|
869
|
-
if @herebody_s
|
870
|
-
p = @herebody_s
|
871
|
-
@herebody_s = nil
|
872
|
-
end
|
873
|
-
};
|
874
|
-
|
875
|
-
action extend_string {
|
876
|
-
string = tok
|
877
|
-
|
878
|
-
# tLABEL_END is only possible in non-cond context on >= 2.2
|
879
|
-
if @version >= 22 && !@cond.active?
|
880
|
-
lookahead = @source_buffer.slice(@te...@te+2)
|
881
|
-
end
|
882
|
-
|
883
|
-
current_literal = literal
|
884
|
-
if !current_literal.heredoc? &&
|
885
|
-
(token = current_literal.nest_and_try_closing(string, @ts, @te, lookahead))
|
886
|
-
if token[0] == :tLABEL_END
|
887
|
-
p += 1
|
888
|
-
pop_literal
|
889
|
-
fnext expr_labelarg;
|
890
|
-
else
|
891
|
-
fnext *pop_literal;
|
892
|
-
end
|
893
|
-
fbreak;
|
894
|
-
else
|
895
|
-
current_literal.extend_string(string, @ts, @te)
|
896
|
-
end
|
897
|
-
}
|
898
|
-
|
899
|
-
action extend_string_escaped {
|
900
|
-
current_literal = literal
|
901
|
-
# Get the first character after the backslash.
|
902
|
-
escaped_char = @source_buffer.slice(@escape_s).chr
|
903
|
-
|
904
|
-
if current_literal.munge_escape? escaped_char
|
905
|
-
# If this particular literal uses this character as an opening
|
906
|
-
# or closing delimiter, it is an escape sequence for that
|
907
|
-
# particular character. Write it without the backslash.
|
908
|
-
|
909
|
-
if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
|
910
|
-
# Regular expressions should include escaped delimiters in their
|
911
|
-
# escaped form, except when the escaped character is
|
912
|
-
# a closing delimiter but not a regexp metacharacter.
|
913
|
-
#
|
914
|
-
# The backslash itself cannot be used as a closing delimiter
|
915
|
-
# at the same time as an escape symbol, but it is always munged,
|
916
|
-
# so this branch also executes for the non-closing-delimiter case
|
917
|
-
# for the backslash.
|
918
|
-
current_literal.extend_string(tok, @ts, @te)
|
919
|
-
else
|
920
|
-
current_literal.extend_string(escaped_char, @ts, @te)
|
921
|
-
end
|
922
|
-
else
|
923
|
-
# It does not. So this is an actual escape sequence, yay!
|
924
|
-
if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
|
925
|
-
# Squiggly heredocs like
|
926
|
-
# <<~-HERE
|
927
|
-
# 1\
|
928
|
-
# 2
|
929
|
-
# HERE
|
930
|
-
# treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
|
931
|
-
# This information is emitted as is, without escaping,
|
932
|
-
# later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
|
933
|
-
current_literal.extend_string(tok, @ts, @te)
|
934
|
-
elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
|
935
|
-
# Heredocs, regexp and a few other types of literals support line
|
936
|
-
# continuation via \\\n sequence. The code like
|
937
|
-
# "a\
|
938
|
-
# b"
|
939
|
-
# must be parsed as "ab"
|
940
|
-
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
|
941
|
-
elsif current_literal.regexp? && @version >= 31 && %w[c C m M].include?(escaped_char)
|
942
|
-
# Ruby >= 3.1 escapes \c- and \m chars, that's the only escape sequence
|
943
|
-
# supported by regexes so far, so it needs a separate branch.
|
944
|
-
current_literal.extend_string(@escape, @ts, @te)
|
945
|
-
elsif current_literal.regexp?
|
946
|
-
# Regular expressions should include escape sequences in their
|
947
|
-
# escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
|
948
|
-
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
|
949
|
-
else
|
950
|
-
current_literal.extend_string(@escape || tok, @ts, @te)
|
951
|
-
end
|
952
|
-
end
|
953
|
-
}
|
954
|
-
|
955
|
-
# Extend a string with a newline or a EOF character.
|
956
|
-
# As heredoc closing line can immediately precede EOF, this action
|
957
|
-
# has to handle such case specially.
|
958
|
-
action extend_string_eol {
|
959
|
-
current_literal = literal
|
960
|
-
if @te == pe
|
961
|
-
diagnostic :fatal, :string_eof, nil,
|
962
|
-
range(current_literal.str_s, current_literal.str_s + 1)
|
963
|
-
end
|
964
|
-
|
965
|
-
if current_literal.heredoc?
|
966
|
-
line = tok(@herebody_s, @ts).gsub(/\r+$/, ''.freeze)
|
967
|
-
|
968
|
-
if version?(18, 19, 20)
|
969
|
-
# See ruby:c48b4209c
|
970
|
-
line = line.gsub(/\r.*$/, ''.freeze)
|
971
|
-
end
|
972
|
-
|
973
|
-
# Try ending the heredoc with the complete most recently
|
974
|
-
# scanned line. @herebody_s always refers to the start of such line.
|
975
|
-
if current_literal.nest_and_try_closing(line, @herebody_s, @ts)
|
976
|
-
# Adjust @herebody_s to point to the next line.
|
977
|
-
@herebody_s = @te
|
978
|
-
|
979
|
-
# Continue regular lexing after the heredoc reference (<<END).
|
980
|
-
p = current_literal.heredoc_e - 1
|
981
|
-
fnext *pop_literal; fbreak;
|
982
|
-
else
|
983
|
-
# Calculate indentation level for <<~HEREDOCs.
|
984
|
-
current_literal.infer_indent_level(line)
|
985
|
-
|
986
|
-
# Ditto.
|
987
|
-
@herebody_s = @te
|
988
|
-
end
|
989
|
-
else
|
990
|
-
# Try ending the literal with a newline.
|
991
|
-
if current_literal.nest_and_try_closing(tok, @ts, @te)
|
992
|
-
fnext *pop_literal; fbreak;
|
993
|
-
end
|
994
|
-
|
995
|
-
if @herebody_s
|
996
|
-
# This is a regular literal intertwined with a heredoc. Like:
|
997
|
-
#
|
998
|
-
# p <<-foo+"1
|
999
|
-
# bar
|
1000
|
-
# foo
|
1001
|
-
# 2"
|
1002
|
-
#
|
1003
|
-
# which, incidentally, evaluates to "bar\n1\n2".
|
1004
|
-
p = @herebody_s - 1
|
1005
|
-
@herebody_s = nil
|
1006
|
-
end
|
1007
|
-
end
|
1008
|
-
|
1009
|
-
if current_literal.words? && !eof_codepoint?(@source_pts[p])
|
1010
|
-
current_literal.extend_space @ts, @te
|
1011
|
-
else
|
1012
|
-
# A literal newline is appended if the heredoc was _not_ closed
|
1013
|
-
# this time (see fbreak above). See also Literal#nest_and_try_closing
|
1014
|
-
# for rationale of calling #flush_string here.
|
1015
|
-
current_literal.extend_string tok, @ts, @te
|
1016
|
-
current_literal.flush_string
|
1017
|
-
end
|
1018
|
-
}
|
1019
|
-
|
1020
|
-
action extend_string_space {
|
1021
|
-
literal.extend_space @ts, @te
|
1022
|
-
}
|
689
|
+
| 'r' % { @num_xfrm = @emit_rational }
|
690
|
+
| 'ri' % { @num_xfrm = @emit_imaginary_rational }
|
691
|
+
| 'rescue' % { @num_xfrm = @emit_float_rescue };
|
1023
692
|
|
1024
693
|
#
|
1025
694
|
# === INTERPOLATION PARSING ===
|
1026
695
|
#
|
1027
696
|
|
1028
|
-
# Interpolations with immediate variable names simply call into
|
1029
|
-
# the corresponding machine.
|
1030
|
-
|
1031
|
-
interp_var = '#' ( global_var | class_var_v | instance_var_v );
|
1032
|
-
|
1033
|
-
action extend_interp_var {
|
1034
|
-
current_literal = literal
|
1035
|
-
current_literal.flush_string
|
1036
|
-
current_literal.extend_content
|
1037
|
-
|
1038
|
-
emit(:tSTRING_DVAR, nil, @ts, @ts + 1)
|
1039
|
-
|
1040
|
-
p = @ts
|
1041
|
-
fcall expr_variable;
|
1042
|
-
}
|
1043
|
-
|
1044
|
-
# Special case for Ruby > 2.7
|
1045
|
-
# If interpolated instance/class variable starts with a digit we parse it as a plain substring
|
1046
|
-
# However, "#$1" is still a regular interpolation
|
1047
|
-
interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
|
1048
|
-
|
1049
|
-
action extend_interp_digit_var {
|
1050
|
-
if @version >= 27
|
1051
|
-
literal.extend_string(tok, @ts, @te)
|
1052
|
-
else
|
1053
|
-
message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
|
1054
|
-
diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
|
1055
|
-
end
|
1056
|
-
}
|
1057
|
-
|
1058
|
-
# Interpolations with code blocks must match nested curly braces, as
|
1059
|
-
# interpolation ending is ambiguous with a block ending. So, every
|
1060
|
-
# opening and closing brace should be matched with e_[lr]brace rules,
|
1061
|
-
# which automatically perform the counting.
|
1062
|
-
#
|
1063
|
-
# Note that interpolations can themselves be nested, so brace balance
|
1064
|
-
# is tied to the innermost literal.
|
1065
|
-
#
|
1066
|
-
# Also note that literals themselves should not use e_[lr]brace rules
|
1067
|
-
# when matching their opening and closing delimiters, as the amount of
|
1068
|
-
# braces inside the characters of a string literal is independent.
|
1069
|
-
|
1070
|
-
interp_code = '#{';
|
1071
|
-
|
1072
697
|
e_lbrace = '{' % {
|
1073
|
-
|
1074
|
-
|
1075
|
-
current_literal = literal
|
1076
|
-
if current_literal
|
1077
|
-
current_literal.start_interp_brace
|
1078
|
-
end
|
698
|
+
e_lbrace
|
1079
699
|
};
|
1080
700
|
|
1081
701
|
e_rbrace = '}' % {
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
emit(:tRCURLY, '}'.freeze, p - 1, p)
|
1087
|
-
@cond.lexpop
|
1088
|
-
@cmdarg.lexpop
|
1089
|
-
else
|
1090
|
-
emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
|
1091
|
-
end
|
1092
|
-
|
1093
|
-
if current_literal.saved_herebody_s
|
1094
|
-
@herebody_s = current_literal.saved_herebody_s
|
1095
|
-
end
|
1096
|
-
|
1097
|
-
|
1098
|
-
fhold;
|
1099
|
-
fnext *next_state_for_literal(current_literal);
|
1100
|
-
fbreak;
|
1101
|
-
end
|
702
|
+
if @strings.close_interp_on_current_literal(p)
|
703
|
+
fhold;
|
704
|
+
fnext inside_string;
|
705
|
+
fbreak;
|
1102
706
|
end
|
1103
707
|
|
1104
708
|
@paren_nest -= 1
|
1105
709
|
};
|
1106
710
|
|
1107
|
-
action extend_interp_code {
|
1108
|
-
current_literal = literal
|
1109
|
-
current_literal.flush_string
|
1110
|
-
current_literal.extend_content
|
1111
|
-
|
1112
|
-
emit(:tSTRING_DBEG, '#{'.freeze)
|
1113
|
-
|
1114
|
-
if current_literal.heredoc?
|
1115
|
-
current_literal.saved_herebody_s = @herebody_s
|
1116
|
-
@herebody_s = nil
|
1117
|
-
end
|
1118
|
-
|
1119
|
-
current_literal.start_interp_brace
|
1120
|
-
@command_start = true
|
1121
|
-
fnext expr_value;
|
1122
|
-
fbreak;
|
1123
|
-
}
|
1124
|
-
|
1125
|
-
# Actual string parsers are simply combined from the primitives defined
|
1126
|
-
# above.
|
1127
|
-
|
1128
|
-
interp_words := |*
|
1129
|
-
interp_code => extend_interp_code;
|
1130
|
-
interp_digit_var => extend_interp_digit_var;
|
1131
|
-
interp_var => extend_interp_var;
|
1132
|
-
e_bs escape => extend_string_escaped;
|
1133
|
-
c_space+ => extend_string_space;
|
1134
|
-
c_eol => extend_string_eol;
|
1135
|
-
c_any => extend_string;
|
1136
|
-
*|;
|
1137
|
-
|
1138
|
-
interp_string := |*
|
1139
|
-
interp_code => extend_interp_code;
|
1140
|
-
interp_digit_var => extend_interp_digit_var;
|
1141
|
-
interp_var => extend_interp_var;
|
1142
|
-
e_bs escape => extend_string_escaped;
|
1143
|
-
c_eol => extend_string_eol;
|
1144
|
-
c_any => extend_string;
|
1145
|
-
*|;
|
1146
|
-
|
1147
|
-
plain_words := |*
|
1148
|
-
e_bs c_any => extend_string_escaped;
|
1149
|
-
c_space+ => extend_string_space;
|
1150
|
-
c_eol => extend_string_eol;
|
1151
|
-
c_any => extend_string;
|
1152
|
-
*|;
|
1153
|
-
|
1154
|
-
plain_string := |*
|
1155
|
-
'\\' c_nl => extend_string_eol;
|
1156
|
-
e_bs c_any => extend_string_escaped;
|
1157
|
-
c_eol => extend_string_eol;
|
1158
|
-
c_any => extend_string;
|
1159
|
-
*|;
|
1160
|
-
|
1161
|
-
interp_backslash_delimited := |*
|
1162
|
-
interp_code => extend_interp_code;
|
1163
|
-
interp_digit_var => extend_interp_digit_var;
|
1164
|
-
interp_var => extend_interp_var;
|
1165
|
-
c_eol => extend_string_eol;
|
1166
|
-
c_any => extend_string;
|
1167
|
-
*|;
|
1168
|
-
|
1169
|
-
plain_backslash_delimited := |*
|
1170
|
-
c_eol => extend_string_eol;
|
1171
|
-
c_any => extend_string;
|
1172
|
-
*|;
|
1173
|
-
|
1174
|
-
interp_backslash_delimited_words := |*
|
1175
|
-
interp_code => extend_interp_code;
|
1176
|
-
interp_digit_var => extend_interp_digit_var;
|
1177
|
-
interp_var => extend_interp_var;
|
1178
|
-
c_space+ => extend_string_space;
|
1179
|
-
c_eol => extend_string_eol;
|
1180
|
-
c_any => extend_string;
|
1181
|
-
*|;
|
1182
|
-
|
1183
|
-
plain_backslash_delimited_words := |*
|
1184
|
-
c_space+ => extend_string_space;
|
1185
|
-
c_eol => extend_string_eol;
|
1186
|
-
c_any => extend_string;
|
1187
|
-
*|;
|
1188
|
-
|
1189
|
-
regexp_modifiers := |*
|
1190
|
-
[A-Za-z]+
|
1191
|
-
=> {
|
1192
|
-
unknown_options = tok.scan(/[^imxouesn]/)
|
1193
|
-
if unknown_options.any?
|
1194
|
-
diagnostic :error, :regexp_options,
|
1195
|
-
{ :options => unknown_options.join }
|
1196
|
-
end
|
1197
|
-
|
1198
|
-
emit(:tREGEXP_OPT)
|
1199
|
-
fnext expr_end;
|
1200
|
-
fbreak;
|
1201
|
-
};
|
1202
|
-
|
1203
|
-
any
|
1204
|
-
=> {
|
1205
|
-
emit(:tREGEXP_OPT, tok(@ts, @te - 1), @ts, @te - 1)
|
1206
|
-
fhold;
|
1207
|
-
fgoto expr_end;
|
1208
|
-
};
|
1209
|
-
*|;
|
1210
|
-
|
1211
711
|
#
|
1212
712
|
# === WHITESPACE HANDLING ===
|
1213
713
|
#
|
@@ -1221,16 +721,20 @@ class Next
|
|
1221
721
|
# comment is deemed equivalent to non-newline whitespace
|
1222
722
|
# (c_space character class).
|
1223
723
|
|
724
|
+
e_nl = c_nl % {
|
725
|
+
p = on_newline(p)
|
726
|
+
};
|
727
|
+
|
1224
728
|
w_space =
|
1225
729
|
c_space+
|
1226
|
-
| '\\'
|
730
|
+
| '\\' e_nl
|
1227
731
|
;
|
1228
732
|
|
1229
733
|
w_comment =
|
1230
734
|
'#' %{ @sharp_s = p - 1 }
|
1231
735
|
# The (p == pe) condition compensates for added "\0" and
|
1232
736
|
# the way Ragel handles EOF.
|
1233
|
-
c_line* %{
|
737
|
+
c_line* %{ emit_comment_from_range(p, pe) }
|
1234
738
|
;
|
1235
739
|
|
1236
740
|
w_space_comment =
|
@@ -1253,7 +757,7 @@ class Next
|
|
1253
757
|
# is equivalent to `foo = "bar\n" + 2`.
|
1254
758
|
|
1255
759
|
w_newline =
|
1256
|
-
|
760
|
+
e_nl;
|
1257
761
|
|
1258
762
|
w_any =
|
1259
763
|
w_space
|
@@ -1341,34 +845,22 @@ class Next
|
|
1341
845
|
expr_variable := |*
|
1342
846
|
global_var
|
1343
847
|
=> {
|
1344
|
-
|
1345
|
-
emit(:tNTH_REF, tok(@ts + 1).to_i)
|
1346
|
-
elsif tok =~ /^\$([&`'+])$/
|
1347
|
-
emit(:tBACK_REF)
|
1348
|
-
else
|
1349
|
-
emit(:tGVAR)
|
1350
|
-
end
|
848
|
+
emit_global_var
|
1351
849
|
|
1352
850
|
fnext *stack_pop; fbreak;
|
1353
851
|
};
|
1354
852
|
|
1355
853
|
class_var_v
|
1356
854
|
=> {
|
1357
|
-
|
1358
|
-
diagnostic :error, :cvar_name, { :name => tok }
|
1359
|
-
end
|
855
|
+
emit_class_var
|
1360
856
|
|
1361
|
-
emit(:tCVAR)
|
1362
857
|
fnext *stack_pop; fbreak;
|
1363
858
|
};
|
1364
859
|
|
1365
860
|
instance_var_v
|
1366
861
|
=> {
|
1367
|
-
|
1368
|
-
diagnostic :error, :ivar_name, { :name => tok }
|
1369
|
-
end
|
862
|
+
emit_instance_var
|
1370
863
|
|
1371
|
-
emit(:tIVAR)
|
1372
864
|
fnext *stack_pop; fbreak;
|
1373
865
|
};
|
1374
866
|
*|;
|
@@ -1418,7 +910,8 @@ class Next
|
|
1418
910
|
=> {
|
1419
911
|
if version?(23)
|
1420
912
|
type, delimiter = tok[0..-2], tok[-1].chr
|
1421
|
-
|
913
|
+
@strings.push_literal(type, delimiter, @ts)
|
914
|
+
fgoto inside_string;
|
1422
915
|
else
|
1423
916
|
p = @ts - 1
|
1424
917
|
fgoto expr_end;
|
@@ -1569,14 +1062,7 @@ class Next
|
|
1569
1062
|
| '<<'
|
1570
1063
|
)
|
1571
1064
|
=> {
|
1572
|
-
|
1573
|
-
# Ambiguous regexp literal.
|
1574
|
-
if @version < 30
|
1575
|
-
diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
|
1576
|
-
else
|
1577
|
-
diagnostic :warning, :ambiguous_regexp, nil, range(tm, tm + 1)
|
1578
|
-
end
|
1579
|
-
end
|
1065
|
+
check_ambiguous_slash(tm)
|
1580
1066
|
|
1581
1067
|
p = tm - 1
|
1582
1068
|
fgoto expr_beg;
|
@@ -1774,21 +1260,26 @@ class Next
|
|
1774
1260
|
'/' c_any
|
1775
1261
|
=> {
|
1776
1262
|
type = delimiter = tok[0].chr
|
1777
|
-
|
1263
|
+
@strings.push_literal(type, delimiter, @ts)
|
1264
|
+
|
1265
|
+
fhold;
|
1266
|
+
fgoto inside_string;
|
1778
1267
|
};
|
1779
1268
|
|
1780
1269
|
# %<string>
|
1781
1270
|
'%' ( c_ascii - [A-Za-z0-9] )
|
1782
1271
|
=> {
|
1783
|
-
type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
|
1784
|
-
|
1272
|
+
type, delimiter = @source_buffer.slice(@ts, 1).chr, tok[-1].chr
|
1273
|
+
@strings.push_literal(type, delimiter, @ts)
|
1274
|
+
fgoto inside_string;
|
1785
1275
|
};
|
1786
1276
|
|
1787
1277
|
# %w(we are the people)
|
1788
1278
|
'%' [A-Za-z] (c_ascii - [A-Za-z0-9])
|
1789
1279
|
=> {
|
1790
1280
|
type, delimiter = tok[0..-2], tok[-1].chr
|
1791
|
-
|
1281
|
+
@strings.push_literal(type, delimiter, @ts)
|
1282
|
+
fgoto inside_string;
|
1792
1283
|
};
|
1793
1284
|
|
1794
1285
|
'%' c_eof
|
@@ -1834,10 +1325,11 @@ class Next
|
|
1834
1325
|
p = @ts + 1
|
1835
1326
|
fnext expr_beg; fbreak;
|
1836
1327
|
else
|
1837
|
-
|
1328
|
+
@strings.push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
|
1329
|
+
@strings.herebody_s ||= new_herebody_s
|
1838
1330
|
|
1839
|
-
@herebody_s
|
1840
|
-
|
1331
|
+
p = @strings.herebody_s - 1
|
1332
|
+
fnext inside_string;
|
1841
1333
|
end
|
1842
1334
|
};
|
1843
1335
|
|
@@ -1871,7 +1363,9 @@ class Next
|
|
1871
1363
|
':' ['"] # '
|
1872
1364
|
=> {
|
1873
1365
|
type, delimiter = tok, tok[-1].chr
|
1874
|
-
|
1366
|
+
@strings.push_literal(type, delimiter, @ts);
|
1367
|
+
|
1368
|
+
fgoto inside_string;
|
1875
1369
|
};
|
1876
1370
|
|
1877
1371
|
# :!@ is :!
|
@@ -1900,12 +1394,7 @@ class Next
|
|
1900
1394
|
| '@@' %{ tm = p - 2; diag_msg = :cvar_name }
|
1901
1395
|
) [0-9]*
|
1902
1396
|
=> {
|
1903
|
-
|
1904
|
-
diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
|
1905
|
-
else
|
1906
|
-
emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
|
1907
|
-
p = @ts
|
1908
|
-
end
|
1397
|
+
emit_colon_with_digits(p, tm, diag_msg)
|
1909
1398
|
|
1910
1399
|
fnext expr_end; fbreak;
|
1911
1400
|
};
|
@@ -1916,29 +1405,19 @@ class Next
|
|
1916
1405
|
|
1917
1406
|
# Character constant, like ?a, ?\n, ?\u1000, and so on
|
1918
1407
|
# Don't accept \u escape with multiple codepoints, like \u{1 2 3}
|
1919
|
-
'?'
|
1920
|
-
| (c_any - c_space_nl - e_bs) % { @escape = nil }
|
1921
|
-
)
|
1408
|
+
'?' c_any
|
1922
1409
|
=> {
|
1923
|
-
|
1410
|
+
p, next_state = @strings.read_character_constant(@ts)
|
1411
|
+
fhold; # Ragel will do `p += 1` to consume input, prevent it
|
1924
1412
|
|
1925
|
-
|
1926
|
-
|
1413
|
+
# If the strings lexer finds a character constant (?a), emit it,
|
1414
|
+
# otherwise read ternary operator
|
1415
|
+
if @token_queue.empty?
|
1416
|
+
fgoto *next_state;
|
1927
1417
|
else
|
1928
|
-
|
1418
|
+
fnext *next_state;
|
1419
|
+
fbreak;
|
1929
1420
|
end
|
1930
|
-
|
1931
|
-
fnext expr_end; fbreak;
|
1932
|
-
};
|
1933
|
-
|
1934
|
-
'?' c_space_nl
|
1935
|
-
=> {
|
1936
|
-
escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
|
1937
|
-
"\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
|
1938
|
-
diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
|
1939
|
-
|
1940
|
-
p = @ts - 1
|
1941
|
-
fgoto expr_end;
|
1942
1421
|
};
|
1943
1422
|
|
1944
1423
|
'?' c_eof
|
@@ -1946,13 +1425,6 @@ class Next
|
|
1946
1425
|
diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
|
1947
1426
|
};
|
1948
1427
|
|
1949
|
-
# f ?aa : b: Disambiguate with a character literal.
|
1950
|
-
'?' [A-Za-z_] bareword
|
1951
|
-
=> {
|
1952
|
-
p = @ts - 1
|
1953
|
-
fgoto expr_end;
|
1954
|
-
};
|
1955
|
-
|
1956
1428
|
#
|
1957
1429
|
# AMBIGUOUS EMPTY BLOCK ARGUMENTS
|
1958
1430
|
#
|
@@ -2028,7 +1500,7 @@ class Next
|
|
2028
1500
|
if version?(18)
|
2029
1501
|
ident = tok(@ts, @te - 2)
|
2030
1502
|
|
2031
|
-
emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
|
1503
|
+
emit((@source_buffer.slice(@ts, 1) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
|
2032
1504
|
ident, @ts, @te - 2)
|
2033
1505
|
fhold; # continue as a symbol
|
2034
1506
|
|
@@ -2133,7 +1605,7 @@ class Next
|
|
2133
1605
|
|
2134
1606
|
w_any;
|
2135
1607
|
|
2136
|
-
|
1608
|
+
e_nl '=begin' ( c_space | c_nl_zlen )
|
2137
1609
|
=> {
|
2138
1610
|
p = @ts - 1
|
2139
1611
|
@cs_before_block_comment = @cs
|
@@ -2186,7 +1658,8 @@ class Next
|
|
2186
1658
|
# "bar", 'baz'
|
2187
1659
|
['"] # '
|
2188
1660
|
=> {
|
2189
|
-
|
1661
|
+
@strings.push_literal(tok, tok, @ts)
|
1662
|
+
fgoto inside_string;
|
2190
1663
|
};
|
2191
1664
|
|
2192
1665
|
w_space_comment;
|
@@ -2247,8 +1720,7 @@ class Next
|
|
2247
1720
|
fnext expr_fname; fbreak; };
|
2248
1721
|
|
2249
1722
|
'class' w_any* '<<'
|
2250
|
-
=> {
|
2251
|
-
emit(:tLSHFT, '<<'.freeze, @te - 2, @te)
|
1723
|
+
=> { emit_singleton_class
|
2252
1724
|
fnext expr_value; fbreak; };
|
2253
1725
|
|
2254
1726
|
# a if b:c: Syntax error.
|
@@ -2307,27 +1779,13 @@ class Next
|
|
2307
1779
|
| '0' digit* '_'? %{ @num_base = 8; @num_digits_s = @ts } int_dec
|
2308
1780
|
) %{ @num_suffix_s = p } int_suffix
|
2309
1781
|
=> {
|
2310
|
-
digits =
|
2311
|
-
|
2312
|
-
if digits.end_with? '_'.freeze
|
2313
|
-
diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
|
2314
|
-
range(@te - 1, @te)
|
2315
|
-
elsif digits.empty? && @num_base == 8 && version?(18)
|
2316
|
-
# 1.8 did not raise an error on 0o.
|
2317
|
-
digits = '0'.freeze
|
2318
|
-
elsif digits.empty?
|
2319
|
-
diagnostic :error, :empty_numeric
|
2320
|
-
elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
|
2321
|
-
invalid_s = @num_digits_s + invalid_idx
|
2322
|
-
diagnostic :error, :invalid_octal, nil,
|
2323
|
-
range(invalid_s, invalid_s + 1)
|
2324
|
-
end
|
1782
|
+
digits = numeric_literal_int
|
2325
1783
|
|
2326
1784
|
if version?(18, 19, 20)
|
2327
1785
|
emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s)
|
2328
1786
|
p = @num_suffix_s - 1
|
2329
1787
|
else
|
2330
|
-
@num_xfrm.call(digits.to_i(@num_base))
|
1788
|
+
p = @num_xfrm.call(digits.to_i(@num_base), p)
|
2331
1789
|
end
|
2332
1790
|
fbreak;
|
2333
1791
|
};
|
@@ -2372,7 +1830,7 @@ class Next
|
|
2372
1830
|
emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
|
2373
1831
|
p = @num_suffix_s - 1
|
2374
1832
|
else
|
2375
|
-
@num_xfrm.call(digits)
|
1833
|
+
p = @num_xfrm.call(digits, p)
|
2376
1834
|
end
|
2377
1835
|
fbreak;
|
2378
1836
|
};
|
@@ -2385,7 +1843,8 @@ class Next
|
|
2385
1843
|
'`' | ['"] # '
|
2386
1844
|
=> {
|
2387
1845
|
type, delimiter = tok, tok[-1].chr
|
2388
|
-
|
1846
|
+
@strings.push_literal(type, delimiter, @ts, nil, false, false, true);
|
1847
|
+
fgoto inside_string;
|
2389
1848
|
};
|
2390
1849
|
|
2391
1850
|
#
|
@@ -2470,15 +1929,7 @@ class Next
|
|
2470
1929
|
|
2471
1930
|
e_rbrace | e_rparen | e_rbrack
|
2472
1931
|
=> {
|
2473
|
-
|
2474
|
-
|
2475
|
-
if @version < 24
|
2476
|
-
@cond.lexpop
|
2477
|
-
@cmdarg.lexpop
|
2478
|
-
else
|
2479
|
-
@cond.pop
|
2480
|
-
@cmdarg.pop
|
2481
|
-
end
|
1932
|
+
emit_rbrace_rparen_rbrack
|
2482
1933
|
|
2483
1934
|
if tok == '}'.freeze || tok == ']'.freeze
|
2484
1935
|
if @version >= 25
|
@@ -2635,6 +2086,17 @@ class Next
|
|
2635
2086
|
c_eof => do_eof;
|
2636
2087
|
*|;
|
2637
2088
|
|
2089
|
+
inside_string := |*
|
2090
|
+
any
|
2091
|
+
=> {
|
2092
|
+
p, next_state = @strings.advance(p)
|
2093
|
+
|
2094
|
+
fhold; # Ragel will do `p += 1` to consume input, prevent it
|
2095
|
+
fnext *next_state;
|
2096
|
+
fbreak;
|
2097
|
+
};
|
2098
|
+
*|;
|
2099
|
+
|
2638
2100
|
}%%
|
2639
2101
|
# %
|
2640
2102
|
end
|