ruby-next-parser 3.2.2.0 → 3.4.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,2103 +0,0 @@
1
- %%machine lex; # % fix highlighting
2
-
3
- #
4
- # === BEFORE YOU START ===
5
- #
6
- # Read the Ruby Hacking Guide chapter 11, available in English at
7
- # http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
8
- #
9
- # Remember two things about Ragel scanners:
10
- #
11
- # 1) Longest match wins.
12
- #
13
- # 2) If two matches have the same length, the first
14
- # in source code wins.
15
- #
16
- # General rules of making Ragel and Bison happy:
17
- #
18
- # * `p` (position) and `@te` contain the index of the character
19
- # they're pointing to ("current"), plus one. `@ts` contains the index
20
- # of the corresponding character. The code for extracting matched token is:
21
- #
22
- # @source_buffer.slice(@ts...@te)
23
- #
24
- # * If your input is `foooooooobar` and the rule is:
25
- #
26
- # 'f' 'o'+
27
- #
28
- # the result will be:
29
- #
30
- # foooooooobar
31
- # ^ ts=0 ^ p=te=9
32
- #
33
- # * A Ragel lexer action should not emit more than one token, unless
34
- # you know what you are doing.
35
- #
36
- # * All Ragel commands (fnext, fgoto, ...) end with a semicolon.
37
- #
38
- # * If an action emits the token and transitions to another state, use
39
- # these Ragel commands:
40
- #
41
- # emit($whatever)
42
- # fnext $next_state; fbreak;
43
- #
44
- # If you perform `fgoto` in an action which neither emits a token nor
45
- # rewinds the stream pointer, the parser's side-effectful,
46
- # context-sensitive lookahead actions will break in a hard to detect
47
- # and debug way.
48
- #
49
- # * If an action does not emit a token:
50
- #
51
- # fgoto $next_state;
52
- #
53
- # * If an action features lookbehind, i.e. matches characters with the
54
- # intent of passing them to another action:
55
- #
56
- # p = @ts - 1
57
- # fgoto $next_state;
58
- #
59
- # or, if the lookbehind consists of a single character:
60
- #
61
- # fhold; fgoto $next_state;
62
- #
63
- # * Ragel merges actions. So, if you have `e_lparen = '(' %act` and
64
- # `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
65
- # _will_ invoke the action `act`.
66
- #
67
- # e_something stands for "something with **e**mbedded action".
68
- #
69
- # * EOF is explicit and is matched by `c_eof`. If you want to introspect
70
- # the state of the lexer, add this rule to the state:
71
- #
72
- # c_eof => do_eof;
73
- #
74
- # * If you proceed past EOF, the lexer will complain:
75
- #
76
- # NoMethodError: undefined method `ord' for nil:NilClass
77
- #
78
-
79
- class Parser::Lexer
80
- class Next
81
-
82
- %% write data nofinal;
83
- # %
84
-
85
- attr_reader :source_buffer
86
-
87
- attr_accessor :diagnostics
88
- attr_accessor :static_env
89
- attr_accessor :force_utf32
90
-
91
- attr_accessor :cond, :cmdarg, :context, :command_start
92
-
93
- attr_accessor :tokens, :comments
94
-
95
- attr_reader :paren_nest, :cmdarg_stack, :cond_stack, :lambda_stack, :version
96
-
97
# Creates a lexer for the given grammar +version+ and puts it into its
# initial state via #reset.
#
# Besides plain bookkeeping, this prepares the numeric "transformations":
# lambdas that take the literal's characters and the stream position `p`,
# emit the matching token, and return the (possibly rewound) position.
def initialize(version)
  @version    = version
  @static_env = nil
  @context    = nil

  @tokens   = nil
  @comments = nil

  # Use the generated action table when the lexer class provides one.
  lexer_class = self.class
  @_lex_actions = lexer_class.respond_to?(:_lex_actions, true) ? lexer_class.send(:_lex_actions) : []

  # Integer-based literals.
  @emit_integer            = ->(chars, p) { emit(:tINTEGER, chars); p }
  @emit_rational           = ->(chars, p) { emit(:tRATIONAL, Rational(chars)); p }
  @emit_imaginary          = ->(chars, p) { emit(:tIMAGINARY, Complex(0, chars)); p }
  @emit_imaginary_rational = ->(chars, p) { emit(:tIMAGINARY, Complex(0, Rational(chars))); p }
  # The token range is shortened by 2 (or 6 for the rescue variant) and
  # the position is stepped back by the same amount.
  @emit_integer_re         = ->(chars, p) { emit(:tINTEGER, chars, @ts, @te - 2); p - 2 }
  @emit_integer_if         = ->(chars, p) { emit(:tINTEGER, chars, @ts, @te - 2); p - 2 }
  @emit_integer_rescue     = ->(chars, p) { emit(:tINTEGER, chars, @ts, @te - 6); p - 6 }

  # Float-based literals, with the same shortening scheme.
  @emit_float           = ->(chars, p) { emit(:tFLOAT, Float(chars)); p }
  @emit_imaginary_float = ->(chars, p) { emit(:tIMAGINARY, Complex(0, Float(chars))); p }
  @emit_float_if        = ->(chars, p) { emit(:tFLOAT, Float(chars), @ts, @te - 2); p - 2 }
  @emit_float_rescue    = ->(chars, p) { emit(:tFLOAT, Float(chars), @ts, @te - 6); p - 6 }

  reset
end
127
-
128
# Restores the lexer to a pristine state.
#
# reset_state - when true (the default) the Ragel state (@cs) and the
#               cond/cmdarg state stacks are re-created as well; unit tests
#               set the state prior to resetting and pass false.
#
# Returns the freshly created Parser::LexerStrings helper.
def reset(reset_state=true)
  if reset_state
    @cs = self.class.lex_en_line_begin

    @cond         = StackState.new('cond')
    @cmdarg       = StackState.new('cmdarg')
    @cond_stack   = []
    @cmdarg_stack = []
  end

  # Set to true by some tests.
  @force_utf32 = false

  # The source as an array of codepoints.
  @source_pts = nil

  # Ragel scanner registers: stream position (saved manually in #advance),
  # token start, token end, next action.
  @p   = 0
  @ts  = nil
  @te  = nil
  @act = 0

  # Ragel state stack and its top pointer.
  @stack = []
  @top   = 0

  # Tokens produced by actions but not yet returned from #advance.
  @token_queue = []

  # Locations of the last encountered `=begin`, `#` and newline.
  @eq_begin_s = nil
  @sharp_s    = nil
  @newline_s  = nil

  # Numeric literal scratch values: base, start of digits, start of
  # suffix, and the suffix-induced transformation.
  @num_base     = nil
  @num_digits_s = nil
  @num_suffix_s = nil
  @num_xfrm     = nil

  # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is encountered
  # after a matching closing parenthesis.
  @paren_nest   = 0
  @lambda_stack = []

  # When the lexer is in "command state" (aka expr_value) on entry to
  # #advance, it transitions to expr_cmdarg rather than expr_arg at
  # certain points.
  @command_start = true

  # Lexer state in effect before a =begin / =end block comment.
  @cs_before_block_comment = self.class.lex_en_line_begin

  @strings = Parser::LexerStrings.new(self, @version)
end
180
-
181
# Attaches (or, when given nil, detaches) the buffer to be lexed.
#
# The source text is unpacked into an array of integers: codepoints for
# UTF-8 sources, raw bytes for every other encoding. A leading U+FEFF
# byte order mark is skipped by starting at position 1. The string
# sub-lexer receives the same buffer and codepoint array.
def source_buffer=(source_buffer)
  @source_buffer = source_buffer
  @source_pts    = nil

  if source_buffer
    text      = source_buffer.source
    directive = text.encoding == Encoding::UTF_8 ? 'U*' : 'C*'

    @source_pts = text.unpack(directive)

    # Skip byte order mark.
    @p = 1 if @source_pts[0] == 0xfeff
  end

  @strings.source_buffer = @source_buffer
  @strings.source_pts = @source_pts
end
204
-
205
# Returns the encoding of the text held by the current source buffer.
def encoding
  text = @source_buffer.source
  text.encoding
end
208
-
209
- LEX_STATES = {
210
- :line_begin => lex_en_line_begin,
211
- :expr_dot => lex_en_expr_dot,
212
- :expr_fname => lex_en_expr_fname,
213
- :expr_value => lex_en_expr_value,
214
- :expr_beg => lex_en_expr_beg,
215
- :expr_mid => lex_en_expr_mid,
216
- :expr_arg => lex_en_expr_arg,
217
- :expr_cmdarg => lex_en_expr_cmdarg,
218
- :expr_end => lex_en_expr_end,
219
- :expr_endarg => lex_en_expr_endarg,
220
- :expr_endfn => lex_en_expr_endfn,
221
- :expr_labelarg => lex_en_expr_labelarg,
222
-
223
- :inside_string => lex_en_inside_string
224
- }
225
-
226
# Returns the symbolic name of the current lexer state, or the raw Ragel
# state number when it is not one of the LEX_STATES entry points.
def state
  names_by_id = LEX_STATES.invert
  names_by_id.fetch(@cs) { @cs }
end
229
-
230
# Switches the lexer to the named state.
#
# Raises KeyError when +state+ is not a key of LEX_STATES.
def state=(state)
  entry_point = LEX_STATES.fetch(state)
  @cs = entry_point
end
233
-
234
# Saves the current cmdarg state on its stack and starts a fresh one,
# named after the resulting stack depth.
def push_cmdarg
  @cmdarg_stack << @cmdarg
  depth = @cmdarg_stack.size
  @cmdarg = StackState.new("cmdarg.#{depth}")
end
238
-
239
# Restores the most recently saved cmdarg state (nil if none was saved).
def pop_cmdarg
  saved = @cmdarg_stack.pop
  @cmdarg = saved
end
242
-
243
# Saves the current cond state on its stack and starts a fresh one,
# named after the resulting stack depth.
def push_cond
  @cond_stack << @cond
  depth = @cond_stack.size
  @cond = StackState.new("cond.#{depth}")
end
247
-
248
# Restores the most recently saved cond state (nil if none was saved).
def pop_cond
  saved = @cond_stack.pop
  @cond = saved
end
251
-
252
# Returns the dedent level as reported by the string sub-lexer.
def dedent_level
  string_lexer = @strings
  string_lexer.dedent_level
end
255
-
256
- # Return the next token as [type, [value, range]]; once nothing is left the result is [false, ['$eof', range]] (or '$error' when the scanner is in its error state).
257
- def advance
258
- unless @token_queue.empty?
259
- return @token_queue.shift # serve tokens queued earlier before running the scanner again
260
- end
261
-
262
- # Ugly, but dependent on Ragel output: the generated tables are copied into the local names that the `write exec` code below refers to. Consider refactoring it somehow.
263
- klass = self.class
264
- _lex_trans_keys = klass.send :_lex_trans_keys
265
- _lex_key_spans = klass.send :_lex_key_spans
266
- _lex_index_offsets = klass.send :_lex_index_offsets
267
- _lex_indicies = klass.send :_lex_indicies
268
- _lex_trans_targs = klass.send :_lex_trans_targs
269
- _lex_trans_actions = klass.send :_lex_trans_actions
270
- _lex_to_state_actions = klass.send :_lex_to_state_actions
271
- _lex_from_state_actions = klass.send :_lex_from_state_actions
272
- _lex_eof_trans = klass.send :_lex_eof_trans
273
- _lex_actions = @_lex_actions
274
-
275
- pe = @source_pts.size + 2 # NOTE(review): the +2 presumably leaves room for the virtual EOF character supplied by getkey -- confirm
276
- p, eof = @p, pe
277
-
278
- cmd_state = @command_start # remember whether this run started in command state; lexer actions read cmd_state
279
- @command_start = false
280
-
281
- %% write exec;
282
- # %
283
-
284
- # Ragel creates a local variable called `testEof` but it doesn't use
285
- # it in any assignment. This dead code is here to swallow the warning.
286
- # It has no runtime cost because Ruby doesn't produce any instructions from it.
287
- if false
288
- testEof
289
- end
290
-
291
- @p = p # persist the stream position for the next call
292
-
293
- if @token_queue.any?
294
- @token_queue.shift
295
- elsif @cs == klass.lex_error
296
- [ false, [ '$error'.freeze, range(p - 1, p) ] ]
297
- else
298
- eof = @source_pts.size # the $eof range is empty and sits right after the last source character
299
- [ false, [ '$eof'.freeze, range(eof, eof) ] ]
300
- end
301
- end
302
-
303
- protected
304
-
305
# Returns true when the lexer's grammar version equals any of +versions+.
def version?(*versions)
  versions.any? { |candidate| candidate == @version }
end
308
-
309
# Lowers the state stack pointer by one and returns the entry it now
# points at. The entry itself is left in @stack.
def stack_pop
  @top = @top - 1
  @stack.at(@top)
end
313
-
314
# Returns the source text between positions +s+ (inclusive) and +e+
# (exclusive); both default to the current token bounds.
def tok(s = @ts, e = @te)
  length = e - s
  @source_buffer.slice(s, length)
end
317
-
318
# Builds a Parser::Source::Range over the current source buffer from +s+
# to +e+; both default to the current token bounds.
def range(s = @ts, e = @te)
  buffer = @source_buffer
  Parser::Source::Range.new(buffer, s, e)
end
321
-
322
# Queues a token of the given +type+ and returns it.
#
# The token has the shape [type, [value, range]]. +value+ defaults to the
# text of the current token and the range to the current token bounds.
# When token collection is enabled (@tokens is set) the token is recorded
# there as well.
def emit(type, value = tok, s = @ts, e = @te)
  location = range(s, e)
  token    = [ type, [ value, location ] ]

  @token_queue << token
  @tokens.push(token) if @tokens

  token
end
331
-
332
# Emits the text between +s+ and +e+ as a token whose type is looked up
# in +table+ under that text.
def emit_table(table, s = @ts, e = @te)
  text = tok(s, e)
  type = table[text]

  emit(type, text, s, e)
end
337
-
338
# Emits the `do` keyword as kDO_COND, kDO_BLOCK or kDO.
#
# kDO_COND wins while the cond state is active; otherwise kDO_BLOCK is used
# when the cmdarg state is active or +do_block+ is set; kDO is the fallback.
def emit_do(do_block=false)
  type =
    if @cond.active?
      :kDO_COND
    elsif @cmdarg.active? || do_block
      :kDO_BLOCK
    else
      :kDO
    end

  emit(type, 'do'.freeze)
end
347
-
348
# Picks the state to enter after an identifier: expr_cmdarg when
# +cmd_state+ is truthy, expr_arg otherwise.
def arg_or_cmdarg(cmd_state)
  lexer_class = self.class
  cmd_state ? lexer_class.lex_en_expr_cmdarg : lexer_class.lex_en_expr_arg
end
355
-
356
# Records the comment spanning +s+...+e+.
#
# It is appended to @comments as a Parser::Source::Comment and to @tokens
# as a :tCOMMENT token, each only when that collection is enabled. The
# comment is never put on the token queue. Always returns nil.
def emit_comment(s = @ts, e = @te)
  @comments.push(Parser::Source::Comment.new(range(s, e))) if @comments

  @tokens.push([ :tCOMMENT, [ tok(s, e), range(s, e) ] ]) if @tokens

  nil
end
367
-
368
# Emits the comment that started at the last seen `#` and ends at +p+.
# When the scanner has reached +pe+, the end is pulled back by two
# positions.
def emit_comment_from_range(p, pe)
  comment_end =
    if p == pe
      p - 2
    else
      p
    end

  emit_comment(@sharp_s, comment_end)
end
371
-
372
# Builds a Parser::Diagnostic and hands it to the diagnostics engine.
# +location+ defaults to the range of the current token.
def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
  report = Parser::Diagnostic.new(type, reason, arguments, location, highlights)
  @diagnostics.process(report)
end
376
-
377
-
378
# Bookkeeping for an opening brace: pushes `false` onto both the cond and
# cmdarg states and, if a string literal is currently being lexed,
# notifies it that an interpolation brace has started.
def e_lbrace
  @cond.push(false)
  @cmdarg.push(false)

  literal = @strings.literal
  literal.start_interp_brace if literal
end
386
-
387
# Extracts and validates the digits of the integer literal being lexed.
#
# Reports a trailing `_`, an empty digit sequence, or an 8/9 digit in an
# octal literal as an error diagnostic. The one tolerated exception is an
# empty octal literal in 1.8 mode, which is read as "0".
#
# Returns the digit string.
def numeric_literal_int
  digits = tok(@num_digits_s, @num_suffix_s)

  if digits.end_with?('_'.freeze)
    diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
               range(@te - 1, @te)
  elsif digits.empty?
    if @num_base == 8 && version?(18)
      # 1.8 did not raise an error on 0o.
      digits = '0'.freeze
    else
      diagnostic :error, :empty_numeric
    end
  elsif @num_base == 8 && (bad_offset = digits.index(/[89]/))
    bad_pos = @num_digits_s + bad_offset
    diagnostic :error, :invalid_octal, nil,
               range(bad_pos, bad_pos + 1)
  end

  digits
end
405
-
406
# Forwards a newline at position +p+ to the string sub-lexer and returns
# whatever it answers.
def on_newline(p)
  string_lexer = @strings
  string_lexer.on_newline(p)
end
409
-
410
# Warns about an ambiguous regexp literal when the character at +tm+ is a
# slash. The warning is :ambiguous_literal before version 30 and
# :ambiguous_regexp from 30 on. Does nothing for any other character.
def check_ambiguous_slash(tm)
  return unless tok(tm, tm + 1) == '/'.freeze

  # Ambiguous regexp literal.
  reason = @version < 30 ? :ambiguous_literal : :ambiguous_regexp
  diagnostic :warning, reason, nil, range(tm, tm + 1)
end
420
-
421
# Emits the global variable spanning +ts+...+te+ (default: current token).
#
# The token type depends on the variable's name:
#   $1, $23, ...   => tNTH_REF, with the number as an Integer value
#   $& $` $' $+    => tBACK_REF
#   anything else  => tGVAR
#
# The name is always read from the given bounds. Previously the
# back-reference check looked at the default token bounds instead, so a
# call with explicit +ts+/+te+ could classify `$&` and friends as tGVAR.
def emit_global_var(ts = @ts, te = @te)
  name = tok(ts, te)

  if name =~ /^\$([1-9][0-9]*)$/
    # Drop the leading `$` and keep the number.
    emit(:tNTH_REF, tok(ts + 1, te).to_i, ts, te)
  elsif name =~ /^\$([&`'+])$/
    emit(:tBACK_REF, name, ts, te)
  else
    emit(:tGVAR, name, ts, te)
  end
end
430
-
431
# Emits the class variable spanning +ts+...+te+ (default: current token)
# as a tCVAR token.
#
# A name whose first character after `@@` is a digit is reported as a
# :cvar_name error first. The diagnostic is located at the same
# +ts+...+te+ span as the token; previously it always pointed at the
# default token bounds, even when explicit bounds were passed.
def emit_class_var(ts = @ts, te = @te)
  name = tok(ts, te)

  if name =~ /^@@[0-9]/
    diagnostic :error, :cvar_name, { :name => name }, range(ts, te)
  end

  emit(:tCVAR, name, ts, te)
end
438
-
439
# Emits the instance variable spanning +ts+...+te+ (default: current
# token) as a tIVAR token.
#
# A name whose first character after `@` is a digit is reported as an
# :ivar_name error first. The diagnostic is located at the same
# +ts+...+te+ span as the token; previously it always pointed at the
# default token bounds, even when explicit bounds were passed.
def emit_instance_var(ts = @ts, te = @te)
  name = tok(ts, te)

  if name =~ /^@[0-9]/
    diagnostic :error, :ivar_name, { :name => name }, range(ts, te)
  end

  emit(:tIVAR, name, ts, te)
end
446
-
447
# Emits the current token via the PUNCTUATION table and then unwinds one
# level of the cond and cmdarg states: with `pop` from version 24 on,
# with `lexpop` before that.
def emit_rbrace_rparen_rbrack
  emit_table(PUNCTUATION)

  if @version >= 24
    @cond.pop
    @cmdarg.pop
  else
    @cond.lexpop
    @cmdarg.lexpop
  end
end
458
-
459
# Handles a colon followed by a name that starts at +tm+.
#
# From version 27 on, an error diagnostic (+diag_msg+) is reported for the
# text from +tm+ to the token end and +p+ is returned unchanged. Before 27,
# only the one-character colon at the token start is emitted as tCOLON and
# the token start is returned as the new position.
def emit_colon_with_digits(p, tm, diag_msg)
  if @version < 27
    colon_end = @ts + 1
    emit(:tCOLON, tok(@ts, colon_end), @ts, colon_end)
    return @ts
  end

  diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
  p
end
468
-
469
# Emits two tokens for the current token: kCLASS over its first five
# characters and tLSHFT over its last two.
def emit_singleton_class
  class_end   = @ts + 5
  shift_start = @te - 2

  emit(:kCLASS, 'class'.freeze, @ts, class_end)
  emit(:tLSHFT, '<<'.freeze, shift_start, @te)
end
473
-
474
- # Mapping of strings to parser tokens.
475
-
476
- PUNCTUATION = {
477
- '=' => :tEQL, '&' => :tAMPER2, '|' => :tPIPE,
478
- '!' => :tBANG, '^' => :tCARET, '+' => :tPLUS,
479
- '-' => :tMINUS, '*' => :tSTAR2, '/' => :tDIVIDE,
480
- '%' => :tPERCENT, '~' => :tTILDE, ',' => :tCOMMA,
481
- ';' => :tSEMI, '.' => :tDOT, '..' => :tDOT2,
482
- '...' => :tDOT3, '[' => :tLBRACK2, ']' => :tRBRACK,
483
- '(' => :tLPAREN2, ')' => :tRPAREN, '?' => :tEH,
484
- ':' => :tCOLON, '&&' => :tANDOP, '||' => :tOROP,
485
- '-@' => :tUMINUS, '+@' => :tUPLUS, '~@' => :tTILDE,
486
- '**' => :tPOW, '->' => :tLAMBDA, '=~' => :tMATCH,
487
- '!~' => :tNMATCH, '==' => :tEQ, '!=' => :tNEQ,
488
- '>' => :tGT, '>>' => :tRSHFT, '>=' => :tGEQ,
489
- '<' => :tLT, '<<' => :tLSHFT, '<=' => :tLEQ,
490
- '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
491
- '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
492
- '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
493
- '!@' => :tBANG, '&.' => :tANDDOT, '.:' => :tMETHREF,
494
- }
495
-
496
- PUNCTUATION_BEGIN = {
497
- '&' => :tAMPER, '*' => :tSTAR, '**' => :tDSTAR,
498
- '+' => :tUPLUS, '-' => :tUMINUS, '::' => :tCOLON3,
499
- '(' => :tLPAREN, '{' => :tLBRACE, '[' => :tLBRACK,
500
- }
501
-
502
- KEYWORDS = {
503
- 'if' => :kIF_MOD, 'unless' => :kUNLESS_MOD,
504
- 'while' => :kWHILE_MOD, 'until' => :kUNTIL_MOD,
505
- 'rescue' => :kRESCUE_MOD, 'defined?' => :kDEFINED,
506
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
507
- }
508
-
509
- KEYWORDS_BEGIN = {
510
- 'if' => :kIF, 'unless' => :kUNLESS,
511
- 'while' => :kWHILE, 'until' => :kUNTIL,
512
- 'rescue' => :kRESCUE, 'defined?' => :kDEFINED,
513
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
514
- }
515
-
516
- ESCAPE_WHITESPACE = {
517
- " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
518
- "\v" => '\v', "\f" => '\f'
519
- }
520
-
521
- %w(class module def undef begin end then elsif else ensure case when
522
- for break next redo retry in do return yield super self nil true
523
- false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
524
- KEYWORDS_BEGIN[keyword] = KEYWORDS[keyword] = :"k#{keyword.upcase}"
525
- end
526
-
527
- %%{
528
- # %
529
-
530
- access @;
531
- getkey (@source_pts[p] || 0);
532
-
533
- # === CHARACTER CLASSES ===
534
- #
535
- # Pay close attention to the differences between c_any and any.
536
- # c_any does not include EOF and so will cause incorrect behavior
537
- # for machine subtraction (any-except rules) and default transitions
538
- # for scanners.
539
-
540
- action do_nl {
541
- # Record position of a newline for precise location reporting on tNL
542
- # tokens.
543
- #
544
- # This action is embedded directly into c_nl, as it is idempotent and
545
- # there are no cases when we need to skip it.
546
- @newline_s = p
547
- }
548
-
549
- c_nl = '\n' $ do_nl;
550
- c_space = [ \t\r\f\v];
551
- c_space_nl = c_space | c_nl;
552
-
553
- c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
554
- c_eol = c_nl | c_eof;
555
- c_any = any - c_eof;
556
-
557
- c_nl_zlen = c_nl | zlen;
558
- c_line = any - c_nl_zlen;
559
-
560
- c_ascii = 0x00..0x7f;
561
- c_unicode = c_any - c_ascii;
562
- c_upper = [A-Z];
563
- c_lower = [a-z_] | c_unicode;
564
- c_alpha = c_lower | c_upper;
565
- c_alnum = c_alpha | [0-9];
566
-
567
- action do_eof {
568
- # Sit at EOF indefinitely. #advance would return $eof each time.
569
- # This allows feeding the lexer more data if needed; this is only used
570
- # in tests.
571
- #
572
- # Note that this action is not embedded into e_eof like e_nl and e_bs
573
- # below. This is due to the fact that scanner state at EOF is observed
574
- # by tests, and encapsulating it in a rule would break the introspection.
575
- fhold; fbreak;
576
- }
577
-
578
- #
579
- # === TOKEN DEFINITIONS ===
580
- #
581
-
582
- # All operators are punctuation. There is more to punctuation
583
- # than just operators. Operators can be overridden by user;
584
- # punctuation can not.
585
-
586
- # A list of operators which are valid in the function name context, but
587
- # have different semantics in others.
588
- operator_fname = '[]' | '[]=' | '`' | '-@' | '+@' | '~@' | '!@' ;
589
-
590
- # A list of operators which can occur within an assignment shortcut (+ → +=).
591
- operator_arithmetic = '&' | '|' | '&&' | '||' | '^' | '+' | '-' |
592
- '*' | '/' | '**' | '~' | '<<' | '>>' | '%' ;
593
-
594
- # A list of all user-definable operators not covered by groups above.
595
- operator_rest = '=~' | '!~' | '==' | '!=' | '!' | '===' |
596
- '<' | '<=' | '>' | '>=' | '<=>' | '=>' ;
597
-
598
- # Note that `{` and `}` need to be referred to as e_lbrace and e_rbrace,
599
- # as they are ambiguous with interpolation `#{}` and should be counted.
600
- # These braces are not present in punctuation lists.
601
-
602
- # A list of punctuation which has different meaning when used at the
603
- # beginning of expression.
604
- punctuation_begin = '-' | '+' | '::' | '(' | '[' |
605
- '*' | '**' | '&' ;
606
-
607
- # A list of all punctuation except punctuation_begin.
608
- punctuation_end = ',' | '=' | '->' | '(' | '[' | ']' |
609
- '::' | '?' | ':' | '.' | '..' | '...' ;
610
-
611
- # A list of keywords which have different meaning at the beginning of expression.
612
- keyword_modifier = 'if' | 'unless' | 'while' | 'until' | 'rescue' ;
613
-
614
- # A list of keywords which accept an argument-like expression, i.e. have the
615
- # same post-processing as method calls or commands. Example: `yield 1`,
616
- # `yield (1)`, `yield(1)`, are interpreted as if `yield` was a function.
617
- keyword_with_arg = 'yield' | 'super' | 'not' | 'defined?' ;
618
-
619
- # A list of keywords which accept a literal function name as an argument.
620
- keyword_with_fname = 'def' | 'undef' | 'alias' ;
621
-
622
- # A list of keywords which accept an expression after them.
623
- keyword_with_value = 'else' | 'case' | 'ensure' | 'module' | 'elsif' | 'then' |
624
- 'for' | 'in' | 'do' | 'when' | 'begin' | 'class' |
625
- 'and' | 'or' ;
626
-
627
- # A list of keywords which accept a value, and treat the keywords from
628
- # `keyword_modifier` list as modifiers.
629
- keyword_with_mid = 'rescue' | 'return' | 'break' | 'next' ;
630
-
631
- # A list of keywords which do not accept an expression after them.
632
- keyword_with_end = 'end' | 'self' | 'true' | 'false' | 'retry' |
633
- 'redo' | 'nil' | 'BEGIN' | 'END' | '__FILE__' |
634
- '__LINE__' | '__ENCODING__';
635
-
636
- # All keywords.
637
- keyword = keyword_with_value | keyword_with_mid |
638
- keyword_with_end | keyword_with_arg |
639
- keyword_with_fname | keyword_modifier ;
640
-
641
- constant = c_upper c_alnum*;
642
- bareword = c_alpha c_alnum*;
643
-
644
- call_or_var = c_lower c_alnum*;
645
- class_var = '@@' bareword;
646
- instance_var = '@' bareword;
647
- global_var = '$'
648
- ( bareword | digit+
649
- | [`'+~*$&?!@/\\;,.=:<>"] # `
650
- | '-' c_alnum
651
- )
652
- ;
653
-
654
- # Ruby accepts (and fails on) variables with leading digit
655
- # in literal context, but not in unquoted symbol body.
656
- class_var_v = '@@' c_alnum+;
657
- instance_var_v = '@' c_alnum+;
658
-
659
- label = bareword [?!]? ':';
660
-
661
- #
662
- # === NUMERIC PARSING ===
663
- #
664
-
665
- int_hex = ( xdigit+ '_' )* xdigit* '_'? ;
666
- int_dec = ( digit+ '_' )* digit* '_'? ;
667
- int_bin = ( [01]+ '_' )* [01]* '_'? ;
668
-
669
- flo_int = [1-9] [0-9]* ( '_' digit+ )* | '0';
670
- flo_frac = '.' ( digit+ '_' )* digit+;
671
- flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
672
-
673
- int_suffix =
674
- '' % { @num_xfrm = @emit_integer }
675
- | 'r' % { @num_xfrm = @emit_rational }
676
- | 'i' % { @num_xfrm = @emit_imaginary }
677
- | 'ri' % { @num_xfrm = @emit_imaginary_rational }
678
- | 're' % { @num_xfrm = @emit_integer_re }
679
- | 'if' % { @num_xfrm = @emit_integer_if }
680
- | 'rescue' % { @num_xfrm = @emit_integer_rescue };
681
-
682
- flo_pow_suffix =
683
- '' % { @num_xfrm = @emit_float }
684
- | 'i' % { @num_xfrm = @emit_imaginary_float }
685
- | 'if' % { @num_xfrm = @emit_float_if };
686
-
687
- flo_suffix =
688
- flo_pow_suffix
689
- | 'r' % { @num_xfrm = @emit_rational }
690
- | 'ri' % { @num_xfrm = @emit_imaginary_rational }
691
- | 'rescue' % { @num_xfrm = @emit_float_rescue };
692
-
693
- #
694
- # === INTERPOLATION PARSING ===
695
- #
696
-
697
- e_lbrace = '{' % {
698
- e_lbrace
699
- };
700
-
701
- e_rbrace = '}' % {
702
- if @strings.close_interp_on_current_literal(p)
703
- fhold;
704
- fnext inside_string;
705
- fbreak;
706
- end
707
-
708
- @paren_nest -= 1
709
- };
710
-
711
- #
712
- # === WHITESPACE HANDLING ===
713
- #
714
-
715
- # Various contexts in Ruby allow various kinds of whitespace
716
- # to be used. They are grouped to clarify the lexing machines
717
- # and ease collection of comments.
718
-
719
- # A line of code with inline #comment at end is always equivalent
720
- # to a line of code ending with just a newline, so an inline
721
- # comment is deemed equivalent to non-newline whitespace
722
- # (c_space character class).
723
-
724
- e_nl = c_nl % {
725
- p = on_newline(p)
726
- };
727
-
728
- w_space =
729
- c_space+
730
- | '\\' e_nl
731
- ;
732
-
733
- w_comment =
734
- '#' %{ @sharp_s = p - 1 }
735
- # The (p == pe) condition compensates for added "\0" and
736
- # the way Ragel handles EOF.
737
- c_line* %{ emit_comment_from_range(p, pe) }
738
- ;
739
-
740
- w_space_comment =
741
- w_space
742
- | w_comment
743
- ;
744
-
745
- # A newline in non-literal context always interoperates with
746
- # here document logic and can always be escaped by a backslash,
747
- # still interoperating with here document logic in the same way,
748
- # yet being invisible to anything else.
749
- #
750
- # To demonstrate:
751
- #
752
- # foo = <<FOO \
753
- # bar
754
- # FOO
755
- # + 2
756
- #
757
- # is equivalent to `foo = "bar\n" + 2`.
758
-
759
- w_newline =
760
- e_nl;
761
-
762
- w_any =
763
- w_space
764
- | w_comment
765
- | w_newline
766
- ;
767
-
768
-
769
- #
770
- # === EXPRESSION PARSING ===
771
- #
772
-
773
- # These rules implement a form of manually defined lookahead.
774
- # The default longest-match scanning does not work here due
775
- # to sheer ambiguity.
776
-
777
- ambiguous_fid_suffix = # actual parsed
778
- [?!] %{ tm = p } | # a? a?
779
- [?!]'=' %{ tm = p - 2 } # a!=b a != b
780
- ;
781
-
782
- ambiguous_ident_suffix = # actual parsed
783
- ambiguous_fid_suffix |
784
- '=' %{ tm = p } | # a= a=
785
- '==' %{ tm = p - 2 } | # a==b a == b
786
- '=~' %{ tm = p - 2 } | # a=~b a =~ b
787
- '=>' %{ tm = p - 2 } | # a=>b a => b
788
- '===' %{ tm = p - 3 } # a===b a === b
789
- ;
790
-
791
- ambiguous_symbol_suffix = # actual parsed
792
- ambiguous_ident_suffix |
793
- '==>' %{ tm = p - 2 } # :a==>b :a= => b
794
- ;
795
-
796
- # Ambiguous with 1.9 hash labels.
797
- ambiguous_const_suffix = # actual parsed
798
- '::' %{ tm = p - 2 } # A::B A :: B
799
- ;
800
-
801
- # Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
802
- # @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
803
-
804
- e_lbrack = '[' % {
805
- @cond.push(false); @cmdarg.push(false)
806
-
807
- @paren_nest += 1
808
- };
809
-
810
- e_rbrack = ']' % {
811
- @paren_nest -= 1
812
- };
813
-
814
- # Ruby 1.9 lambdas require parentheses counting in order to
815
- # emit correct opening kDO/tLBRACE.
816
-
817
- e_lparen = '(' % {
818
- @cond.push(false); @cmdarg.push(false)
819
-
820
- @paren_nest += 1
821
-
822
- if version?(18)
823
- @command_start = true
824
- end
825
- };
826
-
827
- e_rparen = ')' % {
828
- @paren_nest -= 1
829
- };
830
-
831
- # Ruby is context-sensitive wrt/ local identifiers.
832
- action local_ident {
833
- emit(:tIDENTIFIER)
834
-
835
- if !@static_env.nil? && @static_env.declared?(tok)
836
- fnext expr_endfn; fbreak;
837
- else
838
- fnext *arg_or_cmdarg(cmd_state); fbreak;
839
- end
840
- }
841
-
842
- # Variable lexing code is accessed from both expressions and
843
- # string interpolation related code.
844
- #
845
- expr_variable := |*
846
- global_var
847
- => {
848
- emit_global_var
849
-
850
- fnext *stack_pop; fbreak;
851
- };
852
-
853
- class_var_v
854
- => {
855
- emit_class_var
856
-
857
- fnext *stack_pop; fbreak;
858
- };
859
-
860
- instance_var_v
861
- => {
862
- emit_instance_var
863
-
864
- fnext *stack_pop; fbreak;
865
- };
866
- *|;
867
-
868
- # Literal function name in definition (e.g. `def class`).
869
- # Keywords are returned as their respective tokens; this is used
870
- # to support singleton def `def self.foo`. Global variables are
871
- # returned as `tGVAR`; this is used in global variable alias
872
- # statements `alias $a $b`. Symbols are returned verbatim; this
873
- # is used in `alias :a :"b#{foo}"` and `undef :a`.
874
- #
875
- # Transitions to `expr_endfn` afterwards.
876
- #
877
- expr_fname := |*
878
- keyword
879
- => { emit_table(KEYWORDS_BEGIN);
880
- fnext expr_endfn; fbreak; };
881
-
882
- constant
883
- => { emit(:tCONSTANT)
884
- fnext expr_endfn; fbreak; };
885
-
886
- bareword [?=!]?
887
- => { emit(:tIDENTIFIER)
888
- fnext expr_endfn; fbreak; };
889
-
890
- global_var
891
- => { p = @ts - 1
892
- fnext expr_end; fcall expr_variable; };
893
-
894
- # If the handling was to be delegated to expr_end,
895
- # these cases would transition to something else than
896
- # expr_endfn, which is incorrect.
897
- operator_fname |
898
- operator_arithmetic |
899
- operator_rest
900
- => { emit_table(PUNCTUATION)
901
- fnext expr_endfn; fbreak; };
902
-
903
- '::'
904
- => { fhold; fhold; fgoto expr_end; };
905
-
906
- ':'
907
- => { fhold; fgoto expr_beg; };
908
-
909
- '%s' (c_ascii - [A-Za-z0-9])
910
- => {
911
- if version?(23)
912
- type, delimiter = tok[0..-2], tok[-1].chr
913
- @strings.push_literal(type, delimiter, @ts)
914
- fgoto inside_string;
915
- else
916
- p = @ts - 1
917
- fgoto expr_end;
918
- end
919
- };
920
-
921
- w_any;
922
-
923
- c_any
924
- => { fhold; fgoto expr_end; };
925
-
926
- c_eof => do_eof;
927
- *|;
928
-
929
- # After literal function name in definition. Behaves like `expr_end`,
930
- # but allows a tLABEL.
931
- #
932
- # Transitions to `expr_end` afterwards.
933
- #
934
- expr_endfn := |*
935
- label ( any - ':' )
936
- => { emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
937
- fhold; fnext expr_labelarg; fbreak; };
938
-
939
- '...'
940
- => {
941
- if @version >= 31 && @context.in_argdef
942
- emit(:tBDOT3, '...'.freeze)
943
- # emit(:tNL, "\n".freeze, @te - 1, @te)
944
- fnext expr_end; fbreak;
945
- else
946
- p -= 3;
947
- fgoto expr_end;
948
- end
949
- };
950
-
951
- w_space_comment;
952
-
953
- c_any
954
- => { fhold; fgoto expr_end; };
955
-
956
- c_eof => do_eof;
957
- *|;
958
-
959
- # Literal function name in method call (e.g. `a.class`).
960
- #
961
- # Transitions to `expr_arg` afterwards.
962
- #
963
- expr_dot := |*
964
- constant
965
- => { emit(:tCONSTANT)
966
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
967
-
968
- call_or_var
969
- => { emit(:tIDENTIFIER)
970
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
971
-
972
- bareword ambiguous_fid_suffix
973
- => { emit(:tFID, tok(@ts, tm), @ts, tm)
974
- fnext *arg_or_cmdarg(cmd_state); p = tm - 1; fbreak; };
975
-
976
- # See the comment in `expr_fname`.
977
- operator_fname |
978
- operator_arithmetic |
979
- operator_rest
980
- => { emit_table(PUNCTUATION)
981
- fnext expr_arg; fbreak; };
982
-
983
- w_any;
984
-
985
- c_any
986
- => { fhold; fgoto expr_end; };
987
-
988
- c_eof => do_eof;
989
- *|;
990
-
991
- # The previous token emitted was a `tIDENTIFIER` or `tFID`; no space
992
- # is consumed; the current expression is a command or method call.
993
- #
994
- expr_arg := |*
995
- #
996
- # COMMAND MODE SPECIFIC TOKENS
997
- #
998
-
999
- # cmd (1 + 2)
1000
- # See below the rationale about expr_endarg.
1001
- w_space+ e_lparen
1002
- => {
1003
- if version?(18)
1004
- emit(:tLPAREN2, '('.freeze, @te - 1, @te)
1005
- fnext expr_value; fbreak;
1006
- else
1007
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1008
- fnext expr_beg; fbreak;
1009
- end
1010
- };
1011
-
1012
- # meth(1 + 2)
1013
- # Regular method call.
1014
- e_lparen
1015
- => { emit(:tLPAREN2, '('.freeze)
1016
- fnext expr_beg; fbreak; };
1017
-
1018
- # meth [...]
1019
- # Array argument. Compare with indexing `meth[...]`.
1020
- w_space+ e_lbrack
1021
- => { emit(:tLBRACK, '['.freeze, @te - 1, @te)
1022
- fnext expr_beg; fbreak; };
1023
-
1024
- # cmd {}
1025
- # Command: method call without parentheses.
1026
- w_space* e_lbrace
1027
- => {
1028
- if @lambda_stack.last == @paren_nest
1029
- @lambda_stack.pop
1030
- emit(:tLAMBEG, '{'.freeze, @te - 1, @te)
1031
- else
1032
- emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1033
- end
1034
- @command_start = true
1035
- @paren_nest += 1
1036
- fnext expr_value; fbreak;
1037
- };
1038
-
1039
- #
1040
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
1041
- #
1042
-
1043
- # a??
1044
- # Ternary operator
1045
- '?' c_space_nl
1046
- => {
1047
- # Unlike expr_beg as invoked in the next rule, do not warn
1048
- p = @ts - 1
1049
- fgoto expr_end;
1050
- };
1051
-
1052
- # a ?b, a? ?
1053
- # Character literal or ternary operator
1054
- w_space* '?'
1055
- => { fhold; fgoto expr_beg; };
1056
-
1057
- # a %{1}, a %[1] (but not "a %=1=" or "a % foo")
1058
- # a /foo/ (but not "a / foo" or "a /=foo")
1059
- # a <<HEREDOC
1060
- w_space+ %{ tm = p }
1061
- ( [%/] ( c_any - c_space_nl - '=' ) # /
1062
- | '<<'
1063
- )
1064
- => {
1065
- check_ambiguous_slash(tm)
1066
-
1067
- p = tm - 1
1068
- fgoto expr_beg;
1069
- };
1070
-
1071
- # x *1
1072
- # Ambiguous splat, kwsplat or block-pass.
1073
- w_space+ %{ tm = p } ( '+' | '-' | '*' | '&' | '**' )
1074
- => {
1075
- diagnostic :warning, :ambiguous_prefix, { :prefix => tok(tm, @te) },
1076
- range(tm, @te)
1077
-
1078
- p = tm - 1
1079
- fgoto expr_beg;
1080
- };
1081
-
1082
- # x ::Foo
1083
- # Ambiguous toplevel constant access.
1084
- w_space+ '::'
1085
- => { fhold; fhold; fgoto expr_beg; };
1086
-
1087
- # x:b
1088
- # Symbol.
1089
- w_space* ':'
1090
- => { fhold; fgoto expr_beg; };
1091
-
1092
- w_space+ label
1093
- => { p = @ts - 1; fgoto expr_beg; };
1094
-
1095
- #
1096
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
1097
- #
1098
-
1099
- # a ? b
1100
- # Ternary operator.
1101
- w_space+ %{ tm = p } '?' c_space_nl
1102
- => { p = tm - 1; fgoto expr_end; };
1103
-
1104
- # x + 1: Binary operator or operator-assignment.
1105
- w_space* operator_arithmetic
1106
- ( '=' | c_space_nl )? |
1107
- # x rescue y: Modifier keyword.
1108
- w_space* keyword_modifier |
1109
- # a &. b: Safe navigation operator.
1110
- w_space* '&.' |
1111
- # Miscellanea.
1112
- w_space* punctuation_end
1113
- => {
1114
- p = @ts - 1
1115
- fgoto expr_end;
1116
- };
1117
-
1118
- w_space;
1119
-
1120
- w_comment
1121
- => { fgoto expr_end; };
1122
-
1123
- w_newline
1124
- => { fhold; fgoto expr_end; };
1125
-
1126
- c_any
1127
- => { fhold; fgoto expr_beg; };
1128
-
1129
- c_eof => do_eof;
1130
- *|;
1131
-
1132
- # The previous token was an identifier which was seen while in the
1133
- # command mode (that is, the state at the beginning of #advance was
1134
- # expr_value). This state is very similar to expr_arg, but disambiguates
1135
- # two very rare and specific conditions:
1136
- # * In 1.8 mode, "foo (lambda do end)".
1137
- # * In 1.9+ mode, "f x: -> do foo do end end".
1138
- expr_cmdarg := |*
1139
- w_space+ e_lparen # cmd (: only the paren itself is emitted, as tLPAREN_ARG.
1140
- => {
1141
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1142
- if version?(18)
1143
- fnext expr_value; fbreak;
1144
- else
1145
- fnext expr_beg; fbreak;
1146
- end
1147
- };
1148
- 
1149
- w_space* 'do' # cmd do: kDO_COND while @cond is active, plain kDO otherwise.
1150
- => {
1151
- if @cond.active?
1152
- emit(:kDO_COND, 'do'.freeze, @te - 2, @te)
1153
- else
1154
- emit(:kDO, 'do'.freeze, @te - 2, @te)
1155
- end
1156
- fnext expr_value; fbreak;
1157
- };
1158
- 
1159
- c_any | # Everything else is rewound and re-lexed by expr_arg.
1160
- # Disambiguate with the `do' rule above.
1161
- w_space* bareword |
1162
- w_space* label
1163
- => { p = @ts - 1 # rewind to the start of the match
1164
- fgoto expr_arg; };
1165
- 
1166
- c_eof => do_eof;
1167
- *|;
1168
-
1169
- # The rationale for this state is pretty complex. Normally, if an argument
1170
- # is passed to a command and then there is a block (tLCURLY...tRCURLY),
1171
- # the block is attached to the innermost argument (`f` in `m f {}`), or it
1172
- # is a parse error (`m 1 {}`). But there is a special case for passing a single
1173
- # primary expression grouped with parentheses: if you write `m (1) {}` or
1174
- # (2.0 only) `m () {}`, then the block is attached to `m`.
1175
- #
1176
- # Thus, we recognize the opening `(` of a command (remember, a command is
1177
- # a method call without parens) as a tLPAREN_ARG; then, in the parser, we recognize
1178
- # `tLPAREN_ARG expr rparen` as a `primary_expr` and, before rparen, set the
1179
- # lexer's state to `expr_endarg`, which makes it emit the possibly following
1180
- # `{` as `tLBRACE_ARG`.
1181
- #
1182
- # The default post-`expr_endarg` state is `expr_end`, so this state also handles
1183
- # `do` (as `kDO_BLOCK` in `expr_beg`).
1184
- expr_endarg := |*
1185
- e_lbrace # tLAMBEG if a `->` was recorded at this paren depth, tLBRACE_ARG otherwise.
1186
- => {
1187
- if @lambda_stack.last == @paren_nest
1188
- @lambda_stack.pop
1189
- emit(:tLAMBEG, '{'.freeze)
1190
- else
1191
- emit(:tLBRACE_ARG, '{'.freeze)
1192
- end
1193
- @paren_nest += 1
1194
- @command_start = true
1195
- fnext expr_value; fbreak;
1196
- };
1197
- 
1198
- 'do'
1199
- => { emit_do(true)
1200
- fnext expr_value; fbreak; };
1201
- 
1202
- w_space_comment;
1203
- 
1204
- c_any # Anything else: put the character back and continue in expr_end.
1205
- => { fhold; fgoto expr_end; };
1206
- 
1207
- c_eof => do_eof;
1208
- *|;
1209
-
1210
- # The rationale for this state is that several keywords accept a value
1211
- # (i.e. should transition to `expr_beg`), do not accept it like a command
1212
- # (i.e. not an `expr_arg`), and must behave like a statement, that is,
1213
- # accept a modifier if/while/etc.
1214
- #
1215
- expr_mid := |*
1216
- keyword_modifier # Modifier keyword: emitted from the KEYWORDS table, then expr_beg.
1217
- => { emit_table(KEYWORDS)
1218
- fnext expr_beg; fbreak; };
1219
- 
1220
- bareword # Any other word is rewound and re-lexed in expr_beg.
1221
- => { p = @ts - 1; fgoto expr_beg; };
1222
- 
1223
- w_space_comment;
1224
- 
1225
- w_newline # The newline is put back so that expr_end processes it.
1226
- => { fhold; fgoto expr_end; };
1227
- 
1228
- c_any
1229
- => { fhold; fgoto expr_beg; };
1230
- 
1231
- c_eof => do_eof;
1232
- *|;
1233
-
1234
- # Beginning of an expression.
1235
- #
1236
- # Don't fall through to this state from `c_any`; make sure to handle
1237
- # `c_space* c_nl` and let `expr_end` handle the newline.
1238
- # Otherwise code like `f\ndef x` gets glued together and the parser
1239
- # explodes.
1240
- #
1241
- expr_beg := |*
1242
- # +5, -5, - 5: only the sign is emitted (tUNARY_NUM); the digit is held back for expr_end.
1243
- [+\-] w_any* [0-9]
1244
- => {
1245
- emit(:tUNARY_NUM, tok(@ts, @ts + 1), @ts, @ts + 1)
1246
- fhold; fnext expr_end; fbreak;
1247
- };
1248
- 
1249
- # splat *a
1250
- '*'
1251
- => { emit(:tSTAR, '*'.freeze)
1252
- fbreak; };
1253
- 
1254
- #
1255
- # STRING AND REGEXP LITERALS
1256
- #
1257
- 
1258
- # /regexp/oui
1259
- # /=/ (disambiguation with /=)
1260
- '/' c_any
1261
- => {
1262
- type = delimiter = tok[0].chr
1263
- @strings.push_literal(type, delimiter, @ts)
1264
- 
1265
- fhold; # put back the character matched after the slash
1266
- fgoto inside_string;
1267
- };
1268
- 
1269
- # %<string>: the type is the `%` itself, the delimiter is the character after it.
1270
- '%' ( c_ascii - [A-Za-z0-9] )
1271
- => {
1272
- type, delimiter = @source_buffer.slice(@ts, 1).chr, tok[-1].chr
1273
- @strings.push_literal(type, delimiter, @ts)
1274
- fgoto inside_string;
1275
- };
1276
- 
1277
- # %w(we are the people): the type is `%` plus the letter, the delimiter is the last character.
1278
- '%' [A-Za-z] (c_ascii - [A-Za-z0-9])
1279
- => {
1280
- type, delimiter = tok[0..-2], tok[-1].chr
1281
- @strings.push_literal(type, delimiter, @ts)
1282
- fgoto inside_string;
1283
- };
1284
- 
1285
- '%' c_eof # Lone `%` at the end of input.
1286
- => {
1287
- diagnostic :fatal, :string_eof, nil, range(@ts, @ts + 1)
1288
- };
1289
- 
1290
- # Heredoc start.
1291
- # <<END | <<'END' | <<"END" | <<`END` |
1292
- # <<-END | <<-'END' | <<-"END" | <<-`END` |
1293
- # <<~END | <<~'END' | <<~"END" | <<~`END`
1294
- '<<' [~\-]?
1295
- ( '"' ( any - '"' )* '"'
1296
- | "'" ( any - "'" )* "'"
1297
- | "`" ( any - "`" )* "`"
1298
- | bareword ) % { heredoc_e = p }
1299
- c_line* c_nl % { new_herebody_s = p }
1300
- => {
1301
- tok(@ts, heredoc_e) =~ /^<<(-?)(~?)(["'`]?)(.*)\3$/m
1302
- 
1303
- indent = !$1.empty? || !$2.empty? # true for `<<-` and `<<~`
1304
- dedent_body = !$2.empty? # true only for `<<~`
1305
- type = $3.empty? ? '<<"'.freeze : ('<<'.freeze + $3) # an unquoted identifier gets the double-quoted type
1306
- delimiter = $4
1307
- 
1308
- if @version >= 27
1309
- if delimiter.count("\n") > 0 || delimiter.count("\r") > 0
1310
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1311
- end
1312
- elsif @version >= 24
1313
- if delimiter.count("\n") > 0
1314
- if delimiter.end_with?("\n")
1315
- diagnostic :warning, :heredoc_id_ends_with_nl, nil, range(@ts, @ts + 1)
1316
- delimiter = delimiter.rstrip
1317
- else
1318
- diagnostic :fatal, :heredoc_id_has_newline, nil, range(@ts, @ts + 1)
1319
- end
1320
- end
1321
- end
1322
- 
1323
- if dedent_body && version?(18, 19, 20, 21, 22) # `<<~` is not lexed as a heredoc for these versions
1324
- emit(:tLSHFT, '<<'.freeze, @ts, @ts + 2)
1325
- p = @ts + 1 # resume right after the `<<`
1326
- fnext expr_beg; fbreak;
1327
- else
1328
- @strings.push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
1329
- @strings.herebody_s ||= new_herebody_s
1330
- 
1331
- p = @strings.herebody_s - 1
1332
- fnext inside_string;
1333
- end
1334
- };
1335
-
1336
- # Quoted heredoc start whose identifier is not closed on the same line:
1336
- # <<'END | <<"END | <<`END |
1337
- # <<-'END | <<-"END | <<-`END |
1338
- # <<~'END | <<~"END | <<~`END
1339
- #
1340
- # If the heredoc is terminated the rule above should handle it (it is the longer match).
1341
- '<<' [~\-]?
1342
- ('"' (any - c_nl - '"')*
1343
- |"'" (any - c_nl - "'")*
1344
- |"`" (any - c_nl - "`")* # all three quote kinds take zero or more identifier characters
1345
- )
1346
- => {
1347
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1348
- };
1350
-
1351
- #
1352
- # SYMBOL LITERALS
1353
- #
1354
- 
1355
- # :&&, :||: only the colon is emitted (tSYMBEG); the operator is re-lexed in expr_fname.
1356
- ':' ('&&' | '||') => {
1357
- fhold; fhold;
1358
- emit(:tSYMBEG, tok(@ts, @ts + 1), @ts, @ts + 1)
1359
- fgoto expr_fname;
1360
- };
1361
- 
1362
- # :"bar", :'baz'
1363
- ':' ['"] # '
1364
- => {
1365
- type, delimiter = tok, tok[-1].chr
1366
- @strings.push_literal(type, delimiter, @ts);
1367
- 
1368
- fgoto inside_string;
1369
- };
1370
- 
1371
- # :!@ is :!
1372
- # :~@ is :~
1373
- ':' [!~] '@'
1374
- => {
1375
- emit(:tSYMBOL, tok(@ts + 1, @ts + 2))
1376
- fnext expr_end; fbreak;
1377
- };
1378
- 
1379
- ':' bareword ambiguous_symbol_suffix # The symbol ends at tm; lexing resumes from tm.
1380
- => {
1381
- emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
1382
- p = tm - 1
1383
- fnext expr_end; fbreak;
1384
- };
1385
- 
1386
- ':' ( bareword | global_var | class_var | instance_var |
1387
- operator_fname | operator_arithmetic | operator_rest )
1388
- => {
1389
- emit(:tSYMBOL, tok(@ts + 1), @ts)
1390
- fnext expr_end; fbreak;
1391
- };
1392
- 
1393
- ':' ( '@' %{ tm = p - 1; diag_msg = :ivar_name }
1394
- | '@@' %{ tm = p - 2; diag_msg = :cvar_name }
1395
- ) [0-9]*
1396
- => {
1397
- emit_colon_with_digits(p, tm, diag_msg)
1398
- 
1399
- fnext expr_end; fbreak;
1400
- };
1401
- 
1402
- #
1403
- # AMBIGUOUS TERNARY OPERATOR
1404
- #
1405
- 
1406
- # Character constant, like ?a, ?\n, ?\u1000, and so on
1407
- # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
1408
- '?' c_any
1409
- => {
1410
- p, next_state = @strings.read_character_constant(@ts)
1411
- fhold; # Ragel will do `p += 1` to consume input, prevent it
1412
- 
1413
- # If the strings lexer found a character constant (?a), emit it;
1414
- # otherwise read the ternary operator.
1415
- if @token_queue.empty?
1416
- fgoto *next_state;
1417
- else
1418
- fnext *next_state;
1419
- fbreak;
1420
- end
1421
- };
1422
- 
1423
- '?' c_eof # Lone `?` at the end of input.
1424
- => {
1425
- diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
1426
- };
1427
-
1428
- #
1429
- # AMBIGUOUS EMPTY BLOCK ARGUMENTS
1430
- #
1431
- 
1432
- # Ruby >= 2.7: the first `|` is emitted as tPIPE and the second one is re-lexed in expr_beg,
1433
- # while Ruby < 2.7 rewinds and lets expr_end lex a single tOROP (like in `a || b`).
1434
- '||'
1435
- => {
1436
- if @version >= 27
1437
- emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
1438
- fhold;
1439
- fnext expr_beg; fbreak;
1440
- else
1441
- p -= 2
1442
- fgoto expr_end;
1443
- end
1444
- };
1445
- 
1446
- #
1447
- # KEYWORDS AND PUNCTUATION
1448
- #
1449
- 
1450
- # a({b=>c}): tLAMBEG if a `->` was recorded at this paren depth, tLBRACE otherwise.
1451
- e_lbrace
1452
- => {
1453
- if @lambda_stack.last == @paren_nest
1454
- @lambda_stack.pop
1455
- @command_start = true
1456
- emit(:tLAMBEG, '{'.freeze)
1457
- else
1458
- emit(:tLBRACE, '{'.freeze)
1459
- end
1460
- @paren_nest += 1
1461
- fbreak;
1462
- };
1463
- 
1464
- # a([1, 2])
1465
- e_lbrack
1466
- => { emit(:tLBRACK, '['.freeze)
1467
- fbreak; };
1468
- 
1469
- # a()
1470
- e_lparen
1471
- => { emit(:tLPAREN, '('.freeze)
1472
- fbreak; };
1473
- 
1474
- # a(+b)
1475
- punctuation_begin
1476
- => { emit_table(PUNCTUATION_BEGIN)
1477
- fbreak; };
1478
- 
1479
- # rescue Exception => e: Block rescue.
1480
- # Special because it should transition to expr_mid.
1481
- 'rescue' %{ tm = p } '=>'?
1482
- => { emit(:kRESCUE, 'rescue'.freeze, @ts, tm)
1483
- p = tm - 1 # an optional `=>` right after the keyword is re-lexed
1484
- fnext expr_mid; fbreak; };
1485
- 
1486
- # if a: Statement if.
1487
- keyword_modifier
1488
- => { emit_table(KEYWORDS_BEGIN)
1489
- @command_start = true
1490
- fnext expr_value; fbreak; };
1491
- 
1492
- #
1493
- # RUBY 1.9 HASH LABELS
1494
- #
1495
- 
1496
- label ( any - ':' ) # The character after the colon is lookahead only.
1497
- => {
1498
- fhold;
1499
- 
1500
- if version?(18)
1501
- ident = tok(@ts, @te - 2)
1502
- 
1503
- emit((@source_buffer.slice(@ts, 1) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
1504
- ident, @ts, @te - 2)
1505
- fhold; # continue as a symbol
1506
- 
1507
- if !@static_env.nil? && @static_env.declared?(ident)
1508
- fnext expr_end;
1509
- else
1510
- fnext *arg_or_cmdarg(cmd_state);
1511
- end
1512
- else
1513
- emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
1514
- fnext expr_labelarg;
1515
- end
1516
- 
1517
- fbreak;
1518
- };
1519
-
1520
- #
1521
- # RUBY 2.7 BEGINLESS RANGE
1522
- #
1523
- '..' # tBDOT2 for Ruby >= 2.7, tDOT2 before that.
1524
- => {
1525
- if @version >= 27
1526
- emit(:tBDOT2)
1527
- else
1528
- emit(:tDOT2)
1529
- end
1530
- 
1531
- fnext expr_beg; fbreak;
1532
- };
1533
- 
1534
- '...' c_nl?
1535
- => {
1536
- # Here we scan and conditionally emit "\n":
1537
- # + if it's there
1538
- # + and emitted we do nothing
1539
- # + and not emitted we return `p` to "\n" to process it on the next scan
1540
- # + if it's not there we do nothing
1541
- followed_by_nl = @te - 1 == @newline_s
1542
- nl_emitted = false
1543
- dots_te = followed_by_nl ? @te - 1 : @te
1544
- 
1545
- if @version >= 30
1546
- if @lambda_stack.any? && @lambda_stack.last + 1 == @paren_nest
1547
- # To reject `->(...)` like `->...`
1548
- emit(:tDOT3, '...'.freeze, @ts, dots_te)
1549
- else
1550
- emit(:tBDOT3, '...'.freeze, @ts, dots_te)
1551
- 
1552
- if @version >= 31 && followed_by_nl && @context.in_argdef
1553
- emit(:tNL, nil, @te - 1, @te) # nil value, range covering just the newline, like every other tNL
1554
- nl_emitted = true
1555
- end
1556
- end
1557
- elsif @version >= 27
1558
- emit(:tBDOT3, '...'.freeze, @ts, dots_te)
1559
- else
1560
- emit(:tDOT3, '...'.freeze, @ts, dots_te)
1561
- end
1562
- 
1563
- if followed_by_nl && !nl_emitted
1564
- # return "\n" to process it on the next scan
1565
- fhold;
1566
- end
1567
- 
1568
- fnext expr_beg; fbreak;
1569
- };
1570
-
1571
- #
1572
- # CONTEXT-DEPENDENT VARIABLE LOOKUP OR COMMAND INVOCATION
1573
- #
1574
- 
1575
- # foo= bar: Disambiguate with bareword rule below.
1576
- bareword ambiguous_ident_suffix |
1577
- # def foo: Disambiguate with bareword rule below.
1578
- keyword
1579
- => { p = @ts - 1 # rewind: expr_end lexes the whole match again
1580
- fgoto expr_end; };
1581
- 
1582
- # a = 42; a [42]: Indexing.
1583
- # def a; end; a [42]: Array argument.
1584
- call_or_var
1585
- => local_ident;
1586
- 
1587
- (call_or_var - keyword) # foo (: identifier, whitespace, then an opening paren.
1588
- % { ident_tok = tok; ident_ts = @ts; ident_te = @te; }
1589
- w_space+ '('
1590
- => {
1591
- emit(:tIDENTIFIER, ident_tok, ident_ts, ident_te)
1592
- p = ident_te - 1 # resume right after the identifier
1593
- 
1594
- if !@static_env.nil? && @static_env.declared?(ident_tok) && @version < 25
1595
- fnext expr_endfn;
1596
- else
1597
- fnext expr_cmdarg;
1598
- end
1599
- fbreak;
1600
- };
1601
- 
1602
- #
1603
- # WHITESPACE
1604
- #
1605
- 
1606
- w_any;
1607
- 
1608
- e_nl '=begin' ( c_space | c_nl_zlen ) # Block comment opener: remember the current state, then re-lex via line_begin.
1609
- => {
1610
- p = @ts - 1
1611
- @cs_before_block_comment = @cs
1612
- fgoto line_begin;
1613
- };
1614
- 
1615
- #
1616
- # DEFAULT TRANSITION
1617
- #
1618
- 
1619
- # The following rules match most binary and all unary operators.
1620
- # Rules for binary operators provide better error reporting.
1621
- operator_arithmetic '=' |
1622
- operator_rest |
1623
- punctuation_end |
1624
- c_any
1625
- => { p = @ts - 1; fgoto expr_end; };
1626
- 
1627
- c_eof => do_eof;
1628
- *|;
1629
-
1630
- # Special newline handling for "def a b:"
1631
- # (expr_beg switches to this state right after emitting a tLABEL).
1632
- expr_labelarg := |*
1633
- w_space_comment;
1634
- 
1635
- w_newline # In a kwarg context the newline goes back to expr_end; otherwise a new line begins.
1636
- => {
1637
- if @context.in_kwarg
1638
- fhold; fgoto expr_end;
1639
- else
1640
- fgoto line_begin;
1641
- end
1642
- };
1643
- 
1644
- c_any
1645
- => { fhold; fgoto expr_beg; };
1646
- 
1647
- c_eof => do_eof;
1648
- *|;
1649
-
1650
- # Like expr_beg, but no 1.9 label or 2.2 quoted label possible.
1651
- #
1652
- expr_value := |*
1653
- # a:b: a(:b), a::B, A::B
1654
- label (any - ':')
1655
- => { p = @ts - 1 # rewind: expr_end lexes the whole match again
1656
- fgoto expr_end; };
1657
- 
1658
- # "bar", 'baz': the quote character is both the literal type and its delimiter.
1659
- ['"] # '
1660
- => {
1661
- @strings.push_literal(tok, tok, @ts)
1662
- fgoto inside_string;
1663
- };
1664
- 
1665
- w_space_comment;
1666
- 
1667
- w_newline
1668
- => { fgoto line_begin; };
1669
- 
1670
- c_any # Everything else is put back and handled by expr_beg.
1671
- => { fhold; fgoto expr_beg; };
1672
- 
1673
- c_eof => do_eof;
1674
- *|;
1675
-
1676
- expr_end := |* # Lambdas and blocks, keywords, literals, constants and variables, method calls, operators (see the banners below).
1677
- #
1678
- # STABBY LAMBDA
1679
- #
1680
- 
1681
- '->' # Remember the paren depth so that the matching `{` or `do` can be recognized later.
1682
- => {
1683
- emit(:tLAMBDA, '->'.freeze, @ts, @ts + 2)
1684
- 
1685
- @lambda_stack.push @paren_nest
1686
- fnext expr_endfn; fbreak;
1687
- };
1688
- 
1689
- e_lbrace | 'do' # Lambda body opener if a `->` was recorded at this paren depth, ordinary block opener otherwise.
1690
- => {
1691
- if @lambda_stack.last == @paren_nest
1692
- @lambda_stack.pop
1693
- 
1694
- if tok == '{'.freeze
1695
- emit(:tLAMBEG, '{'.freeze)
1696
- else # 'do'
1697
- emit(:kDO_LAMBDA, 'do'.freeze)
1698
- end
1699
- else
1700
- if tok == '{'.freeze
1701
- emit(:tLCURLY, '{'.freeze)
1702
- else # 'do'
1703
- emit_do
1704
- end
1705
- end
1706
- if tok == '{'.freeze # only the brace form changes the nesting depth
1707
- @paren_nest += 1
1708
- end
1709
- @command_start = true
1710
- 
1711
- fnext expr_value; fbreak;
1712
- };
1713
-
1714
- #
1715
- # KEYWORDS
1716
- #
1717
- 
1718
- keyword_with_fname # Continue in expr_fname.
1719
- => { emit_table(KEYWORDS)
1720
- fnext expr_fname; fbreak; };
1721
- 
1722
- 'class' w_any* '<<' # class << self
1723
- => { emit_singleton_class
1724
- fnext expr_value; fbreak; };
1725
- 
1726
- # a if b:c: Syntax error.
1727
- keyword_modifier
1728
- => { emit_table(KEYWORDS)
1729
- fnext expr_beg; fbreak; };
1730
- 
1731
- # elsif b:c: elsif b(:c)
1732
- keyword_with_value
1733
- => { emit_table(KEYWORDS)
1734
- @command_start = true
1735
- fnext expr_value; fbreak; };
1736
- 
1737
- keyword_with_mid # Continue in expr_mid.
1738
- => { emit_table(KEYWORDS)
1739
- fnext expr_mid; fbreak; };
1740
- 
1741
- keyword_with_arg # Continue in expr_arg, except for `not` in 1.8 mode (expr_beg).
1742
- => {
1743
- emit_table(KEYWORDS)
1744
- 
1745
- if version?(18) && tok == 'not'.freeze
1746
- fnext expr_beg; fbreak;
1747
- else
1748
- fnext expr_arg; fbreak;
1749
- end
1750
- };
1751
- 
1752
- '__ENCODING__' # A plain identifier in 1.8 mode, the k__ENCODING__ keyword otherwise.
1753
- => {
1754
- if version?(18)
1755
- emit(:tIDENTIFIER)
1756
- 
1757
- unless !@static_env.nil? && @static_env.declared?(tok)
1758
- fnext *arg_or_cmdarg(cmd_state);
1759
- end
1760
- else
1761
- emit(:k__ENCODING__, '__ENCODING__'.freeze)
1762
- end
1763
- fbreak;
1764
- };
1765
- 
1766
- keyword_with_end # The state is left unchanged.
1767
- => { emit_table(KEYWORDS)
1768
- fbreak; };
1769
-
1770
- #
1771
- # NUMERIC LITERALS
1772
- #
1773
- 
1774
- ( '0' [Xx] %{ @num_base = 16; @num_digits_s = p } int_hex # 0x...: base 16
1775
- | '0' [Dd] %{ @num_base = 10; @num_digits_s = p } int_dec # 0d...: base 10
1776
- | '0' [Oo] %{ @num_base = 8; @num_digits_s = p } int_dec # 0o...: base 8
1777
- | '0' [Bb] %{ @num_base = 2; @num_digits_s = p } int_bin # 0b...: base 2
1778
- | [1-9] digit* '_'? %{ @num_base = 10; @num_digits_s = @ts } int_dec # no prefix: base 10
1779
- | '0' digit* '_'? %{ @num_base = 8; @num_digits_s = @ts } int_dec # leading zero: base 8
1780
- ) %{ @num_suffix_s = p } int_suffix
1781
- => {
1782
- digits = numeric_literal_int
1783
- 
1784
- if version?(18, 19, 20)
1785
- emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s)
1786
- p = @num_suffix_s - 1 # the suffix is not part of the token for these versions
1787
- else
1788
- p = @num_xfrm.call(digits.to_i(@num_base), p)
1789
- end
1790
- fbreak;
1791
- };
1792
- 
1793
- flo_frac flo_pow? # Fraction without an integer part, reported as :no_dot_digit_literal.
1794
- => {
1795
- diagnostic :error, :no_dot_digit_literal
1796
- };
1797
- 
1798
- flo_int [eE] # Integer followed by a bare exponent marker.
1799
- => {
1800
- if version?(18, 19, 20)
1801
- diagnostic :error,
1802
- :trailing_in_number, { :character => tok(@te - 1, @te) },
1803
- range(@te - 1, @te)
1804
- else
1805
- emit(:tINTEGER, tok(@ts, @te - 1).to_i, @ts, @te - 1)
1806
- fhold; fbreak;
1807
- end
1808
- };
1809
- 
1810
- flo_int flo_frac [eE] # Float followed by a bare exponent marker.
1811
- => {
1812
- if version?(18, 19, 20)
1813
- diagnostic :error,
1814
- :trailing_in_number, { :character => tok(@te - 1, @te) },
1815
- range(@te - 1, @te)
1816
- else
1817
- emit(:tFLOAT, tok(@ts, @te - 1).to_f, @ts, @te - 1)
1818
- fhold; fbreak;
1819
- end
1820
- };
1821
- 
1822
- flo_int
1823
- ( flo_frac? flo_pow %{ @num_suffix_s = p } flo_pow_suffix
1824
- | flo_frac %{ @num_suffix_s = p } flo_suffix
1825
- )
1826
- => {
1827
- digits = tok(@ts, @num_suffix_s)
1828
- 
1829
- if version?(18, 19, 20)
1830
- emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
1831
- p = @num_suffix_s - 1 # the suffix is not part of the token for these versions
1832
- else
1833
- p = @num_xfrm.call(digits, p)
1834
- end
1835
- fbreak;
1836
- };
1837
-
1838
- #
1839
- # STRING AND XSTRING LITERALS
1840
- #
1841
- 
1842
- # `echo foo`, "bar", 'baz'
1843
- '`' | ['"] # '
1844
- => {
1845
- type, delimiter = tok, tok[-1].chr
1846
- @strings.push_literal(type, delimiter, @ts, nil, false, false, true);
1847
- fgoto inside_string;
1848
- };
1849
- 
1850
- #
1851
- # CONSTANTS AND VARIABLES
1852
- #
1853
- 
1854
- constant
1855
- => { emit(:tCONSTANT)
1856
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
1857
- 
1858
- constant ambiguous_const_suffix # The constant ends at tm; lexing resumes from tm.
1859
- => { emit(:tCONSTANT, tok(@ts, tm), @ts, tm)
1860
- p = tm - 1; fbreak; };
1861
- 
1862
- global_var | class_var_v | instance_var_v # Rewound and lexed by expr_variable via fcall.
1863
- => { p = @ts - 1; fcall expr_variable; };
1864
- 
1865
- #
1866
- # METHOD CALLS
1867
- #
1868
- 
1869
- '.:' w_space+ # `.:` followed by whitespace: `.` and `:` are emitted as two separate tokens.
1870
- => { emit(:tDOT, '.', @ts, @ts + 1)
1871
- emit(:tCOLON, ':', @ts + 1, @ts + 2)
1872
- p = p - tok.length + 2
1873
- fnext expr_dot; fbreak; };
1874
- 
1875
- '.:' # One PUNCTUATION token for Ruby >= 2.7; before that only the `.` is emitted and `:` is re-lexed.
1876
- => {
1877
- if @version >= 27
1878
- emit_table(PUNCTUATION)
1879
- else
1880
- emit(:tDOT, tok(@ts, @ts + 1), @ts, @ts + 1)
1881
- fhold;
1882
- end
1883
- 
1884
- fnext expr_dot; fbreak;
1885
- };
1886
- 
1887
- '.' | '&.' | '::'
1888
- => { emit_table(PUNCTUATION)
1889
- fnext expr_dot; fbreak; };
1890
- 
1891
- call_or_var
1892
- => local_ident;
1893
- 
1894
- bareword ambiguous_fid_suffix
1895
- => {
1896
- if tm == @te
1897
- # Suffix was consumed, e.g. foo!
1898
- emit(:tFID)
1899
- else
1900
- # Suffix was not consumed, e.g. foo!=
1901
- emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
1902
- p = tm - 1
1903
- end
1904
- fnext expr_arg; fbreak;
1905
- };
1906
-
1907
- #
1908
- # OPERATORS
1909
- #
1910
- 
1911
- '*' | '=>'
1912
- => {
1913
- emit_table(PUNCTUATION)
1914
- fnext expr_value; fbreak;
1915
- };
1916
- 
1917
- # When '|', '~', '!', '=>' are used as operators they do not accept any
1918
- # symbols (or quoted labels) after. Other binary operators accept it.
1919
- # NOTE(review): as written, '|', '~', '!' continue in expr_beg while all other operators (incl. '*' and '=>') continue in expr_value - confirm this comment.
1920
- ( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' )
1921
- => {
1922
- emit_table(PUNCTUATION);
1923
- fnext expr_value; fbreak;
1924
- };
1925
- 
1926
- ( e_lparen | '|' | '~' | '!' )
1927
- => { emit_table(PUNCTUATION)
1928
- fnext expr_beg; fbreak; };
1929
- 
1930
- e_rbrace | e_rparen | e_rbrack
1931
- => {
1932
- emit_rbrace_rparen_rbrack
1933
- 
1934
- if tok == '}'.freeze || tok == ']'.freeze
1935
- if @version >= 25
1936
- fnext expr_end;
1937
- else
1938
- fnext expr_endarg;
1939
- end
1940
- else # )
1941
- # fnext expr_endfn; ?
1942
- end
1943
- 
1944
- fbreak;
1945
- };
1946
- 
1947
- operator_arithmetic '=' # Operator-assignment: the token value is the operator without the trailing `=`.
1948
- => { emit(:tOP_ASGN, tok(@ts, @te - 1))
1949
- fnext expr_beg; fbreak; };
1950
- 
1951
- '?'
1952
- => { emit(:tEH, '?'.freeze)
1953
- fnext expr_value; fbreak; };
1954
- 
1955
- e_lbrack
1956
- => { emit(:tLBRACK2, '['.freeze)
1957
- fnext expr_beg; fbreak; };
1958
- 
1959
- '...' c_nl # `...` at the end of a line: warn when not nested, emit the dots only and put the newline back.
1960
- => {
1961
- if @paren_nest == 0
1962
- diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
1963
- end
1964
- 
1965
- emit(:tDOT3, '...'.freeze, @ts, @te - 1)
1966
- fhold;
1967
- fnext expr_beg; fbreak;
1968
- };
1969
- 
1970
- punctuation_end
1971
- => { emit_table(PUNCTUATION)
1972
- fnext expr_beg; fbreak; };
1973
-
1974
- #
1975
- # WHITESPACE
1976
- #
1977
- 
1978
- w_space_comment;
1979
- 
1980
- w_newline # No tNL is emitted here; leading_dot decides based on what follows.
1981
- => { fgoto leading_dot; };
1982
- 
1983
- ';'
1984
- => { emit(:tSEMI, ';'.freeze)
1985
- @command_start = true
1986
- fnext expr_value; fbreak; };
1987
- 
1988
- '\\' c_line {
1989
- diagnostic :error, :bare_backslash, nil, range(@ts, @ts + 1)
1990
- fhold; # the character after the backslash is lexed again
1991
- };
1992
- 
1993
- c_any # Nothing above matched: report the unexpected character.
1994
- => {
1995
- diagnostic :fatal, :unexpected, { :character => tok.inspect[1..-2] }
1996
- };
1997
- 
1998
- c_eof => do_eof;
1999
- *|;
2000
-
2001
- leading_dot := |* # Entered from expr_end after a newline: does the next line continue a method chain?
2002
- # Insane leading dots:
2003
- # a #comment
2004
- # # post-2.7 comment
2005
- # .b: a.b
2006
- 
2007
- # Here we use '\n' instead of w_newline to not modify @newline_s
2008
- # and eventually properly emit tNL
2009
- (c_space* w_space_comment '\n')+
2010
- => {
2011
- if @version < 27
2012
- # Ruby before 2.7 doesn't support comments before leading dot.
2013
- # If a line after "a" starts with a comment then "a" is a self-contained statement.
2014
- # So in that case we emit a special tNL token and start reading the
2015
- # next line as a separate statement.
2016
- #
2017
- # Note: block comments before leading dot are not supported on any version of Ruby.
2018
- emit(:tNL, nil, @newline_s, @newline_s + 1)
2019
- fhold; fnext line_begin; fbreak;
2020
- end
2021
- };
2022
- 
2023
- c_space* '..' # A leading `..` never continues the previous line: tNL is emitted first.
2024
- => {
2025
- emit(:tNL, nil, @newline_s, @newline_s + 1)
2026
- if @version < 27
2027
- fhold; fnext line_begin; fbreak;
2028
- else
2029
- emit(:tBDOT2) # NOTE(review): default value and range - presumably they also span the leading c_space*; confirm
2030
- fnext expr_beg; fbreak;
2031
- end
2032
- };
2033
- 
2034
- c_space* '...' # Same handling as `..` above, with tBDOT3.
2035
- => {
2036
- emit(:tNL, nil, @newline_s, @newline_s + 1)
2037
- if @version < 27
2038
- fhold; fnext line_begin; fbreak;
2039
- else
2040
- emit(:tBDOT3) # NOTE(review): default value and range - presumably they also span the leading c_space*; confirm
2041
- fnext expr_beg; fbreak;
2042
- end
2043
- };
2044
- 
2045
- c_space* %{ tm = p } ('.' | '&.') # Leading dot: no tNL, expr_end lexes the dot itself.
2046
- => { p = tm - 1; fgoto expr_end; };
2047
- 
2048
- any # Anything else ends the statement: emit the pending tNL and start a new line.
2049
- => { emit(:tNL, nil, @newline_s, @newline_s + 1)
2050
- fhold; fnext line_begin; fbreak; };
2051
- *|;
2052
-
2053
- #
2054
- # === EMBEDDED DOCUMENT (aka BLOCK COMMENT) PARSING ===
2055
- #
2056
-
2057
- line_comment := |* # Body of a =begin ... =end block; entered from line_begin.
2058
- '=end' c_line* c_nl_zlen # Closing line: emit the whole comment and return to the saved state.
2059
- => {
2060
- emit_comment(@eq_begin_s, @te)
2061
- fgoto *@cs_before_block_comment;
2062
- };
2063
- 
2064
- c_line* c_nl; # Any other line of the comment is skipped.
2065
- 
2066
- c_line* zlen # No closing line matched: fatal error reported at the opening =begin.
2067
- => {
2068
- diagnostic :fatal, :embedded_document, nil,
2069
- range(@eq_begin_s, @eq_begin_s + '=begin'.length)
2070
- };
2071
- *|;
2072
-
2073
- line_begin := |* # Start of a line: =begin comments and __END__ are only recognized here.
2074
- w_any;
2075
- 
2076
- '=begin' ( c_space | c_nl_zlen ) # Remember where the comment starts, then lex its body in line_comment.
2077
- => { @eq_begin_s = @ts
2078
- fgoto line_comment; };
2079
- 
2080
- '__END__' ( c_eol - zlen ) # Jump to just before pe. NOTE(review): the exact - 3 offset depends on how the buffer is terminated - confirm.
2081
- => { p = pe - 3 };
2082
- 
2083
- c_any # Anything else starts a statement: set cmd_state and lex it in expr_value.
2084
- => { cmd_state = true; fhold; fgoto expr_value; };
2085
- 
2086
- c_eof => do_eof;
2087
- *|;
2088
-
2089
- inside_string := |* # Hands control to @strings, which returns the new position and the next state.
2090
- any
2091
- => {
2092
- p, next_state = @strings.advance(p)
2093
- 
2094
- fhold; # Ragel will do `p += 1` to consume input, prevent it
2095
- fnext *next_state;
2096
- fbreak;
2097
- };
2098
- *|;
2099
-
2100
- }%%
2101
- # %
2102
- end
2103
- end