ruby-next-parser 3.2.2.0 → 3.4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2103 +0,0 @@
1
- %%machine lex; # % fix highlighting
2
-
3
- #
4
- # === BEFORE YOU START ===
5
- #
6
- # Read the Ruby Hacking Guide chapter 11, available in English at
7
- # http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
8
- #
9
- # Remember two things about Ragel scanners:
10
- #
11
- # 1) Longest match wins.
12
- #
13
- # 2) If two matches have the same length, the first
14
- # in source code wins.
15
- #
16
- # General rules of making Ragel and Bison happy:
17
- #
18
- # * `p` (position) and `@te` contain the index of the character
19
- # they're pointing to ("current"), plus one. `@ts` contains the index
20
- # of the corresponding character. The code for extracting the matched token is:
21
- #
22
- # @source_buffer.slice(@ts...@te)
23
- #
24
- # * If your input is `foooooooobar` and the rule is:
25
- #
26
- # 'f' 'o'+
27
- #
28
- # the result will be:
29
- #
30
- # foooooooobar
31
- # ^ ts=0 ^ p=te=9
32
- #
33
- # * A Ragel lexer action should not emit more than one token, unless
34
- # you know what you are doing.
35
- #
36
- # * All Ragel commands (fnext, fgoto, ...) end with a semicolon.
37
- #
38
- # * If an action emits the token and transitions to another state, use
39
- # these Ragel commands:
40
- #
41
- # emit($whatever)
42
- # fnext $next_state; fbreak;
43
- #
44
- # If you perform `fgoto` in an action which neither emits a token nor
45
- # rewinds the stream pointer, the parser's side-effectful,
46
- # context-sensitive lookahead actions will break in a hard to detect
47
- # and debug way.
48
- #
49
- # * If an action does not emit a token:
50
- #
51
- # fgoto $next_state;
52
- #
53
- # * If an action features lookbehind, i.e. matches characters with the
54
- # intent of passing them to another action:
55
- #
56
- # p = @ts - 1
57
- # fgoto $next_state;
58
- #
59
- # or, if the lookbehind consists of a single character:
60
- #
61
- # fhold; fgoto $next_state;
62
- #
63
- # * Ragel merges actions. So, if you have `e_lparen = '(' %act` and
64
- # `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
65
- # _will_ invoke the action `act`.
66
- #
67
- # e_something stands for "something with **e**mbedded action".
68
- #
69
- # * EOF is explicit and is matched by `c_eof`. If you want to introspect
70
- # the state of the lexer, add this rule to the state:
71
- #
72
- # c_eof => do_eof;
73
- #
74
- # * If you proceed past EOF, the lexer will complain:
75
- #
76
- # NoMethodError: undefined method `ord' for nil:NilClass
77
- #
78
-
79
- class Parser::Lexer
80
- class Next
81
-
82
- %% write data nofinal;
83
- # %
84
-
85
- attr_reader :source_buffer
86
-
87
- attr_accessor :diagnostics
88
- attr_accessor :static_env
89
- attr_accessor :force_utf32
90
-
91
- attr_accessor :cond, :cmdarg, :context, :command_start
92
-
93
- attr_accessor :tokens, :comments
94
-
95
- attr_reader :paren_nest, :cmdarg_stack, :cond_stack, :lambda_stack, :version
96
-
97
# Builds a lexer for the given Ruby version and puts it into its
# initial state via #reset.
#
# version - stored in @version; other methods compare it numerically
#           (e.g. `@version < 24`) or test membership via #version?.
def initialize(version)
  @version    = version
  @static_env = nil
  @context    = nil

  @tokens   = nil
  @comments = nil

  # Use the action table of the generated class when it exposes one
  # (possibly as a private class method); otherwise fall back to [].
  klass = self.class
  @_lex_actions = klass.respond_to?(:_lex_actions, true) ? klass.send(:_lex_actions) : []

  # Numeric-suffix transformations. Each one receives the literal's
  # characters and the current stream position, emits a token, and
  # returns the position to continue from. The `- 2` / `- 6` variants
  # exclude a trailing two- or six-character suffix from the token.
  @emit_integer            = ->(chars, p) { emit(:tINTEGER, chars); p }
  @emit_rational           = ->(chars, p) { emit(:tRATIONAL, Rational(chars)); p }
  @emit_imaginary          = ->(chars, p) { emit(:tIMAGINARY, Complex(0, chars)); p }
  @emit_imaginary_rational = ->(chars, p) { emit(:tIMAGINARY, Complex(0, Rational(chars))); p }
  @emit_integer_re         = ->(chars, p) { emit(:tINTEGER, chars, @ts, @te - 2); p - 2 }
  @emit_integer_if         = ->(chars, p) { emit(:tINTEGER, chars, @ts, @te - 2); p - 2 }
  @emit_integer_rescue     = ->(chars, p) { emit(:tINTEGER, chars, @ts, @te - 6); p - 6 }

  @emit_float           = ->(chars, p) { emit(:tFLOAT, Float(chars)); p }
  @emit_imaginary_float = ->(chars, p) { emit(:tIMAGINARY, Complex(0, Float(chars))); p }
  @emit_float_if        = ->(chars, p) { emit(:tFLOAT, Float(chars), @ts, @te - 2); p - 2 }
  @emit_float_rescue    = ->(chars, p) { emit(:tFLOAT, Float(chars), @ts, @te - 6); p - 6 }

  reset
end
127
-
128
# Restores the lexer to its initial state.
#
# reset_state - when true (the default) the Ragel machine state and the
#               cond/cmdarg state stacks are reinitialized as well. Unit
#               tests set the state before resetting, and pass false to
#               keep it.
def reset(reset_state=true)
  line_begin = self.class.lex_en_line_begin

  # Ragel state:
  if reset_state
    @cs = line_begin

    @cond         = StackState.new('cond')
    @cmdarg       = StackState.new('cmdarg')
    @cond_stack   = []
    @cmdarg_stack = []
  end

  @force_utf32 = false # set to true by some tests
  @source_pts  = nil   # @source as a codepoint array

  @p   = 0             # stream position (saved manually in #advance)
  @ts  = @te = nil     # token start / token end
  @act = 0             # next action

  @stack = []          # state stack
  @top   = 0           # state stack top pointer

  # Lexer state:
  @token_queue = []

  # Locations of the last encountered =begin, # and newline.
  @eq_begin_s = @sharp_s = @newline_s = nil

  # Last numeric base, start of numeric digits, start of numeric suffix,
  # and the suffix-induced transformation.
  @num_base = @num_digits_s = @num_suffix_s = @num_xfrm = nil

  # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
  # encountered after a matching closing parenthesis.
  @paren_nest   = 0
  @lambda_stack = []

  # If the lexer is in `command state' (aka expr_value) at the entry to
  # #advance, it will transition to expr_cmdarg instead of expr_arg at
  # certain points.
  @command_start = true

  # State before =begin / =end block comment
  @cs_before_block_comment = line_begin

  @strings = Parser::LexerStrings.new(self, @version)
end
180
-
181
# Assigns the buffer to lex and derives the codepoint array from it.
#
# source_buffer - an object responding to #source (a String), or nil to
#                 detach the current buffer.
#
# UTF-8 sources are unpacked into Unicode codepoints, anything else into
# bytes. The same buffer and codepoints are handed over to @strings.
def source_buffer=(source_buffer)
  @source_buffer = source_buffer

  @source_pts =
    if @source_buffer
      source = @source_buffer.source
      source.unpack(source.encoding == Encoding::UTF_8 ? 'U*' : 'C*')
    end

  # Skip byte order mark.
  @p = 1 if @source_pts && @source_pts[0] == 0xfeff

  @strings.source_buffer = @source_buffer
  @strings.source_pts = @source_pts
end
204
-
205
# Returns the encoding of the current source buffer's source string.
def encoding
  source = @source_buffer.source
  source.encoding
end
208
-
209
# Symbolic names of the lexer states, mapped to the entry points of the
# corresponding Ragel machines. Used by #state and #state=.
LEX_STATES = {
  line_begin:    lex_en_line_begin,
  expr_dot:      lex_en_expr_dot,
  expr_fname:    lex_en_expr_fname,
  expr_value:    lex_en_expr_value,
  expr_beg:      lex_en_expr_beg,
  expr_mid:      lex_en_expr_mid,
  expr_arg:      lex_en_expr_arg,
  expr_cmdarg:   lex_en_expr_cmdarg,
  expr_end:      lex_en_expr_end,
  expr_endarg:   lex_en_expr_endarg,
  expr_endfn:    lex_en_expr_endfn,
  expr_labelarg: lex_en_expr_labelarg,

  inside_string: lex_en_inside_string
}
225
-
226
# Returns the symbolic name of the current machine state, or the raw
# value of @cs when it has no entry in LEX_STATES.
def state
  names_by_id = LEX_STATES.invert
  names_by_id.fetch(@cs) { @cs }
end
229
-
230
# Switches the machine to the state with the given symbolic name.
#
# Raises KeyError (from Hash#fetch) when the name is not in LEX_STATES.
def state=(state)
  entry_point = LEX_STATES.fetch(state)
  @cs = entry_point
end
233
-
234
# Saves the current cmdarg state on @cmdarg_stack and starts a fresh
# one, named after the resulting stack depth.
def push_cmdarg
  @cmdarg_stack << @cmdarg
  depth = @cmdarg_stack.size
  @cmdarg = StackState.new("cmdarg.#{depth}")
end
238
-
239
# Restores the cmdarg state most recently saved by #push_cmdarg
# (nil when the stack is empty).
def pop_cmdarg
  saved = @cmdarg_stack.pop
  @cmdarg = saved
end
242
-
243
# Saves the current cond state on @cond_stack and starts a fresh one,
# named after the resulting stack depth.
def push_cond
  @cond_stack << @cond
  depth = @cond_stack.size
  @cond = StackState.new("cond.#{depth}")
end
247
-
248
# Restores the cond state most recently saved by #push_cond
# (nil when the stack is empty).
def pop_cond
  saved = @cond_stack.pop
  @cond = saved
end
251
-
252
# Delegates to the string lexer's #dedent_level.
def dedent_level
  strings = @strings
  strings.dedent_level
end
255
-
256
- # Return next token: [type, value].
257
def advance
  # Tokens queued by an earlier run are handed out before lexing more.
  return @token_queue.shift unless @token_queue.empty?

  # The local names below are dictated by the Ragel-generated code that
  # replaces the `write exec` directive, so they must not be renamed.
  klass = self.class
  _lex_trans_keys         = klass.send :_lex_trans_keys
  _lex_key_spans          = klass.send :_lex_key_spans
  _lex_index_offsets      = klass.send :_lex_index_offsets
  _lex_indicies           = klass.send :_lex_indicies
  _lex_trans_targs        = klass.send :_lex_trans_targs
  _lex_trans_actions      = klass.send :_lex_trans_actions
  _lex_to_state_actions   = klass.send :_lex_to_state_actions
  _lex_from_state_actions = klass.send :_lex_from_state_actions
  _lex_eof_trans          = klass.send :_lex_eof_trans
  _lex_actions            = @_lex_actions

  pe = @source_pts.size + 2
  p, eof = @p, pe

  # Remember whether this run started in command state, then clear the
  # flag; actions read the local copy.
  cmd_state = @command_start
  @command_start = false

  %% write exec;
  # %

  # Ragel creates a local variable called `testEof` but it doesn't use
  # it in any assignment. This dead code is here to swallow the warning.
  # It has no runtime cost because Ruby doesn't produce any instructions from it.
  if false
    testEof
  end

  # Persist the stream position for the next call.
  @p = p

  return @token_queue.shift if @token_queue.any?

  if @cs == klass.lex_error
    [ false, [ '$error'.freeze, range(p - 1, p) ] ]
  else
    eof = @source_pts.size
    [ false, [ '$eof'.freeze, range(eof, eof) ] ]
  end
end
302
-
303
- protected
304
-
305
# True when the lexer's @version equals any of the given versions.
def version?(*versions)
  versions.any? { |candidate| candidate == @version }
end
308
-
309
# Decrements the state stack pointer and returns the state stored at
# the new top. The slot itself is left in @stack.
def stack_pop
  new_top = @top - 1
  @top = new_top
  @stack[new_top]
end
313
-
314
# Returns the source text between positions s (inclusive) and e
# (exclusive); defaults to the current token span @ts...@te.
def tok(s = @ts, e = @te)
  length = e - s
  @source_buffer.slice(s, length)
end
317
-
318
# Builds a Parser::Source::Range over the current source buffer for the
# positions s...e; defaults to the current token span.
def range(s = @ts, e = @te)
  buffer = @source_buffer
  Parser::Source::Range.new(buffer, s, e)
end
321
-
322
# Creates a token `[type, [value, range]]`, appends it to the token
# queue (and to @tokens when token collection is enabled) and returns it.
#
# type  - token type symbol.
# value - token value; defaults to the text of the current token.
# s, e  - start/end positions of the token's location.
def emit(type, value = tok, s = @ts, e = @te)
  location = range(s, e)
  token    = [ type, [ value, location ] ]

  @token_queue << token
  @tokens.push(token) if @tokens

  token
end
331
-
332
# Emits the text at s...e with the token type looked up in `table`
# (a Hash from source text to token type).
def emit_table(table, s = @ts, e = @te)
  text = tok(s, e)
  type = table[text]

  emit(type, text, s, e)
end
337
-
338
# Emits the `do` keyword as kDO_COND, kDO_BLOCK or kDO.
#
# An active cond state wins; otherwise an active cmdarg state or a
# truthy do_block selects kDO_BLOCK; plain kDO is the fallback.
def emit_do(do_block=false)
  type =
    if @cond.active?
      :kDO_COND
    elsif @cmdarg.active? || do_block
      :kDO_BLOCK
    else
      :kDO
    end

  emit(type, 'do'.freeze)
end
347
-
348
# Picks the state to continue in after an identifier: expr_cmdarg when
# cmd_state is truthy, expr_arg otherwise.
def arg_or_cmdarg(cmd_state)
  cmd_state ? self.class.lex_en_expr_cmdarg : self.class.lex_en_expr_arg
end
355
-
356
# Records the comment at s...e: as a Parser::Source::Comment in
# @comments and as a :tCOMMENT token in @tokens, each only when the
# respective collection is enabled. Always returns nil; nothing is put
# on the token queue.
def emit_comment(s = @ts, e = @te)
  @comments.push(Parser::Source::Comment.new(range(s, e))) if @comments

  @tokens.push([ :tCOMMENT, [ tok(s, e), range(s, e) ] ]) if @tokens

  nil
end
367
-
368
# Emits the comment that started at @sharp_s and ends at p. When p has
# reached pe, the end is moved back by two positions.
def emit_comment_from_range(p, pe)
  comment_end =
    if p == pe
      p - 2
    else
      p
    end

  emit_comment(@sharp_s, comment_end)
end
371
-
372
# Builds a Parser::Diagnostic and hands it to the diagnostics engine.
#
# type       - diagnostic level, e.g. :error or :warning.
# reason     - symbol identifying the message.
# arguments  - optional Hash of message arguments.
# location   - source range; defaults to the current token's range.
# highlights - additional ranges to highlight.
#
# Returns whatever @diagnostics.process returns.
def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
  report = Parser::Diagnostic.new(type, reason, arguments, location, highlights)
  @diagnostics.process(report)
end
376
-
377
-
378
# Bookkeeping for an opening `{`: pushes `false` onto both the cond and
# cmdarg states and, when a string literal is currently being lexed,
# tells it that a brace was opened inside an interpolation.
def e_lbrace
  [@cond, @cmdarg].each { |stack_state| stack_state.push(false) }

  literal = @strings.literal
  literal.start_interp_brace if literal
end
386
-
387
# Extracts the digits of the integer literal being lexed (the text
# between @num_digits_s and @num_suffix_s), reporting malformed
# literals through #diagnostic.
#
# Returns the digit string; for an empty octal literal in 1.8 mode this
# is '0' instead.
def numeric_literal_int
  digits = tok(@num_digits_s, @num_suffix_s)
  octal  = @num_base == 8

  if digits.end_with? '_'.freeze
    diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
               range(@te - 1, @te)
  elsif digits.empty?
    if octal && version?(18)
      # 1.8 did not raise an error on 0o.
      digits = '0'.freeze
    else
      diagnostic :error, :empty_numeric
    end
  elsif octal && (invalid_idx = digits.index(/[89]/))
    # Point at the first digit that is not valid in base 8.
    invalid_s = @num_digits_s + invalid_idx
    diagnostic :error, :invalid_octal, nil,
               range(invalid_s, invalid_s + 1)
  end

  digits
end
405
-
406
# Forwards a newline at position p to the string lexer and returns its
# result (callers assign it back to the stream position).
def on_newline(p)
  strings = @strings
  strings.on_newline(p)
end
409
-
410
# Warns about an ambiguous regexp literal when the character at
# position tm is a `/`. The warning is :ambiguous_literal for versions
# below 30 and :ambiguous_regexp from 30 on. Does nothing otherwise.
def check_ambiguous_slash(tm)
  return unless tok(tm, tm + 1) == '/'.freeze

  reason = @version < 30 ? :ambiguous_literal : :ambiguous_regexp
  diagnostic :warning, reason, nil, range(tm, tm + 1)
end
420
-
421
# Emits the global variable at ts...te as the appropriate token:
#
#   $1, $23, ...    -> :tNTH_REF with the Integer value
#   $& $` $' $+     -> :tBACK_REF
#   anything else   -> :tGVAR
#
# ts, te - span of the variable; defaults to the current token.
#
# All three classifications look at the same ts...te span, so an
# explicitly passed span is never classified using the text of the
# current token.
def emit_global_var(ts = @ts, te = @te)
  name = tok(ts, te)

  if name =~ /^\$([1-9][0-9]*)$/
    # Skip the leading `$` to get at the digits.
    emit(:tNTH_REF, tok(ts + 1, te).to_i, ts, te)
  elsif name =~ /^\$([&`'+])$/
    emit(:tBACK_REF, name, ts, te)
  else
    emit(:tGVAR, name, ts, te)
  end
end
430
-
431
# Emits the class variable at ts...te as :tCVAR. A name starting with a
# digit (e.g. `@@1x`) is reported as a :cvar_name error first.
#
# ts, te - span of the variable; defaults to the current token.
#
# The diagnostic is located at ts...te, so it points at the variable
# even when the span is passed explicitly.
def emit_class_var(ts = @ts, te = @te)
  name = tok(ts, te)

  if name =~ /^@@[0-9]/
    diagnostic :error, :cvar_name, { :name => name }, range(ts, te)
  end

  emit(:tCVAR, name, ts, te)
end
438
-
439
# Emits the instance variable at ts...te as :tIVAR. A name starting
# with a digit (e.g. `@1x`) is reported as an :ivar_name error first.
#
# ts, te - span of the variable; defaults to the current token.
#
# The diagnostic is located at ts...te, so it points at the variable
# even when the span is passed explicitly.
def emit_instance_var(ts = @ts, te = @te)
  name = tok(ts, te)

  if name =~ /^@[0-9]/
    diagnostic :error, :ivar_name, { :name => name }, range(ts, te)
  end

  emit(:tIVAR, name, ts, te)
end
446
-
447
# Emits the current token (a closing `}`, `)` or `]`) via the
# PUNCTUATION table and unwinds the cond and cmdarg states: with
# #lexpop for versions below 24, with #pop from 24 on.
def emit_rbrace_rparen_rbrack
  emit_table(PUNCTUATION)

  unwind = @version < 24 ? :lexpop : :pop
  @cond.public_send(unwind)
  @cmdarg.public_send(unwind)
end
458
-
459
# Handles a `:` followed by a variable name that starts with a digit.
#
# p        - current stream position.
# tm       - position where the variable name starts.
# diag_msg - diagnostic reason to report.
#
# From version 27 on, the name tm...@te is reported as an error and p
# is returned unchanged. Older versions emit just the `:` as :tCOLON
# and return @ts as the new position.
def emit_colon_with_digits(p, tm, diag_msg)
  if @version >= 27
    diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
    p
  else
    colon_end = @ts + 1
    emit(:tCOLON, tok(@ts, colon_end), @ts, colon_end)
    @ts
  end
end
468
-
469
# Emits two tokens for the current match: kCLASS for its first five
# characters and tLSHFT for its last two.
def emit_singleton_class
  class_end  = @ts + 5
  shft_start = @te - 2

  emit(:kCLASS, 'class'.freeze, @ts, class_end)
  emit(:tLSHFT, '<<'.freeze, shft_start, @te)
end
473
-
474
- # Mapping of strings to parser tokens.
475
-
476
# Punctuation text mapped to the token emitted for it outside of the
# "beginning of expression" contexts (see PUNCTUATION_BEGIN for those).
PUNCTUATION = {
  '='   => :tEQL,
  '&'   => :tAMPER2,
  '|'   => :tPIPE,
  '!'   => :tBANG,
  '^'   => :tCARET,
  '+'   => :tPLUS,
  '-'   => :tMINUS,
  '*'   => :tSTAR2,
  '/'   => :tDIVIDE,
  '%'   => :tPERCENT,
  '~'   => :tTILDE,
  ','   => :tCOMMA,
  ';'   => :tSEMI,
  '.'   => :tDOT,
  '..'  => :tDOT2,
  '...' => :tDOT3,
  '['   => :tLBRACK2,
  ']'   => :tRBRACK,
  '('   => :tLPAREN2,
  ')'   => :tRPAREN,
  '?'   => :tEH,
  ':'   => :tCOLON,
  '&&'  => :tANDOP,
  '||'  => :tOROP,
  '-@'  => :tUMINUS,
  '+@'  => :tUPLUS,
  '~@'  => :tTILDE,
  '**'  => :tPOW,
  '->'  => :tLAMBDA,
  '=~'  => :tMATCH,
  '!~'  => :tNMATCH,
  '=='  => :tEQ,
  '!='  => :tNEQ,
  '>'   => :tGT,
  '>>'  => :tRSHFT,
  '>='  => :tGEQ,
  '<'   => :tLT,
  '<<'  => :tLSHFT,
  '<='  => :tLEQ,
  '=>'  => :tASSOC,
  '::'  => :tCOLON2,
  '===' => :tEQQ,
  '<=>' => :tCMP,
  '[]'  => :tAREF,
  '[]=' => :tASET,
  '{'   => :tLCURLY,
  '}'   => :tRCURLY,
  '`'   => :tBACK_REF2,
  '!@'  => :tBANG,
  '&.'  => :tANDDOT,
  '.:'  => :tMETHREF,
}
495
-
496
# Punctuation which is emitted as a different token when it appears at
# the beginning of an expression.
PUNCTUATION_BEGIN = {
  '&'  => :tAMPER,
  '*'  => :tSTAR,
  '**' => :tDSTAR,
  '+'  => :tUPLUS,
  '-'  => :tUMINUS,
  '::' => :tCOLON3,
  '('  => :tLPAREN,
  '{'  => :tLBRACE,
  '['  => :tLBRACK,
}
501
-
502
# Keywords whose token differs between modifier position (KEYWORDS) and
# the beginning of an expression (KEYWORDS_BEGIN). Both tables are
# completed below with the keywords that map to the same token in
# either position.
KEYWORDS = {
  'if'       => :kIF_MOD,
  'unless'   => :kUNLESS_MOD,
  'while'    => :kWHILE_MOD,
  'until'    => :kUNTIL_MOD,
  'rescue'   => :kRESCUE_MOD,
  'defined?' => :kDEFINED,
  'BEGIN'    => :klBEGIN,
  'END'      => :klEND,
}

KEYWORDS_BEGIN = {
  'if'       => :kIF,
  'unless'   => :kUNLESS,
  'while'    => :kWHILE,
  'until'    => :kUNTIL,
  'rescue'   => :kRESCUE,
  'defined?' => :kDEFINED,
  'BEGIN'    => :klBEGIN,
  'END'      => :klEND,
}

# Whitespace characters mapped to their escaped spelling.
ESCAPE_WHITESPACE = {
  " "  => '\s',
  "\r" => '\r',
  "\n" => '\n',
  "\t" => '\t',
  "\v" => '\v',
  "\f" => '\f'
}

# Every remaining keyword maps to :k<UPCASED NAME> in both tables.
%w[
  class module def undef begin end then elsif else ensure case when
  for break next redo retry in do return yield super self nil true
  false and or not alias __FILE__ __LINE__ __ENCODING__
].each do |word|
  token = :"k#{word.upcase}"

  KEYWORDS[word]       = token
  KEYWORDS_BEGIN[word] = token
end
526
-
527
- %%{
528
- # %
529
-
530
- access @;
531
- getkey (@source_pts[p] || 0);
532
-
533
- # === CHARACTER CLASSES ===
534
- #
535
- # Pay close attention to the differences between c_any and any.
536
- # c_any does not include EOF and so will cause incorrect behavior
537
- # for machine subtraction (any-except rules) and default transitions
538
- # for scanners.
539
-
540
- action do_nl {
541
- # Record position of a newline for precise location reporting on tNL
542
- # tokens.
543
- #
544
- # This action is embedded directly into c_nl, as it is idempotent and
545
- # there are no cases when we need to skip it.
546
- @newline_s = p
547
- }
548
-
549
- c_nl = '\n' $ do_nl;
550
- c_space = [ \t\r\f\v];
551
- c_space_nl = c_space | c_nl;
552
-
553
- c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
554
- c_eol = c_nl | c_eof;
555
- c_any = any - c_eof;
556
-
557
- c_nl_zlen = c_nl | zlen;
558
- c_line = any - c_nl_zlen;
559
-
560
- c_ascii = 0x00..0x7f;
561
- c_unicode = c_any - c_ascii;
562
- c_upper = [A-Z];
563
- c_lower = [a-z_] | c_unicode;
564
- c_alpha = c_lower | c_upper;
565
- c_alnum = c_alpha | [0-9];
566
-
567
- action do_eof {
568
- # Sit at EOF indefinitely. #advance would return $eof each time.
569
- # This allows feeding the lexer more data if needed; this is only used
570
- # in tests.
571
- #
572
- # Note that this action is not embedded into e_eof like e_nl and e_bs
573
- # below. This is due to the fact that scanner state at EOF is observed
574
- # by tests, and encapsulating it in a rule would break the introspection.
575
- fhold; fbreak;
576
- }
577
-
578
- #
579
- # === TOKEN DEFINITIONS ===
580
- #
581
-
582
- # All operators are punctuation. There is more to punctuation
583
- # than just operators. Operators can be overridden by user;
584
- # punctuation can not.
585
-
586
- # A list of operators which are valid in the function name context, but
587
- # have different semantics in others.
588
- operator_fname = '[]' | '[]=' | '`' | '-@' | '+@' | '~@' | '!@' ;
589
-
590
- # A list of operators which can occur within an assignment shortcut (+ → +=).
591
- operator_arithmetic = '&' | '|' | '&&' | '||' | '^' | '+' | '-' |
592
- '*' | '/' | '**' | '~' | '<<' | '>>' | '%' ;
593
-
594
- # A list of all user-definable operators not covered by groups above.
595
- operator_rest = '=~' | '!~' | '==' | '!=' | '!' | '===' |
596
- '<' | '<=' | '>' | '>=' | '<=>' | '=>' ;
597
-
598
- # Note that `{` and `}` need to be referred to as e_lbrace and e_rbrace,
599
- # as they are ambiguous with interpolation `#{}` and should be counted.
600
- # These braces are not present in punctuation lists.
601
-
602
- # A list of punctuation which has different meaning when used at the
603
- # beginning of expression.
604
- punctuation_begin = '-' | '+' | '::' | '(' | '[' |
605
- '*' | '**' | '&' ;
606
-
607
- # A list of all punctuation except punctuation_begin.
608
- punctuation_end = ',' | '=' | '->' | '(' | '[' | ']' |
609
- '::' | '?' | ':' | '.' | '..' | '...' ;
610
-
611
- # A list of keywords which have different meaning at the beginning of expression.
612
- keyword_modifier = 'if' | 'unless' | 'while' | 'until' | 'rescue' ;
613
-
614
- # A list of keywords which accept an argument-like expression, i.e. have the
615
- # same post-processing as method calls or commands. Example: `yield 1`,
616
- # `yield (1)`, `yield(1)`, are interpreted as if `yield` was a function.
617
- keyword_with_arg = 'yield' | 'super' | 'not' | 'defined?' ;
618
-
619
- # A list of keywords which accept a literal function name as an argument.
620
- keyword_with_fname = 'def' | 'undef' | 'alias' ;
621
-
622
- # A list of keywords which accept an expression after them.
623
- keyword_with_value = 'else' | 'case' | 'ensure' | 'module' | 'elsif' | 'then' |
624
- 'for' | 'in' | 'do' | 'when' | 'begin' | 'class' |
625
- 'and' | 'or' ;
626
-
627
- # A list of keywords which accept a value, and treat the keywords from
628
- # `keyword_modifier` list as modifiers.
629
- keyword_with_mid = 'rescue' | 'return' | 'break' | 'next' ;
630
-
631
- # A list of keywords which do not accept an expression after them.
632
- keyword_with_end = 'end' | 'self' | 'true' | 'false' | 'retry' |
633
- 'redo' | 'nil' | 'BEGIN' | 'END' | '__FILE__' |
634
- '__LINE__' | '__ENCODING__';
635
-
636
- # All keywords.
637
- keyword = keyword_with_value | keyword_with_mid |
638
- keyword_with_end | keyword_with_arg |
639
- keyword_with_fname | keyword_modifier ;
640
-
641
- constant = c_upper c_alnum*;
642
- bareword = c_alpha c_alnum*;
643
-
644
- call_or_var = c_lower c_alnum*;
645
- class_var = '@@' bareword;
646
- instance_var = '@' bareword;
647
- global_var = '$'
648
- ( bareword | digit+
649
- | [`'+~*$&?!@/\\;,.=:<>"] # `
650
- | '-' c_alnum
651
- )
652
- ;
653
-
654
- # Ruby accepts (and fails on) variables with leading digit
655
- # in literal context, but not in unquoted symbol body.
656
- class_var_v = '@@' c_alnum+;
657
- instance_var_v = '@' c_alnum+;
658
-
659
- label = bareword [?!]? ':';
660
-
661
- #
662
- # === NUMERIC PARSING ===
663
- #
664
-
665
- int_hex = ( xdigit+ '_' )* xdigit* '_'? ;
666
- int_dec = ( digit+ '_' )* digit* '_'? ;
667
- int_bin = ( [01]+ '_' )* [01]* '_'? ;
668
-
669
- flo_int = [1-9] [0-9]* ( '_' digit+ )* | '0';
670
- flo_frac = '.' ( digit+ '_' )* digit+;
671
- flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
672
-
673
- int_suffix =
674
- '' % { @num_xfrm = @emit_integer }
675
- | 'r' % { @num_xfrm = @emit_rational }
676
- | 'i' % { @num_xfrm = @emit_imaginary }
677
- | 'ri' % { @num_xfrm = @emit_imaginary_rational }
678
- | 're' % { @num_xfrm = @emit_integer_re }
679
- | 'if' % { @num_xfrm = @emit_integer_if }
680
- | 'rescue' % { @num_xfrm = @emit_integer_rescue };
681
-
682
- flo_pow_suffix =
683
- '' % { @num_xfrm = @emit_float }
684
- | 'i' % { @num_xfrm = @emit_imaginary_float }
685
- | 'if' % { @num_xfrm = @emit_float_if };
686
-
687
- flo_suffix =
688
- flo_pow_suffix
689
- | 'r' % { @num_xfrm = @emit_rational }
690
- | 'ri' % { @num_xfrm = @emit_imaginary_rational }
691
- | 'rescue' % { @num_xfrm = @emit_float_rescue };
692
-
693
- #
694
- # === INTERPOLATION PARSING ===
695
- #
696
-
697
- e_lbrace = '{' % {
698
- e_lbrace
699
- };
700
-
701
- e_rbrace = '}' % {
702
- if @strings.close_interp_on_current_literal(p)
703
- fhold;
704
- fnext inside_string;
705
- fbreak;
706
- end
707
-
708
- @paren_nest -= 1
709
- };
710
-
711
- #
712
- # === WHITESPACE HANDLING ===
713
- #
714
-
715
- # Various contexts in Ruby allow various kinds of whitespace
716
- # to be used. They are grouped to clarify the lexing machines
717
- # and ease collection of comments.
718
-
719
- # A line of code with inline #comment at end is always equivalent
720
- # to a line of code ending with just a newline, so an inline
721
- # comment is deemed equivalent to non-newline whitespace
722
- # (c_space character class).
723
-
724
- e_nl = c_nl % {
725
- p = on_newline(p)
726
- };
727
-
728
- w_space =
729
- c_space+
730
- | '\\' e_nl
731
- ;
732
-
733
- w_comment =
734
- '#' %{ @sharp_s = p - 1 }
735
- # The (p == pe) condition compensates for added "\0" and
736
- # the way Ragel handles EOF.
737
- c_line* %{ emit_comment_from_range(p, pe) }
738
- ;
739
-
740
- w_space_comment =
741
- w_space
742
- | w_comment
743
- ;
744
-
745
- # A newline in non-literal context always interoperates with
746
- # here document logic and can always be escaped by a backslash,
747
- # still interoperating with here document logic in the same way,
748
- # yet being invisible to anything else.
749
- #
750
- # To demonstrate:
751
- #
752
- # foo = <<FOO \
753
- # bar
754
- # FOO
755
- # + 2
756
- #
757
- # is equivalent to `foo = "bar\n" + 2`.
758
-
759
- w_newline =
760
- e_nl;
761
-
762
- w_any =
763
- w_space
764
- | w_comment
765
- | w_newline
766
- ;
767
-
768
-
769
- #
770
- # === EXPRESSION PARSING ===
771
- #
772
-
773
- # These rules implement a form of manually defined lookahead.
774
- # The default longest-match scanning does not work here due
775
- # to sheer ambiguity.
776
-
777
- ambiguous_fid_suffix = # actual parsed
778
- [?!] %{ tm = p } | # a? a?
779
- [?!]'=' %{ tm = p - 2 } # a!=b a != b
780
- ;
781
-
782
- ambiguous_ident_suffix = # actual parsed
783
- ambiguous_fid_suffix |
784
- '=' %{ tm = p } | # a= a=
785
- '==' %{ tm = p - 2 } | # a==b a == b
786
- '=~' %{ tm = p - 2 } | # a=~b a =~ b
787
- '=>' %{ tm = p - 2 } | # a=>b a => b
788
- '===' %{ tm = p - 3 } # a===b a === b
789
- ;
790
-
791
- ambiguous_symbol_suffix = # actual parsed
792
- ambiguous_ident_suffix |
793
- '==>' %{ tm = p - 2 } # :a==>b :a= => b
794
- ;
795
-
796
- # Ambiguous with 1.9 hash labels.
797
- ambiguous_const_suffix = # actual parsed
798
- '::' %{ tm = p - 2 } # A::B A :: B
799
- ;
800
-
801
- # Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
802
- # @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
803
-
804
- e_lbrack = '[' % {
805
- @cond.push(false); @cmdarg.push(false)
806
-
807
- @paren_nest += 1
808
- };
809
-
810
- e_rbrack = ']' % {
811
- @paren_nest -= 1
812
- };
813
-
814
- # Ruby 1.9 lambdas require parentheses counting in order to
815
- # emit correct opening kDO/tLBRACE.
816
-
817
- e_lparen = '(' % {
818
- @cond.push(false); @cmdarg.push(false)
819
-
820
- @paren_nest += 1
821
-
822
- if version?(18)
823
- @command_start = true
824
- end
825
- };
826
-
827
- e_rparen = ')' % {
828
- @paren_nest -= 1
829
- };
830
-
831
- # Ruby is context-sensitive with respect to local identifiers.
832
- action local_ident {
833
- emit(:tIDENTIFIER)
834
-
835
- if !@static_env.nil? && @static_env.declared?(tok)
836
- fnext expr_endfn; fbreak;
837
- else
838
- fnext *arg_or_cmdarg(cmd_state); fbreak;
839
- end
840
- }
841
-
842
- # Variable lexing code is accessed from both expressions and
843
- # string interpolation related code.
844
- #
845
- expr_variable := |*
846
- global_var
847
- => {
848
- emit_global_var
849
-
850
- fnext *stack_pop; fbreak;
851
- };
852
-
853
- class_var_v
854
- => {
855
- emit_class_var
856
-
857
- fnext *stack_pop; fbreak;
858
- };
859
-
860
- instance_var_v
861
- => {
862
- emit_instance_var
863
-
864
- fnext *stack_pop; fbreak;
865
- };
866
- *|;
867
-
868
- # Literal function name in definition (e.g. `def class`).
869
- # Keywords are returned as their respective tokens; this is used
870
- # to support singleton def `def self.foo`. Global variables are
871
- # returned as `tGVAR`; this is used in global variable alias
872
- # statements `alias $a $b`. Symbols are returned verbatim; this
873
- # is used in `alias :a :"b#{foo}"` and `undef :a`.
874
- #
875
- # Transitions to `expr_endfn` afterwards.
876
- #
877
- expr_fname := |*
878
- keyword
879
- => { emit_table(KEYWORDS_BEGIN);
880
- fnext expr_endfn; fbreak; };
881
-
882
- constant
883
- => { emit(:tCONSTANT)
884
- fnext expr_endfn; fbreak; };
885
-
886
- bareword [?=!]?
887
- => { emit(:tIDENTIFIER)
888
- fnext expr_endfn; fbreak; };
889
-
890
- global_var
891
- => { p = @ts - 1
892
- fnext expr_end; fcall expr_variable; };
893
-
894
- # If the handling was to be delegated to expr_end,
895
- # these cases would transition to something else than
896
- # expr_endfn, which is incorrect.
897
- operator_fname |
898
- operator_arithmetic |
899
- operator_rest
900
- => { emit_table(PUNCTUATION)
901
- fnext expr_endfn; fbreak; };
902
-
903
- '::'
904
- => { fhold; fhold; fgoto expr_end; };
905
-
906
- ':'
907
- => { fhold; fgoto expr_beg; };
908
-
909
- '%s' (c_ascii - [A-Za-z0-9])
910
- => {
911
- if version?(23)
912
- type, delimiter = tok[0..-2], tok[-1].chr
913
- @strings.push_literal(type, delimiter, @ts)
914
- fgoto inside_string;
915
- else
916
- p = @ts - 1
917
- fgoto expr_end;
918
- end
919
- };
920
-
921
- w_any;
922
-
923
- c_any
924
- => { fhold; fgoto expr_end; };
925
-
926
- c_eof => do_eof;
927
- *|;
928
-
929
- # After literal function name in definition. Behaves like `expr_end`,
930
- # but allows a tLABEL.
931
- #
932
- # Transitions to `expr_end` afterwards.
933
- #
934
- expr_endfn := |*
935
- label ( any - ':' )
936
- => { emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
937
- fhold; fnext expr_labelarg; fbreak; };
938
-
939
- '...'
940
- => {
941
- if @version >= 31 && @context.in_argdef
942
- emit(:tBDOT3, '...'.freeze)
943
- # emit(:tNL, "\n".freeze, @te - 1, @te)
944
- fnext expr_end; fbreak;
945
- else
946
- p -= 3;
947
- fgoto expr_end;
948
- end
949
- };
950
-
951
- w_space_comment;
952
-
953
- c_any
954
- => { fhold; fgoto expr_end; };
955
-
956
- c_eof => do_eof;
957
- *|;
958
-
959
- # Literal function name in method call (e.g. `a.class`).
960
- #
961
- # Transitions to `expr_arg` afterwards.
962
- #
963
- expr_dot := |*
964
- constant
965
- => { emit(:tCONSTANT)
966
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
967
-
968
- call_or_var
969
- => { emit(:tIDENTIFIER)
970
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
971
-
972
- bareword ambiguous_fid_suffix
973
- => { emit(:tFID, tok(@ts, tm), @ts, tm)
974
- fnext *arg_or_cmdarg(cmd_state); p = tm - 1; fbreak; };
975
-
976
- # See the comment in `expr_fname`.
977
- operator_fname |
978
- operator_arithmetic |
979
- operator_rest
980
- => { emit_table(PUNCTUATION)
981
- fnext expr_arg; fbreak; };
982
-
983
- w_any;
984
-
985
- c_any
986
- => { fhold; fgoto expr_end; };
987
-
988
- c_eof => do_eof;
989
- *|;
990
-
991
- # The previous token emitted was a `tIDENTIFIER` or `tFID`; no space
992
- # is consumed; the current expression is a command or method call.
993
- #
994
- expr_arg := |*
995
- #
996
- # COMMAND MODE SPECIFIC TOKENS
997
- #
998
-
999
- # cmd (1 + 2)
1000
- # See below the rationale about expr_endarg.
1001
- w_space+ e_lparen
1002
- => {
1003
- if version?(18)
1004
- emit(:tLPAREN2, '('.freeze, @te - 1, @te)
1005
- fnext expr_value; fbreak;
1006
- else
1007
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1008
- fnext expr_beg; fbreak;
1009
- end
1010
- };
1011
-
1012
- # meth(1 + 2)
1013
- # Regular method call.
1014
- e_lparen
1015
- => { emit(:tLPAREN2, '('.freeze)
1016
- fnext expr_beg; fbreak; };
1017
-
1018
- # meth [...]
1019
- # Array argument. Compare with indexing `meth[...]`.
1020
- w_space+ e_lbrack
1021
- => { emit(:tLBRACK, '['.freeze, @te - 1, @te)
1022
- fnext expr_beg; fbreak; };
1023
-
1024
- # cmd {}
1025
- # Command: method call without parentheses.
1026
- w_space* e_lbrace
1027
- => {
1028
- if @lambda_stack.last == @paren_nest
1029
- @lambda_stack.pop
1030
- emit(:tLAMBEG, '{'.freeze, @te - 1, @te)
1031
- else
1032
- emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1033
- end
1034
- @command_start = true
1035
- @paren_nest += 1
1036
- fnext expr_value; fbreak;
1037
- };
1038
-
1039
- #
1040
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
1041
- #
1042
-
1043
- # a??
1044
- # Ternary operator
1045
- '?' c_space_nl
1046
- => {
1047
- # Unlike expr_beg as invoked in the next rule, do not warn
1048
- p = @ts - 1
1049
- fgoto expr_end;
1050
- };
1051
-
1052
- # a ?b, a? ?
1053
- # Character literal or ternary operator
1054
- w_space* '?'
1055
- => { fhold; fgoto expr_beg; };
1056
-
1057
- # a %{1}, a %[1] (but not "a %=1=" or "a % foo")
1058
- # a /foo/ (but not "a / foo" or "a /=foo")
1059
- # a <<HEREDOC
1060
- w_space+ %{ tm = p }
1061
- ( [%/] ( c_any - c_space_nl - '=' ) # /
1062
- | '<<'
1063
- )
1064
- => {
1065
- check_ambiguous_slash(tm)
1066
-
1067
- p = tm - 1
1068
- fgoto expr_beg;
1069
- };
1070
-
1071
- # x *1
1072
- # Ambiguous splat, kwsplat or block-pass.
1073
- w_space+ %{ tm = p } ( '+' | '-' | '*' | '&' | '**' )
1074
- => {
1075
- diagnostic :warning, :ambiguous_prefix, { :prefix => tok(tm, @te) },
1076
- range(tm, @te)
1077
-
1078
- p = tm - 1
1079
- fgoto expr_beg;
1080
- };
1081
-
1082
- # x ::Foo
1083
- # Ambiguous toplevel constant access.
1084
- w_space+ '::'
1085
- => { fhold; fhold; fgoto expr_beg; };
1086
-
1087
- # x:b
1088
- # Symbol.
1089
- w_space* ':'
1090
- => { fhold; fgoto expr_beg; };
1091
-
1092
- w_space+ label
1093
- => { p = @ts - 1; fgoto expr_beg; };
1094
-
1095
- #
1096
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
1097
- #
1098
-
1099
- # a ? b
1100
- # Ternary operator.
1101
- w_space+ %{ tm = p } '?' c_space_nl
1102
- => { p = tm - 1; fgoto expr_end; };
1103
-
1104
- # x + 1: Binary operator or operator-assignment.
1105
- w_space* operator_arithmetic
1106
- ( '=' | c_space_nl )? |
1107
- # x rescue y: Modifier keyword.
1108
- w_space* keyword_modifier |
1109
- # a &. b: Safe navigation operator.
1110
- w_space* '&.' |
1111
- # Miscellanea.
1112
- w_space* punctuation_end
1113
- => {
1114
- p = @ts - 1
1115
- fgoto expr_end;
1116
- };
1117
-
1118
- w_space;
1119
-
1120
- w_comment
1121
- => { fgoto expr_end; };
1122
-
1123
- w_newline
1124
- => { fhold; fgoto expr_end; };
1125
-
1126
- c_any
1127
- => { fhold; fgoto expr_beg; };
1128
-
1129
- c_eof => do_eof;
1130
- *|;
1131
-
1132
- # The previous token was an identifier which was seen while in the
1133
- # command mode (that is, the state at the beginning of #advance was
1134
- # expr_value). This state is very similar to expr_arg, but disambiguates
1135
- # two very rare and specific conditions:
1136
- # * In 1.8 mode, "foo (lambda do end)".
1137
- # * In 1.9+ mode, "f x: -> do foo do end end".
1138
- expr_cmdarg := |*
1139
- w_space+ e_lparen
1140
- => {
1141
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te) # range is the last matched character only, not the leading whitespace
1142
- if version?(18)
1143
- fnext expr_value; fbreak;
1144
- else
1145
- fnext expr_beg; fbreak;
1146
- end
1147
- };
1148
-
1149
- w_space* 'do'
1150
- => {
1151
- if @cond.active?
1152
- emit(:kDO_COND, 'do'.freeze, @te - 2, @te) # range is the two characters of `do', not the leading whitespace
1153
- else
1154
- emit(:kDO, 'do'.freeze, @te - 2, @te) # same range as above
1155
- end
1156
- fnext expr_value; fbreak;
1157
- };
1158
-
1159
- c_any |
1160
- # Listed explicitly so that a word merely starting with `do' is a longer match than the `do' rule above (longest match wins).
1161
- w_space* bareword |
1162
- w_space* label
1163
- => { p = @ts - 1 # rewind to the start of the match so that expr_arg lexes it again
1164
- fgoto expr_arg; };
1165
-
1166
- c_eof => do_eof;
1167
- *|;
1168
-
1169
- # The rationale for this state is pretty complex. Normally, if an argument
1169
- # is passed to a command and then there is a block (tLCURLY...tRCURLY),
1170
- # the block is attached to the innermost argument (`f` in `m f {}`), or it
1171
- # is a parse error (`m 1 {}`). But there is a special case for passing a single
1172
- # primary expression grouped with parentheses: if you write `m (1) {}` or
1173
- # (2.0 only) `m () {}`, then the block is attached to `m`.
1174
- #
1175
- # Thus, we recognize the opening `(` of a command (remember, a command is
1176
- # a method call without parens) as a tLPAREN_ARG; then, in parser, we recognize
1177
- # `tLPAREN_ARG expr rparen` as a `primary_expr` and before rparen, set the
1178
- # lexer's state to `expr_endarg`, which makes it emit the possibly following
1179
- # `{` as `tLBRACE_ARG`.
1180
- #
1181
- # Anything this state does not recognize is handed to `expr_end`. This state also
1182
- # handles `do` itself via `emit_do(true)` (presumably as `kDO_BLOCK`) and continues in `expr_value`.
1183
- expr_endarg := |*
1184
- e_lbrace
1185
- => {
1186
- if @lambda_stack.last == @paren_nest # the innermost pending `->` was pushed at this nesting level
1187
- @lambda_stack.pop
1188
- emit(:tLAMBEG, '{'.freeze)
1189
- else
1190
- emit(:tLBRACE_ARG, '{'.freeze)
1191
- end
1192
- @paren_nest += 1 # the brace opens one more nesting level in either case
1193
- @command_start = true
1194
- fnext expr_value; fbreak;
1195
- };
1196
-
1197
- 'do'
1198
- => { emit_do(true)
1199
- fnext expr_value; fbreak; };
1200
-
1201
- w_space_comment;
1202
-
1203
- c_any
1204
- => { fhold; fgoto expr_end; }; # un-consume the character and let expr_end lex it
1205
-
1206
- c_eof => do_eof;
1207
- *|;
1209
-
1210
- # The rationale for this state is that several keywords accept a value
1210
- # (i.e. should transition to `expr_beg`), do not accept it the way a command does
1211
- # (i.e. this is not an `expr_arg`), and must behave like a statement, that is,
1212
- # accept a trailing modifier such as if/while/etc.
1213
- #
1214
- expr_mid := |*
1215
- keyword_modifier
1216
- => { emit_table(KEYWORDS)
1217
- fnext expr_beg; fbreak; };
1218
-
1219
- bareword
1220
- => { p = @ts - 1; fgoto expr_beg; }; # rewind to the start of the word and lex it again in expr_beg
1221
-
1222
- w_space_comment;
1223
-
1224
- w_newline
1225
- => { fhold; fgoto expr_end; }; # step back one character and let expr_end handle the newline
1226
-
1227
- c_any
1228
- => { fhold; fgoto expr_beg; }; # un-consume the character and lex it in expr_beg
1229
-
1230
- c_eof => do_eof;
1231
- *|;
1233
-
1234
- # Beginning of an expression.
1235
- #
1236
- # Don't fall through to this state from `c_any`; make sure to handle
1237
- # `c_space* c_nl` and let `expr_end` handle the newline.
1238
- # Otherwise code like `f\ndef x` gets glued together and the parser
1239
- # explodes.
1240
- #
1241
- expr_beg := |*
1242
- # +5, -5, - 5
1243
- [+\-] w_any* [0-9]
1244
- => {
1245
- emit(:tUNARY_NUM, tok(@ts, @ts + 1), @ts, @ts + 1)
1246
- fhold; fnext expr_end; fbreak;
1247
- };
1248
-
1249
- # splat *a
1250
- '*'
1251
- => { emit(:tSTAR, '*'.freeze)
1252
- fbreak; };
1253
-
1254
- #
1255
- # STRING AND REGEXP LITERALS
1256
- #
1257
-
1258
- # /regexp/oui
1259
- # /=/ (disambiguation with /=)
1260
- '/' c_any
1261
- => {
1262
- type = delimiter = tok[0].chr
1263
- @strings.push_literal(type, delimiter, @ts)
1264
-
1265
- fhold;
1266
- fgoto inside_string;
1267
- };
1268
-
1269
- # %<string>
1270
- '%' ( c_ascii - [A-Za-z0-9] )
1271
- => {
1272
- type, delimiter = @source_buffer.slice(@ts, 1).chr, tok[-1].chr
1273
- @strings.push_literal(type, delimiter, @ts)
1274
- fgoto inside_string;
1275
- };
1276
-
1277
- # %w(we are the people)
1278
- '%' [A-Za-z] (c_ascii - [A-Za-z0-9])
1279
- => {
1280
- type, delimiter = tok[0..-2], tok[-1].chr
1281
- @strings.push_literal(type, delimiter, @ts)
1282
- fgoto inside_string;
1283
- };
1284
-
1285
- '%' c_eof
1286
- => {
1287
- diagnostic :fatal, :string_eof, nil, range(@ts, @ts + 1)
1288
- };
1289
-
1290
- # Heredoc start.
1291
- # <<END | <<'END' | <<"END" | <<`END` |
1292
- # <<-END | <<-'END' | <<-"END" | <<-`END` |
1293
- # <<~END | <<~'END' | <<~"END" | <<~`END`
1294
- '<<' [~\-]?
1295
- ( '"' ( any - '"' )* '"'
1296
- | "'" ( any - "'" )* "'"
1297
- | "`" ( any - "`" )* "`"
1298
- | bareword ) % { heredoc_e = p }
1299
- c_line* c_nl % { new_herebody_s = p }
1300
- => {
1301
- tok(@ts, heredoc_e) =~ /^<<(-?)(~?)(["'`]?)(.*)\3$/m
1302
-
1303
- indent = !$1.empty? || !$2.empty?
1304
- dedent_body = !$2.empty?
1305
- type = $3.empty? ? '<<"'.freeze : ('<<'.freeze + $3)
1306
- delimiter = $4
1307
-
1308
- if @version >= 27
1309
- if delimiter.count("\n") > 0 || delimiter.count("\r") > 0
1310
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1311
- end
1312
- elsif @version >= 24
1313
- if delimiter.count("\n") > 0
1314
- if delimiter.end_with?("\n")
1315
- diagnostic :warning, :heredoc_id_ends_with_nl, nil, range(@ts, @ts + 1)
1316
- delimiter = delimiter.rstrip
1317
- else
1318
- diagnostic :fatal, :heredoc_id_has_newline, nil, range(@ts, @ts + 1)
1319
- end
1320
- end
1321
- end
1322
-
1323
- if dedent_body && version?(18, 19, 20, 21, 22)
1324
- emit(:tLSHFT, '<<'.freeze, @ts, @ts + 2)
1325
- p = @ts + 1
1326
- fnext expr_beg; fbreak;
1327
- else
1328
- @strings.push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
1329
- @strings.herebody_s ||= new_herebody_s
1330
-
1331
- p = @strings.herebody_s - 1
1332
- fnext inside_string;
1333
- end
1334
- };
1335
-
1336
- # Quoted heredoc start whose identifier is not closed on the same line
1336
- # <<'END | <<"END | <<`END |
1337
- # <<-'END | <<-"END | <<-`END |
1338
- # <<~'END | <<~"END | <<~`END
1339
- # All three quote styles take the (possibly empty) rest of the identifier, never crossing c_nl.
1340
- # If the quote is closed, the heredoc-start rule above is the longer match and wins, so this rule only sees unterminated ids.
1341
- '<<' [~\-]?
1342
- ('"' (any - c_nl - '"')*
1343
- |"'" (any - c_nl - "'")*
1344
- |"`" (any - c_nl - "`")*
1345
- )
1346
- => {
1347
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1348
- };
1350
-
1351
- #
1352
- # SYMBOL LITERALS
1353
- #
1354
-
1355
- # :&&, :||
1356
- ':' ('&&' | '||') => {
1357
- fhold; fhold;
1358
- emit(:tSYMBEG, tok(@ts, @ts + 1), @ts, @ts + 1)
1359
- fgoto expr_fname;
1360
- };
1361
-
1362
- # :"bar", :'baz'
1363
- ':' ['"] # '
1364
- => {
1365
- type, delimiter = tok, tok[-1].chr
1366
- @strings.push_literal(type, delimiter, @ts);
1367
-
1368
- fgoto inside_string;
1369
- };
1370
-
1371
- # :!@ is :!
1372
- # :~@ is :~
1373
- ':' [!~] '@'
1374
- => {
1375
- emit(:tSYMBOL, tok(@ts + 1, @ts + 2))
1376
- fnext expr_end; fbreak;
1377
- };
1378
-
1379
- ':' bareword ambiguous_symbol_suffix
1380
- => {
1381
- emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
1382
- p = tm - 1
1383
- fnext expr_end; fbreak;
1384
- };
1385
-
1386
- ':' ( bareword | global_var | class_var | instance_var |
1387
- operator_fname | operator_arithmetic | operator_rest )
1388
- => {
1389
- emit(:tSYMBOL, tok(@ts + 1), @ts)
1390
- fnext expr_end; fbreak;
1391
- };
1392
-
1393
- ':' ( '@' %{ tm = p - 1; diag_msg = :ivar_name }
1394
- | '@@' %{ tm = p - 2; diag_msg = :cvar_name }
1395
- ) [0-9]*
1396
- => {
1397
- emit_colon_with_digits(p, tm, diag_msg)
1398
-
1399
- fnext expr_end; fbreak;
1400
- };
1401
-
1402
- #
1403
- # AMBIGUOUS TERNARY OPERATOR
1404
- #
1405
-
1406
- # Character constant, like ?a, ?\n, ?\u1000, and so on
1407
- # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
1408
- '?' c_any
1409
- => {
1410
- p, next_state = @strings.read_character_constant(@ts)
1411
- fhold; # Ragel will do `p += 1` to consume input, prevent it
1412
-
1413
- # If the strings lexer finds a character constant (?a), emit it;
1414
- # otherwise read ternary operator
1415
- if @token_queue.empty?
1416
- fgoto *next_state;
1417
- else
1418
- fnext *next_state;
1419
- fbreak;
1420
- end
1421
- };
1422
-
1423
- '?' c_eof
1424
- => {
1425
- diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
1426
- };
1427
-
1428
- #
1429
- # AMBIGUOUS EMPTY BLOCK ARGUMENTS
1430
- #
1431
-
1432
- # Ruby >= 2.7 emits it as two tPIPE terminals
1433
- # while Ruby < 2.7 as a single tOROP (like in `a || b`)
1434
- '||'
1435
- => {
1436
- if @version >= 27
1437
- emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
1438
- fhold;
1439
- fnext expr_beg; fbreak;
1440
- else
1441
- p -= 2
1442
- fgoto expr_end;
1443
- end
1444
- };
1445
-
1446
- #
1447
- # KEYWORDS AND PUNCTUATION
1448
- #
1449
-
1450
- # a({b=>c})
1451
- e_lbrace
1452
- => {
1453
- if @lambda_stack.last == @paren_nest
1454
- @lambda_stack.pop
1455
- @command_start = true
1456
- emit(:tLAMBEG, '{'.freeze)
1457
- else
1458
- emit(:tLBRACE, '{'.freeze)
1459
- end
1460
- @paren_nest += 1
1461
- fbreak;
1462
- };
1463
-
1464
- # a([1, 2])
1465
- e_lbrack
1466
- => { emit(:tLBRACK, '['.freeze)
1467
- fbreak; };
1468
-
1469
- # a()
1470
- e_lparen
1471
- => { emit(:tLPAREN, '('.freeze)
1472
- fbreak; };
1473
-
1474
- # a(+b)
1475
- punctuation_begin
1476
- => { emit_table(PUNCTUATION_BEGIN)
1477
- fbreak; };
1478
-
1479
- # rescue Exception => e: Block rescue.
1480
- # Special because it should transition to expr_mid.
1481
- 'rescue' %{ tm = p } '=>'?
1482
- => { emit(:kRESCUE, 'rescue'.freeze, @ts, tm)
1483
- p = tm - 1
1484
- fnext expr_mid; fbreak; };
1485
-
1486
- # if a: Statement if.
1487
- keyword_modifier
1488
- => { emit_table(KEYWORDS_BEGIN)
1489
- @command_start = true
1490
- fnext expr_value; fbreak; };
1491
-
1492
- #
1493
- # RUBY 1.9 HASH LABELS
1494
- #
1495
-
1496
- label ( any - ':' )
1497
- => {
1498
- fhold;
1499
-
1500
- if version?(18)
1501
- ident = tok(@ts, @te - 2)
1502
-
1503
- emit((@source_buffer.slice(@ts, 1) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
1504
- ident, @ts, @te - 2)
1505
- fhold; # continue as a symbol
1506
-
1507
- if !@static_env.nil? && @static_env.declared?(ident)
1508
- fnext expr_end;
1509
- else
1510
- fnext *arg_or_cmdarg(cmd_state);
1511
- end
1512
- else
1513
- emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
1514
- fnext expr_labelarg;
1515
- end
1516
-
1517
- fbreak;
1518
- };
1519
-
1520
- #
1521
- # RUBY 2.7 BEGINLESS RANGE
1522
-
1523
- '..'
1524
- => {
1525
- if @version >= 27
1526
- emit(:tBDOT2)
1527
- else
1528
- emit(:tDOT2)
1529
- end
1530
-
1531
- fnext expr_beg; fbreak;
1532
- };
1533
-
1534
- '...' c_nl?
1535
- => {
1536
- # The optional "\n" after the dots is matched here and then handled as follows:
1537
- # + if it is present
1538
- # + and a tNL was emitted for it below, nothing more is needed
1539
- # + and no tNL was emitted, `p` is moved back onto the "\n" so the next scan processes it
1540
- # + if it is absent, nothing more is needed
1541
- followed_by_nl = @te - 1 == @newline_s
1542
- nl_emitted = false
1543
- dots_te = followed_by_nl ? @te - 1 : @te
1544
-
1545
- if @version >= 30
1546
- if @lambda_stack.any? && @lambda_stack.last + 1 == @paren_nest
1547
- # To reject `->(...)` like `->...`
1548
- emit(:tDOT3, '...'.freeze, @ts, dots_te)
1549
- else
1550
- emit(:tBDOT3, '...'.freeze, @ts, dots_te)
1551
-
1552
- if @version >= 31 && followed_by_nl && @context.in_argdef
1553
- emit(:tNL, nil, @te - 1, @te) # nil value, range is the newline itself, like the other tNL emissions in this file
1554
- nl_emitted = true
1555
- end
1556
- end
1557
- elsif @version >= 27
1558
- emit(:tBDOT3, '...'.freeze, @ts, dots_te)
1559
- else
1560
- emit(:tDOT3, '...'.freeze, @ts, dots_te)
1561
- end
1562
-
1563
- if followed_by_nl && !nl_emitted
1564
- # return "\n" to process it on the next scan
1565
- fhold;
1566
- end
1567
-
1568
- fnext expr_beg; fbreak;
1569
- };
1570
-
1571
- #
1572
- # CONTEXT-DEPENDENT VARIABLE LOOKUP OR COMMAND INVOCATION
1573
- #
1574
-
1575
- # foo= bar: Disambiguate with bareword rule below.
1576
- bareword ambiguous_ident_suffix |
1577
- # def foo: Disambiguate with bareword rule below.
1578
- keyword
1579
- => { p = @ts - 1
1580
- fgoto expr_end; };
1581
-
1582
- # a = 42; a [42]: Indexing.
1583
- # def a; end; a [42]: Array argument.
1584
- call_or_var
1585
- => local_ident;
1586
-
1587
- (call_or_var - keyword)
1588
- % { ident_tok = tok; ident_ts = @ts; ident_te = @te; }
1589
- w_space+ '('
1590
- => {
1591
- emit(:tIDENTIFIER, ident_tok, ident_ts, ident_te)
1592
- p = ident_te - 1
1593
-
1594
- if !@static_env.nil? && @static_env.declared?(ident_tok) && @version < 25
1595
- fnext expr_endfn;
1596
- else
1597
- fnext expr_cmdarg;
1598
- end
1599
- fbreak;
1600
- };
1601
-
1602
- #
1603
- # WHITESPACE
1604
- #
1605
-
1606
- w_any;
1607
-
1608
- e_nl '=begin' ( c_space | c_nl_zlen )
1609
- => {
1610
- p = @ts - 1
1611
- @cs_before_block_comment = @cs
1612
- fgoto line_begin;
1613
- };
1614
-
1615
- #
1616
- # DEFAULT TRANSITION
1617
- #
1618
-
1619
- # The following rules match most binary and all unary operators.
1620
- # Rules for binary operators provide better error reporting.
1621
- operator_arithmetic '=' |
1622
- operator_rest |
1623
- punctuation_end |
1624
- c_any
1625
- => { p = @ts - 1; fgoto expr_end; };
1626
-
1627
- c_eof => do_eof;
1628
- *|;
1629
-
1630
- # Special newline handling for "def a b:". The label rule in expr_beg selects this state after emitting a tLABEL.
1630
- # In a kwarg context the newline is handed to expr_end; otherwise it is consumed and lexing continues in line_begin.
1631
- expr_labelarg := |*
1632
- w_space_comment;
1633
-
1634
- w_newline
1635
- => {
1636
- if @context.in_kwarg
1637
- fhold; fgoto expr_end;
1638
- else
1639
- fgoto line_begin;
1640
- end
1641
- };
1642
-
1643
- c_any
1644
- => { fhold; fgoto expr_beg; }; # un-consume the character and lex it in expr_beg
1645
-
1646
- c_eof => do_eof;
1647
- *|;
1649
-
1650
- # Like expr_beg, but no 1.9 label or 2.2 quoted label possible.
1650
- # A label-shaped match is rewound and given to expr_end; a bare quote opens an ordinary string literal.
1651
- expr_value := |*
1652
- # a:b: a(:b), a::B, A::B
1653
- label (any - ':')
1654
- => { p = @ts - 1 # rewind to the start of the match so that expr_end lexes it again
1655
- fgoto expr_end; };
1656
-
1657
- # "bar", 'baz'
1658
- ['"] # '
1659
- => {
1660
- @strings.push_literal(tok, tok, @ts) # the quote character is passed as both the literal type and the delimiter
1661
- fgoto inside_string;
1662
- };
1663
-
1664
- w_space_comment;
1665
-
1666
- w_newline
1667
- => { fgoto line_begin; }; # the newline is consumed here; no fhold
1668
-
1669
- c_any
1670
- => { fhold; fgoto expr_beg; }; # un-consume the character and lex it in expr_beg
1671
-
1672
- c_eof => do_eof;
1673
- *|;
1675
-
1676
- expr_end := |*
1677
- #
1678
- # STABBY LAMBDA
1679
- #
1680
-
1681
- '->'
1682
- => {
1683
- emit(:tLAMBDA, '->'.freeze, @ts, @ts + 2)
1684
-
1685
- @lambda_stack.push @paren_nest
1686
- fnext expr_endfn; fbreak;
1687
- };
1688
-
1689
- e_lbrace | 'do'
1690
- => {
1691
- if @lambda_stack.last == @paren_nest
1692
- @lambda_stack.pop
1693
-
1694
- if tok == '{'.freeze
1695
- emit(:tLAMBEG, '{'.freeze)
1696
- else # 'do'
1697
- emit(:kDO_LAMBDA, 'do'.freeze)
1698
- end
1699
- else
1700
- if tok == '{'.freeze
1701
- emit(:tLCURLY, '{'.freeze)
1702
- else # 'do'
1703
- emit_do
1704
- end
1705
- end
1706
- if tok == '{'.freeze
1707
- @paren_nest += 1
1708
- end
1709
- @command_start = true
1710
-
1711
- fnext expr_value; fbreak;
1712
- };
1713
-
1714
- #
1715
- # KEYWORDS
1716
- #
1717
-
1718
- keyword_with_fname
1719
- => { emit_table(KEYWORDS)
1720
- fnext expr_fname; fbreak; };
1721
-
1722
- 'class' w_any* '<<'
1723
- => { emit_singleton_class
1724
- fnext expr_value; fbreak; };
1725
-
1726
- # a if b:c: Syntax error.
1727
- keyword_modifier
1728
- => { emit_table(KEYWORDS)
1729
- fnext expr_beg; fbreak; };
1730
-
1731
- # elsif b:c: elsif b(:c)
1732
- keyword_with_value
1733
- => { emit_table(KEYWORDS)
1734
- @command_start = true
1735
- fnext expr_value; fbreak; };
1736
-
1737
- keyword_with_mid
1738
- => { emit_table(KEYWORDS)
1739
- fnext expr_mid; fbreak; };
1740
-
1741
- keyword_with_arg
1742
- => {
1743
- emit_table(KEYWORDS)
1744
-
1745
- if version?(18) && tok == 'not'.freeze
1746
- fnext expr_beg; fbreak;
1747
- else
1748
- fnext expr_arg; fbreak;
1749
- end
1750
- };
1751
-
1752
- '__ENCODING__'
1753
- => {
1754
- if version?(18)
1755
- emit(:tIDENTIFIER)
1756
-
1757
- unless !@static_env.nil? && @static_env.declared?(tok)
1758
- fnext *arg_or_cmdarg(cmd_state);
1759
- end
1760
- else
1761
- emit(:k__ENCODING__, '__ENCODING__'.freeze)
1762
- end
1763
- fbreak;
1764
- };
1765
-
1766
- keyword_with_end
1767
- => { emit_table(KEYWORDS)
1768
- fbreak; };
1769
-
1770
- #
1771
- # NUMERIC LITERALS
1772
- #
1773
-
1774
- ( '0' [Xx] %{ @num_base = 16; @num_digits_s = p } int_hex
1775
- | '0' [Dd] %{ @num_base = 10; @num_digits_s = p } int_dec
1776
- | '0' [Oo] %{ @num_base = 8; @num_digits_s = p } int_dec
1777
- | '0' [Bb] %{ @num_base = 2; @num_digits_s = p } int_bin
1778
- | [1-9] digit* '_'? %{ @num_base = 10; @num_digits_s = @ts } int_dec
1779
- | '0' digit* '_'? %{ @num_base = 8; @num_digits_s = @ts } int_dec
1780
- ) %{ @num_suffix_s = p } int_suffix
1781
- => {
1782
- digits = numeric_literal_int
1783
-
1784
- if version?(18, 19, 20)
1785
- emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s)
1786
- p = @num_suffix_s - 1
1787
- else
1788
- p = @num_xfrm.call(digits.to_i(@num_base), p)
1789
- end
1790
- fbreak;
1791
- };
1792
-
1793
- flo_frac flo_pow?
1794
- => {
1795
- diagnostic :error, :no_dot_digit_literal
1796
- };
1797
-
1798
- flo_int [eE]
1799
- => {
1800
- if version?(18, 19, 20)
1801
- diagnostic :error,
1802
- :trailing_in_number, { :character => tok(@te - 1, @te) },
1803
- range(@te - 1, @te)
1804
- else
1805
- emit(:tINTEGER, tok(@ts, @te - 1).to_i, @ts, @te - 1)
1806
- fhold; fbreak;
1807
- end
1808
- };
1809
-
1810
- flo_int flo_frac [eE]
1811
- => {
1812
- if version?(18, 19, 20)
1813
- diagnostic :error,
1814
- :trailing_in_number, { :character => tok(@te - 1, @te) },
1815
- range(@te - 1, @te)
1816
- else
1817
- emit(:tFLOAT, tok(@ts, @te - 1).to_f, @ts, @te - 1)
1818
- fhold; fbreak;
1819
- end
1820
- };
1821
-
1822
- flo_int
1823
- ( flo_frac? flo_pow %{ @num_suffix_s = p } flo_pow_suffix
1824
- | flo_frac %{ @num_suffix_s = p } flo_suffix
1825
- )
1826
- => {
1827
- digits = tok(@ts, @num_suffix_s)
1828
-
1829
- if version?(18, 19, 20)
1830
- emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
1831
- p = @num_suffix_s - 1
1832
- else
1833
- p = @num_xfrm.call(digits, p)
1834
- end
1835
- fbreak;
1836
- };
1837
-
1838
- #
1839
- # STRING AND XSTRING LITERALS
1840
- #
1841
-
1842
- # `echo foo`, "bar", 'baz'
1843
- '`' | ['"] # '
1844
- => {
1845
- type, delimiter = tok, tok[-1].chr
1846
- @strings.push_literal(type, delimiter, @ts, nil, false, false, true);
1847
- fgoto inside_string;
1848
- };
1849
-
1850
- #
1851
- # CONSTANTS AND VARIABLES
1852
- #
1853
-
1854
- constant
1855
- => { emit(:tCONSTANT)
1856
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
1857
-
1858
- constant ambiguous_const_suffix
1859
- => { emit(:tCONSTANT, tok(@ts, tm), @ts, tm)
1860
- p = tm - 1; fbreak; };
1861
-
1862
- global_var | class_var_v | instance_var_v
1863
- => { p = @ts - 1; fcall expr_variable; };
1864
-
1865
- #
1866
- # METHOD CALLS
1867
- #
1868
-
1869
- '.:' w_space+
1870
- => { emit(:tDOT, '.', @ts, @ts + 1)
1871
- emit(:tCOLON, ':', @ts + 1, @ts + 2)
1872
- p = p - tok.length + 2
1873
- fnext expr_dot; fbreak; };
1874
-
1875
- '.:'
1876
- => {
1877
- if @version >= 27
1878
- emit_table(PUNCTUATION)
1879
- else
1880
- emit(:tDOT, tok(@ts, @ts + 1), @ts, @ts + 1)
1881
- fhold;
1882
- end
1883
-
1884
- fnext expr_dot; fbreak;
1885
- };
1886
-
1887
- '.' | '&.' | '::'
1888
- => { emit_table(PUNCTUATION)
1889
- fnext expr_dot; fbreak; };
1890
-
1891
- call_or_var
1892
- => local_ident;
1893
-
1894
- bareword ambiguous_fid_suffix
1895
- => {
1896
- if tm == @te
1897
- # Suffix was consumed, e.g. foo!
1898
- emit(:tFID)
1899
- else
1900
- # Suffix was not consumed, e.g. foo!=
1901
- emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
1902
- p = tm - 1
1903
- end
1904
- fnext expr_arg; fbreak;
1905
- };
1906
-
1907
- #
1908
- # OPERATORS
1909
- #
1910
-
1911
- '*' | '=>'
1912
- => {
1913
- emit_table(PUNCTUATION)
1914
- fnext expr_value; fbreak;
1915
- };
1916
-
1917
- # When '|', '~', '!', '=>' are used as operators
1918
- # they do not accept any symbols (or quoted labels) after.
1919
- # Other binary operators accept it.
1920
- ( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' )
1921
- => {
1922
- emit_table(PUNCTUATION);
1923
- fnext expr_value; fbreak;
1924
- };
1925
-
1926
- ( e_lparen | '|' | '~' | '!' )
1927
- => { emit_table(PUNCTUATION)
1928
- fnext expr_beg; fbreak; };
1929
-
1930
- e_rbrace | e_rparen | e_rbrack
1931
- => {
1932
- emit_rbrace_rparen_rbrack
1933
-
1934
- if tok == '}'.freeze || tok == ']'.freeze
1935
- if @version >= 25
1936
- fnext expr_end;
1937
- else
1938
- fnext expr_endarg;
1939
- end
1940
- else # )
1941
- # fnext expr_endfn; ?
1942
- end
1943
-
1944
- fbreak;
1945
- };
1946
-
1947
- operator_arithmetic '='
1948
- => { emit(:tOP_ASGN, tok(@ts, @te - 1))
1949
- fnext expr_beg; fbreak; };
1950
-
1951
- '?'
1952
- => { emit(:tEH, '?'.freeze)
1953
- fnext expr_value; fbreak; };
1954
-
1955
- e_lbrack
1956
- => { emit(:tLBRACK2, '['.freeze)
1957
- fnext expr_beg; fbreak; };
1958
-
1959
- '...' c_nl
1960
- => {
1961
- if @paren_nest == 0
1962
- diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
1963
- end
1964
-
1965
- emit(:tDOT3, '...'.freeze, @ts, @te - 1)
1966
- fhold;
1967
- fnext expr_beg; fbreak;
1968
- };
1969
-
1970
- punctuation_end
1971
- => { emit_table(PUNCTUATION)
1972
- fnext expr_beg; fbreak; };
1973
-
1974
- #
1975
- # WHITESPACE
1976
- #
1977
-
1978
- w_space_comment;
1979
-
1980
- w_newline
1981
- => { fgoto leading_dot; };
1982
-
1983
- ';'
1984
- => { emit(:tSEMI, ';'.freeze)
1985
- @command_start = true
1986
- fnext expr_value; fbreak; };
1987
-
1988
- '\\' c_line {
1989
- diagnostic :error, :bare_backslash, nil, range(@ts, @ts + 1)
1990
- fhold;
1991
- };
1992
-
1993
- c_any
1994
- => {
1995
- diagnostic :fatal, :unexpected, { :character => tok.inspect[1..-2] }
1996
- };
1997
-
1998
- c_eof => do_eof;
1999
- *|;
2000
-
2001
- leading_dot := |*
2001
- # Insane leading dots (expr_end jumps here after a newline):
2002
- # a #comment
2003
- # # post-2.7 comment
2004
- # .b: a.b
2005
-
2006
- # Here we use '\n' instead of w_newline to not modify @newline_s
2007
- # and eventually properly emit tNL
2008
- (c_space* w_space_comment '\n')+
2009
- => {
2010
- if @version < 27
2011
- # Ruby before 2.7 doesn't support comments before a leading dot.
2012
- # If a line after "a" starts with a comment then "a" is a self-contained statement.
2013
- # So in that case we emit a special tNL token and start reading the
2014
- # next line as a separate statement.
2015
- #
2016
- # Note: block comments before a leading dot are not supported on any version of Ruby.
2017
- emit(:tNL, nil, @newline_s, @newline_s + 1)
2018
- fhold; fnext line_begin; fbreak;
2019
- end
2020
- };
2021
-
2022
- c_space* '..'
2023
- => {
2024
- emit(:tNL, nil, @newline_s, @newline_s + 1)
2025
- if @version < 27
2026
- fhold; fnext line_begin; fbreak; # NOTE(review): fhold steps back one character but this match is at least two long, so line_begin resumes at the last '.' -- confirm intended
2027
- else
2028
- emit(:tBDOT2)
2029
- fnext expr_beg; fbreak;
2030
- end
2031
- };
2032
-
2033
- c_space* '...'
2034
- => {
2035
- emit(:tNL, nil, @newline_s, @newline_s + 1)
2036
- if @version < 27
2037
- fhold; fnext line_begin; fbreak; # NOTE(review): same single-character fhold on a multi-character match as in the '..' rule -- confirm intended
2038
- else
2039
- emit(:tBDOT3)
2040
- fnext expr_beg; fbreak;
2041
- end
2042
- };
2043
-
2044
- c_space* %{ tm = p } ('.' | '&.')
2045
- => { p = tm - 1; fgoto expr_end; }; # no tNL here: rewind to the dot (tm) and let expr_end lex it
2046
-
2047
- any
2048
- => { emit(:tNL, nil, @newline_s, @newline_s + 1) # anything else: emit the pending newline as tNL
2049
- fhold; fnext line_begin; fbreak; };
2050
- *|;
2052
-
2053
- #
2054
- # === EMBEDDED DOCUMENT (aka BLOCK COMMENT) PARSING ===
2055
- #
2056
-
2057
- line_comment := |*
2057
- '=end' c_line* c_nl_zlen
2058
- => {
2059
- emit_comment(@eq_begin_s, @te) # spans from the `=begin' offset recorded by line_begin to the end of this match
2060
- fgoto *@cs_before_block_comment; # jump to the state stored in @cs_before_block_comment
2061
- };
2062
-
2063
- c_line* c_nl; # any other complete line: matched with no action
2064
-
2065
- c_line* zlen
2066
- => {
2067
- diagnostic :fatal, :embedded_document, nil, # presumably reached when input ends before `=end' -- TODO confirm zlen semantics
2068
- range(@eq_begin_s, @eq_begin_s + '=begin'.length)
2069
- };
2070
- *|;
2072
-
2073
- line_begin := |*
2073
- w_any;
2074
-
2075
- '=begin' ( c_space | c_nl_zlen )
2076
- => { @eq_begin_s = @ts # remember where the block comment starts; line_comment uses it for ranges
2077
- fgoto line_comment; };
2078
-
2079
- '__END__' ( c_eol - zlen )
2080
- => { p = pe - 3 }; # NOTE(review): moves p close to pe, presumably so the rest of the input is not lexed -- confirm why the offset is 3
2081
-
2082
- c_any
2083
- => { cmd_state = true; fhold; fgoto expr_value; }; # set cmd_state, un-consume the character, lex it in expr_value
2084
-
2085
- c_eof => do_eof;
2086
- *|;
2088
-
2089
- inside_string := |*
2089
- any
2090
- => {
2091
- p, next_state = @strings.advance(p) # @strings.advance returns the new position and the state to continue in
2092
-
2093
- fhold; # Ragel will do `p += 1` to consume input, prevent it
2094
- fnext *next_state;
2095
- fbreak;
2096
- };
2097
- *|;
2099
-
2100
- }%%
2101
- # %
2102
- end
2103
- end