parser 2.6.0.0 → 3.1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/lib/parser/all.rb +3 -0
  3. data/lib/parser/ast/processor.rb +48 -1
  4. data/lib/parser/base.rb +30 -6
  5. data/lib/parser/builders/default.rb +670 -38
  6. data/lib/parser/context.rb +24 -26
  7. data/lib/parser/current.rb +36 -9
  8. data/lib/parser/current_arg_stack.rb +46 -0
  9. data/lib/parser/diagnostic/engine.rb +1 -2
  10. data/lib/parser/diagnostic.rb +1 -1
  11. data/lib/parser/lexer/dedenter.rb +58 -49
  12. data/lib/parser/lexer/explanation.rb +1 -1
  13. data/lib/parser/lexer.rb +13837 -11893
  14. data/lib/parser/macruby.rb +2544 -2489
  15. data/lib/parser/max_numparam_stack.rb +56 -0
  16. data/lib/parser/messages.rb +78 -44
  17. data/lib/parser/meta.rb +13 -3
  18. data/lib/parser/ruby18.rb +2313 -2259
  19. data/lib/parser/ruby19.rb +2537 -2488
  20. data/lib/parser/ruby20.rb +2724 -2673
  21. data/lib/parser/ruby21.rb +2766 -2727
  22. data/lib/parser/ruby22.rb +2683 -2628
  23. data/lib/parser/ruby23.rb +2796 -2755
  24. data/lib/parser/ruby24.rb +2812 -2771
  25. data/lib/parser/ruby25.rb +2703 -2670
  26. data/lib/parser/ruby26.rb +2794 -2747
  27. data/lib/parser/ruby27.rb +7914 -0
  28. data/lib/parser/ruby28.rb +8047 -0
  29. data/lib/parser/ruby30.rb +8096 -0
  30. data/lib/parser/ruby31.rb +8354 -0
  31. data/lib/parser/rubymotion.rb +2527 -2485
  32. data/lib/parser/runner/ruby_parse.rb +2 -2
  33. data/lib/parser/runner/ruby_rewrite.rb +2 -2
  34. data/lib/parser/runner.rb +36 -2
  35. data/lib/parser/source/buffer.rb +53 -28
  36. data/lib/parser/source/comment/associator.rb +31 -8
  37. data/lib/parser/source/comment.rb +14 -1
  38. data/lib/parser/source/map/method_definition.rb +25 -0
  39. data/lib/parser/source/range.rb +19 -3
  40. data/lib/parser/source/tree_rewriter/action.rb +137 -28
  41. data/lib/parser/source/tree_rewriter.rb +144 -14
  42. data/lib/parser/static_environment.rb +23 -0
  43. data/lib/parser/tree_rewriter.rb +3 -3
  44. data/lib/parser/variables_stack.rb +36 -0
  45. data/lib/parser/version.rb +1 -1
  46. data/lib/parser.rb +4 -0
  47. data/parser.gemspec +12 -19
  48. metadata +34 -99
  49. data/.gitignore +0 -32
  50. data/.travis.yml +0 -45
  51. data/.yardopts +0 -21
  52. data/CHANGELOG.md +0 -943
  53. data/CONTRIBUTING.md +0 -17
  54. data/Gemfile +0 -10
  55. data/README.md +0 -301
  56. data/Rakefile +0 -165
  57. data/ci/run_rubocop_specs +0 -14
  58. data/doc/AST_FORMAT.md +0 -1735
  59. data/doc/CUSTOMIZATION.md +0 -37
  60. data/doc/INTERNALS.md +0 -21
  61. data/doc/css/.gitkeep +0 -0
  62. data/doc/css/common.css +0 -68
  63. data/lib/parser/lexer.rl +0 -2383
  64. data/lib/parser/macruby.y +0 -2198
  65. data/lib/parser/ruby18.y +0 -1934
  66. data/lib/parser/ruby19.y +0 -2175
  67. data/lib/parser/ruby20.y +0 -2353
  68. data/lib/parser/ruby21.y +0 -2357
  69. data/lib/parser/ruby22.y +0 -2364
  70. data/lib/parser/ruby23.y +0 -2370
  71. data/lib/parser/ruby24.y +0 -2408
  72. data/lib/parser/ruby25.y +0 -2405
  73. data/lib/parser/ruby26.y +0 -2413
  74. data/lib/parser/rubymotion.y +0 -2182
  75. data/test/bug_163/fixtures/input.rb +0 -5
  76. data/test/bug_163/fixtures/output.rb +0 -5
  77. data/test/bug_163/rewriter.rb +0 -20
  78. data/test/helper.rb +0 -52
  79. data/test/parse_helper.rb +0 -315
  80. data/test/racc_coverage_helper.rb +0 -133
  81. data/test/test_base.rb +0 -31
  82. data/test/test_current.rb +0 -27
  83. data/test/test_diagnostic.rb +0 -96
  84. data/test/test_diagnostic_engine.rb +0 -62
  85. data/test/test_encoding.rb +0 -99
  86. data/test/test_lexer.rb +0 -3543
  87. data/test/test_lexer_stack_state.rb +0 -78
  88. data/test/test_parse_helper.rb +0 -80
  89. data/test/test_parser.rb +0 -7087
  90. data/test/test_runner_rewrite.rb +0 -47
  91. data/test/test_source_buffer.rb +0 -162
  92. data/test/test_source_comment.rb +0 -36
  93. data/test/test_source_comment_associator.rb +0 -367
  94. data/test/test_source_map.rb +0 -15
  95. data/test/test_source_range.rb +0 -172
  96. data/test/test_source_rewriter.rb +0 -541
  97. data/test/test_source_rewriter_action.rb +0 -46
  98. data/test/test_source_tree_rewriter.rb +0 -173
  99. data/test/test_static_environment.rb +0 -45
  100. data/test/using_tree_rewriter/fixtures/input.rb +0 -3
  101. data/test/using_tree_rewriter/fixtures/output.rb +0 -3
  102. data/test/using_tree_rewriter/using_tree_rewriter.rb +0 -9
data/lib/parser/lexer.rl DELETED
@@ -1,2383 +0,0 @@
1
- %%machine lex; # % fix highlighting
2
-
3
- #
4
- # === BEFORE YOU START ===
5
- #
6
- # Read the Ruby Hacking Guide chapter 11, available in English at
7
- # http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
8
- #
9
- # Remember two things about Ragel scanners:
10
- #
11
- # 1) Longest match wins.
12
- #
13
- # 2) If two matches have the same length, the first
14
- # in source code wins.
15
- #
16
- # General rules of making Ragel and Bison happy:
17
- #
18
- # * `p` (position) and `@te` contain the index of the character
19
- # they're pointing to ("current"), plus one. `@ts` contains the index
20
- # of the corresponding character. The code for extracting matched token is:
21
- #
22
- # @source_buffer.slice(@ts...@te)
23
- #
24
- # * If your input is `foooooooobar` and the rule is:
25
- #
26
- # 'f' 'o'+
27
- #
28
- # the result will be:
29
- #
30
- # foooooooobar
31
- # ^ ts=0 ^ p=te=9
32
- #
33
- # * A Ragel lexer action should not emit more than one token, unless
34
- # you know what you are doing.
35
- #
36
- # * All Ragel commands (fnext, fgoto, ...) end with a semicolon.
37
- #
38
- # * If an action emits the token and transitions to another state, use
39
- # these Ragel commands:
40
- #
41
- # emit($whatever)
42
- # fnext $next_state; fbreak;
43
- #
44
- # If you perform `fgoto` in an action which neither emits a token nor
45
- # rewinds the stream pointer, the parser's side-effectful,
46
- # context-sensitive lookahead actions will break in a hard to detect
47
- # and debug way.
48
- #
49
- # * If an action does not emit a token:
50
- #
51
- # fgoto $next_state;
52
- #
53
- # * If an action features lookbehind, i.e. matches characters with the
54
- # intent of passing them to another action:
55
- #
56
- # p = @ts - 1
57
- # fgoto $next_state;
58
- #
59
- # or, if the lookbehind consists of a single character:
60
- #
61
- # fhold; fgoto $next_state;
62
- #
63
- # * Ragel merges actions. So, if you have `e_lparen = '(' %act` and
64
- # `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
65
- # _will_ invoke the action `act`.
66
- #
67
- # e_something stands for "something with **e**mbedded action".
68
- #
69
- # * EOF is explicit and is matched by `c_eof`. If you want to introspect
70
- # the state of the lexer, add this rule to the state:
71
- #
72
- # c_eof => do_eof;
73
- #
74
- # * If you proceed past EOF, the lexer will complain:
75
- #
76
- # NoMethodError: undefined method `ord' for nil:NilClass
77
- #
78
-
79
- class Parser::Lexer
80
-
81
- %% write data nofinal;
82
- # %
83
-
84
- ESCAPES = {
85
- ?a.ord => "\a", ?b.ord => "\b", ?e.ord => "\e", ?f.ord => "\f",
86
- ?n.ord => "\n", ?r.ord => "\r", ?s.ord => "\s", ?t.ord => "\t",
87
- ?v.ord => "\v", ?\\.ord => "\\"
88
- }.freeze
89
-
90
- REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
91
-
92
- attr_reader :source_buffer
93
-
94
- attr_accessor :diagnostics
95
- attr_accessor :static_env
96
- attr_accessor :force_utf32
97
-
98
- attr_accessor :cond, :cmdarg, :in_kwarg
99
-
100
- attr_accessor :tokens, :comments
101
-
102
- def initialize(version)
103
- @version = version
104
- @static_env = nil
105
-
106
- @tokens = nil
107
- @comments = nil
108
-
109
- reset
110
- end
111
-
112
- def reset(reset_state=true)
113
- # Ragel state:
114
- if reset_state
115
- # Unit tests set state prior to resetting lexer.
116
- @cs = self.class.lex_en_line_begin
117
-
118
- @cond = StackState.new('cond')
119
- @cmdarg = StackState.new('cmdarg')
120
- @cond_stack = []
121
- @cmdarg_stack = []
122
- end
123
-
124
- @force_utf32 = false # Set to true by some tests
125
-
126
- @source_pts = nil # @source as a codepoint array
127
-
128
- @p = 0 # stream position (saved manually in #advance)
129
- @ts = nil # token start
130
- @te = nil # token end
131
- @act = 0 # next action
132
-
133
- @stack = [] # state stack
134
- @top = 0 # state stack top pointer
135
-
136
- # Lexer state:
137
- @token_queue = []
138
- @literal_stack = []
139
-
140
- @eq_begin_s = nil # location of last encountered =begin
141
- @sharp_s = nil # location of last encountered #
142
-
143
- @newline_s = nil # location of last encountered newline
144
-
145
- @num_base = nil # last numeric base
146
- @num_digits_s = nil # starting position of numeric digits
147
- @num_suffix_s = nil # starting position of numeric suffix
148
- @num_xfrm = nil # numeric suffix-induced transformation
149
-
150
- @escape_s = nil # starting position of current sequence
151
- @escape = nil # last escaped sequence, as string
152
-
153
- @herebody_s = nil # starting position of current heredoc line
154
-
155
- # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
156
- # encountered after a matching closing parenthesis.
157
- @paren_nest = 0
158
- @lambda_stack = []
159
-
160
- # After encountering the closing line of <<~SQUIGGLY_HEREDOC,
161
- # we store the indentation level and give it out to the parser
162
- # on request. It is not possible to infer indentation level just
163
- # from the AST because escape sequences such as `\ ` or `\t` are
164
- # expanded inside the lexer, but count as non-whitespace for
165
- # indentation purposes.
166
- @dedent_level = nil
167
-
168
- # If the lexer is in `command state' (aka expr_value)
169
- # at the entry to #advance, it will transition to expr_cmdarg
170
- # instead of expr_arg at certain points.
171
- @command_state = false
172
-
173
- # True at the end of "def foo a:"
174
- @in_kwarg = false
175
-
176
- # State before =begin / =end block comment
177
- @cs_before_block_comment = self.class.lex_en_line_begin
178
- end
179
-
180
- def source_buffer=(source_buffer)
181
- @source_buffer = source_buffer
182
-
183
- if @source_buffer
184
- source = @source_buffer.source
185
-
186
- if source.encoding == Encoding::UTF_8
187
- @source_pts = source.unpack('U*')
188
- else
189
- @source_pts = source.unpack('C*')
190
- end
191
-
192
- if @source_pts[0] == 0xfeff
193
- # Skip byte order mark.
194
- @p = 1
195
- end
196
- else
197
- @source_pts = nil
198
- end
199
- end
200
-
201
- def encoding
202
- @source_buffer.source.encoding
203
- end
204
-
205
- LEX_STATES = {
206
- :line_begin => lex_en_line_begin,
207
- :expr_dot => lex_en_expr_dot,
208
- :expr_fname => lex_en_expr_fname,
209
- :expr_value => lex_en_expr_value,
210
- :expr_beg => lex_en_expr_beg,
211
- :expr_mid => lex_en_expr_mid,
212
- :expr_arg => lex_en_expr_arg,
213
- :expr_cmdarg => lex_en_expr_cmdarg,
214
- :expr_end => lex_en_expr_end,
215
- :expr_endarg => lex_en_expr_endarg,
216
- :expr_endfn => lex_en_expr_endfn,
217
- :expr_labelarg => lex_en_expr_labelarg,
218
-
219
- :interp_string => lex_en_interp_string,
220
- :interp_words => lex_en_interp_words,
221
- :plain_string => lex_en_plain_string,
222
- :plain_words => lex_en_plain_string,
223
- }
224
-
225
- def state
226
- LEX_STATES.invert.fetch(@cs, @cs)
227
- end
228
-
229
- def state=(state)
230
- @cs = LEX_STATES.fetch(state)
231
- end
232
-
233
- def push_cmdarg
234
- @cmdarg_stack.push(@cmdarg)
235
- @cmdarg = StackState.new("cmdarg.#{@cmdarg_stack.count}")
236
- end
237
-
238
- def pop_cmdarg
239
- @cmdarg = @cmdarg_stack.pop
240
- end
241
-
242
- def push_cond
243
- @cond_stack.push(@cond)
244
- @cond = StackState.new("cond.#{@cond_stack.count}")
245
- end
246
-
247
- def pop_cond
248
- @cond = @cond_stack.pop
249
- end
250
-
251
- def dedent_level
252
- # We erase @dedent_level as a precaution to avoid accidentally
253
- # using a stale value.
254
- dedent_level, @dedent_level = @dedent_level, nil
255
- dedent_level
256
- end
257
-
258
- # Return next token: [type, value].
259
- def advance
260
- if @token_queue.any?
261
- return @token_queue.shift
262
- end
263
-
264
- # Ugly, but dependent on Ragel output. Consider refactoring it somehow.
265
- klass = self.class
266
- _lex_trans_keys = klass.send :_lex_trans_keys
267
- _lex_key_spans = klass.send :_lex_key_spans
268
- _lex_index_offsets = klass.send :_lex_index_offsets
269
- _lex_indicies = klass.send :_lex_indicies
270
- _lex_trans_targs = klass.send :_lex_trans_targs
271
- _lex_trans_actions = klass.send :_lex_trans_actions
272
- _lex_to_state_actions = klass.send :_lex_to_state_actions
273
- _lex_from_state_actions = klass.send :_lex_from_state_actions
274
- _lex_eof_trans = klass.send :_lex_eof_trans
275
-
276
- pe = @source_pts.size + 2
277
- p, eof = @p, pe
278
-
279
- @command_state = (@cs == klass.lex_en_expr_value ||
280
- @cs == klass.lex_en_line_begin)
281
-
282
- %% write exec;
283
- # %
284
-
285
- @p = p
286
-
287
- if @token_queue.any?
288
- @token_queue.shift
289
- elsif @cs == klass.lex_error
290
- [ false, [ '$error'.freeze, range(p - 1, p) ] ]
291
- else
292
- eof = @source_pts.size
293
- [ false, [ '$eof'.freeze, range(eof, eof) ] ]
294
- end
295
- end
296
-
297
- protected
298
-
299
- def eof_codepoint?(point)
300
- [0x04, 0x1a, 0x00].include? point
301
- end
302
-
303
- def version?(*versions)
304
- versions.include?(@version)
305
- end
306
-
307
- def stack_pop
308
- @top -= 1
309
- @stack[@top]
310
- end
311
-
312
- def encode_escape(ord)
313
- ord.chr.force_encoding(@source_buffer.source.encoding)
314
- end
315
-
316
- def tok(s = @ts, e = @te)
317
- @source_buffer.slice(s...e)
318
- end
319
-
320
- def range(s = @ts, e = @te)
321
- Parser::Source::Range.new(@source_buffer, s, e)
322
- end
323
-
324
- def emit(type, value = tok, s = @ts, e = @te)
325
- token = [ type, [ value, range(s, e) ] ]
326
-
327
- @token_queue.push(token)
328
-
329
- @tokens.push(token) if @tokens
330
-
331
- token
332
- end
333
-
334
- def emit_table(table, s = @ts, e = @te)
335
- value = tok(s, e)
336
-
337
- emit(table[value], value, s, e)
338
- end
339
-
340
- def emit_do(do_block=false)
341
- if @cond.active?
342
- emit(:kDO_COND, 'do'.freeze)
343
- elsif @cmdarg.active? || do_block
344
- emit(:kDO_BLOCK, 'do'.freeze)
345
- else
346
- emit(:kDO, 'do'.freeze)
347
- end
348
- end
349
-
350
- def arg_or_cmdarg
351
- if @command_state
352
- self.class.lex_en_expr_cmdarg
353
- else
354
- self.class.lex_en_expr_arg
355
- end
356
- end
357
-
358
- def emit_comment(s = @ts, e = @te)
359
- if @comments
360
- @comments.push(Parser::Source::Comment.new(range(s, e)))
361
- end
362
-
363
- if @tokens
364
- @tokens.push([ :tCOMMENT, [ tok(s, e), range(s, e) ] ])
365
- end
366
-
367
- nil
368
- end
369
-
370
- def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
371
- @diagnostics.process(
372
- Parser::Diagnostic.new(type, reason, arguments, location, highlights))
373
- end
374
-
375
- #
376
- # === LITERAL STACK ===
377
- #
378
-
379
- def push_literal(*args)
380
- new_literal = Literal.new(self, *args)
381
- @literal_stack.push(new_literal)
382
- next_state_for_literal(new_literal)
383
- end
384
-
385
- def next_state_for_literal(literal)
386
- if literal.words? && literal.backslash_delimited?
387
- if literal.interpolate?
388
- self.class.lex_en_interp_backslash_delimited_words
389
- else
390
- self.class.lex_en_plain_backslash_delimited_words
391
- end
392
- elsif literal.words? && !literal.backslash_delimited?
393
- if literal.interpolate?
394
- self.class.lex_en_interp_words
395
- else
396
- self.class.lex_en_plain_words
397
- end
398
- elsif !literal.words? && literal.backslash_delimited?
399
- if literal.interpolate?
400
- self.class.lex_en_interp_backslash_delimited
401
- else
402
- self.class.lex_en_plain_backslash_delimited
403
- end
404
- else
405
- if literal.interpolate?
406
- self.class.lex_en_interp_string
407
- else
408
- self.class.lex_en_plain_string
409
- end
410
- end
411
- end
412
-
413
- def literal
414
- @literal_stack.last
415
- end
416
-
417
- def pop_literal
418
- old_literal = @literal_stack.pop
419
-
420
- @dedent_level = old_literal.dedent_level
421
-
422
- if old_literal.type == :tREGEXP_BEG
423
- # Fetch modifiers.
424
- self.class.lex_en_regexp_modifiers
425
- else
426
- self.class.lex_en_expr_end
427
- end
428
- end
429
-
430
- # Mapping of strings to parser tokens.
431
-
432
- PUNCTUATION = {
433
- '=' => :tEQL, '&' => :tAMPER2, '|' => :tPIPE,
434
- '!' => :tBANG, '^' => :tCARET, '+' => :tPLUS,
435
- '-' => :tMINUS, '*' => :tSTAR2, '/' => :tDIVIDE,
436
- '%' => :tPERCENT, '~' => :tTILDE, ',' => :tCOMMA,
437
- ';' => :tSEMI, '.' => :tDOT, '..' => :tDOT2,
438
- '...' => :tDOT3, '[' => :tLBRACK2, ']' => :tRBRACK,
439
- '(' => :tLPAREN2, ')' => :tRPAREN, '?' => :tEH,
440
- ':' => :tCOLON, '&&' => :tANDOP, '||' => :tOROP,
441
- '-@' => :tUMINUS, '+@' => :tUPLUS, '~@' => :tTILDE,
442
- '**' => :tPOW, '->' => :tLAMBDA, '=~' => :tMATCH,
443
- '!~' => :tNMATCH, '==' => :tEQ, '!=' => :tNEQ,
444
- '>' => :tGT, '>>' => :tRSHFT, '>=' => :tGEQ,
445
- '<' => :tLT, '<<' => :tLSHFT, '<=' => :tLEQ,
446
- '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
447
- '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
448
- '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
449
- '!@' => :tBANG, '&.' => :tANDDOT,
450
- }
451
-
452
- PUNCTUATION_BEGIN = {
453
- '&' => :tAMPER, '*' => :tSTAR, '**' => :tDSTAR,
454
- '+' => :tUPLUS, '-' => :tUMINUS, '::' => :tCOLON3,
455
- '(' => :tLPAREN, '{' => :tLBRACE, '[' => :tLBRACK,
456
- }
457
-
458
- KEYWORDS = {
459
- 'if' => :kIF_MOD, 'unless' => :kUNLESS_MOD,
460
- 'while' => :kWHILE_MOD, 'until' => :kUNTIL_MOD,
461
- 'rescue' => :kRESCUE_MOD, 'defined?' => :kDEFINED,
462
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
463
- }
464
-
465
- KEYWORDS_BEGIN = {
466
- 'if' => :kIF, 'unless' => :kUNLESS,
467
- 'while' => :kWHILE, 'until' => :kUNTIL,
468
- 'rescue' => :kRESCUE, 'defined?' => :kDEFINED,
469
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
470
- }
471
-
472
- %w(class module def undef begin end then elsif else ensure case when
473
- for break next redo retry in do return yield super self nil true
474
- false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
475
- KEYWORDS_BEGIN[keyword] = KEYWORDS[keyword] = :"k#{keyword.upcase}"
476
- end
477
-
478
- %%{
479
- # %
480
-
481
- access @;
482
- getkey (@source_pts[p] || 0);
483
-
484
- # === CHARACTER CLASSES ===
485
- #
486
- # Pay close attention to the differences between c_any and any.
487
- # c_any does not include EOF and so will cause incorrect behavior
488
- # for machine subtraction (any-except rules) and default transitions
489
- # for scanners.
490
-
491
- action do_nl {
492
- # Record position of a newline for precise location reporting on tNL
493
- # tokens.
494
- #
495
- # This action is embedded directly into c_nl, as it is idempotent and
496
- # there are no cases when we need to skip it.
497
- @newline_s = p
498
- }
499
-
500
- c_nl = '\n' $ do_nl;
501
- c_space = [ \t\r\f\v];
502
- c_space_nl = c_space | c_nl;
503
-
504
- c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
505
- c_eol = c_nl | c_eof;
506
- c_any = any - c_eof;
507
-
508
- c_nl_zlen = c_nl | zlen;
509
- c_line = any - c_nl_zlen;
510
-
511
- c_unicode = c_any - 0x00..0x7f;
512
- c_upper = [A-Z];
513
- c_lower = [a-z_] | c_unicode;
514
- c_alpha = c_lower | c_upper;
515
- c_alnum = c_alpha | [0-9];
516
-
517
- action do_eof {
518
- # Sit at EOF indefinitely. #advance would return $eof each time.
519
- # This allows feeding the lexer more data if needed; this is only used
520
- # in tests.
521
- #
522
- # Note that this action is not embedded into e_eof like e_heredoc_nl and e_bs
523
- # below. This is due to the fact that scanner state at EOF is observed
524
- # by tests, and encapsulating it in a rule would break the introspection.
525
- fhold; fbreak;
526
- }
527
-
528
- #
529
- # === TOKEN DEFINITIONS ===
530
- #
531
-
532
- # All operators are punctuation. There is more to punctuation
533
- # than just operators. Operators can be overridden by user;
534
- # punctuation can not.
535
-
536
- # A list of operators which are valid in the function name context, but
537
- # have different semantics in others.
538
- operator_fname = '[]' | '[]=' | '`' | '-@' | '+@' | '~@' | '!@' ;
539
-
540
- # A list of operators which can occur within an assignment shortcut (+ → +=).
541
- operator_arithmetic = '&' | '|' | '&&' | '||' | '^' | '+' | '-' |
542
- '*' | '/' | '**' | '~' | '<<' | '>>' | '%' ;
543
-
544
- # A list of all user-definable operators not covered by groups above.
545
- operator_rest = '=~' | '!~' | '==' | '!=' | '!' | '===' |
546
- '<' | '<=' | '>' | '>=' | '<=>' | '=>' ;
547
-
548
- # Note that `{` and `}` need to be referred to as e_lbrace and e_rbrace,
549
- # as they are ambiguous with interpolation `#{}` and should be counted.
550
- # These braces are not present in punctuation lists.
551
-
552
- # A list of punctuation which has different meaning when used at the
553
- # beginning of expression.
554
- punctuation_begin = '-' | '+' | '::' | '(' | '[' |
555
- '*' | '**' | '&' ;
556
-
557
- # A list of all punctuation except punctuation_begin.
558
- punctuation_end = ',' | '=' | '->' | '(' | '[' | ']' |
559
- '::' | '?' | ':' | '.' | '..' | '...' ;
560
-
561
- # A list of keywords which have different meaning at the beginning of expression.
562
- keyword_modifier = 'if' | 'unless' | 'while' | 'until' | 'rescue' ;
563
-
564
- # A list of keywords which accept an argument-like expression, i.e. have the
565
- # same post-processing as method calls or commands. Example: `yield 1`,
566
- # `yield (1)`, `yield(1)`, are interpreted as if `yield` was a function.
567
- keyword_with_arg = 'yield' | 'super' | 'not' | 'defined?' ;
568
-
569
- # A list of keywords which accept a literal function name as an argument.
570
- keyword_with_fname = 'def' | 'undef' | 'alias' ;
571
-
572
- # A list of keywords which accept an expression after them.
573
- keyword_with_value = 'else' | 'case' | 'ensure' | 'module' | 'elsif' | 'then' |
574
- 'for' | 'in' | 'do' | 'when' | 'begin' | 'class' |
575
- 'and' | 'or' ;
576
-
577
- # A list of keywords which accept a value, and treat the keywords from
578
- # `keyword_modifier` list as modifiers.
579
- keyword_with_mid = 'rescue' | 'return' | 'break' | 'next' ;
580
-
581
- # A list of keywords which do not accept an expression after them.
582
- keyword_with_end = 'end' | 'self' | 'true' | 'false' | 'retry' |
583
- 'redo' | 'nil' | 'BEGIN' | 'END' | '__FILE__' |
584
- '__LINE__' | '__ENCODING__';
585
-
586
- # All keywords.
587
- keyword = keyword_with_value | keyword_with_mid |
588
- keyword_with_end | keyword_with_arg |
589
- keyword_with_fname | keyword_modifier ;
590
-
591
- constant = c_upper c_alnum*;
592
- bareword = c_alpha c_alnum*;
593
-
594
- call_or_var = c_lower c_alnum*;
595
- class_var = '@@' bareword;
596
- instance_var = '@' bareword;
597
- global_var = '$'
598
- ( bareword | digit+
599
- | [`'+~*$&?!@/\\;,.=:<>"] # `
600
- | '-' c_alnum
601
- )
602
- ;
603
-
604
- # Ruby accepts (and fails on) variables with leading digit
605
- # in literal context, but not in unquoted symbol body.
606
- class_var_v = '@@' c_alnum+;
607
- instance_var_v = '@' c_alnum+;
608
-
609
- label = bareword [?!]? ':';
610
-
611
- #
612
- # === NUMERIC PARSING ===
613
- #
614
-
615
- int_hex = ( xdigit+ '_' )* xdigit* '_'? ;
616
- int_dec = ( digit+ '_' )* digit* '_'? ;
617
- int_bin = ( [01]+ '_' )* [01]* '_'? ;
618
-
619
- flo_int = [1-9] [0-9]* ( '_' digit+ )* | '0';
620
- flo_frac = '.' ( digit+ '_' )* digit+;
621
- flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
622
-
623
- int_suffix =
624
- '' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars) } }
625
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
626
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, chars)) } }
627
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
628
- | 're' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
629
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
630
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 6); p -= 6 } };
631
-
632
- flo_pow_suffix =
633
- '' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars)) } }
634
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Float(chars))) } }
635
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 2); p -= 2 } };
636
-
637
- flo_suffix =
638
- flo_pow_suffix
639
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
640
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
641
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 6); p -= 6 } };
642
-
643
- #
644
- # === ESCAPE SEQUENCE PARSING ===
645
- #
646
-
647
- # Escape parsing code is a Ragel pattern, not a scanner, and therefore
648
- # it shouldn't directly raise errors or perform other actions with side effects.
649
- # In reality this would probably just mess up error reporting in pathological
650
- # cases, though.
651
-
652
- # The amount of code required to parse \M\C stuff correctly is ridiculous.
653
-
654
- escaped_nl = "\\" c_nl;
655
-
656
- action unicode_points {
657
- @escape = ""
658
-
659
- codepoints = tok(@escape_s + 2, p - 1)
660
- codepoint_s = @escape_s + 2
661
-
662
- if @version < 24
663
- if codepoints.start_with?(" ") || codepoints.start_with?("\t")
664
- diagnostic :fatal, :invalid_unicode_escape, nil,
665
- range(@escape_s + 2, @escape_s + 3)
666
- end
667
-
668
- if spaces_p = codepoints.index(/[ \t]{2}/)
669
- diagnostic :fatal, :invalid_unicode_escape, nil,
670
- range(codepoint_s + spaces_p + 1, codepoint_s + spaces_p + 2)
671
- end
672
-
673
- if codepoints.end_with?(" ") || codepoints.end_with?("\t")
674
- diagnostic :fatal, :invalid_unicode_escape, nil, range(p - 1, p)
675
- end
676
- end
677
-
678
- codepoints.scan(/([0-9a-fA-F]+)|([ \t]+)/).each do |(codepoint_str, spaces)|
679
- if spaces
680
- codepoint_s += spaces.length
681
- else
682
- codepoint = codepoint_str.to_i(16)
683
-
684
- if codepoint >= 0x110000
685
- diagnostic :error, :unicode_point_too_large, nil,
686
- range(codepoint_s, codepoint_s + codepoint_str.length)
687
- break
688
- end
689
-
690
- @escape += codepoint.chr(Encoding::UTF_8)
691
- codepoint_s += codepoint_str.length
692
- end
693
- end
694
- }
695
-
696
- action unescape_char {
697
- codepoint = @source_pts[p - 1]
698
- if (@escape = ESCAPES[codepoint]).nil?
699
- @escape = encode_escape(@source_buffer.slice(p - 1))
700
- end
701
- }
702
-
703
- action invalid_complex_escape {
704
- diagnostic :fatal, :invalid_escape
705
- }
706
-
707
- action slash_c_char {
708
- @escape = encode_escape(@escape[0].ord & 0x9f)
709
- }
710
-
711
- action slash_m_char {
712
- @escape = encode_escape(@escape[0].ord | 0x80)
713
- }
714
-
715
- maybe_escaped_char = (
716
- '\\' c_any %unescape_char
717
- | ( c_any - [\\] ) % { @escape = @source_buffer.slice(p - 1).chr }
718
- );
719
-
720
- maybe_escaped_ctrl_char = ( # why?!
721
- '\\' c_any %unescape_char %slash_c_char
722
- | '?' % { @escape = "\x7f" }
723
- | ( c_any - [\\?] ) % { @escape = @source_buffer.slice(p - 1).chr } %slash_c_char
724
- );
725
-
726
- escape = (
727
- # \377
728
- [0-7]{1,3}
729
- % { @escape = encode_escape(tok(@escape_s, p).to_i(8) % 0x100) }
730
-
731
- # \xff
732
- | 'x' xdigit{1,2}
733
- % { @escape = encode_escape(tok(@escape_s + 1, p).to_i(16)) }
734
-
735
- # %q[\x]
736
- | 'x' ( c_any - xdigit )
737
- % {
738
- diagnostic :fatal, :invalid_hex_escape, nil, range(@escape_s - 1, p + 2)
739
- }
740
-
741
- # \u263a
742
- | 'u' xdigit{4}
743
- % { @escape = tok(@escape_s + 1, p).to_i(16).chr(Encoding::UTF_8) }
744
-
745
- # \u123
746
- | 'u' xdigit{0,3}
747
- % {
748
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
749
- }
750
-
751
- # u{not hex} or u{}
752
- | 'u{' ( c_any - xdigit - [ \t}] )* '}'
753
- % {
754
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
755
- }
756
-
757
- # \u{ \t 123 \t 456 \t\t }
758
- | 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
759
- (
760
- ( xdigit{1,6} [ \t]* '}'
761
- %unicode_points
762
- )
763
- |
764
- ( xdigit* ( c_any - xdigit - [ \t}] )+ '}'
765
- | ( c_any - [ \t}] )* c_eof
766
- | xdigit{7,}
767
- ) % {
768
- diagnostic :fatal, :unterminated_unicode, nil, range(p - 1, p)
769
- }
770
- )
771
-
772
- # \C-\a \cx
773
- | ( 'C-' | 'c' ) escaped_nl?
774
- maybe_escaped_ctrl_char
775
-
776
- # \M-a
777
- | 'M-' escaped_nl?
778
- maybe_escaped_char
779
- %slash_m_char
780
-
781
- # \C-\M-f \M-\cf \c\M-f
782
- | ( ( 'C-' | 'c' ) escaped_nl? '\\M-'
783
- | 'M-\\' escaped_nl? ( 'C-' | 'c' ) ) escaped_nl?
784
- maybe_escaped_ctrl_char
785
- %slash_m_char
786
-
787
- | 'C' c_any %invalid_complex_escape
788
- | 'M' c_any %invalid_complex_escape
789
- | ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
790
-
791
- | ( c_any - [0-7xuCMc] ) %unescape_char
792
-
793
- | c_eof % {
794
- diagnostic :fatal, :escape_eof, nil, range(p - 1, p)
795
- }
796
- );
797
-
798
- # Use rules in form of `e_bs escape' when you need to parse a sequence.
799
- e_bs = '\\' % {
800
- @escape_s = p
801
- @escape = nil
802
- };
803
-
804
- #
805
- # === STRING AND HEREDOC PARSING ===
806
- #
807
-
808
- # Heredoc parsing is quite a complex topic. First, consider that heredocs
809
- # can be arbitrarily nested. For example:
810
- #
811
- # puts <<CODE
812
- # the result is: #{<<RESULT.inspect
813
- # i am a heredoc
814
- # RESULT
815
- # }
816
- # CODE
817
- #
818
- # which, incidentally, evaluates to:
819
- #
820
- # the result is: " i am a heredoc\n"
821
- #
822
- # To parse them, lexer refers to two kinds (remember, nested heredocs)
823
- # of positions in the input stream, namely heredoc_e
824
- # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
825
- #
826
- # heredoc_e is simply contained inside the corresponding Literal, and
827
- # when the heredoc is closed, the lexing is restarted from that position.
828
- #
829
- # @herebody_s is quite more complex. First, @herebody_s changes after each
830
- # heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
831
- # contains the current line, and also when a heredoc is started, @herebody_s
832
- # contains the position from which the heredoc will be lexed.
833
- #
834
- # Second, as (insanity) there are nested heredocs, we need to maintain a
835
- # stack of these positions. Each time #push_literal is called, it saves current
836
- # @herebody_s to literal.saved_herebody_s, and after an interpolation (possibly
837
- # containing other heredocs) is closed, the previous value is restored.
838
-
839
- e_heredoc_nl = c_nl % {
840
- # After every heredoc was parsed, @herebody_s contains the
841
- # position of next token after all heredocs.
842
- if @herebody_s
843
- p = @herebody_s
844
- @herebody_s = nil
845
- end
846
- };
847
-
848
- action extend_string {
849
- string = tok
850
-
851
- # tLABEL_END is only possible in non-cond context on >= 2.2
852
- if @version >= 22 && !@cond.active?
853
- lookahead = @source_buffer.slice(@te...@te+2)
854
- end
855
-
856
- current_literal = literal
857
- if !current_literal.heredoc? &&
858
- (token = current_literal.nest_and_try_closing(string, @ts, @te, lookahead))
859
- if token[0] == :tLABEL_END
860
- p += 1
861
- pop_literal
862
- fnext expr_labelarg;
863
- else
864
- fnext *pop_literal;
865
- end
866
- fbreak;
867
- else
868
- current_literal.extend_string(string, @ts, @te)
869
- end
870
- }
871
-
872
- action extend_string_escaped {
873
- current_literal = literal
874
- # Get the first character after the backslash.
875
- escaped_char = @source_buffer.slice(@escape_s).chr
876
-
877
- if current_literal.munge_escape? escaped_char
878
- # If this particular literal uses this character as an opening
879
- # or closing delimiter, it is an escape sequence for that
880
- # particular character. Write it without the backslash.
881
-
882
- if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
883
- # Regular expressions should include escaped delimiters in their
884
- # escaped form, except when the escaped character is
885
- # a closing delimiter but not a regexp metacharacter.
886
- #
887
- # The backslash itself cannot be used as a closing delimiter
888
- # at the same time as an escape symbol, but it is always munged,
889
- # so this branch also executes for the non-closing-delimiter case
890
- # for the backslash.
891
- current_literal.extend_string(tok, @ts, @te)
892
- else
893
- current_literal.extend_string(escaped_char, @ts, @te)
894
- end
895
- else
896
- # It does not. So this is an actual escape sequence, yay!
897
- if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
898
- # Squiggly heredocs like
899
- # <<~-HERE
900
- # 1\
901
- # 2
902
- # HERE
903
- # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
904
- # This information is emitted as is, without escaping,
905
- # later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
906
- current_literal.extend_string(tok, @ts, @te)
907
- elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
908
- # Heredocs, regexp and a few other types of literals support line
909
- # continuation via \\\n sequence. The code like
910
- # "a\
911
- # b"
912
- # must be parsed as "ab"
913
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
914
- elsif current_literal.regexp?
915
- # Regular expressions should include escape sequences in their
916
- # escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
917
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
918
- else
919
- current_literal.extend_string(@escape || tok, @ts, @te)
920
- end
921
- end
922
- }
923
-
924
- # Extend a string with a newline or an EOF character.
925
- # As a heredoc closing line can immediately precede EOF, this action
926
- # has to handle that case specially.
927
- action extend_string_eol {
928
- current_literal = literal
929
- if @te == pe
930
- diagnostic :fatal, :string_eof, nil,
931
- range(current_literal.str_s, current_literal.str_s + 1)
932
- end
933
-
934
- if current_literal.heredoc?
935
- line = tok(@herebody_s, @ts).gsub(/\r+$/, ''.freeze)
936
-
937
- if version?(18, 19, 20)
938
- # See ruby:c48b4209c
939
- line = line.gsub(/\r.*$/, ''.freeze)
940
- end
941
-
942
- # Try ending the heredoc with the complete most recently
943
- # scanned line. @herebody_s always refers to the start of such line.
944
- if current_literal.nest_and_try_closing(line, @herebody_s, @ts)
945
- # Adjust @herebody_s to point to the next line.
946
- @herebody_s = @te
947
-
948
- # Continue regular lexing after the heredoc reference (<<END).
949
- p = current_literal.heredoc_e - 1
950
- fnext *pop_literal; fbreak;
951
- else
952
- # Calculate indentation level for <<~HEREDOCs.
953
- current_literal.infer_indent_level(line)
954
-
955
- # Ditto.
956
- @herebody_s = @te
957
- end
958
- else
959
- # Try ending the literal with a newline.
960
- if current_literal.nest_and_try_closing(tok, @ts, @te)
961
- fnext *pop_literal; fbreak;
962
- end
963
-
964
- if @herebody_s
965
- # This is a regular literal intertwined with a heredoc. Like:
966
- #
967
- # p <<-foo+"1
968
- # bar
969
- # foo
970
- # 2"
971
- #
972
- # which, incidentally, evaluates to "bar\n1\n2".
973
- p = @herebody_s - 1
974
- @herebody_s = nil
975
- end
976
- end
977
-
978
- if current_literal.words? && !eof_codepoint?(@source_pts[p])
979
- current_literal.extend_space @ts, @te
980
- else
981
- # A literal newline is appended if the heredoc was _not_ closed
982
- # this time (see fbreak above). See also Literal#nest_and_try_closing
983
- # for rationale of calling #flush_string here.
984
- current_literal.extend_string tok, @ts, @te
985
- current_literal.flush_string
986
- end
987
- }
988
-
989
- action extend_string_space {
990
- literal.extend_space @ts, @te
991
- }
992
-
993
- #
994
- # === INTERPOLATION PARSING ===
995
- #
996
-
997
- # Interpolations with immediate variable names simply call into
998
- # the corresponding machine.
999
-
1000
- interp_var = '#' ( global_var | class_var_v | instance_var_v );
1001
-
1002
- action extend_interp_var {
1003
- current_literal = literal
1004
- current_literal.flush_string
1005
- current_literal.extend_content
1006
-
1007
- emit(:tSTRING_DVAR, nil, @ts, @ts + 1)
1008
-
1009
- p = @ts
1010
- fcall expr_variable;
1011
- }
1012
-
1013
- # Interpolations with code blocks must match nested curly braces, as
1014
- # interpolation ending is ambiguous with a block ending. So, every
1015
- # opening and closing brace should be matched with e_[lr]brace rules,
1016
- # which automatically perform the counting.
1017
- #
1018
- # Note that interpolations can themselves be nested, so brace balance
1019
- # is tied to the innermost literal.
1020
- #
1021
- # Also note that literals themselves should not use e_[lr]brace rules
1022
- # when matching their opening and closing delimiters, as the amount of
1023
- # braces inside the characters of a string literal is independent.
1024
-
1025
- interp_code = '#{';
1026
-
1027
- e_lbrace = '{' % {
1028
- @cond.push(false); @cmdarg.push(false)
1029
-
1030
- current_literal = literal
1031
- if current_literal
1032
- current_literal.start_interp_brace
1033
- end
1034
- };
1035
-
1036
- e_rbrace = '}' % {
1037
- current_literal = literal
1038
- if current_literal
1039
- if current_literal.end_interp_brace_and_try_closing
1040
- if version?(18, 19)
1041
- emit(:tRCURLY, '}'.freeze, p - 1, p)
1042
- if @version < 24
1043
- @cond.lexpop
1044
- @cmdarg.lexpop
1045
- else
1046
- @cond.pop
1047
- @cmdarg.pop
1048
- end
1049
- else
1050
- emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
1051
- end
1052
-
1053
- if current_literal.saved_herebody_s
1054
- @herebody_s = current_literal.saved_herebody_s
1055
- end
1056
-
1057
-
1058
- fhold;
1059
- fnext *next_state_for_literal(current_literal);
1060
- fbreak;
1061
- end
1062
- end
1063
- };
1064
-
1065
- action extend_interp_code {
1066
- current_literal = literal
1067
- current_literal.flush_string
1068
- current_literal.extend_content
1069
-
1070
- emit(:tSTRING_DBEG, '#{'.freeze)
1071
-
1072
- if current_literal.heredoc?
1073
- current_literal.saved_herebody_s = @herebody_s
1074
- @herebody_s = nil
1075
- end
1076
-
1077
- current_literal.start_interp_brace
1078
- fnext expr_value;
1079
- fbreak;
1080
- }
1081
-
1082
- # Actual string parsers are simply combined from the primitives defined
1083
- # above.
1084
-
1085
- interp_words := |*
1086
- interp_code => extend_interp_code;
1087
- interp_var => extend_interp_var;
1088
- e_bs escape => extend_string_escaped;
1089
- c_space+ => extend_string_space;
1090
- c_eol => extend_string_eol;
1091
- c_any => extend_string;
1092
- *|;
1093
-
1094
- interp_string := |*
1095
- interp_code => extend_interp_code;
1096
- interp_var => extend_interp_var;
1097
- e_bs escape => extend_string_escaped;
1098
- c_eol => extend_string_eol;
1099
- c_any => extend_string;
1100
- *|;
1101
-
1102
- plain_words := |*
1103
- e_bs c_any => extend_string_escaped;
1104
- c_space+ => extend_string_space;
1105
- c_eol => extend_string_eol;
1106
- c_any => extend_string;
1107
- *|;
1108
-
1109
- plain_string := |*
1110
- '\\' c_nl => extend_string_eol;
1111
- e_bs c_any => extend_string_escaped;
1112
- c_eol => extend_string_eol;
1113
- c_any => extend_string;
1114
- *|;
1115
-
1116
- interp_backslash_delimited := |*
1117
- interp_code => extend_interp_code;
1118
- interp_var => extend_interp_var;
1119
- c_eol => extend_string_eol;
1120
- c_any => extend_string;
1121
- *|;
1122
-
1123
- plain_backslash_delimited := |*
1124
- c_eol => extend_string_eol;
1125
- c_any => extend_string;
1126
- *|;
1127
-
1128
- interp_backslash_delimited_words := |*
1129
- interp_code => extend_interp_code;
1130
- interp_var => extend_interp_var;
1131
- c_space+ => extend_string_space;
1132
- c_eol => extend_string_eol;
1133
- c_any => extend_string;
1134
- *|;
1135
-
1136
- plain_backslash_delimited_words := |*
1137
- c_space+ => extend_string_space;
1138
- c_eol => extend_string_eol;
1139
- c_any => extend_string;
1140
- *|;
1141
-
1142
- regexp_modifiers := |*
1143
- [A-Za-z]+
1144
- => {
1145
- unknown_options = tok.scan(/[^imxouesn]/)
1146
- if unknown_options.any?
1147
- diagnostic :error, :regexp_options,
1148
- { :options => unknown_options.join }
1149
- end
1150
-
1151
- emit(:tREGEXP_OPT)
1152
- fnext expr_end;
1153
- fbreak;
1154
- };
1155
-
1156
- any
1157
- => {
1158
- emit(:tREGEXP_OPT, tok(@ts, @te - 1), @ts, @te - 1)
1159
- fhold;
1160
- fgoto expr_end;
1161
- };
1162
- *|;
1163
-
1164
- #
1165
- # === WHITESPACE HANDLING ===
1166
- #
1167
-
1168
- # Various contexts in Ruby allow various kinds of whitespace
1169
- # to be used. They are grouped to clarify the lexing machines
1170
- # and ease collection of comments.
1171
-
1172
- # A line of code with inline #comment at end is always equivalent
1173
- # to a line of code ending with just a newline, so an inline
1174
- # comment is deemed equivalent to non-newline whitespace
1175
- # (c_space character class).
1176
-
1177
- w_space =
1178
- c_space+
1179
- | '\\' e_heredoc_nl
1180
- ;
1181
-
1182
- w_comment =
1183
- '#' %{ @sharp_s = p - 1 }
1184
- # The (p == pe) condition compensates for added "\0" and
1185
- # the way Ragel handles EOF.
1186
- c_line* %{ emit_comment(@sharp_s, p == pe ? p - 2 : p) }
1187
- ;
1188
-
1189
- w_space_comment =
1190
- w_space
1191
- | w_comment
1192
- ;
1193
-
1194
- # A newline in non-literal context always interoperates with
1195
- # here document logic and can always be escaped by a backslash,
1196
- # still interoperating with here document logic in the same way,
1197
- # yet being invisible to anything else.
1198
- #
1199
- # To demonstrate:
1200
- #
1201
- # foo = <<FOO \
1202
- # bar
1203
- # FOO
1204
- # + 2
1205
- #
1206
- # is equivalent to `foo = "bar\n" + 2`.
1207
-
1208
- w_newline =
1209
- e_heredoc_nl;
1210
-
1211
- w_any =
1212
- w_space
1213
- | w_comment
1214
- | w_newline
1215
- ;
1216
-
1217
-
1218
- #
1219
- # === EXPRESSION PARSING ===
1220
- #
1221
-
1222
- # These rules implement a form of manually defined lookahead.
1223
- # The default longest-match scanning does not work here due
1224
- # to sheer ambiguity.
1225
-
1226
- ambiguous_fid_suffix = # actual parsed
1227
- [?!] %{ tm = p } | # a? a?
1228
- [?!]'=' %{ tm = p - 2 } # a!=b a != b
1229
- ;
1230
-
1231
- ambiguous_ident_suffix = # actual parsed
1232
- ambiguous_fid_suffix |
1233
- '=' %{ tm = p } | # a= a=
1234
- '==' %{ tm = p - 2 } | # a==b a == b
1235
- '=~' %{ tm = p - 2 } | # a=~b a =~ b
1236
- '=>' %{ tm = p - 2 } | # a=>b a => b
1237
- '===' %{ tm = p - 3 } # a===b a === b
1238
- ;
1239
-
1240
- ambiguous_symbol_suffix = # actual parsed
1241
- ambiguous_ident_suffix |
1242
- '==>' %{ tm = p - 2 } # :a==>b :a= => b
1243
- ;
1244
-
1245
- # Ambiguous with 1.9 hash labels.
1246
- ambiguous_const_suffix = # actual parsed
1247
- '::' %{ tm = p - 2 } # A::B A :: B
1248
- ;
1249
-
1250
- # Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
1251
- # @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
1252
-
1253
- e_lbrack = '[' % {
1254
- @cond.push(false); @cmdarg.push(false)
1255
- };
1256
-
1257
- # Ruby 1.9 lambdas require parentheses counting in order to
1258
- # emit correct opening kDO/tLBRACE.
1259
-
1260
- e_lparen = '(' % {
1261
- @cond.push(false); @cmdarg.push(false)
1262
-
1263
- @paren_nest += 1
1264
- };
1265
-
1266
- e_rparen = ')' % {
1267
- @paren_nest -= 1
1268
- };
1269
-
1270
- # Ruby is context-sensitive with respect to local identifiers.
1271
- action local_ident {
1272
- emit(:tIDENTIFIER)
1273
-
1274
- if !@static_env.nil? && @static_env.declared?(tok)
1275
- fnext expr_endfn; fbreak;
1276
- else
1277
- fnext *arg_or_cmdarg; fbreak;
1278
- end
1279
- }
1280
-
1281
- # Variable lexing code is accessed from both expressions and
1282
- # string interpolation related code.
1283
- #
1284
- expr_variable := |*
1285
- global_var
1286
- => {
1287
- if tok =~ /^\$([1-9][0-9]*)$/
1288
- emit(:tNTH_REF, tok(@ts + 1).to_i)
1289
- elsif tok =~ /^\$([&`'+])$/
1290
- emit(:tBACK_REF)
1291
- else
1292
- emit(:tGVAR)
1293
- end
1294
-
1295
- fnext *stack_pop; fbreak;
1296
- };
1297
-
1298
- class_var_v
1299
- => {
1300
- if tok =~ /^@@[0-9]/
1301
- diagnostic :error, :cvar_name, { :name => tok }
1302
- end
1303
-
1304
- emit(:tCVAR)
1305
- fnext *stack_pop; fbreak;
1306
- };
1307
-
1308
- instance_var_v
1309
- => {
1310
- if tok =~ /^@[0-9]/
1311
- diagnostic :error, :ivar_name, { :name => tok }
1312
- end
1313
-
1314
- emit(:tIVAR)
1315
- fnext *stack_pop; fbreak;
1316
- };
1317
- *|;
1318
-
1319
- # Literal function name in definition (e.g. `def class`).
1320
- # Keywords are returned as their respective tokens; this is used
1321
- # to support singleton def `def self.foo`. Global variables are
1322
- # returned as `tGVAR`; this is used in global variable alias
1323
- # statements `alias $a $b`. Symbols are returned verbatim; this
1324
- # is used in `alias :a :"b#{foo}"` and `undef :a`.
1325
- #
1326
- # Transitions to `expr_endfn` afterwards.
1327
- #
1328
- expr_fname := |*
1329
- keyword
1330
- => { emit_table(KEYWORDS_BEGIN);
1331
- fnext expr_endfn; fbreak; };
1332
-
1333
- constant
1334
- => { emit(:tCONSTANT)
1335
- fnext expr_endfn; fbreak; };
1336
-
1337
- bareword [?=!]?
1338
- => { emit(:tIDENTIFIER)
1339
- fnext expr_endfn; fbreak; };
1340
-
1341
- global_var
1342
- => { p = @ts - 1
1343
- fnext expr_end; fcall expr_variable; };
1344
-
1345
- # If the handling were delegated to expr_end,
1346
- # these cases would transition to something other than
1347
- # expr_endfn, which is incorrect.
1348
- operator_fname |
1349
- operator_arithmetic |
1350
- operator_rest
1351
- => { emit_table(PUNCTUATION)
1352
- fnext expr_endfn; fbreak; };
1353
-
1354
- '::'
1355
- => { fhold; fhold; fgoto expr_end; };
1356
-
1357
- ':'
1358
- => { fhold; fgoto expr_beg; };
1359
-
1360
- '%s' c_any
1361
- => {
1362
- if version?(23)
1363
- type, delimiter = tok[0..-2], tok[-1].chr
1364
- fgoto *push_literal(type, delimiter, @ts);
1365
- else
1366
- p = @ts - 1
1367
- fgoto expr_end;
1368
- end
1369
- };
1370
-
1371
- w_any;
1372
-
1373
- c_any
1374
- => { fhold; fgoto expr_end; };
1375
-
1376
- c_eof => do_eof;
1377
- *|;
1378
-
1379
- # After literal function name in definition. Behaves like `expr_end`,
1380
- # but allows a tLABEL.
1381
- #
1382
- # Transitions to `expr_end` afterwards.
1383
- #
1384
- expr_endfn := |*
1385
- label ( any - ':' )
1386
- => { emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
1387
- fhold; fnext expr_labelarg; fbreak; };
1388
-
1389
- w_space_comment;
1390
-
1391
- c_any
1392
- => { fhold; fgoto expr_end; };
1393
-
1394
- c_eof => do_eof;
1395
- *|;
1396
-
1397
- # Literal function name in method call (e.g. `a.class`).
1398
- #
1399
- # Transitions to `expr_arg` afterwards.
1400
- #
1401
- expr_dot := |*
1402
- constant
1403
- => { emit(:tCONSTANT)
1404
- fnext *arg_or_cmdarg; fbreak; };
1405
-
1406
- call_or_var
1407
- => { emit(:tIDENTIFIER)
1408
- fnext *arg_or_cmdarg; fbreak; };
1409
-
1410
- bareword ambiguous_fid_suffix
1411
- => { emit(:tFID, tok(@ts, tm), @ts, tm)
1412
- fnext *arg_or_cmdarg; p = tm - 1; fbreak; };
1413
-
1414
- # See the comment in `expr_fname`.
1415
- operator_fname |
1416
- operator_arithmetic |
1417
- operator_rest
1418
- => { emit_table(PUNCTUATION)
1419
- fnext expr_arg; fbreak; };
1420
-
1421
- w_any;
1422
-
1423
- c_any
1424
- => { fhold; fgoto expr_end; };
1425
-
1426
- c_eof => do_eof;
1427
- *|;
1428
-
1429
- # The previous token emitted was a `tIDENTIFIER` or `tFID`; no space
1430
- # is consumed; the current expression is a command or method call.
1431
- #
1432
- expr_arg := |*
1433
- #
1434
- # COMMAND MODE SPECIFIC TOKENS
1435
- #
1436
-
1437
- # cmd (1 + 2)
1438
- # See below the rationale about expr_endarg.
1439
- w_space+ e_lparen
1440
- => {
1441
- if version?(18)
1442
- emit(:tLPAREN2, '('.freeze, @te - 1, @te)
1443
- fnext expr_value; fbreak;
1444
- else
1445
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1446
- fnext expr_beg; fbreak;
1447
- end
1448
- };
1449
-
1450
- # meth(1 + 2)
1451
- # Regular method call.
1452
- e_lparen
1453
- => { emit(:tLPAREN2, '('.freeze)
1454
- fnext expr_beg; fbreak; };
1455
-
1456
- # meth [...]
1457
- # Array argument. Compare with indexing `meth[...]`.
1458
- w_space+ e_lbrack
1459
- => { emit(:tLBRACK, '['.freeze, @te - 1, @te)
1460
- fnext expr_beg; fbreak; };
1461
-
1462
- # cmd {}
1463
- # Command: method call without parentheses.
1464
- w_space* e_lbrace
1465
- => {
1466
- if @lambda_stack.last == @paren_nest
1467
- @lambda_stack.pop
1468
- emit(:tLAMBEG, '{'.freeze, @te - 1, @te)
1469
- else
1470
- emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1471
- end
1472
- fnext expr_value; fbreak;
1473
- };
1474
-
1475
- #
1476
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
1477
- #
1478
-
1479
- # a??
1480
- # Ternary operator
1481
- '?' c_space_nl
1482
- => {
1483
- # Unlike expr_beg as invoked in the next rule, do not warn
1484
- p = @ts - 1
1485
- fgoto expr_end;
1486
- };
1487
-
1488
- # a ?b, a? ?
1489
- # Character literal or ternary operator
1490
- w_space* '?'
1491
- => { fhold; fgoto expr_beg; };
1492
-
1493
- # a %{1}, a %[1] (but not "a %=1=" or "a % foo")
1494
- # a /foo/ (but not "a / foo" or "a /=foo")
1495
- # a <<HEREDOC
1496
- w_space+ %{ tm = p }
1497
- ( [%/] ( c_any - c_space_nl - '=' ) # /
1498
- | '<<'
1499
- )
1500
- => {
1501
- if tok(tm, tm + 1) == '/'.freeze
1502
- # Ambiguous regexp literal.
1503
- diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
1504
- end
1505
-
1506
- p = tm - 1
1507
- fgoto expr_beg;
1508
- };
1509
-
1510
- # x *1
1511
- # Ambiguous unary plus or minus, splat, kwsplat or block-pass.
1512
- w_space+ %{ tm = p } ( '+' | '-' | '*' | '&' | '**' )
1513
- => {
1514
- diagnostic :warning, :ambiguous_prefix, { :prefix => tok(tm, @te) },
1515
- range(tm, @te)
1516
-
1517
- p = tm - 1
1518
- fgoto expr_beg;
1519
- };
1520
-
1521
- # x ::Foo
1522
- # Ambiguous toplevel constant access.
1523
- w_space+ '::'
1524
- => { fhold; fhold; fgoto expr_beg; };
1525
-
1526
- # x:b
1527
- # Symbol.
1528
- w_space* ':'
1529
- => { fhold; fgoto expr_beg; };
1530
-
1531
- w_space+ label
1532
- => { p = @ts - 1; fgoto expr_beg; };
1533
-
1534
- #
1535
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
1536
- #
1537
-
1538
- # a ? b
1539
- # Ternary operator.
1540
- w_space+ %{ tm = p } '?' c_space_nl
1541
- => { p = tm - 1; fgoto expr_end; };
1542
-
1543
- # x + 1: Binary operator or operator-assignment.
1544
- w_space* operator_arithmetic
1545
- ( '=' | c_space_nl )? |
1546
- # x rescue y: Modifier keyword.
1547
- w_space* keyword_modifier |
1548
- # a &. b: Safe navigation operator.
1549
- w_space* '&.' |
1550
- # Miscellanea.
1551
- w_space* punctuation_end
1552
- => {
1553
- p = @ts - 1
1554
- fgoto expr_end;
1555
- };
1556
-
1557
- w_space;
1558
-
1559
- w_comment
1560
- => { fgoto expr_end; };
1561
-
1562
- w_newline
1563
- => { fhold; fgoto expr_end; };
1564
-
1565
- c_any
1566
- => { fhold; fgoto expr_beg; };
1567
-
1568
- c_eof => do_eof;
1569
- *|;
1570
-
1571
- # The previous token was an identifier which was seen while in the
1572
- # command mode (that is, the state at the beginning of #advance was
1573
- # expr_value). This state is very similar to expr_arg, but disambiguates
1574
- # two very rare and specific conditions:
1575
- # * In 1.8 mode, "foo (lambda do end)".
1576
- # * In 1.9+ mode, "f x: -> do foo do end end".
1577
- expr_cmdarg := |*
1578
- w_space+ e_lparen
1579
- => {
1580
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1581
- if version?(18)
1582
- fnext expr_value; fbreak;
1583
- else
1584
- fnext expr_beg; fbreak;
1585
- end
1586
- };
1587
-
1588
- w_space* 'do'
1589
- => {
1590
- if @cond.active?
1591
- emit(:kDO_COND, 'do'.freeze, @te - 2, @te)
1592
- else
1593
- emit(:kDO, 'do'.freeze, @te - 2, @te)
1594
- end
1595
- fnext expr_value; fbreak;
1596
- };
1597
-
1598
- c_any |
1599
- # Disambiguate with the `do' rule above.
1600
- w_space* bareword |
1601
- w_space* label
1602
- => { p = @ts - 1
1603
- fgoto expr_arg; };
1604
-
1605
- c_eof => do_eof;
1606
- *|;
1607
-
1608
- # The rationale for this state is pretty complex. Normally, if an argument
1609
- # is passed to a command and then there is a block (tLCURLY...tRCURLY),
1610
- # the block is attached to the innermost argument (`f` in `m f {}`), or it
1611
- # is a parse error (`m 1 {}`). But there is a special case for passing a single
1612
- # primary expression grouped with parentheses: if you write `m (1) {}` or
1613
- # (2.0 only) `m () {}`, then the block is attached to `m`.
1614
- #
1615
- # Thus, we recognize the opening `(` of a command (remember, a command is
1616
- # a method call without parens) as a tLPAREN_ARG; then, in parser, we recognize
1617
- # `tLPAREN_ARG expr rparen` as a `primary_expr` and before rparen, set the
1618
- # lexer's state to `expr_endarg`, which makes it emit the possibly following
1619
- # `{` as `tLBRACE_ARG`.
1620
- #
1621
- # The default post-`expr_endarg` state is `expr_end`, so this state also handles
1622
- # `do` (as `kDO_BLOCK` in `expr_beg`).
1623
- expr_endarg := |*
1624
- e_lbrace
1625
- => {
1626
- if @lambda_stack.last == @paren_nest
1627
- @lambda_stack.pop
1628
- emit(:tLAMBEG, '{'.freeze)
1629
- else
1630
- emit(:tLBRACE_ARG, '{'.freeze)
1631
- end
1632
- fnext expr_value; fbreak;
1633
- };
1634
-
1635
- 'do'
1636
- => { emit_do(true)
1637
- fnext expr_value; fbreak; };
1638
-
1639
- w_space_comment;
1640
-
1641
- c_any
1642
- => { fhold; fgoto expr_end; };
1643
-
1644
- c_eof => do_eof;
1645
- *|;
1646
-
1647
- # The rationale for this state is that several keywords accept value
1648
- # (i.e. should transition to `expr_beg`), do not accept it like a command
1649
- # (i.e. not an `expr_arg`), and must behave like a statement, that is,
1650
- # accept a modifier if/while/etc.
1651
- #
1652
- expr_mid := |*
1653
- keyword_modifier
1654
- => { emit_table(KEYWORDS)
1655
- fnext expr_beg; fbreak; };
1656
-
1657
- bareword
1658
- => { p = @ts - 1; fgoto expr_beg; };
1659
-
1660
- w_space_comment;
1661
-
1662
- w_newline
1663
- => { fhold; fgoto expr_end; };
1664
-
1665
- c_any
1666
- => { fhold; fgoto expr_beg; };
1667
-
1668
- c_eof => do_eof;
1669
- *|;
1670
-
1671
- # Beginning of an expression.
1672
- #
1673
- # Don't fall through to this state from `c_any`; make sure to handle
1674
- # `c_space* c_nl` and let `expr_end` handle the newline.
1675
- # Otherwise code like `f\ndef x` gets glued together and the parser
1676
- # explodes.
1677
- #
1678
- expr_beg := |*
1679
- # +5, -5, - 5
1680
- [+\-] w_any* [0-9]
1681
- => {
1682
- emit(:tUNARY_NUM, tok(@ts, @ts + 1), @ts, @ts + 1)
1683
- fhold; fnext expr_end; fbreak;
1684
- };
1685
-
1686
- # splat *a
1687
- '*'
1688
- => { emit(:tSTAR, '*'.freeze)
1689
- fbreak; };
1690
-
1691
- #
1692
- # STRING AND REGEXP LITERALS
1693
- #
1694
-
1695
- # /regexp/oui
1696
- # /=/ (disambiguation with /=)
1697
- '/' c_any
1698
- => {
1699
- type = delimiter = tok[0].chr
1700
- fhold; fgoto *push_literal(type, delimiter, @ts);
1701
- };
1702
-
1703
- # %<string>
1704
- '%' ( any - [A-Za-z] )
1705
- => {
1706
- type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
1707
- fgoto *push_literal(type, delimiter, @ts);
1708
- };
1709
-
1710
- # %w(we are the people)
1711
- '%' [A-Za-z]+ c_any
1712
- => {
1713
- type, delimiter = tok[0..-2], tok[-1].chr
1714
- fgoto *push_literal(type, delimiter, @ts);
1715
- };
1716
-
1717
- '%' c_eof
1718
- => {
1719
- diagnostic :fatal, :string_eof, nil, range(@ts, @ts + 1)
1720
- };
1721
-
1722
- # Heredoc start.
1723
- # <<END | <<'END' | <<"END" | <<`END` |
1724
- # <<-END | <<-'END' | <<-"END" | <<-`END` |
1725
- # <<~END | <<~'END' | <<~"END" | <<~`END`
1726
- '<<' [~\-]?
1727
- ( '"' ( any - '"' )* '"'
1728
- | "'" ( any - "'" )* "'"
1729
- | "`" ( any - "`" )* "`"
1730
- | bareword ) % { heredoc_e = p }
1731
- c_line* c_nl % { new_herebody_s = p }
1732
- => {
1733
- tok(@ts, heredoc_e) =~ /^<<(-?)(~?)(["'`]?)(.*)\3$/m
1734
-
1735
- indent = !$1.empty? || !$2.empty?
1736
- dedent_body = !$2.empty?
1737
- type = $3.empty? ? '<<"'.freeze : ('<<'.freeze + $3)
1738
- delimiter = $4
1739
-
1740
- if @version >= 24
1741
- if delimiter.count("\n") > 0
1742
- if delimiter.end_with?("\n")
1743
- diagnostic :warning, :heredoc_id_ends_with_nl, nil, range(@ts, @ts + 1)
1744
- delimiter = delimiter.rstrip
1745
- else
1746
- diagnostic :fatal, :heredoc_id_has_newline, nil, range(@ts, @ts + 1)
1747
- end
1748
- end
1749
- end
1750
-
1751
- if dedent_body && version?(18, 19, 20, 21, 22)
1752
- emit(:tLSHFT, '<<'.freeze, @ts, @ts + 2)
1753
- p = @ts + 1
1754
- fnext expr_beg; fbreak;
1755
- else
1756
- fnext *push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
1757
-
1758
- @herebody_s ||= new_herebody_s
1759
- p = @herebody_s - 1
1760
- end
1761
- };
1762
-
1763
- #
1764
- # SYMBOL LITERALS
1765
- #
1766
-
1767
- # :&&, :||
1768
- ':' ('&&' | '||') => {
1769
- fhold; fhold;
1770
- emit(:tSYMBEG, tok(@ts, @ts + 1), @ts, @ts + 1)
1771
- fgoto expr_fname;
1772
- };
1773
-
1774
- # :"bar", :'baz'
1775
- ':' ['"] # '
1776
- => {
1777
- type, delimiter = tok, tok[-1].chr
1778
- fgoto *push_literal(type, delimiter, @ts);
1779
- };
1780
-
1781
- # :!@ is :!
1782
- # :~@ is :~
1783
- ':' [!~] '@'
1784
- => {
1785
- emit(:tSYMBOL, tok(@ts + 1, @ts + 2))
1786
- fnext expr_end; fbreak;
1787
- };
1788
-
1789
- ':' bareword ambiguous_symbol_suffix
1790
- => {
1791
- emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
1792
- p = tm - 1
1793
- fnext expr_end; fbreak;
1794
- };
1795
-
1796
- ':' ( bareword | global_var | class_var | instance_var |
1797
- operator_fname | operator_arithmetic | operator_rest )
1798
- => {
1799
- emit(:tSYMBOL, tok(@ts + 1), @ts)
1800
- fnext expr_end; fbreak;
1801
- };
1802
-
1803
- #
1804
- # AMBIGUOUS TERNARY OPERATOR
1805
- #
1806
-
1807
- # Character constant, like ?a, ?\n, ?\u1000, and so on
1808
- # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
1809
- '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
1810
- | (c_any - c_space_nl - e_bs) % { @escape = nil }
1811
- )
1812
- => {
1813
- value = @escape || tok(@ts + 1)
1814
-
1815
- if version?(18)
1816
- emit(:tINTEGER, value.getbyte(0))
1817
- else
1818
- emit(:tCHARACTER, value)
1819
- end
1820
-
1821
- fnext expr_end; fbreak;
1822
- };
1823
-
1824
- '?' c_space_nl
1825
- => {
1826
- escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
1827
- "\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
1828
- diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
1829
-
1830
- p = @ts - 1
1831
- fgoto expr_end;
1832
- };
1833
-
1834
- '?' c_eof
1835
- => {
1836
- diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
1837
- };
1838
-
1839
- # f ?aa : b: Disambiguate with a character literal.
1840
- '?' [A-Za-z_] bareword
1841
- => {
1842
- p = @ts - 1
1843
- fgoto expr_end;
1844
- };
1845
-
1846
- #
1847
- # KEYWORDS AND PUNCTUATION
1848
- #
1849
-
1850
- # a({b=>c})
1851
- e_lbrace
1852
- => {
1853
- if @lambda_stack.last == @paren_nest
1854
- @lambda_stack.pop
1855
- emit(:tLAMBEG, '{'.freeze)
1856
- else
1857
- emit(:tLBRACE, '{'.freeze)
1858
- end
1859
- fbreak;
1860
- };
1861
-
1862
- # a([1, 2])
1863
- e_lbrack
1864
- => { emit(:tLBRACK, '['.freeze)
1865
- fbreak; };
1866
-
1867
- # a()
1868
- e_lparen
1869
- => { emit(:tLPAREN, '('.freeze)
1870
- fbreak; };
1871
-
1872
- # a(+b)
1873
- punctuation_begin
1874
- => { emit_table(PUNCTUATION_BEGIN)
1875
- fbreak; };
1876
-
1877
- # rescue Exception => e: Block rescue.
1878
- # Special because it should transition to expr_mid.
1879
- 'rescue' %{ tm = p } '=>'?
1880
- => { emit(:kRESCUE, 'rescue'.freeze, @ts, tm)
1881
- p = tm - 1
1882
- fnext expr_mid; fbreak; };
1883
-
1884
- # if a: Statement if.
1885
- keyword_modifier
1886
- => { emit_table(KEYWORDS_BEGIN)
1887
- fnext expr_value; fbreak; };
1888
-
1889
- #
1890
- # RUBY 1.9 HASH LABELS
1891
- #
1892
-
1893
- label ( any - ':' )
1894
- => {
1895
- fhold;
1896
-
1897
- if version?(18)
1898
- ident = tok(@ts, @te - 2)
1899
-
1900
- emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
1901
- ident, @ts, @te - 2)
1902
- fhold; # continue as a symbol
1903
-
1904
- if !@static_env.nil? && @static_env.declared?(ident)
1905
- fnext expr_end;
1906
- else
1907
- fnext *arg_or_cmdarg;
1908
- end
1909
- else
1910
- emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
1911
- fnext expr_labelarg;
1912
- end
1913
-
1914
- fbreak;
1915
- };
1916
-
1917
- #
1918
- # CONTEXT-DEPENDENT VARIABLE LOOKUP OR COMMAND INVOCATION
1919
- #
1920
-
1921
- # foo= bar: Disambiguate with bareword rule below.
1922
- bareword ambiguous_ident_suffix |
1923
- # def foo: Disambiguate with bareword rule below.
1924
- keyword
1925
- => { p = @ts - 1
1926
- fgoto expr_end; };
1927
-
1928
- # a = 42; a [42]: Indexing.
1929
- # def a; end; a [42]: Array argument.
1930
- call_or_var
1931
- => local_ident;
1932
-
1933
- (call_or_var - keyword)
1934
- % { ident_tok = tok; ident_ts = @ts; ident_te = @te; }
1935
- w_space+ '('
1936
- => {
1937
- emit(:tIDENTIFIER, ident_tok, ident_ts, ident_te)
1938
- p = ident_te - 1
1939
-
1940
- if !@static_env.nil? && @static_env.declared?(ident_tok) && @version < 25
1941
- fnext expr_endfn;
1942
- else
1943
- fnext expr_cmdarg;
1944
- end
1945
- fbreak;
1946
- };
1947
-
1948
- #
1949
- # WHITESPACE
1950
- #
1951
-
1952
- w_any;
1953
-
1954
- e_heredoc_nl '=begin' ( c_space | c_nl_zlen )
1955
- => {
1956
- p = @ts - 1
1957
- @cs_before_block_comment = @cs
1958
- fgoto line_begin;
1959
- };
1960
-
1961
- #
1962
- # DEFAULT TRANSITION
1963
- #
1964
-
1965
- # The following rules match most binary and all unary operators.
1966
- # Rules for binary operators provide better error reporting.
1967
- operator_arithmetic '=' |
1968
- operator_rest |
1969
- punctuation_end |
1970
- c_any
1971
- => { p = @ts - 1; fgoto expr_end; };
1972
-
1973
- c_eof => do_eof;
1974
- *|;
1975
-
1976
- # Special newline handling for "def a b:"
1977
- #
1978
- expr_labelarg := |*
1979
- w_space_comment;
1980
-
1981
- w_newline
1982
- => {
1983
- if @in_kwarg
1984
- fhold; fgoto expr_end;
1985
- else
1986
- fgoto line_begin;
1987
- end
1988
- };
1989
-
1990
- c_any
1991
- => { fhold; fgoto expr_beg; };
1992
-
1993
- c_eof => do_eof;
1994
- *|;
1995
-
1996
- # Like expr_beg, but no 1.9 label or 2.2 quoted label possible.
1997
- #
1998
- expr_value := |*
1999
- # a:b: a(:b), a::B, A::B
2000
- label (any - ':')
2001
- => { p = @ts - 1
2002
- fgoto expr_end; };
2003
-
2004
- # "bar", 'baz'
2005
- ['"] # '
2006
- => {
2007
- fgoto *push_literal(tok, tok, @ts);
2008
- };
2009
-
2010
- w_space_comment;
2011
-
2012
- w_newline
2013
- => { fgoto line_begin; };
2014
-
2015
- c_any
2016
- => { fhold; fgoto expr_beg; };
2017
-
2018
- c_eof => do_eof;
2019
- *|;
2020
-
2021
- expr_end := |*
2022
- #
2023
- # STABBY LAMBDA
2024
- #
2025
-
2026
- '->'
2027
- => {
2028
- emit(:tLAMBDA, '->'.freeze, @ts, @ts + 2)
2029
-
2030
- @lambda_stack.push @paren_nest
2031
- fnext expr_endfn; fbreak;
2032
- };
2033
-
2034
- e_lbrace | 'do'
2035
- => {
2036
- if @lambda_stack.last == @paren_nest
2037
- @lambda_stack.pop
2038
-
2039
- if tok == '{'.freeze
2040
- emit(:tLAMBEG, '{'.freeze)
2041
- else # 'do'
2042
- emit(:kDO_LAMBDA, 'do'.freeze)
2043
- end
2044
- else
2045
- if tok == '{'.freeze
2046
- emit(:tLCURLY, '{'.freeze)
2047
- else # 'do'
2048
- emit_do
2049
- end
2050
- end
2051
-
2052
- fnext expr_value; fbreak;
2053
- };
2054
-
2055
- #
2056
- # KEYWORDS
2057
- #
2058
-
2059
- keyword_with_fname
2060
- => { emit_table(KEYWORDS)
2061
- fnext expr_fname; fbreak; };
2062
-
2063
- 'class' w_any* '<<'
2064
- => { emit(:kCLASS, 'class'.freeze, @ts, @ts + 5)
2065
- emit(:tLSHFT, '<<'.freeze, @te - 2, @te)
2066
- fnext expr_value; fbreak; };
2067
-
2068
- # a if b:c: Syntax error.
2069
- keyword_modifier
2070
- => { emit_table(KEYWORDS)
2071
- fnext expr_beg; fbreak; };
2072
-
2073
- # elsif b:c: elsif b(:c)
2074
- keyword_with_value
2075
- => { emit_table(KEYWORDS)
2076
- fnext expr_value; fbreak; };
2077
-
2078
- keyword_with_mid
2079
- => { emit_table(KEYWORDS)
2080
- fnext expr_mid; fbreak; };
2081
-
2082
- keyword_with_arg
2083
- => {
2084
- emit_table(KEYWORDS)
2085
-
2086
- if version?(18) && tok == 'not'.freeze
2087
- fnext expr_beg; fbreak;
2088
- else
2089
- fnext expr_arg; fbreak;
2090
- end
2091
- };
2092
-
2093
- '__ENCODING__'
2094
- => {
2095
- if version?(18)
2096
- emit(:tIDENTIFIER)
2097
-
2098
- unless !@static_env.nil? && @static_env.declared?(tok)
2099
- fnext *arg_or_cmdarg;
2100
- end
2101
- else
2102
- emit(:k__ENCODING__, '__ENCODING__'.freeze)
2103
- end
2104
- fbreak;
2105
- };
2106
-
2107
- keyword_with_end
2108
- => { emit_table(KEYWORDS)
2109
- fbreak; };
2110
-
2111
- #
2112
- # NUMERIC LITERALS
2113
- #
2114
-
2115
- ( '0' [Xx] %{ @num_base = 16; @num_digits_s = p } int_hex
2116
- | '0' [Dd] %{ @num_base = 10; @num_digits_s = p } int_dec
2117
- | '0' [Oo] %{ @num_base = 8; @num_digits_s = p } int_dec
2118
- | '0' [Bb] %{ @num_base = 2; @num_digits_s = p } int_bin
2119
- | [1-9] digit* '_'? %{ @num_base = 10; @num_digits_s = @ts } int_dec
2120
- | '0' digit* '_'? %{ @num_base = 8; @num_digits_s = @ts } int_dec
2121
- ) %{ @num_suffix_s = p } int_suffix
2122
- => {
2123
- digits = tok(@num_digits_s, @num_suffix_s)
2124
-
2125
- if digits.end_with? '_'.freeze
2126
- diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
2127
- range(@te - 1, @te)
2128
- elsif digits.empty? && @num_base == 8 && version?(18)
2129
- # 1.8 did not raise an error on 0o.
2130
- digits = '0'.freeze
2131
- elsif digits.empty?
2132
- diagnostic :error, :empty_numeric
2133
- elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
2134
- invalid_s = @num_digits_s + invalid_idx
2135
- diagnostic :error, :invalid_octal, nil,
2136
- range(invalid_s, invalid_s + 1)
2137
- end
2138
-
2139
- if version?(18, 19, 20)
2140
- emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s)
2141
- p = @num_suffix_s - 1
2142
- else
2143
- @num_xfrm.call(digits.to_i(@num_base))
2144
- end
2145
- fbreak;
2146
- };
2147
-
2148
- flo_frac flo_pow?
2149
- => {
2150
- diagnostic :error, :no_dot_digit_literal
2151
- };
2152
-
2153
- flo_int [eE]
2154
- => {
2155
- if version?(18, 19, 20)
2156
- diagnostic :error,
2157
- :trailing_in_number, { :character => tok(@te - 1, @te) },
2158
- range(@te - 1, @te)
2159
- else
2160
- emit(:tINTEGER, tok(@ts, @te - 1).to_i, @ts, @te - 1)
2161
- fhold; fbreak;
2162
- end
2163
- };
2164
-
2165
- flo_int flo_frac [eE]
2166
- => {
2167
- if version?(18, 19, 20)
2168
- diagnostic :error,
2169
- :trailing_in_number, { :character => tok(@te - 1, @te) },
2170
- range(@te - 1, @te)
2171
- else
2172
- emit(:tFLOAT, tok(@ts, @te - 1).to_f, @ts, @te - 1)
2173
- fhold; fbreak;
2174
- end
2175
- };
2176
-
2177
- flo_int
2178
- ( flo_frac? flo_pow %{ @num_suffix_s = p } flo_pow_suffix
2179
- | flo_frac %{ @num_suffix_s = p } flo_suffix
2180
- )
2181
- => {
2182
- digits = tok(@ts, @num_suffix_s)
2183
-
2184
- if version?(18, 19, 20)
2185
- emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
2186
- p = @num_suffix_s - 1
2187
- else
2188
- @num_xfrm.call(digits)
2189
- end
2190
- fbreak;
2191
- };
2192
-
2193
- #
2194
- # STRING AND XSTRING LITERALS
2195
- #
2196
-
2197
- # `echo foo`, "bar", 'baz'
2198
- '`' | ['"] # '
2199
- => {
2200
- type, delimiter = tok, tok[-1].chr
2201
- fgoto *push_literal(type, delimiter, @ts, nil, false, false, true);
2202
- };
2203
-
2204
- #
2205
- # CONSTANTS AND VARIABLES
2206
- #
2207
-
2208
- constant
2209
- => { emit(:tCONSTANT)
2210
- fnext *arg_or_cmdarg; fbreak; };
2211
-
2212
- constant ambiguous_const_suffix
2213
- => { emit(:tCONSTANT, tok(@ts, tm), @ts, tm)
2214
- p = tm - 1; fbreak; };
2215
-
2216
- global_var | class_var_v | instance_var_v
2217
- => { p = @ts - 1; fcall expr_variable; };
2218
-
2219
- #
2220
- # METHOD CALLS
2221
- #
2222
-
2223
- '.' | '&.' | '::'
2224
- => { emit_table(PUNCTUATION)
2225
- fnext expr_dot; fbreak; };
2226
-
2227
- call_or_var
2228
- => local_ident;
2229
-
2230
- bareword ambiguous_fid_suffix
2231
- => {
2232
- if tm == @te
2233
- # Suffix was consumed, e.g. foo!
2234
- emit(:tFID)
2235
- else
2236
- # Suffix was not consumed, e.g. foo!=
2237
- emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
2238
- p = tm - 1
2239
- end
2240
- fnext expr_arg; fbreak;
2241
- };
2242
-
2243
- #
2244
- # OPERATORS
2245
- #
2246
-
2247
- '*' | '=>'
2248
- => {
2249
- emit_table(PUNCTUATION)
2250
- fgoto expr_value;
2251
- };
2252
-
2253
- # When '|', '~', '!', '=>' are used as operators
2254
- # they do not accept any symbols (or quoted labels) after.
2255
- # Other binary operators accept it.
2256
- ( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' )
2257
- => {
2258
- emit_table(PUNCTUATION);
2259
- fnext expr_value; fbreak;
2260
- };
2261
-
2262
- ( e_lparen | '|' | '~' | '!' )
2263
- => { emit_table(PUNCTUATION)
2264
- fnext expr_beg; fbreak; };
2265
-
2266
- e_rbrace | e_rparen | ']'
2267
- => {
2268
- emit_table(PUNCTUATION)
2269
-
2270
- if @version < 24
2271
- @cond.lexpop
2272
- @cmdarg.lexpop
2273
- else
2274
- @cond.pop
2275
- @cmdarg.pop
2276
- end
2277
-
2278
- if tok == '}'.freeze || tok == ']'.freeze
2279
- if @version >= 25
2280
- fnext expr_end;
2281
- else
2282
- fnext expr_endarg;
2283
- end
2284
- else # )
2285
- # fnext expr_endfn; ?
2286
- end
2287
-
2288
- fbreak;
2289
- };
2290
-
2291
- operator_arithmetic '='
2292
- => { emit(:tOP_ASGN, tok(@ts, @te - 1))
2293
- fnext expr_beg; fbreak; };
2294
-
2295
- '?'
2296
- => { emit(:tEH, '?'.freeze)
2297
- fnext expr_value; fbreak; };
2298
-
2299
- e_lbrack
2300
- => { emit(:tLBRACK2, '['.freeze)
2301
- fnext expr_beg; fbreak; };
2302
-
2303
- punctuation_end
2304
- => { emit_table(PUNCTUATION)
2305
- fnext expr_beg; fbreak; };
2306
-
2307
- #
2308
- # WHITESPACE
2309
- #
2310
-
2311
- w_space_comment;
2312
-
2313
- w_newline
2314
- => { fgoto leading_dot; };
2315
-
2316
- ';'
2317
- => { emit(:tSEMI, ';'.freeze)
2318
- fnext expr_value; fbreak; };
2319
-
2320
- '\\' c_line {
2321
- diagnostic :error, :bare_backslash, nil, range(@ts, @ts + 1)
2322
- fhold;
2323
- };
2324
-
2325
- c_any
2326
- => {
2327
- diagnostic :fatal, :unexpected, { :character => tok.inspect[1..-2] }
2328
- };
2329
-
2330
- c_eof => do_eof;
2331
- *|;
2332
-
2333
- leading_dot := |*
2334
- # Insane leading dots:
2335
- # a #comment
2336
- # .b: a.b
2337
- c_space* %{ tm = p } ('.' | '&.')
2338
- => { p = tm - 1; fgoto expr_end; };
2339
-
2340
- any
2341
- => { emit(:tNL, nil, @newline_s, @newline_s + 1)
2342
- fhold; fnext line_begin; fbreak; };
2343
- *|;
2344
-
2345
- #
2346
- # === EMBEDDED DOCUMENT (aka BLOCK COMMENT) PARSING ===
2347
- #
2348
-
2349
- line_comment := |*
2350
- '=end' c_line* c_nl_zlen
2351
- => {
2352
- emit_comment(@eq_begin_s, @te)
2353
- fgoto *@cs_before_block_comment;
2354
- };
2355
-
2356
- c_line* c_nl;
2357
-
2358
- c_line* zlen
2359
- => {
2360
- diagnostic :fatal, :embedded_document, nil,
2361
- range(@eq_begin_s, @eq_begin_s + '=begin'.length)
2362
- };
2363
- *|;
2364
-
2365
- line_begin := |*
2366
- w_any;
2367
-
2368
- '=begin' ( c_space | c_nl_zlen )
2369
- => { @eq_begin_s = @ts
2370
- fgoto line_comment; };
2371
-
2372
- '__END__' ( c_eol - zlen )
2373
- => { p = pe - 3 };
2374
-
2375
- c_any
2376
- => { fhold; fgoto expr_value; };
2377
-
2378
- c_eof => do_eof;
2379
- *|;
2380
-
2381
- }%%
2382
- # %
2383
- end