parser 2.7.1.5 → 3.0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/lib/parser/all.rb +1 -0
  3. data/lib/parser/ast/processor.rb +3 -0
  4. data/lib/parser/base.rb +1 -0
  5. data/lib/parser/builders/default.rb +126 -13
  6. data/lib/parser/context.rb +4 -0
  7. data/lib/parser/current.rb +16 -7
  8. data/lib/parser/current_arg_stack.rb +5 -2
  9. data/lib/parser/lexer.rb +872 -802
  10. data/lib/parser/max_numparam_stack.rb +12 -4
  11. data/lib/parser/messages.rb +1 -0
  12. data/lib/parser/meta.rb +4 -3
  13. data/lib/parser/ruby18.rb +6 -2
  14. data/lib/parser/ruby27.rb +3763 -3704
  15. data/lib/parser/ruby28.rb +8047 -0
  16. data/lib/parser/ruby30.rb +3657 -3649
  17. data/lib/parser/ruby31.rb +8226 -0
  18. data/lib/parser/runner.rb +6 -1
  19. data/lib/parser/source/buffer.rb +50 -27
  20. data/lib/parser/source/comment.rb +13 -0
  21. data/lib/parser/source/comment/associator.rb +17 -4
  22. data/lib/parser/source/tree_rewriter.rb +27 -0
  23. data/lib/parser/static_environment.rb +4 -0
  24. data/lib/parser/variables_stack.rb +4 -0
  25. data/lib/parser/version.rb +1 -1
  26. data/parser.gemspec +1 -20
  27. metadata +8 -95
  28. data/.travis.yml +0 -41
  29. data/.yardopts +0 -21
  30. data/CHANGELOG.md +0 -1137
  31. data/CONTRIBUTING.md +0 -17
  32. data/Gemfile +0 -10
  33. data/README.md +0 -309
  34. data/Rakefile +0 -167
  35. data/ci/run_rubocop_specs +0 -14
  36. data/doc/AST_FORMAT.md +0 -2284
  37. data/doc/CUSTOMIZATION.md +0 -37
  38. data/doc/INTERNALS.md +0 -21
  39. data/doc/css/.gitkeep +0 -0
  40. data/doc/css/common.css +0 -68
  41. data/lib/parser/lexer.rl +0 -2550
  42. data/lib/parser/macruby.y +0 -2208
  43. data/lib/parser/ruby18.y +0 -1936
  44. data/lib/parser/ruby19.y +0 -2185
  45. data/lib/parser/ruby20.y +0 -2363
  46. data/lib/parser/ruby21.y +0 -2364
  47. data/lib/parser/ruby22.y +0 -2371
  48. data/lib/parser/ruby23.y +0 -2377
  49. data/lib/parser/ruby24.y +0 -2415
  50. data/lib/parser/ruby25.y +0 -2412
  51. data/lib/parser/ruby26.y +0 -2420
  52. data/lib/parser/ruby27.y +0 -2949
  53. data/lib/parser/ruby30.y +0 -3048
  54. data/lib/parser/rubymotion.y +0 -2192
  55. data/test/bug_163/fixtures/input.rb +0 -5
  56. data/test/bug_163/fixtures/output.rb +0 -5
  57. data/test/bug_163/rewriter.rb +0 -20
  58. data/test/helper.rb +0 -103
  59. data/test/parse_helper.rb +0 -328
  60. data/test/racc_coverage_helper.rb +0 -133
  61. data/test/test_ast_processor.rb +0 -32
  62. data/test/test_base.rb +0 -31
  63. data/test/test_current.rb +0 -31
  64. data/test/test_diagnostic.rb +0 -95
  65. data/test/test_diagnostic_engine.rb +0 -59
  66. data/test/test_encoding.rb +0 -99
  67. data/test/test_lexer.rb +0 -3617
  68. data/test/test_lexer_stack_state.rb +0 -78
  69. data/test/test_meta.rb +0 -12
  70. data/test/test_parse_helper.rb +0 -83
  71. data/test/test_parser.rb +0 -9986
  72. data/test/test_runner_parse.rb +0 -56
  73. data/test/test_runner_rewrite.rb +0 -47
  74. data/test/test_source_buffer.rb +0 -165
  75. data/test/test_source_comment.rb +0 -36
  76. data/test/test_source_comment_associator.rb +0 -399
  77. data/test/test_source_map.rb +0 -14
  78. data/test/test_source_range.rb +0 -192
  79. data/test/test_source_rewriter.rb +0 -541
  80. data/test/test_source_rewriter_action.rb +0 -46
  81. data/test/test_source_tree_rewriter.rb +0 -361
  82. data/test/test_static_environment.rb +0 -45
  83. data/test/using_tree_rewriter/fixtures/input.rb +0 -3
  84. data/test/using_tree_rewriter/fixtures/output.rb +0 -3
  85. data/test/using_tree_rewriter/using_tree_rewriter.rb +0 -9
data/doc/CUSTOMIZATION.md DELETED
@@ -1,37 +0,0 @@
1
- # Customizing Parsers
2
-
3
- While the default setup of the parsers provided by this Gem should be suitable
4
- for most, some developers might want to change parts of it. An example would be
5
- the use of a custom class for nodes instead of `Parser::AST::Node`.
6
-
7
- Customizing the AST is done by creating a custom builder class and passing it
8
- to the constructor method of a parser. The default setup comes down to the
9
- following:
10
-
11
- builder = Parser::Builders::Default.new
12
- parser = Parser::Ruby19.new(builder)
13
-
14
- When creating your own builder class it's best to subclass the default one so
15
- that you don't have to redefine every used method again:
16
-
17
- class MyBuilder < Parser::Builders::Default
18
-
19
- end
20
-
21
- builder = MyBuilder.new
22
- parser = Parser::Ruby19.new(builder)
23
-
24
- ## Custom Node Classes
25
-
26
- To use a custom node class you have to override the method
27
- `Parser::Builders::Default#n`:
28
-
29
- class MyBuilder < Parser::Builders::Default
30
- def n(type, children, location)
31
- return MyNodeClass.new(type, children, :location => location)
32
- end
33
- end
34
-
35
- Note that the used class (and corresponding instance) must be compatible with
36
- `Parser::AST::Node` so it's best to subclass it and override/add code where
37
- needed.
data/doc/INTERNALS.md DELETED
@@ -1,21 +0,0 @@
1
- Entry points
2
- ------------
3
-
4
- Parser should be kept as slim as possible. This includes not loading
5
- any potentially large files when they are likely to be unused in practice.
6
-
7
- Parser has five main (classes of) `require` entry points:
8
-
9
- * `require 'parser'`. Main entry point, requires all classes which
10
- are used across the entire library.
11
- * `require 'parser/rubyXX'`. Version-specific entry point. Can raise
12
- a NotImplementedError if current Ruby runtime is unable to parse the
13
- requested Ruby version.
14
- * `require 'parser/all'`. Requires all available parsers for released
15
- versions of Ruby. Can raise NotImplementedError.
16
- * `require 'parser/runner'`. Requires all the stuff which is useful for
17
- command-line tools but not otherwise.
18
- * `require 'parser/runner/X'`. Runner-specific entry point.
19
-
20
- All non-main entry points internally `require 'parser'`. Additionally, all
21
- runner-specific entry points internally `require 'parser/runner'`.
data/doc/css/.gitkeep DELETED
File without changes
data/doc/css/common.css DELETED
@@ -1,68 +0,0 @@
1
- body
2
- {
3
- font-size: 14px;
4
- line-height: 1.6;
5
- margin: 0 auto;
6
- max-width: 960px;
7
- }
8
-
9
- p code
10
- {
11
- background: #f2f2f2;
12
- padding-left: 3px;
13
- padding-right: 3px;
14
- }
15
-
16
- pre.code
17
- {
18
- font-size: 13px;
19
- line-height: 1.4;
20
- }
21
-
22
- /**
23
- * YARD uses generic table styles, using a special class means those tables
24
- * don't get messed up.
25
- */
26
- .table
27
- {
28
- border: 1px solid #ccc;
29
- border-right: none;
30
- border-collapse: separate;
31
- border-spacing: 0;
32
- text-align: left;
33
- }
34
-
35
- .table.full
36
- {
37
- width: 100%;
38
- }
39
-
40
- .table .field_name
41
- {
42
- min-width: 160px;
43
- }
44
-
45
- .table thead tr th.no_sort:first-child
46
- {
47
- width: 25px;
48
- }
49
-
50
- .table thead tr th, .table tbody tr td
51
- {
52
- border-bottom: 1px solid #ccc;
53
- border-right: 1px solid #ccc;
54
- min-width: 20px;
55
- padding: 8px 5px;
56
- text-align: left;
57
- vertical-align: top;
58
- }
59
-
60
- .table tbody tr:last-child td
61
- {
62
- border-bottom: none;
63
- }
64
-
65
- .table tr:nth-child(odd) td
66
- {
67
- background: #f9f9f9;
68
- }
data/lib/parser/lexer.rl DELETED
@@ -1,2550 +0,0 @@
1
- %%machine lex; # % fix highlighting
2
-
3
- #
4
- # === BEFORE YOU START ===
5
- #
6
- # Read the Ruby Hacking Guide chapter 11, available in English at
7
- # http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
8
- #
9
- # Remember two things about Ragel scanners:
10
- #
11
- # 1) Longest match wins.
12
- #
13
- # 2) If two matches have the same length, the first
14
- # in source code wins.
15
- #
16
- # General rules of making Ragel and Bison happy:
17
- #
18
- # * `p` (position) and `@te` contain the index of the character
19
- # they're pointing to ("current"), plus one. `@ts` contains the index
20
- # of the corresponding character. The code for extracting matched token is:
21
- #
22
- # @source_buffer.slice(@ts...@te)
23
- #
24
- # * If your input is `foooooooobar` and the rule is:
25
- #
26
- # 'f' 'o'+
27
- #
28
- # the result will be:
29
- #
30
- # foooooooobar
31
- # ^ ts=0 ^ p=te=9
32
- #
33
- # * A Ragel lexer action should not emit more than one token, unless
34
- # you know what you are doing.
35
- #
36
- # * All Ragel commands (fnext, fgoto, ...) end with a semicolon.
37
- #
38
- # * If an action emits the token and transitions to another state, use
39
- # these Ragel commands:
40
- #
41
- # emit($whatever)
42
- # fnext $next_state; fbreak;
43
- #
44
- # If you perform `fgoto` in an action which neither emits a token nor
45
- # rewinds the stream pointer, the parser's side-effectful,
46
- # context-sensitive lookahead actions will break in a hard to detect
47
- # and debug way.
48
- #
49
- # * If an action does not emit a token:
50
- #
51
- # fgoto $next_state;
52
- #
53
- # * If an action features lookbehind, i.e. matches characters with the
54
- # intent of passing them to another action:
55
- #
56
- # p = @ts - 1
57
- # fgoto $next_state;
58
- #
59
- # or, if the lookbehind consists of a single character:
60
- #
61
- # fhold; fgoto $next_state;
62
- #
63
- # * Ragel merges actions. So, if you have `e_lparen = '(' %act` and
64
- # `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
65
- # _will_ invoke the action `act`.
66
- #
67
- # e_something stands for "something with **e**mbedded action".
68
- #
69
- # * EOF is explicit and is matched by `c_eof`. If you want to introspect
70
- # the state of the lexer, add this rule to the state:
71
- #
72
- # c_eof => do_eof;
73
- #
74
- # * If you proceed past EOF, the lexer will complain:
75
- #
76
- # NoMethodError: undefined method `ord' for nil:NilClass
77
- #
78
-
79
- class Parser::Lexer
80
-
81
- %% write data nofinal;
82
- # %
83
-
84
- ESCAPES = {
85
- ?a.ord => "\a", ?b.ord => "\b", ?e.ord => "\e", ?f.ord => "\f",
86
- ?n.ord => "\n", ?r.ord => "\r", ?s.ord => "\s", ?t.ord => "\t",
87
- ?v.ord => "\v", ?\\.ord => "\\"
88
- }.freeze
89
-
90
- REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
91
-
92
- attr_reader :source_buffer
93
-
94
- attr_accessor :diagnostics
95
- attr_accessor :static_env
96
- attr_accessor :force_utf32
97
-
98
- attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
99
-
100
- attr_accessor :tokens, :comments
101
-
102
- def initialize(version)
103
- @version = version
104
- @static_env = nil
105
- @context = nil
106
-
107
- @tokens = nil
108
- @comments = nil
109
-
110
- reset
111
- end
112
-
113
- def reset(reset_state=true)
114
- # Ragel state:
115
- if reset_state
116
- # Unit tests set state prior to resetting lexer.
117
- @cs = self.class.lex_en_line_begin
118
-
119
- @cond = StackState.new('cond')
120
- @cmdarg = StackState.new('cmdarg')
121
- @cond_stack = []
122
- @cmdarg_stack = []
123
- end
124
-
125
- @force_utf32 = false # Set to true by some tests
126
-
127
- @source_pts = nil # @source as a codepoint array
128
-
129
- @p = 0 # stream position (saved manually in #advance)
130
- @ts = nil # token start
131
- @te = nil # token end
132
- @act = 0 # next action
133
-
134
- @stack = [] # state stack
135
- @top = 0 # state stack top pointer
136
-
137
- # Lexer state:
138
- @token_queue = []
139
- @literal_stack = []
140
-
141
- @eq_begin_s = nil # location of last encountered =begin
142
- @sharp_s = nil # location of last encountered #
143
-
144
- @newline_s = nil # location of last encountered newline
145
-
146
- @num_base = nil # last numeric base
147
- @num_digits_s = nil # starting position of numeric digits
148
- @num_suffix_s = nil # starting position of numeric suffix
149
- @num_xfrm = nil # numeric suffix-induced transformation
150
-
151
- @escape_s = nil # starting position of current sequence
152
- @escape = nil # last escaped sequence, as string
153
-
154
- @herebody_s = nil # starting position of current heredoc line
155
-
156
- # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
157
- # encountered after a matching closing parenthesis.
158
- @paren_nest = 0
159
- @lambda_stack = []
160
-
161
- # After encountering the closing line of <<~SQUIGGLY_HEREDOC,
162
- # we store the indentation level and give it out to the parser
163
- # on request. It is not possible to infer indentation level just
164
- # from the AST because escape sequences such as `\ ` or `\t` are
165
- # expanded inside the lexer, but count as non-whitespace for
166
- # indentation purposes.
167
- @dedent_level = nil
168
-
169
- # If the lexer is in `command state' (aka expr_value)
170
- # at the entry to #advance, it will transition to expr_cmdarg
171
- # instead of expr_arg at certain points.
172
- @command_start = true
173
-
174
- # True at the end of "def foo a:"
175
- @in_kwarg = false
176
-
177
- # State before =begin / =end block comment
178
- @cs_before_block_comment = self.class.lex_en_line_begin
179
- end
180
-
181
- def source_buffer=(source_buffer)
182
- @source_buffer = source_buffer
183
-
184
- if @source_buffer
185
- source = @source_buffer.source
186
-
187
- if source.encoding == Encoding::UTF_8
188
- @source_pts = source.unpack('U*')
189
- else
190
- @source_pts = source.unpack('C*')
191
- end
192
-
193
- if @source_pts[0] == 0xfeff
194
- # Skip byte order mark.
195
- @p = 1
196
- end
197
- else
198
- @source_pts = nil
199
- end
200
- end
201
-
202
- def encoding
203
- @source_buffer.source.encoding
204
- end
205
-
206
- LEX_STATES = {
207
- :line_begin => lex_en_line_begin,
208
- :expr_dot => lex_en_expr_dot,
209
- :expr_fname => lex_en_expr_fname,
210
- :expr_value => lex_en_expr_value,
211
- :expr_beg => lex_en_expr_beg,
212
- :expr_mid => lex_en_expr_mid,
213
- :expr_arg => lex_en_expr_arg,
214
- :expr_cmdarg => lex_en_expr_cmdarg,
215
- :expr_end => lex_en_expr_end,
216
- :expr_endarg => lex_en_expr_endarg,
217
- :expr_endfn => lex_en_expr_endfn,
218
- :expr_labelarg => lex_en_expr_labelarg,
219
-
220
- :interp_string => lex_en_interp_string,
221
- :interp_words => lex_en_interp_words,
222
- :plain_string => lex_en_plain_string,
223
- :plain_words => lex_en_plain_string,
224
- }
225
-
226
- def state
227
- LEX_STATES.invert.fetch(@cs, @cs)
228
- end
229
-
230
- def state=(state)
231
- @cs = LEX_STATES.fetch(state)
232
- end
233
-
234
- def push_cmdarg
235
- @cmdarg_stack.push(@cmdarg)
236
- @cmdarg = StackState.new("cmdarg.#{@cmdarg_stack.count}")
237
- end
238
-
239
- def pop_cmdarg
240
- @cmdarg = @cmdarg_stack.pop
241
- end
242
-
243
- def push_cond
244
- @cond_stack.push(@cond)
245
- @cond = StackState.new("cond.#{@cond_stack.count}")
246
- end
247
-
248
- def pop_cond
249
- @cond = @cond_stack.pop
250
- end
251
-
252
- def dedent_level
253
- # We erase @dedent_level as a precaution to avoid accidentally
254
- # using a stale value.
255
- dedent_level, @dedent_level = @dedent_level, nil
256
- dedent_level
257
- end
258
-
259
- # Return next token: [type, value].
260
- def advance
261
- if @token_queue.any?
262
- return @token_queue.shift
263
- end
264
-
265
- # Ugly, but dependent on Ragel output. Consider refactoring it somehow.
266
- klass = self.class
267
- _lex_trans_keys = klass.send :_lex_trans_keys
268
- _lex_key_spans = klass.send :_lex_key_spans
269
- _lex_index_offsets = klass.send :_lex_index_offsets
270
- _lex_indicies = klass.send :_lex_indicies
271
- _lex_trans_targs = klass.send :_lex_trans_targs
272
- _lex_trans_actions = klass.send :_lex_trans_actions
273
- _lex_to_state_actions = klass.send :_lex_to_state_actions
274
- _lex_from_state_actions = klass.send :_lex_from_state_actions
275
- _lex_eof_trans = klass.send :_lex_eof_trans
276
-
277
- pe = @source_pts.size + 2
278
- p, eof = @p, pe
279
-
280
- cmd_state = @command_start
281
- @command_start = false
282
-
283
- %% write exec;
284
- # %
285
-
286
- # Ragel creates a local variable called `testEof` but it doesn't use
287
- # it in any assignment. This dead code is here to swallow the warning.
288
- # It has no runtime cost because Ruby doesn't produce any instructions from it.
289
- if false
290
- testEof
291
- end
292
-
293
- @p = p
294
-
295
- if @token_queue.any?
296
- @token_queue.shift
297
- elsif @cs == klass.lex_error
298
- [ false, [ '$error'.freeze, range(p - 1, p) ] ]
299
- else
300
- eof = @source_pts.size
301
- [ false, [ '$eof'.freeze, range(eof, eof) ] ]
302
- end
303
- end
304
-
305
- protected
306
-
307
- def eof_codepoint?(point)
308
- [0x04, 0x1a, 0x00].include? point
309
- end
310
-
311
- def version?(*versions)
312
- versions.include?(@version)
313
- end
314
-
315
- def stack_pop
316
- @top -= 1
317
- @stack[@top]
318
- end
319
-
320
- def encode_escape(ord)
321
- ord.chr.force_encoding(@source_buffer.source.encoding)
322
- end
323
-
324
- def tok(s = @ts, e = @te)
325
- @source_buffer.slice(s...e)
326
- end
327
-
328
- def range(s = @ts, e = @te)
329
- Parser::Source::Range.new(@source_buffer, s, e)
330
- end
331
-
332
- def emit(type, value = tok, s = @ts, e = @te)
333
- token = [ type, [ value, range(s, e) ] ]
334
-
335
- @token_queue.push(token)
336
-
337
- @tokens.push(token) if @tokens
338
-
339
- token
340
- end
341
-
342
- def emit_table(table, s = @ts, e = @te)
343
- value = tok(s, e)
344
-
345
- emit(table[value], value, s, e)
346
- end
347
-
348
- def emit_do(do_block=false)
349
- if @cond.active?
350
- emit(:kDO_COND, 'do'.freeze)
351
- elsif @cmdarg.active? || do_block
352
- emit(:kDO_BLOCK, 'do'.freeze)
353
- else
354
- emit(:kDO, 'do'.freeze)
355
- end
356
- end
357
-
358
- def arg_or_cmdarg(cmd_state)
359
- if cmd_state
360
- self.class.lex_en_expr_cmdarg
361
- else
362
- self.class.lex_en_expr_arg
363
- end
364
- end
365
-
366
- def emit_comment(s = @ts, e = @te)
367
- if @comments
368
- @comments.push(Parser::Source::Comment.new(range(s, e)))
369
- end
370
-
371
- if @tokens
372
- @tokens.push([ :tCOMMENT, [ tok(s, e), range(s, e) ] ])
373
- end
374
-
375
- nil
376
- end
377
-
378
- def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
379
- @diagnostics.process(
380
- Parser::Diagnostic.new(type, reason, arguments, location, highlights))
381
- end
382
-
383
- #
384
- # === LITERAL STACK ===
385
- #
386
-
387
- def push_literal(*args)
388
- new_literal = Literal.new(self, *args)
389
- @literal_stack.push(new_literal)
390
- next_state_for_literal(new_literal)
391
- end
392
-
393
- def next_state_for_literal(literal)
394
- if literal.words? && literal.backslash_delimited?
395
- if literal.interpolate?
396
- self.class.lex_en_interp_backslash_delimited_words
397
- else
398
- self.class.lex_en_plain_backslash_delimited_words
399
- end
400
- elsif literal.words? && !literal.backslash_delimited?
401
- if literal.interpolate?
402
- self.class.lex_en_interp_words
403
- else
404
- self.class.lex_en_plain_words
405
- end
406
- elsif !literal.words? && literal.backslash_delimited?
407
- if literal.interpolate?
408
- self.class.lex_en_interp_backslash_delimited
409
- else
410
- self.class.lex_en_plain_backslash_delimited
411
- end
412
- else
413
- if literal.interpolate?
414
- self.class.lex_en_interp_string
415
- else
416
- self.class.lex_en_plain_string
417
- end
418
- end
419
- end
420
-
421
- def literal
422
- @literal_stack.last
423
- end
424
-
425
- def pop_literal
426
- old_literal = @literal_stack.pop
427
-
428
- @dedent_level = old_literal.dedent_level
429
-
430
- if old_literal.type == :tREGEXP_BEG
431
- # Fetch modifiers.
432
- self.class.lex_en_regexp_modifiers
433
- else
434
- self.class.lex_en_expr_end
435
- end
436
- end
437
-
438
- # Mapping of strings to parser tokens.
439
-
440
- PUNCTUATION = {
441
- '=' => :tEQL, '&' => :tAMPER2, '|' => :tPIPE,
442
- '!' => :tBANG, '^' => :tCARET, '+' => :tPLUS,
443
- '-' => :tMINUS, '*' => :tSTAR2, '/' => :tDIVIDE,
444
- '%' => :tPERCENT, '~' => :tTILDE, ',' => :tCOMMA,
445
- ';' => :tSEMI, '.' => :tDOT, '..' => :tDOT2,
446
- '...' => :tDOT3, '[' => :tLBRACK2, ']' => :tRBRACK,
447
- '(' => :tLPAREN2, ')' => :tRPAREN, '?' => :tEH,
448
- ':' => :tCOLON, '&&' => :tANDOP, '||' => :tOROP,
449
- '-@' => :tUMINUS, '+@' => :tUPLUS, '~@' => :tTILDE,
450
- '**' => :tPOW, '->' => :tLAMBDA, '=~' => :tMATCH,
451
- '!~' => :tNMATCH, '==' => :tEQ, '!=' => :tNEQ,
452
- '>' => :tGT, '>>' => :tRSHFT, '>=' => :tGEQ,
453
- '<' => :tLT, '<<' => :tLSHFT, '<=' => :tLEQ,
454
- '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
455
- '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
456
- '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
457
- '!@' => :tBANG, '&.' => :tANDDOT,
458
- }
459
-
460
- PUNCTUATION_BEGIN = {
461
- '&' => :tAMPER, '*' => :tSTAR, '**' => :tDSTAR,
462
- '+' => :tUPLUS, '-' => :tUMINUS, '::' => :tCOLON3,
463
- '(' => :tLPAREN, '{' => :tLBRACE, '[' => :tLBRACK,
464
- }
465
-
466
- KEYWORDS = {
467
- 'if' => :kIF_MOD, 'unless' => :kUNLESS_MOD,
468
- 'while' => :kWHILE_MOD, 'until' => :kUNTIL_MOD,
469
- 'rescue' => :kRESCUE_MOD, 'defined?' => :kDEFINED,
470
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
471
- }
472
-
473
- KEYWORDS_BEGIN = {
474
- 'if' => :kIF, 'unless' => :kUNLESS,
475
- 'while' => :kWHILE, 'until' => :kUNTIL,
476
- 'rescue' => :kRESCUE, 'defined?' => :kDEFINED,
477
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
478
- }
479
-
480
- %w(class module def undef begin end then elsif else ensure case when
481
- for break next redo retry in do return yield super self nil true
482
- false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
483
- KEYWORDS_BEGIN[keyword] = KEYWORDS[keyword] = :"k#{keyword.upcase}"
484
- end
485
-
486
- %%{
487
- # %
488
-
489
- access @;
490
- getkey (@source_pts[p] || 0);
491
-
492
- # === CHARACTER CLASSES ===
493
- #
494
- # Pay close attention to the differences between c_any and any.
495
- # c_any does not include EOF and so will cause incorrect behavior
496
- # for machine subtraction (any-except rules) and default transitions
497
- # for scanners.
498
-
499
- action do_nl {
500
- # Record position of a newline for precise location reporting on tNL
501
- # tokens.
502
- #
503
- # This action is embedded directly into c_nl, as it is idempotent and
504
- # there are no cases when we need to skip it.
505
- @newline_s = p
506
- }
507
-
508
- c_nl = '\n' $ do_nl;
509
- c_space = [ \t\r\f\v];
510
- c_space_nl = c_space | c_nl;
511
-
512
- c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
513
- c_eol = c_nl | c_eof;
514
- c_any = any - c_eof;
515
-
516
- c_nl_zlen = c_nl | zlen;
517
- c_line = any - c_nl_zlen;
518
-
519
- c_unicode = c_any - 0x00..0x7f;
520
- c_upper = [A-Z];
521
- c_lower = [a-z_] | c_unicode;
522
- c_alpha = c_lower | c_upper;
523
- c_alnum = c_alpha | [0-9];
524
-
525
- action do_eof {
526
- # Sit at EOF indefinitely. #advance would return $eof each time.
527
- # This allows feeding the lexer more data if needed; this is only used
528
- # in tests.
529
- #
530
- # Note that this action is not embedded into e_eof like e_heredoc_nl and e_bs
531
- # below. This is due to the fact that scanner state at EOF is observed
532
- # by tests, and encapsulating it in a rule would break the introspection.
533
- fhold; fbreak;
534
- }
535
-
536
- #
537
- # === TOKEN DEFINITIONS ===
538
- #
539
-
540
- # All operators are punctuation. There is more to punctuation
541
- # than just operators. Operators can be overridden by user;
542
- # punctuation can not.
543
-
544
- # A list of operators which are valid in the function name context, but
545
- # have different semantics in others.
546
- operator_fname = '[]' | '[]=' | '`' | '-@' | '+@' | '~@' | '!@' ;
547
-
548
- # A list of operators which can occur within an assignment shortcut (+ → +=).
549
- operator_arithmetic = '&' | '|' | '&&' | '||' | '^' | '+' | '-' |
550
- '*' | '/' | '**' | '~' | '<<' | '>>' | '%' ;
551
-
552
- # A list of all user-definable operators not covered by groups above.
553
- operator_rest = '=~' | '!~' | '==' | '!=' | '!' | '===' |
554
- '<' | '<=' | '>' | '>=' | '<=>' | '=>' ;
555
-
556
- # Note that `{` and `}` need to be referred to as e_lbrace and e_rbrace,
557
- # as they are ambiguous with interpolation `#{}` and should be counted.
558
- # These braces are not present in punctuation lists.
559
-
560
- # A list of punctuation which has different meaning when used at the
561
- # beginning of expression.
562
- punctuation_begin = '-' | '+' | '::' | '(' | '[' |
563
- '*' | '**' | '&' ;
564
-
565
- # A list of all punctuation except punctuation_begin.
566
- punctuation_end = ',' | '=' | '->' | '(' | '[' | ']' |
567
- '::' | '?' | ':' | '.' | '..' | '...' ;
568
-
569
- # A list of keywords which have different meaning at the beginning of expression.
570
- keyword_modifier = 'if' | 'unless' | 'while' | 'until' | 'rescue' ;
571
-
572
- # A list of keywords which accept an argument-like expression, i.e. have the
573
- # same post-processing as method calls or commands. Example: `yield 1`,
574
- # `yield (1)`, `yield(1)`, are interpreted as if `yield` was a function.
575
- keyword_with_arg = 'yield' | 'super' | 'not' | 'defined?' ;
576
-
577
- # A list of keywords which accept a literal function name as an argument.
578
- keyword_with_fname = 'def' | 'undef' | 'alias' ;
579
-
580
- # A list of keywords which accept an expression after them.
581
- keyword_with_value = 'else' | 'case' | 'ensure' | 'module' | 'elsif' | 'then' |
582
- 'for' | 'in' | 'do' | 'when' | 'begin' | 'class' |
583
- 'and' | 'or' ;
584
-
585
- # A list of keywords which accept a value, and treat the keywords from
586
- # `keyword_modifier` list as modifiers.
587
- keyword_with_mid = 'rescue' | 'return' | 'break' | 'next' ;
588
-
589
- # A list of keywords which do not accept an expression after them.
590
- keyword_with_end = 'end' | 'self' | 'true' | 'false' | 'retry' |
591
- 'redo' | 'nil' | 'BEGIN' | 'END' | '__FILE__' |
592
- '__LINE__' | '__ENCODING__';
593
-
594
- # All keywords.
595
- keyword = keyword_with_value | keyword_with_mid |
596
- keyword_with_end | keyword_with_arg |
597
- keyword_with_fname | keyword_modifier ;
598
-
599
- constant = c_upper c_alnum*;
600
- bareword = c_alpha c_alnum*;
601
-
602
- call_or_var = c_lower c_alnum*;
603
- class_var = '@@' bareword;
604
- instance_var = '@' bareword;
605
- global_var = '$'
606
- ( bareword | digit+
607
- | [`'+~*$&?!@/\\;,.=:<>"] # `
608
- | '-' c_alnum
609
- )
610
- ;
611
-
612
- # Ruby accepts (and fails on) variables with leading digit
613
- # in literal context, but not in unquoted symbol body.
614
- class_var_v = '@@' c_alnum+;
615
- instance_var_v = '@' c_alnum+;
616
-
617
- label = bareword [?!]? ':';
618
-
619
- #
620
- # === NUMERIC PARSING ===
621
- #
622
-
623
- int_hex = ( xdigit+ '_' )* xdigit* '_'? ;
624
- int_dec = ( digit+ '_' )* digit* '_'? ;
625
- int_bin = ( [01]+ '_' )* [01]* '_'? ;
626
-
627
- flo_int = [1-9] [0-9]* ( '_' digit+ )* | '0';
628
- flo_frac = '.' ( digit+ '_' )* digit+;
629
- flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
630
-
631
- int_suffix =
632
- '' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars) } }
633
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
634
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, chars)) } }
635
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
636
- | 're' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
637
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
638
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 6); p -= 6 } };
639
-
640
- flo_pow_suffix =
641
- '' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars)) } }
642
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Float(chars))) } }
643
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 2); p -= 2 } };
644
-
645
- flo_suffix =
646
- flo_pow_suffix
647
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
648
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
649
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 6); p -= 6 } };
650
-
651
- #
652
- # === ESCAPE SEQUENCE PARSING ===
653
- #
654
-
655
- # Escape parsing code is a Ragel pattern, not a scanner, and therefore
656
- # it shouldn't directly raise errors or perform other actions with side effects.
657
- # In reality this would probably just mess up error reporting in pathological
658
- # cases, though.
659
-
660
- # The amount of code required to parse \M\C stuff correctly is ridiculous.
661
-
662
- escaped_nl = "\\" c_nl;
663
-
664
- action unicode_points {
665
- @escape = ""
666
-
667
- codepoints = tok(@escape_s + 2, p - 1)
668
- codepoint_s = @escape_s + 2
669
-
670
- if @version < 24
671
- if codepoints.start_with?(" ") || codepoints.start_with?("\t")
672
- diagnostic :fatal, :invalid_unicode_escape, nil,
673
- range(@escape_s + 2, @escape_s + 3)
674
- end
675
-
676
- if spaces_p = codepoints.index(/[ \t]{2}/)
677
- diagnostic :fatal, :invalid_unicode_escape, nil,
678
- range(codepoint_s + spaces_p + 1, codepoint_s + spaces_p + 2)
679
- end
680
-
681
- if codepoints.end_with?(" ") || codepoints.end_with?("\t")
682
- diagnostic :fatal, :invalid_unicode_escape, nil, range(p - 1, p)
683
- end
684
- end
685
-
686
- codepoints.scan(/([0-9a-fA-F]+)|([ \t]+)/).each do |(codepoint_str, spaces)|
687
- if spaces
688
- codepoint_s += spaces.length
689
- else
690
- codepoint = codepoint_str.to_i(16)
691
-
692
- if codepoint >= 0x110000
693
- diagnostic :error, :unicode_point_too_large, nil,
694
- range(codepoint_s, codepoint_s + codepoint_str.length)
695
- break
696
- end
697
-
698
- @escape += codepoint.chr(Encoding::UTF_8)
699
- codepoint_s += codepoint_str.length
700
- end
701
- end
702
- }
703
-
704
- action unescape_char {
705
- codepoint = @source_pts[p - 1]
706
- if (@escape = ESCAPES[codepoint]).nil?
707
- @escape = encode_escape(@source_buffer.slice(p - 1))
708
- end
709
- }
710
-
711
- action invalid_complex_escape {
712
- diagnostic :fatal, :invalid_escape
713
- }
714
-
715
- action read_post_meta_or_ctrl_char {
716
- @escape = @source_buffer.slice(p - 1).chr
717
-
718
- if @version >= 27 && ((0..8).include?(@escape.ord) || (14..31).include?(@escape.ord))
719
- diagnostic :fatal, :invalid_escape
720
- end
721
- }
722
-
723
- action slash_c_char {
724
- @escape = encode_escape(@escape[0].ord & 0x9f)
725
- }
726
-
727
- action slash_m_char {
728
- @escape = encode_escape(@escape[0].ord | 0x80)
729
- }
730
-
731
- maybe_escaped_char = (
732
- '\\' c_any %unescape_char
733
- | ( c_any - [\\] ) %read_post_meta_or_ctrl_char
734
- );
735
-
736
- maybe_escaped_ctrl_char = ( # why?!
737
- '\\' c_any %unescape_char %slash_c_char
738
- | '?' % { @escape = "\x7f" }
739
- | ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
740
- );
741
-
742
- escape = (
743
- # \377
744
- [0-7]{1,3}
745
- % { @escape = encode_escape(tok(@escape_s, p).to_i(8) % 0x100) }
746
-
747
- # \xff
748
- | 'x' xdigit{1,2}
749
- % { @escape = encode_escape(tok(@escape_s + 1, p).to_i(16)) }
750
-
751
- # %q[\x]
752
- | 'x' ( c_any - xdigit )
753
- % {
754
- diagnostic :fatal, :invalid_hex_escape, nil, range(@escape_s - 1, p + 2)
755
- }
756
-
757
- # \u263a
758
- | 'u' xdigit{4}
759
- % { @escape = tok(@escape_s + 1, p).to_i(16).chr(Encoding::UTF_8) }
760
-
761
- # \u123
762
- | 'u' xdigit{0,3}
763
- % {
764
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
765
- }
766
-
767
- # u{not hex} or u{}
768
- | 'u{' ( c_any - xdigit - [ \t}] )* '}'
769
- % {
770
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
771
- }
772
-
773
- # \u{ \t 123 \t 456 \t\t }
774
- | 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
775
- (
776
- ( xdigit{1,6} [ \t]* '}'
777
- %unicode_points
778
- )
779
- |
780
- ( xdigit* ( c_any - xdigit - [ \t}] )+ '}'
781
- | ( c_any - [ \t}] )* c_eof
782
- | xdigit{7,}
783
- ) % {
784
- diagnostic :fatal, :unterminated_unicode, nil, range(p - 1, p)
785
- }
786
- )
787
-
788
- # \C-\a \cx
789
- | ( 'C-' | 'c' ) escaped_nl?
790
- maybe_escaped_ctrl_char
791
-
792
- # \M-a
793
- | 'M-' escaped_nl?
794
- maybe_escaped_char
795
- %slash_m_char
796
-
797
- # \C-\M-f \M-\cf \c\M-f
798
- | ( ( 'C-' | 'c' ) escaped_nl? '\\M-'
799
- | 'M-\\' escaped_nl? ( 'C-' | 'c' ) ) escaped_nl?
800
- maybe_escaped_ctrl_char
801
- %slash_m_char
802
-
803
- | 'C' c_any %invalid_complex_escape
804
- | 'M' c_any %invalid_complex_escape
805
- | ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
806
-
807
- | ( c_any - [0-7xuCMc] ) %unescape_char
808
-
809
- | c_eof % {
810
- diagnostic :fatal, :escape_eof, nil, range(p - 1, p)
811
- }
812
- );
813
-
814
- # Use rules in form of `e_bs escape' when you need to parse a sequence.
815
- e_bs = '\\' % {
816
- @escape_s = p
817
- @escape = nil
818
- };
819
-
820
- #
821
- # === STRING AND HEREDOC PARSING ===
822
- #
823
-
824
- # Heredoc parsing is quite a complex topic. First, consider that heredocs
825
- # can be arbitrarily nested. For example:
826
- #
827
- # puts <<CODE
828
- # the result is: #{<<RESULT.inspect
829
- # i am a heredoc
830
- # RESULT
831
- # }
832
- # CODE
833
- #
834
- # which, incidentally, evaluates to:
835
- #
836
- # the result is: " i am a heredoc\n"
837
- #
838
- # To parse them, lexer refers to two kinds (remember, nested heredocs)
839
- # of positions in the input stream, namely heredoc_e
840
- # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
841
- #
842
- # heredoc_e is simply contained inside the corresponding Literal, and
843
- # when the heredoc is closed, the lexing is restarted from that position.
844
- #
845
- # @herebody_s is quite more complex. First, @herebody_s changes after each
846
- # heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
847
- # contains the current line, and also when a heredoc is started, @herebody_s
848
- # contains the position from which the heredoc will be lexed.
849
- #
850
- # Second, as (insanity) there are nested heredocs, we need to maintain a
851
- # stack of these positions. Each time #push_literal is called, it saves current
852
- # @herebody_s to literal.saved_herebody_s, and after an interpolation (possibly
853
- # containing other heredocs) is closed, the previous value is restored.
854
-
855
- e_heredoc_nl = c_nl % {
856
- # After every heredoc was parsed, @herebody_s contains the
857
- # position of next token after all heredocs.
858
- if @herebody_s
859
- p = @herebody_s
860
- @herebody_s = nil
861
- end
862
- };
863
-
864
- action extend_string {
865
- string = tok
866
-
867
- # tLABEL_END is only possible in non-cond context on >= 2.2
868
- if @version >= 22 && !@cond.active?
869
- lookahead = @source_buffer.slice(@te...@te+2)
870
- end
871
-
872
- current_literal = literal
873
- if !current_literal.heredoc? &&
874
- (token = current_literal.nest_and_try_closing(string, @ts, @te, lookahead))
875
- if token[0] == :tLABEL_END
876
- p += 1
877
- pop_literal
878
- fnext expr_labelarg;
879
- else
880
- fnext *pop_literal;
881
- end
882
- fbreak;
883
- else
884
- current_literal.extend_string(string, @ts, @te)
885
- end
886
- }
887
-
888
- action extend_string_escaped {
889
- current_literal = literal
890
- # Get the first character after the backslash.
891
- escaped_char = @source_buffer.slice(@escape_s).chr
892
-
893
- if current_literal.munge_escape? escaped_char
894
- # If this particular literal uses this character as an opening
895
- # or closing delimiter, it is an escape sequence for that
896
- # particular character. Write it without the backslash.
897
-
898
- if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
899
- # Regular expressions should include escaped delimiters in their
900
- # escaped form, except when the escaped character is
901
- # a closing delimiter but not a regexp metacharacter.
902
- #
903
- # The backslash itself cannot be used as a closing delimiter
904
- # at the same time as an escape symbol, but it is always munged,
905
- # so this branch also executes for the non-closing-delimiter case
906
- # for the backslash.
907
- current_literal.extend_string(tok, @ts, @te)
908
- else
909
- current_literal.extend_string(escaped_char, @ts, @te)
910
- end
911
- else
912
- # It does not. So this is an actual escape sequence, yay!
913
- if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
914
- # Squiggly heredocs like
915
- # <<~-HERE
916
- # 1\
917
- # 2
918
- # HERE
919
- # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
920
- # This information is emitted as is, without escaping,
921
- # later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
922
- current_literal.extend_string(tok, @ts, @te)
923
- elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
924
- # Heredocs, regexp and a few other types of literals support line
925
- # continuation via \\\n sequence. The code like
926
- # "a\
927
- # b"
928
- # must be parsed as "ab"
929
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
930
- elsif current_literal.regexp?
931
- # Regular expressions should include escape sequences in their
932
- # escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
933
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
934
- else
935
- current_literal.extend_string(@escape || tok, @ts, @te)
936
- end
937
- end
938
- }
939
-
940
- # Extend a string with a newline or an EOF character.
941
- # As heredoc closing line can immediately precede EOF, this action
942
- # has to handle such case specially.
943
- action extend_string_eol {
944
- current_literal = literal
945
- if @te == pe
946
- diagnostic :fatal, :string_eof, nil,
947
- range(current_literal.str_s, current_literal.str_s + 1)
948
- end
949
-
950
- if current_literal.heredoc?
951
- line = tok(@herebody_s, @ts).gsub(/\r+$/, ''.freeze)
952
-
953
- if version?(18, 19, 20)
954
- # See ruby:c48b4209c
955
- line = line.gsub(/\r.*$/, ''.freeze)
956
- end
957
-
958
- # Try ending the heredoc with the complete most recently
959
- # scanned line. @herebody_s always refers to the start of such line.
960
- if current_literal.nest_and_try_closing(line, @herebody_s, @ts)
961
- # Adjust @herebody_s to point to the next line.
962
- @herebody_s = @te
963
-
964
- # Continue regular lexing after the heredoc reference (<<END).
965
- p = current_literal.heredoc_e - 1
966
- fnext *pop_literal; fbreak;
967
- else
968
- # Calculate indentation level for <<~HEREDOCs.
969
- current_literal.infer_indent_level(line)
970
-
971
- # Ditto.
972
- @herebody_s = @te
973
- end
974
- else
975
- # Try ending the literal with a newline.
976
- if current_literal.nest_and_try_closing(tok, @ts, @te)
977
- fnext *pop_literal; fbreak;
978
- end
979
-
980
- if @herebody_s
981
- # This is a regular literal intertwined with a heredoc. Like:
982
- #
983
- # p <<-foo+"1
984
- # bar
985
- # foo
986
- # 2"
987
- #
988
- # which, incidentally, evaluates to "bar\n1\n2".
989
- p = @herebody_s - 1
990
- @herebody_s = nil
991
- end
992
- end
993
-
994
- if current_literal.words? && !eof_codepoint?(@source_pts[p])
995
- current_literal.extend_space @ts, @te
996
- else
997
- # A literal newline is appended if the heredoc was _not_ closed
998
- # this time (see fbreak above). See also Literal#nest_and_try_closing
999
- # for rationale of calling #flush_string here.
1000
- current_literal.extend_string tok, @ts, @te
1001
- current_literal.flush_string
1002
- end
1003
- }
1004
-
1005
- action extend_string_space {
1006
- literal.extend_space @ts, @te
1007
- }
1008
-
1009
- #
1010
- # === INTERPOLATION PARSING ===
1011
- #
1012
-
1013
- # Interpolations with immediate variable names simply call into
1014
- # the corresponding machine.
1015
-
1016
- interp_var = '#' ( global_var | class_var_v | instance_var_v );
1017
-
1018
- action extend_interp_var {
1019
- current_literal = literal
1020
- current_literal.flush_string
1021
- current_literal.extend_content
1022
-
1023
- emit(:tSTRING_DVAR, nil, @ts, @ts + 1)
1024
-
1025
- p = @ts
1026
- fcall expr_variable;
1027
- }
1028
-
1029
- # Special case for Ruby >= 2.7
1030
- # If interpolated instance/class variable starts with a digit we parse it as a plain substring
1031
- # However, "#$1" is still a regular interpolation
1032
- interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
1033
-
1034
- action extend_interp_digit_var {
1035
- if @version >= 27
1036
- literal.extend_string(tok, @ts, @te)
1037
- else
1038
- message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
1039
- diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
1040
- end
1041
- }
1042
-
1043
- # Interpolations with code blocks must match nested curly braces, as
1044
- # interpolation ending is ambiguous with a block ending. So, every
1045
- # opening and closing brace should be matched with e_[lr]brace rules,
1046
- # which automatically perform the counting.
1047
- #
1048
- # Note that interpolations can themselves be nested, so brace balance
1049
- # is tied to the innermost literal.
1050
- #
1051
- # Also note that literals themselves should not use e_[lr]brace rules
1052
- # when matching their opening and closing delimiters, as the amount of
1053
- # braces inside the characters of a string literal is independent.
1054
-
1055
- interp_code = '#{';
1056
-
1057
- e_lbrace = '{' % {
1058
- @cond.push(false); @cmdarg.push(false)
1059
-
1060
- current_literal = literal
1061
- if current_literal
1062
- current_literal.start_interp_brace
1063
- end
1064
- };
1065
-
1066
- e_rbrace = '}' % {
1067
- current_literal = literal
1068
- if current_literal
1069
- if current_literal.end_interp_brace_and_try_closing
1070
- if version?(18, 19)
1071
- emit(:tRCURLY, '}'.freeze, p - 1, p)
1072
- @cond.lexpop
1073
- @cmdarg.lexpop
1074
- else
1075
- emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
1076
- end
1077
-
1078
- if current_literal.saved_herebody_s
1079
- @herebody_s = current_literal.saved_herebody_s
1080
- end
1081
-
1082
-
1083
- fhold;
1084
- fnext *next_state_for_literal(current_literal);
1085
- fbreak;
1086
- end
1087
- end
1088
-
1089
- @paren_nest -= 1
1090
- };
1091
-
1092
- action extend_interp_code {
1093
- current_literal = literal
1094
- current_literal.flush_string
1095
- current_literal.extend_content
1096
-
1097
- emit(:tSTRING_DBEG, '#{'.freeze)
1098
-
1099
- if current_literal.heredoc?
1100
- current_literal.saved_herebody_s = @herebody_s
1101
- @herebody_s = nil
1102
- end
1103
-
1104
- current_literal.start_interp_brace
1105
- @command_start = true
1106
- fnext expr_value;
1107
- fbreak;
1108
- }
1109
-
1110
- # Actual string parsers are simply combined from the primitives defined
1111
- # above.
1112
-
1113
- interp_words := |*
1114
- interp_code => extend_interp_code;
1115
- interp_digit_var => extend_interp_digit_var;
1116
- interp_var => extend_interp_var;
1117
- e_bs escape => extend_string_escaped;
1118
- c_space+ => extend_string_space;
1119
- c_eol => extend_string_eol;
1120
- c_any => extend_string;
1121
- *|;
1122
-
1123
- interp_string := |*
1124
- interp_code => extend_interp_code;
1125
- interp_digit_var => extend_interp_digit_var;
1126
- interp_var => extend_interp_var;
1127
- e_bs escape => extend_string_escaped;
1128
- c_eol => extend_string_eol;
1129
- c_any => extend_string;
1130
- *|;
1131
-
1132
- plain_words := |*
1133
- e_bs c_any => extend_string_escaped;
1134
- c_space+ => extend_string_space;
1135
- c_eol => extend_string_eol;
1136
- c_any => extend_string;
1137
- *|;
1138
-
1139
- plain_string := |*
1140
- '\\' c_nl => extend_string_eol;
1141
- e_bs c_any => extend_string_escaped;
1142
- c_eol => extend_string_eol;
1143
- c_any => extend_string;
1144
- *|;
1145
-
1146
- interp_backslash_delimited := |*
1147
- interp_code => extend_interp_code;
1148
- interp_digit_var => extend_interp_digit_var;
1149
- interp_var => extend_interp_var;
1150
- c_eol => extend_string_eol;
1151
- c_any => extend_string;
1152
- *|;
1153
-
1154
- plain_backslash_delimited := |*
1155
- c_eol => extend_string_eol;
1156
- c_any => extend_string;
1157
- *|;
1158
-
1159
- interp_backslash_delimited_words := |*
1160
- interp_code => extend_interp_code;
1161
- interp_digit_var => extend_interp_digit_var;
1162
- interp_var => extend_interp_var;
1163
- c_space+ => extend_string_space;
1164
- c_eol => extend_string_eol;
1165
- c_any => extend_string;
1166
- *|;
1167
-
1168
- plain_backslash_delimited_words := |*
1169
- c_space+ => extend_string_space;
1170
- c_eol => extend_string_eol;
1171
- c_any => extend_string;
1172
- *|;
1173
-
1174
- regexp_modifiers := |*
1175
- [A-Za-z]+
1176
- => {
1177
- unknown_options = tok.scan(/[^imxouesn]/)
1178
- if unknown_options.any?
1179
- diagnostic :error, :regexp_options,
1180
- { :options => unknown_options.join }
1181
- end
1182
-
1183
- emit(:tREGEXP_OPT)
1184
- fnext expr_end;
1185
- fbreak;
1186
- };
1187
-
1188
- any
1189
- => {
1190
- emit(:tREGEXP_OPT, tok(@ts, @te - 1), @ts, @te - 1)
1191
- fhold;
1192
- fgoto expr_end;
1193
- };
1194
- *|;
1195
-
1196
- #
1197
- # === WHITESPACE HANDLING ===
1198
- #
1199
-
1200
- # Various contexts in Ruby allow various kinds of whitespace
1201
- # to be used. They are grouped to clarify the lexing machines
1202
- # and ease collection of comments.
1203
-
1204
- # A line of code with inline #comment at end is always equivalent
1205
- # to a line of code ending with just a newline, so an inline
1206
- # comment is deemed equivalent to non-newline whitespace
1207
- # (c_space character class).
1208
-
1209
- w_space =
1210
- c_space+
1211
- | '\\' e_heredoc_nl
1212
- ;
1213
-
1214
- w_comment =
1215
- '#' %{ @sharp_s = p - 1 }
1216
- # The (p == pe) condition compensates for added "\0" and
1217
- # the way Ragel handles EOF.
1218
- c_line* %{ emit_comment(@sharp_s, p == pe ? p - 2 : p) }
1219
- ;
1220
-
1221
- w_space_comment =
1222
- w_space
1223
- | w_comment
1224
- ;
1225
-
1226
- # A newline in non-literal context always interoperates with
1227
- # here document logic and can always be escaped by a backslash,
1228
- # still interoperating with here document logic in the same way,
1229
- # yet being invisible to anything else.
1230
- #
1231
- # To demonstrate:
1232
- #
1233
- # foo = <<FOO \
1234
- # bar
1235
- # FOO
1236
- # + 2
1237
- #
1238
- # is equivalent to `foo = "bar\n" + 2`.
1239
-
1240
- w_newline =
1241
- e_heredoc_nl;
1242
-
1243
- w_any =
1244
- w_space
1245
- | w_comment
1246
- | w_newline
1247
- ;
1248
-
1249
-
1250
- #
1251
- # === EXPRESSION PARSING ===
1252
- #
1253
-
1254
- # These rules implement a form of manually defined lookahead.
1255
- # The default longest-match scanning does not work here due
1256
- # to sheer ambiguity.
1257
-
1258
- ambiguous_fid_suffix = # actual parsed
1259
- [?!] %{ tm = p } | # a? a?
1260
- [?!]'=' %{ tm = p - 2 } # a!=b a != b
1261
- ;
1262
-
1263
- ambiguous_ident_suffix = # actual parsed
1264
- ambiguous_fid_suffix |
1265
- '=' %{ tm = p } | # a= a=
1266
- '==' %{ tm = p - 2 } | # a==b a == b
1267
- '=~' %{ tm = p - 2 } | # a=~b a =~ b
1268
- '=>' %{ tm = p - 2 } | # a=>b a => b
1269
- '===' %{ tm = p - 3 } # a===b a === b
1270
- ;
1271
-
1272
- ambiguous_symbol_suffix = # actual parsed
1273
- ambiguous_ident_suffix |
1274
- '==>' %{ tm = p - 2 } # :a==>b :a= => b
1275
- ;
1276
-
1277
- # Ambiguous with 1.9 hash labels.
1278
- ambiguous_const_suffix = # actual parsed
1279
- '::' %{ tm = p - 2 } # A::B A :: B
1280
- ;
1281
-
1282
- # Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
1283
- # @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
1284
-
1285
- e_lbrack = '[' % {
1286
- @cond.push(false); @cmdarg.push(false)
1287
-
1288
- @paren_nest += 1
1289
- };
1290
-
1291
- e_rbrack = ']' % {
1292
- @paren_nest -= 1
1293
- };
1294
-
1295
- # Ruby 1.9 lambdas require parentheses counting in order to
1296
- # emit correct opening kDO/tLBRACE.
1297
-
1298
- e_lparen = '(' % {
1299
- @cond.push(false); @cmdarg.push(false)
1300
-
1301
- @paren_nest += 1
1302
-
1303
- if version?(18)
1304
- @command_start = true
1305
- end
1306
- };
1307
-
1308
- e_rparen = ')' % {
1309
- @paren_nest -= 1
1310
- };
1311
-
1312
- # Ruby is context-sensitive wrt/ local identifiers.
1313
- action local_ident {
1314
- emit(:tIDENTIFIER)
1315
-
1316
- if !@static_env.nil? && @static_env.declared?(tok)
1317
- fnext expr_endfn; fbreak;
1318
- else
1319
- fnext *arg_or_cmdarg(cmd_state); fbreak;
1320
- end
1321
- }
1322
-
1323
- # Variable lexing code is accessed from both expressions and
1324
- # string interpolation related code.
1325
- #
1326
- expr_variable := |*
1327
- global_var
1328
- => {
1329
- if tok =~ /^\$([1-9][0-9]*)$/
1330
- emit(:tNTH_REF, tok(@ts + 1).to_i)
1331
- elsif tok =~ /^\$([&`'+])$/
1332
- emit(:tBACK_REF)
1333
- else
1334
- emit(:tGVAR)
1335
- end
1336
-
1337
- fnext *stack_pop; fbreak;
1338
- };
1339
-
1340
- class_var_v
1341
- => {
1342
- if tok =~ /^@@[0-9]/
1343
- diagnostic :error, :cvar_name, { :name => tok }
1344
- end
1345
-
1346
- emit(:tCVAR)
1347
- fnext *stack_pop; fbreak;
1348
- };
1349
-
1350
- instance_var_v
1351
- => {
1352
- if tok =~ /^@[0-9]/
1353
- diagnostic :error, :ivar_name, { :name => tok }
1354
- end
1355
-
1356
- emit(:tIVAR)
1357
- fnext *stack_pop; fbreak;
1358
- };
1359
- *|;
1360
-
1361
- # Literal function name in definition (e.g. `def class`).
1362
- # Keywords are returned as their respective tokens; this is used
1363
- # to support singleton def `def self.foo`. Global variables are
1364
- # returned as `tGVAR`; this is used in global variable alias
1365
- # statements `alias $a $b`. Symbols are returned verbatim; this
1366
- # is used in `alias :a :"b#{foo}"` and `undef :a`.
1367
- #
1368
- # Transitions to `expr_endfn` afterwards.
1369
- #
1370
- expr_fname := |*
1371
- keyword
1372
- => { emit_table(KEYWORDS_BEGIN);
1373
- fnext expr_endfn; fbreak; };
1374
-
1375
- constant
1376
- => { emit(:tCONSTANT)
1377
- fnext expr_endfn; fbreak; };
1378
-
1379
- bareword [?=!]?
1380
- => { emit(:tIDENTIFIER)
1381
- fnext expr_endfn; fbreak; };
1382
-
1383
- global_var
1384
- => { p = @ts - 1
1385
- fnext expr_end; fcall expr_variable; };
1386
-
1387
- # If the handling was to be delegated to expr_end,
1388
- # these cases would transition to something else than
1389
- # expr_endfn, which is incorrect.
1390
- operator_fname |
1391
- operator_arithmetic |
1392
- operator_rest
1393
- => { emit_table(PUNCTUATION)
1394
- fnext expr_endfn; fbreak; };
1395
-
1396
- '::'
1397
- => { fhold; fhold; fgoto expr_end; };
1398
-
1399
- ':'
1400
- => { fhold; fgoto expr_beg; };
1401
-
1402
- '%s' c_any
1403
- => {
1404
- if version?(23)
1405
- type, delimiter = tok[0..-2], tok[-1].chr
1406
- fgoto *push_literal(type, delimiter, @ts);
1407
- else
1408
- p = @ts - 1
1409
- fgoto expr_end;
1410
- end
1411
- };
1412
-
1413
- w_any;
1414
-
1415
- c_any
1416
- => { fhold; fgoto expr_end; };
1417
-
1418
- c_eof => do_eof;
1419
- *|;
1420
-
1421
- # After literal function name in definition. Behaves like `expr_end`,
1422
- # but allows a tLABEL.
1423
- #
1424
- # Transitions to `expr_end` afterwards.
1425
- #
1426
- expr_endfn := |*
1427
- label ( any - ':' )
1428
- => { emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
1429
- fhold; fnext expr_labelarg; fbreak; };
1430
-
1431
- w_space_comment;
1432
-
1433
- c_any
1434
- => { fhold; fgoto expr_end; };
1435
-
1436
- c_eof => do_eof;
1437
- *|;
1438
-
1439
- # Literal function name in method call (e.g. `a.class`).
1440
- #
1441
- # Transitions to `expr_arg` afterwards.
1442
- #
1443
- expr_dot := |*
1444
- constant
1445
- => { emit(:tCONSTANT)
1446
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
1447
-
1448
- call_or_var
1449
- => { emit(:tIDENTIFIER)
1450
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
1451
-
1452
- bareword ambiguous_fid_suffix
1453
- => { emit(:tFID, tok(@ts, tm), @ts, tm)
1454
- fnext *arg_or_cmdarg(cmd_state); p = tm - 1; fbreak; };
1455
-
1456
- # See the comment in `expr_fname`.
1457
- operator_fname |
1458
- operator_arithmetic |
1459
- operator_rest
1460
- => { emit_table(PUNCTUATION)
1461
- fnext expr_arg; fbreak; };
1462
-
1463
- w_any;
1464
-
1465
- c_any
1466
- => { fhold; fgoto expr_end; };
1467
-
1468
- c_eof => do_eof;
1469
- *|;
1470
-
1471
- # The previous token emitted was a `tIDENTIFIER` or `tFID`; no space
1472
- # is consumed; the current expression is a command or method call.
1473
- #
1474
- expr_arg := |*
1475
- #
1476
- # COMMAND MODE SPECIFIC TOKENS
1477
- #
1478
-
1479
- # cmd (1 + 2)
1480
- # See below the rationale about expr_endarg.
1481
- w_space+ e_lparen
1482
- => {
1483
- if version?(18)
1484
- emit(:tLPAREN2, '('.freeze, @te - 1, @te)
1485
- fnext expr_value; fbreak;
1486
- else
1487
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1488
- fnext expr_beg; fbreak;
1489
- end
1490
- };
1491
-
1492
- # meth(1 + 2)
1493
- # Regular method call.
1494
- e_lparen
1495
- => { emit(:tLPAREN2, '('.freeze)
1496
- fnext expr_beg; fbreak; };
1497
-
1498
- # meth [...]
1499
- # Array argument. Compare with indexing `meth[...]`.
1500
- w_space+ e_lbrack
1501
- => { emit(:tLBRACK, '['.freeze, @te - 1, @te)
1502
- fnext expr_beg; fbreak; };
1503
-
1504
- # cmd {}
1505
- # Command: method call without parentheses.
1506
- w_space* e_lbrace
1507
- => {
1508
- if @lambda_stack.last == @paren_nest
1509
- @lambda_stack.pop
1510
- emit(:tLAMBEG, '{'.freeze, @te - 1, @te)
1511
- else
1512
- emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1513
- end
1514
- @command_start = true
1515
- @paren_nest += 1
1516
- fnext expr_value; fbreak;
1517
- };
1518
-
1519
- #
1520
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
1521
- #
1522
-
1523
- # a??
1524
- # Ternary operator
1525
- '?' c_space_nl
1526
- => {
1527
- # Unlike expr_beg as invoked in the next rule, do not warn
1528
- p = @ts - 1
1529
- fgoto expr_end;
1530
- };
1531
-
1532
- # a ?b, a? ?
1533
- # Character literal or ternary operator
1534
- w_space* '?'
1535
- => { fhold; fgoto expr_beg; };
1536
-
1537
- # a %{1}, a %[1] (but not "a %=1=" or "a % foo")
1538
- # a /foo/ (but not "a / foo" or "a /=foo")
1539
- # a <<HEREDOC
1540
- w_space+ %{ tm = p }
1541
- ( [%/] ( c_any - c_space_nl - '=' ) # /
1542
- | '<<'
1543
- )
1544
- => {
1545
- if tok(tm, tm + 1) == '/'.freeze
1546
- # Ambiguous regexp literal.
1547
- diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
1548
- end
1549
-
1550
- p = tm - 1
1551
- fgoto expr_beg;
1552
- };
1553
-
1554
- # x *1
1555
- # Ambiguous splat, kwsplat or block-pass.
1556
- w_space+ %{ tm = p } ( '+' | '-' | '*' | '&' | '**' )
1557
- => {
1558
- diagnostic :warning, :ambiguous_prefix, { :prefix => tok(tm, @te) },
1559
- range(tm, @te)
1560
-
1561
- p = tm - 1
1562
- fgoto expr_beg;
1563
- };
1564
-
1565
- # x ::Foo
1566
- # Ambiguous toplevel constant access.
1567
- w_space+ '::'
1568
- => { fhold; fhold; fgoto expr_beg; };
1569
-
1570
- # x:b
1571
- # Symbol.
1572
- w_space* ':'
1573
- => { fhold; fgoto expr_beg; };
1574
-
1575
- w_space+ label
1576
- => { p = @ts - 1; fgoto expr_beg; };
1577
-
1578
- #
1579
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
1580
- #
1581
-
1582
- # a ? b
1583
- # Ternary operator.
1584
- w_space+ %{ tm = p } '?' c_space_nl
1585
- => { p = tm - 1; fgoto expr_end; };
1586
-
1587
- # x + 1: Binary operator or operator-assignment.
1588
- w_space* operator_arithmetic
1589
- ( '=' | c_space_nl )? |
1590
- # x rescue y: Modifier keyword.
1591
- w_space* keyword_modifier |
1592
- # a &. b: Safe navigation operator.
1593
- w_space* '&.' |
1594
- # Miscellanea.
1595
- w_space* punctuation_end
1596
- => {
1597
- p = @ts - 1
1598
- fgoto expr_end;
1599
- };
1600
-
1601
- w_space;
1602
-
1603
- w_comment
1604
- => { fgoto expr_end; };
1605
-
1606
- w_newline
1607
- => { fhold; fgoto expr_end; };
1608
-
1609
- c_any
1610
- => { fhold; fgoto expr_beg; };
1611
-
1612
- c_eof => do_eof;
1613
- *|;
1614
-
1615
- # The previous token was an identifier which was seen while in the
1616
- # command mode (that is, the state at the beginning of #advance was
1617
- # expr_value). This state is very similar to expr_arg, but disambiguates
1618
- # two very rare and specific conditions:
1619
- # * In 1.8 mode, "foo (lambda do end)".
1620
- # * In 1.9+ mode, "f x: -> do foo do end end".
1621
- expr_cmdarg := |*
1622
- w_space+ e_lparen
1623
- => {
1624
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1625
- if version?(18)
1626
- fnext expr_value; fbreak;
1627
- else
1628
- fnext expr_beg; fbreak;
1629
- end
1630
- };
1631
-
1632
- w_space* 'do'
1633
- => {
1634
- if @cond.active?
1635
- emit(:kDO_COND, 'do'.freeze, @te - 2, @te)
1636
- else
1637
- emit(:kDO, 'do'.freeze, @te - 2, @te)
1638
- end
1639
- fnext expr_value; fbreak;
1640
- };
1641
-
1642
- c_any |
1643
- # Disambiguate with the `do' rule above.
1644
- w_space* bareword |
1645
- w_space* label
1646
- => { p = @ts - 1
1647
- fgoto expr_arg; };
1648
-
1649
- c_eof => do_eof;
1650
- *|;
1651
-
1652
- # The rationale for this state is pretty complex. Normally, if an argument
1653
- # is passed to a command and then there is a block (tLCURLY...tRCURLY),
1654
- # the block is attached to the innermost argument (`f` in `m f {}`), or it
1655
- # is a parse error (`m 1 {}`). But there is a special case for passing a single
1656
- # primary expression grouped with parentheses: if you write `m (1) {}` or
1657
- # (2.0 only) `m () {}`, then the block is attached to `m`.
1658
- #
1659
- # Thus, we recognize the opening `(` of a command (remember, a command is
1660
- # a method call without parens) as a tLPAREN_ARG; then, in parser, we recognize
1661
- # `tLPAREN_ARG expr rparen` as a `primary_expr` and before rparen, set the
1662
- # lexer's state to `expr_endarg`, which makes it emit the possibly following
1663
- # `{` as `tLBRACE_ARG`.
1664
- #
1665
- # The default post-`expr_endarg` state is `expr_end`, so this state also handles
1666
- # `do` (as `kDO_BLOCK` in `expr_beg`).
1667
- expr_endarg := |*
1668
- e_lbrace
1669
- => {
1670
- if @lambda_stack.last == @paren_nest
1671
- @lambda_stack.pop
1672
- emit(:tLAMBEG, '{'.freeze)
1673
- else
1674
- emit(:tLBRACE_ARG, '{'.freeze)
1675
- end
1676
- @paren_nest += 1
1677
- @command_start = true
1678
- fnext expr_value; fbreak;
1679
- };
1680
-
1681
- 'do'
1682
- => { emit_do(true)
1683
- fnext expr_value; fbreak; };
1684
-
1685
- w_space_comment;
1686
-
1687
- c_any
1688
- => { fhold; fgoto expr_end; };
1689
-
1690
- c_eof => do_eof;
1691
- *|;
1692
-
1693
- # The rationale for this state is that several keywords accept value
1694
- # (i.e. should transition to `expr_beg`), do not accept it like a command
1695
- # (i.e. not an `expr_arg`), and must behave like a statement, that is,
1696
- # accept a modifier if/while/etc.
1697
- #
1698
- expr_mid := |*
1699
- keyword_modifier
1700
- => { emit_table(KEYWORDS)
1701
- fnext expr_beg; fbreak; };
1702
-
1703
- bareword
1704
- => { p = @ts - 1; fgoto expr_beg; };
1705
-
1706
- w_space_comment;
1707
-
1708
- w_newline
1709
- => { fhold; fgoto expr_end; };
1710
-
1711
- c_any
1712
- => { fhold; fgoto expr_beg; };
1713
-
1714
- c_eof => do_eof;
1715
- *|;
1716
-
1717
- # Beginning of an expression.
1718
- #
1719
- # Don't fall through to this state from `c_any`; make sure to handle
1720
- # `c_space* c_nl` and let `expr_end` handle the newline.
1721
- # Otherwise code like `f\ndef x` gets glued together and the parser
1722
- # explodes.
1723
- #
1724
- expr_beg := |*
1725
- # +5, -5, - 5
1726
- [+\-] w_any* [0-9]
1727
- => {
1728
- emit(:tUNARY_NUM, tok(@ts, @ts + 1), @ts, @ts + 1)
1729
- fhold; fnext expr_end; fbreak;
1730
- };
1731
-
1732
- # splat *a
1733
- '*'
1734
- => { emit(:tSTAR, '*'.freeze)
1735
- fbreak; };
1736
-
1737
- #
1738
- # STRING AND REGEXP LITERALS
1739
- #
1740
-
1741
- # /regexp/oui
1742
- # /=/ (disambiguation with /=)
1743
- '/' c_any
1744
- => {
1745
- type = delimiter = tok[0].chr
1746
- fhold; fgoto *push_literal(type, delimiter, @ts);
1747
- };
1748
-
1749
- # %<string>
1750
- '%' ( any - [A-Za-z] )
1751
- => {
1752
- type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
1753
- fgoto *push_literal(type, delimiter, @ts);
1754
- };
1755
-
1756
- # %w(we are the people)
1757
- '%' [A-Za-z]+ c_any
1758
- => {
1759
- type, delimiter = tok[0..-2], tok[-1].chr
1760
- fgoto *push_literal(type, delimiter, @ts);
1761
- };
1762
-
1763
- '%' c_eof
1764
- => {
1765
- diagnostic :fatal, :string_eof, nil, range(@ts, @ts + 1)
1766
- };
1767
-
1768
- # Heredoc start.
1769
- # <<END | <<'END' | <<"END" | <<`END` |
1770
- # <<-END | <<-'END' | <<-"END" | <<-`END` |
1771
- # <<~END | <<~'END' | <<~"END" | <<~`END`
1772
- '<<' [~\-]?
1773
- ( '"' ( any - '"' )* '"'
1774
- | "'" ( any - "'" )* "'"
1775
- | "`" ( any - "`" )* "`"
1776
- | bareword ) % { heredoc_e = p }
1777
- c_line* c_nl % { new_herebody_s = p }
1778
- => {
1779
- tok(@ts, heredoc_e) =~ /^<<(-?)(~?)(["'`]?)(.*)\3$/m
1780
-
1781
- indent = !$1.empty? || !$2.empty?
1782
- dedent_body = !$2.empty?
1783
- type = $3.empty? ? '<<"'.freeze : ('<<'.freeze + $3)
1784
- delimiter = $4
1785
-
1786
- if @version >= 27
1787
- if delimiter.count("\n") > 0 || delimiter.count("\r") > 0
1788
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1789
- end
1790
- elsif @version >= 24
1791
- if delimiter.count("\n") > 0
1792
- if delimiter.end_with?("\n")
1793
- diagnostic :warning, :heredoc_id_ends_with_nl, nil, range(@ts, @ts + 1)
1794
- delimiter = delimiter.rstrip
1795
- else
1796
- diagnostic :fatal, :heredoc_id_has_newline, nil, range(@ts, @ts + 1)
1797
- end
1798
- end
1799
- end
1800
-
1801
- if dedent_body && version?(18, 19, 20, 21, 22)
1802
- emit(:tLSHFT, '<<'.freeze, @ts, @ts + 2)
1803
- p = @ts + 1
1804
- fnext expr_beg; fbreak;
1805
- else
1806
- fnext *push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
1807
-
1808
- @herebody_s ||= new_herebody_s
1809
- p = @herebody_s - 1
1810
- end
1811
- };
1812
-
1813
- # Escaped unterminated heredoc start
1814
- # <<'END | <<"END | <<`END |
1815
- # <<-'END | <<-"END | <<-`END |
1816
- # <<~'END | <<~"END | <<~`END
1817
- #
1818
- # If the heredoc is terminated the rule above should handle it
1819
- '<<' [~\-]?
1820
- ('"' (any - c_nl - '"')*
1821
- |"'" (any - c_nl - "'")*
1822
- |"`" (any - c_nl - "`")
1823
- )
1824
- => {
1825
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1826
- };
1827
-
1828
- #
1829
- # SYMBOL LITERALS
1830
- #
1831
-
1832
- # :&&, :||
1833
- ':' ('&&' | '||') => {
1834
- fhold; fhold;
1835
- emit(:tSYMBEG, tok(@ts, @ts + 1), @ts, @ts + 1)
1836
- fgoto expr_fname;
1837
- };
1838
-
1839
- # :"bar", :'baz'
1840
- ':' ['"] # '
1841
- => {
1842
- type, delimiter = tok, tok[-1].chr
1843
- fgoto *push_literal(type, delimiter, @ts);
1844
- };
1845
-
1846
- # :!@ is :!
1847
- # :~@ is :~
1848
- ':' [!~] '@'
1849
- => {
1850
- emit(:tSYMBOL, tok(@ts + 1, @ts + 2))
1851
- fnext expr_end; fbreak;
1852
- };
1853
-
1854
- ':' bareword ambiguous_symbol_suffix
1855
- => {
1856
- emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
1857
- p = tm - 1
1858
- fnext expr_end; fbreak;
1859
- };
1860
-
1861
- ':' ( bareword | global_var | class_var | instance_var |
1862
- operator_fname | operator_arithmetic | operator_rest )
1863
- => {
1864
- emit(:tSYMBOL, tok(@ts + 1), @ts)
1865
- fnext expr_end; fbreak;
1866
- };
1867
-
1868
- ':' ( '@' %{ tm = p - 1; diag_msg = :ivar_name }
1869
- | '@@' %{ tm = p - 2; diag_msg = :cvar_name }
1870
- ) [0-9]*
1871
- => {
1872
- if @version >= 27
1873
- diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
1874
- else
1875
- emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
1876
- p = @ts
1877
- end
1878
-
1879
- fnext expr_end; fbreak;
1880
- };
1881
-
1882
- #
1883
- # AMBIGUOUS TERNARY OPERATOR
1884
- #
1885
-
1886
- # Character constant, like ?a, ?\n, ?\u1000, and so on
1887
- # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
1888
- '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
1889
- | (c_any - c_space_nl - e_bs) % { @escape = nil }
1890
- )
1891
- => {
1892
- value = @escape || tok(@ts + 1)
1893
-
1894
- if version?(18)
1895
- emit(:tINTEGER, value.getbyte(0))
1896
- else
1897
- emit(:tCHARACTER, value)
1898
- end
1899
-
1900
- fnext expr_end; fbreak;
1901
- };
1902
-
1903
- '?' c_space_nl
1904
- => {
1905
- escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
1906
- "\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
1907
- diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
1908
-
1909
- p = @ts - 1
1910
- fgoto expr_end;
1911
- };
1912
-
1913
- '?' c_eof
1914
- => {
1915
- diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
1916
- };
1917
-
1918
- # f ?aa : b: Disambiguate with a character literal.
1919
- '?' [A-Za-z_] bareword
1920
- => {
1921
- p = @ts - 1
1922
- fgoto expr_end;
1923
- };
1924
-
1925
- #
1926
- # AMBIGUOUS EMPTY BLOCK ARGUMENTS
1927
- #
1928
-
1929
- # Ruby >= 2.7 emits it as two tPIPE terminals
1930
- # while Ruby < 2.7 as a single tOROP (like in `a || b`)
1931
- '||'
1932
- => {
1933
- if @version >= 27
1934
- emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
1935
- fhold;
1936
- fnext expr_beg; fbreak;
1937
- else
1938
- p -= 2
1939
- fgoto expr_end;
1940
- end
1941
- };
1942
-
1943
- #
1944
- # KEYWORDS AND PUNCTUATION
1945
- #
1946
-
1947
- # a({b=>c})
1948
- e_lbrace
1949
- => {
1950
- if @lambda_stack.last == @paren_nest
1951
- @lambda_stack.pop
1952
- @command_start = true
1953
- emit(:tLAMBEG, '{'.freeze)
1954
- else
1955
- emit(:tLBRACE, '{'.freeze)
1956
- end
1957
- @paren_nest += 1
1958
- fbreak;
1959
- };
1960
-
1961
- # a([1, 2])
1962
- e_lbrack
1963
- => { emit(:tLBRACK, '['.freeze)
1964
- fbreak; };
1965
-
1966
- # a()
1967
- e_lparen
1968
- => { emit(:tLPAREN, '('.freeze)
1969
- fbreak; };
1970
-
1971
- # a(+b)
1972
- punctuation_begin
1973
- => { emit_table(PUNCTUATION_BEGIN)
1974
- fbreak; };
1975
-
1976
- # rescue Exception => e: Block rescue.
1977
- # Special because it should transition to expr_mid.
1978
- 'rescue' %{ tm = p } '=>'?
1979
- => { emit(:kRESCUE, 'rescue'.freeze, @ts, tm)
1980
- p = tm - 1
1981
- fnext expr_mid; fbreak; };
1982
-
1983
- # if a: Statement if.
1984
- keyword_modifier
1985
- => { emit_table(KEYWORDS_BEGIN)
1986
- @command_start = true
1987
- fnext expr_value; fbreak; };
1988
-
1989
- #
1990
- # RUBY 1.9 HASH LABELS
1991
- #
1992
-
1993
- label ( any - ':' )
1994
- => {
1995
- fhold;
1996
-
1997
- if version?(18)
1998
- ident = tok(@ts, @te - 2)
1999
-
2000
- emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
2001
- ident, @ts, @te - 2)
2002
- fhold; # continue as a symbol
2003
-
2004
- if !@static_env.nil? && @static_env.declared?(ident)
2005
- fnext expr_end;
2006
- else
2007
- fnext *arg_or_cmdarg(cmd_state);
2008
- end
2009
- else
2010
- emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
2011
- fnext expr_labelarg;
2012
- end
2013
-
2014
- fbreak;
2015
- };
2016
-
2017
- #
2018
- # RUBY 2.7 BEGINLESS RANGE
2019
-
2020
- '..'
2021
- => {
2022
- if @version >= 27
2023
- emit(:tBDOT2)
2024
- else
2025
- emit(:tDOT2)
2026
- end
2027
-
2028
- fnext expr_beg; fbreak;
2029
- };
2030
-
2031
- '...'
2032
- => {
2033
- if @version >= 30
2034
- if @lambda_stack.any? && @lambda_stack.last + 1 == @paren_nest
2035
- # To reject `->(...)` like `->...`
2036
- emit(:tDOT3)
2037
- else
2038
- emit(:tBDOT3)
2039
- end
2040
- elsif @version >= 27
2041
- emit(:tBDOT3)
2042
- else
2043
- emit(:tDOT3)
2044
- end
2045
-
2046
- fnext expr_beg; fbreak;
2047
- };
2048
-
2049
- #
2050
- # CONTEXT-DEPENDENT VARIABLE LOOKUP OR COMMAND INVOCATION
2051
- #
2052
-
2053
- # foo= bar: Disambiguate with bareword rule below.
2054
- bareword ambiguous_ident_suffix |
2055
- # def foo: Disambiguate with bareword rule below.
2056
- keyword
2057
- => { p = @ts - 1
2058
- fgoto expr_end; };
2059
-
2060
- # a = 42; a [42]: Indexing.
2061
- # def a; end; a [42]: Array argument.
2062
- call_or_var
2063
- => local_ident;
2064
-
2065
- (call_or_var - keyword)
2066
- % { ident_tok = tok; ident_ts = @ts; ident_te = @te; }
2067
- w_space+ '('
2068
- => {
2069
- emit(:tIDENTIFIER, ident_tok, ident_ts, ident_te)
2070
- p = ident_te - 1
2071
-
2072
- if !@static_env.nil? && @static_env.declared?(ident_tok) && @version < 25
2073
- fnext expr_endfn;
2074
- else
2075
- fnext expr_cmdarg;
2076
- end
2077
- fbreak;
2078
- };
2079
-
2080
- #
2081
- # WHITESPACE
2082
- #
2083
-
2084
- w_any;
2085
-
2086
- e_heredoc_nl '=begin' ( c_space | c_nl_zlen )
2087
- => {
2088
- p = @ts - 1
2089
- @cs_before_block_comment = @cs
2090
- fgoto line_begin;
2091
- };
2092
-
2093
- #
2094
- # DEFAULT TRANSITION
2095
- #
2096
-
2097
- # The following rules match most binary and all unary operators.
2098
- # Rules for binary operators provide better error reporting.
2099
- operator_arithmetic '=' |
2100
- operator_rest |
2101
- punctuation_end |
2102
- c_any
2103
- => { p = @ts - 1; fgoto expr_end; };
2104
-
2105
- c_eof => do_eof;
2106
- *|;
2107
-
2108
- # Special newline handling for "def a b:"
2109
- # Entered from the label rule of expr_beg (non-1.8 branch) right after a tLABEL has been emitted.
2110
- expr_labelarg := |*
2111
- w_space_comment; # no action attached: whatever w_space_comment matches is skipped
2112
-
2113
- w_newline
2114
- => {
2115
- if @in_kwarg # when @in_kwarg is set the newline is put back and lexed by expr_end
2116
- fhold; fgoto expr_end;
2117
- else
2118
- fgoto line_begin; # otherwise the newline is consumed and scanning continues in line_begin
2119
- end
2120
- };
2121
-
2122
- c_any
2123
- => { fhold; fgoto expr_beg; }; # anything else: put the character back and lex it in expr_beg
2124
-
2125
- c_eof => do_eof;
2126
- *|;
2127
-
2128
- # Like expr_beg, but no 1.9 label or 2.2 quoted label possible.
2129
- # Text shaped like a label is handed to expr_end, and a quote always opens a string literal here.
2130
- expr_value := |*
2131
- # a:b: a(:b), a::B, A::B
2132
- label (any - ':')
2133
- => { p = @ts - 1 # rewind to the token start so nothing is consumed by this rule
2134
- fgoto expr_end; };
2135
-
2136
- # "bar", 'baz'
2137
- ['"] # '
2138
- => {
2139
- fgoto *push_literal(tok, tok, @ts); # the quote character is passed as both literal type and delimiter
2140
- };
2141
-
2142
- w_space_comment; # no action attached: the matched text is skipped
2143
-
2144
- w_newline
2145
- => { fgoto line_begin; };
2146
-
2147
- c_any
2148
- => { fhold; fgoto expr_beg; }; # anything else: put the character back and lex it in expr_beg
2149
-
2150
- c_eof => do_eof;
2151
- *|;
2152
-
2153
- expr_end := |* # Scanner covering lambdas, block openers, keywords, numbers, strings, constants, variables, method calls, operators and whitespace (see the banners below).
2154
- #
2155
- # STABBY LAMBDA
2156
- #
2157
-
2158
- '->'
2159
- => {
2160
- emit(:tLAMBDA, '->'.freeze, @ts, @ts + 2)
2161
-
2162
- @lambda_stack.push @paren_nest # remember the nesting level of this lambda; the block opener rule below compares against it
2163
- fnext expr_endfn; fbreak;
2164
- };
2165
-
2166
- e_lbrace | 'do'
2167
- => {
2168
- if @lambda_stack.last == @paren_nest # opener belongs to the most recent lambda recorded at this nesting level
2169
- @lambda_stack.pop
2170
-
2171
- if tok == '{'.freeze
2172
- emit(:tLAMBEG, '{'.freeze)
2173
- else # 'do'
2174
- emit(:kDO_LAMBDA, 'do'.freeze)
2175
- end
2176
- else
2177
- if tok == '{'.freeze
2178
- emit(:tLCURLY, '{'.freeze)
2179
- else # 'do'
2180
- emit_do
2181
- end
2182
- end
2183
- if tok == '{'.freeze # only the brace form changes the nesting counter
2184
- @paren_nest += 1
2185
- end
2186
- @command_start = true
2187
-
2188
- fnext expr_value; fbreak;
2189
- };
2190
-
2191
- #
2192
- # KEYWORDS
2193
- #
2194
-
2195
- keyword_with_fname
2196
- => { emit_table(KEYWORDS)
2197
- fnext expr_fname; fbreak; };
2198
-
2199
- 'class' w_any* '<<'
2200
- => { emit(:kCLASS, 'class'.freeze, @ts, @ts + 5) # two tokens from one match: the keyword at the start
2201
- emit(:tLSHFT, '<<'.freeze, @te - 2, @te) # and the shift operator at the last two characters
2202
- fnext expr_value; fbreak; };
2203
-
2204
- # a if b:c: Syntax error.
2205
- keyword_modifier
2206
- => { emit_table(KEYWORDS)
2207
- fnext expr_beg; fbreak; };
2208
-
2209
- # elsif b:c: elsif b(:c)
2210
- keyword_with_value
2211
- => { emit_table(KEYWORDS)
2212
- @command_start = true
2213
- fnext expr_value; fbreak; };
2214
-
2215
- keyword_with_mid
2216
- => { emit_table(KEYWORDS)
2217
- fnext expr_mid; fbreak; };
2218
-
2219
- keyword_with_arg
2220
- => {
2221
- emit_table(KEYWORDS)
2222
-
2223
- if version?(18) && tok == 'not'.freeze # only for version 18 does the not keyword continue in expr_beg
2224
- fnext expr_beg; fbreak;
2225
- else
2226
- fnext expr_arg; fbreak;
2227
- end
2228
- };
2229
-
2230
- '__ENCODING__'
2231
- => {
2232
- if version?(18) # for version 18 this is emitted as an ordinary identifier, not a keyword
2233
- emit(:tIDENTIFIER)
2234
-
2235
- unless !@static_env.nil? && @static_env.declared?(tok)
2236
- fnext *arg_or_cmdarg(cmd_state);
2237
- end
2238
- else
2239
- emit(:k__ENCODING__, '__ENCODING__'.freeze)
2240
- end
2241
- fbreak;
2242
- };
2243
-
2244
- keyword_with_end
2245
- => { emit_table(KEYWORDS)
2246
- fbreak; };
2247
-
2248
- #
2249
- # NUMERIC LITERALS
2250
- #
2251
-
2252
- ( '0' [Xx] %{ @num_base = 16; @num_digits_s = p } int_hex # with a base prefix the digits start at p, right after the prefix
2253
- | '0' [Dd] %{ @num_base = 10; @num_digits_s = p } int_dec
2254
- | '0' [Oo] %{ @num_base = 8; @num_digits_s = p } int_dec
2255
- | '0' [Bb] %{ @num_base = 2; @num_digits_s = p } int_bin
2256
- | [1-9] digit* '_'? %{ @num_base = 10; @num_digits_s = @ts } int_dec # without a prefix the digits start at the token start
2257
- | '0' digit* '_'? %{ @num_base = 8; @num_digits_s = @ts } int_dec # a leading zero alone selects base 8
2258
- ) %{ @num_suffix_s = p } int_suffix
2259
- => {
2260
- digits = tok(@num_digits_s, @num_suffix_s) # digit text between the recorded digit start and suffix start
2261
-
2262
- if digits.end_with? '_'.freeze
2263
- diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
2264
- range(@te - 1, @te)
2265
- elsif digits.empty? && @num_base == 8 && version?(18)
2266
- # 1.8 did not raise an error on 0o.
2267
- digits = '0'.freeze
2268
- elsif digits.empty?
2269
- diagnostic :error, :empty_numeric
2270
- elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
2271
- invalid_s = @num_digits_s + invalid_idx # position of the first 8 or 9 inside the octal literal
2272
- diagnostic :error, :invalid_octal, nil,
2273
- range(invalid_s, invalid_s + 1)
2274
- end
2275
-
2276
- if version?(18, 19, 20)
2277
- emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s) # token ends where the suffix would begin
2278
- p = @num_suffix_s - 1 # resume scanning at the suffix position
2279
- else
2280
- @num_xfrm.call(digits.to_i(@num_base)) # NOTE(review): @num_xfrm is assigned outside this chunk, presumably by the suffix actions - confirm
2281
- end
2282
- fbreak;
2283
- };
2284
-
2285
- flo_frac flo_pow?
2286
- => {
2287
- diagnostic :error, :no_dot_digit_literal
2288
- };
2289
-
2290
- flo_int [eE]
2291
- => {
2292
- if version?(18, 19, 20)
2293
- diagnostic :error,
2294
- :trailing_in_number, { :character => tok(@te - 1, @te) },
2295
- range(@te - 1, @te)
2296
- else
2297
- emit(:tINTEGER, tok(@ts, @te - 1).to_i, @ts, @te - 1) # token excludes the trailing e or E
2298
- fhold; fbreak; # the held character is lexed again as part of the next token
2299
- end
2300
- };
2301
-
2302
- flo_int flo_frac [eE]
2303
- => {
2304
- if version?(18, 19, 20)
2305
- diagnostic :error,
2306
- :trailing_in_number, { :character => tok(@te - 1, @te) },
2307
- range(@te - 1, @te)
2308
- else
2309
- emit(:tFLOAT, tok(@ts, @te - 1).to_f, @ts, @te - 1) # token excludes the trailing e or E
2310
- fhold; fbreak;
2311
- end
2312
- };
2313
-
2314
- flo_int
2315
- ( flo_frac? flo_pow %{ @num_suffix_s = p } flo_pow_suffix
2316
- | flo_frac %{ @num_suffix_s = p } flo_suffix
2317
- )
2318
- => {
2319
- digits = tok(@ts, @num_suffix_s) # float text from the token start up to the suffix start
2320
-
2321
- if version?(18, 19, 20)
2322
- emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
2323
- p = @num_suffix_s - 1 # resume scanning at the suffix position
2324
- else
2325
- @num_xfrm.call(digits) # here the untouched digit string is passed, unlike the integer rule above
2326
- end
2327
- fbreak;
2328
- };
2329
-
2330
- #
2331
- # STRING AND XSTRING LITERALS
2332
- #
2333
-
2334
- # `echo foo`, "bar", 'baz'
2335
- '`' | ['"] # '
2336
- => {
2337
- type, delimiter = tok, tok[-1].chr
2338
- fgoto *push_literal(type, delimiter, @ts, nil, false, false, true); # positional flags are interpreted by push_literal, defined outside this chunk
2339
- };
2340
-
2341
- #
2342
- # CONSTANTS AND VARIABLES
2343
- #
2344
-
2345
- constant
2346
- => { emit(:tCONSTANT)
2347
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
2348
-
2349
- constant ambiguous_const_suffix
2350
- => { emit(:tCONSTANT, tok(@ts, tm), @ts, tm) # NOTE(review): tm is set outside this rule, presumably by ambiguous_const_suffix - confirm
2351
- p = tm - 1; fbreak; };
2352
-
2353
- global_var | class_var_v | instance_var_v
2354
- => { p = @ts - 1; fcall expr_variable; }; # rewind to the token start and let expr_variable lex it
2355
-
2356
- #
2357
- # METHOD CALLS
2358
- #
2359
-
2360
- '.' | '&.' | '::'
2361
- => { emit_table(PUNCTUATION)
2362
- fnext expr_dot; fbreak; };
2363
-
2364
- call_or_var
2365
- => local_ident;
2366
-
2367
- bareword ambiguous_fid_suffix
2368
- => {
2369
- if tm == @te
2370
- # Suffix was consumed, e.g. foo!
2371
- emit(:tFID)
2372
- else
2373
- # Suffix was not consumed, e.g. foo!=
2374
- emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
2375
- p = tm - 1
2376
- end
2377
- fnext expr_arg; fbreak;
2378
- };
2379
-
2380
- #
2381
- # OPERATORS
2382
- #
2383
-
2384
- '*' | '=>'
2385
- => {
2386
- emit_table(PUNCTUATION)
2387
- fgoto expr_value;
2388
- };
2389
-
2390
- # When '|', '~', '!', '=>' are used as operators
2391
- # they do not accept any symbols (or quoted labels) after.
2392
- # Other binary operators accept it.
2393
- ( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' )
2394
- => {
2395
- emit_table(PUNCTUATION);
2396
- fnext expr_value; fbreak;
2397
- };
2398
-
2399
- ( e_lparen | '|' | '~' | '!' )
2400
- => { emit_table(PUNCTUATION)
2401
- fnext expr_beg; fbreak; };
2402
-
2403
- e_rbrace | e_rparen | e_rbrack
2404
- => {
2405
- emit_table(PUNCTUATION)
2406
-
2407
- if @version < 24 # versions below 24 use lexpop on both stacks, later versions use pop
2408
- @cond.lexpop
2409
- @cmdarg.lexpop
2410
- else
2411
- @cond.pop
2412
- @cmdarg.pop
2413
- end
2414
-
2415
- if tok == '}'.freeze || tok == ']'.freeze
2416
- if @version >= 25
2417
- fnext expr_end;
2418
- else
2419
- fnext expr_endarg;
2420
- end
2421
- else # )
2422
- # fnext expr_endfn; ?
2423
- end
2424
-
2425
- fbreak;
2426
- };
2427
-
2428
- operator_arithmetic '='
2429
- => { emit(:tOP_ASGN, tok(@ts, @te - 1)) # token value is the operator without the trailing equals sign
2430
- fnext expr_beg; fbreak; };
2431
-
2432
- '?'
2433
- => { emit(:tEH, '?'.freeze)
2434
- fnext expr_value; fbreak; };
2435
-
2436
- e_lbrack
2437
- => { emit(:tLBRACK2, '['.freeze)
2438
- fnext expr_beg; fbreak; };
2439
-
2440
- '...' c_nl
2441
- => {
2442
- if @paren_nest == 0 # the warning is only issued when @paren_nest is zero
2443
- diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
2444
- end
2445
-
2446
- emit(:tDOT3, '...'.freeze, @ts, @te - 1) # token range excludes the newline
2447
- fhold; # put the newline back so it is lexed separately
2448
- fnext expr_beg; fbreak;
2449
- };
2450
-
2451
- punctuation_end
2452
- => { emit_table(PUNCTUATION)
2453
- fnext expr_beg; fbreak; };
2454
-
2455
- #
2456
- # WHITESPACE
2457
- #
2458
-
2459
- w_space_comment; # no action attached: the matched text is skipped
2460
-
2461
- w_newline
2462
- => { fgoto leading_dot; }; # leading_dot decides whether this newline ends the statement
2463
-
2464
- ';'
2465
- => { emit(:tSEMI, ';'.freeze)
2466
- @command_start = true
2467
- fnext expr_value; fbreak; };
2468
-
2469
- '\\' c_line {
2470
- diagnostic :error, :bare_backslash, nil, range(@ts, @ts + 1) # location starts at the backslash
2471
- fhold;
2472
- };
2473
-
2474
- c_any
2475
- => {
2476
- diagnostic :fatal, :unexpected, { :character => tok.inspect[1..-2] } # inspect output with its first and last character dropped
2477
- };
2478
-
2479
- c_eof => do_eof;
2480
- *|;
2481
-
2482
- leading_dot := |* # Entered from the w_newline rule of expr_end; decides whether that newline ends the statement.
2483
- # Insane leading dots:
2484
- # a #comment
2485
- # # post-2.7 comment
2486
- # .b: a.b
2487
-
2488
- # Here we use '\n' instead of w_newline to not modify @newline_s
2489
- # and eventually properly emit tNL
2490
- (c_space* w_space_comment '\n')+
2491
- => {
2492
- if @version < 27
2493
- # Ruby before 2.7 doesn't support comments before leading dot.
2494
- # If a line after "a" starts with a comment then "a" is a self-contained statement.
2495
- # So in that case we emit a special tNL token and start reading the
2496
- # next line as a separate statement.
2497
- #
2498
- # Note: block comments before leading dot are not supported on any version of Ruby.
2499
- emit(:tNL, nil, @newline_s, @newline_s + 1)
2500
- fhold; fnext line_begin; fbreak;
2501
- end # for version 27 and later nothing is emitted and scanning continues in this state
2502
- };
2503
-
2504
- c_space* %{ tm = p } ('.' | '&.') # tm records the position where the dot begins
2505
- => { p = tm - 1; fgoto expr_end; }; # rewind to the dot and lex it in expr_end; no tNL is emitted
2506
-
2507
- any
2508
- => { emit(:tNL, nil, @newline_s, @newline_s + 1) # anything else: emit tNL at the position stored in @newline_s
2509
- fhold; fnext line_begin; fbreak; };
2510
- *|;
2511
-
2512
- #
2513
- # === EMBEDDED DOCUMENT (aka BLOCK COMMENT) PARSING ===
2514
- #
2515
-
2516
- line_comment := |* # Scans the body of a block comment; entered from line_begin after @eq_begin_s has been set.
2517
- '=end' c_line* c_nl_zlen
2518
- => {
2519
- emit_comment(@eq_begin_s, @te) # comment spans from the stored start position to the end of this match
2520
- fgoto *@cs_before_block_comment; # resume in the state stored in @cs_before_block_comment
2521
- };
2522
-
2523
- c_line* c_nl; # any other complete line is consumed without an action
2524
-
2525
- c_line* zlen # NOTE(review): presumably reached when input ends before the closing marker - confirm against the zlen definition
2526
- => {
2527
- diagnostic :fatal, :embedded_document, nil,
2528
- range(@eq_begin_s, @eq_begin_s + '=begin'.length) # location covers the opening marker
2529
- };
2530
- *|;
2531
-
2532
- line_begin := |* # Entered after a newline by expr_labelarg, expr_value and leading_dot.
2533
- w_any; # no action attached: the matched text is skipped
2534
-
2535
- '=begin' ( c_space | c_nl_zlen )
2536
- => { @eq_begin_s = @ts # remember where the block comment starts; line_comment reads it
2537
- fgoto line_comment; };
2538
-
2539
- '__END__' ( c_eol - zlen )
2540
- => { p = pe - 3 }; # NOTE(review): moves p close to the end of input (pe), presumably to stop lexing after this marker - confirm
2541
-
2542
- c_any
2543
- => { cmd_state = true; fhold; fgoto expr_value; }; # anything else: set cmd_state, put the character back and lex it in expr_value
2544
-
2545
- c_eof => do_eof;
2546
- *|;
2547
-
2548
- }%%
2549
- # %
2550
- end