parser 2.7.1.1 → 3.0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/lib/parser.rb +1 -0
  3. data/lib/parser/all.rb +2 -0
  4. data/lib/parser/ast/processor.rb +5 -0
  5. data/lib/parser/base.rb +7 -5
  6. data/lib/parser/builders/default.rb +263 -23
  7. data/lib/parser/context.rb +5 -0
  8. data/lib/parser/current.rb +24 -6
  9. data/lib/parser/current_arg_stack.rb +5 -2
  10. data/lib/parser/diagnostic.rb +1 -1
  11. data/lib/parser/diagnostic/engine.rb +1 -2
  12. data/lib/parser/lexer.rb +887 -803
  13. data/lib/parser/macruby.rb +2214 -2189
  14. data/lib/parser/max_numparam_stack.rb +13 -5
  15. data/lib/parser/messages.rb +18 -0
  16. data/lib/parser/meta.rb +6 -5
  17. data/lib/parser/ruby18.rb +9 -3
  18. data/lib/parser/ruby19.rb +2297 -2289
  19. data/lib/parser/ruby20.rb +2413 -2397
  20. data/lib/parser/ruby21.rb +2419 -2411
  21. data/lib/parser/ruby22.rb +2468 -2460
  22. data/lib/parser/ruby23.rb +2452 -2452
  23. data/lib/parser/ruby24.rb +2435 -2430
  24. data/lib/parser/ruby25.rb +2220 -2214
  25. data/lib/parser/ruby26.rb +2220 -2214
  26. data/lib/parser/ruby27.rb +3715 -3615
  27. data/lib/parser/ruby28.rb +8047 -0
  28. data/lib/parser/ruby30.rb +8060 -0
  29. data/lib/parser/ruby31.rb +8226 -0
  30. data/lib/parser/rubymotion.rb +2190 -2182
  31. data/lib/parser/runner.rb +31 -2
  32. data/lib/parser/runner/ruby_rewrite.rb +2 -2
  33. data/lib/parser/source/buffer.rb +53 -28
  34. data/lib/parser/source/comment.rb +14 -1
  35. data/lib/parser/source/comment/associator.rb +31 -8
  36. data/lib/parser/source/map/method_definition.rb +25 -0
  37. data/lib/parser/source/range.rb +10 -3
  38. data/lib/parser/source/tree_rewriter.rb +100 -10
  39. data/lib/parser/source/tree_rewriter/action.rb +114 -21
  40. data/lib/parser/static_environment.rb +4 -0
  41. data/lib/parser/tree_rewriter.rb +1 -2
  42. data/lib/parser/variables_stack.rb +4 -0
  43. data/lib/parser/version.rb +1 -1
  44. data/parser.gemspec +3 -18
  45. metadata +17 -98
  46. data/.gitignore +0 -33
  47. data/.travis.yml +0 -42
  48. data/.yardopts +0 -21
  49. data/CHANGELOG.md +0 -1075
  50. data/CONTRIBUTING.md +0 -17
  51. data/Gemfile +0 -10
  52. data/README.md +0 -309
  53. data/Rakefile +0 -166
  54. data/ci/run_rubocop_specs +0 -14
  55. data/doc/AST_FORMAT.md +0 -2180
  56. data/doc/CUSTOMIZATION.md +0 -37
  57. data/doc/INTERNALS.md +0 -21
  58. data/doc/css/.gitkeep +0 -0
  59. data/doc/css/common.css +0 -68
  60. data/lib/parser/lexer.rl +0 -2536
  61. data/lib/parser/macruby.y +0 -2198
  62. data/lib/parser/ruby18.y +0 -1934
  63. data/lib/parser/ruby19.y +0 -2175
  64. data/lib/parser/ruby20.y +0 -2353
  65. data/lib/parser/ruby21.y +0 -2357
  66. data/lib/parser/ruby22.y +0 -2364
  67. data/lib/parser/ruby23.y +0 -2370
  68. data/lib/parser/ruby24.y +0 -2408
  69. data/lib/parser/ruby25.y +0 -2405
  70. data/lib/parser/ruby26.y +0 -2413
  71. data/lib/parser/ruby27.y +0 -2941
  72. data/lib/parser/rubymotion.y +0 -2182
  73. data/test/bug_163/fixtures/input.rb +0 -5
  74. data/test/bug_163/fixtures/output.rb +0 -5
  75. data/test/bug_163/rewriter.rb +0 -20
  76. data/test/helper.rb +0 -60
  77. data/test/parse_helper.rb +0 -319
  78. data/test/racc_coverage_helper.rb +0 -133
  79. data/test/test_base.rb +0 -31
  80. data/test/test_current.rb +0 -29
  81. data/test/test_diagnostic.rb +0 -96
  82. data/test/test_diagnostic_engine.rb +0 -62
  83. data/test/test_encoding.rb +0 -99
  84. data/test/test_lexer.rb +0 -3608
  85. data/test/test_lexer_stack_state.rb +0 -78
  86. data/test/test_parse_helper.rb +0 -80
  87. data/test/test_parser.rb +0 -9430
  88. data/test/test_runner_parse.rb +0 -35
  89. data/test/test_runner_rewrite.rb +0 -47
  90. data/test/test_source_buffer.rb +0 -162
  91. data/test/test_source_comment.rb +0 -36
  92. data/test/test_source_comment_associator.rb +0 -367
  93. data/test/test_source_map.rb +0 -15
  94. data/test/test_source_range.rb +0 -187
  95. data/test/test_source_rewriter.rb +0 -541
  96. data/test/test_source_rewriter_action.rb +0 -46
  97. data/test/test_source_tree_rewriter.rb +0 -253
  98. data/test/test_static_environment.rb +0 -45
  99. data/test/using_tree_rewriter/fixtures/input.rb +0 -3
  100. data/test/using_tree_rewriter/fixtures/output.rb +0 -3
  101. data/test/using_tree_rewriter/using_tree_rewriter.rb +0 -9
data/doc/CUSTOMIZATION.md DELETED
@@ -1,37 +0,0 @@
1
- # Customizing Parsers
2
-
3
- While the default setup of the parsers provided by this Gem should be suitable
4
- for most users, some developers might want to change parts of it. An example would be
5
- the use of a custom class for nodes instead of `Parser::AST::Node`.
6
-
7
- Customizing the AST is done by creating a custom builder class and passing it
8
- to the constructor method of a parser. The default setup comes down to the
9
- following:
10
-
11
- builder = Parser::Builders::Default.new
12
- parser = Parser::Ruby19.new(builder)
13
-
14
- When creating your own builder class it's best to subclass the default one so
15
- that you don't have to redefine every used method again:
16
-
17
- class MyBuilder < Parser::Builders::Default
18
-
19
- end
20
-
21
- builder = MyBuilder.new
22
- parser = Parser::Ruby19.new(builder)
23
-
24
- ## Custom Node Classes
25
-
26
- To use a custom node class you have to override the method
27
- `Parser::Builders::Default#n`:
28
-
29
- class MyBuilder < Parser::Builders::Default
30
- def n(type, children, location)
31
- return MyNodeClass.new(type, children, :location => location)
32
- end
33
- end
34
-
35
- Note that the used class (and corresponding instance) must be compatible with
36
- `Parser::AST::Node` so it's best to subclass it and override/add code where
37
- needed.
data/doc/INTERNALS.md DELETED
@@ -1,21 +0,0 @@
1
- Entry points
2
- ------------
3
-
4
- Parser should be kept as slim as possible. This includes not loading
5
- any potentially large files when they are likely to be unused in practice.
6
-
7
- Parser has five main (classes of) `require` entry points:
8
-
9
- * `require 'parser'`. Main entry point, requires all classes which
10
- are used across the entire library.
11
- * `require 'parser/rubyXX'`. Version-specific entry point. Can raise
12
- a NotImplementedError if current Ruby runtime is unable to parse the
13
- requested Ruby version.
14
- * `require 'parser/all'`. Requires all available parsers for released
15
- versions of Ruby. Can raise NotImplementedError.
16
- * `require 'parser/runner'`. Requires all the stuff which is useful for
17
- command-line tools but not otherwise.
18
- * `require 'parser/runner/X'`. Runner-specific entry point.
19
-
20
- All non-main entry points internally `require 'parser'`. Additionally, all
21
- runner-specific entry points internally `require 'parser/runner'`.
data/doc/css/.gitkeep DELETED
File without changes
data/doc/css/common.css DELETED
@@ -1,68 +0,0 @@
1
- body
2
- {
3
- font-size: 14px;
4
- line-height: 1.6;
5
- margin: 0 auto;
6
- max-width: 960px;
7
- }
8
-
9
- p code
10
- {
11
- background: #f2f2f2;
12
- padding-left: 3px;
13
- padding-right: 3px;
14
- }
15
-
16
- pre.code
17
- {
18
- font-size: 13px;
19
- line-height: 1.4;
20
- }
21
-
22
- /**
23
- * YARD uses generic table styles, using a special class means those tables
24
- * don't get messed up.
25
- */
26
- .table
27
- {
28
- border: 1px solid #ccc;
29
- border-right: none;
30
- border-collapse: separate;
31
- border-spacing: 0;
32
- text-align: left;
33
- }
34
-
35
- .table.full
36
- {
37
- width: 100%;
38
- }
39
-
40
- .table .field_name
41
- {
42
- min-width: 160px;
43
- }
44
-
45
- .table thead tr th.no_sort:first-child
46
- {
47
- width: 25px;
48
- }
49
-
50
- .table thead tr th, .table tbody tr td
51
- {
52
- border-bottom: 1px solid #ccc;
53
- border-right: 1px solid #ccc;
54
- min-width: 20px;
55
- padding: 8px 5px;
56
- text-align: left;
57
- vertical-align: top;
58
- }
59
-
60
- .table tbody tr:last-child td
61
- {
62
- border-bottom: none;
63
- }
64
-
65
- .table tr:nth-child(odd) td
66
- {
67
- background: #f9f9f9;
68
- }
data/lib/parser/lexer.rl DELETED
@@ -1,2536 +0,0 @@
1
- %%machine lex; # % fix highlighting
2
-
3
- #
4
- # === BEFORE YOU START ===
5
- #
6
- # Read the Ruby Hacking Guide chapter 11, available in English at
7
- # http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
8
- #
9
- # Remember two things about Ragel scanners:
10
- #
11
- # 1) Longest match wins.
12
- #
13
- # 2) If two matches have the same length, the first
14
- # in source code wins.
15
- #
16
- # General rules of making Ragel and Bison happy:
17
- #
18
- # * `p` (position) and `@te` contain the index of the character
19
- # they're pointing to ("current"), plus one. `@ts` contains the index
20
- # of the corresponding character. The code for extracting matched token is:
21
- #
22
- # @source_buffer.slice(@ts...@te)
23
- #
24
- # * If your input is `foooooooobar` and the rule is:
25
- #
26
- # 'f' 'o'+
27
- #
28
- # the result will be:
29
- #
30
- # foooooooobar
31
- # ^ ts=0 ^ p=te=9
32
- #
33
- # * A Ragel lexer action should not emit more than one token, unless
34
- # you know what you are doing.
35
- #
36
- # * All Ragel commands (fnext, fgoto, ...) end with a semicolon.
37
- #
38
- # * If an action emits the token and transitions to another state, use
39
- # these Ragel commands:
40
- #
41
- # emit($whatever)
42
- # fnext $next_state; fbreak;
43
- #
44
- # If you perform `fgoto` in an action which neither emits a token nor
45
- # rewinds the stream pointer, the parser's side-effectful,
46
- # context-sensitive lookahead actions will break in a hard to detect
47
- # and debug way.
48
- #
49
- # * If an action does not emit a token:
50
- #
51
- # fgoto $next_state;
52
- #
53
- # * If an action features lookbehind, i.e. matches characters with the
54
- # intent of passing them to another action:
55
- #
56
- # p = @ts - 1
57
- # fgoto $next_state;
58
- #
59
- # or, if the lookbehind consists of a single character:
60
- #
61
- # fhold; fgoto $next_state;
62
- #
63
- # * Ragel merges actions. So, if you have `e_lparen = '(' %act` and
64
- # `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
65
- # _will_ invoke the action `act`.
66
- #
67
- # e_something stands for "something with **e**mbedded action".
68
- #
69
- # * EOF is explicit and is matched by `c_eof`. If you want to introspect
70
- # the state of the lexer, add this rule to the state:
71
- #
72
- # c_eof => do_eof;
73
- #
74
- # * If you proceed past EOF, the lexer will complain:
75
- #
76
- # NoMethodError: undefined method `ord' for nil:NilClass
77
- #
78
-
79
- class Parser::Lexer
80
-
81
- %% write data nofinal;
82
- # %
83
-
84
- ESCAPES = {
85
- ?a.ord => "\a", ?b.ord => "\b", ?e.ord => "\e", ?f.ord => "\f",
86
- ?n.ord => "\n", ?r.ord => "\r", ?s.ord => "\s", ?t.ord => "\t",
87
- ?v.ord => "\v", ?\\.ord => "\\"
88
- }.freeze
89
-
90
- REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
91
-
92
- attr_reader :source_buffer
93
-
94
- attr_accessor :diagnostics
95
- attr_accessor :static_env
96
- attr_accessor :force_utf32
97
-
98
- attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
99
-
100
- attr_accessor :tokens, :comments
101
-
102
- def initialize(version)
103
- @version = version
104
- @static_env = nil
105
- @context = nil
106
-
107
- @tokens = nil
108
- @comments = nil
109
-
110
- reset
111
- end
112
-
113
- def reset(reset_state=true)
114
- # Ragel state:
115
- if reset_state
116
- # Unit tests set state prior to resetting lexer.
117
- @cs = self.class.lex_en_line_begin
118
-
119
- @cond = StackState.new('cond')
120
- @cmdarg = StackState.new('cmdarg')
121
- @cond_stack = []
122
- @cmdarg_stack = []
123
- end
124
-
125
- @force_utf32 = false # Set to true by some tests
126
-
127
- @source_pts = nil # @source as a codepoint array
128
-
129
- @p = 0 # stream position (saved manually in #advance)
130
- @ts = nil # token start
131
- @te = nil # token end
132
- @act = 0 # next action
133
-
134
- @stack = [] # state stack
135
- @top = 0 # state stack top pointer
136
-
137
- # Lexer state:
138
- @token_queue = []
139
- @literal_stack = []
140
-
141
- @eq_begin_s = nil # location of last encountered =begin
142
- @sharp_s = nil # location of last encountered #
143
-
144
- @newline_s = nil # location of last encountered newline
145
-
146
- @num_base = nil # last numeric base
147
- @num_digits_s = nil # starting position of numeric digits
148
- @num_suffix_s = nil # starting position of numeric suffix
149
- @num_xfrm = nil # numeric suffix-induced transformation
150
-
151
- @escape_s = nil # starting position of current sequence
152
- @escape = nil # last escaped sequence, as string
153
-
154
- @herebody_s = nil # starting position of current heredoc line
155
-
156
- # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
157
- # encountered after a matching closing parenthesis.
158
- @paren_nest = 0
159
- @lambda_stack = []
160
-
161
- # After encountering the closing line of <<~SQUIGGLY_HEREDOC,
162
- # we store the indentation level and give it out to the parser
163
- # on request. It is not possible to infer indentation level just
164
- # from the AST because escape sequences such as `\ ` or `\t` are
165
- # expanded inside the lexer, but count as non-whitespace for
166
- # indentation purposes.
167
- @dedent_level = nil
168
-
169
- # If the lexer is in `command state' (aka expr_value)
170
- # at the entry to #advance, it will transition to expr_cmdarg
171
- # instead of expr_arg at certain points.
172
- @command_start = true
173
-
174
- # True at the end of "def foo a:"
175
- @in_kwarg = false
176
-
177
- # State before =begin / =end block comment
178
- @cs_before_block_comment = self.class.lex_en_line_begin
179
- end
180
-
181
- def source_buffer=(source_buffer)
182
- @source_buffer = source_buffer
183
-
184
- if @source_buffer
185
- source = @source_buffer.source
186
-
187
- if source.encoding == Encoding::UTF_8
188
- @source_pts = source.unpack('U*')
189
- else
190
- @source_pts = source.unpack('C*')
191
- end
192
-
193
- if @source_pts[0] == 0xfeff
194
- # Skip byte order mark.
195
- @p = 1
196
- end
197
- else
198
- @source_pts = nil
199
- end
200
- end
201
-
202
- def encoding
203
- @source_buffer.source.encoding
204
- end
205
-
206
- LEX_STATES = {
207
- :line_begin => lex_en_line_begin,
208
- :expr_dot => lex_en_expr_dot,
209
- :expr_fname => lex_en_expr_fname,
210
- :expr_value => lex_en_expr_value,
211
- :expr_beg => lex_en_expr_beg,
212
- :expr_mid => lex_en_expr_mid,
213
- :expr_arg => lex_en_expr_arg,
214
- :expr_cmdarg => lex_en_expr_cmdarg,
215
- :expr_end => lex_en_expr_end,
216
- :expr_endarg => lex_en_expr_endarg,
217
- :expr_endfn => lex_en_expr_endfn,
218
- :expr_labelarg => lex_en_expr_labelarg,
219
-
220
- :interp_string => lex_en_interp_string,
221
- :interp_words => lex_en_interp_words,
222
- :plain_string => lex_en_plain_string,
223
- :plain_words => lex_en_plain_string,
224
- }
225
-
226
- def state
227
- LEX_STATES.invert.fetch(@cs, @cs)
228
- end
229
-
230
- def state=(state)
231
- @cs = LEX_STATES.fetch(state)
232
- end
233
-
234
- def push_cmdarg
235
- @cmdarg_stack.push(@cmdarg)
236
- @cmdarg = StackState.new("cmdarg.#{@cmdarg_stack.count}")
237
- end
238
-
239
- def pop_cmdarg
240
- @cmdarg = @cmdarg_stack.pop
241
- end
242
-
243
- def push_cond
244
- @cond_stack.push(@cond)
245
- @cond = StackState.new("cond.#{@cond_stack.count}")
246
- end
247
-
248
- def pop_cond
249
- @cond = @cond_stack.pop
250
- end
251
-
252
- def dedent_level
253
- # We erase @dedent_level as a precaution to avoid accidentally
254
- # using a stale value.
255
- dedent_level, @dedent_level = @dedent_level, nil
256
- dedent_level
257
- end
258
-
259
- # Return next token: [type, value].
260
- def advance
261
- if @token_queue.any?
262
- return @token_queue.shift
263
- end
264
-
265
- # Ugly, but dependent on Ragel output. Consider refactoring it somehow.
266
- klass = self.class
267
- _lex_trans_keys = klass.send :_lex_trans_keys
268
- _lex_key_spans = klass.send :_lex_key_spans
269
- _lex_index_offsets = klass.send :_lex_index_offsets
270
- _lex_indicies = klass.send :_lex_indicies
271
- _lex_trans_targs = klass.send :_lex_trans_targs
272
- _lex_trans_actions = klass.send :_lex_trans_actions
273
- _lex_to_state_actions = klass.send :_lex_to_state_actions
274
- _lex_from_state_actions = klass.send :_lex_from_state_actions
275
- _lex_eof_trans = klass.send :_lex_eof_trans
276
-
277
- pe = @source_pts.size + 2
278
- p, eof = @p, pe
279
-
280
- cmd_state = @command_start
281
- @command_start = false
282
-
283
- %% write exec;
284
- # %
285
-
286
- @p = p
287
-
288
- if @token_queue.any?
289
- @token_queue.shift
290
- elsif @cs == klass.lex_error
291
- [ false, [ '$error'.freeze, range(p - 1, p) ] ]
292
- else
293
- eof = @source_pts.size
294
- [ false, [ '$eof'.freeze, range(eof, eof) ] ]
295
- end
296
- end
297
-
298
- protected
299
-
300
- def eof_codepoint?(point)
301
- [0x04, 0x1a, 0x00].include? point
302
- end
303
-
304
- def version?(*versions)
305
- versions.include?(@version)
306
- end
307
-
308
- def stack_pop
309
- @top -= 1
310
- @stack[@top]
311
- end
312
-
313
- def encode_escape(ord)
314
- ord.chr.force_encoding(@source_buffer.source.encoding)
315
- end
316
-
317
- def tok(s = @ts, e = @te)
318
- @source_buffer.slice(s...e)
319
- end
320
-
321
- def range(s = @ts, e = @te)
322
- Parser::Source::Range.new(@source_buffer, s, e)
323
- end
324
-
325
- def emit(type, value = tok, s = @ts, e = @te)
326
- token = [ type, [ value, range(s, e) ] ]
327
-
328
- @token_queue.push(token)
329
-
330
- @tokens.push(token) if @tokens
331
-
332
- token
333
- end
334
-
335
- def emit_table(table, s = @ts, e = @te)
336
- value = tok(s, e)
337
-
338
- emit(table[value], value, s, e)
339
- end
340
-
341
- def emit_do(do_block=false)
342
- if @cond.active?
343
- emit(:kDO_COND, 'do'.freeze)
344
- elsif @cmdarg.active? || do_block
345
- emit(:kDO_BLOCK, 'do'.freeze)
346
- else
347
- emit(:kDO, 'do'.freeze)
348
- end
349
- end
350
-
351
- def arg_or_cmdarg(cmd_state)
352
- if cmd_state
353
- self.class.lex_en_expr_cmdarg
354
- else
355
- self.class.lex_en_expr_arg
356
- end
357
- end
358
-
359
- def emit_comment(s = @ts, e = @te)
360
- if @comments
361
- @comments.push(Parser::Source::Comment.new(range(s, e)))
362
- end
363
-
364
- if @tokens
365
- @tokens.push([ :tCOMMENT, [ tok(s, e), range(s, e) ] ])
366
- end
367
-
368
- nil
369
- end
370
-
371
- def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
372
- @diagnostics.process(
373
- Parser::Diagnostic.new(type, reason, arguments, location, highlights))
374
- end
375
-
376
- #
377
- # === LITERAL STACK ===
378
- #
379
-
380
- def push_literal(*args)
381
- new_literal = Literal.new(self, *args)
382
- @literal_stack.push(new_literal)
383
- next_state_for_literal(new_literal)
384
- end
385
-
386
- def next_state_for_literal(literal)
387
- if literal.words? && literal.backslash_delimited?
388
- if literal.interpolate?
389
- self.class.lex_en_interp_backslash_delimited_words
390
- else
391
- self.class.lex_en_plain_backslash_delimited_words
392
- end
393
- elsif literal.words? && !literal.backslash_delimited?
394
- if literal.interpolate?
395
- self.class.lex_en_interp_words
396
- else
397
- self.class.lex_en_plain_words
398
- end
399
- elsif !literal.words? && literal.backslash_delimited?
400
- if literal.interpolate?
401
- self.class.lex_en_interp_backslash_delimited
402
- else
403
- self.class.lex_en_plain_backslash_delimited
404
- end
405
- else
406
- if literal.interpolate?
407
- self.class.lex_en_interp_string
408
- else
409
- self.class.lex_en_plain_string
410
- end
411
- end
412
- end
413
-
414
- def literal
415
- @literal_stack.last
416
- end
417
-
418
- def pop_literal
419
- old_literal = @literal_stack.pop
420
-
421
- @dedent_level = old_literal.dedent_level
422
-
423
- if old_literal.type == :tREGEXP_BEG
424
- # Fetch modifiers.
425
- self.class.lex_en_regexp_modifiers
426
- else
427
- self.class.lex_en_expr_end
428
- end
429
- end
430
-
431
- # Mapping of strings to parser tokens.
432
-
433
- PUNCTUATION = {
434
- '=' => :tEQL, '&' => :tAMPER2, '|' => :tPIPE,
435
- '!' => :tBANG, '^' => :tCARET, '+' => :tPLUS,
436
- '-' => :tMINUS, '*' => :tSTAR2, '/' => :tDIVIDE,
437
- '%' => :tPERCENT, '~' => :tTILDE, ',' => :tCOMMA,
438
- ';' => :tSEMI, '.' => :tDOT, '..' => :tDOT2,
439
- '...' => :tDOT3, '[' => :tLBRACK2, ']' => :tRBRACK,
440
- '(' => :tLPAREN2, ')' => :tRPAREN, '?' => :tEH,
441
- ':' => :tCOLON, '&&' => :tANDOP, '||' => :tOROP,
442
- '-@' => :tUMINUS, '+@' => :tUPLUS, '~@' => :tTILDE,
443
- '**' => :tPOW, '->' => :tLAMBDA, '=~' => :tMATCH,
444
- '!~' => :tNMATCH, '==' => :tEQ, '!=' => :tNEQ,
445
- '>' => :tGT, '>>' => :tRSHFT, '>=' => :tGEQ,
446
- '<' => :tLT, '<<' => :tLSHFT, '<=' => :tLEQ,
447
- '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
448
- '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
449
- '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
450
- '!@' => :tBANG, '&.' => :tANDDOT,
451
- }
452
-
453
- PUNCTUATION_BEGIN = {
454
- '&' => :tAMPER, '*' => :tSTAR, '**' => :tDSTAR,
455
- '+' => :tUPLUS, '-' => :tUMINUS, '::' => :tCOLON3,
456
- '(' => :tLPAREN, '{' => :tLBRACE, '[' => :tLBRACK,
457
- }
458
-
459
- KEYWORDS = {
460
- 'if' => :kIF_MOD, 'unless' => :kUNLESS_MOD,
461
- 'while' => :kWHILE_MOD, 'until' => :kUNTIL_MOD,
462
- 'rescue' => :kRESCUE_MOD, 'defined?' => :kDEFINED,
463
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
464
- }
465
-
466
- KEYWORDS_BEGIN = {
467
- 'if' => :kIF, 'unless' => :kUNLESS,
468
- 'while' => :kWHILE, 'until' => :kUNTIL,
469
- 'rescue' => :kRESCUE, 'defined?' => :kDEFINED,
470
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
471
- }
472
-
473
- %w(class module def undef begin end then elsif else ensure case when
474
- for break next redo retry in do return yield super self nil true
475
- false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
476
- KEYWORDS_BEGIN[keyword] = KEYWORDS[keyword] = :"k#{keyword.upcase}"
477
- end
478
-
479
- %%{
480
- # %
481
-
482
- access @;
483
- getkey (@source_pts[p] || 0);
484
-
485
- # === CHARACTER CLASSES ===
486
- #
487
- # Pay close attention to the differences between c_any and any.
488
- # c_any does not include EOF and so will cause incorrect behavior
489
- # for machine subtraction (any-except rules) and default transitions
490
- # for scanners.
491
-
492
- action do_nl {
493
- # Record position of a newline for precise location reporting on tNL
494
- # tokens.
495
- #
496
- # This action is embedded directly into c_nl, as it is idempotent and
497
- # there are no cases when we need to skip it.
498
- @newline_s = p
499
- }
500
-
501
- c_nl = '\n' $ do_nl;
502
- c_space = [ \t\r\f\v];
503
- c_space_nl = c_space | c_nl;
504
-
505
- c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
506
- c_eol = c_nl | c_eof;
507
- c_any = any - c_eof;
508
-
509
- c_nl_zlen = c_nl | zlen;
510
- c_line = any - c_nl_zlen;
511
-
512
- c_unicode = c_any - 0x00..0x7f;
513
- c_upper = [A-Z];
514
- c_lower = [a-z_] | c_unicode;
515
- c_alpha = c_lower | c_upper;
516
- c_alnum = c_alpha | [0-9];
517
-
518
- action do_eof {
519
- # Sit at EOF indefinitely. #advance would return $eof each time.
520
- # This allows feeding the lexer more data if needed; this is only used
521
- # in tests.
522
- #
523
- # Note that this action is not embedded into e_eof like e_heredoc_nl and e_bs
524
- # below. This is due to the fact that scanner state at EOF is observed
525
- # by tests, and encapsulating it in a rule would break the introspection.
526
- fhold; fbreak;
527
- }
528
-
529
- #
530
- # === TOKEN DEFINITIONS ===
531
- #
532
-
533
- # All operators are punctuation. There is more to punctuation
534
- # than just operators. Operators can be overridden by user;
535
- # punctuation can not.
536
-
537
- # A list of operators which are valid in the function name context, but
538
- # have different semantics in others.
539
- operator_fname = '[]' | '[]=' | '`' | '-@' | '+@' | '~@' | '!@' ;
540
-
541
- # A list of operators which can occur within an assignment shortcut (+ → +=).
542
- operator_arithmetic = '&' | '|' | '&&' | '||' | '^' | '+' | '-' |
543
- '*' | '/' | '**' | '~' | '<<' | '>>' | '%' ;
544
-
545
- # A list of all user-definable operators not covered by groups above.
546
- operator_rest = '=~' | '!~' | '==' | '!=' | '!' | '===' |
547
- '<' | '<=' | '>' | '>=' | '<=>' | '=>' ;
548
-
549
- # Note that `{` and `}` need to be referred to as e_lbrace and e_rbrace,
550
- # as they are ambiguous with interpolation `#{}` and should be counted.
551
- # These braces are not present in punctuation lists.
552
-
553
- # A list of punctuation which has different meaning when used at the
554
- # beginning of expression.
555
- punctuation_begin = '-' | '+' | '::' | '(' | '[' |
556
- '*' | '**' | '&' ;
557
-
558
- # A list of all punctuation except punctuation_begin.
559
- punctuation_end = ',' | '=' | '->' | '(' | '[' | ']' |
560
- '::' | '?' | ':' | '.' | '..' | '...' ;
561
-
562
- # A list of keywords which have different meaning at the beginning of expression.
563
- keyword_modifier = 'if' | 'unless' | 'while' | 'until' | 'rescue' ;
564
-
565
- # A list of keywords which accept an argument-like expression, i.e. have the
566
- # same post-processing as method calls or commands. Example: `yield 1`,
567
- # `yield (1)`, `yield(1)`, are interpreted as if `yield` was a function.
568
- keyword_with_arg = 'yield' | 'super' | 'not' | 'defined?' ;
569
-
570
- # A list of keywords which accept a literal function name as an argument.
571
- keyword_with_fname = 'def' | 'undef' | 'alias' ;
572
-
573
- # A list of keywords which accept an expression after them.
574
- keyword_with_value = 'else' | 'case' | 'ensure' | 'module' | 'elsif' | 'then' |
575
- 'for' | 'in' | 'do' | 'when' | 'begin' | 'class' |
576
- 'and' | 'or' ;
577
-
578
- # A list of keywords which accept a value, and treat the keywords from
579
- # `keyword_modifier` list as modifiers.
580
- keyword_with_mid = 'rescue' | 'return' | 'break' | 'next' ;
581
-
582
- # A list of keywords which do not accept an expression after them.
583
- keyword_with_end = 'end' | 'self' | 'true' | 'false' | 'retry' |
584
- 'redo' | 'nil' | 'BEGIN' | 'END' | '__FILE__' |
585
- '__LINE__' | '__ENCODING__';
586
-
587
- # All keywords.
588
- keyword = keyword_with_value | keyword_with_mid |
589
- keyword_with_end | keyword_with_arg |
590
- keyword_with_fname | keyword_modifier ;
591
-
592
- constant = c_upper c_alnum*;
593
- bareword = c_alpha c_alnum*;
594
-
595
- call_or_var = c_lower c_alnum*;
596
- class_var = '@@' bareword;
597
- instance_var = '@' bareword;
598
- global_var = '$'
599
- ( bareword | digit+
600
- | [`'+~*$&?!@/\\;,.=:<>"] # `
601
- | '-' c_alnum
602
- )
603
- ;
604
-
605
- # Ruby accepts (and fails on) variables with leading digit
606
- # in literal context, but not in unquoted symbol body.
607
- class_var_v = '@@' c_alnum+;
608
- instance_var_v = '@' c_alnum+;
609
-
610
- label = bareword [?!]? ':';
611
-
612
- #
613
- # === NUMERIC PARSING ===
614
- #
615
-
616
- int_hex = ( xdigit+ '_' )* xdigit* '_'? ;
617
- int_dec = ( digit+ '_' )* digit* '_'? ;
618
- int_bin = ( [01]+ '_' )* [01]* '_'? ;
619
-
620
- flo_int = [1-9] [0-9]* ( '_' digit+ )* | '0';
621
- flo_frac = '.' ( digit+ '_' )* digit+;
622
- flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
623
-
624
- int_suffix =
625
- '' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars) } }
626
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
627
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, chars)) } }
628
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
629
- | 're' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
630
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
631
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 6); p -= 6 } };
632
-
633
- flo_pow_suffix =
634
- '' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars)) } }
635
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Float(chars))) } }
636
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 2); p -= 2 } };
637
-
638
- flo_suffix =
639
- flo_pow_suffix
640
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
641
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
642
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 6); p -= 6 } };
643
-
644
- #
645
- # === ESCAPE SEQUENCE PARSING ===
646
- #
647
-
648
- # Escape parsing code is a Ragel pattern, not a scanner, and therefore
649
- # it shouldn't directly raise errors or perform other actions with side effects.
650
- # In reality this would probably just mess up error reporting in pathological
651
- # cases, though.
652
-
653
- # The amount of code required to parse \M\C stuff correctly is ridiculous.
654
-
655
- escaped_nl = "\\" c_nl;
656
-
657
- action unicode_points {
658
- @escape = ""
659
-
660
- codepoints = tok(@escape_s + 2, p - 1)
661
- codepoint_s = @escape_s + 2
662
-
663
- if @version < 24
664
- if codepoints.start_with?(" ") || codepoints.start_with?("\t")
665
- diagnostic :fatal, :invalid_unicode_escape, nil,
666
- range(@escape_s + 2, @escape_s + 3)
667
- end
668
-
669
- if spaces_p = codepoints.index(/[ \t]{2}/)
670
- diagnostic :fatal, :invalid_unicode_escape, nil,
671
- range(codepoint_s + spaces_p + 1, codepoint_s + spaces_p + 2)
672
- end
673
-
674
- if codepoints.end_with?(" ") || codepoints.end_with?("\t")
675
- diagnostic :fatal, :invalid_unicode_escape, nil, range(p - 1, p)
676
- end
677
- end
678
-
679
- codepoints.scan(/([0-9a-fA-F]+)|([ \t]+)/).each do |(codepoint_str, spaces)|
680
- if spaces
681
- codepoint_s += spaces.length
682
- else
683
- codepoint = codepoint_str.to_i(16)
684
-
685
- if codepoint >= 0x110000
686
- diagnostic :error, :unicode_point_too_large, nil,
687
- range(codepoint_s, codepoint_s + codepoint_str.length)
688
- break
689
- end
690
-
691
- @escape += codepoint.chr(Encoding::UTF_8)
692
- codepoint_s += codepoint_str.length
693
- end
694
- end
695
- }
696
-
697
- action unescape_char {
698
- codepoint = @source_pts[p - 1]
699
- if (@escape = ESCAPES[codepoint]).nil?
700
- @escape = encode_escape(@source_buffer.slice(p - 1))
701
- end
702
- }
703
-
704
- action invalid_complex_escape {
705
- diagnostic :fatal, :invalid_escape
706
- }
707
-
708
- action read_post_meta_or_ctrl_char {
709
- @escape = @source_buffer.slice(p - 1).chr
710
-
711
- if @version >= 27 && ((0..8).include?(@escape.ord) || (14..31).include?(@escape.ord))
712
- diagnostic :fatal, :invalid_escape
713
- end
714
- }
715
-
716
- action slash_c_char {
717
- @escape = encode_escape(@escape[0].ord & 0x9f)
718
- }
719
-
720
- action slash_m_char {
721
- @escape = encode_escape(@escape[0].ord | 0x80)
722
- }
723
-
724
- maybe_escaped_char = (
725
- '\\' c_any %unescape_char
726
- | ( c_any - [\\] ) %read_post_meta_or_ctrl_char
727
- );
728
-
729
- maybe_escaped_ctrl_char = ( # why?!
730
- '\\' c_any %unescape_char %slash_c_char
731
- | '?' % { @escape = "\x7f" }
732
- | ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
733
- );
734
-
735
- escape = (
736
- # \377
737
- [0-7]{1,3}
738
- % { @escape = encode_escape(tok(@escape_s, p).to_i(8) % 0x100) }
739
-
740
- # \xff
741
- | 'x' xdigit{1,2}
742
- % { @escape = encode_escape(tok(@escape_s + 1, p).to_i(16)) }
743
-
744
- # %q[\x]
745
- | 'x' ( c_any - xdigit )
746
- % {
747
- diagnostic :fatal, :invalid_hex_escape, nil, range(@escape_s - 1, p + 2)
748
- }
749
-
750
- # \u263a
751
- | 'u' xdigit{4}
752
- % { @escape = tok(@escape_s + 1, p).to_i(16).chr(Encoding::UTF_8) }
753
-
754
- # \u123
755
- | 'u' xdigit{0,3}
756
- % {
757
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
758
- }
759
-
760
- # u{not hex} or u{}
761
- | 'u{' ( c_any - xdigit - [ \t}] )* '}'
762
- % {
763
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
764
- }
765
-
766
- # \u{ \t 123 \t 456 \t\t }
767
- | 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
768
- (
769
- ( xdigit{1,6} [ \t]* '}'
770
- %unicode_points
771
- )
772
- |
773
- ( xdigit* ( c_any - xdigit - [ \t}] )+ '}'
774
- | ( c_any - [ \t}] )* c_eof
775
- | xdigit{7,}
776
- ) % {
777
- diagnostic :fatal, :unterminated_unicode, nil, range(p - 1, p)
778
- }
779
- )
780
-
781
- # \C-\a \cx
782
- | ( 'C-' | 'c' ) escaped_nl?
783
- maybe_escaped_ctrl_char
784
-
785
- # \M-a
786
- | 'M-' escaped_nl?
787
- maybe_escaped_char
788
- %slash_m_char
789
-
790
- # \C-\M-f \M-\cf \c\M-f
791
- | ( ( 'C-' | 'c' ) escaped_nl? '\\M-'
792
- | 'M-\\' escaped_nl? ( 'C-' | 'c' ) ) escaped_nl?
793
- maybe_escaped_ctrl_char
794
- %slash_m_char
795
-
796
- | 'C' c_any %invalid_complex_escape
797
- | 'M' c_any %invalid_complex_escape
798
- | ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
799
-
800
- | ( c_any - [0-7xuCMc] ) %unescape_char
801
-
802
- | c_eof % {
803
- diagnostic :fatal, :escape_eof, nil, range(p - 1, p)
804
- }
805
- );
806
-
807
- # Use rules in form of `e_bs escape' when you need to parse a sequence.
808
- e_bs = '\\' % {
809
- @escape_s = p
810
- @escape = nil
811
- };
812
-
813
- #
814
- # === STRING AND HEREDOC PARSING ===
815
- #
816
-
817
- # Heredoc parsing is quite a complex topic. First, consider that heredocs
818
- # can be arbitrarily nested. For example:
819
- #
820
- # puts <<CODE
821
- # the result is: #{<<RESULT.inspect
822
- # i am a heredoc
823
- # RESULT
824
- # }
825
- # CODE
826
- #
827
- # which, incidentally, evaluates to:
828
- #
829
- # the result is: " i am a heredoc\n"
830
- #
831
- # To parse them, lexer refers to two kinds (remember, nested heredocs)
832
- # of positions in the input stream, namely heredoc_e
833
- # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
834
- #
835
- # heredoc_e is simply contained inside the corresponding Literal, and
836
- # when the heredoc is closed, the lexing is restarted from that position.
837
- #
838
- # @herebody_s is considerably more complex. First, @herebody_s changes after each
839
- # heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
840
- # contains the current line, and also when a heredoc is started, @herebody_s
841
- # contains the position from which the heredoc will be lexed.
842
- #
843
- # Second, as (insanity) there are nested heredocs, we need to maintain a
844
- # stack of these positions. Each time #push_literal is called, it saves current
845
- # @herebody_s to literal.saved_herebody_s, and after an interpolation (possibly
846
- # containing other heredocs) is closed, the previous value is restored.
847
-
848
- e_heredoc_nl = c_nl % {
849
- # After every heredoc was parsed, @herebody_s contains the
850
- # position of next token after all heredocs.
851
- if @herebody_s
852
- p = @herebody_s
853
- @herebody_s = nil
854
- end
855
- };
856
-
857
- action extend_string {
858
- string = tok
859
-
860
- # tLABEL_END is only possible in non-cond context on >= 2.2
861
- if @version >= 22 && !@cond.active?
862
- lookahead = @source_buffer.slice(@te...@te+2)
863
- end
864
-
865
- current_literal = literal
866
- if !current_literal.heredoc? &&
867
- (token = current_literal.nest_and_try_closing(string, @ts, @te, lookahead))
868
- if token[0] == :tLABEL_END
869
- p += 1
870
- pop_literal
871
- fnext expr_labelarg;
872
- else
873
- fnext *pop_literal;
874
- end
875
- fbreak;
876
- else
877
- current_literal.extend_string(string, @ts, @te)
878
- end
879
- }
880
-
881
- action extend_string_escaped {
882
- current_literal = literal
883
- # Get the first character after the backslash.
884
- escaped_char = @source_buffer.slice(@escape_s).chr
885
-
886
- if current_literal.munge_escape? escaped_char
887
- # If this particular literal uses this character as an opening
888
- # or closing delimiter, it is an escape sequence for that
889
- # particular character. Write it without the backslash.
890
-
891
- if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
892
- # Regular expressions should include escaped delimiters in their
893
- # escaped form, except when the escaped character is
894
- # a closing delimiter but not a regexp metacharacter.
895
- #
896
- # The backslash itself cannot be used as a closing delimiter
897
- # at the same time as an escape symbol, but it is always munged,
898
- # so this branch also executes for the non-closing-delimiter case
899
- # for the backslash.
900
- current_literal.extend_string(tok, @ts, @te)
901
- else
902
- current_literal.extend_string(escaped_char, @ts, @te)
903
- end
904
- else
905
- # It does not. So this is an actual escape sequence, yay!
906
- if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
907
- # Squiggly heredocs like
908
- # <<~-HERE
909
- # 1\
910
- # 2
911
- # HERE
912
- # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
913
- # This information is emitted as is, without escaping,
914
- # later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
915
- current_literal.extend_string(tok, @ts, @te)
916
- elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
917
- # Heredocs, regexp and a few other types of literals support line
918
- # continuation via \\\n sequence. The code like
919
- # "a\
920
- # b"
921
- # must be parsed as "ab"
922
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
923
- elsif current_literal.regexp?
924
- # Regular expressions should include escape sequences in their
925
- # escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
926
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
927
- else
928
- current_literal.extend_string(@escape || tok, @ts, @te)
929
- end
930
- end
931
- }
932
-
933
- # Extend a string with a newline or an EOF character.
934
- # As heredoc closing line can immediately precede EOF, this action
935
- # has to handle such case specially.
936
- action extend_string_eol {
937
- current_literal = literal
938
- if @te == pe
939
- diagnostic :fatal, :string_eof, nil,
940
- range(current_literal.str_s, current_literal.str_s + 1)
941
- end
942
-
943
- if current_literal.heredoc?
944
- line = tok(@herebody_s, @ts).gsub(/\r+$/, ''.freeze)
945
-
946
- if version?(18, 19, 20)
947
- # See ruby:c48b4209c
948
- line = line.gsub(/\r.*$/, ''.freeze)
949
- end
950
-
951
- # Try ending the heredoc with the complete most recently
952
- # scanned line. @herebody_s always refers to the start of such line.
953
- if current_literal.nest_and_try_closing(line, @herebody_s, @ts)
954
- # Adjust @herebody_s to point to the next line.
955
- @herebody_s = @te
956
-
957
- # Continue regular lexing after the heredoc reference (<<END).
958
- p = current_literal.heredoc_e - 1
959
- fnext *pop_literal; fbreak;
960
- else
961
- # Calculate indentation level for <<~HEREDOCs.
962
- current_literal.infer_indent_level(line)
963
-
964
- # Ditto.
965
- @herebody_s = @te
966
- end
967
- else
968
- # Try ending the literal with a newline.
969
- if current_literal.nest_and_try_closing(tok, @ts, @te)
970
- fnext *pop_literal; fbreak;
971
- end
972
-
973
- if @herebody_s
974
- # This is a regular literal intertwined with a heredoc. Like:
975
- #
976
- # p <<-foo+"1
977
- # bar
978
- # foo
979
- # 2"
980
- #
981
- # which, incidentally, evaluates to "bar\n1\n2".
982
- p = @herebody_s - 1
983
- @herebody_s = nil
984
- end
985
- end
986
-
987
- if current_literal.words? && !eof_codepoint?(@source_pts[p])
988
- current_literal.extend_space @ts, @te
989
- else
990
- # A literal newline is appended if the heredoc was _not_ closed
991
- # this time (see fbreak above). See also Literal#nest_and_try_closing
992
- # for rationale of calling #flush_string here.
993
- current_literal.extend_string tok, @ts, @te
994
- current_literal.flush_string
995
- end
996
- }
997
-
998
- action extend_string_space {
999
- literal.extend_space @ts, @te
1000
- }
1001
-
1002
- #
1003
- # === INTERPOLATION PARSING ===
1004
- #
1005
-
1006
- # Interpolations with immediate variable names simply call into
1007
- # the corresponding machine.
1008
-
1009
- interp_var = '#' ( global_var | class_var_v | instance_var_v );
1010
-
1011
- action extend_interp_var {
1012
- current_literal = literal
1013
- current_literal.flush_string
1014
- current_literal.extend_content
1015
-
1016
- emit(:tSTRING_DVAR, nil, @ts, @ts + 1)
1017
-
1018
- p = @ts
1019
- fcall expr_variable;
1020
- }
1021
-
1022
- # Special case for Ruby >= 2.7
1023
- # If interpolated instance/class variable starts with a digit we parse it as a plain substring
1024
- # However, "#$1" is still a regular interpolation
1025
- interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
1026
-
1027
- action extend_interp_digit_var {
1028
- if @version >= 27
1029
- literal.extend_string(tok, @ts, @te)
1030
- else
1031
- message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
1032
- diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
1033
- end
1034
- }
1035
-
1036
- # Interpolations with code blocks must match nested curly braces, as
1037
- # interpolation ending is ambiguous with a block ending. So, every
1038
- # opening and closing brace should be matched with e_[lr]brace rules,
1039
- # which automatically perform the counting.
1040
- #
1041
- # Note that interpolations can themselves be nested, so brace balance
1042
- # is tied to the innermost literal.
1043
- #
1044
- # Also note that literals themselves should not use e_[lr]brace rules
1045
- # when matching their opening and closing delimiters, as the amount of
1046
- # braces inside the characters of a string literal is independent.
1047
-
1048
- interp_code = '#{';
1049
-
1050
- e_lbrace = '{' % {
1051
- @cond.push(false); @cmdarg.push(false)
1052
-
1053
- current_literal = literal
1054
- if current_literal
1055
- current_literal.start_interp_brace
1056
- end
1057
- };
1058
-
1059
- e_rbrace = '}' % {
1060
- current_literal = literal
1061
- if current_literal
1062
- if current_literal.end_interp_brace_and_try_closing
1063
- if version?(18, 19)
1064
- emit(:tRCURLY, '}'.freeze, p - 1, p)
1065
- @cond.lexpop
1066
- @cmdarg.lexpop
1067
- else
1068
- emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
1069
- end
1070
-
1071
- if current_literal.saved_herebody_s
1072
- @herebody_s = current_literal.saved_herebody_s
1073
- end
1074
-
1075
-
1076
- fhold;
1077
- fnext *next_state_for_literal(current_literal);
1078
- fbreak;
1079
- end
1080
- end
1081
-
1082
- @paren_nest -= 1
1083
- };
1084
-
1085
- action extend_interp_code {
1086
- current_literal = literal
1087
- current_literal.flush_string
1088
- current_literal.extend_content
1089
-
1090
- emit(:tSTRING_DBEG, '#{'.freeze)
1091
-
1092
- if current_literal.heredoc?
1093
- current_literal.saved_herebody_s = @herebody_s
1094
- @herebody_s = nil
1095
- end
1096
-
1097
- current_literal.start_interp_brace
1098
- @command_start = true
1099
- fnext expr_value;
1100
- fbreak;
1101
- }
1102
-
1103
- # Actual string parsers are simply combined from the primitives defined
1104
- # above.
1105
-
1106
- interp_words := |*
1107
- interp_code => extend_interp_code;
1108
- interp_digit_var => extend_interp_digit_var;
1109
- interp_var => extend_interp_var;
1110
- e_bs escape => extend_string_escaped;
1111
- c_space+ => extend_string_space;
1112
- c_eol => extend_string_eol;
1113
- c_any => extend_string;
1114
- *|;
1115
-
1116
- interp_string := |*
1117
- interp_code => extend_interp_code;
1118
- interp_digit_var => extend_interp_digit_var;
1119
- interp_var => extend_interp_var;
1120
- e_bs escape => extend_string_escaped;
1121
- c_eol => extend_string_eol;
1122
- c_any => extend_string;
1123
- *|;
1124
-
1125
- plain_words := |*
1126
- e_bs c_any => extend_string_escaped;
1127
- c_space+ => extend_string_space;
1128
- c_eol => extend_string_eol;
1129
- c_any => extend_string;
1130
- *|;
1131
-
1132
- plain_string := |*
1133
- '\\' c_nl => extend_string_eol;
1134
- e_bs c_any => extend_string_escaped;
1135
- c_eol => extend_string_eol;
1136
- c_any => extend_string;
1137
- *|;
1138
-
1139
- interp_backslash_delimited := |*
1140
- interp_code => extend_interp_code;
1141
- interp_digit_var => extend_interp_digit_var;
1142
- interp_var => extend_interp_var;
1143
- c_eol => extend_string_eol;
1144
- c_any => extend_string;
1145
- *|;
1146
-
1147
- plain_backslash_delimited := |*
1148
- c_eol => extend_string_eol;
1149
- c_any => extend_string;
1150
- *|;
1151
-
1152
- interp_backslash_delimited_words := |*
1153
- interp_code => extend_interp_code;
1154
- interp_digit_var => extend_interp_digit_var;
1155
- interp_var => extend_interp_var;
1156
- c_space+ => extend_string_space;
1157
- c_eol => extend_string_eol;
1158
- c_any => extend_string;
1159
- *|;
1160
-
1161
- plain_backslash_delimited_words := |*
1162
- c_space+ => extend_string_space;
1163
- c_eol => extend_string_eol;
1164
- c_any => extend_string;
1165
- *|;
1166
-
1167
- regexp_modifiers := |*
1168
- [A-Za-z]+
1169
- => {
1170
- unknown_options = tok.scan(/[^imxouesn]/)
1171
- if unknown_options.any?
1172
- diagnostic :error, :regexp_options,
1173
- { :options => unknown_options.join }
1174
- end
1175
-
1176
- emit(:tREGEXP_OPT)
1177
- fnext expr_end;
1178
- fbreak;
1179
- };
1180
-
1181
- any
1182
- => {
1183
- emit(:tREGEXP_OPT, tok(@ts, @te - 1), @ts, @te - 1)
1184
- fhold;
1185
- fgoto expr_end;
1186
- };
1187
- *|;
1188
-
1189
- #
1190
- # === WHITESPACE HANDLING ===
1191
- #
1192
-
1193
- # Various contexts in Ruby allow various kinds of whitespace
1194
- # to be used. They are grouped to clarify the lexing machines
1195
- # and ease collection of comments.
1196
-
1197
- # A line of code with inline #comment at end is always equivalent
1198
- # to a line of code ending with just a newline, so an inline
1199
- # comment is deemed equivalent to non-newline whitespace
1200
- # (c_space character class).
1201
-
1202
- w_space =
1203
- c_space+
1204
- | '\\' e_heredoc_nl
1205
- ;
1206
-
1207
- w_comment =
1208
- '#' %{ @sharp_s = p - 1 }
1209
- # The (p == pe) condition compensates for added "\0" and
1210
- # the way Ragel handles EOF.
1211
- c_line* %{ emit_comment(@sharp_s, p == pe ? p - 2 : p) }
1212
- ;
1213
-
1214
- w_space_comment =
1215
- w_space
1216
- | w_comment
1217
- ;
1218
-
1219
- # A newline in non-literal context always interoperates with
1220
- # here document logic and can always be escaped by a backslash,
1221
- # still interoperating with here document logic in the same way,
1222
- # yet being invisible to anything else.
1223
- #
1224
- # To demonstrate:
1225
- #
1226
- # foo = <<FOO \
1227
- # bar
1228
- # FOO
1229
- # + 2
1230
- #
1231
- # is equivalent to `foo = "bar\n" + 2`.
1232
-
1233
- w_newline =
1234
- e_heredoc_nl;
1235
-
1236
- w_any =
1237
- w_space
1238
- | w_comment
1239
- | w_newline
1240
- ;
1241
-
1242
-
1243
- #
1244
- # === EXPRESSION PARSING ===
1245
- #
1246
-
1247
- # These rules implement a form of manually defined lookahead.
1248
- # The default longest-match scanning does not work here due
1249
- # to sheer ambiguity.
1250
-
1251
- ambiguous_fid_suffix = # actual parsed
1252
- [?!] %{ tm = p } | # a? a?
1253
- [?!]'=' %{ tm = p - 2 } # a!=b a != b
1254
- ;
1255
-
1256
- ambiguous_ident_suffix = # actual parsed
1257
- ambiguous_fid_suffix |
1258
- '=' %{ tm = p } | # a= a=
1259
- '==' %{ tm = p - 2 } | # a==b a == b
1260
- '=~' %{ tm = p - 2 } | # a=~b a =~ b
1261
- '=>' %{ tm = p - 2 } | # a=>b a => b
1262
- '===' %{ tm = p - 3 } # a===b a === b
1263
- ;
1264
-
1265
- ambiguous_symbol_suffix = # actual parsed
1266
- ambiguous_ident_suffix |
1267
- '==>' %{ tm = p - 2 } # :a==>b :a= => b
1268
- ;
1269
-
1270
- # Ambiguous with 1.9 hash labels.
1271
- ambiguous_const_suffix = # actual parsed
1272
- '::' %{ tm = p - 2 } # A::B A :: B
1273
- ;
1274
-
1275
- # Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
1276
- # @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
1277
-
1278
- e_lbrack = '[' % {
1279
- @cond.push(false); @cmdarg.push(false)
1280
-
1281
- @paren_nest += 1
1282
- };
1283
-
1284
- e_rbrack = ']' % {
1285
- @paren_nest -= 1
1286
- };
1287
-
1288
- # Ruby 1.9 lambdas require parentheses counting in order to
1289
- # emit correct opening kDO/tLBRACE.
1290
-
1291
- e_lparen = '(' % {
1292
- @cond.push(false); @cmdarg.push(false)
1293
-
1294
- @paren_nest += 1
1295
-
1296
- if version?(18)
1297
- @command_start = true
1298
- end
1299
- };
1300
-
1301
- e_rparen = ')' % {
1302
- @paren_nest -= 1
1303
- };
1304
-
1305
- # Ruby is context-sensitive wrt/ local identifiers.
1306
- action local_ident {
1307
- emit(:tIDENTIFIER)
1308
-
1309
- if !@static_env.nil? && @static_env.declared?(tok)
1310
- fnext expr_endfn; fbreak;
1311
- else
1312
- fnext *arg_or_cmdarg(cmd_state); fbreak;
1313
- end
1314
- }
1315
-
1316
- # Variable lexing code is accessed from both expressions and
1317
- # string interpolation related code.
1318
- #
1319
- expr_variable := |*
1320
- global_var
1321
- => {
1322
- if tok =~ /^\$([1-9][0-9]*)$/
1323
- emit(:tNTH_REF, tok(@ts + 1).to_i)
1324
- elsif tok =~ /^\$([&`'+])$/
1325
- emit(:tBACK_REF)
1326
- else
1327
- emit(:tGVAR)
1328
- end
1329
-
1330
- fnext *stack_pop; fbreak;
1331
- };
1332
-
1333
- class_var_v
1334
- => {
1335
- if tok =~ /^@@[0-9]/
1336
- diagnostic :error, :cvar_name, { :name => tok }
1337
- end
1338
-
1339
- emit(:tCVAR)
1340
- fnext *stack_pop; fbreak;
1341
- };
1342
-
1343
- instance_var_v
1344
- => {
1345
- if tok =~ /^@[0-9]/
1346
- diagnostic :error, :ivar_name, { :name => tok }
1347
- end
1348
-
1349
- emit(:tIVAR)
1350
- fnext *stack_pop; fbreak;
1351
- };
1352
- *|;
1353
-
1354
- # Literal function name in definition (e.g. `def class`).
1355
- # Keywords are returned as their respective tokens; this is used
1356
- # to support singleton def `def self.foo`. Global variables are
1357
- # returned as `tGVAR`; this is used in global variable alias
1358
- # statements `alias $a $b`. Symbols are returned verbatim; this
1359
- # is used in `alias :a :"b#{foo}"` and `undef :a`.
1360
- #
1361
- # Transitions to `expr_endfn` afterwards.
1362
- #
1363
- expr_fname := |*
1364
- keyword
1365
- => { emit_table(KEYWORDS_BEGIN);
1366
- fnext expr_endfn; fbreak; };
1367
-
1368
- constant
1369
- => { emit(:tCONSTANT)
1370
- fnext expr_endfn; fbreak; };
1371
-
1372
- bareword [?=!]?
1373
- => { emit(:tIDENTIFIER)
1374
- fnext expr_endfn; fbreak; };
1375
-
1376
- global_var
1377
- => { p = @ts - 1
1378
- fnext expr_end; fcall expr_variable; };
1379
-
1380
- # If the handling was to be delegated to expr_end,
1381
- # these cases would transition to something else than
1382
- # expr_endfn, which is incorrect.
1383
- operator_fname |
1384
- operator_arithmetic |
1385
- operator_rest
1386
- => { emit_table(PUNCTUATION)
1387
- fnext expr_endfn; fbreak; };
1388
-
1389
- '::'
1390
- => { fhold; fhold; fgoto expr_end; };
1391
-
1392
- ':'
1393
- => { fhold; fgoto expr_beg; };
1394
-
1395
- '%s' c_any
1396
- => {
1397
- if version?(23)
1398
- type, delimiter = tok[0..-2], tok[-1].chr
1399
- fgoto *push_literal(type, delimiter, @ts);
1400
- else
1401
- p = @ts - 1
1402
- fgoto expr_end;
1403
- end
1404
- };
1405
-
1406
- w_any;
1407
-
1408
- c_any
1409
- => { fhold; fgoto expr_end; };
1410
-
1411
- c_eof => do_eof;
1412
- *|;
1413
-
1414
- # After literal function name in definition. Behaves like `expr_end`,
1415
- # but allows a tLABEL.
1416
- #
1417
- # Transitions to `expr_end` afterwards.
1418
- #
1419
- expr_endfn := |*
1420
- label ( any - ':' )
1421
- => { emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
1422
- fhold; fnext expr_labelarg; fbreak; };
1423
-
1424
- w_space_comment;
1425
-
1426
- c_any
1427
- => { fhold; fgoto expr_end; };
1428
-
1429
- c_eof => do_eof;
1430
- *|;
1431
-
1432
- # Literal function name in method call (e.g. `a.class`).
1433
- #
1434
- # Transitions to `expr_arg` afterwards.
1435
- #
1436
- expr_dot := |*
1437
- constant
1438
- => { emit(:tCONSTANT)
1439
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
1440
-
1441
- call_or_var
1442
- => { emit(:tIDENTIFIER)
1443
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
1444
-
1445
- bareword ambiguous_fid_suffix
1446
- => { emit(:tFID, tok(@ts, tm), @ts, tm)
1447
- fnext *arg_or_cmdarg(cmd_state); p = tm - 1; fbreak; };
1448
-
1449
- # See the comment in `expr_fname`.
1450
- operator_fname |
1451
- operator_arithmetic |
1452
- operator_rest
1453
- => { emit_table(PUNCTUATION)
1454
- fnext expr_arg; fbreak; };
1455
-
1456
- w_any;
1457
-
1458
- c_any
1459
- => { fhold; fgoto expr_end; };
1460
-
1461
- c_eof => do_eof;
1462
- *|;
1463
-
1464
- # The previous token emitted was a `tIDENTIFIER` or `tFID`; no space
1465
- # is consumed; the current expression is a command or method call.
1466
- #
1467
- expr_arg := |*
1468
- #
1469
- # COMMAND MODE SPECIFIC TOKENS
1470
- #
1471
-
1472
- # cmd (1 + 2)
1473
- # See below the rationale about expr_endarg.
1474
- w_space+ e_lparen
1475
- => {
1476
- if version?(18)
1477
- emit(:tLPAREN2, '('.freeze, @te - 1, @te)
1478
- fnext expr_value; fbreak;
1479
- else
1480
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1481
- fnext expr_beg; fbreak;
1482
- end
1483
- };
1484
-
1485
- # meth(1 + 2)
1486
- # Regular method call.
1487
- e_lparen
1488
- => { emit(:tLPAREN2, '('.freeze)
1489
- fnext expr_beg; fbreak; };
1490
-
1491
- # meth [...]
1492
- # Array argument. Compare with indexing `meth[...]`.
1493
- w_space+ e_lbrack
1494
- => { emit(:tLBRACK, '['.freeze, @te - 1, @te)
1495
- fnext expr_beg; fbreak; };
1496
-
1497
- # cmd {}
1498
- # Command: method call without parentheses.
1499
- w_space* e_lbrace
1500
- => {
1501
- if @lambda_stack.last == @paren_nest
1502
- @lambda_stack.pop
1503
- emit(:tLAMBEG, '{'.freeze, @te - 1, @te)
1504
- else
1505
- emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1506
- end
1507
- @command_start = true
1508
- @paren_nest += 1
1509
- fnext expr_value; fbreak;
1510
- };
1511
-
1512
- #
1513
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
1514
- #
1515
-
1516
- # a??
1517
- # Ternary operator
1518
- '?' c_space_nl
1519
- => {
1520
- # Unlike expr_beg as invoked in the next rule, do not warn
1521
- p = @ts - 1
1522
- fgoto expr_end;
1523
- };
1524
-
1525
- # a ?b, a? ?
1526
- # Character literal or ternary operator
1527
- w_space* '?'
1528
- => { fhold; fgoto expr_beg; };
1529
-
1530
- # a %{1}, a %[1] (but not "a %=1=" or "a % foo")
1531
- # a /foo/ (but not "a / foo" or "a /=foo")
1532
- # a <<HEREDOC
1533
- w_space+ %{ tm = p }
1534
- ( [%/] ( c_any - c_space_nl - '=' ) # /
1535
- | '<<'
1536
- )
1537
- => {
1538
- if tok(tm, tm + 1) == '/'.freeze
1539
- # Ambiguous regexp literal.
1540
- diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
1541
- end
1542
-
1543
- p = tm - 1
1544
- fgoto expr_beg;
1545
- };
1546
-
1547
- # x *1
1548
- # Ambiguous splat, kwsplat or block-pass.
1549
- w_space+ %{ tm = p } ( '+' | '-' | '*' | '&' | '**' )
1550
- => {
1551
- diagnostic :warning, :ambiguous_prefix, { :prefix => tok(tm, @te) },
1552
- range(tm, @te)
1553
-
1554
- p = tm - 1
1555
- fgoto expr_beg;
1556
- };
1557
-
1558
- # x ::Foo
1559
- # Ambiguous toplevel constant access.
1560
- w_space+ '::'
1561
- => { fhold; fhold; fgoto expr_beg; };
1562
-
1563
- # x:b
1564
- # Symbol.
1565
- w_space* ':'
1566
- => { fhold; fgoto expr_beg; };
1567
-
1568
- w_space+ label
1569
- => { p = @ts - 1; fgoto expr_beg; };
1570
-
1571
- #
1572
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
1573
- #
1574
-
1575
- # a ? b
1576
- # Ternary operator.
1577
- w_space+ %{ tm = p } '?' c_space_nl
1578
- => { p = tm - 1; fgoto expr_end; };
1579
-
1580
- # x + 1: Binary operator or operator-assignment.
1581
- w_space* operator_arithmetic
1582
- ( '=' | c_space_nl )? |
1583
- # x rescue y: Modifier keyword.
1584
- w_space* keyword_modifier |
1585
- # a &. b: Safe navigation operator.
1586
- w_space* '&.' |
1587
- # Miscellanea.
1588
- w_space* punctuation_end
1589
- => {
1590
- p = @ts - 1
1591
- fgoto expr_end;
1592
- };
1593
-
1594
- w_space;
1595
-
1596
- w_comment
1597
- => { fgoto expr_end; };
1598
-
1599
- w_newline
1600
- => { fhold; fgoto expr_end; };
1601
-
1602
- c_any
1603
- => { fhold; fgoto expr_beg; };
1604
-
1605
- c_eof => do_eof;
1606
- *|;
1607
-
1608
- # The previous token was an identifier which was seen while in the
1609
- # command mode (that is, the state at the beginning of #advance was
1610
- # expr_value). This state is very similar to expr_arg, but disambiguates
1611
- # two very rare and specific conditions:
1612
- # * In 1.8 mode, "foo (lambda do end)".
1613
- # * In 1.9+ mode, "f x: -> do foo do end end".
1614
- expr_cmdarg := |*
1615
- w_space+ e_lparen
1616
- => {
1617
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1618
- if version?(18)
1619
- fnext expr_value; fbreak;
1620
- else
1621
- fnext expr_beg; fbreak;
1622
- end
1623
- };
1624
-
1625
- w_space* 'do'
1626
- => {
1627
- if @cond.active?
1628
- emit(:kDO_COND, 'do'.freeze, @te - 2, @te)
1629
- else
1630
- emit(:kDO, 'do'.freeze, @te - 2, @te)
1631
- end
1632
- fnext expr_value; fbreak;
1633
- };
1634
-
1635
- c_any |
1636
- # Disambiguate with the `do' rule above.
1637
- w_space* bareword |
1638
- w_space* label
1639
- => { p = @ts - 1
1640
- fgoto expr_arg; };
1641
-
1642
- c_eof => do_eof;
1643
- *|;
1644
-
1645
- # The rationale for this state is pretty complex. Normally, if an argument
1646
- # is passed to a command and then there is a block (tLCURLY...tRCURLY),
1647
- # the block is attached to the innermost argument (`f` in `m f {}`), or it
1648
- # is a parse error (`m 1 {}`). But there is a special case for passing a single
1649
- # primary expression grouped with parentheses: if you write `m (1) {}` or
1650
- # (2.0 only) `m () {}`, then the block is attached to `m`.
1651
- #
1652
- # Thus, we recognize the opening `(` of a command (remember, a command is
1653
- # a method call without parens) as a tLPAREN_ARG; then, in parser, we recognize
1654
- # `tLPAREN_ARG expr rparen` as a `primary_expr` and before rparen, set the
1655
- # lexer's state to `expr_endarg`, which makes it emit the possibly following
1656
- # `{` as `tLBRACE_ARG`.
1657
- #
1658
- # The default post-`expr_endarg` state is `expr_end`, so this state also handles
1659
- # `do` (as `kDO_BLOCK` in `expr_beg`).
1660
- expr_endarg := |*
1661
- e_lbrace
1662
- => {
1663
- if @lambda_stack.last == @paren_nest
1664
- @lambda_stack.pop
1665
- emit(:tLAMBEG, '{'.freeze)
1666
- else
1667
- emit(:tLBRACE_ARG, '{'.freeze)
1668
- end
1669
- @paren_nest += 1
1670
- @command_start = true
1671
- fnext expr_value; fbreak;
1672
- };
1673
-
1674
- 'do'
1675
- => { emit_do(true)
1676
- fnext expr_value; fbreak; };
1677
-
1678
- w_space_comment;
1679
-
1680
- c_any
1681
- => { fhold; fgoto expr_end; };
1682
-
1683
- c_eof => do_eof;
1684
- *|;
1685
-
1686
- # The rationale for this state is that several keywords accept a value
1687
- # (i.e. should transition to `expr_beg`), do not accept it like a command
1688
- # (i.e. not an `expr_arg`), and must behave like a statement, that is,
1689
- # accept a modifier if/while/etc.
1690
- #
1691
- expr_mid := |*
1692
- keyword_modifier
1693
- => { emit_table(KEYWORDS)
1694
- fnext expr_beg; fbreak; };
1695
-
1696
- bareword
1697
- => { p = @ts - 1; fgoto expr_beg; };
1698
-
1699
- w_space_comment;
1700
-
1701
- w_newline
1702
- => { fhold; fgoto expr_end; };
1703
-
1704
- c_any
1705
- => { fhold; fgoto expr_beg; };
1706
-
1707
- c_eof => do_eof;
1708
- *|;
1709
-
1710
- # Beginning of an expression.
1711
- #
1712
- # Don't fall through to this state from `c_any`; make sure to handle
1713
- # `c_space* c_nl` and let `expr_end` handle the newline.
1714
- # Otherwise code like `f\ndef x` gets glued together and the parser
1715
- # explodes.
1716
- #
1717
- expr_beg := |*
1718
- # +5, -5, - 5
1719
- [+\-] w_any* [0-9]
1720
- => {
1721
- emit(:tUNARY_NUM, tok(@ts, @ts + 1), @ts, @ts + 1)
1722
- fhold; fnext expr_end; fbreak;
1723
- };
1724
-
1725
- # splat *a
1726
- '*'
1727
- => { emit(:tSTAR, '*'.freeze)
1728
- fbreak; };
1729
-
1730
- #
1731
- # STRING AND REGEXP LITERALS
1732
- #
1733
-
1734
- # /regexp/oui
1735
- # /=/ (disambiguation with /=)
1736
- '/' c_any
1737
- => {
1738
- type = delimiter = tok[0].chr
1739
- fhold; fgoto *push_literal(type, delimiter, @ts);
1740
- };
1741
-
1742
- # %<string>
1743
- '%' ( any - [A-Za-z] )
1744
- => {
1745
- type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
1746
- fgoto *push_literal(type, delimiter, @ts);
1747
- };
1748
-
1749
- # %w(we are the people)
1750
- '%' [A-Za-z]+ c_any
1751
- => {
1752
- type, delimiter = tok[0..-2], tok[-1].chr
1753
- fgoto *push_literal(type, delimiter, @ts);
1754
- };
1755
-
1756
- '%' c_eof
1757
- => {
1758
- diagnostic :fatal, :string_eof, nil, range(@ts, @ts + 1)
1759
- };
1760
-
1761
- # Heredoc start.
1762
- # <<END | <<'END' | <<"END" | <<`END` |
1763
- # <<-END | <<-'END' | <<-"END" | <<-`END` |
1764
- # <<~END | <<~'END' | <<~"END" | <<~`END`
1765
- '<<' [~\-]?
1766
- ( '"' ( any - '"' )* '"'
1767
- | "'" ( any - "'" )* "'"
1768
- | "`" ( any - "`" )* "`"
1769
- | bareword ) % { heredoc_e = p }
1770
- c_line* c_nl % { new_herebody_s = p }
1771
- => {
1772
- tok(@ts, heredoc_e) =~ /^<<(-?)(~?)(["'`]?)(.*)\3$/m
1773
-
1774
- indent = !$1.empty? || !$2.empty?
1775
- dedent_body = !$2.empty?
1776
- type = $3.empty? ? '<<"'.freeze : ('<<'.freeze + $3)
1777
- delimiter = $4
1778
-
1779
- if @version >= 27
1780
- if delimiter.count("\n") > 0 || delimiter.count("\r") > 0
1781
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1782
- end
1783
- elsif @version >= 24
1784
- if delimiter.count("\n") > 0
1785
- if delimiter.end_with?("\n")
1786
- diagnostic :warning, :heredoc_id_ends_with_nl, nil, range(@ts, @ts + 1)
1787
- delimiter = delimiter.rstrip
1788
- else
1789
- diagnostic :fatal, :heredoc_id_has_newline, nil, range(@ts, @ts + 1)
1790
- end
1791
- end
1792
- end
1793
-
1794
- if dedent_body && version?(18, 19, 20, 21, 22)
1795
- emit(:tLSHFT, '<<'.freeze, @ts, @ts + 2)
1796
- p = @ts + 1
1797
- fnext expr_beg; fbreak;
1798
- else
1799
- fnext *push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
1800
-
1801
- @herebody_s ||= new_herebody_s
1802
- p = @herebody_s - 1
1803
- end
1804
- };
1805
-
1806
- # Escaped unterminated heredoc start
1807
- # <<'END | <<"END | <<`END |
1808
- # <<-'END | <<-"END | <<-`END |
1809
- # <<~'END | <<~"END | <<~`END
1810
- #
1811
- # If the heredoc is terminated the rule above should handle it
1812
- '<<' [~\-]?
1813
- ('"' (any - c_nl - '"')*
1814
- |"'" (any - c_nl - "'")*
1815
- |"`" (any - c_nl - "`")
1816
- )
1817
- => {
1818
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1819
- };
1820
-
1821
- #
1822
- # SYMBOL LITERALS
1823
- #
1824
-
1825
- # :&&, :||
1826
- ':' ('&&' | '||') => {
1827
- fhold; fhold;
1828
- emit(:tSYMBEG, tok(@ts, @ts + 1), @ts, @ts + 1)
1829
- fgoto expr_fname;
1830
- };
1831
-
1832
- # :"bar", :'baz'
1833
- ':' ['"] # '
1834
- => {
1835
- type, delimiter = tok, tok[-1].chr
1836
- fgoto *push_literal(type, delimiter, @ts);
1837
- };
1838
-
1839
- # :!@ is :!
1840
- # :~@ is :~
1841
- ':' [!~] '@'
1842
- => {
1843
- emit(:tSYMBOL, tok(@ts + 1, @ts + 2))
1844
- fnext expr_end; fbreak;
1845
- };
1846
-
1847
- ':' bareword ambiguous_symbol_suffix
1848
- => {
1849
- emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
1850
- p = tm - 1
1851
- fnext expr_end; fbreak;
1852
- };
1853
-
1854
- ':' ( bareword | global_var | class_var | instance_var |
1855
- operator_fname | operator_arithmetic | operator_rest )
1856
- => {
1857
- emit(:tSYMBOL, tok(@ts + 1), @ts)
1858
- fnext expr_end; fbreak;
1859
- };
1860
-
1861
- ':' ( '@' %{ tm = p - 1; diag_msg = :ivar_name }
1862
- | '@@' %{ tm = p - 2; diag_msg = :cvar_name }
1863
- ) [0-9]*
1864
- => {
1865
- if @version >= 27
1866
- diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
1867
- else
1868
- emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
1869
- p = @ts
1870
- end
1871
-
1872
- fnext expr_end; fbreak;
1873
- };
1874
-
1875
- #
1876
- # AMBIGUOUS TERNARY OPERATOR
1877
- #
1878
-
1879
- # Character constant, like ?a, ?\n, ?\u1000, and so on
1880
- # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
1881
- '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
1882
- | (c_any - c_space_nl - e_bs) % { @escape = nil }
1883
- )
1884
- => {
1885
- value = @escape || tok(@ts + 1)
1886
-
1887
- if version?(18)
1888
- emit(:tINTEGER, value.getbyte(0))
1889
- else
1890
- emit(:tCHARACTER, value)
1891
- end
1892
-
1893
- fnext expr_end; fbreak;
1894
- };
1895
-
1896
- '?' c_space_nl
1897
- => {
1898
- escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
1899
- "\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
1900
- diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
1901
-
1902
- p = @ts - 1
1903
- fgoto expr_end;
1904
- };
1905
-
1906
- '?' c_eof
1907
- => {
1908
- diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
1909
- };
1910
-
1911
- # f ?aa : b: Disambiguate with a character literal.
1912
- '?' [A-Za-z_] bareword
1913
- => {
1914
- p = @ts - 1
1915
- fgoto expr_end;
1916
- };
1917
-
1918
- #
1919
- # AMBIGUOUS EMPTY BLOCK ARGUMENTS
1920
- #
1921
-
1922
- # Ruby >= 2.7 emits it as two tPIPE terminals
1923
- # while Ruby < 2.7 as a single tOROP (like in `a || b`)
1924
- '||'
1925
- => {
1926
- if @version >= 27
1927
- emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
1928
- fhold;
1929
- fnext expr_beg; fbreak;
1930
- else
1931
- p -= 2
1932
- fgoto expr_end;
1933
- end
1934
- };
1935
-
1936
- #
1937
- # KEYWORDS AND PUNCTUATION
1938
- #
1939
-
1940
- # a({b=>c})
1941
- e_lbrace
1942
- => {
1943
- if @lambda_stack.last == @paren_nest
1944
- @lambda_stack.pop
1945
- @command_start = true
1946
- emit(:tLAMBEG, '{'.freeze)
1947
- else
1948
- emit(:tLBRACE, '{'.freeze)
1949
- end
1950
- @paren_nest += 1
1951
- fbreak;
1952
- };
1953
-
1954
- # a([1, 2])
1955
- e_lbrack
1956
- => { emit(:tLBRACK, '['.freeze)
1957
- fbreak; };
1958
-
1959
- # a()
1960
- e_lparen
1961
- => { emit(:tLPAREN, '('.freeze)
1962
- fbreak; };
1963
-
1964
- # a(+b)
1965
- punctuation_begin
1966
- => { emit_table(PUNCTUATION_BEGIN)
1967
- fbreak; };
1968
-
1969
- # rescue Exception => e: Block rescue.
1970
- # Special because it should transition to expr_mid.
1971
- 'rescue' %{ tm = p } '=>'?
1972
- => { emit(:kRESCUE, 'rescue'.freeze, @ts, tm)
1973
- p = tm - 1
1974
- fnext expr_mid; fbreak; };
1975
-
1976
- # if a: Statement if.
1977
- keyword_modifier
1978
- => { emit_table(KEYWORDS_BEGIN)
1979
- @command_start = true
1980
- fnext expr_value; fbreak; };
1981
-
1982
- #
1983
- # RUBY 1.9 HASH LABELS
1984
- #
1985
-
1986
- label ( any - ':' )
1987
- => {
1988
- fhold;
1989
-
1990
- if version?(18)
1991
- ident = tok(@ts, @te - 2)
1992
-
1993
- emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
1994
- ident, @ts, @te - 2)
1995
- fhold; # continue as a symbol
1996
-
1997
- if !@static_env.nil? && @static_env.declared?(ident)
1998
- fnext expr_end;
1999
- else
2000
- fnext *arg_or_cmdarg(cmd_state);
2001
- end
2002
- else
2003
- emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
2004
- fnext expr_labelarg;
2005
- end
2006
-
2007
- fbreak;
2008
- };
2009
-
2010
- #
2011
- # RUBY 2.7 BEGINLESS RANGE
2012
-
2013
- '..'
2014
- => {
2015
- if @version >= 27
2016
- emit(:tBDOT2)
2017
- else
2018
- emit(:tDOT2)
2019
- end
2020
-
2021
- fnext expr_beg; fbreak;
2022
- };
2023
-
2024
- '...'
2025
- => {
2026
- if @version >= 27
2027
- emit(:tBDOT3)
2028
- else
2029
- emit(:tDOT3)
2030
- end
2031
-
2032
- fnext expr_beg; fbreak;
2033
- };
2034
-
2035
- #
2036
- # CONTEXT-DEPENDENT VARIABLE LOOKUP OR COMMAND INVOCATION
2037
- #
2038
-
2039
- # foo= bar: Disambiguate with bareword rule below.
2040
- bareword ambiguous_ident_suffix |
2041
- # def foo: Disambiguate with bareword rule below.
2042
- keyword
2043
- => { p = @ts - 1
2044
- fgoto expr_end; };
2045
-
2046
- # a = 42; a [42]: Indexing.
2047
- # def a; end; a [42]: Array argument.
2048
- call_or_var
2049
- => local_ident;
2050
-
2051
- (call_or_var - keyword)
2052
- % { ident_tok = tok; ident_ts = @ts; ident_te = @te; }
2053
- w_space+ '('
2054
- => {
2055
- emit(:tIDENTIFIER, ident_tok, ident_ts, ident_te)
2056
- p = ident_te - 1
2057
-
2058
- if !@static_env.nil? && @static_env.declared?(ident_tok) && @version < 25
2059
- fnext expr_endfn;
2060
- else
2061
- fnext expr_cmdarg;
2062
- end
2063
- fbreak;
2064
- };
2065
-
2066
- #
2067
- # WHITESPACE
2068
- #
2069
-
2070
- w_any;
2071
-
2072
- e_heredoc_nl '=begin' ( c_space | c_nl_zlen )
2073
- => {
2074
- p = @ts - 1
2075
- @cs_before_block_comment = @cs
2076
- fgoto line_begin;
2077
- };
2078
-
2079
- #
2080
- # DEFAULT TRANSITION
2081
- #
2082
-
2083
- # The following rules match most binary and all unary operators.
2084
- # Rules for binary operators provide better error reporting.
2085
- operator_arithmetic '=' |
2086
- operator_rest |
2087
- punctuation_end |
2088
- c_any
2089
- => { p = @ts - 1; fgoto expr_end; };
2090
-
2091
- c_eof => do_eof;
2092
- *|;
2093
-
2094
- # Special newline handling for "def a b:"
2095
- #
2096
- expr_labelarg := |*
2097
- w_space_comment;
2098
-
2099
- w_newline
2100
- => {
2101
- if @in_kwarg
2102
- fhold; fgoto expr_end;
2103
- else
2104
- fgoto line_begin;
2105
- end
2106
- };
2107
-
2108
- c_any
2109
- => { fhold; fgoto expr_beg; };
2110
-
2111
- c_eof => do_eof;
2112
- *|;
2113
-
2114
- # Like expr_beg, but no 1.9 label or 2.2 quoted label possible.
2115
- #
2116
- expr_value := |*
2117
- # a:b: a(:b), a::B, A::B
2118
- label (any - ':')
2119
- => { p = @ts - 1
2120
- fgoto expr_end; };
2121
-
2122
- # "bar", 'baz'
2123
- ['"] # '
2124
- => {
2125
- fgoto *push_literal(tok, tok, @ts);
2126
- };
2127
-
2128
- w_space_comment;
2129
-
2130
- w_newline
2131
- => { fgoto line_begin; };
2132
-
2133
- c_any
2134
- => { fhold; fgoto expr_beg; };
2135
-
2136
- c_eof => do_eof;
2137
- *|;
2138
-
2139
- expr_end := |*
2140
- #
2141
- # STABBY LAMBDA
2142
- #
2143
-
2144
- '->'
2145
- => {
2146
- emit(:tLAMBDA, '->'.freeze, @ts, @ts + 2)
2147
-
2148
- @lambda_stack.push @paren_nest
2149
- fnext expr_endfn; fbreak;
2150
- };
2151
-
2152
- e_lbrace | 'do'
2153
- => {
2154
- if @lambda_stack.last == @paren_nest
2155
- @lambda_stack.pop
2156
-
2157
- if tok == '{'.freeze
2158
- emit(:tLAMBEG, '{'.freeze)
2159
- else # 'do'
2160
- emit(:kDO_LAMBDA, 'do'.freeze)
2161
- end
2162
- else
2163
- if tok == '{'.freeze
2164
- emit(:tLCURLY, '{'.freeze)
2165
- else # 'do'
2166
- emit_do
2167
- end
2168
- end
2169
- if tok == '{'.freeze
2170
- @paren_nest += 1
2171
- end
2172
- @command_start = true
2173
-
2174
- fnext expr_value; fbreak;
2175
- };
2176
-
2177
- #
2178
- # KEYWORDS
2179
- #
2180
-
2181
- keyword_with_fname
2182
- => { emit_table(KEYWORDS)
2183
- fnext expr_fname; fbreak; };
2184
-
2185
- 'class' w_any* '<<'
2186
- => { emit(:kCLASS, 'class'.freeze, @ts, @ts + 5)
2187
- emit(:tLSHFT, '<<'.freeze, @te - 2, @te)
2188
- fnext expr_value; fbreak; };
2189
-
2190
- # a if b:c: Syntax error.
2191
- keyword_modifier
2192
- => { emit_table(KEYWORDS)
2193
- fnext expr_beg; fbreak; };
2194
-
2195
- # elsif b:c: elsif b(:c)
2196
- keyword_with_value
2197
- => { emit_table(KEYWORDS)
2198
- @command_start = true
2199
- fnext expr_value; fbreak; };
2200
-
2201
- keyword_with_mid
2202
- => { emit_table(KEYWORDS)
2203
- fnext expr_mid; fbreak; };
2204
-
2205
- keyword_with_arg
2206
- => {
2207
- emit_table(KEYWORDS)
2208
-
2209
- if version?(18) && tok == 'not'.freeze
2210
- fnext expr_beg; fbreak;
2211
- else
2212
- fnext expr_arg; fbreak;
2213
- end
2214
- };
2215
-
2216
- '__ENCODING__'
2217
- => {
2218
- if version?(18)
2219
- emit(:tIDENTIFIER)
2220
-
2221
- unless !@static_env.nil? && @static_env.declared?(tok)
2222
- fnext *arg_or_cmdarg(cmd_state);
2223
- end
2224
- else
2225
- emit(:k__ENCODING__, '__ENCODING__'.freeze)
2226
- end
2227
- fbreak;
2228
- };
2229
-
2230
- keyword_with_end
2231
- => { emit_table(KEYWORDS)
2232
- fbreak; };
2233
-
2234
- #
2235
- # NUMERIC LITERALS
2236
- #
2237
-
2238
- ( '0' [Xx] %{ @num_base = 16; @num_digits_s = p } int_hex
2239
- | '0' [Dd] %{ @num_base = 10; @num_digits_s = p } int_dec
2240
- | '0' [Oo] %{ @num_base = 8; @num_digits_s = p } int_dec
2241
- | '0' [Bb] %{ @num_base = 2; @num_digits_s = p } int_bin
2242
- | [1-9] digit* '_'? %{ @num_base = 10; @num_digits_s = @ts } int_dec
2243
- | '0' digit* '_'? %{ @num_base = 8; @num_digits_s = @ts } int_dec
2244
- ) %{ @num_suffix_s = p } int_suffix
2245
- => {
2246
- digits = tok(@num_digits_s, @num_suffix_s)
2247
-
2248
- if digits.end_with? '_'.freeze
2249
- diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
2250
- range(@te - 1, @te)
2251
- elsif digits.empty? && @num_base == 8 && version?(18)
2252
- # 1.8 did not raise an error on 0o.
2253
- digits = '0'.freeze
2254
- elsif digits.empty?
2255
- diagnostic :error, :empty_numeric
2256
- elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
2257
- invalid_s = @num_digits_s + invalid_idx
2258
- diagnostic :error, :invalid_octal, nil,
2259
- range(invalid_s, invalid_s + 1)
2260
- end
2261
-
2262
- if version?(18, 19, 20)
2263
- emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s)
2264
- p = @num_suffix_s - 1
2265
- else
2266
- @num_xfrm.call(digits.to_i(@num_base))
2267
- end
2268
- fbreak;
2269
- };
2270
-
2271
- flo_frac flo_pow?
2272
- => {
2273
- diagnostic :error, :no_dot_digit_literal
2274
- };
2275
-
2276
- flo_int [eE]
2277
- => {
2278
- if version?(18, 19, 20)
2279
- diagnostic :error,
2280
- :trailing_in_number, { :character => tok(@te - 1, @te) },
2281
- range(@te - 1, @te)
2282
- else
2283
- emit(:tINTEGER, tok(@ts, @te - 1).to_i, @ts, @te - 1)
2284
- fhold; fbreak;
2285
- end
2286
- };
2287
-
2288
- flo_int flo_frac [eE]
2289
- => {
2290
- if version?(18, 19, 20)
2291
- diagnostic :error,
2292
- :trailing_in_number, { :character => tok(@te - 1, @te) },
2293
- range(@te - 1, @te)
2294
- else
2295
- emit(:tFLOAT, tok(@ts, @te - 1).to_f, @ts, @te - 1)
2296
- fhold; fbreak;
2297
- end
2298
- };
2299
-
2300
- flo_int
2301
- ( flo_frac? flo_pow %{ @num_suffix_s = p } flo_pow_suffix
2302
- | flo_frac %{ @num_suffix_s = p } flo_suffix
2303
- )
2304
- => {
2305
- digits = tok(@ts, @num_suffix_s)
2306
-
2307
- if version?(18, 19, 20)
2308
- emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
2309
- p = @num_suffix_s - 1
2310
- else
2311
- @num_xfrm.call(digits)
2312
- end
2313
- fbreak;
2314
- };
2315
-
2316
- #
2317
- # STRING AND XSTRING LITERALS
2318
- #
2319
-
2320
- # `echo foo`, "bar", 'baz'
2321
- '`' | ['"] # '
2322
- => {
2323
- type, delimiter = tok, tok[-1].chr
2324
- fgoto *push_literal(type, delimiter, @ts, nil, false, false, true);
2325
- };
2326
-
2327
- #
2328
- # CONSTANTS AND VARIABLES
2329
- #
2330
-
2331
- constant
2332
- => { emit(:tCONSTANT)
2333
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
2334
-
2335
- constant ambiguous_const_suffix
2336
- => { emit(:tCONSTANT, tok(@ts, tm), @ts, tm)
2337
- p = tm - 1; fbreak; };
2338
-
2339
- global_var | class_var_v | instance_var_v
2340
- => { p = @ts - 1; fcall expr_variable; };
2341
-
2342
- #
2343
- # METHOD CALLS
2344
- #
2345
-
2346
- '.' | '&.' | '::'
2347
- => { emit_table(PUNCTUATION)
2348
- fnext expr_dot; fbreak; };
2349
-
2350
- call_or_var
2351
- => local_ident;
2352
-
2353
- bareword ambiguous_fid_suffix
2354
- => {
2355
- if tm == @te
2356
- # Suffix was consumed, e.g. foo!
2357
- emit(:tFID)
2358
- else
2359
- # Suffix was not consumed, e.g. foo!=
2360
- emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
2361
- p = tm - 1
2362
- end
2363
- fnext expr_arg; fbreak;
2364
- };
2365
-
2366
- #
2367
- # OPERATORS
2368
- #
2369
-
2370
- '*' | '=>'
2371
- => {
2372
- emit_table(PUNCTUATION)
2373
- fgoto expr_value;
2374
- };
2375
-
2376
- # When '|', '~', '!', '=>' are used as operators
2377
- # they do not accept any symbols (or quoted labels) after.
2378
- # Other binary operators accept it.
2379
- ( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' )
2380
- => {
2381
- emit_table(PUNCTUATION);
2382
- fnext expr_value; fbreak;
2383
- };
2384
-
2385
- ( e_lparen | '|' | '~' | '!' )
2386
- => { emit_table(PUNCTUATION)
2387
- fnext expr_beg; fbreak; };
2388
-
2389
- e_rbrace | e_rparen | e_rbrack
2390
- => {
2391
- emit_table(PUNCTUATION)
2392
-
2393
- if @version < 24
2394
- @cond.lexpop
2395
- @cmdarg.lexpop
2396
- else
2397
- @cond.pop
2398
- @cmdarg.pop
2399
- end
2400
-
2401
- if tok == '}'.freeze || tok == ']'.freeze
2402
- if @version >= 25
2403
- fnext expr_end;
2404
- else
2405
- fnext expr_endarg;
2406
- end
2407
- else # )
2408
- # fnext expr_endfn; ?
2409
- end
2410
-
2411
- fbreak;
2412
- };
2413
-
2414
- operator_arithmetic '='
2415
- => { emit(:tOP_ASGN, tok(@ts, @te - 1))
2416
- fnext expr_beg; fbreak; };
2417
-
2418
- '?'
2419
- => { emit(:tEH, '?'.freeze)
2420
- fnext expr_value; fbreak; };
2421
-
2422
- e_lbrack
2423
- => { emit(:tLBRACK2, '['.freeze)
2424
- fnext expr_beg; fbreak; };
2425
-
2426
- '...' c_nl
2427
- => {
2428
- if @paren_nest == 0
2429
- diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
2430
- end
2431
-
2432
- emit(:tDOT3, '...'.freeze, @ts, @te - 1)
2433
- fhold;
2434
- fnext expr_beg; fbreak;
2435
- };
2436
-
2437
- punctuation_end
2438
- => { emit_table(PUNCTUATION)
2439
- fnext expr_beg; fbreak; };
2440
-
2441
- #
2442
- # WHITESPACE
2443
- #
2444
-
2445
- w_space_comment;
2446
-
2447
- w_newline
2448
- => { fgoto leading_dot; };
2449
-
2450
- ';'
2451
- => { emit(:tSEMI, ';'.freeze)
2452
- @command_start = true
2453
- fnext expr_value; fbreak; };
2454
-
2455
- '\\' c_line {
2456
- diagnostic :error, :bare_backslash, nil, range(@ts, @ts + 1)
2457
- fhold;
2458
- };
2459
-
2460
- c_any
2461
- => {
2462
- diagnostic :fatal, :unexpected, { :character => tok.inspect[1..-2] }
2463
- };
2464
-
2465
- c_eof => do_eof;
2466
- *|;
2467
-
2468
- leading_dot := |*
2469
- # Insane leading dots:
2470
- # a #comment
2471
- # # post-2.7 comment
2472
- # .b: a.b
2473
-
2474
- # Here we use '\n' instead of w_newline to not modify @newline_s
2475
- # and eventually properly emit tNL
2476
- (c_space* w_space_comment '\n')+
2477
- => {
2478
- if @version < 27
2479
- # Ruby before 2.7 doesn't support comments before leading dot.
2480
- # If a line after "a" starts with a comment then "a" is a self-contained statement.
2481
- # So in that case we emit a special tNL token and start reading the
2482
- # next line as a separate statement.
2483
- #
2484
- # Note: block comments before leading dot are not supported on any version of Ruby.
2485
- emit(:tNL, nil, @newline_s, @newline_s + 1)
2486
- fhold; fnext line_begin; fbreak;
2487
- end
2488
- };
2489
-
2490
- c_space* %{ tm = p } ('.' | '&.')
2491
- => { p = tm - 1; fgoto expr_end; };
2492
-
2493
- any
2494
- => { emit(:tNL, nil, @newline_s, @newline_s + 1)
2495
- fhold; fnext line_begin; fbreak; };
2496
- *|;
2497
-
2498
- #
2499
- # === EMBEDDED DOCUMENT (aka BLOCK COMMENT) PARSING ===
2500
- #
2501
-
2502
- line_comment := |*
2503
- '=end' c_line* c_nl_zlen
2504
- => {
2505
- emit_comment(@eq_begin_s, @te)
2506
- fgoto *@cs_before_block_comment;
2507
- };
2508
-
2509
- c_line* c_nl;
2510
-
2511
- c_line* zlen
2512
- => {
2513
- diagnostic :fatal, :embedded_document, nil,
2514
- range(@eq_begin_s, @eq_begin_s + '=begin'.length)
2515
- };
2516
- *|;
2517
-
2518
- line_begin := |*
2519
- w_any;
2520
-
2521
- '=begin' ( c_space | c_nl_zlen )
2522
- => { @eq_begin_s = @ts
2523
- fgoto line_comment; };
2524
-
2525
- '__END__' ( c_eol - zlen )
2526
- => { p = pe - 3 };
2527
-
2528
- c_any
2529
- => { cmd_state = true; fhold; fgoto expr_value; };
2530
-
2531
- c_eof => do_eof;
2532
- *|;
2533
-
2534
- }%%
2535
- # %
2536
- end