parser 2.7.1.1 → 3.0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/lib/parser.rb +1 -0
  3. data/lib/parser/all.rb +2 -0
  4. data/lib/parser/ast/processor.rb +5 -0
  5. data/lib/parser/base.rb +7 -5
  6. data/lib/parser/builders/default.rb +263 -23
  7. data/lib/parser/context.rb +5 -0
  8. data/lib/parser/current.rb +24 -6
  9. data/lib/parser/current_arg_stack.rb +5 -2
  10. data/lib/parser/diagnostic.rb +1 -1
  11. data/lib/parser/diagnostic/engine.rb +1 -2
  12. data/lib/parser/lexer.rb +887 -803
  13. data/lib/parser/macruby.rb +2214 -2189
  14. data/lib/parser/max_numparam_stack.rb +13 -5
  15. data/lib/parser/messages.rb +18 -0
  16. data/lib/parser/meta.rb +6 -5
  17. data/lib/parser/ruby18.rb +9 -3
  18. data/lib/parser/ruby19.rb +2297 -2289
  19. data/lib/parser/ruby20.rb +2413 -2397
  20. data/lib/parser/ruby21.rb +2419 -2411
  21. data/lib/parser/ruby22.rb +2468 -2460
  22. data/lib/parser/ruby23.rb +2452 -2452
  23. data/lib/parser/ruby24.rb +2435 -2430
  24. data/lib/parser/ruby25.rb +2220 -2214
  25. data/lib/parser/ruby26.rb +2220 -2214
  26. data/lib/parser/ruby27.rb +3715 -3615
  27. data/lib/parser/ruby28.rb +8047 -0
  28. data/lib/parser/ruby30.rb +8060 -0
  29. data/lib/parser/ruby31.rb +8226 -0
  30. data/lib/parser/rubymotion.rb +2190 -2182
  31. data/lib/parser/runner.rb +31 -2
  32. data/lib/parser/runner/ruby_rewrite.rb +2 -2
  33. data/lib/parser/source/buffer.rb +53 -28
  34. data/lib/parser/source/comment.rb +14 -1
  35. data/lib/parser/source/comment/associator.rb +31 -8
  36. data/lib/parser/source/map/method_definition.rb +25 -0
  37. data/lib/parser/source/range.rb +10 -3
  38. data/lib/parser/source/tree_rewriter.rb +100 -10
  39. data/lib/parser/source/tree_rewriter/action.rb +114 -21
  40. data/lib/parser/static_environment.rb +4 -0
  41. data/lib/parser/tree_rewriter.rb +1 -2
  42. data/lib/parser/variables_stack.rb +4 -0
  43. data/lib/parser/version.rb +1 -1
  44. data/parser.gemspec +3 -18
  45. metadata +17 -98
  46. data/.gitignore +0 -33
  47. data/.travis.yml +0 -42
  48. data/.yardopts +0 -21
  49. data/CHANGELOG.md +0 -1075
  50. data/CONTRIBUTING.md +0 -17
  51. data/Gemfile +0 -10
  52. data/README.md +0 -309
  53. data/Rakefile +0 -166
  54. data/ci/run_rubocop_specs +0 -14
  55. data/doc/AST_FORMAT.md +0 -2180
  56. data/doc/CUSTOMIZATION.md +0 -37
  57. data/doc/INTERNALS.md +0 -21
  58. data/doc/css/.gitkeep +0 -0
  59. data/doc/css/common.css +0 -68
  60. data/lib/parser/lexer.rl +0 -2536
  61. data/lib/parser/macruby.y +0 -2198
  62. data/lib/parser/ruby18.y +0 -1934
  63. data/lib/parser/ruby19.y +0 -2175
  64. data/lib/parser/ruby20.y +0 -2353
  65. data/lib/parser/ruby21.y +0 -2357
  66. data/lib/parser/ruby22.y +0 -2364
  67. data/lib/parser/ruby23.y +0 -2370
  68. data/lib/parser/ruby24.y +0 -2408
  69. data/lib/parser/ruby25.y +0 -2405
  70. data/lib/parser/ruby26.y +0 -2413
  71. data/lib/parser/ruby27.y +0 -2941
  72. data/lib/parser/rubymotion.y +0 -2182
  73. data/test/bug_163/fixtures/input.rb +0 -5
  74. data/test/bug_163/fixtures/output.rb +0 -5
  75. data/test/bug_163/rewriter.rb +0 -20
  76. data/test/helper.rb +0 -60
  77. data/test/parse_helper.rb +0 -319
  78. data/test/racc_coverage_helper.rb +0 -133
  79. data/test/test_base.rb +0 -31
  80. data/test/test_current.rb +0 -29
  81. data/test/test_diagnostic.rb +0 -96
  82. data/test/test_diagnostic_engine.rb +0 -62
  83. data/test/test_encoding.rb +0 -99
  84. data/test/test_lexer.rb +0 -3608
  85. data/test/test_lexer_stack_state.rb +0 -78
  86. data/test/test_parse_helper.rb +0 -80
  87. data/test/test_parser.rb +0 -9430
  88. data/test/test_runner_parse.rb +0 -35
  89. data/test/test_runner_rewrite.rb +0 -47
  90. data/test/test_source_buffer.rb +0 -162
  91. data/test/test_source_comment.rb +0 -36
  92. data/test/test_source_comment_associator.rb +0 -367
  93. data/test/test_source_map.rb +0 -15
  94. data/test/test_source_range.rb +0 -187
  95. data/test/test_source_rewriter.rb +0 -541
  96. data/test/test_source_rewriter_action.rb +0 -46
  97. data/test/test_source_tree_rewriter.rb +0 -253
  98. data/test/test_static_environment.rb +0 -45
  99. data/test/using_tree_rewriter/fixtures/input.rb +0 -3
  100. data/test/using_tree_rewriter/fixtures/output.rb +0 -3
  101. data/test/using_tree_rewriter/using_tree_rewriter.rb +0 -9
data/doc/CUSTOMIZATION.md DELETED
@@ -1,37 +0,0 @@
1
- # Customizing Parsers
2
-
3
- While the default setup of the parsers provided by this Gem should be suitable
4
- for most, some developers might want to change parts of it. An example would be
5
- the use of a custom class for nodes instead of `Parser::AST::Node`.
6
-
7
- Customizing the AST is done by creating a custom builder class and passing it
8
- to the constructor method of a parser. The default setup comes down to the
9
- following:
10
-
11
- builder = Parser::Builders::Default.new
12
- parser = Parser::Ruby19.new(builder)
13
-
14
- When creating your own builder class it's best to subclass the default one so
15
- that you don't have to redefine every used method again:
16
-
17
- class MyBuilder < Parser::Builders::Default
18
-
19
- end
20
-
21
- builder = MyBuilder.new
22
- parser = Parser::Ruby19.new(builder)
23
-
24
- ## Custom Node Classes
25
-
26
- To use a custom node class you have to override the method
27
- `Parser::Builders::Default#n`:
28
-
29
- class MyBuilder < Parser::Builders::Default
30
- def n(type, children, location)
31
- return MyNodeClass.new(type, children, :location => location)
32
- end
33
- end
34
-
35
- Note that the used class (and corresponding instance) must be compatible with
36
- `Parser::AST::Node` so it's best to subclass it and override/add code where
37
- needed.
data/doc/INTERNALS.md DELETED
@@ -1,21 +0,0 @@
1
- Entry points
2
- ------------
3
-
4
- Parser should be kept as slim as possible. This includes not loading
5
- any potentially large files when they are likely to be unused in practice.
6
-
7
- Parser has five main (classes of) `require` entry points:
8
-
9
- * `require 'parser'`. Main entry point, requires all classes which
10
- are used across the entire library.
11
- * `require 'parser/rubyXX'`. Version-specific entry point. Can raise
12
- a NotImplementedError if current Ruby runtime is unable to parse the
13
- requested Ruby version.
14
- * `require 'parser/all'`. Requires all available parsers for released
15
- versions of Ruby. Can raise NotImplementedError.
16
- * `require 'parser/runner'`. Requires all the stuff which is useful for
17
- command-line tools but not otherwise.
18
- * `require 'parser/runner/X'`. Runner-specific entry point.
19
-
20
- All non-main entry points internally `require 'parser'`. Additionally, all
21
- runner-specific entry points internally `require 'parser/runner'`.
data/doc/css/.gitkeep DELETED
File without changes
data/doc/css/common.css DELETED
@@ -1,68 +0,0 @@
1
- body
2
- {
3
- font-size: 14px;
4
- line-height: 1.6;
5
- margin: 0 auto;
6
- max-width: 960px;
7
- }
8
-
9
- p code
10
- {
11
- background: #f2f2f2;
12
- padding-left: 3px;
13
- padding-right: 3px;
14
- }
15
-
16
- pre.code
17
- {
18
- font-size: 13px;
19
- line-height: 1.4;
20
- }
21
-
22
- /**
23
- * YARD uses generic table styles, using a special class means those tables
24
- * don't get messed up.
25
- */
26
- .table
27
- {
28
- border: 1px solid #ccc;
29
- border-right: none;
30
- border-collapse: separate;
31
- border-spacing: 0;
32
- text-align: left;
33
- }
34
-
35
- .table.full
36
- {
37
- width: 100%;
38
- }
39
-
40
- .table .field_name
41
- {
42
- min-width: 160px;
43
- }
44
-
45
- .table thead tr th.no_sort:first-child
46
- {
47
- width: 25px;
48
- }
49
-
50
- .table thead tr th, .table tbody tr td
51
- {
52
- border-bottom: 1px solid #ccc;
53
- border-right: 1px solid #ccc;
54
- min-width: 20px;
55
- padding: 8px 5px;
56
- text-align: left;
57
- vertical-align: top;
58
- }
59
-
60
- .table tbody tr:last-child td
61
- {
62
- border-bottom: none;
63
- }
64
-
65
- .table tr:nth-child(odd) td
66
- {
67
- background: #f9f9f9;
68
- }
data/lib/parser/lexer.rl DELETED
@@ -1,2536 +0,0 @@
1
- %%machine lex; # % fix highlighting
2
-
3
- #
4
- # === BEFORE YOU START ===
5
- #
6
- # Read the Ruby Hacking Guide chapter 11, available in English at
7
- # http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
8
- #
9
- # Remember two things about Ragel scanners:
10
- #
11
- # 1) Longest match wins.
12
- #
13
- # 2) If two matches have the same length, the first
14
- # in source code wins.
15
- #
16
- # General rules of making Ragel and Bison happy:
17
- #
18
- # * `p` (position) and `@te` contain the index of the character
19
- # they're pointing to ("current"), plus one. `@ts` contains the index
20
- # of the corresponding character. The code for extracting matched token is:
21
- #
22
- # @source_buffer.slice(@ts...@te)
23
- #
24
- # * If your input is `foooooooobar` and the rule is:
25
- #
26
- # 'f' 'o'+
27
- #
28
- # the result will be:
29
- #
30
- # foooooooobar
31
- # ^ ts=0 ^ p=te=9
32
- #
33
- # * A Ragel lexer action should not emit more than one token, unless
34
- # you know what you are doing.
35
- #
36
- # * All Ragel commands (fnext, fgoto, ...) end with a semicolon.
37
- #
38
- # * If an action emits the token and transitions to another state, use
39
- # these Ragel commands:
40
- #
41
- # emit($whatever)
42
- # fnext $next_state; fbreak;
43
- #
44
- # If you perform `fgoto` in an action which neither emits a token nor
45
- # rewinds the stream pointer, the parser's side-effectful,
46
- # context-sensitive lookahead actions will break in a hard to detect
47
- # and debug way.
48
- #
49
- # * If an action does not emit a token:
50
- #
51
- # fgoto $next_state;
52
- #
53
- # * If an action features lookbehind, i.e. matches characters with the
54
- # intent of passing them to another action:
55
- #
56
- # p = @ts - 1
57
- # fgoto $next_state;
58
- #
59
- # or, if the lookbehind consists of a single character:
60
- #
61
- # fhold; fgoto $next_state;
62
- #
63
- # * Ragel merges actions. So, if you have `e_lparen = '(' %act` and
64
- # `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
65
- # _will_ invoke the action `act`.
66
- #
67
- # e_something stands for "something with **e**mbedded action".
68
- #
69
- # * EOF is explicit and is matched by `c_eof`. If you want to introspect
70
- # the state of the lexer, add this rule to the state:
71
- #
72
- # c_eof => do_eof;
73
- #
74
- # * If you proceed past EOF, the lexer will complain:
75
- #
76
- # NoMethodError: undefined method `ord' for nil:NilClass
77
- #
78
-
79
- class Parser::Lexer
80
-
81
- %% write data nofinal;
82
- # %
83
-
84
- ESCAPES = {
85
- ?a.ord => "\a", ?b.ord => "\b", ?e.ord => "\e", ?f.ord => "\f",
86
- ?n.ord => "\n", ?r.ord => "\r", ?s.ord => "\s", ?t.ord => "\t",
87
- ?v.ord => "\v", ?\\.ord => "\\"
88
- }.freeze
89
-
90
- REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
91
-
92
- attr_reader :source_buffer
93
-
94
- attr_accessor :diagnostics
95
- attr_accessor :static_env
96
- attr_accessor :force_utf32
97
-
98
- attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
99
-
100
- attr_accessor :tokens, :comments
101
-
102
- def initialize(version)
103
- @version = version
104
- @static_env = nil
105
- @context = nil
106
-
107
- @tokens = nil
108
- @comments = nil
109
-
110
- reset
111
- end
112
-
113
- def reset(reset_state=true)
114
- # Ragel state:
115
- if reset_state
116
- # Unit tests set state prior to resetting lexer.
117
- @cs = self.class.lex_en_line_begin
118
-
119
- @cond = StackState.new('cond')
120
- @cmdarg = StackState.new('cmdarg')
121
- @cond_stack = []
122
- @cmdarg_stack = []
123
- end
124
-
125
- @force_utf32 = false # Set to true by some tests
126
-
127
- @source_pts = nil # @source as a codepoint array
128
-
129
- @p = 0 # stream position (saved manually in #advance)
130
- @ts = nil # token start
131
- @te = nil # token end
132
- @act = 0 # next action
133
-
134
- @stack = [] # state stack
135
- @top = 0 # state stack top pointer
136
-
137
- # Lexer state:
138
- @token_queue = []
139
- @literal_stack = []
140
-
141
- @eq_begin_s = nil # location of last encountered =begin
142
- @sharp_s = nil # location of last encountered #
143
-
144
- @newline_s = nil # location of last encountered newline
145
-
146
- @num_base = nil # last numeric base
147
- @num_digits_s = nil # starting position of numeric digits
148
- @num_suffix_s = nil # starting position of numeric suffix
149
- @num_xfrm = nil # numeric suffix-induced transformation
150
-
151
- @escape_s = nil # starting position of current sequence
152
- @escape = nil # last escaped sequence, as string
153
-
154
- @herebody_s = nil # starting position of current heredoc line
155
-
156
- # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
157
- # encountered after a matching closing parenthesis.
158
- @paren_nest = 0
159
- @lambda_stack = []
160
-
161
- # After encountering the closing line of <<~SQUIGGLY_HEREDOC,
162
- # we store the indentation level and give it out to the parser
163
- # on request. It is not possible to infer indentation level just
164
- # from the AST because escape sequences such as `\ ` or `\t` are
165
- # expanded inside the lexer, but count as non-whitespace for
166
- # indentation purposes.
167
- @dedent_level = nil
168
-
169
- # If the lexer is in `command state' (aka expr_value)
170
- # at the entry to #advance, it will transition to expr_cmdarg
171
- # instead of expr_arg at certain points.
172
- @command_start = true
173
-
174
- # True at the end of "def foo a:"
175
- @in_kwarg = false
176
-
177
- # State before =begin / =end block comment
178
- @cs_before_block_comment = self.class.lex_en_line_begin
179
- end
180
-
181
- def source_buffer=(source_buffer)
182
- @source_buffer = source_buffer
183
-
184
- if @source_buffer
185
- source = @source_buffer.source
186
-
187
- if source.encoding == Encoding::UTF_8
188
- @source_pts = source.unpack('U*')
189
- else
190
- @source_pts = source.unpack('C*')
191
- end
192
-
193
- if @source_pts[0] == 0xfeff
194
- # Skip byte order mark.
195
- @p = 1
196
- end
197
- else
198
- @source_pts = nil
199
- end
200
- end
201
-
202
- def encoding
203
- @source_buffer.source.encoding
204
- end
205
-
206
- LEX_STATES = {
207
- :line_begin => lex_en_line_begin,
208
- :expr_dot => lex_en_expr_dot,
209
- :expr_fname => lex_en_expr_fname,
210
- :expr_value => lex_en_expr_value,
211
- :expr_beg => lex_en_expr_beg,
212
- :expr_mid => lex_en_expr_mid,
213
- :expr_arg => lex_en_expr_arg,
214
- :expr_cmdarg => lex_en_expr_cmdarg,
215
- :expr_end => lex_en_expr_end,
216
- :expr_endarg => lex_en_expr_endarg,
217
- :expr_endfn => lex_en_expr_endfn,
218
- :expr_labelarg => lex_en_expr_labelarg,
219
-
220
- :interp_string => lex_en_interp_string,
221
- :interp_words => lex_en_interp_words,
222
- :plain_string => lex_en_plain_string,
223
- :plain_words => lex_en_plain_string,
224
- }
225
-
226
- def state
227
- LEX_STATES.invert.fetch(@cs, @cs)
228
- end
229
-
230
- def state=(state)
231
- @cs = LEX_STATES.fetch(state)
232
- end
233
-
234
- def push_cmdarg
235
- @cmdarg_stack.push(@cmdarg)
236
- @cmdarg = StackState.new("cmdarg.#{@cmdarg_stack.count}")
237
- end
238
-
239
- def pop_cmdarg
240
- @cmdarg = @cmdarg_stack.pop
241
- end
242
-
243
- def push_cond
244
- @cond_stack.push(@cond)
245
- @cond = StackState.new("cond.#{@cond_stack.count}")
246
- end
247
-
248
- def pop_cond
249
- @cond = @cond_stack.pop
250
- end
251
-
252
- def dedent_level
253
- # We erase @dedent_level as a precaution to avoid accidentally
254
- # using a stale value.
255
- dedent_level, @dedent_level = @dedent_level, nil
256
- dedent_level
257
- end
258
-
259
- # Return next token: [type, value].
260
- def advance
261
- if @token_queue.any?
262
- return @token_queue.shift
263
- end
264
-
265
- # Ugly, but dependent on Ragel output. Consider refactoring it somehow.
266
- klass = self.class
267
- _lex_trans_keys = klass.send :_lex_trans_keys
268
- _lex_key_spans = klass.send :_lex_key_spans
269
- _lex_index_offsets = klass.send :_lex_index_offsets
270
- _lex_indicies = klass.send :_lex_indicies
271
- _lex_trans_targs = klass.send :_lex_trans_targs
272
- _lex_trans_actions = klass.send :_lex_trans_actions
273
- _lex_to_state_actions = klass.send :_lex_to_state_actions
274
- _lex_from_state_actions = klass.send :_lex_from_state_actions
275
- _lex_eof_trans = klass.send :_lex_eof_trans
276
-
277
- pe = @source_pts.size + 2
278
- p, eof = @p, pe
279
-
280
- cmd_state = @command_start
281
- @command_start = false
282
-
283
- %% write exec;
284
- # %
285
-
286
- @p = p
287
-
288
- if @token_queue.any?
289
- @token_queue.shift
290
- elsif @cs == klass.lex_error
291
- [ false, [ '$error'.freeze, range(p - 1, p) ] ]
292
- else
293
- eof = @source_pts.size
294
- [ false, [ '$eof'.freeze, range(eof, eof) ] ]
295
- end
296
- end
297
-
298
- protected
299
-
300
- def eof_codepoint?(point)
301
- [0x04, 0x1a, 0x00].include? point
302
- end
303
-
304
- def version?(*versions)
305
- versions.include?(@version)
306
- end
307
-
308
- def stack_pop
309
- @top -= 1
310
- @stack[@top]
311
- end
312
-
313
- def encode_escape(ord)
314
- ord.chr.force_encoding(@source_buffer.source.encoding)
315
- end
316
-
317
- def tok(s = @ts, e = @te)
318
- @source_buffer.slice(s...e)
319
- end
320
-
321
- def range(s = @ts, e = @te)
322
- Parser::Source::Range.new(@source_buffer, s, e)
323
- end
324
-
325
- def emit(type, value = tok, s = @ts, e = @te)
326
- token = [ type, [ value, range(s, e) ] ]
327
-
328
- @token_queue.push(token)
329
-
330
- @tokens.push(token) if @tokens
331
-
332
- token
333
- end
334
-
335
- def emit_table(table, s = @ts, e = @te)
336
- value = tok(s, e)
337
-
338
- emit(table[value], value, s, e)
339
- end
340
-
341
- def emit_do(do_block=false)
342
- if @cond.active?
343
- emit(:kDO_COND, 'do'.freeze)
344
- elsif @cmdarg.active? || do_block
345
- emit(:kDO_BLOCK, 'do'.freeze)
346
- else
347
- emit(:kDO, 'do'.freeze)
348
- end
349
- end
350
-
351
- def arg_or_cmdarg(cmd_state)
352
- if cmd_state
353
- self.class.lex_en_expr_cmdarg
354
- else
355
- self.class.lex_en_expr_arg
356
- end
357
- end
358
-
359
- def emit_comment(s = @ts, e = @te)
360
- if @comments
361
- @comments.push(Parser::Source::Comment.new(range(s, e)))
362
- end
363
-
364
- if @tokens
365
- @tokens.push([ :tCOMMENT, [ tok(s, e), range(s, e) ] ])
366
- end
367
-
368
- nil
369
- end
370
-
371
- def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
372
- @diagnostics.process(
373
- Parser::Diagnostic.new(type, reason, arguments, location, highlights))
374
- end
375
-
376
- #
377
- # === LITERAL STACK ===
378
- #
379
-
380
- def push_literal(*args)
381
- new_literal = Literal.new(self, *args)
382
- @literal_stack.push(new_literal)
383
- next_state_for_literal(new_literal)
384
- end
385
-
386
- def next_state_for_literal(literal)
387
- if literal.words? && literal.backslash_delimited?
388
- if literal.interpolate?
389
- self.class.lex_en_interp_backslash_delimited_words
390
- else
391
- self.class.lex_en_plain_backslash_delimited_words
392
- end
393
- elsif literal.words? && !literal.backslash_delimited?
394
- if literal.interpolate?
395
- self.class.lex_en_interp_words
396
- else
397
- self.class.lex_en_plain_words
398
- end
399
- elsif !literal.words? && literal.backslash_delimited?
400
- if literal.interpolate?
401
- self.class.lex_en_interp_backslash_delimited
402
- else
403
- self.class.lex_en_plain_backslash_delimited
404
- end
405
- else
406
- if literal.interpolate?
407
- self.class.lex_en_interp_string
408
- else
409
- self.class.lex_en_plain_string
410
- end
411
- end
412
- end
413
-
414
- def literal
415
- @literal_stack.last
416
- end
417
-
418
- def pop_literal
419
- old_literal = @literal_stack.pop
420
-
421
- @dedent_level = old_literal.dedent_level
422
-
423
- if old_literal.type == :tREGEXP_BEG
424
- # Fetch modifiers.
425
- self.class.lex_en_regexp_modifiers
426
- else
427
- self.class.lex_en_expr_end
428
- end
429
- end
430
-
431
- # Mapping of strings to parser tokens.
432
-
433
- PUNCTUATION = {
434
- '=' => :tEQL, '&' => :tAMPER2, '|' => :tPIPE,
435
- '!' => :tBANG, '^' => :tCARET, '+' => :tPLUS,
436
- '-' => :tMINUS, '*' => :tSTAR2, '/' => :tDIVIDE,
437
- '%' => :tPERCENT, '~' => :tTILDE, ',' => :tCOMMA,
438
- ';' => :tSEMI, '.' => :tDOT, '..' => :tDOT2,
439
- '...' => :tDOT3, '[' => :tLBRACK2, ']' => :tRBRACK,
440
- '(' => :tLPAREN2, ')' => :tRPAREN, '?' => :tEH,
441
- ':' => :tCOLON, '&&' => :tANDOP, '||' => :tOROP,
442
- '-@' => :tUMINUS, '+@' => :tUPLUS, '~@' => :tTILDE,
443
- '**' => :tPOW, '->' => :tLAMBDA, '=~' => :tMATCH,
444
- '!~' => :tNMATCH, '==' => :tEQ, '!=' => :tNEQ,
445
- '>' => :tGT, '>>' => :tRSHFT, '>=' => :tGEQ,
446
- '<' => :tLT, '<<' => :tLSHFT, '<=' => :tLEQ,
447
- '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
448
- '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
449
- '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
450
- '!@' => :tBANG, '&.' => :tANDDOT,
451
- }
452
-
453
- PUNCTUATION_BEGIN = {
454
- '&' => :tAMPER, '*' => :tSTAR, '**' => :tDSTAR,
455
- '+' => :tUPLUS, '-' => :tUMINUS, '::' => :tCOLON3,
456
- '(' => :tLPAREN, '{' => :tLBRACE, '[' => :tLBRACK,
457
- }
458
-
459
- KEYWORDS = {
460
- 'if' => :kIF_MOD, 'unless' => :kUNLESS_MOD,
461
- 'while' => :kWHILE_MOD, 'until' => :kUNTIL_MOD,
462
- 'rescue' => :kRESCUE_MOD, 'defined?' => :kDEFINED,
463
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
464
- }
465
-
466
- KEYWORDS_BEGIN = {
467
- 'if' => :kIF, 'unless' => :kUNLESS,
468
- 'while' => :kWHILE, 'until' => :kUNTIL,
469
- 'rescue' => :kRESCUE, 'defined?' => :kDEFINED,
470
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
471
- }
472
-
473
- %w(class module def undef begin end then elsif else ensure case when
474
- for break next redo retry in do return yield super self nil true
475
- false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
476
- KEYWORDS_BEGIN[keyword] = KEYWORDS[keyword] = :"k#{keyword.upcase}"
477
- end
478
-
479
- %%{
480
- # %
481
-
482
- access @;
483
- getkey (@source_pts[p] || 0);
484
-
485
- # === CHARACTER CLASSES ===
486
- #
487
- # Pay close attention to the differences between c_any and any.
488
- # c_any does not include EOF and so will cause incorrect behavior
489
- # for machine subtraction (any-except rules) and default transitions
490
- # for scanners.
491
-
492
- action do_nl {
493
- # Record position of a newline for precise location reporting on tNL
494
- # tokens.
495
- #
496
- # This action is embedded directly into c_nl, as it is idempotent and
497
- # there are no cases when we need to skip it.
498
- @newline_s = p
499
- }
500
-
501
- c_nl = '\n' $ do_nl;
502
- c_space = [ \t\r\f\v];
503
- c_space_nl = c_space | c_nl;
504
-
505
- c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
506
- c_eol = c_nl | c_eof;
507
- c_any = any - c_eof;
508
-
509
- c_nl_zlen = c_nl | zlen;
510
- c_line = any - c_nl_zlen;
511
-
512
- c_unicode = c_any - 0x00..0x7f;
513
- c_upper = [A-Z];
514
- c_lower = [a-z_] | c_unicode;
515
- c_alpha = c_lower | c_upper;
516
- c_alnum = c_alpha | [0-9];
517
-
518
- action do_eof {
519
- # Sit at EOF indefinitely. #advance would return $eof each time.
520
- # This allows feeding the lexer more data if needed; this is only used
521
- # in tests.
522
- #
523
- # Note that this action is not embedded into e_eof like e_heredoc_nl and e_bs
524
- # below. This is due to the fact that scanner state at EOF is observed
525
- # by tests, and encapsulating it in a rule would break the introspection.
526
- fhold; fbreak;
527
- }
528
-
529
- #
530
- # === TOKEN DEFINITIONS ===
531
- #
532
-
533
- # All operators are punctuation. There is more to punctuation
534
- # than just operators. Operators can be overridden by user;
535
- # punctuation can not.
536
-
537
- # A list of operators which are valid in the function name context, but
538
- # have different semantics in others.
539
- operator_fname = '[]' | '[]=' | '`' | '-@' | '+@' | '~@' | '!@' ;
540
-
541
- # A list of operators which can occur within an assignment shortcut (+ → +=).
542
- operator_arithmetic = '&' | '|' | '&&' | '||' | '^' | '+' | '-' |
543
- '*' | '/' | '**' | '~' | '<<' | '>>' | '%' ;
544
-
545
- # A list of all user-definable operators not covered by groups above.
546
- operator_rest = '=~' | '!~' | '==' | '!=' | '!' | '===' |
547
- '<' | '<=' | '>' | '>=' | '<=>' | '=>' ;
548
-
549
- # Note that `{` and `}` need to be referred to as e_lbrace and e_rbrace,
550
- # as they are ambiguous with interpolation `#{}` and should be counted.
551
- # These braces are not present in punctuation lists.
552
-
553
- # A list of punctuation which has different meaning when used at the
554
- # beginning of expression.
555
- punctuation_begin = '-' | '+' | '::' | '(' | '[' |
556
- '*' | '**' | '&' ;
557
-
558
- # A list of all punctuation except punctuation_begin.
559
- punctuation_end = ',' | '=' | '->' | '(' | '[' | ']' |
560
- '::' | '?' | ':' | '.' | '..' | '...' ;
561
-
562
- # A list of keywords which have different meaning at the beginning of expression.
563
- keyword_modifier = 'if' | 'unless' | 'while' | 'until' | 'rescue' ;
564
-
565
- # A list of keywords which accept an argument-like expression, i.e. have the
566
- # same post-processing as method calls or commands. Example: `yield 1`,
567
- # `yield (1)`, `yield(1)`, are interpreted as if `yield` was a function.
568
- keyword_with_arg = 'yield' | 'super' | 'not' | 'defined?' ;
569
-
570
- # A list of keywords which accept a literal function name as an argument.
571
- keyword_with_fname = 'def' | 'undef' | 'alias' ;
572
-
573
- # A list of keywords which accept an expression after them.
574
- keyword_with_value = 'else' | 'case' | 'ensure' | 'module' | 'elsif' | 'then' |
575
- 'for' | 'in' | 'do' | 'when' | 'begin' | 'class' |
576
- 'and' | 'or' ;
577
-
578
- # A list of keywords which accept a value, and treat the keywords from
579
- # `keyword_modifier` list as modifiers.
580
- keyword_with_mid = 'rescue' | 'return' | 'break' | 'next' ;
581
-
582
- # A list of keywords which do not accept an expression after them.
583
- keyword_with_end = 'end' | 'self' | 'true' | 'false' | 'retry' |
584
- 'redo' | 'nil' | 'BEGIN' | 'END' | '__FILE__' |
585
- '__LINE__' | '__ENCODING__';
586
-
587
- # All keywords.
588
- keyword = keyword_with_value | keyword_with_mid |
589
- keyword_with_end | keyword_with_arg |
590
- keyword_with_fname | keyword_modifier ;
591
-
592
- constant = c_upper c_alnum*;
593
- bareword = c_alpha c_alnum*;
594
-
595
- call_or_var = c_lower c_alnum*;
596
- class_var = '@@' bareword;
597
- instance_var = '@' bareword;
598
- global_var = '$'
599
- ( bareword | digit+
600
- | [`'+~*$&?!@/\\;,.=:<>"] # `
601
- | '-' c_alnum
602
- )
603
- ;
604
-
605
- # Ruby accepts (and fails on) variables with leading digit
606
- # in literal context, but not in unquoted symbol body.
607
- class_var_v = '@@' c_alnum+;
608
- instance_var_v = '@' c_alnum+;
609
-
610
- label = bareword [?!]? ':';
611
-
612
- #
613
- # === NUMERIC PARSING ===
614
- #
615
-
616
- int_hex = ( xdigit+ '_' )* xdigit* '_'? ;
617
- int_dec = ( digit+ '_' )* digit* '_'? ;
618
- int_bin = ( [01]+ '_' )* [01]* '_'? ;
619
-
620
- flo_int = [1-9] [0-9]* ( '_' digit+ )* | '0';
621
- flo_frac = '.' ( digit+ '_' )* digit+;
622
- flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
623
-
624
- int_suffix =
625
- '' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars) } }
626
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
627
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, chars)) } }
628
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
629
- | 're' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
630
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
631
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 6); p -= 6 } };
632
-
633
- flo_pow_suffix =
634
- '' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars)) } }
635
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Float(chars))) } }
636
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 2); p -= 2 } };
637
-
638
- flo_suffix =
639
- flo_pow_suffix
640
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
641
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
642
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 6); p -= 6 } };
643
-
644
- #
645
- # === ESCAPE SEQUENCE PARSING ===
646
- #
647
-
648
- # Escape parsing code is a Ragel pattern, not a scanner, and therefore
649
- # it shouldn't directly raise errors or perform other actions with side effects.
650
- # In reality this would probably just mess up error reporting in pathological
651
- # cases, though.
652
-
653
- # The amount of code required to parse \M\C stuff correctly is ridiculous.
654
-
655
- escaped_nl = "\\" c_nl;
656
-
657
- action unicode_points {
658
- @escape = ""
659
-
660
- codepoints = tok(@escape_s + 2, p - 1)
661
- codepoint_s = @escape_s + 2
662
-
663
- if @version < 24
664
- if codepoints.start_with?(" ") || codepoints.start_with?("\t")
665
- diagnostic :fatal, :invalid_unicode_escape, nil,
666
- range(@escape_s + 2, @escape_s + 3)
667
- end
668
-
669
- if spaces_p = codepoints.index(/[ \t]{2}/)
670
- diagnostic :fatal, :invalid_unicode_escape, nil,
671
- range(codepoint_s + spaces_p + 1, codepoint_s + spaces_p + 2)
672
- end
673
-
674
- if codepoints.end_with?(" ") || codepoints.end_with?("\t")
675
- diagnostic :fatal, :invalid_unicode_escape, nil, range(p - 1, p)
676
- end
677
- end
678
-
679
- codepoints.scan(/([0-9a-fA-F]+)|([ \t]+)/).each do |(codepoint_str, spaces)|
680
- if spaces
681
- codepoint_s += spaces.length
682
- else
683
- codepoint = codepoint_str.to_i(16)
684
-
685
- if codepoint >= 0x110000
686
- diagnostic :error, :unicode_point_too_large, nil,
687
- range(codepoint_s, codepoint_s + codepoint_str.length)
688
- break
689
- end
690
-
691
- @escape += codepoint.chr(Encoding::UTF_8)
692
- codepoint_s += codepoint_str.length
693
- end
694
- end
695
- }
696
-
697
- action unescape_char {
698
- codepoint = @source_pts[p - 1]
699
- if (@escape = ESCAPES[codepoint]).nil?
700
- @escape = encode_escape(@source_buffer.slice(p - 1))
701
- end
702
- }
703
-
704
- action invalid_complex_escape {
705
- diagnostic :fatal, :invalid_escape
706
- }
707
-
708
- action read_post_meta_or_ctrl_char {
709
- @escape = @source_buffer.slice(p - 1).chr
710
-
711
- if @version >= 27 && ((0..8).include?(@escape.ord) || (14..31).include?(@escape.ord))
712
- diagnostic :fatal, :invalid_escape
713
- end
714
- }
715
-
716
- action slash_c_char {
717
- @escape = encode_escape(@escape[0].ord & 0x9f)
718
- }
719
-
720
- action slash_m_char {
721
- @escape = encode_escape(@escape[0].ord | 0x80)
722
- }
723
-
724
- maybe_escaped_char = (
725
- '\\' c_any %unescape_char
726
- | ( c_any - [\\] ) %read_post_meta_or_ctrl_char
727
- );
728
-
729
- maybe_escaped_ctrl_char = ( # why?!
730
- '\\' c_any %unescape_char %slash_c_char
731
- | '?' % { @escape = "\x7f" }
732
- | ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
733
- );
734
-
735
- escape = (
736
- # \377
737
- [0-7]{1,3}
738
- % { @escape = encode_escape(tok(@escape_s, p).to_i(8) % 0x100) }
739
-
740
- # \xff
741
- | 'x' xdigit{1,2}
742
- % { @escape = encode_escape(tok(@escape_s + 1, p).to_i(16)) }
743
-
744
- # %q[\x]
745
- | 'x' ( c_any - xdigit )
746
- % {
747
- diagnostic :fatal, :invalid_hex_escape, nil, range(@escape_s - 1, p + 2)
748
- }
749
-
750
- # \u263a
751
- | 'u' xdigit{4}
752
- % { @escape = tok(@escape_s + 1, p).to_i(16).chr(Encoding::UTF_8) }
753
-
754
- # \u123
755
- | 'u' xdigit{0,3}
756
- % {
757
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
758
- }
759
-
760
- # u{not hex} or u{}
761
- | 'u{' ( c_any - xdigit - [ \t}] )* '}'
762
- % {
763
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
764
- }
765
-
766
- # \u{ \t 123 \t 456 \t\t }
767
- | 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
768
- (
769
- ( xdigit{1,6} [ \t]* '}'
770
- %unicode_points
771
- )
772
- |
773
- ( xdigit* ( c_any - xdigit - [ \t}] )+ '}'
774
- | ( c_any - [ \t}] )* c_eof
775
- | xdigit{7,}
776
- ) % {
777
- diagnostic :fatal, :unterminated_unicode, nil, range(p - 1, p)
778
- }
779
- )
780
-
781
- # \C-\a \cx
782
- | ( 'C-' | 'c' ) escaped_nl?
783
- maybe_escaped_ctrl_char
784
-
785
- # \M-a
786
- | 'M-' escaped_nl?
787
- maybe_escaped_char
788
- %slash_m_char
789
-
790
- # \C-\M-f \M-\cf \c\M-f
791
- | ( ( 'C-' | 'c' ) escaped_nl? '\\M-'
792
- | 'M-\\' escaped_nl? ( 'C-' | 'c' ) ) escaped_nl?
793
- maybe_escaped_ctrl_char
794
- %slash_m_char
795
-
796
- | 'C' c_any %invalid_complex_escape
797
- | 'M' c_any %invalid_complex_escape
798
- | ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
799
-
800
- | ( c_any - [0-7xuCMc] ) %unescape_char
801
-
802
- | c_eof % {
803
- diagnostic :fatal, :escape_eof, nil, range(p - 1, p)
804
- }
805
- );
806
-
807
- # Use rules in form of `e_bs escape' when you need to parse a sequence.
808
- e_bs = '\\' % {
809
- @escape_s = p
810
- @escape = nil
811
- };
812
-
813
- #
814
- # === STRING AND HEREDOC PARSING ===
815
- #
816
-
817
- # Heredoc parsing is quite a complex topic. First, consider that heredocs
818
- # can be arbitrarily nested. For example:
819
- #
820
- # puts <<CODE
821
- # the result is: #{<<RESULT.inspect
822
- # i am a heredoc
823
- # RESULT
824
- # }
825
- # CODE
826
- #
827
- # which, incidentally, evaluates to:
828
- #
829
- # the result is: " i am a heredoc\n"
830
- #
831
- # To parse them, lexer refers to two kinds (remember, nested heredocs)
832
- # of positions in the input stream, namely heredoc_e
833
- # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
834
- #
835
- # heredoc_e is simply contained inside the corresponding Literal, and
836
- # when the heredoc is closed, the lexing is restarted from that position.
837
- #
838
- # @herebody_s is quite more complex. First, @herebody_s changes after each
839
- # heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
840
- # contains the current line, and also when a heredoc is started, @herebody_s
841
- # contains the position from which the heredoc will be lexed.
842
- #
843
- # Second, as (insanity) there are nested heredocs, we need to maintain a
844
- # stack of these positions. Each time #push_literal is called, it saves current
845
- # @herebody_s to literal.saved_herebody_s, and after an interpolation (possibly
846
- # containing other heredocs) is closed, the previous value is restored.
847
-
848
- e_heredoc_nl = c_nl % {
849
- # After every heredoc was parsed, @herebody_s contains the
850
- # position of next token after all heredocs.
851
- if @herebody_s
852
- p = @herebody_s
853
- @herebody_s = nil
854
- end
855
- };
856
-
857
- action extend_string {
858
- string = tok
859
-
860
- # tLABEL_END is only possible in non-cond context on >= 2.2
861
- if @version >= 22 && !@cond.active?
862
- lookahead = @source_buffer.slice(@te...@te+2)
863
- end
864
-
865
- current_literal = literal
866
- if !current_literal.heredoc? &&
867
- (token = current_literal.nest_and_try_closing(string, @ts, @te, lookahead))
868
- if token[0] == :tLABEL_END
869
- p += 1
870
- pop_literal
871
- fnext expr_labelarg;
872
- else
873
- fnext *pop_literal;
874
- end
875
- fbreak;
876
- else
877
- current_literal.extend_string(string, @ts, @te)
878
- end
879
- }
880
-
881
- action extend_string_escaped {
882
- current_literal = literal
883
- # Get the first character after the backslash.
884
- escaped_char = @source_buffer.slice(@escape_s).chr
885
-
886
- if current_literal.munge_escape? escaped_char
887
- # If this particular literal uses this character as an opening
888
- # or closing delimiter, it is an escape sequence for that
889
- # particular character. Write it without the backslash.
890
-
891
- if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
892
- # Regular expressions should include escaped delimiters in their
893
- # escaped form, except when the escaped character is
894
- # a closing delimiter but not a regexp metacharacter.
895
- #
896
- # The backslash itself cannot be used as a closing delimiter
897
- # at the same time as an escape symbol, but it is always munged,
898
- # so this branch also executes for the non-closing-delimiter case
899
- # for the backslash.
900
- current_literal.extend_string(tok, @ts, @te)
901
- else
902
- current_literal.extend_string(escaped_char, @ts, @te)
903
- end
904
- else
905
- # It does not. So this is an actual escape sequence, yay!
906
- if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
907
- # Squiggly heredocs like
908
- # <<~-HERE
909
- # 1\
910
- # 2
911
- # HERE
912
- # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
913
- # This information is emitted as is, without escaping,
914
- # later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
915
- current_literal.extend_string(tok, @ts, @te)
916
- elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
917
- # Heredocs, regexp and a few other types of literals support line
918
- # continuation via \\\n sequence. The code like
919
- # "a\
920
- # b"
921
- # must be parsed as "ab"
922
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
923
- elsif current_literal.regexp?
924
- # Regular expressions should include escape sequences in their
925
- # escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
926
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
927
- else
928
- current_literal.extend_string(@escape || tok, @ts, @te)
929
- end
930
- end
931
- }
932
-
933
- # Extend a string with a newline or an EOF character.
934
- # As heredoc closing line can immediately precede EOF, this action
935
- # has to handle such case specially.
936
- action extend_string_eol {
937
- current_literal = literal
938
- if @te == pe
939
- diagnostic :fatal, :string_eof, nil,
940
- range(current_literal.str_s, current_literal.str_s + 1)
941
- end
942
-
943
- if current_literal.heredoc?
944
- line = tok(@herebody_s, @ts).gsub(/\r+$/, ''.freeze)
945
-
946
- if version?(18, 19, 20)
947
- # See ruby:c48b4209c
948
- line = line.gsub(/\r.*$/, ''.freeze)
949
- end
950
-
951
- # Try ending the heredoc with the complete most recently
952
- # scanned line. @herebody_s always refers to the start of such line.
953
- if current_literal.nest_and_try_closing(line, @herebody_s, @ts)
954
- # Adjust @herebody_s to point to the next line.
955
- @herebody_s = @te
956
-
957
- # Continue regular lexing after the heredoc reference (<<END).
958
- p = current_literal.heredoc_e - 1
959
- fnext *pop_literal; fbreak;
960
- else
961
- # Calculate indentation level for <<~HEREDOCs.
962
- current_literal.infer_indent_level(line)
963
-
964
- # Ditto.
965
- @herebody_s = @te
966
- end
967
- else
968
- # Try ending the literal with a newline.
969
- if current_literal.nest_and_try_closing(tok, @ts, @te)
970
- fnext *pop_literal; fbreak;
971
- end
972
-
973
- if @herebody_s
974
- # This is a regular literal intertwined with a heredoc. Like:
975
- #
976
- # p <<-foo+"1
977
- # bar
978
- # foo
979
- # 2"
980
- #
981
- # which, incidentally, evaluates to "bar\n1\n2".
982
- p = @herebody_s - 1
983
- @herebody_s = nil
984
- end
985
- end
986
-
987
- if current_literal.words? && !eof_codepoint?(@source_pts[p])
988
- current_literal.extend_space @ts, @te
989
- else
990
- # A literal newline is appended if the heredoc was _not_ closed
991
- # this time (see fbreak above). See also Literal#nest_and_try_closing
992
- # for rationale of calling #flush_string here.
993
- current_literal.extend_string tok, @ts, @te
994
- current_literal.flush_string
995
- end
996
- }
997
-
998
- action extend_string_space {
999
- literal.extend_space @ts, @te
1000
- }
1001
-
1002
- #
1003
- # === INTERPOLATION PARSING ===
1004
- #
1005
-
1006
- # Interpolations with immediate variable names simply call into
1007
- # the corresponding machine.
1008
-
1009
- interp_var = '#' ( global_var | class_var_v | instance_var_v );
1010
-
1011
- action extend_interp_var {
1012
- current_literal = literal
1013
- current_literal.flush_string
1014
- current_literal.extend_content
1015
-
1016
- emit(:tSTRING_DVAR, nil, @ts, @ts + 1)
1017
-
1018
- p = @ts
1019
- fcall expr_variable;
1020
- }
1021
-
1022
- # Special case for Ruby >= 2.7
1023
- # If interpolated instance/class variable starts with a digit we parse it as a plain substring
1024
- # However, "#$1" is still a regular interpolation
1025
- interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
1026
-
1027
- action extend_interp_digit_var {
1028
- if @version >= 27
1029
- literal.extend_string(tok, @ts, @te)
1030
- else
1031
- message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
1032
- diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
1033
- end
1034
- }
1035
-
1036
- # Interpolations with code blocks must match nested curly braces, as
1037
- # interpolation ending is ambiguous with a block ending. So, every
1038
- # opening and closing brace should be matched with e_[lr]brace rules,
1039
- # which automatically perform the counting.
1040
- #
1041
- # Note that interpolations can themselves be nested, so brace balance
1042
- # is tied to the innermost literal.
1043
- #
1044
- # Also note that literals themselves should not use e_[lr]brace rules
1045
- # when matching their opening and closing delimiters, as the amount of
1046
- # braces inside the characters of a string literal is independent.
1047
-
1048
- interp_code = '#{';
1049
-
1050
- e_lbrace = '{' % {
1051
- @cond.push(false); @cmdarg.push(false)
1052
-
1053
- current_literal = literal
1054
- if current_literal
1055
- current_literal.start_interp_brace
1056
- end
1057
- };
1058
-
1059
- e_rbrace = '}' % {
1060
- current_literal = literal
1061
- if current_literal
1062
- if current_literal.end_interp_brace_and_try_closing
1063
- if version?(18, 19)
1064
- emit(:tRCURLY, '}'.freeze, p - 1, p)
1065
- @cond.lexpop
1066
- @cmdarg.lexpop
1067
- else
1068
- emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
1069
- end
1070
-
1071
- if current_literal.saved_herebody_s
1072
- @herebody_s = current_literal.saved_herebody_s
1073
- end
1074
-
1075
-
1076
- fhold;
1077
- fnext *next_state_for_literal(current_literal);
1078
- fbreak;
1079
- end
1080
- end
1081
-
1082
- @paren_nest -= 1
1083
- };
1084
-
1085
- action extend_interp_code {
1086
- current_literal = literal
1087
- current_literal.flush_string
1088
- current_literal.extend_content
1089
-
1090
- emit(:tSTRING_DBEG, '#{'.freeze)
1091
-
1092
- if current_literal.heredoc?
1093
- current_literal.saved_herebody_s = @herebody_s
1094
- @herebody_s = nil
1095
- end
1096
-
1097
- current_literal.start_interp_brace
1098
- @command_start = true
1099
- fnext expr_value;
1100
- fbreak;
1101
- }
1102
-
1103
- # Actual string parsers are simply combined from the primitives defined
1104
- # above.
1105
-
1106
- interp_words := |*
1107
- interp_code => extend_interp_code;
1108
- interp_digit_var => extend_interp_digit_var;
1109
- interp_var => extend_interp_var;
1110
- e_bs escape => extend_string_escaped;
1111
- c_space+ => extend_string_space;
1112
- c_eol => extend_string_eol;
1113
- c_any => extend_string;
1114
- *|;
1115
-
1116
- interp_string := |*
1117
- interp_code => extend_interp_code;
1118
- interp_digit_var => extend_interp_digit_var;
1119
- interp_var => extend_interp_var;
1120
- e_bs escape => extend_string_escaped;
1121
- c_eol => extend_string_eol;
1122
- c_any => extend_string;
1123
- *|;
1124
-
1125
- plain_words := |*
1126
- e_bs c_any => extend_string_escaped;
1127
- c_space+ => extend_string_space;
1128
- c_eol => extend_string_eol;
1129
- c_any => extend_string;
1130
- *|;
1131
-
1132
- plain_string := |*
1133
- '\\' c_nl => extend_string_eol;
1134
- e_bs c_any => extend_string_escaped;
1135
- c_eol => extend_string_eol;
1136
- c_any => extend_string;
1137
- *|;
1138
-
1139
- interp_backslash_delimited := |*
1140
- interp_code => extend_interp_code;
1141
- interp_digit_var => extend_interp_digit_var;
1142
- interp_var => extend_interp_var;
1143
- c_eol => extend_string_eol;
1144
- c_any => extend_string;
1145
- *|;
1146
-
1147
- plain_backslash_delimited := |*
1148
- c_eol => extend_string_eol;
1149
- c_any => extend_string;
1150
- *|;
1151
-
1152
- interp_backslash_delimited_words := |*
1153
- interp_code => extend_interp_code;
1154
- interp_digit_var => extend_interp_digit_var;
1155
- interp_var => extend_interp_var;
1156
- c_space+ => extend_string_space;
1157
- c_eol => extend_string_eol;
1158
- c_any => extend_string;
1159
- *|;
1160
-
1161
- plain_backslash_delimited_words := |*
1162
- c_space+ => extend_string_space;
1163
- c_eol => extend_string_eol;
1164
- c_any => extend_string;
1165
- *|;
1166
-
1167
- regexp_modifiers := |*
1168
- [A-Za-z]+
1169
- => {
1170
- unknown_options = tok.scan(/[^imxouesn]/)
1171
- if unknown_options.any?
1172
- diagnostic :error, :regexp_options,
1173
- { :options => unknown_options.join }
1174
- end
1175
-
1176
- emit(:tREGEXP_OPT)
1177
- fnext expr_end;
1178
- fbreak;
1179
- };
1180
-
1181
- any
1182
- => {
1183
- emit(:tREGEXP_OPT, tok(@ts, @te - 1), @ts, @te - 1)
1184
- fhold;
1185
- fgoto expr_end;
1186
- };
1187
- *|;
1188
-
1189
- #
1190
- # === WHITESPACE HANDLING ===
1191
- #
1192
-
1193
- # Various contexts in Ruby allow various kinds of whitespace
1194
- # to be used. They are grouped to clarify the lexing machines
1195
- # and ease collection of comments.
1196
-
1197
- # A line of code with inline #comment at end is always equivalent
1198
- # to a line of code ending with just a newline, so an inline
1199
- # comment is deemed equivalent to non-newline whitespace
1200
- # (c_space character class).
1201
-
1202
- w_space =
1203
- c_space+
1204
- | '\\' e_heredoc_nl
1205
- ;
1206
-
1207
- w_comment =
1208
- '#' %{ @sharp_s = p - 1 }
1209
- # The (p == pe) condition compensates for added "\0" and
1210
- # the way Ragel handles EOF.
1211
- c_line* %{ emit_comment(@sharp_s, p == pe ? p - 2 : p) }
1212
- ;
1213
-
1214
- w_space_comment =
1215
- w_space
1216
- | w_comment
1217
- ;
1218
-
1219
- # A newline in non-literal context always interoperates with
1220
- # here document logic and can always be escaped by a backslash,
1221
- # still interoperating with here document logic in the same way,
1222
- # yet being invisible to anything else.
1223
- #
1224
- # To demonstrate:
1225
- #
1226
- # foo = <<FOO \
1227
- # bar
1228
- # FOO
1229
- # + 2
1230
- #
1231
- # is equivalent to `foo = "bar\n" + 2`.
1232
-
1233
- w_newline =
1234
- e_heredoc_nl;
1235
-
1236
- w_any =
1237
- w_space
1238
- | w_comment
1239
- | w_newline
1240
- ;
1241
-
1242
-
1243
- #
1244
- # === EXPRESSION PARSING ===
1245
- #
1246
-
1247
- # These rules implement a form of manually defined lookahead.
1248
- # The default longest-match scanning does not work here due
1249
- # to sheer ambiguity.
1250
-
1251
- ambiguous_fid_suffix = # actual parsed
1252
- [?!] %{ tm = p } | # a? a?
1253
- [?!]'=' %{ tm = p - 2 } # a!=b a != b
1254
- ;
1255
-
1256
- ambiguous_ident_suffix = # actual parsed
1257
- ambiguous_fid_suffix |
1258
- '=' %{ tm = p } | # a= a=
1259
- '==' %{ tm = p - 2 } | # a==b a == b
1260
- '=~' %{ tm = p - 2 } | # a=~b a =~ b
1261
- '=>' %{ tm = p - 2 } | # a=>b a => b
1262
- '===' %{ tm = p - 3 } # a===b a === b
1263
- ;
1264
-
1265
- ambiguous_symbol_suffix = # actual parsed
1266
- ambiguous_ident_suffix |
1267
- '==>' %{ tm = p - 2 } # :a==>b :a= => b
1268
- ;
1269
-
1270
- # Ambiguous with 1.9 hash labels.
1271
- ambiguous_const_suffix = # actual parsed
1272
- '::' %{ tm = p - 2 } # A::B A :: B
1273
- ;
1274
-
1275
- # Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
1276
- # @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
1277
-
1278
- e_lbrack = '[' % {
1279
- @cond.push(false); @cmdarg.push(false)
1280
-
1281
- @paren_nest += 1
1282
- };
1283
-
1284
- e_rbrack = ']' % {
1285
- @paren_nest -= 1
1286
- };
1287
-
1288
- # Ruby 1.9 lambdas require parentheses counting in order to
1289
- # emit correct opening kDO/tLBRACE.
1290
-
1291
- e_lparen = '(' % {
1292
- @cond.push(false); @cmdarg.push(false)
1293
-
1294
- @paren_nest += 1
1295
-
1296
- if version?(18)
1297
- @command_start = true
1298
- end
1299
- };
1300
-
1301
- e_rparen = ')' % {
1302
- @paren_nest -= 1
1303
- };
1304
-
1305
- # Ruby is context-sensitive wrt/ local identifiers.
1306
- action local_ident {
1307
- emit(:tIDENTIFIER)
1308
-
1309
- if !@static_env.nil? && @static_env.declared?(tok)
1310
- fnext expr_endfn; fbreak;
1311
- else
1312
- fnext *arg_or_cmdarg(cmd_state); fbreak;
1313
- end
1314
- }
1315
-
1316
- # Variable lexing code is accessed from both expressions and
1317
- # string interpolation related code.
1318
- #
1319
- expr_variable := |*
1320
- global_var
1321
- => {
1322
- if tok =~ /^\$([1-9][0-9]*)$/
1323
- emit(:tNTH_REF, tok(@ts + 1).to_i)
1324
- elsif tok =~ /^\$([&`'+])$/
1325
- emit(:tBACK_REF)
1326
- else
1327
- emit(:tGVAR)
1328
- end
1329
-
1330
- fnext *stack_pop; fbreak;
1331
- };
1332
-
1333
- class_var_v
1334
- => {
1335
- if tok =~ /^@@[0-9]/
1336
- diagnostic :error, :cvar_name, { :name => tok }
1337
- end
1338
-
1339
- emit(:tCVAR)
1340
- fnext *stack_pop; fbreak;
1341
- };
1342
-
1343
- instance_var_v
1344
- => {
1345
- if tok =~ /^@[0-9]/
1346
- diagnostic :error, :ivar_name, { :name => tok }
1347
- end
1348
-
1349
- emit(:tIVAR)
1350
- fnext *stack_pop; fbreak;
1351
- };
1352
- *|;
1353
-
1354
- # Literal function name in definition (e.g. `def class`).
1355
- # Keywords are returned as their respective tokens; this is used
1356
- # to support singleton def `def self.foo`. Global variables are
1357
- # returned as `tGVAR`; this is used in global variable alias
1358
- # statements `alias $a $b`. Symbols are returned verbatim; this
1359
- # is used in `alias :a :"b#{foo}"` and `undef :a`.
1360
- #
1361
- # Transitions to `expr_endfn` afterwards.
1362
- #
1363
- expr_fname := |*
1364
- keyword
1365
- => { emit_table(KEYWORDS_BEGIN);
1366
- fnext expr_endfn; fbreak; };
1367
-
1368
- constant
1369
- => { emit(:tCONSTANT)
1370
- fnext expr_endfn; fbreak; };
1371
-
1372
- bareword [?=!]?
1373
- => { emit(:tIDENTIFIER)
1374
- fnext expr_endfn; fbreak; };
1375
-
1376
- global_var
1377
- => { p = @ts - 1
1378
- fnext expr_end; fcall expr_variable; };
1379
-
1380
- # If the handling was to be delegated to expr_end,
1381
- # these cases would transition to something other than
1382
- # expr_endfn, which is incorrect.
1383
- operator_fname |
1384
- operator_arithmetic |
1385
- operator_rest
1386
- => { emit_table(PUNCTUATION)
1387
- fnext expr_endfn; fbreak; };
1388
-
1389
- '::'
1390
- => { fhold; fhold; fgoto expr_end; };
1391
-
1392
- ':'
1393
- => { fhold; fgoto expr_beg; };
1394
-
1395
- '%s' c_any
1396
- => {
1397
- if version?(23)
1398
- type, delimiter = tok[0..-2], tok[-1].chr
1399
- fgoto *push_literal(type, delimiter, @ts);
1400
- else
1401
- p = @ts - 1
1402
- fgoto expr_end;
1403
- end
1404
- };
1405
-
1406
- w_any;
1407
-
1408
- c_any
1409
- => { fhold; fgoto expr_end; };
1410
-
1411
- c_eof => do_eof;
1412
- *|;
1413
-
1414
- # After literal function name in definition. Behaves like `expr_end`,
1415
- # but allows a tLABEL.
1416
- #
1417
- # Transitions to `expr_end` afterwards.
1418
- #
1419
- expr_endfn := |*
1420
- label ( any - ':' )
1421
- => { emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
1422
- fhold; fnext expr_labelarg; fbreak; };
1423
-
1424
- w_space_comment;
1425
-
1426
- c_any
1427
- => { fhold; fgoto expr_end; };
1428
-
1429
- c_eof => do_eof;
1430
- *|;
1431
-
1432
- # Literal function name in method call (e.g. `a.class`).
1433
- #
1434
- # Transitions to `expr_arg` afterwards.
1435
- #
1436
- expr_dot := |*
1437
- constant
1438
- => { emit(:tCONSTANT)
1439
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
1440
-
1441
- call_or_var
1442
- => { emit(:tIDENTIFIER)
1443
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
1444
-
1445
- bareword ambiguous_fid_suffix
1446
- => { emit(:tFID, tok(@ts, tm), @ts, tm)
1447
- fnext *arg_or_cmdarg(cmd_state); p = tm - 1; fbreak; };
1448
-
1449
- # See the comment in `expr_fname`.
1450
- operator_fname |
1451
- operator_arithmetic |
1452
- operator_rest
1453
- => { emit_table(PUNCTUATION)
1454
- fnext expr_arg; fbreak; };
1455
-
1456
- w_any;
1457
-
1458
- c_any
1459
- => { fhold; fgoto expr_end; };
1460
-
1461
- c_eof => do_eof;
1462
- *|;
1463
-
1464
- # The previous token emitted was a `tIDENTIFIER` or `tFID`; no space
1465
- # is consumed; the current expression is a command or method call.
1466
- #
1467
- expr_arg := |*
1468
- #
1469
- # COMMAND MODE SPECIFIC TOKENS
1470
- #
1471
-
1472
- # cmd (1 + 2)
1473
- # See below the rationale about expr_endarg.
1474
- w_space+ e_lparen
1475
- => {
1476
- if version?(18)
1477
- emit(:tLPAREN2, '('.freeze, @te - 1, @te)
1478
- fnext expr_value; fbreak;
1479
- else
1480
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1481
- fnext expr_beg; fbreak;
1482
- end
1483
- };
1484
-
1485
- # meth(1 + 2)
1486
- # Regular method call.
1487
- e_lparen
1488
- => { emit(:tLPAREN2, '('.freeze)
1489
- fnext expr_beg; fbreak; };
1490
-
1491
- # meth [...]
1492
- # Array argument. Compare with indexing `meth[...]`.
1493
- w_space+ e_lbrack
1494
- => { emit(:tLBRACK, '['.freeze, @te - 1, @te)
1495
- fnext expr_beg; fbreak; };
1496
-
1497
- # cmd {}
1498
- # Command: method call without parentheses.
1499
- w_space* e_lbrace
1500
- => {
1501
- if @lambda_stack.last == @paren_nest
1502
- @lambda_stack.pop
1503
- emit(:tLAMBEG, '{'.freeze, @te - 1, @te)
1504
- else
1505
- emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1506
- end
1507
- @command_start = true
1508
- @paren_nest += 1
1509
- fnext expr_value; fbreak;
1510
- };
1511
-
1512
- #
1513
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
1514
- #
1515
-
1516
- # a??
1517
- # Ternary operator
1518
- '?' c_space_nl
1519
- => {
1520
- # Unlike expr_beg as invoked in the next rule, do not warn
1521
- p = @ts - 1
1522
- fgoto expr_end;
1523
- };
1524
-
1525
- # a ?b, a? ?
1526
- # Character literal or ternary operator
1527
- w_space* '?'
1528
- => { fhold; fgoto expr_beg; };
1529
-
1530
- # a %{1}, a %[1] (but not "a %=1=" or "a % foo")
1531
- # a /foo/ (but not "a / foo" or "a /=foo")
1532
- # a <<HEREDOC
1533
- w_space+ %{ tm = p }
1534
- ( [%/] ( c_any - c_space_nl - '=' ) # /
1535
- | '<<'
1536
- )
1537
- => {
1538
- if tok(tm, tm + 1) == '/'.freeze
1539
- # Ambiguous regexp literal.
1540
- diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
1541
- end
1542
-
1543
- p = tm - 1
1544
- fgoto expr_beg;
1545
- };
1546
-
1547
- # x *1
1548
- # Ambiguous splat, kwsplat or block-pass.
1549
- w_space+ %{ tm = p } ( '+' | '-' | '*' | '&' | '**' )
1550
- => {
1551
- diagnostic :warning, :ambiguous_prefix, { :prefix => tok(tm, @te) },
1552
- range(tm, @te)
1553
-
1554
- p = tm - 1
1555
- fgoto expr_beg;
1556
- };
1557
-
1558
- # x ::Foo
1559
- # Ambiguous toplevel constant access.
1560
- w_space+ '::'
1561
- => { fhold; fhold; fgoto expr_beg; };
1562
-
1563
- # x:b
1564
- # Symbol.
1565
- w_space* ':'
1566
- => { fhold; fgoto expr_beg; };
1567
-
1568
- w_space+ label
1569
- => { p = @ts - 1; fgoto expr_beg; };
1570
-
1571
- #
1572
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
1573
- #
1574
-
1575
- # a ? b
1576
- # Ternary operator.
1577
- w_space+ %{ tm = p } '?' c_space_nl
1578
- => { p = tm - 1; fgoto expr_end; };
1579
-
1580
- # x + 1: Binary operator or operator-assignment.
1581
- w_space* operator_arithmetic
1582
- ( '=' | c_space_nl )? |
1583
- # x rescue y: Modifier keyword.
1584
- w_space* keyword_modifier |
1585
- # a &. b: Safe navigation operator.
1586
- w_space* '&.' |
1587
- # Miscellanea.
1588
- w_space* punctuation_end
1589
- => {
1590
- p = @ts - 1
1591
- fgoto expr_end;
1592
- };
1593
-
1594
- w_space;
1595
-
1596
- w_comment
1597
- => { fgoto expr_end; };
1598
-
1599
- w_newline
1600
- => { fhold; fgoto expr_end; };
1601
-
1602
- c_any
1603
- => { fhold; fgoto expr_beg; };
1604
-
1605
- c_eof => do_eof;
1606
- *|;
1607
-
1608
- # The previous token was an identifier which was seen while in the
1609
- # command mode (that is, the state at the beginning of #advance was
1610
- # expr_value). This state is very similar to expr_arg, but disambiguates
1611
- # two very rare and specific conditions:
1612
- # * In 1.8 mode, "foo (lambda do end)".
1613
- # * In 1.9+ mode, "f x: -> do foo do end end".
1614
- expr_cmdarg := |*
1615
- w_space+ e_lparen
1616
- => {
1617
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1618
- if version?(18)
1619
- fnext expr_value; fbreak;
1620
- else
1621
- fnext expr_beg; fbreak;
1622
- end
1623
- };
1624
-
1625
- w_space* 'do'
1626
- => {
1627
- if @cond.active?
1628
- emit(:kDO_COND, 'do'.freeze, @te - 2, @te)
1629
- else
1630
- emit(:kDO, 'do'.freeze, @te - 2, @te)
1631
- end
1632
- fnext expr_value; fbreak;
1633
- };
1634
-
1635
- c_any |
1636
- # Disambiguate with the `do' rule above.
1637
- w_space* bareword |
1638
- w_space* label
1639
- => { p = @ts - 1
1640
- fgoto expr_arg; };
1641
-
1642
- c_eof => do_eof;
1643
- *|;
1644
-
1645
- # The rationale for this state is pretty complex. Normally, if an argument
1646
- # is passed to a command and then there is a block (tLCURLY...tRCURLY),
1647
- # the block is attached to the innermost argument (`f` in `m f {}`), or it
1648
- # is a parse error (`m 1 {}`). But there is a special case for passing a single
1649
- # primary expression grouped with parentheses: if you write `m (1) {}` or
1650
- # (2.0 only) `m () {}`, then the block is attached to `m`.
1651
- #
1652
- # Thus, we recognize the opening `(` of a command (remember, a command is
1653
- # a method call without parens) as a tLPAREN_ARG; then, in parser, we recognize
1654
- # `tLPAREN_ARG expr rparen` as a `primary_expr` and before rparen, set the
1655
- # lexer's state to `expr_endarg`, which makes it emit the possibly following
1656
- # `{` as `tLBRACE_ARG`.
1657
- #
1658
- # The default post-`expr_endarg` state is `expr_end`, so this state also handles
1659
- # `do` (as `kDO_BLOCK` in `expr_beg`).
1660
- expr_endarg := |*
1661
- e_lbrace
1662
- => {
1663
- if @lambda_stack.last == @paren_nest
1664
- @lambda_stack.pop
1665
- emit(:tLAMBEG, '{'.freeze)
1666
- else
1667
- emit(:tLBRACE_ARG, '{'.freeze)
1668
- end
1669
- @paren_nest += 1
1670
- @command_start = true
1671
- fnext expr_value; fbreak;
1672
- };
1673
-
1674
- 'do'
1675
- => { emit_do(true)
1676
- fnext expr_value; fbreak; };
1677
-
1678
- w_space_comment;
1679
-
1680
- c_any
1681
- => { fhold; fgoto expr_end; };
1682
-
1683
- c_eof => do_eof;
1684
- *|;
1685
-
1686
- # The rationale for this state is that several keywords accept value
1687
- # (i.e. should transition to `expr_beg`), do not accept it like a command
1688
- # (i.e. not an `expr_arg`), and must behave like a statement, that is,
1689
- # accept a modifier if/while/etc.
1690
- #
1691
- expr_mid := |*
1692
- keyword_modifier
1693
- => { emit_table(KEYWORDS)
1694
- fnext expr_beg; fbreak; };
1695
-
1696
- bareword
1697
- => { p = @ts - 1; fgoto expr_beg; };
1698
-
1699
- w_space_comment;
1700
-
1701
- w_newline
1702
- => { fhold; fgoto expr_end; };
1703
-
1704
- c_any
1705
- => { fhold; fgoto expr_beg; };
1706
-
1707
- c_eof => do_eof;
1708
- *|;
1709
-
1710
- # Beginning of an expression.
1711
- #
1712
- # Don't fall through to this state from `c_any`; make sure to handle
1713
- # `c_space* c_nl` and let `expr_end` handle the newline.
1714
- # Otherwise code like `f\ndef x` gets glued together and the parser
1715
- # explodes.
1716
- #
1717
- expr_beg := |*
1718
- # +5, -5, - 5
1719
- [+\-] w_any* [0-9]
1720
- => {
1721
- emit(:tUNARY_NUM, tok(@ts, @ts + 1), @ts, @ts + 1)
1722
- fhold; fnext expr_end; fbreak;
1723
- };
1724
-
1725
- # splat *a
1726
- '*'
1727
- => { emit(:tSTAR, '*'.freeze)
1728
- fbreak; };
1729
-
1730
- #
1731
- # STRING AND REGEXP LITERALS
1732
- #
1733
-
1734
- # /regexp/oui
1735
- # /=/ (disambiguation with /=)
1736
- '/' c_any
1737
- => {
1738
- type = delimiter = tok[0].chr
1739
- fhold; fgoto *push_literal(type, delimiter, @ts);
1740
- };
1741
-
1742
- # %<string>
1743
- '%' ( any - [A-Za-z] )
1744
- => {
1745
- type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
1746
- fgoto *push_literal(type, delimiter, @ts);
1747
- };
1748
-
1749
- # %w(we are the people)
1750
- '%' [A-Za-z]+ c_any
1751
- => {
1752
- type, delimiter = tok[0..-2], tok[-1].chr
1753
- fgoto *push_literal(type, delimiter, @ts);
1754
- };
1755
-
1756
- '%' c_eof
1757
- => {
1758
- diagnostic :fatal, :string_eof, nil, range(@ts, @ts + 1)
1759
- };
1760
-
1761
- # Heredoc start.
1762
- # <<END | <<'END' | <<"END" | <<`END` |
1763
- # <<-END | <<-'END' | <<-"END" | <<-`END` |
1764
- # <<~END | <<~'END' | <<~"END" | <<~`END`
1765
- '<<' [~\-]?
1766
- ( '"' ( any - '"' )* '"'
1767
- | "'" ( any - "'" )* "'"
1768
- | "`" ( any - "`" )* "`"
1769
- | bareword ) % { heredoc_e = p }
1770
- c_line* c_nl % { new_herebody_s = p }
1771
- => {
1772
- tok(@ts, heredoc_e) =~ /^<<(-?)(~?)(["'`]?)(.*)\3$/m
1773
-
1774
- indent = !$1.empty? || !$2.empty?
1775
- dedent_body = !$2.empty?
1776
- type = $3.empty? ? '<<"'.freeze : ('<<'.freeze + $3)
1777
- delimiter = $4
1778
-
1779
- if @version >= 27
1780
- if delimiter.count("\n") > 0 || delimiter.count("\r") > 0
1781
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1782
- end
1783
- elsif @version >= 24
1784
- if delimiter.count("\n") > 0
1785
- if delimiter.end_with?("\n")
1786
- diagnostic :warning, :heredoc_id_ends_with_nl, nil, range(@ts, @ts + 1)
1787
- delimiter = delimiter.rstrip
1788
- else
1789
- diagnostic :fatal, :heredoc_id_has_newline, nil, range(@ts, @ts + 1)
1790
- end
1791
- end
1792
- end
1793
-
1794
- if dedent_body && version?(18, 19, 20, 21, 22)
1795
- emit(:tLSHFT, '<<'.freeze, @ts, @ts + 2)
1796
- p = @ts + 1
1797
- fnext expr_beg; fbreak;
1798
- else
1799
- fnext *push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
1800
-
1801
- @herebody_s ||= new_herebody_s
1802
- p = @herebody_s - 1
1803
- end
1804
- };
1805
-
1806
- # Escaped unterminated heredoc start
1807
- # <<'END | <<"END | <<`END |
1808
- # <<-'END | <<-"END | <<-`END |
1809
- # <<~'END | <<~"END | <<~`END
1810
- #
1811
- # If the heredoc is terminated the rule above should handle it
1812
- '<<' [~\-]?
1813
- ('"' (any - c_nl - '"')*
1814
- |"'" (any - c_nl - "'")*
1815
- |"`" (any - c_nl - "`")
1816
- )
1817
- => {
1818
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1819
- };
1820
-
1821
- #
1822
- # SYMBOL LITERALS
1823
- #
1824
-
1825
- # :&&, :||
1826
- ':' ('&&' | '||') => {
1827
- fhold; fhold;
1828
- emit(:tSYMBEG, tok(@ts, @ts + 1), @ts, @ts + 1)
1829
- fgoto expr_fname;
1830
- };
1831
-
1832
- # :"bar", :'baz'
1833
- ':' ['"] # '
1834
- => {
1835
- type, delimiter = tok, tok[-1].chr
1836
- fgoto *push_literal(type, delimiter, @ts);
1837
- };
1838
-
1839
- # :!@ is :!
1840
- # :~@ is :~
1841
- ':' [!~] '@'
1842
- => {
1843
- emit(:tSYMBOL, tok(@ts + 1, @ts + 2))
1844
- fnext expr_end; fbreak;
1845
- };
1846
-
1847
- ':' bareword ambiguous_symbol_suffix
1848
- => {
1849
- emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
1850
- p = tm - 1
1851
- fnext expr_end; fbreak;
1852
- };
1853
-
1854
- ':' ( bareword | global_var | class_var | instance_var |
1855
- operator_fname | operator_arithmetic | operator_rest )
1856
- => {
1857
- emit(:tSYMBOL, tok(@ts + 1), @ts)
1858
- fnext expr_end; fbreak;
1859
- };
1860
-
1861
- ':' ( '@' %{ tm = p - 1; diag_msg = :ivar_name }
1862
- | '@@' %{ tm = p - 2; diag_msg = :cvar_name }
1863
- ) [0-9]*
1864
- => {
1865
- if @version >= 27
1866
- diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
1867
- else
1868
- emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
1869
- p = @ts
1870
- end
1871
-
1872
- fnext expr_end; fbreak;
1873
- };
1874
-
1875
- #
1876
- # AMBIGUOUS TERNARY OPERATOR
1877
- #
1878
-
1879
- # Character constant, like ?a, ?\n, ?\u1000, and so on
1880
- # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
1881
- '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
1882
- | (c_any - c_space_nl - e_bs) % { @escape = nil }
1883
- )
1884
- => {
1885
- value = @escape || tok(@ts + 1)
1886
-
1887
- if version?(18)
1888
- emit(:tINTEGER, value.getbyte(0))
1889
- else
1890
- emit(:tCHARACTER, value)
1891
- end
1892
-
1893
- fnext expr_end; fbreak;
1894
- };
1895
-
1896
- '?' c_space_nl
1897
- => {
1898
- escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
1899
- "\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
1900
- diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
1901
-
1902
- p = @ts - 1
1903
- fgoto expr_end;
1904
- };
1905
-
1906
- '?' c_eof
1907
- => {
1908
- diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
1909
- };
1910
-
1911
- # f ?aa : b: Disambiguate with a character literal.
1912
- '?' [A-Za-z_] bareword
1913
- => {
1914
- p = @ts - 1
1915
- fgoto expr_end;
1916
- };
1917
-
1918
- #
1919
- # AMBIGUOUS EMPTY BLOCK ARGUMENTS
1920
- #
1921
-
1922
- # Ruby >= 2.7 emits it as two tPIPE terminals
1923
- # while Ruby < 2.7 as a single tOROP (like in `a || b`)
1924
- '||'
1925
- => {
1926
- if @version >= 27
1927
- emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
1928
- fhold;
1929
- fnext expr_beg; fbreak;
1930
- else
1931
- p -= 2
1932
- fgoto expr_end;
1933
- end
1934
- };
1935
-
1936
- #
1937
- # KEYWORDS AND PUNCTUATION
1938
- #
1939
-
1940
- # a({b=>c})
1941
- e_lbrace
1942
- => {
1943
- if @lambda_stack.last == @paren_nest
1944
- @lambda_stack.pop
1945
- @command_start = true
1946
- emit(:tLAMBEG, '{'.freeze)
1947
- else
1948
- emit(:tLBRACE, '{'.freeze)
1949
- end
1950
- @paren_nest += 1
1951
- fbreak;
1952
- };
1953
-
1954
- # a([1, 2])
1955
- e_lbrack
1956
- => { emit(:tLBRACK, '['.freeze)
1957
- fbreak; };
1958
-
1959
- # a()
1960
- e_lparen
1961
- => { emit(:tLPAREN, '('.freeze)
1962
- fbreak; };
1963
-
1964
- # a(+b)
1965
- punctuation_begin
1966
- => { emit_table(PUNCTUATION_BEGIN)
1967
- fbreak; };
1968
-
1969
- # rescue Exception => e: Block rescue.
1970
- # Special because it should transition to expr_mid.
1971
- 'rescue' %{ tm = p } '=>'?
1972
- => { emit(:kRESCUE, 'rescue'.freeze, @ts, tm)
1973
- p = tm - 1
1974
- fnext expr_mid; fbreak; };
1975
-
1976
- # if a: Statement if.
1977
- keyword_modifier
1978
- => { emit_table(KEYWORDS_BEGIN)
1979
- @command_start = true
1980
- fnext expr_value; fbreak; };
1981
-
1982
- #
1983
- # RUBY 1.9 HASH LABELS
1984
- #
1985
-
1986
- label ( any - ':' )
1987
- => {
1988
- fhold;
1989
-
1990
- if version?(18)
1991
- ident = tok(@ts, @te - 2)
1992
-
1993
- emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
1994
- ident, @ts, @te - 2)
1995
- fhold; # continue as a symbol
1996
-
1997
- if !@static_env.nil? && @static_env.declared?(ident)
1998
- fnext expr_end;
1999
- else
2000
- fnext *arg_or_cmdarg(cmd_state);
2001
- end
2002
- else
2003
- emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
2004
- fnext expr_labelarg;
2005
- end
2006
-
2007
- fbreak;
2008
- };
2009
-
2010
- #
2011
- # RUBY 2.7 BEGINLESS RANGE
2012
-
2013
- '..'
2014
- => {
2015
- if @version >= 27
2016
- emit(:tBDOT2)
2017
- else
2018
- emit(:tDOT2)
2019
- end
2020
-
2021
- fnext expr_beg; fbreak;
2022
- };
2023
-
2024
- '...'
2025
- => {
2026
- if @version >= 27
2027
- emit(:tBDOT3)
2028
- else
2029
- emit(:tDOT3)
2030
- end
2031
-
2032
- fnext expr_beg; fbreak;
2033
- };
2034
-
2035
- #
2036
- # CONTEXT-DEPENDENT VARIABLE LOOKUP OR COMMAND INVOCATION
2037
- #
2038
-
2039
- # foo= bar: Disambiguate with bareword rule below.
2040
- bareword ambiguous_ident_suffix |
2041
- # def foo: Disambiguate with bareword rule below.
2042
- keyword
2043
- => { p = @ts - 1
2044
- fgoto expr_end; };
2045
-
2046
- # a = 42; a [42]: Indexing.
2047
- # def a; end; a [42]: Array argument.
2048
- call_or_var
2049
- => local_ident;
2050
-
2051
- (call_or_var - keyword)
2052
- % { ident_tok = tok; ident_ts = @ts; ident_te = @te; }
2053
- w_space+ '('
2054
- => {
2055
- emit(:tIDENTIFIER, ident_tok, ident_ts, ident_te)
2056
- p = ident_te - 1
2057
-
2058
- if !@static_env.nil? && @static_env.declared?(ident_tok) && @version < 25
2059
- fnext expr_endfn;
2060
- else
2061
- fnext expr_cmdarg;
2062
- end
2063
- fbreak;
2064
- };
2065
-
2066
- #
2067
- # WHITESPACE
2068
- #
2069
-
2070
- w_any;
2071
-
2072
- e_heredoc_nl '=begin' ( c_space | c_nl_zlen )
2073
- => {
2074
- p = @ts - 1
2075
- @cs_before_block_comment = @cs
2076
- fgoto line_begin;
2077
- };
2078
-
2079
- #
2080
- # DEFAULT TRANSITION
2081
- #
2082
-
2083
- # The following rules match most binary and all unary operators.
2084
- # Rules for binary operators provide better error reporting.
2085
- operator_arithmetic '=' |
2086
- operator_rest |
2087
- punctuation_end |
2088
- c_any
2089
- => { p = @ts - 1; fgoto expr_end; };
2090
-
2091
- c_eof => do_eof;
2092
- *|;
2093
-
2094
- # Special newline handling for "def a b:"
2095
- #
2096
- expr_labelarg := |*
2097
- w_space_comment;
2098
-
2099
- w_newline
2100
- => {
2101
- if @in_kwarg
2102
- fhold; fgoto expr_end;
2103
- else
2104
- fgoto line_begin;
2105
- end
2106
- };
2107
-
2108
- c_any
2109
- => { fhold; fgoto expr_beg; };
2110
-
2111
- c_eof => do_eof;
2112
- *|;
2113
-
2114
- # Like expr_beg, but no 1.9 label or 2.2 quoted label possible.
2115
- #
2116
- expr_value := |*
2117
- # a:b: a(:b), a::B, A::B
2118
- label (any - ':')
2119
- => { p = @ts - 1
2120
- fgoto expr_end; };
2121
-
2122
- # "bar", 'baz'
2123
- ['"] # '
2124
- => {
2125
- fgoto *push_literal(tok, tok, @ts);
2126
- };
2127
-
2128
- w_space_comment;
2129
-
2130
- w_newline
2131
- => { fgoto line_begin; };
2132
-
2133
- c_any
2134
- => { fhold; fgoto expr_beg; };
2135
-
2136
- c_eof => do_eof;
2137
- *|;
2138
-
2139
- expr_end := |*
2140
- #
2141
- # STABBY LAMBDA
2142
- #
2143
-
2144
- '->'
2145
- => {
2146
- emit(:tLAMBDA, '->'.freeze, @ts, @ts + 2)
2147
-
2148
- @lambda_stack.push @paren_nest
2149
- fnext expr_endfn; fbreak;
2150
- };
2151
-
2152
- e_lbrace | 'do'
2153
- => {
2154
- if @lambda_stack.last == @paren_nest
2155
- @lambda_stack.pop
2156
-
2157
- if tok == '{'.freeze
2158
- emit(:tLAMBEG, '{'.freeze)
2159
- else # 'do'
2160
- emit(:kDO_LAMBDA, 'do'.freeze)
2161
- end
2162
- else
2163
- if tok == '{'.freeze
2164
- emit(:tLCURLY, '{'.freeze)
2165
- else # 'do'
2166
- emit_do
2167
- end
2168
- end
2169
- if tok == '{'.freeze
2170
- @paren_nest += 1
2171
- end
2172
- @command_start = true
2173
-
2174
- fnext expr_value; fbreak;
2175
- };
2176
-
2177
- #
2178
- # KEYWORDS
2179
- #
2180
-
2181
- keyword_with_fname
2182
- => { emit_table(KEYWORDS)
2183
- fnext expr_fname; fbreak; };
2184
-
2185
- 'class' w_any* '<<'
2186
- => { emit(:kCLASS, 'class'.freeze, @ts, @ts + 5)
2187
- emit(:tLSHFT, '<<'.freeze, @te - 2, @te)
2188
- fnext expr_value; fbreak; };
2189
-
2190
- # a if b:c: Syntax error.
2191
- keyword_modifier
2192
- => { emit_table(KEYWORDS)
2193
- fnext expr_beg; fbreak; };
2194
-
2195
- # elsif b:c: elsif b(:c)
2196
- keyword_with_value
2197
- => { emit_table(KEYWORDS)
2198
- @command_start = true
2199
- fnext expr_value; fbreak; };
2200
-
2201
- keyword_with_mid
2202
- => { emit_table(KEYWORDS)
2203
- fnext expr_mid; fbreak; };
2204
-
2205
- keyword_with_arg
2206
- => {
2207
- emit_table(KEYWORDS)
2208
-
2209
- if version?(18) && tok == 'not'.freeze
2210
- fnext expr_beg; fbreak;
2211
- else
2212
- fnext expr_arg; fbreak;
2213
- end
2214
- };
2215
-
2216
- '__ENCODING__'
2217
- => {
2218
- if version?(18)
2219
- emit(:tIDENTIFIER)
2220
-
2221
- unless !@static_env.nil? && @static_env.declared?(tok)
2222
- fnext *arg_or_cmdarg(cmd_state);
2223
- end
2224
- else
2225
- emit(:k__ENCODING__, '__ENCODING__'.freeze)
2226
- end
2227
- fbreak;
2228
- };
2229
-
2230
- keyword_with_end
2231
- => { emit_table(KEYWORDS)
2232
- fbreak; };
2233
-
2234
- #
2235
- # NUMERIC LITERALS
2236
- #
2237
-
2238
- ( '0' [Xx] %{ @num_base = 16; @num_digits_s = p } int_hex
2239
- | '0' [Dd] %{ @num_base = 10; @num_digits_s = p } int_dec
2240
- | '0' [Oo] %{ @num_base = 8; @num_digits_s = p } int_dec
2241
- | '0' [Bb] %{ @num_base = 2; @num_digits_s = p } int_bin
2242
- | [1-9] digit* '_'? %{ @num_base = 10; @num_digits_s = @ts } int_dec
2243
- | '0' digit* '_'? %{ @num_base = 8; @num_digits_s = @ts } int_dec
2244
- ) %{ @num_suffix_s = p } int_suffix
2245
- => {
2246
- digits = tok(@num_digits_s, @num_suffix_s)
2247
-
2248
- if digits.end_with? '_'.freeze
2249
- diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
2250
- range(@te - 1, @te)
2251
- elsif digits.empty? && @num_base == 8 && version?(18)
2252
- # 1.8 did not raise an error on 0o.
2253
- digits = '0'.freeze
2254
- elsif digits.empty?
2255
- diagnostic :error, :empty_numeric
2256
- elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
2257
- invalid_s = @num_digits_s + invalid_idx
2258
- diagnostic :error, :invalid_octal, nil,
2259
- range(invalid_s, invalid_s + 1)
2260
- end
2261
-
2262
- if version?(18, 19, 20)
2263
- emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s)
2264
- p = @num_suffix_s - 1
2265
- else
2266
- @num_xfrm.call(digits.to_i(@num_base))
2267
- end
2268
- fbreak;
2269
- };
2270
-
2271
- flo_frac flo_pow?
2272
- => {
2273
- diagnostic :error, :no_dot_digit_literal
2274
- };
2275
-
2276
- flo_int [eE]
2277
- => {
2278
- if version?(18, 19, 20)
2279
- diagnostic :error,
2280
- :trailing_in_number, { :character => tok(@te - 1, @te) },
2281
- range(@te - 1, @te)
2282
- else
2283
- emit(:tINTEGER, tok(@ts, @te - 1).to_i, @ts, @te - 1)
2284
- fhold; fbreak;
2285
- end
2286
- };
2287
-
2288
- flo_int flo_frac [eE]
2289
- => {
2290
- if version?(18, 19, 20)
2291
- diagnostic :error,
2292
- :trailing_in_number, { :character => tok(@te - 1, @te) },
2293
- range(@te - 1, @te)
2294
- else
2295
- emit(:tFLOAT, tok(@ts, @te - 1).to_f, @ts, @te - 1)
2296
- fhold; fbreak;
2297
- end
2298
- };
2299
-
2300
- flo_int
2301
- ( flo_frac? flo_pow %{ @num_suffix_s = p } flo_pow_suffix
2302
- | flo_frac %{ @num_suffix_s = p } flo_suffix
2303
- )
2304
- => {
2305
- digits = tok(@ts, @num_suffix_s)
2306
-
2307
- if version?(18, 19, 20)
2308
- emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
2309
- p = @num_suffix_s - 1
2310
- else
2311
- @num_xfrm.call(digits)
2312
- end
2313
- fbreak;
2314
- };
2315
-
2316
- #
2317
- # STRING AND XSTRING LITERALS
2318
- #
2319
-
2320
- # `echo foo`, "bar", 'baz'
2321
- '`' | ['"] # '
2322
- => {
2323
- type, delimiter = tok, tok[-1].chr
2324
- fgoto *push_literal(type, delimiter, @ts, nil, false, false, true);
2325
- };
2326
-
2327
- #
2328
- # CONSTANTS AND VARIABLES
2329
- #
2330
-
2331
- constant
2332
- => { emit(:tCONSTANT)
2333
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
2334
-
2335
- constant ambiguous_const_suffix
2336
- => { emit(:tCONSTANT, tok(@ts, tm), @ts, tm)
2337
- p = tm - 1; fbreak; };
2338
-
2339
- global_var | class_var_v | instance_var_v
2340
- => { p = @ts - 1; fcall expr_variable; };
2341
-
2342
- #
2343
- # METHOD CALLS
2344
- #
2345
-
2346
- '.' | '&.' | '::'
2347
- => { emit_table(PUNCTUATION)
2348
- fnext expr_dot; fbreak; };
2349
-
2350
- call_or_var
2351
- => local_ident;
2352
-
2353
- bareword ambiguous_fid_suffix
2354
- => {
2355
- if tm == @te
2356
- # Suffix was consumed, e.g. foo!
2357
- emit(:tFID)
2358
- else
2359
- # Suffix was not consumed, e.g. foo!=
2360
- emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
2361
- p = tm - 1
2362
- end
2363
- fnext expr_arg; fbreak;
2364
- };
2365
-
2366
- #
2367
- # OPERATORS
2368
- #
2369
-
2370
- '*' | '=>'
2371
- => {
2372
- emit_table(PUNCTUATION)
2373
- fgoto expr_value;
2374
- };
2375
-
2376
- # When '|', '~', '!', '=>' are used as operators
2377
- # they do not accept any symbols (or quoted labels) after.
2378
- # Other binary operators accept it.
2379
- ( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' )
2380
- => {
2381
- emit_table(PUNCTUATION);
2382
- fnext expr_value; fbreak;
2383
- };
2384
-
2385
- ( e_lparen | '|' | '~' | '!' )
2386
- => { emit_table(PUNCTUATION)
2387
- fnext expr_beg; fbreak; };
2388
-
2389
- e_rbrace | e_rparen | e_rbrack
2390
- => {
2391
- emit_table(PUNCTUATION)
2392
-
2393
- if @version < 24
2394
- @cond.lexpop
2395
- @cmdarg.lexpop
2396
- else
2397
- @cond.pop
2398
- @cmdarg.pop
2399
- end
2400
-
2401
- if tok == '}'.freeze || tok == ']'.freeze
2402
- if @version >= 25
2403
- fnext expr_end;
2404
- else
2405
- fnext expr_endarg;
2406
- end
2407
- else # )
2408
- # fnext expr_endfn; ?
2409
- end
2410
-
2411
- fbreak;
2412
- };
2413
-
2414
- operator_arithmetic '='
2415
- => { emit(:tOP_ASGN, tok(@ts, @te - 1))
2416
- fnext expr_beg; fbreak; };
2417
-
2418
- '?'
2419
- => { emit(:tEH, '?'.freeze)
2420
- fnext expr_value; fbreak; };
2421
-
2422
- e_lbrack
2423
- => { emit(:tLBRACK2, '['.freeze)
2424
- fnext expr_beg; fbreak; };
2425
-
2426
- '...' c_nl
2427
- => {
2428
- if @paren_nest == 0
2429
- diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
2430
- end
2431
-
2432
- emit(:tDOT3, '...'.freeze, @ts, @te - 1)
2433
- fhold;
2434
- fnext expr_beg; fbreak;
2435
- };
2436
-
2437
- punctuation_end
2438
- => { emit_table(PUNCTUATION)
2439
- fnext expr_beg; fbreak; };
2440
-
2441
- #
2442
- # WHITESPACE
2443
- #
2444
-
2445
- w_space_comment;
2446
-
2447
- w_newline
2448
- => { fgoto leading_dot; };
2449
-
2450
- ';'
2451
- => { emit(:tSEMI, ';'.freeze)
2452
- @command_start = true
2453
- fnext expr_value; fbreak; };
2454
-
2455
- '\\' c_line {
2456
- diagnostic :error, :bare_backslash, nil, range(@ts, @ts + 1)
2457
- fhold;
2458
- };
2459
-
2460
- c_any
2461
- => {
2462
- diagnostic :fatal, :unexpected, { :character => tok.inspect[1..-2] }
2463
- };
2464
-
2465
- c_eof => do_eof;
2466
- *|;
2467
-
2468
- leading_dot := |*
2469
- # Insane leading dots:
2470
- # a #comment
2471
- # # post-2.7 comment
2472
- # .b: a.b
2473
-
2474
- # Here we use '\n' instead of w_newline to not modify @newline_s
2475
- # and eventually properly emit tNL
2476
- (c_space* w_space_comment '\n')+
2477
- => {
2478
- if @version < 27
2479
- # Ruby before 2.7 doesn't support comments before leading dot.
2480
- # If a line after "a" starts with a comment then "a" is a self-contained statement.
2481
- # So in that case we emit a special tNL token and start reading the
2482
- # next line as a separate statement.
2483
- #
2484
- # Note: block comments before leading dot are not supported on any version of Ruby.
2485
- emit(:tNL, nil, @newline_s, @newline_s + 1)
2486
- fhold; fnext line_begin; fbreak;
2487
- end
2488
- };
2489
-
2490
- c_space* %{ tm = p } ('.' | '&.')
2491
- => { p = tm - 1; fgoto expr_end; };
2492
-
2493
- any
2494
- => { emit(:tNL, nil, @newline_s, @newline_s + 1)
2495
- fhold; fnext line_begin; fbreak; };
2496
- *|;
2497
-
2498
- #
2499
- # === EMBEDDED DOCUMENT (aka BLOCK COMMENT) PARSING ===
2500
- #
2501
-
2502
- line_comment := |*
2503
- '=end' c_line* c_nl_zlen
2504
- => {
2505
- emit_comment(@eq_begin_s, @te)
2506
- fgoto *@cs_before_block_comment;
2507
- };
2508
-
2509
- c_line* c_nl;
2510
-
2511
- c_line* zlen
2512
- => {
2513
- diagnostic :fatal, :embedded_document, nil,
2514
- range(@eq_begin_s, @eq_begin_s + '=begin'.length)
2515
- };
2516
- *|;
2517
-
2518
- line_begin := |*
2519
- w_any;
2520
-
2521
- '=begin' ( c_space | c_nl_zlen )
2522
- => { @eq_begin_s = @ts
2523
- fgoto line_comment; };
2524
-
2525
- '__END__' ( c_eol - zlen )
2526
- => { p = pe - 3 };
2527
-
2528
- c_any
2529
- => { cmd_state = true; fhold; fgoto expr_value; };
2530
-
2531
- c_eof => do_eof;
2532
- *|;
2533
-
2534
- }%%
2535
- # %
2536
- end