parser 2.6.5.0 → 2.7.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/lib/parser.rb +4 -1
  3. data/lib/parser/all.rb +1 -0
  4. data/lib/parser/ast/processor.rb +21 -0
  5. data/lib/parser/base.rb +25 -5
  6. data/lib/parser/builders/default.rb +394 -24
  7. data/lib/parser/context.rb +5 -0
  8. data/lib/parser/current.rb +16 -7
  9. data/lib/parser/current_arg_stack.rb +43 -0
  10. data/lib/parser/diagnostic.rb +1 -1
  11. data/lib/parser/diagnostic/engine.rb +1 -2
  12. data/lib/parser/lexer.rb +23770 -0
  13. data/lib/parser/lexer/dedenter.rb +52 -49
  14. data/lib/parser/macruby.rb +6149 -0
  15. data/lib/parser/{lexer/max_numparam_stack.rb → max_numparam_stack.rb} +10 -4
  16. data/lib/parser/messages.rb +52 -29
  17. data/lib/parser/meta.rb +10 -5
  18. data/lib/parser/ruby18.rb +5663 -0
  19. data/lib/parser/ruby19.rb +6092 -0
  20. data/lib/parser/ruby20.rb +6527 -0
  21. data/lib/parser/ruby21.rb +6578 -0
  22. data/lib/parser/ruby22.rb +6613 -0
  23. data/lib/parser/ruby23.rb +6624 -0
  24. data/lib/parser/ruby24.rb +6694 -0
  25. data/lib/parser/ruby25.rb +6662 -0
  26. data/lib/parser/ruby26.rb +6676 -0
  27. data/lib/parser/ruby27.rb +7803 -0
  28. data/lib/parser/ruby28.rb +8047 -0
  29. data/lib/parser/ruby30.rb +8052 -0
  30. data/lib/parser/rubymotion.rb +6086 -0
  31. data/lib/parser/runner.rb +26 -2
  32. data/lib/parser/runner/ruby_rewrite.rb +2 -2
  33. data/lib/parser/source/buffer.rb +3 -1
  34. data/lib/parser/source/comment.rb +1 -1
  35. data/lib/parser/source/comment/associator.rb +14 -4
  36. data/lib/parser/source/map/method_definition.rb +25 -0
  37. data/lib/parser/source/range.rb +19 -3
  38. data/lib/parser/source/tree_rewriter.rb +115 -12
  39. data/lib/parser/source/tree_rewriter/action.rb +137 -28
  40. data/lib/parser/static_environment.rb +10 -0
  41. data/lib/parser/tree_rewriter.rb +1 -2
  42. data/lib/parser/variables_stack.rb +32 -0
  43. data/lib/parser/version.rb +1 -1
  44. data/parser.gemspec +10 -18
  45. metadata +22 -99
  46. data/.gitignore +0 -33
  47. data/.travis.yml +0 -45
  48. data/.yardopts +0 -21
  49. data/CHANGELOG.md +0 -997
  50. data/CONTRIBUTING.md +0 -17
  51. data/Gemfile +0 -10
  52. data/LICENSE.txt +0 -25
  53. data/README.md +0 -301
  54. data/Rakefile +0 -166
  55. data/ci/run_rubocop_specs +0 -14
  56. data/doc/AST_FORMAT.md +0 -1816
  57. data/doc/CUSTOMIZATION.md +0 -37
  58. data/doc/INTERNALS.md +0 -21
  59. data/doc/css/.gitkeep +0 -0
  60. data/doc/css/common.css +0 -68
  61. data/lib/parser/lexer.rl +0 -2533
  62. data/lib/parser/macruby.y +0 -2198
  63. data/lib/parser/ruby18.y +0 -1934
  64. data/lib/parser/ruby19.y +0 -2175
  65. data/lib/parser/ruby20.y +0 -2353
  66. data/lib/parser/ruby21.y +0 -2357
  67. data/lib/parser/ruby22.y +0 -2364
  68. data/lib/parser/ruby23.y +0 -2370
  69. data/lib/parser/ruby24.y +0 -2408
  70. data/lib/parser/ruby25.y +0 -2405
  71. data/lib/parser/ruby26.y +0 -2413
  72. data/lib/parser/ruby27.y +0 -2470
  73. data/lib/parser/rubymotion.y +0 -2182
  74. data/test/bug_163/fixtures/input.rb +0 -5
  75. data/test/bug_163/fixtures/output.rb +0 -5
  76. data/test/bug_163/rewriter.rb +0 -20
  77. data/test/helper.rb +0 -59
  78. data/test/parse_helper.rb +0 -316
  79. data/test/racc_coverage_helper.rb +0 -133
  80. data/test/test_base.rb +0 -31
  81. data/test/test_current.rb +0 -29
  82. data/test/test_diagnostic.rb +0 -96
  83. data/test/test_diagnostic_engine.rb +0 -62
  84. data/test/test_encoding.rb +0 -99
  85. data/test/test_lexer.rb +0 -3667
  86. data/test/test_lexer_stack_state.rb +0 -78
  87. data/test/test_parse_helper.rb +0 -80
  88. data/test/test_parser.rb +0 -7644
  89. data/test/test_runner_parse.rb +0 -35
  90. data/test/test_runner_rewrite.rb +0 -47
  91. data/test/test_source_buffer.rb +0 -162
  92. data/test/test_source_comment.rb +0 -36
  93. data/test/test_source_comment_associator.rb +0 -367
  94. data/test/test_source_map.rb +0 -15
  95. data/test/test_source_range.rb +0 -172
  96. data/test/test_source_rewriter.rb +0 -541
  97. data/test/test_source_rewriter_action.rb +0 -46
  98. data/test/test_source_tree_rewriter.rb +0 -173
  99. data/test/test_static_environment.rb +0 -45
  100. data/test/using_tree_rewriter/fixtures/input.rb +0 -3
  101. data/test/using_tree_rewriter/fixtures/output.rb +0 -3
  102. data/test/using_tree_rewriter/using_tree_rewriter.rb +0 -9
@@ -1,37 +0,0 @@
1
- # Customizing Parsers
2
-
3
- While the default setup of the parsers provided by this Gem should be suitable
4
- for most, some developers might want to change parts of it. An example would be
5
- the use of a custom class for nodes instead of `Parser::AST::Node`.
6
-
7
- Customizing the AST is done by creating a custom builder class and passing it
8
- to the constructor method of a parser. The default setup comes down to the
9
- following:
10
-
11
- builder = Parser::Builders::Default.new
12
- parser = Parser::Ruby19.new(builder)
13
-
14
- When creating your own builder class it's best to subclass the default one so
15
- that you don't have to redefine every used method again:
16
-
17
- class MyBuilder < Parser::Builders::Default
18
-
19
- end
20
-
21
- builder = MyBuilder.new
22
- parser = Parser::Ruby19.new(builder)
23
-
24
- ## Custom Node Classes
25
-
26
- To use a custom node class you have to override the method
27
- `Parser::Builders::Default#n`:
28
-
29
- class MyBuilder < Parser::Builders::Default
30
- def n(type, children, location)
31
- return MyNodeClass.new(type, children, :location => location)
32
- end
33
- end
34
-
35
- Note that the used class (and corresponding instance) must be compatible with
36
- `Parser::AST::Node` so it's best to subclass it and override/add code where
37
- needed.
@@ -1,21 +0,0 @@
1
- Entry points
2
- ------------
3
-
4
- Parser should be kept as slim as possible. This includes not loading
5
- any potentially large files when they are likely to be unused in practice.
6
-
7
- Parser has five main (classes of) `require` entry points:
8
-
9
- * `require 'parser'`. Main entry point, requires all classes which
10
- are used across the entire library.
11
- * `require 'parser/rubyXX'`. Version-specific entry point. Can raise
12
- a NotImplementedError if current Ruby runtime is unable to parse the
13
- requested Ruby version.
14
- * `require 'parser/all'`. Requires all available parsers for released
15
- versions of Ruby. Can raise NotImplementedError.
16
- * `require 'parser/runner'`. Requires all the stuff which is useful for
17
- command-line tools but not otherwise.
18
- * `require 'parser/runner/X'`. Runner-specific entry point.
19
-
20
- All non-main entry points internally `require 'parser'`. Additionally, all
21
- runner-specific entry points internally `require 'parser/runner'`.
File without changes
@@ -1,68 +0,0 @@
1
- body
2
- {
3
- font-size: 14px;
4
- line-height: 1.6;
5
- margin: 0 auto;
6
- max-width: 960px;
7
- }
8
-
9
- p code
10
- {
11
- background: #f2f2f2;
12
- padding-left: 3px;
13
- padding-right: 3px;
14
- }
15
-
16
- pre.code
17
- {
18
- font-size: 13px;
19
- line-height: 1.4;
20
- }
21
-
22
- /**
23
- * YARD uses generic table styles, using a special class means those tables
24
- * don't get messed up.
25
- */
26
- .table
27
- {
28
- border: 1px solid #ccc;
29
- border-right: none;
30
- border-collapse: separate;
31
- border-spacing: 0;
32
- text-align: left;
33
- }
34
-
35
- .table.full
36
- {
37
- width: 100%;
38
- }
39
-
40
- .table .field_name
41
- {
42
- min-width: 160px;
43
- }
44
-
45
- .table thead tr th.no_sort:first-child
46
- {
47
- width: 25px;
48
- }
49
-
50
- .table thead tr th, .table tbody tr td
51
- {
52
- border-bottom: 1px solid #ccc;
53
- border-right: 1px solid #ccc;
54
- min-width: 20px;
55
- padding: 8px 5px;
56
- text-align: left;
57
- vertical-align: top;
58
- }
59
-
60
- .table tbody tr:last-child td
61
- {
62
- border-bottom: none;
63
- }
64
-
65
- .table tr:nth-child(odd) td
66
- {
67
- background: #f9f9f9;
68
- }
@@ -1,2533 +0,0 @@
1
- %%machine lex; # % fix highlighting
2
-
3
- #
4
- # === BEFORE YOU START ===
5
- #
6
- # Read the Ruby Hacking Guide chapter 11, available in English at
7
- # http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
8
- #
9
- # Remember two things about Ragel scanners:
10
- #
11
- # 1) Longest match wins.
12
- #
13
- # 2) If two matches have the same length, the first
14
- # in source code wins.
15
- #
16
- # General rules of making Ragel and Bison happy:
17
- #
18
- # * `p` (position) and `@te` contain the index of the character
19
- # they're pointing to ("current"), plus one. `@ts` contains the index
20
- # of the corresponding character. The code for extracting matched token is:
21
- #
22
- # @source_buffer.slice(@ts...@te)
23
- #
24
- # * If your input is `foooooooobar` and the rule is:
25
- #
26
- # 'f' 'o'+
27
- #
28
- # the result will be:
29
- #
30
- # foooooooobar
31
- # ^ ts=0 ^ p=te=9
32
- #
33
- # * A Ragel lexer action should not emit more than one token, unless
34
- # you know what you are doing.
35
- #
36
- # * All Ragel commands (fnext, fgoto, ...) end with a semicolon.
37
- #
38
- # * If an action emits the token and transitions to another state, use
39
- # these Ragel commands:
40
- #
41
- # emit($whatever)
42
- # fnext $next_state; fbreak;
43
- #
44
- # If you perform `fgoto` in an action which neither emits a token nor
45
- # rewinds the stream pointer, the parser's side-effectful,
46
- # context-sensitive lookahead actions will break in a hard to detect
47
- # and debug way.
48
- #
49
- # * If an action does not emit a token:
50
- #
51
- # fgoto $next_state;
52
- #
53
- # * If an action features lookbehind, i.e. matches characters with the
54
- # intent of passing them to another action:
55
- #
56
- # p = @ts - 1
57
- # fgoto $next_state;
58
- #
59
- # or, if the lookbehind consists of a single character:
60
- #
61
- # fhold; fgoto $next_state;
62
- #
63
- # * Ragel merges actions. So, if you have `e_lparen = '(' %act` and
64
- # `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
65
- # _will_ invoke the action `act`.
66
- #
67
- # e_something stands for "something with **e**mbedded action".
68
- #
69
- # * EOF is explicit and is matched by `c_eof`. If you want to introspect
70
- # the state of the lexer, add this rule to the state:
71
- #
72
- # c_eof => do_eof;
73
- #
74
- # * If you proceed past EOF, the lexer will complain:
75
- #
76
- # NoMethodError: undefined method `ord' for nil:NilClass
77
- #
78
-
79
- class Parser::Lexer
80
-
81
- %% write data nofinal;
82
- # %
83
-
84
- ESCAPES = {
85
- ?a.ord => "\a", ?b.ord => "\b", ?e.ord => "\e", ?f.ord => "\f",
86
- ?n.ord => "\n", ?r.ord => "\r", ?s.ord => "\s", ?t.ord => "\t",
87
- ?v.ord => "\v", ?\\.ord => "\\"
88
- }.freeze
89
-
90
- REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
91
-
92
- NUMPARAM_MAX = 9
93
-
94
- attr_reader :source_buffer
95
- attr_reader :max_numparam_stack
96
-
97
- attr_accessor :diagnostics
98
- attr_accessor :static_env
99
- attr_accessor :force_utf32
100
-
101
- attr_accessor :cond, :cmdarg, :in_kwarg, :context
102
-
103
- attr_accessor :tokens, :comments
104
-
105
- def initialize(version)
106
- @version = version
107
- @static_env = nil
108
- @context = nil
109
-
110
- @tokens = nil
111
- @comments = nil
112
-
113
- reset
114
- end
115
-
116
- def reset(reset_state=true)
117
- # Ragel state:
118
- if reset_state
119
- # Unit tests set state prior to resetting lexer.
120
- @cs = self.class.lex_en_line_begin
121
-
122
- @cond = StackState.new('cond')
123
- @cmdarg = StackState.new('cmdarg')
124
- @cond_stack = []
125
- @cmdarg_stack = []
126
- end
127
-
128
- @force_utf32 = false # Set to true by some tests
129
-
130
- @source_pts = nil # @source as a codepoint array
131
-
132
- @p = 0 # stream position (saved manually in #advance)
133
- @ts = nil # token start
134
- @te = nil # token end
135
- @act = 0 # next action
136
-
137
- @stack = [] # state stack
138
- @top = 0 # state stack top pointer
139
-
140
- # Lexer state:
141
- @token_queue = []
142
- @literal_stack = []
143
-
144
- @eq_begin_s = nil # location of last encountered =begin
145
- @sharp_s = nil # location of last encountered #
146
-
147
- @newline_s = nil # location of last encountered newline
148
-
149
- @num_base = nil # last numeric base
150
- @num_digits_s = nil # starting position of numeric digits
151
- @num_suffix_s = nil # starting position of numeric suffix
152
- @num_xfrm = nil # numeric suffix-induced transformation
153
-
154
- @escape_s = nil # starting position of current sequence
155
- @escape = nil # last escaped sequence, as string
156
-
157
- @herebody_s = nil # starting position of current heredoc line
158
-
159
- # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
160
- # encountered after a matching closing parenthesis.
161
- @paren_nest = 0
162
- @lambda_stack = []
163
-
164
- # After encountering the closing line of <<~SQUIGGLY_HEREDOC,
165
- # we store the indentation level and give it out to the parser
166
- # on request. It is not possible to infer indentation level just
167
- # from the AST because escape sequences such as `\ ` or `\t` are
168
- # expanded inside the lexer, but count as non-whitespace for
169
- # indentation purposes.
170
- @dedent_level = nil
171
-
172
- # If the lexer is in `command state' (aka expr_value)
173
- # at the entry to #advance, it will transition to expr_cmdarg
174
- # instead of expr_arg at certain points.
175
- @command_start = true
176
-
177
- # True at the end of "def foo a:"
178
- @in_kwarg = false
179
-
180
- # State before =begin / =end block comment
181
- @cs_before_block_comment = self.class.lex_en_line_begin
182
-
183
- # Maximum numbered parameters stack
184
- @max_numparam_stack = MaxNumparamStack.new
185
- end
186
-
187
- def source_buffer=(source_buffer)
188
- @source_buffer = source_buffer
189
-
190
- if @source_buffer
191
- source = @source_buffer.source
192
-
193
- if source.encoding == Encoding::UTF_8
194
- @source_pts = source.unpack('U*')
195
- else
196
- @source_pts = source.unpack('C*')
197
- end
198
-
199
- if @source_pts[0] == 0xfeff
200
- # Skip byte order mark.
201
- @p = 1
202
- end
203
- else
204
- @source_pts = nil
205
- end
206
- end
207
-
208
- def encoding
209
- @source_buffer.source.encoding
210
- end
211
-
212
- LEX_STATES = {
213
- :line_begin => lex_en_line_begin,
214
- :expr_dot => lex_en_expr_dot,
215
- :expr_fname => lex_en_expr_fname,
216
- :expr_value => lex_en_expr_value,
217
- :expr_beg => lex_en_expr_beg,
218
- :expr_mid => lex_en_expr_mid,
219
- :expr_arg => lex_en_expr_arg,
220
- :expr_cmdarg => lex_en_expr_cmdarg,
221
- :expr_end => lex_en_expr_end,
222
- :expr_endarg => lex_en_expr_endarg,
223
- :expr_endfn => lex_en_expr_endfn,
224
- :expr_labelarg => lex_en_expr_labelarg,
225
-
226
- :interp_string => lex_en_interp_string,
227
- :interp_words => lex_en_interp_words,
228
- :plain_string => lex_en_plain_string,
229
- :plain_words => lex_en_plain_string,
230
- }
231
-
232
- def state
233
- LEX_STATES.invert.fetch(@cs, @cs)
234
- end
235
-
236
- def state=(state)
237
- @cs = LEX_STATES.fetch(state)
238
- end
239
-
240
- def push_cmdarg
241
- @cmdarg_stack.push(@cmdarg)
242
- @cmdarg = StackState.new("cmdarg.#{@cmdarg_stack.count}")
243
- end
244
-
245
- def pop_cmdarg
246
- @cmdarg = @cmdarg_stack.pop
247
- end
248
-
249
- def push_cond
250
- @cond_stack.push(@cond)
251
- @cond = StackState.new("cond.#{@cond_stack.count}")
252
- end
253
-
254
- def pop_cond
255
- @cond = @cond_stack.pop
256
- end
257
-
258
- def max_numparam
259
- @max_numparam_stack.top
260
- end
261
-
262
- def dedent_level
263
- # We erase @dedent_level as a precaution to avoid accidentally
264
- # using a stale value.
265
- dedent_level, @dedent_level = @dedent_level, nil
266
- dedent_level
267
- end
268
-
269
- # Return next token: [type, value].
270
- def advance
271
- if @token_queue.any?
272
- return @token_queue.shift
273
- end
274
-
275
- # Ugly, but dependent on Ragel output. Consider refactoring it somehow.
276
- klass = self.class
277
- _lex_trans_keys = klass.send :_lex_trans_keys
278
- _lex_key_spans = klass.send :_lex_key_spans
279
- _lex_index_offsets = klass.send :_lex_index_offsets
280
- _lex_indicies = klass.send :_lex_indicies
281
- _lex_trans_targs = klass.send :_lex_trans_targs
282
- _lex_trans_actions = klass.send :_lex_trans_actions
283
- _lex_to_state_actions = klass.send :_lex_to_state_actions
284
- _lex_from_state_actions = klass.send :_lex_from_state_actions
285
- _lex_eof_trans = klass.send :_lex_eof_trans
286
-
287
- pe = @source_pts.size + 2
288
- p, eof = @p, pe
289
-
290
- cmd_state = @command_start
291
- @command_start = false
292
-
293
- %% write exec;
294
- # %
295
-
296
- @p = p
297
-
298
- if @token_queue.any?
299
- @token_queue.shift
300
- elsif @cs == klass.lex_error
301
- [ false, [ '$error'.freeze, range(p - 1, p) ] ]
302
- else
303
- eof = @source_pts.size
304
- [ false, [ '$eof'.freeze, range(eof, eof) ] ]
305
- end
306
- end
307
-
308
- protected
309
-
310
- def eof_codepoint?(point)
311
- [0x04, 0x1a, 0x00].include? point
312
- end
313
-
314
- def version?(*versions)
315
- versions.include?(@version)
316
- end
317
-
318
- def stack_pop
319
- @top -= 1
320
- @stack[@top]
321
- end
322
-
323
- def encode_escape(ord)
324
- ord.chr.force_encoding(@source_buffer.source.encoding)
325
- end
326
-
327
- def tok(s = @ts, e = @te)
328
- @source_buffer.slice(s...e)
329
- end
330
-
331
- def range(s = @ts, e = @te)
332
- Parser::Source::Range.new(@source_buffer, s, e)
333
- end
334
-
335
- def emit(type, value = tok, s = @ts, e = @te)
336
- token = [ type, [ value, range(s, e) ] ]
337
-
338
- @token_queue.push(token)
339
-
340
- @tokens.push(token) if @tokens
341
-
342
- token
343
- end
344
-
345
- def emit_table(table, s = @ts, e = @te)
346
- value = tok(s, e)
347
-
348
- emit(table[value], value, s, e)
349
- end
350
-
351
- def emit_do(do_block=false)
352
- if @cond.active?
353
- emit(:kDO_COND, 'do'.freeze)
354
- elsif @cmdarg.active? || do_block
355
- emit(:kDO_BLOCK, 'do'.freeze)
356
- else
357
- emit(:kDO, 'do'.freeze)
358
- end
359
- end
360
-
361
- def arg_or_cmdarg(cmd_state)
362
- if cmd_state
363
- self.class.lex_en_expr_cmdarg
364
- else
365
- self.class.lex_en_expr_arg
366
- end
367
- end
368
-
369
- def emit_comment(s = @ts, e = @te)
370
- if @comments
371
- @comments.push(Parser::Source::Comment.new(range(s, e)))
372
- end
373
-
374
- if @tokens
375
- @tokens.push([ :tCOMMENT, [ tok(s, e), range(s, e) ] ])
376
- end
377
-
378
- nil
379
- end
380
-
381
- def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
382
- @diagnostics.process(
383
- Parser::Diagnostic.new(type, reason, arguments, location, highlights))
384
- end
385
-
386
- #
387
- # === LITERAL STACK ===
388
- #
389
-
390
- def push_literal(*args)
391
- new_literal = Literal.new(self, *args)
392
- @literal_stack.push(new_literal)
393
- next_state_for_literal(new_literal)
394
- end
395
-
396
- def next_state_for_literal(literal)
397
- if literal.words? && literal.backslash_delimited?
398
- if literal.interpolate?
399
- self.class.lex_en_interp_backslash_delimited_words
400
- else
401
- self.class.lex_en_plain_backslash_delimited_words
402
- end
403
- elsif literal.words? && !literal.backslash_delimited?
404
- if literal.interpolate?
405
- self.class.lex_en_interp_words
406
- else
407
- self.class.lex_en_plain_words
408
- end
409
- elsif !literal.words? && literal.backslash_delimited?
410
- if literal.interpolate?
411
- self.class.lex_en_interp_backslash_delimited
412
- else
413
- self.class.lex_en_plain_backslash_delimited
414
- end
415
- else
416
- if literal.interpolate?
417
- self.class.lex_en_interp_string
418
- else
419
- self.class.lex_en_plain_string
420
- end
421
- end
422
- end
423
-
424
- def literal
425
- @literal_stack.last
426
- end
427
-
428
- def pop_literal
429
- old_literal = @literal_stack.pop
430
-
431
- @dedent_level = old_literal.dedent_level
432
-
433
- if old_literal.type == :tREGEXP_BEG
434
- # Fetch modifiers.
435
- self.class.lex_en_regexp_modifiers
436
- else
437
- self.class.lex_en_expr_end
438
- end
439
- end
440
-
441
- # Mapping of strings to parser tokens.
442
-
443
- PUNCTUATION = {
444
- '=' => :tEQL, '&' => :tAMPER2, '|' => :tPIPE,
445
- '!' => :tBANG, '^' => :tCARET, '+' => :tPLUS,
446
- '-' => :tMINUS, '*' => :tSTAR2, '/' => :tDIVIDE,
447
- '%' => :tPERCENT, '~' => :tTILDE, ',' => :tCOMMA,
448
- ';' => :tSEMI, '.' => :tDOT, '..' => :tDOT2,
449
- '...' => :tDOT3, '[' => :tLBRACK2, ']' => :tRBRACK,
450
- '(' => :tLPAREN2, ')' => :tRPAREN, '?' => :tEH,
451
- ':' => :tCOLON, '&&' => :tANDOP, '||' => :tOROP,
452
- '-@' => :tUMINUS, '+@' => :tUPLUS, '~@' => :tTILDE,
453
- '**' => :tPOW, '->' => :tLAMBDA, '=~' => :tMATCH,
454
- '!~' => :tNMATCH, '==' => :tEQ, '!=' => :tNEQ,
455
- '>' => :tGT, '>>' => :tRSHFT, '>=' => :tGEQ,
456
- '<' => :tLT, '<<' => :tLSHFT, '<=' => :tLEQ,
457
- '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
458
- '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
459
- '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
460
- '!@' => :tBANG, '&.' => :tANDDOT, '.:' => :tMETHREF
461
- }
462
-
463
- PUNCTUATION_BEGIN = {
464
- '&' => :tAMPER, '*' => :tSTAR, '**' => :tDSTAR,
465
- '+' => :tUPLUS, '-' => :tUMINUS, '::' => :tCOLON3,
466
- '(' => :tLPAREN, '{' => :tLBRACE, '[' => :tLBRACK,
467
- }
468
-
469
- KEYWORDS = {
470
- 'if' => :kIF_MOD, 'unless' => :kUNLESS_MOD,
471
- 'while' => :kWHILE_MOD, 'until' => :kUNTIL_MOD,
472
- 'rescue' => :kRESCUE_MOD, 'defined?' => :kDEFINED,
473
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
474
- }
475
-
476
- KEYWORDS_BEGIN = {
477
- 'if' => :kIF, 'unless' => :kUNLESS,
478
- 'while' => :kWHILE, 'until' => :kUNTIL,
479
- 'rescue' => :kRESCUE, 'defined?' => :kDEFINED,
480
- 'BEGIN' => :klBEGIN, 'END' => :klEND,
481
- }
482
-
483
- %w(class module def undef begin end then elsif else ensure case when
484
- for break next redo retry in do return yield super self nil true
485
- false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
486
- KEYWORDS_BEGIN[keyword] = KEYWORDS[keyword] = :"k#{keyword.upcase}"
487
- end
488
-
489
- %%{
490
- # %
491
-
492
- access @;
493
- getkey (@source_pts[p] || 0);
494
-
495
- # === CHARACTER CLASSES ===
496
- #
497
- # Pay close attention to the differences between c_any and any.
498
- # c_any does not include EOF and so will cause incorrect behavior
499
- # for machine subtraction (any-except rules) and default transitions
500
- # for scanners.
501
-
502
- action do_nl {
503
- # Record position of a newline for precise location reporting on tNL
504
- # tokens.
505
- #
506
- # This action is embedded directly into c_nl, as it is idempotent and
507
- # there are no cases when we need to skip it.
508
- @newline_s = p
509
- }
510
-
511
- c_nl = '\n' $ do_nl;
512
- c_space = [ \t\r\f\v];
513
- c_space_nl = c_space | c_nl;
514
-
515
- c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
516
- c_eol = c_nl | c_eof;
517
- c_any = any - c_eof;
518
-
519
- c_nl_zlen = c_nl | zlen;
520
- c_line = any - c_nl_zlen;
521
-
522
- c_unicode = c_any - 0x00..0x7f;
523
- c_upper = [A-Z];
524
- c_lower = [a-z_] | c_unicode;
525
- c_alpha = c_lower | c_upper;
526
- c_alnum = c_alpha | [0-9];
527
-
528
- action do_eof {
529
- # Sit at EOF indefinitely. #advance would return $eof each time.
530
- # This allows feeding the lexer more data if needed; this is only used
531
- # in tests.
532
- #
533
- # Note that this action is not embedded into e_eof like e_heredoc_nl and e_bs
534
- # below. This is due to the fact that scanner state at EOF is observed
535
- # by tests, and encapsulating it in a rule would break the introspection.
536
- fhold; fbreak;
537
- }
538
-
539
- #
540
- # === TOKEN DEFINITIONS ===
541
- #
542
-
543
- # All operators are punctuation. There is more to punctuation
544
- # than just operators. Operators can be overridden by user;
545
- # punctuation can not.
546
-
547
- # A list of operators which are valid in the function name context, but
548
- # have different semantics in others.
549
- operator_fname = '[]' | '[]=' | '`' | '-@' | '+@' | '~@' | '!@' ;
550
-
551
- # A list of operators which can occur within an assignment shortcut (+ → +=).
552
- operator_arithmetic = '&' | '|' | '&&' | '||' | '^' | '+' | '-' |
553
- '*' | '/' | '**' | '~' | '<<' | '>>' | '%' ;
554
-
555
- # A list of all user-definable operators not covered by groups above.
556
- operator_rest = '=~' | '!~' | '==' | '!=' | '!' | '===' |
557
- '<' | '<=' | '>' | '>=' | '<=>' | '=>' ;
558
-
559
- # Note that `{` and `}` need to be referred to as e_lbrace and e_rbrace,
560
- # as they are ambiguous with interpolation `#{}` and should be counted.
561
- # These braces are not present in punctuation lists.
562
-
563
- # A list of punctuation which has different meaning when used at the
564
- # beginning of expression.
565
- punctuation_begin = '-' | '+' | '::' | '(' | '[' |
566
- '*' | '**' | '&' ;
567
-
568
- # A list of all punctuation except punctuation_begin.
569
- punctuation_end = ',' | '=' | '->' | '(' | '[' | ']' |
570
- '::' | '?' | ':' | '.' | '..' | '...' ;
571
-
572
- # A list of keywords which have different meaning at the beginning of expression.
573
- keyword_modifier = 'if' | 'unless' | 'while' | 'until' | 'rescue' ;
574
-
575
- # A list of keywords which accept an argument-like expression, i.e. have the
576
- # same post-processing as method calls or commands. Example: `yield 1`,
577
- # `yield (1)`, `yield(1)`, are interpreted as if `yield` was a function.
578
- keyword_with_arg = 'yield' | 'super' | 'not' | 'defined?' ;
579
-
580
- # A list of keywords which accept a literal function name as an argument.
581
- keyword_with_fname = 'def' | 'undef' | 'alias' ;
582
-
583
- # A list of keywords which accept an expression after them.
584
- keyword_with_value = 'else' | 'case' | 'ensure' | 'module' | 'elsif' | 'then' |
585
- 'for' | 'in' | 'do' | 'when' | 'begin' | 'class' |
586
- 'and' | 'or' ;
587
-
588
- # A list of keywords which accept a value, and treat the keywords from
589
- # `keyword_modifier` list as modifiers.
590
- keyword_with_mid = 'rescue' | 'return' | 'break' | 'next' ;
591
-
592
- # A list of keywords which do not accept an expression after them.
593
- keyword_with_end = 'end' | 'self' | 'true' | 'false' | 'retry' |
594
- 'redo' | 'nil' | 'BEGIN' | 'END' | '__FILE__' |
595
- '__LINE__' | '__ENCODING__';
596
-
597
- # All keywords.
598
- keyword = keyword_with_value | keyword_with_mid |
599
- keyword_with_end | keyword_with_arg |
600
- keyword_with_fname | keyword_modifier ;
601
-
602
- constant = c_upper c_alnum*;
603
- bareword = c_alpha c_alnum*;
604
-
605
- call_or_var = c_lower c_alnum*;
606
- class_var = '@@' bareword;
607
- instance_var = '@' bareword;
608
- global_var = '$'
609
- ( bareword | digit+
610
- | [`'+~*$&?!@/\\;,.=:<>"] # `
611
- | '-' c_alnum
612
- )
613
- ;
614
-
615
- # Ruby accepts (and fails on) variables with leading digit
616
- # in literal context, but not in unquoted symbol body.
617
- class_var_v = '@@' c_alnum+;
618
- instance_var_v = '@' c_alnum+;
619
-
620
- label = bareword [?!]? ':';
621
-
622
- #
623
- # === NUMERIC PARSING ===
624
- #
625
-
626
- int_hex = ( xdigit+ '_' )* xdigit* '_'? ;
627
- int_dec = ( digit+ '_' )* digit* '_'? ;
628
- int_bin = ( [01]+ '_' )* [01]* '_'? ;
629
-
630
- flo_int = [1-9] [0-9]* ( '_' digit+ )* | '0';
631
- flo_frac = '.' ( digit+ '_' )* digit+;
632
- flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
633
-
634
- int_suffix =
635
- '' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars) } }
636
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
637
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, chars)) } }
638
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
639
- | 're' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
640
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
641
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 6); p -= 6 } };
642
-
643
- flo_pow_suffix =
644
- '' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars)) } }
645
- | 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Float(chars))) } }
646
- | 'if' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 2); p -= 2 } };
647
-
648
- flo_suffix =
649
- flo_pow_suffix
650
- | 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
651
- | 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
652
- | 'rescue' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 6); p -= 6 } };
653
-
654
- #
655
- # === ESCAPE SEQUENCE PARSING ===
656
- #
657
-
658
- # Escape parsing code is a Ragel pattern, not a scanner, and therefore
659
- # it shouldn't directly raise errors or perform other actions with side effects.
660
- # In reality this would probably just mess up error reporting in pathological
661
- # cases, though.
662
-
663
- # The amount of code required to parse \M\C stuff correctly is ridiculous.
664
-
665
- escaped_nl = "\\" c_nl;
666
-
667
- action unicode_points {
668
- @escape = ""
669
-
670
- codepoints = tok(@escape_s + 2, p - 1)
671
- codepoint_s = @escape_s + 2
672
-
673
- if @version < 24
674
- if codepoints.start_with?(" ") || codepoints.start_with?("\t")
675
- diagnostic :fatal, :invalid_unicode_escape, nil,
676
- range(@escape_s + 2, @escape_s + 3)
677
- end
678
-
679
- if spaces_p = codepoints.index(/[ \t]{2}/)
680
- diagnostic :fatal, :invalid_unicode_escape, nil,
681
- range(codepoint_s + spaces_p + 1, codepoint_s + spaces_p + 2)
682
- end
683
-
684
- if codepoints.end_with?(" ") || codepoints.end_with?("\t")
685
- diagnostic :fatal, :invalid_unicode_escape, nil, range(p - 1, p)
686
- end
687
- end
688
-
689
- codepoints.scan(/([0-9a-fA-F]+)|([ \t]+)/).each do |(codepoint_str, spaces)|
690
- if spaces
691
- codepoint_s += spaces.length
692
- else
693
- codepoint = codepoint_str.to_i(16)
694
-
695
- if codepoint >= 0x110000
696
- diagnostic :error, :unicode_point_too_large, nil,
697
- range(codepoint_s, codepoint_s + codepoint_str.length)
698
- break
699
- end
700
-
701
- @escape += codepoint.chr(Encoding::UTF_8)
702
- codepoint_s += codepoint_str.length
703
- end
704
- end
705
- }
706
-
707
- action unescape_char {
708
- codepoint = @source_pts[p - 1]
709
- if (@escape = ESCAPES[codepoint]).nil?
710
- @escape = encode_escape(@source_buffer.slice(p - 1))
711
- end
712
- }
713
-
714
- action invalid_complex_escape {
715
- diagnostic :fatal, :invalid_escape
716
- }
717
-
718
- action read_post_meta_or_ctrl_char {
719
- @escape = @source_buffer.slice(p - 1).chr
720
-
721
- if @version >= 27 && ((0..8).include?(@escape.ord) || (14..31).include?(@escape.ord))
722
- diagnostic :fatal, :invalid_escape
723
- end
724
- }
725
-
726
- action slash_c_char {
727
- @escape = encode_escape(@escape[0].ord & 0x9f)
728
- }
729
-
730
- action slash_m_char {
731
- @escape = encode_escape(@escape[0].ord | 0x80)
732
- }
733
-
734
- maybe_escaped_char = (
735
- '\\' c_any %unescape_char
736
- | ( c_any - [\\] ) %read_post_meta_or_ctrl_char
737
- );
738
-
739
- maybe_escaped_ctrl_char = ( # why?!
740
- '\\' c_any %unescape_char %slash_c_char
741
- | '?' % { @escape = "\x7f" }
742
- | ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
743
- );
744
-
745
- escape = (
746
- # \377
747
- [0-7]{1,3}
748
- % { @escape = encode_escape(tok(@escape_s, p).to_i(8) % 0x100) }
749
-
750
- # \xff
751
- | 'x' xdigit{1,2}
752
- % { @escape = encode_escape(tok(@escape_s + 1, p).to_i(16)) }
753
-
754
- # %q[\x]
755
- | 'x' ( c_any - xdigit )
756
- % {
757
- diagnostic :fatal, :invalid_hex_escape, nil, range(@escape_s - 1, p + 2)
758
- }
759
-
760
- # \u263a
761
- | 'u' xdigit{4}
762
- % { @escape = tok(@escape_s + 1, p).to_i(16).chr(Encoding::UTF_8) }
763
-
764
- # \u123
765
- | 'u' xdigit{0,3}
766
- % {
767
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
768
- }
769
-
770
- # u{not hex} or u{}
771
- | 'u{' ( c_any - xdigit - [ \t}] )* '}'
772
- % {
773
- diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
774
- }
775
-
776
- # \u{ \t 123 \t 456 \t\t }
777
- | 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
778
- (
779
- ( xdigit{1,6} [ \t]* '}'
780
- %unicode_points
781
- )
782
- |
783
- ( xdigit* ( c_any - xdigit - [ \t}] )+ '}'
784
- | ( c_any - [ \t}] )* c_eof
785
- | xdigit{7,}
786
- ) % {
787
- diagnostic :fatal, :unterminated_unicode, nil, range(p - 1, p)
788
- }
789
- )
790
-
791
- # \C-\a \cx
792
- | ( 'C-' | 'c' ) escaped_nl?
793
- maybe_escaped_ctrl_char
794
-
795
- # \M-a
796
- | 'M-' escaped_nl?
797
- maybe_escaped_char
798
- %slash_m_char
799
-
800
- # \C-\M-f \M-\cf \c\M-f
801
- | ( ( 'C-' | 'c' ) escaped_nl? '\\M-'
802
- | 'M-\\' escaped_nl? ( 'C-' | 'c' ) ) escaped_nl?
803
- maybe_escaped_ctrl_char
804
- %slash_m_char
805
-
806
- | 'C' c_any %invalid_complex_escape
807
- | 'M' c_any %invalid_complex_escape
808
- | ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
809
-
810
- | ( c_any - [0-7xuCMc] ) %unescape_char
811
-
812
- | c_eof % {
813
- diagnostic :fatal, :escape_eof, nil, range(p - 1, p)
814
- }
815
- );
816
-
817
- # Use rules in form of `e_bs escape' when you need to parse a sequence.
818
- e_bs = '\\' % {
819
- @escape_s = p
820
- @escape = nil
821
- };
822
-
823
- #
824
- # === STRING AND HEREDOC PARSING ===
825
- #
826
-
827
- # Heredoc parsing is quite a complex topic. First, consider that heredocs
828
- # can be arbitrarily nested. For example:
829
- #
830
- # puts <<CODE
831
- # the result is: #{<<RESULT.inspect
832
- # i am a heredoc
833
- # RESULT
834
- # }
835
- # CODE
836
- #
837
- # which, incidentally, evaluates to:
838
- #
839
- # the result is: " i am a heredoc\n"
840
- #
841
- # To parse them, lexer refers to two kinds (remember, nested heredocs)
842
- # of positions in the input stream, namely heredoc_e
843
- # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
844
- #
845
- # heredoc_e is simply contained inside the corresponding Literal, and
846
- # when the heredoc is closed, the lexing is restarted from that position.
847
- #
848
- # @herebody_s is considerably more complex. First, @herebody_s changes after each
849
- # heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
850
- # contains the current line, and also when a heredoc is started, @herebody_s
851
- # contains the position from which the heredoc will be lexed.
852
- #
853
- # Second, as (insanity) there are nested heredocs, we need to maintain a
854
- # stack of these positions. Each time #push_literal is called, it saves current
855
- # @herebody_s to literal.saved_herebody_s, and after an interpolation (possibly
856
- # containing other heredocs) is closed, the previous value is restored.
857
-
858
- e_heredoc_nl = c_nl % {
859
- # After every heredoc was parsed, @herebody_s contains the
860
- # position of next token after all heredocs.
861
- if @herebody_s
862
- p = @herebody_s
863
- @herebody_s = nil
864
- end
865
- };
866
-
867
- action extend_string {
868
- string = tok
869
-
870
- # tLABEL_END is only possible in non-cond context on >= 2.2
871
- if @version >= 22 && !@cond.active?
872
- lookahead = @source_buffer.slice(@te...@te+2)
873
- end
874
-
875
- current_literal = literal
876
- if !current_literal.heredoc? &&
877
- (token = current_literal.nest_and_try_closing(string, @ts, @te, lookahead))
878
- if token[0] == :tLABEL_END
879
- p += 1
880
- pop_literal
881
- fnext expr_labelarg;
882
- else
883
- fnext *pop_literal;
884
- end
885
- fbreak;
886
- else
887
- current_literal.extend_string(string, @ts, @te)
888
- end
889
- }
890
-
891
- action extend_string_escaped {
892
- current_literal = literal
893
- # Get the first character after the backslash.
894
- escaped_char = @source_buffer.slice(@escape_s).chr
895
-
896
- if current_literal.munge_escape? escaped_char
897
- # If this particular literal uses this character as an opening
898
- # or closing delimiter, it is an escape sequence for that
899
- # particular character. Write it without the backslash.
900
-
901
- if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
902
- # Regular expressions should include escaped delimiters in their
903
- # escaped form, except when the escaped character is
904
- # a closing delimiter but not a regexp metacharacter.
905
- #
906
- # The backslash itself cannot be used as a closing delimiter
907
- # at the same time as an escape symbol, but it is always munged,
908
- # so this branch also executes for the non-closing-delimiter case
909
- # for the backslash.
910
- current_literal.extend_string(tok, @ts, @te)
911
- else
912
- current_literal.extend_string(escaped_char, @ts, @te)
913
- end
914
- else
915
- # It does not. So this is an actual escape sequence, yay!
916
- if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
917
- # Squiggly heredocs like
918
- # <<~-HERE
919
- # 1\
920
- # 2
921
- # HERE
922
- # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
923
- # This information is emitted as is, without escaping,
924
- # later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
925
- current_literal.extend_string(tok, @ts, @te)
926
- elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
927
- # Heredocs, regexp and a few other types of literals support line
928
- # continuation via \\\n sequence. The code like
929
- # "a\
930
- # b"
931
- # must be parsed as "ab"
932
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
933
- elsif current_literal.regexp?
934
- # Regular expressions should include escape sequences in their
935
- # escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
936
- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
937
- else
938
- current_literal.extend_string(@escape || tok, @ts, @te)
939
- end
940
- end
941
- }
942
-
943
- # Extend a string with a newline or an EOF character.
944
- # As heredoc closing line can immediately precede EOF, this action
945
- # has to handle such case specially.
946
- action extend_string_eol {
947
- current_literal = literal
948
- if @te == pe
949
- diagnostic :fatal, :string_eof, nil,
950
- range(current_literal.str_s, current_literal.str_s + 1)
951
- end
952
-
953
- if current_literal.heredoc?
954
- line = tok(@herebody_s, @ts).gsub(/\r+$/, ''.freeze)
955
-
956
- if version?(18, 19, 20)
957
- # See ruby:c48b4209c
958
- line = line.gsub(/\r.*$/, ''.freeze)
959
- end
960
-
961
- # Try ending the heredoc with the complete most recently
962
- # scanned line. @herebody_s always refers to the start of such line.
963
- if current_literal.nest_and_try_closing(line, @herebody_s, @ts)
964
- # Adjust @herebody_s to point to the next line.
965
- @herebody_s = @te
966
-
967
- # Continue regular lexing after the heredoc reference (<<END).
968
- p = current_literal.heredoc_e - 1
969
- fnext *pop_literal; fbreak;
970
- else
971
- # Calculate indentation level for <<~HEREDOCs.
972
- current_literal.infer_indent_level(line)
973
-
974
- # Ditto.
975
- @herebody_s = @te
976
- end
977
- else
978
- # Try ending the literal with a newline.
979
- if current_literal.nest_and_try_closing(tok, @ts, @te)
980
- fnext *pop_literal; fbreak;
981
- end
982
-
983
- if @herebody_s
984
- # This is a regular literal intertwined with a heredoc. Like:
985
- #
986
- # p <<-foo+"1
987
- # bar
988
- # foo
989
- # 2"
990
- #
991
- # which, incidentally, evaluates to "bar\n1\n2".
992
- p = @herebody_s - 1
993
- @herebody_s = nil
994
- end
995
- end
996
-
997
- if current_literal.words? && !eof_codepoint?(@source_pts[p])
998
- current_literal.extend_space @ts, @te
999
- else
1000
- # A literal newline is appended if the heredoc was _not_ closed
1001
- # this time (see fbreak above). See also Literal#nest_and_try_closing
1002
- # for rationale of calling #flush_string here.
1003
- current_literal.extend_string tok, @ts, @te
1004
- current_literal.flush_string
1005
- end
1006
- }
1007
-
1008
- action extend_string_space {
1009
- literal.extend_space @ts, @te
1010
- }
1011
-
1012
- #
1013
- # === INTERPOLATION PARSING ===
1014
- #
1015
-
1016
- # Interpolations with immediate variable names simply call into
1017
- # the corresponding machine.
1018
-
1019
- interp_var = '#' ( global_var | class_var_v | instance_var_v );
1020
-
1021
- action extend_interp_var {
1022
- current_literal = literal
1023
- current_literal.flush_string
1024
- current_literal.extend_content
1025
-
1026
- emit(:tSTRING_DVAR, nil, @ts, @ts + 1)
1027
-
1028
- p = @ts
1029
- fcall expr_variable;
1030
- }
1031
-
1032
- # Interpolations with code blocks must match nested curly braces, as
1033
- # interpolation ending is ambiguous with a block ending. So, every
1034
- # opening and closing brace should be matched with e_[lr]brace rules,
1035
- # which automatically perform the counting.
1036
- #
1037
- # Note that interpolations can themselves be nested, so brace balance
1038
- # is tied to the innermost literal.
1039
- #
1040
- # Also note that literals themselves should not use e_[lr]brace rules
1041
- # when matching their opening and closing delimiters, as the amount of
1042
- # braces inside the characters of a string literal is independent.
1043
-
1044
- interp_code = '#{';
1045
-
1046
- e_lbrace = '{' % {
1047
- @cond.push(false); @cmdarg.push(false)
1048
-
1049
- current_literal = literal
1050
- if current_literal
1051
- current_literal.start_interp_brace
1052
- end
1053
- };
1054
-
1055
- e_rbrace = '}' % {
1056
- current_literal = literal
1057
- if current_literal
1058
- if current_literal.end_interp_brace_and_try_closing
1059
- if version?(18, 19)
1060
- emit(:tRCURLY, '}'.freeze, p - 1, p)
1061
- @cond.lexpop
1062
- @cmdarg.lexpop
1063
- else
1064
- emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
1065
- end
1066
-
1067
- if current_literal.saved_herebody_s
1068
- @herebody_s = current_literal.saved_herebody_s
1069
- end
1070
-
1071
-
1072
- fhold;
1073
- fnext *next_state_for_literal(current_literal);
1074
- fbreak;
1075
- end
1076
- end
1077
- };
1078
-
1079
- action extend_interp_code {
1080
- current_literal = literal
1081
- current_literal.flush_string
1082
- current_literal.extend_content
1083
-
1084
- emit(:tSTRING_DBEG, '#{'.freeze)
1085
-
1086
- if current_literal.heredoc?
1087
- current_literal.saved_herebody_s = @herebody_s
1088
- @herebody_s = nil
1089
- end
1090
-
1091
- current_literal.start_interp_brace
1092
- @command_start = true
1093
- fnext expr_value;
1094
- fbreak;
1095
- }
1096
-
1097
- # Actual string parsers are simply combined from the primitives defined
1098
- # above.
1099
-
1100
- interp_words := |*
1101
- interp_code => extend_interp_code;
1102
- interp_var => extend_interp_var;
1103
- e_bs escape => extend_string_escaped;
1104
- c_space+ => extend_string_space;
1105
- c_eol => extend_string_eol;
1106
- c_any => extend_string;
1107
- *|;
1108
-
1109
- interp_string := |*
1110
- interp_code => extend_interp_code;
1111
- interp_var => extend_interp_var;
1112
- e_bs escape => extend_string_escaped;
1113
- c_eol => extend_string_eol;
1114
- c_any => extend_string;
1115
- *|;
1116
-
1117
- plain_words := |*
1118
- e_bs c_any => extend_string_escaped;
1119
- c_space+ => extend_string_space;
1120
- c_eol => extend_string_eol;
1121
- c_any => extend_string;
1122
- *|;
1123
-
1124
- plain_string := |*
1125
- '\\' c_nl => extend_string_eol;
1126
- e_bs c_any => extend_string_escaped;
1127
- c_eol => extend_string_eol;
1128
- c_any => extend_string;
1129
- *|;
1130
-
1131
- interp_backslash_delimited := |*
1132
- interp_code => extend_interp_code;
1133
- interp_var => extend_interp_var;
1134
- c_eol => extend_string_eol;
1135
- c_any => extend_string;
1136
- *|;
1137
-
1138
- plain_backslash_delimited := |*
1139
- c_eol => extend_string_eol;
1140
- c_any => extend_string;
1141
- *|;
1142
-
1143
- interp_backslash_delimited_words := |*
1144
- interp_code => extend_interp_code;
1145
- interp_var => extend_interp_var;
1146
- c_space+ => extend_string_space;
1147
- c_eol => extend_string_eol;
1148
- c_any => extend_string;
1149
- *|;
1150
-
1151
- plain_backslash_delimited_words := |*
1152
- c_space+ => extend_string_space;
1153
- c_eol => extend_string_eol;
1154
- c_any => extend_string;
1155
- *|;
1156
-
1157
- regexp_modifiers := |*
1158
- [A-Za-z]+
1159
- => {
1160
- unknown_options = tok.scan(/[^imxouesn]/)
1161
- if unknown_options.any?
1162
- diagnostic :error, :regexp_options,
1163
- { :options => unknown_options.join }
1164
- end
1165
-
1166
- emit(:tREGEXP_OPT)
1167
- fnext expr_end;
1168
- fbreak;
1169
- };
1170
-
1171
- any
1172
- => {
1173
- emit(:tREGEXP_OPT, tok(@ts, @te - 1), @ts, @te - 1)
1174
- fhold;
1175
- fgoto expr_end;
1176
- };
1177
- *|;
1178
-
1179
- #
1180
- # === WHITESPACE HANDLING ===
1181
- #
1182
-
1183
- # Various contexts in Ruby allow various kinds of whitespace
1184
- # to be used. They are grouped to clarify the lexing machines
1185
- # and ease collection of comments.
1186
-
1187
- # A line of code with inline #comment at end is always equivalent
1188
- # to a line of code ending with just a newline, so an inline
1189
- # comment is deemed equivalent to non-newline whitespace
1190
- # (c_space character class).
1191
-
1192
- w_space =
1193
- c_space+
1194
- | '\\' e_heredoc_nl
1195
- ;
1196
-
1197
- w_comment =
1198
- '#' %{ @sharp_s = p - 1 }
1199
- # The (p == pe) condition compensates for added "\0" and
1200
- # the way Ragel handles EOF.
1201
- c_line* %{ emit_comment(@sharp_s, p == pe ? p - 2 : p) }
1202
- ;
1203
-
1204
- w_space_comment =
1205
- w_space
1206
- | w_comment
1207
- ;
1208
-
1209
- # A newline in non-literal context always interoperates with
1210
- # here document logic and can always be escaped by a backslash,
1211
- # still interoperating with here document logic in the same way,
1212
- # yet being invisible to anything else.
1213
- #
1214
- # To demonstrate:
1215
- #
1216
- # foo = <<FOO \
1217
- # bar
1218
- # FOO
1219
- # + 2
1220
- #
1221
- # is equivalent to `foo = "bar\n" + 2`.
1222
-
1223
- w_newline =
1224
- e_heredoc_nl;
1225
-
1226
- w_any =
1227
- w_space
1228
- | w_comment
1229
- | w_newline
1230
- ;
1231
-
1232
-
1233
- #
1234
- # === EXPRESSION PARSING ===
1235
- #
1236
-
1237
- # These rules implement a form of manually defined lookahead.
1238
- # The default longest-match scanning does not work here due
1239
- # to sheer ambiguity.
1240
-
1241
- ambiguous_fid_suffix = # actual parsed
1242
- [?!] %{ tm = p } | # a? a?
1243
- [?!]'=' %{ tm = p - 2 } # a!=b a != b
1244
- ;
1245
-
1246
- ambiguous_ident_suffix = # actual parsed
1247
- ambiguous_fid_suffix |
1248
- '=' %{ tm = p } | # a= a=
1249
- '==' %{ tm = p - 2 } | # a==b a == b
1250
- '=~' %{ tm = p - 2 } | # a=~b a =~ b
1251
- '=>' %{ tm = p - 2 } | # a=>b a => b
1252
- '===' %{ tm = p - 3 } # a===b a === b
1253
- ;
1254
-
1255
- ambiguous_symbol_suffix = # actual parsed
1256
- ambiguous_ident_suffix |
1257
- '==>' %{ tm = p - 2 } # :a==>b :a= => b
1258
- ;
1259
-
1260
- # Ambiguous with 1.9 hash labels.
1261
- ambiguous_const_suffix = # actual parsed
1262
- '::' %{ tm = p - 2 } # A::B A :: B
1263
- ;
1264
-
1265
- # Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
1266
- # @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
1267
-
1268
- e_lbrack = '[' % {
1269
- @cond.push(false); @cmdarg.push(false)
1270
- };
1271
-
1272
- # Ruby 1.9 lambdas require parentheses counting in order to
1273
- # emit correct opening kDO/tLBRACE.
1274
-
1275
- e_lparen = '(' % {
1276
- @cond.push(false); @cmdarg.push(false)
1277
-
1278
- @paren_nest += 1
1279
-
1280
- if version?(18)
1281
- @command_start = true
1282
- end
1283
- };
1284
-
1285
- e_rparen = ')' % {
1286
- @paren_nest -= 1
1287
- };
1288
-
1289
- # Ruby is context-sensitive wrt/ local identifiers.
1290
- action local_ident {
1291
- emit(:tIDENTIFIER)
1292
- # Identifiers not declared in @static_env continue via arg_or_cmdarg(cmd_state); declared ones go to expr_endfn.
1293
- if @static_env.nil? || !@static_env.declared?(tok)
1294
- fnext *arg_or_cmdarg(cmd_state); fbreak;
1295
- else
1296
- fnext expr_endfn; fbreak;
1297
- end
1298
- }
1299
-
1300
- # Variable lexing code is accessed from both expressions and
1301
- # string interpolation related code.
1302
- #
1303
- expr_variable := |*
1304
- global_var
1305
- => {
1306
- if tok =~ /^\$([1-9][0-9]*)$/
1307
- emit(:tNTH_REF, tok(@ts + 1).to_i)
1308
- elsif tok =~ /^\$([&`'+])$/
1309
- emit(:tBACK_REF)
1310
- else
1311
- emit(:tGVAR)
1312
- end
1313
-
1314
- fnext *stack_pop; fbreak;
1315
- };
1316
-
1317
- class_var_v
1318
- => {
1319
- if tok =~ /^@@[0-9]/
1320
- diagnostic :error, :cvar_name, { :name => tok }
1321
- end
1322
-
1323
- emit(:tCVAR)
1324
- fnext *stack_pop; fbreak;
1325
- };
1326
-
1327
- '@' [0-9]+
1328
- => {
1329
- if @version < 27
1330
- diagnostic :error, :ivar_name, { :name => tok }
1331
- end
1332
-
1333
- value = tok[1..-1]
1334
-
1335
- if value[0] == '0'
1336
- diagnostic :error, :leading_zero_in_numparam, nil, range(@ts, @te)
1337
- end
1338
-
1339
- if value.to_i > NUMPARAM_MAX
1340
- diagnostic :error, :too_large_numparam, nil, range(@ts, @te)
1341
- end
1342
-
1343
- if !@context.in_block? && !@context.in_lambda?
1344
- diagnostic :error, :numparam_outside_block, nil, range(@ts, @te)
1345
- end
1346
-
1347
- if !@max_numparam_stack.can_have_numparams?
1348
- diagnostic :error, :ordinary_param_defined, nil, range(@ts, @te)
1349
- end
1350
-
1351
- @max_numparam_stack.register(value.to_i)
1352
-
1353
- emit(:tNUMPARAM, tok[1..-1])
1354
- fnext *stack_pop; fbreak;
1355
- };
1356
-
1357
- instance_var_v
1358
- => {
1359
- if tok =~ /^@[0-9]/
1360
- diagnostic :error, :ivar_name, { :name => tok }
1361
- end
1362
-
1363
- emit(:tIVAR)
1364
- fnext *stack_pop; fbreak;
1365
- };
1366
- *|;
1367
-
1368
- # Literal function name in definition (e.g. `def class`).
1369
- # Keywords are returned as their respective tokens; this is used
1370
- # to support singleton def `def self.foo`. Global variables are
1371
- # returned as `tGVAR`; this is used in global variable alias
1372
- # statements `alias $a $b`. Symbols are returned verbatim; this
1373
- # is used in `alias :a :"b#{foo}"` and `undef :a`.
1374
- #
1375
- # Transitions to `expr_endfn` afterwards.
1376
- #
1377
- expr_fname := |*
1378
- keyword
1379
- => { emit_table(KEYWORDS_BEGIN);
1380
- fnext expr_endfn; fbreak; };
1381
-
1382
- constant
1383
- => { emit(:tCONSTANT)
1384
- fnext expr_endfn; fbreak; };
1385
-
1386
- bareword [?=!]?
1387
- => { emit(:tIDENTIFIER)
1388
- fnext expr_endfn; fbreak; };
1389
-
1390
- global_var
1391
- => { p = @ts - 1
1392
- fnext expr_end; fcall expr_variable; };
1393
-
1394
- # If the handling was to be delegated to expr_end,
1395
- # these cases would transition to something other than
1396
- # expr_endfn, which is incorrect.
1397
- operator_fname |
1398
- operator_arithmetic |
1399
- operator_rest
1400
- => { emit_table(PUNCTUATION)
1401
- fnext expr_endfn; fbreak; };
1402
-
1403
- '::'
1404
- => { fhold; fhold; fgoto expr_end; };
1405
-
1406
- ':'
1407
- => { fhold; fgoto expr_beg; };
1408
-
1409
- '%s' c_any
1410
- => {
1411
- if version?(23)
1412
- type, delimiter = tok[0..-2], tok[-1].chr
1413
- fgoto *push_literal(type, delimiter, @ts);
1414
- else
1415
- p = @ts - 1
1416
- fgoto expr_end;
1417
- end
1418
- };
1419
-
1420
- w_any;
1421
-
1422
- c_any
1423
- => { fhold; fgoto expr_end; };
1424
-
1425
- c_eof => do_eof;
1426
- *|;
1427
-
1428
- # After literal function name in definition. Behaves like `expr_end`,
1429
- # but allows a tLABEL.
1430
- #
1431
- # Transitions to `expr_end` afterwards.
1432
- #
1433
- expr_endfn := |*
1434
- label ( any - ':' )
1435
- => { emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
1436
- fhold; fnext expr_labelarg; fbreak; };
1437
-
1438
- w_space_comment;
1439
-
1440
- c_any
1441
- => { fhold; fgoto expr_end; };
1442
-
1443
- c_eof => do_eof;
1444
- *|;
1445
-
1446
- # Literal function name in method call (e.g. `a.class`).
1447
- #
1448
- # Transitions to `expr_arg` afterwards.
1449
- #
1450
- expr_dot := |*
1451
- constant
1452
- => { emit(:tCONSTANT)
1453
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
1454
-
1455
- call_or_var
1456
- => { emit(:tIDENTIFIER)
1457
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
1458
-
1459
- bareword ambiguous_fid_suffix
1460
- => { emit(:tFID, tok(@ts, tm), @ts, tm)
1461
- fnext *arg_or_cmdarg(cmd_state); p = tm - 1; fbreak; };
1462
-
1463
- # See the comment in `expr_fname`.
1464
- operator_fname |
1465
- operator_arithmetic |
1466
- operator_rest
1467
- => { emit_table(PUNCTUATION)
1468
- fnext expr_arg; fbreak; };
1469
-
1470
- w_any;
1471
-
1472
- c_any
1473
- => { fhold; fgoto expr_end; };
1474
-
1475
- c_eof => do_eof;
1476
- *|;
1477
-
1478
- # The previous token emitted was a `tIDENTIFIER` or `tFID`; no space
1479
- # is consumed; the current expression is a command or method call.
1480
- #
1481
- expr_arg := |*
1482
- #
1483
- # COMMAND MODE SPECIFIC TOKENS
1484
- #
1485
-
1486
- # cmd (1 + 2)
1487
- # See below the rationale about expr_endarg.
1488
- w_space+ e_lparen
1489
- => {
1490
- if version?(18)
1491
- emit(:tLPAREN2, '('.freeze, @te - 1, @te)
1492
- fnext expr_value; fbreak;
1493
- else
1494
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1495
- fnext expr_beg; fbreak;
1496
- end
1497
- };
1498
-
1499
- # meth(1 + 2)
1500
- # Regular method call.
1501
- e_lparen
1502
- => { emit(:tLPAREN2, '('.freeze)
1503
- fnext expr_beg; fbreak; };
1504
-
1505
- # meth [...]
1506
- # Array argument. Compare with indexing `meth[...]`.
1507
- w_space+ e_lbrack
1508
- => { emit(:tLBRACK, '['.freeze, @te - 1, @te)
1509
- fnext expr_beg; fbreak; };
1510
-
1511
- # cmd {}
1512
- # Command: method call without parentheses.
1513
- w_space* e_lbrace
1514
- => {
1515
- if @lambda_stack.last == @paren_nest
1516
- @lambda_stack.pop
1517
- emit(:tLAMBEG, '{'.freeze, @te - 1, @te)
1518
- else
1519
- emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1520
- end
1521
- @command_start = true
1522
- fnext expr_value; fbreak;
1523
- };
1524
-
1525
- #
1526
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
1527
- #
1528
-
1529
- # a??
1530
- # Ternary operator
1531
- '?' c_space_nl
1532
- => {
1533
- # Unlike expr_beg as invoked in the next rule, do not warn
1534
- p = @ts - 1
1535
- fgoto expr_end;
1536
- };
1537
-
1538
- # a ?b, a? ?
1539
- # Character literal or ternary operator
1540
- w_space* '?'
1541
- => { fhold; fgoto expr_beg; };
1542
-
1543
- # a %{1}, a %[1] (but not "a %=1=" or "a % foo")
1544
- # a /foo/ (but not "a / foo" or "a /=foo")
1545
- # a <<HEREDOC
1546
- w_space+ %{ tm = p }
1547
- ( [%/] ( c_any - c_space_nl - '=' ) # /
1548
- | '<<'
1549
- )
1550
- => {
1551
- if tok(tm, tm + 1) == '/'.freeze
1552
- # Ambiguous regexp literal.
1553
- diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
1554
- end
1555
-
1556
- p = tm - 1
1557
- fgoto expr_beg;
1558
- };
1559
-
1560
- # x *1
1561
- # Ambiguous splat, kwsplat or block-pass.
1562
- w_space+ %{ tm = p } ( '+' | '-' | '*' | '&' | '**' )
1563
- => {
1564
- diagnostic :warning, :ambiguous_prefix, { :prefix => tok(tm, @te) },
1565
- range(tm, @te)
1566
-
1567
- p = tm - 1
1568
- fgoto expr_beg;
1569
- };
1570
-
1571
- # x ::Foo
1572
- # Ambiguous toplevel constant access.
1573
- w_space+ '::'
1574
- => { fhold; fhold; fgoto expr_beg; };
1575
-
1576
- # x:b
1577
- # Symbol.
1578
- w_space* ':'
1579
- => { fhold; fgoto expr_beg; };
1580
-
1581
- w_space+ label
1582
- => { p = @ts - 1; fgoto expr_beg; };
1583
-
1584
- #
1585
- # AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
1586
- #
1587
-
1588
- # a ? b
1589
- # Ternary operator.
1590
- w_space+ %{ tm = p } '?' c_space_nl
1591
- => { p = tm - 1; fgoto expr_end; };
1592
-
1593
- # x + 1: Binary operator or operator-assignment.
1594
- w_space* operator_arithmetic
1595
- ( '=' | c_space_nl )? |
1596
- # x rescue y: Modifier keyword.
1597
- w_space* keyword_modifier |
1598
- # a &. b: Safe navigation operator.
1599
- w_space* '&.' |
1600
- # Miscellanea.
1601
- w_space* punctuation_end
1602
- => {
1603
- p = @ts - 1
1604
- fgoto expr_end;
1605
- };
1606
-
1607
- w_space;
1608
-
1609
- w_comment
1610
- => { fgoto expr_end; };
1611
-
1612
- w_newline
1613
- => { fhold; fgoto expr_end; };
1614
-
1615
- c_any
1616
- => { fhold; fgoto expr_beg; };
1617
-
1618
- c_eof => do_eof;
1619
- *|;
1620
-
1621
- # The previous token was an identifier which was seen while in the
1622
- # command mode (that is, the state at the beginning of #advance was
1623
- # expr_value). This state is very similar to expr_arg, but disambiguates
1624
- # two very rare and specific conditions:
1625
- # * In 1.8 mode, "foo (lambda do end)".
1626
- # * In 1.9+ mode, "f x: -> do foo do end end".
1627
- expr_cmdarg := |*
1628
- w_space+ e_lparen
1629
- => {
1630
- emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
1631
- if version?(18)
1632
- fnext expr_value; fbreak;
1633
- else
1634
- fnext expr_beg; fbreak;
1635
- end
1636
- };
1637
-
1638
- w_space* 'do'
1639
- => {
1640
- if @cond.active?
1641
- emit(:kDO_COND, 'do'.freeze, @te - 2, @te)
1642
- else
1643
- emit(:kDO, 'do'.freeze, @te - 2, @te)
1644
- end
1645
- fnext expr_value; fbreak;
1646
- };
1647
-
1648
- c_any |
1649
- # Disambiguate with the `do' rule above.
1650
- w_space* bareword |
1651
- w_space* label
1652
- => { p = @ts - 1
1653
- fgoto expr_arg; };
1654
-
1655
- c_eof => do_eof;
1656
- *|;
1657
-
1658
- # The rationale for this state is pretty complex. Normally, if an argument
1659
- # is passed to a command and then there is a block (tLCURLY...tRCURLY),
1660
- # the block is attached to the innermost argument (`f` in `m f {}`), or it
1661
- # is a parse error (`m 1 {}`). But there is a special case for passing a single
1662
- # primary expression grouped with parentheses: if you write `m (1) {}` or
1663
- # (2.0 only) `m () {}`, then the block is attached to `m`.
1664
- #
1665
- # Thus, we recognize the opening `(` of a command (remember, a command is
1666
- # a method call without parens) as a tLPAREN_ARG; then, in parser, we recognize
1667
- # `tLPAREN_ARG expr rparen` as a `primary_expr` and before rparen, set the
1668
- # lexer's state to `expr_endarg`, which makes it emit the possibly following
1669
- # `{` as `tLBRACE_ARG`.
1670
- #
1671
- # The default post-`expr_endarg` state is `expr_end`, so this state also handles
1672
- # `do` (as `kDO_BLOCK` in `expr_beg`).
1673
- expr_endarg := |*
1674
- e_lbrace
1675
- => {
1676
- if @lambda_stack.last == @paren_nest
1677
- @lambda_stack.pop
1678
- emit(:tLAMBEG, '{'.freeze)
1679
- else
1680
- emit(:tLBRACE_ARG, '{'.freeze)
1681
- end
1682
- @command_start = true
1683
- fnext expr_value; fbreak;
1684
- };
1685
-
1686
- 'do'
1687
- => { emit_do(true)
1688
- fnext expr_value; fbreak; };
1689
-
1690
- w_space_comment;
1691
-
1692
- c_any
1693
- => { fhold; fgoto expr_end; };
1694
-
1695
- c_eof => do_eof;
1696
- *|;
1697
-
1698
- # The rationale for this state is that several keywords accept value
1699
- # (i.e. should transition to `expr_beg`), do not accept it like a command
1700
- # (i.e. not an `expr_arg`), and must behave like a statement, that is,
1701
- # accept a modifier if/while/etc.
1702
- #
1703
- expr_mid := |*
1704
- keyword_modifier
1705
- => { emit_table(KEYWORDS)
1706
- fnext expr_beg; fbreak; };
1707
-
1708
- bareword
1709
- => { p = @ts - 1; fgoto expr_beg; };
1710
-
1711
- w_space_comment;
1712
-
1713
- w_newline
1714
- => { fhold; fgoto expr_end; };
1715
-
1716
- c_any
1717
- => { fhold; fgoto expr_beg; };
1718
-
1719
- c_eof => do_eof;
1720
- *|;
1721
-
1722
- # Beginning of an expression.
1723
- #
1724
- # Don't fall through to this state from `c_any`; make sure to handle
1725
- # `c_space* c_nl` and let `expr_end` handle the newline.
1726
- # Otherwise code like `f\ndef x` gets glued together and the parser
1727
- # explodes.
1728
- #
1729
- expr_beg := |*
1730
- # +5, -5, - 5
1731
- [+\-] w_any* [0-9]
1732
- => {
1733
- emit(:tUNARY_NUM, tok(@ts, @ts + 1), @ts, @ts + 1)
1734
- fhold; fnext expr_end; fbreak;
1735
- };
1736
-
1737
- # splat *a
1738
- '*'
1739
- => { emit(:tSTAR, '*'.freeze)
1740
- fbreak; };
1741
-
1742
- #
1743
- # STRING AND REGEXP LITERALS
1744
- #
1745
-
1746
- # /regexp/oui
1747
- # /=/ (disambiguation with /=)
1748
- '/' c_any
1749
- => {
1750
- type = delimiter = tok[0].chr
1751
- fhold; fgoto *push_literal(type, delimiter, @ts);
1752
- };
1753
-
1754
- # %<string>
1755
- '%' ( any - [A-Za-z] )
1756
- => {
1757
- type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
1758
- fgoto *push_literal(type, delimiter, @ts);
1759
- };
1760
-
1761
- # %w(we are the people)
1762
- '%' [A-Za-z]+ c_any
1763
- => {
1764
- type, delimiter = tok[0..-2], tok[-1].chr
1765
- fgoto *push_literal(type, delimiter, @ts);
1766
- };
1767
-
1768
- '%' c_eof
1769
- => {
1770
- diagnostic :fatal, :string_eof, nil, range(@ts, @ts + 1)
1771
- };
1772
-
1773
- # Heredoc start.
1774
- # <<END | <<'END' | <<"END" | <<`END` |
1775
- # <<-END | <<-'END' | <<-"END" | <<-`END` |
1776
- # <<~END | <<~'END' | <<~"END" | <<~`END`
1777
- '<<' [~\-]?
1778
- ( '"' ( any - '"' )* '"'
1779
- | "'" ( any - "'" )* "'"
1780
- | "`" ( any - "`" )* "`"
1781
- | bareword ) % { heredoc_e = p }
1782
- c_line* c_nl % { new_herebody_s = p }
1783
- => {
1784
- tok(@ts, heredoc_e) =~ /^<<(-?)(~?)(["'`]?)(.*)\3$/m
1785
-
1786
- indent = !$1.empty? || !$2.empty?
1787
- dedent_body = !$2.empty?
1788
- type = $3.empty? ? '<<"'.freeze : ('<<'.freeze + $3)
1789
- delimiter = $4
1790
-
1791
- if @version >= 27
1792
- if delimiter.count("\n") > 0 || delimiter.count("\r") > 0
1793
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1794
- end
1795
- elsif @version >= 24
1796
- if delimiter.count("\n") > 0
1797
- if delimiter.end_with?("\n")
1798
- diagnostic :warning, :heredoc_id_ends_with_nl, nil, range(@ts, @ts + 1)
1799
- delimiter = delimiter.rstrip
1800
- else
1801
- diagnostic :fatal, :heredoc_id_has_newline, nil, range(@ts, @ts + 1)
1802
- end
1803
- end
1804
- end
1805
-
1806
- if dedent_body && version?(18, 19, 20, 21, 22)
1807
- emit(:tLSHFT, '<<'.freeze, @ts, @ts + 2)
1808
- p = @ts + 1
1809
- fnext expr_beg; fbreak;
1810
- else
1811
- fnext *push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
1812
-
1813
- @herebody_s ||= new_herebody_s
1814
- p = @herebody_s - 1
1815
- end
1816
- };
1817
-
1818
- # Escaped unterminated heredoc start
1819
- # <<'END | <<"END | <<`END |
1820
- # <<-'END | <<-"END | <<-`END |
1821
- # <<~'END | <<~"END | <<~`END
1822
- #
1823
- # If the heredoc is terminated, the longer match of the rule above handles it
1824
- '<<' [~\-]?
1825
- ('"' (any - c_nl - '"')*
1826
- |"'" (any - c_nl - "'")*
1827
- |"`" (any - c_nl - "`")*
1828
- )
1829
- => {
1830
- diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
1831
- };
1832
-
1833
- #
1834
- # SYMBOL LITERALS
1835
- #
1836
-
1837
- # :&&, :||
1838
- ':' ('&&' | '||') => {
1839
- fhold; fhold;
1840
- emit(:tSYMBEG, tok(@ts, @ts + 1), @ts, @ts + 1)
1841
- fgoto expr_fname;
1842
- };
1843
-
1844
- # :"bar", :'baz'
1845
- ':' ['"] # '
1846
- => {
1847
- type, delimiter = tok, tok[-1].chr
1848
- fgoto *push_literal(type, delimiter, @ts);
1849
- };
1850
-
1851
- # :!@ is :!
1852
- # :~@ is :~
1853
- ':' [!~] '@'
1854
- => {
1855
- emit(:tSYMBOL, tok(@ts + 1, @ts + 2))
1856
- fnext expr_end; fbreak;
1857
- };
1858
-
1859
- ':' bareword ambiguous_symbol_suffix
1860
- => {
1861
- emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
1862
- p = tm - 1
1863
- fnext expr_end; fbreak;
1864
- };
1865
-
1866
- ':' ( bareword | global_var | class_var | instance_var |
1867
- operator_fname | operator_arithmetic | operator_rest )
1868
- => {
1869
- emit(:tSYMBOL, tok(@ts + 1), @ts)
1870
- fnext expr_end; fbreak;
1871
- };
1872
-
1873
- ':' ( '@' %{ tm = p - 1; diag_msg = :ivar_name }
1874
- | '@@' %{ tm = p - 2; diag_msg = :cvar_name }
1875
- ) [0-9]*
1876
- => {
1877
- if @version >= 27
1878
- diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
1879
- else
1880
- emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
1881
- p = @ts
1882
- end
1883
-
1884
- fnext expr_end; fbreak;
1885
- };
1886
-
1887
- #
1888
- # AMBIGUOUS TERNARY OPERATOR
1889
- #
1890
-
1891
- # Character constant, like ?a, ?\n, ?\u1000, and so on
1892
- # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
1893
- '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
1894
- | (c_any - c_space_nl - e_bs) % { @escape = nil }
1895
- )
1896
- => {
1897
- value = @escape || tok(@ts + 1)
1898
-
1899
- if version?(18)
1900
- emit(:tINTEGER, value.getbyte(0))
1901
- else
1902
- emit(:tCHARACTER, value)
1903
- end
1904
-
1905
- fnext expr_end; fbreak;
1906
- };
1907
-
1908
- '?' c_space_nl
1909
- => {
1910
- escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
1911
- "\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
1912
- diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
1913
-
1914
- p = @ts - 1
1915
- fgoto expr_end;
1916
- };
1917
-
1918
- '?' c_eof
1919
- => {
1920
- diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
1921
- };
1922
-
1923
- # f ?aa : b: Disambiguate with a character literal.
1924
- '?' [A-Za-z_] bareword
1925
- => {
1926
- p = @ts - 1
1927
- fgoto expr_end;
1928
- };
1929
-
1930
- #
1931
- # KEYWORDS AND PUNCTUATION
1932
- #
1933
-
1934
- # a({b=>c})
1935
- e_lbrace
1936
- => {
1937
- if @lambda_stack.last == @paren_nest
1938
- @lambda_stack.pop
1939
- @command_start = true
1940
- emit(:tLAMBEG, '{'.freeze)
1941
- else
1942
- emit(:tLBRACE, '{'.freeze)
1943
- end
1944
- fbreak;
1945
- };
1946
-
1947
- # a([1, 2])
1948
- e_lbrack
1949
- => { emit(:tLBRACK, '['.freeze)
1950
- fbreak; };
1951
-
1952
- # a()
1953
- e_lparen
1954
- => { emit(:tLPAREN, '('.freeze)
1955
- fbreak; };
1956
-
1957
- # a(+b)
1958
- punctuation_begin
1959
- => { emit_table(PUNCTUATION_BEGIN)
1960
- fbreak; };
1961
-
1962
- # rescue Exception => e: Block rescue.
1963
- # Special because it should transition to expr_mid.
1964
- 'rescue' %{ tm = p } '=>'?
1965
- => { emit(:kRESCUE, 'rescue'.freeze, @ts, tm)
1966
- p = tm - 1
1967
- fnext expr_mid; fbreak; };
1968
-
1969
- # if a: Statement if.
1970
- keyword_modifier
1971
- => { emit_table(KEYWORDS_BEGIN)
1972
- @command_start = true
1973
- fnext expr_value; fbreak; };
1974
-
1975
- #
1976
- # RUBY 1.9 HASH LABELS
1977
- #
1978
-
1979
- label ( any - ':' )
1980
- => {
1981
- fhold;
1982
-
1983
- if version?(18)
1984
- ident = tok(@ts, @te - 2)
1985
-
1986
- emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
1987
- ident, @ts, @te - 2)
1988
- fhold; # continue as a symbol
1989
-
1990
- if !@static_env.nil? && @static_env.declared?(ident)
1991
- fnext expr_end;
1992
- else
1993
- fnext *arg_or_cmdarg(cmd_state);
1994
- end
1995
- else
1996
- emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
1997
- fnext expr_labelarg;
1998
- end
1999
-
2000
- fbreak;
2001
- };
2002
-
2003
- #
2004
- # RUBY 2.7 BEGINLESS RANGE
2005
-
2006
- '..'
2007
- => {
2008
- if @version >= 27
2009
- emit(:tBDOT2)
2010
- else
2011
- emit(:tDOT2)
2012
- end
2013
-
2014
- fnext expr_beg; fbreak;
2015
- };
2016
-
2017
- '...'
2018
- => {
2019
- if @version >= 27
2020
- emit(:tBDOT3)
2021
- else
2022
- emit(:tDOT3)
2023
- end
2024
-
2025
- fnext expr_beg; fbreak;
2026
- };
2027
-
2028
- #
2029
- # CONTEXT-DEPENDENT VARIABLE LOOKUP OR COMMAND INVOCATION
2030
- #
2031
-
2032
- # foo= bar: Disambiguate with bareword rule below.
2033
- bareword ambiguous_ident_suffix |
2034
- # def foo: Disambiguate with bareword rule below.
2035
- keyword
2036
- => { p = @ts - 1
2037
- fgoto expr_end; };
2038
-
2039
- # a = 42; a [42]: Indexing.
2040
- # def a; end; a [42]: Array argument.
2041
- call_or_var
2042
- => local_ident;
2043
-
2044
- (call_or_var - keyword)
2045
- % { ident_tok = tok; ident_ts = @ts; ident_te = @te; }
2046
- w_space+ '('
2047
- => {
2048
- emit(:tIDENTIFIER, ident_tok, ident_ts, ident_te)
2049
- p = ident_te - 1
2050
-
2051
- if !@static_env.nil? && @static_env.declared?(ident_tok) && @version < 25
2052
- fnext expr_endfn;
2053
- else
2054
- fnext expr_cmdarg;
2055
- end
2056
- fbreak;
2057
- };
2058
-
2059
- #
2060
- # WHITESPACE
2061
- #
2062
-
2063
- w_any;
2064
-
2065
- e_heredoc_nl '=begin' ( c_space | c_nl_zlen )
2066
- => {
2067
- p = @ts - 1
2068
- @cs_before_block_comment = @cs
2069
- fgoto line_begin;
2070
- };
2071
-
2072
- #
2073
- # DEFAULT TRANSITION
2074
- #
2075
-
2076
- # The following rules match most binary and all unary operators.
2077
- # Rules for binary operators provide better error reporting.
2078
- operator_arithmetic '=' |
2079
- operator_rest |
2080
- punctuation_end |
2081
- c_any
2082
- => { p = @ts - 1; fgoto expr_end; };
2083
-
2084
- c_eof => do_eof;
2085
- *|;
2086
-
2087
- # Special newline handling for "def a b:"
2088
- # Scanner state selected with fnext expr_labelarg, e.g. by the rule that emits tLABEL.
2089
- expr_labelarg := |*
2090
- w_space_comment; # no action attached: the match is consumed and nothing is emitted
2091
-
2092
- w_newline
2093
- => {
2094
- if @in_kwarg # newline while @in_kwarg is set
2095
- fhold; fgoto expr_end; # put the newline back and let expr_end scan it
2096
- else
2097
- fgoto line_begin; # otherwise the newline is consumed and scanning continues in line_begin
2098
- end
2099
- };
2100
-
2101
- c_any
2102
- => { fhold; fgoto expr_beg; }; # any other character: put it back and scan it in expr_beg
2103
-
2104
- c_eof => do_eof; # end of input: run the do_eof action (defined elsewhere)
2105
- *|;
2106
-
2107
- # Like expr_beg, but no 1.9 label or 2.2 quoted label possible.
2108
- # Anything not handled by a rule here is handed back to expr_beg (see the c_any rule).
2109
- expr_value := |*
2110
- # a:b: a(:b), a::B, A::B
2111
- label (any - ':')
2112
- => { p = @ts - 1 # rewind p to just before the token start
2113
- fgoto expr_end; }; # and hand the whole match over to expr_end instead of emitting a label
2114
-
2115
- # "bar", 'baz'
2116
- ['"] # '
2117
- => {
2118
- fgoto *push_literal(tok, tok, @ts); # the quote character is passed as both type and delimiter; jump to the state push_literal returns
2119
- };
2120
-
2121
- w_space_comment; # no action attached: consumed without emitting anything
2122
-
2123
- w_newline
2124
- => { fgoto line_begin; }; # newline: continue in line_begin
2125
-
2126
- c_any
2127
- => { fhold; fgoto expr_beg; }; # any other character: put it back and scan it in expr_beg
2128
-
2129
- c_eof => do_eof; # end of input: run the do_eof action (defined elsewhere)
2130
- *|;
2131
-
2132
- expr_end := |* # scanner state; rules are grouped below into lambdas, keywords, numbers, strings, constants/variables, method calls, operators and whitespace
2133
- #
2134
- # STABBY LAMBDA
2135
- #
2136
-
2137
- '->'
2138
- => {
2139
- emit(:tLAMBDA, '->'.freeze, @ts, @ts + 2)
2140
-
2141
- @lambda_stack.push @paren_nest # record the current paren depth so the matching brace or do can be recognised as the lambda body
2142
- fnext expr_endfn; fbreak;
2143
- };
2144
-
2145
- e_lbrace | 'do'
2146
- => {
2147
- if @lambda_stack.last == @paren_nest # a lambda was opened at this paren depth: this token starts its body
2148
- @lambda_stack.pop
2149
-
2150
- if tok == '{'.freeze
2151
- emit(:tLAMBEG, '{'.freeze)
2152
- else # 'do'
2153
- emit(:kDO_LAMBDA, 'do'.freeze)
2154
- end
2155
- else # no pending lambda at this depth: ordinary brace or do
2156
- if tok == '{'.freeze
2157
- emit(:tLCURLY, '{'.freeze)
2158
- else # 'do'
2159
- emit_do
2160
- end
2161
- end
2162
- @command_start = true
2163
-
2164
- fnext expr_value; fbreak;
2165
- };
2166
-
2167
- #
2168
- # KEYWORDS
2169
- #
2170
-
2171
- keyword_with_fname
2172
- => { emit_table(KEYWORDS)
2173
- fnext expr_fname; fbreak; };
2174
-
2175
- 'class' w_any* '<<'
2176
- => { emit(:kCLASS, 'class'.freeze, @ts, @ts + 5) # @ts + 5 spans the class keyword
2177
- emit(:tLSHFT, '<<'.freeze, @te - 2, @te) # the last two matched characters are the << operator
2178
- fnext expr_value; fbreak; };
2179
-
2180
- # a if b:c: Syntax error.
2181
- keyword_modifier
2182
- => { emit_table(KEYWORDS)
2183
- fnext expr_beg; fbreak; };
2184
-
2185
- # elsif b:c: elsif b(:c)
2186
- keyword_with_value
2187
- => { emit_table(KEYWORDS)
2188
- @command_start = true
2189
- fnext expr_value; fbreak; };
2190
-
2191
- keyword_with_mid
2192
- => { emit_table(KEYWORDS)
2193
- fnext expr_mid; fbreak; };
2194
-
2195
- keyword_with_arg
2196
- => {
2197
- emit_table(KEYWORDS)
2198
-
2199
- if version?(18) && tok == 'not'.freeze # only the keyword not on 1.8 continues in expr_beg
2200
- fnext expr_beg; fbreak;
2201
- else
2202
- fnext expr_arg; fbreak;
2203
- end
2204
- };
2205
-
2206
- '__ENCODING__'
2207
- => {
2208
- if version?(18) # 1.8: emitted as a plain identifier instead of the k__ENCODING__ keyword
2209
- emit(:tIDENTIFIER)
2210
-
2211
- unless !@static_env.nil? && @static_env.declared?(tok) # the state only changes when the name is not a declared local
2212
- fnext *arg_or_cmdarg(cmd_state);
2213
- end
2214
- else
2215
- emit(:k__ENCODING__, '__ENCODING__'.freeze)
2216
- end
2217
- fbreak;
2218
- };
2219
-
2220
- keyword_with_end
2221
- => { emit_table(KEYWORDS)
2222
- fbreak; };
2223
-
2224
- #
2225
- # NUMERIC LITERALS
2226
- #
2227
-
2228
- ( '0' [Xx] %{ @num_base = 16; @num_digits_s = p } int_hex # hexadecimal prefix; digits begin right after it
2229
- | '0' [Dd] %{ @num_base = 10; @num_digits_s = p } int_dec # explicit decimal prefix
2230
- | '0' [Oo] %{ @num_base = 8; @num_digits_s = p } int_dec # explicit octal prefix; digits 8 and 9 are rejected in the action below
2231
- | '0' [Bb] %{ @num_base = 2; @num_digits_s = p } int_bin # binary prefix
2232
- | [1-9] digit* '_'? %{ @num_base = 10; @num_digits_s = @ts } int_dec # plain decimal; digits begin at the token start
2233
- | '0' digit* '_'? %{ @num_base = 8; @num_digits_s = @ts } int_dec # leading zero selects base 8; digits begin at the token start
2234
- ) %{ @num_suffix_s = p } int_suffix # remember where the digits end and the suffix begins
2235
- => {
2236
- digits = tok(@num_digits_s, @num_suffix_s)
2237
-
2238
- if digits.end_with? '_'.freeze
2239
- diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
2240
- range(@te - 1, @te)
2241
- elsif digits.empty? && @num_base == 8 && version?(18)
2242
- # 1.8 did not raise an error on 0o.
2243
- digits = '0'.freeze
2244
- elsif digits.empty?
2245
- diagnostic :error, :empty_numeric
2246
- elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/)) # an 8 or 9 inside a base-8 literal
2247
- invalid_s = @num_digits_s + invalid_idx # absolute source position of the offending digit
2248
- diagnostic :error, :invalid_octal, nil,
2249
- range(invalid_s, invalid_s + 1)
2250
- end
2251
-
2252
- if version?(18, 19, 20) # 1.8 to 2.0: emit a bare tINTEGER ending where the digits end
2253
- emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s)
2254
- p = @num_suffix_s - 1 # rewind p so the text after the digits is scanned again
2255
- else
2256
- @num_xfrm.call(digits.to_i(@num_base)) # NOTE(review): @num_xfrm is presumably assigned by the int_suffix actions, which are not visible here
2257
- end
2258
- fbreak;
2259
- };
2260
-
2261
- flo_frac flo_pow? # presumably a literal with no digit before the dot (flo_frac is defined elsewhere)
2262
- => {
2263
- diagnostic :error, :no_dot_digit_literal
2264
- };
2265
-
2266
- flo_int [eE] # integer followed by a bare e or E
2267
- => {
2268
- if version?(18, 19, 20)
2269
- diagnostic :error,
2270
- :trailing_in_number, { :character => tok(@te - 1, @te) },
2271
- range(@te - 1, @te)
2272
- else
2273
- emit(:tINTEGER, tok(@ts, @te - 1).to_i, @ts, @te - 1) # emit only the integer part
2274
- fhold; fbreak; # put the e or E back so it is scanned again
2275
- end
2276
- };
2277
-
2278
- flo_int flo_frac [eE] # same as the rule above, but for a number with a fraction
2279
- => {
2280
- if version?(18, 19, 20)
2281
- diagnostic :error,
2282
- :trailing_in_number, { :character => tok(@te - 1, @te) },
2283
- range(@te - 1, @te)
2284
- else
2285
- emit(:tFLOAT, tok(@ts, @te - 1).to_f, @ts, @te - 1) # emit the float without the trailing e or E
2286
- fhold; fbreak; # put the e or E back so it is scanned again
2287
- end
2288
- };
2289
-
2290
- flo_int
2291
- ( flo_frac? flo_pow %{ @num_suffix_s = p } flo_pow_suffix # exponent form, fraction optional
2292
- | flo_frac %{ @num_suffix_s = p } flo_suffix # fraction without exponent
2293
- )
2294
- => {
2295
- digits = tok(@ts, @num_suffix_s)
2296
-
2297
- if version?(18, 19, 20) # 1.8 to 2.0: emit a bare tFLOAT ending where the digits end
2298
- emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
2299
- p = @num_suffix_s - 1 # rewind p so the text after the digits is scanned again
2300
- else
2301
- @num_xfrm.call(digits) # NOTE(review): here @num_xfrm receives the digit string, not a converted number
2302
- end
2303
- fbreak;
2304
- };
2305
-
2306
- #
2307
- # STRING AND XSTRING LITERALS
2308
- #
2309
-
2310
- # `echo foo`, "bar", 'baz'
2311
- '`' | ['"] # '
2312
- => {
2313
- type, delimiter = tok, tok[-1].chr # type is the matched quote token; delimiter is its last character
2314
- fgoto *push_literal(type, delimiter, @ts, nil, false, false, true); # nil, false, false fill the heredoc_e, indent and dedent_body positions used by the heredoc call; NOTE(review): meaning of the final true is not visible here
2315
- };
2316
-
2317
- #
2318
- # CONSTANTS AND VARIABLES
2319
- #
2320
-
2321
- constant
2322
- => { emit(:tCONSTANT)
2323
- fnext *arg_or_cmdarg(cmd_state); fbreak; };
2324
-
2325
- constant ambiguous_const_suffix
2326
- => { emit(:tCONSTANT, tok(@ts, tm), @ts, tm) # tm is presumably set by ambiguous_const_suffix (defined elsewhere) to the end of the name
2327
- p = tm - 1; fbreak; }; # rewind so the suffix is scanned again
2328
-
2329
- global_var | class_var_v | instance_var_v
2330
- => { p = @ts - 1; fcall expr_variable; }; # rewind to the token start and call into expr_variable
2331
-
2332
- #
2333
- # METHOD CALLS
2334
- #
2335
-
2336
- '.:' w_space+ # when whitespace follows, the two characters are emitted as separate tDOT and tCOLON tokens
2337
- => { emit(:tDOT, '.', @ts, @ts + 1)
2338
- emit(:tCOLON, ':', @ts + 1, @ts + 2)
2339
- p = p - tok.length + 2 # NOTE(review): appears to move p back over the matched whitespace; confirm the exact resume position
2340
- fnext expr_dot; fbreak; };
2341
-
2342
- '.:'
2343
- => {
2344
- if @version >= 27 # 2.7 and later: emitted through the PUNCTUATION table as one token
2345
- emit_table(PUNCTUATION)
2346
- else # older versions: only the dot is emitted
2347
- emit(:tDOT, tok(@ts, @ts + 1), @ts, @ts + 1)
2348
- fhold; # put the colon back so it is scanned separately
2349
- end
2350
-
2351
- fnext expr_dot; fbreak;
2352
- };
2353
-
2354
- '.' | '&.' | '::'
2355
- => { emit_table(PUNCTUATION)
2356
- fnext expr_dot; fbreak; };
2357
-
2358
- call_or_var
2359
- => local_ident; # named action defined elsewhere
2360
-
2361
- bareword ambiguous_fid_suffix
2362
- => {
2363
- if tm == @te
2364
- # Suffix was consumed, e.g. foo!
2365
- emit(:tFID)
2366
- else
2367
- # Suffix was not consumed, e.g. foo!=
2368
- emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
2369
- p = tm - 1 # rewind so the unconsumed suffix is scanned again
2370
- end
2371
- fnext expr_arg; fbreak;
2372
- };
2373
-
2374
- #
2375
- # OPERATORS
2376
- #
2377
-
2378
- '*' | '=>'
2379
- => {
2380
- emit_table(PUNCTUATION)
2381
- fgoto expr_value; # NOTE(review): uses fgoto with no fbreak, unlike the neighbouring operator rules
2382
- };
2383
-
2384
- # When '|', '~', '!', '=>' are used as operators
2385
- # they do not accept any symbols (or quoted labels) after.
2386
- # Other binary operators accept it.
2387
- ( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' ) # NOTE(review): the exclusion list names * while the comment above names =>; both are matched by the preceding rule
2388
- => {
2389
- emit_table(PUNCTUATION);
2390
- fnext expr_value; fbreak;
2391
- };
2392
-
2393
- ( e_lparen | '|' | '~' | '!' )
2394
- => { emit_table(PUNCTUATION)
2395
- fnext expr_beg; fbreak; };
2396
-
2397
- e_rbrace | e_rparen | ']'
2398
- => {
2399
- emit_table(PUNCTUATION)
2400
-
2401
- if @version < 24 # before 2.4 the cond and cmdarg stacks use lexpop, from 2.4 on plain pop
2402
- @cond.lexpop
2403
- @cmdarg.lexpop
2404
- else
2405
- @cond.pop
2406
- @cmdarg.pop
2407
- end
2408
-
2409
- if tok == '}'.freeze || tok == ']'.freeze # closing brace or bracket: the next state depends on the version
2410
- if @version >= 25
2411
- fnext expr_end;
2412
- else
2413
- fnext expr_endarg;
2414
- end
2415
- else # )
2416
- # fnext expr_endfn; ?
2417
- end
2418
-
2419
- fbreak;
2420
- };
2421
-
2422
- operator_arithmetic '='
2423
- => { emit(:tOP_ASGN, tok(@ts, @te - 1)) # token value is the operator without the trailing =
2424
- fnext expr_beg; fbreak; };
2425
-
2426
- '?'
2427
- => { emit(:tEH, '?'.freeze)
2428
- fnext expr_value; fbreak; };
2429
-
2430
- e_lbrack
2431
- => { emit(:tLBRACK2, '['.freeze)
2432
- fnext expr_beg; fbreak; };
2433
-
2434
- punctuation_end
2435
- => { emit_table(PUNCTUATION)
2436
- fnext expr_beg; fbreak; };
2437
-
2438
- #
2439
- # WHITESPACE
2440
- #
2441
-
2442
- w_space_comment; # no action attached: consumed without emitting anything
2443
-
2444
- w_newline
2445
- => { fgoto leading_dot; }; # leading_dot decides whether tNL is emitted or a leading-dot call continues the expression
2446
-
2447
- ';'
2448
- => { emit(:tSEMI, ';'.freeze)
2449
- @command_start = true
2450
- fnext expr_value; fbreak; };
2451
-
2452
- '\\' c_line {
2453
- diagnostic :error, :bare_backslash, nil, range(@ts, @ts + 1) # the reported range covers only the backslash
2454
- fhold;
2455
- };
2456
-
2457
- c_any
2458
- => {
2459
- diagnostic :fatal, :unexpected, { :character => tok.inspect[1..-2] } # inspect wraps the text in quotes; the slice drops them
2460
- };
2461
-
2462
- c_eof => do_eof; # end of input: run the do_eof action (defined elsewhere)
2463
- *|;
2464
-
2465
- leading_dot := |* # entered from the w_newline rule of expr_end; decides whether that newline ends the statement
2466
- # Insane leading dots:
2467
- # a #comment
2468
- # # post-2.7 comment
2469
- # .b: a.b
2470
-
2471
- # Here we use '\n' instead of w_newline to not modify @newline_s
2472
- # and eventually properly emit tNL
2473
- (w_space_comment '\n')+ # one or more whole lines matched by w_space_comment (defined elsewhere)
2474
- => {
2475
- if @version < 27
2476
- # Ruby before 2.7 doesn't support comments before leading dot.
2477
- # If a line after "a" starts with a comment then "a" is a self-contained statement.
2478
- # So in that case we emit a special tNL token and start reading the
2479
- # next line as a separate statement.
2480
- #
2481
- # Note: block comments before leading dot are not supported on any version of Ruby.
2482
- emit(:tNL, nil, @newline_s, @newline_s + 1)
2483
- fhold; fnext line_begin; fbreak;
2484
- end # there is no else branch: on 2.7 and later the matched lines are consumed and nothing is emitted
2485
- };
2486
-
2487
- c_space* %{ tm = p } ('.' | '&.') # tm records the position reached after the optional spaces, where the dot operator starts
2488
- => { p = tm - 1; fgoto expr_end; }; # rewind to the dot and continue in expr_end without emitting tNL
2489
-
2490
- any
2491
- => { emit(:tNL, nil, @newline_s, @newline_s + 1) # anything else: emit the tNL located at @newline_s
2492
- fhold; fnext line_begin; fbreak; }; # put the character back; the next token is scanned from line_begin
2493
- *|;
2494
-
2495
- #
2496
- # === EMBEDDED DOCUMENT (aka BLOCK COMMENT) PARSING ===
2497
- #
2498
-
2499
- line_comment := |* # scans the body of a =begin block comment; entered from line_begin with @eq_begin_s already set
2500
- '=end' c_line* c_nl_zlen # the terminating =end line
2501
- => {
2502
- emit_comment(@eq_begin_s, @te) # one comment from the recorded =begin position to the end of this match
2503
- fgoto *@cs_before_block_comment; # jump to the state stored in @cs_before_block_comment
2504
- };
2505
-
2506
- c_line* c_nl; # any other line inside the block comment: consumed with no action
2507
-
2508
- c_line* zlen # presumably reached when the input ends before =end (zlen is defined elsewhere)
2509
- => {
2510
- diagnostic :fatal, :embedded_document, nil, # fatal diagnostic located at the opening =begin
2511
- range(@eq_begin_s, @eq_begin_s + '=begin'.length)
2512
- };
2513
- *|;
2514
-
2515
- line_begin := |* # scanner state used at the start of a line (other states jump here after a newline)
2516
- w_any; # no action attached: consumed without emitting anything
2517
-
2518
- '=begin' ( c_space | c_nl_zlen ) # start of a block comment
2519
- => { @eq_begin_s = @ts # remember where =begin starts; line_comment uses it for the comment range and diagnostics
2520
- fgoto line_comment; };
2521
-
2522
- '__END__' ( c_eol - zlen )
2523
- => { p = pe - 3 }; # NOTE(review): moves p close to the end of the buffer so the remaining input is not tokenized; the constant 3 depends on buffer termination not visible here
2524
-
2525
- c_any
2526
- => { cmd_state = true; fhold; fgoto expr_value; }; # anything else: set cmd_state, put the character back and scan it in expr_value
2527
-
2528
- c_eof => do_eof; # end of input: run the do_eof action (defined elsewhere)
2529
- *|;
2530
-
2531
- }%%
2532
- # %
2533
- end