parser 0.9.alpha1 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +4 -3
- data/AST_FORMAT.md +1338 -0
- data/README.md +58 -3
- data/Rakefile +32 -12
- data/bin/benchmark +47 -0
- data/bin/explain-parse +14 -0
- data/bin/parse +6 -0
- data/lib/parser.rb +84 -0
- data/lib/parser/all.rb +2 -0
- data/lib/parser/ast/node.rb +11 -0
- data/lib/parser/ast/processor.rb +8 -0
- data/lib/parser/base.rb +116 -0
- data/lib/parser/builders/default.rb +654 -0
- data/lib/parser/compatibility/ruby1_8.rb +13 -0
- data/lib/parser/diagnostic.rb +44 -0
- data/lib/parser/diagnostic/engine.rb +44 -0
- data/lib/parser/lexer.rl +335 -245
- data/lib/parser/lexer/explanation.rb +37 -0
- data/lib/parser/{lexer_literal.rb → lexer/literal.rb} +22 -12
- data/lib/parser/lexer/stack_state.rb +38 -0
- data/lib/parser/ruby18.y +1957 -0
- data/lib/parser/ruby19.y +2154 -0
- data/lib/parser/source/buffer.rb +78 -0
- data/lib/parser/source/map.rb +20 -0
- data/lib/parser/source/map/operator.rb +15 -0
- data/lib/parser/source/map/variable_assignment.rb +15 -0
- data/lib/parser/source/range.rb +66 -0
- data/lib/parser/static_environment.rb +12 -6
- data/parser.gemspec +23 -13
- data/test/helper.rb +45 -0
- data/test/parse_helper.rb +204 -0
- data/test/racc_coverage_helper.rb +130 -0
- data/test/test_diagnostic.rb +47 -0
- data/test/test_diagnostic_engine.rb +58 -0
- data/test/test_lexer.rb +601 -357
- data/test/test_lexer_stack_state.rb +69 -0
- data/test/test_parse_helper.rb +74 -0
- data/test/test_parser.rb +3654 -0
- data/test/test_source_buffer.rb +80 -0
- data/test/test_source_range.rb +51 -0
- data/test/test_static_environment.rb +1 -4
- metadata +137 -12
@@ -0,0 +1,44 @@
|
|
1
|
+
module Parser
|
2
|
+
|
3
|
+
class Diagnostic
|
4
|
+
LEVELS = [:note, :warning, :error, :fatal].freeze
|
5
|
+
|
6
|
+
attr_reader :level, :message
|
7
|
+
attr_reader :location, :highlights
|
8
|
+
|
9
|
+
def initialize(level, message, location, highlights=[])
|
10
|
+
unless LEVELS.include?(level)
|
11
|
+
raise ArgumentError,
|
12
|
+
"Diagnostic#level must be one of #{LEVELS.join(', ')}; " \
|
13
|
+
"#{level.inspect} provided."
|
14
|
+
end
|
15
|
+
|
16
|
+
@level = level
|
17
|
+
@message = message.to_s.dup.freeze
|
18
|
+
@location = location
|
19
|
+
@highlights = highlights.dup.freeze
|
20
|
+
|
21
|
+
freeze
|
22
|
+
end
|
23
|
+
|
24
|
+
def render
|
25
|
+
source_line = @location.source_line
|
26
|
+
highlight_line = ' ' * source_line.length
|
27
|
+
|
28
|
+
@highlights.each do |hilight|
|
29
|
+
range = hilight.column_range
|
30
|
+
highlight_line[range] = '~' * hilight.size
|
31
|
+
end
|
32
|
+
|
33
|
+
range = @location.column_range
|
34
|
+
highlight_line[range] = '^' * @location.size
|
35
|
+
|
36
|
+
[
|
37
|
+
"#{@location.to_s}: #{@level}: #{@message}",
|
38
|
+
source_line,
|
39
|
+
highlight_line,
|
40
|
+
]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Parser
|
2
|
+
|
3
|
+
class Diagnostic::Engine
|
4
|
+
attr_accessor :consumer
|
5
|
+
|
6
|
+
attr_accessor :all_errors_are_fatal
|
7
|
+
attr_accessor :ignore_warnings
|
8
|
+
|
9
|
+
def initialize(consumer=nil)
|
10
|
+
@consumer = consumer
|
11
|
+
|
12
|
+
@all_errors_are_fatal = false
|
13
|
+
@ignore_warnings = false
|
14
|
+
end
|
15
|
+
|
16
|
+
def process(diagnostic)
|
17
|
+
if ignore?(diagnostic)
|
18
|
+
# do nothing
|
19
|
+
elsif @consumer
|
20
|
+
@consumer.call(diagnostic)
|
21
|
+
end
|
22
|
+
|
23
|
+
if raise?(diagnostic)
|
24
|
+
raise Parser::SyntaxError, diagnostic.message
|
25
|
+
end
|
26
|
+
|
27
|
+
self
|
28
|
+
end
|
29
|
+
|
30
|
+
protected
|
31
|
+
|
32
|
+
def ignore?(diagnostic)
|
33
|
+
@ignore_warnings &&
|
34
|
+
diagnostic.level == :warning
|
35
|
+
end
|
36
|
+
|
37
|
+
def raise?(diagnostic)
|
38
|
+
(@all_errors_are_fatal &&
|
39
|
+
diagnostic.level == :error) ||
|
40
|
+
diagnostic.level == :fatal
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
data/lib/parser/lexer.rl
CHANGED
@@ -3,6 +3,9 @@
|
|
3
3
|
#
|
4
4
|
# === BEFORE YOU START ===
|
5
5
|
#
|
6
|
+
# Read the Ruby Hacking Guide chapter 11, available in English at
|
7
|
+
# http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
|
8
|
+
#
|
6
9
|
# Remember two things about Ragel scanners:
|
7
10
|
#
|
8
11
|
# 1) Longest match wins.
|
@@ -38,6 +41,11 @@
|
|
38
41
|
# emit($whatever)
|
39
42
|
# fnext $next_state; fbreak;
|
40
43
|
#
|
44
|
+
# If you perform `fgoto` in an action which neither emits a token nor
|
45
|
+
# rewinds the stream pointer, the parser's side-effectful,
|
46
|
+
# context-sensitive lookahead actions will break in a hard to detect
|
47
|
+
# and debug way.
|
48
|
+
#
|
41
49
|
# * If an action does not emit a token:
|
42
50
|
#
|
43
51
|
# fgoto $next_state;
|
@@ -56,6 +64,8 @@
|
|
56
64
|
# `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
|
57
65
|
# _will_ invoke the action `act`.
|
58
66
|
#
|
67
|
+
# e_something stands for "something with **e**mbedded action".
|
68
|
+
#
|
59
69
|
# * EOF is explicit and is matched by `c_eof`. If you want to introspect
|
60
70
|
# the state of the lexer, add this rule to the state:
|
61
71
|
#
|
@@ -66,49 +76,53 @@
|
|
66
76
|
# NoMethodError: undefined method `ord' for nil:NilClass
|
67
77
|
#
|
68
78
|
|
69
|
-
require 'parser/lexer_literal'
|
70
|
-
require 'parser/syntax_error'
|
71
|
-
|
72
79
|
class Parser::Lexer
|
73
80
|
|
74
81
|
%% write data nofinal;
|
75
82
|
# %
|
76
83
|
|
77
|
-
attr_reader :
|
84
|
+
attr_reader :source_buffer
|
85
|
+
|
86
|
+
attr_accessor :diagnostics
|
78
87
|
attr_accessor :static_env
|
79
88
|
|
80
|
-
|
89
|
+
attr_accessor :cond, :cmdarg
|
90
|
+
|
91
|
+
attr_reader :comments
|
81
92
|
|
82
93
|
def initialize(version)
|
83
|
-
@version
|
94
|
+
@version = version
|
95
|
+
@static_env = nil
|
84
96
|
|
85
97
|
reset
|
86
98
|
end
|
87
99
|
|
88
100
|
def reset(reset_state=true)
|
101
|
+
# Ragel-related variables:
|
89
102
|
if reset_state
|
90
103
|
# Unit tests set state prior to resetting lexer.
|
91
|
-
@cs
|
104
|
+
@cs = self.class.lex_en_line_begin
|
105
|
+
|
106
|
+
@cond = StackState.new('cond')
|
107
|
+
@cmdarg = StackState.new('cmdarg')
|
92
108
|
end
|
93
109
|
|
94
|
-
#
|
95
|
-
@
|
96
|
-
@
|
97
|
-
@
|
98
|
-
@act = 0 # next action
|
110
|
+
@p = 0 # stream position (saved manually in #advance)
|
111
|
+
@ts = nil # token start
|
112
|
+
@te = nil # token end
|
113
|
+
@act = 0 # next action
|
99
114
|
|
100
|
-
@stack
|
101
|
-
@top
|
115
|
+
@stack = [] # state stack
|
116
|
+
@top = 0 # state stack top pointer
|
102
117
|
|
118
|
+
# Lexer state:
|
103
119
|
@token_queue = []
|
104
120
|
@literal_stack = []
|
105
121
|
|
106
|
-
@newlines = [0] # sorted set of \n positions
|
107
|
-
@newline_s = nil # location of last encountered newline
|
108
|
-
@location = nil # location of last #advance'd token
|
109
|
-
|
110
122
|
@comments = "" # collected comments
|
111
123
|
|
124
|
+
@newline_s = nil # location of last encountered newline
|
125
|
+
|
112
126
|
@num_base = nil # last numeric base
|
113
127
|
@num_digits_s = nil # starting position of numeric digits
|
114
128
|
|
@@ -125,15 +139,21 @@ class Parser::Lexer
|
|
125
139
|
@lambda_stack = []
|
126
140
|
end
|
127
141
|
|
128
|
-
def
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
142
|
+
def source_buffer=(source_buffer)
|
143
|
+
@source_buffer = source_buffer
|
144
|
+
|
145
|
+
if @source_buffer
|
146
|
+
# Heredoc processing coupled with weird newline quirks
|
147
|
+
# require three '\0' (EOF) chars to be appended; after
|
148
|
+
# `p = @heredoc_s`, if `p` points at EOF, the FSM could
|
149
|
+
# not bail out early enough and will crash.
|
150
|
+
#
|
151
|
+
# Patches accepted.
|
152
|
+
#
|
153
|
+
@source = @source_buffer.source.gsub(/\r\n/, "\n") + "\0\0\0"
|
154
|
+
else
|
155
|
+
@source = nil
|
156
|
+
end
|
137
157
|
end
|
138
158
|
|
139
159
|
LEX_STATES = {
|
@@ -159,7 +179,7 @@ class Parser::Lexer
|
|
159
179
|
# Return next token: [type, value].
|
160
180
|
def advance
|
161
181
|
if @token_queue.any?
|
162
|
-
return
|
182
|
+
return @token_queue.shift
|
163
183
|
end
|
164
184
|
|
165
185
|
# Ugly, but dependent on Ragel output. Consider refactoring it somehow.
|
@@ -183,24 +203,14 @@ class Parser::Lexer
|
|
183
203
|
@p = p
|
184
204
|
|
185
205
|
if @token_queue.any?
|
186
|
-
|
206
|
+
@token_queue.shift
|
187
207
|
elsif @cs == self.class.lex_error
|
188
|
-
|
208
|
+
[ false, [ '$error', range(p - 1, p) ] ]
|
189
209
|
else
|
190
|
-
|
210
|
+
[ false, [ '$eof', range(p - 1, p) ] ]
|
191
211
|
end
|
192
212
|
end
|
193
213
|
|
194
|
-
# Like #advance, but also pretty-print the token and its position
|
195
|
-
# in the stream to `stdout`.
|
196
|
-
def advance_and_decorate
|
197
|
-
type, val = advance
|
198
|
-
|
199
|
-
puts decorate(location, "\e[0;32m#{type} #{val.inspect}\e[0m")
|
200
|
-
|
201
|
-
[type, val]
|
202
|
-
end
|
203
|
-
|
204
214
|
# Return the current collected comment block and clear the storage.
|
205
215
|
def clear_comments
|
206
216
|
comments = @comments
|
@@ -209,103 +219,42 @@ class Parser::Lexer
|
|
209
219
|
comments
|
210
220
|
end
|
211
221
|
|
212
|
-
# Lex `str` for the Ruby version `version` with initial state `state`.
|
213
|
-
#
|
214
|
-
# The tokens displayed by this function are not the same as tokens
|
215
|
-
# consumed by parser, because the parser manipulates lexer state on
|
216
|
-
# its own.
|
217
|
-
def self.do(source, state=nil, version=19)
|
218
|
-
lex = new(version)
|
219
|
-
lex.source = source
|
220
|
-
lex.state = state if state
|
221
|
-
|
222
|
-
loop do
|
223
|
-
type, val = lex.advance_and_decorate
|
224
|
-
break if !type
|
225
|
-
end
|
226
|
-
|
227
|
-
puts "Lex state: #{lex.state}"
|
228
|
-
end
|
229
|
-
|
230
|
-
# Used by LexerLiteral to emit tokens for string content.
|
231
|
-
def emit(type, value = tok, s = @ts, e = @te)
|
232
|
-
if s.nil? || e.nil?
|
233
|
-
raise "broken #emit invocation in #{caller[0]}"
|
234
|
-
end
|
235
|
-
|
236
|
-
@token_queue << [ type, value, s, e ]
|
237
|
-
end
|
238
|
-
|
239
|
-
def emit_table(table, s = @ts, e = @te)
|
240
|
-
token = tok(s, e)
|
241
|
-
emit(table[token], token, s, e)
|
242
|
-
end
|
243
|
-
|
244
|
-
# shim
|
245
|
-
def lineno
|
246
|
-
@location[0] + 1
|
247
|
-
end
|
248
|
-
|
249
222
|
protected
|
250
223
|
|
251
224
|
def eof_char?(char)
|
252
225
|
[0x04, 0x1a, 0x00].include? char.ord
|
253
226
|
end
|
254
227
|
|
255
|
-
def
|
256
|
-
@version
|
228
|
+
def version?(*versions)
|
229
|
+
versions.include?(@version)
|
257
230
|
end
|
258
231
|
|
259
|
-
def
|
260
|
-
@
|
232
|
+
def stack_pop
|
233
|
+
@top -= 1
|
234
|
+
@stack[@top]
|
261
235
|
end
|
262
236
|
|
263
237
|
def tok(s = @ts, e = @te)
|
264
238
|
@source[s...e]
|
265
239
|
end
|
266
240
|
|
267
|
-
def
|
268
|
-
@
|
269
|
-
end
|
270
|
-
|
271
|
-
def dissect_location(start, finish)
|
272
|
-
line_number = @newlines.rindex { |nl| start >= nl }
|
273
|
-
line_first_col = @newlines[line_number]
|
274
|
-
|
275
|
-
start_col = start - line_first_col
|
276
|
-
finish_col = finish - line_first_col
|
277
|
-
|
278
|
-
[ line_number, start_col, finish_col ]
|
241
|
+
def range(s = @ts, e = @te)
|
242
|
+
Parser::Source::Range.new(@source_buffer, s, e - 1)
|
279
243
|
end
|
280
244
|
|
281
|
-
def
|
282
|
-
type, value,
|
283
|
-
|
284
|
-
@location = dissect_location(start, finish)
|
285
|
-
|
286
|
-
[ type, value ]
|
245
|
+
def emit(type, value = tok, s = @ts, e = @te)
|
246
|
+
@token_queue << [ type, [ value, range(s, e) ] ]
|
287
247
|
end
|
288
248
|
|
289
|
-
def
|
290
|
-
|
291
|
-
|
292
|
-
line = @source.lines.drop(line_number).first
|
293
|
-
line[from...to] = "\e[4m#{line[from...to]}\e[0m"
|
294
|
-
|
295
|
-
tail_len = to - from - 1
|
296
|
-
tail = "~" * (tail_len >= 0 ? tail_len : 0)
|
297
|
-
decoration = "#{" " * from}\e[1;31m^#{tail}\e[0m #{message}"
|
298
|
-
|
299
|
-
[ line, decoration ]
|
300
|
-
end
|
249
|
+
def emit_table(table, s = @ts, e = @te)
|
250
|
+
value = tok(s, e)
|
301
251
|
|
302
|
-
|
303
|
-
$stderr.puts "warning: #{message}"
|
304
|
-
$stderr.puts decorate(dissect_location(start, finish))
|
252
|
+
emit(table[value], value, s, e)
|
305
253
|
end
|
306
254
|
|
307
|
-
def
|
308
|
-
|
255
|
+
def diagnostic(type, message, location=range, highlights=[])
|
256
|
+
@diagnostics.process(
|
257
|
+
Parser::Diagnostic.new(type, message, location, highlights))
|
309
258
|
end
|
310
259
|
|
311
260
|
#
|
@@ -313,10 +262,10 @@ class Parser::Lexer
|
|
313
262
|
#
|
314
263
|
|
315
264
|
def push_literal(*args)
|
316
|
-
new_literal =
|
265
|
+
new_literal = Literal.new(self, *args)
|
317
266
|
@literal_stack.push(new_literal)
|
318
267
|
|
319
|
-
if
|
268
|
+
if new_literal.type == :tWORDS_BEG
|
320
269
|
self.class.lex_en_interp_words
|
321
270
|
elsif new_literal.type == :tQWORDS_BEG
|
322
271
|
self.class.lex_en_plain_words
|
@@ -328,7 +277,7 @@ class Parser::Lexer
|
|
328
277
|
end
|
329
278
|
|
330
279
|
def literal
|
331
|
-
@literal_stack
|
280
|
+
@literal_stack.last
|
332
281
|
end
|
333
282
|
|
334
283
|
def pop_literal
|
@@ -361,7 +310,6 @@ class Parser::Lexer
|
|
361
310
|
'=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
|
362
311
|
'<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
|
363
312
|
'{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
|
364
|
-
'do' => :kDO
|
365
313
|
}
|
366
314
|
|
367
315
|
PUNCTUATION_BEGIN = {
|
@@ -407,7 +355,6 @@ class Parser::Lexer
|
|
407
355
|
#
|
408
356
|
# This action is embedded directly into c_nl, as it is idempotent and
|
409
357
|
# there are no cases when we need to skip it.
|
410
|
-
record_newline(p + 1)
|
411
358
|
@newline_s = p
|
412
359
|
}
|
413
360
|
|
@@ -514,8 +461,8 @@ class Parser::Lexer
|
|
514
461
|
|
515
462
|
# Ruby accepts (and fails on) variables with leading digit
|
516
463
|
# in literal context, but not in unquoted symbol body.
|
517
|
-
class_var_v = '@@'
|
518
|
-
instance_var_v = '@'
|
464
|
+
class_var_v = '@@' c_alnum+;
|
465
|
+
instance_var_v = '@' c_alnum+;
|
519
466
|
|
520
467
|
#
|
521
468
|
# === ESCAPE SEQUENCE PARSING ===
|
@@ -538,7 +485,12 @@ class Parser::Lexer
|
|
538
485
|
codepoint = codepoint_str.to_i(16)
|
539
486
|
|
540
487
|
if codepoint >= 0x110000
|
541
|
-
@escape = lambda
|
488
|
+
@escape = lambda do
|
489
|
+
# TODO better location reporting
|
490
|
+
diagnostic :error, Parser::ERRORS[:unicode_point_too_large],
|
491
|
+
range(@escape_s, p)
|
492
|
+
end
|
493
|
+
|
542
494
|
break
|
543
495
|
end
|
544
496
|
|
@@ -551,30 +503,32 @@ class Parser::Lexer
|
|
551
503
|
'a' => "\a", 'b' => "\b", 'e' => "\e", 'f' => "\f",
|
552
504
|
'n' => "\n", 'r' => "\r", 's' => "\s", 't' => "\t",
|
553
505
|
'v' => "\v", '\\' => "\\"
|
554
|
-
}.fetch(@source[p - 1], @source[p - 1])
|
506
|
+
}.fetch(@source[p - 1].chr, @source[p - 1].chr)
|
555
507
|
}
|
556
508
|
|
557
509
|
action invalid_complex_escape {
|
558
|
-
@escape = lambda
|
510
|
+
@escape = lambda do
|
511
|
+
diagnostic :error, Parser::ERRORS[:invalid_escape]
|
512
|
+
end
|
559
513
|
}
|
560
514
|
|
561
515
|
action slash_c_char {
|
562
|
-
@escape = (@escape.ord & 0x9f).chr
|
516
|
+
@escape = (@escape[0].ord & 0x9f).chr
|
563
517
|
}
|
564
518
|
|
565
519
|
action slash_m_char {
|
566
|
-
@escape = (@escape.ord | 0x80).chr
|
520
|
+
@escape = (@escape[0].ord | 0x80).chr
|
567
521
|
}
|
568
522
|
|
569
523
|
maybe_escaped_char = (
|
570
524
|
'\\' c_any %unescape_char
|
571
|
-
| ( c_any - [\\] ) % { @escape = @source[p - 1] }
|
525
|
+
| ( c_any - [\\] ) % { @escape = @source[p - 1].chr }
|
572
526
|
);
|
573
527
|
|
574
528
|
maybe_escaped_ctrl_char = ( # why?!
|
575
529
|
'\\' c_any %unescape_char %slash_c_char
|
576
530
|
| '?' % { @escape = "\x7f" }
|
577
|
-
| ( c_any - [\\?] ) % { @escape = @source[p - 1] } %slash_c_char
|
531
|
+
| ( c_any - [\\?] ) % { @escape = @source[p - 1].chr } %slash_c_char
|
578
532
|
);
|
579
533
|
|
580
534
|
escape = (
|
@@ -592,7 +546,12 @@ class Parser::Lexer
|
|
592
546
|
|
593
547
|
# %q[\x]
|
594
548
|
| 'x' ( c_any - xdigit )
|
595
|
-
% {
|
549
|
+
% {
|
550
|
+
@escape = lambda do
|
551
|
+
diagnostic :error, Parser::ERRORS[:invalid_hex_escape],
|
552
|
+
range(@escape_s - 1, p + 2)
|
553
|
+
end
|
554
|
+
}
|
596
555
|
|
597
556
|
# %q[\u123] %q[\u{12]
|
598
557
|
| 'u' ( c_any{0,4} -
|
@@ -602,7 +561,12 @@ class Parser::Lexer
|
|
602
561
|
| '{' xdigit{2} [ \t}] # \u{12. \u{12} are valid
|
603
562
|
)
|
604
563
|
)
|
605
|
-
% {
|
564
|
+
% {
|
565
|
+
@escape = lambda do
|
566
|
+
diagnostic :error, Parser::ERRORS[:invalid_unicode_escape],
|
567
|
+
range(@escape_s - 1, p)
|
568
|
+
end
|
569
|
+
}
|
606
570
|
|
607
571
|
# \u{123 456}
|
608
572
|
| 'u{' ( xdigit{1,6} [ \t] )*
|
@@ -611,7 +575,12 @@ class Parser::Lexer
|
|
611
575
|
| ( xdigit* ( c_any - xdigit - '}' )+ '}'
|
612
576
|
| ( c_any - '}' )* c_eof
|
613
577
|
| xdigit{7,}
|
614
|
-
) % {
|
578
|
+
) % {
|
579
|
+
@escape = lambda do
|
580
|
+
diagnostic :fatal, Parser::ERRORS[:unterminated_unicode],
|
581
|
+
range(p - 1, p)
|
582
|
+
end
|
583
|
+
}
|
615
584
|
)
|
616
585
|
|
617
586
|
# \C-\a \cx
|
@@ -635,7 +604,10 @@ class Parser::Lexer
|
|
635
604
|
|
636
605
|
| ( c_any - [0-7xuCMc] ) %unescape_char
|
637
606
|
|
638
|
-
| c_eof % {
|
607
|
+
| c_eof % {
|
608
|
+
diagnostic :fatal, Parser::ERRORS[:escape_eof],
|
609
|
+
range(p - 1, p)
|
610
|
+
}
|
639
611
|
);
|
640
612
|
|
641
613
|
# Use rules in form of `e_bs escape' when you need to parse a sequence.
|
@@ -666,7 +638,7 @@ class Parser::Lexer
|
|
666
638
|
# of positions in the input stream, namely @heredoc_e
|
667
639
|
# (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
|
668
640
|
#
|
669
|
-
# @heredoc_e is simply contained inside the corresponding
|
641
|
+
# @heredoc_e is simply contained inside the corresponding Literal, and
|
670
642
|
# when the heredoc is closed, the lexing is restarted from that position.
|
671
643
|
#
|
672
644
|
# @herebody_s is considerably more complex. First, @herebody_s changes after each
|
@@ -683,14 +655,14 @@ class Parser::Lexer
|
|
683
655
|
# After every heredoc was parsed, @herebody_s contains the
|
684
656
|
# position of next token after all heredocs.
|
685
657
|
if @herebody_s
|
686
|
-
p = @herebody_s
|
658
|
+
p = @herebody_s - 1
|
687
659
|
@herebody_s = nil
|
688
660
|
end
|
689
661
|
};
|
690
662
|
|
691
663
|
action extend_string {
|
692
664
|
if literal.nest_and_try_closing tok, @ts, @te
|
693
|
-
|
665
|
+
fnext *pop_literal; fbreak;
|
694
666
|
else
|
695
667
|
literal.extend_string tok, @ts, @te
|
696
668
|
end
|
@@ -701,10 +673,10 @@ class Parser::Lexer
|
|
701
673
|
# If the literal is actually closed by the backslash,
|
702
674
|
# rewind the input prior to consuming the escape sequence.
|
703
675
|
p = @escape_s - 1
|
704
|
-
|
676
|
+
fnext *pop_literal; fbreak;
|
705
677
|
else
|
706
678
|
# Get the first character after the backslash.
|
707
|
-
escaped_char = @source[@escape_s]
|
679
|
+
escaped_char = @source[@escape_s].chr
|
708
680
|
|
709
681
|
if literal.munge_escape? escaped_char
|
710
682
|
# If this particular literal uses this character as an opening
|
@@ -765,11 +737,12 @@ class Parser::Lexer
|
|
765
737
|
end
|
766
738
|
|
767
739
|
if is_eof
|
768
|
-
|
740
|
+
diagnostic :fatal, Parser::ERRORS[:string_eof],
|
741
|
+
range(literal.str_s, literal.str_s + 1)
|
769
742
|
end
|
770
743
|
|
771
744
|
# A literal newline is appended if the heredoc was _not_ closed
|
772
|
-
# this time. See also
|
745
|
+
# this time. See also Literal#nest_and_try_closing for rationale of
|
773
746
|
# calling #flush_string here.
|
774
747
|
literal.extend_string tok, @ts, @te
|
775
748
|
literal.flush_string
|
@@ -782,8 +755,7 @@ class Parser::Lexer
|
|
782
755
|
# Interpolations with immediate variable names simply call into
|
783
756
|
# the corresponding machine.
|
784
757
|
|
785
|
-
interp_var =
|
786
|
-
'#' ( global_var | class_var_v | instance_var_v );
|
758
|
+
interp_var = '#' ( global_var | class_var_v | instance_var_v );
|
787
759
|
|
788
760
|
action extend_interp_var {
|
789
761
|
literal.flush_string
|
@@ -808,6 +780,8 @@ class Parser::Lexer
|
|
808
780
|
interp_code = '#{';
|
809
781
|
|
810
782
|
e_lbrace = '{' % {
|
783
|
+
@cond.push(false); @cmdarg.push(false)
|
784
|
+
|
811
785
|
if literal
|
812
786
|
literal.start_interp_brace
|
813
787
|
end
|
@@ -827,7 +801,7 @@ class Parser::Lexer
|
|
827
801
|
end
|
828
802
|
|
829
803
|
fhold;
|
830
|
-
fnext
|
804
|
+
fnext *stack_pop;
|
831
805
|
fbreak;
|
832
806
|
end
|
833
807
|
end
|
@@ -872,6 +846,7 @@ class Parser::Lexer
|
|
872
846
|
*|;
|
873
847
|
|
874
848
|
plain_string := |*
|
849
|
+
'\\' c_nl => extend_string_eol;
|
875
850
|
e_bs c_any => extend_string_escaped;
|
876
851
|
c_eol => extend_string_eol;
|
877
852
|
c_any => extend_string;
|
@@ -882,11 +857,12 @@ class Parser::Lexer
|
|
882
857
|
=> {
|
883
858
|
unknown_options = tok.scan(/[^imxouesn]/)
|
884
859
|
if unknown_options.any?
|
885
|
-
|
860
|
+
message = Parser::ERRORS[:regexp_options] % { :options => unknown_options.join }
|
861
|
+
diagnostic :error, message
|
886
862
|
end
|
887
863
|
|
888
864
|
emit(:tREGEXP_OPT)
|
889
|
-
|
865
|
+
fnext expr_end; fbreak;
|
890
866
|
};
|
891
867
|
|
892
868
|
any
|
@@ -904,11 +880,17 @@ class Parser::Lexer
|
|
904
880
|
# The default longest-match scanning does not work here due
|
905
881
|
# to sheer ambiguity.
|
906
882
|
|
883
|
+
ambiguous_fid_suffix = # actual parsed
|
884
|
+
[?!] %{ tm = p } | # a? a?
|
885
|
+
'!=' %{ tm = p - 2 } # a!=b a != b
|
886
|
+
;
|
887
|
+
|
907
888
|
ambiguous_ident_suffix = # actual parsed
|
908
|
-
|
909
|
-
'
|
910
|
-
'
|
911
|
-
'
|
889
|
+
ambiguous_fid_suffix |
|
890
|
+
'=' %{ tm = p } | # a= a=
|
891
|
+
'==' %{ tm = p - 2 } | # a==b a == b
|
892
|
+
'=~' %{ tm = p - 2 } | # a=~b a =~ b
|
893
|
+
'=>' %{ tm = p - 2 } | # a=>b a => b
|
912
894
|
'===' %{ tm = p - 3 } # a===b a === b
|
913
895
|
;
|
914
896
|
|
@@ -922,15 +904,24 @@ class Parser::Lexer
|
|
922
904
|
'::' %{ tm = p - 2 } # A::B A :: B
|
923
905
|
;
|
924
906
|
|
907
|
+
# Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
|
908
|
+
# @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
|
909
|
+
|
910
|
+
e_lbrack = '[' % {
|
911
|
+
@cond.push(false); @cmdarg.push(false)
|
912
|
+
};
|
913
|
+
|
925
914
|
# Ruby 1.9 lambdas require parentheses counting in order to
|
926
915
|
# emit correct opening kDO/tLBRACE.
|
927
916
|
|
928
917
|
e_lparen = '(' % {
|
929
|
-
|
918
|
+
@cond.push(false); @cmdarg.push(false)
|
919
|
+
|
920
|
+
@paren_nest += 1
|
930
921
|
};
|
931
922
|
|
932
923
|
e_rparen = ')' % {
|
933
|
-
|
924
|
+
@paren_nest -= 1
|
934
925
|
};
|
935
926
|
|
936
927
|
# Variable lexing code is accessed from both expressions and
|
@@ -940,30 +931,36 @@ class Parser::Lexer
|
|
940
931
|
global_var
|
941
932
|
=> {
|
942
933
|
if tok =~ /^\$([1-9][0-9]*)$/
|
943
|
-
emit(:tNTH_REF,
|
934
|
+
emit(:tNTH_REF, tok(@ts + 1).to_i)
|
944
935
|
elsif tok =~ /^\$([&`'+])$/
|
945
|
-
emit(:tBACK_REF
|
936
|
+
emit(:tBACK_REF)
|
946
937
|
else
|
947
938
|
emit(:tGVAR)
|
948
939
|
end
|
949
940
|
|
950
|
-
fnext
|
941
|
+
fnext *stack_pop; fbreak;
|
951
942
|
};
|
952
943
|
|
953
944
|
class_var_v
|
954
945
|
=> {
|
955
|
-
|
946
|
+
if tok =~ /^@@[0-9]/
|
947
|
+
message = Parser::ERRORS[:cvar_name] % { :name => tok }
|
948
|
+
diagnostic :error, message
|
949
|
+
end
|
956
950
|
|
957
951
|
emit(:tCVAR)
|
958
|
-
fnext
|
952
|
+
fnext *stack_pop; fbreak;
|
959
953
|
};
|
960
954
|
|
961
955
|
instance_var_v
|
962
956
|
=> {
|
963
|
-
|
957
|
+
if tok =~ /^@[0-9]/
|
958
|
+
message = Parser::ERRORS[:ivar_name] % { :name => tok }
|
959
|
+
diagnostic :error, message
|
960
|
+
end
|
964
961
|
|
965
962
|
emit(:tIVAR)
|
966
|
-
fnext
|
963
|
+
fnext *stack_pop; fbreak;
|
967
964
|
};
|
968
965
|
*|;
|
969
966
|
|
@@ -996,11 +993,11 @@ class Parser::Lexer
|
|
996
993
|
fnext expr_end; fbreak; };
|
997
994
|
|
998
995
|
':'
|
999
|
-
=> { fhold; fgoto
|
996
|
+
=> { fhold; fgoto expr_beg; };
|
1000
997
|
|
1001
998
|
global_var
|
1002
|
-
=> {
|
1003
|
-
|
999
|
+
=> { p = @ts - 1
|
1000
|
+
fcall expr_variable; };
|
1004
1001
|
|
1005
1002
|
c_space_nl+;
|
1006
1003
|
|
@@ -1015,12 +1012,16 @@ class Parser::Lexer
|
|
1015
1012
|
# Transitions to `expr_arg` afterwards.
|
1016
1013
|
#
|
1017
1014
|
expr_dot := |*
|
1018
|
-
|
1015
|
+
constant
|
1016
|
+
=> { emit(:tCONSTANT)
|
1017
|
+
fnext expr_arg; fbreak; };
|
1018
|
+
|
1019
|
+
call_or_var
|
1019
1020
|
=> { emit(:tIDENTIFIER)
|
1020
1021
|
fnext expr_arg; fbreak; };
|
1021
1022
|
|
1022
|
-
|
1023
|
-
=> { emit(:
|
1023
|
+
call_or_var ambiguous_ident_suffix
|
1024
|
+
=> { emit(:tFID, tok(@ts, tm), @ts, tm)
|
1024
1025
|
fnext expr_arg; p = tm - 1; fbreak; };
|
1025
1026
|
|
1026
1027
|
operator_fname |
|
@@ -1031,6 +1032,8 @@ class Parser::Lexer
|
|
1031
1032
|
|
1032
1033
|
c_space_nl+;
|
1033
1034
|
|
1035
|
+
'#' c_line* c_nl;
|
1036
|
+
|
1034
1037
|
c_any
|
1035
1038
|
=> { fhold; fgoto expr_end; };
|
1036
1039
|
|
@@ -1059,8 +1062,8 @@ class Parser::Lexer
|
|
1059
1062
|
|
1060
1063
|
# meth [...]
|
1061
1064
|
# Array argument. Compare with indexing `meth[...]`.
|
1062
|
-
c_space+
|
1063
|
-
=> { emit(:tLBRACK, '[', @te - 1, @te)
|
1065
|
+
c_space+ e_lbrack
|
1066
|
+
=> { emit(:tLBRACK, '[', @te - 1, @te)
|
1064
1067
|
fnext expr_beg; fbreak; };
|
1065
1068
|
|
1066
1069
|
# cmd {}
|
@@ -1076,12 +1079,6 @@ class Parser::Lexer
|
|
1076
1079
|
end
|
1077
1080
|
};
|
1078
1081
|
|
1079
|
-
# a.b
|
1080
|
-
# Dot-call.
|
1081
|
-
'.' | '::'
|
1082
|
-
=> { emit_table(PUNCTUATION);
|
1083
|
-
fnext expr_dot; fbreak; };
|
1084
|
-
|
1085
1082
|
#
|
1086
1083
|
# AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
|
1087
1084
|
#
|
@@ -1091,11 +1088,22 @@ class Parser::Lexer
|
|
1091
1088
|
c_space+ '?'
|
1092
1089
|
=> { fhold; fgoto expr_beg; };
|
1093
1090
|
|
1091
|
+
# a %{1}, a %[1] (but not "a %=1=" or "a % foo")
|
1092
|
+
c_space+ ( '%' [^= ]
|
1093
|
+
# a /foo/ (but not "a / foo" or "a /=foo")
|
1094
|
+
| '/' ( c_any - c_space_nl - '=' )
|
1095
|
+
# a <<HEREDOC
|
1096
|
+
| '<<'
|
1097
|
+
)
|
1098
|
+
=> { fhold; fhold; fgoto expr_beg; };
|
1099
|
+
|
1094
1100
|
# x +1
|
1095
1101
|
# Ambiguous unary operator or regexp literal.
|
1096
1102
|
c_space+ [+\-/]
|
1097
1103
|
=> {
|
1098
|
-
warning
|
1104
|
+
diagnostic :warning, Parser::ERRORS[:ambiguous_literal],
|
1105
|
+
range(@te - 1, @te)
|
1106
|
+
|
1099
1107
|
fhold; fhold; fgoto expr_beg;
|
1100
1108
|
};
|
1101
1109
|
|
@@ -1103,11 +1111,23 @@ class Parser::Lexer
|
|
1103
1111
|
# Ambiguous splat or block-pass.
|
1104
1112
|
c_space+ [*&]
|
1105
1113
|
=> {
|
1106
|
-
|
1107
|
-
warning
|
1114
|
+
message = Parser::ERRORS[:ambiguous_prefix] % { :prefix => tok(@te - 1, @te) }
|
1115
|
+
diagnostic :warning, message,
|
1116
|
+
range(@te - 1, @te)
|
1117
|
+
|
1108
1118
|
fhold; fgoto expr_beg;
|
1109
1119
|
};
|
1110
1120
|
|
1121
|
+
# x ::Foo
|
1122
|
+
# Ambiguous toplevel constant access.
|
1123
|
+
c_space+ '::'
|
1124
|
+
=> { fhold; fhold; fgoto expr_beg; };
|
1125
|
+
|
1126
|
+
# x:b
|
1127
|
+
# Symbol.
|
1128
|
+
c_space* ':'
|
1129
|
+
=> { fhold; fgoto expr_beg; };
|
1130
|
+
|
1111
1131
|
#
|
1112
1132
|
# AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
|
1113
1133
|
#
|
@@ -1129,7 +1149,7 @@ class Parser::Lexer
|
|
1129
1149
|
fgoto expr_end;
|
1130
1150
|
};
|
1131
1151
|
|
1132
|
-
c_space* c_nl
|
1152
|
+
c_space* ( '#' c_line* )? c_nl
|
1133
1153
|
=> { fhold; fgoto expr_end; };
|
1134
1154
|
|
1135
1155
|
c_any
|
@@ -1152,8 +1172,7 @@ class Parser::Lexer
|
|
1152
1172
|
# `{` as `tLBRACE_ARG`.
|
1153
1173
|
#
|
1154
1174
|
# The default post-`expr_endarg` state is `expr_end`, so this state also handles
|
1155
|
-
# `do` (as `kDO_BLOCK` in `expr_beg`).
|
1156
|
-
# just handle `kDO`.)
|
1175
|
+
# `do` (as `kDO_BLOCK` in `expr_beg`).
|
1157
1176
|
expr_endarg := |*
|
1158
1177
|
e_lbrace
|
1159
1178
|
=> { emit(:tLBRACE_ARG)
|
@@ -1183,6 +1202,8 @@ class Parser::Lexer
|
|
1183
1202
|
|
1184
1203
|
c_space+;
|
1185
1204
|
|
1205
|
+
'#' c_line*;
|
1206
|
+
|
1186
1207
|
c_nl
|
1187
1208
|
=> { fhold; fgoto expr_end; };
|
1188
1209
|
|
@@ -1221,39 +1242,32 @@ class Parser::Lexer
|
|
1221
1242
|
# STRING AND REGEXP LITERALS
|
1222
1243
|
#
|
1223
1244
|
|
1224
|
-
# a / 42
|
1225
|
-
# a % 42
|
1226
|
-
# a %= 42 (disambiguation with %=string=)
|
1227
|
-
[/%] c_space_nl | '%=' # /
|
1228
|
-
=> {
|
1229
|
-
fhold; fhold;
|
1230
|
-
fgoto expr_end;
|
1231
|
-
};
|
1232
|
-
|
1233
1245
|
# /regexp/oui
|
1234
|
-
|
1246
|
+
# /=/ (disambiguation with /=)
|
1247
|
+
'/' c_any
|
1235
1248
|
=> {
|
1236
|
-
type
|
1237
|
-
fgoto *push_literal(type, delimiter, @ts);
|
1249
|
+
type = delimiter = tok[0].chr
|
1250
|
+
fhold; fgoto *push_literal(type, delimiter, @ts);
|
1238
1251
|
};
|
1239
1252
|
|
1240
1253
|
# %<string>
|
1241
1254
|
'%' ( c_any - [A-Za-z] )
|
1242
1255
|
=> {
|
1243
|
-
type, delimiter = tok[0], tok[-1]
|
1256
|
+
type, delimiter = tok[0].chr, tok[-1].chr
|
1244
1257
|
fgoto *push_literal(type, delimiter, @ts);
|
1245
1258
|
};
|
1246
1259
|
|
1247
1260
|
# %w(we are the people)
|
1248
1261
|
'%' [A-Za-z]+ c_any
|
1249
1262
|
=> {
|
1250
|
-
type, delimiter = tok[0..-2], tok[-1]
|
1263
|
+
type, delimiter = tok[0..-2], tok[-1].chr
|
1251
1264
|
fgoto *push_literal(type, delimiter, @ts);
|
1252
1265
|
};
|
1253
1266
|
|
1254
1267
|
'%' c_eof
|
1255
1268
|
=> {
|
1256
|
-
|
1269
|
+
diagnostic :fatal, Parser::ERRORS[:string_eof],
|
1270
|
+
range(@ts, @ts + 1)
|
1257
1271
|
};
|
1258
1272
|
|
1259
1273
|
# Heredoc start.
|
@@ -1280,6 +1294,31 @@ class Parser::Lexer
|
|
1280
1294
|
p = @herebody_s - 1
|
1281
1295
|
};
|
1282
1296
|
|
1297
|
+
#
|
1298
|
+
# SYMBOL LITERALS
|
1299
|
+
#
|
1300
|
+
|
1301
|
+
# :"bar", :'baz'
|
1302
|
+
':' ['"] # '
|
1303
|
+
=> {
|
1304
|
+
type, delimiter = tok, tok[-1].chr
|
1305
|
+
fgoto *push_literal(type, delimiter, @ts);
|
1306
|
+
};
|
1307
|
+
|
1308
|
+
':' bareword ambiguous_symbol_suffix
|
1309
|
+
=> {
|
1310
|
+
emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
|
1311
|
+
p = tm - 1
|
1312
|
+
fnext expr_end; fbreak;
|
1313
|
+
};
|
1314
|
+
|
1315
|
+
':' ( bareword | global_var | class_var | instance_var |
|
1316
|
+
operator_fname | operator_arithmetic | operator_rest )
|
1317
|
+
=> {
|
1318
|
+
emit(:tSYMBOL, tok(@ts + 1), @ts)
|
1319
|
+
fnext expr_end; fbreak;
|
1320
|
+
};
|
1321
|
+
|
1283
1322
|
#
|
1284
1323
|
# AMBIGUOUS TERNARY OPERATOR
|
1285
1324
|
#
|
@@ -1293,20 +1332,22 @@ class Parser::Lexer
|
|
1293
1332
|
|
1294
1333
|
value = @escape || tok(@ts + 1)
|
1295
1334
|
|
1296
|
-
if
|
1297
|
-
emit(:tINTEGER, value.ord)
|
1335
|
+
if version?(18)
|
1336
|
+
emit(:tINTEGER, value[0].ord)
|
1298
1337
|
else
|
1299
1338
|
emit(:tSTRING, value)
|
1300
1339
|
end
|
1301
1340
|
|
1302
|
-
fbreak;
|
1341
|
+
fnext expr_end; fbreak;
|
1303
1342
|
};
|
1304
1343
|
|
1305
1344
|
'?' c_space_nl
|
1306
1345
|
=> {
|
1307
1346
|
escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
|
1308
1347
|
"\v" => '\v', "\f" => '\f' }[tok[@ts + 1]]
|
1309
|
-
|
1348
|
+
message = Parser::ERRORS[:invalid_escape_use] % { :escape => escape }
|
1349
|
+
diagnostic :warning, message,
|
1350
|
+
range(@ts, @ts + 1)
|
1310
1351
|
|
1311
1352
|
p = @ts - 1
|
1312
1353
|
fgoto expr_end;
|
@@ -1314,7 +1355,8 @@ class Parser::Lexer
|
|
1314
1355
|
|
1315
1356
|
'?' c_eof
|
1316
1357
|
=> {
|
1317
|
-
|
1358
|
+
diagnostic :fatal, Parser::ERRORS[:incomplete_escape],
|
1359
|
+
range(@ts, @ts + 1)
|
1318
1360
|
};
|
1319
1361
|
|
1320
1362
|
# f ?aa : b: Disambiguate with a character literal.
|
@@ -1328,15 +1370,20 @@ class Parser::Lexer
|
|
1328
1370
|
# KEYWORDS AND PUNCTUATION
|
1329
1371
|
#
|
1330
1372
|
|
1331
|
-
# a(
|
1332
|
-
|
1373
|
+
# a([1, 2])
|
1374
|
+
e_lbrack |
|
1333
1375
|
# a({b=>c})
|
1334
|
-
e_lbrace
|
1376
|
+
e_lbrace |
|
1335
1377
|
# a()
|
1336
1378
|
e_lparen
|
1337
1379
|
=> { emit_table(PUNCTUATION_BEGIN)
|
1338
1380
|
fbreak; };
|
1339
1381
|
|
1382
|
+
# a(+b)
|
1383
|
+
punctuation_begin
|
1384
|
+
=> { emit_table(PUNCTUATION_BEGIN)
|
1385
|
+
fbreak; };
|
1386
|
+
|
1340
1387
|
# rescue Exception => e: Block rescue.
|
1341
1388
|
# Special because it should transition to expr_mid.
|
1342
1389
|
'rescue'
|
@@ -1356,7 +1403,7 @@ class Parser::Lexer
|
|
1356
1403
|
=> {
|
1357
1404
|
fhold;
|
1358
1405
|
|
1359
|
-
if
|
1406
|
+
if version?(18)
|
1360
1407
|
emit(:tIDENTIFIER, tok(@ts, @te - 2), @ts, @te - 2)
|
1361
1408
|
fhold; # continue as a symbol
|
1362
1409
|
else
|
@@ -1383,14 +1430,32 @@ class Parser::Lexer
|
|
1383
1430
|
=> {
|
1384
1431
|
emit(:tIDENTIFIER)
|
1385
1432
|
|
1386
|
-
if @static_env && @static_env.declared?(tok
|
1387
|
-
|
1433
|
+
if @static_env && @static_env.declared?(tok)
|
1434
|
+
fnext expr_end; fbreak;
|
1388
1435
|
else
|
1389
|
-
|
1436
|
+
fnext expr_arg; fbreak;
|
1390
1437
|
end
|
1391
1438
|
};
|
1392
1439
|
|
1393
|
-
|
1440
|
+
#
|
1441
|
+
# WHITESPACE
|
1442
|
+
#
|
1443
|
+
|
1444
|
+
c_space_nl;
|
1445
|
+
|
1446
|
+
'\\\n';
|
1447
|
+
|
1448
|
+
'#' c_line* c_eol
|
1449
|
+
=> { @comments << tok
|
1450
|
+
fhold; };
|
1451
|
+
|
1452
|
+
c_nl '=begin' ( c_space | c_eol )
|
1453
|
+
=> { p = @ts - 1
|
1454
|
+
fgoto line_begin; };
|
1455
|
+
|
1456
|
+
#
|
1457
|
+
# DEFAULT TRANSITION
|
1458
|
+
#
|
1394
1459
|
|
1395
1460
|
# The following rules match most binary and all unary operators.
|
1396
1461
|
# Rules for binary operators provide better error reporting.
|
@@ -1439,11 +1504,21 @@ class Parser::Lexer
|
|
1439
1504
|
|
1440
1505
|
if tok == '{'
|
1441
1506
|
emit(:tLAMBEG)
|
1442
|
-
else
|
1507
|
+
else # 'do'
|
1443
1508
|
emit(:kDO_LAMBDA)
|
1444
1509
|
end
|
1445
1510
|
else
|
1446
|
-
|
1511
|
+
if tok == '{'
|
1512
|
+
emit_table(PUNCTUATION)
|
1513
|
+
else # 'do'
|
1514
|
+
if @cond.active?
|
1515
|
+
emit(:kDO_COND)
|
1516
|
+
elsif @cmdarg.active?
|
1517
|
+
emit(:kDO_BLOCK)
|
1518
|
+
else
|
1519
|
+
emit(:kDO)
|
1520
|
+
end
|
1521
|
+
end
|
1447
1522
|
end
|
1448
1523
|
|
1449
1524
|
fnext expr_value; fbreak;
|
@@ -1457,7 +1532,7 @@ class Parser::Lexer
|
|
1457
1532
|
=> { emit_table(KEYWORDS)
|
1458
1533
|
fnext expr_fname; fbreak; };
|
1459
1534
|
|
1460
|
-
'class' c_space_nl '<<'
|
1535
|
+
'class' c_space_nl* '<<'
|
1461
1536
|
=> { emit(:kCLASS, 'class', @ts, @ts + 5)
|
1462
1537
|
emit(:tLSHFT, '<<', @te - 2, @te)
|
1463
1538
|
fnext expr_beg; fbreak; };
|
@@ -1480,13 +1555,23 @@ class Parser::Lexer
|
|
1480
1555
|
=> {
|
1481
1556
|
emit_table(KEYWORDS)
|
1482
1557
|
|
1483
|
-
if
|
1558
|
+
if version?(18) && tok == 'not'
|
1484
1559
|
fnext expr_beg; fbreak;
|
1485
1560
|
else
|
1486
1561
|
fnext expr_arg; fbreak;
|
1487
1562
|
end
|
1488
1563
|
};
|
1489
1564
|
|
1565
|
+
'__ENCODING__'
|
1566
|
+
=> {
|
1567
|
+
if version?(18)
|
1568
|
+
emit(:tIDENTIFIER)
|
1569
|
+
else
|
1570
|
+
emit_table(KEYWORDS)
|
1571
|
+
end
|
1572
|
+
fbreak;
|
1573
|
+
};
|
1574
|
+
|
1490
1575
|
keyword_with_end
|
1491
1576
|
=> { emit_table(KEYWORDS)
|
1492
1577
|
fbreak; };
|
@@ -1503,7 +1588,8 @@ class Parser::Lexer
|
|
1503
1588
|
( digit+ '_' )* digit* '_'?
|
1504
1589
|
| '0' [Bb] %{ @num_base = 2; @num_digits_s = p }
|
1505
1590
|
( [01]+ '_' )* [01]* '_'?
|
1506
|
-
| [1-9]
|
1591
|
+
| [1-9] digit*
|
1592
|
+
%{ @num_base = 10; @num_digits_s = @ts }
|
1507
1593
|
( '_' digit+ )* digit* '_'?
|
1508
1594
|
| '0' %{ @num_base = 8; @num_digits_s = @ts }
|
1509
1595
|
( '_' digit+ )* digit* '_'?
|
@@ -1512,14 +1598,17 @@ class Parser::Lexer
|
|
1512
1598
|
digits = tok(@num_digits_s)
|
1513
1599
|
|
1514
1600
|
if digits.end_with? '_'
|
1515
|
-
error
|
1516
|
-
|
1601
|
+
diagnostic :error, Parser::ERRORS[:trailing_underscore],
|
1602
|
+
range(@te - 1, @te)
|
1603
|
+
elsif digits.empty? && @num_base == 8 && version?(18)
|
1517
1604
|
# 1.8 did not raise an error on 0o.
|
1518
1605
|
digits = "0"
|
1519
1606
|
elsif digits.empty?
|
1520
|
-
error
|
1521
|
-
elsif @num_base == 8 &&
|
1522
|
-
|
1607
|
+
diagnostic :error, Parser::ERRORS[:empty_numeric]
|
1608
|
+
elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
|
1609
|
+
invalid_s = @num_digits_s + invalid_idx
|
1610
|
+
diagnostic :error, Parser::ERRORS[:invalid_octal],
|
1611
|
+
range(invalid_s, invalid_s + 1)
|
1523
1612
|
end
|
1524
1613
|
|
1525
1614
|
emit(:tINTEGER, digits.to_i(@num_base))
|
@@ -1537,7 +1626,7 @@ class Parser::Lexer
|
|
1537
1626
|
)
|
1538
1627
|
=> {
|
1539
1628
|
if tok.start_with? '.'
|
1540
|
-
error
|
1629
|
+
diagnostic :error, Parser::ERRORS[:no_dot_digit_literal]
|
1541
1630
|
elsif tok =~ /^[eE]/
|
1542
1631
|
# The rule above allows to specify floats as just `e10', which is
|
1543
1632
|
# certainly not a float. Send a patch if you can do this better.
|
@@ -1550,25 +1639,16 @@ class Parser::Lexer
|
|
1550
1639
|
};
|
1551
1640
|
|
1552
1641
|
#
|
1553
|
-
#
|
1642
|
+
# STRING AND XSTRING LITERALS
|
1554
1643
|
#
|
1555
1644
|
|
1556
|
-
# `echo foo
|
1557
|
-
'`' |
|
1645
|
+
# `echo foo`, "bar", 'baz'
|
1646
|
+
'`' | ['"] # '
|
1558
1647
|
=> {
|
1559
|
-
type, delimiter = tok, tok[-1]
|
1648
|
+
type, delimiter = tok, tok[-1].chr
|
1560
1649
|
fgoto *push_literal(type, delimiter, @ts);
|
1561
1650
|
};
|
1562
1651
|
|
1563
|
-
':' bareword ambiguous_symbol_suffix
|
1564
|
-
=> { emit(:tSYMBOL, tok(@ts + 1, tm))
|
1565
|
-
p = tm - 1; fbreak; };
|
1566
|
-
|
1567
|
-
':' ( bareword | global_var | class_var | instance_var |
|
1568
|
-
operator_fname | operator_arithmetic | operator_rest )
|
1569
|
-
=> { emit(:tSYMBOL, tok(@ts + 1))
|
1570
|
-
fbreak; };
|
1571
|
-
|
1572
1652
|
#
|
1573
1653
|
# CONSTANTS AND VARIABLES
|
1574
1654
|
#
|
@@ -1578,7 +1658,7 @@ class Parser::Lexer
|
|
1578
1658
|
fbreak; };
|
1579
1659
|
|
1580
1660
|
constant ambiguous_const_suffix
|
1581
|
-
=> { emit(:tCONSTANT, tok(@ts, tm))
|
1661
|
+
=> { emit(:tCONSTANT, tok(@ts, tm), @ts, tm)
|
1582
1662
|
p = tm - 1; fbreak; };
|
1583
1663
|
|
1584
1664
|
global_var | class_var_v | instance_var_v
|
@@ -1588,7 +1668,7 @@ class Parser::Lexer
|
|
1588
1668
|
# METHOD CALLS
|
1589
1669
|
#
|
1590
1670
|
|
1591
|
-
'.'
|
1671
|
+
'.' | '::'
|
1592
1672
|
=> { emit_table(PUNCTUATION)
|
1593
1673
|
fnext expr_dot; fbreak; };
|
1594
1674
|
|
@@ -1596,8 +1676,9 @@ class Parser::Lexer
|
|
1596
1676
|
=> { emit(:tIDENTIFIER)
|
1597
1677
|
fnext expr_arg; fbreak; };
|
1598
1678
|
|
1599
|
-
call_or_var
|
1600
|
-
=> { emit(:tFID)
|
1679
|
+
call_or_var ambiguous_fid_suffix
|
1680
|
+
=> { emit(:tFID, tok(@ts, tm), @ts, tm)
|
1681
|
+
p = tm - 1
|
1601
1682
|
fnext expr_arg; fbreak; };
|
1602
1683
|
|
1603
1684
|
#
|
@@ -1613,6 +1694,7 @@ class Parser::Lexer
|
|
1613
1694
|
|
1614
1695
|
e_rbrace | e_rparen | ']'
|
1615
1696
|
=> { emit_table(PUNCTUATION)
|
1697
|
+
@cond.lexpop; @cmdarg.lexpop
|
1616
1698
|
fbreak; };
|
1617
1699
|
|
1618
1700
|
operator_arithmetic '='
|
@@ -1623,6 +1705,10 @@ class Parser::Lexer
|
|
1623
1705
|
=> { emit_table(PUNCTUATION)
|
1624
1706
|
fnext expr_value; fbreak; };
|
1625
1707
|
|
1708
|
+
e_lbrack
|
1709
|
+
=> { emit_table(PUNCTUATION)
|
1710
|
+
fnext expr_beg; fbreak; };
|
1711
|
+
|
1626
1712
|
punctuation_end
|
1627
1713
|
=> { emit_table(PUNCTUATION)
|
1628
1714
|
fnext expr_beg; fbreak; };
|
@@ -1632,11 +1718,16 @@ class Parser::Lexer
|
|
1632
1718
|
#
|
1633
1719
|
|
1634
1720
|
'\\' e_heredoc_nl;
|
1635
|
-
|
1636
|
-
|
1721
|
+
|
1722
|
+
'\\' c_line {
|
1723
|
+
diagnostic :error, Parser::ERRORS[:bare_backslash],
|
1724
|
+
range(@ts, @ts + 1)
|
1725
|
+
fhold;
|
1637
1726
|
};
|
1638
1727
|
|
1639
|
-
|
1728
|
+
c_space+;
|
1729
|
+
|
1730
|
+
'#' c_line*
|
1640
1731
|
=> { @comments << tok(@ts, @te + 1) };
|
1641
1732
|
|
1642
1733
|
e_heredoc_nl
|
@@ -1646,11 +1737,10 @@ class Parser::Lexer
|
|
1646
1737
|
=> { emit_table(PUNCTUATION)
|
1647
1738
|
fnext expr_value; fbreak; };
|
1648
1739
|
|
1649
|
-
c_space+;
|
1650
|
-
|
1651
1740
|
c_any
|
1652
1741
|
=> {
|
1653
|
-
|
1742
|
+
message = Parser::ERRORS[:unexpected] % { :character => tok.inspect }
|
1743
|
+
diagnostic :fatal, message
|
1654
1744
|
};
|
1655
1745
|
|
1656
1746
|
c_eof => do_eof;
|
@@ -1681,10 +1771,10 @@ class Parser::Lexer
|
|
1681
1771
|
c_line* c_nl
|
1682
1772
|
=> { @comments << tok };
|
1683
1773
|
|
1684
|
-
|
1774
|
+
c_eof
|
1685
1775
|
=> {
|
1686
|
-
|
1687
|
-
|
1776
|
+
# TODO better location information here
|
1777
|
+
diagnostic :fatal, Parser::ERRORS[:embedded_document], range(p - 1, p)
|
1688
1778
|
};
|
1689
1779
|
*|;
|
1690
1780
|
|