parser 0.9.alpha1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -3
- data/AST_FORMAT.md +1338 -0
- data/README.md +58 -3
- data/Rakefile +32 -12
- data/bin/benchmark +47 -0
- data/bin/explain-parse +14 -0
- data/bin/parse +6 -0
- data/lib/parser.rb +84 -0
- data/lib/parser/all.rb +2 -0
- data/lib/parser/ast/node.rb +11 -0
- data/lib/parser/ast/processor.rb +8 -0
- data/lib/parser/base.rb +116 -0
- data/lib/parser/builders/default.rb +654 -0
- data/lib/parser/compatibility/ruby1_8.rb +13 -0
- data/lib/parser/diagnostic.rb +44 -0
- data/lib/parser/diagnostic/engine.rb +44 -0
- data/lib/parser/lexer.rl +335 -245
- data/lib/parser/lexer/explanation.rb +37 -0
- data/lib/parser/{lexer_literal.rb → lexer/literal.rb} +22 -12
- data/lib/parser/lexer/stack_state.rb +38 -0
- data/lib/parser/ruby18.y +1957 -0
- data/lib/parser/ruby19.y +2154 -0
- data/lib/parser/source/buffer.rb +78 -0
- data/lib/parser/source/map.rb +20 -0
- data/lib/parser/source/map/operator.rb +15 -0
- data/lib/parser/source/map/variable_assignment.rb +15 -0
- data/lib/parser/source/range.rb +66 -0
- data/lib/parser/static_environment.rb +12 -6
- data/parser.gemspec +23 -13
- data/test/helper.rb +45 -0
- data/test/parse_helper.rb +204 -0
- data/test/racc_coverage_helper.rb +130 -0
- data/test/test_diagnostic.rb +47 -0
- data/test/test_diagnostic_engine.rb +58 -0
- data/test/test_lexer.rb +601 -357
- data/test/test_lexer_stack_state.rb +69 -0
- data/test/test_parse_helper.rb +74 -0
- data/test/test_parser.rb +3654 -0
- data/test/test_source_buffer.rb +80 -0
- data/test/test_source_range.rb +51 -0
- data/test/test_static_environment.rb +1 -4
- metadata +137 -12
@@ -0,0 +1,44 @@
|
|
1
|
+
module Parser
|
2
|
+
|
3
|
+
class Diagnostic
|
4
|
+
LEVELS = [:note, :warning, :error, :fatal].freeze
|
5
|
+
|
6
|
+
attr_reader :level, :message
|
7
|
+
attr_reader :location, :highlights
|
8
|
+
|
9
|
+
def initialize(level, message, location, highlights=[])
|
10
|
+
unless LEVELS.include?(level)
|
11
|
+
raise ArgumentError,
|
12
|
+
"Diagnostic#level must be one of #{LEVELS.join(', ')}; " \
|
13
|
+
"#{level.inspect} provided."
|
14
|
+
end
|
15
|
+
|
16
|
+
@level = level
|
17
|
+
@message = message.to_s.dup.freeze
|
18
|
+
@location = location
|
19
|
+
@highlights = highlights.dup.freeze
|
20
|
+
|
21
|
+
freeze
|
22
|
+
end
|
23
|
+
|
24
|
+
def render
|
25
|
+
source_line = @location.source_line
|
26
|
+
highlight_line = ' ' * source_line.length
|
27
|
+
|
28
|
+
@highlights.each do |hilight|
|
29
|
+
range = hilight.column_range
|
30
|
+
highlight_line[range] = '~' * hilight.size
|
31
|
+
end
|
32
|
+
|
33
|
+
range = @location.column_range
|
34
|
+
highlight_line[range] = '^' * @location.size
|
35
|
+
|
36
|
+
[
|
37
|
+
"#{@location.to_s}: #{@level}: #{@message}",
|
38
|
+
source_line,
|
39
|
+
highlight_line,
|
40
|
+
]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Parser
|
2
|
+
|
3
|
+
class Diagnostic::Engine
|
4
|
+
attr_accessor :consumer
|
5
|
+
|
6
|
+
attr_accessor :all_errors_are_fatal
|
7
|
+
attr_accessor :ignore_warnings
|
8
|
+
|
9
|
+
def initialize(consumer=nil)
|
10
|
+
@consumer = consumer
|
11
|
+
|
12
|
+
@all_errors_are_fatal = false
|
13
|
+
@ignore_warnings = false
|
14
|
+
end
|
15
|
+
|
16
|
+
def process(diagnostic)
|
17
|
+
if ignore?(diagnostic)
|
18
|
+
# do nothing
|
19
|
+
elsif @consumer
|
20
|
+
@consumer.call(diagnostic)
|
21
|
+
end
|
22
|
+
|
23
|
+
if raise?(diagnostic)
|
24
|
+
raise Parser::SyntaxError, diagnostic.message
|
25
|
+
end
|
26
|
+
|
27
|
+
self
|
28
|
+
end
|
29
|
+
|
30
|
+
protected
|
31
|
+
|
32
|
+
def ignore?(diagnostic)
|
33
|
+
@ignore_warnings &&
|
34
|
+
diagnostic.level == :warning
|
35
|
+
end
|
36
|
+
|
37
|
+
def raise?(diagnostic)
|
38
|
+
(@all_errors_are_fatal &&
|
39
|
+
diagnostic.level == :error) ||
|
40
|
+
diagnostic.level == :fatal
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
data/lib/parser/lexer.rl
CHANGED
@@ -3,6 +3,9 @@
|
|
3
3
|
#
|
4
4
|
# === BEFORE YOU START ===
|
5
5
|
#
|
6
|
+
# Read the Ruby Hacking Guide chapter 11, available in English at
|
7
|
+
# http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
|
8
|
+
#
|
6
9
|
# Remember two things about Ragel scanners:
|
7
10
|
#
|
8
11
|
# 1) Longest match wins.
|
@@ -38,6 +41,11 @@
|
|
38
41
|
# emit($whatever)
|
39
42
|
# fnext $next_state; fbreak;
|
40
43
|
#
|
44
|
+
# If you perform `fgoto` in an action which neither emits a token nor
|
45
|
+
# rewinds the stream pointer, the parser's side-effectful,
|
46
|
+
# context-sensitive lookahead actions will break in a hard to detect
|
47
|
+
# and debug way.
|
48
|
+
#
|
41
49
|
# * If an action does not emit a token:
|
42
50
|
#
|
43
51
|
# fgoto $next_state;
|
@@ -56,6 +64,8 @@
|
|
56
64
|
# `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
|
57
65
|
# _will_ invoke the action `act`.
|
58
66
|
#
|
67
|
+
# e_something stands for "something with **e**mbedded action".
|
68
|
+
#
|
59
69
|
# * EOF is explicit and is matched by `c_eof`. If you want to introspect
|
60
70
|
# the state of the lexer, add this rule to the state:
|
61
71
|
#
|
@@ -66,49 +76,53 @@
|
|
66
76
|
# NoMethodError: undefined method `ord' for nil:NilClass
|
67
77
|
#
|
68
78
|
|
69
|
-
require 'parser/lexer_literal'
|
70
|
-
require 'parser/syntax_error'
|
71
|
-
|
72
79
|
class Parser::Lexer
|
73
80
|
|
74
81
|
%% write data nofinal;
|
75
82
|
# %
|
76
83
|
|
77
|
-
attr_reader :
|
84
|
+
attr_reader :source_buffer
|
85
|
+
|
86
|
+
attr_accessor :diagnostics
|
78
87
|
attr_accessor :static_env
|
79
88
|
|
80
|
-
|
89
|
+
attr_accessor :cond, :cmdarg
|
90
|
+
|
91
|
+
attr_reader :comments
|
81
92
|
|
82
93
|
def initialize(version)
|
83
|
-
@version
|
94
|
+
@version = version
|
95
|
+
@static_env = nil
|
84
96
|
|
85
97
|
reset
|
86
98
|
end
|
87
99
|
|
88
100
|
def reset(reset_state=true)
|
101
|
+
# Ragel-related variables:
|
89
102
|
if reset_state
|
90
103
|
# Unit tests set state prior to resetting lexer.
|
91
|
-
@cs
|
104
|
+
@cs = self.class.lex_en_line_begin
|
105
|
+
|
106
|
+
@cond = StackState.new('cond')
|
107
|
+
@cmdarg = StackState.new('cmdarg')
|
92
108
|
end
|
93
109
|
|
94
|
-
#
|
95
|
-
@
|
96
|
-
@
|
97
|
-
@
|
98
|
-
@act = 0 # next action
|
110
|
+
@p = 0 # stream position (saved manually in #advance)
|
111
|
+
@ts = nil # token start
|
112
|
+
@te = nil # token end
|
113
|
+
@act = 0 # next action
|
99
114
|
|
100
|
-
@stack
|
101
|
-
@top
|
115
|
+
@stack = [] # state stack
|
116
|
+
@top = 0 # state stack top pointer
|
102
117
|
|
118
|
+
# Lexer state:
|
103
119
|
@token_queue = []
|
104
120
|
@literal_stack = []
|
105
121
|
|
106
|
-
@newlines = [0] # sorted set of \n positions
|
107
|
-
@newline_s = nil # location of last encountered newline
|
108
|
-
@location = nil # location of last #advance'd token
|
109
|
-
|
110
122
|
@comments = "" # collected comments
|
111
123
|
|
124
|
+
@newline_s = nil # location of last encountered newline
|
125
|
+
|
112
126
|
@num_base = nil # last numeric base
|
113
127
|
@num_digits_s = nil # starting position of numeric digits
|
114
128
|
|
@@ -125,15 +139,21 @@ class Parser::Lexer
|
|
125
139
|
@lambda_stack = []
|
126
140
|
end
|
127
141
|
|
128
|
-
def
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
142
|
+
def source_buffer=(source_buffer)
|
143
|
+
@source_buffer = source_buffer
|
144
|
+
|
145
|
+
if @source_buffer
|
146
|
+
# Heredoc processing coupled with weird newline quirks
|
147
|
+
# require three '\0' (EOF) chars to be appended; after
|
148
|
+
# `p = @heredoc_s`, if `p` points at EOF, the FSM could
|
149
|
+
# not bail out early enough and will crash.
|
150
|
+
#
|
151
|
+
# Patches accepted.
|
152
|
+
#
|
153
|
+
@source = @source_buffer.source.gsub(/\r\n/, "\n") + "\0\0\0"
|
154
|
+
else
|
155
|
+
@source = nil
|
156
|
+
end
|
137
157
|
end
|
138
158
|
|
139
159
|
LEX_STATES = {
|
@@ -159,7 +179,7 @@ class Parser::Lexer
|
|
159
179
|
# Return next token: [type, value].
|
160
180
|
def advance
|
161
181
|
if @token_queue.any?
|
162
|
-
return
|
182
|
+
return @token_queue.shift
|
163
183
|
end
|
164
184
|
|
165
185
|
# Ugly, but dependent on Ragel output. Consider refactoring it somehow.
|
@@ -183,24 +203,14 @@ class Parser::Lexer
|
|
183
203
|
@p = p
|
184
204
|
|
185
205
|
if @token_queue.any?
|
186
|
-
|
206
|
+
@token_queue.shift
|
187
207
|
elsif @cs == self.class.lex_error
|
188
|
-
|
208
|
+
[ false, [ '$error', range(p - 1, p) ] ]
|
189
209
|
else
|
190
|
-
|
210
|
+
[ false, [ '$eof', range(p - 1, p) ] ]
|
191
211
|
end
|
192
212
|
end
|
193
213
|
|
194
|
-
# Like #advance, but also pretty-print the token and its position
|
195
|
-
# in the stream to `stdout`.
|
196
|
-
def advance_and_decorate
|
197
|
-
type, val = advance
|
198
|
-
|
199
|
-
puts decorate(location, "\e[0;32m#{type} #{val.inspect}\e[0m")
|
200
|
-
|
201
|
-
[type, val]
|
202
|
-
end
|
203
|
-
|
204
214
|
# Return the current collected comment block and clear the storage.
|
205
215
|
def clear_comments
|
206
216
|
comments = @comments
|
@@ -209,103 +219,42 @@ class Parser::Lexer
|
|
209
219
|
comments
|
210
220
|
end
|
211
221
|
|
212
|
-
# Lex `str` for the Ruby version `version` with initial state `state`.
|
213
|
-
#
|
214
|
-
# The tokens displayed by this function are not the same as tokens
|
215
|
-
# consumed by parser, because the parser manipulates lexer state on
|
216
|
-
# its own.
|
217
|
-
def self.do(source, state=nil, version=19)
|
218
|
-
lex = new(version)
|
219
|
-
lex.source = source
|
220
|
-
lex.state = state if state
|
221
|
-
|
222
|
-
loop do
|
223
|
-
type, val = lex.advance_and_decorate
|
224
|
-
break if !type
|
225
|
-
end
|
226
|
-
|
227
|
-
puts "Lex state: #{lex.state}"
|
228
|
-
end
|
229
|
-
|
230
|
-
# Used by LexerLiteral to emit tokens for string content.
|
231
|
-
def emit(type, value = tok, s = @ts, e = @te)
|
232
|
-
if s.nil? || e.nil?
|
233
|
-
raise "broken #emit invocation in #{caller[0]}"
|
234
|
-
end
|
235
|
-
|
236
|
-
@token_queue << [ type, value, s, e ]
|
237
|
-
end
|
238
|
-
|
239
|
-
def emit_table(table, s = @ts, e = @te)
|
240
|
-
token = tok(s, e)
|
241
|
-
emit(table[token], token, s, e)
|
242
|
-
end
|
243
|
-
|
244
|
-
# shim
|
245
|
-
def lineno
|
246
|
-
@location[0] + 1
|
247
|
-
end
|
248
|
-
|
249
222
|
protected
|
250
223
|
|
251
224
|
def eof_char?(char)
|
252
225
|
[0x04, 0x1a, 0x00].include? char.ord
|
253
226
|
end
|
254
227
|
|
255
|
-
def
|
256
|
-
@version
|
228
|
+
def version?(*versions)
|
229
|
+
versions.include?(@version)
|
257
230
|
end
|
258
231
|
|
259
|
-
def
|
260
|
-
@
|
232
|
+
def stack_pop
|
233
|
+
@top -= 1
|
234
|
+
@stack[@top]
|
261
235
|
end
|
262
236
|
|
263
237
|
def tok(s = @ts, e = @te)
|
264
238
|
@source[s...e]
|
265
239
|
end
|
266
240
|
|
267
|
-
def
|
268
|
-
@
|
269
|
-
end
|
270
|
-
|
271
|
-
def dissect_location(start, finish)
|
272
|
-
line_number = @newlines.rindex { |nl| start >= nl }
|
273
|
-
line_first_col = @newlines[line_number]
|
274
|
-
|
275
|
-
start_col = start - line_first_col
|
276
|
-
finish_col = finish - line_first_col
|
277
|
-
|
278
|
-
[ line_number, start_col, finish_col ]
|
241
|
+
def range(s = @ts, e = @te)
|
242
|
+
Parser::Source::Range.new(@source_buffer, s, e - 1)
|
279
243
|
end
|
280
244
|
|
281
|
-
def
|
282
|
-
type, value,
|
283
|
-
|
284
|
-
@location = dissect_location(start, finish)
|
285
|
-
|
286
|
-
[ type, value ]
|
245
|
+
def emit(type, value = tok, s = @ts, e = @te)
|
246
|
+
@token_queue << [ type, [ value, range(s, e) ] ]
|
287
247
|
end
|
288
248
|
|
289
|
-
def
|
290
|
-
|
291
|
-
|
292
|
-
line = @source.lines.drop(line_number).first
|
293
|
-
line[from...to] = "\e[4m#{line[from...to]}\e[0m"
|
294
|
-
|
295
|
-
tail_len = to - from - 1
|
296
|
-
tail = "~" * (tail_len >= 0 ? tail_len : 0)
|
297
|
-
decoration = "#{" " * from}\e[1;31m^#{tail}\e[0m #{message}"
|
298
|
-
|
299
|
-
[ line, decoration ]
|
300
|
-
end
|
249
|
+
def emit_table(table, s = @ts, e = @te)
|
250
|
+
value = tok(s, e)
|
301
251
|
|
302
|
-
|
303
|
-
$stderr.puts "warning: #{message}"
|
304
|
-
$stderr.puts decorate(dissect_location(start, finish))
|
252
|
+
emit(table[value], value, s, e)
|
305
253
|
end
|
306
254
|
|
307
|
-
def
|
308
|
-
|
255
|
+
def diagnostic(type, message, location=range, highlights=[])
|
256
|
+
@diagnostics.process(
|
257
|
+
Parser::Diagnostic.new(type, message, location, highlights))
|
309
258
|
end
|
310
259
|
|
311
260
|
#
|
@@ -313,10 +262,10 @@ class Parser::Lexer
|
|
313
262
|
#
|
314
263
|
|
315
264
|
def push_literal(*args)
|
316
|
-
new_literal =
|
265
|
+
new_literal = Literal.new(self, *args)
|
317
266
|
@literal_stack.push(new_literal)
|
318
267
|
|
319
|
-
if
|
268
|
+
if new_literal.type == :tWORDS_BEG
|
320
269
|
self.class.lex_en_interp_words
|
321
270
|
elsif new_literal.type == :tQWORDS_BEG
|
322
271
|
self.class.lex_en_plain_words
|
@@ -328,7 +277,7 @@ class Parser::Lexer
|
|
328
277
|
end
|
329
278
|
|
330
279
|
def literal
|
331
|
-
@literal_stack
|
280
|
+
@literal_stack.last
|
332
281
|
end
|
333
282
|
|
334
283
|
def pop_literal
|
@@ -361,7 +310,6 @@ class Parser::Lexer
|
|
361
310
|
'=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
|
362
311
|
'<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
|
363
312
|
'{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
|
364
|
-
'do' => :kDO
|
365
313
|
}
|
366
314
|
|
367
315
|
PUNCTUATION_BEGIN = {
|
@@ -407,7 +355,6 @@ class Parser::Lexer
|
|
407
355
|
#
|
408
356
|
# This action is embedded directly into c_nl, as it is idempotent and
|
409
357
|
# there are no cases when we need to skip it.
|
410
|
-
record_newline(p + 1)
|
411
358
|
@newline_s = p
|
412
359
|
}
|
413
360
|
|
@@ -514,8 +461,8 @@ class Parser::Lexer
|
|
514
461
|
|
515
462
|
# Ruby accepts (and fails on) variables with leading digit
|
516
463
|
# in literal context, but not in unquoted symbol body.
|
517
|
-
class_var_v = '@@'
|
518
|
-
instance_var_v = '@'
|
464
|
+
class_var_v = '@@' c_alnum+;
|
465
|
+
instance_var_v = '@' c_alnum+;
|
519
466
|
|
520
467
|
#
|
521
468
|
# === ESCAPE SEQUENCE PARSING ===
|
@@ -538,7 +485,12 @@ class Parser::Lexer
|
|
538
485
|
codepoint = codepoint_str.to_i(16)
|
539
486
|
|
540
487
|
if codepoint >= 0x110000
|
541
|
-
@escape = lambda
|
488
|
+
@escape = lambda do
|
489
|
+
# TODO better location reporting
|
490
|
+
diagnostic :error, Parser::ERRORS[:unicode_point_too_large],
|
491
|
+
range(@escape_s, p)
|
492
|
+
end
|
493
|
+
|
542
494
|
break
|
543
495
|
end
|
544
496
|
|
@@ -551,30 +503,32 @@ class Parser::Lexer
|
|
551
503
|
'a' => "\a", 'b' => "\b", 'e' => "\e", 'f' => "\f",
|
552
504
|
'n' => "\n", 'r' => "\r", 's' => "\s", 't' => "\t",
|
553
505
|
'v' => "\v", '\\' => "\\"
|
554
|
-
}.fetch(@source[p - 1], @source[p - 1])
|
506
|
+
}.fetch(@source[p - 1].chr, @source[p - 1].chr)
|
555
507
|
}
|
556
508
|
|
557
509
|
action invalid_complex_escape {
|
558
|
-
@escape = lambda
|
510
|
+
@escape = lambda do
|
511
|
+
diagnostic :error, Parser::ERRORS[:invalid_escape]
|
512
|
+
end
|
559
513
|
}
|
560
514
|
|
561
515
|
action slash_c_char {
|
562
|
-
@escape = (@escape.ord & 0x9f).chr
|
516
|
+
@escape = (@escape[0].ord & 0x9f).chr
|
563
517
|
}
|
564
518
|
|
565
519
|
action slash_m_char {
|
566
|
-
@escape = (@escape.ord | 0x80).chr
|
520
|
+
@escape = (@escape[0].ord | 0x80).chr
|
567
521
|
}
|
568
522
|
|
569
523
|
maybe_escaped_char = (
|
570
524
|
'\\' c_any %unescape_char
|
571
|
-
| ( c_any - [\\] ) % { @escape = @source[p - 1] }
|
525
|
+
| ( c_any - [\\] ) % { @escape = @source[p - 1].chr }
|
572
526
|
);
|
573
527
|
|
574
528
|
maybe_escaped_ctrl_char = ( # why?!
|
575
529
|
'\\' c_any %unescape_char %slash_c_char
|
576
530
|
| '?' % { @escape = "\x7f" }
|
577
|
-
| ( c_any - [\\?] ) % { @escape = @source[p - 1] } %slash_c_char
|
531
|
+
| ( c_any - [\\?] ) % { @escape = @source[p - 1].chr } %slash_c_char
|
578
532
|
);
|
579
533
|
|
580
534
|
escape = (
|
@@ -592,7 +546,12 @@ class Parser::Lexer
|
|
592
546
|
|
593
547
|
# %q[\x]
|
594
548
|
| 'x' ( c_any - xdigit )
|
595
|
-
% {
|
549
|
+
% {
|
550
|
+
@escape = lambda do
|
551
|
+
diagnostic :error, Parser::ERRORS[:invalid_hex_escape],
|
552
|
+
range(@escape_s - 1, p + 2)
|
553
|
+
end
|
554
|
+
}
|
596
555
|
|
597
556
|
# %q[\u123] %q[\u{12]
|
598
557
|
| 'u' ( c_any{0,4} -
|
@@ -602,7 +561,12 @@ class Parser::Lexer
|
|
602
561
|
| '{' xdigit{2} [ \t}] # \u{12. \u{12} are valid
|
603
562
|
)
|
604
563
|
)
|
605
|
-
% {
|
564
|
+
% {
|
565
|
+
@escape = lambda do
|
566
|
+
diagnostic :error, Parser::ERRORS[:invalid_unicode_escape],
|
567
|
+
range(@escape_s - 1, p)
|
568
|
+
end
|
569
|
+
}
|
606
570
|
|
607
571
|
# \u{123 456}
|
608
572
|
| 'u{' ( xdigit{1,6} [ \t] )*
|
@@ -611,7 +575,12 @@ class Parser::Lexer
|
|
611
575
|
| ( xdigit* ( c_any - xdigit - '}' )+ '}'
|
612
576
|
| ( c_any - '}' )* c_eof
|
613
577
|
| xdigit{7,}
|
614
|
-
) % {
|
578
|
+
) % {
|
579
|
+
@escape = lambda do
|
580
|
+
diagnostic :fatal, Parser::ERRORS[:unterminated_unicode],
|
581
|
+
range(p - 1, p)
|
582
|
+
end
|
583
|
+
}
|
615
584
|
)
|
616
585
|
|
617
586
|
# \C-\a \cx
|
@@ -635,7 +604,10 @@ class Parser::Lexer
|
|
635
604
|
|
636
605
|
| ( c_any - [0-7xuCMc] ) %unescape_char
|
637
606
|
|
638
|
-
| c_eof % {
|
607
|
+
| c_eof % {
|
608
|
+
diagnostic :fatal, Parser::ERRORS[:escape_eof],
|
609
|
+
range(p - 1, p)
|
610
|
+
}
|
639
611
|
);
|
640
612
|
|
641
613
|
# Use rules in form of `e_bs escape' when you need to parse a sequence.
|
@@ -666,7 +638,7 @@ class Parser::Lexer
|
|
666
638
|
# of positions in the input stream, namely @heredoc_e
|
667
639
|
# (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
|
668
640
|
#
|
669
|
-
# @heredoc_e is simply contained inside the corresponding
|
641
|
+
# @heredoc_e is simply contained inside the corresponding Literal, and
|
670
642
|
# when the heredoc is closed, the lexing is restarted from that position.
|
671
643
|
#
|
672
644
|
# @herebody_s is quite more complex. First, @herebody_s changes after each
|
@@ -683,14 +655,14 @@ class Parser::Lexer
|
|
683
655
|
# After every heredoc was parsed, @herebody_s contains the
|
684
656
|
# position of next token after all heredocs.
|
685
657
|
if @herebody_s
|
686
|
-
p = @herebody_s
|
658
|
+
p = @herebody_s - 1
|
687
659
|
@herebody_s = nil
|
688
660
|
end
|
689
661
|
};
|
690
662
|
|
691
663
|
action extend_string {
|
692
664
|
if literal.nest_and_try_closing tok, @ts, @te
|
693
|
-
|
665
|
+
fnext *pop_literal; fbreak;
|
694
666
|
else
|
695
667
|
literal.extend_string tok, @ts, @te
|
696
668
|
end
|
@@ -701,10 +673,10 @@ class Parser::Lexer
|
|
701
673
|
# If the literal is actually closed by the backslash,
|
702
674
|
# rewind the input prior to consuming the escape sequence.
|
703
675
|
p = @escape_s - 1
|
704
|
-
|
676
|
+
fnext *pop_literal; fbreak;
|
705
677
|
else
|
706
678
|
# Get the first character after the backslash.
|
707
|
-
escaped_char = @source[@escape_s]
|
679
|
+
escaped_char = @source[@escape_s].chr
|
708
680
|
|
709
681
|
if literal.munge_escape? escaped_char
|
710
682
|
# If this particular literal uses this character as an opening
|
@@ -765,11 +737,12 @@ class Parser::Lexer
|
|
765
737
|
end
|
766
738
|
|
767
739
|
if is_eof
|
768
|
-
|
740
|
+
diagnostic :fatal, Parser::ERRORS[:string_eof],
|
741
|
+
range(literal.str_s, literal.str_s + 1)
|
769
742
|
end
|
770
743
|
|
771
744
|
# A literal newline is appended if the heredoc was _not_ closed
|
772
|
-
# this time. See also
|
745
|
+
# this time. See also Literal#nest_and_try_closing for rationale of
|
773
746
|
# calling #flush_string here.
|
774
747
|
literal.extend_string tok, @ts, @te
|
775
748
|
literal.flush_string
|
@@ -782,8 +755,7 @@ class Parser::Lexer
|
|
782
755
|
# Interpolations with immediate variable names simply call into
|
783
756
|
# the corresponding machine.
|
784
757
|
|
785
|
-
interp_var =
|
786
|
-
'#' ( global_var | class_var_v | instance_var_v );
|
758
|
+
interp_var = '#' ( global_var | class_var_v | instance_var_v );
|
787
759
|
|
788
760
|
action extend_interp_var {
|
789
761
|
literal.flush_string
|
@@ -808,6 +780,8 @@ class Parser::Lexer
|
|
808
780
|
interp_code = '#{';
|
809
781
|
|
810
782
|
e_lbrace = '{' % {
|
783
|
+
@cond.push(false); @cmdarg.push(false)
|
784
|
+
|
811
785
|
if literal
|
812
786
|
literal.start_interp_brace
|
813
787
|
end
|
@@ -827,7 +801,7 @@ class Parser::Lexer
|
|
827
801
|
end
|
828
802
|
|
829
803
|
fhold;
|
830
|
-
fnext
|
804
|
+
fnext *stack_pop;
|
831
805
|
fbreak;
|
832
806
|
end
|
833
807
|
end
|
@@ -872,6 +846,7 @@ class Parser::Lexer
|
|
872
846
|
*|;
|
873
847
|
|
874
848
|
plain_string := |*
|
849
|
+
'\\' c_nl => extend_string_eol;
|
875
850
|
e_bs c_any => extend_string_escaped;
|
876
851
|
c_eol => extend_string_eol;
|
877
852
|
c_any => extend_string;
|
@@ -882,11 +857,12 @@ class Parser::Lexer
|
|
882
857
|
=> {
|
883
858
|
unknown_options = tok.scan(/[^imxouesn]/)
|
884
859
|
if unknown_options.any?
|
885
|
-
|
860
|
+
message = Parser::ERRORS[:regexp_options] % { :options => unknown_options.join }
|
861
|
+
diagnostic :error, message
|
886
862
|
end
|
887
863
|
|
888
864
|
emit(:tREGEXP_OPT)
|
889
|
-
|
865
|
+
fnext expr_end; fbreak;
|
890
866
|
};
|
891
867
|
|
892
868
|
any
|
@@ -904,11 +880,17 @@ class Parser::Lexer
|
|
904
880
|
# The default longest-match scanning does not work here due
|
905
881
|
# to sheer ambiguity.
|
906
882
|
|
883
|
+
ambiguous_fid_suffix = # actual parsed
|
884
|
+
[?!] %{ tm = p } | # a? a?
|
885
|
+
'!=' %{ tm = p - 2 } # a!=b a != b
|
886
|
+
;
|
887
|
+
|
907
888
|
ambiguous_ident_suffix = # actual parsed
|
908
|
-
|
909
|
-
'
|
910
|
-
'
|
911
|
-
'
|
889
|
+
ambiguous_fid_suffix |
|
890
|
+
'=' %{ tm = p } | # a= a=
|
891
|
+
'==' %{ tm = p - 2 } | # a==b a == b
|
892
|
+
'=~' %{ tm = p - 2 } | # a=~b a =~ b
|
893
|
+
'=>' %{ tm = p - 2 } | # a=>b a => b
|
912
894
|
'===' %{ tm = p - 3 } # a===b a === b
|
913
895
|
;
|
914
896
|
|
@@ -922,15 +904,24 @@ class Parser::Lexer
|
|
922
904
|
'::' %{ tm = p - 2 } # A::B A :: B
|
923
905
|
;
|
924
906
|
|
907
|
+
# Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
|
908
|
+
# @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
|
909
|
+
|
910
|
+
e_lbrack = '[' % {
|
911
|
+
@cond.push(false); @cmdarg.push(false)
|
912
|
+
};
|
913
|
+
|
925
914
|
# Ruby 1.9 lambdas require parentheses counting in order to
|
926
915
|
# emit correct opening kDO/tLBRACE.
|
927
916
|
|
928
917
|
e_lparen = '(' % {
|
929
|
-
|
918
|
+
@cond.push(false); @cmdarg.push(false)
|
919
|
+
|
920
|
+
@paren_nest += 1
|
930
921
|
};
|
931
922
|
|
932
923
|
e_rparen = ')' % {
|
933
|
-
|
924
|
+
@paren_nest -= 1
|
934
925
|
};
|
935
926
|
|
936
927
|
# Variable lexing code is accessed from both expressions and
|
@@ -940,30 +931,36 @@ class Parser::Lexer
|
|
940
931
|
global_var
|
941
932
|
=> {
|
942
933
|
if tok =~ /^\$([1-9][0-9]*)$/
|
943
|
-
emit(:tNTH_REF,
|
934
|
+
emit(:tNTH_REF, tok(@ts + 1).to_i)
|
944
935
|
elsif tok =~ /^\$([&`'+])$/
|
945
|
-
emit(:tBACK_REF
|
936
|
+
emit(:tBACK_REF)
|
946
937
|
else
|
947
938
|
emit(:tGVAR)
|
948
939
|
end
|
949
940
|
|
950
|
-
fnext
|
941
|
+
fnext *stack_pop; fbreak;
|
951
942
|
};
|
952
943
|
|
953
944
|
class_var_v
|
954
945
|
=> {
|
955
|
-
|
946
|
+
if tok =~ /^@@[0-9]/
|
947
|
+
message = Parser::ERRORS[:cvar_name] % { :name => tok }
|
948
|
+
diagnostic :error, message
|
949
|
+
end
|
956
950
|
|
957
951
|
emit(:tCVAR)
|
958
|
-
fnext
|
952
|
+
fnext *stack_pop; fbreak;
|
959
953
|
};
|
960
954
|
|
961
955
|
instance_var_v
|
962
956
|
=> {
|
963
|
-
|
957
|
+
if tok =~ /^@[0-9]/
|
958
|
+
message = Parser::ERRORS[:ivar_name] % { :name => tok }
|
959
|
+
diagnostic :error, message
|
960
|
+
end
|
964
961
|
|
965
962
|
emit(:tIVAR)
|
966
|
-
fnext
|
963
|
+
fnext *stack_pop; fbreak;
|
967
964
|
};
|
968
965
|
*|;
|
969
966
|
|
@@ -996,11 +993,11 @@ class Parser::Lexer
|
|
996
993
|
fnext expr_end; fbreak; };
|
997
994
|
|
998
995
|
':'
|
999
|
-
=> { fhold; fgoto
|
996
|
+
=> { fhold; fgoto expr_beg; };
|
1000
997
|
|
1001
998
|
global_var
|
1002
|
-
=> {
|
1003
|
-
|
999
|
+
=> { p = @ts - 1
|
1000
|
+
fcall expr_variable; };
|
1004
1001
|
|
1005
1002
|
c_space_nl+;
|
1006
1003
|
|
@@ -1015,12 +1012,16 @@ class Parser::Lexer
|
|
1015
1012
|
# Transitions to `expr_arg` afterwards.
|
1016
1013
|
#
|
1017
1014
|
expr_dot := |*
|
1018
|
-
|
1015
|
+
constant
|
1016
|
+
=> { emit(:tCONSTANT)
|
1017
|
+
fnext expr_arg; fbreak; };
|
1018
|
+
|
1019
|
+
call_or_var
|
1019
1020
|
=> { emit(:tIDENTIFIER)
|
1020
1021
|
fnext expr_arg; fbreak; };
|
1021
1022
|
|
1022
|
-
|
1023
|
-
=> { emit(:
|
1023
|
+
call_or_var ambiguous_ident_suffix
|
1024
|
+
=> { emit(:tFID, tok(@ts, tm), @ts, tm)
|
1024
1025
|
fnext expr_arg; p = tm - 1; fbreak; };
|
1025
1026
|
|
1026
1027
|
operator_fname |
|
@@ -1031,6 +1032,8 @@ class Parser::Lexer
|
|
1031
1032
|
|
1032
1033
|
c_space_nl+;
|
1033
1034
|
|
1035
|
+
'#' c_line* c_nl;
|
1036
|
+
|
1034
1037
|
c_any
|
1035
1038
|
=> { fhold; fgoto expr_end; };
|
1036
1039
|
|
@@ -1059,8 +1062,8 @@ class Parser::Lexer
|
|
1059
1062
|
|
1060
1063
|
# meth [...]
|
1061
1064
|
# Array argument. Compare with indexing `meth[...]`.
|
1062
|
-
c_space+
|
1063
|
-
=> { emit(:tLBRACK, '[', @te - 1, @te)
|
1065
|
+
c_space+ e_lbrack
|
1066
|
+
=> { emit(:tLBRACK, '[', @te - 1, @te)
|
1064
1067
|
fnext expr_beg; fbreak; };
|
1065
1068
|
|
1066
1069
|
# cmd {}
|
@@ -1076,12 +1079,6 @@ class Parser::Lexer
|
|
1076
1079
|
end
|
1077
1080
|
};
|
1078
1081
|
|
1079
|
-
# a.b
|
1080
|
-
# Dot-call.
|
1081
|
-
'.' | '::'
|
1082
|
-
=> { emit_table(PUNCTUATION);
|
1083
|
-
fnext expr_dot; fbreak; };
|
1084
|
-
|
1085
1082
|
#
|
1086
1083
|
# AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
|
1087
1084
|
#
|
@@ -1091,11 +1088,22 @@ class Parser::Lexer
|
|
1091
1088
|
c_space+ '?'
|
1092
1089
|
=> { fhold; fgoto expr_beg; };
|
1093
1090
|
|
1091
|
+
# a %{1}, a %[1] (but not "a %=1=" or "a % foo")
|
1092
|
+
c_space+ ( '%' [^= ]
|
1093
|
+
# a /foo/ (but not "a / foo" or "a /=foo")
|
1094
|
+
| '/' ( c_any - c_space_nl - '=' )
|
1095
|
+
# a <<HEREDOC
|
1096
|
+
| '<<'
|
1097
|
+
)
|
1098
|
+
=> { fhold; fhold; fgoto expr_beg; };
|
1099
|
+
|
1094
1100
|
# x +1
|
1095
1101
|
# Ambiguous unary operator or regexp literal.
|
1096
1102
|
c_space+ [+\-/]
|
1097
1103
|
=> {
|
1098
|
-
warning
|
1104
|
+
diagnostic :warning, Parser::ERRORS[:ambiguous_literal],
|
1105
|
+
range(@te - 1, @te)
|
1106
|
+
|
1099
1107
|
fhold; fhold; fgoto expr_beg;
|
1100
1108
|
};
|
1101
1109
|
|
@@ -1103,11 +1111,23 @@ class Parser::Lexer
|
|
1103
1111
|
# Ambiguous splat or block-pass.
|
1104
1112
|
c_space+ [*&]
|
1105
1113
|
=> {
|
1106
|
-
|
1107
|
-
warning
|
1114
|
+
message = Parser::ERRORS[:ambiguous_prefix] % { :prefix => tok(@te - 1, @te) }
|
1115
|
+
diagnostic :warning, message,
|
1116
|
+
range(@te - 1, @te)
|
1117
|
+
|
1108
1118
|
fhold; fgoto expr_beg;
|
1109
1119
|
};
|
1110
1120
|
|
1121
|
+
# x ::Foo
|
1122
|
+
# Ambiguous toplevel constant access.
|
1123
|
+
c_space+ '::'
|
1124
|
+
=> { fhold; fhold; fgoto expr_beg; };
|
1125
|
+
|
1126
|
+
# x:b
|
1127
|
+
# Symbol.
|
1128
|
+
c_space* ':'
|
1129
|
+
=> { fhold; fgoto expr_beg; };
|
1130
|
+
|
1111
1131
|
#
|
1112
1132
|
# AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
|
1113
1133
|
#
|
@@ -1129,7 +1149,7 @@ class Parser::Lexer
|
|
1129
1149
|
fgoto expr_end;
|
1130
1150
|
};
|
1131
1151
|
|
1132
|
-
c_space* c_nl
|
1152
|
+
c_space* ( '#' c_line* )? c_nl
|
1133
1153
|
=> { fhold; fgoto expr_end; };
|
1134
1154
|
|
1135
1155
|
c_any
|
@@ -1152,8 +1172,7 @@ class Parser::Lexer
|
|
1152
1172
|
# `{` as `tLBRACE_ARG`.
|
1153
1173
|
#
|
1154
1174
|
# The default post-`expr_endarg` state is `expr_end`, so this state also handles
|
1155
|
-
# `do` (as `kDO_BLOCK` in `expr_beg`).
|
1156
|
-
# just handle `kDO`.)
|
1175
|
+
# `do` (as `kDO_BLOCK` in `expr_beg`).
|
1157
1176
|
expr_endarg := |*
|
1158
1177
|
e_lbrace
|
1159
1178
|
=> { emit(:tLBRACE_ARG)
|
@@ -1183,6 +1202,8 @@ class Parser::Lexer
|
|
1183
1202
|
|
1184
1203
|
c_space+;
|
1185
1204
|
|
1205
|
+
'#' c_line*;
|
1206
|
+
|
1186
1207
|
c_nl
|
1187
1208
|
=> { fhold; fgoto expr_end; };
|
1188
1209
|
|
@@ -1221,39 +1242,32 @@ class Parser::Lexer
|
|
1221
1242
|
# STRING AND REGEXP LITERALS
|
1222
1243
|
#
|
1223
1244
|
|
1224
|
-
# a / 42
|
1225
|
-
# a % 42
|
1226
|
-
# a %= 42 (disambiguation with %=string=)
|
1227
|
-
[/%] c_space_nl | '%=' # /
|
1228
|
-
=> {
|
1229
|
-
fhold; fhold;
|
1230
|
-
fgoto expr_end;
|
1231
|
-
};
|
1232
|
-
|
1233
1245
|
# /regexp/oui
|
1234
|
-
|
1246
|
+
# /=/ (disambiguation with /=)
|
1247
|
+
'/' c_any
|
1235
1248
|
=> {
|
1236
|
-
type
|
1237
|
-
fgoto *push_literal(type, delimiter, @ts);
|
1249
|
+
type = delimiter = tok[0].chr
|
1250
|
+
fhold; fgoto *push_literal(type, delimiter, @ts);
|
1238
1251
|
};
|
1239
1252
|
|
1240
1253
|
# %<string>
|
1241
1254
|
'%' ( c_any - [A-Za-z] )
|
1242
1255
|
=> {
|
1243
|
-
type, delimiter = tok[0], tok[-1]
|
1256
|
+
type, delimiter = tok[0].chr, tok[-1].chr
|
1244
1257
|
fgoto *push_literal(type, delimiter, @ts);
|
1245
1258
|
};
|
1246
1259
|
|
1247
1260
|
# %w(we are the people)
|
1248
1261
|
'%' [A-Za-z]+ c_any
|
1249
1262
|
=> {
|
1250
|
-
type, delimiter = tok[0..-2], tok[-1]
|
1263
|
+
type, delimiter = tok[0..-2], tok[-1].chr
|
1251
1264
|
fgoto *push_literal(type, delimiter, @ts);
|
1252
1265
|
};
|
1253
1266
|
|
1254
1267
|
'%' c_eof
|
1255
1268
|
=> {
|
1256
|
-
|
1269
|
+
diagnostic :fatal, Parser::ERRORS[:string_eof],
|
1270
|
+
range(@ts, @ts + 1)
|
1257
1271
|
};
|
1258
1272
|
|
1259
1273
|
# Heredoc start.
|
@@ -1280,6 +1294,31 @@ class Parser::Lexer
|
|
1280
1294
|
p = @herebody_s - 1
|
1281
1295
|
};
|
1282
1296
|
|
1297
|
+
#
|
1298
|
+
# SYMBOL LITERALS
|
1299
|
+
#
|
1300
|
+
|
1301
|
+
# :"bar", :'baz'
|
1302
|
+
':' ['"] # '
|
1303
|
+
=> {
|
1304
|
+
type, delimiter = tok, tok[-1].chr
|
1305
|
+
fgoto *push_literal(type, delimiter, @ts);
|
1306
|
+
};
|
1307
|
+
|
1308
|
+
':' bareword ambiguous_symbol_suffix
|
1309
|
+
=> {
|
1310
|
+
emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
|
1311
|
+
p = tm - 1
|
1312
|
+
fnext expr_end; fbreak;
|
1313
|
+
};
|
1314
|
+
|
1315
|
+
':' ( bareword | global_var | class_var | instance_var |
|
1316
|
+
operator_fname | operator_arithmetic | operator_rest )
|
1317
|
+
=> {
|
1318
|
+
emit(:tSYMBOL, tok(@ts + 1), @ts)
|
1319
|
+
fnext expr_end; fbreak;
|
1320
|
+
};
|
1321
|
+
|
1283
1322
|
#
|
1284
1323
|
# AMBIGUOUS TERNARY OPERATOR
|
1285
1324
|
#
|
@@ -1293,20 +1332,22 @@ class Parser::Lexer
|
|
1293
1332
|
|
1294
1333
|
value = @escape || tok(@ts + 1)
|
1295
1334
|
|
1296
|
-
if
|
1297
|
-
emit(:tINTEGER, value.ord)
|
1335
|
+
if version?(18)
|
1336
|
+
emit(:tINTEGER, value[0].ord)
|
1298
1337
|
else
|
1299
1338
|
emit(:tSTRING, value)
|
1300
1339
|
end
|
1301
1340
|
|
1302
|
-
fbreak;
|
1341
|
+
fnext expr_end; fbreak;
|
1303
1342
|
};
|
1304
1343
|
|
1305
1344
|
'?' c_space_nl
|
1306
1345
|
=> {
|
1307
1346
|
escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
|
1308
1347
|
"\v" => '\v', "\f" => '\f' }[tok[@ts + 1]]
|
1309
|
-
|
1348
|
+
message = Parser::ERRORS[:invalid_escape_use] % { :escape => escape }
|
1349
|
+
diagnostic :warning, message,
|
1350
|
+
range(@ts, @ts + 1)
|
1310
1351
|
|
1311
1352
|
p = @ts - 1
|
1312
1353
|
fgoto expr_end;
|
@@ -1314,7 +1355,8 @@ class Parser::Lexer
|
|
1314
1355
|
|
1315
1356
|
'?' c_eof
|
1316
1357
|
=> {
|
1317
|
-
|
1358
|
+
diagnostic :fatal, Parser::ERRORS[:incomplete_escape],
|
1359
|
+
range(@ts, @ts + 1)
|
1318
1360
|
};
|
1319
1361
|
|
1320
1362
|
# f ?aa : b: Disambiguate with a character literal.
|
@@ -1328,15 +1370,20 @@ class Parser::Lexer
|
|
1328
1370
|
# KEYWORDS AND PUNCTUATION
|
1329
1371
|
#
|
1330
1372
|
|
1331
|
-
# a(
|
1332
|
-
|
1373
|
+
# a([1, 2])
|
1374
|
+
e_lbrack |
|
1333
1375
|
# a({b=>c})
|
1334
|
-
e_lbrace
|
1376
|
+
e_lbrace |
|
1335
1377
|
# a()
|
1336
1378
|
e_lparen
|
1337
1379
|
=> { emit_table(PUNCTUATION_BEGIN)
|
1338
1380
|
fbreak; };
|
1339
1381
|
|
1382
|
+
# a(+b)
|
1383
|
+
punctuation_begin
|
1384
|
+
=> { emit_table(PUNCTUATION_BEGIN)
|
1385
|
+
fbreak; };
|
1386
|
+
|
1340
1387
|
# rescue Exception => e: Block rescue.
|
1341
1388
|
# Special because it should transition to expr_mid.
|
1342
1389
|
'rescue'
|
@@ -1356,7 +1403,7 @@ class Parser::Lexer
|
|
1356
1403
|
=> {
|
1357
1404
|
fhold;
|
1358
1405
|
|
1359
|
-
if
|
1406
|
+
if version?(18)
|
1360
1407
|
emit(:tIDENTIFIER, tok(@ts, @te - 2), @ts, @te - 2)
|
1361
1408
|
fhold; # continue as a symbol
|
1362
1409
|
else
|
@@ -1383,14 +1430,32 @@ class Parser::Lexer
|
|
1383
1430
|
=> {
|
1384
1431
|
emit(:tIDENTIFIER)
|
1385
1432
|
|
1386
|
-
if @static_env && @static_env.declared?(tok
|
1387
|
-
|
1433
|
+
if @static_env && @static_env.declared?(tok)
|
1434
|
+
fnext expr_end; fbreak;
|
1388
1435
|
else
|
1389
|
-
|
1436
|
+
fnext expr_arg; fbreak;
|
1390
1437
|
end
|
1391
1438
|
};
|
1392
1439
|
|
1393
|
-
|
1440
|
+
#
|
1441
|
+
# WHITESPACE
|
1442
|
+
#
|
1443
|
+
|
1444
|
+
c_space_nl;
|
1445
|
+
|
1446
|
+
'\\\n';
|
1447
|
+
|
1448
|
+
'#' c_line* c_eol
|
1449
|
+
=> { @comments << tok
|
1450
|
+
fhold; };
|
1451
|
+
|
1452
|
+
c_nl '=begin' ( c_space | c_eol )
|
1453
|
+
=> { p = @ts - 1
|
1454
|
+
fgoto line_begin; };
|
1455
|
+
|
1456
|
+
#
|
1457
|
+
# DEFAULT TRANSITION
|
1458
|
+
#
|
1394
1459
|
|
1395
1460
|
# The following rules match most binary and all unary operators.
|
1396
1461
|
# Rules for binary operators provide better error reporting.
|
@@ -1439,11 +1504,21 @@ class Parser::Lexer
|
|
1439
1504
|
|
1440
1505
|
if tok == '{'
|
1441
1506
|
emit(:tLAMBEG)
|
1442
|
-
else
|
1507
|
+
else # 'do'
|
1443
1508
|
emit(:kDO_LAMBDA)
|
1444
1509
|
end
|
1445
1510
|
else
|
1446
|
-
|
1511
|
+
if tok == '{'
|
1512
|
+
emit_table(PUNCTUATION)
|
1513
|
+
else # 'do'
|
1514
|
+
if @cond.active?
|
1515
|
+
emit(:kDO_COND)
|
1516
|
+
elsif @cmdarg.active?
|
1517
|
+
emit(:kDO_BLOCK)
|
1518
|
+
else
|
1519
|
+
emit(:kDO)
|
1520
|
+
end
|
1521
|
+
end
|
1447
1522
|
end
|
1448
1523
|
|
1449
1524
|
fnext expr_value; fbreak;
|
@@ -1457,7 +1532,7 @@ class Parser::Lexer
|
|
1457
1532
|
=> { emit_table(KEYWORDS)
|
1458
1533
|
fnext expr_fname; fbreak; };
|
1459
1534
|
|
1460
|
-
'class' c_space_nl '<<'
|
1535
|
+
'class' c_space_nl* '<<'
|
1461
1536
|
=> { emit(:kCLASS, 'class', @ts, @ts + 5)
|
1462
1537
|
emit(:tLSHFT, '<<', @te - 2, @te)
|
1463
1538
|
fnext expr_beg; fbreak; };
|
@@ -1480,13 +1555,23 @@ class Parser::Lexer
|
|
1480
1555
|
=> {
|
1481
1556
|
emit_table(KEYWORDS)
|
1482
1557
|
|
1483
|
-
if
|
1558
|
+
if version?(18) && tok == 'not'
|
1484
1559
|
fnext expr_beg; fbreak;
|
1485
1560
|
else
|
1486
1561
|
fnext expr_arg; fbreak;
|
1487
1562
|
end
|
1488
1563
|
};
|
1489
1564
|
|
1565
|
+
'__ENCODING__'
|
1566
|
+
=> {
|
1567
|
+
if version?(18)
|
1568
|
+
emit(:tIDENTIFIER)
|
1569
|
+
else
|
1570
|
+
emit_table(KEYWORDS)
|
1571
|
+
end
|
1572
|
+
fbreak;
|
1573
|
+
};
|
1574
|
+
|
1490
1575
|
keyword_with_end
|
1491
1576
|
=> { emit_table(KEYWORDS)
|
1492
1577
|
fbreak; };
|
@@ -1503,7 +1588,8 @@ class Parser::Lexer
|
|
1503
1588
|
( digit+ '_' )* digit* '_'?
|
1504
1589
|
| '0' [Bb] %{ @num_base = 2; @num_digits_s = p }
|
1505
1590
|
( [01]+ '_' )* [01]* '_'?
|
1506
|
-
| [1-9]
|
1591
|
+
| [1-9] digit*
|
1592
|
+
%{ @num_base = 10; @num_digits_s = @ts }
|
1507
1593
|
( '_' digit+ )* digit* '_'?
|
1508
1594
|
| '0' %{ @num_base = 8; @num_digits_s = @ts }
|
1509
1595
|
( '_' digit+ )* digit* '_'?
|
@@ -1512,14 +1598,17 @@ class Parser::Lexer
|
|
1512
1598
|
digits = tok(@num_digits_s)
|
1513
1599
|
|
1514
1600
|
if digits.end_with? '_'
|
1515
|
-
error
|
1516
|
-
|
1601
|
+
diagnostic :error, Parser::ERRORS[:trailing_underscore],
|
1602
|
+
range(@te - 1, @te)
|
1603
|
+
elsif digits.empty? && @num_base == 8 && version?(18)
|
1517
1604
|
# 1.8 did not raise an error on 0o.
|
1518
1605
|
digits = "0"
|
1519
1606
|
elsif digits.empty?
|
1520
|
-
error
|
1521
|
-
elsif @num_base == 8 &&
|
1522
|
-
|
1607
|
+
diagnostic :error, Parser::ERRORS[:empty_numeric]
|
1608
|
+
elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
|
1609
|
+
invalid_s = @num_digits_s + invalid_idx
|
1610
|
+
diagnostic :error, Parser::ERRORS[:invalid_octal],
|
1611
|
+
range(invalid_s, invalid_s + 1)
|
1523
1612
|
end
|
1524
1613
|
|
1525
1614
|
emit(:tINTEGER, digits.to_i(@num_base))
|
@@ -1537,7 +1626,7 @@ class Parser::Lexer
|
|
1537
1626
|
)
|
1538
1627
|
=> {
|
1539
1628
|
if tok.start_with? '.'
|
1540
|
-
error
|
1629
|
+
diagnostic :error, Parser::ERRORS[:no_dot_digit_literal]
|
1541
1630
|
elsif tok =~ /^[eE]/
|
1542
1631
|
# The rule above allows to specify floats as just `e10', which is
|
1543
1632
|
# certainly not a float. Send a patch if you can do this better.
|
@@ -1550,25 +1639,16 @@ class Parser::Lexer
|
|
1550
1639
|
};
|
1551
1640
|
|
1552
1641
|
#
|
1553
|
-
#
|
1642
|
+
# STRING AND XSTRING LITERALS
|
1554
1643
|
#
|
1555
1644
|
|
1556
|
-
# `echo foo
|
1557
|
-
'`' |
|
1645
|
+
# `echo foo`, "bar", 'baz'
|
1646
|
+
'`' | ['"] # '
|
1558
1647
|
=> {
|
1559
|
-
type, delimiter = tok, tok[-1]
|
1648
|
+
type, delimiter = tok, tok[-1].chr
|
1560
1649
|
fgoto *push_literal(type, delimiter, @ts);
|
1561
1650
|
};
|
1562
1651
|
|
1563
|
-
':' bareword ambiguous_symbol_suffix
|
1564
|
-
=> { emit(:tSYMBOL, tok(@ts + 1, tm))
|
1565
|
-
p = tm - 1; fbreak; };
|
1566
|
-
|
1567
|
-
':' ( bareword | global_var | class_var | instance_var |
|
1568
|
-
operator_fname | operator_arithmetic | operator_rest )
|
1569
|
-
=> { emit(:tSYMBOL, tok(@ts + 1))
|
1570
|
-
fbreak; };
|
1571
|
-
|
1572
1652
|
#
|
1573
1653
|
# CONSTANTS AND VARIABLES
|
1574
1654
|
#
|
@@ -1578,7 +1658,7 @@ class Parser::Lexer
|
|
1578
1658
|
fbreak; };
|
1579
1659
|
|
1580
1660
|
constant ambiguous_const_suffix
|
1581
|
-
=> { emit(:tCONSTANT, tok(@ts, tm))
|
1661
|
+
=> { emit(:tCONSTANT, tok(@ts, tm), @ts, tm)
|
1582
1662
|
p = tm - 1; fbreak; };
|
1583
1663
|
|
1584
1664
|
global_var | class_var_v | instance_var_v
|
@@ -1588,7 +1668,7 @@ class Parser::Lexer
|
|
1588
1668
|
# METHOD CALLS
|
1589
1669
|
#
|
1590
1670
|
|
1591
|
-
'.'
|
1671
|
+
'.' | '::'
|
1592
1672
|
=> { emit_table(PUNCTUATION)
|
1593
1673
|
fnext expr_dot; fbreak; };
|
1594
1674
|
|
@@ -1596,8 +1676,9 @@ class Parser::Lexer
|
|
1596
1676
|
=> { emit(:tIDENTIFIER)
|
1597
1677
|
fnext expr_arg; fbreak; };
|
1598
1678
|
|
1599
|
-
call_or_var
|
1600
|
-
=> { emit(:tFID)
|
1679
|
+
call_or_var ambiguous_fid_suffix
|
1680
|
+
=> { emit(:tFID, tok(@ts, tm), @ts, tm)
|
1681
|
+
p = tm - 1
|
1601
1682
|
fnext expr_arg; fbreak; };
|
1602
1683
|
|
1603
1684
|
#
|
@@ -1613,6 +1694,7 @@ class Parser::Lexer
|
|
1613
1694
|
|
1614
1695
|
e_rbrace | e_rparen | ']'
|
1615
1696
|
=> { emit_table(PUNCTUATION)
|
1697
|
+
@cond.lexpop; @cmdarg.lexpop
|
1616
1698
|
fbreak; };
|
1617
1699
|
|
1618
1700
|
operator_arithmetic '='
|
@@ -1623,6 +1705,10 @@ class Parser::Lexer
|
|
1623
1705
|
=> { emit_table(PUNCTUATION)
|
1624
1706
|
fnext expr_value; fbreak; };
|
1625
1707
|
|
1708
|
+
e_lbrack
|
1709
|
+
=> { emit_table(PUNCTUATION)
|
1710
|
+
fnext expr_beg; fbreak; };
|
1711
|
+
|
1626
1712
|
punctuation_end
|
1627
1713
|
=> { emit_table(PUNCTUATION)
|
1628
1714
|
fnext expr_beg; fbreak; };
|
@@ -1632,11 +1718,16 @@ class Parser::Lexer
|
|
1632
1718
|
#
|
1633
1719
|
|
1634
1720
|
'\\' e_heredoc_nl;
|
1635
|
-
|
1636
|
-
|
1721
|
+
|
1722
|
+
'\\' c_line {
|
1723
|
+
diagnostic :error, Parser::ERRORS[:bare_backslash],
|
1724
|
+
range(@ts, @ts + 1)
|
1725
|
+
fhold;
|
1637
1726
|
};
|
1638
1727
|
|
1639
|
-
|
1728
|
+
c_space+;
|
1729
|
+
|
1730
|
+
'#' c_line*
|
1640
1731
|
=> { @comments << tok(@ts, @te + 1) };
|
1641
1732
|
|
1642
1733
|
e_heredoc_nl
|
@@ -1646,11 +1737,10 @@ class Parser::Lexer
|
|
1646
1737
|
=> { emit_table(PUNCTUATION)
|
1647
1738
|
fnext expr_value; fbreak; };
|
1648
1739
|
|
1649
|
-
c_space+;
|
1650
|
-
|
1651
1740
|
c_any
|
1652
1741
|
=> {
|
1653
|
-
|
1742
|
+
message = Parser::ERRORS[:unexpected] % { :character => tok.inspect }
|
1743
|
+
diagnostic :fatal, message
|
1654
1744
|
};
|
1655
1745
|
|
1656
1746
|
c_eof => do_eof;
|
@@ -1681,10 +1771,10 @@ class Parser::Lexer
|
|
1681
1771
|
c_line* c_nl
|
1682
1772
|
=> { @comments << tok };
|
1683
1773
|
|
1684
|
-
|
1774
|
+
c_eof
|
1685
1775
|
=> {
|
1686
|
-
|
1687
|
-
|
1776
|
+
# TODO better location information here
|
1777
|
+
diagnostic :fatal, Parser::ERRORS[:embedded_document], range(p - 1, p)
|
1688
1778
|
};
|
1689
1779
|
*|;
|
1690
1780
|
|