parser 0.9.alpha1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -3
  3. data/AST_FORMAT.md +1338 -0
  4. data/README.md +58 -3
  5. data/Rakefile +32 -12
  6. data/bin/benchmark +47 -0
  7. data/bin/explain-parse +14 -0
  8. data/bin/parse +6 -0
  9. data/lib/parser.rb +84 -0
  10. data/lib/parser/all.rb +2 -0
  11. data/lib/parser/ast/node.rb +11 -0
  12. data/lib/parser/ast/processor.rb +8 -0
  13. data/lib/parser/base.rb +116 -0
  14. data/lib/parser/builders/default.rb +654 -0
  15. data/lib/parser/compatibility/ruby1_8.rb +13 -0
  16. data/lib/parser/diagnostic.rb +44 -0
  17. data/lib/parser/diagnostic/engine.rb +44 -0
  18. data/lib/parser/lexer.rl +335 -245
  19. data/lib/parser/lexer/explanation.rb +37 -0
  20. data/lib/parser/{lexer_literal.rb → lexer/literal.rb} +22 -12
  21. data/lib/parser/lexer/stack_state.rb +38 -0
  22. data/lib/parser/ruby18.y +1957 -0
  23. data/lib/parser/ruby19.y +2154 -0
  24. data/lib/parser/source/buffer.rb +78 -0
  25. data/lib/parser/source/map.rb +20 -0
  26. data/lib/parser/source/map/operator.rb +15 -0
  27. data/lib/parser/source/map/variable_assignment.rb +15 -0
  28. data/lib/parser/source/range.rb +66 -0
  29. data/lib/parser/static_environment.rb +12 -6
  30. data/parser.gemspec +23 -13
  31. data/test/helper.rb +45 -0
  32. data/test/parse_helper.rb +204 -0
  33. data/test/racc_coverage_helper.rb +130 -0
  34. data/test/test_diagnostic.rb +47 -0
  35. data/test/test_diagnostic_engine.rb +58 -0
  36. data/test/test_lexer.rb +601 -357
  37. data/test/test_lexer_stack_state.rb +69 -0
  38. data/test/test_parse_helper.rb +74 -0
  39. data/test/test_parser.rb +3654 -0
  40. data/test/test_source_buffer.rb +80 -0
  41. data/test/test_source_range.rb +51 -0
  42. data/test/test_static_environment.rb +1 -4
  43. metadata +137 -12
@@ -0,0 +1,13 @@
1
+ class String
2
+ alias original_percent %
3
+
4
+ def %(arg, *args)
5
+ if arg.is_a?(Hash)
6
+ gsub(/%\{(\w+)\}/) do
7
+ arg[$1.to_sym]
8
+ end
9
+ else
10
+ original_percent(arg, *args)
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,44 @@
1
+ module Parser
2
+
3
+ class Diagnostic
4
+ LEVELS = [:note, :warning, :error, :fatal].freeze
5
+
6
+ attr_reader :level, :message
7
+ attr_reader :location, :highlights
8
+
9
+ def initialize(level, message, location, highlights=[])
10
+ unless LEVELS.include?(level)
11
+ raise ArgumentError,
12
+ "Diagnostic#level must be one of #{LEVELS.join(', ')}; " \
13
+ "#{level.inspect} provided."
14
+ end
15
+
16
+ @level = level
17
+ @message = message.to_s.dup.freeze
18
+ @location = location
19
+ @highlights = highlights.dup.freeze
20
+
21
+ freeze
22
+ end
23
+
24
+ def render
25
+ source_line = @location.source_line
26
+ highlight_line = ' ' * source_line.length
27
+
28
+ @highlights.each do |hilight|
29
+ range = hilight.column_range
30
+ highlight_line[range] = '~' * hilight.size
31
+ end
32
+
33
+ range = @location.column_range
34
+ highlight_line[range] = '^' * @location.size
35
+
36
+ [
37
+ "#{@location.to_s}: #{@level}: #{@message}",
38
+ source_line,
39
+ highlight_line,
40
+ ]
41
+ end
42
+ end
43
+
44
+ end
@@ -0,0 +1,44 @@
1
+ module Parser
2
+
3
+ class Diagnostic::Engine
4
+ attr_accessor :consumer
5
+
6
+ attr_accessor :all_errors_are_fatal
7
+ attr_accessor :ignore_warnings
8
+
9
+ def initialize(consumer=nil)
10
+ @consumer = consumer
11
+
12
+ @all_errors_are_fatal = false
13
+ @ignore_warnings = false
14
+ end
15
+
16
+ def process(diagnostic)
17
+ if ignore?(diagnostic)
18
+ # do nothing
19
+ elsif @consumer
20
+ @consumer.call(diagnostic)
21
+ end
22
+
23
+ if raise?(diagnostic)
24
+ raise Parser::SyntaxError, diagnostic.message
25
+ end
26
+
27
+ self
28
+ end
29
+
30
+ protected
31
+
32
+ def ignore?(diagnostic)
33
+ @ignore_warnings &&
34
+ diagnostic.level == :warning
35
+ end
36
+
37
+ def raise?(diagnostic)
38
+ (@all_errors_are_fatal &&
39
+ diagnostic.level == :error) ||
40
+ diagnostic.level == :fatal
41
+ end
42
+ end
43
+
44
+ end
@@ -3,6 +3,9 @@
3
3
  #
4
4
  # === BEFORE YOU START ===
5
5
  #
6
+ # Read the Ruby Hacking Guide chapter 11, available in English at
7
+ # http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
8
+ #
6
9
  # Remember two things about Ragel scanners:
7
10
  #
8
11
  # 1) Longest match wins.
@@ -38,6 +41,11 @@
38
41
  # emit($whatever)
39
42
  # fnext $next_state; fbreak;
40
43
  #
44
+ # If you perform `fgoto` in an action which neither emits a token nor
45
+ # rewinds the stream pointer, the parser's side-effectful,
46
+ # context-sensitive lookahead actions will break in a way that is hard
47
+ # to detect and debug.
48
+ #
41
49
  # * If an action does not emit a token:
42
50
  #
43
51
  # fgoto $next_state;
@@ -56,6 +64,8 @@
56
64
  # `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
57
65
  # _will_ invoke the action `act`.
58
66
  #
67
+ # e_something stands for "something with **e**mbedded action".
68
+ #
59
69
  # * EOF is explicit and is matched by `c_eof`. If you want to introspect
60
70
  # the state of the lexer, add this rule to the state:
61
71
  #
@@ -66,49 +76,53 @@
66
76
  # NoMethodError: undefined method `ord' for nil:NilClass
67
77
  #
68
78
 
69
- require 'parser/lexer_literal'
70
- require 'parser/syntax_error'
71
-
72
79
  class Parser::Lexer
73
80
 
74
81
  %% write data nofinal;
75
82
  # %
76
83
 
77
- attr_reader :source
84
+ attr_reader :source_buffer
85
+
86
+ attr_accessor :diagnostics
78
87
  attr_accessor :static_env
79
88
 
80
- attr_reader :location, :comments
89
+ attr_accessor :cond, :cmdarg
90
+
91
+ attr_reader :comments
81
92
 
82
93
  def initialize(version)
83
- @version = version
94
+ @version = version
95
+ @static_env = nil
84
96
 
85
97
  reset
86
98
  end
87
99
 
88
100
  def reset(reset_state=true)
101
+ # Ragel-related variables:
89
102
  if reset_state
90
103
  # Unit tests set state prior to resetting lexer.
91
- @cs = self.class.lex_en_line_begin
104
+ @cs = self.class.lex_en_line_begin
105
+
106
+ @cond = StackState.new('cond')
107
+ @cmdarg = StackState.new('cmdarg')
92
108
  end
93
109
 
94
- # Ragel-internal variables:
95
- @p = 0 # stream position (saved manually in #advance)
96
- @ts = nil # token start
97
- @te = nil # token end
98
- @act = 0 # next action
110
+ @p = 0 # stream position (saved manually in #advance)
111
+ @ts = nil # token start
112
+ @te = nil # token end
113
+ @act = 0 # next action
99
114
 
100
- @stack = [] # state stack
101
- @top = 0 # state stack top pointer
115
+ @stack = [] # state stack
116
+ @top = 0 # state stack top pointer
102
117
 
118
+ # Lexer state:
103
119
  @token_queue = []
104
120
  @literal_stack = []
105
121
 
106
- @newlines = [0] # sorted set of \n positions
107
- @newline_s = nil # location of last encountered newline
108
- @location = nil # location of last #advance'd token
109
-
110
122
  @comments = "" # collected comments
111
123
 
124
+ @newline_s = nil # location of last encountered newline
125
+
112
126
  @num_base = nil # last numeric base
113
127
  @num_digits_s = nil # starting position of numeric digits
114
128
 
@@ -125,15 +139,21 @@ class Parser::Lexer
125
139
  @lambda_stack = []
126
140
  end
127
141
 
128
- def source=(source)
129
- # Heredoc processing coupled with weird newline quirks
130
- # require three '\0' (EOF) chars to be appended; after
131
- # `p = @heredoc_s`, if `p` points at EOF, the FSM could
132
- # not bail out early enough and will crash.
133
- #
134
- # Patches accepted.
135
- #
136
- @source = source.gsub(/\r\n/, "\n") + "\0\0\0"
142
+ def source_buffer=(source_buffer)
143
+ @source_buffer = source_buffer
144
+
145
+ if @source_buffer
146
+ # Heredoc processing coupled with weird newline quirks
147
+ # require three '\0' (EOF) chars to be appended; after
148
+ # `p = @heredoc_s`, if `p` points at EOF, the FSM could
149
+ # not bail out early enough and will crash.
150
+ #
151
+ # Patches accepted.
152
+ #
153
+ @source = @source_buffer.source.gsub(/\r\n/, "\n") + "\0\0\0"
154
+ else
155
+ @source = nil
156
+ end
137
157
  end
138
158
 
139
159
  LEX_STATES = {
@@ -159,7 +179,7 @@ class Parser::Lexer
159
179
  # Return next token: [type, value].
160
180
  def advance
161
181
  if @token_queue.any?
162
- return with_location(@token_queue.shift)
182
+ return @token_queue.shift
163
183
  end
164
184
 
165
185
  # Ugly, but dependent on Ragel output. Consider refactoring it somehow.
@@ -183,24 +203,14 @@ class Parser::Lexer
183
203
  @p = p
184
204
 
185
205
  if @token_queue.any?
186
- with_location(@token_queue.shift)
206
+ @token_queue.shift
187
207
  elsif @cs == self.class.lex_error
188
- with_location([ false, '$undefined', p, p + 1 ])
208
+ [ false, [ '$error', range(p - 1, p) ] ]
189
209
  else
190
- with_location([ false, '$end', p, p + 1 ])
210
+ [ false, [ '$eof', range(p - 1, p) ] ]
191
211
  end
192
212
  end
193
213
 
194
- # Like #advance, but also pretty-print the token and its position
195
- # in the stream to `stdout`.
196
- def advance_and_decorate
197
- type, val = advance
198
-
199
- puts decorate(location, "\e[0;32m#{type} #{val.inspect}\e[0m")
200
-
201
- [type, val]
202
- end
203
-
204
214
  # Return the current collected comment block and clear the storage.
205
215
  def clear_comments
206
216
  comments = @comments
@@ -209,103 +219,42 @@ class Parser::Lexer
209
219
  comments
210
220
  end
211
221
 
212
- # Lex `str` for the Ruby version `version` with initial state `state`.
213
- #
214
- # The tokens displayed by this function are not the same as tokens
215
- # consumed by parser, because the parser manipulates lexer state on
216
- # its own.
217
- def self.do(source, state=nil, version=19)
218
- lex = new(version)
219
- lex.source = source
220
- lex.state = state if state
221
-
222
- loop do
223
- type, val = lex.advance_and_decorate
224
- break if !type
225
- end
226
-
227
- puts "Lex state: #{lex.state}"
228
- end
229
-
230
- # Used by LexerLiteral to emit tokens for string content.
231
- def emit(type, value = tok, s = @ts, e = @te)
232
- if s.nil? || e.nil?
233
- raise "broken #emit invocation in #{caller[0]}"
234
- end
235
-
236
- @token_queue << [ type, value, s, e ]
237
- end
238
-
239
- def emit_table(table, s = @ts, e = @te)
240
- token = tok(s, e)
241
- emit(table[token], token, s, e)
242
- end
243
-
244
- # shim
245
- def lineno
246
- @location[0] + 1
247
- end
248
-
249
222
  protected
250
223
 
251
224
  def eof_char?(char)
252
225
  [0x04, 0x1a, 0x00].include? char.ord
253
226
  end
254
227
 
255
- def ruby18?
256
- @version == 18
228
+ def version?(*versions)
229
+ versions.include?(@version)
257
230
  end
258
231
 
259
- def ruby19?
260
- @version == 19
232
+ def stack_pop
233
+ @top -= 1
234
+ @stack[@top]
261
235
  end
262
236
 
263
237
  def tok(s = @ts, e = @te)
264
238
  @source[s...e]
265
239
  end
266
240
 
267
- def record_newline(p)
268
- @newlines = (@newlines + [p]).uniq.sort
269
- end
270
-
271
- def dissect_location(start, finish)
272
- line_number = @newlines.rindex { |nl| start >= nl }
273
- line_first_col = @newlines[line_number]
274
-
275
- start_col = start - line_first_col
276
- finish_col = finish - line_first_col
277
-
278
- [ line_number, start_col, finish_col ]
241
+ def range(s = @ts, e = @te)
242
+ Parser::Source::Range.new(@source_buffer, s, e - 1)
279
243
  end
280
244
 
281
- def with_location(item)
282
- type, value, start, finish = *item
283
-
284
- @location = dissect_location(start, finish)
285
-
286
- [ type, value ]
245
+ def emit(type, value = tok, s = @ts, e = @te)
246
+ @token_queue << [ type, [ value, range(s, e) ] ]
287
247
  end
288
248
 
289
- def decorate(location, message="")
290
- line_number, from, to = location
291
-
292
- line = @source.lines.drop(line_number).first
293
- line[from...to] = "\e[4m#{line[from...to]}\e[0m"
294
-
295
- tail_len = to - from - 1
296
- tail = "~" * (tail_len >= 0 ? tail_len : 0)
297
- decoration = "#{" " * from}\e[1;31m^#{tail}\e[0m #{message}"
298
-
299
- [ line, decoration ]
300
- end
249
+ def emit_table(table, s = @ts, e = @te)
250
+ value = tok(s, e)
301
251
 
302
- def warning(message, start = @ts, finish = @te)
303
- $stderr.puts "warning: #{message}"
304
- $stderr.puts decorate(dissect_location(start, finish))
252
+ emit(table[value], value, s, e)
305
253
  end
306
254
 
307
- def error(message)
308
- raise Parser::SyntaxError, message
255
+ def diagnostic(type, message, location=range, highlights=[])
256
+ @diagnostics.process(
257
+ Parser::Diagnostic.new(type, message, location, highlights))
309
258
  end
310
259
 
311
260
  #
@@ -313,10 +262,10 @@ class Parser::Lexer
313
262
  #
314
263
 
315
264
  def push_literal(*args)
316
- new_literal = Parser::LexerLiteral.new(self, *args)
265
+ new_literal = Literal.new(self, *args)
317
266
  @literal_stack.push(new_literal)
318
267
 
319
- if new_literal.type == :tWORDS_BEG
268
+ if new_literal.type == :tWORDS_BEG
320
269
  self.class.lex_en_interp_words
321
270
  elsif new_literal.type == :tQWORDS_BEG
322
271
  self.class.lex_en_plain_words
@@ -328,7 +277,7 @@ class Parser::Lexer
328
277
  end
329
278
 
330
279
  def literal
331
- @literal_stack[-1]
280
+ @literal_stack.last
332
281
  end
333
282
 
334
283
  def pop_literal
@@ -361,7 +310,6 @@ class Parser::Lexer
361
310
  '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
362
311
  '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
363
312
  '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
364
- 'do' => :kDO
365
313
  }
366
314
 
367
315
  PUNCTUATION_BEGIN = {
@@ -407,7 +355,6 @@ class Parser::Lexer
407
355
  #
408
356
  # This action is embedded directly into c_nl, as it is idempotent and
409
357
  # there are no cases when we need to skip it.
410
- record_newline(p + 1)
411
358
  @newline_s = p
412
359
  }
413
360
 
@@ -514,8 +461,8 @@ class Parser::Lexer
514
461
 
515
462
  # Ruby accepts (and fails on) variables with leading digit
516
463
  # in literal context, but not in unquoted symbol body.
517
- class_var_v = '@@' [0-9]? bareword;
518
- instance_var_v = '@' [0-9]? bareword;
464
+ class_var_v = '@@' c_alnum+;
465
+ instance_var_v = '@' c_alnum+;
519
466
 
520
467
  #
521
468
  # === ESCAPE SEQUENCE PARSING ===
@@ -538,7 +485,12 @@ class Parser::Lexer
538
485
  codepoint = codepoint_str.to_i(16)
539
486
 
540
487
  if codepoint >= 0x110000
541
- @escape = lambda { error "invalid Unicode codepoint (too large)" }
488
+ @escape = lambda do
489
+ # TODO better location reporting
490
+ diagnostic :error, Parser::ERRORS[:unicode_point_too_large],
491
+ range(@escape_s, p)
492
+ end
493
+
542
494
  break
543
495
  end
544
496
 
@@ -551,30 +503,32 @@ class Parser::Lexer
551
503
  'a' => "\a", 'b' => "\b", 'e' => "\e", 'f' => "\f",
552
504
  'n' => "\n", 'r' => "\r", 's' => "\s", 't' => "\t",
553
505
  'v' => "\v", '\\' => "\\"
554
- }.fetch(@source[p - 1], @source[p - 1])
506
+ }.fetch(@source[p - 1].chr, @source[p - 1].chr)
555
507
  }
556
508
 
557
509
  action invalid_complex_escape {
558
- @escape = lambda { error "invalid escape character syntax" }
510
+ @escape = lambda do
511
+ diagnostic :error, Parser::ERRORS[:invalid_escape]
512
+ end
559
513
  }
560
514
 
561
515
  action slash_c_char {
562
- @escape = (@escape.ord & 0x9f).chr
516
+ @escape = (@escape[0].ord & 0x9f).chr
563
517
  }
564
518
 
565
519
  action slash_m_char {
566
- @escape = (@escape.ord | 0x80).chr
520
+ @escape = (@escape[0].ord | 0x80).chr
567
521
  }
568
522
 
569
523
  maybe_escaped_char = (
570
524
  '\\' c_any %unescape_char
571
- | ( c_any - [\\] ) % { @escape = @source[p - 1] }
525
+ | ( c_any - [\\] ) % { @escape = @source[p - 1].chr }
572
526
  );
573
527
 
574
528
  maybe_escaped_ctrl_char = ( # why?!
575
529
  '\\' c_any %unescape_char %slash_c_char
576
530
  | '?' % { @escape = "\x7f" }
577
- | ( c_any - [\\?] ) % { @escape = @source[p - 1] } %slash_c_char
531
+ | ( c_any - [\\?] ) % { @escape = @source[p - 1].chr } %slash_c_char
578
532
  );
579
533
 
580
534
  escape = (
@@ -592,7 +546,12 @@ class Parser::Lexer
592
546
 
593
547
  # %q[\x]
594
548
  | 'x' ( c_any - xdigit )
595
- % { @escape = lambda { error "invalid hex escape" } }
549
+ % {
550
+ @escape = lambda do
551
+ diagnostic :error, Parser::ERRORS[:invalid_hex_escape],
552
+ range(@escape_s - 1, p + 2)
553
+ end
554
+ }
596
555
 
597
556
  # %q[\u123] %q[\u{12]
598
557
  | 'u' ( c_any{0,4} -
@@ -602,7 +561,12 @@ class Parser::Lexer
602
561
  | '{' xdigit{2} [ \t}] # \u{12. \u{12} are valid
603
562
  )
604
563
  )
605
- % { @escape = lambda { error "invalid Unicode escape" } }
564
+ % {
565
+ @escape = lambda do
566
+ diagnostic :error, Parser::ERRORS[:invalid_unicode_escape],
567
+ range(@escape_s - 1, p)
568
+ end
569
+ }
606
570
 
607
571
  # \u{123 456}
608
572
  | 'u{' ( xdigit{1,6} [ \t] )*
@@ -611,7 +575,12 @@ class Parser::Lexer
611
575
  | ( xdigit* ( c_any - xdigit - '}' )+ '}'
612
576
  | ( c_any - '}' )* c_eof
613
577
  | xdigit{7,}
614
- ) % { @escape = lambda { error "unterminated Unicode escape" } }
578
+ ) % {
579
+ @escape = lambda do
580
+ diagnostic :fatal, Parser::ERRORS[:unterminated_unicode],
581
+ range(p - 1, p)
582
+ end
583
+ }
615
584
  )
616
585
 
617
586
  # \C-\a \cx
@@ -635,7 +604,10 @@ class Parser::Lexer
635
604
 
636
605
  | ( c_any - [0-7xuCMc] ) %unescape_char
637
606
 
638
- | c_eof % { error "escape sequence meets end of file" }
607
+ | c_eof % {
608
+ diagnostic :fatal, Parser::ERRORS[:escape_eof],
609
+ range(p - 1, p)
610
+ }
639
611
  );
640
612
 
641
613
  # Use rules in form of `e_bs escape' when you need to parse a sequence.
@@ -666,7 +638,7 @@ class Parser::Lexer
666
638
  # of positions in the input stream, namely @heredoc_e
667
639
  # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
668
640
  #
669
- # @heredoc_e is simply contained inside the corresponding LexerLiteral, and
641
+ # @heredoc_e is simply contained inside the corresponding Literal, and
670
642
  # when the heredoc is closed, the lexing is restarted from that position.
671
643
  #
672
644
  # @herebody_s is considerably more complex. First, @herebody_s changes after each
@@ -683,14 +655,14 @@ class Parser::Lexer
683
655
  # After every heredoc was parsed, @herebody_s contains the
684
656
  # position of next token after all heredocs.
685
657
  if @herebody_s
686
- p = @herebody_s
658
+ p = @herebody_s - 1
687
659
  @herebody_s = nil
688
660
  end
689
661
  };
690
662
 
691
663
  action extend_string {
692
664
  if literal.nest_and_try_closing tok, @ts, @te
693
- fgoto *pop_literal;
665
+ fnext *pop_literal; fbreak;
694
666
  else
695
667
  literal.extend_string tok, @ts, @te
696
668
  end
@@ -701,10 +673,10 @@ class Parser::Lexer
701
673
  # If the literal is actually closed by the backslash,
702
674
  # rewind the input prior to consuming the escape sequence.
703
675
  p = @escape_s - 1
704
- fgoto *pop_literal;
676
+ fnext *pop_literal; fbreak;
705
677
  else
706
678
  # Get the first character after the backslash.
707
- escaped_char = @source[@escape_s]
679
+ escaped_char = @source[@escape_s].chr
708
680
 
709
681
  if literal.munge_escape? escaped_char
710
682
  # If this particular literal uses this character as an opening
@@ -765,11 +737,12 @@ class Parser::Lexer
765
737
  end
766
738
 
767
739
  if is_eof
768
- error "unterminated string meets end of file"
740
+ diagnostic :fatal, Parser::ERRORS[:string_eof],
741
+ range(literal.str_s, literal.str_s + 1)
769
742
  end
770
743
 
771
744
  # A literal newline is appended if the heredoc was _not_ closed
772
- # this time. See also LexerLiteral#nest_and_try_closing for rationale of
745
+ # this time. See also Literal#nest_and_try_closing for rationale of
773
746
  # calling #flush_string here.
774
747
  literal.extend_string tok, @ts, @te
775
748
  literal.flush_string
@@ -782,8 +755,7 @@ class Parser::Lexer
782
755
  # Interpolations with immediate variable names simply call into
783
756
  # the corresponding machine.
784
757
 
785
- interp_var =
786
- '#' ( global_var | class_var_v | instance_var_v );
758
+ interp_var = '#' ( global_var | class_var_v | instance_var_v );
787
759
 
788
760
  action extend_interp_var {
789
761
  literal.flush_string
@@ -808,6 +780,8 @@ class Parser::Lexer
808
780
  interp_code = '#{';
809
781
 
810
782
  e_lbrace = '{' % {
783
+ @cond.push(false); @cmdarg.push(false)
784
+
811
785
  if literal
812
786
  literal.start_interp_brace
813
787
  end
@@ -827,7 +801,7 @@ class Parser::Lexer
827
801
  end
828
802
 
829
803
  fhold;
830
- fnext *@stack.pop;
804
+ fnext *stack_pop;
831
805
  fbreak;
832
806
  end
833
807
  end
@@ -872,6 +846,7 @@ class Parser::Lexer
872
846
  *|;
873
847
 
874
848
  plain_string := |*
849
+ '\\' c_nl => extend_string_eol;
875
850
  e_bs c_any => extend_string_escaped;
876
851
  c_eol => extend_string_eol;
877
852
  c_any => extend_string;
@@ -882,11 +857,12 @@ class Parser::Lexer
882
857
  => {
883
858
  unknown_options = tok.scan(/[^imxouesn]/)
884
859
  if unknown_options.any?
885
- error "unknown regexp options: #{unknown_options.join}"
860
+ message = Parser::ERRORS[:regexp_options] % { :options => unknown_options.join }
861
+ diagnostic :error, message
886
862
  end
887
863
 
888
864
  emit(:tREGEXP_OPT)
889
- fgoto expr_end;
865
+ fnext expr_end; fbreak;
890
866
  };
891
867
 
892
868
  any
@@ -904,11 +880,17 @@ class Parser::Lexer
904
880
  # The default longest-match scanning does not work here due
905
881
  # to sheer ambiguity.
906
882
 
883
+ ambiguous_fid_suffix = # actual parsed
884
+ [?!] %{ tm = p } | # a? a?
885
+ '!=' %{ tm = p - 2 } # a!=b a != b
886
+ ;
887
+
907
888
  ambiguous_ident_suffix = # actual parsed
908
- [?!=] %{ tm = p } | # a? a?
909
- '==' %{ tm = p - 2 } | # a==b a == b
910
- '=~' %{ tm = p - 2 } | # a=~b a =~ b
911
- '=>' %{ tm = p - 2 } | # a=>b a => b
889
+ ambiguous_fid_suffix |
890
+ '=' %{ tm = p } | # a= a=
891
+ '==' %{ tm = p - 2 } | # a==b a == b
892
+ '=~' %{ tm = p - 2 } | # a=~b a =~ b
893
+ '=>' %{ tm = p - 2 } | # a=>b a => b
912
894
  '===' %{ tm = p - 3 } # a===b a === b
913
895
  ;
914
896
 
@@ -922,15 +904,24 @@ class Parser::Lexer
922
904
  '::' %{ tm = p - 2 } # A::B A :: B
923
905
  ;
924
906
 
907
+ # Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
908
+ # @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
909
+
910
+ e_lbrack = '[' % {
911
+ @cond.push(false); @cmdarg.push(false)
912
+ };
913
+
925
914
  # Ruby 1.9 lambdas require parentheses counting in order to
926
915
  # emit correct opening kDO/tLBRACE.
927
916
 
928
917
  e_lparen = '(' % {
929
- @paren_nest += 1
918
+ @cond.push(false); @cmdarg.push(false)
919
+
920
+ @paren_nest += 1
930
921
  };
931
922
 
932
923
  e_rparen = ')' % {
933
- @paren_nest -= 1
924
+ @paren_nest -= 1
934
925
  };
935
926
 
936
927
  # Variable lexing code is accessed from both expressions and
@@ -940,30 +931,36 @@ class Parser::Lexer
940
931
  global_var
941
932
  => {
942
933
  if tok =~ /^\$([1-9][0-9]*)$/
943
- emit(:tNTH_REF, $1.to_i)
934
+ emit(:tNTH_REF, tok(@ts + 1).to_i)
944
935
  elsif tok =~ /^\$([&`'+])$/
945
- emit(:tBACK_REF, $1.to_sym)
936
+ emit(:tBACK_REF)
946
937
  else
947
938
  emit(:tGVAR)
948
939
  end
949
940
 
950
- fnext *@stack.pop; fbreak;
941
+ fnext *stack_pop; fbreak;
951
942
  };
952
943
 
953
944
  class_var_v
954
945
  => {
955
- error "`#{tok}' is not allowed as a class variable name" if tok =~ /^@@[0-9]/
946
+ if tok =~ /^@@[0-9]/
947
+ message = Parser::ERRORS[:cvar_name] % { :name => tok }
948
+ diagnostic :error, message
949
+ end
956
950
 
957
951
  emit(:tCVAR)
958
- fnext *@stack.pop; fbreak;
952
+ fnext *stack_pop; fbreak;
959
953
  };
960
954
 
961
955
  instance_var_v
962
956
  => {
963
- error "`#{tok}' is not allowed as an instance variable name" if tok =~ /^@[0-9]/
957
+ if tok =~ /^@[0-9]/
958
+ message = Parser::ERRORS[:ivar_name] % { :name => tok }
959
+ diagnostic :error, message
960
+ end
964
961
 
965
962
  emit(:tIVAR)
966
- fnext *@stack.pop; fbreak;
963
+ fnext *stack_pop; fbreak;
967
964
  };
968
965
  *|;
969
966
 
@@ -996,11 +993,11 @@ class Parser::Lexer
996
993
  fnext expr_end; fbreak; };
997
994
 
998
995
  ':'
999
- => { fhold; fgoto expr_end; };
996
+ => { fhold; fgoto expr_beg; };
1000
997
 
1001
998
  global_var
1002
- => { emit(:tGVAR)
1003
- fbreak; };
999
+ => { p = @ts - 1
1000
+ fcall expr_variable; };
1004
1001
 
1005
1002
  c_space_nl+;
1006
1003
 
@@ -1015,12 +1012,16 @@ class Parser::Lexer
1015
1012
  # Transitions to `expr_arg` afterwards.
1016
1013
  #
1017
1014
  expr_dot := |*
1018
- bareword
1015
+ constant
1016
+ => { emit(:tCONSTANT)
1017
+ fnext expr_arg; fbreak; };
1018
+
1019
+ call_or_var
1019
1020
  => { emit(:tIDENTIFIER)
1020
1021
  fnext expr_arg; fbreak; };
1021
1022
 
1022
- bareword ambiguous_ident_suffix
1023
- => { emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
1023
+ call_or_var ambiguous_ident_suffix
1024
+ => { emit(:tFID, tok(@ts, tm), @ts, tm)
1024
1025
  fnext expr_arg; p = tm - 1; fbreak; };
1025
1026
 
1026
1027
  operator_fname |
@@ -1031,6 +1032,8 @@ class Parser::Lexer
1031
1032
 
1032
1033
  c_space_nl+;
1033
1034
 
1035
+ '#' c_line* c_nl;
1036
+
1034
1037
  c_any
1035
1038
  => { fhold; fgoto expr_end; };
1036
1039
 
@@ -1059,8 +1062,8 @@ class Parser::Lexer
1059
1062
 
1060
1063
  # meth [...]
1061
1064
  # Array argument. Compare with indexing `meth[...]`.
1062
- c_space+ '['
1063
- => { emit(:tLBRACK, '[', @te - 1, @te);
1065
+ c_space+ e_lbrack
1066
+ => { emit(:tLBRACK, '[', @te - 1, @te)
1064
1067
  fnext expr_beg; fbreak; };
1065
1068
 
1066
1069
  # cmd {}
@@ -1076,12 +1079,6 @@ class Parser::Lexer
1076
1079
  end
1077
1080
  };
1078
1081
 
1079
- # a.b
1080
- # Dot-call.
1081
- '.' | '::'
1082
- => { emit_table(PUNCTUATION);
1083
- fnext expr_dot; fbreak; };
1084
-
1085
1082
  #
1086
1083
  # AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
1087
1084
  #
@@ -1091,11 +1088,22 @@ class Parser::Lexer
1091
1088
  c_space+ '?'
1092
1089
  => { fhold; fgoto expr_beg; };
1093
1090
 
1091
+ # a %{1}, a %[1] (but not "a %=1=" or "a % foo")
1092
+ c_space+ ( '%' [^= ]
1093
+ # a /foo/ (but not "a / foo" or "a /=foo")
1094
+ | '/' ( c_any - c_space_nl - '=' )
1095
+ # a <<HEREDOC
1096
+ | '<<'
1097
+ )
1098
+ => { fhold; fhold; fgoto expr_beg; };
1099
+
1094
1100
  # x +1
1095
1101
  # Ambiguous unary operator or regexp literal.
1096
1102
  c_space+ [+\-/]
1097
1103
  => {
1098
- warning "ambiguous first argument; put parentheses or even spaces", @te - 1, @te
1104
+ diagnostic :warning, Parser::ERRORS[:ambiguous_literal],
1105
+ range(@te - 1, @te)
1106
+
1099
1107
  fhold; fhold; fgoto expr_beg;
1100
1108
  };
1101
1109
 
@@ -1103,11 +1111,23 @@ class Parser::Lexer
1103
1111
  # Ambiguous splat or block-pass.
1104
1112
  c_space+ [*&]
1105
1113
  => {
1106
- what = tok(@te - 1, @te)
1107
- warning "`#{what}' interpreted as argument prefix", @te - 1, @te
1114
+ message = Parser::ERRORS[:ambiguous_prefix] % { :prefix => tok(@te - 1, @te) }
1115
+ diagnostic :warning, message,
1116
+ range(@te - 1, @te)
1117
+
1108
1118
  fhold; fgoto expr_beg;
1109
1119
  };
1110
1120
 
1121
+ # x ::Foo
1122
+ # Ambiguous toplevel constant access.
1123
+ c_space+ '::'
1124
+ => { fhold; fhold; fgoto expr_beg; };
1125
+
1126
+ # x:b
1127
+ # Symbol.
1128
+ c_space* ':'
1129
+ => { fhold; fgoto expr_beg; };
1130
+
1111
1131
  #
1112
1132
  # AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
1113
1133
  #
@@ -1129,7 +1149,7 @@ class Parser::Lexer
1129
1149
  fgoto expr_end;
1130
1150
  };
1131
1151
 
1132
- c_space* c_nl
1152
+ c_space* ( '#' c_line* )? c_nl
1133
1153
  => { fhold; fgoto expr_end; };
1134
1154
 
1135
1155
  c_any
@@ -1152,8 +1172,7 @@ class Parser::Lexer
1152
1172
  # `{` as `tLBRACE_ARG`.
1153
1173
  #
1154
1174
  # The default post-`expr_endarg` state is `expr_end`, so this state also handles
1155
- # `do` (as `kDO_BLOCK` in `expr_beg`). (I have no clue why the parser cannot
1156
- # just handle `kDO`.)
1175
+ # `do` (as `kDO_BLOCK` in `expr_beg`).
1157
1176
  expr_endarg := |*
1158
1177
  e_lbrace
1159
1178
  => { emit(:tLBRACE_ARG)
@@ -1183,6 +1202,8 @@ class Parser::Lexer
1183
1202
 
1184
1203
  c_space+;
1185
1204
 
1205
+ '#' c_line*;
1206
+
1186
1207
  c_nl
1187
1208
  => { fhold; fgoto expr_end; };
1188
1209
 
@@ -1221,39 +1242,32 @@ class Parser::Lexer
1221
1242
  # STRING AND REGEXP LITERALS
1222
1243
  #
1223
1244
 
1224
- # a / 42
1225
- # a % 42
1226
- # a %= 42 (disambiguation with %=string=)
1227
- [/%] c_space_nl | '%=' # /
1228
- => {
1229
- fhold; fhold;
1230
- fgoto expr_end;
1231
- };
1232
-
1233
1245
  # /regexp/oui
1234
- '/'
1246
+ # /=/ (disambiguation with /=)
1247
+ '/' c_any
1235
1248
  => {
1236
- type, delimiter = tok, tok
1237
- fgoto *push_literal(type, delimiter, @ts);
1249
+ type = delimiter = tok[0].chr
1250
+ fhold; fgoto *push_literal(type, delimiter, @ts);
1238
1251
  };
1239
1252
 
1240
1253
  # %<string>
1241
1254
  '%' ( c_any - [A-Za-z] )
1242
1255
  => {
1243
- type, delimiter = tok[0], tok[-1]
1256
+ type, delimiter = tok[0].chr, tok[-1].chr
1244
1257
  fgoto *push_literal(type, delimiter, @ts);
1245
1258
  };
1246
1259
 
1247
1260
  # %w(we are the people)
1248
1261
  '%' [A-Za-z]+ c_any
1249
1262
  => {
1250
- type, delimiter = tok[0..-2], tok[-1]
1263
+ type, delimiter = tok[0..-2], tok[-1].chr
1251
1264
  fgoto *push_literal(type, delimiter, @ts);
1252
1265
  };
1253
1266
 
1254
1267
  '%' c_eof
1255
1268
  => {
1256
- error "unterminated string meets end of file"
1269
+ diagnostic :fatal, Parser::ERRORS[:string_eof],
1270
+ range(@ts, @ts + 1)
1257
1271
  };
1258
1272
 
1259
1273
  # Heredoc start.
@@ -1280,6 +1294,31 @@ class Parser::Lexer
1280
1294
  p = @herebody_s - 1
1281
1295
  };
1282
1296
 
1297
+ #
1298
+ # SYMBOL LITERALS
1299
+ #
1300
+
1301
+ # :"bar", :'baz'
1302
+ ':' ['"] # '
1303
+ => {
1304
+ type, delimiter = tok, tok[-1].chr
1305
+ fgoto *push_literal(type, delimiter, @ts);
1306
+ };
1307
+
1308
+ ':' bareword ambiguous_symbol_suffix
1309
+ => {
1310
+ emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
1311
+ p = tm - 1
1312
+ fnext expr_end; fbreak;
1313
+ };
1314
+
1315
+ ':' ( bareword | global_var | class_var | instance_var |
1316
+ operator_fname | operator_arithmetic | operator_rest )
1317
+ => {
1318
+ emit(:tSYMBOL, tok(@ts + 1), @ts)
1319
+ fnext expr_end; fbreak;
1320
+ };
1321
+
1283
1322
  #
1284
1323
  # AMBIGUOUS TERNARY OPERATOR
1285
1324
  #
@@ -1293,20 +1332,22 @@ class Parser::Lexer
1293
1332
 
1294
1333
  value = @escape || tok(@ts + 1)
1295
1334
 
1296
- if ruby18?
1297
- emit(:tINTEGER, value.ord)
1335
+ if version?(18)
1336
+ emit(:tINTEGER, value[0].ord)
1298
1337
  else
1299
1338
  emit(:tSTRING, value)
1300
1339
  end
1301
1340
 
1302
- fbreak;
1341
+ fnext expr_end; fbreak;
1303
1342
  };
1304
1343
 
1305
1344
  '?' c_space_nl
1306
1345
  => {
1307
1346
  escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
1308
1347
  "\v" => '\v', "\f" => '\f' }[tok[@ts + 1]]
1309
- warning "invalid character syntax; use ?#{escape}", @ts
1348
+ message = Parser::ERRORS[:invalid_escape_use] % { :escape => escape }
1349
+ diagnostic :warning, message,
1350
+ range(@ts, @ts + 1)
1310
1351
 
1311
1352
  p = @ts - 1
1312
1353
  fgoto expr_end;
@@ -1314,7 +1355,8 @@ class Parser::Lexer
1314
1355
 
1315
1356
  '?' c_eof
1316
1357
  => {
1317
- error "incomplete character syntax"
1358
+ diagnostic :fatal, Parser::ERRORS[:incomplete_escape],
1359
+ range(@ts, @ts + 1)
1318
1360
  };
1319
1361
 
1320
1362
  # f ?aa : b: Disambiguate with a character literal.
@@ -1328,15 +1370,20 @@ class Parser::Lexer
1328
1370
  # KEYWORDS AND PUNCTUATION
1329
1371
  #
1330
1372
 
1331
- # a(+b)
1332
- punctuation_begin |
1373
+ # a([1, 2])
1374
+ e_lbrack |
1333
1375
  # a({b=>c})
1334
- e_lbrace |
1376
+ e_lbrace |
1335
1377
  # a()
1336
1378
  e_lparen
1337
1379
  => { emit_table(PUNCTUATION_BEGIN)
1338
1380
  fbreak; };
1339
1381
 
1382
+ # a(+b)
1383
+ punctuation_begin
1384
+ => { emit_table(PUNCTUATION_BEGIN)
1385
+ fbreak; };
1386
+
1340
1387
  # rescue Exception => e: Block rescue.
1341
1388
  # Special because it should transition to expr_mid.
1342
1389
  'rescue'
@@ -1356,7 +1403,7 @@ class Parser::Lexer
1356
1403
  => {
1357
1404
  fhold;
1358
1405
 
1359
- if ruby18?
1406
+ if version?(18)
1360
1407
  emit(:tIDENTIFIER, tok(@ts, @te - 2), @ts, @te - 2)
1361
1408
  fhold; # continue as a symbol
1362
1409
  else
@@ -1383,14 +1430,32 @@ class Parser::Lexer
1383
1430
  => {
1384
1431
  emit(:tIDENTIFIER)
1385
1432
 
1386
- if @static_env && @static_env.declared?(tok.to_sym)
1387
- fgoto expr_end;
1433
+ if @static_env && @static_env.declared?(tok)
1434
+ fnext expr_end; fbreak;
1388
1435
  else
1389
- fgoto expr_arg;
1436
+ fnext expr_arg; fbreak;
1390
1437
  end
1391
1438
  };
1392
1439
 
1393
- c_space_nl+;
1440
+ #
1441
+ # WHITESPACE
1442
+ #
1443
+
1444
+ c_space_nl;
1445
+
1446
+ '\\\n';
1447
+
1448
+ '#' c_line* c_eol
1449
+ => { @comments << tok
1450
+ fhold; };
1451
+
1452
+ c_nl '=begin' ( c_space | c_eol )
1453
+ => { p = @ts - 1
1454
+ fgoto line_begin; };
1455
+
1456
+ #
1457
+ # DEFAULT TRANSITION
1458
+ #
1394
1459
 
1395
1460
  # The following rules match most binary and all unary operators.
1396
1461
  # Rules for binary operators provide better error reporting.
@@ -1439,11 +1504,21 @@ class Parser::Lexer
1439
1504
 
1440
1505
  if tok == '{'
1441
1506
  emit(:tLAMBEG)
1442
- else
1507
+ else # 'do'
1443
1508
  emit(:kDO_LAMBDA)
1444
1509
  end
1445
1510
  else
1446
- emit_table(PUNCTUATION)
1511
+ if tok == '{'
1512
+ emit_table(PUNCTUATION)
1513
+ else # 'do'
1514
+ if @cond.active?
1515
+ emit(:kDO_COND)
1516
+ elsif @cmdarg.active?
1517
+ emit(:kDO_BLOCK)
1518
+ else
1519
+ emit(:kDO)
1520
+ end
1521
+ end
1447
1522
  end
1448
1523
 
1449
1524
  fnext expr_value; fbreak;
@@ -1457,7 +1532,7 @@ class Parser::Lexer
1457
1532
  => { emit_table(KEYWORDS)
1458
1533
  fnext expr_fname; fbreak; };
1459
1534
 
1460
- 'class' c_space_nl '<<'
1535
+ 'class' c_space_nl* '<<'
1461
1536
  => { emit(:kCLASS, 'class', @ts, @ts + 5)
1462
1537
  emit(:tLSHFT, '<<', @te - 2, @te)
1463
1538
  fnext expr_beg; fbreak; };
@@ -1480,13 +1555,23 @@ class Parser::Lexer
1480
1555
  => {
1481
1556
  emit_table(KEYWORDS)
1482
1557
 
1483
- if ruby18? && tok == 'not'
1558
+ if version?(18) && tok == 'not'
1484
1559
  fnext expr_beg; fbreak;
1485
1560
  else
1486
1561
  fnext expr_arg; fbreak;
1487
1562
  end
1488
1563
  };
1489
1564
 
1565
+ '__ENCODING__'
1566
+ => {
1567
+ if version?(18)
1568
+ emit(:tIDENTIFIER)
1569
+ else
1570
+ emit_table(KEYWORDS)
1571
+ end
1572
+ fbreak;
1573
+ };
1574
+
1490
1575
  keyword_with_end
1491
1576
  => { emit_table(KEYWORDS)
1492
1577
  fbreak; };
@@ -1503,7 +1588,8 @@ class Parser::Lexer
1503
1588
  ( digit+ '_' )* digit* '_'?
1504
1589
  | '0' [Bb] %{ @num_base = 2; @num_digits_s = p }
1505
1590
  ( [01]+ '_' )* [01]* '_'?
1506
- | [1-9] %{ @num_base = 10; @num_digits_s = @ts }
1591
+ | [1-9] digit*
1592
+ %{ @num_base = 10; @num_digits_s = @ts }
1507
1593
  ( '_' digit+ )* digit* '_'?
1508
1594
  | '0' %{ @num_base = 8; @num_digits_s = @ts }
1509
1595
  ( '_' digit+ )* digit* '_'?
@@ -1512,14 +1598,17 @@ class Parser::Lexer
1512
1598
  digits = tok(@num_digits_s)
1513
1599
 
1514
1600
  if digits.end_with? '_'
1515
- error "trailing `_' in number"
1516
- elsif digits.empty? && @num_base == 8 && ruby18?
1601
+ diagnostic :error, Parser::ERRORS[:trailing_underscore],
1602
+ range(@te - 1, @te)
1603
+ elsif digits.empty? && @num_base == 8 && version?(18)
1517
1604
  # 1.8 did not raise an error on 0o.
1518
1605
  digits = "0"
1519
1606
  elsif digits.empty?
1520
- error "numeric literal without digits"
1521
- elsif @num_base == 8 && digits =~ /[89]/
1522
- error "invalid octal digit"
1607
+ diagnostic :error, Parser::ERRORS[:empty_numeric]
1608
+ elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
1609
+ invalid_s = @num_digits_s + invalid_idx
1610
+ diagnostic :error, Parser::ERRORS[:invalid_octal],
1611
+ range(invalid_s, invalid_s + 1)
1523
1612
  end
1524
1613
 
1525
1614
  emit(:tINTEGER, digits.to_i(@num_base))
@@ -1537,7 +1626,7 @@ class Parser::Lexer
1537
1626
  )
1538
1627
  => {
1539
1628
  if tok.start_with? '.'
1540
- error "no .<digit> floating literal anymore; put 0 before dot"
1629
+ diagnostic :error, Parser::ERRORS[:no_dot_digit_literal]
1541
1630
  elsif tok =~ /^[eE]/
1542
1631
  # The rule above allows to specify floats as just `e10', which is
1543
1632
  # certainly not a float. Send a patch if you can do this better.
@@ -1550,25 +1639,16 @@ class Parser::Lexer
1550
1639
  };
1551
1640
 
1552
1641
  #
1553
- # SYMBOL LITERALS
1642
+ # STRING AND XSTRING LITERALS
1554
1643
  #
1555
1644
 
1556
- # `echo foo` | :"bar" | :'baz'
1557
- '`' | ':'? ['"] # '
1645
+ # `echo foo`, "bar", 'baz'
1646
+ '`' | ['"] # '
1558
1647
  => {
1559
- type, delimiter = tok, tok[-1]
1648
+ type, delimiter = tok, tok[-1].chr
1560
1649
  fgoto *push_literal(type, delimiter, @ts);
1561
1650
  };
1562
1651
 
1563
- ':' bareword ambiguous_symbol_suffix
1564
- => { emit(:tSYMBOL, tok(@ts + 1, tm))
1565
- p = tm - 1; fbreak; };
1566
-
1567
- ':' ( bareword | global_var | class_var | instance_var |
1568
- operator_fname | operator_arithmetic | operator_rest )
1569
- => { emit(:tSYMBOL, tok(@ts + 1))
1570
- fbreak; };
1571
-
1572
1652
  #
1573
1653
  # CONSTANTS AND VARIABLES
1574
1654
  #
@@ -1578,7 +1658,7 @@ class Parser::Lexer
1578
1658
  fbreak; };
1579
1659
 
1580
1660
  constant ambiguous_const_suffix
1581
- => { emit(:tCONSTANT, tok(@ts, tm))
1661
+ => { emit(:tCONSTANT, tok(@ts, tm), @ts, tm)
1582
1662
  p = tm - 1; fbreak; };
1583
1663
 
1584
1664
  global_var | class_var_v | instance_var_v
@@ -1588,7 +1668,7 @@ class Parser::Lexer
1588
1668
  # METHOD CALLS
1589
1669
  #
1590
1670
 
1591
- '.'
1671
+ '.' | '::'
1592
1672
  => { emit_table(PUNCTUATION)
1593
1673
  fnext expr_dot; fbreak; };
1594
1674
 
@@ -1596,8 +1676,9 @@ class Parser::Lexer
1596
1676
  => { emit(:tIDENTIFIER)
1597
1677
  fnext expr_arg; fbreak; };
1598
1678
 
1599
- call_or_var [?!]
1600
- => { emit(:tFID)
1679
+ call_or_var ambiguous_fid_suffix
1680
+ => { emit(:tFID, tok(@ts, tm), @ts, tm)
1681
+ p = tm - 1
1601
1682
  fnext expr_arg; fbreak; };
1602
1683
 
1603
1684
  #
@@ -1613,6 +1694,7 @@ class Parser::Lexer
1613
1694
 
1614
1695
  e_rbrace | e_rparen | ']'
1615
1696
  => { emit_table(PUNCTUATION)
1697
+ @cond.lexpop; @cmdarg.lexpop
1616
1698
  fbreak; };
1617
1699
 
1618
1700
  operator_arithmetic '='
@@ -1623,6 +1705,10 @@ class Parser::Lexer
1623
1705
  => { emit_table(PUNCTUATION)
1624
1706
  fnext expr_value; fbreak; };
1625
1707
 
1708
+ e_lbrack
1709
+ => { emit_table(PUNCTUATION)
1710
+ fnext expr_beg; fbreak; };
1711
+
1626
1712
  punctuation_end
1627
1713
  => { emit_table(PUNCTUATION)
1628
1714
  fnext expr_beg; fbreak; };
@@ -1632,11 +1718,16 @@ class Parser::Lexer
1632
1718
  #
1633
1719
 
1634
1720
  '\\' e_heredoc_nl;
1635
- '\\' ( any - c_nl ) {
1636
- error "bare backslash only allowed before newline"
1721
+
1722
+ '\\' c_line {
1723
+ diagnostic :error, Parser::ERRORS[:bare_backslash],
1724
+ range(@ts, @ts + 1)
1725
+ fhold;
1637
1726
  };
1638
1727
 
1639
- '#' ( c_any - c_nl )*
1728
+ c_space+;
1729
+
1730
+ '#' c_line*
1640
1731
  => { @comments << tok(@ts, @te + 1) };
1641
1732
 
1642
1733
  e_heredoc_nl
@@ -1646,11 +1737,10 @@ class Parser::Lexer
1646
1737
  => { emit_table(PUNCTUATION)
1647
1738
  fnext expr_value; fbreak; };
1648
1739
 
1649
- c_space+;
1650
-
1651
1740
  c_any
1652
1741
  => {
1653
- error "unexpected #{tok.inspect}"
1742
+ message = Parser::ERRORS[:unexpected] % { :character => tok.inspect }
1743
+ diagnostic :fatal, message
1654
1744
  };
1655
1745
 
1656
1746
  c_eof => do_eof;
@@ -1681,10 +1771,10 @@ class Parser::Lexer
1681
1771
  c_line* c_nl
1682
1772
  => { @comments << tok };
1683
1773
 
1684
- any
1774
+ c_eof
1685
1775
  => {
1686
- @comments = ""
1687
- error "embedded document meats end of file (and they embark on a romantic journey)"
1776
+ # TODO better location information here
1777
+ diagnostic :fatal, Parser::ERRORS[:embedded_document], range(p - 1, p)
1688
1778
  };
1689
1779
  *|;
1690
1780