parser 0.9.alpha1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -3
  3. data/AST_FORMAT.md +1338 -0
  4. data/README.md +58 -3
  5. data/Rakefile +32 -12
  6. data/bin/benchmark +47 -0
  7. data/bin/explain-parse +14 -0
  8. data/bin/parse +6 -0
  9. data/lib/parser.rb +84 -0
  10. data/lib/parser/all.rb +2 -0
  11. data/lib/parser/ast/node.rb +11 -0
  12. data/lib/parser/ast/processor.rb +8 -0
  13. data/lib/parser/base.rb +116 -0
  14. data/lib/parser/builders/default.rb +654 -0
  15. data/lib/parser/compatibility/ruby1_8.rb +13 -0
  16. data/lib/parser/diagnostic.rb +44 -0
  17. data/lib/parser/diagnostic/engine.rb +44 -0
  18. data/lib/parser/lexer.rl +335 -245
  19. data/lib/parser/lexer/explanation.rb +37 -0
  20. data/lib/parser/{lexer_literal.rb → lexer/literal.rb} +22 -12
  21. data/lib/parser/lexer/stack_state.rb +38 -0
  22. data/lib/parser/ruby18.y +1957 -0
  23. data/lib/parser/ruby19.y +2154 -0
  24. data/lib/parser/source/buffer.rb +78 -0
  25. data/lib/parser/source/map.rb +20 -0
  26. data/lib/parser/source/map/operator.rb +15 -0
  27. data/lib/parser/source/map/variable_assignment.rb +15 -0
  28. data/lib/parser/source/range.rb +66 -0
  29. data/lib/parser/static_environment.rb +12 -6
  30. data/parser.gemspec +23 -13
  31. data/test/helper.rb +45 -0
  32. data/test/parse_helper.rb +204 -0
  33. data/test/racc_coverage_helper.rb +130 -0
  34. data/test/test_diagnostic.rb +47 -0
  35. data/test/test_diagnostic_engine.rb +58 -0
  36. data/test/test_lexer.rb +601 -357
  37. data/test/test_lexer_stack_state.rb +69 -0
  38. data/test/test_parse_helper.rb +74 -0
  39. data/test/test_parser.rb +3654 -0
  40. data/test/test_source_buffer.rb +80 -0
  41. data/test/test_source_range.rb +51 -0
  42. data/test/test_static_environment.rb +1 -4
  43. metadata +137 -12
@@ -0,0 +1,13 @@
1
+ class String
2
+ alias original_percent %
3
+
4
+ def %(arg, *args)
5
+ if arg.is_a?(Hash)
6
+ gsub(/%\{(\w+)\}/) do
7
+ arg[$1.to_sym]
8
+ end
9
+ else
10
+ original_percent(arg, *args)
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,44 @@
1
+ module Parser
2
+
3
+ class Diagnostic
4
+ LEVELS = [:note, :warning, :error, :fatal].freeze
5
+
6
+ attr_reader :level, :message
7
+ attr_reader :location, :highlights
8
+
9
+ def initialize(level, message, location, highlights=[])
10
+ unless LEVELS.include?(level)
11
+ raise ArgumentError,
12
+ "Diagnostic#level must be one of #{LEVELS.join(', ')}; " \
13
+ "#{level.inspect} provided."
14
+ end
15
+
16
+ @level = level
17
+ @message = message.to_s.dup.freeze
18
+ @location = location
19
+ @highlights = highlights.dup.freeze
20
+
21
+ freeze
22
+ end
23
+
24
+ def render
25
+ source_line = @location.source_line
26
+ highlight_line = ' ' * source_line.length
27
+
28
+ @highlights.each do |hilight|
29
+ range = hilight.column_range
30
+ highlight_line[range] = '~' * hilight.size
31
+ end
32
+
33
+ range = @location.column_range
34
+ highlight_line[range] = '^' * @location.size
35
+
36
+ [
37
+ "#{@location.to_s}: #{@level}: #{@message}",
38
+ source_line,
39
+ highlight_line,
40
+ ]
41
+ end
42
+ end
43
+
44
+ end
@@ -0,0 +1,44 @@
1
+ module Parser
2
+
3
+ class Diagnostic::Engine
4
+ attr_accessor :consumer
5
+
6
+ attr_accessor :all_errors_are_fatal
7
+ attr_accessor :ignore_warnings
8
+
9
+ def initialize(consumer=nil)
10
+ @consumer = consumer
11
+
12
+ @all_errors_are_fatal = false
13
+ @ignore_warnings = false
14
+ end
15
+
16
+ def process(diagnostic)
17
+ if ignore?(diagnostic)
18
+ # do nothing
19
+ elsif @consumer
20
+ @consumer.call(diagnostic)
21
+ end
22
+
23
+ if raise?(diagnostic)
24
+ raise Parser::SyntaxError, diagnostic.message
25
+ end
26
+
27
+ self
28
+ end
29
+
30
+ protected
31
+
32
+ def ignore?(diagnostic)
33
+ @ignore_warnings &&
34
+ diagnostic.level == :warning
35
+ end
36
+
37
+ def raise?(diagnostic)
38
+ (@all_errors_are_fatal &&
39
+ diagnostic.level == :error) ||
40
+ diagnostic.level == :fatal
41
+ end
42
+ end
43
+
44
+ end
@@ -3,6 +3,9 @@
3
3
  #
4
4
  # === BEFORE YOU START ===
5
5
  #
6
+ # Read the Ruby Hacking Guide chapter 11, available in English at
7
+ # http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
8
+ #
6
9
  # Remember two things about Ragel scanners:
7
10
  #
8
11
  # 1) Longest match wins.
@@ -38,6 +41,11 @@
38
41
  # emit($whatever)
39
42
  # fnext $next_state; fbreak;
40
43
  #
44
+ # If you perform `fgoto` in an action which neither emits a token nor
45
+ # rewinds the stream pointer, the parser's side-effectful,
46
+ # context-sensitive lookahead actions will break in a hard to detect
47
+ # and debug way.
48
+ #
41
49
  # * If an action does not emit a token:
42
50
  #
43
51
  # fgoto $next_state;
@@ -56,6 +64,8 @@
56
64
  # `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
57
65
  # _will_ invoke the action `act`.
58
66
  #
67
+ # e_something stands for "something with **e**mbedded action".
68
+ #
59
69
  # * EOF is explicit and is matched by `c_eof`. If you want to introspect
60
70
  # the state of the lexer, add this rule to the state:
61
71
  #
@@ -66,49 +76,53 @@
66
76
  # NoMethodError: undefined method `ord' for nil:NilClass
67
77
  #
68
78
 
69
- require 'parser/lexer_literal'
70
- require 'parser/syntax_error'
71
-
72
79
  class Parser::Lexer
73
80
 
74
81
  %% write data nofinal;
75
82
  # %
76
83
 
77
- attr_reader :source
84
+ attr_reader :source_buffer
85
+
86
+ attr_accessor :diagnostics
78
87
  attr_accessor :static_env
79
88
 
80
- attr_reader :location, :comments
89
+ attr_accessor :cond, :cmdarg
90
+
91
+ attr_reader :comments
81
92
 
82
93
  def initialize(version)
83
- @version = version
94
+ @version = version
95
+ @static_env = nil
84
96
 
85
97
  reset
86
98
  end
87
99
 
88
100
  def reset(reset_state=true)
101
+ # Ragel-related variables:
89
102
  if reset_state
90
103
  # Unit tests set state prior to resetting lexer.
91
- @cs = self.class.lex_en_line_begin
104
+ @cs = self.class.lex_en_line_begin
105
+
106
+ @cond = StackState.new('cond')
107
+ @cmdarg = StackState.new('cmdarg')
92
108
  end
93
109
 
94
- # Ragel-internal variables:
95
- @p = 0 # stream position (saved manually in #advance)
96
- @ts = nil # token start
97
- @te = nil # token end
98
- @act = 0 # next action
110
+ @p = 0 # stream position (saved manually in #advance)
111
+ @ts = nil # token start
112
+ @te = nil # token end
113
+ @act = 0 # next action
99
114
 
100
- @stack = [] # state stack
101
- @top = 0 # state stack top pointer
115
+ @stack = [] # state stack
116
+ @top = 0 # state stack top pointer
102
117
 
118
+ # Lexer state:
103
119
  @token_queue = []
104
120
  @literal_stack = []
105
121
 
106
- @newlines = [0] # sorted set of \n positions
107
- @newline_s = nil # location of last encountered newline
108
- @location = nil # location of last #advance'd token
109
-
110
122
  @comments = "" # collected comments
111
123
 
124
+ @newline_s = nil # location of last encountered newline
125
+
112
126
  @num_base = nil # last numeric base
113
127
  @num_digits_s = nil # starting position of numeric digits
114
128
 
@@ -125,15 +139,21 @@ class Parser::Lexer
125
139
  @lambda_stack = []
126
140
  end
127
141
 
128
- def source=(source)
129
- # Heredoc processing coupled with weird newline quirks
130
- # require three '\0' (EOF) chars to be appended; after
131
- # `p = @heredoc_s`, if `p` points at EOF, the FSM could
132
- # not bail out early enough and will crash.
133
- #
134
- # Patches accepted.
135
- #
136
- @source = source.gsub(/\r\n/, "\n") + "\0\0\0"
142
+ def source_buffer=(source_buffer)
143
+ @source_buffer = source_buffer
144
+
145
+ if @source_buffer
146
+ # Heredoc processing coupled with weird newline quirks
147
+ # require three '\0' (EOF) chars to be appended; after
148
+ # `p = @heredoc_s`, if `p` points at EOF, the FSM could
149
+ # not bail out early enough and will crash.
150
+ #
151
+ # Patches accepted.
152
+ #
153
+ @source = @source_buffer.source.gsub(/\r\n/, "\n") + "\0\0\0"
154
+ else
155
+ @source = nil
156
+ end
137
157
  end
138
158
 
139
159
  LEX_STATES = {
@@ -159,7 +179,7 @@ class Parser::Lexer
159
179
  # Return next token: [type, value].
160
180
  def advance
161
181
  if @token_queue.any?
162
- return with_location(@token_queue.shift)
182
+ return @token_queue.shift
163
183
  end
164
184
 
165
185
  # Ugly, but dependent on Ragel output. Consider refactoring it somehow.
@@ -183,24 +203,14 @@ class Parser::Lexer
183
203
  @p = p
184
204
 
185
205
  if @token_queue.any?
186
- with_location(@token_queue.shift)
206
+ @token_queue.shift
187
207
  elsif @cs == self.class.lex_error
188
- with_location([ false, '$undefined', p, p + 1 ])
208
+ [ false, [ '$error', range(p - 1, p) ] ]
189
209
  else
190
- with_location([ false, '$end', p, p + 1 ])
210
+ [ false, [ '$eof', range(p - 1, p) ] ]
191
211
  end
192
212
  end
193
213
 
194
- # Like #advance, but also pretty-print the token and its position
195
- # in the stream to `stdout`.
196
- def advance_and_decorate
197
- type, val = advance
198
-
199
- puts decorate(location, "\e[0;32m#{type} #{val.inspect}\e[0m")
200
-
201
- [type, val]
202
- end
203
-
204
214
  # Return the current collected comment block and clear the storage.
205
215
  def clear_comments
206
216
  comments = @comments
@@ -209,103 +219,42 @@ class Parser::Lexer
209
219
  comments
210
220
  end
211
221
 
212
- # Lex `str` for the Ruby version `version` with initial state `state`.
213
- #
214
- # The tokens displayed by this function are not the same as tokens
215
- # consumed by parser, because the parser manipulates lexer state on
216
- # its own.
217
- def self.do(source, state=nil, version=19)
218
- lex = new(version)
219
- lex.source = source
220
- lex.state = state if state
221
-
222
- loop do
223
- type, val = lex.advance_and_decorate
224
- break if !type
225
- end
226
-
227
- puts "Lex state: #{lex.state}"
228
- end
229
-
230
- # Used by LexerLiteral to emit tokens for string content.
231
- def emit(type, value = tok, s = @ts, e = @te)
232
- if s.nil? || e.nil?
233
- raise "broken #emit invocation in #{caller[0]}"
234
- end
235
-
236
- @token_queue << [ type, value, s, e ]
237
- end
238
-
239
- def emit_table(table, s = @ts, e = @te)
240
- token = tok(s, e)
241
- emit(table[token], token, s, e)
242
- end
243
-
244
- # shim
245
- def lineno
246
- @location[0] + 1
247
- end
248
-
249
222
  protected
250
223
 
251
224
  def eof_char?(char)
252
225
  [0x04, 0x1a, 0x00].include? char.ord
253
226
  end
254
227
 
255
- def ruby18?
256
- @version == 18
228
+ def version?(*versions)
229
+ versions.include?(@version)
257
230
  end
258
231
 
259
- def ruby19?
260
- @version == 19
232
+ def stack_pop
233
+ @top -= 1
234
+ @stack[@top]
261
235
  end
262
236
 
263
237
  def tok(s = @ts, e = @te)
264
238
  @source[s...e]
265
239
  end
266
240
 
267
- def record_newline(p)
268
- @newlines = (@newlines + [p]).uniq.sort
269
- end
270
-
271
- def dissect_location(start, finish)
272
- line_number = @newlines.rindex { |nl| start >= nl }
273
- line_first_col = @newlines[line_number]
274
-
275
- start_col = start - line_first_col
276
- finish_col = finish - line_first_col
277
-
278
- [ line_number, start_col, finish_col ]
241
+ def range(s = @ts, e = @te)
242
+ Parser::Source::Range.new(@source_buffer, s, e - 1)
279
243
  end
280
244
 
281
- def with_location(item)
282
- type, value, start, finish = *item
283
-
284
- @location = dissect_location(start, finish)
285
-
286
- [ type, value ]
245
+ def emit(type, value = tok, s = @ts, e = @te)
246
+ @token_queue << [ type, [ value, range(s, e) ] ]
287
247
  end
288
248
 
289
- def decorate(location, message="")
290
- line_number, from, to = location
291
-
292
- line = @source.lines.drop(line_number).first
293
- line[from...to] = "\e[4m#{line[from...to]}\e[0m"
294
-
295
- tail_len = to - from - 1
296
- tail = "~" * (tail_len >= 0 ? tail_len : 0)
297
- decoration = "#{" " * from}\e[1;31m^#{tail}\e[0m #{message}"
298
-
299
- [ line, decoration ]
300
- end
249
+ def emit_table(table, s = @ts, e = @te)
250
+ value = tok(s, e)
301
251
 
302
- def warning(message, start = @ts, finish = @te)
303
- $stderr.puts "warning: #{message}"
304
- $stderr.puts decorate(dissect_location(start, finish))
252
+ emit(table[value], value, s, e)
305
253
  end
306
254
 
307
- def error(message)
308
- raise Parser::SyntaxError, message
255
+ def diagnostic(type, message, location=range, highlights=[])
256
+ @diagnostics.process(
257
+ Parser::Diagnostic.new(type, message, location, highlights))
309
258
  end
310
259
 
311
260
  #
@@ -313,10 +262,10 @@ class Parser::Lexer
313
262
  #
314
263
 
315
264
  def push_literal(*args)
316
- new_literal = Parser::LexerLiteral.new(self, *args)
265
+ new_literal = Literal.new(self, *args)
317
266
  @literal_stack.push(new_literal)
318
267
 
319
- if new_literal.type == :tWORDS_BEG
268
+ if new_literal.type == :tWORDS_BEG
320
269
  self.class.lex_en_interp_words
321
270
  elsif new_literal.type == :tQWORDS_BEG
322
271
  self.class.lex_en_plain_words
@@ -328,7 +277,7 @@ class Parser::Lexer
328
277
  end
329
278
 
330
279
  def literal
331
- @literal_stack[-1]
280
+ @literal_stack.last
332
281
  end
333
282
 
334
283
  def pop_literal
@@ -361,7 +310,6 @@ class Parser::Lexer
361
310
  '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
362
311
  '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
363
312
  '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
364
- 'do' => :kDO
365
313
  }
366
314
 
367
315
  PUNCTUATION_BEGIN = {
@@ -407,7 +355,6 @@ class Parser::Lexer
407
355
  #
408
356
  # This action is embedded directly into c_nl, as it is idempotent and
409
357
  # there are no cases when we need to skip it.
410
- record_newline(p + 1)
411
358
  @newline_s = p
412
359
  }
413
360
 
@@ -514,8 +461,8 @@ class Parser::Lexer
514
461
 
515
462
  # Ruby accepts (and fails on) variables with leading digit
516
463
  # in literal context, but not in unquoted symbol body.
517
- class_var_v = '@@' [0-9]? bareword;
518
- instance_var_v = '@' [0-9]? bareword;
464
+ class_var_v = '@@' c_alnum+;
465
+ instance_var_v = '@' c_alnum+;
519
466
 
520
467
  #
521
468
  # === ESCAPE SEQUENCE PARSING ===
@@ -538,7 +485,12 @@ class Parser::Lexer
538
485
  codepoint = codepoint_str.to_i(16)
539
486
 
540
487
  if codepoint >= 0x110000
541
- @escape = lambda { error "invalid Unicode codepoint (too large)" }
488
+ @escape = lambda do
489
+ # TODO better location reporting
490
+ diagnostic :error, Parser::ERRORS[:unicode_point_too_large],
491
+ range(@escape_s, p)
492
+ end
493
+
542
494
  break
543
495
  end
544
496
 
@@ -551,30 +503,32 @@ class Parser::Lexer
551
503
  'a' => "\a", 'b' => "\b", 'e' => "\e", 'f' => "\f",
552
504
  'n' => "\n", 'r' => "\r", 's' => "\s", 't' => "\t",
553
505
  'v' => "\v", '\\' => "\\"
554
- }.fetch(@source[p - 1], @source[p - 1])
506
+ }.fetch(@source[p - 1].chr, @source[p - 1].chr)
555
507
  }
556
508
 
557
509
  action invalid_complex_escape {
558
- @escape = lambda { error "invalid escape character syntax" }
510
+ @escape = lambda do
511
+ diagnostic :error, Parser::ERRORS[:invalid_escape]
512
+ end
559
513
  }
560
514
 
561
515
  action slash_c_char {
562
- @escape = (@escape.ord & 0x9f).chr
516
+ @escape = (@escape[0].ord & 0x9f).chr
563
517
  }
564
518
 
565
519
  action slash_m_char {
566
- @escape = (@escape.ord | 0x80).chr
520
+ @escape = (@escape[0].ord | 0x80).chr
567
521
  }
568
522
 
569
523
  maybe_escaped_char = (
570
524
  '\\' c_any %unescape_char
571
- | ( c_any - [\\] ) % { @escape = @source[p - 1] }
525
+ | ( c_any - [\\] ) % { @escape = @source[p - 1].chr }
572
526
  );
573
527
 
574
528
  maybe_escaped_ctrl_char = ( # why?!
575
529
  '\\' c_any %unescape_char %slash_c_char
576
530
  | '?' % { @escape = "\x7f" }
577
- | ( c_any - [\\?] ) % { @escape = @source[p - 1] } %slash_c_char
531
+ | ( c_any - [\\?] ) % { @escape = @source[p - 1].chr } %slash_c_char
578
532
  );
579
533
 
580
534
  escape = (
@@ -592,7 +546,12 @@ class Parser::Lexer
592
546
 
593
547
  # %q[\x]
594
548
  | 'x' ( c_any - xdigit )
595
- % { @escape = lambda { error "invalid hex escape" } }
549
+ % {
550
+ @escape = lambda do
551
+ diagnostic :error, Parser::ERRORS[:invalid_hex_escape],
552
+ range(@escape_s - 1, p + 2)
553
+ end
554
+ }
596
555
 
597
556
  # %q[\u123] %q[\u{12]
598
557
  | 'u' ( c_any{0,4} -
@@ -602,7 +561,12 @@ class Parser::Lexer
602
561
  | '{' xdigit{2} [ \t}] # \u{12. \u{12} are valid
603
562
  )
604
563
  )
605
- % { @escape = lambda { error "invalid Unicode escape" } }
564
+ % {
565
+ @escape = lambda do
566
+ diagnostic :error, Parser::ERRORS[:invalid_unicode_escape],
567
+ range(@escape_s - 1, p)
568
+ end
569
+ }
606
570
 
607
571
  # \u{123 456}
608
572
  | 'u{' ( xdigit{1,6} [ \t] )*
@@ -611,7 +575,12 @@ class Parser::Lexer
611
575
  | ( xdigit* ( c_any - xdigit - '}' )+ '}'
612
576
  | ( c_any - '}' )* c_eof
613
577
  | xdigit{7,}
614
- ) % { @escape = lambda { error "unterminated Unicode escape" } }
578
+ ) % {
579
+ @escape = lambda do
580
+ diagnostic :fatal, Parser::ERRORS[:unterminated_unicode],
581
+ range(p - 1, p)
582
+ end
583
+ }
615
584
  )
616
585
 
617
586
  # \C-\a \cx
@@ -635,7 +604,10 @@ class Parser::Lexer
635
604
 
636
605
  | ( c_any - [0-7xuCMc] ) %unescape_char
637
606
 
638
- | c_eof % { error "escape sequence meets end of file" }
607
+ | c_eof % {
608
+ diagnostic :fatal, Parser::ERRORS[:escape_eof],
609
+ range(p - 1, p)
610
+ }
639
611
  );
640
612
 
641
613
  # Use rules in form of `e_bs escape' when you need to parse a sequence.
@@ -666,7 +638,7 @@ class Parser::Lexer
666
638
  # of positions in the input stream, namely @heredoc_e
667
639
  # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
668
640
  #
669
- # @heredoc_e is simply contained inside the corresponding LexerLiteral, and
641
+ # @heredoc_e is simply contained inside the corresponding Literal, and
670
642
  # when the heredoc is closed, the lexing is restarted from that position.
671
643
  #
672
644
  # @herebody_s is considerably more complex. First, @herebody_s changes after each
@@ -683,14 +655,14 @@ class Parser::Lexer
683
655
  # After every heredoc was parsed, @herebody_s contains the
684
656
  # position of next token after all heredocs.
685
657
  if @herebody_s
686
- p = @herebody_s
658
+ p = @herebody_s - 1
687
659
  @herebody_s = nil
688
660
  end
689
661
  };
690
662
 
691
663
  action extend_string {
692
664
  if literal.nest_and_try_closing tok, @ts, @te
693
- fgoto *pop_literal;
665
+ fnext *pop_literal; fbreak;
694
666
  else
695
667
  literal.extend_string tok, @ts, @te
696
668
  end
@@ -701,10 +673,10 @@ class Parser::Lexer
701
673
  # If the literal is actually closed by the backslash,
702
674
  # rewind the input prior to consuming the escape sequence.
703
675
  p = @escape_s - 1
704
- fgoto *pop_literal;
676
+ fnext *pop_literal; fbreak;
705
677
  else
706
678
  # Get the first character after the backslash.
707
- escaped_char = @source[@escape_s]
679
+ escaped_char = @source[@escape_s].chr
708
680
 
709
681
  if literal.munge_escape? escaped_char
710
682
  # If this particular literal uses this character as an opening
@@ -765,11 +737,12 @@ class Parser::Lexer
765
737
  end
766
738
 
767
739
  if is_eof
768
- error "unterminated string meets end of file"
740
+ diagnostic :fatal, Parser::ERRORS[:string_eof],
741
+ range(literal.str_s, literal.str_s + 1)
769
742
  end
770
743
 
771
744
  # A literal newline is appended if the heredoc was _not_ closed
772
- # this time. See also LexerLiteral#nest_and_try_closing for rationale of
745
+ # this time. See also Literal#nest_and_try_closing for rationale of
773
746
  # calling #flush_string here.
774
747
  literal.extend_string tok, @ts, @te
775
748
  literal.flush_string
@@ -782,8 +755,7 @@ class Parser::Lexer
782
755
  # Interpolations with immediate variable names simply call into
783
756
  # the corresponding machine.
784
757
 
785
- interp_var =
786
- '#' ( global_var | class_var_v | instance_var_v );
758
+ interp_var = '#' ( global_var | class_var_v | instance_var_v );
787
759
 
788
760
  action extend_interp_var {
789
761
  literal.flush_string
@@ -808,6 +780,8 @@ class Parser::Lexer
808
780
  interp_code = '#{';
809
781
 
810
782
  e_lbrace = '{' % {
783
+ @cond.push(false); @cmdarg.push(false)
784
+
811
785
  if literal
812
786
  literal.start_interp_brace
813
787
  end
@@ -827,7 +801,7 @@ class Parser::Lexer
827
801
  end
828
802
 
829
803
  fhold;
830
- fnext *@stack.pop;
804
+ fnext *stack_pop;
831
805
  fbreak;
832
806
  end
833
807
  end
@@ -872,6 +846,7 @@ class Parser::Lexer
872
846
  *|;
873
847
 
874
848
  plain_string := |*
849
+ '\\' c_nl => extend_string_eol;
875
850
  e_bs c_any => extend_string_escaped;
876
851
  c_eol => extend_string_eol;
877
852
  c_any => extend_string;
@@ -882,11 +857,12 @@ class Parser::Lexer
882
857
  => {
883
858
  unknown_options = tok.scan(/[^imxouesn]/)
884
859
  if unknown_options.any?
885
- error "unknown regexp options: #{unknown_options.join}"
860
+ message = Parser::ERRORS[:regexp_options] % { :options => unknown_options.join }
861
+ diagnostic :error, message
886
862
  end
887
863
 
888
864
  emit(:tREGEXP_OPT)
889
- fgoto expr_end;
865
+ fnext expr_end; fbreak;
890
866
  };
891
867
 
892
868
  any
@@ -904,11 +880,17 @@ class Parser::Lexer
904
880
  # The default longest-match scanning does not work here due
905
881
  # to sheer ambiguity.
906
882
 
883
+ ambiguous_fid_suffix = # actual parsed
884
+ [?!] %{ tm = p } | # a? a?
885
+ '!=' %{ tm = p - 2 } # a!=b a != b
886
+ ;
887
+
907
888
  ambiguous_ident_suffix = # actual parsed
908
- [?!=] %{ tm = p } | # a? a?
909
- '==' %{ tm = p - 2 } | # a==b a == b
910
- '=~' %{ tm = p - 2 } | # a=~b a =~ b
911
- '=>' %{ tm = p - 2 } | # a=>b a => b
889
+ ambiguous_fid_suffix |
890
+ '=' %{ tm = p } | # a= a=
891
+ '==' %{ tm = p - 2 } | # a==b a == b
892
+ '=~' %{ tm = p - 2 } | # a=~b a =~ b
893
+ '=>' %{ tm = p - 2 } | # a=>b a => b
912
894
  '===' %{ tm = p - 3 } # a===b a === b
913
895
  ;
914
896
 
@@ -922,15 +904,24 @@ class Parser::Lexer
922
904
  '::' %{ tm = p - 2 } # A::B A :: B
923
905
  ;
924
906
 
907
+ # Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
908
+ # @cond/@cmdarg-related code into e_lbrack, e_lparen and e_lbrace.
909
+
910
+ e_lbrack = '[' % {
911
+ @cond.push(false); @cmdarg.push(false)
912
+ };
913
+
925
914
  # Ruby 1.9 lambdas require parentheses counting in order to
926
915
  # emit correct opening kDO/tLBRACE.
927
916
 
928
917
  e_lparen = '(' % {
929
- @paren_nest += 1
918
+ @cond.push(false); @cmdarg.push(false)
919
+
920
+ @paren_nest += 1
930
921
  };
931
922
 
932
923
  e_rparen = ')' % {
933
- @paren_nest -= 1
924
+ @paren_nest -= 1
934
925
  };
935
926
 
936
927
  # Variable lexing code is accessed from both expressions and
@@ -940,30 +931,36 @@ class Parser::Lexer
940
931
  global_var
941
932
  => {
942
933
  if tok =~ /^\$([1-9][0-9]*)$/
943
- emit(:tNTH_REF, $1.to_i)
934
+ emit(:tNTH_REF, tok(@ts + 1).to_i)
944
935
  elsif tok =~ /^\$([&`'+])$/
945
- emit(:tBACK_REF, $1.to_sym)
936
+ emit(:tBACK_REF)
946
937
  else
947
938
  emit(:tGVAR)
948
939
  end
949
940
 
950
- fnext *@stack.pop; fbreak;
941
+ fnext *stack_pop; fbreak;
951
942
  };
952
943
 
953
944
  class_var_v
954
945
  => {
955
- error "`#{tok}' is not allowed as a class variable name" if tok =~ /^@@[0-9]/
946
+ if tok =~ /^@@[0-9]/
947
+ message = Parser::ERRORS[:cvar_name] % { :name => tok }
948
+ diagnostic :error, message
949
+ end
956
950
 
957
951
  emit(:tCVAR)
958
- fnext *@stack.pop; fbreak;
952
+ fnext *stack_pop; fbreak;
959
953
  };
960
954
 
961
955
  instance_var_v
962
956
  => {
963
- error "`#{tok}' is not allowed as an instance variable name" if tok =~ /^@[0-9]/
957
+ if tok =~ /^@[0-9]/
958
+ message = Parser::ERRORS[:ivar_name] % { :name => tok }
959
+ diagnostic :error, message
960
+ end
964
961
 
965
962
  emit(:tIVAR)
966
- fnext *@stack.pop; fbreak;
963
+ fnext *stack_pop; fbreak;
967
964
  };
968
965
  *|;
969
966
 
@@ -996,11 +993,11 @@ class Parser::Lexer
996
993
  fnext expr_end; fbreak; };
997
994
 
998
995
  ':'
999
- => { fhold; fgoto expr_end; };
996
+ => { fhold; fgoto expr_beg; };
1000
997
 
1001
998
  global_var
1002
- => { emit(:tGVAR)
1003
- fbreak; };
999
+ => { p = @ts - 1
1000
+ fcall expr_variable; };
1004
1001
 
1005
1002
  c_space_nl+;
1006
1003
 
@@ -1015,12 +1012,16 @@ class Parser::Lexer
1015
1012
  # Transitions to `expr_arg` afterwards.
1016
1013
  #
1017
1014
  expr_dot := |*
1018
- bareword
1015
+ constant
1016
+ => { emit(:tCONSTANT)
1017
+ fnext expr_arg; fbreak; };
1018
+
1019
+ call_or_var
1019
1020
  => { emit(:tIDENTIFIER)
1020
1021
  fnext expr_arg; fbreak; };
1021
1022
 
1022
- bareword ambiguous_ident_suffix
1023
- => { emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
1023
+ call_or_var ambiguous_ident_suffix
1024
+ => { emit(:tFID, tok(@ts, tm), @ts, tm)
1024
1025
  fnext expr_arg; p = tm - 1; fbreak; };
1025
1026
 
1026
1027
  operator_fname |
@@ -1031,6 +1032,8 @@ class Parser::Lexer
1031
1032
 
1032
1033
  c_space_nl+;
1033
1034
 
1035
+ '#' c_line* c_nl;
1036
+
1034
1037
  c_any
1035
1038
  => { fhold; fgoto expr_end; };
1036
1039
 
@@ -1059,8 +1062,8 @@ class Parser::Lexer
1059
1062
 
1060
1063
  # meth [...]
1061
1064
  # Array argument. Compare with indexing `meth[...]`.
1062
- c_space+ '['
1063
- => { emit(:tLBRACK, '[', @te - 1, @te);
1065
+ c_space+ e_lbrack
1066
+ => { emit(:tLBRACK, '[', @te - 1, @te)
1064
1067
  fnext expr_beg; fbreak; };
1065
1068
 
1066
1069
  # cmd {}
@@ -1076,12 +1079,6 @@ class Parser::Lexer
1076
1079
  end
1077
1080
  };
1078
1081
 
1079
- # a.b
1080
- # Dot-call.
1081
- '.' | '::'
1082
- => { emit_table(PUNCTUATION);
1083
- fnext expr_dot; fbreak; };
1084
-
1085
1082
  #
1086
1083
  # AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
1087
1084
  #
@@ -1091,11 +1088,22 @@ class Parser::Lexer
1091
1088
  c_space+ '?'
1092
1089
  => { fhold; fgoto expr_beg; };
1093
1090
 
1091
+ # a %{1}, a %[1] (but not "a %=1=" or "a % foo")
1092
+ c_space+ ( '%' [^= ]
1093
+ # a /foo/ (but not "a / foo" or "a /=foo")
1094
+ | '/' ( c_any - c_space_nl - '=' )
1095
+ # a <<HEREDOC
1096
+ | '<<'
1097
+ )
1098
+ => { fhold; fhold; fgoto expr_beg; };
1099
+
1094
1100
  # x +1
1095
1101
  # Ambiguous unary operator or regexp literal.
1096
1102
  c_space+ [+\-/]
1097
1103
  => {
1098
- warning "ambiguous first argument; put parentheses or even spaces", @te - 1, @te
1104
+ diagnostic :warning, Parser::ERRORS[:ambiguous_literal],
1105
+ range(@te - 1, @te)
1106
+
1099
1107
  fhold; fhold; fgoto expr_beg;
1100
1108
  };
1101
1109
 
@@ -1103,11 +1111,23 @@ class Parser::Lexer
1103
1111
  # Ambiguous splat or block-pass.
1104
1112
  c_space+ [*&]
1105
1113
  => {
1106
- what = tok(@te - 1, @te)
1107
- warning "`#{what}' interpreted as argument prefix", @te - 1, @te
1114
+ message = Parser::ERRORS[:ambiguous_prefix] % { :prefix => tok(@te - 1, @te) }
1115
+ diagnostic :warning, message,
1116
+ range(@te - 1, @te)
1117
+
1108
1118
  fhold; fgoto expr_beg;
1109
1119
  };
1110
1120
 
1121
+ # x ::Foo
1122
+ # Ambiguous toplevel constant access.
1123
+ c_space+ '::'
1124
+ => { fhold; fhold; fgoto expr_beg; };
1125
+
1126
+ # x:b
1127
+ # Symbol.
1128
+ c_space* ':'
1129
+ => { fhold; fgoto expr_beg; };
1130
+
1111
1131
  #
1112
1132
  # AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
1113
1133
  #
@@ -1129,7 +1149,7 @@ class Parser::Lexer
1129
1149
  fgoto expr_end;
1130
1150
  };
1131
1151
 
1132
- c_space* c_nl
1152
+ c_space* ( '#' c_line* )? c_nl
1133
1153
  => { fhold; fgoto expr_end; };
1134
1154
 
1135
1155
  c_any
@@ -1152,8 +1172,7 @@ class Parser::Lexer
1152
1172
  # `{` as `tLBRACE_ARG`.
1153
1173
  #
1154
1174
  # The default post-`expr_endarg` state is `expr_end`, so this state also handles
1155
- # `do` (as `kDO_BLOCK` in `expr_beg`). (I have no clue why the parser cannot
1156
- # just handle `kDO`.)
1175
+ # `do` (as `kDO_BLOCK` in `expr_beg`).
1157
1176
  expr_endarg := |*
1158
1177
  e_lbrace
1159
1178
  => { emit(:tLBRACE_ARG)
@@ -1183,6 +1202,8 @@ class Parser::Lexer
1183
1202
 
1184
1203
  c_space+;
1185
1204
 
1205
+ '#' c_line*;
1206
+
1186
1207
  c_nl
1187
1208
  => { fhold; fgoto expr_end; };
1188
1209
 
@@ -1221,39 +1242,32 @@ class Parser::Lexer
1221
1242
  # STRING AND REGEXP LITERALS
1222
1243
  #
1223
1244
 
1224
- # a / 42
1225
- # a % 42
1226
- # a %= 42 (disambiguation with %=string=)
1227
- [/%] c_space_nl | '%=' # /
1228
- => {
1229
- fhold; fhold;
1230
- fgoto expr_end;
1231
- };
1232
-
1233
1245
  # /regexp/oui
1234
- '/'
1246
+ # /=/ (disambiguation with /=)
1247
+ '/' c_any
1235
1248
  => {
1236
- type, delimiter = tok, tok
1237
- fgoto *push_literal(type, delimiter, @ts);
1249
+ type = delimiter = tok[0].chr
1250
+ fhold; fgoto *push_literal(type, delimiter, @ts);
1238
1251
  };
1239
1252
 
1240
1253
  # %<string>
1241
1254
  '%' ( c_any - [A-Za-z] )
1242
1255
  => {
1243
- type, delimiter = tok[0], tok[-1]
1256
+ type, delimiter = tok[0].chr, tok[-1].chr
1244
1257
  fgoto *push_literal(type, delimiter, @ts);
1245
1258
  };
1246
1259
 
1247
1260
  # %w(we are the people)
1248
1261
  '%' [A-Za-z]+ c_any
1249
1262
  => {
1250
- type, delimiter = tok[0..-2], tok[-1]
1263
+ type, delimiter = tok[0..-2], tok[-1].chr
1251
1264
  fgoto *push_literal(type, delimiter, @ts);
1252
1265
  };
1253
1266
 
1254
1267
  '%' c_eof
1255
1268
  => {
1256
- error "unterminated string meets end of file"
1269
+ diagnostic :fatal, Parser::ERRORS[:string_eof],
1270
+ range(@ts, @ts + 1)
1257
1271
  };
1258
1272
 
1259
1273
  # Heredoc start.
@@ -1280,6 +1294,31 @@ class Parser::Lexer
1280
1294
  p = @herebody_s - 1
1281
1295
  };
1282
1296
 
1297
+ #
1298
+ # SYMBOL LITERALS
1299
+ #
1300
+
1301
+ # :"bar", :'baz'
1302
+ ':' ['"] # '
1303
+ => {
1304
+ type, delimiter = tok, tok[-1].chr
1305
+ fgoto *push_literal(type, delimiter, @ts);
1306
+ };
1307
+
1308
+ ':' bareword ambiguous_symbol_suffix
1309
+ => {
1310
+ emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
1311
+ p = tm - 1
1312
+ fnext expr_end; fbreak;
1313
+ };
1314
+
1315
+ ':' ( bareword | global_var | class_var | instance_var |
1316
+ operator_fname | operator_arithmetic | operator_rest )
1317
+ => {
1318
+ emit(:tSYMBOL, tok(@ts + 1), @ts)
1319
+ fnext expr_end; fbreak;
1320
+ };
1321
+
1283
1322
  #
1284
1323
  # AMBIGUOUS TERNARY OPERATOR
1285
1324
  #
@@ -1293,20 +1332,22 @@ class Parser::Lexer
1293
1332
 
1294
1333
  value = @escape || tok(@ts + 1)
1295
1334
 
1296
- if ruby18?
1297
- emit(:tINTEGER, value.ord)
1335
+ if version?(18)
1336
+ emit(:tINTEGER, value[0].ord)
1298
1337
  else
1299
1338
  emit(:tSTRING, value)
1300
1339
  end
1301
1340
 
1302
- fbreak;
1341
+ fnext expr_end; fbreak;
1303
1342
  };
1304
1343
 
1305
1344
  '?' c_space_nl
1306
1345
  => {
1307
1346
  escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
1308
1347
  "\v" => '\v', "\f" => '\f' }[tok[@ts + 1]]
1309
- warning "invalid character syntax; use ?#{escape}", @ts
1348
+ message = Parser::ERRORS[:invalid_escape_use] % { :escape => escape }
1349
+ diagnostic :warning, message,
1350
+ range(@ts, @ts + 1)
1310
1351
 
1311
1352
  p = @ts - 1
1312
1353
  fgoto expr_end;
@@ -1314,7 +1355,8 @@ class Parser::Lexer
1314
1355
 
1315
1356
  '?' c_eof
1316
1357
  => {
1317
- error "incomplete character syntax"
1358
+ diagnostic :fatal, Parser::ERRORS[:incomplete_escape],
1359
+ range(@ts, @ts + 1)
1318
1360
  };
1319
1361
 
1320
1362
  # f ?aa : b: Disambiguate with a character literal.
@@ -1328,15 +1370,20 @@ class Parser::Lexer
1328
1370
  # KEYWORDS AND PUNCTUATION
1329
1371
  #
1330
1372
 
1331
- # a(+b)
1332
- punctuation_begin |
1373
+ # a([1, 2])
1374
+ e_lbrack |
1333
1375
  # a({b=>c})
1334
- e_lbrace |
1376
+ e_lbrace |
1335
1377
  # a()
1336
1378
  e_lparen
1337
1379
  => { emit_table(PUNCTUATION_BEGIN)
1338
1380
  fbreak; };
1339
1381
 
1382
+ # a(+b)
1383
+ punctuation_begin
1384
+ => { emit_table(PUNCTUATION_BEGIN)
1385
+ fbreak; };
1386
+
1340
1387
  # rescue Exception => e: Block rescue.
1341
1388
  # Special because it should transition to expr_mid.
1342
1389
  'rescue'
@@ -1356,7 +1403,7 @@ class Parser::Lexer
1356
1403
  => {
1357
1404
  fhold;
1358
1405
 
1359
- if ruby18?
1406
+ if version?(18)
1360
1407
  emit(:tIDENTIFIER, tok(@ts, @te - 2), @ts, @te - 2)
1361
1408
  fhold; # continue as a symbol
1362
1409
  else
@@ -1383,14 +1430,32 @@ class Parser::Lexer
1383
1430
  => {
1384
1431
  emit(:tIDENTIFIER)
1385
1432
 
1386
- if @static_env && @static_env.declared?(tok.to_sym)
1387
- fgoto expr_end;
1433
+ if @static_env && @static_env.declared?(tok)
1434
+ fnext expr_end; fbreak;
1388
1435
  else
1389
- fgoto expr_arg;
1436
+ fnext expr_arg; fbreak;
1390
1437
  end
1391
1438
  };
1392
1439
 
1393
- c_space_nl+;
1440
+ #
1441
+ # WHITESPACE
1442
+ #
1443
+
1444
+ c_space_nl;
1445
+
1446
+ '\\\n';
1447
+
1448
+ '#' c_line* c_eol
1449
+ => { @comments << tok
1450
+ fhold; };
1451
+
1452
+ c_nl '=begin' ( c_space | c_eol )
1453
+ => { p = @ts - 1
1454
+ fgoto line_begin; };
1455
+
1456
+ #
1457
+ # DEFAULT TRANSITION
1458
+ #
1394
1459
 
1395
1460
  # The following rules match most binary and all unary operators.
1396
1461
  # Rules for binary operators provide better error reporting.
@@ -1439,11 +1504,21 @@ class Parser::Lexer
1439
1504
 
1440
1505
  if tok == '{'
1441
1506
  emit(:tLAMBEG)
1442
- else
1507
+ else # 'do'
1443
1508
  emit(:kDO_LAMBDA)
1444
1509
  end
1445
1510
  else
1446
- emit_table(PUNCTUATION)
1511
+ if tok == '{'
1512
+ emit_table(PUNCTUATION)
1513
+ else # 'do'
1514
+ if @cond.active?
1515
+ emit(:kDO_COND)
1516
+ elsif @cmdarg.active?
1517
+ emit(:kDO_BLOCK)
1518
+ else
1519
+ emit(:kDO)
1520
+ end
1521
+ end
1447
1522
  end
1448
1523
 
1449
1524
  fnext expr_value; fbreak;
@@ -1457,7 +1532,7 @@ class Parser::Lexer
1457
1532
  => { emit_table(KEYWORDS)
1458
1533
  fnext expr_fname; fbreak; };
1459
1534
 
1460
- 'class' c_space_nl '<<'
1535
+ 'class' c_space_nl* '<<'
1461
1536
  => { emit(:kCLASS, 'class', @ts, @ts + 5)
1462
1537
  emit(:tLSHFT, '<<', @te - 2, @te)
1463
1538
  fnext expr_beg; fbreak; };
@@ -1480,13 +1555,23 @@ class Parser::Lexer
1480
1555
  => {
1481
1556
  emit_table(KEYWORDS)
1482
1557
 
1483
- if ruby18? && tok == 'not'
1558
+ if version?(18) && tok == 'not'
1484
1559
  fnext expr_beg; fbreak;
1485
1560
  else
1486
1561
  fnext expr_arg; fbreak;
1487
1562
  end
1488
1563
  };
1489
1564
 
1565
+ '__ENCODING__'
1566
+ => {
1567
+ if version?(18)
1568
+ emit(:tIDENTIFIER)
1569
+ else
1570
+ emit_table(KEYWORDS)
1571
+ end
1572
+ fbreak;
1573
+ };
1574
+
1490
1575
  keyword_with_end
1491
1576
  => { emit_table(KEYWORDS)
1492
1577
  fbreak; };
@@ -1503,7 +1588,8 @@ class Parser::Lexer
1503
1588
  ( digit+ '_' )* digit* '_'?
1504
1589
  | '0' [Bb] %{ @num_base = 2; @num_digits_s = p }
1505
1590
  ( [01]+ '_' )* [01]* '_'?
1506
- | [1-9] %{ @num_base = 10; @num_digits_s = @ts }
1591
+ | [1-9] digit*
1592
+ %{ @num_base = 10; @num_digits_s = @ts }
1507
1593
  ( '_' digit+ )* digit* '_'?
1508
1594
  | '0' %{ @num_base = 8; @num_digits_s = @ts }
1509
1595
  ( '_' digit+ )* digit* '_'?
@@ -1512,14 +1598,17 @@ class Parser::Lexer
1512
1598
  digits = tok(@num_digits_s)
1513
1599
 
1514
1600
  if digits.end_with? '_'
1515
- error "trailing `_' in number"
1516
- elsif digits.empty? && @num_base == 8 && ruby18?
1601
+ diagnostic :error, Parser::ERRORS[:trailing_underscore],
1602
+ range(@te - 1, @te)
1603
+ elsif digits.empty? && @num_base == 8 && version?(18)
1517
1604
  # 1.8 did not raise an error on 0o.
1518
1605
  digits = "0"
1519
1606
  elsif digits.empty?
1520
- error "numeric literal without digits"
1521
- elsif @num_base == 8 && digits =~ /[89]/
1522
- error "invalid octal digit"
1607
+ diagnostic :error, Parser::ERRORS[:empty_numeric]
1608
+ elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
1609
+ invalid_s = @num_digits_s + invalid_idx
1610
+ diagnostic :error, Parser::ERRORS[:invalid_octal],
1611
+ range(invalid_s, invalid_s + 1)
1523
1612
  end
1524
1613
 
1525
1614
  emit(:tINTEGER, digits.to_i(@num_base))
@@ -1537,7 +1626,7 @@ class Parser::Lexer
1537
1626
  )
1538
1627
  => {
1539
1628
  if tok.start_with? '.'
1540
- error "no .<digit> floating literal anymore; put 0 before dot"
1629
+ diagnostic :error, Parser::ERRORS[:no_dot_digit_literal]
1541
1630
  elsif tok =~ /^[eE]/
1542
1631
  # The rule above allows to specify floats as just `e10', which is
1543
1632
  # certainly not a float. Send a patch if you can do this better.
@@ -1550,25 +1639,16 @@ class Parser::Lexer
1550
1639
  };
1551
1640
 
1552
1641
  #
1553
- # SYMBOL LITERALS
1642
+ # STRING AND XSTRING LITERALS
1554
1643
  #
1555
1644
 
1556
- # `echo foo` | :"bar" | :'baz'
1557
- '`' | ':'? ['"] # '
1645
+ # `echo foo`, "bar", 'baz'
1646
+ '`' | ['"] # '
1558
1647
  => {
1559
- type, delimiter = tok, tok[-1]
1648
+ type, delimiter = tok, tok[-1].chr
1560
1649
  fgoto *push_literal(type, delimiter, @ts);
1561
1650
  };
1562
1651
 
1563
- ':' bareword ambiguous_symbol_suffix
1564
- => { emit(:tSYMBOL, tok(@ts + 1, tm))
1565
- p = tm - 1; fbreak; };
1566
-
1567
- ':' ( bareword | global_var | class_var | instance_var |
1568
- operator_fname | operator_arithmetic | operator_rest )
1569
- => { emit(:tSYMBOL, tok(@ts + 1))
1570
- fbreak; };
1571
-
1572
1652
  #
1573
1653
  # CONSTANTS AND VARIABLES
1574
1654
  #
@@ -1578,7 +1658,7 @@ class Parser::Lexer
1578
1658
  fbreak; };
1579
1659
 
1580
1660
  constant ambiguous_const_suffix
1581
- => { emit(:tCONSTANT, tok(@ts, tm))
1661
+ => { emit(:tCONSTANT, tok(@ts, tm), @ts, tm)
1582
1662
  p = tm - 1; fbreak; };
1583
1663
 
1584
1664
  global_var | class_var_v | instance_var_v
@@ -1588,7 +1668,7 @@ class Parser::Lexer
1588
1668
  # METHOD CALLS
1589
1669
  #
1590
1670
 
1591
- '.'
1671
+ '.' | '::'
1592
1672
  => { emit_table(PUNCTUATION)
1593
1673
  fnext expr_dot; fbreak; };
1594
1674
 
@@ -1596,8 +1676,9 @@ class Parser::Lexer
1596
1676
  => { emit(:tIDENTIFIER)
1597
1677
  fnext expr_arg; fbreak; };
1598
1678
 
1599
- call_or_var [?!]
1600
- => { emit(:tFID)
1679
+ call_or_var ambiguous_fid_suffix
1680
+ => { emit(:tFID, tok(@ts, tm), @ts, tm)
1681
+ p = tm - 1
1601
1682
  fnext expr_arg; fbreak; };
1602
1683
 
1603
1684
  #
@@ -1613,6 +1694,7 @@ class Parser::Lexer
1613
1694
 
1614
1695
  e_rbrace | e_rparen | ']'
1615
1696
  => { emit_table(PUNCTUATION)
1697
+ @cond.lexpop; @cmdarg.lexpop
1616
1698
  fbreak; };
1617
1699
 
1618
1700
  operator_arithmetic '='
@@ -1623,6 +1705,10 @@ class Parser::Lexer
1623
1705
  => { emit_table(PUNCTUATION)
1624
1706
  fnext expr_value; fbreak; };
1625
1707
 
1708
+ e_lbrack
1709
+ => { emit_table(PUNCTUATION)
1710
+ fnext expr_beg; fbreak; };
1711
+
1626
1712
  punctuation_end
1627
1713
  => { emit_table(PUNCTUATION)
1628
1714
  fnext expr_beg; fbreak; };
@@ -1632,11 +1718,16 @@ class Parser::Lexer
1632
1718
  #
1633
1719
 
1634
1720
  '\\' e_heredoc_nl;
1635
- '\\' ( any - c_nl ) {
1636
- error "bare backslash only allowed before newline"
1721
+
1722
+ '\\' c_line {
1723
+ diagnostic :error, Parser::ERRORS[:bare_backslash],
1724
+ range(@ts, @ts + 1)
1725
+ fhold;
1637
1726
  };
1638
1727
 
1639
- '#' ( c_any - c_nl )*
1728
+ c_space+;
1729
+
1730
+ '#' c_line*
1640
1731
  => { @comments << tok(@ts, @te + 1) };
1641
1732
 
1642
1733
  e_heredoc_nl
@@ -1646,11 +1737,10 @@ class Parser::Lexer
1646
1737
  => { emit_table(PUNCTUATION)
1647
1738
  fnext expr_value; fbreak; };
1648
1739
 
1649
- c_space+;
1650
-
1651
1740
  c_any
1652
1741
  => {
1653
- error "unexpected #{tok.inspect}"
1742
+ message = Parser::ERRORS[:unexpected] % { :character => tok.inspect }
1743
+ diagnostic :fatal, message
1654
1744
  };
1655
1745
 
1656
1746
  c_eof => do_eof;
@@ -1681,10 +1771,10 @@ class Parser::Lexer
1681
1771
  c_line* c_nl
1682
1772
  => { @comments << tok };
1683
1773
 
1684
- any
1774
+ c_eof
1685
1775
  => {
1686
- @comments = ""
1687
- error "embedded document meats end of file (and they embark on a romantic journey)"
1776
+ # TODO better location information here
1777
+ diagnostic :fatal, Parser::ERRORS[:embedded_document], range(p - 1, p)
1688
1778
  };
1689
1779
  *|;
1690
1780