ruby_parser 3.13.0 → 3.15.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,5 @@
1
1
  # encoding: ASCII-8BIT
2
+ # TODO: remove
2
3
 
3
4
  require "sexp"
4
5
  require "ruby_lexer"
@@ -6,13 +7,50 @@ require "timeout"
6
7
  require "rp_extensions"
7
8
  require "rp_stringscanner"
8
9
 
10
class Sexp
  ##
  # Debugging aid for line-number tracking. Raises a RuntimeError if
  # any node yielded by #deep_each has a nil line, or if this node's
  # own line is not an Integer >= 1 that is also <= #line_min.

  def check_line_numbers
    raise "bad nil line for:\n%s" % [self.pretty_inspect] if nil_line?
    raise "bad line number for:\n%s" % [self.pretty_inspect] unless
      Integer === self.line &&
      self.line >= 1 &&
      self.line <= self.line_min
  end

  ##
  # Returns the minimum line number found on self and on the nodes
  # yielded by #deep_each. The result is memoized in @line_min.
  #
  # Nil lines are dropped *before* taking the minimum; comparing nil
  # with an Integer inside #min would raise ArgumentError.

  def line_min
    @line_min ||= self.deep_each.map(&:line).push(self.line).compact.min
  end

  ##
  # Returns true if any node yielded by #deep_each has a nil line.

  def nil_line?
    self.deep_each.map(&:line).any?(&:nil?)
  end
end
30
+
9
31
  module RubyParserStuff
10
- VERSION = "3.13.0"
32
+ VERSION = "3.15.0"
11
33
 
12
34
  attr_accessor :lexer, :in_def, :in_single, :file
13
35
  attr_accessor :in_kwarg
14
36
  attr_reader :env, :comments
15
37
 
38
+ ##
39
+ # Canonicalize conditionals. Eg:
40
+ #
41
+ # not x ? a : b
42
+ #
43
+ # becomes:
44
+ #
45
+ # x ? b : a
46
+
47
+ attr_accessor :canonicalize_conditions
48
+
49
+ ##
50
+ # The last token type returned from #next_token
51
+
52
+ attr_accessor :last_token_type
53
+
16
54
  $good20 = []
17
55
 
18
56
  %w[
@@ -31,6 +69,28 @@ module RubyParserStuff
31
69
  end
32
70
  end
33
71
 
72
+ ##
73
+ # for pure ruby systems only
74
+
75
+ def do_parse
76
+ _racc_do_parse_rb(_racc_setup, false)
77
+ end if ENV["PURE_RUBY"] || ENV["CHECK_LINE_NUMS"]
78
+
79
+ if ENV["CHECK_LINE_NUMS"] then
80
+ def _racc_do_reduce arg, act
81
+ x = super
82
+
83
+ @racc_vstack.grep(Sexp).each do |sexp|
84
+ sexp.check_line_numbers
85
+ end
86
+ x
87
+ end
88
+ end
89
+
90
+ ARG_TYPES = [:arglist, :call_args, :array, :args].map { |k|
91
+ [k, true]
92
+ }.to_h
93
+
34
94
  has_enc = "".respond_to? :encoding
35
95
 
36
96
  # This is in sorted order of occurrence according to
@@ -48,85 +108,52 @@ module RubyParserStuff
48
108
  Encoding::EUC_JP
49
109
  ] if has_enc
50
110
 
51
- def syntax_error msg
52
- raise RubyParser::SyntaxError, msg
53
- end
111
+ JUMP_TYPE = [:return, :next, :break, :yield].map { |k| [k, true] }.to_h
54
112
 
55
- def arg_blk_pass node1, node2 # TODO: nuke
56
- node1 = s(:arglist, node1) unless [:arglist, :call_args, :array, :args].include? node1.sexp_type
57
- node1 << node2 if node2
58
- node1
59
- end
113
+ TAB_WIDTH = 8
60
114
 
61
- def arg_concat node1, node2 # TODO: nuke
62
- raise "huh" unless node2
63
- node1 << s(:splat, node2).compact
64
- node1
65
- end
115
+ def initialize(options = {})
116
+ super()
66
117
 
67
- def clean_mlhs sexp
68
- case sexp.sexp_type
69
- when :masgn then
70
- if sexp.size == 2 and sexp[1].sexp_type == :array then
71
- s(:masgn, *sexp[1].sexp_body.map { |sub| clean_mlhs sub })
72
- else
73
- debug20 5
74
- sexp
75
- end
76
- when :gasgn, :iasgn, :lasgn, :cvasgn then
77
- if sexp.size == 2 then
78
- sexp.last
79
- else
80
- debug20 7
81
- sexp # optional value
82
- end
83
- else
84
- raise "unsupported type: #{sexp.inspect}"
85
- end
86
- end
118
+ v = self.class.name[/2\d/]
119
+ raise "Bad Class name #{self.class}" unless v
87
120
 
88
- def block_var *args
89
- result = self.args args
90
- result.sexp_type = :masgn
91
- result
92
- end
121
+ self.lexer = RubyLexer.new v && v.to_i
122
+ self.lexer.parser = self
123
+ self.in_kwarg = false
93
124
 
94
- def array_to_hash array
95
- case array.sexp_type
96
- when :kwsplat then
97
- array
98
- else
99
- s(:hash, *array.sexp_body)
100
- end
125
+ @env = RubyParserStuff::Environment.new
126
+ @comments = []
127
+
128
+ @canonicalize_conditions = true
129
+
130
+ self.reset
101
131
  end
102
132
 
103
- def call_args args
104
- result = s(:call_args)
133
+ def arg_concat node1, node2 # TODO: nuke
134
+ raise "huh" unless node2
105
135
 
106
- args.each do |arg|
107
- case arg
108
- when Sexp then
109
- case arg.sexp_type
110
- when :array, :args, :call_args then # HACK? remove array at some point
111
- result.concat arg.sexp_body
112
- else
113
- result << arg
114
- end
115
- when Symbol then
116
- result << arg
117
- when ",", nil then
118
- # ignore
119
- else
120
- raise "unhandled: #{arg.inspect} in #{args.inspect}"
121
- end
122
- end
136
+ splat = s(:splat, node2)
137
+ splat.line node2.line
123
138
 
124
- result
139
+ node1 << splat
140
+ end
141
+
142
+ def argl x
143
+ x = s(:arglist, x) if x and x.sexp_type == :array
144
+ x
125
145
  end
126
146
 
127
147
  def args args
128
148
  result = s(:args)
129
149
 
150
+ ss = args.grep Sexp
151
+ if ss.empty? then
152
+ result.line lexer.lineno
153
+ else
154
+ result.line ss.first.line
155
+ end
156
+
130
157
  args.each do |arg|
131
158
  case arg
132
159
  when Sexp then
@@ -162,13 +189,28 @@ module RubyParserStuff
162
189
  result
163
190
  end
164
191
 
192
##
# Converts +array+ into a :hash node built from its body, carrying
# over the line number. A :kwsplat node is returned untouched.

def array_to_hash array
  return array if array.sexp_type == :kwsplat

  s(:hash, *array.sexp_body).line array.line
end
200
+
165
201
  def aryset receiver, index
166
202
  index ||= s()
167
- s(:attrasgn, receiver, :"[]=", *index.sexp_body).compact # [].sexp_body => nil
203
+ l = receiver.line
204
+ result = s(:attrasgn, receiver, :"[]=",
205
+ *index.sexp_body).compact # [].sexp_body => nil
206
+ result.line = l
207
+ result
168
208
  end
169
209
 
170
210
  def assignable(lhs, value = nil)
171
211
  id = lhs.to_sym unless Sexp === lhs
212
+
213
+ raise "WTF" if Sexp === id
172
214
  id = id.to_sym if Sexp === id
173
215
 
174
216
  raise "write a test 1" if id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
@@ -197,11 +239,33 @@ module RubyParserStuff
197
239
 
198
240
  self.env[id] ||= :lvar if result.sexp_type == :lasgn
199
241
 
242
+ line = case lhs
243
+ when Sexp then
244
+ lhs.line
245
+ else
246
+ value && value.line || lexer.lineno
247
+ end
248
+
200
249
  result << value if value
250
+ result.line = line
201
251
 
202
252
  return result
203
253
  end
204
254
 
255
+ def backref_assign_error ref
256
+ # TODO: need a test for this... obviously
257
+ case ref.sexp_type
258
+ when :nth_ref then
259
+ raise "write a test 2"
260
+ raise SyntaxError, "Can't set variable %p" % ref.last
261
+ when :back_ref then
262
+ raise "write a test 3"
263
+ raise SyntaxError, "Can't set back reference %p" % ref.last
264
+ else
265
+ raise "Unknown backref type: #{ref.inspect}"
266
+ end
267
+ end
268
+
205
269
  def block_append(head, tail)
206
270
  return head if tail.nil?
207
271
  return tail if head.nil?
@@ -215,6 +279,69 @@ module RubyParserStuff
215
279
  head << tail
216
280
  end
217
281
 
282
+ def block_dup_check call_or_args, block
283
+ syntax_error "Both block arg and actual block given." if
284
+ block and call_or_args.block_pass?
285
+ end
286
+
287
+ def block_var *args
288
+ result = self.args args
289
+ result.sexp_type = :masgn
290
+ result
291
+ end
292
+
293
##
# Collects the pieces in +args+ into a single s(:call_args) node.
#
# Sexps of type :array, :args or :call_args are spliced in (their
# bodies are concatenated); any other Sexp and any Symbol is
# appended as-is; "," and nil are ignored. Anything else raises.
# The result's line is taken from the first Sexp in +args+, falling
# back to the lexer's current line number.

def call_args args
  first_sexp = args.grep(Sexp).first

  result = s(:call_args)
  result.line(first_sexp ? first_sexp.line : lexer.lineno)

  args.each do |arg|
    case arg
    when Sexp then
      # HACK? remove array at some point
      if [:array, :args, :call_args].include? arg.sexp_type then
        result.concat arg.sexp_body
      else
        result << arg
      end
    when Symbol then
      result << arg
    when ",", nil then
      next # ignore
    else
      raise "unhandled: #{arg.inspect} in #{args.inspect}"
    end
  end

  result
end
323
+
324
+ def clean_mlhs sexp
325
+ case sexp.sexp_type
326
+ when :masgn then
327
+ if sexp.size == 2 and sexp[1].sexp_type == :array then
328
+ s(:masgn, *sexp[1].sexp_body.map { |sub| clean_mlhs sub })
329
+ else
330
+ debug20 5
331
+ sexp
332
+ end
333
+ when :gasgn, :iasgn, :lasgn, :cvasgn then
334
+ if sexp.size == 2 then
335
+ sexp.last
336
+ else
337
+ debug20 7
338
+ sexp # optional value
339
+ end
340
+ else
341
+ raise "unsupported type: #{sexp.inspect}"
342
+ end
343
+ end
344
+
218
345
  def cond node
219
346
  return nil if node.nil?
220
347
  node = value_expr node
@@ -222,56 +349,113 @@ module RubyParserStuff
222
349
  case node.sexp_type
223
350
  when :lit then
224
351
  if Regexp === node.last then
225
- return s(:match, node)
352
+ s(:match, node)
226
353
  else
227
- return node
354
+ node
228
355
  end
229
356
  when :and then
230
- return s(:and, cond(node[1]), cond(node[2]))
357
+ _, lhs, rhs = node
358
+ s(:and, cond(lhs), cond(rhs))
231
359
  when :or then
232
- return s(:or, cond(node[1]), cond(node[2]))
360
+ _, lhs, rhs = node
361
+ s(:or, cond(lhs), cond(rhs))
233
362
  when :dot2 then
234
363
  label = "flip#{node.hash}"
235
364
  env[label] = :lvar
236
365
  _, lhs, rhs = node
237
- return s(:flip2, lhs, rhs)
366
+ s(:flip2, lhs, rhs) # TODO: recurse?
238
367
  when :dot3 then
239
368
  label = "flip#{node.hash}"
240
369
  env[label] = :lvar
241
370
  _, lhs, rhs = node
242
- return s(:flip3, lhs, rhs)
371
+ s(:flip3, lhs, rhs)
243
372
  else
244
- return node
245
- end
373
+ node
374
+ end.line node.line
246
375
  end
247
376
 
248
- ##
249
- # for pure ruby systems only
377
+ def dedent sexp
378
+ dedent_count = dedent_size sexp
250
379
 
251
- def do_parse
252
- _racc_do_parse_rb(_racc_setup, false)
253
- end if ENV['PURE_RUBY']
380
+ skip_one = false
381
+ sexp.map { |obj|
382
+ case obj
383
+ when Symbol then
384
+ obj
385
+ when String then
386
+ obj.lines.map { |l| remove_whitespace_width l, dedent_count }.join
387
+ when Sexp then
388
+ case obj.sexp_type
389
+ when :evstr then
390
+ skip_one = true
391
+ obj
392
+ when :str then
393
+ _, str = obj
394
+ str = if skip_one then
395
+ skip_one = false
396
+ s1, *rest = str.lines
397
+ s1 + rest.map { |l| remove_whitespace_width l, dedent_count }.join
398
+ else
399
+ str.lines.map { |l| remove_whitespace_width l, dedent_count }.join
400
+ end
254
401
 
255
- def new_match lhs, rhs
256
- if lhs then
257
- case lhs.sexp_type
258
- when :dregx, :dregx_once then
259
- return s(:match2, lhs, rhs).line(lhs.line)
260
- when :lit then
261
- return s(:match2, lhs, rhs).line(lhs.line) if Regexp === lhs.last
402
+ s(:str, str).line obj.line
403
+ else
404
+ warn "unprocessed sexp %p" % [obj]
405
+ end
406
+ else
407
+ warn "unprocessed: %p" % [obj]
262
408
  end
263
- end
409
+ }
410
+ end
264
411
 
265
- if rhs then
266
- case rhs.sexp_type
267
- when :dregx, :dregx_once then
268
- return s(:match3, rhs, lhs).line(lhs.line)
269
- when :lit then
270
- return s(:match3, rhs, lhs).line(lhs.line) if Regexp === rhs.last
412
##
# Returns the smallest leading-whitespace width (as measured by
# #whitespace_width) across the lines of the string parts of +sexp+,
# or nil if no lines were found.
#
# Bare Strings and :str sexps contribute their lines. The first line
# of a :str that directly follows an :evstr is not counted. Symbols
# and :evstr nodes contribute nothing. Any other element is reported
# via +warn+ and skipped.

def dedent_size sexp
  skip_one = false
  sexp.flat_map { |s|
    lines =
      case s
      when Symbol then
        nil
      when String then
        s.lines
      when Sexp then
        case s.sexp_type
        when :evstr then
          skip_one = true
          nil
        when :str then
          _, str = s
          parts = str.lines
          if skip_one then
            skip_one = false
            parts.shift
          end
          parts
        else
          warn "unprocessed sexp %p" % [s]
          nil # warn returns nil; there is nothing to measure
        end
      else
        warn "unprocessed: %p" % [s]
        nil
      end

    # Skip elements that contributed no lines; the nil placeholder is
    # removed by the trailing #compact.
    next unless lines

    lines.map { |l| whitespace_width l[/^[ \t]*/] }
  }.compact.min
end
441
+
442
+ def dedent_string string, width
443
+ characters_skipped = 0
444
+ indentation_skipped = 0
445
+
446
+ string.chars.each do |char|
447
+ break if indentation_skipped >= width
448
+ if char == " "
449
+ characters_skipped += 1
450
+ indentation_skipped += 1
451
+ elsif char == "\t"
452
+ proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
453
+ break if proposed > width
454
+ characters_skipped += 1
455
+ indentation_skipped = proposed
271
456
  end
272
457
  end
273
-
274
- return new_call(lhs, :"=~", argl(rhs)).line(lhs.line)
458
+ string[characters_skipped..-1]
275
459
  end
276
460
 
277
461
  def gettable(id)
@@ -303,33 +487,94 @@ module RubyParserStuff
303
487
  result
304
488
  end
305
489
 
490
##
# Last-ditch attempt to get +str+ into UTF-8, in place.
#
# Tries +extra+ first (when given), then every entry of
# ENCODING_ORDER: the string is force-encoded to the candidate and,
# if that yields a valid encoding, transcoded to UTF-8 and the search
# stops. Unknown encoding names and conversion failures just move on
# to the next candidate. Raises if +str+ is not UTF-8 afterwards.

def hack_encoding str, extra = nil
  candidates = ENCODING_ORDER.dup
  candidates.unshift(extra) unless extra.nil?

  # terrible, horrible, no good, very bad, last ditch effort.
  candidates.each do |enc|
    begin
      str.force_encoding enc
      next unless str.valid_encoding?

      str.encode! Encoding::UTF_8
      break
    rescue ArgumentError, # unknown encoding name
           Encoding::InvalidByteSequenceError,
           Encoding::UndefinedConversionError
      # do nothing, try the next candidate
    end
  end

  # no amount of pain is enough for you.
  raise "Bad encoding. Need a magic encoding comment." unless
    str.encoding.name == "UTF-8"
end
515
+
306
516
  ##
307
- # Canonicalize conditionals. Eg:
308
- #
309
- # not x ? a : b
517
+ # Returns a UTF-8 encoded string after processing BOMs and magic
518
+ # encoding comments.
310
519
  #
311
- # becomes:
520
+ # Holy crap... ok. Here goes:
312
521
  #
313
- # x ? b : a
522
+ # Ruby's file handling and encoding support is insane. We need to be
523
+ # able to lex a file. The lexer file is explicitly UTF-8 to make
524
+ # things cleaner. This allows us to deal with extended chars in
525
+ # class and method names. In order to do this, we need to encode all
526
+ # input source files as UTF-8. First, we look for a UTF-8 BOM by
527
+ # looking at the first line while forcing its encoding to
528
+ # ASCII-8BIT. If we find a BOM, we strip it and set the expected
529
+ # encoding to UTF-8. Then, we search for a magic encoding comment.
530
+ # If found, it overrides the BOM. Finally, we force the encoding of
531
+ # the input string to whatever was found, and then encode that to
532
+ # UTF-8 for compatibility with the lexer.
314
533
 
315
- attr_accessor :canonicalize_conditions
534
+ def handle_encoding str
535
+ str = str.dup
536
+ has_enc = str.respond_to? :encoding
537
+ encoding = nil
316
538
 
317
- def initialize(options = {})
318
- super()
539
+ header = str.each_line.first(2)
540
+ header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc
319
541
 
320
- v = self.class.name[/2\d/]
321
- raise "Bad Class name #{self.class}" unless v
542
+ first = header.first || ""
543
+ encoding, str = "utf-8", str.b[3..-1] if first =~ /\A\xEF\xBB\xBF/
322
544
 
323
- self.lexer = RubyLexer.new v && v.to_i
324
- self.lexer.parser = self
325
- self.in_kwarg = false
545
+ encoding = $1.strip if header.find { |s|
546
+ s[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
547
+ s[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
548
+ }
326
549
 
327
- @env = RubyParserStuff::Environment.new
328
- @comments = []
550
+ if encoding then
551
+ if has_enc then
552
+ encoding.sub!(/utf-8-.+$/, "utf-8") # HACK for stupid emacs formats
553
+ hack_encoding str, encoding
554
+ else
555
+ warn "Skipping magic encoding comment"
556
+ end
557
+ else
558
+ # nothing specified... ugh. try to encode as utf-8
559
+ hack_encoding str if has_enc
560
+ end
329
561
 
330
- @canonicalize_conditions = true
562
+ str
563
+ end
331
564
 
332
- self.reset
565
##
# Takes a [jump, iter] pair where the jump node wraps a call, moves
# that call into position 1 of +iter+ (mutating it), and returns
# [iter, a fresh empty node of the jump's type on the jump's line].

def invert_block_call val
  jump, iter = val
  jump_type, call = jump

  iter.insert 1, call

  [iter, s(jump_type).line(jump.line)]
end
575
+
576
+ def inverted? val
577
+ JUMP_TYPE[val[0].sexp_type]
333
578
  end
334
579
 
335
580
  def list_append list, item # TODO: nuke me *sigh*
@@ -350,12 +595,14 @@ module RubyParserStuff
350
595
 
351
596
  htype, ttype = head.sexp_type, tail.sexp_type
352
597
 
353
- head = s(:dstr, '', head) if htype == :evstr
598
+ head = s(:dstr, "", head).line head.line if htype == :evstr
354
599
 
355
600
  case ttype
356
601
  when :str then
357
602
  if htype == :str
358
- head.last << tail.last
603
+ a, b = head.last, tail.last
604
+ b = b.dup.force_encoding a.encoding unless Encoding.compatible?(a, b)
605
+ a << b
359
606
  elsif htype == :dstr and head.size == 2 then
360
607
  head.last << tail.last
361
608
  else
@@ -369,15 +616,15 @@ module RubyParserStuff
369
616
  head.line = lineno
370
617
  else
371
618
  tail.sexp_type = :array
372
- tail[1] = s(:str, tail[1])
373
- tail.delete_at 1 if tail[1] == s(:str, '')
619
+ tail[1] = s(:str, tail[1]).line tail.line
620
+ tail.delete_at 1 if tail[1] == s(:str, "")
374
621
 
375
622
  head.push(*tail.sexp_body)
376
623
  end
377
624
  when :evstr then
378
625
  if htype == :str then
379
626
  f, l = head.file, head.line
380
- head = s(:dstr, *head.sexp_body)
627
+ head = s(:dstr, *head.sexp_body).line head.line
381
628
  head.file = f
382
629
  head.line = l
383
630
  end
@@ -408,12 +655,15 @@ module RubyParserStuff
408
655
  node = rhs
409
656
  end
410
657
 
411
- node[2] = s(type, rhs, right)
658
+ node.pop
659
+ node << s(type, rhs, right).line(rhs.line)
412
660
 
413
661
  return left
414
662
  end
415
663
 
416
- return s(type, left, right)
664
+ result = s(type, left, right)
665
+ result.line left.line if left.line
666
+ result
417
667
  end
418
668
 
419
669
  def new_aref val
@@ -422,6 +672,49 @@ module RubyParserStuff
422
672
  new_call val[0], :"[]", val[2]
423
673
  end
424
674
 
675
##
# Appends the value expression of +rhs+ to the assignment node +lhs+
# and returns +lhs+. A :const lhs is first retyped to :cdecl.
# Returns nil when +lhs+ is nil; raises on an unknown lhs type.

def new_assign lhs, rhs
  return nil unless lhs

  value = value_expr rhs
  type  = lhs.sexp_type

  if type == :const then
    lhs.sexp_type = :cdecl
  elsif ![:lasgn, :iasgn, :cdecl, :cvdecl, :gasgn, :cvasgn,
          :attrasgn, :safe_attrasgn].include?(type) then
    raise "unknown lhs #{lhs.inspect} w/ #{value.inspect}"
  end

  lhs << value
  lhs
end
692
+
693
##
# Builds an attribute-assignment node for +recv+ and the setter
# named "#{meth}=".
#
# +call_op+ selects the node type: "." gives :attrasgn and "&."
# gives :safe_attrasgn. Any other operator raises a RuntimeError
# naming the offending operator. The result takes its line number
# from +recv+.

def new_attrasgn recv, meth, call_op = :"."
  meth = :"#{meth}="

  result = case call_op.to_sym
           when :"."
             s(:attrasgn, recv, meth)
           when :"&."
             s(:safe_attrasgn, recv, meth)
           else
             # Report the operator we were actually given; no +type+
             # local exists in this method.
             raise "unknown call operator: `#{call_op.inspect}`"
           end

  result.line = recv.line
  result
end
708
+
709
##
# Builds the node for a begin/end expression from +val+, where
# val[1] is the line number and val[2] the (possibly nil) body.
# An empty body yields s(:nil); otherwise the body is wrapped in
# s(:begin, ...). The node is stamped with the given line.

def new_begin val
  lineno, body = val[1], val[2]

  node = if body then
           s(:begin, body)
         else
           s(:nil)
         end

  node.line lineno
  node
end
717
+
425
718
  def new_body val
426
719
  body, resbody, elsebody, ensurebody = val
427
720
 
@@ -445,43 +738,27 @@ module RubyParserStuff
445
738
 
446
739
  if elsebody and not resbody then
447
740
  warning("else without rescue is useless")
448
- result = s(:begin, result) if result
741
+ result = s(:begin, result).line result.line if result
449
742
  result = block_append(result, elsebody)
450
743
  end
451
744
 
452
- result = s(:ensure, result, ensurebody).compact if ensurebody
745
+ if ensurebody
746
+ lineno = (result || ensurebody).line
747
+ result = s(:ensure, result, ensurebody).compact.line lineno
748
+ end
453
749
 
454
750
  result
455
751
  end
456
752
 
457
753
  def new_brace_body args, body, lineno
458
- new_iter(nil, args, body).line(lineno)
459
- end
460
-
461
- def argl x
462
- x = s(:arglist, x) if x and x.sexp_type == :array
463
- x
464
- end
465
-
466
- def backref_assign_error ref
467
- # TODO: need a test for this... obviously
468
- case ref.sexp_type
469
- when :nth_ref then
470
- raise "write a test 2"
471
- raise SyntaxError, "Can't set variable %p" % ref.last
472
- when :back_ref then
473
- raise "write a test 3"
474
- raise SyntaxError, "Can't set back reference %p" % ref.last
475
- else
476
- raise "Unknown backref type: #{ref.inspect}"
477
- end
754
+ new_iter(nil, args, body).line lineno
478
755
  end
479
756
 
480
- def new_call recv, meth, args = nil, call_op = :'.'
757
+ def new_call recv, meth, args = nil, call_op = :"."
481
758
  result = case call_op.to_sym
482
- when :'.'
759
+ when :"."
483
760
  s(:call, recv, meth)
484
- when :'&.'
761
+ when :"&."
485
762
  s(:safe_call, recv, meth)
486
763
  else
487
764
  raise "unknown call operator: `#{type.inspect}`"
@@ -491,35 +768,20 @@ module RubyParserStuff
491
768
  # TODO: need a test with f(&b) { } to produce warning
492
769
 
493
770
  if args
494
- if [:arglist, :args, :array, :call_args].include? args.sexp_type
771
+ if ARG_TYPES[args.sexp_type] then
495
772
  result.concat args.sexp_body
496
773
  else
497
774
  result << args
498
775
  end
499
776
  end
500
777
 
501
- line = result.grep(Sexp).map(&:line).compact.min
502
- result.line = line if line
778
+ # line = result.grep(Sexp).map(&:line).compact.min
779
+ result.line = recv.line if recv
780
+ result.line ||= lexer.lineno
503
781
 
504
782
  result
505
783
  end
506
784
 
507
- def new_attrasgn recv, meth, call_op
508
- meth = :"#{meth}="
509
-
510
- result = case call_op.to_sym
511
- when :'.'
512
- s(:attrasgn, recv, meth)
513
- when :'&.'
514
- s(:safe_attrasgn, recv, meth)
515
- else
516
- raise "unknown call operator: `#{type.inspect}`"
517
- end
518
-
519
- result.line = recv.line
520
- result
521
- end
522
-
523
785
  def new_case expr, body, line
524
786
  result = s(:case, expr)
525
787
 
@@ -565,11 +827,29 @@ module RubyParserStuff
565
827
  result
566
828
  end
567
829
 
830
##
# Builds an operator-assignment node from +val+ = [lhs, op, rhs].
# "||" maps to :op_asgn_or, "&&" to :op_asgn_and, and any other
# operator to s(:op_asgn, lhs, op, rhs). The result takes its line
# from +lhs+.

def new_const_op_asgn val
  lhs, raw_op, rhs = val[0], val[1], val[2]
  op = raw_op.to_sym

  node = if op == :"||" then
           s(:op_asgn_or, lhs, rhs)
         elsif op == :"&&" then
           s(:op_asgn_and, lhs, rhs)
         else
           s(:op_asgn, lhs, op, rhs)
         end

  node.line = lhs.line
  node
end
845
+
568
846
  def new_defn val
569
- (_, line), (name, _), _, args, body, * = val
570
- body ||= s(:nil)
847
+ (_, line), name, _, args, body, nil_body_line, * = val
848
+ body ||= s(:nil).line nil_body_line
571
849
 
572
- result = s(:defn, name.to_sym, args)
850
+ args.line line
851
+
852
+ result = s(:defn, name.to_sym, args).line line
573
853
 
574
854
  if body then
575
855
  if body.sexp_type == :block then
@@ -579,19 +859,23 @@ module RubyParserStuff
579
859
  end
580
860
  end
581
861
 
582
- args.line line
583
- result.line = line
584
862
  result.comments = self.comments.pop
585
863
 
586
864
  result
587
865
  end
588
866
 
589
867
  def new_defs val
590
- recv, (name, _line), args, body = val[1], val[4], val[6], val[7]
591
- body ||= s(:nil)
868
+ _, recv, _, _, name, (_in_def, line), args, body, _ = val
869
+
870
+ body ||= s(:nil).line line
871
+
872
+ args.line line
592
873
 
593
874
  result = s(:defs, recv, name.to_sym, args)
594
875
 
876
+ # TODO: remove_begin
877
+ # TODO: reduce_nodes
878
+
595
879
  if body then
596
880
  if body.sexp_type == :block then
597
881
  result.push(*body.sexp_body)
@@ -616,7 +900,9 @@ module RubyParserStuff
616
900
  end
617
901
 
618
902
  def new_hash val
619
- s(:hash, *val[2].values).line(val[1])
903
+ _, line, assocs = val
904
+
905
+ s(:hash).line(line).concat assocs.values
620
906
  end
621
907
 
622
908
  def new_if c, t, f
@@ -637,27 +923,59 @@ module RubyParserStuff
637
923
  result << args
638
924
  result << body if body
639
925
 
640
- args.sexp_type = :args unless args == 0
926
+ result.line call.line if call
641
927
 
642
- result
643
- end
928
+ unless args == 0 then
929
+ args.line call.line if call
930
+ args.sexp_type = :args
931
+ end
644
932
 
645
- def new_masgn_arg rhs, wrap = false
646
- rhs = value_expr(rhs)
647
- rhs = s(:to_ary, rhs) if wrap # HACK: could be array if lhs isn't right
648
- rhs
933
+ result
649
934
  end
650
935
 
651
936
  def new_masgn lhs, rhs, wrap = false
652
937
  _, ary = lhs
653
938
 
939
+ line = rhs.line
654
940
  rhs = value_expr(rhs)
655
941
  rhs = ary ? s(:to_ary, rhs) : s(:array, rhs) if wrap
942
+ rhs.line line if wrap
656
943
 
657
944
  lhs.delete_at 1 if ary.nil?
658
945
  lhs << rhs
659
946
 
660
- lhs
947
+ lhs
948
+ end
949
+
950
+ def new_masgn_arg rhs, wrap = false
951
+ rhs = value_expr(rhs)
952
+ # HACK: could be array if lhs isn't right
953
+ rhs = s(:to_ary, rhs).line rhs.line if wrap
954
+ rhs
955
+ end
956
+
957
##
# Builds the node for +lhs =~ rhs+.
#
# If +lhs+ is a regexp node (:dregx, :dregx_once, or a :lit holding
# a Regexp) the result is s(:match2, lhs, rhs). Otherwise, if +rhs+
# is such a node, the result is s(:match3, rhs, lhs). Failing both,
# a plain :"=~" call is built. The line is always taken from +lhs+.

def new_match lhs, rhs
  regexp_node = lambda { |node|
    next false unless node

    type = node.sexp_type
    # TODO: no test coverage for the :dregx / :dregx_once cases
    type == :dregx || type == :dregx_once ||
      (type == :lit && Regexp === node.last)
  }

  if regexp_node[lhs] then
    s(:match2, lhs, rhs).line(lhs.line)
  elsif regexp_node[rhs] then
    s(:match3, rhs, lhs).line(lhs.line)
  else
    new_call(lhs, :"=~", argl(rhs)).line lhs.line
  end
end
662
980
 
663
981
  def new_module val
@@ -680,32 +998,41 @@ module RubyParserStuff
680
998
 
681
999
  def new_op_asgn val
682
1000
  lhs, asgn_op, arg = val[0], val[1].to_sym, val[2]
683
- name = lhs.value
1001
+ name = gettable(lhs.value).line lhs.line
684
1002
  arg = remove_begin(arg)
685
1003
  result = case asgn_op # REFACTOR
686
1004
  when :"||" then
687
1005
  lhs << arg
688
- s(:op_asgn_or, self.gettable(name), lhs)
1006
+ s(:op_asgn_or, name, lhs)
689
1007
  when :"&&" then
690
1008
  lhs << arg
691
- s(:op_asgn_and, self.gettable(name), lhs)
1009
+ s(:op_asgn_and, name, lhs)
692
1010
  else
693
- # TODO: why [2] ?
694
- lhs[2] = new_call(self.gettable(name), asgn_op, argl(arg))
1011
+ lhs << new_call(name, asgn_op, argl(arg))
695
1012
  lhs
696
1013
  end
697
1014
  result.line = lhs.line
698
1015
  result
699
1016
  end
700
1017
 
1018
##
# Builds an :op_asgn1 node from +val+, reading the receiver from
# val[0], the index args from val[2], the operator from val[4] and
# the value from val[5]. Non-nil args are retyped to :arglist in
# place. The result takes its line from the receiver.

def new_op_asgn1 val
  lhs, args, op, rhs = val[0], val[2], val[4], val[5]

  args.sexp_type = :arglist if args

  s(:op_asgn1, lhs, args, op.to_sym, rhs).tap { |node|
    node.line lhs.line
  }
end
1027
+
701
1028
  def new_op_asgn2 val
702
1029
  recv, call_op, meth, op, arg = val
703
1030
  meth = :"#{meth}="
704
1031
 
705
1032
  result = case call_op.to_sym
706
- when :'.'
1033
+ when :"."
707
1034
  s(:op_asgn2, recv, meth, op.to_sym, arg)
708
- when :'&.'
1035
+ when :"&."
709
1036
  s(:safe_op_asgn2, recv, meth, op.to_sym, arg)
710
1037
  else
711
1038
  raise "unknown call operator: `#{type.inspect}`"
@@ -715,21 +1042,49 @@ module RubyParserStuff
715
1042
  result
716
1043
  end
717
1044
 
1045
+ def new_qsym_list
1046
+ result = s(:array).line lexer.lineno
1047
+ self.lexer.fixup_lineno
1048
+ result
1049
+ end
1050
+
1051
+ def new_qsym_list_entry val
1052
+ _, str, _ = val
1053
+ result = s(:lit, str.to_sym).line lexer.lineno
1054
+ self.lexer.fixup_lineno
1055
+ result
1056
+ end
1057
+
1058
+ def new_qword_list
1059
+ result = s(:array).line lexer.lineno
1060
+ self.lexer.fixup_lineno
1061
+ result
1062
+ end
1063
+
1064
+ def new_qword_list_entry val
1065
+ _, str, _ = val
1066
+ str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
1067
+ result = s(:str, str).line lexer.lineno # TODO: problematic? grab from parser
1068
+ self.lexer.fixup_lineno
1069
+ result
1070
+ end
1071
+
718
1072
  def new_regexp val
719
- node = val[1] || s(:str, '')
720
- options = val[2]
1073
+ _, node, options = val
1074
+
1075
+ node ||= s(:str, "").line lexer.lineno
721
1076
 
722
1077
  o, k = 0, nil
723
1078
  options.split(//).uniq.each do |c| # FIX: this has a better home
724
1079
  v = {
725
- 'x' => Regexp::EXTENDED,
726
- 'i' => Regexp::IGNORECASE,
727
- 'm' => Regexp::MULTILINE,
728
- 'o' => Regexp::ONCE,
729
- 'n' => Regexp::ENC_NONE,
730
- 'e' => Regexp::ENC_EUC,
731
- 's' => Regexp::ENC_SJIS,
732
- 'u' => Regexp::ENC_UTF8,
1080
+ "x" => Regexp::EXTENDED,
1081
+ "i" => Regexp::IGNORECASE,
1082
+ "m" => Regexp::MULTILINE,
1083
+ "o" => Regexp::ONCE,
1084
+ "n" => Regexp::ENC_NONE,
1085
+ "e" => Regexp::ENC_EUC,
1086
+ "s" => Regexp::ENC_SJIS,
1087
+ "u" => Regexp::ENC_UTF8,
733
1088
  }[c]
734
1089
  raise "unknown regexp option: #{c}" unless v
735
1090
  o += v
@@ -762,7 +1117,7 @@ module RubyParserStuff
762
1117
  end
763
1118
  node << o if o and o != 0
764
1119
  else
765
- node = s(:dregx, '', node);
1120
+ node = s(:dregx, "", node).line node.line
766
1121
  node.sexp_type = :dregx_once if options =~ /o/
767
1122
  node << o if o and o != 0
768
1123
  end
@@ -770,19 +1125,20 @@ module RubyParserStuff
770
1125
  node
771
1126
  end
772
1127
 
773
- def new_rescue body, resbody
774
- s(:rescue, body, resbody)
775
- end
776
-
777
1128
  def new_resbody cond, body
778
1129
  if body && body.sexp_type == :block then
779
1130
  body.shift # remove block and splat it in directly
780
1131
  else
781
1132
  body = [body]
782
1133
  end
1134
+
783
1135
  s(:resbody, cond, *body).line cond.line
784
1136
  end
785
1137
 
1138
+ def new_rescue body, resbody
1139
+ s(:rescue, body, resbody).line body.line
1140
+ end
1141
+
786
1142
  def new_sclass val
787
1143
  recv, in_def, in_single, body = val[3], val[4], val[6], val[7]
788
1144
 
@@ -803,62 +1159,36 @@ module RubyParserStuff
803
1159
  end
804
1160
 
805
1161
  def new_string val
806
- str = val[0]
1162
+ str, = val
1163
+ str.force_encoding("UTF-8")
1164
+ # TODO: remove:
807
1165
  str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
808
- result = s(:str, str)
1166
+ result = s(:str, str).line lexer.lineno
809
1167
  self.lexer.fixup_lineno str.count("\n")
810
1168
  result
811
1169
  end
812
1170
 
813
- def new_qword_list_entry val
814
- str = val[1]
815
- str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
816
- result = s(:str, str)
817
- self.lexer.fixup_lineno
818
- result
819
- end
820
-
821
- def new_qword_list
822
- result = s(:array)
823
- self.lexer.fixup_lineno
824
- result
825
- end
826
-
827
- def new_word_list
828
- result = s(:array)
829
- self.lexer.fixup_lineno
830
- result
831
- end
832
-
833
- def new_word_list_entry val
834
- result = val[1].sexp_type == :evstr ? s(:dstr, "", val[1]) : val[1]
835
- self.lexer.fixup_lineno
836
- result
837
- end
838
-
839
- def new_qsym_list
840
- result = s(:array)
841
- self.lexer.fixup_lineno
842
- result
843
- end
844
-
845
- def new_qsym_list_entry val
846
- result = s(:lit, val[1].to_sym)
847
- self.lexer.fixup_lineno
848
- result
1171
+ def new_super args
1172
+ if args && args.node_type == :block_pass then
1173
+ s(:super, args).line args.line
1174
+ else
1175
+ args ||= s(:arglist).line lexer.lineno
1176
+ s(:super, *args.sexp_body).line args.line
1177
+ end
849
1178
  end
850
1179
 
851
1180
  def new_symbol_list
852
- result = s(:array)
1181
+ result = s(:array).line lexer.lineno
853
1182
  self.lexer.fixup_lineno
854
1183
  result
855
1184
  end
856
1185
 
857
1186
  def new_symbol_list_entry val
858
- _list, sym, _nil = val # TODO: use _list
859
- result = val[1]
1187
+ _, sym, _ = val
1188
+
1189
+ sym ||= s(:str, "")
860
1190
 
861
- result ||= s(:str, "")
1191
+ line = lexer.lineno
862
1192
 
863
1193
  case sym.sexp_type
864
1194
  when :dstr then
@@ -866,26 +1196,21 @@ module RubyParserStuff
866
1196
  when :str then
867
1197
  sym = s(:lit, sym.last.to_sym)
868
1198
  else
869
- sym = s(:dsym, "", sym || s(:str, ""))
1199
+ sym = s(:dsym, "", sym || s(:str, "").line(line))
870
1200
  end
1201
+
1202
+ sym.line line
1203
+
871
1204
  self.lexer.fixup_lineno
872
- sym
873
- end
874
1205
 
875
- def new_super args
876
- if args && args.node_type == :block_pass then
877
- s(:super, args)
878
- else
879
- args ||= s(:arglist)
880
- s(:super, *args.sexp_body)
881
- end
1206
+ sym
882
1207
  end
883
1208
 
884
1209
  def new_undef n, m = nil
885
1210
  if m then
886
- block_append(n, s(:undef, m))
1211
+ block_append(n, s(:undef, m).line(m.line))
887
1212
  else
888
- s(:undef, n)
1213
+ s(:undef, n).line n.line
889
1214
  end
890
1215
  end
891
1216
 
@@ -918,20 +1243,36 @@ module RubyParserStuff
918
1243
  new_until_or_while :while, block, expr, pre
919
1244
  end
920
1245
 
921
- def new_xstring str
922
- if str then
923
- case str.sexp_type
1246
+ def new_word_list
1247
+ result = s(:array).line lexer.lineno
1248
+ self.lexer.fixup_lineno
1249
+ result
1250
+ end
1251
+
1252
+ def new_word_list_entry val
1253
+ _, word, _ = val
1254
+ result = word.sexp_type == :evstr ? s(:dstr, "", word).line(word.line) : word
1255
+ self.lexer.fixup_lineno
1256
+ result
1257
+ end
1258
+
1259
+ def new_xstring val
1260
+ _, node = val
1261
+
1262
+ node ||= s(:str, "").line lexer.lineno
1263
+
1264
+ if node then
1265
+ case node.sexp_type
924
1266
  when :str
925
- str.sexp_type = :xstr
1267
+ node.sexp_type = :xstr
926
1268
  when :dstr
927
- str.sexp_type = :dxstr
1269
+ node.sexp_type = :dxstr
928
1270
  else
929
- str = s(:dxstr, '', str)
1271
+ node = s(:dxstr, "", node).line node.line
930
1272
  end
931
- str
932
- else
933
- s(:xstr, '')
934
1273
  end
1274
+
1275
+ node
935
1276
  end
936
1277
 
937
1278
  def new_yield args = nil
@@ -940,113 +1281,32 @@ module RubyParserStuff
940
1281
  raise SyntaxError, "Block argument should not be given." if
941
1282
  args && args.node_type == :block_pass
942
1283
 
943
- args ||= s(:arglist)
1284
+ args ||= s(:arglist).line lexer.lineno
944
1285
 
945
1286
  args.sexp_type = :arglist if [:call_args, :array].include? args.sexp_type
946
- args = s(:arglist, args) unless args.sexp_type == :arglist
1287
+ args = s(:arglist, args).line args.line unless args.sexp_type == :arglist
947
1288
 
948
- return s(:yield, *args.sexp_body)
1289
+ s(:yield, *args.sexp_body).line args.line
949
1290
  end
950
1291
 
951
1292
  def next_token
952
1293
  token = self.lexer.next_token
953
1294
 
954
1295
  if token and token.first != RubyLexer::EOF then
1296
+ self.last_token_type = token
955
1297
  return token
956
1298
  else
957
- return [false, '$end']
958
- end
959
- end
960
-
961
- def new_assign lhs, rhs
962
- return nil unless lhs
963
-
964
- rhs = value_expr rhs
965
-
966
- case lhs.sexp_type
967
- when :lasgn, :iasgn, :cdecl, :cvdecl, :gasgn, :cvasgn, :attrasgn, :safe_attrasgn then
968
- lhs << rhs
969
- when :const then
970
- lhs.sexp_type = :cdecl
971
- lhs << rhs
972
- else
973
- raise "unknown lhs #{lhs.inspect} w/ #{rhs.inspect}"
974
- end
975
-
976
- lhs
977
- end
978
-
979
- ##
980
- # Returns a UTF-8 encoded string after processing BOMs and magic
981
- # encoding comments.
982
- #
983
- # Holy crap... ok. Here goes:
984
- #
985
- # Ruby's file handling and encoding support is insane. We need to be
986
- # able to lex a file. The lexer file is explicitly UTF-8 to make
987
- # things cleaner. This allows us to deal with extended chars in
988
- # class and method names. In order to do this, we need to encode all
989
- # input source files as UTF-8. First, we look for a UTF-8 BOM by
990
- # looking at the first line while forcing its encoding to
991
- # ASCII-8BIT. If we find a BOM, we strip it and set the expected
992
- # encoding to UTF-8. Then, we search for a magic encoding comment.
993
- # If found, it overrides the BOM. Finally, we force the encoding of
994
- # the input string to whatever was found, and then encode that to
995
- # UTF-8 for compatibility with the lexer.
996
-
997
- def handle_encoding str
998
- str = str.dup
999
- has_enc = str.respond_to? :encoding
1000
- encoding = nil
1001
-
1002
- header = str.each_line.first(2)
1003
- header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc
1004
-
1005
- first = header.first || ""
1006
- encoding, str = "utf-8", str[3..-1] if first =~ /\A\xEF\xBB\xBF/
1007
-
1008
- encoding = $1.strip if header.find { |s|
1009
- s[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
1010
- s[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
1011
- }
1012
-
1013
- if encoding then
1014
- if has_enc then
1015
- encoding.sub!(/utf-8-.+$/, 'utf-8') # HACK for stupid emacs formats
1016
- hack_encoding str, encoding
1017
- else
1018
- warn "Skipping magic encoding comment"
1019
- end
1020
- else
1021
- # nothing specified... ugh. try to encode as utf-8
1022
- hack_encoding str if has_enc
1299
+ return [false, false]
1023
1300
  end
1024
-
1025
- str
1026
1301
  end
1027
1302
 
1028
- def hack_encoding str, extra = nil
1029
- encodings = ENCODING_ORDER.dup
1030
- encodings.unshift(extra) unless extra.nil?
1031
-
1032
- # terrible, horrible, no good, very bad, last ditch effort.
1033
- encodings.each do |enc|
1034
- begin
1035
- str.force_encoding enc
1036
- if str.valid_encoding? then
1037
- str.encode! Encoding::UTF_8
1038
- break
1039
- end
1040
- rescue Encoding::InvalidByteSequenceError
1041
- # do nothing
1042
- rescue Encoding::UndefinedConversionError
1043
- # do nothing
1044
- end
1045
- end
1046
-
1047
- # no amount of pain is enough for you.
1048
- raise "Bad encoding. Need a magic encoding comment." unless
1049
- str.encoding.name == "UTF-8"
1303
+ def on_error(et, ev, values)
1304
+ super
1305
+ rescue Racc::ParseError => e
1306
+ # I don't like how the exception obscures the error message
1307
+ e.message.replace "%s:%p :: %s" % [self.file, lexer.lineno, e.message.strip]
1308
+ warn e.message if $DEBUG
1309
+ raise
1050
1310
  end
1051
1311
 
1052
1312
  ##
@@ -1061,7 +1321,7 @@ module RubyParserStuff
1061
1321
 
1062
1322
  self.file = file.dup
1063
1323
 
1064
- @yydebug = ENV.has_key? 'DEBUG'
1324
+ @yydebug = ENV.has_key? "DEBUG"
1065
1325
 
1066
1326
  # HACK -- need to get tests passing more than have graceful code
1067
1327
  self.lexer.ss = RPStringScanner.new str
@@ -1070,40 +1330,29 @@ module RubyParserStuff
1070
1330
  end
1071
1331
  end
1072
1332
 
1073
- alias :parse :process
1333
+ alias parse process
1074
1334
 
1075
1335
  def remove_begin node
1076
- oldnode = node
1077
- if node and node.sexp_type == :begin and node.size == 2 then
1078
- node = node.last
1079
- node.line = oldnode.line
1080
- end
1336
+ line = node.line
1337
+
1338
+ node = node.last while node and node.sexp_type == :begin and node.size == 2
1339
+
1340
+ node = s(:nil) if node == s(:begin)
1341
+
1342
+ node.line ||= line
1343
+
1081
1344
  node
1082
1345
  end
1083
1346
 
1347
+ alias value_expr remove_begin # TODO: for now..? could check the tree, but meh?
1348
+
1084
1349
  def reset
1085
1350
  lexer.reset
1086
1351
  self.in_def = false
1087
1352
  self.in_single = 0
1088
1353
  self.env.reset
1089
1354
  self.comments.clear
1090
- end
1091
-
1092
- def block_dup_check call_or_args, block
1093
- syntax_error "Both block arg and actual block given." if
1094
- block and call_or_args.block_pass?
1095
- end
1096
-
1097
- def inverted? val
1098
- [:return, :next, :break, :yield].include? val[0].sexp_type
1099
- end
1100
-
1101
- def invert_block_call val
1102
- (type, call), iter = val
1103
-
1104
- iter.insert 1, call
1105
-
1106
- [iter, s(type)]
1355
+ self.last_token_type = nil
1107
1356
  end
1108
1357
 
1109
1358
  def ret_args node
@@ -1118,7 +1367,7 @@ module RubyParserStuff
1118
1367
 
1119
1368
  # HACK matz wraps ONE of the FOUR splats in a newline to
1120
1369
  # distinguish. I use paren for now. ugh
1121
- node = s(:svalue, node) if node.sexp_type == :splat and not node.paren
1370
+ node = s(:svalue, node).line node.line if node.sexp_type == :splat and not node.paren
1122
1371
  node.sexp_type = :svalue if node.sexp_type == :arglist && node[1].sexp_type == :splat
1123
1372
  end
1124
1373
 
@@ -1127,18 +1376,17 @@ module RubyParserStuff
1127
1376
 
1128
1377
  def s(*args)
1129
1378
  result = Sexp.new(*args)
1130
- result.line ||= lexer.lineno if lexer.ss # otherwise...
1379
+ # result.line ||= lexer.lineno if lexer.ss unless ENV["CHECK_LINE_NUMS"] # otherwise...
1131
1380
  result.file = self.file
1132
1381
  result
1133
1382
  end
1134
1383
 
1135
- def value_expr oldnode # HACK: much more to do
1136
- node = remove_begin oldnode
1137
- node.line = oldnode.line if oldnode
1138
- node[2] = value_expr node[2] if node and node.sexp_type == :if
1139
- node
1384
+ def syntax_error msg
1385
+ raise RubyParser::SyntaxError, msg
1140
1386
  end
1141
1387
 
1388
+ alias yyerror syntax_error
1389
+
1142
1390
  def void_stmts node
1143
1391
  return nil unless node
1144
1392
  return node unless node.sexp_type == :block
@@ -1156,18 +1404,37 @@ module RubyParserStuff
1156
1404
  # do nothing for now
1157
1405
  end
1158
1406
 
1159
- alias yyerror syntax_error
1407
+ def whitespace_width line, remove_width = nil
1408
+ col = 0
1409
+ idx = 0
1160
1410
 
1161
- def on_error(et, ev, values)
1162
- super
1163
- rescue Racc::ParseError => e
1164
- # I don't like how the exception obscures the error message
1165
- e.message.replace "%s:%p :: %s" % [self.file, lexer.lineno, e.message.strip]
1166
- warn e.message if $DEBUG
1167
- raise
1411
+ line.chars.each do |c|
1412
+ break if remove_width && col >= remove_width
1413
+ case c
1414
+ when " " then
1415
+ col += 1
1416
+ when "\t" then
1417
+ n = TAB_WIDTH * (col / TAB_WIDTH + 1)
1418
+ break if remove_width && n > remove_width
1419
+ col = n
1420
+ else
1421
+ break
1422
+ end
1423
+ idx += 1
1424
+ end
1425
+
1426
+ if remove_width then
1427
+ line[idx..-1]
1428
+ else
1429
+ col
1430
+ end
1168
1431
  end
1169
1432
 
1433
+ alias remove_whitespace_width whitespace_width
1434
+
1170
1435
  class Keyword
1436
+ include RubyLexer::State::Values
1437
+
1171
1438
  class KWtable
1172
1439
  attr_accessor :name, :state, :id0, :id1
1173
1440
  def initialize(name, id=[], state=nil)
@@ -1195,49 +1462,53 @@ module RubyParserStuff
1195
1462
  # :expr_fitem = symbol literal as FNAME.
1196
1463
  # :expr_value = :expr_beg -- work to remove. Need multi-state support.
1197
1464
 
1465
+ expr_woot = EXPR_FNAME|EXPR_FITEM
1466
+
1198
1467
  wordlist = [
1199
- ["alias", [:kALIAS, :kALIAS ], :expr_fname ],
1200
- ["and", [:kAND, :kAND ], :expr_beg ],
1201
- ["begin", [:kBEGIN, :kBEGIN ], :expr_beg ],
1202
- ["break", [:kBREAK, :kBREAK ], :expr_mid ],
1203
- ["case", [:kCASE, :kCASE ], :expr_beg ],
1204
- ["class", [:kCLASS, :kCLASS ], :expr_class ],
1205
- ["def", [:kDEF, :kDEF ], :expr_fname ],
1206
- ["defined?", [:kDEFINED, :kDEFINED ], :expr_arg ],
1207
- ["do", [:kDO, :kDO ], :expr_beg ],
1208
- ["else", [:kELSE, :kELSE ], :expr_beg ],
1209
- ["elsif", [:kELSIF, :kELSIF ], :expr_beg ],
1210
- ["end", [:kEND, :kEND ], :expr_end ],
1211
- ["ensure", [:kENSURE, :kENSURE ], :expr_beg ],
1212
- ["false", [:kFALSE, :kFALSE ], :expr_end ],
1213
- ["for", [:kFOR, :kFOR ], :expr_beg ],
1214
- ["if", [:kIF, :kIF_MOD ], :expr_beg ],
1215
- ["in", [:kIN, :kIN ], :expr_beg ],
1216
- ["module", [:kMODULE, :kMODULE ], :expr_beg ],
1217
- ["next", [:kNEXT, :kNEXT ], :expr_mid ],
1218
- ["nil", [:kNIL, :kNIL ], :expr_end ],
1219
- ["not", [:kNOT, :kNOT ], :expr_arg ],
1220
- ["or", [:kOR, :kOR ], :expr_beg ],
1221
- ["redo", [:kREDO, :kREDO ], :expr_end ],
1222
- ["rescue", [:kRESCUE, :kRESCUE_MOD ], :expr_mid ],
1223
- ["retry", [:kRETRY, :kRETRY ], :expr_end ],
1224
- ["return", [:kRETURN, :kRETURN ], :expr_mid ],
1225
- ["self", [:kSELF, :kSELF ], :expr_end ],
1226
- ["super", [:kSUPER, :kSUPER ], :expr_arg ],
1227
- ["then", [:kTHEN, :kTHEN ], :expr_beg ],
1228
- ["true", [:kTRUE, :kTRUE ], :expr_end ],
1229
- ["undef", [:kUNDEF, :kUNDEF ], :expr_fname ],
1230
- ["unless", [:kUNLESS, :kUNLESS_MOD ], :expr_beg ],
1231
- ["until", [:kUNTIL, :kUNTIL_MOD ], :expr_beg ],
1232
- ["when", [:kWHEN, :kWHEN ], :expr_beg ],
1233
- ["while", [:kWHILE, :kWHILE_MOD ], :expr_beg ],
1234
- ["yield", [:kYIELD, :kYIELD ], :expr_arg ],
1235
- ["BEGIN", [:klBEGIN, :klBEGIN ], :expr_end ],
1236
- ["END", [:klEND, :klEND ], :expr_end ],
1237
- ["__FILE__", [:k__FILE__, :k__FILE__ ], :expr_end ],
1238
- ["__LINE__", [:k__LINE__, :k__LINE__ ], :expr_end ],
1239
- ["__ENCODING__", [:k__ENCODING__, :k__ENCODING__], :expr_end],
1240
- ].map { |args| KWtable.new(*args) }
1468
+ ["alias", [:kALIAS, :kALIAS ], expr_woot ],
1469
+ ["and", [:kAND, :kAND ], EXPR_BEG ],
1470
+ ["begin", [:kBEGIN, :kBEGIN ], EXPR_BEG ],
1471
+ ["break", [:kBREAK, :kBREAK ], EXPR_MID ],
1472
+ ["case", [:kCASE, :kCASE ], EXPR_BEG ],
1473
+ ["class", [:kCLASS, :kCLASS ], EXPR_CLASS ],
1474
+ ["def", [:kDEF, :kDEF ], EXPR_FNAME ],
1475
+ ["defined?", [:kDEFINED, :kDEFINED ], EXPR_ARG ],
1476
+ ["do", [:kDO, :kDO ], EXPR_BEG ],
1477
+ ["else", [:kELSE, :kELSE ], EXPR_BEG ],
1478
+ ["elsif", [:kELSIF, :kELSIF ], EXPR_BEG ],
1479
+ ["end", [:kEND, :kEND ], EXPR_END ],
1480
+ ["ensure", [:kENSURE, :kENSURE ], EXPR_BEG ],
1481
+ ["false", [:kFALSE, :kFALSE ], EXPR_END ],
1482
+ ["for", [:kFOR, :kFOR ], EXPR_BEG ],
1483
+ ["if", [:kIF, :kIF_MOD ], EXPR_BEG ],
1484
+ ["in", [:kIN, :kIN ], EXPR_BEG ],
1485
+ ["module", [:kMODULE, :kMODULE ], EXPR_BEG ],
1486
+ ["next", [:kNEXT, :kNEXT ], EXPR_MID ],
1487
+ ["nil", [:kNIL, :kNIL ], EXPR_END ],
1488
+ ["not", [:kNOT, :kNOT ], EXPR_ARG ],
1489
+ ["or", [:kOR, :kOR ], EXPR_BEG ],
1490
+ ["redo", [:kREDO, :kREDO ], EXPR_END ],
1491
+ ["rescue", [:kRESCUE, :kRESCUE_MOD ], EXPR_MID ],
1492
+ ["retry", [:kRETRY, :kRETRY ], EXPR_END ],
1493
+ ["return", [:kRETURN, :kRETURN ], EXPR_MID ],
1494
+ ["self", [:kSELF, :kSELF ], EXPR_END ],
1495
+ ["super", [:kSUPER, :kSUPER ], EXPR_ARG ],
1496
+ ["then", [:kTHEN, :kTHEN ], EXPR_BEG ],
1497
+ ["true", [:kTRUE, :kTRUE ], EXPR_END ],
1498
+ ["undef", [:kUNDEF, :kUNDEF ], expr_woot ],
1499
+ ["unless", [:kUNLESS, :kUNLESS_MOD ], EXPR_BEG ],
1500
+ ["until", [:kUNTIL, :kUNTIL_MOD ], EXPR_BEG ],
1501
+ ["when", [:kWHEN, :kWHEN ], EXPR_BEG ],
1502
+ ["while", [:kWHILE, :kWHILE_MOD ], EXPR_BEG ],
1503
+ ["yield", [:kYIELD, :kYIELD ], EXPR_ARG ],
1504
+ ["BEGIN", [:klBEGIN, :klBEGIN ], EXPR_END ],
1505
+ ["END", [:klEND, :klEND ], EXPR_END ],
1506
+ ["__FILE__", [:k__FILE__, :k__FILE__ ], EXPR_END ],
1507
+ ["__LINE__", [:k__LINE__, :k__LINE__ ], EXPR_END ],
1508
+ ["__ENCODING__", [:k__ENCODING__, :k__ENCODING__], EXPR_END],
1509
+ ].map { |args|
1510
+ KWtable.new(*args)
1511
+ }
1241
1512
 
1242
1513
  # :startdoc:
1243
1514
 
@@ -1304,11 +1575,6 @@ module RubyParserStuff
1304
1575
  @debug = debug
1305
1576
  end
1306
1577
 
1307
- def reset
1308
- @stack = [false]
1309
- log :reset if debug
1310
- end
1311
-
1312
1578
  def inspect
1313
1579
  "StackState(#{@name}, #{@stack.inspect})"
1314
1580
  end
@@ -1345,16 +1611,21 @@ module RubyParserStuff
1345
1611
  log :push if debug
1346
1612
  end
1347
1613
 
1348
- def store base = false
1349
- result = @stack.dup
1350
- @stack.replace [base]
1351
- log :store if debug
1352
- result
1614
+ def reset
1615
+ @stack = [false]
1616
+ log :reset if debug
1353
1617
  end
1354
1618
 
1355
1619
  def restore oldstate
1356
1620
  @stack.replace oldstate
1357
1621
  log :restore if debug
1358
1622
  end
1623
+
1624
+ def store base = false
1625
+ result = @stack.dup
1626
+ @stack.replace [base]
1627
+ log :store if debug
1628
+ result
1629
+ end
1359
1630
  end
1360
1631
  end