ruby_parser 3.13.0 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  # encoding: ASCII-8BIT
2
+ # TODO: remove
2
3
 
3
4
  require "sexp"
4
5
  require "ruby_lexer"
@@ -6,13 +7,50 @@ require "timeout"
6
7
  require "rp_extensions"
7
8
  require "rp_stringscanner"
8
9
 
10
class Sexp
  ##
  # Sanity-checks the line numbers recorded on this node and its
  # descendants. Raises a RuntimeError if any descendant has a nil
  # line, or if this node's own line is not an Integer >= 1 that is
  # no greater than the smallest line found below it.

  def check_line_numbers
    raise "bad nil line for:\n%s" % [self.pretty_inspect] if nil_line?
    raise "bad line number for:\n%s" % [self.pretty_inspect] unless
      Integer === self.line &&
      self.line >= 1 &&
      self.line <= self.line_min
  end

  ##
  # Returns the minimum line number found on self and its descendants.
  # Nodes without a line are ignored, so this is safe to call even
  # when #nil_line? is true. The result is memoized.

  def line_min
    @line_min ||= [*self.deep_each.map(&:line), self.line].compact.min
  end

  ##
  # Returns true if any descendant of self has a nil line.

  def nil_line?
    self.deep_each.any? { |sexp| sexp.line.nil? }
  end
end
30
+
9
31
  module RubyParserStuff
10
- VERSION = "3.13.0"
32
+ VERSION = "3.15.0"
11
33
 
12
34
  attr_accessor :lexer, :in_def, :in_single, :file
13
35
  attr_accessor :in_kwarg
14
36
  attr_reader :env, :comments
15
37
 
38
+ ##
39
+ # Canonicalize conditionals. Eg:
40
+ #
41
+ # not x ? a : b
42
+ #
43
+ # becomes:
44
+ #
45
+ # x ? b : a
46
+
47
+ attr_accessor :canonicalize_conditions
48
+
49
+ ##
50
+ # The last token type returned from #next_token
51
+
52
+ attr_accessor :last_token_type
53
+
16
54
  $good20 = []
17
55
 
18
56
  %w[
@@ -31,6 +69,28 @@ module RubyParserStuff
31
69
  end
32
70
  end
33
71
 
72
+ ##
73
+ # for pure ruby systems only
74
+
75
##
# Only defined when PURE_RUBY or CHECK_LINE_NUMS is set in the
# environment: runs the parse through _racc_do_parse_rb.

if ENV["PURE_RUBY"] || ENV["CHECK_LINE_NUMS"] then
  def do_parse
    _racc_do_parse_rb(_racc_setup, false)
  end
end
78
+
79
##
# Only defined when CHECK_LINE_NUMS is set in the environment: after
# every reduction, verifies the line numbers of each Sexp currently
# on the value stack, then returns what super returned.

def _racc_do_reduce arg, act
  reduced = super

  @racc_vstack.each do |item|
    item.check_line_numbers if Sexp === item
  end

  reduced
end if ENV["CHECK_LINE_NUMS"]
89
+
90
##
# Lookup table of sexp types that are treated as argument lists;
# each listed type maps to true, anything else looks up as nil.
# Frozen so the shared table cannot be modified by accident.

ARG_TYPES = [:arglist, :call_args, :array, :args].map { |k|
  [k, true]
}.to_h.freeze
93
+
34
94
  has_enc = "".respond_to? :encoding
35
95
 
36
96
  # This is in sorted order of occurrence according to
@@ -48,85 +108,52 @@ module RubyParserStuff
48
108
  Encoding::EUC_JP
49
109
  ] if has_enc
50
110
 
51
- def syntax_error msg
52
- raise RubyParser::SyntaxError, msg
53
- end
111
+ JUMP_TYPE = [:return, :next, :break, :yield].map { |k| [k, true] }.to_h
54
112
 
55
- def arg_blk_pass node1, node2 # TODO: nuke
56
- node1 = s(:arglist, node1) unless [:arglist, :call_args, :array, :args].include? node1.sexp_type
57
- node1 << node2 if node2
58
- node1
59
- end
113
+ TAB_WIDTH = 8
60
114
 
61
- def arg_concat node1, node2 # TODO: nuke
62
- raise "huh" unless node2
63
- node1 << s(:splat, node2).compact
64
- node1
65
- end
115
+ def initialize(options = {})
116
+ super()
66
117
 
67
- def clean_mlhs sexp
68
- case sexp.sexp_type
69
- when :masgn then
70
- if sexp.size == 2 and sexp[1].sexp_type == :array then
71
- s(:masgn, *sexp[1].sexp_body.map { |sub| clean_mlhs sub })
72
- else
73
- debug20 5
74
- sexp
75
- end
76
- when :gasgn, :iasgn, :lasgn, :cvasgn then
77
- if sexp.size == 2 then
78
- sexp.last
79
- else
80
- debug20 7
81
- sexp # optional value
82
- end
83
- else
84
- raise "unsupported type: #{sexp.inspect}"
85
- end
86
- end
118
+ v = self.class.name[/2\d/]
119
+ raise "Bad Class name #{self.class}" unless v
87
120
 
88
- def block_var *args
89
- result = self.args args
90
- result.sexp_type = :masgn
91
- result
92
- end
121
+ self.lexer = RubyLexer.new v && v.to_i
122
+ self.lexer.parser = self
123
+ self.in_kwarg = false
93
124
 
94
- def array_to_hash array
95
- case array.sexp_type
96
- when :kwsplat then
97
- array
98
- else
99
- s(:hash, *array.sexp_body)
100
- end
125
+ @env = RubyParserStuff::Environment.new
126
+ @comments = []
127
+
128
+ @canonicalize_conditions = true
129
+
130
+ self.reset
101
131
  end
102
132
 
103
- def call_args args
104
- result = s(:call_args)
133
+ def arg_concat node1, node2 # TODO: nuke
134
+ raise "huh" unless node2
105
135
 
106
- args.each do |arg|
107
- case arg
108
- when Sexp then
109
- case arg.sexp_type
110
- when :array, :args, :call_args then # HACK? remove array at some point
111
- result.concat arg.sexp_body
112
- else
113
- result << arg
114
- end
115
- when Symbol then
116
- result << arg
117
- when ",", nil then
118
- # ignore
119
- else
120
- raise "unhandled: #{arg.inspect} in #{args.inspect}"
121
- end
122
- end
136
+ splat = s(:splat, node2)
137
+ splat.line node2.line
123
138
 
124
- result
139
+ node1 << splat
140
+ end
141
+
142
+ def argl x
143
+ x = s(:arglist, x) if x and x.sexp_type == :array
144
+ x
125
145
  end
126
146
 
127
147
  def args args
128
148
  result = s(:args)
129
149
 
150
+ ss = args.grep Sexp
151
+ if ss.empty? then
152
+ result.line lexer.lineno
153
+ else
154
+ result.line ss.first.line
155
+ end
156
+
130
157
  args.each do |arg|
131
158
  case arg
132
159
  when Sexp then
@@ -162,13 +189,28 @@ module RubyParserStuff
162
189
  result
163
190
  end
164
191
 
192
##
# Returns +array+ untouched when it is a :kwsplat node; otherwise
# builds a :hash node from its body, carrying over its line.

def array_to_hash array
  return array if array.sexp_type == :kwsplat

  s(:hash, *array.sexp_body).line array.line
end
200
+
165
201
  def aryset receiver, index
166
202
  index ||= s()
167
- s(:attrasgn, receiver, :"[]=", *index.sexp_body).compact # [].sexp_body => nil
203
+ l = receiver.line
204
+ result = s(:attrasgn, receiver, :"[]=",
205
+ *index.sexp_body).compact # [].sexp_body => nil
206
+ result.line = l
207
+ result
168
208
  end
169
209
 
170
210
  def assignable(lhs, value = nil)
171
211
  id = lhs.to_sym unless Sexp === lhs
212
+
213
+ raise "WTF" if Sexp === id
172
214
  id = id.to_sym if Sexp === id
173
215
 
174
216
  raise "write a test 1" if id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
@@ -197,11 +239,33 @@ module RubyParserStuff
197
239
 
198
240
  self.env[id] ||= :lvar if result.sexp_type == :lasgn
199
241
 
242
+ line = case lhs
243
+ when Sexp then
244
+ lhs.line
245
+ else
246
+ value && value.line || lexer.lineno
247
+ end
248
+
200
249
  result << value if value
250
+ result.line = line
201
251
 
202
252
  return result
203
253
  end
204
254
 
255
##
# Always raises a RuntimeError for +ref+. The :nth_ref and :back_ref
# cases currently raise placeholder errors; the intended SyntaxError
# messages are kept as TODO notes rather than as unreachable code
# after the placeholder raise.

def backref_assign_error ref
  # TODO: need a test for this... obviously
  case ref.sexp_type
  when :nth_ref then
    # TODO: raise SyntaxError, "Can't set variable %p" % ref.last
    raise "write a test 2"
  when :back_ref then
    # TODO: raise SyntaxError, "Can't set back reference %p" % ref.last
    raise "write a test 3"
  else
    raise "Unknown backref type: #{ref.inspect}"
  end
end
268
+
205
269
  def block_append(head, tail)
206
270
  return head if tail.nil?
207
271
  return tail if head.nil?
@@ -215,6 +279,69 @@ module RubyParserStuff
215
279
  head << tail
216
280
  end
217
281
 
282
##
# Reports a syntax error when a literal +block+ is given and
# +call_or_args+ already carries a block pass.

def block_dup_check call_or_args, block
  return unless block
  return unless call_or_args.block_pass?

  syntax_error "Both block arg and actual block given."
end
286
+
287
##
# Builds an args node from +args+ and retags it as :masgn.

def block_var *args
  self.args(args).tap { |masgn| masgn.sexp_type = :masgn }
end
292
+
293
##
# Flattens the pieces in +args+ into a single :call_args node. The
# node's line comes from the first Sexp in +args+, or from the lexer
# when there is none. Bodies of :array, :args and :call_args nodes
# are spliced in; other Sexps and Symbols are appended; "," and nil
# are dropped; anything else raises.

def call_args args
  result = s(:call_args)

  first_sexp = args.find { |arg| Sexp === arg }
  if first_sexp then
    result.line first_sexp.line
  else
    result.line lexer.lineno
  end

  args.each do |arg|
    case arg
    when Sexp then
      # HACK? remove array at some point
      if [:array, :args, :call_args].include? arg.sexp_type then
        result.concat arg.sexp_body
      else
        result << arg
      end
    when Symbol then
      result << arg
    when ",", nil then
      # ignore
    else
      raise "unhandled: #{arg.inspect} in #{args.inspect}"
    end
  end

  result
end
323
+
324
##
# Simplifies a multiple-assignment target. A :masgn wrapping a single
# :array is rebuilt from its recursively cleaned elements; a
# two-element simple assignment collapses to its last element; other
# shapes of those types are returned as-is after a debug20 call.
# Any other type raises.

def clean_mlhs sexp
  case sexp.sexp_type
  when :masgn then
    if sexp.size == 2 && sexp[1].sexp_type == :array then
      cleaned = sexp[1].sexp_body.map { |sub| clean_mlhs sub }
      return s(:masgn, *cleaned)
    end

    debug20 5
  when :gasgn, :iasgn, :lasgn, :cvasgn then
    return sexp.last if sexp.size == 2

    debug20 7 # optional value
  else
    raise "unsupported type: #{sexp.inspect}"
  end

  sexp
end
344
+
218
345
  def cond node
219
346
  return nil if node.nil?
220
347
  node = value_expr node
@@ -222,56 +349,113 @@ module RubyParserStuff
222
349
  case node.sexp_type
223
350
  when :lit then
224
351
  if Regexp === node.last then
225
- return s(:match, node)
352
+ s(:match, node)
226
353
  else
227
- return node
354
+ node
228
355
  end
229
356
  when :and then
230
- return s(:and, cond(node[1]), cond(node[2]))
357
+ _, lhs, rhs = node
358
+ s(:and, cond(lhs), cond(rhs))
231
359
  when :or then
232
- return s(:or, cond(node[1]), cond(node[2]))
360
+ _, lhs, rhs = node
361
+ s(:or, cond(lhs), cond(rhs))
233
362
  when :dot2 then
234
363
  label = "flip#{node.hash}"
235
364
  env[label] = :lvar
236
365
  _, lhs, rhs = node
237
- return s(:flip2, lhs, rhs)
366
+ s(:flip2, lhs, rhs) # TODO: recurse?
238
367
  when :dot3 then
239
368
  label = "flip#{node.hash}"
240
369
  env[label] = :lvar
241
370
  _, lhs, rhs = node
242
- return s(:flip3, lhs, rhs)
371
+ s(:flip3, lhs, rhs)
243
372
  else
244
- return node
245
- end
373
+ node
374
+ end.line node.line
246
375
  end
247
376
 
248
- ##
249
- # for pure ruby systems only
377
+ def dedent sexp
378
+ dedent_count = dedent_size sexp
250
379
 
251
- def do_parse
252
- _racc_do_parse_rb(_racc_setup, false)
253
- end if ENV['PURE_RUBY']
380
+ skip_one = false
381
+ sexp.map { |obj|
382
+ case obj
383
+ when Symbol then
384
+ obj
385
+ when String then
386
+ obj.lines.map { |l| remove_whitespace_width l, dedent_count }.join
387
+ when Sexp then
388
+ case obj.sexp_type
389
+ when :evstr then
390
+ skip_one = true
391
+ obj
392
+ when :str then
393
+ _, str = obj
394
+ str = if skip_one then
395
+ skip_one = false
396
+ s1, *rest = str.lines
397
+ s1 + rest.map { |l| remove_whitespace_width l, dedent_count }.join
398
+ else
399
+ str.lines.map { |l| remove_whitespace_width l, dedent_count }.join
400
+ end
254
401
 
255
- def new_match lhs, rhs
256
- if lhs then
257
- case lhs.sexp_type
258
- when :dregx, :dregx_once then
259
- return s(:match2, lhs, rhs).line(lhs.line)
260
- when :lit then
261
- return s(:match2, lhs, rhs).line(lhs.line) if Regexp === lhs.last
402
+ s(:str, str).line obj.line
403
+ else
404
+ warn "unprocessed sexp %p" % [obj]
405
+ end
406
+ else
407
+ warn "unprocessed: %p" % [obj]
262
408
  end
263
- end
409
+ }
410
+ end
264
411
 
265
- if rhs then
266
- case rhs.sexp_type
267
- when :dregx, :dregx_once then
268
- return s(:match3, rhs, lhs).line(lhs.line)
269
- when :lit then
270
- return s(:match3, rhs, lhs).line(lhs.line) if Regexp === rhs.last
412
##
# Returns the smallest leading-whitespace width (as measured by
# whitespace_width) over the lines of the string pieces in +sexp+,
# or nil when there are none.
#
# Symbols are skipped. The first line of a :str that directly follows
# an :evstr is not measured. Elements that are not understood are
# reported with warn and skipped; previously the warn result (nil)
# was fed to +map+ and raised NoMethodError.

def dedent_size sexp
  skip_one = false

  widths = sexp.flat_map { |obj|
    lines =
      case obj
      when Symbol then
        next
      when String then
        obj.lines
      when Sexp then
        case obj.sexp_type
        when :evstr then
          skip_one = true
          next
        when :str then
          _, str = obj
          str_lines = str.lines
          if skip_one then
            skip_one = false
            str_lines.shift
          end
          str_lines
        else
          warn "unprocessed sexp %p" % [obj]
          next
        end
      else
        warn "unprocessed: %p" % [obj]
        next
      end

    lines.map { |l| whitespace_width l[/^[ \t]*/] }
  }

  # skipped elements show up as nil entries
  widths.compact.min
end
441
+
442
##
# Returns +string+ with up to +width+ columns of leading indentation
# removed. A space counts as one column; a tab advances to the next
# multiple of TAB_WIDTH and is only removed if that does not overshoot
# +width+. Scanning stops at the first character that is not a space
# or tab, so text after the indentation is never consumed.

def dedent_string string, width
  characters_skipped = 0
  indentation_skipped = 0

  string.each_char do |char|
    break if indentation_skipped >= width

    case char
    when " " then
      characters_skipped += 1
      indentation_skipped += 1
    when "\t" then
      proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
      break if proposed > width
      characters_skipped += 1
      indentation_skipped = proposed
    else
      break # end of the leading whitespace
    end
  end

  string[characters_skipped..-1]
end
276
460
 
277
461
  def gettable(id)
@@ -303,33 +487,94 @@ module RubyParserStuff
303
487
  result
304
488
  end
305
489
 
490
##
# Re-tags +str+ in place with each candidate encoding in turn
# (+extra+ first when given, then ENCODING_ORDER) and transcodes it to
# UTF-8 on the first one that validates. Raises if +str+ is not UTF-8
# once the candidates are exhausted.

def hack_encoding str, extra = nil
  candidates = ENCODING_ORDER.dup
  candidates.unshift(extra) unless extra.nil?

  # terrible, horrible, no good, very bad, last ditch effort.
  candidates.each do |enc|
    begin
      str.force_encoding enc
      next unless str.valid_encoding?

      str.encode! Encoding::UTF_8
      break
    rescue ArgumentError,                      # unknown encoding name
           Encoding::InvalidByteSequenceError,
           Encoding::UndefinedConversionError
      # fall through to the next candidate
    end
  end

  return if str.encoding.name == "UTF-8"

  # no amount of pain is enough for you.
  raise "Bad encoding. Need a magic encoding comment."
end
515
+
306
516
  ##
307
- # Canonicalize conditionals. Eg:
308
- #
309
- # not x ? a : b
517
+ # Returns a UTF-8 encoded string after processing BOMs and magic
518
+ # encoding comments.
310
519
  #
311
- # becomes:
520
+ # Holy crap... ok. Here goes:
312
521
  #
313
- # x ? b : a
522
+ # Ruby's file handling and encoding support is insane. We need to be
523
+ # able to lex a file. The lexer file is explicitly UTF-8 to make
524
+ # things cleaner. This allows us to deal with extended chars in
525
+ # class and method names. In order to do this, we need to encode all
526
+ # input source files as UTF-8. First, we look for a UTF-8 BOM by
527
+ # looking at the first line while forcing its encoding to
528
+ # ASCII-8BIT. If we find a BOM, we strip it and set the expected
529
+ # encoding to UTF-8. Then, we search for a magic encoding comment.
530
+ # If found, it overrides the BOM. Finally, we force the encoding of
531
+ # the input string to whatever was found, and then encode that to
532
+ # UTF-8 for compatibility with the lexer.
314
533
 
315
- attr_accessor :canonicalize_conditions
534
+ def handle_encoding str
535
+ str = str.dup
536
+ has_enc = str.respond_to? :encoding
537
+ encoding = nil
316
538
 
317
- def initialize(options = {})
318
- super()
539
+ header = str.each_line.first(2)
540
+ header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc
319
541
 
320
- v = self.class.name[/2\d/]
321
- raise "Bad Class name #{self.class}" unless v
542
+ first = header.first || ""
543
+ encoding, str = "utf-8", str.b[3..-1] if first =~ /\A\xEF\xBB\xBF/
322
544
 
323
- self.lexer = RubyLexer.new v && v.to_i
324
- self.lexer.parser = self
325
- self.in_kwarg = false
545
+ encoding = $1.strip if header.find { |s|
546
+ s[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
547
+ s[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
548
+ }
326
549
 
327
- @env = RubyParserStuff::Environment.new
328
- @comments = []
550
+ if encoding then
551
+ if has_enc then
552
+ encoding.sub!(/utf-8-.+$/, "utf-8") # HACK for stupid emacs formats
553
+ hack_encoding str, encoding
554
+ else
555
+ warn "Skipping magic encoding comment"
556
+ end
557
+ else
558
+ # nothing specified... ugh. try to encode as utf-8
559
+ hack_encoding str if has_enc
560
+ end
329
561
 
330
- @canonicalize_conditions = true
562
+ str
563
+ end
331
564
 
332
- self.reset
565
##
# Takes a [jump, iter] pair, moves the jump's second element into
# position 1 of the iter (mutating it), and returns the iter paired
# with a fresh, empty node of the jump's type on the jump's line.

def invert_block_call val
  jump, iter = val
  jump_type, call = jump

  iter.insert 1, call

  [iter, s(jump_type).line(jump.line)]
end
575
+
576
##
# Looks up the sexp type of the first element of +val+ in JUMP_TYPE.

def inverted? val
  JUMP_TYPE[val.first.sexp_type]
end
334
579
 
335
580
  def list_append list, item # TODO: nuke me *sigh*
@@ -350,12 +595,14 @@ module RubyParserStuff
350
595
 
351
596
  htype, ttype = head.sexp_type, tail.sexp_type
352
597
 
353
- head = s(:dstr, '', head) if htype == :evstr
598
+ head = s(:dstr, "", head).line head.line if htype == :evstr
354
599
 
355
600
  case ttype
356
601
  when :str then
357
602
  if htype == :str
358
- head.last << tail.last
603
+ a, b = head.last, tail.last
604
+ b = b.dup.force_encoding a.encoding unless Encoding.compatible?(a, b)
605
+ a << b
359
606
  elsif htype == :dstr and head.size == 2 then
360
607
  head.last << tail.last
361
608
  else
@@ -369,15 +616,15 @@ module RubyParserStuff
369
616
  head.line = lineno
370
617
  else
371
618
  tail.sexp_type = :array
372
- tail[1] = s(:str, tail[1])
373
- tail.delete_at 1 if tail[1] == s(:str, '')
619
+ tail[1] = s(:str, tail[1]).line tail.line
620
+ tail.delete_at 1 if tail[1] == s(:str, "")
374
621
 
375
622
  head.push(*tail.sexp_body)
376
623
  end
377
624
  when :evstr then
378
625
  if htype == :str then
379
626
  f, l = head.file, head.line
380
- head = s(:dstr, *head.sexp_body)
627
+ head = s(:dstr, *head.sexp_body).line head.line
381
628
  head.file = f
382
629
  head.line = l
383
630
  end
@@ -408,12 +655,15 @@ module RubyParserStuff
408
655
  node = rhs
409
656
  end
410
657
 
411
- node[2] = s(type, rhs, right)
658
+ node.pop
659
+ node << s(type, rhs, right).line(rhs.line)
412
660
 
413
661
  return left
414
662
  end
415
663
 
416
- return s(type, left, right)
664
+ result = s(type, left, right)
665
+ result.line left.line if left.line
666
+ result
417
667
  end
418
668
 
419
669
  def new_aref val
@@ -422,6 +672,49 @@ module RubyParserStuff
422
672
  new_call val[0], :"[]", val[2]
423
673
  end
424
674
 
675
##
# Appends the value expression of +rhs+ to the assignment node +lhs+
# and returns +lhs+. A :const target is retagged as :cdecl first.
# Returns nil when there is no +lhs+; raises for unknown target types.

def new_assign lhs, rhs
  return unless lhs

  rhs = value_expr rhs

  appendable = [:lasgn, :iasgn, :cdecl, :cvdecl, :gasgn, :cvasgn,
                :attrasgn, :safe_attrasgn]
  type = lhs.sexp_type

  if type == :const then
    lhs.sexp_type = :cdecl
  elsif !appendable.include?(type) then
    raise "unknown lhs #{lhs.inspect} w/ #{rhs.inspect}"
  end

  lhs << rhs
  lhs
end
692
+
693
##
# Builds an attribute-assignment node for <tt>recv.meth = ...</tt>.
# +call_op+ selects :attrasgn (".") or :safe_attrasgn ("&."); any
# other operator raises a RuntimeError naming it. The node takes its
# line from +recv+.

def new_attrasgn recv, meth, call_op = :"."
  meth = :"#{meth}="

  result = case call_op.to_sym
           when :"."
             s(:attrasgn, recv, meth)
           when :"&."
             s(:safe_attrasgn, recv, meth)
           else
             # report the offending operator (was an undefined `type`)
             raise "unknown call operator: `#{call_op.inspect}`"
           end

  result.line = recv.line
  result
end
708
+
709
##
# Builds a :begin node around the body in +val+ (or a :nil node when
# there is no body) and stamps it with the line number from +val+.

def new_begin val
  lineno, body = val[1], val[2]

  node = if body then
           s(:begin, body)
         else
           s(:nil)
         end
  node.line lineno

  node
end
717
+
425
718
  def new_body val
426
719
  body, resbody, elsebody, ensurebody = val
427
720
 
@@ -445,43 +738,27 @@ module RubyParserStuff
445
738
 
446
739
  if elsebody and not resbody then
447
740
  warning("else without rescue is useless")
448
- result = s(:begin, result) if result
741
+ result = s(:begin, result).line result.line if result
449
742
  result = block_append(result, elsebody)
450
743
  end
451
744
 
452
- result = s(:ensure, result, ensurebody).compact if ensurebody
745
+ if ensurebody
746
+ lineno = (result || ensurebody).line
747
+ result = s(:ensure, result, ensurebody).compact.line lineno
748
+ end
453
749
 
454
750
  result
455
751
  end
456
752
 
457
753
  def new_brace_body args, body, lineno
458
- new_iter(nil, args, body).line(lineno)
459
- end
460
-
461
- def argl x
462
- x = s(:arglist, x) if x and x.sexp_type == :array
463
- x
464
- end
465
-
466
- def backref_assign_error ref
467
- # TODO: need a test for this... obviously
468
- case ref.sexp_type
469
- when :nth_ref then
470
- raise "write a test 2"
471
- raise SyntaxError, "Can't set variable %p" % ref.last
472
- when :back_ref then
473
- raise "write a test 3"
474
- raise SyntaxError, "Can't set back reference %p" % ref.last
475
- else
476
- raise "Unknown backref type: #{ref.inspect}"
477
- end
754
+ new_iter(nil, args, body).line lineno
478
755
  end
479
756
 
480
- def new_call recv, meth, args = nil, call_op = :'.'
757
+ def new_call recv, meth, args = nil, call_op = :"."
481
758
  result = case call_op.to_sym
482
- when :'.'
759
+ when :"."
483
760
  s(:call, recv, meth)
484
- when :'&.'
761
+ when :"&."
485
762
  s(:safe_call, recv, meth)
486
763
  else
487
764
  raise "unknown call operator: `#{type.inspect}`"
@@ -491,35 +768,20 @@ module RubyParserStuff
491
768
  # TODO: need a test with f(&b) { } to produce warning
492
769
 
493
770
  if args
494
- if [:arglist, :args, :array, :call_args].include? args.sexp_type
771
+ if ARG_TYPES[args.sexp_type] then
495
772
  result.concat args.sexp_body
496
773
  else
497
774
  result << args
498
775
  end
499
776
  end
500
777
 
501
- line = result.grep(Sexp).map(&:line).compact.min
502
- result.line = line if line
778
+ # line = result.grep(Sexp).map(&:line).compact.min
779
+ result.line = recv.line if recv
780
+ result.line ||= lexer.lineno
503
781
 
504
782
  result
505
783
  end
506
784
 
507
- def new_attrasgn recv, meth, call_op
508
- meth = :"#{meth}="
509
-
510
- result = case call_op.to_sym
511
- when :'.'
512
- s(:attrasgn, recv, meth)
513
- when :'&.'
514
- s(:safe_attrasgn, recv, meth)
515
- else
516
- raise "unknown call operator: `#{type.inspect}`"
517
- end
518
-
519
- result.line = recv.line
520
- result
521
- end
522
-
523
785
  def new_case expr, body, line
524
786
  result = s(:case, expr)
525
787
 
@@ -565,11 +827,29 @@ module RubyParserStuff
565
827
  result
566
828
  end
567
829
 
830
##
# Builds the node for an operator assignment from +val+, which holds
# the target, the operator and the value: "||" gives :op_asgn_or,
# "&&" gives :op_asgn_and, anything else a generic :op_asgn carrying
# the operator. The node takes its line from the target.

def new_const_op_asgn val
  lhs, op, rhs = val[0], val[1].to_sym, val[2]

  short_circuit = { :"||" => :op_asgn_or, :"&&" => :op_asgn_and }[op]

  node = if short_circuit then
           s(short_circuit, lhs, rhs)
         else
           s(:op_asgn, lhs, op, rhs)
         end

  node.line = lhs.line
  node
end
845
+
568
846
  def new_defn val
569
- (_, line), (name, _), _, args, body, * = val
570
- body ||= s(:nil)
847
+ (_, line), name, _, args, body, nil_body_line, * = val
848
+ body ||= s(:nil).line nil_body_line
571
849
 
572
- result = s(:defn, name.to_sym, args)
850
+ args.line line
851
+
852
+ result = s(:defn, name.to_sym, args).line line
573
853
 
574
854
  if body then
575
855
  if body.sexp_type == :block then
@@ -579,19 +859,23 @@ module RubyParserStuff
579
859
  end
580
860
  end
581
861
 
582
- args.line line
583
- result.line = line
584
862
  result.comments = self.comments.pop
585
863
 
586
864
  result
587
865
  end
588
866
 
589
867
  def new_defs val
590
- recv, (name, _line), args, body = val[1], val[4], val[6], val[7]
591
- body ||= s(:nil)
868
+ _, recv, _, _, name, (_in_def, line), args, body, _ = val
869
+
870
+ body ||= s(:nil).line line
871
+
872
+ args.line line
592
873
 
593
874
  result = s(:defs, recv, name.to_sym, args)
594
875
 
876
+ # TODO: remove_begin
877
+ # TODO: reduce_nodes
878
+
595
879
  if body then
596
880
  if body.sexp_type == :block then
597
881
  result.push(*body.sexp_body)
@@ -616,7 +900,9 @@ module RubyParserStuff
616
900
  end
617
901
 
618
902
  def new_hash val
619
- s(:hash, *val[2].values).line(val[1])
903
+ _, line, assocs = val
904
+
905
+ s(:hash).line(line).concat assocs.values
620
906
  end
621
907
 
622
908
  def new_if c, t, f
@@ -637,27 +923,59 @@ module RubyParserStuff
637
923
  result << args
638
924
  result << body if body
639
925
 
640
- args.sexp_type = :args unless args == 0
926
+ result.line call.line if call
641
927
 
642
- result
643
- end
928
+ unless args == 0 then
929
+ args.line call.line if call
930
+ args.sexp_type = :args
931
+ end
644
932
 
645
- def new_masgn_arg rhs, wrap = false
646
- rhs = value_expr(rhs)
647
- rhs = s(:to_ary, rhs) if wrap # HACK: could be array if lhs isn't right
648
- rhs
933
+ result
649
934
  end
650
935
 
651
936
  def new_masgn lhs, rhs, wrap = false
652
937
  _, ary = lhs
653
938
 
939
+ line = rhs.line
654
940
  rhs = value_expr(rhs)
655
941
  rhs = ary ? s(:to_ary, rhs) : s(:array, rhs) if wrap
942
+ rhs.line line if wrap
656
943
 
657
944
  lhs.delete_at 1 if ary.nil?
658
945
  lhs << rhs
659
946
 
660
- lhs
947
+ lhs
948
+ end
949
+
950
##
# Returns the value expression of +rhs+, wrapped in a :to_ary node on
# the same line when +wrap+ is set.

def new_masgn_arg rhs, wrap = false
  value = value_expr(rhs)
  return value unless wrap

  # HACK: could be array if lhs isn't right
  s(:to_ary, value).line value.line
end
956
+
957
##
# Builds the node for <tt>lhs =~ rhs</tt>. A regexp-like left side
# (:dregx, :dregx_once, or a :lit holding a Regexp) gives :match2; a
# regexp-like right side gives :match3 with the operands swapped;
# otherwise a plain :"=~" call. The line always comes from +lhs+.

def new_match lhs, rhs
  regexp_like = lambda { |node|
    next false unless node

    case node.sexp_type
    when :dregx, :dregx_once then
      true # TODO: no test coverage
    when :lit then
      Regexp === node.last
    else
      false
    end
  }

  return s(:match2, lhs, rhs).line(lhs.line) if regexp_like[lhs]
  return s(:match3, rhs, lhs).line(lhs.line) if regexp_like[rhs]

  new_call(lhs, :"=~", argl(rhs)).line lhs.line
end
662
980
 
663
981
  def new_module val
@@ -680,32 +998,41 @@ module RubyParserStuff
680
998
 
681
999
  def new_op_asgn val
682
1000
  lhs, asgn_op, arg = val[0], val[1].to_sym, val[2]
683
- name = lhs.value
1001
+ name = gettable(lhs.value).line lhs.line
684
1002
  arg = remove_begin(arg)
685
1003
  result = case asgn_op # REFACTOR
686
1004
  when :"||" then
687
1005
  lhs << arg
688
- s(:op_asgn_or, self.gettable(name), lhs)
1006
+ s(:op_asgn_or, name, lhs)
689
1007
  when :"&&" then
690
1008
  lhs << arg
691
- s(:op_asgn_and, self.gettable(name), lhs)
1009
+ s(:op_asgn_and, name, lhs)
692
1010
  else
693
- # TODO: why [2] ?
694
- lhs[2] = new_call(self.gettable(name), asgn_op, argl(arg))
1011
+ lhs << new_call(name, asgn_op, argl(arg))
695
1012
  lhs
696
1013
  end
697
1014
  result.line = lhs.line
698
1015
  result
699
1016
  end
700
1017
 
1018
##
# Builds an :op_asgn1 node from +val+ (target at 0, args at 2,
# operator at 4, value at 5). Present args are retagged as :arglist;
# the node takes its line from the target.

def new_op_asgn1 val
  lhs, args, op, rhs = val.values_at 0, 2, 4, 5

  args.sexp_type = :arglist if args

  node = s(:op_asgn1, lhs, args, op.to_sym, rhs)
  node.line lhs.line
  node
end
1027
+
701
1028
  def new_op_asgn2 val
702
1029
  recv, call_op, meth, op, arg = val
703
1030
  meth = :"#{meth}="
704
1031
 
705
1032
  result = case call_op.to_sym
706
- when :'.'
1033
+ when :"."
707
1034
  s(:op_asgn2, recv, meth, op.to_sym, arg)
708
- when :'&.'
1035
+ when :"&."
709
1036
  s(:safe_op_asgn2, recv, meth, op.to_sym, arg)
710
1037
  else
711
1038
  raise "unknown call operator: `#{type.inspect}`"
@@ -715,21 +1042,49 @@ module RubyParserStuff
715
1042
  result
716
1043
  end
717
1044
 
1045
##
# Returns an empty :array node on the lexer's current line, then has
# the lexer fix up its line count.

def new_qsym_list
  s(:array).line(lexer.lineno).tap { lexer.fixup_lineno }
end
1050
+
1051
##
# Turns the string at position 1 of +val+ into a symbol :lit node on
# the lexer's current line, then has the lexer fix up its line count.

def new_qsym_list_entry val
  str = val[1]
  s(:lit, str.to_sym).line(lexer.lineno).tap { lexer.fixup_lineno }
end
1057
+
1058
##
# Returns an empty :array node on the lexer's current line, then has
# the lexer fix up its line count.

def new_qword_list
  s(:array).line(lexer.lineno).tap { lexer.fixup_lineno }
end
1063
+
1064
##
# Wraps the string at position 1 of +val+ in a :str node on the
# lexer's current line. A string that is not valid in its current
# encoding is first re-tagged (in place) as ASCII-8BIT.

def new_qword_list_entry val
  str = val[1]
  str.force_encoding("ASCII-8BIT") unless str.valid_encoding?

  # TODO: problematic? grab from parser
  s(:str, str).line(lexer.lineno).tap { lexer.fixup_lineno }
end
1071
+
718
1072
  def new_regexp val
719
- node = val[1] || s(:str, '')
720
- options = val[2]
1073
+ _, node, options = val
1074
+
1075
+ node ||= s(:str, "").line lexer.lineno
721
1076
 
722
1077
  o, k = 0, nil
723
1078
  options.split(//).uniq.each do |c| # FIX: this has a better home
724
1079
  v = {
725
- 'x' => Regexp::EXTENDED,
726
- 'i' => Regexp::IGNORECASE,
727
- 'm' => Regexp::MULTILINE,
728
- 'o' => Regexp::ONCE,
729
- 'n' => Regexp::ENC_NONE,
730
- 'e' => Regexp::ENC_EUC,
731
- 's' => Regexp::ENC_SJIS,
732
- 'u' => Regexp::ENC_UTF8,
1080
+ "x" => Regexp::EXTENDED,
1081
+ "i" => Regexp::IGNORECASE,
1082
+ "m" => Regexp::MULTILINE,
1083
+ "o" => Regexp::ONCE,
1084
+ "n" => Regexp::ENC_NONE,
1085
+ "e" => Regexp::ENC_EUC,
1086
+ "s" => Regexp::ENC_SJIS,
1087
+ "u" => Regexp::ENC_UTF8,
733
1088
  }[c]
734
1089
  raise "unknown regexp option: #{c}" unless v
735
1090
  o += v
@@ -762,7 +1117,7 @@ module RubyParserStuff
762
1117
  end
763
1118
  node << o if o and o != 0
764
1119
  else
765
- node = s(:dregx, '', node);
1120
+ node = s(:dregx, "", node).line node.line
766
1121
  node.sexp_type = :dregx_once if options =~ /o/
767
1122
  node << o if o and o != 0
768
1123
  end
@@ -770,19 +1125,20 @@ module RubyParserStuff
770
1125
  node
771
1126
  end
772
1127
 
773
- def new_rescue body, resbody
774
- s(:rescue, body, resbody)
775
- end
776
-
777
1128
  def new_resbody cond, body
778
1129
  if body && body.sexp_type == :block then
779
1130
  body.shift # remove block and splat it in directly
780
1131
  else
781
1132
  body = [body]
782
1133
  end
1134
+
783
1135
  s(:resbody, cond, *body).line cond.line
784
1136
  end
785
1137
 
1138
##
# Builds a :rescue node from +body+ and +resbody+ on +body+'s line.

def new_rescue body, resbody
  node = s(:rescue, body, resbody)
  node.line body.line
end
1141
+
786
1142
  def new_sclass val
787
1143
  recv, in_def, in_single, body = val[3], val[4], val[6], val[7]
788
1144
 
@@ -803,62 +1159,36 @@ module RubyParserStuff
803
1159
  end
804
1160
 
805
1161
  def new_string val
806
- str = val[0]
1162
+ str, = val
1163
+ str.force_encoding("UTF-8")
1164
+ # TODO: remove:
807
1165
  str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
808
- result = s(:str, str)
1166
+ result = s(:str, str).line lexer.lineno
809
1167
  self.lexer.fixup_lineno str.count("\n")
810
1168
  result
811
1169
  end
812
1170
 
813
- def new_qword_list_entry val
814
- str = val[1]
815
- str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
816
- result = s(:str, str)
817
- self.lexer.fixup_lineno
818
- result
819
- end
820
-
821
- def new_qword_list
822
- result = s(:array)
823
- self.lexer.fixup_lineno
824
- result
825
- end
826
-
827
- def new_word_list
828
- result = s(:array)
829
- self.lexer.fixup_lineno
830
- result
831
- end
832
-
833
- def new_word_list_entry val
834
- result = val[1].sexp_type == :evstr ? s(:dstr, "", val[1]) : val[1]
835
- self.lexer.fixup_lineno
836
- result
837
- end
838
-
839
- def new_qsym_list
840
- result = s(:array)
841
- self.lexer.fixup_lineno
842
- result
843
- end
844
-
845
- def new_qsym_list_entry val
846
- result = s(:lit, val[1].to_sym)
847
- self.lexer.fixup_lineno
848
- result
1171
##
# Builds a :super node. A lone :block_pass is kept as the single
# argument; otherwise the body of +args+ is splatted in, with a
# missing +args+ replaced by an empty :arglist on the lexer's line.

def new_super args
  return s(:super, args).line(args.line) if
    args && args.node_type == :block_pass

  args ||= s(:arglist).line lexer.lineno
  s(:super, *args.sexp_body).line args.line
end
850
1179
 
851
1180
  def new_symbol_list
852
- result = s(:array)
1181
+ result = s(:array).line lexer.lineno
853
1182
  self.lexer.fixup_lineno
854
1183
  result
855
1184
  end
856
1185
 
857
1186
  def new_symbol_list_entry val
858
- _list, sym, _nil = val # TODO: use _list
859
- result = val[1]
1187
+ _, sym, _ = val
1188
+
1189
+ sym ||= s(:str, "")
860
1190
 
861
- result ||= s(:str, "")
1191
+ line = lexer.lineno
862
1192
 
863
1193
  case sym.sexp_type
864
1194
  when :dstr then
@@ -866,26 +1196,21 @@ module RubyParserStuff
866
1196
  when :str then
867
1197
  sym = s(:lit, sym.last.to_sym)
868
1198
  else
869
- sym = s(:dsym, "", sym || s(:str, ""))
1199
+ sym = s(:dsym, "", sym || s(:str, "").line(line))
870
1200
  end
1201
+
1202
+ sym.line line
1203
+
871
1204
  self.lexer.fixup_lineno
872
- sym
873
- end
874
1205
 
875
- def new_super args
876
- if args && args.node_type == :block_pass then
877
- s(:super, args)
878
- else
879
- args ||= s(:arglist)
880
- s(:super, *args.sexp_body)
881
- end
1206
+ sym
882
1207
  end
883
1208
 
884
1209
  def new_undef n, m = nil
885
1210
  if m then
886
- block_append(n, s(:undef, m))
1211
+ block_append(n, s(:undef, m).line(m.line))
887
1212
  else
888
- s(:undef, n)
1213
+ s(:undef, n).line n.line
889
1214
  end
890
1215
  end
891
1216
 
@@ -918,20 +1243,36 @@ module RubyParserStuff
918
1243
  new_until_or_while :while, block, expr, pre
919
1244
  end
920
1245
 
921
- def new_xstring str
922
- if str then
923
- case str.sexp_type
1246
+ def new_word_list
1247
+ result = s(:array).line lexer.lineno
1248
+ self.lexer.fixup_lineno
1249
+ result
1250
+ end
1251
+
1252
+ def new_word_list_entry val
1253
+ _, word, _ = val
1254
+ result = word.sexp_type == :evstr ? s(:dstr, "", word).line(word.line) : word
1255
+ self.lexer.fixup_lineno
1256
+ result
1257
+ end
1258
+
1259
+ def new_xstring val
1260
+ _, node = val
1261
+
1262
+ node ||= s(:str, "").line lexer.lineno
1263
+
1264
+ if node then
1265
+ case node.sexp_type
924
1266
  when :str
925
- str.sexp_type = :xstr
1267
+ node.sexp_type = :xstr
926
1268
  when :dstr
927
- str.sexp_type = :dxstr
1269
+ node.sexp_type = :dxstr
928
1270
  else
929
- str = s(:dxstr, '', str)
1271
+ node = s(:dxstr, "", node).line node.line
930
1272
  end
931
- str
932
- else
933
- s(:xstr, '')
934
1273
  end
1274
+
1275
+ node
935
1276
  end
936
1277
 
937
1278
  def new_yield args = nil
@@ -940,113 +1281,32 @@ module RubyParserStuff
940
1281
  raise SyntaxError, "Block argument should not be given." if
941
1282
  args && args.node_type == :block_pass
942
1283
 
943
- args ||= s(:arglist)
1284
+ args ||= s(:arglist).line lexer.lineno
944
1285
 
945
1286
  args.sexp_type = :arglist if [:call_args, :array].include? args.sexp_type
946
- args = s(:arglist, args) unless args.sexp_type == :arglist
1287
+ args = s(:arglist, args).line args.line unless args.sexp_type == :arglist
947
1288
 
948
- return s(:yield, *args.sexp_body)
1289
+ s(:yield, *args.sexp_body).line args.line
949
1290
  end
950
1291
 
951
1292
  def next_token
952
1293
  token = self.lexer.next_token
953
1294
 
954
1295
  if token and token.first != RubyLexer::EOF then
1296
+ self.last_token_type = token
955
1297
  return token
956
1298
  else
957
- return [false, '$end']
958
- end
959
- end
960
-
961
- def new_assign lhs, rhs
962
- return nil unless lhs
963
-
964
- rhs = value_expr rhs
965
-
966
- case lhs.sexp_type
967
- when :lasgn, :iasgn, :cdecl, :cvdecl, :gasgn, :cvasgn, :attrasgn, :safe_attrasgn then
968
- lhs << rhs
969
- when :const then
970
- lhs.sexp_type = :cdecl
971
- lhs << rhs
972
- else
973
- raise "unknown lhs #{lhs.inspect} w/ #{rhs.inspect}"
974
- end
975
-
976
- lhs
977
- end
978
-
979
- ##
980
- # Returns a UTF-8 encoded string after processing BOMs and magic
981
- # encoding comments.
982
- #
983
- # Holy crap... ok. Here goes:
984
- #
985
- # Ruby's file handling and encoding support is insane. We need to be
986
- # able to lex a file. The lexer file is explicitly UTF-8 to make
987
- # things cleaner. This allows us to deal with extended chars in
988
- # class and method names. In order to do this, we need to encode all
989
- # input source files as UTF-8. First, we look for a UTF-8 BOM by
990
- # looking at the first line while forcing its encoding to
991
- # ASCII-8BIT. If we find a BOM, we strip it and set the expected
992
- # encoding to UTF-8. Then, we search for a magic encoding comment.
993
- # If found, it overrides the BOM. Finally, we force the encoding of
994
- # the input string to whatever was found, and then encode that to
995
- # UTF-8 for compatibility with the lexer.
996
-
997
- def handle_encoding str
998
- str = str.dup
999
- has_enc = str.respond_to? :encoding
1000
- encoding = nil
1001
-
1002
- header = str.each_line.first(2)
1003
- header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc
1004
-
1005
- first = header.first || ""
1006
- encoding, str = "utf-8", str[3..-1] if first =~ /\A\xEF\xBB\xBF/
1007
-
1008
- encoding = $1.strip if header.find { |s|
1009
- s[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
1010
- s[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
1011
- }
1012
-
1013
- if encoding then
1014
- if has_enc then
1015
- encoding.sub!(/utf-8-.+$/, 'utf-8') # HACK for stupid emacs formats
1016
- hack_encoding str, encoding
1017
- else
1018
- warn "Skipping magic encoding comment"
1019
- end
1020
- else
1021
- # nothing specified... ugh. try to encode as utf-8
1022
- hack_encoding str if has_enc
1299
+ return [false, false]
1023
1300
  end
1024
-
1025
- str
1026
1301
  end
1027
1302
 
1028
- def hack_encoding str, extra = nil
1029
- encodings = ENCODING_ORDER.dup
1030
- encodings.unshift(extra) unless extra.nil?
1031
-
1032
- # terrible, horrible, no good, very bad, last ditch effort.
1033
- encodings.each do |enc|
1034
- begin
1035
- str.force_encoding enc
1036
- if str.valid_encoding? then
1037
- str.encode! Encoding::UTF_8
1038
- break
1039
- end
1040
- rescue Encoding::InvalidByteSequenceError
1041
- # do nothing
1042
- rescue Encoding::UndefinedConversionError
1043
- # do nothing
1044
- end
1045
- end
1046
-
1047
- # no amount of pain is enough for you.
1048
- raise "Bad encoding. Need a magic encoding comment." unless
1049
- str.encoding.name == "UTF-8"
1303
+ def on_error(et, ev, values)
1304
+ super
1305
+ rescue Racc::ParseError => e
1306
+ # I don't like how the exception obscures the error message
1307
+ e.message.replace "%s:%p :: %s" % [self.file, lexer.lineno, e.message.strip]
1308
+ warn e.message if $DEBUG
1309
+ raise
1050
1310
  end
1051
1311
 
1052
1312
  ##
@@ -1061,7 +1321,7 @@ module RubyParserStuff
1061
1321
 
1062
1322
  self.file = file.dup
1063
1323
 
1064
- @yydebug = ENV.has_key? 'DEBUG'
1324
+ @yydebug = ENV.has_key? "DEBUG"
1065
1325
 
1066
1326
  # HACK -- need to get tests passing more than have graceful code
1067
1327
  self.lexer.ss = RPStringScanner.new str
@@ -1070,40 +1330,29 @@ module RubyParserStuff
1070
1330
  end
1071
1331
  end
1072
1332
 
1073
- alias :parse :process
1333
+ alias parse process
1074
1334
 
1075
1335
  def remove_begin node
1076
- oldnode = node
1077
- if node and node.sexp_type == :begin and node.size == 2 then
1078
- node = node.last
1079
- node.line = oldnode.line
1080
- end
1336
+ line = node.line
1337
+
1338
+ node = node.last while node and node.sexp_type == :begin and node.size == 2
1339
+
1340
+ node = s(:nil) if node == s(:begin)
1341
+
1342
+ node.line ||= line
1343
+
1081
1344
  node
1082
1345
  end
1083
1346
 
1347
+ alias value_expr remove_begin # TODO: for now..? could check the tree, but meh?
1348
+
1084
1349
  def reset
1085
1350
  lexer.reset
1086
1351
  self.in_def = false
1087
1352
  self.in_single = 0
1088
1353
  self.env.reset
1089
1354
  self.comments.clear
1090
- end
1091
-
1092
- def block_dup_check call_or_args, block
1093
- syntax_error "Both block arg and actual block given." if
1094
- block and call_or_args.block_pass?
1095
- end
1096
-
1097
- def inverted? val
1098
- [:return, :next, :break, :yield].include? val[0].sexp_type
1099
- end
1100
-
1101
- def invert_block_call val
1102
- (type, call), iter = val
1103
-
1104
- iter.insert 1, call
1105
-
1106
- [iter, s(type)]
1355
+ self.last_token_type = nil
1107
1356
  end
1108
1357
 
1109
1358
  def ret_args node
@@ -1118,7 +1367,7 @@ module RubyParserStuff
1118
1367
 
1119
1368
  # HACK matz wraps ONE of the FOUR splats in a newline to
1120
1369
  # distinguish. I use paren for now. ugh
1121
- node = s(:svalue, node) if node.sexp_type == :splat and not node.paren
1370
+ node = s(:svalue, node).line node.line if node.sexp_type == :splat and not node.paren
1122
1371
  node.sexp_type = :svalue if node.sexp_type == :arglist && node[1].sexp_type == :splat
1123
1372
  end
1124
1373
 
@@ -1127,18 +1376,17 @@ module RubyParserStuff
1127
1376
 
1128
1377
  def s(*args)
1129
1378
  result = Sexp.new(*args)
1130
- result.line ||= lexer.lineno if lexer.ss # otherwise...
1379
+ # result.line ||= lexer.lineno if lexer.ss unless ENV["CHECK_LINE_NUMS"] # otherwise...
1131
1380
  result.file = self.file
1132
1381
  result
1133
1382
  end
1134
1383
 
1135
- def value_expr oldnode # HACK: much more to do
1136
- node = remove_begin oldnode
1137
- node.line = oldnode.line if oldnode
1138
- node[2] = value_expr node[2] if node and node.sexp_type == :if
1139
- node
1384
+ def syntax_error msg
1385
+ raise RubyParser::SyntaxError, msg
1140
1386
  end
1141
1387
 
1388
+ alias yyerror syntax_error
1389
+
1142
1390
  def void_stmts node
1143
1391
  return nil unless node
1144
1392
  return node unless node.sexp_type == :block
@@ -1156,18 +1404,37 @@ module RubyParserStuff
1156
1404
  # do nothing for now
1157
1405
  end
1158
1406
 
1159
- alias yyerror syntax_error
1407
+ def whitespace_width line, remove_width = nil
1408
+ col = 0
1409
+ idx = 0
1160
1410
 
1161
- def on_error(et, ev, values)
1162
- super
1163
- rescue Racc::ParseError => e
1164
- # I don't like how the exception obscures the error message
1165
- e.message.replace "%s:%p :: %s" % [self.file, lexer.lineno, e.message.strip]
1166
- warn e.message if $DEBUG
1167
- raise
1411
+ line.chars.each do |c|
1412
+ break if remove_width && col >= remove_width
1413
+ case c
1414
+ when " " then
1415
+ col += 1
1416
+ when "\t" then
1417
+ n = TAB_WIDTH * (col / TAB_WIDTH + 1)
1418
+ break if remove_width && n > remove_width
1419
+ col = n
1420
+ else
1421
+ break
1422
+ end
1423
+ idx += 1
1424
+ end
1425
+
1426
+ if remove_width then
1427
+ line[idx..-1]
1428
+ else
1429
+ col
1430
+ end
1168
1431
  end
1169
1432
 
1433
+ alias remove_whitespace_width whitespace_width
1434
+
1170
1435
  class Keyword
1436
+ include RubyLexer::State::Values
1437
+
1171
1438
  class KWtable
1172
1439
  attr_accessor :name, :state, :id0, :id1
1173
1440
  def initialize(name, id=[], state=nil)
@@ -1195,49 +1462,53 @@ module RubyParserStuff
1195
1462
  # :expr_fitem = symbol literal as FNAME.
1196
1463
  # :expr_value = :expr_beg -- work to remove. Need multi-state support.
1197
1464
 
1465
+ expr_woot = EXPR_FNAME|EXPR_FITEM
1466
+
1198
1467
  wordlist = [
1199
- ["alias", [:kALIAS, :kALIAS ], :expr_fname ],
1200
- ["and", [:kAND, :kAND ], :expr_beg ],
1201
- ["begin", [:kBEGIN, :kBEGIN ], :expr_beg ],
1202
- ["break", [:kBREAK, :kBREAK ], :expr_mid ],
1203
- ["case", [:kCASE, :kCASE ], :expr_beg ],
1204
- ["class", [:kCLASS, :kCLASS ], :expr_class ],
1205
- ["def", [:kDEF, :kDEF ], :expr_fname ],
1206
- ["defined?", [:kDEFINED, :kDEFINED ], :expr_arg ],
1207
- ["do", [:kDO, :kDO ], :expr_beg ],
1208
- ["else", [:kELSE, :kELSE ], :expr_beg ],
1209
- ["elsif", [:kELSIF, :kELSIF ], :expr_beg ],
1210
- ["end", [:kEND, :kEND ], :expr_end ],
1211
- ["ensure", [:kENSURE, :kENSURE ], :expr_beg ],
1212
- ["false", [:kFALSE, :kFALSE ], :expr_end ],
1213
- ["for", [:kFOR, :kFOR ], :expr_beg ],
1214
- ["if", [:kIF, :kIF_MOD ], :expr_beg ],
1215
- ["in", [:kIN, :kIN ], :expr_beg ],
1216
- ["module", [:kMODULE, :kMODULE ], :expr_beg ],
1217
- ["next", [:kNEXT, :kNEXT ], :expr_mid ],
1218
- ["nil", [:kNIL, :kNIL ], :expr_end ],
1219
- ["not", [:kNOT, :kNOT ], :expr_arg ],
1220
- ["or", [:kOR, :kOR ], :expr_beg ],
1221
- ["redo", [:kREDO, :kREDO ], :expr_end ],
1222
- ["rescue", [:kRESCUE, :kRESCUE_MOD ], :expr_mid ],
1223
- ["retry", [:kRETRY, :kRETRY ], :expr_end ],
1224
- ["return", [:kRETURN, :kRETURN ], :expr_mid ],
1225
- ["self", [:kSELF, :kSELF ], :expr_end ],
1226
- ["super", [:kSUPER, :kSUPER ], :expr_arg ],
1227
- ["then", [:kTHEN, :kTHEN ], :expr_beg ],
1228
- ["true", [:kTRUE, :kTRUE ], :expr_end ],
1229
- ["undef", [:kUNDEF, :kUNDEF ], :expr_fname ],
1230
- ["unless", [:kUNLESS, :kUNLESS_MOD ], :expr_beg ],
1231
- ["until", [:kUNTIL, :kUNTIL_MOD ], :expr_beg ],
1232
- ["when", [:kWHEN, :kWHEN ], :expr_beg ],
1233
- ["while", [:kWHILE, :kWHILE_MOD ], :expr_beg ],
1234
- ["yield", [:kYIELD, :kYIELD ], :expr_arg ],
1235
- ["BEGIN", [:klBEGIN, :klBEGIN ], :expr_end ],
1236
- ["END", [:klEND, :klEND ], :expr_end ],
1237
- ["__FILE__", [:k__FILE__, :k__FILE__ ], :expr_end ],
1238
- ["__LINE__", [:k__LINE__, :k__LINE__ ], :expr_end ],
1239
- ["__ENCODING__", [:k__ENCODING__, :k__ENCODING__], :expr_end],
1240
- ].map { |args| KWtable.new(*args) }
1468
+ ["alias", [:kALIAS, :kALIAS ], expr_woot ],
1469
+ ["and", [:kAND, :kAND ], EXPR_BEG ],
1470
+ ["begin", [:kBEGIN, :kBEGIN ], EXPR_BEG ],
1471
+ ["break", [:kBREAK, :kBREAK ], EXPR_MID ],
1472
+ ["case", [:kCASE, :kCASE ], EXPR_BEG ],
1473
+ ["class", [:kCLASS, :kCLASS ], EXPR_CLASS ],
1474
+ ["def", [:kDEF, :kDEF ], EXPR_FNAME ],
1475
+ ["defined?", [:kDEFINED, :kDEFINED ], EXPR_ARG ],
1476
+ ["do", [:kDO, :kDO ], EXPR_BEG ],
1477
+ ["else", [:kELSE, :kELSE ], EXPR_BEG ],
1478
+ ["elsif", [:kELSIF, :kELSIF ], EXPR_BEG ],
1479
+ ["end", [:kEND, :kEND ], EXPR_END ],
1480
+ ["ensure", [:kENSURE, :kENSURE ], EXPR_BEG ],
1481
+ ["false", [:kFALSE, :kFALSE ], EXPR_END ],
1482
+ ["for", [:kFOR, :kFOR ], EXPR_BEG ],
1483
+ ["if", [:kIF, :kIF_MOD ], EXPR_BEG ],
1484
+ ["in", [:kIN, :kIN ], EXPR_BEG ],
1485
+ ["module", [:kMODULE, :kMODULE ], EXPR_BEG ],
1486
+ ["next", [:kNEXT, :kNEXT ], EXPR_MID ],
1487
+ ["nil", [:kNIL, :kNIL ], EXPR_END ],
1488
+ ["not", [:kNOT, :kNOT ], EXPR_ARG ],
1489
+ ["or", [:kOR, :kOR ], EXPR_BEG ],
1490
+ ["redo", [:kREDO, :kREDO ], EXPR_END ],
1491
+ ["rescue", [:kRESCUE, :kRESCUE_MOD ], EXPR_MID ],
1492
+ ["retry", [:kRETRY, :kRETRY ], EXPR_END ],
1493
+ ["return", [:kRETURN, :kRETURN ], EXPR_MID ],
1494
+ ["self", [:kSELF, :kSELF ], EXPR_END ],
1495
+ ["super", [:kSUPER, :kSUPER ], EXPR_ARG ],
1496
+ ["then", [:kTHEN, :kTHEN ], EXPR_BEG ],
1497
+ ["true", [:kTRUE, :kTRUE ], EXPR_END ],
1498
+ ["undef", [:kUNDEF, :kUNDEF ], expr_woot ],
1499
+ ["unless", [:kUNLESS, :kUNLESS_MOD ], EXPR_BEG ],
1500
+ ["until", [:kUNTIL, :kUNTIL_MOD ], EXPR_BEG ],
1501
+ ["when", [:kWHEN, :kWHEN ], EXPR_BEG ],
1502
+ ["while", [:kWHILE, :kWHILE_MOD ], EXPR_BEG ],
1503
+ ["yield", [:kYIELD, :kYIELD ], EXPR_ARG ],
1504
+ ["BEGIN", [:klBEGIN, :klBEGIN ], EXPR_END ],
1505
+ ["END", [:klEND, :klEND ], EXPR_END ],
1506
+ ["__FILE__", [:k__FILE__, :k__FILE__ ], EXPR_END ],
1507
+ ["__LINE__", [:k__LINE__, :k__LINE__ ], EXPR_END ],
1508
+ ["__ENCODING__", [:k__ENCODING__, :k__ENCODING__], EXPR_END],
1509
+ ].map { |args|
1510
+ KWtable.new(*args)
1511
+ }
1241
1512
 
1242
1513
  # :startdoc:
1243
1514
 
@@ -1304,11 +1575,6 @@ module RubyParserStuff
1304
1575
  @debug = debug
1305
1576
  end
1306
1577
 
1307
- def reset
1308
- @stack = [false]
1309
- log :reset if debug
1310
- end
1311
-
1312
1578
  def inspect
1313
1579
  "StackState(#{@name}, #{@stack.inspect})"
1314
1580
  end
@@ -1345,16 +1611,21 @@ module RubyParserStuff
1345
1611
  log :push if debug
1346
1612
  end
1347
1613
 
1348
- def store base = false
1349
- result = @stack.dup
1350
- @stack.replace [base]
1351
- log :store if debug
1352
- result
1614
+ def reset
1615
+ @stack = [false]
1616
+ log :reset if debug
1353
1617
  end
1354
1618
 
1355
1619
  def restore oldstate
1356
1620
  @stack.replace oldstate
1357
1621
  log :restore if debug
1358
1622
  end
1623
+
1624
+ def store base = false
1625
+ result = @stack.dup
1626
+ @stack.replace [base]
1627
+ log :store if debug
1628
+ result
1629
+ end
1359
1630
  end
1360
1631
  end