ruby_parser 3.13.1 → 3.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.autotest +18 -29
  4. data/History.rdoc +312 -0
  5. data/Manifest.txt +16 -15
  6. data/README.rdoc +13 -9
  7. data/Rakefile +237 -106
  8. data/bin/ruby_parse +3 -1
  9. data/bin/ruby_parse_extract_error +9 -4
  10. data/compare/normalize.rb +54 -6
  11. data/debugging.md +172 -0
  12. data/gauntlet.md +107 -0
  13. data/lib/rp_extensions.rb +15 -36
  14. data/lib/rp_stringscanner.rb +20 -51
  15. data/lib/ruby_lexer.rb +515 -812
  16. data/lib/ruby_lexer.rex +33 -27
  17. data/lib/ruby_lexer.rex.rb +64 -31
  18. data/lib/ruby_lexer_strings.rb +638 -0
  19. data/lib/ruby_parser.rb +46 -36
  20. data/lib/{ruby_parser.yy → ruby_parser2.yy} +1400 -488
  21. data/lib/ruby_parser20.rb +10953 -0
  22. data/lib/ruby_parser21.rb +10978 -0
  23. data/lib/ruby_parser22.rb +11119 -0
  24. data/lib/ruby_parser23.rb +11160 -0
  25. data/lib/ruby_parser24.rb +11209 -0
  26. data/lib/ruby_parser25.rb +11209 -0
  27. data/lib/ruby_parser26.rb +11231 -0
  28. data/lib/ruby_parser27.rb +12960 -0
  29. data/lib/{ruby26_parser.y → ruby_parser3.yy} +1652 -521
  30. data/lib/ruby_parser30.rb +13292 -0
  31. data/lib/ruby_parser31.rb +13625 -0
  32. data/lib/ruby_parser32.rb +13577 -0
  33. data/lib/ruby_parser33.rb +13577 -0
  34. data/lib/ruby_parser_extras.rb +988 -474
  35. data/test/test_ruby_lexer.rb +1339 -1155
  36. data/test/test_ruby_parser.rb +4255 -2103
  37. data/test/test_ruby_parser_extras.rb +39 -4
  38. data/tools/munge.rb +52 -13
  39. data/tools/ripper.rb +24 -6
  40. data.tar.gz.sig +0 -0
  41. metadata +73 -56
  42. metadata.gz.sig +0 -0
  43. data/lib/ruby20_parser.rb +0 -6869
  44. data/lib/ruby20_parser.y +0 -2431
  45. data/lib/ruby21_parser.rb +0 -6944
  46. data/lib/ruby21_parser.y +0 -2449
  47. data/lib/ruby22_parser.rb +0 -6968
  48. data/lib/ruby22_parser.y +0 -2458
  49. data/lib/ruby23_parser.rb +0 -6987
  50. data/lib/ruby23_parser.y +0 -2460
  51. data/lib/ruby24_parser.rb +0 -6994
  52. data/lib/ruby24_parser.y +0 -2466
  53. data/lib/ruby25_parser.rb +0 -6994
  54. data/lib/ruby25_parser.y +0 -2466
  55. data/lib/ruby26_parser.rb +0 -7012
@@ -1,4 +1,6 @@
1
1
  # encoding: ASCII-8BIT
2
+ # frozen_string_literal: true
3
+ # TODO: remove encoding comment
2
4
 
3
5
  require "sexp"
4
6
  require "ruby_lexer"
@@ -6,12 +8,49 @@ require "timeout"
6
8
  require "rp_extensions"
7
9
  require "rp_stringscanner"
8
10
 
11
+ class Sexp
12
+ def check_line_numbers
13
+ raise "bad nil line for:\n%s" % [self.pretty_inspect] if nil_line?
14
+ raise "bad line number for:\n%s" % [self.pretty_inspect] unless
15
+ Integer === self.line &&
16
+ self.line >= 1 &&
17
+ self.line <= self.line_min
18
+ end
19
+
20
+ ##
21
+ # Returns the minimum line number of the children of self.
22
+
23
+ def line_min
24
+ @line_min ||= [self.deep_each.map(&:line).min, self.line].compact.min
25
+ end
26
+
27
+ def nil_line?
28
+ self.deep_each.map(&:line).any?(&:nil?)
29
+ end
30
+ end
31
+
9
32
  module RubyParserStuff
10
- VERSION = "3.13.1"
33
+ VERSION = "3.21.0"
11
34
 
12
- attr_accessor :lexer, :in_def, :in_single, :file
35
+ attr_accessor :lexer, :in_def, :in_single, :file, :in_argdef
13
36
  attr_accessor :in_kwarg
14
- attr_reader :env, :comments
37
+ attr_reader :env
38
+
39
+ ##
40
+ # Canonicalize conditionals. Eg:
41
+ #
42
+ # not x ? a : b
43
+ #
44
+ # becomes:
45
+ #
46
+ # x ? b : a
47
+
48
+ attr_accessor :canonicalize_conditions
49
+
50
+ ##
51
+ # The last token type returned from #next_token
52
+
53
+ attr_accessor :last_token_type
15
54
 
16
55
  $good20 = []
17
56
 
@@ -31,6 +70,29 @@ module RubyParserStuff
31
70
  end
32
71
  end
33
72
 
73
+ ##
74
+ # for pure ruby systems only
75
+
76
+ def do_parse
77
+ _racc_do_parse_rb(_racc_setup, false)
78
+ end if ENV["PURE_RUBY"] || ENV["CHECK_LINE_NUMS"]
79
+
80
+ if ENV["CHECK_LINE_NUMS"] then
81
+ def _racc_do_reduce arg, act
82
+ x = super
83
+
84
+ @racc_vstack.grep(Sexp).each do |sexp|
85
+ sexp.check_line_numbers
86
+ end
87
+ x
88
+ end
89
+ end
90
+
91
+ ARG_TYPES = [:arglist, :call_args, :array, :args].map { |k|
92
+ [k, true]
93
+ }.to_h
94
+
95
+ # TODO: remove
34
96
  has_enc = "".respond_to? :encoding
35
97
 
36
98
  # This is in sorted order of occurrence according to
@@ -48,91 +110,79 @@ module RubyParserStuff
48
110
  Encoding::EUC_JP
49
111
  ] if has_enc
50
112
 
51
- def syntax_error msg
52
- raise RubyParser::SyntaxError, msg
53
- end
113
+ JUMP_TYPE = [:return, :next, :break, :yield].map { |k| [k, true] }.to_h
54
114
 
55
- def arg_blk_pass node1, node2 # TODO: nuke
56
- node1 = s(:arglist, node1) unless [:arglist, :call_args, :array, :args].include? node1.sexp_type
57
- node1 << node2 if node2
58
- node1
115
+ TAB_WIDTH = 8
116
+
117
+ def initialize(options = {})
118
+ super()
119
+
120
+ v = self.class.name[/[23]\d/]
121
+ raise "Bad Class name #{self.class}" unless v
122
+
123
+ self.lexer = RubyLexer.new v && v.to_i
124
+ self.lexer.parser = self
125
+ self.in_kwarg = false
126
+ self.in_argdef = false
127
+
128
+ @env = RubyParserStuff::Environment.new
129
+
130
+ @canonicalize_conditions = true
131
+
132
+ self.reset
59
133
  end
60
134
 
61
135
  def arg_concat node1, node2 # TODO: nuke
62
136
  raise "huh" unless node2
63
- node1 << s(:splat, node2).compact
64
- node1
65
- end
66
137
 
67
- def clean_mlhs sexp
68
- case sexp.sexp_type
69
- when :masgn then
70
- if sexp.size == 2 and sexp[1].sexp_type == :array then
71
- s(:masgn, *sexp[1].sexp_body.map { |sub| clean_mlhs sub })
72
- else
73
- debug20 5
74
- sexp
75
- end
76
- when :gasgn, :iasgn, :lasgn, :cvasgn then
77
- if sexp.size == 2 then
78
- sexp.last
79
- else
80
- debug20 7
81
- sexp # optional value
82
- end
83
- else
84
- raise "unsupported type: #{sexp.inspect}"
85
- end
138
+ splat = s(:splat, node2)
139
+ splat.line node2.line
140
+
141
+ node1 << splat
86
142
  end
87
143
 
88
- def block_var *args
89
- result = self.args args
90
- result.sexp_type = :masgn
91
- result
144
+ def argl x
145
+ x = s(:arglist, x) if x and x.sexp_type == :array
146
+ x
92
147
  end
93
148
 
94
- def array_to_hash array
95
- case array.sexp_type
96
- when :kwsplat then
97
- array
149
+ def args args
150
+ result = s(:args)
151
+
152
+ ss = args.grep Sexp
153
+ if ss.empty? then
154
+ result.line lexer.lineno
98
155
  else
99
- s(:hash, *array.sexp_body)
156
+ result.line ss.first.line
157
+ result.line_max = ss.first.line_max
100
158
  end
101
- end
102
-
103
- def call_args args
104
- result = s(:call_args)
105
159
 
106
160
  args.each do |arg|
107
- case arg
108
- when Sexp then
109
- case arg.sexp_type
110
- when :array, :args, :call_args then # HACK? remove array at some point
111
- result.concat arg.sexp_body
112
- else
113
- result << arg
114
- end
115
- when Symbol then
116
- result << arg
117
- when ",", nil then
118
- # ignore
119
- else
120
- raise "unhandled: #{arg.inspect} in #{args.inspect}"
161
+ if arg.instance_of? Array and arg.size == 2 and arg.last.is_a? Numeric then
162
+ arg = arg.first
121
163
  end
122
- end
123
164
 
124
- result
125
- end
126
-
127
- def args args
128
- result = s(:args)
129
-
130
- args.each do |arg|
131
165
  case arg
132
166
  when Sexp then
133
167
  case arg.sexp_type
134
168
  when :args, :block, :array, :call_args then # HACK call_args mismatch
135
- result.concat arg.sexp_body
169
+ rest = arg.sexp_body
170
+
171
+ rest.map! { |x|
172
+ if x.instance_of? Array and x.size == 2 and Numeric === x.last then
173
+ x.first
174
+ else
175
+ x
176
+ end
177
+ }
178
+
179
+ result.concat rest
180
+ when :forward_args then
181
+ self.env[:*] = :lvar # TODO: arg_var(p, idFWD_REST) ?
182
+ self.env[:**] = :lvar
183
+ self.env[:&] = :lvar
184
+
185
+ result << arg
136
186
  when :block_arg then
137
187
  result << :"&#{arg.last}"
138
188
  when :shadow then
@@ -152,6 +202,8 @@ module RubyParserStuff
152
202
  name = arg.to_s.delete("&*")
153
203
  self.env[name.to_sym] = :lvar unless name.empty?
154
204
  result << arg
205
+ when true, false then
206
+ self.in_kwarg = arg
155
207
  when ",", "|", ";", "(", ")", nil then
156
208
  # ignore
157
209
  else
@@ -162,21 +214,50 @@ module RubyParserStuff
162
214
  result
163
215
  end
164
216
 
217
+ def end_args args
218
+ lexer.lex_state = RubyLexer::State::Values::EXPR_BEG
219
+ lexer.command_start = true
220
+ self.args args
221
+ end
222
+
223
+ def attrset_id? id
224
+ id =~ /^\[\]=$|^\w+=$/
225
+ end
226
+
227
+ def endless_method_name defn_or_defs
228
+ name = defn_or_defs[1]
229
+ name = defn_or_defs[2] unless Symbol === name
230
+
231
+ if attrset_id? name then
232
+ yyerror "setter method cannot be defined in an endless method definition"
233
+ end
234
+
235
+ # TODO? token_info_drop(p, "def", loc->beg_pos);
236
+ end
237
+
238
+ def array_to_hash array
239
+ case array.sexp_type
240
+ when :kwsplat then
241
+ array
242
+ else
243
+ s(:hash, *array.sexp_body).line array.line
244
+ end
245
+ end
246
+
165
247
  def aryset receiver, index
166
248
  index ||= s()
167
- s(:attrasgn, receiver, :"[]=", *index.sexp_body).compact # [].sexp_body => nil
249
+ l = receiver.line
250
+ result = s(:attrasgn, receiver, :"[]=",
251
+ *index.sexp_body).compact # [].sexp_body => nil
252
+ result.line = l
253
+ result
168
254
  end
169
255
 
170
256
  def assignable(lhs, value = nil)
171
- id = lhs.to_sym unless Sexp === lhs
172
- id = id.to_sym if Sexp === id
173
-
174
- raise "write a test 1" if id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
175
-
176
- raise SyntaxError, "Can't change the value of #{id}" if
177
- id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
257
+ id, line = lhs
258
+ id = id.to_sym
178
259
 
179
- result = case id.to_s
260
+ result = case id
180
261
  when /^@@/ then
181
262
  asgn = in_def || in_single > 0
182
263
  s((asgn ? :cvasgn : :cvdecl), id)
@@ -198,8 +279,22 @@ module RubyParserStuff
198
279
  self.env[id] ||= :lvar if result.sexp_type == :lasgn
199
280
 
200
281
  result << value if value
282
+ result.line line
283
+ result
284
+ end
201
285
 
202
- return result
286
+ def backref_assign_error ref
287
+ # TODO: need a test for this... obviously
288
+ case ref.sexp_type
289
+ when :nth_ref then
290
+ raise "write a test 2"
291
+ raise SyntaxError, "Can't set variable %p" % ref.last
292
+ when :back_ref then
293
+ raise "write a test 3"
294
+ raise SyntaxError, "Can't set back reference %p" % ref.last
295
+ else
296
+ raise "Unknown backref type: #{ref.inspect}"
297
+ end
203
298
  end
204
299
 
205
300
  def block_append(head, tail)
@@ -209,12 +304,81 @@ module RubyParserStuff
209
304
  line = [head.line, tail.line].compact.min
210
305
 
211
306
  head = remove_begin(head)
212
- head = s(:block, head) unless head.node_type == :block
307
+ head = s(:block, head).line(line) unless head.sexp_type == :block
213
308
 
214
- head.line = line
309
+ # head.line = line
215
310
  head << tail
216
311
  end
217
312
 
313
+ def block_dup_check call_or_args, block
314
+ syntax_error "Both block arg and actual block given." if
315
+ block and call_or_args.block_pass?
316
+ end
317
+
318
+ def block_var *args
319
+ result = self.args args
320
+ result.sexp_type = :masgn
321
+ result
322
+ end
323
+
324
+ def call_args args
325
+ result = s(:call_args)
326
+
327
+ a = args.grep(Sexp).first
328
+ if a then
329
+ result.line a.line
330
+ else
331
+ result.line lexer.lineno
332
+ end
333
+
334
+ args.each do |arg|
335
+ # ruby 3.0+ TODO: next if arg in [String, Integer] # eg ["(", 1]
336
+ next if arg.class == Array && arg.map(&:class) == [String, Integer]
337
+
338
+ case arg
339
+ when Sexp then
340
+ case arg.sexp_type
341
+ when :array, :args, :call_args then # HACK? remove array at some point
342
+ result.concat arg.sexp_body
343
+ else
344
+ result << arg
345
+ end
346
+ when Symbol then
347
+ result << arg
348
+ when Array then
349
+ id, _line = arg
350
+ result << id
351
+ when ",", nil, "(" then
352
+ # ignore
353
+ else
354
+ raise "unhandled: #{arg.inspect} in #{args.inspect}"
355
+ end
356
+ end
357
+
358
+ result
359
+ end
360
+
361
+ def clean_mlhs sexp
362
+ case sexp.sexp_type
363
+ when :masgn then
364
+ if sexp.size == 2 and sexp[1].sexp_type == :array then
365
+ s(:masgn, *sexp[1].sexp_body.map { |sub| clean_mlhs sub })
366
+ else
367
+ debug20 5
368
+ sexp
369
+ end
370
+ when :gasgn, :iasgn, :lasgn, :cvasgn then
371
+ if sexp.size == 2 then
372
+ sexp.last
373
+ else
374
+ debug20 7
375
+ sexp # optional value
376
+ end
377
+ else
378
+ raise "unsupported type: #{sexp.inspect}"
379
+ end
380
+ end
381
+
218
382
  def cond node
219
383
  return nil if node.nil?
220
384
  node = value_expr node
@@ -222,60 +386,116 @@ module RubyParserStuff
222
386
  case node.sexp_type
223
387
  when :lit then
224
388
  if Regexp === node.last then
225
- return s(:match, node)
389
+ s(:match, node)
226
390
  else
227
- return node
391
+ node
228
392
  end
229
393
  when :and then
230
- return s(:and, cond(node[1]), cond(node[2]))
394
+ _, lhs, rhs = node
395
+ s(:and, cond(lhs), cond(rhs))
231
396
  when :or then
232
- return s(:or, cond(node[1]), cond(node[2]))
397
+ _, lhs, rhs = node
398
+ s(:or, cond(lhs), cond(rhs))
233
399
  when :dot2 then
234
400
  label = "flip#{node.hash}"
235
401
  env[label] = :lvar
236
402
  _, lhs, rhs = node
237
- return s(:flip2, lhs, rhs)
403
+ s(:flip2, lhs, rhs) # TODO: recurse?
238
404
  when :dot3 then
239
405
  label = "flip#{node.hash}"
240
406
  env[label] = :lvar
241
407
  _, lhs, rhs = node
242
- return s(:flip3, lhs, rhs)
408
+ s(:flip3, lhs, rhs)
243
409
  else
244
- return node
245
- end
410
+ node
411
+ end.line node.line
246
412
  end
247
413
 
248
- ##
249
- # for pure ruby systems only
414
+ def dedent sexp
415
+ dedent_count = dedent_size sexp
250
416
 
251
- def do_parse
252
- _racc_do_parse_rb(_racc_setup, false)
253
- end if ENV['PURE_RUBY']
417
+ skip_one = false
418
+ sexp.map { |obj|
419
+ case obj
420
+ when Symbol then
421
+ obj
422
+ when String then
423
+ obj.lines.map { |l| remove_whitespace_width l, dedent_count }.join
424
+ when Sexp then
425
+ case obj.sexp_type
426
+ when :evstr then
427
+ skip_one = true
428
+ obj
429
+ when :str then
430
+ _, str = obj
431
+ str = if skip_one then
432
+ skip_one = false
433
+ s1, *rest = str.lines
434
+ s1 + rest.map { |l| remove_whitespace_width l, dedent_count }.join
435
+ else
436
+ str.lines.map { |l| remove_whitespace_width l, dedent_count }.join
437
+ end
254
438
 
255
- def new_match lhs, rhs
256
- if lhs then
257
- case lhs.sexp_type
258
- when :dregx, :dregx_once then
259
- return s(:match2, lhs, rhs).line(lhs.line)
260
- when :lit then
261
- return s(:match2, lhs, rhs).line(lhs.line) if Regexp === lhs.last
439
+ s(:str, str).line obj.line
440
+ else
441
+ warn "unprocessed sexp %p" % [obj]
442
+ end
443
+ else
444
+ warn "unprocessed: %p" % [obj]
262
445
  end
263
- end
446
+ }
447
+ end
264
448
 
265
- if rhs then
266
- case rhs.sexp_type
267
- when :dregx, :dregx_once then
268
- return s(:match3, rhs, lhs).line(lhs.line)
269
- when :lit then
270
- return s(:match3, rhs, lhs).line(lhs.line) if Regexp === rhs.last
449
+ def dedent_size sexp
450
+ skip_one = false
451
+ sexp.flat_map { |s|
452
+ case s
453
+ when Symbol then
454
+ next
455
+ when String then
456
+ s.lines
457
+ when Sexp then
458
+ case s.sexp_type
459
+ when :evstr then
460
+ skip_one = true
461
+ next
462
+ when :str then
463
+ _, str = s
464
+ lines = str.lines
465
+ if skip_one then
466
+ skip_one = false
467
+ lines.shift
468
+ end
469
+ lines
470
+ else
471
+ warn "unprocessed sexp %p" % [s]
472
+ end
473
+ else
474
+ warn "unprocessed: %p" % [s]
475
+ end.map { |l| whitespace_width l }
476
+ }.compact.min
477
+ end
478
+
479
+ def dedent_string string, width
480
+ characters_skipped = 0
481
+ indentation_skipped = 0
482
+
483
+ string.chars.each do |char|
484
+ break if indentation_skipped >= width
485
+ if char == " "
486
+ characters_skipped += 1
487
+ indentation_skipped += 1
488
+ elsif char == "\t"
489
+ proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
490
+ break if proposed > width
491
+ characters_skipped += 1
492
+ indentation_skipped = proposed
271
493
  end
272
494
  end
273
-
274
- return new_call(lhs, :"=~", argl(rhs)).line(lhs.line)
495
+ string[characters_skipped..-1]
275
496
  end
276
497
 
277
498
  def gettable(id)
278
- lineno = id.lineno if id.respond_to? :lineno
279
499
  id = id.to_sym if String === id
280
500
 
281
501
  result = case id.to_s
@@ -296,40 +516,99 @@ module RubyParserStuff
296
516
  end
297
517
  end
298
518
 
299
- result.line lineno if lineno
300
-
301
519
  raise "identifier #{id.inspect} is not valid" unless result
302
520
 
303
521
  result
304
522
  end
305
523
 
524
+ def hack_encoding str, extra = nil
525
+ encodings = ENCODING_ORDER.dup
526
+ encodings.unshift(extra) unless extra.nil?
527
+
528
+ # terrible, horrible, no good, very bad, last ditch effort.
529
+ encodings.each do |enc|
530
+ begin
531
+ str.force_encoding enc
532
+ if str.valid_encoding? then
533
+ str.encode! Encoding::UTF_8
534
+ break
535
+ end
536
+ rescue ArgumentError # unknown encoding name
537
+ # do nothing
538
+ rescue Encoding::InvalidByteSequenceError
539
+ # do nothing
540
+ rescue Encoding::UndefinedConversionError
541
+ # do nothing
542
+ end
543
+ end
544
+
545
+ # no amount of pain is enough for you.
546
+ raise "Bad encoding. Need a magic encoding comment." unless
547
+ str.encoding.name == "UTF-8"
548
+ end
549
+
306
550
  ##
307
- # Canonicalize conditionals. Eg:
308
- #
309
- # not x ? a : b
551
+ # Returns a UTF-8 encoded string after processing BOMs and magic
552
+ # encoding comments.
310
553
  #
311
- # becomes:
554
+ # Holy crap... ok. Here goes:
312
555
  #
313
- # x ? b : a
556
+ # Ruby's file handling and encoding support is insane. We need to be
557
+ # able to lex a file. The lexer file is explicitly UTF-8 to make
558
+ # things cleaner. This allows us to deal with extended chars in
559
+ # class and method names. In order to do this, we need to encode all
560
+ # input source files as UTF-8. First, we look for a UTF-8 BOM by
561
+ # looking at the first line while forcing its encoding to
562
+ # ASCII-8BIT. If we find a BOM, we strip it and set the expected
563
+ # encoding to UTF-8. Then, we search for a magic encoding comment.
564
+ # If found, it overrides the BOM. Finally, we force the encoding of
565
+ # the input string to whatever was found, and then encode that to
566
+ # UTF-8 for compatibility with the lexer.
567
+
568
+ def handle_encoding str
569
+ str = str.dup
570
+ has_enc = str.respond_to? :encoding # TODO: remove
571
+ encoding = nil
572
+
573
+ header = str.each_line.first(2)
574
+ header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc
575
+
576
+ first = header.first || ""
577
+ encoding, str = +"utf-8", str.b[3..-1] if first =~ /\A\xEF\xBB\xBF/
314
578
 
315
- attr_accessor :canonicalize_conditions
579
+ encoding = $1.strip if header.find { |s|
580
+ s[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
581
+ s[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
582
+ }
316
583
 
317
- def initialize(options = {})
318
- super()
584
+ if encoding then
585
+ if has_enc then
586
+ encoding.sub!(/utf-8-.+$/, "utf-8") # HACK for stupid emacs formats
587
+ hack_encoding str, encoding
588
+ else
589
+ warn "Skipping magic encoding comment"
590
+ end
591
+ else
592
+ # nothing specified... ugh. try to encode as utf-8
593
+ hack_encoding str if has_enc
594
+ end
319
595
 
320
- v = self.class.name[/2\d/]
321
- raise "Bad Class name #{self.class}" unless v
596
+ str
597
+ end
322
598
 
323
- self.lexer = RubyLexer.new v && v.to_i
324
- self.lexer.parser = self
325
- self.in_kwarg = false
599
+ def invert_block_call val
600
+ ret, iter = val
601
+ type, call = ret
326
602
 
327
- @env = RubyParserStuff::Environment.new
328
- @comments = []
603
+ iter.insert 1, call
329
604
 
330
- @canonicalize_conditions = true
605
+ ret = s(type).line ret.line
331
606
 
332
- self.reset
607
+ [iter, ret]
608
+ end
609
+
610
+ def inverted? val
611
+ JUMP_TYPE[val[0].sexp_type]
333
612
  end
334
613
 
335
614
  def list_append list, item # TODO: nuke me *sigh*
@@ -350,12 +629,14 @@ module RubyParserStuff
350
629
 
351
630
  htype, ttype = head.sexp_type, tail.sexp_type
352
631
 
353
- head = s(:dstr, '', head) if htype == :evstr
632
+ head = s(:dstr, "", head).line head.line if htype == :evstr
354
633
 
355
634
  case ttype
356
635
  when :str then
357
636
  if htype == :str
358
- head.last << tail.last
637
+ a, b = head.last, tail.last
638
+ b = b.dup.force_encoding a.encoding unless Encoding.compatible?(a, b)
639
+ a << b
359
640
  elsif htype == :dstr and head.size == 2 then
360
641
  head.last << tail.last
361
642
  else
@@ -369,8 +650,8 @@ module RubyParserStuff
369
650
  head.line = lineno
370
651
  else
371
652
  tail.sexp_type = :array
372
- tail[1] = s(:str, tail[1])
373
- tail.delete_at 1 if tail[1] == s(:str, '')
653
+ tail[1] = s(:str, tail[1]).line tail.line
654
+ tail.delete_at 1 if tail[1] == s(:str, "")
374
655
 
375
656
  head.push(*tail.sexp_body)
376
657
  end
@@ -396,6 +677,13 @@ module RubyParserStuff
396
677
  return head
397
678
  end
398
679
 
680
+ def local_pop in_def
681
+ lexer.cond.pop # group = local_pop
682
+ lexer.cmdarg.pop
683
+ self.env.unextend
684
+ self.in_def = in_def
685
+ end
686
+
399
687
  def logical_op type, left, right
400
688
  left = value_expr left
401
689
 
@@ -408,12 +696,15 @@ module RubyParserStuff
408
696
  node = rhs
409
697
  end
410
698
 
411
- node[2] = s(type, rhs, right)
699
+ node.pop
700
+ node << s(type, rhs, right).line(rhs.line)
412
701
 
413
702
  return left
414
703
  end
415
704
 
416
- return s(type, left, right)
705
+ result = s(type, left, right)
706
+ result.line left.line if left.line
707
+ result
417
708
  end
418
709
 
419
710
  def new_aref val
@@ -422,6 +713,126 @@ module RubyParserStuff
422
713
  new_call val[0], :"[]", val[2]
423
714
  end
424
715
 
716
+ def new_arg val
717
+ arg, = val
718
+
719
+ case arg
720
+ when Symbol then
721
+ result = s(:args, arg).line line
722
+ when Sexp then
723
+ result = arg
724
+ when Array then
725
+ (arg, line), = val
726
+ result = s(:args, arg).line line
727
+ else
728
+ debug20 32
729
+ raise "Unknown f_arg type: #{val.inspect}"
730
+ end
731
+
732
+ result
733
+ end
734
+
735
+ def ary_to_pat ary
736
+ pat = ary.dup
737
+ pat.sexp_type = :array_TAIL
738
+
739
+ new_array_pattern nil, nil, pat, ary.line
740
+ end
741
+
742
+ def new_array_pattern const, pre_arg, arypat, loc
743
+ result = s(:array_pat, const).line loc
744
+ result << pre_arg if pre_arg
745
+
746
+ if arypat && arypat.sexp_type == :array_TAIL then
747
+ result.concat arypat.sexp_body
748
+ else
749
+ raise "NO?: %p" % [arypat]
750
+ end
751
+
752
+ result
753
+ end
754
+
755
+ def array_pat_concat lhs, rhs
756
+ case lhs.sexp_type
757
+ when :PATTERN then
758
+ lhs.sexp_type = :array_pat
759
+ end
760
+
761
+ if rhs then
762
+ case rhs.sexp_type
763
+ when :array_pat, :array_TAIL, :PATTERN then
764
+ lhs.concat rhs.sexp_body
765
+ else
766
+ lhs << rhs
767
+ end
768
+ end
769
+ end
770
+
771
+ def new_array_pattern_tail pre_args, has_rest, rest_arg, post_args
772
+ # TODO: remove has_rest once all tests pass !!!
773
+ rest_arg = if has_rest then
774
+ :"*#{rest_arg}"
775
+ else
776
+ nil
777
+ end
778
+
779
+ result = s(:array_TAIL).line 666
780
+
781
+ array_pat_concat result, pre_args
782
+
783
+ result << rest_arg if rest_arg
784
+
785
+ array_pat_concat result, post_args
786
+
787
+ result
788
+ end
789
+
790
+ def new_assign lhs, rhs
791
+ return nil unless lhs
792
+
793
+ rhs = value_expr rhs
794
+
795
+ case lhs.sexp_type
796
+ when :lasgn, :iasgn, :cdecl, :cvdecl, :gasgn, :cvasgn, :attrasgn, :safe_attrasgn then
797
+ lhs << rhs
798
+ lhs.line_max = rhs.line_max
799
+ when :const then
800
+ lhs.sexp_type = :cdecl
801
+ lhs << rhs
802
+ else
803
+ raise "unknown lhs #{lhs.inspect} w/ #{rhs.inspect}"
804
+ end
805
+
806
+ lhs
807
+ end
808
+
809
+ def new_attrasgn recv, meth, call_op = :"."
810
+ call_op = call_op.first if Array === call_op
811
+
812
+ meth = :"#{meth}="
813
+
814
+ result = case call_op.to_sym
815
+ when :"."
816
+ s(:attrasgn, recv, meth)
817
+ when :"&."
818
+ s(:safe_attrasgn, recv, meth)
819
+ else
820
+ raise "unknown call operator: `#{type.inspect}`"
821
+ end
822
+
823
+ result.line = recv.line
824
+ result
825
+ end
826
+
827
+ def new_begin val
828
+ (_, line), _, body, _ = val
829
+
830
+ result = body ? s(:begin, body) : s(:nil)
831
+ result.line line
832
+
833
+ result
834
+ end
835
+
425
836
  def new_body val
426
837
  body, resbody, elsebody, ensurebody = val
427
838
 
@@ -445,43 +856,29 @@ module RubyParserStuff
445
856
 
446
857
  if elsebody and not resbody then
447
858
  warning("else without rescue is useless")
448
- result = s(:begin, result) if result
859
+ result = s(:begin, result).line result.line if result
449
860
  result = block_append(result, elsebody)
450
861
  end
451
862
 
452
- result = s(:ensure, result, ensurebody).compact if ensurebody
863
+ if ensurebody
864
+ lineno = (result || ensurebody).line
865
+ result = s(:ensure, result, ensurebody).compact.line lineno
866
+ end
453
867
 
454
868
  result
455
869
  end
456
870
 
457
871
  def new_brace_body args, body, lineno
458
- new_iter(nil, args, body).line(lineno)
872
+ new_iter(nil, args, body).line lineno
459
873
  end
460
874
 
461
- def argl x
462
- x = s(:arglist, x) if x and x.sexp_type == :array
463
- x
464
- end
465
-
466
- def backref_assign_error ref
467
- # TODO: need a test for this... obviously
468
- case ref.sexp_type
469
- when :nth_ref then
470
- raise "write a test 2"
471
- raise SyntaxError, "Can't set variable %p" % ref.last
472
- when :back_ref then
473
- raise "write a test 3"
474
- raise SyntaxError, "Can't set back reference %p" % ref.last
475
- else
476
- raise "Unknown backref type: #{ref.inspect}"
477
- end
478
- end
875
+ def new_call recv, meth, args = nil, call_op = :"."
876
+ call_op = call_op.first if Array === call_op
479
877
 
480
- def new_call recv, meth, args = nil, call_op = :'.'
481
878
  result = case call_op.to_sym
482
- when :'.'
879
+ when :"."
483
880
  s(:call, recv, meth)
484
- when :'&.'
881
+ when :"&."
485
882
  s(:safe_call, recv, meth)
486
883
  else
487
884
  raise "unknown call operator: `#{type.inspect}`"
@@ -490,40 +887,30 @@ module RubyParserStuff
490
887
  # TODO: need a test with f(&b) to produce block_pass
491
888
  # TODO: need a test with f(&b) { } to produce warning
492
889
 
493
- if args
494
- if [:arglist, :args, :array, :call_args].include? args.sexp_type
890
+ if args then
891
+ if ARG_TYPES[args.sexp_type] then
495
892
  result.concat args.sexp_body
496
893
  else
497
894
  result << args
498
895
  end
896
+ result.line_max = args.line_max
499
897
  end
500
898
 
501
- line = result.grep(Sexp).map(&:line).compact.min
502
- result.line = line if line
899
+ # line = result.grep(Sexp).map(&:line).compact.min
900
+ result.line = recv.line if recv
901
+ result.line ||= lexer.lineno
503
902
 
504
903
  result
505
904
  end
506
905
 
507
- def new_attrasgn recv, meth, call_op
508
- meth = :"#{meth}="
509
-
510
- result = case call_op.to_sym
511
- when :'.'
512
- s(:attrasgn, recv, meth)
513
- when :'&.'
514
- s(:safe_attrasgn, recv, meth)
515
- else
516
- raise "unknown call operator: `#{type.inspect}`"
517
- end
518
-
519
- result.line = recv.line
520
- result
906
+ def new_in pat, body, cases, line
907
+ s(:in, pat, body, cases).line line
521
908
  end
522
909
 
523
910
  def new_case expr, body, line
524
911
  result = s(:case, expr)
525
912
 
526
- while body and body.node_type == :when
913
+ while body and [:when, :in].include? body.sexp_type
527
914
  result << body
528
915
  body = body.delete_at 3
529
916
  end
@@ -542,7 +929,9 @@ module RubyParserStuff
542
929
  end
543
930
 
544
931
  def new_class val
545
- line, path, superclass, body = val[1], val[2], val[3], val[5]
932
+ (_, line, comment), path, superclass, _, body, (_, line_max) = val
933
+
934
+ path = path.first if path.instance_of? Array
546
935
 
547
936
  result = s(:class, path, superclass)
548
937
 
@@ -555,7 +944,8 @@ module RubyParserStuff
555
944
  end
556
945
 
557
946
  result.line = line
558
- result.comments = self.comments.pop
947
+ result.line_max = line_max
948
+ result.comments = comment if comment
559
949
  result
560
950
  end
561
951
 
@@ -565,50 +955,147 @@ module RubyParserStuff
565
955
  result
566
956
  end
567
957
 
958
+ def new_const_op_asgn val
959
+ lhs, (asgn_op, _), rhs = val
960
+ asgn_op = asgn_op.to_sym
961
+
962
+ result = case asgn_op
963
+ when :"||" then
964
+ s(:op_asgn_or, lhs, rhs)
965
+ when :"&&" then
966
+ s(:op_asgn_and, lhs, rhs)
967
+ else
968
+ s(:op_asgn, lhs, asgn_op, rhs)
969
+ end
970
+
971
+ result.line = lhs.line
972
+ result
973
+ end
974
+
568
975
  def new_defn val
569
- (_, line), (name, _), _, args, body, * = val
570
- body ||= s(:nil)
976
+ if val.size == 4 then
977
+ ((_, line, comment), (name, _line, in_def)), args, body, (_, line_max) = val
978
+ else
979
+ (_, line, comment), (name, line), in_def, args, body, (_, line_max) = val
980
+ end
571
981
 
572
- result = s(:defn, name.to_sym, args)
982
+ body ||= s(:nil).line line
573
983
 
574
- if body then
575
- if body.sexp_type == :block then
576
- result.push(*body.sexp_body)
984
+ args.line line
985
+
986
+ result = s(:defn, name.to_sym, args).line line
987
+ result.line_max = line_max
988
+
989
+ if body.sexp_type == :block then
990
+ result.push(*body.sexp_body)
991
+ else
992
+ result.push body
993
+ end
994
+
995
+ result.comments = comment if comment
996
+
997
+ [result, in_def]
998
+ end
999
+
1000
+ def new_endless_defn val
1001
+ # not available in 2.x so we don't need to check size
1002
+ ((_, line, comment), (name, _, in_def)), args, _, body, _, resbody = val
1003
+
1004
+ result =
1005
+ if resbody then
1006
+ s(:defn, name, args,
1007
+ new_rescue(body,
1008
+ new_resbody(s(:array).line(line),
1009
+ resbody))).line line
577
1010
  else
578
- result.push body
1011
+ s(:defn, name, args, body).line line
579
1012
  end
580
- end
581
1013
 
582
- args.line line
583
- result.line = line
584
- result.comments = self.comments.pop
1014
+ local_pop in_def
1015
+ endless_method_name result
1016
+
1017
+ result.comments = comment if comment
585
1018
 
586
1019
  result
587
1020
  end
588
1021
 
589
- def new_defs val
590
- recv, (name, _line), args, body = val[1], val[4], val[6], val[7]
591
- body ||= s(:nil)
592
-
593
- result = s(:defs, recv, name.to_sym, args)
1022
+ def new_endless_defs val
1023
+ # not available in 2.x so we don't need to check size
1024
+ ((_, line, comment), recv, _, _, (name, line, in_def)), \
1025
+ args, _, body, _, resbody = val
594
1026
 
595
- if body then
596
- if body.sexp_type == :block then
597
- result.push(*body.sexp_body)
1027
+ result =
1028
+ if resbody then
1029
+ s(:defs, recv, name, args,
1030
+ new_rescue(body,
1031
+ new_resbody(s(:array).line(line),
1032
+ resbody))).line line
598
1033
  else
599
- result.push body
1034
+ s(:defs, recv, name, args, body).line(line)
600
1035
  end
601
- end
602
1036
 
603
- result.line = recv.line
604
- result.comments = self.comments.pop
1037
+ self.in_single -= 1
1038
+ local_pop in_def
1039
+ endless_method_name result
1040
+
1041
+ result.comments = comment if comment
1042
+
605
1043
  result
606
1044
  end
607
1045
 
1046
+ def new_defs val
1047
+ if val.size == 4 then
1048
+ ((_, line, comment), recv, _, _, (name, line, in_def)), \
1049
+ args, body, (_, line_max) = val
1050
+ else
1051
+ (_, line, comment), recv, (name, _), in_def, \
1052
+ args, body, (_, line_max) = val
1053
+ end
1054
+
1055
+ body ||= s(:nil).line line
1056
+
1057
+ args.line line
1058
+
1059
+ result = s(:defs, recv, name.to_sym, args).line line
1060
+ result.line_max = line_max
1061
+
1062
+ # TODO: remove_begin
1063
+ # TODO: reduce_nodes
1064
+
1065
+ if body.sexp_type == :block then
1066
+ result.push(*body.sexp_body)
1067
+ else
1068
+ result.push body
1069
+ end
1070
+
1071
+ result.comments = comment if comment
1072
+
1073
+ [result, in_def]
1074
+ end
1075
+
608
1076
  def new_do_body args, body, lineno
609
1077
  new_iter(nil, args, body).line(lineno)
610
1078
  end
611
1079
 
1080
+ def new_find_pattern const, pat
1081
+ pat.sexp_type = :find_pat
1082
+ pat.insert 1, const
1083
+ end
1084
+
1085
+ def new_find_pattern_tail lhs, mid, rhs
1086
+ lhs_id, line = lhs
1087
+ rhs_id, _line = rhs
1088
+
1089
+ # TODO: fpinfo->pre_rest_arg = pre_rest_arg ? assignable(p, pre_rest_arg, 0, loc) : NODE_SPECIAL_NO_NAME_REST;
1090
+
1091
+ lhs_id = "*#{lhs_id}".to_sym
1092
+ rhs_id = "*#{rhs_id}".to_sym
1093
+
1094
+ raise "BAD?" unless mid.sexp_type == :array_TAIL
1095
+
1096
+ s(:find_pat_TAIL, lhs_id, *mid.sexp_body, rhs_id).line line
1097
+ end
1098
+
612
1099
  def new_for expr, var, body
613
1100
  result = s(:for, expr, var).line(var.line)
614
1101
  result << body if body
@@ -616,7 +1103,49 @@ module RubyParserStuff
616
1103
  end
617
1104
 
618
1105
  def new_hash val
619
- s(:hash, *val[2].values).line(val[1])
1106
+ _, line, assocs = val
1107
+
1108
+ s(:hash).line(line).concat assocs.sexp_body
1109
+ end
1110
+
1111
+ def new_hash_pattern const, hash_pat, loc
1112
+ _, pat, kw_args, kw_rest_arg = hash_pat
1113
+
1114
+ line = (const||hash_pat).line
1115
+
1116
+ result = s(:hash_pat, const).line line
1117
+ result.concat pat.sexp_body if pat
1118
+ result << kw_args if kw_args
1119
+ result << kw_rest_arg if kw_rest_arg
1120
+ result
1121
+ end
1122
+
1123
+ def new_hash_pattern_tail kw_args, kw_rest_arg, line # TODO: remove line arg
1124
+ # kw_rest_arg = assignable(kw_rest_arg, nil).line line if kw_rest_arg
1125
+
1126
+ result = s(:hash_pat).line line
1127
+ result << kw_args
1128
+
1129
+ if kw_rest_arg then
1130
+ name = kw_rest_arg.value
1131
+ # TODO: I _hate_ this:
1132
+ assignable [name, kw_rest_arg.line] if name != :**
1133
+ result << kw_rest_arg
1134
+ end
1135
+
1136
+ result
1137
+ end
1138
+
1139
+ def push_pktbl
1140
+ end
1141
+
1142
+ def pop_pktbl
1143
+ end
1144
+
1145
+ def push_pvtbl
1146
+ end
1147
+
1148
+ def pop_pvtbl
620
1149
  end
621
1150
 
622
1151
  def new_if c, t, f
@@ -637,33 +1166,68 @@ module RubyParserStuff
637
1166
  result << args
638
1167
  result << body if body
639
1168
 
640
- args.sexp_type = :args unless args == 0
1169
+ result.line call.line if call
641
1170
 
642
- result
643
- end
1171
+ unless args == 0 then
1172
+ args.line call.line if call
1173
+ args.sexp_type = :args
1174
+ end
644
1175
 
645
- def new_masgn_arg rhs, wrap = false
646
- rhs = value_expr(rhs)
647
- rhs = s(:to_ary, rhs) if wrap # HACK: could be array if lhs isn't right
648
- rhs
1176
+ result
649
1177
  end
650
1178
 
651
1179
  def new_masgn lhs, rhs, wrap = false
652
1180
  _, ary = lhs
653
1181
 
1182
+ line = rhs.line
654
1183
  rhs = value_expr(rhs)
655
1184
  rhs = ary ? s(:to_ary, rhs) : s(:array, rhs) if wrap
1185
+ rhs.line line if wrap
656
1186
 
657
1187
  lhs.delete_at 1 if ary.nil?
658
1188
  lhs << rhs
659
1189
 
660
- lhs
1190
+ lhs
1191
+ end
1192
+
1193
+ def new_masgn_arg rhs, wrap = false
1194
+ rhs = value_expr(rhs)
1195
+ # HACK: could be array if lhs isn't right
1196
+ rhs = s(:to_ary, rhs).line rhs.line if wrap
1197
+ rhs
1198
+ end
1199
+
1200
+ def new_match lhs, rhs
1201
+ if lhs then
1202
+ case lhs.sexp_type
1203
+ when :dregx, :dregx_once then
1204
+ # TODO: no test coverage
1205
+ return s(:match2, lhs, rhs).line(lhs.line)
1206
+ when :lit then
1207
+ return s(:match2, lhs, rhs).line(lhs.line) if Regexp === lhs.last
1208
+ end
1209
+ end
1210
+
1211
+ if rhs then
1212
+ case rhs.sexp_type
1213
+ when :dregx, :dregx_once then
1214
+ # TODO: no test coverage
1215
+ return s(:match3, rhs, lhs).line(lhs.line)
1216
+ when :lit then
1217
+ return s(:match3, rhs, lhs).line(lhs.line) if Regexp === rhs.last
1218
+ end
1219
+ end
1220
+
1221
+ new_call(lhs, :"=~", argl(rhs)).line lhs.line
661
1222
  end
662
1223
 
663
1224
  def new_module val
664
- line, path, body = val[1], val[2], val[4]
1225
+ (_, line_min, comment), path, _, body, (_, line_max) = val
665
1226
 
666
- result = s(:module, path)
1227
+ path = path.first if path.instance_of? Array
1228
+
1229
+ result = s(:module, path).line line_min
1230
+ result.line_max = line_max
667
1231
 
668
1232
  if body then # REFACTOR?
669
1233
  if body.sexp_type == :block then
@@ -673,55 +1237,49 @@ module RubyParserStuff
673
1237
  end
674
1238
  end
675
1239
 
676
- result.line = line
677
- result.comments = self.comments.pop
1240
+ result.comments = comment if comment
678
1241
  result
679
1242
  end
680
1243
 
681
1244
  def new_op_asgn val
682
- lhs, asgn_op, arg = val[0], val[1].to_sym, val[2]
683
- name = lhs.value
684
- arg = remove_begin(arg)
685
- result = case asgn_op # REFACTOR
1245
+ lhs, (op, _line), rhs = val
1246
+ op = op.to_sym
1247
+
1248
+ name = gettable(lhs.last).line lhs.line
1249
+ arg = remove_begin rhs
1250
+ result = case op # REFACTOR
686
1251
  when :"||" then
687
1252
  lhs << arg
688
- s(:op_asgn_or, self.gettable(name), lhs)
1253
+ s(:op_asgn_or, name, lhs).line lhs.line
689
1254
  when :"&&" then
690
1255
  lhs << arg
691
- s(:op_asgn_and, self.gettable(name), lhs)
1256
+ s(:op_asgn_and, name, lhs).line lhs.line
692
1257
  else
693
- # TODO: why [2] ?
694
- lhs[2] = new_call(self.gettable(name), asgn_op, argl(arg))
1258
+ lhs << new_call(name, op, argl(arg))
695
1259
  lhs
696
1260
  end
697
- result.line = lhs.line
1261
+
698
1262
  result
699
1263
  end
700
1264
 
701
- def new_const_op_asgn val
702
- lhs, asgn_op, rhs = val[0], val[1].to_sym, val[2]
1265
+ def new_op_asgn1 val
1266
+ lhs, _, args, _, (op, _), rhs = val
703
1267
 
704
- result = case asgn_op
705
- when :"||" then
706
- s(:op_asgn_or, lhs, rhs)
707
- when :"&&" then
708
- s(:op_asgn_and, lhs, rhs)
709
- else
710
- s(:op_asgn, lhs, asgn_op, rhs)
711
- end
1268
+ args.sexp_type = :arglist if args
712
1269
 
713
- result.line = lhs.line
1270
+ result = s(:op_asgn1, lhs, args, op.to_sym, rhs)
1271
+ result.line lhs.line
714
1272
  result
715
1273
  end
716
1274
 
717
1275
  def new_op_asgn2 val
718
- recv, call_op, meth, op, arg = val
1276
+ recv, (call_op, _), (meth, _), (op, _), arg = val
719
1277
  meth = :"#{meth}="
720
1278
 
721
1279
  result = case call_op.to_sym
722
- when :'.'
1280
+ when :"."
723
1281
  s(:op_asgn2, recv, meth, op.to_sym, arg)
724
- when :'&.'
1282
+ when :"&."
725
1283
  s(:safe_op_asgn2, recv, meth, op.to_sym, arg)
726
1284
  else
727
1285
  raise "unknown call operator: `#{type.inspect}`"
@@ -731,21 +1289,42 @@ module RubyParserStuff
731
1289
  result
732
1290
  end
733
1291
 
1292
+ def new_qsym_list
1293
+ s(:array).line lexer.lineno
1294
+ end
1295
+
1296
+ def new_qsym_list_entry val
1297
+ _, (str, line), _ = val
1298
+ s(:lit, str.to_sym).line line
1299
+ end
1300
+
1301
+ def new_qword_list
1302
+ s(:array).line lexer.lineno
1303
+ end
1304
+
1305
+ def new_qword_list_entry val
1306
+ _, (str, line), _ = val
1307
+ str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
1308
+ s(:str, str).line line
1309
+ end
1310
+
734
1311
  def new_regexp val
735
- node = val[1] || s(:str, '')
736
- options = val[2]
1312
+ (_, line), node, (options, line_max) = val
1313
+
1314
+ node ||= s(:str, "").line line
1315
+ node.line_max = line_max
737
1316
 
738
1317
  o, k = 0, nil
739
1318
  options.split(//).uniq.each do |c| # FIX: this has a better home
740
1319
  v = {
741
- 'x' => Regexp::EXTENDED,
742
- 'i' => Regexp::IGNORECASE,
743
- 'm' => Regexp::MULTILINE,
744
- 'o' => Regexp::ONCE,
745
- 'n' => Regexp::ENC_NONE,
746
- 'e' => Regexp::ENC_EUC,
747
- 's' => Regexp::ENC_SJIS,
748
- 'u' => Regexp::ENC_UTF8,
1320
+ "x" => Regexp::EXTENDED,
1321
+ "i" => Regexp::IGNORECASE,
1322
+ "m" => Regexp::MULTILINE,
1323
+ "o" => Regexp::ONCE,
1324
+ "n" => Regexp::ENC_NONE,
1325
+ "e" => Regexp::ENC_EUC,
1326
+ "s" => Regexp::ENC_SJIS,
1327
+ "u" => Regexp::ENC_UTF8,
749
1328
  }[c]
750
1329
  raise "unknown regexp option: #{c}" unless v
751
1330
  o += v
@@ -760,12 +1339,12 @@ module RubyParserStuff
760
1339
  begin
761
1340
  Regexp.new(node[1], o)
762
1341
  rescue RegexpError => e
763
- warn "WA\RNING: #{e.message} for #{node[1].inspect} #{options.inspect}"
1342
+ warn "WARNING: #{e.message} for #{node[1].inspect} #{options.inspect}"
764
1343
  begin
765
- warn "WA\RNING: trying to recover with ENC_UTF8"
1344
+ warn "WARNING: trying to recover with ENC_UTF8"
766
1345
  Regexp.new(node[1], Regexp::ENC_UTF8)
767
1346
  rescue RegexpError => e
768
- warn "WA\RNING: trying to recover with ENC_NONE"
1347
+ warn "WARNING: trying to recover with ENC_NONE"
769
1348
  Regexp.new(node[1], Regexp::ENC_NONE)
770
1349
  end
771
1350
  end
@@ -778,7 +1357,7 @@ module RubyParserStuff
778
1357
  end
779
1358
  node << o if o and o != 0
780
1359
  else
781
- node = s(:dregx, '', node);
1360
+ node = s(:dregx, "", node).line line
782
1361
  node.sexp_type = :dregx_once if options =~ /o/
783
1362
  node << o if o and o != 0
784
1363
  end
@@ -786,21 +1365,22 @@ module RubyParserStuff
786
1365
  node
787
1366
  end
788
1367
 
789
- def new_rescue body, resbody
790
- s(:rescue, body, resbody)
791
- end
792
-
793
1368
  def new_resbody cond, body
794
1369
  if body && body.sexp_type == :block then
795
1370
  body.shift # remove block and splat it in directly
796
1371
  else
797
1372
  body = [body]
798
1373
  end
1374
+
799
1375
  s(:resbody, cond, *body).line cond.line
800
1376
  end
801
1377
 
1378
+ def new_rescue body, resbody
1379
+ s(:rescue, body, resbody).line body.line
1380
+ end
1381
+
802
1382
  def new_sclass val
803
- recv, in_def, in_single, body = val[3], val[4], val[6], val[7]
1383
+ (_, line), _, recv, in_def, _, in_single, body, _ = val
804
1384
 
805
1385
  result = s(:sclass, recv)
806
1386
 
@@ -812,97 +1392,62 @@ module RubyParserStuff
812
1392
  end
813
1393
  end
814
1394
 
815
- result.line = val[2]
1395
+ result.line = line
816
1396
  self.in_def = in_def
817
1397
  self.in_single = in_single
818
1398
  result
819
1399
  end
820
1400
 
821
1401
  def new_string val
822
- str = val[0]
823
- str.force_encoding("UTF-8")
824
- str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
825
- result = s(:str, str)
826
- self.lexer.fixup_lineno str.count("\n")
827
- result
828
- end
1402
+ (str, line), = val
829
1403
 
830
- def new_qword_list_entry val
831
- str = val[1]
1404
+ str.force_encoding("UTF-8")
1405
+ # TODO: remove:
832
1406
  str.force_encoding("ASCII-8BIT") unless str.valid_encoding?
833
- result = s(:str, str)
834
- self.lexer.fixup_lineno
835
- result
836
- end
837
-
838
- def new_qword_list
839
- result = s(:array)
840
- self.lexer.fixup_lineno
841
- result
842
- end
843
-
844
- def new_word_list
845
- result = s(:array)
846
- self.lexer.fixup_lineno
847
- result
848
- end
849
-
850
- def new_word_list_entry val
851
- result = val[1].sexp_type == :evstr ? s(:dstr, "", val[1]) : val[1]
852
- self.lexer.fixup_lineno
853
- result
1407
+ s(:str, str).line line
854
1408
  end
855
1409
 
856
- def new_qsym_list
857
- result = s(:array)
858
- self.lexer.fixup_lineno
859
- result
1410
+ def new_super args
1411
+ if args && args.sexp_type == :block_pass then
1412
+ s(:super, args).line args.line
1413
+ else
1414
+ args ||= s(:arglist).line lexer.lineno
1415
+ s(:super, *args.sexp_body).line args.line
1416
+ end
860
1417
  end
861
1418
 
862
- def new_qsym_list_entry val
863
- result = s(:lit, val[1].to_sym)
864
- self.lexer.fixup_lineno
865
- result
1419
+ def new_symbol val
1420
+ name = val.last
1421
+ s(:lit, name.to_sym).line lexer.lineno
866
1422
  end
867
1423
 
868
1424
  def new_symbol_list
869
- result = s(:array)
870
- self.lexer.fixup_lineno
871
- result
1425
+ # TODO: hunt down and try to remove ALL lexer.lineno usage!
1426
+ s(:array).line lexer.lineno
872
1427
  end
873
1428
 
874
1429
  def new_symbol_list_entry val
875
- _list, sym, _nil = val # TODO: use _list
876
- result = val[1]
1430
+ _, sym, _ = val
877
1431
 
878
- result ||= s(:str, "")
1432
+ sym ||= s(:str, "").line lexer.lineno
879
1433
 
880
1434
  case sym.sexp_type
881
1435
  when :dstr then
882
1436
  sym.sexp_type = :dsym
883
1437
  when :str then
884
- sym = s(:lit, sym.last.to_sym)
1438
+ sym = s(:lit, sym.last.to_sym).line sym.line
885
1439
  else
886
- sym = s(:dsym, "", sym || s(:str, ""))
1440
+ sym = s(:dsym, "", sym).line sym.line
887
1441
  end
888
- self.lexer.fixup_lineno
889
- sym
890
- end
891
1442
 
892
- def new_super args
893
- if args && args.node_type == :block_pass then
894
- s(:super, args)
895
- else
896
- args ||= s(:arglist)
897
- s(:super, *args.sexp_body)
898
- end
1443
+ sym
899
1444
  end
900
1445
 
901
1446
  def new_undef n, m = nil
902
1447
  if m then
903
- block_append(n, s(:undef, m))
1448
+ block_append(n, s(:undef, m).line(m.line))
904
1449
  else
905
- s(:undef, n)
1450
+ s(:undef, n).line n.line
906
1451
  end
907
1452
  end
908
1453
 
@@ -935,135 +1480,83 @@ module RubyParserStuff
935
1480
  new_until_or_while :while, block, expr, pre
936
1481
  end
937
1482
 
938
- def new_xstring str
939
- if str then
940
- case str.sexp_type
1483
+ def new_word_list
1484
+ s(:array).line lexer.lineno
1485
+ end
1486
+
1487
+ def new_word_list_entry val
1488
+ _, word, _ = val
1489
+ word.sexp_type == :evstr ? s(:dstr, "", word).line(word.line) : word
1490
+ end
1491
+
1492
+ def new_xstring val
1493
+ _, node = val
1494
+
1495
+ node ||= s(:str, "").line lexer.lineno
1496
+
1497
+ if node then
1498
+ case node.sexp_type
941
1499
  when :str
942
- str.sexp_type = :xstr
1500
+ node.sexp_type = :xstr
943
1501
  when :dstr
944
- str.sexp_type = :dxstr
1502
+ node.sexp_type = :dxstr
945
1503
  else
946
- str = s(:dxstr, '', str)
1504
+ node = s(:dxstr, "", node).line node.line
947
1505
  end
948
- str
949
- else
950
- s(:xstr, '')
951
1506
  end
1507
+
1508
+ node
952
1509
  end
953
1510
 
954
1511
  def new_yield args = nil
955
1512
  # TODO: raise args.inspect unless [:arglist].include? args.first # HACK
956
- raise "write a test 4" if args && args.node_type == :block_pass
1513
+ raise "write a test 4" if args && args.sexp_type == :block_pass
957
1514
  raise SyntaxError, "Block argument should not be given." if
958
- args && args.node_type == :block_pass
1515
+ args && args.sexp_type == :block_pass
959
1516
 
960
- args ||= s(:arglist)
1517
+ args ||= s(:arglist).line lexer.lineno
961
1518
 
962
1519
  args.sexp_type = :arglist if [:call_args, :array].include? args.sexp_type
963
- args = s(:arglist, args) unless args.sexp_type == :arglist
964
-
965
- return s(:yield, *args.sexp_body)
966
- end
967
-
968
- def next_token
969
- token = self.lexer.next_token
1520
+ args = s(:arglist, args).line args.line unless args.sexp_type == :arglist
970
1521
 
971
- if token and token.first != RubyLexer::EOF then
972
- return token
973
- else
974
- return [false, '$end']
975
- end
1522
+ s(:yield, *args.sexp_body).line args.line
976
1523
  end
977
1524
 
978
- def new_assign lhs, rhs
979
- return nil unless lhs
980
-
981
- rhs = value_expr rhs
982
-
983
- case lhs.sexp_type
984
- when :lasgn, :iasgn, :cdecl, :cvdecl, :gasgn, :cvasgn, :attrasgn, :safe_attrasgn then
985
- lhs << rhs
986
- when :const then
987
- lhs.sexp_type = :cdecl
988
- lhs << rhs
1525
+ def prev_value_to_lineno v
1526
+ s, n = v
1527
+ if String === s then
1528
+ n
989
1529
  else
990
- raise "unknown lhs #{lhs.inspect} w/ #{rhs.inspect}"
1530
+ lexer.lineno
991
1531
  end
992
-
993
- lhs
994
1532
  end
995
1533
 
996
- ##
997
- # Returns a UTF-8 encoded string after processing BOMs and magic
998
- # encoding comments.
999
- #
1000
- # Holy crap... ok. Here goes:
1001
- #
1002
- # Ruby's file handling and encoding support is insane. We need to be
1003
- # able to lex a file. The lexer file is explicitly UTF-8 to make
1004
- # things cleaner. This allows us to deal with extended chars in
1005
- # class and method names. In order to do this, we need to encode all
1006
- # input source files as UTF-8. First, we look for a UTF-8 BOM by
1007
- # looking at the first line while forcing its encoding to
1008
- # ASCII-8BIT. If we find a BOM, we strip it and set the expected
1009
- # encoding to UTF-8. Then, we search for a magic encoding comment.
1010
- # If found, it overrides the BOM. Finally, we force the encoding of
1011
- # the input string to whatever was found, and then encode that to
1012
- # UTF-8 for compatibility with the lexer.
1013
-
1014
- def handle_encoding str
1015
- str = str.dup
1016
- has_enc = str.respond_to? :encoding
1017
- encoding = nil
1534
+ KEEP_COMMENT_TOKENS = [:kCLASS, :kMODULE, :kDEF, :tNL]
1018
1535
 
1019
- header = str.each_line.first(2)
1020
- header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc
1536
+ def next_token
1537
+ token = self.lexer.next_token
1021
1538
 
1022
- first = header.first || ""
1023
- encoding, str = "utf-8", str[3..-1] if first =~ /\A\xEF\xBB\xBF/
1539
+ if token and token.first != RubyLexer::EOF then
1540
+ self.last_token_type = token
1024
1541
 
1025
- encoding = $1.strip if header.find { |s|
1026
- s[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
1027
- s[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
1028
- }
1542
+ self.lexer.comment = nil unless KEEP_COMMENT_TOKENS.include? token.first
1029
1543
 
1030
- if encoding then
1031
- if has_enc then
1032
- encoding.sub!(/utf-8-.+$/, 'utf-8') # HACK for stupid emacs formats
1033
- hack_encoding str, encoding
1034
- else
1035
- warn "Skipping magic encoding comment"
1036
- end
1544
+ return token
1545
+ elsif !token
1546
+ return self.lexer.next_token
1037
1547
  else
1038
- # nothing specified... ugh. try to encode as utf-8
1039
- hack_encoding str if has_enc
1548
+ return [false, false]
1040
1549
  end
1041
-
1042
- str
1043
1550
  end
1044
1551
 
1045
- def hack_encoding str, extra = nil
1046
- encodings = ENCODING_ORDER.dup
1047
- encodings.unshift(extra) unless extra.nil?
1048
-
1049
- # terrible, horrible, no good, very bad, last ditch effort.
1050
- encodings.each do |enc|
1051
- begin
1052
- str.force_encoding enc
1053
- if str.valid_encoding? then
1054
- str.encode! Encoding::UTF_8
1055
- break
1056
- end
1057
- rescue Encoding::InvalidByteSequenceError
1058
- # do nothing
1059
- rescue Encoding::UndefinedConversionError
1060
- # do nothing
1061
- end
1062
- end
1063
-
1064
- # no amount of pain is enough for you.
1065
- raise "Bad encoding. Need a magic encoding comment." unless
1066
- str.encoding.name == "UTF-8"
1552
+ def on_error(et, ev, values)
1553
+ ev = ev.first if ev.instance_of?(Array) && ev.size == 2 && ev.last.is_a?(Integer)
1554
+ super
1555
+ rescue Racc::ParseError => e
1556
+ # I don't like how the exception obscures the error message
1557
+ e.message.replace "%s:%p :: %s" % [self.file, lexer.lineno, e.message.strip]
1558
+ warn e.message if $DEBUG
1559
+ raise
1067
1560
  end
1068
1561
 
1069
1562
  ##
@@ -1071,56 +1564,43 @@ module RubyParserStuff
1071
1564
  # Timeout::Error if it runs for more than +time+ seconds.
1072
1565
 
1073
1566
  def process(str, file = "(string)", time = 10)
1567
+ str.freeze
1568
+
1074
1569
  Timeout.timeout time do
1075
1570
  raise "bad val: #{str.inspect}" unless String === str
1076
1571
 
1077
- str = handle_encoding str
1572
+ self.lexer.string = handle_encoding str
1078
1573
 
1079
1574
  self.file = file.dup
1080
1575
 
1081
- @yydebug = ENV.has_key? 'DEBUG'
1082
-
1083
- # HACK -- need to get tests passing more than have graceful code
1084
- self.lexer.ss = RPStringScanner.new str
1576
+ @yydebug = ENV.has_key? "DEBUG"
1085
1577
 
1086
1578
  do_parse
1087
1579
  end
1088
1580
  end
1089
1581
 
1090
- alias :parse :process
1582
+ alias parse process
1091
1583
 
1092
1584
  def remove_begin node
1093
- oldnode = node
1094
- if node and node.sexp_type == :begin and node.size == 2 then
1095
- node = node.last
1096
- node.line = oldnode.line
1097
- end
1585
+ line = node.line
1586
+
1587
+ node = node.last while node and node.sexp_type == :begin and node.size == 2
1588
+
1589
+ node = s(:nil) if node == s(:begin)
1590
+
1591
+ node.line ||= line
1592
+
1098
1593
  node
1099
1594
  end
1100
1595
 
1596
+ alias value_expr remove_begin # TODO: for now..? could check the tree, but meh?
1597
+
1101
1598
  def reset
1102
1599
  lexer.reset
1103
1600
  self.in_def = false
1104
1601
  self.in_single = 0
1105
1602
  self.env.reset
1106
- self.comments.clear
1107
- end
1108
-
1109
- def block_dup_check call_or_args, block
1110
- syntax_error "Both block arg and actual block given." if
1111
- block and call_or_args.block_pass?
1112
- end
1113
-
1114
- def inverted? val
1115
- [:return, :next, :break, :yield].include? val[0].sexp_type
1116
- end
1117
-
1118
- def invert_block_call val
1119
- (type, call), iter = val
1120
-
1121
- iter.insert 1, call
1122
-
1123
- [iter, s(type)]
1603
+ self.last_token_type = nil
1124
1604
  end
1125
1605
 
1126
1606
  def ret_args node
@@ -1135,7 +1615,7 @@ module RubyParserStuff
1135
1615
 
1136
1616
  # HACK matz wraps ONE of the FOUR splats in a newline to
1137
1617
  # distinguish. I use paren for now. ugh
1138
- node = s(:svalue, node) if node.sexp_type == :splat and not node.paren
1618
+ node = s(:svalue, node).line node.line if node.sexp_type == :splat and not node.paren
1139
1619
  node.sexp_type = :svalue if node.sexp_type == :arglist && node[1].sexp_type == :splat
1140
1620
  end
1141
1621
 
@@ -1144,18 +1624,25 @@ module RubyParserStuff
1144
1624
 
1145
1625
  def s(*args)
1146
1626
  result = Sexp.new(*args)
1147
- result.line ||= lexer.lineno if lexer.ss # otherwise...
1627
+ # result.line ||= lexer.lineno if lexer.ss unless ENV["CHECK_LINE_NUMS"] # otherwise...
1148
1628
  result.file = self.file
1149
1629
  result
1150
1630
  end
1151
1631
 
1152
- def value_expr oldnode # HACK: much more to do
1153
- node = remove_begin oldnode
1154
- node.line = oldnode.line if oldnode
1155
- node[2] = value_expr node[2] if node and node.sexp_type == :if
1156
- node
1632
+ def debug n
1633
+ if ENV["PRY"] then
1634
+ require "pry"; binding.pry
1635
+ end
1636
+
1637
+ raise RubyParser::SyntaxError, "debug #{n}"
1638
+ end
1639
+
1640
+ def syntax_error msg
1641
+ raise RubyParser::SyntaxError, msg
1157
1642
  end
1158
1643
 
1644
+ alias yyerror syntax_error
1645
+
1159
1646
  def void_stmts node
1160
1647
  return nil unless node
1161
1648
  return node unless node.sexp_type == :block
@@ -1173,15 +1660,40 @@ module RubyParserStuff
1173
1660
  # do nothing for now
1174
1661
  end
1175
1662
 
1176
- alias yyerror syntax_error
1663
+ def whitespace_width line, remove_width = nil
1664
+ col = 0
1665
+ idx = 0
1177
1666
 
1178
- def on_error(et, ev, values)
1179
- super
1180
- rescue Racc::ParseError => e
1181
- # I don't like how the exception obscures the error message
1182
- e.message.replace "%s:%p :: %s" % [self.file, lexer.lineno, e.message.strip]
1183
- warn e.message if $DEBUG
1184
- raise
1667
+ line.chars.each do |c|
1668
+ break if remove_width && col >= remove_width
1669
+ case c
1670
+ when " " then
1671
+ col += 1
1672
+ when "\t" then
1673
+ n = TAB_WIDTH * (col / TAB_WIDTH + 1)
1674
+ break if remove_width && n > remove_width
1675
+ col = n
1676
+ else
1677
+ break
1678
+ end
1679
+ idx += 1
1680
+ end
1681
+
1682
+ if remove_width then
1683
+ line[idx..-1]
1684
+ elsif line[idx] == "\n"
1685
+ nil
1686
+ else
1687
+ col
1688
+ end
1689
+ end
1690
+
1691
+ alias remove_whitespace_width whitespace_width
1692
+
1693
+ def wrap type, node
1694
+ value, line = node
1695
+ value = value.to_sym if value.respond_to? :to_sym
1696
+ s(type, value).line line
1185
1697
  end
1186
1698
 
1187
1699
  class Keyword
@@ -1214,8 +1726,10 @@ module RubyParserStuff
1214
1726
  # :expr_fitem = symbol literal as FNAME.
1215
1727
  # :expr_value = :expr_beg -- work to remove. Need multi-state support.
1216
1728
 
1729
+ expr_woot = EXPR_FNAME|EXPR_FITEM
1730
+
1217
1731
  wordlist = [
1218
- ["alias", [:kALIAS, :kALIAS ], EXPR_FNAME|EXPR_FITEM],
1732
+ ["alias", [:kALIAS, :kALIAS ], expr_woot ],
1219
1733
  ["and", [:kAND, :kAND ], EXPR_BEG ],
1220
1734
  ["begin", [:kBEGIN, :kBEGIN ], EXPR_BEG ],
1221
1735
  ["break", [:kBREAK, :kBREAK ], EXPR_MID ],
@@ -1245,7 +1759,7 @@ module RubyParserStuff
1245
1759
  ["super", [:kSUPER, :kSUPER ], EXPR_ARG ],
1246
1760
  ["then", [:kTHEN, :kTHEN ], EXPR_BEG ],
1247
1761
  ["true", [:kTRUE, :kTRUE ], EXPR_END ],
1248
- ["undef", [:kUNDEF, :kUNDEF ], EXPR_FNAME|EXPR_FITEM],
1762
+ ["undef", [:kUNDEF, :kUNDEF ], expr_woot ],
1249
1763
  ["unless", [:kUNLESS, :kUNLESS_MOD ], EXPR_BEG ],
1250
1764
  ["until", [:kUNTIL, :kUNTIL_MOD ], EXPR_BEG ],
1251
1765
  ["when", [:kWHEN, :kWHEN ], EXPR_BEG ],
@@ -1325,11 +1839,6 @@ module RubyParserStuff
1325
1839
  @debug = debug
1326
1840
  end
1327
1841
 
1328
- def reset
1329
- @stack = [false]
1330
- log :reset if debug
1331
- end
1332
-
1333
1842
  def inspect
1334
1843
  "StackState(#{@name}, #{@stack.inspect})"
1335
1844
  end
@@ -1366,16 +1875,21 @@ module RubyParserStuff
1366
1875
  log :push if debug
1367
1876
  end
1368
1877
 
1369
- def store base = false
1370
- result = @stack.dup
1371
- @stack.replace [base]
1372
- log :store if debug
1373
- result
1878
+ def reset
1879
+ @stack = [false]
1880
+ log :reset if debug
1374
1881
  end
1375
1882
 
1376
1883
  def restore oldstate
1377
1884
  @stack.replace oldstate
1378
1885
  log :restore if debug
1379
1886
  end
1887
+
1888
+ def store base = false
1889
+ result = @stack.dup
1890
+ @stack.replace [base]
1891
+ log :store if debug
1892
+ result
1893
+ end
1380
1894
  end
1381
1895
  end