ruby_parser-legacy 1.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ruby_parser-legacy might be problematic. Click here for more details.

@@ -0,0 +1,30 @@
1
+ require "racc/parser"
2
+
3
+ class RubyParser
4
+ module Legacy
5
+ end
6
+ end
7
+
8
+ require "ruby_parser/legacy/ruby_parser_extras"
9
+ require "ruby_parser"
10
+
11
+ class RubyParser
12
+ module Legacy
13
+ class RubyParser < ::RubyParser::Parser
14
+ include ::RubyParser::Legacy::RubyParserStuff
15
+ end
16
+ end
17
+ end
18
+
19
+ require "ruby_parser/legacy/ruby19_parser"
20
+ require "ruby_parser/legacy/ruby18_parser"
21
+
22
+
23
+ class ::RubyParser # Plug into modern system
24
+ VERSIONS.delete ::RubyParser::Legacy::RubyParser
25
+ VERSIONS.delete Ruby19Parser
26
+ VERSIONS.delete Ruby18Parser
27
+
28
+ class V19 < ::Ruby19Parser; end
29
+ class V18 < ::Ruby18Parser; end
30
+ end
@@ -0,0 +1,1388 @@
1
+ # encoding: ASCII-8BIT
2
+
3
+ require "sexp"
4
+ require "ruby_parser/legacy/ruby_lexer"
5
+ require "timeout"
6
+ require "rp_extensions"
7
+ require "rp_stringscanner"
8
+
9
+ module RubyParser::Legacy::RubyParserStuff
10
+ attr_accessor :lexer, :in_def, :in_single, :file
11
+ attr_accessor :in_kwarg
12
+ attr_reader :env, :comments
13
+
14
+ $good20 = []
15
+
16
+ %w[
17
+ ].map(&:to_i).each do |n|
18
+ $good20[n] = n
19
+ end
20
+
21
# Raises a RuntimeError mentioning +n+, +v+ and +r+ unless the global
# $good20 table holds a truthy entry at index +n+; otherwise returns nil.
def debug20 n, v = nil, r = nil
  return if $good20[n]

  raise "not yet #{n} #{v.inspect} => #{r.inspect}"
end
24
+
25
# Defines +old+ as a deprecated alias for +new+.
#
# The generated method prints a "DEPRECATED" warning naming both methods
# and the caller's location, then forwards every positional argument --
# and any block the caller supplied -- to +new+ via +send+.
def self.deprecate old, new
  define_method old do |*args, &block|
    warn "DEPRECATED: #{old} -> #{new} from #{caller.first}"
    # forward the block too, so the alias is a faithful stand-in for +new+
    send new, *args, &block
  end
end
31
+
32
+ has_enc = "".respond_to? :encoding
33
+
34
+ # This is in sorted order of occurrence according to
35
+ # charlock_holmes against 500k files, with UTF_8 forced
36
+ # to the top.
37
+ #
38
+ # Overwrite this constant if you need something different.
39
+ ENCODING_ORDER = [
40
+ Encoding::UTF_8, # moved to top to reflect default in 2.0
41
+ Encoding::ISO_8859_1,
42
+ Encoding::ISO_8859_2,
43
+ Encoding::ISO_8859_9,
44
+ Encoding::SHIFT_JIS,
45
+ Encoding::WINDOWS_1252,
46
+ Encoding::EUC_JP
47
+ ] if has_enc
48
+
49
+ def syntax_error msg
50
+ raise RubyParser::SyntaxError, msg
51
+ end
52
+
53
# Ensures +node1+ is an argument-list style node (wrapping it in
# s(:arglist, ...) when its type is not one of the list types), appends
# +node2+ when given, and returns the list.
def arg_blk_pass node1, node2 # TODO: nuke
  list_types = [:arglist, :call_args, :array, :args]
  already_list = list_types.include? node1.sexp_type

  node1 = s(:arglist, node1) unless already_list
  node1 << node2 if node2
  node1
end
58
+
59
+ def arg_concat node1, node2 # TODO: nuke
60
+ raise "huh" unless node2
61
+ node1 << s(:splat, node2).compact
62
+ node1
63
+ end
64
+
65
# Simplifies a multiple-assignment left-hand side.
#
# * :masgn wrapping a single :array is rebuilt as a :masgn of its
#   recursively cleaned elements; any other :masgn hits debug20(5) and is
#   returned untouched.
# * :gasgn / :iasgn / :lasgn / :cvasgn of size 2 collapse to their last
#   element; larger ones hit debug20(7) and are returned untouched.
# * Any other node type raises.
def clean_mlhs sexp
  type = sexp.sexp_type

  if type == :masgn then
    unless sexp.size == 2 and sexp[1].sexp_type == :array then
      debug20 5
      return sexp
    end

    cleaned = sexp[1].sexp_body.map { |sub| clean_mlhs sub }
    s(:masgn, *cleaned)
  elsif [:gasgn, :iasgn, :lasgn, :cvasgn].include? type then
    return sexp.last if sexp.size == 2

    debug20 7
    sexp # optional value
  else
    raise "unsupported type: #{sexp.inspect}"
  end
end
85
+
86
# Builds an argument list from +args+ via #args and retags the result
# as :masgn.
def block_var *args
  self.args(args).tap { |list| list.sexp_type = :masgn }
end
91
+
92
# Builds block parameters from an optional +ary+ of names, an optional
# +splat+ and an optional +block+ argument.
#
# The splat is appended as :"*name" and the block as :"&name". The result
# is a :masgn when there is more than one entry or a splat is present,
# otherwise just the last element of +ary+.
def block_var18 ary, splat, block
  ary ||= s(:array)

  if splat then
    splat_name = Symbol === splat ? splat : splat[1]
    ary << :"*#{splat_name}"
  end

  ary << :"&#{block[1]}" if block

  return ary.last unless ary.length > 2 or ary.splat # HACK

  s(:masgn, *ary.sexp_body)
end
108
+
109
# Returns +array+ unchanged when it is a :kwsplat node; otherwise builds a
# :hash node from its body.
def array_to_hash array
  return array if array.sexp_type == :kwsplat

  s(:hash, *array.sexp_body)
end
117
+
118
# Flattens the raw pieces in +args+ into a single :call_args node.
#
# :array / :args / :call_args sexps are spliced in, other sexps and
# symbols are appended as-is, "," and nil are skipped, and anything else
# raises.
def call_args args
  args.each_with_object(s(:call_args)) do |arg, result|
    case arg
    when Sexp then
      # HACK? remove array at some point
      if [:array, :args, :call_args].include? arg.sexp_type then
        result.concat arg.sexp_body
      else
        result << arg
      end
    when Symbol then
      result << arg
    when ",", nil then
      # ignore
    else
      raise "unhandled: #{arg.inspect} in #{args.inspect}"
    end
  end
end
141
+
142
+ def args args
143
+ result = s(:args)
144
+
145
+ args.each do |arg|
146
+ case arg
147
+ when Sexp then
148
+ case arg.sexp_type
149
+ when :args, :block, :array, :call_args then # HACK call_args mismatch
150
+ result.concat arg.sexp_body
151
+ when :block_arg then
152
+ result << :"&#{arg.last}"
153
+ when :shadow then
154
+ name = arg.last
155
+ self.env[name] = :lvar
156
+ if Sexp === result.last and result.last.sexp_type == :shadow then
157
+ result.last << name
158
+ else
159
+ result << arg
160
+ end
161
+ when :masgn, :block_pass, :hash then # HACK: remove. prolly call_args
162
+ result << arg
163
+ else
164
+ raise "unhandled: #{arg.sexp_type} in #{args.inspect}"
165
+ end
166
+ when Symbol then
167
+ name = arg.to_s.delete("&*")
168
+ self.env[name.to_sym] = :lvar unless name.empty?
169
+ result << arg
170
+ when ",", "|", ";", "(", ")", nil then
171
+ # ignore
172
+ else
173
+ raise "unhandled: #{arg.inspect} in #{args.inspect}"
174
+ end
175
+ end
176
+
177
+ result
178
+ end
179
+
180
# Builds an :attrasgn node calling :"[]=" on +receiver+ with the elements
# of +index+ (an empty sexp when +index+ is nil) as arguments.
def aryset receiver, index
  index_args = (index || s()).sexp_body

  s(:attrasgn, receiver, :"[]=", *index_args).compact # [].sexp_body => nil
end
184
+
185
+ def assignable(lhs, value = nil)
186
+ id = lhs.to_sym unless Sexp === lhs
187
+ id = id.to_sym if Sexp === id
188
+
189
+ raise "write a test 1" if id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
190
+
191
+ raise SyntaxError, "Can't change the value of #{id}" if
192
+ id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
193
+
194
+ result = case id.to_s
195
+ when /^@@/ then
196
+ asgn = in_def || in_single > 0
197
+ s((asgn ? :cvasgn : :cvdecl), id)
198
+ when /^@/ then
199
+ s(:iasgn, id)
200
+ when /^\$/ then
201
+ s(:gasgn, id)
202
+ when /^[A-Z]/ then
203
+ s(:cdecl, id)
204
+ else
205
+ case self.env[id]
206
+ when :lvar, :dvar, nil then
207
+ s(:lasgn, id)
208
+ else
209
+ raise "wtf? unknown type: #{self.env[id]}"
210
+ end
211
+ end
212
+
213
+ self.env[id] ||= :lvar if result.sexp_type == :lasgn
214
+
215
+ result << value if value
216
+
217
+ return result
218
+ end
219
+
220
# Appends +tail+ to +head+, wrapping +head+ in a :block when it is not one
# already. If either side is nil the other is returned unchanged. The
# resulting block takes the smaller of the two line numbers.
def block_append(head, tail)
  return head if tail.nil?
  return tail if head.nil?

  earliest = [head, tail].map(&:line).compact.min

  block = remove_begin(head)
  block = s(:block, block) if block.node_type != :block

  block.line = earliest
  block << tail
end
232
+
233
# Rewrites +node+ for use as a condition:
#
# * a regexp literal becomes s(:match, node)
# * :and / :or have both operands rewritten recursively
# * :dot2 / :dot3 become :flip2 / :flip3 (and register a "flip<hash>"
#   entry in env)
# * everything else is returned as-is (after value_expr)
#
# Returns nil for a nil +node+.
def cond node
  return nil if node.nil?

  node = value_expr node
  type = node.sexp_type

  case type
  when :lit then
    Regexp === node.last ? s(:match, node) : node
  when :and, :or then
    s(type, cond(node[1]), cond(node[2]))
  when :dot2, :dot3 then
    env["flip#{node.hash}"] = :lvar
    _, lhs, rhs = node
    s(type == :dot2 ? :flip2 : :flip3, lhs, rhs)
  else
    node
  end
end
262
+
263
+ ##
264
+ # for pure ruby systems only
265
+
266
+ def do_parse
267
+ _racc_do_parse_rb(_racc_setup, false)
268
+ end if ENV['PURE_RUBY']
269
+
270
# Builds the node for <tt>lhs =~ rhs</tt>.
#
# When +lhs+ is a regexp (dynamic regexp or regexp literal) the result is
# s(:match2, lhs, rhs); otherwise when +rhs+ is one the result is
# s(:match3, rhs, lhs); otherwise it is a plain :"=~" call. The line is
# always taken from +lhs+.
def new_match lhs, rhs
  regexp_node = lambda { |node|
    next false unless node

    case node.sexp_type
    when :dregx, :dregx_once then true
    when :lit then Regexp === node.last
    else false
    end
  }

  return s(:match2, lhs, rhs).line(lhs.line) if regexp_node[lhs]
  return s(:match3, rhs, lhs).line(lhs.line) if regexp_node[rhs]

  new_call(lhs, :"=~", argl(rhs)).line(lhs.line)
end
291
+
292
+ # TODO: remove in 4.0 or 2018-01, whichever is first
293
+ deprecate :get_match_node, :new_match
294
+
295
# Builds the node that reads identifier +id+.
#
# The prefix picks the node type (@@ => :cvar, @ => :ivar, $ => :gvar,
# leading capital => :const). Any other name uses the type recorded in
# env, falling back to a receiverless call when env has no entry. When
# +id+ responds to +lineno+, that line is applied to the result.
def gettable(id)
  lineno = id.lineno if id.respond_to? :lineno
  id = id.to_sym if String === id

  type = case id.to_s
         when /^@@/    then :cvar
         when /^@/     then :ivar
         when /^\$/    then :gvar
         when /^[A-Z]/ then :const
         else env[id]
         end

  result = type ? s(type, id) : new_call(nil, id)

  result.line lineno if lineno

  raise "identifier #{id.inspect} is not valid" unless result

  result
end
323
+
324
+ ##
325
+ # Canonicalize conditionals. Eg:
326
+ #
327
+ # not x ? a : b
328
+ #
329
+ # becomes:
330
+ #
331
+ # x ? b : a
332
+
333
+ attr_accessor :canonicalize_conditions
334
+
335
+ def initialize(options = {})
336
+ # no!: super()
337
+
338
+ v = self.class.name[/1[89]/]
339
+
340
+ self.lexer = RubyParser::Legacy::RubyLexer.new v && v.to_i
341
+ self.lexer.parser = self
342
+ self.in_kwarg = false
343
+
344
+ @env = RubyParser::Legacy::RubyParserStuff::Environment.new
345
+ @comments = []
346
+
347
+ @canonicalize_conditions = true
348
+
349
+ self.reset
350
+ end
351
+
352
# Appends +item+ to +list+, first wrapping +list+ in an :array node when
# it is not one already. A nil/false +list+ yields s(:array, item).
def list_append list, item # TODO: nuke me *sigh*
  return s(:array, item) unless list

  is_array = Sexp === list && list.sexp_type == :array
  (is_array ? list : s(:array, list)) << item
end
357
+
358
# Inserts +item+ as the first element (index 1, after the type tag) of
# +list+, wrapping +list+ in an :array node when it is not one already.
def list_prepend item, list # TODO: nuke me *sigh*
  is_array = Sexp === list && list.sexp_type == :array
  list = s(:array, list) unless is_array

  list.insert 1, item
  list
end
363
+
364
+ def literal_concat head, tail # TODO: ugh. rewrite
365
+ return tail unless head
366
+ return head unless tail
367
+
368
+ htype, ttype = head.sexp_type, tail.sexp_type
369
+
370
+ head = s(:dstr, '', head) if htype == :evstr
371
+
372
+ case ttype
373
+ when :str then
374
+ if htype == :str
375
+ head.last << tail.last
376
+ elsif htype == :dstr and head.size == 2 then
377
+ head.last << tail.last
378
+ else
379
+ head << tail
380
+ end
381
+ when :dstr then
382
+ if htype == :str then
383
+ lineno = head.line
384
+ tail[1] = head.last + tail[1]
385
+ head = tail
386
+ head.line = lineno
387
+ else
388
+ tail.sexp_type = :array
389
+ tail[1] = s(:str, tail[1])
390
+ tail.delete_at 1 if tail[1] == s(:str, '')
391
+
392
+ head.push(*tail.sexp_body)
393
+ end
394
+ when :evstr then
395
+ if htype == :str then
396
+ f, l = head.file, head.line
397
+ head = s(:dstr, *head.sexp_body)
398
+ head.file = f
399
+ head.line = l
400
+ end
401
+
402
+ if head.size == 2 and tail.size > 1 and tail[1].sexp_type == :str then
403
+ head.last << tail[1].last
404
+ head.sexp_type = :str if head.size == 2 # HACK ?
405
+ else
406
+ head.push(tail)
407
+ end
408
+ else
409
+ x = [head, tail]
410
+ raise "unknown type: #{x.inspect}"
411
+ end
412
+
413
+ return head
414
+ end
415
+
416
+ def logical_op type, left, right
417
+ left = value_expr left
418
+
419
+ if left and left.sexp_type == type and not left.paren then
420
+ node, rhs = left, nil
421
+
422
+ loop do
423
+ _, _lhs, rhs = node
424
+ break unless rhs && rhs.sexp_type == type and not rhs.paren
425
+ node = rhs
426
+ end
427
+
428
+ node[2] = s(type, rhs, right)
429
+
430
+ return left
431
+ end
432
+
433
+ return s(type, left, right)
434
+ end
435
+
436
+ # TODO: remove in 4.0 or 2018-01, whichever is first
437
+ deprecate :logop, :logical_op
438
+
439
# Builds a :"[]" call on val[0] using val[2] as its arguments. val[2]
# defaults to an empty :arglist (stored back into +val+) and an :array is
# retagged as :arglist.
def new_aref val
  receiver = val[0]
  index = (val[2] ||= s(:arglist))

  index.sexp_type = :arglist if index.sexp_type == :array # REFACTOR

  new_call receiver, :"[]", index
end
444
+
445
+ def new_body val
446
+ body, resbody, elsebody, ensurebody = val
447
+
448
+ result = body
449
+
450
+ if resbody then
451
+ result = s(:rescue)
452
+ result << body if body
453
+
454
+ res = resbody
455
+
456
+ while res do
457
+ result << res
458
+ res = res.resbody(true)
459
+ end
460
+
461
+ result << elsebody if elsebody
462
+
463
+ result.line = (body || resbody).line
464
+ end
465
+
466
+ if elsebody and not resbody then
467
+ warning("else without rescue is useless")
468
+ result = s(:begin, result) if result
469
+ result = block_append(result, elsebody)
470
+ end
471
+
472
+ result = s(:ensure, result, ensurebody).compact if ensurebody
473
+
474
+ result
475
+ end
476
+
477
+ def new_brace_body args, body, lineno
478
+ new_iter(nil, args, body).line(lineno)
479
+ end
480
+
481
# Wraps +x+ in s(:arglist, x) when it is an :array node; returns any other
# value (including nil) unchanged.
def argl x
  return x unless x and x.sexp_type == :array

  s(:arglist, x)
end
485
+
486
+ def backref_assign_error ref
487
+ # TODO: need a test for this... obviously
488
+ case ref.sexp_type
489
+ when :nth_ref then
490
+ raise "write a test 2"
491
+ raise SyntaxError, "Can't set variable %p" % ref.last
492
+ when :back_ref then
493
+ raise "write a test 3"
494
+ raise SyntaxError, "Can't set back reference %p" % ref.last
495
+ else
496
+ raise "Unknown backref type: #{ref.inspect}"
497
+ end
498
+ end
499
+
500
# Builds a method-call node.
#
# recv::    receiver node, or nil
# meth::    method name
# args::    optional argument node; list-like nodes (:arglist, :args,
#           :array, :call_args) are spliced in, anything else is appended
#           as a single argument
# call_op:: :'.' for a normal call (:call) or :'&.' for a safe-navigation
#           call (:safe_call); anything else raises RuntimeError
#
# The result's line is the smallest line among its sexp children, when
# any of them carries one.
def new_call recv, meth, args = nil, call_op = :'.'
  result = case call_op.to_sym
           when :'.'
             s(:call, recv, meth)
           when :'&.'
             s(:safe_call, recv, meth)
           else
             # report the offending operator itself
             raise "unknown call operator: `#{call_op.inspect}`"
           end

  # TODO: need a test with f(&b) to produce block_pass
  # TODO: need a test with f(&b) { } to produce warning

  if args
    if [:arglist, :args, :array, :call_args].include? args.sexp_type
      result.concat args.sexp_body
    else
      result << args
    end
  end

  line = result.grep(Sexp).map(&:line).compact.min
  result.line = line if line

  result
end
526
+
527
# Builds an attribute-assignment node for <tt>recv.meth = ...</tt>.
#
# +meth+ gets "=" appended. +call_op+ selects :attrasgn (:'.') or
# :safe_attrasgn (:'&.'); any other operator raises RuntimeError. The
# result takes its line from +recv+.
def new_attrasgn recv, meth, call_op
  meth = :"#{meth}="

  result = case call_op.to_sym
           when :'.'
             s(:attrasgn, recv, meth)
           when :'&.'
             s(:safe_attrasgn, recv, meth)
           else
             # report the offending operator itself
             raise "unknown call operator: `#{call_op.inspect}`"
           end

  result.line = recv.line
  result
end
542
+
543
+ def new_case expr, body, line
544
+ result = s(:case, expr)
545
+
546
+ while body and body.node_type == :when
547
+ result << body
548
+ body = body.delete_at 3
549
+ end
550
+
551
+ result[2..-1].each do |node|
552
+ block = node.block(:delete)
553
+ node.concat block.sexp_body if block
554
+ end
555
+
556
+ # else
557
+ body = nil if body == s(:block)
558
+ result << body
559
+
560
+ result.line = line
561
+ result
562
+ end
563
+
564
# Builds a :class node from the parser value stack +val+: line is val[1],
# path val[2], superclass val[3] and body val[5]. A :block body is spliced
# in statement by statement. The most recent entry of +comments+ is popped
# and attached to the result.
def new_class val
  line, path, superclass, body = val.values_at(1, 2, 3, 5)

  result = s(:class, path, superclass)

  if body then
    stmts = body.sexp_type == :block ? body.sexp_body : [body]
    result.push(*stmts)
  end

  result.line = line
  result.comments = self.comments.pop
  result
end
581
+
582
# Takes the first Sexp found in +val+, runs it through void_stmts and, if
# anything is left, through remove_begin.
def new_compstmt val
  first_sexp = val.grep(Sexp).first
  stmts = void_stmts(first_sexp)

  stmts && remove_begin(stmts)
end
587
+
588
# Builds a :defn node from the parser value stack +val+. A missing body
# becomes s(:nil); a :block body is spliced in statement by statement.
# The definition line is applied to both the args node and the result,
# and the most recent entry of +comments+ is popped and attached.
def new_defn val
  (_, line), (name, _), _, args, body, * = val
  body ||= s(:nil)

  result = s(:defn, name.to_sym, args)

  # body is never nil here thanks to the default above
  stmts = body.sexp_type == :block ? body.sexp_body : [body]
  result.push(*stmts)

  args.line line
  result.line = line
  result.comments = self.comments.pop

  result
end
608
+
609
# Builds a :defs (singleton method definition) node from the parser value
# stack +val+: receiver is val[1], name the first element of val[4], args
# val[6] and body val[7]. A missing body becomes s(:nil); a :block body is
# spliced in. The result takes its line from the receiver, and the most
# recent entry of +comments+ is popped and attached.
def new_defs val
  recv, (name, _line), args, body = val.values_at(1, 4, 6, 7)
  body ||= s(:nil)

  result = s(:defs, recv, name.to_sym, args)

  # body is never nil here thanks to the default above
  stmts = body.sexp_type == :block ? body.sexp_body : [body]
  result.push(*stmts)

  result.line = recv.line
  result.comments = self.comments.pop
  result
end
627
+
628
+ def new_do_body args, body, lineno
629
+ new_iter(nil, args, body).line(lineno)
630
+ end
631
+
632
# Builds a :for node over +expr+ binding +var+, appending +body+ when
# present. The node takes its line from +var+.
def new_for expr, var, body
  s(:for, expr, var).line(var.line).tap do |loop_node|
    loop_node << body if body
  end
end
637
+
638
+ def new_hash val
639
+ s(:hash, *val[2].values).line(val[1])
640
+ end
641
+
642
# Builds an :if node from condition +c+, then-branch +t+ and else-branch
# +f+. The condition is normalised with #cond; when it is a :not node and
# canonicalize_conditions is set, the negation is dropped and the branches
# are swapped. The node's line is the smallest line of the three parts.
def new_if c, t, f
  line = [c.line, t && t.line, f && f.line].compact.min
  test = cond c

  if test.sexp_type == :not and canonicalize_conditions then
    test = test.last
    t, f = f, t
  end

  s(:if, test, t, f).line(line)
end
648
+
649
# Builds an :iter node from an optional +call+, block +args+ and an
# optional +body+. +args+ defaults to s(:args); a bare Symbol is wrapped
# in s(:args, sym); and unless +args+ equals 0 its type is forced to
# :args.
def new_iter call, args, body
  args ||= s(:args)
  args = s(:args, args) if Symbol === args

  result = s(:iter)
  result << call if call
  result << args
  result << body if body

  args.sexp_type = :args if args != 0

  result
end
664
+
665
+ def new_masgn_arg rhs, wrap = false
666
+ rhs = value_expr(rhs)
667
+ rhs = s(:to_ary, rhs) if wrap # HACK: could be array if lhs isn't right
668
+ rhs
669
+ end
670
+
671
# Completes the multiple-assignment node +lhs+ with right-hand side +rhs+.
#
# When +wrap+ is set, +rhs+ is wrapped in :to_ary (if +lhs+ has a target
# list at index 1) or :array (if not). A nil slot at index 1 is removed
# before +rhs+ is appended. Returns +lhs+.
def new_masgn lhs, rhs, wrap = false
  targets = lhs[1]

  rhs = value_expr(rhs)
  rhs = s(targets ? :to_ary : :array, rhs) if wrap

  lhs.delete_at 1 if targets.nil?
  lhs << rhs

  lhs
end
682
+
683
# Builds a :module node from the parser value stack +val+: line is val[1],
# path val[2] and body val[4]. A :block body is spliced in statement by
# statement. The most recent entry of +comments+ is popped and attached.
def new_module val
  line, path, body = val.values_at(1, 2, 4)

  result = s(:module, path)

  if body then # REFACTOR?
    stmts = body.sexp_type == :block ? body.sexp_body : [body]
    result.push(*stmts)
  end

  result.line = line
  result.comments = self.comments.pop
  result
end
700
+
701
# Builds the node for an operator assignment (<tt>a op= b</tt>) from
# val[0] (the assignment node), val[1] (the operator) and val[2] (the
# argument).
#
# "||" and "&&" produce :op_asgn_or / :op_asgn_and around the assignment;
# any other operator stores a call of that operator on the current value
# into slot 2 of the assignment node.
def new_op_asgn val
  lhs, asgn_op, arg = val[0], val[1].to_sym, val[2]
  name = lhs.value
  arg = remove_begin(arg)

  logical = { :"||" => :op_asgn_or, :"&&" => :op_asgn_and }[asgn_op]

  if logical then # REFACTOR
    lhs << arg
    result = s(logical, self.gettable(name), lhs)
  else
    # TODO: why [2] ?
    lhs[2] = new_call(self.gettable(name), asgn_op, argl(arg))
    result = lhs
  end

  result.line = lhs.line
  result
end
720
+
721
# Builds the node for an attribute operator assignment
# (<tt>recv.meth op= arg</tt>) from +val+, which unpacks as
# [recv, call_op, meth, op, arg].
#
# +meth+ gets "=" appended. +call_op+ selects :op_asgn2 (:'.') or
# :safe_op_asgn2 (:'&.'); any other operator raises RuntimeError. The
# result takes its line from +recv+.
def new_op_asgn2 val
  recv, call_op, meth, op, arg = val
  meth = :"#{meth}="

  result = case call_op.to_sym
           when :'.'
             s(:op_asgn2, recv, meth, op.to_sym, arg)
           when :'&.'
             s(:safe_op_asgn2, recv, meth, op.to_sym, arg)
           else
             # report the offending operator itself
             raise "unknown call operator: `#{call_op.inspect}`"
           end

  result.line = recv.line
  result
end
737
+
738
+ def new_regexp val
739
+ node = val[1] || s(:str, '')
740
+ options = val[2]
741
+
742
+ o, k = 0, nil
743
+ options.split(//).uniq.each do |c| # FIX: this has a better home
744
+ v = {
745
+ 'x' => Regexp::EXTENDED,
746
+ 'i' => Regexp::IGNORECASE,
747
+ 'm' => Regexp::MULTILINE,
748
+ 'o' => Regexp::ONCE,
749
+ 'n' => Regexp::ENC_NONE,
750
+ 'e' => Regexp::ENC_EUC,
751
+ 's' => Regexp::ENC_SJIS,
752
+ 'u' => Regexp::ENC_UTF8,
753
+ }[c]
754
+ raise "unknown regexp option: #{c}" unless v
755
+ o += v
756
+
757
+ # encoding options are ignored on 1.9+
758
+ k = c if c =~ /[esu]/ if RUBY_VERSION < "1.9"
759
+ end
760
+
761
+ case node.sexp_type
762
+ when :str then
763
+ node.sexp_type = :lit
764
+ node[1] = if k then
765
+ Regexp.new(node[1], o, k)
766
+ else
767
+ begin
768
+ Regexp.new(node[1], o)
769
+ rescue RegexpError => e
770
+ warn "WA\RNING: #{e.message} for #{node[1].inspect} #{options.inspect}"
771
+ begin
772
+ warn "WA\RNING: trying to recover with ENC_UTF8"
773
+ Regexp.new(node[1], Regexp::ENC_UTF8)
774
+ rescue RegexpError => e
775
+ warn "WA\RNING: trying to recover with ENC_NONE"
776
+ Regexp.new(node[1], Regexp::ENC_NONE)
777
+ end
778
+ end
779
+ end
780
+ when :dstr then
781
+ if options =~ /o/ then
782
+ node.sexp_type = :dregx_once
783
+ else
784
+ node.sexp_type = :dregx
785
+ end
786
+ node << o if o and o != 0
787
+ else
788
+ node = s(:dregx, '', node);
789
+ node.sexp_type = :dregx_once if options =~ /o/
790
+ node << o if o and o != 0
791
+ end
792
+
793
+ node
794
+ end
795
+
796
+ def new_rescue body, resbody
797
+ s(:rescue, body, resbody)
798
+ end
799
+
800
# Builds a :resbody node from the exception list +cond+ and +body+. A
# :block body has its type tag shifted off (mutating it) so that its
# statements are spliced in directly; any other body is added as a single
# element. The node takes its line from +cond+.
def new_resbody cond, body
  stmts = if body && body.sexp_type == :block then
            body.shift # remove block and splat it in directly
            body
          else
            [body]
          end

  s(:resbody, cond, *stmts).line cond.line
end
808
+
809
# Builds an :sclass node from the parser value stack +val+: line is
# val[2], receiver val[3] and body val[7]. A :block body is spliced in
# statement by statement. Also restores in_def and in_single from val[4]
# and val[6].
def new_sclass val
  line, recv, in_def, in_single, body = val.values_at(2, 3, 4, 6, 7)

  result = s(:sclass, recv)

  if body then
    stmts = body.sexp_type == :block ? body.sexp_body : [body]
    result.push(*stmts)
  end

  result.line = line
  self.in_def = in_def
  self.in_single = in_single
  result
end
827
+
828
# Builds a :str node from val[0]. On Ruby 1.9+ a string whose bytes are
# invalid for its encoding is retagged ASCII-8BIT first. The lexer's line
# bookkeeping is adjusted by the number of newlines in the string.
def new_string val
  str = val[0]

  if RUBY_VERSION >= "1.9" && !str.valid_encoding? then
    str.force_encoding("ASCII-8BIT")
  end

  result = s(:str, str)
  self.lexer.fixup_lineno str.count("\n")
  result
end
835
+
836
# Builds a :str node from val[1]. On Ruby 1.9+ a string whose bytes are
# invalid for its encoding is retagged ASCII-8BIT first. Calls
# fixup_lineno on the lexer after building the node.
def new_qword_list_entry val
  str = val[1]

  if RUBY_VERSION >= "1.9" && !str.valid_encoding? then
    str.force_encoding("ASCII-8BIT")
  end

  result = s(:str, str)
  self.lexer.fixup_lineno
  result
end
843
+
844
+ def new_qword_list
845
+ result = s(:array)
846
+ self.lexer.fixup_lineno
847
+ result
848
+ end
849
+
850
+ def new_word_list
851
+ result = s(:array)
852
+ self.lexer.fixup_lineno
853
+ result
854
+ end
855
+
856
# Returns val[1], wrapping it as s(:dstr, "", entry) when it is an :evstr
# node. Calls fixup_lineno on the lexer after building the result.
def new_word_list_entry val
  entry = val[1]
  entry = s(:dstr, "", entry) if entry.sexp_type == :evstr

  self.lexer.fixup_lineno
  entry
end
861
+
862
+ def new_qsym_list
863
+ result = s(:array)
864
+ self.lexer.fixup_lineno
865
+ result
866
+ end
867
+
868
+ def new_qsym_list_entry val
869
+ result = s(:lit, val[1].to_sym)
870
+ self.lexer.fixup_lineno
871
+ result
872
+ end
873
+
874
+ def new_symbol_list
875
+ result = s(:array)
876
+ self.lexer.fixup_lineno
877
+ result
878
+ end
879
+
880
# Converts one %I[] entry (val[1]) into a symbol node.
#
# * a missing entry is treated as the empty string
# * :dstr is retagged in place as :dsym
# * :str becomes a :lit holding the string converted to a Symbol
# * anything else is wrapped as s(:dsym, "", entry)
#
# Calls fixup_lineno on the lexer before returning the node.
def new_symbol_list_entry val
  _list, sym, _nil = val # TODO: use _list

  # Default the entry itself so a nil entry cannot reach sexp_type below.
  sym ||= s(:str, "")

  case sym.sexp_type
  when :dstr then
    sym.sexp_type = :dsym
  when :str then
    sym = s(:lit, sym.last.to_sym)
  else
    sym = s(:dsym, "", sym)
  end

  lexer.fixup_lineno
  sym
end
897
+
898
# Builds a :super node. A :block_pass argument is kept as a single child;
# otherwise the elements of +args+ (or of an empty :arglist when +args+
# is nil) become the children.
def new_super args
  return s(:super, args) if args && args.node_type == :block_pass

  args ||= s(:arglist)
  s(:super, *args.sexp_body)
end
906
+
907
# With only +n+, builds s(:undef, n). With +m+ as well, appends
# s(:undef, m) to the existing node +n+ via block_append.
def new_undef n, m = nil
  m ? block_append(n, s(:undef, m)) : s(:undef, n)
end
914
+
915
+ def new_until block, expr, pre
916
+ new_until_or_while :until, block, expr, pre
917
+ end
918
+
919
# Builds a :while or :until node (+type+) from loop body +block+,
# condition +expr+ and the +pre+ flag.
#
# A :begin body is unwrapped to its last element and forces +pre+ to
# false. The condition is normalised with #cond; when it is a :not node
# and canonicalize_conditions is set, the negation is dropped and the
# loop type is flipped instead. The node's line is the smaller of the
# body's and the condition's lines.
def new_until_or_while type, block, expr, pre
  line = [block && block.line, expr.line].compact.min
  block, pre = block.last, false if block && block.sexp_type == :begin

  expr = cond expr

  if expr.sexp_type == :not and canonicalize_conditions then
    type = type == :until ? :while : :until
    expr = expr.last
  end

  result = s(type, expr, block, pre)
  result.line = line
  result
end
935
+
936
+ def new_when cond, body
937
+ s(:when, cond, body)
938
+ end
939
+
940
+ def new_while block, expr, pre
941
+ new_until_or_while :while, block, expr, pre
942
+ end
943
+
944
# Converts string node +str+ into its backtick form: :str is retagged
# :xstr, :dstr is retagged :dxstr, any other node is wrapped as
# s(:dxstr, '', str), and nil yields s(:xstr, '').
def new_xstring str
  return s(:xstr, '') unless str

  case str.sexp_type
  when :str  then str.sexp_type = :xstr
  when :dstr then str.sexp_type = :dxstr
  else            str = s(:dxstr, '', str)
  end

  str
end
959
+
960
+ def new_yield args = nil
961
+ # TODO: raise args.inspect unless [:arglist].include? args.first # HACK
962
+ raise "write a test 4" if args && args.node_type == :block_pass
963
+ raise SyntaxError, "Block argument should not be given." if
964
+ args && args.node_type == :block_pass
965
+
966
+ args ||= s(:arglist)
967
+
968
+ args.sexp_type = :arglist if [:call_args, :array].include? args.sexp_type
969
+ args = s(:arglist, args) unless args.sexp_type == :arglist
970
+
971
+ return s(:yield, *args.sexp_body)
972
+ end
973
+
974
# Fetches the next token from the lexer. Returns [false, '$end'] when the
# lexer yields nothing or a token whose first element is RubyLexer::EOF.
def next_token
  token = self.lexer.next_token

  return [false, '$end'] unless token and token.first != RubyLexer::EOF

  token
end
983
+
984
# Completes assignment node +lhs+ by appending the value +rhs+ (after
# value_expr). A :const target is retagged :cdecl first. Returns nil when
# +lhs+ is missing and raises for any unsupported target type.
def new_assign lhs, rhs
  return nil unless lhs

  rhs = value_expr rhs

  targets = [:lasgn, :iasgn, :cdecl, :cvdecl, :gasgn, :cvasgn,
             :attrasgn, :safe_attrasgn]

  lhs.sexp_type = :cdecl if lhs.sexp_type == :const

  unless targets.include? lhs.sexp_type then
    raise "unknown lhs #{lhs.inspect} w/ #{rhs.inspect}"
  end

  lhs << rhs
  lhs
end
1001
+
1002
+ # TODO: remove in 4.0 or 2018-01, whichever is first
1003
+ deprecate :node_assign, :new_assign
1004
+
1005
+ ##
1006
+ # Returns a UTF-8 encoded string after processing BOMs and magic
1007
+ # encoding comments.
1008
+ #
1009
+ # Holy crap... ok. Here goes:
1010
+ #
1011
+ # Ruby's file handling and encoding support is insane. We need to be
1012
+ # able to lex a file. The lexer file is explicitly UTF-8 to make
1013
+ # things cleaner. This allows us to deal with extended chars in
1014
+ # class and method names. In order to do this, we need to encode all
1015
+ # input source files as UTF-8. First, we look for a UTF-8 BOM by
1016
+ # looking at the first line while forcing its encoding to
1017
+ # ASCII-8BIT. If we find a BOM, we strip it and set the expected
1018
+ # encoding to UTF-8. Then, we search for a magic encoding comment.
1019
+ # If found, it overrides the BOM. Finally, we force the encoding of
1020
+ # the input string to whatever was found, and then encode that to
1021
+ # UTF-8 for compatibility with the lexer.
1022
+
1023
# Returns a copy of +str+ prepared for the lexer; +str+ itself is never
# modified.
#
# Steps:
# 1. Look at the first two lines as raw bytes.
# 2. If the source starts with a UTF-8 BOM, strip it and assume "utf-8".
# 3. If either line carries a magic encoding comment (emacs "-*- coding:
#    x -*-" style or "coding: x" / "encoding = x" style), that name
#    overrides the BOM.
# 4. Hand the string (and the encoding name, if any) to hack_encoding,
#    which does the actual conversion.
#
# On Rubies whose strings have no encoding support, a magic comment only
# produces a warning.
def handle_encoding str
  str = str.dup
  has_enc = str.respond_to? :encoding
  encoding = nil

  header = str.each_line.first(2)
  header.map! { |line| line.force_encoding "ASCII-8BIT" } if has_enc

  first = header.first || ""

  # /n keeps the pattern byte-oriented whatever encoding this source file
  # is read in, so it can always be matched against the binary header.
  if first =~ /\A\xEF\xBB\xBF/n then
    encoding = "utf-8"
    # The BOM is three *bytes* but a single *character* once the string is
    # tagged UTF-8, so drop it from a binary view of the string. str is
    # already a private copy, and hack_encoding re-tags it below.
    str.force_encoding "ASCII-8BIT" if has_enc
    str = str[3..-1]
  end

  magic = nil
  header.each do |line|
    magic = line[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
            line[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
    break if magic
  end
  encoding = magic.strip if magic

  if encoding then
    if has_enc then
      encoding.sub!(/utf-8-.+$/, 'utf-8') # HACK for stupid emacs formats
      hack_encoding str, encoding
    else
      warn "Skipping magic encoding comment"
    end
  else
    # nothing specified... ugh. try to encode as utf-8
    hack_encoding str if has_enc
  end

  str
end
1053
+
1054
# Re-tags +str+ in place with each candidate encoding in turn -- +extra+
# first when given, then ENCODING_ORDER -- and transcodes it to UTF-8 as
# soon as one of them yields a valid string. Raises if +str+ is still not
# UTF-8 after every candidate has been tried.
def hack_encoding str, extra = nil
  candidates = extra.nil? ? ENCODING_ORDER.dup : [extra, *ENCODING_ORDER]

  # terrible, horrible, no good, very bad, last ditch effort.
  candidates.each do |candidate|
    begin
      str.force_encoding candidate
      next unless str.valid_encoding?

      str.encode! Encoding::UTF_8
      break
    rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
      # do nothing
    end
  end

  # no amount of pain is enough for you.
  unless str.encoding.name == "UTF-8" then
    raise "Bad encoding. Need a magic encoding comment."
  end
end
1077
+
1078
+ ##
1079
+ # Parse +str+ at path +file+ and return a sexp. Raises
1080
+ # Timeout::Error if it runs for more than +time+ seconds.
1081
+
1082
+ def process(str, file = "(string)", time = 10)
1083
+ Timeout.timeout time do
1084
+ raise "bad val: #{str.inspect}" unless String === str
1085
+
1086
+ str = handle_encoding str
1087
+
1088
+ self.file = file.dup
1089
+
1090
+ @yydebug = ENV.has_key? 'DEBUG'
1091
+
1092
+ # HACK -- need to get tests passing more than have graceful code
1093
+ self.lexer.ss = RPStringScanner.new str
1094
+
1095
+ do_parse
1096
+ end
1097
+ end
1098
+
1099
+ alias :parse :process
1100
+
1101
# Unwraps a :begin node that holds exactly one child, returning that
# child with the wrapper's line number. Anything else (including nil) is
# returned unchanged.
def remove_begin node
  return node unless node and node.sexp_type == :begin and node.size == 2

  inner = node.last
  inner.line = node.line
  inner
end
1109
+
1110
+ def reset
1111
+ lexer.reset
1112
+ self.in_def = false
1113
+ self.in_single = 0
1114
+ self.env.reset
1115
+ self.comments.clear
1116
+ end
1117
+
1118
# Reports a syntax error when a literal +block+ is given and
# +call_or_args+ already carries a block-pass argument.
def block_dup_check call_or_args, block
  return unless block and call_or_args.block_pass?

  syntax_error "Both block arg and actual block given."
end
1122
+
1123
# True when the first element of +val+ is a :return, :next, :break or
# :yield node.
def inverted? val
  case val[0].sexp_type
  when :return, :next, :break, :yield then true
  else false
  end
end
1126
+
1127
# +val+ is [[type, call], iter]. Moves +call+ into position 1 of +iter+
# and returns [iter, s(type)].
def invert_block_call val
  wrapper, iter = val
  type, call = wrapper

  iter.insert 1, call

  [iter, s(type)]
end
1134
+
1135
+ def ret_args node
1136
+ if node then
1137
+ raise "write a test 5" if node.sexp_type == :block_pass
1138
+
1139
+ raise SyntaxError, "block argument should not be given" if
1140
+ node.sexp_type == :block_pass
1141
+
1142
+ node.sexp_type = :array if node.sexp_type == :call_args
1143
+ node = node.last if node.sexp_type == :array && node.size == 2
1144
+
1145
+ # HACK matz wraps ONE of the FOUR splats in a newline to
1146
+ # distinguish. I use paren for now. ugh
1147
+ node = s(:svalue, node) if node.sexp_type == :splat and not node.paren
1148
+ node.sexp_type = :svalue if node.sexp_type == :arglist && node[1].sexp_type == :splat
1149
+ end
1150
+
1151
+ node
1152
+ end
1153
+
1154
# Sexp factory: builds Sexp.new(*args), defaults its line to the lexer's
# current line (only when the lexer has a scanner attached) and stamps it
# with the current file.
def s(*args)
  Sexp.new(*args).tap do |sexp|
    sexp.line ||= lexer.lineno if lexer.ss # otherwise...
    sexp.file = self.file
  end
end
1160
+
1161
# Unwraps a redundant :begin around +oldnode+ (keeping the original line)
# and, for an :if node, applies the same treatment to the element at
# index 2. A nil +oldnode+ is returned as-is.
def value_expr oldnode # HACK: much more to do
  node = remove_begin oldnode
  return node unless oldnode

  node.line = oldnode.line
  node[2] = value_expr(node[2]) if node.sexp_type == :if
  node
end
1167
+
1168
# For a :block node, runs every statement through remove_begin (in
# place) and returns the node. Non-block nodes are returned untouched and
# nil yields nil.
def void_stmts node
  return nil unless node
  return node if node.sexp_type != :block

  if node.respond_to? :sexp_body= then
    node.sexp_body = node.sexp_body.map { |stmt| remove_begin stmt }
  else
    node[1..-1] = node[1..-1].map { |stmt| remove_begin(stmt) }
  end

  node
end
1180
+
1181
+ def warning s
1182
+ # do nothing for now
1183
+ end
1184
+
1185
+ alias yyerror syntax_error
1186
+
1187
+ class Keyword
1188
+ class KWtable
1189
+ attr_accessor :name, :state, :id0, :id1
1190
+ def initialize(name, id=[], state=nil)
1191
+ @name = name
1192
+ @id0, @id1 = id
1193
+ @state = state
1194
+ end
1195
+ end
1196
+
1197
+ ##
1198
+ # :stopdoc:
1199
+ #
1200
+ # :expr_beg = ignore newline, +/- is a sign.
1201
+ # :expr_end = newline significant, +/- is an operator.
1202
+ # :expr_arg = newline significant, +/- is an operator.
1203
+ # :expr_cmdarg = newline significant, +/- is an operator.
1204
+ # :expr_endarg = newline significant, +/- is an operator.
1205
+ # :expr_mid = newline significant, +/- is an operator.
1206
+ # :expr_fname = ignore newline, no reserved words.
1207
+ # :expr_dot = right after . or ::, no reserved words.
1208
+ # :expr_class = immediately after class, no here document.
1209
+
1210
+ wordlist = [
1211
+ ["end", [:kEND, :kEND ], :expr_end ],
1212
+ ["else", [:kELSE, :kELSE ], :expr_beg ],
1213
+ ["case", [:kCASE, :kCASE ], :expr_beg ],
1214
+ ["ensure", [:kENSURE, :kENSURE ], :expr_beg ],
1215
+ ["module", [:kMODULE, :kMODULE ], :expr_beg ],
1216
+ ["elsif", [:kELSIF, :kELSIF ], :expr_beg ],
1217
+ ["def", [:kDEF, :kDEF ], :expr_fname ],
1218
+ ["rescue", [:kRESCUE, :kRESCUE_MOD ], :expr_mid ],
1219
+ ["not", [:kNOT, :kNOT ], :expr_beg ],
1220
+ ["then", [:kTHEN, :kTHEN ], :expr_beg ],
1221
+ ["yield", [:kYIELD, :kYIELD ], :expr_arg ],
1222
+ ["for", [:kFOR, :kFOR ], :expr_beg ],
1223
+ ["self", [:kSELF, :kSELF ], :expr_end ],
1224
+ ["false", [:kFALSE, :kFALSE ], :expr_end ],
1225
+ ["retry", [:kRETRY, :kRETRY ], :expr_end ],
1226
+ ["return", [:kRETURN, :kRETURN ], :expr_mid ],
1227
+ ["true", [:kTRUE, :kTRUE ], :expr_end ],
1228
+ ["if", [:kIF, :kIF_MOD ], :expr_beg ],
1229
+ ["defined?", [:kDEFINED, :kDEFINED ], :expr_arg ],
1230
+ ["super", [:kSUPER, :kSUPER ], :expr_arg ],
1231
+ ["undef", [:kUNDEF, :kUNDEF ], :expr_fname ],
1232
+ ["break", [:kBREAK, :kBREAK ], :expr_mid ],
1233
+ ["in", [:kIN, :kIN ], :expr_beg ],
1234
+ ["do", [:kDO, :kDO ], :expr_beg ],
1235
+ ["nil", [:kNIL, :kNIL ], :expr_end ],
1236
+ ["until", [:kUNTIL, :kUNTIL_MOD ], :expr_beg ],
1237
+ ["unless", [:kUNLESS, :kUNLESS_MOD ], :expr_beg ],
1238
+ ["or", [:kOR, :kOR ], :expr_beg ],
1239
+ ["next", [:kNEXT, :kNEXT ], :expr_mid ],
1240
+ ["when", [:kWHEN, :kWHEN ], :expr_beg ],
1241
+ ["redo", [:kREDO, :kREDO ], :expr_end ],
1242
+ ["and", [:kAND, :kAND ], :expr_beg ],
1243
+ ["begin", [:kBEGIN, :kBEGIN ], :expr_beg ],
1244
+ ["__LINE__", [:k__LINE__, :k__LINE__ ], :expr_end ],
1245
+ ["class", [:kCLASS, :kCLASS ], :expr_class ],
1246
+ ["__FILE__", [:k__FILE__, :k__FILE__ ], :expr_end ],
1247
+ ["END", [:klEND, :klEND ], :expr_end ],
1248
+ ["BEGIN", [:klBEGIN, :klBEGIN ], :expr_end ],
1249
+ ["while", [:kWHILE, :kWHILE_MOD ], :expr_beg ],
1250
+ ["alias", [:kALIAS, :kALIAS ], :expr_fname ],
1251
+ ["__ENCODING__", [:k__ENCODING__, :k__ENCODING__], :expr_end],
1252
+ ].map { |args| KWtable.new(*args) }
1253
+
1254
+ # :startdoc:
1255
+
1256
+ WORDLIST18 = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
1257
+ WORDLIST19 = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
1258
+
1259
+ WORDLIST18.delete "__ENCODING__"
1260
+
1261
+ %w[and case elsif for if in module or unless until when while].each do |k|
1262
+ WORDLIST19[k] = WORDLIST19[k].dup
1263
+ WORDLIST19[k].state = :expr_value
1264
+ end
1265
+ %w[not].each do |k|
1266
+ WORDLIST19[k] = WORDLIST19[k].dup
1267
+ WORDLIST19[k].state = :expr_arg
1268
+ end
1269
+
1270
+ def self.keyword18 str # REFACTOR
1271
+ WORDLIST18[str]
1272
+ end
1273
+
1274
+ def self.keyword19 str
1275
+ WORDLIST19[str]
1276
+ end
1277
+ end
1278
+
1279
##
# A stack of variable scopes.
#
# +env+ holds one hash per scope, newest first, and +dyn+ holds a
# parallel list of flags given to #extend. Lookups (#[]) see the
# newest scope plus every older scope up to and including the first
# one whose flag is false; writes (#[]=) always go to the newest
# scope.

class Environment
  attr_reader :env, :dyn

  ##
  # Creates an environment containing a single scope whose flag is
  # false. The +dyn+ argument is accepted but not used.

  def initialize(dyn = false)
    @dyn = []
    @env = []
    reset
  end

  ##
  # Looks +k+ up in the merged view returned by #all.

  def [](k)
    all[k]
  end

  ##
  # Stores +v+ under +k+ in the newest scope. Raises RuntimeError
  # ("no") when +v+ is +true+.

  def []=(k, v)
    raise "no" if v == true
    current[k] = v
  end

  ##
  # Returns the visible bindings: the scopes from the newest down to
  # the first one flagged false, merged so that newer scopes win. When
  # only one scope is visible that scope's own hash is returned, not a
  # copy.

  def all
    idx = @dyn.index(false) || 0
    @env[0..idx].reverse.inject { |merged, scope| merged.merge scope }
  end

  ##
  # Returns the newest scope's hash.

  def current
    @env.first
  end

  ##
  # Pushes a new empty scope, recording +dyn+ as its flag.

  def extend(dyn = false)
    @dyn.unshift dyn
    @env.unshift({})
  end

  ##
  # Drops every scope and starts over with a single scope flagged
  # false.

  def reset
    @dyn.clear
    @env.clear
    extend
  end

  ##
  # Pops the newest scope. Returns nil.
  #
  # Raises RuntimeError when that would remove the last remaining
  # scope. The check happens before anything is removed, so after the
  # error the environment still has its base scope and stays usable.

  def unextend
    raise "You went too far unextending env" if @env.size <= 1

    @dyn.shift
    @env.shift
    nil
  end
end
1323
+
1324
##
# A named stack of flags. The stack starts as [false]; #is_in_state
# reports the top entry. When +debug+ is set, each operation reports
# itself through #log.

class StackState
  attr_reader :name
  attr_reader :stack
  attr_accessor :debug

  ##
  # Creates a stack called +name+ holding a single false entry.

  def initialize(name, debug = false)
    @name  = name
    @debug = debug
    @stack = [false]
  end

  ##
  # Human-readable form showing the name and the stack contents.

  def inspect
    "StackState(#{@name}, #{@stack.inspect})"
  end

  ##
  # Returns the top entry of the stack.

  def is_in_state
    log :is_in_state if debug
    @stack.last
  end

  ##
  # Writes a trace line (stack name, +action+, stack contents and a
  # caller location) via +warn+. Returns nil.
  #
  # The location is taken two frames up, or three when that frame
  # mentions expr_result.

  def log(action)
    site = caller[1]
    site = caller[2] if site =~ /expr_result/
    warn "%s_stack.%s: %p at %s" % [name, action, @stack, site.clean_caller]
    nil
  end

  ##
  # Appends +val+ to the stack. Returns nil.

  def push(val)
    @stack.push val
    log :push if debug
  end

  ##
  # Removes and returns the top entry. If that empties the stack a
  # false entry is put back.

  def pop
    top = @stack.pop
    @stack.push false if @stack.empty?
    log :pop if debug
    top
  end

  ##
  # Replaces the top two entries with a single entry that is their
  # logical OR. Raises if the stack is empty. Returns nil.

  def lexpop
    raise if @stack.empty?
    newer = @stack.pop
    older = @stack.pop
    @stack.push(newer || older)
    log :lexpop if debug
  end

  ##
  # Returns a copy of the current stack and resets the live stack, in
  # place, to the single entry +base+.

  def store(base = false)
    snapshot = @stack.dup
    @stack.replace [base]
    log :store if debug
    snapshot
  end

  ##
  # Overwrites the live stack, in place, with the contents of
  # +oldstate+. Returns nil.

  def restore(oldstate)
    @stack.replace oldstate
    log :restore if debug
  end

  ##
  # Discards the stack and starts again from a fresh [false].

  def reset
    @stack = [false]
    log :reset if debug
  end
end
1388
+ end