parser 0.9.alpha

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+
2
+ require 'ruby18_parser'
3
+ require 'ruby19_parser'
4
+ require 'ruby_parser_extras'
@@ -0,0 +1,1148 @@
1
+ # encoding: ASCII-8BIT
2
+
3
+ require 'racc/parser'
4
+ require 'ruby_lexer'
5
+ require 'sexp'
6
+ require 'timeout'
7
+
8
+ # WHY do I have to do this?!?
9
# Compatibility constants on Regexp that the parser refers to by name
# (see new_regexp) but that a given Ruby may not define itself.
class Regexp
  # Option value used for the /o flag; only set when not already defined.
  ONCE = 0 unless defined? ONCE # FIX: remove this - it makes no sense

  # Encoding option bits, read off literal regexps carrying each flag.
  # All four are defined together, keyed on ENC_NONE being absent.
  if !defined?(ENC_NONE)
    ENC_NONE = /x/n.options
    ENC_EUC  = /x/e.options
    ENC_SJIS = /x/s.options
    ENC_UTF8 = /x/u.options
  end
end
19
+
20
+ # I hate ruby 1.9 string changes
21
# When indexing a String does not give back a String (i.e. "a"[0] is an
# integer), add an identity #ord to Fixnum so that "a"[0].ord still works.
if "a"[0] != "a"
  class Fixnum
    # An integer character code is already its own ordinal.
    def ord
      self
    end
  end
end
26
+
27
+ module RubyParserStuff
28
+ VERSION = '0.9.alpha'
29
+
30
+ attr_accessor :lexer, :in_def, :in_single, :file
31
+ attr_reader :env, :comments
32
+
33
# Abort by raising RubyParser::SyntaxError with +msg+ as its message.
def syntax_error(msg)
  raise(RubyParser::SyntaxError, msg)
end
36
+
37
# Add +node2+ to the argument list +node1+.
#
# * no +node1+: a fresh s(:arglist, node2) is returned;
# * +node1+ tagged :array is retagged :arglist in place;
# * +node1+ tagged :arglist gets +node2+ appended and is returned;
# * anything else is wrapped together with +node2+ in a new :arglist.
def arg_add(node1, node2) # TODO: nuke
  if !node1
    s(:arglist, node2)
  else
    node1[0] = :arglist if node1[0] == :array

    if node1[0] == :arglist
      node1 << node2
    else
      s(:arglist, node1, node2)
    end
  end
end
45
+
46
# Make sure +node1+ is list-shaped (tagged :arglist or :array, otherwise
# wrapped in a new :arglist), append +node2+ when one is given, and return
# the list.
def arg_blk_pass(node1, node2) # TODO: nuke
  list = case node1.first
         when :arglist, :array then node1
         else s(:arglist, node1)
         end

  list << node2 if node2
  list
end
51
+
52
# Append s(:splat, node2) (compacted) to +node1+ and return +node1+.
# Raises "huh" when +node2+ is missing.
def arg_concat(node1, node2) # TODO: nuke
  raise "huh" unless node2

  splat = s(:splat, node2).compact
  node1 << splat
  node1
end
57
+
58
# Simplify a multiple-assignment left-hand side.
#
# * A :masgn whose only child is an :array is rebuilt as a :masgn of the
#   recursively cleaned array elements; any other :masgn is returned as is.
# * A two-element :gasgn/:iasgn/:lasgn/:cvasgn collapses to its last
#   element; longer ones (optional value present) are returned unchanged.
# * Every other node type raises "unsupported type: ...".
def clean_mlhs(sexp)
  type = sexp.sexp_type

  if type == :masgn
    return sexp unless sexp.size == 2 && sexp[1].sexp_type == :array

    cleaned = sexp[1][1..-1].map { |sub| clean_mlhs sub }
    return s(:masgn, *cleaned)
  end

  if [:gasgn, :iasgn, :lasgn, :cvasgn].include?(type)
    return sexp.size == 2 ? sexp.last : sexp # longer form: optional value
  end

  raise "unsupported type: #{sexp.inspect}"
end
76
+
77
# Build an argument list from +args+ via #args, then retag it as :masgn.
def block_var(*args)
  masgn = self.args(args) # explicit receiver: the local +args+ shadows #args
  masgn[0] = :masgn
  masgn
end
82
+
83
# Combine block parameters into one node.
#
# +ary+ (defaulting to an empty :array) receives :"*name" for +splat+ (a
# Symbol, or a node whose second element is the name) and :"&name" for
# +block+. The result is a :masgn of the entries when there is more than
# one entry or +ary.splat+ is truthy; otherwise it is the last entry.
def block_var18(ary, splat, block)
  ary ||= s(:array)

  if splat
    name = Symbol === splat ? splat : splat[1]
    ary << "*#{name}".to_sym
  end

  ary << "&#{block[1]}".to_sym if block

  return s(:masgn, *ary[1..-1]) if ary.length > 2 || ary.splat # HACK

  ary.last
end
99
+
100
# Flatten a heterogeneous list of pieces into a single s(:args, ...).
#
# * :args / :block / :array nodes contribute their children;
# * a :block_arg node contributes :"&name";
# * :masgn nodes and Symbols are appended unchanged;
# * "," and nil are skipped;
# * anything else raises "unhandled: ...".
def args(args)
  result = s(:args)

  args.each do |arg|
    if Sexp === arg
      type = arg.sexp_type

      if [:args, :block, :array].include?(type)
        result.concat arg[1..-1]
      elsif type == :block_arg
        result << :"&#{arg.last}"
      elsif type == :masgn
        result << arg
      else
        raise "unhandled: #{arg.inspect}"
      end
    elsif Symbol === arg
      result << arg
    elsif "," == arg || arg.nil?
      # ignore
    else
      raise "unhandled: #{arg.inspect}"
    end
  end

  result
end
127
+
128
# Build the :attrasgn node for an element assignment: +receiver+, the
# :"[]=" selector, and the children of +index+ (none when +index+ is nil).
def aryset(receiver, index)
  indices = (index || [])[1..-1]
  s(:attrasgn, receiver, :"[]=", *indices)
end
132
+
133
# Build the assignment node for the variable named by +lhs+, optionally
# appending +value+ as the assigned expression.
#
# The node type is chosen from the name's prefix:
#   @@x  -> :cvasgn inside a def / singleton def, otherwise :cvdecl
#   @x   -> :iasgn
#   $x   -> :gasgn
#   X... -> :cdecl
#   else -> :lasgn (the name must be unknown to env, or :lvar / :dvar)
#
# The name is recorded in env as :lvar if env has no entry for it yet.
#
# Raises RubyParser::SyntaxError (via #syntax_error) for the pseudo
# variables self, nil, true, false, __LINE__ and __FILE__. This replaces a
# placeholder `raise "write a test 1"` that made the real error
# unreachable; RubyParser::SyntaxError is a RuntimeError, so callers
# rescuing the old placeholder still see a compatible exception.
def assignable(lhs, value = nil)
  id = lhs.to_sym
  id = id.to_sym if Sexp === id

  if id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
    syntax_error "Can't change the value of #{id}"
  end

  result = case id.to_s
           when /^@@/ then
             asgn = in_def || in_single > 0
             s((asgn ? :cvasgn : :cvdecl), id)
           when /^@/ then
             s(:iasgn, id)
           when /^\$/ then
             s(:gasgn, id)
           when /^[A-Z]/ then
             s(:cdecl, id)
           else
             case self.env[id]
             when :lvar, :dvar, nil then
               s(:lasgn, id)
             else
               raise "wtf? unknown type: #{self.env[id]}"
             end
           end

  self.env[id] ||= :lvar

  result << value if value

  return result
end
167
+
168
# Join two statements into a :block.
#
# Returns the other operand when either is nil, and just +tail+ when
# +head+ is a bare :lit or :str. Otherwise +head+ (with a redundant :begin
# removed, and wrapped in a :block if it is not one already) gets +tail+
# appended and its line set to the smaller of the two line numbers.
def block_append(head, tail)
  return head if tail.nil?
  return tail if head.nil?
  return tail if [:lit, :str].include?(head[0])

  first_line = [head.line, tail.line].compact.min

  block = remove_begin(head)
  block = s(:block, block) unless block.node_type == :block

  block.line = first_line
  block << tail
end
185
+
186
# Rewrite +node+ for use as a condition.
#
# * nil stays nil;
# * a :lit holding a Regexp is wrapped in :match;
# * :and / :or have both operands rewritten recursively;
# * :dot2 / :dot3 become :flip2 / :flip3, after registering a
#   "flip<hash>" entry in env as :lvar;
# * everything else is returned as is (after value_expr).
def cond(node)
  return nil if node.nil?

  node = value_expr(node)
  type = node.first

  if type == :lit
    Regexp === node.last ? s(:match, node) : node
  elsif type == :and || type == :or
    s(type, cond(node[1]), cond(node[2]))
  elsif type == :dot2 || type == :dot3
    env["flip#{node.hash}"] = :lvar
    s(type == :dot2 ? :flip2 : :flip3, node[1], node[2])
  else
    node
  end
end
213
+
214
+ ##
215
+ # for pure ruby systems only
216
+
217
# Defined only when the PURE_RUBY environment variable is set: runs
# _racc_do_parse_rb on the result of _racc_setup.
if ENV['PURE_RUBY']
  def do_parse
    setup = _racc_setup
    _racc_do_parse_rb(setup, false)
  end
end
220
+
221
# Build the node for `lhs =~ rhs`.
#
# When +lhs+ is a regexp (:dregx, :dregx_once, or a :lit holding a Regexp)
# the result is s(:match2, lhs, rhs). Otherwise, when +rhs+ is a regexp,
# it is s(:match3, rhs, lhs). Otherwise it is a plain :"=~" call. The
# result always takes its line from +lhs+.
def get_match_node(lhs, rhs) # TODO: rename to new_match
  if lhs
    type = lhs[0]
    if type == :dregx || type == :dregx_once ||
       (type == :lit && Regexp === lhs.last)
      return s(:match2, lhs, rhs).line(lhs.line)
    end
  end

  if rhs
    type = rhs[0]
    if type == :dregx || type == :dregx_once ||
       (type == :lit && Regexp === rhs.last)
      return s(:match3, rhs, lhs).line(lhs.line)
    end
  end

  new_call(lhs, :"=~", argl(rhs)).line(lhs.line)
end
242
+
243
# Build the node that reads the identifier +id+ (String or Symbol).
#
#   @@x  -> :cvar     @x -> :ivar     $x -> :gvar     X... -> :const
#
# Any other name uses the type recorded in env for it, or becomes a
# receiver-less call when env has no entry. Raises if no node resulted.
def gettable(id)
  id = id.to_sym if String === id
  name = id.to_s

  result = if name =~ /^@@/
             s(:cvar, id)
           elsif name =~ /^@/
             s(:ivar, id)
           elsif name =~ /^\$/
             s(:gvar, id)
           elsif name =~ /^[A-Z]/
             s(:const, id)
           else
             type = env[id]
             type ? s(type, id) : new_call(nil, id)
           end

  raise "identifier #{id.inspect} is not valid" unless result

  result
end
268
+
269
+ ##
270
+ # Canonicalize conditionals. Eg:
271
+ #
272
+ # not x ? a : b
273
+ #
274
+ # becomes:
275
+ #
276
+ # x ? b : a
277
+
278
+ attr_accessor :canonicalize_conditions
279
+
280
# Set up parser state: a RubyLexer (given 18 or 19 when the class name
# contains that number, else nil), a fresh Environment shared with the
# lexer as its static_env, an empty comment stack, and
# canonicalize_conditions switched on. Finishes with #reset.
#
# +options+ is accepted but not read here.
def initialize(options = {})
  super()

  version = self.class.name[/1[89]/]
  self.lexer = RubyLexer.new(version && version.to_i)

  @comments = []
  @env = RubyParserStuff::Environment.new
  lexer.static_env = @env

  @canonicalize_conditions = true

  reset
end
295
+
296
# Append +item+ to +list+, wrapping +list+ in an :array first unless it
# already is a Sexp tagged :array. With no +list+, returns s(:array, item).
def list_append(list, item) # TODO: nuke me *sigh*
  return s(:array, item) unless list

  array = (Sexp === list && list.first == :array) ? list : s(:array, list)
  array << item
end
301
+
302
# Insert +item+ as the first element (index 1, right after the tag) of
# +list+, wrapping +list+ in an :array first unless it already is a Sexp
# tagged :array. Returns the list.
def list_prepend(item, list) # TODO: nuke me *sigh*
  array = (Sexp === list && list[0] == :array) ? list : s(:array, list)
  array.insert(1, item)
  array
end
307
+
308
# Concatenate two adjacent string-ish nodes (:str, :dstr, :evstr) into one.
#
# Returns the other operand when either is missing. Otherwise +head+
# (possibly re-wrapped or replaced) is extended with the content of +tail+
# and returned; both operands may be mutated. Raises "unknown type: ..."
# when +tail+ is not one of the three handled types.
def literal_concat(head, tail)
  return tail unless head
  return head unless tail

  htype = head[0]
  ttype = tail[0]

  # htype keeps the original tag even after this wrap; the branches below
  # test it as such.
  head = s(:dstr, '', head) if htype == :evstr

  if ttype == :str
    if htype == :str || (htype == :dstr && head.size == 2)
      head[-1] << tail[-1]
    else
      head << tail
    end
  elsif ttype == :dstr
    if htype == :str
      tail[1] = head[-1] + tail[1]
      head = tail
    else
      tail[0] = :array
      tail[1] = s(:str, tail[1])
      tail.delete_at 1 if tail[1] == s(:str, '')

      head.push(*tail[1..-1])
    end
  elsif ttype == :evstr
    head[0] = :dstr if htype == :str

    if head.size == 2 && tail.size > 1 && tail[1][0] == :str
      head[-1] << tail[1][-1]
      head[0] = :str if head.size == 2 # HACK ?
    else
      head.push(tail)
    end
  else
    raise "unknown type: #{[head, tail].inspect}"
  end

  head
end
351
+
352
# Build the node for the logical operator +type+ over +left+ and +right+.
#
# When +left+ is itself an unparenthesised +type+ node, the chain is walked
# down its right-hand operands (while they are unparenthesised +type+ nodes
# too) and +right+ is attached at the innermost position, so that
# `a and b and c` nests to the right. +left+ is returned in that case.
def logop(type, left, right) # TODO: rename logical_op
  left = value_expr(left)

  return s(type, left, right) unless left && left[0] == type && !left.paren

  node = left
  second = nil

  loop do
    second = node[2]
    break unless second && second[0] == type && !second.paren
    node = second
  end

  node[2] = s(type, second, right)

  left
end
369
+
370
# Build the :"[]" call for an element reference. val[0] is the receiver and
# val[2] the index arguments (defaulted to an empty :arglist and retagged
# from :array in place). A :self receiver is dropped.
def new_aref(val)
  val[2] ||= s(:arglist)
  val[2][0] = :arglist if val[2][0] == :array # REFACTOR

  receiver = val[0].node_type == :self ? nil : val[0]
  new_call(receiver, :"[]", val[2])
end
380
+
381
# Assemble a body from val = [body, resbody, else, ensure].
#
# * With a resbody: a :rescue node holding the body (if any), every chained
#   resbody (followed via resbody.resbody(true)) and the else part (if any).
# * With only an else part: warn and append it to the body.
# * With an ensure part: wrap the result in :ensure (compacted).
def new_body(val)
  body, resbody, else_body, ensure_body = val[0], val[1], val[2], val[3]
  result = body

  if resbody
    result = s(:rescue)
    result << body if body

    clause = resbody
    while clause
      result << clause
      clause = clause.resbody(true)
    end

    result << else_body if else_body

    result.line = (body || resbody).line
  elsif !else_body.nil?
    warning("else without rescue is useless")
    result = block_append(result, else_body)
  end

  result = s(:ensure, result, ensure_body).compact if ensure_body
  result
end
406
+
407
# Wrap +x+ in an :arglist unless it is missing or already tagged :arglist.
def argl(x)
  return x if !x || x[0] == :arglist

  s(:arglist, x)
end
411
+
412
# Report an attempt to assign to a back-reference node +ref+.
#
# Raises RubyParser::SyntaxError (via #syntax_error) for :nth_ref and
# :back_ref nodes, and a RuntimeError for any other node type.
#
# The syntax errors used to be unreachable behind placeholder
# `raise "write a test N"` statements, and named the built-in SyntaxError
# (a ScriptError, which a plain `rescue` does not catch) instead of the
# parser's own error class. RubyParser::SyntaxError is a RuntimeError, so
# code rescuing the old placeholders keeps working.
def backref_assign_error(ref)
  # TODO: need a test for this... obviously
  case ref.first
  when :nth_ref then
    syntax_error "Can't set variable %p" % ref.last
  when :back_ref then
    syntax_error "Can't set back reference %p" % ref.last
  else
    raise "Unknown backref type: #{ref.inspect}"
  end
end
425
+
426
# Build s(:call, recv, meth, *arguments).
#
# +args+ may be nil (no arguments), an :arglist, an :array (retagged in
# place), or a single node (wrapped). The call's line is set to the
# smallest line found among its Sexp children, when there is one.
def new_call(recv, meth, args = nil)
  call = s(:call, recv, meth)

  # TODO: need a test with f(&b) to produce block_pass
  # TODO: need a test with f(&b) { } to produce warning

  arglist = args || s(:arglist)
  arglist[0] = :arglist if arglist.first == :array
  arglist = s(:arglist, arglist) unless arglist.first == :arglist

  # HACK quick hack to make this work quickly... easy to clean up above
  call.concat(arglist[1..-1])

  first_line = call.grep(Sexp).map { |child| child.line }.compact.min
  call.line = first_line if first_line

  call
end
444
+
445
# Build a :case node from +expr+ and a chain of :when nodes.
#
# Each :when carries the next clause at index 3; the chain is unlinked into
# a flat list. A :block inside a clause is spliced into the clause itself.
# Whatever is left after the last :when is the else part (nil when it is an
# empty :block) and is always appended.
def new_case(expr, body)
  result = s(:case, expr)
  first_line = (expr || body).line

  clause = body
  while clause && clause.node_type == :when
    result << clause
    clause = clause.delete_at(3)
  end

  result[2..-1].each do |when_node|
    block = when_node.block(:delete)
    when_node.concat(block[1..-1]) if block
  end

  # else
  clause = nil if clause == s(:block)
  result << clause

  result.line = first_line
  result
end
466
+
467
# Build a :class node from the parser value stack: val[1] is the line,
# val[2] the path, val[3] the superclass and val[5] the body. A :block
# body is spliced in statement by statement. The most recent entry of the
# comment stack is attached to the node.
def new_class(val)
  line, path, superclass, body = val[1], val[2], val[3], val[5]

  result = s(:class, path, superclass)

  if body
    stmts = body.first == :block ? body[1..-1] : [body]
    result.push(*stmts)
  end

  result.line = line
  result.comments = self.comments.pop
  result
end
484
+
485
# Take the first Sexp found in +val+, run it through void_stmts and, when
# something is left, strip a redundant :begin wrapper.
def new_compstmt(val)
  first = val.grep(Sexp)[0]
  stmts = void_stmts(first)
  stmts ? remove_begin(stmts) : stmts
end
490
+
491
# Build a :defn node from the parser value stack: val[1] is the name,
# val[3] the args and val[4] the body (s(:nil) when missing). A :block
# body is spliced in statement by statement. The most recent entry of the
# comment stack is attached to the node.
def new_defn(val)
  name, args, body = val[1], val[3], val[4]
  body = s(:nil) unless body

  result = s(:defn, name.to_sym, args)

  stmts = body.first == :block ? body[1..-1] : [body]
  result.push(*stmts)

  result.comments = self.comments.pop
  result
end
508
+
509
# Build a :defs (singleton method) node from the parser value stack: val[1]
# is the receiver, val[4] the name, val[6] the args and val[7] the body. A
# :block body is spliced in statement by statement. The node takes its line
# from the receiver and the most recent entry of the comment stack.
def new_defs(val)
  recv, name, args, body = val[1], val[4], val[6], val[7]

  result = s(:defs, recv, name.to_sym, args)

  if body
    stmts = body.first == :block ? body[1..-1] : [body]
    result.push(*stmts)
  end

  result.line = recv.line
  result.comments = self.comments.pop
  result
end
526
+
527
# Build s(:for, expr, var[, body]), with its line taken from +var+.
def new_for(expr, var, body)
  loop_node = s(:for, expr, var).line(var.line)
  loop_node << body if body
  loop_node
end
532
+
533
# Build s(:if, condition, then, else) at the smallest line of the three
# parts. With canonicalize_conditions on, a :not condition is unwrapped and
# the two branches are swapped.
def new_if(c, t, f)
  first_line = [c.line, t && t.line, f && f.line].compact.min

  test = cond(c)
  if test[0] == :not && canonicalize_conditions
    test, t, f = test.last, f, t
  end

  s(:if, test, t, f).line(first_line)
end
539
+
540
# Build s(:iter[, call], args[, body]).
#
# +args+ defaults to an empty :args node and a bare Symbol is wrapped in
# one. Unless +args+ is the literal 0, its tag is forced to :args.
def new_iter(call, args, body)
  args ||= s(:args)
  args = s(:args, args) if Symbol === args

  result = s(:iter)
  result << call if call
  result << args
  result << body if body

  args[0] = :args unless args == 0

  result
end
555
+
556
# Complete the multiple assignment +lhs+ with the value +rhs+.
#
# With +wrap+, the value is wrapped in :to_ary when lhs[1] is present and
# in :array otherwise. A nil lhs[1] is removed before the value is
# appended. Returns +lhs+.
def new_masgn(lhs, rhs, wrap = false)
  rhs = value_expr(rhs)

  if wrap
    rhs = lhs[1] ? s(:to_ary, rhs) : s(:array, rhs)
  end

  lhs.delete_at(1) if lhs[1].nil?
  lhs << rhs

  lhs
end
565
+
566
# Build a :module node from the parser value stack: val[1] is the line,
# val[2] the path and val[4] the body. A :block body is spliced in
# statement by statement. The most recent entry of the comment stack is
# attached to the node.
def new_module(val)
  line, path, body = val[1], val[2], val[4]

  result = s(:module, path)

  if body # REFACTOR?
    stmts = body.first == :block ? body[1..-1] : [body]
    result.push(*stmts)
  end

  result.line = line
  result.comments = self.comments.pop
  result
end
583
+
584
# Build the node for an operator assignment from val = [lhs, op, arg].
#
# "||" and "&&" append +arg+ to +lhs+ and produce :op_asgn_or /
# :op_asgn_and over a read of the variable. Every other operator stores
# `variable <op> arg` as a call at lhs[2] and returns +lhs+ itself.
def new_op_asgn(val)
  lhs, asgn_op, arg = val[0], val[1].to_sym, val[2]
  name = lhs.value
  arg = remove_begin(arg)

  logical = { :"||" => :op_asgn_or, :"&&" => :op_asgn_and }[asgn_op] # REFACTOR

  if logical
    lhs << arg
    result = s(logical, gettable(name), lhs)
  else
    # TODO: why [2] ?
    lhs[2] = new_call(gettable(name), asgn_op, argl(arg))
    result = lhs
  end

  result.line = lhs.line
  result
end
603
+
604
# Build a regexp node from the parser value stack: val[1] is the content
# node (an empty :str when missing) and val[3] the option letters.
#
# * A :str becomes a :lit holding a compiled Regexp. When compiling without
#   an e/s/u kcode fails, the error is reported with warn and the regexp is
#   recompiled with only ENC_NONE.
# * A :dstr becomes :dregx (or :dregx_once with "o"), with the numeric
#   options appended when non-zero.
# * Anything else is wrapped in a new :dregx / :dregx_once the same way.
#
# Raises on an option letter that is not one of ximonesu.
def new_regexp(val)
  node = val[1] || s(:str, '')
  options = val[3]

  # FIX: this has a better home
  flags = {
    'x' => Regexp::EXTENDED,
    'i' => Regexp::IGNORECASE,
    'm' => Regexp::MULTILINE,
    'o' => Regexp::ONCE,
    'n' => Regexp::ENC_NONE,
    'e' => Regexp::ENC_EUC,
    's' => Regexp::ENC_SJIS,
    'u' => Regexp::ENC_UTF8,
  }

  o, k = 0, nil
  options.split(//).uniq.each do |c|
    v = flags[c]
    raise "unknown regexp option: #{c}" unless v
    o += v
    k = c if c =~ /[esu]/ # remember the kcode letter
  end

  case node[0]
  when :str
    source = node[1]
    node[0] = :lit
    node[1] = if k
                Regexp.new(source, o, k)
              else
                begin
                  Regexp.new(source, o)
                rescue RegexpError => e
                  warn "Ignoring: #{e.message}"
                  Regexp.new(source, Regexp::ENC_NONE)
                end
              end
  when :dstr
    node[0] = options =~ /o/ ? :dregx_once : :dregx
    node << o if o != 0
  else
    node = s(:dregx, '', node)
    node[0] = :dregx_once if options =~ /o/
    node << o if o != 0
  end

  node
end
653
+
654
# Build s(:resbody, cond, *statements). A :block body has its tag shifted
# off in place so that its statements are spliced in directly; any other
# body (including nil) becomes the single statement.
def new_resbody(cond, body)
  stmts = if body && body.first == :block
            body.shift # remove block and splat it in directly
            body
          else
            [body]
          end

  s(:resbody, cond, *stmts)
end
662
+
663
# Build a :sclass node from the parser value stack: val[2] is the line,
# val[3] the receiver and val[7] the body (a :block body is spliced in).
# The in_def / in_single state is then set from val[4] and val[6].
def new_sclass(val)
  recv, saved_in_def, saved_in_single, body = val[3], val[4], val[6], val[7]

  result = s(:sclass, recv)

  if body
    stmts = body.first == :block ? body[1..-1] : [body]
    result.push(*stmts)
  end

  result.line = val[2]
  self.in_def = saved_in_def
  self.in_single = saved_in_single
  result
end
681
+
682
# Build a :super node. A :block_pass argument is kept as the single child;
# otherwise the children of +args+ (none when +args+ is nil) are spliced in.
def new_super(args)
  return s(:super, args) if args && args.node_type == :block_pass

  args ||= s(:arglist)
  s(:super, *args[1..-1])
end
690
+
691
# With one argument, build s(:undef, n). With two, append s(:undef, m) to
# the existing node +n+ via block_append.
def new_undef(n, m = nil)
  return s(:undef, n) unless m

  block_append(n, s(:undef, m))
end
698
+
699
# Build an until loop; see new_until_or_while.
def new_until(block, expr, pre)
  new_until_or_while(:until, block, expr, pre)
end
702
+
703
# Build a loop node of +type+ (:while or :until) as
# s(type, condition, block, pre).
#
# A :begin block is replaced by its last element and forces +pre+ to false.
# With canonicalize_conditions on, a :not condition is unwrapped and the
# opposite loop type is used instead. The node's line is the smallest of
# the block's and the condition's.
def new_until_or_while(type, block, expr, pre)
  other = type == :until ? :while : :until
  first_line = [block && block.line, expr.line].compact.min

  if block && block[0] == :begin
    block, pre = block.last, false
  end

  expr = cond(expr)

  negated = expr.first == :not && canonicalize_conditions
  result = if negated
             s(other, expr.last, block, pre)
           else
             s(type, expr, block, pre)
           end

  result.line = first_line
  result
end
719
+
720
# Build s(:when, cond, body).
def new_when(cond, body)
  s(:when, cond, body)
end
723
+
724
# Build a while loop; see new_until_or_while.
def new_while(block, expr, pre)
  new_until_or_while(:while, block, expr, pre)
end
727
+
728
# Turn a string node into its backtick counterpart: :str is retagged :xstr
# and :dstr is retagged :dxstr (both in place); any other node is wrapped
# in a new :dxstr. With no node, an empty :xstr is returned.
def new_xstring(str)
  return s(:xstr, '') unless str

  retag = { :str => :xstr, :dstr => :dxstr }[str[0]]

  if retag
    str[0] = retag
    str
  else
    s(:dxstr, '', str)
  end
end
743
+
744
# Build s(:yield, *arguments).
#
# +args+ may be nil (no arguments), an :arglist, an :array (retagged in
# place), or a single node (wrapped).
#
# Raises RubyParser::SyntaxError (via #syntax_error) when +args+ is a
# :block_pass. That error used to be unreachable behind a placeholder
# `raise "write a test 4"`, and named the built-in SyntaxError (a
# ScriptError) instead of the parser's own error class.
# RubyParser::SyntaxError is a RuntimeError, so code rescuing the old
# placeholder keeps working.
def new_yield(args = nil)
  # TODO: raise args.inspect unless [:arglist].include? args.first # HACK
  if args && args.node_type == :block_pass
    syntax_error "Block argument should not be given."
  end

  args ||= s(:arglist)

  # TODO: I can prolly clean this up
  args[0] = :arglist if args.first == :array
  args = s(:arglist, args) unless args.first == :arglist

  return s(:yield, *args[1..-1])
end
758
+
759
# Fetch the next token from the lexer: lexer.advance when MiniTest is
# defined, lexer.advance_and_decorate otherwise.
def next_token
  defined?(MiniTest) ? lexer.advance : lexer.advance_and_decorate
end
766
+
767
# Complete the assignment target +lhs+ with the value +rhs+ and return it
# (nil when there is no +lhs+).
#
# * variable / constant / multiple assignments get the value appended;
# * a :const target is first retagged :cdecl;
# * :attrasgn / :call targets have a trailing non-Symbol element popped
#   off, and that list with the value added (via arg_add) is spliced back;
# * any other target raises "unknown lhs ...".
def node_assign(lhs, rhs) # TODO: rename new_assign
  return nil unless lhs

  rhs = value_expr(rhs)
  type = lhs[0]

  if type == :const
    lhs[0] = :cdecl
    lhs << rhs
  elsif [:gasgn, :iasgn, :lasgn, :masgn, :cdecl, :cvdecl, :cvasgn].include?(type)
    lhs << rhs
  elsif type == :attrasgn || type == :call
    args = Symbol === lhs.last ? nil : lhs.pop
    lhs.concat(arg_add(args, rhs)[1..-1])
  else
    raise "unknown lhs #{lhs.inspect}"
  end

  lhs
end
787
+
788
+ ##
789
+ # Returns a UTF-8 encoded string after processing BOMs and magic
790
+ # encoding comments.
791
+ #
792
+ # Holy crap... ok. Here goes:
793
+ #
794
+ # Ruby's file handling and encoding support is insane. We need to be
795
+ # able to lex a file. The lexer file is explicitly UTF-8 to make
796
+ # things cleaner. This allows us to deal with extended chars in
797
+ # class and method names. In order to do this, we need to encode all
798
+ # input source files as UTF-8. First, we look for a UTF-8 BOM by
799
+ # looking at the first line while forcing its encoding to
800
+ # ASCII-8BIT. If we find a BOM, we strip it and set the expected
801
+ # encoding to UTF-8. Then, we search for a magic encoding comment.
802
+ # If found, it overrides the BOM. Finally, we force the encoding of
803
+ # the input string to whatever was found, and then encode that to
804
+ # UTF-8 for compatibility with the lexer.
805
+
806
# Return a copy of +str+ converted to UTF-8.
#
# The first two lines are inspected as raw bytes. A leading UTF-8 BOM is
# stripped and selects "utf-8"; a magic encoding comment, when present,
# overrides that choice. The chosen name (if any) is tried first by
# hack_encoding, followed by its list of fallback encodings.
#
# On Rubies without String#encoding nothing is converted; a warning is
# printed if an encoding was requested.
#
# Raises RuntimeError (from hack_encoding) when no candidate encoding fits.
def handle_encoding(str)
  str = str.dup
  ruby19 = str.respond_to? :encoding
  encoding = nil

  header = str.lines.first(2)
  header.map! { |line| line.force_encoding "ASCII-8BIT" } if ruby19

  first = header.first || ""

  # Compare raw byte values so the check does not depend on the encoding
  # of this source file or of the input.
  if first.unpack("C3") == [0xEF, 0xBB, 0xBF]
    # The BOM is three *bytes*. Slicing a character-indexed string (where
    # the BOM is a single character) would also drop the first two real
    # characters, so switch to a byte view before cutting.
    str.force_encoding "ASCII-8BIT" if ruby19
    encoding, str = "utf-8", str[3..-1]
  end

  magic = header.map { |line|
    line[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
      line[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
  }.compact.first

  encoding = magic.strip if magic

  if encoding then
    if ruby19 then
      # HACK for stupid emacs formats. Non-destructive, so that a frozen
      # string literal is never mutated.
      encoding = encoding.sub(/utf-8-.+$/, 'utf-8')
      hack_encoding str, encoding
    else
      warn "Skipping magic encoding comment"
    end
  else
    # nothing specified... ugh. try to encode as utf-8
    hack_encoding str if ruby19
  end

  str
end

# Re-encode +str+ in place as UTF-8.
#
# Each candidate encoding (+extra+ first, when given) is forced onto the
# bytes in turn; the first one under which the bytes are valid and
# convertible wins. Candidates that are unknown to Ruby, invalid for the
# bytes, or not convertible to UTF-8 are skipped.
#
# Raises RuntimeError if the string does not end up as UTF-8.
def hack_encoding(str, extra = nil)
  # this is in sorted order of occurrence according to
  # charlock_holmes against 500k files
  encodings = [
               extra,
               Encoding::ISO_8859_1,
               Encoding::UTF_8,
               Encoding::ISO_8859_2,
               Encoding::ISO_8859_9,
               Encoding::SHIFT_JIS,
               Encoding::WINDOWS_1252,
               Encoding::EUC_JP,
              ].compact

  # terrible, horrible, no good, very bad, last ditch effort.
  encodings.each do |enc|
    begin
      str.force_encoding enc
      if str.valid_encoding? then
        str.encode! Encoding::UTF_8
        break
      end
    rescue ArgumentError
      # unknown encoding name (e.g. a typo in the magic comment): try the
      # next candidate instead of aborting the parse
    rescue Encoding::InvalidByteSequenceError
      # do nothing
    rescue Encoding::UndefinedConversionError
      # do nothing
    end
  end

  # no amount of pain is enough for you.
  raise "Bad encoding. Need a magic encoding comment." unless
    str.encoding.name == "UTF-8"
end
870
+
871
+ ##
872
+ # Parse +str+ at path +file+ and return a sexp. Raises
873
+ # Timeout::Error if it runs for more than +time+ seconds.
874
+
875
# Parse the source text +str+.
#
# str::  the Ruby source; anything but a String raises "bad val: ...".
# file:: name recorded as the current file (a copy is stored).
# time:: Timeout limit in seconds for the whole run.
#
# The text is normalised by handle_encoding, handed to a reset lexer, and
# parsed with do_parse, whose result is returned. Debug output (@yydebug)
# is on when the DEBUG environment variable exists.
def process(str, file = "(string)", time = 10)
  Timeout.timeout(time) do
    raise "bad val: #{str.inspect}" unless String === str

    source = handle_encoding(str)

    self.file = file.dup
    lexer.reset
    lexer.source = source

    @yydebug = ENV.has_key?('DEBUG')

    do_parse
  end
end
890
+
891
+ alias :parse :process
892
+
893
# Unwrap a :begin node that has exactly one child: the child is returned,
# carrying the wrapper's line. Any other node (or nil) is returned as is.
def remove_begin(node)
  return node unless node && :begin == node[0] && node.size == 2

  inner = node[-1]
  inner.line = node.line
  inner
end
901
+
902
# Return the parser to its initial state: reset the lexer, clear the
# in_def / in_single tracking, reset the environment and drop all
# collected comments.
def reset
  lexer.reset

  self.in_def = false
  self.in_single = 0

  env.reset
  comments.clear
end
909
+
910
# Raise a syntax error when a literal +block+ is given while
# +call_or_args+ already contains a :block_pass.
def block_dup_check(call_or_args, block)
  if block && call_or_args.block_pass?
    syntax_error "Both block arg and actual block given."
  end
end
914
+
915
# Normalise the argument node of a return/break/next style statement.
#
# * nil is returned unchanged;
# * an :array with a single element collapses to that element;
# * an unparenthesised :splat is wrapped in :svalue;
# * an :arglist whose first element is a :splat is retagged :svalue.
#
# Raises RubyParser::SyntaxError (via #syntax_error) when the node is a
# :block_pass. That error used to be unreachable behind a placeholder
# `raise "write a test 5"`, and named the built-in SyntaxError (a
# ScriptError) instead of the parser's own error class.
# RubyParser::SyntaxError is a RuntimeError, so code rescuing the old
# placeholder keeps working.
def ret_args(node)
  if node then
    if node[0] == :block_pass
      syntax_error "block argument should not be given"
    end

    node = node.last if node[0] == :array && node.size == 2
    # HACK matz wraps ONE of the FOUR splats in a newline to
    # distinguish. I use paren for now. ugh
    node = s(:svalue, node) if node[0] == :splat and not node.paren
    node[0] = :svalue if node[0] == :arglist && node[1][0] == :splat
  end

  node
end
931
+
932
# Create a Sexp from +args+, stamped with the current file and, when the
# lexer has a source and the node has no line yet, the lexer's line number.
def s(*args)
  sexp = Sexp.new(*args)
  sexp.line ||= lexer.lineno if lexer.source
  sexp.file = file
  sexp
end
938
+
939
# Strip a redundant :begin from +oldnode+ (keeping its line) and, for an
# :if node, do the same recursively to the element at index 2.
def value_expr(oldnode) # HACK
  node = remove_begin(oldnode)
  node.line = oldnode.line if oldnode

  if node && node[0] == :if
    node[2] = value_expr(node[2])
  end

  node
end
945
+
946
# For a :block node, strip redundant :begin wrappers from every statement
# (in place). Other nodes are returned untouched; nil stays nil.
def void_stmts(node)
  if !node
    nil
  elsif node[0] == :block
    stmts = node[1..-1].map { |stmt| remove_begin(stmt) }
    node[1..-1] = stmts
    node
  else
    node
  end
end
953
+
954
# Hook for parser warnings; currently ignores the message +s+.
def warning(s)
  # do nothing for now
end
957
+
958
+ alias yyerror syntax_error
959
+
960
# Parse-error hook: delegates to the superclass, then rewrites the message
# of the resulting Racc::ParseError in place to "file:line :: message"
# before re-raising it. The message is echoed with warn when $DEBUG is set.
def on_error(et, ev, values)
  begin
    super
  rescue Racc::ParseError => e
    # I don't like how the exception obscures the error message
    located = "%s:%p :: %s" % [file, lexer.lineno, e.message.strip]
    e.message.replace(located)
    warn e.message if $DEBUG
    raise
  end
end
968
+
969
# A stack of variable scopes.
#
# @env holds one Hash per scope, innermost first; @dyn holds, in parallel,
# the flag each scope was opened with. Lookups see the innermost scopes up
# to and including the first one whose flag is false.
class Environment
  attr_reader :env, :dyn

  # Start with a single scope. The +dyn+ argument is accepted but not used.
  def initialize(dyn = false)
    @dyn = []
    @env = []
    reset
  end

  # Look up +k+ across all currently visible scopes.
  def [](k)
    all[k]
  end

  # Record +v+ for +k+ in the innermost scope. The value true is rejected.
  def []=(k, v)
    raise "no" if v == true
    current[k] = v
  end

  # The visible scopes merged into one Hash, inner entries winning.
  def all
    boundary = @dyn.index(false) || 0
    @env[0..boundary].reverse.inject { |merged, scope| merged.merge(scope) }
  end

  # The innermost scope.
  def current
    @env.first
  end

  # Open a new innermost scope flagged with +dyn+.
  # (Shadows Object#extend on purpose: it takes a flag, not a module.)
  def extend(dyn = false)
    @dyn.unshift(dyn)
    @env.unshift({})
  end

  # Drop every scope and open a single fresh one.
  def reset
    @dyn.clear
    @env.clear
    extend
  end

  # Close the innermost scope; raises once no scope is left.
  def unextend
    @dyn.shift
    @env.shift
    raise "You went too far unextending env" if @env.empty?
  end
end
1013
+
1014
# A named stack of flags. The stack starts as [false]; #pop refills it
# with false when it runs empty. With +debug+ on, each operation prints a
# trace with p.
class StackState
  attr_reader :name
  attr_reader :stack
  attr_accessor :debug

  def initialize(name)
    @name = name
    @stack = [false]
    @debug = false
  end

  def inspect
    "StackState(#{@name}, #{@stack.inspect})"
  end

  # The flag on top of the stack.
  def is_in_state
    p :stack_is_in_state => [name, @stack.last, caller.first] if debug
    @stack.last
  end

  # Replace the two topmost flags by their logical OR.
  # Raises when the stack is empty.
  def lexpop
    p :stack_lexpop => caller.first if debug
    raise if @stack.empty?

    top, below = @stack.pop, @stack.pop
    @stack.push(top || below)
  end

  # Remove and return the top flag; an emptied stack is refilled with false.
  def pop
    top = @stack.pop
    p :stack_pop => [name, top, @stack, caller.first] if debug
    @stack.push(false) if @stack.empty?
    top
  end

  # Push +val+; always returns nil.
  def push(val)
    @stack.push(val)
    p :stack_push => [name, @stack, caller.first] if debug
    nil
  end
end
1055
+ end
1056
+
1057
# Racc parser class with the shared RubyParserStuff behaviour mixed in.
class Ruby19Parser < Racc::Parser
  include RubyParserStuff

  class << self
    # Parse +what+ with a new instance and print the result with p.
    def do(what)
      p(new.process(what))
    end
  end
end
1064
+
1065
# Racc parser class with the shared RubyParserStuff behaviour mixed in.
class Ruby18Parser < Racc::Parser
  include RubyParserStuff

  class << self
    # Parse +what+ with a new instance and print the result with p.
    def do(what)
      p(new.process(what))
    end
  end
end
1072
+
1073
+ ##
1074
+ # RubyParser is a compound parser that first attempts to parse using
1075
+ # the 1.9 syntax parser and falls back to the 1.8 syntax parser on a
1076
+ # parse error.
1077
+
1078
# Front end holding one Ruby18Parser and one Ruby19Parser.
class RubyParser
  # Error raised for syntax problems detected by the parser helpers.
  class SyntaxError < RuntimeError; end

  def initialize
    @p18 = Ruby18Parser.new
    @p19 = Ruby19Parser.new
  end

  # Parse +s+ (from file +f+, with a timeout of +t+ seconds) using the 1.9
  # parser; on a Racc::ParseError the 1.8 parser is tried instead.
  def process(s, f = "(string)", t = 10) # parens for emacs *sigh*
    begin
      @p19.process(s, f, t)
    rescue Racc::ParseError
      @p18.process(s, f, t)
    end
  end

  alias :parse :process

  # Reset both underlying parsers.
  def reset
    [@p18, @p19].each { |parser| parser.reset }
  end

  # A new parser matching the running interpreter's RUBY_VERSION; raises
  # for versions other than 1.8.x and 1.9.x.
  def self.for_current_ruby
    if RUBY_VERSION =~ /^1\.8/
      Ruby18Parser.new
    elsif RUBY_VERSION =~ /^1\.9/
      Ruby19Parser.new
    else
      raise "unrecognized RUBY_VERSION #{RUBY_VERSION}"
    end
  end
end
1110
+
1111
+ ############################################################
1112
+ # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
1113
+
1114
# Parser-specific additions to Sexp.
class Sexp
  attr_writer :paren

  # Whether the node was marked as parenthesised; false until set.
  def paren
    @paren ||= false
  end

  # The payload of a node with at most one child (its last element).
  # Raises "multi item sexp" for larger nodes.
  def value
    raise "multi item sexp" if size > 2
    last
  end

  # Always raises "no". (The original follows the raise with an
  # unreachable `self.value.to_sym`.)
  def to_sym
    raise "no"
  end

  # Append all elements of +x+.
  def add(x)
    concat(x)
  end

  # Always raises, reporting receiver and argument.
  def add_all(x)
    raise "no: #{self.inspect}.add_all #{x.inspect}" # TODO: need a test to trigger this
  end

  # True when any direct child is a Sexp tagged :block_pass.
  def block_pass?
    any? { |child| Sexp === child && child[0] == :block_pass }
  end

  alias_method :node_type, :sexp_type
  alias_method :values, :sexp_body # TODO: retire
end
1146
+
1147
+ # END HACK
1148
+ ############################################################