parser 0.9.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+
2
+ require 'ruby18_parser'
3
+ require 'ruby19_parser'
4
+ require 'ruby_parser_extras'
@@ -0,0 +1,1148 @@
1
+ # encoding: ASCII-8BIT
2
+
3
+ require 'racc/parser'
4
+ require 'ruby_lexer'
5
+ require 'sexp'
6
+ require 'timeout'
7
+
8
+ # WHY do I have to do this?!?
9
# Backfills the Regexp constants this parser relies on: ONCE, plus the
# ENC_* option values, each read from a literal regexp's #options.
class Regexp
  ONCE = 0 unless defined? ONCE # FIX: remove this - it makes no sense

  # All four ENC_* values are defined together, keyed off ENC_NONE alone.
  if !defined?(ENC_NONE)
    ENC_NONE = /x/n.options
    ENC_EUC  = /x/e.options
    ENC_SJIS = /x/s.options
    ENC_UTF8 = /x/u.options
  end
end
19
+
20
+ # I hate ruby 1.9 string changes
21
# When indexing a string does not give back a one-character string
# ("a"[0] != "a"), give Fixnum an #ord that returns the receiver itself.
unless "a"[0] == "a"
  class Fixnum
    def ord
      self
    end
  end
end
26
+
27
+ module RubyParserStuff
28
+ VERSION = '0.9.alpha'
29
+
30
+ attr_accessor :lexer, :in_def, :in_single, :file
31
+ attr_reader :env, :comments
32
+
33
##
# Raises RubyParser::SyntaxError with +msg+ as its message.

def syntax_error(msg)
  raise RubyParser::SyntaxError.new(msg)
end
36
+
37
##
# Adds +node2+ to the argument list +node1+. A missing +node1+ gives a
# fresh arglist; an :array node is retagged :arglist in place and
# appended to; any other node is wrapped together with +node2+.

def arg_add(node1, node2) # TODO: nuke
  if !node1
    s(:arglist, node2)
  else
    node1[0] = :arglist if node1[0] == :array

    if node1[0] == :arglist
      node1 << node2
    else
      s(:arglist, node1, node2)
    end
  end
end
45
+
46
##
# Returns +node1+ as an argument list (wrapping it in :arglist unless it
# is already an :arglist or :array) with +node2+ appended when given.

def arg_blk_pass(node1, node2) # TODO: nuke
  list = case node1.first
         when :arglist, :array then node1
         else s(:arglist, node1)
         end

  list << node2 if node2
  list
end
51
+
52
##
# Appends a :splat node wrapping +node2+ to +node1+ and returns +node1+.
# Raises "huh" when +node2+ is missing.

def arg_concat(node1, node2) # TODO: nuke
  raise "huh" if !node2

  splat = s(:splat, node2).compact
  node1.push splat
  node1
end
57
+
58
##
# Simplifies a multiple-assignment left-hand side. A :masgn holding a
# single :array is flattened, cleaning each element recursively; a bare
# two-element assignment node collapses to its last element; anything
# else raises.

def clean_mlhs(sexp)
  type = sexp.sexp_type

  if type == :masgn
    return sexp unless sexp.size == 2 && sexp[1].sexp_type == :array

    cleaned = sexp[1][1..-1].map { |sub| clean_mlhs sub }
    s(:masgn, *cleaned)
  elsif [:gasgn, :iasgn, :lasgn, :cvasgn].include?(type)
    # a longer node carries an optional value and is kept whole
    sexp.size == 2 ? sexp.last : sexp
  else
    raise "unsupported type: #{sexp.inspect}"
  end
end
76
+
77
##
# Builds an argument list from +args+ via #args and retags the result
# as :masgn.

def block_var(*args)
  masgn = self.args(args)
  masgn[0] = :masgn
  masgn
end
82
+
83
##
# Assembles 1.8-style block parameters. +splat+ (a Symbol, or a node
# whose second element is the name) and +block+ are appended to +ary+
# as :"*name" / :"&name" symbols. The result is a :masgn when there is
# more than one entry or ary.splat is truthy, otherwise the last entry.

def block_var18(ary, splat, block)
  ary ||= s(:array)

  if splat
    splat_name = Symbol === splat ? splat : splat[1]
    ary << "*#{splat_name}".to_sym
  end

  ary << "&#{block[1]}".to_sym if block

  # HACK
  return s(:masgn, *ary[1..-1]) if ary.length > 2 || ary.splat

  ary.last
end
99
+
100
##
# Flattens a list of parsed argument pieces into a single s(:args, ...).
# Symbols are kept; "," and nil are skipped; :args/:block/:array nodes
# contribute their bodies; :block_arg becomes :"&name"; :masgn nodes are
# kept whole. Anything else raises "unhandled".

def args(args)
  list = s(:args)

  args.each do |arg|
    case arg
    when Symbol then
      list << arg
    when ",", nil then
      next # separators and empty slots contribute nothing
    when Sexp then
      kind = arg.sexp_type

      if [:args, :block, :array].include?(kind)
        list.concat arg[1..-1]
      elsif kind == :block_arg
        list << :"&#{arg.last}"
      elsif kind == :masgn
        list << arg
      else
        raise "unhandled: #{arg.inspect}"
      end
    else
      raise "unhandled: #{arg.inspect}"
    end
  end

  list
end
127
+
128
##
# Builds the :attrasgn node for <tt>receiver[...] = </tt>, splatting in
# every element of +index+ after its first. A nil +index+ adds nothing.

def aryset(receiver, index)
  subscripts = (index || [])[1..-1]
  s(:attrasgn, receiver, :"[]=", *subscripts)
end
132
+
133
##
# Builds the assignment node for the name +lhs+, choosing the node type
# from the name's sigil (@@, @, $, leading capital) and falling back to
# a local assignment. +value+, when given, is appended to the node.
# A plain local name is recorded in the environment as :lvar unless it
# already has an entry.
#
# Raises RubyParser::SyntaxError (a RuntimeError) via #syntax_error when
# +lhs+ names self, nil, true, false, __LINE__ or __FILE__. This used to
# abort with a placeholder "write a test 1" RuntimeError that made the
# intended message unreachable.

def assignable(lhs, value = nil)
  id = lhs.to_sym

  syntax_error "Can't change the value of #{id}" if
    id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/

  result = case id.to_s
           when /^@@/ then
             # inside a method body class variables are assigned, not declared
             asgn = in_def || in_single > 0
             s((asgn ? :cvasgn : :cvdecl), id)
           when /^@/ then
             s(:iasgn, id)
           when /^\$/ then
             s(:gasgn, id)
           when /^[A-Z]/ then
             s(:cdecl, id)
           else
             case env[id]
             when :lvar, :dvar, nil then
               s(:lasgn, id)
             else
               raise "wtf? unknown type: #{env[id]}"
             end
           end

  env[id] ||= :lvar

  result << value if value

  return result
end
167
+
168
##
# Joins +head+ and +tail+ into one :block. A nil side yields the other
# side unchanged, and a :lit or :str +head+ is discarded in favour of
# +tail+. Otherwise +head+ (unwrapped by #remove_begin and wrapped in a
# :block if it is not one) gets the smaller of the two line numbers and
# has +tail+ appended.

def block_append(head, tail)
  return head if tail.nil?
  return tail if head.nil?
  return tail if [:lit, :str].include?(head[0])

  first_line = [head.line, tail.line].compact.min

  block = remove_begin(head)
  block = s(:block, block) if block.node_type != :block

  block.line = first_line
  block << tail
end
185
+
186
##
# Rewrites +node+ for use as a condition: a regexp literal becomes a
# :match, both sides of :and / :or are rewritten recursively, and
# :dot2 / :dot3 ranges become :flip2 / :flip3 (registering a "flip..."
# entry in the environment). Other nodes, and nil, pass through.

def cond(node)
  return nil if node.nil?
  node = value_expr node

  type = node.first

  case type
  when :lit then
    Regexp === node.last ? s(:match, node) : node
  when :and, :or then
    s(type, cond(node[1]), cond(node[2]))
  when :dot2, :dot3 then
    env["flip#{node.hash}"] = :lvar
    s(type == :dot2 ? :flip2 : :flip3, node[1], node[2])
  else
    node
  end
end
213
+
214
+ ##
215
+ # for pure ruby systems only
216
+
217
# Defined only when the PURE_RUBY environment variable is set: runs the
# parse through _racc_do_parse_rb.
if ENV['PURE_RUBY']
  def do_parse
    _racc_do_parse_rb(_racc_setup, false)
  end
end
220
+
221
##
# Builds the node for <tt>lhs =~ rhs</tt>. A regexp on the left
# (:dregx, :dregx_once, or a :lit holding a Regexp) gives :match2; a
# regexp on the right gives :match3 with the operands swapped; anything
# else is an ordinary call to =~. The result takes +lhs+'s line.

def get_match_node(lhs, rhs) # TODO: rename to new_match
  if lhs
    lhs_is_regexp = [:dregx, :dregx_once].include?(lhs[0]) ||
                    (lhs[0] == :lit && Regexp === lhs.last)
    return s(:match2, lhs, rhs).line(lhs.line) if lhs_is_regexp
  end

  if rhs
    rhs_is_regexp = [:dregx, :dregx_once].include?(rhs[0]) ||
                    (rhs[0] == :lit && Regexp === rhs.last)
    return s(:match3, rhs, lhs).line(lhs.line) if rhs_is_regexp
  end

  new_call(lhs, :"=~", argl(rhs)).line(lhs.line)
end
242
+
243
##
# Builds the node that reads the name +id+: :cvar, :ivar, :gvar or
# :const according to its sigil / leading capital; otherwise a node of
# whatever type the environment records for it, or a receiverless call
# when the environment has no entry.

def gettable(id)
  id = id.to_sym if String === id
  name = id.to_s

  node = if name =~ /^@@/
           s(:cvar, id)
         elsif name =~ /^@/
           s(:ivar, id)
         elsif name =~ /^\$/
           s(:gvar, id)
         elsif name =~ /^[A-Z]/
           s(:const, id)
         else
           kind = env[id]
           kind ? s(kind, id) : new_call(nil, id)
         end

  raise "identifier #{id.inspect} is not valid" unless node

  node
end
268
+
269
+ ##
270
+ # Canonicalize conditionals. Eg:
271
+ #
272
+ # not x ? a : b
273
+ #
274
+ # becomes:
275
+ #
276
+ # x ? b : a
277
+
278
+ attr_accessor :canonicalize_conditions
279
+
280
##
# Sets up a parser: creates the lexer (passing 18 or 19 when the class
# name contains it, else nil), a fresh Environment shared with the
# lexer as its static_env, an empty comment stack, turns condition
# canonicalization on, and resets. +options+ is accepted but not read.

def initialize(options = {})
  super()

  version = self.class.name[/1[89]/]
  self.lexer = RubyLexer.new(version && version.to_i)

  @comments = []
  @env = RubyParserStuff::Environment.new
  self.lexer.static_env = @env

  @canonicalize_conditions = true

  reset
end
295
+
296
##
# Appends +item+ to +list+, first wrapping +list+ in an :array node
# unless it already is one. A missing +list+ gives s(:array, item).

def list_append(list, item) # TODO: nuke me *sigh*
  return s(:array, item) unless list

  array = (Sexp === list && list.first == :array) ? list : s(:array, list)
  array << item
end
301
+
302
##
# Inserts +item+ right after the type tag of +list+, first wrapping
# +list+ in an :array node unless it already is one.

def list_prepend(item, list) # TODO: nuke me *sigh*
  array = if Sexp === list && list[0] == :array then list else s(:array, list) end
  array.insert 1, item
  array
end
307
+
308
##
# Concatenates two adjacent string-ish literal nodes (:str, :dstr,
# :evstr) into one node, merging neighbouring plain text where it can.
# Either side may be nil, in which case the other is returned. The
# nodes are modified in place. An unexpected +tail+ type raises.

def literal_concat(head, tail)
  return tail unless head
  return head unless tail

  htype = head[0]
  ttype = tail[0]

  # an interpolation on the left needs a :dstr to collect into
  head = s(:dstr, '', head) if htype == :evstr

  case ttype
  when :str then
    if htype == :str || (htype == :dstr && head.size == 2)
      head[-1] << tail[-1]
    else
      head << tail
    end
  when :dstr then
    if htype == :str
      tail[1] = head[-1] + tail[1]
      head = tail
    else
      tail[0] = :array
      tail[1] = s(:str, tail[1])
      tail.delete_at 1 if tail[1] == s(:str, '')

      head.push(*tail[1..-1])
    end
  when :evstr then
    head[0] = :dstr if htype == :str

    if head.size == 2 && tail.size > 1 && tail[1][0] == :str
      head[-1] << tail[1][-1]
      head[0] = :str if head.size == 2 # HACK ?
    else
      head.push(tail)
    end
  else
    pair = [head, tail]
    raise "unknown type: #{pair.inspect}"
  end

  head
end
351
+
352
##
# Builds a logical operator node of +type+. When +left+ is an
# unparenthesized node of the same +type+, +right+ is attached at the
# bottom of its right-hand chain (so the chain nests to the right) and
# +left+ itself is returned; otherwise a new node is built.

def logop(type, left, right) # TODO: rename logical_op
  left = value_expr left

  return s(type, left, right) unless left && left[0] == type && !left.paren

  # walk down the right spine while it keeps the same operator
  node = left
  loop do
    second = node[2]
    break unless second && second[0] == type && !second.paren
    node = second
  end

  node[2] = s(type, node[2], right)

  left
end
369
+
370
##
# Builds the <tt>[]</tt> call for an index expression. val[0] is the
# receiver and val[2] the (possibly missing) arguments, which are
# normalised to an :arglist. A :self receiver becomes a receiverless
# call.

def new_aref(val)
  val[2] ||= s(:arglist)
  val[2][0] = :arglist if val[2][0] == :array # REFACTOR

  receiver = val[0].node_type == :self ? nil : val[0]
  new_call receiver, :"[]", val[2]
end
380
+
381
##
# Assembles a body from val = [body, rescue clauses, else, ensure].
# With rescue clauses the result is a :rescue node holding the body,
# every chained resbody, and the else part. Without them an else part
# is simply appended to the body (after a warning). An ensure part
# wraps whatever was built in :ensure.

def new_body(val)
  body, resbody, else_body, ensure_body = val[0], val[1], val[2], val[3]
  result = body

  if resbody
    result = s(:rescue)
    result << body if body

    clause = resbody
    while clause
      result << clause
      clause = clause.resbody(true)
    end

    result << else_body if else_body

    result.line = (body || resbody).line
  elsif !else_body.nil?
    warning("else without rescue is useless")
    result = block_append(result, else_body)
  end

  result = s(:ensure, result, ensure_body).compact if ensure_body
  result
end
406
+
407
##
# Wraps +x+ in an :arglist unless it already is one. A nil/false +x+ is
# returned unchanged.

def argl(x)
  return x unless x && x[0] != :arglist

  s(:arglist, x)
end
411
+
412
##
# Rejects an assignment to a regexp back-reference node +ref+.
#
# Raises RubyParser::SyntaxError (a RuntimeError) via #syntax_error for
# :nth_ref and :back_ref nodes, naming the reference, and a plain
# RuntimeError for any other node type. The descriptive messages used
# to be unreachable behind placeholder "write a test" raises.

def backref_assign_error(ref)
  # TODO: need a test for this... obviously
  case ref.first
  when :nth_ref then
    syntax_error "Can't set variable %p" % ref.last
  when :back_ref then
    syntax_error "Can't set back reference %p" % ref.last
  else
    raise "Unknown backref type: #{ref.inspect}"
  end
end
425
+
426
##
# Builds a :call node for +meth+ on +recv+. +args+ may be nil, an
# :arglist, an :array (retagged in place), or a single node (wrapped);
# its elements are spliced directly into the call. The call's line is
# the smallest line found among its direct Sexp children, if any.

def new_call(recv, meth, args = nil)
  call = s(:call, recv, meth)

  # TODO: need a test with f(&b) to produce block_pass
  # TODO: need a test with f(&b) { } to produce warning

  arglist = args || s(:arglist)
  arglist[0] = :arglist if arglist.first == :array
  arglist = s(:arglist, arglist) unless arglist.first == :arglist

  # HACK quick hack to make this work quickly... easy to clean up above
  call.concat arglist[1..-1]

  lines = call.grep(Sexp).map { |sub| sub.line }.compact
  call.line = lines.min unless lines.empty?

  call
end
444
+
445
##
# Builds a :case node on +expr+. +body+ is a chain of :when nodes, each
# holding the next at index 3; the chain is unlinked into siblings and
# each clause's :block (if any) is spliced into the clause. Whatever is
# left after the last :when is the else part, with an empty s(:block)
# treated as nil.

def new_case(expr, body)
  kase = s(:case, expr)
  line = (expr || body).line

  while body && body.node_type == :when
    kase << body
    body = body.delete_at 3
  end

  kase[2..-1].each do |clause|
    block = clause.block(:delete)
    clause.concat block[1..-1] if block
  end

  # else
  body = nil if body == s(:block)
  kase << body

  kase.line = line
  kase
end
466
+
467
##
# Builds a :class node from val, reading the line from val[1], the path
# from val[2], the superclass from val[3] and the body from val[5]. A
# :block body is spliced in statement by statement. The node takes the
# most recently pushed comment.

def new_class(val)
  line, path, superclass, body = val[1], val[2], val[3], val[5]

  klass = s(:class, path, superclass)

  if body
    statements = body.first == :block ? body[1..-1] : [body]
    klass.push(*statements)
  end

  klass.line = line
  klass.comments = self.comments.pop
  klass
end
484
+
485
##
# Takes the first Sexp found in +val+, runs it through #void_stmts and,
# if anything is left, through #remove_begin.

def new_compstmt(val)
  stmts = void_stmts(val.grep(Sexp)[0])
  stmts ? remove_begin(stmts) : stmts
end
490
+
491
##
# Builds a :defn node from val: name from val[1], arguments from val[3]
# and body from val[4] (s(:nil) when missing). A :block body is spliced
# in statement by statement. The node takes the most recently pushed
# comment.

def new_defn(val)
  name, args = val[1], val[3]
  body = val[4] || s(:nil)

  defn = s(:defn, name.to_sym, args)

  # body can no longer be nil at this point
  if body.first == :block
    defn.push(*body[1..-1])
  else
    defn.push body
  end

  defn.comments = self.comments.pop
  defn
end
508
+
509
##
# Builds a :defs (singleton method) node from val: receiver from
# val[1], name from val[4], arguments from val[6], body from val[7].
# A :block body is spliced in statement by statement. The node takes
# the receiver's line and the most recently pushed comment.

def new_defs(val)
  recv, name, args, body = val[1], val[4], val[6], val[7]

  defs = s(:defs, recv, name.to_sym, args)

  if body
    statements = body.first == :block ? body[1..-1] : [body]
    defs.push(*statements)
  end

  defs.line = recv.line
  defs.comments = self.comments.pop
  defs
end
526
+
527
##
# Builds a :for node over +expr+ with loop variable +var+, positioned
# at +var+'s line; +body+ is appended when present.

def new_for(expr, var, body)
  loop_node = s(:for, expr, var).line(var.line)
  loop_node << body if body
  loop_node
end
532
+
533
##
# Builds an :if node with condition +c+, then-branch +t+ and
# else-branch +f+, positioned at the smallest line among them. When
# canonicalize_conditions is on, a :not condition is unwrapped and the
# branches are swapped.

def new_if(c, t, f)
  first_line = [c.line, t && t.line, f && f.line].compact.min
  c = cond c

  if c[0] == :not && canonicalize_conditions
    c, t, f = c.last, f, t
  end

  s(:if, c, t, f).line(first_line)
end
539
+
540
##
# Builds an :iter node from an optional +call+, block +args+ and an
# optional +body+. Missing args become s(:args), a bare Symbol is
# wrapped in s(:args, sym), and any args node other than 0 is retagged
# :args in place.

def new_iter(call, args, body)
  args ||= s(:args)
  args = s(:args, args) if Symbol === args

  iter = s(:iter)
  iter << call if call
  iter << args
  iter << body if body

  args[0] = :args unless args == 0

  iter
end
555
+
556
##
# Completes the multiple-assignment node +lhs+ with +rhs+ and returns
# +lhs+. With +wrap+, +rhs+ is wrapped in :to_ary when lhs[1] is set
# and in :array otherwise. A nil lhs[1] slot is removed before +rhs+
# is appended.

def new_masgn(lhs, rhs, wrap = false)
  rhs = value_expr(rhs)

  if wrap
    rhs = lhs[1] ? s(:to_ary, rhs) : s(:array, rhs)
  end

  lhs.delete_at 1 if lhs[1].nil?
  lhs << rhs

  lhs
end
565
+
566
##
# Builds a :module node from val, reading the line from val[1], the
# path from val[2] and the body from val[4]. A :block body is spliced
# in statement by statement. The node takes the most recently pushed
# comment.

def new_module(val)
  line, path, body = val[1], val[2], val[4]

  mod = s(:module, path)

  if body # REFACTOR?
    statements = body.first == :block ? body[1..-1] : [body]
    mod.push(*statements)
  end

  mod.line = line
  mod.comments = self.comments.pop
  mod
end
583
+
584
##
# Builds the node for an operator assignment from val = [lhs, op, arg].
# <tt>||</tt> and <tt>&&</tt> give :op_asgn_or / :op_asgn_and around
# the completed assignment; any other operator stores a call of that
# operator on the current value into lhs[2] and returns +lhs+.

def new_op_asgn(val)
  lhs, asgn_op, arg = val[0], val[1].to_sym, val[2]
  name = lhs.value
  arg = remove_begin(arg)

  result = if asgn_op == :"||" # REFACTOR
             lhs << arg
             s(:op_asgn_or, self.gettable(name), lhs)
           elsif asgn_op == :"&&"
             lhs << arg
             s(:op_asgn_and, self.gettable(name), lhs)
           else
             # TODO: why [2] ?
             lhs[2] = new_call(self.gettable(name), asgn_op, argl(arg))
             lhs
           end

  result.line = lhs.line
  result
end
603
+
604
##
# Builds a regexp node from val, where val[1] is the content node
# (s(:str, '') when missing) and val[3] the option letters. A :str
# becomes a :lit holding a compiled Regexp; a :dstr becomes :dregx (or
# :dregx_once with the 'o' option); any other node is wrapped in a
# :dregx. Non-zero option bits are appended to dynamic regexps. An
# unknown option letter raises.

def new_regexp(val)
  node = val[1] || s(:str, '')
  options = val[3]

  o, k = 0, nil
  options.split(//).uniq.each do |c| # FIX: this has a better home
    flag = case c
           when 'x' then Regexp::EXTENDED
           when 'i' then Regexp::IGNORECASE
           when 'm' then Regexp::MULTILINE
           when 'o' then Regexp::ONCE
           when 'n' then Regexp::ENC_NONE
           when 'e' then Regexp::ENC_EUC
           when 's' then Regexp::ENC_SJIS
           when 'u' then Regexp::ENC_UTF8
           end
    raise "unknown regexp option: #{c}" unless flag
    o += flag
    k = c if c =~ /[esu]/
  end

  case node[0]
  when :str then
    node[0] = :lit
    node[1] = if k
                Regexp.new(node[1], o, k)
              else
                begin
                  Regexp.new(node[1], o)
                rescue RegexpError => e
                  warn "Ignoring: #{e.message}"
                  Regexp.new(node[1], Regexp::ENC_NONE)
                end
              end
  when :dstr then
    node[0] = options =~ /o/ ? :dregx_once : :dregx
    node << o if o and o != 0
  else
    node = s(:dregx, '', node)
    node[0] = :dregx_once if options =~ /o/
    node << o if o and o != 0
  end

  node
end
653
+
654
##
# Builds a :resbody node for the exception list +cond+. A :block +body+
# has its tag removed (in place) and its statements splatted in; any
# other +body+, including nil, becomes the single trailing element.

def new_resbody(cond, body)
  if body && body.first == :block
    body.shift # remove block and splat it in directly
    statements = body
  else
    statements = [body]
  end

  s(:resbody, cond, *statements)
end
662
+
663
##
# Builds an :sclass node from val: receiver from val[3], body from
# val[7], line from val[2]. A :block body is spliced in statement by
# statement. Also assigns in_def and in_single from val[4] and val[6].

def new_sclass(val)
  recv, saved_in_def, saved_in_single, body = val[3], val[4], val[6], val[7]

  sclass = s(:sclass, recv)

  if body
    statements = body.first == :block ? body[1..-1] : [body]
    sclass.push(*statements)
  end

  sclass.line = val[2]
  self.in_def = saved_in_def
  self.in_single = saved_in_single
  sclass
end
681
+
682
##
# Builds a :super node. A :block_pass argument is kept as the single
# child; otherwise the elements of +args+ after its tag (none when
# +args+ is nil) are spliced in.

def new_super(args)
  return s(:super, args) if args && args.node_type == :block_pass

  arglist = args || s(:arglist)
  s(:super, *arglist[1..-1])
end
690
+
691
##
# With only +n+, builds s(:undef, n). With +m+ as well, appends a new
# s(:undef, m) to the existing node +n+ via #block_append.

def new_undef(n, m = nil)
  return s(:undef, n) unless m

  block_append(n, s(:undef, m))
end
698
+
699
##
# Builds an until-loop node; see #new_until_or_while.

def new_until(block, expr, pre)
  new_until_or_while(:until, block, expr, pre)
end
702
+
703
##
# Builds a loop node of +type+ (:until or :while) with condition +expr+
# and body +block+. A :begin body is unwrapped and forces +pre+ to
# false. When canonicalize_conditions is on, a :not condition is
# unwrapped and the opposite loop type is used. The node takes the
# smaller of the body's and condition's lines.

def new_until_or_while(type, block, expr, pre)
  line = [block && block.line, expr.line].compact.min

  if block && block[0] == :begin
    block, pre = block.last, false
  end

  expr = cond expr

  node = if expr.first == :not && canonicalize_conditions
           opposite = type == :until ? :while : :until
           s(opposite, expr.last, block, pre)
         else
           s(type, expr, block, pre)
         end

  node.line = line
  node
end
719
+
720
##
# Builds a :when node from its condition list and body.

def new_when(cond, body)
  s(:when, cond, body)
end
723
+
724
##
# Builds a while-loop node; see #new_until_or_while.

def new_while(block, expr, pre)
  new_until_or_while(:while, block, expr, pre)
end
727
+
728
##
# Converts a string node into its backtick form: :str becomes :xstr and
# :dstr becomes :dxstr (both retagged in place), any other node is
# wrapped in a :dxstr, and nil gives an empty :xstr.

def new_xstring(str)
  return s(:xstr, '') unless str

  case str[0]
  when :str  then str[0] = :xstr
  when :dstr then str[0] = :dxstr
  else str = s(:dxstr, '', str)
  end

  str
end
743
+
744
##
# Builds a :yield node. +args+ may be nil, an :arglist, an :array
# (retagged in place) or a single node (wrapped); its elements are
# spliced into the node.
#
# Raises RubyParser::SyntaxError (a RuntimeError) via #syntax_error when
# +args+ is a :block_pass. This used to abort with a placeholder
# "write a test 4" RuntimeError that made the intended message
# unreachable.

def new_yield(args = nil)
  # TODO: raise args.inspect unless [:arglist].include? args.first # HACK
  syntax_error "Block argument should not be given." if
    args && args.node_type == :block_pass

  args ||= s(:arglist)

  # TODO: I can prolly clean this up
  args[0] = :arglist if args.first == :array
  args = s(:arglist, args) unless args.first == :arglist

  return s(:yield, *args[1..-1])
end
758
+
759
##
# Fetches the next token from the lexer: via lexer.advance when
# MiniTest is defined, via lexer.advance_and_decorate otherwise.

def next_token
  defined?(MiniTest) ? lexer.advance : lexer.advance_and_decorate
end
766
+
767
##
# Completes the assignment target +lhs+ with the value +rhs+ and
# returns +lhs+ (nil when +lhs+ is missing). Plain assignment nodes get
# +rhs+ appended; :attrasgn / :call targets get it added to their
# argument list; a :const is retagged :cdecl first. Any other target
# raises.

def node_assign(lhs, rhs) # TODO: rename new_assign
  return nil unless lhs

  rhs = value_expr rhs
  type = lhs[0]

  if [:gasgn, :iasgn, :lasgn, :masgn, :cdecl, :cvdecl, :cvasgn].include?(type)
    lhs << rhs
  elsif type == :attrasgn || type == :call
    # a trailing non-Symbol element is the existing argument list
    args = Symbol === lhs.last ? nil : lhs.pop
    lhs.concat arg_add(args, rhs)[1..-1]
  elsif type == :const
    lhs[0] = :cdecl
    lhs << rhs
  else
    raise "unknown lhs #{lhs.inspect}"
  end

  lhs
end
787
+
788
+ ##
789
+ # Returns a UTF-8 encoded string after processing BOMs and magic
790
+ # encoding comments.
791
+ #
792
+ # Holy crap... ok. Here goes:
793
+ #
794
+ # Ruby's file handling and encoding support is insane. We need to be
795
+ # able to lex a file. The lexer file is explicitly UTF-8 to make
796
+ # things cleaner. This allows us to deal with extended chars in
797
+ # class and method names. In order to do this, we need to encode all
798
+ # input source files as UTF-8. First, we look for a UTF-8 BOM by
799
+ # looking at the first line while forcing its encoding to
800
+ # ASCII-8BIT. If we find a BOM, we strip it and set the expected
801
+ # encoding to UTF-8. Then, we search for a magic encoding comment.
802
+ # If found, it overrides the BOM. Finally, we force the encoding of
803
+ # the input string to whatever was found, and then encode that to
804
+ # UTF-8 for compatibility with the lexer.
805
+
806
##
# Returns a copy of +str+ prepared for the lexer; +str+ itself is not
# modified.
#
# A leading UTF-8 BOM is stripped and selects "utf-8". A magic encoding
# comment on either of the first two lines overrides that. On rubies
# with String#encoding the chosen encoding (or none) is handed to
# #hack_encoding, which raises if the text cannot be turned into
# UTF-8; on older rubies a found encoding only produces a warning.
#
# The BOM is detected and removed by *bytes*: the copy is switched to
# ASCII-8BIT before slicing, because slicing a multibyte-encoded string
# with [3..-1] counts characters and would drop real source text.

def handle_encoding(str)
  str = str.dup
  ruby19 = str.respond_to? :encoding
  encoding = nil

  # Only the first two lines can hold a BOM or a magic comment.
  header = str.each_line.first(2)
  header.map! { |line| line.force_encoding "ASCII-8BIT" } if ruby19

  first = header.first || ""
  if first.unpack("C3") == [0xEF, 0xBB, 0xBF] then
    str.force_encoding "ASCII-8BIT" if ruby19
    encoding, str = "utf-8", str[3..-1]
  end

  magic = nil
  header.each do |line|
    magic = line[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
            line[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
    break if magic
  end
  encoding = magic.strip if magic

  if encoding then
    if ruby19 then
      # HACK for stupid emacs formats
      encoding = encoding.sub(/utf-8-.+$/, 'utf-8')
      hack_encoding str, encoding
    else
      warn "Skipping magic encoding comment"
    end
  else
    # nothing specified... ugh. try to encode as utf-8
    hack_encoding str if ruby19
  end

  str
end
836
+
837
##
# Re-encodes +str+ in place to UTF-8. Candidate encodings are tried in
# order (+extra+ first when given): the string is tagged with the
# candidate and, if its bytes are valid in it, transcoded to UTF-8.
# Raises a RuntimeError when the string does not end up tagged UTF-8.
#
# UTF-8 is tried before ISO-8859-1. ISO-8859-1 assigns a character to
# every byte value, so when it came first it matched everything: UTF-8
# source without a magic comment was transcoded as Latin-1 and the
# UTF-8 entry was never reached.

def hack_encoding(str, extra = nil)
  # apart from UTF-8 going first, this is in sorted order of occurrence
  # according to charlock_holmes against 500k files
  encodings = [
               extra,
               Encoding::UTF_8,
               Encoding::ISO_8859_1,
               Encoding::ISO_8859_2,
               Encoding::ISO_8859_9,
               Encoding::SHIFT_JIS,
               Encoding::WINDOWS_1252,
               Encoding::EUC_JP,
              ].compact

  # terrible, horrible, no good, very bad, last ditch effort.
  encodings.each do |enc|
    begin
      str.force_encoding enc
      if str.valid_encoding? then
        str.encode! Encoding::UTF_8
        break
      end
    rescue Encoding::InvalidByteSequenceError
      # try the next candidate
    rescue Encoding::UndefinedConversionError
      # try the next candidate
    end
  end

  # no amount of pain is enough for you.
  raise "Bad encoding. Need a magic encoding comment." unless
    str.encoding.name == "UTF-8"
end
870
+
871
+ ##
872
+ # Parse +str+ at path +file+ and return a sexp. Raises
873
+ # Timeout::Error if it runs for more than +time+ seconds.
874
+
875
def process(str, file = "(string)", time = 10)
  Timeout.timeout(time) do
    String === str or raise "bad val: #{str.inspect}"

    source = handle_encoding str

    self.file = file.dup
    self.lexer.reset
    self.lexer.source = source

    # racc's debug tracing follows the DEBUG environment variable
    @yydebug = ENV.has_key? 'DEBUG'

    do_parse
  end
end
890
+
891
+ alias :parse :process
892
+
893
##
# Unwraps a :begin node that holds exactly one child, giving the child
# the wrapper's line. Every other node, and nil, is returned unchanged.

def remove_begin(node)
  return node unless node && :begin == node[0] && node.size == 2

  inner = node[-1]
  inner.line = node.line
  inner
end
901
+
902
##
# Returns the parser to its starting state: resets the lexer and the
# environment, clears in_def / in_single and empties the comment stack.

def reset
  lexer.reset
  env.reset
  comments.clear
  self.in_def = false
  self.in_single = 0
end
909
+
910
##
# Raises a syntax error when a literal +block+ is given although
# +call_or_args+ already carries a block-pass argument.

def block_dup_check(call_or_args, block)
  return unless block && call_or_args.block_pass?

  syntax_error "Both block arg and actual block given."
end
914
+
915
##
# Normalises the argument node of a return-like statement. A
# one-element :array collapses to its element; an unparenthesized
# :splat is wrapped in :svalue; an :arglist whose first argument is a
# :splat is retagged :svalue. nil passes through.
#
# Raises RubyParser::SyntaxError (a RuntimeError) via #syntax_error when
# +node+ is a :block_pass. This used to abort with a placeholder
# "write a test 5" RuntimeError that made the intended message
# unreachable.

def ret_args(node)
  if node then
    syntax_error "block argument should not be given" if
      node[0] == :block_pass

    node = node.last if node[0] == :array && node.size == 2
    # HACK matz wraps ONE of the FOUR splats in a newline to
    # distinguish. I use paren for now. ugh
    node = s(:svalue, node) if node[0] == :splat and not node.paren
    node[0] = :svalue if node[0] == :arglist && node[1][0] == :splat
  end

  node
end
931
+
932
##
# Builds a Sexp from +args+, tagging it with the parser's file and,
# when the lexer has a source and the node has no line yet, with the
# lexer's current line number.

def s(*args)
  sexp = Sexp.new(*args)

  if lexer.source
    sexp.line ||= lexer.lineno
  end

  sexp.file = self.file
  sexp
end
938
+
939
##
# Unwraps +oldnode+ with #remove_begin, keeping its line, and for an
# :if node applies the same treatment to the then-branch.

def value_expr(oldnode) # HACK
  node = remove_begin oldnode
  node.line = oldnode.line if oldnode

  if node && node[0] == :if
    node[2] = value_expr(node[2])
  end

  node
end
945
+
946
##
# For a :block node, runs every statement through #remove_begin in
# place. Other nodes are returned as they are; a missing node gives
# nil.

def void_stmts(node)
  return nil unless node

  if node[0] == :block
    node[1..-1] = node[1..-1].map { |stmt| remove_begin(stmt) }
  end

  node
end
953
+
954
##
# Hook for parser warnings; +s+ is the message. Currently ignores it.

def warning(s)
  # do nothing for now
end
957
+
958
+ alias yyerror syntax_error
959
+
960
##
# Defers to the inherited error handler and, when it raises
# Racc::ParseError, rewrites the exception's message in place to
# "file:line :: message" before re-raising it. The rewritten message is
# also printed via warn when $DEBUG is set.

def on_error(et, ev, values)
  begin
    super
  rescue Racc::ParseError => err
    # I don't like how the exception obscures the error message
    located = "%s:%p :: %s" % [self.file, lexer.lineno, err.message.strip]
    err.message.replace located
    warn err.message if $DEBUG
    raise
  end
end
968
+
969
##
# A stack of variable scopes. @env holds one hash per scope, innermost
# first; @dyn holds, in step, the flag each scope was opened with.
# Lookups see the innermost scopes up to and including the first one
# whose flag is false.

class Environment
  attr_reader :env, :dyn

  # +dyn+ is accepted but not used; the first scope is opened by #reset.
  def initialize(dyn = false)
    @dyn = []
    @env = []
    reset
  end

  # Looks +k+ up in the visible scopes.
  def [](k)
    all[k]
  end

  # Records +k+ in the innermost scope. Storing +true+ is refused.
  def []=(k, v)
    raise "no" if v == true
    current[k] = v
  end

  # The visible scopes merged into one hash, inner entries winning.
  # With a single visible scope that scope's own hash is returned.
  def all
    last_visible = @dyn.index(false) || 0
    scopes = @env[0..last_visible].reverse
    scopes.inject { |merged, scope| merged.merge scope }
  end

  # The innermost scope.
  def current
    @env.first
  end

  # Opens a new innermost scope flagged with +dyn+.
  def extend(dyn = false)
    @dyn.unshift dyn
    @env.unshift({})
  end

  # Drops every scope and opens a single fresh one.
  def reset
    @dyn.clear
    @env.clear
    extend
  end

  # Closes the innermost scope; raises once no scope is left.
  def unextend
    @dyn.shift
    @env.shift
    raise "You went too far unextending env" if @env.empty?
  end
end
1013
+
1014
+ class StackState
1015
+ attr_reader :name
1016
+ attr_reader :stack
1017
+ attr_accessor :debug
1018
+
1019
+ def initialize(name)
1020
+ @name = name
1021
+ @stack = [false]
1022
+ @debug = false
1023
+ end
1024
+
1025
+ def inspect
1026
+ "StackState(#{@name}, #{@stack.inspect})"
1027
+ end
1028
+
1029
+ def is_in_state
1030
+ p :stack_is_in_state => [name, @stack.last, caller.first] if debug
1031
+ @stack.last
1032
+ end
1033
+
1034
+ def lexpop
1035
+ p :stack_lexpop => caller.first if debug
1036
+ raise if @stack.size == 0
1037
+ a = @stack.pop
1038
+ b = @stack.pop
1039
+ @stack.push(a || b)
1040
+ end
1041
+
1042
+ def pop
1043
+ r = @stack.pop
1044
+ p :stack_pop => [name, r, @stack, caller.first] if debug
1045
+ @stack.push false if @stack.size == 0
1046
+ r
1047
+ end
1048
+
1049
+ def push val
1050
+ @stack.push val
1051
+ p :stack_push => [name, @stack, caller.first] if debug
1052
+ nil
1053
+ end
1054
+ end
1055
+ end
1056
+
1057
# Racc-based parser with the shared RubyParserStuff behaviour mixed in.
class Ruby19Parser < Racc::Parser
  include RubyParserStuff

  # Parses +what+ with a fresh parser and prints the result with +p+.
  def self.do(what)
    parsed = new.process(what)
    p parsed
  end
end
1064
+
1065
# Racc-based parser with the shared RubyParserStuff behaviour mixed in.
class Ruby18Parser < Racc::Parser
  include RubyParserStuff

  # Parses +what+ with a fresh parser and prints the result with +p+.
  def self.do(what)
    parsed = new.process(what)
    p parsed
  end
end
1072
+
1073
+ ##
1074
+ # RubyParser is a compound parser that first attempts to parse using
1075
+ # the 1.9 syntax parser and falls back to the 1.8 syntax parser on a
1076
+ # parse error.
1077
+
1078
class RubyParser
  # Raised by the parsers' syntax_error helper.
  class SyntaxError < RuntimeError; end

  def initialize
    @p18 = Ruby18Parser.new
    @p19 = Ruby19Parser.new
  end

  # Parses +s+ (reported as file +f+, with a timeout of +t+ seconds)
  # using the 1.9 parser, retrying with the 1.8 parser when the first
  # attempt raises Racc::ParseError.
  def process(s, f = "(string)", t = 10) # parens for emacs *sigh*
    begin
      @p19.process s, f, t
    rescue Racc::ParseError
      @p18.process s, f, t
    end
  end

  alias :parse :process

  # Resets both underlying parsers.
  def reset
    @p18.reset
    @p19.reset
  end

  # Returns a parser matching the running interpreter: Ruby18Parser for
  # 1.8.x, Ruby19Parser for 1.9.x. Any other RUBY_VERSION raises.
  def self.for_current_ruby
    if RUBY_VERSION =~ /^1\.8/
      Ruby18Parser.new
    elsif RUBY_VERSION =~ /^1\.9/
      Ruby19Parser.new
    else
      raise "unrecognized RUBY_VERSION #{RUBY_VERSION}"
    end
  end
end
1110
+
1111
+ ############################################################
1112
+ # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
1113
+
1114
# Parser-side additions to Sexp.
class Sexp
  attr_writer :paren

  # Whether the node was marked as parenthesized; false until set.
  def paren
    @paren ||= false
  end

  # The payload of a two-element node. Raises on longer nodes.
  def value
    raise "multi item sexp" if size > 2
    last
  end

  # Always raises "no": a Sexp is not to be converted to a symbol.
  # (The original followed the raise with an unreachable
  # self.value.to_sym.)
  def to_sym
    raise "no"
  end

  # Appends every element of +x+.
  def add(x)
    concat x
  end

  # Always raises, reporting the receiver and +x+.
  def add_all(x)
    raise "no: #{self.inspect}.add_all #{x.inspect}" # TODO: need a test to trigger this
  end

  # True when any direct child is a :block_pass node.
  def block_pass?
    any? { |child| Sexp === child && child[0] == :block_pass }
  end

  alias :node_type :sexp_type
  alias :values :sexp_body # TODO: retire
end
1146
+
1147
+ # END HACK
1148
+ ############################################################