parser 0.9.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.autotest +50 -0
- data/.gemtest +0 -0
- data/History.txt +558 -0
- data/Manifest.txt +18 -0
- data/README.txt +87 -0
- data/Rakefile +192 -0
- data/bin/ruby_parse +96 -0
- data/bin/ruby_parse_extract_error +130 -0
- data/lib/gauntlet_rubyparser.rb +117 -0
- data/lib/ruby18_parser.rb +5706 -0
- data/lib/ruby18_parser.y +1846 -0
- data/lib/ruby19_parser.rb +6054 -0
- data/lib/ruby19_parser.y +2035 -0
- data/lib/ruby_lexer.rb +6789 -0
- data/lib/ruby_parser.rb +4 -0
- data/lib/ruby_parser_extras.rb +1148 -0
- data/test/test_ruby_lexer.rb +2028 -0
- data/test/test_ruby_parser.rb +1772 -0
- data/test/test_ruby_parser_extras.rb +228 -0
- metadata +163 -0
data/lib/ruby_parser.rb
ADDED
@@ -0,0 +1,1148 @@
|
|
1
|
+
# encoding: ASCII-8BIT
|
2
|
+
|
3
|
+
require 'racc/parser'
|
4
|
+
require 'ruby_lexer'
|
5
|
+
require 'sexp'
|
6
|
+
require 'timeout'
|
7
|
+
|
8
|
+
# WHY do I have to do this?!?
|
9
|
+
class Regexp
  # FIX: remove this - it makes no sense
  ONCE = 0 unless defined? ONCE

  # Encoding option bits, taken from literal regexps so the values always
  # agree with the running interpreter. Defined as a group, and only when
  # ENC_NONE is missing.
  unless defined? ENC_NONE then
    ENC_NONE, ENC_EUC, ENC_SJIS, ENC_UTF8 =
      [/x/n, /x/e, /x/s, /x/u].map { |re| re.options }
  end
end
|
19
|
+
|
20
|
+
# I hate ruby 1.9 string changes
|
21
|
+
# Only interpreters where indexing a string yields an integer need this
# shim: it lets code call +ord+ on the result of "str"[0] everywhere.
unless "a"[0] == "a"
  class Fixnum
    def ord
      self
    end
  end
end
|
26
|
+
|
27
|
+
module RubyParserStuff
|
28
|
+
VERSION = '0.9.alpha'
|
29
|
+
|
30
|
+
attr_accessor :lexer, :in_def, :in_single, :file
|
31
|
+
attr_reader :env, :comments
|
32
|
+
|
33
|
+
# Abort by raising RubyParser::SyntaxError with +msg+ as its message.
def syntax_error(msg)
  raise RubyParser::SyntaxError.new(msg)
end
|
36
|
+
|
37
|
+
# Append +node2+ to the argument list +node1+ and return the list.
# A missing +node1+ starts a new :arglist; an :array is retagged as
# :arglist; any other node is wrapped together with +node2+.
def arg_add(node1, node2) # TODO: nuke
  if node1
    node1[0] = :arglist if node1[0] == :array

    if node1[0] == :arglist
      node1 << node2
    else
      s(:arglist, node1, node2)
    end
  else
    s(:arglist, node2)
  end
end
|
45
|
+
|
46
|
+
# Attach the optional block-pass node +node2+ to the arguments +node1+.
# +node1+ is wrapped in an :arglist unless it already is an :arglist or
# an :array.
def arg_blk_pass(node1, node2) # TODO: nuke
  head = node1.first
  node1 = s(:arglist, node1) if head != :arglist && head != :array

  if node2
    node1 << node2
  end

  node1
end
|
51
|
+
|
52
|
+
# Append +node2+ to +node1+ wrapped in a :splat node and return +node1+.
# Raises "huh" when +node2+ is missing.
def arg_concat(node1, node2) # TODO: nuke
  raise "huh" if !node2

  splat = s(:splat, node2).compact
  node1 << splat
  node1
end
|
57
|
+
|
58
|
+
# Simplify a multiple-assignment left-hand side.
#
# A :masgn holding a single :array is flattened into a :masgn of its
# (recursively cleaned) elements. A two-element variable assignment
# collapses to its name. Anything else of a known type is returned as is;
# unknown types raise.
def clean_mlhs(sexp)
  type = sexp.sexp_type

  if type == :masgn
    return sexp unless sexp.size == 2 && sexp[1].sexp_type == :array

    cleaned = sexp[1][1..-1].map { |sub| clean_mlhs sub }
    s(:masgn, *cleaned)
  elsif [:gasgn, :iasgn, :lasgn, :cvasgn].include?(type)
    sexp.size == 2 ? sexp.last : sexp # larger nodes carry an optional value
  else
    raise "unsupported type: #{sexp.inspect}"
  end
end
|
76
|
+
|
77
|
+
# Build a block-variable list: the same node #args produces for the given
# pieces, retagged as :masgn.
def block_var(*args)
  masgn = self.args args
  masgn[0] = :masgn
  masgn
end
|
82
|
+
|
83
|
+
# Build the block-variable node from the plain variables in +ary+, an
# optional +splat+ and an optional +block+ argument.
#
# Returns a :masgn when there is more than one entry (or a splat is
# found), otherwise the single entry itself.
def block_var18(ary, splat, block)
  ary ||= s(:array)

  if splat
    name = Symbol === splat ? splat : splat[1]
    ary << :"*#{name}"
  end

  ary << :"&#{block[1]}" if block

  return ary.last unless ary.length > 2 || ary.splat # HACK

  s(:masgn, *ary[1..-1])
end
|
99
|
+
|
100
|
+
# Flatten the pieces in +args+ into a single :args node.
#
# :args/:block/:array nodes contribute their contents, a :block_arg
# becomes an "&name" symbol, :masgn nodes and bare symbols are appended
# as is, and "," separators and nils are skipped. Anything else raises.
def args(args)
  args.inject(s(:args)) do |result, arg|
    if Sexp === arg
      type = arg.sexp_type

      if [:args, :block, :array].include?(type)
        result.concat arg[1..-1]
      elsif type == :block_arg
        result << :"&#{arg.last}"
      elsif type == :masgn
        result << arg
      else
        raise "unhandled: #{arg.inspect}"
      end
    elsif Symbol === arg
      result << arg
    elsif "," == arg || arg.nil?
      # ignore
    else
      raise "unhandled: #{arg.inspect}"
    end

    result
  end
end
|
127
|
+
|
128
|
+
# Build the :attrasgn node for an indexed assignment target:
# +receiver+, the :"[]=" message and the contents of +index+ (if any).
def aryset(receiver, index)
  indices = (index || [])[1..-1]
  s(:attrasgn, receiver, :"[]=", *indices)
end
|
132
|
+
|
133
|
+
# Build the assignment node for the identifier +lhs+, optionally carrying
# +value+, and register the name in the environment.
#
# The node type follows the identifier's sigil: @@ gives :cvasgn (inside a
# method or singleton body) or :cvdecl, @ gives :iasgn, $ gives :gasgn, a
# leading capital gives :cdecl, anything else gives :lasgn.
#
# Assigning to self, nil, true, false, __LINE__ or __FILE__ is reported
# through #syntax_error. The previous "write a test 1" placeholder made
# that message unreachable, and the bare SyntaxError behind it named the
# core ScriptError rather than the parser's own error.
def assignable(lhs, value = nil)
  id = lhs.to_sym
  name = id.to_s

  syntax_error "Can't change the value of #{id}" if
    name =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/

  result = case name
           when /^@@/ then
             asgn = in_def || in_single > 0
             s((asgn ? :cvasgn : :cvdecl), id)
           when /^@/ then
             s(:iasgn, id)
           when /^\$/ then
             s(:gasgn, id)
           when /^[A-Z]/ then
             s(:cdecl, id)
           else
             case env[id]
             when :lvar, :dvar, nil then
               s(:lasgn, id)
             else
               raise "wtf? unknown type: #{env[id]}"
             end
           end

  env[id] ||= :lvar

  result << value if value

  return result
end
|
167
|
+
|
168
|
+
# Join two statements into a :block.
#
# Returns the other operand when one is nil, and just +tail+ when +head+
# is a bare :lit or :str. Otherwise +head+ (unwrapped from a :begin and
# wrapped in a :block if needed) gets +tail+ appended and takes the
# smaller of the two line numbers.
def block_append(head, tail)
  return head if tail.nil?
  return tail if head.nil?
  return tail if [:lit, :str].include?(head[0])

  first_line = [head.line, tail.line].compact.min

  body = remove_begin(head)
  body = s(:block, body) if body.node_type != :block

  body.line = first_line
  body << tail
end
|
185
|
+
|
186
|
+
# Rewrite +node+ for use as a condition.
#
# A regexp literal becomes a :match, :and/:or are rewritten recursively,
# and ranges (:dot2/:dot3) become flip-flops (:flip2/:flip3) after a
# "flip<hash>" entry is registered in the environment. Everything else is
# returned unchanged; nil stays nil.
def cond(node)
  return nil if node.nil?
  node = value_expr node

  case node.first
  when :lit
    Regexp === node.last ? s(:match, node) : node
  when :and, :or
    s(node.first, cond(node[1]), cond(node[2]))
  when :dot2, :dot3
    env["flip#{node.hash}"] = :lvar
    s(node.first == :dot2 ? :flip2 : :flip3, node[1], node[2])
  else
    node
  end
end
|
213
|
+
|
214
|
+
##
|
215
|
+
# for pure ruby systems only
|
216
|
+
|
217
|
+
# Defined only when the PURE_RUBY environment variable is set: runs the
# parse through racc's Ruby implementation.
if ENV['PURE_RUBY']
  def do_parse
    _racc_do_parse_rb(_racc_setup, false)
  end
end
|
220
|
+
|
221
|
+
# Build the node for +lhs+ =~ +rhs+.
#
# A regexp on the left (dynamic regexp or regexp literal) gives :match2,
# a regexp on the right gives :match3 with the operands swapped, and any
# other pairing becomes a plain :"=~" call. The result takes the line of
# +lhs+.
def get_match_node(lhs, rhs) # TODO: rename to new_match
  regexp_node = lambda do |node|
    node && ([:dregx, :dregx_once].include?(node[0]) ||
             (node[0] == :lit && Regexp === node.last))
  end

  return s(:match2, lhs, rhs).line(lhs.line) if regexp_node.call(lhs)
  return s(:match3, rhs, lhs).line(lhs.line) if regexp_node.call(rhs)

  new_call(lhs, :"=~", argl(rhs)).line(lhs.line)
end
|
242
|
+
|
243
|
+
# Build the node that reads the identifier +id+.
#
# The sigil picks the node type (:cvar, :ivar, :gvar, :const). For plain
# names the type recorded in the environment is used; unknown names
# become a receiver-less call.
def gettable(id)
  id = id.to_sym if String === id
  name = id.to_s

  node =
    if    name =~ /^@@/    then s(:cvar, id)
    elsif name =~ /^@/     then s(:ivar, id)
    elsif name =~ /^\$/    then s(:gvar, id)
    elsif name =~ /^[A-Z]/ then s(:const, id)
    elsif (kind = env[id]) then s(kind, id)
    else                        new_call(nil, id)
    end

  raise "identifier #{id.inspect} is not valid" unless node

  node
end
|
268
|
+
|
269
|
+
##
|
270
|
+
# Canonicalize conditionals. Eg:
|
271
|
+
#
|
272
|
+
# not x ? a : b
|
273
|
+
#
|
274
|
+
# becomes:
|
275
|
+
#
|
276
|
+
# x ? b : a
|
277
|
+
|
278
|
+
attr_accessor :canonicalize_conditions
|
279
|
+
|
280
|
+
# Set up a parser: an empty comment stack, a fresh environment shared
# with a new lexer, and condition canonicalization switched on.
#
# The lexer is given 18 or 19 when the class name contains that number,
# otherwise nil. +options+ is accepted but not read here.
def initialize(options = {})
  super()

  @comments = []
  @canonicalize_conditions = true
  @env = RubyParserStuff::Environment.new

  version = self.class.name[/1[89]/]
  self.lexer = RubyLexer.new(version && version.to_i)
  lexer.static_env = @env

  reset
end
|
295
|
+
|
296
|
+
# Append +item+ to +list+, creating or wrapping an :array node as needed.
def list_append(list, item) # TODO: nuke me *sigh*
  return s(:array, item) unless list

  already_array = Sexp === list && list.first == :array
  list = s(:array, list) unless already_array

  list << item
end
|
301
|
+
|
302
|
+
# Insert +item+ at the front of +list+ (right after the :array tag),
# wrapping +list+ in an :array node first when it is not one.
def list_prepend(item, list) # TODO: nuke me *sigh*
  target = Sexp === list && list[0] == :array ? list : s(:array, list)
  target.insert 1, item
  target
end
|
307
|
+
|
308
|
+
# Concatenate two adjacent string-ish nodes (:str, :dstr, :evstr) and
# return the combined node. Either operand may be nil, in which case the
# other is returned. Plain string parts are merged in place where
# possible; a +tail+ of any other type raises.
def literal_concat(head, tail)
  return tail unless head
  return head unless tail

  htype = head[0]
  ttype = tail[0]

  # An interpolation on the left needs a :dstr to hang the tail on.
  head = s(:dstr, '', head) if htype == :evstr

  if ttype == :str
    if htype == :str || (htype == :dstr && head.size == 2)
      head[-1] << tail[-1]
    else
      head << tail
    end
  elsif ttype == :dstr
    if htype == :str
      tail[1] = head[-1] + tail[1]
      head = tail
    else
      tail[0] = :array
      tail[1] = s(:str, tail[1])
      tail.delete_at 1 if tail[1] == s(:str, '')

      head.push(*tail[1..-1])
    end
  elsif ttype == :evstr
    head[0] = :dstr if htype == :str

    if head.size == 2 && tail.size > 1 && tail[1][0] == :str
      head[-1] << tail[1][-1]
      head[0] = :str if head.size == 2 # HACK ?
    else
      head.push(tail)
    end
  else
    x = [head, tail]
    raise "unknown type: #{x.inspect}"
  end

  head
end
|
351
|
+
|
352
|
+
# Combine +left+ and +right+ with the logical operator +type+.
#
# When +left+ is already an unparenthesized node of the same operator,
# +right+ is attached at the end of its right-hand chain and +left+ is
# returned; otherwise a new node is built.
def logop(type, left, right) # TODO: rename logical_op
  left = value_expr left

  chainable = left && left[0] == type && !left.paren
  return s(type, left, right) unless chainable

  node = left
  second = node[2]

  while second && second[0] == type && !second.paren
    node = second
    second = node[2]
  end

  node[2] = s(type, second, right)

  left
end
|
369
|
+
|
370
|
+
# Build the :"[]" call for an index expression. val[0] is the receiver
# (a :self receiver is dropped) and val[2] the optional index list.
def new_aref(val)
  val[2] ||= s(:arglist)
  index = val[2]
  index[0] = :arglist if index[0] == :array # REFACTOR

  recv = val[0].node_type == :self ? nil : val[0]
  new_call recv, :"[]", index
end
|
380
|
+
|
381
|
+
# Assemble a body from val = [body, rescue clauses, else, ensure].
#
# With rescue clauses the result is a :rescue node holding the body, each
# chained :resbody and the else part. An else without rescue is appended
# to the body (after a warning). An ensure part wraps whatever was built
# in an :ensure node.
def new_body(val)
  body, rescues, else_body, ensure_body = val[0], val[1], val[2], val[3]

  result = body

  if rescues
    result = s(:rescue)
    result << body if body

    clause = rescues
    while clause
      result << clause
      clause = clause.resbody(true)
    end

    result << else_body if else_body

    result.line = (body || rescues).line
  elsif !else_body.nil?
    warning("else without rescue is useless")
    result = block_append(result, else_body)
  end

  result = s(:ensure, result, ensure_body).compact if ensure_body
  result
end
|
406
|
+
|
407
|
+
# Wrap +x+ in an :arglist unless it already is one; nil passes through.
def argl(x)
  return x unless x && x[0] != :arglist

  s(:arglist, x)
end
|
411
|
+
|
412
|
+
# Report an attempt to assign to a back reference.
#
# +ref+ is a :nth_ref or :back_ref node; the error names its last
# element. The errors go through #syntax_error. The earlier
# "write a test 2/3" placeholder raises hid these messages, and the bare
# SyntaxError behind them named the core ScriptError rather than the
# parser's own error. Any other node type raises a RuntimeError.
def backref_assign_error ref
  # TODO: need a test for this... obviously
  case ref.first
  when :nth_ref then
    syntax_error "Can't set variable %p" % [ref.last]
  when :back_ref then
    syntax_error "Can't set back reference %p" % [ref.last]
  else
    raise "Unknown backref type: #{ref.inspect}"
  end
end
|
425
|
+
|
426
|
+
# Build a :call node for +recv+.+meth+ with the arguments spliced in
# directly after the method name.
#
# +args+ may be nil, an :arglist, an :array (retagged) or a single node
# (wrapped). The call takes the smallest line found among its sub-nodes.
def new_call(recv, meth, args = nil)
  # TODO: need a test with f(&b) to produce block_pass
  # TODO: need a test with f(&b) { } to produce warning

  args ||= s(:arglist)
  args[0] = :arglist if args.first == :array
  args = s(:arglist, args) unless args.first == :arglist

  # HACK quick hack to make this work quickly... easy to clean up above
  call = s(:call, recv, meth, *args[1..-1])

  lines = call.select { |item| Sexp === item }.map { |item| item.line }.compact
  call.line = lines.min unless lines.empty?

  call
end
|
444
|
+
|
445
|
+
# Build a :case node for +expr+ from the chained :when nodes in +body+.
#
# Each :when is appended in turn, with its slot 3 taken as the next
# clause; whatever remains after the last :when is the else body, stored
# as nil when it is an empty :block. A nested :block inside a :when is
# spliced into the clause. The node takes the line of +expr+, or of
# +body+ when there is no expression.
def new_case(expr, body)
  result = s(:case, expr)
  line = (expr || body).line

  while body && body.node_type == :when
    result << body
    body = body.delete_at 3
  end

  result.drop(2).each do |clause|
    stmts = clause.block(:delete)
    clause.concat stmts[1..-1] if stmts
  end

  # else
  body = nil if body == s(:block)
  result << body

  result.line = line
  result
end
|
466
|
+
|
467
|
+
# Build a :class node from the parser values: val[1] is the line, val[2]
# the path, val[3] the superclass and val[5] the body. A :block body is
# spliced in statement by statement. The most recent entry of the
# comment stack is attached to the node.
def new_class(val)
  line, path, superclass, body = val.values_at(1, 2, 3, 5)

  result = s(:class, path, superclass)

  if body
    if body.first == :block
      result.concat body[1..-1]
    else
      result << body
    end
  end

  result.line = line
  result.comments = comments.pop
  result
end
|
484
|
+
|
485
|
+
# Take the first Sexp among the parser values, strip redundant :begin
# wrappers from it and return it (nil when there is none).
def new_compstmt(val)
  first = val.find { |item| Sexp === item }
  stmts = void_stmts(first)
  stmts ? remove_begin(stmts) : stmts
end
|
490
|
+
|
491
|
+
# Build a :defn node from the parser values: val[1] is the name, val[3]
# the arguments and val[4] the body. A missing body becomes s(:nil) and a
# :block body is spliced in. The most recent entry of the comment stack
# is attached to the node.
def new_defn(val)
  name, args, body = val.values_at(1, 3, 4)
  body ||= s(:nil)

  result = s(:defn, name.to_sym, args)

  if body.first == :block
    result.concat body[1..-1]
  else
    result << body
  end

  result.comments = comments.pop
  result
end
|
508
|
+
|
509
|
+
# Build a :defs (singleton method) node from the parser values: val[1] is
# the receiver, val[4] the name, val[6] the arguments and val[7] the
# body. A :block body is spliced in. The node takes the receiver's line
# and the most recent entry of the comment stack.
def new_defs(val)
  recv, name, args, body = val.values_at(1, 4, 6, 7)

  result = s(:defs, recv, name.to_sym, args)

  if body
    if body.first == :block
      result.concat body[1..-1]
    else
      result << body
    end
  end

  result.line = recv.line
  result.comments = comments.pop
  result
end
|
526
|
+
|
527
|
+
# Build a :for node over +expr+ binding +var+, with +body+ appended when
# present. The node takes the line of +var+.
def new_for(expr, var, body)
  loop_node = s(:for, expr, var).line(var.line)
  loop_node << body if body
  loop_node
end
|
532
|
+
|
533
|
+
# Build an :if node from condition +c+ and the branches +t+ and +f+.
#
# When canonicalization is on and the condition is a :not, the negation
# is dropped and the branches are swapped. The node takes the smallest
# line among the three inputs.
def new_if(c, t, f)
  first_line = [c.line, t && t.line, f && f.line].compact.min

  c = cond c

  if c[0] == :not && canonicalize_conditions
    c = c.last
    t, f = f, t
  end

  s(:if, c, t, f).line(first_line)
end
|
539
|
+
|
540
|
+
# Build an :iter node from an optional +call+, the block arguments and an
# optional +body+.
#
# Missing arguments become an empty :args, a bare symbol is wrapped, and
# any other argument node is retagged as :args; the value 0 is kept as it
# is.
def new_iter(call, args, body)
  args ||= s(:args)
  args = s(:args, args) if Symbol === args

  parts = [args]
  parts.unshift call if call
  parts << body if body

  result = s(:iter, *parts)

  args[0] = :args unless args == 0

  result
end
|
555
|
+
|
556
|
+
# Complete the multiple assignment +lhs+ with the value +rhs+.
#
# With +wrap+ the value is wrapped in :to_ary (when lhs[1] is present) or
# :array. A nil in slot 1 of +lhs+ is removed before the value is
# appended.
def new_masgn(lhs, rhs, wrap = false)
  rhs = value_expr(rhs)

  if wrap
    rhs = lhs[1] ? s(:to_ary, rhs) : s(:array, rhs)
  end

  lhs.delete_at 1 if lhs[1].nil?
  lhs << rhs
end
|
565
|
+
|
566
|
+
# Build a :module node from the parser values: val[1] is the line, val[2]
# the path and val[4] the body. A :block body is spliced in. The most
# recent entry of the comment stack is attached to the node.
def new_module(val)
  line, path, body = val.values_at(1, 2, 4)

  result = s(:module, path)

  if body # REFACTOR?
    if body.first == :block
      result.concat body[1..-1]
    else
      result << body
    end
  end

  result.line = line
  result.comments = comments.pop
  result
end
|
583
|
+
|
584
|
+
# Build the node for an operator assignment (x op= value) from
# val = [lhs, operator, value].
#
# "||" and "&&" give :op_asgn_or / :op_asgn_and around a read of the
# variable and the completed assignment. Every other operator stores a
# call of that operator on the variable as the assignment's value.
def new_op_asgn(val)
  lhs, op, arg = val[0], val[1].to_sym, val[2]
  name = lhs.value
  arg = remove_begin(arg)

  short_circuit = { :"||" => :op_asgn_or, :"&&" => :op_asgn_and }[op]

  result = if short_circuit
             lhs << arg
             s(short_circuit, gettable(name), lhs)
           else
             # TODO: why [2] ?
             lhs[2] = new_call(gettable(name), op, argl(arg))
             lhs
           end

  result.line = lhs.line
  result
end
|
603
|
+
|
604
|
+
# Build a regexp node from the parser values: val[1] is the content node
# (nil for an empty regexp) and val[3] the option letters.
#
# A :str content becomes a :lit holding a compiled Regexp. A :dstr is
# retagged :dregx (or :dregx_once with the "o" option). Any other node is
# wrapped in a :dregx. Non-zero option bits are appended to dynamic
# regexps. Unknown option letters raise a RuntimeError.
def new_regexp val
  node = val[1] || s(:str, '')
  options = val[3]

  # FIX: this has a better home
  flags = {
    'x' => Regexp::EXTENDED,
    'i' => Regexp::IGNORECASE,
    'm' => Regexp::MULTILINE,
    'o' => Regexp::ONCE,
    'n' => Regexp::ENC_NONE,
    'e' => Regexp::ENC_EUC,
    's' => Regexp::ENC_SJIS,
    'u' => Regexp::ENC_UTF8,
  }

  o, k = 0, nil
  options.split(//).uniq.each do |c|
    v = flags[c]
    raise "unknown regexp option: #{c}" unless v
    # The values are bit flags, so they are OR-ed together. Adding them
    # turns two options that share a bit into a different flag.
    o |= v
    k = c if c =~ /[esu]/
  end

  case node[0]
  when :str then
    node[0] = :lit
    node[1] = if k then
                Regexp.new(node[1], o, k)
              else
                begin
                  Regexp.new(node[1], o)
                rescue RegexpError => e
                  # Best effort: retry with only the ENC_NONE flag.
                  warn "Ignoring: #{e.message}"
                  Regexp.new(node[1], Regexp::ENC_NONE)
                end
              end
  when :dstr then
    node[0] = options =~ /o/ ? :dregx_once : :dregx
    node << o if o and o != 0
  else
    node = s(:dregx, '', node);
    node[0] = :dregx_once if options =~ /o/
    node << o if o and o != 0
  end

  node
end
|
653
|
+
|
654
|
+
# Build a :resbody node from the exception list +cond+ and +body+.
# A :block body is spliced in statement by statement; any other body
# (including nil) is stored as a single element.
def new_resbody(cond, body)
  stmts = if body && body.first == :block
            body.shift # remove block and splat it in directly
            body
          else
            [body]
          end

  s(:resbody, cond, *stmts)
end
|
662
|
+
|
663
|
+
# Build an :sclass node from the parser values: val[2] is the line,
# val[3] the receiver and val[7] the body (a :block is spliced in).
# The parser's in_def and in_single state is then set from val[4] and
# val[6].
def new_sclass(val)
  recv, saved_in_def, saved_in_single, body = val.values_at(3, 4, 6, 7)

  result = s(:sclass, recv)

  if body
    if body.first == :block
      result.concat body[1..-1]
    else
      result << body
    end
  end

  result.line = val[2]
  self.in_def = saved_in_def
  self.in_single = saved_in_single
  result
end
|
681
|
+
|
682
|
+
# Build a :super node. A :block_pass argument is stored as is; otherwise
# the contents of the argument list (if any) are spliced in.
def new_super(args)
  return s(:super, args) if args && args.node_type == :block_pass

  list = args || s(:arglist)
  s(:super, *list[1..-1])
end
|
690
|
+
|
691
|
+
# Build an :undef node for +n+. When +m+ is given, +n+ is an existing
# node and a new :undef for +m+ is appended to it via #block_append.
def new_undef(n, m = nil)
  return s(:undef, n) unless m

  block_append(n, s(:undef, m))
end
|
698
|
+
|
699
|
+
# Build an :until loop; see #new_until_or_while.
def new_until(block, expr, pre)
  new_until_or_while(:until, block, expr, pre)
end
|
702
|
+
|
703
|
+
# Build a loop node of +type+ (:until or :while) with condition +expr+,
# body +block+ and the +pre+ flag.
#
# A :begin body is unwrapped and forces +pre+ to false. When
# canonicalization is on and the condition is a :not, the negation is
# dropped and the opposite loop type is used. The node takes the smaller
# of the body's and the condition's line.
def new_until_or_while(type, block, expr, pre)
  line = [block && block.line, expr.line].compact.min
  block, pre = block.last, false if block && block[0] == :begin

  expr = cond expr

  result = if expr.first == :not && canonicalize_conditions
             inverse = type == :until ? :while : :until
             s(inverse, expr.last, block, pre)
           else
             s(type, expr, block, pre)
           end

  result.line = line
  result
end
|
719
|
+
|
720
|
+
# Build a :when clause from its condition list and body.
def new_when(cond, body)
  s(:when, cond, body)
end
|
723
|
+
|
724
|
+
# Build a :while loop; see #new_until_or_while.
def new_while(block, expr, pre)
  new_until_or_while(:while, block, expr, pre)
end
|
727
|
+
|
728
|
+
# Turn a string node into the matching backtick (command) node:
# :str becomes :xstr, :dstr becomes :dxstr, any other node is wrapped in
# a :dxstr, and nil yields an empty :xstr.
def new_xstring(str)
  return s(:xstr, '') unless str

  case str[0]
  when :str  then str[0] = :xstr
  when :dstr then str[0] = :dxstr
  else            str = s(:dxstr, '', str)
  end

  str
end
|
743
|
+
|
744
|
+
# Build a :yield node with the contents of +args+ spliced in.
#
# +args+ may be nil, an :arglist, an :array (retagged) or a single node
# (wrapped). A :block_pass argument is reported through #syntax_error.
# The earlier "write a test 4" placeholder raise hid that message, and
# the bare SyntaxError behind it named the core ScriptError rather than
# the parser's own error.
def new_yield args = nil
  # TODO: raise args.inspect unless [:arglist].include? args.first # HACK
  syntax_error "Block argument should not be given." if
    args && args.node_type == :block_pass

  args ||= s(:arglist)

  # TODO: I can prolly clean this up
  args[0] = :arglist if args.first == :array
  args = s(:arglist, args) unless args.first == :arglist

  return s(:yield, *args[1..-1])
end
|
758
|
+
|
759
|
+
# Fetch the next token from the lexer: lexer.advance when MiniTest is
# defined, lexer.advance_and_decorate otherwise.
def next_token
  defined?(MiniTest) ? lexer.advance : lexer.advance_and_decorate
end
|
766
|
+
|
767
|
+
# Complete the assignment target +lhs+ with the value +rhs+ and return
# it (nil when there is no target).
#
# Variable and constant targets get the value appended; a :const target
# is retagged :cdecl first. For :attrasgn and :call targets the value is
# added to the argument list. Other target types raise.
def node_assign(lhs, rhs) # TODO: rename new_assign
  return nil unless lhs

  rhs = value_expr rhs

  lhs[0] = :cdecl if lhs[0] == :const

  case lhs[0]
  when :gasgn, :iasgn, :lasgn, :masgn, :cdecl, :cvdecl, :cvasgn
    lhs << rhs
  when :attrasgn, :call
    # A trailing non-symbol element is the existing argument list.
    args = lhs.pop unless Symbol === lhs.last
    lhs.concat arg_add(args, rhs)[1..-1]
  else
    raise "unknown lhs #{lhs.inspect}"
  end

  lhs
end
|
787
|
+
|
788
|
+
##
|
789
|
+
# Returns a UTF-8 encoded string after processing BOMs and magic
|
790
|
+
# encoding comments.
|
791
|
+
#
|
792
|
+
# Holy crap... ok. Here goes:
|
793
|
+
#
|
794
|
+
# Ruby's file handling and encoding support is insane. We need to be
|
795
|
+
# able to lex a file. The lexer file is explicitly UTF-8 to make
|
796
|
+
# things cleaner. This allows us to deal with extended chars in
|
797
|
+
# class and method names. In order to do this, we need to encode all
|
798
|
+
# input source files as UTF-8. First, we look for a UTF-8 BOM by
|
799
|
+
# looking at the first line while forcing its encoding to
|
800
|
+
# ASCII-8BIT. If we find a BOM, we strip it and set the expected
|
801
|
+
# encoding to UTF-8. Then, we search for a magic encoding comment.
|
802
|
+
# If found, it overrides the BOM. Finally, we force the encoding of
|
803
|
+
# the input string to whatever was found, and then encode that to
|
804
|
+
# UTF-8 for compatibility with the lexer.
|
805
|
+
|
806
|
+
# Works on a copy of +str+; the caller's string is left untouched.
# Returns the copy after BOM removal and, on encoding-aware Rubies,
# re-encoding through #hack_encoding.
def handle_encoding str
  str = str.dup
  ruby19 = str.respond_to? :encoding
  encoding = nil

  header = str.lines.first(2)
  header.map! { |line| line.force_encoding "ASCII-8BIT" } if ruby19

  first = header.first || ""

  # Compare raw byte values so the check does not depend on the encoding
  # of this source file or of the input.
  if first.unpack("C3") == [0xEF, 0xBB, 0xBF] then
    # The BOM is three *bytes*. Indexing an encoding-aware string counts
    # characters, so switch to a binary view before slicing; otherwise
    # the BOM (one character) plus two real characters are dropped.
    str.force_encoding "ASCII-8BIT" if ruby19
    str = str[3..-1]
    encoding = "utf-8"
  end

  # A magic comment in the first two lines overrides the BOM.
  header.each do |line|
    magic = line[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
            line[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]

    if magic then
      encoding = magic.strip
      break
    end
  end

  if encoding then
    if ruby19 then
      encoding = encoding.sub(/utf-8-.+$/, 'utf-8') # HACK for stupid emacs formats
      hack_encoding str, encoding
    else
      warn "Skipping magic encoding comment"
    end
  else
    # nothing specified... ugh. try to encode as utf-8
    hack_encoding str if ruby19
  end

  str
end
|
836
|
+
|
837
|
+
# Re-encode +str+ in place as UTF-8.
#
# The candidates are tried in order: +extra+ first when given, then the
# fixed list below. The first encoding under which the bytes are valid is
# used to transcode the string. Raises a RuntimeError when the string
# does not end up as UTF-8.
def hack_encoding str, extra = nil
  # UTF-8 has to come before the single-byte ISO-8859-* encodings. Every
  # byte sequence is valid ISO-8859-1, so with that entry first UTF-8
  # input was transcoded as Latin-1 (producing mojibake) and no later
  # candidate was ever reached. The rest keeps the original order, which
  # the source says is sorted by frequency according to charlock_holmes.
  encodings = [
               extra,
               Encoding::UTF_8,
               Encoding::ISO_8859_1,
               Encoding::ISO_8859_2,
               Encoding::ISO_8859_9,
               Encoding::SHIFT_JIS,
               Encoding::WINDOWS_1252,
               Encoding::EUC_JP,
              ].compact

  # terrible, horrible, no good, very bad, last ditch effort.
  encodings.each do |enc|
    begin
      str.force_encoding enc
      if str.valid_encoding? then
        str.encode! Encoding::UTF_8
        break
      end
    rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
      # this candidate cannot be transcoded; try the next one
    end
  end

  # no amount of pain is enough for you.
  raise "Bad encoding. Need a magic encoding comment." unless
    str.encoding.name == "UTF-8"
end
|
870
|
+
|
871
|
+
##
|
872
|
+
# Parse +str+ at path +file+ and return a sexp. Raises
|
873
|
+
# Timeout::Error if it runs for more than +time+ seconds.
|
874
|
+
|
875
|
+
# The whole run happens inside Timeout.timeout(+time+): the input is
# checked to be a String, normalized by #handle_encoding and handed to a
# freshly reset lexer before parsing starts. Racc debug output is
# switched on when the DEBUG environment variable exists.
def process(str, file = "(string)", time = 10)
  Timeout.timeout(time) do
    raise "bad val: #{str.inspect}" unless str.is_a?(String)

    source = handle_encoding str

    self.file = file.dup
    lexer.reset
    lexer.source = source

    @yydebug = ENV.key?('DEBUG')

    do_parse
  end
end
|
890
|
+
|
891
|
+
alias :parse :process
|
892
|
+
|
893
|
+
# Unwrap a :begin node that holds exactly one child: the child is
# returned carrying the wrapper's line. Any other node (or nil) is
# returned unchanged.
def remove_begin(node)
  return node unless node && :begin == node[0] && node.size == 2

  inner = node[-1]
  inner.line = node.line
  inner
end
|
901
|
+
|
902
|
+
# Return the parser to its initial state: lexer reset, not inside any
# method or singleton definition, environment reset and comment stack
# emptied.
def reset
  lexer.reset

  self.in_def = false
  self.in_single = 0

  env.reset
  comments.clear
end
|
909
|
+
|
910
|
+
# Raise a syntax error when a literal +block+ is given and
# +call_or_args+ already contains a block-pass argument.
def block_dup_check(call_or_args, block)
  return unless block && call_or_args.block_pass?

  syntax_error "Both block arg and actual block given."
end
|
914
|
+
|
915
|
+
# Normalize the argument node of a return-like statement.
#
# A single-element :array is unwrapped, an unparenthesized :splat is
# wrapped in :svalue, and an :arglist whose first entry is a :splat is
# retagged :svalue. nil passes through. A :block_pass argument is
# reported through #syntax_error. The earlier "write a test 5"
# placeholder raise hid that message, and the bare SyntaxError behind it
# named the core ScriptError rather than the parser's own error.
def ret_args node
  if node then
    syntax_error "block argument should not be given" if
      node[0] == :block_pass

    node = node.last if node[0] == :array && node.size == 2
    # HACK matz wraps ONE of the FOUR splats in a newline to
    # distinguish. I use paren for now. ugh
    node = s(:svalue, node) if node[0] == :splat and not node.paren
    node[0] = :svalue if node[0] == :arglist && node[1][0] == :splat
  end

  node
end
|
931
|
+
|
932
|
+
# Build a Sexp from +args+, stamped with the current file. When the
# lexer has a source, a node without a line also gets the lexer's
# current line number.
def s(*args)
  Sexp.new(*args).tap do |sexp|
    sexp.line ||= lexer.lineno if lexer.source
    sexp.file = file
  end
end
|
938
|
+
|
939
|
+
# Prepare +oldnode+ for use as a value: a redundant :begin wrapper is
# removed, the original line is kept, and slot 2 of an :if node gets the
# same treatment. nil passes through.
def value_expr(oldnode) # HACK
  node = remove_begin oldnode

  if oldnode
    node.line = oldnode.line
  end

  if node && node[0] == :if
    node[2] = value_expr(node[2])
  end

  node
end
|
945
|
+
|
946
|
+
# Strip redundant :begin wrappers from every statement of a :block.
# Nodes that are not a :block are returned untouched; nil stays nil.
def void_stmts(node)
  return nil unless node
  return node unless node[0] == :block

  1.upto(node.size - 1) { |i| node[i] = remove_begin(node[i]) }
  node
end
|
953
|
+
|
954
|
+
# Hook for parser warnings. The message is currently discarded.
def warning(s)
  # do nothing for now
end
|
957
|
+
|
958
|
+
alias yyerror syntax_error
|
959
|
+
|
960
|
+
# Error hook: delegates to the superclass implementation and, when that
# raises Racc::ParseError, rewrites the message in place to
# "file:line :: message" before re-raising. The message is also echoed
# via warn when $DEBUG is set.
def on_error(et, ev, values)
  begin
    super
  rescue Racc::ParseError => e
    # I don't like how the exception obscures the error message
    located = "%s:%p :: %s" % [self.file, lexer.lineno, e.message.strip]
    e.message.replace located
    warn e.message if $DEBUG
    raise
  end
end
|
968
|
+
|
969
|
+
# A stack of variable scopes. The newest scope sits at the front of
# @env; @dyn holds one flag per scope, recording the value passed to
# #extend when the scope was opened.
class Environment
  attr_reader :env, :dyn

  # Start with a single scope. The +dyn+ argument is accepted but unused.
  def initialize dyn = false
    @dyn = []
    @env = []
    reset
  end

  # Drop every scope and open a fresh one.
  def reset
    @dyn.clear
    @env.clear
    extend
  end

  # Open a new innermost scope flagged with +dyn+.
  def extend dyn = false
    @dyn.unshift dyn
    @env.unshift({})
  end

  # Close the innermost scope; the last remaining scope may not be closed.
  def unextend
    @dyn.shift
    @env.shift
    raise "You went too far unextending env" if @env.empty?
  end

  # The innermost scope.
  def current
    @env.first
  end

  # One hash merged from the innermost scope out to the first scope
  # flagged false; on a name clash the inner scope wins.
  def all
    depth = @dyn.index(false) || 0
    visible = @env[0..depth]
    visible.reverse.inject { |merged, scope| merged.merge scope }
  end

  # Look +k+ up in the visible scopes.
  def [] k
    all[k]
  end

  # Record +v+ for +k+ in the innermost scope; a literal true is rejected.
  def []= k, v
    raise "no" if v == true
    current[k] = v
  end
end
|
1013
|
+
|
1014
|
+
# A named stack of flags that always holds at least one entry. With
# +debug+ set, each operation prints a trace line including its caller.
class StackState
  attr_reader :name
  attr_reader :stack
  attr_accessor :debug

  def initialize(name)
    @name  = name
    @stack = [false]
    @debug = false
  end

  def inspect
    "StackState(#{@name}, #{@stack.inspect})"
  end

  # The flag on top of the stack.
  def is_in_state
    p :stack_is_in_state => [name, @stack.last, caller.first] if debug
    @stack.last
  end

  # Replace the top two entries with their logical OR.
  def lexpop
    p :stack_lexpop => caller.first if debug
    raise if @stack.empty?
    top = @stack.pop
    below = @stack.pop
    @stack.push(top || below)
  end

  # Remove and return the top flag; an emptied stack is refilled with
  # false.
  def pop
    top = @stack.pop
    p :stack_pop => [name, top, @stack, caller.first] if debug
    @stack.push false if @stack.empty?
    top
  end

  # Push +val+; always returns nil.
  def push val
    @stack.push val
    p :stack_push => [name, @stack, caller.first] if debug
    nil
  end
end
|
1055
|
+
end
|
1056
|
+
|
1057
|
+
class Ruby19Parser < Racc::Parser
  include RubyParserStuff

  class << self
    # Parse +what+ with a fresh parser and print the result with +p+.
    def do(what)
      p new.process(what)
    end
  end
end
|
1064
|
+
|
1065
|
+
class Ruby18Parser < Racc::Parser
  include RubyParserStuff

  class << self
    # Parse +what+ with a fresh parser and print the result with +p+.
    def do(what)
      p new.process(what)
    end
  end
end
|
1072
|
+
|
1073
|
+
##
|
1074
|
+
# RubyParser is a compound parser that first attempts to parse using
|
1075
|
+
# the 1.9 syntax parser and falls back to the 1.8 syntax parser on a
|
1076
|
+
# parse error.
|
1077
|
+
|
1078
|
+
class RubyParser
  class SyntaxError < RuntimeError; end

  def initialize
    @p18 = Ruby18Parser.new
    @p19 = Ruby19Parser.new
  end

  # Parse +s+ with the 1.9 parser; on Racc::ParseError retry the same
  # input with the 1.8 parser. +f+ and +t+ are passed through unchanged.
  def process(s, f = "(string)", t = 10) # parens for emacs *sigh*
    begin
      @p19.process s, f, t
    rescue Racc::ParseError
      @p18.process s, f, t
    end
  end

  alias :parse :process

  # Reset both underlying parsers.
  def reset
    @p18.reset
    @p19.reset
  end

  # A parser matching the running interpreter's syntax. Only 1.8 and 1.9
  # are recognized; any other version raises.
  def self.for_current_ruby
    if RUBY_VERSION =~ /^1\.8/
      Ruby18Parser.new
    elsif RUBY_VERSION =~ /^1\.9/
      Ruby19Parser.new
    else
      raise "unrecognized RUBY_VERSION #{RUBY_VERSION}"
    end
  end
end
|
1110
|
+
|
1111
|
+
############################################################
|
1112
|
+
# HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
|
1113
|
+
|
1114
|
+
class Sexp
  attr_writer :paren

  # Whether the node was written in parentheses; false until set.
  def paren
    @paren ||= false
  end

  # The payload of a node with at most one element after its type.
  def value
    raise "multi item sexp" unless size <= 2
    last
  end

  # Deliberately disabled: always raises "no". The conversion below the
  # raise is never reached.
  def to_sym
    raise "no"
    self.value.to_sym
  end

  # Append every element of +x+.
  def add x
    concat x
  end

  def add_all x
    raise "no: #{self.inspect}.add_all #{x.inspect}" # TODO: need a test to trigger this
  end

  # True when a direct child is a :block_pass node.
  def block_pass?
    any? { |child| Sexp === child && child[0] == :block_pass }
  end

  alias :node_type :sexp_type
  alias :values :sexp_body # TODO: retire
end
|
1146
|
+
|
1147
|
+
# END HACK
|
1148
|
+
############################################################
|