parser 0.9.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.autotest +50 -0
- data/.gemtest +0 -0
- data/History.txt +558 -0
- data/Manifest.txt +18 -0
- data/README.txt +87 -0
- data/Rakefile +192 -0
- data/bin/ruby_parse +96 -0
- data/bin/ruby_parse_extract_error +130 -0
- data/lib/gauntlet_rubyparser.rb +117 -0
- data/lib/ruby18_parser.rb +5706 -0
- data/lib/ruby18_parser.y +1846 -0
- data/lib/ruby19_parser.rb +6054 -0
- data/lib/ruby19_parser.y +2035 -0
- data/lib/ruby_lexer.rb +6789 -0
- data/lib/ruby_parser.rb +4 -0
- data/lib/ruby_parser_extras.rb +1148 -0
- data/test/test_ruby_lexer.rb +2028 -0
- data/test/test_ruby_parser.rb +1772 -0
- data/test/test_ruby_parser_extras.rb +228 -0
- metadata +163 -0
data/lib/ruby_parser.rb
ADDED
@@ -0,0 +1,1148 @@
|
|
1
|
+
# encoding: ASCII-8BIT
|
2
|
+
|
3
|
+
require 'racc/parser'
|
4
|
+
require 'ruby_lexer'
|
5
|
+
require 'sexp'
|
6
|
+
require 'timeout'
|
7
|
+
|
8
|
+
# WHY do I have to do this?!?
|
9
|
+
class Regexp
  # Value used for the "o" regexp flag; it is 0, so adding it to an
  # options mask leaves the mask unchanged.
  # FIX: remove this - it makes no sense
  defined?(ONCE) or ONCE = 0

  # Encoding option bits, each read off a literal compiled with the
  # corresponding flag. Only defined when ENC_NONE is not already there.
  if not defined?(ENC_NONE) then
    ENC_NONE, ENC_EUC, ENC_SJIS, ENC_UTF8 =
      [/x/n, /x/e, /x/s, /x/u].map { |re| re.options }
  end
end
|
19
|
+
|
20
|
+
# I hate ruby 1.9 string changes
|
21
|
+
# Only patch when indexing a string with an integer does not hand back a
# one-character string; in that case the result gets an identity #ord.
if "a"[0] != "a" then
  class Fixnum
    # Returns the receiver itself.
    def ord
      self
    end
  end
end
|
26
|
+
|
27
|
+
module RubyParserStuff
|
28
|
+
VERSION = '0.9.alpha'
|
29
|
+
|
30
|
+
attr_accessor :lexer, :in_def, :in_single, :file
|
31
|
+
attr_reader :env, :comments
|
32
|
+
|
33
|
+
# Raises RubyParser::SyntaxError with +msg+ as its message.
def syntax_error(msg)
  raise(RubyParser::SyntaxError, msg)
end
|
36
|
+
|
37
|
+
# Appends +node2+ to the argument list +node1+ and returns the list.
#
# * no +node1+: a fresh :arglist holding just +node2+
# * +node1+ tagged :array or :arglist: retagged :arglist and extended in place
# * anything else: a fresh :arglist wrapping both nodes
def arg_add(node1, node2) # TODO: nuke
  if !node1 then
    s(:arglist, node2)
  elsif node1[0] == :array || node1[0] == :arglist then
    node1[0] = :arglist
    node1 << node2
  else
    s(:arglist, node1, node2)
  end
end
|
45
|
+
|
46
|
+
# Returns +node1+ as a list with +node2+ appended when +node2+ is given.
# +node1+ is wrapped in a new :arglist unless it is already tagged
# :arglist or :array.
def arg_blk_pass(node1, node2) # TODO: nuke
  case node1.first
  when :arglist, :array then
    # already list-shaped, extend in place
  else
    node1 = s(:arglist, node1)
  end

  node1 << node2 if node2
  node1
end
|
51
|
+
|
52
|
+
# Appends a :splat node wrapping +node2+ to +node1+ and returns +node1+.
# Raises "huh" when +node2+ is missing.
def arg_concat(node1, node2) # TODO: nuke
  node2 or raise "huh"

  splat = s(:splat, node2).compact
  node1 << splat
  node1
end
|
57
|
+
|
58
|
+
# Simplifies one multiple-assignment target.
#
# * a :masgn holding a single :array becomes a new :masgn whose children
#   are the cleaned elements of that array; any other :masgn is returned
#   untouched
# * a two-element :gasgn / :iasgn / :lasgn / :cvasgn collapses to its
#   last element; longer ones (optional value present) are returned as is
# * any other node type raises
def clean_mlhs(sexp)
  kind = sexp.sexp_type

  if kind == :masgn then
    return sexp unless sexp.size == 2 && sexp[1].sexp_type == :array

    cleaned = sexp[1][1..-1].map { |sub| clean_mlhs(sub) }
    return s(:masgn, *cleaned)
  end

  unless [:gasgn, :iasgn, :lasgn, :cvasgn].include?(kind) then
    raise "unsupported type: #{sexp.inspect}"
  end

  sexp.size == 2 ? sexp.last : sexp # longer form carries an optional value
end
|
76
|
+
|
77
|
+
# Builds the list that #args produces for +args+ and retags it :masgn.
def block_var(*args)
  list = self.args(args)
  list[0] = :masgn
  list
end
|
82
|
+
|
83
|
+
# Assembles block parameters from an optional list +ary+, an optional
# +splat+ and an optional +block+ node.
#
# The splat is appended as :"*name" and the block as :"&name". Returns a
# :masgn of the entries when there is more than one or a splat is
# present, otherwise just the last entry.
def block_var18(ary, splat, block)
  ary ||= s(:array)

  if splat then
    star = Symbol === splat ? splat : splat[1]
    ary << :"*#{star}"
  end

  ary << :"&#{block[1]}" if block

  return s(:masgn, *ary[1..-1]) if ary.length > 2 || ary.splat # HACK

  ary.last
end
|
99
|
+
|
100
|
+
# Flattens a mixed list of argument pieces into a single :args node.
#
# * :args / :block / :array sexps contribute their children
# * :block_arg becomes :"&name"
# * :masgn sexps and plain symbols are appended unchanged
# * "," and nil are skipped
# * everything else raises "unhandled: ..."
def args(args)
  result = s(:args)

  args.each do |arg|
    if Sexp === arg then
      kind = arg.sexp_type

      if [:args, :block, :array].include?(kind) then
        result.concat arg[1..-1]
      elsif kind == :block_arg then
        result << :"&#{arg.last}"
      elsif kind == :masgn then
        result << arg
      else
        raise "unhandled: #{arg.inspect}"
      end
    elsif Symbol === arg then
      result << arg
    elsif "," == arg || arg.nil? then
      # ignore
    else
      raise "unhandled: #{arg.inspect}"
    end
  end

  result
end
|
127
|
+
|
128
|
+
# Builds an :attrasgn node calling :"[]=" on +receiver+, passing every
# element of +index+ after its first as arguments. A missing +index+
# yields no arguments.
def aryset(receiver, index)
  subscripts = (index || [])[1..-1]
  s(:attrasgn, receiver, :"[]=", *subscripts)
end
|
132
|
+
|
133
|
+
# Builds the assignment node for the identifier +lhs+, picking the node
# type from the identifier's prefix:
#
#   @@x  -> :cvasgn inside a def / singleton, otherwise :cvdecl
#   @x   -> :iasgn
#   $x   -> :gasgn
#   X    -> :cdecl
#   else -> :lasgn (the environment entry must be :lvar, :dvar or unset)
#
# The identifier is recorded in the environment as :lvar if it has no
# entry yet, and +value+ is appended to the node when given.
def assignable(lhs, value = nil)
  id = lhs.to_sym
  id = id.to_sym if Sexp === id
  name = id.to_s

  reserved = /^(?:self|nil|true|false|__LINE__|__FILE__)$/
  raise "write a test 1" if name =~ reserved

  # not reachable while the placeholder raise above is in place
  raise SyntaxError, "Can't change the value of #{id}" if name =~ reserved

  result =
    if name =~ /^@@/ then
      asgn = in_def || in_single > 0
      s((asgn ? :cvasgn : :cvdecl), id)
    elsif name =~ /^@/ then
      s(:iasgn, id)
    elsif name =~ /^\$/ then
      s(:gasgn, id)
    elsif name =~ /^[A-Z]/ then
      s(:cdecl, id)
    else
      known = self.env[id]
      unless [:lvar, :dvar, nil].include?(known) then
        raise "wtf? unknown type: #{known}"
      end
      s(:lasgn, id)
    end

  self.env[id] ||= :lvar

  result << value if value
  result
end
|
167
|
+
|
168
|
+
# Joins two statements into one :block.
#
# Returns the other operand when either is nil, and returns +tail+ alone
# when +head+ is a :lit or :str. Otherwise +head+ (unwrapped through
# remove_begin and wrapped in a :block if it is not one already) gets
# +tail+ appended and is stamped with the smaller of the two line numbers.
def block_append(head, tail)
  return head if tail.nil?
  return tail if head.nil?
  return tail if [:lit, :str].include?(head[0])

  first_line = [head.line, tail.line].compact.min

  body = remove_begin(head)
  body = s(:block, body) if body.node_type != :block

  body.line = first_line
  body << tail
end
|
185
|
+
|
186
|
+
# Rewrites +node+ for use as a condition; nil stays nil.
#
# * a :lit holding a Regexp is wrapped in :match
# * :and / :or are rebuilt with both operands passed through cond
# * :dot2 / :dot3 become :flip2 / :flip3, after registering a
#   "flip<hash>" entry as :lvar in the environment
# * every other node is returned as it came out of value_expr
def cond(node)
  return nil if node.nil?

  node = value_expr(node)
  type = node.first

  case type
  when :lit then
    Regexp === node.last ? s(:match, node) : node
  when :and, :or then
    s(type, cond(node[1]), cond(node[2]))
  when :dot2, :dot3 then
    env["flip#{node.hash}"] = :lvar
    s(type == :dot2 ? :flip2 : :flip3, node[1], node[2])
  else
    node
  end
end
|
213
|
+
|
214
|
+
##
|
215
|
+
# for pure ruby systems only
|
216
|
+
|
217
|
+
# Defined only when the PURE_RUBY environment variable is set: runs the
# parse through _racc_do_parse_rb.
if ENV['PURE_RUBY'] then
  def do_parse
    setup = _racc_setup
    _racc_do_parse_rb(setup, false)
  end
end
|
220
|
+
|
221
|
+
# Builds the node for a match between +lhs+ and +rhs+.
#
# A regexp on the left (:dregx, :dregx_once, or a :lit holding a Regexp)
# gives :match2; failing that, a regexp on the right gives :match3 with
# the operands swapped; otherwise an ordinary :"=~" call. The result
# takes the line of +lhs+ in every case.
def get_match_node(lhs, rhs) # TODO: rename to new_match
  regexp_node = lambda { |node|
    node && ([:dregx, :dregx_once].include?(node[0]) ||
             (node[0] == :lit && Regexp === node.last))
  }

  return s(:match2, lhs, rhs).line(lhs.line) if regexp_node[lhs]
  return s(:match3, rhs, lhs).line(lhs.line) if regexp_node[rhs]

  new_call(lhs, :"=~", argl(rhs)).line(lhs.line)
end
|
242
|
+
|
243
|
+
# Builds the node that reads the identifier +id+ (a String is converted
# to a Symbol first).
#
#   @@x -> :cvar    @x -> :ivar    $x -> :gvar    X -> :const
#
# Any other name uses the type recorded in the environment, or becomes a
# receiver-less call when the environment has no entry for it.
def gettable(id)
  id = id.to_sym if String === id
  name = id.to_s

  by_prefix = [
    [/^@@/,    :cvar],
    [/^@/,     :ivar],
    [/^\$/,    :gvar],
    [/^[A-Z]/, :const],
  ]

  match = by_prefix.find { |pattern, _type| pattern =~ name }
  type  = match ? match.last : env[id]

  result = type ? s(type, id) : new_call(nil, id)

  raise "identifier #{id.inspect} is not valid" unless result

  result
end
|
268
|
+
|
269
|
+
##
|
270
|
+
# Canonicalize conditionals. Eg:
|
271
|
+
#
|
272
|
+
# not x ? a : b
|
273
|
+
#
|
274
|
+
# becomes:
|
275
|
+
#
|
276
|
+
# x ? b : a
|
277
|
+
|
278
|
+
attr_accessor :canonicalize_conditions
|
279
|
+
|
280
|
+
# Sets up a parser: a RubyLexer keyed by the 18/19 found in the class
# name (nil when there is none), a fresh Environment shared with the
# lexer, an empty comment stack, and condition canonicalization switched
# on. +options+ is accepted but not read here.
def initialize(options = {})
  super()

  version = self.class.name[/1[89]/]
  version = version.to_i if version
  self.lexer = RubyLexer.new(version)

  @env = RubyParserStuff::Environment.new
  @comments = []

  lexer.static_env = @env

  @canonicalize_conditions = true

  reset
end
|
295
|
+
|
296
|
+
# Appends +item+ to +list+ and returns the resulting :array sexp.
# A missing list starts a new :array; a list that is not an :array sexp
# is first wrapped in one.
def list_append(list, item) # TODO: nuke me *sigh*
  if !list then
    s(:array, item)
  elsif Sexp === list && list.first == :array then
    list << item
  else
    s(:array, list) << item
  end
end
|
301
|
+
|
302
|
+
# Inserts +item+ right after the type tag of +list+ and returns the
# list. A list that is not an :array sexp is first wrapped in one.
def list_prepend(item, list) # TODO: nuke me *sigh*
  already_array = Sexp === list && list[0] == :array
  target = already_array ? list : s(:array, list)

  target.insert(1, item)
  target
end
|
307
|
+
|
308
|
+
# Concatenates two adjacent string-ish nodes (:str, :dstr, :evstr) and
# returns the combined node. Either operand may be missing, in which
# case the other is returned. Both operands may be modified in place.
# Raises "unknown type: ..." when +tail+ is none of the three types.
def literal_concat(head, tail)
  return tail unless head
  return head unless tail

  htype = head[0]
  ttype = tail[0]

  # an interpolation on the left is wrapped in a :dstr with an empty
  # leading string; htype deliberately keeps the pre-wrap type
  head = s(:dstr, '', head) if htype == :evstr

  case ttype
  when :str then
    appendable = htype == :str || (htype == :dstr && head.size == 2)

    if appendable then
      head[-1] << tail[-1]
    else
      head << tail
    end
  when :dstr then
    if htype == :str then
      tail[1] = head[-1] + tail[1]
      head = tail
    else
      tail[0] = :array
      tail[1] = s(:str, tail[1])
      tail.delete_at 1 if tail[1] == s(:str, '')

      head.push(*tail[1..-1])
    end
  when :evstr then
    head[0] = :dstr if htype == :str

    if head.size == 2 && tail.size > 1 && tail[1][0] == :str then
      head[-1] << tail[1][-1]
      head[0] = :str if head.size == 2 # HACK ?
    else
      head.push(tail)
    end
  else
    raise "unknown type: #{[head, tail].inspect}"
  end

  head
end
|
351
|
+
|
352
|
+
# Combines +left+ and +right+ under the logical operator +type+.
#
# When +left+ is already an unparenthesized node of the same type, the
# new operand is grafted onto the innermost right-hand operand of that
# chain and +left+ itself is returned; otherwise a new node is built.
def logop(type, left, right) # TODO: rename logical_op
  left = value_expr(left)

  chained = left && left[0] == type && !left.paren
  return s(type, left, right) unless chained

  # walk down the right-hand spine while it keeps the same operator
  node = left
  node = node[2] while (deeper = node[2]) && deeper[0] == type && !deeper.paren

  node[2] = s(type, node[2], right)

  left
end
|
369
|
+
|
370
|
+
# Builds the :"[]" call for an element reference. val[0] is the
# receiver (dropped when it is a :self node) and val[2] the index list,
# which defaults to an empty :arglist and is retagged from :array.
def new_aref(val)
  val[2] ||= s(:arglist)

  index = val[2]
  index[0] = :arglist if index[0] == :array # REFACTOR

  recv = val[0]
  recv = nil if recv.node_type == :self

  new_call(recv, :"[]", index)
end
|
380
|
+
|
381
|
+
# Assembles a body from val = [body, rescue chain, else, ensure].
#
# With a rescue chain the result is a :rescue node holding the body,
# each resbody in the chain, and the else clause. Without one, an else
# clause triggers a warning and is appended to the body. An ensure
# clause wraps whatever was built in :ensure.
def new_body(val)
  body, rescues, else_body, ensure_body = val[0], val[1], val[2], val[3]

  result = body

  if rescues then
    result = s(:rescue)
    result << body if body

    clause = rescues
    while clause do
      result << clause
      clause = clause.resbody(true)
    end

    result << else_body if else_body

    result.line = (body || rescues).line
  elsif !else_body.nil? then
    warning("else without rescue is useless")
    result = block_append(result, else_body)
  end

  return result unless ensure_body

  s(:ensure, result, ensure_body).compact
end
|
406
|
+
|
407
|
+
# Returns +x+ as an :arglist: unchanged when it is falsy or already
# tagged :arglist, otherwise wrapped in a new one.
def argl(x)
  return x unless x

  x[0] == :arglist ? x : s(:arglist, x)
end
|
411
|
+
|
412
|
+
# Always raises: called when something tries to assign to the
# back-reference node +ref+. The SyntaxError raises are currently
# shadowed by the "write a test" placeholders in front of them.
def backref_assign_error(ref)
  # TODO: need a test for this... obviously
  kind = ref.first

  if kind == :nth_ref then
    raise "write a test 2"
    raise SyntaxError, "Can't set variable %p" % ref.last
  elsif kind == :back_ref then
    raise "write a test 3"
    raise SyntaxError, "Can't set back reference %p" % ref.last
  end

  raise "Unknown backref type: #{ref.inspect}"
end
|
425
|
+
|
426
|
+
# Builds a :call node for +meth+ on +recv+ with the arguments spliced in
# after the method name. +args+ may be nil, an :array / :arglist, or a
# single node. The call takes the smallest line number found among its
# sexp children, when there is one.
def new_call(recv, meth, args = nil)
  result = s(:call, recv, meth)

  # TODO: need a test with f(&b) to produce block_pass
  # TODO: need a test with f(&b) { } to produce warning

  args ||= s(:arglist)

  case args.first
  when :array then
    args[0] = :arglist
  when :arglist then
    # already in the expected shape
  else
    args = s(:arglist, args)
  end

  # HACK quick hack to make this work quickly... easy to clean up above
  result.concat args[1..-1]

  child_lines = result.grep(Sexp).map { |child| child.line }.compact
  earliest = child_lines.min
  result.line = earliest if earliest

  result
end
|
444
|
+
|
445
|
+
# Builds a :case node from +expr+ and a chain of :when nodes in which
# each :when carries the next clause at index 3.
#
# The chain is unrolled into siblings, each when's nested block is
# spliced into the when itself, and whatever remains after the last
# :when is appended as the else clause (nil when it is an empty block).
def new_case(expr, body)
  result = s(:case, expr)
  line = (expr || body).line

  clause = body
  while clause && clause.node_type == :when
    result << clause
    clause = clause.delete_at(3)
  end

  result[2..-1].each do |when_node|
    inner = when_node.block(:delete)
    when_node.concat inner[1..-1] if inner
  end

  # else
  clause = nil if clause == s(:block)
  result << clause

  result.line = line
  result
end
|
466
|
+
|
467
|
+
# Builds a :class node from val: line at 1, path at 2, superclass at 3,
# body at 5. A :block body is spliced in statement by statement. The
# node takes the most recent entry off the comment stack.
def new_class(val)
  line       = val[1]
  path       = val[2]
  superclass = val[3]
  body       = val[5]

  result = s(:class, path, superclass)

  if body then
    statements = body.first == :block ? body[1..-1] : [body]
    result.push(*statements)
  end

  result.line = line
  result.comments = comments.pop
  result
end
|
484
|
+
|
485
|
+
# Picks the first Sexp out of +val+, runs it through void_stmts, and
# unwraps a redundant :begin from the result. Returns nil when nothing
# is left.
def new_compstmt(val)
  first_sexp = val.grep(Sexp).first
  stmts = void_stmts(first_sexp)

  stmts ? remove_begin(stmts) : stmts
end
|
490
|
+
|
491
|
+
# Builds a :defn node from val: name at 1, args at 3, body at 4. A
# missing body becomes s(:nil); a :block body is spliced in statement by
# statement. The node takes the most recent entry off the comment stack.
def new_defn(val)
  name = val[1]
  args = val[3]
  body = val[4] || s(:nil)

  result = s(:defn, name.to_sym, args)

  # body can no longer be nil here, so no guard is needed
  if body.first == :block then
    result.push(*body[1..-1])
  else
    result.push body
  end

  result.comments = comments.pop
  result
end
|
508
|
+
|
509
|
+
# Builds a :defs node from val: receiver at 1, name at 4, args at 6,
# body at 7. A :block body is spliced in statement by statement. The
# node takes the receiver's line and the most recent entry off the
# comment stack.
def new_defs(val)
  recv = val[1]
  name = val[4]
  args = val[6]
  body = val[7]

  result = s(:defs, recv, name.to_sym, args)

  if body then
    statements = body.first == :block ? body[1..-1] : [body]
    result.push(*statements)
  end

  result.line = recv.line
  result.comments = comments.pop
  result
end
|
526
|
+
|
527
|
+
# Builds a :for node over +expr+ with loop variable +var+, stamped with
# the variable's line; +body+ is appended when present.
def new_for(expr, var, body)
  loop_node = s(:for, expr, var).line(var.line)
  return loop_node unless body

  loop_node << body
  loop_node
end
|
532
|
+
|
533
|
+
# Builds an :if node from condition +c+ and branches +t+ / +f+, stamped
# with the smallest line among them. When canonicalize_conditions is on
# and the condition is a :not, the negation is stripped and the branches
# are swapped.
def new_if(c, t, f)
  cond_line = c.line
  line = [cond_line, t && t.line, f && f.line].compact.min

  test = cond(c)

  if test[0] == :not && canonicalize_conditions then
    test, t, f = test.last, f, t
  end

  s(:if, test, t, f).line(line)
end
|
539
|
+
|
540
|
+
# Builds an :iter node from an optional +call+, the block +args+ and an
# optional +body+. Missing args become an empty :args node and a bare
# Symbol is wrapped in one; unless +args+ equals 0 its type tag is
# forced to :args.
def new_iter(call, args, body)
  args =
    case args
    when nil, false then s(:args)
    when Symbol     then s(:args, args)
    else                 args
    end

  result = s(:iter)
  result << call if call
  result << args
  result << body if body

  args[0] = :args unless args == 0

  result
end
|
555
|
+
|
556
|
+
# Completes the multiple assignment +lhs+ by appending +rhs+ to it.
# With +wrap+ set, +rhs+ is first wrapped in :to_ary when lhs[1] is
# present and in :array otherwise. A nil lhs[1] is removed before the
# append.
def new_masgn(lhs, rhs, wrap = false)
  rhs = value_expr(rhs)

  if wrap then
    wrapper = lhs[1] ? :to_ary : :array
    rhs = s(wrapper, rhs)
  end

  lhs.delete_at 1 if lhs[1].nil?
  lhs << rhs
  lhs
end
|
565
|
+
|
566
|
+
# Builds a :module node from val: line at 1, path at 2, body at 4. A
# :block body is spliced in statement by statement. The node takes the
# most recent entry off the comment stack.
def new_module(val)
  line = val[1]
  path = val[2]
  body = val[4]

  result = s(:module, path)

  if body then # REFACTOR?
    statements = body.first == :block ? body[1..-1] : [body]
    result.push(*statements)
  end

  result.line = line
  result.comments = comments.pop
  result
end
|
583
|
+
|
584
|
+
# Builds the node for an operator assignment from val = [lhs, op, arg].
#
# "||" and "&&" append the argument to +lhs+ and wrap it in
# :op_asgn_or / :op_asgn_and next to a read of the variable. Any other
# operator stores a call of that operator (variable read as receiver,
# argument as arglist) at index 2 of +lhs+ and returns +lhs+.
def new_op_asgn(val)
  lhs     = val[0]
  asgn_op = val[1].to_sym
  arg     = val[2]

  name = lhs.value
  arg  = remove_begin(arg)

  wrapper = { :"||" => :op_asgn_or, :"&&" => :op_asgn_and }[asgn_op] # REFACTOR

  result =
    if wrapper then
      lhs << arg
      s(wrapper, gettable(name), lhs)
    else
      # TODO: why [2] ?
      lhs[2] = new_call(gettable(name), asgn_op, argl(arg))
      lhs
    end

  result.line = lhs.line
  result
end
|
603
|
+
|
604
|
+
# Builds a regexp node from val: the content node at 1 (an empty :str
# when missing) and the option letters at 3.
#
# Each distinct option letter contributes its Regexp constant to the
# options mask; an unknown letter raises. A plain :str becomes a :lit
# holding a compiled Regexp. Anything else becomes :dregx (or
# :dregx_once with the "o" option) with a non-zero mask appended.
def new_regexp(val)
  node    = val[1] || s(:str, '')
  options = val[3]

  o, k = 0, nil
  options.split(//).uniq.each do |c| # FIX: this has a better home
    v = case c
        when 'x' then Regexp::EXTENDED
        when 'i' then Regexp::IGNORECASE
        when 'm' then Regexp::MULTILINE
        when 'o' then Regexp::ONCE
        when 'n' then Regexp::ENC_NONE
        when 'e' then Regexp::ENC_EUC
        when 's' then Regexp::ENC_SJIS
        when 'u' then Regexp::ENC_UTF8
        end

    raise "unknown regexp option: #{c}" unless v

    o += v
    k = c if c =~ /[esu]/ # remembered and passed as Regexp.new's third argument
  end

  if node[0] == :str then
    source = node[1]

    node[0] = :lit
    node[1] =
      if k then
        Regexp.new(source, o, k)
      else
        begin
          Regexp.new(source, o)
        rescue RegexpError => e
          # retry with only ENC_NONE as the options argument
          warn "Ignoring: #{e.message}"
          Regexp.new(source, Regexp::ENC_NONE)
        end
      end
  else
    # anything that is not already a :dstr gets wrapped first
    node = s(:dregx, '', node) unless node[0] == :dstr
    node[0] = (options =~ /o/) ? :dregx_once : :dregx
    node << o if o && o != 0
  end

  node
end
|
653
|
+
|
654
|
+
# Builds a :resbody node from the exception list +cond+ and +body+. A
# :block body has its type tag removed in place so that its statements
# become direct children; any other body (nil included) is added as a
# single child.
def new_resbody(cond, body)
  statements =
    if body && body.first == :block then
      body.shift # remove block and splat it in directly
      body
    else
      [body]
    end

  s(:resbody, cond, *statements)
end
|
662
|
+
|
663
|
+
# Builds an :sclass node from val: line at 2, receiver at 3, body at 7.
# A :block body is spliced in statement by statement. Afterwards in_def
# and in_single are set from val[4] and val[6].
def new_sclass(val)
  recv            = val[3]
  saved_in_def    = val[4]
  saved_in_single = val[6]
  body            = val[7]

  result = s(:sclass, recv)

  if body then
    statements = body.first == :block ? body[1..-1] : [body]
    result.push(*statements)
  end

  result.line = val[2]
  self.in_def = saved_in_def
  self.in_single = saved_in_single
  result
end
|
681
|
+
|
682
|
+
# Builds a :super node. A :block_pass argument is kept as a single
# child; otherwise the children of +args+ (none when it is missing) are
# spliced in.
def new_super(args)
  return s(:super, args) if args && args.node_type == :block_pass

  list = args || s(:arglist)
  s(:super, *list[1..-1])
end
|
690
|
+
|
691
|
+
# With only +n+, builds s(:undef, n). With +m+ as well, appends
# s(:undef, m) to the existing node +n+ through block_append.
def new_undef(n, m = nil)
  return s(:undef, n) unless m

  block_append(n, s(:undef, m))
end
|
698
|
+
|
699
|
+
# Builds an until loop; see new_until_or_while.
def new_until(block, expr, pre)
  new_until_or_while(:until, block, expr, pre)
end
|
702
|
+
|
703
|
+
# Builds a loop node of +type+ (:while or :until) from +block+, the
# condition +expr+ and the +pre+ flag.
#
# A :begin block is replaced by its last element and forces +pre+ to
# false. When canonicalize_conditions is on and the condition is a :not,
# the negation is stripped and the opposite loop type is used.
def new_until_or_while(type, block, expr, pre)
  line = [block && block.line, expr.line].compact.min

  if block && block[0] == :begin then
    block = block.last
    pre = false
  end

  expr = cond(expr)

  result =
    if expr.first == :not && canonicalize_conditions then
      opposite = type == :until ? :while : :until
      s(opposite, expr.last, block, pre)
    else
      s(type, expr, block, pre)
    end

  result.line = line
  result
end
|
719
|
+
|
720
|
+
# Builds a :when node from its condition list and body.
def new_when(cond, body)
  s(:when, cond, body)
end
|
723
|
+
|
724
|
+
# Builds a while loop; see new_until_or_while.
def new_while(block, expr, pre)
  new_until_or_while(:while, block, expr, pre)
end
|
727
|
+
|
728
|
+
def new_xstring str
|
729
|
+
if str then
|
730
|
+
case str[0]
|
731
|
+
when :str
|
732
|
+
str[0] = :xstr
|
733
|
+
when :dstr
|
734
|
+
str[0] = :dxstr
|
735
|
+
else
|
736
|
+
str = s(:dxstr, '', str)
|
737
|
+
end
|
738
|
+
str
|
739
|
+
else
|
740
|
+
s(:xstr, '')
|
741
|
+
end
|
742
|
+
end
|
743
|
+
|
744
|
+
# Builds a :yield node with the children of +args+ spliced in. +args+
# may be nil, an :array / :arglist, or a single node. A :block_pass
# argument raises; the SyntaxError is currently shadowed by the
# placeholder raise in front of it.
def new_yield(args = nil)
  # TODO: raise args.inspect unless [:arglist].include? args.first # HACK
  if args && args.node_type == :block_pass then
    raise "write a test 4"
    raise SyntaxError, "Block argument should not be given."
  end

  args ||= s(:arglist)

  # TODO: I can prolly clean this up
  case args.first
  when :array then
    args[0] = :arglist
  when :arglist then
    # already in the expected shape
  else
    args = s(:arglist, args)
  end

  s(:yield, *args[1..-1])
end
|
758
|
+
|
759
|
+
# Asks the lexer for the next token: through #advance when MiniTest is
# defined, through #advance_and_decorate otherwise.
def next_token
  return lexer.advance if defined?(MiniTest)

  lexer.advance_and_decorate
end
|
766
|
+
|
767
|
+
# Attaches the value +rhs+ to the assignment target +lhs+ and returns
# +lhs+ (nil when there is no target).
#
# * variable / constant / masgn targets simply get the value appended
# * :attrasgn and :call targets get it added to their argument list
# * a :const target is retagged :cdecl first
# * any other target raises "unknown lhs ..."
def node_assign(lhs, rhs) # TODO: rename new_assign
  return nil unless lhs

  rhs  = value_expr(rhs)
  kind = lhs[0]

  if [:gasgn, :iasgn, :lasgn, :masgn, :cdecl, :cvdecl, :cvasgn].include?(kind) then
    lhs << rhs
  elsif kind == :attrasgn || kind == :call then
    # a trailing non-symbol is the existing argument list
    args = Symbol === lhs.last ? nil : lhs.pop
    lhs.concat arg_add(args, rhs)[1..-1]
  elsif kind == :const then
    lhs[0] = :cdecl
    lhs << rhs
  else
    raise "unknown lhs #{lhs.inspect}"
  end

  lhs
end
|
787
|
+
|
788
|
+
##
|
789
|
+
# Returns a UTF-8 encoded string after processing BOMs and magic
|
790
|
+
# encoding comments.
|
791
|
+
#
|
792
|
+
# Holy crap... ok. Here goes:
|
793
|
+
#
|
794
|
+
# Ruby's file handling and encoding support is insane. We need to be
|
795
|
+
# able to lex a file. The lexer file is explicitly UTF-8 to make
|
796
|
+
# things cleaner. This allows us to deal with extended chars in
|
797
|
+
# class and method names. In order to do this, we need to encode all
|
798
|
+
# input source files as UTF-8. First, we look for a UTF-8 BOM by
|
799
|
+
# looking at the first line while forcing its encoding to
|
800
|
+
# ASCII-8BIT. If we find a BOM, we strip it and set the expected
|
801
|
+
# encoding to UTF-8. Then, we search for a magic encoding comment.
|
802
|
+
# If found, it overrides the BOM. Finally, we force the encoding of
|
803
|
+
# the input string to whatever was found, and then encode that to
|
804
|
+
# UTF-8 for compatibility with the lexer.
|
805
|
+
|
806
|
+
# Works on a copy of +str+: strips a leading UTF-8 BOM, looks for a
# magic encoding comment in the first two lines (which wins over the
# BOM), and hands the result to hack_encoding for conversion. On strings
# without #encoding a magic comment only produces a warning. Returns the
# processed copy.
def handle_encoding str
  str = str.dup
  ruby19 = str.respond_to? :encoding
  encoding = nil

  header = str.lines.first(2)
  # scan the header as raw bytes, whatever encoding the input is tagged with
  header.map! { |s| s.force_encoding "ASCII-8BIT" } if ruby19

  first = header.first || ""
  # /n keeps this a byte regexp regardless of the source file's encoding
  if first =~ /\A\xEF\xBB\xBF/n then
    encoding = "utf-8"
    # The BOM is three bytes but a single character once the string is
    # tagged UTF-8, so cut it off in binary; slicing three *characters*
    # would also swallow the first two characters of real source.
    str.force_encoding "ASCII-8BIT" if ruby19
    str = str[3..-1]
  end

  header.each do |line|
    found = line[/^#.*?-\*-.*?coding:\s*([^ ;]+).*?-\*-/, 1] ||
            line[/^#.*(?:en)?coding(?:\s*[:=])\s*([\w-]+)/, 1]
    next unless found

    encoding = found.strip
    break
  end

  if encoding then
    if ruby19 then
      encoding = encoding.sub(/utf-8-.+$/, 'utf-8') # HACK for stupid emacs formats
      hack_encoding str, encoding
    else
      warn "Skipping magic encoding comment"
    end
  else
    # nothing specified... ugh. try to encode as utf-8
    hack_encoding str if ruby19
  end

  str
end
|
836
|
+
|
837
|
+
# Re-encodes +str+ in place as UTF-8.
#
# Each candidate encoding is forced onto the string in turn, starting
# with +extra+ when given; the first one under which the bytes are valid
# is used as the source encoding for the conversion. Raises when the
# string does not end up as UTF-8.
def hack_encoding str, extra = nil
  # UTF-8 is tried before ISO-8859-1: with ISO-8859-1 first, UTF-8
  # sources were accepted as ISO-8859-1 and then double-encoded.
  # NOTE(review): ISO-8859-1 appears to accept any byte sequence, so the
  # entries after it are probably never reached - confirm.
  encodings = [
               extra,
               Encoding::UTF_8,
               Encoding::ISO_8859_1,
               Encoding::ISO_8859_2,
               Encoding::ISO_8859_9,
               Encoding::SHIFT_JIS,
               Encoding::WINDOWS_1252,
               Encoding::EUC_JP,
              ].compact

  # terrible, horrible, no good, very bad, last ditch effort.
  encodings.each do |enc|
    begin
      str.force_encoding enc
      if str.valid_encoding? then
        str.encode! Encoding::UTF_8
        break
      end
    rescue ArgumentError
      # unknown encoding name (e.g. a bogus magic comment): try the next one
    rescue Encoding::InvalidByteSequenceError
      # do nothing
    rescue Encoding::UndefinedConversionError
      # do nothing
    end
  end

  # no amount of pain is enough for you.
  raise "Bad encoding. Need a magic encoding comment." unless
    str.encoding.name == "UTF-8"
end
|
870
|
+
|
871
|
+
##
|
872
|
+
# Parse +str+ at path +file+ and return a sexp. Raises
|
873
|
+
# Timeout::Error if it runs for more than +time+ seconds.
|
874
|
+
|
875
|
+
# Runs the whole parse inside Timeout.timeout(time): validates that
# +str+ is a String, normalizes its encoding, stores a copy of +file+,
# resets and feeds the lexer, turns on racc debugging when the DEBUG
# environment variable exists, and returns the result of do_parse.
def process(str, file = "(string)", time = 10)
  Timeout.timeout(time) {
    String === str or raise "bad val: #{str.inspect}"

    source = handle_encoding(str)

    self.file = file.dup
    lexer.reset
    lexer.source = source

    @yydebug = ENV.key?('DEBUG')

    do_parse
  }
end
|
890
|
+
|
891
|
+
alias :parse :process
|
892
|
+
|
893
|
+
# Unwraps a :begin node that holds exactly one child, giving the child
# the wrapper's line. Every other node (nil included) is returned as is.
def remove_begin(node)
  return node unless node && :begin == node[0] && node.size == 2

  inner = node[-1]
  inner.line = node.line
  inner
end
|
901
|
+
|
902
|
+
# Returns the parser to its initial state: lexer reset, not inside a
# def, singleton depth 0, environment reset, comment stack emptied.
def reset
  lexer.reset

  self.in_def    = false
  self.in_single = 0

  env.reset
  comments.clear
end
|
909
|
+
|
910
|
+
# Reports a syntax error when +block+ is given and +call_or_args+
# already contains a block pass. Returns nil otherwise.
def block_dup_check(call_or_args, block)
  return unless block && call_or_args.block_pass?

  syntax_error("Both block arg and actual block given.")
end
|
914
|
+
|
915
|
+
# Normalizes the argument node of a return-style statement; nil stays
# nil.
#
# * a :block_pass raises (the SyntaxError is shadowed by the placeholder)
# * a one-element :array is replaced by that element
# * an unparenthesized :splat is wrapped in :svalue
# * an :arglist whose first element is a :splat is retagged :svalue
def ret_args(node)
  return node unless node

  if node[0] == :block_pass then
    raise "write a test 5"
    raise SyntaxError, "block argument should not be given"
  end

  node = node.last if node[0] == :array && node.size == 2
  # HACK matz wraps ONE of the FOUR splats in a newline to
  # distinguish. I use paren for now. ugh
  node = s(:svalue, node) if node[0] == :splat && !node.paren
  node[0] = :svalue if node[0] == :arglist && node[1][0] == :splat

  node
end
|
931
|
+
|
932
|
+
# Builds a Sexp from +args+ tagged with the current file. While the
# lexer has a source, a node without a line gets the lexer's current
# line number.
def s(*args)
  node = Sexp.new(*args)

  if lexer.source then
    node.line = lexer.lineno unless node.line
  end

  node.file = file
  node
end
|
938
|
+
|
939
|
+
# Unwraps a redundant :begin around +oldnode+, keeps the original line
# on the result, and does the same recursively for element 2 of an :if.
def value_expr(oldnode) # HACK
  node = remove_begin(oldnode)
  node.line = oldnode.line if oldnode

  if node && node[0] == :if then
    node[2] = value_expr(node[2])
  end

  node
end
|
945
|
+
|
946
|
+
# For a :block node, passes every statement through remove_begin in
# place. Other nodes are returned untouched; a missing node gives nil.
def void_stmts(node)
  return nil unless node

  if node[0] == :block then
    unwrapped = node[1..-1].map { |stmt| remove_begin(stmt) }
    node[1..-1] = unwrapped
  end

  node
end
|
953
|
+
|
954
|
+
# Hook for parser warnings; the message is currently discarded.
def warning(s)
  # do nothing for now
  nil
end
|
957
|
+
|
958
|
+
alias yyerror syntax_error
|
959
|
+
|
960
|
+
# Delegates to the superclass error handler. When that raises
# Racc::ParseError, the exception's message is rewritten in place as
# "file:line :: message", echoed through warn under $DEBUG, and the same
# exception is re-raised.
def on_error(et, ev, values)
  begin
    super
  rescue Racc::ParseError => e
    # I don't like how the exception obscures the error message
    located = "%s:%p :: %s" % [file, lexer.lineno, e.message.strip]
    e.message.replace(located)
    warn e.message if $DEBUG
    raise
  end
end
|
968
|
+
|
969
|
+
# A stack of variable scopes. @env holds one hash per scope, innermost
# first; @dyn holds the matching flags saying whether each scope is
# dynamic, i.e. can also see the scope that encloses it.
class Environment
  attr_reader :env, :dyn

  # Starts with a single empty, non-dynamic scope. +dyn+ is not used.
  def initialize(dyn = false)
    @dyn = []
    @env = []
    reset
  end

  # Looks +k+ up across all currently visible scopes.
  def [](k)
    all[k]
  end

  # Records +v+ for +k+ in the innermost scope; true is rejected.
  def []=(k, v)
    raise "no" if v == true
    current[k] = v
  end

  # Merges the scopes from the innermost one out to the first
  # non-dynamic one, with inner entries taking precedence.
  def all
    boundary = @dyn.index(false) || 0
    visible  = @env[0..boundary].reverse
    visible.inject { |merged, scope| merged.merge(scope) }
  end

  # The innermost scope.
  def current
    @env.first
  end

  # Pushes a new empty scope, dynamic when +dyn+ is set.
  def extend(dyn = false)
    @dyn.unshift(dyn)
    @env.unshift({})
  end

  # Drops every scope and starts over with one empty scope.
  def reset
    @dyn.clear
    @env.clear
    extend
  end

  # Pops the innermost scope; raises once no scope is left.
  def unextend
    @dyn.shift
    @env.shift
    raise "You went too far unextending env" if @env.empty?
  end
end
|
1013
|
+
|
1014
|
+
# A named stack of flags that starts as [false] and is never left empty
# by #pop. With +debug+ set, each operation prints a trace through p.
class StackState
  attr_reader :name, :stack
  attr_accessor :debug

  def initialize(name)
    @name  = name
    @stack = [false]
    @debug = false
  end

  def inspect
    "StackState(#{@name}, #{@stack.inspect})"
  end

  # The flag on top of the stack.
  def is_in_state
    p :stack_is_in_state => [name, @stack.last, caller.first] if debug
    @stack.last
  end

  # Replaces the top two entries with their logical OR.
  def lexpop
    p :stack_lexpop => caller.first if debug
    raise if @stack.size == 0

    newer = @stack.pop
    older = @stack.pop
    @stack.push(newer || older)
  end

  # Removes and returns the top entry, pushing false back if that
  # emptied the stack.
  def pop
    top = @stack.pop
    p :stack_pop => [name, top, @stack, caller.first] if debug
    @stack.push false if @stack.size == 0
    top
  end

  # Pushes +val+; always returns nil.
  def push(val)
    @stack.push val
    p :stack_push => [name, @stack, caller.first] if debug
    nil
  end
end
|
1055
|
+
end
|
1056
|
+
|
1057
|
+
# Racc parser class that mixes in RubyParserStuff.
class Ruby19Parser < Racc::Parser
  include RubyParserStuff

  # Parses +what+ with a fresh instance and prints the result with p.
  def self.do(what)
    parsed = new.process(what)
    p parsed
  end
end
|
1064
|
+
|
1065
|
+
# Racc parser class that mixes in RubyParserStuff.
class Ruby18Parser < Racc::Parser
  include RubyParserStuff

  # Parses +what+ with a fresh instance and prints the result with p.
  def self.do(what)
    parsed = new.process(what)
    p parsed
  end
end
|
1072
|
+
|
1073
|
+
##
|
1074
|
+
# RubyParser is a compound parser that first attempts to parse using
|
1075
|
+
# the 1.9 syntax parser and falls back to the 1.8 syntax parser on a
|
1076
|
+
# parse error.
|
1077
|
+
|
1078
|
+
# Compound parser holding one Ruby19Parser and one Ruby18Parser.
class RubyParser
  class SyntaxError < RuntimeError; end

  def initialize
    @p18 = Ruby18Parser.new
    @p19 = Ruby19Parser.new
  end

  # Parses +s+ with the 1.9 parser; on Racc::ParseError the same input
  # is handed to the 1.8 parser instead.
  def process(s, f = "(string)", t = 10) # parens for emacs *sigh*
    begin
      @p19.process(s, f, t)
    rescue Racc::ParseError
      @p18.process(s, f, t)
    end
  end

  alias :parse :process

  # Resets both underlying parsers.
  def reset
    @p18.reset
    @p19.reset
  end

  # Returns a new parser matching the running interpreter; raises for
  # any RUBY_VERSION that does not start with 1.8 or 1.9.
  def self.for_current_ruby
    case RUBY_VERSION[/^1\.([89])/, 1]
    when "8" then
      Ruby18Parser.new
    when "9" then
      Ruby19Parser.new
    else
      raise "unrecognized RUBY_VERSION #{RUBY_VERSION}"
    end
  end
end
|
1110
|
+
|
1111
|
+
############################################################
|
1112
|
+
# HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
|
1113
|
+
|
1114
|
+
# Parser-side additions to Sexp.
class Sexp
  attr_writer :paren

  # The paren flag; false until it has been set.
  def paren
    @paren = false unless @paren
    @paren
  end

  # The last element; raises when there are more than two elements.
  def value
    raise "multi item sexp" if size > 2
    last
  end

  # Always raises "no"; the conversion below it is never reached.
  def to_sym
    raise "no"
    self.value.to_sym
  end

  # Appends the elements of +x+.
  def add(x)
    concat(x)
  end

  # Always raises.
  def add_all(x)
    raise "no: #{self.inspect}.add_all #{x.inspect}" # TODO: need a test to trigger this
  end

  # True when any direct child is a :block_pass sexp.
  def block_pass?
    any? { |child| Sexp === child && child[0] == :block_pass }
  end

  alias_method :node_type, :sexp_type
  alias_method :values, :sexp_body # TODO: retire
end
|
1146
|
+
|
1147
|
+
# END HACK
|
1148
|
+
############################################################
|