redparse 0.8.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -0
- data/COPYING.LGPL +503 -158
- data/History.txt +192 -0
- data/Makefile +9 -0
- data/README.txt +72 -39
- data/bin/redparse +108 -14
- data/lib/miniredparse.rb +1543 -0
- data/lib/redparse.rb +971 -105
- data/lib/redparse/ReduceWithsFor_RedParse_1_8.rb +17412 -0
- data/lib/redparse/ReduceWithsFor_RedParse_1_9.rb +17633 -0
- data/lib/redparse/babynodes.rb +17 -0
- data/lib/redparse/babyparser.rb +17 -0
- data/lib/redparse/cache.rb +290 -6
- data/lib/redparse/compile.rb +6 -97
- data/lib/redparse/decisiontree.rb +1 -1
- data/lib/redparse/float_accurate_to_s.rb +30 -6
- data/lib/redparse/generate.rb +18 -0
- data/lib/redparse/node.rb +415 -124
- data/lib/redparse/parse_tree_server.rb +20 -2
- data/lib/redparse/problemfiles.rb +1 -1
- data/lib/redparse/pthelper.rb +17 -31
- data/lib/redparse/reg_more_sugar.rb +1 -1
- data/lib/redparse/replacing/parse_tree.rb +30 -0
- data/lib/redparse/replacing/ripper.rb +20 -0
- data/lib/redparse/replacing/ruby_parser.rb +28 -0
- data/lib/redparse/ripper.rb +393 -0
- data/lib/redparse/ripper_sexp.rb +153 -0
- data/lib/redparse/stackableclasses.rb +113 -0
- data/lib/redparse/version.rb +18 -1
- data/redparse.gemspec +29 -9
- data/rplt.txt +31 -0
- data/test/data/hd_with_blank_string.rb +3 -0
- data/test/data/pt_known_output.rb +13273 -0
- data/test/data/wp.pp +0 -0
- data/test/generate_parse_tree_server_rc.rb +17 -0
- data/test/rp-locatetest.rb +2 -2
- data/test/test_1.9.rb +338 -35
- data/test/test_all.rb +22 -3
- data/test/test_part.rb +32 -0
- data/test/test_redparse.rb +396 -74
- data/test/test_xform_tree.rb +18 -0
- data/test/unparse_1.9_exceptions.txt +85 -0
- data/test/unparse_1.9_exceptions.txt.old +81 -0
- metadata +71 -46
- data/Rakefile +0 -35
data/lib/miniredparse.rb
ADDED
@@ -0,0 +1,1543 @@
=begin
redparse - a ruby parser written in ruby
Copyright (C) 2008,2009, 2012, 2016 Caleb Clausen

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=end



require 'forwardable'

require 'digest/sha2'

begin
require 'rubygems'
rescue LoadError=>e
#hope we don't need it
raise unless /rubygems/===e.message
end
require 'rubylexer'
require 'reg'
require 'reglookab'

require "redparse/node"
#require "redparse/decisiontree"
require "redparse/reg_more_sugar"
#require "redparse/generate"
require "redparse/cache"
#require "redparse/compile"

class RedParse



alias :dump :inspect # preserve old inspect functionality

# irb friendly #inspect/#to_s
def to_s
mods=class<<self;self end.ancestors-self.class.ancestors
mods=mods.map{|mod| mod.name }.join('+')
mods="+"<<mods unless mods.empty?
"#<#{self.class.name}#{mods}: [#{@input.inspect}]>"
end

alias :inspect :to_s

####### generic stuff for parsing any(?) language
# include Nodes
class StackMonkey
def initialize(name,first_changed_index,and_expect_node,options={},&monkey_code)
first_changed_index=-first_changed_index if first_changed_index>0
@name,@first_changed_index,@and_expect_node,@monkey_code=
name,first_changed_index,and_expect_node,monkey_code
end

attr_reader :name, :first_changed_index, :and_expect_node, :monkey_code
alias hint and_expect_node
attr_accessor :exemplars

def [](stack)
result=@monkey_code[stack]
return result
end

def _dump depth
@name
end

def self._load str
Thread.current[:$RedParse_parser].undumpables[@name]
end

def action2c
#"return the whole thing on first call, just a goto stmt after that"
return " goto #@goto_label;\n" if defined? @goto_label

=begin
<<-E
#{@goto_label=@name.gsub(/[^a-z0-9_]/,'_')}:
monkey=rb_hash_get(undumpables,rb_cstr2str("#@name"));
rb_funcall(monkey,rb_intern("[]"),huh_stack);

/*recover from stackmonkey fiddling*/
for(i=0;i<#{-@first_changed_index};++i) {
rb_ary_unshift(lexer_moretokens,
rb_ary_pop(huh_semantic_stack));
rb_ary_pop(huh_syntax_stack);
}

goto #{Node===@and_expect_node ?
postreduceaction4this_state(@and_expect_node) :
shiftaction4this_state
};
E
=end
end
end
class DeleteMonkey<StackMonkey
def initialize(index,name)
index=-index if index>0
@index=index
super(name,index,nil){|stack| stack.delete_at( index )}
end
end
def stack_monkey(*args,&block) StackMonkey.new(*args,&block) end
def self.stack_monkey(*args,&block) StackMonkey.new(*args,&block) end
def delete_monkey(index,name) DeleteMonkey.new(index,name) end

def evaluate rule
#dissect the rule
if false
rule=rule.dup
lookahead_processor=(rule.pop if Proc===rule.last)
node_type=rule.pop
else
Reg::Transform===rule or fail
node_type= rule.right
rule=rule.left.subregs.dup
lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
lookback=rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]
end

#index of data at which to start matching
i=@stack.size-1 #-1 because last element of @stack is always lookahead

#I could call this a JIT compiler, but that's a bit grandiose....
#more of a JIT pre-processor
compiled_rule=@compiled_rules[rule]||=
rule.map{|pattern|
String|Regexp===pattern ? KW(pattern) : pattern
}

#what's the minimum @stack size this rule could match?
rule_min_size=@min_sizes[compiled_rule]||=
compiled_rule.inject(0){|sum,pattern|
sum + pattern.itemrange.begin
}
i>=rule_min_size or return false

matching=[]

#actually try to match rule elements against each @stack element in turn
compiled_rule.reverse_each{|matcher|
i.zero? and fail
target=matching
#is this matcher optional? looping?
loop= matcher.itemrange.last.to_f.infinite?
minimum=matcher.itemrange.first
optional=minimum.zero?
matching.unshift target=[] if loop
if loop or optional
matcher=matcher.subregs[0]
end

begin
if matcher===@stack[i-=1] #try match
target.unshift @stack[i]
else
#if match failed, the whole rule fails
#unless this match was optional, in which case, ignore it
#or was looping and met its minimum
#but bump the data position back up, since the latest datum
#didn't actually match anything.
return false unless optional or loop&&target.size>=minimum
i+=1
matching.unshift nil unless loop
break
end
end while loop
}

matchrange= i...-1 #what elems in @stack were matched?

#give lookahead matcher (if any) a chance to fail the match
case lookahead_processor
when ::Reg::LookAhead
return false unless lookahead_processor.subregs[0]===@stack.last
when Proc
return false unless lookahead_processor[self,@stack.last]
end

#if there was a lookback item, don't include it in the new node
if lookback
matchrange= i+1...-1 #what elems in @stack were matched?
matching.shift
end


#replace matching elements in @stack with node type found
case node_type
when Class
node=node_type.create(*matching)
node.startline||=@stack[matchrange.first].startline
node.endline=@endline
@stack[matchrange]=[node]
when Proc,StackMonkey; node_type[@stack]
when :shift; return 0
when :accept,:error; throw :ParserDone
else fail
end

return true #let caller know we found a match


rescue Exception=>e
#puts "error (#{e}) while executing rule: #{rule.inspect}"
#puts e.backtrace.join("\n")
raise
end

class ParseError<RuntimeError
def initialize(msg,stack)
super(msg)
@stack=stack
if false
ranges=(1..stack.size-2).map{|i|
node=stack[i]
if node.respond_to? :linerange
node.linerange
elsif node.respond_to? :endline
node.endline..node.endline
end
}
types=(1..stack.size-2).map{|i| stack[i].class }
msg += "couldn't interpret #{types.inspect} at line ranges: #{ranges.inspect}"
end
super(msg)
end
attr :stack
end

def [](*args)
@stack.[](*args)
end

def []=(*args)
@stack.[]=(*args)
end

#try all possible reductions
def reduce
shift=nil
@rules.reverse_each{|rule|
shift=evaluate(rule) and break
}
return shift
end

def parse

#hack, so StringToken can know what parser its called from
#so it can use it to parse inclusions
oldparser=Thread.current[:$RedParse_parser]
Thread.current[:$RedParse_parser]||=self

return @cached_result if defined? @cached_result

@rules||=expanded_RULES()
# @inputs||=enumerate_exemplars

@stack=[StartToken.new, get_token]
#last token on @stack is always implicitly the lookahead
catch(:ParserDone){ loop {
#try all possible reductions
next if reduce==true

#no rule can match current @stack, get another token
tok=get_token or break

#are we done yet?
#tok.nil? or EoiToken===tok && EoiToken===@stack.last and break

#shift our token onto the @stack
@stack.push tok
}}

@stack.size==2 and return result=NopNode.new #handle empty parse string

#unless the @stack is 3 tokens,
#with the last an Eoi, and first a StartToken
#there was a parse error
unless @stack.size==3
pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
raise ParseError.new(top.msg,@stack)
end
EoiToken===@stack.last or fail
StartToken===@stack.first or fail

result= @stack[1]


#multiple assignment must be resolved
#afterwards by walking the parse tree.
#(because the relative precedences of = and ,
#are reversed in multiple assignment.)
# result.respond_to? :fixup_multiple_assignments! and
# result=result.fixup_multiple_assignments!

#relative precedence of = and rescue are also inverted sometimes
# result.respond_to? :fixup_rescue_assignments! and
# result=result.fixup_rescue_assignments!

#do something with error nodes
msgs=[]
result.walk{|parent,i,subi,node|
if node.respond_to? :error? and node.error?(@rubyversion)
msgs<< @filename+":"+node.blame.msg
false
else
true
end
} if result.respond_to? :walk #hack hack
result.errors=msgs unless msgs.empty?
#other types of errors (lexer errors, exceptions in lexer or parser actions)
#should be handled in the same way, but currently are not
# puts msgs.join("\n")

rescue Exception=>e
input=@lexer
if Array===input
STDERR.puts "error while parsing:"
STDERR.write input.pretty_inspect
input=nil
else
input=input.original_file
inputname=@lexer.filename
STDERR.puts "error while parsing #@filename:#@endline: <<< #{input if input.to_s.size<=1000} >>>"
end
e.backtrace.each{|l| p l }
raise
else
unless msgs.empty?
pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
raise RedParse::ParseError.new(msgs.join("\n"),@stack)
end

# result=NopNode.new if EoiToken===result
return result
ensure
@write_cache.put(@input,result) if @write_cache and result and !result.errors
@stack=nil
Thread.current[:$RedParse_parser]=oldparser
end


#HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES

def new_disabled_reduce
#@hier||=Class::FlattenedHierarchy.new *STACKABLE_CLASSES()
@reducer||=Reducer.new(@rules)

@reducer.reduce(@stack)
end #


#inline any subsequences in RULES right into the patterns
#reg should do this already, but current release does not
def expanded_RULES
result=RULES()
return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty?
result.map!{|rule|
unless rule.left.subregs.grep(Reg::Subseq)
then rule
else
right=rule.right
rule=rule.left.subregs.dup
(rule.size-1).downto(0){|i|
if Reg::Subseq===rule[i]
rule[i,1]=rule[i].subregs
end
}
-rule>>right
end
}
end

###### specific to parsing ruby


UCLETTER=RubyLexer::UCLETTER

LCLETTER=RubyLexer::LCLETTER
LETTER=RubyLexer::LETTER
LETTER_DIGIT=RubyLexer::LETTER_DIGIT

def vertices; self.class.constants.grep(Node|Token) end

def self.has_return_hash_fix? #is this needed? it's not used in this file....
rl=RubyLexer.new("","return {}.size")
return(
FileAndLineToken===rl.get1token and
MethNameToken===rl.get1token and
ImplicitParamListStartToken===rl.get1token and
WsToken===rl.get1token and
KeywordToken===rl.get1token and
KeywordToken===rl.get1token and
KeywordToken===rl.get1token and
MethNameToken===rl.get1token and
ImplicitParamListStartToken===rl.get1token and
ImplicitParamListEndToken===rl.get1token and
ImplicitParamListEndToken===rl.get1token and
EoiToken===rl.get1token
)
end

#see pickaxe, 1st ed, page 221
def RIGHT_ASSOCIATIVE
{
# "defined?"=>120.5,
"**"=>118,

"="=>105, "%="=>105, "/="=>105, "-="=>105, "+="=>105,
"|="=>105, "&="=>105, ">>="=>105, "<<="=>105, "*="=>105,
"&&="=>105, "||="=>105, "**="=>105, "^="=>105,


# "and"=>99, "or"=>99,

# "if"=>98, "unless"=>98, "while"=>98, "until"=>98, "rescue"=>98,

# "&&"=>109, "||"=>108,
}
end

def PRECEDENCE
{

# "("=>122, #method param list
# "{"=>122, "do"=>122, #blocks

"::"=>121, "."=>121,

# "defined?"=>120.5,

"["=>120, #[] []= methods

"!"=>119, "~"=>119,
"+@"=>119,

"**"=>118,

"-@"=>117,

"*"=>116, "/"=>116, "%"=>116,

"+"=>115, "-"=>115,

"<<"=>114, ">>"=>114,

"&"=>113,

"^"=>112, "|"=>112,

"<="=>111, ">="=>111, "<"=>111, ">"=>111,

"<=>"=>110, "=="=>110, "==="=>110,
"!="=>110, "=~"=>110, "!~"=>110,

"&&"=>109,

"||"=>108,

".."=>107, "..."=>107,

"?"=>106, # ":"=>106, #not sure what to do with ":"

"unary&"=>105, #unary * and & operators
"lhs*"=>105, #this should remain above =
"lhs,"=>105,
"rescue3"=>105,

"="=>104, "%="=>104, "/="=>104, "-="=>104, "+="=>104,
"|="=>104, "&="=>104, ">>="=>104, "<<="=>104, "*="=>104,
"&&="=>104, "||="=>104, "**="=>104, "^="=>104,

"defined?"=>103,
"not"=>103,
":"=>102, #but not when used as a substitute for 'then'

"=>"=>101,
"rhs,"=>100, #"call,"=>100, "array,"=>100, "param,"=>100,
","=>100, "rhs*"=>100, "unary*"=>100,
#the 'precedence' of comma is somewhat controversial. it actually has
#several different precedences depending on which kind of comma it is.
#the precedence of , is higher than :, => and the assignment operators
#in certain (lhs) contexts. therefore, the precedence of lhs-comma should
#really be above "=".

#"unary" prefix function names seen has operators have this precedence
#but, rubylexer handles precedence of these and outputs fake parens
#to tell us how its parsed

"or"=>99, "and"=>99,

"if"=>98, "unless"=>98, "while"=>98, "until"=>98,

"rescue"=>98,

";"=>96,
}
end

module BracketsCall; end
Value= #NumberToken|SymbolToken|
#HerePlaceholderToken|
ValueNode&-{:lvalue =>nil}
Expr=Value

if defined? SPECIALIZED_KEYWORDS
class SpecializedKeywordToken<KeywordToken
def inspect
"#<"+self.class.name+">"
end
alias image inspect
end

KW2class={}

Punc2name={
"("=>"lparen", ")"=>"rparen",
"["=>"lbracket", "]"=>"rbracket",
"{"=>"lbrace", "}"=>"rbrace",
","=>"comma",
";"=>"semicolon",
"::"=>"double_colon",
"."=>"dot",
"?"=>"question_mark", ":"=>"colon",
"="=>"equals",
"|"=>"pipe",
"<<"=>"leftleft", ">>"=>"rightright",
"=>"=>"arrow",
}
end

def self.KW(ident)
if defined? SPECIALIZED_KEYWORDS
fail if /\\/===ident
orig_ident=ident
if Regexp===ident
list=ident.to_s[/\(?-mix:\^\((.*)\)\$\)/,1]

#pick apart any char class in ident
if open_bracket_idx=list.index(/([^\\]|^)\[/)
open_bracket_idx+=1 unless list[open_bracket_idx]=="["
close_bracket_idx=list.index(/[^\\]\]/,open_bracket_idx+1)
close_bracket_idx+=1 unless list[close_bracket_idx]=="]"
cclass=list.slice!(open_bracket_idx..close_bracket_idx)
cclass=cclass[1...-1]
cclass=cclass.scan( /[^\\]|\\./ )
cclass.map!{|ch| ch.size==1 ? ch : ch[1..1] }
end

#rest of it should be a list of words separated by |
list=list.split(/\|/).reject{|x| x==''}
list.concat cclass if cclass
list.map{|w|
w.gsub!(/\\/,'')
KW(w)
}.inject{|sum,kw| sum|kw}
else
fail unless String===ident
ident=Punc2name[ident] unless /^(?:(?!#{LETTER_DIGIT}).)+$/o===ident
fail "no name for #{orig_ident}" unless ident
eval %{
class Keyword_#{ident} < SpecializedKeywordToken
def ident; '#{orig_ident}' end
# def self.instance; @instance ||= allocate end
# def self.new; instance end
def initialize(offset)
@offset=offset
end
end
}
KW2class[ident]||=const_get("Keyword_#{ident}")
end
else
ident=case ident
when Integer; ident.chr
when String,Regexp; ident
else ident.to_s
end

return KeywordToken&-{:ident=>ident}
end
end
def KW(ident); self.class.KW(ident) end

if defined? SPECIALIZED_KEYWORDS
def make_specialized_kw(name,offset)
name=Punc2name[name] unless /^((?!#{LETTER_DIGIT}).)+$/o===name
KW2class[name].new(offset)
end
alias make_kw make_specialized_kw
else
def make_kw(name,offset)
KeywordToken.new(name,offset)
end
end

UNOP=
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
:ident=>/^(?:[+-]@|unary[&*]|(?:lhs|rhs)[*])$/,
# :ident=>/^(?:[+-]@|unary[&])$/,
#:unary =>true,
}|
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
:ident=>/^([~!]|not|defined\?)$/, #defined? should be removed from here, its handled separately
} #|
DEFOP=
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
:ident=>"defined?",
}
=begin
MethNameToken&-{ #hack, shouldn't be necessary
#rubylexer should know to generally treat "defined?" as a keyword
#or operator. (like most keywords, it can also be used as a method
# name....)
:ident=>"defined?"
}
=end

def self.Op(ident=nil, allow_keyword=false)
result=OperatorToken
result |= KeywordToken if allow_keyword
result &= -{:ident=>ident} if ident
#result[:infix?]=true
return result
end
def Op(*args); self.class.Op(*args); end
BINOP_KEYWORDS=%w[if unless while until and or && \|\|]

#HAS_PRECEDENCE=Op(/^#{PRECEDENCE.keys.map{|k| Regexp.quote k}.join('|')}$/,true)
=begin
KeywordOp=
KeywordToken & -{
:ident=>/^(#{BINOP_KEYWORDS.join('|')})$/
}
KeywordOp2=
KeywordToken & -{
:ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/
}
=end
DotOp= KW('.') #KeywordToken & -{ :ident=>"." }
DoubleColonOp= KW('::') #KeywordToken & -{ :ident=>"::" }

Op=Op()
MODIFYASSIGNOP=Op( /^(([^=])\2|[^<>=!])=$/, true )
NONASSIGNOP=Op( /([^=]|[<>=!]=)$/)
KW_Op= #some of these ought to be regular operators, fer gosh sake
Op(/^(![=~]|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)

EPSILON=Float::EPSILON*10_000_000 #this should be <<1 and >0
fail unless 1+EPSILON>1
fail unless EPSILON<0.1

def left_op_higher(op,op2)
KeywordToken===op2 or OperatorToken===op2 or return true
rightprec=@precedence[op2.to_s] or return true
rightprec+=EPSILON if @RIGHT_ASSOCIATIVE[op2.to_s]
return @precedence[op.to_s]>=rightprec
end

# LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
module LowerOp_inspect
def inspect; "lower_op" end
end

def lower_op
return @lower_op if defined? @lower_op
lower_op=item_that{|op| left_op_higher(@stack[-3],op) }
lower_op=(LOWEST_OP|(~VALUELIKE_LA() & lower_op)).la
lower_op.extend LowerOp_inspect
@lower_op=lower_op
end

#this is a hack, should use graphcopy to search for Deferreds and replace with double-Deferred as below
def item_that(*a,&b)
if defined? @generating_parse_tables
huh unless b
#double supers, one of them in a block executed after this method returns....
#man that's weird
super(*a){|ob| @saw_item_that[[super(*a,&b),ob]]=true}
else
super(*a,&b) #and then here's another
end
end

WANTS_SEMI=%w[while until if unless
def case when in rescue
elsif class module << => . ::
]
def wants_semi_context
Op(/^(<<|=>|\.|::)$/)|KW(/^(#{WANTS_SEMI.map{|ws| Regexp.quote ws }.join('|')})$/)
end
def dont_postpone_semi
@dps||=~wants_semi_context
end

#NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
#FakeBegin=KW('(')&-{:not_real? =>true}
#FakeEnd=KW(')')&-{:not_real? =>true}

#rule format:
# -[syntax pattern_matchers.+, lookahead.-]>>node type

DotCall=stack_monkey("DotCall",4,CallNode){|stack|
left,dot=*stack.slice!(-4..-3)
right=stack[-2]

right.startline=left.startline
right.set_receiver! left
}

Lvalue=(VarNode|CallSiteNode|BracketsGetNode|CommaOpNode|
ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue =>true}

BareMethod=MethNameToken|(LiteralNode&-{:bare_method=>true})

#BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
ENDWORDLIST=%w"end ) ] }"
ENDWORDS=ENDWORDLIST.map{|x| Regexp.quote x}.join('|')
BEGINWORDS=RubyLexer::BEGINWORDS
INNERBOUNDINGWORDS=RubyLexer::INNERBOUNDINGWORDS

BEGIN2END={"{"=>"}", "("=>")", "["=>"]", BEGINWORDS=>"end"}
def beginsendsmatcher
@bem||=
/^(#{BEGINWORDS}|#{ENDWORDS})$/
end

MULTIASSIGN=UnaryStarNode|CommaOpNode|ParenedNode
WITHCOMMAS=UnaryStarNode|CommaOpNode|(CallSiteNode&-{:with_commas=>true})
#(CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}})

BEGINAFTEREQUALS=
BeginNode&
-{:after_equals =>nil}&-{:non_empty=>true}
BEGINAFTEREQUALS_MARKED=
BeginNode&
-{:after_equals =>true}&-{:non_empty=>true}

LHS_COMMA=Op('lhs,',true)#&-{:tag => :lhs}
RHS_COMMA=Op('rhs,',true)#&-{:tag => :rhs}
#PARAM_COMMA=Op('param,',true)#&-{:tag => :param}
def FUNCLIKE_KEYWORD
KeywordToken&-{:ident=>@funclikes}
end
IGN_SEMI_BEFORE=KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')[1...-1]}|end|[)}\]])$/)|EoiToken
IGN_SEMI_AFTER=KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|BlockFormalsNode

#for use in lookback patterns
OPERATORLIKE_LB=OperatorToken|
KW(/^(not | defined\? | rescue3 | .*[@,] | [~!;\(\[\{?:] | \.{1,3} | :: | => | ![=~])$/x)|
KW(%r{^( \*\*? | << | >> | &&? | \|\|? | \^ | % | / | - | \+ )?=$}x)|
KW(BEGINWORDS)|KW(/^#{INNERBOUNDINGWORDS}$/)|RescueHeaderNode|StartToken|
GoalPostToken|BlockFormalsNode|AssignmentRhsListStartToken

#for use in lookahead patterns
def VALUELIKE_LA
KW(@varlikes)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP|
KW(/^[({]$/x)|VarNameToken|MethNameToken|HerePlaceholderToken|
KW(BEGINWORDS)|FUNCLIKE_KEYWORD()|AssignmentRhsListStartToken

#why isn't this a sufficient implementation of this method:
# KW('(')
#in which case, '(' can be made the highest precedence operator instead
end
LOWEST_OP=KW(/^(#{ENDWORDS})$/)|KW(/^#{INNERBOUNDINGWORDS.sub('rescue|','')}$/)|
EoiToken|GoalPostToken|AssignmentRhsListEndToken

RESCUE_BODY=-[Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,]

RESCUE_OP=Op('rescue') #|(KW('rescue')&-{:infix=>true})

RESCUE_KW=KW('rescue')&-{:infix=>nil}

inspect_constant_names if respond_to? :inspect_constant_names

(constants-%w[RawOpNode ParenedNode SequenceNode LiteralNode Node MisparsedNode]).each{|k|
if /Node$/===k.to_s
remove_const k
end
}

def RULES
lower_op= lower_op()


result=
[-[StartToken.lb, Expr.-, EoiToken.la]>>:accept,
-[EoiToken]>>:error,
]+

#these must be the lowest possible priority, and hence first in the rules list
# BEGIN2END.map{|_beg,_end|
# -[KW(_beg), (KW(_beg)|KW(_end)).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
# }+

[
# -[UNOP, Expr, lower_op]>>UnOpNode,
# -[DEFOP, ParenedNode]>>UnOpNode,
# -[Op(/^(?:unary|lhs|rhs)\*$/), ValueNode, lower_op]>>UnaryStarNode,

# -[Op('=',true)|KW(/^(rescue|when|\[)$/)|Op(/,$/,true),
# Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
# -[MethNameToken|FUNCLIKE_KEYWORD(), KW('('),
# Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
#star should not be used in an lhs if an rhs or param list context is available to eat it.
#(including param lists for keywords such as return,break,next,rescue,yield,when)

# -[Op(/^(?:unary|lhs)\*$/), (GoalPostToken|Op(/,$/,true)|KW(/^(in|[=)|;])$/)).la]>>DanglingStarNode, #dangling *
# -[Op(/,$/,true), (GoalPostToken|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
# stack_monkey("DanglingComma",1,DanglingCommaNode){|stack|
# dcomma=DanglingCommaNode.new
# dcomma.offset=stack.last.offset
# stack.push dcomma, stack.pop
# },
#hmmm.... | in char classes above looks useless (predates GoalPostToken)

-[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators

#assignment
# -[Lvalue, MODIFYASSIGNOP, Expr, lower_op]>>AssignNode,
# -[Lvalue, Op('=',true), AssignmentRhsNode, lower_op]>>AssignNode,
# -[AssignmentRhsListStartToken, Expr, AssignmentRhsListEndToken]>>AssignmentRhsNode,

# a = b rescue c acts like a ternary,,,
#provided that both a and b are not multiple and b
#(if it is a parenless callsite) has just 1 param
# -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
# Op('rescue3'), Expr, lower_op]>>AssignNode,
# -[Lvalue, Op('=',true), AssignmentRhsNode, Op('rescue3'), Expr, lower_op]>>AssignNode,

# -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
# Op('rescue3',true).la]>>:shift,

# -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
# RESCUE_OP.la] >>
# stack_monkey("rescue3",1,Op('rescue3',true)){|stack|
# resc=stack.last.dup
# resc.ident += '3'
# stack[-1]=resc
# },
#relative precedence of = and rescue are to be inverted if rescue
#is to the right and assignment is not multiple.

#if assignment rhs contains commas, don't reduce til they've been read
#(unless we're already on an rhs)
# -[(Op('=',true)|Expr).~.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la]>>:shift,
# -[RHS_COMMA.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la ]>>AssignNode,
# -[ValueNode, LHS_COMMA, ValueNode, Op('=',true).la]>>CommaOpNode,
#relative precedence of = and lhs/rhs , are to be inverted.

#mark parentheses and unary stars that come after lhs commas
# -[LHS_COMMA, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
# stack_monkey("after_comma",3,(UnaryStarNode|ParenedNode)&-{:after_comma =>true}){|stack|
# stack[-3].after_comma=true},
#mebbe this should be a lexer hack?

# -[#(OPERATORLIKE_LB&~Op('=',true)).lb,
# Expr, RESCUE_OP, Expr, lower_op]>>RescueOpNode,

#dot and double-colon
# -[DoubleColonOp, VarNode, lower_op]>>ConstantNode,#unary ::
# -[Expr, DotOp, CallNode, lower_op]>>DotCall, #binary .
# -[Expr, DoubleColonOp, CallNode, lower_op]>>DotCall, #binary ::
# -[Expr, DoubleColonOp, VarNode, lower_op]>>ConstantNode,#binary ::
#lower_op constaints on lookahead are unnecessary in above 4 (unless I give openparen a precedence)

# -[Expr, "?", Expr, ":", Expr, lower_op]>>TernaryNode,


# -[MethNameToken, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>CallNode,
# -[FUNCLIKE_KEYWORD, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>KWCallNode,

-[#(OPERATORLIKE_LB&
(MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
'(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>ParenedNode,

# -[#(OPERATORLIKE_LB&
# (MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
# '(', KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>VarLikeNode, #(), alias for nil
#constraint on do in above 2 rules is probably overkill

# -[ValueNode, Op(/,$/,true), ValueNode, lower_op]>>CommaOpNode,

-[(OPERATORLIKE_LB&dont_postpone_semi).lb,
Expr, ';', Expr, lower_op]>>SequenceNode,


# -[#(OPERATORLIKE_LB&~KW(')')).lb,
# '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode, #-40

# -[KW(')').lb, 'do', BlockFormalsNode.-, Expr.-, 'end']>>BlockNode,
#this does {} as well... converted to do...end
#rubylexer handles the 'low precedence' of do...end

# -[GoalPostToken, Expr.-, GoalPostToken]>>BlockFormalsNode,
#rubylexer disambiguated operator vs keyword '|'

# -[/^(while|until)$/, Expr, /^([:;]|do)$/, Expr.-, 'end']>>LoopNode,

# -[/^(if|unless)$/, Expr, /^(;|then|:)$/,
# Expr.-, ElsifNode.*, ElseNode.-, 'end'
# ]>>IfNode,

# -['else', Expr.-, KW(/^(ensure|end)$/).la]>>ElseNode,

# -['elsif', Expr, /^(;|then|:)$/, Expr.-,
# KW(/^(end|else|elsif)$/).la
# ]>>ElsifNode,

# -['module', ConstantNode|VarNode, KW(/^(;|::)$/).~.la]>>
# stack_monkey(1,KW(';')){|stack| #insert ; at end of module header if none was present
# stack.push KeywordToken.new(';'), stack.pop
# },
# -['module', ConstantNode|VarNode, ';', RESCUE_BODY, 'end']>>ModuleNode,
# -['class', Expr, ';', RESCUE_BODY, 'end']>>ClassNode,
# -['class', Expr, Op('<'), Expr, KW(';').~.la]>>:shift,
# -['class', Op('<<'), Expr, ';', RESCUE_BODY, 'end']>>MetaClassNode, #-30

# -['alias', BareMethod|VarNode, BareMethod|VarNode]>>AliasNode,
# -['undef', BareMethod]>>UndefNode,
# -[UndefNode, Op(',',true), BareMethod]>>UndefNode,

# -['def', CallSiteNode, Op('=').-, KW(';'), RESCUE_BODY,
# Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
# 'end'
# ]>>MethodNode,

# -['begin', RESCUE_BODY,
# Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
# 'end'
# ]>>BeginNode,

# -[Op('=',true), BEGINAFTEREQUALS, RESCUE_OP.la]>>
# stack_monkey("begin after equals",2,BEGINAFTEREQUALS_MARKED){ |stack| stack[-2].after_equals=true },
#this is bs. all for an extra :begin in the parsetree

# -[(KW(/^(;|begin)$/)|RescueNode).lb, #ParenedNode|RescueOpNode|BeginNode used to be here too
# RESCUE_KW, KW('=>').-, Expr.-, /^([:;]|then)$/,
# ]>>RescueHeaderNode,
# -[ RescueHeaderNode, Expr.-, KW(';').-, (KW(/^(else|ensure|end)$/)|RESCUE_KW).la
# ]>>RescueNode,

# -['ensure', Expr.-, KW('end').la]>>EnsureNode,

# -['[', Expr.-, ']']>>ArrayLiteralNode, #-20

# -[Expr, '[', Expr.-, ']']>>BracketsGetNode,

# -[HereDocNode, StringToken+1, StringToken.~.la]>>StringCatNode,
# -[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken+2, StringToken.~.la]>>StringCatNode,
# -[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken, StringToken.~.la]>>StringNode,
#includes regexp, wordlist, backquotes

# -['case', Expr.-, KW(';').-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,

# -['when', Expr, /^([:;]|then)$/, Expr.-,
# KW(/^(when|else|end)$/).la
# ]>>WhenNode,

# -['for', Expr, 'in', Expr, /^([:;]|do)$/, Expr.-, 'end']>>ForNode,

#semicolon cleanup....
# -[(OPERATORLIKE_LB&dont_postpone_semi).lb,Expr, ';', IGN_SEMI_BEFORE.la] \
# >>delete_monkey(2,"semi_cleanup_before_ISB"),
# -[Expr, ';', KW('then').la] >>delete_monkey(2,"semi_cleanup_before_then"),
# -[dont_postpone_semi.lb, Expr, ';', RescueNode] >>delete_monkey(3,"semi_cleanup_before_rescue"), #-10
# -[IGN_SEMI_AFTER.lb, ';'] >>delete_monkey(2,"semi_cleanup_after_oplike"),
# -[(StartToken|RescueHeaderNode).lb, ';' ] >>delete_monkey(2,"semi_cleanup_after_rescue"),
#this rule is somewhat more forgiving than matz' parser...
#not all semicolons after :, (, and { keywords should
#be ignored. some should cause syntax errors.


#comma cleanup....
# -[Op(/,$/,true), KW(/^([}\]])$/).la] >>delete_monkey(2, "comma_cleanup"),
#likewise, this is somewhat too forgiving.
#some commas before } or ] should cause syntax errors

#turn lvalues into rvalues if not followed by an assignop
# -[-{:lvalue =>true}, (Op('=',true)|MODIFYASSIGNOP|LHS_COMMA).~.la]>>
# stack_monkey("lval2rval",2,-{:lvalue =>nil}){|stack|
# stack[-2].lvalue=nil
# },

#expand the = into a separate token in calls to settors (after . or ::).
#but not in method headers
# -[(OPERATORLIKE_LB&~KW('def')).lb, Expr, DotOp|DoubleColonOp,
# (MethNameToken&-{:has_equals=>true}).la]>>
# stack_monkey("expand_equals",1,CallNode){|stack|
# methname=stack.pop
# methname.ident.chomp!('=')
# offset=methname.offset+methname.ident.size
# stack.push(
# CallNode.new(methname,nil,nil,nil,nil),
# OperatorToken.new('=',offset)
# )
# },

-[NumberToken|SymbolToken]>>LiteralNode,

#lexer does the wrong thing with -22**44.5, making the - part
#of the first number token. it's actually lower precedence than
#**... this rule fixes that problem.
#in theory, unary - is lower precedence than ., ::, and [] as well, but
#that appears not to apply to unary - in numeric tokens
# -[NumberToken&-{:negative=>true}, Op('**').la]>>
# stack_monkey("fix_neg_exp",2,Op("-@",true)){|stack|
# #neg_op.unary=true
# num=stack[-2]
# op=OperatorToken.new("-@",num.offset)
# op.startline=num.startline
# stack[-2,0]=op
# num.ident.sub!(/\A-/,'')
# num.offset+=1
# },

#treat these keywords like (rvalue) variables.
# -[@varlikes]>>VarLikeNode,

#here docs
# -[HerePlaceholderToken]>>HereDocNode,
# -[HereBodyToken.la]>>delete_monkey(1,"delete_here_body"), ##this is rediculous. this should be a lexer hack?

# -[VarNameToken]>>VarNode,


]

if @rubyversion >= 1.9
result.concat [
# -['->', ParenedNode.-, 'do', Expr.-, 'end']>>ProcLiteralNode,
# -['->', VarLikeNode["nil",{:@value=>nil}].reg, 'do', Expr.-, 'end']>>ProcLiteralNode,
-[(DotOp|DoubleColonOp).lb, '(',Expr.-,')', BlockNode.-, KW('do').~.la]>>CallNode,
]
end

return result
end

if defined? END_ATTACK
module Reducer; end
include Reducer
end

def signature
RedParse.signature(class<<self; ancestors end)
end
def RedParse.signature(ancs=ancestors)
[ancs.map{|m| m.name},
Digest::SHA256.file(__FILE__),
Digest::SHA256.file(__FILE__.sub(/\.rb\z/,"/node.rb")),
]
end

def initialize(input,name="(eval)",line=1,lvars=[],options={})
@rubyversion=options[:rubyversion]||1.8

encoding=options[:encoding]||:ascii
encoding=:binary if @rubyversion<=1.8
cache=Cache.new(
File===input,name,
:line,line,:encoding,encoding,:locals,lvars.sort.join(","),
@rubyversion, :/, *signature
)
cache_mode=options[:cache_mode]||:read_write
raise ArgumentError unless /^(?:read_(?:write|only)|write_only|none)$/===cache_mode.to_s
read_cache= /read/===cache_mode.to_s
input.binmode if input.respond_to? :binmode
if read_cache and cache and result=cache.get(input)
@cached_result=result
@write_cache=nil
return
end
if /write/===cache_mode.to_s
@write_cache,@input= cache,input
else
@write_cache=nil
end

if Array===input
def input.get1token; shift end
@lexer=input
if @rubyversion>=1.9
@funclikes=RubyLexer::RubyLexer1_9::FUNCLIKE_KEYWORDS
@varlikes=RubyLexer::RubyLexer1_9::VARLIKE_KEYWORDS
else
@funclikes=RubyLexer::FUNCLIKE_KEYWORDS
@varlikes=RubyLexer::VARLIKE_KEYWORDS
end
else
@lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion,:encoding=>encoding)
@funclikes=@lexer::FUNCLIKE_KEYWORDS()
@varlikes=@lexer::VARLIKE_KEYWORDS()
lvars.each{|lvar| @lexer.localvars[lvar]=true }
encoding=@lexer.encoding_name_normalize(encoding.to_s).to_sym
warn "#{encoding} encoding won't really work right now" if RubyLexer::NONWORKING_ENCODINGS.include? encoding
end
@funclikes=/#@funclikes|^->$/ if @rubyversion>=1.9
@filename=name
@min_sizes={}
@compiled_rules={}
@moretokens=[]
@unary_or_binary_op=/^[-+]$/
# @rules=self.expaneded_RULES
@precedence=self.PRECEDENCE
@RIGHT_ASSOCIATIVE=self.RIGHT_ASSOCIATIVE
if defined? END_ATTACK
compile
end
@saw_item_that=nil
@print_filter=proc{true}
end

attr_accessor :lexer, :print_filter
attr :rubyversion

def get_token(recursing=false)
unless @moretokens.empty?
@last_token=@moretokens.shift
p @last_token if ENV['PRINT_TOKENS'] && @print_filter[@last_token] and not recursing
return @last_token
end

rpt=ENV['RAW_PRINT_TOKENS']
begin
result=@lexer.get1token or break
p result if rpt and @print_filter[result]

#set token's line
result.startline= @endline||=1
#result.endline||=@endline if result.respond_to? :endline=

if result.respond_to?(:as) and as=result.as
#result=make_kw(as,result.offset)
#result.originally=result.ident
if OperatorToken===result #or KeywordToken===result
result=result.dup
result.ident=as
else
result2=make_kw(as,result.offset)
result2.startline=result.startline
result2.endline=result.endline
result=result2
end
result.not_real! if result.respond_to? :not_real!
else

case result
when FileAndLineToken #so __FILE__ and __LINE__ can know what their values are
@file=result.file
@endline=result.line
redo

when OperatorToken
if @unary_or_binary_op===result.ident and result.unary || result.tag==:unary
result=result.dup
result.ident+="@"
end

#more symbol table maintenance....
when KeywordToken
case name=result.ident

when /^(#{BINOP_KEYWORDS.join '|'})$/o #should be like this in rubylexer
unless result.has_end?
orig=result
result=OperatorToken.new(name,result.offset)
result.endline=orig.endline
end
when "|";
orig=result
result=GoalPostToken.new(result.offset) #is this needed still? (yes)
result.endline=orig.endline
when "__FILE__"; #I wish rubylexer would handle this
#class<<result; attr_accessor :value; end
assert result.value==@file.dup
when "__LINE__"; #I wish rubylexer would handle this
#class<<result; attr_accessor :value; end
assert result.value==@endline
else
result=make_kw name,result.offset if defined? SPECIALIZED_KEYWORDS
#warning, this may discard information stored in instance vars of result
end

when StringToken,HerePlaceholderToken
@endline=result.endline

when EoiToken; break
when HereBodyToken;
@endline=result.endline
break
when AssignmentRhsListStartToken; break
when AssignmentRhsListEndToken; break
when IgnoreToken; redo
end
end
end while false
p result if ENV['PRINT_TOKENS'] && @print_filter[@last_token] unless recursing

#ugly weak assertion
assert result.endline==@endline unless result.ident==';' && result.endline-1==@endline or EoiToken===result

return @last_token=result
end

def unget_tokens(*tokens)
@moretokens=tokens.concat @moretokens
end

def unget_token(token)
@moretokens.unshift token
end

=begin
self.LOOKAHEAD_CLASSES.each_with_index{|classes,i|
case classes
when Class: huh
when Array: classes.flatten.each{huh}
else
end
}
=end

# def fixup_multiple_assignments!; end
end


if __FILE__==$0
#this code has moved to bin/redparse; really, all this should just go away
require 'problemfiles'
class NeverExecThis<RuntimeError; end

def arraydiff(a,b)
a==b and return [a,false]
(Array===a or a=[a])
result= a.dup
diff=false
size= a.size >= b.size ? a.size : b.size
size.times{|i|
ai=a[i]
bi=b[i]
if Array===ai and Array===bi
result_i,diff_i= arraydiff(ai,bi)
diff||=diff_i
result[i]=result_i
elsif ai!=bi
next if Regexp===ai and ai.to_s==bi.to_s and
ai.options==bi.options
diff=true
result[i]={ai=>bi}
elsif ai.nil?
result[i]={'size mismatch'=>"#{a.size} for #{b.size}"} if a.size!=b.size
diff=true
end
if i.nonzero? and Hash===result[i] and Hash===result[i-1]
old=result[i-1]
oldkeys=old.keys
oldvals=old.values
if Reg::Subseq===oldkeys.first
oldkeys=oldkeys.children
oldval=oldvals.children
end
result[i-1..i]=[ {-[*oldkeys+result[i].keys]=>-[*oldvals+result[i].values]} ]
end
}
return result,diff
end

output=:pp
quiet=true
while /^-/===ARGV.first
case opt=ARGV.shift
when "--"; break
when "--pp"; output=:pp
when "--lisp"; output=:lisp
when "--parsetree"; output=:parsetree
when "--vsparsetree"; output=:vsparsetree
when "--vsparsetree2"; output=:vsparsetree2
when "--update-problemfiles"; problemfiles=ProblemFiles.new
when "-q"; quiet=true
when "-v"; quiet=false
when "-e"; inputs=[ARGV.join(" ")]; names=["-e"]; break
else fail "unknown option: #{opt}"

end
end

unless inputs
if ARGV.empty?
inputs=[STDIN.read]
names=["-"]
elsif ARGV.size==1 and (Dir.entries(ARGV.first) rescue false)
names=Dir[ARGV.first+"/**/*.rb"]
else
names=ARGV.dup
end
inputs||=names.map{|name| File.open(name).read rescue nil}
end

result=0

safety="BEGIN{raise NeverExecThis};BEGIN{throw :never_exec_this,1};\n"
nullsafety="\n"
safe_inputs=inputs.map{|input| safety+input}

inputs.each_index{|i|
begin

input=inputs[i] or next
name=names[i]

input=nullsafety+input
#print name+"... "; STDOUT.flush

begin
tree=nil
if catch(:never_exec_this){
tree=RedParse.new(input,name).parse; nil
} #raise NeverExecThis
# rescue RedParse::ParseError=>e
# require 'pp'
# pp e.stack[-[15,e.stack.size].min..-1]
# raise
# rescue NeverExecThis
puts "RedParse attempted to execute parse data in #{name}"
next
end
rescue Interrupt; exit 2
rescue Exception=>e
# puts e.backtrace.join("\n")
e.message << " during parse of #{name}"
# err=e.class.new(e.message+" during parse of #{name}")
# err.set_backtrace e.backtrace
problemfiles.push name if problemfiles
raise e
end
tree or fail "parsetree was nil for #{name}"

case output
when :pp
require 'pp'
pp tree
when :lisp
puts tree.to_lisp
when :parsetree
pp tree.to_parsetree
when :vsparsetree,:vsparsetree2
begin
require 'rubygems'
rescue Exception
end
require 'parse_tree'
#require 'algorithm/diff'
begin
mine=tree.to_parsetree(:quirks)
if IO===input
input.rewind
input=input.read
end
ryans=nil
catch(:never_exec_this){
ryans=ParseTree.new.parse_tree_for_string(safe_inputs[i],name); nil
} and raise NeverExecThis
delta,is_diff=arraydiff(mine,ryans)
rescue NeverExecThis
puts "ParseTree attempted to execute parse data in #{name}"
next
rescue Interrupt; exit 2
rescue Exception=>e
#raise( RuntimeError.new( "#{e} during to_parsetree of #{name}" ) )
puts "error during to_parsetree of #{name}"
problemfiles.push name if problemfiles
raise
end
if output==:vsparsetree2
if !quiet or is_diff
puts "mine:"
pp mine
puts "ryans:" if is_diff
pp ryans if is_diff
end
elsif !quiet or is_diff
puts 'differences in '+name if is_diff
pp delta
end
if is_diff
result=1
problemfiles.push name if problemfiles
else
puts "no differences in "+name
problemfiles.delete name if problemfiles
end
end

rescue NeverExecThis
puts "mysterious attempt to execute parse data in #{name}"
next
rescue Interrupt,SystemExit; exit 2
rescue Exception=>e
puts "#{e}:#{e.class}"
puts e.backtrace.join("\n")
#problemfiles.push name if problemfiles
#raise
ensure
STDOUT.flush
end
}
exit result
end

=begin old todo:
v merge DotCallNode and CallSiteNode and CallWithBlockNode
v remove actual Tokens from parse tree...
v split ParenedNode into ParenedNode + Rescue/EnsureNode
x 'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
x -should not appear in final output
v split keywordopnode into loop and if varieties?
=end

=begin old optimization opportunities:, ha!
top of stack slot contains mostly keywords, specific node classes, and Expr
lookahead slot contains mostly lower_op and keywords, with a few classes and inverted keywords
-(lower_op is hard to optimize)
if top of stack matcher is Expr, then the next matcher down is mostly keywords, with some operators
class membership can be optimized to test of integer within a range
keywords could be stored as symbols instead of strings
a few rules may need exploding (eg, ensure) to spoon feed the optimizer
make all Nodes descendants of Array
=end

#todo:
#each node should have a corresponding range of tokens
#-in an (optional) array of all tokens printed by the tokenizer.
#v test stack_monkey mods
#v break ParenedNode into 2 (3?) classes
#x invent BEGINNode/ENDNode? (what other keywords?)
#v at least make BEGIN/END be KWCallNode
#v replace VarNameToken with VarNode in parser
#x convert raw rules to lists of vertex identities?
#v DottedRule class
#v ParserState class (set of DottedRules)
#v MultiReduce
#v MultiShift
#v ParserState#evolve(identity)
#v DottedRule#evolve(identity)
#v RedParse#enumerate_states
#v RedParse#enumerate_exemplars
#v Node/Token.enumerate_exemplars
#v Node/Token.identity_param
#v rename #lvalue? => #lvalue
#x likewise get rid of other oddly named identity params
#v BareMethod,WITHCOMMAS,BEGINAFTEREQUALS should have predicate methods defined for them
#v do something about BEGINAFTEREQUALS... lots predicates, ugly to identify
#v document identity parameters in nodes and tokens
#operator and keyword tokens have some identity_param variations remaining...maybe?
#xx all identity readers have to have writers as well (even if fake)
#v sort out vertex identities... call identity_param in apt classes
#convert identities<=>small ints
#convert ParserStates<=>small ints
#> lower_op/proc lookahead requires special action type with shift and reduce branches
#x stack monkeys dictate some nodes appear in s/r table... which ones?
#x some stack monkeys pushback nodes, action table must take take those as input
#v retype GoalPostNode => GoalPostToken
#v then, pushback* should go away
#v build shift/reduce table
#v build goto table
#split tables into shift/reduce and goto....?
#v integrate with c code generator
#finish c code generator
#code generator needs a way to deal with :
#backtracking (to more than 1 node/token???)
#actions (stack monkeys/lower_op)
#every reduce requires b/ting thru the lookahead
#garbage collection
#sharing ruby objects between ruby code and generated c code
#optimizer?
#ruby code generator?
#v what to do with :shift ?
#what to do with :accept ?
#what to do with :error ?
#Node.create (used in generated code)
#Node.create <= takes input directly from semantic stack
#build Node.create param list generator
#v names for rules, dotted rules, parser states, identities
#x StartNode may be a problem... used by a stack monkey,
#to remove extra ;s from the very beginning of input.
#use a lexer hack instead?
#v convert StartNode to StartToken?
#convert names to numbers and numbers to names
#for states, rules, vertex identities
#in ruby and c (??)
#x rule for HereBodyToken should be a lexer hack?
#v stack monkeys should have names
#how to handle a stack monkey whose 2nd parameter is not a single identity?
#even reduces may not have enough info since 1 node class may have multiple identities
#v RedParse constants should be named in inspect
#v toplevel rule?
#v semantic stack in generated c code should be a ruby array
#x state stack should keep size of semantic stack at the time states are pushed,
#so that i can restore semantic stack to former state when b-ting/reducing
#urk, how do I know how many levels of state stack to pop when reducing?
#in looping error rules, just scan back in semantic stack for rule start
#in regular looping rules, transition to loop state is saved on a special stack
#so that at reduce time, we can b/t to that point for a start
#if rule contains only scalars, b/t is easy
#else rule contains scalars and optionals:
#scan for rule start vertex starting at highest node
#on semantic stack that can contain it and working downward.
#also, statically verify that relevent rules contain no collisions among first (how many?) matchers

#is lookahead in code generator even useful? my tables have built-in lookahead....
#need hack to declare nonerror looping matchers as irrevokable (for speed, when reducing)
#v assignmentRhsNode needs an identity_param for with_commas
#v -** fixup and setter breakout rules need dedicated identity_params too
# = rescue ternary is broken again now...
#v instead of shift states and is_shift_state? to find them,
#v i should have shift transitions. (transitions that imply a shift... in response to a token input.)
#v all states will have 2 entry points, for shift and nonshift transitions.
#split big table into goto(node) and sr(token) tables
#in each state, most common sr action should be made default
#unused entries in goto table can be ignored.
#most common goto entries (if any) can be default.
#is the change_index arg in stack_monkey calls really correct everywhere? what are
#the exact semantics of that argument? what about stack_monkeys that change the stack size?
#should there be another arg to keep track of that?
#maybe rewrite stack_monkeys so they're a little clearer and easier to analyze (by hand)
#MultiShift/MultiReduce are not supported actions in generate.rb
#:accept/:error are not supported actions in generate.rb