redparse 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +33 -2
- data/Manifest.txt +2 -3
- data/README.txt +157 -265
- data/Rakefile +13 -4
- data/bin/redparse +3 -2
- data/lib/redparse.rb +2423 -489
- data/lib/redparse/generate.rb +378 -0
- data/lib/redparse/node.rb +1497 -620
- data/lib/redparse/reg_more_sugar.rb +21 -0
- data/lib/redparse/version.rb +1 -1
- data/test/data/def_spec.rb +2 -0
- data/test/rp-locatetest.rb +2 -1
- data/test/test_redparse.rb +594 -61
- metadata +17 -8
- data/nurli/test_control.nurli +0 -261
- data/redparse.vpj +0 -92
- data/redparse.vpw +0 -8
data/Rakefile
CHANGED
@@ -3,7 +3,15 @@
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'hoe'
|
5
5
|
require 'lib/redparse/version.rb'
|
6
|
-
|
6
|
+
|
7
|
+
if $*==["test"]
|
8
|
+
#hack to get 'rake test' to stay in one process
|
9
|
+
#which keeps netbeans happy
|
10
|
+
$:<<"lib"
|
11
|
+
require "test/test_redparse.rb"
|
12
|
+
Test::Unit::AutoRunner.run
|
13
|
+
exit
|
14
|
+
end
|
7
15
|
|
8
16
|
readme=open("README.txt")
|
9
17
|
readme.readline("\n== DESCRIPTION:")
|
@@ -13,13 +21,14 @@ require 'lib/redparse/version.rb'
|
|
13
21
|
hoe=Hoe.new("redparse", RedParse::VERSION) do |_|
|
14
22
|
_.author = "Caleb Clausen"
|
15
23
|
_.email = "redparse-owner @at@ inforadical .dot. net"
|
16
|
-
_.url = ["http://
|
17
|
-
_.extra_deps << ['rubylexer', '>= 0.7.
|
24
|
+
_.url = ["http://github.com/coatl/redparse/", "http://rubyforge.org/projects/redparse/"]
|
25
|
+
_.extra_deps << ['rubylexer', '>= 0.7.4']
|
18
26
|
_.extra_deps << ['reg', '>= 0.4.7']
|
27
|
+
_.extra_deps << 'Ron'
|
19
28
|
# _.test_globs=["test/*"]
|
20
29
|
_.description=desc
|
21
30
|
_.summary=desc[/\A[^.]+\./]
|
22
|
-
|
31
|
+
_.spec_extras={:bindir=>'bin/'}
|
23
32
|
# _.rdoc_pattern=/\A(README\.txt|lib\/.*\.rb)\Z/
|
24
33
|
# _.remote_rdoc_dir="/"
|
25
34
|
end
|
data/bin/redparse
CHANGED
@@ -73,10 +73,11 @@ class ParseTree<RawParseTree
|
|
73
73
|
return tree,warnings
|
74
74
|
ensure
|
75
75
|
STDERR.reopen oldSTDERR
|
76
|
-
|
76
|
+
if warnstash
|
77
77
|
warnstash.rewind
|
78
78
|
warnings.replace warnstash.read.split
|
79
79
|
warnstash.close
|
80
|
+
end
|
80
81
|
end
|
81
82
|
end
|
82
83
|
|
@@ -185,7 +186,7 @@ inputs.each_index{|i|
|
|
185
186
|
when :lisp
|
186
187
|
puts tree.to_lisp
|
187
188
|
when :unparse
|
188
|
-
puts tree.unparse
|
189
|
+
puts tree.unparse
|
189
190
|
when :parsetree
|
190
191
|
tree=tree.to_parsetree
|
191
192
|
hack=tree.dup
|
data/lib/redparse.rb
CHANGED
@@ -19,19 +19,1968 @@
|
|
19
19
|
#warn 'hacking up LOAD_PATH to include the latest RubyLexer!'
|
20
20
|
#$:.unshift Dir.pwd+'/../rubylexer/lib', Dir.pwd+'/../rubylexer'
|
21
21
|
|
22
|
-
# "faster rule compiler is untested"
|
23
22
|
|
24
|
-
require '
|
23
|
+
require 'forwardable'
|
24
|
+
|
25
|
+
begin
|
26
|
+
require 'rubygems'
|
27
|
+
rescue LoadError=>e
|
28
|
+
#hope we don't need it
|
29
|
+
raise unless /rubygems/===e.message
|
30
|
+
end
|
25
31
|
require 'rubylexer'
|
26
32
|
require 'reg'
|
27
33
|
|
28
|
-
require "redparse/node"
|
29
|
-
#require "redparse/decisiontree"
|
30
|
-
require "redparse/reg_more_sugar"
|
31
|
-
|
32
|
-
|
34
|
+
require "redparse/node"
|
35
|
+
#require "redparse/decisiontree"
|
36
|
+
require "redparse/reg_more_sugar"
|
37
|
+
require "redparse/generate"
|
38
|
+
|
39
|
+
class RedParse
|
40
|
+
|
41
|
+
####### generic stuff for parsing any(?) language
|
42
|
+
|
43
|
+
# include Nodes
|
44
|
+
# A named parser action that rewrites the parse stack directly instead of
# building a node from the matched elements.
#
# name                - String identifying the monkey; also what _dump emits.
# first_changed_index - position (from the end of the stack) of the first
#                       element the monkey may change. Positive values are
#                       negated, so the stored index is always <= 0.
# and_expect_node     - hint describing what the monkey leaves on the stack
#                       (also readable as #hint).
# options             - accepted for compatibility; not used by this class.
# monkey_code         - block invoked with the stack by #[].
class StackMonkey
  def initialize(name,first_changed_index,and_expect_node,options={},&monkey_code)
    first_changed_index=-first_changed_index if first_changed_index>0
    @name,@first_changed_index,@and_expect_node,@monkey_code=
      name,first_changed_index,and_expect_node,monkey_code
  end

  attr_reader :name, :first_changed_index, :and_expect_node, :monkey_code
  alias hint and_expect_node
  attr_accessor :exemplars

  # Run the monkey's block against +stack+ and return the block's result.
  def [](stack)
    @monkey_code[stack]
  end

  # Marshal hook: a monkey is serialized as just its name (its block cannot
  # be dumped).
  def _dump depth
    @name
  end

  # Marshal hook: look the monkey up again by the dumped name in the
  # current parser's undumpables table.
  #
  # The lookup key is the dumped string +str+. (Previously this read @name,
  # which inside a class method is an unset class-level instance variable,
  # so the lookup key was always nil.)
  def self._load str
    Thread.current[:$RedParse_parser].undumpables[str]
  end

  # C-code generation hook. Returns a goto statement once @goto_label has
  # been set; otherwise returns nil, because the generator template below
  # is disabled.
  def action2c
    #"return the whole thing on first call, just a goto stmt after that"
    return "    goto #@goto_label;\n" if defined? @goto_label

    # Disabled template (was a heredoc wrapped in =begin/=end):
    #
    #   <<-E
    #     #{@goto_label=@name.gsub(/[^a-z0-9_]/,'_')}:
    #       monkey=rb_hash_get(undumpables,rb_cstr2str("#@name"));
    #       rb_funcall(monkey,rb_intern("[]"),huh_stack);
    #
    #       /*recover from stackmonkey fiddling*/
    #       for(i=0;i<#{-@first_changed_index};++i) {
    #         rb_ary_unshift(lexer_moretokens,
    #           rb_ary_pop(huh_semantic_stack));
    #         rb_ary_pop(huh_syntax_stack);
    #       }
    #
    #       goto #{Node===@and_expect_node ?
    #                postreduceaction4this_state(@and_expect_node) :
    #                shiftaction4this_state
    #       };
    #   E
    nil
  end
end
|
93
|
+
# A StackMonkey whose only effect is to delete one element of the stack.
# +index+ is normalized to a non-positive offset from the end of the stack
# (positive values are negated), exactly as StackMonkey does for its
# first_changed_index.
class DeleteMonkey<StackMonkey
  def initialize(index,name)
    offset = index>0 ? -index : index
    @index = offset
    super(name,offset,nil) do |stack|
      stack.delete_at(offset)
    end
  end
end
|
100
|
+
# Instance-level shorthand for StackMonkey.new; all arguments and the
# block are passed straight through.
def stack_monkey(*args,&block)
  StackMonkey.new(*args,&block)
end

# Class-level twin of #stack_monkey.
def self.stack_monkey(*args,&block)
  StackMonkey.new(*args,&block)
end

# Shorthand for DeleteMonkey.new(index,name).
def delete_monkey(index,name)
  DeleteMonkey.new(index,name)
end
|
103
|
+
|
104
|
+
# Try to match a single rule against the top of @stack and, if it matches,
# perform the rule's action.
#
# rule - a Reg::Transform; its left side holds the matchers, its right side
#        the action (a node Class, a Proc/StackMonkey, or one of the
#        symbols :shift, :accept, :error).
#
# Returns false if the rule does not match, 0 if the action is :shift,
# and true after any other successful action. Throws :ParserDone for
# :accept and :error. Fails (raises) on a malformed rule or action.
#
# Changes from the previous version: the unreachable `if false` rule
# dissection and the `rescue Exception ... raise` clause (which only
# re-raised) were removed; behavior is unchanged.
def evaluate rule
  #dissect the rule
  Reg::Transform===rule or fail
  node_type= rule.right
  rule=rule.left.subregs.dup
  #a trailing Proc or LookAhead is not matched against the stack body;
  #it gets to veto the match later, using the lookahead token
  lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
  #a leading LookBack is unwrapped; lookback remembers that it was there
  lookback=rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]

  #index of data at which to start matching
  i=@stack.size-1   #-1 because last element of @stack is always lookahead

  #I could call this a JIT compiler, but that's a bit grandiose....
  #more of a JIT pre-processor
  compiled_rule=@compiled_rules[rule]||=
    rule.map{|pattern|
      String|Regexp===pattern ? KW(pattern) : pattern
    }

  #what's the minimum @stack size this rule could match?
  rule_min_size=@min_sizes[compiled_rule]||=
    compiled_rule.inject(0){|sum,pattern|
      sum + pattern.itemrange.begin
    }
  i>=rule_min_size or return false

  matching=[]

  #actually try to match rule elements against each @stack element in turn
  #(right to left, so the stack is walked downward from the top)
  compiled_rule.reverse_each{|matcher|
    i.zero? and fail
    target=matching
    #is this matcher optional? looping?
    loop= matcher.itemrange.last.to_f.infinite?
    minimum=matcher.itemrange.first
    optional=minimum.zero?
    #a looping matcher collects its matches in a sub-array of its own
    matching.unshift target=[] if loop
    if loop or optional
      matcher=matcher.subregs[0]
    end

    begin
      if matcher===@stack[i-=1] #try match
        target.unshift @stack[i]
      else
        #if match failed, the whole rule fails
        #unless this match was optional, in which case, ignore it
        #or was looping and met its minimum
        #but bump the data position back up, since the latest datum
        #didn't actually match anything.
        return false unless optional or loop&&target.size>=minimum
        i+=1
        matching.unshift nil unless loop
        break
      end
    end while loop
  }

  matchrange= i...-1  #what elems in @stack were matched?

  #give lookahead matcher (if any) a chance to fail the match
  case lookahead_processor
  when ::Reg::LookAhead
    return false unless lookahead_processor.subregs[0]===@stack.last
  when Proc
    return false unless lookahead_processor[self,@stack.last]
  end

  #if there was a lookback item, don't include it in the new node
  if lookback
    matchrange= i+1...-1  #what elems in @stack were matched?
    matching.shift
  end

  #replace matching elements in @stack with node type found
  case node_type
  when Class
    node=node_type.new(*matching)
    node.startline||=@stack[matchrange.first].startline
    node.endline=@endline
    @stack[matchrange]=[node]
  when Proc,StackMonkey;   node_type[@stack]
  when :shift; return 0
  when :accept,:error; throw :ParserDone
  else fail
  end

  return true #let caller know we found a match
end
|
205
|
+
|
206
|
+
# Raised when the parser cannot reduce its input to a single tree, or when
# the resulting tree contains error nodes.
#
# msg   - the error message.
# stack - the parser stack at the time of the failure, exposed via #stack
#         for diagnostics.
class ParseError<RuntimeError
  def initialize(msg,stack)
    # The superclass initializer is now called exactly once. Previously it
    # was called twice with the same message, around a block disabled with
    # `if false` that would have appended the classes and line ranges of
    # the stack entries to msg.
    super(msg)
    @stack=stack
  end

  attr_reader :stack
end
|
226
|
+
|
227
|
+
# Read from the parse stack; arguments are forwarded unchanged to @stack's
# own [] method.
def [](*args)
  @stack[*args]
end
|
230
|
+
|
231
|
+
# Write to the parse stack; arguments are forwarded unchanged to @stack's
# own []= method.
def []=(*args)
  @stack.__send__(:[]=, *args)
end
|
234
|
+
|
235
|
+
#try all possible reductions
|
236
|
+
# Try every rule, last rule first, until one matches.
#
# Returns the first truthy result of #evaluate. If no rule matches, returns
# the result of the last attempt, or nil when there are no rules at all.
def reduce
  outcome=nil
  @rules.reverse_each do |rule|
    outcome=evaluate(rule)
    break if outcome
  end
  outcome
end
|
243
|
+
|
244
|
+
# Run the shift/reduce loop over the token stream and return the parse tree.
#
# Returns the single node left between the StartToken and the lookahead
# token, or a NopNode when the stack never grows past those two entries.
# Raises ParseError if the stack cannot be reduced to exactly three entries
# or if the resulting tree contains error nodes. @stack is always cleared
# and the previous thread-local parser restored on the way out.
#
# Changes from the previous version:
# * error messages are built by interpolation, so a nil @filename no
#   longer raises NoMethodError while reporting a parse error
#   (NOTE(review): whether @filename can be nil is not visible here);
# * the `not if ... end` walk callback is spelled out as explicit
#   true/false results;
# * a long commented-out rescue clause (which printed the input being
#   parsed and re-raised) was dropped.
def parse
  #hack, so StringToken can know what parser its called from
  #so it can use it to parse inclusions
  oldparser=Thread.current[:$RedParse_parser]
  Thread.current[:$RedParse_parser]||=self

  @rules||=expanded_RULES()
  # @inputs||=enumerate_exemplars

  @stack=[StartToken.new, get_token]
         #last token on @stack is always implicitly the lookahead
  catch(:ParserDone){ loop {
    #try all possible reductions
    next if reduce==true

    #no rule can match current @stack, get another token
    tok=get_token or break

    #shift our token onto the @stack
    @stack.push tok
  }}

  @stack.size==2 and return NopNode.new #handle empty parse string

  #unless the @stack is 3 tokens,
  #with the last an Eoi, and first a StartToken
  #there was a parse error
  unless @stack.size==3
    pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
    top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
    raise ParseError.new(top.msg,@stack)
  end
  EoiToken===@stack.last or fail
  StartToken===@stack.first or fail

  result= @stack[1]

  #multiple assignment must be resolved
  #afterwards by walking the parse tree.
  #(because the relative precedences of = and ,
  #are reversed in multiple assignment.)
  # result.respond_to? :fixup_multiple_assignments! and
  #   result=result.fixup_multiple_assignments!

  #relative precedence of = and rescue are also inverted sometimes
  # result.respond_to? :fixup_rescue_assignments! and
  #   result=result.fixup_rescue_assignments!

  #do something with error nodes
  msgs=[]
  result.walk{|parent,i,subi,node|
    #the callback's value is false for an error node (after recording its
    #message) and true for every other node, as before
    if node.respond_to? :error and node.error?(@rubyversion)
      msgs<< "#{@filename}:#{node.blame.msg}"
      false
    else
      true
    end
  } if result.respond_to? :walk #hack hack
  result.errors=msgs unless msgs.empty?
  #other types of errors (lexer errors, exceptions in lexer or parser actions)
  #should be handled in the same way, but currently are not
  # puts msgs.join("\n")

  unless msgs.empty?
    pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
    raise RedParse::ParseError.new(msgs.join("\n"),@stack)
  end

  # result=NopNode.new if EoiToken===result
  return result
ensure
  @stack=nil
  Thread.current[:$RedParse_parser]=oldparser
end
|
335
|
+
|
336
|
+
|
337
|
+
#HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
|
338
|
+
|
339
|
+
# Alternative reduce step: lazily builds a Reducer over @rules (kept in
# @reducer) and asks it to reduce the current @stack.
def new_disabled_reduce
  #@hier||=Class::FlattenedHierarchy.new *STACKABLE_CLASSES()
  @reducer=Reducer.new(@rules) unless @reducer
  @reducer.reduce(@stack)
end
|
345
|
+
#
|
346
|
+
if defined? END_ATTACK
|
347
|
+
# An ordered set of parser rules addressed by bitmask: bit i of a mask
# stands for @rules[i]. Also answers which classes can appear on the stack
# or as lookahead for a given rule.
#
# Changes from the previous version:
# * mask2rules and each_rule were each defined twice; the first definitions
#   (immediately overridden) tested `mask&(1<<i)`, which is always truthy
#   in Ruby because 0 is truthy. Only the effective, ffs-based definitions
#   are kept, so behavior is unchanged.
# * the fallback around the inline C ffs() no longer rescues Exception;
#   it rescues ScriptError (which includes LoadError) and StandardError,
#   so signals and SystemExit are not swallowed.
class RuleSet
  def initialize(rules)
    @rules=rules.reverse
    #rule order must be reversed relative to the usual RedParse rule
    #order... merely so that ffs can work right.
    @maxmask=(1<<@rules.size)-1
    @subclasses_of=child_relations_among(*STACKABLE_CLASSES())
  end

  # Bitmask with bit i set for every @rules[i] contained in +rules+.
  def rules2mask(rules)
    mask=0
    @rules.each_with_index{|r,i|
      mask |= 1<<i if rules.include? r
    }
    return mask
  end

  # The rules whose bits are set in +mask+, lowest bit first.
  def mask2rules(mask)
    result=[]
    while mask.nonzero?
      result<< @rules[i=ffs(mask)-1]
      mask &= ~(1<<i)
    end
    return result
  end

  # Yield (rule, index) for every bit set in +mask+, lowest bit first.
  # Defaults to all rules.
  def each_rule(mask=@maxmask)
    while mask.nonzero?
      yield @rules[i=ffs(mask)-1],i
      mask &= ~(1<<i)
    end
  end

  #lookup table: for a byte value b (1..255), @@FFS_TABLE[b] is the
  #1-based position of b's lowest set bit; entry 0 is nil.
  @@FFS_TABLE=[nil]
  1.upto(8){|n|
    @@FFS_TABLE*=2
    @@FFS_TABLE[@@FFS_TABLE.size/2]=n
  }

  # Pure-ruby find-first-set: 1-based position of the lowest set bit of
  # +mask+, or 0 if mask is zero. Scans one byte at a time.
  def rb_ffs(mask)
    chunks=0
    until mask.zero?
      result=@@FFS_TABLE[mask&0xFF]
      return result+(chunks<<3) if result
      chunks+=1
      mask>>=8
    end
    return 0
  end

  #prefer a C implementation of ffs (via the 'inline' library) when it can
  #be loaded and built; otherwise fall back to rb_ffs.
  begin
    require 'inline'
    inline{|inline|
      inline.prefix '#define _GNU_SOURCE'
      inline.include '"string.h"'
      inline.include '"limits.h"'
      inline.c %{
        unsigned c_ffs(VALUE mask){
          if FIXNUM_P(mask) {
            return ffsl(NUM2UINT(mask));
          } else if(TYPE(mask)==T_BIGNUM) {
            struct RBignum* bn=RBIGNUM(mask);
            int len=bn->len;
            int i;
            unsigned offset=0;
            unsigned result=0;
            for(i=0;i<len;++i){
              /*printf("least:%x\\n", ((BDIGIT*)(bn->digits))[i]);*/
              /*printf("most:%x\\n", ((BDIGIT*)(bn->digits))[len]);*/
              result=ffs(((BDIGIT*)(bn->digits))[i]);
              if (result) break;
              offset+=sizeof(int)*CHAR_BIT;
            }
            if (result==0) return 0;
            return result+offset;
          } else {
            rb_fatal("bad argument to ffs");
          }
        }
      }
    }
    alias ffs c_ffs
  rescue ScriptError, StandardError=>e
    warn "error (#{e.class}) while defining inline c ffs()"
    warn "original error: #{e}"
    warn "falling back to ruby version of ffs()"
    alias ffs rb_ffs
  end

  #just the left side (the stack/lookahead matchers)
  def LEFT
    @rules.map{|r| r.left.subregs }.flatten
  end

  #remove lookahead and lookback decoration
  def LEFT_NO_LOOKING
    l=LEFT()
    l.map!{|m|
      case m #
      when Reg::LookAhead,Reg::LookBack; m.subregs[0]
      when Proc; []
      else m #
      end #
    }
    l
  end

  #all classes mentioned in rules, on left and right sides
  def STACKABLE_CLASSES #
    return @sc_result unless @sc_result.nil?
    #placeholder so that a re-entrant call (via sc_juice/juice) returns
    #false instead of recursing
    @sc_result=false
    l=LEFT_NO_LOOKING()
    l=l.map{|lm| sc_juice lm}.flatten.compact
    r=  @rules.map{|rr| rr.right }.grep(Class) #classes in productions
    result=l+r
    @sc_result=result.grep(Class).uniq
    fail if @sc_result.empty?
    return @sc_result
  end

  # Classes that matcher +m+ could match, including known subclasses once
  # @subclasses_of has been set. May return nested arrays; callers flatten.
  def juice(m)
    case m  #
    when Class;
      return [m] unless @subclasses_of
      result=[m]  # and subclasses too
      i=0
      while item=result[i]
        #p item
        result.concat @subclasses_of[item]
        i += 1
      end
      result
    when String,Regexp; juice(RedParse.KW(m))
    when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
    when Reg::Or; m.subregs.map( &method(:juice) )
    when Reg::Not;
      m=m.subregs[0]
      if Class===m or (Reg::Or===m  and
           m.subregs.inject{|sum,x| sum && (Class===x) })
        j=juice(m)
        #falls back to j if the subtraction raises (e.g. while
        #STACKABLE_CLASSES is still being computed and returns false)
        STACKABLE_CLASSES()-j.flatten.compact rescue j
      else
        STACKABLE_CLASSES()
      end
    else STACKABLE_CLASSES()
    end
  end

  # Like juice, but used while computing STACKABLE_CLASSES: it never
  # consults STACKABLE_CLASSES itself, and unknown matchers contribute
  # nothing.
  def sc_juice(m)
    case m #
    when Class; [m]
    when String,Regexp; juice(RedParse.KW(m))
#      when String,Regexp; [KeywordToken]
    when Reg::And; m.subregs.map{|x| sc_juice(x)}.compact.map{|x| x.flatten.compact}.inject{|sum,rr| sum&rr }
    when Reg::Or; m.subregs.map( &method(:sc_juice) )
    when Reg::Not; sc_juice(m.subregs[0])
    when Reg::LookAhead, Reg::LookBack; sc_juice(m.subregs[0])
    else []
    end
  end

  # Classes acceptable as lookahead for +rule+: those of its trailing
  # LookAhead matcher, or every stackable class if it has none.
  def LOOKAHEAD_CLASSES rule
    last=rule.left.subregs.last
    return STACKABLE_CLASSES() unless Reg::LookAhead===last
    la= last.subregs[0]
    return juice(la).flatten.compact
  end
  #
  # Classes acceptable on top of the stack for +rule+: those of its last
  # non-lookahead matcher, plus those of the matchers before it for as long
  # as the matchers being skipped are Repeats that may match zero times.
  def TOS_CLASSES rule
    i=-1
    mats=rule.left.subregs
    m=mats[i]
    m=mats[i-=1] if Reg::LookAhead===m || Proc===m
    result=[]
    while Reg::Repeat===m and m.times.min.zero?
      result<<juice(m.subregs[0])
      m=mats[i-=1]
    end
    return (result+juice(m)).flatten.compact
  end

  # The rule at (reversed) position +i+.
  def [](i)
    @rules[i]
  end

end #
|
551
|
+
#
|
552
|
+
# Mixin that narrows down which rules are worth evaluating by looking at
# the classes of the top two stack entries. The including object supplies
# @rules, @stack and #evaluate.
module Reducer
  #per-class caches of compiled results, keyed by the includer's class
  @@rulesets={}
  @@class_narrowerses={}

  # Build @ruleset and @class_narrowers. Unless +recompile+ is true, a
  # result cached for this object's class is reused, and a freshly built
  # result is stored in the cache.
  def compile(recompile=false)
    owner=self.class

    #use cached result if available
    cached=@@rulesets[owner]
    if cached and !recompile
      @ruleset=cached
      @class_narrowers=@@class_narrowerses[owner]
      return
    end

    #actual rule compilation
    @ruleset=RuleSet.new @rules
    tos_masks=Hash.new(0)
    la_masks=Hash.new(0)
    @class_narrowers=[tos_masks,la_masks]
    @ruleset.each_rule do |rule,bit|
      flag=1<<bit
      @ruleset.LOOKAHEAD_CLASSES(rule).each{|k| la_masks[k] |= flag }
      @ruleset.TOS_CLASSES(rule).each{|k| tos_masks[k] |= flag }
    end

    #save result to cache if not too dynamic
    unless recompile
      @@rulesets[owner]=@ruleset
      @@class_narrowerses[owner]=@class_narrowers
    end
  end

  # Evaluate only the rules whose lookahead mask (for the class of the last
  # stack entry) and top-of-stack mask (for the class of the entry before
  # it) both include them. Returns the first truthy #evaluate result, else
  # false.
  def new_reduce
    la_mask=@class_narrowers[-1][@stack[-1].class]
    tos_mask=@class_narrowers[-2][@stack[-2].class]
    @ruleset.each_rule(la_mask&tos_mask) do |rule,_bit|
      outcome=evaluate(rule)
      return outcome if outcome
    end
    false
  end
end
|
598
|
+
end
|
599
|
+
|
600
|
+
# Like map, but the block also receives each element's zero-based position.
# Returns the array of block results.
def map_with_index(list)
  position=-1
  list.map{|elem| yield(elem,position+=1) }
end
|
605
|
+
|
606
|
+
# Build (once) and return the list of Rule objects for this parser.
#
# Each raw rule from expanded_RULES is wrapped in a Rule whose priority is
# its position. StackMonkey actions are given their exemplars, rules whose
# action is MisparsedNode are dropped, and every rule is given a name;
# names shared by several rules get a numeric suffix (_1, _2, ...).
def all_rules
  return @all_rules if defined? @all_rules

  @inputs||=enumerate_exemplars
  @rules=expanded_RULES  #force it to be recalculated
  @all_rules = map_with_index(@rules){|raw,priority| Rule.new raw,priority}

  @all_rules.each do |r|
    r.action.exemplars=@inputs.grep r.action.hint if StackMonkey===r.action
  end

  warn "error recovery rules disabled for now; creates too many states and masks errors"
  @all_rules.reject!{|r| r.action==MisparsedNode }

  #names have to be allocated globally to make sure they don't collide
  base_name=lambda{|r|
    r.action.respond_to?(:name) ? r.action.name : r.action.to_s
  }
  names=@all_rules.map{|r| base_name[r] }.sort

  #after sorting, equal names are adjacent; remember each repeated name
  dups={}
  names.each_index do |i|
    dups[names[i]]=0 if names[i]==names[i+1]
  end

  @all_rules.each do |r|
    action=r.action
    r.name= action.respond_to?(:name) ? action.name.dup : action.to_s
    next unless dups[r.name]
    count=dups[r.name]+=1
    r.name<<"_#{count}"
  end
end
|
647
|
+
|
648
|
+
# One DottedRule for every pattern position of every rule, as a flat array
# in rule order and then position order.
def all_dotted_rules
  dotted=[]
  all_rules.each do |rule|
    rule.patterns.each_index do |pos|
      dotted << DottedRule.create(rule,pos,self)
    end
  end
  dotted
end
|
655
|
+
|
656
|
+
#$OLD_PAA=1
|
657
|
+
|
658
|
+
# Memoized list of the position-0 DottedRule for every rule in all_rules.
# The list is stored in @all_initial_dotted_rules before any further work
# is done, so calls made while this method is still running get it back.
# Prints :all_init and :all_init_done (via p) as progress markers.
def all_initial_dotted_rules
|
659
|
+
return @all_initial_dotted_rules if defined? @all_initial_dotted_rules
|
660
|
+
@all_initial_dotted_rules=result=
|
661
|
+
# parser argument is nil here, so DottedRule.create does not compute also_allow
all_rules.map{|rule| DottedRule.create(rule,0,nil) }
|
662
|
+
|
663
|
+
p :all_init
|
664
|
+
|
665
|
+
unless defined? $OLD_PAA
|
666
|
+
scanning=result
|
667
|
+
provisionals=nil
|
668
|
+
# NOTE(review): this is `while ... end until cond`, not `begin ... end until cond`.
# The until modifier on a plain while expression appears to test its condition
# first; provisionals and old_provisionals are both nil at that point, so the
# loop body looks like it never runs. Confirm before relying on it.
while true
|
669
|
+
old_provisionals=provisionals
|
670
|
+
provisionals={}
|
671
|
+
scanning.each{|dr|
|
672
|
+
# NOTE(review): compute_also_allow is declared as (parser,provisional=[false]);
# here the provisional array is passed in the parser position. Verify intent.
dr.also_allow=dr.compute_also_allow(provisional=[false]) #fill out dr.also_allow
|
673
|
+
provisionals[dr]=provisional[0]
|
674
|
+
}
|
675
|
+
# rescan only the dotted rules whose result was still flagged provisional
scanning=provisionals.map{|dr,val| dr if val }.compact
|
676
|
+
end until provisionals==old_provisionals
|
677
|
+
end
|
678
|
+
p :all_init_done
|
679
|
+
|
680
|
+
return result
|
681
|
+
end
|
682
|
+
|
683
|
+
# A user rule in normalized form: a list of patterns (the last of which is
# always the lookahead pattern) plus an action.
#
# Change from the previous version: #looping? no longer uses
# `fail` + `rescue Exception` as control flow (which also swallowed
# Interrupt and SystemExit); it checks the upper bound explicitly. Results
# for Repeat and non-Repeat patterns are the same as before.
class Rule #original user rules, slightly chewed on
  # rawrule  - object whose #left.subregs are the matchers and whose #right
  #            is the action.
  # priority - Integer rank of the rule; also its identity for ==/hash.
  def initialize(rawrule,priority)
    @priority=priority
    @action=rawrule.right
    @patterns=rawrule.left.subregs.dup
    #remove lookback decoration if any, just note that lb was present
    if Reg::LookBack===@patterns[0]
      @lookback=true
      @patterns[0]=@patterns[0].subregs[0]
    end

    case @patterns[-1]
    #Symbol is pointless here, methinks.
    when Proc,Symbol;    #do nothing
    when Reg::LookAhead; @patterns[-1]=@patterns[-1].subregs[0]
    else                 @patterns.push Object  #add la if none was present
    end

    #search for looping matchers with minimum >0 and replace them
    #with a number of scalars (== the minimum) followed by a loop with 0 min.
    #search for bare strings or regexps and replace with KW(  ) wrapper
    #(note: @patterns is spliced while being iterated; the inserted
    #elements are visited too)
    @patterns.each_with_index{|p,i|
      case p
      when String,Regexp; @patterns[i]=RedParse.KW(p)
      when Reg::Repeat
        if p.itemrange.first>0
          @patterns[i,1]=
            *[p.subregs[0]]*p.itemrange.first<< #minimum # as scalars
              p.subregs[0].reg.*  #0-based looper
        end
      end
    }
    @drs=[]
  end

  #cache of DottedRules for this rule, indexed by position
  attr_reader :drs

  #rules are identified by priority alone
  def hash; priority end
  def == other; Rule===other and priority==other.priority end
  alias eql? ==

  # true if the rule originally began with a LookBack matcher, else nil.
  def lookback?; @lookback if defined? @lookback end

  attr_reader :patterns,:action,:priority
  attr_accessor :name

  # The pattern at position +n+, with any Repeat wrapper removed.
  def at(n)
    result=patterns[n]
    result=result.subregs[0] if Reg::Repeat===result
    result
  end

  # True if the pattern at +n+ is a Repeat that may match zero items.
  def optional? n
    p=patterns[n]
    return Reg::Repeat===p && p.itemrange.first.zero?
  end

  # True if the pattern at +n+ is a Repeat with no upper bound; false for
  # anything else (including Repeats with a finite upper bound).
  def looping? n
    p=patterns[n]
    return false unless Reg::Repeat===p
    upper=p.itemrange.last
    return false if upper==1
    #integers may not respond to infinite? on older rubies; only an
    #infinite Float bound counts as looping
    upper.respond_to?(:infinite?) && upper.infinite? ? true : false
  end

  # What this rule leaves on the stack: the node class, a StackMonkey's
  # exemplars, or nil for :error/:shift/:accept.
  def reduces_to
    case @action
    when Class; @action
    when StackMonkey; @action.exemplars
    when :error,:shift,:accept; nil
    else fail "#@action unexpected in reduces_to"
    end
  end

  # Truthy for rules whose action is not a plain node class, or that use
  # lookback; nil for the :accept rule.
  def unruly?
    return if action==:accept
    action.class!=Class || lookback?
  end

  # The pattern list that describes the stack after this rule's action has
  # run, with the final element converted to a lookahead (via #la).
  def final_promised_pattern
    case @action
    when DeleteMonkey #delete_monkey
      vector_indexes=(@action.first_changed_index..-1).select{|i| Reg::Repeat===@patterns[i] }
      fail unless vector_indexes.empty?
      result=@patterns.dup
      result.delete_at @action.first_changed_index
    when StackMonkey  #stack_monkey
      result=@patterns.dup
      result[@action.first_changed_index..-1]=[@action.hint]
    when Class
      result= [@action,@patterns.last]
      result.unshift @patterns.first if lookback?
    when :accept, :error, :shift
      result=@patterns.dup
    else
      pp @action
      fail
    end
    result[-1]=result[-1].la unless result.empty?
    result
  end

  # A synthetic Rule (memoized) built from final_promised_pattern, with no
  # action and the negated priority.
  def final_promised_rule
    @final_promised_rule ||=
      Rule.new(-final_promised_pattern>>nil,-priority)
  end
end
|
790
|
+
|
791
|
+
class DottedRule
|
792
|
+
def initialize(rule,pos,parser)
|
793
|
+
@rule,@pos=rule,pos
|
794
|
+
fail unless (0...rule.patterns.size)===@pos
|
795
|
+
# @also_allow= compute_also_allow(parser) if parser unless defined? $OLD_PAA
|
796
|
+
end
|
797
|
+
def compute_also_allow(parser,provisional=[false])
|
798
|
+
parser.all_initial_dotted_rules.map{|dr|
|
799
|
+
next if dr==self
|
800
|
+
fake_rule=dr.rule.final_promised_rule
|
801
|
+
final_more_dr=DottedRule.create(fake_rule,0,nil)
|
802
|
+
also=dr.also_allow
|
803
|
+
unless also
|
804
|
+
provisional[0]||=0
|
805
|
+
provisional[0]+=1
|
806
|
+
also=[]
|
807
|
+
end
|
808
|
+
also+[dr] if optionally_combine final_more_dr,parser
|
809
|
+
}.flatten.compact.uniq
|
810
|
+
end
|
811
|
+
attr_reader :rule,:pos
|
812
|
+
attr_accessor :also_allow
|
813
|
+
|
814
|
+
def self.create(rule,pos,parser)
|
815
|
+
result=rule.drs[pos] and return result
|
816
|
+
result=rule.drs[pos]=DottedRule.new(rule,pos,parser)
|
817
|
+
unless defined? $OLD_PAA
|
818
|
+
result.also_allow=result.compute_also_allow(parser) if parser
|
819
|
+
end
|
820
|
+
return result
|
821
|
+
end
|
822
|
+
|
823
|
+
def hash; (@rule.priority<<3)^@pos end
|
824
|
+
def == other; DottedRule===other and @pos==other.pos and @rule==other.rule end
|
825
|
+
alias eql? ==
|
826
|
+
|
827
|
+
def name; @rule.name+"@#@pos" end
|
828
|
+
|
829
|
+
def looping?
|
830
|
+
@rule.looping?(@pos)
|
831
|
+
end
|
832
|
+
|
833
|
+
#returns Conditional|Rule|DottedRule|+[DottedRule.+]|nil
|
834
|
+
def evolve input, parser, seenlist,result2
|
835
|
+
#print "["
|
836
|
+
#$stdout.flush
|
837
|
+
idname=input.identity_name
|
838
|
+
idname=parser.identity_name_alias? idname
|
839
|
+
cache=seenlist[[self,idname]]
|
840
|
+
unless cache==:dunno_yet
|
841
|
+
result2.concat Array(cache).flatten.compact.uniq.sort_by{|x| x.name}
|
842
|
+
return cache
|
843
|
+
end
|
844
|
+
i=pos
|
845
|
+
lasti=i-1
|
846
|
+
result=[]
|
847
|
+
result=loop do #might need multiple tries if optional matcher(s) here
|
848
|
+
fail unless i>lasti
|
849
|
+
lasti=i
|
850
|
+
p=@rule.at(i) #what is current pattern in this dottedrule?
|
851
|
+
fail if Proc===p #shouldnt happen anymore
|
852
|
+
if parser.pattern_matches_nodes? p
|
853
|
+
|
854
|
+
#if any dotted rules have nodes at this point,
|
855
|
+
#also include the set of rules@0 which
|
856
|
+
#can (possibly indirectly) generate that node.
|
857
|
+
#(match tokens found on left sides of productions for p)
|
858
|
+
seenlist[[self,idname]]=result
|
859
|
+
if false
|
860
|
+
result.concat recurse_match_drs(parser).uniq.map{|dr|
|
861
|
+
dr and
|
862
|
+
#begin print "{#{dr.name}"
|
863
|
+
dr.evolve input,parser,seenlist,result2
|
864
|
+
#ensure print "}" end
|
865
|
+
}.flatten.compact.uniq
|
866
|
+
end
|
867
|
+
end
|
868
|
+
@saw_item_that={}
|
869
|
+
if p===input
|
870
|
+
i+=1 unless @rule.looping?(i)
|
871
|
+
fail if i>@rule.patterns.size
|
872
|
+
|
873
|
+
if !@saw_item_that.empty?
|
874
|
+
p(:saw_item_that!)
|
875
|
+
fail unless @saw_item_that.size==1
|
876
|
+
pair=@saw_item_that.to_a.first
|
877
|
+
fail unless p.equal? pair.last
|
878
|
+
it=pair.first
|
879
|
+
action=
|
880
|
+
if i==@rule.patterns.size
|
881
|
+
@rule
|
882
|
+
else
|
883
|
+
DottedRule.create(@rule,i,parser)
|
884
|
+
end
|
885
|
+
break Conditional.new(it,action)
|
886
|
+
end
|
887
|
+
@saw_item_that=nil
|
888
|
+
|
889
|
+
if i == @rule.patterns.size
|
890
|
+
break @rule
|
891
|
+
else
|
892
|
+
break result<<DottedRule.create(@rule,i,parser)
|
893
|
+
end
|
894
|
+
elsif !@rule.optional?(i)
|
895
|
+
break result.empty? ? nil : result
|
896
|
+
elsif (i+=1) >= @rule.patterns.size
|
897
|
+
break @rule
|
898
|
+
#else next p
|
899
|
+
end
|
900
|
+
end #loop
|
901
|
+
seenlist[[self,idname]]=result
|
902
|
+
result2.concat Array(result).flatten.compact.uniq.sort_by{|x| x.name}
|
903
|
+
return result
|
904
|
+
#ensure print "]"
|
905
|
+
end
|
906
|
+
|
907
|
+
#returns +[(DottedRule|nil).*]
|
908
|
+
#collect the position-0 dotted rules of every rule whose action can
#produce a node matching the pattern at this dotted rule's position,
#and (recursively) the rules that can produce nodes for those.
#parser:: supplies all_rules, exemplars_that_match and the rmd_cache.
#result:: accumulator shared with recursive calls; outside callers omit it.
#returns the accumulator; the slot that held the dotted rule being
#expanded is overwritten with nil before returning.
def recurse_match_drs parser, result=nil
  unless result
    #top-level call: answer from the per-parser cache when possible
    table=parser.rmd_cache
    if table
      cached=table[self]
      return cached if cached
    else
      parser.rmd_cache={}
    end

    result=[]
  end
  pattern=@rule.at(@pos)

  #find set of nodes that could match here
  nodes_here=parser.exemplars_that_match(pattern&Node)

  #find the set of rules that could generate a node in our list
  rrules=parser.all_rules.select{|rule|
    !rule.unruly? and !nodes_here.grep(rule.action).empty?
  }.map{|rule|
    DottedRule.create(rule,0,parser)
  }

  #if any generating rules match a node in the leftmost pattern,
  #add the rules which can generate _that_ node too.
  result.push self  #force self to be excluded from future recursion
  oldsize=result.size
  unless rrules.empty?
    result.concat rrules

    #the accumulator carries its own element=>first-index table, so that
    #looking up a rule's position does not need a linear Array#index
    unless result.respond_to? :index_of
      class<<result
        attr_accessor :index_of
      end
      result.index_of={}
    end
    rio=result.index_of
    #exclusive range: index only the slots just appended. (an inclusive
    #upto(result.size) also visited one slot past the end and recorded
    #a bogus nil key.)
    (oldsize...result.size).each{|i| rio[result[i]]||=i }
    rrules.each{|rrule|
      i=rio[rrule] or fail
      #but skip recursion on rules already done at a higher level
      rrule.recurse_match_drs parser,result if i>=oldsize
    }
  end
  result[oldsize-1]=nil #don't actually include self in result

  parser.rmd_cache[self]=result
  return result
end
|
962
|
+
|
963
|
+
#decide whether this (stronger) dotted rule and the weaker one can both
#be satisfied by some common input.
#weaker:: the other dotted rule.
#parser:: supplies the exemplar inputs and the oc_cache.
#returns true or false; every outcome is stored in parser.oc_cache
#under the key [self,weaker] by the ensure clause.
def optionally_combine weaker,parser
  #lotsa caching needed if this is ever to be performant
  if parser.oc_cache
    result=parser.oc_cache[[self,weaker]]
    return result unless result.nil?
  else
    parser.oc_cache={}
  end

  other=weaker
  mymatches,myposes= self.outcomes
  matches,  poses  = other.outcomes
  matches.each_with_index{|match,i|
    mymatches.each_with_index{|mymatch,myi|
      intersect=parser.inputs.grep(match&mymatch)
      unless intersect.empty?

        #but don't allow matches that would be matched
        #by an earlier (but optional) pattern.
        disallowed=Reg::Or.new(
          *possible_matchers_til(myi)+
           other.possible_matchers_til(i)
        )
        #keep the filtered list; the filtering used to be thrown away,
        #which made the emptiness test below unreachable
        intersect=intersect.reject{|x| disallowed===x }

        if intersect.empty?
          return result=false
        elsif poses[i]>=other.rule.patterns.size
          return result=true  #success if weaker rule is at an end
        elsif myposes[myi]>=rule.patterns.size
          return result=false #fail if stronger rule at an end
        else
          p [:**,rule.name,myposes[myi]]
          mynew=DottedRule.create(rule,myposes[myi],parser)
          new=DottedRule.create(other.rule,poses[i],parser)
          return result=mynew.optionally_combine( new,parser )
        end
      end
    }
  }
  return result=false
ensure
  parser.oc_cache[[self,weaker]]=result
end
|
1007
|
+
|
1008
|
+
#list the matchers of this rule at positions pos up to, but not
#including, i-1. a Reg::Repeat is represented by the matcher it repeats.
def possible_matchers_til i
  matchers=[]
  (pos...i-1).each{|idx|
    matcher=rule.at(idx)
    matcher=matcher.subregs[0] if Reg::Repeat===matcher
    matchers<<matcher
  }
  matchers
end
|
1014
|
+
|
1015
|
+
#enumerate what could be matched next from this dotted rule.
#returns a pair [matchers,positions]: for each candidate matcher, the
#rule position to continue from after it matches. optional (Repeat)
#patterns are stepped over; the walk stops after the first mandatory
#pattern or Proc, or at the end of the rule.
def outcomes
  matchers=[]
  positions=[]
  limit=@rule.patterns.size
  cursor=@pos
  loop do
    pattern=@rule.patterns[cursor]
    if Proc===pattern
      #a Proc could accept anything, so stand in Object for it
      matchers.push Object
      positions.push cursor+1
      break
    elsif Reg::Repeat===pattern
      assert @rule.optional?(cursor)
      #a looping pattern continues from the same position
      following=@rule.looping?(cursor) ? cursor : cursor+1
      matchers.push pattern.subregs[0]
      positions.push following
    else
      matchers.push pattern
      positions.push cursor+1
      break
    end
    cursor+=1
    break if cursor>=limit
  end
  return [matchers,positions]
end
|
1042
|
+
|
1043
|
+
end
|
1044
|
+
|
1045
|
+
#per-parser memo tables used while the state table is being built:
#rmd_cache is filled by recurse_match_drs, oc_cache by optionally_combine
#and sl2ms_cache by shiftlist2multishift?; enumerate_states sets all
#three back to nil when it finishes.
attr_accessor :rmd_cache, :oc_cache, :sl2ms_cache
|
1048
|
+
|
1049
|
+
#an action that only applies when its condition matches.
#instances compare and hash by value (condition and action).
class Conditional
  attr_reader :condition,:action

  def initialize(condition,action)
    @condition=condition
    @action=action
    #NOTE(review): presumably reinstates the condition's own hash and ==
    #so that value comparison works -- confirm against restore's definition
    @condition.restore :hash,:==
  end

  def hash
    @condition.hash^@action.hash
  end

  def == other
    Conditional===other &&
      @condition==other.condition &&
      @action==other.action
  end
  alias eql? ==

  #the condition's inspect string, a "?" and the action's name
  def name
    @condition.inspect+"?"+@action.name
  end

  #a conditional ranks wherever its action does
  def priority
    @action.priority
  end
end
|
1068
|
+
|
1069
|
+
#forward declarations, so that these names can be used in
#ACTION_PATTERN before the classes are filled in below
class ParserState
end
class MultiShift
end
class MultiReduce
end

#matches every value that may be stored as a parser action
ACTION_PATTERN=ParserState|Rule|MultiShift|MultiReduce|:accept|:error
|
1074
|
+
class ParserState #a union of dotted rules
|
1075
|
+
#dotteds:: the dotted rules that make up this state; must be
#          non-empty and contain no nils.
#index:: stored in @index.
def initialize(dotteds,index)
  fail if dotteds.empty? #error state
  fail if dotteds.any?{|dr| nil===dr }
  @dotteds,@index=dotteds,index
  sort_substates!
  #key is an input, value is ParserState|Rule|MultiShift|MultiReduce|:accept|:error
  @actions={}
end
|
1083
|
+
|
1084
|
+
attr_reader :actions
|
1085
|
+
|
1086
|
+
#fetch the action recorded under k, asserting that it is a valid action
def [](k)
  action=@actions[k]
  assert(ACTION_PATTERN===action)
  return action
end
|
1091
|
+
#record v as the action for k, after asserting that v is a valid action
def []=(k,v)
  assert(ACTION_PATTERN===v)
  @actions.store(k,v)
end
|
1095
|
+
|
1096
|
+
#put the dotted rules in descending order of position and drop duplicates
def sort_substates!
  furthest_first=@dotteds.sort_by{|dr| -dr.pos }
  @dotteds=furthest_first.uniq
end
|
1099
|
+
attr :dotteds
|
1100
|
+
|
1101
|
+
#copy this state; the copy gets its own list of dotted rules, so adding
#to one does not affect the other
def dup
  copy=super
  copy.instance_variable_set(:@dotteds,@dotteds.dup)
  copy
end
|
1106
|
+
|
1107
|
+
#a plain state is its own only substate
def substates
  [self]
end
|
1108
|
+
|
1109
|
+
#turn the list of things that want to shift into a single shift action.
#shiftlist:: DottedRules, and Conditionals wrapping DottedRules.
#parser:: supplies all_rules and the sl2ms_cache memo table.
#returns :error for an empty list, a ParserState when nothing in the
#list is conditional, else a MultiShift. results are cached per list.
def shiftlist2multishift? shiftlist,parser
  return :error if shiftlist.empty?
  parser.sl2ms_cache||={}
  cache=parser.sl2ms_cache[shiftlist]
  return cache if cache
  fixed,varying=shiftlist.partition{|res| DottedRule===res}
  result=ParserState.new(fixed,nil)
  result.perhaps_also_allow parser.all_rules,parser
  unless varying.empty? #MultiShift
    #MultiShift wants a flat list: condition,action,condition,action...
    #(the pairs used to be passed unflattened, because the result of
    #flatten was discarded.) built by hand rather than with flatten,
    #so that conditions and actions are never themselves flattened.
    modifiers=varying.inject([]){|flat,v| flat.push v.condition, v.action }
    result=MultiShift.new(result,modifiers)
  end
  parser.sl2ms_cache[shiftlist]=result
  return result
end
|
1124
|
+
|
1125
|
+
#given a list of rules, see if any of them are compatible with
|
1126
|
+
#a current substate. (compatibility means the aggregate patterns
|
1127
|
+
#can be anded together and still be able to conceivably match something.)
|
1128
|
+
#if any of morerules are actually compatible, add it to current state.
|
1129
|
+
#add to this state whatever each of its dotted rules reports from
#also_allow, then re-sort. morerules must be the parser's complete rule
#list; anything else is rejected.
def perhaps_also_allow(morerules,parser)
  fail unless morerules==parser.all_rules
  extras=@dotteds.map{|dr| dr.also_allow }
  extras=extras.flatten.compact.uniq
  @dotteds.concat extras
  sort_substates!
end
|
1134
|
+
#older, slower variant of perhaps_also_allow: repeatedly add the
#position-0 dotted rule of every rule in morerules whose
#final_promised_rule can be optionally_combine'd with some dotted rule
#of this state, until a pass adds nothing more.
def old_perhaps_also_allow(morerules,parser)
  morerules=morerules.dup
  grew=false
  frontier=@dotteds
  added={}
  loop do
    adding=[]
    morerules.each{|morerule|
      next if added[morerule]
      promised=DottedRule.create(morerule.final_promised_rule,0,parser)
      compatible=frontier.any?{|dotted|
        dotted.optionally_combine promised,parser
      }
      if compatible
        adding<<DottedRule.create(morerule,0,parser)
        added[morerule]=1
      end
    }
    break if adding.empty?
    @dotteds.concat adding
    grew=true
    #next pass only needs to look at what was just added
    frontier=adding
  end
  sort_substates! if grew
end
#setting $OLD_PAA selects the old implementation
alias perhaps_also_allow old_perhaps_also_allow if defined? $OLD_PAA
|
1161
|
+
|
1162
|
+
|
1163
|
+
#returns ParserState|MultiShift|MultiReduce|Rule|:accept|:error
|
1164
|
+
#work out what this state should do when it sees input.
#input:: the input being considered.
#parser:: the parser whose tables are being built.
#seenlist:: passed through to each dotted rule's evolve.
#the outcome is one of the ACTION_PATTERN alternatives; the ensure
#clause asserts this on the way out.
def evolve input,parser,seenlist
  result2=[]
  @dotteds.each{|dotted|
    dotted.evolve input,parser,seenlist,result2
  }

  result=result2=result2.uniq.compact.sort_by{|x| x.name}

  return result=:error if result.empty?

  #ok, who wants to shift and who wants to reduce?
  shiftlist,reducelist=result.partition{|res|
    DottedRule===res or
      Conditional===res && DottedRule===res.action
  }

  #if no reducers at all, just try (multi?)shift
  return result=shiftlist2multishift?( shiftlist,parser ) if reducelist.empty?

  #line up reducers by priority
  actions=reducelist.sort_by{|rule| -rule.priority }
  #actions is +[(Rule|Conditional[Rule]).*]
  action=actions.shift #this first (unless conditional)
  #action is Rule|Conditional[Rule]
  result=
    case action.action
    when :error
      #assign before returning, like the other early exits, so the
      #ensure clause checks the value actually returned instead of
      #the intermediate list
      return result=:error
    when Class, StackMonkey
      action
    when :accept
      :accept
    when :shift #this counts as a reduce at this point, but it writes shift instructions
      shiftlist2multishift? shiftlist,parser
    when Rule #oy, vey, was a Conditional
      shiftaction=shiftlist2multishift?(shiftlist,parser)
      fail unless Rule===action.action
      case action.action.action
      when :error; huh
      when :shift, StackMonkey, :accept, Class #MultiReduce
        #find where the leading run of Conditionals ends
        first_fixed_index=actions.size
        #actions is +[(Rule|Conditional[Rule]).*]
        actions.each_with_index{|act,i|
          break first_fixed_index=i unless Conditional===act
        }
        condactions=actions[0...first_fixed_index].unshift(action)
        #flatten into condition,action,condition,action...
        condactions=condactions.inject([]){|sum,cond|
          act=cond.action
          act=shiftaction if act==:shift #=>shiftlist?
          sum.push cond.condition, act
        }
        #possible optimization: one or more :shift right at end could be ignored
        #the first unconditional reducer (if any) becomes the default
        if actions[first_fixed_index]
          action=actions[first_fixed_index].action
        else
          action=shiftaction
        end
        MultiReduce.new condactions,action #=>shiftlist?
      else fail
      end
    else fail "#{action} not expected here"
    end
  #stack monkeys/:accept are treated like reduce here
ensure
  assert ACTION_PATTERN===result
end
|
1237
|
+
|
1238
|
+
#the name that was assigned, if any; otherwise the names of all the
#dotted rules, joined by commas
def name
  return @name if @name
  @dotteds.map{|dr| dr.name }.join(",")
end
|
1241
|
+
attr_writer :name
|
1242
|
+
|
1243
|
+
#give this state a name built from its most prominent members, unless
#it already has one. name2count counts how often each base name was
#handed out; repeats get a "___N" suffix. returns the name.
def rename(name2count)
  return @name if defined? @name
  base=most_prominent_members.map{|dr| dr.name }.join(",")
  uses=name2count[base]
  if uses
    name2count[base]=uses+1
    @name=base+"___"+(uses+1).to_s
  else
    name2count[base]=1
    @name=base
  end
end
|
1255
|
+
|
1256
|
+
#pick the dotted rules used to name this state: those sharing the first
#dotted rule's position, plus those with the largest number of patterns
#still to go. rules at position 0 (or at position 1 of a lookback rule)
#are then dropped, unless that would leave nothing.
def most_prominent_members
  leaders=@dotteds.select{|dr| dr.pos==@dotteds.first.pos }
  by_remaining=@dotteds.map{|dr|
    [dr,dr.rule.patterns.size-dr.pos]
  }.sort_by{|(dr,remaining)| -remaining }
  longest=by_remaining.select{|(dr,remaining)|
    remaining==by_remaining.first.last
  }
  members=leaders+longest.map{|(dr,remaining)| dr }
  nontrivial=members.reject{|dr|
    dr.pos==0 || (dr.pos==1 && dr.rule.lookback?)
  }
  nontrivial.empty? ? members : nontrivial
end
|
1264
|
+
|
1265
|
+
#hash by content: the negated hash of the dotted rule list
def hash
  dotteds_hash=@dotteds.hash
  -dotteds_hash
end
|
1268
|
+
#two states are equal when they hold equal lists of dotted rules
def == other
  return false unless ParserState===other
  @dotteds==other.dotteds
end
alias eql? ==
|
1273
|
+
|
1274
|
+
def looping?
|
1275
|
+
@dotteds.any?{|dotted| dotted.looping? }
|
1276
|
+
end
|
1277
|
+
|
1278
|
+
#does the action recorded for input lead into a different, looping
#state? (not used)
#input:: a key of @actions.
#NOTE(review): enumerate_states stores actions under
#input.identity_name -- confirm callers pass the same kind of key.
#fails if the recorded action is anything other than :error, a
#ParserState, a MultiShift or a MultiReduce.
def transition_to_loop? input #not used
  #index the table; Hash has no #input method
  action=@actions[input]
  case action
  when :error; false
  when ParserState; action.looping? and action!=self
  when MultiShift,MultiReduce;
    action.transition_to_loop? input
  else fail
  end
end
|
1288
|
+
|
1289
|
+
#split the action table in two: @goto for keys naming a Node exemplar,
#@sr for keys naming a Token exemplar. in each table the most frequent
#action is removed from the explicit entries and installed as the
#hash default instead. afterwards @actions is cleared.
def make_sr_goto_tables
  exemplar_named={}
  @inputs.each{|exemplar| exemplar_named[exemplar.name]=exemplar }

  @goto={}
  @sr={}
  goto_tally=Hash.new(0)
  sr_tally=Hash.new(0)
  actions.each_pair{|key,action|
    exemplar=exemplar_named[key]
    if Node===exemplar
      @goto[key]=action
      goto_tally[action]+=1
    else
      assert(Token===exemplar)
      @sr[key]=action
      sr_tally[action]+=1
    end
  }

  most_common=goto_tally.sort_by{|action,count| count }.last[0]
  @goto.delete_if{|key,action| action==most_common }
  @goto.default=most_common

  most_common=sr_tally.sort_by{|action,count| count }.last[0]
  @sr.delete_if{|key,action| action==most_common }
  @sr.default=most_common

  @actions=nil
end
|
1315
|
+
|
1316
|
+
end
|
1317
|
+
|
1318
|
+
#an action that chooses among several actions by testing conditions.
#list is flat: condition,action,condition,action...; the first
#condition that matches wins, else the default action is used.
class MultiReduce
  #list:: flat array of alternating conditions and actions.
  #default:: any valid action (except another MultiReduce).
  def initialize(list,default)
    @list,@default=list,default
  end

  attr_reader :list,:default

  #the action paired with the first condition matching x (by ===),
  #or the default action if none matches
  def act(x)
    (0...@list.size).step(2){|i|
      return @list[i+1] if @list[i]===x
    }
    return default
  end

  #the substates of the default action, if it has any
  def substates
    if @default.respond_to? :substates
      @default.substates
    else
      []
    end
  end

  #every action this could resolve to: the listed ones, followed by the
  #default (or by the default's own actions, if it has any)
  def actions
    result=[]
    (1...@list.size).step(2){|i|
      result << @list[i]
    }
    if @default.respond_to? :actions
      result.concat @default.actions
    elsif @default
      result<<@default
    end
    result
  end

  def transition_to_loop? input #not used
    @default.transition_to_loop? input
  end

  def hash
    @list.hash^~@default.hash
  end

  #equal only to another MultiReduce with the same list and default.
  #the class check keeps comparison against other kinds of action
  #(symbols, states, rules) from raising NoMethodError.
  def == other
    MultiReduce===other and
      @list==other.list and @default==other.default
  end
  alias eql? ==
end
|
1367
|
+
|
1368
|
+
#a shift whose destination depends on which of several predicates the
#input satisfies. modifiers is flat: predicate,addition,predicate,
#addition...; map holds one copy of the base state per combination of
#predicate results, indexed by a bitmask in which bit k stands for the
#k-th predicate.
class MultiShift
  #base:: the state shared by all branches.
  #modifiers:: flat array of alternating predicates and additions.
  def initialize(base,modifiers)
    @base,@modifiers=base,modifiers
    @map=
      (0...2**(modifiers.size/2)).map{|i| base.dup}
    @map.each_with_index{|state,i| #for each branch to the multishift
      (0...modifiers.size).step(2){|j| #for each predicate in the multishift
        #predicate number j/2 owns bit j/2 of the branch index, the same
        #numbering that act uses. (this used to test bit j, via a
        #non_zero? method that Integer does not have.)
        if (i&(1<<(j/2))).nonzero? #if the predicate tests true in this branch
          #NOTE(review): ParserState as visible here defines no append
          #method -- confirm it is supplied elsewhere
          state.append modifiers[j+1] #add the predicates modifier to the state
        end
      }
      state.sort_substates!
    }
  end

  #the branch for input x: test every predicate against x and use the
  #resulting bitmask as index into map
  def act(x)
    result=0
    (0...@modifiers.size).step(2){|i|
      result|=(1<<(i/2)) if @modifiers[i]===x
    }
    @map[result]
  end

  attr_reader :map, :modifiers

  #a copy of the list of branch states
  def substates
    @map.dup
  end

  #a copy of the list of branch states
  def actions
    @map.dup
  end

  #the next three are unfinished placeholders in the original code
  def transition_to_loop? input #not used
    huh
  end

  def hash
    huh
  end
  def == other
    huh
  end
  alias eql? ==
end
|
1413
|
+
|
1414
|
+
#an action is one of:
|
1415
|
+
#a ParserState (shift)
|
1416
|
+
#a Rule (reduce)
|
1417
|
+
#:error (error)
|
1418
|
+
#:accept
|
1419
|
+
#MultiReduce
|
1420
|
+
#MultiShift
|
1421
|
+
|
1422
|
+
#just the left side (the stack/lookahead matchers)
|
1423
|
+
#(re)load @rules from expanded_RULES and return the matchers from the
#left sides of all of them, as one flat list
def LEFT
  @rules=expanded_RULES()
  per_rule=@rules.map{|rule| rule.left.subregs }
  per_rule.flatten
end
|
1429
|
+
|
1430
|
+
#remove lookahead and lookback decoration (not used?)
|
1431
|
+
#the left-side matchers with every Proc replaced by an empty array.
#lookahead/lookback wrappers are not expected here any more; meeting
#one is a failure.
def LEFT_NO_LOOKING
  matchers=LEFT()
  matchers.map!{|m|
    #should be gone already now
    fail if Reg::LookAhead===m or Reg::LookBack===m
    Proc===m ? [] : m
  }
  matchers
end
|
1442
|
+
|
1443
|
+
#build a tree of the given classes (with Object added as root):
#returns a hash from each class to the list of given classes whose
#nearest ancestor within the set it is.
def child_relations_among(*classes)
  classes.unshift Object
  children={}
  classes.each{|klass| children[klass]=[] }

  classes.each{|klass|
    ancestry=klass.ancestors
    ancestry.shift==klass or fail
    #nearest ancestor that is itself one of the given classes
    parent=ancestry.find{|anc| children[anc] }
    children[parent] << klass if parent
  }

  return children
end
|
1462
|
+
|
1463
|
+
#all classes mentioned in rules, on left and right sides
|
1464
|
+
#the classes found in the rules: those extracted (by sc_juice) from
#the left-side matchers plus the classes on the right sides.
#computed once and kept in @sc_result, which is set to an empty array
#before the computation starts and filled in at the end.
def STACKABLE_CLASSES #
  return @sc_result if defined? @sc_result
  @sc_result=[]
  @subclasses_of=child_relations_among(*vertices)
  left_classes=LEFT().map{|lm| sc_juice lm }.flatten.compact
  assert left_classes.grep(nil).empty?
  right_classes=@rules.map{|rr| rr.right }.grep(Class) #classes in productions
  combined=left_classes+right_classes
  @subclasses_of=nil
  @sc_result.replace combined.grep(Class).uniq
  fail if @sc_result.empty?
  @sc_result
end
|
1479
|
+
|
1480
|
+
# def juice(m)
|
1481
|
+
# case m #
|
1482
|
+
# when Class
|
1483
|
+
# return [m] unless @subclasses_of
|
1484
|
+
# result=[m] # and subclasses too
|
1485
|
+
# i=0
|
1486
|
+
# while item=result[i]
|
1487
|
+
# p item
|
1488
|
+
# result.concat @subclasses_of[item] rescue nil
|
1489
|
+
# i += 1
|
1490
|
+
# end
|
1491
|
+
# result
|
1492
|
+
# when String,Regexp; juice(RedParse.KW(m))
|
1493
|
+
# when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
|
1494
|
+
# when Reg::Or; m.subregs.map &method(:juice)
|
1495
|
+
# when Reg::Not
|
1496
|
+
# m=m.subregs[0]
|
1497
|
+
# if Class===m or (Reg::Or===m and
|
1498
|
+
# m.subregs.find{|x| Class===x })
|
1499
|
+
# juice(m)
|
1500
|
+
# else []
|
1501
|
+
# end
|
1502
|
+
# else []
|
1503
|
+
# end
|
1504
|
+
# end
|
1505
|
+
|
1506
|
+
#extract the classes mentioned by matcher m, as a (possibly nested) array.
#strings and regexps stand for KeywordToken; an And yields the classes
#common to all its parts; an Or yields one sub-list per alternative;
#Not, LookAhead, LookBack and Repeat yield whatever their first
#subexpression does; anything else yields nothing.
def sc_juice(m)
  case m
  when Class
    [m]
  when String,Regexp
    [KeywordToken]
  when Reg::And
    parts=m.subregs.map{|sub| sc_juice(sub) }.compact
    parts=parts.map{|part| part.flatten.compact }
    parts.inject{|common,part| common&part }
  when Reg::Or
    m.subregs.map{|sub| sc_juice(sub) }
  when Reg::Not, Reg::LookAhead, Reg::LookBack, Reg::Repeat
    sc_juice(m.subregs[0])
  else
    []
  end
end
|
33
1518
|
|
34
|
-
def
|
1519
|
+
#the rules that answer true to unruly?. computed once; the first call
#also prints their names.
def unruly_rules
  unless defined? @unruly_rules
    @unruly_rules=all_rules.select{|rule| rule.unruly? }

    p :unruly_rules
    pp @unruly_rules.map{|rule| rule.name }
  end
  @unruly_rules
end
|
1530
|
+
|
1531
|
+
#build one example object for every exemplar description reported by
#the stackable classes. a description is an array: a class, followed
#by method-name/value pairs. the class is allocated (not initialized)
#and given singleton methods returning the values; to_s is made to
#return identity_name. the result is kept in @@exemplars.
def enumerate_exemplars
  return @@exemplars if defined? @@exemplars  #dunno why this is necessary

  descriptions=STACKABLE_CLASSES().map{|sc| sc.enumerate_exemplars }
  descriptions=descriptions.inject{|sum,sc| sum+sc }

  descriptions.map!{|description|
    exemplar=description.shift.allocate
    until description.empty?
      meth=description.shift
      value=description.shift
      eval "def exemplar.#{meth}; #{value.inspect} end"
    end
    def exemplar.to_s; identity_name end
    exemplar
  }

  return @@exemplars=descriptions
end
|
1549
|
+
|
1550
|
+
#find inputs that no rule pattern can tell apart. each input gets a
#profile (which patterns match it; Procs count as themselves); an
#input whose profile was seen before is reported, recorded in
#@identity_name_aliases under the earlier input, and left out.
#returns the inputs that had a new profile.
def check_for_parsealike_inputs
  all_patterns=all_rules.map{|r|
    r.patterns.map{|rp| Reg::Repeat===rp ? rp.subregs[0] : rp }
  }.flatten.uniq
  first_with={}
  @identity_name_aliases={}
  warn "why are non_empty and after_equals params to BeginNode appearently ignored?"
  warn "some token identities overlap themselves?!?"
  warn "some overlaps are duplicated"
  warn ". and :: overlap => ..... surely that's not right"
  distinct=[]
  @inputs.each{|input|
    profile=all_patterns.map{|pat| Proc===pat ? pat : !!(pat===input) }
    earlier=first_with[profile]
    if earlier
      puts "#{input} overlaps #{earlier}"
      @identity_name_aliases[earlier]=input
    else
      first_with[profile]=input
      distinct<<input
    end
  }
  distinct.compact
end
|
1569
|
+
|
1570
|
+
#explore every state reachable from the start state: evolve each state
#on every input (except the start token), record the resulting action
#in the state, and queue any states not seen before. prints a progress
#line per state. clears the parser's memo tables when done.
#returns the states in the order they were processed.
def enumerate_states
  inputs=check_for_parsealike_inputs
  inputs.reject!{|x| StartToken===x}

  finished=[]
  todo=[start_state]

  seenlist=Hash.new(:dunno_yet)

  done_count=0
  start=was=Time.now
  in_result={}  #this should go away; obsoleted by @states
  state_num=-1
  todo.each{|st| in_result[st]=(state_num+=1) }
  ps=todo.first
  pp [-in_result[ps], *ps.dotteds.map{|dr| dr.name }]
  old_todo_size=todo.size
  while state=todo.shift
    finished<<state

    inputs.each{|input|
      newstate=state.evolve input,self,seenlist
      assert ACTION_PATTERN===newstate
      #newstate is ParserState|MultiShift|MultiReduce|Rule|:accept|:error
      state[input.identity_name]=newstate
      next unless newstate.respond_to? :substates
      #newstate.substates is just [newstate] for plain ParserStates
      morestates=newstate.substates.reject{|x| in_result[x]}
      morestates.each{|st| in_result[st]=(state_num+=1) }
      todo.concat morestates
    }

    #progress: states done, backlog, backlog growth, seconds for this
    #state, states per second, percent done
    now=Time.now
    done_count+=1
    backlog=todo.size
    p [:*,done_count,backlog,backlog-old_todo_size,now-was,done_count/(now-start),(100.0*done_count/(done_count+backlog)).to_i]
    old_todo_size=backlog
    was=now
  end
  self.rmd_cache=nil
  self.oc_cache=nil
  self.sl2ms_cache=nil
  return finished
end
|
1625
|
+
|
1626
|
+
#render action x in a compact form suitable for printing.
#x:: a ParserState, MultiReduce, MultiShift, Class, StackMonkey,
#    :accept or :error; anything else is a failure.
#in_result:: hash from ParserState to its number.
#returns the state's number, a name, the symbol itself, or a nested
#array starting with :MultiReduce or :MultiShift.
def pretty(x,in_result)
  case x
  when ParserState; in_result[x]
  when MultiReduce
    pairs=x.list.dup
    result=[]
    until pairs.empty?
      cond,act,*pairs=*pairs
      cond = cond.inspect
      result<<[cond,pretty(act.action,in_result)]
    end
    result<<pretty(x.default,in_result)
    result.unshift :MultiReduce
  when MultiShift
    h={}
    mods=x.modifiers
    its=[]
    (0...mods.size).step(2){|i| its<<mods[i] }
    #in_result must be handed on; leaving it off raised ArgumentError
    x.map.each_with_index{|xx,i| h[i]=pretty(xx,in_result) }
    [:MultiShift, its,h]
  when Class; x.name
  when StackMonkey; x.name
  when :accept,:error; x
  else fail "not a valid action: #{x}"
  end
end
|
1652
|
+
|
1653
|
+
attr_accessor :inputs
|
1654
|
+
|
1655
|
+
#the result of enumerate_states, computed on first use and kept
def all_states
  @all_states=enumerate_states unless defined? @all_states
  @all_states
end
|
1659
|
+
|
1660
|
+
#the inputs that pattern p matches (by ===)
def exemplars_that_match p
  @inputs.select{|input| p===input }
end
|
1663
|
+
|
1664
|
+
#true if some input is matched by both Node and pattern p
def pattern_matches_nodes? p
  matching=@inputs.grep(Node&p)
  not matching.empty?
end
|
1667
|
+
|
1668
|
+
#true if some input is matched by both Token and pattern p
def pattern_matches_tokens? p
  matching=@inputs.grep(Token&p)
  not matching.empty?
end
|
1671
|
+
|
1672
|
+
#the alias recorded for name in @identity_name_aliases, or name itself
#if there is none
def identity_name_alias? name
  @identity_name_aliases[name] || name
end
|
1676
|
+
|
1677
|
+
#build (or reload) the parse tables, print some diagnostics about
#them, and write generated code to $stdout via generate_c.
#the tables are cached in the file cached_parse_tables.drb in the
#current directory: if it exists, it is unmarshalled and every method
#of this parser is delegated to the loaded copy; otherwise the tables
#are computed and an attempt is made to write the cache.
#while running, this parser is published in
#Thread.current[:$RedParse_parser] (unless one is already there);
#the previous value is restored on the way out.
#returns self.
def compile
  oldparser=Thread.current[:$RedParse_parser]
  Thread.current[:$RedParse_parser]||=self
  cache_file="cached_parse_tables.drb"

  if File.exist?(cache_file)
    #NOTE(review): Marshal.load trusts the cache file completely.
    #block form, so the file is closed even if loading fails
    dup=File.open(cache_file,"rb"){|f| Marshal.load(f) }
    instance_variables.each{|var| remove_instance_variable var }
    extend SingleForwardable
    def_singleton_delegators(dup,public_methods+private_methods+protected_methods)

    self.inputs=enumerate_exemplars
    #the diagnostics below need the states on this path too;
    #previously states was left nil here
    states=all_states
  else
    @generating_parse_tables=true
    @inputs||=enumerate_exemplars

    states=all_states
    @inputs=nil #Marshal no like it

    dumped=false
    begin
      p :dumping
      File.open(cache_file,"wb"){|f| Marshal.dump(self,f) }
      dumped=true
      p :dump_done!
    rescue StandardError, SystemStackError
      #caching is best effort; carry on without a cache file
      p :dump_failed
    ensure
      #never leave a partial cache file behind, whatever went wrong
      File.unlink cache_file if !dumped and File.exist?(cache_file)
      @inputs=enumerate_exemplars
    end
  end

  #look for unused dotted rules and actions
  #also states with drs past the end
  past_end=0
  drs=all_dotted_rules
  dr_count=Hash.new(0)
  acts=all_rules#.map{|r| r.action }.uniq
  act_count=Hash.new(0)
  states.each{|state|
    state.dotteds.each{|dr|
      dr_count[dr]+=1
      past_end+=1 if dr.pos>=dr.rule.patterns.size
    }
    sav=state.actions.values
    sav.grep(Class|StackMonkey).each{|act| act_count[act.__id__]+=1 }
    sav.grep(MultiReduce|MultiShift).each{|multi| multi.actions.each{|act| act_count[act.__id__]+=1} }
  }
  puts "#{past_end} dotted rules found past the end of their rule" if past_end>0
  nevers=0
  drs.each{|dr|
    next unless dr_count[dr].zero?
    puts "never reached #{dr.name}"
    nevers+=1
  }
  puts "#{nevers} dotted rules were never reached (out of #{drs.size})"
  nevers=0
  acts.each{|act|
    next unless act_count[act.__id__].zero?
    puts "never reached #{act.name rescue act}"
    nevers+=1
  }
  puts "#{nevers} actions were never reached (out of #{acts.size})"
  p :most_popular_nontrivial_drs
  #last(15) copes with fewer than 15 entries; [-15..-1] gave nil then
  pp dr_count.reject{|(dr,n)| dr.pos.zero? or dr.pos==1 && dr.rule.lookback?} \
             .sort_by{|(dr,n)| n}.last(15).map{|(dr,n)| [dr.name,n] }

  #look for duplicate states
  actions2state={}
  dup_states=0
  states.each{|st|
    cache=actions2state[st.actions]
    if cache
      st.equivalent_to=cache
      dup_states+=1
    else
      actions2state[st.actions]=st
    end
  }
  puts "#{dup_states} duplicate states" if dup_states.nonzero?

  name2count={}
  states.each{|state| state.rename(name2count) }

  #divide each state's actions into sr and goto tables
  #also scan states for the most common sr and goto actions and make them default
  states.each{|state| state.make_sr_goto_tables }

  generate_c $stdout
  return self
ensure
  remove_instance_variable :@generating_parse_tables rescue nil
  Thread.current[:$RedParse_parser]=oldparser
end
|
1776
|
+
|
1777
|
+
#the classes that a complete parse can reduce to: for every rule of
#the form StartToken, something, EoiToken, the classes extracted from
#the middle pattern.
#the size test used to be ==0, which can never hold together with the
#first/last tests, so the result was always empty. the middle pattern
#is now handed to sc_juice, because the juice method this used to
#call exists only as commented-out code in this file.
#returns a flat array of classes.
def ultimate_goal_nodes
  result=[]
  all_rules.each{|rule|
    if rule.patterns.size==3 and
       rule.patterns.first==StartToken and
       rule.patterns.last==EoiToken
      result << sc_juice(rule.patterns[1])
    end
  }
  result.flatten!
  return result
end
|
1789
|
+
|
1790
|
+
|
1791
|
+
# def start_state
|
1792
|
+
# goal=ultimate_goal_nodes
|
1793
|
+
# result=all_rules.select{|rule|
|
1794
|
+
# rt=rule.reduces_to and
|
1795
|
+
# !goal.select{|node| node>=rt}.empty?
|
1796
|
+
# }
|
1797
|
+
# result.map!{|rule| DottedRule.create(rule,0,parser)}
|
1798
|
+
#
|
1799
|
+
# result=ParserState.new result
|
1800
|
+
# result.name="start_state"
|
1801
|
+
# result
|
1802
|
+
# end
|
1803
|
+
|
1804
|
+
#make a ParserState from drs (extended by perhaps_also_allow) and
#register it in @states under the next free number.
#NOTE(review): when an equal state is already registered, what comes
#back is the value stored for it in @states, which this method sets to
#a number rather than to the state -- confirm that callers expect this.
#unruly_also is not used.
def new_state(drs,unruly_also=false)
  candidate=ParserState.new(drs,@states.size)
  candidate.perhaps_also_allow(all_rules,self)
  known=@states[candidate]
  return known if known
  @states[candidate]=@states.size
  candidate
end
|
1812
|
+
|
1813
|
+
#start a fresh @states registry and return the state named "initial",
#made of every rule at position 0
def initial_state
  @states={}
  all_initial_dotted_rules #is this still needed?
  everything_at_start=all_rules.map{|rule| DottedRule.create(rule,0,self) }
  state=new_state(everything_at_start)
  state.name="initial"
  state
end
|
1821
|
+
|
1822
|
+
attr_reader :states
|
1823
|
+
|
1824
|
+
#the state named "start": what the initial state evolves into on a
#StartToken, extended by perhaps_also_allow
def start_state
  seenlist=Hash.new(:dunno_yet)
  state=initial_state.evolve(StartToken.new,self,seenlist)
  state.perhaps_also_allow(all_rules,self)
  state.name="start"
  state
end
|
1834
|
+
|
1835
|
+
#inline any subsequences in RULES right into the patterns
|
1836
|
+
#reg should do this already, but current release does not
|
1837
|
+
#the rules from RULES(), with any subsequences on a rule's left side
#spliced directly into that left side.
#if the reg library already inlines nested sequences by itself (probed
#with a small example), the rules are returned untouched.
def expanded_RULES
  result=RULES()
  return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty?
  result.map!{|rule|
    #grep always returns an array, which is always true; ask whether
    #it is empty, so that rules without subsequences really are kept
    #as they are instead of being rebuilt
    if rule.left.subregs.grep(Reg::Subseq).empty?
      rule
    else
      right=rule.right
      matchers=rule.left.subregs.dup
      #back to front, so that splicing does not shift the indexes
      #still to be visited
      (matchers.size-1).downto(0){|i|
        if Reg::Subseq===matchers[i]
          matchers[i,1]=matchers[i].subregs
        end
      }
      -matchers>>right
    end
  }
end
|
1855
|
+
|
1856
|
+
#mixin for objects stored in constants: makes inspect return whatever
#was assigned to constant_name
module NamedConstant
  attr_accessor :constant_name

  def inspect
    constant_name
  end
end
|
1860
|
+
#make the value of every constant defined here inspect as the
#constant's name. classes, modules, numbers, symbols, true, false and
#nil are left alone.
def self.inspect_constant_names
  unnameable=Class|Module|Numeric|Symbol|true|false|nil
  constants.each{|const_name|
    value=const_get(const_name)
    next if unnameable===value
    value.extend NamedConstant
    value.constant_name=const_name
  }
end
|
1868
|
+
|
1869
|
+
#table of the objects in the rules that cannot be marshalled.
#StackMonkeys are entered under their name. Reg::Deferred objects are
#entered under consecutive integers starting at 0, and each is given
#an undump_key accessor holding its integer.
#the table is built on the first call and kept in @undumpables.
#returns the table on every call. (the first call used to return the
#graph walker's result instead, although Proc._load indexes whatever
#this returns.)
def undumpables
  return @undumpables if @undumpables
  @rules||=expanded_RULES
  n=-1
  @undumpables={}
  abortable_graphwalk(@rules){|cntr,o,i,ty|
    #the block's value is false for an object entered into the table
    #and true for everything else
    !case o
     when StackMonkey
       @undumpables[o.name]=o
     when Reg::Deferred
       @undumpables[n+=1]=o
       class<<o
         attr_accessor :undump_key
       end
       o.undump_key=n
     end
  }
  @undumpables
end
|
1887
|
+
|
1888
|
+
#teach Marshal to cope with Procs: a Proc is dumped as its undump_key
#and reloaded by looking that key up in the undumpables table of the
#parser published in Thread.current[:$RedParse_parser].
class ::Proc #hack hack hack
  #only define hacky _dump if one isn't defined already.
  #method_defined? works whether instance_methods lists strings or
  #symbols; comparing against strings never matched where it lists
  #symbols.
  unless method_defined?(:_dump) or
         method_defined?(:marshal_dump) or
         (Marshal.dump(proc{}) rescue false)
    def _dump depth
      undump_key.to_s
    end
    def self._load str
      Thread.current[:$RedParse_parser].undumpables[str.to_i]
    end
  end
end
|
1901
|
+
|
1902
|
+
=begin disabled, uses too much memory!!
|
1903
|
+
class MarshalProxy
|
1904
|
+
def initialize(key)
|
1905
|
+
@key=key
|
1906
|
+
end
|
1907
|
+
attr :key
|
1908
|
+
end
|
1909
|
+
|
1910
|
+
#convert unmarshalables, such as stackmonkeys into proxies
|
1911
|
+
def proxify
|
1912
|
+
n=-1
|
1913
|
+
seen={}
|
1914
|
+
mkproxy=proc{|cntr,o,i,ty,useit|
|
1915
|
+
case o
|
1916
|
+
when StackMonkey
|
1917
|
+
useit[0]=true
|
1918
|
+
seen[o.__id__]||=MarshalProxy.new(o.name)
|
1919
|
+
when Reg::Deferred
|
1920
|
+
useit[0]=true
|
1921
|
+
seen[o.__id__]||=MarshalProxy.new(n+=1)
|
1922
|
+
end
|
1923
|
+
}
|
1924
|
+
Ron::GraphWalk.graphmodify!(@rules,&mkproxy)
|
1925
|
+
Ron::GraphWalk.graphmodify!(self,&mkproxy)
|
1926
|
+
|
1927
|
+
end
|
1928
|
+
|
1929
|
+
def _dump depth
|
1930
|
+
fail unless @rules
|
1931
|
+
proxify
|
1932
|
+
ivs=instance_variables
|
1933
|
+
a=ivs+ivs.reverse.map{|var| instance_variable_get var }
|
1934
|
+
result=Marshal.dump(a,depth)
|
1935
|
+
unproxify
|
1936
|
+
return result
|
1937
|
+
end
|
1938
|
+
|
1939
|
+
#convert marshal proxies back to the real thing
|
1940
|
+
def unproxify
|
1941
|
+
#build a lookup table for unmarshalables by walking @rules
|
1942
|
+
@rules||=expanded_RULES
|
1943
|
+
n=-1;lookup={}
|
1944
|
+
Ron::GraphWalk.graphwalk(@rules){|cntr,o,i,ty|
|
1945
|
+
case o
|
1946
|
+
when StackMonkey
|
1947
|
+
lookup[o.name]=o
|
1948
|
+
when Reg::Deferred
|
1949
|
+
lookup[n+=1]=o
|
1950
|
+
end
|
1951
|
+
}
|
1952
|
+
|
1953
|
+
Ron::GraphWalk.graphmodify!(self){|cntr,o,i,ty,useit|
|
1954
|
+
if MarshalProxy===o
|
1955
|
+
useit[0]=true
|
1956
|
+
lookup[o.key]
|
1957
|
+
end
|
1958
|
+
}
|
1959
|
+
end
|
1960
|
+
|
1961
|
+
def self._load(str,*more)
|
1962
|
+
result=allocate
|
1963
|
+
a=Marshal.load(str,*more)
|
1964
|
+
|
1965
|
+
result.unproxify
|
1966
|
+
|
1967
|
+
(0...a.size/2).each{|i| result.instance_variable_set a[i],a[-i] }
|
1968
|
+
return result
|
1969
|
+
end
|
1970
|
+
=end
|
1971
|
+
|
1972
|
+
###### specific to parsing ruby
|
1973
|
+
|
1974
|
+
|
1975
|
+
UCLETTER=RubyLexer::UCLETTER
|
1976
|
+
|
1977
|
+
LCLETTER=RubyLexer::LCLETTER
|
1978
|
+
LETTER=RubyLexer::LETTER
|
1979
|
+
LETTER_DIGIT=RubyLexer::LETTER_DIGIT
|
1980
|
+
|
1981
|
+
#greps the constant list of this object's class with the Node|Token matcher
def vertices
  self.class.constants.grep(Node|Token)
end
|
1982
|
+
|
1983
|
+
def self.has_return_hash_fix? #is this needed? it's not used in this file....
|
35
1984
|
rl=RubyLexer.new("","return {}.size")
|
36
1985
|
return(
|
37
1986
|
FileAndLineToken===rl.get1token and
|
@@ -109,11 +2058,12 @@ class RedParse
|
|
109
2058
|
|
110
2059
|
"?"=>106, # ":"=>106, #not sure what to do with ":"
|
111
2060
|
|
112
|
-
"
|
2061
|
+
"unary*"=>105, "unary&"=>105, #unary * and & operators
|
2062
|
+
"lhs*"=>105, "rhs*"=>105, #this should remain above =, but other unary stars are below it
|
113
2063
|
|
114
|
-
"="=>
|
115
|
-
"|="=>
|
116
|
-
"&&="=>
|
2064
|
+
"="=>104, "%="=>104, "/="=>104, "-="=>104, "+="=>104,
|
2065
|
+
"|="=>104, "&="=>104, ">>="=>104, "<<="=>104, "*="=>104,
|
2066
|
+
"&&="=>104, "||="=>104, "**="=>104, "^="=>104,
|
117
2067
|
|
118
2068
|
"defined?"=>103,
|
119
2069
|
"not"=>103,
|
@@ -121,11 +2071,14 @@ class RedParse
|
|
121
2071
|
"rescue3"=>102,
|
122
2072
|
|
123
2073
|
"=>"=>101,
|
124
|
-
","=>100,
|
2074
|
+
"lhs,"=>100,
|
2075
|
+
"rhs,"=>100, #"call,"=>100, "array,"=>100, "param,"=>100,
|
2076
|
+
","=>100,
|
125
2077
|
#the 'precedence' of comma is somewhat controversial. it actually has
|
126
2078
|
#several different precedences depending on which kind of comma it is.
|
127
2079
|
#the precedence of , is higher than :, => and the assignment operators
|
128
|
-
#in certain contexts.
|
2080
|
+
#in certain (lhs) contexts. therefore, the precedence of lhs, should
|
2081
|
+
#really be above =.
|
129
2082
|
|
130
2083
|
#"unary" prefix function names seen has operators have this precedence
|
131
2084
|
#but, rubylexer handles precedence of these and outputs fake parens
|
@@ -142,29 +2095,110 @@ class RedParse
|
|
142
2095
|
end
|
143
2096
|
|
144
2097
|
module BracketsCall; end
|
145
|
-
|
146
2098
|
Value= #NumberToken|SymbolToken|
|
147
2099
|
#HerePlaceholderToken|
|
148
|
-
|
2100
|
+
ValueNode&-{:lvalue =>nil}
|
149
2101
|
Expr=Value
|
150
2102
|
|
2103
|
+
if defined? SPECIALIZED_KEYWORDS
  #parent of the Keyword_* token classes that KW generates
  class SpecializedKeywordToken<KeywordToken
    #prints just the class name
    def inspect
      "#<"+self.class.name+">"
    end
    alias image inspect
  end

  #Keyword_* classes made so far, indexed by name
  KW2class={}

  #punctuation keyword => name that can be used as part of a class name
  Punc2name={
    "("=>"lparen",
    ")"=>"rparen",
    "["=>"lbracket",
    "]"=>"rbracket",
    "{"=>"lbrace",
    "}"=>"rbrace",
    ","=>"comma",
    ";"=>"semicolon",
    "::"=>"double_colon",
    "."=>"dot",
    "?"=>"question_mark",
    ":"=>"colon",
    "="=>"equals",
    "|"=>"pipe",
    "<<"=>"leftleft",
    ">>"=>"rightright",
    "=>"=>"arrow",
  }
end
|
2128
|
+
|
151
2129
|
#Builds a matcher for the keyword token(s) named by ident.
#
#default mode: ident may be an Integer (converted with chr), a String or
#Regexp (used as is) or anything else (converted with to_s); the result is
#KeywordToken&-{:ident=>ident}.
#
#SPECIALIZED_KEYWORDS mode (only when that constant is defined): a String
#gets a token class of its own, Keyword_<name>, which is recorded in
#KW2class and returned. A Regexp of the form /^(a|b|[cd])$/ is taken apart
#into its words and char class members; the | of their classes is returned.
#Fails on idents containing a backslash, on anything that is neither
#String nor Regexp, and on punctuation that has no entry in Punc2name.
def self.KW(ident)
  if defined? SPECIALIZED_KEYWORDS
    fail if /\\/===ident
    orig_ident=ident
    if Regexp===ident
      list=ident.to_s[/\(?-mix:\^\((.*)\)\$\)/,1]

      #pick apart any char class in ident
      #NOTE(review): on ruby 1.8, String#[] with a single Integer returns a
      #character code, so the two ==-against-a-string tests below are always
      #false there -- confirm which behavior is intended
      if open_bracket_idx=list.index(/([^\\]|^)\[/)
        open_bracket_idx+=1 unless list[open_bracket_idx]=="["
        close_bracket_idx=list.index(/[^\\]\]/,open_bracket_idx+1)
        close_bracket_idx+=1 unless list[close_bracket_idx]=="]"
        cclass=list.slice!(open_bracket_idx..close_bracket_idx)
        cclass=cclass[1...-1]
        cclass=cclass.scan( /[^\\]|\\./ )
        cclass.map!{|ch| ch.size==1 ? ch : ch[1..1] }
      end

      #rest of it should be a list of words separated by |
      list=list.split(/\|/).reject{|x| x==''}
      list.concat cclass if cclass
      list.map{|w|
        w.gsub!(/\\/,'')
        KW(w)
      }.inject{|sum,kw| sum|kw}
    else
      fail unless String===ident
      #the pattern matches idents made of nothing but non-identifier chars,
      #that is, punctuation. those are the ones that need translating to a
      #name; words can go into the class name unchanged. (this test used to
      #be inverted, which sent every word to Punc2name and into the fail below)
      ident=Punc2name[ident] if /^(?:(?!#{LETTER_DIGIT}).)+$/o===ident
      fail "no name for #{orig_ident}" unless ident
      eval %{
        class Keyword_#{ident} < SpecializedKeywordToken
          def ident; '#{orig_ident}' end
#         def self.instance; @instance ||= allocate end
#         def self.new; instance end
          def initialize(offset)
            @offset=offset
          end
        end
      }
      KW2class[ident]||=const_get("Keyword_#{ident}")
    end
  else
    ident=case ident
          when Integer; ident.chr
          when String,Regexp; ident
          else ident.to_s
          end

    return KeywordToken&-{:ident=>ident}
  end
end
|
160
2180
|
#instance-level shorthand: hands ident to the class method of the same name
def KW(ident)
  self.class.KW(ident)
end
|
2181
|
+
|
2182
|
+
#make_kw(name,offset) creates the token for keyword name at offset.
#with SPECIALIZED_KEYWORDS defined it instantiates the class registered for
#that keyword in KW2class, otherwise a plain KeywordToken.
if defined? SPECIALIZED_KEYWORDS
  def make_specialized_kw(name,offset)
    #the pattern matches pure punctuation; only that needs translating to the
    #name its class was registered under (this test used to be inverted)
    name=Punc2name[name] if /^((?!#{LETTER_DIGIT}).)+$/o===name
    KW2class[name].new(offset)
  end
  alias make_kw make_specialized_kw
else
  def make_kw(name,offset)
    KeywordToken.new(name,offset)
  end
end
|
2193
|
+
|
161
2194
|
UNOP=
|
162
2195
|
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
|
163
|
-
:ident=>/^[
|
164
|
-
:unary
|
2196
|
+
# :ident=>/^(?:[+-]@|unary[&*]|(?:lhs|rhs)[*])$/,
|
2197
|
+
:ident=>/^(?:[+-]@|unary[&])$/,
|
2198
|
+
#:unary =>true,
|
165
2199
|
}|
|
166
2200
|
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
|
167
|
-
:ident=>/^([~!]|not|defined\?)$/,
|
2201
|
+
:ident=>/^([~!]|not|defined\?)$/, #defined? should be removed from here, its handled separately
|
168
2202
|
} #|
|
169
2203
|
DEFOP=
|
170
2204
|
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
|
@@ -200,285 +2234,325 @@ class RedParse
|
|
200
2234
|
:ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/
|
201
2235
|
}
|
202
2236
|
=end
|
203
|
-
DotOp= KeywordToken & -{ :ident=>"." }
|
204
|
-
DoubleColonOp= KeywordToken & -{ :ident=>"::" }
|
2237
|
+
DotOp= KW('.') #KeywordToken & -{ :ident=>"." }
|
2238
|
+
DoubleColonOp= KW('::') #KeywordToken & -{ :ident=>"::" }
|
205
2239
|
|
206
2240
|
Op=Op()
|
207
2241
|
MODIFYASSIGNOP=Op( /^(([^=])\2|[^<>=!])=$/, true )
|
208
2242
|
NONASSIGNOP=Op( /([^=]|[<>=!]=)$/)
|
209
2243
|
KW_Op= #some of these ought to be regular operators, fer gosh sake
|
210
|
-
Op(/^(
|
2244
|
+
Op(/^(![=~]|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
|
211
2245
|
|
212
2246
|
EPSILON=Float::EPSILON*10_000_000 #this should be <<1 and >0
|
213
2247
|
fail unless 1+EPSILON>1
|
214
2248
|
fail unless EPSILON<0.1
|
215
2249
|
|
216
2250
|
#Does the operator on the left (op) win over the one on the right (op2)?
#True if op2 is neither a KeywordToken nor an OperatorToken, or has no entry
#in @precedence. Otherwise true if op's precedence is at least op2's; a right
#associative op2 gets its precedence nudged up by EPSILON first, so that it
#beats a left operator of equal precedence.
def left_op_higher(op,op2)
  return true unless KeywordToken===op2 or OperatorToken===op2
  right=@precedence[op2.to_s]
  return true unless right
  right+=EPSILON if @RIGHT_ASSOCIATIVE[op2.to_s]
  @precedence[op.to_s]>=right
end
|
224
2256
|
|
225
|
-
LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
|
2257
|
+
# LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
|
2258
|
+
#Lookahead matcher placed at the end of operator rules; built once, then
#cached in @lower_op. It accepts LOWEST_OP, or anything which is not
#VALUELIKE_LA and for which left_op_higher(@stack[-3],it) holds.
#Its inspect is overridden to print just "lower_op".
def lower_op
  return @lower_op if defined? @lower_op
  loses_to_left=item_that{|op| left_op_higher(@stack[-3],op) }
  matcher=(LOWEST_OP|(~VALUELIKE_LA & loses_to_left)).la
  def matcher.inspect; "lower_op" end
  @lower_op=matcher
end
|
226
2265
|
|
227
|
-
|
228
|
-
|
2266
|
+
#this is a hack, should use graphcopy to search for Deferreds and replace with double-Deferred as below
|
2267
|
+
#Wrapper around the inherited item_that.
#Normally it just passes everything through to super. While
#@generating_parse_tables is defined, the matcher returned is one whose
#block notes in @saw_item_that the pairing of [real matcher, object tested].
def item_that(*a,&b)
  return super(*a,&b) unless defined? @generating_parse_tables
  huh unless b
  #two supers: this one runs now; the one inside the block runs
  #later, each time the matcher is applied to something
  super(*a){|ob| @saw_item_that[[super(*a,&b),ob]]=true}
end
|
2277
|
+
|
230
2278
|
WANTS_SEMI=%w[while until if unless
|
231
2279
|
def case when in rescue
|
232
2280
|
elsif class module << => . ::
|
233
2281
|
]
|
234
2282
|
#matcher for the operators << => . :: and for every keyword in WANTS_SEMI
def wants_semi_context
  words=WANTS_SEMI.map{|ws| Regexp.quote ws }.join('|')
  Op(/^(<<|=>|\.|::)$/)|KW(/^(#{words})$/)
end
|
2285
|
+
#the ~ (complement) of wants_semi_context, computed once and kept in @dps
def dont_postpone_semi
  @dps= ~wants_semi_context unless @dps
  @dps
end
|
237
2288
|
|
238
|
-
NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
|
239
|
-
FakeBegin=KW('(')&-{:not_real? =>true}
|
240
|
-
FakeEnd=KW(')')&-{:not_real? =>true}
|
2289
|
+
#NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
|
2290
|
+
#FakeBegin=KW('(')&-{:not_real? =>true}
|
2291
|
+
#FakeEnd=KW(')')&-{:not_real? =>true}
|
241
2292
|
|
242
2293
|
#rule format:
|
243
2294
|
# -[syntax pattern_matchers.+, lookahead.-]>>node type
|
244
2295
|
|
245
|
-
DotCall=
|
2296
|
+
DotCall=stack_monkey("DotCall",4,CallNode){|stack|
|
2297
|
+
left,dot=*stack.slice!(-4..-3)
|
246
2298
|
right=stack[-2]
|
247
|
-
left,bogus=*stack.slice!(-4..-3)
|
248
2299
|
|
2300
|
+
right.startline=left.startline
|
249
2301
|
right.set_receiver! left
|
250
2302
|
}
|
251
2303
|
|
252
|
-
Lvalue=(
|
253
|
-
ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue
|
2304
|
+
Lvalue=(VarNode|CallSiteNode|BracketsGetNode|CommaOpNode|
|
2305
|
+
ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue =>true}
|
254
2306
|
|
255
|
-
BareMethod=MethNameToken|LiteralNode&-{:
|
2307
|
+
BareMethod=MethNameToken|(LiteralNode&-{:bare_method=>true})
|
256
2308
|
|
257
2309
|
BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
|
258
2310
|
ENDWORDLIST=%w"end ) ] }"
|
259
|
-
|
260
|
-
|
261
|
-
|
2311
|
+
ENDWORDS=ENDWORDLIST.map{|x| Regexp.quote x}.join('|')
|
2312
|
+
BEGINWORDS=RubyLexer::BEGINWORDS
|
2313
|
+
INNERBOUNDINGWORDS=RubyLexer::INNERBOUNDINGWORDS
|
2314
|
+
|
2315
|
+
BEGIN2END={"{"=>"}", "("=>")", "["=>"]", BEGINWORDS=>"end"}
|
262
2316
|
#regexp matching exactly one of BEGINWORDS or ENDWORDS; built on first
#use and kept in @bem
def beginsendsmatcher
  @bem=/^(#{BEGINWORDS}|#{ENDWORDS})$/ unless @bem
  @bem
end
|
266
2320
|
|
267
|
-
MULTIASSIGN=UnaryStarNode|CommaOpNode|
|
268
|
-
WITHCOMMAS=UnaryStarNode|CommaOpNode|
|
269
|
-
(CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}})
|
2321
|
+
MULTIASSIGN=UnaryStarNode|CommaOpNode|ParenedNode
|
2322
|
+
WITHCOMMAS=UnaryStarNode|CommaOpNode|(CallSiteNode&-{:with_commas=>true})
|
2323
|
+
#(CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}})
|
270
2324
|
|
271
2325
|
BEGINAFTEREQUALS=
|
272
|
-
|
273
|
-
-{:
|
274
|
-
|
275
|
-
|
2326
|
+
BeginNode&
|
2327
|
+
-{:after_equals =>nil}&-{:non_empty=>true}
|
2328
|
+
BEGINAFTEREQUALS_MARKED=
|
2329
|
+
BeginNode&
|
2330
|
+
-{:after_equals =>true}&-{:non_empty=>true}
|
276
2331
|
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
PARAM_COMMA=Op(',',true)&-{:comma_type => :param}
|
2332
|
+
LHS_COMMA=Op('lhs,',true)#&-{:tag => :lhs}
|
2333
|
+
RHS_COMMA=Op('rhs,',true)#&-{:tag => :rhs}
|
2334
|
+
#PARAM_COMMA=Op('param,',true)#&-{:tag => :param}
|
281
2335
|
FUNCLIKE_KEYWORD=KeywordToken&-{:ident=>RubyLexer::FUNCLIKE_KEYWORDS}
|
2336
|
+
IGN_SEMI_BEFORE=KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')[1...-1]}|end|[)}\]])$/)|EoiToken
|
2337
|
+
IGN_SEMI_AFTER=KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|BlockFormalsNode
|
2338
|
+
|
2339
|
+
#for use in lookback patterns
|
2340
|
+
OPERATORLIKE_LB=OperatorToken|
|
2341
|
+
KW(/^(not | defined\? | .*[@,] | [ ~ ! ; \( \[ \{ ? : ] | \.{1,3} | :: | => | ![=~])$/x)|
|
2342
|
+
KW(%r{^( \*\*? | << | >> | &&? | \|\|? | \^ | % | / | - | \+ )?=$}x)|
|
2343
|
+
KW(BEGINWORDS)|KW(/^#{INNERBOUNDINGWORDS}$/)|RescueHeaderNode|StartToken|
|
2344
|
+
GoalPostToken|BlockFormalsNode
|
2345
|
+
|
2346
|
+
#for use in lookahead patterns
|
2347
|
+
VALUELIKE_LA=KW(RubyLexer::VARLIKE_KEYWORDS)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP|
|
2348
|
+
KW(/^( \( | \{ | )$/x)|VarNameToken|MethNameToken|HerePlaceholderToken|KW(BEGINWORDS)|FUNCLIKE_KEYWORD
|
2349
|
+
LOWEST_OP=KW(/^(#{ENDWORDS})$/)|KW(/^#{INNERBOUNDINGWORDS.sub('rescue|','')}$/)|EoiToken|GoalPostToken
|
2350
|
+
|
2351
|
+
RESCUE_BODY=-[Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,]
|
2352
|
+
|
2353
|
+
RESCUE_OP=Op('rescue')|(KW('rescue')&-{:infix=>true})
|
2354
|
+
|
2355
|
+
RESCUE_KW=KW('rescue')&-{:infix=>nil}
|
2356
|
+
|
2357
|
+
inspect_constant_names
|
282
2358
|
|
283
2359
|
def RULES
|
2360
|
+
lower_op= lower_op()
|
2361
|
+
|
2362
|
+
[-[StartToken.lb, Expr.-, EoiToken.la]>>:accept,
|
2363
|
+
-[EoiToken]>>:error,
|
2364
|
+
]+
|
2365
|
+
|
284
2366
|
#these must be the lowest possible priority, and hence first in the rules list
|
285
2367
|
BEGIN2END.map{|_beg,_end|
|
286
|
-
-[KW(_beg), KW(
|
2368
|
+
-[KW(_beg), (KW(_beg)|KW(_end)).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
|
287
2369
|
}+
|
288
2370
|
|
289
2371
|
[
|
290
|
-
-[UNOP,
|
291
|
-
-[DEFOP, ParenedNode
|
292
|
-
-[Op(
|
2372
|
+
-[UNOP, Expr, lower_op]>>UnOpNode,
|
2373
|
+
-[DEFOP, ParenedNode]>>UnOpNode,
|
2374
|
+
-[Op(/^(?:unary|lhs|rhs)\*$/), ValueNode, lower_op]>>UnaryStarNode,
|
293
2375
|
|
294
|
-
-[Op('=',true)|KW(/^(rescue|when|\[)$/)
|
295
|
-
Op(
|
2376
|
+
-[Op('=',true)|KW(/^(rescue|when|\[)$/)|Op(/,$/,true),
|
2377
|
+
Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
|
296
2378
|
-[MethNameToken|FUNCLIKE_KEYWORD, KW('('),
|
297
|
-
Op(
|
298
|
-
# -[KW('[')|-{:comma_type=>:call.reg|:array},
|
299
|
-
# Op('*@'), VarNameToken|ValueNode, Op('=',true).la]>>:shift,
|
2379
|
+
Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
|
300
2380
|
#star should not be used in an lhs if an rhs or param list context is available to eat it.
|
301
|
-
#(including param lists for keywords such as return,break,next,
|
2381
|
+
#(including param lists for keywords such as return,break,next,rescue,yield,when)
|
302
2382
|
|
303
|
-
|
304
|
-
-[
|
305
|
-
|
2383
|
+
#hmmm.... | in char classes below looks useless (predates GoalPostToken)
|
2384
|
+
-[Op(/^(?:unary|lhs)\*$/), (GoalPostToken|Op(/,$/,true)|KW(/^(in|[=)|;])$/)).la]>>DanglingStarNode, #dangling *
|
2385
|
+
-[Op(/,$/,true), (GoalPostToken|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
|
2386
|
+
stack_monkey("DanglingComma",1,DanglingCommaNode){|stack|
|
306
2387
|
dcomma=DanglingCommaNode.new
|
307
2388
|
dcomma.offset=stack.last.offset
|
308
2389
|
stack.push dcomma, stack.pop
|
309
2390
|
},
|
310
2391
|
|
311
|
-
-[
|
2392
|
+
-[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators
|
312
2393
|
|
313
2394
|
#assignment
|
314
|
-
-[Lvalue, MODIFYASSIGNOP,
|
315
|
-
-[Lvalue, Op('=',true), AssignmentRhsNode,
|
316
|
-
-[Op('=',true).lb,
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
2395
|
+
-[Lvalue, MODIFYASSIGNOP, Expr, lower_op]>>AssignNode,
|
2396
|
+
-[Lvalue, Op('=',true), AssignmentRhsNode, lower_op]>>AssignNode,
|
2397
|
+
-[Op('=',true).lb, Expr, lower_op]>>AssignmentRhsNode,
|
2398
|
+
|
2399
|
+
# a = b rescue c acts like a ternary,,,
|
2400
|
+
#provided that both a and b are not multiple and b
|
2401
|
+
#(if it is a parenless callsite) has just 1 param
|
2402
|
+
-[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
|
2403
|
+
Op('rescue3',true), Expr, lower_op]>>AssignNode,
|
2404
|
+
-[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
|
321
2405
|
Op('rescue3',true).la]>>:shift,
|
322
|
-
-[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:
|
323
|
-
|
2406
|
+
-[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
|
2407
|
+
RESCUE_OP.la] >>
|
2408
|
+
stack_monkey("rescue3",1,Op('rescue3',true)){|stack|
|
324
2409
|
resc=stack.last.dup
|
325
2410
|
resc.ident += '3'
|
326
2411
|
stack[-1]=resc
|
327
2412
|
},
|
328
|
-
# a = b rescue c acts like a ternary,,,
|
329
|
-
#provided that both a and b are not multiple and b
|
330
|
-
#(if it is a parenless callsite) has just 1 param
|
331
|
-
|
332
|
-
# -[Op('=',true), ~WITHCOMMAS, Op('rescue',true).la]>>:shift,
|
333
2413
|
#relative precedence of = and rescue are to be inverted if rescue
|
334
2414
|
#is to the right and assignment is not multiple.
|
335
2415
|
|
336
|
-
|
337
|
-
|
338
|
-
-[
|
2416
|
+
#if assignment rhs contains commas, don't reduce til they've been read
|
2417
|
+
#(unless we're already on an rhs)
|
2418
|
+
-[(Op('=',true)|Expr).~.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la]>>:shift,
|
2419
|
+
-[RHS_COMMA.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la ]>>AssignNode,
|
2420
|
+
-[ValueNode, LHS_COMMA, ValueNode, Op('=',true).la]>>CommaOpNode,
|
339
2421
|
#relative precedence of = and lhs/rhs , are to be inverted.
|
340
2422
|
|
341
|
-
-[KW(',')&-{:comma_type=>:lhs}, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
|
342
|
-
proc{|stack| stack[-3].after_comma=true}, #mebbe this should be a lexer hack
|
343
2423
|
#mark parentheses and unary stars that come after lhs commas
|
2424
|
+
-[LHS_COMMA, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
|
2425
|
+
stack_monkey("after_comma",3,(UnaryStarNode|ParenedNode)&-{:after_comma =>true}){|stack|
|
2426
|
+
stack[-3].after_comma=true},
|
2427
|
+
#mebbe this should be a lexer hack?
|
344
2428
|
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
-[(MethNameToken|FUNCLIKE_KEYWORD).~.lb, '(', ')']>>VarLikeNode, #alias for nil
|
2429
|
+
-[#(OPERATORLIKE_LB&~(MethNameToken|FUNCLIKE_KEYWORD)).lb,
|
2430
|
+
'(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true})]>>ParenedNode,
|
2431
|
+
-[#(OPERATORLIKE_LB&~(MethNameToken|FUNCLIKE_KEYWORD)).lb,
|
2432
|
+
'(', KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true})]>>VarLikeNode, #(), alias for nil
|
350
2433
|
|
351
|
-
|
352
|
-
|
2434
|
+
-[#(OPERATORLIKE_LB&~Op('=',true)).lb,
|
2435
|
+
Expr, RESCUE_OP, Expr, lower_op]>>RescueOpNode,
|
353
2436
|
|
354
2437
|
#dot and double-colon
|
355
|
-
-[DoubleColonOp,
|
356
|
-
-[
|
357
|
-
-[
|
358
|
-
-[
|
2438
|
+
-[DoubleColonOp, VarNode, lower_op]>>ConstantNode,#unary ::
|
2439
|
+
-[Expr, DotOp, CallNode, lower_op]>>DotCall, #binary .
|
2440
|
+
-[Expr, DoubleColonOp, CallNode, lower_op]>>DotCall, #binary ::
|
2441
|
+
-[Expr, DoubleColonOp, VarNode, lower_op]>>ConstantNode,#binary ::
|
359
2442
|
|
360
|
-
-[
|
2443
|
+
-[Expr, "?", Expr, ":", Expr, lower_op]>>TernaryNode,
|
361
2444
|
|
362
|
-
# -[Value, /^\.\.\.?$/, Value, LowerOp]>>RangeNode,
|
363
2445
|
|
364
|
-
-[MethNameToken, '(',
|
365
|
-
-[FUNCLIKE_KEYWORD, '(',
|
366
|
-
BlockNode.-, KW(/^(do|\{)$/).~.la]>>KWCallNode,
|
2446
|
+
-[MethNameToken, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>CallNode,
|
2447
|
+
-[FUNCLIKE_KEYWORD, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>KWCallNode,
|
367
2448
|
|
368
|
-
-[ValueNode
|
2449
|
+
-[ValueNode, Op(/,$/,true), ValueNode, lower_op]>>CommaOpNode,
|
369
2450
|
|
370
|
-
-[dont_postpone_semi.lb,
|
371
|
-
|
2451
|
+
-[(OPERATORLIKE_LB&dont_postpone_semi).lb,
|
2452
|
+
Expr, ';', Expr, lower_op]>>SequenceNode,
|
372
2453
|
|
373
|
-
# -[Value, '=>', Value, LowerOp]>>ArrowOpNode,
|
374
2454
|
|
375
|
-
-[KW(')')
|
2455
|
+
-[#(OPERATORLIKE_LB&~KW(')')).lb,
|
2456
|
+
'{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode, #-40
|
376
2457
|
|
377
|
-
|
378
|
-
|
379
|
-
# -[KW(')').lb, '{', BlockFormalsNode.-, Value.-, '}']>>BlockNode,
|
380
|
-
-[KW(')').lb, 'do', BlockFormalsNode.-, Value.-, 'end']>>BlockNode,
|
2458
|
+
-[KW(')').lb, 'do', BlockFormalsNode.-, Expr.-, 'end']>>BlockNode,
|
2459
|
+
#this does {} as well... converted to do...end
|
381
2460
|
#rubylexer handles the 'low precedence' of do...end
|
382
2461
|
|
383
|
-
-[
|
2462
|
+
-[GoalPostToken, Expr.-, GoalPostToken]>>BlockFormalsNode,
|
384
2463
|
#rubylexer disambiguated operator vs keyword '|'
|
385
2464
|
|
386
|
-
-[/^(while|until)$/,
|
2465
|
+
-[/^(while|until)$/, Expr, /^([:;]|do)$/, Expr.-, 'end']>>LoopNode,
|
387
2466
|
|
388
|
-
-[/^(if|unless)$/,
|
389
|
-
|
2467
|
+
-[/^(if|unless)$/, Expr, /^(;|then|:)$/,
|
2468
|
+
Expr.-, ElsifNode.*, ElseNode.-, 'end'
|
390
2469
|
]>>IfNode,
|
391
2470
|
|
392
|
-
-['else',
|
2471
|
+
-['else', Expr.-, KW(/^(ensure|end)$/).la]>>ElseNode,
|
393
2472
|
|
394
|
-
-['elsif',
|
395
|
-
KW(/^
|
2473
|
+
-['elsif', Expr, /^(;|then|:)$/, Expr.-,
|
2474
|
+
KW(/^(end|else|elsif)$/).la
|
396
2475
|
]>>ElsifNode,
|
397
2476
|
|
398
|
-
|
399
|
-
#
|
400
|
-
# proc{|stack| #insert ; at end of module header if none was present
|
2477
|
+
# -['module', ConstantNode|VarNode, KW(/^(;|::)$/).~.la]>>
|
2478
|
+
# stack_monkey(1,KW(';')){|stack| #insert ; at end of module header if none was present
|
401
2479
|
# stack.push KeywordToken.new(';'), stack.pop
|
402
2480
|
# },
|
403
|
-
-['
|
404
|
-
-['class',
|
405
|
-
-['class', Op('
|
2481
|
+
-['module', ConstantNode|VarNode, ';', RESCUE_BODY, 'end']>>ModuleNode,
|
2482
|
+
-['class', Expr, ';', RESCUE_BODY, 'end']>>ClassNode,
|
2483
|
+
-['class', Expr, Op('<'), Expr, KW(';').~.la]>>:shift,
|
2484
|
+
-['class', Op('<<'), Expr, ';', RESCUE_BODY, 'end']>>MetaClassNode, #-30
|
406
2485
|
|
407
|
-
-['alias', BareMethod|
|
2486
|
+
-['alias', BareMethod|VarNode, BareMethod|VarNode]>>AliasNode,
|
408
2487
|
-['undef', BareMethod]>>UndefNode,
|
409
|
-
-[UndefNode, ',', BareMethod]>>UndefNode,
|
2488
|
+
-[UndefNode, Op(',',true), BareMethod]>>UndefNode,
|
410
2489
|
|
411
|
-
-['def', CallSiteNode, Op('=').-, KW(';'),
|
412
|
-
|
2490
|
+
-['def', CallSiteNode, Op('=').-, KW(';'), RESCUE_BODY,
|
2491
|
+
# Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
|
413
2492
|
'end'
|
414
2493
|
]>>MethodNode,
|
415
2494
|
|
416
|
-
-['begin',
|
417
|
-
|
2495
|
+
-['begin', RESCUE_BODY,
|
2496
|
+
# Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
|
418
2497
|
'end'
|
419
|
-
]>>
|
2498
|
+
]>>BeginNode,
|
420
2499
|
|
421
|
-
-[Op('=',true), BEGINAFTEREQUALS,
|
422
|
-
|
2500
|
+
-[Op('=',true), BEGINAFTEREQUALS, RESCUE_OP.la]>>
|
2501
|
+
stack_monkey("begin after equals",2,BEGINAFTEREQUALS_MARKED){ |stack| stack[-2].after_equals=true },
|
423
2502
|
#this is bs. all for an extra :begin in the parsetree
|
424
2503
|
|
425
|
-
-[(KW(/^(;|begin)$/)|
|
426
|
-
|
2504
|
+
-[(KW(/^(;|begin)$/)|RescueNode).lb, #ParenedNode|RescueOpNode|BeginNode used to be here too
|
2505
|
+
RESCUE_KW, KW('=>').-, Expr.-, /^([:;]|then)$/,
|
427
2506
|
]>>RescueHeaderNode,
|
428
|
-
-[ RescueHeaderNode,
|
2507
|
+
-[ RescueHeaderNode, Expr.-, KW(';').-, (KW(/^(else|ensure|end)$/)|RESCUE_KW).la
|
429
2508
|
]>>RescueNode,
|
430
2509
|
|
431
|
-
-['ensure',
|
2510
|
+
-['ensure', Expr.-, KW('end').la]>>EnsureNode,
|
432
2511
|
|
433
|
-
-['[',
|
2512
|
+
-['[', Expr.-, ']']>>ArrayLiteralNode, #-20
|
434
2513
|
|
435
|
-
-[
|
2514
|
+
-[Expr, '[', Expr.-, ']']>>BracketsGetNode,
|
436
2515
|
|
437
|
-
-[HereDocNode, StringToken
|
438
|
-
-[(StringToken|HereDocNode)
|
439
|
-
-[(StringToken|HereDocNode)
|
2516
|
+
-[HereDocNode, StringToken+1, StringToken.~.la]>>StringCatNode,
|
2517
|
+
-[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken+2, StringToken.~.la]>>StringCatNode,
|
2518
|
+
-[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken, StringToken.~.la]>>StringNode, #includes regexp, wordlist, backquotes
|
440
2519
|
|
441
|
-
-['case',
|
2520
|
+
-['case', Expr.-, KW(';').-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
|
442
2521
|
|
443
|
-
-['when',
|
2522
|
+
-['when', Expr, /^([:;]|then)$/, Expr.-,
|
444
2523
|
KW(/^(when|else|end)$/).la
|
445
2524
|
]>>WhenNode,
|
446
2525
|
|
447
|
-
-['for',
|
2526
|
+
-['for', Expr, 'in', Expr, /^([:;]|do)$/, Expr.-, 'end']>>ForNode,
|
448
2527
|
|
449
2528
|
#semicolon cleanup....
|
450
|
-
-[dont_postpone_semi.lb,
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
-[
|
455
|
-
|
456
|
-
-[dont_postpone_semi.lb, Value, ';', RescueNode
|
457
|
-
]>>proc{|stack| stack.delete_at -3 },
|
458
|
-
-[(KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|StartNode|RescueHeaderNode).lb, ';'
|
459
|
-
]>>proc{|stack| stack.delete_at -2 },
|
2529
|
+
-[(OPERATORLIKE_LB&dont_postpone_semi).lb,Expr, ';', IGN_SEMI_BEFORE.la] \
|
2530
|
+
>>delete_monkey(2,"semi_cleanup_before_ISB"),
|
2531
|
+
-[Expr, ';', KW('then').la] >>delete_monkey(2,"semi_cleanup_before_then"),
|
2532
|
+
-[dont_postpone_semi.lb, Expr, ';', RescueNode] >>delete_monkey(3,"semi_cleanup_before_rescue"), #-10
|
2533
|
+
-[IGN_SEMI_AFTER.lb, ';'] >>delete_monkey(2,"semi_cleanup_after_oplike"),
|
2534
|
+
-[(StartToken|RescueHeaderNode).lb, ';' ] >>delete_monkey(2,"semi_cleanup_after_rescue"),
|
460
2535
|
#this rule is somewhat more forgiving than matz' parser...
|
461
2536
|
#not all semicolons after :, (, and { keywords should
|
462
2537
|
#be ignored. some should cause syntax errors.
|
463
2538
|
|
464
2539
|
|
465
2540
|
#comma cleanup....
|
466
|
-
-[
|
2541
|
+
-[Op(/,$/,true), KW(/^([}\]])$/).la] >>delete_monkey(2, "comma_cleanup"),
|
467
2542
|
#likewise, this is somewhat too forgiving.
|
468
2543
|
#some commas before } or ] should cause syntax errors
|
469
2544
|
|
470
|
-
#multiple assignment.... (handled in a subsequent stage?)
|
471
|
-
#(cause it requires that the relative priorities of = and , be reversed!)
|
472
|
-
|
473
|
-
|
474
2545
|
#turn lvalues into rvalues if not followed by an assignop
|
475
|
-
-[-{:lvalue
|
2546
|
+
-[-{:lvalue =>true}, (Op('=',true)|MODIFYASSIGNOP|LHS_COMMA).~.la]>>
|
2547
|
+
stack_monkey("lval2rval",2,-{:lvalue =>nil}){|stack|
|
2548
|
+
stack[-2].lvalue=nil
|
2549
|
+
},
|
476
2550
|
|
477
2551
|
#expand the = into a separate token in calls to settors (after . or ::).
|
478
2552
|
#but not in method headers
|
479
|
-
-[KW('def')
|
480
|
-
(MethNameToken&-{:
|
481
|
-
|
2553
|
+
-[(OPERATORLIKE_LB&~KW('def')).lb, Expr, DotOp|DoubleColonOp,
|
2554
|
+
(MethNameToken&-{:has_equals=>true}).la]>>
|
2555
|
+
stack_monkey("expand_equals",1,CallNode){|stack|
|
482
2556
|
methname=stack.pop
|
483
2557
|
methname.ident.chomp!('=')
|
484
2558
|
offset=methname.offset+methname.ident.size
|
@@ -493,48 +2567,63 @@ class RedParse
|
|
493
2567
|
#lexer does the wrong thing with -22**44.5, making the - part
|
494
2568
|
#of the first number token. it's actually lower precedence than
|
495
2569
|
#**... this rule fixes that problem.
|
496
|
-
-[
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
stack[-2]
|
502
|
-
|
2570
|
+
#in theory, unary - is lower precedence than ., ::, and [] as well, but
|
2571
|
+
#that appears not to apply to unary - in numeric tokens
|
2572
|
+
-[NumberToken&-{:negative=>true}, Op('**').la]>>
|
2573
|
+
stack_monkey("fix_neg_exp",2,Op("-@",true)){|stack|
|
2574
|
+
#neg_op.unary=true
|
2575
|
+
num=stack[-2]
|
2576
|
+
op=OperatorToken.new("-@",num.offset)
|
2577
|
+
# op.startline=num.startline
|
2578
|
+
stack[-2,0]=op
|
2579
|
+
num.ident.sub!(/\A-/,'')
|
2580
|
+
num.offset+=1
|
503
2581
|
},
|
504
2582
|
|
505
2583
|
#treat these keywords like (rvalue) variables.
|
506
|
-
-[
|
2584
|
+
-[RubyLexer::VARLIKE_KEYWORDS]>>VarLikeNode,
|
507
2585
|
|
508
2586
|
#here docs
|
509
2587
|
-[HerePlaceholderToken]>>HereDocNode,
|
510
|
-
-[HereBodyToken]>>
|
511
|
-
|
512
|
-
|
2588
|
+
-[HereBodyToken.la]>>delete_monkey(1,"delete_here_body"),
|
2589
|
+
##this is ridiculous. this should be a lexer hack?
|
2590
|
+
|
2591
|
+
-[VarNameToken]>>VarNode,
|
2592
|
+
|
513
2593
|
|
514
2594
|
]
|
515
2595
|
end
|
516
2596
|
|
2597
|
+
if defined? END_ATTACK
|
2598
|
+
module Reducer; end
|
2599
|
+
include Reducer
|
2600
|
+
end
|
517
2601
|
|
518
|
-
|
519
|
-
|
2602
|
+
#input::   an Array (used directly as the token source; it is given a
#          get1token method that shifts tokens off its front), or anything
#          else, which is handed to RubyLexer.new as the code to lex
#name::    kept as @filename; also passed to the lexer
#line::    passed to the lexer
#lvars::   each one is entered into the lexer's localvars
#          (not used when input is an Array)
#options:: :rubyversion is kept in @rubyversion and passed to the lexer;
#          1.8 if the key is absent or nil
def initialize(input,name="(eval)",line=1,lvars=[],options={:rubyversion=>1.8})
  #an options hash given without :rubyversion used to leave this nil
  @rubyversion=options[:rubyversion]||1.8
  if Array===input
    def input.get1token; shift end
    @lexer=input
  else
    @lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion)
    lvars.each{|lvar| @lexer.localvars[lvar]=true }
  end
  @filename=name
  @min_sizes={}
  @compiled_rules={}
  @moretokens=[]
  @unary_or_binary_op=/^[-+]$/
# @rules=self.expanded_RULES
  @precedence=self.PRECEDENCE
  @RIGHT_ASSOCIATIVE=self.RIGHT_ASSOCIATIVE
  if defined? END_ATTACK
    compile
  end
  @saw_item_that=nil
end
|
536
2624
|
|
537
2625
|
attr_accessor :lexer
|
2626
|
+
attr :rubyversion
|
538
2627
|
|
539
2628
|
def get_token(recursing=false)
|
540
2629
|
unless @moretokens.empty?
|
@@ -543,78 +2632,60 @@ class RedParse
|
|
543
2632
|
return @last_token
|
544
2633
|
end
|
545
2634
|
|
2635
|
+
rpt=ENV['RAW_PRINT_TOKENS']
|
546
2636
|
begin
|
547
2637
|
result=@lexer.get1token or break
|
548
|
-
p result if
|
2638
|
+
p result if rpt
|
549
2639
|
|
550
|
-
#set token's line
|
551
|
-
result.
|
2640
|
+
#set token's line
|
2641
|
+
result.startline= @endline||=1
|
2642
|
+
result.endline||=@endline if result.respond_to? :endline=
|
552
2643
|
|
553
2644
|
if result.respond_to?(:as) and as=result.as
|
554
|
-
result=
|
555
|
-
result.
|
2645
|
+
#result=make_kw(as,result.offset)
|
2646
|
+
#result.originally=result.ident
|
2647
|
+
if OperatorToken===result #or KeywordToken===result
|
2648
|
+
result=result.dup
|
2649
|
+
result.ident=as
|
2650
|
+
else
|
2651
|
+
result=make_kw(as,result.offset)
|
2652
|
+
end
|
2653
|
+
result.not_real! if result.respond_to? :not_real!
|
556
2654
|
else
|
557
2655
|
|
558
2656
|
case result
|
559
|
-
|
560
|
-
when ImplicitParamListStartToken: #treat it like (
|
561
|
-
result=KeywordToken.new('(', result.offset)
|
562
|
-
result.not_real!
|
563
|
-
#=end
|
564
|
-
#=begin
|
565
|
-
when ImplicitParamListEndToken:
|
566
|
-
result=KeywordToken.new(')', result.offset)
|
567
|
-
result.not_real!
|
568
|
-
#=end
|
569
|
-
# when AssignmentRhsListStartToken, AssignmentRhsListEndToken:
|
570
|
-
#do nothing, pass it thru
|
571
|
-
#=begin
|
572
|
-
when NewlineToken:
|
573
|
-
result=KeywordToken.new(';',result.offset)
|
574
|
-
#=end
|
575
|
-
when FileAndLineToken: #so __FILE__ and __LINE__ can know what their values are
|
2657
|
+
when FileAndLineToken #so __FILE__ and __LINE__ can know what their values are
|
576
2658
|
@file=result.file
|
577
|
-
@
|
2659
|
+
@endline=result.line
|
578
2660
|
redo
|
579
|
-
when NoWsToken:
|
580
|
-
#rubylexer disambiguates array literal from
|
581
|
-
#call to [] or []= method with a preceding NoWsToken...
|
582
|
-
#kind of a dumb interface.
|
583
|
-
result=get_token(true)
|
584
|
-
result.ident=='[' and result.extend BracketsCall
|
585
|
-
|
586
2661
|
|
587
|
-
when OperatorToken
|
588
|
-
if @unary_or_binary_op===result.ident and result.unary
|
2662
|
+
when OperatorToken
|
2663
|
+
if @unary_or_binary_op===result.ident and result.unary || result.tag==:unary
|
589
2664
|
result=result.dup
|
590
2665
|
result.ident+="@"
|
591
2666
|
end
|
592
2667
|
|
593
2668
|
#more symbol table maintenance....
|
594
|
-
when KeywordToken
|
2669
|
+
when KeywordToken
|
595
2670
|
case name=result.ident
|
596
2671
|
|
597
|
-
#=begin
|
598
|
-
when "do":
|
599
|
-
if result.has_end?
|
600
|
-
else
|
601
|
-
result=KeywordToken.new(';',result.offset)
|
602
|
-
end
|
603
|
-
#=end
|
604
2672
|
when /^(#{BINOP_KEYWORDS.join '|'})$/: #should be like this in rubylexer
|
605
2673
|
result=OperatorToken.new(name,result.offset) unless result.has_end?
|
606
|
-
when "|"
|
607
|
-
when "__FILE__"
|
2674
|
+
when "|"; result=GoalPostToken.new(result.offset) #is this needed still?
|
2675
|
+
when "__FILE__"; #I wish rubylexer would handle this
|
608
2676
|
class<<result; attr_accessor :value; end
|
609
2677
|
result.value=@file.dup
|
610
|
-
when "__LINE__"
|
2678
|
+
when "__LINE__"; #I wish rubylexer would handle this
|
611
2679
|
class<<result; attr_accessor :value; end
|
612
|
-
result.value=@
|
2680
|
+
result.value=@endline
|
2681
|
+
else
|
2682
|
+
result=make_kw name,result.offset if defined? SPECIALIZED_KEYWORDS
|
2683
|
+
#warning, this may discard information stored in instance vars of result
|
613
2684
|
end
|
614
2685
|
|
615
|
-
when EoiToken
|
616
|
-
when HereBodyToken
|
617
|
-
when IgnoreToken
|
2686
|
+
when EoiToken; break
|
2687
|
+
when HereBodyToken; break
|
2688
|
+
when IgnoreToken; redo
|
618
2689
|
end
|
619
2690
|
end
|
620
2691
|
end while false
|
@@ -622,251 +2693,16 @@ class RedParse
|
|
622
2693
|
return @last_token=result
|
623
2694
|
end
|
624
2695
|
|
625
|
-
def
|
626
|
-
|
627
|
-
if false
|
628
|
-
rule=rule.dup
|
629
|
-
lookahead_processor=(rule.pop if Proc===rule.last)
|
630
|
-
node_type=rule.pop
|
631
|
-
else
|
632
|
-
Reg::Transform===rule or fail
|
633
|
-
node_type= rule.right
|
634
|
-
rule=rule.left.subregs.dup
|
635
|
-
lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
|
636
|
-
lookback=rule[0]=rule[0].regs(0) if ::Reg::LookBack===rule[0]
|
637
|
-
end
|
638
|
-
|
639
|
-
#index of data at which to start matching
|
640
|
-
i=@stack.size-1 #-1 because last element of @stack is always lookahead
|
641
|
-
|
642
|
-
#I could call this a JIT compiler, but that's a bit grandiose....
|
643
|
-
#more of a JIT pre-processor
|
644
|
-
compiled_rule=@compiled_rules[rule]||=
|
645
|
-
rule.map{|pattern|
|
646
|
-
String|Regexp===pattern ? KW(pattern) : pattern
|
647
|
-
}
|
648
|
-
|
649
|
-
#what's the minimum @stack size this rule could match?
|
650
|
-
rule_min_size=@min_sizes[compiled_rule]||=
|
651
|
-
compiled_rule.inject(0){|sum,pattern|
|
652
|
-
sum + pattern.itemrange.begin
|
653
|
-
}
|
654
|
-
i>=rule_min_size or return false
|
655
|
-
|
656
|
-
matching=[]
|
657
|
-
|
658
|
-
#actually try to match rule elements against each @stack element in turn
|
659
|
-
compiled_rule.reverse_each{|matcher|
|
660
|
-
i.zero? and fail
|
661
|
-
target=matching
|
662
|
-
#is this matcher optional? looping?
|
663
|
-
loop= matcher.itemrange.last.to_f.infinite?
|
664
|
-
optional=matcher.itemrange.first.zero?
|
665
|
-
matching.unshift target=[] if loop
|
666
|
-
if loop or optional
|
667
|
-
matcher=matcher.regs(0)
|
668
|
-
end
|
669
|
-
|
670
|
-
begin
|
671
|
-
if matcher===@stack[i-=1] #try match
|
672
|
-
target.unshift @stack[i]
|
673
|
-
else
|
674
|
-
#if match failed, the whole rule fails
|
675
|
-
#unless this match was optional, in which case, ignore it
|
676
|
-
#but bump the data position back up, since the latest datum
|
677
|
-
#didn't actually match anything.
|
678
|
-
return false unless optional or loop&&!target.empty?
|
679
|
-
i+=1
|
680
|
-
matching.unshift nil unless loop
|
681
|
-
break
|
682
|
-
end
|
683
|
-
end while loop
|
684
|
-
}
|
685
|
-
|
686
|
-
matchrange= i...-1 #what elems in @stack were matched?
|
687
|
-
|
688
|
-
#give lookahead matcher (if any) a chance to fail the match
|
689
|
-
case lookahead_processor
|
690
|
-
when ::Reg::LookAhead:
|
691
|
-
return false unless lookahead_processor.regs(0)===@stack.last
|
692
|
-
when Proc:
|
693
|
-
return false unless lookahead_processor[self,@stack.last]
|
694
|
-
end
|
695
|
-
|
696
|
-
#if there was a lookback item, don't include it in the new node
|
697
|
-
if lookback
|
698
|
-
matchrange= i+1...-1 #what elems in @stack were matched?
|
699
|
-
matching.shift
|
700
|
-
end
|
701
|
-
|
702
|
-
#replace matching elements in @stack with node type found
|
703
|
-
case node_type
|
704
|
-
when Class
|
705
|
-
node=node_type.new(*matching)
|
706
|
-
node.line=@line
|
707
|
-
@stack[matchrange]=[node]
|
708
|
-
when Proc; node_type[@stack]
|
709
|
-
when :shift; return 0
|
710
|
-
else fail
|
711
|
-
end
|
712
|
-
|
713
|
-
return true #let caller know we found a match
|
714
|
-
|
715
|
-
|
716
|
-
rescue Exception=>e
|
717
|
-
puts "error (#{e}) while executing rule: #{rule.inspect}"
|
718
|
-
puts e.backtrace.join("\n")
|
719
|
-
raise
|
720
|
-
end
|
721
|
-
|
722
|
-
class ParseError<RuntimeError
|
723
|
-
def initialize(msg,stack)
|
724
|
-
super(msg)
|
725
|
-
@stack=stack
|
726
|
-
if false
|
727
|
-
ranges=(1..stack.size-2).map{|i|
|
728
|
-
node=stack[i]
|
729
|
-
if node.respond_to? :linerange
|
730
|
-
node.linerange
|
731
|
-
elsif node.respond_to? :line
|
732
|
-
node.line..node.line
|
733
|
-
end
|
734
|
-
}
|
735
|
-
types=(1..stack.size-2).map{|i| stack[i].class }
|
736
|
-
msg += "couldn't interpret #{types.inspect} at line ranges: #{ranges.inspect}"
|
737
|
-
end
|
738
|
-
super(msg)
|
739
|
-
end
|
740
|
-
attr :stack
|
741
|
-
end
|
742
|
-
|
743
|
-
def [](*args)
|
744
|
-
@stack.[] *args
|
745
|
-
end
|
746
|
-
|
747
|
-
def []=(*args)
|
748
|
-
@stack.[]= *args
|
2696
|
+
def unget_tokens(*tokens)
|
2697
|
+
@moretokens=tokens.concat @moretokens
|
749
2698
|
end
|
750
2699
|
|
751
|
-
def
|
752
|
-
|
753
|
-
Thread.current[:$RedParse_parser]||=self
|
754
|
-
|
755
|
-
@stack=[StartNode.new, get_token]
|
756
|
-
#last token on @stack is always implicitly the lookahead
|
757
|
-
loop {
|
758
|
-
#try all possible reductions
|
759
|
-
shift=nil
|
760
|
-
@rules.reverse_each{|rule|
|
761
|
-
shift=evaluate(rule) and break
|
762
|
-
}
|
763
|
-
next if shift==true
|
764
|
-
|
765
|
-
#no rule can match current @stack, get another token
|
766
|
-
tok=get_token
|
767
|
-
|
768
|
-
#are we done yet?
|
769
|
-
tok.nil? or EoiToken===tok && EoiToken===@stack.last and break
|
770
|
-
|
771
|
-
#shift our token onto the @stack
|
772
|
-
@stack.push tok
|
773
|
-
}
|
774
|
-
|
775
|
-
@stack.size==2 and return NopNode.new #handle empty parse string
|
776
|
-
|
777
|
-
#unless the @stack is 3 tokens,
|
778
|
-
#with the last an Eoi, and first a StartNode
|
779
|
-
#there was a parse error
|
780
|
-
unless @stack.size==3
|
781
|
-
pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
|
782
|
-
top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
|
783
|
-
raise ParseError.new(top.msg,@stack)
|
784
|
-
end
|
785
|
-
EoiToken===@stack.last or fail
|
786
|
-
StartNode===@stack.first or fail
|
787
|
-
|
788
|
-
result= @stack[1]
|
789
|
-
|
790
|
-
|
791
|
-
#multiple assignment must be resolved
|
792
|
-
#afterwards by walking the parse tree.
|
793
|
-
#(because the relative precedences of = and ,
|
794
|
-
#are reversed in multiple assignment.)
|
795
|
-
# result.respond_to? :fixup_multiple_assignments! and
|
796
|
-
# result=result.fixup_multiple_assignments!
|
797
|
-
|
798
|
-
#relative precedence of = and rescue are also inverted sometimes
|
799
|
-
# result.respond_to? :fixup_rescue_assignments! and
|
800
|
-
# result=result.fixup_rescue_assignments!
|
801
|
-
|
802
|
-
#do something with error nodes
|
803
|
-
msgs=[]
|
804
|
-
result.walk{|parent,i,subi,node|
|
805
|
-
not if ErrorNode===node
|
806
|
-
msgs<< @filename+":"+node.blame.msg
|
807
|
-
end
|
808
|
-
} if result.respond_to? :walk #hack hack
|
809
|
-
result.errors=msgs unless msgs.empty?
|
810
|
-
#other types of errors (lexer errors, exceptions in lexer or parser actions)
|
811
|
-
#should be handled in the same way, but currently are not
|
812
|
-
# puts msgs.join("\n")
|
813
|
-
|
814
|
-
rescue Exception=>e
|
815
|
-
# input=@filename
|
816
|
-
# if input=="(eval)"
|
817
|
-
input=@lexer
|
818
|
-
if Array===input
|
819
|
-
puts "error while parsing:"
|
820
|
-
pp input
|
821
|
-
input=nil
|
822
|
-
else
|
823
|
-
input=input.original_file
|
824
|
-
inputname=@lexer.filename
|
825
|
-
input.to_s.size>1000 and input=inputname
|
826
|
-
end
|
827
|
-
# end
|
828
|
-
puts "error while parsing: <<< #{input} >>>"
|
829
|
-
raise
|
830
|
-
else
|
831
|
-
unless msgs.empty?
|
832
|
-
pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
|
833
|
-
raise RedParse::ParseError.new(msgs.join("\n"),@stack)
|
834
|
-
end
|
835
|
-
|
836
|
-
return result
|
837
|
-
ensure
|
838
|
-
Thread.current[:$RedParse_parser]=oldparser
|
2700
|
+
def unget_token(token)
|
2701
|
+
@moretokens.unshift token
|
839
2702
|
end
|
840
2703
|
|
841
|
-
def LEFT_MATCHERS;self.RULES.map{|r| r.left.subregs }.flatten; end
|
842
|
-
def STACKABLE_CLASSES
|
843
|
-
|
844
|
-
|
845
|
-
_LEFT_MATCHERS.map!{|m|
|
846
|
-
case m
|
847
|
-
when Reg::LookAhead,Reg::LookBack: m.regs(0)
|
848
|
-
else m
|
849
|
-
end
|
850
|
-
} #remove lookahead and lookback decoration
|
851
|
-
rule_juicer=proc{|m|
|
852
|
-
case m
|
853
|
-
when Class: m
|
854
|
-
when Reg::And: m.subregs.map &rule_juicer
|
855
|
-
when Reg::Or: m.subregs.map &rule_juicer
|
856
|
-
else #fukit
|
857
|
-
end
|
858
|
-
}
|
859
|
-
_LEFT_CLASSES=_LEFT_MATCHERS.map{|m| rule_juicer[m] }.flatten.compact
|
860
|
-
_RIGHT_CLASSES= self.RULES.map{|r| r.right }.grep(Class) #classes in productions
|
861
|
-
_LEFT_CLASSES+_RIGHT_CLASSES
|
862
|
-
end
|
863
2704
|
=begin
|
864
|
-
|
865
|
-
|
866
|
-
LOOKAHEAD_MATCHERS=self.RULES.map{|r| r.left.subregs.last }.map{|la| Reg::LookAhead===la and la.regs(0) }
|
867
|
-
|
868
|
-
LOOKAHEAD_CLASSES=LOOKAHEAD_MATCHERS.map(&rule_juicer)
|
869
|
-
LOOKAHEAD_CLASSES.each_with_index{|classes,i|
|
2705
|
+
self.LOOKAHEAD_CLASSES.each_with_index{|classes,i|
|
870
2706
|
case classes
|
871
2707
|
when Class: huh
|
872
2708
|
when Array: classes.flatten.each{huh}
|
@@ -874,6 +2710,7 @@ end
|
|
874
2710
|
end
|
875
2711
|
}
|
876
2712
|
=end
|
2713
|
+
|
877
2714
|
# def fixup_multiple_assignments!; end
|
878
2715
|
end
|
879
2716
|
|
@@ -922,16 +2759,16 @@ if __FILE__==$0
|
|
922
2759
|
quiet=true
|
923
2760
|
while /^-/===ARGV.first
|
924
2761
|
case opt=ARGV.shift
|
925
|
-
when "--"
|
926
|
-
when "--pp"
|
927
|
-
when "--lisp"
|
928
|
-
when "--parsetree"
|
929
|
-
when "--vsparsetree"
|
930
|
-
when "--vsparsetree2"
|
931
|
-
when "--update-problemfiles"
|
932
|
-
when "-q"
|
933
|
-
when "-v"
|
934
|
-
when "-e"
|
2762
|
+
when "--"; break
|
2763
|
+
when "--pp"; output=:pp
|
2764
|
+
when "--lisp"; output=:lisp
|
2765
|
+
when "--parsetree"; output=:parsetree
|
2766
|
+
when "--vsparsetree"; output=:vsparsetree
|
2767
|
+
when "--vsparsetree2"; output=:vsparsetree2
|
2768
|
+
when "--update-problemfiles"; problemfiles=ProblemFiles.new
|
2769
|
+
when "-q"; quiet=true
|
2770
|
+
when "-v"; quiet=false
|
2771
|
+
when "-e"; inputs=[ARGV.join(" ")]; names=["-e"]; break
|
935
2772
|
else fail "unknown option: #{opt}"
|
936
2773
|
|
937
2774
|
end
|
@@ -1060,24 +2897,121 @@ if __FILE__==$0
|
|
1060
2897
|
exit result
|
1061
2898
|
end
|
1062
2899
|
|
1063
|
-
=begin todo:
|
2900
|
+
=begin old todo:
|
1064
2901
|
v merge DotCallNode and CallSiteNode and CallWithBlockNode
|
1065
|
-
remove actual Tokens from parse tree...
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
|
1070
|
-
-should not appear in final output
|
2902
|
+
v remove actual Tokens from parse tree...
|
2903
|
+
v split ParenedNode into ParenedNode + Rescue/EnsureNode
|
2904
|
+
x 'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
|
2905
|
+
x -should not appear in final output
|
1071
2906
|
v split keywordopnode into loop and if varieties?
|
1072
2907
|
=end
|
1073
2908
|
|
1074
|
-
=begin optimization opportunities
|
1075
|
-
top of stack slot contains mostly keywords, specific node classes, and
|
1076
|
-
lookahead slot contains mostly
|
1077
|
-
-(
|
1078
|
-
if top of stack matcher is
|
2909
|
+
=begin old optimization opportunities:, ha!
|
2910
|
+
top of stack slot contains mostly keywords, specific node classes, and Expr
|
2911
|
+
lookahead slot contains mostly lower_op and keywords, with a few classes and inverted keywords
|
2912
|
+
-(lower_op is hard to optimize)
|
2913
|
+
if top of stack matcher is Expr, then the next matcher down is mostly keywords, with some operators
|
1079
2914
|
class membership can be optimized to test of integer within a range
|
1080
2915
|
keywords could be stored as symbols instead of strings
|
1081
2916
|
a few rules may need exploding (eg, ensure) to spoon feed the optimizer
|
1082
2917
|
make all Nodes descendants of Array
|
1083
2918
|
=end
|
2919
|
+
|
2920
|
+
#todo:
|
2921
|
+
#each node should have a corresponding range of tokens
|
2922
|
+
#-in an (optional) array of all tokens printed by the tokenizer.
|
2923
|
+
#v test stack_monkey mods
|
2924
|
+
#v break ParenedNode into 2 (3?) classes
|
2925
|
+
#x invent BEGINNode/ENDNode? (what other keywords?)
|
2926
|
+
#v at least make BEGIN/END be KWCallNode
|
2927
|
+
#v replace VarNameToken with VarNode in parser
|
2928
|
+
#x convert raw rules to lists of vertex identities?
|
2929
|
+
#v DottedRule class
|
2930
|
+
#v ParserState class (set of DottedRules)
|
2931
|
+
#v MultiReduce
|
2932
|
+
#v MultiShift
|
2933
|
+
#v ParserState#evolve(identity)
|
2934
|
+
#v DottedRule#evolve(identity)
|
2935
|
+
#v RedParse#enumerate_states
|
2936
|
+
#v RedParse#enumerate_exemplars
|
2937
|
+
#v Node/Token.enumerate_exemplars
|
2938
|
+
#v Node/Token.identity_param
|
2939
|
+
#v rename #lvalue? => #lvalue
|
2940
|
+
#x likewise get rid of other oddly named identity params
|
2941
|
+
#v BareMethod,WITHCOMMAS,BEGINAFTEREQUALS should have predicate methods defined for them
|
2942
|
+
#v do something about BEGINAFTEREQUALS... lots predicates, ugly to identify
|
2943
|
+
#v document identity parameters in nodes and tokens
|
2944
|
+
#operator and keyword tokens have some identity_param variations remaining...maybe?
|
2945
|
+
#xx all identity readers have to have writers as well (even if fake)
|
2946
|
+
#v sort out vertex identities... call identity_param in apt classes
|
2947
|
+
#convert identities<=>small ints
|
2948
|
+
#convert ParserStates<=>small ints
|
2949
|
+
#> lower_op/proc lookahead requires special action type with shift and reduce branches
|
2950
|
+
#x stack monkeys dictate some nodes appear in s/r table... which ones?
|
2951
|
+
#x some stack monkeys pushback nodes, action table must take those as input
|
2952
|
+
#v retype GoalPostNode => GoalPostToken
|
2953
|
+
#v then, pushback* should go away
|
2954
|
+
#v build shift/reduce table
|
2955
|
+
#v build goto table
|
2956
|
+
#split tables into shift/reduce and goto....?
|
2957
|
+
#v integrate with c code generator
|
2958
|
+
#finish c code generator
|
2959
|
+
#code generator needs a way to deal with :
|
2960
|
+
#backtracking (to more than 1 node/token???)
|
2961
|
+
#actions (stack monkeys/lower_op)
|
2962
|
+
#every reduce requires b/ting thru the lookahead
|
2963
|
+
#garbage collection
|
2964
|
+
#sharing ruby objects between ruby code and generated c code
|
2965
|
+
#optimizer?
|
2966
|
+
#ruby code generator?
|
2967
|
+
#v what to do with :shift ?
|
2968
|
+
#what to do with :accept ?
|
2969
|
+
#what to do with :error ?
|
2970
|
+
#Node.create (used in generated code)
|
2971
|
+
#Node.create <= takes input directly from semantic stack
|
2972
|
+
#build Node.create param list generator
|
2973
|
+
#v names for rules, dotted rules, parser states, identities
|
2974
|
+
#x StartNode may be a problem... used by a stack monkey,
|
2975
|
+
#to remove extra ;s from the very beginning of input.
|
2976
|
+
#use a lexer hack instead?
|
2977
|
+
#v convert StartNode to StartToken?
|
2978
|
+
#convert names to numbers and numbers to names
|
2979
|
+
#for states, rules, vertex identities
|
2980
|
+
#in ruby and c (??)
|
2981
|
+
#x rule for HereBodyToken should be a lexer hack?
|
2982
|
+
#v stack monkeys should have names
|
2983
|
+
#how to handle a stack monkey whose 2nd parameter is not a single identity?
|
2984
|
+
#even reduces may not have enough info since 1 node class may have multiple identities
|
2985
|
+
#v RedParse constants should be named in inspect
|
2986
|
+
#v toplevel rule?
|
2987
|
+
#v semantic stack in generated c code should be a ruby array
|
2988
|
+
#x state stack should keep size of semantic stack at the time states are pushed,
|
2989
|
+
#so that i can restore semantic stack to former state when b-ting/reducing
|
2990
|
+
#urk, how do I know how many levels of state stack to pop when reducing?
|
2991
|
+
#in looping error rules, just scan back in semantic stack for rule start
|
2992
|
+
#in regular looping rules, transition to loop state is saved on a special stack
|
2993
|
+
#so that at reduce time, we can b/t to that point for a start
|
2994
|
+
#if rule contains only scalars, b/t is easy
|
2995
|
+
#else rule contains scalars and optionals:
|
2996
|
+
#scan for rule start vertex starting at highest node
|
2997
|
+
#on semantic stack that can contain it and working downward.
|
2998
|
+
#also, statically verify that relevant rules contain no collisions among first (how many?) matchers
|
2999
|
+
|
3000
|
+
#is lookahead in code generator even useful? my tables have built-in lookahead....
|
3001
|
+
#need hack to declare nonerror looping matchers as irrevocable (for speed, when reducing)
|
3002
|
+
#v assignmentRhsNode needs an identity_param for with_commas
|
3003
|
+
#v -** fixup and setter breakout rules need dedicated identity_params too
|
3004
|
+
# = rescue ternary is broken again now...
|
3005
|
+
#v instead of shift states and is_shift_state? to find them,
|
3006
|
+
#v i should have shift transitions. (transitions that imply a shift... in response to a token input.)
|
3007
|
+
#v all states will have 2 entry points, for shift and nonshift transitions.
|
3008
|
+
#split big table into goto(node) and sr(token) tables
|
3009
|
+
#in each state, most common sr action should be made default
|
3010
|
+
#unused entries in goto table can be ignored.
|
3011
|
+
#most common goto entries (if any) can be default.
|
3012
|
+
#is the change_index arg in stack_monkey calls really correct everywhere? what are
|
3013
|
+
#the exact semantics of that argument? what about stack_monkeys that change the stack size?
|
3014
|
+
#should there be another arg to keep track of that?
|
3015
|
+
#maybe rewrite stack_monkeys so they're a little clearer and easier to analyze (by hand)
|
3016
|
+
#MultiShift/MultiReduce are not supported actions in generate.rb
|
3017
|
+
#:accept/:error are not supported actions in generate.rb
|