redparse 0.8.3 → 0.8.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,172 @@
1
+ require 'digest/sha2'
2
class RedParse
  # Disk-backed cache of parse results.
  #
  # Entries live in <home>/.redparse/<sha2 of the constructor params>/, one
  # file per input, named after the SHA2 hex digest of that input. The digest
  # of redparse.rb itself is stored in <home>/.redparse/parserdigest; when it
  # changes, every entry of every cache directory is flushed.
  #
  # All cache operations are best effort: a failed read is a miss and a
  # failed write is reported with warn, never raised to the caller.
  class Cache
    # Failures treated as "the cache is unavailable". Listed explicitly
    # (rather than rescuing Exception) so that Interrupt and SystemExit still
    # propagate. ScriptError covers LoadError from the optional 'ron' library;
    # SystemStackError can come from (un)marshalling deeply nested data.
    RESCUABLE=[StandardError,ScriptError,SystemStackError,NoMemoryError,SecurityError]

    # params:: any values identifying the calling parser configuration;
    #          their comma-joined text selects the cache subdirectory.
    # Creates the directories as needed, flushes everything if redparse.rb
    # changed since the digest was last saved, then trims this cache
    # directory down to its size limit.
    def initialize *params
      @callersfile=Digest::SHA2.hexdigest params.join(',')
      @homedir=find_home+"/.redparse/"
      ensure_dir @homedir
      ensure_dir cachedir
      digest_file=@homedir+"/parserdigest"
      saved_digest=File.open(digest_file,"rb"){|fd| fd.read.chomp } if File.exist?(digest_file)
      actual_digest= @@saved_parser_digest ||= redparse_rb_hexdigest
      if saved_digest!=actual_digest
        File.unlink(*all_entry_files) #flush cache
        File.open(digest_file,"wb"){|fd| fd.puts actual_digest } #update saved digest
      end
      retire_old_entries
    end

    # Directory (with trailing slash) holding this cache's entries.
    def cachedir
      @homedir+@callersfile+"/"
    end

    # Entry files of this cache only.
    def entry_files
      Dir[cachedir+"*"]
    end

    # Entry files of every cache directory under the redparse home dir.
    def all_entry_files
      Dir[@homedir+"*"].select{|fn|
        File.directory? fn
      }.map{|dirname|
        Dir[dirname+"/*"]
      }.flatten
    end

    # Deletes the least recently modified entries until the total size of
    # this cache is within max_size (default 10_000_000 bytes).
    def retire_old_entries
      limit=max_size||10_000_000
      files=entry_files
      total=files.inject(0){|sum,fn| sum+File.size(fn) }
      return if total<=limit
      files=files.sort_by{|fn| File.mtime(fn) }
      #stop when the list is drained, even if the limit is still exceeded
      while total>limit and (victim=files.shift)
        total-=File.size(victim)
        File.unlink(victim)
      end
    end

    # SHA2 hex digest of the first redparse.rb found on the load path.
    # Raises Errno::ENOENT if no load path entry contains redparse.rb.
    def redparse_rb_hexdigest
      full_name=$:.map{|dir| File.join(dir.to_s,"redparse.rb") }.find{|fn| File.exist? fn }
      full_name or raise Errno::ENOENT, "redparse.rb (not found in $LOAD_PATH)"
      File.open(full_name,"rb"){|fd| hexdigest_of_file fd }
    end

    # SHA2 hex digest of the whole content of the IO fd. The IO is rewound
    # before and after reading, so the caller can still read it from the start.
    def hexdigest_of_file fd
      sha2=Digest::SHA2.new
      fd.rewind
      while chunk=fd.read(4096)
        sha2.update chunk
      end
      fd.rewind
      return sha2.hexdigest
    end

    # Size limit in bytes read from <home>/.redparse/size, or nil if that
    # file cannot be read.
    def max_size
      File.open(@homedir+"/size"){|fd| fd.read.chomp.to_i }
    rescue StandardError
      nil
    end

    ##
    # Finds the user's home directory.
    #--
    # Some comments from the ruby-talk list regarding finding the home
    # directory:
    #
    #   I have HOME, USERPROFILE and HOMEDRIVE + HOMEPATH. Ruby seems
    #   to be depending on HOME in those code samples. I propose that
    #   it should fallback to USERPROFILE and HOMEDRIVE + HOMEPATH (at
    #   least on Win32).
    #(originally stolen from rubygems)
    def find_home
      ['HOME', 'USERPROFILE'].each do |homekey|
        return ENV[homekey] if ENV[homekey]
      end

      if ENV['HOMEDRIVE'] && ENV['HOMEPATH'] then
        return "#{ENV['HOMEDRIVE']}#{ENV['HOMEPATH']}"
      end

      begin
        File.expand_path("~")
      rescue
        if File::ALT_SEPARATOR then
          "C:/"
        else
          "/"
        end
      end
    end
    private :find_home, :entry_files, :redparse_rb_hexdigest, :retire_old_entries, :max_size, :hexdigest_of_file

    # Cache key for input: digest of the content for an IO, of the text otherwise.
    def hash_of_input input
      if IO===input
        hexdigest_of_file input
      else
        Digest::SHA2.hexdigest input
      end
    end

    # Returns the cached result for input, or nil on a miss or if the entry
    # cannot be decoded. A hit refreshes the entry's modification time, which
    # is what retire_old_entries sorts by.
    #
    # NOTE(review): entries are decoded with Marshal.load, which must only be
    # used on trusted data; this relies on the cache directory being writable
    # by the user alone.
    def get input
      cachefile=cachedir+hash_of_input(input)
      return nil unless File.exist? cachefile
      result=File.open(cachefile,"rb"){|fd|
        line=fd.readline #EOFError on an empty entry; handled below
        fd.rewind
        if /#encoded with Ron\n/i===line
          begin
            require 'ron'
            Ron.load fd.read
          rescue *RESCUABLE
            return nil
          end
        else
          begin
            Marshal.load fd
          rescue *RESCUABLE=>e
            warn "#{e.class}: #{e}"
            warn "cache read failed for:\n#{input}"
            return nil
          end
        end
      }

      touch_entry cachefile
      return result
    rescue EOFError
      return nil
    end

    # Stores result as the cache entry for input. The value is marshalled if
    # possible, otherwise written with Ron when that library loads; if neither
    # works the entry is silently dropped. Any other failure is reported with
    # warn and leaves no entry behind. Always returns nil.
    def put input,result
      cachefile=nil #stays nil if hashing the input fails: then there is nothing to remove
      cachefile=cachedir+hash_of_input(input)
      encoded=encode_result(result)
      if encoded
        #the value is fully encoded before the file is opened, so a failed
        #dump can never leave a truncated entry on disk
        File.open(cachefile,"wb"){|fd| fd.write encoded }
      else
        discard_entry cachefile
      end
      nil
    rescue *RESCUABLE=>e #dump failed
      warn "#{e.class}: #{e}"
      warn "cache write failed for:\n#{result.inspect}"
      discard_entry cachefile
      nil
    end

    # Creates dir unless something already exists at that path.
    def ensure_dir dir
      Dir.mkdir dir unless File.exist? dir
    rescue Errno::EEXIST
      #somebody else created it between the check and the mkdir
    end

    # Encoded form of result: a Marshal dump, or a Ron dump behind the
    # "#encoded with Ron" marker line when Marshal raises TypeError.
    # Returns nil if neither encoding works.
    def encode_result result
      begin
        Thread.current["Marshal.ignore_sclass"]=true
        return Marshal.dump(result)
      rescue TypeError
        #not marshalable; fall through to Ron
      ensure
        Thread.current["Marshal.ignore_sclass"]=nil
      end
      begin
        require 'ron'
        "#encoded with Ron\n"+Ron.dump(result).to_s
      rescue *RESCUABLE
        nil
      end
    end

    # Removes the entry file if there is one; never raises for a missing file.
    def discard_entry cachefile
      File.unlink cachefile if cachefile and File.exist? cachefile
    rescue SystemCallError
      #best effort only
    end

    # Sets the entry's access and modification times to now.
    def touch_entry cachefile
      t=Time.now
      File.utime(t,t,cachefile)
    rescue *RESCUABLE
      File.open(cachefile,"a"){|fd| } #touch cache date
    end
    private :ensure_dir, :encode_result, :discard_entry, :touch_entry
  end
end
@@ -0,0 +1,1648 @@
1
+ =begin
2
+ redparse - a ruby parser written in ruby
3
+ Copyright (C) 2008,2009 Caleb Clausen
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU Lesser General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+
20
+
21
+ require 'forwardable'
22
+
23
+ begin
24
+ require 'rubygems'
25
+ rescue LoadError=>e
26
+ #hope we don't need it
27
+ raise unless /rubygems/===e.message
28
+ end
29
+ require 'rubylexer'
30
+ require 'reg'
31
+ require 'reglookab'
32
+
33
+ require "redparse/node"
34
+ #require "redparse/decisiontree"
35
+ require "redparse/reg_more_sugar"
36
+ require "redparse/generate"
37
+ require "redparse/cache"
38
+
39
+ class RedParse
40
+
41
+ if defined? END_ATTACK
42
+ class RuleSet
43
# rules:: the parser's rule list, in the usual RedParse order.
# Stores the rules reversed, the mask with one bit set per rule, and the
# class relations computed by child_relations_among.
def initialize(rules)
  #rule order must be reversed relative to the usual RedParse rule
  #order... merely so that ffs can work right.
  @rules=rules.reverse
  rule_count=@rules.size
  @maxmask=(1<<rule_count)-1
  stackables=STACKABLE_CLASSES()
  @subclasses_of=child_relations_among(*stackables)
end
50
+
51
# Returns the bitmask with bit i set for every stored rule i that is
# included in rules.
def rules2mask(rules)
  bits=0
  @rules.each_index{|pos|
    bits|=1<<pos if rules.include? @rules[pos]
  }
  bits
end
58
+
59
# Returns the stored rules selected by mask, lowest set bit first.
# Uses ffs (index of the lowest set bit, counting from 1) so that only the
# set bits are visited.
#
# This is the only definition: a second, shadowed variant tested
# `mask&(1<<i)` for truthiness, which holds for every Integer (0 is truthy
# in ruby) and would therefore have selected every rule.
def mask2rules(mask)
  result=[]
  while mask.nonzero?
    result<< @rules[i=ffs(mask)-1]
    mask &= ~(1<<i) #clear the bit just handled
  end
  return result
end
75
+
76
# Yields rule and index for every stored rule whose bit is set in mask,
# lowest set bit first. mask defaults to all rules.
#
# This is the only definition: a second, shadowed variant tested
# `mask&(1<<i)` for truthiness, which holds for every Integer (0 is truthy
# in ruby) and would therefore have yielded every rule.
def each_rule(mask=@maxmask)
  while mask.nonzero?
    yield @rules[i=ffs(mask)-1],i
    mask &= ~(1<<i) #clear the bit just handled
  end
end
88
+
89
+
90
#Lookup table for the lowest set bit of a byte: entry b holds the 1-based
#position of the lowest set bit of b, entry 0 is nil. Built by doubling:
#after step n the table has 2**n entries and its middle entry is n.
ffs_table=[nil]
(1..8).each{|bit|
  ffs_table=ffs_table*2
  ffs_table[ffs_table.size/2]=bit
}
@@FFS_TABLE=ffs_table

#Pure ruby find-first-set: 1-based position of the lowest set bit of mask,
#or 0 if mask is zero. Examines mask one byte at a time.
def rb_ffs(mask)
  bytes_skipped=0
  until mask.zero?
    low=@@FFS_TABLE[mask&0xFF]
    return low+(bytes_skipped<<3) if low
    bytes_skipped+=1
    mask>>=8
  end
  return 0
end
105
+
106
#Try to define ffs as an inline C function (c_ffs); if anything at all goes
#wrong while loading or compiling it, warn and use rb_ffs instead.
#NOTE(review): the fixnum branch passes NUM2UINT(mask) to ffsl and the
#bignum branch passes each BDIGIT to ffs() while advancing the offset by
#sizeof(int)*CHAR_BIT -- confirm both are correct where long or BDIGIT is
#wider than int.
begin
  require 'inline'
  inline{|builder|
    builder.prefix '#define _GNU_SOURCE'
    builder.include '"string.h"'
    builder.include '"limits.h"'
    builder.c %{
unsigned c_ffs(VALUE mask){
if FIXNUM_P(mask) {
return ffsl(NUM2UINT(mask));
} else if(TYPE(mask)==T_BIGNUM) {
struct RBignum* bn=RBIGNUM(mask);
int len=bn->len;
int i;
unsigned offset=0;
unsigned result=0;
for(i=0;i<len;++i){
/*printf("least:%x\\n", ((BDIGIT*)(bn->digits))[i]);*/
/*printf("most:%x\\n", ((BDIGIT*)(bn->digits))[len]);*/
result=ffs(((BDIGIT*)(bn->digits))[i]);
if (result) break;
offset+=sizeof(int)*CHAR_BIT;
}
if (result==0) return 0;
return result+offset;
} else {
rb_fatal("bad argument to ffs");
}
}
}
  }
  alias ffs c_ffs
rescue Exception=>e
  #deliberately broad: a missing library (LoadError) or a failed C build
  #must both end up on the ruby fallback
  warn "error (#{e.class}) while defining inline c ffs()"
  warn "original error: #{e}"
  warn "falling back to ruby version of ffs()"
  alias ffs rb_ffs

end
145
+
146
+
147
+
148
+
149
#The matchers from the left side of every rule (the stack/lookahead
#matchers), as one flat list.
def LEFT
  per_rule=@rules.map{|rule| rule.left.subregs }
  per_rule.flatten
end
153
+
154
#Like LEFT, but with lookahead and lookback decoration removed: such
#matchers are replaced by their first subreg. Procs become an empty array.
def LEFT_NO_LOOKING
  LEFT().map{|matcher|
    case matcher
    when Reg::LookAhead,Reg::LookBack then matcher.subregs[0]
    when Proc then []
    else matcher
    end
  }
end
166
+
167
#All classes mentioned in rules, on left and right sides. The list is
#computed once and memoized in @sc_result; fails if it comes out empty.
def STACKABLE_CLASSES
  return @sc_result unless @sc_result.nil?
  #while the list is being computed @sc_result is false (not nil), so a
  #nested call made during the computation returns false right away
  @sc_result=false
  from_left=LEFT_NO_LOOKING().map{|lm| sc_juice lm}.flatten.compact
  from_right=@rules.map{|rr| rr.right }.grep(Class) #classes in productions
  @sc_result=(from_left+from_right).grep(Class).uniq
  fail if @sc_result.empty?
  return @sc_result
end
179
+
180
#Returns the classes (possibly in nested arrays) that matcher m can match.
#A class yields itself plus everything reachable through @subclasses_of;
#strings and regexps are wrapped with RedParse.KW first; anything
#unrecognized yields all of STACKABLE_CLASSES.
def juice(m)
  case m
  when Class
    return [m] unless @subclasses_of
    found=[m] # and subclasses too
    idx=0
    #found grows while it is scanned, so descendants of descendants are
    #picked up as well
    while item=found[idx]
      found.concat @subclasses_of[item]
      idx += 1
    end
    found
  when String,Regexp
    juice(RedParse.KW(m))
  when Reg::And
    m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
  when Reg::Or
    m.subregs.map( &method(:juice) )
  when Reg::Not
    m=m.subregs[0]
    #NOTE(review): inject has no initial value, so the first subreg is used
    #as the starting truth value and is itself never tested against Class
    if Class===m or (Reg::Or===m and
         m.subregs.inject{|sum,x| sum && (Class===x) })
      j=juice(m)
      STACKABLE_CLASSES()-j.flatten.compact rescue j
    else
      STACKABLE_CLASSES()
    end
  else
    STACKABLE_CLASSES()
  end
end
207
+
208
#Variant of juice used while STACKABLE_CLASSES is being computed: a class
#yields just itself (no subclasses), lookahead/lookback/not wrappers are
#looked through, and anything unrecognized yields an empty list.
def sc_juice(m)
  case m
  when Class
    [m]
  when String,Regexp
    juice(RedParse.KW(m))
# when String,Regexp; [KeywordToken]
  when Reg::And
    m.subregs.map{|x| sc_juice(x)}.compact.map{|x| x.flatten.compact}.inject{|sum,rr| sum&rr }
  when Reg::Or
    m.subregs.map( &method(:sc_juice) )
  when Reg::Not, Reg::LookAhead, Reg::LookBack
    sc_juice(m.subregs[0])
  else
    []
  end
end
220
+
221
#Classes that the lookahead of rule can match: the juice of the lookahead
#matcher if the rule's last matcher is one, else all STACKABLE_CLASSES.
def LOOKAHEAD_CLASSES rule
  final=rule.left.subregs.last
  if Reg::LookAhead===final
    juice(final.subregs[0]).flatten.compact
  else
    STACKABLE_CLASSES()
  end
end
227
+ #
228
#Classes that can be on top of the stack when rule matches. Starts from
#the last matcher (skipping one trailing lookahead or Proc) and, as long
#as the matcher is a repeat that may match zero times, also includes the
#matcher before it.
def TOS_CLASSES rule
  matchers=rule.left.subregs
  at=-1
  current=matchers[at]
  current=matchers[at-=1] if Reg::LookAhead===current || Proc===current
  collected=[]
  while Reg::Repeat===current and current.times.min.zero?
    collected<<juice(current.subregs[0])
    current=matchers[at-=1]
  end
  return (collected+juice(current)).flatten.compact
end
240
+
241
#The rule stored at index i (in the reversed, internal order).
def [](i)
  @rules.[](i)
end
244
+
245
+ end #
246
+ #
247
#Mixin that narrows the rules tried on each reduce, using per-class
#bitmasks of the rules that can possibly apply.
module Reducer
  #compiled RuleSet and class narrowers, cached per including class
  @@rulesets={}
  @@class_narrowerses={}

  #Builds @ruleset and @class_narrowers ([tos,la]: hashes from class to
  #rule bitmask, defaulting to 0). The result is shared between instances
  #of the same class unless recompile is true, in which case it is rebuilt
  #and not cached.
  def compile(recompile=false)
    owner=self.class

    #use cached result if available
    if @@rulesets[owner] and !recompile
      @ruleset=@@rulesets[owner]
      @class_narrowers=@@class_narrowerses[owner]
      return
    end

    #actual rule compilation
    @ruleset=RuleSet.new @rules
    tos=Hash.new(0)
    la=Hash.new(0)
    @class_narrowers=[tos,la]
    @ruleset.each_rule{|rule,bit|
      flag=1<<bit
      @ruleset.LOOKAHEAD_CLASSES(rule).each{|klass2| la[klass2] |= flag }
      @ruleset.TOS_CLASSES(rule).each{|klass2| tos[klass2] |= flag }
    }

    #save result to cache if not too dynamic
    unless recompile
      @@rulesets[owner]=@ruleset
      @@class_narrowerses[owner]=@class_narrowers
    end
  end

  #Evaluates the rules whose bits are set for both the class of the last
  #stack item and the class of the one before it; returns the first truthy
  #result of evaluate, or false if no rule produced one.
  def new_reduce
    by_last=@class_narrowers[-1][@stack[-1].class]
    by_previous=@class_narrowers[-2][@stack[-2].class]
    @ruleset.each_rule(by_last&by_previous){|r,i|
      res=evaluate(r) and return res
    }
    return false
  end
end
293
+ end
294
+
295
#Like map, but the block also receives the index of each element.
def map_with_index(list)
  mapped=[]
  list.each_with_index{|item,idx| mapped.push yield(item,idx) }
  mapped
end
300
+
301
#Returns the parser's rules wrapped as Rule objects (memoized in
#@all_rules). Stack monkey actions get their exemplars filled in, rules
#whose action is MisparsedNode are dropped, and every rule is given a name
#derived from its action; names that occur more than once get a numeric
#suffix.
def all_rules
  return @all_rules if defined? @all_rules

  @inputs||=enumerate_exemplars
  @rules=expanded_RULES #force it to be recalculated
  @all_rules=map_with_index(@rules){|raw,idx| Rule.new raw,idx}

  @all_rules.each{|r|
    r.action.exemplars=@inputs.grep r.action.hint if StackMonkey===r.action
  }

  warn "error recovery rules disabled for now; creates too many states and masks errors"
  @all_rules.reject!{|r| r.action==MisparsedNode }

  #names have to be allocated globally to make sure they don't collide
  names=@all_rules.map{|r|
    r.action.respond_to?(:name) ? r.action.name : r.action.to_s
  }.sort
  #after sorting, equal names are adjacent; remember each duplicated name
  dups={}
  names.each_with_index{|name,idx|
    dups[name]=0 if name==names[idx+1]
  }
  @all_rules.each{|r|
    r.name=(r.action.respond_to?(:name) ? r.action.name.dup : r.action.to_s)
    if dups[r.name]
      count=dups[r.name]+=1
      r.name<<"_#{count}"
    end
  }
end
342
+
343
#One DottedRule for every position of every rule, as a flat list.
def all_dotted_rules
  per_rule=all_rules.map{|rule|
    positions=(0...rule.patterns.size)
    positions.map{|pos| DottedRule.create(rule,pos,self) }
  }
  per_rule.flatten
end
350
+
351
+ #$OLD_PAA=1
352
+
353
#Returns the DottedRule at position 0 of every rule (memoized in
#@all_initial_dotted_rules). Prints :all_init and :all_init_done as
#progress markers.
#
#NOTE(review): the `while true ... end until ...` below is a `while`
#expression with an `until` modifier, not a `begin ... end until`
#do-while. The modifier's condition appears to be tested first, and both
#variables are nil at that point, so the loop body looks like it never
#runs -- confirm.
#NOTE(review): if the body did run, compute_also_allow would receive the
#provisional array where it expects the parser -- confirm the intended
#arguments before enabling it.
def all_initial_dotted_rules
  return @all_initial_dotted_rules if defined? @all_initial_dotted_rules
  @all_initial_dotted_rules=result=
    all_rules.map{|rule| DottedRule.create(rule,0,nil) }

  p :all_init

  unless defined? $OLD_PAA
    scanning=result
    provisionals=nil
    while true
      old_provisionals=provisionals
      provisionals={}
      scanning.each{|dr|
        dr.also_allow=dr.compute_also_allow(provisional=[false]) #fill out dr.also_allow
        provisionals[dr]=provisional[0]
      }
      scanning=provisionals.map{|dr,val| dr if val }.compact
    end until provisionals==old_provisionals
  end
  p :all_init_done

  return result
end
377
+
378
+ class Rule #original user rules, slightly chewed on
379
+ def initialize(rawrule,priority)
380
+ @priority=priority
381
+ @action=rawrule.right
382
+ @patterns=rawrule.left.subregs.dup
383
+ #remove lookback decoration if any, just note that lb was present
384
+ if Reg::LookBack===@patterns[0]
385
+ @lookback=true
386
+ @patterns[0]=@patterns[0].subregs[0]
387
+ end
388
+
389
+ case @patterns[-1]
390
+ #Symbol is pointless here, methinks.
391
+ when Proc,Symbol; #do nothing
392
+ when Reg::LookAhead; @patterns[-1]=@patterns[-1].subregs[0]
393
+ else @patterns.push Object #add la if none was present
394
+ end
395
+
396
+ #search for looping matchers with minimum >0 and replace them
397
+ #with a number of scalars (== the minimum) followed by a loop with 0 min.
398
+ #search for bare strings or regexps and replace with KW( ) wrapper
399
+ @patterns.each_with_index{|p,i|
400
+ case p
401
+ when String,Regexp; @patterns[i]=RedParse.KW(p)
402
+ when Reg::Repeat
403
+ if p.itemrange.first>0
404
+ @patterns[i,1]=
405
+ *[p.subregs[0]]*p.itemrange.first<< #minimum # as scalars
406
+ p.subregs[0].reg.* #0-based looper
407
+ end
408
+ end
409
+ }
410
+ @drs=[]
411
+ end
412
+
413
+ attr_reader :drs
414
+
415
+ def hash; priority end
416
+ def == other; Rule===other and priority==other.priority end
417
+ alias eql? ==
418
+
419
+ def lookback?; @lookback if defined? @lookback end
420
+
421
+ attr_reader :patterns,:action,:priority
422
+ attr_accessor :name
423
+
424
+ def at(n)
425
+ result=patterns[n]
426
+ result=result.subregs[0] if Reg::Repeat===result
427
+ result
428
+ end
429
+ def optional? n
430
+ p=patterns[n]
431
+ return Reg::Repeat===p && p.itemrange.first.zero?
432
+ end
433
+ def looping? n
434
+ p=patterns[n]
435
+ return false unless Reg::Repeat===p
436
+ return false if p.itemrange.last==1
437
+ fail unless p.itemrange.last.infinite?
438
+ return true
439
+ rescue Exception
440
+ return false
441
+ end
442
+
443
+ def reduces_to
444
+ case @action
445
+ when Class; @action
446
+ when StackMonkey; @action.exemplars
447
+ when :error,:shift,:accept; nil
448
+ else fail "#@action unexpected in reduces_to"
449
+ end
450
+ end
451
+
452
+ def unruly?
453
+ return if action==:accept
454
+ action.class!=Class || lookback?
455
+ end
456
+
457
+ def final_promised_pattern
458
+ case @action
459
+ when DeleteMonkey #delete_monkey
460
+ vector_indexes=(@action.first_changed_index..-1).select{|i| Reg::Repeat===@patterns[i] }
461
+ fail unless vector_indexes.empty?
462
+ result=@patterns.dup
463
+ result.delete_at @action.first_changed_index
464
+ when StackMonkey #stack_monkey
465
+ result=@patterns.dup
466
+ result[@action.first_changed_index..-1]=[@action.hint]
467
+ when Class
468
+ result= [@action,@patterns.last]
469
+ result.unshift @patterns.first if lookback?
470
+ when :accept, :error, :shift
471
+ result=@patterns.dup
472
+ else
473
+ pp @action
474
+ fail
475
+ end
476
+ result[-1]=result[-1].la unless result.empty?
477
+ result
478
+ end
479
+
480
+ def final_promised_rule
481
+ @final_promised_rule ||=
482
+ Rule.new(-final_promised_pattern>>nil,-priority)
483
+ end
484
+ end
485
+
486
+ class DottedRule
487
#rule:: the Rule this dotted rule belongs to.
#pos:: position within rule.patterns; fails if out of range.
#parser:: accepted but not used here.
def initialize(rule,pos,parser)
  @rule=rule
  @pos=pos
  valid_positions=(0...rule.patterns.size)
  fail unless valid_positions===@pos
# @also_allow= compute_also_allow(parser) if parser unless defined? $OLD_PAA
end
492
#Returns the initial dotted rules of parser (other than self) whose final
#promised rule can be optionally combined with self, each together with
#its own also_allow list; flattened, without nils or duplicates.
#provisional[0] is incremented once for every other rule whose also_allow
#was not yet available (and therefore treated as empty).
def compute_also_allow(parser,provisional=[false])
  parser.all_initial_dotted_rules.map{|dr|
    next if dr==self
    fake_rule=dr.rule.final_promised_rule
    final_more_dr=DottedRule.create(fake_rule,0,nil)
    also=dr.also_allow
    unless also
      provisional[0]||=0
      provisional[0]+=1
      also=[]
    end
    also+[dr] if optionally_combine final_more_dr,parser
  }.flatten.compact.uniq
end
506
+ attr_reader :rule,:pos
507
+ attr_accessor :also_allow
508
+
509
#Returns the DottedRule for rule at pos, creating it and storing it in
#rule.drs on first use. For a new one, also_allow is computed when a parser
#is given (unless $OLD_PAA is defined).
def self.create(rule,pos,parser)
  existing=rule.drs[pos]
  return existing if existing
  fresh=rule.drs[pos]=DottedRule.new(rule,pos,parser)
  unless defined? $OLD_PAA
    fresh.also_allow=fresh.compute_also_allow(parser) if parser
  end
  return fresh
end
517
+
518
#dotted rules are identified by their rule and position
def hash
  (@rule.priority<<3)^@pos
end

def == other
  DottedRule===other and @pos==other.pos and @rule==other.rule
end
alias eql? ==

#the rule's name followed by "@" and the position
def name
  @rule.name+"@#@pos"
end

#true if the pattern at the current position is a looping one
def looping?
  @rule.looping?(@pos)
end
527
+
528
#Computes where this dotted rule goes when input is seen.
#returns Conditional|Rule|DottedRule|+[DottedRule.+]|nil
#
#seenlist caches the outcome per [dotted rule, input identity name]; an
#entry is only recomputed while it still equals :dunno_yet. Whatever is
#returned is also appended (flattened, sorted by name) to result2.
def evolve input, parser, seenlist,result2
  #print "["
  #$stdout.flush
  idname=input.identity_name
  idname=parser.identity_name_alias? idname
  cache=seenlist[[self,idname]]
  unless cache==:dunno_yet
    result2.concat Array(cache).flatten.compact.uniq.sort_by{|x| x.name}
    return cache
  end
  i=pos
  lasti=i-1
  result=[]
  result=loop do #might need multiple tries if optional matcher(s) here
    fail unless i>lasti #every pass must advance
    lasti=i
    p=@rule.at(i) #what is current pattern in this dottedrule?
    fail if Proc===p #shouldnt happen anymore
    if parser.pattern_matches_nodes? p

      #if any dotted rules have nodes at this point,
      #also include the set of rules@0 which
      #can (possibly indirectly) generate that node.
      #(match tokens found on left sides of productions for p)
      seenlist[[self,idname]]=result
      if false
        result.concat recurse_match_drs(parser).uniq.map{|dr|
          dr and
          #begin print "{#{dr.name}"
          dr.evolve input,parser,seenlist,result2
          #ensure print "}" end
        }.flatten.compact.uniq
      end
    end
    @saw_item_that={}
    if p===input
      i+=1 unless @rule.looping?(i)
      fail if i>@rule.patterns.size

      #a non-empty @saw_item_that after the match turns the outcome into
      #a Conditional on the recorded item
      if !@saw_item_that.empty?
        p(:saw_item_that!)
        fail unless @saw_item_that.size==1
        pair=@saw_item_that.to_a.first
        fail unless p.equal? pair.last
        it=pair.first
        action=
          if i==@rule.patterns.size
            @rule
          else
            DottedRule.create(@rule,i,parser)
          end
        break Conditional.new(it,action)
      end
      @saw_item_that=nil

      if i == @rule.patterns.size
        break @rule
      else
        break result<<DottedRule.create(@rule,i,parser)
      end
    elsif !@rule.optional?(i)
      break result.empty? ? nil : result
    elsif (i+=1) >= @rule.patterns.size
      break @rule
    #else next p
    end
  end #loop
  seenlist[[self,idname]]=result
  result2.concat Array(result).flatten.compact.uniq.sort_by{|x| x.name}
  return result
  #ensure print "]"
end
601
+
602
#Collects the initial dotted rules of every rule that can generate a node
#matching the pattern at this position, recursively.
#returns +[(DottedRule|nil).*]
#
#Top-level calls (result not given) are memoized in parser.rmd_cache. On
#recursive calls result is the shared accumulator; self is pushed onto it
#so that it is skipped further down and then replaced by nil.
def recurse_match_drs parser, result=nil
  unless result
    table=parser.rmd_cache
    if table
      cache=table[self]
      return cache if cache
    else
      parser.rmd_cache={}
    end

    result=[]
  end
  #print "("
  #print @rule.name+"@#@pos"
  p=@rule.at(@pos)

  #find set of nodes that could match here
  nodes_here=parser.exemplars_that_match(p&Node)

  #find the set of rules that could generate a node in our list
  rrules=parser.all_rules.select{|rule|
    !rule.unruly? and !nodes_here.grep(rule.action).empty?
  }.map{|rule|
    DottedRule.create(rule,0,parser)
  }

  #if any generating rules match a node in the leftmost pattern,
  #add the rules which can generate _that_ node too.
  result.push self #force self to be excluded from future recursion
  oldsize=result.size
  unless rrules.empty?
    result.concat rrules

    #give the accumulator an index_of hash (element => first index) so
    #positions can be looked up without scanning the array
    unless result.respond_to? :index_of
      class<<result
        attr_accessor :index_of
      end
      result.index_of={}
    end
    rio=result.index_of
    #NOTE(review): the range includes result.size itself, so nil gets an
    #index entry too -- looks harmless; confirm
    oldsize.upto(result.size){|i| rio[result[i]]||=i }
    rrules.each{|rrule|
      i=rio[rrule] or fail #index() inside each() == O(N**2) complexity. this is the slow line.
      #but skip recursion on rules already done at a higher level
      rrule.recurse_match_drs parser,result if i>=oldsize
    }
  end
  result[oldsize-1]=nil #don't actually include self in result
  #result.update_indices oldsize-1, oldsize-1

  parser.rmd_cache[self]=result
  return result
  #ensure print ")"
end
657
+
658
#Decides whether the dotted rule weaker can be followed alongside self:
#some parser input must match the next matcher of both, without already
#being claimed by an earlier (optional) matcher of either rule.
#Returns true or false. Results are memoized in parser.oc_cache under
#[self,weaker]; the ensure clause stores whatever was decided, including
#results returned from inside the nested blocks.
def optionally_combine weaker,parser
  result=nil #declared here so the blocks and the ensure clause share it
  #lotsa caching needed if this is ever to be performant
  if parser.oc_cache
    result=parser.oc_cache[[self,weaker]]
    return result unless result.nil?
  else
    parser.oc_cache={}
  end

  other=weaker
  mymatches,myposes= self.outcomes
  matches, poses = other.outcomes
  matches.each_with_index{|match,i|
    mymatches.each_with_index{|mymatch,myi|
      intersect=parser.inputs.grep(match&mymatch)
      unless intersect.empty?

        #but don't allow matches that would be matched
        #by an earlier (but optional) pattern.
        disallowed=Reg::Or.new(
          *possible_matchers_til(myi)+
           other.possible_matchers_til(i)
        )
        #reject returns a new array; its result has to be kept for the
        #emptiness test below to mean anything
        intersect=intersect.reject{|x| disallowed===x }

        if intersect.empty?
          return result=false
        elsif poses[i]>=other.rule.patterns.size
          return result=true #success if weaker rule is at an end
        elsif myposes[myi]>=rule.patterns.size
          return result=false #fail if stronger rule at an end
        else
          p [:**,rule.name,myposes[myi]]
          mynew=DottedRule.create(rule,myposes[myi],parser)
          new=DottedRule.create(other.rule,poses[i],parser)
          return result=mynew.optionally_combine( new,parser )
        end
      end
    }
  }
  return result=false
ensure
  parser.oc_cache[[self,weaker]]=result
end
702
+
703
#The rule's matchers from the current position up to, but not including,
#position i-1; a repeat is replaced by the matcher it repeats.
def possible_matchers_til i
  positions=(pos...i-1)
  positions.map{|j|
    matcher=rule.at(j)
    if Reg::Repeat===matcher
      matcher.subregs[0]
    else
      matcher
    end
  }
end
709
+
710
#Lists what can be matched next from the current position.
#Returns [matchers,positions]: matchers[k] is something that may be
#matched next and positions[k] the position reached after matching it.
#Optional repeats contribute an entry and the scan continues past them;
#it stops after the first Proc or non-repeat matcher, or at the end.
def outcomes
  limit=@rule.patterns.size
  at=@pos
  matchers=[]
  positions=[]
  loop do
    m=@rule.patterns[at]
    case m
    when Proc
      matchers.push Object
      positions.push at+1
      break
    when Reg::Repeat
      assert @rule.optional?(at)
      to=at
      to+=1 unless @rule.looping? at
      matchers.push m.subregs[0]
      positions.push to
    else
      matchers.push m
      positions.push at+1
      break
    end
    at+=1
    break if at>=limit
  end
  return [matchers,positions]
end
737
+
738
+ end
739
+
740
+ attr_accessor :rmd_cache
741
+ attr_accessor :oc_cache
742
+ attr_accessor :sl2ms_cache
743
+
744
#An action that applies only when its condition matches.
class Conditional
  attr_reader :condition,:action

  #condition:: the matcher deciding whether action applies; its restore
  #            method is called with :hash and :== on construction.
  #action:: what to do when the condition holds.
  def initialize(condition,action)
    @condition=condition
    @action=action
    @condition.restore :hash,:==
  end

  #conditionals are equal when both condition and action are
  def hash
    @condition.hash^@action.hash
  end

  def == other
    return false unless Conditional===other
    @condition==other.condition and @action==other.action
  end
  alias eql? ==

  #the inspected condition, a "?" and the action's name
  def name
    @condition.inspect+"?"+@action.name
  end

  #same as the action's priority
  def priority
    @action.priority
  end
end
763
+
764
+ class ParserState; end
765
+ class MultiShift; end
766
+ class MultiReduce; end
767
+
768
+ ACTION_PATTERN=ParserState|Rule|MultiShift|MultiReduce|:accept|:error
769
+ class ParserState #a union of dotted rules
770
#dotteds:: the dotted rules making up this state; must be non-empty and
#          free of nils.
#index:: stored as given in @index.
def initialize(dotteds,index)
  fail if dotteds.empty? #error state
  fail unless dotteds.grep(nil).empty?
  @dotteds=dotteds
  @index=index
  sort_substates!
  #key is an input, value is ParserState|Rule|MultiShift|MultiReduce|:accept|:error
  @actions={}
end
778
+
779
+ attr_reader :actions
780
+
781
#The action stored for input k; asserts that it matches ACTION_PATTERN.
def [](k)
  action=@actions[k]
  assert ACTION_PATTERN===action
  action
end

#Stores action v for input k, after asserting that v matches ACTION_PATTERN.
def []=(k,v)
  assert ACTION_PATTERN===v
  @actions.store(k,v)
end
790
+
791
#Orders the dotted rules by descending position and drops duplicates.
def sort_substates!
  by_position=@dotteds.sort_by{|dotted| -dotted.pos}
  @dotteds=by_position.uniq
end
794
+ attr :dotteds
795
+
796
#A copy with its own list of dotted rules (the rules themselves are shared).
def dup
  copy=super
  copy.instance_variable_set(:@dotteds,@dotteds.dup)
  return copy
end

#a plain state is its own only substate
def substates
  [self]
end
803
+
804
#Turns a list of shift outcomes into a single action: :error for an empty
#list, otherwise a ParserState of the DottedRules in it, wrapped in a
#MultiShift if the list also holds other (conditional) entries.
#Results are memoized per shiftlist in parser.sl2ms_cache.
def shiftlist2multishift? shiftlist,parser
  return :error if shiftlist.empty?
  parser.sl2ms_cache||={}
  cached=parser.sl2ms_cache[shiftlist]
  return cached if cached
  fixed,varying=shiftlist.partition{|res| DottedRule===res}
  state=ParserState.new(fixed,nil)
  state.perhaps_also_allow parser.all_rules,parser
  unless varying.empty? #MultiShift
    #NOTE(review): the value of .flatten is discarded, so varying stays a
    #list of [condition,action] pairs -- confirm that is what MultiShift
    #expects
    varying.map!{|v| [v.condition,v.action]}.flatten
    state=MultiShift.new(state,varying)
  end
  parser.sl2ms_cache[shiftlist]=state
  return state
end
819
+
820
#given a list of rules, see if any of them are compatible with
#a current substate. (compatibility means the aggregate patterns
#can be anded together and still be able to conceivably match something.)
#if any of morerules are actually compatible, add it to current state.
#
#This version relies on the also_allow lists precomputed on each dotted
#rule; morerules must be parser.all_rules.
def perhaps_also_allow(morerules,parser)
  fail unless morerules==parser.all_rules
  extras=@dotteds.map{|d| d.also_allow }.flatten.compact.uniq
  @dotteds.concat extras
  sort_substates!
end
829
#Older way of extending the state with compatible rules: repeatedly tries
#every rule not added yet against the dotted rules added in the previous
#round (initially all of them), until a round adds nothing.
def old_perhaps_also_allow(morerules,parser)
  morerules=morerules.dup
  need_sort=false
  scan_rules=@dotteds
  added={}
  loop do
    adding=[]
    morerules.each{|morerule|
      next if added[morerule]
      fake_rule=morerule.final_promised_rule
      final_more_dr=DottedRule.create(fake_rule,0,parser)
      scan_rules.each{|dotted|
        if dotted.optionally_combine final_more_dr,parser
          adding<<DottedRule.create(morerule,0,parser)
          added[morerule]=1
          break
        end
      }
    }
    break if adding.empty?
    @dotteds.concat adding
    need_sort=true
    scan_rules=adding #next round only checks against the newcomers
  end
  sort_substates! if need_sort
end
855
+ alias perhaps_also_allow old_perhaps_also_allow if defined? $OLD_PAA
856
+
857
+
858
#Computes the action this state takes on input, by evolving each of its
#dotted rules and combining the outcomes.
#returns ParserState|MultiShift|MultiReduce|Rule|:accept|:error
#The ensure clause asserts that the result matches ACTION_PATTERN.
def evolve input,parser,seenlist
  result2=[]
  @dotteds.each{|dotted|
    dotted.evolve input,parser,seenlist,result2
  }

  #seenlist.values.flatten.compact.uniq.sort_by{|x| x.name}
  result=result2=result2.uniq.compact.sort_by{|x| x.name}
  #pp [result,result2].map{|x| x.map{|res| DottedRule===res ? res.name : res }}
  #pp result2.map{|res| DottedRule===res ? res.name : res }
# result==result2 or fail

  return result=:error if result.empty?


  #ok, who wants to shift and who wants to reduce?
  shiftlist,reducelist=result.partition{|res|
    DottedRule===res or
      Conditional===res && DottedRule===res.action
  }

  #if no reducers at all, just try (multi?)shift
  return result=shiftlist2multishift?( shiftlist,parser )if reducelist.empty?

  #line up reducers by priority
  actions=reducelist.sort_by{|rule| -rule.priority }
# .map{|rule| rule.action }
  #actions is +[(Rule|Conditional[Rule]).*]
  action=actions.shift #this first (unless conditional)
  #action is Rule|Conditional[Rule]
  result=
    case action.action
    when :error; return :error
    when Class, StackMonkey
      action
    when :accept
      :accept
    when :shift #this counts as a reduce at this point, but it writes shift instructions
      shiftlist2multishift? shiftlist,parser
    when Rule #oy, vey, was a Conditional
      shiftaction=shiftlist2multishift?(shiftlist,parser)
      fail unless Rule===action.action
      case action.action.action
      when :error; huh
      when :shift, StackMonkey, :accept, Class #MultiReduce
        first_fixed_index=actions.size
        #actions is +[(Rule|Conditional[Rule]).*]
        actions.each_with_index{|act,i|
          break first_fixed_index=i unless Conditional===act
        }
        condactions=actions[0...first_fixed_index].unshift(action)
        condactions=condactions.inject([]){|sum,cond|
          act=cond.action
          act=shiftaction if act==:shift #=>shiftlist?
          sum.push cond.condition, act
        }
        #possible optimization: one or more :shift right at end could be ignored
        if actions[first_fixed_index]
          action=actions[first_fixed_index].action
        else
          action=shiftaction
        end
        MultiReduce.new condactions,action #=>shiftlist?
      else fail
      end
    else fail "#{action} not expected here"
    end
  #stack monkeys/:accept are treated like reduce here
ensure
  assert ACTION_PATTERN===result
end
932
+
933
#The assigned name if there is one, else the comma-joined names of the
#state's dotted rules.
def name
  return @name if @name
  @dotteds.map{|dotted| dotted.name}.join(",")
end
936
+ attr_writer :name
937
+
938
#Gives the state a short name built from its most prominent members and
#returns it. name2count counts how often each base name was handed out;
#from the second use on, "___" and the count are appended. Does nothing
#but return the name if @name was already set.
def rename(name2count)
  return @name if defined? @name
  base=most_prominent_members.map{|dotted| dotted.name}.join(",")
  unless name2count[base]
    name2count[base]=1
    return @name=base
  end
  name2count[base]+=1
  @name=base+"___"+name2count[base].to_s
end
950
+
951
#Picks the dotted rules used for naming the state: those at the same
#position as the first one, plus those with the most patterns left to go.
#Rules still at their start (position 0, or position 1 after a lookback)
#are left out unless that would leave nothing.
def most_prominent_members
  lead_pos=@dotteds.first.pos
  picked=@dotteds.select{|dr| dr.pos==lead_pos }
  remaining=@dotteds.map{|dr| [dr,dr.rule.patterns.size-dr.pos]}.sort_by{|(o,k)| -k}
  most_left=remaining.first.last
  picked+=remaining.select{|(dr,k)| k==most_left}.map{|(dr,k)| dr}
  started=picked.reject{|dr| dr.pos==0 or dr.pos==1&&dr.rule.lookback?}
  picked=started unless started.empty?
  return picked
end
959
+
960
#states are identified by their list of dotted rules
def hash
  -(@dotteds.hash)
end

def == other
  return false unless ParserState===other
  @dotteds==other.dotteds
end
alias eql? ==
968
+
969
+ def looping?
970
+ @dotteds.any?{|dotted| dotted.looping? }
971
+ end
972
+
973
#Tells whether input takes this state into a different, looping state.
#(not used)
#Returns false for :error, delegates to MultiShift/MultiReduce actions,
#and fails for any other kind of action.
def transition_to_loop? input
  action=@actions[input] #look the action up by input (Hash has no #input method)
  case action
  when :error; false
  when ParserState; action.looping? and action!=self
  when MultiShift,MultiReduce;
    action.transition_to_loop? input
  else fail
  end
end
983
+
984
# Splits this state's action table into two tables and compresses them.
#
# inputs - exemplar objects responding to #name; each action key is looked up
#          by that name to decide which table it belongs to.
#
# Keys whose exemplar is a Node go to @goto; all others (asserted to be
# Tokens) go to @sr. In each table the most frequent action becomes the
# Hash default and its explicit entries are removed. A table that received
# no entries is left empty with no default (previously this raised
# NoMethodError on nil). Finally @actions is cleared.
def make_sr_goto_tables inputs
  name2exemplar={}
  inputs.each{|i| name2exemplar[i.name]=i }

  @goto={}; @sr={}
  goto_counts=Hash.new(0); sr_counts=Hash.new(0)
  actions.each_pair{|k,v|
    if Node===name2exemplar[k]
      @goto[k]=v
      goto_counts[v]+=1
    else
      assert(Token===name2exemplar[k])
      @sr[k]=v
      sr_counts[v]+=1
    end
  }

  [[@goto,goto_counts],[@sr,sr_counts]].each{|table,counts|
    next if counts.empty? #nothing to compress; keep the nil default
    dflt=counts.sort_by{|v,c| c}.last[0]
    table.delete_if{|k,v| v==dflt }
    table.default=dflt
  }

  @actions=nil
end
1010
+
1011
+ end
1012
+
1013
# An action that picks one of several actions depending on which condition
# matches the value it is asked about.
#
# list is a flat array of alternating [condition, action, condition, action,
# ...]; default is the action used when no condition matches.
class MultiReduce
  def initialize(list,default)
    @list,@default=list,default
    #default can be any valid action (except another MultiReduce)
  end

  attr_reader :list,:default

  # Returns the action paired with the first condition that matches x
  # (tested with ===), or the default action when none does.
  def act(x)
    (0...@list.size).step(2){|i|
      return @list[i+1] if @list[i]===x
    }
    return default
  end

  # Substates of the default action, or [] when the default has none.
  def substates
    if @default.respond_to? :substates
      @default.substates
    else
      []
    end
  end

  # Every action reachable through this object: the listed actions in order,
  # followed by the default's own actions (or the default itself when it is
  # a plain non-nil action).
  def actions
    result=[]
    (1...@list.size).step(2){|i|
      result << @list[i]
    }
    if @default.respond_to? :actions
      result.concat @default.actions
    elsif @default
      result<<@default
    end
    result
  end

  def transition_to_loop? input #not used
    @default.transition_to_loop? input
  end

  def hash
    @list.hash^~@default.hash
  end

  # Equal only to another MultiReduce with the same list and default.
  # The type check keeps comparisons against other kinds of action
  # (symbols, states, nil) from raising NoMethodError on #list.
  def == other
    MultiReduce===other and
      @list==other.list and @default==other.default
  end
  alias eql? ==
end
1062
+
1063
# An action that selects one of several pre-built states depending on which
# predicates match the value it is asked about.
#
# modifiers is a flat array of alternating [predicate, modifier, predicate,
# modifier, ...]. For n predicates, @map holds 2**n copies of base; entry i
# has the modifier of predicate k appended whenever bit k of i is set.
class MultiShift
  def initialize(base,modifiers)
    @base,@modifiers=base,modifiers
    @map=
      (0...2**(modifiers.size/2)).map{|i| base.dup}
    @map.each_with_index{|state,i| #for each branch to the multishift
      (0...modifiers.size).step(2){|j| #for each predicate in the multishift
        #predicate number j/2 owns bit j/2 of the branch index, matching #act
        #(was: bit j, tested with the nonexistent method non_zero?)
        if (i&(1<<(j/2))).nonzero? #if the predicate tests true in this branch
          state.append modifiers[j+1] #add the predicates modifier to the state
        end
      }
      state.sort_substates!
    }
  end

  # Returns the state for the combination of predicates that match x.
  def act(x)
    result=0
    (0...@modifiers.size).step(2){|i|
      result|=(1<<(i/2)) if @modifiers[i]===x
    }
    @map[result]
  end

  attr_reader :map, :modifiers

  # A copy of the list of all branch states.
  def substates
    @map.dup
  end

  # Same list as #substates: every branch state is a possible action.
  def actions
    @map.dup
  end

  #the next three are unfinished placeholders in the original source;
  #`huh` is not defined in the visible part of this file
  def transition_to_loop? input #not used
    huh
  end

  def hash
    huh
  end
  def == other
    huh
  end
  alias eql? ==
end
1108
+
1109
+ #an action is one of:
1110
+ #a ParserState (shift)
1111
+ #a Rule (reduce)
1112
+ #nil (error)
1113
+ #:accept
1114
+ #MultiReduce
1115
+ #MultiShift
1116
+
1117
+ #just the left side (the stack/lookahead matchers)
1118
# Collects the left-hand-side matchers of every rule.
#
# Refreshes @rules from expanded_RULES as a side effect, then returns one
# flat array holding the subregs of each rule's left side.
def LEFT
  @rules = expanded_RULES()
  per_rule = @rules.map { |rule| rule.left.subregs }
  per_rule.flatten
end
1124
+
1125
+ #remove lookahead and lookback decoration (not used?)
1126
# Returns the LEFT() matchers with each Proc replaced by an empty array.
# Lookahead/lookback wrappers are expected to have been stripped before this
# point; finding one raises RuntimeError. (Possibly unused, per the original
# comment preceding this method.)
def LEFT_NO_LOOKING
  LEFT().map do |m|
    case m
    when Reg::LookAhead, Reg::LookBack then fail #should be gone already now
    when Proc then []
    else m
    end
  end
end
1137
+
1138
# Builds a parent => children table restricted to the given classes.
#
# classes - the classes (or modules) of interest; Object is always added.
#
# Each class is filed under its nearest ancestor that is itself in the list.
# Returns a Hash mapping every listed class to the array of listed classes
# whose nearest listed ancestor it is. Raises RuntimeError if a class is not
# the first entry of its own ancestor chain.
def child_relations_among(*classes)
  classes.unshift Object
  result = {}
  classes.each { |klass| result[klass] = [] }

  classes.each do |klass|
    lineage = klass.ancestors
    lineage.shift == klass or fail
    nearest = lineage.find { |anc| result[anc] }
    result[nearest] << klass if nearest
  end

  result
end
1157
+
1158
+ #all classes mentioned in rules, on left and right sides
1159
# All classes mentioned in rules, on left and right sides.
#
# The result is memoized in @sc_result. That variable is assigned an empty
# array before any work is done and filled in place at the end, so a nested
# call made while computing gets the (still empty) array rather than
# recursing. Raises RuntimeError if no class is found at all.
def STACKABLE_CLASSES #
  return @sc_result if defined? @sc_result
  @sc_result = []
  @subclasses_of = child_relations_among(*vertices)

  from_left = LEFT().map { |matcher| sc_juice matcher }.flatten.compact
  assert from_left.grep(nil).empty?
  from_right = @rules.map { |rule| rule.right }.grep(Class) #classes in productions

  @subclasses_of = nil
  @sc_result.replace((from_left + from_right).grep(Class).uniq)
  fail if @sc_result.empty?
  @sc_result
end
1174
+
1175
+ # def juice(m)
1176
+ # case m #
1177
+ # when Class
1178
+ # return [m] unless @subclasses_of
1179
+ # result=[m] # and subclasses too
1180
+ # i=0
1181
+ # while item=result[i]
1182
+ # p item
1183
+ # result.concat @subclasses_of[item] rescue nil
1184
+ # i += 1
1185
+ # end
1186
+ # result
1187
+ # when String,Regexp; juice(RedParse.KW(m))
1188
+ # when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
1189
+ # when Reg::Or; m.subregs.map &method(:juice)
1190
+ # when Reg::Not
1191
+ # m=m.subregs[0]
1192
+ # if Class===m or (Reg::Or===m and
1193
+ # m.subregs.find{|x| Class===x })
1194
+ # juice(m)
1195
+ # else []
1196
+ # end
1197
+ # else []
1198
+ # end
1199
+ # end
1200
+
1201
# Extracts the classes mentioned by a matcher.
#
#   Class           -> [the class]
#   String/Regexp   -> [KeywordToken]
#   Reg::And        -> intersection of the (flattened) results of its parts
#   Reg::Or         -> array with one result per part (nested, not flattened)
#   Reg::Not, Reg::LookAhead, Reg::LookBack, Reg::Repeat
#                   -> result for the wrapped matcher
#   anything else   -> []
def sc_juice(m)
  case m #
  when Class
    [m]
  when String, Regexp
    [KeywordToken]
  when Reg::And
    parts = m.subregs.map { |sub| sc_juice(sub) }.compact
    parts.map { |part| part.flatten.compact }.inject { |common, part| common & part }
  when Reg::Or
    m.subregs.map { |sub| sc_juice(sub) }
  when Reg::Not, Reg::LookAhead, Reg::LookBack, Reg::Repeat
    sc_juice(m.subregs[0])
  else
    []
  end
end
1213
+
1214
# Returns the rules that report themselves as unruly.
#
# The list is computed once and memoized in @unruly_rules; on that first
# computation the marker :unruly_rules and the rule names are printed to
# standard output.
def unruly_rules
  unless defined? @unruly_rules
    @unruly_rules = all_rules.select { |rule| rule.unruly? }

    p :unruly_rules
    pp @unruly_rules.map { |r| r.name }
  end

  @unruly_rules
end
1225
+
1226
# Builds one sample ("exemplar") object per kind of input the parser can see.
#
# Every stackable class contributes specs through its own
# enumerate_exemplars; the specs from all classes are concatenated. Each spec
# is an array of the form [klass, method_name, value, method_name, value,
# ...]: the class is allocated without calling initialize, and a singleton
# method returning each value is defined on the instance. The instance's
# to_s is overridden to return its identity_name. The spec arrays are
# consumed (emptied) in the process.
#
# The result is memoized in the class variable @@exemplars.
def enumerate_exemplars
  return @@exemplars if defined? @@exemplars #dunno why this is necessary

  specs = STACKABLE_CLASSES().map { |sc| sc.enumerate_exemplars }
  specs = specs.inject { |sum, sc| sum + sc }

  specs.map! do |spec|
    res = spec.shift.allocate
    until spec.empty?
      meth = spec.shift
      value = spec.shift
      #the value is embedded as source text via inspect, then re-evaluated
      eval "def res.#{meth}; #{value.inspect} end"
    end
    def res.to_s; identity_name end
    res
  end

  @@exemplars = specs
end
1244
+
1245
# Weeds out inputs that no rule pattern can tell apart.
#
# Every pattern of every rule is collected (a Reg::Repeat is replaced by the
# matcher it wraps) and deduplicated. Each input in @inputs gets a profile:
# for every pattern, either the pattern itself when it is a Proc, or whether
# the pattern matches the input. An input whose profile has been seen before
# is reported on standard output, recorded in @identity_name_aliases (keyed
# by the earlier input) and dropped.
#
# Returns the inputs with distinct profiles, in their original order.
# Also emits four fixed warnings on each call.
def check_for_parsealike_inputs
  all_patterns = all_rules.map do |rule|
    rule.patterns.map do |rp|
      rp = rp.subregs[0] if Reg::Repeat === rp
      rp
    end
  end.flatten.uniq

  seen = {}
  @identity_name_aliases = {}
  warn "why are non_empty and after_equals params to BeginNode appearently ignored?"
  warn "some token identities overlap themselves?!?"
  warn "some overlaps are duplicated"
  warn ". and :: overlap => ..... surely that's not right"

  distinct = []
  @inputs.each do |input|
    profile = all_patterns.map { |pat| Proc === pat ? pat : !!(pat === input) }
    earlier = seen[profile]
    if earlier
      puts "#{input} overlaps #{earlier}"
      @identity_name_aliases[earlier] = input
    else
      seen[profile] = input
      distinct << input unless input.nil?
    end
  end
  distinct
end
1264
+
1265
# Discovers every parser state reachable from the start state.
#
# Works through a to-do list seeded with start_state: each state is evolved
# against every distinct input (StartToken exemplars excluded), the resulting
# action is stored in the state under the input's identity_name, and any
# substates of that action not numbered yet are numbered and queued.
# A progress line is printed after each state. When done, the parser's
# rmd_cache, oc_cache and sl2ms_cache are reset to nil.
#
# Returns the states in the order they were processed.
def enumerate_states
  inputs = check_for_parsealike_inputs
  inputs.reject! { |x| StartToken === x }

  result = []
  todo = [start_state]

  seenlist = Hash.new(:dunno_yet)

  done = 0
  started = previous = Time.now
  in_result = {} #this should go away; obsoleted by @states
  state_num = -1
  todo.each { |st| in_result[st] = (state_num += 1) }
  first_state = todo.first
  pp [-in_result[first_state], *first_state.dotteds.map { |dr| dr.name }]
  old_todo_size = todo.size

  while state = todo.shift
    result << state

    inputs.each do |input|
      newstate = state.evolve input, self, seenlist
      assert ACTION_PATTERN === newstate
      #newstate is ParserState|MultiShift|MultiReduce|Rule|:accept|:error
      state[input.identity_name] = newstate
      next unless newstate.respond_to? :substates
      #newstate.substates is just [newstate] for plain ParserStates
      morestates = newstate.substates.reject { |x| in_result[x] }
      morestates.each { |st| in_result[st] = (state_num += 1) }
      todo.concat morestates
    end

    #progress: states done, queue length, queue growth, seconds for this
    #state, states per second overall, percent complete
    now = Time.now
    done += 1
    p [:*, done, todo.size, todo.size - old_todo_size, now - previous,
       done / (now - started), (100.0 * done / (done + todo.size)).to_i]
    old_todo_size = todo.size
    previous = now
  end

  self.rmd_cache = nil
  self.oc_cache = nil
  self.sl2ms_cache = nil
  result
end
1320
+
1321
# Converts an action into a compact, printable structure for debugging.
#
# x         - the action to describe.
# in_result - Hash mapping each ParserState to its state number.
#
#   ParserState      -> its number from in_result
#   MultiReduce      -> [:MultiReduce, [condition.inspect, pretty(action.action)],
#                        ..., pretty(default)]
#   MultiShift       -> [:MultiShift, [predicates...], {branch index => pretty(state)}]
#   Class/StackMonkey-> its name
#   :accept, :error  -> itself
#
# Raises RuntimeError for anything else.
def pretty(x,in_result)
  case x
  when ParserState; in_result[x]
  when MultiReduce
    result=[]
    x.list.each_slice(2){|cond,act|
      result<<[cond.inspect,pretty(act.action,in_result)]
    }
    result<<pretty(x.default,in_result)
    result.unshift :MultiReduce
  when MultiShift
    mods=x.modifiers
    its=[]
    (0...mods.size).step(2){|i| its<<mods[i] }
    h={}
    #in_result was not passed along here before, which raised ArgumentError
    x.map.each_with_index{|xx,i| h[i]=pretty(xx,in_result) }
    [:MultiShift, its,h]
  when Class; x.name
  when StackMonkey; x.name
  when :accept,:error; x
  else fail "not a valid action: #{x}"
  end
end
1347
+
1348
+ attr_accessor :inputs
1349
+
1350
# All parser states, computed by enumerate_states on first use and memoized
# in @all_states afterwards (even if the computed value is nil).
def all_states
  @all_states = enumerate_states unless defined? @all_states
  @all_states
end
1354
+
1355
# Returns the exemplar inputs in @inputs that pattern p matches (via ===).
def exemplars_that_match p
  @inputs.select { |exemplar| p === exemplar }
end
1358
+
1359
# True when at least one exemplar in @inputs matches both Node and pattern p.
def pattern_matches_nodes? p
  node_hits = @inputs.grep(Node&p)
  node_hits.empty? ? false : true
end
1362
+
1363
# True when at least one exemplar in @inputs matches both Token and pattern p.
def pattern_matches_tokens? p
  token_hits = @inputs.grep(Token&p)
  token_hits.empty? ? false : true
end
1366
+
1367
# Looks name up in @identity_name_aliases.
# Returns the recorded alias when there is one, otherwise name itself.
def identity_name_alias? name
  @identity_name_aliases[name] || name
end
1371
+
1372
# Builds the parse tables, prints diagnostics about them, and emits C code
# for the parser on standard output.
#
# If "cached_parse_tables.drb" exists in the current directory, a previously
# dumped parser is loaded from it and this object is turned into a forwarder
# to it. Otherwise all states are enumerated and the parser is dumped to
# that file (a failed dump is reported and the partial file removed).
# Afterwards: unused dotted rules/actions and duplicate states are reported,
# states are renamed, each state's actions are split into sr and goto
# tables, and generate_c is called.
#
# While running, this parser is registered in
# Thread.current[:$RedParse_parser] (unless one is registered already); the
# previous value is restored on exit.
#
# Returns self.
def compile
  oldparser=Thread.current[:$RedParse_parser]
  Thread.current[:$RedParse_parser]||=self

  if File.exist?("cached_parse_tables.drb")
    #NOTE(review): Marshal.load trusts whatever file of this name sits in
    #the current directory; only safe if that directory is trusted.
    #Block form closes the file even if loading fails.
    cached=File.open("cached_parse_tables.drb","rb"){|fd| Marshal.load(fd) }
    instance_variables.each{|var| remove_instance_variable var }
    extend SingleForwardable
    def_singleton_delegators(cached,public_methods+private_methods+protected_methods)

    self.inputs=enumerate_exemplars
    #NOTE(review): `states` is never assigned on this path, so the
    #diagnostics below appear to run against nil -- confirm how the cached
    #path is meant to obtain its states.
  else
    @generating_parse_tables=true
    @inputs||=enumerate_exemplars

    states=all_states
    # @rules=expanded_RULES
    @inputs=nil #Marshal no like it

    begin
      p :dumping
      #block form closes the file before the rescue clause tries to remove it
      File.open("cached_parse_tables.drb","wb"){|fd| Marshal.dump(self,fd) }
      p :dump_done!
    rescue Exception #broad on purpose in the original; left as is
      p :dump_failed
      #the file may not exist if opening it was what failed
      File.unlink "cached_parse_tables.drb" if File.exist? "cached_parse_tables.drb"
    ensure
      @inputs=enumerate_exemplars
    end
  end

  #look for unused dotted rules and actions
  #also states with drs past the end
  past_end=0
  drs=all_dotted_rules
  dr_count=Hash.new(0)
  acts=all_rules#.map{|r| r.action }.uniq
  act_count=Hash.new(0)
  states.each{|state|
    state.dotteds.each{|dr|
      dr_count[dr]+=1
      past_end+=1 if dr.pos>=dr.rule.patterns.size
    }
    sav=state.actions.values
    sav.grep(Class|StackMonkey).each{|act| act_count[act.__id__]+=1 }
    sav.grep(MultiReduce|MultiShift).each{|multi| multi.actions.each{|act| act_count[act.__id__]+=1} }
    #p state.name if state.dotteds.select{|dr| dr.rule.action==BeginNode}
  }
  puts "#{past_end} dotted rules found past the end of their rule" if past_end>0
  nevers=0
  drs.each{|dr|
    next unless dr_count[dr].zero?
    puts "never reached #{dr.name}"
    nevers+=1
  }
  puts "#{nevers} dotted rules were never reached (out of #{drs.size})"
  nevers=0
  acts.each{|act|
    next unless act_count[act.__id__].zero?
    puts "never reached #{act.name rescue act}"
    nevers+=1
  }
  puts "#{nevers} actions were never reached (out of #{acts.size})"
  p :most_popular_nontrivial_drs
  #last(15) instead of [-15..-1]: the latter is nil with fewer than 15 entries
  pp dr_count.reject{|(dr,n)| dr.pos.zero? or dr.pos==1 && dr.rule.lookback?} \
     .sort_by{|(dr,n)| n}.last(15).map{|(dr,n)| [dr.name,n] }

  #look for duplicate states
  actions2state={}
  dup_states=0
  states.each{|st|
    cache=actions2state[st.actions]
    if cache
      st.equivalent_to=cache
      dup_states+=1
    else
      actions2state[st.actions]=st
    end
  }
  puts "#{dup_states} duplicate states" if dup_states.nonzero?

  name2count={}
  states.each{|state| state.rename(name2count) }

  #divide each state's actions into sr and goto tables
  #also scan states for the most common sr and goto actions and make them default
  states.each{|state| state.make_sr_goto_tables @inputs}


  # pp states
  # pp states.size

  generate_c $stdout
  return self
ensure
  remove_instance_variable :@generating_parse_tables rescue nil
  Thread.current[:$RedParse_parser]=oldparser
end
1471
+
1472
# Finds the node classes that a complete parse reduces to.
#
# Looks for rules whose pattern list is exactly
# [StartToken, <goal>, EoiToken] and collects the classes mentioned by the
# middle pattern.
#
# The size test used to be `==0`, which can never hold together with a
# first pattern of StartToken, so the result was always empty. The class
# extraction used to call `juice`, which exists only as commented-out code
# in this file; sc_juice is used instead.
# NOTE(review): the commented-out juice also expanded subclasses; confirm
# that sc_juice's narrower result is what callers want.
#
# Returns a flat array (empty when no such rule exists).
def ultimate_goal_nodes
  result=[]
  all_rules.each{|rule|
    if rule.patterns.size==3 and
       rule.patterns.first==StartToken and
       rule.patterns.last==EoiToken
      result << sc_juice(rule.patterns[1])
    end
  }
  result.flatten!
  return result
end
1484
+
1485
+
1486
+ # def start_state
1487
+ # goal=ultimate_goal_nodes
1488
+ # result=all_rules.select{|rule|
1489
+ # rt=rule.reduces_to and
1490
+ # !goal.select{|node| node>=rt}.empty?
1491
+ # }
1492
+ # result.map!{|rule| DottedRule.create(rule,0,parser)}
1493
+ #
1494
+ # result=ParserState.new result
1495
+ # result.name="start_state"
1496
+ # result
1497
+ # end
1498
+
1499
# Creates a parser state for the given dotted rules and registers it.
#
# drs         - the dotted rules the state is made of.
# unruly_also - accepted but not used by this method.
#
# The new state is numbered with the current size of @states and extended
# through perhaps_also_allow. If @states has no entry for an equal state,
# the state is stored there with that number and returned.
# NOTE(review): when an equal state is already registered, the value stored
# in @states (its number, as assigned below) is returned rather than a state
# object -- confirm that callers expect this.
def new_state(drs,unruly_also=false)
  candidate = ParserState.new(drs, @states.size)
  candidate.perhaps_also_allow(all_rules, self)

  known = @states[candidate]
  return known if known

  @states[candidate] = @states.size
  candidate
end
1507
+
1508
# Resets the state registry and builds the state the parser begins in:
# one dotted rule at position 0 for every rule. The state is named
# "initial".
def initial_state
  @states = {}
  all_initial_dotted_rules #is this still needed?

  starting_drs = all_rules.map { |rule| DottedRule.create(rule, 0, self) }
  state = new_state(starting_drs)
  state.name = "initial"
  #state.perhaps_also_allow all_rules,self #silly here
  state
end
1516
+
1517
+ attr_reader :states
1518
+
1519
# Builds the state reached from the initial state after a StartToken has
# been seen, extends it through perhaps_also_allow, and names it "start".
def start_state
  seenlist = Hash.new(:dunno_yet)
  state = initial_state.evolve(StartToken.new, self, seenlist)
  state.perhaps_also_allow(all_rules, self)
  state.name = "start"
  state
end
1529
+
1530
# Mixin for objects stored in constants: remembers the constant's name and
# uses it as the object's inspect output.
module NamedConstant
  attr_reader :constant_name
  attr_writer :constant_name

  def inspect
    constant_name
  end
end
1534
# Tags the value of every constant of this class with the constant's name
# (via NamedConstant), so that inspecting the value shows that name.
# Values matched by the combined Class/Module/Numeric/Symbol/true/false/nil
# pattern are left alone.
def self.inspect_constant_names
  constants.each do |kn|
    k = const_get(kn)
    unless Class|Module|Numeric|Symbol|true|false|nil===k
      k.extend NamedConstant
      k.constant_name = kn
    end
  end
end
1542
+
1543
# Table of objects reachable from the rules that cannot be marshalled.
#
# Walks @rules (loading them from expanded_RULES if necessary) and records
# every StackMonkey under its name and every Reg::Deferred under a running
# integer; each Deferred also gets a singleton undump_key accessor holding
# that integer. The walk's block returns false for the objects it records
# and true for everything else.
#
# The table is memoized in @undumpables and is now returned on the first
# call as well; previously the first call returned whatever
# abortable_graphwalk returned.
def undumpables
  return @undumpables if @undumpables
  @rules||=expanded_RULES
  n=-1
  @undumpables={}
  abortable_graphwalk(@rules){|cntr,o,i,ty|
    recorded=
      case o
      when StackMonkey
        @undumpables[o.name]=o
      when Reg::Deferred
        @undumpables[n+=1]=o
        class<<o
          attr_accessor :undump_key
        end
        o.undump_key=n
      end
    !recorded
  }
  @undumpables
end
1561
+
1562
class ::Proc #hack hack hack
  #Teach Marshal to handle procs, but only when this Ruby cannot already:
  #a proc is dumped as its undump_key and loaded back by looking that key up
  #in the undumpables table of the parser registered in
  #Thread.current[:$RedParse_parser].
  sample = Proc.new{}
  already_dumpable = sample.respond_to?(:_dump) ||
                     sample.respond_to?(:marshal_dump) ||
                     (Marshal.dump(proc{}) rescue false)

  unless already_dumpable
    def _dump depth
      undump_key.to_s
    end

    def self._load str
      Thread.current[:$RedParse_parser].undumpables[str.to_i]
    end
  end
end
1575
+
1576
+ =begin disabled, uses too much memory!!
1577
+ class MarshalProxy
1578
+ def initialize(key)
1579
+ @key=key
1580
+ end
1581
+ attr :key
1582
+ end
1583
+
1584
+ #convert unmarshalables, such as stackmonkeys into proxies
1585
+ def proxify
1586
+ n=-1
1587
+ seen={}
1588
+ mkproxy=proc{|cntr,o,i,ty,useit|
1589
+ case o
1590
+ when StackMonkey
1591
+ useit[0]=true
1592
+ seen[o.__id__]||=MarshalProxy.new(o.name)
1593
+ when Reg::Deferred
1594
+ useit[0]=true
1595
+ seen[o.__id__]||=MarshalProxy.new(n+=1)
1596
+ end
1597
+ }
1598
+ Ron::GraphWalk.graphmodify!(@rules,&mkproxy)
1599
+ Ron::GraphWalk.graphmodify!(self,&mkproxy)
1600
+
1601
+ end
1602
+
1603
+ def _dump depth
1604
+ fail unless @rules
1605
+ proxify
1606
+ ivs=instance_variables
1607
+ a=ivs+ivs.reverse.map{|var| instance_variable_get var }
1608
+ result=Marshal.dump(a,depth)
1609
+ unproxify
1610
+ return result
1611
+ end
1612
+
1613
+ #convert marshal proxies back to the real thing
1614
+ def unproxify
1615
+ #build a lookup table for unmarshalables by walking @rules
1616
+ @rules||=expanded_RULES
1617
+ n=-1;lookup={}
1618
+ Ron::GraphWalk.graphwalk(@rules){|cntr,o,i,ty|
1619
+ case o
1620
+ when StackMonkey
1621
+ lookup[o.name]=o
1622
+ when Reg::Deferred
1623
+ lookup[n+=1]=o
1624
+ end
1625
+ }
1626
+
1627
+ Ron::GraphWalk.graphmodify!(self){|cntr,o,i,ty,useit|
1628
+ if MarshalProxy===o
1629
+ useit[0]=true
1630
+ lookup[o.key]
1631
+ end
1632
+ }
1633
+ end
1634
+
1635
+ def self._load(str,*more)
1636
+ result=allocate
1637
+ a=Marshal.load(str,*more)
1638
+
1639
+ result.unproxify
1640
+
1641
+ (0...a.size/2).each{|i| result.instance_variable_set a[i],a[-i] }
1642
+ return result
1643
+ end
1644
+ =end
1645
+
1646
+ end
1647
+
1648
+