redparse 0.8.3 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,172 @@
1
+ require 'digest/sha2'
2
+ class RedParse
3
+ class Cache
4
# Prepares the on-disk cache for one particular caller.
#
# params:: arbitrary values identifying the caller; they are joined with ","
#          and SHA2-hashed to name this caller's private cache directory.
#
# Creates <home>/.redparse/ and the per-caller directory when missing. If the
# digest saved in the "parserdigest" file differs from the digest of the
# redparse.rb found on the load path, every cached entry (for all callers) is
# deleted and the saved digest is rewritten. Finally this caller's entries are
# trimmed back to the size limit.
def initialize *params
  @callersfile = Digest::SHA2.hexdigest(params.join(','))
  @homedir = find_home + "/.redparse/"
  Dir.mkdir(@homedir) unless File.exist?(@homedir)
  Dir.mkdir(cachedir) unless File.exist?(cachedir)

  digest_file = @homedir + "/parserdigest"
  saved_digest = nil
  if File.exist?(digest_file)
    saved_digest = File.open(digest_file, "rb") { |fd| fd.read.chomp }
  end

  # the parser's digest is computed once and remembered in a class variable
  actual_digest = (@@saved_parser_digest ||= redparse_rb_hexdigest)

  unless saved_digest == actual_digest
    File.unlink(*all_entry_files) #flush cache
    File.open(digest_file, "wb") { |fd| fd.puts actual_digest } #update saved digest
  end

  retire_old_entries
end
17
+
18
# Path (with trailing slash) of the directory holding this caller's entries:
# the cache home directory followed by the caller digest.
def cachedir
  base = @homedir + @callersfile
  base + "/"
end
21
+
22
# Lists the paths of everything directly inside this caller's cache directory.
def entry_files
  pattern = cachedir + "*"
  Dir.glob(pattern)
end
25
+
26
# Lists the cache entries of every caller: the contents of each directory
# found directly under the cache home. Plain files in the cache home itself
# (such as the saved digest) are not included.
def all_entry_files
  caller_dirs = Dir[@homedir + "*"].select { |path| File.directory?(path) }
  per_dir = caller_dirs.map { |dirname| Dir[dirname + "/*"] }
  per_dir.flatten
end
33
+
34
# Keeps this caller's cache within its size budget. When the combined size of
# the entries exceeds the limit (max_size, or 10_000_000 bytes if none is
# configured), entries are deleted oldest-modified first until the total is
# back within the limit.
def retire_old_entries
  limit = max_size || 10_000_000
  candidates = entry_files
  total = 0
  candidates.each { |fn| total += File.size(fn) }
  return unless total > limit

  oldest_first = candidates.sort_by { |fn| File::mtime(fn) }
  until total <= limit
    victim = oldest_first.shift
    total -= File.size(victim)
    File.unlink(victim)
  end
end
47
+
48
# Digest of the redparse.rb file found on the load path.
# Walks $: in order and stops at the first directory containing redparse.rb;
# the file is then opened in binary mode and hashed with hexdigest_of_file.
# If no directory contains it, the last candidate path is still opened, so
# File.open raises.
def redparse_rb_hexdigest
  candidate = nil
  $:.each do |dir|
    candidate = dir + "/redparse.rb"
    break if File.exist?(candidate)
  end
  File.open(candidate, "rb") { |fd| hexdigest_of_file fd }
end
53
+
54
# SHA2 hex digest of the full contents of an open, rewindable stream.
# The stream is rewound before reading, consumed in 4096-byte chunks, and
# rewound again afterwards so the caller can read it from the start.
def hexdigest_of_file fd
  digester = Digest::SHA2.new
  fd.rewind
  loop do
    chunk = fd.read(4096)
    break unless chunk
    digester.update chunk
  end
  fd.rewind
  digester.hexdigest
end
63
+
64
# Size limit read from the "size" file in the cache home directory, as an
# integer (String#to_i of the file's contents). Returns nil when the file
# cannot be read.
def max_size
  begin
    File.open(@homedir + "/size") { |fd| fd.read.chomp.to_i }
  rescue
    nil
  end
end
67
+
68
##
# Locates the user's home directory.
#--
# Lookup order:
# 1. the HOME environment variable, then USERPROFILE;
# 2. HOMEDRIVE followed by HOMEPATH, when both are set (the ruby-talk
#    discussion this was based on proposed these as Win32 fallbacks);
# 3. File.expand_path("~");
# 4. if that raises: "C:/" when File::ALT_SEPARATOR is set, else "/".
#(originally stolen from rubygems)
def find_home
  %w[HOME USERPROFILE].each do |key|
    value = ENV[key]
    return value if value
  end

  drive, path = ENV['HOMEDRIVE'], ENV['HOMEPATH']
  return "#{drive}#{path}" if drive && path

  begin
    File.expand_path("~")
  rescue
    File::ALT_SEPARATOR ? "C:/" : "/"
  end
end
98
+ private :find_home, :entry_files, :redparse_rb_hexdigest, :retire_old_entries, :max_size, :hexdigest_of_file
99
+
100
# Computes the cache key (SHA2 hex digest) for a piece of parser input.
#
# input:: either a String of source text, or a stream. Anything responding
#         to both #read and #rewind is treated as a stream and hashed via
#         hexdigest_of_file; this covers IO/File as before and also IO-like
#         objects that are not IO subclasses (StringIO, Tempfile), which
#         previously fell through to the String branch and raised TypeError.
#
# Returns the digest as a hex String.
def hash_of_input input
  if input.respond_to?(:read) && input.respond_to?(:rewind)
    hexdigest_of_file input
  else
    Digest::SHA2.hexdigest input
  end
end
107
+
108
# Looks up the cached result for +input+.
#
# Returns the stored object, or nil when there is no entry, the entry is
# empty (EOFError), or it cannot be decoded. An entry whose first line
# matches "#encoded with Ron" is decoded with Ron (the whole file content is
# handed to Ron.load); anything else is decoded with Marshal. A successful
# read refreshes the entry's timestamps.
#
# NOTE(review): Marshal.load trusts the cache file contents completely.
def get input
  digest = hash_of_input input
  cachefile = cachedir + digest
  return nil unless File.exist? cachefile

  result = File.open(cachefile, "rb") do |fd|
    first_line = fd.readline #raises EOFError on an empty file
    fd.rewind
    if /#encoded with Ron\n/i === first_line
      begin
        require 'ron'
        Ron.load fd.read
      rescue Exception
        return nil
      end
    else
      begin
        Marshal.load fd
      rescue Exception => e
        warn "#{e.class}: #{e}"
        warn "cache read failed for:\n#{input}"
        return nil
      end
    end
  end

  begin
    now = Time.now
    File.utime(now, now, cachefile)
  rescue Exception
    File.open(cachefile, "a") { |fd| } #touch cache date
  end
  result
rescue EOFError
  nil
end
144
+
145
# Stores +result+ in the cache under the digest of +input+.
#
# The result is written with Marshal. If Marshal raises TypeError, the entry
# is removed and rewritten with Ron instead, prefixed by a
# "#encoded with Ron" marker line. If that fallback fails too (including Ron
# not being installable via require), any partial entry is removed and the
# method returns silently. Any other failure is reported with warn and the
# entry, if one exists, is deleted. The return value is not meaningful.
#
# The thread-local "Marshal.ignore_sclass" is set for the duration of the
# dump and always cleared afterwards; presumably it is consulted by a Marshal
# extension defined elsewhere -- not visible here.
def put input,result
  cachefile=nil #stays nil if hashing the input itself fails
  hash=hash_of_input input
  cachefile=cachedir+hash
  File.open(cachefile, "wb"){|fd|
    begin
      Thread.current["Marshal.ignore_sclass"]=true
      Marshal.dump(result,fd)
    rescue TypeError=>e #dump failed
      File.unlink cachefile
      begin
        require 'ron'
        File.open(cachefile, "wb"){|fd2|
          fd2.write "#encoded with Ron\n"
          fd2.write Ron.dump(result)
        }
      rescue Exception
        #don't leave a truncated Ron entry behind for get to trip over
        File.unlink cachefile if File.exist? cachefile
        return
      end
    ensure
      Thread.current["Marshal.ignore_sclass"]=nil
    end
  }
rescue Exception=>e #dump failed
  warn "#{e.class}: #{e}"
  warn "cache write failed for:\n#{result.inspect}"
  #only remove an entry that is known and actually present, so this handler
  #cannot itself raise and mask the failure that was just reported
  File.unlink cachefile if cachefile and File.exist? cachefile
end
171
+ end
172
+ end
@@ -0,0 +1,1648 @@
1
+ =begin
2
+ redparse - a ruby parser written in ruby
3
+ Copyright (C) 2008,2009 Caleb Clausen
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU Lesser General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+
20
+
21
+ require 'forwardable'
22
+
23
+ begin
24
+ require 'rubygems'
25
+ rescue LoadError=>e
26
+ #hope we don't need it
27
+ raise unless /rubygems/===e.message
28
+ end
29
+ require 'rubylexer'
30
+ require 'reg'
31
+ require 'reglookab'
32
+
33
+ require "redparse/node"
34
+ #require "redparse/decisiontree"
35
+ require "redparse/reg_more_sugar"
36
+ require "redparse/generate"
37
+ require "redparse/cache"
38
+
39
+ class RedParse
40
+
41
+ if defined? END_ATTACK
42
+ class RuleSet
43
+ def initialize(rules)
44
+ @rules=rules.reverse
45
+ #rule order must be reversed relative to the usual RedParse rule
46
+ #order... merely so that ffs can work right.
47
+ @maxmask=(1<<@rules.size)-1
48
+ @subclasses_of=child_relations_among(*STACKABLE_CLASSES())
49
+ end
50
+
51
# Converts a list of rules into a bitmask: bit i is set when the i-th rule of
# this rule set (in its stored order) is included in +rules+.
def rules2mask(rules)
  @rules.each_with_index.inject(0) do |mask, (rule, bit)|
    rules.include?(rule) ? (mask | (1 << bit)) : mask
  end
end
58
+
59
# Converts a bitmask back into the list of rules whose bits are set
# (bit i selects the i-th stored rule), in stored order.
#
# The bit test must compare against 1: in Ruby the integer 0 is truthy, so a
# bare `mask&(1<<i)` condition selected every rule regardless of the mask.
# (A later definition of mask2rules in this class replaces this one.)
def mask2rules(mask)
  rules=[]
  @rules.each_with_index{|r,i|
    rules<<r if mask[i]==1
  }
  return rules
end
66
+
67
# Converts a bitmask into the list of rules whose bits are set, lowest bit
# first. Uses ffs (1-based index of the lowest set bit) to jump straight to
# each set bit, clearing it before looking for the next.
def mask2rules(mask)
  selected = []
  until mask.zero?
    bit = ffs(mask) - 1
    selected << @rules[bit]
    mask &= ~(1 << bit)
  end
  selected
end
75
+
76
# Yields each rule selected by +mask+ together with its index (bit i selects
# the i-th stored rule). The default mask of -1 has every bit set and so
# selects all rules.
#
# The bit test must compare against 1: in Ruby the integer 0 is truthy, so a
# bare `mask&(1<<i)` condition yielded every rule regardless of the mask.
# (A later definition of each_rule in this class replaces this one.)
def each_rule(mask=-1)
  @rules.each_with_index{|r,i|
    yield r,i if mask[i]==1
  }
end
81
+
82
# Yields each rule selected by +mask+ together with its index, lowest bit
# first. Defaults to @maxmask, the mask with one bit per stored rule. Uses
# ffs (1-based index of the lowest set bit) to visit only the set bits.
def each_rule(mask=@maxmask)
  until mask.zero?
    bit = ffs(mask) - 1
    yield @rules[bit], bit
    mask &= ~(1 << bit)
  end
end
88
+
89
+
90
+ @@FFS_TABLE=[nil]
91
+ 1.upto(8){|n|
92
+ @@FFS_TABLE*=2
93
+ @@FFS_TABLE[@@FFS_TABLE.size/2]=n
94
+ }
95
# Pure-ruby "find first set": 1-based position of the lowest set bit of
# +mask+, or 0 when mask is zero. Examines the mask one byte at a time,
# looking each byte up in @@FFS_TABLE (which holds nil for a zero byte).
def rb_ffs(mask)
  bytes_skipped = 0
  until mask.zero?
    low_bit = @@FFS_TABLE[mask & 0xFF]
    return low_bit + (bytes_skipped << 3) if low_bit
    bytes_skipped += 1
    mask >>= 8
  end
  0
end
105
+
106
+ begin
107
+ require 'inline'
108
+ inline{|inline|
109
+ inline.prefix '#define _GNU_SOURCE'
110
+ inline.include '"string.h"'
111
+ inline.include '"limits.h"'
112
+ inline.c %{
113
+ unsigned c_ffs(VALUE mask){
114
+ if FIXNUM_P(mask) {
115
+ return ffsl(NUM2UINT(mask));
116
+ } else if(TYPE(mask)==T_BIGNUM) {
117
+ struct RBignum* bn=RBIGNUM(mask);
118
+ int len=bn->len;
119
+ int i;
120
+ unsigned offset=0;
121
+ unsigned result=0;
122
+ for(i=0;i<len;++i){
123
+ /*printf("least:%x\\n", ((BDIGIT*)(bn->digits))[i]);*/
124
+ /*printf("most:%x\\n", ((BDIGIT*)(bn->digits))[len]);*/
125
+ result=ffs(((BDIGIT*)(bn->digits))[i]);
126
+ if (result) break;
127
+ offset+=sizeof(int)*CHAR_BIT;
128
+ }
129
+ if (result==0) return 0;
130
+ return result+offset;
131
+ } else {
132
+ rb_fatal("bad argument to ffs");
133
+ }
134
+ }
135
+ }
136
+ }
137
+ alias ffs c_ffs
138
+ rescue Exception=>e
139
+ warn "error (#{e.class}) while defining inline c ffs()"
140
+ warn "original error: #{e}"
141
+ warn "falling back to ruby version of ffs()"
142
+ alias ffs rb_ffs
143
+
144
+ end
145
+
146
+
147
+
148
+
149
+ #just the left side (the stack/lookahead matchers)
150
+ def LEFT
151
+ @rules.map{|r| r.left.subregs }.flatten
152
+ end
153
+
154
+ #remove lookahead and lookback decoration
155
+ def LEFT_NO_LOOKING
156
+ l=LEFT()
157
+ l.map!{|m|
158
+ case m #
159
+ when Reg::LookAhead,Reg::LookBack; m.subregs[0]
160
+ when Proc; []
161
+ else m #
162
+ end #
163
+ }
164
+ l
165
+ end
166
+
167
+ #all classes mentioned in rules, on left and right sides
168
+ def STACKABLE_CLASSES #
169
+ return @sc_result unless @sc_result.nil?
170
+ @sc_result=false
171
+ l=LEFT_NO_LOOKING()
172
+ l=l.map{|lm| sc_juice lm}.flatten.compact
173
+ r= @rules.map{|rr| rr.right }.grep(Class) #classes in productions
174
+ result=l+r
175
+ @sc_result=result.grep(Class).uniq
176
+ fail if @sc_result.empty?
177
+ return @sc_result
178
+ end
179
+
180
+ def juice(m)
181
+ case m #
182
+ when Class;
183
+ return [m] unless @subclasses_of
184
+ result=[m] # and subclasses too
185
+ i=0
186
+ while item=result[i]
187
+ #p item
188
+ result.concat @subclasses_of[item]
189
+ i += 1
190
+ end
191
+ result
192
+ when String,Regexp; juice(RedParse.KW(m))
193
+ when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
194
+ when Reg::Or; m.subregs.map( &method(:juice) )
195
+ when Reg::Not;
196
+ m=m.subregs[0]
197
+ if Class===m or (Reg::Or===m and
198
+ m.subregs.inject{|sum,x| sum && (Class===x) })
199
+ j=juice(m)
200
+ STACKABLE_CLASSES()-j.flatten.compact rescue j
201
+ else
202
+ STACKABLE_CLASSES()
203
+ end
204
+ else STACKABLE_CLASSES()
205
+ end
206
+ end
207
+
208
+ def sc_juice(m)
209
+ case m #
210
+ when Class; [m]
211
+ when String,Regexp; juice(RedParse.KW(m))
212
+ # when String,Regexp; [KeywordToken]
213
+ when Reg::And; m.subregs.map{|x| sc_juice(x)}.compact.map{|x| x.flatten.compact}.inject{|sum,rr| sum&rr }
214
+ when Reg::Or; m.subregs.map( &method(:sc_juice) )
215
+ when Reg::Not; sc_juice(m.subregs[0])
216
+ when Reg::LookAhead, Reg::LookBack; sc_juice(m.subregs[0])
217
+ else []
218
+ end
219
+ end
220
+
221
+ def LOOKAHEAD_CLASSES rule
222
+ last=rule.left.subregs.last
223
+ return STACKABLE_CLASSES() unless Reg::LookAhead===last
224
+ la= last.subregs[0]
225
+ return juice(la).flatten.compact
226
+ end
227
+ #
228
+ def TOS_CLASSES rule
229
+ i=-1
230
+ mats=rule.left.subregs
231
+ m=mats[i]
232
+ m=mats[i-=1] if Reg::LookAhead===m || Proc===m
233
+ result=[]
234
+ while Reg::Repeat===m and m.times.min.zero?
235
+ result<<juice(m.subregs[0])
236
+ m=mats[i-=1]
237
+ end
238
+ return (result+juice(m)).flatten.compact
239
+ end
240
+
241
+ def [](i)
242
+ @rules[i]
243
+ end
244
+
245
+ end #
246
+ #
247
+ module Reducer
248
+ @@rulesets={}
249
+ @@class_narrowerses={}
250
+ def compile(recompile=false)
251
+ klass=self.class
252
+
253
+ #use cached result if available
254
+ if @@rulesets[klass] and !recompile
255
+ @ruleset=@@rulesets[klass]
256
+ @class_narrowers=@@class_narrowerses[klass]
257
+ return
258
+ end
259
+
260
+ #actual rule compilation
261
+ @ruleset=RuleSet.new @rules
262
+ @class_narrowers=[tos=Hash.new(0),la=Hash.new(0)]
263
+ @ruleset.each_rule{|r,i|
264
+ @ruleset.LOOKAHEAD_CLASSES(r).each{|klass2|
265
+ la[klass2] |= 1<<i
266
+ }
267
+ @ruleset.TOS_CLASSES(r).each{|klass2|
268
+ tos[klass2] |= 1<<i
269
+ }
270
+ }
271
+
272
+ #save result to cache if not too dynamic
273
+ if !recompile
274
+ @@rulesets[klass]=@ruleset
275
+ @@class_narrowerses[klass]=@class_narrowers
276
+ end
277
+ end
278
+
279
+ def new_reduce
280
+ # mask=-1
281
+ # (-1).downto(-@class_narrowers.size){|i|
282
+ # mask &= @class_narrowers[i][@stack[i].class]
283
+ # }
284
+ mask=
285
+ @class_narrowers[-1][@stack[-1].class]&
286
+ @class_narrowers[-2][@stack[-2].class]
287
+ @ruleset.each_rule(mask){|r,i|
288
+ res=evaluate(r) and return res
289
+ }
290
+ return false
291
+ end
292
+ end
293
+ end
294
+
295
# Like map, but the block also receives each element's index.
# Returns a new Array of the block's results.
def map_with_index(list)
  mapped = []
  list.each_with_index do |elem, index|
    mapped.push(yield(elem, index))
  end
  mapped
end
300
+
301
+ def all_rules
302
+ return @all_rules if defined? @all_rules
303
+
304
+ @inputs||=enumerate_exemplars
305
+ @rules=expanded_RULES #force it to be recalculated
306
+ @all_rules = map_with_index(@rules){|r,i| Rule.new r,i}
307
+
308
+ @all_rules.each{|r|
309
+ if StackMonkey===r.action
310
+ r.action.exemplars=@inputs.grep r.action.hint
311
+ end
312
+ }
313
+
314
+ warn "error recovery rules disabled for now; creates too many states and masks errors"
315
+ @all_rules.reject!{|r| r.action==MisparsedNode }
316
+
317
+ #names have to be allocated globally to make sure they don't collide
318
+ names=@all_rules.map{|r|
319
+ if r.action.respond_to? :name
320
+ r.action.name
321
+ else
322
+ r.action.to_s
323
+ end
324
+ }.sort
325
+ dups={}
326
+ names.each_with_index{|name,i|
327
+ dups[name]=0 if name==names[i+1]
328
+ }
329
+ @all_rules.each{|r|
330
+ r.name=
331
+ if r.action.respond_to? :name
332
+ r.action.name.dup
333
+ else
334
+ r.action.to_s
335
+ end
336
+ if dups[r.name]
337
+ count=dups[r.name]+=1
338
+ r.name<<"_#{count}"
339
+ end
340
+ }
341
+ end
342
+
343
+ def all_dotted_rules
344
+ all_rules.map{|rule|
345
+ (0...rule.patterns.size).map{|i|
346
+ DottedRule.create(rule,i,self)
347
+ }
348
+ }.flatten
349
+ end
350
+
351
+ #$OLD_PAA=1
352
+
353
+ def all_initial_dotted_rules
354
+ return @all_initial_dotted_rules if defined? @all_initial_dotted_rules
355
+ @all_initial_dotted_rules=result=
356
+ all_rules.map{|rule| DottedRule.create(rule,0,nil) }
357
+
358
+ p :all_init
359
+
360
+ unless defined? $OLD_PAA
361
+ scanning=result
362
+ provisionals=nil
363
+ while true
364
+ old_provisionals=provisionals
365
+ provisionals={}
366
+ scanning.each{|dr|
367
+ dr.also_allow=dr.compute_also_allow(provisional=[false]) #fill out dr.also_allow
368
+ provisionals[dr]=provisional[0]
369
+ }
370
+ scanning=provisionals.map{|dr,val| dr if val }.compact
371
+ end until provisionals==old_provisionals
372
+ end
373
+ p :all_init_done
374
+
375
+ return result
376
+ end
377
+
378
+ class Rule #original user rules, slightly chewed on
379
+ def initialize(rawrule,priority)
380
+ @priority=priority
381
+ @action=rawrule.right
382
+ @patterns=rawrule.left.subregs.dup
383
+ #remove lookback decoration if any, just note that lb was present
384
+ if Reg::LookBack===@patterns[0]
385
+ @lookback=true
386
+ @patterns[0]=@patterns[0].subregs[0]
387
+ end
388
+
389
+ case @patterns[-1]
390
+ #Symbol is pointless here, methinks.
391
+ when Proc,Symbol; #do nothing
392
+ when Reg::LookAhead; @patterns[-1]=@patterns[-1].subregs[0]
393
+ else @patterns.push Object #add la if none was present
394
+ end
395
+
396
+ #search for looping matchers with minimum >0 and replace them
397
+ #with a number of scalars (== the minimum) followed by a loop with 0 min.
398
+ #search for bare strings or regexps and replace with KW( ) wrapper
399
+ @patterns.each_with_index{|p,i|
400
+ case p
401
+ when String,Regexp; @patterns[i]=RedParse.KW(p)
402
+ when Reg::Repeat
403
+ if p.itemrange.first>0
404
+ @patterns[i,1]=
405
+ *[p.subregs[0]]*p.itemrange.first<< #minimum # as scalars
406
+ p.subregs[0].reg.* #0-based looper
407
+ end
408
+ end
409
+ }
410
+ @drs=[]
411
+ end
412
+
413
+ attr_reader :drs
414
+
415
+ def hash; priority end
416
+ def == other; Rule===other and priority==other.priority end
417
+ alias eql? ==
418
+
419
+ def lookback?; @lookback if defined? @lookback end
420
+
421
+ attr_reader :patterns,:action,:priority
422
+ attr_accessor :name
423
+
424
# The matcher at position +n+ of this rule's patterns. A Reg::Repeat wrapper
# is unwrapped to the matcher it repeats (its first subreg).
def at(n)
  pattern = patterns[n]
  Reg::Repeat === pattern ? pattern.subregs[0] : pattern
end
429
# True when the pattern at position +n+ is a Reg::Repeat whose minimum
# repetition count is zero, i.e. it may match nothing at all.
def optional? n
  pattern = patterns[n]
  return false unless Reg::Repeat === pattern
  pattern.itemrange.first.zero?
end
433
# True when the pattern at position +n+ is a Reg::Repeat with an unbounded
# maximum. A maximum of exactly 1 is not looping. Any other finite maximum
# trips the internal fail, which the blanket rescue turns into false -- as it
# does for every other error raised here.
def looping? n
  pattern = patterns[n]
  if Reg::Repeat === pattern
    return false if pattern.itemrange.last == 1
    fail unless pattern.itemrange.last.infinite?
    true
  else
    false
  end
rescue Exception
  false
end
442
+
443
+ def reduces_to
444
+ case @action
445
+ when Class; @action
446
+ when StackMonkey; @action.exemplars
447
+ when :error,:shift,:accept; nil
448
+ else fail "#@action unexpected in reduces_to"
449
+ end
450
+ end
451
+
452
+ def unruly?
453
+ return if action==:accept
454
+ action.class!=Class || lookback?
455
+ end
456
+
457
+ def final_promised_pattern
458
+ case @action
459
+ when DeleteMonkey #delete_monkey
460
+ vector_indexes=(@action.first_changed_index..-1).select{|i| Reg::Repeat===@patterns[i] }
461
+ fail unless vector_indexes.empty?
462
+ result=@patterns.dup
463
+ result.delete_at @action.first_changed_index
464
+ when StackMonkey #stack_monkey
465
+ result=@patterns.dup
466
+ result[@action.first_changed_index..-1]=[@action.hint]
467
+ when Class
468
+ result= [@action,@patterns.last]
469
+ result.unshift @patterns.first if lookback?
470
+ when :accept, :error, :shift
471
+ result=@patterns.dup
472
+ else
473
+ pp @action
474
+ fail
475
+ end
476
+ result[-1]=result[-1].la unless result.empty?
477
+ result
478
+ end
479
+
480
+ def final_promised_rule
481
+ @final_promised_rule ||=
482
+ Rule.new(-final_promised_pattern>>nil,-priority)
483
+ end
484
+ end
485
+
486
+ class DottedRule
487
+ def initialize(rule,pos,parser)
488
+ @rule,@pos=rule,pos
489
+ fail unless (0...rule.patterns.size)===@pos
490
+ # @also_allow= compute_also_allow(parser) if parser unless defined? $OLD_PAA
491
+ end
492
+ def compute_also_allow(parser,provisional=[false])
493
+ parser.all_initial_dotted_rules.map{|dr|
494
+ next if dr==self
495
+ fake_rule=dr.rule.final_promised_rule
496
+ final_more_dr=DottedRule.create(fake_rule,0,nil)
497
+ also=dr.also_allow
498
+ unless also
499
+ provisional[0]||=0
500
+ provisional[0]+=1
501
+ also=[]
502
+ end
503
+ also+[dr] if optionally_combine final_more_dr,parser
504
+ }.flatten.compact.uniq
505
+ end
506
+ attr_reader :rule,:pos
507
+ attr_accessor :also_allow
508
+
509
+ def self.create(rule,pos,parser)
510
+ result=rule.drs[pos] and return result
511
+ result=rule.drs[pos]=DottedRule.new(rule,pos,parser)
512
+ unless defined? $OLD_PAA
513
+ result.also_allow=result.compute_also_allow(parser) if parser
514
+ end
515
+ return result
516
+ end
517
+
518
+ def hash; (@rule.priority<<3)^@pos end
519
+ def == other; DottedRule===other and @pos==other.pos and @rule==other.rule end
520
+ alias eql? ==
521
+
522
+ def name; @rule.name+"@#@pos" end
523
+
524
+ def looping?
525
+ @rule.looping?(@pos)
526
+ end
527
+
528
+ #returns Conditional|Rule|DottedRule|+[DottedRule.+]|nil
529
+ def evolve input, parser, seenlist,result2
530
+ #print "["
531
+ #$stdout.flush
532
+ idname=input.identity_name
533
+ idname=parser.identity_name_alias? idname
534
+ cache=seenlist[[self,idname]]
535
+ unless cache==:dunno_yet
536
+ result2.concat Array(cache).flatten.compact.uniq.sort_by{|x| x.name}
537
+ return cache
538
+ end
539
+ i=pos
540
+ lasti=i-1
541
+ result=[]
542
+ result=loop do #might need multiple tries if optional matcher(s) here
543
+ fail unless i>lasti
544
+ lasti=i
545
+ p=@rule.at(i) #what is current pattern in this dottedrule?
546
+ fail if Proc===p #shouldnt happen anymore
547
+ if parser.pattern_matches_nodes? p
548
+
549
+ #if any dotted rules have nodes at this point,
550
+ #also include the set of rules@0 which
551
+ #can (possibly indirectly) generate that node.
552
+ #(match tokens found on left sides of productions for p)
553
+ seenlist[[self,idname]]=result
554
+ if false
555
+ result.concat recurse_match_drs(parser).uniq.map{|dr|
556
+ dr and
557
+ #begin print "{#{dr.name}"
558
+ dr.evolve input,parser,seenlist,result2
559
+ #ensure print "}" end
560
+ }.flatten.compact.uniq
561
+ end
562
+ end
563
+ @saw_item_that={}
564
+ if p===input
565
+ i+=1 unless @rule.looping?(i)
566
+ fail if i>@rule.patterns.size
567
+
568
+ if !@saw_item_that.empty?
569
+ p(:saw_item_that!)
570
+ fail unless @saw_item_that.size==1
571
+ pair=@saw_item_that.to_a.first
572
+ fail unless p.equal? pair.last
573
+ it=pair.first
574
+ action=
575
+ if i==@rule.patterns.size
576
+ @rule
577
+ else
578
+ DottedRule.create(@rule,i,parser)
579
+ end
580
+ break Conditional.new(it,action)
581
+ end
582
+ @saw_item_that=nil
583
+
584
+ if i == @rule.patterns.size
585
+ break @rule
586
+ else
587
+ break result<<DottedRule.create(@rule,i,parser)
588
+ end
589
+ elsif !@rule.optional?(i)
590
+ break result.empty? ? nil : result
591
+ elsif (i+=1) >= @rule.patterns.size
592
+ break @rule
593
+ #else next p
594
+ end
595
+ end #loop
596
+ seenlist[[self,idname]]=result
597
+ result2.concat Array(result).flatten.compact.uniq.sort_by{|x| x.name}
598
+ return result
599
+ #ensure print "]"
600
+ end
601
+
602
+ #returns +[(DottedRule|nil).*]
603
+ def recurse_match_drs parser, result=nil
604
+ unless result
605
+ table=parser.rmd_cache
606
+ if table
607
+ cache=table[self]
608
+ return cache if cache
609
+ else
610
+ parser.rmd_cache={}
611
+ end
612
+
613
+ result=[]
614
+ end
615
+ #print "("
616
+ #print @rule.name+"@#@pos"
617
+ p=@rule.at(@pos)
618
+
619
+ #find set of nodes that could match here
620
+ nodes_here=parser.exemplars_that_match(p&Node)
621
+
622
+ #find the set of rules that could generate a node in our list
623
+ rrules=parser.all_rules.select{|rule|
624
+ !rule.unruly? and !nodes_here.grep(rule.action).empty?
625
+ }.map{|rule|
626
+ DottedRule.create(rule,0,parser)
627
+ }
628
+
629
+ #if any generating rules match a node in the leftmost pattern,
630
+ #add the rules which can generate _that_ node too.
631
+ result.push self #force self to be excluded from future recursion
632
+ oldsize=result.size
633
+ unless rrules.empty?
634
+ result.concat rrules
635
+
636
+ unless result.respond_to? :index_of
637
+ class<<result
638
+ attr_accessor :index_of
639
+ end
640
+ result.index_of={}
641
+ end
642
+ rio=result.index_of
643
+ oldsize.upto(result.size){|i| rio[result[i]]||=i }
644
+ rrules.each{|rrule|
645
+ i=rio[rrule] or fail #index() inside each() == O(N**2) complexity. this is the slow line.
646
+ #but skip recursion on rules already done at a higher level
647
+ rrule.recurse_match_drs parser,result if i>=oldsize
648
+ }
649
+ end
650
+ result[oldsize-1]=nil #don't actually include self in result
651
+ #result.update_indices oldsize-1, oldsize-1
652
+
653
+ parser.rmd_cache[self]=result
654
+ return result
655
+ #ensure print ")"
656
+ end
657
+
658
# Decides whether the +weaker+ dotted rule can be followed alongside this
# one, i.e. whether some known parser input could advance both.
#
# Results are memoized in parser.oc_cache, keyed by [self, weaker]; the
# ensure clause stores whatever ends up in +result+ on every exit path.
#
# For each pair of possible next matchers (from #outcomes of both rules) the
# parser inputs matching both are collected. Inputs that would already have
# been taken by an earlier optional pattern of either rule are then removed.
# The filtered list must be assigned back: Array#reject is non-destructive,
# and discarding its result left the following emptiness check as dead code.
#
# Returns true/false (or the recursive result for the advanced positions).
def optionally_combine weaker,parser
  #lotsa caching needed if this is ever to be performant
  if parser.oc_cache
    result=parser.oc_cache[[self,weaker]]
    return result unless result.nil?
  else
    parser.oc_cache={}
  end

  other=weaker
  mymatches,myposes= self.outcomes
  matches, poses = other.outcomes
  matches.each_with_index{|match,i|
    mymatches.each_with_index{|mymatch,myi|
      intersect=parser.inputs.grep(match&mymatch)
      unless intersect.empty?

        #but don't allow matches that would be matched
        #by an earlier (but optional) pattern.
        disallowed=Reg::Or.new(
          *possible_matchers_til(myi)+
          other.possible_matchers_til(i)
        )
        intersect=intersect.reject{|x| disallowed===x }

        if intersect.empty?
          return result=false
        elsif poses[i]>=other.rule.patterns.size
          return result=true  #success if weaker rule is at an end
        elsif myposes[myi]>=rule.patterns.size
          return result=false       #fail if stronger rule at an end
        else
          p [:**,rule.name,myposes[myi]]
          mynew=DottedRule.create(rule,myposes[myi],parser)
          new=DottedRule.create(other.rule,poses[i],parser)
          return result=mynew.optionally_combine( new,parser )
        end
      end
    }
  }
  return result=false
ensure
  parser.oc_cache[[self,weaker]]=result
end
702
+
703
+ def possible_matchers_til i
704
+ (pos...i-1).map{|j|
705
+ m=rule.at(j)
706
+ Reg::Repeat===m ? m.subregs[0] : m
707
+ }
708
+ end
709
+
710
+ def outcomes
711
+ til=@rule.patterns.size
712
+ at=@pos
713
+ result=[[],[]]
714
+ loop do
715
+ m=@rule.patterns[at]
716
+ case m
717
+ when Proc;
718
+ result.first.push Object
719
+ result.last.push at+1
720
+ break
721
+ when Reg::Repeat
722
+ assert @rule.optional?(at)
723
+ to=at
724
+ to+=1 unless @rule.looping? at
725
+ result.first.push m.subregs[0]
726
+ result.last.push to
727
+ else
728
+ result.first.push m
729
+ result.last.push at+1
730
+ break
731
+ end
732
+ at+=1
733
+ break if at>=til
734
+ end
735
+ return result
736
+ end
737
+
738
+ end
739
+
740
+ attr_accessor :rmd_cache
741
+ attr_accessor :oc_cache
742
+ attr_accessor :sl2ms_cache
743
+
744
+ class Conditional
745
+ def initialize(condition,action)
746
+ @condition,@action=condition,action
747
+ @condition.restore :hash,:==
748
+ end
749
+ attr_reader :condition,:action
750
+
751
+ def hash
752
+ @condition.hash^@action.hash
753
+ end
754
+ def == other
755
+ Conditional===other and @condition==other.condition and @action==other.action
756
+ end
757
+ alias eql? ==
758
+
759
+ def name; @condition.inspect+"?"+@action.name end
760
+
761
+ def priority; @action.priority end
762
+ end
763
+
764
+ class ParserState; end
765
+ class MultiShift; end
766
+ class MultiReduce; end
767
+
768
+ ACTION_PATTERN=ParserState|Rule|MultiShift|MultiReduce|:accept|:error
769
+ class ParserState #a union of dotted rules
770
+ def initialize(dotteds,index)
771
+ fail if dotteds.empty? #error state
772
+ fail unless dotteds.grep(nil).empty?
773
+ @dotteds=dotteds
774
+ @index=index
775
+ sort_substates!
776
+ @actions={} #key is an input, value is ParserState|Rule|MultiShift|MultiReduce|:accept|:error
777
+ end
778
+
779
+ attr_reader :actions
780
+
781
+ def [](k)
782
+ result=@actions[k]
783
+ assert ACTION_PATTERN===result
784
+ result
785
+ end
786
+ def []=(k,v)
787
+ assert ACTION_PATTERN===v
788
+ @actions[k]=v
789
+ end
790
+
791
+ def sort_substates!
792
+ @dotteds=@dotteds.sort_by{|dotted| -dotted.pos}.uniq
793
+ end
794
+ attr :dotteds
795
+
796
+ def dup
797
+ result=super
798
+ result.instance_variable_set(:@dotteds,@dotteds.dup)
799
+ return result
800
+ end
801
+
802
+ def substates; [self] end
803
+
804
+ def shiftlist2multishift? shiftlist,parser
805
+ return :error if shiftlist.empty?
806
+ parser.sl2ms_cache||={}
807
+ cache=parser.sl2ms_cache[shiftlist]
808
+ return cache if cache
809
+ fixed,varying=shiftlist.partition{|res| DottedRule===res}
810
+ result=ParserState.new(fixed,nil)
811
+ result.perhaps_also_allow parser.all_rules,parser
812
+ unless varying.empty? #MultiShift
813
+ varying.map!{|v| [v.condition,v.action]}.flatten
814
+ result=MultiShift.new(result,varying)
815
+ end
816
+ parser.sl2ms_cache[shiftlist]=result
817
+ return result
818
+ end
819
+
820
+ #given a list of rules, see if any of them are compatible with
821
+ #a current substate. (compatibility means the aggregate patterns
822
+ #can be anded together and still be able to conceivably match something.)
823
+ #if any of morerules are actually compatible, add it to current state.
824
+ def perhaps_also_allow(morerules,parser)
825
+ fail unless morerules==parser.all_rules
826
+ @dotteds.concat @dotteds.map{|d| d.also_allow }.flatten.compact.uniq
827
+ sort_substates!
828
+ end
829
+ def old_perhaps_also_allow(morerules,parser)
830
+ morerules=morerules.dup
831
+ need_sort=false
832
+ scan_rules=@dotteds
833
+ added={}
834
+ while true
835
+ adding=[]
836
+ morerules.each{|morerule|
837
+ next if added[morerule]
838
+ fake_rule=morerule.final_promised_rule
839
+ final_more_dr=DottedRule.create(fake_rule,0,parser)
840
+ scan_rules.each{|dotted|
841
+ if dotted.optionally_combine final_more_dr,parser
842
+ adding<<DottedRule.create(morerule,0,parser)
843
+ added[morerule]=1
844
+ break
845
+ end
846
+ }
847
+ }
848
+ break if adding.empty?
849
+ @dotteds.concat adding
850
+ need_sort=true
851
+ scan_rules=adding
852
+ end
853
+ sort_substates! if need_sort
854
+ end
855
+ alias perhaps_also_allow old_perhaps_also_allow if defined? $OLD_PAA
856
+
857
+
858
+ #returns ParserState|MultiShift|MultiReduce|Rule|:accept|:error
859
+ def evolve input,parser,seenlist
860
+ result2=[]
861
+ @dotteds.each{|dotted|
862
+ dotted.evolve input,parser,seenlist,result2
863
+ }
864
+
865
+ result=
866
+ #seenlist.values.flatten.compact.uniq.sort_by{|x| x.name}
867
+ result2=result2.uniq.compact.sort_by{|x| x.name}
868
+ #pp [result,result2].map{|x| x.map{|res| DottedRule===res ? res.name : res }}
869
+ #pp result2.map{|res| DottedRule===res ? res.name : res }
870
+ # result==result2 or fail
871
+
872
+ return result=:error if result.empty?
873
+
874
+
875
+ #ok, who wants to shift and who wants to reduce?
876
+ shiftlist,reducelist=result.partition{|res|
877
+ DottedRule===res or
878
+ Conditional===res && DottedRule===res.action
879
+ }
880
+
881
+ #if no reducers at all, just try (multi?)shift
882
+ return result=shiftlist2multishift?( shiftlist,parser )if reducelist.empty?
883
+
884
+ #line up reducers by priority
885
+ actions=reducelist \
886
+ .sort_by{|rule| -rule.priority }
887
+ # .map{|rule| rule.action }
888
+ #actions is +[(Rule|Conditional[Rule]).*]
889
+ action=actions.shift #this first (unless conditional)
890
+ #action is Rule|Conditional[Rule]
891
+ result=
892
+ case action.action
893
+ when :error; return :error
894
+ when Class, StackMonkey
895
+ action
896
+ when :accept
897
+ :accept
898
+ when :shift #this counts as a reduce at this point, but it writes shift instructions
899
+ shiftlist2multishift? shiftlist,parser
900
+ when Rule #oy, vey, was a Conditional
901
+ shiftaction=shiftlist2multishift?(shiftlist,parser)
902
+ fail unless Rule===action.action
903
+ case action.action.action
904
+ when :error; huh
905
+ when :shift, StackMonkey, :accept, Class #MultiReduce
906
+ first_fixed_index=actions.size
907
+ #actions is +[(Rule|Conditional[Rule]).*]
908
+ actions.each_with_index{|act,i|
909
+ break first_fixed_index=i unless Conditional===act
910
+ }
911
+ condactions=actions[0...first_fixed_index].unshift(action)
912
+ condactions=condactions.inject([]){|sum,cond|
913
+ act=cond.action
914
+ act=shiftaction if act==:shift #=>shiftlist?
915
+ sum.push cond.condition, act
916
+ }
917
+ #possible optimization: one or more :shift right at end could be ignored
918
+ if actions[first_fixed_index]
919
+ action=actions[first_fixed_index].action
920
+ else
921
+ action=shiftaction
922
+ end
923
+ MultiReduce.new condactions,action #=>shiftlist?
924
+ else fail
925
+ end
926
+ else fail "#{action} not expected here"
927
+ end
928
+ #stack monkeys/:accept are treated like reduce here
929
+ ensure
930
+ assert ACTION_PATTERN===result
931
+ end
932
+
933
+ def name
934
+ @name||@dotteds.map{|dotted| dotted.name}.join(",")
935
+ end
936
+ attr_writer :name
937
+
938
# Assigns this state a name (once) and returns it.
#
# The name is the comma-joined names of the most prominent member dotted
# rules. +name2count+ tracks how often each base name has been handed out:
# the first user gets the bare name, later users get "___<count>" appended.
# Once @name is defined it is returned unchanged.
def rename(name2count)
  return @name if defined? @name

  base = most_prominent_members.map { |dotted| dotted.name }.join(",")
  if name2count[base]
    name2count[base] += 1
    @name = base + "___" + name2count[base].to_s
  else
    name2count[base] = 1
    @name = base
  end
end
950
+
951
+ def most_prominent_members
952
+ result=@dotteds.select{|dr| dr.pos==@dotteds.first.pos }
953
+ close2end=@dotteds.map{|dr| [dr,dr.rule.patterns.size-dr.pos]}.sort_by{|(o,k)| -k}
954
+ result+=close2end.select{|(dr,k)| k==close2end.first.last}.map{|(dr,k)| dr}
955
+ result2=result.reject{|dr| dr.pos==0 or dr.pos==1&&dr.rule.lookback?}
956
+ result=result2 unless result2.empty?
957
+ return result
958
+ end
959
+
960
+ def hash
961
+ -@dotteds.hash
962
+ end
963
+ def == other
964
+ ParserState===other and
965
+ @dotteds==other.dotteds
966
+ end
967
+ alias eql? ==
968
+
969
# True when at least one of this state's dotted rules reports looping?.
def looping?
  @dotteds.each{|dr| return true if dr.looping? }
  false
end
972
+
973
# Does the action taken for +input+ lead into a (different) looping state?
#
# :error            -> false
# a ParserState     -> that state is looping? and is not this state
# MultiShift/Reduce -> delegated to the action's own transition_to_loop?
# anything else     -> fail
#
# NOTE(review): the action is fetched with @actions.input, i.e. a method
# literally named "input" is called on @actions and the +input+ argument is
# not used for the lookup. That looks like it was meant to be an index into
# @actions -- confirm before relying on this (the method is marked unused).
def transition_to_loop? input #not used
  action=@actions.input
  if :error===action
    false
  elsif ParserState===action
    action.looping? and action!=self
  elsif MultiShift===action or MultiReduce===action
    action.transition_to_loop? input
  else
    fail
  end
end
983
+
984
# Split this state's action table into @goto and @sr, then drop the
# combined table (@actions is set to nil).
#
# inputs:: exemplar objects; each is looked up by its +name+. An action
#          whose key names a Node exemplar goes to @goto; every other key
#          must name a Token exemplar (asserted) and goes to @sr.
#
# In each table the action occurring most often is removed from the
# explicit entries and installed as the hash default instead.
def make_sr_goto_tables inputs
  exemplar_named={}
  inputs.each{|input| exemplar_named[input.name]=input }

  @goto={}; @sr={}
  goto_tally=Hash.new(0); sr_tally=Hash.new(0)
  actions.each_pair{|key,action|
    if Node===exemplar_named[key]
      @goto[key]=action
      goto_tally[action]+=1
    else
      assert(Token===exemplar_named[key])
      @sr[key]=action
      sr_tally[action]+=1
    end
  }

  #for each table: find the most frequent action and make it the default
  [[@goto,goto_tally],[@sr,sr_tally]].each{|table,tally|
    commonest=tally.sort_by{|action,count| count }.last[0]
    table.delete_if{|key,action| action==commonest }
    table.default=commonest
  }

  @actions=nil
end
1010
+
1011
+ end
1012
+
1013
# An action that picks among several actions by testing conditions against
# the input, falling back to a default when none of them match.
class MultiReduce
  # list::    flat array alternating condition, action, condition, action...
  # default:: action used when no condition matches
  def initialize(list,default)
    @list,@default=list,default
    #default can be any valid action (except another MultiReduce)
  end

  attr_reader :list,:default

  # Return the action paired with the first condition for which
  # condition===x holds, or the default when there is none.
  def act(x)
    (0...@list.size).step(2){|i|
      return @list[i+1] if @list[i]===x
    }
    return default
  end

  # Substates of the default action, or [] if the default has none.
  # (Actions in the conditional list are not consulted.)
  def substates
    if @default.respond_to? :substates
      @default.substates
    else
      []
    end
  end

  # Every action reachable through this object: the conditional actions in
  # list order, followed by the default's own actions (when it responds to
  # +actions+) or the default itself (when it is non-nil).
  def actions
    result=[]
    (1...@list.size).step(2){|i|
      result << @list[i]
    }
    if @default.respond_to? :actions
      result.concat @default.actions
    elsif @default
      result<<@default
    end
    result
  end

  # Delegates to the default action only.
  def transition_to_loop? input #not used
    @default.transition_to_loop? input
  end

  def hash
    @list.hash^~@default.hash
  end

  # Equal only to another MultiReduce with equal list and default.
  # The type check matters: these objects sit in action tables next to
  # other kinds of action, so they get compared against objects that have
  # no +list+ method; those must compare unequal rather than raise.
  def == other
    MultiReduce===other and
      @list==other.list and @default==other.default
  end
  alias eql? ==
end
1062
+
1063
# A shift whose target state depends on which of several predicates match
# the input. One state is precomputed for every combination of predicate
# outcomes; #act picks the one matching a concrete input.
class MultiShift
  # base::      state-like object; it is dup'ed once per combination and
  #             must respond to +append+ and +sort_substates!+
  # modifiers:: flat array alternating predicate, modifier, predicate, ...
  #
  # @map[i] is base plus the modifier of every predicate whose bit is set in
  # i, where predicate number k (stored at modifiers[2*k]) owns bit k. This
  # is the same numbering #act uses to build its index.
  def initialize(base,modifiers)
    @base,@modifiers=base,modifiers
    @map=
      (0...2**(modifiers.size/2)).map{|i| base.dup}
    @map.each_with_index{|state,i| #for each branch to the multishift
      (0...modifiers.size).step(2){|j| #for each predicate in the multishift
        #j walks the flat list in steps of 2, so the predicate's bit is j/2
        if (i&(1<<(j/2))).nonzero? #if the predicate tests true in this branch
          state.append modifiers[j+1] #add the predicates modifier to the state
        end
      }
      state.sort_substates!
    }
  end

  # Return the precomputed state for input x: bit k of the index is set when
  # predicate k matches x (predicate===x).
  def act(x)
    result=0
    (0...@modifiers.size).step(2){|i|
      result|=(1<<(i/2)) if @modifiers[i]===x
    }
    @map[result]
  end

  attr_reader :map, :modifiers

  # Copy of the list of all precomputed states.
  def substates
    @map.dup
  end

  # Same list as #substates.
  def actions
    @map.dup
  end

  # The three methods below only call +huh+, which is not defined in this
  # class.
  # NOTE(review): they appear to be unfinished placeholders -- confirm.
  def transition_to_loop? input #not used
    huh
  end

  def hash
    huh
  end
  def == other
    huh
  end
  alias eql? ==
end
1108
+
1109
+ #an action is one of:
1110
+ #a ParserState (shift)
1111
+ #a Rule (reduce)
1112
+ #nil (error)
1113
+ #:accept
1114
+ #MultiReduce
1115
+ #MultiShift
1116
+
1117
+ #just the left side (the stack/lookahead matchers)
1118
# Left-hand sides of all rules as one flat list of matchers.
# Side effect: (re)loads @rules from expanded_RULES on every call.
def LEFT
  @rules=expanded_RULES()
  @rules.inject([]){|lefts,rule| lefts << rule.left.subregs }.flatten
end
1124
+
1125
+ #remove lookahead and lookback decoration (not used?)
1126
# The LEFT() list with every Proc entry replaced by an empty array.
# Fails if a Reg::LookAhead or Reg::LookBack is still present; all other
# entries are passed through unchanged.
def LEFT_NO_LOOKING
  matchers=LEFT()
  matchers.map!{|matcher|
    if Reg::LookAhead===matcher or Reg::LookBack===matcher
      fail #should be gone already now
    elsif Proc===matcher
      []
    else
      matcher
    end
  }
  matchers
end
1137
+
1138
# Build a parent => children table restricted to the given classes.
#
# Object is always added to the front of the list. Each listed class is
# filed under the nearest of its ancestors that is itself listed; a class
# with no listed ancestor is filed nowhere. Returns a hash with one entry
# (possibly an empty array) per listed class.
def child_relations_among(*classes)
  classes.unshift Object
  children={}
  classes.each{|klass| children[klass]=[] }

  classes.each{|klass|
    first,*proper_ancestors=klass.ancestors
    first==klass or fail
    nearest=proper_ancestors.find{|anc| children[anc] }
    children[nearest] << klass if nearest
  }

  children
end
1157
+
1158
+ #all classes mentioned in rules, on left and right sides
1159
#all classes mentioned in rules, on left and right sides
#
# The result is memoized in @sc_result. That variable is set to an empty
# array before anything else is computed and filled in place at the end, so
# a call made while the computation is still running gets that same array
# object back. @subclasses_of holds child_relations_among(*vertices) only
# for the duration of the computation. Fails if no class was found.
def STACKABLE_CLASSES #
  return @sc_result if defined? @sc_result
  @sc_result=[]
  @subclasses_of=child_relations_among(*vertices)

  from_left=LEFT().map{|matcher| sc_juice matcher }.flatten.compact
  assert from_left.grep(nil).empty?
  from_right=@rules.map{|rule| rule.right }.grep(Class) #classes in productions

  @subclasses_of=nil
  @sc_result.replace((from_left+from_right).grep(Class).uniq)
  fail if @sc_result.empty?
  @sc_result
end
1174
+
1175
+ # def juice(m)
1176
+ # case m #
1177
+ # when Class
1178
+ # return [m] unless @subclasses_of
1179
+ # result=[m] # and subclasses too
1180
+ # i=0
1181
+ # while item=result[i]
1182
+ # p item
1183
+ # result.concat @subclasses_of[item] rescue nil
1184
+ # i += 1
1185
+ # end
1186
+ # result
1187
+ # when String,Regexp; juice(RedParse.KW(m))
1188
+ # when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
1189
+ # when Reg::Or; m.subregs.map &method(:juice)
1190
+ # when Reg::Not
1191
+ # m=m.subregs[0]
1192
+ # if Class===m or (Reg::Or===m and
1193
+ # m.subregs.find{|x| Class===x })
1194
+ # juice(m)
1195
+ # else []
1196
+ # end
1197
+ # else []
1198
+ # end
1199
+ # end
1200
+
1201
# Extract the classes mentioned by matcher +m+.
#
# Class                       -> [m]
# String, Regexp              -> [KeywordToken]
# Reg::And                    -> intersection of what the sub-matchers yield
#                                (nil when there are no sub-matchers)
# Reg::Or                     -> one (possibly nested) result per sub-matcher
# Reg::Not, Reg::LookAhead,
# Reg::LookBack, Reg::Repeat  -> whatever the first sub-matcher yields
# anything else               -> []
def sc_juice(m)
  case m #
  when Class then [m]
  when String,Regexp then [KeywordToken]
  when Reg::And
    juiced=m.subregs.map{|sub| sc_juice(sub) }.compact
    juiced.map{|classes| classes.flatten.compact }.inject{|common,classes| common&classes }
  when Reg::Or
    m.subregs.map{|sub| sc_juice(sub) }
  when Reg::Not, Reg::LookAhead, Reg::LookBack, Reg::Repeat
    sc_juice(m.subregs[0])
  else
    []
  end
end
1213
+
1214
# The rules that answer true to unruly?, memoized in @unruly_rules.
# The first call also prints :unruly_rules and the selected rules' names.
def unruly_rules
  unless defined? @unruly_rules
    @unruly_rules=all_rules.select{|candidate| candidate.unruly? }

    p :unruly_rules
    pp(@unruly_rules.map{|rule| rule.name })
  end
  @unruly_rules
end
1225
+
1226
# Build one exemplar object per specification supplied by the stackable
# classes; the list is memoized in the class variable @@exemplars.
#
# Every class in STACKABLE_CLASSES() is asked for enumerate_exemplars and
# the resulting lists are concatenated. Each specification is consumed as
# [klass, method_name, value, method_name, value, ...]: klass is
# instantiated with allocate (initialize is not run), and for each pair a
# singleton method is defined by eval'ing source text built from the name
# and value.inspect. Each exemplar's to_s returns its identity_name.
def enumerate_exemplars
  return @@exemplars if defined? @@exemplars #dunno why this is necessary

  specs=STACKABLE_CLASSES().map{|klass|
    klass.enumerate_exemplars
  }.inject{|all,some| all+some }

  specs.map!{|spec|
    exemplar=spec.shift.allocate
    until spec.empty?
      meth=spec.shift
      value=spec.shift
      #the method body is the value's inspect text, re-evaluated on each call
      eval "def exemplar.#{meth}; #{value.inspect} end"
    end
    def exemplar.to_s; identity_name end
    exemplar
  }

  @@exemplars=specs
end
1244
+
1245
# Find inputs that no rule pattern can tell apart.
#
# Every distinct pattern of every rule is collected (a Reg::Repeat is
# replaced by its first sub-matcher). Each element of @inputs gets a profile
# with one entry per pattern: the Proc itself for Proc patterns, otherwise
# true/false for whether pattern===input. The first input with a given
# profile is kept; each later one is reported on stdout and recorded in
# @identity_name_aliases (kept input => later input; a later duplicate
# overwrites an earlier record). Returns the kept inputs in their original
# order. Four fixed warnings are emitted on every call.
def check_for_parsealike_inputs
  all_patterns=all_rules.map{|rule|
    rule.patterns.map{|pat| Reg::Repeat===pat ? pat.subregs[0] : pat }
  }.flatten.uniq
  first_with_profile={}
  @identity_name_aliases={}
  warn "why are non_empty and after_equals params to BeginNode appearently ignored?"
  warn "some token identities overlap themselves?!?"
  warn "some overlaps are duplicated"
  warn ". and :: overlap => ..... surely that's not right"
  survivors=@inputs.map{|input|
    profile=all_patterns.map{|pat| Proc===pat ? pat : !!(pat===input) }
    earlier=first_with_profile[profile]
    if earlier
      puts "#{input} overlaps #{earlier}"
      @identity_name_aliases[earlier]=input
      nil
    else
      first_with_profile[profile]=input
    end
  }
  survivors.compact
end
1264
+
1265
# Enumerate the parser states reachable from start_state.
#
# A work list (todo) is seeded with start_state. Each state taken off the
# list is evolved against every input returned by
# check_for_parsealike_inputs (minus StartToken instances); the resulting
# action is stored on the state under the input's identity_name, and any
# substates of that action not numbered yet are numbered and queued.
# Progress information is printed with pp/p while running.
# Returns the states in the order they were processed.
+ def enumerate_states
1266
+ inputs=check_for_parsealike_inputs
1267
+ inputs.reject!{|x| StartToken===x}
1268
+
1269
+ result=[]
1270
+ todo=[start_state]
1271
+
1272
# seenlist is handed to every evolve call; unknown keys read as :dunno_yet
+ seenlist = {}
1273
+ seenlist.default=:dunno_yet
1274
+
1275
# j counts processed states; start/was are timestamps for the progress line
+ j=0
1276
+ start=was=Time.now
1277
# in_result maps each state seen so far to its sequence number
+ in_result={} #this should go away; obsoleted by @states
1278
+ state_num=-1
1279
+ todo.each{|st| in_result[st]=(state_num+=1) }
1280
+ ps=todo.first
1281
+ pp [-in_result[ps], *ps.dotteds.map{|dr| dr.name }]
1282
+ old_todo_size=todo.size
1283
+ while state=todo.shift
1284
+ result<<state
1285
+
1286
# (i is assigned here but not read anywhere in this method)
+ i=0
1287
+ inputs.each {|input|
1288
+ newstate=state.evolve input,self,seenlist
1289
+ assert ACTION_PATTERN===newstate
1290
+ #newstate is ParserState|MultiShift|MultiReduce|Rule|:accept|:error
1291
+ state[input.identity_name]=newstate
1292
# actions without substates contribute no new states to explore
+ next unless newstate.respond_to? :substates
1293
+ #newstate.substates is just [newstate] for plain ParserStates
1294
+ morestates=newstate.substates.reject{|x| in_result[x]}
1295
+ morestates.each{|st| in_result[st]=(state_num+=1) }
1296
+ # p [in_result[state],:+,input.identity_name,:>>,pretty(newstate,in_result)]
1297
+ todo.concat morestates
1298
+
1299
+ # pp morestates.map{|ps|
1300
+ # [-in_result[ps], *ps.dotteds.map{|dr| dr.name }]
1301
+ # }
1302
+ # pp pretty(newstate,in_result) unless ParserState===newstate
1303
+ }
1304
+
1305
# progress line: marker, states done, queue length, queue growth,
# seconds for this state, states per second overall, percent done
+ now=Time.now
1306
+ p [:*,j+=1,todo.size,todo.size-old_todo_size,now-was,j/(now-start),(100.0*j/(j+todo.size)).to_i]
1307
+ old_todo_size=todo.size
1308
+ was=now
1309
+
1310
+ # if state.actions.values.uniq==[:error]
1311
+ #this can happen when the only dotted rule is for an :error
1312
+ #maybe this case can be optimized?
1313
+ # end
1314
+ end
1315
# reset three caches through their writers once enumeration is finished
+ self.rmd_cache=nil
1316
+ self.oc_cache=nil
1317
+ self.sl2ms_cache=nil
1318
+ return result
1319
+ end
1320
+
1321
# Convert action +x+ into a compact, printable structure.
#
# x::         a ParserState, MultiReduce, MultiShift, Class, StackMonkey,
#             :accept or :error
# in_result:: hash from ParserState to its number
#
# ParserState  -> its number from in_result
# MultiReduce  -> [:MultiReduce, [cond.inspect, pretty action]..., pretty default]
# MultiShift   -> [:MultiShift, [predicates], {branch index => pretty state}]
# Class, StackMonkey -> x.name
# :accept, :error    -> x itself
# Raises RuntimeError for anything else.
def pretty(x,in_result)
  case x
  when ParserState; in_result[x]
  when MultiReduce
    pairs=x.list.dup
    result=[]
    until pairs.empty?
      cond,act,*pairs=*pairs
      cond = cond.inspect
      result<<[cond,pretty(act.action,in_result)]
    end
    result<<pretty(x.default,in_result)
    result.unshift :MultiReduce
  when MultiShift
    h={}
    mods=x.modifiers
    its=[]
    (0...mods.size).step(2){|i| its<<mods[i] } #predicates sit at even indexes
    #in_result has to be handed on: pretty takes two required arguments
    x.map.each_with_index{|xx,i| h[i]=pretty(xx,in_result) }
    [:MultiShift, its,h]
  when Class; x.name
  when StackMonkey; x.name
  when :accept,:error; x
  else fail "not a valid action: #{x}"
  end
end
1347
+
1348
+ attr_accessor :inputs
1349
+
1350
# All parser states, computed by enumerate_states on first use and kept in
# @all_states afterwards.
def all_states
  @all_states=enumerate_states unless defined? @all_states
  @all_states
end
1354
+
1355
# The elements of @inputs for which p===element holds, in order.
def exemplars_that_match p
  @inputs.select{|input| p===input }
end
1358
+
1359
# True when at least one element of @inputs matches the combined matcher
# Node&p.
def pattern_matches_nodes? p
  node_and_p=Node&p
  @inputs.any?{|input| node_and_p===input }
end
1362
+
1363
# True when at least one element of @inputs matches the combined matcher
# Token&p.
def pattern_matches_tokens? p
  token_and_p=Token&p
  @inputs.any?{|input| token_and_p===input }
end
1366
+
1367
# Look +name+ up in @identity_name_aliases; return the recorded alias, or
# +name+ itself when no (truthy) alias is recorded.
def identity_name_alias? name
  substitute=@identity_name_aliases[name]
  substitute ? substitute : name
end
1371
+
1372
# Build (or reload) the parse tables, print statistics about them, and emit
# generated output via generate_c to $stdout. Returns self.
#
# Steps, in order:
# 1. Remember Thread.current[:$RedParse_parser] and set it to self if it was
#    unset; the remembered value is restored in the ensure clause.
# 2. If "cached_parse_tables.drb" exists in the current directory, load a
#    marshalled parser from it and forward this object's methods to it.
#    Otherwise enumerate all states and try to dump self to that file.
# 3. Count how often each dotted rule and action occurs and report the ones
#    never reached, plus the most frequently occurring dotted rules.
# 4. Mark states whose action tables are equal as duplicates.
# 5. Name every state and split its actions into sr/goto tables.
#
# NOTE(review): +states+ is only assigned in the else branch below, so after
# the cache-hit branch it is still nil when states.each is reached --
# confirm whether the cache path is expected to work.
+ def compile
1373
+ oldparser=Thread.current[:$RedParse_parser]
1374
+ Thread.current[:$RedParse_parser]||=self
1375
+
1376
+ if File.exist?("cached_parse_tables.drb")
1377
# cache hit: load the saved parser, drop our own state and delegate to it
+ dup=Marshal.load(f=open("cached_parse_tables.drb","rb"))
1378
+ instance_variables.each{|var| remove_instance_variable var }
1379
+ extend SingleForwardable
1380
+ def_singleton_delegators(dup,public_methods+private_methods+protected_methods)
1381
+
1382
+ self.inputs=enumerate_exemplars
1383
+ else
1384
+ @generating_parse_tables=true
1385
+ @inputs||=enumerate_exemplars
1386
+
1387
+ states=all_states
1388
+ # @rules=expanded_RULES
1389
# @inputs is cleared for the duration of the dump and rebuilt in the ensure
+ @inputs=nil #Marshal no like it
1390
+
1391
+ begin
1392
+ p :dumping
1393
+ Marshal.dump(self,f=open("cached_parse_tables.drb","wb"))
1394
+ p :dump_done!
1395
+ rescue Exception
1396
# a failed dump is only reported; the cache file is deleted
+ p :dump_failed
1397
+ File.unlink "cached_parse_tables.drb"
1398
+ ensure
1399
+ @inputs=enumerate_exemplars
1400
+ end
1401
+ end
1402
# NOTE(review): f is assigned inside the open calls above; if an open raised,
# f would be nil here -- confirm.
+ f.close
1403
+
1404
+ #look for unused dotted rules and actions
1405
+ #also states with drs past the end
1406
+ past_end=0
1407
+ drs=all_dotted_rules
1408
+ dr_count=Hash.new(0)
1409
+ acts=all_rules#.map{|r| r.action }.uniq
1410
+ act_count=Hash.new(0)
1411
+ states.each{|state|
1412
+ state.dotteds.each{|dr|
1413
+ dr_count[dr]+=1
1414
+ past_end+=1 if dr.pos>=dr.rule.patterns.size
1415
+ }
1416
# actions are counted by object identity (__id__)
+ sav=state.actions.values
1417
+ sav.grep(Class|StackMonkey).each{|act| act_count[act.__id__]+=1 }
1418
+ sav.grep(MultiReduce|MultiShift).each{|multi| multi.actions.each{|act| act_count[act.__id__]+=1} }
1419
+ #p state.name if state.dotteds.select{|dr| dr.rule.action==BeginNode}
1420
+ }
1421
+ puts "#{past_end} dotted rules found past the end of their rule" if past_end>0
1422
+ nevers=0
1423
+ drs.each{|dr|
1424
+ next unless dr_count[dr].zero?
1425
+ puts "never reached #{dr.name}"
1426
+ nevers+=1
1427
+ }
1428
+ puts "#{nevers} dotted rules were never reached (out of #{drs.size})"
1429
+ nevers=0
1430
+ acts.each{|act|
1431
+ next unless act_count[act.__id__].zero?
1432
+ puts "never reached #{act.name rescue act}"
1433
+ nevers+=1
1434
+ }
1435
+ puts "#{nevers} actions were never reached (out of #{acts.size})"
1436
+ p :most_popular_nontrivial_drs
1437
# the 15 most frequent dotted rules, ignoring those at pos 0 (or at pos 1
# of a rule whose lookback? is true)
+ pp dr_count.reject{|(dr,n)| dr.pos.zero? or dr.pos==1 && dr.rule.lookback?} \
1438
+ .sort_by{|(dr,n)| n}[-15..-1].map{|(dr,n)| [dr.name,n] }
1439
+
1440
+ #look for duplicate states
1441
# states are keyed by their actions table; a later state with an equal
# table is pointed at the first one through equivalent_to=
+ actions2state={}
1442
+ dup_states=0
1443
+ states.each{|st|
1444
+ cache=actions2state[st.actions]
1445
+ if cache
1446
+ st.equivalent_to=cache
1447
+ dup_states+=1
1448
+ else
1449
+ actions2state[st.actions]=st
1450
+ end
1451
+ }
1452
+ puts "#{dup_states} duplicate states" if dup_states.nonzero?
1453
+
1454
+ name2count={}
1455
+ states.each{|state| state.rename(name2count) }
1456
+
1457
+ #divide each state's actions into sr and goto tables
1458
+ #also scan states for the most common sr and goto actions and make them default
1459
+ states.each{|state| state.make_sr_goto_tables @inputs}
1460
+
1461
+
1462
+ # pp states
1463
+ # pp states.size
1464
+
1465
+ generate_c $stdout
1466
+ return self
1467
+ ensure
1468
# always clear the "generating" flag (ignoring errors if it was never set)
# and restore the parser that was current on entry
+ remove_instance_variable :@generating_parse_tables rescue nil
1469
+ Thread.current[:$RedParse_parser]=oldparser
1470
+ end
1471
+
1472
# Collect juice(patterns[1]) for every rule whose pattern list passes the
# test below, and return the flattened list.
#
# NOTE(review): the test requires patterns.size==0 and at the same time
# compares patterns.first/last with StartToken/EoiToken; for an empty list
# first is nil, so as written no rule appears able to qualify and the result
# stays empty. The only +juice+ visible in this file is commented out (a
# live +sc_juice+ exists). Confirm the intended size (3?) and helper before
# using this method.
def ultimate_goal_nodes
  goals=[]
  all_rules.each{|rule|
    next unless rule.patterns.size==0 and
                rule.patterns.first==StartToken and
                rule.patterns.last==EoiToken
    goals << juice(rule.patterns[1])
  }
  goals.flatten!
  return goals
end
1484
+
1485
+
1486
+ # def start_state
1487
+ # goal=ultimate_goal_nodes
1488
+ # result=all_rules.select{|rule|
1489
+ # rt=rule.reduces_to and
1490
+ # !goal.select{|node| node>=rt}.empty?
1491
+ # }
1492
+ # result.map!{|rule| DottedRule.create(rule,0,parser)}
1493
+ #
1494
+ # result=ParserState.new result
1495
+ # result.name="start_state"
1496
+ # result
1497
+ # end
1498
+
1499
# Create a ParserState for the dotted rules +drs+ and register it in @states.
#
# The candidate state is built with the current size of @states as second
# constructor argument and extended via perhaps_also_allow. If @states has
# no entry for it yet, it is stored with that size as its value and the
# state is returned.
#
# NOTE(review): when an entry already exists, the value stored in @states is
# returned -- going by the assignment below that is a number, not a state.
# Confirm callers expect that. +unruly_also+ is accepted but not used.
def new_state(drs,unruly_also=false)
  candidate=ParserState.new(drs,@states.size)
  candidate.perhaps_also_allow(all_rules,self)
  known=@states[candidate]
  if known
    known
  else
    @states[candidate]=@states.size
    candidate
  end
end
1507
+
1508
# Reset the @states registry and build the state named "initial", which
# holds a dotted rule at position 0 for every rule.
def initial_state
  @states={}
  all_initial_dotted_rules #is this still needed?
  dotteds_at_start=all_rules.map{|rule| DottedRule.create(rule,0,self) }
  state=new_state(dotteds_at_start)
  state.name="initial"
  #state.perhaps_also_allow all_rules,self #silly here
  state
end
1516
+
1517
+ attr_reader :states
1518
+
1519
# The state named "start": the result of evolving initial_state with a
# fresh StartToken, extended via perhaps_also_allow with all rules.
def start_state
  seenlist=Hash.new(:dunno_yet) #unknown keys read as :dunno_yet
  state=initial_state.evolve(StartToken.new,self,seenlist)
  state.perhaps_also_allow(all_rules,self)
  state.name="start"
  state
end
1529
+
1530
# Mixin for objects stored in constants: inspect returns whatever was
# assigned to constant_name instead of the default representation.
module NamedConstant
  attr_accessor :constant_name

  def inspect
    constant_name
  end
end
1534
# Extend the value of each constant of the receiver with NamedConstant and
# record the constant's name on it. Values matched by
# Class|Module|Numeric|Symbol|true|false|nil are left alone.
def self.inspect_constant_names
  constants.each{|const_name|
    value=const_get(const_name)
    unless Class|Module|Numeric|Symbol|true|false|nil===value
      value.extend NamedConstant
      value.constant_name=const_name
    end
  }
end
1542
+
1543
# Table of the objects in @rules that get special treatment when marshalling,
# built on first use and memoized in @undumpables.
#
# @rules (loaded from expanded_RULES if unset) is walked with
# abortable_graphwalk. Every StackMonkey found is stored under its name.
# Every Reg::Deferred found is stored under the next integer (0, 1, ...)
# and additionally gets an +undump_key+ accessor holding that integer.
#
# Returns the table (a Hash) on every call, including the first one.
def undumpables
  return @undumpables if @undumpables
  @rules||=expanded_RULES
  n=-1
  @undumpables={}
  abortable_graphwalk(@rules){|cntr,o,i,ty|
    #the block's value is the negated case result: false for the objects
    #recorded here, true for everything else
    !case o
     when StackMonkey
       @undumpables[o.name]=o
     when Reg::Deferred
       @undumpables[n+=1]=o
       class<<o
         attr_accessor :undump_key
       end
       o.undump_key=n
     end
  }
  #hand back the table itself, not whatever the graph walk returned
  @undumpables
end
1561
+
1562
class ::Proc #hack hack hack
  #only define hacky _dump if one isn't defined already
  already_dumpable=
    Proc.new{}.respond_to?(:_dump) ||
    Proc.new{}.respond_to?(:marshal_dump) ||
    (Marshal.dump(proc{}) rescue false)

  unless already_dumpable
    # Serialized form of a proc: its undump_key as a string.
    def _dump depth
      undump_key.to_s
    end

    # Look the proc up again, by integer key, in the undumpables table of
    # the parser stored in Thread.current[:$RedParse_parser].
    def self._load str
      Thread.current[:$RedParse_parser].undumpables[str.to_i]
    end
  end
end
1575
+
1576
+ =begin disabled, uses too much memory!!
1577
+ class MarshalProxy
1578
+ def initialize(key)
1579
+ @key=key
1580
+ end
1581
+ attr :key
1582
+ end
1583
+
1584
+ #convert unmarshalables, such as stackmonkeys into proxies
1585
+ def proxify
1586
+ n=-1
1587
+ seen={}
1588
+ mkproxy=proc{|cntr,o,i,ty,useit|
1589
+ case o
1590
+ when StackMonkey
1591
+ useit[0]=true
1592
+ seen[o.__id__]||=MarshalProxy.new(o.name)
1593
+ when Reg::Deferred
1594
+ useit[0]=true
1595
+ seen[o.__id__]||=MarshalProxy.new(n+=1)
1596
+ end
1597
+ }
1598
+ Ron::GraphWalk.graphmodify!(@rules,&mkproxy)
1599
+ Ron::GraphWalk.graphmodify!(self,&mkproxy)
1600
+
1601
+ end
1602
+
1603
+ def _dump depth
1604
+ fail unless @rules
1605
+ proxify
1606
+ ivs=instance_variables
1607
+ a=ivs+ivs.reverse.map{|var| instance_variable_get var }
1608
+ result=Marshal.dump(a,depth)
1609
+ unproxify
1610
+ return result
1611
+ end
1612
+
1613
+ #convert marshal proxies back to the real thing
1614
+ def unproxify
1615
+ #build a lookup table for unmarshalables by walking @rules
1616
+ @rules||=expanded_RULES
1617
+ n=-1;lookup={}
1618
+ Ron::GraphWalk.graphwalk(@rules){|cntr,o,i,ty|
1619
+ case o
1620
+ when StackMonkey
1621
+ lookup[o.name]=o
1622
+ when Reg::Deferred
1623
+ lookup[n+=1]=o
1624
+ end
1625
+ }
1626
+
1627
+ Ron::GraphWalk.graphmodify!(self){|cntr,o,i,ty,useit|
1628
+ if MarshalProxy===o
1629
+ useit[0]=true
1630
+ lookup[o.key]
1631
+ end
1632
+ }
1633
+ end
1634
+
1635
+ def self._load(str,*more)
1636
+ result=allocate
1637
+ a=Marshal.load(str,*more)
1638
+
1639
+ result.unproxify
1640
+
1641
+ (0...a.size/2).each{|i| result.instance_variable_set a[i],a[-i] }
1642
+ return result
1643
+ end
1644
+ =end
1645
+
1646
+ end
1647
+
1648
+