redparse 0.8.3 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +63 -4
- data/Makefile +43 -0
- data/README.txt +101 -166
- data/Rakefile +1 -1
- data/bin/redparse +49 -21
- data/lib/redparse.rb +88 -1654
- data/lib/redparse/cache.rb +172 -0
- data/lib/redparse/compile.rb +1648 -0
- data/lib/redparse/float_accurate_to_s.rb +162 -0
- data/lib/redparse/generate.rb +6 -2
- data/lib/redparse/node.rb +677 -397
- data/lib/redparse/parse_tree_server.rb +129 -0
- data/lib/redparse/pthelper.rb +43 -0
- data/lib/redparse/reg_more_sugar.rb +5 -5
- data/lib/redparse/version.rb +1 -1
- data/redparse.gemspec +43 -0
- data/test/data/skkdictools.rb +3 -0
- data/test/generate_parse_tree_server_rc.rb +43 -0
- data/test/rp-locatetest.rb +41 -1
- data/test/test_1.9.rb +114 -0
- data/test/test_all.rb +3 -0
- data/test/test_redparse.rb +283 -124
- data/test/test_xform_tree.rb +66 -0
- metadata +57 -56
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'digest/sha2'
|
2
|
+
class RedParse
  # Disk cache of parser output, stored beneath <home>/.redparse/.
  #
  # Each Cache instance owns one subdirectory, named by the SHA2 digest of the
  # constructor parameters.  An entry is a file named by the SHA2 digest of
  # the input; it holds the Marshal dump of the result, or a Ron dump
  # (prefixed with "#encoded with Ron") when Marshal cannot dump the result.
  # Every entry of every subdirectory is deleted when the digest of
  # redparse.rb differs from the one recorded in <home>/.redparse/parserdigest,
  # and the least recently used entries of this instance's subdirectory are
  # deleted while it is larger than the size limit.
  class Cache
    # Failures that mean "this cache entry cannot be read/written" rather than
    # "the process should stop".  LoadError and SyntaxError are ScriptErrors,
    # and deeply nested results can overflow the stack while (un)dumping.
    # Interrupt and SystemExit are deliberately not in this list.
    SOFT_FAILURES=[StandardError, ScriptError, SystemStackError]

    # params:: anything identifying the caller's configuration; joined with
    #          ',' and hashed to pick this instance's cache subdirectory.
    def initialize *params
      @callersfile=Digest::SHA2.hexdigest params.join(',')
      @homedir=find_home+"/.redparse/"
      Dir.mkdir @homedir unless File.exist? @homedir
      Dir.mkdir cachedir unless File.exist? cachedir
      digest_file=@homedir+"/parserdigest"
      saved_digest= File.open(digest_file,"rb"){|fd| fd.read.chomp } if File.exist?(digest_file)
      actual_digest= @@saved_parser_digest ||= redparse_rb_hexdigest
      if saved_digest!=actual_digest
        File.unlink(*all_entry_files) #flush cache
        File.open(digest_file,"wb"){|fd| fd.puts actual_digest } #update saved digest
      end
      retire_old_entries
    end

    # Directory (with trailing slash) holding this instance's entries.
    def cachedir
      @homedir+@callersfile+"/"
    end

    # Entry files belonging to this instance.
    def entry_files
      Dir[cachedir+"*"]
    end

    # Entry files of every cache subdirectory under the home directory.
    def all_entry_files
      Dir[@homedir+"*"].select{|fn|
        File.directory? fn
      }.map{|dirname|
        Dir[dirname+"/*"]
      }.flatten
    end

    # Deletes this instance's entries, oldest modification time first, until
    # their total size is no larger than max_size (default 10_000_000 bytes).
    def retire_old_entries
      size=max_size||10_000_000
      files=entry_files
      total=files.inject(0){|sum,fn| sum+File.size(fn) }
      return unless total>size
      files=files.sort_by{|fn| File::mtime(fn)}
      #the shift guard stops the loop once nothing is left to delete
      #(e.g. a negative limit), instead of calling File.size(nil)
      while total>size and f=files.shift
        total-=File.size(f)
        File.unlink(f)
      end
    end

    # Hex SHA2 digest of the first redparse.rb found on the load path.
    # Raises Errno::ENOENT if there is none.
    def redparse_rb_hexdigest
      dir=$:.find{|d| File.exist? File.join(d.to_s,"redparse.rb") }
      dir or raise Errno::ENOENT, "redparse.rb (not found in $LOAD_PATH)"
      File.open(File.join(dir.to_s,"redparse.rb"),"rb"){|fd| hexdigest_of_file fd }
    end

    # Hex SHA2 digest of everything readable from fd, which is rewound both
    # before and after reading.
    def hexdigest_of_file fd
      sha2=Digest::SHA2.new
      fd.rewind
      while chunk=fd.read(4096)
        sha2.update chunk
      end
      fd.rewind
      return sha2.hexdigest
    end

    # Size limit read from <home>/.redparse/size, or nil if that file is
    # missing or unreadable.
    def max_size
      size_file=@homedir+"/size"
      return nil unless File.exist? size_file
      File.open(size_file){|fd| fd.read.chomp.to_i }
    rescue SystemCallError, IOError
      nil
    end

    ##
    # Finds the user's home directory.
    #--
    # Some comments from the ruby-talk list regarding finding the home
    # directory:
    #
    #   I have HOME, USERPROFILE and HOMEDRIVE + HOMEPATH. Ruby seems
    #   to be depending on HOME in those code samples. I propose that
    #   it should fallback to USERPROFILE and HOMEDRIVE + HOMEPATH (at
    #   least on Win32).
    #(originally stolen from rubygems)
    def find_home
      ['HOME', 'USERPROFILE'].each do |homekey|
        return ENV[homekey] if ENV[homekey]
      end

      if ENV['HOMEDRIVE'] && ENV['HOMEPATH'] then
        return "#{ENV['HOMEDRIVE']}#{ENV['HOMEPATH']}"
      end

      begin
        File.expand_path("~")
      rescue
        if File::ALT_SEPARATOR then
          "C:/"
        else
          "/"
        end
      end
    end

    # Serialized form of result: a Marshal dump, or the Ron header line
    # followed by a Ron dump if Marshal raises TypeError.  Returns nil if the
    # Ron fallback fails too.  Other Marshal failures propagate to the caller.
    def serialize result
      Thread.current["Marshal.ignore_sclass"]=true
      Marshal.dump(result)
    rescue TypeError #dump failed
      begin
        require 'ron'
        "#encoded with Ron\n#{Ron.dump(result)}"
      rescue *SOFT_FAILURES
        nil
      end
    ensure
      Thread.current["Marshal.ignore_sclass"]=nil
    end

    # Sets the modification time of cachefile to now, so that
    # retire_old_entries sees it as recently used.
    def touch cachefile
      t=Time.now
      File.utime(t,t,cachefile)
    rescue StandardError, NotImplementedError
      File.open(cachefile,"a"){|fd| } #touch cache date
    end

    # Deletes cachefile if it exists; never raises for filesystem errors.
    def remove_entry cachefile
      File.unlink cachefile if File.exist? cachefile
    rescue SystemCallError
      nil
    end

    private :find_home, :entry_files, :redparse_rb_hexdigest, :retire_old_entries, :max_size, :hexdigest_of_file
    private :serialize, :touch, :remove_entry

    # Hex SHA2 digest identifying input, which is either a String or a
    # rewindable stream (IO, StringIO, ...) whose whole content is digested.
    def hash_of_input input
      if input.respond_to?(:read) and input.respond_to?(:rewind)
        hexdigest_of_file input
      else
        Digest::SHA2.hexdigest input
      end
    end

    # Returns the result cached for input, or nil if there is no entry or
    # the entry cannot be loaded.  A successful hit refreshes the entry's
    # modification time.
    def get input
      cachefile=cachedir+hash_of_input(input)
      return nil unless File.exist? cachefile
      result=File.open(cachefile,"rb"){|fd|
        line=fd.readline #EOFError on an empty entry; rescued below
        fd.rewind
        if /#encoded with Ron\n/i===line
          begin
            require 'ron'
            Ron.load fd.read
          rescue *SOFT_FAILURES
            return nil
          end
        else
          begin
            #NOTE(review): Marshal.load trusts the file content completely;
            #this is only safe while the cache directory is writable by the
            #current user alone.
            Marshal.load fd
          rescue *SOFT_FAILURES=>e
            warn "#{e.class}: #{e}"
            warn "cache read failed for:\n#{input}"
            return nil
          end
        end
      }

      touch cachefile
      return result
    rescue EOFError
      return nil
    end

    # Stores result as the cache entry for input.  Failures are reported
    # with warn (or silently, when neither Marshal nor Ron can dump result)
    # and leave no entry behind; they are never raised to the caller.
    def put input,result
      cachefile=cachedir+hash_of_input(input)
      #serialize before touching the file, so a failed dump never leaves a
      #truncated entry and the file is never unlinked while still open
      data=serialize result
      if data
        File.open(cachefile,"wb"){|fd| fd.write data }
      else
        remove_entry cachefile
      end
      nil
    rescue *SOFT_FAILURES=>e #dump failed
      warn "#{e.class}: #{e}"
      warn "cache write failed for:\n#{result.inspect}"
      remove_entry cachefile if cachefile #nil if hashing the input failed
      nil
    end
  end
end
|
@@ -0,0 +1,1648 @@
|
|
1
|
+
=begin
|
2
|
+
redparse - a ruby parser written in ruby
|
3
|
+
Copyright (C) 2008,2009 Caleb Clausen
|
4
|
+
|
5
|
+
This program is free software: you can redistribute it and/or modify
|
6
|
+
it under the terms of the GNU Lesser General Public License as published by
|
7
|
+
the Free Software Foundation, either version 3 of the License, or
|
8
|
+
(at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU Lesser General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU Lesser General Public License
|
16
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
=end
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
require 'forwardable'
|
22
|
+
|
23
|
+
begin
|
24
|
+
require 'rubygems'
|
25
|
+
rescue LoadError=>e
|
26
|
+
#hope we don't need it
|
27
|
+
raise unless /rubygems/===e.message
|
28
|
+
end
|
29
|
+
require 'rubylexer'
|
30
|
+
require 'reg'
|
31
|
+
require 'reglookab'
|
32
|
+
|
33
|
+
require "redparse/node"
|
34
|
+
#require "redparse/decisiontree"
|
35
|
+
require "redparse/reg_more_sugar"
|
36
|
+
require "redparse/generate"
|
37
|
+
require "redparse/cache"
|
38
|
+
|
39
|
+
class RedParse
|
40
|
+
|
41
|
+
if defined? END_ATTACK
|
42
|
+
class RuleSet
|
43
|
+
def initialize(rules)
|
44
|
+
@rules=rules.reverse
|
45
|
+
#rule order must be reversed relative to the usual RedParse rule
|
46
|
+
#order... merely so that ffs can work right.
|
47
|
+
@maxmask=(1<<@rules.size)-1
|
48
|
+
@subclasses_of=child_relations_among(*STACKABLE_CLASSES())
|
49
|
+
end
|
50
|
+
|
51
|
+
#Encodes a list of rules as an integer bitmask: bit number i of the
#result is set exactly when @rules[i] is a member of +rules+.
def rules2mask(rules)
  bits=0
  @rules.each_with_index do |candidate,position|
    next unless rules.include? candidate
    bits |= (1<<position)
  end
  bits
end
|
58
|
+
|
59
|
+
#Decodes a bitmask into the rules it selects: for every set bit i of
#+mask+, lowest bit first, the result contains @rules[i].
#
#This method used to be defined twice. The earlier definition tested
#mask&(1<<i) as a boolean, which is always true in ruby (0 is truthy),
#and was silently replaced by the later one anyway; only the
#ffs-based definition, which was the one in effect, is kept.
def mask2rules(mask)
  result=[]
  while mask.nonzero?
    i=ffs(mask)-1 #ffs is 1-based: position of the lowest set bit
    result<< @rules[i]
    mask &= ~(1<<i) #clear the bit just handled
  end
  return result
end
|
75
|
+
|
76
|
+
#Yields (rule, index) for every set bit of +mask+, lowest bit first.
#With no argument, @maxmask is used as the mask.
#
#This method used to be defined twice. The earlier definition tested
#mask&(1<<i) as a boolean, which is always true in ruby (0 is truthy),
#and was silently replaced by the later one anyway; only the
#ffs-based definition, which was the one in effect, is kept.
def each_rule(mask=@maxmask)
  while mask.nonzero?
    i=ffs(mask)-1 #ffs is 1-based: position of the lowest set bit
    yield @rules[i],i
    mask &= ~(1<<i) #clear the bit just handled
  end
end
|
88
|
+
|
89
|
+
|
90
|
+
@@FFS_TABLE=[nil]
|
91
|
+
1.upto(8){|n|
|
92
|
+
@@FFS_TABLE*=2
|
93
|
+
@@FFS_TABLE[@@FFS_TABLE.size/2]=n
|
94
|
+
}
|
95
|
+
def rb_ffs(mask)
|
96
|
+
chunks=0
|
97
|
+
until mask.zero?
|
98
|
+
result=@@FFS_TABLE[mask&0xFF]
|
99
|
+
return result+(chunks<<3) if result
|
100
|
+
chunks+=1
|
101
|
+
mask>>=8
|
102
|
+
end
|
103
|
+
return 0
|
104
|
+
end
|
105
|
+
|
106
|
+
begin
|
107
|
+
require 'inline'
|
108
|
+
inline{|inline|
|
109
|
+
inline.prefix '#define _GNU_SOURCE'
|
110
|
+
inline.include '"string.h"'
|
111
|
+
inline.include '"limits.h"'
|
112
|
+
inline.c %{
|
113
|
+
unsigned c_ffs(VALUE mask){
|
114
|
+
if FIXNUM_P(mask) {
|
115
|
+
return ffsl(NUM2UINT(mask));
|
116
|
+
} else if(TYPE(mask)==T_BIGNUM) {
|
117
|
+
struct RBignum* bn=RBIGNUM(mask);
|
118
|
+
int len=bn->len;
|
119
|
+
int i;
|
120
|
+
unsigned offset=0;
|
121
|
+
unsigned result=0;
|
122
|
+
for(i=0;i<len;++i){
|
123
|
+
/*printf("least:%x\\n", ((BDIGIT*)(bn->digits))[i]);*/
|
124
|
+
/*printf("most:%x\\n", ((BDIGIT*)(bn->digits))[len]);*/
|
125
|
+
result=ffs(((BDIGIT*)(bn->digits))[i]);
|
126
|
+
if (result) break;
|
127
|
+
offset+=sizeof(int)*CHAR_BIT;
|
128
|
+
}
|
129
|
+
if (result==0) return 0;
|
130
|
+
return result+offset;
|
131
|
+
} else {
|
132
|
+
rb_fatal("bad argument to ffs");
|
133
|
+
}
|
134
|
+
}
|
135
|
+
}
|
136
|
+
}
|
137
|
+
alias ffs c_ffs
|
138
|
+
rescue Exception=>e
|
139
|
+
warn "error (#{e.class}) while defining inline c ffs()"
|
140
|
+
warn "original error: #{e}"
|
141
|
+
warn "falling back to ruby version of ffs()"
|
142
|
+
alias ffs rb_ffs
|
143
|
+
|
144
|
+
end
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
#just the left side (the stack/lookahead matchers)
|
150
|
+
def LEFT
|
151
|
+
@rules.map{|r| r.left.subregs }.flatten
|
152
|
+
end
|
153
|
+
|
154
|
+
#remove lookahead and lookback decoration
|
155
|
+
def LEFT_NO_LOOKING
|
156
|
+
l=LEFT()
|
157
|
+
l.map!{|m|
|
158
|
+
case m #
|
159
|
+
when Reg::LookAhead,Reg::LookBack; m.subregs[0]
|
160
|
+
when Proc; []
|
161
|
+
else m #
|
162
|
+
end #
|
163
|
+
}
|
164
|
+
l
|
165
|
+
end
|
166
|
+
|
167
|
+
#all classes mentioned in rules, on left and right sides
|
168
|
+
def STACKABLE_CLASSES #
|
169
|
+
return @sc_result unless @sc_result.nil?
|
170
|
+
@sc_result=false
|
171
|
+
l=LEFT_NO_LOOKING()
|
172
|
+
l=l.map{|lm| sc_juice lm}.flatten.compact
|
173
|
+
r= @rules.map{|rr| rr.right }.grep(Class) #classes in productions
|
174
|
+
result=l+r
|
175
|
+
@sc_result=result.grep(Class).uniq
|
176
|
+
fail if @sc_result.empty?
|
177
|
+
return @sc_result
|
178
|
+
end
|
179
|
+
|
180
|
+
#Returns the classes (possibly as nested arrays; callers flatten) that
#the matcher +m+ could match:
# * a Class gives itself plus, once @subclasses_of is set, everything
#   reachable from it through that table;
# * a String or Regexp is wrapped with RedParse.KW and juiced again;
# * Reg::And gives the intersection of its subregs' classes;
# * Reg::Or gives one result per subreg;
# * Reg::Not of a Class (or of a Reg::Or containing only Classes) gives
#   STACKABLE_CLASSES minus those classes;
# * anything else gives all of STACKABLE_CLASSES.
def juice(m)
  case m #
  when Class
    return [m] unless @subclasses_of
    result=[m] # and subclasses too
    i=0
    #result grows while it is scanned, so descendants at any depth are found
    while item=result[i]
      result.concat @subclasses_of[item]
      i += 1
    end
    result
  when String,Regexp; juice(RedParse.KW(m))
  when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
  when Reg::Or; m.subregs.map( &method(:juice) )
  when Reg::Not
    m=m.subregs[0]
    #all? checks every alternative; the unseeded inject used here before
    #took the first subreg as its seed and so never tested it
    if Class===m or (Reg::Or===m and m.subregs.all?{|x| Class===x })
      j=juice(m)
      STACKABLE_CLASSES()-j.flatten.compact rescue j
    else
      STACKABLE_CLASSES()
    end
  else STACKABLE_CLASSES()
  end
end
|
207
|
+
|
208
|
+
def sc_juice(m)
|
209
|
+
case m #
|
210
|
+
when Class; [m]
|
211
|
+
when String,Regexp; juice(RedParse.KW(m))
|
212
|
+
# when String,Regexp; [KeywordToken]
|
213
|
+
when Reg::And; m.subregs.map{|x| sc_juice(x)}.compact.map{|x| x.flatten.compact}.inject{|sum,rr| sum&rr }
|
214
|
+
when Reg::Or; m.subregs.map( &method(:sc_juice) )
|
215
|
+
when Reg::Not; sc_juice(m.subregs[0])
|
216
|
+
when Reg::LookAhead, Reg::LookBack; sc_juice(m.subregs[0])
|
217
|
+
else []
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
def LOOKAHEAD_CLASSES rule
|
222
|
+
last=rule.left.subregs.last
|
223
|
+
return STACKABLE_CLASSES() unless Reg::LookAhead===last
|
224
|
+
la= last.subregs[0]
|
225
|
+
return juice(la).flatten.compact
|
226
|
+
end
|
227
|
+
#
|
228
|
+
def TOS_CLASSES rule
|
229
|
+
i=-1
|
230
|
+
mats=rule.left.subregs
|
231
|
+
m=mats[i]
|
232
|
+
m=mats[i-=1] if Reg::LookAhead===m || Proc===m
|
233
|
+
result=[]
|
234
|
+
while Reg::Repeat===m and m.times.min.zero?
|
235
|
+
result<<juice(m.subregs[0])
|
236
|
+
m=mats[i-=1]
|
237
|
+
end
|
238
|
+
return (result+juice(m)).flatten.compact
|
239
|
+
end
|
240
|
+
|
241
|
+
def [](i)
|
242
|
+
@rules[i]
|
243
|
+
end
|
244
|
+
|
245
|
+
end #
|
246
|
+
#
|
247
|
+
module Reducer
|
248
|
+
@@rulesets={}
|
249
|
+
@@class_narrowerses={}
|
250
|
+
def compile(recompile=false)
|
251
|
+
klass=self.class
|
252
|
+
|
253
|
+
#use cached result if available
|
254
|
+
if @@rulesets[klass] and !recompile
|
255
|
+
@ruleset=@@rulesets[klass]
|
256
|
+
@class_narrowers=@@class_narrowerses[klass]
|
257
|
+
return
|
258
|
+
end
|
259
|
+
|
260
|
+
#actual rule compilation
|
261
|
+
@ruleset=RuleSet.new @rules
|
262
|
+
@class_narrowers=[tos=Hash.new(0),la=Hash.new(0)]
|
263
|
+
@ruleset.each_rule{|r,i|
|
264
|
+
@ruleset.LOOKAHEAD_CLASSES(r).each{|klass2|
|
265
|
+
la[klass2] |= 1<<i
|
266
|
+
}
|
267
|
+
@ruleset.TOS_CLASSES(r).each{|klass2|
|
268
|
+
tos[klass2] |= 1<<i
|
269
|
+
}
|
270
|
+
}
|
271
|
+
|
272
|
+
#save result to cache if not too dynamic
|
273
|
+
if !recompile
|
274
|
+
@@rulesets[klass]=@ruleset
|
275
|
+
@@class_narrowerses[klass]=@class_narrowers
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
def new_reduce
|
280
|
+
# mask=-1
|
281
|
+
# (-1).downto(-@class_narrowers.size){|i|
|
282
|
+
# mask &= @class_narrowers[i][@stack[i].class]
|
283
|
+
# }
|
284
|
+
mask=
|
285
|
+
@class_narrowers[-1][@stack[-1].class]&
|
286
|
+
@class_narrowers[-2][@stack[-2].class]
|
287
|
+
@ruleset.each_rule(mask){|r,i|
|
288
|
+
res=evaluate(r) and return res
|
289
|
+
}
|
290
|
+
return false
|
291
|
+
end
|
292
|
+
end
|
293
|
+
end
|
294
|
+
|
295
|
+
#Like map, but the block also receives each element's index.
#Returns an array of the block's results, in iteration order.
def map_with_index(list)
  mapped=[]
  list.each_with_index do |item,position|
    mapped.push yield(item,position)
  end
  mapped
end
|
300
|
+
|
301
|
+
def all_rules
|
302
|
+
return @all_rules if defined? @all_rules
|
303
|
+
|
304
|
+
@inputs||=enumerate_exemplars
|
305
|
+
@rules=expanded_RULES #force it to be recalculated
|
306
|
+
@all_rules = map_with_index(@rules){|r,i| Rule.new r,i}
|
307
|
+
|
308
|
+
@all_rules.each{|r|
|
309
|
+
if StackMonkey===r.action
|
310
|
+
r.action.exemplars=@inputs.grep r.action.hint
|
311
|
+
end
|
312
|
+
}
|
313
|
+
|
314
|
+
warn "error recovery rules disabled for now; creates too many states and masks errors"
|
315
|
+
@all_rules.reject!{|r| r.action==MisparsedNode }
|
316
|
+
|
317
|
+
#names have to be allocated globally to make sure they don't collide
|
318
|
+
names=@all_rules.map{|r|
|
319
|
+
if r.action.respond_to? :name
|
320
|
+
r.action.name
|
321
|
+
else
|
322
|
+
r.action.to_s
|
323
|
+
end
|
324
|
+
}.sort
|
325
|
+
dups={}
|
326
|
+
names.each_with_index{|name,i|
|
327
|
+
dups[name]=0 if name==names[i+1]
|
328
|
+
}
|
329
|
+
@all_rules.each{|r|
|
330
|
+
r.name=
|
331
|
+
if r.action.respond_to? :name
|
332
|
+
r.action.name.dup
|
333
|
+
else
|
334
|
+
r.action.to_s
|
335
|
+
end
|
336
|
+
if dups[r.name]
|
337
|
+
count=dups[r.name]+=1
|
338
|
+
r.name<<"_#{count}"
|
339
|
+
end
|
340
|
+
}
|
341
|
+
end
|
342
|
+
|
343
|
+
def all_dotted_rules
|
344
|
+
all_rules.map{|rule|
|
345
|
+
(0...rule.patterns.size).map{|i|
|
346
|
+
DottedRule.create(rule,i,self)
|
347
|
+
}
|
348
|
+
}.flatten
|
349
|
+
end
|
350
|
+
|
351
|
+
#$OLD_PAA=1
|
352
|
+
|
353
|
+
#Returns (and memoizes in @all_initial_dotted_rules) one DottedRule at
#position 0 for every rule in all_rules.
#
#Unless $OLD_PAA is defined, each of those dotted rules also gets its
#also_allow list filled in. compute_also_allow reads the also_allow of
#the other initial dotted rules and reports (through its provisional
#argument) when some of those were not available yet, so the dotted
#rules with provisional answers are recomputed until a pass changes
#nothing.
def all_initial_dotted_rules
  return @all_initial_dotted_rules if defined? @all_initial_dotted_rules
  @all_initial_dotted_rules=result=
    all_rules.map{|rule| DottedRule.create(rule,0,nil) }

  p :all_init

  unless defined? $OLD_PAA
    scanning=result
    provisionals=nil
    #begin...end until runs the body before testing the condition. The
    #previous "while true ... end until" tested the condition first
    #(nil==nil) and so never executed the body at all.
    begin
      old_provisionals=provisionals
      provisionals={}
      scanning.each{|dr|
        provisional=[false]
        #the parser (self) is the first argument; the provisional flag
        #array used to be passed in its place
        dr.also_allow=dr.compute_also_allow(self,provisional) #fill out dr.also_allow
        provisionals[dr]=provisional[0]
      }
      scanning=provisionals.map{|dr,val| dr if val }.compact
    end until provisionals==old_provisionals
  end
  p :all_init_done

  return result
end
|
377
|
+
|
378
|
+
class Rule #original user rules, slightly chewed on
|
379
|
+
def initialize(rawrule,priority)
|
380
|
+
@priority=priority
|
381
|
+
@action=rawrule.right
|
382
|
+
@patterns=rawrule.left.subregs.dup
|
383
|
+
#remove lookback decoration if any, just note that lb was present
|
384
|
+
if Reg::LookBack===@patterns[0]
|
385
|
+
@lookback=true
|
386
|
+
@patterns[0]=@patterns[0].subregs[0]
|
387
|
+
end
|
388
|
+
|
389
|
+
case @patterns[-1]
|
390
|
+
#Symbol is pointless here, methinks.
|
391
|
+
when Proc,Symbol; #do nothing
|
392
|
+
when Reg::LookAhead; @patterns[-1]=@patterns[-1].subregs[0]
|
393
|
+
else @patterns.push Object #add la if none was present
|
394
|
+
end
|
395
|
+
|
396
|
+
#search for looping matchers with minimum >0 and replace them
|
397
|
+
#with a number of scalars (== the minimum) followed by a loop with 0 min.
|
398
|
+
#search for bare strings or regexps and replace with KW( ) wrapper
|
399
|
+
@patterns.each_with_index{|p,i|
|
400
|
+
case p
|
401
|
+
when String,Regexp; @patterns[i]=RedParse.KW(p)
|
402
|
+
when Reg::Repeat
|
403
|
+
if p.itemrange.first>0
|
404
|
+
@patterns[i,1]=
|
405
|
+
*[p.subregs[0]]*p.itemrange.first<< #minimum # as scalars
|
406
|
+
p.subregs[0].reg.* #0-based looper
|
407
|
+
end
|
408
|
+
end
|
409
|
+
}
|
410
|
+
@drs=[]
|
411
|
+
end
|
412
|
+
|
413
|
+
attr_reader :drs
|
414
|
+
|
415
|
+
def hash; priority end
|
416
|
+
def == other; Rule===other and priority==other.priority end
|
417
|
+
alias eql? ==
|
418
|
+
|
419
|
+
def lookback?; @lookback if defined? @lookback end
|
420
|
+
|
421
|
+
attr_reader :patterns,:action,:priority
|
422
|
+
attr_accessor :name
|
423
|
+
|
424
|
+
def at(n)
|
425
|
+
result=patterns[n]
|
426
|
+
result=result.subregs[0] if Reg::Repeat===result
|
427
|
+
result
|
428
|
+
end
|
429
|
+
def optional? n
|
430
|
+
p=patterns[n]
|
431
|
+
return Reg::Repeat===p && p.itemrange.first.zero?
|
432
|
+
end
|
433
|
+
def looping? n
|
434
|
+
p=patterns[n]
|
435
|
+
return false unless Reg::Repeat===p
|
436
|
+
return false if p.itemrange.last==1
|
437
|
+
fail unless p.itemrange.last.infinite?
|
438
|
+
return true
|
439
|
+
rescue Exception
|
440
|
+
return false
|
441
|
+
end
|
442
|
+
|
443
|
+
def reduces_to
|
444
|
+
case @action
|
445
|
+
when Class; @action
|
446
|
+
when StackMonkey; @action.exemplars
|
447
|
+
when :error,:shift,:accept; nil
|
448
|
+
else fail "#@action unexpected in reduces_to"
|
449
|
+
end
|
450
|
+
end
|
451
|
+
|
452
|
+
def unruly?
|
453
|
+
return if action==:accept
|
454
|
+
action.class!=Class || lookback?
|
455
|
+
end
|
456
|
+
|
457
|
+
def final_promised_pattern
|
458
|
+
case @action
|
459
|
+
when DeleteMonkey #delete_monkey
|
460
|
+
vector_indexes=(@action.first_changed_index..-1).select{|i| Reg::Repeat===@patterns[i] }
|
461
|
+
fail unless vector_indexes.empty?
|
462
|
+
result=@patterns.dup
|
463
|
+
result.delete_at @action.first_changed_index
|
464
|
+
when StackMonkey #stack_monkey
|
465
|
+
result=@patterns.dup
|
466
|
+
result[@action.first_changed_index..-1]=[@action.hint]
|
467
|
+
when Class
|
468
|
+
result= [@action,@patterns.last]
|
469
|
+
result.unshift @patterns.first if lookback?
|
470
|
+
when :accept, :error, :shift
|
471
|
+
result=@patterns.dup
|
472
|
+
else
|
473
|
+
pp @action
|
474
|
+
fail
|
475
|
+
end
|
476
|
+
result[-1]=result[-1].la unless result.empty?
|
477
|
+
result
|
478
|
+
end
|
479
|
+
|
480
|
+
def final_promised_rule
|
481
|
+
@final_promised_rule ||=
|
482
|
+
Rule.new(-final_promised_pattern>>nil,-priority)
|
483
|
+
end
|
484
|
+
end
|
485
|
+
|
486
|
+
class DottedRule
|
487
|
+
def initialize(rule,pos,parser)
|
488
|
+
@rule,@pos=rule,pos
|
489
|
+
fail unless (0...rule.patterns.size)===@pos
|
490
|
+
# @also_allow= compute_also_allow(parser) if parser unless defined? $OLD_PAA
|
491
|
+
end
|
492
|
+
def compute_also_allow(parser,provisional=[false])
|
493
|
+
parser.all_initial_dotted_rules.map{|dr|
|
494
|
+
next if dr==self
|
495
|
+
fake_rule=dr.rule.final_promised_rule
|
496
|
+
final_more_dr=DottedRule.create(fake_rule,0,nil)
|
497
|
+
also=dr.also_allow
|
498
|
+
unless also
|
499
|
+
provisional[0]||=0
|
500
|
+
provisional[0]+=1
|
501
|
+
also=[]
|
502
|
+
end
|
503
|
+
also+[dr] if optionally_combine final_more_dr,parser
|
504
|
+
}.flatten.compact.uniq
|
505
|
+
end
|
506
|
+
attr_reader :rule,:pos
|
507
|
+
attr_accessor :also_allow
|
508
|
+
|
509
|
+
def self.create(rule,pos,parser)
|
510
|
+
result=rule.drs[pos] and return result
|
511
|
+
result=rule.drs[pos]=DottedRule.new(rule,pos,parser)
|
512
|
+
unless defined? $OLD_PAA
|
513
|
+
result.also_allow=result.compute_also_allow(parser) if parser
|
514
|
+
end
|
515
|
+
return result
|
516
|
+
end
|
517
|
+
|
518
|
+
def hash; (@rule.priority<<3)^@pos end
|
519
|
+
def == other; DottedRule===other and @pos==other.pos and @rule==other.rule end
|
520
|
+
alias eql? ==
|
521
|
+
|
522
|
+
def name; @rule.name+"@#@pos" end
|
523
|
+
|
524
|
+
def looping?
|
525
|
+
@rule.looping?(@pos)
|
526
|
+
end
|
527
|
+
|
528
|
+
#returns Conditional|Rule|DottedRule|+[DottedRule.+]|nil
|
529
|
+
def evolve input, parser, seenlist,result2
|
530
|
+
#print "["
|
531
|
+
#$stdout.flush
|
532
|
+
idname=input.identity_name
|
533
|
+
idname=parser.identity_name_alias? idname
|
534
|
+
cache=seenlist[[self,idname]]
|
535
|
+
unless cache==:dunno_yet
|
536
|
+
result2.concat Array(cache).flatten.compact.uniq.sort_by{|x| x.name}
|
537
|
+
return cache
|
538
|
+
end
|
539
|
+
i=pos
|
540
|
+
lasti=i-1
|
541
|
+
result=[]
|
542
|
+
result=loop do #might need multiple tries if optional matcher(s) here
|
543
|
+
fail unless i>lasti
|
544
|
+
lasti=i
|
545
|
+
p=@rule.at(i) #what is current pattern in this dottedrule?
|
546
|
+
fail if Proc===p #shouldnt happen anymore
|
547
|
+
if parser.pattern_matches_nodes? p
|
548
|
+
|
549
|
+
#if any dotted rules have nodes at this point,
|
550
|
+
#also include the set of rules@0 which
|
551
|
+
#can (possibly indirectly) generate that node.
|
552
|
+
#(match tokens found on left sides of productions for p)
|
553
|
+
seenlist[[self,idname]]=result
|
554
|
+
if false
|
555
|
+
result.concat recurse_match_drs(parser).uniq.map{|dr|
|
556
|
+
dr and
|
557
|
+
#begin print "{#{dr.name}"
|
558
|
+
dr.evolve input,parser,seenlist,result2
|
559
|
+
#ensure print "}" end
|
560
|
+
}.flatten.compact.uniq
|
561
|
+
end
|
562
|
+
end
|
563
|
+
@saw_item_that={}
|
564
|
+
if p===input
|
565
|
+
i+=1 unless @rule.looping?(i)
|
566
|
+
fail if i>@rule.patterns.size
|
567
|
+
|
568
|
+
if !@saw_item_that.empty?
|
569
|
+
p(:saw_item_that!)
|
570
|
+
fail unless @saw_item_that.size==1
|
571
|
+
pair=@saw_item_that.to_a.first
|
572
|
+
fail unless p.equal? pair.last
|
573
|
+
it=pair.first
|
574
|
+
action=
|
575
|
+
if i==@rule.patterns.size
|
576
|
+
@rule
|
577
|
+
else
|
578
|
+
DottedRule.create(@rule,i,parser)
|
579
|
+
end
|
580
|
+
break Conditional.new(it,action)
|
581
|
+
end
|
582
|
+
@saw_item_that=nil
|
583
|
+
|
584
|
+
if i == @rule.patterns.size
|
585
|
+
break @rule
|
586
|
+
else
|
587
|
+
break result<<DottedRule.create(@rule,i,parser)
|
588
|
+
end
|
589
|
+
elsif !@rule.optional?(i)
|
590
|
+
break result.empty? ? nil : result
|
591
|
+
elsif (i+=1) >= @rule.patterns.size
|
592
|
+
break @rule
|
593
|
+
#else next p
|
594
|
+
end
|
595
|
+
end #loop
|
596
|
+
seenlist[[self,idname]]=result
|
597
|
+
result2.concat Array(result).flatten.compact.uniq.sort_by{|x| x.name}
|
598
|
+
return result
|
599
|
+
#ensure print "]"
|
600
|
+
end
|
601
|
+
|
602
|
+
#returns +[(DottedRule|nil).*]
|
603
|
+
def recurse_match_drs parser, result=nil
|
604
|
+
unless result
|
605
|
+
table=parser.rmd_cache
|
606
|
+
if table
|
607
|
+
cache=table[self]
|
608
|
+
return cache if cache
|
609
|
+
else
|
610
|
+
parser.rmd_cache={}
|
611
|
+
end
|
612
|
+
|
613
|
+
result=[]
|
614
|
+
end
|
615
|
+
#print "("
|
616
|
+
#print @rule.name+"@#@pos"
|
617
|
+
p=@rule.at(@pos)
|
618
|
+
|
619
|
+
#find set of nodes that could match here
|
620
|
+
nodes_here=parser.exemplars_that_match(p&Node)
|
621
|
+
|
622
|
+
#find the set of rules that could generate a node in our list
|
623
|
+
rrules=parser.all_rules.select{|rule|
|
624
|
+
!rule.unruly? and !nodes_here.grep(rule.action).empty?
|
625
|
+
}.map{|rule|
|
626
|
+
DottedRule.create(rule,0,parser)
|
627
|
+
}
|
628
|
+
|
629
|
+
#if any generating rules match a node in the leftmost pattern,
|
630
|
+
#add the rules which can generate _that_ node too.
|
631
|
+
result.push self #force self to be excluded from future recursion
|
632
|
+
oldsize=result.size
|
633
|
+
unless rrules.empty?
|
634
|
+
result.concat rrules
|
635
|
+
|
636
|
+
unless result.respond_to? :index_of
|
637
|
+
class<<result
|
638
|
+
attr_accessor :index_of
|
639
|
+
end
|
640
|
+
result.index_of={}
|
641
|
+
end
|
642
|
+
rio=result.index_of
|
643
|
+
oldsize.upto(result.size){|i| rio[result[i]]||=i }
|
644
|
+
rrules.each{|rrule|
|
645
|
+
i=rio[rrule] or fail #index() inside each() == O(N**2) complexity. this is the slow line.
|
646
|
+
#but skip recursion on rules already done at a higher level
|
647
|
+
rrule.recurse_match_drs parser,result if i>=oldsize
|
648
|
+
}
|
649
|
+
end
|
650
|
+
result[oldsize-1]=nil #don't actually include self in result
|
651
|
+
#result.update_indices oldsize-1, oldsize-1
|
652
|
+
|
653
|
+
parser.rmd_cache[self]=result
|
654
|
+
return result
|
655
|
+
#ensure print ")"
|
656
|
+
end
|
657
|
+
|
658
|
+
#Decides whether the dotted rule +weaker+ can be combined with this one.
#Every pair of (match, next position) outcomes of the two dotted rules
#is tried in turn; the first pair whose matchers share at least one of
#parser.inputs decides the answer:
# * false if all the shared inputs would already be matched by an
#   earlier pattern of either rule;
# * true if +weaker+ has reached the end of its patterns;
# * false if this rule has reached the end of its patterns;
# * otherwise the answer for the two dotted rules at their next positions.
#If no pair shares an input the answer is false.
#The answer is stored in parser.oc_cache under [self,weaker].
def optionally_combine weaker,parser
  #lotsa caching needed if this is ever to be performant
  if parser.oc_cache
    result=parser.oc_cache[[self,weaker]]
    return result unless result.nil?
  else
    parser.oc_cache={}
  end

  other=weaker
  mymatches,myposes= self.outcomes
  matches, poses = other.outcomes
  matches.each_with_index{|match,i|
    mymatches.each_with_index{|mymatch,myi|
      intersect=parser.inputs.grep(match&mymatch)
      unless intersect.empty?

        #but don't allow matches that would be matched
        #by an earlier (but optional) pattern.
        disallowed=Reg::Or.new(
          *possible_matchers_til(myi)+
           other.possible_matchers_til(i)
        )
        #reject returns a new array; its result used to be thrown away,
        #which left the filter without effect and the branch below dead
        intersect=intersect.reject{|x| disallowed===x }

        if intersect.empty?
          return result=false
        elsif poses[i]>=other.rule.patterns.size
          return result=true  #success if weaker rule is at an end
        elsif myposes[myi]>=rule.patterns.size
          return result=false #fail if stronger rule at an end
        else
          p [:**,rule.name,myposes[myi]]
          mynew=DottedRule.create(rule,myposes[myi],parser)
          new=DottedRule.create(other.rule,poses[i],parser)
          return result=mynew.optionally_combine( new,parser )
        end
      end
    }
  }
  return result=false
ensure
  #runs on every exit path, so each answer computed above is cached
  parser.oc_cache[[self,weaker]]=result
end
|
702
|
+
|
703
|
+
def possible_matchers_til i
|
704
|
+
(pos...i-1).map{|j|
|
705
|
+
m=rule.at(j)
|
706
|
+
Reg::Repeat===m ? m.subregs[0] : m
|
707
|
+
}
|
708
|
+
end
|
709
|
+
|
710
|
+
def outcomes
|
711
|
+
til=@rule.patterns.size
|
712
|
+
at=@pos
|
713
|
+
result=[[],[]]
|
714
|
+
loop do
|
715
|
+
m=@rule.patterns[at]
|
716
|
+
case m
|
717
|
+
when Proc;
|
718
|
+
result.first.push Object
|
719
|
+
result.last.push at+1
|
720
|
+
break
|
721
|
+
when Reg::Repeat
|
722
|
+
assert @rule.optional?(at)
|
723
|
+
to=at
|
724
|
+
to+=1 unless @rule.looping? at
|
725
|
+
result.first.push m.subregs[0]
|
726
|
+
result.last.push to
|
727
|
+
else
|
728
|
+
result.first.push m
|
729
|
+
result.last.push at+1
|
730
|
+
break
|
731
|
+
end
|
732
|
+
at+=1
|
733
|
+
break if at>=til
|
734
|
+
end
|
735
|
+
return result
|
736
|
+
end
|
737
|
+
|
738
|
+
end
|
739
|
+
|
740
|
+
attr_accessor :rmd_cache
|
741
|
+
attr_accessor :oc_cache
|
742
|
+
attr_accessor :sl2ms_cache
|
743
|
+
|
744
|
+
#Pairs an action with a condition. Two Conditionals are equal (and hash
#alike) when both their conditions and their actions are equal.
class Conditional
  attr_reader :condition,:action

  #condition must respond to restore; it is called with :hash and :==
  def initialize(condition,action)
    @condition=condition
    @action=action
    @condition.restore :hash,:==
  end

  def hash
    @condition.hash ^ @action.hash
  end

  def == other
    Conditional===other && @condition==other.condition && @action==other.action
  end
  alias eql? ==

  #the condition's inspect string, a "?", then the action's name
  def name
    prefix=@condition.inspect
    prefix+"?"+@action.name
  end

  #priority is simply the action's priority
  def priority
    @action.priority
  end
end
|
763
|
+
|
764
|
+
class ParserState; end
|
765
|
+
class MultiShift; end
|
766
|
+
class MultiReduce; end
|
767
|
+
|
768
|
+
ACTION_PATTERN=ParserState|Rule|MultiShift|MultiReduce|:accept|:error
|
769
|
+
#A parser state: a union of dotted rules, plus the table of actions to take
#for each input seen while in this state.
class ParserState #a union of dotted rules
  #dotteds:: non-empty list of dotted rules; must not contain nil
  #index::   saved in @index (not otherwise used inside this class)
  def initialize(dotteds,index)
    fail if dotteds.empty? #error state
    fail unless dotteds.grep(nil).empty?
    @dotteds=dotteds
    @index=index
    sort_substates!
    @actions={} #key is an input, value is ParserState|Rule|MultiShift|MultiReduce|:accept|:error
  end

  attr_reader :actions

  #fetch the action recorded for input k; asserts that it is a valid action
  def [](k)
    result=@actions[k]
    assert ACTION_PATTERN===result
    result
  end

  #record action v for input k; asserts that v is a valid action
  def []=(k,v)
    assert ACTION_PATTERN===v
    @actions[k]=v
  end

  #order dotted rules by descending position and drop duplicates
  def sort_substates!
    @dotteds=@dotteds.sort_by{|dotted| -dotted.pos}.uniq
  end
  attr :dotteds

  #copy whose @dotteds array is independent of the original's
  def dup
    result=super
    result.instance_variable_set(:@dotteds,@dotteds.dup)
    return result
  end

  def substates; [self] end

  #Turn a list of shift outcomes into a single action.
  #Returns :error for an empty list, a ParserState when every entry is a
  #DottedRule, otherwise a MultiShift wrapping that ParserState.
  #Results are memoized in parser.sl2ms_cache, keyed by shiftlist.
  def shiftlist2multishift? shiftlist,parser
    return :error if shiftlist.empty?
    parser.sl2ms_cache||={}
    cache=parser.sl2ms_cache[shiftlist]
    return cache if cache
    fixed,varying=shiftlist.partition{|res| DottedRule===res}
    result=ParserState.new(fixed,nil)
    result.perhaps_also_allow parser.all_rules,parser
    unless varying.empty? #MultiShift
      #MultiShift indexes its modifiers as a flat [condition,action,...] list.
      #The flattened array must be kept; the original code discarded it and
      #passed nested pairs instead.
      varying=varying.map{|v| [v.condition,v.action]}.flatten
      result=MultiShift.new(result,varying)
    end
    parser.sl2ms_cache[shiftlist]=result
    return result
  end

  #given a list of rules, see if any of them are compatible with
  #a current substate. (compatibility means the aggregate patterns
  #can be anded together and still be able to conceivably match something.)
  #if any of morerules are actually compatible, add it to current state.
  def perhaps_also_allow(morerules,parser)
    fail unless morerules==parser.all_rules
    @dotteds.concat @dotteds.map{|d| d.also_allow }.flatten.compact.uniq
    sort_substates!
  end

  #Older implementation of perhaps_also_allow; only installed under that
  #name when $OLD_PAA is defined (see the alias below).
  #Repeatedly scans morerules for rules that can be combined with a dotted
  #rule added in the previous round, until a round adds nothing.
  def old_perhaps_also_allow(morerules,parser)
    morerules=morerules.dup
    need_sort=false
    scan_rules=@dotteds
    added={}
    while true
      adding=[]
      morerules.each{|morerule|
        next if added[morerule]
        fake_rule=morerule.final_promised_rule
        final_more_dr=DottedRule.create(fake_rule,0,parser)
        scan_rules.each{|dotted|
          if dotted.optionally_combine final_more_dr,parser
            adding<<DottedRule.create(morerule,0,parser)
            added[morerule]=1
            break
          end
        }
      }
      break if adding.empty?
      @dotteds.concat adding
      need_sort=true
      scan_rules=adding
    end
    sort_substates! if need_sort
  end
  alias perhaps_also_allow old_perhaps_also_allow if defined? $OLD_PAA


  #Compute the action to take when +input+ is seen in this state.
  #Every dotted rule is asked to evolve (each is handed the shared list
  #result2), and the combined outcomes are resolved into one action.
  #returns ParserState|MultiShift|MultiReduce|Rule|:accept|:error
  def evolve input,parser,seenlist
    result2=[]
    @dotteds.each{|dotted|
      dotted.evolve input,parser,seenlist,result2
    }

    result=
      #seenlist.values.flatten.compact.uniq.sort_by{|x| x.name}
      result2=result2.uniq.compact.sort_by{|x| x.name}
    #pp [result,result2].map{|x| x.map{|res| DottedRule===res ? res.name : res }}
    #pp result2.map{|res| DottedRule===res ? res.name : res }
    #    result==result2 or fail

    return result=:error if result.empty?


    #ok, who wants to shift and who wants to reduce?
    shiftlist,reducelist=result.partition{|res|
      DottedRule===res or
        Conditional===res && DottedRule===res.action
    }

    #if no reducers at all, just try (multi?)shift
    return result=shiftlist2multishift?( shiftlist,parser )if reducelist.empty?

    #line up reducers by priority
    actions=reducelist \
      .sort_by{|rule| -rule.priority }
    #  .map{|rule| rule.action }
    #actions is +[(Rule|Conditional[Rule]).*]
    action=actions.shift #this first (unless conditional)
    #action is Rule|Conditional[Rule]
    result=
      case action.action
      #assign result before returning so the ensure-clause assertion sees
      #:error rather than the intermediate Array still held in result
      when :error; return result=:error
      when Class, StackMonkey
        action
      when :accept
        :accept
      when :shift #this counts as a reduce at this point, but it writes shift instructions
        shiftlist2multishift? shiftlist,parser
      when Rule #oy, vey, was a Conditional
        shiftaction=shiftlist2multishift?(shiftlist,parser)
        fail unless Rule===action.action
        case action.action.action
        when :error; huh
        when :shift, StackMonkey, :accept, Class #MultiReduce
          first_fixed_index=actions.size
          #actions is +[(Rule|Conditional[Rule]).*]
          actions.each_with_index{|act,i|
            break first_fixed_index=i unless Conditional===act
          }
          condactions=actions[0...first_fixed_index].unshift(action)
          condactions=condactions.inject([]){|sum,cond|
            act=cond.action
            act=shiftaction if act==:shift #=>shiftlist?
            sum.push cond.condition, act
          }
          #possible optimization: one or more :shift right at end could be ignored
          if actions[first_fixed_index]
            action=actions[first_fixed_index].action
          else
            action=shiftaction
          end
          MultiReduce.new condactions,action #=>shiftlist?
        else fail
        end
      else fail "#{action} not expected here"
      end
    #stack monkeys/:accept are treated like reduce here
  ensure
    assert ACTION_PATTERN===result
  end

  #explicitly assigned name if any, else the comma-joined names of all dotted rules
  def name
    @name||@dotteds.map{|dotted| dotted.name}.join(",")
  end
  attr_writer :name

  #Assign a name built from the most prominent dotted rules, unless one is
  #already set. name2count (name => times used so far) is updated, and
  #"___<count>" is appended when the name was already taken.
  def rename(name2count)
    return @name if defined? @name
    name=most_prominent_members.map{|dotted| dotted.name}.join(",")
    if name2count[name]
      name2count[name]+=1
      name+="___"+name2count[name].to_s
    else
      name2count[name]=1
    end

    @name=name
  end

  #Dotted rules used for naming: those at the same position as the first
  #dotted rule, plus those with the largest (patterns.size - pos). Entries at
  #pos 0 (or pos 1 of a lookback rule) are dropped unless nothing would remain.
  def most_prominent_members
    result=@dotteds.select{|dr| dr.pos==@dotteds.first.pos }
    close2end=@dotteds.map{|dr| [dr,dr.rule.patterns.size-dr.pos]}.sort_by{|(o,k)| -k}
    result+=close2end.select{|(dr,k)| k==close2end.first.last}.map{|(dr,k)| dr}
    result2=result.reject{|dr| dr.pos==0 or dr.pos==1&&dr.rule.lookback?}
    result=result2 unless result2.empty?
    return result
  end

  #states are equal (and hash alike) when their dotted rule lists are equal
  def hash
    -@dotteds.hash
  end
  def == other
    ParserState===other and
      @dotteds==other.dotteds
  end
  alias eql? ==

  def looping?
    @dotteds.any?{|dotted| dotted.looping? }
  end

  def transition_to_loop? input #not used
    action=@actions[input] #was @actions.input, which Hash does not respond to
    case action
    when :error; false
    when ParserState; action.looping? and action!=self
    when MultiShift,MultiReduce;
      action.transition_to_loop? input
    else fail
    end
  end

  #Split @actions into @goto (keys whose exemplar is a Node) and @sr (keys
  #whose exemplar is a Token). In each table the most frequent action becomes
  #the hash default and its explicit entries are removed. @actions is set to
  #nil afterwards.
  #inputs:: exemplars; looked up here by their #name
  def make_sr_goto_tables inputs
    name2exemplar={}
    inputs.each{|i| name2exemplar[i.name]=i }

    @goto={}; @sr={}
    goto_counts=Hash.new(0); sr_counts=Hash.new(0)
    actions.each_pair{|k,v|
      if Node===name2exemplar[k]
        @goto[k]=v
        goto_counts[v]+=1
      else
        assert(Token===name2exemplar[k])
        @sr[k]=v
        sr_counts[v]+=1
      end
    }

    #a table with no entries has no most-common action; leave it without a
    #default instead of crashing on nil[0]
    unless goto_counts.empty?
      dflt=goto_counts.sort_by{|v,c| c}.last[0]
      @goto.delete_if{|k,v| v==dflt }
      @goto.default=dflt
    end

    unless sr_counts.empty?
      dflt=sr_counts.sort_by{|v,c| c}.last[0]
      @sr.delete_if{|k,v| v==dflt }
      @sr.default=dflt
    end

    @actions=nil
  end

end
|
1012
|
+
|
1013
|
+
#An action that picks among several actions by testing conditions against
#a value, with a fallback.
class MultiReduce
  #list::    flat array of alternating [condition, action, condition, action, ...]
  #default:: action used when no condition matches
  def initialize(list,default)
    @list,@default=list,default
    #default can be any valid action (except another MultiReduce)
  end

  attr_reader :list,:default

  #the action paired with the first condition that === x, else the default
  def act(x)
    (0...@list.size).step(2){|i|
      return @list[i+1] if @list[i]===x
    }
    return default
  end

  #substates of the default action, or [] if it has none
  def substates
    if @default.respond_to? :substates
      @default.substates
    else
      []
    end
  end

  #every action in the list, followed by the default's own actions (when it
  #responds to #actions) or the default itself (when it is non-nil)
  def actions
    result=[]
    (1...@list.size).step(2){|i|
      result << @list[i]
    }
    if @default.respond_to? :actions
      result.concat @default.actions
    elsif @default
      result<<@default
    end
    result
  end

  def transition_to_loop? input #not used
    @default.transition_to_loop? input
  end

  def hash
    @list.hash^~@default.hash
  end

  #Equal only to another MultiReduce with the same list and default. The type
  #check matters because these objects get compared against symbols such as
  #:error, which have no #list.
  def == other
    MultiReduce===other and
      @list==other.list and @default==other.default
  end
  alias eql? ==
end
|
1062
|
+
|
1063
|
+
#An action that picks one of 2**n pre-built states according to which of n
#predicates match a value.
class MultiShift
  #base::      state that is duplicated once per branch (must support #dup,
  #            #append and #sort_substates!)
  #modifiers:: flat array of alternating [predicate, modifier, ...]
  #Branch number i gets the modifier of every predicate whose bit is set in i.
  def initialize(base,modifiers)
    @base,@modifiers=base,modifiers
    @map=
      (0...2**(modifiers.size/2)).map{|i| base.dup}
    @map.each_with_index{|state,i| #for each branch to the multishift
      (0...modifiers.size).step(2){|j| #for each predicate in the multishift
        #j advances by 2, so the predicate's bit is j/2 -- the same numbering
        #that #act uses when it builds the branch index
        if (i&(1<<(j/2))).nonzero? #if the predicate tests true in this branch
          state.append modifiers[j+1] #add the predicates modifier to the state
        end
      }
      state.sort_substates!
    }
  end

  #the branch state selected by testing every predicate against x
  def act(x)
    result=0
    (0...@modifiers.size).step(2){|i|
      result|=(1<<(i/2)) if @modifiers[i]===x
    }
    @map[result]
  end

  attr_reader :map, :modifiers

  #copy of the list of branch states
  def substates
    @map.dup
  end

  #copy of the list of branch states
  def actions
    @map.dup
  end

  #NOTE(review): huh is not defined anywhere in the visible source; the three
  #methods below look like deliberate not-yet-implemented placeholders.
  def transition_to_loop? input #not used
    huh
  end

  def hash
    huh
  end
  def == other
    huh
  end
  alias eql? ==
end
|
1108
|
+
|
1109
|
+
#an action is one of:
|
1110
|
+
#a ParserState (shift)
|
1111
|
+
#a Rule (reduce)
|
1112
|
+
#:error (error)
|
1113
|
+
#:accept
|
1114
|
+
#MultiReduce
|
1115
|
+
#MultiShift
|
1116
|
+
|
1117
|
+
#just the left side (the stack/lookahead matchers)
|
1118
|
+
def LEFT
  #re-expand the grammar into @rules, then gather the sub-matchers of every
  #rule's left side into one flat list
  @rules=expanded_RULES()
  per_rule=@rules.map{|rule| rule.left.subregs }
  per_rule.flatten
end
|
1124
|
+
|
1125
|
+
#remove lookahead and lookback decoration (not used?)
|
1126
|
+
def LEFT_NO_LOOKING
  #LEFT() with each Proc replaced by []; lookahead/lookback matchers are not
  #expected in the list any more, so meeting one is a failure
  LEFT().map!{|matcher|
    case matcher
    when Reg::LookAhead,Reg::LookBack then fail #should be gone already now
    when Proc then []
    else matcher
    end
  }
end
|
1137
|
+
|
1138
|
+
#Map each given class (plus Object) to the list of given classes whose
#nearest ancestor within the given set is that class.
def child_relations_among(*classes)
  classes.unshift Object
  children={}
  classes.each{|klass| children[klass]=[] }

  classes.each{|klass|
    ancestry=klass.ancestors
    fail unless ancestry.shift==klass
    #first ancestor that is itself one of the given classes, if any
    nearest=ancestry.find{|anc| children[anc] }
    children[nearest] << klass if nearest
  }

  children
end
|
1157
|
+
|
1158
|
+
#all classes mentioned in rules, on left and right sides
|
1159
|
+
def STACKABLE_CLASSES #
  return @sc_result if defined? @sc_result
  #assigned up front: any call made while the work below is still running
  #gets this same array back
  @sc_result=[]
  @subclasses_of=child_relations_among(*vertices)
  from_left=LEFT().map{|lm| sc_juice lm}.flatten.compact
  assert from_left.grep(nil).empty?
  from_right=@rules.map{|rr| rr.right }.grep(Class) #classes in productions
  @subclasses_of=nil
  @sc_result.replace((from_left+from_right).grep(Class).uniq)
  fail if @sc_result.empty?
  @sc_result
end
|
1174
|
+
|
1175
|
+
# def juice(m)
|
1176
|
+
# case m #
|
1177
|
+
# when Class
|
1178
|
+
# return [m] unless @subclasses_of
|
1179
|
+
# result=[m] # and subclasses too
|
1180
|
+
# i=0
|
1181
|
+
# while item=result[i]
|
1182
|
+
# p item
|
1183
|
+
# result.concat @subclasses_of[item] rescue nil
|
1184
|
+
# i += 1
|
1185
|
+
# end
|
1186
|
+
# result
|
1187
|
+
# when String,Regexp; juice(RedParse.KW(m))
|
1188
|
+
# when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
|
1189
|
+
# when Reg::Or; m.subregs.map &method(:juice)
|
1190
|
+
# when Reg::Not
|
1191
|
+
# m=m.subregs[0]
|
1192
|
+
# if Class===m or (Reg::Or===m and
|
1193
|
+
# m.subregs.find{|x| Class===x })
|
1194
|
+
# juice(m)
|
1195
|
+
# else []
|
1196
|
+
# end
|
1197
|
+
# else []
|
1198
|
+
# end
|
1199
|
+
# end
|
1200
|
+
|
1201
|
+
#Extract the classes mentioned by matcher m (possibly nested; callers flatten).
def sc_juice(m)
  case m #
  when Class then [m]
  when String,Regexp then [KeywordToken]
  when Reg::And
    #intersect the class lists of all the sub-matchers
    juiced=m.subregs.map{|sub| sc_juice(sub) }.compact
    juiced.map{|list| list.flatten.compact }.inject{|common,list| common&list }
  when Reg::Or then m.subregs.map{|sub| sc_juice(sub) }
  when Reg::Not, Reg::LookAhead, Reg::LookBack, Reg::Repeat
    #wrappers: look at what they wrap
    sc_juice(m.subregs[0])
  else []
  end
end
|
1213
|
+
|
1214
|
+
#Memoized list of the rules that answer true to unruly?; the rule names are
#printed the first time the list is computed.
def unruly_rules
  unless defined? @unruly_rules
    @unruly_rules=all_rules.select{|rule| rule.unruly? }

    p :unruly_rules
    pp @unruly_rules.map{|r| r.name}
  end

  @unruly_rules
end
|
1225
|
+
|
1226
|
+
#Build one exemplar object per specification returned by the stackable
#classes' own enumerate_exemplars. Each specification is consumed as
#[klass, method_name, value, method_name, value, ...].
def enumerate_exemplars
  return @@exemplars if defined? @@exemplars #dunno why this is necessary

  specs=STACKABLE_CLASSES().map{|sc| sc.enumerate_exemplars }
  specs=specs.inject{|sum,sc| sum+sc}

  specs.map!{|sc|
    #allocate skips initialize; the attributes are faked with singleton methods.
    #the local must stay named res: the eval'd source refers to it by name
    res=sc.shift.allocate
    until sc.empty?
      eval "def res.#{sc.shift}; #{sc.shift.inspect} end"
    end
    def res.to_s; identity_name end
    res
  }

  return @@exemplars=specs
end
|
1244
|
+
|
1245
|
+
#Drop inputs that no rule pattern can tell apart from an earlier input.
#Each dropped input is reported and recorded in @identity_name_aliases
#(earlier input => dropped input). Returns the surviving inputs.
def check_for_parsealike_inputs
  all_patterns=all_rules.map{|rule|
    #look inside repeats
    rule.patterns.map{|pat| Reg::Repeat===pat ? pat.subregs[0] : pat }
  }.flatten.uniq
  seen={}
  @identity_name_aliases={}
  warn "why are non_empty and after_equals params to BeginNode appearently ignored?"
  warn "some token identities overlap themselves?!?"
  warn "some overlaps are duplicated"
  warn ". and :: overlap => ..... surely that's not right"
  survivors=@inputs.map{|input|
    #an input's profile: which patterns match it (Procs stand for themselves)
    profile=all_patterns.map{|pat| Proc===pat ? pat : !!(pat===input)}
    earlier=seen[profile]
    if earlier
      puts "#{input} overlaps #{earlier}"
      @identity_name_aliases[earlier]=input
      nil
    else
      seen[profile]=input
    end
  }
  survivors.compact
end
|
1264
|
+
|
1265
|
+
#Starting from start_state, evolve every state on every input, recording the
#resulting action in the state and queueing any substates not seen before.
#Prints a progress line per state. Returns the states in processing order.
def enumerate_states
  inputs=check_for_parsealike_inputs
  inputs.reject!{|x| StartToken===x}

  finished=[]
  pending=[start_state]

  seenlist=Hash.new(:dunno_yet)

  done_count=0
  started_at=last_tick=Time.now
  numbering={} #this should go away; obsoleted by @states
  next_num=-1
  pending.each{|st| numbering[st]=(next_num+=1) }
  first=pending.first
  pp [-numbering[first], *first.dotteds.map{|dr| dr.name }]
  prev_pending=pending.size
  while state=pending.shift
    finished<<state

    inputs.each do |input|
      newstate=state.evolve input,self,seenlist
      assert ACTION_PATTERN===newstate
      #newstate is ParserState|MultiShift|MultiReduce|Rule|:accept|:error
      state[input.identity_name]=newstate
      next unless newstate.respond_to? :substates
      #newstate.substates is just [newstate] for plain ParserStates
      fresh=newstate.substates.reject{|x| numbering[x]}
      fresh.each{|st| numbering[st]=(next_num+=1) }
      pending.concat fresh
    end

    #progress: states done, queue length, queue growth, seconds for this
    #state, states per second overall, percent complete
    now=Time.now
    done_count+=1
    p [:*,done_count,pending.size,pending.size-prev_pending,now-last_tick,done_count/(now-started_at),(100.0*done_count/(done_count+pending.size)).to_i]
    prev_pending=pending.size
    last_tick=now
  end
  self.rmd_cache=nil
  self.oc_cache=nil
  self.sl2ms_cache=nil
  return finished
end
|
1320
|
+
|
1321
|
+
#Render action x as plain data for debugging output.
#x::         ParserState|MultiReduce|MultiShift|Class|StackMonkey|:accept|:error
#in_result:: hash from ParserState to its number
#Fails on anything else.
def pretty(x,in_result)
  case x
  when ParserState; in_result[x]
  when MultiReduce
    pairs=x.list.dup
    result=[]
    until pairs.empty?
      cond,act,*pairs=*pairs
      cond = cond.inspect
      result<<[cond,pretty(act.action,in_result)]
    end
    result<<pretty(x.default,in_result)
    result.unshift :MultiReduce
  when MultiShift
    h={}
    mods=x.modifiers
    its=[]
    (0...mods.size).step(2){|i| its<<mods[i] } #just the predicates
    #in_result has to be passed along; the one-argument call raised ArgumentError
    x.map.each_with_index{|xx,i| h[i]=pretty(xx,in_result) }
    [:MultiShift, its,h]
  when Class; x.name
  when StackMonkey; x.name
  when :accept,:error; x
  else fail "not a valid action: #{x}"
  end
end
|
1347
|
+
|
1348
|
+
attr_accessor :inputs
|
1349
|
+
|
1350
|
+
#memoized result of enumerate_states
def all_states
  @all_states=enumerate_states unless defined? @all_states
  @all_states
end
|
1354
|
+
|
1355
|
+
#the inputs (exemplars) that pattern p matches
def exemplars_that_match p
  @inputs.select{|input| p===input }
end
|
1358
|
+
|
1359
|
+
#true if at least one input matches both Node and p
def pattern_matches_nodes? p
  wanted=Node&p
  @inputs.any?{|input| wanted===input }
end
|
1362
|
+
|
1363
|
+
#true if at least one input matches both Token and p
def pattern_matches_tokens? p
  wanted=Token&p
  @inputs.any?{|input| wanted===input }
end
|
1366
|
+
|
1367
|
+
#the alias recorded for name in @identity_name_aliases, or name itself when
#there is none
def identity_name_alias? name
  @identity_name_aliases[name] || name
end
|
1371
|
+
|
1372
|
+
#Build (or load from cached_parse_tables.drb in the current directory) the
#parse tables, print statistics about unreached dotted rules/actions and
#duplicate states, split each state's actions into sr/goto tables, and emit
#C code on $stdout. Returns self.
#While running, Thread.current[:$RedParse_parser] holds a parser (it is set
#to self only if it was empty); the previous value is restored on the way out.
def compile
  oldparser=Thread.current[:$RedParse_parser]
  Thread.current[:$RedParse_parser]||=self

  if File.exist?("cached_parse_tables.drb")
    #block form closes the file even if loading raises.
    #NOTE: Marshal.load executes whatever the file describes; the cache file
    #must be trusted.
    dup=File.open("cached_parse_tables.drb","rb"){|fd| Marshal.load(fd) }
    instance_variables.each{|var| remove_instance_variable var }
    extend SingleForwardable
    def_singleton_delegators(dup,public_methods+private_methods+protected_methods)

    self.inputs=enumerate_exemplars
    #NOTE(review): states is never assigned on this path, so states.each
    #below will fail on nil -- confirm how the cached path is meant to work.
  else
    @generating_parse_tables=true
    @inputs||=enumerate_exemplars

    states=all_states
#    @rules=expanded_RULES
    @inputs=nil #Marshal no like it

    begin
      p :dumping
      #the file is closed (and so flushed) before anything else touches it
      File.open("cached_parse_tables.drb","wb"){|fd| Marshal.dump(self,fd) }
      p :dump_done!
    rescue Exception #deliberately broad: a failed dump must not abort compile
      p :dump_failed
      File.unlink "cached_parse_tables.drb" if File.exist? "cached_parse_tables.drb"
    ensure
      @inputs=enumerate_exemplars
    end
  end

  #look for unused dotted rules and actions
  #also states with drs past the end
  past_end=0
  drs=all_dotted_rules
  dr_count=Hash.new(0)
  acts=all_rules#.map{|r| r.action }.uniq
  act_count=Hash.new(0)
  states.each{|state|
    state.dotteds.each{|dr|
      dr_count[dr]+=1
      past_end+=1 if dr.pos>=dr.rule.patterns.size
    }
    sav=state.actions.values
    sav.grep(Class|StackMonkey).each{|act| act_count[act.__id__]+=1 }
    sav.grep(MultiReduce|MultiShift).each{|multi| multi.actions.each{|act| act_count[act.__id__]+=1} }
    #p state.name if state.dotteds.select{|dr| dr.rule.action==BeginNode}
  }
  puts "#{past_end} dotted rules found past the end of their rule" if past_end>0
  nevers=0
  drs.each{|dr|
    next unless dr_count[dr].zero?
    puts "never reached #{dr.name}"
    nevers+=1
  }
  puts "#{nevers} dotted rules were never reached (out of #{drs.size})"
  nevers=0
  acts.each{|act|
    next unless act_count[act.__id__].zero?
    puts "never reached #{act.name rescue act}"
    nevers+=1
  }
  puts "#{nevers} actions were never reached (out of #{acts.size})"
  p :most_popular_nontrivial_drs
  #last(15) also copes with fewer than 15 entries, where [-15..-1] gives nil
  pp dr_count.reject{|(dr,n)| dr.pos.zero? or dr.pos==1 && dr.rule.lookback?} \
       .sort_by{|(dr,n)| n}.last(15).map{|(dr,n)| [dr.name,n] }

  #look for duplicate states
  actions2state={}
  dup_states=0
  states.each{|st|
    cache=actions2state[st.actions]
    if cache
      st.equivalent_to=cache
      dup_states+=1
    else
      actions2state[st.actions]=st
    end
  }
  puts "#{dup_states} duplicate states" if dup_states.nonzero?

  name2count={}
  states.each{|state| state.rename(name2count) }

  #divide each state's actions into sr and goto tables
  #also scan states for the most common sr and goto actions and make them default
  states.each{|state| state.make_sr_goto_tables @inputs}


#    pp states
#    pp states.size

  generate_c $stdout
  return self
ensure
  remove_instance_variable :@generating_parse_tables rescue nil
  Thread.current[:$RedParse_parser]=oldparser
end
|
1471
|
+
|
1472
|
+
#Classes named by the middle pattern of every three-pattern rule of the form
#[StartToken, x, EoiToken].
#The size test used to be ==0, which can never hold together with
#first==StartToken, so nothing was ever collected. The call also went to
#juice, which exists only as commented-out code; sc_juice is used instead.
#NOTE(review): size==3 and sc_juice are inferred from the use of patterns[1]
#and from the surviving helper -- confirm against the author's intent.
def ultimate_goal_nodes
  result=[]
  all_rules.each{|rule|
    if rule.patterns.size==3 and
       rule.patterns.first==StartToken and
       rule.patterns.last==EoiToken
      result << sc_juice(rule.patterns[1])
    end
  }
  result.flatten!
  return result
end
|
1484
|
+
|
1485
|
+
|
1486
|
+
# def start_state
|
1487
|
+
# goal=ultimate_goal_nodes
|
1488
|
+
# result=all_rules.select{|rule|
|
1489
|
+
# rt=rule.reduces_to and
|
1490
|
+
# !goal.select{|node| node>=rt}.empty?
|
1491
|
+
# }
|
1492
|
+
# result.map!{|rule| DottedRule.create(rule,0,parser)}
|
1493
|
+
#
|
1494
|
+
# result=ParserState.new result
|
1495
|
+
# result.name="start_state"
|
1496
|
+
# result
|
1497
|
+
# end
|
1498
|
+
|
1499
|
+
#Build a ParserState from drs (extended via perhaps_also_allow) and register
#it in @states under the next free number.
#unruly_also is accepted but not used.
#NOTE(review): when an equal state is already registered, the value stored
#in @states for it is returned; going by the assignment below that is its
#number, not a ParserState -- confirm that callers expect this.
def new_state(drs,unruly_also=false)
  candidate=ParserState.new drs,@states.size
  candidate.perhaps_also_allow all_rules,self
  if (known=@states[candidate])
    known
  else
    @states[candidate]=@states.size
    candidate
  end
end
|
1507
|
+
|
1508
|
+
#Reset @states and create the state named "initial", which holds a dotted
#rule at position 0 for every rule.
def initial_state
  @states={}
  all_initial_dotted_rules #is this still needed?
  fresh_dotteds=all_rules.map{|rule| DottedRule.create(rule,0,self) }
  state=new_state(fresh_dotteds)
  state.name="initial"
  state
end
|
1516
|
+
|
1517
|
+
attr_reader :states
|
1518
|
+
|
1519
|
+
#The state named "start": the result of evolving the initial state on a
#StartToken, extended via perhaps_also_allow.
def start_state
  seenlist=Hash.new(:dunno_yet)
  state=initial_state.evolve(StartToken.new,self,seenlist)
  state.perhaps_also_allow all_rules,self
  state.name="start"
  state
end
|
1529
|
+
|
1530
|
+
#Mixed into individual objects so that they inspect as the name of the
#constant they were found under.
module NamedConstant
  attr_reader :constant_name
  attr_writer :constant_name

  def inspect
    constant_name
  end
end
|
1534
|
+
#Tag the value of every constant defined here with NamedConstant and its
#constant name, skipping classes, modules, numbers, symbols, true, false and nil.
def self.inspect_constant_names
  constants.each do |const_name|
    value=const_get(const_name)
    unless Class|Module|Numeric|Symbol|true|false|nil===value
      value.extend NamedConstant
      value.constant_name=const_name
    end
  end
end
|
1542
|
+
|
1543
|
+
#Table of objects reachable from @rules that are looked up by key when a
#dump is loaded back, rather than being marshalled themselves:
#StackMonkeys keyed by name, Reg::Deferred objects keyed by a running number
#(which is also stored on the object as undump_key).
#Built on first use and memoized. Always returns the table; previously the
#first call returned whatever abortable_graphwalk returned.
def undumpables
  return @undumpables if @undumpables
  @rules||=expanded_RULES
  n=-1
  @undumpables={}
  abortable_graphwalk(@rules){|cntr,o,i,ty|
    #the block's value is false when o was recorded, true otherwise
    #NOTE(review): presumably this tells the walker whether to descend -- confirm
    !case o
     when StackMonkey
       @undumpables[o.name]=o
     when Reg::Deferred
       @undumpables[n+=1]=o
       class<<o
         attr_accessor :undump_key
       end
       o.undump_key=n
     end
  }
  @undumpables
end
|
1561
|
+
|
1562
|
+
class ::Proc #hack hack hack
  #only define hacky _dump if one isn't defined already
  #(or if procs can somehow be marshalled as they are)
  sample=Proc.new{}
  natively_dumpable=sample.respond_to?(:_dump) ||
                    sample.respond_to?(:marshal_dump) ||
                    (Marshal.dump(proc{}) rescue false)

  unless natively_dumpable
    #dump a proc as nothing but its undump_key
    def _dump depth
      undump_key.to_s
    end

    #find the proc again in the current parser's undumpables table
    def self._load str
      Thread.current[:$RedParse_parser].undumpables[str.to_i]
    end
  end
end
|
1575
|
+
|
1576
|
+
=begin disabled, uses too much memory!!
|
1577
|
+
class MarshalProxy
|
1578
|
+
def initialize(key)
|
1579
|
+
@key=key
|
1580
|
+
end
|
1581
|
+
attr :key
|
1582
|
+
end
|
1583
|
+
|
1584
|
+
#convert unmarshalables, such as stackmonkeys into proxies
|
1585
|
+
def proxify
|
1586
|
+
n=-1
|
1587
|
+
seen={}
|
1588
|
+
mkproxy=proc{|cntr,o,i,ty,useit|
|
1589
|
+
case o
|
1590
|
+
when StackMonkey
|
1591
|
+
useit[0]=true
|
1592
|
+
seen[o.__id__]||=MarshalProxy.new(o.name)
|
1593
|
+
when Reg::Deferred
|
1594
|
+
useit[0]=true
|
1595
|
+
seen[o.__id__]||=MarshalProxy.new(n+=1)
|
1596
|
+
end
|
1597
|
+
}
|
1598
|
+
Ron::GraphWalk.graphmodify!(@rules,&mkproxy)
|
1599
|
+
Ron::GraphWalk.graphmodify!(self,&mkproxy)
|
1600
|
+
|
1601
|
+
end
|
1602
|
+
|
1603
|
+
def _dump depth
|
1604
|
+
fail unless @rules
|
1605
|
+
proxify
|
1606
|
+
ivs=instance_variables
|
1607
|
+
a=ivs+ivs.reverse.map{|var| instance_variable_get var }
|
1608
|
+
result=Marshal.dump(a,depth)
|
1609
|
+
unproxify
|
1610
|
+
return result
|
1611
|
+
end
|
1612
|
+
|
1613
|
+
#convert marshal proxies back to the real thing
|
1614
|
+
def unproxify
|
1615
|
+
#build a lookup table for unmarshalables by walking @rules
|
1616
|
+
@rules||=expanded_RULES
|
1617
|
+
n=-1;lookup={}
|
1618
|
+
Ron::GraphWalk.graphwalk(@rules){|cntr,o,i,ty|
|
1619
|
+
case o
|
1620
|
+
when StackMonkey
|
1621
|
+
lookup[o.name]=o
|
1622
|
+
when Reg::Deferred
|
1623
|
+
lookup[n+=1]=o
|
1624
|
+
end
|
1625
|
+
}
|
1626
|
+
|
1627
|
+
Ron::GraphWalk.graphmodify!(self){|cntr,o,i,ty,useit|
|
1628
|
+
if MarshalProxy===o
|
1629
|
+
useit[0]=true
|
1630
|
+
lookup[o.key]
|
1631
|
+
end
|
1632
|
+
}
|
1633
|
+
end
|
1634
|
+
|
1635
|
+
def self._load(str,*more)
|
1636
|
+
result=allocate
|
1637
|
+
a=Marshal.load(str,*more)
|
1638
|
+
|
1639
|
+
result.unproxify
|
1640
|
+
|
1641
|
+
(0...a.size/2).each{|i| result.instance_variable_set a[i],a[-i] }
|
1642
|
+
return result
|
1643
|
+
end
|
1644
|
+
=end
|
1645
|
+
|
1646
|
+
end
|
1647
|
+
|
1648
|
+
|