tokn 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.txt +194 -0
- data/bin/tokncompile +16 -0
- data/bin/toknprocess +26 -0
- data/figures/sample_dfa.pdf +0 -0
- data/lib/tokn/code_set.rb +392 -0
- data/lib/tokn/dfa.rb +196 -0
- data/lib/tokn/dfa_builder.rb +261 -0
- data/lib/tokn/range_partition.rb +233 -0
- data/lib/tokn/reg_parse.rb +379 -0
- data/lib/tokn/state.rb +320 -0
- data/lib/tokn/token_defn_parser.rb +156 -0
- data/lib/tokn/tokenizer.rb +211 -0
- data/lib/tokn/tokn_const.rb +29 -0
- data/lib/tokn/tools.rb +186 -0
- data/lib/tokn.rb +1 -0
- data/test/data/sampletext.txt +11 -0
- data/test/data/sampletokens.txt +32 -0
- data/test/simple.rb +33 -0
- data/test/test.rb +519 -0
- data/test/testcmds +4 -0
- metadata +69 -0
data/lib/tokn/dfa.rb
ADDED
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'json'
|
2
|
+
require_relative 'tools'
|
3
|
+
req('code_set state')
|
4
|
+
|
5
|
+
|
6
|
+
# A DFA for tokenizing; includes pointer to a start state, and
|
7
|
+
# a list of token names
|
8
|
+
#
|
9
|
+
class DFA
|
10
|
+
|
11
|
+
include Tokn
|
12
|
+
|
13
|
+
# Compile a Tokenizer DFA from a token definition script.
#
# If persistPath is given and a file already exists there, that file is
# assumed to contain (in JSON form) a previously compiled DFA matching
# this script, and the DFA is read from it instead of being compiled.
# If persistPath is given but no such file exists, the freshly compiled
# DFA is written to it.
#
# @param script the token definition script (text)
# @param persistPath optional path of the JSON file used to persist the DFA
# @return the DFA
#
def self.dfa_from_script(script, persistPath = nil)
  # Reuse a previously persisted DFA. This goes through dfa_from_file,
  # the JSON loader of this class; the earlier call to extractDFA named
  # a method that is not defined anywhere in this file.
  return dfa_from_file(persistPath) if persistPath && File.exist?(persistPath)

  # Load the definition parser only when a compile is actually required
  req('token_defn_parser')

  dfa = TokenDefParser.new(script).dfa

  writeTextFile(persistPath, dfa.serialize) if persistPath

  dfa
end
|
36
|
+
|
37
|
+
# Variant of dfa_from_script that takes the path of the script file
# rather than the script text; the file is read into memory first.
#
# @param scriptPath path of the token definition script
# @param persistPath optional path passed through to dfa_from_script
#
def self.dfa_from_script_file(scriptPath, persistPath = nil)
  script = readTextFile(scriptPath)
  dfa_from_script(script, persistPath)
end
|
43
|
+
|
44
|
+
# Build a Tokenizer DFA from a text file whose content is the
# JSON form of a DFA.
#
# @param path path of the file holding the JSON string
#
def self.dfa_from_file(path)
  json_text = readTextFile(path)
  dfa_from_json(json_text)
end
|
50
|
+
|
51
|
+
# Build a Tokenizer DFA from a JSON string.
#
# The parsed JSON is read as a hash with "tokens" (the token names) and
# "states" (one [finalState, edgeList] entry per state, where the
# position in the array is the state id and each edge is a
# [label, destination id] pair).
#
# @param jsonStr the JSON text
# @return a DFA whose start state is state 0
#
def self.dfa_from_json(jsonStr)
  debug = false

  pr("\n\nextractDFA %s...\n",jsonStr) if debug

  parsed = JSON.parse(jsonStr)
  names = parsed["tokens"]
  state_info = parsed["states"]

  if debug
    pr("tokens=%s\n",d(names))
    pr("stateInfo=\n%s\n",d(state_info))
  end

  # First pass: create every state up front, so that edges added in the
  # second pass can refer to any destination, including later ones
  states = Array.new(state_info.size) do |i|
    pr(" creating new state, id=%d\n",i) if debug
    State.new(i)
  end

  # Second pass: fill in the final-state flag and the edges
  states.each do |state|
    pr("proc state %s\n",d(state)) if debug

    is_final, edge_list = state_info[state.id]
    state.finalState = is_final
    edge_list.each do |label, dest_id|
      code_set = CodeSet.new
      code_set.setArray(label)
      state.addEdge(code_set, states[dest_id])
    end
  end

  DFA.new(names, states[0])
end
|
88
|
+
|
89
|
+
attr_reader :startState, :tokenNames
|
90
|
+
|
91
|
+
# Create a DFA from its two components.
#
# @param tokenNameList list of token names (exposed as tokenNames)
# @param startState the DFA's starting state (exposed as startState)
#
def initialize(tokenNameList, startState)
  @startState = startState
  @tokenNames = tokenNameList
end
|
97
|
+
|
98
|
+
# Look up the display name of a token id.
#
# Returns "<EOF>" when tokenId is nil (or false), and "<UNKNOWN>" when it
# equals UNKNOWN_TOKEN. Any other id must index the token name list,
# i.e. lie in 0...n, where n is the number of token names in the DFA.
#
# @param tokenId token id, UNKNOWN_TOKEN, or nil
# @return the token's name
# @raise IndexError if tokenId is outside the token name list
#
def tokenName(tokenId)
  return "<EOF>" unless tokenId
  return "<UNKNOWN>" if tokenId == UNKNOWN_TOKEN

  unless tokenId >= 0 && tokenId < tokenNames.size
    raise IndexError, "No such token id: #{tokenId}"
  end

  tokenNames[tokenId]
end
|
116
|
+
|
117
|
+
# Serialize this DFA to a JSON string.
# The DFA in JSON form has this structure:
#
# {
#   "tokens" => array of token names (strings)
#   "states" => array of states, ordered by id (0,1,..)
# }
#
# Each state has this format:
#   [ finalState (boolean),
#     [edge0, edge1, ...]
#   ]
#
# Edge:
#   [label, destination id (integer)]
#
# Labels are arrays of integers, exactly the structure of
# a CodeSet array.
#
# The states reachable from the start state must carry the ids
# 0..n-1, with the start state having id 0. They are put in id order
# here, so the order in which reachableStates reports them is irrelevant.
#
# @return the JSON text
# @raise ArgumentError if the state ids are not exactly 0..n-1, if no
#   state is reachable, or if state 0 is not the start state
#
def serialize
  reachable, = startState.reachableStates

  by_id = {}
  reachable.each { |st| by_id[st.id] = st }

  # Validate against the sorted ids rather than the traversal order;
  # otherwise a valid DFA whose states happen to be discovered out of
  # id order would be rejected
  ids = by_id.keys.sort
  raise ArgumentError, "unexpected state ids" unless ids == (0...ids.size).to_a
  raise ArgumentError, "bad states" if ids.empty?

  ordered = ids.map { |id| by_id[id] }
  raise ArgumentError, "bad start state" if ordered[0] != startState

  JSON.generate({
    "tokens" => tokenNames,
    "states" => ordered.map { |st| stateToList(st) }
  })
end
|
177
|
+
|
178
|
+
private
|
179
|
+
|
180
|
+
# Convert one state to its serializable form:
# [finalState flag, [[label array, destination id], ...]]
#
def stateToList(state)
  edge_info = state.edges.map { |lbl, dest| [lbl.array, dest.id] }
  [state.finalState?, edge_info]
end
|
193
|
+
|
194
|
+
end
|
195
|
+
|
196
|
+
|
@@ -0,0 +1,261 @@
|
|
1
|
+
require_relative 'tools'
|
2
|
+
req('tokn_const code_set state range_partition reg_parse')
|
3
|
+
|
4
|
+
# Converts NFAs (nondeterministic, finite state automata) to
|
5
|
+
# minimal DFAs.
|
6
|
+
#
|
7
|
+
# Performs the subset construction algorithm described in
|
8
|
+
# (among other places) http://en.wikipedia.org/wiki/Powerset_construction
|
9
|
+
#
|
10
|
+
# Also implements an innovative algorithm to partition a set of
|
11
|
+
# edge labels into a set that has the property that no two elements
|
12
|
+
# have overlapping regions. This allows us to perform the subset construction
|
13
|
+
# (and closure operations) efficiently while supporting large possible character
|
14
|
+
# sets (e.g., unicode, which ranges from 0..0x10ffff). See range_partition.rb
|
15
|
+
# for more details.
|
16
|
+
#
|
17
|
+
class DFABuilder
|
18
|
+
|
19
|
+
include Tokn
|
20
|
+
|
21
|
+
|
22
|
+
# Convert an NFA to a DFA.
#
# The NFA is reversed and converted to a DFA, and the result is reversed
# and converted again; per the original author's note, this apparently
# produces a minimal DFA.
#
# @param startState the start state of the NFA
# @param db if true, generates PDF files for debug purposes, showing various
#   steps of the procedure
# @return the start state of the resulting DFA
#
def self.nfa_to_dfa(startState, db = false)
  startState.generatePDF("original_nfa") if db

  rev = startState.reverseNFA
  rev.generatePDF("reversed_nfa") if db

  # First pass: partition, but don't normalize
  dfa = DFABuilder.new(rev).build(true, false)
  dfa.generatePDF("reversed_dfa") if db

  # Second pass: don't partition (the partition from the first pass is
  # still valid for this build), but do normalize
  dfa = DFABuilder.new(dfa.reverseNFA).build(false, true)

  # If there are edges that contain more than one token identifier,
  # remove all but the first: whenever a label's first code lies below
  # EPSILON-1, the codes from primeId+1 up to EPSILON are subtracted
  # from it (NOTE(review): whether the upper bound is inclusive depends
  # on CodeSet.new - confirm)
  states, = dfa.reachableStates
  states.each do |s|
    s.edges.each do |lbl, _dest|
      codes = lbl.array

      # An empty label has no first code. The previous test, !a.size,
      # could never be true (0 is truthy in Ruby), so an empty label
      # went on to compare nil with an integer and raised.
      next if codes.empty?

      primeId = codes[0]
      next if primeId >= EPSILON - 1

      lbl.difference!(CodeSet.new(primeId + 1, EPSILON))
    end
  end

  dfa.generatePDF("minimal_dfa") if db

  dfa
end
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
# Set up a builder for the NFA that starts at the given state.
#
# @param nfaStartState the start state of the NFA to convert
#
def initialize(nfaStartState)
  @nfaStart = nfaStartState

  # Id to give to the next DFA state that gets created
  @nextId = 0

  # Map of nfa state ids => nfa states, for every reachable NFA state
  reachable, = @nfaStart.reachableStates
  @nfaStateMap = {}
  reachable.each { |state| @nfaStateMap[state.id] = state }

  # Array of nfa state lists, indexed by dfa state id
  @nfaStateLists = []

  # Map of existing DFA states; key is array of NFA state ids
  @dfaStateMap = {}
end
|
97
|
+
|
98
|
+
# Run the subset construction, producing the DFA for this builder's NFA.
#
# @param partition if true, partitions the edge labels into disjoint code sets
# @param normalize if true, normalizes the states afterward
# @return the start state of the DFA
#
def build(partition = true, normalize = true)
  db = false

  partitionEdges(@nfaStart) if partition

  # The DFA start state stands for the epsilon closure of the NFA start
  startSet = Set.new([@nfaStart])
  epsClosure(startSet)
  @dfaStart, = createDFAState(stateSetToIdArray(startSet))

  # DFA states whose outgoing edges have not been built yet
  pending = [@dfaStart]

  until pending.empty?
    dfaState = pending.pop

    # map of CodeSet => set of NFA states reached on that label;
    # edges whose label starts with EPSILON are not moves
    moves = {}
    @nfaStateLists[dfaState.id].each do |nfaId|
      @nfaStateMap[nfaId].edges.each do |lbl, dest|
        next if lbl.array[0] == EPSILON

        (moves[lbl] ||= Set.new).add(dest)
      end
    end

    moves.each_pair do |charRange, targets|
      # May be better to test if already in set before calc closure
      epsClosure(targets)
      dfaDest, created = createDFAState(stateSetToIdArray(targets))
      pending.push(dfaDest) if created
      dfaState.addEdge(charRange, dfaDest)
    end
  end

  if normalize
    @dfaStart.generatePDF("prior_normalize") if db
    pr("Normalizing states for:\n\n%s\n",State.dumpNFA(@dfaStart)) if db

    State.normalizeStates(@dfaStart)

    pr("After normalizing:\n\n%s\n",State.dumpNFA(@dfaStart)) if db
    @dfaStart.generatePDF("post_normalize") if db
  end

  @dfaStart
end
|
165
|
+
|
166
|
+
private
|
167
|
+
|
168
|
+
# Adds a DFA state for a set of NFA states, if one doesn't already exist
# for the set.
#
# Existing DFA states are looked up in @dfaStateMap, keyed by the array of
# NFA state ids. They were previously stored in @nfaStateMap, mixing them
# with the id => NFA state entries and leaving @dfaStateMap unused.
#
# @param nfaStateList a sorted array of NFA state ids
# @return a pair [DFA State,
#   created flag (boolean): true if this did not already exist]
#
def createDFAState(nfaStateList)
  existing = @dfaStateMap[nfaStateList]
  return [existing, false] if existing

  newState = State.new(@nextId)
  @nextId += 1

  # The DFA state is final if any of the NFA states it stands for is final
  newState.finalState = nfaStateList.any? { |id| @nfaStateMap[id].finalState? }

  # Debugging aid (disabled): label the DFA state with the NFA states
  # that produced it:
  #   newState.label = nfaStateList.map { |x| x.to_s }.join(' ')

  @dfaStateMap[nfaStateList] = newState

  # @nfaStateLists is indexed by DFA state id; ids are handed out in
  # creation order, so appending keeps the index aligned
  @nfaStateLists.push(nfaStateList)

  [newState, true]
end
|
199
|
+
|
200
|
+
# Convert a collection of states to the sorted array of their ids
#
def stateSetToIdArray(s)
  s.map(&:id).sort
end
|
203
|
+
|
204
|
+
# Expand a set of NFA states to its epsilon closure: every state that can
# be reached from a member by following only edges whose label contains
# EPSILON is added. The given set is modified in place.
#
# @param stateSet set of NFA states (extended by this call)
# @return the same set
#
def epsClosure(stateSet)
  pending = stateSet.to_a
  until pending.empty?
    state = pending.pop
    state.edges.each do |lbl, dest|
      next unless lbl.contains?(EPSILON)

      # add? returns nil for states already present, so each state is
      # expanded at most once
      pending.push(dest) if stateSet.add?(dest)
    end
  end
  stateSet
end
|
221
|
+
|
222
|
+
# Relabel the edges of every state reachable from startState so that
# each edge carries one subset from a RangePartition built from all of
# the edge labels. An edge whose label is split into several subsets is
# replaced by one edge per subset, all leading to the original
# destination. See the notes at the start of this class, as well as
# range_partition.rb.
#
def partitionEdges(startState)
  db = false

  par = RangePartition.new
  states, = startState.reachableStates

  # Register every label first; the partition is only usable once prepared
  states.each do |s|
    s.edges.each { |lbl, _dest| par.addSet(lbl) }
  end
  par.prepare

  states.each do |s|
    # Collect the replacement edges before clearing, since they are
    # derived from the state's current edges
    replacement = s.edges.flat_map do |lbl, dest|
      pr(" old edge: %s => %s\n",d(lbl),d(dest.name)) if db
      par.apply(lbl).map { |piece| [piece, dest] }
    end

    s.clearEdges

    replacement.each do |lbl, dest|
      pr(" new edge: %s => %s\n",d(lbl),d(dest.name)) if db
      s.addEdge(lbl, dest)
    end
    pr("\n") if db
  end
end
|
257
|
+
|
258
|
+
|
259
|
+
end
|
260
|
+
|
261
|
+
|
@@ -0,0 +1,233 @@
|
|
1
|
+
require_relative 'tools'
|
2
|
+
req('tokn_const code_set')
|
3
|
+
|
4
|
+
|
5
|
+
# A data structure that transforms a set of CodeSets to a
|
6
|
+
# disjoint set of them, such that no two range sets overlap.
|
7
|
+
#
|
8
|
+
# This is to improve the efficiency of the NFA => DFA algorithm,
|
9
|
+
# which involves gathering information about what states are
|
10
|
+
# reachable on certain characters. We can't afford to treat each
|
11
|
+
# character as a singleton, since the ranges can be quite large.
|
12
|
+
# Hence, we want to treat ranges of characters as single entities;
|
13
|
+
# this will only work if no two such ranges overlap.
|
14
|
+
#
|
15
|
+
# It works by starting with a tree whose node is labelled with
|
16
|
+
# the maximal superset of character values. Then, for each edge
|
17
|
+
# in the NFA, performs a DFS on this tree, splitting any node that
|
18
|
+
# only partially intersects any one set that appears in the edge label.
|
19
|
+
# The running time is O(n log k), where n is the size of the NFA, and
|
20
|
+
# k is the height of the resulting tree.
|
21
|
+
#
|
22
|
+
# We encourage k to be small by sorting the NFA edges by their
|
23
|
+
# label complexity.
|
24
|
+
#
|
25
|
+
class RangePartition
|
26
|
+
include Tokn
|
27
|
+
|
28
|
+
# Create an empty, unprepared partition.
#
# The partition is a tree in which each node has a CodeSet associated
# with it, and the child nodes (if present) partition that CodeSet into
# smaller, nonempty sets.
#
def initialize()
  @prepared = false
  @nextNodeId = 0
  @setsToAdd = Set.new

  # Make the root node hold the largest possible CodeSet.
  # We want to be able to include all the token ids as well.
  @rootNode = buildNode(CodeSet.new(CODEMIN,CODEMAX))

  # Add epsilon immediately, so it's always in its own subset
  addSet(CodeSet.new(EPSILON))
end
|
50
|
+
|
51
|
+
# Queue a CodeSet for inclusion in the partition. Sets can only be
# added before prepare has been called.
#
# @raise IllegalStateException if the partition is already prepared
#
def addSet(s)
  raise IllegalStateException if @prepared

  @setsToAdd.add(s)
end
|
57
|
+
|
58
|
+
# Construct the partition from the previously added sets. May be called
# only once; afterwards no further sets can be added.
#
# @raise IllegalStateException if the partition is already prepared
#
def prepare()
  raise IllegalStateException if @prepared

  # Process the sets in order of decreasing cardinality: we probably
  # get a more balanced tree if larger sets are processed first
  ordered = @setsToAdd.to_a.sort { |a, b| b.cardinality <=> a.cardinality }
  ordered.each { |set| addSetAux(set) }

  @prepared = true
end
|
77
|
+
|
78
|
+
|
79
|
+
# Build a .dot description of the partition tree and hand it to
# dotToPDF, for debug purposes. Each node becomes a rectangle labelled
# with its set; each parent/child link becomes an edge.
#
# @param name graph name, also passed to dotToPDF
# @raise IllegalStateException if the partition has not been prepared
#
def generatePDF(name = "partition")
  raise IllegalStateException unless @prepared

  nodes = []
  buildNodeList(nodes)

  # The text is written with single quotes and converted to double
  # quotes in one step at the end
  g = "digraph "+name+" {\n\n"

  nodes.each do |node|
    g << (" '" + d(node) + "' [shape=rect] [label='" + node.set.to_s_alt + "']\n")
  end
  g << "\n"

  nodes.each do |node|
    node.children.each do |ch|
      g << (" '" + d(node) + "' -> '" + d(ch) + "'\n")
    end
  end
  g << "\n}\n"

  g = g.tr("'", '"')

  dotToPDF(g,name)
end
|
108
|
+
|
109
|
+
|
110
|
+
# Split a CodeSet along the partition.
#
# > s CodeSet
# < array of subsets from the partition whose union equals s, sorted by
#   their first elements
#   (this array will be the single element s if no partitioning was necessary)
#
# @raise IllegalStateException if the partition has not been prepared
#
def apply(s)
  raise IllegalStateException unless @prepared

  pieces = []

  # applyAux is given a copy of s rather than s itself
  applyAux(@rootNode, s.makeCopy, pieces)

  pieces.sort! { |a, b| a.array[0] <=> b.array[0] }
  pieces
end
|
130
|
+
|
131
|
+
|
132
|
+
private
|
133
|
+
|
134
|
+
# Recursive helper for apply: collects into list the pieces of s that
# fall into the leaves below node n.
#
# At a leaf, s itself is appended. Otherwise, for each child whose set
# intersects what is left of s, the intersection is processed
# recursively and the child's set is then removed from what is left;
# the scan stops as soon as nothing is left.
#
def applyAux(n, s, list)
  db = false

  pr("applyAux to set[%s], node=[%s]\n",d(s),d(n.set)) if db

  return list.push(s) if n.children.empty?

  n.children.each do |m|
    overlap = s.intersect(m.set)
    pr(" child set=[%s], intersection=[%s]\n",d(m.set),d(overlap)) if db

    next if overlap.empty?

    applyAux(m, overlap, list)

    pr(" subtracting child set [%s] from s=[%s]\n",d(m.set),d(s)) if db
    s = s.difference(m.set)
    pr(" subtracted child set, now [%s]\n",d(s)) if db

    break if s.empty?
  end
end
|
163
|
+
|
164
|
+
# Create a childless tree node for the given set, giving it the next
# unused node id
#
def buildNode(rangeSet)
  node = RPNode.new(@nextNodeId, rangeSet, [])
  @nextNodeId += 1
  node
end
|
170
|
+
|
171
|
+
# Append the nodes of a subtree to list, each parent before its children.
#
# @param list array that receives the nodes
# @param root root of the subtree; the partition's root node if omitted
#
def buildNodeList(list, root = nil)
  root ||= @rootNode
  list.push(root)
  root.children.each { |child| buildNodeList(list, child) }
end
|
180
|
+
|
181
|
+
# Add a set to the tree, extending the tree as necessary to
# maintain a (disjoint) partition.
#
# The algorithm is this:
#
# add (s, n)   # add set s to node n; s must be subset of n.set
#   if n.set = s, return
#   if n is leaf:
#     x = n.set - s
#     add x,s as child sets of n
#   else
#     for each child m of n:
#       t = intersect of m.set and s
#       if t is nonempty, add(t, m)
#
# @param s the set to add
# @param n the node to add it to; the root node if omitted
#
def addSetAux(s, n = @rootNode)
  return if n.set.eql? s

  if n.children.empty?
    # Split the leaf into the part outside s and s itself
    remainder = n.set.difference(s)
    n.children.push(buildNode(remainder), buildNode(s))
  else
    n.children.each do |m|
      overlap = m.set.intersect(s)
      next if overlap.empty?

      addSetAux(overlap, m)
    end
  end
end
|
212
|
+
|
213
|
+
end
|
214
|
+
|
215
|
+
# A node within a RangePartition tree: an id, the set associated with
# the node, and the list of child nodes.
#
class RPNode

  attr_accessor :id, :set, :children

  def initialize(id, set, children)
    @id, @set, @children = id, set, children
  end

  # Short form used when the node is displayed: 'N' followed by the id
  def inspect
    "N#{id}"
  end

end
|
232
|
+
|
233
|
+
|