tokn 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.txt +194 -0
- data/bin/tokncompile +16 -0
- data/bin/toknprocess +26 -0
- data/figures/sample_dfa.pdf +0 -0
- data/lib/tokn/code_set.rb +392 -0
- data/lib/tokn/dfa.rb +196 -0
- data/lib/tokn/dfa_builder.rb +261 -0
- data/lib/tokn/range_partition.rb +233 -0
- data/lib/tokn/reg_parse.rb +379 -0
- data/lib/tokn/state.rb +320 -0
- data/lib/tokn/token_defn_parser.rb +156 -0
- data/lib/tokn/tokenizer.rb +211 -0
- data/lib/tokn/tokn_const.rb +29 -0
- data/lib/tokn/tools.rb +186 -0
- data/lib/tokn.rb +1 -0
- data/test/data/sampletext.txt +11 -0
- data/test/data/sampletokens.txt +32 -0
- data/test/simple.rb +33 -0
- data/test/test.rb +519 -0
- data/test/testcmds +4 -0
- metadata +69 -0
data/lib/tokn/dfa.rb
ADDED
@@ -0,0 +1,196 @@
require 'json'
require_relative 'tools'
req('code_set state')


# A DFA for tokenizing; includes pointer to a start state, and
# a list of token names
#
class DFA

  include Tokn

  # Compile a Tokenizer DFA from a token definition script.
  # If persistPath is not nil, it first checks if the file exists and
  # if so, assumes it contains (in JSON form) a previously compiled
  # DFA matching this script, and reads the DFA from it.
  # Second, if no such file exists, it writes the DFA to it after compilation.
  #
  def self.dfa_from_script(script, persistPath = nil)

    if persistPath and File.exist?(persistPath)
      return dfa_from_json(readTextFile(persistPath))
    end

    req('token_defn_parser')

    td = TokenDefParser.new(script)
    dfa = td.dfa

    if persistPath
      writeTextFile(persistPath, dfa.serialize())
    end

    dfa
  end

  # Similar to dfa_from_script, but reads the script into memory from
  # the file at scriptPath.
  #
  def self.dfa_from_script_file(scriptPath, persistPath = nil)
    self.dfa_from_script(readTextFile(scriptPath), persistPath)
  end

  # Compile a Tokenizer DFA from a text file (that contains a
  # JSON string)
  #
  def self.dfa_from_file(path)
    dfa_from_json(readTextFile(path))
  end

  # Compile a Tokenizer DFA from a JSON string
  #
  def self.dfa_from_json(jsonStr)
    db = false

    !db || pr("\n\nextractDFA %s...\n", jsonStr)

    h = JSON.parse(jsonStr)

    tNames = h["tokens"]
    stateInfo = h["states"]

    !db || pr("tokens=%s\n", d(tNames))
    !db || pr("stateInfo=\n%s\n", d(stateInfo))

    st = []
    stateInfo.each_with_index do |(key, val), i|
      !db || pr(" creating new state, id=%d\n", i)
      st.push(State.new(i))
    end

    st.each do |s|
      !db || pr("proc state %s\n", d(s))

      finalState, edgeList = stateInfo[s.id]
      s.finalState = finalState
      edgeList.each do |edge|
        label, destState = edge
        cr = CodeSet.new()
        cr.setArray(label)
        s.addEdge(cr, st[destState])
      end
    end

    DFA.new(tNames, st[0])
  end

  attr_reader :startState, :tokenNames

  # Construct a DFA, given a list of token names and a starting state.
  #
  def initialize(tokenNameList, startState)
    @tokenNames = tokenNameList
    @startState = startState
  end

  # Determine the name of a token, given its id.
  # Returns <UNKNOWN> if its id is UNKNOWN_TOKEN, or <EOF> if
  # the tokenId is nil. Otherwise, assumes tokenId is 0..n, where
  # n is the number of token names in the DFA.
  #
  def tokenName(tokenId)
    if !tokenId
      nm = "<EOF>"
    elsif tokenId == UNKNOWN_TOKEN
      nm = "<UNKNOWN>"
    else
      if tokenId < 0 || tokenId >= tokenNames.size
        raise IndexError, "No such token id: " + tokenId.to_s
      end
      nm = tokenNames[tokenId]
    end
    nm
  end

  # Serialize this DFA to a JSON string.
  # The DFA in JSON form has this structure:
  #
  #  {
  #    "tokens" => array of token names (strings)
  #    "states" => array of states, ordered by id (0,1,..)
  #  }
  #
  # Each state has this format:
  #  [ finalState (boolean),
  #    [edge0, edge1, ...]
  #  ]
  #
  # Edge:
  #  [label, destination id (integer)]
  #
  # Labels are arrays of integers, exactly the structure of
  # a CodeSet array.
  #
  def serialize

    h = {}

    h["tokens"] = tokenNames

    stateSet, _, _ = startState.reachableStates

    idToStateMap = {}
    stateSet.each do |st|
      idToStateMap[st.id] = st
    end

    stateList = []

    nextId = 0
    idToStateMap.each_pair do |id, st|
      if nextId != id
        raise ArgumentError, "unexpected state ids"
      end
      nextId += 1

      stateList.push(st)
    end

    if stateList.size == 0
      raise ArgumentError, "bad states"
    end

    if stateList[0] != startState
      raise ArgumentError, "bad start state"
    end

    stateInfo = []
    stateList.each do |st|
      stateInfo.push(stateToList(st))
    end
    h["states"] = stateInfo

    JSON.generate(h)
  end

  private

  def stateToList(state)
    list = []

    list.push(state.finalState?)
    ed = []
    state.edges.each do |lbl, dest|
      edInfo = [lbl.array, dest.id]
      ed.push(edInfo)
    end
    list.push(ed)

    list
  end

end
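The class above is the library's compile/serialize entry point. As a quick orientation, here is a minimal usage sketch based only on the methods shown (dfa_from_script, serialize, dfa_from_json, tokenNames). The token definitions, the cache path, and the assumption that require 'tokn' loads the library are hypothetical; the actual token script syntax is documented in data/README.txt and data/test/data/sampletokens.txt.

# Hedged usage sketch; names and script syntax are illustrative only.
require 'tokn'

# Hypothetical token definitions (see data/test/data/sampletokens.txt for real ones).
script = "sep: \\s+\n" + "word: [a-z]+\n"

# Compile, caching the compiled DFA as JSON at the given (hypothetical) path.
dfa = DFA.dfa_from_script(script, "cached_tokens.dfa")

# Round-trip through the JSON form documented in serialize().
dfa2 = DFA.dfa_from_json(dfa.serialize)
puts dfa2.tokenNames.inspect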
data/lib/tokn/dfa_builder.rb
ADDED
@@ -0,0 +1,261 @@
require_relative 'tools'
req('tokn_const code_set state range_partition reg_parse')

# Converts NFAs (nondeterministic, finite state automata) to
# minimal DFAs.
#
# Performs the subset construction algorithm described in
# (among other places) http://en.wikipedia.org/wiki/Powerset_construction
#
# Also implements an innovative algorithm to partition a set of
# edge labels into a set that has the property that no two elements
# have overlapping regions. This allows us to perform the subset construction
# (and closure operations) efficiently while supporting large possible character
# sets (e.g., Unicode, which ranges from 0..0x10ffff). See RangePartition.rb
# for more details.
#
class DFABuilder

  include Tokn

  # Convert an NFA to a DFA.
  #
  # @param startState the start state of the NFA
  # @param db if true, generates PDF files for debug purposes, showing various
  #   steps of the procedure
  #
  def self.nfa_to_dfa(startState, db = false)

    !db || startState.generatePDF("original_nfa")

    # Reverse this NFA, convert to DFA, then
    # reverse it, and convert it again. Apparently this
    # produces a minimal DFA.

    rev = startState.reverseNFA()
    !db || rev.generatePDF("reversed_nfa")

    bld = DFABuilder.new(rev)
    dfa = bld.build(true, false) # partition, but don't normalize

    !db || dfa.generatePDF("reversed_dfa")

    rev2 = dfa.reverseNFA()
    bld = DFABuilder.new(rev2)

    # Don't regenerate the partition; it is still valid
    # for this second build process
    #
    dfa = bld.build(false, true) # don't partition, but do normalize

    # If there are edges that contain more than one token identifier,
    # remove all but the first (i.e. the one with the highest token id)

    stSet, _, _ = dfa.reachableStates
    stSet.each do |s|
      s.edges.each do |lbl, dest|
        a = lbl.array
        if !a.size
          next
        end

        primeId = a[0]

        if primeId >= EPSILON - 1
          next
        end

        lbl.difference!(CodeSet.new(primeId + 1, EPSILON))
      end
    end

    !db || dfa.generatePDF("minimal_dfa")

    dfa
  end

  # Constructs a builder object
  #
  def initialize(nfaStartState)
    @nextId = 0
    @nfaStart = nfaStartState

    # Build a map of nfa state ids => nfa states
    @nfaStateMap = {}
    nfas, _, _ = @nfaStart.reachableStates
    nfas.each {|s| @nfaStateMap[s.id] = s}

    # Initialize an array of nfa state lists, indexed by dfa state id
    @nfaStateLists = []

    # Map of existing DFA states; key is array of NFA state ids
    @dfaStateMap = {}
  end

  # Perform the build algorithm
  #
  # @param partition if true, partitions the edge labels into disjoint code sets
  # @param normalize if true, normalizes the states afterward
  #
  def build(partition = true, normalize = true)
    db = false

    !partition || partitionEdges(@nfaStart)

    iset = Set.new
    iset.add(@nfaStart)
    epsClosure(iset)

    @dfaStart, _ = createDFAState(stateSetToIdArray(iset))

    markedStates = Set.new

    unmarked = [@dfaStart]

    until unmarked.empty?
      dfaState = unmarked.pop

      nfaIds = @nfaStateLists[dfaState.id]

      # map of CodeSet => set of NFA states
      moveMap = {}

      nfaIds.each do |nfaId|
        nfaState = @nfaStateMap[nfaId]
        nfaState.edges.each do |lbl, dest|
          if lbl.array[0] == EPSILON
            next
          end

          nfaStates = moveMap[lbl]
          if !nfaStates
            nfaStates = Set.new
            moveMap[lbl] = nfaStates
          end
          nfaStates.add(dest)
        end
      end

      moveMap.each_pair do |charRange, nfaStates|
        # May be better to test if already in set before calc closure; or simply has closure
        epsClosure(nfaStates)
        dfaDestState, isNew = createDFAState(stateSetToIdArray(nfaStates))
        if isNew
          unmarked.push(dfaDestState)
        end
        dfaState.addEdge(charRange, dfaDestState)
      end

    end

    if normalize
      !db || @dfaStart.generatePDF("prior_normalize")

      !db || pr("Normalizing states for:\n\n%s\n", State.dumpNFA(@dfaStart))
      State.normalizeStates(@dfaStart)
      !db || pr("After normalizing:\n\n%s\n", State.dumpNFA(@dfaStart))
      !db || @dfaStart.generatePDF("post_normalize")
    end

    @dfaStart
  end

  private

  # Adds a DFA state for a set of NFA states, if one doesn't already exist
  # for the set
  # @param nfaStateList a sorted array of NFA state ids
  # @return a pair [DFA State,
  #   created flag (boolean): true if this did not already exist]
  #
  def createDFAState(nfaStateList)

    lst = nfaStateList

    newState = @nfaStateMap[lst]
    isNewState = !newState
    if isNewState
      newState = State.new(@nextId)

      # Determine if any of the NFA states were final states
      newState.finalState = nfaStateList.any?{|id| @nfaStateMap[id].finalState?}

      if false
        # Set label of DFA state to show which NFA states produced it
        # (useful for debugging)
        newState.label = lst.map {|x| x.to_s}.join(' ')
      end

      @nextId += 1
      @nfaStateMap[lst] = newState
      @nfaStateLists.push(lst)

    end
    return [newState, isNewState]
  end

  def stateSetToIdArray(s)
    s.to_a.map {|x| x.id}.sort
  end

  # Calculate the epsilon closure of a set of NFA states
  # @return a set of states
  #
  def epsClosure(stateSet)
    stk = stateSet.to_a
    while !stk.empty?
      s = stk.pop
      s.edges.each do |lbl, dest|
        if lbl.contains? EPSILON
          if stateSet.add?(dest)
            stk.push(dest)
          end
        end
      end
    end
    stateSet
  end

  # Modify edges so each is labelled with a disjoint subset
  # of characters. See the notes at the start of this class,
  # as well as RangePartition.rb.
  #
  def partitionEdges(startState)

    db = false

    par = RangePartition.new

    stateSet, _, _ = startState.reachableStates

    stateSet.each do |s|
      s.edges.each {|lbl, dest| par.addSet(lbl) }
    end

    par.prepare

    stateSet.each do |s|
      newEdges = []
      s.edges.each do |lbl, dest|
        !db || pr(" old edge: %s => %s\n", d(lbl), d(dest.name))
        newLbls = par.apply(lbl)
        newLbls.each {|x| newEdges.push([x, dest]) }
      end
      s.clearEdges()

      newEdges.each do |lbl, dest|
        !db || pr(" new edge: %s => %s\n", d(lbl), d(dest.name))
        s.addEdge(lbl, dest)
      end
      !db || pr("\n")
    end

  end

end
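The comments above describe the subset (powerset) construction plus the reverse-determinize-reverse-determinize trick (Brzozowski-style minimization). For readers who want the core loop without the CodeSet and State bookkeeping, the following self-contained sketch runs the same subset construction on a toy NFA; the data layout and names here are illustrative only and are not the gem's API.

# Toy subset construction, for illustration only (not the gem's classes).
require 'set'

# NFA: state => { symbol => [destination states] }; :eps marks epsilon edges.
NFA = {
  0 => { :eps => [1], 'a' => [0] },
  1 => { 'b' => [2] },
  2 => {}
}
START = 0
FINALS = Set[2]

def eps_closure(states)
  stack = states.to_a
  until stack.empty?
    s = stack.pop
    (NFA[s][:eps] || []).each do |d|
      stack.push(d) if states.add?(d)
    end
  end
  states
end

# Each DFA state is the sorted array of NFA state ids it represents.
start = eps_closure(Set[START]).to_a.sort
dfa_states = { start => {} }
unmarked = [start]

until unmarked.empty?
  current = unmarked.pop
  # Gather, per symbol, the set of NFA states reachable from this DFA state.
  moves = Hash.new { |h, k| h[k] = Set.new }
  current.each do |s|
    NFA[s].each { |sym, dests| moves[sym].merge(dests) unless sym == :eps }
  end
  moves.each do |sym, dests|
    target = eps_closure(dests).to_a.sort
    unless dfa_states.key?(target)
      dfa_states[target] = {}
      unmarked.push(target)
    end
    dfa_states[current][sym] = target
  end
end

dfa_states.each do |state_ids, edges|
  mark = state_ids.any? { |s| FINALS.include?(s) } ? " (final)" : ""
  puts "#{state_ids.inspect}#{mark} -> #{edges.inspect}"
end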
data/lib/tokn/range_partition.rb
ADDED
@@ -0,0 +1,233 @@
require_relative 'tools'
req('tokn_const code_set')


# A data structure that transforms a set of CodeSets to a
# disjoint set of them, such that no two range sets overlap.
#
# This is to improve the efficiency of the NFA => DFA algorithm,
# which involves gathering information about what states are
# reachable on certain characters. We can't afford to treat each
# character as a singleton, since the ranges can be quite large.
# Hence, we want to treat ranges of characters as single entities;
# this will only work if no two such ranges overlap.
#
# It works by starting with a tree whose root node is labelled with
# the maximal superset of character values. Then, for each edge
# in the NFA, it performs a DFS on this tree, splitting any node that
# only partially intersects any one set that appears in the edge label.
# The running time is O(n log k), where n is the size of the NFA, and
# k is the height of the resulting tree.
#
# We encourage k to be small by sorting the NFA edges by their
# label complexity.
#
class RangePartition
  include Tokn

  def initialize()
    # We will build a tree, where each node has a CodeSet
    # associated with it, and the child nodes (if present)
    # partition this CodeSet into smaller, nonempty sets.

    # A tree is represented by a node, where each node is a pair [x,y],
    # with x the node's CodeSet, and y a list of the node's children.

    @nextNodeId = 0

    # Make the root node hold the largest possible CodeSet.
    # We want to be able to include all the token ids as well.

    @rootNode = buildNode(CodeSet.new(CODEMIN, CODEMAX))

    @setsToAdd = Set.new

    # Add epsilon immediately, so it's always in its own subset
    addSet(CodeSet.new(EPSILON))

    @prepared = false
  end

  def addSet(s)
    if @prepared
      raise IllegalStateException
    end
    @setsToAdd.add(s)
  end

  def prepare()
    if @prepared
      raise IllegalStateException
    end

    # Construct partition from previously added sets

    list = @setsToAdd.to_a

    # Sort sets by cardinality: we probably get a more balanced tree
    # if larger sets are processed first
    list.sort!{ |x, y| y.cardinality <=> x.cardinality }

    list.each do |s|
      addSetAux(s)
    end

    @prepared = true
  end

  # Generate a .dot file, and from that, a PDF, for debug purposes
  #
  def generatePDF(name = "partition")
    if !@prepared
      raise IllegalStateException
    end

    g = ""
    g += "digraph " + name + " {\n\n"

    nodes = []
    buildNodeList(nodes)
    nodes.each do |node|
      g += " '" + d(node) + "' [shape=rect] [label='" + node.set.to_s_alt + "']\n"
    end

    g += "\n"
    nodes.each do |node|
      node.children.each do |ch|
        g += " '" + d(node) + "' -> '" + d(ch) + "'\n"
      end
    end

    g += "\n}\n"
    g.gsub!( /'/, '"' )

    dotToPDF(g, name)
  end

  # Apply the partition to a CodeSet
  #
  # > s CodeSet
  # < array of subsets from the partition whose union equals s
  #   (this array will be the single element s if no partitioning was necessary)
  #
  def apply(s)
    if !@prepared
      raise IllegalStateException
    end

    list = []
    s2 = s.makeCopy
    applyAux(@rootNode, s2, list)

    # Sort the list of subsets by their first elements
    list.sort! { |x, y| x.array[0] <=> y.array[0] }

    list
  end

  private

  def applyAux(n, s, list)
    db = false

    !db || pr("applyAux to set[%s], node=[%s]\n", d(s), d(n.set))

    if n.children.empty?
      # # Verify that this set equals the input set
      # myAssert(s.eql? n.set)
      list.push(s)
    else
      n.children.each do |m|
        s1 = s.intersect(m.set)
        !db || pr(" child set=[%s], intersection=[%s]\n", d(m.set), d(s1))

        if s1.empty?
          next
        end

        applyAux(m, s1, list)

        !db || pr(" subtracting child set [%s] from s=[%s]\n", d(m.set), d(s))
        s = s.difference(m.set)
        !db || pr(" subtracted child set, now [%s]\n", d(s))
        if s.empty?
          break
        end
      end
    end
  end

  def buildNode(rangeSet)
    id = @nextNodeId
    @nextNodeId += 1
    n = RPNode.new(id, rangeSet, [])
    n
  end

  def buildNodeList(list, root = nil)
    if not root
      root = @rootNode
    end
    list.push(root)
    root.children.each do |x|
      buildNodeList(list, x)
    end
  end

  # Add a set to the tree, extending the tree as necessary to
  # maintain a (disjoint) partition
  #
  def addSetAux(s, n = @rootNode)
    #
    # The algorithm is this:
    #
    # add(s, n)                  # add set s to node n; s must be a subset of n.set
    #   if n.set == s, return
    #   if n is a leaf:
    #     x = n.set - s
    #     add x and s as child nodes of n
    #   else
    #     for each child m of n:
    #       t = intersection of m.set and s
    #       if t is nonempty, add(t, m)
    #
    if n.set.eql? s
      return
    end
    if n.children.empty?
      x = n.set.difference(s)
      n.children.push buildNode(x)
      n.children.push buildNode(s)
    else
      n.children.each do |m|
        t = m.set.intersect(s)
        addSetAux(t, m) unless t.empty?
      end
    end
  end

end

# A node within a RangePartition tree
#
class RPNode

  attr_accessor :id, :set, :children

  def initialize(id, set, children)
    @id = id
    @set = set
    @children = children
  end

  def inspect
    return 'N' + id.to_s
  end

end
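RangePartition's guarantee is that apply() expresses any previously added set as an exact union of disjoint pieces from the partition. The short self-contained sketch below illustrates that invariant on plain half-open integer ranges using simple boundary splitting; it only demonstrates the property, not the tree algorithm implemented above, and uses no gem classes.

# Illustration of the disjoint-partition invariant on half-open [lo, hi) ranges.
ranges = [[0, 10], [5, 15], [12, 20]]

# Collect every boundary, then slice the space between consecutive boundaries.
bounds = ranges.flat_map { |lo, hi| [lo, hi] }.uniq.sort
pieces = bounds.each_cons(2).map { |a, b| [a, b] }

# "apply": express each original range as a union of disjoint pieces.
ranges.each do |lo, hi|
  subset = pieces.select { |a, b| a >= lo && b <= hi }
  puts "#{[lo, hi].inspect} == union of #{subset.inspect}"
end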