tokn 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.txt +4 -5
- data/bin/tokncompile +1 -1
- data/bin/toknprocess +10 -4
- data/lib/tokn/code_set.rb +332 -337
- data/lib/tokn/dfa.rb +187 -162
- data/lib/tokn/dfa_builder.rb +218 -220
- data/lib/tokn/range_partition.rb +205 -203
- data/lib/tokn/reg_parse.rb +336 -331
- data/lib/tokn/state.rb +267 -270
- data/lib/tokn/token_defn_parser.rb +144 -139
- data/lib/tokn/tokenizer.rb +243 -175
- data/lib/tokn/tokn_const.rb +11 -6
- data/lib/tokn/tools.rb +42 -20
- data/test/Example1.rb +50 -0
- data/test/data/compileddfa.txt +1 -0
- data/test/data/sampletext.txt +6 -1
- data/test/test.rb +17 -12
- metadata +7 -6
- data/test/simple.rb +0 -33
data/lib/tokn/state.rb
CHANGED
@@ -1,320 +1,317 @@
|
|
1
1
|
require 'set'
|
2
2
|
require_relative 'tools'
|
3
|
-
req 'tokn_const'
|
4
3
|
|
5
|
-
|
6
|
-
# A state within a state machine (NFA or DFA); also, various utility functions
|
7
|
-
# for manipulating state machines. Observe that a state machine can be
|
8
|
-
# referred to by its start state.
|
9
|
-
#
|
10
|
-
# Each state has a set of directed edges to other states, where each edge is
|
11
|
-
# labelled with a CodeSet.
|
12
|
-
#
|
13
|
-
# It also has a unique id (unique within a particular state machine),
|
14
|
-
# and a (boolean) final state flag.
|
15
|
-
#
|
16
|
-
# For debug purposes, both the state and its edges can be labelled.
|
17
|
-
#
|
18
|
-
class State
|
19
|
-
include Tokn
|
20
|
-
|
21
|
-
attr_accessor :id
|
22
|
-
attr_accessor :finalState
|
23
|
-
alias_method :finalState?, :finalState
|
24
|
-
attr_accessor :label
|
4
|
+
module ToknInternal
|
25
5
|
|
26
|
-
#
|
27
|
-
|
28
|
-
|
29
|
-
# Produce a readable description of an NFA, for debug purposes
|
30
|
-
#
|
31
|
-
# > st start state
|
6
|
+
# A state within a state machine (NFA or DFA); also, various utility functions
|
7
|
+
# for manipulating state machines. Observe that a state machine can be
|
8
|
+
# referred to by its start state.
|
32
9
|
#
|
33
|
-
|
34
|
-
|
35
|
-
map,_,_ = st.reachableStates
|
36
|
-
map.each do |s|
|
37
|
-
str += " "+d(s)+"\n"
|
38
|
-
str += " edges= "+d(s.edges)+"\n"
|
39
|
-
s.edges.each{ |lbl,dest| str += " "+d(lbl)+" ==> "+d(dest)+"\n"}
|
40
|
-
end
|
41
|
-
str
|
42
|
-
end
|
43
|
-
|
44
|
-
def hash
|
45
|
-
return @id
|
46
|
-
end
|
47
|
-
|
48
|
-
def eql?(other)
|
49
|
-
return id == other.id
|
50
|
-
end
|
51
|
-
|
52
|
-
def initialize(id)
|
53
|
-
@edges = []
|
54
|
-
@id = id
|
55
|
-
end
|
56
|
-
|
57
|
-
def clearEdges
|
58
|
-
@edges.clear
|
59
|
-
end
|
60
|
-
|
61
|
-
# Add an edge
|
62
|
-
# codeSet : the character codes to label it with
|
63
|
-
# destState : destination state
|
64
|
-
#
|
65
|
-
def addEdge(codeSet,destState)
|
66
|
-
@edges.push([codeSet, destState])
|
67
|
-
end
|
68
|
-
|
69
|
-
# Add a e-transition edge
|
70
|
-
# destState : destination state
|
71
|
-
#
|
72
|
-
def addEps(destState)
|
73
|
-
addEdge(CodeSet.new(EPSILON), destState)
|
74
|
-
end
|
75
|
-
|
76
|
-
def inspect
|
77
|
-
name
|
78
|
-
end
|
79
|
-
|
80
|
-
def name
|
81
|
-
nm = 'S' + d(id)
|
82
|
-
if label
|
83
|
-
nm += ": "+label
|
84
|
-
end
|
85
|
-
nm
|
86
|
-
end
|
87
|
-
|
88
|
-
# Normalize a state machine.
|
10
|
+
# Each state has a set of directed edges to other states, where each edge is
|
11
|
+
# labelled with a CodeSet.
|
89
12
|
#
|
90
|
-
#
|
91
|
-
#
|
92
|
-
# [] delete edges that have empty labels
|
93
|
-
# [] sort edges by destination state ids
|
13
|
+
# It also has a unique id (unique within a particular state machine),
|
14
|
+
# and a (boolean) final state flag.
|
94
15
|
#
|
95
|
-
#
|
16
|
+
# For debug purposes, both the state and its edges can be labelled.
|
96
17
|
#
|
97
|
-
|
98
|
-
stateSet, _,_ = startState.reachableStates
|
99
|
-
stateSet.map{|s| s.normalize}
|
100
|
-
end
|
101
|
-
|
102
|
-
|
103
|
-
# Generate a PDF of the state machine;
|
104
|
-
# Makes a system call to the dot utility to convert a .dot file to a .pdf
|
105
|
-
#
|
106
|
-
def generatePDF(title = "nfa")
|
107
|
-
stateList = {}
|
18
|
+
class State
|
108
19
|
|
109
|
-
|
110
|
-
|
20
|
+
attr_accessor :id
|
21
|
+
attr_accessor :finalState
|
22
|
+
alias_method :finalState?, :finalState
|
23
|
+
attr_accessor :label
|
111
24
|
|
112
|
-
|
113
|
-
|
114
|
-
g += " '' [shape=none]\n"
|
25
|
+
# Edges are a list of [label:CharSetRange, dest:State] pairs
|
26
|
+
attr_reader :edges
|
115
27
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
28
|
+
# Produce a readable description of an NFA, for debug purposes
|
29
|
+
#
|
30
|
+
# > st start state
|
31
|
+
#
|
32
|
+
def self.dumpNFA(st)
|
33
|
+
str = "NFA:\n"
|
34
|
+
map,_,_ = st.reachableStates
|
35
|
+
map.each do |s|
|
36
|
+
str += " "+d(s)+"\n"
|
37
|
+
str += " edges= "+d(s.edges)+"\n"
|
38
|
+
s.edges.each{ |lbl,dest| str += " "+d(lbl)+" ==> "+d(dest)+"\n"}
|
122
39
|
end
|
123
|
-
|
40
|
+
str
|
124
41
|
end
|
125
42
|
|
126
|
-
|
127
|
-
|
128
|
-
stateList.each_value do |s|
|
129
|
-
s.edges.each do |crs, s2|
|
130
|
-
g += " '"+s.name+"' -> '" + s2.name + "' [label='"
|
131
|
-
g += d(crs)
|
132
|
-
g += "'][fontname=Courier][fontsize=12]\n"
|
133
|
-
end
|
43
|
+
def hash
|
44
|
+
return @id
|
134
45
|
end
|
135
46
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
dotToPDF(g,title)
|
140
|
-
end
|
141
|
-
|
47
|
+
def eql?(other)
|
48
|
+
return id == other.id
|
49
|
+
end
|
142
50
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
# [] sort edges by destination state ids
|
148
|
-
#
|
149
|
-
def normalize()
|
51
|
+
def initialize(id)
|
52
|
+
@edges = []
|
53
|
+
@id = id
|
54
|
+
end
|
150
55
|
|
151
|
-
|
56
|
+
def clearEdges
|
57
|
+
@edges.clear
|
58
|
+
end
|
152
59
|
|
153
|
-
|
60
|
+
# Add an edge
|
61
|
+
# codeSet : the character codes to label it with
|
62
|
+
# destState : destination state
|
63
|
+
#
|
64
|
+
def addEdge(codeSet,destState)
|
65
|
+
@edges.push([codeSet, destState])
|
66
|
+
end
|
154
67
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
68
|
+
# Add a e-transition edge
|
69
|
+
# destState : destination state
|
70
|
+
#
|
71
|
+
def addEps(destState)
|
72
|
+
addEdge(CodeSet.new(EPSILON), destState)
|
73
|
+
end
|
161
74
|
|
162
|
-
|
163
|
-
|
75
|
+
def inspect
|
76
|
+
name
|
77
|
+
end
|
78
|
+
|
79
|
+
def name
|
80
|
+
nm = 'S' + d(id)
|
81
|
+
if label
|
82
|
+
nm += ": "+label
|
83
|
+
end
|
84
|
+
nm
|
85
|
+
end
|
86
|
+
|
87
|
+
# Normalize a state machine.
|
88
|
+
#
|
89
|
+
# For each state:
|
90
|
+
# [] merge edges that go to a common state
|
91
|
+
# [] delete edges that have empty labels
|
92
|
+
# [] sort edges by destination state ids
|
93
|
+
#
|
94
|
+
# > start state
|
95
|
+
#
|
96
|
+
def self.normalizeStates(startState)
|
97
|
+
stateSet, _,_ = startState.reachableStates
|
98
|
+
stateSet.map{|s| s.normalize}
|
99
|
+
end
|
164
100
|
|
165
|
-
edges.each do |label,dest|
|
166
|
-
!db || pr(" processing edge %s, %s\n",d(label),d(dest))
|
167
101
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
102
|
+
# Generate a PDF of the state machine;
|
103
|
+
# Makes a system call to the dot utility to convert a .dot file to a .pdf
|
104
|
+
#
|
105
|
+
def generatePDF(title = "nfa")
|
106
|
+
stateList = {}
|
107
|
+
|
108
|
+
startState = self
|
109
|
+
genAux(stateList, startState)
|
110
|
+
|
111
|
+
g = ""
|
112
|
+
g += "digraph "+title+" {\n"
|
113
|
+
g += " '' [shape=none]\n"
|
114
|
+
|
115
|
+
stateList.each_value do |s|
|
116
|
+
g += " '" + s.name + "' [shape="
|
117
|
+
if s.finalState?
|
118
|
+
g += "doubleoctagon"
|
119
|
+
else
|
120
|
+
g += "octagon"
|
181
121
|
end
|
122
|
+
g += "]\n"
|
182
123
|
end
|
183
|
-
if prevDest
|
184
|
-
newEdges.push([prevLabel,prevDest])
|
185
|
-
end
|
186
|
-
|
187
|
-
@edges = newEdges
|
188
|
-
!db || pr("edges now: %s\n",d(@edges))
|
189
|
-
end
|
190
|
-
|
191
|
-
|
192
|
-
# Duplicate the NFA reachable from this state, possibly with new ids
|
193
|
-
#
|
194
|
-
# > dupBaseId : lowest id to use for duplicate; if nil, uses
|
195
|
-
# next available id
|
196
|
-
# < [ map of original states => duplicate states;
|
197
|
-
# 1 + highest id in new NFA ]
|
198
|
-
#
|
199
|
-
def duplicateNFA(dupBaseId = nil)
|
200
|
-
oldStates, oldMinId, oldMaxId = reachableStates()
|
201
|
-
dupBaseId ||= oldMaxId
|
202
124
|
|
203
|
-
|
204
|
-
|
125
|
+
g += "\n"
|
126
|
+
g += " '' -> '" + startState.name + "'\n"
|
127
|
+
stateList.each_value do |s|
|
128
|
+
s.edges.each do |crs, s2|
|
129
|
+
g += " '"+s.name+"' -> '" + s2.name + "' [label='"
|
130
|
+
g += d(crs)
|
131
|
+
g += "'][fontname=Courier][fontsize=12]\n"
|
132
|
+
end
|
133
|
+
end
|
205
134
|
|
206
|
-
|
207
|
-
|
208
|
-
s2.finalState = s.finalState?
|
209
|
-
s2.label = s.label
|
135
|
+
g += "\n}\n"
|
136
|
+
g.gsub!( /'/, '"' )
|
210
137
|
|
211
|
-
|
138
|
+
dotToPDF(g,title)
|
212
139
|
end
|
140
|
+
|
213
141
|
|
214
|
-
|
215
|
-
|
216
|
-
|
142
|
+
# Normalize a state
|
143
|
+
#
|
144
|
+
# [] merge edges that go to a common state
|
145
|
+
# [] delete edges that have empty labels
|
146
|
+
# [] sort edges by destination state ids
|
147
|
+
#
|
148
|
+
def normalize()
|
149
|
+
|
150
|
+
db = false
|
151
|
+
|
152
|
+
!db || pr("\n\nnormalize state:\n %s\nedges=\n%s\n",d(self),d(@edges))
|
153
|
+
|
154
|
+
@edges.sort!{|x,y|
|
155
|
+
label1,dest1 = x
|
156
|
+
label2,dest2 = y
|
157
|
+
dest1.id <=> dest2.id
|
158
|
+
}
|
159
|
+
!db || pr(" sorted edges: %s\n",d(@edges))
|
160
|
+
|
161
|
+
newEdges = []
|
162
|
+
prevLabel, prevDest = nil,nil
|
163
|
+
|
164
|
+
edges.each do |label,dest|
|
165
|
+
!db || pr(" processing edge %s, %s\n",d(label),d(dest))
|
166
|
+
|
167
|
+
if prevDest and prevDest.id == dest.id
|
168
|
+
# changed = true
|
169
|
+
!db || pr(" adding set %s to prevLabel %s...\n",d(label),d(prevLabel))
|
170
|
+
prevLabel.addSet(label)
|
171
|
+
!db || pr(" ...now %s\n",d(prevLabel))
|
172
|
+
else
|
173
|
+
if prevDest
|
174
|
+
newEdges.push([prevLabel,prevDest])
|
175
|
+
end
|
176
|
+
# Must start a fresh copy! Don't want to modify the original label.
|
177
|
+
prevLabel = label.makeCopy()
|
178
|
+
prevDest = dest
|
179
|
+
!db || pr(" pushed onto new edges\n")
|
180
|
+
end
|
181
|
+
end
|
182
|
+
if prevDest
|
183
|
+
newEdges.push([prevLabel,prevDest])
|
184
|
+
end
|
185
|
+
|
186
|
+
@edges = newEdges
|
187
|
+
!db || pr("edges now: %s\n",d(@edges))
|
217
188
|
end
|
218
|
-
|
219
|
-
[oldToNewStateMap, (oldMaxId - oldMinId) + dupBaseId]
|
220
|
-
end
|
221
|
-
|
222
|
-
|
223
189
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
newStateMap = {}
|
237
|
-
|
238
|
-
stateSet.each do |s|
|
239
|
-
|
240
|
-
u = State.new(s.id)
|
241
|
-
newStateMap[u.id] = u
|
190
|
+
|
191
|
+
# Duplicate the NFA reachable from this state, possibly with new ids
|
192
|
+
#
|
193
|
+
# > dupBaseId : lowest id to use for duplicate; if nil, uses
|
194
|
+
# next available id
|
195
|
+
# < [ map of original states => duplicate states;
|
196
|
+
# 1 + highest id in new NFA ]
|
197
|
+
#
|
198
|
+
def duplicateNFA(dupBaseId = nil)
|
199
|
+
oldStates, oldMinId, oldMaxId = reachableStates()
|
200
|
+
dupBaseId ||= oldMaxId
|
242
201
|
|
243
|
-
|
244
|
-
|
245
|
-
u.finalState = true
|
246
|
-
end
|
202
|
+
|
203
|
+
oldToNewStateMap = {}
|
247
204
|
|
248
|
-
|
249
|
-
|
205
|
+
oldStates.each do |s|
|
206
|
+
s2 = State.new((s.id - oldMinId) + dupBaseId)
|
207
|
+
s2.finalState = s.finalState?
|
208
|
+
s2.label = s.label
|
209
|
+
|
210
|
+
oldToNewStateMap[s] = s2
|
250
211
|
end
|
251
212
|
|
252
|
-
|
213
|
+
oldStates.each do |s|
|
214
|
+
s2 = oldToNewStateMap[s]
|
215
|
+
s.edges.each{ |lbl,dest| s2.addEdge(lbl, oldToNewStateMap[dest])}
|
216
|
+
end
|
253
217
|
|
218
|
+
[oldToNewStateMap, (oldMaxId - oldMinId) + dupBaseId]
|
254
219
|
end
|
255
|
-
|
256
|
-
edgeList.each do |srcId, destId, lbl|
|
257
|
-
srcState = newStateMap[srcId]
|
258
|
-
destState = newStateMap[destId]
|
259
|
-
srcState.addEdge(lbl, destState)
|
260
|
-
end
|
261
|
-
|
262
|
-
# Create a distinguished start node that points to each of the start nodes
|
263
|
-
w = State.new(maxId)
|
264
|
-
newStartStateList.each {|s| w.addEps(s)}
|
265
|
-
w
|
266
|
-
end
|
267
220
|
|
268
|
-
|
269
|
-
# Build set of states reachable from this state
|
270
|
-
#
|
271
|
-
# > list of starting states
|
272
|
-
# < [ set, set of states reachable from those states
|
273
|
-
# minId, lowest id in set
|
274
|
-
# maxId 1 + highest id in set
|
275
|
-
# ]
|
276
|
-
#
|
277
|
-
def reachableStates()
|
278
|
-
set = Set.new
|
279
|
-
stack = []
|
280
|
-
stack.push(self)
|
281
|
-
|
282
|
-
maxId = nil
|
283
|
-
minId = nil
|
221
|
+
|
284
222
|
|
285
|
-
|
286
|
-
|
287
|
-
|
223
|
+
# Construct the reverse of the NFA starting at this state
|
224
|
+
# < start state of reversed NFA
|
225
|
+
#
|
226
|
+
def reverseNFA()
|
227
|
+
|
228
|
+
stateSet, minId, maxId = reachableStates()
|
229
|
+
|
230
|
+
edgeList = []
|
231
|
+
|
232
|
+
newStartStateList = []
|
233
|
+
newFinalStateList = []
|
288
234
|
|
289
|
-
|
290
|
-
|
235
|
+
newStateMap = {}
|
236
|
+
|
237
|
+
stateSet.each do |s|
|
238
|
+
|
239
|
+
u = State.new(s.id)
|
240
|
+
newStateMap[u.id] = u
|
241
|
+
|
242
|
+
if s.id == self.id
|
243
|
+
newFinalStateList.push(u)
|
244
|
+
u.finalState = true
|
245
|
+
end
|
246
|
+
|
247
|
+
if s.finalState?
|
248
|
+
newStartStateList.push(u)
|
249
|
+
end
|
250
|
+
|
251
|
+
s.edges.each {|lbl, dest| edgeList.push([dest.id, s.id, lbl])}
|
252
|
+
|
291
253
|
end
|
292
|
-
|
293
|
-
|
254
|
+
|
255
|
+
edgeList.each do |srcId, destId, lbl|
|
256
|
+
srcState = newStateMap[srcId]
|
257
|
+
destState = newStateMap[destId]
|
258
|
+
srcState.addEdge(lbl, destState)
|
294
259
|
end
|
295
260
|
|
296
|
-
|
297
|
-
|
298
|
-
|
261
|
+
# Create a distinguished start node that points to each of the start nodes
|
262
|
+
w = State.new(maxId)
|
263
|
+
newStartStateList.each {|s| w.addEps(s)}
|
264
|
+
w
|
265
|
+
end
|
266
|
+
|
267
|
+
|
268
|
+
# Build set of states reachable from this state
|
269
|
+
#
|
270
|
+
# > list of starting states
|
271
|
+
# < [ set, set of states reachable from those states
|
272
|
+
# minId, lowest id in set
|
273
|
+
# maxId 1 + highest id in set
|
274
|
+
# ]
|
275
|
+
#
|
276
|
+
def reachableStates()
|
277
|
+
set = Set.new
|
278
|
+
stack = []
|
279
|
+
stack.push(self)
|
280
|
+
|
281
|
+
maxId = nil
|
282
|
+
minId = nil
|
283
|
+
|
284
|
+
while !stack.empty?
|
285
|
+
st = stack.pop
|
286
|
+
set.add(st)
|
287
|
+
|
288
|
+
if !minId || minId > st.id
|
289
|
+
minId = st.id
|
290
|
+
end
|
291
|
+
if !maxId || maxId <= st.id
|
292
|
+
maxId = 1 + st.id
|
293
|
+
end
|
294
|
+
|
295
|
+
st.edges.each do |lbl, dest|
|
296
|
+
if set.add?(dest)
|
297
|
+
stack.push(dest)
|
298
|
+
end
|
299
299
|
end
|
300
300
|
end
|
301
|
+
[set, minId, maxId]
|
301
302
|
end
|
302
|
-
|
303
|
-
end
|
304
|
-
|
303
|
+
|
305
304
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
if not stateList.member?(st.name)
|
316
|
-
stateList[st.name] = st
|
317
|
-
st.edges.each {|label, dest| genAux(stateList, dest)}
|
305
|
+
private
|
306
|
+
|
307
|
+
def genAux(stateList, st)
|
308
|
+
if not stateList.member?(st.name)
|
309
|
+
stateList[st.name] = st
|
310
|
+
st.edges.each {|label, dest| genAux(stateList, dest)}
|
311
|
+
end
|
312
|
+
end
|
313
|
+
|
318
314
|
end
|
319
|
-
|
315
|
+
|
316
|
+
end # module ToknInternal
|
320
317
|
|