tokn 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.txt +4 -5
- data/bin/tokncompile +1 -1
- data/bin/toknprocess +10 -4
- data/lib/tokn/code_set.rb +332 -337
- data/lib/tokn/dfa.rb +187 -162
- data/lib/tokn/dfa_builder.rb +218 -220
- data/lib/tokn/range_partition.rb +205 -203
- data/lib/tokn/reg_parse.rb +336 -331
- data/lib/tokn/state.rb +267 -270
- data/lib/tokn/token_defn_parser.rb +144 -139
- data/lib/tokn/tokenizer.rb +243 -175
- data/lib/tokn/tokn_const.rb +11 -6
- data/lib/tokn/tools.rb +42 -20
- data/test/Example1.rb +50 -0
- data/test/data/compileddfa.txt +1 -0
- data/test/data/sampletext.txt +6 -1
- data/test/test.rb +17 -12
- metadata +7 -6
- data/test/simple.rb +0 -33
data/lib/tokn/state.rb
CHANGED
@@ -1,320 +1,317 @@
|
|
1
1
|
require 'set'
|
2
2
|
require_relative 'tools'
|
3
|
-
req 'tokn_const'
|
4
3
|
|
5
|
-
|
6
|
-
# A state within a state machine (NFA or DFA); also, various utility functions
|
7
|
-
# for manipulating state machines. Observe that a state machine can be
|
8
|
-
# referred to by its start state.
|
9
|
-
#
|
10
|
-
# Each state has a set of directed edges to other states, where each edge is
|
11
|
-
# labelled with a CodeSet.
|
12
|
-
#
|
13
|
-
# It also has a unique id (unique within a particular state machine),
|
14
|
-
# and a (boolean) final state flag.
|
15
|
-
#
|
16
|
-
# For debug purposes, both the state and its edges can be labelled.
|
17
|
-
#
|
18
|
-
class State
|
19
|
-
include Tokn
|
20
|
-
|
21
|
-
attr_accessor :id
|
22
|
-
attr_accessor :finalState
|
23
|
-
alias_method :finalState?, :finalState
|
24
|
-
attr_accessor :label
|
4
|
+
module ToknInternal
|
25
5
|
|
26
|
-
#
|
27
|
-
|
28
|
-
|
29
|
-
# Produce a readable description of an NFA, for debug purposes
|
30
|
-
#
|
31
|
-
# > st start state
|
6
|
+
# A state within a state machine (NFA or DFA); also, various utility functions
|
7
|
+
# for manipulating state machines. Observe that a state machine can be
|
8
|
+
# referred to by its start state.
|
32
9
|
#
|
33
|
-
|
34
|
-
|
35
|
-
map,_,_ = st.reachableStates
|
36
|
-
map.each do |s|
|
37
|
-
str += " "+d(s)+"\n"
|
38
|
-
str += " edges= "+d(s.edges)+"\n"
|
39
|
-
s.edges.each{ |lbl,dest| str += " "+d(lbl)+" ==> "+d(dest)+"\n"}
|
40
|
-
end
|
41
|
-
str
|
42
|
-
end
|
43
|
-
|
44
|
-
def hash
|
45
|
-
return @id
|
46
|
-
end
|
47
|
-
|
48
|
-
def eql?(other)
|
49
|
-
return id == other.id
|
50
|
-
end
|
51
|
-
|
52
|
-
def initialize(id)
|
53
|
-
@edges = []
|
54
|
-
@id = id
|
55
|
-
end
|
56
|
-
|
57
|
-
def clearEdges
|
58
|
-
@edges.clear
|
59
|
-
end
|
60
|
-
|
61
|
-
# Add an edge
|
62
|
-
# codeSet : the character codes to label it with
|
63
|
-
# destState : destination state
|
64
|
-
#
|
65
|
-
def addEdge(codeSet,destState)
|
66
|
-
@edges.push([codeSet, destState])
|
67
|
-
end
|
68
|
-
|
69
|
-
# Add an e-transition edge
|
70
|
-
# destState : destination state
|
71
|
-
#
|
72
|
-
def addEps(destState)
|
73
|
-
addEdge(CodeSet.new(EPSILON), destState)
|
74
|
-
end
|
75
|
-
|
76
|
-
def inspect
|
77
|
-
name
|
78
|
-
end
|
79
|
-
|
80
|
-
def name
|
81
|
-
nm = 'S' + d(id)
|
82
|
-
if label
|
83
|
-
nm += ": "+label
|
84
|
-
end
|
85
|
-
nm
|
86
|
-
end
|
87
|
-
|
88
|
-
# Normalize a state machine.
|
10
|
+
# Each state has a set of directed edges to other states, where each edge is
|
11
|
+
# labelled with a CodeSet.
|
89
12
|
#
|
90
|
-
#
|
91
|
-
#
|
92
|
-
# [] delete edges that have empty labels
|
93
|
-
# [] sort edges by destination state ids
|
13
|
+
# It also has a unique id (unique within a particular state machine),
|
14
|
+
# and a (boolean) final state flag.
|
94
15
|
#
|
95
|
-
#
|
16
|
+
# For debug purposes, both the state and its edges can be labelled.
|
96
17
|
#
|
97
|
-
|
98
|
-
stateSet, _,_ = startState.reachableStates
|
99
|
-
stateSet.map{|s| s.normalize}
|
100
|
-
end
|
101
|
-
|
102
|
-
|
103
|
-
# Generate a PDF of the state machine;
|
104
|
-
# Makes a system call to the dot utility to convert a .dot file to a .pdf
|
105
|
-
#
|
106
|
-
def generatePDF(title = "nfa")
|
107
|
-
stateList = {}
|
18
|
+
class State
|
108
19
|
|
109
|
-
|
110
|
-
|
20
|
+
attr_accessor :id
|
21
|
+
attr_accessor :finalState
|
22
|
+
alias_method :finalState?, :finalState
|
23
|
+
attr_accessor :label
|
111
24
|
|
112
|
-
|
113
|
-
|
114
|
-
g += " '' [shape=none]\n"
|
25
|
+
# Edges are a list of [label:CodeSet, dest:State] pairs
|
26
|
+
attr_reader :edges
|
115
27
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
28
|
+
# Produce a readable description of an NFA, for debug purposes
|
29
|
+
#
|
30
|
+
# > st start state
|
31
|
+
#
|
32
|
+
def self.dumpNFA(st)
|
33
|
+
str = "NFA:\n"
|
34
|
+
map,_,_ = st.reachableStates
|
35
|
+
map.each do |s|
|
36
|
+
str += " "+d(s)+"\n"
|
37
|
+
str += " edges= "+d(s.edges)+"\n"
|
38
|
+
s.edges.each{ |lbl,dest| str += " "+d(lbl)+" ==> "+d(dest)+"\n"}
|
122
39
|
end
|
123
|
-
|
40
|
+
str
|
124
41
|
end
|
125
42
|
|
126
|
-
|
127
|
-
|
128
|
-
stateList.each_value do |s|
|
129
|
-
s.edges.each do |crs, s2|
|
130
|
-
g += " '"+s.name+"' -> '" + s2.name + "' [label='"
|
131
|
-
g += d(crs)
|
132
|
-
g += "'][fontname=Courier][fontsize=12]\n"
|
133
|
-
end
|
43
|
+
def hash
|
44
|
+
return @id
|
134
45
|
end
|
135
46
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
dotToPDF(g,title)
|
140
|
-
end
|
141
|
-
|
47
|
+
def eql?(other)
|
48
|
+
return id == other.id
|
49
|
+
end
|
142
50
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
# [] sort edges by destination state ids
|
148
|
-
#
|
149
|
-
def normalize()
|
51
|
+
def initialize(id)
|
52
|
+
@edges = []
|
53
|
+
@id = id
|
54
|
+
end
|
150
55
|
|
151
|
-
|
56
|
+
def clearEdges
|
57
|
+
@edges.clear
|
58
|
+
end
|
152
59
|
|
153
|
-
|
60
|
+
# Add an edge
|
61
|
+
# codeSet : the character codes to label it with
|
62
|
+
# destState : destination state
|
63
|
+
#
|
64
|
+
def addEdge(codeSet,destState)
|
65
|
+
@edges.push([codeSet, destState])
|
66
|
+
end
|
154
67
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
68
|
+
# Add an e-transition edge
|
69
|
+
# destState : destination state
|
70
|
+
#
|
71
|
+
def addEps(destState)
|
72
|
+
addEdge(CodeSet.new(EPSILON), destState)
|
73
|
+
end
|
161
74
|
|
162
|
-
|
163
|
-
|
75
|
+
def inspect
|
76
|
+
name
|
77
|
+
end
|
78
|
+
|
79
|
+
def name
|
80
|
+
nm = 'S' + d(id)
|
81
|
+
if label
|
82
|
+
nm += ": "+label
|
83
|
+
end
|
84
|
+
nm
|
85
|
+
end
|
86
|
+
|
87
|
+
# Normalize a state machine.
|
88
|
+
#
|
89
|
+
# For each state:
|
90
|
+
# [] merge edges that go to a common state
|
91
|
+
# [] delete edges that have empty labels
|
92
|
+
# [] sort edges by destination state ids
|
93
|
+
#
|
94
|
+
# > start state
|
95
|
+
#
|
96
|
+
def self.normalizeStates(startState)
|
97
|
+
stateSet, _,_ = startState.reachableStates
|
98
|
+
stateSet.map{|s| s.normalize}
|
99
|
+
end
|
164
100
|
|
165
|
-
edges.each do |label,dest|
|
166
|
-
!db || pr(" processing edge %s, %s\n",d(label),d(dest))
|
167
101
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
102
|
+
# Generate a PDF of the state machine;
|
103
|
+
# Makes a system call to the dot utility to convert a .dot file to a .pdf
|
104
|
+
#
|
105
|
+
def generatePDF(title = "nfa")
|
106
|
+
stateList = {}
|
107
|
+
|
108
|
+
startState = self
|
109
|
+
genAux(stateList, startState)
|
110
|
+
|
111
|
+
g = ""
|
112
|
+
g += "digraph "+title+" {\n"
|
113
|
+
g += " '' [shape=none]\n"
|
114
|
+
|
115
|
+
stateList.each_value do |s|
|
116
|
+
g += " '" + s.name + "' [shape="
|
117
|
+
if s.finalState?
|
118
|
+
g += "doubleoctagon"
|
119
|
+
else
|
120
|
+
g += "octagon"
|
181
121
|
end
|
122
|
+
g += "]\n"
|
182
123
|
end
|
183
|
-
if prevDest
|
184
|
-
newEdges.push([prevLabel,prevDest])
|
185
|
-
end
|
186
|
-
|
187
|
-
@edges = newEdges
|
188
|
-
!db || pr("edges now: %s\n",d(@edges))
|
189
|
-
end
|
190
|
-
|
191
|
-
|
192
|
-
# Duplicate the NFA reachable from this state, possibly with new ids
|
193
|
-
#
|
194
|
-
# > dupBaseId : lowest id to use for duplicate; if nil, uses
|
195
|
-
# next available id
|
196
|
-
# < [ map of original states => duplicate states;
|
197
|
-
# 1 + highest id in new NFA ]
|
198
|
-
#
|
199
|
-
def duplicateNFA(dupBaseId = nil)
|
200
|
-
oldStates, oldMinId, oldMaxId = reachableStates()
|
201
|
-
dupBaseId ||= oldMaxId
|
202
124
|
|
203
|
-
|
204
|
-
|
125
|
+
g += "\n"
|
126
|
+
g += " '' -> '" + startState.name + "'\n"
|
127
|
+
stateList.each_value do |s|
|
128
|
+
s.edges.each do |crs, s2|
|
129
|
+
g += " '"+s.name+"' -> '" + s2.name + "' [label='"
|
130
|
+
g += d(crs)
|
131
|
+
g += "'][fontname=Courier][fontsize=12]\n"
|
132
|
+
end
|
133
|
+
end
|
205
134
|
|
206
|
-
|
207
|
-
|
208
|
-
s2.finalState = s.finalState?
|
209
|
-
s2.label = s.label
|
135
|
+
g += "\n}\n"
|
136
|
+
g.gsub!( /'/, '"' )
|
210
137
|
|
211
|
-
|
138
|
+
dotToPDF(g,title)
|
212
139
|
end
|
140
|
+
|
213
141
|
|
214
|
-
|
215
|
-
|
216
|
-
|
142
|
+
# Normalize a state
|
143
|
+
#
|
144
|
+
# [] merge edges that go to a common state
|
145
|
+
# [] delete edges that have empty labels
|
146
|
+
# [] sort edges by destination state ids
|
147
|
+
#
|
148
|
+
def normalize()
|
149
|
+
|
150
|
+
db = false
|
151
|
+
|
152
|
+
!db || pr("\n\nnormalize state:\n %s\nedges=\n%s\n",d(self),d(@edges))
|
153
|
+
|
154
|
+
@edges.sort!{|x,y|
|
155
|
+
label1,dest1 = x
|
156
|
+
label2,dest2 = y
|
157
|
+
dest1.id <=> dest2.id
|
158
|
+
}
|
159
|
+
!db || pr(" sorted edges: %s\n",d(@edges))
|
160
|
+
|
161
|
+
newEdges = []
|
162
|
+
prevLabel, prevDest = nil,nil
|
163
|
+
|
164
|
+
edges.each do |label,dest|
|
165
|
+
!db || pr(" processing edge %s, %s\n",d(label),d(dest))
|
166
|
+
|
167
|
+
if prevDest and prevDest.id == dest.id
|
168
|
+
# changed = true
|
169
|
+
!db || pr(" adding set %s to prevLabel %s...\n",d(label),d(prevLabel))
|
170
|
+
prevLabel.addSet(label)
|
171
|
+
!db || pr(" ...now %s\n",d(prevLabel))
|
172
|
+
else
|
173
|
+
if prevDest
|
174
|
+
newEdges.push([prevLabel,prevDest])
|
175
|
+
end
|
176
|
+
# Must start a fresh copy! Don't want to modify the original label.
|
177
|
+
prevLabel = label.makeCopy()
|
178
|
+
prevDest = dest
|
179
|
+
!db || pr(" pushed onto new edges\n")
|
180
|
+
end
|
181
|
+
end
|
182
|
+
if prevDest
|
183
|
+
newEdges.push([prevLabel,prevDest])
|
184
|
+
end
|
185
|
+
|
186
|
+
@edges = newEdges
|
187
|
+
!db || pr("edges now: %s\n",d(@edges))
|
217
188
|
end
|
218
|
-
|
219
|
-
[oldToNewStateMap, (oldMaxId - oldMinId) + dupBaseId]
|
220
|
-
end
|
221
|
-
|
222
|
-
|
223
189
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
newStateMap = {}
|
237
|
-
|
238
|
-
stateSet.each do |s|
|
239
|
-
|
240
|
-
u = State.new(s.id)
|
241
|
-
newStateMap[u.id] = u
|
190
|
+
|
191
|
+
# Duplicate the NFA reachable from this state, possibly with new ids
|
192
|
+
#
|
193
|
+
# > dupBaseId : lowest id to use for duplicate; if nil, uses
|
194
|
+
# next available id
|
195
|
+
# < [ map of original states => duplicate states;
|
196
|
+
# 1 + highest id in new NFA ]
|
197
|
+
#
|
198
|
+
def duplicateNFA(dupBaseId = nil)
|
199
|
+
oldStates, oldMinId, oldMaxId = reachableStates()
|
200
|
+
dupBaseId ||= oldMaxId
|
242
201
|
|
243
|
-
|
244
|
-
|
245
|
-
u.finalState = true
|
246
|
-
end
|
202
|
+
|
203
|
+
oldToNewStateMap = {}
|
247
204
|
|
248
|
-
|
249
|
-
|
205
|
+
oldStates.each do |s|
|
206
|
+
s2 = State.new((s.id - oldMinId) + dupBaseId)
|
207
|
+
s2.finalState = s.finalState?
|
208
|
+
s2.label = s.label
|
209
|
+
|
210
|
+
oldToNewStateMap[s] = s2
|
250
211
|
end
|
251
212
|
|
252
|
-
|
213
|
+
oldStates.each do |s|
|
214
|
+
s2 = oldToNewStateMap[s]
|
215
|
+
s.edges.each{ |lbl,dest| s2.addEdge(lbl, oldToNewStateMap[dest])}
|
216
|
+
end
|
253
217
|
|
218
|
+
[oldToNewStateMap, (oldMaxId - oldMinId) + dupBaseId]
|
254
219
|
end
|
255
|
-
|
256
|
-
edgeList.each do |srcId, destId, lbl|
|
257
|
-
srcState = newStateMap[srcId]
|
258
|
-
destState = newStateMap[destId]
|
259
|
-
srcState.addEdge(lbl, destState)
|
260
|
-
end
|
261
|
-
|
262
|
-
# Create a distinguished start node that points to each of the start nodes
|
263
|
-
w = State.new(maxId)
|
264
|
-
newStartStateList.each {|s| w.addEps(s)}
|
265
|
-
w
|
266
|
-
end
|
267
220
|
|
268
|
-
|
269
|
-
# Build set of states reachable from this state
|
270
|
-
#
|
271
|
-
# > (no arguments; the search starts from this state)
|
272
|
-
# < [ set, set of states reachable from this state
|
273
|
-
# minId, lowest id in set
|
274
|
-
# maxId 1 + highest id in set
|
275
|
-
# ]
|
276
|
-
#
|
277
|
-
def reachableStates()
|
278
|
-
set = Set.new
|
279
|
-
stack = []
|
280
|
-
stack.push(self)
|
281
|
-
|
282
|
-
maxId = nil
|
283
|
-
minId = nil
|
221
|
+
|
284
222
|
|
285
|
-
|
286
|
-
|
287
|
-
|
223
|
+
# Construct the reverse of the NFA starting at this state
|
224
|
+
# < start state of reversed NFA
|
225
|
+
#
|
226
|
+
def reverseNFA()
|
227
|
+
|
228
|
+
stateSet, minId, maxId = reachableStates()
|
229
|
+
|
230
|
+
edgeList = []
|
231
|
+
|
232
|
+
newStartStateList = []
|
233
|
+
newFinalStateList = []
|
288
234
|
|
289
|
-
|
290
|
-
|
235
|
+
newStateMap = {}
|
236
|
+
|
237
|
+
stateSet.each do |s|
|
238
|
+
|
239
|
+
u = State.new(s.id)
|
240
|
+
newStateMap[u.id] = u
|
241
|
+
|
242
|
+
if s.id == self.id
|
243
|
+
newFinalStateList.push(u)
|
244
|
+
u.finalState = true
|
245
|
+
end
|
246
|
+
|
247
|
+
if s.finalState?
|
248
|
+
newStartStateList.push(u)
|
249
|
+
end
|
250
|
+
|
251
|
+
s.edges.each {|lbl, dest| edgeList.push([dest.id, s.id, lbl])}
|
252
|
+
|
291
253
|
end
|
292
|
-
|
293
|
-
|
254
|
+
|
255
|
+
edgeList.each do |srcId, destId, lbl|
|
256
|
+
srcState = newStateMap[srcId]
|
257
|
+
destState = newStateMap[destId]
|
258
|
+
srcState.addEdge(lbl, destState)
|
294
259
|
end
|
295
260
|
|
296
|
-
|
297
|
-
|
298
|
-
|
261
|
+
# Create a distinguished start node that points to each of the start nodes
|
262
|
+
w = State.new(maxId)
|
263
|
+
newStartStateList.each {|s| w.addEps(s)}
|
264
|
+
w
|
265
|
+
end
|
266
|
+
|
267
|
+
|
268
|
+
# Build set of states reachable from this state
|
269
|
+
#
|
270
|
+
# > (no arguments; the search starts from this state)
|
271
|
+
# < [ set, set of states reachable from this state
|
272
|
+
# minId, lowest id in set
|
273
|
+
# maxId 1 + highest id in set
|
274
|
+
# ]
|
275
|
+
#
|
276
|
+
def reachableStates()
|
277
|
+
set = Set.new
|
278
|
+
stack = []
|
279
|
+
stack.push(self)
|
280
|
+
|
281
|
+
maxId = nil
|
282
|
+
minId = nil
|
283
|
+
|
284
|
+
while !stack.empty?
|
285
|
+
st = stack.pop
|
286
|
+
set.add(st)
|
287
|
+
|
288
|
+
if !minId || minId > st.id
|
289
|
+
minId = st.id
|
290
|
+
end
|
291
|
+
if !maxId || maxId <= st.id
|
292
|
+
maxId = 1 + st.id
|
293
|
+
end
|
294
|
+
|
295
|
+
st.edges.each do |lbl, dest|
|
296
|
+
if set.add?(dest)
|
297
|
+
stack.push(dest)
|
298
|
+
end
|
299
299
|
end
|
300
300
|
end
|
301
|
+
[set, minId, maxId]
|
301
302
|
end
|
302
|
-
|
303
|
-
end
|
304
|
-
|
303
|
+
|
305
304
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
if not stateList.member?(st.name)
|
316
|
-
stateList[st.name] = st
|
317
|
-
st.edges.each {|label, dest| genAux(stateList, dest)}
|
305
|
+
private
|
306
|
+
|
307
|
+
def genAux(stateList, st)
|
308
|
+
if not stateList.member?(st.name)
|
309
|
+
stateList[st.name] = st
|
310
|
+
st.edges.each {|label, dest| genAux(stateList, dest)}
|
311
|
+
end
|
312
|
+
end
|
313
|
+
|
318
314
|
end
|
319
|
-
|
315
|
+
|
316
|
+
end # module ToknInternal
|
320
317
|
|