tokn 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.txt +194 -0
- data/bin/tokncompile +16 -0
- data/bin/toknprocess +26 -0
- data/figures/sample_dfa.pdf +0 -0
- data/lib/tokn/code_set.rb +392 -0
- data/lib/tokn/dfa.rb +196 -0
- data/lib/tokn/dfa_builder.rb +261 -0
- data/lib/tokn/range_partition.rb +233 -0
- data/lib/tokn/reg_parse.rb +379 -0
- data/lib/tokn/state.rb +320 -0
- data/lib/tokn/token_defn_parser.rb +156 -0
- data/lib/tokn/tokenizer.rb +211 -0
- data/lib/tokn/tokn_const.rb +29 -0
- data/lib/tokn/tools.rb +186 -0
- data/lib/tokn.rb +1 -0
- data/test/data/sampletext.txt +11 -0
- data/test/data/sampletokens.txt +32 -0
- data/test/simple.rb +33 -0
- data/test/test.rb +519 -0
- data/test/testcmds +4 -0
- metadata +69 -0
@@ -0,0 +1,379 @@
|
|
1
|
+
require_relative 'tools'
|
2
|
+
req('code_set state')
|
3
|
+
|
4
|
+
# Raised when a regular expression script cannot be parsed.
#
# Derives from StandardError (rather than Exception) so that a plain
# `rescue` / `rescue => e` in client code catches it, and so that it is never
# confused with interpreter-level conditions such as SystemExit or Interrupt.
# Code that rescues ParseException (or Exception) explicitly is unaffected.
#
class ParseException < StandardError
end
|
6
|
+
|
7
|
+
# Parses a single regular expression from a string.
# Produces an NFA with distinguished start and end states
# (none of these states are marked as final states)
#
# Here is the grammar for regular expressions.  Spaces are ignored,
# and can be liberally sprinkled within the regular expressions to
# aid readability.  To represent a space, the \s escape sequence must be used.
# See the file 'sampletokens.txt' for some examples.
#
# Expressions have one of these types:
#
#   E : base class
#   J : a Join expression, formed by concatenating one or more together
#   Q : a Quantified expression; followed optionally by '*', '+', or '?'
#   P : a Parenthesized expression, which is optionally surrounded with (), {}, []
#
#   E -> J '|' E
#      | J
#
#   J -> Q J
#      | Q
#
#   Q -> P '*'
#      | P '+'
#      | P '?'
#      | P
#
#   P -> '(' E ')'
#      | '{' TOKENNAME '}'
#      | '[^' SETSEQ ']'      A code not appearing in the set
#      | '[' SETSEQ ']'
#      | CHARCODE
#
#   SETSEQ -> SET SETSEQ
#           | SET
#
#   SET -> CHARCODE
#        | CHARCODE '-' CHARCODE
#
#   CHARCODE ->
#          a | b | c ...      any printable except {,},[, etc.
#        | \xhh               hex value from 00...ff
#        | \uhhhh             hex value from 0000...ffff (e.g., unicode)
#        | \f | \n | \r | \t  formfeed, linefeed, return, tab
#        | \s                 a space (' ')
#        | \*                 where * is some other non-alphabetic
#                             character that needs to be escaped
#
# The parser performs recursive descent parsing;
# each method returns an NFA represented by
# a pair of states: the start and end states.
#
class RegParse

  # Characters that may not follow a backslash unless they are one of the
  # recognized escape letters (x, u, f, n, r, t, s).
  NO_ESCAPE_CHARS = Regexp.new("[A-Za-z0-9]")

  # A legal token name inside '{' ... '}'.  Anchored with \A / \z so the
  # whole name must match (^ / $ would only anchor a single line).
  TOKENREF_EXPR = Regexp.new('\A[_A-Za-z][_A-Za-z0-9]*\z')

  # Single-letter escapes (lower case only) and the character codes they denote
  ESCAPE_CODES = {
    'f' => "\f".ord,
    'r' => "\r".ord,
    'n' => "\n".ord,
    't' => "\t".ord,
    's' => " ".ord
  }.freeze

  attr_reader :startState, :endState

  # Construct a parser and perform the parsing
  # @param script script to parse
  # @param tokenDefMap if not nil, a map of previously parsed regular expressions
  #    (mapping names to ids) to be consulted if a curly brace expression appears
  #    in the script
  # @raise ParseException if the script is not a legal regular expression
  #
  def initialize(script, tokenDefMap = nil)
    @script = script.strip
    @nextStateId = 0
    @tokenDefMap = tokenDefMap
    parseScript
  end

  # Debug description: the script plus its start and end states
  #
  def inspect
    s = "RegParse: #{@script}"
    s += " start:"+d(@startState)+" end:"+d(@endState)
    return s
  end

  private

  # Raise a ParseException, with a helpful message indicating
  # the parser's current location within the string.
  #
  # The message shows up to three characters on either side of the
  # problem position, separated by ' !!! ', with '...' where the script
  # has been truncated.
  #
  def abort(msg)
    # Assume we've already read the problem character.  Both indices are
    # clamped at zero: a negative start would be taken as an offset from
    # the END of the string and yield the wrong (usually empty) context.
    i = [@cursor - 1, 0].max
    from = [i - 3, 0].max

    lead = from > 0 ? '...' : ''
    trail = (i + 3 < @script.size) ? '...' : ''

    raise ParseException,
          "#{msg}: #{lead}#{@script[from...i]} !!! #{@script[i...i + 3]}#{trail}"
  end

  # Read next character as a hex digit
  # @return value of the digit, 0...15
  #
  def readHex
    ch = read
    abort "Missing hex digit" unless ch =~ /\A[0-9A-Fa-f]\z/
    ch.hex
  end

  # Parse character definition (CHARCODE) from input
  # @return the character code, as an integer
  #
  def parseChar
    c = read
    val = c.ord

    if "{}[]*?+|-^()".include?(c) or val <= 0x20
      abort "Unexpected or unescaped character"
    end

    return val unless c == '\\'

    # Escape sequence; examine the character following the backslash
    c = read

    if "xX".include? c
      val = (readHex() << 4) | readHex()
    elsif "uU".include? c
      val = (readHex() << 12) | (readHex() << 8) | (readHex() << 4) | readHex()
    else
      val = ESCAPE_CODES[c]
      unless val
        # Not a known escape letter: only non-alphanumerics may be
        # escaped to stand for themselves
        if c =~ NO_ESCAPE_CHARS
          abort "Unsupported escape sequence ("+c+")"
        end
        val = c.ord
      end
    end

    return val
  end

  # Parse a CHARCODE and build a two-state NFA accepting exactly that code
  #
  def parseCharNFA
    val = parseChar

    # Construct a pair of states with an edge between them
    # labelled with this character code
    sA = newState
    sB = newState
    cset = CodeSet.new
    cset.add(val)
    sA.addEdge(cset, sB)
    return [sA,sB]
  end

  # Debug helper: the next few characters of the script, starting at the cursor
  #
  def dbInfo
    j = @cursor
    k = j + 5
    if k >= @script.size
      return @script[j..k]+"<<<== end"
    else
      return @script[j..k]+"..."
    end
  end

  # Parse the whole script, storing the resulting NFA's start and end states
  #
  def parseScript
    # Set up the input scanner
    @cursor = 0

    @startState, @endState = parseE

    # parseE stops early only at a ')' that has no matching '('; anything
    # left over would otherwise be silently ignored.
    if peek
      read
      abort "Unbalanced ')'"
    end
  end

  # Allocate a state with the next unused id
  #
  def newState
    s = State.new(@nextStateId)
    @nextStateId += 1
    return s
  end

  # Parse a SET: a single code, or a range 'a-b'
  # @return [u, v] where the set is the half-open range u...v
  #
  def parseSET
    u = parseChar
    v = u+1
    if readIf('-')
      v = parseChar() + 1
      if v <= u
        abort "Illegal range"
      end
    end
    return u,v
  end

  # Parse a bracketed set sequence, '[' SETSEQ ']' or '[^' SETSEQ ']'
  # @return [start, end] states joined by a single edge labelled with the set
  #
  def parseSETSEQ
    db = false

    !db || pr("parseSETSEQ\n")

    read('[')
    negated = readIf('^')
    !db || pr(" negated=%s\n",negated)

    rs = CodeSet.new

    # At least one SET is required
    u,v = parseSET
    rs.add(u,v)
    !db || pr(" initial set=%s\n",d(rs))

    while not readIf(']')
      u,v = parseSET
      rs.add(u,v)
      !db || pr(" added another; %s\n",d(rs))
    end

    if negated
      rs.negate
      !db || pr(" negated=%s\n",d(rs))
    end

    if rs.empty?
      abort "Empty character range"
    end

    sA = newState
    sB = newState
    sA.addEdge(rs, sB)
    return [sA,sB]
  end

  # Parse a token reference, '{' TOKENNAME '}', and splice in a copy of
  # the NFA of the previously defined token
  # @return [start, end] states of the duplicated NFA
  #
  def parseTokenDef
    read('{')
    name = ''
    while !readIf('}')
      name += read
    end

    if name !~ TOKENREF_EXPR
      abort "Problem with token name"
    end

    tokInfo = @tokenDefMap && @tokenDefMap[name]
    if !tokInfo
      abort "Undefined token"
    end

    # The second element of the map entry is the token's parsed expression
    rg = tokInfo[1]

    # Duplicate its NFA, renumbering states from our next unused id
    oldToNewMap, @nextStateId = rg.startState.duplicateNFA(@nextStateId)

    [oldToNewMap[rg.startState], oldToNewMap[rg.endState]]
  end

  # Parse a P expression (see grammar)
  #
  def parseP
    ch = peek
    if ch == '('
      read
      e1 = parseE
      read ')'
    elsif ch == '{'
      e1 = parseTokenDef
    elsif ch == '['
      e1 = parseSETSEQ
    else
      e1 = parseCharNFA
    end
    return e1
  end

  # Parse an E expression: one or more J's separated by '|'
  #
  def parseE
    e1 = parseJ
    if readIf('|')
      e2 = parseE

      # New start state branches to both alternatives;
      # both alternatives lead to new end state
      u = newState
      v = newState
      u.addEps(e1[0])
      u.addEps(e2[0])
      e1[1].addEps(v)
      e2[1].addEps(v)
      e1 = [u,v]
    end
    return e1
  end

  # Parse a J expression: one or more Q's concatenated together
  #
  def parseJ
    e1 = parseQ
    nxt = peek
    if nxt and not "|)".include? nxt
      e2 = parseJ
      e1[1].addEps(e2[0])
      e1 = [e1[0],e2[1]]
    end

    return e1
  end

  # Parse a Q expression: a P optionally followed by '*', '+', or '?'
  #
  def parseQ
    e1 = parseP
    nxt = peek

    if nxt == '*'
      read
      # zero or more: may skip, may repeat
      e1[0].addEps(e1[1])
      e1[1].addEps(e1[0])
    elsif nxt == '+'
      read
      # one or more: may repeat
      e1[1].addEps(e1[0])
    elsif nxt == '?'
      read
      # zero or one: may skip
      e1[0].addEps(e1[1])
    end
    return e1
  end

  # Skip spaces and tabs, then return the next character without consuming it
  # @param mustExist retained for compatibility; it does not alter the result
  # @return the character, or nil if at end of input
  #
  def peek(mustExist = false)
    # skip over any non-linefeed whitespace
    while @cursor < @script.size && " \t".index(@script[@cursor])
      @cursor += 1
    end
    # Indexing one past the last character yields nil
    @script[@cursor]
  end

  # Consume the next character if it equals expChar
  # @return true if the character was consumed
  #
  def readIf(expChar)
    r = (peek == expChar)
    if r
      read
    end
    return r
  end

  # Consume and return the next character
  # @param expChar if not nil, the character that must appear next
  # @raise ParseException if at end of input, or the character isn't expChar
  #
  def read(expChar = nil)
    ch = peek
    if ch and ((not expChar) or ch == expChar)
      @cursor += 1
      ch
    else
      abort 'Unexpected end of input'
    end
  end
end
|
379
|
+
|
data/lib/tokn/state.rb
ADDED
@@ -0,0 +1,320 @@
|
|
1
|
+
require 'set'
|
2
|
+
require_relative 'tools'
|
3
|
+
req 'tokn_const'
|
4
|
+
|
5
|
+
|
6
|
+
# A state within a state machine (NFA or DFA); also, various utility functions
# for manipulating state machines.  Observe that a state machine can be
# referred to by its start state.
#
# Each state has a set of directed edges to other states, where each edge is
# labelled with a CodeSet.
#
# It also has a unique id (unique within a particular state machine),
# and a (boolean) final state flag.
#
# For debug purposes, both the state and its edges can be labelled.
#
class State
  include Tokn

  attr_accessor :id
  attr_accessor :finalState
  alias_method :finalState?, :finalState
  attr_accessor :label

  # Edges are a list of [label:CodeSet, dest:State] pairs
  attr_reader :edges

  # Produce a readable description of an NFA, for debug purposes
  #
  # > st  start state
  #
  def self.dumpNFA(st)
    str = "NFA:\n"
    states, _, _ = st.reachableStates
    states.each do |s|
      str += " "+d(s)+"\n"
      str += " edges= "+d(s.edges)+"\n"
      s.edges.each{ |lbl,dest| str += " "+d(lbl)+" ==> "+d(dest)+"\n"}
    end
    str
  end

  # States are hashed by id, so that two states with the same id are
  # treated as the same member of a Set or key of a Hash
  #
  def hash
    return @id
  end

  def eql?(other)
    return id == other.id
  end

  # Construct a state with no edges
  #
  def initialize(id)
    @edges = []
    @id = id
  end

  # Remove all edges leaving this state
  #
  def clearEdges
    @edges.clear
  end

  # Add an edge
  #  codeSet : the character codes to label it with
  #  destState : destination state
  #
  def addEdge(codeSet,destState)
    @edges.push([codeSet, destState])
  end

  # Add a e-transition edge
  #  destState : destination state
  #
  def addEps(destState)
    addEdge(CodeSet.new(EPSILON), destState)
  end

  def inspect
    name
  end

  # Name of the state: 'S' followed by its id, then ': <label>' if it has one
  #
  def name
    nm = 'S' + d(id)
    if label
      # Interpolate, so a label that isn't a String doesn't raise TypeError
      nm += ": #{label}"
    end
    nm
  end

  # Normalize a state machine, by normalizing each state
  # reachable from the start state (see normalize)
  #
  # > start state
  #
  def self.normalizeStates(startState)
    stateSet, _,_ = startState.reachableStates
    stateSet.map{|s| s.normalize}
  end

  # Generate a PDF of the state machine;
  # Makes a system call to the dot utility to convert a .dot file to a .pdf
  #
  def generatePDF(title = "nfa")
    stateList = {}

    startState = self
    genAux(stateList, startState)

    g = ""
    g += "digraph "+title+" {\n"
    g += " '' [shape=none]\n"

    # Declare the nodes; final states get a double border
    stateList.each_value do |s|
      g += " '" + s.name + "' [shape="
      if s.finalState?
        g += "doubleoctagon"
      else
        g += "octagon"
      end
      g += "]\n"
    end

    g += "\n"
    # An unlabelled, invisible node points at the start state
    g += " '' -> '" + startState.name + "'\n"
    stateList.each_value do |s|
      s.edges.each do |crs, s2|
        g += " '"+s.name+"' -> '" + s2.name + "' [label='"
        g += d(crs)
        g += "'][fontname=Courier][fontsize=12]\n"
      end
    end

    g += "\n}\n"
    # The text was built with single quotes for readability; dot wants double
    g.gsub!( /'/, '"' )

    dotToPDF(g,title)
  end

  # Normalize a state
  #
  #  [] sort edges by destination state ids
  #  [] merge edges that go to a common state
  #
  # The merged labels are fresh copies; the original edge labels are
  # not modified.  Note that edges with empty labels are NOT removed here.
  #
  def normalize()

    db = false

    !db || pr("\n\nnormalize state:\n %s\nedges=\n%s\n",d(self),d(@edges))

    # Sorting by destination brings edges to a common state together
    @edges.sort!{|x,y| x[1].id <=> y[1].id}
    !db || pr(" sorted edges: %s\n",d(@edges))

    newEdges = []
    prevLabel, prevDest = nil,nil

    edges.each do |lbl,dest|
      !db || pr(" processing edge %s, %s\n",d(lbl),d(dest))

      if prevDest and prevDest.id == dest.id
        !db || pr(" adding set %s to prevLabel %s...\n",d(lbl),d(prevLabel))
        prevLabel.addSet(lbl)
        !db || pr(" ...now %s\n",d(prevLabel))
      else
        if prevDest
          newEdges.push([prevLabel,prevDest])
        end
        # Must start a fresh copy!  Don't want to modify the original label.
        prevLabel = lbl.makeCopy()
        prevDest = dest
        !db || pr(" pushed onto new edges\n")
      end
    end

    # Flush the edge still being accumulated
    if prevDest
      newEdges.push([prevLabel,prevDest])
    end

    @edges = newEdges
    !db || pr("edges now: %s\n",d(@edges))
  end

  # Duplicate the NFA reachable from this state, possibly with new ids
  #
  # > dupBaseId : lowest id to use for duplicate; if nil, uses
  #     next available id
  # < [ map of original states => duplicate states;
  #     1 + highest id in new NFA ]
  #
  # The duplicate's edges share their label objects with the originals.
  #
  def duplicateNFA(dupBaseId = nil)
    oldStates, oldMinId, oldMaxId = reachableStates()
    dupBaseId ||= oldMaxId

    oldToNewStateMap = {}

    # First pass: create the duplicate states, shifting ids so that the
    # lowest original id maps to dupBaseId
    oldStates.each do |s|
      s2 = State.new((s.id - oldMinId) + dupBaseId)
      s2.finalState = s.finalState?
      s2.label = s.label

      oldToNewStateMap[s] = s2
    end

    # Second pass: now that every duplicate exists, copy the edges
    oldStates.each do |s|
      s2 = oldToNewStateMap[s]
      s.edges.each{ |lbl,dest| s2.addEdge(lbl, oldToNewStateMap[dest])}
    end

    [oldToNewStateMap, (oldMaxId - oldMinId) + dupBaseId]
  end

  # Construct the reverse of the NFA starting at this state
  # < start state of reversed NFA
  #
  def reverseNFA()

    stateSet, _, maxId = reachableStates()

    edgeList = []
    newStartStateList = []
    newStateMap = {}

    stateSet.each do |s|

      u = State.new(s.id)
      newStateMap[u.id] = u

      # The original start state becomes the (only) final state
      if s.id == self.id
        u.finalState = true
      end

      # Each original final state becomes a start state
      if s.finalState?
        newStartStateList.push(u)
      end

      # Record each edge with its direction flipped
      s.edges.each {|lbl, dest| edgeList.push([dest.id, s.id, lbl])}

    end

    edgeList.each do |srcId, destId, lbl|
      srcState = newStateMap[srcId]
      destState = newStateMap[destId]
      srcState.addEdge(lbl, destState)
    end

    # Create a distinguished start node that points to each of the start nodes
    w = State.new(maxId)
    newStartStateList.each {|s| w.addEps(s)}
    w
  end

  # Build set of states reachable from this state
  #
  # < [ set,    set of states reachable from this state (including itself)
  #     minId,  lowest id in set
  #     maxId   1 + highest id in set
  #   ]
  #
  def reachableStates()
    set = Set.new
    stack = []
    stack.push(self)

    maxId = nil
    minId = nil

    while !stack.empty?
      st = stack.pop
      set.add(st)

      if !minId || minId > st.id
        minId = st.id
      end
      if !maxId || maxId <= st.id
        maxId = 1 + st.id
      end

      st.edges.each do |lbl, dest|
        # add? returns nil if already present, so each state is pushed once
        if set.add?(dest)
          stack.push(dest)
        end
      end
    end
    [set, minId, maxId]
  end

end
|
309
|
+
|
310
|
+
|
311
|
+
|
312
|
+
private
|
313
|
+
|
314
|
+
# Collect every state reachable from st into stateList, keyed by state name.
#
# States are visited depth-first, following each state's edges in order, so
# the insertion order of stateList is the same as a recursive pre-order walk
# would give.  An explicit stack is used rather than recursion so that
# machines with very long paths cannot overflow the Ruby call stack.
#
#  stateList : hash of state name => state; modified in place
#  st : state to start from
#
def genAux(stateList, st)
  pending = [st]
  until pending.empty?
    cur = pending.pop
    # Membership is tested when a state is taken off the stack, since it
    # may have been reached by another path after it was pushed
    next if stateList.member?(cur.name)

    stateList[cur.name] = cur
    # Push in reverse so the first edge's destination is visited first
    cur.edges.reverse_each { |_label, dest| pending.push(dest) }
  end
end
|
320
|
+
|