tokn 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.txt +194 -0
- data/bin/tokncompile +16 -0
- data/bin/toknprocess +26 -0
- data/figures/sample_dfa.pdf +0 -0
- data/lib/tokn/code_set.rb +392 -0
- data/lib/tokn/dfa.rb +196 -0
- data/lib/tokn/dfa_builder.rb +261 -0
- data/lib/tokn/range_partition.rb +233 -0
- data/lib/tokn/reg_parse.rb +379 -0
- data/lib/tokn/state.rb +320 -0
- data/lib/tokn/token_defn_parser.rb +156 -0
- data/lib/tokn/tokenizer.rb +211 -0
- data/lib/tokn/tokn_const.rb +29 -0
- data/lib/tokn/tools.rb +186 -0
- data/lib/tokn.rb +1 -0
- data/test/data/sampletext.txt +11 -0
- data/test/data/sampletokens.txt +32 -0
- data/test/simple.rb +33 -0
- data/test/test.rb +519 -0
- data/test/testcmds +4 -0
- metadata +69 -0
@@ -0,0 +1,379 @@
|
|
1
|
+
require_relative 'tools'
|
2
|
+
req('code_set state')
|
3
|
+
|
4
|
+
# Raised by RegParse when a regular expression script cannot be parsed.
# The message includes a short excerpt of the script around the problem.
class ParseException < Exception; end
|
6
|
+
|
7
|
+
# Parses a single regular expression from a string.
|
8
|
+
# Produces an NFA with distinguished start and end states
|
9
|
+
# (none of these states are marked as final states)
|
10
|
+
#
|
11
|
+
# Here is the grammar for regular expressions. Spaces are ignored,
|
12
|
+
# and can be liberally sprinkled within the regular expressions to
|
13
|
+
# aid readability. To represent a space, the \s escape sequence must be used.
|
14
|
+
# See the file 'sampletokens.txt' for some examples.
|
15
|
+
#
|
16
|
+
# Expressions have one of these types:
|
17
|
+
#
|
18
|
+
# E : base class
|
19
|
+
# J : a Join expression, formed by concatenating one or more together
|
20
|
+
# Q : a Quantified expression; followed optionally by '*', '+', or '?'
|
21
|
+
# P : a Parenthesized expression, which is optionally surrounded with (), {}, []
|
22
|
+
#
|
23
|
+
# E -> J '|' E
|
24
|
+
# | J
|
25
|
+
#
|
26
|
+
# J -> Q J
|
27
|
+
# | Q
|
28
|
+
#
|
29
|
+
# Q -> P '*'
|
30
|
+
# | P '+'
|
31
|
+
# | P '?'
|
32
|
+
# | P
|
33
|
+
#
|
34
|
+
# P -> '(' E ')'
|
35
|
+
# | '{' TOKENNAME '}'
|
36
|
+
# | '[^' SETSEQ ']' A code not appearing in the set
|
37
|
+
# | '[' SETSEQ ']'
|
38
|
+
# | CHARCODE
|
39
|
+
#
|
40
|
+
# SETSEQ -> SET SETSEQ
|
41
|
+
# | SET
|
42
|
+
#
|
43
|
+
# SET -> CHARCODE
|
44
|
+
# | CHARCODE '-' CHARCODE
|
45
|
+
#
|
46
|
+
# CHARCODE ->
|
47
|
+
# a | b | c ... any printable except {,},[, etc.
|
48
|
+
# | \xhh hex value from 00...ff
|
49
|
+
# | \uhhhh hex value from 0000...ffff (e.g., unicode)
|
50
|
+
# | \f | \n | \r | \t formfeed, linefeed, return, tab
|
51
|
+
# | \s a space (' ')
|
52
|
+
# | \* where * is some other non-alphabetic
|
53
|
+
# character that needs to be escaped
|
54
|
+
#
|
55
|
+
# The parser performs recursive descent parsing;
|
56
|
+
# each method returns an NFA represented by
|
57
|
+
# a pair of states: the start and end states.
|
58
|
+
#
|
59
|
+
class RegParse

  # Start and end states of the NFA built from the script
  attr_reader :startState, :endState

  # Construct a parser and perform the parsing
  # @param script script to parse
  # @param tokenDefMap if not nil, a map of previously parsed regular expressions
  #   (token name => entry whose element [1] is the parsed RegParse) to be
  #   consulted if a curly brace expression appears in the script
  # @raise [ParseException] if the script is malformed
  #
  def initialize(script, tokenDefMap = nil)
    @script = script.strip
    @nextStateId = 0
    @tokenDefMap = tokenDefMap
    parseScript
  end

  def inspect
    s = "RegParse: #{@script}"
    s += " start:" + d(@startState) + " end:" + d(@endState)
    s
  end

  private

  # Raise a ParseException, with a helpful message indicating
  # the parser's current location within the string.
  # (Deliberately shadows Kernel#abort inside this class.)
  #
  # @param msg description of the problem
  #
  def abort(msg)
    # Assume we've already read the problem character
    i = [@cursor - 1, 0].max
    # Clamp the start of the excerpt; a negative start index would make
    # String#[] count from the END of the script and lose the context.
    from = [i - 3, 0].max

    s = ''
    s += '...' if from > 0
    s += @script[from...i] || ''
    s += ' !!! '
    s += @script[i...i + 3] || ''
    s += '...' if i + 3 < @script.size
    raise ParseException, msg + ": " + s
  end

  # Read next character as a hex digit (case-insensitive)
  # @return value of the digit, 0..15
  #
  def readHex
    v = read.upcase.ord
    case v
    when 48..57           # '0'..'9'
      v - 48
    when 65..70           # 'A'..'F'
      v - 65 + 10
    else
      abort "Missing hex digit"
    end
  end

  # Characters that may not follow a backslash unless they form one of the
  # recognized escape sequences
  NO_ESCAPE_CHARS = Regexp.new("[A-Za-z0-9]")

  # Parse character definition (CHARCODE) from input
  # @return the character code (an integer)
  #
  def parseChar
    c = read
    val = c.ord

    if "{}[]*?+|-^()".include?(c) || val <= 0x20
      abort "Unexpected or unescaped character"
    end

    # An ordinary character stands for itself
    return val unless c == '\\'

    c = read
    case c
    when 'x', 'X'
      (readHex << 4) | readHex
    when 'u', 'U'
      (readHex << 12) | (readHex << 8) | (readHex << 4) | readHex
    when 'f'
      "\f".ord
    when 'r'
      # (was 'val == ...', a comparison that left val as the backslash code)
      "\r".ord
    when 'n'
      "\n".ord
    when 't'
      "\t".ord
    when 's'
      " ".ord
    else
      # Letters and digits are reserved for escape sequences; any other
      # escaped character stands for itself
      abort("Unsupported escape sequence (" + c + ")") if c =~ NO_ESCAPE_CHARS
      c.ord
    end
  end

  # Parse a CHARCODE and build a two-state NFA for it
  # @return [start state, end state]
  #
  def parseCharNFA
    val = parseChar

    # Construct a pair of states with an edge between them
    # labelled with this character code
    sA = newState
    sB = newState
    cset = CodeSet.new
    cset.add(val)
    sA.addEdge(cset, sB)
    [sA, sB]
  end

  # Debug aid: a short excerpt of the script starting at the cursor
  #
  def dbInfo
    j = @cursor
    k = j + 5
    if k >= @script.size
      @script[j..k] + "<<<== end"
    else
      @script[j..k] + "..."
    end
  end

  # Parse the whole script, storing the resulting NFA's start and end states
  #
  def parseScript
    # Set up the input scanner
    @cursor = 0

    exp = parseE

    # parseE stops at an unmatched ')'; without this check everything from
    # that character onward would be silently ignored
    if peek
      read
      abort "Unexpected character"
    end

    @startState = exp[0]
    @endState = exp[1]
  end

  # Allocate a new state with the next unused id
  #
  def newState
    s = State.new(@nextStateId)
    @nextStateId += 1
    s
  end

  # Parse a SET: a single character code, or a range 'a-b'
  # @return u, v  where the set is the half-open interval u...v
  #
  def parseSET
    u = parseChar
    v = u + 1
    if readIf('-')
      v = parseChar + 1
      abort "Illegal range" if v <= u
    end
    return u, v
  end

  # Parse a bracketed set expression: '[' SETSEQ ']' or '[^' SETSEQ ']'
  # @return [start state, end state] joined by an edge labelled with the set
  #
  def parseSETSEQ
    read('[')
    negated = readIf('^')

    rs = CodeSet.new

    # At least one SET is required
    u, v = parseSET
    rs.add(u, v)

    until readIf(']')
      u, v = parseSET
      rs.add(u, v)
    end

    rs.negate if negated

    abort "Empty character range" if rs.empty?

    sA = newState
    sB = newState
    sA.addEdge(rs, sB)
    [sA, sB]
  end

  TOKENREF_EXPR = Regexp.new('^[_A-Za-z][_A-Za-z0-9]*$')

  # Parse a reference to a previously defined token: '{' TOKENNAME '}'
  # The referenced expression's NFA is duplicated with fresh state ids.
  # @return [start state, end state] of the duplicate
  #
  def parseTokenDef
    read('{')
    name = ''
    until readIf('}')
      name += read
    end

    abort "Problem with token name" if name !~ TOKENREF_EXPR

    tokInfo = @tokenDefMap ? @tokenDefMap[name] : nil
    abort "Undefined token" unless tokInfo

    rg = tokInfo[1]

    oldToNewMap, @nextStateId = rg.startState.duplicateNFA(@nextStateId)

    newStart = oldToNewMap[rg.startState]
    newEnd = oldToNewMap[rg.endState]

    [newStart, newEnd]
  end

  # P -> '(' E ')' | '{' TOKENNAME '}' | '[' SETSEQ ']' | CHARCODE
  #
  def parseP
    ch = peek
    if ch == '('
      read
      e1 = parseE
      read ')'
    elsif ch == '{'
      e1 = parseTokenDef
    elsif ch == '['
      e1 = parseSETSEQ
    else
      e1 = parseCharNFA
    end
    e1
  end

  # E -> J '|' E  |  J
  #
  def parseE
    e1 = parseJ
    if readIf('|')
      e2 = parseE

      # New start state branches to both alternatives;
      # both alternatives lead to a new end state
      u = newState
      v = newState
      u.addEps(e1[0])
      u.addEps(e2[0])
      e1[1].addEps(v)
      e2[1].addEps(v)
      e1 = [u, v]
    end
    e1
  end

  # J -> Q J  |  Q
  #
  def parseJ
    e1 = parseQ
    nxt = peek
    # Anything other than '|', ')' or end of input continues the join
    if nxt && !"|)".include?(nxt)
      e2 = parseJ
      e1[1].addEps(e2[0])
      e1 = [e1[0], e2[1]]
    end
    e1
  end

  # Q -> P '*'  |  P '+'  |  P '?'  |  P
  #
  def parseQ
    e1 = parseP
    nxt = peek

    if nxt == '*'
      read
      e1[0].addEps(e1[1])
      e1[1].addEps(e1[0])
    elsif nxt == '+'
      read
      e1[1].addEps(e1[0])
    elsif nxt == '?'
      read
      e1[0].addEps(e1[1])
    end
    e1
  end

  # Skip spaces and tabs, then return the next character without consuming it
  # @param mustExist accepted for compatibility; it has no effect, since nil
  #   is returned at end of input either way
  # @return next character, or nil if at end of input
  #
  def peek(mustExist = false)
    # skip over any non-linefeed whitespace
    while @cursor < @script.size && " \t".index(@script[@cursor])
      @cursor += 1
    end
    # Indexing at or past the end of the string yields nil
    @script[@cursor]
  end

  # Consume the next character if it equals expChar
  # @return true if the character was consumed
  #
  def readIf(expChar)
    found = (peek == expChar)
    read if found
    found
  end

  # Consume and return the next character
  # @param expChar if not nil, the character that must appear next
  # @raise [ParseException] at end of input, or if expChar was given and
  #   a different character was found
  #
  def read(expChar = nil)
    ch = peek
    abort 'Unexpected end of input' unless ch
    abort("Expected '" + expChar + "'") if expChar && ch != expChar
    @cursor += 1
    ch
  end
end
|
379
|
+
|
data/lib/tokn/state.rb
ADDED
@@ -0,0 +1,320 @@
|
|
1
|
+
require 'set'
|
2
|
+
require_relative 'tools'
|
3
|
+
req 'tokn_const'
|
4
|
+
|
5
|
+
|
6
|
+
# A state within a state machine (NFA or DFA); also, various utility functions
|
7
|
+
# for manipulating state machines. Observe that a state machine can be
|
8
|
+
# referred to by its start state.
|
9
|
+
#
|
10
|
+
# Each state has a set of directed edges to other states, where each edge is
|
11
|
+
# labelled with a CodeSet.
|
12
|
+
#
|
13
|
+
# It also has a unique id (unique within a particular state machine),
|
14
|
+
# and a (boolean) final state flag.
|
15
|
+
#
|
16
|
+
# For debug purposes, both the state and its edges can be labelled.
|
17
|
+
#
|
18
|
+
class State
  include Tokn

  # Id of the state (unique within a particular state machine)
  attr_accessor :id
  # Final state flag
  attr_accessor :finalState
  alias_method :finalState?, :finalState
  # Optional label, shown by #name (for debug purposes)
  attr_accessor :label

  # Edges are a list of [label:CodeSet, dest:State] pairs
  attr_reader :edges

  # Produce a readable description of an NFA, for debug purposes
  #
  # > st  start state
  #
  def self.dumpNFA(st)
    str = "NFA:\n"
    states, = st.reachableStates
    states.each do |s|
      str += " " + d(s) + "\n"
      str += " edges= " + d(s.edges) + "\n"
      s.edges.each { |lbl, dest| str += " " + d(lbl) + " ==> " + d(dest) + "\n" }
    end
    str
  end

  # States are hashed and compared (eql?) by id, so that a Set or Hash
  # treats states with equal ids as the same element
  def hash
    @id
  end

  def eql?(other)
    id == other.id
  end

  def initialize(id)
    @edges = []
    @id = id
  end

  def clearEdges
    @edges.clear
  end

  # Add an edge
  #  codeSet :   the character codes to label it with
  #  destState : destination state
  #
  def addEdge(codeSet, destState)
    @edges.push([codeSet, destState])
  end

  # Add a e-transition edge
  #  destState : destination state
  #
  def addEps(destState)
    addEdge(CodeSet.new(EPSILON), destState)
  end

  def inspect
    name
  end

  # Name of the state: 'S' + id, followed by ": " + label if it has one
  #
  def name
    nm = 'S' + d(id)
    nm += ": " + label if label
    nm
  end

  # Normalize a state machine, by normalizing every state reachable
  # from the start state (see #normalize)
  #
  # > start state
  #
  def self.normalizeStates(startState)
    stateSet, = startState.reachableStates
    stateSet.map { |s| s.normalize }
  end

  # Generate a PDF of the state machine;
  # builds a description in the dot language and hands it to dotToPDF
  #
  def generatePDF(title = "nfa")
    stateList = {}

    startState = self
    genAux(stateList, startState)

    # Single quotes are used while building, and converted to
    # double quotes at the end
    g = ""
    g += "digraph " + title + " {\n"
    g += " '' [shape=none]\n"

    stateList.each_value do |s|
      g += " '" + s.name + "' [shape="
      if s.finalState?
        g += "doubleoctagon"
      else
        g += "octagon"
      end
      g += "]\n"
    end

    g += "\n"
    g += " '' -> '" + startState.name + "'\n"
    stateList.each_value do |s|
      s.edges.each do |crs, s2|
        g += " '" + s.name + "' -> '" + s2.name + "' [label='"
        g += d(crs)
        g += "'][fontname=Courier][fontsize=12]\n"
      end
    end

    g += "\n}\n"
    g.gsub!(/'/, '"')

    dotToPDF(g, title)
  end

  # Normalize a state
  #
  #  [] sort edges by destination state ids
  #  [] merge edges that go to a common state
  #
  # NOTE: edges whose labels are empty are NOT removed here; an earlier
  # version of this comment claimed they were.
  #
  def normalize
    @edges.sort! { |x, y| x[1].id <=> y[1].id }

    newEdges = []
    @edges.each do |label, dest|
      last = newEdges.last
      if last && last[1].id == dest.id
        # Same destination as the previous edge: fold this label into it
        last[0].addSet(label)
      else
        # Must start a fresh copy!  Don't want to modify the original label.
        newEdges.push([label.makeCopy, dest])
      end
    end

    @edges = newEdges
    # Return value kept as in the original implementation
    true
  end

  # Duplicate the NFA reachable from this state, possibly with new ids
  #
  # > dupBaseId : lowest id to use for duplicate; if nil, uses
  #     next available id
  # < [ map of original states => duplicate states;
  #     1 + highest id in new NFA ]
  #
  def duplicateNFA(dupBaseId = nil)
    oldStates, oldMinId, oldMaxId = reachableStates
    dupBaseId ||= oldMaxId

    oldToNewStateMap = {}

    # First pass: create the duplicate states
    oldStates.each do |s|
      s2 = State.new((s.id - oldMinId) + dupBaseId)
      s2.finalState = s.finalState?
      s2.label = s.label

      oldToNewStateMap[s] = s2
    end

    # Second pass: duplicate the edges (the labels themselves are shared)
    oldStates.each do |s|
      s2 = oldToNewStateMap[s]
      s.edges.each { |lbl, dest| s2.addEdge(lbl, oldToNewStateMap[dest]) }
    end

    [oldToNewStateMap, (oldMaxId - oldMinId) + dupBaseId]
  end

  # Construct the reverse of the NFA starting at this state
  # < start state of reversed NFA
  #
  def reverseNFA
    stateSet, _minId, maxId = reachableStates

    edgeList = []
    newStartStateList = []
    newStateMap = {}

    stateSet.each do |s|
      u = State.new(s.id)
      newStateMap[u.id] = u

      # The old start state becomes the (only) final state
      u.finalState = true if s.id == self.id

      # Each old final state becomes a start state
      newStartStateList.push(u) if s.finalState?

      # Record each edge with its direction reversed
      s.edges.each { |lbl, dest| edgeList.push([dest.id, s.id, lbl]) }
    end

    edgeList.each do |srcId, destId, lbl|
      newStateMap[srcId].addEdge(lbl, newStateMap[destId])
    end

    # Create a distinguished start node that points to each of the start nodes
    w = State.new(maxId)
    newStartStateList.each { |s| w.addEps(s) }
    w
  end

  # Build set of states reachable from this state
  #
  # < [ set,    set of states reachable from this state (including itself)
  #     minId,  lowest id in set
  #     maxId   1 + highest id in set
  #   ]
  #
  def reachableStates
    set = Set.new
    stack = []
    stack.push(self)

    maxId = nil
    minId = nil

    until stack.empty?
      st = stack.pop
      set.add(st)

      minId = st.id if !minId || minId > st.id
      maxId = 1 + st.id if !maxId || maxId <= st.id

      st.edges.each do |_lbl, dest|
        # add? returns nil if dest was already in the set
        stack.push(dest) if set.add?(dest)
      end
    end
    [set, minId, maxId]
  end

end
|
309
|
+
|
310
|
+
|
311
|
+
|
312
|
+
private
|
313
|
+
|
314
|
+
# Depth-first helper for generatePDF: record st, and every state reachable
# from it, in stateList (a map of state name => state).  States whose name
# is already present are not visited again.
def genAux(stateList, st)
  return if stateList.member?(st.name)

  stateList[st.name] = st
  st.edges.each { |_label, dest| genAux(stateList, dest) }
end
|
320
|
+
|