scanner_generator 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/README +0 -0
- data/Rakefile +1 -0
- data/lib/scanner_generator/finite_state_machine.rb +619 -0
- data/lib/scanner_generator/thompson_construction.rb +387 -0
- data/lib/scanner_generator/version.rb +3 -0
- data/lib/scanner_generator.rb +5 -0
- data/push.sh +1 -0
- data/scanner_generator.gemspec +29 -0
- data/spec/scanner_generator_spec.rb +166 -0
- metadata +89 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README
ADDED
File without changes
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,619 @@
|
|
1
|
+
require 'graphviz'
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
module ScannerGenerator
|
5
|
+
require File.dirname(__FILE__) + '/thompson_construction.rb'
|
6
|
+
|
7
|
+
LAMBDA = "LAMBDA"; SOURCE = 0; DEST = 1; LABEL = 2;
|
8
|
+
ERROR = 0; MACHINE_ACCEPT = 1; HALT_RETURN = 2 # Action Table codes [E, MA, HR]
|
9
|
+
ACC = 3; WIDTH = 3
|
10
|
+
|
11
|
+
# True if the needle (subset) is found in the haystack (superset).
|
12
|
+
# Reports whether every element of +needle+ also occurs in +haystack+.
#
# Both arrays are sorted and walked in step, so an element repeated k times
# in +needle+ must occur at least k times in +haystack+.
#
# needle   - Array of mutually comparable elements.
# haystack - Array of mutually comparable elements.
#
# Returns true or false. An empty +needle+ is contained in every haystack,
# and a non-empty +needle+ is never contained in an empty haystack.
def subset(needle, haystack)
  candidates = haystack.sort
  cursor = 0
  needle.sort.each do |wanted|
    # Skip haystack entries that sort before the element being looked for.
    cursor += 1 while cursor < candidates.length && candidates[cursor] < wanted
    # Ran off the end, or landed on something larger: +wanted+ is missing.
    return false if cursor >= candidates.length || candidates[cursor] != wanted
    cursor += 1 # consume the match so duplicates need their own partner
  end
  true
end
|
32
|
+
|
33
|
+
# is needle contained in a haystack?
|
34
|
+
# Reports whether +needle+ is a subset (per #subset) of at least one of the
# arrays in +list_of_haystacks+.
#
# Returns true or false; an empty list yields false.
def subset_of_list_element?(needle, list_of_haystacks)
  list_of_haystacks.any? { |candidate| subset(needle, candidate) }
end
|
38
|
+
|
39
|
+
class FiniteStateMachine
|
40
|
+
include ThompsonConstruction
|
41
|
+
|
42
|
+
attr_reader :graph_hash, :accept_states, :origin
|
43
|
+
attr_accessor :labels
|
44
|
+
|
45
|
+
# Must set @accept_states, @edges, @edge_labels, @node_labels, @graph_hash
|
46
|
+
# edge/node labels are derived from @graph_hash
|
47
|
+
# Builds a machine from an options hash.
#
# input - Hash with required keys :accept_states and :graph_hash, and
#         optional keys :origin (defaults to 0), :labels (defaults to {})
#         and :rankdir (defaults to "TB", top-to-bottom; "LR" is
#         left-to-right).
#
# Raises RuntimeError ("Bunk input") when a required key is missing.
def initialize(input)
  accept_states = input[:accept_states]
  graph_hash = input[:graph_hash]
  raise "Bunk input" if accept_states.nil? || graph_hash.nil?

  @accept_states = accept_states
  @graph_hash = graph_hash
  @origin = input[:origin] || 0
  @labels = input[:labels] || {}
  # Snapshot of the edge labels present at construction time.
  @edge_labels = get_edge_labels
  @rankdir = input[:rankdir] || "TB"
  self
end
|
57
|
+
|
58
|
+
# Takes over another machine's accept states, graph hash and origin (the
# same objects, not duplicates) and refreshes the cached edge labels.
# Node labels and rankdir are left as they were.
#
# graph - object responding to accept_states, graph_hash and origin.
#
# Returns self.
def copy(graph)
  @accept_states, @graph_hash, @origin = graph.accept_states, graph.graph_hash, graph.origin
  @edge_labels = get_edge_labels
  self
end
|
65
|
+
|
66
|
+
# Regex keys specify that any matching edge labels transition to the dest
|
67
|
+
# node.
|
68
|
+
# Example:
|
69
|
+
# Suppose our language = {a,b,1,\n,*,/}.
|
70
|
+
# An edge labeled "[^\n\*\/]" matches anything but newline, *, or /.
|
71
|
+
# This function replaces that edge with multiple edges from the
|
72
|
+
# language's alphabet. In this case, the [^\n\*\/] edge gets replaced by
|
73
|
+
# 3 edges: an "a" edge, a "b" edge, & a "1" edge.
|
74
|
+
# NOTE: Invoke this AFTER drawing dfa, but BEFORE dumping the module.
|
75
|
+
# Replaces every Regexp-keyed edge with one String-keyed edge per matching
# label of the machine's alphabet, all leading to the regex edge's
# destination. A String edge that already exists on the source node is never
# overwritten, so a sloppy regex cannot clobber an explicit edge.
#
# The alphabet is collected from the graph on every call rather than taken
# from the snapshot made at construction time, so labels introduced later
# (add_edge, attached graphs) take part in the expansion as well.
#
# Returns self.
def expand_regex_edges
  alphabet = @graph_hash.values.map(&:keys).flatten.uniq.select { |label| label.class == String }

  @graph_hash.each_pair do |_source, edges|
    # Collect the regex keys up front so the hash is not modified while it
    # is being iterated.
    regex_labels = edges.keys.select { |label| label.class == Regexp }
    next if regex_labels.empty?

    expansions = {}
    regex_labels.each do |regex|
      dest = edges[regex]
      alphabet.each do |label|
        next unless label.match(regex)
        next if edges.key?(label) # explicit edges win over regex matches
        expansions[label] = dest
      end
    end

    regex_labels.each { |regex| edges.delete(regex) } # drop the old regex edges
    edges.merge!(expansions)
  end
  self
end
|
102
|
+
|
103
|
+
# Lists every node mentioned in the graph, whether as a source or as a
# destination.
#
# Returns a sorted Array without duplicates; nested destination arrays are
# flattened.
def get_node_names
  sources = @graph_hash.keys
  destinations = @graph_hash.values.map(&:values)
  (sources + destinations).flatten.uniq.sort
end
|
112
|
+
|
113
|
+
# Collects the distinct edge labels used anywhere in the graph, in order of
# first appearance.
#
# Returns an Array.
def get_edge_labels
  @graph_hash.values.map(&:keys).flatten.uniq
end
|
118
|
+
|
119
|
+
# Subset construction: builds a machine whose states are sets of this
# machine's nodes, reached by following each non-LAMBDA label and then
# taking the lambda closure of the result.
#
# start_node - node (or nodes) whose closure becomes the first state;
#              defaults to @origin.
#
# Returns a new FiniteStateMachine with its states renumbered by #beautify.
def subsetify(start_node = @origin)
  dfa_edges = {}
  dfa_accepts = {}
  dfa_labels = {}
  alphabet = get_edge_labels.reject { |symbol| symbol == LAMBDA }

  # Worklist of node sets. Sets discovered below are appended while the loop
  # is running; Array#each visits them too.
  worklist = [closure_of(start_node)]
  worklist.each do |nfa_states|
    transitions = dfa_edges[nfa_states] = {}

    alphabet.each do |symbol|
      reached = []
      nfa_states.each do |node|
        outgoing = @graph_hash[node]
        next if outgoing.nil? || outgoing[symbol].nil?
        target = closure_of(outgoing[symbol])
        reached << target unless reached.include?(target)
      end

      next if reached == []
      reached.flatten!
      transitions[symbol] = reached
      worklist << reached unless worklist.include?(reached)
    end

    accept_type = accept_state_of(nfa_states)
    dfa_accepts[nfa_states] = accept_type if accept_type != false
    # States without outgoing edges are not kept as source nodes.
    dfa_edges.delete(nfa_states) if transitions == {}
  end

  # A combined state's label is the distinct labels of its members, one per
  # line.
  worklist.each do |nfa_states|
    text = ""
    nfa_states.each do |node|
      piece = @labels[node]
      text << piece + "\n" unless piece.nil? || text.include?(piece)
    end
    dfa_labels[nfa_states] = text.chomp unless text == ""
  end

  FiniteStateMachine.new({
    :graph_hash => dfa_edges,
    :accept_states => dfa_accepts,
    :labels => dfa_labels
  }).beautify
end
|
172
|
+
|
173
|
+
# In-place variant of #subsetify: replaces this machine's graph, accept
# states and labels with those of the converted machine.
#
# start_node_label - node handed to #subsetify as the start; defaults to 0.
#
# Returns self.
def subsetify!(start_node_label = 0)
  converted = subsetify(start_node_label)
  @graph_hash = converted.graph_hash
  @accept_states = converted.accept_states
  @labels = converted.labels
  self
end
|
178
|
+
|
179
|
+
# Renders the machine to "<filename>.svg" with GraphViz.
#
# filename - output file name without extension.
# svgname  - name given to the GraphViz graph (previously ignored in favour
#            of a hard-coded :Finite_Automata_Graph, which is still the
#            default).
# shape    - GraphViz node shape; accept states are coloured green only when
#            the shape is "Mrecord".
# path     - optional value passed to GraphViz as :path.
#
# Accept states are drawn with a double outline. A node uses its entry in
# @labels as caption when present, otherwise its own name.
#
# Returns whatever GraphViz#output returns.
def draw_graph(filename = "output", svgname = :Finite_Automata_Graph, shape = "circle", path = nil)
  graph = GraphViz.new(svgname)
  graph[:rankdir] = @rankdir

  get_node_names.each do |node|
    accepting = @accept_states.include?(node)
    graph.add_nodes(node.to_s,
      :shape => shape,
      :label => @labels[node] || node.to_s,
      :peripheries => (accepting ? 2 : 1),
      :color => ((accepting && shape == "Mrecord") ? "#66DD66" : "#000000"))
  end

  @graph_hash.each_pair do |source_label, sub_hash|
    sub_hash.each_pair do |edge_label, destination_nodes|
      # Backslashes are escaped again for the edge caption handed to GraphViz.
      caption = label_friendly(edge_label).gsub('\\','\\\\\\\\')
      # A destination may be a single node or an array of nodes.
      [destination_nodes].flatten.each do |dest_label|
        source_node = graph.get_node(source_label.to_s)
        dest_node = graph.get_node(dest_label.to_s)
        graph.add_edges(source_node, dest_node, :label => caption)
      end
    end
  end

  if path
    graph.output(:svg => "#{filename}.svg", :path => path)
  else
    graph.output(:svg => "#{filename}.svg")
  end
end
|
209
|
+
|
210
|
+
# Draws the graph with every labelled node rendered as an HTML-like table:
# a "State <key>" heading followed by one row per line of the node's label.
# Tables of accept states get a green border, all others a white one.
#
# The heading, the accept check and the stored entry all use the label's own
# key (the state), not its position within @labels. The plain labels are put
# back afterwards, even when drawing raises.
#
# Parameters are passed straight through to #draw_graph.
#
# Returns the result of #draw_graph.
def draw_state_labeled_graph(filename = "output", svgname = :Finite_Automata_Graph, shape = "circle", path = nil)
  plain_labels = @labels
  @labels = {}
  plain_labels.each_pair do |state, text|
    lines = ""
    text.each_line { |i| lines << "<tr><td align=\"left\">#{i}</td></tr>" }
    table_border_color = (@accept_states.include?(state)) ? "#448844" : "#ffffff"
    heading = "State #{state}"
    @labels[state] = '<<table color="'+table_border_color+'" style="ROUNDED" border="1" cellborder="0" cellpadding="5"><tr><td align="center" colspan="1"><font color="#666666" point-size="8">'+heading+'</font></td></tr>'+lines+'</table>>'
  end
  draw_graph(filename, svgname, shape, path)
ensure
  @labels = plain_labels
end
|
225
|
+
|
226
|
+
# adapted from class notes
|
227
|
+
# Lambda closure: the given node(s) plus every node reachable from them
# through LAMBDA edges only.
#
# node_label - a node or a (possibly nested) array of nodes.
#
# Returns an Array with the starting nodes first, followed by the reachable
# nodes in order of discovery.
def closure_of(node_label)
  closure = [node_label].flatten
  index = 0
  # The array grows while it is scanned; every appended node is visited too.
  while index < closure.length
    outgoing = @graph_hash[closure[index]]
    index += 1
    next if outgoing.nil? || outgoing[LAMBDA].nil?

    [outgoing[LAMBDA]].flatten.each do |reachable|
      closure << reachable unless closure.include?(reachable)
    end
  end
  closure
end
|
248
|
+
|
249
|
+
# returns true if the any of the closure's states are included in the set
|
250
|
+
# of accept_states
|
251
|
+
# Looks up the accept type of a set of nodes.
#
# closure - Array of nodes.
#
# Returns the @accept_states entry of the first node in +closure+ that is an
# accept state, or false when none of them is.
def accept_state_of(closure)
  accepting = closure.select { |member| @accept_states.include?(member) }
  return false if accepting.empty?
  @accept_states[accepting.first]
end
|
259
|
+
|
260
|
+
# numbers sets and makes them the new keys (cleans up the graph_hash's keys)
|
261
|
+
# Renumbers the graph: every source key and destination value of
# @graph_hash (for instance the node sets produced by #subsetify) gets a
# consecutive integer, starting at 0, in order of first appearance.
#
# Returns a new FiniteStateMachine whose graph, accept states and labels use
# the new numbers.
def beautify
  numbering = {}
  next_id = -1
  assign = lambda do |name|
    numbering[name] = (next_id += 1) if numbering[name].nil?
  end

  # Number the names.
  @graph_hash.each_pair do |source, edges|
    assign.call(source)
    edges.values.each { |dest| assign.call(dest) }
  end

  # Rebuild the graph under the new numbers; edge labels stay as they are.
  clean_hash = {}
  @graph_hash.each_pair do |source, edges|
    renamed = clean_hash[numbering[source]] = Hash.new
    edges.each_pair { |edge_label, dest| renamed[edge_label] = numbering[dest] }
  end

  clean_accept_states = {}
  @accept_states.each_pair { |state, acc_type| clean_accept_states[numbering[state]] = acc_type }

  # Be sure to bring the labels along.
  new_labels = {}
  @labels.each_pair { |state, label| new_labels[numbering[state]] = label }

  FiniteStateMachine.new({
    :graph_hash => clean_hash,
    :accept_states => clean_accept_states,
    :labels => new_labels
  })
end
|
294
|
+
|
295
|
+
# Generates the source text of an `initialize` method that assigns the
# lookup codes, the label codes and the state, action and lookup tables.
#
# indent_width - number of spaces used to indent the two code-assignment
#                lines. The method used to read an undefined +indent_width+
#                and therefore always raised NameError; it is now an
#                optional parameter.
#
# Returns a String.
def generate_initialize(indent_width = 2)
  [
    "def initialize",
    "#{" " * indent_width}#{lookup_code_string}", # array of edge labels
    "#{" " * indent_width}#{label_code_string}",  # hash mapping labels to column codes
    dump_table(:state),
    dump_table(:action),
    dump_table(:lookup),
    "#{ind(1)}end"
  ].join("\n")
end
|
304
|
+
|
305
|
+
# Module Dumping
|
306
|
+
# Generates the source text of a module whose `initialize` assigns the
# lookup codes, label codes and the three tables, then calls `super`.
# Regex edges are expanded first.
#
# name         - name of the generated module.
# indent_width - accepted but not used by this method.
#
# NOTE(review): whitespace inside the string literals is reproduced as it
# appears in this copy of the file, where runs of spaces may have been
# collapsed — confirm against the repository.
#
# Returns a String.
def generate_module(name = 'ScannerModule', indent_width = 2)
  expand_regex_edges
  [
    "module #{name}\n def initialize",
    "#{ind(1)}#{lookup_code_string}",
    "#{ind(1)}#{label_code_string}",
    dump_table(:state),
    dump_table(:action),
    dump_table(:lookup),
    "#{ind(2)}super",
    "#{ind(1)}end\nend"
  ].join("\n")
end
|
317
|
+
|
318
|
+
# This horrific kluge ports the ruby dump_tables to Javascript. Sorta.
|
319
|
+
# Smelly code, but output passes JSLint and is the path of least resistance.
|
320
|
+
#
|
321
|
+
# TODO: Write something that generates the tables as ruby objects, then
|
322
|
+
# refactor these table dumping functions, using array.to_s
|
323
|
+
# Ports the Ruby table dump to JavaScript by textual substitution: the
# tables and code strings are generated as for Ruby and then rewritten,
# pattern by pattern, in the order listed below. Regex edges are expanded
# first.
#
# name, indent_width - accepted but not used by this method.
#
# NOTE(review): the patterns are reproduced as they appear in this copy of
# the file, where runs of spaces inside literals may have been collapsed —
# confirm against the repository before relying on them.
#
# Returns a String of JavaScript source.
def js_tables(name = 'ScannerModule', indent_width = 2)
  expand_regex_edges
  ruby_to_js = {
    ' # E' => '// E',  # Action table's label
    ' # ' => '// ',  # table-leading comments
    '# ' => ' // ',  # row-trailing descriptions
    '[[' => ' [',  # first row of table
    '@state_table = ' => 'SCANNER.state_table = [',
    '@action_table = ' => 'SCANNER.action_table = [',
    '@lookup_table = ' => 'SCANNER.lookup_table = [',
    '@lookup_codes' => 'SCANNER.lookup_codes',
    '@label_codes' => 'SCANNER.label_codes',
    ' [' => ' [',  # linty js indent of 4
    ']] ' => ']];',  # semicolon ending tables
    ':other' => '"other"',  # :other symbol.to_s
    '"=>' => '" : '  # javascript hash notation
  }

  ruby_text = [
    "var SCANNER = {};",
    dump_table(:state, 0, 0),
    dump_table(:action, 0, 0),
    dump_table(:lookup, 0, 0),
    "#{ind(0)}#{lookup_code_string};",
    "#{ind(0)}#{label_code_string};"
  ].join("\n") + "\n"

  ruby_to_js.reduce(ruby_text) { |text, (from, to)| text.gsub(from, to) }
end
|
349
|
+
|
350
|
+
# Generates the source text of a complete `Scanner` class: an `initialize`
# that assigns the lookup codes, label codes and the three tables, followed
# by fixed `scan` and `scan_file` methods. Regex edges are expanded first.
#
# indent_width - accepted but not used by this method.
#
# NOTE(review): the embedded scanner source is reproduced as it appears in
# this copy of the file, which carries no indentation — confirm against the
# repository.
#
# Returns a String.
def generate_scanner(indent_width = 2)
  expand_regex_edges

  scan_methods = <<-'END_SCANNER'
def scan(input)
@token = ""
@state = 0
@buffered = false
results = Array.new

input.each_char do |ch|
current_read = case ch # Map chars onto char-classes by editing case/when
when /[a-zA-Z]/ then @label_codes["L"]
when /[0-9]/ then @label_codes["D"]
else @label_codes[ch] || @label_codes[:other]
end
if((@action_table[@state][current_read]==1) && (@state_table[@state][current_read] != -1))
@buffered = false # action=MA (Machine-Accept) (=1). Append char to token.
@token += ch unless ch[/\s/] && @label_codes[ch].nil? # Uncomment if recognizing some whitespace.
@state=@state_table[@state][current_read]
elsif((@state_table[@state][current_read]==-1) && (@action_table[@state][current_read]==2))
@buffered = true # action=HR (Halt-Return) (=2). Accept current token.
results.push [@lookup_codes[@lookup_table[@state][current_read]],@token]
@state = 0
@token = ""
else # ? Hitting this block indicates action=ERR (ERROR) (=3)
next
end
redo if(@buffered==true && current_read!=@label_codes[:other]) # repeat w/o advancing to next char
end
results
end

# Appends a newline to the file in case of its absence, to ensure
# the presence of terminating whitespace. Convert Windows newlines
# to UNIX style ones.
def scan_file(filename = "test_file.txt")
scan((File.open(filename, "r").read+"\n").gsub("\r\n","\n"))
end
  END_SCANNER

  class_head = [
    "class Scanner",
    "#{ind(1)}def initialize",
    "#{ind(2)}#{lookup_code_string}",
    "#{ind(2)}#{label_code_string}",
    dump_table(:state, 2, 2),
    dump_table(:action, 2, 2),
    dump_table(:lookup, 2, 2),
    "#{ind(1)}end"
  ].join("\n")

  class_head + "\n\n" + scan_methods + "\nend"
end
|
401
|
+
|
402
|
+
# Module Dumping
|
403
|
+
# Writes the generated module source to "./modules/<underscored name>.rb".
#
# name         - CamelCased name; only used to derive the file name.
# indent_width - accepted but not used by this method.
#
# The file is opened in block form so it is flushed and closed before the
# method returns (it was previously left open).
#
# NOTE(review): #generate_module is called without +name+, so the generated
# module is always called ScannerModule regardless of the file name —
# confirm whether that is intended.
#
# Returns a String reporting the count returned by File#write.
def dump_module(name, indent_width = 2)
  filename = underscore(name)
  written = File.open("./modules/#{filename}.rb", "w") do |file|
    file.write(generate_module)
  end
  "Successfully wrote #{written} characters to #{filename}.rb"
end
|
409
|
+
|
410
|
+
# Returns the edge labels with whitespace made visible: a label containing
# whitespace is replaced by its escaped form (a newline becomes the two
# characters \ and n); all other labels are returned unchanged.
def friendly_edge_labels
  get_edge_labels.map do |label|
    label[/\s/].nil? ? label : label.inspect[1..-2]
  end
end
|
416
|
+
|
417
|
+
# Produces a printable caption for an edge label.
#
# label - an Integer, Regexp, String (or anything else responding to #[]
#         with a Regexp argument).
#
# Returns a String:
#   * integers as decimal text (tested with Integer; the Fixnum constant
#     this used to compare against no longer exists in current Rubies),
#   * regexps as "/source/",
#   * the lambda/epsilon label and the empty label as "ε",
#   * labels containing whitespace in escaped form (newline => \n),
#   * any other label unchanged.
def label_friendly(label)
  if label.is_a?(Integer)
    label.to_s
  elsif label.class == Regexp
    # Regexp#to_s looks like "(?-mix:SOURCE)"; cut off the 7-character
    # prefix and the closing parenthesis.
    '/' + label.to_s[7..-2] + '/'
  elsif label == LAMBDA || label.to_s == "LAMBDA" || label.to_s == "EPSILON" || label.to_s.empty?
    'ε' # lower-case epsilon
  elsif label[/\s/].nil?
    label
  else
    label.inspect[1..-2]
  end
end
|
429
|
+
|
430
|
+
|
431
|
+
# Builds an indentation string.
#
# level - nesting depth.
# width - spaces per level (default 2).
#
# Returns a String of level * width spaces.
def ind(level, width=2)
  space_count = level * width
  " " * space_count
end
|
434
|
+
|
435
|
+
# converts a CamelCased name to k_and_r style for filename
|
436
|
+
# Converts a CamelCased name to snake_case for use as a file name.
#
# The first character is lower-cased; every later capital A-Z is replaced by
# an underscore followed by its lower-case form.
#
# name - String; an empty string yields an empty string (it used to raise).
#
# Returns a new String.
def underscore(name)
  return "" if name.empty?
  name[0].downcase + name[1..-1].gsub(/[A-Z]/) { |capital| "_" + capital.downcase }
end
|
443
|
+
|
444
|
+
|
445
|
+
# Lists the token types the scanner can report: "!accept" at index 0,
# followed by the distinct accept types in order of first appearance.
#
# Returns an Array.
def lookup_codes
  (["!accept"] + @accept_states.values).uniq
end
|
448
|
+
# The array union in lookup_codes keeps "!accept" at index 0 and drops duplicate accept types.
|
449
|
+
# Returns Ruby source text assigning the lookup codes to @lookup_codes.
def lookup_code_string
  "@lookup_codes = " + lookup_codes.to_s
end
|
452
|
+
|
453
|
+
# Returns Ruby source text assigning @label_codes: a hash literal mapping
# each edge label to its index among the edge labels, plus :other mapped to
# the number of labels.
#
# The literal is assembled entry by entry in `key=>value` form instead of
# trimming Hash#to_s, so a graph without edges yields the valid
# "{:other=>0}" and the text does not depend on how the running Ruby prints
# hashes.
def label_code_string
  labels = get_edge_labels
  entries = labels.each_with_index.map { |label, code| "#{label.inspect}=>#{code}" }
  entries << ":other=>#{labels.length}"
  "@label_codes = {#{entries.join(', ')}}"
end
|
458
|
+
|
459
|
+
# Graph Attachment
|
460
|
+
# Shifts every node number in the machine by +amount+: graph sources, edge
# destinations (single integers or arrays of them), accept-state keys and
# the origin. Used to make a machine's node numbers disjoint from another
# graph's before attaching it.
#
# Destinations are tested with Integer (the Fixnum constant this used to
# compare against no longer exists in current Rubies).
#
# amount - Integer added to every node number.
#
# Raises RuntimeError when a destination is neither an Integer nor an Array.
# Returns the new origin.
def increment_node_labels(amount)
  shifted_graph = {}
  @graph_hash.each_pair do |node, edges|
    shifted_edges = {}
    edges.each_pair do |edge_label, dest|
      shifted_edges[edge_label] =
        case dest
        when Integer then dest + amount
        when Array then dest.map { |n| n + amount }
        else raise "value (#{dest}) is a #{dest.class}!"
        end
    end
    shifted_graph[node + amount] = shifted_edges
  end

  shifted_accepts = {}
  @accept_states.each_pair { |node, type| shifted_accepts[node + amount] = type }

  @graph_hash, @accept_states = shifted_graph, shifted_accepts
  @origin += amount
end
|
482
|
+
|
483
|
+
# Returns the number of distinct nodes mentioned in the graph.
def get_node_count
  get_node_names.size
end
|
484
|
+
|
485
|
+
# considerations:
|
486
|
+
# do we need a flag for when we have to strip an attach point of being an accept state?
|
487
|
+
# Attaches another machine to this one through a LAMBDA edge from
# +attach_point+ to the other machine's origin.
#
# The other machine is modified: its node numbers are shifted up by this
# graph's node count so the two number ranges do not collide. Its edges and
# accept states are then merged into this machine.
#
# An existing lambda destination stored as a single node (not an array) is
# wrapped into an array before the new origin is added; appending to it
# directly would be an integer bit shift and silently lose the new edge.
#
# attach_point - node of this graph; must be smaller than the node count.
# fsm          - the machine to attach; must not be this machine.
#
# Raises RuntimeError for an out-of-bounds attach point or when +fsm+ is
# this machine.
# Returns the node count after attaching.
def attach_graph(attach_point, fsm)
  node_count = get_node_count
  raise "#{attach_point} out of graph bounds." if attach_point >= node_count
  raise "going to break everything by attaching to myself!" if fsm == self

  fsm.increment_node_labels(node_count)

  # The attach point may be on the graph without any outgoing edges yet.
  @graph_hash[attach_point] = Hash.new if @graph_hash[attach_point].nil?
  edges = @graph_hash[attach_point]

  existing = edges[LAMBDA]
  if existing.nil?
    edges[LAMBDA] = [fsm.origin]
  elsif existing.is_a?(Array)
    existing << fsm.origin
  else
    edges[LAMBDA] = [existing, fsm.origin]
  end

  @graph_hash.merge!(fsm.graph_hash)
  @accept_states.merge!(fsm.accept_states)
  get_node_count
end
|
525
|
+
|
526
|
+
# Dumps either a state table, an action table, or a lookup table
|
527
|
+
# This function is kind of half-refactored with dump_module and needs cleaning
|
528
|
+
# like the wizard needs food.
|
529
|
+
# Dumps the state table, the action table or the lookup table as Ruby source
# text: one row per node, one column per edge label plus a trailing
# " Other" column.
#
# For a node/label pair without an edge the state cell is -1, the action
# cell is HALT_RETURN for accept states and ERROR otherwise, and the lookup
# cell is the index of the node's accept type in #lookup_codes (0 for
# non-accept nodes). Where an edge exists the state cell is its destination,
# the action cell is MACHINE_ACCEPT and the lookup cell is 0.
#
# type         - :state, :action or :lookup; any other value produces the
#                header lines but no cells.
# indent_width - accepted but not used by this method.
# indent_level - nesting level of the assignment line; rows sit one level
#                deeper.
#
# NOTE(review): whitespace inside the string literals is reproduced as it
# appears in this copy of the file, where runs of spaces may have been
# collapsed — confirm against the repository.
#
# Returns a String with trailing whitespace stripped.
def dump_table(type = :state, indent_width = 2, indent_level = 2)
  columns = get_edge_labels << " Other"
  rows = get_node_names
  codes = lookup_codes
  row_indent = ind(indent_level+1)

  text = "#{ind(indent_level)}@#{type}_table = "
  text += "\n#{row_indent}\# ERROR = 0; MACHINE_ACCEPT = 1; HALT_RETURN = 2" if type == :action
  text += "\n#{row_indent}#"
  columns.each { |label| text += sprintf("%#{WIDTH+1}s", label_friendly(label)) }
  text += "\n#{row_indent}"

  rows.each_with_index do |node, row|
    final_row = (row == rows.size-1)
    accepting = !@accept_states[node].nil?
    text += (row == 0 ? "[" : " ") + "["

    columns.each_with_index do |edge, col|
      final_col = (col == columns.size-1)
      edges = @graph_hash[node]

      if edges.nil? || edges[edge].nil? || edges[edge][0].nil?
        state_cell = "-1"
        action_cell = accepting ? HALT_RETURN.to_s : ERROR.to_s
        lookup_cell = accepting ? codes.find_index(@accept_states[node]).to_i.to_s : "0"
      else
        state_cell = edges[edge].to_s
        action_cell = MACHINE_ACCEPT.to_s
        lookup_cell = "0"
      end

      # Pick the cell and the row-trailing note for the requested table.
      cell, note =
        case type
        when :state then [state_cell, (accepting ? " ACCEPT" : "")]
        when :action then [action_cell, (accepting ? " ACCEPT" : "")]
        when :lookup then [lookup_cell, (accepting ? " #{@accept_states[node]}" : "")]
        end
      next if cell.nil?

      text += sprintf("%#{WIDTH}s", cell)
      if final_col
        # Close the row (and the table after the last row), then describe it.
        text += "]" + (final_row ? "]" : ",") + " \# #{node}#{note}\n#{row_indent}"
      else
        text += ","
      end
    end
  end
  text.rstrip
end
|
584
|
+
|
585
|
+
# Clobbers the old accept type, if any was present.
|
586
|
+
# Marks +state+ as an accept state of the given +type+, replacing any type
# it had before.
#
# Returns +type+.
def add_accept_state(state, type)
  @accept_states.store(state, type)
end
|
589
|
+
|
590
|
+
# Adds an edge labelled +label+ from +src+ to +dest+.
#
# Destinations are kept in an array per source/label pair; an existing
# single (non-array) destination is wrapped first, and a destination that is
# already listed is not added again.
#
# Returns self.
def add_edge(src, label, dest)
  @graph_hash[src] = Hash.new if @graph_hash[src].nil?
  outgoing = @graph_hash[src]

  current = outgoing[label]
  if current.nil?
    outgoing[label] = [dest]
    return self
  end

  current = outgoing[label] = [current] if current.class != Array
  current << dest unless current.include?(dest)
  self
end
|
602
|
+
|
603
|
+
# Fail silently on deleting stuff that doesn't exist.
|
604
|
+
# Removes the edge labelled +label+ from +src+ to +dest+. Deleting something
# that does not exist is silently ignored.
#
# Destinations may be stored as an array or as a single node (as in
# renumbered graphs); a single destination is removed when it equals +dest+.
# The label is dropped from the source once it has no destinations left.
#
# Nodes left without edges are not removed.
#
# Returns self.
def delete_edge(src, label, dest)
  edges = @graph_hash[src]
  return self if edges.nil?

  targets = edges[label]
  return self if targets.nil?

  if targets.is_a?(Array)
    targets.reject! { |node| node == dest }
    edges.delete(label) if targets.empty?
  elsif targets == dest
    edges.delete(label)
  end
  self
end
|
614
|
+
|
615
|
+
# Reports whether node +num+ is listed in the accept states.
def is_accept?(num)
  @accept_states.member?(num)
end
|
618
|
+
end
|
619
|
+
end
|
@@ -0,0 +1,387 @@
|
|
1
|
+
module ThompsonConstruction
|
2
|
+
PENDING = 0;
|
3
|
+
#############################################################################
|
4
|
+
# Thompson-McNaughton-Yamada Construction Section
|
5
|
+
#############################################################################
|
6
|
+
# First pass of the construction: turns the regular expression text into a
# list of [machine, pending] pairs, one per operand or operator.
#
# * An ordinary or backslash-escaped character becomes a cat machine with
#   nothing pending ("\n" written as backslash-n becomes a newline).
# * "*", "+", "?" and "|" become their operator machines together with the
#   list of slots later passes fill in from neighbouring machines.
# * "( ... )" is converted recursively through #re2nfa.
# * "[ ... ]" becomes a cat machine whose label is a Regexp for the class.
#
# A backslash at the very end of +re+ is ignored.
#
# re - String holding the regular expression.
#
# Raises RuntimeError for a stray "]" or ")" and for a "(" that is never
# closed.
# Returns the Array of pairs.
def build_machine_stack(re)
  skip = 0
  escaped = false
  machines = Array.new
  (0...re.length).each do |ii|
    # Step over characters already consumed by a group or character class.
    if skip != 0
      skip -= 1
      next
    end

    ch = re[ii]
    if escaped
      machines.push([cat_machine(ch == 'n' ? "\n" : ch), nil])
      escaped = false
      next
    end

    case ch
    when '*' then machines.push([kleene_machine, [1,2]])
    when '+' then machines.push([plus_machine, [1,2]])
    when '?' then machines.push([question_machine, [0,1]])
    when '|' then machines.push([alt_machine, [1,2,3,4]])
    when ']' then raise "mismatched bracket closed a non-open class"
    when ')' then raise "mismatched paren closed a non-open group"
    when '('
      subexpression = ''
      nesting = 0
      cursor = ii
      # Collect everything up to the matching ")", tracking nested groups.
      loop do
        ch2 = re[cursor += 1]
        raise "mismatched paren opened a group that is never closed" if ch2.nil?
        break if ch2 == ')' && nesting == 0
        nesting -= 1 if ch2 == ')'
        nesting += 1 if ch2 == '('
        subexpression << ch2
      end
      subgraph = re2nfa(subexpression)
      skip = subexpression.length+1 # the +1 is used to skip the closing )
      machines.push([subgraph, nil])
    when '['
      char_class = get_char_class(re[ii..-1]) # search rest of the string for []-expression
      machines.push([cat_machine(/#{char_class}/), nil])
      # Each backslash left in the class counts once more, to compensate for
      # 2 '\'s counting as 1.
      skip = char_class.length - 1 + char_class.scan(/\\/).length
    when '\\'
      # The escaped case is handled at the top of the loop, so a backslash
      # seen here always starts a new escape.
      escaped = true
    else
      machines.push([cat_machine(ch), nil])
    end
  end
  machines
end
|
67
|
+
|
68
|
+
# Extracts the leading character class from +str+, which is expected to
# start at the opening "[".
#
# A backslash escapes the character after it: the backslash is dropped and
# the character is kept, so an escaped "]" does not end the class.
#
# Raises RuntimeError when no unescaped "]" is found.
# Returns the class text up to and including the closing "]".
def get_char_class(str)
  collected = ''
  after_backslash = false

  str.each_char do |ch|
    if after_backslash
      # Replace the backslash that was just stored with the escaped character.
      collected = collected[0..-2] + ch
    else
      collected = collected + ch
      return collected if ch == ']' # done reading current class
    end
    after_backslash = (ch == '\\' && !after_backslash)
  end
  raise 'character class improperly closed!'
end
|
85
|
+
|
86
|
+
# Completes the operators that take one operand (*, ? and +, recognised by
# having exactly two pending slots): the machine just before the operator is
# removed from the result and substituted into the operator machine through
# replace_edge. Machines with nothing pending are passed on with nil;
# machines with any other number of pending slots (alternation) are passed
# on unchanged.
#
# The pending arrays of completed operators are emptied in place.
#
# Returns a new Array of [machine, pending] pairs.
def kleene_up(machines)
  machines.each_with_object([]) do |(machine, pending), finished|
    if pending.nil? || pending.empty? # This machine is complete.
      finished.push([machine, nil])
    elsif pending.length == 2
      src = pending.shift
      dest = pending.shift
      operand = finished.pop[0]
      finished.push([machine.replace_edge(src, PENDING, dest, operand), nil])
    else # dealing with |
      finished.push([machine, pending])
    end
  end
end
|
104
|
+
|
105
|
+
# Concatenates runs of adjacent complete machines: whenever a machine and
# the one before it in the input both have nothing pending, the machine is
# attached to the accept state of the last machine in the result, and only
# accept states that stem from the attached machine are kept.
#
# The first machine is always passed on with nil pending; machines that are
# not merged are passed on unchanged.
#
# Returns a new Array of [machine, pending] pairs.
def catify(machines)
  merged = []
  machines.each_with_index do |(machine, pending), position|
    if position == 0
      merged.push([machine, nil])
      next
    end

    unless pending.nil? && machines[position-1][1].nil?
      merged.push([machine, pending])
      next
    end

    # This machine AND the previous one are each complete: join them.
    lead = merged.pop[0]
    offset = lead.get_node_count-1
    acc = lead.accept_states.keys.first || 0
    lead.imp_attach_graph(acc, machine)
    lead.accept_states.delete_if do |acc_st|
      !machine.accept_states.keys.include?(acc_st-offset)
    end
    merged.push([lead, nil])
  end
  merged
end
|
128
|
+
|
129
|
+
# Completes alternation machines: first their left-hand operands, then their
# right-hand operands.
#
# Returns the resulting Array of [machine, pending] pairs.
def handle_alternation(machines)
  with_left_sides = absorb_left_alt(machines)
  absorb_right_alt(with_left_sides)
end
|
133
|
+
|
134
|
+
# Walks the machines in order and lets every machine with pending slots
# absorb the machine just before it: the first two pending slots are taken
# off, the previous machine is removed from the result and substituted via
# replace_edge, and the combined machine is stored together with whatever
# slots remain. Machines with nothing pending are passed on with nil.
#
# The pending arrays are shortened in place.
#
# Returns a new Array of [machine, pending] pairs.
def absorb_left_alt(machines)
  machines.each_with_object([]) do |(machine, pending), absorbed|
    if pending.nil? || pending.empty? # This machine is complete.
      absorbed.push([machine, nil])
    else
      src = pending.shift
      dest = pending.shift
      combined = machine.replace_edge(src, PENDING, dest, absorbed.pop[0])
      absorbed.push([combined, pending])
    end
  end
end
|
147
|
+
|
148
|
+
# Mirror image of absorb_left_alt: runs the same absorption over the reversed
# list so that each incomplete machine consumes its right-hand neighbour, then
# restores the original ordering.
#
# machines - list of [machine, pending] pairs.
#
# Returns a new list of [machine, pending] pairs.
def absorb_right_alt(machines)
  mirrored = machines.reverse
  absorbed = absorb_left_alt(mirrored)
  absorbed.reverse
end
|
151
|
+
|
152
|
+
# This is a Thompson construction of a regular expression to a NFA.
|
153
|
+
# The machine stack is a series of 2-tuples. The first element of which
|
154
|
+
# is a small NFA, the second of which is a listing of the edges it needs
|
155
|
+
# to fill in by cannibalizing an adjacent NFA.
|
156
|
+
|
157
|
+
# mptr = machines.length - 1 # machine index pointer
|
158
|
+
# m = machines[mptr]
|
159
|
+
# * eats below IF below complete
|
160
|
+
# | eats above and below if they're complete
|
161
|
+
|
162
|
+
# make one pass forwards, completing all kleene stars and all alt LHSs
|
163
|
+
# make one pass backwards, completing all alt RHSs
|
164
|
+
# if any unfulfilled dependencies remain, my assumptions were mistaken
|
165
|
+
# Builds an NFA from a regular expression.
#
# re - the regular expression, handed straight to build_machine_stack.
#
# The expression is turned into a stack of [machine, pending] pairs, which is
# then reduced by kleene_up, catify and handle_alternation (in that order).
# Whatever machines remain are attached one after another onto a seed machine,
# each at the seed's current first accept state; after every attach only the
# accept states contributed by the newly attached machine are kept.
#
# The seed starts with a PENDING self-loop on node 0, which is removed once
# all machines have been attached.
#
# Returns a new FiniteStateMachine built from the assembled graph and accept
# states.
def re2nfa(re)
  seed = {
    :accept_states => {0=>'eh'},
    :graph_hash => {0=>{PENDING=>[0]}}
  }
  assembled = FiniteStateMachine.new(seed)

  stack = build_machine_stack(re)
  stack = kleene_up(stack)
  stack = catify(stack)
  stack = handle_alternation(stack)

  stack.each do |fragment, _pending|
    shift = assembled.get_node_count - 1
    attach_at = assembled.accept_states.keys.first || 0 # Attachment point is accept state
    assembled.imp_attach_graph(attach_at, fragment)
    assembled.accept_states.delete_if do |state, _type|
      !fragment.accept_states.key?(state - shift)
    end
  end

  assembled.delete_edge(0, PENDING, 0)
  FiniteStateMachine.new({
    :graph_hash => assembled.graph_hash,
    :accept_states => assembled.accept_states
  })
end
|
194
|
+
|
195
|
+
# Replaces the whole accept-state table with a single accept state.
#
# node_number - the node that becomes the only accept state.
# type        - label stored for that accept state (default 'end').
#               A nil type falls back to 'end', because renumber! in this
#               file treats a nil label as "not an accept state".
#
# Returns the new accept-state hash.
def set_new_accept(node_number, type='end')
  @accept_states = { node_number => (type.nil? ? 'end' : type) }
end
|
198
|
+
|
199
|
+
# Puts +fsm+ in front of this machine: this machine is attached to +fsm+ at
# fsm's first accept state, and the combined result is then handed to copy.
#
# fsm - the machine to prepend. NOTE: it is modified in place by the attach.
#
# Returns whatever copy(fsm) returns (copy is defined outside this chunk;
# presumably it adopts fsm's state into self -- TODO confirm).
def prepend_graph(fsm)
  tail_state = fsm.accept_states.keys.first
  fsm.imp_attach_graph(tail_state, self)
  copy(fsm)
end
|
203
|
+
|
204
|
+
# Two-node machine that accepts exactly one symbol.
#
# ch - label of the single edge from node 0 to node 1.
#
# Returns a FiniteStateMachine with node 1 as its accept state.
def cat_machine(ch)
  accepting = {1=>'end'}
  edges = {0 => {ch => [1]}}
  FiniteStateMachine.new({:accept_states => accepting, :graph_hash => edges})
end
|
210
|
+
|
211
|
+
# Template machine for the optional operator (?).
#
# Node 0 reaches the accept state (node 1) either through the PENDING edge,
# to be replaced later by the operand, or directly through a LAMBDA edge.
# (A four-node, LAMBDA-bracketed layout was sketched in the original source
# and left disabled in favour of this compact form.)
#
# Returns a FiniteStateMachine.
def question_machine
  accepting = {1=>'end'}
  edges = {0 => {PENDING => [1], LAMBDA => [1]}}
  FiniteStateMachine.new({:accept_states => accepting, :graph_hash => edges})
end
|
223
|
+
|
224
|
+
# Template machine for alternation (|).
#
# Node 0 forks by LAMBDA into two branches (1 -> 2 and 3 -> 4). Each branch is
# a PENDING edge to be replaced by one operand, and both branches rejoin by
# LAMBDA at the accept state, node 5.
#
# Returns a FiniteStateMachine.
def alt_machine
  branches = {
    0 => {LAMBDA => [1,3]},
    1 => {PENDING => [2]},
    2 => {LAMBDA => [5]},
    3 => {PENDING => [4]},
    4 => {LAMBDA => [5]}
  }
  FiniteStateMachine.new({:accept_states => {5=>'end'}, :graph_hash => branches})
end
|
236
|
+
|
237
|
+
# Template machine for the Kleene star (*).
#
# Node 0 may skip straight to the accept state (node 3) or enter the PENDING
# edge 1 -> 2 that the operand will replace; node 2 may loop back to node 1 or
# leave to node 3. All of those moves are LAMBDA edges.
#
# Returns a FiniteStateMachine.
def kleene_machine
  edges = {
    0 => {LAMBDA => [1,3]},
    1 => {PENDING => [2]},
    2 => {LAMBDA => [1,3]}
  }
  FiniteStateMachine.new({:accept_states => {3=>'end'}, :graph_hash => edges})
end
|
247
|
+
|
248
|
+
# Template machine for one-or-more (+).
#
# Same layout as the Kleene-star template except that node 0 has no LAMBDA
# shortcut to the accept state, so the PENDING edge 1 -> 2 must be taken at
# least once.
#
# A two-node form (0 -PENDING-> 1 -PENDING-> 1) would be more concise, but it
# would need logic to replace TWO edges with a single absorbed operand.
#
# Returns a FiniteStateMachine.
def plus_machine
  edges = {
    0 => {LAMBDA => [1]},
    1 => {PENDING => [2]},
    2 => {LAMBDA => [1,3]}
  }
  FiniteStateMachine.new({:accept_states => {3=>'end'}, :graph_hash => edges})
end
|
264
|
+
|
265
|
+
#############################################################################
|
266
|
+
# Misc functions primarily supporting re2nfa
|
267
|
+
#############################################################################
|
268
|
+
# Replaces the edge src -[label]-> dest with a whole sub-graph.
#
# The edges leaving +graph+'s origin become edges leaving +src+, and every
# edge that pointed at one of +graph+'s accept states is redirected to +dest+.
# Those accept states stop being accept states of this machine. Finally the
# original src -[label]-> dest edge is removed and nodes are renumbered so
# there are no gaps.
#
# src   - node the replaced edge starts from.
# label - label of the replaced edge.
# dest  - node the replaced edge points to.
# graph - machine to splice in; it must have at least one accept state.
#
# Returns self.
# Raises RuntimeError if +graph+ has no accept states.
def replace_edge(src, label, dest, graph)
  raise "can't inject a graph that had no accept states" if graph.accept_states.nil? || graph.accept_states.empty?

  # Normalise a scalar destination into a one-element list. Integer is used
  # instead of Fixnum, which was deprecated in Ruby 2.4 and removed in 3.2.
  edge = @graph_hash[src][label]
  @graph_hash[src][label] = [edge] if edge.is_a?(Integer)

  # Grafted node numbers are shifted by this amount during the attach.
  offset = get_node_count - 1
  imp_attach_graph(src, graph)

  # Redirect everything that reached the graft's accept states to dest.
  graph.accept_states.keys.each do |acc|
    retarget_edges(acc + offset, dest)
    accept_states.delete(acc + offset)
  end
  delete_edge(src, label, dest)

  renumber!

  self
end
|
298
|
+
|
299
|
+
# Renumbers nodes so their names are exactly 0..n-1 with no gaps.
#
# Walks get_node_names together with each name's position; whenever a name
# differs from its position, every edge pointing at the old name is
# retargeted, and the node's accept-state entry and outgoing-edge table are
# moved to the new name.
#
# A node with no outgoing edges has no entry in @graph_hash; it is renamed
# without creating one. (Moving unconditionally would store a nil edge table
# under the new name, which breaks any later walk over @graph_hash.)
#
# NOTE(review): this presumably relies on get_node_names returning names in
# ascending order, so a rename never lands on a name still in use -- confirm.
#
# Returns self.
def renumber!
  get_node_names.each_with_index do |name, index|
    next if name == index

    retarget_edges(name, index)
    @accept_states[index] = @accept_states.delete(name) unless @accept_states[name].nil?
    @graph_hash[index] = @graph_hash.delete(name) if @graph_hash.key?(name)
  end
  self
end
|
310
|
+
|
311
|
+
# Grafts a copy of +fsm+ onto this machine at +attach_point+.
#
# The copy's node numbers are shifted up by (this machine's node count - 1)
# to prevent collisions. The edges leaving the copy's origin are then added to
# +attach_point+ (the origin node itself is dropped), and the copy's remaining
# nodes and accept states are merged into this machine.
#
# When +attach_point+ already has an edge with the same label as one of the
# graft's root edges, the destinations are unioned rather than overwritten, so
# no existing transition is lost. A graft whose origin has no outgoing edges
# contributes no root edges.
#
# attach_point - node of this machine that takes over the graft's root edges.
# fsm          - machine to graft; it is cloned and left unmodified.
#
# Returns the node count after the graft.
def imp_attach_graph(attach_point, fsm)
  graft = fsm.clone
  graft.increment_node_labels(get_node_count - 1) # prevent collisions

  graft_root_edges = graft.graph_hash.delete(graft.origin) || {}
  @graph_hash[attach_point] ||= {}
  @graph_hash[attach_point].merge!(graft_root_edges) do |_label, existing, incoming|
    Array(existing) | Array(incoming)
  end

  @accept_states.merge!(graft.accept_states)
  @graph_hash.merge!(graft.graph_hash)
  get_node_count
end
|
326
|
+
|
327
|
+
# Redirects every edge that points at +old_dest+ so it points at +new_dest+.
#
# For each (node, label) whose destinations include +old_dest+, an edge to
# +new_dest+ is added and the edge to +old_dest+ is removed.
#
# Destinations may be stored either as a list or as a single bare node
# number; both forms are recognised here (replace_edge in this file
# anticipates the scalar form as well).
#
# Returns self.
def retarget_edges(old_dest, new_dest)
  @graph_hash.each_pair do |node, edge_hash|
    edge_hash.each_pair do |label, dest|
      next unless Array(dest).include?(old_dest)

      # Add first, then delete, so the label's entry never becomes empty
      # in between.
      add_edge(node, label, new_dest)
      delete_edge(node, label, old_dest)
    end
  end
  self
end
|
339
|
+
|
340
|
+
# LAMBDA-based variant of replace_edge: splices +graph+ in place of the edge
# src -[label]-> dest by delegating to lambda_inject_graph, then removes the
# original edge.
#
# src   - node the replaced edge starts from.
# label - label of the replaced edge.
# dest  - node the replaced edge points to.
# graph - machine to splice in.
#
# Returns self.
def lambda_replace_edge(src, label, dest, graph)
  # Normalise a scalar destination into a one-element list. Integer is used
  # instead of Fixnum, which was deprecated in Ruby 2.4 and removed in 3.2.
  edge = @graph_hash[src][label]
  @graph_hash[src][label] = [edge] if edge.is_a?(Integer)

  lambda_inject_graph(graph, src, dest)
  delete_edge(src, label, dest)
  self
end
|
349
|
+
|
350
|
+
# Attaches +graph+ at +src+ via lambda_attach_graph, then links each of the
# graft's accept states to +dest+ with a LAMBDA edge and removes them from
# this machine's accept states.
#
# Grafted accept states are addressed as (their number in +graph+) + (this
# machine's node count before the attach).
#
# Returns self.
def lambda_inject_graph(graph, src, dest)
  base = get_node_count
  lambda_attach_graph(src, graph)

  grafted_accepts = graph.accept_states.keys
  grafted_accepts.each do |state|
    add_edge(state + base, LAMBDA, dest)
  end
  grafted_accepts.each do |state|
    @accept_states.delete(state + base)
  end
  self
end
|
357
|
+
|
358
|
+
# Deep copy: serialises the receiver with Marshal and loads it back, so the
# copy shares no nested objects with the original.
#
# Returns the copy.
def clone
  snapshot = Marshal.dump(self)
  Marshal.load(snapshot)
end
|
361
|
+
=begin
|
362
|
+
def add_edge(src, label, dest)
|
363
|
+
@graph_hash[src] = Hash.new if @graph_hash[src].nil?
|
364
|
+
if @graph_hash[src][label].nil?
|
365
|
+
@graph_hash[src][label] = [dest]
|
366
|
+
else
|
367
|
+
if @graph_hash[src][label].class != Array
|
368
|
+
@graph_hash[src][label] = [@graph_hash[src][label]]
|
369
|
+
end
|
370
|
+
@graph_hash[src][label] << dest if !@graph_hash[src][label].include?(dest)
|
371
|
+
end
|
372
|
+
self
|
373
|
+
end
|
374
|
+
|
375
|
+
# Fail silently on deleting stuff that doesn't exist.
|
376
|
+
def delete_edge(src, label, dest)
|
377
|
+
return self if @graph_hash[src].nil?
|
378
|
+
return self if @graph_hash[src][label].nil?
|
379
|
+
@graph_hash[src][label].reject! {|node| node==dest}
|
380
|
+
if @graph_hash[src][label].empty?
|
381
|
+
@graph_hash[src].delete(label)
|
382
|
+
end
|
383
|
+
# !!! may need to add something to delete orphaned nodes, here
|
384
|
+
self
|
385
|
+
end
|
386
|
+
=end
|
387
|
+
end
|
data/push.sh
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Push the current branch to the project's GitHub repository over SSH.
git push git@github.com:hackingoff/scanner-generator
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for scanner_generator.
$:.push File.expand_path("../lib", __FILE__)
require "scanner_generator/version"

Gem::Specification.new do |s|
  s.name        = "scanner_generator"
  s.version     = ScannerGenerator::VERSION
  s.authors     = ["Hacking Off"]
  s.email       = ["source@hackingoff.com"]
  # Homepage and summary previously described a different gem
  # (context-free-grammar); they now match this project's repository
  # (the one push.sh pushes to) and what the library does.
  s.homepage    = "https://github.com/hackingoff/scanner-generator"
  s.summary     = %q{Scanner generation: regular expressions to finite state machines.}
  s.description = %q{Part of the compiler construction toolkit's guts.}

  s.rubyforge_project = "scanner_generator"

  # File lists are taken from git, so the gem must be built from a checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "rspec", "~> 2.6"
  s.add_development_dependency "awesome_print"

  s.add_dependency "ruby-graphviz" # graph visualizations

  # specify any dependencies here; for example:
  # s.add_development_dependency "rspec"
  # s.add_runtime_dependency "rest-client"
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
require 'scanner_generator'
|
2
|
+
|
3
|
+
# Placeholder examples for FiniteStateMachine. They are declared without a
# block so RSpec reports them as pending instead of letting empty bodies pass
# vacuously.
describe ScannerGenerator::FiniteStateMachine do
  it "generates graphs"

  it "has no epsilons/lambdas in DFAs"

  it "replaces edges successfully"

  it "handles edge cases"
end
|
18
|
+
|
19
|
+
# Radar's example test from Foodie:
|
20
|
+
#it "anything else is delicious" do
|
21
|
+
#Foodie::Food.portray("Not Broccoli").should eql("Delicious!")
|
22
|
+
#end
|
23
|
+
|
24
|
+
=begin
|
25
|
+
# The below tests verify Thompson Construction (conversion from regular
|
26
|
+
# expressions to NFAs). The tests made output for human eyes.
|
27
|
+
|
28
|
+
# TODO: Verify the tests are all still satisfied correctly, then hard-code
|
29
|
+
# graphs and tables satisfying ".should eql()" invocation via RSpec.
|
30
|
+
|
31
|
+
# TODO: Track down the other tests.
|
32
|
+
|
33
|
+
# Non-RSpec test code follows.
|
34
|
+
#!/usr/bin/ruby
|
35
|
+
require '../../../cct/app/models/finite_state_machine.rb'
|
36
|
+
require "awesome_print"
|
37
|
+
|
38
|
+
def replace_edge_test
|
39
|
+
fsa = FiniteStateMachine.new({
|
40
|
+
:accept_states => {2=>'accm2', 3=>'accm3'},
|
41
|
+
:graph_hash => {
|
42
|
+
0 => {"LAMBDA"=>[1]},
|
43
|
+
1 => {"M2" => 2,
|
44
|
+
"M3" => 3},
|
45
|
+
2 => {"a" => 4 },
|
46
|
+
4 => {"b" => 3}
|
47
|
+
}
|
48
|
+
})
|
49
|
+
fsa2 = fsa.clone
|
50
|
+
alter = FiniteStateMachine.new({
|
51
|
+
:accept_states => {3=>'alt_end'},
|
52
|
+
:graph_hash => {
|
53
|
+
0 => {"LAMBDA" => [1,4]},
|
54
|
+
1 => {"M1" => 2},
|
55
|
+
2 => {"LAMBDA" => 3},
|
56
|
+
4 => {"M2" => 5},
|
57
|
+
5 => {"LAMBDA" => 3}
|
58
|
+
}
|
59
|
+
})
|
60
|
+
alt = FiniteStateMachine.new({
|
61
|
+
:accept_states => {2 => 'end'},
|
62
|
+
:graph_hash => {
|
63
|
+
0 => {'a' => [1]},
|
64
|
+
1 => {'b' => [2]}
|
65
|
+
}
|
66
|
+
})
|
67
|
+
fsa.draw_graph("before")
|
68
|
+
fsa.lambda_replace_edge(1,"M3",3, alt.clone)
|
69
|
+
fsa.draw_graph("lambda-after")
|
70
|
+
fsa2.replace_edge(1,"M3",3, alt.clone)
|
71
|
+
fsa2.draw_graph("after")
|
72
|
+
|
73
|
+
#malt = alt.clone
|
74
|
+
#malt.draw_graph('alt-before-imp-attach')
|
75
|
+
#malt.imp_attach_graph(2,alt)
|
76
|
+
#malt.imp_attach_graph(2,alter)
|
77
|
+
#malt.imp_attach_graph(7,alt)
|
78
|
+
#malt.draw_graph('alt-after-imp-attach')
|
79
|
+
end
|
80
|
+
|
81
|
+
def prepend_test
|
82
|
+
fsa = FiniteStateMachine.new({
|
83
|
+
:accept_states => {1=>'accept'},
|
84
|
+
:graph_hash => {0=>{"fuck"=>1}}
|
85
|
+
})
|
86
|
+
fsa.prepend_graph(fsa.kleene_machine)
|
87
|
+
fsa.prepend_graph(fsa.cat_machine('LAMBDA'))
|
88
|
+
fsa.draw_graph('prepend-test')
|
89
|
+
end
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
# Interesting notes:
|
94
|
+
# (a|b|ab)* can read aab with more than one parse tree.
|
95
|
+
|
96
|
+
def re2nfa_test
|
97
|
+
fsa = FiniteStateMachine.new({
|
98
|
+
:accept_states => {1=>'accept'},
|
99
|
+
:graph_hash => {0=>{"LAMBDA"=>1}}}
|
100
|
+
)
|
101
|
+
example = FiniteStateMachine.new({
|
102
|
+
:accept_states => {8=>"end"},
|
103
|
+
:graph_hash => {
|
104
|
+
0 => {"LAMBDA" => [1,3]},
|
105
|
+
1 => {"LAMBDA" => [4,6]},
|
106
|
+
2 => {"LAMBDA" => [1,3]},
|
107
|
+
3 => {"c" => 8},
|
108
|
+
4 => {"a" => 5},
|
109
|
+
5 => {"LAMBDA" => 2},
|
110
|
+
6 => {"b" => 7},
|
111
|
+
7 => {"LAMBDA" => 2}
|
112
|
+
},
|
113
|
+
:origin => 0
|
114
|
+
})
|
115
|
+
|
116
|
+
#s = '(a|b)*c|de*|f'
|
117
|
+
s = 'a|b(d*|(e|f)*)'
|
118
|
+
#s = 'a((b))'
|
119
|
+
# the following strings breaks it, in some way
|
120
|
+
# 'a*|b|cde**|k' causes lots of duplication
|
121
|
+
# see examples/2012-03-01_22-47-54_-0800-nfa.png
|
122
|
+
# shows a* OR b OR replace_me, followed by
|
123
|
+
# b | c
|
124
|
+
# followed by c
|
125
|
+
# '(a|b)*'
|
126
|
+
#s = '(a|b)*c'
|
127
|
+
puts "rendering regex: #{s}"
|
128
|
+
fsa = fsa.re2nfa(s)
|
129
|
+
fsa.draw_graph("draw_re2nfa")
|
130
|
+
puts 'bad nfa'
|
131
|
+
ap fsa
|
132
|
+
fsa.subsetify.draw_graph("draw_re2dfa")
|
133
|
+
example.draw_graph("ex_nfa")
|
134
|
+
puts 'good nfa'
|
135
|
+
ap example
|
136
|
+
example.subsetify.draw_graph("ex_dfa")
|
137
|
+
fsa.graph_hash.each_pair do |k,v|
|
138
|
+
if v != example.graph_hash[k]
|
139
|
+
puts "#{v} != #{example.graph_hash[k]}"
|
140
|
+
else
|
141
|
+
puts "same"
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
def re2nfa_ends_in_or_test
|
147
|
+
fsa = FiniteStateMachine.new({
|
148
|
+
:graph_hash=>{0 => {"1" => 1}},
|
149
|
+
:accept_states => {}
|
150
|
+
})
|
151
|
+
fsa = fsa.re2nfa("1|0")
|
152
|
+
fsa.add_edge(5,LAMBDA,6)
|
153
|
+
fsa.set_new_accept(5,nil)
|
154
|
+
fsa.set_new_accept(6,"ok")
|
155
|
+
fsa.draw_graph("end_test_nfa")
|
156
|
+
fsa.subsetify!
|
157
|
+
fsa.draw_graph("end_test_dfa")
|
158
|
+
end
|
159
|
+
|
160
|
+
re2nfa_test
|
161
|
+
#injection_test
|
162
|
+
#replace_edge_test
|
163
|
+
#prepend_test
|
164
|
+
#re2nfa_ends_in_or_test
|
165
|
+
|
166
|
+
=end
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scanner_generator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Hacking Off
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-07-26 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &11360640 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '2.6'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *11360640
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: awesome_print
|
27
|
+
requirement: &11360100 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *11360100
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: ruby-graphviz
|
38
|
+
requirement: &11359540 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *11359540
|
47
|
+
description: Part of the compiler construction toolkit's guts.
|
48
|
+
email:
|
49
|
+
- source@hackingoff.com
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- .gitignore
|
55
|
+
- Gemfile
|
56
|
+
- README
|
57
|
+
- Rakefile
|
58
|
+
- lib/scanner_generator.rb
|
59
|
+
- lib/scanner_generator/finite_state_machine.rb
|
60
|
+
- lib/scanner_generator/thompson_construction.rb
|
61
|
+
- lib/scanner_generator/version.rb
|
62
|
+
- push.sh
|
63
|
+
- scanner_generator.gemspec
|
64
|
+
- spec/scanner_generator_spec.rb
|
65
|
+
homepage: https://github.com/hackingoff/context-free-grammar
|
66
|
+
licenses: []
|
67
|
+
post_install_message:
|
68
|
+
rdoc_options: []
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
requirements: []
|
84
|
+
rubyforge_project: scanner_generator
|
85
|
+
rubygems_version: 1.8.15
|
86
|
+
signing_key:
|
87
|
+
specification_version: 3
|
88
|
+
summary: Parser generation and CFG analysis.
|
89
|
+
test_files: []
|