scanner_generator 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/README +0 -0
- data/Rakefile +1 -0
- data/lib/scanner_generator/finite_state_machine.rb +619 -0
- data/lib/scanner_generator/thompson_construction.rb +387 -0
- data/lib/scanner_generator/version.rb +3 -0
- data/lib/scanner_generator.rb +5 -0
- data/push.sh +1 -0
- data/scanner_generator.gemspec +29 -0
- data/spec/scanner_generator_spec.rb +166 -0
- metadata +89 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README
ADDED
File without changes
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,619 @@
|
|
1
|
+
require 'graphviz'
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
module ScannerGenerator
|
5
|
+
require File.dirname(__FILE__) + '/thompson_construction.rb'
|
6
|
+
|
7
|
+
LAMBDA = "LAMBDA"; SOURCE = 0; DEST = 1; LABEL = 2;
|
8
|
+
ERROR = 0; MACHINE_ACCEPT = 1; HALT_RETURN = 2 # Action Table codes [E, MA, HR]
|
9
|
+
ACC = 3; WIDTH = 3
|
10
|
+
|
11
|
+
# Reports whether every element of +needle+ can be paired with a distinct,
# equal element of +haystack+ (multiset containment).
#
# Both arguments are sorted into fresh arrays first, so the callers' arrays
# are left untouched; the elements must be mutually comparable.
#
# needle   - Array of candidate members.
# haystack - Array that may contain them.
#
# Returns true or false. An empty needle is a subset of any haystack, and a
# non-empty needle is never a subset of an empty haystack.
def subset(needle, haystack)
  wanted = needle.sort
  pool = haystack.sort
  pool_index = 0

  wanted.each do |item|
    # Skip pool entries that sort below the item we are looking for.
    pool_index += 1 while pool_index < pool.length && pool[pool_index] < item
    # Either the pool ran out or its next entry already sorts above the item.
    return false if pool_index >= pool.length || pool[pool_index] != item
    pool_index += 1 # consume the match so duplicates each need their own partner
  end
  true
end
|
32
|
+
|
33
|
+
# Reports whether +needle+ is a subset (as decided by #subset) of at least
# one of the arrays in +list_of_haystacks+.
#
# Returns true or false; an empty list yields false.
def subset_of_list_element?(needle, list_of_haystacks)
  list_of_haystacks.any? { |haystack| subset(needle, haystack) }
end
|
38
|
+
|
39
|
+
class FiniteStateMachine
|
40
|
+
include ThompsonConstruction
|
41
|
+
|
42
|
+
attr_reader :graph_hash, :accept_states, :origin
|
43
|
+
attr_accessor :labels
|
44
|
+
|
45
|
+
# Must set @accept_states, @edges, @edge_labels, @node_labels, @graph_hash
|
46
|
+
# edge/node labels are derived from @graph_hash
|
47
|
+
def initialize(input)
|
48
|
+
raise "Bunk input" if input[:accept_states].nil? || input[:graph_hash].nil?
|
49
|
+
@accept_states = input[:accept_states]
|
50
|
+
@graph_hash = input[:graph_hash]
|
51
|
+
@origin = input[:origin] || 0
|
52
|
+
@labels = input[:labels] || {}
|
53
|
+
@edge_labels = get_edge_labels
|
54
|
+
@rankdir = input[:rankdir] || "TB" # TB is top-to-bottom; LR is left-to-right
|
55
|
+
self
|
56
|
+
end
|
57
|
+
|
58
|
+
def copy(graph)
|
59
|
+
@accept_states = graph.accept_states
|
60
|
+
@graph_hash = graph.graph_hash
|
61
|
+
@origin = graph.origin
|
62
|
+
@edge_labels = get_edge_labels
|
63
|
+
self
|
64
|
+
end
|
65
|
+
|
66
|
+
# Regex keys specify that any matching edge labels transition to the dest
|
67
|
+
# node.
|
68
|
+
# Example:
|
69
|
+
# Suppose our language = {a,b,1,\n,*,/}.
|
70
|
+
# An edge labeled "[^\n\*\/]" matches anything but newline, *, or /.
|
71
|
+
# This function replaces that edge with multiple edges from the
|
72
|
+
# language's alphabet. In this case, the [^\n\*\/] edge gets replaced by
|
73
|
+
# 3 edges: an "a" edge, a "b" edge, & a "1" edge.
|
74
|
+
# NOTE: Invoke this AFTER drawing dfa, but BEFORE dumping the module.
|
75
|
+
def expand_regex_edges
|
76
|
+
#puts "Expanding regex edges..."
|
77
|
+
@graph_hash.each_pair do |source, edge_dest_hash| # state, hash(edge=>state)
|
78
|
+
new_edges_for_same_destination = Hash.new
|
79
|
+
#puts "before: @graph_hash[#{source}] #{@graph_hash[source]}" if
|
80
|
+
edge_dest_hash.each_pair do |regex_edge, dest| # e.g. /[^\n] => 98
|
81
|
+
next if regex_edge.class != Regexp
|
82
|
+
#puts "before: @graph_hash[#{source}][#{edge}] = #{@graph_hash[source][edge]}"
|
83
|
+
|
84
|
+
for label in @edge_labels
|
85
|
+
if label.class == String && label.match(regex_edge)
|
86
|
+
# unless clause prevents sloppy regex from overwriting other edges
|
87
|
+
new_edges_for_same_destination[label] = dest unless @graph_hash[source].key?(label)
|
88
|
+
# new_edges_for_same_destination[label] ||= dest
|
89
|
+
end
|
90
|
+
end
|
91
|
+
@graph_hash[source].delete(regex_edge) # remove old regex edge
|
92
|
+
#puts "after: @graph_hash[#{source}][#{edge}] = #{@graph_hash[source][edge].class}"
|
93
|
+
end
|
94
|
+
#puts @new_edges_for_same_destination.to_s
|
95
|
+
unless new_edges_for_same_destination.empty?
|
96
|
+
@graph_hash[source].merge!(new_edges_for_same_destination)
|
97
|
+
#puts "after: @graph_hash[#{source}] #{@graph_hash[source]}"
|
98
|
+
end
|
99
|
+
end
|
100
|
+
self
|
101
|
+
end
|
102
|
+
|
103
|
+
# Lists every node mentioned in @graph_hash, whether it appears as an edge
# source (a top-level key) or only as an edge destination.
#
# Destinations may be stored as a single node or as an Array of nodes, so the
# collected values are flattened before de-duplication.
#
# Returns a sorted Array of unique node names. The nodes must be mutually
# comparable for the sort to succeed.
def get_node_names
  destinations = @graph_hash.values.map(&:values)
  (@graph_hash.keys + destinations).flatten.uniq.sort
end
|
112
|
+
|
113
|
+
# Collects the distinct edge labels used anywhere in @graph_hash.
#
# Returns an Array of labels in first-seen order (source nodes are visited in
# the hash's own order).
def get_edge_labels
  @graph_hash.values.map(&:keys).flatten.uniq
end
|
118
|
+
|
119
|
+
def subsetify(start_node = @origin)
|
120
|
+
new_graph_hash = {}
|
121
|
+
new_accept_states = {}
|
122
|
+
new_labels = {}
|
123
|
+
|
124
|
+
states = [closure_of(start_node)] # if passed a start node as an int, this will fail without .to_s
|
125
|
+
|
126
|
+
edge_labels = get_edge_labels
|
127
|
+
|
128
|
+
states.each do |state|
|
129
|
+
new_graph_hash[state] = {}
|
130
|
+
edge_labels.each do |label|
|
131
|
+
next if label == LAMBDA
|
132
|
+
closures_via_label = []
|
133
|
+
|
134
|
+
state.each do |node|
|
135
|
+
next if (@graph_hash[node].nil? || @graph_hash[node][label].nil?)
|
136
|
+
found_closure = closure_of(@graph_hash[node][label])
|
137
|
+
closures_via_label << found_closure if !closures_via_label.include?(found_closure)
|
138
|
+
end
|
139
|
+
|
140
|
+
next if closures_via_label == []
|
141
|
+
closures_via_label.flatten!
|
142
|
+
new_graph_hash[state][label] = closures_via_label
|
143
|
+
states << closures_via_label unless states.include?(closures_via_label)
|
144
|
+
end
|
145
|
+
|
146
|
+
new_accept_states[state] = accept_state_of(state) if accept_state_of(state) != false
|
147
|
+
new_graph_hash.delete(state) if new_graph_hash[state] == {}
|
148
|
+
#new_labels[state]
|
149
|
+
end
|
150
|
+
|
151
|
+
#puts "New graph shit:"
|
152
|
+
#ap new_graph_hash
|
153
|
+
#ap new_accept_states
|
154
|
+
#ap new_labels
|
155
|
+
|
156
|
+
# THIS IS THE NEW PART FOR LABELS
|
157
|
+
states.each do |state|
|
158
|
+
label = ""
|
159
|
+
state.each do |substate|
|
160
|
+
label << @labels[substate] + "\n" unless @labels[substate].nil? || label.include?(@labels[substate])
|
161
|
+
end
|
162
|
+
new_labels[state] = label.chomp unless label == ""
|
163
|
+
end
|
164
|
+
# END NEW PART FOR LABELS
|
165
|
+
|
166
|
+
return FiniteStateMachine.new({
|
167
|
+
:graph_hash=>new_graph_hash,
|
168
|
+
:accept_states=>new_accept_states,
|
169
|
+
:labels => new_labels}
|
170
|
+
).beautify
|
171
|
+
end
|
172
|
+
|
173
|
+
def subsetify!(start_node_label = 0)
|
174
|
+
dfa = subsetify(start_node_label)
|
175
|
+
@graph_hash,@accept_states,@labels = dfa.graph_hash, dfa.accept_states, dfa.labels
|
176
|
+
return self
|
177
|
+
end
|
178
|
+
|
179
|
+
def draw_graph(filename = "output", svgname = :Finite_Automata_Graph, shape = "circle", path = nil)
|
180
|
+
graph = GraphViz::new(:Finite_Automata_Graph)
|
181
|
+
graph[:rankdir] = @rankdir
|
182
|
+
# !!! going to have to check of @labels[node_num] (label for node # node_num exists and specify it with :label => @label[node_num] when present)
|
183
|
+
get_node_names.each do |node|
|
184
|
+
label = @labels[node] || node.to_s #((@labels[node].nil?) ? node.to_s : @labels[node])
|
185
|
+
is_accept = @accept_states.include?(node)
|
186
|
+
graph.add_nodes(node.to_s,
|
187
|
+
:shape => shape,
|
188
|
+
:label => label,
|
189
|
+
:peripheries => ((is_accept) ? 2 : 1),
|
190
|
+
:color => ((is_accept && shape == "Mrecord") ? "#66DD66" : "#000000"))
|
191
|
+
end
|
192
|
+
|
193
|
+
@graph_hash.each_pair do |source_label,sub_hash|
|
194
|
+
sub_hash.each_pair do |edge_label,destination_nodes|
|
195
|
+
[destination_nodes].flatten.each do |dest_label| # ensure d_n is 1-d array
|
196
|
+
source_node = graph.get_node(source_label.to_s)
|
197
|
+
dest_node = graph.get_node(dest_label.to_s)
|
198
|
+
graph.add_edges(source_node, dest_node, :label => label_friendly(edge_label).gsub('\\','\\\\\\\\'))
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
if path
|
204
|
+
graph.output(:svg => "#{filename}.svg", :path => path)
|
205
|
+
else
|
206
|
+
graph.output(:svg => "#{filename}.svg")
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
# Draws the graph with each labelled node rendered as an HTML-like table: a
# small "State <node>" heading followed by one row per line of the node's
# label. Accept states get a green table border, all others a white one.
#
# The decorated labels are only in place while draw_graph runs; @labels is
# put back afterwards, even if drawing raises.
#
# Parameters are passed straight through to draw_graph.
#
# Returns whatever draw_graph returns.
def draw_state_labeled_graph(filename = "output", svgname = :Finite_Automata_Graph, shape = "circle", path = nil)
  plain_labels = @labels
  html_labels = {}

  # Key the decorated labels by the node itself (not by iteration position),
  # and build them in a separate hash so @labels is never modified while it
  # is being iterated.
  plain_labels.each_pair do |state, text|
    rows = text.each_line.map { |line| "<tr><td align=\"left\">#{line}</td></tr>" }.join
    table_border_color = @accept_states.include?(state) ? "#448844" : "#ffffff"
    heading = "State #{state}"
    html_labels[state] = '<<table color="'+table_border_color+'" style="ROUNDED" border="1" cellborder="0" cellpadding="5"><tr><td align="center" colspan="1"><font color="#666666" point-size="8">'+heading+'</font></td></tr>'+rows+'</table>>'
  end

  @labels = html_labels
  begin
    draw_graph(filename, svgname, shape, path)
  ensure
    @labels = plain_labels
  end
end
|
225
|
+
|
226
|
+
# Computes the lambda-closure of a node or set of nodes: the given node(s)
# plus everything reachable from them by following only LAMBDA edges.
# (Adapted from class notes.)
#
# node_label - a single node name, or an (optionally nested) Array of them.
#
# Returns a new flat Array: the starting node(s) first, followed by the newly
# reached nodes in the order they were discovered.
def closure_of(node_label)
  closure = [node_label].flatten
  cursor = 0

  # Worklist scan: nodes appended below are examined in turn, so a single
  # pass over the growing list reaches the fixed point.
  while cursor < closure.length
    edges = @graph_hash[closure[cursor]]
    cursor += 1
    next if edges.nil? || edges[LAMBDA].nil?

    # A LAMBDA edge may point at one node or at an Array of nodes.
    [edges[LAMBDA]].flatten.each do |reached|
      closure << reached unless closure.include?(reached)
    end
  end

  closure
end
|
248
|
+
|
249
|
+
# Looks for an accepting node inside a closure.
#
# closure - collection of node names.
#
# Returns the accept type recorded in @accept_states for the first member of
# the closure that is an accept state, or false when no member is one.
# (Callers such as subsetify store the returned type, so this is not a plain
# boolean predicate.)
def accept_state_of(closure)
  closure.each do |node|
    return @accept_states[node] if @accept_states.include?(node)
  end
  false
end
|
259
|
+
|
260
|
+
# numbers sets and makes them the new keys (cleans up the graph_hash's keys)
|
261
|
+
def beautify
|
262
|
+
clean_hash, clean_accept_states, pretty, new_labels = {}, {}, {}, {}
|
263
|
+
|
264
|
+
# Number our closures.
|
265
|
+
i = -1
|
266
|
+
@graph_hash.each_pair do |key,subhash|
|
267
|
+
pretty[key] = i+= 1 if pretty[key].nil?
|
268
|
+
subhash.values.each {|subval| pretty[subval]=i+=1 if pretty[subval].nil?}
|
269
|
+
end
|
270
|
+
|
271
|
+
# Replace instances of old closure names with their new closure-numbers.
|
272
|
+
@graph_hash.keys.each do |old_key|
|
273
|
+
new_key = pretty[old_key]
|
274
|
+
clean_hash[new_key] = Hash.new
|
275
|
+
@graph_hash[old_key].each_pair do |subkey, subval| # subkey is edge label
|
276
|
+
clean_hash[new_key][subkey] = pretty[subval]
|
277
|
+
end
|
278
|
+
end
|
279
|
+
|
280
|
+
@accept_states.each_pair do |state, acc_type|
|
281
|
+
clean_accept_states[pretty[state]] = acc_type
|
282
|
+
end
|
283
|
+
|
284
|
+
@labels.each_pair do |state, label|
|
285
|
+
new_labels[pretty[state]] = @labels[state]
|
286
|
+
end # Be sure to bring labels along.
|
287
|
+
|
288
|
+
FiniteStateMachine.new({
|
289
|
+
:graph_hash => clean_hash,
|
290
|
+
:accept_states => clean_accept_states,
|
291
|
+
:labels => new_labels
|
292
|
+
})
|
293
|
+
end
|
294
|
+
|
295
|
+
# Builds the source text of an `initialize` method that sets up the lookup
# codes, the label codes and the state/action/lookup tables.
#
# indent_width - number of spaces used to indent the two code-table
#                assignments (default 2). The method previously referenced
#                this name without defining it, which raised NameError.
#
# Returns the generated source as a String.
def generate_initialize(indent_width = 2)
  indent = " " * indent_width
  "def initialize\n" +
    "#{indent}#{lookup_code_string}\n" + # array of edge labels
    "#{indent}#{label_code_string}\n" +  # hash mapping accept states to the type accepted by them
    dump_table(:state) + "\n" +
    dump_table(:action) + "\n" +
    dump_table(:lookup) + "\n" +
    "#{ind(1)}end"
end
|
304
|
+
|
305
|
+
# Module Dumping
|
306
|
+
def generate_module(name = 'ScannerModule', indent_width = 2)
|
307
|
+
expand_regex_edges
|
308
|
+
return "module #{name}\n def initialize\n" +
|
309
|
+
"#{ind(1)}#{lookup_code_string}\n" + # array of edge labels
|
310
|
+
"#{ind(1)}#{label_code_string}\n" + # hash mapping accept states to the type accepted by them
|
311
|
+
dump_table(:state) + "\n" +
|
312
|
+
dump_table(:action) + "\n" +
|
313
|
+
dump_table(:lookup) + "\n" +
|
314
|
+
"#{ind(2)}super\n" +
|
315
|
+
"#{ind(1)}end\nend"
|
316
|
+
end
|
317
|
+
|
318
|
+
# This horrific kluge ports the ruby dump_tables to Javascript. Sorta.
|
319
|
+
# Smelly code, but output passes JSLint and is the path of least resistance.
|
320
|
+
#
|
321
|
+
# TODO: Write something that generates the tables as ruby objects, then
|
322
|
+
# refactor these table dumping functions, using array.to_s
|
323
|
+
def js_tables(name = 'ScannerModule', indent_width = 2)
|
324
|
+
expand_regex_edges
|
325
|
+
replacements = {
|
326
|
+
' # E' => '// E', # Action table's label
|
327
|
+
' # ' => '// ', # table-leading comments
|
328
|
+
'# ' => ' // ', # row-trailing descriptions
|
329
|
+
'[[' => ' [', # first row of table
|
330
|
+
'@state_table = ' => 'SCANNER.state_table = [',
|
331
|
+
'@action_table = ' => 'SCANNER.action_table = [',
|
332
|
+
'@lookup_table = ' => 'SCANNER.lookup_table = [',
|
333
|
+
'@lookup_codes' => 'SCANNER.lookup_codes',
|
334
|
+
'@label_codes' => 'SCANNER.label_codes',
|
335
|
+
' [' => ' [', # linty js indent of 4
|
336
|
+
']] ' => ']];', # semicolon ending tbales
|
337
|
+
':other' => '"other"', # :other symbol.to_s
|
338
|
+
'"=>' => '" : ' # javascript hash notation
|
339
|
+
}
|
340
|
+
s = "var SCANNER = {};\n" +
|
341
|
+
dump_table(:state, 0, 0) + "\n" +
|
342
|
+
dump_table(:action, 0, 0) + "\n" +
|
343
|
+
dump_table(:lookup, 0, 0) + "\n" +
|
344
|
+
"#{ind(0)}#{lookup_code_string};\n" + # array of edge labels
|
345
|
+
"#{ind(0)}#{label_code_string};\n" # hash mapping accept states to the type accepted by them
|
346
|
+
replacements.each_pair{|k,v| s.gsub!(k,v)}
|
347
|
+
s
|
348
|
+
end
|
349
|
+
|
350
|
+
def generate_scanner(indent_width = 2)
|
351
|
+
expand_regex_edges
|
352
|
+
|
353
|
+
scanner_function =<<-'END_SCANNER'
|
354
|
+
def scan(input)
|
355
|
+
@token = ""
|
356
|
+
@state = 0
|
357
|
+
@buffered = false
|
358
|
+
results = Array.new
|
359
|
+
|
360
|
+
input.each_char do |ch|
|
361
|
+
current_read = case ch # Map chars onto char-classes by editing case/when
|
362
|
+
when /[a-zA-Z]/ then @label_codes["L"]
|
363
|
+
when /[0-9]/ then @label_codes["D"]
|
364
|
+
else @label_codes[ch] || @label_codes[:other]
|
365
|
+
end
|
366
|
+
if((@action_table[@state][current_read]==1) && (@state_table[@state][current_read] != -1))
|
367
|
+
@buffered = false # action=MA (Machine-Accept) (=1). Append char to token.
|
368
|
+
@token += ch unless ch[/\s/] && @label_codes[ch].nil? # Uncomment if recognizing some whitespace.
|
369
|
+
@state=@state_table[@state][current_read]
|
370
|
+
elsif((@state_table[@state][current_read]==-1) && (@action_table[@state][current_read]==2))
|
371
|
+
@buffered = true # action=HR (Halt-Return) (=2). Accept current token.
|
372
|
+
results.push [@lookup_codes[@lookup_table[@state][current_read]],@token]
|
373
|
+
@state = 0
|
374
|
+
@token = ""
|
375
|
+
else # ? Hitting this block indicates action=ERR (ERROR) (=3)
|
376
|
+
next
|
377
|
+
end
|
378
|
+
redo if(@buffered==true && current_read!=@label_codes[:other]) # repeat w/o advancing to next char
|
379
|
+
end
|
380
|
+
results
|
381
|
+
end
|
382
|
+
|
383
|
+
# Appends a newline to the file in case of its absence, to ensure
|
384
|
+
# the presence of terminating whitespace. Convert Windows newlines
|
385
|
+
# to UNIX style ones.
|
386
|
+
def scan_file(filename = "test_file.txt")
|
387
|
+
scan((File.open(filename, "r").read+"\n").gsub("\r\n","\n"))
|
388
|
+
end
|
389
|
+
END_SCANNER
|
390
|
+
return "class Scanner\n" +
|
391
|
+
"#{ind(1)}def initialize\n" +
|
392
|
+
"#{ind(2)}#{lookup_code_string}\n" + # array of edge labels
|
393
|
+
"#{ind(2)}#{label_code_string}\n" + # hash mapping accept states to the type accepted by them
|
394
|
+
dump_table(:state, 2,2) + "\n" + # Note: the 1s should be 2s, but dump_table's results seem
|
395
|
+
dump_table(:action, 2,2) + "\n" + # to mysteriously have an extra leading two spaces. Can't
|
396
|
+
dump_table(:lookup, 2,2) + "\n" + # for the life of me figure out how or why.
|
397
|
+
"#{ind(1)}end\n\n" +
|
398
|
+
scanner_function +
|
399
|
+
"\nend"
|
400
|
+
end
|
401
|
+
|
402
|
+
# Module Dumping
|
403
|
+
# Writes the generated scanner module to ./modules/<underscored name>.rb.
#
# name         - CamelCased name; it is underscored to form the file name.
# indent_width - accepted for interface compatibility; not used here.
#
# The ./modules directory must already exist.
#
# NOTE(review): generate_module is called with its defaults, so the emitted
# module is named 'ScannerModule' regardless of +name+ - confirm whether that
# is intended.
#
# Returns a String reporting the count returned by File#write and the file name.
def dump_module(name, indent_width = 2)
  filename = underscore(name)
  # Block form guarantees the handle is flushed and closed, even if
  # generate_module raises part-way through.
  written = File.open("./modules/#{filename}.rb", "w") { |file| file.write(generate_module) }
  "Successfully wrote #{written} characters to #{filename}.rb"
end
|
409
|
+
|
410
|
+
def friendly_edge_labels
|
411
|
+
# Convert whitespace line "\n" into strings describing their contents.
|
412
|
+
get_edge_labels.collect do |label|
|
413
|
+
(!label[/\s/].nil?) ? label.inspect[1..-2] : label
|
414
|
+
end
|
415
|
+
end
|
416
|
+
|
417
|
+
# Converts an edge label into text suitable for display.
#
# label - an Integer, Regexp, String (or anything responding to to_s and []).
#
# Returns:
#   * the decimal string for Integer labels;
#   * "/source/" for Regexp labels (Regexp#to_s with its leading "(?-mix:"
#     style prefix and trailing ")" stripped);
#   * 'ε' for the lambda label, for "LAMBDA"/"EPSILON", and for labels whose
#     to_s is empty;
#   * otherwise the label itself, with whitespace shown in escaped form
#     (e.g. a newline becomes the two characters \n).
def label_friendly(label)
  case label
  when Integer # Fixnum/Bignum were unified into Integer; the Fixnum constant no longer exists
    label.to_s
  when Regexp
    '/' + label.to_s[7..-2] + '/'
  else
    # http://stackoverflow.com/questions/9684807/how-can-one-insert-a-mathematical-greek-etc-symbol-in-dot-file
    if label == LAMBDA || label.to_s == "LAMBDA" || label.to_s == "EPSILON" || label.to_s.empty?
      'ε' # epsilon-lower is 949
    elsif label[/\s/].nil?
      label
    else
      label.inspect[1..-2] # drop the surrounding quotes added by inspect
    end
  end
end
|
429
|
+
|
430
|
+
|
431
|
+
def ind(level, width=2)
|
432
|
+
return " "*(level*width)
|
433
|
+
end
|
434
|
+
|
435
|
+
# Converts a CamelCased name to snake_case for use as a file name.
#
# The first character is lower-cased; every later ASCII capital A-Z becomes
# an underscore followed by its lower-case form, so "FooBar" => "foo_bar" and
# "ABC" => "a_b_c".
#
# name - the String to convert.
#
# Returns the converted String ("" for an empty name).
def underscore(name)
  return "" if name.empty?

  name[0].downcase + name[1..-1].gsub(/[A-Z]/) { |capital| "_" + capital.downcase }
end
|
443
|
+
|
444
|
+
|
445
|
+
def lookup_codes
|
446
|
+
(["!accept"] | @accept_states.values)
|
447
|
+
end
|
448
|
+
# Array unions: fuck yeah.
|
449
|
+
def lookup_code_string
|
450
|
+
"@lookup_codes = #{lookup_codes.to_s}"
|
451
|
+
end
|
452
|
+
|
453
|
+
# Builds the Ruby source for the @label_codes assignment: a hash literal that
# maps each edge label to its position in get_edge_labels, plus a final
# :other entry whose code is one past the last label.
#
# The literal is assembled explicitly rather than by slicing Hash#to_s, so an
# empty label set still yields valid code ("{:other=>0}") and the "=>"
# separators never gain surrounding spaces (js_tables rewrites the exact
# sequence '"=>').
#
# Returns the assignment as a String.
def label_code_string
  labels = get_edge_labels
  pairs = labels.each_with_index.map { |label, code| "#{label.inspect}=>#{code}" }
  pairs << ":other=>#{labels.length}"
  "@label_codes = {#{pairs.join(', ')}}"
end
|
458
|
+
|
459
|
+
# Graph Attachment
|
460
|
+
# Renumbers the whole machine by adding +amount+ to every node: edge sources,
# edge destinations (single nodes or Arrays of nodes), accept-state keys and
# @origin. @graph_hash and @accept_states are replaced with freshly built
# hashes. @labels is not touched.
#
# amount - Integer offset to add to each node number.
#
# Returns the new @origin.
# Raises RuntimeError if an edge destination is neither an Integer nor an
# Array.
def increment_node_labels(amount)
  shifted_graph = {}
  @graph_hash.each_pair do |source, edges|
    shifted_edges = {}
    edges.each_pair do |label, target|
      shifted_edges[label] =
        case target
        when Integer then target + amount # Integer also covers what used to be Fixnum
        when Array then target.map { |node| node + amount }
        else raise "value (#{target}) is a #{target.class}!"
        end
    end
    shifted_graph[source + amount] = shifted_edges
  end

  shifted_accepts = {}
  @accept_states.each_pair { |state, type| shifted_accepts[state + amount] = type }

  @graph_hash, @accept_states = shifted_graph, shifted_accepts
  @origin += amount
end
|
482
|
+
|
483
|
+
def get_node_count; get_node_names.size; end
|
484
|
+
|
485
|
+
# considerations:
|
486
|
+
# do we need a flag for when we have to strip an attach point of being an accept state?
|
487
|
+
def attach_graph(attach_point, fsm)
|
488
|
+
node_count = get_node_count
|
489
|
+
raise "#{attach_point} out of graph bounds." if attach_point >= node_count
|
490
|
+
raise "going to break everything by attaching to myself!" if fsm == self
|
491
|
+
#dfa = fsm.subsetify #.subsetify
|
492
|
+
dfa = fsm # Before, we were subsetifying
|
493
|
+
dfa.increment_node_labels(node_count)
|
494
|
+
#@graph_hash[attach_point] = {LAMBDA => dfa.origin} # THIS IS OUR CULPRIT!
|
495
|
+
#puts "before #{@graph_hash[attach_point]}"
|
496
|
+
|
497
|
+
#if (@graph_hash[attach_point]!=nil)
|
498
|
+
# lambdas = [@graph_hash[attach_point][LAMBDA]] || [] # this is an array!
|
499
|
+
# lambdas << dfa.origin
|
500
|
+
# lambdas = lambdas.flatten.find{|entry| !entry.nil?}
|
501
|
+
# @graph_hash[attach_point][LAMBDA] = lambdas
|
502
|
+
#else
|
503
|
+
# @graph_hash[attach_point] = {LAMBDA => dfa.origin}
|
504
|
+
#end
|
505
|
+
|
506
|
+
# if attach point was on the graph w/o outgoing edges
|
507
|
+
@graph_hash[attach_point] = Hash.new if @graph_hash[attach_point].nil?
|
508
|
+
|
509
|
+
if @graph_hash[attach_point][LAMBDA].nil?
|
510
|
+
@graph_hash[attach_point][LAMBDA] = [dfa.origin]
|
511
|
+
else # attach point already has outgoing lambda edges
|
512
|
+
@graph_hash[attach_point][LAMBDA] << dfa.origin
|
513
|
+
end
|
514
|
+
|
515
|
+
#@graph_hash[attach_point]["foo"] = lambdas
|
516
|
+
#puts "after #{@graph_hash[attach_point]}"
|
517
|
+
#puts "@gh = #{graph_hash}\ndfah = #{dfa.graph_hash}"
|
518
|
+
#puts "merged: #{@graph_hash.merge(dfa.graph_hash)}"
|
519
|
+
@graph_hash.merge!(dfa.graph_hash)
|
520
|
+
#@graph_hash.merge!({4=>{"L"=>21}})
|
521
|
+
@accept_states.merge!(dfa.accept_states)
|
522
|
+
#subsetify!
|
523
|
+
get_node_count
|
524
|
+
end
|
525
|
+
|
526
|
+
# Dumps either a state table, an action table, or a lookup table
|
527
|
+
# This function is kind of half-refactored with dump_module and needs cleaning
|
528
|
+
# like the wizard needs food.
|
529
|
+
def dump_table(type = :state, indent_width = 2, indent_level = 2)
|
530
|
+
# edge_labels = friendly_edge_labels << " Other" # I suspect this line is ruining the code.
|
531
|
+
edge_labels = get_edge_labels << " Other"
|
532
|
+
node_names = get_node_names
|
533
|
+
|
534
|
+
s = "#{ind(indent_level)}@#{type}_table = " +
|
535
|
+
((type == :action) ? "\n#{ind(indent_level+1)}\# ERROR = 0; MACHINE_ACCEPT = 1; HALT_RETURN = 2" : "") +
|
536
|
+
"\n#{ind(indent_level+1)}#"
|
537
|
+
edge_labels.each do |label|
|
538
|
+
s += sprintf("%#{WIDTH+1}s", label_friendly(label))
|
539
|
+
end
|
540
|
+
s += "\n#{ind(indent_level+1)}"
|
541
|
+
|
542
|
+
node_names.each_with_index do |node,ii|
|
543
|
+
on_last_node = (ii == node_names.size-1)
|
544
|
+
is_accept = !@accept_states[node].nil?
|
545
|
+
s += ((ii==0) ? "[" : " ") + "["
|
546
|
+
|
547
|
+
edge_labels.each_with_index do |edge,jj|
|
548
|
+
on_last_edge = (jj == edge_labels.size-1)
|
549
|
+
if(@graph_hash[node].nil?||
|
550
|
+
@graph_hash[node][edge].nil?||@graph_hash[node][edge][0].nil?)
|
551
|
+
sdest = "-1"
|
552
|
+
adest = ((is_accept) ? HALT_RETURN.to_s : ERROR.to_s)
|
553
|
+
if(!accept_states[node].nil?)
|
554
|
+
ldest = ((is_accept) ? (lookup_codes.find_index(accept_states[node]).to_i).to_s : "0")
|
555
|
+
else
|
556
|
+
ldest = "0"
|
557
|
+
end
|
558
|
+
else
|
559
|
+
sdest = graph_hash[node][edge].to_s
|
560
|
+
adest = MACHINE_ACCEPT.to_s # MA if NON-ACCEPT state
|
561
|
+
ldest = "0"
|
562
|
+
end
|
563
|
+
case type
|
564
|
+
when :state
|
565
|
+
s += sprintf("%#{WIDTH}s", sdest) +
|
566
|
+
((!on_last_edge) ? "," \
|
567
|
+
: "]" + ((!on_last_node) ? "," \
|
568
|
+
: "]" ) + " \# #{node}#{(is_accept ? " ACCEPT":"")}\n#{ind(indent_level+1)}")
|
569
|
+
when :action
|
570
|
+
s += sprintf("%#{WIDTH}s", adest) +
|
571
|
+
(!on_last_edge ? "," \
|
572
|
+
: "]" + (!on_last_node ? "," \
|
573
|
+
: "]" ) + " \# #{node}#{(is_accept ? " ACCEPT" : "")}\n#{ind(indent_level+1)}")
|
574
|
+
when :lookup
|
575
|
+
s += sprintf("%#{WIDTH}s", ldest) +
|
576
|
+
(!on_last_edge ? "," \
|
577
|
+
: "]" + (!on_last_node ? "," \
|
578
|
+
: "]" ) + " \# #{node}#{(is_accept ? " #{@accept_states[node]}" : "")}\n#{ind(indent_level+1)}")
|
579
|
+
end
|
580
|
+
end
|
581
|
+
end
|
582
|
+
s.rstrip
|
583
|
+
end
|
584
|
+
|
585
|
+
# Clobbers the old accept type, if any was present.
|
586
|
+
def add_accept_state(state, type)
|
587
|
+
@accept_states[state] = type
|
588
|
+
end
|
589
|
+
|
590
|
+
def add_edge(src, label, dest)
|
591
|
+
@graph_hash[src] = Hash.new if @graph_hash[src].nil?
|
592
|
+
if @graph_hash[src][label].nil?
|
593
|
+
@graph_hash[src][label] = [dest]
|
594
|
+
else
|
595
|
+
if @graph_hash[src][label].class != Array
|
596
|
+
@graph_hash[src][label] = [@graph_hash[src][label]]
|
597
|
+
end
|
598
|
+
@graph_hash[src][label] << dest if !@graph_hash[src][label].include?(dest)
|
599
|
+
end
|
600
|
+
self
|
601
|
+
end
|
602
|
+
|
603
|
+
# Removes the edge src --label--> dest, if it exists. Deleting something that
# does not exist is silently ignored.
#
# Destinations may be stored either as an Array of nodes or as a single bare
# node, so both shapes are handled. When the last destination for a label is
# removed, the label itself is dropped from the source's edge hash.
#
# Returns self.
def delete_edge(src, label, dest)
  edges = @graph_hash[src]
  return self if edges.nil? || edges[label].nil?

  targets = edges[label]
  if targets.is_a?(Array)
    targets.reject! { |node| node == dest }
    edges.delete(label) if targets.empty?
  elsif targets == dest
    # Bare (non-Array) destination: the label has exactly this one target.
    edges.delete(label)
  end
  # !!! TODO: Add code to handle (delete) orphaned nodes.
  self
end
|
614
|
+
|
615
|
+
def is_accept?(num)
|
616
|
+
@accept_states.include?(num)
|
617
|
+
end
|
618
|
+
end
|
619
|
+
end
|
@@ -0,0 +1,387 @@
|
|
1
|
+
module ThompsonConstruction
|
2
|
+
PENDING = 0;
|
3
|
+
#############################################################################
|
4
|
+
# Thompson-McNaughton-Yamada Construction Section
|
5
|
+
#############################################################################
|
6
|
+
def build_machine_stack(re)
|
7
|
+
skip = 0
|
8
|
+
escaped = false
|
9
|
+
machines = Array.new
|
10
|
+
(0...re.length).each do |ii| # the pointer in some cases.
|
11
|
+
(skip -= 1) && next if skip != 0 # Advance ptr until past () group
|
12
|
+
ch = re[ii] #re[-ii-1]
|
13
|
+
if escaped
|
14
|
+
case ch
|
15
|
+
when 'n'
|
16
|
+
machines.push([cat_machine("\n"), nil])
|
17
|
+
else
|
18
|
+
machines.push([cat_machine(ch), nil])
|
19
|
+
end
|
20
|
+
escaped = false
|
21
|
+
next
|
22
|
+
end
|
23
|
+
case(ch)
|
24
|
+
when '*' then machines.push([kleene_machine, [1,2]])
|
25
|
+
when '+' then machines.push([plus_machine, [1,2]])
|
26
|
+
#when '+' then machines.push([plus_machine, [[0,1],[1,1]]])
|
27
|
+
when '?' then machines.push([question_machine, [0,1]])
|
28
|
+
when '|' then machines.push([alt_machine, [1,2,3,4]])
|
29
|
+
when ']' then raise "mismatched bracket closed a non-open class"
|
30
|
+
when ')' then raise "mismatched paren closed a non-open group"
|
31
|
+
when '('# ; puts "#{ms}\tGRPOPEN\nencounted closing paren. following chars #{re[ii+1]}#{re[ii+2]}"
|
32
|
+
subexpression = ''
|
33
|
+
nesting = 0
|
34
|
+
until (ch2 = re[ii+=1]) == ')' && nesting == 0 # Until the next character is '('
|
35
|
+
nesting -= 1 if ch2 == ')'
|
36
|
+
nesting += 1 if ch2 == '('
|
37
|
+
subexpression << ch2
|
38
|
+
#skip += 1
|
39
|
+
end
|
40
|
+
#skip += 1
|
41
|
+
subgraph = re2nfa(subexpression)
|
42
|
+
skip = subexpression.length+1 # the +1 is used to skip the closing )
|
43
|
+
machines.push([subgraph, nil])
|
44
|
+
when '['
|
45
|
+
char_class = get_char_class(re[ii..-1]) # search rest of the string for []-expression
|
46
|
+
machines.push([cat_machine(/#{char_class}/), nil])
|
47
|
+
skip = char_class.length - 1 + char_class.scan(/\\/).length # compensate for 2 '\'s counting as 1
|
48
|
+
# The below skip assignment works if we want to allow for odd numbers of slashes, but it's
|
49
|
+
# not desirable, because it would allow [\n] to be [n].
|
50
|
+
# We're reserving \ for escaping *, +, ?, etc. symbols.
|
51
|
+
#skip = char_class.length - 1 +
|
52
|
+
# char_class.scan(/\\/).length*2 -
|
53
|
+
# char_class.scan(/\\\\/).length # compensate for 2 '\'s counting as 1
|
54
|
+
when '\\' #; escaped = true unless escaped== true
|
55
|
+
if escaped # '\\' -> cat a slash
|
56
|
+
machines.push([cat_machine(ch), nil])
|
57
|
+
escaped = false
|
58
|
+
else
|
59
|
+
escaped = true
|
60
|
+
end
|
61
|
+
else
|
62
|
+
machines.push([cat_machine(ch), nil])
|
63
|
+
end
|
64
|
+
end
|
65
|
+
machines
|
66
|
+
end
|
67
|
+
|
68
|
+
def get_char_class(str)
|
69
|
+
escaped = false
|
70
|
+
result = ''
|
71
|
+
|
72
|
+
str.each_char.with_index do |ch,ii|
|
73
|
+
if escaped == false && ch == ']' # done reading current class
|
74
|
+
result += ch
|
75
|
+
return result
|
76
|
+
elsif escaped == true
|
77
|
+
result = result[0..-2]+ch
|
78
|
+
else
|
79
|
+
result += ch
|
80
|
+
end
|
81
|
+
escaped = (ch == '\\' && escaped==false)
|
82
|
+
end
|
83
|
+
raise 'character class improperly closed!'
|
84
|
+
end
|
85
|
+
|
86
|
+
# First reduction pass over the machine stack: resolves the unary operators
# (*, ? and +, which share one precedence level).
#
# machines - Array of [machine, pending] pairs, where pending is nil or a
#            list of edge endpoints the machine still needs filled in.
#
# For each pair:
#   * nil/empty pending      - the machine is complete; passed through.
#   * exactly two endpoints  - a unary operator; the machine most recently
#                              emitted is popped and spliced in via
#                              replace_edge(src, PENDING, dest, operand).
#                              The two endpoints are shifted off the pair's
#                              pending list in the process.
#   * anything else          - an alternation; passed through untouched for
#                              a later pass.
#
# Returns a new Array of [machine, pending] pairs.
# Raises RuntimeError when a unary operator has nothing before it to apply to.
def kleene_up(machines)
  machines.each_with_object([]) do |(machine, pending), resolved|
    if pending.nil? || pending.empty? # This machine is complete.
      resolved.push([machine, nil])
    elsif pending.length == 2
      raise "quantifier has no preceding expression to apply to" if resolved.empty?

      src, dest = pending.shift, pending.shift
      operand = resolved.pop[0]
      resolved.push([machine.replace_edge(src, PENDING, dest, operand), nil])
    else # dealing with |
      resolved.push([machine, pending])
    end
  end
end
|
104
|
+
|
105
|
+
def catify(machines)
|
106
|
+
new_machines = Array.new
|
107
|
+
machines.each_with_index do |mach,ii|
|
108
|
+
if ii == 0
|
109
|
+
new_machines.push([mach[0],nil])
|
110
|
+
elsif (mach[1].nil? && machines[ii-1][1].nil?)
|
111
|
+
# This machine AND PREVIOUS are each a cat or finished */?/+
|
112
|
+
# This code is suspiciously similar to the wrap-up code of re2nfa()
|
113
|
+
# which implies that it's not DRY. This is something to revisit.
|
114
|
+
lead = new_machines.pop[0]
|
115
|
+
offset = lead.get_node_count-1
|
116
|
+
acc = lead.accept_states.keys.first || 0
|
117
|
+
lead.imp_attach_graph(acc,mach[0])
|
118
|
+
lead.accept_states.delete_if do |acc_st|
|
119
|
+
!mach[0].accept_states.keys.include?(acc_st-offset)
|
120
|
+
end
|
121
|
+
new_machines.push([lead,nil])
|
122
|
+
else
|
123
|
+
new_machines.push([mach[0],mach[1]])
|
124
|
+
end
|
125
|
+
end
|
126
|
+
new_machines
|
127
|
+
end
|
128
|
+
|
129
|
+
def handle_alternation(machines)
|
130
|
+
machines = absorb_left_alt(machines)
|
131
|
+
machines = absorb_right_alt(machines)
|
132
|
+
end
|
133
|
+
|
134
|
+
# Walks the machine stack left to right and lets every machine that still
# has pending endpoints absorb the machine emitted just before it.
#
# machines - Array of [machine, pending] pairs.
#
# For a pair with pending endpoints, the first two endpoints are shifted off
# its pending list, the previously emitted machine is popped, and the two are
# joined with replace_edge(src, PENDING, dest, operand). The (now shorter,
# possibly empty) pending list is kept with the result so a second pass in
# the opposite direction can fill the remaining side.
#
# Returns a new Array of [machine, pending] pairs.
# Raises RuntimeError when a pending machine has no neighbour to absorb.
def absorb_left_alt(machines)
  machines.each_with_object([]) do |(machine, pending), resolved|
    if pending.nil? || pending.empty? # This machine is complete.
      resolved.push([machine, nil])
    else
      raise "alternation is missing an operand" if resolved.empty?

      src, dest = pending.shift, pending.shift
      operand = resolved.pop[0]
      resolved.push([machine.replace_edge(src, PENDING, dest, operand), pending])
    end
  end
end
|
147
|
+
|
148
|
+
# Backward sweep: runs absorb_left_alt over the reversed list so each
# incomplete machine absorbs its right-hand neighbour, then restores the
# original ordering.
def absorb_right_alt(machines)
  backwards = machines.reverse
  absorb_left_alt(backwards).reverse
end
|
151
|
+
|
152
|
+
# This is a Thompson construction of a regular expression to a NFA.
|
153
|
+
# The machine stack is a series of 2-tuples. The first element of which
|
154
|
+
# is a small NFA, the second of which is a listing of the edges it needs
|
155
|
+
# to fill in by cannibalizing an adjacent NFA.
|
156
|
+
|
157
|
+
# mptr = machines.length - 1 # machine index pointer
|
158
|
+
# m = machines[mptr]
|
159
|
+
# * eats below IF below complete
|
160
|
+
# | eats above and below if they're complete
|
161
|
+
|
162
|
+
# make one pass forwards, completing all kleene stars and all alt LHSs
|
163
|
+
# make one pass backwards, completing all alt RHSs
|
164
|
+
# if any unfulfilled dependencies remain, my assumptions were mistaken
|
165
|
+
# Builds an NFA for the regular expression `re`.
#
# The expression is turned into a stack of [fsm, pending] pairs, which is then
# reduced by the kleene, concatenation and alternation passes. The surviving
# machines are chained, in order, onto a seed machine.
#
# Returns a new FiniteStateMachine built from the assembled graph and accept
# states.
def re2nfa(re)
  # Seed: one accepting node carrying a placeholder self-loop, removed below.
  scaffold = FiniteStateMachine.new({:accept_states => {0=>'eh'},
                                     :graph_hash    => {0=>{PENDING=>[0]}}})

  stack = build_machine_stack(re)
  stack = kleene_up(stack)
  stack = catify(stack)
  stack = handle_alternation(stack)

  stack.each do |entry|
    piece = entry[0]
    shift = scaffold.get_node_count - 1
    # Each piece is attached at the current accept state (node 0 if none).
    attach_at = scaffold.accept_states.keys.first || 0
    scaffold.imp_attach_graph(attach_at, piece)
    # Only accept states that came from the piece just attached survive.
    scaffold.accept_states.delete_if do |state|
      !piece.accept_states.keys.include?(state - shift)
    end
  end

  scaffold.delete_edge(0, PENDING, 0)
  FiniteStateMachine.new({
    :graph_hash    => scaffold.graph_hash,
    :accept_states => scaffold.accept_states
  })
end
|
194
|
+
|
195
|
+
# Replaces the entire accept-state table with a single accepting node.
#
# node_number - node that becomes the only accept state.
# type        - label stored for that accept state (defaults to 'end').
#               The value is stored exactly as given.
#
# Returns the new accept-state Hash.
def set_new_accept(node_number, type='end')
  @accept_states = {node_number => type}
end
|
198
|
+
|
199
|
+
# Puts `fsm` in front of this machine: this machine is attached at fsm's
# first accept state (fsm is modified), and the combined fsm is then handed
# to `copy`.
# NOTE(review): `copy` is defined elsewhere; presumably it adopts fsm's state
# into self -- confirm.
def prepend_graph(fsm)
  junction = fsm.accept_states.keys.first
  fsm.imp_attach_graph(junction, self)
  copy(fsm)
end
|
203
|
+
|
204
|
+
# Two-node machine that moves from node 0 to accepting node 1 on `ch`.
def cat_machine(ch)
  accepting = {1=>'end'}
  edges = {0 => {ch => [1]}}
  FiniteStateMachine.new({:accept_states => accepting, :graph_hash => edges})
end
|
210
|
+
|
211
|
+
# Skeleton for the `?` operator: node 0 reaches accepting node 1 either
# through a PENDING placeholder edge or directly through a LAMBDA edge.
#
# A four-node alternative that was considered and left disabled:
#   :accept_states => {3=>'end'},
#   :graph_hash => {
#     0 => {LAMBDA => [1,3]},
#     1 => {PENDING => [2]},
#     2 => {LAMBDA => [3]}
#   }
def question_machine
  accepting = {1=>'end'}
  edges = {0 => {PENDING => [1], LAMBDA => [1]}}
  FiniteStateMachine.new({:accept_states => accepting, :graph_hash => edges})
end
|
223
|
+
|
224
|
+
# Skeleton for the `|` operator: node 0 branches by LAMBDA to nodes 1 and 3,
# each branch has one PENDING placeholder edge (1->2 and 3->4), and both
# branches rejoin by LAMBDA at accepting node 5.
def alt_machine
  edges = {}
  edges[0] = {LAMBDA => [1,3]}
  edges[1] = {PENDING => [2]}
  edges[2] = {LAMBDA => [5]}
  edges[3] = {PENDING => [4]}
  edges[4] = {LAMBDA => [5]}
  FiniteStateMachine.new({:accept_states => {5=>'end'}, :graph_hash => edges})
end
|
236
|
+
|
237
|
+
# Skeleton for the `*` operator: node 0 goes by LAMBDA to node 1 or straight
# to accepting node 3; 1->2 is the PENDING placeholder; node 2 goes by LAMBDA
# back to node 1 or on to node 3.
def kleene_machine
  edges = {}
  edges[0] = {LAMBDA => [1,3]}
  edges[1] = {PENDING => [2]}
  edges[2] = {LAMBDA => [1,3]}
  FiniteStateMachine.new({:accept_states => {3=>'end'}, :graph_hash => edges})
end
|
247
|
+
|
248
|
+
# Skeleton for the `+` operator: like the kleene skeleton, except node 0 has
# no LAMBDA edge to accepting node 3, so the PENDING edge 1->2 must be taken
# at least once.
#
# A two-node form would be more concise, but it would need logic to replace
# TWO EDGES with one absorb:
#   :accept_states => {1=>'end'},
#   :graph_hash => {
#     0 => {PENDING => [1]},
#     1 => {PENDING => [1]}
#   }
def plus_machine
  edges = {}
  edges[0] = {LAMBDA => [1]}
  edges[1] = {PENDING => [2]}
  edges[2] = {LAMBDA => [1,3]}
  FiniteStateMachine.new({:accept_states => {3=>'end'}, :graph_hash => edges})
end
|
264
|
+
|
265
|
+
#############################################################################
|
266
|
+
# Misc functions primarily supporting re2nfa
|
267
|
+
#############################################################################
|
268
|
+
# graph edges going from origin become outgoing from src
|
269
|
+
# graph edges going TO final state instead go TO dest
|
270
|
+
# What is "final state?" Any accept state?
|
271
|
+
#@graph_hash[src][label].delete(dest)
|
272
|
+
# Splices `graph` into this machine in place of the edge src --label--> dest.
#
# graph's root edges are attached at src, every edge pointing at one of
# graph's (offset) accept states is redirected to dest, those accept states
# are dropped, the replaced edge is deleted, and nodes are renumbered so no
# gaps remain.
#
# src, label, dest - the edge being replaced.
# graph            - machine to inject; must have at least one accept state.
#
# Raises RuntimeError if graph has no accept states.
# Returns self.
def replace_edge(src, label, dest, graph)
  raise "can't inject a graph that had no accept states" if graph.accept_states.nil? || graph.accept_states.empty?

  # A destination may be stored as a bare integer; wrap it in a list.
  # Integer (not the removed Fixnum constant) keeps this working on every Ruby.
  targets = @graph_hash[src][label]
  @graph_hash[src][label] = [targets] if targets.is_a?(Integer)

  # Node labels of the graft are shifted by this amount when attached.
  offset = get_node_count - 1
  imp_attach_graph(src, graph)

  # Redirect every edge that pointed at an accept state of the graft to dest.
  graph.accept_states.keys.each do |acc|
    retarget_edges(acc + offset, dest)
    accept_states.delete(acc + offset)
  end
  delete_edge(src, label, dest)

  renumber!

  self
end
|
298
|
+
|
299
|
+
# ensure no gaps in our node names!
|
300
|
+
# Compacts node names so that the i-th name returned by get_node_names
# becomes i. For each node that moves, edges pointing at it are retargeted
# and its accept-state and graph entries are re-keyed.
#
# Returns self.
def renumber!
  get_node_names.each_with_index do |name, slot|
    next unless name != slot

    retarget_edges(name, slot)
    unless @accept_states[name].nil?
      @accept_states[slot] = @accept_states.delete(name)
    end
    # NOTE(review): if `name` has no entry in @graph_hash this stores nil
    # under `slot` -- confirm get_node_names only yields keyed nodes.
    @graph_hash[slot] = @graph_hash.delete(name)
  end
  self
end
|
310
|
+
|
311
|
+
# imp_attach_graph: grafts a copy of fsm onto this machine.
# The copy's node labels are incremented by (CALLER.node_count - 1), the edges
# leaving the copy's origin are merged into attach_point's edges, and the
# copy's remaining nodes and accept states are merged into this machine.
# Edges already at attach_point under the same label are overwritten by merge!.
# Returns the node count after the graft.
def imp_attach_graph(attach_point, fsm)
  shift = get_node_count - 1
  graft = fsm.clone
  graft.increment_node_labels(shift) # prevent collisions

  root_edges = graft.graph_hash.delete(graft.origin)
  (@graph_hash[attach_point] ||= Hash.new).merge!(root_edges)

  @accept_states.merge!(graft.accept_states)
  @graph_hash.merge!(graft.graph_hash)
  get_node_count
end
|
326
|
+
|
327
|
+
# Redirects every edge that points at old_dest so it points at new_dest.
# For each (node, label) whose destination list contains old_dest, an edge to
# new_dest is added and the edge to old_dest is deleted.
#
# Returns self.
def retarget_edges(old_dest, new_dest)
  @graph_hash.each_pair do |node, edges|
    edges.each_pair do |label, targets|
      next unless targets.include?(old_dest)

      add_edge(node, label, new_dest)
      delete_edge(node, label, old_dest)
    end
  end
  self
end
|
339
|
+
|
340
|
+
# LAMBDA flavour of replace_edge: injects `graph` between src and dest with
# lambda_inject_graph, then deletes the original src --label--> dest edge.
#
# Returns self.
def lambda_replace_edge(src, label, dest, graph)
  # A destination may be stored as a bare integer; wrap it in a list.
  # Integer (not the removed Fixnum constant) keeps this working on every Ruby.
  targets = @graph_hash[src][label]
  @graph_hash[src][label] = [targets] if targets.is_a?(Integer)

  lambda_inject_graph(graph, src, dest)
  delete_edge(src, label, dest)
  self
end
|
349
|
+
|
350
|
+
# Attaches `graph` at src via lambda_attach_graph, then connects each of the
# graph's accept states (offset by the node count taken before attachment)
# to dest with a LAMBDA edge and removes those states from this machine's
# accept states.
#
# Returns self.
def lambda_inject_graph(graph, src, dest)
  shift = get_node_count
  lambda_attach_graph(src, graph)

  grafted_accepts = graph.accept_states.keys.map { |state| state + shift }
  grafted_accepts.each { |state| add_edge(state, LAMBDA, dest) }
  grafted_accepts.each { |state| @accept_states.delete(state) }
  self
end
|
357
|
+
|
358
|
+
# Deep copy: serialises the receiver with Marshal and loads it back, so
# nested hashes and arrays are duplicated rather than shared.
def clone
  snapshot = Marshal.dump(self)
  Marshal.load(snapshot)
end
|
361
|
+
=begin
|
362
|
+
def add_edge(src, label, dest)
|
363
|
+
@graph_hash[src] = Hash.new if @graph_hash[src].nil?
|
364
|
+
if @graph_hash[src][label].nil?
|
365
|
+
@graph_hash[src][label] = [dest]
|
366
|
+
else
|
367
|
+
if @graph_hash[src][label].class != Array
|
368
|
+
@graph_hash[src][label] = [@graph_hash[src][label]]
|
369
|
+
end
|
370
|
+
@graph_hash[src][label] << dest if !@graph_hash[src][label].include?(dest)
|
371
|
+
end
|
372
|
+
self
|
373
|
+
end
|
374
|
+
|
375
|
+
# Fail silently on deleting stuff that doesn't exist.
|
376
|
+
def delete_edge(src, label, dest)
|
377
|
+
return self if @graph_hash[src].nil?
|
378
|
+
return self if @graph_hash[src][label].nil?
|
379
|
+
@graph_hash[src][label].reject! {|node| node==dest}
|
380
|
+
if @graph_hash[src][label].empty?
|
381
|
+
@graph_hash[src].delete(label)
|
382
|
+
end
|
383
|
+
# !!! may need to add something to delete orphaned nodes, here
|
384
|
+
self
|
385
|
+
end
|
386
|
+
=end
|
387
|
+
end
|
data/push.sh
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Push to the hackingoff/scanner-generator repository on GitHub over SSH.
# No refspec is given, so git's configured push default decides what is sent.
git push git@github.com:hackingoff/scanner-generator
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for scanner_generator.
$:.push File.expand_path("../lib", __FILE__)
require "scanner_generator/version"

Gem::Specification.new do |s|
  s.name        = "scanner_generator"
  s.version     = ScannerGenerator::VERSION
  s.authors     = ["Hacking Off"]
  s.email       = ["source@hackingoff.com"]
  # Homepage and summary previously described the context-free-grammar gem;
  # they now match this project (see push.sh for the repository name).
  s.homepage    = "https://github.com/hackingoff/scanner-generator"
  s.summary     = %q{Scanner generation: regular expressions to finite state machines.}
  s.description = %q{Part of the compiler construction toolkit's guts.}

  # Package exactly what git tracks.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "rspec", "~> 2.6"
  s.add_development_dependency "awesome_print"

  s.add_dependency "ruby-graphviz" # graph visualizations
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
require 'scanner_generator'
|
2
|
+
|
3
|
+
describe ScannerGenerator::FiniteStateMachine do
  # These examples have no assertions yet. Declaring them without a block makes
  # RSpec report them as pending instead of letting empty bodies pass silently.
  # Intended assertion style: obj.should eql(val)
  it "generates graphs"

  it "has no epsilons/lambdas in DFAs"

  it "replaces edges successfully"

  it "handles edge cases"
end
|
18
|
+
|
19
|
+
# Radar's example test from Foodie:
|
20
|
+
#it "anything else is delicious" do
|
21
|
+
#Foodie::Food.portray("Not Broccoli").should eql("Delicious!")
|
22
|
+
#end
|
23
|
+
|
24
|
+
=begin
|
25
|
+
# The below tests verify Thompson Construction (conversion from regular
|
26
|
+
# expressions to NFAs). The tests made output for human eyes.
|
27
|
+
|
28
|
+
# TODO: Verify the tests are all still satisfied correctly, then hard-code
|
29
|
+
# graphs and tables satisfying ".should eql()" invocation via RSpec.
|
30
|
+
|
31
|
+
# TODO: Track down the other tests.
|
32
|
+
|
33
|
+
# Non-RSpec test code follows.
|
34
|
+
#!/usr/bin/ruby
|
35
|
+
require '../../../cct/app/models/finite_state_machine.rb'
|
36
|
+
require "awesome_print"
|
37
|
+
|
38
|
+
def replace_edge_test
|
39
|
+
fsa = FiniteStateMachine.new({
|
40
|
+
:accept_states => {2=>'accm2', 3=>'accm3'},
|
41
|
+
:graph_hash => {
|
42
|
+
0 => {"LAMBDA"=>[1]},
|
43
|
+
1 => {"M2" => 2,
|
44
|
+
"M3" => 3},
|
45
|
+
2 => {"a" => 4 },
|
46
|
+
4 => {"b" => 3}
|
47
|
+
}
|
48
|
+
})
|
49
|
+
fsa2 = fsa.clone
|
50
|
+
alter = FiniteStateMachine.new({
|
51
|
+
:accept_states => {3=>'alt_end'},
|
52
|
+
:graph_hash => {
|
53
|
+
0 => {"LAMBDA" => [1,4]},
|
54
|
+
1 => {"M1" => 2},
|
55
|
+
2 => {"LAMBDA" => 3},
|
56
|
+
4 => {"M2" => 5},
|
57
|
+
5 => {"LAMBDA" => 3}
|
58
|
+
}
|
59
|
+
})
|
60
|
+
alt = FiniteStateMachine.new({
|
61
|
+
:accept_states => {2 => 'end'},
|
62
|
+
:graph_hash => {
|
63
|
+
0 => {'a' => [1]},
|
64
|
+
1 => {'b' => [2]}
|
65
|
+
}
|
66
|
+
})
|
67
|
+
fsa.draw_graph("before")
|
68
|
+
fsa.lambda_replace_edge(1,"M3",3, alt.clone)
|
69
|
+
fsa.draw_graph("lambda-after")
|
70
|
+
fsa2.replace_edge(1,"M3",3, alt.clone)
|
71
|
+
fsa2.draw_graph("after")
|
72
|
+
|
73
|
+
#malt = alt.clone
|
74
|
+
#malt.draw_graph('alt-before-imp-attach')
|
75
|
+
#malt.imp_attach_graph(2,alt)
|
76
|
+
#malt.imp_attach_graph(2,alter)
|
77
|
+
#malt.imp_attach_graph(7,alt)
|
78
|
+
#malt.draw_graph('alt-after-imp-attach')
|
79
|
+
end
|
80
|
+
|
81
|
+
def prepend_test
|
82
|
+
fsa = FiniteStateMachine.new({
|
83
|
+
:accept_states => {1=>'accept'},
|
84
|
+
:graph_hash => {0=>{"fuck"=>1}}
|
85
|
+
})
|
86
|
+
fsa.prepend_graph(fsa.kleene_machine)
|
87
|
+
fsa.prepend_graph(fsa.cat_machine('LAMBDA'))
|
88
|
+
fsa.draw_graph('prepend-test')
|
89
|
+
end
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
# Interesting notes:
|
94
|
+
# (a|b|ab)* can read aab with more than one parse tree.
|
95
|
+
|
96
|
+
def re2nfa_test
|
97
|
+
fsa = FiniteStateMachine.new({
|
98
|
+
:accept_states => {1=>'accept'},
|
99
|
+
:graph_hash => {0=>{"LAMBDA"=>1}}}
|
100
|
+
)
|
101
|
+
example = FiniteStateMachine.new({
|
102
|
+
:accept_states => {8=>"end"},
|
103
|
+
:graph_hash => {
|
104
|
+
0 => {"LAMBDA" => [1,3]},
|
105
|
+
1 => {"LAMBDA" => [4,6]},
|
106
|
+
2 => {"LAMBDA" => [1,3]},
|
107
|
+
3 => {"c" => 8},
|
108
|
+
4 => {"a" => 5},
|
109
|
+
5 => {"LAMBDA" => 2},
|
110
|
+
6 => {"b" => 7},
|
111
|
+
7 => {"LAMBDA" => 2}
|
112
|
+
},
|
113
|
+
:origin => 0
|
114
|
+
})
|
115
|
+
|
116
|
+
#s = '(a|b)*c|de*|f'
|
117
|
+
s = 'a|b(d*|(e|f)*)'
|
118
|
+
#s = 'a((b))'
|
119
|
+
# the following string breaks it in some way:
|
120
|
+
# 'a*|b|cde**|k' causes lots of duplication
|
121
|
+
# see examples/2012-03-01_22-47-54_-0800-nfa.png
|
122
|
+
# shows a* OR b OR replace_me, followed by
|
123
|
+
# b | c
|
124
|
+
# followed by c
|
125
|
+
# '(a|b)*'
|
126
|
+
#s = '(a|b)*c'
|
127
|
+
puts "rendering regex: #{s}"
|
128
|
+
fsa = fsa.re2nfa(s)
|
129
|
+
fsa.draw_graph("draw_re2nfa")
|
130
|
+
puts 'bad nfa'
|
131
|
+
ap fsa
|
132
|
+
fsa.subsetify.draw_graph("draw_re2dfa")
|
133
|
+
example.draw_graph("ex_nfa")
|
134
|
+
puts 'good nfa'
|
135
|
+
ap example
|
136
|
+
example.subsetify.draw_graph("ex_dfa")
|
137
|
+
fsa.graph_hash.each_pair do |k,v|
|
138
|
+
if v != example.graph_hash[k]
|
139
|
+
puts "#{v} != #{example.graph_hash[k]}"
|
140
|
+
else
|
141
|
+
puts "same"
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
def re2nfa_ends_in_or_test
|
147
|
+
fsa = FiniteStateMachine.new({
|
148
|
+
:graph_hash=>{0 => {"1" => 1}},
|
149
|
+
:accept_states => {}
|
150
|
+
})
|
151
|
+
fsa = fsa.re2nfa("1|0")
|
152
|
+
fsa.add_edge(5,LAMBDA,6)
|
153
|
+
fsa.set_new_accept(5,nil)
|
154
|
+
fsa.set_new_accept(6,"ok")
|
155
|
+
fsa.draw_graph("end_test_nfa")
|
156
|
+
fsa.subsetify!
|
157
|
+
fsa.draw_graph("end_test_dfa")
|
158
|
+
end
|
159
|
+
|
160
|
+
re2nfa_test
|
161
|
+
#injection_test
|
162
|
+
#replace_edge_test
|
163
|
+
#prepend_test
|
164
|
+
#re2nfa_ends_in_or_test
|
165
|
+
|
166
|
+
=end
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scanner_generator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Hacking Off
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-07-26 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &11360640 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '2.6'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *11360640
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: awesome_print
|
27
|
+
requirement: &11360100 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *11360100
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: ruby-graphviz
|
38
|
+
requirement: &11359540 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *11359540
|
47
|
+
description: Part of the compiler construction toolkit's guts.
|
48
|
+
email:
|
49
|
+
- source@hackingoff.com
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- .gitignore
|
55
|
+
- Gemfile
|
56
|
+
- README
|
57
|
+
- Rakefile
|
58
|
+
- lib/scanner_generator.rb
|
59
|
+
- lib/scanner_generator/finite_state_machine.rb
|
60
|
+
- lib/scanner_generator/thompson_construction.rb
|
61
|
+
- lib/scanner_generator/version.rb
|
62
|
+
- push.sh
|
63
|
+
- scanner_generator.gemspec
|
64
|
+
- spec/scanner_generator_spec.rb
|
65
|
+
homepage: https://github.com/hackingoff/context-free-grammar
|
66
|
+
licenses: []
|
67
|
+
post_install_message:
|
68
|
+
rdoc_options: []
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
requirements: []
|
84
|
+
rubyforge_project: scanner_generator
|
85
|
+
rubygems_version: 1.8.15
|
86
|
+
signing_key:
|
87
|
+
specification_version: 3
|
88
|
+
summary: Parser generation and CFG analysis.
|
89
|
+
test_files: []
|