scanner_generator 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in scanner_generator.gemspec
4
+ gemspec
data/README ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,619 @@
1
+ require 'graphviz'
2
+ require 'set'
3
+
4
+ module ScannerGenerator
5
+ require File.dirname(__FILE__) + '/thompson_construction.rb'
6
+
7
+ LAMBDA = "LAMBDA"; SOURCE = 0; DEST = 1; LABEL = 2;
8
+ ERROR = 0; MACHINE_ACCEPT = 1; HALT_RETURN = 2 # Action Table codes [E, MA, HR]
9
+ ACC = 3; WIDTH = 3
10
+
11
+ # True if the needle (subset) is found in the haystack (superset).
12
# Reports whether every element of +needle+ also occurs in +haystack+.
# Both lists are sorted and walked in step, so duplicates in the needle must be
# matched by as many duplicates in the haystack.
#
# needle   - Array of mutually comparable items.
# haystack - Array of mutually comparable items.
#
# Returns true or false. An empty needle is a subset of anything.
def subset(needle, haystack)
  wanted = needle.sort
  pool = haystack.sort
  cursor = 0
  wanted.each do |item|
    # Skip haystack entries that sort before the item we are looking for.
    cursor += 1 while cursor < pool.length && item > pool[cursor]
    # Running off the end of the haystack means the item is absent.
    return false if cursor >= pool.length || item != pool[cursor]
    cursor += 1
  end
  true
end
32
+
33
+ # is needle contained in a haystack?
34
# Reports whether +needle+ is a subset (per #subset) of at least one of the
# lists in +list_of_haystacks+. Returns true or false.
def subset_of_list_element?(needle, list_of_haystacks)
  list_of_haystacks.any? { |candidate| subset(needle, candidate) }
end
38
+
39
+ class FiniteStateMachine
40
+ include ThompsonConstruction
41
+
42
+ attr_reader :graph_hash, :accept_states, :origin
43
+ attr_accessor :labels
44
+
45
+ # Must set @accept_states, @edges, @edge_labels, @node_labels, @graph_hash
46
+ # edge/node labels are derived from @graph_hash
47
# Builds a machine from an options hash.
#
# input - Hash with required :accept_states and :graph_hash, and optional
#         :origin (default 0), :labels (default {}) and :rankdir
#         (default "TB"; TB is top-to-bottom, LR is left-to-right).
#
# Raises RuntimeError ("Bunk input") when a required key is missing.
def initialize(input)
  accepts = input[:accept_states]
  graph = input[:graph_hash]
  raise "Bunk input" if accepts.nil? || graph.nil?

  @accept_states = accepts
  @graph_hash = graph
  @origin = input[:origin] || 0
  @labels = input[:labels] || {}
  # Edge labels are derived from @graph_hash, so it must be assigned first.
  @edge_labels = get_edge_labels
  @rankdir = input[:rankdir] || "TB"
  self
end
57
+
58
# Adopts another machine's accept states, graph hash and origin (the objects
# are shared, not duplicated) and recomputes the cached edge labels.
# Returns self.
def copy(graph)
  @accept_states, @graph_hash, @origin = graph.accept_states, graph.graph_hash, graph.origin
  @edge_labels = get_edge_labels
  self
end
65
+
66
+ # Regex keys specify that any matching edge labels transition to the dest
67
+ # node.
68
+ # Example:
69
+ # Suppose our language = {a,b,1,\n,*,/}.
70
+ # An edge labeled "[^\n\*\/]" matches anything but newline, *, or /.
71
+ # This function replaces that edge with multiple edges from the
72
+ # language's alphabet. In this case, the [^\n\*\/] edge gets replaced by
73
+ # 3 edges: an "a" edge, a "b" edge, & a "1" edge.
74
+ # NOTE: Invoke this AFTER drawing dfa, but BEFORE dumping the module.
75
# Replaces every Regexp-keyed edge with one edge per String label (taken from
# the cached @edge_labels) that the pattern matches, all pointing at the
# regex edge's destination. An expansion never overwrites an edge the source
# node already has for that label. The regex edge itself is removed.
# Returns self.
def expand_regex_edges
  string_labels = @edge_labels.select { |label| label.class == String }
  @graph_hash.each_pair do |_source, edges|
    expansions = {}
    patterns = edges.keys.select { |key| key.class == Regexp }
    patterns.each do |pattern|
      target = edges[pattern]
      string_labels.each do |label|
        next unless label.match(pattern)
        # Guard keeps a sloppy regex from clobbering an explicit edge.
        expansions[label] = target unless edges.key?(label)
      end
      edges.delete(pattern)
    end
    edges.merge!(expansions) unless expansions.empty?
  end
  self
end
102
+
103
# Lists every node mentioned in @graph_hash, whether as an edge source or as a
# destination, flattened, de-duplicated and sorted.
def get_node_names
  destinations = @graph_hash.values.map(&:values)
  (@graph_hash.keys + destinations).flatten.uniq.sort
end
112
+
113
# Collects the distinct edge labels used anywhere in @graph_hash, in order of
# first appearance.
def get_edge_labels
  @graph_hash.each_value.map(&:keys).flatten.uniq
end
118
+
119
# Subset construction: builds a new machine whose states are lambda-closures
# of this machine's nodes, starting from the closure of +start_node+.
# State labels of the result are the distinct labels of the member nodes,
# joined by newlines. The result is passed through #beautify before return.
def subsetify(start_node = @origin)
  dfa_edges = {}
  dfa_accepts = {}
  dfa_labels = {}

  # The worklist grows while it is being walked; Array#each picks up the
  # states appended during iteration.
  worklist = [closure_of(start_node)]
  alphabet = get_edge_labels.reject { |label| label == LAMBDA }

  worklist.each do |subset_state|
    row = dfa_edges[subset_state] = {}
    alphabet.each do |label|
      reached = []
      subset_state.each do |node|
        edges = @graph_hash[node]
        next if edges.nil? || edges[label].nil?
        closure = closure_of(edges[label])
        reached << closure unless reached.include?(closure)
      end

      next if reached.empty?
      reached.flatten!
      row[label] = reached
      worklist << reached unless worklist.include?(reached)
    end

    accept_type = accept_state_of(subset_state)
    dfa_accepts[subset_state] = accept_type if accept_type != false
    # States without outgoing edges are dropped from the graph hash.
    dfa_edges.delete(subset_state) if row == {}
  end

  worklist.each do |subset_state|
    text = ""
    subset_state.each do |node|
      piece = @labels[node]
      next if piece.nil? || text.include?(piece)
      text << piece + "\n"
    end
    dfa_labels[subset_state] = text.chomp unless text == ""
  end

  FiniteStateMachine.new({
    :graph_hash => dfa_edges,
    :accept_states => dfa_accepts,
    :labels => dfa_labels
  }).beautify
end
172
+
173
# In-place variant of #subsetify: replaces this machine's graph hash, accept
# states and labels with those of the subset-constructed machine.
# Returns self.
def subsetify!(start_node_label = 0)
  converted = subsetify(start_node_label)
  @graph_hash = converted.graph_hash
  @accept_states = converted.accept_states
  @labels = converted.labels
  self
end
178
+
179
# Renders the machine to "<filename>.svg" through GraphViz.
#
# filename - Base name of the SVG file (default "output").
# svgname  - Name given to the GraphViz graph. Previously accepted but
#            ignored; the default keeps the old hard-coded name.
# shape    - Node shape; accept states are coloured green only for "Mrecord".
# path     - Optional :path option forwarded to GraphViz#output.
#
# Accept states are drawn with a double periphery. Returns whatever
# GraphViz#output returns.
def draw_graph(filename = "output", svgname = :Finite_Automata_Graph, shape = "circle", path = nil)
  graph = GraphViz::new(svgname)
  graph[:rankdir] = @rankdir

  get_node_names.each do |node|
    is_accept = @accept_states.include?(node)
    graph.add_nodes(node.to_s,
                    :shape => shape,
                    :label => @labels[node] || node.to_s,
                    :peripheries => (is_accept ? 2 : 1),
                    :color => ((is_accept && shape == "Mrecord") ? "#66DD66" : "#000000"))
  end

  @graph_hash.each_pair do |source_label, sub_hash|
    sub_hash.each_pair do |edge_label, destination_nodes|
      # Destinations may be a single node or an array of nodes.
      [destination_nodes].flatten.each do |dest_label|
        source_node = graph.get_node(source_label.to_s)
        dest_node = graph.get_node(dest_label.to_s)
        graph.add_edges(source_node, dest_node, :label => label_friendly(edge_label).gsub('\\','\\\\\\\\'))
      end
    end
  end

  if path
    graph.output(:svg => "#{filename}.svg", :path => path)
  else
    graph.output(:svg => "#{filename}.svg")
  end
end
209
+
210
# Draws the graph with each labelled state rendered as an HTML-like table:
# a "State <n>" heading followed by one row per line of the state's label.
# Accept states get a green table border, others a white one.
#
# The decorated labels are keyed by the state itself (not by its position in
# the labels hash), and the plain labels are restored even if drawing raises.
#
# Parameters are forwarded unchanged to #draw_graph, whose result is returned.
def draw_state_labeled_graph(filename = "output", svgname = :Finite_Automata_Graph, shape = "circle", path = nil)
  plain_labels = @labels
  decorated = {}
  plain_labels.each_pair do |state, text|
    lines = ""
    text.each_line { |line| lines << "<tr><td align=\"left\">#{line}</td></tr>" }
    table_border_color = @accept_states.include?(state) ? "#448844" : "#ffffff"
    heading = "State #{state}"
    decorated[state] = '<<table color="'+table_border_color+'" style="ROUNDED" border="1" cellborder="0" cellpadding="5"><tr><td align="center" colspan="1"><font color="#666666" point-size="8">'+heading+'</font></td></tr>'+lines+'</table>>'
  end
  @labels = decorated
  draw_graph(filename, svgname, shape, path)
ensure
  @labels = plain_labels
end
225
+
226
+ # adapted from class notes
227
# Computes the lambda-closure of a node (or array of nodes): the given nodes
# plus everything reachable from them through LAMBDA edges only. Nodes appear
# in discovery order, without duplicates.
def closure_of(node_label)
  reached = [node_label].flatten
  cursor = 0
  # The list doubles as the work queue: newly found nodes are appended and
  # visited in turn.
  while cursor < reached.length
    edges = @graph_hash[reached[cursor]]
    cursor += 1
    next if edges.nil? || edges[LAMBDA].nil?
    [edges[LAMBDA]].flatten.each do |neighbour|
      reached << neighbour unless reached.include?(neighbour)
    end
  end
  reached
end
248
+
249
+ # returns true if the any of the closure's states are included in the set
250
+ # of accept_states
251
# Looks for the first member of +closure+ that is an accept state and returns
# the accept type recorded for it; returns false when no member accepts.
def accept_state_of(closure)
  closure.each { |member| return @accept_states[member] if @accept_states.include?(member) }
  false
end
259
+
260
+ # numbers sets and makes them the new keys (cleans up the graph_hash's keys)
261
# Returns a new FiniteStateMachine in which every state name (typically a
# closure array) is replaced by a small integer. Numbers are handed out from 0
# in order of first appearance while walking @graph_hash (source first, then
# its destinations). Accept states and labels are re-keyed the same way.
def beautify
  numbering = {}
  number_for = lambda do |old_name|
    numbering[old_name] = numbering.size unless numbering.key?(old_name)
  end

  @graph_hash.each_pair do |old_name, edges|
    number_for.call(old_name)
    edges.values.each { |target| number_for.call(target) }
  end

  renamed_graph = {}
  @graph_hash.each_pair do |old_name, edges|
    row = renamed_graph[numbering[old_name]] = {}
    edges.each_pair { |edge_label, target| row[edge_label] = numbering[target] }
  end

  renamed_accepts = {}
  @accept_states.each_pair { |old_name, acc_type| renamed_accepts[numbering[old_name]] = acc_type }

  # Be sure to bring labels along.
  renamed_labels = {}
  @labels.each_pair { |old_name, text| renamed_labels[numbering[old_name]] = text }

  FiniteStateMachine.new({
    :graph_hash => renamed_graph,
    :accept_states => renamed_accepts,
    :labels => renamed_labels
  })
end
294
+
295
# Returns the source text of an `initialize` method that assigns the lookup
# codes, the label codes and the state/action/lookup tables.
#
# indent_width - Number of spaces placed before the two code-string lines
#                (default 2). The original body read this name without
#                defining it, which raised NameError on every call.
def generate_initialize(indent_width = 2)
  pad = " " * indent_width
  "def initialize\n" +
    "#{pad}#{lookup_code_string}\n" + # array of edge labels
    "#{pad}#{label_code_string}\n" +  # hash mapping accept states to the type accepted by them
    dump_table(:state) + "\n" +
    dump_table(:action) + "\n" +
    dump_table(:lookup) + "\n" +
    "#{ind(1)}end"
end
304
+
305
+ # Module Dumping
306
# Returns the source text of a module named +name+ whose `initialize` assigns
# the lookup codes, label codes and the three tables, then calls `super`.
# Regex edges are expanded first.
# NOTE(review): +indent_width+ is accepted but not read by this method.
def generate_module(name = 'ScannerModule', indent_width = 2)
  expand_regex_edges
  pieces = [
    "module #{name}\n def initialize\n",
    "#{ind(1)}#{lookup_code_string}\n", # array of edge labels
    "#{ind(1)}#{label_code_string}\n",  # hash mapping accept states to the type accepted by them
    dump_table(:state), "\n",
    dump_table(:action), "\n",
    dump_table(:lookup), "\n",
    "#{ind(2)}super\n",
    "#{ind(1)}end\nend"
  ]
  pieces.join
end
317
+
318
+ # This horrific kluge ports the ruby dump_tables to Javascript. Sorta.
319
+ # Smelly code, but output passes JSLint and is the path of least resistance.
320
+ #
321
+ # TODO: Write something that generates the tables as ruby objects, then
322
+ # refactor these table dumping functions, using array.to_s
323
# Renders the scanner tables as JavaScript-flavoured text.
# Calls expand_regex_edges first, then concatenates "var SCANNER = {};" with
# the three dump_table outputs (indent width and level 0) and the lookup/label
# code strings. Finally every pair in `replacements` is applied with
# String#gsub! in hash insertion order, so earlier substitutions can affect
# what later ones see. Returns the resulting String.
# NOTE(review): `name` and `indent_width` are accepted but never read here.
+ def js_tables(name = 'ScannerModule', indent_width = 2)
324
+ expand_regex_edges
325
+ replacements = {
326
+ ' # E' => '// E', # Action table's label
327
+ ' # ' => '// ', # table-leading comments
328
+ '# ' => ' // ', # row-trailing descriptions
329
+ '[[' => ' [', # first row of table
330
+ '@state_table = ' => 'SCANNER.state_table = [',
331
+ '@action_table = ' => 'SCANNER.action_table = [',
332
+ '@lookup_table = ' => 'SCANNER.lookup_table = [',
333
+ '@lookup_codes' => 'SCANNER.lookup_codes',
334
+ '@label_codes' => 'SCANNER.label_codes',
335
+ ' [' => ' [', # linty js indent of 4
336
+ ']] ' => ']];', # semicolon ending tables
337
+ ':other' => '"other"', # :other symbol.to_s
338
+ '"=>' => '" : ' # javascript hash notation
339
+ }
340
+ s = "var SCANNER = {};\n" +
341
+ dump_table(:state, 0, 0) + "\n" +
342
+ dump_table(:action, 0, 0) + "\n" +
343
+ dump_table(:lookup, 0, 0) + "\n" +
344
+ "#{ind(0)}#{lookup_code_string};\n" + # array of edge labels
345
+ "#{ind(0)}#{label_code_string};\n" # hash mapping accept states to the type accepted by them
346
+ replacements.each_pair{|k,v| s.gsub!(k,v)}
347
+ s
348
+ end
349
+
350
# Builds the source text of a standalone `class Scanner`.
# Calls expand_regex_edges, then returns a String made of an `initialize`
# that assigns the lookup/label code strings and the three
# dump_table(type, 2, 2) tables, followed by the literal `scan` and
# `scan_file` methods held in the non-interpolating heredoc below.
# NOTE(review): `indent_width` is accepted but never read here.
+ def generate_scanner(indent_width = 2)
351
+ expand_regex_edges
352
+
353
+ scanner_function =<<-'END_SCANNER'
354
+ def scan(input)
355
+ @token = ""
356
+ @state = 0
357
+ @buffered = false
358
+ results = Array.new
359
+
360
+ input.each_char do |ch|
361
+ current_read = case ch # Map chars onto char-classes by editing case/when
362
+ when /[a-zA-Z]/ then @label_codes["L"]
363
+ when /[0-9]/ then @label_codes["D"]
364
+ else @label_codes[ch] || @label_codes[:other]
365
+ end
366
+ if((@action_table[@state][current_read]==1) && (@state_table[@state][current_read] != -1))
367
+ @buffered = false # action=MA (Machine-Accept) (=1). Append char to token.
368
+ @token += ch unless ch[/\s/] && @label_codes[ch].nil? # Uncomment if recognizing some whitespace.
369
+ @state=@state_table[@state][current_read]
370
+ elsif((@state_table[@state][current_read]==-1) && (@action_table[@state][current_read]==2))
371
+ @buffered = true # action=HR (Halt-Return) (=2). Accept current token.
372
+ results.push [@lookup_codes[@lookup_table[@state][current_read]],@token]
373
+ @state = 0
374
+ @token = ""
375
+ else # ? Hitting this block indicates action=ERR (ERROR) (=3)
376
+ next
377
+ end
378
+ redo if(@buffered==true && current_read!=@label_codes[:other]) # repeat w/o advancing to next char
379
+ end
380
+ results
381
+ end
382
+
383
+ # Appends a newline to the file in case of its absence, to ensure
384
+ # the presence of terminating whitespace. Convert Windows newlines
385
+ # to UNIX style ones.
386
+ def scan_file(filename = "test_file.txt")
387
+ scan((File.open(filename, "r").read+"\n").gsub("\r\n","\n"))
388
+ end
389
+ END_SCANNER
390
+ return "class Scanner\n" +
391
+ "#{ind(1)}def initialize\n" +
392
+ "#{ind(2)}#{lookup_code_string}\n" + # array of edge labels
393
+ "#{ind(2)}#{label_code_string}\n" + # hash mapping accept states to the type accepted by them
394
+ dump_table(:state, 2,2) + "\n" + # Note: the 1s should be 2s, but dump_table's results seem
395
+ dump_table(:action, 2,2) + "\n" + # to mysteriously have an extra leading two spaces. Can't
396
+ dump_table(:lookup, 2,2) + "\n" + # for the life of me figure out how or why.
397
+ "#{ind(1)}end\n\n" +
398
+ scanner_function +
399
+ "\nend"
400
+ end
401
+
402
+ # Module Dumping
403
# Writes the generated module source to ./modules/<underscored name>.rb.
#
# name         - CamelCased name; only used to derive the file name.
# indent_width - Accepted for interface compatibility; not read here.
#
# The file is written with File.write so the handle is closed (and the data
# flushed) before returning; the original left the File object open.
# NOTE(review): generate_module is called without +name+, so the module in
# the file is always named with generate_module's default. Left as is because
# existing callers may rely on it.
#
# Returns a status String containing the number of bytes written.
def dump_module(name, indent_width = 2)
  filename = underscore(name)
  written = File.write("./modules/#{filename}.rb", generate_module)
  "Successfully wrote #{written} characters to #{filename}.rb"
end
409
+
410
# Returns the edge labels with any label containing whitespace replaced by its
# escaped, quote-less inspect form (so a newline becomes backslash-n).
def friendly_edge_labels
  get_edge_labels.map do |label|
    label[/\s/].nil? ? label : label.inspect[1..-2]
  end
end
416
+
417
# Converts an edge label into text suitable for display.
#
# - Integer labels become their decimal string. (The original compared
#   against Fixnum, which no longer exists on Ruby 3.2+.)
# - Regexp labels become "/<pattern>/"; the slice drops the 7-character
#   "(?-mix:" style prefix and the closing paren of Regexp#to_s.
# - LAMBDA / "LAMBDA" / "EPSILON" / empty labels become the HTML entity for a
#   lower-case epsilon.
# - Anything else is returned as is, unless it contains whitespace, in which
#   case its escaped, quote-less inspect form is returned.
def label_friendly(label)
  case label
  when Integer
    label.to_s
  when Regexp
    '/' + label.to_s[7..-2] + '/'
  else
    text = label.to_s
    if label == LAMBDA || text == "LAMBDA" || text == "EPSILON" || text.empty?
      '&#949;' # epsilon-lower is 949
    else
      label[/\s/].nil? ? label : label.inspect[1..-2]
    end
  end
end
429
+
430
+
431
# Returns the indentation string for +level+ nesting levels of +width+ spaces.
def ind(level, width=2)
  padding = level * width
  " " * padding
end
434
+
435
+ # converts a CamelCased name to k_and_r style for filename
436
# Converts a CamelCased name to snake_case for use as a file name: the first
# character is lower-cased and every later A-Z character becomes "_" plus its
# lower-case form.
def underscore(name)
  head = name[0].downcase
  tail = name[1, name.length].each_char.map do |ch|
    ch.match(/[^A-Z]/) ? ch : "_" + ch.downcase
  end
  head + tail.join
end
443
+
444
+
445
# Returns the lookup-code list: "!accept" first, then each distinct accept
# type in order of first appearance.
def lookup_codes
  ["!accept", *@accept_states.values].uniq
end
448
+ # The array union (|) in lookup_codes keeps "!accept" first and drops duplicate accept types.
449
# Returns Ruby source text assigning the lookup-code array to @lookup_codes.
def lookup_code_string
  "@lookup_codes = " + lookup_codes.to_s
end
452
+
453
# Returns Ruby source text assigning @label_codes: a hash mapping each edge
# label to its index in get_edge_labels, plus :other mapped to the label count.
#
# The text is assembled entry by entry in the `key=>value` form instead of
# slicing Hash#to_s, so an empty alphabet yields "{:other=>0}" (the slice
# produced "{, :other=>0}") and the output does not depend on the running
# Ruby's Hash#inspect format.
def label_code_string
  labels = get_edge_labels
  entries = labels.each_with_index.map { |label, index| "#{label.inspect}=>#{index}" }
  entries << ":other=>#{labels.length}"
  "@label_codes = {#{entries.join(', ')}}"
end
458
+
459
+ # Graph Attachment
460
# Shifts every node number in the machine by +amount+: graph hash keys, edge
# destinations (a single Integer or an Array of them), accept-state keys and
# the origin.
#
# Uses Integer for the scalar check; the original tested for Fixnum, which
# raises NameError on Ruby 3.2+.
#
# Raises RuntimeError when a destination is neither an Integer nor an Array.
# Returns the new origin.
def increment_node_labels(amount)
  shifted_graph = {}
  @graph_hash.each_pair do |node, edges|
    shifted_edges = {}
    edges.each_pair do |label, target|
      shifted_edges[label] =
        case target
        when Integer then target + amount
        when Array then target.map { |n| n + amount }
        else raise "value (#{target}) is a #{target.class}!"
        end
    end
    shifted_graph[node + amount] = shifted_edges
  end

  shifted_accepts = {}
  @accept_states.each_pair { |node, type| shifted_accepts[node + amount] = type }

  @graph_hash, @accept_states = shifted_graph, shifted_accepts
  @origin += amount
end
482
+
483
# Number of distinct nodes in the graph, as listed by get_node_names.
def get_node_count
  get_node_names.length
end
484
+
485
+ # considerations:
486
+ # do we need a flag for when we have to strip an attach point of being an accept state?
487
# Grafts +fsm+ onto this machine: +fsm+'s node numbers are shifted past this
# machine's node count (mutating +fsm+), a LAMBDA edge is added from
# +attach_point+ to +fsm+'s origin, and +fsm+'s graph hash and accept states
# are merged into this machine's.
#
# Raises RuntimeError when +attach_point+ is not below the node count or when
# +fsm+ is this machine. Returns the node count after the merge.
def attach_graph(attach_point, fsm)
  size_before = get_node_count
  raise "#{attach_point} out of graph bounds." if attach_point >= size_before
  raise "going to break everything by attaching to myself!" if fsm == self

  fsm.increment_node_labels(size_before)

  # The attach point may be on the graph without any outgoing edges yet.
  outgoing = @graph_hash[attach_point]
  outgoing = @graph_hash[attach_point] = {} if outgoing.nil?

  if outgoing[LAMBDA].nil?
    outgoing[LAMBDA] = [fsm.origin]
  else
    outgoing[LAMBDA] << fsm.origin
  end

  @graph_hash.merge!(fsm.graph_hash)
  @accept_states.merge!(fsm.accept_states)
  get_node_count
end
525
+
526
+ # Dumps either a state table, an action table, or a lookup table
527
+ # This function is kind of half-refactored with dump_module and needs cleaning
528
+ # like the wizard needs food.
529
# Renders one table (:state, :action or :lookup) as text: a "@<type>_table = "
# header, a comment row of edge labels (plus " Other"), then one bracketed row
# per node from get_node_names with one cell per edge label.
# For a node/edge pair with no transition the cell is -1 (state), HALT_RETURN
# or ERROR depending on whether the node is an accept state (action), or the
# index of the node's accept type in lookup_codes, else 0 (lookup). With a
# transition the cells are the destination's to_s, MACHINE_ACCEPT and 0.
# Returns the text with trailing whitespace stripped.
# NOTE(review): `indent_width` is accepted but never read; indentation comes
# from ind(indent_level) using ind's default width.
+ def dump_table(type = :state, indent_width = 2, indent_level = 2)
530
+ # edge_labels = friendly_edge_labels << " Other" # I suspect this line is ruining the code.
531
+ edge_labels = get_edge_labels << " Other"
532
+ node_names = get_node_names
533
+
534
+ s = "#{ind(indent_level)}@#{type}_table = " +
535
+ ((type == :action) ? "\n#{ind(indent_level+1)}\# ERROR = 0; MACHINE_ACCEPT = 1; HALT_RETURN = 2" : "") +
536
+ "\n#{ind(indent_level+1)}#"
537
+ edge_labels.each do |label|
538
+ s += sprintf("%#{WIDTH+1}s", label_friendly(label))
539
+ end
540
+ s += "\n#{ind(indent_level+1)}"
541
+
542
+ node_names.each_with_index do |node,ii|
543
+ on_last_node = (ii == node_names.size-1)
544
+ is_accept = !@accept_states[node].nil?
545
+ s += ((ii==0) ? "[" : " ") + "["
546
+
547
+ edge_labels.each_with_index do |edge,jj|
548
+ on_last_edge = (jj == edge_labels.size-1)
549
+ if(@graph_hash[node].nil?||
550
+ @graph_hash[node][edge].nil?||@graph_hash[node][edge][0].nil?)
551
+ sdest = "-1"
552
+ adest = ((is_accept) ? HALT_RETURN.to_s : ERROR.to_s)
553
+ if(!accept_states[node].nil?)
554
+ ldest = ((is_accept) ? (lookup_codes.find_index(accept_states[node]).to_i).to_s : "0")
555
+ else
556
+ ldest = "0"
557
+ end
558
+ else
559
+ sdest = graph_hash[node][edge].to_s
560
+ adest = MACHINE_ACCEPT.to_s # MA if NON-ACCEPT state
561
+ ldest = "0"
562
+ end
563
+ case type
564
+ when :state
565
+ s += sprintf("%#{WIDTH}s", sdest) +
566
+ ((!on_last_edge) ? "," \
567
+ : "]" + ((!on_last_node) ? "," \
568
+ : "]" ) + " \# #{node}#{(is_accept ? " ACCEPT":"")}\n#{ind(indent_level+1)}")
569
+ when :action
570
+ s += sprintf("%#{WIDTH}s", adest) +
571
+ (!on_last_edge ? "," \
572
+ : "]" + (!on_last_node ? "," \
573
+ : "]" ) + " \# #{node}#{(is_accept ? " ACCEPT" : "")}\n#{ind(indent_level+1)}")
574
+ when :lookup
575
+ s += sprintf("%#{WIDTH}s", ldest) +
576
+ (!on_last_edge ? "," \
577
+ : "]" + (!on_last_node ? "," \
578
+ : "]" ) + " \# #{node}#{(is_accept ? " #{@accept_states[node]}" : "")}\n#{ind(indent_level+1)}")
579
+ end
580
+ end
581
+ end
582
+ s.rstrip
583
+ end
584
+
585
+ # Clobbers the old accept type, if any was present.
586
# Marks +state+ as accepting with the given +type+, clobbering any accept type
# previously recorded for it. Returns +type+.
def add_accept_state(state, type)
  @accept_states.store(state, type)
end
589
+
590
# Adds an edge labelled +label+ from +src+ to +dest+. Destinations are kept in
# an Array; a scalar destination already stored is wrapped first, and +dest+
# is not added twice. Returns self.
def add_edge(src, label, dest)
  edges = @graph_hash[src]
  edges = @graph_hash[src] = {} if edges.nil?

  targets = edges[label]
  if targets.nil?
    edges[label] = [dest]
  else
    targets = edges[label] = [targets] unless targets.class == Array
    targets << dest unless targets.include?(dest)
  end
  self
end
602
+
603
+ # Fail silently on deleting stuff that doesn't exist.
604
# Removes +dest+ from the destinations of the +label+ edge leaving +src+, and
# drops the label entirely once no destination is left. Deleting something
# that does not exist is silently ignored. Returns self.
def delete_edge(src, label, dest)
  edges = @graph_hash[src]
  return self if edges.nil?

  targets = edges[label]
  return self if targets.nil?

  targets.delete_if { |node| node == dest }
  edges.delete(label) if targets.empty?
  # !!! TODO: Add code to handle (delete) orphaned nodes.
  self
end
614
+
615
# True when +num+ is recorded as an accept state.
def is_accept?(num)
  @accept_states.member?(num)
end
618
+ end
619
+ end
@@ -0,0 +1,387 @@
1
+ module ThompsonConstruction
2
+ PENDING = 0;
3
+ #############################################################################
4
+ # Thompson-McNaughton-Yamada Construction Section
5
+ #############################################################################
6
# Scans a regular expression string and returns the machine stack: an Array of
# [machine, pending_edges] pairs, where pending_edges is nil for a complete
# machine or the list of node numbers whose PENDING edges an operator machine
# still has to fill in.
#
# - A backslash escapes the next character; an escaped 'n' becomes a newline.
# - '*', '+', '?' and '|' push their operator machines.
# - '(' ... ')' is compiled recursively with re2nfa (nesting is tracked).
# - '[' ... ']' is read with get_char_class and pushed as a Regexp edge.
#
# Raises RuntimeError for a stray ']' or ')', and for a '(' that is never
# closed (the original ran past the end of the string and failed with a
# TypeError while appending nil).
def build_machine_stack(re)
  skip = 0
  escaped = false
  machines = []
  (0...re.length).each do |ii|
    # Advance past characters already consumed by a () group or [] class.
    if skip != 0
      skip -= 1
      next
    end

    ch = re[ii]
    if escaped
      machines.push([cat_machine(ch == 'n' ? "\n" : ch), nil])
      escaped = false
      next
    end

    case ch
    when '*' then machines.push([kleene_machine, [1, 2]])
    when '+' then machines.push([plus_machine, [1, 2]])
    when '?' then machines.push([question_machine, [0, 1]])
    when '|' then machines.push([alt_machine, [1, 2, 3, 4]])
    when ']' then raise "mismatched bracket closed a non-open class"
    when ')' then raise "mismatched paren closed a non-open group"
    when '('
      subexpression = ''
      nesting = 0
      cursor = ii
      # Collect characters until the ')' that closes this group.
      loop do
        ch2 = re[cursor += 1]
        raise "mismatched paren opened a group that was never closed" if ch2.nil?
        break if ch2 == ')' && nesting == 0
        nesting -= 1 if ch2 == ')'
        nesting += 1 if ch2 == '('
        subexpression << ch2
      end
      subgraph = re2nfa(subexpression)
      skip = subexpression.length + 1 # the +1 is used to skip the closing )
      machines.push([subgraph, nil])
    when '['
      char_class = get_char_class(re[ii..-1]) # search rest of the string for []-expression
      machines.push([cat_machine(/#{char_class}/), nil])
      skip = char_class.length - 1 + char_class.scan(/\\/).length # compensate for 2 '\'s counting as 1
    when '\\'
      # An already-escaped backslash is handled by the `if escaped` branch above.
      escaped = true
    else
      machines.push([cat_machine(ch), nil])
    end
  end
  machines
end
67
+
68
# Reads a bracketed character class from the start of +str+ and returns it up
# to and including the first unescaped ']'. A backslash is dropped and the
# character after it is kept literally (so an escaped ']' does not end the
# class).
#
# Raises RuntimeError when no unescaped ']' is found.
def get_char_class(str)
  collected = ''
  pending_escape = false

  str.each_char do |ch|
    if pending_escape
      # Replace the backslash just collected with the escaped character.
      collected = collected[0..-2] + ch
      pending_escape = false
    else
      collected += ch
      return collected if ch == ']' # done reading current class
      pending_escape = (ch == '\\')
    end
  end
  raise 'character class improperly closed!'
end
85
+
86
# First reduction pass over the machine stack: every operator machine with
# exactly two pending edge nodes (*, ? and +, which share precedence) absorbs
# the machine pushed just before it via replace_edge. Complete machines pass
# through with nil pending edges; machines with any other number of pending
# nodes (alternation) pass through unchanged.
def kleene_up(machines)
  finished = []
  machines.each do |mach|
    fsm, pending = mach[0], mach[1]
    if pending.nil? || pending.empty?
      finished.push([fsm, nil])
    elsif pending.length == 2
      src = pending.shift
      dest = pending.shift
      operand = finished.pop[0]
      finished.push([fsm.replace_edge(src, PENDING, dest, operand), nil])
    else # dealing with |
      finished.push([fsm, pending])
    end
  end
  finished
end
104
+
105
# Concatenation pass over the machine stack: whenever a machine and the one
# before it in the input both have nil pending edges, the later one is attached
# to the first accept state of the machine on top of the output stack (node 0
# when it has none), and accept states that do not correspond to the attached
# machine's accept states are removed. The first machine is always pushed with
# nil pending edges; everything else passes through unchanged.
def catify(machines)
  merged = []
  machines.each_with_index do |mach, ii|
    fsm, pending = mach[0], mach[1]
    if ii == 0
      merged.push([fsm, nil])
    elsif pending.nil? && machines[ii - 1][1].nil?
      lead = merged.pop[0]
      offset = lead.get_node_count - 1
      attach_at = lead.accept_states.keys.first || 0
      lead.imp_attach_graph(attach_at, fsm)
      lead.accept_states.delete_if do |acc_st|
        !fsm.accept_states.keys.include?(acc_st - offset)
      end
      merged.push([lead, nil])
    else
      merged.push([fsm, pending])
    end
  end
  merged
end
128
+
129
# Resolves alternation machines: a forward pass absorbs each left-hand side,
# then a backward pass absorbs each right-hand side. Returns the new stack.
def handle_alternation(machines)
  absorb_right_alt(absorb_left_alt(machines))
end
133
+
134
# Forward pass for alternation: each machine that still has pending edge nodes
# takes its next two (source, destination) and absorbs the machine on top of
# the output stack via replace_edge, keeping whatever pending nodes remain.
# Complete machines pass through with nil pending edges.
def absorb_left_alt(machines)
  machines.each_with_object([]) do |mach, absorbed|
    fsm, pending = mach[0], mach[1]
    if pending.nil? || pending.empty?
      absorbed.push([fsm, nil])
    else
      src, dest = pending.shift(2)
      operand = absorbed.pop[0]
      absorbed.push([fsm.replace_edge(src, PENDING, dest, operand), pending])
    end
  end
end
147
+
148
# Backward pass for alternation: runs absorb_left_alt over the reversed stack
# and restores the original direction afterwards.
def absorb_right_alt(machines)
  backwards = machines.reverse
  absorbed = absorb_left_alt(backwards)
  absorbed.reverse
end
151
+
152
+ # This is a Thompson construction of a regular expression to a NFA.
153
+ # The machine stack is a series of 2-tuples. The first element of which
154
+ # is a small NFA, the second of which is a listing of the edges it needs
155
+ # to fill in by cannibalizing an adjacent NFA.
156
+
157
+ # mptr = machines.length - 1 # machine index pointer
158
+ # m = machines[mptr]
159
+ # * eats below IF below complete
160
+ # | eats above and below if they're complete
161
+
162
+ # make one pass forwards, completing all kleene stars and all alt LHSs
163
+ # make one pass backwards, completing all alt RHSs
164
+ # if any unfulfilled dependencies remain, my assumptions were mistaken
165
# Thompson construction: converts the regular expression string +re+ into a
# new FiniteStateMachine. The machine stack is built, reduced by kleene_up,
# catify and handle_alternation, and each remaining machine is attached in turn
# to the first accept state of a seed machine (node 0 when it has none). The
# seed's placeholder PENDING self-loop on node 0 is removed at the end.
def re2nfa(re)
  fsconstruct = FiniteStateMachine.new({:accept_states => {0=>'eh'},
                                        :graph_hash => {0=>{PENDING=>[0]}}})
  machines = handle_alternation(catify(kleene_up(build_machine_stack(re))))

  machines.each do |mach|
    piece = mach[0]
    offset = fsconstruct.get_node_count - 1
    attach_at = fsconstruct.accept_states.keys.first || 0 # Attachment point is accept state
    fsconstruct.imp_attach_graph(attach_at, piece)
    fsconstruct.accept_states.delete_if do |acc_st|
      !piece.accept_states.keys.include?(acc_st - offset)
    end
  end

  fsconstruct.delete_edge(0, PENDING, 0)
  FiniteStateMachine.new({
    :graph_hash => fsconstruct.graph_hash,
    :accept_states => fsconstruct.accept_states
  })
end
194
+
195
# Replaces the whole accept-state table with a single accept state.
#
# node_number - Node to mark as accepting.
# type        - Accept type to record (default 'end'). The original ignored
#               this argument and always stored 'end'.
#
# Returns the new accept-state hash.
def set_new_accept(node_number, type='end')
  @accept_states = {node_number => type}
end
198
+
199
# Puts +fsm+ in front of this machine: this machine is attached to +fsm+'s
# first accept state, then this machine adopts +fsm+'s contents via #copy.
# Returns what #copy returns.
def prepend_graph(fsm)
  attach_at = fsm.accept_states.keys.first
  fsm.imp_attach_graph(attach_at, self)
  copy(fsm)
end
203
+
204
# Two-node machine that consumes +ch+: node 0 --ch--> node 1 (accepting).
def cat_machine(ch)
  accepts = {1=>'end'}
  transitions = {0 => {ch => [1]}}
  FiniteStateMachine.new({:accept_states => accepts, :graph_hash => transitions})
end
210
+
211
# Operator machine for '?': node 0 reaches accepting node 1 either through
# the PENDING edge (to be replaced by the operand) or directly via LAMBDA.
def question_machine
  accepts = {1=>'end'}
  transitions = {0 => {PENDING => [1], LAMBDA => [1]}}
  FiniteStateMachine.new({:accept_states => accepts, :graph_hash => transitions})
end
223
+
224
# Operator machine for '|': node 0 forks via LAMBDA to two branches
# (1 -PENDING-> 2 and 3 -PENDING-> 4) which rejoin via LAMBDA at accepting
# node 5. Both PENDING edges are later replaced by the operands.
def alt_machine
  transitions = {
    0 => {LAMBDA => [1,3]},
    1 => {PENDING => [2]},
    2 => {LAMBDA => [5]},
    3 => {PENDING => [4]},
    4 => {LAMBDA => [5]}
  }
  FiniteStateMachine.new({:accept_states => {5=>'end'}, :graph_hash => transitions})
end
236
+
237
# Operator machine for '*': node 0 may skip straight to accepting node 3 or
# enter the loop 1 -PENDING-> 2, and node 2 may repeat the loop or exit to 3.
def kleene_machine
  transitions = {
    0 => {LAMBDA => [1,3]},
    1 => {PENDING => [2]},
    2 => {LAMBDA => [1,3]}
  }
  FiniteStateMachine.new({:accept_states => {3=>'end'}, :graph_hash => transitions})
end
247
+
248
# Operator machine for '+': like the kleene machine, but node 0 can only
# enter the loop (no direct LAMBDA to accepting node 3), so the operand must
# be matched at least once.
def plus_machine
  transitions = {
    0 => {LAMBDA => [1]},
    1 => {PENDING => [2]},
    2 => {LAMBDA => [1,3]}
  }
  FiniteStateMachine.new({:accept_states => {3=>'end'}, :graph_hash => transitions})
end
264
+
265
+ #############################################################################
266
+ # Misc functions primarily supporting re2nfa
267
+ #############################################################################
268
+ # graph edges going from origin become outgoing from src
269
+ # graph edges going TO final state instead go TO dest
270
+ # What is "final state?" Any accept state?
271
+ #@graph_hash[src][label].delete(dest)
272
# Replaces the edge src --label--> dest with the whole of +graph+.
#
# graph's origin edges become outgoing edges of +src+, every edge that
# pointed at one of graph's accept states is redirected to +dest+, those
# grafted accept states stop being accept states, the original edge is
# removed, and the nodes are renumbered to close any gaps.
#
# src, label, dest - the edge to replace
# graph            - machine to splice in; must have at least one accept state
#
# Raises RuntimeError when +graph+ has no accept states.
# Returns self.
def replace_edge(src, label, dest, graph)
  raise "can't inject a graph that had no accept states" if graph.accept_states.nil? || graph.accept_states.empty?

  # A destination may be stored as a bare integer; normalize it to a list.
  # (Integer replaces the Fixnum constant, which no longer exists in current
  # Ruby and never covered every integer.)
  current = @graph_hash[src][label]
  @graph_hash[src][label] = [current] if current.is_a?(Integer)

  # Must be read BEFORE attaching: it is the amount by which the grafted
  # nodes' labels are shifted.
  offset = get_node_count - 1
  imp_attach_graph(src, graph)

  # Edges that pointed at the graft's accept states now point at dest.
  graph.accept_states.keys.each do |acc|
    retarget_edges(acc + offset, dest)
    accept_states.delete(acc + offset)
  end
  delete_edge(src, label, dest)

  renumber!
  self
end
298
+
299
+ # ensure no gaps in our node names!
300
# Renames nodes so that their labels have no gaps: the node at position ii
# of get_node_names is renamed to ii.
# NOTE(review): assumes get_node_names returns the labels in ascending
# order, so the target label ii is always free - confirm.
#
# For each renamed node the incoming edges, the accept-state entry and the
# outgoing-edge table are moved to the new label. Entries are moved only when
# they exist. A node without outgoing edges (typically an accept state) has no
# @graph_hash entry, and moving it unconditionally used to store a nil edge
# table under the new label.
#
# Returns self.
def renumber!
  get_node_names.each_with_index do |n, ii|
    next if n == ii

    retarget_edges(n, ii)
    @accept_states[ii] = @accept_states.delete(n) if @accept_states.key?(n)
    @graph_hash[ii] = @graph_hash.delete(n) if @graph_hash.key?(n)
  end
  self
end
310
+
311
+ # imp_attach_graph: increments fsm's node numbers by CALLER.node_count-1
312
+ # takes edges outgoing from fsm.origin and adds them to attach_point
313
# Grafts a copy of +fsm+ onto this machine at +attach_point+.
#
# The copy's node labels are first shifted by (this machine's node count - 1)
# so they cannot collide with existing labels. The copy's origin node is then
# dissolved: its outgoing edges become outgoing edges of +attach_point+. The
# copy's remaining nodes and all of its accept states are added to this
# machine. +fsm+ itself is not modified (a clone is used).
#
# attach_point - node of this machine that takes over the graft's root edges
# fsm          - machine to graft
#
# Returns the node count after grafting.
def imp_attach_graph(attach_point, fsm)
  my_node_count = get_node_count
  graft = fsm.clone
  graft.increment_node_labels(my_node_count - 1) # prevent collisions

  # A graft whose origin has no outgoing edges has no entry to delete; treat
  # that as "no edges" instead of handing nil to merge!.
  graft_root_edges = graft.graph_hash.delete(graft.origin) || {}
  @graph_hash[attach_point] ||= {}

  # When the attach point already has an edge with the same label, keep BOTH
  # sets of destinations. A plain merge! would drop the existing ones.
  @graph_hash[attach_point].merge!(graft_root_edges) do |_label, existing, grafted|
    Array(existing) | Array(grafted)
  end

  @accept_states.merge!(graft.accept_states)
  @graph_hash.merge!(graft.graph_hash)
  get_node_count
end
326
+
327
# Redirects every edge that points at +old_dest+ so that it points at
# +new_dest+ instead, across the whole graph.
#
# old_dest - node label currently targeted
# new_dest - node label that takes over those incoming edges
#
# Destinations are usually lists, but a bare node label (or a missing value)
# is tolerated: Array() wraps it so the membership test cannot raise.
# NOTE(review): this relies on add_edge turning a bare destination into a
# list before delete_edge runs - confirm against the live add_edge.
#
# Returns self.
def retarget_edges(old_dest, new_dest)
  @graph_hash.each_pair do |node, edge_hash|
    edge_hash.each_pair do |label, dest|
      next unless Array(dest).include?(old_dest)

      # Add first, then delete, so the label never becomes empty in between.
      add_edge(node, label, new_dest)
      delete_edge(node, label, old_dest)
    end
  end
  self
end
339
+
340
# Replaces the edge src --label--> dest with +graph+, delegating the actual
# splice to lambda_inject_graph and then removing the original edge.
#
# src, label, dest - the edge to replace
# graph            - machine to splice in between +src+ and +dest+
#
# Returns self.
def lambda_replace_edge(src, label, dest, graph)
  # A destination may be stored as a bare integer; normalize it to a list.
  # (Integer replaces the Fixnum constant, which no longer exists in current
  # Ruby and never covered every integer.)
  current = @graph_hash[src][label]
  @graph_hash[src][label] = [current] if current.is_a?(Integer)

  lambda_inject_graph(graph, src, dest)
  delete_edge(src, label, dest)
  self
end
349
+
350
# Splices +graph+ between +src+ and +dest+ using LAMBDA edges.
#
# graph is attached at +src+ by lambda_attach_graph. Each of graph's accept
# states - addressed by its label plus the node count this machine had
# before the attach - then gets a LAMBDA edge to +dest+ and is removed from
# this machine's accept states.
#
# Returns self.
def lambda_inject_graph(graph, src, dest)
  shift = get_node_count # must be taken before the attach changes it
  lambda_attach_graph(src, graph)

  grafted_accepts = graph.accept_states.keys.map { |k| k + shift }
  grafted_accepts.each { |node| add_edge(node, LAMBDA, dest) }
  grafted_accepts.each { |node| @accept_states.delete(node) }
  self
end
357
+
358
# Deep copy: the object is serialized with Marshal and rebuilt from that
# serialization, so the copy shares no nested hashes or arrays with the
# original.
def clone
  serialized = Marshal.dump(self)
  Marshal.load(serialized)
end
361
+ =begin
362
+ def add_edge(src, label, dest)
363
+ @graph_hash[src] = Hash.new if @graph_hash[src].nil?
364
+ if @graph_hash[src][label].nil?
365
+ @graph_hash[src][label] = [dest]
366
+ else
367
+ if @graph_hash[src][label].class != Array
368
+ @graph_hash[src][label] = [@graph_hash[src][label]]
369
+ end
370
+ @graph_hash[src][label] << dest if !@graph_hash[src][label].include?(dest)
371
+ end
372
+ self
373
+ end
374
+
375
+ # Fail silently on deleting stuff that doesn't exist.
376
+ def delete_edge(src, label, dest)
377
+ return self if @graph_hash[src].nil?
378
+ return self if @graph_hash[src][label].nil?
379
+ @graph_hash[src][label].reject! {|node| node==dest}
380
+ if @graph_hash[src][label].empty?
381
+ @graph_hash[src].delete(label)
382
+ end
383
+ # !!! may need to add something to delete orphaned nodes, here
384
+ self
385
+ end
386
+ =end
387
+ end
@@ -0,0 +1,3 @@
1
module ScannerGenerator
  # Release number of the gem; the gemspec reads it as ScannerGenerator::VERSION.
  VERSION = '0.0.1'
end
@@ -0,0 +1,5 @@
1
+ require "scanner_generator/version"
2
+
3
+ module ScannerGenerator
4
+ require 'scanner_generator/finite_state_machine'
5
+ end
data/push.sh ADDED
@@ -0,0 +1 @@
1
+ git push git@github.com:hackingoff/scanner-generator
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "scanner_generator/version"
4
+
5
# Gem specification for scanner_generator.
#
# The homepage and summary previously described a different gem
# (context-free-grammar / "Parser generation and CFG analysis."). They now
# describe this gem; the homepage is the repository that push.sh pushes to.
Gem::Specification.new do |s|
  s.name        = "scanner_generator"
  s.version     = ScannerGenerator::VERSION
  s.authors     = ["Hacking Off"]
  s.email       = ["source@hackingoff.com"]
  s.homepage    = "https://github.com/hackingoff/scanner-generator"
  s.summary     = %q{Scanner generation: regular expressions to NFAs and DFAs.}
  s.description = %q{Part of the compiler construction toolkit's guts.}

  s.rubyforge_project = "scanner_generator"

  # The packaged file lists come from git, so the gem must be built from
  # inside a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "rspec", "~> 2.6"
  s.add_development_dependency "awesome_print"

  s.add_dependency "ruby-graphviz" # graph visualizations
end
@@ -0,0 +1,166 @@
1
+ require 'scanner_generator'
2
+
3
# Placeholder examples for FiniteStateMachine.
#
# Each example is declared WITHOUT a block so that RSpec reports it as
# pending. With empty blocks the examples were reported as passing although
# they verified nothing.
# Intended assertion style once implemented: obj.should eql(val)
describe ScannerGenerator::FiniteStateMachine do
  it "generates graphs"

  it "has no epsilons/lambdas in DFAs"

  it "replaces edges successfully"

  it "handles edge cases"
end
18
+
19
+ # Radar's example test from Foodie:
20
+ #it "anything else is delicious" do
21
+ #Foodie::Food.portray("Not Broccoli").should eql("Delicious!")
22
+ #end
23
+
24
+ =begin
25
+ # The below tests verify Thompson Construction (conversion from regular
26
+ # expressions to NFAs). The tests made output for human eyes.
27
+
28
+ # TODO: Verify the tests are all still satisfied correctly, then hard-code
29
+ # graphs and tables satisfying ".should eql()" invocation via RSpec.
30
+
31
+ # TODO: Track down the other tests.
32
+
33
+ # Non-RSpec test code follows.
34
+ #!/usr/bin/ruby
35
+ require '../../../cct/app/models/finite_state_machine.rb'
36
+ require "awesome_print"
37
+
38
+ def replace_edge_test
39
+ fsa = FiniteStateMachine.new({
40
+ :accept_states => {2=>'accm2', 3=>'accm3'},
41
+ :graph_hash => {
42
+ 0 => {"LAMBDA"=>[1]},
43
+ 1 => {"M2" => 2,
44
+ "M3" => 3},
45
+ 2 => {"a" => 4 },
46
+ 4 => {"b" => 3}
47
+ }
48
+ })
49
+ fsa2 = fsa.clone
50
+ alter = FiniteStateMachine.new({
51
+ :accept_states => {3=>'alt_end'},
52
+ :graph_hash => {
53
+ 0 => {"LAMBDA" => [1,4]},
54
+ 1 => {"M1" => 2},
55
+ 2 => {"LAMBDA" => 3},
56
+ 4 => {"M2" => 5},
57
+ 5 => {"LAMBDA" => 3}
58
+ }
59
+ })
60
+ alt = FiniteStateMachine.new({
61
+ :accept_states => {2 => 'end'},
62
+ :graph_hash => {
63
+ 0 => {'a' => [1]},
64
+ 1 => {'b' => [2]}
65
+ }
66
+ })
67
+ fsa.draw_graph("before")
68
+ fsa.lambda_replace_edge(1,"M3",3, alt.clone)
69
+ fsa.draw_graph("lambda-after")
70
+ fsa2.replace_edge(1,"M3",3, alt.clone)
71
+ fsa2.draw_graph("after")
72
+
73
+ #malt = alt.clone
74
+ #malt.draw_graph('alt-before-imp-attach')
75
+ #malt.imp_attach_graph(2,alt)
76
+ #malt.imp_attach_graph(2,alter)
77
+ #malt.imp_attach_graph(7,alt)
78
+ #malt.draw_graph('alt-after-imp-attach')
79
+ end
80
+
81
+ def prepend_test
82
+ fsa = FiniteStateMachine.new({
83
+ :accept_states => {1=>'accept'},
84
+ :graph_hash => {0=>{"fuck"=>1}}
85
+ })
86
+ fsa.prepend_graph(fsa.kleene_machine)
87
+ fsa.prepend_graph(fsa.cat_machine('LAMBDA'))
88
+ fsa.draw_graph('prepend-test')
89
+ end
90
+
91
+
92
+
93
+ # Interesting notes:
94
+ # (a|b|ab)* can read aab with more than one parse tree.
95
+
96
+ def re2nfa_test
97
+ fsa = FiniteStateMachine.new({
98
+ :accept_states => {1=>'accept'},
99
+ :graph_hash => {0=>{"LAMBDA"=>1}}}
100
+ )
101
+ example = FiniteStateMachine.new({
102
+ :accept_states => {8=>"end"},
103
+ :graph_hash => {
104
+ 0 => {"LAMBDA" => [1,3]},
105
+ 1 => {"LAMBDA" => [4,6]},
106
+ 2 => {"LAMBDA" => [1,3]},
107
+ 3 => {"c" => 8},
108
+ 4 => {"a" => 5},
109
+ 5 => {"LAMBDA" => 2},
110
+ 6 => {"b" => 7},
111
+ 7 => {"LAMBDA" => 2}
112
+ },
113
+ :origin => 0
114
+ })
115
+
116
+ #s = '(a|b)*c|de*|f'
117
+ s = 'a|b(d*|(e|f)*)'
118
+ #s = 'a((b))'
119
+ # the following strings break it, in some way
120
+ # 'a*|b|cde**|k' causes lots of duplication
121
+ # see examples/2012-03-01_22-47-54_-0800-nfa.png
122
+ # shows a* OR b OR replace_me, followed by
123
+ # b | c
124
+ # followed by c
125
+ # '(a|b)*'
126
+ #s = '(a|b)*c'
127
+ puts "rendering regex: #{s}"
128
+ fsa = fsa.re2nfa(s)
129
+ fsa.draw_graph("draw_re2nfa")
130
+ puts 'bad nfa'
131
+ ap fsa
132
+ fsa.subsetify.draw_graph("draw_re2dfa")
133
+ example.draw_graph("ex_nfa")
134
+ puts 'good nfa'
135
+ ap example
136
+ example.subsetify.draw_graph("ex_dfa")
137
+ fsa.graph_hash.each_pair do |k,v|
138
+ if v != example.graph_hash[k]
139
+ puts "#{v} != #{example.graph_hash[k]}"
140
+ else
141
+ puts "same"
142
+ end
143
+ end
144
+ end
145
+
146
+ def re2nfa_ends_in_or_test
147
+ fsa = FiniteStateMachine.new({
148
+ :graph_hash=>{0 => {"1" => 1}},
149
+ :accept_states => {}
150
+ })
151
+ fsa = fsa.re2nfa("1|0")
152
+ fsa.add_edge(5,LAMBDA,6)
153
+ fsa.set_new_accept(5,nil)
154
+ fsa.set_new_accept(6,"ok")
155
+ fsa.draw_graph("end_test_nfa")
156
+ fsa.subsetify!
157
+ fsa.draw_graph("end_test_dfa")
158
+ end
159
+
160
+ re2nfa_test
161
+ #injection_test
162
+ #replace_edge_test
163
+ #prepend_test
164
+ #re2nfa_ends_in_or_test
165
+
166
+ =end
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scanner_generator
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Hacking Off
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &11360640 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '2.6'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *11360640
25
+ - !ruby/object:Gem::Dependency
26
+ name: awesome_print
27
+ requirement: &11360100 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *11360100
36
+ - !ruby/object:Gem::Dependency
37
+ name: ruby-graphviz
38
+ requirement: &11359540 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *11359540
47
+ description: Part of the compiler construction toolkit's guts.
48
+ email:
49
+ - source@hackingoff.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - README
57
+ - Rakefile
58
+ - lib/scanner_generator.rb
59
+ - lib/scanner_generator/finite_state_machine.rb
60
+ - lib/scanner_generator/thompson_construction.rb
61
+ - lib/scanner_generator/version.rb
62
+ - push.sh
63
+ - scanner_generator.gemspec
64
+ - spec/scanner_generator_spec.rb
65
+ homepage: https://github.com/hackingoff/context-free-grammar
66
+ licenses: []
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project: scanner_generator
85
+ rubygems_version: 1.8.15
86
+ signing_key:
87
+ specification_version: 3
88
+ summary: Parser generation and CFG analysis.
89
+ test_files: []