scanner_generator 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in scanner_generator.gemspec
4
+ gemspec
data/README ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,619 @@
1
+ require 'graphviz'
2
+ require 'set'
3
+
4
+ module ScannerGenerator
5
+ require File.dirname(__FILE__) + '/thompson_construction.rb'
6
+
7
+ LAMBDA = "LAMBDA"; SOURCE = 0; DEST = 1; LABEL = 2;
8
+ ERROR = 0; MACHINE_ACCEPT = 1; HALT_RETURN = 2 # Action Table codes [E, MA, HR]
9
+ ACC = 3; WIDTH = 3
10
+
11
+ # True if the needle (subset) is found in the haystack (superset).
12
# Reports whether every element of +needle+ also occurs in +haystack+.
#
# Both arrays are sorted (copies; the arguments are not modified) and walked
# in lock-step, so duplicates count: [1, 1] is not a subset of [1, 2].
# An empty needle is a subset of any haystack, and running off the end of the
# haystack yields false rather than comparing against nil.
#
# needle   - Array of mutually comparable elements.
# haystack - Array of mutually comparable elements.
#
# Returns true or false.
def subset(needle, haystack)
  candidates = haystack.sort
  cursor = 0
  needle.sort.each do |wanted|
    # Skip haystack entries that sort before the element we are looking for.
    cursor += 1 while cursor < candidates.length && candidates[cursor] < wanted
    return false if cursor >= candidates.length || candidates[cursor] != wanted
    cursor += 1 # consume the match so a duplicate needs its own partner
  end
  true
end
32
+
33
+ # is needle contained in a haystack?
34
# Reports whether +needle+ is a subset (as judged by #subset) of at least one
# of the arrays in +list_of_haystacks+.
#
# Returns true or false.
def subset_of_list_element?(needle, list_of_haystacks)
  list_of_haystacks.any? { |candidate| subset(needle, candidate) }
end
38
+
39
+ class FiniteStateMachine
40
+ include ThompsonConstruction
41
+
42
+ attr_reader :graph_hash, :accept_states, :origin
43
+ attr_accessor :labels
44
+
45
+ # Must set @accept_states, @edges, @edge_labels, @node_labels, @graph_hash
46
+ # edge/node labels are derived from @graph_hash
47
# Builds a machine from an options hash.
#
# input - Hash with the keys:
#         :accept_states - required; stored as @accept_states.
#         :graph_hash    - required; stored as @graph_hash.
#         :origin        - optional start node (defaults to 0).
#         :labels        - optional node labels (defaults to {}).
#         :rankdir       - optional layout direction; "TB" is top-to-bottom
#                          (the default), "LR" is left-to-right.
#
# Raises RuntimeError ("Bunk input") when either required key is nil.
def initialize(input)
  accepts, graph = input[:accept_states], input[:graph_hash]
  raise "Bunk input" if accepts.nil? || graph.nil?

  @accept_states = accepts
  @graph_hash = graph
  @origin = input[:origin] || 0
  @labels = input[:labels] || {}
  @edge_labels = get_edge_labels # snapshot derived from @graph_hash
  @rankdir = input[:rankdir] || "TB"
  self
end
57
+
58
# Makes this machine share another machine's accept states, graph hash and
# origin (the objects are referenced, not duplicated), then refreshes the
# cached edge labels.
#
# graph - object responding to accept_states, graph_hash and origin.
#
# Returns self.
def copy(graph)
  @accept_states, @graph_hash, @origin = graph.accept_states, graph.graph_hash, graph.origin
  @edge_labels = get_edge_labels
  self
end
65
+
66
+ # Regex keys specify that any matching edge labels transition to the dest
67
+ # node.
68
+ # Example:
69
+ # Suppose our language = {a,b,1,\n,*,/}.
70
+ # An edge labeled "[^\n\*\/]" matches anything but newline, *, or /.
71
+ # This function replaces that edge with multiple edges from the
72
+ # language's alphabet. In this case, the [^\n\*\/] edge gets replaced by
73
+ # 3 edges: an "a" edge, a "b" edge, & a "1" edge.
74
+ # NOTE: Invoke this AFTER drawing dfa, but BEFORE dumping the module.
75
# Replaces every Regexp-labelled edge with one plain edge per matching String
# label of the graph's alphabet, all pointing at the regex edge's destination.
#
# The alphabet is recomputed from the current graph (get_edge_labels) instead
# of the @edge_labels snapshot taken at construction, so labels added later
# are not missed. Regex keys are collected before the hash is changed, so the
# hash is never modified while it is being iterated.
#
# An existing edge with the same label is never overwritten by an expansion.
# When several regex edges on one node match the same label, the one that
# comes later in the hash wins.
#
# Returns self.
def expand_regex_edges
  alphabet = get_edge_labels.select { |label| label.is_a?(String) }

  @graph_hash.each_value do |edges|
    regex_edges = edges.select { |label, _dest| label.is_a?(Regexp) }
    next if regex_edges.empty?

    expansions = {}
    regex_edges.each do |pattern, dest|
      alphabet.each do |label|
        next unless label.match(pattern)
        # Guard keeps a sloppy regex from clobbering an explicit edge.
        expansions[label] = dest unless edges.key?(label)
      end
    end

    regex_edges.each_key { |pattern| edges.delete(pattern) }
    edges.merge!(expansions)
  end
  self
end
102
+
103
# Lists every node mentioned in @graph_hash, whether as the source of an edge
# or as a destination.
#
# Returns a flattened, de-duplicated, sorted Array of node names.
def get_node_names
  sources = @graph_hash.keys
  destinations = @graph_hash.values.map(&:values)
  (sources + destinations).flatten.uniq.sort
end
112
+
113
# Collects the distinct edge labels used anywhere in @graph_hash, in order of
# first appearance.
#
# Returns an Array of labels.
def get_edge_labels
  @graph_hash.values.map(&:keys).flatten.uniq
end
118
+
119
# Subset construction: builds a new machine whose states are lambda-closures
# of this machine's nodes.
#
# Starting from the closure of +start_node+, every non-LAMBDA edge label is
# followed from each member node; the closures reached are concatenated into
# one array, which becomes the destination state (and is queued if unseen).
# A state is accepting when accept_state_of finds an accept type among its
# members. States with no outgoing transitions are dropped from the graph
# hash. A state's label is built from its members' labels, one per line,
# skipping any label text already present.
#
# start_node - node (or array of nodes) to start from; defaults to @origin.
#
# Returns the beautified (renumbered) FiniteStateMachine.
def subsetify(start_node = @origin)
  dfa_edges = {}
  dfa_accepts = {}
  dfa_labels = {}
  alphabet = get_edge_labels
  worklist = [closure_of(start_node)]

  # The worklist grows while it is walked, hence the explicit cursor.
  cursor = 0
  while cursor < worklist.length
    state = worklist[cursor]
    cursor += 1
    transitions = dfa_edges[state] = {}

    alphabet.each do |label|
      next if label == LAMBDA

      reached = []
      state.each do |node|
        edges = @graph_hash[node]
        next if edges.nil? || edges[label].nil?
        target_closure = closure_of(edges[label])
        reached << target_closure unless reached.include?(target_closure)
      end
      next if reached == []

      reached.flatten!
      transitions[label] = reached
      worklist << reached unless worklist.include?(reached)
    end

    accept_type = accept_state_of(state)
    dfa_accepts[state] = accept_type if accept_type != false
    dfa_edges.delete(state) if transitions == {}
  end

  worklist.each do |state|
    text = ""
    state.each do |member|
      piece = @labels[member]
      text << piece + "\n" unless piece.nil? || text.include?(piece)
    end
    dfa_labels[state] = text.chomp unless text == ""
  end

  FiniteStateMachine.new({
    :graph_hash => dfa_edges,
    :accept_states => dfa_accepts,
    :labels => dfa_labels
  }).beautify
end
172
+
173
# In-place variant of #subsetify: replaces this machine's graph hash, accept
# states and labels with those of the machine #subsetify returns.
#
# start_node_label - start node handed to #subsetify (defaults to 0).
#
# Returns self.
def subsetify!(start_node_label = 0)
  converted = subsetify(start_node_label)
  @graph_hash = converted.graph_hash
  @accept_states = converted.accept_states
  @labels = converted.labels
  self
end
178
+
179
# Renders the machine as an SVG through GraphViz.
#
# filename - base name of the output; ".svg" is appended.
# svgname  - name given to the GraphViz graph. It is now passed through to
#            GraphViz instead of being ignored; the default is the name that
#            used to be hard-coded.
# shape    - GraphViz node shape. Accept nodes get a double periphery, and a
#            green outline when the shape is "Mrecord".
# path     - optional :path option forwarded to GraphViz#output.
#
# Returns whatever GraphViz#output returns.
def draw_graph(filename = "output", svgname = :Finite_Automata_Graph, shape = "circle", path = nil)
  graph = GraphViz::new(svgname)
  graph[:rankdir] = @rankdir

  get_node_names.each do |node|
    accepting = @accept_states.include?(node)
    graph.add_nodes(node.to_s,
      :shape => shape,
      :label => (@labels[node] || node.to_s), # custom label when one is set
      :peripheries => (accepting ? 2 : 1),
      :color => ((accepting && shape == "Mrecord") ? "#66DD66" : "#000000"))
  end

  @graph_hash.each_pair do |source_label, sub_hash|
    sub_hash.each_pair do |edge_label, destination_nodes|
      # A destination may be a single node or an array of nodes.
      [destination_nodes].flatten.each do |dest_label|
        source_node = graph.get_node(source_label.to_s)
        dest_node = graph.get_node(dest_label.to_s)
        graph.add_edges(source_node, dest_node, :label => label_friendly(edge_label).gsub('\\','\\\\\\\\'))
      end
    end
  end

  if path
    graph.output(:svg => "#{filename}.svg", :path => path)
  else
    graph.output(:svg => "#{filename}.svg")
  end
end
209
+
210
# Draws the graph with each labelled node rendered as an HTML-like table:
# a "State <node>" heading followed by one row per line of the node's label.
# Accept states get a green table border.
#
# The decorated labels are keyed by the real node name (not by iteration
# position) and are built in a separate hash, so @labels is never modified
# while it is being iterated. The plain labels are restored even when
# drawing raises.
#
# Parameters are forwarded unchanged to #draw_graph.
#
# Returns the result of #draw_graph.
def draw_state_labeled_graph(filename = "output", svgname = :Finite_Automata_Graph, shape = "circle", path = nil)
  plain_labels = @labels
  decorated = {}
  plain_labels.each_pair do |state, text|
    lines = ""
    text.each_line {|i| lines << "<tr><td align=\"left\">#{i}</td></tr>"}
    table_border_color = (@accept_states.include?(state)) ? "#448844" : "#ffffff"
    heading = "State #{state}"
    decorated[state] = '<<table color="'+table_border_color+'" style="ROUNDED" border="1" cellborder="0" cellpadding="5"><tr><td align="center" colspan="1"><font color="#666666" point-size="8">'+heading+'</font></td></tr>'+lines+'</table>>'
  end
  @labels = decorated
  draw_graph(filename, svgname, shape, path)
ensure
  @labels = plain_labels
end
225
+
226
+ # adapted from class notes
227
# Computes the lambda-closure of a node or list of nodes: the given nodes plus
# everything reachable from them through LAMBDA edges only.
#
# node_label - a node name or an (optionally nested) array of node names.
#
# Returns an Array of nodes in discovery order, starting with the input nodes.
def closure_of(node_label)
  closure = [node_label].flatten
  cursor = 0
  # Newly discovered nodes are appended and visited in turn.
  while cursor < closure.length
    edges = @graph_hash[closure[cursor]]
    cursor += 1
    next if edges.nil? || edges[LAMBDA].nil?

    [edges[LAMBDA]].flatten.each do |reachable|
      closure << reachable unless closure.include?(reachable)
    end
  end
  closure
end
248
+
249
+ # returns true if the any of the closure's states are included in the set
250
+ # of accept_states
251
# Looks for an accept state among the members of +closure+.
#
# Returns the accept type recorded in @accept_states for the first member
# that is an accept state, or false when none of them is.
def accept_state_of(closure)
  closure.each { |member| return @accept_states[member] if @accept_states.key?(member) }
  false
end
259
+
260
+ # numbers sets and makes them the new keys (cleans up the graph_hash's keys)
261
# Renumbers the states of this machine with consecutive integers starting
# at 0, in order of first appearance in @graph_hash (each source key, then
# its destinations), and carries accept states and labels across.
#
# Returns a new FiniteStateMachine; this machine is left untouched.
def beautify
  # Old state name => new consecutive number.
  numbering = {}
  @graph_hash.each_pair do |state, transitions|
    ([state] + transitions.values).each do |name|
      numbering[name] = numbering.size if numbering[name].nil?
    end
  end

  renamed_graph = @graph_hash.keys.each_with_object({}) do |state, renamed|
    renamed[numbering[state]] = {}
    @graph_hash[state].each_pair do |edge_label, dest|
      renamed[numbering[state]][edge_label] = numbering[dest]
    end
  end

  renamed_accepts = {}
  @accept_states.each_pair { |state, accept_type| renamed_accepts[numbering[state]] = accept_type }

  renamed_labels = {}
  @labels.each_pair { |state, _label| renamed_labels[numbering[state]] = @labels[state] }

  FiniteStateMachine.new({
    :graph_hash => renamed_graph,
    :accept_states => renamed_accepts,
    :labels => renamed_labels
  })
end
294
+
295
# Generates the source text of an `initialize` method that assigns the lookup
# codes, the label codes and the three scanner tables.
#
# indent_width - number of spaces placed before the two code assignments.
#                The body always referenced this name but never defined it
#                (NameError on every call); it is now an optional parameter.
#
# Returns a String of Ruby source.
def generate_initialize(indent_width = 2)
  padding = " " * indent_width
  "def initialize\n" +
    "#{padding}#{lookup_code_string}\n" + # array of edge labels
    "#{padding}#{label_code_string}\n" +  # hash mapping accept states to the type accepted by them
    dump_table(:state) + "\n" +
    dump_table(:action) + "\n" +
    dump_table(:lookup) + "\n" +
    "#{ind(1)}end"
end
304
+
305
+ # Module Dumping
306
# Generates the source text of a module whose `initialize` assigns the lookup
# codes, the label codes and the three scanner tables, then calls `super`.
# Regex edges are expanded first, which modifies this machine's graph.
#
# name         - name of the generated module.
# indent_width - accepted for interface compatibility; not used by the body.
#
# Returns a String of Ruby source.
def generate_module(name = 'ScannerModule', indent_width = 2)
  expand_regex_edges
  [
    "module #{name}",
    " def initialize",
    "#{ind(1)}#{lookup_code_string}", # array of edge labels
    "#{ind(1)}#{label_code_string}",  # hash mapping accept states to the type accepted by them
    dump_table(:state),
    dump_table(:action),
    dump_table(:lookup),
    "#{ind(2)}super",
    "#{ind(1)}end",
    "end"
  ].join("\n")
end
317
+
318
+ # This horrific kluge ports the ruby dump_tables to Javascript. Sorta.
319
+ # Smelly code, but output passes JSLint and is the path of least resistance.
320
+ #
321
+ # TODO: Write something that generates the tables as ruby objects, then
322
+ # refactor these table dumping functions, using array.to_s
323
# Produces JavaScript source for the scanner tables.
#
# Expands regex edges first (modifying this machine), then concatenates
# "var SCANNER = {};", the three dump_table outputs and the lookup/label code
# assignments, and finally rewrites that Ruby-flavoured text by applying each
# entry of `replacements` with gsub!, in the order the entries are written.
#
# name, indent_width - accepted but not referenced by the body.
#
# Returns the rewritten String.
+ def js_tables(name = 'ScannerModule', indent_width = 2)
324
+ expand_regex_edges
325
# Literal substitutions turning the Ruby dump into JavaScript; order matters
# because later entries act on the output of earlier ones.
+ replacements = {
326
+ ' # E' => '// E', # Action table's label
327
+ ' # ' => '// ', # table-leading comments
328
+ '# ' => ' // ', # row-trailing descriptions
329
+ '[[' => ' [', # first row of table
330
+ '@state_table = ' => 'SCANNER.state_table = [',
331
+ '@action_table = ' => 'SCANNER.action_table = [',
332
+ '@lookup_table = ' => 'SCANNER.lookup_table = [',
333
+ '@lookup_codes' => 'SCANNER.lookup_codes',
334
+ '@label_codes' => 'SCANNER.label_codes',
335
+ ' [' => ' [', # linty js indent of 4
336
+ ']] ' => ']];', # semicolon ending tables
337
+ ':other' => '"other"', # :other symbol.to_s
338
+ '"=>' => '" : ' # javascript hash notation
339
+ }
340
+ s = "var SCANNER = {};\n" +
341
+ dump_table(:state, 0, 0) + "\n" +
342
+ dump_table(:action, 0, 0) + "\n" +
343
+ dump_table(:lookup, 0, 0) + "\n" +
344
+ "#{ind(0)}#{lookup_code_string};\n" + # array of edge labels
345
+ "#{ind(0)}#{label_code_string};\n" # hash mapping accept states to the type accepted by them
346
+ replacements.each_pair{|k,v| s.gsub!(k,v)}
347
+ s
348
+ end
349
+
350
# Generates the source text of a standalone `class Scanner`.
#
# Expands regex edges first (modifying this machine). The result is the class
# header, an `initialize` that assigns the lookup codes, label codes and the
# three dump_table outputs, followed verbatim by the heredoc below, which
# holds the generated class's `scan` and `scan_file` methods.
#
# indent_width - accepted but not referenced by the body.
#
# Returns a String of Ruby source.
+ def generate_scanner(indent_width = 2)
351
+ expand_regex_edges
352
+
353
+ scanner_function =<<-'END_SCANNER'
354
+ def scan(input)
355
+ @token = ""
356
+ @state = 0
357
+ @buffered = false
358
+ results = Array.new
359
+
360
+ input.each_char do |ch|
361
+ current_read = case ch # Map chars onto char-classes by editing case/when
362
+ when /[a-zA-Z]/ then @label_codes["L"]
363
+ when /[0-9]/ then @label_codes["D"]
364
+ else @label_codes[ch] || @label_codes[:other]
365
+ end
366
+ if((@action_table[@state][current_read]==1) && (@state_table[@state][current_read] != -1))
367
+ @buffered = false # action=MA (Machine-Accept) (=1). Append char to token.
368
+ @token += ch unless ch[/\s/] && @label_codes[ch].nil? # Uncomment if recognizing some whitespace.
369
+ @state=@state_table[@state][current_read]
370
+ elsif((@state_table[@state][current_read]==-1) && (@action_table[@state][current_read]==2))
371
+ @buffered = true # action=HR (Halt-Return) (=2). Accept current token.
372
+ results.push [@lookup_codes[@lookup_table[@state][current_read]],@token]
373
+ @state = 0
374
+ @token = ""
375
+ else # ? Hitting this block indicates action=ERR (ERROR) (=3)
376
+ next
377
+ end
378
+ redo if(@buffered==true && current_read!=@label_codes[:other]) # repeat w/o advancing to next char
379
+ end
380
+ results
381
+ end
382
+
383
+ # Appends a newline to the file in case of its absence, to ensure
384
+ # the presence of terminating whitespace. Convert Windows newlines
385
+ # to UNIX style ones.
386
+ def scan_file(filename = "test_file.txt")
387
+ scan((File.open(filename, "r").read+"\n").gsub("\r\n","\n"))
388
+ end
389
+ END_SCANNER
390
+ return "class Scanner\n" +
391
+ "#{ind(1)}def initialize\n" +
392
+ "#{ind(2)}#{lookup_code_string}\n" + # array of edge labels
393
+ "#{ind(2)}#{label_code_string}\n" + # hash mapping accept states to the type accepted by them
394
+ dump_table(:state, 2,2) + "\n" + # Note: the 1s should be 2s, but dump_table's results seem
395
+ dump_table(:action, 2,2) + "\n" + # to mysteriously have an extra leading two spaces. Can't
396
+ dump_table(:lookup, 2,2) + "\n" + # for the life of me figure out how or why.
397
+ "#{ind(1)}end\n\n" +
398
+ scanner_function +
399
+ "\nend"
400
+ end
401
+
402
+ # Module Dumping
403
# Writes the generated module source to ./modules/<underscored name>.rb.
#
# The file is opened with a block so it is flushed and closed before this
# method returns; previously the handle was left open.
#
# name         - CamelCased name; only used to derive the file name.
#                NOTE(review): the module text itself is produced by
#                generate_module with its default name — confirm whether
#                +name+ should be forwarded.
# indent_width - accepted but not used.
#
# Returns a String reporting what File#write returned and the file name.
def dump_module(name, indent_width = 2)
  filename = underscore(name)
  written = File.open("./modules/#{filename}.rb", "w") do |file|
    file.write(generate_module)
  end
  "Successfully wrote #{written} characters to #{filename}.rb"
end
409
+
410
# Returns the edge labels with any label containing whitespace replaced by
# its escaped, printable form (e.g. a newline becomes the two characters
# backslash-n). Other labels are passed through unchanged.
def friendly_edge_labels
  get_edge_labels.map do |label|
    label[/\s/].nil? ? label : label.inspect[1..-2]
  end
end
416
+
417
# Converts an edge label into text suitable for display.
#
# * Integer labels become their decimal string. (The check used to be against
#   Fixnum, which no longer exists in Ruby 3.2+.)
# * Regexp labels become "/.../", taken from Regexp#to_s with its first seven
#   characters and its last character stripped.
# * LAMBDA, "LAMBDA", "EPSILON" and empty labels become the HTML entity for a
#   lower-case epsilon.
# * Any other label containing whitespace is shown in escaped form; the rest
#   are returned unchanged.
def label_friendly(label)
  if label.is_a?(Integer)
    label.to_s
  elsif label.is_a?(Regexp)
    '/' + label.to_s[7..-2] + '/'
  elsif label == LAMBDA || label.to_s == "LAMBDA" || label.to_s == "EPSILON" || label.to_s.empty?
    '&#949;' # epsilon-lower is 949
  else
    label[/\s/].nil? ? label : label.inspect[1..-2]
  end
end
429
+
430
+
431
# Builds an indentation string of level * width repetitions of " ".
#
# level - nesting depth.
# width - repetitions per level (defaults to 2).
def ind(level, width=2)
  repetitions = level * width
  " " * repetitions
end
434
+
435
+ # converts a CamelCased name to k_and_r style for filename
436
# Converts a CamelCased name to snake_case for use as a file name: the first
# character is lower-cased and every later capital A-Z becomes "_" plus its
# lower-case form.
#
# name - String; an empty string yields "" instead of raising.
#
# Returns a new String.
def underscore(name)
  return "" if name.empty?
  name[0].downcase + name[1..-1].gsub(/[A-Z]/) { |capital| "_" + capital.downcase }
end
443
+
444
+
445
# Returns the lookup codes: "!accept" followed by the distinct accept types
# from @accept_states, in order of first appearance.
def lookup_codes
  (["!accept"] + @accept_states.values).uniq
end
448
+ # Array unions: fuck yeah.
449
# Returns Ruby source text assigning the lookup codes array to @lookup_codes.
def lookup_code_string
  "@lookup_codes = " + lookup_codes.to_s
end
452
+
453
# Returns Ruby source text assigning @label_codes: a hash literal mapping each
# edge label to its index in get_edge_labels, plus :other mapped to the number
# of labels.
#
# The literal is assembled entry by entry rather than by slicing Hash#to_s, so
# a graph without edges yields the valid "{:other=>0}" and the text does not
# depend on the interpreter's Hash#inspect format.
def label_code_string
  labels = get_edge_labels
  entries = labels.each_with_index.map { |label, code| "#{label.inspect}=>#{code}" }
  entries << ":other=>#{labels.length}"
  "@label_codes = {#{entries.join(', ')}}"
end
458
+
459
+ # Graph Attachment
460
# Shifts every node number in this machine by +amount+: graph hash keys, edge
# destinations (a single Integer or an Array of them), accept-state keys and
# the origin. @graph_hash and @accept_states are replaced by new hashes.
#
# Destinations are matched against Integer; the previous check against Fixnum
# fails on Ruby 3.2+, where that constant no longer exists.
#
# amount - Integer added to each node number.
#
# Returns the new origin.
# Raises RuntimeError when a destination is neither an Integer nor an Array.
def increment_node_labels(amount)
  shifted_graph = {}
  @graph_hash.each_pair do |node, edges|
    shifted_edges = {}
    edges.each_pair do |label, dest|
      shifted_edges[label] =
        case dest
        when Integer then dest + amount
        when Array then dest.map { |n| n + amount }
        else raise "value (#{dest}) is a #{dest.class}!"
        end
    end
    shifted_graph[node + amount] = shifted_edges
  end

  shifted_accepts = {}
  @accept_states.each_pair { |node, accept_type| shifted_accepts[node + amount] = accept_type }

  @graph_hash, @accept_states = shifted_graph, shifted_accepts
  @origin += amount
end
482
+
483
# Returns the number of distinct nodes reported by #get_node_names.
def get_node_count
  get_node_names.length
end
484
+
485
+ # considerations:
486
+ # do we need a flag for when we have to strip an attach point of being an accept state?
487
# Grafts another machine onto this one by adding a LAMBDA edge from
# +attach_point+ to the other machine's origin, then merging its graph hash
# and accept states into this machine.
#
# +fsm+ is modified: its node numbers are shifted up by this machine's node
# count so the two sets of nodes cannot collide.
#
# An existing LAMBDA target stored as a bare node (not an Array) is wrapped in
# an Array before the new origin is added; previously `<<` on an Integer was a
# discarded bit-shift and the new edge was silently lost.
#
# Open question from the original author: should the attach point stop being
# an accept state?
#
# attach_point - node of this machine; must be less than the node count.
# fsm          - the machine to attach; must not be this machine.
#
# Returns the node count after attaching.
# Raises RuntimeError when attach_point is out of bounds or fsm is self.
def attach_graph(attach_point, fsm)
  node_count = get_node_count
  raise "#{attach_point} out of graph bounds." if attach_point >= node_count
  raise "going to break everything by attaching to myself!" if fsm == self

  fsm.increment_node_labels(node_count)

  # The attach point may be on the graph without any outgoing edges yet.
  edges = (@graph_hash[attach_point] ||= {})
  existing = edges[LAMBDA]
  edges[LAMBDA] = existing.nil? ? [fsm.origin] : ([existing].flatten << fsm.origin)

  @graph_hash.merge!(fsm.graph_hash)
  @accept_states.merge!(fsm.accept_states)
  get_node_count
end
525
+
526
+ # Dumps either a state table, an action table, or a lookup table
527
+ # This function is kind of half-refactored with dump_module and needs cleaning
528
+ # like the wizard needs food.
529
# Renders one scanner table as Ruby source text: "@<type>_table = " followed
# by a comment row of edge labels (plus " Other") and one bracketed row per
# node from get_node_names, each ending in a trailing "# <node>" comment.
#
# For a (node, edge) pair with no transition the cell is:
#   state  table: "-1"
#   action table: HALT_RETURN when the node is an accept state, else ERROR
#   lookup table: the index of the node's accept type in lookup_codes, or "0"
# For a pair with a transition the cell is the destination (to_s) in the
# state table, MACHINE_ACCEPT in the action table and "0" in the lookup table.
#
# type         - :state, :action or :lookup.
# indent_width - accepted but not referenced by the body.
# indent_level - level passed to ind() for the table's lines.
#
# Returns the table text with trailing whitespace stripped.
+ def dump_table(type = :state, indent_width = 2, indent_level = 2)
530
+ # edge_labels = friendly_edge_labels << " Other" # I suspect this line is ruining the code.
531
+ edge_labels = get_edge_labels << " Other"
532
+ node_names = get_node_names
533
+
534
+ s = "#{ind(indent_level)}@#{type}_table = " +
535
+ ((type == :action) ? "\n#{ind(indent_level+1)}\# ERROR = 0; MACHINE_ACCEPT = 1; HALT_RETURN = 2" : "") +
536
+ "\n#{ind(indent_level+1)}#"
537
+ edge_labels.each do |label|
538
+ s += sprintf("%#{WIDTH+1}s", label_friendly(label))
539
+ end
540
+ s += "\n#{ind(indent_level+1)}"
541
+
542
+ node_names.each_with_index do |node,ii|
543
+ on_last_node = (ii == node_names.size-1)
544
+ is_accept = !@accept_states[node].nil?
545
+ s += ((ii==0) ? "[" : " ") + "["
546
+
547
+ edge_labels.each_with_index do |edge,jj|
548
+ on_last_edge = (jj == edge_labels.size-1)
# No transition for this (node, edge) pair.
549
+ if(@graph_hash[node].nil?||
550
+ @graph_hash[node][edge].nil?||@graph_hash[node][edge][0].nil?)
551
+ sdest = "-1"
552
+ adest = ((is_accept) ? HALT_RETURN.to_s : ERROR.to_s)
553
+ if(!accept_states[node].nil?)
554
+ ldest = ((is_accept) ? (lookup_codes.find_index(accept_states[node]).to_i).to_s : "0")
555
+ else
556
+ ldest = "0"
557
+ end
558
+ else
559
+ sdest = graph_hash[node][edge].to_s
560
+ adest = MACHINE_ACCEPT.to_s # MA if NON-ACCEPT state
561
+ ldest = "0"
562
+ end
# Emit the cell, then "," between cells, "]," between rows and "]]" after the
# last row, followed by the row's trailing comment.
563
+ case type
564
+ when :state
565
+ s += sprintf("%#{WIDTH}s", sdest) +
566
+ ((!on_last_edge) ? "," \
567
+ : "]" + ((!on_last_node) ? "," \
568
+ : "]" ) + " \# #{node}#{(is_accept ? " ACCEPT":"")}\n#{ind(indent_level+1)}")
569
+ when :action
570
+ s += sprintf("%#{WIDTH}s", adest) +
571
+ (!on_last_edge ? "," \
572
+ : "]" + (!on_last_node ? "," \
573
+ : "]" ) + " \# #{node}#{(is_accept ? " ACCEPT" : "")}\n#{ind(indent_level+1)}")
574
+ when :lookup
575
+ s += sprintf("%#{WIDTH}s", ldest) +
576
+ (!on_last_edge ? "," \
577
+ : "]" + (!on_last_node ? "," \
578
+ : "]" ) + " \# #{node}#{(is_accept ? " #{@accept_states[node]}" : "")}\n#{ind(indent_level+1)}")
579
+ end
580
+ end
581
+ end
582
+ s.rstrip
583
+ end
584
+
585
+ # Clobbers the old accept type, if any was present.
586
# Marks +state+ as an accept state of the given +type+, replacing any accept
# type previously recorded for it.
#
# Returns +type+.
def add_accept_state(state, type)
  @accept_states.store(state, type)
end
589
+
590
# Adds an edge src --label--> dest.
#
# Destinations for a (src, label) pair are kept in an Array; a value stored
# as a bare node is wrapped in an Array first. Adding a destination that is
# already present changes nothing.
#
# Returns self.
def add_edge(src, label, dest)
  edges = @graph_hash[src]
  edges = @graph_hash[src] = {} if edges.nil?

  targets = edges[label]
  if targets.nil?
    edges[label] = [dest]
  else
    targets = edges[label] = [targets] unless targets.class == Array
    targets << dest unless targets.include?(dest)
  end
  self
end
602
+
603
+ # Fail silently on deleting stuff that doesn't exist.
604
# Removes the edge src --label--> dest, silently doing nothing when the source
# node, the label or the destination does not exist.
#
# Destinations stored as an Array are edited in place; the label is removed
# once its Array is empty. A destination stored as a bare node (which
# #add_edge also tolerates) is handled too instead of raising NoMethodError:
# the label is removed when that node equals +dest+.
#
# Orphaned nodes are not cleaned up.
#
# Returns self.
def delete_edge(src, label, dest)
  edges = @graph_hash[src]
  return self if edges.nil? || edges[label].nil?

  targets = edges[label]
  if targets.is_a?(Array)
    targets.reject! { |node| node == dest }
    edges.delete(label) if targets.empty?
  elsif targets == dest
    edges.delete(label)
  end
  self
end
614
+
615
# Reports whether node +num+ is recorded as an accept state.
def is_accept?(num)
  @accept_states.key?(num)
end
618
+ end
619
+ end
@@ -0,0 +1,387 @@
1
+ module ThompsonConstruction
2
+ PENDING = 0;
3
+ #############################################################################
4
+ # Thompson-McNaughton-Yamada Construction Section
5
+ #############################################################################
6
# Tokenizes a regular expression into a stack of small machines.
#
# Each entry is a pair [machine, pending]:
# * a literal, escaped character, (group) or [class] yields a finished
#   machine with pending == nil;
# * the operators *, +, ? and | yield an operator machine with a list of node
#   numbers describing the PENDING edge(s) it still has to fill in from a
#   neighbouring entry.
#
# A backslash escapes the next character ("\n" written as backslash + n is a
# newline; anything else is taken literally). Groups are compiled
# recursively with re2nfa; character classes become a single Regexp-labelled
# edge.
#
# Changes from the previous version: the unreachable "already escaped" branch
# under the backslash case is gone (escaped characters are handled before the
# case is reached), and a group that is never closed now raises a descriptive
# RuntimeError instead of a TypeError from appending nil.
#
# re - String holding the regular expression.
#
# Returns an Array of [machine, pending] pairs.
# Raises RuntimeError on a stray ")" or "]" or an unclosed "(".
def build_machine_stack(re)
  skip = 0
  escaped = false
  machines = []
  (0...re.length).each do |ii|
    # Characters already consumed by a () group or [] class are skipped.
    if skip != 0
      skip -= 1
      next
    end

    ch = re[ii]
    if escaped
      machines.push([cat_machine(ch == 'n' ? "\n" : ch), nil])
      escaped = false
      next
    end

    case ch
    when '*' then machines.push([kleene_machine, [1,2]])
    when '+' then machines.push([plus_machine, [1,2]])
    when '?' then machines.push([question_machine, [0,1]])
    when '|' then machines.push([alt_machine, [1,2,3,4]])
    when ']' then raise "mismatched bracket closed a non-open class"
    when ')' then raise "mismatched paren closed a non-open group"
    when '('
      subexpression = ''
      nesting = 0
      # Read up to the ")" that closes this group, tracking nested groups.
      until (ch2 = re[ii += 1]) == ')' && nesting == 0
        raise "mismatched paren: group opened but never closed" if ch2.nil?
        nesting -= 1 if ch2 == ')'
        nesting += 1 if ch2 == '('
        subexpression << ch2
      end
      subgraph = re2nfa(subexpression)
      skip = subexpression.length + 1 # the +1 skips the closing )
      machines.push([subgraph, nil])
    when '['
      char_class = get_char_class(re[ii..-1]) # search the rest of the string for the []-expression
      machines.push([cat_machine(/#{char_class}/), nil])
      # Each backslash counted here adds one to the number of characters skipped.
      skip = char_class.length - 1 + char_class.scan(/\\/).length
    when '\\'
      escaped = true
    else
      machines.push([cat_machine(ch), nil])
    end
  end
  machines
end
67
+
68
# Extracts the leading character class from +str+: everything up to and
# including the first "]" that is not preceded by an escaping backslash.
# An escaping backslash is dropped and the character after it is kept.
#
# str - String expected to begin with a character class.
#
# Returns the class text, including the closing "]".
# Raises RuntimeError when the string ends before the class is closed.
def get_char_class(str)
  collected = ''
  after_backslash = false
  str.each_char do |ch|
    if after_backslash
      collected[-1] = ch # overwrite the backslash with the escaped character
    else
      collected << ch
      return collected if ch == ']'
    end
    after_backslash = (ch == '\\' && !after_backslash)
  end
  raise 'character class improperly closed!'
end
85
+
86
# Resolves the unary operators (*, ? and +, which share one precedence).
#
# Walks the stack left to right. A finished machine is copied over; an
# operator with exactly two pending node numbers pops the entry before it and
# splices that machine into its PENDING edge via replace_edge; anything else
# (alternation) is passed along unchanged. The pending lists of resolved
# operators are emptied in the process.
#
# machines - Array of [machine, pending] pairs.
#
# Returns a new Array of [machine, pending] pairs.
def kleene_up(machines)
  machines.each_with_object([]) do |(machine, pending), resolved|
    if pending.nil? || pending.empty?
      resolved.push([machine, nil]) # already complete
    elsif pending.length == 2
      src = pending.shift
      dest = pending.shift
      operand = resolved.pop[0]
      resolved.push([machine.replace_edge(src, PENDING, dest, operand), nil])
    else # alternation is handled later
      resolved.push([machine, pending])
    end
  end
end
104
+
105
# Concatenates runs of adjacent finished machines into single machines.
#
# The first entry is always carried over as finished. After that, when both
# an entry and the entry before it in the input have no pending edges, the
# entry is attached to the accept state of the machine on top of the output,
# and that machine's accept states are pruned to the ones contributed by the
# attached machine. Entries with pending edges are passed along unchanged.
#
# machines - Array of [machine, pending] pairs.
#
# Returns a new Array of [machine, pending] pairs.
def catify(machines)
  joined = []
  machines.each_with_index do |(machine, pending), position|
    if position == 0
      joined.push([machine, nil])
    elsif pending.nil? && machines[position - 1][1].nil?
      lead = joined.pop[0]
      offset = lead.get_node_count - 1
      attach_at = lead.accept_states.keys.first || 0
      lead.imp_attach_graph(attach_at, machine)
      # Keep only the accept states that came from the attached machine.
      lead.accept_states.delete_if do |acc_st, _type|
        !machine.accept_states.keys.include?(acc_st - offset)
      end
      joined.push([lead, nil])
    else
      joined.push([machine, pending])
    end
  end
  joined
end
128
+
129
# Resolves alternation in two passes: first each pending machine absorbs the
# entry on its left, then the entry on its right.
#
# Returns the stack produced by the right-hand pass.
def handle_alternation(machines)
  absorb_right_alt(absorb_left_alt(machines))
end
133
+
134
# Lets every machine that still has pending edges absorb the entry to its
# left.
#
# Walks the stack left to right. Finished machines are copied over. A machine
# with pending node numbers takes the first two as (src, dest), pops the
# entry before it and splices that machine into its PENDING edge via
# replace_edge; the remaining pending numbers stay with the result.
#
# machines - Array of [machine, pending] pairs.
#
# Returns a new Array of [machine, pending] pairs.
def absorb_left_alt(machines)
  machines.each_with_object([]) do |(machine, pending), absorbed|
    if pending.nil? || pending.empty?
      absorbed.push([machine, nil]) # already complete
    else
      src = pending.shift
      dest = pending.shift
      neighbour = absorbed.pop[0]
      absorbed.push([machine.replace_edge(src, PENDING, dest, neighbour), pending])
    end
  end
end
147
+
148
# Mirror image of #absorb_left_alt: runs it over the reversed stack, so each
# pending machine absorbs the entry on its right, and returns the result in
# the original left-to-right order.
def absorb_right_alt(machines)
  right_to_left = machines.reverse
  absorb_left_alt(right_to_left).reverse
end
151
+
152
+ # This is a Thompson construction of a regular expression to a NFA.
153
+ # The machine stack is a series of 2-tuples. The first element of which
154
+ # is a small NFA, the second of which is a listing of the edges it needs
155
+ # to fill in by cannibalizing an adjacent NFA.
156
+
157
+ # mptr = machines.length - 1 # machine index pointer
158
+ # m = machines[mptr]
159
+ # * eats below IF below complete
160
+ # | eats above and below if they're complete
161
+
162
+ # make one pass forwards, completing all kleene stars and all alt LHSs
163
+ # make one pass backwards, completing all alt RHSs
164
+ # if any unfulfilled dependencies remain, my assumptions were mistaken
165
# Compiles a regular expression into an NFA.
#
# The expression is tokenized into a machine stack, unary operators are
# resolved, adjacent machines are concatenated and alternations absorb their
# neighbours. The resulting machines are then attached one after another to a
# scaffold machine at its current accept state; after each attachment the
# scaffold's accept states are pruned to those of the machine just attached.
# The scaffold's placeholder PENDING self-loop is removed at the end.
#
# re - String holding the regular expression.
#
# Returns a new FiniteStateMachine built from the scaffold's graph hash and
# accept states.
def re2nfa(re)
  scaffold = FiniteStateMachine.new({:accept_states => {0=>'eh'},
                                     :graph_hash => {0=>{PENDING=>[0]}}})
  stages = handle_alternation(catify(kleene_up(build_machine_stack(re))))

  stages.each do |stage|
    machine = stage[0]
    offset = scaffold.get_node_count - 1
    attach_at = scaffold.accept_states.keys.first || 0 # attachment point is the accept state
    scaffold.imp_attach_graph(attach_at, machine)
    scaffold.accept_states.delete_if do |acc_st, _type|
      !machine.accept_states.keys.include?(acc_st - offset)
    end
  end

  scaffold.delete_edge(0, PENDING, 0)
  FiniteStateMachine.new({
    :graph_hash => scaffold.graph_hash,
    :accept_states => scaffold.accept_states
  })
end
194
+
195
# Replaces all accept states with a single one.
#
# node_number - the node that becomes the only accept state.
# type        - accept type recorded for it (defaults to 'end'). The argument
#               is now honoured; previously 'end' was always stored.
#
# Returns the new accept-state hash.
def set_new_accept(node_number, type='end')
  @accept_states = {node_number => type}
end
198
+
199
# Puts +fsm+ in front of this machine: this machine is attached to +fsm+ at
# fsm's first accept state (modifying +fsm+), and this machine then takes
# over fsm's state via #copy.
#
# Returns whatever #copy returns.
def prepend_graph(fsm)
  attach_point = fsm.accept_states.keys.first
  fsm.imp_attach_graph(attach_point, self)
  copy(fsm)
end
203
+
204
# Builds the two-node machine for a single symbol: node 0 --ch--> node 1,
# with node 1 accepting.
#
# ch - the edge label (a character or a Regexp).
def cat_machine(ch)
  transitions = {0 => {ch => [1]}}
  accepting = {1=>'end'}
  FiniteStateMachine.new({:accept_states => accepting, :graph_hash => transitions})
end
210
+
211
# Builds the operator machine for "?": node 0 reaches accepting node 1 either
# through the PENDING edge (to be replaced by the operand) or directly through
# a LAMBDA edge.
#
# A four-node layout, with lambda edges wrapped around a separate
# 1 --PENDING--> 2 edge, was sketched by the original author but is not used.
def question_machine
  transitions = {0 => {PENDING => [1], LAMBDA => [1]}}
  accepting = {1=>'end'}
  FiniteStateMachine.new({:accept_states => accepting, :graph_hash => transitions})
end
223
+
224
# Builds the operator machine for "|": node 0 forks by LAMBDA into two
# branches (1 --PENDING--> 2 and 3 --PENDING--> 4) that both rejoin by LAMBDA
# at accepting node 5. Each PENDING edge is later replaced by an operand.
def alt_machine
  transitions = {
    0 => {LAMBDA => [1,3]},
    1 => {PENDING => [2]},
    2 => {LAMBDA => [5]},
    3 => {PENDING => [4]},
    4 => {LAMBDA => [5]}
  }
  accepting = {5=>'end'}
  FiniteStateMachine.new({:accept_states => accepting, :graph_hash => transitions})
end
236
+
237
# Builds the operator machine for "*": node 0 goes by LAMBDA either into the
# 1 --PENDING--> 2 edge or straight to accepting node 3; node 2 goes by LAMBDA
# back to node 1 or on to node 3.
def kleene_machine
  transitions = {
    0 => {LAMBDA => [1,3]},
    1 => {PENDING => [2]},
    2 => {LAMBDA => [1,3]}
  }
  accepting = {3=>'end'}
  FiniteStateMachine.new({:accept_states => accepting, :graph_hash => transitions})
end
247
+
248
# Builds the operator machine for "+": like the "*" machine, except that
# node 0 has no LAMBDA shortcut to the accepting node 3, so the
# 1 --PENDING--> 2 edge must be taken at least once.
#
# A two-node variant with two PENDING edges would be more concise, but would
# need logic to replace two edges with one absorbed machine.
def plus_machine
  transitions = {
    0 => {LAMBDA => [1]},
    1 => {PENDING => [2]},
    2 => {LAMBDA => [1,3]}
  }
  accepting = {3=>'end'}
  FiniteStateMachine.new({:accept_states => accepting, :graph_hash => transitions})
end
264
+
265
+ #############################################################################
266
+ # Misc functions primarily supporting re2nfa
267
+ #############################################################################
268
+ # graph edges going from origin become outgoing from src
269
+ # graph edges going TO final state instead go TO dest
270
+ # What is "final state?" Any accept state?
271
+ #@graph_hash[src][label].delete(dest)
272
# Replaces the edge src --label--> dest with a copy of +graph+.
#
# Edges leaving +graph+'s origin become outgoing edges of +src+, and every edge
# that pointed at one of +graph+'s accept states is redirected to +dest+.
#
# src, label, dest - identify the edge being replaced.
# graph            - machine to splice in; needs at least one accept state.
#
# Raises RuntimeError if +graph+ has no accept states.
# Returns self.
def replace_edge(src, label, dest, graph)
  raise "can't inject a graph that had no accept states" if graph.accept_states.nil? || graph.accept_states.empty?

  # An edge target may be stored as a bare node number; normalize it to a list.
  # Testing against Integer (rather than Fixnum) keeps this working on
  # Ruby 3.2+, where the Fixnum constant no longer exists.
  targets = @graph_hash[src][label]
  @graph_hash[src][label] = [targets] if targets.is_a?(Integer)

  # imp_attach_graph shifts the grafted node numbers by this same amount.
  offset = get_node_count - 1
  imp_attach_graph(src, graph)

  # For each accept state of the grafted graph: point its incoming edges at
  # dest instead, and stop treating it as an accept state of this machine.
  graph.accept_states.keys.each do |acc|
    retarget_edges(acc + offset, dest)
    accept_states.delete(acc + offset)
  end
  delete_edge(src, label, dest)

  renumber! # close any gaps in the node numbering
  self
end
298
+
299
+ # ensure no gaps in our node names!
300
# Renames nodes so that the name at position k of get_node_names becomes k,
# leaving no gaps in the node names.
#
# For every node whose name differs from its position, incoming edges are
# retargeted, its accept-state entry (if it has a non-nil one) is moved, and
# its outgoing-edge entry is moved.
# NOTE(review): when old_name has no entry in @graph_hash, the last step
# stores nil under the new name -- confirm callers tolerate that.
#
# Returns self.
def renumber!
  get_node_names.each_with_index do |old_name, position|
    next if old_name == position

    retarget_edges(old_name, position)
    unless @accept_states[old_name].nil?
      @accept_states[position] = @accept_states.delete(old_name)
    end
    @graph_hash[position] = @graph_hash.delete(old_name)
  end
  self
end
310
+
311
+ # imp_attach_graph: increments the node numbers of a copy of fsm by CALLER.node_count-1
312
+ # takes edges outgoing from fsm.origin and adds them to attach_point
313
# Grafts a copy of +fsm+ onto this machine at +attach_point+.
#
# The copy's node numbers are shifted up by (node count - 1) to prevent
# collisions, the edges leaving the copy's origin are added to +attach_point+,
# and the copy's remaining edges and accept states are merged into this
# machine.
#
# attach_point - node of this machine that receives the graft's origin edges.
# fsm          - machine to graft in (only a clone of it is modified).
#
# Returns the node count after grafting.
def imp_attach_graph(attach_point, fsm)
  my_node_count = get_node_count
  graft = fsm.clone
  graft.increment_node_labels(my_node_count - 1) # prevent collisions

  # An origin without outgoing edges has no entry to remove; treat it as empty
  # rather than handing nil to merge!.
  graft_root_edges = graft.graph_hash.delete(graft.origin) || {}
  @graph_hash[attach_point] ||= Hash.new
  # When attach_point already has an edge with the same label (e.g. LAMBDA),
  # keep both sets of targets; a plain merge! would replace the existing ones.
  @graph_hash[attach_point].merge!(graft_root_edges) do |_label, existing, grafted|
    Array(existing) | Array(grafted)
  end

  @accept_states.merge!(graft.accept_states)
  @graph_hash.merge!(graft.graph_hash)
  get_node_count
end
326
+
327
# Redirects every edge that points at +old_dest+ so it points at +new_dest+.
#
# Walks all nodes and all of their labelled edges; wherever the target list
# contains +old_dest+, an edge to +new_dest+ is added and the edge to
# +old_dest+ is removed.
#
# Returns self.
def retarget_edges(old_dest, new_dest)
  @graph_hash.each do |node, edge_hash|
    edge_hash.each do |label, dest|
      next unless dest.include?(old_dest)

      add_edge(node, label, new_dest)
      delete_edge(node, label, old_dest)
    end
  end
  self
end
339
+
340
# Replaces the edge src --label--> dest with +graph+, spliced in through
# lambda_inject_graph, and then removes the original edge.
#
# src, label, dest - identify the edge being replaced.
# graph            - machine to splice in.
#
# Returns self.
def lambda_replace_edge(src, label, dest, graph)
  # An edge target may be stored as a bare node number; normalize it to a list.
  # Testing against Integer (rather than Fixnum) keeps this working on
  # Ruby 3.2+, where the Fixnum constant no longer exists.
  targets = @graph_hash[src][label]
  @graph_hash[src][label] = [targets] if targets.is_a?(Integer)

  lambda_inject_graph(graph, src, dest)
  delete_edge(src, label, dest)
  self
end
349
+
350
# Attaches +graph+ at +src+ via lambda_attach_graph, then connects each of the
# attached accept states to +dest+ with a LAMBDA edge and removes them from
# this machine's accept states.
#
# The attached accept states are addressed as (original number + the node
# count this machine had before attaching).
#
# Returns self.
def lambda_inject_graph(graph, src, dest)
  shift = get_node_count
  lambda_attach_graph(src, graph)

  graph.accept_states.keys.each do |k|
    add_edge(k + shift, LAMBDA, dest)
  end
  graph.accept_states.keys.each do |k|
    @accept_states.delete(k + shift)
  end
  self
end
357
+
358
# Deep copy: the object is serialized with Marshal and loaded back, so nested
# hashes and arrays are duplicated instead of being shared with the receiver.
def clone
  serialized = Marshal.dump(self)
  Marshal.load(serialized)
end
361
+ =begin
362
+ def add_edge(src, label, dest)
363
+ @graph_hash[src] = Hash.new if @graph_hash[src].nil?
364
+ if @graph_hash[src][label].nil?
365
+ @graph_hash[src][label] = [dest]
366
+ else
367
+ if @graph_hash[src][label].class != Array
368
+ @graph_hash[src][label] = [@graph_hash[src][label]]
369
+ end
370
+ @graph_hash[src][label] << dest if !@graph_hash[src][label].include?(dest)
371
+ end
372
+ self
373
+ end
374
+
375
+ # Fail silently on deleting stuff that doesn't exist.
376
+ def delete_edge(src, label, dest)
377
+ return self if @graph_hash[src].nil?
378
+ return self if @graph_hash[src][label].nil?
379
+ @graph_hash[src][label].reject! {|node| node==dest}
380
+ if @graph_hash[src][label].empty?
381
+ @graph_hash[src].delete(label)
382
+ end
383
+ # !!! may need to add something to delete orphaned nodes, here
384
+ self
385
+ end
386
+ =end
387
+ end
@@ -0,0 +1,3 @@
1
# Namespace of the scanner_generator gem.
module ScannerGenerator
  # Release number of the gem; the gemspec reads it for its version field.
  VERSION = '0.0.1'
end
@@ -0,0 +1,5 @@
1
+ require "scanner_generator/version"
2
+
3
+ module ScannerGenerator
4
+ require 'scanner_generator/finite_state_machine'
5
+ end
data/push.sh ADDED
@@ -0,0 +1 @@
1
+ git push git@github.com:hackingoff/scanner-generator
@@ -0,0 +1,29 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for scanner_generator.
$:.push File.expand_path("../lib", __FILE__)
require "scanner_generator/version"

Gem::Specification.new do |s|
  s.name        = "scanner_generator"
  s.version     = ScannerGenerator::VERSION
  s.authors     = ["Hacking Off"]
  s.email       = ["source@hackingoff.com"]
  # Homepage and summary describe this gem; they previously carried the
  # values of the separate context-free-grammar gem. The URL matches the
  # repository that push.sh pushes to.
  s.homepage    = "https://github.com/hackingoff/scanner-generator"
  s.summary     = %q{Scanner generation: regular expressions to finite state machines.}
  s.description = %q{Part of the compiler construction toolkit's guts.}

  s.rubyforge_project = "scanner_generator"

  # Package exactly what git tracks.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "rspec", "~> 2.6"
  s.add_development_dependency "awesome_print"

  s.add_dependency "ruby-graphviz" # graph visualizations

  # specify any dependencies here; for example:
  # s.add_development_dependency "rspec"
  # s.add_runtime_dependency "rest-client"
end
@@ -0,0 +1,166 @@
1
+ require 'scanner_generator'
2
+
3
describe ScannerGenerator::FiniteStateMachine do
  # The examples below are declared without a block on purpose: RSpec reports
  # a block-less example as pending, whereas an empty block passes and hides
  # the fact that nothing is verified yet.
  # Expectations would take the form: obj.should eql(val)
  it "generates graphs"

  it "has no epsilons/lambdas in DFAs"

  it "replaces edges successfully"

  it "handles edge cases"
end
18
+
19
+ # Radar's example test from Foodie:
20
+ #it "anything else is delicious" do
21
+ #Foodie::Food.portray("Not Broccoli").should eql("Delicious!")
22
+ #end
23
+
24
+ =begin
25
+ # The below tests verify Thompson Construction (conversion from regular
26
+ # expressions to NFAs). The tests made output for human eyes.
27
+
28
+ # TODO: Verify the tests are all still satisfied correctly, then hard-code
29
+ # graphs and tables satisfying ".should eql()" invocation via RSpec.
30
+
31
+ # TODO: Track down the other tests.
32
+
33
+ # Non-RSpec test code follows.
34
+ #!/usr/bin/ruby
35
+ require '../../../cct/app/models/finite_state_machine.rb'
36
+ require "awesome_print"
37
+
38
+ def replace_edge_test
39
+ fsa = FiniteStateMachine.new({
40
+ :accept_states => {2=>'accm2', 3=>'accm3'},
41
+ :graph_hash => {
42
+ 0 => {"LAMBDA"=>[1]},
43
+ 1 => {"M2" => 2,
44
+ "M3" => 3},
45
+ 2 => {"a" => 4 },
46
+ 4 => {"b" => 3}
47
+ }
48
+ })
49
+ fsa2 = fsa.clone
50
+ alter = FiniteStateMachine.new({
51
+ :accept_states => {3=>'alt_end'},
52
+ :graph_hash => {
53
+ 0 => {"LAMBDA" => [1,4]},
54
+ 1 => {"M1" => 2},
55
+ 2 => {"LAMBDA" => 3},
56
+ 4 => {"M2" => 5},
57
+ 5 => {"LAMBDA" => 3}
58
+ }
59
+ })
60
+ alt = FiniteStateMachine.new({
61
+ :accept_states => {2 => 'end'},
62
+ :graph_hash => {
63
+ 0 => {'a' => [1]},
64
+ 1 => {'b' => [2]}
65
+ }
66
+ })
67
+ fsa.draw_graph("before")
68
+ fsa.lambda_replace_edge(1,"M3",3, alt.clone)
69
+ fsa.draw_graph("lambda-after")
70
+ fsa2.replace_edge(1,"M3",3, alt.clone)
71
+ fsa2.draw_graph("after")
72
+
73
+ #malt = alt.clone
74
+ #malt.draw_graph('alt-before-imp-attach')
75
+ #malt.imp_attach_graph(2,alt)
76
+ #malt.imp_attach_graph(2,alter)
77
+ #malt.imp_attach_graph(7,alt)
78
+ #malt.draw_graph('alt-after-imp-attach')
79
+ end
80
+
81
+ def prepend_test
82
+ fsa = FiniteStateMachine.new({
83
+ :accept_states => {1=>'accept'},
84
+ :graph_hash => {0=>{"fuck"=>1}}
85
+ })
86
+ fsa.prepend_graph(fsa.kleene_machine)
87
+ fsa.prepend_graph(fsa.cat_machine('LAMBDA'))
88
+ fsa.draw_graph('prepend-test')
89
+ end
90
+
91
+
92
+
93
+ # Interesting notes:
94
+ # (a|b|ab)* can read aab with more than one parse tree.
95
+
96
+ def re2nfa_test
97
+ fsa = FiniteStateMachine.new({
98
+ :accept_states => {1=>'accept'},
99
+ :graph_hash => {0=>{"LAMBDA"=>1}}}
100
+ )
101
+ example = FiniteStateMachine.new({
102
+ :accept_states => {8=>"end"},
103
+ :graph_hash => {
104
+ 0 => {"LAMBDA" => [1,3]},
105
+ 1 => {"LAMBDA" => [4,6]},
106
+ 2 => {"LAMBDA" => [1,3]},
107
+ 3 => {"c" => 8},
108
+ 4 => {"a" => 5},
109
+ 5 => {"LAMBDA" => 2},
110
+ 6 => {"b" => 7},
111
+ 7 => {"LAMBDA" => 2}
112
+ },
113
+ :origin => 0
114
+ })
115
+
116
+ #s = '(a|b)*c|de*|f'
117
+ s = 'a|b(d*|(e|f)*)'
118
+ #s = 'a((b))'
119
+ # the following strings break it, in some way
120
+ # 'a*|b|cde**|k' causes lots of duplication
121
+ # see examples/2012-03-01_22-47-54_-0800-nfa.png
122
+ # shows a* OR b OR replace_me, followed by
123
+ # b | c
124
+ # followed by c
125
+ # '(a|b)*'
126
+ #s = '(a|b)*c'
127
+ puts "rendering regex: #{s}"
128
+ fsa = fsa.re2nfa(s)
129
+ fsa.draw_graph("draw_re2nfa")
130
+ puts 'bad nfa'
131
+ ap fsa
132
+ fsa.subsetify.draw_graph("draw_re2dfa")
133
+ example.draw_graph("ex_nfa")
134
+ puts 'good nfa'
135
+ ap example
136
+ example.subsetify.draw_graph("ex_dfa")
137
+ fsa.graph_hash.each_pair do |k,v|
138
+ if v != example.graph_hash[k]
139
+ puts "#{v} != #{example.graph_hash[k]}"
140
+ else
141
+ puts "same"
142
+ end
143
+ end
144
+ end
145
+
146
+ def re2nfa_ends_in_or_test
147
+ fsa = FiniteStateMachine.new({
148
+ :graph_hash=>{0 => {"1" => 1}},
149
+ :accept_states => {}
150
+ })
151
+ fsa = fsa.re2nfa("1|0")
152
+ fsa.add_edge(5,LAMBDA,6)
153
+ fsa.set_new_accept(5,nil)
154
+ fsa.set_new_accept(6,"ok")
155
+ fsa.draw_graph("end_test_nfa")
156
+ fsa.subsetify!
157
+ fsa.draw_graph("end_test_dfa")
158
+ end
159
+
160
+ re2nfa_test
161
+ #injection_test
162
+ #replace_edge_test
163
+ #prepend_test
164
+ #re2nfa_ends_in_or_test
165
+
166
+ =end
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scanner_generator
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Hacking Off
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &11360640 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '2.6'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *11360640
25
+ - !ruby/object:Gem::Dependency
26
+ name: awesome_print
27
+ requirement: &11360100 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *11360100
36
+ - !ruby/object:Gem::Dependency
37
+ name: ruby-graphviz
38
+ requirement: &11359540 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *11359540
47
+ description: Part of the compiler construction toolkit's guts.
48
+ email:
49
+ - source@hackingoff.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - README
57
+ - Rakefile
58
+ - lib/scanner_generator.rb
59
+ - lib/scanner_generator/finite_state_machine.rb
60
+ - lib/scanner_generator/thompson_construction.rb
61
+ - lib/scanner_generator/version.rb
62
+ - push.sh
63
+ - scanner_generator.gemspec
64
+ - spec/scanner_generator_spec.rb
65
+ homepage: https://github.com/hackingoff/context-free-grammar
66
+ licenses: []
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project: scanner_generator
85
+ rubygems_version: 1.8.15
86
+ signing_key:
87
+ specification_version: 3
88
+ summary: Parser generation and CFG analysis.
89
+ test_files: []