asmodis-rlsm 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/rlsm.rb ADDED
@@ -0,0 +1,22 @@
1
+ # Copyright 2008 Gunther Diemant
2
+ #
3
+ # This file is part of the RLSM module.
4
+ #
5
+ # RLSM is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # RLSM is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with RLSM. If not, see <http://www.gnu.org/licenses/>.
17
+
18
+
19
+ require File.join(File.dirname(__FILE__), 'monoid')
20
+ require File.join(File.dirname(__FILE__), 'mgen')
21
+ require File.join(File.dirname(__FILE__), 'dfa')
22
+ require File.join(File.dirname(__FILE__), 'rlsm_regexp')
@@ -0,0 +1,584 @@
1
+ # Copyright 2008 Gunther Diemant
2
+ #
3
+ # This file is part of the RLSM module.
4
+ #
5
+ # RLSM is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # RLSM is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with RLSM. If not, see <http://www.gnu.org/licenses/>.
17
+
18
+
19
+ require File.join(File.dirname(__FILE__), 'monkey_patching')
20
+
21
+ module RLSM
22
+ class RegExp
23
# Creates a new RegExp from a string description.
#
# Metacharacters are & * | ( ). Here & is the empty word and an empty
# string represents the empty set.
#
# str:: textual description of the regular expression (default: "").
#
# Raises whatever _well_formed? raises when +str+ is malformed.
def initialize(str = "")
  # Is the argument a well formed RegExp?
  _well_formed?(str)

  # More than one & or * in a row is useless.
  re = str.squeeze('&*')

  # A star on & is &. Removing the star can leave two & side by side
  # (e.g. "&*&*" becomes "&&"), so runs of & are collapsed once more;
  # only a single "&" is treated as the empty word further down.
  re = re.gsub('&*', '&').squeeze('&')

  @re = NodeFactory.new_node(nil, re)
end
38
+
39
+ #--
40
+ #Operations of a regexp
41
+ #++
42
+
43
# Kleene star of the regexp. Alters the regexp in place.
#
# Always returns self, so the call can be chained (#star relies on this).
def star!
  # For the empty set and the empty word a star changes nothing, and a
  # double star is useless as well. Return self here too; a bare `return`
  # would hand nil back to the caller.
  return self if empty? || lambda? || (@re.class == Star)

  @re = NodeFactory.new_node(nil, "(#{to_s})*")

  # Unset the cached string representation.
  @re_str = nil

  self
end
56
+
57
# Non-destructive kleene star: the star is applied to a deep copy, so the
# receiver is left untouched. Returns whatever star! returns on that copy.
def star
  duplicate = self.deep_copy
  duplicate.star!
end
61
+
62
# Concatenation of this regexp with +other+, returned as a new RegExp.
# If either operand is the empty set the result is the empty set.
def *(other)
  if empty? || other.empty?
    RegExp.new
  else
    RegExp.new("(#{to_s})(#{other.to_s})")
  end
end
67
+
68
# Union of this regexp with +other+, returned as a new object.
# The empty set is neutral: if one side is empty, a deep copy of the
# other side is returned.
def +(other)
  if other.empty?
    self.deep_copy
  elsif empty?
    other.deep_copy
  else
    RegExp.new("(#{to_s})|(#{other.to_s})")
  end
end
74
+
75
+ #--
76
+ #Some small flags
77
+ #++
78
# True if this regexp is the empty word (answered by the root node).
def lambda?
  root = @re
  root.lambda?
end
82
+
83
# True if this regexp is the empty set (answered by the root node).
def empty?
  root = @re
  root.empty?
end
87
+
88
# True if the empty word matches this regexp (answered by the root node).
def null?
  root = @re
  root.null?
end
92
+
93
+ #--
94
+ #Some properties of a regexp
95
+ #++
96
+
97
# Array of the symbols a word matched by the regexp can begin with,
# as reported by the root node.
def first
  root = @re
  root.first
end
101
+
102
# Array of the symbols a word matched by the regexp can end with,
# as reported by the root node.
def last
  root = @re
  root.last
end
106
+
107
# Array of all possible two letter substrings of words matched by the
# regexp. Pairs come from the root node; duplicates are removed.
def follow
  pairs = @re.follow
  pairs.uniq
end
111
+
112
+ #--
113
+ #Conversion methods
114
+ #++
115
# String representation of the regexp. The string is computed from the
# root node on first use and cached in @re_str afterwards.
def to_s
  @re_str = @re.to_s unless @re_str
  @re_str
end
119
+
120
# Returns a minimal DFA which accepts the same language as the regexp.
#
# The automaton is built from the positions of the symbols in the string
# form of the regexp: position 0 is an artificial begin marker, the k-th
# alphabet symbol of the string gets position k. The follow pairs of the
# resulting position tree act as (possibly nondeterministic) transitions,
# which are then determinised and handed to RLSM::DFA for minimisation.
#
# NOTE(review): for the empty set (to_s == "") the position tree consists
# of the begin marker alone, so state 0 presumably ends up final - confirm
# that this is the intended automaton for the empty language.
def to_dfa
  # Step 1: Substitute every char such that every character is unique.
  # Add also a beginning marker (position 0).
  orig = []
  rre = [0]
  to_s.each_char do |c|
    if ['(', ')', '|', '*', '&'].include? c
      rre << c
    else
      orig << c
      rre << orig.size
    end
  end

  tmp_re = NodeFactory.new_node(nil, rre)

  # Step 2: Read the automaton off the position tree and reverse the
  # substitution on the transition labels (yields (maybe) a NFA).
  alph = orig.uniq
  initial = 0
  tmp_finals = tmp_re.last
  tmp_trans = tmp_re.follow.map { |s1, s2| [orig[s2 - 1], s1, s2] }

  # Step 3: Transform the NFA to a DFA. A DFA state is a sorted array of
  # positions; its number is its index in +states+.
  states = [[0]]
  new_states = [[0]]
  trans = []
  until new_states.empty?
    pending = new_states
    new_states = []

    pending.each do |new_state|
      alph.each do |char|
        tr_set = tmp_trans.find_all do |c, s1, _s2|
          c == char and new_state.include? s1
        end
        next if tr_set.empty?

        # Several transitions may lead to the same position; without uniq
        # e.g. [1, 1] and [1] would be treated as two different states.
        state = tr_set.map { |_c, _s1, s2| s2 }.uniq.sort

        # Found a new state?
        unless states.include? state
          new_states << state
          states << state
        end

        tr = [char, states.index(new_state), states.index(state)]

        # Found new trans?
        trans << tr unless trans.include? tr
      end
    end
  end

  # A DFA state is final if it contains a final position.
  finals = states.find_all do |state|
    tmp_finals.any? { |tf| state.include? tf }
  end.map { |fi| states.index(fi) }

  states = (0...states.size).to_a

  # Step 4: Return the result
  RLSM::DFA.new(alph, states, initial, finals, trans).minimize(:rename_states => true)
end
191
+
192
# Short debugging representation: class name plus the string form.
def inspect # :nodoc:
  format("<%s : '%s' >", self.class, to_s)
end
195
+
196
# Two regexps are considered the same if their DFAs compare equal
# (per the original note: if the dfas are isomorphic).
def ==(other)
  mine = to_dfa
  theirs = other.to_dfa
  mine == theirs
end
200
+
201
+
202
+ private
203
# Checks that +str+ is a well formed regexp description.
#
# Parentheses must be balanced, and something like |) or *a or (| isn't
# allowed.
#
# Returns nil for a well formed string.
# Raises ArgumentError (a StandardError, so a plain +rescue+ catches it)
# for unbalanced parentheses or a forbidden character sequence.
def _well_formed?(str)
  # 1. Balanced parentheses. The depth may never drop below zero, so a
  # closing parenthesis before its opening one, as in ")a(", is rejected
  # as well.
  depth = 0
  str.each_char do |c|
    depth += 1 if c == '('
    depth -= 1 if c == ')'
    raise ArgumentError, "Unbalanced parenthesis in #{str}" if depth < 0
  end

  raise ArgumentError, "Unbalanced parenthesis in #{str}" if depth != 0

  # 2. Bad sequences
  if str =~ /\(\)|\|\)|\(\||\|\*|^\*|\(\*/
    raise ArgumentError, "Bad character sequence #{$&} found in #{str}"
  end
end
223
+
224
# Leaf of the syntax tree: either the empty word '&' or a plain sequence
# of symbols without any operator.
class PrimExp
  # parent:: the enclosing node (nil for the root).
  # str::    the symbols of this leaf; the code handles both '&' / ['&']
  #          (empty word) and any sequence responding to +reject+.
  def initialize(parent, str)
    @parent = parent
    if str == '&' or str == ['&']
      @content = '&'
      @null = true
    else
      # An & inside a longer sequence contributes nothing.
      @content = str.reject { |c| c == '&' }
      @null = false
    end
  end

  # True if this leaf is the empty word.
  def null?
    @null
  end

  # One-element slice holding the first symbol ([] for the empty word).
  def first
    @null ? [] : @content[0,1]
  end

  # One-element slice holding the last symbol ([] for the empty word).
  def last
    @null ? [] : @content[-1,1]
  end

  # All pairs of directly adjacent symbols.
  #
  # For Array content the pairs contain the symbols themselves (the same
  # objects #first and #last hand out) rather than one-element arrays, so
  # that the pairs match those the composite nodes build from #first/#last.
  def follow
    (1...@content.length).map do |i|
      [_symbol_at(i - 1), _symbol_at(i)]
    end
  end

  # The symbols glued together. Array#to_s only concatenates the elements
  # on Ruby 1.8; join gives that result on every Ruby version.
  def to_s
    @content.is_a?(Array) ? @content.join : @content.to_s
  end

  # True if this leaf is the empty word.
  def lambda?
    @null
  end

  # True if this leaf holds no symbol at all, i.e. is the empty set.
  def empty?
    @content == '' or @content == []
  end

  private

  # Symbol at position +i+: the element itself for Array content, a
  # one-character substring for String content.
  def _symbol_at(i)
    @content.is_a?(Array) ? @content[i] : @content[i, 1]
  end
end
270
+
271
# Syntax tree node for a kleene star around a single child expression.
class Star
  # +str+ is the starred expression including the trailing '*'; everything
  # but that last element becomes the child node.
  def initialize(parent, str)
    @parent = parent
    @child = NodeFactory.new_node(self, str[(0..-2)])
  end

  # A star always matches the empty word.
  def null?
    true
  end

  # Same beginning symbols as the child.
  def first
    @child.first
  end

  # Same end symbols as the child.
  def last
    @child.last
  end

  # The follow pairs of the child plus the cross of last and first: after
  # an end symbol the iteration may start over with a beginning symbol.
  def follow
    pairs = @child.follow
    heads = first
    tails = last

    heads.each do |head|
      tails.each { |tail| pairs << [tail, head] }
    end

    pairs
  end

  # A single-symbol primitive child is written without parentheses.
  def to_s
    inner = @child.to_s
    if @child.class == PrimExp && inner.length == 1
      "#{inner}*"
    else
      "(#{inner})*"
    end
  end

  # A star node is never reported as the empty word ...
  def lambda?
    false
  end

  # ... nor as the empty set.
  def empty?
    false
  end
end
318
+
319
# Syntax tree node for a union (a|b|...) of child expressions.
class Union
  # +str+ is cut at every '|' outside of parentheses; each piece becomes
  # one child node.
  def initialize(parent, str)
    @parent = parent
    @childs = _split(str).map { |piece| NodeFactory.new_node(self, piece) }
  end

  # The union matches the empty word if any alternative does.
  def null?
    @childs.any? { |child| child.null? }
  end

  # Beginning symbols of all alternatives, in child order.
  def first
    _gather { |child| child.first }
  end

  # End symbols of all alternatives, in child order.
  def last
    _gather { |child| child.last }
  end

  # Follow pairs of all alternatives, in child order.
  def follow
    _gather { |child| child.follow }
  end

  # Alternatives joined by '|'. Parentheses are added unless this node is
  # the root or sits directly inside another union.
  def to_s
    joined = @childs.map { |child| child.to_s }.join('|')
    return joined if @parent.nil? or @parent.class == Union

    '(' + joined + ')'
  end

  # A union node is never reported as the empty word ...
  def lambda?
    false
  end

  # ... nor as the empty set.
  def empty?
    false
  end

  private

  # Appends every item the block yields for each child to one flat array.
  def _gather
    collected = []
    @childs.each do |child|
      (yield child).each { |item| collected << item }
    end
    collected
  end

  # Splits +str+ into arrays of symbols at each top level '|'.
  def _split(str)
    depth = 0
    pieces = [[]]

    str.each_char do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'

      if c == '|' and depth == 0
        pieces << []
      else
        pieces.last << c
      end
    end

    pieces
  end
end
401
+
402
# Syntax tree node for the concatenation of several child expressions.
class Concat
  # +str+ is split into its top level factors; each factor becomes a child
  # node. Children which are the empty word are dropped.
  def initialize(parent, str)
    @parent = parent
    nodes = _split(str).map { |piece| NodeFactory.new_node(self, piece) }
    @childs = nodes.reject { |child| child.lambda? }
  end

  # The concatenation matches the empty word only if every factor does.
  def null?
    @childs.all? { |child| child.null? }
  end

  # Beginning symbols: those of the leading factors, up to and including
  # the first factor which does not match the empty word.
  def first
    _leading_symbols(@childs) { |child| child.first }
  end

  # End symbols: the mirror image of #first, walking the factors backwards.
  def last
    _leading_symbols(@childs.reverse) { |child| child.last }
  end

  # Follow pairs of the factors themselves, then for every factor the
  # pairs of its end symbols with the beginning symbols of what may come
  # directly after it.
  def follow
    pairs = []

    @childs.each do |child|
      child.follow.each { |pair| pairs << pair }
    end

    @childs.each_with_index do |left, idx|
      rest = @childs[((idx + 1)..-1)]
      left.last.each do |l|
        _leading_symbols(rest) { |ch| ch.first }.each { |f| pairs << [l, f] }
      end
    end

    pairs
  end

  # String forms of the factors written one after another.
  def to_s
    @childs.map { |child| child.to_s }.join
  end

  # A concatenation node is never reported as the empty word ...
  def lambda?
    false
  end

  # ... nor as the empty set.
  def empty?
    false
  end

  private

  # Collects what the block yields for each node of +nodes+, stopping
  # after the first node which does not match the empty word.
  def _leading_symbols(nodes)
    collected = []
    nodes.each do |node|
      (yield node).each { |sym| collected << sym }
      break unless node.null?
    end
    collected
  end

  # Splits +str+ into arrays of symbols, one per top level factor. A
  # parenthesised group is one factor; a top level '*' is attached to the
  # group or to the single symbol directly in front of it.
  def _split(str)
    depth = 0
    pieces = [[]]
    previous = nil

    str.each_char do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'

      if depth == 1 and c == '('
        # A top level group starts: open a new piece for it.
        pieces << [c]
      elsif depth == 0 and c == '*'
        if previous == ')'
          # The star belongs to the group closed just before.
          pieces[-2] << c
        else
          # The star belongs to the last symbol only.
          pieces << [pieces.last.pop, c]
          pieces << []
        end
      elsif depth == 0 and c == ')'
        # A top level group ends: close its piece.
        pieces.last << c
        pieces << []
      else
        pieces.last << c
      end

      previous = c
    end

    pieces.reject { |piece| piece.empty? }
  end
end
511
+
512
+ class NodeFactory
513
+ def self.new_node(parent, arg)
514
+
515
+ #Remove parentheses
516
+ str = arg.dup
517
+ while sp(str)
518
+ str = str[(1..-2)]
519
+ end
520
+ #puts "Processing: #{arg} from #{parent.class}"
521
+ #Choose the right node type
522
+ if prim?(str)
523
+ return PrimExp.new(parent, str)
524
+ elsif star?(str)
525
+ return Star.new(parent, str)
526
+ elsif union?(str)
527
+ return Union.new(parent, str)
528
+ else
529
+ return Concat.new(parent, str)
530
+ end
531
+
532
+ end
533
+
534
+ private
535
+ def self.sp(str)
536
+ if str[0,1].include? '(' and str[-1,1].include? ')'
537
+ state = 0
538
+ l = 0
539
+ count = Hash.new(0)
540
+ count['('] = 1
541
+ count[')'] = -1
542
+
543
+ str.each_char do |c|
544
+ state += count[c]
545
+ l += 1
546
+ break if state == 0
547
+ end
548
+
549
+ return true if str.length == l
550
+ end
551
+
552
+ false
553
+ end
554
+
555
+ def self.prim?(str)
556
+ not ['(', ')', '|', '*'].any? { |c| str.include? c }
557
+ end
558
+
559
+ def self.star?(str)
560
+ if str[-1,1].include? '*'
561
+ return true if sp(str[(0..-2)]) #something like (....)*
562
+ return true if str.length == 2 #something like a*
563
+ end
564
+
565
+ false
566
+ end
567
+
568
+ def self.union?(str)
569
+ state = 0
570
+ count = Hash.new(0)
571
+ count['('] = 1
572
+ count[')'] = -1
573
+
574
+ str.each_char do |c|
575
+ state += count[c]
576
+
577
+ return true if c == '|' and state == 0
578
+ end
579
+
580
+ false
581
+ end
582
+ end
583
+ end
584
+ end