asmodis-rlsm 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rlsm.rb ADDED
@@ -0,0 +1,22 @@
1
+ # Copyright 2008 Gunther Diemant
2
+ #
3
+ # This file is part of the RLSM module.
4
+ #
5
+ # RLSM is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # RLSM is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with RLSM. If not, see <http://www.gnu.org/licenses/>.
17
+
18
+
19
+ require File.join(File.dirname(__FILE__), 'monoid')
20
+ require File.join(File.dirname(__FILE__), 'mgen')
21
+ require File.join(File.dirname(__FILE__), 'dfa')
22
+ require File.join(File.dirname(__FILE__), 'rlsm_regexp')
@@ -0,0 +1,584 @@
1
+ # Copyright 2008 Gunther Diemant
2
+ #
3
+ # This file is part of the RLSM module.
4
+ #
5
+ # RLSM is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # RLSM is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with RLSM. If not, see <http://www.gnu.org/licenses/>.
17
+
18
+
19
+ require File.join(File.dirname(__FILE__), 'monkey_patching')
20
+
21
+ module RLSM
22
+ class RegExp
23
#Builds a RegExp from the string description +str+.
#The metacharacters are & * | ( ), where & denotes the empty word;
#an empty string denotes the empty set.
#The description is checked with _well_formed? before it is parsed.
def initialize(str = "")
  _well_formed?(str)

  #A run of & or * collapses to one character, and a starred
  #empty word is again just the empty word.
  simplified = str.squeeze('&*').gsub('&*', '&')

  @re = NodeFactory.new_node(nil, simplified)
end
38
+
39
+ #--
40
+ #Operations of a regexp
41
+ #++
42
+
43
#Kleene star of the regexp. Alters the regexp in place.
#Always returns self, so the call can be chained (and so that #star,
#which returns the result of star! on a copy, never yields nil).
def star!
  #For the empty set and the empty word a star changes nothing,
  #and a star on an already starred expression is useless.
  return self if empty? || lambda? || (@re.class == Star)

  @re = NodeFactory.new_node(nil, '(' + to_s + ')*')

  #The cached string representation is stale now.
  @re_str = nil

  self
end
56
+
57
#Returns the kleene star of this regexp. Leaves the regexp untouched.
#The copy itself is returned rather than the return value of star!,
#so the result is a regexp even when star! had nothing to do.
def star
  copy = deep_copy
  copy.star!
  copy
end
61
+
62
#Concatenation of this regexp with +other+, returned as a new RegExp.
#If either operand is the empty set, the result is the empty set.
def *(other)
  if empty? || other.empty?
    RegExp.new
  else
    RegExp.new("(#{to_s})(#{other.to_s})")
  end
end
67
+
68
#Union of this regexp with +other+.
#If one operand is the empty set, a deep copy of the other operand is
#returned; otherwise a new RegExp for (self)|(other) is built.
def +(other)
  if other.empty?
    deep_copy
  elsif empty?
    other.deep_copy
  else
    RegExp.new("(#{to_s})|(#{other.to_s})")
  end
end
74
+
75
+ #--
76
+ #Some small flags
77
+ #++
78
#True if this regexp is exactly the empty word.
#The answer is delegated to the root node of the parse tree.
def lambda?
  @re.lambda?
end
82
+
83
#True if this regexp is the empty set.
#The answer is delegated to the root node of the parse tree.
def empty?
  @re.empty?
end
87
+
88
#True if the empty word is matched by this regexp.
#The answer is delegated to the root node of the parse tree.
def null?
  @re.null?
end
92
+
93
+ #--
94
+ #Some properties of a regexp
95
+ #++
96
+
97
#Symbols a matched word can begin with, as reported by the
#root node of the parse tree.
def first
  @re.first
end
101
+
102
#Symbols a matched word can end with, as reported by the
#root node of the parse tree.
def last
  @re.last
end
106
+
107
#All pairs of symbols which can occur next to each other in a matched
#word. The pairs come from the root node; duplicates are removed.
def follow
  pairs = @re.follow
  pairs.uniq
end
111
+
112
+ #--
113
+ #Conversion methods
114
+ #++
115
#String representation of the regexp.
#The string is computed from the parse tree on first use and cached
#in @re_str afterwards.
def to_s
  @re_str = @re.to_s unless @re_str
  @re_str
end
119
+
120
#Returns a minimal DFA which accepts the same language as the regexp.
#
#Every symbol of the regexp is replaced by its position number, and a
#start marker 0 is put in front. The last/follow sets of that expression
#describe an automaton over positions, which is mapped back to the
#original symbols and made deterministic by a subset construction.
#The result is built with RLSM::DFA.new and minimized.
def to_dfa
  #Step 1: Substitute every char such that every character is unique.
  #orig[i-1] is the original symbol of position i.
  orig = []
  body = []
  to_s.each_char do |c|
    if ['(', ')', '|', '*', '&'].include? c
      body << c
    else
      orig << c
      body << orig.size
    end
  end

  #Add the beginning marker. The expression is parenthesized so that the
  #marker is concatenated with the whole regexp; without the parentheses
  #a top level union like a|b would be read as (0a)|b.
  #The empty description is left alone, () is no valid regexp.
  rre = body.empty? ? [0] : [0, '('] + body + [')']

  tmp_re = NodeFactory.new_node(nil, rre)

  #Step 2a: Construct a DFA representation of this new regexp
  #Step 2b: Reverse the substitution (yields (maybe) a NFA)
  alph = orig.uniq
  initial = 0

  tmp_finals = tmp_re.last

  labelled = tmp_re.follow.map do |s1, s2|
    #PrimExp#follow slices its content, which gives one element arrays
    #for Array content; unwrap them to plain position numbers.
    from = Array(s1).first
    to = Array(s2).first
    [orig[to - 1], from, to]
  end
  tmp_trans = labelled.uniq

  #Step 3: Transform the NFA to a DFA (subset construction).
  #A DFA state is a sorted array of positions.
  states = [[0]]
  pending = [[0]]
  trans = []
  until pending.empty?
    current = pending
    pending = []

    current.each do |source|
      alph.each do |char|
        moves = tmp_trans.select do |c, s1, s2|
          c == char && source.include?(s1)
        end
        next if moves.empty?

        target = moves.map { |move| move[2] }.uniq.sort

        #Found a new state?
        unless states.include? target
          pending << target
          states << target
        end

        tr = [char, states.index(source), states.index(target)]

        #Found new trans?
        trans << tr unless trans.include? tr
      end
    end
  end

  #A subset is final if it contains a final position.
  finals = []
  states.each_with_index do |state, index|
    finals << index if tmp_finals.any? { |tf| state.include? tf }
  end

  states = (0...states.size).to_a

  #Step 4: Return the result
  RLSM::DFA.new(alph, states, initial, finals, trans).minimize(:rename_states => true)
end
191
+
192
#Short description: the class name followed by the quoted regexp string.
def inspect # :nodoc:
  format("<%s : '%s' >", self.class, to_s)
end
195
+
196
#Returns true if the two regexps are the same, i.e. their DFAs (see
#to_dfa) compare equal. An object which cannot be converted to a DFA
#(nil, a String, ...) is never equal to a regexp; the comparison
#returns false for it instead of raising a NoMethodError.
def ==(other)
  return false unless other.respond_to?(:to_dfa)

  to_dfa == other.to_dfa
end
200
+
201
+
202
+ private
203
#Checks that +str+ is a well formed regexp description.
#Parentheses must be balanced, and sequences like () or |) or (| or |*
#or (* or a leading * are not allowed.
#Returns nil for a well formed description and raises an ArgumentError
#otherwise. ArgumentError is an Exception, so code which rescues
#Exception keeps working.
def _well_formed?(str)
  #1 balanced parentheses
  #The depth must never drop below zero: ")(" has as many opening as
  #closing parentheses but is not balanced.
  depth = 0
  str.each_char do |c|
    depth += 1 if c == '('
    depth -= 1 if c == ')'
    raise ArgumentError, "Unbalanced parenthesis in #{str}" if depth < 0
  end

  raise ArgumentError, "Unbalanced parenthesis in #{str}" unless depth == 0

  #2 bad sequences
  #\A anchors at the start of the string; ^ would also match after a
  #newline, although a newline is an ordinary symbol.
  if str =~ /\(\)|\|\)|\(\||\|\*|\A\*|\(\*/
    raise ArgumentError, "Bad character sequence #{$&} found in #{str}"
  end
end
223
+
224
#Leaf of the parse tree: a plain sequence of symbols without any
#operator, the empty word (&) or the empty set (no symbols at all).
#The symbols are kept as an Array, so a description given as a String
#and one given as an Array of symbols behave the same way.
class PrimExp
  #+str+ is a String or an Array of symbols. '&' or ['&'] gives the
  #empty word; otherwise every & is dropped from the sequence.
  def initialize(parent, str)
    @parent = parent
    if str == '&' || str == ['&']
      @content = ['&']
      @null = true
    else
      symbols = str.is_a?(String) ? str.split(//) : str.to_a
      @content = symbols.reject { |c| c == '&' }
      @null = false
    end
  end

  #Only the empty word itself matches the empty word.
  def null?
    @null
  end

  #Array with the first symbol; empty for the empty word and the empty set.
  def first
    @null ? [] : @content.first(1)
  end

  #Array with the last symbol; empty for the empty word and the empty set.
  def last
    @null ? [] : @content.last(1)
  end

  #Pairs [x, y] of neighbouring symbols. The pairs hold the symbols
  #themselves, like the pairs built by Star, Union and Concat.
  def follow
    return [] if @null

    (1...@content.length).map { |i| [@content[i - 1], @content[i]] }
  end

  #The symbols written one after the other. join is used because
  #Array#to_s gives the inspect format on Ruby 1.9 and later.
  def to_s
    @content.join
  end

  def lambda?
    @null
  end

  #True if there are no symbols at all (and this is not the empty word).
  def empty?
    !@null && @content.empty?
  end
end
270
+
271
#Inner node of the parse tree for a starred expression.
class Star
  #+str+ is the starred expression including the trailing *; the
  #child node is built from everything but this last element.
  def initialize(parent, str)
    @parent = parent
    @child = NodeFactory.new_node(self, str[(0..-2)])
  end

  #A star always matches the empty word.
  def null?
    true
  end

  def first
    @child.first
  end

  def last
    @child.last
  end

  #The follow pairs of the child, plus every pair [l, f] where l can
  #end the child and f can begin it (the child may be repeated).
  def follow
    pairs = @child.follow
    endings = last

    first.each do |f|
      endings.each { |l| pairs << [l, f] }
    end

    pairs
  end

  #A single symbol is starred directly, anything else in parentheses.
  def to_s
    inner = @child.to_s
    single_symbol = (@child.class == PrimExp && inner.length == 1)

    single_symbol ? "#{inner}*" : "(#{inner})*"
  end

  def lambda?
    false
  end

  def empty?
    false
  end
end
318
+
319
#Inner node of the parse tree for an alternative a|b|...
class Union
  #Cuts +str+ at every | outside of parentheses and builds one child
  #node per alternative.
  def initialize(parent, str)
    @parent = parent
    @childs = _split(str).map { |part| NodeFactory.new_node(self, part) }
  end

  #The empty word matches if it matches one of the alternatives.
  def null?
    @childs.any? { |child| child.null? }
  end

  #First symbols of all alternatives, in order.
  def first
    _gather(:first)
  end

  #Last symbols of all alternatives, in order.
  def last
    _gather(:last)
  end

  #Follow pairs of all alternatives, in order.
  def follow
    _gather(:follow)
  end

  #The alternatives joined by |. Parentheses are added unless this
  #node is the root or itself an alternative of another Union.
  def to_s
    alternatives = @childs.map { |child| child.to_s }.join('|')
    return alternatives if @parent.nil? || @parent.class == Union

    '(' + alternatives + ')'
  end

  def lambda?
    false
  end

  def empty?
    false
  end

  private
  #Concatenates the answers of all children to the message +query+.
  def _gather(query)
    collected = []
    @childs.each do |child|
      child.send(query).each { |item| collected << item }
    end

    collected
  end

  #Returns the alternatives as arrays of symbols. A | inside of
  #parentheses belongs to a nested expression and does not split.
  def _split(str)
    depth = 0
    parts = [[]]

    str.each_char do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'

      if depth == 0 && c == '|'
        parts << []
      else
        parts.last << c
      end
    end

    parts
  end
end
401
+
402
#Inner node of the parse tree for a concatenation of sub-expressions.
class Concat
  #Cuts +str+ into its factors and builds one child node per factor.
  #Factors which are the empty word are dropped.
  def initialize(parent, str)
    @parent = parent
    nodes = _split(str).map { |part| NodeFactory.new_node(self, part) }
    @childs = nodes.reject { |node| node.lambda? }
  end

  #The empty word matches only if it matches every factor.
  def null?
    @childs.all? { |child| child.null? }
  end

  #First symbols of the leading factors, up to and including the
  #first factor which does not match the empty word.
  def first
    _through_nullable(@childs, :first)
  end

  #Last symbols of the trailing factors, seen from the end, up to and
  #including the first factor which does not match the empty word.
  def last
    _through_nullable(@childs.reverse, :last)
  end

  #The follow pairs of all factors, plus the pairs [l, f] where l ends
  #a factor and f begins one of the next factors reachable by skipping
  #factors which match the empty word.
  def follow
    pairs = []
    @childs.each do |child|
      child.follow.each { |pair| pairs << pair }
    end

    (0...(@childs.size - 1)).each do |index|
      successors = _through_nullable(@childs[(index + 1)..-1], :first)

      @childs[index].last.each do |l|
        successors.each { |f| pairs << [l, f] }
      end
    end

    pairs
  end

  def to_s
    @childs.map { |child| child.to_s }.join
  end

  def lambda?
    false
  end

  def empty?
    false
  end

  private
  #Collects the answers to +query+ from +nodes+ in order and stops
  #after the first node which does not match the empty word.
  def _through_nullable(nodes, query)
    symbols = []
    nodes.each do |node|
      node.send(query).each { |symbol| symbols << symbol }

      break unless node.null?
    end

    symbols
  end

  #Returns the factors as arrays of symbols: each top level
  #parenthesized group (with its star, if any), each starred single
  #symbol, and each run of plain symbols between them.
  def _split(str)
    depth = 0
    parts = [[]]
    previous = nil

    str.each_char do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'

      if c == '(' && depth == 1
        #A top level group starts a new factor.
        parts << [c]
      elsif c == '*' && depth == 0
        if previous == ')'
          #The group closed just before; an empty factor was already
          #opened behind it, so the group is the last but one.
          parts[-2] << c
        else
          #The star only binds the symbol directly in front of it.
          parts << [parts.last.pop, c]
          parts << []
        end
      elsif c == ')' && depth == 0
        parts.last << c
        parts << []
      else
        parts.last << c
      end

      previous = c
    end

    parts.reject { |part| part.empty? }
  end
end
511
+
512
#Chooses the node class for a regexp description (a String or an
#Array of symbols) and builds the node.
class NodeFactory
  #Removes parentheses which enclose the whole of +arg+ and builds a
  #PrimExp, Star, Union or Concat node from the rest. The tests are
  #tried in this order and Concat is the fallback.
  def self.new_node(parent, arg)
    str = arg.dup
    str = str[(1..-2)] while sp(str)

    return PrimExp.new(parent, str) if prim?(str)
    return Star.new(parent, str) if star?(str)
    return Union.new(parent, str) if union?(str)

    Concat.new(parent, str)
  end

  #NOTE: +private+ does not change the visibility of methods defined
  #with def self., so the helpers below stay callable from outside.
  private
  #True if +str+ starts with a ( whose matching ) is the last element,
  #i.e. the whole of +str+ is enclosed in one pair of parentheses.
  def self.sp(str)
    return false unless str[0,1].include?('(') && str[-1,1].include?(')')

    depth = 0
    consumed = 0
    str.each_char do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'
      consumed += 1
      break if depth == 0
    end

    str.length == consumed
  end

  #True if +str+ contains none of ( ) | *
  def self.prim?(str)
    ['(', ')', '|', '*'].all? { |c| !str.include?(c) }
  end

  #True if +str+ ends with a * which applies to all of the rest:
  #something like (....)* or like a*
  def self.star?(str)
    return false unless str[-1,1].include? '*'

    sp(str[(0..-2)]) || str.length == 2
  end

  #True if +str+ contains a | outside of all parentheses.
  def self.union?(str)
    depth = 0
    str.each_char do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'

      return true if depth == 0 && c == '|'
    end

    false
  end
end
583
+ end
584
+ end