rlsm 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,593 @@
1
+ #
2
+ # This file is part of the RLSM gem.
3
+ #
4
+ #(The MIT License)
5
+ #
6
+ #Copyright (c) 2008 Gunther Diemant <g.diemant@gmx.net>
7
+ #
8
+ #Permission is hereby granted, free of charge, to any person obtaining
9
+ #a copy of this software and associated documentation files (the
10
+ #'Software'), to deal in the Software without restriction, including
11
+ #without limitation the rights to use, copy, modify, merge, publish,
12
+ #distribute, sublicense, and/or sell copies of the Software, and to
13
+ #permit persons to whom the Software is furnished to do so, subject to
14
+ #the following conditions:
15
+ #
16
+ #The above copyright notice and this permission notice shall be
17
+ #included in all copies or substantial portions of the Software.
18
+ #
19
+ #THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
20
+ #EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
+ #MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
+ #IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23
+ #CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
+ #TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
+ #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
+ #
27
+
28
+ require File.join(File.dirname(__FILE__), 'monkey_patching')
29
+
30
+ module RLSM
31
+ class RegExp
32
# Creates a new RegExp from a string description.
#
# Metacharacters are & * | ( ). Here & is the empty word and an empty
# string represents the empty set.
#
# str:: textual description of the regular expression (default: "").
#
# Raises whatever _well_formed? raises when +str+ is malformed.
def initialize(str = "")
  # Is the argument a well formed RegExp?
  _well_formed?(str)

  # More than one & or * in a row is useless and a star on & is again &.
  # Dropping that star can leave two & side by side ("&*&*" -> "&&"),
  # so runs of & are squeezed once more afterwards.
  re = str.squeeze('&*').gsub('&*', '&').squeeze('&')

  @re = NodeFactory.new_node(nil, re)

  # Cached string representation, filled on demand.
  @re_str = nil
end
47
+
48
+ #--
49
+ #Operations of a regexp
50
+ #++
51
+
52
# Kleene star of the regexp. Alters the regexp in place.
#
# Returns self in every case, so the call can be chained.
def star!
  # For the empty set and the empty word a star changes nothing,
  # and a double star is useless as well.
  return self if empty? || lambda? || @re.class == Star

  @re = NodeFactory.new_node(nil, "(#{to_s})*")

  # Unset the cached string representation.
  @re_str = nil

  self
end
65
+
66
# Returns the kleene star of this regexp. Leaves the regexp untouched.
#
# The copy itself is returned, independent of the return value of star!.
def star
  copy = deep_copy
  copy.star!
  copy
end
70
+
71
# Returns the concatenation of two regexps.
#
# other:: the regexp appended to this one.
#
# Concatenating anything with the empty set yields the empty set.
def *(other)
  return RegExp.new if empty? || other.empty?

  RegExp.new("(#{to_s})(#{other.to_s})")
end
76
+
77
# Returns the union of two regexps.
#
# other:: the regexp to unite with this one.
#
# If one side is the empty set, a copy of the other side is returned.
def +(other)
  return deep_copy if other.empty?
  return other.deep_copy if empty?

  RegExp.new("(#{to_s})|(#{other.to_s})")
end
83
+
84
+ #--
85
+ #Some small flags
86
+ #++
87
# Returns true if this regexp is the empty word.
# The answer is delegated to the root node of the parsed expression.
def lambda?
  @re.lambda?
end
91
+
92
# Returns true if this regexp is the empty set.
# The answer is delegated to the root node of the parsed expression.
def empty?
  @re.empty?
end
96
+
97
# Returns true if the empty word matches this regexp.
# The answer is delegated to the root node of the parsed expression.
def null?
  @re.null?
end
101
+
102
+ #--
103
+ #Some properties of a regexp
104
+ #++
105
+
106
# Returns an array of beginning symbols of the regexp,
# as reported by the root node of the parsed expression.
def first
  @re.first
end
110
+
111
# Returns an array of end symbols of the regexp,
# as reported by the root node of the parsed expression.
def last
  @re.last
end
115
+
116
# Returns an array of all possible two letter substrings of words
# matched by the regexp. Every pair is listed only once.
def follow
  @re.follow.uniq
end
120
+
121
+ #--
122
+ #Conversion methods
123
+ #++
124
# Returns a string representation of the regexp.
# The string is built once from the root node and cached in @re_str.
def to_s
  @re_str ||= @re.to_s
end
128
+
129
# Returns a minimal DFA which accepts the same language as the regexp.
#
# The construction works on positions: every symbol occurrence gets a
# unique number, position 0 marks the beginning, the follow pairs of the
# numbered expression are the transitions, and a subset construction
# turns the resulting (possibly nondeterministic) automaton into a DFA.
def to_dfa
  # Step 1: Substitute every char such that every character is unique.
  # orig[i - 1] is the original character of position i.
  metachars = ['(', ')', '|', '*', '&']
  orig = []
  body = []
  to_s.each_char do |c|
    if metachars.include?(c)
      body << c
    else
      orig << c
      body << orig.size
    end
  end

  # Add the beginning marker. The body is put in parentheses so that the
  # marker is concatenated with the whole expression; without them a top
  # level union like a|b would attach the marker to its first
  # alternative only.
  rre = body.empty? ? [0] : [0, '('] + body + [')']

  tmp_re = NodeFactory.new_node(nil, rre)

  # Step 2a: Construct an automaton representation of this new regexp.
  # Step 2b: Reverse the substitution (yields (maybe) a NFA).
  alph = orig.uniq
  initial = 0

  # The empty set accepts nothing, not even the empty word.
  tmp_finals = empty? ? [] : tmp_re.last

  tmp_trans = tmp_re.follow.uniq.map do |s1, s2|
    [orig[s2 - 1], s1, s2]
  end

  # Step 3: Transform the NFA to a DFA (subset construction).
  states = [[0]]
  frontier = [[0]]
  trans = []
  until frontier.empty?
    current, frontier = frontier, []

    current.each do |subset|
      alph.each do |char|
        reachable = tmp_trans.select do |tr|
          tr[0] == char && subset.include?(tr[1])
        end
        next if reachable.empty?

        # uniq keeps equal position sets from showing up as two states.
        target = reachable.map { |tr| tr[2] }.uniq.sort

        # Found a new state?
        unless states.include?(target)
          frontier << target
          states << target
        end

        edge = [char, states.index(subset), states.index(target)]

        # Found a new transition?
        trans << edge unless trans.include?(edge)
      end
    end
  end

  finals = (0...states.size).select do |i|
    states[i].any? { |pos| tmp_finals.include?(pos) }
  end

  states = (0...states.size).to_a

  # Step 4: Return the result.
  RLSM::DFA.new(alph, states, initial, finals, trans).minimize(:rename_states => true)
end
200
+
201
# Short description made of the class name and the string
# representation of the regexp.
def inspect # :nodoc:
  "<#{self.class} : '#{to_s}' >"
end
204
+
205
# Returns true if the two regexps are the same, i.e. the dfas are
# isomorphic. The comparison itself is done by the == of the DFAs
# returned by to_dfa.
#
# other:: the regexp to compare with; must respond to to_dfa.
def ==(other)
  to_dfa == other.to_dfa
end
209
+
210
+
211
+ private
212
# Checks that +str+ is a well formed regexp description.
#
# Parentheses must be balanced and something like |) or *a or (| isn't
# allowed.
#
# str:: the description to check.
#
# Returns nil when the description is acceptable.
# Raises RegExpException otherwise.
def _well_formed?(str)
  # 1. Balanced parentheses: the nesting depth may never drop below zero
  # (this rejects ")(") and must be back at zero at the end.
  depth = 0
  str.each_char do |c|
    depth += 1 if c == '('
    depth -= 1 if c == ')'
    break if depth < 0
  end

  if depth != 0
    raise RegExpException, "Unbalanced parenthesis in #{str}"
  end

  # 2. Bad sequences: () |) (| |* (* and a star at the very beginning.
  # \A anchors at the start of the whole string, not of every line.
  if str =~ /\(\)|\|\)|\(\||\|\*|\A\*|\(\*/
    raise RegExpException, "Bad character sequence #{$&} found in #{str}"
  end

  nil
end
232
+
233
# Leaf node of the expression tree: a plain sequence of symbols without
# any operator, the empty word (&) or the empty set (no symbols at all).
class PrimExp
  # parent:: the enclosing node (nil for the root).
  # str::    the symbols of this leaf, either a String (one symbol per
  #          character) or an Array of symbols. '&' or ['&'] is the
  #          empty word; & inside a longer sequence is dropped.
  def initialize(parent, str)
    @parent = parent
    if str == '&' || str == ['&']
      @content = '&'
      @null = true
    else
      symbols = str.is_a?(String) ? str.split(//) : str
      @content = symbols.reject { |c| c == '&' }
      @null = false
    end
  end

  # True if the empty word is matched, i.e. this leaf is &.
  def null?
    @null
  end

  # One-element array with the first symbol; empty for & and the empty set.
  def first
    @null ? [] : @content.first(1)
  end

  # One-element array with the last symbol; empty for & and the empty set.
  def last
    @null ? [] : @content.last(1)
  end

  # All pairs [x, y] of symbols where y directly follows x in this leaf.
  def follow
    return [] if @null

    (1...@content.length).map { |i| [@content[i - 1], @content[i]] }
  end

  # The symbols written one after another, or '&' for the empty word.
  def to_s
    @null ? @content : @content.join
  end

  # True if this leaf is the empty word.
  def lambda?
    @null
  end

  # True if this leaf is the empty set.
  def empty?
    !@null && @content.empty?
  end
end
279
+
280
# Node for the kleene star of a sub-expression.
class Star
  # parent:: the enclosing node (nil for the root).
  # str::    the starred expression including the trailing *, which is
  #          cut off before the operand is parsed.
  def initialize(parent, str)
    @parent = parent
    @child = NodeFactory.new_node(self, str[0..-2])
  end

  # A star always matches the empty word.
  def null?
    true
  end

  # Beginning symbols are those of the operand.
  def first
    @child.first
  end

  # End symbols are those of the operand.
  def last
    @child.last
  end

  # Follow pairs of the operand plus, because of the repetition, every
  # pair [l, f] of an end symbol l and a beginning symbol f.
  # A new array is returned; the operand's result is not modified.
  def follow
    tails = last
    wrap_around = []
    first.each do |f|
      tails.each { |l| wrap_around << [l, f] }
    end

    @child.follow + wrap_around
  end

  # A single symbol is written as a*, everything else as (...)*.
  def to_s
    if @child.class == PrimExp && @child.to_s.length == 1
      "#{@child.to_s}*"
    else
      "(#{@child.to_s})*"
    end
  end

  # A star node is never the bare empty word.
  def lambda?
    false
  end

  # A star node is never the empty set.
  def empty?
    false
  end
end
327
+
328
# Node for the union (alternatives separated by |) of sub-expressions.
class Union
  # parent:: the enclosing node (nil for the root).
  # str::    the expression, a String or an Array of symbols, which
  #          contains at least one | outside of parentheses.
  def initialize(parent, str)
    @parent = parent
    @childs = _split(str).map { |part| NodeFactory.new_node(self, part) }
  end

  # The empty word is matched if one alternative matches it.
  def null?
    @childs.any? { |child| child.null? }
  end

  # Beginning symbols of all alternatives.
  def first
    @childs.inject([]) { |acc, child| acc + child.first }
  end

  # End symbols of all alternatives.
  def last
    @childs.inject([]) { |acc, child| acc + child.last }
  end

  # Follow pairs of all alternatives.
  def follow
    @childs.inject([]) { |acc, child| acc + child.follow }
  end

  # The alternatives joined by |. Parentheses are added unless this node
  # is the root or sits directly below a union or a star (the star adds
  # its own parentheses).
  def to_s
    joined = @childs.map { |child| child.to_s }.join('|')

    if @parent.nil? || @parent.class == Union || @parent.class == Star
      joined
    else
      "(#{joined})"
    end
  end

  # A union node is never the bare empty word.
  def lambda?
    false
  end

  # A union node is never the empty set.
  def empty?
    false
  end

  private

  # Splits the expression at every | which is not enclosed in
  # parentheses. Returns an array of symbol arrays, one per alternative.
  def _split(str)
    symbols = str.is_a?(String) ? str.split(//) : str
    depth = 0
    parts = [[]]

    symbols.each do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'

      if c == '|' && depth == 0
        parts << []
      else
        parts.last << c
      end
    end

    parts
  end
end
410
+
411
# Node for the concatenation of sub-expressions.
class Concat
  # parent:: the enclosing node (nil for the root).
  # str::    the expression, a String or an Array of symbols.
  #
  # Factors which are the empty word do not change the concatenation
  # and are dropped.
  def initialize(parent, str)
    @parent = parent
    factors = _split(str).map { |part| NodeFactory.new_node(self, part) }
    @childs = factors.reject { |child| child.lambda? }
  end

  # The empty word is matched only if every factor matches it.
  def null?
    @childs.all? { |child| child.null? }
  end

  # Beginning symbols: those of the leading factors up to and including
  # the first factor which does not match the empty word.
  def first
    res = []
    @childs.each do |child|
      res.concat(child.first)
      break unless child.null?
    end

    res
  end

  # End symbols: those of the trailing factors, going backwards up to and
  # including the first factor which does not match the empty word.
  def last
    res = []
    @childs.reverse.each do |child|
      res.concat(child.last)
      break unless child.null?
    end

    res
  end

  # Follow pairs of the factors plus the pairs which cross a factor
  # border: an end symbol of a factor followed by a beginning symbol of
  # the next factors, as long as the factors in between can be skipped.
  def follow
    res = []
    @childs.each { |child| res.concat(child.follow) }

    (1...@childs.size).each do |i|
      @childs[i - 1].last.each do |l|
        @childs[i..-1].each do |ch|
          ch.first.each { |f| res << [l, f] }

          break unless ch.null?
        end
      end
    end

    res
  end

  # The factors written one after another.
  def to_s
    @childs.map { |child| child.to_s }.join
  end

  # A concatenation node is never the bare empty word.
  def lambda?
    false
  end

  # A concatenation node is never the empty set.
  def empty?
    false
  end

  private

  # Splits the expression into its factors: parenthesised groups (with
  # their star, if any), starred single symbols and runs of plain
  # symbols. Returns an array of non-empty symbol arrays.
  def _split(str)
    symbols = str.is_a?(String) ? str.split(//) : str
    depth = 0
    groups = [[]]
    previous = nil

    symbols.each do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'

      if depth == 1 && c == '('
        # A top level group starts here.
        groups << [c]
      elsif depth == 0 && c == '*'
        if previous == ')'
          # The star belongs to the group which was just closed.
          groups[-2] << c
        else
          # The star belongs to the preceding single symbol only.
          groups << [groups.last.pop, c]
          groups << []
        end
      elsif depth == 0 && c == ')'
        # The top level group ends here.
        groups.last << c
        groups << []
      else
        groups.last << c
      end

      previous = c
    end

    groups.reject { |group| group.empty? }
  end
end
520
+
521
# Chooses and creates the right node type for a (sub)expression.
# Expressions are either Strings or Arrays of symbols.
class NodeFactory
  # Creates the node for +arg+ below +parent+.
  #
  # parent:: the node the new node belongs to (nil for the root).
  # arg::    the expression; it is not modified.
  def self.new_node(parent, arg)
    # Remove parentheses which enclose the whole expression.
    str = arg.dup
    str = str[1..-2] while sp(str)

    # Choose the right node type.
    if prim?(str)
      PrimExp.new(parent, str)
    elsif star?(str)
      Star.new(parent, str)
    elsif union?(str)
      Union.new(parent, str)
    else
      Concat.new(parent, str)
    end
  end

  # NOTE: a bare `private` does not affect methods defined with
  # `def self.`, so the predicates below have always been public.
  # They are kept public for compatibility.

  # True if the expression starts with a ( whose matching ) is the last
  # symbol, i.e. the whole expression is enclosed in one pair of
  # parentheses.
  def self.sp(str)
    return false unless str[0, 1].include?('(') && str[-1, 1].include?(')')

    depth = 0
    consumed = 0
    symbols(str).each do |c|
      depth += paren_step(c)
      consumed += 1
      break if depth == 0
    end

    consumed == str.length
  end

  # True if the expression contains no operator and no parentheses.
  def self.prim?(str)
    ['(', ')', '|', '*'].none? { |c| str.include?(c) }
  end

  # True if the expression as a whole is starred.
  def self.star?(str)
    tail = str[-1, 1]
    return false unless tail && tail.include?('*')

    # something like (....)* or something like a*
    sp(str[0..-2]) || str.length == 2
  end

  # True if the expression contains a | outside of parentheses.
  def self.union?(str)
    depth = 0
    symbols(str).each do |c|
      depth += paren_step(c)

      return true if c == '|' && depth == 0
    end

    false
  end

  # The expression as an array of symbols (one per character for Strings).
  def self.symbols(str)
    str.is_a?(String) ? str.split(//) : str
  end

  # Change of the nesting depth caused by the symbol.
  def self.paren_step(c)
    case c
    when '(' then 1
    when ')' then -1
    else 0
    end
  end

  private_class_method :symbols, :paren_step
end
592
+ end
593
+ end
data/lib/rlsm.rb ADDED
@@ -0,0 +1,39 @@
1
+ #
2
+ # This file is part of the RLSM gem.
3
+ #
4
+ #(The MIT License)
5
+ #
6
+ #Copyright (c) 2008 Gunther Diemant <g.diemant@gmx.net>
7
+ #
8
+ #Permission is hereby granted, free of charge, to any person obtaining
9
+ #a copy of this software and associated documentation files (the
10
+ #'Software'), to deal in the Software without restriction, including
11
+ #without limitation the rights to use, copy, modify, merge, publish,
12
+ #distribute, sublicense, and/or sell copies of the Software, and to
13
+ #permit persons to whom the Software is furnished to do so, subject to
14
+ #the following conditions:
15
+ #
16
+ #The above copyright notice and this permission notice shall be
17
+ #included in all copies or substantial portions of the Software.
18
+ #
19
+ #THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
20
+ #EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
+ #MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
+ #IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23
+ #CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
+ #TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
+ #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
+ #
27
+
28
+
29
+ $:.unshift File.join(File.dirname(__FILE__), 'rlsm')
30
+
31
+ require 'monoid'
32
+ require 'dfa'
33
+ require "regexp"
34
+ require "mgen"
35
+ require "monoid_db"
36
+
37
module RLSM
  # Version of the RLSM gem. Frozen, so it cannot be altered by accident.
  VERSION = "0.2.2".freeze
end
data/test/test_rlsm.rb ADDED
File without changes
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rlsm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.2
5
+ platform: ruby
6
+ authors:
7
+ - asmodis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-11-10 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: sqlite3-ruby
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hoe
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.8.2
34
+ version:
35
+ description: "This is a ruby implementation of three concepts: - Deterministic Finite Automata (DFA) - Regular Expressions (in the sense of theoretical computer science) - Monoids (an algebraic construct)"
36
+ email:
37
+ - g.diemant@gmx.net
38
+ executables:
39
+ - rlsm
40
+ extensions: []
41
+
42
+ extra_rdoc_files:
43
+ - History.txt
44
+ - Manifest.txt
45
+ - README.txt
46
+ files:
47
+ - History.txt
48
+ - Manifest.txt
49
+ - README.txt
50
+ - Rakefile
51
+ - bin/rlsm
52
+ - lib/rlsm.rb
53
+ - lib/data/monoids.db
54
+ - lib/rlsm/monoid.rb
55
+ - lib/rlsm/dfa.rb
56
+ - lib/rlsm/regexp.rb
57
+ - lib/rlsm/monoid_db.rb
58
+ - lib/rlsm/mgen.rb
59
+ - lib/rlsm/monkey_patching.rb
60
+ - lib/rlsm/exceptions.rb
61
+ - test/test_rlsm.rb
62
+ has_rdoc: true
63
+ homepage: http://www.github.com/asmodis/rlsm
64
+ post_install_message:
65
+ rdoc_options:
66
+ - --main
67
+ - README.txt
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: "0"
75
+ version:
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: "0"
81
+ version:
82
+ requirements: []
83
+
84
+ rubyforge_project: rlsm
85
+ rubygems_version: 1.2.0
86
+ signing_key:
87
+ specification_version: 2
88
+ summary: "This is a ruby implementation of three concepts: - Deterministic Finite Automata (DFA) - Regular Expressions (in the sense of theoretical computer science) - Monoids (an algebraic construct)"
89
+ test_files:
90
+ - test/test_rlsm.rb