rlsm 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,593 @@
1
+ #
2
+ # This file is part of the RLSM gem.
3
+ #
4
+ #(The MIT License)
5
+ #
6
+ #Copyright (c) 2008 Gunther Diemant <g.diemant@gmx.net>
7
+ #
8
+ #Permission is hereby granted, free of charge, to any person obtaining
9
+ #a copy of this software and associated documentation files (the
10
+ #'Software'), to deal in the Software without restriction, including
11
+ #without limitation the rights to use, copy, modify, merge, publish,
12
+ #distribute, sublicense, and/or sell copies of the Software, and to
13
+ #permit persons to whom the Software is furnished to do so, subject to
14
+ #the following conditions:
15
+ #
16
+ #The above copyright notice and this permission notice shall be
17
+ #included in all copies or substantial portions of the Software.
18
+ #
19
+ #THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
20
+ #EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
+ #MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
+ #IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23
+ #CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
+ #TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
+ #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
+ #
27
+
28
+ require File.join(File.dirname(__FILE__), 'monkey_patching')
29
+
30
+ module RLSM
31
+ class RegExp
32
+ #Creates a new RegExp from a string description. Metacharacters are
33
+ # & * | ( )
34
+ #Here & is the empty word and an empty string represents the empty set.
35
def initialize(str = "")
  # Validate first; _well_formed? raises on a malformed description.
  _well_formed?(str)

  # Runs of '&' or '*' collapse to a single character, and a star applied
  # to the empty word is dropped because it is again the empty word.
  simplified = str.squeeze('&*').gsub('&*', '&')

  # Root of the parsed expression tree (it has no parent node).
  @re = NodeFactory.new_node(nil, simplified)
end
47
+
48
+ #--
49
+ #Operations of a regexp
50
+ #++
51
+
52
+ #Kleene star of the regexp. Alters the regexp in place
53
def star!
  # Nothing to do for the empty set, the empty word, or an expression
  # whose root already is a star; in that case nil is returned.
  unchanged = empty? || lambda? || @re.class == Star
  return if unchanged

  # Re-parse the parenthesised textual form with a trailing star.
  @re = NodeFactory.new_node(nil, "(#{to_s})*")

  # Drop the memoised string so to_s is recomputed from the new tree.
  @re_str = nil

  self
end
65
+
66
+ #Returns the kleene star of this regexp. Leaves the regexp untouched.
67
def star
  copy = deep_copy

  # star! returns nil when it has nothing to change (empty set, empty word
  # or an expression that already is a star), so its return value must not
  # be used as the result; always hand back the copy itself.
  copy.star!

  copy
end
70
+
71
+ #Returns the concatenation of two regexps
72
def *(other)
  if empty? || other.empty?
    # Concatenating with the empty set gives the empty set again.
    RegExp.new
  else
    # Both operands are parenthesised so their structure survives re-parsing.
    RegExp.new("(#{to_s})(#{other.to_s})")
  end
end
76
+
77
+ #Returns the union of two regexps
78
def +(other)
  if other.empty?
    # The union with the empty set is the receiver itself (as a copy).
    deep_copy
  elsif empty?
    other.deep_copy
  else
    RegExp.new("(#{to_s})|(#{other.to_s})")
  end
end
83
+
84
+ #--
85
+ #Some small flags
86
+ #++
87
+ #Returns true if this regexp is the empty word.
88
def lambda?
  # Answered by the root node of the expression tree.
  root = @re
  root.lambda?
end
91
+
92
+ #Returns true if this regexp is the empty set.
93
def empty?
  # Answered by the root node of the expression tree.
  root = @re
  root.empty?
end
96
+
97
+ #Returns true if the empty word matches this regexp
98
def null?
  # Answered by the root node of the expression tree.
  root = @re
  root.null?
end
101
+
102
+ #--
103
+ #Some properties of a regexp
104
+ #++
105
+
106
+ #Returns an array of beginning symbols of the regexp.
107
def first
  # The root node computes the set of leading symbols.
  root = @re
  root.first
end
110
+
111
+ #Returns an array of end symbols of the regexp.
112
def last
  # The root node computes the set of trailing symbols.
  root = @re
  root.last
end
115
+
116
+ #Returns an array of all possible two letter substrings of words matched by the regexp.
117
def follow
  # The tree may report the same pair several times; duplicates are dropped.
  pairs = @re.follow
  pairs.uniq
end
120
+
121
+ #--
122
+ #Conversion methods
123
+ #++
124
+ #Returns a string representation of the regexp
125
def to_s
  # The textual form is computed once and memoised in @re_str.
  @re_str = @re.to_s unless @re_str
  @re_str
end
128
+
129
+ #Returns a minimal DFA which accepts the same language as the regexp.
130
def to_dfa
  # Step 1: replace every non-meta character by its 1-based position so
  # that each symbol occurrence is unique; 0 is a begin marker.
  metachars = ['(', ')', '|', '*', '&']
  symbols = []
  indexed = [0]
  to_s.each_char do |ch|
    if metachars.include?(ch)
      indexed << ch
    else
      symbols << ch
      indexed << symbols.size
    end
  end

  linear = NodeFactory.new_node(nil, indexed)

  # Step 2: positions are the states of an automaton; mapping a position
  # back to its original character labels the transitions (possibly an NFA).
  alphabet = symbols.uniq
  accepting_positions = linear.last
  moves = linear.follow.map { |from, to| [symbols[to - 1], from, to] }

  # Step 3: subset construction, starting from the set {0}.
  subsets = [[0]]
  frontier = [[0]]
  transitions = []
  until frontier.empty?
    current = frontier.deep_copy
    frontier = []

    current.each do |subset|
      alphabet.each do |letter|
        targets = moves.select do |sym, from, _to|
          sym == letter && subset.include?(from)
        end
        next if targets.empty?

        successor = targets.map { |_sym, _from, to| to }.sort

        # A subset not seen before becomes a new state to explore.
        unless subsets.include?(successor)
          frontier << successor
          subsets << successor
        end

        edge = [letter, subsets.index(subset), subsets.index(successor)]
        transitions << edge unless transitions.include?(edge)
      end
    end
  end

  # A subset is accepting if it contains at least one accepting position.
  accepting_subsets = subsets.select do |subset|
    accepting_positions.any? { |pos| subset.include?(pos) }
  end
  finals = accepting_subsets.map { |subset| subsets.index(subset) }

  state_ids = (0...subsets.size).to_a

  # Step 4: build the automaton and return its minimised form.
  RLSM::DFA.new(alphabet, state_ids, 0, finals, transitions).minimize(:rename_states => true)
end
200
+
201
def inspect # :nodoc:
  # Class name followed by the quoted textual form of the expression.
  format("<%s : '%s' >", self.class, to_s)
end
204
+
205
+ #Returns true if the two regexps are the same, i.e. the dfas are isomorphic.
206
def ==(other)
  # Equality is delegated to the automata built from both expressions.
  own_dfa = to_dfa
  other_dfa = other.to_dfa
  own_dfa == other_dfa
end
209
+
210
+
211
+ private
212
def _well_formed?(str)
  # Checks that +str+ is a syntactically valid description.
  # Returns nil when it is; raises RegExpException otherwise.

  # 1. Parentheses must be balanced. The nesting depth may never drop
  #    below zero, otherwise something like ")(" would count as balanced.
  depth = 0
  str.each_char do |c|
    depth += 1 if c == '('
    depth -= 1 if c == ')'

    if depth < 0
      raise RegExpException, "Unbalanced parenthesis in #{str}"
    end
  end

  if depth != 0
    raise RegExpException, "Unbalanced parenthesis in #{str}"
  end

  # 2. Bad sequences: "()", "|)", "(|", "|*", a leading "*" and "(*".
  #    \A anchors at the start of the whole string; ^ would also match after
  #    a newline and reject a star that follows a newline symbol.
  if str =~ /\(\)|\|\)|\(\||\|\*|\A\*|\(\*/
    raise RegExpException, "Bad character sequence #{$&} found in #{str}"
  end
end
232
+
233
# Leaf node of the expression tree: a plain sequence of symbols without
# any operator, the empty word ('&') or the empty set (no symbols at all).
class PrimExp
  # parent:: the enclosing node (nil for the root).
  # str::    a String or an Array of symbols; '&' entries are dropped
  #          unless the sequence consists of '&' only.
  def initialize(parent, str)
    @parent = parent

    # A String is split into its characters so that the content is always
    # a sequence of symbols (String has no #reject on current Rubies).
    symbols = str.is_a?(String) ? str.split(//) : str

    if !symbols.empty? && symbols.all? { |c| c == '&' }
      # One or more '&' and nothing else is the empty word.
      @content = '&'
      @null = true
    else
      @content = symbols.reject { |c| c == '&' }
      @null = false
    end
  end

  # True if the empty word is matched, i.e. this node is the empty word.
  def null?
    @null
  end

  # Array holding the first symbol; empty for the empty word/empty set.
  def first
    @null ? [] : @content[0, 1]
  end

  # Array holding the last symbol; empty for the empty word/empty set.
  def last
    return [] if @null

    # Array#[] with a negative start returns nil on an empty array.
    @content[-1, 1] || []
  end

  # Array of [symbol, successor] pairs for all adjacent symbols, in the
  # same flat pair format that Star#follow and Concat#follow produce.
  def follow
    (1...@content.length).map do |i|
      [@content[i - 1], @content[i]]
    end
  end

  # The symbols written one after the other ('&' for the empty word).
  # Array#to_s only joins on Ruby 1.8, so the join is done explicitly.
  def to_s
    @null ? @content : @content.join
  end

  # True if this node is the empty word.
  def lambda?
    @null
  end

  # True if this node is the empty set (no symbols and not the empty word).
  def empty?
    !@null && @content.empty?
  end
end
279
+
280
# Node for the Kleene star of a sub-expression.
class Star
  # str is the starred expression including its trailing '*'; everything
  # before that last element is parsed as the single child node.
  def initialize(parent, str)
    @parent = parent
    @child = NodeFactory.new_node(self, str[0...-1])
  end

  # A star always matches the empty word.
  def null?
    true
  end

  def first
    @child.first
  end

  def last
    @child.last
  end

  # Pairs of the child plus every [last, first] combination, because a
  # repetition may start over after any possible end.
  def follow
    pairs = @child.follow
    heads = first
    tails = last

    heads.each do |head|
      tails.each { |tail| pairs << [tail, head] }
    end

    pairs
  end

  # A single-symbol primitive child is written without parentheses.
  def to_s
    single_symbol = @child.class == PrimExp && @child.to_s.length == 1
    single_symbol ? "#{@child.to_s}*" : "(#{@child.to_s})*"
  end

  def lambda?
    false
  end

  def empty?
    false
  end
end
327
+
328
# Node for an alternation: the description is cut at every top-level '|'
# and each alternative becomes a child node.
class Union
  def initialize(parent, str)
    @parent = parent
    @childs = _split(str).map do |substr|
      NodeFactory.new_node(self, substr)
    end
  end

  # The empty word is matched as soon as one alternative matches it.
  def null?
    @childs.any? { |child| child.null? }
  end

  # All first symbols of all alternatives.
  def first
    _gather(:first)
  end

  # All last symbols of all alternatives.
  def last
    _gather(:last)
  end

  # All follow pairs of all alternatives.
  def follow
    _gather(:follow)
  end

  # Alternatives joined by '|'. Parentheses are only added when the parent
  # is neither a Union nor a Star (Star#to_s adds its own parentheses).
  def to_s
    joined = @childs.map { |child| child.to_s }.join('|')

    if @parent.nil? || @parent.class == Union || @parent.class == Star
      joined
    else
      "(#{joined})"
    end
  end

  def lambda?
    false
  end

  def empty?
    false
  end

  private

  # Concatenates the results of calling +name+ on every child, in order.
  def _gather(name)
    res = []
    @childs.each do |child|
      child.send(name).each { |item| res << item }
    end

    res
  end

  # Cuts str at every '|' outside of parentheses; returns an array of
  # character arrays, one per alternative.
  def _split(str)
    depth = 0
    res = [[]]

    str.each_char do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'

      if c == '|' && depth == 0
        res << []
      else
        res.last << c
      end
    end

    res
  end
end
410
+
411
# Node for a concatenation: the description is cut into its top-level
# factors; factors that are the empty word are dropped.
class Concat
  def initialize(parent, str)
    @parent = parent
    nodes = _split(str).map { |part| NodeFactory.new_node(self, part) }
    @childs = nodes.reject { |node| node.lambda? }
  end

  # The empty word is matched only if every factor matches it.
  def null?
    @childs.all? { |child| child.null? }
  end

  # First symbols of the leading factors, up to and including the first
  # factor that cannot be skipped.
  def first
    _outer_symbols(@childs) { |child| child.first }
  end

  # Same as #first, but walking the factors from the end.
  def last
    _outer_symbols(@childs.reverse) { |child| child.last }
  end

  # Pairs inside each factor, plus pairs leading from the end of a factor
  # into the following factors (continuing past factors that can be skipped).
  def follow
    pairs = []
    @childs.each { |child| pairs.concat(child.follow) }

    (0...(@childs.size - 1)).each do |idx|
      successors = @childs[(idx + 1)..-1]

      @childs[idx].last.each do |tail|
        successors.each do |node|
          node.first.each { |head| pairs << [tail, head] }

          break unless node.null?
        end
      end
    end

    pairs
  end

  def to_s
    @childs.map { |child| child.to_s }.join
  end

  def lambda?
    false
  end

  def empty?
    false
  end

  private

  # Collects the symbols yielded for each node, stopping after the first
  # node that does not match the empty word.
  def _outer_symbols(nodes)
    collected = []
    nodes.each do |node|
      yield(node).each { |sym| collected << sym }

      break unless node.null?
    end

    collected
  end

  # Cuts str into top-level factors: a parenthesised group (with an
  # optional trailing star), a single starred symbol, or a run of symbols.
  # Returns an array of non-empty character arrays.
  def _split(str)
    depth = 0
    groups = [[]]
    before = nil

    str.each_char do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'

      if c == '(' && depth == 1
        # A top-level group starts a factor of its own.
        groups << [c]
      elsif c == '*' && depth == 0
        if before == ')'
          # The star belongs to the group that was just closed.
          groups[-2] << c
        else
          # The star binds only to the symbol directly before it.
          groups << [groups.last.pop, c]
          groups << []
        end
      elsif c == ')' && depth == 0
        groups.last << c
        groups << []
      else
        groups.last << c
      end

      before = c
    end

    groups.reject { |group| group.empty? }
  end
end
520
+
521
# Chooses and builds the node class that fits a (sub)expression.
# All helpers are class methods and publicly callable.
class NodeFactory
  # Strips parentheses that enclose the whole expression, then builds a
  # PrimExp, Star, Union or (as fallback) Concat node from the rest.
  def self.new_node(parent, arg)
    str = arg.dup
    str = str[(1..-2)] while sp(str)

    node_class =
      if prim?(str)
        PrimExp
      elsif star?(str)
        Star
      elsif union?(str)
        Union
      else
        Concat
      end

    node_class.new(parent, str)
  end

  # True if the opening parenthesis at the start is closed by the very
  # last element, i.e. one pair of parentheses encloses everything.
  def self.sp(str)
    return false unless str[0, 1].include?('(') && str[-1, 1].include?(')')

    depth = 0
    consumed = 0
    str.each_char do |c|
      depth += 1 if c == '('
      depth -= 1 if c == ')'
      consumed += 1
      break if depth == 0
    end

    str.length == consumed
  end

  # True if the expression contains none of ( ) | *
  def self.prim?(str)
    ['(', ')', '|', '*'].none? { |c| str.include?(c) }
  end

  # True for a star over one enclosing group, like (....)*, or for any
  # two-element expression ending in a star, like a*
  def self.star?(str)
    return false unless str[-1, 1].include?('*')

    sp(str[(0..-2)]) || str.length == 2
  end

  # True if there is a '|' outside of all parentheses.
  def self.union?(str)
    depth = 0

    str.each_char do |c|
      case c
      when '(' then depth += 1
      when ')' then depth -= 1
      when '|' then return true if depth == 0
      end
    end

    false
  end
end
592
+ end
593
+ end
data/lib/rlsm.rb ADDED
@@ -0,0 +1,39 @@
1
+ #
2
+ # This file is part of the RLSM gem.
3
+ #
4
+ #(The MIT License)
5
+ #
6
+ #Copyright (c) 2008 Gunther Diemant <g.diemant@gmx.net>
7
+ #
8
+ #Permission is hereby granted, free of charge, to any person obtaining
9
+ #a copy of this software and associated documentation files (the
10
+ #'Software'), to deal in the Software without restriction, including
11
+ #without limitation the rights to use, copy, modify, merge, publish,
12
+ #distribute, sublicense, and/or sell copies of the Software, and to
13
+ #permit persons to whom the Software is furnished to do so, subject to
14
+ #the following conditions:
15
+ #
16
+ #The above copyright notice and this permission notice shall be
17
+ #included in all copies or substantial portions of the Software.
18
+ #
19
+ #THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
20
+ #EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
+ #MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
+ #IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23
+ #CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
+ #TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
+ #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
+ #
27
+
28
+
29
+ $:.unshift File.join(File.dirname(__FILE__), 'rlsm')
30
+
31
+ require 'monoid'
32
+ require 'dfa'
33
+ require "regexp"
34
+ require "mgen"
35
+ require "monoid_db"
36
+
37
module RLSM
  # Version of the gem. Frozen so the constant's string cannot be
  # modified in place by accident.
  VERSION = "0.2.2".freeze
end
data/test/test_rlsm.rb ADDED
File without changes
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rlsm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.2
5
+ platform: ruby
6
+ authors:
7
+ - asmodis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-11-10 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: sqlite3-ruby
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hoe
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.8.2
34
+ version:
35
+ description: "This is a ruby implementation of three concepts: - Deterministic Finite Automata (DFA) - Regular Expressions (in the sense of theoretical computer science) - Monoids (an algebraic construct)"
36
+ email:
37
+ - g.diemant@gmx.net
38
+ executables:
39
+ - rlsm
40
+ extensions: []
41
+
42
+ extra_rdoc_files:
43
+ - History.txt
44
+ - Manifest.txt
45
+ - README.txt
46
+ files:
47
+ - History.txt
48
+ - Manifest.txt
49
+ - README.txt
50
+ - Rakefile
51
+ - bin/rlsm
52
+ - lib/rlsm.rb
53
+ - lib/data/monoids.db
54
+ - lib/rlsm/monoid.rb
55
+ - lib/rlsm/dfa.rb
56
+ - lib/rlsm/regexp.rb
57
+ - lib/rlsm/monoid_db.rb
58
+ - lib/rlsm/mgen.rb
59
+ - lib/rlsm/monkey_patching.rb
60
+ - lib/rlsm/exceptions.rb
61
+ - test/test_rlsm.rb
62
+ has_rdoc: true
63
+ homepage: http://www.github.com/asmodis/rlsm
64
+ post_install_message:
65
+ rdoc_options:
66
+ - --main
67
+ - README.txt
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: "0"
75
+ version:
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: "0"
81
+ version:
82
+ requirements: []
83
+
84
+ rubyforge_project: rlsm
85
+ rubygems_version: 1.2.0
86
+ signing_key:
87
+ specification_version: 2
88
+ summary: "This is a ruby implementation of three concepts: - Deterministic Finite Automata (DFA) - Regular Expressions (in the sense of theoretical computer science) - Monoids (an algebraic construct)"
89
+ test_files:
90
+ - test/test_rlsm.rb