rlsm 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ require File.join(File.dirname(__FILE__), 'helper')
2
+ require File.join(File.dirname(__FILE__), 'regexp_parser')
3
+ require File.join(File.dirname(__FILE__), 'dfa')
4
+
5
+ module RLSM
6
# A regular expression over latin letters and digits, kept as a parse tree
# together with the string form derived from that tree.
class RegExp
  # Returns a RegExp which is the empty word.
  def self.empty_word
    new RLSM::RE::ParserHelpers::EmptyWordSymbol
  end

  # Returns a RegExp which represents the empty language.
  def self.empty_set
    new ''
  end

  # Synonym for new.
  def self.[](description)
    new(description)
  end

  # Creates a new RegExp. The +description+ is a string consisting of latin
  # letters, numbers and the following special characters
  # 1. +(+, +)+ for grouping subexpressions
  # 2. +|+ for union of regular expressions
  # 3. +*+ for the Kleene-Closure of a regular expression
  # 4. +@+ the empty word.
  #
  # Whitespace will be ignored and the empty string represents the empty
  # language.
  def initialize(description)
    @parse_tree = RE::Parser[ description ]
    @string = @parse_tree.to_s
  end

  attr_reader :parse_tree, :string

  # Concatenate +self+ with +other+.
  def +(other)
    RLSM::RegExp.new "(#@string)(#{other.string})"
  end

  # Returns the union of +self+ and +other+
  def |(other)
    RLSM::RegExp.new "#@string|#{other.string}"
  end

  # Returns the Kleene closure of +self+.
  def star
    RLSM::RegExp.new "(#@string)*"
  end

  # Calculates a minimal DFA which represents the same language as +self+.
  def to_dfa
    RLSM::DFA.new(subset_construction).minimize!
  end

  # Simply returns self.
  def to_regexp
    self
  end

  # Calculates the syntactic monoid of the represented language.
  def to_monoid
    to_dfa.to_monoid
  end

  # Checks if +self+ is equal to +other+, i.e. they represent the same
  # language. Objects which do not expose +string+ and +parse_tree+ are
  # never equal to a RegExp (instead of raising a NoMethodError).
  def ==(other)
    return false unless other.respond_to?(:string) && other.respond_to?(:parse_tree)
    return true if @string == other.string

    # Cheap necessary conditions: both languages must start and end with
    # the same set of letters. Only if these agree the DFAs are compared.
    return false if boundary_letters(@parse_tree.first) != boundary_letters(other.parse_tree.first)
    return false if boundary_letters(@parse_tree.last) != boundary_letters(other.parse_tree.last)

    to_dfa =~ other.to_dfa
  end

  private

  # Distinct letters (as strings) of a list of positions, order preserved.
  def boundary_letters(positions)
    positions.map { |pos| pos.to_s }.uniq
  end

  # Returns [initial state, final states, follow pairs, last positions].
  # A state is an array of positions; the initial state holds only an
  # artificial position which precedes every first position of the tree.
  def set_up_subset_construction
    # Trees without letters may report no follow relation at all (nil).
    # The copy keeps the array handed out by the tree unmodified.
    follow = (@parse_tree.follow || []).dup
    initial = RE::Position.new('i', -1)
    @parse_tree.first.each { |char| follow << [initial, char] }

    [[initial], @parse_tree.null? ? [[initial]] : [], follow, @parse_tree.last]
  end

  # Builds the description string of a DFA for the parse tree: "}s0",
  # then "*sN" for each final state, then "sI-c->sJ" for each transition.
  def subset_construction
    initial, finals, follow, last = set_up_subset_construction
    transitions = []

    unmarked = [initial]
    marked = []
    until unmarked.empty?
      current = unmarked.shift
      marked << current

      new_states(current, follow).each_pair do |char, state|
        # States are normalized by new_states, so plain array equality
        # recognizes a state which was already discovered.
        known = unmarked.include?(state) || marked.include?(state)
        unmarked << state unless known

        accepting = last.any? { |pos| state.any? { |st_pos| st_pos === pos } }
        finals << state if accepting && !finals.include?(state)

        transitions << [current, state, char]
      end
    end

    string = "}s0 "
    string += finals.map { |state| "*s#{marked.index(state)}" }.join(' ')
    string += ' '

    string += transitions.map do |tr|
      "s#{marked.index(tr[0])}-#{tr[2]}->s#{marked.index(tr[1])}"
    end.join(' ')
  end

  # Maps each letter to the state (array of positions) reached from
  # +origin+ by reading this letter.
  def new_states(origin, follow)
    successors = origin.map do |pos|
      follow.select { |pair| pair[0] === pos }.map { |pair| pair[-1] }
    end.flatten

    grouped = successors.inject({}) do |result, pos|
      (result[pos.to_s] ||= []) << pos
      result
    end

    grouped.each_pair { |char, state| grouped[char] = normalize_state(state) }
    grouped
  end

  # Removes repeated positions and orders the rest by index. Without this,
  # a position reachable from several origin positions is listed several
  # times, equal position sets compare as different states and expressions
  # like "(a*a)*" create larger and larger states forever.
  # All positions of one state share a letter, so the index identifies them.
  def normalize_state(state)
    by_index = {}
    state.each { |pos| by_index[pos.index] ||= pos }
    by_index.keys.sort.map { |index| by_index[index] }
  end
end # of class RegExp
124
+ end # of module RLSM
125
+
@@ -0,0 +1,450 @@
1
+ require File.join(File.dirname(__FILE__), 'helper')
2
+
3
+ module RLSM
4
+ module RE #:nodoc:
5
# Predicates used by the parser. The single character predicates accept
# anything which responds to +to_s+; the other predicates expect an array
# of such objects (the parser passes arrays of Position objects).
module ParserHelpers #:nodoc:
  OpenBracket = '('
  CloseBracket = ')'
  UnionSymbol = '|'
  StarSymbol = '*'
  EmptyWordSymbol = '@'
  LetterRegexp = /[a-zA-Z0-9]/

  def open_bracket?(char)
    char.to_s == OpenBracket
  end

  def close_bracket?(char)
    char.to_s == CloseBracket
  end

  def union_symbol?(char)
    char.to_s == UnionSymbol
  end

  def star_symbol?(char)
    char.to_s == StarSymbol
  end

  def empty_symbol?(char)
    char.to_s == EmptyWordSymbol
  end

  # True if +char+ is a latin letter or a digit. Returns a real boolean
  # instead of the match index.
  def letter?(char)
    !(char.to_s =~ LetterRegexp).nil?
  end

  # The input describes the empty set if it contains neither a letter nor
  # the empty word symbol.
  def empty_set?(input)
    input.none? { |position| letter?(position) || empty_symbol?(position) }
  end

  # The input describes the empty word if it contains the empty word
  # symbol, no letter and no empty bracket pair "()".
  def empty_word?(input)
    input.any? { |position| empty_symbol?(position) } &&
      input.none? { |position| letter?(position) } &&
      !input.join.include?(OpenBracket + CloseBracket)
  end

  def single_letter?(input)
    input.size == 1 && letter?(input[0])
  end

  # True if a union symbol occurs outside of all brackets. The elements
  # must respond to +weight+ (+1 / -1 / 0 for open / close / other).
  def union?(input)
    depth = 0
    input.each do |position|
      return true if depth == 0 && union_symbol?(position)
      depth += position.weight
    end

    false
  end

  # True if the whole input is a starred expression: a two element input
  # ending in a star, a starred star expression, or a bracketed expression
  # followed by a star.
  def star?(input)
    return false unless star_symbol?(input[-1])
    return true if input.size == 2

    return star?(input[0..-2]) if star_symbol?(input[-2])

    open_bracket?(input[0]) && close_bracket?(input[-2]) &&
      !Parser.unbalanced_brackets?(input[1..-3])
  end
end
71
+
72
# A character of a regular expression description. Letters carry the index
# of their occurrence; all other characters have no index (nil).
class Position #:nodoc:
  include Comparable

  def initialize(char, index = nil)
    @letter = char
    @index = index
  end

  attr_reader :letter, :index

  # Compares with a String by letter, with a Numeric by index and with
  # another Position by letter first and index second. Returns nil for
  # everything else.
  def <=>(other)
    case other
    when String
      @letter <=> other
    when Numeric
      @index <=> other
    when Position
      @letter == other.letter ? @index <=> other.index : @letter <=> other.letter
    else
      nil
    end
  end

  # +eql?+ and +hash+ agree with the comparison between two positions, so
  # hash based operations (Array#|, Array#uniq, Hash keys) treat positions
  # with the same letter and index as the same element, like +==+ does.
  def eql?(other)
    other.is_a?(Position) && (self <=> other) == 0
  end

  def hash
    [@letter, @index].hash
  end

  # Contribution to the bracket nesting depth: +1 for an opening bracket,
  # -1 for a closing bracket and 0 otherwise.
  def weight
    return 1 if Parser.open_bracket?(@letter)
    return -1 if Parser.close_bracket?(@letter)

    0
  end

  def to_s
    @letter
  end

  def inspect
    "P(#@letter,#@index)"
  end
end
110
+
111
# Turns the description of a regular expression into a tree of SyntaxNode
# objects. All methods are class methods.
class Parser #:nodoc:
  extend ParserHelpers

  # Parses +string+. Whitespace is removed, every remaining character
  # becomes a Position (only letters get an index). Raises a RegExpError
  # if the brackets are unbalanced.
  def self.[](string)
    index = -1
    input = string.gsub(/\s+/, '').scan(/./).map do |char|
      letter?(char) ? Position.new(char, index += 1) : Position.new(char)
    end

    if unbalanced_brackets?(input)
      raise RegExpError, "Parse Error: Unbalanced brackets."
    end

    parse(input)
  end

  # Parses an array of positions. The order of the checks matters: the
  # empty set and empty word tests look at the whole input before its
  # structure (star, union, concatenation) is examined.
  def self.parse(input)
    input = remove_surrounding_brackets(input)

    if empty_set?(input)
      EmptySet[]
    elsif empty_word?(input)
      EmptyWord[]
    elsif single_letter?(input)
      Prim[ input.first ]
    elsif star?(input)
      create_star_node(input)
    elsif union?(input)
      create_union_node(input)
    else # must be a concat
      create_concat_node(input)
    end
  end

  # The helpers below are internal but stay public: a bare +private+ has
  # no effect on methods defined with +def self.+, and ParserHelpers#star?
  # calls Parser.unbalanced_brackets? with an explicit receiver.

  # Builds the node for an input ending in a star.
  def self.create_star_node(input)
    content = parse(input[0..-2])

    # NOTE(review): an EmptySet content is returned unchanged although the
    # star of the empty language is the empty word - confirm the intent.
    if [Star, EmptySet, EmptyWord].include? content.class
      content
    elsif Union === content
      # The empty word adds nothing below a star.
      remaining = content.content.reject { |subexpr| subexpr == EmptyWord[] }
      # Do not wrap a single remaining alternative into a Union.
      Star[ remaining.size == 1 ? remaining.first : Union[ remaining ] ]
    else
      Star[ content ]
    end
  end

  # Builds the node for an input with a union symbol outside of brackets.
  def self.create_union_node(input)
    subexpressions = union_split(input).map { |subexpression| parse(subexpression) }

    # Drop empty sets and repeated alternatives.
    subexpressions = subexpressions.inject([]) do |result, subexpr|
      unless EmptySet === subexpr || result.include?(subexpr)
        result << subexpr
      end

      result
    end

    # Keep the empty word only if no other alternative already accepts it.
    if subexpressions.any? { |subexpr| EmptyWord === subexpr }
      subexpressions = subexpressions.reject { |subexpr| subexpr == EmptyWord[] }
      unless subexpressions.any? { |subexpr| subexpr.null? }
        subexpressions.unshift EmptyWord[]
      end
    end

    # An alternative is redundant if its star is an alternative too.
    star_exprs, subexpressions = subexpressions.partition { |subexpr| Star === subexpr }
    subexpressions.reject! { |subexpr| star_exprs.any? { |star| star.content == subexpr } }
    subexpressions |= star_exprs

    # Every alternative was the empty set (e.g. "a()|b()").
    return EmptySet[] if subexpressions.empty?

    if subexpressions.size == 1
      subexpressions.first
    else
      Union[ subexpressions.sort ]
    end
  end

  # Builds the node for a concatenation.
  def self.create_concat_node(input)
    subexpressions = concat_split(input)

    return EmptySet[] if subexpressions.any? { |subexpr| subexpr == EmptySet[] }

    subexpressions = subexpressions.reject { |subexpr| subexpr == EmptyWord[] }

    if subexpressions.empty?
      EmptyWord[]
    elsif subexpressions.size == 1
      subexpressions.first
    else
      Concat[ subexpressions ]
    end
  end

  # Strips bracket pairs which enclose the whole input.
  def self.remove_surrounding_brackets(string)
    result = string
    result = result[1..-2] while open_bracket?(result.first) &&
                                 close_bracket?(result.last) &&
                                 !unbalanced_brackets?(result[1..-2])

    result
  end

  # True if a closing bracket appears before its opening bracket or if
  # brackets remain open at the end.
  def self.unbalanced_brackets?(string)
    depth = 0
    string.each do |char|
      depth += char.weight
      return true if depth < 0
    end

    depth != 0
  end

  # Splits the input at the union symbols outside of brackets.
  def self.union_split(string)
    result = [[]]
    depth = 0
    string.each do |char|
      if depth == 0 && union_symbol?(char)
        result << []
      else
        result.last << char
      end
      depth += char.weight
    end

    result
  end

  # Splits a concatenation into parsed factors. Bracketed groups are
  # collected until their closing bracket and parsed together with a
  # directly following star. A letter outside of brackets becomes a Prim,
  # or a Star of a Prim if a star follows. Other characters outside of
  # brackets add no factor.
  def self.concat_split(string)
    result = []
    subexpr = []

    depth = 0
    string.each_with_index do |char, index|
      depth += char.weight

      if depth == 0
        subexpr << char if close_bracket?(char)

        unless subexpr.empty?
          subexpr << string[index + 1] if star_symbol?(string[index + 1])
          result << parse(subexpr)
          subexpr = []
        end

        if letter?(char)
          if star_symbol?(string[index + 1])
            result << Star[ Prim[ char ] ]
          else
            result << Prim[ char ]
          end
        end
      else # depth != 0
        subexpr << char
      end
    end

    result
  end
end
270
+
271
# Base class of all nodes of a parse tree. The defaults describe a node
# without letters: it is nullable and has no first, last or follow
# positions. Nodes are compared by their string form.
class SyntaxNode #:nodoc:
  include Comparable

  # Shortcut for new.
  def self.[](input = nil)
    self.new(input)
  end

  def initialize(input = nil)
    @content = input
  end

  attr_accessor :content

  def null?
    true
  end

  def first
    []
  end

  def last
    []
  end

  # An empty list rather than nil, like +first+ and +last+: the follow
  # lists of subnodes are merged with Array#| and scanned with
  # Enumerable methods, both of which fail on nil.
  def follow
    []
  end

  def <=>(other)
    to_s <=> other.to_s
  end

  def to_s
    @content.to_s
  end

  def inspect
    "#{self.class}[ #{@content.inspect} ]"
  end
end
312
+
313
# Node for the empty language. Its string form is the empty string; any
# argument given to the constructor is ignored.
class EmptySet < SyntaxNode #:nodoc:
  def initialize(input = nil)
    super ''
  end

  # The empty language contains no word, so it does not contain the empty
  # word either. The inherited default (true) would make the initial state
  # of the constructed automaton accepting.
  def null?
    false
  end
end
318
+
319
# Node for the language which contains only the empty word. Any argument
# given to the constructor is ignored.
class EmptyWord < SyntaxNode #:nodoc:
  def initialize(input = nil)
    # Same symbol the parser recognizes as the empty word ('@').
    super ParserHelpers::EmptyWordSymbol
  end
end
324
+
325
# Leaf node for a single letter; the content is the position of the letter.
class Prim < SyntaxNode #:nodoc:
  # A letter never matches the empty word.
  def null?
    false
  end

  # The letter is the only position a match can start with.
  def first
    [ @content ]
  end

  # The letter is the only position a match can end with.
  def last
    Array[ @content ]
  end

  # Nothing follows inside a single letter.
  def follow
    Array.new
  end
end
342
+
343
# Node for the Kleene closure of its content.
class Star < SyntaxNode #:nodoc:
  # A repetition starts where its content starts.
  def first
    @content.first
  end

  # A repetition ends where its content ends.
  def last
    @content.last
  end

  # The follow pairs of the content plus a pair from every last position
  # of the content back to every first position.
  def follow
    wrap_around = @content.last.product(@content.first)

    (@content.follow | wrap_around).sort
  end

  # Contents longer than one character are put into brackets.
  def to_s
    inner = @content.to_s
    return "#{inner}*" unless inner.length > 1

    "(#{inner})*"
  end
end
370
+
371
# Node for the union of its subexpressions; the content is an array of
# nodes.
class Union < SyntaxNode #:nodoc:
  # Nullable if at least one alternative is nullable.
  def null?
    @content.any? { |subexpr| subexpr.null? }
  end

  # Sorted first positions of all alternatives.
  def first
    @content.map { |subexpr| subexpr.first }.flatten.sort
  end

  # Sorted last positions of all alternatives.
  def last
    @content.map { |subexpr| subexpr.last }.flatten.sort
  end

  # Sorted follow pairs of all alternatives. An alternative which reports
  # no follow relation (nil) contributes nothing; merging nil with Array#|
  # would raise a TypeError, e.g. for "@|a".
  def follow
    @content.inject([]) { |result, subexpr| result | (subexpr.follow || []) }.sort
  end

  def to_s
    @content.map { |subexpr| subexpr.to_s }.join('|')
  end
end
392
+
393
# Node for the concatenation of its factors; the content is an array of
# nodes.
class Concat < SyntaxNode #:nodoc:
  # Nullable only if every factor is nullable.
  def null?
    @content.all?(&:null?)
  end

  # First positions of the leading factors, up to and including the first
  # factor which is not nullable.
  def first
    collected = []
    @content.each do |factor|
      collected.concat(factor.first)
      break unless factor.null?
    end

    collected.sort
  end

  # Last positions of the trailing factors, walking backwards up to and
  # including the first factor which is not nullable.
  def last
    collected = []
    @content.reverse_each do |factor|
      collected.concat(factor.last)
      break unless factor.null?
    end

    collected.sort
  end

  # Follow pairs of every factor plus, for each pair of neighbours, a pair
  # from every last position of the left factor to every first position
  # of the right factor.
  def follow
    pairs = []

    @content.each_cons(2) do |left, right|
      pairs |= left.follow
      pairs.concat(left.last.product(right.first))
    end

    pairs |= @content.last.follow

    pairs.sort
  end

  # Factors are written one after another; unions are put into brackets.
  def to_s
    @content.map { |factor| Union === factor ? "(#{factor.to_s})" : factor.to_s }.join
  end
end
449
+ end #of module RE
450
+ end #of module RLSM