rlsm 0.2.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,492 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'rlsm'))
2
+ require 'dfa'
3
+
4
+ require 'enumerator'
5
+
6
# A regular expression over single-character symbols.
#
# Pattern syntax (see the constants below):
#   (  )   grouping
#   *      Kleene star
#   |      union
#   &      lambda, the empty word
# Every other character is an ordinary alphabet symbol. The empty string
# denotes the empty set (the language containing no word at all).
#
# On construction the pattern is validated, simplified and parsed into the
# sets :first, :last, :follow and the flag :null, which #to_dfa later uses
# for a subset construction.
class RLSM::RE
  LeftBracket = '('
  RightBracket = ')'
  Star = '*'
  Union = '|'
  Lambda = '&'
  Specials = [LeftBracket, RightBracket, Star, Union, Lambda]

  # Short textual representation: class name plus the (simplified) pattern.
  def inspect
    "<#{self.class}: #@pattern>"
  end

  # Creates a new regular expression.
  #
  # pattern:: the pattern string. '' yields the empty set; a pattern made
  #           up solely of special characters is treated as Lambda.
  #
  # Raises REException (defined outside this file) if the brackets are
  # unbalanced or the pattern is otherwise not well formed.
  def initialize(pattern = '')
    if pattern == ''
      @empty_set = true
      @pattern = ''
      @parsed = { :first => [], :last => [], :follow => [], :null => false }
      return
    end

    # Work on a private copy: the preparse steps use gsub!/squeeze!, which
    # would otherwise modify the caller's string (or the Lambda constant)
    # and raise on frozen strings.
    @pattern = if pattern.scan(/./).all? { |x| Specials.include? x }
                 Lambda.dup
               else
                 pattern.dup
               end

    validate_brackets_balanced
    validate_form
    preparse
    parse
  end

  # Returns the pattern of the regexp.
  attr_reader :pattern

  # Returns the union of this and the other re as a new re.
  # If one operand is the empty set, a copy of the other one is returned.
  def +(other)
    return RLSM::RE.new(@pattern) if other.pattern == ''
    return RLSM::RE.new(other.pattern) if @pattern == ''

    RLSM::RE.new(LeftBracket + @pattern + RightBracket +
                 Union +
                 LeftBracket + other.pattern + RightBracket)
  end

  # Returns the catenation of this and the other re as a new re.
  # If one operand is the empty set, the result is the empty set.
  def *(other)
    return RLSM::RE.new if other.pattern == '' || @pattern == ''

    RLSM::RE.new(LeftBracket + @pattern + RightBracket +
                 LeftBracket + other.pattern + RightBracket)
  end

  # Returns the starred re as a new re.
  #
  # NOTE(review): for the empty set this returns the empty set again,
  # although formally the star of the empty set is {lambda}. Kept as is
  # for compatibility -- confirm whether callers rely on it.
  def star
    return RLSM::RE.new if @pattern == ''

    RLSM::RE.new(LeftBracket + @pattern + RightBracket + Star)
  end

  # Alters the re in place to its star form. Returns the altered re.
  # The empty set is left untouched.
  def star!
    unless @pattern == ''
      @pattern = LeftBracket + @pattern + RightBracket + Star
      parse
    end

    self
  end

  # Returns a minimal DFA which accepts the same language.
  def to_dfa
    if @empty_set
      RLSM::DFA.new(:alphabet => [], :states => ['0'], :initial => '0',
                    :finals => [], :transitions => [])
    else
      add_initial_state
      perform_subset_construction
      RLSM::DFA.create(@dfa_hash).minimize!(:rename => :new)
    end
  end

  # Returns true if both res have the same pattern or, failing that, if
  # their minimal DFAs are isomorphic.
  def ==(other)
    return true if @pattern == other.pattern

    to_dfa.isomorph_to?(other.to_dfa)
  end

  private

  # Adds the artificial start position -1 and connects it to every first
  # position. Using |= keeps repeated calls from adding duplicates.
  def add_initial_state
    @parsed[:initial] = [-1]
    @parsed[:follow] |= @parsed[:initial].product(@parsed[:first])
  end

  # Builds @dfa_hash (:transitions, :finals, :initial) by a subset
  # construction over sets of positions. States are renamed to the string
  # form of the index at which they were processed, so the initial state
  # is always '0'.
  def perform_subset_construction
    initial = @parsed[:initial]
    @dfa_hash = { :transitions => [], :finals => [], :initial => '0' }
    @dfa_hash[:finals] << initial if @parsed[:null]

    alphabet = @iso.uniq
    unmarked = [initial]
    marked = []

    until unmarked.empty?
      state = unmarked.shift
      marked << state

      alphabet.each do |char|
        nstate = move(state, char)
        known = unmarked.include?(nstate) || marked.include?(nstate)
        unmarked << nstate unless known

        accepting = @parsed[:last].any? { |x| nstate.include? x }
        if accepting && !@dfa_hash[:finals].include?(nstate)
          @dfa_hash[:finals] << nstate
        end

        @dfa_hash[:transitions] << [char, state, nstate]
      end
    end

    @dfa_hash[:finals].map! { |x| marked.index(x).to_s }
    @dfa_hash[:transitions].map! do |c, x, y|
      [c, marked.index(x).to_s, marked.index(y).to_s]
    end
  end

  # Returns the sorted, duplicate-free list of positions which are
  # reachable from a position in +state+ by reading the character +c+.
  def move(state, c)
    reachable = state.map do |x|
      pairs = @parsed[:follow].select { |y, z| y == x && @iso[z] == c }
      pairs.map { |pair| pair.last }
    end

    reachable.flatten.uniq.sort
  end

  # Parses @pattern, stores the result in @parsed and replaces @pattern
  # by the simplified pattern computed while parsing.
  def parse
    pat, @iso = transform_pattern_to_unique_identifiers

    @parsed = parse_pattern(pat)
    @pattern = @parsed[:pattern]
  end

  # Parses the token list +pat+ and returns a hash with the keys :first,
  # :last, :follow, :null and :pattern (and :type for stars and unions).
  # +parent+ is the kind of the enclosing expression; only the value
  # :star is ever checked.
  def parse_pattern(pat, parent = nil)
    pat = remove_surrounding_brackets pat
    pat = [Lambda] if pat.all? { |x| Specials.include? x }

    case type_of pat
    when :term  then parse_term(pat, parent)
    when :star  then parse_star(pat, parent)
    when :union then parse_union(pat, parent)
    when :cat   then parse_cat(pat, parent)
    else
      raise REException, "Unable to parse pattern: #{pat.join}"
    end
  end

  # Parses every token list in +childs+ with the given +parent+.
  def parse_children(childs, parent)
    childs.map { |child| parse_pattern(child, parent) }
  end

  # Splits +child+ further if it is itself of the given +type+ (:union
  # or :cat), otherwise returns it wrapped in a one element array.
  def recursive_split(child, type)
    if type_of(child) == type
      send("split_#{type}".to_sym, child)
    else
      [child]
    end
  end

  # Parses a union; the alternatives are simplified and sorted.
  def parse_union(p, parent)
    childs = parse_children(split_union(p), parent)
    childs = simplify_union(childs, parent)

    # If after simplification there is only one child left, the union
    # isn't needed anymore.
    return childs.first if childs.size == 1

    construct_union_result_from sort_union(childs)
  end

  # Splits the token list at every top-level Union sign.
  def split_union(p)
    depth = 0
    splitted = [[]]
    p.each do |x|
      depth += count(x)
      if depth == 0 && x == Union
        splitted << remove_surrounding_brackets(splitted.pop)
        splitted << []
      else
        splitted.last << x
      end
    end

    splitted.inject([]) { |res, x| res | recursive_split(x, :union) }
  end

  # Removes superfluous alternatives from a union.
  def simplify_union(childs, parent)
    # An explicit empty word isn't needed if
    #  - the parent is a star
    #  - some other nullable alternative exists
    other_nullable = childs.any? { |x| x[:null] && x[:pattern] != Lambda }
    if other_nullable || parent == :star
      childs = childs.reject { |x| x[:pattern] == Lambda }
    end

    # Simplify something like 'a|a' to 'a'
    childs.inject([]) do |res, child|
      res << child unless res.any? { |x| x[:pattern] == child[:pattern] }
      res
    end
  end

  # Sorts the alternatives by pattern, with Lambda always first.
  def sort_union(childs)
    childs.sort do |x1, x2|
      if x1[:pattern] == Lambda
        -1
      elsif x2[:pattern] == Lambda
        1
      else
        x1[:pattern] <=> x2[:pattern]
      end
    end
  end

  # Combines the parsed alternatives into the result hash of the union.
  def construct_union_result_from(childs)
    res = {}
    res[:type] = :union

    res[:null] = childs.any? { |x| x[:null] }
    res[:first] = childs.map { |x| x[:first] }.flatten
    res[:last] = childs.map { |x| x[:last] }.flatten
    res[:follow] = childs.inject([]) { |r, x| r | x[:follow] }
    res[:pattern] = childs.map { |x| x[:pattern] }.join(Union)

    res
  end

  # Parses a catenation.
  def parse_cat(p, parent)
    parts = split_cat(p)

    # The star-specific simplifications done for children (dropping
    # Lambda from a union, reducing aa* to a) are only valid for the
    # direct operand of a star. Handing :star down to the factors of a
    # real catenation would change the language, e.g. (a(&|b))* would
    # become (ab)*. The parent is therefore only inherited if the
    # catenation consists of a single factor.
    child_parent = parts.size == 1 ? parent : :cat
    childs = parse_children(parts, child_parent)

    childs = simplify_cat(childs, parent)

    # If after simplification there is only one child left, the cat
    # isn't needed anymore.
    return childs.first if childs.size == 1

    construct_cat_result_from childs
  end

  # Splits the token list into the factors of a catenation. Bracketed
  # groups and starred items become factors of their own; empty factors
  # and single Lambdas are dropped.
  def split_cat(p)
    splitted = [[]]
    depth = 0
    p.each_with_index do |x, i|
      depth += count(x)
      if depth == 1 && x == LeftBracket
        # A top-level group starts a new factor.
        splitted << [LeftBracket]
      elsif depth == 0
        if p[i + 1] == Star
          if x == RightBracket
            # Closing bracket of a starred group.
            splitted.last << RightBracket
            splitted.last << Star
            splitted << []
          else
            # A single starred symbol.
            splitted << [x, Star]
            splitted << []
          end
        else
          # The Star itself was already attached by the branch above.
          splitted.last << x unless x == Star
          if x == RightBracket
            last = splitted.pop
            splitted << remove_surrounding_brackets(last) unless last.empty?
            splitted << []
          end
        end
      else
        splitted.last << x
      end
    end

    splitted.inject([]) do |res, x|
      if x.empty? || x == [Lambda]
        res
      else
        res | recursive_split(x, :cat)
      end
    end
  end

  # Removes superfluous factors from a catenation.
  def simplify_cat(childs, parent)
    # Simplify a*a* to a*
    childs = childs.inject([]) do |res, child|
      repeated_star = child[:type] == :star &&
                      res.last && res.last[:type] == :star &&
                      child[:pattern] == res.last[:pattern]
      res << child unless repeated_star

      res
    end

    # Simplify (aa*)* to a*
    if parent == :star && childs.size == 2
      star_exp, other = childs.partition { |x| x[:type] == :star }
      unless star_exp.empty? || other.empty?
        p1 = remove_surrounding_brackets(star_exp.first[:pattern].
                                         scan(/./)[0..-2])
        p2 = remove_surrounding_brackets(other.first[:pattern].
                                         scan(/./))

        return other if p1 == p2
      end
    end

    childs
  end

  # Combines the parsed factors into the result hash of the catenation.
  # Union factors get their pattern wrapped in brackets (in place).
  def construct_cat_result_from(childs)
    childs.each do |x|
      if x[:type] == :union
        x[:pattern] = LeftBracket + x[:pattern] + RightBracket
      end
    end

    res = {}
    res[:null] = childs.all? { |x| x[:null] }

    # First positions: those of the leading factors up to and including
    # the first non-nullable one.
    res[:first] = []
    childs.each do |x|
      res[:first] |= x[:first]
      break unless x[:null]
    end

    # Last positions: the same, seen from the end.
    res[:last] = []
    childs.reverse.each do |x|
      res[:last] |= x[:last]
      break unless x[:null]
    end

    res[:follow] = childs.inject([]) { |r, x| r | x[:follow] }

    # The last positions of a factor are followed by the first positions
    # of the next factor and, while the factors in between are nullable,
    # of the ones after that.
    (1...childs.size).each do |i|
      res[:follow] |= childs[i - 1][:last].product(childs[i][:first])
      j = i
      while childs[j][:null] && (j < childs.size - 1)
        res[:follow] |= childs[i - 1][:last].product(childs[j + 1][:first])
        j += 1
      end
    end

    res[:pattern] = childs.map { |x| x[:pattern] }.join

    res
  end

  # Parses a plain sequence of symbols (Lambdas are ignored).
  def parse_term(pat, parent)
    pat = pat.reject { |x| x == Lambda }

    # Every symbol is followed by its right neighbour. The block form of
    # each_cons is used because enum_cons no longer exists in Ruby >= 1.9.
    follow = []
    pat.each_cons(2) { |pair| follow << pair }

    res = {}

    res[:first] = [pat.first].compact
    res[:last] = [pat.last].compact
    res[:follow] = follow
    res[:null] = pat.empty?
    res[:pattern] = if res[:null]
                      Lambda
                    else
                      pat.map { |i| @iso[i] }.join
                    end
    res
  end

  # Parses a starred expression. +pat+ ends with the Star token.
  def parse_star(pat, parent)
    pat = remove_surrounding_brackets(pat[0..-2])
    child = parse_pattern(pat, :star)

    # The star of Lambda is Lambda and the star of a star is that star.
    return child if child[:pattern] == Lambda || child[:type] == :star

    res = {}
    res[:type] = :star
    res[:null] = true
    res[:first] = child[:first]
    res[:last] = child[:last]
    res[:follow] = (child[:follow] | child[:last].product(child[:first]))
    res[:pattern] = if child[:pattern].size > 1
                      LeftBracket + child[:pattern] + RightBracket + Star
                    else
                      child[:pattern] + Star
                    end

    res
  end

  # Strips bracket pairs which enclose the whole token list.
  def remove_surrounding_brackets(pat)
    pat = pat[1..-2] while type_of(pat) == :surr_brackets
    pat
  end

  # Classifies the token list as :term, :union, :star, :surr_brackets
  # or :cat by looking at the tokens outside of any nested brackets.
  def type_of(p)
    return :term unless p.any? { |x| (Specials - [Lambda]).include? x }

    # Collect the tokens at nesting depth 0, including the brackets
    # which open and close a top-level group.
    unnested_characters = []
    depth = 0
    p.each do |x|
      depth -= 1 if x == RightBracket
      unnested_characters << x if depth == 0
      depth += 1 if x == LeftBracket
    end

    return :union if unnested_characters.include? Union
    return :star if p.size == 2 && p.last == Star && !Specials.include?(p[0])
    return :star if unnested_characters == [LeftBracket, RightBracket, Star]
    return :surr_brackets if unnested_characters == [LeftBracket, RightBracket]

    :cat
  end

  # Returns [pat, iso]: +pat+ is the pattern as a token list in which
  # the n-th ordinary character is replaced by the integer n (counting
  # from 0), and iso[n] is that character.
  def transform_pattern_to_unique_identifiers
    iso = []
    pat = @pattern.scan(/./).map do |char|
      if Specials.include? char
        char
      else
        iso << char
        iso.size - 1
      end
    end

    [pat, iso]
  end

  # Textual simplifications which are applied to @pattern before parsing.
  def preparse
    substitute_empty_brackets
    simplify_brackets_around_singeltons
    simplify_lambdastar_to_lambda
    simplify_implicit_empty_set_unions
    squeeze_repeated_specials
  end

  # Collapses runs of Star, Union and Lambda into a single character.
  def squeeze_repeated_specials
    @pattern.squeeze! Star
    @pattern.squeeze! Union
    @pattern.squeeze! Lambda
  end

  # Removes union signs which have no left operand.
  def simplify_implicit_empty_set_unions
    @pattern.gsub!(LeftBracket + Union, LeftBracket)
    @pattern.gsub!(LeftBracket + Star + Union, LeftBracket)
    @pattern.gsub!(Union + Star + Union, Union)
  end

  # Replaces '&*' by '&' until no such sequence is left.
  def simplify_lambdastar_to_lambda
    str = Lambda + Star
    @pattern.gsub!(str, Lambda) while @pattern.include? str
  end

  # Replaces '()' by Lambda.
  def substitute_empty_brackets
    @pattern.gsub! LeftBracket + RightBracket, Lambda
  end

  # Replaces '(x)' by 'x' for any single character x, repeatedly.
  def simplify_brackets_around_singeltons
    re = Regexp.new("\\#{LeftBracket}(.)\\#{RightBracket}")
    @pattern.gsub!(re, '\1') while @pattern =~ re
  end

  # Raises REException if a star directly follows an opening bracket and
  # is itself followed by an ordinary character.
  def validate_form
    if @pattern =~ Regexp.new("\\#{LeftBracket}\\#{Star}[^#{Specials.join}]")
      raise REException, "Not wellformed. Detected '#{Regexp.last_match(0)}'"
    end
  end

  # Raises REException unless every bracket is properly opened and closed.
  def validate_brackets_balanced
    depth = 0
    never_negative = @pattern.scan(/./).all? do |x|
      depth += count(x)
      depth >= 0
    end

    unless never_negative && depth == 0
      raise REException, "Unbalanced parentheses in pattern!"
    end
  end

  # Nesting depth change caused by the token +x+: 1 for an opening
  # bracket, -1 for a closing bracket, 0 for everything else.
  def count(x)
    return 1 if x == LeftBracket
    return -1 if x == RightBracket

    0
  end
end