rlsm 0.2.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +0 -0
- data/Manifest.txt +3 -28
- data/README.txt +0 -0
- data/Rakefile +0 -0
- data/bin/smon +1 -3
- data/lib/data/monoids.db +0 -0
- data/lib/dfa.rb +656 -0
- data/lib/monoid.rb +778 -0
- data/lib/re.rb +492 -0
- data/lib/rlsm.rb +57 -36
- metadata +5 -30
- data/lib/rlsm/dfa.rb +0 -705
- data/lib/rlsm/exceptions.rb +0 -39
- data/lib/rlsm/mgen.rb +0 -138
- data/lib/rlsm/monkey_patching.rb +0 -126
- data/lib/rlsm/monoid.rb +0 -552
- data/lib/rlsm/monoid_db.rb +0 -123
- data/lib/rlsm/regexp.rb +0 -229
- data/lib/rlsm/regexp_nodes/concat.rb +0 -112
- data/lib/rlsm/regexp_nodes/primexp.rb +0 -49
- data/lib/rlsm/regexp_nodes/renodes.rb +0 -95
- data/lib/rlsm/regexp_nodes/star.rb +0 -50
- data/lib/rlsm/regexp_nodes/union.rb +0 -85
- data/lib/smon/commands/db_find.rb +0 -37
- data/lib/smon/commands/db_stat.rb +0 -20
- data/lib/smon/commands/exit.rb +0 -9
- data/lib/smon/commands/help.rb +0 -31
- data/lib/smon/commands/intro.rb +0 -32
- data/lib/smon/commands/monoid.rb +0 -27
- data/lib/smon/commands/quit.rb +0 -10
- data/lib/smon/commands/regexp.rb +0 -20
- data/lib/smon/commands/reload.rb +0 -22
- data/lib/smon/commands/show.rb +0 -21
- data/lib/smon/presenter.rb +0 -18
- data/lib/smon/presenter/txt_presenter.rb +0 -157
- data/lib/smon/smon.rb +0 -79
- data/test/dfa_spec.rb +0 -99
- data/test/monoid_spec.rb +0 -270
- data/test/regexp_spec.rb +0 -25
data/lib/re.rb
ADDED
@@ -0,0 +1,492 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'rlsm'))
|
2
|
+
require 'dfa'
|
3
|
+
|
4
|
+
require 'enumerator'
|
5
|
+
|
6
|
+
# A regular expression over single-character symbols.
#
# Supported syntax: grouping with '(' and ')', the postfix star '*',
# union '|' and the empty word '&'. Every other character is an
# ordinary symbol. An empty pattern string denotes the empty set.
#
# On construction the pattern is validated, textually simplified and
# parsed into a hash describing the expression: its :first, :last and
# :follow position sets, whether it is nullable (:null) and a normalized
# :pattern string. #to_dfa builds a DFA from these sets.
class RLSM::RE
  LeftBracket = '('
  RightBracket = ')'
  Star = '*'
  Union = '|'
  Lambda = '&'
  Specials = [LeftBracket, RightBracket, Star, Union, Lambda]

  def inspect
    "<#{self.class}: #@pattern>"
  end

  # Creates a new regular expression from +pattern+.
  #
  # An empty string yields the empty set. A pattern which consists only
  # of special characters is treated as the empty word (Lambda).
  #
  # Raises REException if the brackets are unbalanced or the pattern is
  # not well formed.
  def initialize(pattern = '')
    if pattern == ''
      @empty_set = true
      @pattern = ''
      @parsed = { :first => [], :last => [], :follow => [], :null => false }
      return
    end

    # Work on a private copy: the preparse step modifies the string in
    # place (gsub!, squeeze!), which must neither alter the caller's
    # argument nor the Lambda constant, and must not fail for frozen input.
    @pattern = if pattern.scan(/./).all? { |x| Specials.include? x }
                 Lambda.dup
               else
                 pattern.dup
               end

    validate_brackets_balanced
    validate_form
    preparse
    parse
  end

  # Returns the pattern of the regexp.
  attr_reader :pattern

  # Returns the union of this and the other re as new re.
  def +(other)
    # If one operand is the empty set, the union is the other operand.
    return RLSM::RE.new(@pattern) if other.pattern == ''
    return RLSM::RE.new(other.pattern) if @pattern == ''

    RLSM::RE.new(LeftBracket + @pattern + RightBracket +
                 Union +
                 LeftBracket + other.pattern + RightBracket)
  end

  # Returns the catenation of this and other re.
  def *(other)
    # Catenation with the empty set is the empty set.
    return RLSM::RE.new if other.pattern == '' || @pattern == ''

    RLSM::RE.new(LeftBracket + @pattern + RightBracket +
                 LeftBracket + other.pattern + RightBracket)
  end

  # Returns the starred re as a new re. The empty set stays the empty set.
  def star
    return RLSM::RE.new if @pattern == ''

    RLSM::RE.new(LeftBracket + @pattern + RightBracket + Star)
  end

  # Alters the re in place to the star form. Returns the altered re.
  def star!
    unless @pattern == ''
      @pattern = LeftBracket + @pattern + RightBracket + Star
      parse
    end

    self
  end

  # Returns a minimal DFA which accepts the same language.
  def to_dfa
    if @empty_set
      RLSM::DFA.new(:alphabet => [], :states => ['0'], :initial => '0',
                    :finals => [], :transitions => [])
    else
      add_initial_state
      perform_subset_construction
      RLSM::DFA.create(@dfa_hash).minimize!(:rename => :new)
    end
  end

  # Returns true if the res are equal, i.e. if the patterns are identical
  # or the corresponding DFAs are isomorphic. Objects which do not look
  # like a regular expression are never equal.
  def ==(other)
    return false unless other.respond_to?(:pattern) && other.respond_to?(:to_dfa)
    return true if @pattern == other.pattern

    to_dfa.isomorph_to?(other.to_dfa)
  end

  private

  # Adds the artificial start position -1 and connects it with every
  # first position of the expression.
  def add_initial_state
    @parsed[:initial] = [-1]
    @parsed[:follow] |= @parsed[:initial].product(@parsed[:first])
  end

  # Builds @dfa_hash (:transitions, :finals, :initial) by exploring all
  # sets of positions reachable from the initial one. Each discovered set
  # becomes a state named after its discovery index (as a string).
  def perform_subset_construction
    @dfa_hash = { :transitions => [], :finals => [], :initial => '0' }
    @dfa_hash[:finals] << @parsed[:initial] if @parsed[:null]
    alphabet = @iso.uniq
    unmarked = [@parsed[:initial]]
    marked = []

    until unmarked.empty?
      state = unmarked.shift
      marked << state
      alphabet.each do |char|
        nstate = move(state, char)
        unmarked << nstate unless (unmarked | marked).include?(nstate)
        if @parsed[:last].any? { |x| nstate.include? x }
          @dfa_hash[:finals] << nstate unless @dfa_hash[:finals].include?(nstate)
        end
        @dfa_hash[:transitions] << [char, state, nstate]
      end
    end

    # Replace the position sets by their index in discovery order.
    @dfa_hash[:finals].map! { |x| marked.index(x).to_s }
    @dfa_hash[:transitions].map! do |c, x, y|
      [c, marked.index(x).to_s, marked.index(y).to_s]
    end
  end

  # Returns the sorted set of positions which are reachable from a
  # position in +state+ by reading the character +c+.
  def move(state, c)
    targets = state.map do |x|
      followers = @parsed[:follow].select { |y, z| y == x && @iso[z] == c }
      followers.map { |pair| pair.last }
    end

    targets.flatten.uniq.sort
  end

  # Parses @pattern, stores the result in @parsed and replaces @pattern
  # by the normalized pattern computed during parsing.
  def parse
    pat, @iso = transform_pattern_to_unique_identifiers

    @parsed = parse_pattern(pat)
    @pattern = @parsed[:pattern]
  end

  # Dispatches on the kind of +pat+ (an array of position identifiers and
  # special characters) and returns the parse result hash.
  # +parent+ is the type of the enclosing expression (only :star is used).
  #
  # Raises REException if the kind of the pattern can't be determined.
  def parse_pattern(pat, parent = nil)
    pat = remove_surrounding_brackets pat
    pat = [Lambda] if pat.all? { |x| Specials.include? x }

    # 'then' instead of the old 'when x : expr' form, which is a syntax
    # error since Ruby 1.9.
    case type_of pat
    when :term then parse_term(pat, parent)
    when :star then parse_star(pat, parent)
    when :union then parse_union(pat, parent)
    when :cat then parse_cat(pat, parent)
    else
      raise REException, "Unable to parse pattern: #{pat.join}"
    end
  end

  # Parses every child pattern with the given parent type.
  def parse_children(childs, parent)
    childs.map { |child| parse_pattern(child, parent) }
  end

  # Splits +child+ again if it is itself of the given +type+
  # (:union or :cat), otherwise returns it wrapped in an array.
  def recursive_split(child, type)
    return [child] unless type_of(child) == type

    send "split_#{type}".to_sym, child
  end

  # Parses a union: splits it into alternatives, simplifies and sorts
  # them and combines their results.
  def parse_union(p, parent)
    childs = parse_children(split_union(p), parent)
    childs = simplify_union(childs, parent)

    # If after simplification there is only one child left, the union
    # isn't needed anymore.
    return childs.first if childs.size == 1

    construct_union_result_from sort_union(childs)
  end

  # Splits +p+ at every Union character which is not nested in brackets.
  def split_union(p)
    depth = 0
    splitted = [[]]
    p.each do |x|
      depth += count(x)
      if depth == 0 && x == Union
        splitted << remove_surrounding_brackets(splitted.pop)
        splitted << []
      else
        splitted.last << x
      end
    end

    splitted.inject([]) { |res, x| res | recursive_split(x, :union) }
  end

  # Removes superfluous alternatives from a union.
  def simplify_union(childs, parent)
    # The explicit empty word is not needed if
    #  - the parent is a star
    #  - some other nullable alternative exists
    if childs.any? { |x| x[:null] && x[:pattern] != Lambda } || parent == :star
      childs = childs.reject { |x| x[:pattern] == Lambda }
    end

    # Simplify something like 'a|a' to 'a'.
    childs.inject([]) do |res, child|
      res << child unless res.any? { |x| x[:pattern] == child[:pattern] }
      res
    end
  end

  # Sorts the alternatives by their pattern; the empty word comes first.
  def sort_union(childs)
    childs.sort do |x1, x2|
      if x1[:pattern] == Lambda
        -1
      elsif x2[:pattern] == Lambda
        1
      else
        x1[:pattern] <=> x2[:pattern]
      end
    end
  end

  # Combines the results of the alternatives to the result of the union.
  def construct_union_result_from(childs)
    res = {}
    res[:type] = :union

    res[:null] = childs.any? { |x| x[:null] }
    res[:first] = childs.map { |x| x[:first] }.flatten
    res[:last] = childs.map { |x| x[:last] }.flatten
    res[:follow] = childs.inject([]) { |r, x| r | x[:follow] }
    res[:pattern] = childs.map { |x| x[:pattern] }.join(Union)

    res
  end

  # Parses a catenation: splits it into factors, simplifies them and
  # combines their results.
  def parse_cat(p, parent)
    childs = parse_children(split_cat(p), parent)
    childs = simplify_cat(childs, parent)

    # If after simplification there is only one child left, the cat
    # isn't needed anymore.
    return childs.first if childs.size == 1

    construct_cat_result_from childs
  end

  # Splits +p+ into the factors of a catenation. Bracketed groups and
  # starred expressions become factors of their own, runs of unnested
  # plain symbols stay together. Empty factors and the empty word are
  # dropped.
  def split_cat(p)
    splitted = [[]]
    depth = 0
    p.each_with_index do |x, i|
      depth += count(x)
      if depth == 1 && x == LeftBracket
        # An unnested group starts: open a new factor.
        splitted << [LeftBracket]
      elsif depth == 0
        if p[i + 1] == Star
          if x == RightBracket
            # A starred group ends here.
            splitted.last << RightBracket
            splitted.last << Star
            splitted << []
          else
            # A single starred symbol.
            splitted << [x, Star]
            splitted << []
          end
        else
          # The star itself was already added together with its operand.
          splitted.last << x unless x == Star
          if x == RightBracket
            last = splitted.pop
            splitted << remove_surrounding_brackets(last) unless last.empty?
            splitted << []
          end
        end
      else
        splitted.last << x
      end
    end

    splitted.inject([]) do |res, x|
      if x.empty? || x == [Lambda]
        res
      else
        res | recursive_split(x, :cat)
      end
    end
  end

  # Removes superfluous factors from a catenation.
  def simplify_cat(childs, parent)
    # Simplify a*a* to a*
    childs = childs.inject([]) do |res, child|
      repeated_star = child[:type] == :star &&
                      res.last && res.last[:type] == :star &&
                      child[:pattern] == res.last[:pattern]
      res << child unless repeated_star
      res
    end

    # Simplify (aa*)* to a*
    if parent == :star && childs.size == 2
      star_exp, other = childs.partition { |x| x[:type] == :star }
      unless star_exp.empty? || other.empty?
        # Compare the starred expression without its star with the other one.
        p1 = remove_surrounding_brackets(star_exp.first[:pattern].
                                           scan(/./)[0..-2])
        p2 = remove_surrounding_brackets(other.first[:pattern].scan(/./))

        return other if p1 == p2
      end
    end

    childs
  end

  # Combines the results of the factors to the result of the catenation.
  def construct_cat_result_from(childs)
    # Unions need brackets inside of a catenation.
    childs.each do |x|
      if x[:type] == :union
        x[:pattern] = LeftBracket + x[:pattern] + RightBracket
      end
    end

    res = {}
    res[:null] = childs.all? { |x| x[:null] }

    # First positions: those of the leading factors up to and including
    # the first factor which is not nullable.
    childs.each do |x|
      res[:first] = (res[:first] ||= []) | x[:first]
      break unless x[:null]
    end

    # Last positions: the same from the right.
    childs.reverse.each do |x|
      res[:last] = (res[:last] ||= []) | x[:last]
      break unless x[:null]
    end

    res[:follow] = childs.inject([]) { |r, x| r | x[:follow] }

    # A last position of a factor is followed by the first positions of
    # the next factor and, as long as the factors in between are
    # nullable, by the first positions of the factors after it.
    (1...childs.size).each do |i|
      res[:follow] |= childs[i - 1][:last].product(childs[i][:first])
      j = i
      while childs[j][:null] && (j < childs.size - 1)
        res[:follow] |= childs[i - 1][:last].product(childs[j + 1][:first])
        j += 1
      end
    end

    res[:pattern] = childs.map { |x| x[:pattern] }.join

    res
  end

  # Parses a plain word (a sequence of positions, maybe with empty words
  # in between).
  def parse_term(pat, parent)
    pat = pat.reject { |x| x == Lambda }

    res = {}

    res[:first] = [pat.first].compact
    res[:last] = [pat.last].compact

    # Every position is followed by its right neighbour. The block form
    # of each_cons replaces Array#enum_cons, which was removed in Ruby 1.9.
    res[:follow] = []
    pat.each_cons(2) { |pair| res[:follow] << pair }

    res[:null] = pat.empty?
    res[:pattern] = if res[:null]
                      Lambda
                    else
                      pat.map { |i| @iso[i] }.join
                    end
    res
  end

  # Parses a starred expression. The star is dropped if the operand is
  # the empty word or already a star.
  def parse_star(pat, parent)
    pat = remove_surrounding_brackets(pat[0..-2])
    child = parse_pattern(pat, :star)

    return child if child[:pattern] == Lambda || child[:type] == :star

    res = {}
    res[:type] = :star
    res[:null] = true
    res[:first] = child[:first]
    res[:last] = child[:last]
    # The star allows to start again after a last position.
    res[:follow] = (child[:follow] | child[:last].product(child[:first]))
    res[:pattern] = if child[:pattern].size > 1
                      LeftBracket + child[:pattern] + RightBracket + Star
                    else
                      child[:pattern] + Star
                    end

    res
  end

  # Strips brackets as long as they enclose the whole pattern.
  def remove_surrounding_brackets(pat)
    pat = pat[1..-2] while type_of(pat) == :surr_brackets
    pat
  end

  # Returns the kind of the pattern +p+: :term, :union, :star,
  # :surr_brackets or :cat.
  def type_of(p)
    return :term unless p.any? { |x| (Specials - [Lambda]).include? x }

    # Collect everything which is not nested inside of brackets,
    # including the outermost brackets themselves.
    unnested_characters = []
    depth = 0
    p.each do |x|
      if x == LeftBracket
        unnested_characters << x if depth == 0
        depth += 1
      elsif x == RightBracket
        depth -= 1
        unnested_characters << x if depth == 0
      else
        unnested_characters << x if depth == 0
      end
    end

    return :union if unnested_characters.include? Union
    return :star if p.size == 2 && p.last == Star && !Specials.include?(p[0])
    return :star if unnested_characters == [LeftBracket, RightBracket, Star]
    return :surr_brackets if unnested_characters == [LeftBracket, RightBracket]

    :cat
  end

  # Replaces every ordinary character of @pattern by a unique position
  # number. Returns the transformed character array and the array which
  # maps a position number back to its character.
  def transform_pattern_to_unique_identifiers
    iso = []
    pat = @pattern.scan(/./).map do |char|
      if Specials.include? char
        char
      else
        iso << char
        iso.size - 1
      end
    end

    [pat, iso]
  end

  # Textual simplifications of @pattern before the real parsing.
  def preparse
    substitute_empty_brackets
    simplify_brackets_around_singeltons
    simplify_lambdastar_to_lambda
    simplify_implicit_empty_set_unions
    squeeze_repeated_specials
  end

  # Collapses runs of the same star, union or lambda character.
  def squeeze_repeated_specials
    @pattern.squeeze! Star
    @pattern.squeeze! Union
    @pattern.squeeze! Lambda
  end

  # Removes unions which have nothing or only a star on one side.
  def simplify_implicit_empty_set_unions
    @pattern.gsub!(LeftBracket + Union, LeftBracket)
    @pattern.gsub!(LeftBracket + Star + Union, LeftBracket)
    @pattern.gsub!(Union + Star + Union, Union)
  end

  # Replaces a starred empty word by the empty word.
  def simplify_lambdastar_to_lambda
    str = Lambda + Star
    @pattern.gsub!(str, Lambda) while @pattern.include? str
  end

  # Replaces empty brackets by the empty word.
  def substitute_empty_brackets
    @pattern.gsub! LeftBracket + RightBracket, Lambda
  end

  # Removes brackets around a single character, repeatedly.
  def simplify_brackets_around_singeltons
    re = Regexp.new("\\#{LeftBracket}(.)\\#{RightBracket}")
    @pattern.gsub!(re, '\1') while @pattern =~ re
  end

  # Raises REException if an opening bracket is directly followed by a
  # star and an ordinary character.
  def validate_form
    if @pattern =~ Regexp.new("\\#{LeftBracket}\\#{Star}[^#{Specials.join}]")
      raise REException, "Not wellformed. Detected '#{Regexp.last_match(0)}'"
    end
  end

  # Raises REException if a bracket is closed which was never opened or
  # if some bracket is not closed at all.
  def validate_brackets_balanced
    depth = 0
    @pattern.scan(/./).each do |x|
      depth += count(x)
      break if depth < 0
    end

    raise REException, "Unbalanced parentheses in pattern!" unless depth == 0
  end

  # Returns the change of the nesting depth caused by the character +x+.
  def count(x)
    return 1 if x == LeftBracket
    return -1 if x == RightBracket

    0
  end
end
|