rlsm 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{README.txt → README} +10 -23
- data/Rakefile +79 -7
- data/ext/array/array_c_ext.c +137 -0
- data/ext/array/extconf.rb +2 -0
- data/ext/binop/binop_c_ext.c +57 -0
- data/ext/binop/extconf.rb +2 -0
- data/ext/monoid/extconf.rb +2 -0
- data/ext/monoid/monoid_c_ext.c +330 -0
- data/lib/rlsm.rb +10 -14
- data/lib/rlsm/binary_operation.rb +151 -0
- data/lib/rlsm/dfa.rb +418 -602
- data/lib/rlsm/helper.rb +12 -0
- data/lib/rlsm/monoid.rb +454 -694
- data/lib/rlsm/regexp.rb +125 -0
- data/lib/rlsm/regexp_parser.rb +450 -0
- data/test/helpers.rb +66 -0
- data/test/test_binop.rb +119 -0
- data/test/test_dfa.rb +435 -0
- data/test/test_monoid.rb +552 -0
- data/test/test_regexp.rb +440 -0
- metadata +109 -37
- data/History.txt +0 -6
- data/Manifest.txt +0 -18
- data/bin/smon +0 -39
- data/data/monoids.db +0 -0
- data/lib/database.rb +0 -95
- data/lib/monkey_patching/array_ext.rb +0 -50
- data/lib/rlsm/re.rb +0 -504
- data/lib/smon/base.rb +0 -284
- data/lib/smon/db.rb +0 -98
- data/lib/smon/dot.rb +0 -65
- data/lib/smon/latex.rb +0 -313
- data/lib/smon/smon.rb +0 -183
- data/stdarb.tex +0 -118
data/lib/rlsm/regexp.rb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
+
require File.join(File.dirname(__FILE__), 'regexp_parser')
|
3
|
+
require File.join(File.dirname(__FILE__), 'dfa')
|
4
|
+
|
5
|
+
module RLSM
|
6
|
+
# A regular expression together with its parse tree. Instances can be
# combined (concatenation, union, Kleene closure) and converted to a DFA
# or to the syntactic monoid of the represented language.
class RegExp
  # Returns a RegExp which is the empty word.
  def self.empty_word
    new RLSM::RE::ParserHelpers::EmptyWordSymbol
  end

  # Returns a RegExp which represents the empty language.
  def self.empty_set
    new ''
  end

  # Synonym for new.
  def self.[](description)
    new(description)
  end

  # Creates a new RegExp. The +description+ is a string consisting of latin
  # letters, numbers and the following special characters
  # 1. +(+, +)+ for grouping subexpressions
  # 2. +|+ for union of regular expressions
  # 3. +*+ for the Kleene closure of a regular expression
  # 4. +@+ the empty word.
  #
  # Whitespace will be ignored and the empty string represents the empty
  # language.
  def initialize(description)
    @parse_tree = RE::Parser[ description ]
    @string = @parse_tree.to_s
  end

  attr_reader :parse_tree, :string

  # Concatenate +self+ with +other+.
  def +(other)
    RLSM::RegExp.new "(#@string)(#{other.string})"
  end

  # Returns the union of +self+ and +other+.
  def |(other)
    RLSM::RegExp.new "#@string|#{other.string}"
  end

  # Returns the Kleene closure of +self+.
  def star
    RLSM::RegExp.new "(#@string)*"
  end

  # Calculates a minimal DFA which represents the same language as +self+.
  def to_dfa
    RLSM::DFA.new(subset_construction).minimize!
  end

  # Simply returns self.
  def to_regexp
    self
  end

  # Calculates the syntactic monoid of the represented language.
  def to_monoid
    to_dfa.to_monoid
  end

  # Checks if +self+ is equal to +other+, i.e. they represent the same
  # language. Cheap checks (identical string, differing first/last letter
  # sets) are tried before the DFAs are built and compared.
  def ==(other)
    return true if @string == other.string

    first = @parse_tree.first.map { |pos| pos.to_s }.uniq
    other_first = other.parse_tree.first.map { |pos| pos.to_s }.uniq
    return false if first != other_first

    last = @parse_tree.last.map { |pos| pos.to_s }.uniq
    other_last = other.parse_tree.last.map { |pos| pos.to_s }.uniq
    return false if last != other_last

    to_dfa =~ other.to_dfa
  end

  private

  # Prepares the data for the subset construction. Returns
  # [initial state, final states, follow pairs, last positions].
  # An artificial initial position is added which is followed by every
  # first position of the parse tree.
  def set_up_subset_construction
    initial = RE::Position.new('i', -1)

    # Nodes without positions may report +nil+ as follow set; treat it as
    # empty. A fresh array is built, so the parse tree's data is untouched.
    follow = Array(@parse_tree.follow) +
             @parse_tree.first.map { |pos| [initial, pos] }

    finals = @parse_tree.null? ? [[initial]] : []

    [[initial], finals, follow, @parse_tree.last]
  end

  # Builds the textual automaton description which is handed to
  # RLSM::DFA.new. States are sets of positions, numbered in the order in
  # which they are processed.
  def subset_construction
    initial, finals, follow, last = set_up_subset_construction
    transitions = []

    unmarked = [initial]
    marked = []
    until unmarked.empty?
      current = unmarked.shift
      marked << current

      new_states(current, follow).each_pair do |char, state|
        unless unmarked.include?(state) || marked.include?(state)
          unmarked << state
        end

        accepting = last.any? { |pos| state.any? { |st_pos| st_pos === pos } }
        finals << state if accepting && !finals.include?(state)

        transitions << [current, state, char]
      end
    end

    final_part = finals.map { |state| "*s#{marked.index(state)}" }.join(' ')
    transition_part = transitions.map do |from, to, char|
      "s#{marked.index(from)}-#{char}->s#{marked.index(to)}"
    end.join(' ')

    "}s0 #{final_part} #{transition_part}"
  end

  # Returns a hash mapping each letter to the state (array of positions)
  # reached from +origin+ by reading that letter.
  #
  # Each state is stored without duplicates and in sorted order. Without
  # this normalization the same set of positions can show up as different
  # arrays (e.g. [a0, a1] and [a0, a1, a1]), which are then treated as
  # distinct states; for expressions like <tt>a*a*</tt> the construction
  # would never terminate.
  def new_states(origin, follow)
    successors = origin.map do |pos|
      follow.find_all { |pair| pair[0] === pos }.map { |pair| pair[-1] }
    end.flatten

    grouped = successors.inject({}) do |result, pos|
      bucket = (result[pos.to_s] ||= [])
      bucket << pos unless bucket.include?(pos)
      result
    end

    grouped.each_value { |bucket| bucket.sort! }
    grouped
  end
end # of class RegExp
|
124
|
+
end # of module RLSM
|
125
|
+
|
@@ -0,0 +1,450 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
+
|
3
|
+
module RLSM
|
4
|
+
module RE #:nodoc:
|
5
|
+
# Predicates used by the parser to classify single characters and whole
# token sequences. Characters are compared via +to_s+, so both plain strings
# and position objects can be passed.
module ParserHelpers #:nodoc:
  OpenBracket = '('
  CloseBracket = ')'
  UnionSymbol = '|'
  StarSymbol = '*'
  EmptyWordSymbol = '@'
  LetterRegexp = /[a-zA-Z0-9]/

  # True if +char+ is the opening bracket.
  def open_bracket?(char)
    OpenBracket == char.to_s
  end

  # True if +char+ is the closing bracket.
  def close_bracket?(char)
    CloseBracket == char.to_s
  end

  # True if +char+ is the union symbol.
  def union_symbol?(char)
    UnionSymbol == char.to_s
  end

  # True if +char+ is the star symbol.
  def star_symbol?(char)
    StarSymbol == char.to_s
  end

  # True if +char+ is the symbol for the empty word.
  def empty_symbol?(char)
    EmptyWordSymbol == char.to_s
  end

  # Truthy (match index) if +char+ contains a letter or digit, nil otherwise.
  def letter?(char)
    char.to_s =~ LetterRegexp
  end

  # True if +input+ contains neither a letter nor the empty word symbol.
  def empty_set?(input)
    input.none? { |token| letter?(token) || empty_symbol?(token) }
  end

  # True if +input+ contains the empty word symbol, no letter, and no
  # directly adjacent bracket pair "()".
  def empty_word?(input)
    return false unless input.any? { |token| empty_symbol?(token) }
    return false unless input.all? { |token| !letter?(token) }

    !input.join.include?(OpenBracket + CloseBracket)
  end

  # Truthy if +input+ consists of exactly one letter.
  def single_letter?(input)
    input.size == 1 && letter?(input[0])
  end

  # True if a union symbol occurs outside of any brackets. Tokens must
  # respond to +weight+ (bracket nesting contribution).
  def union?(input)
    level = 0
    input.each do |token|
      return true if level == 0 && union_symbol?(token)

      level += token.weight
    end

    false
  end

  # True if the whole +input+ is one starred expression: a two-token
  # sequence ending in a star, a starred expression followed by more stars,
  # or a bracketed expression followed by a star.
  def star?(input)
    return false unless star_symbol?(input[-1])
    return true if input.size == 2

    return star?(input[0..-2]) if star_symbol?(input[-2])

    open_bracket?(input[0]) && close_bracket?(input[-2]) &&
      !Parser.unbalanced_brackets?(input[1..-3])
  end
end
|
71
|
+
|
72
|
+
# A single character of a regular expression. Letters additionally carry
# the index of their occurrence; other characters have a +nil+ index.
class Position #:nodoc:
  include Comparable

  attr_reader :letter, :index

  def initialize(char, index = nil)
    @letter = char
    @index = index
  end

  # Compares with a String (by letter), a Numeric (by index) or another
  # Position (by letter, then by index). Returns +nil+ for anything else.
  def <=>(other)
    case other
    when String
      @letter <=> other
    when Numeric
      @index <=> other
    when Position
      @letter == other.letter ? @index <=> other.index : @letter <=> other.letter
    else
      nil
    end
  end

  # Hash-based operations (Array#|, Array#uniq, Hash keys) use +eql?+ and
  # +hash+ instead of +==+. Define them so that two positions with the same
  # letter and index are also treated as the same element there.
  def eql?(other)
    other.instance_of?(Position) &&
      @letter.eql?(other.letter) && @index.eql?(other.index)
  end

  def hash
    [@letter, @index].hash
  end

  # Contribution to the bracket nesting depth: +1 for an opening bracket,
  # -1 for a closing bracket, 0 otherwise.
  def weight
    return 1 if Parser.open_bracket?(@letter)
    return -1 if Parser.close_bracket?(@letter)

    0
  end

  def to_s
    @letter
  end

  def inspect
    "P(#@letter,#@index)"
  end
end
|
110
|
+
|
111
|
+
# Recursive descent parser which turns a description string into a tree of
# syntax nodes, applying some simplifications on the way.
class Parser #:nodoc:
  extend ParserHelpers

  # Strips whitespace from +string+, turns every character into a Position
  # (letters get consecutive indices starting at 0) and parses the result.
  # Raises RegExpError if the brackets are unbalanced.
  # NOTE(review): RegExpError is not defined in the visible source;
  # presumably it comes from the required helper file - confirm.
  def self.[](string)
    counter = -1
    tokens = string.gsub(/\s+/,'').scan(/./).map do |char|
      if letter?(char)
        Position.new(char, counter += 1)
      else
        Position.new(char)
      end
    end

    raise RegExpError, "Parse Error: Unbalanced brackets." if unbalanced_brackets?(tokens)

    parse(tokens)
  end

  # Parses a token array. The checks are ordered: empty set, empty word,
  # single letter, star, union; everything else is a concatenation.
  def self.parse(input)
    input = remove_surrounding_brackets(input)

    return EmptySet[] if empty_set?(input)
    return EmptyWord[] if empty_word?(input)
    return Prim[ input.first ] if single_letter?(input)
    return create_star_node(input) if star?(input)
    return create_union_node(input) if union?(input)

    create_concat_node(input)
  end

  # Note: a bare +private+ does not change the visibility of methods
  # defined with <tt>def self.</tt>; the methods below stay publicly callable.
  private

  # Builds the node for a starred expression (last token is the star).
  # A star of a star, of the empty set or of the empty word is returned
  # unchanged; the empty word is dropped from a starred union.
  # NOTE(review): the star of the empty set yields the empty set here,
  # although that language is usually the empty word - confirm intent.
  def self.create_star_node(input)
    inner = parse(input[0..-2])

    return inner if [Star, EmptySet, EmptyWord].any? { |klass| inner.instance_of?(klass) }
    return Star[ inner ] unless Union === inner

    Star[ Union[ inner.content.reject { |part| part == EmptyWord[] } ] ]
  end

  # Builds the node for a union. Empty sets and duplicates are dropped, the
  # empty word is kept (in front) only if no other alternative is nullable,
  # and an alternative is dropped if its star is an alternative as well.
  def self.create_union_node(input)
    parsed = union_split(input).map { |piece| parse(piece) }

    alternatives = []
    parsed.each do |node|
      next if EmptySet === node || alternatives.include?(node)

      alternatives << node
    end

    if alternatives.any? { |node| EmptyWord === node }
      alternatives = alternatives.reject { |node| node == EmptyWord[] }
      alternatives.unshift EmptyWord[] unless alternatives.any? { |node| node.null? }
    end

    starred, plain = alternatives.partition { |node| Star === node }
    plain.reject! { |node| starred.any? { |star| star.content == node } }
    plain |= starred

    return plain.first if plain.size == 1

    Union[ plain.sort ]
  end

  # Builds the node for a concatenation. An empty set factor makes the
  # whole expression the empty set; empty word factors are dropped.
  def self.create_concat_node(input)
    factors = concat_split(input)

    return EmptySet[] if factors.any? { |node| node == EmptySet[] }

    factors = factors.reject { |node| node == EmptyWord[] }

    case factors.size
    when 0 then EmptyWord[]
    when 1 then factors.first
    else Concat[ factors ]
    end
  end

  # Removes bracket pairs which enclose the whole token array.
  def self.remove_surrounding_brackets(string)
    stripped = string
    while open_bracket?(stripped.first) &&
          close_bracket?(stripped.last) &&
          !unbalanced_brackets?(stripped[1..-2])
      stripped = stripped[1..-2]
    end

    stripped
  end

  # True if a closing bracket appears before its opening bracket or if
  # some bracket is never closed.
  def self.unbalanced_brackets?(string)
    depth = 0
    string.each do |char|
      depth += char.weight
      return true if depth < 0
    end

    depth != 0
  end

  # Splits the token array at every union symbol outside of brackets.
  def self.union_split(string)
    depth = 0
    string.inject([[]]) do |parts, char|
      if depth == 0 && union_symbol?(char)
        parts << []
      else
        parts.last << char
      end
      depth += char.weight

      parts
    end
  end

  # Splits a concatenation into its factors and returns them as parsed
  # nodes. A factor is either a bracketed group or a single letter, each
  # optionally followed by a star. Other top level characters are skipped.
  def self.concat_split(string)
    nodes = []
    group = []
    depth = 0

    string.each_with_index do |char, index|
      depth += char.weight

      # Inside of brackets: just collect the tokens of the current group.
      unless depth == 0
        group << char
        next
      end

      followed_by_star = star_symbol?(string[index + 1])

      # Back on top level after a closing bracket: the group is complete.
      group << char if close_bracket?(char)
      unless group.empty?
        group << string[index + 1] if followed_by_star
        nodes << parse(group)
        group = []
      end

      next unless letter?(char)

      nodes << (followed_by_star ? Star[ Prim[ char ] ] : Prim[ char ])
    end

    nodes
  end
end
|
270
|
+
|
271
|
+
# Base class of all parse tree nodes. The defaults describe a node without
# any letter positions; subclasses override what differs.
class SyntaxNode #:nodoc:
  include Comparable

  attr_accessor :content

  # Shortcut for +new+.
  def self.[](input = nil)
    new(input)
  end

  def initialize(input = nil)
    @content = input
  end

  # Whether the empty word belongs to the language of this node.
  def null?
    true
  end

  # Positions a word of the language can start with.
  def first
    []
  end

  # Positions a word of the language can end with.
  def last
    []
  end

  # Pairs [p, q] of positions where q may directly follow p. An empty
  # array (instead of nil) is returned, because callers combine follow
  # sets with Array operations such as +|+, +<<+ and +find_all+.
  def follow
    []
  end

  # Nodes are ordered (and compared for equality) by their string form.
  def <=>(other)
    to_s <=> other.to_s
  end

  def to_s
    @content.to_s
  end

  def inspect
    "#{self.class}[ #{@content.inspect} ]"
  end
end
|
312
|
+
|
313
|
+
# Node for the empty language. Its string form is the empty string; any
# argument given to the constructor is ignored.
class EmptySet < SyntaxNode #:nodoc:
  def initialize(input = nil)
    super ''
  end

  # The empty language contains no word at all, in particular not the
  # empty word. Without this override the inherited default (true) makes
  # the initial state of the constructed automaton accepting.
  def null?
    false
  end
end
|
318
|
+
|
319
|
+
# Node for the language which contains only the empty word. Its string form
# is always '@'; an argument passed to the constructor is ignored.
class EmptyWord < SyntaxNode #:nodoc:
  def initialize(input = nil)
    super('@')
  end
end
|
324
|
+
|
325
|
+
# Node for a single letter. The content is the position of that letter.
class Prim < SyntaxNode #:nodoc:
  # A single letter never matches the empty word.
  def null?
    false
  end

  # The letter itself is the only possible first position.
  def first
    Array.new(1, @content)
  end

  # The letter itself is the only possible last position.
  def last
    Array.new(1, @content)
  end

  # Nothing can follow inside of a single letter.
  def follow
    Array.new
  end
end
|
342
|
+
|
343
|
+
# Node for the Kleene closure of its content. Nullability is inherited
# from SyntaxNode.
class Star < SyntaxNode #:nodoc:
  # Same first positions as the starred expression.
  def first
    @content.first
  end

  # Same last positions as the starred expression.
  def last
    @content.last
  end

  # Follow pairs of the content plus the pairs created by repetition:
  # every last position may be followed by every first position.
  def follow
    loop_pairs = @content.last.product(@content.first)

    (@content.follow | loop_pairs).sort
  end

  # The content is put into brackets unless it is a single character.
  def to_s
    inner = @content.to_s

    if inner.length > 1
      "(#{inner})*"
    else
      "#{inner}*"
    end
  end
end
|
370
|
+
|
371
|
+
# Node for the union of several subexpressions. The content is an array of
# syntax nodes.
class Union < SyntaxNode #:nodoc:
  # Nullable as soon as one alternative is nullable.
  def null?
    @content.any? { |subexpr| subexpr.null? }
  end

  # All first positions of all alternatives.
  def first
    @content.map { |subexpr| subexpr.first }.flatten.sort
  end

  # All last positions of all alternatives.
  def last
    @content.map { |subexpr| subexpr.last }.flatten.sort
  end

  # Follow pairs of all alternatives, without duplicates. An alternative
  # may report +nil+ (e.g. an empty word node using the inherited default);
  # Array() turns that into an empty array instead of letting +|+ raise.
  def follow
    @content.inject([]) { |result, subexpr| result | Array(subexpr.follow) }.sort
  end

  # Alternatives joined by the union symbol.
  def to_s
    @content.map { |subexpr| subexpr.to_s }.join('|')
  end
end
|
392
|
+
|
393
|
+
# Node for the concatenation of several subexpressions. The content is an
# array of syntax nodes in the order of concatenation.
class Concat < SyntaxNode #:nodoc:
  # Nullable only if every factor is nullable.
  def null?
    @content.all? { |subexpr| subexpr.null? }
  end

  # First positions of the leading factors, up to and including the first
  # factor which is not nullable.
  def first
    cutoff = @content.index { |subexpr| !subexpr.null? }
    leading = cutoff ? @content[0..cutoff] : @content

    leading.map { |subexpr| subexpr.first }.flatten.sort
  end

  # Last positions of the trailing factors, going backwards up to and
  # including the first factor which is not nullable.
  def last
    reversed = @content.reverse
    cutoff = reversed.index { |subexpr| !subexpr.null? }
    trailing = cutoff ? reversed[0..cutoff] : reversed

    trailing.map { |subexpr| subexpr.last }.flatten.sort
  end

  # Follow pairs of every factor plus, for neighbouring factors, each last
  # position of the left one paired with each first position of the right.
  def follow
    pairs = []

    @content.each_cons(2) do |left, right|
      pairs |= left.follow
      pairs.concat(left.last.product(right.first))
    end

    pairs |= @content[-1].follow

    pairs.sort
  end

  # String forms of the factors in order; unions are put into brackets.
  def to_s
    @content.map do |subexpr|
      Union === subexpr ? "(#{subexpr.to_s})" : subexpr.to_s
    end.join('')
  end
end
|
449
|
+
end #of module RE
|
450
|
+
end #of module RLSM
|