abnf 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +16 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +194 -0
- data/Rakefile +18 -0
- data/abnf.gemspec +24 -0
- data/lib/abnf.rb +57 -0
- data/lib/abnf/abnf.rb +136 -0
- data/lib/abnf/corerules.rb +28 -0
- data/lib/abnf/grammar.rb +183 -0
- data/lib/abnf/parser.output +348 -0
- data/lib/abnf/parser.rb +821 -0
- data/lib/abnf/parser.y +156 -0
- data/lib/abnf/regexp.rb +394 -0
- data/lib/abnf/version.rb +3 -0
- data/lib/natset.rb +411 -0
- data/lib/regexptree.rb +530 -0
- data/sample/in-place.rb +26 -0
- data/sample/ipv6.rb +42 -0
- data/sample/multiples-of-3.rb +19 -0
- data/sample/uri.rb +75 -0
- data/test/abnf_test.rb +82 -0
- data/test/regexptree_test.rb +12 -0
- data/test/test_helper.rb +3 -0
- metadata +115 -0
data/lib/abnf/version.rb
ADDED
data/lib/natset.rb
ADDED
@@ -0,0 +1,411 @@
|
|
1
|
+
=begin
|
2
|
+
= NatSet
|
3
|
+
|
4
|
+
NatSet represents a set of naturals - non-negative integers.
|
5
|
+
|
6
|
+
== class methods
|
7
|
+
--- NatSet.empty
|
8
|
+
--- NatSet.universal
|
9
|
+
--- NatSet.new(integer_or_range, ...)
|
10
|
+
|
11
|
+
== methods
|
12
|
+
--- empty?
|
13
|
+
--- universal?
|
14
|
+
--- open?
|
15
|
+
--- singleton?
|
16
|
+
--- self == other
|
17
|
+
--- self === other
|
18
|
+
--- eql?(other)
|
19
|
+
--- hash
|
20
|
+
--- ~self
|
21
|
+
--- self + other
|
22
|
+
--- self - other
|
23
|
+
--- self & other
|
24
|
+
|
25
|
+
--- split_each(ns, ...) {|region, *nss| ... }
|
26
|
+
--- split(ns, ...)
|
27
|
+
|
28
|
+
--- min
|
29
|
+
--- max
|
30
|
+
|
31
|
+
--- each_range {|range| ... }
|
32
|
+
|
33
|
+
=end
|
34
|
+
|
35
|
+
# NatSet is a set of naturals (non-negative integers).
#
# Internally the set is an ascending array of boundaries, +es+: walking up
# from 0, membership starts as "absent" and flips at every boundary.  Hence
# [] is the empty set, [0] is the universal set, [1, 3] is {1, 2}, and an
# odd number of boundaries means the set is open (unbounded above).
class NatSet
  class << NatSet
    # Keep the raw constructor reachable as _new; NatSet.new is redefined
    # below as a constructor that accepts integers, ranges and sets.
    alias _new new
  end

  # Returns the set with no elements.
  def NatSet.empty
    self._new
  end

  # Returns the set of all naturals.
  def NatSet.universal
    self._new(0)
  end

  # Builds the union of all arguments.
  #
  # Each argument may be:
  # * a non-negative Integer (a single element),
  # * a String (converted with String#ord),
  # * a Range of Integers or Strings; a negative end (e.g. 5..-1) or a nil
  #   end (endless range) means "unbounded above"; an empty range such as
  #   5...5 or 5..3 contributes nothing,
  # * another NatSet.
  #
  # Raises ArgumentError for negative integers, ranges whose begin is not a
  # non-negative Integer, and any other kind of object.
  def NatSet.new(*es)
    r = self.empty
    es.each {|e|
      e = e.ord if String === e
      case e
      when Range
        first = e.begin
        last = e.end
        if String === first
          first = first.ord
          last = last.ord unless last.nil?
        end
        unless Integer === first && 0 <= first
          raise ArgumentError.new("bad value for #{self}.new: #{e}")
        end
        if last.nil? || last < 0
          # Open range: everything from first upward.
          r += self._new(first)
        else
          upper = e.exclude_end? ? last : last + 1
          # Skip empty ranges; _new(first, upper) with first >= upper would
          # store boundaries that are not strictly ascending.
          r += self._new(first, upper) if first < upper
        end
      when Integer
        unless 0 <= e
          raise ArgumentError.new("bad value for #{self}.new: #{e}")
        end
        r += self._new(e, e+1)
      when NatSet
        r += e
      else
        raise ArgumentError.new("bad value for #{self}.new: #{e}")
      end
    }
    r
  end

  # Raw constructor: +es+ is the boundary array described on the class.
  # No validation is performed here.
  def initialize(*es)
    @es = es
  end
  attr_reader :es

  # True iff the set has no elements.
  def empty?
    @es.empty?
  end

  # True iff the set contains every natural.
  def universal?
    @es == [0]
  end

  # True iff the set is unbounded above (odd number of boundaries).
  def open?
    @es.length & 1 != 0
  end

  # Returns the sole element if the set has exactly one element, else nil.
  def singleton?
    if @es.length == 2 && @es[0] == @es[1] - 1
      @es[0]
    else
      nil
    end
  end

  # Two sets are equal iff their boundary arrays are equal.  Objects that do
  # not expose +es+ are simply unequal (rather than raising NoMethodError),
  # which also keeps `case x when natset` safe for arbitrary x.
  def ==(other)
    other.respond_to?(:es) && @es == other.es
  end
  alias === ==
  alias eql? ==

  # Hash consistent with eql?: derived from the boundary array.
  def hash
    @es.hash
  end

  # Returns the set of naturals that are not in self.
  def complement
    if @es.empty?
      self.class.universal
    elsif @es[0] == 0
      # Dropping the leading 0 boundary flips membership everywhere.
      self.class._new(*@es[1..-1])
    else
      # Adding a leading 0 boundary flips membership everywhere.
      self.class._new(0, *@es)
    end
  end
  alias ~ complement

  # Union, implemented by double dispatch on +other+.
  def union(other)
    other.union_natset(self)
  end
  alias + union
  alias | union

  # Double-dispatch target of #union.
  def union_natset(natset)
    return self if natset.empty? || self.universal?
    return natset if self.empty? || natset.universal?
    merge(natset) {|a, b| a || b}
  end

  # Intersection, implemented by double dispatch on +other+.
  def intersect(other)
    other.intersect_natset(self)
  end
  alias & intersect

  # Double-dispatch target of #intersect.
  # Note that the shortcuts may return self or +natset+ itself, not a copy.
  def intersect_natset(natset)
    return self if self.empty? || natset.universal?
    return natset if natset.empty? || self.universal?
    merge(natset) {|a, b| a && b}
  end

  # Difference (self minus other), implemented by double dispatch on +other+.
  def subtract(other)
    other.subtract_natset(self)
  end
  alias - subtract

  def subtract_natset(natset) # natset - self
    # Since double dispatch *inverses* a receiver and an argument,
    # condition should be inversed.
    return natset if self.empty? || natset.empty?
    return NatSet.empty if self.universal?
    return ~self if natset.universal?
    merge(natset) {|a, b| !a && b}
  end

  # Sweeps the boundaries of self and +other+ in ascending order and builds
  # a new set.  For each region between consecutive boundaries the block is
  # called with (in_self, in_other) and must return whether that region
  # belongs to the result.
  def merge(other)
    es1 = @es.dup
    es2 = other.es.dup
    es0 = []
    # bool1/bool2: membership in self/other for the region starting at s;
    # bool0: membership in the result emitted so far.
    bool1 = bool2 = bool0 = false
    s = 0
    while !es1.empty? || !es2.empty?
      if es2.empty? || !es1.empty? && es1[0] < es2[0]
        # Next boundary comes from self only.
        e = es1.shift
        if s < e && bool0 != yield(bool1, bool2)
          es0 << s
          bool0 = !bool0
        end
        s = e
        bool1 = !bool1
      elsif es1.empty? || !es2.empty? && es1[0] > es2[0]
        # Next boundary comes from other only.
        e = es2.shift
        if s < e && bool0 != yield(bool1, bool2)
          es0 << s
          bool0 = !bool0
        end
        s = e
        bool2 = !bool2
      else
        # Both sets share this boundary.
        e = es1.shift
        es2.shift
        if s < e && bool0 != yield(bool1, bool2)
          es0 << s
          bool0 = !bool0
        end
        s = e
        bool1 = !bool1
        bool2 = !bool2
      end
    end
    # The final, unbounded region starting at s.
    if bool0 != yield(bool1, bool2)
      es0 << s
    end
    self.class._new(*es0)
  end

  # Partitions self by the given sets.  For every non-empty region it yields
  # an array whose first element is the region (a NatSet) followed by those
  # of +natsets+ that contain the region.  Returns nil.
  def split_each(*natsets)
    if natsets.empty?
      yield [self]
    else
      current = natsets.pop

      # Part of self outside +current+.
      a = self - current
      unless a.empty?
        a.split_each(*natsets) {|nss| yield nss}
      end

      # Part of self inside +current+.
      a = self & current
      unless a.empty?
        a.split_each(*natsets) {|nss| nss.push current; yield nss}
      end
    end
    nil
  end

  # Like split_each, but collects the yielded arrays and returns them.
  def split(*natsets)
    result = []
    split_each(*natsets) {|r| result << r}
    result
  end

  # min returns a minimum element of the set.
  # It returns nil if the set has no minimum element,
  # i.e. the set has no element.
  def min
    if @es.empty?
      nil
    else
      @es[0]
    end
  end

  # max returns a maximum element of the set.
  # It returns nil if the set has no maximum element,
  # i.e. the set is open or has no element.
  def max
    if @es.empty? || open?
      nil
    else
      @es[-1] - 1
    end
  end

  # each_range iterates on continuous ranges of the set from smallest to largest.
  # For each range, it yields Range object which represent it.
  # For last range in open set, the end of the object is -1.
  # All yielded Range objects are inclusive, i.e. exclude_end? is false.
  def each_range
    (0...@es.length).step(2) {|i|
      e1 = @es[i]
      if i+1 == @es.length
        yield e1..-1
      else
        e2 = @es[i+1]
        yield e1..(e2-1)
      end
    }
  end

  # Pretty-printer hook: prints each range as "a..b", a lone element as "a",
  # and the open tail as "a..inf".
  def pretty_print(pp)
    pp.object_group(self) {
      pp.text ':'
      each_range {|r|
        pp.breakable
        if r.end == -1
          pp.text "#{r.begin}..inf"
        elsif r.begin == r.end
          pp.text r.begin.to_s
        else
          pp.text "#{r.begin}..#{r.end}"
        end
      }
    }
  end

  # Single-line form of pretty_print.
  def inspect
    require 'pp' # loaded lazily; only needed for inspection
    PP.singleline_pp(self, '')
  end
end
|
291
|
+
|
292
|
+
# Self-test: runs only when this file is executed directly.
if __FILE__ == $0
  require 'test/unit'

  # Unit tests for NatSet.
  class NatSetTest < Test::Unit::TestCase
    def test_empty
      assert(NatSet.empty.empty?)
    end

    def test_universal
      assert(NatSet.universal.universal?)
    end

    def test_open
      assert(!NatSet.empty.open?)
      assert(NatSet.universal.open?)
    end

    def test_singleton
      assert_equal(1, NatSet._new(1, 2).singleton?)
      assert_nil(NatSet._new(1, 3).singleton?)
    end

    def test_complement
      assert_equal(NatSet.empty, ~NatSet.universal)
      assert_equal(NatSet.universal, ~NatSet.empty)
      assert_equal(NatSet._new(1, 2), ~NatSet._new(0, 1, 2))
      assert_equal(NatSet._new(0, 1, 2), ~NatSet._new(1, 2))
    end

    def test_union
      assert_equal(NatSet.empty, NatSet.empty + NatSet.empty)
      assert_equal(NatSet.universal, NatSet.empty + NatSet.universal)
      assert_equal(NatSet.universal, NatSet.universal + NatSet.empty)
      assert_equal(NatSet.universal, NatSet.universal + NatSet.universal)
      assert_equal(NatSet.new(0..2), NatSet.new(0, 2) + NatSet.new(0, 1))
    end

    def test_intersect
      assert_equal(NatSet.empty, NatSet.empty & NatSet.empty)
      assert_equal(NatSet.empty, NatSet.empty & NatSet.universal)
      assert_equal(NatSet.empty, NatSet.universal & NatSet.empty)
      assert_equal(NatSet.universal, NatSet.universal & NatSet.universal)
      assert_equal(NatSet.new(0), NatSet.new(0, 2) & NatSet.new(0, 1))
    end

    def test_subtract
      assert_equal(NatSet.empty, NatSet.empty - NatSet.empty)
      assert_equal(NatSet.empty, NatSet.empty - NatSet.universal)
      assert_equal(NatSet.universal, NatSet.universal - NatSet.empty)
      assert_equal(NatSet.empty, NatSet.universal - NatSet.universal)
      assert_equal(NatSet.new(2), NatSet.new(0, 2) - NatSet.new(0, 1))
    end

    def test_new
      assert_equal([1, 2], NatSet.new(1).es)
      assert_equal([1, 3], NatSet.new(1, 2).es)
      assert_equal([1, 4], NatSet.new(1, 2, 3).es)
      assert_equal([1, 4], NatSet.new(1, 3, 2).es)
      assert_equal([10, 21], NatSet.new(10..20).es)
      assert_equal([10, 20], NatSet.new(10...20).es)
      assert_equal([1, 2, 3, 4, 5, 6], NatSet.new(1, 3, 5).es)
      assert_equal([1, 16], NatSet.new(5..15, 1..10).es)
      assert_equal([1, 16], NatSet.new(11..15, 1..10).es)
      # NatSet.new converts strings (and string ranges) with String#ord,
      # so these are accepted rather than rejected.
      assert_equal([97, 98], NatSet.new("a").es)
      assert_equal([97, 99], NatSet.new("a".."b").es)
      assert_raises(ArgumentError) {NatSet.new(-1)}
      assert_raises(ArgumentError) {NatSet.new(-1..3)}
    end

    def test_split
      u = NatSet.universal
      assert_equal([[NatSet.universal]], u.split())
      assert_equal([[NatSet.universal]], u.split(NatSet.empty))
      assert_equal([[NatSet.universal, u]], u.split(u))

      n = NatSet.new(10..20)
      assert_equal([[NatSet.new(0..9, 21..-1)],
                    [NatSet.new(10..20), n]],
                   u.split(n))

      ns = [NatSet.new(10..20), NatSet.new(10..20)]
      assert_equal([[NatSet.new(0..9, 21..-1)],
                    [NatSet.new(10..20), *ns]],
                   u.split(*ns))

      ns = [NatSet.new(1..30), NatSet.new(5..40)]
      assert_equal([[NatSet.new(0, 41..-1)],
                    [NatSet.new(1..4), ns[0]],
                    [NatSet.new(31..40), ns[1]],
                    [NatSet.new(5..30), *ns]],
                   u.split(*ns))

      ns = [NatSet.new(1..30), NatSet.new(5..20)]
      assert_equal([[NatSet.new(0, 31..-1)],
                    [NatSet.new(1..4, 21..30), ns[0]],
                    [NatSet.new(5..20), *ns]],
                   u.split(*ns))
    end

    def test_min
      assert_nil(NatSet.new().min)
      assert_equal(1, NatSet.new(1..10).min)
    end

    def test_max
      assert_nil(NatSet.new().max)
      assert_equal(10, NatSet.new(1..10).max)
      assert_nil(NatSet.new(1..-1).max)
    end

    def test_each_range
      rs = []; NatSet.new() .each_range {|r| rs << r}; assert_equal([], rs)
      rs = []; NatSet.new(0).each_range {|r| rs << r}; assert_equal([0..0], rs)
      rs = []; NatSet.new(1).each_range {|r| rs << r}; assert_equal([1..1], rs)
      rs = []; NatSet.new(1..3).each_range {|r| rs << r}; assert_equal([1..3], rs)
      rs = []; NatSet.new(1...3).each_range {|r| rs << r}; assert_equal([1..2], rs)
      rs = []; NatSet.new(1..-1).each_range {|r| rs << r}; assert_equal([1..-1], rs)
    end
  end
end
|
data/lib/regexptree.rb
ADDED
@@ -0,0 +1,530 @@
|
|
1
|
+
=begin
|
2
|
+
= RegexpTree
|
3
|
+
|
4
|
+
RegexpTree represents regular expression.
|
5
|
+
It can be converted to Regexp.
|
6
|
+
|
7
|
+
== class methods
|
8
|
+
--- RegexpTree.str(string)
|
9
|
+
returns an instance of RegexpTree which only matches ((|string|))
|
10
|
+
--- RegexpTree.alt(*regexp_trees)
|
11
|
+
returns an instance of RegexpTree which is alternation of ((|regexp_trees|)).
|
12
|
+
--- RegexpTree.seq(*regexp_trees)
|
13
|
+
returns an instance of RegexpTree which is concatenation of ((|regexp_trees|)).
|
14
|
+
--- RegexpTree.rep(regexp_tree, min=0, max=nil, greedy=true)
|
15
|
+
returns an instance of RegexpTree which is repetition of ((|regexp_tree|)).
|
16
|
+
--- RegexpTree.charclass(natset)
|
17
|
+
returns an instance of RegexpTree which matches characters in ((|natset|)).
|
18
|
+
#--- RegexpTree.linebeg
|
19
|
+
#--- RegexpTree.lineend
|
20
|
+
#--- RegexpTree.strbeg
|
21
|
+
#--- RegexpTree.strend
|
22
|
+
#--- RegexpTree.strlineend
|
23
|
+
#--- RegexpTree.word_boundary
|
24
|
+
#--- RegexpTree.non_word_boundary
|
25
|
+
#--- RegexpTree.previous_match
|
26
|
+
#--- RegexpTree.backref(n)
|
27
|
+
|
28
|
+
== methods
|
29
|
+
--- regexp(anchored=false)
|
30
|
+
convert to Regexp.
|
31
|
+
|
32
|
+
If ((|anchored|)) is true, the Regexp is anchored by (({\A})) and (({\z})).
|
33
|
+
--- to_s
|
34
|
+
convert to String.
|
35
|
+
--- empty_set?
|
36
|
+
returns true iff self never matches.
|
37
|
+
--- empty_sequence?
|
38
|
+
returns true iff self only matches empty string.
|
39
|
+
--- self | other
|
40
|
+
returns alternation of ((|self|)) and ((|other|)).
|
41
|
+
--- self + other
|
42
|
+
returns concatenation of ((|self|)) and ((|other|)).
|
43
|
+
--- self * n
|
44
|
+
returns ((|n|)) times repetition of ((|self|)).
|
45
|
+
--- rep(min=0, max=nil, greedy=true)
|
46
|
+
returns ((|min|)) to ((|max|)) times repetition of ((|self|)).
|
47
|
+
#--- closure(greedy=true)
|
48
|
+
#--- positive_closure(greedy=true)
|
49
|
+
#--- optional(greedy=true)
|
50
|
+
#--- ntimes(min, max=min, greedy=true)
|
51
|
+
#--- nongreedy_rep(min=0, max=nil)
|
52
|
+
#--- nongreedy_closure
|
53
|
+
#--- nongreedy_positive_closure
|
54
|
+
#--- nongreedy_optional
|
55
|
+
#--- nongreedy_ntimes(min, max=min)
|
56
|
+
=end
|
57
|
+
|
58
|
+
require 'prettyprint'
|
59
|
+
require 'natset'
|
60
|
+
|
61
|
+
# RegexpTree is a tree representation of a regular expression that can be
# rendered as a Regexp (#regexp) or as regexp source text (#to_s, #inspect).
#
# Node kinds are the nested subclasses Alt, Seq, Rep and Elt (with
# CharClass, Special and Paren under Elt).
class RegexpTree
  # Each direct subclass of RegexpTree receives a Prec constant, numbered in
  # definition order (Alt, Seq, Rep, Elt).  Indirect subclasses are skipped
  # and see their parent's Prec through normal constant lookup.
  @curr_prec = 1
  def RegexpTree.inherited(c)
    return if c.superclass != RegexpTree
    c.const_set(:Prec, @curr_prec)
    @curr_prec += 1
  end

  # Returns self if it can appear directly as an operand of a +target+ node
  # (its Prec is at least target's); otherwise wraps self in a Paren.
  def parenthesize(target)
    if target::Prec <= self.class::Prec
      self
    else
      Paren.new(self)
    end
  end

  # Pretty-printer hook: renders as %r{...}x, followed by "i" when the tree
  # is case insensitive (in which case the downcased tree is printed).
  def pretty_print(pp)
    case_insensitive = case_insensitive?
    pp.group(3, '%r{', '}x') {
      (case_insensitive ? self.downcase : self).pretty_format(pp)
    }
    pp.text 'i' if case_insensitive
  end

  # Single-line %r literal form.  The tree is downcased and flagged with "i"
  # only when it is case insensitive.
  def inspect
    # Keep the boolean separate from the flag text: an empty string is
    # truthy in Ruby, so testing the flag string would always downcase.
    insensitive = case_insensitive?
    flag = insensitive ? "i" : ""
    r = PrettyPrint.singleline_format('') {|out|
      (insensitive ? self.downcase : self).pretty_format(out)
    }
    if %r{/} =~ r
      "%r{#{r}}#{flag}"
    else
      "%r/#{r}/#{flag}"
    end
  end

  # Converts to a Regexp.  If +anchored+ is true the pattern is wrapped in
  # \A ... \z.  Case-insensitive trees are compiled with IGNORECASE from
  # their downcased form.
  def regexp(anchored=false)
    if case_insensitive?
      r = downcase
      opt = Regexp::IGNORECASE
    else
      r = self
      opt = 0
    end
    r = RegexpTree.seq(RegexpTree.strbeg, r, RegexpTree.strend) if anchored
    Regexp.compile(
      PrettyPrint.singleline_format('') {|out|
        r.pretty_format(out)
      },
      opt)
  end

  # Converts to regexp source text wrapped in an option group:
  # (?i-m:...) for case-insensitive trees, (?-im:...) otherwise.
  def to_s
    PrettyPrint.singleline_format('') {|out|
      # x flag is not required because all whitespaces are escaped.
      if case_insensitive?
        out.text '(?i-m:'
        downcase.pretty_format(out)
        out.text ')'
      else
        out.text '(?-im:'
        pretty_format(out)
        out.text ')'
      end
    }
  end

  # True iff self never matches.  Overridden by Alt and CharClass.
  def empty_set?
    false
  end

  # True iff self only matches the empty string.  Overridden by Seq.
  def empty_sequence?
    false
  end

  # Alternation of self and +other+.
  def |(other)
    RegexpTree.alt(self, other)
  end

  # Alternation of +rs+.  Never-matching operands are dropped, nested Alt
  # nodes are flattened, and a CharClass following another CharClass is
  # merged into it.  Returns EmptySet when nothing remains and the operand
  # itself when only one remains.
  def RegexpTree.alt(*rs)
    rs2 = []
    rs.each {|r|
      if r.empty_set?
        next
      elsif Alt === r
        rs2.concat r.rs
      elsif CharClass === r
        if CharClass === rs2.last
          rs2[-1] = CharClass.new(rs2.last.natset + r.natset)
        else
          rs2 << r
        end
      else
        rs2 << r
      end
    }
    case rs2.length
    when 0; EmptySet
    when 1; rs2.first
    else; Alt.new(rs2)
    end
  end

  # Alternation node; an Alt with no operands never matches.
  class Alt < RegexpTree
    def initialize(rs)
      @rs = rs
    end
    attr_reader :rs

    def empty_set?
      @rs.empty?
    end

    def case_insensitive?
      @rs.all? {|r| r.case_insensitive?}
    end

    def multiline_insensitive?
      @rs.all? {|r| r.multiline_insensitive?}
    end

    def downcase
      Alt.new(@rs.map {|r| r.downcase})
    end

    def pretty_format(out)
      if @rs.empty?
        # Empty negative lookahead: a pattern that always fails.
        out.text '(?!)'
      else
        out.group {
          @rs.each_with_index {|r, i|
            unless i == 0
              out.text '|'
              out.breakable ''
            end
            r.parenthesize(Alt).pretty_format(out)
          }
        }
      end
    end
  end
  EmptySet = Alt.new([])

  # Concatenation of self and +other+.
  def +(other)
    RegexpTree.seq(self, other)
  end

  # Concatenation of +rs+.  Empty-sequence operands are dropped and nested
  # Seq nodes are flattened; a never-matching operand makes the whole result
  # EmptySet.  Returns EmptySequence when nothing remains and the operand
  # itself when only one remains.
  def RegexpTree.seq(*rs)
    rs2 = []
    rs.each {|r|
      if r.empty_sequence?
        next
      elsif Seq === r
        rs2.concat r.rs
      elsif r.empty_set?
        return EmptySet
      else
        rs2 << r
      end
    }
    case rs2.length
    when 0; EmptySequence
    when 1; rs2.first
    else; Seq.new(rs2)
    end
  end

  # Concatenation node; a Seq with no operands matches only the empty string.
  class Seq < RegexpTree
    def initialize(rs)
      @rs = rs
    end
    attr_reader :rs

    def empty_sequence?
      @rs.empty?
    end

    def case_insensitive?
      @rs.all? {|r| r.case_insensitive?}
    end

    def multiline_insensitive?
      @rs.all? {|r| r.multiline_insensitive?}
    end

    def downcase
      Seq.new(@rs.map {|r| r.downcase})
    end

    def pretty_format(out)
      out.group {
        @rs.each_with_index {|r, i|
          unless i == 0
            out.group {out.breakable ''}
          end
          r.parenthesize(Seq).pretty_format(out)
        }
      }
    end
  end
  EmptySequence = Seq.new([])

  # Repetition: an Integer n means exactly n times, a Range means between
  # its first and last (honouring exclude_end?) times.
  # Raises TypeError for any other argument.
  def *(n)
    case n
    when Integer
      RegexpTree.rep(self, n, n)
    when Range
      RegexpTree.rep(self, n.first, n.last - (n.exclude_end? ? 1 : 0))
    else
      raise TypeError.new("Integer or Range expected: #{n}")
    end
  end

  # Convenience wrappers around RegexpTree.rep.
  def nongreedy_closure() RegexpTree.rep(self, 0, nil, false) end
  def nongreedy_positive_closure() RegexpTree.rep(self, 1, nil, false) end
  def nongreedy_optional() RegexpTree.rep(self, 0, 1, false) end
  def nongreedy_ntimes(m, n=m) RegexpTree.rep(self, m, n, false) end
  def nongreedy_rep(m=0, n=nil) RegexpTree.rep(self, m, n, false) end
  def closure(greedy=true) RegexpTree.rep(self, 0, nil, greedy) end
  def positive_closure(greedy=true) RegexpTree.rep(self, 1, nil, greedy) end
  def optional(greedy=true) RegexpTree.rep(self, 0, 1, greedy) end
  def ntimes(m, n=m, greedy=true) RegexpTree.rep(self, m, n, greedy) end
  def rep(m=0, n=nil, greedy=true) RegexpTree.rep(self, m, n, greedy) end

  # Repetition of +r+ from +m+ to +n+ times (n == nil means unbounded).
  # Trivial cases are simplified: {0,0} and repetition of the empty sequence
  # give EmptySequence, {1,1} gives +r+ itself, and repetition of a
  # never-matching tree gives EmptySequence when m == 0, else EmptySet.
  def RegexpTree.rep(r, m=0, n=nil, greedy=true)
    return EmptySequence if m == 0 && n == 0
    return r if m == 1 && n == 1
    return EmptySequence if r.empty_sequence?
    if r.empty_set?
      return m == 0 ? EmptySequence : EmptySet
    end
    Rep.new(r, m, n, greedy)
  end

  # Repetition node: @r repeated @m to @n times (@n nil = unbounded).
  class Rep < RegexpTree
    def initialize(r, m=0, n=nil, greedy=true)
      @r = r
      @m = m
      @n = n
      @greedy = greedy
    end

    def case_insensitive?
      @r.case_insensitive?
    end

    def multiline_insensitive?
      @r.multiline_insensitive?
    end

    def downcase
      Rep.new(@r.downcase, @m, @n, @greedy)
    end

    # Emits the operand followed by the shortest quantifier spelling
    # (?, *, +, {m}, {m,n}); a trailing ? marks a non-greedy repetition.
    def pretty_format(out)
      @r.parenthesize(Elt).pretty_format(out)
      case @m
      when 0
        case @n
        when 0
          out.text '{0}'
        when 1
          out.text '?'
        when nil
          out.text '*'
        else
          out.text "{#{@m},#{@n}}"
        end
      when 1
        case @n
        when 1
          # exactly once: no quantifier needed
        when nil
          out.text '+'
        else
          out.text "{#{@m},#{@n}}"
        end
      else
        if @m == @n
          out.text "{#{@m}}"
        else
          # A nil @n interpolates as "", giving the open form {m,}.
          out.text "{#{@m},#{@n}}"
        end
      end
      out.text '?' unless @greedy
    end
  end

  # Base class of atomic nodes (CharClass, Special, Paren).
  class Elt < RegexpTree
  end

  # Returns a tree matching one character whose code is in +natset+,
  # or EmptySet if +natset+ is empty.
  def RegexpTree.charclass(natset)
    if natset.empty?
      EmptySet
    else
      CharClass.new(natset)
    end
  end

  # Character class node backed by a NatSet of character codes.
  class CharClass < Elt
    None = NatSet.empty
    Any = NatSet.universal
    NL = NatSet.new(?\n)
    NonNL = ~NL
    Word = NatSet.new(?0..?9, ?A..?Z, ?_, ?a..?z)
    NonWord = ~Word
    # Tab, newline, form feed, carriage return and space.
    Space = NatSet.new(?\t, ?\n, ?\f, ?\r, ?\s)
    NonSpace = ~Space
    Digit = NatSet.new(?0..?9)
    NonDigit = ~Digit

    UpAlpha = NatSet.new(?A..?Z)
    LowAlpha = NatSet.new(?a..?z)

    def initialize(natset)
      @natset = natset
    end
    attr_reader :natset

    def empty_set?
      @natset.empty?
    end

    # True iff the class contains an ASCII uppercase letter exactly when it
    # contains the corresponding lowercase letter.
    def case_insensitive?
      up = @natset & UpAlpha
      low = @natset & LowAlpha
      return false if up.es.length != low.es.length
      # Build a fresh set instead of mutating up.es in place: `&` can
      # return the UpAlpha constant itself (when @natset is universal).
      shifted = NatSet._new(*up.es.map {|ch|
        ch - 0x41 + 0x61 # - ?A + ?a
      })
      shifted == low
    end

    def multiline_insensitive?
      @natset != NonNL
    end

    # Returns a CharClass in which every ASCII uppercase letter is replaced
    # by its lowercase counterpart.
    def downcase
      up = @natset & UpAlpha
      # Fresh set for the same reason as in case_insensitive?: never mutate
      # the boundaries of a set that may be a shared constant.
      lowered = NatSet._new(*up.es.map {|ch|
        ch - 0x41 + 0x61 # - ?A + ?a
      })
      CharClass.new((@natset - UpAlpha) | lowered)
    end

    # Emits a shorthand (\w, \d, ., ...) when the set equals one of the
    # constants above, a single escaped character for a singleton, and a
    # bracket expression otherwise (negated when the set is open).
    def pretty_format(out)
      case @natset
      when None; out.text '(?!)'
      when Any; out.text '[\s\S]'
      when NL; out.text '\n'
      when NonNL; out.text '.'
      when Word; out.text '\w'
      when NonWord; out.text '\W'
      when Space; out.text '\s'
      when NonSpace; out.text '\S'
      when Digit; out.text '\d'
      when NonDigit; out.text '\D'
      else
        if val = @natset.singleton?
          out.text encode_elt(val)
        else
          if @natset.open?
            # An open set is unbounded above, so list its complement instead.
            neg_mark = '^'
            es = (~@natset).es
          else
            neg_mark = ''
            es = @natset.es.dup
          end
          r = ''
          # Consume boundaries pairwise: [es[0], es[1]) is one run.
          until es.empty?
            if es[0] + 1 == es[1]
              r << encode_elt(es[0])
            elsif es[0] + 2 == es[1]
              r << encode_elt(es[0]) << encode_elt(es[1] - 1)
            else
              r << encode_elt(es[0]) << '-' << encode_elt(es[1] - 1)
            end
            es.shift
            es.shift
          end
          out.text "[#{neg_mark}#{r}]"
        end
      end
    end

    # Returns regexp source text for character code +e+: a named escape for
    # common control characters, the character itself for the listed
    # punctuation, digits and letters, and a \xHH escape for anything else.
    def encode_elt(e)
      case e
      when 0x09; '\t'
      when 0x0a; '\n'
      when 0x0d; '\r'
      when 0x0c; '\f'
      when 0x0b; '\v'
      when 0x07; '\a'
      when 0x1b; '\e'
      #when ?!, ?", ?%, ?&, ?', ?,, ?:, ?;, ?<, ?=, ?>, ?/, ?0..?9, ?@, ?A..?Z, ?_, ?`, ?a..?z, ?~
      when 0x21, 0x22, 0x25, 0x26, 0x27, 0x2c, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x2f, 0x30..0x39, 0x40, 0x41..0x5a, 0x5f, 0x60, 0x61..0x7a, 0x7e
        sprintf("%c", e)
      else
        sprintf("\\x%02x", e)
      end
    end
  end

  # Constructors for anchors and back-references, emitted verbatim.
  def RegexpTree.linebeg() Special.new('^') end
  def RegexpTree.lineend() Special.new('$') end
  def RegexpTree.strbeg() Special.new('\A') end
  def RegexpTree.strend() Special.new('\z') end
  def RegexpTree.strlineend() Special.new('\Z') end
  def RegexpTree.word_boundary() Special.new('\b') end
  def RegexpTree.non_word_boundary() Special.new('\B') end
  def RegexpTree.previous_match() Special.new('\G') end
  def RegexpTree.backref(n) Special.new("\\#{n}") end

  # Node holding literal regexp source text (@str) that is emitted as is.
  class Special < Elt
    def initialize(str)
      @str = str
    end

    def case_insensitive?
      true
    end

    def multiline_insensitive?
      true
    end

    def downcase
      self
    end

    def pretty_format(out)
      out.text @str
    end
  end

  # Wrappers producing parenthesized forms of self.
  def group() Paren.new(self, '') end
  def paren() Paren.new(self) end
  def lookahead() Paren.new(self, '?=') end
  def negative_lookahead() Paren.new(self, '?!') end
  # (?ixm-ixm:...)
  # (?>...)

  # Parenthesized node: "(" + @mark + inner + ")".  The default mark '?:'
  # yields a non-capturing group; an empty mark yields a capturing group.
  class Paren < Elt
    def initialize(r, mark='?:')
      @mark = mark
      @r = r
    end

    def case_insensitive?
      # xxx: if @mark contains "i"...
      @r.case_insensitive?
    end

    def multiline_insensitive?
      # xxx: if @mark contains "m"...
      @r.multiline_insensitive?
    end

    def downcase
      Paren.new(@r.downcase, @mark)
    end

    def pretty_format(out)
      out.group(1 + @mark.length, "(#@mark", ')') {
        @r.pretty_format(out)
      }
    end
  end

  # def RegexpTree.comment(str) ... end # (?#...)

  # Returns a tree that matches exactly +str+, built as a sequence of
  # single-byte character classes (one per byte of +str+).
  def RegexpTree.str(str)
    ccs = []
    str.each_byte {|ch|
      ccs << CharClass.new(NatSet.new(ch))
    }
    seq(*ccs)
  end
end
|