abnf 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +16 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +194 -0
- data/Rakefile +18 -0
- data/abnf.gemspec +24 -0
- data/lib/abnf.rb +57 -0
- data/lib/abnf/abnf.rb +136 -0
- data/lib/abnf/corerules.rb +28 -0
- data/lib/abnf/grammar.rb +183 -0
- data/lib/abnf/parser.output +348 -0
- data/lib/abnf/parser.rb +821 -0
- data/lib/abnf/parser.y +156 -0
- data/lib/abnf/regexp.rb +394 -0
- data/lib/abnf/version.rb +3 -0
- data/lib/natset.rb +411 -0
- data/lib/regexptree.rb +530 -0
- data/sample/in-place.rb +26 -0
- data/sample/ipv6.rb +42 -0
- data/sample/multiples-of-3.rb +19 -0
- data/sample/uri.rb +75 -0
- data/test/abnf_test.rb +82 -0
- data/test/regexptree_test.rb +12 -0
- data/test/test_helper.rb +3 -0
- metadata +115 -0
data/lib/abnf/version.rb
ADDED
data/lib/natset.rb
ADDED
@@ -0,0 +1,411 @@
|
|
1
|
+
=begin
|
2
|
+
= NatSet
|
3
|
+
|
4
|
+
NatSet represents a set of naturals - non-negative integers.
|
5
|
+
|
6
|
+
== class methods
|
7
|
+
--- NatSet.empty
|
8
|
+
--- NatSet.universal
|
9
|
+
--- NatSet.new(integer_or_range, ...)
|
10
|
+
|
11
|
+
== methods
|
12
|
+
--- empty?
|
13
|
+
--- universal?
|
14
|
+
--- open?
|
15
|
+
--- singleton?
|
16
|
+
--- self == other
|
17
|
+
--- self === other
|
18
|
+
--- eql?(other)
|
19
|
+
--- hash
|
20
|
+
--- ~self
|
21
|
+
--- self + other
|
22
|
+
--- self - other
|
23
|
+
--- self & other
|
24
|
+
|
25
|
+
--- split_each(ns, ...) {|region, *nss| ... }
|
26
|
+
--- split(ns, ...)
|
27
|
+
|
28
|
+
--- min
|
29
|
+
--- max
|
30
|
+
|
31
|
+
--- each_range {|range| ... }
|
32
|
+
|
33
|
+
=end
|
34
|
+
|
35
|
+
# NatSet is a set of naturals (non-negative integers).
#
# The set is stored in @es as an increasing list of boundary values at which
# membership toggles: [1, 4] is {1, 2, 3}, [0] is every natural, [] is empty.
# An odd number of boundaries therefore means the set has no upper bound.
class NatSet
  class << NatSet
    # Keep the raw constructor (which takes boundary values verbatim)
    # reachable as _new, because NatSet.new is redefined below.
    alias _new new
  end

  # Returns the set with no elements.
  def NatSet.empty
    self._new
  end

  # Returns the set of all naturals.
  def NatSet.universal
    self._new(0)
  end

  # Builds the union of the given elements.  Each argument may be:
  # * a non-negative Integer;
  # * a String, which is replaced by its +ord+ value;
  # * a Range of Integers or Strings; a negative or nil (endless) end
  #   denotes an open range, and a range with no elements contributes nothing;
  # * another NatSet.
  #
  # Raises ArgumentError for anything else.
  def NatSet.new(*es)
    r = self.empty
    es.each {|e|
      e = e.ord if String === e
      case e
      when Range
        lo = e.begin
        hi = e.end
        lo = lo.ord if String === lo
        hi = hi.ord if String === hi
        unless Integer === lo && 0 <= lo && (hi.nil? || Integer === hi)
          raise ArgumentError.new("bad value for #{self}.new: #{e}")
        end
        if hi.nil? || hi < 0
          r += self._new(lo)
        else
          hi += 1 unless e.exclude_end?
          # Skip empty ranges (e.g. 5...5 or 5..3): storing them would put
          # non-increasing boundaries into @es.
          r += self._new(lo, hi) if lo < hi
        end
      when Integer
        unless 0 <= e
          raise ArgumentError.new("bad value for #{self}.new: #{e}")
        end
        r += self._new(e, e + 1)
      when NatSet
        r += e
      else
        raise ArgumentError.new("bad value for #{self}.new: #{e}")
      end
    }
    r
  end

  # Raw constructor: +es+ is the boundary list itself (see the class comment).
  def initialize(*es)
    @es = es
  end
  attr_reader :es

  # True if the set has no elements.
  def empty?
    @es.empty?
  end

  # True if the set contains every natural.
  def universal?
    @es == [0]
  end

  # True if the set has no upper bound (odd number of boundaries).
  def open?
    @es.length & 1 != 0
  end

  # Returns the sole element if the set has exactly one element, otherwise nil.
  def singleton?
    if @es.length == 2 && @es[0] == @es[1] - 1
      @es[0]
    else
      nil
    end
  end

  # Two sets are equal when their boundary lists are equal.  Objects that do
  # not expose +es+ (nil, Integer, ...) are simply unequal instead of raising,
  # which matters because === (used by case/when) is an alias.
  def ==(other)
    other.respond_to?(:es) && @es == other.es
  end
  alias === ==
  alias eql? ==

  def hash
    @es.hash
  end

  # Returns the set of naturals that are not in this set.
  def complement
    if @es.empty?
      self.class.universal
    elsif @es[0] == 0
      self.class._new(*@es[1..-1])
    else
      self.class._new(0, *@es)
    end
  end
  alias ~ complement

  # self + other.  Dispatches to other.union_natset(self).
  def union(other)
    other.union_natset(self)
  end
  alias + union
  alias | union

  def union_natset(natset)
    return self if natset.empty? || self.universal?
    return natset if self.empty? || natset.universal?
    merge(natset) {|a, b| a || b}
  end

  # self & other.  Dispatches to other.intersect_natset(self).
  def intersect(other)
    other.intersect_natset(self)
  end
  alias & intersect

  def intersect_natset(natset)
    return self if self.empty? || natset.universal?
    return natset if natset.empty? || self.universal?
    merge(natset) {|a, b| a && b}
  end

  # self - other.  Dispatches to other.subtract_natset(self).
  def subtract(other)
    other.subtract_natset(self)
  end
  alias - subtract

  def subtract_natset(natset) # natset - self
    # Since double dispatch *inverses* a receiver and an argument,
    # condition should be inversed.
    return natset if self.empty? || natset.empty?
    return self.class.empty if self.universal?
    return ~self if natset.universal?
    merge(natset) {|a, b| !a && b}
  end

  # Sweeps over the boundaries of self and +other+ in increasing order.
  # For every region between consecutive boundaries the block is called with
  # (in_self, in_other) and returns whether the region belongs to the result.
  def merge(other)
    es1 = @es.dup
    es2 = other.es.dup
    es0 = []
    bool1 = bool2 = bool0 = false
    s = 0
    until es1.empty? && es2.empty?
      # Next boundary is the smaller head; both lists advance on a tie.
      e = [es1.first, es2.first].compact.min
      if s < e && bool0 != yield(bool1, bool2)
        es0 << s
        bool0 = !bool0
      end
      s = e
      if es1.first == e
        es1.shift
        bool1 = !bool1
      end
      if es2.first == e
        es2.shift
        bool2 = !bool2
      end
    end
    # Region after the last boundary (extends to infinity).
    if bool0 != yield(bool1, bool2)
      es0 << s
    end
    self.class._new(*es0)
  end

  # Partitions self by membership in each of +natsets+.  For every non-empty
  # region, yields an array whose first element is the region and whose
  # remaining elements are the given sets that contain it.  Returns nil.
  def split_each(*natsets)
    if natsets.empty?
      yield [self]
    else
      current = natsets.pop

      outside = self - current
      unless outside.empty?
        outside.split_each(*natsets) {|nss| yield nss}
      end

      inside = self & current
      unless inside.empty?
        inside.split_each(*natsets) {|nss| nss.push current; yield nss}
      end
    end
    nil
  end

  # Like split_each, but returns the yielded arrays collected in an array.
  def split(*natsets)
    result = []
    split_each(*natsets) {|r| result << r}
    result
  end

  # min returns a minimum element of the set.
  # It returns nil if the set has no minimum element,
  # i.e. the set has no element.
  def min
    if @es.empty?
      nil
    else
      @es[0]
    end
  end

  # max returns a maximum element of the set.
  # It returns nil if the set has no maximum element,
  # i.e. the set is open or has no element.
  def max
    if @es.empty? || open?
      nil
    else
      @es[-1] - 1
    end
  end

  # each_range iterates on continuous ranges of the set from smallest to largest.
  # For each range, it yields Range object which represent it.
  # For last range in open set, the end of the object is -1.
  # All Range objects it yields are inclusive, i.e. exclude_end? is false.
  def each_range
    (0...@es.length).step(2) {|i|
      e1 = @es[i]
      if i + 1 == @es.length
        yield e1..-1
      else
        e2 = @es[i + 1]
        yield e1..(e2 - 1)
      end
    }
  end

  # Prints the set as a list of elements and ranges; an open range is
  # printed as "N..inf".
  def pretty_print(pp)
    pp.object_group(self) {
      pp.text ':'
      each_range {|r|
        pp.breakable
        if r.end == -1
          pp.text "#{r.begin}..inf"
        elsif r.begin == r.end
          pp.text r.begin.to_s
        else
          pp.text "#{r.begin}..#{r.end}"
        end
      }
    }
  end

  def inspect
    require 'pp'
    PP.singleline_pp(self, ''.dup)
  end
end
|
291
|
+
|
292
|
+
# Self-test: runs only when this file is executed directly.
if __FILE__ == $0
  require 'test/unit'

  class NatSetTest < Test::Unit::TestCase
    def test_empty
      assert(NatSet.empty.empty?)
    end

    def test_universal
      assert(NatSet.universal.universal?)
    end

    def test_open
      assert(!NatSet.empty.open?)
      assert(NatSet.universal.open?)
    end

    def test_singleton
      assert_equal(1, NatSet._new(1, 2).singleton?)
      assert_nil(NatSet._new(1, 3).singleton?)
    end

    def test_complement
      assert_equal(NatSet.empty, ~NatSet.universal)
      assert_equal(NatSet.universal, ~NatSet.empty)
      assert_equal(NatSet._new(1, 2), ~NatSet._new(0, 1, 2))
      assert_equal(NatSet._new(0, 1, 2), ~NatSet._new(1, 2))
    end

    def test_union
      assert_equal(NatSet.empty, NatSet.empty + NatSet.empty)
      assert_equal(NatSet.universal, NatSet.empty + NatSet.universal)
      assert_equal(NatSet.universal, NatSet.universal + NatSet.empty)
      assert_equal(NatSet.universal, NatSet.universal + NatSet.universal)
      assert_equal(NatSet.new(0..2), NatSet.new(0, 2) + NatSet.new(0, 1))
    end

    def test_intersect
      assert_equal(NatSet.empty, NatSet.empty & NatSet.empty)
      assert_equal(NatSet.empty, NatSet.empty & NatSet.universal)
      assert_equal(NatSet.empty, NatSet.universal & NatSet.empty)
      assert_equal(NatSet.universal, NatSet.universal & NatSet.universal)
      assert_equal(NatSet.new(0), NatSet.new(0, 2) & NatSet.new(0, 1))
    end

    def test_subtract
      assert_equal(NatSet.empty, NatSet.empty - NatSet.empty)
      assert_equal(NatSet.empty, NatSet.empty - NatSet.universal)
      assert_equal(NatSet.universal, NatSet.universal - NatSet.empty)
      assert_equal(NatSet.empty, NatSet.universal - NatSet.universal)
      assert_equal(NatSet.new(2), NatSet.new(0, 2) - NatSet.new(0, 1))
    end

    def test_new
      assert_equal([1, 2], NatSet.new(1).es)
      assert_equal([1, 3], NatSet.new(1, 2).es)
      assert_equal([1, 4], NatSet.new(1, 2, 3).es)
      assert_equal([1, 4], NatSet.new(1, 3, 2).es)
      assert_equal([10, 21], NatSet.new(10..20).es)
      assert_equal([10, 20], NatSet.new(10...20).es)
      assert_equal([1, 2, 3, 4, 5, 6], NatSet.new(1, 3, 5).es)
      assert_equal([1, 16], NatSet.new(5..15, 1..10).es)
      assert_equal([1, 16], NatSet.new(11..15, 1..10).es)
      # NatSet.new converts Strings (and String ranges) with String#ord,
      # so they are accepted rather than rejected.
      assert_equal([97, 98], NatSet.new("a").es)
      assert_equal([97, 99], NatSet.new("a".."b").es)
      assert_raises(ArgumentError) {NatSet.new(-1)}
      assert_raises(ArgumentError) {NatSet.new(-1..3)}
    end

    def test_split
      u = NatSet.universal
      assert_equal([[NatSet.universal]], u.split())
      assert_equal([[NatSet.universal]], u.split(NatSet.empty))
      assert_equal([[NatSet.universal, u]], u.split(u))

      n = NatSet.new(10..20)
      assert_equal([[NatSet.new(0..9, 21..-1)],
                    [NatSet.new(10..20), n]],
                   u.split(n))

      ns = [NatSet.new(10..20), NatSet.new(10..20)]
      assert_equal([[NatSet.new(0..9, 21..-1)],
                    [NatSet.new(10..20), *ns]],
                   u.split(*ns))

      ns = [NatSet.new(1..30), NatSet.new(5..40)]
      assert_equal([[NatSet.new(0, 41..-1)],
                    [NatSet.new(1..4), ns[0]],
                    [NatSet.new(31..40), ns[1]],
                    [NatSet.new(5..30), *ns]],
                   u.split(*ns))

      ns = [NatSet.new(1..30), NatSet.new(5..20)]
      assert_equal([[NatSet.new(0, 31..-1)],
                    [NatSet.new(1..4, 21..30), ns[0]],
                    [NatSet.new(5..20), *ns]],
                   u.split(*ns))
    end

    def test_min
      assert_nil(NatSet.new().min)
      assert_equal(1, NatSet.new(1..10).min)
    end

    def test_max
      assert_nil(NatSet.new().max)
      assert_equal(10, NatSet.new(1..10).max)
      assert_nil(NatSet.new(1..-1).max)
    end

    def test_each_range
      rs = []; NatSet.new()      .each_range {|r| rs << r}; assert_equal([], rs)
      rs = []; NatSet.new(0)     .each_range {|r| rs << r}; assert_equal([0..0], rs)
      rs = []; NatSet.new(1)     .each_range {|r| rs << r}; assert_equal([1..1], rs)
      rs = []; NatSet.new(1..3)  .each_range {|r| rs << r}; assert_equal([1..3], rs)
      rs = []; NatSet.new(1...3) .each_range {|r| rs << r}; assert_equal([1..2], rs)
      rs = []; NatSet.new(1..-1) .each_range {|r| rs << r}; assert_equal([1..-1], rs)
    end
  end
end
|
data/lib/regexptree.rb
ADDED
@@ -0,0 +1,530 @@
|
|
1
|
+
=begin
|
2
|
+
= RegexpTree
|
3
|
+
|
4
|
+
RegexpTree represents a regular expression.
|
5
|
+
It can be converted to Regexp.
|
6
|
+
|
7
|
+
== class methods
|
8
|
+
--- RegexpTree.str(string)
|
9
|
+
returns an instance of RegexpTree which only matches ((|string|))
|
10
|
+
--- RegexpTree.alt(*regexp_trees)
|
11
|
+
returns an instance of RegexpTree which is alternation of ((|regexp_trees|)).
|
12
|
+
--- RegexpTree.seq(*regexp_trees)
|
13
|
+
returns an instance of RegexpTree which is concatenation of ((|regexp_trees|)).
|
14
|
+
--- RegexpTree.rep(regexp_tree, min=0, max=nil, greedy=true)
|
15
|
+
returns an instance of RegexpTree which is repetition of ((|regexp_tree|)).
|
16
|
+
--- RegexpTree.charclass(natset)
|
17
|
+
returns an instance of RegexpTree which matches characters in ((|natset|)).
|
18
|
+
#--- RegexpTree.linebeg
|
19
|
+
#--- RegexpTree.lineend
|
20
|
+
#--- RegexpTree.strbeg
|
21
|
+
#--- RegexpTree.strend
|
22
|
+
#--- RegexpTree.strlineend
|
23
|
+
#--- RegexpTree.word_boundary
|
24
|
+
#--- RegexpTree.non_word_boundary
|
25
|
+
#--- RegexpTree.previous_match
|
26
|
+
#--- RegexpTree.backref(n)
|
27
|
+
|
28
|
+
== methods
|
29
|
+
--- regexp(anchored=false)
|
30
|
+
convert to Regexp.
|
31
|
+
|
32
|
+
If ((|anchored|)) is true, the Regexp is anchored by (({\A})) and (({\z})).
|
33
|
+
--- to_s
|
34
|
+
convert to String.
|
35
|
+
--- empty_set?
|
36
|
+
returns true iff self never matches.
|
37
|
+
--- empty_sequence?
|
38
|
+
returns true iff self only matches empty string.
|
39
|
+
--- self | other
|
40
|
+
returns alternation of ((|self|)) and ((|other|)).
|
41
|
+
--- self + other
|
42
|
+
returns concatenation of ((|self|)) and ((|other|)).
|
43
|
+
--- self * n
|
44
|
+
returns ((|n|)) times repetition of ((|self|)).
|
45
|
+
--- rep(min=0, max=nil, greedy=true)
|
46
|
+
returns ((|min|)) to ((|max|)) times repetition of ((|self|)).
|
47
|
+
#--- closure(greedy=true)
|
48
|
+
#--- positive_closure(greedy=true)
|
49
|
+
#--- optional(greedy=true)
|
50
|
+
#--- ntimes(min, max=min, greedy=true)
|
51
|
+
#--- nongreedy_rep(min=0, max=nil)
|
52
|
+
#--- nongreedy_closure
|
53
|
+
#--- nongreedy_positive_closure
|
54
|
+
#--- nongreedy_optional
|
55
|
+
#--- nongreedy_ntimes(min, max=min)
|
56
|
+
=end
|
57
|
+
|
58
|
+
require 'prettyprint'
|
59
|
+
require 'natset'
|
60
|
+
|
61
|
+
# RegexpTree is a syntax tree for a regular expression.  Trees are built with
# the class-level constructors (str, alt, seq, rep, charclass, ...) and the
# operators |, + and *, and are rendered with regexp, to_s or inspect.
class RegexpTree
  # Each direct subclass receives the next precedence value as its Prec
  # constant, in definition order (Alt, Seq, Rep, Elt).  The order in which
  # those classes are defined below is therefore significant.
  @curr_prec = 1
  def RegexpTree.inherited(c)
    return if c.superclass != RegexpTree
    c.const_set(:Prec, @curr_prec)
    @curr_prec += 1
  end

  # Returns self if it can appear unparenthesized where a node of class
  # +target+ is expected, otherwise self wrapped in a non-capturing Paren.
  def parenthesize(target)
    if target::Prec <= self.class::Prec
      self
    else
      Paren.new(self)
    end
  end

  def pretty_print(pp)
    case_insensitive = case_insensitive?
    pp.group(3, '%r{', '}x') {
      (case_insensitive ? self.downcase : self).pretty_format(pp)
    }
    pp.text 'i' if case_insensitive
  end

  # Returns a %r literal for the tree.  The pattern is downcased and flagged
  # with "i" only when the tree is case insensitive.
  def inspect
    insensitive = case_insensitive?
    # The flag string must stay separate from the boolean: a String is always
    # truthy, so testing it would downcase every pattern.
    flag = insensitive ? "i" : ""
    r = PrettyPrint.singleline_format(''.dup) {|out|
      (insensitive ? self.downcase : self).pretty_format(out)
    }
    if r.include?('/')
      "%r{#{r}}#{flag}"
    else
      "%r/#{r}/#{flag}"
    end
  end

  # Converts the tree to a Regexp.  If +anchored+ is true the pattern is
  # wrapped in \A ... \z.
  def regexp(anchored=false)
    if case_insensitive?
      r = downcase
      opt = Regexp::IGNORECASE
    else
      r = self
      opt = 0
    end
    r = RegexpTree.seq(RegexpTree.strbeg, r, RegexpTree.strend) if anchored
    Regexp.compile(
      PrettyPrint.singleline_format(''.dup) {|out|
        r.pretty_format(out)
      },
      opt)
  end

  # Converts the tree to a String of the form (?i-m:...) or (?-im:...).
  def to_s
    PrettyPrint.singleline_format(''.dup) {|out|
      # x flag is not required because all whitespaces are escaped.
      if case_insensitive?
        out.text '(?i-m:'
        downcase.pretty_format(out)
        out.text ')'
      else
        out.text '(?-im:'
        pretty_format(out)
        out.text ')'
      end
    }
  end

  # True iff self never matches.  Overridden by Alt and CharClass.
  def empty_set?
    false
  end

  # True iff self only matches the empty string.  Overridden by Seq.
  def empty_sequence?
    false
  end

  # Alternation of self and other.
  def |(other)
    RegexpTree.alt(self, other)
  end

  # Alternation of +rs+.  Empty sets are dropped, nested Alt nodes are
  # flattened and adjacent character classes are merged into one.
  def RegexpTree.alt(*rs)
    rs2 = []
    rs.each {|r|
      if r.empty_set?
        next
      elsif Alt === r
        rs2.concat r.rs
      elsif CharClass === r && CharClass === rs2.last
        rs2[-1] = CharClass.new(rs2.last.natset + r.natset)
      else
        rs2 << r
      end
    }
    case rs2.length
    when 0; EmptySet
    when 1; rs2.first
    else; Alt.new(rs2)
    end
  end

  class Alt < RegexpTree
    def initialize(rs)
      @rs = rs
    end
    attr_reader :rs

    def empty_set?
      @rs.empty?
    end

    def case_insensitive?
      @rs.all? {|r| r.case_insensitive?}
    end

    def multiline_insensitive?
      @rs.all? {|r| r.multiline_insensitive?}
    end

    def downcase
      Alt.new(@rs.map {|r| r.downcase})
    end

    def pretty_format(out)
      if @rs.empty?
        # An alternation with no branches never matches.
        out.text '(?!)'
      else
        out.group {
          @rs.each_with_index {|r, i|
            unless i == 0
              out.text '|'
              out.breakable ''
            end
            r.parenthesize(Alt).pretty_format(out)
          }
        }
      end
    end
  end
  EmptySet = Alt.new([])

  # Concatenation of self and other.
  def +(other)
    RegexpTree.seq(self, other)
  end

  # Concatenation of +rs+.  Empty sequences are dropped, nested Seq nodes are
  # flattened, and any empty-set component makes the whole sequence EmptySet.
  def RegexpTree.seq(*rs)
    rs2 = []
    rs.each {|r|
      if r.empty_sequence?
        next
      elsif Seq === r
        rs2.concat r.rs
      elsif r.empty_set?
        return EmptySet
      else
        rs2 << r
      end
    }
    case rs2.length
    when 0; EmptySequence
    when 1; rs2.first
    else; Seq.new(rs2)
    end
  end

  class Seq < RegexpTree
    def initialize(rs)
      @rs = rs
    end
    attr_reader :rs

    def empty_sequence?
      @rs.empty?
    end

    def case_insensitive?
      @rs.all? {|r| r.case_insensitive?}
    end

    def multiline_insensitive?
      @rs.all? {|r| r.multiline_insensitive?}
    end

    def downcase
      Seq.new(@rs.map {|r| r.downcase})
    end

    def pretty_format(out)
      out.group {
        @rs.each_with_index {|r, i|
          unless i == 0
            out.group {out.breakable ''}
          end
          r.parenthesize(Seq).pretty_format(out)
        }
      }
    end
  end
  EmptySequence = Seq.new([])

  # Repetition: an Integer repeats exactly n times, a Range repeats between
  # its first and last (honouring exclude_end?) times.
  # Raises TypeError for any other argument.
  def *(n)
    case n
    when Integer
      RegexpTree.rep(self, n, n)
    when Range
      RegexpTree.rep(self, n.first, n.last - (n.exclude_end? ? 1 : 0))
    else
      raise TypeError.new("Integer or Range expected: #{n}")
    end
  end

  # Convenience wrappers around RegexpTree.rep.
  def nongreedy_closure() RegexpTree.rep(self, 0, nil, false) end
  def nongreedy_positive_closure() RegexpTree.rep(self, 1, nil, false) end
  def nongreedy_optional() RegexpTree.rep(self, 0, 1, false) end
  def nongreedy_ntimes(m, n=m) RegexpTree.rep(self, m, n, false) end
  def nongreedy_rep(m=0, n=nil) RegexpTree.rep(self, m, n, false) end
  def closure(greedy=true) RegexpTree.rep(self, 0, nil, greedy) end
  def positive_closure(greedy=true) RegexpTree.rep(self, 1, nil, greedy) end
  def optional(greedy=true) RegexpTree.rep(self, 0, 1, greedy) end
  def ntimes(m, n=m, greedy=true) RegexpTree.rep(self, m, n, greedy) end
  def rep(m=0, n=nil, greedy=true) RegexpTree.rep(self, m, n, greedy) end

  # +m+ to +n+ repetitions of +r+ (n == nil means unbounded), simplified
  # where the result is trivially the empty sequence, the empty set or +r+.
  def RegexpTree.rep(r, m=0, n=nil, greedy=true)
    return EmptySequence if m == 0 && n == 0
    return r if m == 1 && n == 1
    return EmptySequence if r.empty_sequence?
    if r.empty_set?
      return m == 0 ? EmptySequence : EmptySet
    end
    Rep.new(r, m, n, greedy)
  end

  class Rep < RegexpTree
    def initialize(r, m=0, n=nil, greedy=true)
      @r = r
      @m = m
      @n = n
      @greedy = greedy
    end

    def case_insensitive?
      @r.case_insensitive?
    end

    def multiline_insensitive?
      @r.multiline_insensitive?
    end

    def downcase
      Rep.new(@r.downcase, @m, @n, @greedy)
    end

    # Emits the operand followed by the shortest quantifier for (@m, @n),
    # plus a trailing '?' when the repetition is non-greedy.
    def pretty_format(out)
      @r.parenthesize(Elt).pretty_format(out)
      case @m
      when 0
        case @n
        when 0
          out.text '{0}'
        when 1
          out.text '?'
        when nil
          out.text '*'
        else
          out.text "{#{@m},#{@n}}"
        end
      when 1
        case @n
        when 1
          # exactly once: no quantifier
        when nil
          out.text '+'
        else
          out.text "{#{@m},#{@n}}"
        end
      else
        if @m == @n
          out.text "{#{@m}}"
        else
          out.text "{#{@m},#{@n}}"
        end
      end
      out.text '?' unless @greedy
    end
  end

  # Common superclass of the atomic nodes (CharClass, Special, Paren).
  class Elt < RegexpTree
  end

  # Character class matching the characters in +natset+; EmptySet if the
  # set is empty.
  def RegexpTree.charclass(natset)
    if natset.empty?
      EmptySet
    else
      CharClass.new(natset)
    end
  end

  class CharClass < Elt
    None = NatSet.empty
    Any = NatSet.universal
    NL = NatSet.new(?\n)
    NonNL = ~NL
    Word = NatSet.new(?0..?9, ?A..?Z, ?_, ?a..?z)
    NonWord = ~Word
    # NOTE(review): current Ruby documents \s as also matching \v; this set
    # does not include it -- confirm whether that matters to callers.
    Space = NatSet.new(?\t, ?\n, ?\f, ?\r, ?\s)
    NonSpace = ~Space
    Digit = NatSet.new(?0..?9)
    NonDigit = ~Digit

    UpAlpha = NatSet.new(?A..?Z)
    LowAlpha = NatSet.new(?a..?z)

    def initialize(natset)
      @natset = natset
    end
    attr_reader :natset

    def empty_set?
      @natset.empty?
    end

    # True iff the upper-case letters in the class, shifted to lower case,
    # are exactly the lower-case letters in the class.
    def case_insensitive?
      up = @natset & UpAlpha
      low = @natset & LowAlpha
      # Map into a new array: the intersection may return a shared object
      # (e.g. UpAlpha itself), so its boundary list must not be modified.
      up.es.map {|ch| ch - 0x41 + 0x61 } == low.es # ?A -> ?a
    end

    def multiline_insensitive?
      @natset != NonNL
    end

    # Returns a CharClass in which every upper-case letter is replaced by
    # its lower-case counterpart.
    def downcase
      up = @natset & UpAlpha
      # Build a new set instead of rewriting up.es in place (see
      # case_insensitive?).
      lowered = NatSet._new(*up.es.map {|ch| ch - 0x41 + 0x61 }) # ?A -> ?a
      CharClass.new((@natset - UpAlpha) | lowered)
    end

    # Emits a shorthand escape when the set equals a well-known class,
    # a single escaped character for a singleton, or a bracket expression
    # (negated when the set is open) otherwise.
    def pretty_format(out)
      case @natset
      when None; out.text '(?!)'
      when Any; out.text '[\s\S]'
      when NL; out.text '\n'
      when NonNL; out.text '.'
      when Word; out.text '\w'
      when NonWord; out.text '\W'
      when Space; out.text '\s'
      when NonSpace; out.text '\S'
      when Digit; out.text '\d'
      when NonDigit; out.text '\D'
      else
        if val = @natset.singleton?
          out.text encode_elt(val)
        else
          if @natset.open?
            # An open set cannot be listed directly; list its complement
            # inside a negated bracket expression.
            neg_mark = '^'
            es = (~@natset).es
          else
            neg_mark = ''
            es = @natset.es
          end
          r = ''.dup
          es.each_slice(2) {|lo, hi|
            if lo + 1 == hi
              r << encode_elt(lo)
            elsif lo + 2 == hi
              r << encode_elt(lo) << encode_elt(hi - 1)
            else
              r << encode_elt(lo) << '-' << encode_elt(hi - 1)
            end
          }
          out.text "[#{neg_mark}#{r}]"
        end
      end
    end

    # Returns the regexp source text for character code +e+: a named escape,
    # the literal character for a fixed list of safe printable characters,
    # or a \xHH escape otherwise.
    # NOTE(review): values above 0xff produce more than two hex digits after
    # \x -- confirm callers only pass byte values.
    def encode_elt(e)
      case e
      when 0x09; '\t'
      when 0x0a; '\n'
      when 0x0d; '\r'
      when 0x0c; '\f'
      when 0x0b; '\v'
      when 0x07; '\a'
      when 0x1b; '\e'
      #when ?!, ?", ?%, ?&, ?', ?,, ?:, ?;, ?<, ?=, ?>, ?/, ?0..?9, ?@, ?A..?Z, ?_, ?`, ?a..?z, ?~
      when 0x21, 0x22, 0x25, 0x26, 0x27, 0x2c, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x2f, 0x30..0x39, 0x40, 0x41..0x5a, 0x5f, 0x60, 0x61..0x7a, 0x7e
        sprintf("%c", e)
      else
        sprintf("\\x%02x", e)
      end
    end
  end

  # Anchors and back-references, emitted verbatim.
  def RegexpTree.linebeg() Special.new('^') end
  def RegexpTree.lineend() Special.new('$') end
  def RegexpTree.strbeg() Special.new('\A') end
  def RegexpTree.strend() Special.new('\z') end
  def RegexpTree.strlineend() Special.new('\Z') end
  def RegexpTree.word_boundary() Special.new('\b') end
  def RegexpTree.non_word_boundary() Special.new('\B') end
  def RegexpTree.previous_match() Special.new('\G') end
  def RegexpTree.backref(n) Special.new("\\#{n}") end

  class Special < Elt
    def initialize(str)
      @str = str
    end

    def case_insensitive?
      true
    end

    def multiline_insensitive?
      true
    end

    def downcase
      self
    end

    def pretty_format(out)
      out.text @str
    end
  end

  def group() Paren.new(self, '') end
  def paren() Paren.new(self) end
  def lookahead() Paren.new(self, '?=') end
  def negative_lookahead() Paren.new(self, '?!') end
  # (?ixm-ixm:...)
  # (?>...)

  # Parenthesized sub-expression; +mark+ is the text placed right after the
  # opening parenthesis ('?:' by default, '' for a capturing group).
  class Paren < Elt
    def initialize(r, mark='?:')
      @mark = mark
      @r = r
    end

    def case_insensitive?
      # xxx: if @mark contains "i"...
      @r.case_insensitive?
    end

    def multiline_insensitive?
      # xxx: if @mark contains "m"...
      @r.multiline_insensitive?
    end

    def downcase
      Paren.new(@r.downcase, @mark)
    end

    def pretty_format(out)
      out.group(1 + @mark.length, "(#{@mark}", ')') {
        @r.pretty_format(out)
      }
    end
  end

  # def RegexpTree.comment(str) ... end # (?#...)

  # Returns a tree that matches exactly +str+, one character class per byte.
  def RegexpTree.str(str)
    ccs = str.each_byte.map {|ch| CharClass.new(NatSet.new(ch)) }
    seq(*ccs)
  end
end
|