abnf 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
# Namespace of the abnf gem; this file only carries the release number.
module Abnf
  # Version string of the gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,411 @@
1
+ =begin
2
+ = NatSet
3
+
4
+ NatSet represents a set of naturals - non-negative integers.
5
+
6
+ == class methods
7
+ --- NatSet.empty
8
+ --- NatSet.universal
9
+ --- NatSet.new(integer_or_range, ...)
10
+
11
+ == methods
12
+ --- empty?
13
+ --- universal?
14
+ --- open?
15
+ --- singleton?
16
+ --- self == other
17
+ --- self === other
18
+ --- eql?(other)
19
+ --- hash
20
+ --- ~self
21
+ --- self + other
22
+ --- self - other
23
+ --- self & other
24
+
25
+ --- split_each(ns, ...) {|region, *nss| ... }
26
+ --- split(ns, ...)
27
+
28
+ --- min
29
+ --- max
30
+
31
+ --- each_range {|range| ... }
32
+
33
+ =end
34
+
35
# NatSet is a set of naturals (non-negative integers).
#
# The set is stored in @es as a strictly increasing array of boundaries.
# Membership is false below the first boundary and toggles at every boundary:
#   []        is the empty set
#   [0]       is the universal set
#   [1, 4]    is {1, 2, 3}
#   [1, 4, 9] is {1, 2, 3} plus every integer >= 9
# An odd number of boundaries therefore means the set is open (unbounded).
class NatSet
  class << NatSet
    # NatSet.new is redefined below to accept user-level values; the raw
    # constructor, which takes the boundary array directly, stays reachable
    # as NatSet._new.
    alias _new new
  end

  # Returns the empty set.
  def NatSet.empty
    self._new
  end

  # Returns the set of all naturals.
  def NatSet.universal
    self._new(0)
  end

  # Builds the union of all given elements.  Each element may be
  # * an Integer >= 0 (a single member),
  # * a String (its first character's code point, via String#ord),
  # * a Range of Integers or Strings; a negative or nil (endless) end
  #   means "up to infinity",
  # * another NatSet.
  # Ranges that contain no integer (e.g. 5..3 or 5...5) contribute nothing.
  # Raises ArgumentError for anything else.
  def NatSet.new(*es)
    r = self.empty
    es.each {|e|
      e = e.ord if String === e
      case e
      when Range
        first = e.begin
        last = e.end
        if String === first
          first = first.ord
          last = last.ord
        end
        unless Integer === first && 0 <= first
          raise ArgumentError.new("bad value for #{self}.new: #{e}")
        end
        # A non-Integer end would otherwise be stored verbatim as a boundary.
        unless last.nil? || Integer === last
          raise ArgumentError.new("bad value for #{self}.new: #{e}")
        end
        if last.nil? || last < 0
          r += self._new(first)
        else
          last += 1 unless e.exclude_end?
          # Skip empty ranges: [first, last] with first >= last would break
          # the "strictly increasing" invariant of the boundary array.
          r += self._new(first, last) if first < last
        end
      when Integer
        unless 0 <= e
          raise ArgumentError.new("bad value for #{self}.new: #{e}")
        end
        r += self._new(e, e+1)
      when NatSet
        r += e
      else
        raise ArgumentError.new("bad value for #{self}.new: #{e}")
      end
    }
    r
  end

  # Raw constructor: +es+ is the boundary array described on the class.
  def initialize(*es)
    @es = es
  end
  attr_reader :es

  # True iff the set has no member.
  def empty?
    @es.empty?
  end

  # True iff the set contains every natural.
  def universal?
    @es == [0]
  end

  # True iff the set is unbounded (odd number of boundaries).
  def open?
    @es.length & 1 != 0
  end

  # Returns the only member if the set has exactly one member, nil otherwise.
  def singleton?
    if @es.length == 2 && @es[0] == @es[1] - 1
      @es[0]
    else
      nil
    end
  end

  # Two sets are equal iff their boundary arrays are equal.
  # Anything that is not a NatSet is simply unequal (no exception).
  def ==(other)
    other.is_a?(NatSet) && @es == other.es
  end
  alias === ==
  alias eql? ==

  def hash
    @es.hash
  end

  # Returns the set of naturals that are not in self.
  def complement
    if @es.empty?
      self.class.universal
    elsif @es[0] == 0
      self.class._new(*@es[1..-1])
    else
      self.class._new(0, *@es)
    end
  end
  alias ~ complement

  # self + other.  Dispatches on +other+ so that other classes can take part
  # by implementing union_natset.
  def union(other)
    other.union_natset(self)
  end
  alias + union
  alias | union

  def union_natset(natset)
    return self if natset.empty? || self.universal?
    return natset if self.empty? || natset.universal?
    merge(natset) {|a, b| a || b}
  end

  # self & other (double dispatch, see union).
  def intersect(other)
    other.intersect_natset(self)
  end
  alias & intersect

  def intersect_natset(natset)
    return self if self.empty? || natset.universal?
    return natset if natset.empty? || self.universal?
    merge(natset) {|a, b| a && b}
  end

  # self - other (double dispatch, see union).
  def subtract(other)
    other.subtract_natset(self)
  end
  alias - subtract

  def subtract_natset(natset) # natset - self
    # Since double dispatch *inverses* a receiver and an argument,
    # condition should be inversed.
    return natset if self.empty? || natset.empty?
    return self.class.empty if self.universal?
    return ~self if natset.universal?
    merge(natset) {|a, b| !a && b}
  end

  # Sweeps over the boundaries of self and +other+ in increasing order and
  # builds a new set whose membership on each region is
  # yield(member_of_self, member_of_other).
  def merge(other)
    es1 = @es
    es2 = other.es
    i1 = i2 = 0
    es0 = []
    bool1 = bool2 = bool0 = false
    s = 0
    while i1 < es1.length || i2 < es2.length
      # Consume the smaller head; consume both when they coincide.
      take1 = i2 >= es2.length || (i1 < es1.length && es1[i1] <= es2[i2])
      take2 = i1 >= es1.length || (i2 < es2.length && es2[i2] <= es1[i1])
      e = take1 ? es1[i1] : es2[i2]
      # The region s...e is non-empty: emit a boundary at s if the result's
      # membership changes there.
      if s < e && bool0 != yield(bool1, bool2)
        es0 << s
        bool0 = !bool0
      end
      s = e
      if take1
        i1 += 1
        bool1 = !bool1
      end
      if take2
        i2 += 1
        bool2 = !bool2
      end
    end
    # The final, unbounded region starting at s.
    if bool0 != yield(bool1, bool2)
      es0 << s
    end
    self.class._new(*es0)
  end

  # Partitions self by the given sets.  For each non-empty region it yields
  # one array: the region itself followed by those of +natsets+ that cover it.
  # Always returns nil.
  def split_each(*natsets)
    if natsets.empty?
      yield [self]
    else
      current = natsets.pop

      a = self - current
      unless a.empty?
        a.split_each(*natsets) {|nss| yield nss}
      end

      a = self & current
      unless a.empty?
        a.split_each(*natsets) {|nss| nss.push current; yield nss}
      end
    end
    nil
  end

  # Like split_each, but collects the yielded arrays and returns them.
  def split(*natsets)
    result = []
    split_each(*natsets) {|r| result << r}
    result
  end

  # min returns a minimum element of the set.
  # It returns nil if the set has no minimum element,
  # i.e. the set has no element.
  def min
    @es.first
  end

  # max returns a maximum element of the set.
  # It returns nil if the set has no maximum element,
  # i.e. the set is open or has no element.
  def max
    if @es.empty? || open?
      nil
    else
      @es[-1] - 1
    end
  end

  # each_range iterates on continuous ranges of the set from smallest to largest.
  # For each range, it yields Range object which represent it.
  # For last range in open set, the end of the object is -1.
  # All yielded Range objects are inclusive (exclude_end? is false).
  # Without a block it returns an Enumerator.
  def each_range
    return enum_for(:each_range) unless block_given?
    (0...@es.length).step(2) {|i|
      e1 = @es[i]
      if i+1 == @es.length
        yield e1..-1
      else
        e2 = @es[i+1]
        yield e1..(e2-1)
      end
    }
  end

  # Pretty-printer hook: prints the ranges of the set, "inf" marking an
  # open end.
  def pretty_print(pp)
    pp.object_group(self) {
      pp.text ':'
      each_range {|r|
        pp.breakable
        if r.end == -1
          pp.text "#{r.begin}..inf"
        elsif r.begin == r.end
          pp.text r.begin.to_s
        else
          pp.text "#{r.begin}..#{r.end}"
        end
      }
    }
  end

  def inspect
    require 'pp'
    PP.singleline_pp(self, '')
  end
end
291
+
292
# Self-test: runs only when this file is executed directly.
if __FILE__ == $0
  require 'test/unit'

  class NatSetTest < Test::Unit::TestCase
    def test_empty
      assert(NatSet.empty.empty?)
    end

    def test_universal
      assert(NatSet.universal.universal?)
    end

    def test_open
      assert(!NatSet.empty.open?)
      assert(NatSet.universal.open?)
    end

    def test_singleton
      assert_equal(1, NatSet._new(1, 2).singleton?)
      assert_nil(NatSet._new(1, 3).singleton?)
    end

    def test_complement
      assert_equal(NatSet.empty, ~NatSet.universal)
      assert_equal(NatSet.universal, ~NatSet.empty)
      assert_equal(NatSet._new(1, 2), ~NatSet._new(0, 1, 2))
      assert_equal(NatSet._new(0, 1, 2), ~NatSet._new(1, 2))
    end

    def test_union
      assert_equal(NatSet.empty, NatSet.empty + NatSet.empty)
      assert_equal(NatSet.universal, NatSet.empty + NatSet.universal)
      assert_equal(NatSet.universal, NatSet.universal + NatSet.empty)
      assert_equal(NatSet.universal, NatSet.universal + NatSet.universal)
      assert_equal(NatSet.new(0..2), NatSet.new(0, 2) + NatSet.new(0, 1))
    end

    def test_intersect
      assert_equal(NatSet.empty, NatSet.empty & NatSet.empty)
      assert_equal(NatSet.empty, NatSet.empty & NatSet.universal)
      assert_equal(NatSet.empty, NatSet.universal & NatSet.empty)
      assert_equal(NatSet.universal, NatSet.universal & NatSet.universal)
      assert_equal(NatSet.new(0), NatSet.new(0, 2) & NatSet.new(0, 1))
    end

    def test_subtract
      assert_equal(NatSet.empty, NatSet.empty - NatSet.empty)
      assert_equal(NatSet.empty, NatSet.empty - NatSet.universal)
      assert_equal(NatSet.universal, NatSet.universal - NatSet.empty)
      assert_equal(NatSet.empty, NatSet.universal - NatSet.universal)
      assert_equal(NatSet.new(2), NatSet.new(0, 2) - NatSet.new(0, 1))
    end

    def test_new
      assert_equal([1, 2], NatSet.new(1).es)
      assert_equal([1, 3], NatSet.new(1, 2).es)
      assert_equal([1, 4], NatSet.new(1, 2, 3).es)
      assert_equal([1, 4], NatSet.new(1, 3, 2).es)
      assert_equal([10, 21], NatSet.new(10..20).es)
      assert_equal([10, 20], NatSet.new(10...20).es)
      assert_equal([1, 2, 3, 4, 5, 6], NatSet.new(1, 3, 5).es)
      assert_equal([1, 16], NatSet.new(5..15, 1..10).es)
      assert_equal([1, 16], NatSet.new(11..15, 1..10).es)
      # NatSet.new converts strings with String#ord, so they are accepted
      # rather than rejected.
      assert_equal([97, 98], NatSet.new("a").es)
      assert_equal([97, 99], NatSet.new("a".."b").es)
      assert_raises(ArgumentError) {NatSet.new(-1)}
      assert_raises(ArgumentError) {NatSet.new(-1..3)}
      assert_raises(ArgumentError) {NatSet.new(:a)}
    end

    def test_split
      u = NatSet.universal
      assert_equal([[NatSet.universal]], u.split())
      assert_equal([[NatSet.universal]], u.split(NatSet.empty))
      assert_equal([[NatSet.universal, u]], u.split(u))

      n = NatSet.new(10..20)
      assert_equal([[NatSet.new(0..9, 21..-1)],
                    [NatSet.new(10..20), n]],
                   u.split(n))

      ns = [NatSet.new(10..20), NatSet.new(10..20)]
      assert_equal([[NatSet.new(0..9, 21..-1)],
                    [NatSet.new(10..20), *ns]],
                   u.split(*ns))

      ns = [NatSet.new(1..30), NatSet.new(5..40)]
      assert_equal([[NatSet.new(0, 41..-1)],
                    [NatSet.new(1..4), ns[0]],
                    [NatSet.new(31..40), ns[1]],
                    [NatSet.new(5..30), *ns]],
                   u.split(*ns))

      ns = [NatSet.new(1..30), NatSet.new(5..20)]
      assert_equal([[NatSet.new(0, 31..-1)],
                    [NatSet.new(1..4, 21..30), ns[0]],
                    [NatSet.new(5..20), *ns]],
                   u.split(*ns))
    end

    def test_min
      assert_nil(NatSet.new().min)
      assert_equal(1, NatSet.new(1..10).min)
    end

    def test_max
      assert_nil(NatSet.new().max)
      assert_equal(10, NatSet.new(1..10).max)
      assert_nil(NatSet.new(1..-1).max)
    end

    def test_each_range
      rs = []; NatSet.new()     .each_range {|r| rs << r}; assert_equal([], rs)
      rs = []; NatSet.new(0)    .each_range {|r| rs << r}; assert_equal([0..0], rs)
      rs = []; NatSet.new(1)    .each_range {|r| rs << r}; assert_equal([1..1], rs)
      rs = []; NatSet.new(1..3) .each_range {|r| rs << r}; assert_equal([1..3], rs)
      rs = []; NatSet.new(1...3).each_range {|r| rs << r}; assert_equal([1..2], rs)
      rs = []; NatSet.new(1..-1).each_range {|r| rs << r}; assert_equal([1..-1], rs)
    end
  end
end
@@ -0,0 +1,530 @@
1
+ =begin
2
+ = RegexpTree
3
+
4
+ RegexpTree represents regular expression.
5
+ It can be converted to Regexp.
6
+
7
+ == class methods
8
+ --- RegexpTree.str(string)
9
+ returns an instance of RegexpTree which only matches ((|string|))
10
+ --- RegexpTree.alt(*regexp_trees)
11
+ returns an instance of RegexpTree which is alternation of ((|regexp_trees|)).
12
+ --- RegexpTree.seq(*regexp_trees)
13
+ returns an instance of RegexpTree which is concatenation of ((|regexp_trees|)).
14
+ --- RegexpTree.rep(regexp_tree, min=0, max=nil, greedy=true)
15
+ returns an instance of RegexpTree which is repetition of ((|regexp_tree|)).
16
+ --- RegexpTree.charclass(natset)
17
+ returns an instance of RegexpTree which matches characters in ((|natset|)).
18
+ #--- RegexpTree.linebeg
19
+ #--- RegexpTree.lineend
20
+ #--- RegexpTree.strbeg
21
+ #--- RegexpTree.strend
22
+ #--- RegexpTree.strlineend
23
+ #--- RegexpTree.word_boundary
24
+ #--- RegexpTree.non_word_boundary
25
+ #--- RegexpTree.previous_match
26
+ #--- RegexpTree.backref(n)
27
+
28
+ == methods
29
+ --- regexp(anchored=false)
30
+ convert to Regexp.
31
+
32
+ If ((|anchored|)) is true, the Regexp is anchored by (({\A})) and (({\z})).
33
+ --- to_s
34
+ convert to String.
35
+ --- empty_set?
36
+ returns true iff self never matches.
37
+ --- empty_sequence?
38
+ returns true iff self only matches empty string.
39
+ --- self | other
40
+ returns alternation of ((|self|)) and ((|other|)).
41
+ --- self + other
42
+ returns concatenation of ((|self|)) and ((|other|)).
43
+ --- self * n
44
+ returns ((|n|)) times repetition of ((|self|)).
45
+ --- rep(min=0, max=nil, greedy=true)
46
+ returns ((|min|)) to ((|max|)) times repetition of ((|self|)).
47
+ #--- closure(greedy=true)
48
+ #--- positive_closure(greedy=true)
49
+ #--- optional(greedy=true)
50
+ #--- ntimes(min, max=min, greedy=true)
51
+ #--- nongreedy_rep(min=0, max=nil)
52
+ #--- nongreedy_closure
53
+ #--- nongreedy_positive_closure
54
+ #--- nongreedy_optional
55
+ #--- nongreedy_ntimes(min, max=min)
56
+ =end
57
+
58
+ require 'prettyprint'
59
+ require 'natset'
60
+
61
# RegexpTree is an abstract syntax tree of a regular expression which can be
# rendered as a Regexp (#regexp), as an embeddable String (#to_s) or for
# humans (#inspect / #pretty_print).
#
# Concrete node classes (Alt, Seq, Rep, Elt and its subclasses) implement
# case_insensitive?, multiline_insensitive?, downcase and pretty_format(out).
class RegexpTree
  # Every direct subclass receives a Prec constant in definition order
  # (Alt = 1, Seq = 2, Rep = 3, Elt = 4); subclasses of Elt inherit Elt's.
  # parenthesize compares these values, so the order in which the subclasses
  # are defined below is significant.
  @curr_prec = 1
  def RegexpTree.inherited(c)
    return if c.superclass != RegexpTree
    c.const_set(:Prec, @curr_prec)
    @curr_prec += 1
  end

  # Returns self if it can be embedded in a +target+ node as is, otherwise
  # self wrapped in a non-capturing group.
  def parenthesize(target)
    if target::Prec <= self.class::Prec
      self
    else
      Paren.new(self)
    end
  end

  def pretty_print(pp)
    case_insensitive = case_insensitive?
    pp.group(3, '%r{', '}x') {
      (case_insensitive ? self.downcase : self).pretty_format(pp)
    }
    pp.text 'i' if case_insensitive
  end

  # Returns a %r literal-like String.  The pattern is downcased and given the
  # "i" flag only when the tree really is case insensitive.
  def inspect
    insensitive = case_insensitive?
    # Keep the boolean and the flag text apart: the flag string is truthy
    # even when it is empty.
    flag = insensitive ? "i" : ""
    r = PrettyPrint.singleline_format('') {|out|
      (insensitive ? self.downcase : self).pretty_format(out)
    }
    if %r{/} =~ r
      "%r{#{r}}#{flag}"
    else
      "%r/#{r}/#{flag}"
    end
  end

  # Converts to Regexp.  If +anchored+ is true the pattern is wrapped in
  # \A ... \z.
  def regexp(anchored=false)
    if case_insensitive?
      r = downcase
      opt = Regexp::IGNORECASE
    else
      r = self
      opt = 0
    end
    r = RegexpTree.seq(RegexpTree.strbeg, r, RegexpTree.strend) if anchored
    Regexp.compile(
      PrettyPrint.singleline_format('') {|out|
        r.pretty_format(out)
      },
      opt)
  end

  # Converts to a String of the form (?flags:...), suitable for embedding in
  # another regular expression.
  def to_s
    PrettyPrint.singleline_format('') {|out|
      # x flag is not required because all whitespaces are escaped.
      if case_insensitive?
        out.text '(?i-m:'
        downcase.pretty_format(out)
        out.text ')'
      else
        out.text '(?-im:'
        pretty_format(out)
        out.text ')'
      end
    }
  end

  # True iff self never matches.
  def empty_set?
    false
  end

  # True iff self only matches the empty string.
  def empty_sequence?
    false
  end

  # Alternation of self and +other+.
  def |(other)
    RegexpTree.alt(self, other)
  end

  # Alternation of +rs+.  Never-matching branches are dropped, nested Alt
  # nodes are flattened and adjacent character classes are merged.
  def RegexpTree.alt(*rs)
    rs2 = []
    rs.each {|r|
      if r.empty_set?
        next
      elsif Alt === r
        rs2.concat r.rs
      elsif CharClass === r
        if CharClass === rs2.last
          rs2[-1] = CharClass.new(rs2.last.natset + r.natset)
        else
          rs2 << r
        end
      else
        rs2 << r
      end
    }
    case rs2.length
    when 0; EmptySet
    when 1; rs2.first
    else; Alt.new(rs2)
    end
  end

  class Alt < RegexpTree
    def initialize(rs)
      @rs = rs
    end
    attr_reader :rs

    def empty_set?
      @rs.empty?
    end

    def case_insensitive?
      @rs.all? {|r| r.case_insensitive?}
    end

    def multiline_insensitive?
      @rs.all? {|r| r.multiline_insensitive?}
    end

    def downcase
      Alt.new(@rs.map {|r| r.downcase})
    end

    def pretty_format(out)
      if @rs.empty?
        # An alternation without branches can never match.
        out.text '(?!)'
      else
        out.group {
          @rs.each_with_index {|r, i|
            unless i == 0
              out.text '|'
              out.breakable ''
            end
            r.parenthesize(Alt).pretty_format(out)
          }
        }
      end
    end
  end
  EmptySet = Alt.new([])

  # Concatenation of self and +other+.
  def +(other)
    RegexpTree.seq(self, other)
  end

  # Concatenation of +rs+.  Empty sequences are dropped, nested Seq nodes are
  # flattened and a never-matching element makes the whole result EmptySet.
  def RegexpTree.seq(*rs)
    rs2 = []
    rs.each {|r|
      if r.empty_sequence?
        next
      elsif Seq === r
        rs2.concat r.rs
      elsif r.empty_set?
        return EmptySet
      else
        rs2 << r
      end
    }
    case rs2.length
    when 0; EmptySequence
    when 1; rs2.first
    else; Seq.new(rs2)
    end
  end

  class Seq < RegexpTree
    def initialize(rs)
      @rs = rs
    end
    attr_reader :rs

    def empty_sequence?
      @rs.empty?
    end

    def case_insensitive?
      @rs.all? {|r| r.case_insensitive?}
    end

    def multiline_insensitive?
      @rs.all? {|r| r.multiline_insensitive?}
    end

    def downcase
      Seq.new(@rs.map {|r| r.downcase})
    end

    def pretty_format(out)
      out.group {
        @rs.each_with_index {|r, i|
          unless i == 0
            out.group {out.breakable ''}
          end
          r.parenthesize(Seq).pretty_format(out)
        }
      }
    end
  end
  EmptySequence = Seq.new([])

  # Repetition of self: exactly n times for an Integer, between the bounds of
  # n for a Range.  Raises TypeError for anything else.
  def *(n)
    case n
    when Integer
      RegexpTree.rep(self, n, n)
    when Range
      RegexpTree.rep(self, n.first, n.last - (n.exclude_end? ? 1 : 0))
    else
      raise TypeError.new("Integer or Range expected: #{n}")
    end
  end

  # Shorthands for RegexpTree.rep(self, ...).
  def nongreedy_closure() RegexpTree.rep(self, 0, nil, false) end
  def nongreedy_positive_closure() RegexpTree.rep(self, 1, nil, false) end
  def nongreedy_optional() RegexpTree.rep(self, 0, 1, false) end
  def nongreedy_ntimes(m, n=m) RegexpTree.rep(self, m, n, false) end
  def nongreedy_rep(m=0, n=nil) RegexpTree.rep(self, m, n, false) end
  def closure(greedy=true) RegexpTree.rep(self, 0, nil, greedy) end
  def positive_closure(greedy=true) RegexpTree.rep(self, 1, nil, greedy) end
  def optional(greedy=true) RegexpTree.rep(self, 0, 1, greedy) end
  def ntimes(m, n=m, greedy=true) RegexpTree.rep(self, m, n, greedy) end
  def rep(m=0, n=nil, greedy=true) RegexpTree.rep(self, m, n, greedy) end

  # Repetition of +r+, +m+ to +n+ times (+n+ == nil means unbounded).
  # Degenerate cases are simplified instead of building a Rep node.
  def RegexpTree.rep(r, m=0, n=nil, greedy=true)
    return EmptySequence if m == 0 && n == 0
    return r if m == 1 && n == 1
    return EmptySequence if r.empty_sequence?
    if r.empty_set?
      return m == 0 ? EmptySequence : EmptySet
    end
    Rep.new(r, m, n, greedy)
  end

  class Rep < RegexpTree
    def initialize(r, m=0, n=nil, greedy=true)
      @r = r
      @m = m
      @n = n
      @greedy = greedy
    end

    def case_insensitive?
      @r.case_insensitive?
    end

    def multiline_insensitive?
      @r.multiline_insensitive?
    end

    def downcase
      Rep.new(@r.downcase, @m, @n, @greedy)
    end

    # Emits the operand followed by the shortest quantifier spelling
    # (?, *, +, {m}, {m,n} or {m,}), plus a trailing ? when non-greedy.
    def pretty_format(out)
      @r.parenthesize(Elt).pretty_format(out)
      case @m
      when 0
        case @n
        when 0
          out.text '{0}'
        when 1
          out.text '?'
        when nil
          out.text '*'
        else
          out.text "{#{@m},#{@n}}"
        end
      when 1
        case @n
        when 1
          # exactly once: no quantifier needed
        when nil
          out.text '+'
        else
          out.text "{#{@m},#{@n}}"
        end
      else
        if @m == @n
          out.text "{#{@m}}"
        else
          out.text "{#{@m},#{@n}}"
        end
      end
      out.text '?' unless @greedy
    end
  end

  # Base class of atomic nodes, which never need parentheses.
  class Elt < RegexpTree
  end

  # Node matching one character whose code is in +natset+; EmptySet if
  # +natset+ is empty.
  def RegexpTree.charclass(natset)
    if natset.empty?
      EmptySet
    else
      CharClass.new(natset)
    end
  end

  class CharClass < Elt
    None = NatSet.empty
    Any = NatSet.universal
    NL = NatSet.new(?\n)
    NonNL = ~NL
    Word = NatSet.new(?0..?9, ?A..?Z, ?_, ?a..?z)
    NonWord = ~Word
    # Tab, newline, form feed, carriage return and space.
    # NOTE(review): Ruby's documented \s also includes \v (0x0b); confirm
    # whether this set should contain it as well.
    Space = NatSet.new(?\t, ?\n, ?\f, ?\r, ?\s)
    NonSpace = ~Space
    Digit = NatSet.new(?0..?9)
    NonDigit = ~Digit

    UpAlpha = NatSet.new(?A..?Z)
    LowAlpha = NatSet.new(?a..?z)

    def initialize(natset)
      @natset = natset
    end
    attr_reader :natset

    def empty_set?
      @natset.empty?
    end

    # True iff the class contains an upper case ASCII letter exactly when it
    # contains the corresponding lower case letter.
    def case_insensitive?
      lowered_upper_part == (@natset & LowAlpha)
    end

    def multiline_insensitive?
      @natset != NonNL
    end

    # Returns a CharClass in which every upper case ASCII letter is replaced
    # by its lower case counterpart.
    def downcase
      CharClass.new((@natset - UpAlpha) | lowered_upper_part)
    end

    def pretty_format(out)
      case @natset
      when None; out.text '(?!)'
      when Any; out.text '[\s\S]'
      when NL; out.text '\n'
      when NonNL; out.text '.'
      when Word; out.text '\w'
      when NonWord; out.text '\W'
      when Space; out.text '\s'
      when NonSpace; out.text '\S'
      when Digit; out.text '\d'
      when NonDigit; out.text '\D'
      else
        if val = @natset.singleton?
          out.text encode_elt(val)
        else
          # An open (unbounded) set is written as the negation of its finite
          # complement.
          if @natset.open?
            neg_mark = '^'
            es = (~@natset).es.dup
          else
            neg_mark = ''
            es = @natset.es.dup
          end
          r = ''
          # es holds boundary pairs [first, last + 1]; consume them pairwise.
          until es.empty?
            if es[0] + 1 == es[1]
              r << encode_elt(es[0])
            elsif es[0] + 2 == es[1]
              r << encode_elt(es[0]) << encode_elt(es[1] - 1)
            else
              r << encode_elt(es[0]) << '-' << encode_elt(es[1] - 1)
            end
            es.shift
            es.shift
          end
          out.text "[#{neg_mark}#{r}]"
        end
      end
    end

    # Returns the regexp source text for the character code +e+.
    # NOTE(review): codes above 0xff are emitted as \x followed by more than
    # two hex digits; confirm that callers never pass such values.
    def encode_elt(e)
      case e
      when 0x09; '\t'
      when 0x0a; '\n'
      when 0x0d; '\r'
      when 0x0c; '\f'
      when 0x0b; '\v'
      when 0x07; '\a'
      when 0x1b; '\e'
      #when ?!, ?", ?%, ?&, ?', ?,, ?:, ?;, ?<, ?=, ?>, ?/, ?0..?9, ?@, ?A..?Z, ?_, ?`, ?a..?z, ?~
      when 0x21, 0x22, 0x25, 0x26, 0x27, 0x2c, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x2f, 0x30..0x39, 0x40, 0x41..0x5a, 0x5f, 0x60, 0x61..0x7a, 0x7e
        sprintf("%c", e)
      else
        sprintf("\\x%02x", e)
      end
    end

    private

    # Returns a new NatSet holding the lower case counterparts of the upper
    # case ASCII letters in @natset.  A fresh set is built on purpose: the
    # intersection may be the shared UpAlpha constant itself (when @natset is
    # universal), so its boundary array must never be modified in place.
    def lowered_upper_part
      shifted = []
      (@natset & UpAlpha).each_range {|r|
        # 0x61 - 0x41: distance from ?A to ?a
        shifted << Range.new(r.begin - 0x41 + 0x61, r.end - 0x41 + 0x61)
      }
      NatSet.new(*shifted)
    end
  end

  # Zero-width assertions and back references.
  def RegexpTree.linebeg() Special.new('^') end
  def RegexpTree.lineend() Special.new('$') end
  def RegexpTree.strbeg() Special.new('\A') end
  def RegexpTree.strend() Special.new('\z') end
  def RegexpTree.strlineend() Special.new('\Z') end
  def RegexpTree.word_boundary() Special.new('\b') end
  def RegexpTree.non_word_boundary() Special.new('\B') end
  def RegexpTree.previous_match() Special.new('\G') end
  def RegexpTree.backref(n) Special.new("\\#{n}") end

  # Node whose regexp source is the fixed string given at construction.
  class Special < Elt
    def initialize(str)
      @str = str
    end

    def case_insensitive?
      true
    end

    def multiline_insensitive?
      true
    end

    def downcase
      self
    end

    def pretty_format(out)
      out.text @str
    end
  end

  def group() Paren.new(self, '') end
  def paren() Paren.new(self) end
  def lookahead() Paren.new(self, '?=') end
  def negative_lookahead() Paren.new(self, '?!') end
  # (?ixm-ixm:...)
  # (?>...)

  # Parenthesized node; +mark+ is the text written right after the opening
  # parenthesis ('?:' by default, '' for a capturing group).
  class Paren < Elt
    def initialize(r, mark='?:')
      @mark = mark
      @r = r
    end

    def case_insensitive?
      # xxx: if @mark contains "i"...
      @r.case_insensitive?
    end

    def multiline_insensitive?
      # xxx: if @mark contains "m"...
      @r.multiline_insensitive?
    end

    def downcase
      Paren.new(@r.downcase, @mark)
    end

    def pretty_format(out)
      out.group(1 + @mark.length, "(#@mark", ')') {
        @r.pretty_format(out)
      }
    end
  end

  # def RegexpTree.comment(str) ... end # (?#...)

  # Returns a tree that matches exactly +str+, byte by byte.
  def RegexpTree.str(str)
    ccs = []
    str.each_byte {|ch|
      ccs << CharClass.new(NatSet.new(ch))
    }
    seq(*ccs)
  end
end