abnf 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,3 @@
1
# Top-level namespace of the abnf gem.
module Abnf
  # Release version of the gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,411 @@
1
+ =begin
2
+ = NatSet
3
+
4
+ NatSet represents a set of naturals - non-negative integers.
5
+
6
+ == class methods
7
+ --- NatSet.empty
8
+ --- NatSet.universal
9
+ --- NatSet.new(integer_or_range, ...)
10
+
11
+ == methods
12
+ --- empty?
13
+ --- universal?
14
+ --- open?
15
+ --- singleton?
16
+ --- self == other
17
+ --- self === other
18
+ --- eql?(other)
19
+ --- hash
20
+ --- ~self
21
+ --- self + other
22
+ --- self - other
23
+ --- self & other
24
+
25
+ --- split_each(ns, ...) {|region, *nss| ... }
26
+ --- split(ns, ...)
27
+
28
+ --- min
29
+ --- max
30
+
31
+ --- each_range {|range| ... }
32
+
33
+ =end
34
+
35
# NatSet is a set of natural numbers (non-negative integers).
#
# The set is stored in @es as a strictly increasing list of boundaries at
# which membership toggles: [a, b, c, ...] means a...b is in the set,
# b...c is not, and so on.  An odd number of boundaries means the last
# region extends to infinity (an "open" set).
class NatSet
  class << NatSet
    # Keep the raw allocator reachable as _new; NatSet.new is redefined
    # below as a friendlier constructor taking integers and ranges.
    alias _new new
  end

  # Returns the set with no elements.
  def NatSet.empty
    self._new
  end

  # Returns the set of all naturals.
  def NatSet.universal
    self._new(0)
  end

  # Builds the union of all arguments.  Each argument may be:
  #
  # * a non-negative Integer (a single element),
  # * a one-character String (converted with String#ord),
  # * a Range of Integers or Strings; a negative, nil (endless) or
  #   Float::INFINITY end means "up to infinity"; a range that contains
  #   no element (e.g. 5...5 or 5..3) contributes nothing,
  # * another NatSet.
  #
  # Raises ArgumentError for anything else, including negative integers
  # and ranges starting below zero.
  def NatSet.new(*es)
    r = self.empty
    es.each {|e|
      e = e.ord if String === e
      case e
      when Range
        first = e.begin
        last = e.end
        if String === first
          first = first.ord
          last = last.ord if String === last
        end
        unless Integer === first && 0 <= first
          raise ArgumentError.new("bad value for #{self}.new: #{e}")
        end
        unbounded = last.nil? || last == Float::INFINITY
        unless unbounded || Integer === last
          raise ArgumentError.new("bad value for #{self}.new: #{e}")
        end
        if unbounded || last < 0
          r += self._new(first)
        else
          stop = e.exclude_end? ? last : last + 1
          # An empty range must not produce a degenerate boundary pair
          # such as [5, 5] or [5, 4]; that would break the invariant
          # that @es is strictly increasing.
          r += self._new(first, stop) if first < stop
        end
      when Integer
        unless 0 <= e
          raise ArgumentError.new("bad value for #{self}.new: #{e}")
        end
        r += self._new(e, e+1)
      when NatSet
        r += e
      else
        raise ArgumentError.new("bad value for #{self}.new: #{e}")
      end
    }
    r
  end

  # Low-level constructor (reached through NatSet._new): +es+ is the
  # boundary list itself and is stored without validation.
  def initialize(*es)
    @es = es
  end
  attr_reader :es

  # True iff the set has no element.
  def empty?
    @es.empty?
  end

  # True iff the set contains every natural.
  def universal?
    @es == [0]
  end

  # True iff the set is unbounded above (odd number of boundaries).
  def open?
    @es.length & 1 != 0
  end

  # Returns the sole element if the set has exactly one, nil otherwise.
  def singleton?
    if @es.length == 2 && @es[0] == @es[1] - 1
      @es[0]
    else
      nil
    end
  end

  # Two NatSets are equal iff their boundary lists are equal.
  # Comparing with an object that is not a NatSet yields false instead of
  # raising NoMethodError.
  def ==(other)
    other.is_a?(NatSet) && @es == other.es
  end
  alias === ==
  alias eql? ==

  # Hash consistent with eql?.
  def hash
    @es.hash
  end

  # Returns the set of naturals that are not in this set.
  def complement
    if @es.empty?
      self.class.universal
    elsif @es[0] == 0
      self.class._new(*@es[1..-1])
    else
      self.class._new(0, *@es)
    end
  end
  alias ~ complement

  # self + other / self | other.  Implemented by double dispatch so that
  # +other+ decides how to combine with a NatSet.
  def union(other)
    other.union_natset(self)
  end
  alias + union
  alias | union

  # Double-dispatch target of #union: returns natset | self.
  def union_natset(natset)
    return self if natset.empty? || self.universal?
    return natset if self.empty? || natset.universal?
    merge(natset) {|a, b| a || b}
  end

  # self & other, by double dispatch.
  def intersect(other)
    other.intersect_natset(self)
  end
  alias & intersect

  # Double-dispatch target of #intersect: returns natset & self.
  def intersect_natset(natset)
    return self if self.empty? || natset.universal?
    return natset if natset.empty? || self.universal?
    merge(natset) {|a, b| a && b}
  end

  # self - other, by double dispatch.
  def subtract(other)
    other.subtract_natset(self)
  end
  alias - subtract

  # Double-dispatch target of #subtract: returns natset - self.
  def subtract_natset(natset) # natset - self
    # Since double dispatch *inverses* a receiver and an argument,
    # condition should be inversed.
    return natset if self.empty? || natset.empty?
    return self.class.empty if self.universal?
    return ~self if natset.universal?
    merge(natset) {|a, b| !a && b}
  end

  # Sweeps over the boundaries of self and +other+ in increasing order and
  # builds a new set.  For every region between consecutive boundaries the
  # block is called with (in_self, in_other) and returns whether that
  # region belongs to the result.
  def merge(other)
    es1 = @es.dup
    es2 = other.es.dup
    es0 = []
    bool1 = bool2 = bool0 = false
    s = 0
    until es1.empty? && es2.empty?
      # Advance whichever list has the smaller next boundary; advance both
      # when the boundaries coincide.
      take1 = es2.empty? || (!es1.empty? && es1[0] <= es2[0])
      take2 = es1.empty? || (!es2.empty? && es2[0] <= es1[0])
      e = take1 ? es1[0] : es2[0]
      # The region s...e is non-empty only if s < e; emit a boundary when
      # membership in the result changes at s.
      if s < e && bool0 != yield(bool1, bool2)
        es0 << s
        bool0 = !bool0
      end
      s = e
      if take1
        es1.shift
        bool1 = !bool1
      end
      if take2
        es2.shift
        bool2 = !bool2
      end
    end
    # The final region s..infinity.
    if bool0 != yield(bool1, bool2)
      es0 << s
    end
    self.class._new(*es0)
  end

  # Partitions self by the given sets.  For every non-empty region the
  # block receives one array [region, *nss], where nss are those of the
  # given sets that contain the region.  Returns nil.
  def split_each(*natsets)
    if natsets.empty?
      yield [self]
    else
      current = natsets.pop

      a = self - current
      unless a.empty?
        a.split_each(*natsets) {|nss| yield nss}
      end

      a = self & current
      unless a.empty?
        a.split_each(*natsets) {|nss| nss.push current; yield nss}
      end
    end
    nil
  end

  # Like split_each but collects the yielded arrays into an array.
  def split(*natsets)
    result = []
    split_each(*natsets) {|r| result << r}
    result
  end

  # min returns a minimum element of the set.
  # It returns nil if the set has no minimum element,
  # i.e. the set has no element.
  def min
    if @es.empty?
      nil
    else
      @es[0]
    end
  end

  # max returns a maximum element of the set.
  # It returns nil if the set has no maximum element,
  # i.e. the set is open or has no element.
  def max
    if @es.empty? || open?
      nil
    else
      @es[-1] - 1
    end
  end

  # each_range iterates on continuous ranges of the set from smallest to largest.
  # For each range, it yields Range object which represent it.
  # For last range in open set, the end of the object is -1.
  # All yielded Range objects are inclusive (exclude_end? is false).
  # Without a block it returns an Enumerator.
  def each_range
    return to_enum(:each_range) unless block_given?
    (0...@es.length).step(2) {|i|
      e1 = @es[i]
      if i+1 == @es.length
        yield e1..-1
      else
        e2 = @es[i+1]
        yield e1..(e2-1)
      end
    }
  end

  # Pretty-printer hook: prints each range, using "inf" for the open end.
  def pretty_print(pp)
    pp.object_group(self) {
      pp.text ':'
      each_range {|r|
        pp.breakable
        if r.end == -1
          pp.text "#{r.begin}..inf"
        elsif r.begin == r.end
          pp.text r.begin.to_s
        else
          pp.text "#{r.begin}..#{r.end}"
        end
      }
    }
  end

  # Single-line rendering of pretty_print.
  def inspect
    require 'pp'
    PP.singleline_pp(self, '')
  end
end
291
+
292
# Self-test: runs only when this file is executed directly.
if __FILE__ == $0
  require 'test/unit'

  class NatSetTest < Test::Unit::TestCase
    def test_empty
      assert(NatSet.empty.empty?)
    end

    def test_universal
      assert(NatSet.universal.universal?)
    end

    def test_open
      assert(!NatSet.empty.open?)
      assert(NatSet.universal.open?)
    end

    def test_singleton
      assert_equal(1, NatSet._new(1, 2).singleton?)
      assert_nil(NatSet._new(1, 3).singleton?)
    end

    def test_complement
      assert_equal(NatSet.empty, ~NatSet.universal)
      assert_equal(NatSet.universal, ~NatSet.empty)
      assert_equal(NatSet._new(1, 2), ~NatSet._new(0, 1, 2))
      assert_equal(NatSet._new(0, 1, 2), ~NatSet._new(1, 2))
    end

    def test_union
      assert_equal(NatSet.empty, NatSet.empty + NatSet.empty)
      assert_equal(NatSet.universal, NatSet.empty + NatSet.universal)
      assert_equal(NatSet.universal, NatSet.universal + NatSet.empty)
      assert_equal(NatSet.universal, NatSet.universal + NatSet.universal)
      assert_equal(NatSet.new(0..2), NatSet.new(0, 2) + NatSet.new(0, 1))
    end

    def test_intersect
      assert_equal(NatSet.empty, NatSet.empty & NatSet.empty)
      assert_equal(NatSet.empty, NatSet.empty & NatSet.universal)
      assert_equal(NatSet.empty, NatSet.universal & NatSet.empty)
      assert_equal(NatSet.universal, NatSet.universal & NatSet.universal)
      assert_equal(NatSet.new(0), NatSet.new(0, 2) & NatSet.new(0, 1))
    end

    def test_subtract
      assert_equal(NatSet.empty, NatSet.empty - NatSet.empty)
      assert_equal(NatSet.empty, NatSet.empty - NatSet.universal)
      assert_equal(NatSet.universal, NatSet.universal - NatSet.empty)
      assert_equal(NatSet.empty, NatSet.universal - NatSet.universal)
      assert_equal(NatSet.new(2), NatSet.new(0, 2) - NatSet.new(0, 1))
    end

    def test_new
      assert_equal([1, 2], NatSet.new(1).es)
      assert_equal([1, 3], NatSet.new(1, 2).es)
      assert_equal([1, 4], NatSet.new(1, 2, 3).es)
      assert_equal([1, 4], NatSet.new(1, 3, 2).es)
      assert_equal([10, 21], NatSet.new(10..20).es)
      assert_equal([10, 20], NatSet.new(10...20).es)
      assert_equal([1, 2, 3, 4, 5, 6], NatSet.new(1, 3, 5).es)
      assert_equal([1, 16], NatSet.new(5..15, 1..10).es)
      assert_equal([1, 16], NatSet.new(11..15, 1..10).es)
      # NatSet.new converts strings with String#ord, so characters and
      # character ranges are accepted rather than rejected.
      assert_equal([97, 98], NatSet.new("a").es)
      assert_equal([97, 99], NatSet.new("a".."b").es)
      assert_equal([97, 98], NatSet.new("a"..."b").es)
      assert_raises(ArgumentError) {NatSet.new(nil)}
      assert_raises(ArgumentError) {NatSet.new(1.5)}
      assert_raises(ArgumentError) {NatSet.new(-1)}
      assert_raises(ArgumentError) {NatSet.new(-1..3)}
    end

    def test_split
      u = NatSet.universal
      assert_equal([[NatSet.universal]], u.split())
      assert_equal([[NatSet.universal]], u.split(NatSet.empty))
      assert_equal([[NatSet.universal, u]], u.split(u))

      n = NatSet.new(10..20)
      assert_equal([[NatSet.new(0..9, 21..-1)],
                    [NatSet.new(10..20), n]],
                   u.split(n))

      ns = [NatSet.new(10..20), NatSet.new(10..20)]
      assert_equal([[NatSet.new(0..9, 21..-1)],
                    [NatSet.new(10..20), *ns]],
                   u.split(*ns))

      ns = [NatSet.new(1..30), NatSet.new(5..40)]
      assert_equal([[NatSet.new(0, 41..-1)],
                    [NatSet.new(1..4), ns[0]],
                    [NatSet.new(31..40), ns[1]],
                    [NatSet.new(5..30), *ns]],
                   u.split(*ns))

      ns = [NatSet.new(1..30), NatSet.new(5..20)]
      assert_equal([[NatSet.new(0, 31..-1)],
                    [NatSet.new(1..4, 21..30), ns[0]],
                    [NatSet.new(5..20), *ns]],
                   u.split(*ns))
    end

    def test_min
      assert_nil(NatSet.new().min)
      assert_equal(1, NatSet.new(1..10).min)
    end

    def test_max
      assert_nil(NatSet.new().max)
      assert_equal(10, NatSet.new(1..10).max)
      assert_nil(NatSet.new(1..-1).max)
    end

    def test_each_range
      rs = []; NatSet.new()     .each_range {|r| rs << r}; assert_equal([], rs)
      rs = []; NatSet.new(0)    .each_range {|r| rs << r}; assert_equal([0..0], rs)
      rs = []; NatSet.new(1)    .each_range {|r| rs << r}; assert_equal([1..1], rs)
      rs = []; NatSet.new(1..3) .each_range {|r| rs << r}; assert_equal([1..3], rs)
      rs = []; NatSet.new(1...3).each_range {|r| rs << r}; assert_equal([1..2], rs)
      rs = []; NatSet.new(1..-1).each_range {|r| rs << r}; assert_equal([1..-1], rs)
    end
  end
end
@@ -0,0 +1,530 @@
1
+ =begin
2
+ = RegexpTree
3
+
4
+ RegexpTree represents a regular expression.
5
+ It can be converted to Regexp.
6
+
7
+ == class methods
8
+ --- RegexpTree.str(string)
9
+ returns an instance of RegexpTree which only matches ((|string|))
10
+ --- RegexpTree.alt(*regexp_trees)
11
+ returns an instance of RegexpTree which is alternation of ((|regexp_trees|)).
12
+ --- RegexpTree.seq(*regexp_trees)
13
+ returns an instance of RegexpTree which is concatenation of ((|regexp_trees|)).
14
+ --- RegexpTree.rep(regexp_tree, min=0, max=nil, greedy=true)
15
+ returns an instance of RegexpTree which is repetition of ((|regexp_tree|)).
16
+ --- RegexpTree.charclass(natset)
17
+ returns an instance of RegexpTree which matches characters in ((|natset|)).
18
+ #--- RegexpTree.linebeg
19
+ #--- RegexpTree.lineend
20
+ #--- RegexpTree.strbeg
21
+ #--- RegexpTree.strend
22
+ #--- RegexpTree.strlineend
23
+ #--- RegexpTree.word_boundary
24
+ #--- RegexpTree.non_word_boundary
25
+ #--- RegexpTree.previous_match
26
+ #--- RegexpTree.backref(n)
27
+
28
+ == methods
29
+ --- regexp(anchored=false)
30
+ convert to Regexp.
31
+
32
+ If ((|anchored|)) is true, the Regexp is anchored by (({\A})) and (({\z})).
33
+ --- to_s
34
+ convert to String.
35
+ --- empty_set?
36
+ returns true iff self never matches.
37
+ --- empty_sequence?
38
+ returns true iff self only matches empty string.
39
+ --- self | other
40
+ returns alternation of ((|self|)) and ((|other|)).
41
+ --- self + other
42
+ returns concatenation of ((|self|)) and ((|other|)).
43
+ --- self * n
44
+ returns ((|n|)) times repetition of ((|self|)).
45
+ --- rep(min=0, max=nil, greedy=true)
46
+ returns ((|min|)) to ((|max|)) times repetition of ((|self|)).
47
+ #--- closure(greedy=true)
48
+ #--- positive_closure(greedy=true)
49
+ #--- optional(greedy=true)
50
+ #--- ntimes(min, max=min, greedy=true)
51
+ #--- nongreedy_rep(min=0, max=nil)
52
+ #--- nongreedy_closure
53
+ #--- nongreedy_positive_closure
54
+ #--- nongreedy_optional
55
+ #--- nongreedy_ntimes(min, max=min)
56
+ =end
57
+
58
+ require 'prettyprint'
59
+ require 'natset'
60
+
61
# RegexpTree is a syntax tree for regular expressions that can be rendered
# as Regexp source text (#regexp, #to_s, #inspect).
#
# Node kinds are nested subclasses: Alt (alternation), Seq (sequence),
# Rep (repetition) and Elt (atomic elements: CharClass, Special, Paren).
# Every node class is expected to provide case_insensitive?,
# multiline_insensitive?, downcase and pretty_format(out).
class RegexpTree
  @curr_prec = 1

  # Gives every *direct* subclass a Prec constant, numbered in definition
  # order (so the order of the class definitions below is significant).
  # Deeper subclasses resolve Prec through their ancestor.
  def RegexpTree.inherited(c)
    return if c.superclass != RegexpTree
    c.const_set(:Prec, @curr_prec)
    @curr_prec += 1
  end

  # Returns self if it can be embedded directly in a node of class
  # +target+ (its Prec is at least target's), otherwise self wrapped in a
  # non-capturing Paren.
  def parenthesize(target)
    if target::Prec <= self.class::Prec
      self
    else
      Paren.new(self)
    end
  end

  # Pretty-printer hook: prints a %r{...}x literal, with the i flag when
  # the tree is case insensitive.
  def pretty_print(pp)
    case_insensitive = case_insensitive?
    pp.group(3, '%r{', '}x') {
      (case_insensitive ? self.downcase : self).pretty_format(pp)
    }
    pp.text 'i' if case_insensitive
  end

  # Returns a single-line %r literal for the tree.
  def inspect
    # Keep the boolean and the flag text apart: a String is always truthy,
    # so testing the flag text would downcase every tree.
    case_insensitive = case_insensitive?
    flag = case_insensitive ? "i" : ""
    r = PrettyPrint.singleline_format('') {|out|
      (case_insensitive ? self.downcase : self).pretty_format(out)
    }
    if %r{/} =~ r
      "%r{#{r}}#{flag}"
    else
      "%r/#{r}/#{flag}"
    end
  end

  # Converts the tree to a Regexp.  If +anchored+ is true the pattern is
  # wrapped in \A ... \z.  A case-insensitive tree is emitted downcased
  # with Regexp::IGNORECASE.
  def regexp(anchored=false)
    if case_insensitive?
      r = downcase
      opt = Regexp::IGNORECASE
    else
      r = self
      opt = 0
    end
    r = RegexpTree.seq(RegexpTree.strbeg, r, RegexpTree.strend) if anchored
    Regexp.compile(
      PrettyPrint.singleline_format('') {|out|
        r.pretty_format(out)
      },
      opt)
  end

  # Converts the tree to Regexp source wrapped in an option group, so it
  # can be embedded in another pattern.
  def to_s
    PrettyPrint.singleline_format('') {|out|
      # x flag is not required because all whitespaces are escaped.
      if case_insensitive?
        out.text '(?i-m:'
        downcase.pretty_format(out)
        out.text ')'
      else
        out.text '(?-im:'
        pretty_format(out)
        out.text ')'
      end
    }
  end

  # True iff the tree never matches.  Overridden by Alt and CharClass.
  def empty_set?
    false
  end

  # True iff the tree matches only the empty string.  Overridden by Seq.
  def empty_sequence?
    false
  end

  # Alternation of self and other.
  def |(other)
    RegexpTree.alt(self, other)
  end

  # Builds the alternation of +rs+: drops never-matching branches,
  # flattens nested Alt nodes and merges adjacent CharClass branches into
  # one class.
  def RegexpTree.alt(*rs)
    rs2 = []
    rs.each {|r|
      if r.empty_set?
        next
      elsif Alt === r
        rs2.concat r.rs
      elsif CharClass === r
        if CharClass === rs2.last
          rs2[-1] = CharClass.new(rs2.last.natset + r.natset)
        else
          rs2 << r
        end
      else
        rs2 << r
      end
    }
    case rs2.length
    when 0; EmptySet
    when 1; rs2.first
    else; Alt.new(rs2)
    end
  end

  # Alternation node; an Alt with no branches never matches.
  class Alt < RegexpTree
    def initialize(rs)
      @rs = rs
    end
    attr_reader :rs

    def empty_set?
      @rs.empty?
    end

    def case_insensitive?
      @rs.all? {|r| r.case_insensitive?}
    end

    def multiline_insensitive?
      @rs.all? {|r| r.multiline_insensitive?}
    end

    def downcase
      Alt.new(@rs.map {|r| r.downcase})
    end

    def pretty_format(out)
      if @rs.empty?
        out.text '(?!)'
      else
        out.group {
          @rs.each_with_index {|r, i|
            unless i == 0
              out.text '|'
              out.breakable ''
            end
            r.parenthesize(Alt).pretty_format(out)
          }
        }
      end
    end
  end
  EmptySet = Alt.new([])

  # Concatenation of self and other.
  def +(other)
    RegexpTree.seq(self, other)
  end

  # Builds the concatenation of +rs+: drops empty sequences, flattens
  # nested Seq nodes, and collapses to EmptySet as soon as one element
  # never matches.
  def RegexpTree.seq(*rs)
    rs2 = []
    rs.each {|r|
      if r.empty_sequence?
        next
      elsif Seq === r
        rs2.concat r.rs
      elsif r.empty_set?
        return EmptySet
      else
        rs2 << r
      end
    }
    case rs2.length
    when 0; EmptySequence
    when 1; rs2.first
    else; Seq.new(rs2)
    end
  end

  # Sequence node; a Seq with no elements matches only the empty string.
  class Seq < RegexpTree
    def initialize(rs)
      @rs = rs
    end
    attr_reader :rs

    def empty_sequence?
      @rs.empty?
    end

    def case_insensitive?
      @rs.all? {|r| r.case_insensitive?}
    end

    def multiline_insensitive?
      @rs.all? {|r| r.multiline_insensitive?}
    end

    def downcase
      Seq.new(@rs.map {|r| r.downcase})
    end

    def pretty_format(out)
      out.group {
        @rs.each_with_index {|r, i|
          unless i == 0
            out.group {out.breakable ''}
          end
          r.parenthesize(Seq).pretty_format(out)
        }
      }
    end
  end
  EmptySequence = Seq.new([])

  # self * n repeats exactly n times; self * (a..b) repeats a to b times.
  # Raises TypeError for any other argument.
  def *(n)
    case n
    when Integer
      RegexpTree.rep(self, n, n)
    when Range
      RegexpTree.rep(self, n.first, n.last - (n.exclude_end? ? 1 : 0))
    else
      raise TypeError.new("Integer or Range expected: #{n}")
    end
  end
  def nongreedy_closure() RegexpTree.rep(self, 0, nil, false) end
  def nongreedy_positive_closure() RegexpTree.rep(self, 1, nil, false) end
  def nongreedy_optional() RegexpTree.rep(self, 0, 1, false) end
  def nongreedy_ntimes(m, n=m) RegexpTree.rep(self, m, n, false) end
  def nongreedy_rep(m=0, n=nil) RegexpTree.rep(self, m, n, false) end
  def closure(greedy=true) RegexpTree.rep(self, 0, nil, greedy) end
  def positive_closure(greedy=true) RegexpTree.rep(self, 1, nil, greedy) end
  def optional(greedy=true) RegexpTree.rep(self, 0, 1, greedy) end
  def ntimes(m, n=m, greedy=true) RegexpTree.rep(self, m, n, greedy) end
  def rep(m=0, n=nil, greedy=true) RegexpTree.rep(self, m, n, greedy) end

  # Builds +r+ repeated +m+ to +n+ times (n == nil means unbounded),
  # simplifying the trivial cases instead of creating a Rep node.
  def RegexpTree.rep(r, m=0, n=nil, greedy=true)
    return EmptySequence if m == 0 && n == 0
    return r if m == 1 && n == 1
    return EmptySequence if r.empty_sequence?
    if r.empty_set?
      return m == 0 ? EmptySequence : EmptySet
    end
    Rep.new(r, m, n, greedy)
  end

  # Repetition node: @r repeated @m to @n times (@n nil = unbounded).
  class Rep < RegexpTree
    def initialize(r, m=0, n=nil, greedy=true)
      @r = r
      @m = m
      @n = n
      @greedy = greedy
    end

    def case_insensitive?
      @r.case_insensitive?
    end

    def multiline_insensitive?
      @r.multiline_insensitive?
    end

    def downcase
      Rep.new(@r.downcase, @m, @n, @greedy)
    end

    # Emits the operand followed by the shortest quantifier spelling
    # (?, *, +, {m}, {m,n}) and a trailing ? when non-greedy.
    def pretty_format(out)
      @r.parenthesize(Elt).pretty_format(out)
      case @m
      when 0
        case @n
        when 0
          out.text '{0}'
        when 1
          out.text '?'
        when nil
          out.text '*'
        else
          out.text "{#{@m},#{@n}}"
        end
      when 1
        case @n
        when 1
        when nil
          out.text '+'
        else
          out.text "{#{@m},#{@n}}"
        end
      else
        if @m == @n
          out.text "{#{@m}}"
        else
          out.text "{#{@m},#{@n}}"
        end
      end
      out.text '?' unless @greedy
    end
  end

  # Base class of atomic elements, which never need parenthesizing.
  class Elt < RegexpTree
  end

  # Returns a node matching the characters in +natset+ (EmptySet if the
  # set is empty).
  def RegexpTree.charclass(natset)
    if natset.empty?
      EmptySet
    else
      CharClass.new(natset)
    end
  end

  # Character class node backed by a NatSet of character codes.
  class CharClass < Elt
    None = NatSet.empty
    Any = NatSet.universal
    NL = NatSet.new(?\n)
    NonNL = ~NL
    Word = NatSet.new(?0..?9, ?A..?Z, ?_, ?a..?z)
    NonWord = ~Word
    # Tab, newline, form feed, carriage return and space.
    # NOTE(review): recent Ruby documentation lists \v as part of \s as
    # well; confirm against the targeted Ruby version before adding it.
    Space = NatSet.new(?\t, ?\n, ?\f, ?\r, ?\s)
    NonSpace = ~Space
    Digit = NatSet.new(?0..?9)
    NonDigit = ~Digit

    UpAlpha = NatSet.new(?A..?Z)
    LowAlpha = NatSet.new(?a..?z)

    def initialize(natset)
      @natset = natset
    end
    attr_reader :natset

    def empty_set?
      @natset.empty?
    end

    # True iff the class contains an upper-case ASCII letter exactly when
    # it contains the corresponding lower-case one.
    def case_insensitive?
      up = @natset & UpAlpha
      low = @natset & LowAlpha
      return false if up.es.length != low.es.length
      to_lower_set(up) == low
    end

    def multiline_insensitive?
      @natset != NonNL
    end

    # Returns a class in which every upper-case ASCII letter is replaced
    # by its lower-case counterpart.
    def downcase
      up = @natset & UpAlpha
      CharClass.new((@natset - UpAlpha) | to_lower_set(up))
    end

    # Returns a new NatSet with every boundary of +upper+ (a subset of
    # A-Z) shifted to the lower-case range.  It must not modify
    # upper.es in place: NatSet#& may return one of its operands, e.g.
    # the UpAlpha constant itself when @natset is universal.
    def to_lower_set(upper)
      NatSet._new(*upper.es.map {|ch|
        ch - 0x41 + 0x61 # - ?A + ?a
      })
    end
    private :to_lower_set

    # Emits a shorthand (\w, \d, ...) for the well-known sets, a single
    # escaped character for singletons, otherwise a bracket expression
    # (negated when the set is open).
    def pretty_format(out)
      case @natset
      when None; out.text '(?!)'
      when Any; out.text '[\s\S]'
      when NL; out.text '\n'
      when NonNL; out.text '.'
      when Word; out.text '\w'
      when NonWord; out.text '\W'
      when Space; out.text '\s'
      when NonSpace; out.text '\S'
      when Digit; out.text '\d'
      when NonDigit; out.text '\D'
      else
        if val = @natset.singleton?
          out.text encode_elt(val)
        else
          if @natset.open?
            neg_mark = '^'
            es = (~@natset).es
          else
            neg_mark = ''
            es = @natset.es.dup
          end
          r = ''
          until es.empty?
            if es[0] + 1 == es[1]
              r << encode_elt(es[0])
            elsif es[0] + 2 == es[1]
              r << encode_elt(es[0]) << encode_elt(es[1] - 1)
            else
              r << encode_elt(es[0]) << '-' << encode_elt(es[1] - 1)
            end
            es.shift
            es.shift
          end
          out.text "[#{neg_mark}#{r}]"
        end
      end
    end

    # Returns the regexp source for character code +e+: a named escape,
    # the literal character when it is known to be safe, otherwise \xHH.
    # NOTE(review): the \xHH form presumably assumes e <= 0xff; confirm
    # how larger code points should be written.
    def encode_elt(e)
      case e
      when 0x09; '\t'
      when 0x0a; '\n'
      when 0x0d; '\r'
      when 0x0c; '\f'
      when 0x0b; '\v'
      when 0x07; '\a'
      when 0x1b; '\e'
      #when ?!, ?", ?%, ?&, ?', ?,, ?:, ?;, ?<, ?=, ?>, ?/, ?0..?9, ?@, ?A..?Z, ?_, ?`, ?a..?z, ?~
      when 0x21, 0x22, 0x25, 0x26, 0x27, 0x2c, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x2f, 0x30..0x39, 0x40, 0x41..0x5a, 0x5f, 0x60, 0x61..0x7a, 0x7e
        sprintf("%c", e)
      else
        sprintf("\\x%02x", e)
      end
    end
  end

  def RegexpTree.linebeg() Special.new('^') end
  def RegexpTree.lineend() Special.new('$') end
  def RegexpTree.strbeg() Special.new('\A') end
  def RegexpTree.strend() Special.new('\z') end
  def RegexpTree.strlineend() Special.new('\Z') end
  def RegexpTree.word_boundary() Special.new('\b') end
  def RegexpTree.non_word_boundary() Special.new('\B') end
  def RegexpTree.previous_match() Special.new('\G') end
  def RegexpTree.backref(n) Special.new("\\#{n}") end

  # Node that emits a fixed piece of regexp source (anchors, backrefs).
  class Special < Elt
    def initialize(str)
      @str = str
    end

    def case_insensitive?
      true
    end

    def multiline_insensitive?
      true
    end

    def downcase
      self
    end

    def pretty_format(out)
      out.text @str
    end
  end

  def group() Paren.new(self, '') end
  def paren() Paren.new(self) end
  def lookahead() Paren.new(self, '?=') end
  def negative_lookahead() Paren.new(self, '?!') end
  # (?ixm-ixm:...)
  # (?>...)

  # Parenthesized node; +mark+ is the text right after the opening
  # parenthesis ('' capturing, '?:' non-capturing, '?=' lookahead, ...).
  class Paren < Elt
    def initialize(r, mark='?:')
      @mark = mark
      @r = r
    end

    def case_insensitive?
      # xxx: if @mark contains "i"...
      @r.case_insensitive?
    end

    def multiline_insensitive?
      # xxx: if @mark contains "m"...
      @r.multiline_insensitive?
    end

    def downcase
      Paren.new(@r.downcase, @mark)
    end

    def pretty_format(out)
      out.group(1 + @mark.length, "(#@mark", ')') {
        @r.pretty_format(out)
      }
    end
  end

  # def RegexpTree.comment(str) ... end # (?#...)

  # Returns a tree that matches exactly +str+, one single-byte CharClass
  # per byte of the string.
  def RegexpTree.str(str)
    ccs = []
    str.each_byte {|ch|
      ccs << CharClass.new(NatSet.new(ch))
    }
    seq(*ccs)
  end
end