abnf 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,156 @@
1
+ # RFC 2234
2
+ class Parser
3
+ rule
4
+ rulelist : { result = nil }
5
+ | rulelist rule {
6
+ name = val[1][0]
7
+ rhs = val[1][1]
8
+ @grammar.add(name, rhs)
9
+ result ||= name
10
+ }
11
+
12
+ rule : defname assign alt { result = [val[0], val[2]] }
13
+
14
+ alt : seq
15
+ | alt altop seq { result = val[0] | val[2] }
16
+
17
+ seq : rep
18
+ | seq rep { result = val[0] + val[1] }
19
+
20
+ rep : element
21
+ | repeat element { result = val[1].rep(*val[0]) }
22
+
23
+ repeat : repop { result = [0, nil] }
24
+ | repop int { result = [0, val[1]] }
25
+ | int { result = [val[0], val[0]] }
26
+ | int repop { result = [val[0], nil] }
27
+ | int repop int { result = [val[0], val[2]] }
28
+
29
+ element : name { result = Var.new(val[0]) }
30
+ | lparen alt rparen { result = val[1] }
31
+ | lbracket alt rbracket { result = val[1].rep(0, 1) }
32
+ | val
33
+ end
34
+
35
+ ---- header
36
+
37
+ require 'abnf/grammar'
38
+
39
+ class ABNF
40
# Build a grammar object from an ABNF description.
#
# desc:: ABNF source text, handed to Parser#parse.
# dont_merge_core_rules:: when true, CoreRules is not merged into the result.
#
# Returns the populated ABNF instance.
def ABNF.parse(desc, dont_merge_core_rules=false)
  parsed = ABNF.new
  parser = Parser.new(parsed)
  parser.parse(desc)
  parsed.merge(CoreRules) if !dont_merge_core_rules
  parsed
end
46
+
47
+ ---- inner
48
+
49
# grammar:: object that receives every parsed rule through its #add method
#           (see the rulelist action of the grammar).
def initialize(grammar)
  @grammar = grammar
end
52
+
53
# Parse +input+ (the ABNF text; it is read line by line by #scan1).
# The text is stored in @input and yyparse is started with #scan named
# as the method that yields tokens.
def parse(input)
  @input = input
  yyparse(self, :scan)
end
57
+
58
# Token source for the parser.  Replays the tokens of #scan1 one step
# behind so that a :name token immediately followed by an :assign token
# can be re-tagged as :defname before it is yielded.
def scan
  pending = nil
  scan1 do |type, value|
    if pending
      starts_rule = pending[0] == :name && type == :assign
      yield(starts_rule ? [:defname, pending[1]] : pending)
    end
    pending = [type, value]
  end
  # Flush the token that is still held back (the last one scan1 produced).
  yield pending
end
72
+
73
# Raw tokenizer: walks @input line by line and yields (type, value) pairs,
# finishing with (false, false).  Whitespace and ";" comments are consumed
# without yielding.  Raises ScanError on prose values ("<...>") and on
# any text that matches none of the patterns.
def scan1
  @input.each_line do |line|
    until line.empty?
      case line
      when /\A[ \t\r\n]+/
        lexeme = $&
      when /\A;/
        # a comment swallows the remainder of the line
        lexeme = line
      when /\A[A-Za-z][A-Za-z0-9\-_]*/ # _ is not permitted by ABNF
        lexeme = $&
        yield :name, lexeme.downcase.intern
      when /\A=\/?/
        lexeme = $&
        yield :assign, lexeme
      when /\A[\/|]/ # | is not permitted by ABNF
        lexeme = $&
        yield :altop, lexeme
      when /\A\*/
        lexeme = $&
        yield :repop, lexeme
      when /\A\(/
        lexeme = $&
        yield :lparen, lexeme
      when /\A\)/
        lexeme = $&
        yield :rparen, lexeme
      when /\A\[/
        lexeme = $&
        yield :lbracket, lexeme
      when /\A\]/
        lexeme = $&
        yield :rbracket, lexeme
      when /\A\d+/
        lexeme = $&
        yield :int, lexeme.to_i
      when /\A"([ !#-~]*)"/
        lexeme = $&
        # quoted text: each ASCII letter becomes a two-member set holding
        # both of its cases, every other byte a single-member set
        terms = lexeme[1...-1].bytes.map do |b|
          case b
          when 0x41..0x5a # ?A..?Z
            Term.new(NatSet.new(b, b + 0x20))
          when 0x61..0x7a # ?a..?z
            Term.new(NatSet.new(b, b - 0x20))
          else
            Term.new(NatSet.new(b))
          end
        end
        yield :val, Seq.new(*terms)
      when /\A%b([01]+)-([01]+)/
        lexeme = $&
        yield :val, Term.new(NatSet.new($1.to_i(2)..$2.to_i(2)))
      when /\A%b[01]+(?:\.[01]+)*/
        lexeme = $&
        terms = lexeme.scan(/[0-1]+/).map {|digits| Term.new(NatSet.new(digits.to_i(2)))}
        yield :val, Seq.new(*terms)
      when /\A%d([0-9]+)-([0-9]+)/
        lexeme = $&
        yield :val, Term.new(NatSet.new($1.to_i..$2.to_i))
      when /\A%d[0-9]+(?:\.[0-9]+)*/
        lexeme = $&
        terms = lexeme.scan(/[0-9]+/).map {|digits| Term.new(NatSet.new(digits.to_i))}
        yield :val, Seq.new(*terms)
      when /\A%x([0-9A-Fa-f]+)-([0-9A-Fa-f]+)/
        lexeme = $&
        yield :val, Term.new(NatSet.new($1.hex..$2.hex))
      when /\A%x[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]+)*/
        lexeme = $&
        terms = lexeme.scan(/[0-9A-Fa-f]+/).map {|digits| Term.new(NatSet.new(digits.hex))}
        yield :val, Seq.new(*terms)
      when /\A<([\x20-\x3D\x3F-\x7E]*)>/
        raise ScanError.new("prose-val is not supported: #{$&}")
      else
        raise ScanError.new(line)
      end
      # drop the consumed prefix and continue with what is left of the line
      line.slice!(0, lexeme.length)
    end
  end
  yield false, false
end
151
+
152
# Raised by the scanner for text it cannot tokenize and for prose values.
class ScanError < StandardError; end
154
+
155
+ ---- footer
156
+ end
@@ -0,0 +1,394 @@
1
+ require 'abnf/abnf'
2
+ require 'regexptree'
3
+
4
+ class ABNF
5
# Raised when a rule's recursion cannot be rewritten into a regexp.
class TooComplex < StandardError; end
7
+
8
# Shortcut: build the regexp tree for +desc+ / +name+ with
# ABNF.regexp_tree and return the result of calling #regexp on it.
def ABNF.regexp(desc, name=nil)
  tree = ABNF.regexp_tree(desc, name)
  tree.regexp
end
11
+
12
# Parse the ABNF text +desc+ and return the regexp tree of rule +name+
# (a nil name is passed through to the instance method unchanged).
def ABNF.regexp_tree(desc, name=nil)
  grammar = ABNF.parse(desc)
  grammar.regexp_tree(name)
end
15
+
16
# Result of calling #regexp on the regexp tree of rule +name+
# (default: start_symbol).
def regexp(name=start_symbol)
  tree = regexp_tree(name)
  tree.regexp
end
19
+
20
# Convert a recursive rule to non-recursive rule if possible.
# This conversion is *not* perfect.
# It may fail even if possible.
# More work (survey) is needed.
#
# name:: rule to convert; start_symbol is used when nil.
#
# Returns the regexp_tree of the fully substituted rule.
# Raises ABNFError when a visited rule has no definition and TooComplex
# when the recursion cannot be removed by the rewrites below.
def regexp_tree(name=nil)
  name ||= start_symbol

  # Rewrites attempted, in this order, on every pass over a component.
  # Each entry is [test on the rule's recursion flags, rewrite of its body].
  eliminations = [
    # No self reference: the body can be inlined as it is.
    [lambda {|r| (r & SelfRecursion) == 0},
     lambda {|e, n| e}],

    # X = Y | a
    # Y = X | b
    # =>
    # Y = Y | a | b
    # NOTE(review): every flag set accepted here has SelfRecursion clear,
    # so the entry above always matches first and this one looks
    # unreachable as written -- confirm the intended mask.
    [lambda {|r| (r & JustRecursion) != 0 && (r & ~(NonRecursion|JustRecursion)) == 0},
     lambda {|e, n| e.remove_just_recursion(n)}],

    # X = X a | b
    # =>
    # X = b a*
    [lambda {|r| (r & LeftRecursion) != 0 && (r & ~(NonRecursion|JustRecursion|LeftRecursion|SelfRecursion)) == 0},
     lambda {|e, n| e.remove_left_recursion(n)}],

    # X = a X | b
    # =>
    # X = a* b
    [lambda {|r| (r & RightRecursion) != 0 && (r & ~(NonRecursion|JustRecursion|RightRecursion|SelfRecursion)) == 0},
     lambda {|e, n| e.remove_right_recursion(n)}],
  ]

  env = {}
  each_strongly_connected_component_from(name) {|ns|
    rules = {}
    ns.each {|n|
      rules[n] = @rules[n]
    }

    resolved_rules = {}
    updated = true
    while updated
      updated = false
      # keep only the symbols that still await resolution
      ns.reject! {|n| !rules.include?(n)}

      # classify the recursion of every remaining rule
      rs = {}
      ns.reverse_each {|n|
        e = rules[n]
        if !e
          raise ABNFError.new("no rule defined: #{n}")
        end
        rs[n] = e.recursion(ns, n)
        if rs[n] & OtherRecursion != 0
          raise TooComplex.new("too complex to convert to regexp: #{n} (#{ns.join(', ')})")
        end
      }

      # Apply the first rewrite that fits some rule (rules are examined in
      # reverse order), then start a new pass with fresh classifications.
      eliminations.each {|applies, rewrite|
        n = ns.reverse.find {|candidate| applies.call(rs[candidate])}
        next if n.nil?
        e = rewrite.call(rules[n], n)
        resolved_rules[n] = e
        rules.delete n
        rules.each {|n2, e2| rules[n2] = e2.subst_var {|n3| n3 == n ? e : nil}}
        updated = true
        break
      }
    end

    if 1 < rules.length
      raise TooComplex.new("too complex to convert to regexp: (#{ns.join(', ')})")
    end

    if rules.length == 1
      n, e = rules.shift
      r = e.recursion(ns, n)
      if r & OtherRecursion != 0
        raise TooComplex.new("too complex to convert to regexp: #{n} (#{ns.join(', ')})")
      end
      if r == NonRecursion
        resolved_rules[n] = e
      else
        # X = a X | b | X c
        # =>
        # X = a* b c*
        left, middle, right = e.split_recursion(n)
        resolved_rules[n] = Seq.new(Alt.new(left).rep, Alt.new(middle), Alt.new(right).rep)
      end
    end

    # Let TSort order the resolved rules so that a rule is substituted
    # only after the rules of this component it refers to.
    class << resolved_rules
      include TSort
      alias tsort_each_node each_key
      def tsort_each_child(n, &block)
        self[n].each_var {|n2|
          yield n2 if self.include? n2
        }
      end
    end

    resolved_rules.tsort_each {|n|
      env[n] = resolved_rules[n].subst_var {|n2|
        unless env[n2]
          # report the symbol that is missing, not the rule being expanded
          raise Exception.new("unresolved nonterminal: #{n2}") # bug
        end
        env[n2]
      }
    }
  }
  env[name].regexp_tree
end
159
+
160
# Bit flags returned by the #recursion methods; they are OR-ed together
# and tested with & by the callers.
NonRecursion   = 1 << 0 # X = a
JustRecursion  = 1 << 1 # X = Y
LeftRecursion  = 1 << 2 # X = Y a
RightRecursion = 1 << 3 # X = a Y
SelfRecursion  = 1 << 4 # Y is X in JustRecursion, LeftRecursion and RightRecursion
OtherRecursion = 1 << 5 # otherwise
166
+
167
class Elt
  # X = X a | b  =>  X = b a*
  # Uses split_left_recursion(n) to obtain [non-recursive part, rest].
  def remove_left_recursion(n)
    base, tail = split_left_recursion(n)
    repeated_tail = tail.rep
    Seq.new(base, repeated_tail)
  end

  # X = a X | b  =>  X = a* b
  # Uses split_right_recursion(n) to obtain [non-recursive part, rest].
  def remove_right_recursion(n)
    base, lead = split_right_recursion(n)
    repeated_lead = lead.rep
    Seq.new(repeated_lead, base)
  end
end
178
+
179
class Alt
  # OR of the recursion flags of all alternatives (0 when there are none).
  def recursion(syms, lhs)
    flags = 0
    @elts.each {|e| flags |= e.recursion(syms, lhs)}
    flags
  end

  # New Alt built from each alternative's remove_just_recursion(n).
  def remove_just_recursion(n)
    stripped = @elts.map {|e| e.remove_just_recursion(n)}
    Alt.new(*stripped)
  end

  # [non-recursive part, rest]: each slot is the | of the matching slot
  # of every alternative, starting from EmptySet.
  def split_left_recursion(n)
    @elts.inject([EmptySet, EmptySet]) {|(nonrec, rest), e|
      nonrec1, rest1 = e.split_left_recursion(n)
      [nonrec | nonrec1, rest | rest1]
    }
  end

  # Same accumulation as split_left_recursion, over split_right_recursion.
  def split_right_recursion(n)
    @elts.inject([EmptySet, EmptySet]) {|(nonrec, rest), e|
      nonrec1, rest1 = e.split_right_recursion(n)
      [nonrec | nonrec1, rest | rest1]
    }
  end

  # [left rest, non-recursive part, right rest], each slot accumulated
  # with | over all alternatives, starting from EmptySet.
  def split_recursion(n)
    @elts.inject([EmptySet, EmptySet, EmptySet]) {|(rest_left, nonrec, rest_right), e|
      rest_left1, nonrec1, rest_right1 = e.split_recursion(n)
      [rest_left | rest_left1, nonrec | nonrec1, rest_right | rest_right1]
    }
  end
end
223
+
224
class Seq
  # Recursion flags of a sequence.  Only the first element may contribute
  # left recursion and only the last one right recursion; anything else
  # is reported as OtherRecursion.
  def recursion(syms, lhs)
    return NonRecursion if @elts.empty?
    return @elts.first.recursion(syms, lhs) if @elts.length == 1

    # interior elements must not refer back into the component at all
    @elts[1...-1].each {|e|
      return OtherRecursion if e.recursion(syms, lhs) != NonRecursion
    }

    head = @elts.first.recursion(syms, lhs)
    return OtherRecursion if head & ~(NonRecursion|JustRecursion|LeftRecursion|SelfRecursion) != 0
    # a bare reference in first position counts as left recursion
    head = (head & ~JustRecursion) | LeftRecursion if head & JustRecursion != 0

    tail = @elts.last.recursion(syms, lhs)
    return OtherRecursion if tail & ~(NonRecursion|JustRecursion|RightRecursion|SelfRecursion) != 0
    # a bare reference in last position counts as right recursion
    tail = (tail & ~JustRecursion) | RightRecursion if tail & JustRecursion != 0

    return tail if head == NonRecursion
    return head if tail == NonRecursion
    OtherRecursion
  end

  # A sequence is returned unchanged.
  def remove_just_recursion(n)
    self
  end

  # [non-recursive part, rest]: the first element is split and the
  # remaining elements are appended to both halves.
  def split_left_recursion(n)
    return [self, EmptySet] if @elts.empty?
    return @elts.first.split_left_recursion(n) if @elts.length == 1

    nonrec, rest = @elts.first.split_left_recursion(n)
    following = Seq.new(*@elts[1..-1])
    [nonrec + following, rest + following]
  end

  # [non-recursive part, rest]: the last element is split and the
  # preceding elements are prepended to both halves.
  def split_right_recursion(n)
    return [self, EmptySet] if @elts.empty?
    return @elts.first.split_right_recursion(n) if @elts.length == 1

    nonrec, rest = @elts.last.split_right_recursion(n)
    preceding = Seq.new(*@elts[0...-1])
    [preceding + nonrec, preceding + rest]
  end

  # Three-way split.  The first slot is filled when the first element has
  # no recursive remainder, the third slot when the last element has none;
  # a sequence with recursive remainders at both ends raises.
  def split_recursion(n)
    return [EmptySet, self, EmptySet] if @elts.empty?
    return @elts.first.split_recursion(n) if @elts.length == 1

    first_nonrec, first_rest = @elts.first.split_left_recursion(n)
    last_nonrec, last_rest = @elts.last.split_right_recursion(n)
    inner = Seq.new(*@elts[1...-1])

    if first_rest.empty_set?
      [first_nonrec + inner + last_rest,
       first_nonrec + inner + last_nonrec,
       EmptySet]
    elsif last_rest.empty_set?
      [EmptySet,
       first_nonrec + inner + last_nonrec,
       first_rest + inner + last_nonrec]
    else
      raise Exception.new("non left/right recursion") # bug
    end
  end

end
314
+
315
class Rep
  # NonRecursion when the repeated element is non-recursive,
  # OtherRecursion for anything else.
  def recursion(syms, lhs)
    inner = @elt.recursion(syms, lhs)
    if inner == NonRecursion
      NonRecursion
    else
      OtherRecursion
    end
  end

  # A repetition is returned unchanged.
  def remove_just_recursion(n)
    self
  end

  # The whole repetition is the non-recursive part; the rest is EmptySet.
  def split_left_recursion(n)
    [self, EmptySet]
  end
  alias split_right_recursion split_left_recursion

  # The repetition sits in the middle slot, between two EmptySet slots.
  def split_recursion(n)
    [EmptySet, self, EmptySet]
  end
end
333
+
334
class Term
  # Always NonRecursion.
  def recursion(syms, lhs)
    NonRecursion
  end

  # Returned unchanged.
  def remove_just_recursion(n)
    self
  end

  # The term itself is the non-recursive part; the rest is EmptySet.
  def split_left_recursion(n)
    [self, EmptySet]
  end
  alias split_right_recursion split_left_recursion

  # The term sits in the middle slot, between two EmptySet slots.
  def split_recursion(n)
    [EmptySet, self, EmptySet]
  end
end
352
+
353
class Var
  # JustRecursion|SelfRecursion for a reference to +lhs+ itself,
  # JustRecursion for a reference to another member of +syms+,
  # NonRecursion otherwise.
  def recursion(syms, lhs)
    return JustRecursion | SelfRecursion if lhs == self.name
    return JustRecursion if syms.include? self.name
    NonRecursion
  end

  # A reference to +n+ becomes EmptySet; any other reference is kept.
  def remove_just_recursion(n)
    n == self.name ? EmptySet : self
  end

  # A reference to +n+ has no non-recursive part and EmptySequence as its
  # rest; any other reference is entirely non-recursive.
  def split_left_recursion(n)
    return [EmptySet, EmptySequence] if n == self.name
    [self, EmptySet]
  end
  alias split_right_recursion split_left_recursion

  # A reference to +n+ fills no slot; any other reference fills the
  # middle slot.
  def split_recursion(n)
    return [EmptySet, EmptySet, EmptySet] if n == self.name
    [EmptySet, self, EmptySet]
  end
end
389
+
390
class Alt
  # RegexpTree.alt over the regexp trees of all alternatives.
  def regexp_tree
    RegexpTree.alt(*@elts.map {|e| e.regexp_tree})
  end
end

class Seq
  # RegexpTree.seq over the regexp trees of all elements.
  def regexp_tree
    RegexpTree.seq(*@elts.map {|e| e.regexp_tree})
  end
end

class Rep
  # Regexp tree of the repeated element with rep(min, max, greedy) applied.
  def regexp_tree
    body = @elt.regexp_tree
    body.rep(min, max, greedy)
  end
end

class Term
  # RegexpTree.charclass built from @natset.
  def regexp_tree
    RegexpTree.charclass(@natset)
  end
end
394
+ end