abnf 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,156 @@
1
+ # RFC 2234
2
class Parser
rule
  # A rule list is a possibly empty run of rules.  Every rule is registered
  # in @grammar as soon as it is reduced.  racc presets `result` to val[0]
  # (the value of the preceding rulelist), so `result ||= name` keeps the
  # name of the first rule that was seen.
  rulelist  :   { result = nil }
            | rulelist rule {
                name = val[1][0]
                rhs = val[1][1]
                @grammar.add(name, rhs)
                result ||= name
              }

  # A rule reduces to a [name, right-hand-side] pair; the value of the
  # assign token itself is not used here.
  rule      : defname assign alt { result = [val[0], val[2]] }

  # Alternation: operands are combined with the element's `|` operator.
  alt       : seq
            | alt altop seq { result = val[0] | val[2] }

  # Concatenation: operands are combined with the element's `+` operator.
  seq       : rep
            | seq rep { result = val[0] + val[1] }

  # Optional repetition prefix; `repeat` supplies the two arguments
  # passed to #rep.
  rep       : element
            | repeat element { result = val[1].rep(*val[0]) }

  # Produces a two-element array [min, max] for the forms
  # "*", "*n", "n", "m*" and "m*n".
  # NOTE(review): a nil max presumably means "unbounded" -- confirm against #rep.
  repeat    : repop { result = [0, nil] }
            | repop int { result = [0, val[1]] }
            | int { result = [val[0], val[0]] }
            | int repop { result = [val[0], nil] }
            | int repop int { result = [val[0], val[2]] }

  # Rule reference, parenthesised group, bracketed optional group
  # (rewritten as rep(0, 1)), or a terminal value built by the scanner.
  element   : name { result = Var.new(val[0]) }
            | lparen alt rparen { result = val[1] }
            | lbracket alt rbracket { result = val[1].rep(0, 1) }
            | val
end
34
+
35
+ ---- header
36
+
37
+ require 'abnf/grammar'
38
+
39
+ class ABNF
40
# Parses the ABNF description +desc+ into a fresh ABNF grammar object.
#
# desc::                  the grammar text handed to Parser#parse.
# dont_merge_core_rules:: when true, CoreRules is not merged into the result.
#
# Returns the populated ABNF instance.
def ABNF.parse(desc, dont_merge_core_rules=false)
  result = ABNF.new
  parser = Parser.new(result)
  parser.parse(desc)
  if !dont_merge_core_rules
    result.merge(CoreRules)
  end
  result
end
46
+
47
+ ---- inner
48
+
49
# Remembers the grammar object that the parser actions add rules to.
#
# grammar:: receiver of the @grammar.add calls made while parsing.
def initialize(grammar)
  @grammar = grammar
end
52
+
53
# Stores +input+ for the scanner and runs the generated parser, which
# pulls its tokens from #scan.
#
# Returns whatever yyparse returns.
def parse(input)
  @input = input
  yyparse(self, :scan)
end
57
+
58
# Token source used by the parser.  Wraps #scan1 with one token of
# lookahead: a :name token that is immediately followed by an :assign
# token is re-tagged as :defname; every other token passes through as is.
#
# Yields each token as a [type, value] pair.
def scan
  pending = nil
  scan1 do |kind, value|
    unless pending.nil?
      if pending[0] == :name && kind == :assign
        yield [:defname, pending[1]]
      else
        yield pending
      end
    end
    pending = [kind, value]
  end
  # Flush the last buffered token.
  yield pending
end
72
+
73
# Low-level tokenizer.  Walks @input line by line, repeatedly matching the
# head of the remaining line against the patterns below and yielding
# (type, value) pairs.  In every branch +t+ is set to the text that was
# matched so that it can be chopped off the front of the line afterwards.
# After the last line a final (false, false) pair is yielded.
#
# Raises ScanError for prose-val (<...>) and for text that matches no pattern.
def scan1
  @input.each_line {|line|
    until line.empty?
      case line
      when /\A[ \t\r\n]+/
        # whitespace: consumed without yielding a token
        t = $&
      when /\A;/
        # comment: the whole rest of the line is discarded
        t = line
      when /\A[A-Za-z][A-Za-z0-9\-_]*/ # _ is not permitted by ABNF
        # names are downcased before being interned
        yield :name, (t = $&).downcase.intern
      when /\A=\/?/
        # "=" or "=/"
        yield :assign, (t = $&)
      when /\A[\/|]/ # | is not permitted by ABNF
        yield :altop, (t = $&)
      when /\A\*/
        yield :repop, (t = $&)
      when /\A\(/
        yield :lparen, (t = $&)
      when /\A\)/
        yield :rparen, (t = $&)
      when /\A\[/
        yield :lbracket, (t = $&)
      when /\A\]/
        yield :rbracket, (t = $&)
      when /\A\d+/
        yield :int, (t = $&).to_i
      when /\A"([ !#-~]*)"/
        # quoted string: one Term per byte; ASCII letters get a set holding
        # both the upper- and lower-case byte
        es = []
        (t = $&)[1...-1].each_byte {|b|
          case b
          when 0x41..0x5a # ?A..?Z
            b2 = b - 0x41 + 0x61 # ?A + ?a
            es << Term.new(NatSet.new(b, b2))
          when 0x61..0x7a # ?a..?z
            b2 = b - 0x61 + 0x41 # ?a + ?A
            es << Term.new(NatSet.new(b, b2))
          else
            es << Term.new(NatSet.new(b))
          end
        }
        yield :val, Seq.new(*es)
      when /\A%b([01]+)-([01]+)/
        # binary range %bLOW-HIGH (must be tried before the plain %b form)
        t = $&
        yield :val, Term.new(NatSet.new($1.to_i(2)..$2.to_i(2)))
      when /\A%b[01]+(?:\.[01]+)*/
        # binary value(s) %bN.N...: one Term per dotted number
        es = []
        (t = $&).scan(/[0-1]+/) {|v|
          es << Term.new(NatSet.new(v.to_i(2)))
        }
        yield :val, Seq.new(*es)
      when /\A%d([0-9]+)-([0-9]+)/
        # decimal range
        t = $&
        yield :val, Term.new(NatSet.new($1.to_i..$2.to_i))
      when /\A%d[0-9]+(?:\.[0-9]+)*/
        # decimal value(s)
        es = []
        (t = $&).scan(/[0-9]+/) {|v|
          es << Term.new(NatSet.new(v.to_i))
        }
        yield :val, Seq.new(*es)
      when /\A%x([0-9A-Fa-f]+)-([0-9A-Fa-f]+)/
        # hexadecimal range
        t = $&
        yield :val, Term.new(NatSet.new($1.hex..$2.hex))
      when /\A%x[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]+)*/
        # hexadecimal value(s)
        es = []
        (t = $&).scan(/[0-9A-Fa-f]+/) {|v|
          es << Term.new(NatSet.new(v.hex))
        }
        yield :val, Seq.new(*es)
      when /\A<([\x20-\x3D\x3F-\x7E]*)>/
        raise ScanError.new("prose-val is not supported: #{$&}")
      else
        raise ScanError.new(line)
      end
      # drop the text that was just consumed from the front of the line
      line[0, t.length] = ''
    end
  }
  # end-of-input marker
  yield false, false
end
151
+
152
# Raised by the scanner for input it cannot tokenize.
class ScanError < StandardError; end
154
+
155
+ ---- footer
156
+ end
@@ -0,0 +1,394 @@
1
+ require 'abnf/abnf'
2
+ require 'regexptree'
3
+
4
+ class ABNF
5
# Raised when a grammar cannot be converted to a regular expression.
class TooComplex < StandardError; end
7
+
8
# Builds the regexp tree for the ABNF description +desc+ (see
# ABNF.regexp_tree) and returns the result of calling #regexp on it.
#
# name:: rule to convert; nil is passed through unchanged.
def ABNF.regexp(desc, name=nil)
  tree = ABNF.regexp_tree(desc, name)
  tree.regexp
end
11
+
12
# Parses the ABNF description +desc+ and returns the regexp tree of the
# rule +name+, as computed by the instance method #regexp_tree.
def ABNF.regexp_tree(desc, name=nil)
  grammar = ABNF.parse(desc)
  grammar.regexp_tree(name)
end
15
+
16
# Converts the rule +name+ (the start symbol by default) to a regexp tree
# and returns the result of calling #regexp on that tree.
def regexp(name=start_symbol)
  tree = regexp_tree(name)
  tree.regexp
end
19
+
20
# Convert a recursive rule to non-recursive rule if possible.
# This conversion is *not* perfect.
# It may fail even if possible.
# More work (survey) is needed.
#
# name:: rule to convert; defaults to start_symbol when nil.
#
# The rules reachable from +name+ are visited one strongly connected
# component at a time.  Inside a component, rules are resolved one by one:
# a rule without self recursion is taken as is, otherwise just-, left- or
# right-recursion is eliminated.  Each resolved rule is substituted into
# the rules still pending.  Resolved rules are finally expanded into +env+
# so that no variable reference remains.
#
# Returns env[name].regexp_tree.
#
# Raises ABNFError when a referenced rule has no definition, TooComplex
# when the recursion cannot be eliminated, and Exception on an internal
# inconsistency (a resolved rule still refers to an unexpanded nonterminal).
def regexp_tree(name=nil)
  name ||= start_symbol
  env = {}
  each_strongly_connected_component_from(name) {|ns|
    # rules still to be resolved in this component
    rules = {}
    ns.each {|n|
      rules[n] = @rules[n]
    }

    resolved_rules = {}
    updated = true
    while updated
      updated = false
      # forget the names that were resolved in earlier passes
      ns.reject! {|n| !rules.include?(n)}

      # classify the recursion of every pending rule
      rs = {}
      ns.reverse_each {|n|
        e = rules[n]
        if !e
          raise ABNFError.new("no rule defined: #{n}")
        end
        rs[n] = e.recursion(ns, n)
        if rs[n] & OtherRecursion != 0
          raise TooComplex.new("too complex to convert to regexp: #{n} (#{ns.join(', ')})")
        end
      }

      # a rule that does not refer to itself can be inlined directly
      ns.reverse_each {|n|
        e = rules[n]
        r = rs[n]
        if r & SelfRecursion == 0
          resolved_rules[n] = e
          rules.delete n
          rules.each {|n2, e2| rules[n2] = e2.subst_var {|n3| n3 == n ? e : nil}}
          updated = true
          break
        end
      }
      next if updated

      # X = Y | a
      # Y = X | b
      # =>
      # Y = Y | a | b
      ns.reverse_each {|n|
        e = rules[n]
        r = rs[n]
        if r & JustRecursion != 0 && r & ~(NonRecursion|JustRecursion) == 0
          e = e.remove_just_recursion(n)
          resolved_rules[n] = e
          rules.delete n
          rules.each {|n2, e2| rules[n2] = e2.subst_var {|n3| n3 == n ? e : nil}}
          updated = true
          break
        end
      }
      next if updated

      # X = X a | b
      # =>
      # X = b a*
      ns.reverse_each {|n|
        e = rules[n]
        r = rs[n]
        if r & LeftRecursion != 0 && r & ~(NonRecursion|JustRecursion|LeftRecursion|SelfRecursion) == 0
          e = e.remove_left_recursion(n)
          resolved_rules[n] = e
          rules.delete n
          rules.each {|n2, e2| rules[n2] = e2.subst_var {|n3| n3 == n ? e : nil}}
          updated = true
          break
        end
      }
      next if updated

      # X = a X | b
      # =>
      # X = a* b
      ns.reverse_each {|n|
        e = rules[n]
        r = rs[n]
        if r & RightRecursion != 0 && r & ~(NonRecursion|JustRecursion|RightRecursion|SelfRecursion) == 0
          e = e.remove_right_recursion(n)
          resolved_rules[n] = e
          rules.delete n
          rules.each {|n2, e2| rules[n2] = e2.subst_var {|n3| n3 == n ? e : nil}}
          updated = true
          break
        end
      }
      next if updated
    end

    # more than one rule left: none of the rewrites above applied
    if 1 < rules.length
      raise TooComplex.new("too complex to convert to regexp: (#{ns.join(', ')})")
    end

    if rules.length == 1
      n, e = rules.shift
      r = e.recursion(ns, n)
      if r & OtherRecursion != 0
        raise TooComplex.new("too complex to convert to regexp: #{n} (#{ns.join(', ')})")
      end
      if r == NonRecursion
        resolved_rules[n] = e
      else
        # X = a X | b | X c
        # =>
        # X = a* b c*
        left, middle, right = e.split_recursion(n)
        resolved_rules[n] = Seq.new(Alt.new(left).rep, Alt.new(middle), Alt.new(right).rep)
      end
    end

    # make the resolved rules topologically sortable by the variables
    # they still reference among themselves
    class << resolved_rules
      include TSort
      alias tsort_each_node each_key
      def tsort_each_child(n, &block)
        self[n].each_var {|n2|
          yield n2 if self.include? n2
        }
      end
    end

    # expand every remaining variable; dependencies come first in tsort order
    resolved_rules.tsort_each {|n|
      env[n] = resolved_rules[n].subst_var {|n2|
        unless env[n2]
          # report the nonterminal that is actually missing (n2), together
          # with the rule that refers to it
          raise Exception.new("unresolved nonterminal: #{n2} (referenced from #{n})") # bug
        end
        env[n2]
      }
    }
  }
  env[name].regexp_tree
end
159
+
160
# Bit flags returned by the #recursion methods; they are combined with |.
NonRecursion   = 1 << 0 # X = a
JustRecursion  = 1 << 1 # X = Y
LeftRecursion  = 1 << 2 # X = Y a
RightRecursion = 1 << 3 # X = a Y
SelfRecursion  = 1 << 4 # Y is X in JustRecursion, LeftRecursion and RightRecursion
OtherRecursion = 1 << 5 # otherwise
166
+
167
class Elt
  # X = X a | b  =>  b a*
  # Splits off the left-recursive part with split_left_recursion and
  # appends its repetition to the non-recursive part.
  def remove_left_recursion(n)
    base, repeated = split_left_recursion(n)
    looped = repeated.rep
    Seq.new(base, looped)
  end

  # X = a X | b  =>  a* b
  # Mirror image of remove_left_recursion, based on split_right_recursion.
  def remove_right_recursion(n)
    base, repeated = split_right_recursion(n)
    looped = repeated.rep
    Seq.new(looped, base)
  end
end
178
+
179
class Alt
  # ORs together the recursion flags of all alternatives (0 when empty).
  def recursion(syms, lhs)
    flags = 0
    @elts.each {|elt| flags |= elt.recursion(syms, lhs) }
    flags
  end

  # Rebuilds the alternation from each alternative's remove_just_recursion.
  def remove_just_recursion(n)
    stripped = @elts.map {|elt| elt.remove_just_recursion(n) }
    Alt.new(*stripped)
  end

  # Returns [nonrec, rest]: the unions, starting from EmptySet, of the
  # corresponding parts of every alternative's split_left_recursion.
  def split_left_recursion(n)
    @elts.inject([EmptySet, EmptySet]) {|(nonrec, rest), elt|
      nonrec1, rest1 = elt.split_left_recursion(n)
      [nonrec | nonrec1, rest | rest1]
    }
  end

  # Same as split_left_recursion, built from split_right_recursion.
  def split_right_recursion(n)
    @elts.inject([EmptySet, EmptySet]) {|(nonrec, rest), elt|
      nonrec1, rest1 = elt.split_right_recursion(n)
      [nonrec | nonrec1, rest | rest1]
    }
  end

  # Returns [rest_left, nonrec, rest_right]: the element-wise unions of
  # every alternative's split_recursion.
  def split_recursion(n)
    @elts.inject([EmptySet, EmptySet, EmptySet]) {|(rest_left, nonrec, rest_right), elt|
      rest_left1, nonrec1, rest_right1 = elt.split_recursion(n)
      [rest_left | rest_left1, nonrec | nonrec1, rest_right | rest_right1]
    }
  end
end
223
+
224
class Seq
  # Classifies how this sequence refers to the rules in +syms+.
  # An empty sequence is NonRecursion and a single element is classified
  # by itself.  Otherwise only the first and the last element may be
  # recursive, and not both at once; anything else is OtherRecursion.
  def recursion(syms, lhs)
    return NonRecursion if @elts.empty?
    return @elts.first.recursion(syms, lhs) if @elts.length == 1

    # recursion in the middle of a sequence cannot be handled
    interior = @elts[1...-1]
    if interior.any? {|elt| elt.recursion(syms, lhs) != NonRecursion }
      return OtherRecursion
    end

    # a bare variable in first position makes the sequence left recursive
    left_allowed = NonRecursion|JustRecursion|LeftRecursion|SelfRecursion
    head = @elts.first.recursion(syms, lhs)
    return OtherRecursion unless (head & ~left_allowed) == 0
    head = (head & ~JustRecursion) | LeftRecursion unless (head & JustRecursion) == 0

    # a bare variable in last position makes the sequence right recursive
    right_allowed = NonRecursion|JustRecursion|RightRecursion|SelfRecursion
    tail = @elts.last.recursion(syms, lhs)
    return OtherRecursion unless (tail & ~right_allowed) == 0
    tail = (tail & ~JustRecursion) | RightRecursion unless (tail & JustRecursion) == 0

    return tail if head == NonRecursion
    return head if tail == NonRecursion
    OtherRecursion
  end

  # A sequence is returned unchanged.
  def remove_just_recursion(n)
    self
  end

  # Splits on the first element and appends the remaining elements to
  # both parts of the result.
  def split_left_recursion(n)
    if @elts.empty?
      [self, EmptySet]
    elsif @elts.length == 1
      @elts.first.split_left_recursion(n)
    else
      head_nonrec, head_rest = @elts.first.split_left_recursion(n)
      trailing = Seq.new(*@elts[1..-1])
      [head_nonrec + trailing, head_rest + trailing]
    end
  end

  # Splits on the last element and prepends the preceding elements to
  # both parts of the result.
  def split_right_recursion(n)
    if @elts.empty?
      [self, EmptySet]
    elsif @elts.length == 1
      @elts.first.split_right_recursion(n)
    else
      tail_nonrec, tail_rest = @elts.last.split_right_recursion(n)
      leading = Seq.new(*@elts[0...-1])
      [leading + tail_nonrec, leading + tail_rest]
    end
  end

  # Returns [rest_left, nonrec, rest_right] for a sequence that is
  # recursive at no more than one of its two ends.
  def split_recursion(n)
    return [EmptySet, self, EmptySet] if @elts.empty?
    return @elts.first.split_recursion(n) if @elts.length == 1

    head_plain, head_looping = @elts.first.split_left_recursion(n)
    tail_plain, tail_looping = @elts.last.split_right_recursion(n)
    interior = Seq.new(*@elts[1...-1])

    if head_looping.empty_set?
      # no recursion at the left end
      prefix = head_plain + interior + tail_looping
      core = head_plain + interior + tail_plain
      [prefix, core, EmptySet]
    elsif tail_looping.empty_set?
      # no recursion at the right end
      core = head_plain + interior + tail_plain
      suffix = head_looping + interior + tail_plain
      [EmptySet, core, suffix]
    else
      raise Exception.new("non left/right recursion") # bug
    end
  end

end
314
+
315
class Rep
  # A repetition may only wrap a non-recursive element; any other
  # classification of the element is reported as OtherRecursion.
  def recursion(syms, lhs)
    if @elt.recursion(syms, lhs) == NonRecursion
      NonRecursion
    else
      OtherRecursion
    end
  end

  # A repetition is returned unchanged.
  def remove_just_recursion(n)
    self
  end

  # The whole repetition is the non-recursive part.
  def split_left_recursion(n)
    [self, EmptySet]
  end
  alias_method :split_right_recursion, :split_left_recursion

  # The whole repetition is the middle (non-recursive) part.
  def split_recursion(n)
    [EmptySet, self, EmptySet]
  end
end
333
+
334
class Term
  # A terminal never refers to a rule.
  def recursion(syms, lhs)
    NonRecursion
  end

  # A terminal is returned unchanged.
  def remove_just_recursion(n)
    self
  end

  # The terminal itself is the non-recursive part.
  def split_left_recursion(n)
    [self, EmptySet]
  end
  alias_method :split_right_recursion, :split_left_recursion

  # The terminal itself is the middle (non-recursive) part.
  def split_recursion(n)
    [EmptySet, self, EmptySet]
  end
end
352
+
353
class Var
  # A reference to +lhs+ itself is JustRecursion|SelfRecursion, a reference
  # to another member of +syms+ is JustRecursion, anything else is
  # NonRecursion.
  def recursion(syms, lhs)
    return JustRecursion | SelfRecursion if lhs == name
    return JustRecursion if syms.include?(name)
    NonRecursion
  end

  # A reference to +n+ is replaced by EmptySet; other variables are kept.
  def remove_just_recursion(n)
    n == name ? EmptySet : self
  end

  # A reference to +n+ has no non-recursive part and an empty remainder;
  # any other variable is entirely non-recursive.
  def split_left_recursion(n)
    n == name ? [EmptySet, EmptySequence] : [self, EmptySet]
  end
  alias_method :split_right_recursion, :split_left_recursion

  # A reference to +n+ contributes nothing; any other variable is the
  # middle (non-recursive) part.
  def split_recursion(n)
    n == name ? [EmptySet, EmptySet, EmptySet] : [EmptySet, self, EmptySet]
  end
end
389
+
390
# Conversion of grammar elements to RegexpTree nodes.

class Alt
  # Alternation of the regexp trees of all alternatives.
  def regexp_tree
    subtrees = @elts.map {|elt| elt.regexp_tree }
    RegexpTree.alt(*subtrees)
  end
end

class Seq
  # Concatenation of the regexp trees of all elements.
  def regexp_tree
    subtrees = @elts.map {|elt| elt.regexp_tree }
    RegexpTree.seq(*subtrees)
  end
end

class Rep
  # Repetition of the element's regexp tree with this node's min, max
  # and greedy settings.
  def regexp_tree
    inner = @elt.regexp_tree
    inner.rep(min, max, greedy)
  end
end

class Term
  # Character class built from this terminal's @natset.
  def regexp_tree
    RegexpTree.charclass(@natset)
  end
end
394
+ end