abnc 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c7c05ad333b974b34b0617122a98072125248024
4
+ data.tar.gz: f0f184042cb6c275da5c848d804e2469fb856909
5
+ SHA512:
6
+ metadata.gz: 910af8ee601a03a3fa18eb2ba21bc25c4db181d03c6d6aa28975aca59b2a1b81da4a6dbf079d7efdcf663b089500f04f083de6073188a2c742a3236fad87c2fb
7
+ data.tar.gz: dc9dea38fd3c4b62ac8e72bdfd8addb93fc51d0df776b45b3ed499c29dd342e147b23ae4d7afc7729c91d8d583321452cffb83139bbad3d3f05e2c607191df36
@@ -0,0 +1,15 @@
1
# Gem specification for abnc. The Gem::Specification must stay the last
# expression of the file so that whatever evaluates the gemspec receives it.
Gem::Specification.new do |s|
  s.name = "abnc"
  s.version = "0.1.0"
  s.summary = "RFC 5234+7405 ABNF compiler-let"
  s.description = %q{Shifty support for tools based on IETF's ABNF}
  s.author = "Carsten Bormann"
  s.email = "cabo@tzi.org"
  # SPDX license identifier ("Apache 2.0" is not a recognized identifier).
  s.license = "Apache-2.0"
  s.homepage = "http://github.com/cabo/abnc"
  # has_rdoc= is deprecated in RubyGems; only call it where it still exists.
  s.has_rdoc = false if s.respond_to?(:has_rdoc=)
  s.files = Dir['lib/**/*.rb'] + %w(abnc.gemspec)
  s.required_ruby_version = '>= 1.9.2'

  s.require_paths = ["lib"]
end
@@ -0,0 +1,176 @@
1
+ require 'parse/parser'
2
+ require 'parse/builder'
3
+ require 'parse/ast'
4
+
5
+ module Peggy
6
+
7
# Implements the RFC 4234 ABNF, one of several grammars supported.
#
# Keep in mind, though, that the ABNF semantics is that of a BNF,
# i.e., non-deterministic; while the packrat parser underlying peggy
# is a PEG parser, which cuts decision points once a successful
# parse is made.  You may have to exchange alternatives, e.g., for
# parsing ABNF itself using ABNF, you have to change RFC 4234's rule
#     repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
# into
#     repeat = (*DIGIT "*" *DIGIT) / 1*DIGIT
# as otherwise "1*(...)" will start to parse as the first
# alternative and never try the second.
class ABNF < Builder

  # Parser for ABNF source text itself, defined with the Builder DSL.
  class ABNFParser < Builder

    # Create the parser and define its productions right away.
    def initialize
      super
      build
    end

    private

    # Define the productions that recognize an ABNF grammar. Whitespace and
    # ';' comments (:ws, :s) are registered as productions to ignore.
    def build
      self.ignore_productions = [:ws, :s]

      grammar{seq{many{prod}; eof}}
      prodname{lit /[A-Za-z][-A-Za-z0-9]*/}
      ws{lit /(?:[ \t\n]|;[^\n]*\n)+/}
      s{opt{ws}}
      prod{seq{prodname; s; lit '='; s; prodalt; s}}
      prodalt{seq{
          prodterm
          many{seq{s; lit '/'; s; prodterm}}
        }}
      prodterm{seq{
          prodatom
          many{
            seq {s; prodatom}
          }
        }}
      prodatom{
        alt {
          numlit
          casese
          seq{opt{lit "%i"}; casein}
          seq{prodname; neg{seq{s; lit '='}}}
          optgroup
          repgroup              # XXX: specific repetition is missing
          group
        }
      }
      numlit{alt{
          lit /%x[0-9A-Fa-f][0-9A-Fa-f]([-.][0-9A-Fa-f][0-9A-Fa-f])*/
          lit /%d[0-9]+([-.][0-9]+)*/
        }}
      casein{lit /"[^"]+"/}     # "
      casese{lit /%s"[^"]+"/}   # "
      optgroup{seq{lit "["; s; prodalt; s; lit "]"}}
      group{seq{lit "("; s; prodalt; s; lit ")"}}
      repgroup{seq{repspec; prodatom}}
      repspec{lit /[0-9]*\*[0-9]*/}
    end
  end # ABNFParser

  # Compile ABNF source +text+ into productions of this builder. Previously
  # defined productions are discarded first (reset!).
  # +options+ is accepted for interface compatibility and currently unused.
  # Raises RuntimeError when +text+ does not parse as an ABNF grammar.
  def compile! text, options={}
    reset!
    compiler = ABNFParser.new
    result = compiler.parse? :grammar, text
    # Check the parse result before building the tree: there is no point in
    # constructing an AST from a failed parse.
    raise "Invalid ABNF grammar at char #{compiler.parse_results.keys.max}" unless result
    grammar = compiler.ast? :ignore=>:s
    grammar.each :prod do |definition|
      send(symbolize(definition.prodname.to_s)) do
        build_prodalt definition.prodalt
      end
    end
  end

  private

  # Turn an ABNF rule name into the Symbol used as production name:
  # downcased, with '-' replaced by '_'. Because productions are created and
  # referenced with send(), a rule name that equals a method this builder
  # already has (public or private, e.g. "opt", "hash", "method") would call
  # that method instead; such names get a "p_" prefix. "eof" is left alone so
  # a grammar can keep referring to the builder's eof method.
  def symbolize name
    name = name.downcase.gsub(/-/, "_")
    name = "p_" + name if name != "eof" && respond_to?(name, true)
    name.to_sym
  end

  # Emit the elements for a prodalt node: a single term is built directly,
  # several terms are wrapped in alt.
  def build_prodalt prodalt
    if prodalt._count(:prodterm) == 1
      build_prodterm prodalt.prodterm
    else
      alt do
        prodalt.each :prodterm do |prodterm|
          build_prodterm prodterm
        end
      end
    end
  end

  # Emit the elements for a prodterm node: a single atom is built directly,
  # several atoms are wrapped in seq.
  def build_prodterm prodterm
    if prodterm._count(:prodatom) == 1
      build_prodatom prodterm.prodatom
    else
      seq do
        prodterm.each :prodatom do |prodatom|
          build_prodatom prodatom
        end
      end
    end
  end

  # Emit the element for one prodatom node, dispatching on which child
  # (numlit, casein, casese, prodname, optgroup, repgroup, group) is present.
  # Raises RuntimeError for an unsupported repetition or an unknown atom.
  def build_prodatom prodatom
    if c = prodatom.numlit
      lit numlit_literal(c.to_s)
    elsif c = prodatom.casein
      # strip the surrounding double quotes; match case-insensitively
      lit /(?i:#{Regexp.escape(c.to_s[1..-2])})/
    elsif c = prodatom.casese
      # strip the leading %s" and the trailing quote
      lit /#{Regexp.escape(c.to_s[3..-2])}/
    elsif c = prodatom.prodname
      send(symbolize(c.to_s))
    elsif c = prodatom.optgroup
      opt {
        build_prodalt c.prodalt
      }
    elsif c = prodatom.repgroup
      send(repetition_method(c.repspec.to_s)) {
        build_prodatom c.prodatom
      }
    elsif c = prodatom.group
      build_prodalt c.prodalt
    else
      raise "prodatom strangeness"
    end
  end

  # Convert a numeric literal (%xHH / %dNN, optionally followed by
  # ".NN" concatenations or one "-NN" range end) into the value handed to lit:
  # a String for a single value or a concatenation, a one-character-class
  # Regexp for a range.
  def numlit_literal text
    md = /^%([xd])([0-9A-Fa-f]+)(.*)/.match(text)
    conv = {"x" => :hex, "d" => :to_i}[md[1]]
    first = md[2].send(conv).chr
    rest = md[3]
    return first if rest == ''
    if rest[0..0] == '.'
      first + rest[1..-1].split('.').map{ |x| x.send(conv).chr}.join('')
    else                  # XXX: need to barf if more than one...
      last = rest[1..-1].send(conv).chr
      # Escape both ends: characters such as ']' or '\' are valid range
      # bounds but would otherwise break or change the character class.
      /[#{Regexp.escape(first)}-#{Regexp.escape(last)}]/
    end
  end

  # Map a repetition spec "min*max" to the builder method implementing it.
  # Only 1*, *, and *1 are supported; anything else raises RuntimeError.
  def repetition_method repspec
    /^([0-9]*)\*([0-9]*)/ =~ repspec
    minr = $1 == "" ? 0 : $1.to_i
    maxr = $2 == "" ? nil : $2.to_i
    case [minr, maxr]
    when [1, nil]
      :some
    when [0, nil]
      :many
    when [0, 1]
      :opt
    else                  # This needs a better way to access Multiple!
      raise "repgroup -- not implemented: #{repspec}"
    end
  end

end                         #ABNF
176
+ end # Peggy
@@ -0,0 +1,226 @@
1
+ # Peggy packrat parser for Ruby
2
+ #
3
+ # ast.rb - Abstract Syntax Tree
4
+ #
5
+ # Copyright (c) 2006 Troy Heninger
6
+ #
7
+ # Peggy is copyrighted free software by Troy Heninger.
8
+ # You can redistribute it and/or modify it under the same terms as Ruby.
9
+
10
+ module Peggy
11
+
12
# A node in an Abstract Syntax Tree. Every node in the tree maps to a production
# found in the parse. You can navigate to the node's parent, first child, or next
# sibling. Nodes know their range of the source text.
#
# Children are kept as a singly linked list (_first / _next). Calling an
# unknown method on a node returns the first child with that name, or nil.
class Node
  include Enumerable

  attr_accessor :_name, :_first, :_next, :_parent, :_range, :_source

  # Constructor; +name+ becomes the node's _name.
  def initialize name
    self._name = name
  end

  # Add a child at the end of the child list and set its parent.
  def << child
    child._parent = self
    if _first
      _last._next = child
    else
      self._first = child
    end
  end

  # Iterate over each child. If name is supplied only nodes matching the name
  # are iterated. Without a block an Enumerator is returned.
  def each name=nil
    return enum_for(:each, name) unless block_given?
    child = _first
    while child
      yield child if name.nil? || name == child._name
      child = child._next
    end
  end

  # Get the children as an Array, optionally restricted to those named +name+.
  def children name=nil
    a = []
    each(name) do |node|
      a << node
    end
    a
  end

  # Count the children. If name is supplied only nodes matching the name are counted.
  def _count name=nil
    c = 0
    each(name) { c += 1 }
    c
  end

  # Get the number of nodes up to the root.
  def _depth
    depth = 0
    node = self
    depth += 1 while node=node._parent
    depth
  end

  # Get the root node.
  def _root
    node = self
    while (n2 = node._parent)
      node = n2
    end
    node
  end

  # Get an option set when tree was created. The option is looked up as given
  # and as a Symbol; +default+ is returned when it is not set or when the root
  # carries no options at all.
  def _option option, default=nil
    options = _root._options
    return default unless options
    options[option] || options[option.to_sym] || default
  end

  # Get the length of the range.
  def _length
    _range.last - _range.first
  end

  # Get some or all of the source text covered by this node, depending on the
  # length. The text is returned in inspect form; nil for an empty range.
  def _sample
    return nil if _length == 0
    str = _source[_range]
    (str.length >= 40 ? str[0, 37] + '...' : str).inspect
  end

  # Format the node pretty printing: one line per node, indented by depth.
  def _format
    result = "#{' '*_depth}#{_name} #{_sample}\n"
    each do |node|
      result << node._format
    end
    result
  end

  # Get the last child.
  def _last
    node = _first
    return nil unless node
    while (n2 = node._next)
      node = n2
    end
    node
  end

  # Get the contents for inspection.
  def inspect
    "#{_name ? _name : self.class}[#{_range}] #{to_s.inspect}"
  end

  # Get the source text minus the text of any direct children named by the
  # :ignore option (a single name or an Array of names). The result is cached.
  def _strip
    return @str if @str
    str = _source[_range]
    return @str = str unless (ignore = _option :ignore) && _first
    ignored = Array(ignore)
    remove = find_all{|node| ignored.include?(node._name)}
    # Cut from the back so earlier offsets stay valid.
    remove.reverse_each do |node|
      from = node._range.first - _range.first
      str = str[0, from] + str[from + node._length..-1]
    end
    @str = str
  end

  # Get the source text covered by this node.
  def to_s
    _source[_range]
  end

  # Get the stripped text as a Symbol.
  def to_sym
    _strip.to_sym
  end

  # Get the first child node with the given name, or nil.
  def [] name
    method_missing name.to_sym
  end

  # Any unknown method is treated as a child lookup: returns the first child
  # whose name equals the method name, or nil. Arguments are ignored.
  def method_missing name, *args
    find {|node| name == node._name}
  end

end
156
+
157
# The root node of an Abstract Syntax Tree. Every node in the tree maps to a production
# found in the parse.
class AST < Node

  attr_reader :_options

  # Build the tree for +source+ from the parser's +results+ (a hash of start
  # index => hash of goal => end index, plus a :found_order list per index).
  # options[:ignore] names productions (one name or an Array) for which no
  # node is created.
  def initialize source, results, options={}
    super nil
    @results = results
    @_options = options
    @ignore = Array(options[:ignore]) # XXX: turn to set
    self._source = source
    build_left nil, 0, self
  end

  # The whole tree, pretty printed.
  def to_s
    _format
  end

  private

  # Create the nodes for everything found at +index+. The goals recorded at
  # that index are visited latest-found first, and each node becomes the
  # child of the one created before it. When +node+ is given (the root) it is
  # reused for the first goal instead of creating a new Node.
  # Returns the end of +parent+'s range, or +index+ when there is no parent.
  def build_left parent, index, node=nil
    result = parent ? parent._range.last : index
    row = @results[index]
    return result unless row
    order = row[:found_order]
    return result unless order
    order.reverse_each do |name|
      # Skip ignored productions. (This used to read `continue`, which is not
      # a Ruby keyword; it ended up in Node#method_missing and did nothing.)
      next if @ignore.include? name
      to = row[name]
      if node
        node._name = name
      else
        node = Node.new name
      end
      node._range = index...to
      node._source = _source
      parent << node if parent
      # The node ends before its parent does: continue with the siblings
      # that start where this node ended.
      build_children parent, to if parent && to > index && to < parent._range.last
      parent = node
      node = nil
    end
    result
  end

  # Add children to +parent+ starting at +index+ until the end of the
  # parent's range is reached or no further progress is made.
  def build_children parent, index
    while index < parent._range.last
      i2 = build_left parent, index
      break if i2 <= index
      index = i2
    end
  end

end
212
+
213
class Parser

  # Build an Abstract Syntax Tree from the current source_text and
  # parse_results; parse?() has to be called first. The options are handed to
  # AST.new unchanged. Valid options:
  # * :ignore=>[symbol of element to ignore]
  def ast?(options = {})
    AST.new(source_text, parse_results, options)
  end

end
225
+
226
+ end # Peggy
@@ -0,0 +1,460 @@
1
+ # Peggy packrat parser for Ruby
2
+ #
3
+ # builder.rb - parser builder
4
+ #
5
+ # Copyright (c) 2006 Troy Heninger
6
+ #
7
+ # Peggy is copyrighted free software by Troy Heninger.
8
+ # You can redistribute it and/or modify it under the same terms as Ruby.
9
+
10
+ require 'parse/parser'
11
+
12
+ module Peggy
13
+
14
# Base class of all syntax elements.
class Element
  # Factory: forwards all arguments to new.
  def self.build(*args)
    new(*args)
  end

  # Test whether this element matches at +index+. Subclasses return the index
  # following the match, or NO_MATCH; the base implementation always raises.
  def match(parser, index)
    raise "Must override match"
  end

  # Debugging hook; passes +index+ through unchanged.
  def report(index)
    index
  end
end
33
+
34
# Mixin for elements that hold exactly one child element.
module OneChild
  # The single child
  attr_accessor :child

  # element << child is the same as element.child = child
  alias_method :<<, :child=

  # The child's text, parenthesized when the child responds to each
  # (i.e. is itself a collection of elements).
  def wrap
    return "(#{child})" if child.respond_to?(:each)
    child.to_s
  end

  # String form; identical to wrap.
  def to_s
    wrap
  end
end
52
+
53
# An element that matches a sequence of elements. All must match for the sequence to match.
class Sequence < Element
  # Add a child element. Returns the list of children.
  def add element
    (@list ||= []) << element
  end

  # Synonym for add(element)
  alias :<< :add

  # Reference a child by index; nil when there is no such child.
  def [] index
    @list && @list[index]
  end

  # Child iterator. A sequence without children iterates over nothing.
  def each &blk
    (@list || []).each(&blk)
  end

  # Match each child in sequence. If any fail this returns NO_MATCH. If all succeed this
  # returns the end index of the last. Raises RuntimeError when no child was ever added.
  def match parser, index
    raise "no children added to sequence" unless @list
    each do |element|
      index = element.match parser, index
      return NO_MATCH unless index
    end
    report index
  end

  # Convert element to String: the children separated by spaces
  # (an empty String when there are no children).
  def to_s
    (@list || []).map{|el| el.to_s}.join ' '
  end
end
90
+
91
# Ordered choice: the children are tried one after the other and the first
# one that matches decides the result.
class Alternatives < Sequence
  # Returns the end index of the first matching child, or NO_MATCH when no
  # child matches. Raises RuntimeError when no child was ever added.
  def match(parser, index)
    raise "no children added to alternate" unless @list
    hit = NO_MATCH
    each do |candidate|
      hit = candidate.match(parser, index)
      break if hit
    end
    report(hit || NO_MATCH)
  end

  # String form: the children separated by ' | '.
  def to_s
    texts = @list.map { |candidate| candidate.to_s }
    texts.join(' | ')
  end
end
110
+
111
# Repetition of a single child. Matching is greedy: the child is applied
# again and again until it fails or the upper end of the range is reached.
class Multiple < Element
  include OneChild

  # A big number
  MANY = 32767
  # The minimum and maximum number of tries
  attr_accessor :range

  # Store the allowed repetition range.
  def initialize(range)
    @range = range
  end

  # Apply the child repeatedly starting at +index+. Returns the end index of
  # the last successful match when the number of matches lies within the
  # range, NO_MATCH otherwise. Raises RuntimeError when child or range is unset.
  def match(parser, index)
    raise "multiple element child not set" unless child
    raise "multiple element range not set" unless range
    matched = 0
    loop do
      break unless matched < range.last
      following = child.match(parser, index)
      break unless following
      index = following
      matched += 1
    end
    outcome = (range === matched) ? index : NO_MATCH
    report(outcome)
  end

  # String form: the wrapped child followed by {min..max}.
  def to_s
    "#{wrap}{#{range.min}..#{range.max}}"
  end
end
147
+
148
# Repetition matcher for zero or more occurrences.
class AnyNumber < Multiple
  # Fix the range at 0..MANY.
  def initialize
    super(0..MANY)
  end

  # String form: the wrapped child followed by '*'.
  def to_s
    wrap.to_s + '*'
  end
end
159
+
160
# Repetition matcher for one or more occurrences.
class AtLeastOne < Multiple
  # Fix the range at 1..MANY.
  def initialize
    super(1..MANY)
  end

  # String form: the wrapped child followed by '+'.
  def to_s
    wrap.to_s + '+'
  end
end
171
+
172
# Repetition matcher for zero or one occurrence.
class Optional < Multiple
  # Fix the range at 0..1.
  def initialize
    super(0..1)
  end

  # String form: the wrapped child followed by '?'.
  def to_s
    wrap.to_s + '?'
  end
end
183
+
184
# Common base of the predicates: elements that try their single child without
# advancing the index when it is found. Predicates control parse decisions;
# the matching itself is implemented by the subclasses.
class Predicate < Element
  include OneChild
end
189
+
190
# Positive predicate: succeeds, without consuming anything, when the child
# matches.
class Positive < Predicate

  # Try the child once. Returns the unchanged +index+ when it matches and
  # NO_MATCH when it does not. Raises RuntimeError when no child is set.
  def match(parser, index)
    raise "positive element child not set" unless child
    return NO_MATCH unless child.match(parser, index)
    index
  end

  # String form: '&' followed by the wrapped child.
  def to_s
    "&#{wrap}"
  end
end
207
+
208
# Negative predicate: succeeds, without consuming anything, when the child
# does not match.
class Negative < Predicate

  # Try the child once. Returns NO_MATCH when it matches and the unchanged
  # +index+ when it does not. Raises RuntimeError when no child is set.
  def match(parser, index)
    raise "negative element child not set" unless child
    return NO_MATCH if child.match(parser, index)
    index
  end

  # String form: '!' followed by the wrapped child.
  def to_s
    "!#{wrap}"
  end
end
225
+
226
# Match another production in the grammar.
class Reference < Element
  # The name of the production to lookup and match (a Symbol, or nil if unset).
  attr_reader :name

  # Init the name. The name may be omitted and set later through name=.
  def initialize name=nil
    self.name = name
  end

  # Set the name of production to match. The value is converted to a Symbol;
  # nil is kept as nil so that the documented default of the constructor works.
  def name= value
    @name = value.nil? ? nil : value.to_sym
  end

  # Match the entire production from the parser grammar. If it matches
  # the end index is returned. If not, NO_MATCH is returned.
  # Raises RuntimeError when no name has been set.
  def match parser, index
    raise "reference name not set" unless name
    parser.match? name, index
  end

  # Convert element to String: the production name. A real String is returned
  # (not the Symbol) so the reference shows up correctly when interpolated.
  def to_s
    @name.to_s
  end
end
253
+
254
# A named grammar production. Its one and only child is the definition.
class Production < Reference
  include OneChild

  # The production definition.
  attr_accessor :child

  # production << element is the same as production.child = element
  alias_method :<<, :child=

  # Init the name and, optionally, the defining child.
  def initialize(name = nil, child = nil)
    super(name)
    @child = child
  end

  # Match the definition once at +index+. Returns the end index or NO_MATCH.
  # Raises RuntimeError when name or child is missing.
  def match(parser, index)
    raise "production name not set" unless name
    raise "production child not set" unless child
    outcome = @child.match(parser, index)
    report(outcome)
  end

  # String form: "name: child".
  def to_s
    "#{name}: #{child}"
  end
end
283
+
284
# Matcher of a literal string or regular expression.
class Literal < Element
  # Value to match.
  attr_reader :value

  # Init the value. The value is stored as given.
  def initialize value=nil
    @value = value
  end

  # Set the value to match. A Regexp is anchored so that it only matches at
  # the beginning of the string it is applied to.
  def value= literal
    literal = anchor_regexp literal if literal.is_a? Regexp
    @value = literal
  end

  # Match the literal value. If it matches the end index is returned.
  # If no, NO_MATCH is returned.
  def match parser, index
    report parser.literal?(value, index)
  end

  # Convert element to String (the inspect form of the value).
  def to_s
    value.inspect
  end

  private

  # Return +re+ itself when its source already starts with \A, otherwise a
  # new Regexp with the same options whose source is wrapped as \A(...).
  # (The setter used to call a method named correct_regexp, which this class
  # never had.)
  def anchor_regexp re
    source = re.source
    source[0..1] == '\\A' ? re : Regexp.new("\\A(#{source})", re.options)
  end
end
312
+
313
# Parser builder. The built in methods create syntax elements. Any other
# method called on this object create references to production, or actual
# productions, if called at the top level.
# Todo: Change to a class and separate from Parser.
class Builder < Parser
  # Productions to build
  attr_reader :productions
  # Current parent being built
  attr_reader :parent

  # Constructor
  def initialize
    reset!
  end

  # Clear the parser and prepare it for a new parse: switch back to building
  # mode and drop all productions.
  def reset!
    @building = true
    @productions = {}
  end

  # Reference a production by its name index.
  def [] index
    productions[index]
  end

  # While building: create a reference to production +name+ when called inside
  # a production, or define production +name+ when called at the top level
  # with a block. While parsing: match production +name+ at the index given
  # as first argument. Anything else goes to the default method_missing.
  def method_missing name, *args
    if @building
      if @parent
        ref = Reference.new name
        @parent << ref
      elsif block_given?
        prod = Production.new name
        @parent = prod
        begin
          yield
        ensure
          # Always return to the top level, even when the block raises;
          # otherwise every later definition would turn into a reference.
          @parent = nil
        end
        @productions[name] = prod
      else
        super
      end
    else
      prod = @productions[name]
      super unless prod
      prod.match self, args.first
    end
  end

  # Build an Alternatives element.
  def alt &blk
    build_piece Alternatives, blk
  end
  # Synonym for alt().
  alias :one :alt

  # Build or match the end of file element. If currently building, a Reference to eof
  # is built. Otherwise eof is matched.
  def eof *args
    if @building
      method_missing :eof, *args
    else
      super args.first
    end
  end

  # Build a Sequence element.
  def seq &blk
    build_piece Sequence, blk
  end
  # Synonym for seq()
  alias :each :seq

  # Add a Literal element to the parent. Several values are added as
  # alternatives of each other.
  def lit *values
    if values.size == 1
      build_piece Literal, nil, values.first
    else
      one{
        values.each do |v|
          build_piece Literal, nil, v
        end
      }
    end
  end

  # Build an AnyNumber element.
  def many &blk
    build_piece AnyNumber, blk
  end

  # Build an Optional element.
  def opt &blk
    build_piece Optional, blk
  end

  # Build an AtLeastOne element.
  def some &blk
    build_piece AtLeastOne, blk
  end

  # Build a negative predicate. Use when you want to make sure the enclosed element is not present.
  # The cursor is not advanced for predicates.
  def neg &blk
    build_piece Negative, blk
  end

  # Build a positive predicate. Use when you want to make sure the enclosed element is present.
  # If matched the cursor is not advanced.
  def pos &blk
    build_piece Positive, blk
  end

  # Invokes the parser from the beginning of the source on the given production goal.
  # You may provide the source here or you can set source_text prior to calling.
  # If index is provided the parser will ignore characters previous to it.
  # Leaves building mode before parsing.
  def parse? goal, source=nil, index=0
    @building = nil
    super
  end

  # Convert productions to Peggy grammar. This is not able to output any Ruby parse methods,
  # only grammars built with Builder methods.
  def to_s
    productions.values.join "\n"
  end

  private

  # Add an object of klass to the parent and yield to its block. If
  # value is specified it is passed to the klass constructor.
  # While the block runs the new element is the current parent; the previous
  # parent is restored afterwards, also when the block raises.
  def build_piece klass, blk=nil, value=nil
    elem = value ? klass.new(value) : klass.new
    @parent << elem
    return unless blk
    outer = @parent
    @parent = elem
    begin
      blk.call
    ensure
      @parent = outer
    end
    outer
  end

end # Builder
459
+
460
+ end # Peggy
@@ -0,0 +1,252 @@
1
+ # Peggy packrat parser for Ruby
2
+ #
3
+ # parser.rb - packrat parser
4
+ #
5
+ # Copyright (c) 2006 Troy Heninger
6
+ #
7
+ # Peggy is copyrighted free software by Troy Heninger.
8
+ # You can redistribute it and/or modify it under the same terms as Ruby.
9
+
10
+ require 'pp'
11
+
12
+ # Peggy is a packrat parsing engine. Packrat parsers memoize every production so that
13
+ # parses can happen in linear time. No production needs to be processed more than once for
14
+ # a given position of the source. See http://pdos.csail.mit.edu/~baford/packrat/ for
15
+ # more details.
16
+ #
17
+ # Peggy also incorporates Parsing Expression Grammar (PEG) as proposed by Bryan Ford,
18
+ # as one of several input grammars. PEG is a formalized grammar specification needing
19
+ # no separate lexer/scanner step. See http://pdos.csail.mit.edu/~baford/packrat/popl04/
20
+ #
21
+ # As good as packrat parsers are, they have a few limitations. They cannot handle left
22
+ # recursion of a production, meaning a production cannot reference itself as the first
23
+ # element in a sequence. Also memoizing of production results means that memory consumption
24
+ # increases with the size of the source being parsed. This is not usually a concern, except
25
+ # when attempting to parse multi-megabyte source files, such as a huge XML database.
26
+ module Peggy
27
+
28
+ # Returned when a production did not match
29
+ NO_MATCH = false
30
+ # Used to prevent infinite (left) recursions
31
+ IN_USE = true
32
+
33
+ # class OrderedHash < Hash
34
+ # alias_method :store, :[]=
35
+ # alias_method :each_pair, :each
36
+ #
37
+ # def initialize
38
+ # @keys = []
39
+ # super
40
+ # end
41
+ #
42
+ # def []=(key, val)
43
+ # @keys << key
44
+ # super
45
+ # end
46
+ #
47
+ # def delete(key)
48
+ # @keys.delete(key)
49
+ # super
50
+ # end
51
+ #
52
+ # def each
53
+ # @keys.sort.each { |k| yield k, self[k] }
54
+ # end
55
+ #
56
+ # def each_key
57
+ # @keys.sort.each { |k| yield k }
58
+ # end
59
+ #
60
+ # def each_value
61
+ # @keys.sort.each { |k| yield self[k] }
62
+ # end
63
+ # end
64
+
65
# Packrat parser class. Note all methods have a trailing exclamation (!) or question
# mark (?), or have long names with underscores (_). This is because productions are
# methods and we need to avoid name collisions. To use this class you must subclass
# Parser and provide your productions as methods. Your productions must call match?
# or one of the protected convenience routines to perform parsing. Productions must
# never call another production directly, or results will not get memoized and you
# will slow down your parse considerably, and possibly risk getting into an infinite
# recursion (until the stack blows its top). Note, as a convenience in writing
# productions, you can call any match? function multiple times, passing each returned
# index, such as in a sequence, without checking the results of each production.
class Parser

  # Tells parser to print intermediate results if set.
  attr_accessor :debug_flag

  # The source to parse, can be set prior to calling parse?().
  attr_accessor :source_text

  # The results of the parse. A hash (keys of indexes) of hashes (keys of production
  # symbols and values of end indexes).
  attr_reader :parse_results

  # The productions to ignore.
  attr_accessor :ignore_productions

  # Return a range (or character) of the source_text.
  # Raises RuntimeError when source_text is not set.
  def [] range
    raise "source_text not set" if source_text.nil?
    source_text[range]
  end

  # Invokes the parser from the beginning of the source on the given production goal.
  # You may provide the source here or you can set source_text prior to calling.
  # If index is provided the parser will ignore characters previous to it.
  # Returns the end index of the match or NO_MATCH; previous results are discarded.
  def parse? goal, source = nil, index = 0
    self.source_text = source unless source.nil?
    # Hash of automatic hashes
    @parse_results = Hash.new {|h1, k1| h1[k1] = {}}
    @keys = nil
    index = match? goal, index
    pp(parse_results) if debug_flag
    index
  end

  # Queries the parse results for a hierarchy of production matches. This can only be
  # called after parse_results have been set by a parse.
  # NOTE(review): this relies on a find? method that is not defined in the
  # code visible here, so it looks unfinished -- confirm before using.
  def query? *args
    raise "You must first call parse!" unless parse_results
    @keys = @parse_results.keys.sort unless @keys
    index = 0
    args.each do |arg|
      index = find? arg, index
    end
  end

  # Try to match a production from the given index. Returns the end index if found
  # or start index if not found.
  def allow? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? index : found
  end

  # Try to match a production from the given index then backtrack. Returns index if
  # found or NO_MATCH if not.
  def check? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? NO_MATCH : index
  end

  # Try not to match a production from the given index then backtrack. Returns index
  # if not found or NO_MATCH if found.
  def dissallow? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? index : NO_MATCH
  end

  # Special production that only matches the end of source_text. Note, this function
  # does not end in (?) or (!) because it is meant be used as a normal production.
  def eof index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    index >= source_text.length ? index : NO_MATCH
  end

  # Match a production from the given index. Returns the end index if found or NO_MATCH
  # if not found. Results are memoized per index and goal.
  # Raises RuntimeError when the goal is reached again at the same index while
  # it is still being matched (left recursion).
  def match? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    index = ignore? index unless @ignoring
    goal = goal.to_sym
    position = parse_results[index]
    found = position.fetch(goal) do
      position[goal] = IN_USE # used to prevent infinite recursion in case user attempts
                              # a left recursion
      _memoize goal, index, send(goal, index), position
    end
    # Check for recursion first: the debug output below slices the source
    # with +found+, which is not an index in that case.
    raise "Parser cannot handle infinite (left) recursions.  Please rewrite usage of '#{goal}'." if found == IN_USE
    puts "found #{goal} at #{index}...#{found} #{source_text[index...found].inspect}" if found && debug_flag
    found
  end

  # Record the results of the parse in the parse_results memo. A successful
  # goal is appended to the :found_order list of its start index; for Symbol
  # goals matching fewer than 40 characters the matched text is stored under
  # the goal's String name as well. Returns +result+.
  def _memoize goal, index, result, position = parse_results[index]
    if result
      position[:found_order] = [] unless position.has_key?(:found_order)
      position[:found_order] << goal
      position[goal.to_s] = source_text[index...result] if result - index < 40 && goal.is_a?(Symbol)
    end
    position[goal] = result if result || goal.is_a?(Symbol)
    result
  end

  # Match tokens that should be ignored. Used by match?(). Returns end index if found
  # or start index if not found. Subclasses should override this method if they wish
  # to ignore other text, such as comments.
  def ignore? index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    return index if @ignoring || ignore_productions.nil?
    @ignoring = true
    begin
      ignore_productions.each do |prod|
        index = allow? prod, index
      end
    ensure
      # Leave ignoring mode even when a production raises, so that a later
      # parse with this object skips ignored text again.
      @ignoring = nil
    end
    index
  end

  # Match a literal string or regular expression from the given index. Returns
  # the end index if found or NO_MATCH if not found.
  # Raises RuntimeError for a value that is neither String nor Regexp.
  def literal? value, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    case value
    when String
      string? value, index
    when Regexp
      regexp? value, index
    else
      raise "Unknown literal: #{value.inspect}"
    end
  end

  # Match a string from the given index. Returns the end index if found
  # or NO_MATCH if not found.
  def string? value, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    value = value.to_s
    index = ignore? index unless @ignoring
    i2 = index + value.length
    _memoize(value, index, source_text[index...i2] == value ? i2 : NO_MATCH)
  end

  # Match a regular expression from the given index. Returns the end index
  # if found or NO_MATCH if not found.
  def regexp? value, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    value = correct_regexp! value
    index = ignore? index unless @ignoring
    found = value.match source_text[index..-1]
    _memoize(value, index, found ? found.end(0) + index : NO_MATCH)
  end

  # Make sure regular expressions match the beginning of the string, actually from
  # the string from the given index. Returns +re+ itself when its source already
  # starts with \A, otherwise a new Regexp wrapped as \A(...).
  def correct_regexp! re
    source = re.source
    source[0..1] == '\\A' ? re : Regexp.new("\\A(#{source})", re.options)
  end

  protected

  # Create an index of the parse results: a Hash mapping every goal that
  # matched to the Array of source ranges (start...end) it matched, ordered
  # by start index. The index is stored in @index and returned.
  # This completes what used to be an unfinished sketch that could not run.
  # Raises RuntimeError when no parse has been made yet.
  def index_results!
    raise "You must first call parse!" unless parse_results
    @index = Hash.new {|h, k| h[k] = []}
    parse_results.keys.sort.each do |index|
      prod_map = parse_results[index]
      # Literal goals can be recorded more than once per index; list each once.
      (prod_map[:found_order] || []).uniq.each do |prod|
        @index[prod] << (index...prod_map[prod])
      end
    end
    @index
  end
end # Parser
251
+
252
+ end # Peggy
metadata ADDED
@@ -0,0 +1,49 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: abnc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Carsten Bormann
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Shifty support for tools based on IETF's ABNF
14
+ email: cabo@tzi.org
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - abnc.gemspec
20
+ - lib/abnc.rb
21
+ - lib/parse/ast.rb
22
+ - lib/parse/builder.rb
23
+ - lib/parse/parser.rb
24
+ homepage: http://github.com/cabo/abnc
25
+ licenses:
26
+ - Apache 2.0
27
+ metadata: {}
28
+ post_install_message:
29
+ rdoc_options: []
30
+ require_paths:
31
+ - lib
32
+ required_ruby_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 1.9.2
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 2.4.5
45
+ signing_key:
46
+ specification_version: 4
47
+ summary: RFC 5234+7405 ABNF compiler-let
48
+ test_files: []
49
+ has_rdoc: false