abnc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c7c05ad333b974b34b0617122a98072125248024
4
+ data.tar.gz: f0f184042cb6c275da5c848d804e2469fb856909
5
+ SHA512:
6
+ metadata.gz: 910af8ee601a03a3fa18eb2ba21bc25c4db181d03c6d6aa28975aca59b2a1b81da4a6dbf079d7efdcf663b089500f04f083de6073188a2c742a3236fad87c2fb
7
+ data.tar.gz: dc9dea38fd3c4b62ac8e72bdfd8addb93fc51d0df776b45b3ed499c29dd342e147b23ae4d7afc7729c91d8d583321452cffb83139bbad3d3f05e2c607191df36
@@ -0,0 +1,15 @@
1
# Gem specification for abnc.
Gem::Specification.new do |s|
  s.name = "abnc"
  s.version = "0.1.0"
  s.summary = "RFC 5234+7405 ABNF compiler-let"
  s.description = %q{Shifty support for tools based on IETF's ABNF}
  s.author = "Carsten Bormann"
  s.email = "cabo@tzi.org"
  # RubyGems expects an SPDX license identifier; "Apache 2.0" triggers a
  # validation warning at build time.
  s.license = "Apache-2.0"
  s.homepage = "http://github.com/cabo/abnc"
  # NOTE: the deprecated `has_rdoc=` setter was dropped; RubyGems ignores it.
  s.files = Dir['lib/**/*.rb'] + %w(abnc.gemspec)
  s.required_ruby_version = '>= 1.9.2'

  s.require_paths = ["lib"]
end
@@ -0,0 +1,176 @@
1
+ require 'parse/parser'
2
+ require 'parse/builder'
3
+ require 'parse/ast'
4
+
5
+ module Peggy
6
+
7
+ # Implements the RFC 5234 ABNF (which obsoletes RFC 4234), one of several grammars supported.
8
+ #
9
+ # Keep in mind, though, that the ABNF semantics is that of a BNF,
10
+ # i.e., non-deterministic; while the packrat parser underlying peggy
11
+ # is a PEG parser, which cuts decision points once a successful
12
+ # parse is made. You may have to exchange alternatives, e.g., for
13
+ # parsing ABNF itself using ABNF, you have to change RFC 4234's rule
14
+ # repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
15
+ # into
16
+ # repeat = (*DIGIT "*" *DIGIT) / 1*DIGIT
17
+ # as otherwise "1*(...)" will start to parse as the first
18
+ # alternative and never try the second.
19
+
20
+ class ABNF < Builder
21
+
22
# Bootstrap parser for ABNF source text. Its productions are declared with the
# Builder DSL in #build; ABNF#compile! uses it to read a grammar.
class ABNFParser < Builder

  # Runs the inherited constructor, then installs the ABNF productions.
  def initialize
    super
    build
  end

  private

  # Declares the productions that recognise ABNF. Alternatives are tried in
  # order and the first match wins, so the order inside +prodatom+ matters.
  def build
    self.ignore_productions = [:ws, :s]

    grammar { seq { many { prod }; eof } }
    prodname { lit(/[A-Za-z][-A-Za-z0-9]*/) }
    ws { lit(/(?:[ \t\n]|;[^\n]*\n)+/) }
    s { opt { ws } }
    prod { seq { prodname; s; lit('='); s; prodalt; s } }
    prodalt do
      seq do
        prodterm
        many { seq { s; lit('/'); s; prodterm } }
      end
    end
    prodterm do
      seq do
        prodatom
        many { seq { s; prodatom } }
      end
    end
    prodatom do
      alt do
        numlit
        casese
        seq { opt { lit("%i") }; casein }
        # A name followed by "=" starts the next rule, so it is not an atom.
        seq { prodname; neg { seq { s; lit('=') } } }
        optgroup
        repgroup # XXX: specific repetition is missing
        group
      end
    end
    numlit do
      alt do
        lit(/%x[0-9A-Fa-f][0-9A-Fa-f]([-.][0-9A-Fa-f][0-9A-Fa-f])*/)
        lit(/%d[0-9]+([-.][0-9]+)*/)
      end
    end
    casein { lit(/"[^"]+"/) }
    casese { lit(/%s"[^"]+"/) }
    optgroup { seq { lit("["); s; prodalt; s; lit("]") } }
    group { seq { lit("("); s; prodalt; s; lit(")") } }
    repgroup { seq { repspec; prodatom } }
    repspec { lit(/[0-9]*\*[0-9]*/) }
  end
end # ABNFParser
72
+
73
# Compile ABNF source +text+ into productions on this builder.
#
# Existing productions are discarded first. +options+ is accepted but not
# consulted. Raises a RuntimeError naming the highest input index that has
# parse results when +text+ is not a valid grammar.
def compile! text, options={}
  reset!
  reader = ABNFParser.new
  parsed = reader.parse?(:grammar, text)
  tree = reader.ast?(:ignore => :s)
  unless parsed
    raise "Invalid ABNF grammar at char #{reader.parse_results.keys.max}"
  end
  # Each ABNF rule becomes one production named after the (symbolized) rule.
  tree.each(:prod) do |rule|
    send(symbolize(rule.prodname.to_s)) { build_prodalt rule.prodalt }
  end
end
91
+
92
+ private
93
+
94
# Turn an ABNF rule name into the Symbol used as production name:
# lower-cased, with "-" replaced by "_".
#
# Names that would collide with a real method get a "p_" prefix, because
# productions are defined and looked up through method_missing:
# * a public/protected instance method of Node would shadow child lookup on
#   the syntax tree;
# * any method of this builder (including private ones such as Kernel#test
#   or Kernel#exit) would be invoked by +send+ instead of defining or
#   referencing the production.
#
# The original check, Node.methods.include?(name), compared a String with
# the Symbols returned on Ruby >= 1.9 and therefore never matched.
def symbolize name
  name = name.downcase.gsub(/-/, "_")
  if Node.method_defined?(name) || respond_to?(name, true)
    name = "p_" + name
  end
  name.to_sym
end
101
+
102
# Emit builder elements for an ABNF alternation node. A single term is built
# directly; several terms are wrapped in an ordered +alt+.
def build_prodalt prodalt
  return build_prodterm(prodalt.prodterm) if prodalt._count(:prodterm) == 1

  alt do
    prodalt.each(:prodterm) { |term| build_prodterm term }
  end
end
113
+
114
# Emit builder elements for an ABNF concatenation node. A single atom is
# built directly; several atoms are wrapped in a +seq+.
def build_prodterm prodterm
  return build_prodatom(prodterm.prodatom) if prodterm._count(:prodatom) == 1

  seq do
    prodterm.each(:prodatom) { |atom| build_prodatom atom }
  end
end
125
+
126
# Emit the builder element for a single ABNF atom node.
#
# Handles numeric literals (%x41, %d65, dotted concatenations and "-"
# ranges), quoted strings (case-insensitive by default, case-sensitive with
# %s), rule references, optional groups, repetitions and parenthesised
# groups. Raises RuntimeError when the node is none of these.
def build_prodatom prodatom
  if c = prodatom.numlit
    md = /^%([xd])([0-9A-Fa-f]+)(.*)/.match(c.to_s)
    conv = {"x" => :hex, "d" => :to_i}[md[1]]
    r = md[2].send(conv).chr
    rest = md[3]
    if rest != ''
      if rest[0..0] == '.'
        # Dotted form: a concatenation of character codes.
        r += rest[1..-1].split('.').map { |x| x.send(conv).chr }.join('')
      else # XXX: need to barf if more than one...
        # Range form. Both end points are escaped: characters such as "[",
        # "]", "\\", "^" or "-" would otherwise corrupt the character class
        # (e.g. %x5B-5D used to become /[[-]]/).
        t = rest[1..-1].send(conv).chr
        r = /[#{Regexp.escape(r)}-#{Regexp.escape(t)}]/
      end
    end
    lit r
  elsif c = prodatom.casein
    # Strip the surrounding double quotes.
    lit(/(?i:#{Regexp.escape(c.to_s[1..-2])})/)
  elsif c = prodatom.casese
    # Strip the leading %s" and the trailing quote.
    lit(/#{Regexp.escape(c.to_s[3..-2])}/)
  elsif c = prodatom.prodname
    send(symbolize(c.to_s))
  elsif c = prodatom.optgroup
    opt { build_prodalt c.prodalt }
  elsif c = prodatom.repgroup
    md = /^([0-9]*)\*([0-9]*)/.match(c.repspec.to_s)
    minr = md[1] == "" ? 0 : md[1].to_i
    maxr = md[2] == "" ? nil : md[2].to_i
    body = proc { build_prodatom c.prodatom }
    case [minr, maxr]
    when [1, nil]
      some(&body)
    when [0, nil]
      many(&body)
    when [0, 1]
      opt(&body)
    else
      # Any other bounds: build a Multiple with an explicit range; an open
      # upper bound uses Multiple::MANY.
      build_piece Multiple, body, minr..(maxr || Multiple::MANY)
    end
  elsif c = prodatom.group
    build_prodalt c.prodalt
  else
    raise "prodatom strangeness"
  end
end
174
+
175
+ end #ABNF
176
+ end # Peggy
@@ -0,0 +1,226 @@
1
+ # Peggy packrat parser for Ruby
2
+ #
3
+ # ast.rb - Abstract Syntax Tree
4
+ #
5
+ # Copyright (c) 2006 Troy Heninger
6
+ #
7
+ # Peggy is copyrighted free software by Troy Heninger.
8
+ # You can redistribute it and/or modify it under the same terms as Ruby.
9
+
10
+ module Peggy
11
+
12
# A node in an Abstract Syntax Tree. Every node in the tree maps to a production
# found in the parse. You can navigate to the node's parent, first child, or next
# sibling. Nodes know their range of the source text.
#
# Unknown method calls are treated as child lookups by name (see
# #method_missing), which is why the built-in methods carry a leading underscore.
class Node
  include Enumerable

  attr_accessor :_name, :_first, :_next, :_parent, :_range, :_source

  # Constructor
  def initialize name
    self._name = name
  end

  # Add a child at the end of the child list.
  def << child
    child._parent = self
    if _first
      _last._next = child
    else
      self._first = child
    end
  end

  # Iterate over each child. If name is supplied only nodes matching the name
  # are iterated. Without a block an Enumerator is returned.
  def each name=nil
    return enum_for(:each, name) unless block_given?
    child = _first
    while child
      yield child if name.nil? || name == child._name
      child = child._next
    end
  end

  # Get the children as an Array, optionally restricted to the given name.
  def children name=nil
    each(name).to_a
  end

  # Count the children. If name is supplied only nodes matching the name are counted.
  def _count name=nil
    c = 0
    each(name) { c += 1 }
    c
  end

  # Get the number of nodes up to the root.
  def _depth
    depth = 0
    node = self
    depth += 1 while (node = node._parent)
    depth
  end

  # Get the root node.
  def _root
    node = self
    while (up = node._parent)
      node = up
    end
    node
  end

  # Get an option set when tree was created. The key is tried as given and as
  # a Symbol; +default+ is returned when the option (or the whole option hash)
  # is absent.
  def _option option, default=nil
    options = _root._options
    return default unless options
    options[option] || options[option.to_sym] || default
  end

  # Get the length of the range.
  def _length
    _range.last - _range.first
  end

  # Get some or all of the source text covered by this node, depending on the
  # length. Returns nil for an empty node; text of 40 characters or more is
  # cut to 37 characters plus "...".
  def _sample
    return nil if _length == 0
    str = _source[_range]
    (str.length >= 40 ? str[0, 37] + '...' : str).inspect
  end

  # Format the node and its descendants, one line per node, indented by depth.
  def _format
    result = "#{' '*_depth}#{_name} #{_sample}\n"
    each do |node|
      result << node._format
    end
    result
  end

  # Get the last child, or nil if there are no children.
  def _last
    node = _first
    return nil unless node
    while (following = node._next)
      node = following
    end
    node
  end

  # Get the contents for inspection.
  def inspect
    "#{_name ? _name : self.class}[#{_range}] #{to_s.inspect}"
  end

  # Get the source text minus any ignored direct children. The :ignore option
  # may be a single name or a list of names. The result is cached.
  def _strip
    return @str if @str
    str = _source[_range]
    ignore = _option :ignore
    return @str = str unless ignore && _first
    ignored = Array(ignore)
    remove = select { |node| ignored.include?(node._name) }
    # Cut from the back so earlier offsets stay valid.
    remove.reverse_each do |node|
      from = node._range.first - _range.first
      str = str[0, from] + str[from + node._length..-1]
    end
    @str = str
  end

  # Get the source text covered by this node.
  def to_s
    _source[_range]
  end

  # Get the stripped text as a Symbol.
  def to_sym
    _strip.to_sym
  end

  # Get the first child node of the given name.
  def [] name
    method_missing name.to_sym
  end

  # Any unknown method is a lookup of the first child with that name; nil is
  # returned when there is none. Arguments are ignored.
  def method_missing name, *args
    find {|node| name == node._name}
  end

end
156
+
157
# The root node of an Abstract Syntax Tree. It is built from the memoized
# parse results: a hash of start index => { production => end index,
# :found_order => [productions in completion order] }.
class AST < Node

  attr_reader :_options

  # Build the tree for +source+ from +results+. +options+ is kept for
  # Node#_option lookups (e.g. :ignore).
  def initialize source, results, options={}
    super nil
    @results = results
    @_options = options
    @ignore = Array(options[:ignore]) # XXX: turn to set
    self._source = source
    build_left nil, 0, self
  end

  # The formatted tree.
  def to_s
    _format
  end

  private

  # Create the chain of nodes that start at +index+, outermost first, and hang
  # it under +parent+. +node+, when given, is reused for the first name
  # instead of allocating a new Node. Returns the end of +parent+'s range, or
  # +index+ when there is no parent.
  def build_left parent, index, node=nil
    result = parent ? parent._range.last : index
    row = @results[index]
    order = row && row[:found_order]
    return result unless order

    pending = node
    # Productions are recorded as they complete, so the reverse order lists
    # the outermost production first.
    order.reverse_each do |name|
      # NOTE(review): `continue` is not a Ruby keyword. It is dispatched to
      # Node#method_missing (a child lookup) and has no effect, so ignored
      # names are NOT skipped here. Replacing it with `next` would change the
      # tree that Node#_strip and ABNF#compile! see -- confirm intent first.
      continue if @ignore.include? name
      stop = row[name]
      current = pending || Node.new(name)
      pending = nil
      current._name = name
      current._range = index...stop
      current._source = _source
      parent << current if parent
      # Siblings that follow this node inside the same parent.
      build_children parent, stop if parent && stop > index && stop < parent._range.last
      parent = current
    end
    result
  end

  # Build the children of +parent+ that start at or after +index+, stopping
  # as soon as no progress is made.
  def build_children parent, index
    loop do
      break unless index < parent._range.last
      reached = build_left(parent, index)
      break unless reached > index
      index = reached
    end
  end

end
212
+
213
class Parser

  # Create an Abstract Syntax Tree from the parse results. You must call parse?() prior to
  # this. Valid options:
  # * :ignore=>[symbol of element to ignore]
  def ast? options={}
    AST.new(source_text, parse_results, options)
  end

end
225
+
226
+ end # Peggy
@@ -0,0 +1,460 @@
1
+ # Peggy packrat parser for Ruby
2
+ #
3
+ # builder.rb - parser builder
4
+ #
5
+ # Copyright (c) 2006 Troy Heninger
6
+ #
7
+ # Peggy is copyrighted free software by Troy Heninger.
8
+ # You can redistribute it and/or modify it under the same terms as Ruby.
9
+
10
+ require 'parse/parser'
11
+
12
+ module Peggy
13
+
14
# Base syntax element class.
class Element
  # Create an element; all arguments are forwarded to the constructor.
  # (Parenthesised splat avoids Ruby's "`*' interpreted as argument prefix"
  # warning.)
  def self.build(*args)
    new(*args)
  end

  # Test to see if there is a match of this element at the current index.
  # Returns the index following the match, or NO_MATCH if there is none.
  # Subclasses must override this; the base implementation raises.
  def match parser, index
    raise "Must override match"
  end

  # Used for debugging. Returns +index+ unchanged.
  def report index
    # puts "#{to_s} #{index}"
    index
  end
end
33
+
34
# An element with a single child element.
module OneChild
  # The single child
  attr_accessor :child

  # synonym for child=(element)
  alias :<< :child=

  # Convert to String.
  def to_s
    wrap
  end

  # Enclose child in parentheses if it is a compound element (responds to
  # +each+); otherwise return its plain string form.
  def wrap
    child.respond_to?(:each) ? "(#{child})" : child.to_s
  end
end
52
+
53
# An element that matches a sequence of elements. All must match for the sequence to match.
class Sequence < Element
  # Append a child element; the child list is created on first use.
  def add element
    (@list ||= []) << element
  end

  # Synonym for add(element)
  alias :<< :add

  # Reference a child by index.
  def [] index
    @list[index]
  end

  # Child iterator.
  def each &blk
    @list.each(&blk)
  end

  # Match each child in order, feeding each end index to the next child.
  # Returns NO_MATCH as soon as one fails, otherwise the last end index.
  def match parser, index
    raise "no children added to sequence" unless @list
    position = index
    each do |element|
      position = element.match(parser, position)
      return NO_MATCH unless position
    end
    report position
  end

  # Convert element to String.
  def to_s
    @list.map { |el| el.to_s }.join(' ')
  end
end
90
+
91
# An element which matches any one of its children. The children are tested in order. The first
# to match wins.
class Alternatives < Sequence
  # Try the children in order, all from the same index. Returns the end index
  # of the first child that matches, or NO_MATCH if none does.
  def match parser, index
    raise "no children added to alternate" unless @list
    each do |candidate|
      hit = candidate.match(parser, index)
      return report(hit) if hit
    end
    report NO_MATCH
  end

  # Convert element to String.
  def to_s
    @list.map { |el| el.to_s }.join(' | ')
  end
end
110
+
111
# An element which tries its single child multiple times. It is greedy, meaning it will continue
# to match as long as possible, unless the range specifies a maximum number of matches.
class Multiple < Element
  include OneChild

  # A big number, used as the upper bound for "unlimited".
  MANY = 32767
  # The minimum and maximum number of tries
  attr_accessor :range

  # Init the range
  def initialize range
    @range = range
  end

  # Match the child repeatedly, at most range.last times. Returns the end
  # index of the last match when the number of matches lies within the range,
  # NO_MATCH otherwise.
  def match parser, index
    raise "multiple element child not set" unless child
    raise "multiple element range not set" unless range
    matched = 0
    position = index
    until matched >= range.last
      following = child.match(parser, position)
      break unless following
      position = following
      matched += 1
    end
    report(range === matched ? position : NO_MATCH)
  end

  # Convert element to String.
  def to_s
    "#{wrap}{#{range.min}..#{range.max}}"
  end
end
147
+
148
# Matcher of 0 or more times.
class AnyNumber < Multiple
  # Range is zero up to Multiple::MANY.
  def initialize
    super(0..MANY)
  end

  # Convert element to String.
  def to_s
    wrap + "*"
  end
end
159
+
160
# Matcher of 1 or more times.
class AtLeastOne < Multiple
  # Range is one up to Multiple::MANY.
  def initialize
    super(1..MANY)
  end

  # Convert element to String.
  def to_s
    wrap + "+"
  end
end
171
+
172
# Matcher of 0 or 1 time.
class Optional < Multiple
  # Range is zero or one.
  def initialize
    super(0..1)
  end

  # Convert element to String.
  def to_s
    wrap + "?"
  end
end
183
+
184
# Common base of the look-ahead elements: a predicate tries its single child
# but never advances the index. Predicates control parse decisions.
class Predicate < Element
  include OneChild
end
189
+
190
# Positive Predicate.
# If found the original index is returned. If not NO_MATCH is returned.
class Positive < Predicate

  # Try the child once without consuming input: the original index when it
  # matches, NO_MATCH when it does not.
  def match parser, index
    raise "positive element child not set" unless child
    child.match(parser, index) ? index : NO_MATCH
  end

  # Convert element to String.
  def to_s
    "&" + wrap
  end
end
207
+
208
# Negative Predicate.
# If not found the original index is returned. If found NO_MATCH is returned.
class Negative < Predicate

  # Try the child once without consuming input: NO_MATCH when it matches,
  # the original index when it does not.
  def match parser, index
    raise "negative element child not set" unless child
    child.match(parser, index) ? NO_MATCH : index
  end

  # Convert element to String.
  def to_s
    "!" + wrap
  end
end
225
+
226
# Match another production in the grammar.
class Reference < Element
  # The name of the production to lookup and match (a Symbol, or nil if unset).
  attr_reader :name

  # Init the name. The name may be omitted and set later with #name=.
  def initialize name=nil
    self.name = name
  end

  # Set the name of production to match. The value is converted to a Symbol;
  # nil is kept as nil so the documented default no longer raises.
  def name= value
    @name = value.nil? ? nil : value.to_sym
  end

  # Match the entire production from the parser grammar. If it matches
  # the end index is returned. If not, NO_MATCH is returned.
  def match parser, index
    raise "reference name not set" unless name
    parser.match? name, index
  end

  # Convert element to String. (Returning the Symbol itself made string
  # interpolation fall back to the default object representation.)
  def to_s
    @name.to_s
  end
end
253
+
254
# Matcher of a grammar production. The one and only child defines the production.
class Production < Reference
  include OneChild

  # The production definition.
  attr_accessor :child

  # Synonym of child=(element)
  alias :<< :child=

  # Init the name and child.
  def initialize name=nil, child=nil
    super(name)
    @child = child
  end

  # Match the production body once at +index+. Returns the end index, or
  # NO_MATCH when the body does not match.
  def match parser, index
    raise "production name not set" unless name
    raise "production child not set" unless child
    outcome = @child.match(parser, index)
    report outcome
  end

  # Convert element to String.
  def to_s
    "#{name}: #{child}"
  end
end
283
+
284
# Matcher of a literal string or regular expression.
class Literal < Element
  # Value to match.
  attr_reader :value

  # Init the value. The value is stored as given.
  def initialize value=nil
    @value = value
  end

  # Set the value to match. Regular expressions are anchored to the beginning
  # of the string. (This used to call an undefined +correct_regexp+ method and
  # raised NoMethodError for any Regexp.)
  def value= literal
    literal = correct_regexp(literal) if literal.is_a? Regexp
    @value = literal
  end

  # Match the literal value. If it matches the end index is returned.
  # If not, NO_MATCH is returned.
  def match parser, index
    report parser.literal?(value, index)
  end

  # Convert element to String.
  def to_s
    value.inspect
  end

  private

  # Return +re+ anchored with \A; a Regexp that already starts with \A is
  # returned unchanged.
  def correct_regexp re
    source = re.source
    source[0..1] == '\\A' ? re : Regexp.new("\\A(#{source})", re.options)
  end
end
312
+
313
# Parser builder. The built in methods create syntax elements. Any other
# method called on this object creates a reference to a production, or an
# actual production if called at the top level with a block.
# Todo: Change to a class and separate from Parser.
class Builder < Parser
  # Productions to build
  attr_reader :productions
  # Current parent being built
  attr_reader :parent

  # Constructor
  def initialize
    reset!
  end

  # Clear the parser and prepare it for a new build. Also forgets any parent
  # left over from an interrupted build, so new productions are not silently
  # appended to a stale element.
  def reset!
    @building = true
    @parent = nil
    @productions = {}
  end

  # Reference a production by its name index.
  def [] index
    productions[index]
  end

  # While building: create a production if at the top level (a block is
  # required), or a reference to a production if one is being built.
  # While parsing: match the named production at the index in args.first.
  def method_missing name, *args
    if @building
      if @parent
        ref = Reference.new name
        @parent << ref
      elsif block_given?
        prod = Production.new name
        @parent = prod
        begin
          yield
        ensure
          # Always leave "top level" state, even when the block raises.
          @parent = nil
        end
        @productions[name] = prod
      else
        super
      end
    else
      prod = @productions[name]
      super unless prod
      prod.match self, args.first
    end
  end

  # Build an Alternatives element.
  def alt &blk
    build_piece Alternatives, blk
  end
  # Synonym for alt().
  alias :one :alt

  # Build or match the end of file element. If currently building, a Reference to eof
  # is built. Otherwise eof is matched.
  def eof *args
    if @building
      method_missing :eof, *args
    else
      super args.first
    end
  end

  # Build a Sequence element.
  def seq &blk
    build_piece Sequence, blk
  end
  # Synonym for seq()
  alias :each :seq

  # Add a Literal element to the parent. Several values are wrapped in an
  # Alternatives element, one Literal per value.
  def lit *values
    if values.size == 1
      build_piece Literal, nil, values.first
    else
      one { values.each { |v| build_piece Literal, nil, v } }
    end
  end

  # Build an AnyNumber element.
  def many &blk
    build_piece AnyNumber, blk
  end

  # Build an Optional element.
  def opt &blk
    build_piece Optional, blk
  end

  # Build an AtLeastOne element.
  def some &blk
    build_piece AtLeastOne, blk
  end

  # Build a negative predicate. Use when you want to make sure the enclosed element is not present.
  # The cursor is not advanced for predicates.
  def neg &blk
    build_piece Negative, blk
  end

  # Build a positive predicate. Use when you want to make sure the enclosed element is present.
  # If matched the cursor is not advanced.
  def pos &blk
    build_piece Positive, blk
  end

  # Invokes the parser from the beginning of the source on the given production goal.
  # You may provide the source here or you can set source_text prior to calling.
  # If index is provided the parser will ignore characters previous to it.
  # Switches the builder from building to parsing mode.
  def parse? goal, source=nil, index=0
    @building = nil
    super
  end

  # Convert productions to Peggy grammar. This is not able to output any Ruby parse methods,
  # only grammars built with Builder methods.
  def to_s
    productions.values.join "\n"
  end

  private

  # Add an object of klass to the parent and run its block with the new
  # element as parent. If value is specified it is passed to the klass
  # constructor. The previous parent is restored even if the block raises.
  def build_piece klass, blk=nil, value=nil
    elem = value ? klass.new(value) : klass.new
    @parent << elem
    if blk
      saved = @parent
      @parent = elem
      begin
        blk.call
      ensure
        @parent = saved
      end
      saved
    end
  end

end # Builder
459
+
460
+ end # Peggy
@@ -0,0 +1,252 @@
1
+ # Peggy packrat parser for Ruby
2
+ #
3
+ # parser.rb - packrat parser
4
+ #
5
+ # Copyright (c) 2006 Troy Heninger
6
+ #
7
+ # Peggy is copyrighted free software by Troy Heninger.
8
+ # You can redistribute it and/or modify it under the same terms as Ruby.
9
+
10
+ require 'pp'
11
+
12
+ # Peggy is a packrat parsing engine. Packrat parsers memoize every production so that
13
+ # parses can happen in linear time. No production needs to be processed more than once for
14
+ # a given position of the source. See http://pdos.csail.mit.edu/~baford/packrat/ for
15
+ # more details.
16
+ #
17
+ # Peggy also incorporates Parsing Expression Grammar (PEG) as proposed by Bryan Ford,
18
+ # as one of several input grammars. PEG is a formalized grammar specification needing
19
+ # no separate lexer/scanner step. See http://pdos.csail.mit.edu/~baford/packrat/popl04/
20
+ #
21
+ # As good as packrat parsers are, they have a few limitations. They cannot handle left
22
+ # recursion of a production, meaning a production cannot reference itself as the first
23
+ # element in a sequence. Also memoizing of production results means that memory consumption
24
+ # increases with the size of the source being parsed. This is not usually a concern, except
25
+ # when attempting to parse multi-megabyte source files, such as a huge XML database.
26
+ module Peggy
27
+
28
+ # Returned when a production did not match
29
+ NO_MATCH = false
30
+ # Used to prevent infinite (left) recursions
31
+ IN_USE = true
32
+
33
+ # class OrderedHash < Hash
34
+ # alias_method :store, :[]=
35
+ # alias_method :each_pair, :each
36
+ #
37
+ # def initialize
38
+ # @keys = []
39
+ # super
40
+ # end
41
+ #
42
+ # def []=(key, val)
43
+ # @keys << key
44
+ # super
45
+ # end
46
+ #
47
+ # def delete(key)
48
+ # @keys.delete(key)
49
+ # super
50
+ # end
51
+ #
52
+ # def each
53
+ # @keys.sort.each { |k| yield k, self[k] }
54
+ # end
55
+ #
56
+ # def each_key
57
+ # @keys.sort.each { |k| yield k }
58
+ # end
59
+ #
60
+ # def each_value
61
+ # @keys.sort.each { |k| yield self[k] }
62
+ # end
63
+ # end
64
+
65
+ # Packrat parser class. Note all methods have a trailing exclamation (!) or question
66
+ # mark (?), or have long names with underscores (_). This is because productions are
67
+ # methods and we need to avoid name collisions. To use this class you must subclass
68
+ # Parser and provide your productions as methods. Your productions must call match?
69
+ # or one of the protected convenience routines to perform parsing. Productions must
70
+ # never call another production directly, or results will not get memoized and you
71
+ # will slow down your parse considerably, and possibly risk getting into an infinite
72
+ # recursion (until the stack blows its top). Note, as a convenience in writing
73
+ # productions, you can call any match? function multiple times, passing each returned
74
+ # index, such as in a sequence, without checking the results of each production.
75
class Parser

  # Tells parser to print intermediate results if set.
  attr_accessor :debug_flag

  # The source to parse, can be set prior to calling parse?().
  attr_accessor :source_text

  # The results of the parse. A hash (keys of indexes) of hashes (keys of
  # production symbols or literals, values of end indexes).
  attr_reader :parse_results

  # The productions to ignore.
  attr_accessor :ignore_productions

  # Return a range (or character) of the source_text.
  def [] range
    raise "source_text not set" if source_text.nil?
    source_text[range]
  end

  # Invokes the parser from the beginning of the source on the given production goal.
  # You may provide the source here or you can set source_text prior to calling.
  # If index is provided the parser will ignore characters previous to it.
  # Returns the end index of the match, or NO_MATCH.
  def parse? goal, source = nil, index = 0
    self.source_text = source unless source.nil?
    # Hash of automatic hashes
    @parse_results = Hash.new {|h1, k1| h1[k1] = {}}
    @keys = nil
    index = match? goal, index
    pp(parse_results) if debug_flag
    index
  end

  # Queries the parse results for a hierarchy of production matches. This can
  # only be called after parse_results have been set by a parse.
  # NOTE(review): relies on a find? method that is not defined in this class;
  # looks unfinished -- confirm before use.
  def query? *args
    raise "You must first call parse?" unless parse_results
    @keys = @parse_results.keys.sort unless @keys
    index = 0
    args.each do |arg|
      index = find? arg, index
    end
  end

  # Try to match a production from the given index. Returns the end index if found
  # or start index if not found.
  def allow? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? index : found
  end

  # Try to match a production from the given index then backtrack. Returns index if
  # found or NO_MATCH if not.
  def check? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? NO_MATCH : index
  end

  # Try not to match a production from the given index then backtrack. Returns index
  # if not found or NO_MATCH if found.
  def dissallow? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? index : NO_MATCH
  end

  # Special production that only matches the end of source_text. Note, this function
  # does not end in (?) or (!) because it is meant be used as a normal production.
  def eof index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    index >= source_text.length ? index : NO_MATCH
  end

  # Match a production from the given index. Returns the end index if found or NO_MATCH
  # if not found. Results are memoized per index; re-entering a production
  # that is still being evaluated at the same index raises RuntimeError.
  def match? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    index = ignore? index unless @ignoring
    goal = goal.to_sym
    position = parse_results[index]
    found = position.fetch(goal) do
      # Marker used to detect a left recursion while the production runs.
      position[goal] = IN_USE
      _memoize goal, index, send(goal, index), position
    end
    # Check for recursion before the debug output: IN_USE is not an index and
    # cannot be used to slice the source.
    raise "Parser cannot handle infinite (left) recursions. Please rewrite usage of '#{goal}'." if found == IN_USE
    puts "found #{goal} at #{index}...#{found} #{source_text[index...found].inspect}" if found && debug_flag
    found
  end

  # Record the results of the parse in the parse_results memo. Successful
  # goals are appended to :found_order; for Symbol goals with short matches
  # the matched text is stored under the goal's String name as well.
  def _memoize goal, index, result, position = parse_results[index]
    if result
      (position[:found_order] ||= []) << goal
      position[goal.to_s] = source_text[index...result] if result - index < 40 && goal.is_a?(Symbol)
    end
    position[goal] = result if result || goal.is_a?(Symbol)
    result
  end

  # Match tokens that should be ignored. Used by match?(). Returns end index if found
  # or start index if not found. Subclasses should override this method if they wish
  # to ignore other text, such as comments.
  def ignore? index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    return index if @ignoring || ignore_productions.nil?
    @ignoring = true
    begin
      ignore_productions.each do |prod|
        index = allow? prod, index
      end
    ensure
      # Never stay in "ignoring" mode if a production raises.
      @ignoring = nil
    end
    index
  end

  # Match a literal string or regular expression from the given index. Returns
  # the end index if found or NO_MATCH if not found.
  def literal? value, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    case value
    when String
      string? value, index
    when Regexp
      regexp? value, index
    else
      raise "Unknown literal: #{value.inspect}"
    end
  end

  # Match a string from the given index. Returns the end index if found
  # or NO_MATCH if not found.
  def string? value, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    value = value.to_s
    index = ignore? index unless @ignoring
    i2 = index + value.length
    _memoize(value, index, source_text[index...i2] == value ? i2 : NO_MATCH)
  end

  # Match a regular expression from the given index. Returns the end index
  # if found or NO_MATCH if not found.
  def regexp? value, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    value = correct_regexp! value
    index = ignore? index unless @ignoring
    found = value.match source_text[index..-1]
    _memoize(value, index, found ? found.end(0) + index : NO_MATCH)
  end

  # Make sure regular expressions match the beginning of the string, actually
  # the string from the given index. The anchored Regexp is cached so it is
  # not rebuilt on every match attempt.
  def correct_regexp! re
    @anchored_regexps ||= {}
    @anchored_regexps[re] ||= begin
      source = re.source
      source[0..1] == '\\A' ? re : Regexp.new("\\A(#{source})", re.options)
    end
  end

protected

  # Create an index of the parse results: goal => list of matched ranges.
  # NOTE(review): upstream marked this "Todo: unfinished" and it could not run
  # (`new Hash {...}`, undefined local); this is a best-effort completion.
  def index_results!
    raise "You must first call parse?" unless parse_results
    @index = Hash.new {|h, k| h[k] = []}
    parse_results.each_pair do |index, prod_map|
      (prod_map[:found_order] || []).each do |prod|
        @index[prod] << (index...prod_map[prod])
      end
    end
    @index
  end
end # Parser
251
+
252
+ end # Peggy
metadata ADDED
@@ -0,0 +1,49 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: abnc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Carsten Bormann
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Shifty support for tools based on IETF's ABNF
14
+ email: cabo@tzi.org
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - abnc.gemspec
20
+ - lib/abnc.rb
21
+ - lib/parse/ast.rb
22
+ - lib/parse/builder.rb
23
+ - lib/parse/parser.rb
24
+ homepage: http://github.com/cabo/abnc
25
+ licenses:
26
+ - Apache 2.0
27
+ metadata: {}
28
+ post_install_message:
29
+ rdoc_options: []
30
+ require_paths:
31
+ - lib
32
+ required_ruby_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 1.9.2
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 2.4.5
45
+ signing_key:
46
+ specification_version: 4
47
+ summary: RFC 5234+7405 ABNF compiler-let
48
+ test_files: []
49
+ has_rdoc: false