babel_bridge 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/babel_bridge.gemspec +5 -1
- data/lib/babel_bridge.rb +7 -187
- data/lib/nodes.rb +39 -17
- data/lib/pattern_element.rb +193 -0
- data/test/test_bb.rb +30 -5
- metadata +23 -39
data/babel_bridge.gemspec
CHANGED
@@ -10,7 +10,11 @@ $gemspec = Gem::Specification.new do |s|
|
|
10
10
|
s.platform = Gem::Platform::RUBY
|
11
11
|
s.rubyforge_project = "babel-bridge"
|
12
12
|
s.summary = "A Ruby-based parser-generator based on Parsing Expression Grammars."
|
13
|
-
s.description =
|
13
|
+
s.description = <<DESCRIPTION
|
14
|
+
Babel Bridge is an object oriented parser generator for parsing expression grammars (PEG).
|
15
|
+
Generate memoizing packrat parsers 100% in Ruby code with a simple embedded DSL.
|
16
|
+
DESCRIPTION
|
17
|
+
|
14
18
|
s.files = ["LICENSE", "README", "Rakefile", "babel_bridge.gemspec", "{test,lib,doc,examples}/**/*"].map{|p| Dir[p]}.flatten
|
15
19
|
s.has_rdoc = false
|
16
20
|
end
|
data/lib/babel_bridge.rb
CHANGED
@@ -1,33 +1,11 @@
|
|
1
1
|
=begin
|
2
|
-
|
3
|
-
See README
|
4
|
-
|
5
|
-
TODO-FEATURE: :pre_delimiter option
|
6
|
-
TODO-FEATURE: The "expecting" feature is so good I wonder if we should add the ability to automatically repair the parse!
|
7
|
-
This would need:
|
8
|
-
a) default values for regex terminals (string terminals are their own default values)
|
9
|
-
default values for regex should be verified to match the regex
|
10
|
-
b) an interactive prompter if there is more than one option
|
11
|
-
|
12
|
-
TODO-IMPROVEMENT: "Expecting" should show line numbers instead of char numbers, but it should only be calculated
|
13
|
-
on demand. This means we need a smarter formatter for our possible-error-logging.
|
14
|
-
TODO-IMPROVEMENT: "Expecting" code lines dump should show line numbers
|
15
|
-
|
16
|
-
TODO-BUG: "Expecting" doesn't do the right thing if a "dont" clause matched
|
17
|
-
Should say "something other than #{the don't clause}"
|
18
|
-
Ideally, we would continue matching and list all the possible next clauses that would allow us to continue
|
19
|
-
|
20
|
-
IDEA: could use the "-" prefix operator to mean "dont":
|
21
|
-
-"this"
|
22
|
-
-:that
|
23
|
-
-match(:foo)
|
24
|
-
-many(:foo)
|
25
|
-
|
26
|
-
TODO-OPTIMIZATION: add memoizing (caching / dynamic-programming) to guarantee linear time parsing
|
27
|
-
http://en.wikipedia.org/wiki/Parsing_expression_grammar#Implementing_parsers_from_parsing_expression_grammars
|
2
|
+
Copyright 2011 Shane Brinkman-Davis
|
3
|
+
See README for licence information.
|
4
|
+
http://babel-bridge.rubyforge.org/
|
28
5
|
=end
|
29
6
|
|
30
7
|
require File.dirname(__FILE__) + "/nodes.rb"
|
8
|
+
require File.dirname(__FILE__) + "/pattern_element.rb"
|
31
9
|
|
32
10
|
class String
|
33
11
|
def camelize
|
@@ -51,164 +29,7 @@ class String
|
|
51
29
|
end
|
52
30
|
|
53
31
|
module BabelBridge
|
54
|
-
VERSION = "0.1.1"
|
55
|
-
|
56
|
-
# hash which can be used declaratively
|
57
|
-
class PatternElementHash < Hash
|
58
|
-
def method_missing(method_name, *args) #method_name is a symbol
|
59
|
-
return self if args.length==1 && !args[0] # if nil is provided, don't set anything
|
60
|
-
self[method_name]=args[0] || true # on the other hand, if no args are provided, assume true
|
61
|
-
self
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
# PatternElement provides optimized parsing for each Element of a pattern
|
66
|
-
# PatternElement provides all the logic for parsing:
|
67
|
-
# :many
|
68
|
-
# :optional
|
69
|
-
class PatternElement
|
70
|
-
attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
|
71
|
-
attr_accessor :match,:rule_variant
|
72
|
-
|
73
|
-
#match can be:
|
74
|
-
# true, Hash, Symbol, String, Regexp
|
75
|
-
def initialize(match,rule_variant)
|
76
|
-
self.rule_variant=rule_variant
|
77
|
-
init(match)
|
78
|
-
|
79
|
-
raise "pattern element cannot be both :dont and :optional" if negative && optional
|
80
|
-
end
|
81
|
-
|
82
|
-
def to_s
|
83
|
-
match.inspect
|
84
|
-
end
|
85
|
-
|
86
|
-
def parse(parent_node)
|
87
|
-
# run element parser
|
88
|
-
match=parser.call(parent_node)
|
89
|
-
|
90
|
-
# Negative patterns (PEG: !element)
|
91
|
-
match=match ? nil : EmptyNode.new(parent_node) if negative
|
92
|
-
|
93
|
-
# Optional patterns (PEG: element?)
|
94
|
-
match=EmptyNode.new(parent_node) if !match && optional
|
95
|
-
|
96
|
-
# Could-match patterns (PEG: &element)
|
97
|
-
match.match_length=0 if match && could_match
|
98
|
-
|
99
|
-
# return match
|
100
|
-
match
|
101
|
-
end
|
102
|
-
|
103
|
-
private
|
104
|
-
|
105
|
-
def init(match)
|
106
|
-
self.match=match
|
107
|
-
case match
|
108
|
-
when TrueClass then init_true
|
109
|
-
when Hash then init_hash match
|
110
|
-
when Symbol then init_rule match
|
111
|
-
when String then init_string match
|
112
|
-
when Regexp then init_regex match
|
113
|
-
else raise "invalid pattern type: #{match.inspect}"
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
def init_rule(rule_name)
|
118
|
-
rule_name.to_s[/^([^?!]*)([?!])?$/]
|
119
|
-
rule_name=$1.to_sym
|
120
|
-
option=$2
|
121
|
-
match_rule=rule_variant.rule.parser.rules[rule_name]
|
122
|
-
raise "no rule for #{rule_name}" unless match_rule
|
123
|
-
|
124
|
-
self.parser =lambda {|parent_node| match_rule.parse(parent_node)}
|
125
|
-
self.name = rule_name
|
126
|
-
case option
|
127
|
-
when "?" then self.optional=true
|
128
|
-
when "!" then self.negative=true
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
def init_hash(hash)
|
133
|
-
if hash[:parser]
|
134
|
-
self.parser=hash[:parser]
|
135
|
-
elsif hash[:many]
|
136
|
-
init hash[:many]
|
137
|
-
#generate parser for poly
|
138
|
-
delimiter_pattern_element= PatternElement.new(hash[:delimiter]||true,rule_variant)
|
139
|
-
|
140
|
-
post_delimiter_element=case hash[:post_delimiter]
|
141
|
-
when TrueClass then delimiter_pattern_element
|
142
|
-
when nil then nil
|
143
|
-
else PatternElement.new(hash[:post_delimiter],rule_variant)
|
144
|
-
end
|
145
|
-
|
146
|
-
# convert the single element parser into a poly-parser
|
147
|
-
single_parser=parser
|
148
|
-
self.parser= lambda do |parent_node|
|
149
|
-
last_match=single_parser.call(parent_node)
|
150
|
-
many_node=ManyNode.new(parent_node)
|
151
|
-
while last_match
|
152
|
-
many_node<<last_match
|
153
|
-
|
154
|
-
#match delimiter
|
155
|
-
delimiter_match=delimiter_pattern_element.parse(many_node)
|
156
|
-
break unless delimiter_match
|
157
|
-
many_node.delimiter_matches<<delimiter_match
|
158
|
-
|
159
|
-
#match next
|
160
|
-
last_match=single_parser.call(many_node)
|
161
|
-
end
|
162
|
-
|
163
|
-
# success only if we have at least one match
|
164
|
-
return nil unless many_node.length>0
|
165
|
-
|
166
|
-
# pop the post delimiter matched with delimiter_pattern_element
|
167
|
-
many_node.delimiter_matches.pop if many_node.length==many_node.delimiter_matches.length
|
168
|
-
|
169
|
-
# If post_delimiter is requested, many_node and delimiter_matches must be the same length
|
170
|
-
if post_delimiter_element
|
171
|
-
post_delimiter_match=post_delimiter_element.parse(many_node)
|
172
|
-
|
173
|
-
# fail if post_delimiter didn't match
|
174
|
-
return nil unless post_delimiter_match
|
175
|
-
many_node.delimiter_matches<<post_delimiter_match
|
176
|
-
end
|
177
|
-
|
178
|
-
many_node
|
179
|
-
end
|
180
|
-
elsif hash[:match]
|
181
|
-
init hash[:match]
|
182
|
-
else
|
183
|
-
raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
|
184
|
-
end
|
185
|
-
|
186
|
-
self.name = hash[:as] || self.name
|
187
|
-
self.optional ||= hash[:optional] || hash[:optionally]
|
188
|
-
self.could_match ||= hash[:could]
|
189
|
-
self.negative ||= hash[:dont]
|
190
|
-
|
191
|
-
end
|
192
|
-
|
193
|
-
# "true" parser always matches the empty string
|
194
|
-
def init_true
|
195
|
-
self.parser=lambda {|parent_node| EmptyNode.new(parent_node)}
|
196
|
-
end
|
197
|
-
|
198
|
-
# parser that matches exactly the string specified
|
199
|
-
def init_string(string)
|
200
|
-
self.parser=lambda {|parent_node| parent_node.src[parent_node.next,string.length]==string && TerminalNode.new(parent_node,string.length,string)}
|
201
|
-
self.terminal=true
|
202
|
-
end
|
203
|
-
|
204
|
-
# parser that matches the given regex
|
205
|
-
def init_regex(regex)
|
206
|
-
self.parser=lambda {|parent_node| offset=parent_node.next;parent_node.src.index(regex,offset)==offset && (o=$~.offset(0)) && TerminalNode.new(parent_node,o[1]-o[0],regex)}
|
207
|
-
self.terminal=true
|
208
|
-
end
|
209
|
-
|
210
|
-
end
|
211
|
-
|
32
|
+
VERSION = "0.2.0"
|
212
33
|
|
213
34
|
# Each Rule has one or more RuleVariant
|
214
35
|
# Rules attempt to match each of their Variants in order. The first one to succeed returns true and the Rule succeeds.
|
@@ -268,7 +89,7 @@ module BabelBridge
|
|
268
89
|
self.parser=parser
|
269
90
|
|
270
91
|
class_name = "#{parser.module_name}_#{name}_node".camelize
|
271
|
-
self.node_class = parser.const_set(class_name,Class.new(
|
92
|
+
self.node_class = parser.const_set(class_name,Class.new(NonTerminalNode))
|
272
93
|
end
|
273
94
|
|
274
95
|
def add_variant(pattern, &block)
|
@@ -286,8 +107,7 @@ module BabelBridge
|
|
286
107
|
end
|
287
108
|
|
288
109
|
variants.each do |v|
|
289
|
-
match=v.parse(node)
|
290
|
-
if match
|
110
|
+
if match=v.parse(node)
|
291
111
|
node.parser.cache_match(name,match)
|
292
112
|
return match
|
293
113
|
end
|
data/lib/nodes.rb
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright 2011 Shane Brinkman-Davis
|
3
|
+
See README for licence information.
|
4
|
+
http://babel-bridge.rubyforge.org/
|
5
|
+
=end
|
6
|
+
|
1
7
|
module BabelBridge
|
2
8
|
|
3
9
|
# this is just so we can distinguish between normal arrays and arrays of matches
|
@@ -8,7 +14,7 @@ module BabelBridge
|
|
8
14
|
# base class for all parse-tree nodes
|
9
15
|
class Node
|
10
16
|
attr_accessor :src,:offset,:match_length,:parent,:parser
|
11
|
-
|
17
|
+
|
12
18
|
def to_s
|
13
19
|
text
|
14
20
|
end
|
@@ -81,14 +87,14 @@ module BabelBridge
|
|
81
87
|
def each(&block)
|
82
88
|
matches.each(&block)
|
83
89
|
end
|
84
|
-
end
|
90
|
+
end
|
85
91
|
|
86
92
|
class RootNode < Node
|
87
93
|
end
|
88
94
|
|
89
95
|
# non-terminal node
|
90
96
|
# subclassed automatically by parser.rule for each unique non-terminal
|
91
|
-
class
|
97
|
+
class NonTerminalNode < Node
|
92
98
|
attr_accessor :matches,:match_names
|
93
99
|
|
94
100
|
def match_names
|
@@ -151,22 +157,27 @@ end
|
|
151
157
|
@matches_by_name=nil
|
152
158
|
end
|
153
159
|
|
160
|
+
# defines where to forward missing methods to; override for custom behavior
|
161
|
+
def forward_to
|
162
|
+
matches[0]
|
163
|
+
end
|
164
|
+
|
154
165
|
def method_missing(method_name, *args) #method_name is a symbol
|
155
166
|
unless matches_by_name.has_key? method_name
|
156
|
-
if
|
157
|
-
|
158
|
-
return matches[0].send(method_name,*args)
|
167
|
+
if f=forward_to
|
168
|
+
return f.send(method_name,*args)
|
159
169
|
end
|
160
170
|
raise "#{self.class}: missing method #{method_name.inspect} / doesn't match named pattern element: #{matches_by_name.keys.inspect}"
|
161
171
|
end
|
162
|
-
matches_by_name[method_name]
|
172
|
+
case ret=matches_by_name[method_name]
|
173
|
+
when EmptyNode then nil
|
174
|
+
else ret
|
175
|
+
end
|
163
176
|
end
|
164
177
|
|
165
178
|
# adds a match with name (optional)
|
166
179
|
# returns self so you can chain add_match or concat methods
|
167
180
|
def add_match(match,name=nil)
|
168
|
-
raise "match must be a Node (match is a #{match.class})" unless match.kind_of?(Node)
|
169
|
-
raise "name must be a Symbol or nil (name is a #{name.class})" if name && !name.kind_of?(Symbol)
|
170
181
|
reset_matches_by_name
|
171
182
|
matches<<match
|
172
183
|
match_names<<name
|
@@ -192,16 +203,22 @@ end
|
|
192
203
|
node_init(parent)
|
193
204
|
self.matches=[]
|
194
205
|
self.delimiter_matches=[]
|
195
|
-
self.match_length=nil # use match_length as an override; if nil, then match_length is determined by the last node and delimiter_match
|
196
206
|
end
|
197
207
|
|
198
|
-
def match_length;
|
208
|
+
def match_length; self.next-offset end
|
209
|
+
|
199
210
|
def next
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
211
|
+
if m=matches[-1]
|
212
|
+
m_next=m.next
|
213
|
+
if d=delimiter_matches[-1]
|
214
|
+
d_next=d.next
|
215
|
+
m_next > d_next ? m_next : d_next
|
216
|
+
else
|
217
|
+
m_next
|
218
|
+
end
|
219
|
+
else
|
220
|
+
parent.next
|
221
|
+
end
|
205
222
|
end
|
206
223
|
|
207
224
|
def inspect_helper(list,options)
|
@@ -226,6 +243,11 @@ end
|
|
226
243
|
ret
|
227
244
|
end
|
228
245
|
end
|
246
|
+
|
247
|
+
def method_missing(method_name, *args) #method_name is a symbol
|
248
|
+
self.map {|match| match.send(method_name,*args)}
|
249
|
+
end
|
250
|
+
|
229
251
|
end
|
230
252
|
|
231
253
|
# used for String and Regexp PatternElements
|
@@ -239,7 +261,7 @@ end
|
|
239
261
|
end
|
240
262
|
|
241
263
|
def inspect(options={})
|
242
|
-
"#{text.inspect}" unless options[:simple] && text[/^\s*$/] # if simple && node only matched white-space, return nil
|
264
|
+
"#{text.inspect}" unless options[:simple] && text[/^\s*$/] # if simple && node only matched white-space, return nil
|
243
265
|
end
|
244
266
|
|
245
267
|
def matches; [self]; end
|
data/lib/pattern_element.rb
ADDED
@@ -0,0 +1,193 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright 2010 Shane Brinkman-Davis
|
3
|
+
See README for licence information.
|
4
|
+
http://babel-bridge.rubyforge.org/
|
5
|
+
=end
|
6
|
+
|
7
|
+
module BabelBridge
|
8
|
+
# hash which can be used declaratively
|
9
|
+
class PatternElementHash < Hash
|
10
|
+
def method_missing(method_name, *args) #method_name is a symbol
|
11
|
+
return self if args.length==1 && !args[0] # if nil is provided, don't set anything
|
12
|
+
self[method_name]=args[0] || true # on the other hand, if no args are provided, assume true
|
13
|
+
self
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
# PatternElement provides optimized parsing for each Element of a pattern
|
18
|
+
# PatternElement provides all the logic for parsing:
|
19
|
+
# :many
|
20
|
+
# :optional
|
21
|
+
class PatternElement
|
22
|
+
attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
|
23
|
+
attr_accessor :match,:rule_variant
|
24
|
+
|
25
|
+
#match can be:
|
26
|
+
# true, Hash, Symbol, String, Regexp
|
27
|
+
def initialize(match,rule_variant)
|
28
|
+
self.rule_variant=rule_variant
|
29
|
+
init(match)
|
30
|
+
|
31
|
+
raise "pattern element cannot be both :dont and :optional" if negative && optional
|
32
|
+
end
|
33
|
+
|
34
|
+
def to_s
|
35
|
+
match.inspect
|
36
|
+
end
|
37
|
+
|
38
|
+
# attempt to match the pattern defined in self.parser in parent_node.src starting at offset parent_node.next
|
39
|
+
def parse(parent_node)
|
40
|
+
# run element parser
|
41
|
+
match=parser.call(parent_node)
|
42
|
+
|
43
|
+
# Negative patterns (PEG: !element)
|
44
|
+
match=match ? nil : EmptyNode.new(parent_node) if negative
|
45
|
+
|
46
|
+
# Optional patterns (PEG: element?)
|
47
|
+
match=EmptyNode.new(parent_node) if !match && optional
|
48
|
+
|
49
|
+
# Could-match patterns (PEG: &element)
|
50
|
+
match.match_length=0 if match && could_match
|
51
|
+
|
52
|
+
# return match
|
53
|
+
match
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# initialize PatternElement based on the type of: match
|
59
|
+
def init(match)
|
60
|
+
self.match=match
|
61
|
+
case match
|
62
|
+
when TrueClass then init_true
|
63
|
+
when String then init_string match
|
64
|
+
when Regexp then init_regex match
|
65
|
+
when Symbol then init_rule match
|
66
|
+
when Hash then init_hash match
|
67
|
+
else raise "invalid pattern type: #{match.inspect}"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# "true" parser always matches the empty string
|
72
|
+
def init_true
|
73
|
+
self.parser=lambda {|parent_node| EmptyNode.new(parent_node)}
|
74
|
+
end
|
75
|
+
|
76
|
+
# initialize PatternElement as a parser that matches exactly the string specified
|
77
|
+
def init_string(string)
|
78
|
+
self.parser=lambda do |parent_node|
|
79
|
+
if parent_node.src[parent_node.next,string.length]==string
|
80
|
+
TerminalNode.new(parent_node,string.length,string)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
self.terminal=true
|
84
|
+
end
|
85
|
+
|
86
|
+
# initialize PatternElement as a parser that matches the given regex
|
87
|
+
def init_regex(regex)
|
88
|
+
optimized_regex=/\A#{regex}/ # anchor the search
|
89
|
+
self.parser=lambda do |parent_node|
|
90
|
+
offset=parent_node.next
|
91
|
+
if parent_node.src[offset..-1].index(optimized_regex)==0
|
92
|
+
range=$~.offset(0)
|
93
|
+
TerminalNode.new(parent_node,range[1]-range[0],regex)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
self.terminal=true
|
97
|
+
end
|
98
|
+
|
99
|
+
# initialize PatternElement as a parser that matches a named sub-rule
|
100
|
+
def init_rule(rule_name)
|
101
|
+
rule_name.to_s[/^([^?!]*)([?!])?$/]
|
102
|
+
rule_name=$1.to_sym
|
103
|
+
option=$2
|
104
|
+
match_rule=rule_variant.rule.parser.rules[rule_name]
|
105
|
+
raise "no rule for #{rule_name}" unless match_rule
|
106
|
+
|
107
|
+
self.parser = lambda {|parent_node| match_rule.parse(parent_node)}
|
108
|
+
self.name = rule_name
|
109
|
+
case option
|
110
|
+
when "?" then self.optional=true
|
111
|
+
when "!" then self.negative=true
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
# initialize the PatternElement from hashed parameters
|
116
|
+
def init_hash(hash)
|
117
|
+
if hash[:parser]
|
118
|
+
self.parser=hash[:parser]
|
119
|
+
elsif hash[:many]
|
120
|
+
init_many hash
|
121
|
+
elsif hash[:match]
|
122
|
+
init hash[:match]
|
123
|
+
else
|
124
|
+
raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
|
125
|
+
end
|
126
|
+
|
127
|
+
self.name = hash[:as] || self.name
|
128
|
+
self.optional ||= hash[:optional] || hash[:optionally]
|
129
|
+
self.could_match ||= hash[:could]
|
130
|
+
self.negative ||= hash[:dont]
|
131
|
+
end
|
132
|
+
|
133
|
+
# initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
|
134
|
+
def init_many(hash)
|
135
|
+
# generate single_parser
|
136
|
+
init hash[:many]
|
137
|
+
single_parser=parser
|
138
|
+
|
139
|
+
# generate delimiter_pattern_element
|
140
|
+
delimiter_pattern_element= hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)
|
141
|
+
|
142
|
+
# generate post_delimiter_element
|
143
|
+
post_delimiter_element=hash[:post_delimiter] && case hash[:post_delimiter]
|
144
|
+
when TrueClass then delimiter_pattern_element
|
145
|
+
else PatternElement.new(hash[:post_delimiter],rule_variant)
|
146
|
+
end
|
147
|
+
|
148
|
+
# generate many-parser
|
149
|
+
self.parser= lambda do |parent_node|
|
150
|
+
last_match=single_parser.call(parent_node)
|
151
|
+
many_node=ManyNode.new(parent_node)
|
152
|
+
|
153
|
+
if delimiter_pattern_element
|
154
|
+
# delimited matching
|
155
|
+
while last_match
|
156
|
+
many_node<<last_match
|
157
|
+
|
158
|
+
#match delimiter
|
159
|
+
delimiter_match=delimiter_pattern_element.parse(many_node)
|
160
|
+
break unless delimiter_match
|
161
|
+
many_node.delimiter_matches<<delimiter_match
|
162
|
+
|
163
|
+
#match next
|
164
|
+
last_match=single_parser.call(many_node)
|
165
|
+
end
|
166
|
+
else
|
167
|
+
# not delimited matching
|
168
|
+
while last_match
|
169
|
+
many_node<<last_match
|
170
|
+
last_match=single_parser.call(many_node)
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
# success only if we have at least one match
|
175
|
+
return nil unless many_node.length>0
|
176
|
+
|
177
|
+
# pop the post delimiter matched with delimiter_pattern_element
|
178
|
+
many_node.delimiter_matches.pop if many_node.length==many_node.delimiter_matches.length
|
179
|
+
|
180
|
+
# If post_delimiter is requested, many_node and delimiter_matches will be the same length
|
181
|
+
if post_delimiter_element
|
182
|
+
post_delimiter_match=post_delimiter_element.parse(many_node)
|
183
|
+
|
184
|
+
# fail if post_delimiter didn't match
|
185
|
+
return nil unless post_delimiter_match
|
186
|
+
many_node.delimiter_matches<<post_delimiter_match
|
187
|
+
end
|
188
|
+
|
189
|
+
many_node
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
data/test/test_bb.rb
CHANGED
@@ -68,7 +68,7 @@ class BBTests < TestHelper
|
|
68
68
|
|
69
69
|
def test_regex
|
70
70
|
parser=new_parser do
|
71
|
-
rule :foo,
|
71
|
+
rule :foo, /[0-9]+/
|
72
72
|
end
|
73
73
|
|
74
74
|
%w{ 0 1 10 123 1001 }.each do |numstr|
|
@@ -76,6 +76,16 @@ class BBTests < TestHelper
|
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
79
|
+
def test_regex_offset
|
80
|
+
parser=new_parser do
|
81
|
+
rule :foo, /[0-9]+/
|
82
|
+
rule :foo, "hi", /[0-9]+/
|
83
|
+
end
|
84
|
+
|
85
|
+
assert_equal 1,parser.parse("123").matches.length
|
86
|
+
assert_equal 2,parser.parse("hi123").matches.length
|
87
|
+
end
|
88
|
+
|
79
89
|
def test_optional
|
80
90
|
parser=new_parser do
|
81
91
|
rule :foo, ["foo", :bar?]
|
@@ -311,10 +321,11 @@ class BBTests < TestHelper
|
|
311
321
|
|
312
322
|
def test_poly_post_delimiter
|
313
323
|
parser=new_parser do
|
314
|
-
rule :foo, many?("foo",/ +/,true).as(:foo), "end"
|
324
|
+
rule :foo, many?("foo",/ +/,true).as(:foo), match("end").as(:end)
|
315
325
|
end
|
316
326
|
|
317
|
-
assert_equal
|
327
|
+
assert_equal nil,parser.parse("end").foo
|
328
|
+
assert_equal "end",parser.parse("end").end.to_s
|
318
329
|
assert_equal nil,parser.parse(" end")
|
319
330
|
assert_equal nil,parser.parse("foofoo end")
|
320
331
|
assert_equal ["foo"], parser.parse("foo end").foo.collect {|f| f.text}
|
@@ -378,10 +389,24 @@ class BBTests < TestHelper
|
|
378
389
|
end
|
379
390
|
parser.parse "-"
|
380
391
|
end
|
381
|
-
end
|
382
392
|
|
383
|
-
tests=BBTests.new
|
384
393
|
|
394
|
+
def regex_performance
|
395
|
+
parser=new_parser do
|
396
|
+
rule :foo, many(:element)
|
397
|
+
rule :element, /[0-9]+/
|
398
|
+
rule :element, "a"
|
399
|
+
end
|
385
400
|
|
401
|
+
str=("a"*10000)+"1"
|
402
|
+
start_time=Time.now
|
403
|
+
res=parser.parse(str)
|
404
|
+
end_time=Time.now
|
405
|
+
puts "time for matching string of length #{str.length}: #{((end_time-start_time)*1000).to_i}ms"
|
406
|
+
puts "parse tree size: #{res.element.length}"
|
407
|
+
assert res
|
408
|
+
end
|
409
|
+
end
|
386
410
|
|
411
|
+
tests=BBTests.new
|
387
412
|
tests.run_tests(ARGV.length>0 && ARGV)
|
metadata
CHANGED
@@ -1,69 +1,53 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: babel_bridge
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 1
|
8
|
-
- 1
|
9
|
-
version: 0.1.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Shane Brinkman-Davis
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
date: 2010-11-28 00:00:00 -08:00
|
18
|
-
default_executable:
|
12
|
+
date: 2010-11-28 00:00:00.000000000Z
|
19
13
|
dependencies: []
|
20
|
-
|
21
|
-
|
14
|
+
description: ! "Babel Bridge is an object oriented parser generator for parsing expression
|
15
|
+
grammars (PEG). \nGenerate memoizing packrat parsers 100% in Ruby code with a simple
|
16
|
+
embedded DSL.\n"
|
22
17
|
email: shanebdavis@gmail.com
|
23
18
|
executables: []
|
24
|
-
|
25
19
|
extensions: []
|
26
|
-
|
27
20
|
extra_rdoc_files: []
|
28
|
-
|
29
|
-
files:
|
21
|
+
files:
|
30
22
|
- README
|
31
23
|
- babel_bridge.gemspec
|
32
24
|
- test/test_bb.rb
|
33
25
|
- test/test_helper.rb
|
34
26
|
- lib/babel_bridge.rb
|
35
27
|
- lib/nodes.rb
|
36
|
-
|
28
|
+
- lib/pattern_element.rb
|
37
29
|
homepage: http://babel-bridge.rubyforge.org
|
38
30
|
licenses: []
|
39
|
-
|
40
31
|
post_install_message:
|
41
32
|
rdoc_options: []
|
42
|
-
|
43
|
-
require_paths:
|
33
|
+
require_paths:
|
44
34
|
- lib
|
45
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
36
|
none: false
|
47
|
-
requirements:
|
48
|
-
- -
|
49
|
-
- !ruby/object:Gem::Version
|
50
|
-
|
51
|
-
|
52
|
-
version: "0"
|
53
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
42
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
|
59
|
-
- 0
|
60
|
-
version: "0"
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
61
47
|
requirements: []
|
62
|
-
|
63
48
|
rubyforge_project: babel-bridge
|
64
|
-
rubygems_version: 1.
|
49
|
+
rubygems_version: 1.8.10
|
65
50
|
signing_key:
|
66
51
|
specification_version: 3
|
67
52
|
summary: A Ruby-based parser-generator based on Parsing Expression Grammars.
|
68
53
|
test_files: []
|
69
|
-
|