babel_bridge 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/babel_bridge.gemspec +5 -1
- data/lib/babel_bridge.rb +7 -187
- data/lib/nodes.rb +39 -17
- data/lib/pattern_element.rb +193 -0
- data/test/test_bb.rb +30 -5
- metadata +23 -39
data/babel_bridge.gemspec
CHANGED
@@ -10,7 +10,11 @@ $gemspec = Gem::Specification.new do |s|
|
|
10
10
|
s.platform = Gem::Platform::RUBY
|
11
11
|
s.rubyforge_project = "babel-bridge"
|
12
12
|
s.summary = "A Ruby-based parser-generator based on Parsing Expression Grammars."
|
13
|
-
s.description =
|
13
|
+
s.description = <<DESCRIPTION
|
14
|
+
Babel Bridge is an object oriented parser generator for parsing expression grammars (PEG).
|
15
|
+
Generate memoizing packrat parsers 100% in Ruby code with a simple embedded DSL.
|
16
|
+
DESCRIPTION
|
17
|
+
|
14
18
|
s.files = ["LICENSE", "README", "Rakefile", "babel_bridge.gemspec", "{test,lib,doc,examples}/**/*"].map{|p| Dir[p]}.flatten
|
15
19
|
s.has_rdoc = false
|
16
20
|
end
|
data/lib/babel_bridge.rb
CHANGED
@@ -1,33 +1,11 @@
|
|
1
1
|
=begin
|
2
|
-
|
3
|
-
See README
|
4
|
-
|
5
|
-
TODO-FEATURE: :pre_delimiter option
|
6
|
-
TODO-FEATURE: The "expecting" feature is so good I wonder if we should add the ability to automatically repair the parse!
|
7
|
-
This would need:
|
8
|
-
a) default values for regex terminals (string terminals are their own default values)
|
9
|
-
default values for regex should be verified to match the regex
|
10
|
-
b) an interactive prompter if there is more than one option
|
11
|
-
|
12
|
-
TODO-IMPROVEMENT: "Expecting" should show line numbers instead of char numbers, but it should only be calculated
|
13
|
-
on demand. This means we need a smarter formatter for our possible-error-logging.
|
14
|
-
TODO-IMPROVEMENT: "Expecting" code lines dump should show line numbers
|
15
|
-
|
16
|
-
TODO-BUG: "Expecting" doesn't do the right thing if a "dont" clause matched
|
17
|
-
Should say "something other than #{the don't clause}"
|
18
|
-
Ideally, we would continue matching and list all the possible next clauses that would allow us to continue
|
19
|
-
|
20
|
-
IDEA: could use the "-" prefix operator to mean "dont":
|
21
|
-
-"this"
|
22
|
-
-:that
|
23
|
-
-match(:foo)
|
24
|
-
-many(:foo)
|
25
|
-
|
26
|
-
TODO-OPTIMIZATION: add memoizing (caching / dynamic-programming) to guarantee linear time parsing
|
27
|
-
http://en.wikipedia.org/wiki/Parsing_expression_grammar#Implementing_parsers_from_parsing_expression_grammars
|
2
|
+
Copyright 2011 Shane Brinkman-Davis
|
3
|
+
See README for licence information.
|
4
|
+
http://babel-bridge.rubyforge.org/
|
28
5
|
=end
|
29
6
|
|
30
7
|
require File.dirname(__FILE__) + "/nodes.rb"
|
8
|
+
require File.dirname(__FILE__) + "/pattern_element.rb"
|
31
9
|
|
32
10
|
class String
|
33
11
|
def camelize
|
@@ -51,164 +29,7 @@ class String
|
|
51
29
|
end
|
52
30
|
|
53
31
|
module BabelBridge
|
54
|
-
VERSION = "0.
|
55
|
-
|
56
|
-
# hash which can be used declaratively
|
57
|
-
class PatternElementHash < Hash
|
58
|
-
def method_missing(method_name, *args) #method_name is a symbol
|
59
|
-
return self if args.length==1 && !args[0] # if nil is provided, don't set anything
|
60
|
-
self[method_name]=args[0] || true # on the other hand, if no args are provided, assume true
|
61
|
-
self
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
# PatternElement provides optimized parsing for each Element of a pattern
|
66
|
-
# PatternElement provides all the logic for parsing:
|
67
|
-
# :many
|
68
|
-
# :optional
|
69
|
-
class PatternElement
|
70
|
-
attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
|
71
|
-
attr_accessor :match,:rule_variant
|
72
|
-
|
73
|
-
#match can be:
|
74
|
-
# true, Hash, Symbol, String, Regexp
|
75
|
-
def initialize(match,rule_variant)
|
76
|
-
self.rule_variant=rule_variant
|
77
|
-
init(match)
|
78
|
-
|
79
|
-
raise "pattern element cannot be both :dont and :optional" if negative && optional
|
80
|
-
end
|
81
|
-
|
82
|
-
def to_s
|
83
|
-
match.inspect
|
84
|
-
end
|
85
|
-
|
86
|
-
def parse(parent_node)
|
87
|
-
# run element parser
|
88
|
-
match=parser.call(parent_node)
|
89
|
-
|
90
|
-
# Negative patterns (PEG: !element)
|
91
|
-
match=match ? nil : EmptyNode.new(parent_node) if negative
|
92
|
-
|
93
|
-
# Optional patterns (PEG: element?)
|
94
|
-
match=EmptyNode.new(parent_node) if !match && optional
|
95
|
-
|
96
|
-
# Could-match patterns (PEG: &element)
|
97
|
-
match.match_length=0 if match && could_match
|
98
|
-
|
99
|
-
# return match
|
100
|
-
match
|
101
|
-
end
|
102
|
-
|
103
|
-
private
|
104
|
-
|
105
|
-
def init(match)
|
106
|
-
self.match=match
|
107
|
-
case match
|
108
|
-
when TrueClass then init_true
|
109
|
-
when Hash then init_hash match
|
110
|
-
when Symbol then init_rule match
|
111
|
-
when String then init_string match
|
112
|
-
when Regexp then init_regex match
|
113
|
-
else raise "invalid pattern type: #{match.inspect}"
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
def init_rule(rule_name)
|
118
|
-
rule_name.to_s[/^([^?!]*)([?!])?$/]
|
119
|
-
rule_name=$1.to_sym
|
120
|
-
option=$2
|
121
|
-
match_rule=rule_variant.rule.parser.rules[rule_name]
|
122
|
-
raise "no rule for #{rule_name}" unless match_rule
|
123
|
-
|
124
|
-
self.parser =lambda {|parent_node| match_rule.parse(parent_node)}
|
125
|
-
self.name = rule_name
|
126
|
-
case option
|
127
|
-
when "?" then self.optional=true
|
128
|
-
when "!" then self.negative=true
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
def init_hash(hash)
|
133
|
-
if hash[:parser]
|
134
|
-
self.parser=hash[:parser]
|
135
|
-
elsif hash[:many]
|
136
|
-
init hash[:many]
|
137
|
-
#generate parser for poly
|
138
|
-
delimiter_pattern_element= PatternElement.new(hash[:delimiter]||true,rule_variant)
|
139
|
-
|
140
|
-
post_delimiter_element=case hash[:post_delimiter]
|
141
|
-
when TrueClass then delimiter_pattern_element
|
142
|
-
when nil then nil
|
143
|
-
else PatternElement.new(hash[:post_delimiter],rule_variant)
|
144
|
-
end
|
145
|
-
|
146
|
-
# convert the single element parser into a poly-parser
|
147
|
-
single_parser=parser
|
148
|
-
self.parser= lambda do |parent_node|
|
149
|
-
last_match=single_parser.call(parent_node)
|
150
|
-
many_node=ManyNode.new(parent_node)
|
151
|
-
while last_match
|
152
|
-
many_node<<last_match
|
153
|
-
|
154
|
-
#match delimiter
|
155
|
-
delimiter_match=delimiter_pattern_element.parse(many_node)
|
156
|
-
break unless delimiter_match
|
157
|
-
many_node.delimiter_matches<<delimiter_match
|
158
|
-
|
159
|
-
#match next
|
160
|
-
last_match=single_parser.call(many_node)
|
161
|
-
end
|
162
|
-
|
163
|
-
# success only if we have at least one match
|
164
|
-
return nil unless many_node.length>0
|
165
|
-
|
166
|
-
# pop the post delimiter matched with delimiter_pattern_element
|
167
|
-
many_node.delimiter_matches.pop if many_node.length==many_node.delimiter_matches.length
|
168
|
-
|
169
|
-
# If post_delimiter is requested, many_node and delimiter_matches must be the same length
|
170
|
-
if post_delimiter_element
|
171
|
-
post_delimiter_match=post_delimiter_element.parse(many_node)
|
172
|
-
|
173
|
-
# fail if post_delimiter didn't match
|
174
|
-
return nil unless post_delimiter_match
|
175
|
-
many_node.delimiter_matches<<post_delimiter_match
|
176
|
-
end
|
177
|
-
|
178
|
-
many_node
|
179
|
-
end
|
180
|
-
elsif hash[:match]
|
181
|
-
init hash[:match]
|
182
|
-
else
|
183
|
-
raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
|
184
|
-
end
|
185
|
-
|
186
|
-
self.name = hash[:as] || self.name
|
187
|
-
self.optional ||= hash[:optional] || hash[:optionally]
|
188
|
-
self.could_match ||= hash[:could]
|
189
|
-
self.negative ||= hash[:dont]
|
190
|
-
|
191
|
-
end
|
192
|
-
|
193
|
-
# "true" parser always matches the empty string
|
194
|
-
def init_true
|
195
|
-
self.parser=lambda {|parent_node| EmptyNode.new(parent_node)}
|
196
|
-
end
|
197
|
-
|
198
|
-
# parser that matches exactly the string specified
|
199
|
-
def init_string(string)
|
200
|
-
self.parser=lambda {|parent_node| parent_node.src[parent_node.next,string.length]==string && TerminalNode.new(parent_node,string.length,string)}
|
201
|
-
self.terminal=true
|
202
|
-
end
|
203
|
-
|
204
|
-
# parser that matches the given regex
|
205
|
-
def init_regex(regex)
|
206
|
-
self.parser=lambda {|parent_node| offset=parent_node.next;parent_node.src.index(regex,offset)==offset && (o=$~.offset(0)) && TerminalNode.new(parent_node,o[1]-o[0],regex)}
|
207
|
-
self.terminal=true
|
208
|
-
end
|
209
|
-
|
210
|
-
end
|
211
|
-
|
32
|
+
VERSION = "0.2.0"
|
212
33
|
|
213
34
|
# Each Rule has one or more RuleVariant
|
214
35
|
# Rules attempt to match each of their Variants in order. The first one to succeed returns true and the Rule succeeds.
|
@@ -268,7 +89,7 @@ module BabelBridge
|
|
268
89
|
self.parser=parser
|
269
90
|
|
270
91
|
class_name = "#{parser.module_name}_#{name}_node".camelize
|
271
|
-
self.node_class = parser.const_set(class_name,Class.new(
|
92
|
+
self.node_class = parser.const_set(class_name,Class.new(NonTerminalNode))
|
272
93
|
end
|
273
94
|
|
274
95
|
def add_variant(pattern, &block)
|
@@ -286,8 +107,7 @@ module BabelBridge
|
|
286
107
|
end
|
287
108
|
|
288
109
|
variants.each do |v|
|
289
|
-
match=v.parse(node)
|
290
|
-
if match
|
110
|
+
if match=v.parse(node)
|
291
111
|
node.parser.cache_match(name,match)
|
292
112
|
return match
|
293
113
|
end
|
data/lib/nodes.rb
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright 2011 Shane Brinkman-Davis
|
3
|
+
See README for licence information.
|
4
|
+
http://babel-bridge.rubyforge.org/
|
5
|
+
=end
|
6
|
+
|
1
7
|
module BabelBridge
|
2
8
|
|
3
9
|
# this is just so we can distinguish between normal arrays and arrays of matches
|
@@ -8,7 +14,7 @@ module BabelBridge
|
|
8
14
|
# base class for all parse-tree nodes
|
9
15
|
class Node
|
10
16
|
attr_accessor :src,:offset,:match_length,:parent,:parser
|
11
|
-
|
17
|
+
|
12
18
|
def to_s
|
13
19
|
text
|
14
20
|
end
|
@@ -81,14 +87,14 @@ module BabelBridge
|
|
81
87
|
def each(&block)
|
82
88
|
matches.each(&block)
|
83
89
|
end
|
84
|
-
end
|
90
|
+
end
|
85
91
|
|
86
92
|
class RootNode < Node
|
87
93
|
end
|
88
94
|
|
89
95
|
# non-terminal node
|
90
96
|
# subclassed automatically by parser.rule for each unique non-terminal
|
91
|
-
class
|
97
|
+
class NonTerminalNode < Node
|
92
98
|
attr_accessor :matches,:match_names
|
93
99
|
|
94
100
|
def match_names
|
@@ -151,22 +157,27 @@ end
|
|
151
157
|
@matches_by_name=nil
|
152
158
|
end
|
153
159
|
|
160
|
+
# defines where to forward missing methods to; override for custom behavior
|
161
|
+
def forward_to
|
162
|
+
matches[0]
|
163
|
+
end
|
164
|
+
|
154
165
|
def method_missing(method_name, *args) #method_name is a symbol
|
155
166
|
unless matches_by_name.has_key? method_name
|
156
|
-
if
|
157
|
-
|
158
|
-
return matches[0].send(method_name,*args)
|
167
|
+
if f=forward_to
|
168
|
+
return f.send(method_name,*args)
|
159
169
|
end
|
160
170
|
raise "#{self.class}: missing method #{method_name.inspect} / doesn't match named pattern element: #{matches_by_name.keys.inspect}"
|
161
171
|
end
|
162
|
-
matches_by_name[method_name]
|
172
|
+
case ret=matches_by_name[method_name]
|
173
|
+
when EmptyNode then nil
|
174
|
+
else ret
|
175
|
+
end
|
163
176
|
end
|
164
177
|
|
165
178
|
# adds a match with name (optional)
|
166
179
|
# returns self so you can chain add_match or concat methods
|
167
180
|
def add_match(match,name=nil)
|
168
|
-
raise "match must be a Node (match is a #{match.class})" unless match.kind_of?(Node)
|
169
|
-
raise "name must be a Symbol or nil (name is a #{name.class})" if name && !name.kind_of?(Symbol)
|
170
181
|
reset_matches_by_name
|
171
182
|
matches<<match
|
172
183
|
match_names<<name
|
@@ -192,16 +203,22 @@ end
|
|
192
203
|
node_init(parent)
|
193
204
|
self.matches=[]
|
194
205
|
self.delimiter_matches=[]
|
195
|
-
self.match_length=nil # use match_length as an override; if nil, then match_length is determined by the last node and delimiter_match
|
196
206
|
end
|
197
207
|
|
198
|
-
def match_length;
|
208
|
+
def match_length; self.next-offset end
|
209
|
+
|
199
210
|
def next
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
211
|
+
if m=matches[-1]
|
212
|
+
m_next=m.next
|
213
|
+
if d=delimiter_matches[-1]
|
214
|
+
d_next=d.next
|
215
|
+
m_next > d_next ? m_next : d_next
|
216
|
+
else
|
217
|
+
m_next
|
218
|
+
end
|
219
|
+
else
|
220
|
+
parent.next
|
221
|
+
end
|
205
222
|
end
|
206
223
|
|
207
224
|
def inspect_helper(list,options)
|
@@ -226,6 +243,11 @@ end
|
|
226
243
|
ret
|
227
244
|
end
|
228
245
|
end
|
246
|
+
|
247
|
+
def method_missing(method_name, *args) #method_name is a symbol
|
248
|
+
self.map {|match| match.send(method_name,*args)}
|
249
|
+
end
|
250
|
+
|
229
251
|
end
|
230
252
|
|
231
253
|
# used for String and Regexp PatternElements
|
@@ -239,7 +261,7 @@ end
|
|
239
261
|
end
|
240
262
|
|
241
263
|
def inspect(options={})
|
242
|
-
"#{text.inspect}" unless options[:simple] && text[/^\s*$/] # if simple && node only matched white-space, return nil
|
264
|
+
"#{text.inspect}" unless options[:simple] && text[/^\s*$/] # if simple && node only matched white-space, return nil
|
243
265
|
end
|
244
266
|
|
245
267
|
def matches; [self]; end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright 2010 Shane Brinkman-Davis
|
3
|
+
See README for licence information.
|
4
|
+
http://babel-bridge.rubyforge.org/
|
5
|
+
=end
|
6
|
+
|
7
|
+
module BabelBridge
|
8
|
+
# hash which can be used declaratively
|
9
|
+
class PatternElementHash < Hash
|
10
|
+
def method_missing(method_name, *args) #method_name is a symbol
|
11
|
+
return self if args.length==1 && !args[0] # if nil is provided, don't set anything
|
12
|
+
self[method_name]=args[0] || true # on the other hand, if no args are provided, assume true
|
13
|
+
self
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
# PatternElement provides optimized parsing for each Element of a pattern
|
18
|
+
# PatternElement provides all the logic for parsing:
|
19
|
+
# :many
|
20
|
+
# :optional
|
21
|
+
class PatternElement
|
22
|
+
attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
|
23
|
+
attr_accessor :match,:rule_variant
|
24
|
+
|
25
|
+
#match can be:
|
26
|
+
# true, Hash, Symbol, String, Regexp
|
27
|
+
def initialize(match,rule_variant)
|
28
|
+
self.rule_variant=rule_variant
|
29
|
+
init(match)
|
30
|
+
|
31
|
+
raise "pattern element cannot be both :dont and :optional" if negative && optional
|
32
|
+
end
|
33
|
+
|
34
|
+
def to_s
|
35
|
+
match.inspect
|
36
|
+
end
|
37
|
+
|
38
|
+
# attempt to match the pattern defined in self.parser in parent_node.src starting at offset parent_node.next
|
39
|
+
def parse(parent_node)
|
40
|
+
# run element parser
|
41
|
+
match=parser.call(parent_node)
|
42
|
+
|
43
|
+
# Negative patterns (PEG: !element)
|
44
|
+
match=match ? nil : EmptyNode.new(parent_node) if negative
|
45
|
+
|
46
|
+
# Optional patterns (PEG: element?)
|
47
|
+
match=EmptyNode.new(parent_node) if !match && optional
|
48
|
+
|
49
|
+
# Could-match patterns (PEG: &element)
|
50
|
+
match.match_length=0 if match && could_match
|
51
|
+
|
52
|
+
# return match
|
53
|
+
match
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# initialize PatternElement based on the type of: match
|
59
|
+
def init(match)
|
60
|
+
self.match=match
|
61
|
+
case match
|
62
|
+
when TrueClass then init_true
|
63
|
+
when String then init_string match
|
64
|
+
when Regexp then init_regex match
|
65
|
+
when Symbol then init_rule match
|
66
|
+
when Hash then init_hash match
|
67
|
+
else raise "invalid pattern type: #{match.inspect}"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# "true" parser always matches the empty string
|
72
|
+
def init_true
|
73
|
+
self.parser=lambda {|parent_node| EmptyNode.new(parent_node)}
|
74
|
+
end
|
75
|
+
|
76
|
+
# initialize PatternElement as a parser that matches exactly the string specified
|
77
|
+
def init_string(string)
|
78
|
+
self.parser=lambda do |parent_node|
|
79
|
+
if parent_node.src[parent_node.next,string.length]==string
|
80
|
+
TerminalNode.new(parent_node,string.length,string)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
self.terminal=true
|
84
|
+
end
|
85
|
+
|
86
|
+
# initialize PatternElement as a parser that matches the given regex
|
87
|
+
def init_regex(regex)
|
88
|
+
optimized_regex=/\A#{regex}/ # anchor the search
|
89
|
+
self.parser=lambda do |parent_node|
|
90
|
+
offset=parent_node.next
|
91
|
+
if parent_node.src[offset..-1].index(optimized_regex)==0
|
92
|
+
range=$~.offset(0)
|
93
|
+
TerminalNode.new(parent_node,range[1]-range[0],regex)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
self.terminal=true
|
97
|
+
end
|
98
|
+
|
99
|
+
# initialize PatternElement as a parser that matches a named sub-rule
|
100
|
+
def init_rule(rule_name)
|
101
|
+
rule_name.to_s[/^([^?!]*)([?!])?$/]
|
102
|
+
rule_name=$1.to_sym
|
103
|
+
option=$2
|
104
|
+
match_rule=rule_variant.rule.parser.rules[rule_name]
|
105
|
+
raise "no rule for #{rule_name}" unless match_rule
|
106
|
+
|
107
|
+
self.parser = lambda {|parent_node| match_rule.parse(parent_node)}
|
108
|
+
self.name = rule_name
|
109
|
+
case option
|
110
|
+
when "?" then self.optional=true
|
111
|
+
when "!" then self.negative=true
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
# initialize the PatternElement from hashed parameters
|
116
|
+
def init_hash(hash)
|
117
|
+
if hash[:parser]
|
118
|
+
self.parser=hash[:parser]
|
119
|
+
elsif hash[:many]
|
120
|
+
init_many hash
|
121
|
+
elsif hash[:match]
|
122
|
+
init hash[:match]
|
123
|
+
else
|
124
|
+
raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
|
125
|
+
end
|
126
|
+
|
127
|
+
self.name = hash[:as] || self.name
|
128
|
+
self.optional ||= hash[:optional] || hash[:optionally]
|
129
|
+
self.could_match ||= hash[:could]
|
130
|
+
self.negative ||= hash[:dont]
|
131
|
+
end
|
132
|
+
|
133
|
+
# initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
|
134
|
+
def init_many(hash)
|
135
|
+
# generate single_parser
|
136
|
+
init hash[:many]
|
137
|
+
single_parser=parser
|
138
|
+
|
139
|
+
# generate delimiter_pattern_element
|
140
|
+
delimiter_pattern_element= hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)
|
141
|
+
|
142
|
+
# generate post_delimiter_element
|
143
|
+
post_delimiter_element=hash[:post_delimiter] && case hash[:post_delimiter]
|
144
|
+
when TrueClass then delimiter_pattern_element
|
145
|
+
else PatternElement.new(hash[:post_delimiter],rule_variant)
|
146
|
+
end
|
147
|
+
|
148
|
+
# generate many-parser
|
149
|
+
self.parser= lambda do |parent_node|
|
150
|
+
last_match=single_parser.call(parent_node)
|
151
|
+
many_node=ManyNode.new(parent_node)
|
152
|
+
|
153
|
+
if delimiter_pattern_element
|
154
|
+
# delimited matching
|
155
|
+
while last_match
|
156
|
+
many_node<<last_match
|
157
|
+
|
158
|
+
#match delimiter
|
159
|
+
delimiter_match=delimiter_pattern_element.parse(many_node)
|
160
|
+
break unless delimiter_match
|
161
|
+
many_node.delimiter_matches<<delimiter_match
|
162
|
+
|
163
|
+
#match next
|
164
|
+
last_match=single_parser.call(many_node)
|
165
|
+
end
|
166
|
+
else
|
167
|
+
# not delimited matching
|
168
|
+
while last_match
|
169
|
+
many_node<<last_match
|
170
|
+
last_match=single_parser.call(many_node)
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
# success only if we have at least one match
|
175
|
+
return nil unless many_node.length>0
|
176
|
+
|
177
|
+
# pop the post delimiter matched with delimiter_pattern_element
|
178
|
+
many_node.delimiter_matches.pop if many_node.length==many_node.delimiter_matches.length
|
179
|
+
|
180
|
+
# If post_delimiter is requested, many_node and delimiter_matches will be the same length
|
181
|
+
if post_delimiter_element
|
182
|
+
post_delimiter_match=post_delimiter_element.parse(many_node)
|
183
|
+
|
184
|
+
# fail if post_delimiter didn't match
|
185
|
+
return nil unless post_delimiter_match
|
186
|
+
many_node.delimiter_matches<<post_delimiter_match
|
187
|
+
end
|
188
|
+
|
189
|
+
many_node
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
data/test/test_bb.rb
CHANGED
@@ -68,7 +68,7 @@ class BBTests < TestHelper
|
|
68
68
|
|
69
69
|
def test_regex
|
70
70
|
parser=new_parser do
|
71
|
-
rule :foo,
|
71
|
+
rule :foo, /[0-9]+/
|
72
72
|
end
|
73
73
|
|
74
74
|
%w{ 0 1 10 123 1001 }.each do |numstr|
|
@@ -76,6 +76,16 @@ class BBTests < TestHelper
|
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
79
|
+
def test_regex_offset
|
80
|
+
parser=new_parser do
|
81
|
+
rule :foo, /[0-9]+/
|
82
|
+
rule :foo, "hi", /[0-9]+/
|
83
|
+
end
|
84
|
+
|
85
|
+
assert_equal 1,parser.parse("123").matches.length
|
86
|
+
assert_equal 2,parser.parse("hi123").matches.length
|
87
|
+
end
|
88
|
+
|
79
89
|
def test_optional
|
80
90
|
parser=new_parser do
|
81
91
|
rule :foo, ["foo", :bar?]
|
@@ -311,10 +321,11 @@ class BBTests < TestHelper
|
|
311
321
|
|
312
322
|
def test_poly_post_delimiter
|
313
323
|
parser=new_parser do
|
314
|
-
rule :foo, many?("foo",/ +/,true).as(:foo), "end"
|
324
|
+
rule :foo, many?("foo",/ +/,true).as(:foo), match("end").as(:end)
|
315
325
|
end
|
316
326
|
|
317
|
-
assert_equal
|
327
|
+
assert_equal nil,parser.parse("end").foo
|
328
|
+
assert_equal "end",parser.parse("end").end.to_s
|
318
329
|
assert_equal nil,parser.parse(" end")
|
319
330
|
assert_equal nil,parser.parse("foofoo end")
|
320
331
|
assert_equal ["foo"], parser.parse("foo end").foo.collect {|f| f.text}
|
@@ -378,10 +389,24 @@ class BBTests < TestHelper
|
|
378
389
|
end
|
379
390
|
parser.parse "-"
|
380
391
|
end
|
381
|
-
end
|
382
392
|
|
383
|
-
tests=BBTests.new
|
384
393
|
|
394
|
+
def regex_performance
|
395
|
+
parser=new_parser do
|
396
|
+
rule :foo, many(:element)
|
397
|
+
rule :element, /[0-9]+/
|
398
|
+
rule :element, "a"
|
399
|
+
end
|
385
400
|
|
401
|
+
str=("a"*10000)+"1"
|
402
|
+
start_time=Time.now
|
403
|
+
res=parser.parse(str)
|
404
|
+
end_time=Time.now
|
405
|
+
puts "time for matching string of length #{str.length}: #{((end_time-start_time)*1000).to_i}ms"
|
406
|
+
puts "parse tree size: #{res.element.length}"
|
407
|
+
assert res
|
408
|
+
end
|
409
|
+
end
|
386
410
|
|
411
|
+
tests=BBTests.new
|
387
412
|
tests.run_tests(ARGV.length>0 && ARGV)
|
metadata
CHANGED
@@ -1,69 +1,53 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: babel_bridge
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 1
|
8
|
-
- 1
|
9
|
-
version: 0.1.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Shane Brinkman-Davis
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
date: 2010-11-28 00:00:00 -08:00
|
18
|
-
default_executable:
|
12
|
+
date: 2010-11-28 00:00:00.000000000Z
|
19
13
|
dependencies: []
|
20
|
-
|
21
|
-
|
14
|
+
description: ! "Babel Bridge is an object oriented parser generator for parsing expression
|
15
|
+
grammars (PEG). \nGenerate memoizing packrat parsers 100% in Ruby code with a simple
|
16
|
+
embedded DSL.\n"
|
22
17
|
email: shanebdavis@gmail.com
|
23
18
|
executables: []
|
24
|
-
|
25
19
|
extensions: []
|
26
|
-
|
27
20
|
extra_rdoc_files: []
|
28
|
-
|
29
|
-
files:
|
21
|
+
files:
|
30
22
|
- README
|
31
23
|
- babel_bridge.gemspec
|
32
24
|
- test/test_bb.rb
|
33
25
|
- test/test_helper.rb
|
34
26
|
- lib/babel_bridge.rb
|
35
27
|
- lib/nodes.rb
|
36
|
-
|
28
|
+
- lib/pattern_element.rb
|
37
29
|
homepage: http://babel-bridge.rubyforge.org
|
38
30
|
licenses: []
|
39
|
-
|
40
31
|
post_install_message:
|
41
32
|
rdoc_options: []
|
42
|
-
|
43
|
-
require_paths:
|
33
|
+
require_paths:
|
44
34
|
- lib
|
45
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
36
|
none: false
|
47
|
-
requirements:
|
48
|
-
- -
|
49
|
-
- !ruby/object:Gem::Version
|
50
|
-
|
51
|
-
|
52
|
-
version: "0"
|
53
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
42
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
|
59
|
-
- 0
|
60
|
-
version: "0"
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
61
47
|
requirements: []
|
62
|
-
|
63
48
|
rubyforge_project: babel-bridge
|
64
|
-
rubygems_version: 1.
|
49
|
+
rubygems_version: 1.8.10
|
65
50
|
signing_key:
|
66
51
|
specification_version: 3
|
67
52
|
summary: A Ruby-based parser-generator based on Parsing Expression Grammars.
|
68
53
|
test_files: []
|
69
|
-
|