babel_bridge 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/examples/indention_grouping.rb +68 -0
- data/examples/indention_grouping_test.txt +10 -0
- data/examples/turing/test.rb +28 -0
- data/examples/turing/turing.rb +71 -0
- data/lib/babel_bridge.rb +13 -344
- data/lib/nodes.rb +9 -278
- data/lib/nodes/empty_node.rb +17 -0
- data/lib/nodes/many_node.rb +62 -0
- data/lib/nodes/node.rb +94 -0
- data/lib/nodes/non_terminal_node.rb +117 -0
- data/lib/nodes/terminal_node.rb +38 -0
- data/lib/parser.rb +285 -0
- data/lib/pattern_element.rb +152 -151
- data/lib/rule.rb +62 -0
- data/lib/rule_variant.rb +45 -0
- data/lib/shell.rb +36 -0
- data/lib/string.rb +26 -0
- data/lib/tools.rb +90 -0
- data/lib/version.rb +3 -0
- data/test/test_bb.rb +39 -3
- metadata +19 -3
@@ -0,0 +1,117 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright 2011 Shane Brinkman-Davis
|
3
|
+
See README for licence information.
|
4
|
+
http://babel-bridge.rubyforge.org/
|
5
|
+
=end
|
6
|
+
|
7
|
+
module BabelBridge
|
8
|
+
# non-terminal node
|
9
|
+
# subclassed automatically by parser.rule for each unique non-terminal
|
10
|
+
class NonTerminalNode < Node
  # Only writers are generated here: the readers below lazily default to an
  # empty array, and also generating them with attr_accessor would just be
  # redefined (and warn about it).
  attr_writer :matches,:match_names

  # names recorded for each entry of #matches (same index; nil when the match is unnamed)
  def match_names
    @match_names ||= []
  end

  # sub-nodes added to this node, in the order they were added
  def matches
    @matches ||= []
  end

  # length returns the number of sub-nodes
  def length
    matches.length
  end

  # Returns a memoized hash of name => match.
  # Unnamed matches (nil name) are skipped.
  # When a name is used more than once, its value becomes a MultiMatchesArray
  # holding every match with that name, in order.
  # Raises if matches and match_names have gotten out of step.
  def matches_by_name
    @matches_by_name ||= begin
      raise "matches.length #{matches.length} != match_names.length #{match_names.length}" unless matches.length==match_names.length
      by_name={}
      names=match_names
      matches.each_with_index do |match,i|
        name=names[i]
        next unless name
        current=by_name[name]
        if current # name already used
          # convert to MultiMatchesArray if not already
          by_name[name]=MultiMatchesArray.new([current]) unless current.kind_of?(MultiMatchesArray)
          # add to array
          by_name[name]<<match
        else
          by_name[name]=match
        end
      end
      by_name
    end
  end

  # Returns a printable description of this node and its sub-nodes.
  # options is passed through to every sub-node's inspect; sub-nodes that
  # return nil are left out. Returns nil when every sub-node returned nil.
  # With options[:simple], a chain of single-child nodes is abbreviated so only
  # the first and last nodes in the chain are shown.
  def inspect(options={})
    return "#{self.class}" if matches.length==0
    matches_inspected=matches.collect{|a|a.inspect(options)}.compact
    if matches_inspected.length==0 then nil
    elsif matches_inspected.length==1
      m=matches_inspected[0]
      ret="#{self.class} > "+m
      if options[:simple]
        ret=if m["\n"] then m
        else
          # just show the first and last nodes in the chain
          ret.gsub(/( > [A-Z][a-zA-Z0-9:]+ > (\.\.\. > )?)/," > ... > ")
        end
      end
      ret
    else
      (["#{self.class}"]+matches_inspected).join("\n").gsub("\n","\n ")
    end
  end

  #********************
  # alter methods
  #********************

  # discard the memoized name => match hash; it is rebuilt on the next matches_by_name call
  def reset_matches_by_name
    @matches_by_name=nil
  end

  # defines where to forward missing methods to; override for custom behavior
  # default: the first sub-node that responds to method_name, or nil if none does
  def forward_to(method_name)
    matches.each {|m| return m if m.respond_to?(method_name)}
    nil
  end

  # true if the method is defined normally, is the name of a named match, or
  # can be forwarded to a sub-node (see forward_to).
  # Accepts the optional second argument of Object#respond_to? so callers using
  # the two-argument form work; always returns true or false.
  # NOTE(review): assumes Node does not override respond_to? with a one-argument
  # signature - confirm against nodes/node.rb
  def respond_to?(method_name,include_all=false)
    return true if super
    return true if matches_by_name[method_name]
    forward_to(method_name) ? true : false
  end

  # Resolves unknown methods in two steps:
  #  1. the name of a named match returns that match (nil if it is an EmptyNode)
  #  2. otherwise the call (arguments and block included) is forwarded to the
  #     node chosen by forward_to
  # Raises when neither applies.
  def method_missing(method_name, *args, &block) #method_name is a symbol
    unless matches_by_name.has_key? method_name
      if f=forward_to(method_name)
        return f.send(method_name,*args,&block)
      end
      raise "#{self.class}: missing method #{method_name.inspect} / doesn't match named pattern element: #{matches_by_name.keys.inspect}"
    end
    case ret=matches_by_name[method_name]
    when EmptyNode then nil
    else ret
    end
  end

  # adds a match with name (optional)
  # match_length is updated so this node ends where the added match ends
  # returns self so you can chain add_match or concat methods
  def add_match(match,name=nil)
    reset_matches_by_name
    matches<<match
    match_names<<name

    self.match_length=match.next - offset
    self
  end

  # concatenate all matches (and their names) from another node
  # returns self so you can chain add_match or concat methods
  def concat(node)
    names=node.match_names
    node.matches.each_with_index { |match,i| add_match(match,names[i])}
    self
  end
end
|
117
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright 2011 Shane Brinkman-Davis
|
3
|
+
See README for licence information.
|
4
|
+
http://babel-bridge.rubyforge.org/
|
5
|
+
=end
|
6
|
+
|
7
|
+
module BabelBridge
|
8
|
+
# used for String and Regexp PatternElements
|
9
|
+
# not subclassed
|
10
|
+
class TerminalNode < Node
  # the String or Regexp this node was matched with
  attr_accessor :pattern

  # parent: node this terminal is being matched under
  # match_length: number of characters matched by the pattern
  # pattern: the String or Regexp that matched
  # If the parser ignores whitespace, whitespace following the match is
  # absorbed into this node as well.
  def initialize(parent,match_length,pattern)
    node_init(parent)
    self.match_length=match_length
    self.pattern=pattern
    @ignore_whitespace = parser.ignore_whitespace?
    consume_trailing_whitespace if @ignore_whitespace
  end

  # Extends match_length over any whitespace directly after the current match.
  # Does nothing when there is no source text left at that position.
  def consume_trailing_whitespace
    remaining = src[self.next..-1]
    return unless remaining
    # /\A\s*/ always matches (possibly the empty string), so this adds 0 or more
    self.match_length += remaining[/\A\s*/].length
  end

  # matched text; surrounding whitespace is stripped when the parser ignores whitespace
  def to_s
    @ignore_whitespace ? text.strip : text
  end

  # Returns the matched text, inspected.
  # With options[:simple], returns nil if the node matched nothing but whitespace.
  # \A and \z anchor the whole text; ^ and $ would also match a single blank
  # line inside otherwise non-blank text.
  def inspect(options={})
    "#{text.inspect}" unless options[:simple] && text[/\A\s*\z/]
  end

  # a terminal has no sub-nodes; it is its own only match
  def matches; [self]; end
end
|
38
|
+
end
|
data/lib/parser.rb
ADDED
@@ -0,0 +1,285 @@
|
|
1
|
+
module BabelBridge
|
2
|
+
# primary object used by the client
|
3
|
+
# Used to generate the grammar with .rule methods
|
4
|
+
# Used to parse with .parse
|
5
|
+
class Parser

  # Parser sub-class grammar definition
  # These methods are used in the creation of a Parser Sub-Class to define
  # its grammar
  class <<self
    # rules only gets a generated writer because its reader (below) lazily
    # creates the hash; root_rule only gets a generated reader because its
    # writer (below) validates the value.
    attr_accessor :module_name
    attr_writer :rules
    attr_reader :root_rule

    # hash of rule-name => Rule for this parser class (created on first use)
    def rules
      @rules||={}
    end

    # Add a rule to the parser
    #
    # rules can be specified as:
    #   rule :name, to_match1, to_match2, etc...
    # or
    #   rule :name, [to_match1, to_match2, etc...]
    #
    # Can define rules INSIDE class:
    #   class MyParser < BabelBridge::Parser
    #     rule :name, to_match1, to_match2, etc...
    #   end
    #
    # Or can define rules OUTSIDE class:
    #   class MyParser < BabelBridge::Parser
    #   end
    #   MyParser.rule :name, to_match1, to_match2, etc...
    #
    # The first rule added is the root-rule for the parser.
    # You can override it once the new root rule has been defined:
    #   class MyParser < BabelBridge::Parser
    #     self.root_rule = :new_root_rule
    #   end
    # (the explicit "self." is required; a bare "root_rule = ..." would only assign a local variable)
    #
    # The block is executed in the context of the rule-variant's node type, a subclass of: NonTerminalNode
    # This allows you to add whatever functionality you want to your nodes in the final parse tree.
    # Also note you can override the post_match method. This allows you to restructure the parse tree as it is parsed.
    def rule(name,*pattern,&block)
      pattern=pattern[0] if pattern[0].kind_of?(Array)
      rule=self.rules[name]||=Rule.new(name,self)
      self.root_rule||=name
      rule.add_variant(pattern,&block)
    end

    # Defines a rule matching elements_pattern repeated with any of operators between them.
    #
    # options
    #   => right_operators: list of all operators that should be evaluated right-to-left instead of left-to-right
    #      typical example is the "**" exponentiation operator which should be evaluated right-to-left.
    #
    # The optional block is class_eval'd in the same context as the rule's block.
    def binary_operators_rule(name,elements_pattern,operators,options={},&block)
      right_operators = options[:right_operators]
      rule(name,many(elements_pattern,Tools::array_to_or_regexp(operators))) do
        self.class_eval(&block) if block
        class <<self
          attr_accessor :operators_from_rule, :right_operators
          def operator_processor
            @operator_processor||=BinaryOperatorProcessor.new(operators_from_rule,self,right_operators)
          end
        end
        self.right_operators = right_operators
        self.operators_from_rule = operators

        def operator
          @operator||=operator_node.to_s.to_sym
        end

        # Override the post_match method to take the results of the "many" match
        # and restructure it into a binary tree of nodes based on the precedence of
        # the "operators".
        # TODO - I think maybe post_match should be run after the whole tree matches. If not, will this screw up caching?
        def post_match
          many_match = matches[0]
          operands = many_match.matches
          operators = many_match.delimiter_matches
          # TODO - now! take many_match.matches and many_match.delimiter_matches, mishy-mashy, and make the super-tree!
          self.class.operator_processor.generate_tree operands, operators, parent
        end
      end
    end

    # Returns the node class of the named rule.
    # With a block, the block is class_eval'd in that node class instead.
    def node_class(name,&block)
      klass=self.rules[name].node_class
      return klass unless block
      klass.class_eval(&block)
    end

    # look up a rule by name
    def [](i)
      rules[i]
    end

    # rule must be the symbol-name of a rule that has already been added
    def root_rule=(rule)
      raise "Symbol required" unless rule.kind_of?(Symbol)
      raise "rule #{rule.inspect} not found" unless rules[rule]
      @root_rule=rule
    end

    # declare that this parser ignores whitespace (see ignore_whitespace?)
    def ignore_whitespace
      @ignore_whitespace = true
    end

    def ignore_whitespace?
      @ignore_whitespace
    end
  end

  # true if the parser class was declared with ignore_whitespace
  def ignore_whitespace?
    self.class.ignore_whitespace?
  end

  #*********************************************
  # pattern construction tools
  #
  # Ex:
  #   # match 'keyword'
  #   # (succeeds if keyword is matched; advances the read pointer)
  #   rule :sample_rule, "keyword"
  #   rule :sample_rule, match("keyword")
  #
  #   # don't match 'keyword'
  #   # (succeeds only if keyword is NOT matched; does not advance the read pointer)
  #   rule :sample_rule, match!("keyword")
  #   rule :sample_rule, dont.match("keyword")
  #
  #   # optionally match 'keyword'
  #   # (always succeeds; advances the read pointer if keyword is matched)
  #   rule :sample_rule, match?("keyword")
  #   rule :sample_rule, optionally.match("keyword")
  #
  #   # ensure we could match 'keyword'
  #   # (succeeds only if keyword is matched, but does not advance the read pointer)
  #   rule :sample_rule, could.match("keyword")
  #
  #*********************************************
  class <<self
    def many(m,delimiter=nil,post_delimiter=nil)
      PatternElementHash.new.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter)
    end

    def many?(m,delimiter=nil,post_delimiter=nil)
      PatternElementHash.new.optionally.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter)
    end

    def many!(m,delimiter=nil,post_delimiter=nil)
      PatternElementHash.new.dont.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter)
    end

    def match?(*args) PatternElementHash.new.optionally.match(*args) end
    def match(*args) PatternElementHash.new.match(*args) end
    def match!(*args) PatternElementHash.new.dont.match(*args) end

    def dont; PatternElementHash.new.dont end
    def optionally; PatternElementHash.new.optionally end
    def could; PatternElementHash.new.could end
  end


  #*********************************************
  #*********************************************
  # parser instance implementation
  # these methods are used for each actual parse run
  # they are tied to an instance of the Parser Sub-class so you can have more than one
  # parser active at a time
  attr_accessor :failure_index
  attr_accessor :expecting_list
  attr_accessor :src
  attr_accessor :parse_cache

  def initialize
    reset_parser_tracking
  end

  # Clears all per-parse state, including the "did not match entire input"
  # marker, so failure info from an earlier parse cannot leak into a later one.
  def reset_parser_tracking
    self.src=nil
    self.failure_index=0
    self.expecting_list={}
    self.parse_cache={}
    @parsing_did_not_match_entire_input=false
  end

  # cached result for rule_class at offset: a match, :no_match, or nil if nothing is cached
  def cached(rule_class,offset)
    (parse_cache[rule_class]||={})[offset]
  end

  # remember that rule_class matched at match.offset
  def cache_match(rule_class,match)
    (parse_cache[rule_class]||={})[match.offset]=match
  end

  # remember that rule_class did not match at offset
  def cache_no_match(rule_class,offset)
    (parse_cache[rule_class]||={})[offset]=:no_match
  end

  # Records what was expected at the furthest point the parse reached.
  # A failure further into the source replaces everything recorded so far,
  # a failure at the same index is added, and earlier failures are ignored.
  # expecting is a hash; its :pattern entry is used as the de-duplication key.
  def log_parsing_failure(index,expecting)
    return if index<failure_index # ignored
    key=expecting[:pattern]
    if index>failure_index
      @expecting_list={key=>expecting}
      @failure_index = index
    else
      self.expecting_list[key]=expecting
    end
  end

  # Parses src with the given rule (default: the root rule).
  # Returns the matched node, or nil on failure; after a failure
  # parser_failure_info describes what went wrong.
  # When no explicit rule is given, the parse only succeeds if the whole input is matched.
  # NOTE(review): the offset parameter is accepted but not used by this method.
  def parse(src,offset=0,rule=nil)
    reset_parser_tracking
    @start_time=Time.now
    self.src=src
    root_node=RootNode.new(self)
    raise "No root rule defined." unless rule || self.class.root_rule
    ret=self.class[rule||self.class.root_rule].parse(root_node)
    unless rule
      if ret
        if ret.next<src.length # parse only succeeds if the whole input is matched
          @parsing_did_not_match_entire_input=true
          @failure_index=ret.next
          ret=nil
        else
          reset_parser_tracking
        end
      end
    end
    @end_time=Time.now
    ret
  end

  # seconds taken by the last completed parse; nil if no parse has completed yet
  def parse_time
    return nil unless @start_time && @end_time
    @end_time-@start_time
  end

  # same as parse(src), but writes parser_failure_info to out when the parse fails
  def parse_and_puts_errors(src,out=$stdout)
    ret=parse(src)
    unless ret
      out.puts parser_failure_info
    end
    ret
  end

  # "Class(offset) > Class(offset) ..." for the nodes of node_list that follow
  # the first common_root.length entries; nil if node_list is nil
  def node_list_string(node_list,common_root=[])
    node_list && node_list[common_root.length..-1].map{|p|"#{p.class}(#{p.offset})"}.join(" > ")
  end

  # Human-readable description of the last parsing failure.
  # Returns nil when there is no source (nothing parsed yet, or the last parse succeeded).
  def parser_failure_info
    return unless src
    bracketing_lines=5
    line,col=src.line_col(failure_index)
    ret=<<-ENDTXT
Parsing error at line #{line} column #{col} offset #{failure_index}

Source:
...
#{(failure_index==0 ? "" : src[0..(failure_index-1)]).last_lines(bracketing_lines)}<HERE>#{src[(failure_index)..-1].first_lines(bracketing_lines)}
...
    ENDTXT

    if @parsing_did_not_match_entire_input
      ret+="\nParser did not match entire input."
    else

      # find the leading part of the parent lists shared by every expectation
      common_root=nil
      expecting_list.values.each do |e|
        pl=e[:node].parent_list
        if common_root
          common_root.each_index do |i|
            if pl[i]!=common_root[i]
              # exclusive range: when i is 0 nothing is shared.
              # (0..i-1 would be 0..-1 there, i.e. the whole array)
              common_root=common_root[0...i]
              break
            end
          end
        else
          common_root=pl
        end
      end

      expecting_lines=expecting_list.values.collect do |a|
        list=node_list_string(a[:node].parent_list,common_root)
        [list,"#{a[:pattern].inspect} (#{list})"]
      end.sort.map{|i|i[1]}.join("\n ")

      ret+=<<-ENDTXT

Successfully matched rules up to failure:
#{node_list_string(common_root)}

Expecting#{expecting_list.length>1 ? ' one of' : ''}:
#{expecting_lines}
      ENDTXT
    end
    ret
  end
end
|
285
|
+
end
|
data/lib/pattern_element.rb
CHANGED
@@ -5,189 +5,190 @@ http://babel-bridge.rubyforge.org/
|
|
5
5
|
=end
|
6
6
|
|
7
7
|
module BabelBridge
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
8
|
+
# hash which can be used declaratively
|
9
|
+
class PatternElementHash < Hash
  # Any unknown method becomes a key assignment, and self is returned so calls can be chained:
  #   no argument          => self[method_name]=true
  #   one truthy argument  => self[method_name]=argument
  #   one nil/false argument => nothing is set
  # More than one argument raises.
  def method_missing(method_name, *args) #method_name is a symbol
    case args.length
    when 0
      self[method_name]=true
    when 1
      value=args[0]
      self[method_name]=value if value
    else
      raise "More than one argument is not supported. #{self.class}##{method_name} args=#{args.inspect}"
    end
    self
  end
end
|
17
|
+
|
18
|
+
# PatternElement provides optimized parsing for each Element of a pattern
|
19
|
+
# PatternElement provides all the logic for parsing:
|
20
|
+
# :many
|
21
|
+
# :optional
|
22
|
+
class PatternElement
  attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
  attr_accessor :match,:rule_variant

  #match can be:
  # true, Hash, Symbol, String, Regexp
  def initialize(match,rule_variant)
    self.rule_variant=rule_variant
    init(match)

    raise "pattern element cannot be both :dont and :optional" if negative && optional
  end

  def to_s
    match.inspect
  end

  # attempt to match the pattern defined in self.parser in parent_node.src starting at offset parent_node.next
  # returns the matched node, or nil if this element does not match
  def parse(parent_node)
    # run element parser
    match=parser.call(parent_node)

    # Negative patterns (PEG: !element)
    match=match ? nil : EmptyNode.new(parent_node) if negative

    # Optional patterns (PEG: element?)
    match=EmptyNode.new(parent_node) if !match && optional

    # Could-match patterns (PEG: &element)
    match.match_length=0 if match && could_match

    # return match
    match
  end

  private

  # initialize PatternElement based on the type of: match
  def init(match)
    self.match=match
    case match
    when TrueClass then init_true
    when String then init_string match
    when Regexp then init_regex match
    when Symbol then init_rule match
    when Hash then init_hash match
    else raise "invalid pattern type: #{match.inspect}"
    end
  end

  # "true" parser always matches the empty string
  def init_true
    self.parser=lambda {|parent_node| EmptyNode.new(parent_node)}
  end

  # initialize PatternElement as a parser that matches exactly the string specified
  def init_string(string)
    self.parser=lambda do |parent_node|
      if parent_node.src[parent_node.next,string.length]==string
        TerminalNode.new(parent_node,string.length,string)
      end
    end
    self.terminal=true
  end

  # initialize PatternElement as a parser that matches the given regex
  def init_regex(regex)
    optimized_regex=/\A#{regex}/ # anchor the search
    self.parser=lambda do |parent_node|
      # the regex only sees the source from the current position on
      remaining=parent_node.src[parent_node.next..-1]
      # remaining is nil when the position is past the end of the source: no match
      matched=remaining && remaining.match(optimized_regex)
      if matched
        TerminalNode.new(parent_node,matched.end(0)-matched.begin(0),regex)
      end
    end
    self.terminal=true
  end

  # initialize PatternElement as a parser that matches a named sub-rule
  # a trailing "?" on the name makes the element optional, a trailing "!" makes it negative
  def init_rule(rule_name)
    parsed_name=rule_name.to_s.match(/^([^?!]*)([?!])?$/)
    # e.g. a "?" or "!" in the middle of the name
    raise "invalid rule name: #{rule_name.inspect}" unless parsed_name
    rule_name=parsed_name[1].to_sym
    option=parsed_name[2]
    match_rule=rule_variant.rule.parser.rules[rule_name]
    raise "no rule for #{rule_name}" unless match_rule

    self.parser = lambda {|parent_node| match_rule.parse(parent_node)}
    self.name = rule_name
    case option
    when "?" then self.optional=true
    when "!" then self.negative=true
    end
  end

  # initialize the PatternElement from hashed parameters
  # exactly one of :parser, :many or :match selects the parser (checked in that order);
  # :as, :optional/:optionally, :could and :dont set the matching options
  def init_hash(hash)
    if hash[:parser]
      self.parser=hash[:parser]
    elsif hash[:many]
      init_many hash
    elsif hash[:match]
      init hash[:match]
    else
      raise "extended-options patterns (specified by a hash) must have either :parser=>, :many=> or a :match=> set"
    end

    self.name = hash[:as] || self.name
    self.optional ||= hash[:optional] || hash[:optionally]
    self.could_match ||= hash[:could]
    self.negative ||= hash[:dont]
  end

  # initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
  # NOTE(review): if the repeated pattern can match without consuming any input,
  # the loops below look like they would never terminate - confirm
  def init_many(hash)
    # generate single_parser
    init hash[:many]
    single_parser=parser

    # generate delimiter_pattern_element
    delimiter_pattern_element= hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)

    # generate post_delimiter_element
    post_delimiter_element=hash[:post_delimiter] && case hash[:post_delimiter]
      when TrueClass then delimiter_pattern_element
      else PatternElement.new(hash[:post_delimiter],rule_variant)
    end

    # generate many-parser
    self.parser= lambda do |parent_node|
      last_match=single_parser.call(parent_node)
      many_node=ManyNode.new(parent_node)

      if delimiter_pattern_element
        # delimited matching
        while last_match
          many_node<<last_match

          #match delimiter
          delimiter_match = delimiter_pattern_element.parse(many_node)
          break unless delimiter_match
          many_node.delimiter_matches<<delimiter_match

          #match next
          last_match=single_parser.call(many_node)
        end
      else
        # not delimited matching
        while last_match
          many_node<<last_match
          last_match=single_parser.call(many_node)
        end
      end

      # success only if we have at least one match
      return nil unless many_node.length>0

      # pop the post delimiter matched with delimiter_pattern_element
      many_node.delimiter_matches.pop if many_node.length==many_node.delimiter_matches.length

      # If post_delimiter is requested, many_node and delimiter_matches will be the same length
      if post_delimiter_element
        post_delimiter_match=post_delimiter_element.parse(many_node)

        # fail if post_delimiter didn't match
        return nil unless post_delimiter_match
        many_node.delimiter_matches<<post_delimiter_match
      end

      many_node
    end
  end
end
|
193
194
|
end
|