babel_bridge 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/examples/indention_grouping.rb +68 -0
- data/examples/indention_grouping_test.txt +10 -0
- data/examples/turing/test.rb +28 -0
- data/examples/turing/turing.rb +71 -0
- data/lib/babel_bridge.rb +13 -344
- data/lib/nodes.rb +9 -278
- data/lib/nodes/empty_node.rb +17 -0
- data/lib/nodes/many_node.rb +62 -0
- data/lib/nodes/node.rb +94 -0
- data/lib/nodes/non_terminal_node.rb +117 -0
- data/lib/nodes/terminal_node.rb +38 -0
- data/lib/parser.rb +285 -0
- data/lib/pattern_element.rb +152 -151
- data/lib/rule.rb +62 -0
- data/lib/rule_variant.rb +45 -0
- data/lib/shell.rb +36 -0
- data/lib/string.rb +26 -0
- data/lib/tools.rb +90 -0
- data/lib/version.rb +3 -0
- data/test/test_bb.rb +39 -3
- metadata +19 -3
@@ -0,0 +1,117 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright 2011 Shane Brinkman-Davis
|
3
|
+
See README for licence information.
|
4
|
+
http://babel-bridge.rubyforge.org/
|
5
|
+
=end
|
6
|
+
|
7
|
+
module BabelBridge
|
8
|
+
# Non-terminal node: a parse-tree node built from zero or more sub-matches.
# Subclassed automatically by parser.rule for each unique non-terminal.
class NonTerminalNode < Node
  # Only writers are generated; the readers below lazily default to [].
  attr_writer :matches, :match_names

  # Names of the sub-matches, parallel to #matches (nil for unnamed matches).
  def match_names
    @match_names ||= []
  end

  # Sub-nodes matched so far, in match order.
  def matches
    @matches ||= []
  end

  # length returns the number of sub-nodes
  def length
    matches.length
  end

  # Memoized Hash mapping each match name to its match.
  # A name used more than once maps to a MultiMatchesArray of all its matches.
  # Raises if #matches and #match_names have gotten out of step.
  def matches_by_name
    @matches_by_name ||= begin
      raise "matches.length #{matches.length} != match_names.length #{match_names.length}" unless matches.length==match_names.length
      by_name = {}
      names = match_names
      matches.each_with_index do |match, i|
        name = names[i]
        next unless name
        if (current = by_name[name]) # name already used
          # convert to MultiMatchesArray if not already, then append
          by_name[name] = MultiMatchesArray.new([current]) unless current.kind_of?(MultiMatchesArray)
          by_name[name] << match
        else
          by_name[name] = match
        end
      end
      by_name
    end
  end

  # Human-readable description of this node and its sub-matches.
  # Sub-matches whose inspect returns nil are skipped; returns nil when the
  # node has matches but none of them produced any output.
  # With options[:simple], a chain of single-child nodes is abbreviated
  # to its first and last entries.
  def inspect(options={})
    return "#{self.class}" if matches.length==0
    matches_inspected = matches.collect { |a| a.inspect(options) }.compact
    case matches_inspected.length
    when 0 then nil
    when 1
      only = matches_inspected[0]
      chain = "#{self.class} > " + only
      return chain unless options[:simple]
      if only["\n"]
        only
      else
        # just show the first and last nodes in the chain
        chain.gsub(/( > [A-Z][a-zA-Z0-9:]+ > (\.\.\. > )?)/, " > ... > ")
      end
    else
      (["#{self.class}"] + matches_inspected).join("\n").gsub("\n", "\n ")
    end
  end

  #********************
  # alter methods
  #********************

  # Drops the memoized name lookup; called whenever the matches change.
  def reset_matches_by_name
    @matches_by_name = nil
  end

  # defines where to forward missing methods to; override for custom behavior
  # Returns the first sub-match that responds to method_name, or nil.
  def forward_to(method_name)
    matches.each { |m| return m if m.respond_to?(method_name) }
    nil
  end

  # True-ish when the method is defined normally, names one of the matches,
  # or can be forwarded to a sub-match.
  # Accepts the optional second argument of Object#respond_to? so that callers
  # using the standard two-argument form do not raise ArgumentError.
  def respond_to?(method_name, include_all=false)
    super ||
      matches_by_name[method_name] ||
      forward_to(method_name)
  end

  # Resolves named pattern elements as methods; anything else is forwarded to
  # the first sub-match that responds to it. A name that matched an EmptyNode
  # yields nil. Raises when the name can be neither resolved nor forwarded.
  def method_missing(method_name, *args) #method_name is a symbol
    unless matches_by_name.has_key? method_name
      if (target = forward_to(method_name))
        return target.send(method_name, *args)
      end
      raise "#{self.class}: missing method #{method_name.inspect} / doesn't match named pattern element: #{matches_by_name.keys.inspect}"
    end
    case ret = matches_by_name[method_name]
    when EmptyNode then nil
    else ret
    end
  end

  # adds a match with name (optional)
  # returns self so you can chain add_match or concat methods
  def add_match(match, name=nil)
    reset_matches_by_name
    matches << match
    match_names << name

    # this node now extends to the end of the newly added match
    self.match_length = match.next - offset
    self
  end

  # concatenate all matches from another node
  # returns self so you can chain add_match or concat methods
  def concat(node)
    names = node.match_names
    node.matches.each_with_index { |match, i| add_match(match, names[i]) }
    self
  end
end
|
117
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright 2011 Shane Brinkman-Davis
|
3
|
+
See README for licence information.
|
4
|
+
http://babel-bridge.rubyforge.org/
|
5
|
+
=end
|
6
|
+
|
7
|
+
module BabelBridge
|
8
|
+
# Used for String and Regexp PatternElements.
# Not subclassed.
class TerminalNode < Node
  attr_accessor :pattern

  # parent::       node this terminal is attached to
  # match_length:: number of characters matched by the pattern itself
  # pattern::      the String or Regexp that produced the match
  # When the parser ignores whitespace, whitespace following the match is
  # folded into this node's match_length.
  def initialize(parent,match_length,pattern)
    node_init(parent)
    self.match_length=match_length
    self.pattern=pattern
    @ignore_whitespace = parser.ignore_whitespace?
    consume_trailing_whitespace if @ignore_whitespace
  end

  # Extends match_length over any whitespace directly following the match.
  def consume_trailing_whitespace
    # /\A\s*/ always matches (possibly the empty string), so this is never nil
    trailing = src[self.next..-1][/\A\s*/]
    self.match_length += trailing.length
  end

  # Matched text; surrounding whitespace is stripped when whitespace is ignored.
  def to_s
    @ignore_whitespace ? text.strip : text
  end

  # Inspected matched text.
  # With options[:simple], returns nil if the node matched only white-space.
  # Anchored with \A and \z: ^ and $ match per line, which would wrongly
  # treat text containing an empty line (e.g. "foo\n\n") as white-space only.
  def inspect(options={})
    "#{text.inspect}" unless options[:simple] && text[/\A\s*\z/]
  end

  # A terminal is its own only match.
  def matches; [self]; end
end
|
38
|
+
end
|
data/lib/parser.rb
ADDED
@@ -0,0 +1,285 @@
|
|
1
|
+
module BabelBridge
|
2
|
+
# Primary object used by the client.
# Used to generate the grammar with .rule methods.
# Used to parse with .parse.
class Parser

  # Parser sub-class grammar definition.
  # These methods are used in the creation of a Parser sub-class to define
  # its grammar.
  class <<self
    # rules (reader) and root_rule= (writer) are defined explicitly below.
    attr_accessor :module_name
    attr_writer :rules
    attr_reader :root_rule

    # Hash of rule-name => Rule for this parser class.
    def rules
      @rules||={}
    end

    # Add a rule to the parser
    #
    # rules can be specified as:
    #   rule :name, to_match1, to_match2, etc...
    # or
    #   rule :name, [to_match1, to_match2, etc...]
    #
    # Can define rules INSIDE class:
    #   class MyParser < BabelBridge::Parser
    #     rule :name, to_match1, to_match2, etc...
    #   end
    #
    # Or can define rules OUTSIDE class:
    #   class MyParser < BabelBridge::Parser
    #   end
    #   MyParser.rule :name, to_match1, to_match2, etc...
    #
    # The first rule added is the root-rule for the parser.
    # You can override by:
    #   class MyParser < BabelBridge::Parser
    #     self.root_rule = :new_root_rule
    #   end
    #
    # The block is executed in the context of the rule-variant's node type, a subclass of: NonTerminalNode
    # This allows you to add whatever functionality you want to your nodes in the final parse tree.
    # Also note you can override the post_match method. This allows you to restructure the parse tree as it is parsed.
    def rule(name,*pattern,&block)
      pattern=pattern[0] if pattern[0].kind_of?(Array)
      rule=self.rules[name]||=Rule.new(name,self)
      self.root_rule||=name
      rule.add_variant(pattern,&block)
    end

    # Defines a rule matching elements_pattern separated by any of operators.
    # options
    #   => right_operators: list of all operators that should be evaluated right-to-left instead of left-to-right
    #      typical example is the "**" exponentiation operator which should be evaluated right-to-left.
    def binary_operators_rule(name,elements_pattern,operators,options={},&block)
      right_operators = options[:right_operators]
      rule(name,many(elements_pattern,Tools::array_to_or_regexp(operators))) do
        self.class_eval(&block) if block
        class <<self
          attr_accessor :operators_from_rule, :right_operators
          def operator_processor
            @operator_processor||=BinaryOperatorProcessor.new(operators_from_rule,self,right_operators)
          end
        end
        # "def" bodies cannot see the enclosing locals, so stash them on the node class
        self.right_operators = right_operators
        self.operators_from_rule = operators

        def operator
          @operator||=operator_node.to_s.to_sym
        end

        # Override the post_match method to take the results of the "many" match
        # and restructure it into a binary tree of nodes based on the precedence of
        # the "operators".
        # TODO - I think maybe post_match should be run after the whole tree matches. If not, will this screw up caching?
        def post_match
          many_match = matches[0]
          operands = many_match.matches
          operators = many_match.delimiter_matches
          # TODO - now! take many_match.matches and many_match.delimiter_matches, mishy-mashy, and make the super-tree!
          self.class.operator_processor.generate_tree operands, operators, parent
        end
      end
    end

    # Returns the node class of the named rule; when a block is given it is
    # class_eval'd in that node class instead.
    def node_class(name,&block)
      klass=self.rules[name].node_class
      return klass unless block
      klass.class_eval(&block)
    end

    # Look up a rule by name.
    def [](i)
      rules[i]
    end

    # rule must be the symbol-name of one of the rules already added to this parser
    def root_rule=(rule)
      raise "Symbol required" unless rule.kind_of?(Symbol)
      raise "rule #{rule.inspect} not found" unless rules[rule]
      @root_rule=rule
    end

    # Declares that this parser ignores whitespace.
    def ignore_whitespace
      @ignore_whitespace = true
    end

    def ignore_whitespace?
      @ignore_whitespace
    end
  end

  # Whether this parser's class was declared with ignore_whitespace.
  def ignore_whitespace?
    self.class.ignore_whitespace?
  end

  #*********************************************
  # pattern construction tools
  #
  # Ex:
  #   # match 'keyword'
  #   # (succeeds if keyword is matched; advances the read pointer)
  #   rule :sample_rule, "keyword"
  #   rule :sample_rule, match("keyword")
  #
  #   # don't match 'keyword'
  #   # (succeeds only if keyword is NOT matched; does not advance the read pointer)
  #   rule :sample_rule, match!("keyword")
  #   rule :sample_rule, dont.match("keyword")
  #
  #   # optionally match 'keyword'
  #   # (always succeeds; advances the read pointer if keyword is matched)
  #   rule :sample_rule, match?("keyword")
  #   rule :sample_rule, optionally.match("keyword")
  #
  #   # ensure we could match 'keyword'
  #   # (succeeds only if keyword is matched, but does not advance the read pointer)
  #   rule :sample_rule, could.match("keyword")
  #
  #*********************************************
  class <<self
    def many(m,delimiter=nil,post_delimiter=nil)
      PatternElementHash.new.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter)
    end

    def many?(m,delimiter=nil,post_delimiter=nil)
      PatternElementHash.new.optionally.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter)
    end

    def many!(m,delimiter=nil,post_delimiter=nil)
      PatternElementHash.new.dont.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter)
    end

    def match?(*args) PatternElementHash.new.optionally.match(*args) end
    def match(*args)  PatternElementHash.new.match(*args) end
    def match!(*args) PatternElementHash.new.dont.match(*args) end

    def dont;       PatternElementHash.new.dont end
    def optionally; PatternElementHash.new.optionally end
    def could;      PatternElementHash.new.could end
  end


  #*********************************************
  #*********************************************
  # parser instance implementation
  # these methods are used for each actual parse run
  # they are tied to an instance of the Parser sub-class so you can have more than one
  # parser active at a time
  attr_accessor :failure_index
  attr_accessor :expecting_list
  attr_accessor :src
  attr_accessor :parse_cache

  def initialize
    reset_parser_tracking
  end

  # Clears all per-parse state, including the "did not match entire input"
  # flag, so a failure from an earlier parse cannot leak into the report of a
  # later one.
  def reset_parser_tracking
    self.src=nil
    self.failure_index=0
    self.expecting_list={}
    self.parse_cache={}
    @parsing_did_not_match_entire_input=false
  end

  # Cached result for rule_class at offset: a match, :no_match, or nil if unknown.
  def cached(rule_class,offset)
    (parse_cache[rule_class]||={})[offset]
  end

  def cache_match(rule_class,match)
    (parse_cache[rule_class]||={})[match.offset]=match
  end

  def cache_no_match(rule_class,offset)
    (parse_cache[rule_class]||={})[offset]=:no_match
  end

  # Records what was expected at the furthest failure position seen so far.
  # expecting is a Hash; its :pattern entry is used as the de-duplication key.
  def log_parsing_failure(index,expecting)
    if index>failure_index
      # a failure further into the source supersedes everything recorded before
      @expecting_list={expecting[:pattern]=>expecting}
      @failure_index = index
    elsif index == failure_index
      self.expecting_list[expecting[:pattern]]=expecting
    end
    # failures before failure_index are ignored
  end

  # Parses src and returns the resulting node, or nil on failure.
  # rule:: name of the rule to start from (defaults to the class's root_rule).
  #        When no rule is given the parse only succeeds if the whole input is
  #        matched.
  # NOTE(review): offset is accepted but not used anywhere in this method.
  # Raises if neither rule nor a root rule is available.
  def parse(src,offset=0,rule=nil)
    reset_parser_tracking
    @start_time=Time.now
    self.src=src
    root_node=RootNode.new(self)
    raise "No root rule defined." unless rule || self.class.root_rule
    ret=self.class[rule||self.class.root_rule].parse(root_node)
    unless rule
      if ret
        if ret.next<src.length # parse only succeeds if the whole input is matched
          @parsing_did_not_match_entire_input=true
          @failure_index=ret.next
          ret=nil
        else
          reset_parser_tracking
        end
      end
    end
    @end_time=Time.now
    ret
  end

  # Duration of the most recent parse.
  def parse_time
    @end_time-@start_time
  end

  # Like parse, but writes parser_failure_info to out when the parse fails.
  def parse_and_puts_errors(src,out=$stdout)
    ret=parse(src)
    out.puts parser_failure_info unless ret
    ret
  end

  # "Class(offset) > Class(offset) ..." for the nodes of node_list that follow
  # the first common_root.length entries; nil when node_list is nil.
  def node_list_string(node_list,common_root=[])
    node_list && node_list[common_root.length..-1].map{|p|"#{p.class}(#{p.offset})"}.join(" > ")
  end

  # Multi-line description of the most recent parse failure, or nil when there
  # is no source to report on.
  def parser_failure_info
    return unless src
    bracketing_lines=5
    line,col=src.line_col(failure_index)
    ret=<<-ENDTXT
Parsing error at line #{line} column #{col} offset #{failure_index}

Source:
...
#{(failure_index==0 ? "" : src[0..(failure_index-1)]).last_lines(bracketing_lines)}<HERE>#{src[(failure_index)..-1].first_lines(bracketing_lines)}
...
    ENDTXT

    if @parsing_did_not_match_entire_input
      ret+="\nParser did not match entire input."
    else

      # longest prefix shared by the parent lists of all expected nodes
      common_root=nil
      expecting_list.values.each do |e|
        pl=e[:node].parent_list
        if common_root
          common_root.each_index do |i|
            if pl[i]!=common_root[i]
              # exclusive range: [0..i-1] would keep the whole list when i==0
              common_root=common_root[0...i]
              break
            end
          end
        else
          common_root=pl
        end
      end
      ret+=<<ENDTXT

Successfully matched rules up to failure:
#{node_list_string(common_root)}

Expecting#{expecting_list.length>1 ? ' one of' : ''}:
#{expecting_list.values.collect do |a|
list=node_list_string(a[:node].parent_list,common_root)
[list,"#{a[:pattern].inspect} (#{list})"]
end.sort.map{|i|i[1]}.join("\n ")}
ENDTXT
    end
    ret
  end
end
|
285
|
+
end
|
data/lib/pattern_element.rb
CHANGED
@@ -5,189 +5,190 @@ http://babel-bridge.rubyforge.org/
|
|
5
5
|
=end
|
6
6
|
|
7
7
|
module BabelBridge
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
8
|
+
# Hash that can be filled in declaratively by chaining arbitrary method calls:
# each unknown method name becomes a key.
class PatternElementHash < Hash
  # method_name is a symbol and is used as the key.
  #   no argument         -> key is set to true
  #   one truthy argument -> key is set to that argument
  #   one nil/false arg   -> nothing is set
  #   more than one arg   -> raises
  # Always returns self so calls can be chained.
  def method_missing(method_name, *args)
    case args.length
    when 0
      store(method_name, true)
    when 1
      value = args.first
      store(method_name, value) if value
    else
      raise "More than one argument is not supported. #{self.class}##{method_name} args=#{args.inspect}"
    end
    self
  end
end
|
17
|
+
|
18
|
+
# PatternElement provides optimized parsing for each Element of a pattern.
# PatternElement provides all the logic for parsing:
#   :many
#   :optional
class PatternElement
  attr_accessor :parser, :optional, :negative, :name, :terminal, :could_match
  attr_accessor :match, :rule_variant

  # match can be:
  #   true, Hash, Symbol, String, Regexp
  def initialize(match, rule_variant)
    self.rule_variant = rule_variant
    init(match)

    raise "pattern element cannot be both :dont and :optional" if negative && optional
  end

  def to_s
    match.inspect
  end

  # Attempt to match the pattern defined in self.parser in parent_node.src,
  # starting at offset parent_node.next. Returns the matched node or nil.
  def parse(parent_node)
    # run element parser
    result = parser.call(parent_node)

    # Negative patterns (PEG: !element): success and failure are swapped
    if negative
      result = result ? nil : EmptyNode.new(parent_node)
    end

    # Optional patterns (PEG: element?): a failure becomes an empty match
    result ||= EmptyNode.new(parent_node) if optional

    # Could-match patterns (PEG: &element): match without consuming input
    result.match_length = 0 if result && could_match

    result
  end

  private

  # Initialize the PatternElement based on the type of match.
  def init(match)
    self.match = match
    case match
    when TrueClass then init_true
    when String    then init_string match
    when Regexp    then init_regex match
    when Symbol    then init_rule match
    when Hash      then init_hash match
    else raise "invalid pattern type: #{match.inspect}"
    end
  end

  # The "true" parser always matches the empty string.
  def init_true
    self.parser = lambda { |parent_node| EmptyNode.new(parent_node) }
  end

  # Set up a parser that matches exactly the given string.
  def init_string(string)
    self.parser = lambda do |parent_node|
      TerminalNode.new(parent_node, string.length, string) if parent_node.src[parent_node.next, string.length] == string
    end
    self.terminal = true
  end

  # Set up a parser that matches the given regex at the current position.
  def init_regex(regex)
    anchored = /\A#{regex}/ # anchor the search
    self.parser = lambda do |parent_node|
      start = parent_node.next
      next nil unless parent_node.src[start..-1].index(anchored) == 0
      first, last = $~.offset(0)
      TerminalNode.new(parent_node, last - first, regex)
    end
    self.terminal = true
  end

  # Set up a parser that matches a named sub-rule.
  # A trailing "?" on the name makes the element optional, a trailing "!"
  # makes it negative. Raises if the parser has no rule of that name.
  def init_rule(rule_name)
    rule_name.to_s[/^([^?!]*)([?!])?$/]
    rule_name = Regexp.last_match(1).to_sym
    suffix = Regexp.last_match(2)
    match_rule = rule_variant.rule.parser.rules[rule_name]
    raise "no rule for #{rule_name}" unless match_rule

    self.parser = lambda { |parent_node| match_rule.parse(parent_node) }
    self.name = rule_name
    case suffix
    when "?" then self.optional = true
    when "!" then self.negative = true
    end
  end

  # Initialize the PatternElement from hashed parameters.
  # One of :parser, :many or :match selects the parser; :as, :optional /
  # :optionally, :could and :dont refine it.
  def init_hash(hash)
    if hash[:parser]   then self.parser = hash[:parser]
    elsif hash[:many]  then init_many hash
    elsif hash[:match] then init hash[:match]
    else
      raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
    end

    self.name = hash[:as] || self.name
    self.optional ||= hash[:optional] || hash[:optionally]
    self.could_match ||= hash[:could]
    self.negative ||= hash[:dont]
  end

  # Initialize the PatternElement as a many-parser from hashed parameters
  # (hash[:many] is assumed to be set).
  def init_many(hash)
    # build the parser for a single element first, then wrap it
    init hash[:many]
    single_parser = parser

    delimiter_element = hash[:delimiter] && PatternElement.new(hash[:delimiter], rule_variant)

    # :post_delimiter => true re-uses the delimiter pattern
    post_delimiter_element = hash[:post_delimiter] && case hash[:post_delimiter]
      when TrueClass then delimiter_element
      else PatternElement.new(hash[:post_delimiter], rule_variant)
    end

    self.parser = lambda do |parent_node|
      current = single_parser.call(parent_node)
      many_node = ManyNode.new(parent_node)

      while current
        many_node << current

        if delimiter_element
          # delimited matching: a delimiter must precede the next element
          delimiter_match = delimiter_element.parse(many_node)
          break unless delimiter_match
          many_node.delimiter_matches << delimiter_match
        end

        current = single_parser.call(many_node)
      end

      # success only if we have at least one match
      return nil unless many_node.length > 0

      # pop the post delimiter matched with delimiter_element
      many_node.delimiter_matches.pop if many_node.length == many_node.delimiter_matches.length

      # If post_delimiter is requested, many_node and delimiter_matches will be the same length
      if post_delimiter_element
        post_delimiter_match = post_delimiter_element.parse(many_node)

        # fail if post_delimiter didn't match
        return nil unless post_delimiter_match
        many_node.delimiter_matches << post_delimiter_match
      end

      many_node
    end
  end
end
|
193
194
|
end
|