babel_bridge 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/examples/indention_grouping.rb +68 -0
- data/examples/indention_grouping_test.txt +10 -0
- data/examples/turing/test.rb +28 -0
- data/examples/turing/turing.rb +71 -0
- data/lib/babel_bridge.rb +13 -344
- data/lib/nodes.rb +9 -278
- data/lib/nodes/empty_node.rb +17 -0
- data/lib/nodes/many_node.rb +62 -0
- data/lib/nodes/node.rb +94 -0
- data/lib/nodes/non_terminal_node.rb +117 -0
- data/lib/nodes/terminal_node.rb +38 -0
- data/lib/parser.rb +285 -0
- data/lib/pattern_element.rb +152 -151
- data/lib/rule.rb +62 -0
- data/lib/rule_variant.rb +45 -0
- data/lib/shell.rb +36 -0
- data/lib/string.rb +26 -0
- data/lib/tools.rb +90 -0
- data/lib/version.rb +3 -0
- data/test/test_bb.rb +39 -3
- metadata +19 -3
@@ -0,0 +1,68 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "babel_bridge"
|
3
|
+
|
4
|
+
class MyParser < BabelBridge::Parser
|
5
|
+
def initialize
|
6
|
+
super
|
7
|
+
@indentions=[]
|
8
|
+
@tab=" "*4
|
9
|
+
end
|
10
|
+
|
11
|
+
def rollback_indention(offset)
|
12
|
+
@indentions.pop while @indentions.length>0 && @indentions[-1].offset>=offset
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_indention(parent_node)
|
16
|
+
offset=parent_node.next
|
17
|
+
src=parent_node.src
|
18
|
+
|
19
|
+
rollback_indention(offset)
|
20
|
+
|
21
|
+
regex=/\A[ \t]*/
|
22
|
+
puts "test_indention offset=#{offset}"
|
23
|
+
if parent_node.src[offset..-1].index(regex)==0
|
24
|
+
matched_indent_string=$~.to_s
|
25
|
+
range=$~.offset(0)
|
26
|
+
matched_indent_string.gsub("\t",@tab)
|
27
|
+
puts "test_indention offset=#{offset} match=#{matched_indent_string.inspect} indentions=#{@indentions.collect{|a|a.match_length}.inspect}"
|
28
|
+
if matched_indent_string && yield(@indentions,matched_indent_string)
|
29
|
+
puts "match"
|
30
|
+
@indentions<<BabelBridge::TerminalNode.new(parent_node,range[1]-range[0],regex)
|
31
|
+
@indentions[-1]
|
32
|
+
else
|
33
|
+
puts "nomatch"
|
34
|
+
nil
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
rule :file, :sub_nodes, :rest
|
40
|
+
rule :node, :stuff, :endl, :sub_nodes?
|
41
|
+
rule :stuff, /[a-zA-Z][a-zA-Z0-9]*/
|
42
|
+
rule :sub_nodes, :increased_indention, many(:node,:same_indention)
|
43
|
+
rule :endl, /[\t ]*\n/
|
44
|
+
rule :rest, /.*/
|
45
|
+
|
46
|
+
rule :increased_indention, {:parser=>lambda do |parent_node|
|
47
|
+
puts "increase"
|
48
|
+
parent_node.parser.test_indention(parent_node) do |indentions,matched_indent_string|
|
49
|
+
indentions.length==0 || matched_indent_string.length>indentions[-1].match_length
|
50
|
+
end
|
51
|
+
end}
|
52
|
+
|
53
|
+
rule :same_indention, {:parser=>lambda do |parent_node|
|
54
|
+
puts "same"
|
55
|
+
parent_node.parser.test_indention(parent_node) do |indentions,matched_indent_string|
|
56
|
+
indentions.length>0 && matched_indent_string.length==indentions[-1].match_length
|
57
|
+
end
|
58
|
+
end}
|
59
|
+
end
|
60
|
+
|
61
|
+
parser = MyParser.new
|
62
|
+
res=parser.parse($stdin.read)
|
63
|
+
if res
|
64
|
+
puts "success"
|
65
|
+
puts res.inspect
|
66
|
+
else
|
67
|
+
puts parser.parser_failure_info
|
68
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),"..","..","lib","babel_bridge")
|
2
|
+
|
3
|
+
class TestParser < BabelBridge::Parser
|
4
|
+
|
5
|
+
rule :expr, :bin_op do
|
6
|
+
def evaluate
|
7
|
+
output = bin_op.evaluate
|
8
|
+
input = to_s
|
9
|
+
puts "bbtest: #{output} = #{eval output}"
|
10
|
+
puts "ruby: #{input} = #{eval input}"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
#rule :bin_op, many(:int,/[-+\/*]/) do
|
15
|
+
binary_operators_rule :bin_op, :operand, [[:+, "-"], [:/, :*], "**"], :right_operators => ["**"] do
|
16
|
+
def evaluate
|
17
|
+
"(#{left.evaluate}#{operator}#{right.evaluate})"
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
rule :operand, "(", :bin_op, ")"
|
22
|
+
|
23
|
+
rule :operand, /[-]?[0-9]+/ do
|
24
|
+
def evaluate; to_s; end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
BabelBridge::Shell.new(TestParser.new).start
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),"..","..","lib","babel_bridge")
|
2
|
+
|
3
|
+
class TuringParser < BabelBridge::Parser
|
4
|
+
ignore_whitespace
|
5
|
+
|
6
|
+
def store
|
7
|
+
@store||=[]
|
8
|
+
end
|
9
|
+
|
10
|
+
rule :statements, many(:statement,";"), match?(";") do
|
11
|
+
def evaluate
|
12
|
+
ret = nil
|
13
|
+
statement.each do |s|
|
14
|
+
ret = s.evaluate
|
15
|
+
end
|
16
|
+
ret
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
rule :statement, /if\b/, :statement, /then\b/, :statement, :else_clause?, /end\b/ do
|
21
|
+
def evaluate
|
22
|
+
if statement[0].evaluate
|
23
|
+
statement[1].evaluate
|
24
|
+
else
|
25
|
+
else_clause.evaluate if else_clause
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
rule :else_clause, /else\b/, :statement
|
30
|
+
|
31
|
+
rule :statement, /while\b/, :statement, /do\b/, :statements, /end\b/ do
|
32
|
+
def evaluate
|
33
|
+
while statement.evaluate
|
34
|
+
statements.evaluate
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
binary_operators_rule :statement, :operand, [[:<, :<=, :>, :>=, :==], [:+, :-], [:/, :*]] do
|
40
|
+
def evaluate
|
41
|
+
case operator
|
42
|
+
when :<, :<=, :>, :>=, :==
|
43
|
+
(left.evaluate.send operator, right.evaluate) ? 1 : nil
|
44
|
+
else
|
45
|
+
left.evaluate.send operator, right.evaluate
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
rule :operand, "(", :statement, ")"
|
51
|
+
|
52
|
+
rule :operand, "[", :statement, "]", "=", :statement do
|
53
|
+
def evaluate
|
54
|
+
parser.store[statement[0].evaluate] = statement[1].evaluate
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
rule :operand, "[", :statement, "]" do
|
59
|
+
def evaluate
|
60
|
+
parser.store[statement.evaluate]
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
rule :operand, /[-]?[0-9]+/ do
|
65
|
+
def evaluate
|
66
|
+
to_s.to_i
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
BabelBridge::Shell.new(TuringParser.new).start
|
data/lib/babel_bridge.rb
CHANGED
@@ -4,347 +4,16 @@ See README for licence information.
|
|
4
4
|
http://babel-bridge.rubyforge.org/
|
5
5
|
=end
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
def last_lines(n)
|
21
|
-
lines=self.split("\n",-1)
|
22
|
-
lines.length<=n ? self : lines[-n..-1].join("\n")
|
23
|
-
end
|
24
|
-
|
25
|
-
def line_col(offset)
|
26
|
-
lines=self[0..offset-1].split("\n")
|
27
|
-
return lines.length, lines[-1].length
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
module BabelBridge
|
32
|
-
VERSION = "0.2.0"
|
33
|
-
|
34
|
-
# Each Rule has one or more RuleVariant
|
35
|
-
# Rules attempt to match each of their Variants in order. The first one to succeed returns true and the Rule succeeds.
|
36
|
-
class RuleVariant
|
37
|
-
attr_accessor :pattern,:rule,:node_class
|
38
|
-
|
39
|
-
def initialize(pattern,rule,node_class=nil)
|
40
|
-
self.pattern=pattern
|
41
|
-
self.rule=rule
|
42
|
-
self.node_class=node_class
|
43
|
-
end
|
44
|
-
|
45
|
-
def inspect
|
46
|
-
pattern.collect{|a|a.inspect}.join(', ')
|
47
|
-
end
|
48
|
-
|
49
|
-
def to_s
|
50
|
-
"variant_class: #{node_class}, pattern: #{inspect}"
|
51
|
-
end
|
52
|
-
|
53
|
-
# convert the pattern into a set of lambda functions
|
54
|
-
def pattern_elements
|
55
|
-
@pattern_elements||=pattern.collect { |match| PatternElement.new match, self }
|
56
|
-
end
|
57
|
-
|
58
|
-
# returns a Node object if it matches, nil otherwise
|
59
|
-
def parse(parent_node)
|
60
|
-
#return parse_nongreedy_optional(src,offset,parent_node) # nongreedy optionals break standard PEG
|
61
|
-
node=node_class.new(parent_node)
|
62
|
-
|
63
|
-
pattern_elements.each do |pe|
|
64
|
-
match=pe.parse(node)
|
65
|
-
|
66
|
-
# if parse failed
|
67
|
-
if !match
|
68
|
-
if pe.terminal
|
69
|
-
# log failures on Terminal patterns for debug output if overall parse fails
|
70
|
-
node.parser.log_parsing_failure(node.next,:pattern=>pe.match,:node=>node)
|
71
|
-
end
|
72
|
-
return nil
|
73
|
-
end
|
74
|
-
|
75
|
-
# parse succeeded, add to node and continue
|
76
|
-
node.add_match(match,pe.name)
|
77
|
-
end
|
78
|
-
node
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
# Rules define one or more patterns (RuleVariants) to match for a given non-terminal
|
83
|
-
class Rule
|
84
|
-
attr_accessor :name,:variants,:parser,:node_class
|
85
|
-
|
86
|
-
def initialize(name,parser)
|
87
|
-
self.name=name
|
88
|
-
self.variants=[]
|
89
|
-
self.parser=parser
|
90
|
-
|
91
|
-
class_name = "#{parser.module_name}_#{name}_node".camelize
|
92
|
-
self.node_class = parser.const_set(class_name,Class.new(NonTerminalNode))
|
93
|
-
end
|
94
|
-
|
95
|
-
def add_variant(pattern, &block)
|
96
|
-
|
97
|
-
rule_variant_class_name = "#{name}_node#{self.variants.length+1}".camelize
|
98
|
-
rule_variant_class = parser.const_set(rule_variant_class_name,Class.new(node_class))
|
99
|
-
self.variants << RuleVariant.new(pattern,self,rule_variant_class)
|
100
|
-
rule_variant_class.class_eval &block if block
|
101
|
-
rule_variant_class
|
102
|
-
end
|
103
|
-
|
104
|
-
def parse(node)
|
105
|
-
if cached=node.parser.cached(name,node.next)
|
106
|
-
return cached==:no_match ? nil : cached # return nil if cached==:no_matched
|
107
|
-
end
|
108
|
-
|
109
|
-
variants.each do |v|
|
110
|
-
if match=v.parse(node)
|
111
|
-
node.parser.cache_match(name,match)
|
112
|
-
return match
|
113
|
-
end
|
114
|
-
end
|
115
|
-
node.parser.cache_no_match(name,node.next)
|
116
|
-
nil
|
117
|
-
end
|
118
|
-
|
119
|
-
# inspect returns a string which approximates the syntax for generating the rule and all its variants
|
120
|
-
def inspect
|
121
|
-
variants.collect do |v|
|
122
|
-
"rule #{name.inspect}, #{v.inspect}"
|
123
|
-
end.join("\n")
|
124
|
-
end
|
125
|
-
|
126
|
-
# returns a more human-readable explanation of the rule
|
127
|
-
def to_s
|
128
|
-
"rule #{name.inspect}, node_class: #{node_class}\n\t"+
|
129
|
-
"#{variants.collect {|v|v.to_s}.join("\n\t")}"
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
# primary object used by the client
|
134
|
-
# Used to generate the grammar with .rule methods
|
135
|
-
# Used to parse with .parse
|
136
|
-
class Parser
|
137
|
-
|
138
|
-
# Parser sub-class grammar definition
|
139
|
-
# These methods are used in the creation of a Parser Sub-Class to define
|
140
|
-
# its grammar
|
141
|
-
class <<self
|
142
|
-
attr_accessor :rules,:module_name,:root_rule
|
143
|
-
|
144
|
-
def rules
|
145
|
-
@rules||={}
|
146
|
-
end
|
147
|
-
# rules can be specified as:
|
148
|
-
# parser.rule :name, to_match1, to_match2, etc...
|
149
|
-
#or
|
150
|
-
# parser.rule :name, [to_match1, to_match2, etc...]
|
151
|
-
def rule(name,*pattern,&block)
|
152
|
-
pattern=pattern[0] if pattern[0].kind_of?(Array)
|
153
|
-
rule=self.rules[name]||=Rule.new(name,self)
|
154
|
-
self.root_rule||=name
|
155
|
-
rule.add_variant(pattern,&block)
|
156
|
-
end
|
157
|
-
|
158
|
-
def node_class(name,&block)
|
159
|
-
klass=self.rules[name].node_class
|
160
|
-
return klass unless block
|
161
|
-
klass.class_eval &block
|
162
|
-
end
|
163
|
-
|
164
|
-
def [](i)
|
165
|
-
rules[i]
|
166
|
-
end
|
167
|
-
|
168
|
-
# rule must be the symbol name of a rule already present in rules; any other value raises
|
169
|
-
def root_rule=(rule)
|
170
|
-
raise "Symbol required" unless rule.kind_of?(Symbol)
|
171
|
-
raise "rule #{rule.inspect} not found" unless rules[rule]
|
172
|
-
@root_rule=rule
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
|
-
#*********************************************
|
177
|
-
# pattern construction tools
|
178
|
-
#
|
179
|
-
# Ex:
|
180
|
-
# # match 'keyword'
|
181
|
-
# # (succeeds if keyword is matched; advances the read pointer)
|
182
|
-
# rule :sample_rule, "keyword"
|
183
|
-
# rule :sample_rule, match("keyword")
|
184
|
-
#
|
185
|
-
# # don't match 'keyword'
|
186
|
-
# # (succeeds only if keyword is NOT matched; does not advance the read pointer)
|
187
|
-
# rule :sample_rule, match!("keyword")
|
188
|
-
# rule :sample_rule, dont.match("keyword")
|
189
|
-
#
|
190
|
-
# # optionally match 'keyword'
|
191
|
-
# # (always succeeds; advances the read pointer if keyword is matched)
|
192
|
-
# rule :sample_rule, match?("keyword")
|
193
|
-
# rule :sample_rule, optionally.match("keyword")
|
194
|
-
#
|
195
|
-
# # ensure we could match 'keyword'
|
196
|
-
# # (succeeds only if keyword is matched, but does not advance the read pointer)
|
197
|
-
# rule :sample_rule, could.match("keyword")
|
198
|
-
#
|
199
|
-
|
200
|
-
# dont.match("keyword") #
|
201
|
-
#*********************************************
|
202
|
-
class <<self
|
203
|
-
def many(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
|
204
|
-
def many?(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.optionally.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
|
205
|
-
def many!(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.dont.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
|
206
|
-
|
207
|
-
def match?(*args) PatternElementHash.new.optionally.match(*args) end
|
208
|
-
def match(*args) PatternElementHash.new.match(*args) end
|
209
|
-
def match!(*args) PatternElementHash.new.dont.match(*args) end
|
210
|
-
|
211
|
-
def dont; PatternElementHash.new.dont end
|
212
|
-
def optionally; PatternElementHash.new.optionally end
|
213
|
-
def could; PatternElementHash.new.could end
|
214
|
-
end
|
215
|
-
|
216
|
-
|
217
|
-
#*********************************************
|
218
|
-
#*********************************************
|
219
|
-
# parser instance implementation
|
220
|
-
# these methods are used for each actual parse run
|
221
|
-
# they are tied to an instance of the Parser sub-class so you can have more than one
|
222
|
-
# parser active at a time
|
223
|
-
attr_accessor :failure_index
|
224
|
-
attr_accessor :expecting_list
|
225
|
-
attr_accessor :src
|
226
|
-
attr_accessor :parse_cache
|
227
|
-
|
228
|
-
def initialize
|
229
|
-
reset_parser_tracking
|
230
|
-
end
|
231
|
-
|
232
|
-
def reset_parser_tracking
|
233
|
-
self.src=nil
|
234
|
-
self.failure_index=0
|
235
|
-
self.expecting_list={}
|
236
|
-
self.parse_cache={}
|
237
|
-
end
|
238
|
-
|
239
|
-
def cached(rule_class,offset)
|
240
|
-
(parse_cache[rule_class]||={})[offset]
|
241
|
-
end
|
242
|
-
|
243
|
-
def cache_match(rule_class,match)
|
244
|
-
(parse_cache[rule_class]||={})[match.offset]=match
|
245
|
-
end
|
246
|
-
|
247
|
-
def cache_no_match(rule_class,offset)
|
248
|
-
(parse_cache[rule_class]||={})[offset]=:no_match
|
249
|
-
end
|
250
|
-
|
251
|
-
def log_parsing_failure(index,expecting)
|
252
|
-
if index>failure_index
|
253
|
-
key=expecting[:pattern]
|
254
|
-
@expecting_list={key=>expecting}
|
255
|
-
@failure_index = index
|
256
|
-
elsif index == failure_index
|
257
|
-
key=expecting[:pattern]
|
258
|
-
self.expecting_list[key]=expecting
|
259
|
-
else
|
260
|
-
# ignored
|
261
|
-
end
|
262
|
-
end
|
263
|
-
|
264
|
-
|
265
|
-
def parse(src,offset=0,rule=nil)
|
266
|
-
reset_parser_tracking
|
267
|
-
@start_time=Time.now
|
268
|
-
self.src=src
|
269
|
-
root_node=RootNode.new(self)
|
270
|
-
ret=self.class[rule||self.class.root_rule].parse(root_node)
|
271
|
-
unless rule
|
272
|
-
if ret
|
273
|
-
if ret.next<src.length # parse only succeeds if the whole input is matched
|
274
|
-
@parsing_did_not_match_entire_input=true
|
275
|
-
@failure_index=ret.next
|
276
|
-
ret=nil
|
277
|
-
else
|
278
|
-
reset_parser_tracking
|
279
|
-
end
|
280
|
-
end
|
281
|
-
end
|
282
|
-
@end_time=Time.now
|
283
|
-
ret
|
284
|
-
end
|
285
|
-
|
286
|
-
def parse_time
|
287
|
-
@end_time-@start_time
|
288
|
-
end
|
289
|
-
|
290
|
-
def parse_and_puts_errors(src,out=$stdout)
|
291
|
-
ret=parse(src)
|
292
|
-
unless ret
|
293
|
-
out.puts parser_failure_info
|
294
|
-
end
|
295
|
-
ret
|
296
|
-
end
|
297
|
-
|
298
|
-
def node_list_string(node_list,common_root=[])
|
299
|
-
node_list && node_list[common_root.length..-1].map{|p|"#{p.class}(#{p.offset})"}.join(" > ")
|
300
|
-
end
|
301
|
-
|
302
|
-
def parser_failure_info
|
303
|
-
return unless src
|
304
|
-
bracketing_lines=5
|
305
|
-
line,col=src.line_col(failure_index)
|
306
|
-
ret=<<-ENDTXT
|
307
|
-
Parsing error at line #{line} column #{col} offset #{failure_index}
|
308
|
-
|
309
|
-
Source:
|
310
|
-
...
|
311
|
-
#{(failure_index==0 ? "" : src[0..(failure_index-1)]).last_lines(bracketing_lines)}<HERE>#{src[(failure_index)..-1].first_lines(bracketing_lines)}
|
312
|
-
...
|
313
|
-
ENDTXT
|
314
|
-
|
315
|
-
if @parsing_did_not_match_entire_input
|
316
|
-
ret+="\nParser did not match entire input."
|
317
|
-
else
|
318
|
-
|
319
|
-
common_root=nil
|
320
|
-
expecting_list.values.each do |e|
|
321
|
-
node=e[:node]
|
322
|
-
pl=node.parent_list
|
323
|
-
if common_root
|
324
|
-
common_root.each_index do |i|
|
325
|
-
if pl[i]!=common_root[i]
|
326
|
-
common_root=common_root[0..i-1]
|
327
|
-
break
|
328
|
-
end
|
329
|
-
end
|
330
|
-
else
|
331
|
-
common_root=node.parent_list
|
332
|
-
end
|
333
|
-
end
|
334
|
-
ret+=<<ENDTXT
|
335
|
-
|
336
|
-
Successfully matched rules up to failure:
|
337
|
-
#{node_list_string(common_root)}
|
338
|
-
|
339
|
-
Expecting#{expecting_list.length>1 ? ' one of' : ''}:
|
340
|
-
#{expecting_list.values.collect do |a|
|
341
|
-
list=node_list_string(a[:node].parent_list,common_root)
|
342
|
-
[list,"#{a[:pattern].inspect} (#{list})"]
|
343
|
-
end.sort.map{|i|i[1]}.join("\n ")}
|
344
|
-
ENDTXT
|
345
|
-
end
|
346
|
-
ret
|
347
|
-
end
|
348
|
-
end
|
349
|
-
end
|
350
|
-
|
7
|
+
%w{
|
8
|
+
tools
|
9
|
+
string
|
10
|
+
version
|
11
|
+
nodes
|
12
|
+
pattern_element
|
13
|
+
shell
|
14
|
+
rule_variant
|
15
|
+
rule
|
16
|
+
parser
|
17
|
+
}.each do |file|
|
18
|
+
require File.join(File.dirname(__FILE__),file)
|
19
|
+
end
|