babel_bridge 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/examples/indention_grouping.rb +68 -0
- data/examples/indention_grouping_test.txt +10 -0
- data/examples/turing/test.rb +28 -0
- data/examples/turing/turing.rb +71 -0
- data/lib/babel_bridge.rb +13 -344
- data/lib/nodes.rb +9 -278
- data/lib/nodes/empty_node.rb +17 -0
- data/lib/nodes/many_node.rb +62 -0
- data/lib/nodes/node.rb +94 -0
- data/lib/nodes/non_terminal_node.rb +117 -0
- data/lib/nodes/terminal_node.rb +38 -0
- data/lib/parser.rb +285 -0
- data/lib/pattern_element.rb +152 -151
- data/lib/rule.rb +62 -0
- data/lib/rule_variant.rb +45 -0
- data/lib/shell.rb +36 -0
- data/lib/string.rb +26 -0
- data/lib/tools.rb +90 -0
- data/lib/version.rb +3 -0
- data/test/test_bb.rb +39 -3
- metadata +19 -3
@@ -0,0 +1,68 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "babel_bridge"
|
3
|
+
|
4
|
+
# Example grammar that turns indentation depth into nested groups of nodes.
#
# The parser keeps a stack (@indentions) of the indentation matches accepted
# so far. The custom :increased_indention and :same_indention rules consult
# that stack to decide whether the whitespace at the current position opens a
# deeper level or continues the current one.
class MyParser < BabelBridge::Parser
  def initialize
    super
    @indentions = []
    # Replacement text for one tab character.
    # NOTE(review): currently not applied anywhere - see #test_indention.
    @tab = " " * 4
  end

  # Drops every remembered indentation whose offset is at or beyond +offset+,
  # so that entries recorded at or after the position being examined are
  # discarded before it is tested again.
  def rollback_indention(offset)
    @indentions.pop until @indentions.empty? || @indentions[-1].offset < offset
  end

  # Tries to accept the whitespace found at parent_node.next as an
  # indentation match.
  #
  # parent_node - the node being built; its #next is used as the offset to
  #               examine and its #src as the text being parsed.
  #
  # Yields the stack of accepted indentation nodes and the whitespace string
  # found at the offset; the block's truthiness decides acceptance.
  #
  # Returns the new BabelBridge::TerminalNode (also pushed onto the stack)
  # when the block accepts, otherwise nil. Also returns nil when the offset
  # lies beyond the end of the source.
  def test_indention(parent_node)
    offset = parent_node.next

    rollback_indention(offset)

    regex = /\A[ \t]*/
    puts "test_indention offset=#{offset}"

    # The pattern also matches the empty string, so any in-range offset
    # produces a match at position 0 of the remaining text. Regexp#match
    # returns nil for a nil argument, which covers an out-of-range offset.
    match = regex.match(parent_node.src[offset..-1])
    return nil unless match

    matched_indent_string = match.to_s

    # NOTE(review): the previous version called
    # matched_indent_string.gsub("\t", @tab) here and threw the result away,
    # so tabs were never expanded. Expansion is deliberately NOT enabled:
    # the rules below compare this string's length with the raw match_length
    # of the stored nodes, and expanding only one side would make the
    # comparison inconsistent. Confirm the intended tab handling.
    puts "test_indention offset=#{offset} match=#{matched_indent_string.inspect} indentions=#{@indentions.collect{|a|a.match_length}.inspect}"

    if yield(@indentions, matched_indent_string)
      puts "match"
      # The match starts at 0, so its length equals the length of the text.
      @indentions << BabelBridge::TerminalNode.new(parent_node, matched_indent_string.length, regex)
      @indentions[-1]
    else
      puts "nomatch"
      nil
    end
  end

  rule :file, :sub_nodes, :rest
  rule :node, :stuff, :endl, :sub_nodes?
  rule :stuff, /[a-zA-Z][a-zA-Z0-9]*/
  rule :sub_nodes, :increased_indention, many(:node,:same_indention)
  rule :endl, /[\t ]*\n/
  rule :rest, /.*/

  # Accepts whitespace that is longer than the most recently accepted
  # indentation, or any whitespace when nothing has been accepted yet.
  rule :increased_indention, {:parser=>lambda do |parent_node|
    puts "increase"
    parent_node.parser.test_indention(parent_node) do |indentions, matched_indent_string|
      indentions.length==0 || matched_indent_string.length>indentions[-1].match_length
    end
  end}

  # Accepts whitespace exactly as long as the most recently accepted
  # indentation; fails when nothing has been accepted yet.
  rule :same_indention, {:parser=>lambda do |parent_node|
    puts "same"
    parent_node.parser.test_indention(parent_node) do |indentions, matched_indent_string|
      indentions.length>0 && matched_indent_string.length==indentions[-1].match_length
    end
  end}
end
|
60
|
+
|
61
|
+
# Driver: parse everything read from standard input with MyParser, then print
# either the resulting tree or the parser's failure report.
indention_parser = MyParser.new
tree = indention_parser.parse($stdin.read)

if !tree
  puts indention_parser.parser_failure_info
else
  puts "success"
  puts tree.inspect
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),"..","..","lib","babel_bridge")
|
2
|
+
|
3
|
+
# Calculator grammar for the example shell. A parsed expression is rebuilt as
# a fully parenthesized string, and both that string and the original text
# are evaluated by Ruby so the two results can be compared by eye.
class TestParser < BabelBridge::Parser

  rule :expr, :bin_op do
    # Prints two lines: the parenthesized rewrite with its value, then the
    # matched text with its value.
    # NOTE(review): both strings are handed to Kernel#eval. The grammar below
    # only admits digits, "-", parentheses and the listed operators, but keep
    # that in mind before widening the grammar.
    def evaluate
      output = bin_op.evaluate
      input = to_s
      puts("bbtest: #{output} = #{eval(output)}")
      puts("ruby: #{input} = #{eval(input)}")
    end
  end

  # Earlier form of the rule, kept for reference:
  #   rule :bin_op, many(:int,/[-+\/*]/) do
  binary_operators_rule :bin_op, :operand, [[:+, "-"], [:/, :*], "**"], :right_operators => ["**"] do
    # Returns "(<left><operator><right>)" built from the evaluated operands.
    def evaluate
      "(#{left.evaluate}#{operator}#{right.evaluate})"
    end
  end

  rule :operand, "(", :bin_op, ")"

  rule :operand, /[-]?[0-9]+/ do
    # An integer literal evaluates to its own text.
    def evaluate
      to_s
    end
  end
end
|
27
|
+
|
28
|
+
# Hand a fresh TestParser to BabelBridge::Shell and start it.
# NOTE(review): Shell is defined in lib/shell.rb, which is not shown here;
# presumably it runs an interactive prompt - confirm.
BabelBridge::Shell.new(TestParser.new).start
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),"..","..","lib","babel_bridge")
|
2
|
+
|
3
|
+
# Small imperative language for the example shell: integer literals,
# arithmetic and comparison operators, an indexed store written as [i] and
# [i] = v, plus "if ... then ... else ... end" and "while ... do ... end".
# Comparisons yield 1 for true and nil for false.
class TuringParser < BabelBridge::Parser
  ignore_whitespace

  # Array backing the [index] cells; created on first access.
  def store
    @store = [] unless @store
    @store
  end

  rule :statements, many(:statement,";"), match?(";") do
    # Evaluates each statement in order and returns the value of the last.
    def evaluate
      last_value = nil
      statement.each { |stmt| last_value = stmt.evaluate }
      last_value
    end
  end

  rule :statement, /if\b/, :statement, /then\b/, :statement, :else_clause?, /end\b/ do
    # statement[0] is the condition and statement[1] the "then" branch.
    # Returns nil when the condition is false and there is no else clause.
    def evaluate
      return statement[1].evaluate if statement[0].evaluate
      else_clause.evaluate if else_clause
    end
  end
  rule :else_clause, /else\b/, :statement

  rule :statement, /while\b/, :statement, /do\b/, :statements, /end\b/ do
    # Re-evaluates the body for as long as the condition is truthy.
    def evaluate
      statements.evaluate while statement.evaluate
    end
  end

  binary_operators_rule :statement, :operand, [[:<, :<=, :>, :>=, :==], [:+, :-], [:/, :*]] do
    # Applies the operator to the evaluated operands. Comparison results are
    # mapped to 1 / nil; arithmetic results are returned unchanged.
    def evaluate
      comparison = [:<, :<=, :>, :>=, :==].include?(operator)
      value = left.evaluate.send(operator, right.evaluate)
      return value unless comparison
      value ? 1 : nil
    end
  end

  rule :operand, "(", :statement, ")"

  rule :operand, "[", :statement, "]", "=", :statement do
    # Stores the value of statement[1] at the index given by statement[0].
    def evaluate
      parser.store[statement[0].evaluate] = statement[1].evaluate
    end
  end

  rule :operand, "[", :statement, "]" do
    # Reads the cell at the evaluated index.
    def evaluate
      cells = parser.store
      cells[statement.evaluate]
    end
  end

  rule :operand, /[-]?[0-9]+/ do
    # Integer literal, converted from its matched text.
    def evaluate
      literal = to_s
      literal.to_i
    end
  end
end
|
70
|
+
|
71
|
+
# Hand a fresh TuringParser to BabelBridge::Shell and start it.
# NOTE(review): Shell is defined in lib/shell.rb, which is not shown here;
# presumably it runs an interactive prompt - confirm.
BabelBridge::Shell.new(TuringParser.new).start
|
data/lib/babel_bridge.rb
CHANGED
@@ -4,347 +4,16 @@ See README for licence information.
|
|
4
4
|
http://babel-bridge.rubyforge.org/
|
5
5
|
=end
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
def last_lines(n)
|
21
|
-
lines=self.split("\n",-1)
|
22
|
-
lines.length<=n ? self : lines[-n..-1].join("\n")
|
23
|
-
end
|
24
|
-
|
25
|
-
def line_col(offset)
|
26
|
-
lines=self[0..offset-1].split("\n")
|
27
|
-
return lines.length, lines[-1].length
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
module BabelBridge
|
32
|
-
VERSION = "0.2.0"
|
33
|
-
|
34
|
-
# Each Rule has one or more RuleVariant
|
35
|
-
# Rules attempt to match each of their Variants in order. The first one to succeed returns true and the Rule succeeds.
|
36
|
-
class RuleVariant
|
37
|
-
attr_accessor :pattern,:rule,:node_class
|
38
|
-
|
39
|
-
def initialize(pattern,rule,node_class=nil)
|
40
|
-
self.pattern=pattern
|
41
|
-
self.rule=rule
|
42
|
-
self.node_class=node_class
|
43
|
-
end
|
44
|
-
|
45
|
-
def inspect
|
46
|
-
pattern.collect{|a|a.inspect}.join(', ')
|
47
|
-
end
|
48
|
-
|
49
|
-
def to_s
|
50
|
-
"variant_class: #{node_class}, pattern: #{inspect}"
|
51
|
-
end
|
52
|
-
|
53
|
-
# convert the pattern into a set of lambda functions
|
54
|
-
def pattern_elements
|
55
|
-
@pattern_elements||=pattern.collect { |match| PatternElement.new match, self }
|
56
|
-
end
|
57
|
-
|
58
|
-
# returns a Node object if it matches, nil otherwise
|
59
|
-
def parse(parent_node)
|
60
|
-
#return parse_nongreedy_optional(src,offset,parent_node) # nongreedy optionals break standard PEG
|
61
|
-
node=node_class.new(parent_node)
|
62
|
-
|
63
|
-
pattern_elements.each do |pe|
|
64
|
-
match=pe.parse(node)
|
65
|
-
|
66
|
-
# if parse failed
|
67
|
-
if !match
|
68
|
-
if pe.terminal
|
69
|
-
# log failures on Terminal patterns for debug output if overall parse fails
|
70
|
-
node.parser.log_parsing_failure(node.next,:pattern=>pe.match,:node=>node)
|
71
|
-
end
|
72
|
-
return nil
|
73
|
-
end
|
74
|
-
|
75
|
-
# parse succeeded, add to node and continue
|
76
|
-
node.add_match(match,pe.name)
|
77
|
-
end
|
78
|
-
node
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
# Rules define one or more patterns (RuleVariants) to match for a given non-terminal
|
83
|
-
class Rule
|
84
|
-
attr_accessor :name,:variants,:parser,:node_class
|
85
|
-
|
86
|
-
def initialize(name,parser)
|
87
|
-
self.name=name
|
88
|
-
self.variants=[]
|
89
|
-
self.parser=parser
|
90
|
-
|
91
|
-
class_name = "#{parser.module_name}_#{name}_node".camelize
|
92
|
-
self.node_class = parser.const_set(class_name,Class.new(NonTerminalNode))
|
93
|
-
end
|
94
|
-
|
95
|
-
def add_variant(pattern, &block)
|
96
|
-
|
97
|
-
rule_variant_class_name = "#{name}_node#{self.variants.length+1}".camelize
|
98
|
-
rule_variant_class = parser.const_set(rule_variant_class_name,Class.new(node_class))
|
99
|
-
self.variants << RuleVariant.new(pattern,self,rule_variant_class)
|
100
|
-
rule_variant_class.class_eval &block if block
|
101
|
-
rule_variant_class
|
102
|
-
end
|
103
|
-
|
104
|
-
def parse(node)
|
105
|
-
if cached=node.parser.cached(name,node.next)
|
106
|
-
return cached==:no_match ? nil : cached # return nil if cached==:no_matched
|
107
|
-
end
|
108
|
-
|
109
|
-
variants.each do |v|
|
110
|
-
if match=v.parse(node)
|
111
|
-
node.parser.cache_match(name,match)
|
112
|
-
return match
|
113
|
-
end
|
114
|
-
end
|
115
|
-
node.parser.cache_no_match(name,node.next)
|
116
|
-
nil
|
117
|
-
end
|
118
|
-
|
119
|
-
# inspect returns a string which approximates the syntax for generating the rule and all its variants
|
120
|
-
def inspect
|
121
|
-
variants.collect do |v|
|
122
|
-
"rule #{name.inspect}, #{v.inspect}"
|
123
|
-
end.join("\n")
|
124
|
-
end
|
125
|
-
|
126
|
-
# returns a more human-readable explanation of the rule
|
127
|
-
def to_s
|
128
|
-
"rule #{name.inspect}, node_class: #{node_class}\n\t"+
|
129
|
-
"#{variants.collect {|v|v.to_s}.join("\n\t")}"
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
# primary object used by the client
|
134
|
-
# Used to generate the grammar with .rule methods
|
135
|
-
# Used to parse with .parse
|
136
|
-
class Parser
|
137
|
-
|
138
|
-
# Parser sub-class grammar definition
|
139
|
-
# These methods are used in the creation of a Parser Sub-Class to define
|
140
|
-
# its grammar
|
141
|
-
class <<self
|
142
|
-
attr_accessor :rules,:module_name,:root_rule
|
143
|
-
|
144
|
-
def rules
|
145
|
-
@rules||={}
|
146
|
-
end
|
147
|
-
# rules can be specified as:
|
148
|
-
# parser.rule :name, to_match1, to_match2, etc...
|
149
|
-
#or
|
150
|
-
# parser.rule :name, [to_match1, to_match2, etc...]
|
151
|
-
def rule(name,*pattern,&block)
|
152
|
-
pattern=pattern[0] if pattern[0].kind_of?(Array)
|
153
|
-
rule=self.rules[name]||=Rule.new(name,self)
|
154
|
-
self.root_rule||=name
|
155
|
-
rule.add_variant(pattern,&block)
|
156
|
-
end
|
157
|
-
|
158
|
-
def node_class(name,&block)
|
159
|
-
klass=self.rules[name].node_class
|
160
|
-
return klass unless block
|
161
|
-
klass.class_eval &block
|
162
|
-
end
|
163
|
-
|
164
|
-
def [](i)
|
165
|
-
rules[i]
|
166
|
-
end
|
167
|
-
|
168
|
-
# rule must be the Symbol name of a rule that has already been defined; anything else raises
|
169
|
-
def root_rule=(rule)
|
170
|
-
raise "Symbol required" unless rule.kind_of?(Symbol)
|
171
|
-
raise "rule #{rule.inspect} not found" unless rules[rule]
|
172
|
-
@root_rule=rule
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
|
-
#*********************************************
|
177
|
-
# pattern construction tools
|
178
|
-
#
|
179
|
-
# Ex:
|
180
|
-
# # match 'keyword'
|
181
|
-
# # (succeeds if keyword is matched; advances the read pointer)
|
182
|
-
# rule :sample_rule, "keyword"
|
183
|
-
# rule :sample_rule, match("keyword")
|
184
|
-
#
|
185
|
-
# # don't match 'keyword'
|
186
|
-
# # (succeeds only if keyword is NOT matched; does not advance the read pointer)
|
187
|
-
# rule :sample_rule, match!("keyword")
|
188
|
-
# rule :sample_rule, dont.match("keyword")
|
189
|
-
#
|
190
|
-
# # optionally match 'keyword'
|
191
|
-
# # (always succeeds; advances the read pointer if keyword is matched)
|
192
|
-
# rule :sample_rule, match?("keyword")
|
193
|
-
# rule :sample_rule, optionally.match("keyword")
|
194
|
-
#
|
195
|
-
# # ensure we could match 'keyword'
|
196
|
-
# # (succeeds only if keyword is matched, but does not advance the read pointer)
|
197
|
-
# rule :sample_rule, could.match("keyword")
|
198
|
-
#
|
199
|
-
|
200
|
-
# dont.match("keyword") #
|
201
|
-
#*********************************************
|
202
|
-
class <<self
|
203
|
-
def many(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
|
204
|
-
def many?(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.optionally.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
|
205
|
-
def many!(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.dont.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
|
206
|
-
|
207
|
-
def match?(*args) PatternElementHash.new.optionally.match(*args) end
|
208
|
-
def match(*args) PatternElementHash.new.match(*args) end
|
209
|
-
def match!(*args) PatternElementHash.new.dont.match(*args) end
|
210
|
-
|
211
|
-
def dont; PatternElementHash.new.dont end
|
212
|
-
def optionally; PatternElementHash.new.optionally end
|
213
|
-
def could; PatternElementHash.new.could end
|
214
|
-
end
|
215
|
-
|
216
|
-
|
217
|
-
#*********************************************
|
218
|
-
#*********************************************
|
219
|
-
# parser instance implementation
|
220
|
-
# these methods are used for each actual parse run
|
221
|
-
# they are tied to an instance of the Parser sub-class so you can have more than one
|
222
|
-
# parser active at a time
|
223
|
-
attr_accessor :failure_index
|
224
|
-
attr_accessor :expecting_list
|
225
|
-
attr_accessor :src
|
226
|
-
attr_accessor :parse_cache
|
227
|
-
|
228
|
-
def initialize
|
229
|
-
reset_parser_tracking
|
230
|
-
end
|
231
|
-
|
232
|
-
def reset_parser_tracking
|
233
|
-
self.src=nil
|
234
|
-
self.failure_index=0
|
235
|
-
self.expecting_list={}
|
236
|
-
self.parse_cache={}
|
237
|
-
end
|
238
|
-
|
239
|
-
def cached(rule_class,offset)
|
240
|
-
(parse_cache[rule_class]||={})[offset]
|
241
|
-
end
|
242
|
-
|
243
|
-
def cache_match(rule_class,match)
|
244
|
-
(parse_cache[rule_class]||={})[match.offset]=match
|
245
|
-
end
|
246
|
-
|
247
|
-
def cache_no_match(rule_class,offset)
|
248
|
-
(parse_cache[rule_class]||={})[offset]=:no_match
|
249
|
-
end
|
250
|
-
|
251
|
-
def log_parsing_failure(index,expecting)
|
252
|
-
if index>failure_index
|
253
|
-
key=expecting[:pattern]
|
254
|
-
@expecting_list={key=>expecting}
|
255
|
-
@failure_index = index
|
256
|
-
elsif index == failure_index
|
257
|
-
key=expecting[:pattern]
|
258
|
-
self.expecting_list[key]=expecting
|
259
|
-
else
|
260
|
-
# ignored
|
261
|
-
end
|
262
|
-
end
|
263
|
-
|
264
|
-
|
265
|
-
def parse(src,offset=0,rule=nil)
|
266
|
-
reset_parser_tracking
|
267
|
-
@start_time=Time.now
|
268
|
-
self.src=src
|
269
|
-
root_node=RootNode.new(self)
|
270
|
-
ret=self.class[rule||self.class.root_rule].parse(root_node)
|
271
|
-
unless rule
|
272
|
-
if ret
|
273
|
-
if ret.next<src.length # parse only succeeds if the whole input is matched
|
274
|
-
@parsing_did_not_match_entire_input=true
|
275
|
-
@failure_index=ret.next
|
276
|
-
ret=nil
|
277
|
-
else
|
278
|
-
reset_parser_tracking
|
279
|
-
end
|
280
|
-
end
|
281
|
-
end
|
282
|
-
@end_time=Time.now
|
283
|
-
ret
|
284
|
-
end
|
285
|
-
|
286
|
-
def parse_time
|
287
|
-
@end_time-@start_time
|
288
|
-
end
|
289
|
-
|
290
|
-
def parse_and_puts_errors(src,out=$stdout)
|
291
|
-
ret=parse(src)
|
292
|
-
unless ret
|
293
|
-
out.puts parser_failure_info
|
294
|
-
end
|
295
|
-
ret
|
296
|
-
end
|
297
|
-
|
298
|
-
def node_list_string(node_list,common_root=[])
|
299
|
-
node_list && node_list[common_root.length..-1].map{|p|"#{p.class}(#{p.offset})"}.join(" > ")
|
300
|
-
end
|
301
|
-
|
302
|
-
def parser_failure_info
|
303
|
-
return unless src
|
304
|
-
bracketing_lines=5
|
305
|
-
line,col=src.line_col(failure_index)
|
306
|
-
ret=<<-ENDTXT
|
307
|
-
Parsing error at line #{line} column #{col} offset #{failure_index}
|
308
|
-
|
309
|
-
Source:
|
310
|
-
...
|
311
|
-
#{(failure_index==0 ? "" : src[0..(failure_index-1)]).last_lines(bracketing_lines)}<HERE>#{src[(failure_index)..-1].first_lines(bracketing_lines)}
|
312
|
-
...
|
313
|
-
ENDTXT
|
314
|
-
|
315
|
-
if @parsing_did_not_match_entire_input
|
316
|
-
ret+="\nParser did not match entire input."
|
317
|
-
else
|
318
|
-
|
319
|
-
common_root=nil
|
320
|
-
expecting_list.values.each do |e|
|
321
|
-
node=e[:node]
|
322
|
-
pl=node.parent_list
|
323
|
-
if common_root
|
324
|
-
common_root.each_index do |i|
|
325
|
-
if pl[i]!=common_root[i]
|
326
|
-
common_root=common_root[0..i-1]
|
327
|
-
break
|
328
|
-
end
|
329
|
-
end
|
330
|
-
else
|
331
|
-
common_root=node.parent_list
|
332
|
-
end
|
333
|
-
end
|
334
|
-
ret+=<<ENDTXT
|
335
|
-
|
336
|
-
Successfully matched rules up to failure:
|
337
|
-
#{node_list_string(common_root)}
|
338
|
-
|
339
|
-
Expecting#{expecting_list.length>1 ? ' one of' : ''}:
|
340
|
-
#{expecting_list.values.collect do |a|
|
341
|
-
list=node_list_string(a[:node].parent_list,common_root)
|
342
|
-
[list,"#{a[:pattern].inspect} (#{list})"]
|
343
|
-
end.sort.map{|i|i[1]}.join("\n ")}
|
344
|
-
ENDTXT
|
345
|
-
end
|
346
|
-
ret
|
347
|
-
end
|
348
|
-
end
|
349
|
-
end
|
350
|
-
|
7
|
+
# Load each library component, in the order listed, from the directory that
# contains this file.
lib_dir = File.dirname(__FILE__)
%w[
  tools
  string
  version
  nodes
  pattern_element
  shell
  rule_variant
  rule
  parser
].each { |component| require File.join(lib_dir, component) }
|