babel_bridge 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/examples/indention_grouping.rb +68 -0
- data/examples/indention_grouping_test.txt +10 -0
- data/examples/turing/test.rb +28 -0
- data/examples/turing/turing.rb +71 -0
- data/lib/babel_bridge.rb +13 -344
- data/lib/nodes.rb +9 -278
- data/lib/nodes/empty_node.rb +17 -0
- data/lib/nodes/many_node.rb +62 -0
- data/lib/nodes/node.rb +94 -0
- data/lib/nodes/non_terminal_node.rb +117 -0
- data/lib/nodes/terminal_node.rb +38 -0
- data/lib/parser.rb +285 -0
- data/lib/pattern_element.rb +152 -151
- data/lib/rule.rb +62 -0
- data/lib/rule_variant.rb +45 -0
- data/lib/shell.rb +36 -0
- data/lib/string.rb +26 -0
- data/lib/tools.rb +90 -0
- data/lib/version.rb +3 -0
- data/test/test_bb.rb +39 -3
- metadata +19 -3
data/lib/rule.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module BabelBridge
|
2
|
+
# Rules define one or more patterns (RuleVariants) to match for a given non-terminal
class Rule
  attr_accessor :name, :variants, :parser, :node_class

  # name   - the rule's name; also used as the cache key in #parse
  # parser - the object the generated node classes are registered on (via const_set)
  def initialize(name, parser)
    @name = name
    @variants = []
    @parser = parser
    @node_class = create_node_class
  end

  # Appends a new RuleVariant for +pattern+.
  # A dedicated subclass of node_class is created for the variant; when a block
  # is given it is class_eval'ed in that subclass.
  # Returns the variant's node class.
  def add_variant(pattern, &block)
    variant_class = create_next_node_variant_class
    variants << RuleVariant.new(pattern, self, variant_class)
    variant_class.class_eval(&block) if block
    variant_class
  end

  # Tries each variant in order against +node+ and returns the first match, or nil.
  # The parser's cache is consulted first and updated with the outcome.
  def parse(node)
    cached = node.parser.cached(name, node.next)
    if cached
      # a cached :no_match marker means "already tried here and failed"
      return nil if cached == :no_match
      return cached
    end

    variants.each do |variant|
      match = variant.parse(node)
      next unless match

      node.parser.cache_match(name, match)
      return match
    end

    node.parser.cache_no_match(name, node.next)
    nil
  end

  # inspect returns a string which approximates the syntax for generating the rule and all its variants
  def inspect
    variants.map { |variant| "rule #{name.inspect}, #{variant.inspect}" }.join("\n")
  end

  # returns a more human-readable explanation of the rule
  def to_s
    header = "rule #{name.inspect}, node_class: #{node_class}\n\t"
    header + variants.map { |variant| variant.to_s }.join("\n\t")
  end

  private

  # creates a subclass of the NonTerminalNode for this Rule's node_class
  def create_node_class
    class_name = "#{parser.module_name}_#{name}_node".camelize
    parser.const_set class_name, Class.new(NonTerminalNode)
  end

  # creates a new sub_class of the node_class for a variant
  def create_next_node_variant_class
    variant_class_name = "#{name}_node#{variants.length + 1}".camelize
    parser.const_set variant_class_name, Class.new(node_class)
  end
end
|
62
|
+
end
|
data/lib/rule_variant.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
module BabelBridge
|
2
|
+
|
3
|
+
# Each Rule has one or more RuleVariants.
# Rules attempt to match each of their Variants in order. The first one to succeed
# returns its matched node and the Rule succeeds.
class RuleVariant
  attr_accessor :pattern, :rule, :variant_node_class

  def initialize(pattern, rule, variant_node_class = nil)
    @pattern = pattern
    @rule = rule
    @variant_node_class = variant_node_class
  end

  # converts the pattern into PatternElement objects (built once, then memoized)
  def pattern_elements
    @pattern_elements ||= pattern.map { |match| PatternElement.new match, self }
  end

  # returns a Node object if it matches, nil otherwise
  def parse(parent_node)
    #return parse_nongreedy_optional(src,offset,parent_node) # nongreedy optionals break standard PEG
    node = variant_node_class.new(parent_node)

    pattern_elements.each do |element|
      match = element.parse(node)

      if match
        # parse succeeded, add to node and continue
        node.add_match(match, element.name)
        next
      end

      # parse failed:
      # log failures on Terminal patterns for debug output if overall parse fails
      node.parser.log_parsing_failure(node.next, :pattern => element.match, :node => node) if element.terminal
      return nil
    end

    node.post_match
  end

  # comma-separated inspect of every entry in the pattern
  def inspect
    pattern.map { |entry| entry.inspect }.join(', ')
  end

  def to_s
    "variant_class: #{variant_node_class}, pattern: #{inspect}"
  end
end
|
45
|
+
end
|
data/lib/shell.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require "readline"
|
2
|
+
|
3
|
+
module BabelBridge
|
4
|
+
# A minimal read-parse-evaluate loop around a parser.
class Shell
  attr_accessor :parser

  def initialize(parser)
    @parser = parser
  end

  # Calls +evaluate+ on the parse tree and returns its result.
  # If evaluation raises, the error and backtrace are written to the error
  # stream and the return value of that +puts+ call is returned instead.
  # SystemExit and signals (e.g. Ctrl-C) are re-raised so the shell can be left.
  def evaluate(parse_tree_node)
    parse_tree_node.evaluate
  rescue SystemExit, SignalException
    raise
  rescue Exception => e
    # @stderr is only assigned by #start; fall back to $stderr when evaluate is used on its own
    (@stderr || $stderr).puts "Error evaluating parse tree: #{e}\n "+e.backtrace.join("\n ")
  end

  # Reads lines until the input returns nil, parsing each stripped line.
  #
  # options:
  #   :stdin  - input stream (default $stdin, which is read through Readline)
  #   :stdout - output stream (default $stdout)
  #   :stderr - error stream (default: the output stream)
  #
  # if a block is provided, successful parse results are yielded to the block
  # Otherwise, successful parse results are evaluated (see #evaluate) and printed.
  # Failed parses print parser.parser_failure_info to the error stream.
  def start(options={},&block)
    @stdout = options[:stdout] || $stdout
    @stderr = options[:stderr] || @stdout
    @stdin = options[:stdin] || $stdin
    while line = (@stdin == $stdin ? Readline.readline("> ", true) : @stdin.gets)
      ret = parser.parse line.strip
      if ret
        if block
          yield ret
        else
          @stdout.puts " => #{evaluate(ret).inspect}"
        end
      else
        @stderr.puts parser.parser_failure_info
      end
    end
  end
end
|
36
|
+
end
|
data/lib/string.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
module BabelBridge
  # Helper methods mixed into String (see the include at the bottom of this file).
  module StringExtensions
    # "foo_bar" => "FooBar": splits on "_" and capitalizes each part
    def camelize
      self.split("_").collect {|a| a.capitalize}.join
    end

    # Returns the first n lines (joined with "\n").
    # Returns self when the string has n or fewer lines; returns "" when n <= 0.
    def first_lines(n)
      lines=self.split("\n",-1)
      return self if lines.length<=n
      # first(count) avoids the 0..-1 wrap-around that made first_lines(0) return everything
      lines.first([n,0].max).join("\n")
    end

    # Returns the last n lines (joined with "\n").
    # Returns self when the string has n or fewer lines; returns "" when n <= 0.
    def last_lines(n)
      lines=self.split("\n",-1)
      return self if lines.length<=n
      lines.last([n,0].max).join("\n")
    end

    # Returns [line, column] for a character offset into the string.
    # line is 1-based; column is the number of characters that precede the
    # offset on that line (so offset 0 is line 1, column 0).
    def line_col(offset)
      # self[0,offset] is the text before the offset (empty for offset 0)
      prefix = offset > 0 ? self[0,offset] : ""
      # limit -1 keeps a trailing empty field, so an offset just past a newline
      # is reported at column 0 of the following line
      lines=prefix.split("\n",-1)
      return 1, 0 if lines.empty?
      return lines.length, lines[-1].length
    end
  end
end

class String
  include BabelBridge::StringExtensions
end
|
data/lib/tools.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
module BabelBridge
|
2
|
+
class Tools
  class << self

    # Takes an array of Strings, Symbols and Regexps (nested arrays are flattened)
    # and returns the source string of a regexp matching the or ("|") of all of them.
    #
    # Each alternative is wrapped in its own capture group. Alternatives are
    # ordered longest source first (ties keep their input order) so that an
    # operator such as "**" is tried before its prefix "*".
    #
    # Raises TypeError for any other element type.
    def array_to_or_regexp_string(array)
      alternatives=array.flatten.collect do |op|
        source=case op
        when Regexp then op.source
        when String, Symbol then Regexp.escape(op.to_s)
        else raise TypeError, "expected String, Symbol or Regexp, got #{op.inspect}"
        end
        "("+source+")"
      end
      # sort_by is not stable, so the original index is used as a tie-breaker
      alternatives.each_with_index.sort_by {|alt,i| [-alt.length,i]}.collect {|alt,_| alt}.join('|')
    end

    # Same as array_to_or_regexp, but the regexp must match from line start to line end.
    def array_to_anchored_or_regexp(array)
      # the (?:...) group makes ^ and $ apply to every alternative, not just the first and last
      Regexp.new "^(?:"+array_to_or_regexp_string(array)+")$"
    end

    # Regexp matching any one of the given Strings, Symbols and Regexps.
    def array_to_or_regexp(array)
      Regexp.new array_to_or_regexp_string(array)
    end
  end
end
|
25
|
+
|
26
|
+
# Turns a flat list of operands and binary operators into a tree of node_class
# nodes according to operator precedence and associativity.
class BinaryOperatorProcessor
  attr_accessor :node_class, :exact_operator_precedence, :regexp_operator_precedence
  attr_reader :right_operators

  # operator_precedence - array of precedence levels, lowest precedence first.
  #                       Each level is a String, Symbol or Regexp, or an array of them.
  # node_class          - class instantiated (with the parent node) for every operator in the tree
  # right_operators     - nil, or an array of operators that are right-associative
  def initialize(operator_precedence,node_class,right_operators)
    self.right_operators=right_operators
    @node_class=node_class
    @exact_operator_precedence={}
    @regexp_operator_precedence=[]

    operator_precedence.each_with_index do |op_level,i|
      (op_level.kind_of?(Array) ? op_level : [op_level]).each do |op|
        case op
        when String, Symbol then @exact_operator_precedence[op.to_s] = i
        when Regexp then @regexp_operator_precedence << [op,i]
        end
      end
    end
  end

  # Stores the right-associative operators and rebuilds the regexp used to recognize them.
  def right_operators=(operators)
    @right_operators=operators
    @right_operators_regexp= operators && Tools::array_to_anchored_or_regexp(operators)
  end

  # Returns the precedence level (index into the precedence list) of operator_string.
  # Exact String/Symbol operators are checked first, then Regexp operators in the
  # order they were given. Raises RuntimeError if nothing matches.
  def operator_precedence(operator_string)
    exact = @exact_operator_precedence[operator_string]
    return exact if exact
    @regexp_operator_precedence.each do |regexp,level|
      return level if operator_string[regexp]
    end
    raise "operator #{operator_string.inspect} didn't match #{@exact_operator_precedence} or #{@regexp_operator_precedence}"
  end

  # Returns the index of the operator the expression should be split at, or nil
  # when operators is empty.
  #
  # associativity =
  #   :left => operators of the same precedence execute from left to right
  #   :right => operators of the same precedence execute from right to left
  # Operators listed in right_operators are always treated as right-associative.
  def index_of_lowest_precedence(operators,associativity=:left)
    lowest = lowest_precedence = nil
    operators.each_with_index do |operator,i|
      operator_string = operator.to_s
      precedence = operator_precedence(operator_string)
      right_associative = associativity==:right || (@right_operators_regexp && operator_string[@right_operators_regexp])
      # left-associative: split at the LAST operator of the lowest level (<=);
      # right-associative: split at the FIRST one (<)
      if !lowest || (right_associative ? precedence < lowest_precedence : precedence <= lowest_precedence)
        lowest = i
        lowest_precedence = precedence
      end
    end
    lowest
  end

  # generates a tree of nodes of the specified node_class
  # operands must contain exactly one more element than operators.
  # Each generated node receives three add_match calls, named
  # :left, :operator_node and :right, in that order.
  # The nodes have access to the following useful methods:
  #   self.left -> return the left operand parse-tree-node
  #   self.right -> return the right operand parse-tree-node
  #   self.operator_node -> return the operator parse-tree-node
  #   self.operator -> return the operator as a ruby symbol
  def generate_tree(operands, operators, parent_node)
    return operands[0] if operands.length==1

    i = index_of_lowest_precedence(operators)

    new_operand = node_class.new(parent_node)
    # exclusive range: 0...i is empty for i==0, whereas 0..i-1 would wrap around to the whole array
    new_operand.add_match generate_tree(operands[0..i], operators[0...i],new_operand), :left
    new_operand.add_match operators[i], :operator_node
    new_operand.add_match generate_tree(operands[i+1..-1], operators[i+1..-1],new_operand), :right
    new_operand
  end

end
|
90
|
+
end
|
data/lib/version.rb
ADDED
data/test/test_bb.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require File.dirname(__FILE__)
|
2
|
-
require File.dirname(__FILE__)
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__),"..","lib","babel_bridge"))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__),"test_helper"))
|
3
3
|
|
4
4
|
class BBTests < TestHelper
|
5
5
|
|
@@ -287,7 +287,9 @@ class BBTests < TestHelper
|
|
287
287
|
rule :foo, {:parser=>lambda do |parent_node|
|
288
288
|
offset=parent_node.next
|
289
289
|
src=parent_node.src
|
290
|
-
|
290
|
+
|
291
|
+
# Note, the \A anchors the search at the beginning of the string
|
292
|
+
if src[offset..-1].index(/\A[A-Z]+/)==0
|
291
293
|
endpattern=$~.to_s
|
292
294
|
if i=src.index(endpattern,offset+endpattern.length)
|
293
295
|
BabelBridge::TerminalNode.new(parent_node,i+endpattern.length-offset,"endpattern")
|
@@ -299,6 +301,7 @@ class BBTests < TestHelper
|
|
299
301
|
assert parser.parse("END this is in the middle END")
|
300
302
|
assert_equal "END this is in END",parser.parse("END this is in END the middle END",0,:foo).text
|
301
303
|
assert_nil parser.parse("END this is in the middle EN")
|
304
|
+
assert_nil parser.parse(" END this is in the middle END")
|
302
305
|
end
|
303
306
|
|
304
307
|
def test_poly
|
@@ -378,6 +381,38 @@ class BBTests < TestHelper
|
|
378
381
|
assert_equal({:match=>";",:could=>true}, BabelBridge::Parser.could.match(";"))
|
379
382
|
end
|
380
383
|
|
384
|
+
# A parser declared with ignore_whitespace must accept the two terminals with
# or without a space between them and with or without a trailing space.
def test_ignore_whitespace
  parser = new_parser do
    ignore_whitespace
    rule :pair, "foo", "bar"
  end

  ["foobar", "foo bar", "foobar ", "foo bar "].each do |source|
    assert parser.parse(source)
  end
end
|
394
|
+
|
395
|
+
# Checks precedence and associativity of binary_operators_rule by having each
# operator node render itself as a fully parenthesized string.
def test_binary_operator_rule
  parser = new_parser do
    binary_operators_rule :bin_op, :int, [[:+, "-"], [:/, :*], "**"], :right_operators => ["**"] do
      def evaluate
        "(#{left.evaluate}#{operator}#{right.evaluate})"
      end
    end

    rule :int, /[-]?[0-9]+/ do
      def evaluate; to_s; end
    end
  end

  # [expected rendering, source text]
  expectations = [
    ["(1+2)",                  "1+2"],
    ["((1+2)+3)",              "1+2+3"],
    ["(1+(2*3))",              "1+2*3"],
    ["((1*2)+3)",              "1*2+3"],
    ["(5**6)",                 "5**6"],
    ["((1-2)+((3*4)/(5**6)))", "1-2+3*4/5**6"],
    ["(5**(6**7))",            "5**6**7"]
  ]

  expectations.each do |expected, source|
    assert_equal expected, parser.parse(source).evaluate
  end
end
|
415
|
+
|
381
416
|
def disabled_test_recursive_block
|
382
417
|
# PEG does have this problem, so this isn't really an error
|
383
418
|
# But maybe in the future we'll handle it better.
|
@@ -391,6 +426,7 @@ class BBTests < TestHelper
|
|
391
426
|
end
|
392
427
|
|
393
428
|
|
429
|
+
|
394
430
|
def regex_performance
|
395
431
|
parser=new_parser do
|
396
432
|
rule :foo, many(:element)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: babel_bridge
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2010-11-28 00:00:00.
|
12
|
+
date: 2010-11-28 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! "Babel Bridge is an object oriented parser generator for parsing expression
|
15
15
|
grammars (PEG). \nGenerate memoizing packrat parsers 100% in Ruby code with a simple
|
@@ -24,8 +24,24 @@ files:
|
|
24
24
|
- test/test_bb.rb
|
25
25
|
- test/test_helper.rb
|
26
26
|
- lib/babel_bridge.rb
|
27
|
+
- lib/nodes/empty_node.rb
|
28
|
+
- lib/nodes/many_node.rb
|
29
|
+
- lib/nodes/node.rb
|
30
|
+
- lib/nodes/non_terminal_node.rb
|
31
|
+
- lib/nodes/terminal_node.rb
|
27
32
|
- lib/nodes.rb
|
33
|
+
- lib/parser.rb
|
28
34
|
- lib/pattern_element.rb
|
35
|
+
- lib/rule.rb
|
36
|
+
- lib/rule_variant.rb
|
37
|
+
- lib/shell.rb
|
38
|
+
- lib/string.rb
|
39
|
+
- lib/tools.rb
|
40
|
+
- lib/version.rb
|
41
|
+
- examples/indention_grouping.rb
|
42
|
+
- examples/indention_grouping_test.txt
|
43
|
+
- examples/turing/test.rb
|
44
|
+
- examples/turing/turing.rb
|
29
45
|
homepage: http://babel-bridge.rubyforge.org
|
30
46
|
licenses: []
|
31
47
|
post_install_message:
|
@@ -46,7 +62,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
46
62
|
version: '0'
|
47
63
|
requirements: []
|
48
64
|
rubyforge_project: babel-bridge
|
49
|
-
rubygems_version: 1.8.
|
65
|
+
rubygems_version: 1.8.24
|
50
66
|
signing_key:
|
51
67
|
specification_version: 3
|
52
68
|
summary: A Ruby-based parser-generator based on Parsing Expression Grammars.
|