babel_bridge 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +6 -0
- data/lib/nodes.rb +1 -1
- data/lib/nodes/empty_node.rb +1 -20
- data/lib/nodes/node.rb +33 -32
- data/lib/nodes/non_terminal_node.rb +10 -13
- data/lib/nodes/root_node.rb +4 -0
- data/lib/nodes/rule_node.rb +97 -44
- data/lib/nodes/terminal_node.rb +3 -3
- data/lib/parser.rb +95 -61
- data/lib/pattern_element.rb +71 -43
- data/lib/rule.rb +12 -6
- data/lib/rule_variant.rb +39 -15
- data/lib/string.rb +2 -9
- data/lib/tools.rb +26 -6
- data/lib/version.rb +2 -1
- data/spec/advanced_parsers_spec.rb +49 -0
- data/spec/basic_parsing_spec.rb +94 -0
- data/spec/bb_spec.rb +7 -163
- data/spec/ignore_whitespace_spec.rb +227 -0
- data/spec/inspect_spec.rb +50 -0
- data/spec/many_spec.rb +60 -0
- data/spec/node_spec.rb +117 -0
- data/spec/pattern_generators_spec.rb +41 -0
- data/spec/rule_parsing_spec.rb +61 -0
- data/spec/spec_helper.rb +31 -0
- data/spec/tools_spec.rb +21 -0
- metadata +13 -4
- data/lib/nodes/many_node.rb +0 -53
- data/test/test_bb.rb +0 -458
- data/test/test_helper.rb +0 -44
data/lib/pattern_element.rb
CHANGED
@@ -6,11 +6,20 @@ http://babel-bridge.rubyforge.org/
|
|
6
6
|
|
7
7
|
module BabelBridge
|
8
8
|
# hash which can be used declaratively
|
9
|
-
class PatternElementHash
|
9
|
+
class PatternElementHash
|
10
|
+
attr_accessor :hash
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@hash = {}
|
14
|
+
end
|
15
|
+
|
16
|
+
def [](key) @hash[key] end
|
17
|
+
def []=(key,value) @hash[key]=value end
|
18
|
+
|
10
19
|
def method_missing(method_name, *args) #method_name is a symbol
|
11
20
|
return self if args.length==1 && !args[0] # if nil is provided, don't set anything
|
12
21
|
raise "More than one argument is not supported. #{self.class}##{method_name} args=#{args.inspect}" if args.length > 1
|
13
|
-
|
22
|
+
@hash[method_name] = args[0] || true # on the other hand, if no args are provided, assume true
|
14
23
|
self
|
15
24
|
end
|
16
25
|
end
|
@@ -20,32 +29,55 @@ end
|
|
20
29
|
# :many
|
21
30
|
# :optional
|
22
31
|
class PatternElement
|
23
|
-
attr_accessor :parser
|
24
|
-
attr_accessor :match
|
32
|
+
attr_accessor :parser, :optional, :negative, :name, :terminal, :could_match
|
33
|
+
attr_accessor :match, :rule_variant, :parser_class
|
34
|
+
|
35
|
+
# true if this is a delimiter
|
36
|
+
attr_accessor :delimiter
|
25
37
|
|
26
38
|
#match can be:
|
27
39
|
# true, Hash, Symbol, String, Regexp
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
40
|
+
# options
|
41
|
+
# :rule_variant
|
42
|
+
# :parser_class
|
43
|
+
def initialize(match, options={})
|
44
|
+
@init_options = options.clone
|
45
|
+
@rule_variant = options[:rule_variant]
|
46
|
+
@parser_class = options[:parser_class]
|
47
|
+
@delimiter = options[:delimiter]
|
48
|
+
@name = options[:name]
|
49
|
+
raise "rule_variant or parser_class required" unless @rule_variant || @parser_class
|
50
|
+
|
51
|
+
init match
|
32
52
|
raise "pattern element cannot be both :dont and :optional" if negative && optional
|
33
53
|
end
|
34
54
|
|
35
55
|
def inspect
|
36
|
-
"<PatternElement rule_variant=#{rule_variant.variant_node_class} match=#{match.inspect}>"
|
56
|
+
"<PatternElement #{rule_variant && "rule_variant=#{rule_variant.variant_node_class} "}match=#{match.inspect}#{" delimiter" if delimiter}>"
|
37
57
|
end
|
38
58
|
|
39
59
|
def to_s
|
40
60
|
match.inspect
|
41
61
|
end
|
42
62
|
|
63
|
+
def parser_class
|
64
|
+
@parser_class || rule_variant.rule.parser
|
65
|
+
end
|
66
|
+
|
67
|
+
def rules
|
68
|
+
parser_class.rules
|
69
|
+
end
|
70
|
+
|
43
71
|
# attempt to match the pattern defined in self.parser in parent_node.src starting at offset parent_node.next
|
44
72
|
def parse(parent_node)
|
45
|
-
return RollbackWhitespaceNode.new(parent_node) if rewind_whitespace
|
46
73
|
|
47
74
|
# run element parser
|
48
|
-
|
75
|
+
begin
|
76
|
+
parent_node.parser.matching_negative if negative
|
77
|
+
match = parser.call(parent_node)
|
78
|
+
ensure
|
79
|
+
parent_node.parser.unmatching_negative if negative
|
80
|
+
end
|
49
81
|
|
50
82
|
# Negative patterns (PEG: !element)
|
51
83
|
match = match ? nil : EmptyNode.new(parent_node) if negative
|
@@ -56,11 +88,13 @@ class PatternElement
|
|
56
88
|
# Could-match patterns (PEG: &element)
|
57
89
|
match.match_length = 0 if match && could_match
|
58
90
|
|
59
|
-
if !match && terminal
|
91
|
+
if !match && (terminal || negative)
|
60
92
|
# log failures on Terminal patterns for debug output if overall parse fails
|
61
|
-
parent_node.parser.log_parsing_failure
|
93
|
+
parent_node.parser.log_parsing_failure parent_node.next, :pattern => self.match, :node => parent_node
|
62
94
|
end
|
63
95
|
|
96
|
+
match.delimiter = delimiter if match
|
97
|
+
|
64
98
|
# return match
|
65
99
|
match
|
66
100
|
end
|
@@ -70,12 +104,13 @@ class PatternElement
|
|
70
104
|
# initialize PatternElement based on the type of: match
|
71
105
|
def init(match)
|
72
106
|
self.match = match
|
107
|
+
match = match[0] if match.kind_of?(Array) && match.length == 1
|
73
108
|
case match
|
74
109
|
when TrueClass then init_true
|
75
110
|
when String then init_string match
|
76
111
|
when Regexp then init_regex match
|
77
112
|
when Symbol then init_rule match
|
78
|
-
when
|
113
|
+
when PatternElementHash then init_hash match
|
79
114
|
else raise "invalid pattern type: #{match.inspect}"
|
80
115
|
end
|
81
116
|
end
|
@@ -96,7 +131,7 @@ class PatternElement
|
|
96
131
|
self.parser=lambda do |parent_node|
|
97
132
|
offset = parent_node.next
|
98
133
|
if parent_node.src[offset..-1].index(optimized_regex)==0
|
99
|
-
range
|
134
|
+
range = $~.offset(0)
|
100
135
|
range = (range.min+offset)..(range.max+offset)
|
101
136
|
TerminalNode.new(parent_node,range,regex)
|
102
137
|
end
|
@@ -109,11 +144,11 @@ class PatternElement
|
|
109
144
|
rule_name.to_s[/^([^?!]*)([?!])?$/]
|
110
145
|
rule_name = $1.to_sym
|
111
146
|
option = $2
|
112
|
-
match_rule =
|
147
|
+
match_rule = rules[rule_name]
|
113
148
|
raise "no rule for #{rule_name}" unless match_rule
|
114
149
|
|
115
150
|
self.parser = lambda {|parent_node| match_rule.parse(parent_node)}
|
116
|
-
self.name
|
151
|
+
self.name ||= rule_name
|
117
152
|
case option
|
118
153
|
when "?" then self.optional = true
|
119
154
|
when "!" then self.negative = true
|
@@ -128,9 +163,6 @@ class PatternElement
|
|
128
163
|
init_many hash
|
129
164
|
elsif hash[:match]
|
130
165
|
init hash[:match]
|
131
|
-
elsif hash[:rewind_whitespace]
|
132
|
-
self.rewind_whitespace = true
|
133
|
-
return
|
134
166
|
else
|
135
167
|
raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
|
136
168
|
end
|
@@ -144,41 +176,37 @@ class PatternElement
|
|
144
176
|
# initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
|
145
177
|
def init_many(hash)
|
146
178
|
# generate single_parser
|
147
|
-
|
148
|
-
single_parser = parser
|
179
|
+
pattern_element = PatternElement.new(hash[:many], @init_options.merge(name:hash[:as]))
|
149
180
|
|
150
181
|
# generate delimiter_pattern_element
|
151
|
-
|
182
|
+
many_delimiter_pattern_element = hash[:delimiter] && PatternElement.new(hash[:delimiter], @init_options.merge(name:hash[:delimiter_name]))
|
152
183
|
|
153
184
|
# generate many-parser
|
154
185
|
self.parser = lambda do |parent_node|
|
155
|
-
|
186
|
+
parent_node.match_name_is_poly(pattern_element.name)
|
187
|
+
|
188
|
+
# fail unless we can match at least one
|
189
|
+
return unless parent_node.match pattern_element
|
156
190
|
|
157
|
-
if
|
191
|
+
if many_delimiter_pattern_element
|
192
|
+
parent_node.match_name_is_poly(many_delimiter_pattern_element.name)
|
158
193
|
# delimited matching
|
159
|
-
while
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
#match delimiter
|
166
|
-
delimiter_match = delimiter_pattern_element.parse many_node
|
167
|
-
break unless delimiter_match
|
168
|
-
many_node.add_match delimiter_match
|
194
|
+
while (parent_node.attempt_match do
|
195
|
+
parent_node.match_delimiter &&
|
196
|
+
parent_node.match(many_delimiter_pattern_element).tap{|md|md&&md.many_delimiter=true} &&
|
197
|
+
parent_node.match_delimiter &&
|
198
|
+
parent_node.match(pattern_element)
|
199
|
+
end)
|
169
200
|
end
|
170
|
-
many_node.separate_delimiter_matches
|
171
201
|
else
|
172
202
|
# not delimited matching
|
173
|
-
while
|
174
|
-
|
175
|
-
|
176
|
-
|
203
|
+
while (parent_node.attempt_match do
|
204
|
+
parent_node.match_delimiter &&
|
205
|
+
parent_node.match(pattern_element)
|
206
|
+
end)
|
177
207
|
end
|
178
208
|
end
|
179
|
-
|
180
|
-
# success only if we have at least one match
|
181
|
-
many_node.length>0 && many_node
|
209
|
+
parent_node
|
182
210
|
end
|
183
211
|
end
|
184
212
|
end
|
data/lib/rule.rb
CHANGED
@@ -11,7 +11,7 @@ class Rule
|
|
11
11
|
end
|
12
12
|
|
13
13
|
# creates a new sub_class of the node_class for a variant
|
14
|
-
def
|
14
|
+
def new_variant_node_class
|
15
15
|
rule_variant_class_name = "#{name}_node#{self.variants.length+1}".camelize
|
16
16
|
parser.const_set rule_variant_class_name, Class.new(node_class)
|
17
17
|
end
|
@@ -24,11 +24,17 @@ class Rule
|
|
24
24
|
@node_class = create_node_class
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
27
|
+
def root_rule?
|
28
|
+
parser.root_rule == name
|
29
|
+
end
|
30
|
+
|
31
|
+
def add_variant(options={}, &block)
|
32
|
+
new_variant_node_class.tap do |variant_node_class|
|
33
|
+
options[:variant_node_class] = variant_node_class
|
34
|
+
options[:rule] = self
|
35
|
+
variants << RuleVariant.new(options)
|
36
|
+
variant_node_class.class_eval &block if block
|
37
|
+
end
|
32
38
|
end
|
33
39
|
|
34
40
|
def parse(node)
|
data/lib/rule_variant.rb
CHANGED
@@ -3,35 +3,59 @@ module BabelBridge
|
|
3
3
|
# Each Rule has one or more RuleVariant
|
4
4
|
# Rules attempt to match each of their Variants in order. The first one to succeed returns true and the Rule succeeds.
|
5
5
|
class RuleVariant
|
6
|
-
attr_accessor :pattern, :rule, :variant_node_class
|
6
|
+
attr_accessor :pattern, :rule, :variant_node_class, :delimiter_pattern, :match_delimiter_prepost
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
# pattern: Array - the pattern to match
|
9
|
+
# rule: Rule instance
|
10
|
+
# variant_node_class: RuleVariant class
|
11
|
+
def initialize(options = {})
|
12
|
+
@pattern = options[:pattern]
|
13
|
+
@rule = options[:rule]
|
14
|
+
@variant_node_class = options[:variant_node_class]
|
15
|
+
raise "variant_node_class required" unless variant_node_class
|
16
|
+
@delimiter = options[:delimiter]
|
17
|
+
end
|
18
|
+
|
19
|
+
def parser
|
20
|
+
@rule.parser
|
21
|
+
end
|
22
|
+
|
23
|
+
def root_rule?
|
24
|
+
rule.root_rule?
|
25
|
+
end
|
26
|
+
|
27
|
+
def delimiter_pattern
|
28
|
+
@delimiter_pattern ||= if @delimiter
|
29
|
+
PatternElement.new(@delimiter, :rule_variant => self, :delimiter => true) unless @delimiter==// || @delimiter==""
|
30
|
+
else
|
31
|
+
parser.delimiter_pattern
|
32
|
+
end
|
12
33
|
end
|
13
34
|
|
14
35
|
# convert the pattern into a set of lambda functions
|
15
36
|
def pattern_elements
|
16
|
-
@pattern_elements||=pattern.collect { |match| PatternElement.new
|
37
|
+
@pattern_elements||=pattern.collect { |match| [PatternElement.new(match, :rule_variant => self, :pattern_element => true), delimiter_pattern] }.flatten[0..-2]
|
38
|
+
end
|
39
|
+
|
40
|
+
def parse_element(element_parser, node)
|
41
|
+
node.add_match element_parser.parse(node), element_parser.name
|
17
42
|
end
|
18
43
|
|
19
44
|
# returns a Node object if it matches, nil otherwise
|
20
45
|
def parse(parent_node)
|
21
46
|
#return parse_nongreedy_optional(src,offset,parent_node) # nongreedy optionals break standard PEG
|
22
|
-
node = variant_node_class.new(parent_node)
|
47
|
+
node = variant_node_class.new(parent_node, delimiter_pattern)
|
48
|
+
|
49
|
+
node.match parser.delimiter_pattern if root_rule?
|
23
50
|
|
24
51
|
pattern_elements.each do |pe|
|
25
|
-
match
|
52
|
+
return unless node.match(pe)
|
53
|
+
end
|
54
|
+
node.pop_match if node.last_match && node.last_match.delimiter
|
26
55
|
|
27
|
-
|
28
|
-
return if !match
|
29
|
-
match.matched
|
56
|
+
node.match parser.delimiter_pattern if root_rule?
|
30
57
|
|
31
|
-
|
32
|
-
node.add_match(match,pe.name)
|
33
|
-
end
|
34
|
-
node.post_match
|
58
|
+
node && node.post_match_processing
|
35
59
|
end
|
36
60
|
|
37
61
|
def inspect; pattern.collect {|a| a.inspect}.join(', '); end
|
data/lib/string.rb
CHANGED
@@ -14,16 +14,9 @@ module BabelBridge
|
|
14
14
|
lines.length<=n ? self : lines[-n..-1].join("\n")
|
15
15
|
end
|
16
16
|
|
17
|
-
# return the line and column of a given offset into this string
|
18
|
-
# line and column are 1-based
|
19
|
-
def line_col(offset)
|
20
|
-
return 1,1 if length==0 || offset==0
|
21
|
-
lines=(self[0..offset-1]+" ").split("\n")
|
22
|
-
return lines.length, lines[-1].length
|
23
|
-
end
|
24
17
|
end
|
25
18
|
end
|
26
|
-
|
19
|
+
|
27
20
|
class String
|
28
21
|
include BabelBridge::StringExtensions
|
29
|
-
end
|
22
|
+
end
|
data/lib/tools.rb
CHANGED
@@ -2,8 +2,29 @@ module BabelBridge
|
|
2
2
|
class Tools
|
3
3
|
class << self
|
4
4
|
|
5
|
-
def indent(string,
|
6
|
-
|
5
|
+
def indent(string, first_indent = " ", rest_indent = first_indent)
|
6
|
+
first_indent + string.gsub("\n", "\n#{rest_indent}")
|
7
|
+
end
|
8
|
+
|
9
|
+
def uniform_tabs(string)
|
10
|
+
lines = string.split("\n").collect{|line|line.split("\t")}
|
11
|
+
max_fields = lines.collect {|line| line.length}.max
|
12
|
+
max_fields.times do |field|
|
13
|
+
max_field_length = lines.collect {|line| (line[field]||"").length}.max
|
14
|
+
formatter = "%-#{max_field_length}s "
|
15
|
+
lines.each_with_index do |line,i|
|
16
|
+
lines[i][field] = formatter%line[field] if line[field]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
lines.collect {|line|line.join}.join("\n")
|
20
|
+
end
|
21
|
+
|
22
|
+
# return the line and column of a given offset into this string
|
23
|
+
# line and column are 1-based
|
24
|
+
def line_column(string, offset)
|
25
|
+
return 1,1 if string.length==0 || offset==0
|
26
|
+
lines = (string[0..offset-1] + " ").split("\n")
|
27
|
+
return lines.length, lines[-1].length
|
7
28
|
end
|
8
29
|
|
9
30
|
def symbols_to_strings(array)
|
@@ -21,14 +42,14 @@ class Tools
|
|
21
42
|
array.sort_by {|a| a.kind_of?(Regexp) ? 0 : -a.length}
|
22
43
|
end
|
23
44
|
|
24
|
-
# Takes an array of Strings and Regexp and generates a new Regexp
|
45
|
+
# Takes an array of Strings and Regexp and generates a new Regexp
|
25
46
|
# that matches the or ("|") of all strings and Regexp
|
26
47
|
def array_to_or_regexp_string(array)
|
27
48
|
array = symbols_to_strings array.flatten
|
28
49
|
array = sort_operator_patterns array
|
29
50
|
array = regexp_and_strings_to_regexpstrings array
|
30
51
|
|
31
|
-
array.collect {|op| "(#{op})"}.join('|')
|
52
|
+
array.collect {|op| "(#{op})"}.join('|')
|
32
53
|
end
|
33
54
|
|
34
55
|
def array_to_anchored_or_regexp(array)
|
@@ -96,7 +117,6 @@ class BinaryOperatorProcessor
|
|
96
117
|
return operands[0] if operands.length==1
|
97
118
|
|
98
119
|
i = index_of_lowest_precedence(operators)
|
99
|
-
|
100
120
|
operator = operators[i]
|
101
121
|
new_operand = node_class.new(parent_node)
|
102
122
|
new_operand.add_match generate_tree(operands[0..i], operators[0..i-1],new_operand), :left
|
@@ -106,4 +126,4 @@ class BinaryOperatorProcessor
|
|
106
126
|
end
|
107
127
|
|
108
128
|
end
|
109
|
-
end
|
129
|
+
end
|
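data/lib/version.rb
CHANGED
(the version.rb hunk, +2 -1, is missing from this extract)
data/spec/advanced_parsers_spec.rb
ADDED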
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "advanced parsing" do
|
4
|
+
include TestParserGenerator
|
5
|
+
|
6
|
+
it "test_custom_parser" do
|
7
|
+
new_parser do
|
8
|
+
rule :foo, (custom_parser do |parent_node|
|
9
|
+
offset=parent_node.next
|
10
|
+
src=parent_node.src
|
11
|
+
|
12
|
+
# Note, the \A anchors the search at the beginning of the string
|
13
|
+
if src[offset..-1].index(/\A[A-Z]+/)==0
|
14
|
+
endpattern=$~.to_s
|
15
|
+
if i=src.index(endpattern,offset+endpattern.length)
|
16
|
+
range = offset..(i+endpattern.length)
|
17
|
+
BabelBridge::TerminalNode.new(parent_node,range,"endpattern")
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end)
|
21
|
+
end
|
22
|
+
|
23
|
+
test_parse("END this is in the middle END")
|
24
|
+
test_parse("END this is in END the middle END",:partial_match => true).text.should == "END this is in END"
|
25
|
+
test_parse "END this is in the middle EN", :should_fail_at => 0
|
26
|
+
test_parse " END this is in the middle END", :should_fail_at => 0
|
27
|
+
end
|
28
|
+
|
29
|
+
it "test_binary_operator_rule" do
|
30
|
+
new_parser do
|
31
|
+
binary_operators_rule :bin_op, :int, ["**", [:/, :*], [:+, "-"]], :right_operators => ["**"] do
|
32
|
+
def evaluate
|
33
|
+
"(#{left.evaluate}#{operator}#{right.evaluate})"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
rule :int, /[-]?[0-9]+/ do
|
38
|
+
def evaluate; to_s; end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
test_parse("1+2").evaluate .should == "(1+2)"
|
42
|
+
test_parse("1+2+3").evaluate .should == "((1+2)+3)"
|
43
|
+
test_parse("1+2*3").evaluate .should == "(1+(2*3))"
|
44
|
+
test_parse("1*2+3").evaluate .should == "((1*2)+3)"
|
45
|
+
test_parse("5**6").evaluate .should == "(5**6)"
|
46
|
+
test_parse("1-2+3*4/5**6").evaluate .should == "((1-2)+((3*4)/(5**6)))"
|
47
|
+
test_parse("5**6**7").evaluate .should == "(5**(6**7))"
|
48
|
+
end
|
49
|
+
end
|