babel_bridge 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +6 -0
- data/lib/nodes.rb +1 -1
- data/lib/nodes/empty_node.rb +1 -20
- data/lib/nodes/node.rb +33 -32
- data/lib/nodes/non_terminal_node.rb +10 -13
- data/lib/nodes/root_node.rb +4 -0
- data/lib/nodes/rule_node.rb +97 -44
- data/lib/nodes/terminal_node.rb +3 -3
- data/lib/parser.rb +95 -61
- data/lib/pattern_element.rb +71 -43
- data/lib/rule.rb +12 -6
- data/lib/rule_variant.rb +39 -15
- data/lib/string.rb +2 -9
- data/lib/tools.rb +26 -6
- data/lib/version.rb +2 -1
- data/spec/advanced_parsers_spec.rb +49 -0
- data/spec/basic_parsing_spec.rb +94 -0
- data/spec/bb_spec.rb +7 -163
- data/spec/ignore_whitespace_spec.rb +227 -0
- data/spec/inspect_spec.rb +50 -0
- data/spec/many_spec.rb +60 -0
- data/spec/node_spec.rb +117 -0
- data/spec/pattern_generators_spec.rb +41 -0
- data/spec/rule_parsing_spec.rb +61 -0
- data/spec/spec_helper.rb +31 -0
- data/spec/tools_spec.rb +21 -0
- metadata +13 -4
- data/lib/nodes/many_node.rb +0 -53
- data/test/test_bb.rb +0 -458
- data/test/test_helper.rb +0 -44
data/lib/pattern_element.rb
CHANGED
@@ -6,11 +6,20 @@ http://babel-bridge.rubyforge.org/
|
|
6
6
|
|
7
7
|
module BabelBridge
|
8
8
|
# hash which can be used declaratively
|
9
|
-
class PatternElementHash
|
9
|
+
class PatternElementHash
|
10
|
+
attr_accessor :hash
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@hash = {}
|
14
|
+
end
|
15
|
+
|
16
|
+
def [](key) @hash[key] end
|
17
|
+
def []=(key,value) @hash[key]=value end
|
18
|
+
|
10
19
|
def method_missing(method_name, *args) #method_name is a symbol
|
11
20
|
return self if args.length==1 && !args[0] # if nil is provided, don't set anything
|
12
21
|
raise "More than one argument is not supported. #{self.class}##{method_name} args=#{args.inspect}" if args.length > 1
|
13
|
-
|
22
|
+
@hash[method_name] = args[0] || true # on the other hand, if no args are provided, assume true
|
14
23
|
self
|
15
24
|
end
|
16
25
|
end
|
@@ -20,32 +29,55 @@ end
|
|
20
29
|
# :many
|
21
30
|
# :optional
|
22
31
|
class PatternElement
|
23
|
-
attr_accessor :parser
|
24
|
-
attr_accessor :match
|
32
|
+
attr_accessor :parser, :optional, :negative, :name, :terminal, :could_match
|
33
|
+
attr_accessor :match, :rule_variant, :parser_class
|
34
|
+
|
35
|
+
# true if this is a delimiter
|
36
|
+
attr_accessor :delimiter
|
25
37
|
|
26
38
|
#match can be:
|
27
39
|
# true, Hash, Symbol, String, Regexp
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
40
|
+
# options
|
41
|
+
# :rule_variant
|
42
|
+
# :parser_class (one of :rule_variant or :parser_class is required)
|
43
|
+
def initialize(match, options={})
|
44
|
+
@init_options = options.clone
|
45
|
+
@rule_variant = options[:rule_variant]
|
46
|
+
@parser_class = options[:parser_class]
|
47
|
+
@delimiter = options[:delimiter]
|
48
|
+
@name = options[:name]
|
49
|
+
raise "rule_variant or parser_class required" unless @rule_variant || @parser_class
|
50
|
+
|
51
|
+
init match
|
32
52
|
raise "pattern element cannot be both :dont and :optional" if negative && optional
|
33
53
|
end
|
34
54
|
|
35
55
|
def inspect
|
36
|
-
"<PatternElement rule_variant=#{rule_variant.variant_node_class} match=#{match.inspect}>"
|
56
|
+
"<PatternElement #{rule_variant && "rule_variant=#{rule_variant.variant_node_class} "}match=#{match.inspect}#{" delimiter" if delimiter}>"
|
37
57
|
end
|
38
58
|
|
39
59
|
def to_s
|
40
60
|
match.inspect
|
41
61
|
end
|
42
62
|
|
63
|
+
def parser_class
|
64
|
+
@parser_class || rule_variant.rule.parser
|
65
|
+
end
|
66
|
+
|
67
|
+
def rules
|
68
|
+
parser_class.rules
|
69
|
+
end
|
70
|
+
|
43
71
|
# attempt to match the pattern defined in self.parser in parent_node.src starting at offset parent_node.next
|
44
72
|
def parse(parent_node)
|
45
|
-
return RollbackWhitespaceNode.new(parent_node) if rewind_whitespace
|
46
73
|
|
47
74
|
# run element parser
|
48
|
-
|
75
|
+
begin
|
76
|
+
parent_node.parser.matching_negative if negative
|
77
|
+
match = parser.call(parent_node)
|
78
|
+
ensure
|
79
|
+
parent_node.parser.unmatching_negative if negative
|
80
|
+
end
|
49
81
|
|
50
82
|
# Negative patterns (PEG: !element)
|
51
83
|
match = match ? nil : EmptyNode.new(parent_node) if negative
|
@@ -56,11 +88,13 @@ class PatternElement
|
|
56
88
|
# Could-match patterns (PEG: &element)
|
57
89
|
match.match_length = 0 if match && could_match
|
58
90
|
|
59
|
-
if !match && terminal
|
91
|
+
if !match && (terminal || negative)
|
60
92
|
# log failures on Terminal patterns for debug output if overall parse fails
|
61
|
-
parent_node.parser.log_parsing_failure
|
93
|
+
parent_node.parser.log_parsing_failure parent_node.next, :pattern => self.match, :node => parent_node
|
62
94
|
end
|
63
95
|
|
96
|
+
match.delimiter = delimiter if match
|
97
|
+
|
64
98
|
# return match
|
65
99
|
match
|
66
100
|
end
|
@@ -70,12 +104,13 @@ class PatternElement
|
|
70
104
|
# initialize PatternElement based on the type of: match
|
71
105
|
def init(match)
|
72
106
|
self.match = match
|
107
|
+
match = match[0] if match.kind_of?(Array) && match.length == 1
|
73
108
|
case match
|
74
109
|
when TrueClass then init_true
|
75
110
|
when String then init_string match
|
76
111
|
when Regexp then init_regex match
|
77
112
|
when Symbol then init_rule match
|
78
|
-
when
|
113
|
+
when PatternElementHash then init_hash match
|
79
114
|
else raise "invalid pattern type: #{match.inspect}"
|
80
115
|
end
|
81
116
|
end
|
@@ -96,7 +131,7 @@ class PatternElement
|
|
96
131
|
self.parser=lambda do |parent_node|
|
97
132
|
offset = parent_node.next
|
98
133
|
if parent_node.src[offset..-1].index(optimized_regex)==0
|
99
|
-
range
|
134
|
+
range = $~.offset(0)
|
100
135
|
range = (range.min+offset)..(range.max+offset)
|
101
136
|
TerminalNode.new(parent_node,range,regex)
|
102
137
|
end
|
@@ -109,11 +144,11 @@ class PatternElement
|
|
109
144
|
rule_name.to_s[/^([^?!]*)([?!])?$/]
|
110
145
|
rule_name = $1.to_sym
|
111
146
|
option = $2
|
112
|
-
match_rule =
|
147
|
+
match_rule = rules[rule_name]
|
113
148
|
raise "no rule for #{rule_name}" unless match_rule
|
114
149
|
|
115
150
|
self.parser = lambda {|parent_node| match_rule.parse(parent_node)}
|
116
|
-
self.name
|
151
|
+
self.name ||= rule_name
|
117
152
|
case option
|
118
153
|
when "?" then self.optional = true
|
119
154
|
when "!" then self.negative = true
|
@@ -128,9 +163,6 @@ class PatternElement
|
|
128
163
|
init_many hash
|
129
164
|
elsif hash[:match]
|
130
165
|
init hash[:match]
|
131
|
-
elsif hash[:rewind_whitespace]
|
132
|
-
self.rewind_whitespace = true
|
133
|
-
return
|
134
166
|
else
|
135
167
|
raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
|
136
168
|
end
|
@@ -144,41 +176,37 @@ class PatternElement
|
|
144
176
|
# initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
|
145
177
|
def init_many(hash)
|
146
178
|
# generate single_parser
|
147
|
-
|
148
|
-
single_parser = parser
|
179
|
+
pattern_element = PatternElement.new(hash[:many], @init_options.merge(name:hash[:as]))
|
149
180
|
|
150
181
|
# generate delimiter_pattern_element
|
151
|
-
|
182
|
+
many_delimiter_pattern_element = hash[:delimiter] && PatternElement.new(hash[:delimiter], @init_options.merge(name:hash[:delimiter_name]))
|
152
183
|
|
153
184
|
# generate many-parser
|
154
185
|
self.parser = lambda do |parent_node|
|
155
|
-
|
186
|
+
parent_node.match_name_is_poly(pattern_element.name)
|
187
|
+
|
188
|
+
# fail unless we can match at least one
|
189
|
+
return unless parent_node.match pattern_element
|
156
190
|
|
157
|
-
if
|
191
|
+
if many_delimiter_pattern_element
|
192
|
+
parent_node.match_name_is_poly(many_delimiter_pattern_element.name)
|
158
193
|
# delimited matching
|
159
|
-
while
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
#match delimiter
|
166
|
-
delimiter_match = delimiter_pattern_element.parse many_node
|
167
|
-
break unless delimiter_match
|
168
|
-
many_node.add_match delimiter_match
|
194
|
+
while (parent_node.attempt_match do
|
195
|
+
parent_node.match_delimiter &&
|
196
|
+
parent_node.match(many_delimiter_pattern_element).tap{|md|md&&md.many_delimiter=true} &&
|
197
|
+
parent_node.match_delimiter &&
|
198
|
+
parent_node.match(pattern_element)
|
199
|
+
end)
|
169
200
|
end
|
170
|
-
many_node.separate_delimiter_matches
|
171
201
|
else
|
172
202
|
# not delimited matching
|
173
|
-
while
|
174
|
-
|
175
|
-
|
176
|
-
|
203
|
+
while (parent_node.attempt_match do
|
204
|
+
parent_node.match_delimiter &&
|
205
|
+
parent_node.match(pattern_element)
|
206
|
+
end)
|
177
207
|
end
|
178
208
|
end
|
179
|
-
|
180
|
-
# success only if we have at least one match
|
181
|
-
many_node.length>0 && many_node
|
209
|
+
parent_node
|
182
210
|
end
|
183
211
|
end
|
184
212
|
end
|
data/lib/rule.rb
CHANGED
@@ -11,7 +11,7 @@ class Rule
|
|
11
11
|
end
|
12
12
|
|
13
13
|
# creates a new sub_class of the node_class for a variant
|
14
|
-
def
|
14
|
+
def new_variant_node_class
|
15
15
|
rule_variant_class_name = "#{name}_node#{self.variants.length+1}".camelize
|
16
16
|
parser.const_set rule_variant_class_name, Class.new(node_class)
|
17
17
|
end
|
@@ -24,11 +24,17 @@ class Rule
|
|
24
24
|
@node_class = create_node_class
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
27
|
+
def root_rule?
|
28
|
+
parser.root_rule == name
|
29
|
+
end
|
30
|
+
|
31
|
+
def add_variant(options={}, &block)
|
32
|
+
new_variant_node_class.tap do |variant_node_class|
|
33
|
+
options[:variant_node_class] = variant_node_class
|
34
|
+
options[:rule] = self
|
35
|
+
variants << RuleVariant.new(options)
|
36
|
+
variant_node_class.class_eval &block if block
|
37
|
+
end
|
32
38
|
end
|
33
39
|
|
34
40
|
def parse(node)
|
data/lib/rule_variant.rb
CHANGED
@@ -3,35 +3,59 @@ module BabelBridge
|
|
3
3
|
# Each Rule has one or more RuleVariant
|
4
4
|
# Rules attempt to match each of their Variants in order. The first one to succeed returns true and the Rule succeeds.
|
5
5
|
class RuleVariant
|
6
|
-
attr_accessor :pattern, :rule, :variant_node_class
|
6
|
+
attr_accessor :pattern, :rule, :variant_node_class, :delimiter_pattern, :match_delimiter_prepost
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
# pattern: Array - the pattern to match
|
9
|
+
# rule: Rule instance
|
10
|
+
# variant_node_class: Class - the node class instantiated for this variant (required)
|
11
|
+
def initialize(options = {})
|
12
|
+
@pattern = options[:pattern]
|
13
|
+
@rule = options[:rule]
|
14
|
+
@variant_node_class = options[:variant_node_class]
|
15
|
+
raise "variant_node_class required" unless variant_node_class
|
16
|
+
@delimiter = options[:delimiter]
|
17
|
+
end
|
18
|
+
|
19
|
+
def parser
|
20
|
+
@rule.parser
|
21
|
+
end
|
22
|
+
|
23
|
+
def root_rule?
|
24
|
+
rule.root_rule?
|
25
|
+
end
|
26
|
+
|
27
|
+
def delimiter_pattern
|
28
|
+
@delimiter_pattern ||= if @delimiter
|
29
|
+
PatternElement.new(@delimiter, :rule_variant => self, :delimiter => true) unless @delimiter==// || @delimiter==""
|
30
|
+
else
|
31
|
+
parser.delimiter_pattern
|
32
|
+
end
|
12
33
|
end
|
13
34
|
|
14
35
|
# convert the pattern into a set of lambda functions
|
15
36
|
def pattern_elements
|
16
|
-
@pattern_elements||=pattern.collect { |match| PatternElement.new
|
37
|
+
@pattern_elements||=pattern.collect { |match| [PatternElement.new(match, :rule_variant => self, :pattern_element => true), delimiter_pattern] }.flatten[0..-2]
|
38
|
+
end
|
39
|
+
|
40
|
+
def parse_element(element_parser, node)
|
41
|
+
node.add_match element_parser.parse(node), element_parser.name
|
17
42
|
end
|
18
43
|
|
19
44
|
# returns a Node object if it matches, nil otherwise
|
20
45
|
def parse(parent_node)
|
21
46
|
#return parse_nongreedy_optional(src,offset,parent_node) # nongreedy optionals break standard PEG
|
22
|
-
node = variant_node_class.new(parent_node)
|
47
|
+
node = variant_node_class.new(parent_node, delimiter_pattern)
|
48
|
+
|
49
|
+
node.match parser.delimiter_pattern if root_rule?
|
23
50
|
|
24
51
|
pattern_elements.each do |pe|
|
25
|
-
match
|
52
|
+
return unless node.match(pe)
|
53
|
+
end
|
54
|
+
node.pop_match if node.last_match && node.last_match.delimiter
|
26
55
|
|
27
|
-
|
28
|
-
return if !match
|
29
|
-
match.matched
|
56
|
+
node.match parser.delimiter_pattern if root_rule?
|
30
57
|
|
31
|
-
|
32
|
-
node.add_match(match,pe.name)
|
33
|
-
end
|
34
|
-
node.post_match
|
58
|
+
node && node.post_match_processing
|
35
59
|
end
|
36
60
|
|
37
61
|
def inspect; pattern.collect {|a| a.inspect}.join(', '); end
|
data/lib/string.rb
CHANGED
@@ -14,16 +14,9 @@ module BabelBridge
|
|
14
14
|
lines.length<=n ? self : lines[-n..-1].join("\n")
|
15
15
|
end
|
16
16
|
|
17
|
-
# return the line and column of a given offset into this string
|
18
|
-
# line and column are 1-based
|
19
|
-
def line_col(offset)
|
20
|
-
return 1,1 if length==0 || offset==0
|
21
|
-
lines=(self[0..offset-1]+" ").split("\n")
|
22
|
-
return lines.length, lines[-1].length
|
23
|
-
end
|
24
17
|
end
|
25
18
|
end
|
26
|
-
|
19
|
+
|
27
20
|
class String
|
28
21
|
include BabelBridge::StringExtensions
|
29
|
-
end
|
22
|
+
end
|
data/lib/tools.rb
CHANGED
@@ -2,8 +2,29 @@ module BabelBridge
|
|
2
2
|
class Tools
|
3
3
|
class << self
|
4
4
|
|
5
|
-
def indent(string,
|
6
|
-
|
5
|
+
def indent(string, first_indent = " ", rest_indent = first_indent)
|
6
|
+
first_indent + string.gsub("\n", "\n#{rest_indent}")
|
7
|
+
end
|
8
|
+
|
9
|
+
def uniform_tabs(string)
|
10
|
+
lines = string.split("\n").collect{|line|line.split("\t")}
|
11
|
+
max_fields = lines.collect {|line| line.length}.max
|
12
|
+
max_fields.times do |field|
|
13
|
+
max_field_length = lines.collect {|line| (line[field]||"").length}.max
|
14
|
+
formatter = "%-#{max_field_length}s "
|
15
|
+
lines.each_with_index do |line,i|
|
16
|
+
lines[i][field] = formatter%line[field] if line[field]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
lines.collect {|line|line.join}.join("\n")
|
20
|
+
end
|
21
|
+
|
22
|
+
# return the line and column of a given offset into the given string
|
23
|
+
# line and column are 1-based
|
24
|
+
def line_column(string, offset)
|
25
|
+
return 1,1 if string.length==0 || offset==0
|
26
|
+
lines = (string[0..offset-1] + " ").split("\n")
|
27
|
+
return lines.length, lines[-1].length
|
7
28
|
end
|
8
29
|
|
9
30
|
def symbols_to_strings(array)
|
@@ -21,14 +42,14 @@ class Tools
|
|
21
42
|
array.sort_by {|a| a.kind_of?(Regexp) ? 0 : -a.length}
|
22
43
|
end
|
23
44
|
|
24
|
-
# Takes an array of Strings and Regexp and generates a new Regexp
|
45
|
+
# Takes an array of Strings and Regexp and generates a new Regexp
|
25
46
|
# that matches the or ("|") of all strings and Regexp
|
26
47
|
def array_to_or_regexp_string(array)
|
27
48
|
array = symbols_to_strings array.flatten
|
28
49
|
array = sort_operator_patterns array
|
29
50
|
array = regexp_and_strings_to_regexpstrings array
|
30
51
|
|
31
|
-
array.collect {|op| "(#{op})"}.join('|')
|
52
|
+
array.collect {|op| "(#{op})"}.join('|')
|
32
53
|
end
|
33
54
|
|
34
55
|
def array_to_anchored_or_regexp(array)
|
@@ -96,7 +117,6 @@ class BinaryOperatorProcessor
|
|
96
117
|
return operands[0] if operands.length==1
|
97
118
|
|
98
119
|
i = index_of_lowest_precedence(operators)
|
99
|
-
|
100
120
|
operator = operators[i]
|
101
121
|
new_operand = node_class.new(parent_node)
|
102
122
|
new_operand.add_match generate_tree(operands[0..i], operators[0..i-1],new_operand), :left
|
@@ -106,4 +126,4 @@ class BinaryOperatorProcessor
|
|
106
126
|
end
|
107
127
|
|
108
128
|
end
|
109
|
-
end
|
129
|
+
end
|
data/lib/version.rb
CHANGED
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "advanced parsing" do
|
4
|
+
include TestParserGenerator
|
5
|
+
|
6
|
+
it "test_custom_parser" do
|
7
|
+
new_parser do
|
8
|
+
rule :foo, (custom_parser do |parent_node|
|
9
|
+
offset=parent_node.next
|
10
|
+
src=parent_node.src
|
11
|
+
|
12
|
+
# Note, the \A anchors the search at the beginning of the string
|
13
|
+
if src[offset..-1].index(/\A[A-Z]+/)==0
|
14
|
+
endpattern=$~.to_s
|
15
|
+
if i=src.index(endpattern,offset+endpattern.length)
|
16
|
+
range = offset..(i+endpattern.length)
|
17
|
+
BabelBridge::TerminalNode.new(parent_node,range,"endpattern")
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end)
|
21
|
+
end
|
22
|
+
|
23
|
+
test_parse("END this is in the middle END")
|
24
|
+
test_parse("END this is in END the middle END",:partial_match => true).text.should == "END this is in END"
|
25
|
+
test_parse "END this is in the middle EN", :should_fail_at => 0
|
26
|
+
test_parse " END this is in the middle END", :should_fail_at => 0
|
27
|
+
end
|
28
|
+
|
29
|
+
it "test_binary_operator_rule" do
|
30
|
+
new_parser do
|
31
|
+
binary_operators_rule :bin_op, :int, ["**", [:/, :*], [:+, "-"]], :right_operators => ["**"] do
|
32
|
+
def evaluate
|
33
|
+
"(#{left.evaluate}#{operator}#{right.evaluate})"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
rule :int, /[-]?[0-9]+/ do
|
38
|
+
def evaluate; to_s; end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
test_parse("1+2").evaluate .should == "(1+2)"
|
42
|
+
test_parse("1+2+3").evaluate .should == "((1+2)+3)"
|
43
|
+
test_parse("1+2*3").evaluate .should == "(1+(2*3))"
|
44
|
+
test_parse("1*2+3").evaluate .should == "((1*2)+3)"
|
45
|
+
test_parse("5**6").evaluate .should == "(5**6)"
|
46
|
+
test_parse("1-2+3*4/5**6").evaluate .should == "((1-2)+((3*4)/(5**6)))"
|
47
|
+
test_parse("5**6**7").evaluate .should == "(5**(6**7))"
|
48
|
+
end
|
49
|
+
end
|