babel_bridge 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,11 +6,20 @@ http://babel-bridge.rubyforge.org/
6
6
 
7
7
  module BabelBridge
8
8
  # hash which can be used declaratively
9
- class PatternElementHash < Hash
9
+ class PatternElementHash
10
+ attr_accessor :hash
11
+
12
+ def initialize
13
+ @hash = {}
14
+ end
15
+
16
+ def [](key) @hash[key] end
17
+ def []=(key,value) @hash[key]=value end
18
+
10
19
  def method_missing(method_name, *args) #method_name is a symbol
11
20
  return self if args.length==1 && !args[0] # if nil is provided, don't set anything
12
21
  raise "More than one argument is not supported. #{self.class}##{method_name} args=#{args.inspect}" if args.length > 1
13
- self[method_name]=args[0] || true # on the other hand, if no args are provided, assume true
22
+ @hash[method_name] = args[0] || true # on the other hand, if no args are provided, assume true
14
23
  self
15
24
  end
16
25
  end
@@ -20,32 +29,55 @@ end
20
29
  # :many
21
30
  # :optional
22
31
  class PatternElement
23
- attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
24
- attr_accessor :match,:rule_variant,:rewind_whitespace
32
+ attr_accessor :parser, :optional, :negative, :name, :terminal, :could_match
33
+ attr_accessor :match, :rule_variant, :parser_class
34
+
35
+ # true if this is a delimiter
36
+ attr_accessor :delimiter
25
37
 
26
38
  #match can be:
27
39
  # true, Hash, Symbol, String, Regexp
28
- def initialize(match,rule_variant)
29
- self.rule_variant=rule_variant
30
- init(match)
31
-
40
+ # options
41
+ # :rule_varient
42
+ # :parser
43
+ def initialize(match, options={})
44
+ @init_options = options.clone
45
+ @rule_variant = options[:rule_variant]
46
+ @parser_class = options[:parser_class]
47
+ @delimiter = options[:delimiter]
48
+ @name = options[:name]
49
+ raise "rule_variant or parser_class required" unless @rule_variant || @parser_class
50
+
51
+ init match
32
52
  raise "pattern element cannot be both :dont and :optional" if negative && optional
33
53
  end
34
54
 
35
55
  def inspect
36
- "<PatternElement rule_variant=#{rule_variant.variant_node_class} match=#{match.inspect}>"
56
+ "<PatternElement #{rule_variant && "rule_variant=#{rule_variant.variant_node_class} "}match=#{match.inspect}#{" delimiter" if delimiter}>"
37
57
  end
38
58
 
39
59
  def to_s
40
60
  match.inspect
41
61
  end
42
62
 
63
+ def parser_class
64
+ @parser_class || rule_variant.rule.parser
65
+ end
66
+
67
+ def rules
68
+ parser_class.rules
69
+ end
70
+
43
71
  # attempt to match the pattern defined in self.parser in parent_node.src starting at offset parent_node.next
44
72
  def parse(parent_node)
45
- return RollbackWhitespaceNode.new(parent_node) if rewind_whitespace
46
73
 
47
74
  # run element parser
48
- match = parser.call(parent_node)
75
+ begin
76
+ parent_node.parser.matching_negative if negative
77
+ match = parser.call(parent_node)
78
+ ensure
79
+ parent_node.parser.unmatching_negative if negative
80
+ end
49
81
 
50
82
  # Negative patterns (PEG: !element)
51
83
  match = match ? nil : EmptyNode.new(parent_node) if negative
@@ -56,11 +88,13 @@ class PatternElement
56
88
  # Could-match patterns (PEG: &element)
57
89
  match.match_length = 0 if match && could_match
58
90
 
59
- if !match && terminal
91
+ if !match && (terminal || negative)
60
92
  # log failures on Terminal patterns for debug output if overall parse fails
61
- parent_node.parser.log_parsing_failure(parent_node.next,:pattern=>self.match,:node=>parent_node)
93
+ parent_node.parser.log_parsing_failure parent_node.next, :pattern => self.match, :node => parent_node
62
94
  end
63
95
 
96
+ match.delimiter = delimiter if match
97
+
64
98
  # return match
65
99
  match
66
100
  end
@@ -70,12 +104,13 @@ class PatternElement
70
104
  # initialize PatternElement based on the type of: match
71
105
  def init(match)
72
106
  self.match = match
107
+ match = match[0] if match.kind_of?(Array) && match.length == 1
73
108
  case match
74
109
  when TrueClass then init_true
75
110
  when String then init_string match
76
111
  when Regexp then init_regex match
77
112
  when Symbol then init_rule match
78
- when Hash then init_hash match
113
+ when PatternElementHash then init_hash match
79
114
  else raise "invalid pattern type: #{match.inspect}"
80
115
  end
81
116
  end
@@ -96,7 +131,7 @@ class PatternElement
96
131
  self.parser=lambda do |parent_node|
97
132
  offset = parent_node.next
98
133
  if parent_node.src[offset..-1].index(optimized_regex)==0
99
- range=$~.offset(0)
134
+ range = $~.offset(0)
100
135
  range = (range.min+offset)..(range.max+offset)
101
136
  TerminalNode.new(parent_node,range,regex)
102
137
  end
@@ -109,11 +144,11 @@ class PatternElement
109
144
  rule_name.to_s[/^([^?!]*)([?!])?$/]
110
145
  rule_name = $1.to_sym
111
146
  option = $2
112
- match_rule = rule_variant.rule.parser.rules[rule_name]
147
+ match_rule = rules[rule_name]
113
148
  raise "no rule for #{rule_name}" unless match_rule
114
149
 
115
150
  self.parser = lambda {|parent_node| match_rule.parse(parent_node)}
116
- self.name = rule_name
151
+ self.name ||= rule_name
117
152
  case option
118
153
  when "?" then self.optional = true
119
154
  when "!" then self.negative = true
@@ -128,9 +163,6 @@ class PatternElement
128
163
  init_many hash
129
164
  elsif hash[:match]
130
165
  init hash[:match]
131
- elsif hash[:rewind_whitespace]
132
- self.rewind_whitespace = true
133
- return
134
166
  else
135
167
  raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
136
168
  end
@@ -144,41 +176,37 @@ class PatternElement
144
176
  # initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
145
177
  def init_many(hash)
146
178
  # generate single_parser
147
- init hash[:many]
148
- single_parser = parser
179
+ pattern_element = PatternElement.new(hash[:many], @init_options.merge(name:hash[:as]))
149
180
 
150
181
  # generate delimiter_pattern_element
151
- delimiter_pattern_element = hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)
182
+ many_delimiter_pattern_element = hash[:delimiter] && PatternElement.new(hash[:delimiter], @init_options.merge(name:hash[:delimiter_name]))
152
183
 
153
184
  # generate many-parser
154
185
  self.parser = lambda do |parent_node|
155
- many_node = ManyNode.new parent_node
186
+ parent_node.match_name_is_poly(pattern_element.name)
187
+
188
+ # fail unless we can match at least one
189
+ return unless parent_node.match pattern_element
156
190
 
157
- if delimiter_pattern_element
191
+ if many_delimiter_pattern_element
192
+ parent_node.match_name_is_poly(many_delimiter_pattern_element.name)
158
193
  # delimited matching
159
- while true
160
- #match primary
161
- match = single_parser.call many_node
162
- break unless match
163
- many_node.add_match match
164
-
165
- #match delimiter
166
- delimiter_match = delimiter_pattern_element.parse many_node
167
- break unless delimiter_match
168
- many_node.add_match delimiter_match
194
+ while (parent_node.attempt_match do
195
+ parent_node.match_delimiter &&
196
+ parent_node.match(many_delimiter_pattern_element).tap{|md|md&&md.many_delimiter=true} &&
197
+ parent_node.match_delimiter &&
198
+ parent_node.match(pattern_element)
199
+ end)
169
200
  end
170
- many_node.separate_delimiter_matches
171
201
  else
172
202
  # not delimited matching
173
- while true
174
- match = single_parser.call many_node
175
- break unless match
176
- many_node.add_match match
203
+ while (parent_node.attempt_match do
204
+ parent_node.match_delimiter &&
205
+ parent_node.match(pattern_element)
206
+ end)
177
207
  end
178
208
  end
179
-
180
- # success only if we have at least one match
181
- many_node.length>0 && many_node
209
+ parent_node
182
210
  end
183
211
  end
184
212
  end
@@ -11,7 +11,7 @@ class Rule
11
11
  end
12
12
 
13
13
  # creates a new sub_class of the node_class for a variant
14
- def create_next_node_variant_class
14
+ def new_variant_node_class
15
15
  rule_variant_class_name = "#{name}_node#{self.variants.length+1}".camelize
16
16
  parser.const_set rule_variant_class_name, Class.new(node_class)
17
17
  end
@@ -24,11 +24,17 @@ class Rule
24
24
  @node_class = create_node_class
25
25
  end
26
26
 
27
- def add_variant(pattern, &block)
28
- rule_variant_class = create_next_node_variant_class
29
- variants << RuleVariant.new(pattern, self, rule_variant_class)
30
- rule_variant_class.class_eval &block if block
31
- rule_variant_class
27
+ def root_rule?
28
+ parser.root_rule == name
29
+ end
30
+
31
+ def add_variant(options={}, &block)
32
+ new_variant_node_class.tap do |variant_node_class|
33
+ options[:variant_node_class] = variant_node_class
34
+ options[:rule] = self
35
+ variants << RuleVariant.new(options)
36
+ variant_node_class.class_eval &block if block
37
+ end
32
38
  end
33
39
 
34
40
  def parse(node)
@@ -3,35 +3,59 @@ module BabelBridge
3
3
  # Each Rule has one or more RuleVariant
4
4
  # Rules attempt to match each of their Variants in order. The first one to succeed returns true and the Rule succeeds.
5
5
  class RuleVariant
6
- attr_accessor :pattern, :rule, :variant_node_class
6
+ attr_accessor :pattern, :rule, :variant_node_class, :delimiter_pattern, :match_delimiter_prepost
7
7
 
8
- def initialize(pattern, rule, variant_node_class=nil)
9
- @pattern = pattern
10
- @rule = rule
11
- @variant_node_class = variant_node_class
8
+ # pattern: Array - the pattern to match
9
+ # rule: Rule instance
10
+ # variant_node_class: RuleVariant class
11
+ def initialize(options = {})
12
+ @pattern = options[:pattern]
13
+ @rule = options[:rule]
14
+ @variant_node_class = options[:variant_node_class]
15
+ raise "variant_node_class required" unless variant_node_class
16
+ @delimiter = options[:delimiter]
17
+ end
18
+
19
+ def parser
20
+ @rule.parser
21
+ end
22
+
23
+ def root_rule?
24
+ rule.root_rule?
25
+ end
26
+
27
+ def delimiter_pattern
28
+ @delimiter_pattern ||= if @delimiter
29
+ PatternElement.new(@delimiter, :rule_variant => self, :delimiter => true) unless @delimiter==// || @delimiter==""
30
+ else
31
+ parser.delimiter_pattern
32
+ end
12
33
  end
13
34
 
14
35
  # convert the pattern into a set of lamba functions
15
36
  def pattern_elements
16
- @pattern_elements||=pattern.collect { |match| PatternElement.new match, self }
37
+ @pattern_elements||=pattern.collect { |match| [PatternElement.new(match, :rule_variant => self, :pattern_element => true), delimiter_pattern] }.flatten[0..-2]
38
+ end
39
+
40
+ def parse_element(element_parser, node)
41
+ node.add_match element_parser.parse(node), element_parser.name
17
42
  end
18
43
 
19
44
  # returns a Node object if it matches, nil otherwise
20
45
  def parse(parent_node)
21
46
  #return parse_nongreedy_optional(src,offset,parent_node) # nongreedy optionals break standard PEG
22
- node = variant_node_class.new(parent_node)
47
+ node = variant_node_class.new(parent_node, delimiter_pattern)
48
+
49
+ node.match parser.delimiter_pattern if root_rule?
23
50
 
24
51
  pattern_elements.each do |pe|
25
- match=pe.parse(node)
52
+ return unless node.match(pe)
53
+ end
54
+ node.pop_match if node.last_match && node.last_match.delimiter
26
55
 
27
- # if parse failed
28
- return if !match
29
- match.matched
56
+ node.match parser.delimiter_pattern if root_rule?
30
57
 
31
- # parse succeeded, add to node and continue
32
- node.add_match(match,pe.name)
33
- end
34
- node.post_match
58
+ node && node.post_match_processing
35
59
  end
36
60
 
37
61
  def inspect; pattern.collect {|a| a.inspect}.join(', '); end
@@ -14,16 +14,9 @@ module BabelBridge
14
14
  lines.length<=n ? self : lines[-n..-1].join("\n")
15
15
  end
16
16
 
17
- # return the line and column of a given offset into this string
18
- # line and column are 1-based
19
- def line_col(offset)
20
- return 1,1 if length==0 || offset==0
21
- lines=(self[0..offset-1]+" ").split("\n")
22
- return lines.length, lines[-1].length
23
- end
24
17
  end
25
18
  end
26
-
19
+
27
20
  class String
28
21
  include BabelBridge::StringExtensions
29
- end
22
+ end
@@ -2,8 +2,29 @@ module BabelBridge
2
2
  class Tools
3
3
  class << self
4
4
 
5
- def indent(string,indent=" ")
6
- indent + string.gsub("\n", "\n#{indent}")
5
+ def indent(string, first_indent = " ", rest_indent = first_indent)
6
+ first_indent + string.gsub("\n", "\n#{rest_indent}")
7
+ end
8
+
9
+ def uniform_tabs(string)
10
+ lines = string.split("\n").collect{|line|line.split("\t")}
11
+ max_fields = lines.collect {|line| line.length}.max
12
+ max_fields.times do |field|
13
+ max_field_length = lines.collect {|line| (line[field]||"").length}.max
14
+ formatter = "%-#{max_field_length}s "
15
+ lines.each_with_index do |line,i|
16
+ lines[i][field] = formatter%line[field] if line[field]
17
+ end
18
+ end
19
+ lines.collect {|line|line.join}.join("\n")
20
+ end
21
+
22
+ # return the line and column of a given offset into this string
23
+ # line and column are 1-based
24
+ def line_column(string, offset)
25
+ return 1,1 if string.length==0 || offset==0
26
+ lines = (string[0..offset-1] + " ").split("\n")
27
+ return lines.length, lines[-1].length
7
28
  end
8
29
 
9
30
  def symbols_to_strings(array)
@@ -21,14 +42,14 @@ class Tools
21
42
  array.sort_by {|a| a.kind_of?(Regexp) ? 0 : -a.length}
22
43
  end
23
44
 
24
- # Takes an array of Strings and Regexp and generates a new Regexp
45
+ # Takes an array of Strings and Regexp and generates a new Regexp
25
46
  # that matches the or ("|") of all strings and Regexp
26
47
  def array_to_or_regexp_string(array)
27
48
  array = symbols_to_strings array.flatten
28
49
  array = sort_operator_patterns array
29
50
  array = regexp_and_strings_to_regexpstrings array
30
51
 
31
- array.collect {|op| "(#{op})"}.join('|') #.tap {|a| puts "array_to_or_regexp_string(#{array.inspect}) -> /#{a}/"}
52
+ array.collect {|op| "(#{op})"}.join('|')
32
53
  end
33
54
 
34
55
  def array_to_anchored_or_regexp(array)
@@ -96,7 +117,6 @@ class BinaryOperatorProcessor
96
117
  return operands[0] if operands.length==1
97
118
 
98
119
  i = index_of_lowest_precedence(operators)
99
-
100
120
  operator = operators[i]
101
121
  new_operand = node_class.new(parent_node)
102
122
  new_operand.add_match generate_tree(operands[0..i], operators[0..i-1],new_operand), :left
@@ -106,4 +126,4 @@ class BinaryOperatorProcessor
106
126
  end
107
127
 
108
128
  end
109
- end
129
+ end
@@ -1,3 +1,4 @@
1
1
  module BabelBridge
2
- VERSION = "0.4.1"
2
+ # last release: 0.4.1
3
+ VERSION = "0.5.0"
3
4
  end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+
3
+ describe "advanced parsing" do
4
+ include TestParserGenerator
5
+
6
+ it "test_custom_parser" do
7
+ new_parser do
8
+ rule :foo, (custom_parser do |parent_node|
9
+ offset=parent_node.next
10
+ src=parent_node.src
11
+
12
+ # Note, the \A anchors the search at the beginning of the string
13
+ if src[offset..-1].index(/\A[A-Z]+/)==0
14
+ endpattern=$~.to_s
15
+ if i=src.index(endpattern,offset+endpattern.length)
16
+ range = offset..(i+endpattern.length)
17
+ BabelBridge::TerminalNode.new(parent_node,range,"endpattern")
18
+ end
19
+ end
20
+ end)
21
+ end
22
+
23
+ test_parse("END this is in the middle END")
24
+ test_parse("END this is in END the middle END",:partial_match => true).text.should == "END this is in END"
25
+ test_parse "END this is in the middle EN", :should_fail_at => 0
26
+ test_parse " END this is in the middle END", :should_fail_at => 0
27
+ end
28
+
29
+ it "test_binary_operator_rule" do
30
+ new_parser do
31
+ binary_operators_rule :bin_op, :int, ["**", [:/, :*], [:+, "-"]], :right_operators => ["**"] do
32
+ def evaluate
33
+ "(#{left.evaluate}#{operator}#{right.evaluate})"
34
+ end
35
+ end
36
+
37
+ rule :int, /[-]?[0-9]+/ do
38
+ def evaluate; to_s; end
39
+ end
40
+ end
41
+ test_parse("1+2").evaluate .should == "(1+2)"
42
+ test_parse("1+2+3").evaluate .should == "((1+2)+3)"
43
+ test_parse("1+2*3").evaluate .should == "(1+(2*3))"
44
+ test_parse("1*2+3").evaluate .should == "((1*2)+3)"
45
+ test_parse("5**6").evaluate .should == "(5**6)"
46
+ test_parse("1-2+3*4/5**6").evaluate .should == "((1-2)+((3*4)/(5**6)))"
47
+ test_parse("5**6**7").evaluate .should == "(5**(6**7))"
48
+ end
49
+ end