babel_bridge 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,11 +6,20 @@ http://babel-bridge.rubyforge.org/
6
6
 
7
7
  module BabelBridge
8
8
  # hash which can be used declaratively
9
- class PatternElementHash < Hash
9
+ class PatternElementHash
10
+ attr_accessor :hash
11
+
12
+ def initialize
13
+ @hash = {}
14
+ end
15
+
16
+ def [](key) @hash[key] end
17
+ def []=(key,value) @hash[key]=value end
18
+
10
19
  def method_missing(method_name, *args) #method_name is a symbol
11
20
  return self if args.length==1 && !args[0] # if nil is provided, don't set anything
12
21
  raise "More than one argument is not supported. #{self.class}##{method_name} args=#{args.inspect}" if args.length > 1
13
- self[method_name]=args[0] || true # on the other hand, if no args are provided, assume true
22
+ @hash[method_name] = args[0] || true # on the other hand, if no args are provided, assume true
14
23
  self
15
24
  end
16
25
  end
@@ -20,32 +29,55 @@ end
20
29
  # :many
21
30
  # :optional
22
31
  class PatternElement
23
- attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
24
- attr_accessor :match,:rule_variant,:rewind_whitespace
32
+ attr_accessor :parser, :optional, :negative, :name, :terminal, :could_match
33
+ attr_accessor :match, :rule_variant, :parser_class
34
+
35
+ # true if this is a delimiter
36
+ attr_accessor :delimiter
25
37
 
26
38
  #match can be:
27
39
  # true, Hash, Symbol, String, Regexp
28
- def initialize(match,rule_variant)
29
- self.rule_variant=rule_variant
30
- init(match)
31
-
40
+ # options
41
+ # :rule_variant
42
+ # :parser_class
43
+ def initialize(match, options={})
44
+ @init_options = options.clone
45
+ @rule_variant = options[:rule_variant]
46
+ @parser_class = options[:parser_class]
47
+ @delimiter = options[:delimiter]
48
+ @name = options[:name]
49
+ raise "rule_variant or parser_class required" unless @rule_variant || @parser_class
50
+
51
+ init match
32
52
  raise "pattern element cannot be both :dont and :optional" if negative && optional
33
53
  end
34
54
 
35
55
  def inspect
36
- "<PatternElement rule_variant=#{rule_variant.variant_node_class} match=#{match.inspect}>"
56
+ "<PatternElement #{rule_variant && "rule_variant=#{rule_variant.variant_node_class} "}match=#{match.inspect}#{" delimiter" if delimiter}>"
37
57
  end
38
58
 
39
59
  def to_s
40
60
  match.inspect
41
61
  end
42
62
 
63
+ def parser_class
64
+ @parser_class || rule_variant.rule.parser
65
+ end
66
+
67
+ def rules
68
+ parser_class.rules
69
+ end
70
+
43
71
  # attempt to match the pattern defined in self.parser in parent_node.src starting at offset parent_node.next
44
72
  def parse(parent_node)
45
- return RollbackWhitespaceNode.new(parent_node) if rewind_whitespace
46
73
 
47
74
  # run element parser
48
- match = parser.call(parent_node)
75
+ begin
76
+ parent_node.parser.matching_negative if negative
77
+ match = parser.call(parent_node)
78
+ ensure
79
+ parent_node.parser.unmatching_negative if negative
80
+ end
49
81
 
50
82
  # Negative patterns (PEG: !element)
51
83
  match = match ? nil : EmptyNode.new(parent_node) if negative
@@ -56,11 +88,13 @@ class PatternElement
56
88
  # Could-match patterns (PEG: &element)
57
89
  match.match_length = 0 if match && could_match
58
90
 
59
- if !match && terminal
91
+ if !match && (terminal || negative)
60
92
  # log failures on Terminal patterns for debug output if overall parse fails
61
- parent_node.parser.log_parsing_failure(parent_node.next,:pattern=>self.match,:node=>parent_node)
93
+ parent_node.parser.log_parsing_failure parent_node.next, :pattern => self.match, :node => parent_node
62
94
  end
63
95
 
96
+ match.delimiter = delimiter if match
97
+
64
98
  # return match
65
99
  match
66
100
  end
@@ -70,12 +104,13 @@ class PatternElement
70
104
  # initialize PatternElement based on the type of: match
71
105
  def init(match)
72
106
  self.match = match
107
+ match = match[0] if match.kind_of?(Array) && match.length == 1
73
108
  case match
74
109
  when TrueClass then init_true
75
110
  when String then init_string match
76
111
  when Regexp then init_regex match
77
112
  when Symbol then init_rule match
78
- when Hash then init_hash match
113
+ when PatternElementHash then init_hash match
79
114
  else raise "invalid pattern type: #{match.inspect}"
80
115
  end
81
116
  end
@@ -96,7 +131,7 @@ class PatternElement
96
131
  self.parser=lambda do |parent_node|
97
132
  offset = parent_node.next
98
133
  if parent_node.src[offset..-1].index(optimized_regex)==0
99
- range=$~.offset(0)
134
+ range = $~.offset(0)
100
135
  range = (range.min+offset)..(range.max+offset)
101
136
  TerminalNode.new(parent_node,range,regex)
102
137
  end
@@ -109,11 +144,11 @@ class PatternElement
109
144
  rule_name.to_s[/^([^?!]*)([?!])?$/]
110
145
  rule_name = $1.to_sym
111
146
  option = $2
112
- match_rule = rule_variant.rule.parser.rules[rule_name]
147
+ match_rule = rules[rule_name]
113
148
  raise "no rule for #{rule_name}" unless match_rule
114
149
 
115
150
  self.parser = lambda {|parent_node| match_rule.parse(parent_node)}
116
- self.name = rule_name
151
+ self.name ||= rule_name
117
152
  case option
118
153
  when "?" then self.optional = true
119
154
  when "!" then self.negative = true
@@ -128,9 +163,6 @@ class PatternElement
128
163
  init_many hash
129
164
  elsif hash[:match]
130
165
  init hash[:match]
131
- elsif hash[:rewind_whitespace]
132
- self.rewind_whitespace = true
133
- return
134
166
  else
135
167
  raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
136
168
  end
@@ -144,41 +176,37 @@ class PatternElement
144
176
  # initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
145
177
  def init_many(hash)
146
178
  # generate single_parser
147
- init hash[:many]
148
- single_parser = parser
179
+ pattern_element = PatternElement.new(hash[:many], @init_options.merge(name:hash[:as]))
149
180
 
150
181
  # generate delimiter_pattern_element
151
- delimiter_pattern_element = hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)
182
+ many_delimiter_pattern_element = hash[:delimiter] && PatternElement.new(hash[:delimiter], @init_options.merge(name:hash[:delimiter_name]))
152
183
 
153
184
  # generate many-parser
154
185
  self.parser = lambda do |parent_node|
155
- many_node = ManyNode.new parent_node
186
+ parent_node.match_name_is_poly(pattern_element.name)
187
+
188
+ # fail unless we can match at least one
189
+ return unless parent_node.match pattern_element
156
190
 
157
- if delimiter_pattern_element
191
+ if many_delimiter_pattern_element
192
+ parent_node.match_name_is_poly(many_delimiter_pattern_element.name)
158
193
  # delimited matching
159
- while true
160
- #match primary
161
- match = single_parser.call many_node
162
- break unless match
163
- many_node.add_match match
164
-
165
- #match delimiter
166
- delimiter_match = delimiter_pattern_element.parse many_node
167
- break unless delimiter_match
168
- many_node.add_match delimiter_match
194
+ while (parent_node.attempt_match do
195
+ parent_node.match_delimiter &&
196
+ parent_node.match(many_delimiter_pattern_element).tap{|md|md&&md.many_delimiter=true} &&
197
+ parent_node.match_delimiter &&
198
+ parent_node.match(pattern_element)
199
+ end)
169
200
  end
170
- many_node.separate_delimiter_matches
171
201
  else
172
202
  # not delimited matching
173
- while true
174
- match = single_parser.call many_node
175
- break unless match
176
- many_node.add_match match
203
+ while (parent_node.attempt_match do
204
+ parent_node.match_delimiter &&
205
+ parent_node.match(pattern_element)
206
+ end)
177
207
  end
178
208
  end
179
-
180
- # success only if we have at least one match
181
- many_node.length>0 && many_node
209
+ parent_node
182
210
  end
183
211
  end
184
212
  end
@@ -11,7 +11,7 @@ class Rule
11
11
  end
12
12
 
13
13
  # creates a new sub_class of the node_class for a variant
14
- def create_next_node_variant_class
14
+ def new_variant_node_class
15
15
  rule_variant_class_name = "#{name}_node#{self.variants.length+1}".camelize
16
16
  parser.const_set rule_variant_class_name, Class.new(node_class)
17
17
  end
@@ -24,11 +24,17 @@ class Rule
24
24
  @node_class = create_node_class
25
25
  end
26
26
 
27
- def add_variant(pattern, &block)
28
- rule_variant_class = create_next_node_variant_class
29
- variants << RuleVariant.new(pattern, self, rule_variant_class)
30
- rule_variant_class.class_eval &block if block
31
- rule_variant_class
27
+ def root_rule?
28
+ parser.root_rule == name
29
+ end
30
+
31
+ def add_variant(options={}, &block)
32
+ new_variant_node_class.tap do |variant_node_class|
33
+ options[:variant_node_class] = variant_node_class
34
+ options[:rule] = self
35
+ variants << RuleVariant.new(options)
36
+ variant_node_class.class_eval &block if block
37
+ end
32
38
  end
33
39
 
34
40
  def parse(node)
@@ -3,35 +3,59 @@ module BabelBridge
3
3
  # Each Rule has one or more RuleVariant
4
4
  # Rules attempt to match each of their Variants in order. The first one to succeed returns true and the Rule succeeds.
5
5
  class RuleVariant
6
- attr_accessor :pattern, :rule, :variant_node_class
6
+ attr_accessor :pattern, :rule, :variant_node_class, :delimiter_pattern, :match_delimiter_prepost
7
7
 
8
- def initialize(pattern, rule, variant_node_class=nil)
9
- @pattern = pattern
10
- @rule = rule
11
- @variant_node_class = variant_node_class
8
+ # pattern: Array - the pattern to match
9
+ # rule: Rule instance
10
+ # variant_node_class: the node class instantiated for this variant (required)
11
+ def initialize(options = {})
12
+ @pattern = options[:pattern]
13
+ @rule = options[:rule]
14
+ @variant_node_class = options[:variant_node_class]
15
+ raise "variant_node_class required" unless variant_node_class
16
+ @delimiter = options[:delimiter]
17
+ end
18
+
19
+ def parser
20
+ @rule.parser
21
+ end
22
+
23
+ def root_rule?
24
+ rule.root_rule?
25
+ end
26
+
27
+ def delimiter_pattern
28
+ @delimiter_pattern ||= if @delimiter
29
+ PatternElement.new(@delimiter, :rule_variant => self, :delimiter => true) unless @delimiter==// || @delimiter==""
30
+ else
31
+ parser.delimiter_pattern
32
+ end
12
33
  end
13
34
 
14
35
  # convert the pattern into a set of lambda functions
15
36
  def pattern_elements
16
- @pattern_elements||=pattern.collect { |match| PatternElement.new match, self }
37
+ @pattern_elements||=pattern.collect { |match| [PatternElement.new(match, :rule_variant => self, :pattern_element => true), delimiter_pattern] }.flatten[0..-2]
38
+ end
39
+
40
+ def parse_element(element_parser, node)
41
+ node.add_match element_parser.parse(node), element_parser.name
17
42
  end
18
43
 
19
44
  # returns a Node object if it matches, nil otherwise
20
45
  def parse(parent_node)
21
46
  #return parse_nongreedy_optional(src,offset,parent_node) # nongreedy optionals break standard PEG
22
- node = variant_node_class.new(parent_node)
47
+ node = variant_node_class.new(parent_node, delimiter_pattern)
48
+
49
+ node.match parser.delimiter_pattern if root_rule?
23
50
 
24
51
  pattern_elements.each do |pe|
25
- match=pe.parse(node)
52
+ return unless node.match(pe)
53
+ end
54
+ node.pop_match if node.last_match && node.last_match.delimiter
26
55
 
27
- # if parse failed
28
- return if !match
29
- match.matched
56
+ node.match parser.delimiter_pattern if root_rule?
30
57
 
31
- # parse succeeded, add to node and continue
32
- node.add_match(match,pe.name)
33
- end
34
- node.post_match
58
+ node && node.post_match_processing
35
59
  end
36
60
 
37
61
  def inspect; pattern.collect {|a| a.inspect}.join(', '); end
@@ -14,16 +14,9 @@ module BabelBridge
14
14
  lines.length<=n ? self : lines[-n..-1].join("\n")
15
15
  end
16
16
 
17
- # return the line and column of a given offset into this string
18
- # line and column are 1-based
19
- def line_col(offset)
20
- return 1,1 if length==0 || offset==0
21
- lines=(self[0..offset-1]+" ").split("\n")
22
- return lines.length, lines[-1].length
23
- end
24
17
  end
25
18
  end
26
-
19
+
27
20
  class String
28
21
  include BabelBridge::StringExtensions
29
- end
22
+ end
@@ -2,8 +2,29 @@ module BabelBridge
2
2
  class Tools
3
3
  class << self
4
4
 
5
- def indent(string,indent=" ")
6
- indent + string.gsub("\n", "\n#{indent}")
5
+ def indent(string, first_indent = " ", rest_indent = first_indent)
6
+ first_indent + string.gsub("\n", "\n#{rest_indent}")
7
+ end
8
+
9
+ def uniform_tabs(string)
10
+ lines = string.split("\n").collect{|line|line.split("\t")}
11
+ max_fields = lines.collect {|line| line.length}.max
12
+ max_fields.times do |field|
13
+ max_field_length = lines.collect {|line| (line[field]||"").length}.max
14
+ formatter = "%-#{max_field_length}s "
15
+ lines.each_with_index do |line,i|
16
+ lines[i][field] = formatter%line[field] if line[field]
17
+ end
18
+ end
19
+ lines.collect {|line|line.join}.join("\n")
20
+ end
21
+
22
+ # return the line and column of a given offset into this string
23
+ # line and column are 1-based
24
+ def line_column(string, offset)
25
+ return 1,1 if string.length==0 || offset==0
26
+ lines = (string[0..offset-1] + " ").split("\n")
27
+ return lines.length, lines[-1].length
7
28
  end
8
29
 
9
30
  def symbols_to_strings(array)
@@ -21,14 +42,14 @@ class Tools
21
42
  array.sort_by {|a| a.kind_of?(Regexp) ? 0 : -a.length}
22
43
  end
23
44
 
24
- # Takes an array of Strings and Regexp and generates a new Regexp
45
+ # Takes an array of Strings and Regexp and generates a new Regexp
25
46
  # that matches the or ("|") of all strings and Regexp
26
47
  def array_to_or_regexp_string(array)
27
48
  array = symbols_to_strings array.flatten
28
49
  array = sort_operator_patterns array
29
50
  array = regexp_and_strings_to_regexpstrings array
30
51
 
31
- array.collect {|op| "(#{op})"}.join('|') #.tap {|a| puts "array_to_or_regexp_string(#{array.inspect}) -> /#{a}/"}
52
+ array.collect {|op| "(#{op})"}.join('|')
32
53
  end
33
54
 
34
55
  def array_to_anchored_or_regexp(array)
@@ -96,7 +117,6 @@ class BinaryOperatorProcessor
96
117
  return operands[0] if operands.length==1
97
118
 
98
119
  i = index_of_lowest_precedence(operators)
99
-
100
120
  operator = operators[i]
101
121
  new_operand = node_class.new(parent_node)
102
122
  new_operand.add_match generate_tree(operands[0..i], operators[0..i-1],new_operand), :left
@@ -106,4 +126,4 @@ class BinaryOperatorProcessor
106
126
  end
107
127
 
108
128
  end
109
- end
129
+ end
@@ -1,3 +1,4 @@
1
1
  module BabelBridge
2
- VERSION = "0.4.1"
2
+ # last release: 0.4.1
3
+ VERSION = "0.5.0"
3
4
  end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+
3
+ describe "advanced parsing" do
4
+ include TestParserGenerator
5
+
6
+ it "test_custom_parser" do
7
+ new_parser do
8
+ rule :foo, (custom_parser do |parent_node|
9
+ offset=parent_node.next
10
+ src=parent_node.src
11
+
12
+ # Note, the \A anchors the search at the beginning of the string
13
+ if src[offset..-1].index(/\A[A-Z]+/)==0
14
+ endpattern=$~.to_s
15
+ if i=src.index(endpattern,offset+endpattern.length)
16
+ range = offset..(i+endpattern.length)
17
+ BabelBridge::TerminalNode.new(parent_node,range,"endpattern")
18
+ end
19
+ end
20
+ end)
21
+ end
22
+
23
+ test_parse("END this is in the middle END")
24
+ test_parse("END this is in END the middle END",:partial_match => true).text.should == "END this is in END"
25
+ test_parse "END this is in the middle EN", :should_fail_at => 0
26
+ test_parse " END this is in the middle END", :should_fail_at => 0
27
+ end
28
+
29
+ it "test_binary_operator_rule" do
30
+ new_parser do
31
+ binary_operators_rule :bin_op, :int, ["**", [:/, :*], [:+, "-"]], :right_operators => ["**"] do
32
+ def evaluate
33
+ "(#{left.evaluate}#{operator}#{right.evaluate})"
34
+ end
35
+ end
36
+
37
+ rule :int, /[-]?[0-9]+/ do
38
+ def evaluate; to_s; end
39
+ end
40
+ end
41
+ test_parse("1+2").evaluate .should == "(1+2)"
42
+ test_parse("1+2+3").evaluate .should == "((1+2)+3)"
43
+ test_parse("1+2*3").evaluate .should == "(1+(2*3))"
44
+ test_parse("1*2+3").evaluate .should == "((1*2)+3)"
45
+ test_parse("5**6").evaluate .should == "(5**6)"
46
+ test_parse("1-2+3*4/5**6").evaluate .should == "((1-2)+((3*4)/(5**6)))"
47
+ test_parse("5**6**7").evaluate .should == "(5**(6**7))"
48
+ end
49
+ end