babel_bridge 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,113 +5,46 @@ http://babel-bridge.rubyforge.org/
5
5
  =end
6
6
 
7
7
  module BabelBridge
8
- # non-terminal node
8
+ # rule node
9
9
  # subclassed automatically by parser.rule for each unique non-terminal
10
10
  class NonTerminalNode < Node
11
- attr_accessor :matches,:match_names
12
11
 
13
- def match_names
14
- @match_names ||= []
15
- end
16
- def matches
17
- @matches ||= []
18
- end
19
-
20
- # length returns the number of sub-nodes
21
- def length
22
- matches.length
23
- end
24
-
25
- def matches_by_name
26
- @matches_by_name||= begin
27
- raise "matches.length #{matches.length} != match_names.length #{match_names.length}" unless matches.length==match_names.length
28
- mbn={}
29
- mn=match_names
30
- matches.each_with_index do |match,i|
31
- name=mn[i]
32
- next unless name
33
- if current=mbn[name] # name already used
34
- # convert to MultiMatchesArray if not already
35
- mbn[name]=MultiMatchesArray.new([current]) if !current.kind_of? MultiMatchesArray
36
- # add to array
37
- mbn[name]<<match
38
- else
39
- mbn[name]=match
40
- end
41
- end
42
- mbn
43
- end
44
- end
45
-
46
- def inspect(options={})
47
- return "#{self.class}" if matches.length==0
48
- matches_inspected=matches.collect{|a|a.inspect(options)}.compact
49
- if matches_inspected.length==0 then nil
50
- elsif matches_inspected.length==1
51
- m=matches_inspected[0]
52
- ret="#{self.class} > "+matches_inspected[0]
53
- if options[:simple]
54
- ret=if m["\n"] then m
55
- else
56
- # just show the first and last nodes in the chain
57
- ret.gsub(/( > [A-Z][a-zA-Z0-9:]+ > (\.\.\. > )?)/," > ... > ")
58
- end
59
- end
60
- ret
12
+ def trailing_whitespace_range
13
+ if matches.length == 0
14
+ preceding_whitespace_range || (0..-1)
61
15
  else
62
- (["#{self.class}"]+matches_inspected).join("\n").gsub("\n","\n ")
16
+ matches[-1].trailing_whitespace_range
63
17
  end
64
18
  end
65
19
 
66
- #********************
67
- # alter methods
68
- #********************
69
- def reset_matches_by_name
70
- @matches_by_name=nil
20
+ def update_match_length
21
+ m = matches[-1]
22
+ @match_length = m ? m.offset_after_match - offset : 0
71
23
  end
72
24
 
73
- # defines where to forward missing methods to; override for custom behavior
74
- def forward_to(method_name)
75
- matches.each {|m| return m if m.respond_to?(method_name)}
76
- nil
25
+ #*****************************
26
+ # Array interface implementation
27
+ #*****************************
28
+ def matches
29
+ @matches ||= []
77
30
  end
78
31
 
79
- def respond_to?(method_name)
80
- super ||
81
- matches_by_name[method_name] ||
82
- forward_to(method_name)
32
+ include Enumerable
33
+ def length
34
+ matches.length
83
35
  end
84
36
 
85
- def method_missing(method_name, *args) #method_name is a symbol
86
- unless matches_by_name.has_key? method_name
87
- if f=forward_to(method_name)
88
- return f.send(method_name,*args)
89
- end
90
- raise "#{self.class}: missing method #{method_name.inspect} / doesn't match named pattern element: #{matches_by_name.keys.inspect}"
91
- end
92
- case ret=matches_by_name[method_name]
93
- when EmptyNode then nil
94
- else ret
95
- end
37
+ def <<(node)
38
+ matches<<node
39
+ update_match_length
96
40
  end
97
41
 
98
- # adds a match with name (optional)
99
- # returns self so you can chain add_match or concat methods
100
- def add_match(match,name=nil)
101
- reset_matches_by_name
102
- matches<<match
103
- match_names<<name
104
-
105
- self.match_length=match.next - offset
106
- self
42
+ def [](i)
43
+ matches[i]
107
44
  end
108
45
 
109
- # concatinate all matches from another node
110
- # returns self so you can chain add_match or concat methods
111
- def concat(node)
112
- names=node.match_names
113
- node.matches.each_with_index { |match,i| add_match(match,names[i])}
114
- self
46
+ def each(&block)
47
+ matches.each(&block)
115
48
  end
116
49
  end
117
50
  end
@@ -0,0 +1,102 @@
1
+ =begin
2
+ Copyright 2011 Shane Brinkman-Davis
3
+ See README for licence information.
4
+ http://babel-bridge.rubyforge.org/
5
+ =end
6
+
7
+ module BabelBridge
8
+ # rule node
9
+ # subclassed automatically by parser.rule for each unique non-terminal
10
+ class RuleNode < NonTerminalNode
11
+
12
+ def match_names
13
+ @match_names ||= []
14
+ end
15
+
16
+ def matches_by_name
17
+ @matches_by_name||= begin
18
+ raise "matches.length #{matches.length} != match_names.length #{match_names.length}" unless matches.length==match_names.length
19
+ mbn={}
20
+ mn=match_names
21
+ matches.each_with_index do |match,i|
22
+ name=mn[i]
23
+ next unless name
24
+ if current=mbn[name] # name already used
25
+ # convert to MultiMatchesArray if not already
26
+ mbn[name]=MultiMatchesArray.new([current]) if !current.kind_of? MultiMatchesArray
27
+ # add to array
28
+ mbn[name]<<match
29
+ else
30
+ mbn[name]=match
31
+ end
32
+ end
33
+ mbn
34
+ end
35
+ end
36
+
37
+ def inspect(options={})
38
+ return "#{self.class}" if matches.length==0
39
+ matches_inspected=matches.collect{|a|a.inspect(options)}.compact
40
+ if matches_inspected.length==0 then nil
41
+ elsif matches_inspected.length==1
42
+ m=matches_inspected[0]
43
+ ret="#{self.class} > "+matches_inspected[0]
44
+ if options[:simple]
45
+ ret=if m["\n"] then m
46
+ else
47
+ # just show the first and last nodes in the chain
48
+ ret.gsub(/( > [A-Z][a-zA-Z0-9:]+ > (\.\.\. > )?)/," > ... > ")
49
+ end
50
+ end
51
+ ret
52
+ else
53
+ (["#{self.class}"]+matches_inspected).join("\n").gsub("\n","\n ")
54
+ end
55
+ end
56
+
57
+ #********************
58
+ # alter methods
59
+ #********************
60
+ def reset_matches_by_name
61
+ @matches_by_name=nil
62
+ end
63
+
64
+ # defines where to forward missing methods to; override for custom behavior
65
+ def forward_to(method_name)
66
+ matches.each {|m| return m if m.respond_to?(method_name)}
67
+ nil
68
+ end
69
+
70
+ def respond_to?(method_name)
71
+ super ||
72
+ matches_by_name[method_name] ||
73
+ forward_to(method_name)
74
+ end
75
+
76
+ def method_missing(method_name, *args) #method_name is a symbol
77
+ unless matches_by_name.has_key? method_name
78
+ if f=forward_to(method_name)
79
+ return f.send(method_name,*args)
80
+ end
81
+ match_path = [self]
82
+ while match_path[-1].matches.length==1
83
+ match_path<<match_path[-1].matches[0]
84
+ end
85
+ raise "#{match_path.collect{|m|m.class}.join(' > ')}: no methods or named pattern elements match: #{method_name.inspect}"
86
+ end
87
+ case ret=matches_by_name[method_name]
88
+ when EmptyNode then nil
89
+ else ret
90
+ end
91
+ end
92
+
93
+ # adds a match with name (optional)
94
+ def add_match(match,name=nil)
95
+ reset_matches_by_name
96
+ matches<<match
97
+ match_names<<name
98
+
99
+ update_match_length
100
+ end
101
+ end
102
+ end
@@ -8,25 +8,12 @@ module BabelBridge
8
8
  # used for String and Regexp PatternElements
9
9
  # not subclassed
10
10
  class TerminalNode < Node
11
- attr_accessor :pattern
12
- def initialize(parent,match_length,pattern)
11
+ attr_accessor :pattern, :trailing_whitespace_offset
12
+ def initialize(parent,range,pattern)
13
13
  node_init(parent)
14
- self.match_length=match_length
15
- self.pattern=pattern
16
- @ignore_whitespace = parser.ignore_whitespace?
17
- consume_trailing_whitespace if @ignore_whitespace
18
- end
19
-
20
- def consume_trailing_whitespace
21
- offset = self.next
22
- if src[offset..-1].index(/\A\s*/)==0
23
- range = $~.offset(0)
24
- self.match_length += range[1]-range[0]
25
- end
26
- end
27
-
28
- def to_s
29
- @ignore_whitespace ? text.strip : text
14
+ self.offset = range.min
15
+ self.match_length = range.max-range.min
16
+ self.pattern = pattern
30
17
  end
31
18
 
32
19
  def inspect(options={})
@@ -34,5 +21,6 @@ class TerminalNode < Node
34
21
  end
35
22
 
36
23
  def matches; [self]; end
24
+
37
25
  end
38
26
  end
data/lib/parser.rb CHANGED
@@ -8,7 +8,7 @@ class Parser
8
8
  # These methods are used in the creation of a Parser Sub-Class to define
9
9
  # its grammar
10
10
  class <<self
11
- attr_accessor :rules,:module_name,:root_rule
11
+ attr_accessor :rules, :module_name, :root_rule, :whitespace_regexp
12
12
 
13
13
  def rules
14
14
  @rules||={}
@@ -32,12 +32,12 @@ class Parser
32
32
  # MyParser.rule :name, to_match1, to_match2, etc...
33
33
  #
34
34
  # The first rule added is the root-rule for the parser.
35
- # You can override by:
35
+ # You can override by:
36
36
  # class MyParser < BabelBridge::Parser
37
37
  # root_rule = :new_root_rule
38
38
  # end
39
39
  #
40
- # The block is executed in the context of the rule-varient's node type, a subclass of: NonTerminalNode
40
+ # The block is executed in the context of the rule-variant's node type, a subclass of: RuleNode
41
41
  # This allows you to add whatever functionality you want to your nodes in the final parse tree.
42
42
  # Also note you can override the post_match method. This allows you to restructure the parse tree as it is parsed.
43
43
  def rule(name,*pattern,&block)
@@ -52,7 +52,7 @@ class Parser
52
52
  # typical example is the "**" exponentiation operator which should be evaluated right-to-left.
53
53
  def binary_operators_rule(name,elements_pattern,operators,options={},&block)
54
54
  right_operators = options[:right_operators]
55
- rule(name,many(elements_pattern,Tools::array_to_or_regexp(operators))) do
55
+ rule(name,many(elements_pattern,Tools::array_to_or_regexp(operators))) do
56
56
  self.class_eval &block if block
57
57
  class <<self
58
58
  attr_accessor :operators_from_rule, :right_operators
@@ -98,17 +98,13 @@ class Parser
98
98
  @root_rule=rule
99
99
  end
100
100
 
101
- def ignore_whitespace
102
- @ignore_whitespace = true
103
- end
104
-
105
- def ignore_whitespace?
106
- @ignore_whitespace
101
+ def ignore_whitespace(regexp = /\s*/)
102
+ @whitespace_regexp = /\A(#{regexp})?/
107
103
  end
108
104
  end
109
105
 
110
- def ignore_whitespace?
111
- self.class.ignore_whitespace?
106
+ def whitespace_regexp
107
+ self.class.whitespace_regexp || /\A/
112
108
  end
113
109
 
114
110
  #*********************************************
@@ -136,14 +132,18 @@ class Parser
136
132
  #
137
133
  #*********************************************
138
134
  class <<self
139
- def many(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
140
- def many?(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.optionally.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
141
- def many!(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.dont.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
135
+ def many(m,delimiter=nil) PatternElementHash.new.match.many(m).delimiter(delimiter) end
136
+ def many?(m,delimiter=nil) PatternElementHash.new.optionally.match.many(m).delimiter(delimiter) end
137
+ def many!(m,delimiter=nil) PatternElementHash.new.dont.match.many(m).delimiter(delimiter) end
142
138
 
143
139
  def match?(*args) PatternElementHash.new.optionally.match(*args) end
144
140
  def match(*args) PatternElementHash.new.match(*args) end
145
141
  def match!(*args) PatternElementHash.new.dont.match(*args) end
146
142
 
143
+ # if ignore_whitespace is used, after every TerminalNode, all whitespace is consumed. Wrapping include_whitespace around a pattern-element allows you to explicitly match the preceding whitespace for that one element.
144
+ # NOTE: you can ALWAYS explicitly match any trailing whitespace
145
+ def include_whitespace(*args) PatternElementHash.new.include_whitespace.match(*args) end
146
+
147
147
  def dont; PatternElementHash.new.dont end
148
148
  def optionally; PatternElementHash.new.optionally end
149
149
  def could; PatternElementHash.new.could end
@@ -167,10 +167,24 @@ class Parser
167
167
  end
168
168
 
169
169
  def reset_parser_tracking
170
- self.src=nil
171
- self.failure_index=0
172
- self.expecting_list={}
173
- self.parse_cache={}
170
+ @parsing_did_not_match_entire_input = false
171
+ @src = nil
172
+ @failure_index = 0
173
+ @expecting_list = {}
174
+ @parse_cache = {}
175
+ @white_space_ranges = {}
176
+ end
177
+
178
+ # memoizing whitespace parser
179
+ def white_space_range(start)
180
+ @white_space_ranges[start]||=begin
181
+ # src should always be a string - unless this is called AFTER parsing is done. Currently this can happen with the way ManyNode handles .match_length and .next
182
+ # We should be able to just use:
183
+ # src[start..-1].index whitespace_regexp
184
+ ((src||"")[start..-1]||"").index whitespace_regexp
185
+ r = $~.offset 0
186
+ start+r[0] .. start+r[1]-1
187
+ end
174
188
  end
175
189
 
176
190
  def cached(rule_class,offset)
@@ -187,12 +201,10 @@ class Parser
187
201
 
188
202
  def log_parsing_failure(index,expecting)
189
203
  if index>failure_index
190
- key=expecting[:pattern]
191
- @expecting_list={key=>expecting}
204
+ @expecting_list = {expecting[:pattern] => expecting}
192
205
  @failure_index = index
193
206
  elsif index == failure_index
194
- key=expecting[:pattern]
195
- self.expecting_list[key]=expecting
207
+ @expecting_list[expecting[:pattern]] = expecting
196
208
  else
197
209
  # ignored
198
210
  end
@@ -208,9 +220,11 @@ class Parser
208
220
  unless rule
209
221
  if ret
210
222
  if ret.next<src.length # parse only succeeds if the whole input is matched
211
- @parsing_did_not_match_entire_input=true
212
- @failure_index=ret.next
213
- @failed_parse = ret
223
+ if ret.next >= @failure_index
224
+ @parsing_did_not_match_entire_input=true
225
+ @failure_index = ret.next
226
+ @failed_parse = ret
227
+ end
214
228
  ret=nil
215
229
  else
216
230
  reset_parser_tracking
@@ -240,7 +254,7 @@ class Parser
240
254
  def nodes_interesting_parse_path(node)
241
255
  path = node.parent_list
242
256
  path << node
243
- path.pop while path[-1] && !path[-1].kind_of?(NonTerminalNode)
257
+ path.pop while path[-1] && !path[-1].kind_of?(RuleNode)
244
258
  path
245
259
  end
246
260
 
@@ -277,7 +291,7 @@ ENDTXT
277
291
  end
278
292
 
279
293
  #option: :verbose => true
280
- def parser_failure_info(options={})
294
+ def parser_failure_info(options={})
281
295
  return unless src
282
296
  verbose = options[:verbose]
283
297
  bracketing_lines=5
@@ -295,10 +309,10 @@ ENDTXT
295
309
  ret+="\nParser did not match entire input.\n"
296
310
  if verbose
297
311
  ret+="\nParsed:\n#{Tools::indent failed_parse.inspect}\n"
298
- end
312
+ end
299
313
  end
300
-
314
+
301
315
  ret+expecting_output
302
316
  end
303
317
  end
304
- end
318
+ end
@@ -21,7 +21,7 @@ end
21
21
  # :optional
22
22
  class PatternElement
23
23
  attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
24
- attr_accessor :match,:rule_variant
24
+ attr_accessor :match,:rule_variant,:include_whitespace
25
25
 
26
26
  #match can be:
27
27
  # true, Hash, Symbol, String, Regexp
@@ -56,7 +56,7 @@ class PatternElement
56
56
 
57
57
  if !match && terminal
58
58
  # log failures on Terminal patterns for debug output if overall parse fails
59
- parent_node.parser.log_parsing_failure(parent_node.next,:pattern=>self.match,:node=>parent_node)
59
+ parent_node.parser.log_parsing_failure(match_start_index(parent_node),:pattern=>self.match,:node=>parent_node)
60
60
  end
61
61
 
62
62
  # return match
@@ -83,24 +83,29 @@ class PatternElement
83
83
  self.parser=lambda {|parent_node| EmptyNode.new(parent_node)}
84
84
  end
85
85
 
86
+ def match_start_index(parent_node)
87
+ if include_whitespace
88
+ parent_node.trailing_whitespace_range.first
89
+ else
90
+ parent_node.next
91
+ end
92
+ end
93
+
94
+
86
95
  # initialize PatternElement as a parser that matches exactly the string specified
87
96
  def init_string(string)
88
- self.parser=lambda do |parent_node|
89
- if parent_node.src[parent_node.next,string.length]==string
90
- TerminalNode.new(parent_node,string.length,string)
91
- end
92
- end
93
- self.terminal=true
97
+ init_regex Regexp.escape(string)
94
98
  end
95
99
 
96
100
  # initialize PatternElement as a parser that matches the given regex
97
101
  def init_regex(regex)
98
102
  optimized_regex=/\A#{regex}/ # anchor the search
99
103
  self.parser=lambda do |parent_node|
100
- offset=parent_node.next
104
+ offset = match_start_index(parent_node)
101
105
  if parent_node.src[offset..-1].index(optimized_regex)==0
102
106
  range=$~.offset(0)
103
- TerminalNode.new(parent_node,range[1]-range[0],regex)
107
+ range = (range.min+offset)..(range.max+offset)
108
+ TerminalNode.new(parent_node,range,regex)
104
109
  end
105
110
  end
106
111
  self.terminal=true
@@ -138,66 +143,48 @@ class PatternElement
138
143
  self.optional ||= hash[:optional] || hash[:optionally]
139
144
  self.could_match ||= hash[:could]
140
145
  self.negative ||= hash[:dont]
146
+ self.include_whitespace ||= hash[:include_whitespace]
141
147
  end
142
148
 
143
149
  # initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
144
150
  def init_many(hash)
145
151
  # generate single_parser
146
152
  init hash[:many]
147
- single_parser=parser
153
+ single_parser = parser
148
154
 
149
155
  # generate delimiter_pattern_element
150
- delimiter_pattern_element= hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)
151
-
152
- # generate post_delimiter_element
153
- post_delimiter_element=hash[:post_delimiter] && case hash[:post_delimiter]
154
- when TrueClass then delimiter_pattern_element
155
- else PatternElement.new(hash[:post_delimiter],rule_variant)
156
- end
156
+ delimiter_pattern_element = hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)
157
157
 
158
158
  # generate many-parser
159
- self.parser= lambda do |parent_node|
160
- last_match=single_parser.call(parent_node)
161
- many_node=ManyNode.new(parent_node)
159
+ self.parser = lambda do |parent_node|
160
+ many_node = ManyNode.new parent_node
162
161
 
163
162
  if delimiter_pattern_element
164
163
  # delimited matching
165
- while last_match
166
- many_node<<last_match
164
+ while true
165
+ #match primary
166
+ match = single_parser.call many_node
167
+ break unless match
168
+ many_node << match
167
169
 
168
170
  #match delimiter
169
- delimiter_match = delimiter_pattern_element.parse(many_node)
171
+ delimiter_match = delimiter_pattern_element.parse many_node
170
172
  break unless delimiter_match
171
- many_node.delimiter_matches<<delimiter_match
172
-
173
- #match next
174
- last_match=single_parser.call(many_node)
173
+ many_node << delimiter_match
175
174
  end
175
+ many_node.separate_delimiter_matches
176
176
  else
177
177
  # not delimited matching
178
- while last_match
179
- many_node<<last_match
180
- last_match=single_parser.call(many_node)
178
+ while true
179
+ match = single_parser.call many_node
180
+ break unless match
181
+ many_node << match
181
182
  end
182
183
  end
183
184
 
184
185
  # success only if we have at least one match
185
- return nil unless many_node.length>0
186
-
187
- # pop the post delimiter matched with delimiter_pattern_element
188
- many_node.delimiter_matches.pop if many_node.length==many_node.delimiter_matches.length
189
-
190
- # If post_delimiter is requested, many_node and delimiter_matches will be the same length
191
- if post_delimiter_element
192
- post_delimiter_match=post_delimiter_element.parse(many_node)
193
-
194
- # fail if post_delimiter didn't match
195
- return nil unless post_delimiter_match
196
- many_node.delimiter_matches<<post_delimiter_match
197
- end
198
-
199
- many_node
186
+ many_node.length>0 && many_node
200
187
  end
201
188
  end
202
189
  end
203
- end
190
+ end