babel_bridge 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,113 +5,46 @@ http://babel-bridge.rubyforge.org/
5
5
  =end
6
6
 
7
7
  module BabelBridge
8
- # non-terminal node
8
+ # rule node
9
9
  # subclassed automatically by parser.rule for each unique non-terminal
10
10
  class NonTerminalNode < Node
11
- attr_accessor :matches,:match_names
12
11
 
13
- def match_names
14
- @match_names ||= []
15
- end
16
- def matches
17
- @matches ||= []
18
- end
19
-
20
- # length returns the number of sub-nodes
21
- def length
22
- matches.length
23
- end
24
-
25
- def matches_by_name
26
- @matches_by_name||= begin
27
- raise "matches.length #{matches.length} != match_names.length #{match_names.length}" unless matches.length==match_names.length
28
- mbn={}
29
- mn=match_names
30
- matches.each_with_index do |match,i|
31
- name=mn[i]
32
- next unless name
33
- if current=mbn[name] # name already used
34
- # convert to MultiMatchesArray if not already
35
- mbn[name]=MultiMatchesArray.new([current]) if !current.kind_of? MultiMatchesArray
36
- # add to array
37
- mbn[name]<<match
38
- else
39
- mbn[name]=match
40
- end
41
- end
42
- mbn
43
- end
44
- end
45
-
46
- def inspect(options={})
47
- return "#{self.class}" if matches.length==0
48
- matches_inspected=matches.collect{|a|a.inspect(options)}.compact
49
- if matches_inspected.length==0 then nil
50
- elsif matches_inspected.length==1
51
- m=matches_inspected[0]
52
- ret="#{self.class} > "+matches_inspected[0]
53
- if options[:simple]
54
- ret=if m["\n"] then m
55
- else
56
- # just show the first and last nodes in the chain
57
- ret.gsub(/( > [A-Z][a-zA-Z0-9:]+ > (\.\.\. > )?)/," > ... > ")
58
- end
59
- end
60
- ret
12
+ def trailing_whitespace_range
13
+ if matches.length == 0
14
+ preceding_whitespace_range || (0..-1)
61
15
  else
62
- (["#{self.class}"]+matches_inspected).join("\n").gsub("\n","\n ")
16
+ matches[-1].trailing_whitespace_range
63
17
  end
64
18
  end
65
19
 
66
- #********************
67
- # alter methods
68
- #********************
69
- def reset_matches_by_name
70
- @matches_by_name=nil
20
+ def update_match_length
21
+ m = matches[-1]
22
+ @match_length = m ? m.offset_after_match - offset : 0
71
23
  end
72
24
 
73
- # defines where to forward missing methods to; override for custom behavior
74
- def forward_to(method_name)
75
- matches.each {|m| return m if m.respond_to?(method_name)}
76
- nil
25
+ #*****************************
26
+ # Array interface implementation
27
+ #*****************************
28
+ def matches
29
+ @matches ||= []
77
30
  end
78
31
 
79
- def respond_to?(method_name)
80
- super ||
81
- matches_by_name[method_name] ||
82
- forward_to(method_name)
32
+ include Enumerable
33
+ def length
34
+ matches.length
83
35
  end
84
36
 
85
- def method_missing(method_name, *args) #method_name is a symbol
86
- unless matches_by_name.has_key? method_name
87
- if f=forward_to(method_name)
88
- return f.send(method_name,*args)
89
- end
90
- raise "#{self.class}: missing method #{method_name.inspect} / doesn't match named pattern element: #{matches_by_name.keys.inspect}"
91
- end
92
- case ret=matches_by_name[method_name]
93
- when EmptyNode then nil
94
- else ret
95
- end
37
+ def <<(node)
38
+ matches<<node
39
+ update_match_length
96
40
  end
97
41
 
98
- # adds a match with name (optional)
99
- # returns self so you can chain add_match or concat methods
100
- def add_match(match,name=nil)
101
- reset_matches_by_name
102
- matches<<match
103
- match_names<<name
104
-
105
- self.match_length=match.next - offset
106
- self
42
+ def [](i)
43
+ matches[i]
107
44
  end
108
45
 
109
- # concatinate all matches from another node
110
- # returns self so you can chain add_match or concat methods
111
- def concat(node)
112
- names=node.match_names
113
- node.matches.each_with_index { |match,i| add_match(match,names[i])}
114
- self
46
+ def each(&block)
47
+ matches.each(&block)
115
48
  end
116
49
  end
117
50
  end
@@ -0,0 +1,102 @@
1
+ =begin
2
+ Copyright 2011 Shane Brinkman-Davis
3
+ See README for licence information.
4
+ http://babel-bridge.rubyforge.org/
5
+ =end
6
+
7
+ module BabelBridge
8
+ # rule node
9
+ # subclassed automatically by parser.rule for each unique non-terminal
10
class RuleNode < NonTerminalNode

  # Names recorded by add_match, kept parallel to #matches:
  # match_names[i] is the name supplied for matches[i], or nil if none was given.
  def match_names
    @match_names ||= []
  end

  # Memoized Hash from pattern-element name to the match(es) added under it.
  # * a name used once maps to that match
  # * a name used more than once maps to a MultiMatchesArray of those matches, in order
  # * unnamed matches (name nil/false) are left out
  # The cache is dropped by reset_matches_by_name.
  #
  # Raises RuntimeError when matches and match_names differ in length.
  def matches_by_name
    @matches_by_name ||= begin
      raise "matches.length #{matches.length} != match_names.length #{match_names.length}" unless matches.length==match_names.length
      by_name = {}
      matches.zip(match_names).each do |match, name|
        next unless name
        if (current = by_name[name]) # name already used
          # convert to MultiMatchesArray if not already
          by_name[name] = MultiMatchesArray.new([current]) unless current.kind_of? MultiMatchesArray
          # add to array
          by_name[name] << match
        else
          by_name[name] = match
        end
      end
      by_name
    end
  end

  # Textual rendering of this node and its sub-matches.
  # * no matches: just the class name
  # * every sub-match inspects to nil: nil
  # * one visible sub-match: "Class > child"; with options[:simple] a multi-line
  #   child is returned on its own, otherwise the middle of the chain is
  #   collapsed to " > ... > "
  # * several visible sub-matches: class name, then each child on following lines
  # options is passed through unchanged to every sub-match's inspect.
  def inspect(options={})
    return "#{self.class}" if matches.length==0
    matches_inspected = matches.collect { |a| a.inspect(options) }.compact
    if matches_inspected.length==0 then nil
    elsif matches_inspected.length==1
      m = matches_inspected[0]
      ret = "#{self.class} > " + m
      if options[:simple]
        ret = if m["\n"] then m
        else
          # just show the first and last nodes in the chain
          ret.gsub(/( > [A-Z][a-zA-Z0-9:]+ > (\.\.\. > )?)/," > ... > ")
        end
      end
      ret
    else
      (["#{self.class}"]+matches_inspected).join("\n").gsub("\n","\n ")
    end
  end

  #********************
  # alter methods
  #********************

  # Drops the memoized name lookup so the next matches_by_name call rebuilds it.
  def reset_matches_by_name
    @matches_by_name = nil
  end

  # defines where to forward missing methods to; override for custom behavior
  # Returns the first sub-match that responds to method_name, or nil.
  def forward_to(method_name)
    matches.each { |m| return m if m.respond_to?(method_name) }
    nil
  end

  # Truthy when the node itself, a named pattern element, or a sub-match
  # (via forward_to) can answer method_name.
  # include_all mirrors Object#respond_to?'s optional second argument and is
  # handed on to super, so two-argument calls no longer raise ArgumentError.
  def respond_to?(method_name, include_all = false)
    super ||
      matches_by_name[method_name] ||
      forward_to(method_name)
  end

  # Resolves calls that are not real methods:
  # * a named pattern element returns its match (nil when that match is an EmptyNode)
  # * otherwise the call is forwarded to the first sub-match that responds to it
  # * otherwise RuntimeError is raised, naming the chain of single-match nodes below this one
  def method_missing(method_name, *args) #method_name is a symbol
    unless matches_by_name.has_key? method_name
      if (f = forward_to(method_name))
        return f.send(method_name, *args)
      end
      match_path = [self]
      # Follow the chain of only-children for the error message. Stop at a node
      # whose single match is itself (TerminalNode#matches returns [self]);
      # without this check the walk never ends.
      while (children = match_path[-1].matches).length==1 && !children[0].equal?(match_path[-1])
        match_path << children[0]
      end
      raise "#{match_path.collect{|m|m.class}.join(' > ')}: no methods or named pattern elements match: #{method_name.inspect}"
    end
    case ret = matches_by_name[method_name]
    when EmptyNode then nil
    else ret
    end
  end

  # adds a match with name (optional)
  # Invalidates the name lookup, appends to matches and match_names in step,
  # then recomputes the match length via update_match_length.
  def add_match(match,name=nil)
    reset_matches_by_name
    matches << match
    match_names << name

    update_match_length
  end
end
102
+ end
@@ -8,25 +8,12 @@ module BabelBridge
8
8
  # used for String and Regexp PatternElements
9
9
  # not subclassed
10
10
  class TerminalNode < Node
11
- attr_accessor :pattern
12
- def initialize(parent,match_length,pattern)
11
+ attr_accessor :pattern, :trailing_whitespace_offset
12
+ def initialize(parent,range,pattern)
13
13
  node_init(parent)
14
- self.match_length=match_length
15
- self.pattern=pattern
16
- @ignore_whitespace = parser.ignore_whitespace?
17
- consume_trailing_whitespace if @ignore_whitespace
18
- end
19
-
20
- def consume_trailing_whitespace
21
- offset = self.next
22
- if src[offset..-1].index(/\A\s*/)==0
23
- range = $~.offset(0)
24
- self.match_length += range[1]-range[0]
25
- end
26
- end
27
-
28
- def to_s
29
- @ignore_whitespace ? text.strip : text
14
+ self.offset = range.min
15
+ self.match_length = range.max-range.min
16
+ self.pattern = pattern
30
17
  end
31
18
 
32
19
  def inspect(options={})
@@ -34,5 +21,6 @@ class TerminalNode < Node
34
21
  end
35
22
 
36
23
  def matches; [self]; end
24
+
37
25
  end
38
26
  end
data/lib/parser.rb CHANGED
@@ -8,7 +8,7 @@ class Parser
8
8
  # These methods are used in the creation of a Parser Sub-Class to define
9
9
  # its grammar
10
10
  class <<self
11
- attr_accessor :rules,:module_name,:root_rule
11
+ attr_accessor :rules, :module_name, :root_rule, :whitespace_regexp
12
12
 
13
13
  def rules
14
14
  @rules||={}
@@ -32,12 +32,12 @@ class Parser
32
32
  # MyParser.rule :name, to_match1, to_match2, etc...
33
33
  #
34
34
  # The first rule added is the root-rule for the parser.
35
- # You can override by:
35
+ # You can override by:
36
36
  # class MyParser < BabelBridge::Parser
37
37
  # root_rule = :new_root_rule
38
38
  # end
39
39
  #
40
- # The block is executed in the context of the rule-varient's node type, a subclass of: NonTerminalNode
40
+ # The block is executed in the context of the rule-variant's node type, a subclass of: RuleNode
41
41
  # This allows you to add whatever functionality you want to your nodes in the final parse tree.
42
42
  # Also note you can override the post_match method. This allows you to restructure the parse tree as it is parsed.
43
43
  def rule(name,*pattern,&block)
@@ -52,7 +52,7 @@ class Parser
52
52
  # typical example is the "**" exponentiation operator which should be evaluated right-to-left.
53
53
  def binary_operators_rule(name,elements_pattern,operators,options={},&block)
54
54
  right_operators = options[:right_operators]
55
- rule(name,many(elements_pattern,Tools::array_to_or_regexp(operators))) do
55
+ rule(name,many(elements_pattern,Tools::array_to_or_regexp(operators))) do
56
56
  self.class_eval &block if block
57
57
  class <<self
58
58
  attr_accessor :operators_from_rule, :right_operators
@@ -98,17 +98,13 @@ class Parser
98
98
  @root_rule=rule
99
99
  end
100
100
 
101
- def ignore_whitespace
102
- @ignore_whitespace = true
103
- end
104
-
105
- def ignore_whitespace?
106
- @ignore_whitespace
101
# Sets the whitespace pattern for this parser class.
# regexp (default /\s*/) is wrapped in an optional group and anchored with \A;
# the result is stored in @whitespace_regexp and returned.
def ignore_whitespace(regexp = /\s*/)
  anchored_optional = /\A(#{regexp})?/
  @whitespace_regexp = anchored_optional
end
108
104
  end
109
105
 
110
- def ignore_whitespace?
111
- self.class.ignore_whitespace?
106
# Whitespace pattern for this parser instance: the one configured on the
# parser's class if set, otherwise /\A/.
def whitespace_regexp
  configured = self.class.whitespace_regexp
  return configured if configured
  /\A/
end
113
109
 
114
110
  #*********************************************
@@ -136,14 +132,18 @@ class Parser
136
132
  #
137
133
  #*********************************************
138
134
  class <<self
139
- def many(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
140
- def many?(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.optionally.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
141
- def many!(m,delimiter=nil,post_delimiter=nil) PatternElementHash.new.dont.match.many(m).delimiter(delimiter).post_delimiter(post_delimiter) end
135
# Starts a new PatternElementHash and chains match, many(m) and delimiter(delimiter) on it.
# delimiter defaults to nil.
def many(m,delimiter=nil)
  element = PatternElementHash.new.match.many(m)
  element.delimiter(delimiter)
end
136
# Same chain as many, with optionally applied first:
# optionally, match, many(m), delimiter(delimiter) on a new PatternElementHash.
def many?(m,delimiter=nil)
  element = PatternElementHash.new.optionally.match.many(m)
  element.delimiter(delimiter)
end
137
# Same chain as many, with dont applied first:
# dont, match, many(m), delimiter(delimiter) on a new PatternElementHash.
def many!(m,delimiter=nil)
  element = PatternElementHash.new.dont.match.many(m)
  element.delimiter(delimiter)
end
142
138
 
143
139
  def match?(*args) PatternElementHash.new.optionally.match(*args) end
144
140
  def match(*args) PatternElementHash.new.match(*args) end
145
141
  def match!(*args) PatternElementHash.new.dont.match(*args) end
146
142
 
143
# If ignore_whitespace is used, all whitespace after every TerminalNode is consumed.
# Wrapping include_whitespace around a pattern-element allows you to explicitly
# match the preceding whitespace for that one element.
# NOTE: you can ALWAYS explicitly match any trailing whitespace
def include_whitespace(*args)
  element = PatternElementHash.new.include_whitespace
  element.match(*args)
end
146
+
147
147
  def dont; PatternElementHash.new.dont end
148
148
  def optionally; PatternElementHash.new.optionally end
149
149
  def could; PatternElementHash.new.could end
@@ -167,10 +167,24 @@ class Parser
167
167
  end
168
168
 
169
169
  def reset_parser_tracking
170
- self.src=nil
171
- self.failure_index=0
172
- self.expecting_list={}
173
- self.parse_cache={}
170
+ @parsing_did_not_match_entire_input = false
171
+ @src = nil
172
+ @failure_index = 0
173
+ @expecting_list = {}
174
+ @parse_cache = {}
175
+ @white_space_ranges = {}
176
+ end
177
+
178
# memoizing whitespace parser
# Returns the inclusive Range of source offsets that whitespace_regexp matches
# in the source from +start+ onward. The result is cached per start offset in
# @white_space_ranges. A zero-length match gives the empty range start..start-1.
#
# If whitespace_regexp does not match at all, the empty range start..start-1 is
# returned as well (previously this raised NoMethodError on nil).
def white_space_range(start)
  @white_space_ranges[start] ||= begin
    # src should always be a string - unless this is called AFTER parsing is done
    # (reset_parser_tracking sets it to nil). Currently this can happen with the
    # way ManyNode handles .match_length and .next, so fall back to "" both for
    # a nil src and for a start offset past the end of src.
    remaining = ((src || "")[start..-1]) || ""
    found = whitespace_regexp.match(remaining)
    if found
      # MatchData offsets are relative to `remaining`; shift them back by start
      (start + found.begin(0))..(start + found.end(0) - 1)
    else
      start..(start - 1)
    end
  end
end
175
189
 
176
190
  def cached(rule_class,offset)
@@ -187,12 +201,10 @@ class Parser
187
201
 
188
202
  def log_parsing_failure(index,expecting)
189
203
  if index>failure_index
190
- key=expecting[:pattern]
191
- @expecting_list={key=>expecting}
204
+ @expecting_list = {expecting[:pattern] => expecting}
192
205
  @failure_index = index
193
206
  elsif index == failure_index
194
- key=expecting[:pattern]
195
- self.expecting_list[key]=expecting
207
+ @expecting_list[expecting[:pattern]] = expecting
196
208
  else
197
209
  # ignored
198
210
  end
@@ -208,9 +220,11 @@ class Parser
208
220
  unless rule
209
221
  if ret
210
222
  if ret.next<src.length # parse only succeeds if the whole input is matched
211
- @parsing_did_not_match_entire_input=true
212
- @failure_index=ret.next
213
- @failed_parse = ret
223
+ if ret.next >= @failure_index
224
+ @parsing_did_not_match_entire_input=true
225
+ @failure_index = ret.next
226
+ @failed_parse = ret
227
+ end
214
228
  ret=nil
215
229
  else
216
230
  reset_parser_tracking
@@ -240,7 +254,7 @@ class Parser
240
254
  def nodes_interesting_parse_path(node)
241
255
  path = node.parent_list
242
256
  path << node
243
- path.pop while path[-1] && !path[-1].kind_of?(NonTerminalNode)
257
+ path.pop while path[-1] && !path[-1].kind_of?(RuleNode)
244
258
  path
245
259
  end
246
260
 
@@ -277,7 +291,7 @@ ENDTXT
277
291
  end
278
292
 
279
293
  #option: :verbose => true
280
- def parser_failure_info(options={})
294
+ def parser_failure_info(options={})
281
295
  return unless src
282
296
  verbose = options[:verbose]
283
297
  bracketing_lines=5
@@ -295,10 +309,10 @@ ENDTXT
295
309
  ret+="\nParser did not match entire input.\n"
296
310
  if verbose
297
311
  ret+="\nParsed:\n#{Tools::indent failed_parse.inspect}\n"
298
- end
312
+ end
299
313
  end
300
-
314
+
301
315
  ret+expecting_output
302
316
  end
303
317
  end
304
- end
318
+ end
@@ -21,7 +21,7 @@ end
21
21
  # :optional
22
22
  class PatternElement
23
23
  attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
24
- attr_accessor :match,:rule_variant
24
+ attr_accessor :match,:rule_variant,:include_whitespace
25
25
 
26
26
  #match can be:
27
27
  # true, Hash, Symbol, String, Regexp
@@ -56,7 +56,7 @@ class PatternElement
56
56
 
57
57
  if !match && terminal
58
58
  # log failures on Terminal patterns for debug output if overall parse fails
59
- parent_node.parser.log_parsing_failure(parent_node.next,:pattern=>self.match,:node=>parent_node)
59
+ parent_node.parser.log_parsing_failure(match_start_index(parent_node),:pattern=>self.match,:node=>parent_node)
60
60
  end
61
61
 
62
62
  # return match
@@ -83,24 +83,29 @@ class PatternElement
83
83
  self.parser=lambda {|parent_node| EmptyNode.new(parent_node)}
84
84
  end
85
85
 
86
# Source offset at which this pattern element starts matching under parent_node.
# With include_whitespace set, that is the first offset of the parent's
# trailing whitespace range; otherwise it is parent_node.next.
def match_start_index(parent_node)
  return parent_node.next unless include_whitespace
  parent_node.trailing_whitespace_range.first
end
93
+
94
+
86
95
  # initialize PatternElement as a parser that matches exactly the string specified
87
96
  def init_string(string)
88
- self.parser=lambda do |parent_node|
89
- if parent_node.src[parent_node.next,string.length]==string
90
- TerminalNode.new(parent_node,string.length,string)
91
- end
92
- end
93
- self.terminal=true
97
+ init_regex Regexp.escape(string)
94
98
  end
95
99
 
96
100
  # initialize PatternElement as a parser that matches the given regex
97
101
  def init_regex(regex)
98
102
  optimized_regex=/\A#{regex}/ # anchor the search
99
103
  self.parser=lambda do |parent_node|
100
- offset=parent_node.next
104
+ offset = match_start_index(parent_node)
101
105
  if parent_node.src[offset..-1].index(optimized_regex)==0
102
106
  range=$~.offset(0)
103
- TerminalNode.new(parent_node,range[1]-range[0],regex)
107
+ range = (range.min+offset)..(range.max+offset)
108
+ TerminalNode.new(parent_node,range,regex)
104
109
  end
105
110
  end
106
111
  self.terminal=true
@@ -138,66 +143,48 @@ class PatternElement
138
143
  self.optional ||= hash[:optional] || hash[:optionally]
139
144
  self.could_match ||= hash[:could]
140
145
  self.negative ||= hash[:dont]
146
+ self.include_whitespace ||= hash[:include_whitespace]
141
147
  end
142
148
 
143
149
  # initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
144
150
  def init_many(hash)
145
151
  # generate single_parser
146
152
  init hash[:many]
147
- single_parser=parser
153
+ single_parser = parser
148
154
 
149
155
  # generate delimiter_pattern_element
150
- delimiter_pattern_element= hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)
151
-
152
- # generate post_delimiter_element
153
- post_delimiter_element=hash[:post_delimiter] && case hash[:post_delimiter]
154
- when TrueClass then delimiter_pattern_element
155
- else PatternElement.new(hash[:post_delimiter],rule_variant)
156
- end
156
+ delimiter_pattern_element = hash[:delimiter] && PatternElement.new(hash[:delimiter],rule_variant)
157
157
 
158
158
  # generate many-parser
159
- self.parser= lambda do |parent_node|
160
- last_match=single_parser.call(parent_node)
161
- many_node=ManyNode.new(parent_node)
159
+ self.parser = lambda do |parent_node|
160
+ many_node = ManyNode.new parent_node
162
161
 
163
162
  if delimiter_pattern_element
164
163
  # delimited matching
165
- while last_match
166
- many_node<<last_match
164
+ while true
165
+ #match primary
166
+ match = single_parser.call many_node
167
+ break unless match
168
+ many_node << match
167
169
 
168
170
  #match delimiter
169
- delimiter_match = delimiter_pattern_element.parse(many_node)
171
+ delimiter_match = delimiter_pattern_element.parse many_node
170
172
  break unless delimiter_match
171
- many_node.delimiter_matches<<delimiter_match
172
-
173
- #match next
174
- last_match=single_parser.call(many_node)
173
+ many_node << delimiter_match
175
174
  end
175
+ many_node.separate_delimiter_matches
176
176
  else
177
177
  # not delimited matching
178
- while last_match
179
- many_node<<last_match
180
- last_match=single_parser.call(many_node)
178
+ while true
179
+ match = single_parser.call many_node
180
+ break unless match
181
+ many_node << match
181
182
  end
182
183
  end
183
184
 
184
185
  # success only if we have at least one match
185
- return nil unless many_node.length>0
186
-
187
- # pop the post delimiter matched with delimiter_pattern_element
188
- many_node.delimiter_matches.pop if many_node.length==many_node.delimiter_matches.length
189
-
190
- # If post_delimiter is requested, many_node and delimiter_matches will be the same length
191
- if post_delimiter_element
192
- post_delimiter_match=post_delimiter_element.parse(many_node)
193
-
194
- # fail if post_delimiter didn't match
195
- return nil unless post_delimiter_match
196
- many_node.delimiter_matches<<post_delimiter_match
197
- end
198
-
199
- many_node
186
+ many_node.length>0 && many_node
200
187
  end
201
188
  end
202
189
  end
203
- end
190
+ end