babel_bridge 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,14 +14,26 @@ class EmptyNode < Node
14
14
  "EmptyNode" unless options[:simple]
15
15
  end
16
16
 
17
- # EmptyNodes should always match at the beginning of the whitespace range
18
- def node_init(parent_or_parser)
19
- super
20
- self.offset = preceding_whitespace_range.first
21
- self.preceding_whitespace_range = match_range
17
+ def matches; [self]; end
18
+
19
+ end
20
+
21
+ class RollbackWhitespaceNode < Node
22
+ def inspect(options={})
23
+ "RollbackWhitespace" unless options[:simple]
22
24
  end
23
25
 
24
26
  def matches; [self]; end
25
27
 
28
+ def initialize(parent)
29
+ super
30
+ self.match_length = 0
31
+ self.offset = parent.postwhitespace_range.first
32
+ end
33
+
34
+ def postwhitespace_range
35
+ @postwhitespace_range ||= offset_after_match .. offset_after_match-1
36
+ end
37
+
26
38
  end
27
39
  end
@@ -15,7 +15,7 @@ class ManyNode < NonTerminalNode
15
15
 
16
16
  def separate_delimiter_matches
17
17
  count = 0
18
- @matches, @delimiter_matches = @matches.partition {|el| count+=1;(count%2==1)}
18
+ @matches, @delimiter_matches = matches.partition {|el| count+=1;(count%2==1)}
19
19
  @delimiter_matches = @delimiter_matches[0..@matches.length-2] #delimiter_matches should be exactly 1 shorter
20
20
  update_match_length
21
21
  end
data/lib/nodes/node.rb CHANGED
@@ -7,7 +7,12 @@ end
7
7
 
8
8
  # base class for all parse-tree nodes
9
9
  class Node
10
- attr_accessor :src,:offset,:match_length,:parent,:parser,:preceding_whitespace_range
10
+ attr_accessor :src,:offset,:match_length,:parent,:parser,:prewhitespace_range
11
+
12
+ # no_postwhitespace is used when parsing to temporarily roll back the preceding whitespace while
13
+ # attempting to match an ignore_whitespace pattern.
14
+ # It should always be false again once parsing completes or fails.
15
+ attr_accessor :no_postwhitespace
11
16
 
12
17
  def whitespace_regexp
13
18
  parser.whitespace_regexp
@@ -22,10 +27,27 @@ class Node
22
27
  offset..(offset+match_length-1)
23
28
  end
24
29
 
25
- def trailing_whitespace_range
30
+ def postwhitespace_range_without_no_postwhitespace
26
31
  parser.white_space_range offset_after_match
27
32
  end
28
33
 
34
+ def postwhitespace_range
35
+ r = postwhitespace_range_without_no_postwhitespace
36
+ no_postwhitespace ? r.first..r.first-1 : r
37
+ end
38
+
39
+ def postwhitespace
40
+ src[postwhitespace_range]
41
+ end
42
+
43
+ def prewhitespace
44
+ src[prewhitespace_range]
45
+ end
46
+
47
+ # called when a rule is matched
48
+ def matched
49
+ end
50
+
29
51
  def to_s
30
52
  text
31
53
  end
@@ -45,7 +67,7 @@ class Node
45
67
  self.parent=parent_or_parser
46
68
  self.parser=parent.parser
47
69
  self.offset=parent.next
48
- self.preceding_whitespace_range=parent.trailing_whitespace_range
70
+ self.prewhitespace_range=parent.postwhitespace_range
49
71
  self.src=parent.src
50
72
  raise "parent node does not have parser set" unless parser
51
73
  else
@@ -73,7 +95,7 @@ class Node
73
95
  #********************
74
96
  # info methods
75
97
  #********************
76
- def next; trailing_whitespace_range.last+1 end # index of first character after match and any trailing whitespace
98
+ def next; postwhitespace_range.last+1 end # index of first character after match and any trailing whitespace
77
99
  def text; src[match_range] end # the substring in src matched
78
100
 
79
101
  # length returns the number of sub-nodes
@@ -8,18 +8,18 @@ module BabelBridge
8
8
  # rule node
9
9
  # subclassed automatically by parser.rule for each unique non-terminal
10
10
  class NonTerminalNode < Node
11
+ attr_accessor :last_non_empty_node
11
12
 
12
- def trailing_whitespace_range
13
- if matches.length == 0
14
- preceding_whitespace_range || (0..-1)
13
+ def postwhitespace_range_without_no_postwhitespace
14
+ if last_non_empty_node
15
+ last_non_empty_node.postwhitespace_range
15
16
  else
16
- matches[-1].trailing_whitespace_range
17
+ prewhitespace_range || (0..-1)
17
18
  end
18
19
  end
19
20
 
20
21
  def update_match_length
21
- m = matches[-1]
22
- @match_length = m ? m.offset_after_match - offset : 0
22
+ @match_length = last_non_empty_node ? last_non_empty_node.offset_after_match - offset : 0
23
23
  end
24
24
 
25
25
  #*****************************
@@ -34,7 +34,8 @@ class NonTerminalNode < Node
34
34
  matches.length
35
35
  end
36
36
 
37
- def <<(node)
37
+ def add_match(node)
38
+ @last_non_empty_node = node unless node.kind_of?(EmptyNode)
38
39
  matches<<node
39
40
  update_match_length
40
41
  end
@@ -93,7 +93,7 @@ class RuleNode < NonTerminalNode
93
93
  # adds a match with name (optional)
94
94
  def add_match(match,name=nil)
95
95
  reset_matches_by_name
96
- matches<<match
96
+ super match
97
97
  match_names<<name
98
98
 
99
99
  update_match_length
@@ -8,7 +8,7 @@ module BabelBridge
8
8
  # used for String and Regexp PatternElements
9
9
  # not subclassed
10
10
  class TerminalNode < Node
11
- attr_accessor :pattern, :trailing_whitespace_offset
11
+ attr_accessor :pattern, :postwhitespace_offset
12
12
  def initialize(parent,range,pattern)
13
13
  node_init(parent)
14
14
  self.offset = range.min
data/lib/parser.rb CHANGED
@@ -18,8 +18,6 @@ class Parser
18
18
  #
19
19
  # rules can be specified as:
20
20
  # rule :name, to_match1, to_match2, etc...
21
- #or
22
- # rule :name, [to_match1, to_match2, etc...]
23
21
  #
24
22
  # Can define rules INSIDE class:
25
23
  # class MyParser < BabelBridge::Parser
@@ -41,9 +39,8 @@ class Parser
41
39
  # This allows you to add whatever functionality you want to your nodes in the final parse tree.
42
40
  # Also note you can override the post_match method. This allows you to restructure the parse tree as it is parsed.
43
41
  def rule(name,*pattern,&block)
44
- pattern=pattern[0] if pattern[0].kind_of?(Array)
45
- rule=self.rules[name]||=Rule.new(name,self)
46
- self.root_rule||=name
42
+ rule = self.rules[name] ||= Rule.new(name,self)
43
+ self.root_rule ||= name
47
44
  rule.add_variant(pattern,&block)
48
45
  end
49
46
 
@@ -140,9 +137,7 @@ class Parser
140
137
  def match(*args) PatternElementHash.new.match(*args) end
141
138
  def match!(*args) PatternElementHash.new.dont.match(*args) end
142
139
 
143
- # if ignore_whitespace is used, after every TerminalNode, all whitespace is consumed. Wrapping include_whitespace around a pattern-element allows you to explicitly match the preceeding whitespace for that one element.
144
- # NOTE: you can ALWAYS explicitly match any trailing whitespace
145
- def include_whitespace(*args) PatternElementHash.new.include_whitespace.match(*args) end
140
+ def rewind_whitespace; PatternElementHash.new.rewind_whitespace end
146
141
 
147
142
  def dont; PatternElementHash.new.dont end
148
143
  def optionally; PatternElementHash.new.optionally end
@@ -21,7 +21,7 @@ end
21
21
  # :optional
22
22
  class PatternElement
23
23
  attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
24
- attr_accessor :match,:rule_variant,:include_whitespace
24
+ attr_accessor :match,:rule_variant,:rewind_whitespace
25
25
 
26
26
  #match can be:
27
27
  # true, Hash, Symbol, String, Regexp
@@ -42,21 +42,23 @@ class PatternElement
42
42
 
43
43
  # attempt to match the pattern defined in self.parser in parent_node.src starting at offset parent_node.next
44
44
  def parse(parent_node)
45
+ return RollbackWhitespaceNode.new(parent_node) if rewind_whitespace
46
+
45
47
  # run element parser
46
- match=parser.call(parent_node)
48
+ match = parser.call(parent_node)
47
49
 
48
50
  # Negative patterns (PEG: !element)
49
- match=match ? nil : EmptyNode.new(parent_node) if negative
51
+ match = match ? nil : EmptyNode.new(parent_node) if negative
50
52
 
51
53
  # Optional patterns (PEG: element?)
52
- match=EmptyNode.new(parent_node) if !match && optional
54
+ match = EmptyNode.new(parent_node) if !match && optional
53
55
 
54
56
  # Could-match patterns (PEG: &element)
55
- match.match_length=0 if match && could_match
57
+ match.match_length = 0 if match && could_match
56
58
 
57
59
  if !match && terminal
58
60
  # log failures on Terminal patterns for debug output if overall parse fails
59
- parent_node.parser.log_parsing_failure(match_start_index(parent_node),:pattern=>self.match,:node=>parent_node)
61
+ parent_node.parser.log_parsing_failure(parent_node.next,:pattern=>self.match,:node=>parent_node)
60
62
  end
61
63
 
62
64
  # return match
@@ -67,7 +69,7 @@ class PatternElement
67
69
 
68
70
  # initialize PatternElement based on the type of: match
69
71
  def init(match)
70
- self.match=match
72
+ self.match = match
71
73
  case match
72
74
  when TrueClass then init_true
73
75
  when String then init_string match
@@ -83,15 +85,6 @@ class PatternElement
83
85
  self.parser=lambda {|parent_node| EmptyNode.new(parent_node)}
84
86
  end
85
87
 
86
- def match_start_index(parent_node)
87
- if include_whitespace
88
- parent_node.trailing_whitespace_range.first
89
- else
90
- parent_node.next
91
- end
92
- end
93
-
94
-
95
88
  # initialize PatternElement as a parser that matches exactly the string specified
96
89
  def init_string(string)
97
90
  init_regex Regexp.escape(string)
@@ -101,7 +94,7 @@ class PatternElement
101
94
  def init_regex(regex)
102
95
  optimized_regex=/\A#{regex}/ # anchor the search
103
96
  self.parser=lambda do |parent_node|
104
- offset = match_start_index(parent_node)
97
+ offset = parent_node.next
105
98
  if parent_node.src[offset..-1].index(optimized_regex)==0
106
99
  range=$~.offset(0)
107
100
  range = (range.min+offset)..(range.max+offset)
@@ -114,16 +107,16 @@ class PatternElement
114
107
  # initialize PatternElement as a parser that matches a named sub-rule
115
108
  def init_rule(rule_name)
116
109
  rule_name.to_s[/^([^?!]*)([?!])?$/]
117
- rule_name=$1.to_sym
118
- option=$2
119
- match_rule=rule_variant.rule.parser.rules[rule_name]
110
+ rule_name = $1.to_sym
111
+ option = $2
112
+ match_rule = rule_variant.rule.parser.rules[rule_name]
120
113
  raise "no rule for #{rule_name}" unless match_rule
121
114
 
122
115
  self.parser = lambda {|parent_node| match_rule.parse(parent_node)}
123
- self.name = rule_name
116
+ self.name = rule_name
124
117
  case option
125
- when "?" then self.optional=true
126
- when "!" then self.negative=true
118
+ when "?" then self.optional = true
119
+ when "!" then self.negative = true
127
120
  end
128
121
  end
129
122
 
@@ -135,6 +128,9 @@ class PatternElement
135
128
  init_many hash
136
129
  elsif hash[:match]
137
130
  init hash[:match]
131
+ elsif hash[:rewind_whitespace]
132
+ self.rewind_whitespace = true
133
+ return
138
134
  else
139
135
  raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
140
136
  end
@@ -143,7 +139,6 @@ class PatternElement
143
139
  self.optional ||= hash[:optional] || hash[:optionally]
144
140
  self.could_match ||= hash[:could]
145
141
  self.negative ||= hash[:dont]
146
- self.include_whitespace ||= hash[:include_whitespace]
147
142
  end
148
143
 
149
144
  # initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
@@ -165,12 +160,12 @@ class PatternElement
165
160
  #match primary
166
161
  match = single_parser.call many_node
167
162
  break unless match
168
- many_node << match
163
+ many_node.add_match match
169
164
 
170
165
  #match delimiter
171
166
  delimiter_match = delimiter_pattern_element.parse many_node
172
167
  break unless delimiter_match
173
- many_node << delimiter_match
168
+ many_node.add_match delimiter_match
174
169
  end
175
170
  many_node.separate_delimiter_matches
176
171
  else
@@ -178,7 +173,7 @@ class PatternElement
178
173
  while true
179
174
  match = single_parser.call many_node
180
175
  break unless match
181
- many_node << match
176
+ many_node.add_match match
182
177
  end
183
178
  end
184
179
 
data/lib/rule_variant.rb CHANGED
@@ -26,6 +26,7 @@ class RuleVariant
26
26
 
27
27
  # if parse failed
28
28
  return if !match
29
+ match.matched
29
30
 
30
31
  # parse succeeded, add to node and continue
31
32
  node.add_match(match,pe.name)
@@ -34,6 +35,6 @@ class RuleVariant
34
35
  end
35
36
 
36
37
  def inspect; pattern.collect {|a| a.inspect}.join(', '); end
37
- def to_s; "variant_class: #{variant_node_class}, pattern: #{inspect}"; end
38
+ def to_s; "variant_class: #{variant_node_class}, pattern: #{inspect}"; end
39
+ end
38
40
  end
39
- end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module BabelBridge
2
- VERSION = "0.4.0"
2
+ VERSION = "0.4.1"
3
3
  end
data/spec/bb_spec.rb CHANGED
@@ -13,9 +13,10 @@ describe BabelBridge do
13
13
  #options
14
14
  # :parser
15
15
  # :failure_ok
16
- def test_parse(string,options={})
16
+ def test_parse(string,options={},&block)
17
17
  parser = options[:parser] || @parser
18
18
  res = parser.parse(string)
19
+ yield res if res && block
19
20
  if options[:should_fail_at]
20
21
  res.should == nil
21
22
  parser.failure_index.should == options[:should_fail_at]
@@ -69,7 +70,7 @@ describe BabelBridge do
69
70
  ignore_whitespace
70
71
 
71
72
  rule :pair, :statement, :end_statement, :statement
72
- rule :end_statement, include_whitespace(/([\t ]*[\n;])+/)
73
+ rule :end_statement, rewind_whitespace, /([\t ]*[\n;])+/
73
74
  rule :statement, "0"
74
75
  end
75
76
 
@@ -89,7 +90,7 @@ describe BabelBridge do
89
90
  ignore_whitespace
90
91
 
91
92
  rule :pair, :statement, :end_statement, :statement
92
- rule :end_statement, include_whitespace(/([\t ]*[\n;])+/)
93
+ rule :end_statement, rewind_whitespace, /([\t ]*[\n;])+/
93
94
  rule :statement, "0", :one?, :one?, :one?
94
95
  rule :one, "1"
95
96
  end
@@ -106,7 +107,7 @@ describe BabelBridge do
106
107
  new_parser do
107
108
  ignore_whitespace
108
109
  rule :statements, many(:statement,:end_statement)
109
- rule :end_statement, include_whitespace(/([\t ]*[;\n])+/)
110
+ rule :end_statement, rewind_whitespace, /([\t ]*[;\n])+/
110
111
  rule :statement, "0"
111
112
  end
112
113
 
@@ -129,7 +130,7 @@ describe BabelBridge do
129
130
  test_parse "foo-bar", :should_fail_at => 3
130
131
  end
131
132
 
132
- it "should work to have many many parsing" do
133
+ it "should work to have many-many parsing" do
133
134
  new_parser do
134
135
  rule :top, many(:bottom,";")
135
136
  rule :bottom, many("0",",")
@@ -143,11 +144,11 @@ describe BabelBridge do
143
144
  test_parse "0,0,0;0;0,0,0"
144
145
  end
145
146
 
146
- it "should work to have many many parsing with whitespace tricks" do
147
+ it "should work to have many parsing with whitespace tricks" do
147
148
  new_parser do
148
149
  ignore_whitespace
149
150
  rule :statements, many(:statement,:end_statement)
150
- rule :end_statement, include_whitespace(/([\t ]*[;\n])+/)
151
+ rule :end_statement, rewind_whitespace, /([\t ]*[;\n])+/
151
152
  rule :statement, :bin_op
152
153
  binary_operators_rule :bin_op, :int, ["**", [:/, :*], [:+, "-"]], :right_operators => ["**"]
153
154
  rule :int, /\d+/
@@ -161,4 +162,50 @@ describe BabelBridge do
161
162
  ENDCODE
162
163
  end
163
164
 
165
+ it "should work to rewind_whitespace, :rule" do
166
+ new_parser do
167
+ ignore_whitespace
168
+ rule :all, :identifier, :parameter?, :identifier do
169
+ def to_model
170
+ [[identifier[0].to_sym, parameter && parameter.to_sym], identifier[1].to_sym]
171
+ end
172
+ end
173
+ rule :parameter, rewind_whitespace, /[ \t]*/, rewind_whitespace, :identifier
174
+ rule :identifier, /[_a-zA-Z][_a-zA-Z0-9]*/
175
+ end
176
+
177
+ test_parse("fred\nbar") {|parsed|parsed.to_model.should == [[:fred,nil],:bar]}
178
+ test_parse("fred foo\nbar") {|parsed|parsed.to_model.should == [[:fred,:foo],:bar]}
179
+ end
180
+
181
+ it "should work to rewind_whitespace, many" do
182
+ new_parser do
183
+ ignore_whitespace
184
+ rule :all, :identifier, :parameters?, :identifier do
185
+ def to_model
186
+ [[identifier[0].to_sym, parameters && parameters.to_s], identifier[1].to_sym]
187
+ end
188
+ end
189
+ rule :parameters, rewind_whitespace, /[ \t]*/, rewind_whitespace, many(:identifier,",")
190
+ rule :identifier, /[_a-zA-Z][_a-zA-Z0-9]*/
191
+ end
192
+
193
+ test_parse("fred\nbar") {|parsed| parsed.to_model.should==[[:fred,nil],:bar]}
194
+ test_parse("fred foo\nbar") {|parsed| parsed.to_model.should==[[:fred,"foo"],:bar]}
195
+ test_parse("fred foo, bar\nbar") {|parsed| parsed.to_model.should==[[:fred,"foo, bar"],:bar]}
196
+ end
197
+
198
+ it "dont.match shouldn't consume any whitespace" do
199
+ new_parser do
200
+ ignore_whitespace
201
+ rule :statements, :statement, "bar"
202
+ rule :statement, :identifier, :parameters?
203
+ rule :parameters, rewind_whitespace, / */, rewind_whitespace, :identifier
204
+ rule :identifier, dont.match("end"), /[_a-zA-Z][_a-zA-Z0-9]*/
205
+ end
206
+
207
+ test_parse("fred\nbar")
208
+ test_parse("fred foo\nbar")
209
+ end
210
+
164
211
  end
data/test/test_bb.rb CHANGED
@@ -58,7 +58,7 @@ class BBTests < TestHelper
58
58
 
59
59
  def test_foo
60
60
  parser=new_parser do
61
- rule :foo, ["foo"]
61
+ rule :foo, "foo"
62
62
  end
63
63
 
64
64
  assert p=parser.parse("foo")
@@ -88,8 +88,8 @@ class BBTests < TestHelper
88
88
 
89
89
  def test_optional
90
90
  parser=new_parser do
91
- rule :foo, ["foo", :bar?]
92
- rule :bar, ["bar"]
91
+ rule :foo, "foo", :bar?
92
+ rule :bar, "bar"
93
93
  end
94
94
 
95
95
  assert parser.parse("foo")
@@ -107,8 +107,8 @@ class BBTests < TestHelper
107
107
 
108
108
  def test_optional_middle
109
109
  parser=new_parser do
110
- rule :foo, ["foo", :bar?, "foo"]
111
- rule :bar, ["bar"]
110
+ rule :foo, "foo", :bar?, "foo"
111
+ rule :bar, "bar"
112
112
  end
113
113
 
114
114
  assert parser.parse("foofoo")
@@ -117,8 +117,8 @@ class BBTests < TestHelper
117
117
 
118
118
  def test_greedy_optional_middle
119
119
  parser=new_parser do
120
- rule :foo, ["foo", :bar?, "foo"]
121
- rule :bar, ["foo"]
120
+ rule :foo, "foo", :bar?, "foo"
121
+ rule :bar, "foo"
122
122
  end
123
123
 
124
124
  assert_nil parser.parse("foofoo")
@@ -127,8 +127,8 @@ class BBTests < TestHelper
127
127
 
128
128
  def test_not
129
129
  parser=new_parser do
130
- rule :foo, ["foo", :bar!]
131
- rule :bar, ["bar"]
130
+ rule :foo, "foo", :bar!
131
+ rule :bar, "bar"
132
132
  end
133
133
 
134
134
  assert_nil parser.parse("foofud") # this should fail because it doesn't match the entire input
@@ -139,7 +139,7 @@ class BBTests < TestHelper
139
139
 
140
140
  def test_recursive
141
141
  parser=new_parser do
142
- rule :foo, ["foo", :foo?]
142
+ rule :foo, "foo", :foo?
143
143
  end
144
144
 
145
145
  assert parser.parse("foo")
@@ -153,8 +153,8 @@ class BBTests < TestHelper
153
153
  v1=nil
154
154
  v2=nil
155
155
  parser=new_parser do
156
- v1=rule :foo, ["foo"]
157
- v2=rule :foo, ["bar"]
156
+ v1=rule :foo, "foo"
157
+ v2=rule :foo, "bar"
158
158
  end
159
159
 
160
160
  assert r1=parser.parse("foo")
@@ -165,8 +165,8 @@ class BBTests < TestHelper
165
165
 
166
166
  def test_add
167
167
  parser=new_parser do
168
- rule :add, [:number,"+",:number]
169
- rule :number, [/[0-9]+/]
168
+ rule :add, :number, "+", :number
169
+ rule :number, /[0-9]+/
170
170
  end
171
171
 
172
172
  assert parser.parse("1+1")
@@ -175,7 +175,7 @@ class BBTests < TestHelper
175
175
 
176
176
  def test_method
177
177
  parser=new_parser do
178
- rule :number, [/[0-9]+/] do
178
+ rule :number, /[0-9]+/ do
179
179
  def number
180
180
  text.to_i
181
181
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: babel_bridge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: