babel_bridge 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,14 +14,26 @@ class EmptyNode < Node
14
14
  "EmptyNode" unless options[:simple]
15
15
  end
16
16
 
17
- # EmptyNodes should always match at the beginning of the whitespace range
18
- def node_init(parent_or_parser)
19
- super
20
- self.offset = preceding_whitespace_range.first
21
- self.preceding_whitespace_range = match_range
17
+ def matches; [self]; end
18
+
19
+ end
20
+
21
+ class RollbackWhitespaceNode < Node
22
+ def inspect(options={})
23
+ "RollbackWhitespace" unless options[:simple]
22
24
  end
23
25
 
24
26
  def matches; [self]; end
25
27
 
28
+ def initialize(parent)
29
+ super
30
+ self.match_length = 0
31
+ self.offset = parent.postwhitespace_range.first
32
+ end
33
+
34
+ def postwhitespace_range
35
+ @postwhitespace_range ||= offset_after_match .. offset_after_match-1
36
+ end
37
+
26
38
  end
27
39
  end
@@ -15,7 +15,7 @@ class ManyNode < NonTerminalNode
15
15
 
16
16
  def separate_delimiter_matches
17
17
  count = 0
18
- @matches, @delimiter_matches = @matches.partition {|el| count+=1;(count%2==1)}
18
+ @matches, @delimiter_matches = matches.partition {|el| count+=1;(count%2==1)}
19
19
  @delimiter_matches = @delimiter_matches[0..@matches.length-2] #delimiter_matches should be exactly 1 shorter
20
20
  update_match_length
21
21
  end
data/lib/nodes/node.rb CHANGED
@@ -7,7 +7,12 @@ end
7
7
 
8
8
  # base class for all parse-tree nodes
9
9
  class Node
10
- attr_accessor :src,:offset,:match_length,:parent,:parser,:preceding_whitespace_range
10
+ attr_accessor :src,:offset,:match_length,:parent,:parser,:prewhitespace_range
11
+
12
+ # no_postwhitespace is used when parsing to temporarily roll back the preceding whitespace while
13
+ # attempting to match an ignore_whitespace pattern.
14
+ # It should always be false again once parsing completes or fails.
15
+ attr_accessor :no_postwhitespace
11
16
 
12
17
  def whitespace_regexp
13
18
  parser.whitespace_regexp
@@ -22,10 +27,27 @@ class Node
22
27
  offset..(offset+match_length-1)
23
28
  end
24
29
 
25
- def trailing_whitespace_range
30
+ def postwhitespace_range_without_no_postwhitespace
26
31
  parser.white_space_range offset_after_match
27
32
  end
28
33
 
34
+ def postwhitespace_range
35
+ r = postwhitespace_range_without_no_postwhitespace
36
+ no_postwhitespace ? r.first..r.first-1 : r
37
+ end
38
+
39
+ def postwhitespace
40
+ src[postwhitespace_range]
41
+ end
42
+
43
+ def prewhitespace
44
+ src[prewhitespace_range]
45
+ end
46
+
47
+ # called when a rule is matched
48
+ def matched
49
+ end
50
+
29
51
  def to_s
30
52
  text
31
53
  end
@@ -45,7 +67,7 @@ class Node
45
67
  self.parent=parent_or_parser
46
68
  self.parser=parent.parser
47
69
  self.offset=parent.next
48
- self.preceding_whitespace_range=parent.trailing_whitespace_range
70
+ self.prewhitespace_range=parent.postwhitespace_range
49
71
  self.src=parent.src
50
72
  raise "parent node does not have parser set" unless parser
51
73
  else
@@ -73,7 +95,7 @@ class Node
73
95
  #********************
74
96
  # info methods
75
97
  #********************
76
- def next; trailing_whitespace_range.last+1 end # index of first character after match and any trailing whitespace
98
+ def next; postwhitespace_range.last+1 end # index of first character after match and any trailing whitespace
77
99
  def text; src[match_range] end # the substring in src matched
78
100
 
79
101
  # length returns the number of sub-nodes
@@ -8,18 +8,18 @@ module BabelBridge
8
8
  # rule node
9
9
  # subclassed automatically by parser.rule for each unique non-terminal
10
10
  class NonTerminalNode < Node
11
+ attr_accessor :last_non_empty_node
11
12
 
12
- def trailing_whitespace_range
13
- if matches.length == 0
14
- preceding_whitespace_range || (0..-1)
13
+ def postwhitespace_range_without_no_postwhitespace
14
+ if last_non_empty_node
15
+ last_non_empty_node.postwhitespace_range
15
16
  else
16
- matches[-1].trailing_whitespace_range
17
+ prewhitespace_range || (0..-1)
17
18
  end
18
19
  end
19
20
 
20
21
  def update_match_length
21
- m = matches[-1]
22
- @match_length = m ? m.offset_after_match - offset : 0
22
+ @match_length = last_non_empty_node ? last_non_empty_node.offset_after_match - offset : 0
23
23
  end
24
24
 
25
25
  #*****************************
@@ -34,7 +34,8 @@ class NonTerminalNode < Node
34
34
  matches.length
35
35
  end
36
36
 
37
- def <<(node)
37
+ def add_match(node)
38
+ @last_non_empty_node = node unless node.kind_of?(EmptyNode)
38
39
  matches<<node
39
40
  update_match_length
40
41
  end
@@ -93,7 +93,7 @@ class RuleNode < NonTerminalNode
93
93
  # adds a match with name (optional)
94
94
  def add_match(match,name=nil)
95
95
  reset_matches_by_name
96
- matches<<match
96
+ super match
97
97
  match_names<<name
98
98
 
99
99
  update_match_length
@@ -8,7 +8,7 @@ module BabelBridge
8
8
  # used for String and Regexp PatternElements
9
9
  # not subclassed
10
10
  class TerminalNode < Node
11
- attr_accessor :pattern, :trailing_whitespace_offset
11
+ attr_accessor :pattern, :postwhitespace_offset
12
12
  def initialize(parent,range,pattern)
13
13
  node_init(parent)
14
14
  self.offset = range.min
data/lib/parser.rb CHANGED
@@ -18,8 +18,6 @@ class Parser
18
18
  #
19
19
  # rules can be specified as:
20
20
  # rule :name, to_match1, to_match2, etc...
21
- #or
22
- # rule :name, [to_match1, to_match2, etc...]
23
21
  #
24
22
  # Can define rules INSIDE class:
25
23
  # class MyParser < BabelBridge::Parser
@@ -41,9 +39,8 @@ class Parser
41
39
  # This allows you to add whatever functionality you want to your nodes in the final parse tree.
42
40
  # Also note you can override the post_match method. This allows you to restructure the parse tree as it is parsed.
43
41
  def rule(name,*pattern,&block)
44
- pattern=pattern[0] if pattern[0].kind_of?(Array)
45
- rule=self.rules[name]||=Rule.new(name,self)
46
- self.root_rule||=name
42
+ rule = self.rules[name] ||= Rule.new(name,self)
43
+ self.root_rule ||= name
47
44
  rule.add_variant(pattern,&block)
48
45
  end
49
46
 
@@ -140,9 +137,7 @@ class Parser
140
137
  def match(*args) PatternElementHash.new.match(*args) end
141
138
  def match!(*args) PatternElementHash.new.dont.match(*args) end
142
139
 
143
- # if ignore_whitespace is used, after every TerminalNode, all whitespace is consumed. Wrapping include_whitespace around a pattern-element allows you to explicitly match the preceeding whitespace for that one element.
144
- # NOTE: you can ALWAYS explicitly match any trailing whitespace
145
- def include_whitespace(*args) PatternElementHash.new.include_whitespace.match(*args) end
140
+ def rewind_whitespace; PatternElementHash.new.rewind_whitespace end
146
141
 
147
142
  def dont; PatternElementHash.new.dont end
148
143
  def optionally; PatternElementHash.new.optionally end
@@ -21,7 +21,7 @@ end
21
21
  # :optional
22
22
  class PatternElement
23
23
  attr_accessor :parser,:optional,:negative,:name,:terminal,:could_match
24
- attr_accessor :match,:rule_variant,:include_whitespace
24
+ attr_accessor :match,:rule_variant,:rewind_whitespace
25
25
 
26
26
  #match can be:
27
27
  # true, Hash, Symbol, String, Regexp
@@ -42,21 +42,23 @@ class PatternElement
42
42
 
43
43
  # attempt to match the pattern defined in self.parser in parent_node.src starting at offset parent_node.next
44
44
  def parse(parent_node)
45
+ return RollbackWhitespaceNode.new(parent_node) if rewind_whitespace
46
+
45
47
  # run element parser
46
- match=parser.call(parent_node)
48
+ match = parser.call(parent_node)
47
49
 
48
50
  # Negative patterns (PEG: !element)
49
- match=match ? nil : EmptyNode.new(parent_node) if negative
51
+ match = match ? nil : EmptyNode.new(parent_node) if negative
50
52
 
51
53
  # Optional patterns (PEG: element?)
52
- match=EmptyNode.new(parent_node) if !match && optional
54
+ match = EmptyNode.new(parent_node) if !match && optional
53
55
 
54
56
  # Could-match patterns (PEG: &element)
55
- match.match_length=0 if match && could_match
57
+ match.match_length = 0 if match && could_match
56
58
 
57
59
  if !match && terminal
58
60
  # log failures on Terminal patterns for debug output if overall parse fails
59
- parent_node.parser.log_parsing_failure(match_start_index(parent_node),:pattern=>self.match,:node=>parent_node)
61
+ parent_node.parser.log_parsing_failure(parent_node.next,:pattern=>self.match,:node=>parent_node)
60
62
  end
61
63
 
62
64
  # return match
@@ -67,7 +69,7 @@ class PatternElement
67
69
 
68
70
  # initialize PatternElement based on the type of: match
69
71
  def init(match)
70
- self.match=match
72
+ self.match = match
71
73
  case match
72
74
  when TrueClass then init_true
73
75
  when String then init_string match
@@ -83,15 +85,6 @@ class PatternElement
83
85
  self.parser=lambda {|parent_node| EmptyNode.new(parent_node)}
84
86
  end
85
87
 
86
- def match_start_index(parent_node)
87
- if include_whitespace
88
- parent_node.trailing_whitespace_range.first
89
- else
90
- parent_node.next
91
- end
92
- end
93
-
94
-
95
88
  # initialize PatternElement as a parser that matches exactly the string specified
96
89
  def init_string(string)
97
90
  init_regex Regexp.escape(string)
@@ -101,7 +94,7 @@ class PatternElement
101
94
  def init_regex(regex)
102
95
  optimized_regex=/\A#{regex}/ # anchor the search
103
96
  self.parser=lambda do |parent_node|
104
- offset = match_start_index(parent_node)
97
+ offset = parent_node.next
105
98
  if parent_node.src[offset..-1].index(optimized_regex)==0
106
99
  range=$~.offset(0)
107
100
  range = (range.min+offset)..(range.max+offset)
@@ -114,16 +107,16 @@ class PatternElement
114
107
  # initialize PatternElement as a parser that matches a named sub-rule
115
108
  def init_rule(rule_name)
116
109
  rule_name.to_s[/^([^?!]*)([?!])?$/]
117
- rule_name=$1.to_sym
118
- option=$2
119
- match_rule=rule_variant.rule.parser.rules[rule_name]
110
+ rule_name = $1.to_sym
111
+ option = $2
112
+ match_rule = rule_variant.rule.parser.rules[rule_name]
120
113
  raise "no rule for #{rule_name}" unless match_rule
121
114
 
122
115
  self.parser = lambda {|parent_node| match_rule.parse(parent_node)}
123
- self.name = rule_name
116
+ self.name = rule_name
124
117
  case option
125
- when "?" then self.optional=true
126
- when "!" then self.negative=true
118
+ when "?" then self.optional = true
119
+ when "!" then self.negative = true
127
120
  end
128
121
  end
129
122
 
@@ -135,6 +128,9 @@ class PatternElement
135
128
  init_many hash
136
129
  elsif hash[:match]
137
130
  init hash[:match]
131
+ elsif hash[:rewind_whitespace]
132
+ self.rewind_whitespace = true
133
+ return
138
134
  else
139
135
  raise "extended-options patterns (specified by a hash) must have either :parser=> or a :match=> set"
140
136
  end
@@ -143,7 +139,6 @@ class PatternElement
143
139
  self.optional ||= hash[:optional] || hash[:optionally]
144
140
  self.could_match ||= hash[:could]
145
141
  self.negative ||= hash[:dont]
146
- self.include_whitespace ||= hash[:include_whitespace]
147
142
  end
148
143
 
149
144
  # initialize the PatternElement as a many-parser from hashed parameters (hash[:many] is assumed to be set)
@@ -165,12 +160,12 @@ class PatternElement
165
160
  #match primary
166
161
  match = single_parser.call many_node
167
162
  break unless match
168
- many_node << match
163
+ many_node.add_match match
169
164
 
170
165
  #match delimiter
171
166
  delimiter_match = delimiter_pattern_element.parse many_node
172
167
  break unless delimiter_match
173
- many_node << delimiter_match
168
+ many_node.add_match delimiter_match
174
169
  end
175
170
  many_node.separate_delimiter_matches
176
171
  else
@@ -178,7 +173,7 @@ class PatternElement
178
173
  while true
179
174
  match = single_parser.call many_node
180
175
  break unless match
181
- many_node << match
176
+ many_node.add_match match
182
177
  end
183
178
  end
184
179
 
data/lib/rule_variant.rb CHANGED
@@ -26,6 +26,7 @@ class RuleVariant
26
26
 
27
27
  # if parse failed
28
28
  return if !match
29
+ match.matched
29
30
 
30
31
  # parse succeeded, add to node and continue
31
32
  node.add_match(match,pe.name)
@@ -34,6 +35,6 @@ class RuleVariant
34
35
  end
35
36
 
36
37
  def inspect; pattern.collect {|a| a.inspect}.join(', '); end
37
- def to_s; "variant_class: #{variant_node_class}, pattern: #{inspect}"; end
38
+ def to_s; "variant_class: #{variant_node_class}, pattern: #{inspect}"; end
39
+ end
38
40
  end
39
- end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module BabelBridge
2
- VERSION = "0.4.0"
2
+ VERSION = "0.4.1"
3
3
  end
data/spec/bb_spec.rb CHANGED
@@ -13,9 +13,10 @@ describe BabelBridge do
13
13
  #options
14
14
  # :parser
15
15
  # :failure_ok
16
- def test_parse(string,options={})
16
+ def test_parse(string,options={},&block)
17
17
  parser = options[:parser] || @parser
18
18
  res = parser.parse(string)
19
+ yield res if res && block
19
20
  if options[:should_fail_at]
20
21
  res.should == nil
21
22
  parser.failure_index.should == options[:should_fail_at]
@@ -69,7 +70,7 @@ describe BabelBridge do
69
70
  ignore_whitespace
70
71
 
71
72
  rule :pair, :statement, :end_statement, :statement
72
- rule :end_statement, include_whitespace(/([\t ]*[\n;])+/)
73
+ rule :end_statement, rewind_whitespace, /([\t ]*[\n;])+/
73
74
  rule :statement, "0"
74
75
  end
75
76
 
@@ -89,7 +90,7 @@ describe BabelBridge do
89
90
  ignore_whitespace
90
91
 
91
92
  rule :pair, :statement, :end_statement, :statement
92
- rule :end_statement, include_whitespace(/([\t ]*[\n;])+/)
93
+ rule :end_statement, rewind_whitespace, /([\t ]*[\n;])+/
93
94
  rule :statement, "0", :one?, :one?, :one?
94
95
  rule :one, "1"
95
96
  end
@@ -106,7 +107,7 @@ describe BabelBridge do
106
107
  new_parser do
107
108
  ignore_whitespace
108
109
  rule :statements, many(:statement,:end_statement)
109
- rule :end_statement, include_whitespace(/([\t ]*[;\n])+/)
110
+ rule :end_statement, rewind_whitespace, /([\t ]*[;\n])+/
110
111
  rule :statement, "0"
111
112
  end
112
113
 
@@ -129,7 +130,7 @@ describe BabelBridge do
129
130
  test_parse "foo-bar", :should_fail_at => 3
130
131
  end
131
132
 
132
- it "should work to have many many parsing" do
133
+ it "should work to have many-many parsing" do
133
134
  new_parser do
134
135
  rule :top, many(:bottom,";")
135
136
  rule :bottom, many("0",",")
@@ -143,11 +144,11 @@ describe BabelBridge do
143
144
  test_parse "0,0,0;0;0,0,0"
144
145
  end
145
146
 
146
- it "should work to have many many parsing with whitespace tricks" do
147
+ it "should work to have many parsing with whitespace tricks" do
147
148
  new_parser do
148
149
  ignore_whitespace
149
150
  rule :statements, many(:statement,:end_statement)
150
- rule :end_statement, include_whitespace(/([\t ]*[;\n])+/)
151
+ rule :end_statement, rewind_whitespace, /([\t ]*[;\n])+/
151
152
  rule :statement, :bin_op
152
153
  binary_operators_rule :bin_op, :int, ["**", [:/, :*], [:+, "-"]], :right_operators => ["**"]
153
154
  rule :int, /\d+/
@@ -161,4 +162,50 @@ describe BabelBridge do
161
162
  ENDCODE
162
163
  end
163
164
 
165
+ it "should work to rewind_whitespace, :rule" do
166
+ new_parser do
167
+ ignore_whitespace
168
+ rule :all, :identifier, :parameter?, :identifier do
169
+ def to_model
170
+ [[identifier[0].to_sym, parameter && parameter.to_sym], identifier[1].to_sym]
171
+ end
172
+ end
173
+ rule :parameter, rewind_whitespace, /[ \t]*/, rewind_whitespace, :identifier
174
+ rule :identifier, /[_a-zA-Z][_a-zA-Z0-9]*/
175
+ end
176
+
177
+ test_parse("fred\nbar") {|parsed|parsed.to_model.should == [[:fred,nil],:bar]}
178
+ test_parse("fred foo\nbar") {|parsed|parsed.to_model.should == [[:fred,:foo],:bar]}
179
+ end
180
+
181
+ it "should work to rewind_whitespace, many" do
182
+ new_parser do
183
+ ignore_whitespace
184
+ rule :all, :identifier, :parameters?, :identifier do
185
+ def to_model
186
+ [[identifier[0].to_sym, parameters && parameters.to_s], identifier[1].to_sym]
187
+ end
188
+ end
189
+ rule :parameters, rewind_whitespace, /[ \t]*/, rewind_whitespace, many(:identifier,",")
190
+ rule :identifier, /[_a-zA-Z][_a-zA-Z0-9]*/
191
+ end
192
+
193
+ test_parse("fred\nbar") {|parsed| parsed.to_model.should==[[:fred,nil],:bar]}
194
+ test_parse("fred foo\nbar") {|parsed| parsed.to_model.should==[[:fred,"foo"],:bar]}
195
+ test_parse("fred foo, bar\nbar") {|parsed| parsed.to_model.should==[[:fred,"foo, bar"],:bar]}
196
+ end
197
+
198
+ it "dont.match shouldn't consume any whitespace" do
199
+ new_parser do
200
+ ignore_whitespace
201
+ rule :statements, :statement, "bar"
202
+ rule :statement, :identifier, :parameters?
203
+ rule :parameters, rewind_whitespace, / */, rewind_whitespace, :identifier
204
+ rule :identifier, dont.match("end"), /[_a-zA-Z][_a-zA-Z0-9]*/
205
+ end
206
+
207
+ test_parse("fred\nbar")
208
+ test_parse("fred foo\nbar")
209
+ end
210
+
164
211
  end
data/test/test_bb.rb CHANGED
@@ -58,7 +58,7 @@ class BBTests < TestHelper
58
58
 
59
59
  def test_foo
60
60
  parser=new_parser do
61
- rule :foo, ["foo"]
61
+ rule :foo, "foo"
62
62
  end
63
63
 
64
64
  assert p=parser.parse("foo")
@@ -88,8 +88,8 @@ class BBTests < TestHelper
88
88
 
89
89
  def test_optional
90
90
  parser=new_parser do
91
- rule :foo, ["foo", :bar?]
92
- rule :bar, ["bar"]
91
+ rule :foo, "foo", :bar?
92
+ rule :bar, "bar"
93
93
  end
94
94
 
95
95
  assert parser.parse("foo")
@@ -107,8 +107,8 @@ class BBTests < TestHelper
107
107
 
108
108
  def test_optional_middle
109
109
  parser=new_parser do
110
- rule :foo, ["foo", :bar?, "foo"]
111
- rule :bar, ["bar"]
110
+ rule :foo, "foo", :bar?, "foo"
111
+ rule :bar, "bar"
112
112
  end
113
113
 
114
114
  assert parser.parse("foofoo")
@@ -117,8 +117,8 @@ class BBTests < TestHelper
117
117
 
118
118
  def test_greedy_optional_middle
119
119
  parser=new_parser do
120
- rule :foo, ["foo", :bar?, "foo"]
121
- rule :bar, ["foo"]
120
+ rule :foo, "foo", :bar?, "foo"
121
+ rule :bar, "foo"
122
122
  end
123
123
 
124
124
  assert_nil parser.parse("foofoo")
@@ -127,8 +127,8 @@ class BBTests < TestHelper
127
127
 
128
128
  def test_not
129
129
  parser=new_parser do
130
- rule :foo, ["foo", :bar!]
131
- rule :bar, ["bar"]
130
+ rule :foo, "foo", :bar!
131
+ rule :bar, "bar"
132
132
  end
133
133
 
134
134
  assert_nil parser.parse("foofud") # this should fail because it doesn't match the entire input
@@ -139,7 +139,7 @@ class BBTests < TestHelper
139
139
 
140
140
  def test_recursive
141
141
  parser=new_parser do
142
- rule :foo, ["foo", :foo?]
142
+ rule :foo, "foo", :foo?
143
143
  end
144
144
 
145
145
  assert parser.parse("foo")
@@ -153,8 +153,8 @@ class BBTests < TestHelper
153
153
  v1=nil
154
154
  v2=nil
155
155
  parser=new_parser do
156
- v1=rule :foo, ["foo"]
157
- v2=rule :foo, ["bar"]
156
+ v1=rule :foo, "foo"
157
+ v2=rule :foo, "bar"
158
158
  end
159
159
 
160
160
  assert r1=parser.parse("foo")
@@ -165,8 +165,8 @@ class BBTests < TestHelper
165
165
 
166
166
  def test_add
167
167
  parser=new_parser do
168
- rule :add, [:number,"+",:number]
169
- rule :number, [/[0-9]+/]
168
+ rule :add, :number, "+", :number
169
+ rule :number, /[0-9]+/
170
170
  end
171
171
 
172
172
  assert parser.parse("1+1")
@@ -175,7 +175,7 @@ class BBTests < TestHelper
175
175
 
176
176
  def test_method
177
177
  parser=new_parser do
178
- rule :number, [/[0-9]+/] do
178
+ rule :number, /[0-9]+/ do
179
179
  def number
180
180
  text.to_i
181
181
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: babel_bridge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: