treetop 1.4.5 → 1.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. data/README.md +44 -20
  2. data/lib/treetop/compiler/metagrammar.rb +126 -33
  3. data/lib/treetop/compiler/metagrammar.treetop +46 -42
  4. data/lib/treetop/compiler/node_classes/repetition.rb +39 -5
  5. data/lib/treetop/version.rb +1 -1
  6. data/spec/compiler/and_predicate_spec.rb +36 -0
  7. data/spec/compiler/anything_symbol_spec.rb +44 -0
  8. data/spec/compiler/character_class_spec.rb +276 -0
  9. data/spec/compiler/choice_spec.rb +80 -0
  10. data/spec/compiler/circular_compilation_spec.rb +30 -0
  11. data/spec/compiler/failure_propagation_functional_spec.rb +21 -0
  12. data/spec/compiler/grammar_compiler_spec.rb +91 -0
  13. data/spec/compiler/grammar_spec.rb +41 -0
  14. data/spec/compiler/multibyte_chars_spec.rb +38 -0
  15. data/spec/compiler/nonterminal_symbol_spec.rb +40 -0
  16. data/spec/compiler/not_predicate_spec.rb +38 -0
  17. data/spec/compiler/occurrence_range_spec.rb +191 -0
  18. data/spec/compiler/one_or_more_spec.rb +35 -0
  19. data/spec/compiler/optional_spec.rb +37 -0
  20. data/spec/compiler/parenthesized_expression_spec.rb +19 -0
  21. data/spec/compiler/parsing_rule_spec.rb +61 -0
  22. data/spec/compiler/repeated_subrule_spec.rb +29 -0
  23. data/spec/compiler/semantic_predicate_spec.rb +175 -0
  24. data/spec/compiler/sequence_spec.rb +115 -0
  25. data/spec/compiler/terminal_spec.rb +81 -0
  26. data/spec/compiler/terminal_symbol_spec.rb +37 -0
  27. data/spec/compiler/test_grammar.treetop +7 -0
  28. data/spec/compiler/test_grammar.tt +7 -0
  29. data/spec/compiler/test_grammar_do.treetop +7 -0
  30. data/spec/compiler/tt_compiler_spec.rb +215 -0
  31. data/spec/compiler/zero_or_more_spec.rb +56 -0
  32. data/spec/composition/a.treetop +11 -0
  33. data/spec/composition/b.treetop +11 -0
  34. data/spec/composition/c.treetop +10 -0
  35. data/spec/composition/d.treetop +10 -0
  36. data/spec/composition/f.treetop +17 -0
  37. data/spec/composition/grammar_composition_spec.rb +40 -0
  38. data/spec/composition/subfolder/e_includes_c.treetop +15 -0
  39. data/spec/ruby_extensions/string_spec.rb +32 -0
  40. data/spec/runtime/compiled_parser_spec.rb +101 -0
  41. data/spec/runtime/interval_skip_list/delete_spec.rb +147 -0
  42. data/spec/runtime/interval_skip_list/expire_range_spec.rb +349 -0
  43. data/spec/runtime/interval_skip_list/insert_and_delete_node.rb +385 -0
  44. data/spec/runtime/interval_skip_list/insert_spec.rb +660 -0
  45. data/spec/runtime/interval_skip_list/interval_skip_list_spec.graffle +6175 -0
  46. data/spec/runtime/interval_skip_list/interval_skip_list_spec.rb +58 -0
  47. data/spec/runtime/interval_skip_list/palindromic_fixture.rb +23 -0
  48. data/spec/runtime/interval_skip_list/palindromic_fixture_spec.rb +163 -0
  49. data/spec/runtime/interval_skip_list/spec_helper.rb +84 -0
  50. data/spec/runtime/syntax_node_spec.rb +77 -0
  51. data/spec/spec_helper.rb +110 -0
  52. data/treetop.gemspec +18 -0
  53. metadata +70 -9
@@ -16,28 +16,37 @@ module Treetop
16
16
  builder.else_ do
17
17
  builder.break
18
18
  end
19
+ if max && !max.empty?
20
+ builder.if_ "#{accumulator_var}.size == #{max.text_value}" do
21
+ builder.break
22
+ end
23
+ end
19
24
  end
20
25
  end
21
-
26
+
22
27
  def inline_module_name
23
28
  parent_expression.inline_module_name
24
29
  end
25
-
30
+
26
31
  def assign_and_extend_result
27
32
  assign_result "instantiate_node(#{node_class_name},input, #{start_index_var}...index, #{accumulator_var})"
28
33
  extend_result_with_inline_module
29
34
  end
30
35
  end
31
36
 
32
-
37
+
33
38
  class ZeroOrMore < Repetition
34
39
  def compile(address, builder, parent_expression)
35
40
  super
36
41
  assign_and_extend_result
37
42
  end_comment(parent_expression)
38
43
  end
44
+
45
+ def max
46
+ nil
47
+ end
39
48
  end
40
-
49
+
41
50
  class OneOrMore < Repetition
42
51
  def compile(address, builder, parent_expression)
43
52
  super
@@ -50,6 +59,31 @@ module Treetop
50
59
  end
51
60
  end_comment(parent_expression)
52
61
  end
62
+
63
+ def max
64
+ nil
65
+ end
66
+ end
67
+
68
+ class OccurrenceRange < Repetition
69
+ def compile(address, builder, parent_expression)
70
+ super
71
+
72
+ if min.empty? || min.text_value.to_i == 0
73
+ assign_and_extend_result
74
+ else
75
+ # We got some, but fewer than we wanted. There'll be a failure reported already
76
+ builder.if__ "#{accumulator_var}.size < #{min.text_value}" do
77
+ reset_index
78
+ assign_failure start_index_var
79
+ end
80
+ builder.else_ do
81
+ assign_and_extend_result
82
+ end
83
+ end
84
+ end_comment(parent_expression)
85
+ end
53
86
  end
87
+
54
88
  end
55
- end
89
+ end
@@ -2,7 +2,7 @@ module Treetop #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 1
4
4
  MINOR = 4
5
- TINY = 5
5
+ TINY = 7
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ module AndPredicateSpec
4
+ describe "An &-predicated terminal symbol" do
5
+ testing_expression '&"foo"'
6
+
7
+ it "successfully parses input matching the terminal symbol, returning an epsilon syntax node" do
8
+ parse('foo', :consume_all_input => false) do |result|
9
+ result.should_not be_nil
10
+ result.interval.should == (0...0)
11
+ end
12
+ end
13
+ end
14
+
15
+ describe "A sequence of a terminal and an and another &-predicated terminal" do
16
+ testing_expression '"foo" &"bar"'
17
+
18
+ it "matches input matching both terminals, but only consumes the first" do
19
+ parse('foobar', :consume_all_input => false) do |result|
20
+ result.should_not be_nil
21
+ result.text_value.should == 'foo'
22
+ end
23
+ end
24
+
25
+ it "fails to parse input matching only the first terminal, with a terminal failure recorded at index 3" do
26
+ parse('foo') do |result|
27
+ result.should be_nil
28
+ terminal_failures = parser.terminal_failures
29
+ terminal_failures.size.should == 1
30
+ failure = terminal_failures[0]
31
+ failure.index.should == 3
32
+ failure.expected_string.should == 'bar'
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,44 @@
1
+ require 'spec_helper'
2
+
3
+ module AnythingSymbolSpec
4
+ class Foo < Treetop::Runtime::SyntaxNode
5
+ end
6
+
7
+ describe "an anything symbol followed by a node class declaration and a block" do
8
+ testing_expression '. <AnythingSymbolSpec::Foo> { def a_method; end }'
9
+
10
+ it "matches any single character in a big range, returning an instance of the declared node class that responds to methods defined in the inline module" do
11
+ (33..127).each do |digit|
12
+ parse(digit.chr) do |result|
13
+ result.should_not be_nil
14
+ result.should be_an_instance_of(Foo)
15
+ result.should respond_to(:a_method)
16
+ result.interval.should == (0...1)
17
+ end
18
+ end
19
+ end
20
+
21
+ it "fails to parse epsilon" do
22
+ parse('').should be_nil
23
+ end
24
+ end
25
+
26
+ module ModFoo
27
+ end
28
+
29
+ describe "an anything symbol followed by a module declaration and a block" do
30
+ testing_expression '. <AnythingSymbolSpec::ModFoo> { def a_method; end }'
31
+
32
+ it "matches any single character in a big range, returning an instance of SyntaxNode extended by the declared module that responds to methods defined in the inline module" do
33
+ (33..127).each do |digit|
34
+ parse(digit.chr) do |result|
35
+ result.should_not be_nil
36
+ result.should be_an_instance_of(Treetop::Runtime::SyntaxNode)
37
+ result.should be_a_kind_of(ModFoo)
38
+ result.should respond_to(:a_method)
39
+ result.interval.should == (0...1)
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,276 @@
1
+ require 'spec_helper'
2
+
3
+ module CharacterClassSpec
4
+ class Foo < Treetop::Runtime::SyntaxNode
5
+ end
6
+
7
+ describe "a character class followed by a node class declaration and a block" do
8
+
9
+ testing_expression "[A-Z] <CharacterClassSpec::Foo> { def a_method; end }"
10
+
11
+ it "matches single characters within that range, returning instances of the declared node class that respond to the method defined in the inline module" do
12
+ result = parse('A')
13
+ result.should be_an_instance_of(Foo)
14
+ result.should respond_to(:a_method)
15
+ result = parse('N')
16
+ result.should be_an_instance_of(Foo)
17
+ result.should respond_to(:a_method)
18
+ result = parse('Z')
19
+ result.should be_an_instance_of(Foo)
20
+ result.should respond_to(:a_method)
21
+ end
22
+
23
+ it "does not match single characters outside of that range" do
24
+ parse('8').should be_nil
25
+ parse('a').should be_nil
26
+ end
27
+
28
+ it "matches a single character within that range at index 1" do
29
+ parse(' A', :index => 1).should_not be_nil
30
+ end
31
+
32
+ it "fails to match a single character out of that range at index 1" do
33
+ parse(' 1', :index => 1).should be_nil
34
+ end
35
+ end
36
+
37
+ module ModFoo
38
+ end
39
+
40
+ describe "a character class followed by a node module declaration and a block" do
41
+
42
+ testing_expression "[A-Z] <CharacterClassSpec::ModFoo> { def a_method; end }"
43
+
44
+ it "matches single characters within that range, returning instances of SyntaxNode extended by the specified module" do
45
+ result = parse('A')
46
+ result.should be_an_instance_of(Treetop::Runtime::SyntaxNode)
47
+ result.should be_a_kind_of(ModFoo)
48
+ result.should respond_to(:a_method)
49
+ result = parse('N')
50
+ result.should be_an_instance_of(Treetop::Runtime::SyntaxNode)
51
+ result.should be_a_kind_of(ModFoo)
52
+ result.should respond_to(:a_method)
53
+ result = parse('Z')
54
+ result.should be_an_instance_of(Treetop::Runtime::SyntaxNode)
55
+ result.should be_a_kind_of(ModFoo)
56
+ result.should respond_to(:a_method)
57
+ end
58
+
59
+ it "does not match single characters outside of that range" do
60
+ parse('8').should be_nil
61
+ parse('a').should be_nil
62
+ end
63
+
64
+ it "matches a single character within that range at index 1" do
65
+ parse(' A', :index => 1).should_not be_nil
66
+ end
67
+
68
+ it "fails to match a single character out of that range at index 1" do
69
+ parse(' 1', :index => 1).should be_nil
70
+ end
71
+ end
72
+
73
+ describe "a character class followed by a node class declaration and a block" do
74
+
75
+ testing_expression "[A-Z] <CharacterClassSpec::Foo>"
76
+
77
+ it "actively generates nodes for the character when it is the primary node" do
78
+ result = parse('A')
79
+ result.should be_a(Treetop::Runtime::SyntaxNode)
80
+ result.elements.should be_nil
81
+ end
82
+
83
+ end
84
+
85
+ describe "A character class containing quotes" do
86
+ testing_expression "[\"']"
87
+
88
+ it "matches a quote" do
89
+ parse("'").should_not be_nil
90
+ end
91
+
92
+ it "matches a double-quote" do
93
+ parse('"').should_not be_nil
94
+ end
95
+ end
96
+
97
+ describe "A character class containing a special character" do
98
+ testing_expression "[\t]"
99
+ it "matches that character only" do
100
+ parse("\t").should_not be_nil
101
+ parse('t').should be_nil
102
+ end
103
+ end
104
+
105
+ describe "A character class containing an escaped backslash" do
106
+ slash = "\\" # Make it explicit that there are *two* backslashes here
107
+ testing_expression "[#{slash}#{slash}]"
108
+ it "matches a backslash only" do
109
+ parse("\\").should_not be_nil
110
+ parse('t').should be_nil
111
+ end
112
+ end
113
+
114
+ describe "A character class containing a hex escape" do
115
+ slash = "\\"
116
+ testing_expression "[#{slash}x41]"
117
+ it "matches that character only" do
118
+ parse('A').should_not be_nil
119
+ parse('\\').should be_nil
120
+ parse('x').should be_nil
121
+ parse('4').should be_nil
122
+ parse('1').should be_nil
123
+ end
124
+ end
125
+
126
+ describe "A character class containing an octal escape" do
127
+ slash = "\\"
128
+ testing_expression "[#{slash}101]"
129
+ it "matches that character only" do
130
+ parse('A').should_not be_nil
131
+ parse('\\').should be_nil
132
+ parse('1').should be_nil
133
+ parse('0').should be_nil
134
+ end
135
+ end
136
+
137
+ describe "A character class containing a \\c control-char escape" do
138
+ slash = "\\"
139
+ testing_expression "[#{slash}cC]"
140
+ it "matches that character only" do
141
+ parse("\003").should_not be_nil
142
+ parse('\\').should be_nil
143
+ parse('c').should be_nil
144
+ parse('C').should be_nil
145
+ end
146
+ end
147
+
148
+ describe "A character class containing a \\C- control-char escape" do
149
+ slash = "\\"
150
+ testing_expression "[#{slash}C-C]"
151
+ it "matches that character only" do
152
+ parse("\003").should_not be_nil
153
+ parse('\\').should be_nil
154
+ parse('C').should be_nil
155
+ parse('-').should be_nil
156
+ end
157
+ end
158
+
159
+ if RUBY_VERSION =~ /\A1\.8\./
160
+ describe "A character class containing a \\M- meta-char escape" do
161
+ slash = "\\"
162
+ testing_expression "[#{slash}M- ]"
163
+ it "matches that character only" do
164
+ parse("\240").should_not be_nil
165
+ parse('\\').should be_nil
166
+ parse('M').should be_nil
167
+ parse('-').should be_nil
168
+ parse(' ').should be_nil
169
+ end
170
+ end
171
+ end
172
+
173
+ describe "A character class containing an escaped non-special character" do
174
+ slash = "\\"
175
+ testing_expression "[#{slash}y]"
176
+ it "matches that character only" do
177
+ parse("y").should_not be_nil
178
+ parse('\\').should be_nil
179
+ end
180
+ end
181
+
182
+ describe "A character class containing an \#{...} insertion" do
183
+ testing_expression "[\#{raise 'error'}]"
184
+ it "doesn't evaluate the insertion" do
185
+ x = true
186
+ lambda{
187
+ x = parse("y")
188
+ }.should_not raise_error
189
+ x.should be_nil
190
+ parse('#').should_not be_nil
191
+ parse("'").should_not be_nil
192
+ parse("0").should be_nil
193
+ end
194
+ end
195
+
196
+ describe "a character class" do
197
+ testing_expression "[A-Z]"
198
+ it "actively generates a node for the character because it is the primary node" do
199
+ result = parse('A')
200
+ result.should be_a(Treetop::Runtime::SyntaxNode)
201
+ result.elements.should be_nil
202
+ end
203
+ end
204
+
205
+ describe "a character class mixed with other expressions" do
206
+ testing_expression '[A-Z] "a"'
207
+ it "lazily instantiates a node for the character" do
208
+ result = parse('Aa')
209
+ result.instance_variable_get("@elements").should include(true)
210
+ result.elements.should_not include(true)
211
+ result.elements.size.should == 2
212
+ end
213
+ end
214
+
215
+ describe "a character class with a node class declaration mixed with other expressions" do
216
+ testing_expression '([A-Z] <CharacterClassSpec::Foo>) "a"'
217
+ it "actively generates a node for the character because it has a node class declared" do
218
+ result = parse('Aa')
219
+ result.instance_variable_get("@elements").should_not include(true)
220
+ result.elements.should_not include(true)
221
+ result.elements.size.should == 2
222
+ end
223
+ end
224
+
225
+ describe "a character class with a node module declaration mixed with other expressions" do
226
+ testing_expression '([A-Z] <CharacterClassSpec::ModFoo>) "a"'
227
+ it "actively generates a node for the character because it has a node module declared" do
228
+ result = parse('Aa')
229
+ result.instance_variable_get("@elements").should_not include(true)
230
+ result.elements.should_not include(true)
231
+ result.elements.size.should == 2
232
+ end
233
+ end
234
+
235
+ describe "a character class with an inline block mixed with other expressions" do
236
+ testing_expression '([A-Z] { def a_method; end }) "a"'
237
+ it "actively generates a node for the character because it has an inline block" do
238
+ result = parse('Aa')
239
+ result.instance_variable_get("@elements").should_not include(true)
240
+ result.elements.should_not include(true)
241
+ result.elements.size.should == 2
242
+ end
243
+ end
244
+
245
+ describe "a character class with a label mixed with other expressions" do
246
+ testing_expression 'upper:([A-Z]) "b"'
247
+ it "returns the correct element for the labeled expression" do
248
+ result = parse('Ab')
249
+ result.upper.text_value.should == "A"
250
+ result.elements.size.should == 2
251
+ end
252
+ end
253
+
254
+ describe "a character class repetition mixed with other expressions" do
255
+ testing_expression '[A-Z]+ "a"'
256
+ it "lazily instantiates a node for the character" do
257
+ result = parse('ABCa')
258
+ result.elements[0].instance_variable_get("@elements").should include(true)
259
+ result.elements[0].elements.should_not include(true)
260
+ result.elements[0].elements.size.should == 3
261
+ result.elements.size.should == 2
262
+ result.elements.inspect.should == %Q{[SyntaxNode offset=0, "ABC":\n SyntaxNode offset=0, "A"\n SyntaxNode offset=1, "B"\n SyntaxNode offset=2, "C", SyntaxNode offset=3, "a"]}
263
+ end
264
+ end
265
+
266
+ describe "a character class that gets cached because of a choice" do
267
+ testing_expression "[A-Z] 'a' / [A-Z]"
268
+
269
+ it "generates a node for the lazily-instantiated character when it is the primary node" do
270
+ result = parse('A')
271
+ result.should be_a(Treetop::Runtime::SyntaxNode)
272
+ result.elements.should be_nil
273
+ end
274
+ end
275
+
276
+ end
@@ -0,0 +1,80 @@
1
+ require 'spec_helper'
2
+
3
+ module ChoiceSpec
4
+ describe "A choice between terminal symbols" do
5
+ testing_expression '"foo" { def foo_method; end } / "bar" { def bar_method; end } / "baz" { def baz_method; end }'
6
+
7
+ it "successfully parses input matching any of the alternatives, returning a node that responds to methods defined in its respective inline module" do
8
+ result = parse('foo')
9
+ result.should_not be_nil
10
+ result.should respond_to(:foo_method)
11
+
12
+ result = parse('bar')
13
+ result.should_not be_nil
14
+ result.should respond_to(:bar_method)
15
+
16
+ result = parse('baz')
17
+ result.should_not be_nil
18
+ result.should respond_to(:baz_method)
19
+ end
20
+
21
+ it "upon parsing a string matching the second alternative, records the failure of the first terminal" do
22
+ result = parse('bar')
23
+ terminal_failures = parser.terminal_failures
24
+ terminal_failures.size.should == 1
25
+ failure = terminal_failures[0]
26
+ failure.expected_string.should == 'foo'
27
+ failure.index.should == 0
28
+ end
29
+
30
+ it "upon parsing a string matching the third alternative, records the failure of the first two terminals" do
31
+ result = parse('baz')
32
+
33
+ terminal_failures = parser.terminal_failures
34
+
35
+ terminal_failures.size.should == 2
36
+
37
+ failure_1 = terminal_failures[0]
38
+ failure_1.expected_string == 'foo'
39
+ failure_1.index.should == 0
40
+
41
+ failure_2 = terminal_failures[1]
42
+ failure_2.expected_string == 'bar'
43
+ failure_2.index.should == 0
44
+ end
45
+ end
46
+
47
+ describe "A choice between sequences" do
48
+ testing_expression "'foo' 'bar' 'baz'\n/\n'bing' 'bang' 'boom'"
49
+
50
+ it "successfully parses input matching any of the alternatives" do
51
+ parse('foobarbaz').should_not be_nil
52
+ parse('bingbangboom').should_not be_nil
53
+ end
54
+ end
55
+
56
+ describe "A choice between terminals followed by a block" do
57
+ testing_expression "('a'/ 'b' / 'c') { def a_method; end }"
58
+
59
+ it "extends a match of any of its subexpressions with a module created from the block" do
60
+ ['a', 'b', 'c'].each do |letter|
61
+ parse(letter).should respond_to(:a_method)
62
+ end
63
+ end
64
+ end
65
+
66
+ module TestModule
67
+ def a_method
68
+ end
69
+ end
70
+
71
+ describe "a choice followed by a declared module" do
72
+ testing_expression "('a'/ 'b' / 'c') <ChoiceSpec::TestModule>"
73
+
74
+ it "extends a match of any of its subexpressions with a module created from the block" do
75
+ ['a', 'b', 'c'].each do |letter|
76
+ parse(letter).should respond_to(:a_method)
77
+ end
78
+ end
79
+ end
80
+ end