walrus 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. data/bin/walrus +44 -0
  2. data/ext/jindex/extconf.rb +11 -0
  3. data/ext/jindex/jindex.c +79 -0
  4. data/ext/mkdtemp/extconf.rb +11 -0
  5. data/ext/mkdtemp/mkdtemp.c +41 -0
  6. data/lib/walrus/additions/module.rb +36 -0
  7. data/lib/walrus/additions/string.rb +37 -0
  8. data/lib/walrus/additions/test/unit/error_collector.rb +62 -0
  9. data/lib/walrus/compile_error.rb +28 -0
  10. data/lib/walrus/compiler.rb +124 -0
  11. data/lib/walrus/contrib/spec/walruscloth_spec.rb +32 -0
  12. data/lib/walrus/contrib/walruscloth.rb +82 -0
  13. data/lib/walrus/diff.rb +89 -0
  14. data/lib/walrus/document.rb +98 -0
  15. data/lib/walrus/grammar/additions/proc.rb +20 -0
  16. data/lib/walrus/grammar/additions/regexp.rb +21 -0
  17. data/lib/walrus/grammar/additions/string.rb +52 -0
  18. data/lib/walrus/grammar/additions/symbol.rb +42 -0
  19. data/lib/walrus/grammar/and_predicate.rb +40 -0
  20. data/lib/walrus/grammar/array_result.rb +19 -0
  21. data/lib/walrus/grammar/continuation_wrapper_exception.rb +28 -0
  22. data/lib/walrus/grammar/left_recursion_exception.rb +27 -0
  23. data/lib/walrus/grammar/location_tracking.rb +105 -0
  24. data/lib/walrus/grammar/match_data_wrapper.rb +65 -0
  25. data/lib/walrus/grammar/memoizing.rb +41 -0
  26. data/lib/walrus/grammar/memoizing_cache.rb +94 -0
  27. data/lib/walrus/grammar/node.rb +60 -0
  28. data/lib/walrus/grammar/not_predicate.rb +40 -0
  29. data/lib/walrus/grammar/parse_error.rb +39 -0
  30. data/lib/walrus/grammar/parser_state.rb +181 -0
  31. data/lib/walrus/grammar/parslet.rb +28 -0
  32. data/lib/walrus/grammar/parslet_choice.rb +120 -0
  33. data/lib/walrus/grammar/parslet_combination.rb +26 -0
  34. data/lib/walrus/grammar/parslet_combining.rb +154 -0
  35. data/lib/walrus/grammar/parslet_merge.rb +88 -0
  36. data/lib/walrus/grammar/parslet_omission.rb +57 -0
  37. data/lib/walrus/grammar/parslet_repetition.rb +97 -0
  38. data/lib/walrus/grammar/parslet_repetition_default.rb +58 -0
  39. data/lib/walrus/grammar/parslet_sequence.rb +202 -0
  40. data/lib/walrus/grammar/predicate.rb +57 -0
  41. data/lib/walrus/grammar/proc_parslet.rb +52 -0
  42. data/lib/walrus/grammar/regexp_parslet.rb +73 -0
  43. data/lib/walrus/grammar/skipped_substring_exception.rb +36 -0
  44. data/lib/walrus/grammar/string_enumerator.rb +45 -0
  45. data/lib/walrus/grammar/string_parslet.rb +75 -0
  46. data/lib/walrus/grammar/string_result.rb +24 -0
  47. data/lib/walrus/grammar/symbol_parslet.rb +63 -0
  48. data/lib/walrus/grammar.rb +170 -0
  49. data/lib/walrus/no_parameter_marker.rb +19 -0
  50. data/lib/walrus/parser.rb +420 -0
  51. data/lib/walrus/runner.rb +356 -0
  52. data/lib/walrus/template.rb +75 -0
  53. data/lib/walrus/walrus_grammar/assignment_expression.rb +24 -0
  54. data/lib/walrus/walrus_grammar/block_directive.rb +28 -0
  55. data/lib/walrus/walrus_grammar/comment.rb +24 -0
  56. data/lib/walrus/walrus_grammar/def_directive.rb +64 -0
  57. data/lib/walrus/walrus_grammar/echo_directive.rb +44 -0
  58. data/lib/walrus/walrus_grammar/escape_sequence.rb +24 -0
  59. data/lib/walrus/walrus_grammar/import_directive.rb +44 -0
  60. data/lib/walrus/walrus_grammar/include_directive.rb +27 -0
  61. data/lib/walrus/walrus_grammar/instance_variable.rb +24 -0
  62. data/lib/walrus/walrus_grammar/literal.rb +24 -0
  63. data/lib/walrus/walrus_grammar/message_expression.rb +25 -0
  64. data/lib/walrus/walrus_grammar/multiline_comment.rb +54 -0
  65. data/lib/walrus/walrus_grammar/placeholder.rb +40 -0
  66. data/lib/walrus/walrus_grammar/raw_directive.rb +42 -0
  67. data/lib/walrus/walrus_grammar/raw_text.rb +45 -0
  68. data/lib/walrus/walrus_grammar/ruby_directive.rb +29 -0
  69. data/lib/walrus/walrus_grammar/ruby_expression.rb +31 -0
  70. data/lib/walrus/walrus_grammar/set_directive.rb +24 -0
  71. data/lib/walrus/walrus_grammar/silent_directive.rb +44 -0
  72. data/lib/walrus/walrus_grammar/slurp_directive.rb +25 -0
  73. data/lib/walrus/walrus_grammar/super_directive.rb +27 -0
  74. data/lib/walrus.rb +64 -0
  75. data/spec/acceptance/acceptance_spec.rb +97 -0
  76. data/spec/acceptance/block/basic_block.expected +1 -0
  77. data/spec/acceptance/block/basic_block.tmpl +3 -0
  78. data/spec/acceptance/block/nested_blocks.expected +5 -0
  79. data/spec/acceptance/block/nested_blocks.tmpl +11 -0
  80. data/spec/acceptance/comments/comments_and_text.expected +3 -0
  81. data/spec/acceptance/comments/comments_and_text.tmpl +6 -0
  82. data/spec/acceptance/comments/single_comment.expected +0 -0
  83. data/spec/acceptance/comments/single_comment.tmpl +1 -0
  84. data/spec/acceptance/def/alternative_def_calling_conventions.expected +3 -0
  85. data/spec/acceptance/def/alternative_def_calling_conventions.tmpl +18 -0
  86. data/spec/acceptance/def/basic_def_block_no_output.expected +0 -0
  87. data/spec/acceptance/def/basic_def_block_no_output.tmpl +17 -0
  88. data/spec/acceptance/def/defs_can_be_called_multiple_times.expected +3 -0
  89. data/spec/acceptance/def/defs_can_be_called_multiple_times.tmpl +6 -0
  90. data/spec/acceptance/def/defs_can_be_dynamic.expected +4 -0
  91. data/spec/acceptance/def/defs_can_be_dynamic.tmpl +12 -0
  92. data/spec/acceptance/echo/echo_directive_with_numeric_literal.expected +1 -0
  93. data/spec/acceptance/echo/echo_directive_with_numeric_literal.tmpl +1 -0
  94. data/spec/acceptance/echo/echo_expression_list.expected +1 -0
  95. data/spec/acceptance/echo/echo_expression_list.tmpl +1 -0
  96. data/spec/acceptance/echo/echo_short_notation.expected +1 -0
  97. data/spec/acceptance/echo/echo_short_notation.tmpl +1 -0
  98. data/spec/acceptance/echo/echo_simple_expression.expected +1 -0
  99. data/spec/acceptance/echo/echo_simple_expression.tmpl +1 -0
  100. data/spec/acceptance/echo/echo_single_quoted_string_literal.expected +1 -0
  101. data/spec/acceptance/echo/echo_single_quoted_string_literal.tmpl +1 -0
  102. data/spec/acceptance/echo/multiple_echo_statements.expected +1 -0
  103. data/spec/acceptance/echo/multiple_echo_statements.tmpl +2 -0
  104. data/spec/acceptance/includes/basic_included_file.txt +1 -0
  105. data/spec/acceptance/includes/basic_includer.complex +3 -0
  106. data/spec/acceptance/includes/basic_includer.expected +3 -0
  107. data/spec/acceptance/includes/basic_includer.rb +38 -0
  108. data/spec/acceptance/includes/complicated_included_file.txt +3 -0
  109. data/spec/acceptance/includes/complicated_includer.complex +3 -0
  110. data/spec/acceptance/includes/complicated_includer.expected +3 -0
  111. data/spec/acceptance/includes/complicated_includer.rb +41 -0
  112. data/spec/acceptance/includes/nested_include_1.txt +3 -0
  113. data/spec/acceptance/includes/nested_include_2.txt +1 -0
  114. data/spec/acceptance/includes/nested_includer.complex +3 -0
  115. data/spec/acceptance/includes/nested_includer.expected +4 -0
  116. data/spec/acceptance/includes/nested_includer.rb +41 -0
  117. data/spec/acceptance/inheritance/basic_child.complex +10 -0
  118. data/spec/acceptance/inheritance/basic_child.expected +9 -0
  119. data/spec/acceptance/inheritance/basic_child.rb +54 -0
  120. data/spec/acceptance/inheritance/basic_parent.complex +5 -0
  121. data/spec/acceptance/inheritance/basic_parent.expected +3 -0
  122. data/spec/acceptance/inheritance/basic_parent.rb +41 -0
  123. data/spec/acceptance/inheritance/importing_child.complex +8 -0
  124. data/spec/acceptance/inheritance/importing_child.expected +7 -0
  125. data/spec/acceptance/inheritance/importing_child.rb +46 -0
  126. data/spec/acceptance/inheritance/subdirectory/importing_child_in_subdirectory.complex +8 -0
  127. data/spec/acceptance/inheritance/subdirectory/importing_child_in_subdirectory.expected +7 -0
  128. data/spec/acceptance/inheritance/subdirectory/importing_child_in_subdirectory.rb +44 -0
  129. data/spec/acceptance/multiline_comments/multiline_comment_with_directives_inside.expected +0 -0
  130. data/spec/acceptance/multiline_comments/multiline_comment_with_directives_inside.tmpl +15 -0
  131. data/spec/acceptance/multiline_comments/simple_multiline_comment.expected +2 -0
  132. data/spec/acceptance/multiline_comments/simple_multiline_comment.tmpl +4 -0
  133. data/spec/acceptance/raw/complicated_raw_example.expected +57 -0
  134. data/spec/acceptance/raw/complicated_raw_example.tmpl +79 -0
  135. data/spec/acceptance/raw-text/UTF_8.expected +12 -0
  136. data/spec/acceptance/raw-text/UTF_8.tmpl +12 -0
  137. data/spec/acceptance/raw-text/empty_file.expected +0 -0
  138. data/spec/acceptance/raw-text/empty_file.tmpl +0 -0
  139. data/spec/acceptance/raw-text/multi_line.expected +4 -0
  140. data/spec/acceptance/raw-text/multi_line.tmpl +4 -0
  141. data/spec/acceptance/raw-text/single_line.expected +1 -0
  142. data/spec/acceptance/raw-text/single_line.tmpl +1 -0
  143. data/spec/acceptance/raw-text/single_line_whitespace.expected +1 -0
  144. data/spec/acceptance/raw-text/single_line_whitespace.tmpl +1 -0
  145. data/spec/acceptance/ruby/ruby_directive_is_just_like_silent.expected +1 -0
  146. data/spec/acceptance/ruby/ruby_directive_is_just_like_silent.tmpl +4 -0
  147. data/spec/acceptance/ruby/ruby_directive_using_here_doc.expected +1 -0
  148. data/spec/acceptance/ruby/ruby_directive_using_here_doc.tmpl +4 -0
  149. data/spec/acceptance/ruby/ruby_directive_using_here_doc_alt_syntax.expected +1 -0
  150. data/spec/acceptance/ruby/ruby_directive_using_here_doc_alt_syntax.tmpl +4 -0
  151. data/spec/acceptance/ruby/ruby_directive_with_accumulate.expected +1 -0
  152. data/spec/acceptance/ruby/ruby_directive_with_accumulate.tmpl +4 -0
  153. data/spec/acceptance/ruby/ruby_directive_with_accumulate_and_block.expected +1 -0
  154. data/spec/acceptance/ruby/ruby_directive_with_accumulate_and_block.tmpl +6 -0
  155. data/spec/acceptance/set/unused_set.expected +0 -0
  156. data/spec/acceptance/set/unused_set.tmpl +1 -0
  157. data/spec/acceptance/set/used_set.expected +1 -0
  158. data/spec/acceptance/set/used_set.tmpl +2 -0
  159. data/spec/acceptance/silent/silent_and_echo_combined.expected +1 -0
  160. data/spec/acceptance/silent/silent_and_echo_combined.tmpl +2 -0
  161. data/spec/acceptance/silent/silent_short_notation.expected +1 -0
  162. data/spec/acceptance/silent/silent_short_notation.tmpl +1 -0
  163. data/spec/acceptance/silent/simple_silent_directive.expected +0 -0
  164. data/spec/acceptance/silent/simple_silent_directive.tmpl +1 -0
  165. data/spec/acceptance/slurp/basic_slurp_demo.expected +1 -0
  166. data/spec/acceptance/slurp/basic_slurp_demo.tmpl +4 -0
  167. data/spec/acceptance/super/super_with_no_effect.expected +4 -0
  168. data/spec/acceptance/super/super_with_no_effect.tmpl +5 -0
  169. data/spec/additions/module_spec.rb +126 -0
  170. data/spec/additions/string_spec.rb +99 -0
  171. data/spec/compiler_spec.rb +55 -0
  172. data/spec/grammar/additions/proc_spec.rb +25 -0
  173. data/spec/grammar/additions/regexp_spec.rb +37 -0
  174. data/spec/grammar/additions/string_spec.rb +106 -0
  175. data/spec/grammar/and_predicate_spec.rb +29 -0
  176. data/spec/grammar/continuation_wrapper_exception_spec.rb +23 -0
  177. data/spec/grammar/match_data_wrapper_spec.rb +41 -0
  178. data/spec/grammar/memoizing_cache_spec.rb +112 -0
  179. data/spec/grammar/node_spec.rb +126 -0
  180. data/spec/grammar/not_predicate_spec.rb +29 -0
  181. data/spec/grammar/parser_state_spec.rb +172 -0
  182. data/spec/grammar/parslet_choice_spec.rb +49 -0
  183. data/spec/grammar/parslet_combining_spec.rb +287 -0
  184. data/spec/grammar/parslet_merge_spec.rb +33 -0
  185. data/spec/grammar/parslet_omission_spec.rb +58 -0
  186. data/spec/grammar/parslet_repetition_spec.rb +77 -0
  187. data/spec/grammar/parslet_sequence_spec.rb +49 -0
  188. data/spec/grammar/parslet_spec.rb +23 -0
  189. data/spec/grammar/predicate_spec.rb +53 -0
  190. data/spec/grammar/proc_parslet_spec.rb +52 -0
  191. data/spec/grammar/regexp_parslet_spec.rb +347 -0
  192. data/spec/grammar/string_enumerator_spec.rb +94 -0
  193. data/spec/grammar/string_parslet_spec.rb +143 -0
  194. data/spec/grammar/symbol_parslet_spec.rb +30 -0
  195. data/spec/grammar_spec.rb +545 -0
  196. data/spec/parser_spec.rb +1418 -0
  197. data/spec/spec_helper.rb +34 -0
  198. data/spec/walrus_grammar/comment_spec.rb +39 -0
  199. data/spec/walrus_grammar/echo_directive_spec.rb +63 -0
  200. data/spec/walrus_grammar/escape_sequence_spec.rb +85 -0
  201. data/spec/walrus_grammar/literal_spec.rb +41 -0
  202. data/spec/walrus_grammar/message_expression_spec.rb +37 -0
  203. data/spec/walrus_grammar/multiline_comment_spec.rb +58 -0
  204. data/spec/walrus_grammar/placeholder_spec.rb +48 -0
  205. data/spec/walrus_grammar/raw_directive_spec.rb +81 -0
  206. data/spec/walrus_grammar/raw_text_spec.rb +65 -0
  207. data/spec/walrus_grammar/silent_directive_spec.rb +34 -0
  208. metadata +291 -0
@@ -0,0 +1,545 @@
1
+ # Copyright 2007 Wincent Colaiuta
2
+ # This program is distributed in the hope that it will be useful, but WITHOUT
3
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
4
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
5
+ # in the accompanying file, "LICENSE.txt", for more details.
6
+ #
7
+ # $Id: /mirrors/Walrus/trunk/walrus/spec/grammar_spec.rb 6702 2007-04-09T15:04:40.448669Z wincent $
8
+
9
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
10
+
11
+ module Walrus
12
+ class Grammar
13
+
14
+ describe 'defining a grammar subclass' do
15
+
16
+ it 'should be able to create new Grammar subclasses on the fly' do
17
+
18
+ # first create a new subclass and make sure the returned instance is non-nil
19
+ Grammar.subclass('MyGrammar').should_not be_nil
20
+
21
+ # the class constant should now be available for creating new instances
22
+ MyGrammar.new.should_not be_nil
23
+
24
+ end
25
+
26
+ it 'should complain if an attempt is made to create the same subclass twice' do
27
+ lambda { Grammar.subclass('FooGrammar') }.should_not raise_error
28
+ lambda { Grammar.subclass('FooGrammar') }.should raise_error
29
+ end
30
+
31
+ it 'should complain if subclass name is nil' do
32
+ lambda { Grammar.subclass(nil) }.should raise_error(ArgumentError)
33
+ end
34
+
35
+ it 'should be able to pass a block while defining a new subclass' do
36
+
37
+ instance = Grammar.subclass('TestGrammar') do
38
+ starting_symbol :foo
39
+ end
40
+ instance.instance_eval("@starting_symbol").should == :foo
41
+
42
+ end
43
+
44
+ end
45
+
46
+ describe 'defining rules in a grammar' do
47
+
48
+ it '"rules" method should complain if either parameter is nil' do
49
+ lambda { Grammar.subclass('AxeGrammar') { rule nil, 'expression' } }.should raise_error(ArgumentError)
50
+ lambda { Grammar.subclass('BoneGrammar') { rule :my_rule, nil } }.should raise_error(ArgumentError)
51
+ lambda { Grammar.subclass('CatGrammar') { rule nil, nil } }.should raise_error(ArgumentError)
52
+ end
53
+
54
+ it '"rules" method should complain if an attempt is made to define a rule a second time' do
55
+ lambda do
56
+ Grammar.subclass('DogGrammar') do
57
+ rule :my_rule, 'foo'
58
+ rule :my_rule, 'bar'
59
+ end
60
+ end.should raise_error(ArgumentError)
61
+ end
62
+
63
+ it 'should be able to define rules in the block using the "rule" method' do
64
+
65
+ end
66
+
67
+ end
68
+
69
+ describe 'defining productions in a grammar' do
70
+
71
+ it '"node" method should complain if new class name is nil' do
72
+ lambda do
73
+ Grammar.subclass('NodeComplainingGrammar') { node nil }
74
+ end.should raise_error(ArgumentError)
75
+ end
76
+
77
+ it 'should be able to define a simple Node subclass using the "node" function' do
78
+
79
+ Grammar.subclass('NodeGrammar1') do
80
+ node :my_node_subclass
81
+ node :my_subclass_of_a_subclass, :my_node_subclass
82
+ node :my_node_class_that_takes_params, :node, :foo, :bar
83
+ end
84
+
85
+ NodeGrammar1::MyNodeSubclass.to_s.should == 'Walrus::NodeGrammar1::MyNodeSubclass'
86
+ NodeGrammar1::MyNodeSubclass.superclass.should == Node
87
+ NodeGrammar1::MySubclassOfASubclass.to_s.should == 'Walrus::NodeGrammar1::MySubclassOfASubclass'
88
+ NodeGrammar1::MySubclassOfASubclass.superclass.should == NodeGrammar1::MyNodeSubclass
89
+ NodeGrammar1::MyNodeClassThatTakesParams.to_s.should == 'Walrus::NodeGrammar1::MyNodeClassThatTakesParams'
90
+ NodeGrammar1::MyNodeClassThatTakesParams.superclass.should == Node
91
+ node = NodeGrammar1::MyNodeClassThatTakesParams.new('hello', 'world')
92
+ node.foo.should == 'hello'
93
+ node.bar.should == 'world'
94
+
95
+ end
96
+
97
+ it 'should be able to use the "build" method to define production subclasses on the fly' do
98
+
99
+ Grammar.subclass('HeMeansJavaRuntimeAPIs') do
100
+ rule :foobar, 'foo' & 'bar'
101
+ production :foobar.build(:node, :foo, :bar)
102
+ end
103
+
104
+ # try instantiating the newly created class
105
+ node = HeMeansJavaRuntimeAPIs::Foobar.new('hello', 'world')
106
+ node.class.should == HeMeansJavaRuntimeAPIs::Foobar
107
+ node.foo.should == 'hello'
108
+ node.bar.should == 'world'
109
+
110
+ # try passing the wrong number of parameters
111
+ lambda { HeMeansJavaRuntimeAPIs::Foobar.new }.should raise_error(ArgumentError) # no parameters
112
+ lambda { HeMeansJavaRuntimeAPIs::Foobar.new('hi') }.should raise_error(ArgumentError) # one parameter too few
113
+ lambda { HeMeansJavaRuntimeAPIs::Foobar.new('a', 'b', 'c') }.should raise_error(ArgumentError) # one parameter too many
114
+
115
+ end
116
+
117
+ it 'should complain if an attempt is made to create the same production class twice' do
118
+ lambda do
119
+ Grammar.subclass('HowToGetControlOfJavaAwayFromSun') do
120
+ rule :foo, 'foo'
121
+ production :foo
122
+ production :foo
123
+ end
124
+ end.should raise_error(ArgumentError)
125
+ end
126
+
127
+ it 'should complain if an attempt is made to create a production for a rule that does not exist yet' do
128
+ lambda do
129
+ Grammar.subclass('GettingControlOfJavaAwayFromSun') { production :foo }
130
+ end.should raise_error(ArgumentError)
131
+ end
132
+
133
+ end
134
+
135
+ describe 'parsing using a grammar' do
136
+
137
+ it 'should complain if asked to parse a nil string' do
138
+ lambda { Grammar.subclass('BobGrammar').parse(nil) }.should raise_error(ArgumentError)
139
+ end
140
+
141
+ it 'should complain if trying to parse without first defining a start symbol' do
142
+ lambda { Grammar.subclass('RoyalGrammar').parse('foo') }.should raise_error
143
+ end
144
+
145
+ it 'should parse starting with the start symbol' do
146
+ grammar = Grammar.subclass('AliceGrammar') do
147
+ rule :expr, /\w+/
148
+ starting_symbol :expr
149
+ end
150
+
151
+ grammar.parse('foo').should == 'foo'
152
+ lambda { grammar.parse('') }.should raise_error(ParseError)
153
+
154
+ end
155
+
156
+ it 'should complain if reference is made to an undefined symbol' do
157
+ grammar = Grammar.subclass('RoyGrammar') { starting_symbol :expr } # :expr is not defined
158
+ lambda { grammar.parse('foo') }.should raise_error
159
+ end
160
+
161
+ it 'should be able to parse using a simple grammar (one rule)' do
162
+ grammar = Grammar.subclass('SimpleGrammar') do
163
+ starting_symbol :foo
164
+ rule :foo, 'foo!'
165
+ end
166
+ grammar.parse('foo!').should == 'foo!'
167
+ lambda { grammar.parse('---') }.should raise_error(ParseError)
168
+ end
169
+
170
+ it 'should be able to parse using a simple grammar (two rules)' do
171
+ grammar = Grammar.subclass('AlmostAsSimpleGrammar') do
172
+ starting_symbol :foo
173
+ rule :foo, 'foo!' | :bar
174
+ rule :bar, /bar/
175
+ end
176
+ grammar.parse('foo!').should == 'foo!'
177
+ grammar.parse('bar').should == 'bar'
178
+ lambda { grammar.parse('---') }.should raise_error(ParseError)
179
+ end
180
+
181
+ it 'should be able to parse using a simple grammar (three rules)' do
182
+
183
+ # a basic version written using intermediary parslets (really two parslets and one rule)
184
+ grammar = Grammar.subclass('MacGrammar') do
185
+ starting_symbol :comment
186
+
187
+ # parslets
188
+ comment_marker = '##'
189
+ comment_body = /.+/
190
+
191
+ # rules
192
+ rule :comment, comment_marker & comment_body.optional
193
+ end
194
+ grammar.parse('## hello!').should == ['##', ' hello!']
195
+ grammar.parse('##').should == '##'
196
+ lambda { grammar.parse('foobar') }.should raise_error(ParseError)
197
+
198
+ # the same grammar rewritten without intermediary parslets (three rules, no standalone parslets)
199
+ grammar = Grammar.subclass('MacAltGrammar') do
200
+ starting_symbol :comment
201
+ rule :comment, :comment_marker & :comment_body.optional
202
+ rule :comment_marker, '##'
203
+ rule :comment_body, /.+/
204
+ end
205
+ grammar.parse('## hello!').should == ['##', ' hello!']
206
+ grammar.parse('##').should == '##'
207
+ lambda { grammar.parse('foobar') }.should raise_error(ParseError)
208
+ end
209
+
210
+ it 'should be able to parse using recursive rules (nested parentheses)' do
211
+
212
+ # basic example
213
+ grammar = Grammar.subclass('NestedGrammar') do
214
+ starting_symbol :bracket_expression
215
+ rule :left_bracket, '('
216
+ rule :right_bracket, ')'
217
+ rule :bracket_content, (/[^()]+/ | :bracket_expression).zero_or_more
218
+ rule :bracket_expression, :left_bracket & :bracket_content.optional & :right_bracket
219
+ end
220
+ grammar.parse('()').should == ['(', ')']
221
+ grammar.parse('(content)').should == ['(', 'content', ')']
222
+ grammar.parse('(content (and more content))').should == ['(', ['content ', ['(', 'and more content', ')']], ')']
223
+ lambda { grammar.parse('(') }.should raise_error(ParseError)
224
+
225
+ # same example but automatically skipping the delimiting parentheses for clearer output
226
+ grammar = Grammar.subclass('NestedSkippingGrammar') do
227
+ starting_symbol :bracket_expression
228
+ rule :bracket_expression, '('.skip & (/[^()]+/ | :bracket_expression).zero_or_more & ')'.skip
229
+ end
230
+ grammar.parse('()').should == []
231
+ grammar.parse('(content)').should == 'content'
232
+ grammar.parse('(content (and more content))').should == ['content ', 'and more content']
233
+ grammar.parse('(content (and more content)(and more))').should == ['content ', 'and more content', 'and more']
234
+ grammar.parse('(content (and more content)(and more)(more still))').should == ['content ', 'and more content', 'and more', 'more still']
235
+ grammar.parse('(content (and more content)(and more(more still)))').should == ['content ', 'and more content', ['and more', 'more still']]
236
+ lambda { grammar.parse('(') }.should raise_error(ParseError)
237
+
238
+ # note that this confusing (possibly even misleading) nesting goes away if you use a proper AST
239
+ grammar = Grammar.subclass('NestedBracketsWithAST') do
240
+ starting_symbol :bracket_expression
241
+ rule :text_expression, /[^()]+/
242
+ rule :bracket_expression, '('.skip & (:text_expression | :bracket_expression).zero_or_more & ')'.skip
243
+ production :bracket_expression.build(:node, :children)
244
+ end
245
+
246
+ # simple tests
247
+ grammar.parse('()').children.should == []
248
+ grammar.parse('(content)').children.to_s.should == 'content'
249
+
250
+ # nested test: two expressions at the first level, one of them nested
251
+ results = grammar.parse('(content (and more content))')
252
+ results.children[0].should == 'content '
253
+ results.children[1].children.to_s.should == 'and more content'
254
+
255
+ # nested test: three expressions at the first level, two of them nested
256
+ results = grammar.parse('(content (and more content)(and more))')#.should == ['content ', 'and more content', 'and more']
257
+ results.children[0].should == 'content '
258
+ results.children[1].children.should == 'and more content'
259
+ results.children[2].children.should == 'and more'
260
+
261
+ # nested test: four expressions at the first level, three of them nested
262
+ results = grammar.parse('(content (and more content)(and more)(more still))')
263
+ results.children[0].should == 'content '
264
+ results.children[1].children.should == 'and more content'
265
+ results.children[2].children.should == 'and more'
266
+ results.children[3].children.should == 'more still'
267
+
268
+ # nested test: three expressions at the first level, one nested and another not only nested but containing another level of nesting
269
+ results = grammar.parse('(content (and more content)(and more(more still)))')
270
+ results.children[0].should == 'content '
271
+ results.children[1].children.should == 'and more content'
272
+ results.children[2].children[0].should == 'and more'
273
+ results.children[2].children[1].children.should == 'more still'
274
+
275
+ # bad input case
276
+ lambda { grammar.parse('(') }.should raise_error(ParseError)
277
+
278
+ end
279
+
280
+ it 'should be able to parse using recursive rules (nested comments)' do
281
+ grammar = Grammar.subclass('NestedCommentsGrammar') do
282
+ starting_symbol :comment
283
+ rule :comment_start, '/*'
284
+ rule :comment_end, '*/'
285
+ rule :comment_content, (:comment | /\/+/ | ('*' & '/'.not!) | /[^*\/]+/).zero_or_more
286
+ rule :comment, '/*' & :comment_content.optional & '*/'
287
+ end
288
+ grammar.parse('/**/').should == ['/*', '*/']
289
+ grammar.parse('/*comment*/').should == ['/*', 'comment', '*/']
290
+ grammar.parse('/* comment /* nested */*/').should == ['/*', [' comment ', ['/*', ' nested ', '*/']], '*/']
291
+ lambda { grammar.parse('/*') }.should raise_error(ParseError)
292
+ end
293
+
294
+ it 'should be able to write a grammar that produces an AST for a simple language that supports addition and assignment' do
295
+
296
+ grammar = Grammar.subclass('SimpleASTLanguage') do
297
+
298
+ starting_symbol :expression
299
+
300
+ # terminal tokens
301
+ rule :identifier, /[a-zA-Z_][a-zA-Z0-9_]*/
302
+ production :identifier.build(:node)
303
+ rule :integer_literal, /[0-9]+/
304
+ production :integer_literal.build(:node)
305
+
306
+ # expressions
307
+ rule :expression, :assignment_expression | :addition_expression | :identifier | :integer_literal
308
+ node :expression
309
+ rule :assignment_expression, :identifier & '='.skip & :expression
310
+ production :assignment_expression.build(:expression, :target, :value)
311
+ rule :addition_expression, (:identifier | :integer_literal) & '+'.skip & :expression
312
+ production :addition_expression.build(:expression, :summee, :summor)
313
+
314
+ end
315
+
316
+ results = grammar.parse('hello')
317
+ results.should be_kind_of(SimpleASTLanguage::Identifier)
318
+ results.lexeme.should == 'hello'
319
+
320
+ results = grammar.parse('1234')
321
+ results.should be_kind_of(SimpleASTLanguage::IntegerLiteral)
322
+ results.lexeme.should == '1234'
323
+
324
+ results = grammar.parse('foo=bar')
325
+ results.should be_kind_of(SimpleASTLanguage::Expression)
326
+ results.should be_kind_of(SimpleASTLanguage::AssignmentExpression)
327
+ results.target.should be_kind_of(SimpleASTLanguage::Identifier)
328
+ results.target.lexeme.should == 'foo'
329
+ results.value.should be_kind_of(SimpleASTLanguage::Identifier)
330
+ results.value.lexeme.should == 'bar'
331
+
332
+ results = grammar.parse('baz+123')
333
+ results.should be_kind_of(SimpleASTLanguage::Expression)
334
+ results.should be_kind_of(SimpleASTLanguage::AdditionExpression)
335
+ results.summee.should be_kind_of(SimpleASTLanguage::Identifier)
336
+ results.summee.lexeme.should == 'baz'
337
+ results.summor.should be_kind_of(SimpleASTLanguage::IntegerLiteral)
338
+ results.summor.lexeme.should == '123'
339
+
340
+ results = grammar.parse('foo=abc+123')
341
+ results.should be_kind_of(SimpleASTLanguage::Expression)
342
+ results.should be_kind_of(SimpleASTLanguage::AssignmentExpression)
343
+ results.target.should be_kind_of(SimpleASTLanguage::Identifier)
344
+ results.target.lexeme.should == 'foo'
345
+ results.value.should be_kind_of(SimpleASTLanguage::AdditionExpression)
346
+ results.value.summee.should be_kind_of(SimpleASTLanguage::Identifier)
347
+ results.value.summee.lexeme.should == 'abc'
348
+ results.value.summor.should be_kind_of(SimpleASTLanguage::IntegerLiteral)
349
+ results.value.summor.lexeme.should == '123'
350
+
351
+ results = grammar.parse('a+b+2')
352
+ results.should be_kind_of(SimpleASTLanguage::Expression)
353
+ results.should be_kind_of(SimpleASTLanguage::AdditionExpression)
354
+ results.summee.should be_kind_of(SimpleASTLanguage::Identifier)
355
+ results.summee.lexeme.should == 'a'
356
+ results.summor.should be_kind_of(SimpleASTLanguage::AdditionExpression)
357
+ results.summor.summee.should be_kind_of(SimpleASTLanguage::Identifier)
358
+ results.summor.summee.lexeme.should == 'b'
359
+ results.summor.summor.should be_kind_of(SimpleASTLanguage::IntegerLiteral)
360
+ results.summor.summor.lexeme.should == '2'
361
+
362
+ end
363
+
364
+ it 'should be able to write a grammar that complains if all the input is not consumed' do
365
+ grammar = Grammar.subclass('ComplainingGrammar') do
366
+ starting_symbol :translation_unit
367
+ rule :translation_unit, :word_list & :end_of_string.and? | :end_of_string
368
+ rule :end_of_string, /\z/
369
+ rule :whitespace, /\s+/
370
+ rule :word, /[a-z]+/
371
+ rule :word_list, :word >> (:whitespace.skip & :word).zero_or_more
372
+
373
+ end
374
+
375
+ grammar.parse('').should == ''
376
+ grammar.parse('foo').should == 'foo'
377
+ grammar.parse('foo bar').should == ['foo', 'bar']
378
+ lambda { grammar.parse('...') }.should raise_error(ParseError)
379
+ lambda { grammar.parse('foo...') }.should raise_error(ParseError)
380
+ lambda { grammar.parse('foo bar...') }.should raise_error(ParseError)
381
+
382
+ end
383
+
384
+ it 'should be able to define a default parslet for intertoken skipping' do
385
+
386
+ # simple example
387
+ grammar = Grammar.subclass('SkippingGrammar') do
388
+ starting_symbol :translation_unit
389
+ skipping :whitespace_and_newlines
390
+ rule :whitespace_and_newlines, /[\s\n\r]+/
391
+ rule :translation_unit, :word_list & :end_of_string.and? | :end_of_string
392
+ rule :end_of_string, /\z/
393
+ rule :word_list, :word.zero_or_more
394
+ rule :word, /[a-z0-9_]+/
395
+ end
396
+
397
+ # not sure if I can justify the difference in behaviour here compared with the previous grammar
398
+ # if I catch these throws at the grammar level I can return nil
399
+ # but note that the previous grammar returns an empty array, whose to_s is just ""
400
+ lambda { grammar.parse('') }.should throw_symbol(:AndPredicateSuccess)
401
+
402
+ grammar.parse('foo').should == 'foo'
403
+ grammar.parse('foo bar').should == ['foo', 'bar'] # intervening whitespace
404
+ grammar.parse('foo bar ').should == ['foo', 'bar'] # trailing whitespace
405
+ grammar.parse(' foo bar').should == ['foo', 'bar'] # leading whitespace
406
+
407
+ # additional example, this time involving the ">>" pseudo-operator
408
+ grammar = Grammar.subclass('SkippingAndMergingGrammar') do
409
+ starting_symbol :translation_unit
410
+ skipping :whitespace_and_newlines
411
+ rule :whitespace_and_newlines, /[\s\n\r]+/
412
+ rule :translation_unit, :word_list & :end_of_string.and? | :end_of_string
413
+ rule :end_of_string, /\z/
414
+ rule :word_list, :word >> (','.skip & :word).zero_or_more
415
+ rule :word, /[a-z0-9_]+/
416
+ end
417
+
418
+ # one word
419
+ grammar.parse('foo').should == 'foo'
420
+
421
+ # two words
422
+ grammar.parse('foo,bar').should == ['foo', 'bar'] # no whitespace
423
+ grammar.parse('foo, bar').should == ['foo', 'bar'] # whitespace after
424
+ grammar.parse('foo ,bar').should == ['foo', 'bar'] # whitespace before
425
+ grammar.parse('foo , bar').should == ['foo', 'bar'] # whitespace before and after
426
+ grammar.parse('foo , bar ').should == ['foo', 'bar'] # trailing and embedded whitespace
427
+ grammar.parse(' foo , bar').should == ['foo', 'bar'] # leading and embedded whitespace
428
+
429
+ # three or four words
430
+ grammar.parse('foo , bar, baz').should == ['foo', 'bar', 'baz']
431
+ grammar.parse(' foo , bar, baz ,bin').should == ['foo', 'bar', 'baz', 'bin']
432
+
433
+ end
434
+
435
+ it 'should complain if trying to set default skipping parslet more than once' do # a second one-argument "skipping" call in the same grammar must raise
436
+ lambda do
437
+ Grammar.subclass('SetSkipperTwice') do
438
+ skipping :first # fine: first one-argument call declares the default skipping parslet
439
+ skipping :again # should raise here: a default has already been declared above
440
+ end
441
+ end.should raise_error # bare matcher accepts any exception class; NOTE(review): sibling specs assert ArgumentError - confirm the class raised here and tighten if possible
442
+ end
443
+
444
+ it 'should complain if passed nil' do # nil is not an acceptable argument to "skipping"
445
+ lambda do
446
+ Grammar.subclass('PassNilToSkipping') { skipping nil } # nil passed as the sole argument
447
+ end.should raise_error(ArgumentError) # the Grammar.subclass call wrapped by the lambda is expected to raise
448
+ end
449
+
450
+ it 'should be able to override default skipping parslet on a per-rule basis' do
450
+
451
+ # the example grammar parses word lists and number lists
452
+ grammar = Grammar.subclass('OverrideDefaultSkippingParslet') do
453
+ starting_symbol :translation_unit
454
+ skipping :whitespace_and_newlines # one-argument form: declares the grammar's default skipping parslet
455
+ rule :whitespace_and_newlines, /\s+/ # any whitespace including newlines
456
+ rule :whitespace, /[ \t\v]+/ # only spaces, tabs and vertical tabs; newlines are excluded
457
+ rule :translation_unit, :component.one_or_more & :end_of_string.and? | :end_of_string
458
+ rule :end_of_string, /\z/ # \z matches only at the very end of the input
459
+ rule :component, :word_list | :number_list
460
+ rule :word_list, :word.one_or_more
461
+ rule :word, /[a-z]+/
462
+ rule :number, /[0-9]+/
463
+
464
+ # the interesting bit: we override the skipping rule for number lists
465
+ rule :number_list, :number.one_or_more # declared before the override on the next line
466
+ skipping :number_list, :whitespace # two-argument form (rule, skipper): only whitespace, no newlines
467
+ end
468
+
469
+ # words in word lists can be separated by whitespace or newlines
470
+ grammar.parse('hello world').should == ['hello', 'world']
471
+ grammar.parse("hello\nworld").should == ['hello', 'world']
472
+ grammar.parse("hello world\nworld hello").should == ['hello', 'world', 'world', 'hello'] # newline does not end a word list, so the result is one flat list
473
+
474
+ # numbers in number lists may be separated only by whitespace, not newlines
475
+ grammar.parse('123 456').should == ['123', '456']
476
+ grammar.parse("123\n456").should == ['123', '456'] # this succeeds because parser treats them as two separate number lists
477
+ grammar.parse("123 456\n456 123").should == [['123', '456'], ['456', '123']] # the newline splits the input into two number lists, hence two nested arrays
478
+
479
+ # intermixing word lists and number lists
480
+ grammar.parse("bar\n123").should == ['bar', '123'] # per the expected value, single-element lists show up as bare strings
481
+ grammar.parse("123\n456\nbar").should == ['123', '456', 'bar']
482
+
483
+ # these were buggy at one point: "123\n456" was getting mashed into "123456" due to misguided use of String#delete! to delete first newline
484
+ grammar.parse("\n123\n456").should == ['123', '456'] # leading newline before the first number list
485
+ grammar.parse("bar\n123\n456").should == ['bar', '123', '456']
486
+ grammar.parse("baz bar\n123\n456").should == [['baz', 'bar'], '123', '456'] # multi-element list is nested, single-element lists stay bare
487
+ grammar.parse("hello world\nfoo\n123 456 baz bar\n123\n456").should == [['hello', 'world', 'foo'], ['123', '456'], ['baz', 'bar'], '123', '456'] # combined case mixing all of the above
488
+
489
+ end
491
+
492
+ it 'should complain if trying to override the default for the same rule twice' do # a rule may receive at most one skipping override
493
+ lambda do
494
+ Grammar.subclass('OverrideSameRuleTwice') do
495
+ rule :the_rule, 'foo' # the rule must exist before an override is attached to it
496
+ skipping :the_rule, :the_override # fine: first override for :the_rule; NOTE(review): :the_override is never declared as a rule here - presumably the skipper is not validated at declaration time, confirm
497
+ skipping :the_rule, :the_override # should raise: :the_rule already has an override
498
+ end
499
+ end.should raise_error(ArgumentError)
500
+ end
501
+
502
+ it "should complain if trying to set an override for a rule that hasn't been defined yet" do # overrides may only target rules that already exist
503
+ lambda do
504
+ Grammar.subclass('OverrideUndefinedRule') { skipping :non_existent_rule, :the_override } # no "rule :non_existent_rule" precedes this call
505
+ end.should raise_error(ArgumentError) # the Grammar.subclass call wrapped by the lambda is expected to raise
506
+ end
507
+
508
+ it 'use of the "skipping" directive should play nicely with predicates' do
509
+
510
+ # example 1: word + predicate
511
+ grammar = Grammar.subclass('NicePlayer') do
512
+ starting_symbol :foo
513
+ skipping :whitespace
514
+ rule :whitespace, /[ \t\v]+/ # spaces, tabs and vertical tabs only; newlines are not skipped
515
+ rule :foo, 'hello' & 'world'.and? # per the expected values below, only 'hello' appears in the result
516
+ end
517
+
518
+ grammar.parse('hello world').should == 'hello'
519
+ grammar.parse('hello world').should == 'hello' # NOTE(review): identical to the previous assertion as shown; possibly a multi-space variant whose whitespace was collapsed - confirm against the original file
520
+ grammar.parse('helloworld').should == 'hello' # no whitespace at all between the two tokens
521
+ lambda { grammar.parse('hello') }.should raise_error(ParseError) # input ends after 'hello', so the 'world' predicate cannot match
522
+ lambda { grammar.parse('hello buddy') }.should raise_error(ParseError) # 'buddy' is not 'world'
523
+ lambda { grammar.parse("hello\nbuddy") }.should raise_error(ParseError) # newline is not matched by :whitespace, and 'buddy' is not 'world' either
524
+
525
+ # example 2: word + predicate + other word
526
+ grammar = Grammar.subclass('NicePlayer2') do
527
+ starting_symbol :foo
528
+ skipping :whitespace
529
+ rule :whitespace, /[ \t\v]+/ # same skipper as in example 1
530
+ rule :foo, /hel../ & 'world'.and? & /\w+/ # per the expected values below, the result holds the two regex matches and no separate entry for the predicate
531
+ end
532
+
533
+ grammar.parse('hello world').should == ['hello', 'world']
534
+ grammar.parse('hello world').should == ['hello', 'world'] # NOTE(review): identical to the previous assertion as shown; possibly a multi-space variant whose whitespace was collapsed - confirm against the original file
535
+ grammar.parse('helloworld').should == ['hello', 'world'] # no whitespace at all between the two tokens
536
+ lambda { grammar.parse('hello') }.should raise_error(ParseError) # input ends after 'hello', so the 'world' predicate cannot match
537
+ lambda { grammar.parse('hello buddy') }.should raise_error(ParseError) # 'buddy' is not 'world'
538
+ lambda { grammar.parse("hello\nbuddy") }.should raise_error(ParseError) # newline is not matched by :whitespace, and 'buddy' is not 'world' either
539
+
540
+ end
541
+
542
+ end
543
+ end # class Grammar
544
+ end # module Walrus
545
+