treetop 1.5.3 → 1.6.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +5 -13
  2. data/Gemfile +12 -0
  3. data/History.txt +18 -0
  4. data/README.md +4 -0
  5. data/Rakefile +20 -40
  6. data/Treetop.tmbundle/Preferences/Comments.tmPreferences +28 -0
  7. data/Treetop.tmbundle/Snippets/grammar ___ end.tmSnippet +20 -0
  8. data/Treetop.tmbundle/Snippets/rule ___ end.tmSnippet +18 -0
  9. data/Treetop.tmbundle/Support/nibs/SyntaxTreeViewer.nib/designable.nib +1524 -0
  10. data/Treetop.tmbundle/Support/nibs/SyntaxTreeViewer.nib/keyedobjects.nib +0 -0
  11. data/Treetop.tmbundle/Support/syntax_tree_viewer.rb +117 -0
  12. data/Treetop.tmbundle/Syntaxes/Treetop Grammar.tmLanguage +358 -0
  13. data/Treetop.tmbundle/info.plist +10 -0
  14. data/doc/pitfalls_and_advanced_techniques.markdown +7 -1
  15. data/doc/syntactic_recognition.markdown +7 -2
  16. data/doc/tt.1 +1 -1
  17. data/examples/indented_blocks/indented_blocks.tt +73 -0
  18. data/examples/indented_blocks/indented_blocks_test.rb +24 -0
  19. data/lib/treetop/compiler/grammar_compiler.rb +6 -3
  20. data/lib/treetop/compiler/metagrammar.rb +301 -159
  21. data/lib/treetop/compiler/metagrammar.treetop +96 -13
  22. data/lib/treetop/compiler/node_classes/anything_symbol.rb +10 -2
  23. data/lib/treetop/compiler/node_classes/atomic_expression.rb +2 -2
  24. data/lib/treetop/compiler/node_classes/character_class.rb +10 -2
  25. data/lib/treetop/compiler/node_classes/choice.rb +11 -7
  26. data/lib/treetop/compiler/node_classes/nonterminal.rb +6 -2
  27. data/lib/treetop/compiler/node_classes/parenthesized_expression.rb +5 -1
  28. data/lib/treetop/compiler/node_classes/parsing_expression.rb +10 -1
  29. data/lib/treetop/compiler/node_classes/parsing_rule.rb +1 -1
  30. data/lib/treetop/compiler/node_classes/predicate.rb +8 -1
  31. data/lib/treetop/compiler/node_classes/predicate_block.rb +7 -0
  32. data/lib/treetop/compiler/node_classes/repetition.rb +28 -8
  33. data/lib/treetop/compiler/node_classes/sequence.rb +5 -1
  34. data/lib/treetop/compiler/node_classes/terminal.rb +36 -22
  35. data/lib/treetop/compiler/ruby_builder.rb +2 -2
  36. data/lib/treetop/ruby_extensions/string.rb +0 -6
  37. data/lib/treetop/runtime/compiled_parser.rb +33 -14
  38. data/lib/treetop/runtime/syntax_node.rb +24 -15
  39. data/lib/treetop/runtime/terminal_parse_failure.rb +4 -3
  40. data/lib/treetop/runtime/terminal_syntax_node.rb +4 -4
  41. data/lib/treetop/version.rb +2 -2
  42. data/treetop.gemspec +25 -165
  43. metadata +43 -98
  44. data/doc/site.rb +0 -112
  45. data/doc/sitegen.rb +0 -65
  46. data/examples/lambda_calculus/lambda_calculus +0 -0
  47. data/spec/compiler/and_predicate_spec.rb +0 -36
  48. data/spec/compiler/anything_symbol_spec.rb +0 -44
  49. data/spec/compiler/character_class_spec.rb +0 -301
  50. data/spec/compiler/choice_spec.rb +0 -80
  51. data/spec/compiler/circular_compilation_spec.rb +0 -30
  52. data/spec/compiler/failure_propagation_functional_spec.rb +0 -21
  53. data/spec/compiler/grammar_compiler_spec.rb +0 -113
  54. data/spec/compiler/grammar_spec.rb +0 -41
  55. data/spec/compiler/multibyte_chars_spec.rb +0 -38
  56. data/spec/compiler/namespace_spec.rb +0 -42
  57. data/spec/compiler/nonterminal_symbol_spec.rb +0 -40
  58. data/spec/compiler/not_predicate_spec.rb +0 -38
  59. data/spec/compiler/occurrence_range_spec.rb +0 -189
  60. data/spec/compiler/one_or_more_spec.rb +0 -35
  61. data/spec/compiler/optional_spec.rb +0 -37
  62. data/spec/compiler/parenthesized_expression_spec.rb +0 -19
  63. data/spec/compiler/parsing_rule_spec.rb +0 -61
  64. data/spec/compiler/repeated_subrule_spec.rb +0 -29
  65. data/spec/compiler/semantic_predicate_spec.rb +0 -175
  66. data/spec/compiler/sequence_spec.rb +0 -129
  67. data/spec/compiler/terminal_spec.rb +0 -170
  68. data/spec/compiler/terminal_symbol_spec.rb +0 -37
  69. data/spec/compiler/test_grammar.treetop +0 -7
  70. data/spec/compiler/test_grammar.tt +0 -7
  71. data/spec/compiler/test_grammar_do.treetop +0 -7
  72. data/spec/compiler/test_grammar_magic_coding.treetop +0 -8
  73. data/spec/compiler/test_grammar_magic_encoding.treetop +0 -8
  74. data/spec/compiler/tt_compiler_spec.rb +0 -224
  75. data/spec/compiler/zero_or_more_spec.rb +0 -56
  76. data/spec/composition/a.treetop +0 -11
  77. data/spec/composition/b.treetop +0 -11
  78. data/spec/composition/c.treetop +0 -10
  79. data/spec/composition/d.treetop +0 -10
  80. data/spec/composition/f.treetop +0 -17
  81. data/spec/composition/grammar_composition_spec.rb +0 -40
  82. data/spec/composition/subfolder/e_includes_c.treetop +0 -15
  83. data/spec/ruby_extensions/string_spec.rb +0 -32
  84. data/spec/runtime/compiled_parser_spec.rb +0 -123
  85. data/spec/runtime/interval_skip_list/delete_spec.rb +0 -147
  86. data/spec/runtime/interval_skip_list/expire_range_spec.rb +0 -349
  87. data/spec/runtime/interval_skip_list/insert_and_delete_node_spec.rb +0 -385
  88. data/spec/runtime/interval_skip_list/insert_spec.rb +0 -660
  89. data/spec/runtime/interval_skip_list/interval_skip_list_spec.graffle +0 -6175
  90. data/spec/runtime/interval_skip_list/interval_skip_list_spec.rb +0 -58
  91. data/spec/runtime/interval_skip_list/palindromic_fixture.rb +0 -35
  92. data/spec/runtime/interval_skip_list/palindromic_fixture_spec.rb +0 -163
  93. data/spec/runtime/interval_skip_list/spec_helper.rb +0 -91
  94. data/spec/runtime/syntax_node_spec.rb +0 -77
  95. data/spec/spec_helper.rb +0 -115
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'treetop'
4
+ require "#{ENV["TM_SUPPORT_PATH"]}/lib/ui"
5
+
6
+ # Monkey-patch SyntaxNode in a gross violation of its privacy.
7
+ module Treetop
8
+ module Runtime
9
+ class SyntaxNode
10
+ def interesting_methods
11
+ # Copied from SyntaxNode#inspect
12
+ methods-[extension_modules.last ? extension_modules.last.methods : nil]-self.class.instance_methods
13
+ end
14
+
15
+ def to_hash
16
+ {
17
+ "class_and_modules" => self.class.to_s.sub(/.*:/,'') + extension_modules.map{|m| "+"+m.to_s.sub(/.*:/,'')}*"",
18
+ "offset" => interval.first,
19
+ "text_value" => text_value,
20
+ "methods" => interesting_methods * ", ",
21
+ "elements" => elements ? elements.map {|e| e.to_hash} : []
22
+ }
23
+ end
24
+ end
25
+ end
26
+ end
27
+
28
+ class SyntaxTreeViewer
29
+ def self.nib_path
30
+ File.dirname(__FILE__) + "/SyntaxTreeViewer.nib"
31
+ end
32
+
33
+ def self.new_with_unknown_grammar
34
+ parser = if ENV['TM_SCOPE'] =~ /source\.ruby/ && ENV['TM_SELECTED_TEXT']
35
+ eval(ENV['TM_SELECTED_TEXT'])
36
+ else
37
+ parser_file = if ENV['TM_SCOPE'] =~ /source\.treetop/
38
+ ENV['TM_FILEPATH']
39
+ else
40
+ ask_for_grammar
41
+ end
42
+ return unless parser_file
43
+ Treetop.load(parser_file)
44
+ end
45
+ new(parser.new)
46
+ end
47
+
48
+ def self.ask_for_grammar
49
+ files = TextMate::UI.request_file("title" => "Select a Grammar File")
50
+ if files.nil? || files.empty?
51
+ nil
52
+ else
53
+ files[0]
54
+ end
55
+ end
56
+
57
+ def self.nib_path
58
+ File.dirname(__FILE__) + "/nibs/SyntaxTreeViewer.nib"
59
+ end
60
+
61
+ def initialize(_parser)
62
+ @parser = _parser
63
+ run_parser!
64
+ end
65
+
66
+ def dialog
67
+ TextMate::UI.dialog(
68
+ :nib => self.class.nib_path,
69
+ :parameters => parameters
70
+ ) do |d|
71
+ d.wait_for_input do |params|
72
+ if params["returnArgument"]
73
+ self.input = params["returnArgument"]
74
+ run_parser!
75
+ d.parameters = parameters
76
+ true
77
+ else
78
+ false
79
+ end
80
+ end
81
+ end
82
+ end
83
+
84
+ def input
85
+ ENV['TM_TREETOP_SYNTAX_VIEWER_INPUT'] ||= ""
86
+ end
87
+
88
+ def input=(new_input)
89
+ ENV['TM_TREETOP_SYNTAX_VIEWER_INPUT'] = new_input
90
+ end
91
+
92
+ private
93
+ def run_parser!
94
+ if input && !input.empty?
95
+ @syntax_tree = @parser.parse(input)
96
+ TextMate::UI.alert(:warning, "Parse error", @parser.failure_reason) unless @syntax_tree
97
+ end
98
+ end
99
+
100
+ def parameters
101
+ p = {
102
+ "input" => input,
103
+ "syntax_tree" => @syntax_tree ? @syntax_tree.to_hash : {},
104
+ "selected_tab" => !input.empty? && @syntax_tree ? "Syntax Tree" : "Input"
105
+ }
106
+ puts p
107
+ p
108
+ end
109
+ end
110
+
111
+ if __FILE__ == $0
112
+ Dir.chdir("/Users/aobrien/Work/canopy/play/sparql/")
113
+ require "/Users/aobrien/Work/canopy/play/sparql/lib/sparql"
114
+ v = SyntaxTreeViewer.new(SparqlParser.new)
115
+ v.input = %{PREFIX foaf: <http://xmlns.com/foaf/0.1/> SELECT ?foo WHERE { ?x foaf:knows ?y . ?z foaf:knows ?x .}}
116
+ v.dialog
117
+ end
@@ -0,0 +1,358 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>fileTypes</key>
6
+ <array>
7
+ <string>treetop</string>
8
+ <string>tt</string>
9
+ </array>
10
+ <key>foldingStartMarker</key>
11
+ <string>(module|grammar|rule).*$</string>
12
+ <key>foldingStopMarker</key>
13
+ <string>^\s*end</string>
14
+ <key>keyEquivalent</key>
15
+ <string>^~T</string>
16
+ <key>name</key>
17
+ <string>Treetop Grammar</string>
18
+ <key>patterns</key>
19
+ <array>
20
+ <dict>
21
+ <key>include</key>
22
+ <string>#comment</string>
23
+ </dict>
24
+ <dict>
25
+ <key>begin</key>
26
+ <string>^(\s*)(module) ([A-Z]\w+)</string>
27
+ <key>beginCaptures</key>
28
+ <dict>
29
+ <key>2</key>
30
+ <dict>
31
+ <key>name</key>
32
+ <string>keyword.begin.module.treetop</string>
33
+ </dict>
34
+ <key>3</key>
35
+ <dict>
36
+ <key>name</key>
37
+ <string>entity.name.module.treetop</string>
38
+ </dict>
39
+ </dict>
40
+ <key>end</key>
41
+ <string>^\1(end)$</string>
42
+ <key>endCaptures</key>
43
+ <dict>
44
+ <key>1</key>
45
+ <dict>
46
+ <key>name</key>
47
+ <string>keyword.end.module.treetop</string>
48
+ </dict>
49
+ </dict>
50
+ <key>name</key>
51
+ <string>meta.module.treetop</string>
52
+ <key>patterns</key>
53
+ <array>
54
+ <dict>
55
+ <key>include</key>
56
+ <string>$self</string>
57
+ </dict>
58
+ </array>
59
+ </dict>
60
+ <dict>
61
+ <key>begin</key>
62
+ <string>^(\s*)(grammar) ([A-Z]\w+)</string>
63
+ <key>beginCaptures</key>
64
+ <dict>
65
+ <key>2</key>
66
+ <dict>
67
+ <key>name</key>
68
+ <string>keyword.begin.grammar.treetop</string>
69
+ </dict>
70
+ <key>3</key>
71
+ <dict>
72
+ <key>name</key>
73
+ <string>entity.name.grammar.treetop</string>
74
+ </dict>
75
+ </dict>
76
+ <key>end</key>
77
+ <string>^\1(end)$</string>
78
+ <key>endCaptures</key>
79
+ <dict>
80
+ <key>1</key>
81
+ <dict>
82
+ <key>name</key>
83
+ <string>keyword.end.grammar.treetop</string>
84
+ </dict>
85
+ </dict>
86
+ <key>name</key>
87
+ <string>meta.grammar.treetop</string>
88
+ <key>patterns</key>
89
+ <array>
90
+ <dict>
91
+ <key>include</key>
92
+ <string>$self</string>
93
+ </dict>
94
+ </array>
95
+ </dict>
96
+ <dict>
97
+ <key>captures</key>
98
+ <dict>
99
+ <key>1</key>
100
+ <dict>
101
+ <key>name</key>
102
+ <string>keyword.include.treetop</string>
103
+ </dict>
104
+ <key>2</key>
105
+ <dict>
106
+ <key>name</key>
107
+ <string>entity.name.include.treetop</string>
108
+ </dict>
109
+ </dict>
110
+ <key>match</key>
111
+ <string>(include)\s+(\w+)</string>
112
+ <key>name</key>
113
+ <string>meta.include.treetop</string>
114
+ </dict>
115
+ <dict>
116
+ <key>begin</key>
117
+ <string>\b(rule)\b (\w+)</string>
118
+ <key>beginCaptures</key>
119
+ <dict>
120
+ <key>1</key>
121
+ <dict>
122
+ <key>name</key>
123
+ <string>keyword.begin.rule.treetop</string>
124
+ </dict>
125
+ <key>2</key>
126
+ <dict>
127
+ <key>name</key>
128
+ <string>entity.name.rule.treetop</string>
129
+ </dict>
130
+ </dict>
131
+ <key>end</key>
132
+ <string>^\s+\bend\b\s*$</string>
133
+ <key>endCaptures</key>
134
+ <dict>
135
+ <key>0</key>
136
+ <dict>
137
+ <key>name</key>
138
+ <string>keyword.end.rule.treetop</string>
139
+ </dict>
140
+ </dict>
141
+ <key>name</key>
142
+ <string>meta.rule.treetop</string>
143
+ <key>patterns</key>
144
+ <array>
145
+ <dict>
146
+ <key>include</key>
147
+ <string>#comment</string>
148
+ </dict>
149
+ <dict>
150
+ <key>include</key>
151
+ <string>#strings</string>
152
+ </dict>
153
+ <dict>
154
+ <key>include</key>
155
+ <string>#character-class</string>
156
+ </dict>
157
+ <dict>
158
+ <key>match</key>
159
+ <string>\/</string>
160
+ <key>name</key>
161
+ <string>keyword.operator.or.treetop</string>
162
+ </dict>
163
+ <dict>
164
+ <key>match</key>
165
+ <string>&lt;\w+?&gt;</string>
166
+ <key>name</key>
167
+ <string>variable.class-instance.treetop</string>
168
+ </dict>
169
+ <dict>
170
+ <key>match</key>
171
+ <string>\w+?:</string>
172
+ <key>name</key>
173
+ <string>support.operand.treetop</string>
174
+ </dict>
175
+ <dict>
176
+ <key>begin</key>
177
+ <string>\{</string>
178
+ <key>end</key>
179
+ <string>\}</string>
180
+ <key>name</key>
181
+ <string>meta.embedded-ruby.treetop</string>
182
+ <key>patterns</key>
183
+ <array>
184
+ <dict>
185
+ <key>include</key>
186
+ <string>source.ruby</string>
187
+ </dict>
188
+ </array>
189
+ </dict>
190
+ </array>
191
+ </dict>
192
+ </array>
193
+ <key>repository</key>
194
+ <dict>
195
+ <key>character-class</key>
196
+ <dict>
197
+ <key>patterns</key>
198
+ <array>
199
+ <dict>
200
+ <key>match</key>
201
+ <string>\\[wWsSdDhH]|\.</string>
202
+ <key>name</key>
203
+ <string>constant.character.character-class.regexp</string>
204
+ </dict>
205
+ <dict>
206
+ <key>match</key>
207
+ <string>\\.</string>
208
+ <key>name</key>
209
+ <string>constant.character.escape.backslash.regexp</string>
210
+ </dict>
211
+ <dict>
212
+ <key>begin</key>
213
+ <string>(\[)(\^)?</string>
214
+ <key>beginCaptures</key>
215
+ <dict>
216
+ <key>1</key>
217
+ <dict>
218
+ <key>name</key>
219
+ <string>punctuation.definition.character-class.regexp</string>
220
+ </dict>
221
+ <key>2</key>
222
+ <dict>
223
+ <key>name</key>
224
+ <string>keyword.operator.negation.regexp</string>
225
+ </dict>
226
+ </dict>
227
+ <key>end</key>
228
+ <string>(\])</string>
229
+ <key>endCaptures</key>
230
+ <dict>
231
+ <key>1</key>
232
+ <dict>
233
+ <key>name</key>
234
+ <string>punctuation.definition.character-class.regexp</string>
235
+ </dict>
236
+ </dict>
237
+ <key>name</key>
238
+ <string>constant.other.character-class.set.regexp</string>
239
+ <key>patterns</key>
240
+ <array>
241
+ <dict>
242
+ <key>include</key>
243
+ <string>#character-class</string>
244
+ </dict>
245
+ <dict>
246
+ <key>captures</key>
247
+ <dict>
248
+ <key>2</key>
249
+ <dict>
250
+ <key>name</key>
251
+ <string>constant.character.escape.backslash.regexp</string>
252
+ </dict>
253
+ <key>4</key>
254
+ <dict>
255
+ <key>name</key>
256
+ <string>constant.character.escape.backslash.regexp</string>
257
+ </dict>
258
+ </dict>
259
+ <key>match</key>
260
+ <string>(.|(\\.))\-([^\]]|(\\.))</string>
261
+ <key>name</key>
262
+ <string>constant.other.character-class.range.regexp</string>
263
+ </dict>
264
+ <dict>
265
+ <key>match</key>
266
+ <string>&amp;&amp;</string>
267
+ <key>name</key>
268
+ <string>keyword.operator.intersection.regexp</string>
269
+ </dict>
270
+ </array>
271
+ </dict>
272
+ </array>
273
+ </dict>
274
+ <key>comment</key>
275
+ <dict>
276
+ <key>captures</key>
277
+ <dict>
278
+ <key>1</key>
279
+ <dict>
280
+ <key>name</key>
281
+ <string>punctuation.definition.comment.treetop</string>
282
+ </dict>
283
+ </dict>
284
+ <key>match</key>
285
+ <string>(#).*$\n?</string>
286
+ <key>name</key>
287
+ <string>comment.line.number-sign.treetop</string>
288
+ </dict>
289
+ <key>strings</key>
290
+ <dict>
291
+ <key>patterns</key>
292
+ <array>
293
+ <dict>
294
+ <key>begin</key>
295
+ <string>'</string>
296
+ <key>beginCaptures</key>
297
+ <dict>
298
+ <key>0</key>
299
+ <dict>
300
+ <key>name</key>
301
+ <string>punctuation.definition.string.begin.treetop</string>
302
+ </dict>
303
+ </dict>
304
+ <key>end</key>
305
+ <string>'</string>
306
+ <key>endCaptures</key>
307
+ <dict>
308
+ <key>0</key>
309
+ <dict>
310
+ <key>name</key>
311
+ <string>punctuation.definition.string.end.treetop</string>
312
+ </dict>
313
+ </dict>
314
+ <key>name</key>
315
+ <string>string.quoted.single.treetop</string>
316
+ <key>patterns</key>
317
+ <array>
318
+ <dict>
319
+ <key>match</key>
320
+ <string>\\(u\h{4}|.)</string>
321
+ <key>name</key>
322
+ <string>constant.character.escape.antlr</string>
323
+ </dict>
324
+ </array>
325
+ </dict>
326
+ <dict>
327
+ <key>begin</key>
328
+ <string>"</string>
329
+ <key>beginCaptures</key>
330
+ <dict>
331
+ <key>0</key>
332
+ <dict>
333
+ <key>name</key>
334
+ <string>punctuation.definition.string.begin.treetop</string>
335
+ </dict>
336
+ </dict>
337
+ <key>end</key>
338
+ <string>"</string>
339
+ <key>endCaptures</key>
340
+ <dict>
341
+ <key>0</key>
342
+ <dict>
343
+ <key>name</key>
344
+ <string>punctuation.definition.string.end.treetop</string>
345
+ </dict>
346
+ </dict>
347
+ <key>name</key>
348
+ <string>string.quoted.double.treetop</string>
349
+ </dict>
350
+ </array>
351
+ </dict>
352
+ </dict>
353
+ <key>scopeName</key>
354
+ <string>source.treetop</string>
355
+ <key>uuid</key>
356
+ <string>A1604A34-0B73-4D5A-9499-87D881DFA8D5</string>
357
+ </dict>
358
+ </plist>
@@ -0,0 +1,10 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>name</key>
6
+ <string>Treetop</string>
7
+ <key>uuid</key>
8
+ <string>83A8B700-143D-4BD6-B4EA-D73796E8F883</string>
9
+ </dict>
10
+ </plist>
@@ -30,7 +30,7 @@ Say I want to parse a diabolical wiki syntax in which the following interpretati
30
30
  end
31
31
 
32
32
  rule em
33
- '**' (strong / !'*' . / '\*')+ '**'
33
+ '*' (strong / !'*' . / '\*')+ '*'
34
34
  end
35
35
 
36
36
  Emphasized text is allowed within strong text by virtue of `em` being the first alternative. Since `em` will only successfully parse if a matching `*` is found, it is permitted, but other than that, no `*` characters are allowed unless they are escaped.
@@ -49,3 +49,9 @@ This says that `'end'` must be followed by a space, but this space is not consum
49
49
  end
50
50
 
51
51
  In general, when the syntax gets tough, it helps to focus on what you really mean. A keyword is a sequence of characters that is not followed by another character that isn't a space.
52
+
53
+ ## Poor Performance with Large Unicode Strings
54
+
55
+ Treetop may perform poorly when parsing very large (more than 100KB) Unicode strings. This is because substring lookups on Ruby Unicode strings are linear-time operations, not constant-time operations as they are on ASCII-encoded strings. As a result, total parse time grows faster than linearly with input size (roughly quadratically), so large strings can take disproportionately longer to parse than small ones.
56
+
57
+ If your input and grammar only expect ASCII strings, you can achieve significant performance improvements for large strings by re-encoding them to ASCII using `input.encode(Encoding::US_ASCII)`. See [this issue on GitHub](https://github.com/cjheath/treetop/issues/31) for more information and other possible workarounds for Unicode strings.
@@ -32,13 +32,15 @@ The main keywords are:
32
32
 
33
33
  * `rule` : This defines a parsing rule within the grammar. It is followed by a name by which this rule can be referenced within other rules. It is then followed by a parsing expression defining the rule.
34
34
 
35
- A grammar may be surrounded by one or more nested `module` statements, which provides a namespace for the generated Ruby parser.
35
+ A grammar may be surrounded by one or more nested `module` or `class` statements, which provide a namespace for the generated Ruby parser. Note that you cannot specify a superclass for a class, so if your class has a superclass, it must be declared elsewhere and loaded first.
36
36
 
37
- Treetop will emit a module called `GrammarName` and a parser class called `GrammarNameParser` (in the module namespace, if specified).
37
+ Treetop will emit a module called `GrammarName` and a parser class called `GrammarNameParser` (in the namespace, if specified).
38
38
 
39
39
  #Parsing Expressions
40
40
  Each rule associates a name with a _parsing expression_. Parsing expressions are a generalization of vanilla regular expressions. Their key feature is the ability to reference other expressions in the grammar by name.
41
41
 
42
+ Treetop parsers will try to match the first rule defined in the grammar, unless you pass an optional parameter to set a different top rule.
43
+
42
44
  ##Terminal Symbols
43
45
  ###Strings
44
46
  Strings are surrounded in double or single quotes and must be matched exactly.
@@ -213,3 +215,6 @@ tried at which locations in the input, and what the result was. This process, ca
213
215
  requires that the rule would produce the same result (if run again) as it produced the first time when
214
216
  the result was remembered. If you violate this principle in your semantic predicates, be prepared to
215
217
  fight Cerberus before you're allowed out of Hades again.
218
+
219
+ There's an example of how to use semantic predicates to parse a language with white-space indented blocks
220
+ in the examples directory (see `examples/indented_blocks`).
data/doc/tt.1 CHANGED
@@ -80,4 +80,4 @@ tt \-o alterate_name.rb foo
80
80
 
81
81
  The treetop website:
82
82
 
83
- .B http://treetop.rubyforge.org
83
+ .B http://cjheath.github.io/treetop/
@@ -0,0 +1,73 @@
1
+ grammar IndentedBlocks
2
+ rule top
3
+ # Initialise the indent stack with a sentinel:
4
+ &{|s| @indents = [-1] }
5
+ foo:('foo'?)
6
+ nested_blocks
7
+ {
8
+ def inspect
9
+ nested_blocks.inspect
10
+ end
11
+ }
12
+ end
13
+
14
+ rule nested_blocks
15
+ (
16
+ # Do not try to extract this semantic predicate into a new rule.
17
+ # It will be memo-ized incorrectly because @indents.last will change.
18
+ !{|s|
19
+ # Peek at the following indentation:
20
+ save = index; i = _nt_indentation; index = save
21
+ # We're closing if the indentation is less or the same as our enclosing block's:
22
+ closing = i.text_value.length <= @indents.last
23
+ }
24
+ block
25
+ )*
26
+ {
27
+ def inspect
28
+ elements.map{|e| e.block.inspect}*"\n"
29
+ end
30
+ }
31
+ end
32
+
33
+ rule block
34
+ indented_line # The block's opening line
35
+ &{|s| # Push the indent level to the stack
36
+ level = s[0].indentation.text_value.length
37
+ @indents << level
38
+ true
39
+ }
40
+ nested_blocks # Parse any nested blocks
41
+ &{|s| # Pop the indent stack
42
+ # Note that under no circumstances should "nested_blocks" fail, or the stack will be mis-aligned
43
+ @indents.pop
44
+ true
45
+ }
46
+ {
47
+ def inspect
48
+ indented_line.inspect +
49
+ (nested_blocks.elements.size > 0 ? (
50
+ "\n{\n" +
51
+ nested_blocks.elements.map { |content|
52
+ content.block.inspect+"\n"
53
+ }*'' +
54
+ "}"
55
+ )
56
+ : "")
57
+ end
58
+ }
59
+ end
60
+
61
+ rule indented_line
62
+ indentation text:((!"\n" .)*) "\n"
63
+ {
64
+ def inspect
65
+ text.text_value
66
+ end
67
+ }
68
+ end
69
+
70
+ rule indentation
71
+ ' '*
72
+ end
73
+ end
@@ -0,0 +1,24 @@
1
+ require 'polyglot'
2
+ require 'byebug'
3
+ require 'treetop'
4
+ require 'indented_blocks'
5
+
6
+ parser = IndentedBlocksParser.new
7
+
8
+ input = <<END
9
+ def foo
10
+ here is some indented text
11
+ here it's further indented
12
+ and here the same
13
+ but here it's further again
14
+ and some more like that
15
+ before going back to here
16
+ down again
17
+ back twice
18
+ and start from the beginning again
19
+ with only a small block this time
20
+ END
21
+
22
+ parse_tree = parser.parse input
23
+
24
+ p parse_tree
@@ -33,10 +33,13 @@ module Treetop
33
33
 
34
34
  # compile a treetop source file and load it
35
35
  def self.load(path)
36
- adjusted_path = path =~ /\.(treetop|tt)\Z/ ? path : path + '.treetop'
37
- File.open(adjusted_path) do |source_file|
36
+ unless path =~ Treetop::Polyglot::VALID_GRAMMAR_EXT_REGEXP
37
+ ext = Treetop::Polyglot::VALID_GRAMMAR_EXT.select {|ext| File.exist?(path+".#{ext}")}.shift
38
+ path += ".#{ext}" unless ext.nil?
39
+ end
40
+ File.open(path) do |source_file|
38
41
  source = source_file.read
39
- source.gsub!(/\b__FILE__\b/, %Q{"#{adjusted_path}"})
42
+ source.gsub!(/\b__FILE__\b/, %Q{"#{path}"})
40
43
  load_from_string(source)
41
44
  end
42
45
  end