treetop 1.5.3 → 1.6.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -13
- data/Gemfile +12 -0
- data/History.txt +18 -0
- data/README.md +4 -0
- data/Rakefile +20 -40
- data/Treetop.tmbundle/Preferences/Comments.tmPreferences +28 -0
- data/Treetop.tmbundle/Snippets/grammar ___ end.tmSnippet +20 -0
- data/Treetop.tmbundle/Snippets/rule ___ end.tmSnippet +18 -0
- data/Treetop.tmbundle/Support/nibs/SyntaxTreeViewer.nib/designable.nib +1524 -0
- data/Treetop.tmbundle/Support/nibs/SyntaxTreeViewer.nib/keyedobjects.nib +0 -0
- data/Treetop.tmbundle/Support/syntax_tree_viewer.rb +117 -0
- data/Treetop.tmbundle/Syntaxes/Treetop Grammar.tmLanguage +358 -0
- data/Treetop.tmbundle/info.plist +10 -0
- data/doc/pitfalls_and_advanced_techniques.markdown +7 -1
- data/doc/syntactic_recognition.markdown +7 -2
- data/doc/tt.1 +1 -1
- data/examples/indented_blocks/indented_blocks.tt +73 -0
- data/examples/indented_blocks/indented_blocks_test.rb +24 -0
- data/lib/treetop/compiler/grammar_compiler.rb +6 -3
- data/lib/treetop/compiler/metagrammar.rb +301 -159
- data/lib/treetop/compiler/metagrammar.treetop +96 -13
- data/lib/treetop/compiler/node_classes/anything_symbol.rb +10 -2
- data/lib/treetop/compiler/node_classes/atomic_expression.rb +2 -2
- data/lib/treetop/compiler/node_classes/character_class.rb +10 -2
- data/lib/treetop/compiler/node_classes/choice.rb +11 -7
- data/lib/treetop/compiler/node_classes/nonterminal.rb +6 -2
- data/lib/treetop/compiler/node_classes/parenthesized_expression.rb +5 -1
- data/lib/treetop/compiler/node_classes/parsing_expression.rb +10 -1
- data/lib/treetop/compiler/node_classes/parsing_rule.rb +1 -1
- data/lib/treetop/compiler/node_classes/predicate.rb +8 -1
- data/lib/treetop/compiler/node_classes/predicate_block.rb +7 -0
- data/lib/treetop/compiler/node_classes/repetition.rb +28 -8
- data/lib/treetop/compiler/node_classes/sequence.rb +5 -1
- data/lib/treetop/compiler/node_classes/terminal.rb +36 -22
- data/lib/treetop/compiler/ruby_builder.rb +2 -2
- data/lib/treetop/ruby_extensions/string.rb +0 -6
- data/lib/treetop/runtime/compiled_parser.rb +33 -14
- data/lib/treetop/runtime/syntax_node.rb +24 -15
- data/lib/treetop/runtime/terminal_parse_failure.rb +4 -3
- data/lib/treetop/runtime/terminal_syntax_node.rb +4 -4
- data/lib/treetop/version.rb +2 -2
- data/treetop.gemspec +25 -165
- metadata +43 -98
- data/doc/site.rb +0 -112
- data/doc/sitegen.rb +0 -65
- data/examples/lambda_calculus/lambda_calculus +0 -0
- data/spec/compiler/and_predicate_spec.rb +0 -36
- data/spec/compiler/anything_symbol_spec.rb +0 -44
- data/spec/compiler/character_class_spec.rb +0 -301
- data/spec/compiler/choice_spec.rb +0 -80
- data/spec/compiler/circular_compilation_spec.rb +0 -30
- data/spec/compiler/failure_propagation_functional_spec.rb +0 -21
- data/spec/compiler/grammar_compiler_spec.rb +0 -113
- data/spec/compiler/grammar_spec.rb +0 -41
- data/spec/compiler/multibyte_chars_spec.rb +0 -38
- data/spec/compiler/namespace_spec.rb +0 -42
- data/spec/compiler/nonterminal_symbol_spec.rb +0 -40
- data/spec/compiler/not_predicate_spec.rb +0 -38
- data/spec/compiler/occurrence_range_spec.rb +0 -189
- data/spec/compiler/one_or_more_spec.rb +0 -35
- data/spec/compiler/optional_spec.rb +0 -37
- data/spec/compiler/parenthesized_expression_spec.rb +0 -19
- data/spec/compiler/parsing_rule_spec.rb +0 -61
- data/spec/compiler/repeated_subrule_spec.rb +0 -29
- data/spec/compiler/semantic_predicate_spec.rb +0 -175
- data/spec/compiler/sequence_spec.rb +0 -129
- data/spec/compiler/terminal_spec.rb +0 -170
- data/spec/compiler/terminal_symbol_spec.rb +0 -37
- data/spec/compiler/test_grammar.treetop +0 -7
- data/spec/compiler/test_grammar.tt +0 -7
- data/spec/compiler/test_grammar_do.treetop +0 -7
- data/spec/compiler/test_grammar_magic_coding.treetop +0 -8
- data/spec/compiler/test_grammar_magic_encoding.treetop +0 -8
- data/spec/compiler/tt_compiler_spec.rb +0 -224
- data/spec/compiler/zero_or_more_spec.rb +0 -56
- data/spec/composition/a.treetop +0 -11
- data/spec/composition/b.treetop +0 -11
- data/spec/composition/c.treetop +0 -10
- data/spec/composition/d.treetop +0 -10
- data/spec/composition/f.treetop +0 -17
- data/spec/composition/grammar_composition_spec.rb +0 -40
- data/spec/composition/subfolder/e_includes_c.treetop +0 -15
- data/spec/ruby_extensions/string_spec.rb +0 -32
- data/spec/runtime/compiled_parser_spec.rb +0 -123
- data/spec/runtime/interval_skip_list/delete_spec.rb +0 -147
- data/spec/runtime/interval_skip_list/expire_range_spec.rb +0 -349
- data/spec/runtime/interval_skip_list/insert_and_delete_node_spec.rb +0 -385
- data/spec/runtime/interval_skip_list/insert_spec.rb +0 -660
- data/spec/runtime/interval_skip_list/interval_skip_list_spec.graffle +0 -6175
- data/spec/runtime/interval_skip_list/interval_skip_list_spec.rb +0 -58
- data/spec/runtime/interval_skip_list/palindromic_fixture.rb +0 -35
- data/spec/runtime/interval_skip_list/palindromic_fixture_spec.rb +0 -163
- data/spec/runtime/interval_skip_list/spec_helper.rb +0 -91
- data/spec/runtime/syntax_node_spec.rb +0 -77
- data/spec/spec_helper.rb +0 -115
Binary file
|
@@ -0,0 +1,117 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'treetop'
|
4
|
+
require "#{ENV["TM_SUPPORT_PATH"]}/lib/ui"
|
5
|
+
|
6
|
+
# Monkey-patch SyntaxNode in a gross violation of its privacy.
|
7
|
+
module Treetop
|
8
|
+
module Runtime
|
9
|
+
class SyntaxNode
|
10
|
+
def interesting_methods
|
11
|
+
# Copied from SyntaxNode#inspect
|
12
|
+
methods-[extension_modules.last ? extension_modules.last.methods : nil]-self.class.instance_methods
|
13
|
+
end
|
14
|
+
|
15
|
+
def to_hash
|
16
|
+
{
|
17
|
+
"class_and_modules" => self.class.to_s.sub(/.*:/,'') + extension_modules.map{|m| "+"+m.to_s.sub(/.*:/,'')}*"",
|
18
|
+
"offset" => interval.first,
|
19
|
+
"text_value" => text_value,
|
20
|
+
"methods" => interesting_methods * ", ",
|
21
|
+
"elements" => elements ? elements.map {|e| e.to_hash} : []
|
22
|
+
}
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
class SyntaxTreeViewer
|
29
|
+
def self.nib_path
|
30
|
+
File.dirname(__FILE__) + "/SyntaxTreeViewer.nib"
|
31
|
+
end
|
32
|
+
|
33
|
+
def self.new_with_unknown_grammar
|
34
|
+
parser = if ENV['TM_SCOPE'] =~ /source\.ruby/ && ENV['TM_SELECTED_TEXT']
|
35
|
+
eval(ENV['TM_SELECTED_TEXT'])
|
36
|
+
else
|
37
|
+
parser_file = if ENV['TM_SCOPE'] =~ /source\.treetop/
|
38
|
+
ENV['TM_FILEPATH']
|
39
|
+
else
|
40
|
+
ask_for_grammar
|
41
|
+
end
|
42
|
+
return unless parser_file
|
43
|
+
Treetop.load(parser_file)
|
44
|
+
end
|
45
|
+
new(parser.new)
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.ask_for_grammar
|
49
|
+
files = TextMate::UI.request_file("title" => "Select a Grammar File")
|
50
|
+
if files.nil? || files.empty?
|
51
|
+
nil
|
52
|
+
else
|
53
|
+
files[0]
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def self.nib_path
|
58
|
+
File.dirname(__FILE__) + "/nibs/SyntaxTreeViewer.nib"
|
59
|
+
end
|
60
|
+
|
61
|
+
def initialize(_parser)
|
62
|
+
@parser = _parser
|
63
|
+
run_parser!
|
64
|
+
end
|
65
|
+
|
66
|
+
def dialog
|
67
|
+
TextMate::UI.dialog(
|
68
|
+
:nib => self.class.nib_path,
|
69
|
+
:parameters => parameters
|
70
|
+
) do |d|
|
71
|
+
d.wait_for_input do |params|
|
72
|
+
if params["returnArgument"]
|
73
|
+
self.input = params["returnArgument"]
|
74
|
+
run_parser!
|
75
|
+
d.parameters = parameters
|
76
|
+
true
|
77
|
+
else
|
78
|
+
false
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def input
|
85
|
+
ENV['TM_TREETOP_SYNTAX_VIEWER_INPUT'] ||= ""
|
86
|
+
end
|
87
|
+
|
88
|
+
def input=(new_input)
|
89
|
+
ENV['TM_TREETOP_SYNTAX_VIEWER_INPUT'] = new_input
|
90
|
+
end
|
91
|
+
|
92
|
+
private
|
93
|
+
def run_parser!
|
94
|
+
if input && !input.empty?
|
95
|
+
@syntax_tree = @parser.parse(input)
|
96
|
+
TextMate::UI.alert(:warning, "Parse error", @parser.failure_reason) unless @syntax_tree
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def parameters
|
101
|
+
p = {
|
102
|
+
"input" => input,
|
103
|
+
"syntax_tree" => @syntax_tree ? @syntax_tree.to_hash : {},
|
104
|
+
"selected_tab" => !input.empty? && @syntax_tree ? "Syntax Tree" : "Input"
|
105
|
+
}
|
106
|
+
puts p
|
107
|
+
p
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
if __FILE__ == $0
|
112
|
+
Dir.chdir("/Users/aobrien/Work/canopy/play/sparql/")
|
113
|
+
require "/Users/aobrien/Work/canopy/play/sparql/lib/sparql"
|
114
|
+
v = SyntaxTreeViewer.new(SparqlParser.new)
|
115
|
+
v.input = %{PREFIX foaf: <http://xmlns.com/foaf/0.1/> SELECT ?foo WHERE { ?x foaf:knows ?y . ?z foaf:knows ?x .}}
|
116
|
+
v.dialog
|
117
|
+
end
|
@@ -0,0 +1,358 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
3
|
+
<plist version="1.0">
|
4
|
+
<dict>
|
5
|
+
<key>fileTypes</key>
|
6
|
+
<array>
|
7
|
+
<string>treetop</string>
|
8
|
+
<string>tt</string>
|
9
|
+
</array>
|
10
|
+
<key>foldingStartMarker</key>
|
11
|
+
<string>(module|grammar|rule).*$</string>
|
12
|
+
<key>foldingStopMarker</key>
|
13
|
+
<string>^\s*end</string>
|
14
|
+
<key>keyEquivalent</key>
|
15
|
+
<string>^~T</string>
|
16
|
+
<key>name</key>
|
17
|
+
<string>Treetop Grammar</string>
|
18
|
+
<key>patterns</key>
|
19
|
+
<array>
|
20
|
+
<dict>
|
21
|
+
<key>include</key>
|
22
|
+
<string>#comment</string>
|
23
|
+
</dict>
|
24
|
+
<dict>
|
25
|
+
<key>begin</key>
|
26
|
+
<string>^(\s*)(module) ([A-Z]\w+)</string>
|
27
|
+
<key>beginCaptures</key>
|
28
|
+
<dict>
|
29
|
+
<key>2</key>
|
30
|
+
<dict>
|
31
|
+
<key>name</key>
|
32
|
+
<string>keyword.begin.module.treetop</string>
|
33
|
+
</dict>
|
34
|
+
<key>3</key>
|
35
|
+
<dict>
|
36
|
+
<key>name</key>
|
37
|
+
<string>entity.name.module.treetop</string>
|
38
|
+
</dict>
|
39
|
+
</dict>
|
40
|
+
<key>end</key>
|
41
|
+
<string>^\1(end)$</string>
|
42
|
+
<key>endCaptures</key>
|
43
|
+
<dict>
|
44
|
+
<key>1</key>
|
45
|
+
<dict>
|
46
|
+
<key>name</key>
|
47
|
+
<string>keyword.end.module.treetop</string>
|
48
|
+
</dict>
|
49
|
+
</dict>
|
50
|
+
<key>name</key>
|
51
|
+
<string>meta.module.treetop</string>
|
52
|
+
<key>patterns</key>
|
53
|
+
<array>
|
54
|
+
<dict>
|
55
|
+
<key>include</key>
|
56
|
+
<string>$self</string>
|
57
|
+
</dict>
|
58
|
+
</array>
|
59
|
+
</dict>
|
60
|
+
<dict>
|
61
|
+
<key>begin</key>
|
62
|
+
<string>^(\s*)(grammar) ([A-Z]\w+)</string>
|
63
|
+
<key>beginCaptures</key>
|
64
|
+
<dict>
|
65
|
+
<key>2</key>
|
66
|
+
<dict>
|
67
|
+
<key>name</key>
|
68
|
+
<string>keyword.begin.grammar.treetop</string>
|
69
|
+
</dict>
|
70
|
+
<key>3</key>
|
71
|
+
<dict>
|
72
|
+
<key>name</key>
|
73
|
+
<string>entity.name.grammar.treetop</string>
|
74
|
+
</dict>
|
75
|
+
</dict>
|
76
|
+
<key>end</key>
|
77
|
+
<string>^\1(end)$</string>
|
78
|
+
<key>endCaptures</key>
|
79
|
+
<dict>
|
80
|
+
<key>1</key>
|
81
|
+
<dict>
|
82
|
+
<key>name</key>
|
83
|
+
<string>keyword.end.grammar.treetop</string>
|
84
|
+
</dict>
|
85
|
+
</dict>
|
86
|
+
<key>name</key>
|
87
|
+
<string>meta.grammar.treetop</string>
|
88
|
+
<key>patterns</key>
|
89
|
+
<array>
|
90
|
+
<dict>
|
91
|
+
<key>include</key>
|
92
|
+
<string>$self</string>
|
93
|
+
</dict>
|
94
|
+
</array>
|
95
|
+
</dict>
|
96
|
+
<dict>
|
97
|
+
<key>captures</key>
|
98
|
+
<dict>
|
99
|
+
<key>1</key>
|
100
|
+
<dict>
|
101
|
+
<key>name</key>
|
102
|
+
<string>keyword.include.treetop</string>
|
103
|
+
</dict>
|
104
|
+
<key>2</key>
|
105
|
+
<dict>
|
106
|
+
<key>name</key>
|
107
|
+
<string>entity.name.include.treetop</string>
|
108
|
+
</dict>
|
109
|
+
</dict>
|
110
|
+
<key>match</key>
|
111
|
+
<string>(include)\s+(\w+)</string>
|
112
|
+
<key>name</key>
|
113
|
+
<string>meta.include.treetop</string>
|
114
|
+
</dict>
|
115
|
+
<dict>
|
116
|
+
<key>begin</key>
|
117
|
+
<string>\b(rule)\b (\w+)</string>
|
118
|
+
<key>beginCaptures</key>
|
119
|
+
<dict>
|
120
|
+
<key>1</key>
|
121
|
+
<dict>
|
122
|
+
<key>name</key>
|
123
|
+
<string>keyword.begin.rule.treetop</string>
|
124
|
+
</dict>
|
125
|
+
<key>2</key>
|
126
|
+
<dict>
|
127
|
+
<key>name</key>
|
128
|
+
<string>entity.name.rule.treetop</string>
|
129
|
+
</dict>
|
130
|
+
</dict>
|
131
|
+
<key>end</key>
|
132
|
+
<string>^\s+\bend\b\s*$</string>
|
133
|
+
<key>endCaptures</key>
|
134
|
+
<dict>
|
135
|
+
<key>0</key>
|
136
|
+
<dict>
|
137
|
+
<key>name</key>
|
138
|
+
<string>keyword.end.rule.treetop</string>
|
139
|
+
</dict>
|
140
|
+
</dict>
|
141
|
+
<key>name</key>
|
142
|
+
<string>meta.rule.treetop</string>
|
143
|
+
<key>patterns</key>
|
144
|
+
<array>
|
145
|
+
<dict>
|
146
|
+
<key>include</key>
|
147
|
+
<string>#comment</string>
|
148
|
+
</dict>
|
149
|
+
<dict>
|
150
|
+
<key>include</key>
|
151
|
+
<string>#strings</string>
|
152
|
+
</dict>
|
153
|
+
<dict>
|
154
|
+
<key>include</key>
|
155
|
+
<string>#character-class</string>
|
156
|
+
</dict>
|
157
|
+
<dict>
|
158
|
+
<key>match</key>
|
159
|
+
<string>\/</string>
|
160
|
+
<key>name</key>
|
161
|
+
<string>keyword.operator.or.treetop</string>
|
162
|
+
</dict>
|
163
|
+
<dict>
|
164
|
+
<key>match</key>
|
165
|
+
<string><\w+?></string>
|
166
|
+
<key>name</key>
|
167
|
+
<string>variable.class-instance.treetop</string>
|
168
|
+
</dict>
|
169
|
+
<dict>
|
170
|
+
<key>match</key>
|
171
|
+
<string>\w+?:</string>
|
172
|
+
<key>name</key>
|
173
|
+
<string>support.operand.treetop</string>
|
174
|
+
</dict>
|
175
|
+
<dict>
|
176
|
+
<key>begin</key>
|
177
|
+
<string>\{</string>
|
178
|
+
<key>end</key>
|
179
|
+
<string>\}</string>
|
180
|
+
<key>name</key>
|
181
|
+
<string>meta.embedded-ruby.treetop</string>
|
182
|
+
<key>patterns</key>
|
183
|
+
<array>
|
184
|
+
<dict>
|
185
|
+
<key>include</key>
|
186
|
+
<string>source.ruby</string>
|
187
|
+
</dict>
|
188
|
+
</array>
|
189
|
+
</dict>
|
190
|
+
</array>
|
191
|
+
</dict>
|
192
|
+
</array>
|
193
|
+
<key>repository</key>
|
194
|
+
<dict>
|
195
|
+
<key>character-class</key>
|
196
|
+
<dict>
|
197
|
+
<key>patterns</key>
|
198
|
+
<array>
|
199
|
+
<dict>
|
200
|
+
<key>match</key>
|
201
|
+
<string>\\[wWsSdDhH]|\.</string>
|
202
|
+
<key>name</key>
|
203
|
+
<string>constant.character.character-class.regexp</string>
|
204
|
+
</dict>
|
205
|
+
<dict>
|
206
|
+
<key>match</key>
|
207
|
+
<string>\\.</string>
|
208
|
+
<key>name</key>
|
209
|
+
<string>constant.character.escape.backslash.regexp</string>
|
210
|
+
</dict>
|
211
|
+
<dict>
|
212
|
+
<key>begin</key>
|
213
|
+
<string>(\[)(\^)?</string>
|
214
|
+
<key>beginCaptures</key>
|
215
|
+
<dict>
|
216
|
+
<key>1</key>
|
217
|
+
<dict>
|
218
|
+
<key>name</key>
|
219
|
+
<string>punctuation.definition.character-class.regexp</string>
|
220
|
+
</dict>
|
221
|
+
<key>2</key>
|
222
|
+
<dict>
|
223
|
+
<key>name</key>
|
224
|
+
<string>keyword.operator.negation.regexp</string>
|
225
|
+
</dict>
|
226
|
+
</dict>
|
227
|
+
<key>end</key>
|
228
|
+
<string>(\])</string>
|
229
|
+
<key>endCaptures</key>
|
230
|
+
<dict>
|
231
|
+
<key>1</key>
|
232
|
+
<dict>
|
233
|
+
<key>name</key>
|
234
|
+
<string>punctuation.definition.character-class.regexp</string>
|
235
|
+
</dict>
|
236
|
+
</dict>
|
237
|
+
<key>name</key>
|
238
|
+
<string>constant.other.character-class.set.regexp</string>
|
239
|
+
<key>patterns</key>
|
240
|
+
<array>
|
241
|
+
<dict>
|
242
|
+
<key>include</key>
|
243
|
+
<string>#character-class</string>
|
244
|
+
</dict>
|
245
|
+
<dict>
|
246
|
+
<key>captures</key>
|
247
|
+
<dict>
|
248
|
+
<key>2</key>
|
249
|
+
<dict>
|
250
|
+
<key>name</key>
|
251
|
+
<string>constant.character.escape.backslash.regexp</string>
|
252
|
+
</dict>
|
253
|
+
<key>4</key>
|
254
|
+
<dict>
|
255
|
+
<key>name</key>
|
256
|
+
<string>constant.character.escape.backslash.regexp</string>
|
257
|
+
</dict>
|
258
|
+
</dict>
|
259
|
+
<key>match</key>
|
260
|
+
<string>(.|(\\.))\-([^\]]|(\\.))</string>
|
261
|
+
<key>name</key>
|
262
|
+
<string>constant.other.character-class.range.regexp</string>
|
263
|
+
</dict>
|
264
|
+
<dict>
|
265
|
+
<key>match</key>
|
266
|
+
<string>&&</string>
|
267
|
+
<key>name</key>
|
268
|
+
<string>keyword.operator.intersection.regexp</string>
|
269
|
+
</dict>
|
270
|
+
</array>
|
271
|
+
</dict>
|
272
|
+
</array>
|
273
|
+
</dict>
|
274
|
+
<key>comment</key>
|
275
|
+
<dict>
|
276
|
+
<key>captures</key>
|
277
|
+
<dict>
|
278
|
+
<key>1</key>
|
279
|
+
<dict>
|
280
|
+
<key>name</key>
|
281
|
+
<string>punctuation.definition.comment.treetop</string>
|
282
|
+
</dict>
|
283
|
+
</dict>
|
284
|
+
<key>match</key>
|
285
|
+
<string>(#).*$\n?</string>
|
286
|
+
<key>name</key>
|
287
|
+
<string>comment.line.number-sign.treetop</string>
|
288
|
+
</dict>
|
289
|
+
<key>strings</key>
|
290
|
+
<dict>
|
291
|
+
<key>patterns</key>
|
292
|
+
<array>
|
293
|
+
<dict>
|
294
|
+
<key>begin</key>
|
295
|
+
<string>'</string>
|
296
|
+
<key>beginCaptures</key>
|
297
|
+
<dict>
|
298
|
+
<key>0</key>
|
299
|
+
<dict>
|
300
|
+
<key>name</key>
|
301
|
+
<string>punctuation.definition.string.begin.treetop</string>
|
302
|
+
</dict>
|
303
|
+
</dict>
|
304
|
+
<key>end</key>
|
305
|
+
<string>'</string>
|
306
|
+
<key>endCaptures</key>
|
307
|
+
<dict>
|
308
|
+
<key>0</key>
|
309
|
+
<dict>
|
310
|
+
<key>name</key>
|
311
|
+
<string>punctuation.definition.string.end.treetop</string>
|
312
|
+
</dict>
|
313
|
+
</dict>
|
314
|
+
<key>name</key>
|
315
|
+
<string>string.quoted.single.treetop</string>
|
316
|
+
<key>patterns</key>
|
317
|
+
<array>
|
318
|
+
<dict>
|
319
|
+
<key>match</key>
|
320
|
+
<string>\\(u\h{4}|.)</string>
|
321
|
+
<key>name</key>
|
322
|
+
<string>constant.character.escape.antlr</string>
|
323
|
+
</dict>
|
324
|
+
</array>
|
325
|
+
</dict>
|
326
|
+
<dict>
|
327
|
+
<key>begin</key>
|
328
|
+
<string>"</string>
|
329
|
+
<key>beginCaptures</key>
|
330
|
+
<dict>
|
331
|
+
<key>0</key>
|
332
|
+
<dict>
|
333
|
+
<key>name</key>
|
334
|
+
<string>punctuation.definition.string.begin.treetop</string>
|
335
|
+
</dict>
|
336
|
+
</dict>
|
337
|
+
<key>end</key>
|
338
|
+
<string>"</string>
|
339
|
+
<key>endCaptures</key>
|
340
|
+
<dict>
|
341
|
+
<key>0</key>
|
342
|
+
<dict>
|
343
|
+
<key>name</key>
|
344
|
+
<string>punctuation.definition.string.end.treetop</string>
|
345
|
+
</dict>
|
346
|
+
</dict>
|
347
|
+
<key>name</key>
|
348
|
+
<string>string.quoted.double.treetop</string>
|
349
|
+
</dict>
|
350
|
+
</array>
|
351
|
+
</dict>
|
352
|
+
</dict>
|
353
|
+
<key>scopeName</key>
|
354
|
+
<string>source.treetop</string>
|
355
|
+
<key>uuid</key>
|
356
|
+
<string>A1604A34-0B73-4D5A-9499-87D881DFA8D5</string>
|
357
|
+
</dict>
|
358
|
+
</plist>
|
@@ -0,0 +1,10 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
3
|
+
<plist version="1.0">
|
4
|
+
<dict>
|
5
|
+
<key>name</key>
|
6
|
+
<string>Treetop</string>
|
7
|
+
<key>uuid</key>
|
8
|
+
<string>83A8B700-143D-4BD6-B4EA-D73796E8F883</string>
|
9
|
+
</dict>
|
10
|
+
</plist>
|
@@ -30,7 +30,7 @@ Say I want to parse a diabolical wiki syntax in which the following interpretati
|
|
30
30
|
end
|
31
31
|
|
32
32
|
rule em
|
33
|
-
'
|
33
|
+
'*' (strong / !'*' . / '\*')+ '*'
|
34
34
|
end
|
35
35
|
|
36
36
|
Emphasized text is allowed within strong text by virtue of `em` being the first alternative. Since `em` will only successfully parse if a matching `*` is found, it is permitted, but other than that, no `*` characters are allowed unless they are escaped.
|
@@ -49,3 +49,9 @@ This says that `'end'` must be followed by a space, but this space is not consum
|
|
49
49
|
end
|
50
50
|
|
51
51
|
In general, when the syntax gets tough, it helps to focus on what you really mean. A keyword is a sequence of characters that is not immediately followed by any character other than a space.
|
52
|
+
|
53
|
+
## Poor Performance with Large Unicode Strings
|
54
|
+
|
55
|
+
Treetop may perform poorly when parsing very large (more than 100KB) Unicode strings. This is because substring lookups on Ruby Unicode strings are linear-time operations, not constant-time operations as they are on ASCII-encoded strings. As a result, parse time grows much faster than linearly with input size, so large strings can take disproportionately longer to parse than small ones.
|
56
|
+
|
57
|
+
If your input and grammar only expect ASCII strings, you can achieve significant performance improvements for large strings by re-encoding them to ASCII using `input.encode(Encoding::US_ASCII)`. See [this issue on GitHub](https://github.com/cjheath/treetop/issues/31) for more information and other possible workarounds for unicode strings.
|
@@ -32,13 +32,15 @@ The main keywords are:
|
|
32
32
|
|
33
33
|
* `rule` : This defines a parsing rule within the grammar. It is followed by a name by which this rule can be referenced within other rules. It is then followed by a parsing expression defining the rule.
|
34
34
|
|
35
|
-
A grammar may be surrounded by one or more nested `module` statements, which provides a namespace for the generated Ruby parser.
|
35
|
+
A grammar may be surrounded by one or more nested `module` or `class` statements, which provide a namespace for the generated Ruby parser. Note that you cannot specify a superclass for a class, so if your class has a superclass, it must be declared elsewhere and loaded first.
|
36
36
|
|
37
|
-
Treetop will emit a module called `GrammarName` and a parser class called `GrammarNameParser` (in the
|
37
|
+
Treetop will emit a module called `GrammarName` and a parser class called `GrammarNameParser` (in the namespace, if specified).
|
38
38
|
|
39
39
|
# Parsing Expressions
|
40
40
|
Each rule associates a name with a _parsing expression_. Parsing expressions are a generalization of vanilla regular expressions. Their key feature is the ability to reference other expressions in the grammar by name.
|
41
41
|
|
42
|
+
Treetop parsers will try to match the first rule defined in the grammar, unless you pass an optional parameter to set a different top rule.
|
43
|
+
|
42
44
|
## Terminal Symbols
|
43
45
|
### Strings
|
44
46
|
Strings are surrounded in double or single quotes and must be matched exactly.
|
@@ -213,3 +215,6 @@ tried at which locations in the input, and what the result was. This process, ca
|
|
213
215
|
requires that the rule would produce the same result (if run again) as it produced the first time when
|
214
216
|
the result was remembered. If you violate this principle in your semantic predicates, be prepared to
|
215
217
|
fight Cerberus before you're allowed out of Hades again.
|
218
|
+
|
219
|
+
There's an example of how to use semantic predicates to parse a language with white-space indented blocks
|
220
|
+
in the examples directory.
|
data/doc/tt.1
CHANGED
@@ -0,0 +1,73 @@
|
|
1
|
+
grammar IndentedBlocks
|
2
|
+
rule top
|
3
|
+
# Initialise the indent stack with a sentinel:
|
4
|
+
&{|s| @indents = [-1] }
|
5
|
+
foo:('foo'?)
|
6
|
+
nested_blocks
|
7
|
+
{
|
8
|
+
def inspect
|
9
|
+
nested_blocks.inspect
|
10
|
+
end
|
11
|
+
}
|
12
|
+
end
|
13
|
+
|
14
|
+
rule nested_blocks
|
15
|
+
(
|
16
|
+
# Do not try to extract this semantic predicate into a new rule.
|
17
|
+
# It will be memo-ized incorrectly because @indents.last will change.
|
18
|
+
!{|s|
|
19
|
+
# Peek at the following indentation:
|
20
|
+
save = index; i = _nt_indentation; index = save
|
21
|
+
# We're closing if the indentation is less or the same as our enclosing block's:
|
22
|
+
closing = i.text_value.length <= @indents.last
|
23
|
+
}
|
24
|
+
block
|
25
|
+
)*
|
26
|
+
{
|
27
|
+
def inspect
|
28
|
+
elements.map{|e| e.block.inspect}*"\n"
|
29
|
+
end
|
30
|
+
}
|
31
|
+
end
|
32
|
+
|
33
|
+
rule block
|
34
|
+
indented_line # The block's opening line
|
35
|
+
&{|s| # Push the indent level to the stack
|
36
|
+
level = s[0].indentation.text_value.length
|
37
|
+
@indents << level
|
38
|
+
true
|
39
|
+
}
|
40
|
+
nested_blocks # Parse any nested blocks
|
41
|
+
&{|s| # Pop the indent stack
|
42
|
+
# Note that under no circumstances should "nested_blocks" fail, or the stack will be mis-aligned
|
43
|
+
@indents.pop
|
44
|
+
true
|
45
|
+
}
|
46
|
+
{
|
47
|
+
def inspect
|
48
|
+
indented_line.inspect +
|
49
|
+
(nested_blocks.elements.size > 0 ? (
|
50
|
+
"\n{\n" +
|
51
|
+
nested_blocks.elements.map { |content|
|
52
|
+
content.block.inspect+"\n"
|
53
|
+
}*'' +
|
54
|
+
"}"
|
55
|
+
)
|
56
|
+
: "")
|
57
|
+
end
|
58
|
+
}
|
59
|
+
end
|
60
|
+
|
61
|
+
rule indented_line
|
62
|
+
indentation text:((!"\n" .)*) "\n"
|
63
|
+
{
|
64
|
+
def inspect
|
65
|
+
text.text_value
|
66
|
+
end
|
67
|
+
}
|
68
|
+
end
|
69
|
+
|
70
|
+
rule indentation
|
71
|
+
' '*
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'polyglot'
|
2
|
+
require 'byebug'
|
3
|
+
require 'treetop'
|
4
|
+
require 'indented_blocks'
|
5
|
+
|
6
|
+
parser = IndentedBlocksParser.new
|
7
|
+
|
8
|
+
input = <<END
|
9
|
+
def foo
|
10
|
+
here is some indented text
|
11
|
+
here it's further indented
|
12
|
+
and here the same
|
13
|
+
but here it's further again
|
14
|
+
and some more like that
|
15
|
+
before going back to here
|
16
|
+
down again
|
17
|
+
back twice
|
18
|
+
and start from the beginning again
|
19
|
+
with only a small block this time
|
20
|
+
END
|
21
|
+
|
22
|
+
parse_tree = parser.parse input
|
23
|
+
|
24
|
+
p parse_tree
|
@@ -33,10 +33,13 @@ module Treetop
|
|
33
33
|
|
34
34
|
# compile a treetop source file and load it
|
35
35
|
def self.load(path)
|
36
|
-
|
37
|
-
|
36
|
+
unless path =~ Treetop::Polyglot::VALID_GRAMMAR_EXT_REGEXP
|
37
|
+
ext = Treetop::Polyglot::VALID_GRAMMAR_EXT.select {|ext| File.exist?(path+".#{ext}")}.shift
|
38
|
+
path += ".#{ext}" unless ext.nil?
|
39
|
+
end
|
40
|
+
File.open(path) do |source_file|
|
38
41
|
source = source_file.read
|
39
|
-
source.gsub!(/\b__FILE__\b/, %Q{"#{
|
42
|
+
source.gsub!(/\b__FILE__\b/, %Q{"#{path}"})
|
40
43
|
load_from_string(source)
|
41
44
|
end
|
42
45
|
end
|