treetop 1.4.9 → 1.4.10
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +26 -9
- data/doc/semantic_interpretation.markdown +2 -2
- data/doc/site/semantic_interpretation.html +2 -2
- data/doc/site/syntactic_recognition.html +12 -5
- data/doc/syntactic_recognition.markdown +12 -3
- data/lib/treetop/compiler/metagrammar.rb +153 -62
- data/lib/treetop/compiler/metagrammar.treetop +10 -2
- data/lib/treetop/compiler/node_classes/grammar.rb +2 -2
- data/lib/treetop/compiler/node_classes/terminal.rb +2 -2
- data/lib/treetop/version.rb +1 -1
- data/spec/compiler/multibyte_chars_spec.rb +7 -6
- data/spec/compiler/namespace_spec.rb +42 -0
- data/spec/compiler/occurrence_range_spec.rb +0 -2
- data/spec/runtime/interval_skip_list/delete_spec.rb +2 -2
- data/spec/runtime/interval_skip_list/expire_range_spec.rb +3 -3
- data/spec/runtime/interval_skip_list/{insert_and_delete_node.rb → insert_and_delete_node_spec.rb} +13 -13
- data/spec/runtime/interval_skip_list/insert_spec.rb +2 -2
- data/spec/runtime/interval_skip_list/palindromic_fixture.rb +25 -13
- data/spec/runtime/interval_skip_list/palindromic_fixture_spec.rb +2 -2
- data/spec/runtime/interval_skip_list/spec_helper.rb +9 -2
- data/spec/spec_helper.rb +16 -12
- data/treetop.gemspec +184 -16
- metadata +195 -92
- data/lib/treetop/runtime/terminal_parse_failure_debug.rb +0 -21
data/Rakefile
CHANGED
@@ -1,17 +1,34 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
Bundler.setup(:default, :development)
|
2
4
|
require 'rake'
|
3
|
-
require '
|
4
|
-
require '
|
5
|
+
require 'jeweler'
|
6
|
+
require 'rspec/core/rake_task'
|
7
|
+
require File.expand_path("../lib/treetop/version", __FILE__)
|
8
|
+
|
9
|
+
Jeweler::Tasks.new do |gem|
|
10
|
+
gem.name = "treetop"
|
11
|
+
gem.version = Treetop::VERSION::STRING
|
12
|
+
gem.author = "Nathan Sobo"
|
13
|
+
gem.license = "MIT"
|
14
|
+
gem.email = "cliffordheath@gmail.com"
|
15
|
+
gem.homepage = "http://functionalform.blogspot.com"
|
16
|
+
gem.platform = Gem::Platform::RUBY
|
17
|
+
gem.summary = "A Ruby-based text parsing and interpretation DSL"
|
18
|
+
gem.files = ["LICENSE", "README.md", "Rakefile", "treetop.gemspec", "{spec,lib,bin,doc,examples}/**/*"].map{|p| Dir[p]}.flatten
|
19
|
+
gem.bindir = "bin"
|
20
|
+
gem.executables = ["tt"]
|
21
|
+
gem.require_path = "lib"
|
22
|
+
gem.autorequire = "treetop"
|
23
|
+
gem.has_rdoc = false
|
24
|
+
gem.add_dependency "polyglot", ">= 0.3.1"
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
5
27
|
|
6
28
|
task :default => :spec
|
7
|
-
|
29
|
+
RSpec::Core::RakeTask.new do |t|
|
8
30
|
t.pattern = 'spec/**/*spec.rb'
|
9
|
-
t.libs << 'spec'
|
10
|
-
end
|
11
|
-
|
12
|
-
load "./treetop.gemspec"
|
13
|
-
Rake::GemPackageTask.new($gemspec) do |pkg|
|
14
|
-
pkg.need_tar = true
|
31
|
+
# t.libs << 'spec' # @todo not sure what this did in the original rspec 1.3
|
15
32
|
end
|
16
33
|
|
17
34
|
task :spec => 'lib/treetop/compiler/metagrammar.treetop'
|
@@ -128,7 +128,7 @@ Subexpressions can be given an explicit label to have an element accessor method
|
|
128
128
|
rule labels
|
129
129
|
first_letter:[a-z] rest_letters:(', ' letter:[a-z])* {
|
130
130
|
def letters
|
131
|
-
[first_letter] + rest_letters.map do |comma_and_letter|
|
131
|
+
[first_letter] + rest_letters.elements.map do |comma_and_letter|
|
132
132
|
comma_and_letter.letter
|
133
133
|
end
|
134
134
|
end
|
@@ -147,7 +147,7 @@ The module containing automatically defined element accessor methods is an ances
|
|
147
147
|
end
|
148
148
|
|
149
149
|
def rest_letters
|
150
|
-
super.map { |comma_and_letter| comma_and_letter.letter }
|
150
|
+
super.elements.map { |comma_and_letter| comma_and_letter.letter }
|
151
151
|
end
|
152
152
|
}
|
153
153
|
end
|
@@ -152,7 +152,7 @@ end
|
|
152
152
|
<pre><code>rule labels
|
153
153
|
first_letter:[a-z] rest_letters:(', ' letter:[a-z])* {
|
154
154
|
def letters
|
155
|
-
[first_letter] + rest_letters.map do |comma_and_letter|
|
155
|
+
[first_letter] + rest_letters.elements.map do |comma_and_letter|
|
156
156
|
comma_and_letter.letter
|
157
157
|
end
|
158
158
|
end
|
@@ -173,7 +173,7 @@ end
|
|
173
173
|
end
|
174
174
|
|
175
175
|
def rest_letters
|
176
|
-
super.map { |comma_and_letter| comma_and_letter.letter }
|
176
|
+
super.elements.map { |comma_and_letter| comma_and_letter.letter }
|
177
177
|
end
|
178
178
|
}
|
179
179
|
end
|
@@ -80,9 +80,16 @@ end
|
|
80
80
|
|
81
81
|
<p>An empty string matches at any position and consumes no input. It's useful when you wish to treat a single symbol as part of a sequence, for example when an alternate rule will be processed using shared code.</p>
|
82
82
|
|
83
|
-
<
|
84
|
-
|
85
|
-
|
83
|
+
<pre>
|
84
|
+
rule alts
|
85
|
+
( foo bar / baz '' )
|
86
|
+
{
|
87
|
+
def value
|
88
|
+
elements.map{|e| e.text_value }
|
89
|
+
end
|
90
|
+
}
|
91
|
+
end
|
92
|
+
</pre>
|
86
93
|
|
87
94
|
|
88
95
|
<h2>Nonterminal Symbols</h2>
|
@@ -223,7 +230,7 @@ end
|
|
223
230
|
<p>Sometimes you must execute Ruby code during parsing in order to decide how to proceed.
|
224
231
|
This is an advanced feature, and must be used with great care, because it can change the
|
225
232
|
way a Treetop parser backtracks in a way that breaks the parsing algorithm. See the
|
226
|
-
notes
|
233
|
+
notes below on how to use this feature safely.</p>
|
227
234
|
|
228
235
|
<p>The code block is the body of a Ruby lambda block, and should return true or false, to cause this
|
229
236
|
parse rule to continue or fail (for positive sempreds), fail or continue (for negative sempreds).</p>
|
@@ -249,7 +256,7 @@ preceding rules (or as assigned by labels) are not available to access the sub-r
|
|
249
256
|
end
|
250
257
|
|
251
258
|
rule foo_rule
|
252
|
-
foo id &{|seq| seq[1].is_reserved } baz
|
259
|
+
foo id &{|seq| seq[1].is_reserved } baz
|
253
260
|
end
|
254
261
|
</code></pre>
|
255
262
|
|
@@ -58,7 +58,16 @@ The anything symbol is represented by a dot (`.`) and matches any single charact
|
|
58
58
|
###Ellipsis
|
59
59
|
An empty string matches at any position and consumes no input. It's useful when you wish to treat a single symbol as part of a sequence, for example when an alternate rule will be processed using shared code.
|
60
60
|
|
61
|
-
|
61
|
+
<pre>
|
62
|
+
rule alts
|
63
|
+
( foo bar / baz '' )
|
64
|
+
{
|
65
|
+
def value
|
66
|
+
elements.map{|e| e.text_value }
|
67
|
+
end
|
68
|
+
}
|
69
|
+
end
|
70
|
+
</pre>
|
62
71
|
|
63
72
|
##Nonterminal Symbols
|
64
73
|
Nonterminal symbols are unquoted references to other named rules. They are equivalent to an inline substitution of the named expression.
|
@@ -163,7 +172,7 @@ Here's the easiest way to handle C-style comments:
|
|
163
172
|
Sometimes you must execute Ruby code during parsing in order to decide how to proceed.
|
164
173
|
This is an advanced feature, and must be used with great care, because it can change the
|
165
174
|
way a Treetop parser backtracks in a way that breaks the parsing algorithm. See the
|
166
|
-
notes
|
175
|
+
notes below on how to use this feature safely.
|
167
176
|
|
168
177
|
The code block is the body of a Ruby lambda block, and should return true or false, to cause this
|
169
178
|
parse rule to continue or fail (for positive sempreds), fail or continue (for negative sempreds).
|
@@ -186,7 +195,7 @@ preceding rules (or as assigned by labels) are not available to access the sub-r
|
|
186
195
|
end
|
187
196
|
|
188
197
|
rule foo_rule
|
189
|
-
foo id &{|seq| seq[1].is_reserved } baz
|
198
|
+
foo id &{|seq| seq[1].is_reserved } baz
|
190
199
|
end
|
191
200
|
|
192
201
|
Match "foo id baz" only if `id.is_reserved`. Note that `id` cannot be referenced by name from `foo_rule`,
|
@@ -90,28 +90,16 @@ module Treetop
|
|
90
90
|
end
|
91
91
|
s0 << r6
|
92
92
|
if r6
|
93
|
-
|
94
|
-
r9 = _nt_module_declaration
|
95
|
-
if r9
|
96
|
-
r8 = r9
|
97
|
-
else
|
98
|
-
r10 = _nt_grammar
|
99
|
-
if r10
|
100
|
-
r8 = r10
|
101
|
-
else
|
102
|
-
@index = i8
|
103
|
-
r8 = nil
|
104
|
-
end
|
105
|
-
end
|
93
|
+
r8 = _nt_module_or_grammar
|
106
94
|
s0 << r8
|
107
95
|
if r8
|
108
|
-
|
109
|
-
if
|
110
|
-
|
96
|
+
r10 = _nt_space
|
97
|
+
if r10
|
98
|
+
r9 = r10
|
111
99
|
else
|
112
|
-
|
100
|
+
r9 = instantiate_node(SyntaxNode,input, index...index)
|
113
101
|
end
|
114
|
-
s0 <<
|
102
|
+
s0 << r9
|
115
103
|
end
|
116
104
|
end
|
117
105
|
end
|
@@ -233,24 +221,64 @@ module Treetop
|
|
233
221
|
r0
|
234
222
|
end
|
235
223
|
|
224
|
+
def _nt_module_or_grammar
|
225
|
+
start_index = index
|
226
|
+
if node_cache[:module_or_grammar].has_key?(index)
|
227
|
+
cached = node_cache[:module_or_grammar][index]
|
228
|
+
if cached
|
229
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
230
|
+
@index = cached.interval.end
|
231
|
+
end
|
232
|
+
return cached
|
233
|
+
end
|
234
|
+
|
235
|
+
i0 = index
|
236
|
+
r1 = _nt_module_declaration
|
237
|
+
if r1
|
238
|
+
r0 = r1
|
239
|
+
else
|
240
|
+
r2 = _nt_grammar
|
241
|
+
if r2
|
242
|
+
r0 = r2
|
243
|
+
else
|
244
|
+
@index = i0
|
245
|
+
r0 = nil
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
249
|
+
node_cache[:module_or_grammar][start_index] = r0
|
250
|
+
|
251
|
+
r0
|
252
|
+
end
|
253
|
+
|
236
254
|
module ModuleDeclaration0
|
255
|
+
end
|
256
|
+
|
257
|
+
module ModuleDeclaration1
|
258
|
+
end
|
259
|
+
|
260
|
+
module ModuleDeclaration2
|
237
261
|
def space1
|
238
262
|
elements[1]
|
239
263
|
end
|
240
264
|
|
265
|
+
def name
|
266
|
+
elements[2]
|
267
|
+
end
|
268
|
+
|
241
269
|
def space2
|
242
|
-
elements[
|
270
|
+
elements[3]
|
243
271
|
end
|
244
272
|
end
|
245
273
|
|
246
|
-
module
|
274
|
+
module ModuleDeclaration3
|
247
275
|
def space
|
248
276
|
elements[0]
|
249
277
|
end
|
250
278
|
|
251
279
|
end
|
252
280
|
|
253
|
-
module
|
281
|
+
module ModuleDeclaration4
|
254
282
|
def prefix
|
255
283
|
elements[0]
|
256
284
|
end
|
@@ -264,10 +292,14 @@ module Treetop
|
|
264
292
|
end
|
265
293
|
end
|
266
294
|
|
267
|
-
module
|
295
|
+
module ModuleDeclaration5
|
268
296
|
def compile
|
269
297
|
prefix.text_value + module_contents.compile + suffix.text_value
|
270
298
|
end
|
299
|
+
|
300
|
+
def parser_name
|
301
|
+
prefix.name.text_value+'::'+module_contents.parser_name
|
302
|
+
end
|
271
303
|
end
|
272
304
|
|
273
305
|
def _nt_module_declaration
|
@@ -295,83 +327,142 @@ module Treetop
|
|
295
327
|
r3 = _nt_space
|
296
328
|
s1 << r3
|
297
329
|
if r3
|
330
|
+
i4, s4 = index, []
|
298
331
|
if has_terminal?('\G[A-Z]', true, index)
|
299
|
-
|
332
|
+
r5 = true
|
300
333
|
@index += 1
|
301
334
|
else
|
302
|
-
|
335
|
+
r5 = nil
|
303
336
|
end
|
304
|
-
|
305
|
-
if
|
306
|
-
|
337
|
+
s4 << r5
|
338
|
+
if r5
|
339
|
+
s6, i6 = [], index
|
307
340
|
loop do
|
308
|
-
|
309
|
-
if
|
310
|
-
|
341
|
+
r7 = _nt_alphanumeric_char
|
342
|
+
if r7
|
343
|
+
s6 << r7
|
311
344
|
else
|
312
345
|
break
|
313
346
|
end
|
314
347
|
end
|
315
|
-
|
316
|
-
|
317
|
-
if
|
318
|
-
|
319
|
-
|
348
|
+
r6 = instantiate_node(SyntaxNode,input, i6...index, s6)
|
349
|
+
s4 << r6
|
350
|
+
if r6
|
351
|
+
s8, i8 = [], index
|
352
|
+
loop do
|
353
|
+
i9, s9 = index, []
|
354
|
+
if has_terminal?('::', false, index)
|
355
|
+
r10 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
356
|
+
@index += 2
|
357
|
+
else
|
358
|
+
terminal_parse_failure('::')
|
359
|
+
r10 = nil
|
360
|
+
end
|
361
|
+
s9 << r10
|
362
|
+
if r10
|
363
|
+
if has_terminal?('\G[A-Z]', true, index)
|
364
|
+
r11 = true
|
365
|
+
@index += 1
|
366
|
+
else
|
367
|
+
r11 = nil
|
368
|
+
end
|
369
|
+
s9 << r11
|
370
|
+
if r11
|
371
|
+
s12, i12 = [], index
|
372
|
+
loop do
|
373
|
+
r13 = _nt_alphanumeric_char
|
374
|
+
if r13
|
375
|
+
s12 << r13
|
376
|
+
else
|
377
|
+
break
|
378
|
+
end
|
379
|
+
end
|
380
|
+
r12 = instantiate_node(SyntaxNode,input, i12...index, s12)
|
381
|
+
s9 << r12
|
382
|
+
end
|
383
|
+
end
|
384
|
+
if s9.last
|
385
|
+
r9 = instantiate_node(SyntaxNode,input, i9...index, s9)
|
386
|
+
r9.extend(ModuleDeclaration0)
|
387
|
+
else
|
388
|
+
@index = i9
|
389
|
+
r9 = nil
|
390
|
+
end
|
391
|
+
if r9
|
392
|
+
s8 << r9
|
393
|
+
else
|
394
|
+
break
|
395
|
+
end
|
396
|
+
end
|
397
|
+
r8 = instantiate_node(SyntaxNode,input, i8...index, s8)
|
398
|
+
s4 << r8
|
320
399
|
end
|
321
400
|
end
|
401
|
+
if s4.last
|
402
|
+
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
403
|
+
r4.extend(ModuleDeclaration1)
|
404
|
+
else
|
405
|
+
@index = i4
|
406
|
+
r4 = nil
|
407
|
+
end
|
408
|
+
s1 << r4
|
409
|
+
if r4
|
410
|
+
r14 = _nt_space
|
411
|
+
s1 << r14
|
412
|
+
end
|
322
413
|
end
|
323
414
|
end
|
324
415
|
if s1.last
|
325
416
|
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
326
|
-
r1.extend(
|
417
|
+
r1.extend(ModuleDeclaration2)
|
327
418
|
else
|
328
419
|
@index = i1
|
329
420
|
r1 = nil
|
330
421
|
end
|
331
422
|
s0 << r1
|
332
423
|
if r1
|
333
|
-
|
334
|
-
|
335
|
-
if
|
336
|
-
|
424
|
+
i15 = index
|
425
|
+
r16 = _nt_module_declaration
|
426
|
+
if r16
|
427
|
+
r15 = r16
|
337
428
|
else
|
338
|
-
|
339
|
-
if
|
340
|
-
|
429
|
+
r17 = _nt_grammar
|
430
|
+
if r17
|
431
|
+
r15 = r17
|
341
432
|
else
|
342
|
-
@index =
|
343
|
-
|
433
|
+
@index = i15
|
434
|
+
r15 = nil
|
344
435
|
end
|
345
436
|
end
|
346
|
-
s0 <<
|
347
|
-
if
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
if
|
437
|
+
s0 << r15
|
438
|
+
if r15
|
439
|
+
i18, s18 = index, []
|
440
|
+
r19 = _nt_space
|
441
|
+
s18 << r19
|
442
|
+
if r19
|
352
443
|
if has_terminal?('end', false, index)
|
353
|
-
|
444
|
+
r20 = instantiate_node(SyntaxNode,input, index...(index + 3))
|
354
445
|
@index += 3
|
355
446
|
else
|
356
447
|
terminal_parse_failure('end')
|
357
|
-
|
448
|
+
r20 = nil
|
358
449
|
end
|
359
|
-
|
450
|
+
s18 << r20
|
360
451
|
end
|
361
|
-
if
|
362
|
-
|
363
|
-
|
452
|
+
if s18.last
|
453
|
+
r18 = instantiate_node(SyntaxNode,input, i18...index, s18)
|
454
|
+
r18.extend(ModuleDeclaration3)
|
364
455
|
else
|
365
|
-
@index =
|
366
|
-
|
456
|
+
@index = i18
|
457
|
+
r18 = nil
|
367
458
|
end
|
368
|
-
s0 <<
|
459
|
+
s0 << r18
|
369
460
|
end
|
370
461
|
end
|
371
462
|
if s0.last
|
372
463
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
373
|
-
r0.extend(
|
374
|
-
r0.extend(
|
464
|
+
r0.extend(ModuleDeclaration4)
|
465
|
+
r0.extend(ModuleDeclaration5)
|
375
466
|
else
|
376
467
|
@index = i0
|
377
468
|
r0 = nil
|