treetop 1.6.2 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +8 -1
- data/doc/syntactic_recognition.markdown +3 -0
- data/examples/indented_blocks/indented_blocks.tt +73 -0
- data/examples/indented_blocks/indented_blocks_test.rb +24 -0
- data/examples/inner_outer.rb +51 -0
- data/examples/inner_outer.tt +14 -0
- data/examples/numerals.rb +210 -0
- data/examples/numerals.tt +21 -0
- data/lib/treetop/compiler/metagrammar.rb +3 -57
- data/lib/treetop/compiler/metagrammar.treetop +3 -57
- data/lib/treetop/compiler/node_classes/anything_symbol.rb +1 -1
- data/lib/treetop/compiler/node_classes/character_class.rb +1 -1
- data/lib/treetop/compiler/node_classes/choice.rb +5 -5
- data/lib/treetop/compiler/node_classes/nonterminal.rb +2 -2
- data/lib/treetop/compiler/node_classes/parsing_expression.rb +2 -8
- data/lib/treetop/compiler/node_classes/repetition.rb +6 -6
- data/lib/treetop/compiler/node_classes/sequence.rb +1 -1
- data/lib/treetop/compiler/node_classes/terminal.rb +1 -7
- data/lib/treetop/runtime/syntax_node.rb +13 -20
- data/lib/treetop/version.rb +1 -1
- data/spec/compiler/choice_spec.rb +10 -19
- data/spec/compiler/grammar_compiler_spec.rb +4 -4
- data/spec/compiler/parenthesized_expression_spec.rb +0 -11
- data/spec/compiler/tt_compiler_spec.rb +40 -40
- data/spec/compiler/zero_or_more_spec.rb +0 -2
- data/spec/spec_helper.rb +1 -1
- data/treetop.gemspec +10 -5
- metadata +14 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 18fbc0b36ef1e609af0a671196218e5357161e54
|
4
|
+
data.tar.gz: adafd739d5027568cea7ed3baf9ffd8bcb6f5b59
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c11d7d971c0c563025bacef00d8ad62aaae7c4d4400515ee7d173e6509c7afb2c6679036da29b8cb802749343ebf02d6ead9b6b6c8e659bd78f41751671b66a5
|
7
|
+
data.tar.gz: 965406473b8c53171bdb9c0a4c8e49ec2ab552cb1dc10c847d415a5c7d1b214b7bebf42d11a11988a52cd6b7cc3d96fc31b692efacc48a8bf8ef416439803d2a
|
data/Rakefile
CHANGED
@@ -15,6 +15,7 @@ Jeweler::Tasks.new do |gem|
|
|
15
15
|
gem.homepage = "https://github.com/cjheath/treetop"
|
16
16
|
gem.platform = Gem::Platform::RUBY
|
17
17
|
gem.summary = "A Ruby-based text parsing and interpretation DSL"
|
18
|
+
gem.description = "A Parsing Expression Grammar (PEG) Parser generator DSL for Ruby"
|
18
19
|
gem.files = [
|
19
20
|
"LICENSE", "README.md", "Rakefile", "treetop.gemspec",
|
20
21
|
"{spec,lib,bin,examples}/**/*",
|
@@ -23,7 +24,6 @@ Jeweler::Tasks.new do |gem|
|
|
23
24
|
gem.bindir = "bin"
|
24
25
|
gem.executables = ["tt"]
|
25
26
|
gem.require_path = "lib"
|
26
|
-
gem.autorequire = "treetop"
|
27
27
|
gem.has_rdoc = false
|
28
28
|
end
|
29
29
|
Jeweler::RubygemsDotOrgTasks.new
|
@@ -43,6 +43,13 @@ file 'lib/treetop/compiler/metagrammar.treetop' do |t|
|
|
43
43
|
Treetop::Compiler::GrammarCompiler.new.compile(METAGRAMMAR_PATH)
|
44
44
|
end
|
45
45
|
|
46
|
+
task :rebuild do
|
47
|
+
$:.unshift "lib"
|
48
|
+
require './lib/treetop'
|
49
|
+
load File.expand_path('../lib/treetop/compiler/metagrammar.rb', __FILE__)
|
50
|
+
Treetop::Compiler::GrammarCompiler.new.compile('lib/treetop/compiler/metagrammar.treetop')
|
51
|
+
end
|
52
|
+
|
46
53
|
task :version do
|
47
54
|
puts RUBY_VERSION
|
48
55
|
end
|
@@ -215,3 +215,6 @@ tried at which locations in the input, and what the result was. This process, ca
|
|
215
215
|
requires that the rule would produce the same result (if run again) as it produced the first time when
|
216
216
|
the result was remembered. If you violate this principle in your semantic predicates, be prepared to
|
217
217
|
fight Cerberus before you're allowed out of Hades again.
|
218
|
+
|
219
|
+
There's an example of how to use semantic predicates to parse a language with white-space indented blocks
|
220
|
+
in the examples directory.
|
@@ -0,0 +1,73 @@
|
|
1
|
+
grammar IndentedBlocks
|
2
|
+
rule top
|
3
|
+
# Initialise the indent stack with a sentinel:
|
4
|
+
&{|s| @indents = [-1] }
|
5
|
+
foo:('foo'?)
|
6
|
+
nested_blocks
|
7
|
+
{
|
8
|
+
def inspect
|
9
|
+
nested_blocks.inspect
|
10
|
+
end
|
11
|
+
}
|
12
|
+
end
|
13
|
+
|
14
|
+
rule nested_blocks
|
15
|
+
(
|
16
|
+
# Do not try to extract this semantic predicate into a new rule.
|
17
|
+
# It will be memo-ized incorrectly because @indents.last will change.
|
18
|
+
!{|s|
|
19
|
+
# Peek at the following indentation:
|
20
|
+
save = index; i = _nt_indentation; index = save
|
21
|
+
# We're closing if the indentation is less or the same as our enclosing block's:
|
22
|
+
closing = i.text_value.length <= @indents.last
|
23
|
+
}
|
24
|
+
block
|
25
|
+
)*
|
26
|
+
{
|
27
|
+
def inspect
|
28
|
+
elements.map{|e| e.block.inspect}*"\n"
|
29
|
+
end
|
30
|
+
}
|
31
|
+
end
|
32
|
+
|
33
|
+
rule block
|
34
|
+
indented_line # The block's opening line
|
35
|
+
&{|s| # Push the indent level to the stack
|
36
|
+
level = s[0].indentation.text_value.length
|
37
|
+
@indents << level
|
38
|
+
true
|
39
|
+
}
|
40
|
+
nested_blocks # Parse any nested blocks
|
41
|
+
&{|s| # Pop the indent stack
|
42
|
+
# Note that under no circumstances should "nested_blocks" fail, or the stack will be mis-aligned
|
43
|
+
@indents.pop
|
44
|
+
true
|
45
|
+
}
|
46
|
+
{
|
47
|
+
def inspect
|
48
|
+
indented_line.inspect +
|
49
|
+
(nested_blocks.elements.size > 0 ? (
|
50
|
+
"\n{\n" +
|
51
|
+
nested_blocks.elements.map { |content|
|
52
|
+
content.block.inspect+"\n"
|
53
|
+
}*'' +
|
54
|
+
"}"
|
55
|
+
)
|
56
|
+
: "")
|
57
|
+
end
|
58
|
+
}
|
59
|
+
end
|
60
|
+
|
61
|
+
rule indented_line
|
62
|
+
indentation text:((!"\n" .)*) "\n"
|
63
|
+
{
|
64
|
+
def inspect
|
65
|
+
text.text_value
|
66
|
+
end
|
67
|
+
}
|
68
|
+
end
|
69
|
+
|
70
|
+
rule indentation
|
71
|
+
' '*
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'polyglot'
|
2
|
+
require 'byebug'
|
3
|
+
require 'treetop'
|
4
|
+
require 'indented_blocks'
|
5
|
+
|
6
|
+
parser = IndentedBlocksParser.new
|
7
|
+
|
8
|
+
input = <<END
|
9
|
+
def foo
|
10
|
+
here is some indented text
|
11
|
+
here it's further indented
|
12
|
+
and here the same
|
13
|
+
but here it's further again
|
14
|
+
and some more like that
|
15
|
+
before going back to here
|
16
|
+
down again
|
17
|
+
back twice
|
18
|
+
and start from the beginning again
|
19
|
+
with only a small block this time
|
20
|
+
END
|
21
|
+
|
22
|
+
parse_tree = parser.parse input
|
23
|
+
|
24
|
+
p parse_tree
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Autogenerated from a Treetop grammar. Edits may be lost.
|
2
|
+
|
3
|
+
|
4
|
+
module InnerOuter
|
5
|
+
include Treetop::Runtime
|
6
|
+
|
7
|
+
def root
|
8
|
+
@root ||= :inner_outer
|
9
|
+
end
|
10
|
+
|
11
|
+
module InnerOuter0
|
12
|
+
def inner
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
module InnerOuter1
|
17
|
+
def outer
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def _nt_inner_outer
|
22
|
+
start_index = index
|
23
|
+
if node_cache[:inner_outer].has_key?(index)
|
24
|
+
cached = node_cache[:inner_outer][index]
|
25
|
+
if cached
|
26
|
+
node_cache[:inner_outer][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
27
|
+
@index = cached.interval.end
|
28
|
+
end
|
29
|
+
return cached
|
30
|
+
end
|
31
|
+
|
32
|
+
if (match_len = has_terminal?("foo", false, index))
|
33
|
+
r0 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
34
|
+
r0.extend(InnerOuter0)
|
35
|
+
@index += match_len
|
36
|
+
else
|
37
|
+
terminal_parse_failure('"foo"')
|
38
|
+
r0 = nil
|
39
|
+
end
|
40
|
+
|
41
|
+
node_cache[:inner_outer][start_index] = r0
|
42
|
+
|
43
|
+
r0
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
class InnerOuterParser < Treetop::Runtime::CompiledParser
|
49
|
+
include InnerOuter
|
50
|
+
end
|
51
|
+
|
@@ -0,0 +1,210 @@
|
|
1
|
+
# Autogenerated from a Treetop grammar. Edits may be lost.
|
2
|
+
|
3
|
+
|
4
|
+
module Numerals
|
5
|
+
include Treetop::Runtime
|
6
|
+
|
7
|
+
def root
|
8
|
+
@root ||= :percentage
|
9
|
+
end
|
10
|
+
|
11
|
+
module Percentage0
|
12
|
+
def decimal
|
13
|
+
elements[0]
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
module Percentage1
|
19
|
+
def to_f
|
20
|
+
decimal.to_f / 100
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def _nt_percentage
|
25
|
+
start_index = index
|
26
|
+
if node_cache[:percentage].has_key?(index)
|
27
|
+
cached = node_cache[:percentage][index]
|
28
|
+
if cached
|
29
|
+
node_cache[:percentage][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
30
|
+
@index = cached.interval.end
|
31
|
+
end
|
32
|
+
return cached
|
33
|
+
end
|
34
|
+
|
35
|
+
i0, s0 = index, []
|
36
|
+
r1 = _nt_decimal
|
37
|
+
s0 << r1
|
38
|
+
if r1
|
39
|
+
if (match_len = has_terminal?("%", false, index))
|
40
|
+
r2 = true
|
41
|
+
@index += match_len
|
42
|
+
else
|
43
|
+
terminal_parse_failure('"%"')
|
44
|
+
r2 = nil
|
45
|
+
end
|
46
|
+
s0 << r2
|
47
|
+
end
|
48
|
+
if s0.last
|
49
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
50
|
+
r0.extend(Percentage0)
|
51
|
+
r0.extend(Percentage1)
|
52
|
+
else
|
53
|
+
@index = i0
|
54
|
+
r0 = nil
|
55
|
+
end
|
56
|
+
|
57
|
+
node_cache[:percentage][start_index] = r0
|
58
|
+
|
59
|
+
r0
|
60
|
+
end
|
61
|
+
|
62
|
+
module Decimal0
|
63
|
+
def sign
|
64
|
+
elements[0]
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
68
|
+
|
69
|
+
module Decimal1
|
70
|
+
def to_f
|
71
|
+
text_value.to_f
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def _nt_decimal
|
76
|
+
start_index = index
|
77
|
+
if node_cache[:decimal].has_key?(index)
|
78
|
+
cached = node_cache[:decimal][index]
|
79
|
+
if cached
|
80
|
+
node_cache[:decimal][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
81
|
+
@index = cached.interval.end
|
82
|
+
end
|
83
|
+
return cached
|
84
|
+
end
|
85
|
+
|
86
|
+
i0, s0 = index, []
|
87
|
+
r1 = _nt_sign
|
88
|
+
s0 << r1
|
89
|
+
if r1
|
90
|
+
s2, i2 = [], index
|
91
|
+
loop do
|
92
|
+
if has_terminal?(@regexps[gr = '\A[0-9]'] ||= Regexp.new(gr), :regexp, index)
|
93
|
+
r3 = true
|
94
|
+
@index += 1
|
95
|
+
else
|
96
|
+
terminal_parse_failure('[0-9]')
|
97
|
+
r3 = nil
|
98
|
+
end
|
99
|
+
if r3
|
100
|
+
s2 << r3
|
101
|
+
else
|
102
|
+
break
|
103
|
+
end
|
104
|
+
end
|
105
|
+
if s2.empty?
|
106
|
+
@index = i2
|
107
|
+
r2 = nil
|
108
|
+
else
|
109
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
110
|
+
end
|
111
|
+
s0 << r2
|
112
|
+
if r2
|
113
|
+
if (match_len = has_terminal?('.', false, index))
|
114
|
+
r4 = true
|
115
|
+
@index += match_len
|
116
|
+
else
|
117
|
+
terminal_parse_failure('\'.\'')
|
118
|
+
r4 = nil
|
119
|
+
end
|
120
|
+
s0 << r4
|
121
|
+
if r4
|
122
|
+
s5, i5 = [], index
|
123
|
+
loop do
|
124
|
+
if has_terminal?(@regexps[gr = '\A[0-9]'] ||= Regexp.new(gr), :regexp, index)
|
125
|
+
r6 = true
|
126
|
+
@index += 1
|
127
|
+
else
|
128
|
+
terminal_parse_failure('[0-9]')
|
129
|
+
r6 = nil
|
130
|
+
end
|
131
|
+
if r6
|
132
|
+
s5 << r6
|
133
|
+
else
|
134
|
+
break
|
135
|
+
end
|
136
|
+
end
|
137
|
+
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
|
138
|
+
s0 << r5
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
if s0.last
|
143
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
144
|
+
r0.extend(Decimal0)
|
145
|
+
r0.extend(Decimal1)
|
146
|
+
else
|
147
|
+
@index = i0
|
148
|
+
r0 = nil
|
149
|
+
end
|
150
|
+
|
151
|
+
node_cache[:decimal][start_index] = r0
|
152
|
+
|
153
|
+
r0
|
154
|
+
end
|
155
|
+
|
156
|
+
def _nt_sign
|
157
|
+
start_index = index
|
158
|
+
if node_cache[:sign].has_key?(index)
|
159
|
+
cached = node_cache[:sign][index]
|
160
|
+
if cached
|
161
|
+
node_cache[:sign][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
162
|
+
@index = cached.interval.end
|
163
|
+
end
|
164
|
+
return cached
|
165
|
+
end
|
166
|
+
|
167
|
+
i1 = index
|
168
|
+
if (match_len = has_terminal?('+', false, index))
|
169
|
+
r2 = true
|
170
|
+
@index += match_len
|
171
|
+
else
|
172
|
+
terminal_parse_failure('\'+\'')
|
173
|
+
r2 = nil
|
174
|
+
end
|
175
|
+
if r2
|
176
|
+
r2 = SyntaxNode.new(input, (index-1)...index) if r2 == true
|
177
|
+
r1 = r2
|
178
|
+
else
|
179
|
+
if (match_len = has_terminal?('-', false, index))
|
180
|
+
r3 = true
|
181
|
+
@index += match_len
|
182
|
+
else
|
183
|
+
terminal_parse_failure('\'-\'')
|
184
|
+
r3 = nil
|
185
|
+
end
|
186
|
+
if r3
|
187
|
+
r3 = SyntaxNode.new(input, (index-1)...index) if r3 == true
|
188
|
+
r1 = r3
|
189
|
+
else
|
190
|
+
@index = i1
|
191
|
+
r1 = nil
|
192
|
+
end
|
193
|
+
end
|
194
|
+
if r1
|
195
|
+
r0 = r1
|
196
|
+
else
|
197
|
+
r0 = instantiate_node(SyntaxNode,input, index...index)
|
198
|
+
end
|
199
|
+
|
200
|
+
node_cache[:sign][start_index] = r0
|
201
|
+
|
202
|
+
r0
|
203
|
+
end
|
204
|
+
|
205
|
+
end
|
206
|
+
|
207
|
+
class NumeralsParser < Treetop::Runtime::CompiledParser
|
208
|
+
include Numerals
|
209
|
+
end
|
210
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
grammar Numerals
|
2
|
+
rule percentage
|
3
|
+
(decimal "%") {
|
4
|
+
def to_f
|
5
|
+
decimal.to_f / 100
|
6
|
+
end
|
7
|
+
}
|
8
|
+
end
|
9
|
+
|
10
|
+
rule decimal
|
11
|
+
sign [0-9]+ '.' [0-9]* {
|
12
|
+
def to_f
|
13
|
+
text_value.to_f
|
14
|
+
end
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
rule sign
|
19
|
+
('+'/'-')?
|
20
|
+
end
|
21
|
+
end
|