treetop 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/tt +1 -1
- data/doc/using_in_ruby.markdown +0 -126
- data/lib/treetop/compiler/node_classes/anything_symbol.rb +2 -3
- data/lib/treetop/compiler/node_classes/character_class.rb +2 -3
- data/lib/treetop/compiler/node_classes/sequence.rb +2 -2
- data/lib/treetop/ruby_extensions/string.rb +2 -28
- data/lib/treetop/ruby_extensions.rb +1 -3
- data/lib/treetop/runtime/compiled_parser.rb +1 -1
- data/lib/treetop/version.rb +1 -1
- data/lib/treetop.rb +0 -1
- metadata +2 -10
- data/examples/ruby_syntax/syntax_test.rb +0 -105
- data/examples/ruby_syntax/test_helper.rb +0 -28
- data/lib/treetop/ruby_extensions/array.rb +0 -22
- data/lib/treetop/ruby_extensions/nil.rb +0 -5
- data/lib/treetop/ruby_extensions/object.rb +0 -57
- data/lib/treetop/ruby_extensions/regexp.rb +0 -5
- data/lib/treetop/ruby_extensions/symbol.rb +0 -5
- data/lib/treetop/syntax.rb +0 -39
data/bin/tt
CHANGED
data/doc/using_in_ruby.markdown
CHANGED
@@ -19,129 +19,3 @@ If a grammar by the name of `Foo` is defined, the compiled Ruby source will defi
|
|
19
19
|
else
|
20
20
|
puts 'failure'
|
21
21
|
end
|
22
|
-
|
23
|
-
##Defining Grammars Directly in Ruby
|
24
|
-
It is possible to define parser directly in Ruby source file.
|
25
|
-
|
26
|
-
###Grammars
|
27
|
-
Defining parsers in Ruby code is as much similar to original definition as it is possible. To create a grammar just write:
|
28
|
-
|
29
|
-
include Treetop::Syntax
|
30
|
-
grammar :Foo do
|
31
|
-
end
|
32
|
-
parser = FooParser.new
|
33
|
-
|
34
|
-
Treetop will automatically compile and load it into memory, thus an instance of `FooParser` can be created.
|
35
|
-
|
36
|
-
###Syntactic Recognition
|
37
|
-
To create a rule inside of a grammar simply write:
|
38
|
-
|
39
|
-
include Treetop::Syntax
|
40
|
-
grammar :Foo do
|
41
|
-
rule :bar do
|
42
|
-
...
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
Inside the rule any of Treetop syntactic elements can be used. Each element of a rule is created with standard Ruby classes: Strings act as Terminals, Symbols stand for Nonterminals, Arrays are sequences, Regexps are character classes.
|
47
|
-
|
48
|
-
_Note: it is better not to use Numbers, as terminal symbols; use Strings instead._
|
49
|
-
|
50
|
-
Sequences can be defined as follows:
|
51
|
-
|
52
|
-
rule :sequence do
|
53
|
-
[ "foo", "bar", "baz" ]
|
54
|
-
end
|
55
|
-
|
56
|
-
Ordered choices use `/` operator:
|
57
|
-
|
58
|
-
rule :choice do
|
59
|
-
"foo" / "bar"
|
60
|
-
end
|
61
|
-
|
62
|
-
Sequences have higher precedence than choices, so choices must be parenthesized to be used as the elements of a sequence. For example:
|
63
|
-
|
64
|
-
rule :nested do
|
65
|
-
["foo", "bar", "baz" / "bop" ] # -> "foo" "bar" ( "baz" / "bop" )
|
66
|
-
end
|
67
|
-
|
68
|
-
Special operators like `!`, `&`, `?`, `+` and `*` are available through methods (all of the methods return element itself so calls can be chained) of elements in a rule:
|
69
|
-
|
70
|
-
Op. | Method
|
71
|
-
-----------
|
72
|
-
! | bang
|
73
|
-
& | amper
|
74
|
-
? | mark
|
75
|
-
+ | plus
|
76
|
-
* | kleene
|
77
|
-
|
78
|
-
For example grammar:
|
79
|
-
|
80
|
-
grammar :Foo do
|
81
|
-
rule :bar do
|
82
|
-
[ "baz" / "bop" ].kleene
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
can generate any word that contain words "bar" and "bop".
|
87
|
-
|
88
|
-
###Semantic Interpretation
|
89
|
-
|
90
|
-
Syntax node declaration can be added by `node` method (which may be called the same as operators above):
|
91
|
-
|
92
|
-
grammar :Parens do
|
93
|
-
rule :parenthesized_letter do
|
94
|
-
([ '(', :parenthesized_letter, ')'] / /[a-z]/ ).node(:ParenNode)
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
It is also possible to add inline blocks of code. They are in fact strings strictly inserted into generated grammar:
|
99
|
-
|
100
|
-
grammar :Parens do
|
101
|
-
rule :parenthesized_letter do
|
102
|
-
(['(', :parenthesized_letter, ')'] / /[a-z]/ ).block(%{
|
103
|
-
def depth
|
104
|
-
if nonterminal?
|
105
|
-
parenthesized_letter.depth + 1
|
106
|
-
else
|
107
|
-
0
|
108
|
-
end
|
109
|
-
end
|
110
|
-
})
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
Labels in rule definitions can be written as follow (example taken from documentation):
|
115
|
-
|
116
|
-
rule :labels do
|
117
|
-
[/[a-z]/.label(:first_letter), [', ', /[a-z]/.kleene.label(:letter)].label(:rest_letters)].block(%{
|
118
|
-
...
|
119
|
-
})
|
120
|
-
end
|
121
|
-
|
122
|
-
###Composition
|
123
|
-
|
124
|
-
Inclusion of a grammar works thanks to `include` function call inside the grammar definition:
|
125
|
-
|
126
|
-
grammar :One do
|
127
|
-
rule :a do
|
128
|
-
foo"
|
129
|
-
end
|
130
|
-
|
131
|
-
rule :b do
|
132
|
-
"baz"
|
133
|
-
end
|
134
|
-
end
|
135
|
-
|
136
|
-
grammar :Two do
|
137
|
-
include :One
|
138
|
-
rule :a do
|
139
|
-
:super / "bar" / :c
|
140
|
-
end
|
141
|
-
|
142
|
-
rule :c do
|
143
|
-
:b
|
144
|
-
end
|
145
|
-
end
|
146
|
-
|
147
|
-
Grammar Two can generate `"foo"`, `"bar"` and `"baz"` words.
|
@@ -4,10 +4,9 @@ module Treetop
|
|
4
4
|
def compile(address, builder, parent_expression = nil)
|
5
5
|
super
|
6
6
|
builder.if__ "index < input_length" do
|
7
|
-
|
8
|
-
assign_result "instantiate_node(#{node_class_name},input, index...next_character)"
|
7
|
+
assign_result "instantiate_node(#{node_class_name},input, index...(index + 1))"
|
9
8
|
extend_result_with_inline_module
|
10
|
-
builder << "@index = next_character"
|
9
|
+
builder << "@index += 1"
|
11
10
|
end
|
12
11
|
builder.else_ do
|
13
12
|
builder << 'terminal_parse_failure("any character")'
|
@@ -5,14 +5,13 @@ module Treetop
|
|
5
5
|
super
|
6
6
|
|
7
7
|
builder.if__ "has_terminal?(#{grounded_regexp(text_value)}, true, index)" do
|
8
|
-
builder << 'next_character = index + input[index..-1].match(/\A(.)/um).end(1)'
|
9
8
|
if address == 0 || decorated?
|
10
|
-
assign_result "instantiate_node(#{node_class_name},input, index...next_character)"
|
9
|
+
assign_result "instantiate_node(#{node_class_name},input, index...(index + 1))"
|
11
10
|
extend_result_with_inline_module
|
12
11
|
else
|
13
12
|
assign_lazily_instantiated_node
|
14
13
|
end
|
15
|
-
builder << "@index = next_character"
|
14
|
+
builder << "@index += 1"
|
16
15
|
end
|
17
16
|
builder.else_ do
|
18
17
|
# "terminal_parse_failure(#{single_quote(characters)})"
|
@@ -53,10 +53,10 @@ module Treetop
|
|
53
53
|
def compile(index, builder, rule)
|
54
54
|
super
|
55
55
|
builder.module_declaration(module_name) do
|
56
|
-
elements_by_name = sequence_elements.inject({}){|h,e| (h[e.label_name] ||= []) << e; h}
|
56
|
+
elements_by_name = sequence_elements.inject({}){|h,e| (h[e.label_name.to_s] ||= []) << e; h}
|
57
57
|
sequence_elements.each_with_index do |element, index|
|
58
58
|
if element.label_name
|
59
|
-
repetitions = elements_by_name[element.label_name]
|
59
|
+
repetitions = elements_by_name[element.label_name.to_s]
|
60
60
|
label_name = element.label_name + (repetitions.size > 1 ? (repetitions.index(element)+1).to_s : "")
|
61
61
|
builder.method_declaration(label_name) do
|
62
62
|
builder << "elements[#{index}]"
|
@@ -8,7 +8,7 @@ class String
|
|
8
8
|
index + 1
|
9
9
|
end
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def line_of(index)
|
13
13
|
self[0...index].count("\n") + 1
|
14
14
|
end
|
@@ -36,33 +36,7 @@ class String
|
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
-
def indent_paragraph(n)
|
40
|
-
out = ""
|
41
|
-
self.each_line {|line| out += line.indent(n) }
|
42
|
-
out
|
43
|
-
end
|
44
|
-
|
45
|
-
# Removes indentation uniformly.
|
46
|
-
def justify
|
47
|
-
min = self.length
|
48
|
-
self.each_line {|line|
|
49
|
-
next if line.strip == ""
|
50
|
-
if line =~ /^( *)\S/
|
51
|
-
min = $1.length if min > $1.length
|
52
|
-
else
|
53
|
-
min = 0
|
54
|
-
end
|
55
|
-
}
|
56
|
-
out = ""
|
57
|
-
self.each_line {|line| out += line.slice(min...line.length) || "\n" }
|
58
|
-
out.strip
|
59
|
-
end
|
60
|
-
|
61
39
|
def treetop_camelize
|
62
40
|
to_s.gsub(/\/(.?)/){ "::" + $1.upcase }.gsub(/(^|_)(.)/){ $2.upcase }
|
63
41
|
end
|
64
|
-
|
65
|
-
def to_tt
|
66
|
-
"'#{self}'"
|
67
|
-
end
|
68
|
-
end
|
42
|
+
end
|
@@ -88,7 +88,7 @@ module Treetop
|
|
88
88
|
|
89
89
|
def has_terminal?(terminal, regex, index)
|
90
90
|
if regex
|
91
|
-
rx = @regexps[terminal] ||= Regexp.new(terminal
|
91
|
+
rx = @regexps[terminal] ||= Regexp.new(terminal)
|
92
92
|
input.index(rx, index) == index
|
93
93
|
else
|
94
94
|
input[index, terminal.size] == terminal
|
data/lib/treetop/version.rb
CHANGED
data/lib/treetop.rb
CHANGED
@@ -11,7 +11,6 @@ TREETOP_ROOT = File.join(dir, 'treetop')
|
|
11
11
|
require File.join(TREETOP_ROOT, "ruby_extensions")
|
12
12
|
require File.join(TREETOP_ROOT, "runtime")
|
13
13
|
require File.join(TREETOP_ROOT, "compiler")
|
14
|
-
require File.join(TREETOP_ROOT, "syntax")
|
15
14
|
|
16
15
|
require 'polyglot'
|
17
16
|
Polyglot.register(Treetop::VALID_GRAMMAR_EXT, Treetop)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: treetop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1
|
4
|
+
version: 1.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Sobo
|
@@ -9,7 +9,7 @@ autorequire: treetop
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-09-
|
12
|
+
date: 2009-09-11 00:00:00 +10:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -61,12 +61,7 @@ files:
|
|
61
61
|
- lib/treetop/compiler/node_classes.rb
|
62
62
|
- lib/treetop/compiler/ruby_builder.rb
|
63
63
|
- lib/treetop/compiler.rb
|
64
|
-
- lib/treetop/ruby_extensions/array.rb
|
65
|
-
- lib/treetop/ruby_extensions/nil.rb
|
66
|
-
- lib/treetop/ruby_extensions/object.rb
|
67
|
-
- lib/treetop/ruby_extensions/regexp.rb
|
68
64
|
- lib/treetop/ruby_extensions/string.rb
|
69
|
-
- lib/treetop/ruby_extensions/symbol.rb
|
70
65
|
- lib/treetop/ruby_extensions.rb
|
71
66
|
- lib/treetop/runtime/compiled_parser.rb
|
72
67
|
- lib/treetop/runtime/interval_skip_list/head_node.rb
|
@@ -78,7 +73,6 @@ files:
|
|
78
73
|
- lib/treetop/runtime/terminal_parse_failure_debug.rb
|
79
74
|
- lib/treetop/runtime/terminal_syntax_node.rb
|
80
75
|
- lib/treetop/runtime.rb
|
81
|
-
- lib/treetop/syntax.rb
|
82
76
|
- lib/treetop/version.rb
|
83
77
|
- lib/treetop.rb
|
84
78
|
- bin/tt
|
@@ -101,8 +95,6 @@ files:
|
|
101
95
|
- examples/lambda_calculus/lambda_calculus_node_classes.rb
|
102
96
|
- examples/lambda_calculus/lambda_calculus_test.rb
|
103
97
|
- examples/lambda_calculus/test_helper.rb
|
104
|
-
- examples/ruby_syntax/syntax_test.rb
|
105
|
-
- examples/ruby_syntax/test_helper.rb
|
106
98
|
has_rdoc: true
|
107
99
|
homepage: http://functionalform.blogspot.com
|
108
100
|
licenses: []
|
@@ -1,105 +0,0 @@
|
|
1
|
-
dir = File.dirname(__FILE__)
|
2
|
-
require File.expand_path("#{dir}/test_helper")
|
3
|
-
|
4
|
-
class SyntaxTest < Test::Unit::TestCase
|
5
|
-
include Treetop::Syntax
|
6
|
-
include SyntaxTestHelper
|
7
|
-
|
8
|
-
def test_simple
|
9
|
-
assert_grammar {
|
10
|
-
grammar :OnlyGrammar do
|
11
|
-
end
|
12
|
-
}
|
13
|
-
end
|
14
|
-
|
15
|
-
def test_rules
|
16
|
-
assert_grammar {
|
17
|
-
grammar :Simple do
|
18
|
-
rule :foo do
|
19
|
-
["foo", :bar]
|
20
|
-
end
|
21
|
-
|
22
|
-
rule :bar do
|
23
|
-
"bar" / "baz"
|
24
|
-
end
|
25
|
-
end
|
26
|
-
}
|
27
|
-
parse('foobar')
|
28
|
-
parse('foobaz')
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_nested
|
32
|
-
assert_grammar {
|
33
|
-
grammar :Nested do
|
34
|
-
rule :nested do
|
35
|
-
["foo", "bar", "baz" / "bop"]
|
36
|
-
end
|
37
|
-
end
|
38
|
-
}
|
39
|
-
parse('foobarbaz')
|
40
|
-
parse('foobarbop')
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_operators
|
44
|
-
assert_grammar {
|
45
|
-
grammar :Kleene do
|
46
|
-
rule :Kleene do
|
47
|
-
"foo".kleene
|
48
|
-
end
|
49
|
-
end
|
50
|
-
}
|
51
|
-
parse("")
|
52
|
-
parse("foo")
|
53
|
-
parse("foofoo")
|
54
|
-
|
55
|
-
assert_grammar {
|
56
|
-
grammar :Plus do
|
57
|
-
rule :Plus do
|
58
|
-
"foo".plus
|
59
|
-
end
|
60
|
-
end
|
61
|
-
}
|
62
|
-
parse("foo")
|
63
|
-
parse("foofoo")
|
64
|
-
|
65
|
-
assert_grammar {
|
66
|
-
grammar :Optional do
|
67
|
-
rule :Optional do
|
68
|
-
"foo".mark
|
69
|
-
end
|
70
|
-
end
|
71
|
-
}
|
72
|
-
parse("")
|
73
|
-
parse("foo")
|
74
|
-
end
|
75
|
-
|
76
|
-
def test_inclusion
|
77
|
-
assert_grammar {
|
78
|
-
grammar :One do
|
79
|
-
rule :a do
|
80
|
-
"foo"
|
81
|
-
end
|
82
|
-
|
83
|
-
rule :b do
|
84
|
-
"baz"
|
85
|
-
end
|
86
|
-
end
|
87
|
-
}
|
88
|
-
|
89
|
-
assert_grammar {
|
90
|
-
grammar :Two do
|
91
|
-
include :One
|
92
|
-
rule :a do
|
93
|
-
:super / "bar" / :c
|
94
|
-
end
|
95
|
-
|
96
|
-
rule :c do
|
97
|
-
:b
|
98
|
-
end
|
99
|
-
end
|
100
|
-
}
|
101
|
-
parse("foo")
|
102
|
-
parse("bar")
|
103
|
-
parse("baz")
|
104
|
-
end
|
105
|
-
end
|
@@ -1,28 +0,0 @@
|
|
1
|
-
require 'test/unit'
|
2
|
-
require 'rubygems'
|
3
|
-
require 'treetop'
|
4
|
-
|
5
|
-
dir = File.dirname(__FILE__)
|
6
|
-
require File.expand_path("#{dir}/../../lib/treetop/ruby_extensions")
|
7
|
-
require File.expand_path("#{dir}/../../lib/treetop/syntax")
|
8
|
-
|
9
|
-
module SyntaxTestHelper
|
10
|
-
def assert_grammar
|
11
|
-
g = yield
|
12
|
-
assert_not_nil g
|
13
|
-
flunk "Badly generated parser" unless g
|
14
|
-
@parser = eval("#{g}.new")
|
15
|
-
end
|
16
|
-
|
17
|
-
def parse(input)
|
18
|
-
result = @parser.parse(input)
|
19
|
-
unless result
|
20
|
-
puts @parser.terminal_failures.join("\n")
|
21
|
-
end
|
22
|
-
assert_not_nil result
|
23
|
-
if result
|
24
|
-
assert_equal input, result.text_value
|
25
|
-
end
|
26
|
-
result
|
27
|
-
end
|
28
|
-
end
|
@@ -1,22 +0,0 @@
|
|
1
|
-
class Array
|
2
|
-
def join_with(method, pattern = "")
|
3
|
-
return join(pattern) unless method
|
4
|
-
return "" if self.length == 0
|
5
|
-
|
6
|
-
args = []
|
7
|
-
if method.respond_to? :to_hash
|
8
|
-
args = method[:args] || []
|
9
|
-
method = method[:name]
|
10
|
-
end
|
11
|
-
|
12
|
-
output = self[0].send(method, *args)
|
13
|
-
for i in (1...self.length)
|
14
|
-
output += pattern + self[i].send(method, *args)
|
15
|
-
end
|
16
|
-
output
|
17
|
-
end
|
18
|
-
|
19
|
-
def to_tt
|
20
|
-
self.join_with({:name => :seq_to_tt, :args => [true]}, " ")
|
21
|
-
end
|
22
|
-
end
|
@@ -1,57 +0,0 @@
|
|
1
|
-
class Object
|
2
|
-
def sequence
|
3
|
-
@sequence ||= []
|
4
|
-
end
|
5
|
-
|
6
|
-
def /(other)
|
7
|
-
sequence.push(other)
|
8
|
-
self
|
9
|
-
end
|
10
|
-
|
11
|
-
def seq_to_tt(inline = false)
|
12
|
-
separator = inline ? " / " : "\n/\n"
|
13
|
-
tt = if sequence.length == 0
|
14
|
-
self.to_tt
|
15
|
-
else
|
16
|
-
output = self.to_tt + separator + sequence.join_with({:name => :seq_to_tt, :args => [true]}, separator)
|
17
|
-
output = "( #{output} )" if inline
|
18
|
-
output
|
19
|
-
end
|
20
|
-
|
21
|
-
# Operators
|
22
|
-
tt = "&" + tt if @amper
|
23
|
-
tt = "!" + tt if @bang
|
24
|
-
tt += "*" if @kleene
|
25
|
-
tt += "+" if @plus
|
26
|
-
tt += "?" if @mark
|
27
|
-
|
28
|
-
tt += " <#{@node.to_s}>" if @node
|
29
|
-
tt += " {\n#{@block.gsub("\t", " ").justify.indent_paragraph(2)}\n}" if @block
|
30
|
-
tt = @label.to_s + ':' + tt if @label
|
31
|
-
tt
|
32
|
-
end
|
33
|
-
|
34
|
-
def node(name)
|
35
|
-
@node = name
|
36
|
-
self
|
37
|
-
end
|
38
|
-
|
39
|
-
def block(content)
|
40
|
-
@block = content
|
41
|
-
self
|
42
|
-
end
|
43
|
-
|
44
|
-
def label(name)
|
45
|
-
@label = name
|
46
|
-
self
|
47
|
-
end
|
48
|
-
|
49
|
-
[:mark, :kleene, :plus, :amper, :bang].each do |sym|
|
50
|
-
Object.class_eval(%{
|
51
|
-
def #{sym.to_s}
|
52
|
-
@#{sym.to_s} = true
|
53
|
-
self
|
54
|
-
end
|
55
|
-
})
|
56
|
-
end
|
57
|
-
end
|
data/lib/treetop/syntax.rb
DELETED
@@ -1,39 +0,0 @@
|
|
1
|
-
=begin rdoc
|
2
|
-
Definition of TreeTop syntax in pure Ruby.
|
3
|
-
=end
|
4
|
-
|
5
|
-
module Treetop
|
6
|
-
# Provides the possibility to write Treetop syntax as a Ruby code.
|
7
|
-
# Symbols act as nonterminals, strings as terminals, arrays as
|
8
|
-
# sequences. Ordered choices are defined similar to original Treetop
|
9
|
-
# syntax.
|
10
|
-
#
|
11
|
-
# (Note: it is better not to use numbers; use Strings instead)
|
12
|
-
#
|
13
|
-
module Syntax
|
14
|
-
class Grammar
|
15
|
-
attr_reader :source
|
16
|
-
def initialize
|
17
|
-
@source = ""
|
18
|
-
end
|
19
|
-
|
20
|
-
def rule(name)
|
21
|
-
@source += "rule #{name.to_s}\n#{yield.seq_to_tt.indent_paragraph(2)}\nend\n"
|
22
|
-
end
|
23
|
-
|
24
|
-
def include(name)
|
25
|
-
@source += "include #{name.to_s}\n"
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
def grammar(name, &block)
|
30
|
-
Syntax.grammar(name, &block)
|
31
|
-
end
|
32
|
-
|
33
|
-
def self.grammar(name, &block)
|
34
|
-
(g = Grammar.new).instance_eval(&block)
|
35
|
-
source = "grammar #{name.to_s}\n#{g.source.indent_paragraph(2)}end\n"
|
36
|
-
Treetop.load_from_string(source)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|