treetop 1.3.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/{README → README.md} +0 -0
- data/bin/tt +96 -12
- data/doc/semantic_interpretation.markdown +2 -2
- data/doc/site.rb +2 -0
- data/doc/sitegen.rb +6 -1
- data/doc/using_in_ruby.markdown +126 -0
- data/examples/ruby_syntax/syntax_test.rb +105 -0
- data/examples/ruby_syntax/test_helper.rb +28 -0
- data/lib/treetop/compiler/grammar_compiler.rb +2 -0
- data/lib/treetop/compiler/metagrammar.rb +422 -261
- data/lib/treetop/compiler/metagrammar.treetop +29 -1
- data/lib/treetop/compiler/node_classes/anything_symbol.rb +3 -2
- data/lib/treetop/compiler/node_classes/character_class.rb +9 -4
- data/lib/treetop/compiler/node_classes/parsing_expression.rb +8 -0
- data/lib/treetop/compiler/node_classes/predicate_block.rb +16 -0
- data/lib/treetop/compiler/node_classes/sequence.rb +4 -1
- data/lib/treetop/compiler/node_classes.rb +1 -0
- data/lib/treetop/compiler.rb +1 -1
- data/lib/treetop/ruby_extensions/array.rb +22 -0
- data/lib/treetop/ruby_extensions/nil.rb +5 -0
- data/lib/treetop/ruby_extensions/object.rb +57 -0
- data/lib/treetop/ruby_extensions/regexp.rb +5 -0
- data/lib/treetop/ruby_extensions/string.rb +28 -2
- data/lib/treetop/ruby_extensions/symbol.rb +5 -0
- data/lib/treetop/ruby_extensions.rb +3 -1
- data/lib/treetop/runtime/compiled_parser.rb +16 -12
- data/lib/treetop/runtime/syntax_node.rb +47 -5
- data/lib/treetop/syntax.rb +39 -0
- data/lib/treetop/version.rb +2 -2
- data/lib/treetop.rb +7 -1
- metadata +13 -4
data/{README → README.md}
RENAMED
File without changes
|
data/bin/tt
CHANGED
@@ -1,28 +1,112 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
2
3
|
require 'rubygems'
|
3
4
|
gem 'treetop'
|
4
5
|
|
5
6
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
6
7
|
require 'treetop'
|
8
|
+
require 'treetop/version'
|
9
|
+
|
10
|
+
options = {}
|
11
|
+
parser = OptionParser.new do |opts|
|
12
|
+
exts = Treetop::VALID_GRAMMAR_EXT.collect { |i| '.' + i }
|
13
|
+
|
14
|
+
opts.banner = "Treetop Parsing Expression Grammar (PEG) Comand Line Compiler"
|
15
|
+
opts.define_head "Usage: tt [options] grammar_file[#{exts.join('|')}] ..."
|
16
|
+
opts.separator ''
|
17
|
+
opts.separator 'Examples:'
|
18
|
+
opts.separator ' tt foo.tt # 1 grammar -> 1 parser source'
|
19
|
+
opts.separator ' tt foo bar.treetop # 2 grammars -> 2 separate parsers'
|
20
|
+
opts.separator ' tt -o alt_name.rb foo # alternately named output file'
|
21
|
+
opts.separator ''
|
22
|
+
opts.separator ''
|
23
|
+
opts.separator 'NOTE: while treetop grammar files *must* have one of the following'
|
24
|
+
opts.separator 'filename extensions, the extension name is not required when calling'
|
25
|
+
opts.separator 'the compiler with grammar file names.'
|
26
|
+
opts.separator ''
|
27
|
+
opts.separator " Valid extensions: #{exts.join(', ')}"
|
28
|
+
opts.separator ''
|
29
|
+
opts.separator ''
|
30
|
+
opts.separator 'Options:'
|
31
|
+
|
32
|
+
opts.on('-o', '--output FILENAME', 'Write parser source to FILENAME') do |fn|
|
33
|
+
options[:out_file] = fn
|
34
|
+
end
|
35
|
+
|
36
|
+
opts.on('-f', '--force', 'Overwrite existing output file(s)') do
|
37
|
+
options[:force] = true
|
38
|
+
end
|
39
|
+
|
40
|
+
opts.on_tail('-v', '--version', 'Show Treetop version') do
|
41
|
+
puts "Treetop v#{Treetop::VERSION::STRING}"
|
42
|
+
exit
|
43
|
+
end
|
44
|
+
|
45
|
+
opts.on_tail('-h', '--help', 'Show this help message') do
|
46
|
+
puts opts
|
47
|
+
exit
|
48
|
+
end
|
7
49
|
|
8
|
-
|
9
|
-
|
10
|
-
|
50
|
+
end
|
51
|
+
file_list = parser.parse!
|
52
|
+
|
53
|
+
# check options and arg constraints
|
54
|
+
if file_list.empty? || (options[:out_file] && file_list.size > 1)
|
55
|
+
puts parser
|
56
|
+
exit 1
|
57
|
+
end
|
58
|
+
|
59
|
+
def grammar_exist?(filename)
|
60
|
+
if File.extname(filename).empty?
|
61
|
+
Treetop::VALID_GRAMMAR_EXT.each do |ext|
|
62
|
+
fn_ext = "#{filename}.#{ext}"
|
63
|
+
return true if File.exist?(fn_ext) && !File.zero?(fn_ext)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
File.exist?(filename) && !File.zero?(filename)
|
67
|
+
end
|
68
|
+
|
69
|
+
def full_grammar_filename(filename)
|
70
|
+
return filename if !File.extname(filename).empty?
|
71
|
+
Treetop::VALID_GRAMMAR_EXT.each do |ext|
|
72
|
+
fn_ext = "#{filename}.#{ext}"
|
73
|
+
return fn_ext if File.exist?(fn_ext) && !File.zero?(fn_ext)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def protect_output?(filename, forced=false)
|
78
|
+
if !forced and
|
79
|
+
File.exist?(filename) and
|
80
|
+
(l=File.open(filename) { |f| f.gets rescue "" }) != Treetop::Compiler::AUTOGENERATED
|
81
|
+
puts "ERROR: '#{filename}' output already exists; skipping compilation...\n"
|
82
|
+
return true
|
83
|
+
end
|
84
|
+
false
|
11
85
|
end
|
12
86
|
|
13
87
|
compiler = Treetop::Compiler::GrammarCompiler.new
|
14
88
|
|
15
|
-
while !
|
16
|
-
treetop_file =
|
17
|
-
|
18
|
-
|
19
|
-
|
89
|
+
while !file_list.empty?
|
90
|
+
treetop_file = file_list.shift
|
91
|
+
|
92
|
+
# handle nonexistent and existent grammar files mixed together
|
93
|
+
if !grammar_exist?(treetop_file)
|
94
|
+
puts "ERROR: input grammar file '#{treetop_file}' does not exist; continuing...\n"
|
95
|
+
next
|
20
96
|
end
|
21
|
-
|
22
|
-
|
23
|
-
|
97
|
+
|
98
|
+
# try to compile
|
99
|
+
treetop_file = full_grammar_filename(treetop_file)
|
100
|
+
std_output_file = treetop_file.gsub(Treetop::VALID_GRAMMAR_EXT_REGEXP, '.rb')
|
101
|
+
|
102
|
+
if options[:out_file]
|
103
|
+
# explicit output file name option; never overwrite unless forced
|
104
|
+
next if protect_output?(options[:out_file], options[:force])
|
105
|
+
compiler.compile(treetop_file, options[:out_file])
|
24
106
|
else
|
25
|
-
#
|
107
|
+
# compile one input file from input file list option; never overwrite unless forced
|
108
|
+
next if protect_output?(std_output_file, options[:force])
|
26
109
|
compiler.compile(treetop_file)
|
27
110
|
end
|
111
|
+
|
28
112
|
end
|
@@ -45,7 +45,7 @@ Methods can be added to the nodes instantiated by the successful match of an exp
|
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
|
-
Note that each alternative expression is followed by a block containing a method definition. A `depth` method is defined on both expressions. The recursive `depth` method defined in the block following the first expression determines the depth of the nested parentheses and adds one
|
48
|
+
Note that each alternative expression is followed by a block containing a method definition. A `depth` method is defined on both expressions. The recursive `depth` method defined in the block following the first expression determines the depth of the nested parentheses and adds one to it. The base case is implemented in the block following the second expression; a single character has a depth of 0.
|
49
49
|
|
50
50
|
|
51
51
|
###Custom `SyntaxNode` Subclass Declarations
|
@@ -186,4 +186,4 @@ The module containing automatically defined element accessor methods is an ances
|
|
186
186
|
Available only on nonterminal nodes, returns the nodes parsed by the elements of the matched sequence.
|
187
187
|
</td>
|
188
188
|
</tr>
|
189
|
-
</table>
|
189
|
+
</table>
|
data/doc/site.rb
CHANGED
data/doc/sitegen.rb
CHANGED
@@ -7,6 +7,7 @@ class Layout < Erector::Widget
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def generate_site
|
10
|
+
FileUtils.mkdir_p(site_dir)
|
10
11
|
@@page_classes.each do |page_class|
|
11
12
|
page_class.generate_html unless page_class.abstract?
|
12
13
|
puts page_class
|
@@ -28,7 +29,7 @@ class Layout < Erector::Widget
|
|
28
29
|
end
|
29
30
|
|
30
31
|
def absolutize(relative_path)
|
31
|
-
File.join(
|
32
|
+
File.join(site_dir, relative_path)
|
32
33
|
end
|
33
34
|
|
34
35
|
def abstract
|
@@ -38,6 +39,10 @@ class Layout < Erector::Widget
|
|
38
39
|
def abstract?
|
39
40
|
@abstract
|
40
41
|
end
|
42
|
+
|
43
|
+
def site_dir
|
44
|
+
File.join(File.dirname(__FILE__), "site")
|
45
|
+
end
|
41
46
|
end
|
42
47
|
|
43
48
|
def bluecloth(relative_path)
|
data/doc/using_in_ruby.markdown
CHANGED
@@ -19,3 +19,129 @@ If a grammar by the name of `Foo` is defined, the compiled Ruby source will defi
|
|
19
19
|
else
|
20
20
|
puts 'failure'
|
21
21
|
end
|
22
|
+
|
23
|
+
##Defining Grammars Directly in Ruby
|
24
|
+
It is possible to define a parser directly in a Ruby source file.
|
25
|
+
|
26
|
+
###Grammars
|
27
|
+
Defining parsers in Ruby code is kept as similar to the original grammar definition syntax as possible. To create a grammar, just write:
|
28
|
+
|
29
|
+
include Treetop::Syntax
|
30
|
+
grammar :Foo do
|
31
|
+
end
|
32
|
+
parser = FooParser.new
|
33
|
+
|
34
|
+
Treetop will automatically compile and load it into memory, thus an instance of `FooParser` can be created.
|
35
|
+
|
36
|
+
###Syntactic Recognition
|
37
|
+
To create a rule inside of a grammar simply write:
|
38
|
+
|
39
|
+
include Treetop::Syntax
|
40
|
+
grammar :Foo do
|
41
|
+
rule :bar do
|
42
|
+
...
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
Inside the rule, any of Treetop's syntactic elements can be used. Each element of a rule is created with standard Ruby classes: Strings act as Terminals, Symbols stand for Nonterminals, Arrays are sequences, and Regexps are character classes.
|
47
|
+
|
48
|
+
_Note: it is better not to use Numbers as terminal symbols; use Strings instead._
|
49
|
+
|
50
|
+
Sequences can be defined as follows:
|
51
|
+
|
52
|
+
rule :sequence do
|
53
|
+
[ "foo", "bar", "baz" ]
|
54
|
+
end
|
55
|
+
|
56
|
+
Ordered choices use the `/` operator:
|
57
|
+
|
58
|
+
rule :choice do
|
59
|
+
"foo" / "bar"
|
60
|
+
end
|
61
|
+
|
62
|
+
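In Treetop grammar syntax, sequences have higher precedence than choices, so choices must be parenthesized to be used as the elements of a sequence. In the Ruby syntax, a choice written as an element of the sequence array is already a single element, so the equivalent of a parenthesized choice looks like this: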
|
63
|
+
|
64
|
+
rule :nested do
|
65
|
+
["foo", "bar", "baz" / "bop" ] # -> "foo" "bar" ( "baz" / "bop" )
|
66
|
+
end
|
67
|
+
|
68
|
+
Special operators like `!`, `&`, `?`, `+` and `*` are available as methods on the elements of a rule. Each of these methods returns the element itself, so calls can be chained:
|
69
|
+
|
70
|
+
Op. | Method
|
71
|
+
-----------
|
72
|
+
! | bang
|
73
|
+
& | amper
|
74
|
+
? | mark
|
75
|
+
+ | plus
|
76
|
+
* | kleene
|
77
|
+
|
78
|
+
For example, the grammar:
|
79
|
+
|
80
|
+
grammar :Foo do
|
81
|
+
rule :bar do
|
82
|
+
[ "baz" / "bop" ].kleene
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
can generate any word made up of the words "baz" and "bop".
|
87
|
+
|
88
|
+
###Semantic Interpretation
|
89
|
+
|
90
|
+
A syntax node declaration can be added with the `node` method (which is called in the same way as the operator methods above):
|
91
|
+
|
92
|
+
grammar :Parens do
|
93
|
+
rule :parenthesized_letter do
|
94
|
+
([ '(', :parenthesized_letter, ')'] / /[a-z]/ ).node(:ParenNode)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
It is also possible to add inline blocks of code. They are in fact strings inserted verbatim into the generated grammar:
|
99
|
+
|
100
|
+
grammar :Parens do
|
101
|
+
rule :parenthesized_letter do
|
102
|
+
(['(', :parenthesized_letter, ')'] / /[a-z]/ ).block(%{
|
103
|
+
def depth
|
104
|
+
if nonterminal?
|
105
|
+
parenthesized_letter.depth + 1
|
106
|
+
else
|
107
|
+
0
|
108
|
+
end
|
109
|
+
end
|
110
|
+
})
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
Labels in rule definitions can be written as follows (example taken from the documentation):
|
115
|
+
|
116
|
+
rule :labels do
|
117
|
+
[/[a-z]/.label(:first_letter), [', ', /[a-z]/.kleene.label(:letter)].label(:rest_letters)].block(%{
|
118
|
+
...
|
119
|
+
})
|
120
|
+
end
|
121
|
+
|
122
|
+
###Composition
|
123
|
+
|
124
|
+
A grammar is included in another by calling `include` inside the grammar definition:
|
125
|
+
|
126
|
+
grammar :One do
|
127
|
+
rule :a do
|
128
|
+
"foo"
|
129
|
+
end
|
130
|
+
|
131
|
+
rule :b do
|
132
|
+
"baz"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
grammar :Two do
|
137
|
+
include :One
|
138
|
+
rule :a do
|
139
|
+
:super / "bar" / :c
|
140
|
+
end
|
141
|
+
|
142
|
+
rule :c do
|
143
|
+
:b
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
Grammar Two can generate the words `"foo"`, `"bar"` and `"baz"`.
|
@@ -0,0 +1,105 @@
|
|
1
|
+
dir = File.dirname(__FILE__)
|
2
|
+
require File.expand_path("#{dir}/test_helper")
|
3
|
+
|
4
|
+
class SyntaxTest < Test::Unit::TestCase
|
5
|
+
include Treetop::Syntax
|
6
|
+
include SyntaxTestHelper
|
7
|
+
|
8
|
+
def test_simple
|
9
|
+
assert_grammar {
|
10
|
+
grammar :OnlyGrammar do
|
11
|
+
end
|
12
|
+
}
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_rules
|
16
|
+
assert_grammar {
|
17
|
+
grammar :Simple do
|
18
|
+
rule :foo do
|
19
|
+
["foo", :bar]
|
20
|
+
end
|
21
|
+
|
22
|
+
rule :bar do
|
23
|
+
"bar" / "baz"
|
24
|
+
end
|
25
|
+
end
|
26
|
+
}
|
27
|
+
parse('foobar')
|
28
|
+
parse('foobaz')
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_nested
|
32
|
+
assert_grammar {
|
33
|
+
grammar :Nested do
|
34
|
+
rule :nested do
|
35
|
+
["foo", "bar", "baz" / "bop"]
|
36
|
+
end
|
37
|
+
end
|
38
|
+
}
|
39
|
+
parse('foobarbaz')
|
40
|
+
parse('foobarbop')
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_operators
|
44
|
+
assert_grammar {
|
45
|
+
grammar :Kleene do
|
46
|
+
rule :Kleene do
|
47
|
+
"foo".kleene
|
48
|
+
end
|
49
|
+
end
|
50
|
+
}
|
51
|
+
parse("")
|
52
|
+
parse("foo")
|
53
|
+
parse("foofoo")
|
54
|
+
|
55
|
+
assert_grammar {
|
56
|
+
grammar :Plus do
|
57
|
+
rule :Plus do
|
58
|
+
"foo".plus
|
59
|
+
end
|
60
|
+
end
|
61
|
+
}
|
62
|
+
parse("foo")
|
63
|
+
parse("foofoo")
|
64
|
+
|
65
|
+
assert_grammar {
|
66
|
+
grammar :Optional do
|
67
|
+
rule :Optional do
|
68
|
+
"foo".mark
|
69
|
+
end
|
70
|
+
end
|
71
|
+
}
|
72
|
+
parse("")
|
73
|
+
parse("foo")
|
74
|
+
end
|
75
|
+
|
76
|
+
def test_inclusion
|
77
|
+
assert_grammar {
|
78
|
+
grammar :One do
|
79
|
+
rule :a do
|
80
|
+
"foo"
|
81
|
+
end
|
82
|
+
|
83
|
+
rule :b do
|
84
|
+
"baz"
|
85
|
+
end
|
86
|
+
end
|
87
|
+
}
|
88
|
+
|
89
|
+
assert_grammar {
|
90
|
+
grammar :Two do
|
91
|
+
include :One
|
92
|
+
rule :a do
|
93
|
+
:super / "bar" / :c
|
94
|
+
end
|
95
|
+
|
96
|
+
rule :c do
|
97
|
+
:b
|
98
|
+
end
|
99
|
+
end
|
100
|
+
}
|
101
|
+
parse("foo")
|
102
|
+
parse("bar")
|
103
|
+
parse("baz")
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'rubygems'
|
3
|
+
require 'treetop'
|
4
|
+
|
5
|
+
dir = File.dirname(__FILE__)
|
6
|
+
require File.expand_path("#{dir}/../../lib/treetop/ruby_extensions")
|
7
|
+
require File.expand_path("#{dir}/../../lib/treetop/syntax")
|
8
|
+
|
9
|
+
module SyntaxTestHelper
|
10
|
+
def assert_grammar
|
11
|
+
g = yield
|
12
|
+
assert_not_nil g
|
13
|
+
flunk "Badly generated parser" unless g
|
14
|
+
@parser = eval("#{g}.new")
|
15
|
+
end
|
16
|
+
|
17
|
+
def parse(input)
|
18
|
+
result = @parser.parse(input)
|
19
|
+
unless result
|
20
|
+
puts @parser.terminal_failures.join("\n")
|
21
|
+
end
|
22
|
+
assert_not_nil result
|
23
|
+
if result
|
24
|
+
assert_equal input, result.text_value
|
25
|
+
end
|
26
|
+
result
|
27
|
+
end
|
28
|
+
end
|
@@ -1,8 +1,10 @@
|
|
1
1
|
module Treetop
|
2
2
|
module Compiler
|
3
|
+
AUTOGENERATED = "# Autogenerated from a Treetop grammar. Edits may be lost.\n"
|
3
4
|
class GrammarCompiler
|
4
5
|
def compile(source_path, target_path = source_path.gsub(/\.(treetop|tt)\Z/, '.rb'))
|
5
6
|
File.open(target_path, 'w') do |target_file|
|
7
|
+
target_file.write(AUTOGENERATED+"\n\n")
|
6
8
|
target_file.write(ruby_source(source_path))
|
7
9
|
end
|
8
10
|
end
|