cognita-treetop 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +164 -0
- data/Rakefile +35 -0
- data/bin/tt +25 -0
- data/doc/contributing_and_planned_features.markdown +103 -0
- data/doc/grammar_composition.markdown +65 -0
- data/doc/index.markdown +90 -0
- data/doc/pitfalls_and_advanced_techniques.markdown +51 -0
- data/doc/semantic_interpretation.markdown +189 -0
- data/doc/site.rb +110 -0
- data/doc/sitegen.rb +60 -0
- data/doc/syntactic_recognition.markdown +100 -0
- data/doc/using_in_ruby.markdown +21 -0
- data/examples/lambda_calculus/arithmetic.rb +551 -0
- data/examples/lambda_calculus/arithmetic.treetop +97 -0
- data/examples/lambda_calculus/arithmetic_node_classes.rb +7 -0
- data/examples/lambda_calculus/arithmetic_test.rb +54 -0
- data/examples/lambda_calculus/lambda_calculus +0 -0
- data/examples/lambda_calculus/lambda_calculus.rb +718 -0
- data/examples/lambda_calculus/lambda_calculus.treetop +132 -0
- data/examples/lambda_calculus/lambda_calculus_node_classes.rb +5 -0
- data/examples/lambda_calculus/lambda_calculus_test.rb +89 -0
- data/examples/lambda_calculus/test_helper.rb +18 -0
- data/lib/treetop.rb +8 -0
- data/lib/treetop/bootstrap_gen_1_metagrammar.rb +45 -0
- data/lib/treetop/compiler.rb +6 -0
- data/lib/treetop/compiler/grammar_compiler.rb +40 -0
- data/lib/treetop/compiler/lexical_address_space.rb +17 -0
- data/lib/treetop/compiler/metagrammar.rb +2887 -0
- data/lib/treetop/compiler/metagrammar.treetop +404 -0
- data/lib/treetop/compiler/node_classes.rb +19 -0
- data/lib/treetop/compiler/node_classes/anything_symbol.rb +18 -0
- data/lib/treetop/compiler/node_classes/atomic_expression.rb +14 -0
- data/lib/treetop/compiler/node_classes/character_class.rb +19 -0
- data/lib/treetop/compiler/node_classes/choice.rb +31 -0
- data/lib/treetop/compiler/node_classes/declaration_sequence.rb +24 -0
- data/lib/treetop/compiler/node_classes/grammar.rb +28 -0
- data/lib/treetop/compiler/node_classes/inline_module.rb +27 -0
- data/lib/treetop/compiler/node_classes/nonterminal.rb +13 -0
- data/lib/treetop/compiler/node_classes/optional.rb +19 -0
- data/lib/treetop/compiler/node_classes/parenthesized_expression.rb +9 -0
- data/lib/treetop/compiler/node_classes/parsing_expression.rb +138 -0
- data/lib/treetop/compiler/node_classes/parsing_rule.rb +55 -0
- data/lib/treetop/compiler/node_classes/predicate.rb +45 -0
- data/lib/treetop/compiler/node_classes/repetition.rb +55 -0
- data/lib/treetop/compiler/node_classes/sequence.rb +68 -0
- data/lib/treetop/compiler/node_classes/terminal.rb +20 -0
- data/lib/treetop/compiler/node_classes/transient_prefix.rb +9 -0
- data/lib/treetop/compiler/node_classes/treetop_file.rb +9 -0
- data/lib/treetop/compiler/ruby_builder.rb +113 -0
- data/lib/treetop/ruby_extensions.rb +2 -0
- data/lib/treetop/ruby_extensions/string.rb +42 -0
- data/lib/treetop/runtime.rb +5 -0
- data/lib/treetop/runtime/compiled_parser.rb +87 -0
- data/lib/treetop/runtime/interval_skip_list.rb +4 -0
- data/lib/treetop/runtime/interval_skip_list/head_node.rb +15 -0
- data/lib/treetop/runtime/interval_skip_list/interval_skip_list.rb +200 -0
- data/lib/treetop/runtime/interval_skip_list/node.rb +164 -0
- data/lib/treetop/runtime/syntax_node.rb +72 -0
- data/lib/treetop/runtime/terminal_parse_failure.rb +16 -0
- data/lib/treetop/runtime/terminal_syntax_node.rb +17 -0
- metadata +119 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Compiler node for a character-class expression. Its text_value is used
# verbatim as the source of a Regexp in the generated parser code.
class CharacterClass < AtomicExpression
  # Emits Ruby that tests the input at the current index against this
  # character class.
  #
  # address           - address forwarded to the superclass compile.
  # builder           - builder that receives the generated Ruby.
  # parent_expression - optional enclosing expression (defaults to nil).
  def compile(address, builder, parent_expression = nil)
    super

    # The generated condition only succeeds when the first regexp match
    # found from `index` onwards starts exactly at `index`.
    builder.if__ "input.index(Regexp.new(#{single_quote(text_value)}), index) == index" do
      # A character class consumes exactly one character.
      assign_result "(#{node_class_name}).new(input, index...(index + 1))"
      extend_result_with_inline_module
      builder << "@index += 1"
    end
    builder.else_ do
      # NOTE(review): this branch used to contain the bare string literal
      #   "terminal_parse_failure(#{single_quote(characters)})"
      # which was evaluated and discarded (never handed to the builder), so
      # it emitted nothing. The dead statement is removed; generated output
      # is unchanged. If failures should be recorded here, emit it with
      # `builder << ...` instead -- confirm the intended behavior first.
      assign_result 'nil'
    end
  end
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Compiler node for an ordered choice between alternative expressions.
class Choice < ParsingExpression
  # Emits the code for the whole choice, wrapped in begin/end comment hooks.
  #
  # address           - address used to name this expression's variables.
  # builder           - builder that receives the generated Ruby.
  # parent_expression - optional enclosing expression (defaults to nil).
  def compile(address, builder, parent_expression = nil)
    super
    begin_comment(self)
    use_vars(:result, :start_index)
    compile_alternatives(alternatives)
    end_comment(self)
  end

  # Emits code for the first alternative, then nests the code for the
  # remaining alternatives inside its else-branch. Once no alternatives are
  # left, the else-branch resets the index and assigns a failure result.
  #
  # alternatives - non-empty list of expressions still to be tried.
  def compile_alternatives(alternatives)
    obtain_new_subexpression_address
    candidate = alternatives.first
    candidate.compile(subexpression_address, builder)

    builder.if__(subexpression_success?) do
      assign_result(subexpression_result_var)
      extend_result_with_declared_module
      extend_result_with_inline_module
    end

    builder.else_ do
      if alternatives.size > 1
        # Recurse on the tail; each level obtains a fresh subexpression
        # address before compiling its own candidate.
        compile_alternatives(alternatives[1..-1])
      else
        reset_index
        assign_failure(start_index_var)
      end
    end
  end
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Compiler node holding the sequence of declarations found in a grammar.
class DeclarationSequence < Runtime::SyntaxNode

  # Emits a `root` method (only when at least one parsing rule exists) and
  # then compiles every declaration, each followed by a newline.
  #
  # builder - builder that receives the generated Ruby.
  def compile(builder)
    first_rule = rules.first

    if first_rule
      # The generated `root` falls back to the first declared rule when
      # @root has not been set.
      builder.method_declaration("root") do
        builder << "@root || :#{first_rule.name}"
      end
      builder.newline
    end

    declarations.each do |item|
      item.compile(builder)
      builder.newline
    end
  end

  # Returns the declarations whose class is exactly ParsingRule.
  def rules
    declarations.select { |item| item.instance_of?(ParsingRule) }
  end
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Compiler node for a complete grammar declaration.
class Grammar < Runtime::SyntaxNode
  # Generates a module named after the grammar (containing the compiled
  # declarations) followed by a parser class that includes that module.
  # Returns whatever the builder's class_declaration call returns.
  def compile
    builder = RubyBuilder.new
    module_name = grammar_name.text_value

    builder.module_declaration(module_name) do
      builder.in(indent_level) # account for initial indentation of grammar declaration
      builder << "include Treetop::Runtime"
      builder.newline
      declaration_sequence.compile(builder)
    end

    builder.newline

    builder.class_declaration("#{parser_name} < Treetop::Runtime::CompiledParser") do
      builder << "include #{module_name}"
    end
  end

  # One less than the column at which this grammar node starts in the input.
  def indent_level
    start_column = input.column_of(interval.begin)
    start_column - 1
  end

  # Name of the generated parser class: the grammar name plus 'Parser'.
  def parser_name
    "#{grammar_name.text_value}Parser"
  end
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Shared behavior for nodes that need a generated module name.
module InlineModuleMixin
  # Name computed by the most recent call to #compile (nil before that).
  attr_reader :module_name

  # Derives the module name from the rule's camelized name and the index,
  # stores it, and returns it.
  #
  # index   - value appended to the camelized rule name.
  # rule    - object whose `name` responds to `treetop_camelize`.
  # builder - unused here; accepted so includers can share the signature.
  def compile(index, rule, builder)
    camelized_rule_name = rule.name.treetop_camelize
    @module_name = "#{camelized_rule_name}#{index}"
  end
end
|
10
|
+
|
11
|
+
# Compiler node for a block of Ruby code embedded inline in a grammar.
class InlineModule < Runtime::SyntaxNode

  include InlineModuleMixin

  # Computes the module name (via the mixin) and emits a module declaration
  # whose body is the embedded Ruby code.
  #
  # index   - forwarded to the mixin to build the module name.
  # rule    - forwarded to the mixin to build the module name.
  # builder - builder that receives the generated Ruby.
  def compile(index, rule, builder)
    super
    builder.module_declaration(module_name) do
      # Drop a single leading newline and any trailing whitespace.
      trimmed_code = ruby_code.sub(/\A\n/, '').rstrip
      builder << trimmed_code
    end
  end

  # Raw text of the second child element of this node.
  def ruby_code
    code_node = elements[1]
    code_node.text_value
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Compiler node for a reference to another rule by name.
class Nonterminal < AtomicExpression
  # Emits an assignment of the referenced rule's method call to this
  # expression's result variable, then extends the result with the declared
  # and inline modules when present.
  #
  # address           - address used to name this expression's variables.
  # builder           - builder that receives the generated Ruby.
  # parent_expression - optional enclosing expression (defaults to nil).
  def compile(address, builder, parent_expression = nil)
    super
    use_vars(:result)

    # A reference literally named 'super' is emitted as a bare `super`;
    # any other name becomes a call to its `_nt_`-prefixed method.
    rule_name = text_value
    invocation =
      if rule_name == 'super'
        'super'
      else
        "_nt_#{rule_name}"
      end

    assign_result(invocation)
    extend_result_with_declared_module
    extend_result_with_inline_module
  end
end
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Compiler node for an optional expression.
class Optional < ParsingExpression
  # Emits code that compiles the parent's atomic expression and uses its
  # result when it succeeds, or an empty (epsilon) node otherwise.
  #
  # address           - address used to name this expression's variables.
  # builder           - builder that receives the generated Ruby.
  # parent_expression - enclosing expression; must respond to `atomic`.
  def compile(address, builder, parent_expression)
    super
    use_vars(:result)
    obtain_new_subexpression_address

    optional_expression = parent_expression.atomic
    optional_expression.compile(subexpression_address, builder)

    builder.if__(subexpression_success?) { assign_result(subexpression_result_var) }
    builder.else_ { assign_result(epsilon_node) }
  end
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Base class for compiler nodes that emit parsing code. It stores the
# compilation context and offers helpers for naming generated variables and
# for emitting common statements through the builder.
class ParsingExpression < Runtime::SyntaxNode
  attr_reader :address, :builder, :subexpression_address, :var_symbols, :parent_expression

  # Records the compilation context for later use by the helper methods.
  #
  # address           - address used to name this expression's variables.
  # builder           - builder that receives the generated Ruby.
  # parent_expression - enclosing expression, or nil.
  def compile(address, builder, parent_expression)
    @builder = builder
    @address = address
    @parent_expression = parent_expression
  end

  # Node class name taken from the parent expression, falling back to
  # 'SyntaxNode' when there is no parent or the parent supplies none.
  def node_class_name
    from_parent = parent_expression && parent_expression.node_class_name
    from_parent || 'SyntaxNode'
  end

  # The parent's node_class_name, or a falsy value when there is no parent.
  def declared_module_name
    parent = parent_expression
    parent && parent.node_class_name
  end

  # The parent's inline_module_name, or a falsy value when there is no parent.
  def inline_module_name
    parent = parent_expression
    parent && parent.inline_module_name
  end

  # Formats arg as a trailing ", arg" fragment, or '' when arg is falsy.
  def optional_arg(arg)
    arg ? ", #{arg}" : ''
  end

  # Remembers which variables this expression uses and emits their
  # initialization line (an empty string when none need initializing).
  def use_vars(*var_symbols)
    @var_symbols = var_symbols
    builder << var_initialization
  end

  # Name of the generated result variable for this expression.
  def result_var
    var(:result)
  end

  # Name of the generated accumulator variable for this expression.
  def accumulator_var
    var(:accumulator)
  end

  # Name of the generated start-index variable for this expression.
  def start_index_var
    var(:start_index)
  end

  # Name of the result variable at the current subexpression address.
  def subexpression_result_var
    "r#{subexpression_address}"
  end

  # Ruby condition used to test subexpression success: the result variable
  # itself.
  def subexpression_success?
    subexpression_result_var
  end

  # Asks the builder for its next address and stores it as the current
  # subexpression address.
  def obtain_new_subexpression_address
    @subexpression_address = builder.next_address
  end

  # Emits code accumulating the subexpression result into the accumulator.
  def accumulate_subexpression_result
    builder.accumulate(accumulator_var, subexpression_result_var)
  end

  # Emits an assignment of value_ruby (a Ruby source fragment) to the
  # result variable.
  def assign_result(value_ruby)
    builder.assign(result_var, value_ruby)
  end

  # Emits code extending the result variable with module_name.
  def extend_result(module_name)
    builder.extend(result_var, module_name)
  end

  # Extends the result with the declared module, when there is one.
  def extend_result_with_declared_module
    declared = declared_module_name
    extend_result(declared_module_name) if declared
  end

  # Extends the result with the inline module, when there is one.
  def extend_result_with_inline_module
    inline = inline_module_name
    extend_result(inline_module_name) if inline
  end

  # Emits code restoring the parser index to the saved start index.
  def reset_index
    builder.assign('self.index', start_index_var)
  end

  # Ruby source for an empty node located at the current index.
  def epsilon_node
    "SyntaxNode.new(input, index...index)"
  end

  # Emits code assigning nil to the result variable. The argument is
  # accepted but not used.
  def assign_failure(start_index_var)
    assign_result('nil')
  end

  # Builds a parallel-assignment line initializing every used variable that
  # has an initial value; returns "" when no variable needs one.
  def var_initialization
    # Filter before naming: symbols without an initial value are skipped
    # and never passed to #var.
    initialized = var_symbols.select { |symbol| init_value(symbol) }
    return "" if initialized.empty?

    names = initialized.map { |symbol| var(symbol) }
    values = initialized.map { |symbol| init_value(symbol) }
    names.join(', ') + ' = ' + values.join(', ')
  end

  # Maps a variable symbol to its generated name: a one-letter prefix
  # followed by this expression's address. Raises for unknown symbols.
  def var(var_symbol)
    prefix = { :result => 'r', :accumulator => 's', :start_index => 'i' }[var_symbol]
    raise "Unknown var symbol #{var_symbol}." unless prefix

    "#{prefix}#{address}"
  end

  # Initial value (as Ruby source) for a variable symbol, or nil when the
  # variable has none.
  def init_value(var_symbol)
    { :accumulator => '[]', :start_index => 'index' }[var_symbol]
  end

  # Hook for a leading comment in the generated code; currently disabled.
  def begin_comment(expression)
    #builder << "# begin #{on_one_line(expression)}"
  end

  # Hook for a trailing comment in the generated code; currently disabled.
  def end_comment(expression)
    #builder << "# end #{on_one_line(expression)}"
  end

  # The expression's text with every newline replaced by a space.
  def on_one_line(expression)
    source_text = expression.text_value
    source_text.tr("\n", ' ')
  end
end
|
137
|
+
end
|
138
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Compiler node for a single named parsing rule.
class ParsingRule < Runtime::SyntaxNode

  # Emits the rule's inline module declarations followed by its method.
  #
  # builder - builder that receives the generated Ruby.
  def compile(builder)
    compile_inline_module_declarations(builder)
    generate_method_definition(builder)
  end

  # Compiles every inline module of the rule's expression, passing each its
  # position and this rule, and emits a newline after each one.
  def compile_inline_module_declarations(builder)
    parsing_expression.inline_modules.each_with_index do |inline_mod, position|
      inline_mod.compile(position, self, builder)
      builder.newline
    end
  end

  # Emits the rule's method: save the start index, consult the node cache,
  # run the compiled expression, store the result in the cache, return it.
  def generate_method_definition(builder)
    # Addresses restart for every rule; the first one belongs to the rule's
    # top-level expression and names the returned result variable.
    builder.reset_addresses
    root_address = builder.next_address
    root_result = "r#{root_address}"

    builder.method_declaration(method_name) do
      builder.assign('start_index', 'index')
      generate_cache_lookup(builder)
      builder.newline
      parsing_expression.compile(root_address, builder)
      builder.newline
      generate_cache_storage(builder, root_result)
      builder.newline
      builder << "return #{root_result}"
    end
  end

  # Emits an early return of the cached node for the current index; the
  # generated code first advances @index to the cached node's end when the
  # cached value is not nil/false.
  def generate_cache_lookup(builder)
    cache_slot = "node_cache[:#{name}]"

    builder.if_("#{cache_slot}.has_key?(index)") do
      builder.assign('cached', "#{cache_slot}[index]")
      builder << '@index = cached.interval.end if cached'
      builder << 'return cached'
    end
  end

  # Emits code storing result_var in this rule's cache under start_index.
  def generate_cache_storage(builder, result_var)
    builder.assign("node_cache[:#{name}][start_index]", result_var)
  end

  # Name of the generated method: the rule name prefixed with `_nt_`.
  def method_name
    "_nt_#{name}"
  end

  # The rule's name: the text of its nonterminal.
  def name
    nonterminal.text_value
  end
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Base class for predicate expressions. Subclasses supply `when_success`
# and `when_failure` to decide what is emitted in each branch.
class Predicate < ParsingExpression
  # Emits the prefixed expression followed by an if/else that dispatches to
  # the subclass hooks.
  #
  # address           - address used to name this expression's variables.
  # builder           - builder that receives the generated Ruby.
  # parent_expression - enclosing expression; must respond to
  #                     `prefixed_expression`.
  def compile(address, builder, parent_expression)
    super
    begin_comment(parent_expression)
    use_vars(:result, :start_index)
    obtain_new_subexpression_address

    tested_expression = parent_expression.prefixed_expression
    tested_expression.compile(subexpression_address, builder)

    builder.if__(subexpression_success?) do
      when_success
    end
    builder.else_ do
      when_failure
    end
    end_comment(parent_expression)
  end

  # Argument-free variant: forwards this expression's own start-index
  # variable to the inherited implementation.
  def assign_failure
    super(start_index_var)
  end

  # Emits code that restores the index and assigns an empty (epsilon) node
  # as the result.
  def assign_success
    reset_index
    assign_result(epsilon_node)
  end
end
|
24
|
+
|
25
|
+
# Predicate that succeeds when its prefixed expression succeeds.
class AndPredicate < Predicate
  # Subexpression matched: emit the success assignment.
  def when_success
    assign_success
  end

  # Subexpression did not match: emit the failure assignment.
  def when_failure
    assign_failure
  end
end
|
34
|
+
|
35
|
+
# Predicate that succeeds when its prefixed expression fails.
class NotPredicate < Predicate
  # Subexpression matched: emit the failure assignment.
  def when_success
    assign_failure
  end

  # Subexpression did not match: emit the success assignment.
  def when_failure
    assign_success
  end
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Treetop
|
2
|
+
module Compiler
|
3
|
+
# Base class for repetition expressions. It emits the loop that gathers
# results; subclasses decide what to emit after the loop.
class Repetition < ParsingExpression
  # Emits a loop that keeps compiling the parent's atomic expression,
  # accumulating each successful result and breaking on the first failure.
  #
  # address           - address used to name this expression's variables.
  # builder           - builder that receives the generated Ruby.
  # parent_expression - enclosing expression; must respond to `atomic`.
  def compile(address, builder, parent_expression)
    super
    element = parent_expression.atomic
    begin_comment(parent_expression)
    use_vars(:result, :accumulator, :start_index)

    builder.loop do
      obtain_new_subexpression_address
      element.compile(subexpression_address, builder)

      builder.if__(subexpression_success?) { accumulate_subexpression_result }
      builder.else_ { builder.break }
    end
  end

  # Reads the inline module name straight from the parent expression
  # (unlike the inherited version, there is no nil check on the parent).
  def inline_module_name
    parent_expression.inline_module_name
  end

  # Emits construction of the result node spanning from the start index to
  # the current index with the accumulated elements, then extends it with
  # the inline module when present.
  def assign_and_extend_result
    node_construction = "#{node_class_name}.new(input, #{start_index_var}...index, #{accumulator_var})"
    assign_result(node_construction)
    extend_result_with_inline_module
  end
end
|
31
|
+
|
32
|
+
|
33
|
+
# Repetition whose result is assigned unconditionally after the loop.
class ZeroOrMore < Repetition
  # Emits the repetition loop, then always builds the result node.
  #
  # address           - address used to name this expression's variables.
  # builder           - builder that receives the generated Ruby.
  # parent_expression - enclosing expression.
  def compile(address, builder, parent_expression)
    super
    assign_and_extend_result
    end_comment(parent_expression)
  end
end
|
40
|
+
|
41
|
+
# Repetition that fails when the loop accumulated nothing.
class OneOrMore < Repetition
  # Emits the repetition loop, then an if/else: an empty accumulator resets
  # the index and assigns failure, otherwise the result node is built.
  #
  # address           - address used to name this expression's variables.
  # builder           - builder that receives the generated Ruby.
  # parent_expression - enclosing expression.
  def compile(address, builder, parent_expression)
    super

    nothing_matched = "#{accumulator_var}.empty?"
    builder.if__(nothing_matched) do
      reset_index
      assign_failure(start_index_var)
    end
    builder.else_ { assign_and_extend_result }

    end_comment(parent_expression)
  end
end
|
54
|
+
end
|
55
|
+
end
|