regex-treetop 1.4.8
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +19 -0
- data/README.md +164 -0
- data/Rakefile +19 -0
- data/bin/tt +112 -0
- data/doc/contributing_and_planned_features.markdown +103 -0
- data/doc/grammar_composition.markdown +65 -0
- data/doc/index.markdown +90 -0
- data/doc/pitfalls_and_advanced_techniques.markdown +51 -0
- data/doc/semantic_interpretation.markdown +189 -0
- data/doc/site.rb +112 -0
- data/doc/sitegen.rb +65 -0
- data/doc/syntactic_recognition.markdown +100 -0
- data/doc/using_in_ruby.markdown +21 -0
- data/examples/lambda_calculus/arithmetic.rb +551 -0
- data/examples/lambda_calculus/arithmetic.treetop +97 -0
- data/examples/lambda_calculus/arithmetic_node_classes.rb +7 -0
- data/examples/lambda_calculus/arithmetic_test.rb +54 -0
- data/examples/lambda_calculus/lambda_calculus +0 -0
- data/examples/lambda_calculus/lambda_calculus.rb +718 -0
- data/examples/lambda_calculus/lambda_calculus.treetop +132 -0
- data/examples/lambda_calculus/lambda_calculus_node_classes.rb +5 -0
- data/examples/lambda_calculus/lambda_calculus_test.rb +89 -0
- data/examples/lambda_calculus/test_helper.rb +18 -0
- data/lib/treetop.rb +16 -0
- data/lib/treetop/bootstrap_gen_1_metagrammar.rb +45 -0
- data/lib/treetop/compiler.rb +6 -0
- data/lib/treetop/compiler/grammar_compiler.rb +44 -0
- data/lib/treetop/compiler/lexical_address_space.rb +17 -0
- data/lib/treetop/compiler/metagrammar.rb +3392 -0
- data/lib/treetop/compiler/metagrammar.treetop +454 -0
- data/lib/treetop/compiler/node_classes.rb +21 -0
- data/lib/treetop/compiler/node_classes/anything_symbol.rb +18 -0
- data/lib/treetop/compiler/node_classes/atomic_expression.rb +14 -0
- data/lib/treetop/compiler/node_classes/character_class.rb +28 -0
- data/lib/treetop/compiler/node_classes/choice.rb +31 -0
- data/lib/treetop/compiler/node_classes/declaration_sequence.rb +24 -0
- data/lib/treetop/compiler/node_classes/grammar.rb +28 -0
- data/lib/treetop/compiler/node_classes/inline_module.rb +27 -0
- data/lib/treetop/compiler/node_classes/nonterminal.rb +13 -0
- data/lib/treetop/compiler/node_classes/optional.rb +19 -0
- data/lib/treetop/compiler/node_classes/parenthesized_expression.rb +9 -0
- data/lib/treetop/compiler/node_classes/parsing_expression.rb +146 -0
- data/lib/treetop/compiler/node_classes/parsing_rule.rb +55 -0
- data/lib/treetop/compiler/node_classes/predicate.rb +45 -0
- data/lib/treetop/compiler/node_classes/predicate_block.rb +16 -0
- data/lib/treetop/compiler/node_classes/regex.rb +23 -0
- data/lib/treetop/compiler/node_classes/repetition.rb +55 -0
- data/lib/treetop/compiler/node_classes/sequence.rb +71 -0
- data/lib/treetop/compiler/node_classes/terminal.rb +20 -0
- data/lib/treetop/compiler/node_classes/transient_prefix.rb +9 -0
- data/lib/treetop/compiler/node_classes/treetop_file.rb +9 -0
- data/lib/treetop/compiler/ruby_builder.rb +113 -0
- data/lib/treetop/ruby_extensions.rb +2 -0
- data/lib/treetop/ruby_extensions/string.rb +42 -0
- data/lib/treetop/runtime.rb +5 -0
- data/lib/treetop/runtime/compiled_parser.rb +118 -0
- data/lib/treetop/runtime/interval_skip_list.rb +4 -0
- data/lib/treetop/runtime/interval_skip_list/head_node.rb +15 -0
- data/lib/treetop/runtime/interval_skip_list/interval_skip_list.rb +200 -0
- data/lib/treetop/runtime/interval_skip_list/node.rb +164 -0
- data/lib/treetop/runtime/syntax_node.rb +114 -0
- data/lib/treetop/runtime/terminal_parse_failure.rb +16 -0
- data/lib/treetop/runtime/terminal_syntax_node.rb +17 -0
- data/lib/treetop/version.rb +9 -0
- metadata +138 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
dir = File.dirname(__FILE__)
|
2
|
+
require File.join(dir, *%w[node_classes parsing_expression])
|
3
|
+
require File.join(dir, *%w[node_classes atomic_expression])
|
4
|
+
require File.join(dir, *%w[node_classes inline_module])
|
5
|
+
require File.join(dir, *%w[node_classes predicate_block])
|
6
|
+
require File.join(dir, *%w[node_classes treetop_file])
|
7
|
+
require File.join(dir, *%w[node_classes grammar])
|
8
|
+
require File.join(dir, *%w[node_classes declaration_sequence])
|
9
|
+
require File.join(dir, *%w[node_classes parsing_rule])
|
10
|
+
require File.join(dir, *%w[node_classes parenthesized_expression])
|
11
|
+
require File.join(dir, *%w[node_classes nonterminal])
|
12
|
+
require File.join(dir, *%w[node_classes terminal])
|
13
|
+
require File.join(dir, *%w[node_classes regex])
|
14
|
+
require File.join(dir, *%w[node_classes anything_symbol])
|
15
|
+
require File.join(dir, *%w[node_classes character_class])
|
16
|
+
require File.join(dir, *%w[node_classes sequence])
|
17
|
+
require File.join(dir, *%w[node_classes choice])
|
18
|
+
require File.join(dir, *%w[node_classes repetition])
|
19
|
+
require File.join(dir, *%w[node_classes optional])
|
20
|
+
require File.join(dir, *%w[node_classes predicate])
|
21
|
+
require File.join(dir, *%w[node_classes transient_prefix])
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiler node for the "anything" symbol. It emits, through the builder,
    # Ruby source that consumes exactly one character of input when one is
    # available.
    class AnythingSymbol < AtomicExpression
      # Emits the matching code for this expression.
      #
      # When the generated condition "index < input_length" holds, the
      # generated code instantiates a node covering one character, applies the
      # inline module (if any) and advances @index by one. Otherwise it
      # records a terminal parse failure for "any character" and assigns nil
      # as the result.
      def compile(address, builder, parent_expression = nil)
        super

        builder.if__("index < input_length") do
          node_ruby = "instantiate_node(#{node_class_name},input, index...(index + 1))"
          assign_result(node_ruby)
          extend_result_with_inline_module
          builder << "@index += 1"
        end

        builder.else_ do
          builder << 'terminal_parse_failure("any character")'
          assign_result('nil')
        end
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Base class for expressions that have no nested inline modules of their
    # own.
    class AtomicExpression < ParsingExpression
      # An atomic expression contributes no inline modules.
      def inline_modules
        []
      end

      # Returns +string+ rendered as Ruby source for a single-quoted string
      # literal: every backslash is doubled first, then every single quote is
      # preceded by a backslash, and the result is wrapped in single quotes.
      def single_quote(string)
        with_doubled_backslashes = string.gsub(/\\/) { '\\\\' }
        escaped = with_doubled_backslashes.gsub(/'/) { "\\'" }
        "'#{escaped}'"
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiler node for a character class. The class text is passed to the
    # generated parser's has_terminal? call as a \G-prefixed pattern string.
    class CharacterClass < AtomicExpression
      # Emits the matching code for this character class.
      #
      # On a match the generated code either instantiates a one-character
      # node (when this expression has address 0 or is decorated) or assigns
      # the lazily-instantiated placeholder, and then advances @index by one.
      # On a mismatch it assigns nil.
      def compile(address, builder, parent_expression = nil)
        super

        condition = "has_terminal?(#{grounded_regexp(text_value)}, true, index)"
        builder.if__(condition) do
          if address != 0 && !decorated?
            assign_lazily_instantiated_node
          else
            assign_result("instantiate_node(#{node_class_name},input, index...(index + 1))")
            extend_result_with_inline_module
          end
          builder << "@index += 1"
        end

        builder.else_ do
          # NOTE(review): no terminal parse failure is recorded on this path;
          # the original emission is left disabled below.
          # "terminal_parse_failure(#{single_quote(characters)})"
          assign_result('nil')
        end
      end

      # Returns +string+ as Ruby source for a single-quoted string literal
      # that starts with \G: backslashes are doubled first, then single
      # quotes are backslash-escaped.
      def grounded_regexp(string)
        with_doubled_backslashes = string.gsub(/\\/) { '\\\\' }
        escaped = with_doubled_backslashes.gsub(/'/) { "\\'" }
        "'\\G#{escaped}'"
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiler node for a choice between alternatives. Alternatives are tried
    # in order; the generated code for each later alternative is nested in
    # the else-branch of the one before it.
    class Choice < ParsingExpression
      # Declares the result and start-index variables, then emits the nested
      # code for all alternatives.
      def compile(address, builder, parent_expression = nil)
        super
        begin_comment(self)
        use_vars(:result, :start_index)
        compile_alternatives(alternatives)
        end_comment(self)
      end

      # Emits code for the first entry of +alternatives+ and recurses on the
      # remainder inside the failure branch. When the last alternative fails,
      # the generated code resets the index and assigns the failure result.
      def compile_alternatives(alternatives)
        obtain_new_subexpression_address
        alternatives.first.compile(subexpression_address, builder)

        builder.if__(subexpression_success?) do
          assign_result(subexpression_result_var)
          extend_result_with_declared_module
          extend_result_with_inline_module
        end

        builder.else_ do
          if alternatives.size != 1
            # More alternatives remain: try the next one.
            compile_alternatives(alternatives[1..-1])
          else
            reset_index
            assign_failure(start_index_var)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiler node for the sequence of declarations inside a grammar.
    class DeclarationSequence < Runtime::SyntaxNode
      # Emits a "root" method that falls back to the name of the first
      # parsing rule (only when at least one rule exists), then compiles
      # every declaration in order, each followed by a newline.
      def compile(builder)
        first_rule = rules.first
        if first_rule
          builder.method_declaration("root") do
            builder << "@root || :#{first_rule.name}"
          end
          builder.newline
        end

        declarations.each do |item|
          item.compile(builder)
          builder.newline
        end
      end

      # The declarations whose class is exactly ParsingRule.
      def rules
        declarations.select { |item| item.instance_of?(ParsingRule) }
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiler node for a grammar declaration. It produces a module named
    # after the grammar and a parser class that includes that module.
    class Grammar < Runtime::SyntaxNode
      # Builds the Ruby source for this grammar with a fresh RubyBuilder.
      # Returns whatever the final builder.class_declaration call returns.
      def compile
        builder = RubyBuilder.new

        builder.module_declaration(grammar_name.text_value) do
          # account for initial indentation of grammar declaration
          builder.in(indent_level)
          builder << "include Treetop::Runtime"
          builder.newline
          declaration_sequence.compile(builder)
        end

        builder.newline

        parser_header = "#{parser_name} < Treetop::Runtime::CompiledParser"
        builder.class_declaration(parser_header) do
          builder << "include #{grammar_name.text_value}"
        end
      end

      # One less than the column at which this node begins in the input.
      def indent_level
        start_column = input.column_of(interval.begin)
        start_column - 1
      end

      # The grammar name with 'Parser' appended.
      def parser_name
        "#{grammar_name.text_value}Parser"
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Shared naming behavior for inline modules: compiling one records a
    # module name derived from the owning rule's name and a position index.
    module InlineModuleMixin
      attr_reader :module_name

      # Stores and returns the module name: the camelized rule name followed
      # by +index+. +builder+ is not used here.
      def compile(index, builder, rule)
        camelized_rule_name = rule.name.treetop_camelize
        @module_name = "#{camelized_rule_name}#{index}"
      end
    end

    # Compiler node for an inline block of Ruby code attached to an
    # expression.
    class InlineModule < Runtime::SyntaxNode

      include InlineModuleMixin

      # Assigns the module name (via the mixin) and emits a module
      # declaration whose body is the inline Ruby code with one leading
      # newline and any trailing whitespace removed.
      def compile(index, builder, rule)
        super
        builder.module_declaration(module_name) do
          without_leading_newline = ruby_code.gsub(/\A\n/, '')
          builder << without_leading_newline.rstrip
        end
      end

      # The raw text of this node's second element.
      def ruby_code
        code_element = elements[1]
        code_element.text_value
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiler node for a reference to another rule.
    class Nonterminal < AtomicExpression
      # Assigns the result of calling the referenced rule's method
      # ("_nt_<name>"), or of a bare "super" when the reference text is
      # literally 'super', then applies the declared and inline modules.
      def compile(address, builder, parent_expression = nil)
        super
        use_vars(:result)

        rule_call =
          if text_value == 'super'
            'super'
          else
            "_nt_#{text_value}"
          end
        assign_result(rule_call)

        extend_result_with_declared_module
        extend_result_with_inline_module
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiler node for an optional expression.
    class Optional < ParsingExpression
      # Compiles the parent's atomic expression as a subexpression. The
      # generated code uses that subexpression's result when it succeeded and
      # an empty (epsilon) node otherwise.
      #
      # Unlike the sibling node classes, +parent_expression+ has no default
      # here; it is dereferenced directly.
      def compile(address, builder, parent_expression)
        super
        use_vars(:result)
        obtain_new_subexpression_address

        optional_target = parent_expression.atomic
        optional_target.compile(subexpression_address, builder)

        builder.if__(subexpression_success?) do
          assign_result(subexpression_result_var)
        end

        builder.else_ do
          assign_result(epsilon_node)
        end
      end
    end
  end
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Base class for compiler nodes that emit parsing code through a builder.
    #
    # It records the compile context (address, builder, parent expression)
    # and provides helpers for naming generated local variables and for
    # emitting assignments and module extensions against them.
    class ParsingExpression < Runtime::SyntaxNode
      attr_reader :address, :builder, :subexpression_address, :var_symbols, :parent_expression

      # Records the compile context. Subclasses call this via +super+ before
      # emitting code.
      def compile(address, builder, parent_expression)
        @address = address
        @builder = builder
        @parent_expression = parent_expression
      end

      # The parent's node class name, or 'SyntaxNode' when there is no parent
      # or the parent supplies none.
      def node_class_name
        parent_expression && parent_expression.node_class_name || 'SyntaxNode'
      end

      # The parent's node class name, or a falsy value when there is none.
      def declared_module_name
        parent_expression && parent_expression.node_class_name
      end

      # The parent's inline module name, or a falsy value when there is none.
      def inline_module_name
        parent_expression && parent_expression.inline_module_name
      end

      # Truthy when the parent supplies a node class name or an inline module
      # name. (The original tested node_class_name twice in the same ||
      # chain; the redundant operand is removed.)
      def decorated?
        parent_expression && (parent_expression.node_class_name || parent_expression.inline_module_name)
      end

      # Formats +arg+ as a trailing ", arg" argument, or '' when +arg+ is
      # falsy.
      def optional_arg(arg)
        if arg
          ", #{arg}"
        else
          ''
        end
      end

      # Declares which generated variables this expression uses and emits
      # their initialization line.
      def use_vars(*var_symbols)
        @var_symbols = var_symbols
        builder << var_initialization
      end

      def result_var
        var(:result)
      end

      def accumulator_var
        var(:accumulator)
      end

      def start_index_var
        var(:start_index)
      end

      # Name of the result variable of the most recently obtained
      # subexpression address.
      def subexpression_result_var
        "r#{subexpression_address}"
      end

      # Ruby source for the success test of the current subexpression: the
      # result variable itself.
      def subexpression_success?
        subexpression_result_var
      end

      # Takes the next address from the builder for a subexpression.
      def obtain_new_subexpression_address
        @subexpression_address = builder.next_address
      end

      def accumulate_subexpression_result
        builder.accumulate accumulator_var, subexpression_result_var
      end

      def assign_result(value_ruby)
        builder.assign result_var, value_ruby
      end

      def extend_result(module_name)
        builder.extend result_var, module_name
      end

      def extend_result_with_declared_module
        extend_result declared_module_name if declared_module_name
      end

      def extend_result_with_inline_module
        extend_result inline_module_name if inline_module_name
      end

      # Emits an assignment of the saved start index back to @index.
      def reset_index
        builder.assign '@index', start_index_var
      end

      # Ruby source that instantiates an empty node at the current index.
      def epsilon_node
        "instantiate_node(SyntaxNode,input, index...index)"
      end

      # Assigns nil as the result. The +start_index_var+ argument is accepted
      # but not used.
      def assign_failure(start_index_var)
        assign_result("nil")
      end

      # Assigns the literal true as a placeholder result instead of
      # instantiating a node.
      def assign_lazily_instantiated_node
        assign_result("true")
      end

      # Builds one parallel-assignment line that initializes every declared
      # variable having an initial value, or "" when none has one.
      def var_initialization
        left, right = [], []
        var_symbols.each do |symbol|
          # Look the initial value up once; symbols without one are skipped
          # before var is consulted.
          value = init_value(symbol)
          next unless value

          left << var(symbol)
          right << value
        end

        if left.empty?
          ""
        else
          left.join(', ') + ' = ' + right.join(', ')
        end
      end

      # Maps a variable symbol to its generated name, suffixed with this
      # expression's address. Raises for an unknown symbol.
      def var(var_symbol)
        case var_symbol
        when :result then "r#{address}"
        when :accumulator then "s#{address}"
        when :start_index then "i#{address}"
        else raise "Unknown var symbol #{var_symbol}."
        end
      end

      # Initial-value Ruby source for a variable symbol, or nil when the
      # variable is not initialized up front.
      def init_value(var_symbol)
        case var_symbol
        when :accumulator then '[]'
        when :start_index then 'index'
        else nil
        end
      end

      # Comment emission is currently disabled; this is a no-op.
      def begin_comment(expression)
        #builder << "# begin #{on_one_line(expression)}"
      end

      # Comment emission is currently disabled; this is a no-op.
      def end_comment(expression)
        #builder << "# end #{on_one_line(expression)}"
      end

      # The expression's text with newlines replaced by spaces.
      def on_one_line(expression)
        expression.text_value.tr("\n", ' ')
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiler node for a single grammar rule. It emits the rule's inline
    # modules followed by a method named "_nt_<rule name>".
    class ParsingRule < Runtime::SyntaxNode

      # Emits the inline module declarations, then the rule method.
      def compile(builder)
        compile_inline_module_declarations(builder)
        generate_method_definition(builder)
      end

      # Compiles each inline module of the rule's parsing expression, passing
      # its position and this rule, with a newline after each.
      def compile_inline_module_declarations(builder)
        parsing_expression.inline_modules.each_with_index do |mod, position|
          mod.compile(position, builder, self)
          builder.newline
        end
      end

      # Emits the rule method: save the start index, consult the node cache,
      # run the compiled parsing expression, store its result in the cache
      # and finish with the result variable.
      def generate_method_definition(builder)
        builder.reset_addresses
        expression_address = builder.next_address
        result_var = "r#{expression_address}"

        builder.method_declaration(method_name) do
          builder.assign('start_index', 'index')
          generate_cache_lookup(builder)
          builder.newline
          parsing_expression.compile(expression_address, builder)
          builder.newline
          generate_cache_storage(builder, result_var)
          builder.newline
          builder << result_var
        end
      end

      # Emits the early-return path: when the cache already has an entry for
      # the current index, advance @index past a non-nil cached node and
      # return the cached value.
      def generate_cache_lookup(builder)
        rule_cache = "node_cache[:#{name}]"
        builder.if_("#{rule_cache}.has_key?(index)") do
          builder.assign('cached', "#{rule_cache}[index]")
          builder << '@index = cached.interval.end if cached'
          builder << 'return cached'
        end
      end

      # Emits the assignment that stores the result under the start index.
      def generate_cache_storage(builder, result_var)
        cache_slot = "node_cache[:#{name}][start_index]"
        builder.assign(cache_slot, result_var)
      end

      # Name of the generated method for this rule.
      def method_name
        "_nt_#{name}"
      end

      # The rule name: the text of its nonterminal.
      def name
        nonterminal.text_value
      end
    end
  end
end
|