cognita-treetop 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/README +164 -0
  2. data/Rakefile +35 -0
  3. data/bin/tt +25 -0
  4. data/doc/contributing_and_planned_features.markdown +103 -0
  5. data/doc/grammar_composition.markdown +65 -0
  6. data/doc/index.markdown +90 -0
  7. data/doc/pitfalls_and_advanced_techniques.markdown +51 -0
  8. data/doc/semantic_interpretation.markdown +189 -0
  9. data/doc/site.rb +110 -0
  10. data/doc/sitegen.rb +60 -0
  11. data/doc/syntactic_recognition.markdown +100 -0
  12. data/doc/using_in_ruby.markdown +21 -0
  13. data/examples/lambda_calculus/arithmetic.rb +551 -0
  14. data/examples/lambda_calculus/arithmetic.treetop +97 -0
  15. data/examples/lambda_calculus/arithmetic_node_classes.rb +7 -0
  16. data/examples/lambda_calculus/arithmetic_test.rb +54 -0
  17. data/examples/lambda_calculus/lambda_calculus +0 -0
  18. data/examples/lambda_calculus/lambda_calculus.rb +718 -0
  19. data/examples/lambda_calculus/lambda_calculus.treetop +132 -0
  20. data/examples/lambda_calculus/lambda_calculus_node_classes.rb +5 -0
  21. data/examples/lambda_calculus/lambda_calculus_test.rb +89 -0
  22. data/examples/lambda_calculus/test_helper.rb +18 -0
  23. data/lib/treetop.rb +8 -0
  24. data/lib/treetop/bootstrap_gen_1_metagrammar.rb +45 -0
  25. data/lib/treetop/compiler.rb +6 -0
  26. data/lib/treetop/compiler/grammar_compiler.rb +40 -0
  27. data/lib/treetop/compiler/lexical_address_space.rb +17 -0
  28. data/lib/treetop/compiler/metagrammar.rb +2887 -0
  29. data/lib/treetop/compiler/metagrammar.treetop +404 -0
  30. data/lib/treetop/compiler/node_classes.rb +19 -0
  31. data/lib/treetop/compiler/node_classes/anything_symbol.rb +18 -0
  32. data/lib/treetop/compiler/node_classes/atomic_expression.rb +14 -0
  33. data/lib/treetop/compiler/node_classes/character_class.rb +19 -0
  34. data/lib/treetop/compiler/node_classes/choice.rb +31 -0
  35. data/lib/treetop/compiler/node_classes/declaration_sequence.rb +24 -0
  36. data/lib/treetop/compiler/node_classes/grammar.rb +28 -0
  37. data/lib/treetop/compiler/node_classes/inline_module.rb +27 -0
  38. data/lib/treetop/compiler/node_classes/nonterminal.rb +13 -0
  39. data/lib/treetop/compiler/node_classes/optional.rb +19 -0
  40. data/lib/treetop/compiler/node_classes/parenthesized_expression.rb +9 -0
  41. data/lib/treetop/compiler/node_classes/parsing_expression.rb +138 -0
  42. data/lib/treetop/compiler/node_classes/parsing_rule.rb +55 -0
  43. data/lib/treetop/compiler/node_classes/predicate.rb +45 -0
  44. data/lib/treetop/compiler/node_classes/repetition.rb +55 -0
  45. data/lib/treetop/compiler/node_classes/sequence.rb +68 -0
  46. data/lib/treetop/compiler/node_classes/terminal.rb +20 -0
  47. data/lib/treetop/compiler/node_classes/transient_prefix.rb +9 -0
  48. data/lib/treetop/compiler/node_classes/treetop_file.rb +9 -0
  49. data/lib/treetop/compiler/ruby_builder.rb +113 -0
  50. data/lib/treetop/ruby_extensions.rb +2 -0
  51. data/lib/treetop/ruby_extensions/string.rb +42 -0
  52. data/lib/treetop/runtime.rb +5 -0
  53. data/lib/treetop/runtime/compiled_parser.rb +87 -0
  54. data/lib/treetop/runtime/interval_skip_list.rb +4 -0
  55. data/lib/treetop/runtime/interval_skip_list/head_node.rb +15 -0
  56. data/lib/treetop/runtime/interval_skip_list/interval_skip_list.rb +200 -0
  57. data/lib/treetop/runtime/interval_skip_list/node.rb +164 -0
  58. data/lib/treetop/runtime/syntax_node.rb +72 -0
  59. data/lib/treetop/runtime/terminal_parse_failure.rb +16 -0
  60. data/lib/treetop/runtime/terminal_syntax_node.rb +17 -0
  61. metadata +119 -0
@@ -0,0 +1,19 @@
module Treetop
  module Compiler
    # Compiles a character-class expression. The node's own source text is
    # handed to Regexp.new inside the generated code, and a successful match
    # consumes exactly one character of input.
    class CharacterClass < AtomicExpression
      # Emits the matching code for this character class through +builder+.
      #
      # address::           forwarded to +super+ (presumably ends up in
      #                     ParsingExpression#compile -- confirm)
      # builder::           object that receives the generated Ruby
      # parent_expression:: optional enclosing expression, forwarded to +super+
      def compile(address, builder, parent_expression = nil)
        super

        # Generated condition: the regexp must match at the current index,
        # not somewhere further along the input.
        builder.if__ "input.index(Regexp.new(#{single_quote(text_value)}), index) == index" do
          assign_result "(#{node_class_name}).new(input, index...(index + 1))"
          extend_result_with_inline_module
          builder << "@index += 1"
        end
        builder.else_ do
          # This used to be a bare string literal: it was evaluated and then
          # thrown away, so the failure call never reached the generated
          # parser. Appending it to the builder makes it part of the output.
          # NOTE(review): relies on the runtime parser providing
          # terminal_parse_failure -- confirm against the runtime.
          builder << "terminal_parse_failure(#{single_quote(characters)})"
          assign_result 'nil'
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
module Treetop
  module Compiler
    # Choice between several alternative expressions. Alternatives are
    # compiled in order; each later alternative is compiled inside the
    # failure branch of the one before it.
    class Choice < ParsingExpression
      # Records the compile context via +super+, declares the result and
      # start-index variables, then compiles every alternative.
      def compile(address, builder, parent_expression = nil)
        super
        begin_comment(self)
        use_vars(:result, :start_index)
        compile_alternatives(alternatives)
        end_comment(self)
      end

      # Compiles the first entry of +alternatives+ under a fresh
      # subexpression address. On success its result is adopted and extended
      # with the declared and inline modules. On failure the remaining
      # alternatives are compiled recursively; once none remain, the index is
      # reset and a failure is assigned.
      def compile_alternatives(alternatives)
        obtain_new_subexpression_address
        candidate = alternatives.first
        candidate.compile(subexpression_address, builder)

        builder.if__(subexpression_success?) do
          assign_result(subexpression_result_var)
          extend_result_with_declared_module
          extend_result_with_inline_module
        end

        builder.else_ do
          if alternatives.size != 1
            compile_alternatives(alternatives[1..-1])
          else
            reset_index
            assign_failure(start_index_var)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
module Treetop
  module Compiler
    # The sequence of declarations found inside a grammar body.
    class DeclarationSequence < Runtime::SyntaxNode
      # Emits a +root+ method that falls back to the name of the first
      # parsing rule (only when at least one rule exists), then compiles
      # every declaration in order, each followed by a newline.
      def compile(builder)
        parsing_rules = rules

        unless parsing_rules.empty?
          builder.method_declaration('root') do
            builder << "@root || :#{parsing_rules.first.name}"
          end
          builder.newline
        end

        declarations.each do |node|
          node.compile(builder)
          builder.newline
        end
      end

      # Declarations whose class is exactly ParsingRule.
      def rules
        declarations.select { |node| node.instance_of?(ParsingRule) }
      end
    end
  end
end
@@ -0,0 +1,28 @@
module Treetop
  module Compiler
    # A grammar declaration. Compiling it produces a module named after the
    # grammar plus a parser class that includes that module.
    class Grammar < Runtime::SyntaxNode
      # Builds the grammar module (including Treetop::Runtime and the
      # compiled declarations) followed by the
      # "<Name>Parser < Treetop::Runtime::CompiledParser" class declaration.
      def compile
        builder = RubyBuilder.new
        grammar_module = grammar_name.text_value

        builder.module_declaration(grammar_module) do
          # account for initial indentation of grammar declaration
          builder.in(indent_level)
          builder << 'include Treetop::Runtime'
          builder.newline
          declaration_sequence.compile(builder)
        end
        builder.newline
        builder.class_declaration("#{parser_name} < Treetop::Runtime::CompiledParser") do
          builder << "include #{grammar_module}"
        end
      end

      # Column of the grammar's first character, minus one.
      def indent_level
        start_column = input.column_of(interval.begin)
        start_column - 1
      end

      # Grammar name with 'Parser' appended.
      def parser_name
        "#{grammar_name.text_value}Parser"
      end
    end
  end
end
@@ -0,0 +1,27 @@
module Treetop
  module Compiler
    # Shared naming logic for inline modules: the module name is the
    # camelized rule name followed by the module's index.
    module InlineModuleMixin
      attr_reader :module_name

      # Computes and remembers the module name; +builder+ is not used here.
      def compile(index, rule, builder)
        camelized_rule = rule.name.treetop_camelize
        @module_name = "#{camelized_rule}#{index}"
      end
    end

    # An inline block of Ruby code attached to an expression, emitted as a
    # module declaration.
    class InlineModule < Runtime::SyntaxNode
      include InlineModuleMixin

      # Names the module via the mixin, then writes a module declaration
      # whose body is the inline Ruby code with one leading newline and all
      # trailing whitespace removed.
      def compile(index, rule, builder)
        super
        builder.module_declaration(module_name) do
          # \A anchors to the very start, so at most one newline is dropped.
          trimmed_code = ruby_code.sub(/\A\n/, '').rstrip
          builder << trimmed_code
        end
      end

      # Text of the second child element.
      def ruby_code
        code_element = elements[1]
        code_element.text_value
      end
    end
  end
end
@@ -0,0 +1,13 @@
module Treetop
  module Compiler
    # A reference to another rule by name.
    class Nonterminal < AtomicExpression
      # Assigns the result of calling the referenced rule's +_nt_<name>+
      # method, or of calling +super+ when the reference is literally
      # 'super', then extends the result with the declared and inline
      # modules when they are present.
      def compile(address, builder, parent_expression = nil)
        super
        use_vars(:result)

        rule_name = text_value
        invocation =
          if rule_name == 'super'
            'super'
          else
            "_nt_#{rule_name}"
          end

        assign_result(invocation)
        extend_result_with_declared_module
        extend_result_with_inline_module
      end
    end
  end
end
@@ -0,0 +1,19 @@
module Treetop
  module Compiler
    # Optional suffix: the parent's atomic expression may match or not.
    class Optional < ParsingExpression
      # Compiles the parent's atomic expression under a new subexpression
      # address. Its result is used when it succeeds; otherwise an epsilon
      # node is assigned instead.
      def compile(address, builder, parent_expression)
        super
        use_vars(:result)
        obtain_new_subexpression_address

        optional_expression = parent_expression.atomic
        optional_expression.compile(subexpression_address, builder)

        builder.if__(subexpression_success?) { assign_result(subexpression_result_var) }
        builder.else_ { assign_result(epsilon_node) }
      end
    end
  end
end
@@ -0,0 +1,9 @@
module Treetop
  module Compiler
    # An expression wrapped in parentheses.
    class ParenthesizedExpression < ParsingExpression
      # Delegates compilation to the third child element, passing every
      # argument through unchanged. +super+ is not called, so no compile
      # context is recorded on this node itself.
      def compile(address, builder, parent_expression = nil)
        wrapped_expression = elements[2]
        wrapped_expression.compile(address, builder, parent_expression)
      end
    end
  end
end
@@ -0,0 +1,138 @@
module Treetop
  module Compiler
    # Base class for compilable parsing expressions. It records the compile
    # context (address, builder, parent expression) and provides helpers for
    # naming generated variables and emitting code through the builder.
    class ParsingExpression < Runtime::SyntaxNode
      attr_reader :address, :builder, :parent_expression
      attr_reader :subexpression_address, :var_symbols

      # Stores the compile context. Subclasses reach this through +super+.
      def compile(address, builder, parent_expression)
        @address = address
        @builder = builder
        @parent_expression = parent_expression
      end

      # Node class name taken from the parent expression, or 'SyntaxNode'
      # when there is no parent or the parent supplies none.
      def node_class_name
        inherited = parent_expression && parent_expression.node_class_name
        inherited || 'SyntaxNode'
      end

      # The parent's node class name, or a falsy value without a parent.
      def declared_module_name
        return parent_expression unless parent_expression

        parent_expression.node_class_name
      end

      # The parent's inline module name, or a falsy value without a parent.
      def inline_module_name
        return parent_expression unless parent_expression

        parent_expression.inline_module_name
      end

      # Formats +arg+ as a trailing ", arg" fragment; empty when +arg+ is
      # nil or false.
      def optional_arg(arg)
        arg ? ", #{arg}" : ''
      end

      # Remembers which variables this expression uses and emits their
      # initialization line (which may be an empty string).
      def use_vars(*var_symbols)
        @var_symbols = var_symbols
        builder << var_initialization
      end

      # Name of the generated result variable.
      def result_var
        var(:result)
      end

      # Name of the generated accumulator variable.
      def accumulator_var
        var(:accumulator)
      end

      # Name of the generated start-index variable.
      def start_index_var
        var(:start_index)
      end

      # Name of the result variable of the current subexpression.
      def subexpression_result_var
        "r#{subexpression_address}"
      end

      # Generated condition for "the subexpression succeeded": simply the
      # subexpression's result variable.
      def subexpression_success?
        subexpression_result_var
      end

      # Takes the next address from the builder for a subexpression.
      def obtain_new_subexpression_address
        @subexpression_address = builder.next_address
      end

      # Asks the builder to accumulate the subexpression result into the
      # accumulator variable.
      def accumulate_subexpression_result
        builder.accumulate(accumulator_var, subexpression_result_var)
      end

      # Asks the builder to assign +value_ruby+ to the result variable.
      def assign_result(value_ruby)
        builder.assign(result_var, value_ruby)
      end

      # Asks the builder to extend the result variable with +module_name+.
      def extend_result(module_name)
        builder.extend(result_var, module_name)
      end

      # Extends the result with the declared module when there is one.
      def extend_result_with_declared_module
        declared = declared_module_name
        extend_result(declared) if declared
      end

      # Extends the result with the inline module when there is one.
      def extend_result_with_inline_module
        inline = inline_module_name
        extend_result(inline) if inline
      end

      # Emits an assignment of the start-index variable to 'self.index'.
      def reset_index
        builder.assign('self.index', start_index_var)
      end

      # Ruby source for a zero-width syntax node at the current index.
      def epsilon_node
        'SyntaxNode.new(input, index...index)'
      end

      # Assigns 'nil' as the result. The argument is accepted but not used.
      def assign_failure(start_index_var)
        assign_result('nil')
      end

      # Builds a parallel-assignment line for every used variable that has an
      # initial value; returns an empty string when none has one.
      def var_initialization
        initialized = var_symbols.select { |symbol| init_value(symbol) }
        return '' if initialized.empty?

        names = initialized.map { |symbol| var(symbol) }
        values = initialized.map { |symbol| init_value(symbol) }
        "#{names.join(', ')} = #{values.join(', ')}"
      end

      # Generated variable name for +var_symbol+: a one-letter prefix
      # followed by this expression's address. Raises for unknown symbols.
      def var(var_symbol)
        prefix =
          case var_symbol
          when :result      then 'r'
          when :accumulator then 's'
          when :start_index then 'i'
          else raise "Unknown var symbol #{var_symbol}."
          end
        "#{prefix}#{address}"
      end

      # Initial value (as Ruby source) for +var_symbol+, or nil when the
      # variable is not initialized up front.
      def init_value(var_symbol)
        case var_symbol
        when :accumulator then '[]'
        when :start_index then 'index'
        end
      end

      # Currently a no-op; the comment emission is disabled.
      def begin_comment(expression)
        #builder << "# begin #{on_one_line(expression)}"
      end

      # Currently a no-op; the comment emission is disabled.
      def end_comment(expression)
        #builder << "# end #{on_one_line(expression)}"
      end

      # The expression's text with every newline replaced by a space.
      def on_one_line(expression)
        source_text = expression.text_value
        source_text.tr("\n", ' ')
      end
    end
  end
end
@@ -0,0 +1,55 @@
module Treetop
  module Compiler
    # A single grammar rule. Compiling it emits the rule's inline modules
    # followed by a +_nt_<name>+ method that consults and fills a node cache.
    class ParsingRule < Runtime::SyntaxNode
      # Emits the inline module declarations, then the rule method.
      def compile(builder)
        compile_inline_module_declarations(builder)
        generate_method_definition(builder)
      end

      # Compiles every inline module of the rule's expression, passing its
      # position in the list, and follows each with a newline.
      def compile_inline_module_declarations(builder)
        parsing_expression.inline_modules.each_with_index do |mod, position|
          mod.compile(position, self, builder)
          builder.newline
        end
      end

      # Emits the rule method: remember the start index, return early on a
      # cache hit, otherwise compile the expression, store its result in the
      # cache and return it.
      def generate_method_definition(builder)
        builder.reset_addresses
        root_address = builder.next_address
        root_result = "r#{root_address}"

        builder.method_declaration(method_name) do
          builder.assign('start_index', 'index')
          generate_cache_lookup(builder)
          builder.newline
          parsing_expression.compile(root_address, builder)
          builder.newline
          generate_cache_storage(builder, root_result)
          builder.newline
          builder << "return #{root_result}"
        end
      end

      # Emits the cache check keyed by rule name and current index. On a hit
      # the generated code advances @index to the end of the cached node
      # (when the cached value is truthy) and returns the cached value.
      def generate_cache_lookup(builder)
        rule_cache = "node_cache[:#{name}]"

        builder.if_("#{rule_cache}.has_key?(index)") do
          builder.assign('cached', "#{rule_cache}[index]")
          builder << '@index = cached.interval.end if cached'
          builder << 'return cached'
        end
      end

      # Emits the assignment that stores +result_var+ in the cache under the
      # rule name and the start index.
      def generate_cache_storage(builder, result_var)
        cache_entry = "node_cache[:#{name}][start_index]"
        builder.assign(cache_entry, result_var)
      end

      # Name of the generated method for this rule.
      def method_name
        "_nt_#{name}"
      end

      # The rule's name: the text of its nonterminal.
      def name
        nonterminal.text_value
      end
    end
  end
end
@@ -0,0 +1,45 @@
module Treetop
  module Compiler
    # Shared behaviour of the and/not predicates: compile the parent's
    # prefixed expression, then let the subclass decide what success and
    # failure of that expression mean.
    class Predicate < ParsingExpression
      # Compiles the prefixed expression under a new subexpression address
      # and dispatches to +when_success+ / +when_failure+ (supplied by
      # subclasses) in the two branches.
      def compile(address, builder, parent_expression)
        super
        begin_comment(parent_expression)
        use_vars(:result, :start_index)
        obtain_new_subexpression_address

        predicated_expression = parent_expression.prefixed_expression
        predicated_expression.compile(subexpression_address, builder)

        builder.if__(subexpression_success?) do
          when_success
        end
        builder.else_ do
          when_failure
        end
        end_comment(parent_expression)
      end

      # Zero-argument variant that forwards the start-index variable to the
      # inherited implementation.
      def assign_failure
        super(start_index_var)
      end

      # Resets the index and assigns an epsilon node as the result.
      def assign_success
        reset_index
        assign_result(epsilon_node)
      end
    end

    # Succeeds exactly when the prefixed expression succeeds.
    class AndPredicate < Predicate
      def when_success
        assign_success
      end

      def when_failure
        assign_failure
      end
    end

    # Succeeds exactly when the prefixed expression fails.
    class NotPredicate < Predicate
      def when_success
        assign_failure
      end

      def when_failure
        assign_success
      end
    end
  end
end
@@ -0,0 +1,55 @@
module Treetop
  module Compiler
    # Shared behaviour of the repetition suffixes: compile the parent's
    # atomic expression inside a builder loop, accumulating each success and
    # breaking out on the first failure.
    class Repetition < ParsingExpression
      # Emits the accumulation loop. Subclasses call this via +super+ and
      # then decide how the final result is assigned.
      def compile(address, builder, parent_expression)
        super
        repeated_expression = parent_expression.atomic
        begin_comment(parent_expression)
        use_vars(:result, :accumulator, :start_index)

        builder.loop do
          obtain_new_subexpression_address
          repeated_expression.compile(subexpression_address, builder)

          builder.if__(subexpression_success?) { accumulate_subexpression_result }
          builder.else_ { builder.break }
        end
      end

      # Inline module name of the parent expression (no nil guard here).
      def inline_module_name
        parent_expression.inline_module_name
      end

      # Assigns a new node spanning start index to current index, carrying
      # the accumulated results, and extends it with the inline module when
      # one is present.
      def assign_and_extend_result
        span = "#{start_index_var}...index"
        assign_result("#{node_class_name}.new(input, #{span}, #{accumulator_var})")
        extend_result_with_inline_module
      end
    end


    # Repetition whose result is always assigned after the loop.
    class ZeroOrMore < Repetition
      def compile(address, builder, parent_expression)
        super
        assign_and_extend_result
        end_comment(parent_expression)
      end
    end

    # Repetition that fails when the accumulator is empty after the loop.
    class OneOrMore < Repetition
      def compile(address, builder, parent_expression)
        super

        builder.if__("#{accumulator_var}.empty?") do
          reset_index
          assign_failure(start_index_var)
        end
        builder.else_ do
          assign_and_extend_result
        end
        end_comment(parent_expression)
      end
    end
  end
end