regex-treetop 1.4.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data/LICENSE +19 -0
  2. data/README.md +164 -0
  3. data/Rakefile +19 -0
  4. data/bin/tt +112 -0
  5. data/doc/contributing_and_planned_features.markdown +103 -0
  6. data/doc/grammar_composition.markdown +65 -0
  7. data/doc/index.markdown +90 -0
  8. data/doc/pitfalls_and_advanced_techniques.markdown +51 -0
  9. data/doc/semantic_interpretation.markdown +189 -0
  10. data/doc/site.rb +112 -0
  11. data/doc/sitegen.rb +65 -0
  12. data/doc/syntactic_recognition.markdown +100 -0
  13. data/doc/using_in_ruby.markdown +21 -0
  14. data/examples/lambda_calculus/arithmetic.rb +551 -0
  15. data/examples/lambda_calculus/arithmetic.treetop +97 -0
  16. data/examples/lambda_calculus/arithmetic_node_classes.rb +7 -0
  17. data/examples/lambda_calculus/arithmetic_test.rb +54 -0
  18. data/examples/lambda_calculus/lambda_calculus +0 -0
  19. data/examples/lambda_calculus/lambda_calculus.rb +718 -0
  20. data/examples/lambda_calculus/lambda_calculus.treetop +132 -0
  21. data/examples/lambda_calculus/lambda_calculus_node_classes.rb +5 -0
  22. data/examples/lambda_calculus/lambda_calculus_test.rb +89 -0
  23. data/examples/lambda_calculus/test_helper.rb +18 -0
  24. data/lib/treetop.rb +16 -0
  25. data/lib/treetop/bootstrap_gen_1_metagrammar.rb +45 -0
  26. data/lib/treetop/compiler.rb +6 -0
  27. data/lib/treetop/compiler/grammar_compiler.rb +44 -0
  28. data/lib/treetop/compiler/lexical_address_space.rb +17 -0
  29. data/lib/treetop/compiler/metagrammar.rb +3392 -0
  30. data/lib/treetop/compiler/metagrammar.treetop +454 -0
  31. data/lib/treetop/compiler/node_classes.rb +21 -0
  32. data/lib/treetop/compiler/node_classes/anything_symbol.rb +18 -0
  33. data/lib/treetop/compiler/node_classes/atomic_expression.rb +14 -0
  34. data/lib/treetop/compiler/node_classes/character_class.rb +28 -0
  35. data/lib/treetop/compiler/node_classes/choice.rb +31 -0
  36. data/lib/treetop/compiler/node_classes/declaration_sequence.rb +24 -0
  37. data/lib/treetop/compiler/node_classes/grammar.rb +28 -0
  38. data/lib/treetop/compiler/node_classes/inline_module.rb +27 -0
  39. data/lib/treetop/compiler/node_classes/nonterminal.rb +13 -0
  40. data/lib/treetop/compiler/node_classes/optional.rb +19 -0
  41. data/lib/treetop/compiler/node_classes/parenthesized_expression.rb +9 -0
  42. data/lib/treetop/compiler/node_classes/parsing_expression.rb +146 -0
  43. data/lib/treetop/compiler/node_classes/parsing_rule.rb +55 -0
  44. data/lib/treetop/compiler/node_classes/predicate.rb +45 -0
  45. data/lib/treetop/compiler/node_classes/predicate_block.rb +16 -0
  46. data/lib/treetop/compiler/node_classes/regex.rb +23 -0
  47. data/lib/treetop/compiler/node_classes/repetition.rb +55 -0
  48. data/lib/treetop/compiler/node_classes/sequence.rb +71 -0
  49. data/lib/treetop/compiler/node_classes/terminal.rb +20 -0
  50. data/lib/treetop/compiler/node_classes/transient_prefix.rb +9 -0
  51. data/lib/treetop/compiler/node_classes/treetop_file.rb +9 -0
  52. data/lib/treetop/compiler/ruby_builder.rb +113 -0
  53. data/lib/treetop/ruby_extensions.rb +2 -0
  54. data/lib/treetop/ruby_extensions/string.rb +42 -0
  55. data/lib/treetop/runtime.rb +5 -0
  56. data/lib/treetop/runtime/compiled_parser.rb +118 -0
  57. data/lib/treetop/runtime/interval_skip_list.rb +4 -0
  58. data/lib/treetop/runtime/interval_skip_list/head_node.rb +15 -0
  59. data/lib/treetop/runtime/interval_skip_list/interval_skip_list.rb +200 -0
  60. data/lib/treetop/runtime/interval_skip_list/node.rb +164 -0
  61. data/lib/treetop/runtime/syntax_node.rb +114 -0
  62. data/lib/treetop/runtime/terminal_parse_failure.rb +16 -0
  63. data/lib/treetop/runtime/terminal_syntax_node.rb +17 -0
  64. data/lib/treetop/version.rb +9 -0
  65. metadata +138 -0
@@ -0,0 +1,21 @@
1
# Loads the compiler node classes from the node_classes directory next to this
# file, in the order listed (the base classes parsing_expression and
# atomic_expression come first).
dir = File.dirname(__FILE__)

%w[
  parsing_expression
  atomic_expression
  inline_module
  predicate_block
  treetop_file
  grammar
  declaration_sequence
  parsing_rule
  parenthesized_expression
  nonterminal
  terminal
  regex
  anything_symbol
  character_class
  sequence
  choice
  repetition
  optional
  predicate
  transient_prefix
].each do |node_class_file|
  require File.join(dir, 'node_classes', node_class_file)
end
@@ -0,0 +1,18 @@
1
module Treetop
  module Compiler
    # Atomic expression whose generated code consumes one character of input,
    # whatever that character is.
    class AnythingSymbol < AtomicExpression
      # Emits an if/else pair into +builder+:
      # * when +index < input_length+, the result is a node covering
      #   index...(index + 1), optionally extended with the inline module, and
      #   @index is advanced by one;
      # * otherwise a terminal parse failure for "any character" is emitted and
      #   the result is assigned nil.
      def compile(address, builder, parent_expression = nil)
        super

        builder.if__('index < input_length') do
          node_source = "instantiate_node(#{node_class_name},input, index...(index + 1))"
          assign_result(node_source)
          extend_result_with_inline_module
          builder << '@index += 1'
        end

        builder.else_ do
          builder << 'terminal_parse_failure("any character")'
          assign_result('nil')
        end
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module Treetop
  module Compiler
    # Common superclass of the atomic parsing expressions in this compiler
    # (AnythingSymbol, CharacterClass and Nonterminal derive from it).
    class AtomicExpression < ParsingExpression
      # An atomic expression reports no inline modules.
      def inline_modules
        []
      end

      # Wraps +string+ in single quotes so it can be emitted as a Ruby
      # single-quoted literal. Backslashes are doubled first, and only then are
      # single quotes given a leading backslash, so the backslashes added for
      # the quotes are not doubled again.
      def single_quote(string)
        backslashes_doubled = string.gsub(/\\/) { '\\\\' }
        quotes_escaped = backslashes_doubled.gsub(/'/) { "\\'" }
        "'#{quotes_escaped}'"
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Treetop
  module Compiler
    # Atomic expression compiled into a has_terminal? check against a
    # \G-anchored pattern built from the expression's own text.
    class CharacterClass < AtomicExpression
      # Emits an if/else pair into +builder+. On a match, the result is a real
      # one-character node when this expression sits at address 0 or is
      # decorated; otherwise it is the lazily instantiated placeholder. @index
      # is then advanced by one. On a mismatch the result is assigned nil.
      def compile(address, builder, parent_expression = nil)
        super

        match_condition = "has_terminal?(#{grounded_regexp(text_value)}, true, index)"

        builder.if__(match_condition) do
          needs_real_node = address == 0 || decorated?
          if needs_real_node
            assign_result("instantiate_node(#{node_class_name},input, index...(index + 1))")
            extend_result_with_inline_module
          else
            assign_lazily_instantiated_node
          end
          builder << '@index += 1'
        end

        builder.else_ do
          # No terminal parse failure is emitted here; the call is disabled:
          # "terminal_parse_failure(#{single_quote(characters)})"
          assign_result('nil')
        end
      end

      # Returns +string+ as single-quoted Ruby source prefixed with \G.
      # Backslashes are doubled first, then single quotes are backslash-escaped.
      def grounded_regexp(string)
        backslashes_doubled = string.gsub(/\\/) { '\\\\' }
        quotes_escaped = backslashes_doubled.gsub(/'/) { "\\'" }
        "'\\G#{quotes_escaped}'"
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Treetop
  module Compiler
    # Ordered choice: alternatives are compiled one after another, each later
    # alternative nested inside the else-branch of the one before it.
    class Choice < ParsingExpression
      # Declares the result and start-index variables, then compiles the whole
      # list of alternatives.
      def compile(address, builder, parent_expression = nil)
        super
        begin_comment(self)
        use_vars(:result, :start_index)
        compile_alternatives(alternatives)
        end_comment(self)
      end

      # Compiles the first entry of +alternatives+ at a fresh subexpression
      # address. If it succeeds, its result becomes this expression's result
      # (extended with the declared and inline modules). If it fails, the
      # remaining alternatives are compiled recursively; when none remain, the
      # index is reset and failure is assigned.
      def compile_alternatives(alternatives)
        obtain_new_subexpression_address
        leading_alternative = alternatives.first
        leading_alternative.compile(subexpression_address, builder)

        builder.if__(subexpression_success?) do
          assign_result(subexpression_result_var)
          extend_result_with_declared_module
          extend_result_with_inline_module
        end

        builder.else_ do
          # More alternatives left: try them inside this else-branch.
          next compile_alternatives(alternatives[1..-1]) unless alternatives.size == 1

          reset_index
          assign_failure(start_index_var)
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Treetop
  module Compiler
    # Sequence of declarations found inside a grammar body.
    class DeclarationSequence < Runtime::SyntaxNode
      # When at least one parsing rule is declared, emits a "root" method that
      # returns @root or, failing that, the name of the first rule as a symbol.
      # Every declaration is then compiled in order, each followed by a newline.
      def compile(builder)
        first_rule = rules.first

        if first_rule
          builder.method_declaration('root') do
            builder << "@root || :#{first_rule.name}"
          end
          builder.newline
        end

        declarations.each do |declaration|
          declaration.compile(builder)
          builder.newline
        end
      end

      # Declarations whose class is exactly ParsingRule (subclasses excluded).
      def rules
        declarations.select do |candidate|
          candidate.instance_of?(ParsingRule)
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Treetop
  module Compiler
    # Grammar declaration node. Compiling it produces a module named after the
    # grammar and a parser class that includes that module.
    class Grammar < Runtime::SyntaxNode
      # Builds the Ruby source with a new RubyBuilder:
      # 1. a module named after the grammar that includes Treetop::Runtime and
      #    contains the compiled declaration sequence;
      # 2. a "<name>Parser" class deriving from Treetop::Runtime::CompiledParser
      #    that includes the grammar module.
      # The value of the final class_declaration call is returned.
      def compile
        builder = RubyBuilder.new
        module_source_name = "#{grammar_name.text_value}"

        builder.module_declaration(module_source_name) do
          # Account for the initial indentation of the grammar declaration.
          builder.in(indent_level)
          builder << 'include Treetop::Runtime'
          builder.newline
          declaration_sequence.compile(builder)
        end

        builder.newline

        parser_class_header = "#{parser_name} < Treetop::Runtime::CompiledParser"
        builder.class_declaration(parser_class_header) do
          builder << "include #{grammar_name.text_value}"
        end
      end

      # Column of the start of this node in the input, minus one.
      def indent_level
        start_column = input.column_of(interval.begin)
        start_column - 1
      end

      # The grammar name with "Parser" appended.
      def parser_name
        base_name = grammar_name.text_value
        base_name + 'Parser'
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module Treetop
  module Compiler
    # Shared naming behaviour for inline modules: compiling records a module
    # name derived from the enclosing rule's name and a numeric index.
    module InlineModuleMixin
      attr_reader :module_name

      # Stores "<camelized rule name><index>" as the module name.
      # +builder+ is accepted but not used here.
      def compile(index, builder, rule)
        camelized_rule_name = rule.name.treetop_camelize
        @module_name = "#{camelized_rule_name}#{index}"
      end
    end

    # Inline module node whose Ruby code is emitted as a named module.
    class InlineModule < Runtime::SyntaxNode
      include InlineModuleMixin

      # Names the module (via the mixin) and emits a module declaration whose
      # body is the node's Ruby code with one leading newline and all trailing
      # whitespace removed.
      def compile(index, builder, rule)
        super

        builder.module_declaration(module_name) do
          # \A matches only at the start, so a single substitution suffices.
          trimmed_code = ruby_code.sub(/\A\n/, '').rstrip
          builder << trimmed_code
        end
      end

      # Text of the second child element of this node.
      def ruby_code
        code_element = elements[1]
        code_element.text_value
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Treetop
  module Compiler
    # Reference to another rule by name.
    class Nonterminal < AtomicExpression
      # Assigns, as this expression's result, a call to the referenced rule's
      # parsing method ("_nt_<name>"). The name "super" is emitted verbatim as
      # a bare super call. The result is then extended with the declared and
      # inline modules when they are present.
      def compile(address, builder, parent_expression = nil)
        super
        use_vars(:result)

        call_source =
          if text_value == 'super'
            'super'
          else
            "_nt_#{text_value}"
          end

        assign_result(call_source)
        extend_result_with_declared_module
        extend_result_with_inline_module
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Treetop
  module Compiler
    # Optional suffix: the wrapped expression may match, and if it does not,
    # the result is an empty node instead of a failure.
    class Optional < ParsingExpression
      # Compiles the parent's atomic expression at a fresh subexpression
      # address, then emits an if/else pair: on success the subexpression's
      # result is used, otherwise the result is the epsilon node.
      def compile(address, builder, parent_expression)
        super
        use_vars(:result)
        obtain_new_subexpression_address

        wrapped_expression = parent_expression.atomic
        wrapped_expression.compile(subexpression_address, builder)

        builder.if__(subexpression_success?) do
          assign_result(subexpression_result_var)
        end

        builder.else_ do
          assign_result(epsilon_node)
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Treetop
  module Compiler
    # Parenthesized group: compilation is handed straight to the enclosed
    # expression.
    class ParenthesizedExpression < ParsingExpression
      # Forwards all three arguments unchanged to the third child element.
      # Note that the superclass compile is not invoked here.
      def compile(address, builder, parent_expression = nil)
        enclosed_expression = elements[2]
        enclosed_expression.compile(address, builder, parent_expression)
      end
    end
  end
end
@@ -0,0 +1,146 @@
1
module Treetop
  module Compiler
    # Base class for grammar nodes that emit parser source through a builder.
    #
    # #compile records the expression's address, the builder and the parent
    # expression. The remaining methods are helpers for subclasses: they
    # derive generated variable names from addresses and emit assignments,
    # module extensions and index resets through the builder.
    class ParsingExpression < Runtime::SyntaxNode
      attr_reader :address, :builder, :subexpression_address, :var_symbols, :parent_expression

      # Records the compilation context. Subclasses call this via +super+
      # before emitting any code.
      #
      # address           - value used to name this expression's variables.
      # builder           - object that receives the generated source.
      # parent_expression - enclosing expression, or nil.
      def compile(address, builder, parent_expression)
        @address = address
        @builder = builder
        @parent_expression = parent_expression
      end

      # Node class named by the parent expression, or 'SyntaxNode' when there
      # is no parent or the parent names none.
      def node_class_name
        parent_expression && parent_expression.node_class_name || 'SyntaxNode'
      end

      # The parent's node class name, or nil/false when there is no parent.
      def declared_module_name
        parent_expression && parent_expression.node_class_name
      end

      # The parent's inline module name, or nil/false when there is no parent.
      def inline_module_name
        parent_expression && parent_expression.inline_module_name
      end

      # Truthy when a parent exists and names either a node class or an
      # inline module.
      def decorated?
        parent_expression && (parent_expression.node_class_name || parent_expression.inline_module_name)
      end

      # Returns ", <arg>" when +arg+ is truthy and an empty string otherwise.
      def optional_arg(arg)
        if arg
          ", #{arg}"
        else
          ''
        end
      end

      # Remembers which variables this expression uses and emits their
      # initialization line (which may be empty) to the builder.
      def use_vars(*var_symbols)
        @var_symbols = var_symbols
        builder << var_initialization
      end

      # Name of this expression's result variable.
      def result_var
        var(:result)
      end

      # Name of this expression's accumulator variable.
      def accumulator_var
        var(:accumulator)
      end

      # Name of this expression's start-index variable.
      def start_index_var
        var(:start_index)
      end

      # Name of the result variable of the current subexpression.
      def subexpression_result_var
        "r#{subexpression_address}"
      end

      # Source of the condition testing subexpression success: the
      # subexpression's result variable itself.
      def subexpression_success?
        subexpression_result_var
      end

      # Takes the next address from the builder for the subexpression about
      # to be compiled.
      def obtain_new_subexpression_address
        @subexpression_address = builder.next_address
      end

      # Emits code adding the subexpression result to the accumulator.
      def accumulate_subexpression_result
        builder.accumulate accumulator_var, subexpression_result_var
      end

      # Emits an assignment of +value_ruby+ (Ruby source) to the result
      # variable.
      def assign_result(value_ruby)
        builder.assign result_var, value_ruby
      end

      # Emits code extending the result with +module_name+.
      def extend_result(module_name)
        builder.extend result_var, module_name
      end

      # Extends the result with the declared module, when there is one.
      def extend_result_with_declared_module
        extend_result declared_module_name if declared_module_name
      end

      # Extends the result with the inline module, when there is one.
      def extend_result_with_inline_module
        extend_result inline_module_name if inline_module_name
      end

      # Emits code restoring @index to the saved start index.
      def reset_index
        builder.assign '@index', start_index_var
      end

      # Source for a zero-width SyntaxNode at the current index.
      def epsilon_node
        "instantiate_node(SyntaxNode,input, index...index)"
      end

      # Emits an assignment of nil to the result variable.
      # The +start_index_var+ argument is accepted but not used.
      def assign_failure(start_index_var)
        assign_result("nil")
      end

      # Emits an assignment of true to the result variable; used by
      # CharacterClass in place of building a node.
      def assign_lazily_instantiated_node
        assign_result("true")
      end

      # Builds one parallel-assignment line initializing every used variable
      # that has an initial value, e.g. "s0, i0 = [], index". Returns an
      # empty string when no used variable needs initializing.
      def var_initialization
        left, right = [], []
        var_symbols.each do |symbol|
          initial_value = init_value(symbol)
          next unless initial_value

          left << var(symbol)
          right << initial_value
        end

        if left.empty?
          ""
        else
          left.join(', ') + ' = ' + right.join(', ')
        end
      end

      # Maps a variable kind to its generated name: "r", "s" or "i" followed
      # by this expression's address. Raises on an unknown kind.
      def var(var_symbol)
        case var_symbol
        when :result then "r#{address}"
        when :accumulator then "s#{address}"
        when :start_index then "i#{address}"
        else raise "Unknown var symbol #{var_symbol}."
        end
      end

      # Initial-value source for a variable kind, or nil when that kind is
      # not initialized up front (as is the case for :result).
      def init_value(var_symbol)
        case var_symbol
        when :accumulator then '[]'
        when :start_index then 'index'
        else nil
        end
      end

      # Currently a no-op: the line emitting a begin marker is disabled.
      def begin_comment(expression)
        #builder << "# begin #{on_one_line(expression)}"
      end

      # Currently a no-op: the line emitting an end marker is disabled.
      def end_comment(expression)
        #builder << "# end #{on_one_line(expression)}"
      end

      # The expression's text with every newline replaced by a space.
      def on_one_line(expression)
        expression.text_value.tr("\n", ' ')
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
module Treetop
  module Compiler
    # A named rule. Compiling it emits its inline modules followed by an
    # "_nt_<name>" parsing method that consults and fills node_cache.
    class ParsingRule < Runtime::SyntaxNode
      # Emits the inline module declarations, then the parsing method.
      def compile(builder)
        compile_inline_module_declarations(builder)
        generate_method_definition(builder)
      end

      # Compiles each inline module of the rule's expression, passing its
      # position in the list and this rule, with a newline after each one.
      def compile_inline_module_declarations(builder)
        parsing_expression.inline_modules.each_with_index do |mod, position|
          mod.compile(position, builder, self)
          builder.newline
        end
      end

      # Emits the rule's parsing method. Addresses are reset first so the
      # rule's own expression receives the first address. The method body
      # saves the start index, looks in the cache, runs the compiled
      # expression, stores its result in the cache and returns it.
      def generate_method_definition(builder)
        builder.reset_addresses
        root_address = builder.next_address
        root_result = "r#{root_address}"

        builder.method_declaration(method_name) do
          builder.assign('start_index', 'index')
          generate_cache_lookup(builder)
          builder.newline
          parsing_expression.compile(root_address, builder)
          builder.newline
          generate_cache_storage(builder, root_result)
          builder.newline
          builder << root_result
        end
      end

      # Emits the cache check: when node_cache has an entry for this rule at
      # the current index, the generated code moves @index to the end of the
      # cached node (if the entry is truthy) and returns the cached value.
      def generate_cache_lookup(builder)
        cache_hit_condition = "node_cache[:#{name}].has_key?(index)"

        builder.if_(cache_hit_condition) do
          builder.assign('cached', "node_cache[:#{name}][index]")
          builder << '@index = cached.interval.end if cached'
          builder << 'return cached'
        end
      end

      # Emits the assignment storing +result_var+ in this rule's cache under
      # the saved start index.
      def generate_cache_storage(builder, result_var)
        cache_slot = "node_cache[:#{name}][start_index]"
        builder.assign(cache_slot, result_var)
      end

      # Name of the generated parsing method for this rule.
      def method_name
        "_nt_#{name}"
      end

      # The rule's name, taken from the text of its nonterminal.
      def name
        nonterminal.text_value
      end
    end
  end
end