regex-treetop 1.4.8
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +19 -0
- data/README.md +164 -0
- data/Rakefile +19 -0
- data/bin/tt +112 -0
- data/doc/contributing_and_planned_features.markdown +103 -0
- data/doc/grammar_composition.markdown +65 -0
- data/doc/index.markdown +90 -0
- data/doc/pitfalls_and_advanced_techniques.markdown +51 -0
- data/doc/semantic_interpretation.markdown +189 -0
- data/doc/site.rb +112 -0
- data/doc/sitegen.rb +65 -0
- data/doc/syntactic_recognition.markdown +100 -0
- data/doc/using_in_ruby.markdown +21 -0
- data/examples/lambda_calculus/arithmetic.rb +551 -0
- data/examples/lambda_calculus/arithmetic.treetop +97 -0
- data/examples/lambda_calculus/arithmetic_node_classes.rb +7 -0
- data/examples/lambda_calculus/arithmetic_test.rb +54 -0
- data/examples/lambda_calculus/lambda_calculus +0 -0
- data/examples/lambda_calculus/lambda_calculus.rb +718 -0
- data/examples/lambda_calculus/lambda_calculus.treetop +132 -0
- data/examples/lambda_calculus/lambda_calculus_node_classes.rb +5 -0
- data/examples/lambda_calculus/lambda_calculus_test.rb +89 -0
- data/examples/lambda_calculus/test_helper.rb +18 -0
- data/lib/treetop.rb +16 -0
- data/lib/treetop/bootstrap_gen_1_metagrammar.rb +45 -0
- data/lib/treetop/compiler.rb +6 -0
- data/lib/treetop/compiler/grammar_compiler.rb +44 -0
- data/lib/treetop/compiler/lexical_address_space.rb +17 -0
- data/lib/treetop/compiler/metagrammar.rb +3392 -0
- data/lib/treetop/compiler/metagrammar.treetop +454 -0
- data/lib/treetop/compiler/node_classes.rb +21 -0
- data/lib/treetop/compiler/node_classes/anything_symbol.rb +18 -0
- data/lib/treetop/compiler/node_classes/atomic_expression.rb +14 -0
- data/lib/treetop/compiler/node_classes/character_class.rb +28 -0
- data/lib/treetop/compiler/node_classes/choice.rb +31 -0
- data/lib/treetop/compiler/node_classes/declaration_sequence.rb +24 -0
- data/lib/treetop/compiler/node_classes/grammar.rb +28 -0
- data/lib/treetop/compiler/node_classes/inline_module.rb +27 -0
- data/lib/treetop/compiler/node_classes/nonterminal.rb +13 -0
- data/lib/treetop/compiler/node_classes/optional.rb +19 -0
- data/lib/treetop/compiler/node_classes/parenthesized_expression.rb +9 -0
- data/lib/treetop/compiler/node_classes/parsing_expression.rb +146 -0
- data/lib/treetop/compiler/node_classes/parsing_rule.rb +55 -0
- data/lib/treetop/compiler/node_classes/predicate.rb +45 -0
- data/lib/treetop/compiler/node_classes/predicate_block.rb +16 -0
- data/lib/treetop/compiler/node_classes/regex.rb +23 -0
- data/lib/treetop/compiler/node_classes/repetition.rb +55 -0
- data/lib/treetop/compiler/node_classes/sequence.rb +71 -0
- data/lib/treetop/compiler/node_classes/terminal.rb +20 -0
- data/lib/treetop/compiler/node_classes/transient_prefix.rb +9 -0
- data/lib/treetop/compiler/node_classes/treetop_file.rb +9 -0
- data/lib/treetop/compiler/ruby_builder.rb +113 -0
- data/lib/treetop/ruby_extensions.rb +2 -0
- data/lib/treetop/ruby_extensions/string.rb +42 -0
- data/lib/treetop/runtime.rb +5 -0
- data/lib/treetop/runtime/compiled_parser.rb +118 -0
- data/lib/treetop/runtime/interval_skip_list.rb +4 -0
- data/lib/treetop/runtime/interval_skip_list/head_node.rb +15 -0
- data/lib/treetop/runtime/interval_skip_list/interval_skip_list.rb +200 -0
- data/lib/treetop/runtime/interval_skip_list/node.rb +164 -0
- data/lib/treetop/runtime/syntax_node.rb +114 -0
- data/lib/treetop/runtime/terminal_parse_failure.rb +16 -0
- data/lib/treetop/runtime/terminal_syntax_node.rb +17 -0
- data/lib/treetop/version.rb +9 -0
- metadata +138 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Common code generation for the predicate expressions defined below.
    # #compile emits the prefixed expression and then an if/else pair whose
    # bodies are supplied by the subclass hooks #when_success / #when_failure.
    class Predicate < ParsingExpression
      # Emits the code for this predicate through +builder+.
      #
      # The three arguments are passed on unchanged to the superclass
      # implementation by the bare +super+.
      # +parent_expression+ must respond to +prefixed_expression+; that
      # expression is compiled at a freshly obtained subexpression address.
      def compile(address, builder, parent_expression)
        super
        begin_comment(parent_expression)
        use_vars :result, :start_index
        obtain_new_subexpression_address

        prefixed = parent_expression.prefixed_expression
        prefixed.compile(subexpression_address, builder)

        # The success/failure branches are decided by the concrete subclass.
        builder.if__(subexpression_success?) do
          when_success
        end
        builder.else_ do
          when_failure
        end

        end_comment(parent_expression)
      end

      # Delegates to the inherited assign_failure, always passing the start
      # index variable of this expression.
      def assign_failure
        super(start_index_var)
      end

      # Resets the index and assigns the epsilon node as the result.
      # NOTE(review): presumably this is what makes a predicate consume no
      # input - confirm against ParsingExpression#reset_index.
      def assign_success
        reset_index
        assign_result epsilon_node
      end
    end

    # Predicate that succeeds exactly when its prefixed expression succeeds.
    class AndPredicate < Predicate
      # Branch emitted when the prefixed expression matched.
      def when_success
        assign_success
      end

      # Branch emitted when the prefixed expression did not match.
      def when_failure
        assign_failure
      end
    end

    # Predicate with the outcomes inverted: it succeeds exactly when its
    # prefixed expression fails.
    class NotPredicate < Predicate
      # Branch emitted when the prefixed expression matched.
      def when_success
        assign_failure
      end

      # Branch emitted when the prefixed expression did not match.
      def when_failure
        assign_success
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiles an inline predicate block into a generated
    # "lambda <block text>.call(<accumulator>)" result assignment.
    class PredicateBlock < ParsingExpression
      # Emits the result assignment for this block.
      #
      # All arguments are forwarded to the superclass by the bare +super+.
      def compile(index, builder, parent_expression = nil)
        super
        # REVISIT: This is distinctly dodgy. It is only reached from two
        # contexts and works in both, so it is kept in preference to a larger
        # refactoring that would give proper access to the parent's
        # accumulator variable.
        #
        # Climb the parent chain to the nearest node that exposes
        # accumulator_var; stop with a falsy value when the chain runs out.
        ancestor = parent
        ancestor = ancestor.parent until !ancestor || ancestor.respond_to?(:accumulator_var)

        block_source = text_value
        # With no accumulator-bearing ancestor the lambda is called without
        # arguments.
        call_argument = ancestor ? ancestor.accumulator_var : ""
        assign_result "lambda #{block_source}.call(#{call_argument})"
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiles an r(...) regex atom into a generated regex_match? test.
    class Regex < AtomicExpression
      # Emits an if/else pair: on a match the node is instantiated over the
      # matched range and @index is advanced by the match length; otherwise a
      # terminal parse failure is recorded and the result is nil.
      #
      # All arguments are forwarded to the superclass by the bare +super+.
      def compile(address, builder, parent_expression = nil)
        super

        # Drop the leading 'r(' and the trailing ')'.
        pattern_source = text_value[2..-2]

        # Every backslash is doubled so that it survives being embedded in the
        # generated %(...) string literal. The replacement is three characters:
        # gsub reads the first two as one escaped backslash, and the final,
        # dangling backslash is kept literally - two backslashes in total.
        escaped_source = pattern_source.gsub("\\", "\\\\\\")
        rx = "%(#{escaped_source})"

        builder.if__("(rx_match = regex_match?(#{rx}, index))") do
          assign_result "instantiate_node(#{node_class_name},input, index...(index + rx_match.length))"
          extend_result_with_inline_module
          builder << "@index += rx_match.length"
        end

        builder.else_ do
          builder << "terminal_parse_failure('/' + #{rx} + '/')"
          assign_result 'nil'
        end
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Shared code generation for the repetition suffixes. #compile emits only
    # the accumulating loop; each subclass emits the result handling after it.
    class Repetition < ParsingExpression
      # Emits a loop that compiles the repeated expression, accumulates every
      # successful subexpression result and breaks on the first failure.
      #
      # +parent_expression+ must respond to +atomic+ (the repeated expression).
      # All arguments are forwarded to the superclass by the bare +super+.
      def compile(address, builder, parent_expression)
        super
        repeated = parent_expression.atomic
        begin_comment(parent_expression)
        use_vars :result, :accumulator, :start_index

        builder.loop do
          obtain_new_subexpression_address
          repeated.compile(subexpression_address, builder)
          builder.if__(subexpression_success?) { accumulate_subexpression_result }
          builder.else_ { builder.break }
        end
      end

      # The inline module name is taken from the parent expression.
      def inline_module_name
        parent_expression.inline_module_name
      end

      # Emits the node instantiation spanning start index...index with the
      # accumulated elements, then extends it with the inline module.
      def assign_and_extend_result
        node_code = "instantiate_node(#{node_class_name},input, #{start_index_var}...index, #{accumulator_var})"
        assign_result node_code
        extend_result_with_inline_module
      end
    end


    # Repetition whose result is always built from the accumulator, whatever
    # it contains.
    class ZeroOrMore < Repetition
      # Emits the loop (via +super+) followed by the unconditional result.
      def compile(address, builder, parent_expression)
        super
        assign_and_extend_result
        end_comment(parent_expression)
      end
    end

    # Repetition that fails when the accumulator ended up empty.
    class OneOrMore < Repetition
      # Emits the loop (via +super+) followed by an emptiness check on the
      # accumulator: empty resets the index and assigns failure, otherwise the
      # result is built as usual.
      def compile(address, builder, parent_expression)
        super

        builder.if__("#{accumulator_var}.empty?") do
          reset_index
          assign_failure start_index_var
        end
        builder.else_ { assign_and_extend_result }

        end_comment(parent_expression)
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiles a sequence of expressions into nested if-blocks that accumulate
    # each element's result.
    class Sequence < ParsingExpression
      # Emits the element matching code followed by an if/else on the last
      # accumulated entry: truthy builds the node, otherwise the index is
      # reset and failure is assigned.
      #
      # All arguments are forwarded to the superclass by the bare +super+.
      def compile(address, builder, parent_expression = nil)
        super
        begin_comment(self)
        use_vars :result, :start_index, :accumulator
        compile_sequence_elements(sequence_elements)

        builder.if__("#{accumulator_var}.last") do
          assign_result "instantiate_node(#{node_class_name},input, #{start_index_var}...index, #{accumulator_var})"
          if sequence_element_accessor_module_name
            extend_result sequence_element_accessor_module_name
          end
          extend_result_with_inline_module
        end

        builder.else_ do
          reset_index
          assign_failure start_index_var
        end

        end_comment(self)
      end

      # Declared node class name, falling back to 'SyntaxNode'.
      def node_class_name
        node_class_declarations.node_class_name || 'SyntaxNode'
      end

      # Compiles the first element and accumulates its result; the remaining
      # elements are compiled recursively inside an "if <success>" block, so
      # each further element adds one level of nesting to the generated code.
      def compile_sequence_elements(elements)
        obtain_new_subexpression_address
        elements.first.compile(subexpression_address, builder)
        accumulate_subexpression_result
        return unless elements.size > 1

        builder.if_(subexpression_success?) do
          compile_sequence_elements(elements[1..-1])
        end
      end

      # Lazily built accessor module for this sequence's elements.
      def sequence_element_accessor_module
        @sequence_element_accessor_module ||= SequenceElementAccessorModule.new(sequence_elements)
      end

      # Name of the accessor module, as reported by the module object itself.
      def sequence_element_accessor_module_name
        sequence_element_accessor_module.module_name
      end
    end

    # Generates a module with one reader method per labelled sequence element;
    # each reader returns the matching entry of +elements+.
    class SequenceElementAccessorModule
      include InlineModuleMixin
      attr_reader :sequence_elements

      # sequence_elements:: the elements of the owning sequence, in order.
      def initialize(sequence_elements)
        @sequence_elements = sequence_elements
      end

      # Emits the module declaration through +builder+.
      #
      # All arguments are forwarded to InlineModuleMixin's implementation by
      # the bare +super+.
      def compile(index, builder, rule)
        super
        builder.module_declaration(module_name) do
          # Group the elements by label so that repeated labels can be told
          # apart; unlabelled elements all land under the "" key.
          elements_by_name = {}
          sequence_elements.each do |member|
            (elements_by_name[member.label_name.to_s] ||= []) << member
          end

          last_position = sequence_elements.size - 1
          sequence_elements.each_with_index do |element, position|
            next unless element.label_name

            same_label = elements_by_name[element.label_name.to_s]
            # A label used more than once gets a 1-based numeric suffix.
            suffix = same_label.size > 1 ? (same_label.index(element) + 1).to_s : ""
            accessor_name = element.label_name + suffix

            builder.method_declaration(accessor_name) do
              builder << "elements[#{position}]"
            end
            builder.newline unless position == last_position
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Compiles a quoted terminal string into a generated has_terminal? test.
    class Terminal < AtomicExpression
      # Emits an if/else pair: on a match the node is instantiated over the
      # terminal's range and @index is advanced by its length; otherwise a
      # terminal parse failure is recorded and the result is nil.
      #
      # All arguments are forwarded to the superclass by the bare +super+.
      def compile(address, builder, parent_expression = nil)
        super

        # text_value holds the terminal as Ruby string-literal source, so it is
        # evaluated to learn the length of the string it denotes.
        # NOTE(review): this runs grammar text through eval; only compile
        # grammars from trusted sources.
        terminal_length = eval(text_value).length

        builder.if__("has_terminal?(#{text_value}, false, index)") do
          assign_result "instantiate_node(#{node_class_name},input, index...(index + #{terminal_length}))"
          extend_result_with_inline_module
          builder << "@index += #{terminal_length}"
        end

        builder.else_ do
          builder << "terminal_parse_failure(#{text_value})"
          assign_result 'nil'
        end
      end
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module Treetop
  module Compiler
    # Accumulates generated Ruby source line by line, tracking the current
    # indentation level and the lexical address space used for variable names.
    class RubyBuilder

      attr_reader :level, :address_space, :ruby

      # Starts with no indentation, a fresh address space and empty output.
      def initialize
        @level = 0
        @address_space = LexicalAddressSpace.new
        @ruby = ""
      end

      # Appends +ruby_line+, re-indented to the current level, plus a newline.
      # Blank input is ignored and returns nil; otherwise the output buffer is
      # returned.
      def <<(ruby_line)
        return if ruby_line.blank?
        ruby << ruby_line.tabto(level) << "\n"
      end

      # Appends an empty line.
      def newline
        ruby << "\n"
      end

      # Runs the given block with the level increased by +depth+ and returns
      # the builder.
      #
      # The level is restored in an +ensure+ clause, so an exception raised by
      # the block no longer leaves the builder permanently over-indented.
      def indented(depth = 2)
        self.in(depth)
        begin
          yield
        ensure
          self.out(depth)
        end
        self
      end

      # Emits "class <name>" / "end" around the indented block.
      def class_declaration(name, &block)
        self << "class #{name}"
        indented(&block)
        self << "end"
      end

      # Emits "module <name>" / "end" around the indented block.
      def module_declaration(name, &block)
        self << "module #{name}"
        indented(&block)
        self << "end"
      end

      # Emits "def <name>" / "end" around the indented block.
      def method_declaration(name, &block)
        self << "def #{name}"
        indented(&block)
        self << "end"
      end

      # Emits an assignment. When +left+ is exactly an Array, both sides are
      # joined with ", " to form a multiple assignment (so +right+ must then
      # respond to +join+ as well).
      def assign(left, right)
        if left.instance_of? Array
          self << "#{left.join(', ')} = #{right.join(', ')}"
        else
          self << "#{left} = #{right}"
        end
      end

      # Emits "<var>.extend(<module_name>)".
      def extend(var, module_name)
        self << "#{var}.extend(#{module_name})"
      end

      # Emits "<left> << <right>".
      def accumulate(left, right)
        self << "#{left} << #{right}"
      end

      # Emits "if <condition>" and the indented block WITHOUT a closing "end";
      # the caller is expected to follow up with #else_ (or use #if_).
      def if__(condition, &block)
        self << "if #{condition}"
        indented(&block)
      end

      # Emits a complete "if <condition>" ... "end".
      def if_(condition, &block)
        if__(condition, &block)
        self << 'end'
      end

      # Emits "else", the indented block and the closing "end".
      def else_(&block)
        self << 'else'
        indented(&block)
        self << 'end'
      end

      # Emits "loop do" / "end" around the indented block.
      def loop(&block)
        self << 'loop do'
        indented(&block)
        self << 'end'
      end

      # Emits a "break" statement.
      def break
        self << 'break'
      end

      # Increases the indentation level by +depth+; returns the builder.
      def in(depth = 2)
        @level += depth
        self
      end

      # Decreases the indentation level by +depth+; returns the builder.
      def out(depth = 2)
        @level -= depth
        self
      end

      # Next address from the lexical address space.
      def next_address
        address_space.next_address
      end

      # Resets the lexical address space.
      def reset_addresses
        address_space.reset_addresses
      end

      private

      # Whitespace string for the current level (not used within this class).
      def indent
        " " * level
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Core extensions used by the compiler (source formatting) and by the runtime
# (failure positions).
class String
  # 1-based column of the character at +index+, counted from the newline that
  # precedes it (or from the start of the string when there is none).
  def column_of(index)
    return 1 if index == 0

    preceding_newline = rindex("\n", index - 1)
    preceding_newline ? index - preceding_newline : index + 1
  end

  # 1-based line number of the character at +index+: one more than the number
  # of newlines before it.
  def line_of(index)
    1 + self[0...index].count("\n")
  end

  # Only defined when nothing else has provided String#blank? already.
  unless method_defined?(:blank?)
    # True for the empty string only.
    def blank?
      empty?
    end
  end

  # The following methods are lifted from Facets 2.0.2

  # Shifts the whole string so that the first line containing a non-space
  # character starts at column +n+. Returns self unchanged when no such line
  # exists.
  def tabto(n)
    leading_spaces = self[/^( *)\S/, 1]
    return self unless leading_spaces

    indent(n - leading_spaces.length)
  end

  # Adds +n+ spaces to the start of every line, or, for negative +n+, strips
  # up to +-n+ leading spaces from every line.
  def indent(n)
    return gsub(/^/, ' ' * n) if n >= 0

    gsub(/^ {0,#{-n}}/, "")
  end

  # Converts a path-like, underscored name to a constant-like one:
  # "/x" becomes "::X" and each underscore-led or line-initial character is
  # upcased with the underscore dropped.
  def treetop_camelize
    namespaced = to_s.gsub(/\/(.?)/) { "::" + $1.upcase }
    namespaced.gsub(/(^|_)(.)/) { $2.upcase }
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module Treetop
  module Runtime
    # Base class with the runtime support used by generated parsers: input and
    # index bookkeeping, node instantiation, terminal/regex matching and
    # failure reporting.
    #
    # NOTE(review): #parse calls +root+ and a "_nt_<root>" method, neither of
    # which is defined here - presumably the generated subclass supplies both.
    class CompiledParser
      include Treetop::Runtime

      attr_reader :input, :index, :max_terminal_failure_index
      attr_writer :root
      attr_accessor :consume_all_input
      alias :consume_all_input? :consume_all_input

      # New parsers require the whole input to be consumed.
      def initialize
        self.consume_all_input = true
      end

      # Parses +input+ starting from the root rule.
      #
      # options[:index]:: optional position to start parsing at (default 0).
      #
      # Returns the root rule's result, or nil when consume_all_input? is set
      # and the parse stopped before the end of the input.
      def parse(input, options = {})
        prepare_to_parse(input)
        @index = options[:index] if options[:index]
        result = send("_nt_#{root}")
        return nil if consume_all_input? && index != input.size
        result
      end

      # Furthest input position at which a terminal failure was recorded.
      def failure_index
        max_terminal_failure_index
      end

      # Line of failure_index; nil before the first parse.
      def failure_line
        @terminal_failures && input.line_of(failure_index)
      end

      # Column of failure_index; nil before the first parse.
      def failure_column
        @terminal_failures && input.column_of(failure_index)
      end

      # Human-readable description of the furthest terminal failure(s), or nil
      # when none were recorded (including before the first parse). Safe to
      # call repeatedly.
      def failure_reason
        failures = terminal_failures
        return nil unless failures && failures.size > 0

        expected =
          if failures.size == 1
            failures[0].expected_string
          else
            "one of #{failures.map { |f| f.expected_string }.uniq * ', '}"
          end

        "Expected " + expected +
          " at line #{failure_line}, column #{failure_column} (byte #{failure_index+1})" +
          " after #{input[index...failure_index]}"
      end

      # The recorded failures as TerminalParseFailure objects.
      #
      # Failures are stored as [index, expected_string] arrays while parsing
      # and converted in place on demand. Only entries that are still arrays
      # are converted, so calling this more than once no longer feeds an
      # already converted object back into TerminalParseFailure.new.
      # Returns an empty array before the first parse.
      def terminal_failures
        return [] unless @terminal_failures

        @terminal_failures.map! do |failure|
          failure.is_a?(Array) ? TerminalParseFailure.new(*failure) : failure
        end
      end


      protected

      attr_reader :node_cache, :input_length
      attr_writer :index

      # Resets all per-parse state for a new +input+.
      def prepare_to_parse(input)
        @input = input
        @input_length = input.length
        reset_index
        # Two-level cache; inner hashes are created on first access.
        @node_cache = Hash.new {|hash, key| hash[key] = Hash.new}
        @regexps = {}
        @terminal_failures = []
        @max_terminal_failure_index = 0
      end

      # Moves the parse position back to the start of the input.
      def reset_index
        @index = 0
      end

      # Matches any single character: returns a node spanning it and advances
      # the index, or records a failure (returning nil) at end of input.
      def parse_anything(node_class = SyntaxNode, inline_module = nil)
        if index < input.length
          result = instantiate_node(node_class,input, index...(index + 1))
          result.extend(inline_module) if inline_module
          @index += 1
          result
        else
          terminal_parse_failure("any character")
        end
      end

      # Builds a node. Anything responding to +new+ is instantiated directly;
      # otherwise +node_type+ is treated as a module and mixed into a plain
      # SyntaxNode.
      def instantiate_node(node_type,*args)
        if node_type.respond_to? :new
          node_type.new(*args)
        else
          SyntaxNode.new(*args).extend(node_type)
        end
      end

      # True when +terminal+ matches the input exactly at +index+. With
      # +regex+ truthy, +terminal+ is regex source, compiled once and cached.
      def has_terminal?(terminal, regex, index)
        if regex
          rx = @regexps[terminal] ||= Regexp.new(terminal)
          input.index(rx, index) == index
        else
          input[index, terminal.size] == terminal
        end
      end

      # Returns the text matched by +regex+ (regex source, compiled once and
      # cached) when the first match at or after +index+ starts exactly at
      # +index+; nil otherwise.
      def regex_match?(regex, index)
        rx = @regexps[regex] ||= Regexp.new(regex)
        if input.index(rx, index) == index
          $&
        else
          nil
        end
      end

      # Records that +expected_string+ was expected at the current index.
      # Only failures at the furthest position reached are kept: earlier ones
      # are ignored and a new furthest position discards the previous list.
      # Always returns nil.
      def terminal_parse_failure(expected_string)
        return nil if index < max_terminal_failure_index
        if index > max_terminal_failure_index
          @max_terminal_failure_index = index
          @terminal_failures = []
        end
        @terminal_failures << [index, expected_string]
        return nil
      end
    end
  end
end
|