regex-treetop 1.4.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data/LICENSE +19 -0
  2. data/README.md +164 -0
  3. data/Rakefile +19 -0
  4. data/bin/tt +112 -0
  5. data/doc/contributing_and_planned_features.markdown +103 -0
  6. data/doc/grammar_composition.markdown +65 -0
  7. data/doc/index.markdown +90 -0
  8. data/doc/pitfalls_and_advanced_techniques.markdown +51 -0
  9. data/doc/semantic_interpretation.markdown +189 -0
  10. data/doc/site.rb +112 -0
  11. data/doc/sitegen.rb +65 -0
  12. data/doc/syntactic_recognition.markdown +100 -0
  13. data/doc/using_in_ruby.markdown +21 -0
  14. data/examples/lambda_calculus/arithmetic.rb +551 -0
  15. data/examples/lambda_calculus/arithmetic.treetop +97 -0
  16. data/examples/lambda_calculus/arithmetic_node_classes.rb +7 -0
  17. data/examples/lambda_calculus/arithmetic_test.rb +54 -0
  18. data/examples/lambda_calculus/lambda_calculus +0 -0
  19. data/examples/lambda_calculus/lambda_calculus.rb +718 -0
  20. data/examples/lambda_calculus/lambda_calculus.treetop +132 -0
  21. data/examples/lambda_calculus/lambda_calculus_node_classes.rb +5 -0
  22. data/examples/lambda_calculus/lambda_calculus_test.rb +89 -0
  23. data/examples/lambda_calculus/test_helper.rb +18 -0
  24. data/lib/treetop.rb +16 -0
  25. data/lib/treetop/bootstrap_gen_1_metagrammar.rb +45 -0
  26. data/lib/treetop/compiler.rb +6 -0
  27. data/lib/treetop/compiler/grammar_compiler.rb +44 -0
  28. data/lib/treetop/compiler/lexical_address_space.rb +17 -0
  29. data/lib/treetop/compiler/metagrammar.rb +3392 -0
  30. data/lib/treetop/compiler/metagrammar.treetop +454 -0
  31. data/lib/treetop/compiler/node_classes.rb +21 -0
  32. data/lib/treetop/compiler/node_classes/anything_symbol.rb +18 -0
  33. data/lib/treetop/compiler/node_classes/atomic_expression.rb +14 -0
  34. data/lib/treetop/compiler/node_classes/character_class.rb +28 -0
  35. data/lib/treetop/compiler/node_classes/choice.rb +31 -0
  36. data/lib/treetop/compiler/node_classes/declaration_sequence.rb +24 -0
  37. data/lib/treetop/compiler/node_classes/grammar.rb +28 -0
  38. data/lib/treetop/compiler/node_classes/inline_module.rb +27 -0
  39. data/lib/treetop/compiler/node_classes/nonterminal.rb +13 -0
  40. data/lib/treetop/compiler/node_classes/optional.rb +19 -0
  41. data/lib/treetop/compiler/node_classes/parenthesized_expression.rb +9 -0
  42. data/lib/treetop/compiler/node_classes/parsing_expression.rb +146 -0
  43. data/lib/treetop/compiler/node_classes/parsing_rule.rb +55 -0
  44. data/lib/treetop/compiler/node_classes/predicate.rb +45 -0
  45. data/lib/treetop/compiler/node_classes/predicate_block.rb +16 -0
  46. data/lib/treetop/compiler/node_classes/regex.rb +23 -0
  47. data/lib/treetop/compiler/node_classes/repetition.rb +55 -0
  48. data/lib/treetop/compiler/node_classes/sequence.rb +71 -0
  49. data/lib/treetop/compiler/node_classes/terminal.rb +20 -0
  50. data/lib/treetop/compiler/node_classes/transient_prefix.rb +9 -0
  51. data/lib/treetop/compiler/node_classes/treetop_file.rb +9 -0
  52. data/lib/treetop/compiler/ruby_builder.rb +113 -0
  53. data/lib/treetop/ruby_extensions.rb +2 -0
  54. data/lib/treetop/ruby_extensions/string.rb +42 -0
  55. data/lib/treetop/runtime.rb +5 -0
  56. data/lib/treetop/runtime/compiled_parser.rb +118 -0
  57. data/lib/treetop/runtime/interval_skip_list.rb +4 -0
  58. data/lib/treetop/runtime/interval_skip_list/head_node.rb +15 -0
  59. data/lib/treetop/runtime/interval_skip_list/interval_skip_list.rb +200 -0
  60. data/lib/treetop/runtime/interval_skip_list/node.rb +164 -0
  61. data/lib/treetop/runtime/syntax_node.rb +114 -0
  62. data/lib/treetop/runtime/terminal_parse_failure.rb +16 -0
  63. data/lib/treetop/runtime/terminal_syntax_node.rb +17 -0
  64. data/lib/treetop/version.rb +9 -0
  65. metadata +138 -0
@@ -0,0 +1,45 @@
1
module Treetop
  module Compiler
    # Common code generation for predicate expressions. Subclasses decide,
    # through +when_success+ and +when_failure+, what the generated parser
    # does depending on whether the prefixed subexpression matched.
    class Predicate < ParsingExpression
      # Compiles the prefixed expression of +parent_expression+ at a fresh
      # subexpression address, then emits an if/else on its success that
      # delegates to the subclass hooks.
      def compile(address, builder, parent_expression)
        super
        begin_comment(parent_expression)
        use_vars :result, :start_index
        obtain_new_subexpression_address
        prefixed = parent_expression.prefixed_expression
        prefixed.compile(subexpression_address, builder)
        builder.if__(subexpression_success?) do
          when_success
        end
        builder.else_ do
          when_failure
        end
        end_comment(parent_expression)
      end

      # Fails the predicate, passing the start index variable to the
      # inherited implementation.
      def assign_failure
        super(start_index_var)
      end

      # Succeeds the predicate: the index is reset first and the result is
      # an epsilon node.
      def assign_success
        reset_index
        assign_result epsilon_node
      end
    end

    # Predicate that succeeds exactly when its subexpression succeeds.
    class AndPredicate < Predicate
      def when_success
        assign_success
      end

      def when_failure
        assign_failure
      end
    end

    # Predicate that succeeds exactly when its subexpression fails.
    class NotPredicate < Predicate
      def when_success
        assign_failure
      end

      def when_failure
        assign_success
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module Treetop
  module Compiler
    # Compiles an inline predicate block into a lambda call in the generated
    # parser.
    class PredicateBlock < ParsingExpression
      # Emits a result assignment of the form
      #   lambda <block text>.call(<accumulator variable>)
      # The argument is left empty when no ancestor provides an accumulator.
      def compile(index, builder, parent_expression = nil)
        super
        # REVISIT: This is distinctly dodgy, but since we can only be called
        # from two contexts, and it works in both of those, it stays for now
        # rather than doing the major refactor of providing a proper way of
        # accessing the parent's accumulator variable.
        #
        # Walk up the parent chain to the nearest node that exposes
        # +accumulator_var+ (or run off the top of the tree).
        owner = parent
        owner = owner.parent until !owner || owner.respond_to?(:accumulator_var)
        argument = owner ? owner.accumulator_var : ""
        assign_result "lambda #{text_value}.call(#{argument})"
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Treetop
  module Compiler
    # Compiles an r(...) regex terminal into generated code that calls
    # +regex_match?+ at the current index.
    class Regex < AtomicExpression
      # Emits an if/else: on a match a node spanning the matched text is
      # instantiated and @index is advanced by the match length; otherwise a
      # terminal parse failure is recorded and the result is nil.
      def compile(address, builder, parent_expression = nil)
        super

        # Strip the leading 'r(' and the trailing ')'.
        body = text_value[2..-2]
        # Double every backslash so it survives being embedded in the
        # generated %(...) string literal. The block form sidesteps gsub's
        # own backslash processing of replacement strings, which is why a
        # plain replacement needed an odd-looking number of backslashes.
        escaped = body.gsub("\\") { "\\\\" }
        pattern_literal = "%(#{escaped})"

        builder.if__("(rx_match = regex_match?(#{pattern_literal}, index))") do
          assign_result "instantiate_node(#{node_class_name},input, index...(index + rx_match.length))"
          extend_result_with_inline_module
          builder << "@index += rx_match.length"
        end
        builder.else_ do
          builder << "terminal_parse_failure('/' + #{pattern_literal} + '/')"
          assign_result 'nil'
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
module Treetop
  module Compiler
    # Shared code generation for repetition operators: emits a loop that
    # keeps matching the repeated expression and accumulating its results
    # until it fails.
    class Repetition < ParsingExpression
      # Emits the accumulation loop only; subclasses append the code that
      # turns the accumulator into a result and close the comment.
      def compile(address, builder, parent_expression)
        super
        atom = parent_expression.atomic
        begin_comment(parent_expression)
        use_vars :result, :accumulator, :start_index

        builder.loop do
          obtain_new_subexpression_address
          atom.compile(subexpression_address, builder)
          builder.if__(subexpression_success?) { accumulate_subexpression_result }
          builder.else_ { builder.break }
        end
      end

      # The inline module name is taken from the parent expression.
      def inline_module_name
        parent_expression.inline_module_name
      end

      # Emits instantiation of a node covering start_index...index with the
      # accumulated elements, then extends it with the inline module.
      def assign_and_extend_result
        assign_result "instantiate_node(#{node_class_name},input, #{start_index_var}...index, #{accumulator_var})"
        extend_result_with_inline_module
      end
    end


    # Repetition whose generated code always produces a node after the loop.
    class ZeroOrMore < Repetition
      def compile(address, builder, parent_expression)
        super
        assign_and_extend_result
        end_comment(parent_expression)
      end
    end

    # Repetition whose generated code fails (after resetting the index) when
    # the accumulator is empty, and produces a node otherwise.
    class OneOrMore < Repetition
      def compile(address, builder, parent_expression)
        super
        builder.if__("#{accumulator_var}.empty?") do
          reset_index
          assign_failure start_index_var
        end
        builder.else_ { assign_and_extend_result }
        end_comment(parent_expression)
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
module Treetop
  module Compiler
    # Compiles a sequence of expressions into nested conditionals that
    # accumulate each element's result.
    class Sequence < ParsingExpression
      # Emits the element matching code, then an if/else on the last
      # accumulated value: truthy builds the sequence node (extended with the
      # accessor module and the inline module), otherwise the index is reset
      # and a failure is assigned.
      def compile(address, builder, parent_expression = nil)
        super
        begin_comment(self)
        use_vars :result, :start_index, :accumulator
        compile_sequence_elements(sequence_elements)
        builder.if__("#{accumulator_var}.last") do
          assign_result "instantiate_node(#{node_class_name},input, #{start_index_var}...index, #{accumulator_var})"
          accessor_name = sequence_element_accessor_module_name
          extend_result accessor_name if accessor_name
          extend_result_with_inline_module
        end
        builder.else_ do
          reset_index
          assign_failure start_index_var
        end
        end_comment(self)
      end

      # Declared node class name, defaulting to 'SyntaxNode'.
      def node_class_name
        node_class_declarations.node_class_name || 'SyntaxNode'
      end

      # Compiles the first element and accumulates its result; the remaining
      # elements are compiled recursively inside an `if` on the first one's
      # success, so the generated conditionals nest one level per element.
      def compile_sequence_elements(elements)
        obtain_new_subexpression_address
        elements.first.compile(subexpression_address, builder)
        accumulate_subexpression_result
        return unless elements.size > 1

        builder.if_(subexpression_success?) do
          compile_sequence_elements(elements[1..-1])
        end
      end

      # Lazily built accessor module for this sequence's elements.
      def sequence_element_accessor_module
        @sequence_element_accessor_module ||= SequenceElementAccessorModule.new(sequence_elements)
      end

      def sequence_element_accessor_module_name
        sequence_element_accessor_module.module_name
      end
    end

    # Generates a module with one accessor method per labelled sequence
    # element, each returning the element at its position in +elements+.
    class SequenceElementAccessorModule
      include InlineModuleMixin
      attr_reader :sequence_elements

      def initialize(sequence_elements)
        @sequence_elements = sequence_elements
      end

      # Emits the module declaration. When several elements share a label,
      # each accessor gets a 1-based numeric suffix in order of appearance.
      def compile(index, builder, rule)
        super
        builder.module_declaration(module_name) do
          same_label = sequence_elements.group_by { |element| element.label_name.to_s }
          final_position = sequence_elements.size - 1
          sequence_elements.each_with_index do |element, position|
            label = element.label_name
            next unless label

            siblings = same_label[label.to_s]
            suffix = siblings.size > 1 ? (siblings.index(element) + 1).to_s : ""
            builder.method_declaration(label + suffix) do
              builder << "elements[#{position}]"
            end
            # Blank line between accessors, but not after the last element.
            builder.newline unless position == final_position
          end
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Treetop
  module Compiler
    # Compiles a quoted string terminal into a +has_terminal?+ check in the
    # generated parser.
    class Terminal < AtomicExpression
      # Emits an if/else: on a match a node spanning the terminal is
      # instantiated and @index is advanced by the terminal's length;
      # otherwise a terminal parse failure is recorded and the result is nil.
      def compile(address, builder, parent_expression = nil)
        super
        literal = text_value
        # NOTE(review): the quoted grammar text is evaluated as Ruby to learn
        # the length of the unescaped string. This is only safe while
        # grammars come from a trusted source.
        string_length = eval(literal).length

        builder.if__("has_terminal?(#{literal}, false, index)") do
          assign_result "instantiate_node(#{node_class_name},input, index...(index + #{string_length}))"
          extend_result_with_inline_module
          builder << "@index += #{string_length}"
        end
        builder.else_ do
          builder << "terminal_parse_failure(#{literal})"
          assign_result 'nil'
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Treetop
  module Compiler
    # A prefix that generates no code of its own: compilation is forwarded
    # to the parent's prefixed expression at the same address.
    class TransientPrefix < ParsingExpression
      def compile(address, builder, parent_expression)
        prefixed = parent_expression.prefixed_expression
        prefixed.compile(address, builder)
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Treetop
  module Compiler
    # Root node of a parsed file: its compiled form is the concatenation of
    # the compiled form of each of its elements, in order.
    class TreetopFile < Runtime::SyntaxNode
      def compile
        elements.map(&:compile).join
      end
    end
  end
end
@@ -0,0 +1,113 @@
1
module Treetop
  module Compiler
    # Accumulates generated Ruby source line by line, tracking the current
    # indentation level and handing out lexical addresses.
    class RubyBuilder

      attr_reader :level, :address_space, :ruby

      def initialize
        @level = 0
        @address_space = LexicalAddressSpace.new
        @ruby = ""
      end

      # Appends +ruby_line+ at the current indentation level followed by a
      # newline. Blank input is ignored and returns nil.
      def <<(ruby_line)
        ruby << ruby_line.tabto(level) << "\n" unless ruby_line.blank?
      end

      # Appends an empty line.
      def newline
        ruby << "\n"
      end

      # Runs the block with the indentation increased by +depth+ columns and
      # restores it afterwards.
      def indented(depth = 2)
        self.in(depth)
        yield
        self.out(depth)
      end

      def class_declaration(name, &block)
        emit_closed_block("class #{name}", &block)
      end

      def module_declaration(name, &block)
        emit_closed_block("module #{name}", &block)
      end

      def method_declaration(name, &block)
        emit_closed_block("def #{name}", &block)
      end

      # Emits an assignment. When +left+ is exactly an Array, both sides are
      # joined with ', ' to form a parallel assignment.
      def assign(left, right)
        lhs, rhs =
          if left.instance_of?(Array)
            [left.join(', '), right.join(', ')]
          else
            [left, right]
          end
        self << "#{lhs} = #{rhs}"
      end

      # Emits +var.extend(module_name)+. Note that this replaces
      # Object#extend on builder instances.
      def extend(var, module_name)
        self << "#{var}.extend(#{module_name})"
      end

      def accumulate(left, right)
        self << "#{left} << #{right}"
      end

      # Opens an `if` and emits its body WITHOUT a closing `end`; meant to
      # be followed by +else_+, which emits the `end`.
      def if__(condition, &block)
        self << "if #{condition}"
        indented(&block)
      end

      # Emits a complete `if ... end`.
      def if_(condition, &block)
        if__(condition, &block)
        self << 'end'
      end

      # Emits `else`, its body, and the `end` closing the preceding +if__+.
      def else_(&block)
        emit_closed_block('else', &block)
      end

      def loop(&block)
        emit_closed_block('loop do', &block)
      end

      def break
        self << 'break'
      end

      # Increases the indentation level; returns the builder.
      def in(depth = 2)
        @level += depth
        self
      end

      # Decreases the indentation level; returns the builder.
      def out(depth = 2)
        @level -= depth
        self
      end

      def next_address
        address_space.next_address
      end

      def reset_addresses
        address_space.reset_addresses
      end

      private

      def indent
        " " * level
      end

      # Emits +header+, the indented body produced by the block, and a
      # closing 'end' line.
      def emit_closed_block(header, &block)
        self << header
        indented(&block)
        self << 'end'
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ dir = File.dirname(__FILE__)
2
+ require "#{dir}/ruby_extensions/string"
@@ -0,0 +1,42 @@
1
class String
  # 1-based column of the character at +index+, counted from the nearest
  # preceding newline (or from the start of the string if there is none).
  def column_of(index)
    return 1 if index == 0

    preceding_newline = rindex("\n", index - 1)
    preceding_newline ? index - preceding_newline : index + 1
  end

  # 1-based line number of the character at +index+.
  def line_of(index)
    1 + self[0...index].count("\n")
  end

  # Only defined when nothing else has provided it; true for the empty
  # string only.
  unless method_defined?(:blank?)
    def blank?
      self == ""
    end
  end

  # The following methods are lifted from Facets 2.0.2

  # Shifts every line so that the first line containing a non-space
  # character starts at column +n+. Strings without such a line are
  # returned unchanged.
  def tabto(n)
    leading = /^( *)\S/.match(self)
    return self unless leading

    indent(n - leading[1].length)
  end

  # Adds +n+ spaces to the start of every line, or, for negative +n+,
  # removes up to that many leading spaces from every line.
  def indent(n)
    return gsub(/^/, ' ' * n) if n >= 0

    gsub(/^ {0,#{-n}}/, "")
  end

  # Converts a path-like snake_case name to a constant name:
  # "/" becomes "::" and each underscore-separated word is capitalised.
  def treetop_camelize
    namespaced = to_s.gsub(/\/(.?)/) { "::" + Regexp.last_match(1).upcase }
    namespaced.gsub(/(^|_)(.)/) { Regexp.last_match(2).upcase }
  end
end
@@ -0,0 +1,5 @@
1
+ dir = File.dirname(__FILE__)
2
+ require "#{dir}/runtime/compiled_parser"
3
+ require "#{dir}/runtime/syntax_node"
4
+ require "#{dir}/runtime/terminal_parse_failure"
5
+ require "#{dir}/runtime/interval_skip_list"
@@ -0,0 +1,118 @@
1
module Treetop
  module Runtime
    # Base class for generated parsers. Generated subclasses supply the
    # `_nt_<rule>` methods and a `root` reader; this class provides input and
    # index bookkeeping, terminal matching helpers, and failure reporting.
    class CompiledParser
      include Treetop::Runtime

      attr_reader :input, :index, :max_terminal_failure_index
      attr_writer :root
      attr_accessor :consume_all_input
      alias :consume_all_input? :consume_all_input

      def initialize
        self.consume_all_input = true
      end

      # Parses +input+ starting at options[:index] (default 0) by invoking
      # the root rule's `_nt_` method.
      #
      # Returns the root rule's result, or nil when +consume_all_input+ is
      # set and the parse stopped before the end of the input.
      def parse(input, options = {})
        prepare_to_parse(input)
        @index = options[:index] if options[:index]
        result = send("_nt_#{root}")
        return nil if (consume_all_input? && index != input.size)
        return result
      end

      # Furthest index at which a terminal failed to match.
      def failure_index
        max_terminal_failure_index
      end

      # Line of +failure_index+; nil before the first parse.
      def failure_line
        @terminal_failures && input.line_of(failure_index)
      end

      # Column of +failure_index+; nil before the first parse.
      def failure_column
        @terminal_failures && input.column_of(failure_index)
      end

      # Human-readable description of what was expected at the furthest
      # failure point, or nil when no terminal failure has been recorded.
      def failure_reason
        return nil unless (tf = terminal_failures) && tf.size > 0
        "Expected " +
          (tf.size == 1 ?
           tf[0].expected_string :
                 "one of #{tf.map{|f| f.expected_string}.uniq*', '}"
          ) +
                " at line #{failure_line}, column #{failure_column} (byte #{failure_index+1})" +
                " after #{input[index...failure_index]}"
      end

      # Returns the failures recorded at the furthest failure index as
      # TerminalParseFailure objects.
      #
      # Failures are stored as raw [index, expected_string] pairs and are
      # converted in place on demand. Entries that were already converted by
      # an earlier call are left untouched, so this method (and
      # +failure_reason+, which uses it) can safely be called repeatedly.
      # Returns an empty array when nothing has been parsed yet.
      def terminal_failures
        return [] unless @terminal_failures

        @terminal_failures.map! do |failure|
          failure.is_a?(Array) ? TerminalParseFailure.new(*failure) : failure
        end
      end


      protected

      attr_reader :node_cache, :input_length
      attr_writer :index

      # Resets all per-parse state for a new +input+.
      def prepare_to_parse(input)
        @input = input
        @input_length = input.length
        reset_index
        # Two-level cache; inner hashes are created on first access.
        @node_cache = Hash.new {|hash, key| hash[key] = Hash.new}
        @regexps = {}
        @terminal_failures = []
        @max_terminal_failure_index = 0
      end

      def reset_index
        @index = 0
      end

      # Consumes a single character, returning a node for it, or records a
      # terminal failure (returning nil) at the end of the input.
      def parse_anything(node_class = SyntaxNode, inline_module = nil)
        if index < input.length
          result = instantiate_node(node_class,input, index...(index + 1))
          result.extend(inline_module) if inline_module
          @index += 1
          result
        else
          terminal_parse_failure("any character")
        end
      end

      # Builds a node from +node_type+: anything responding to +new+ is
      # instantiated directly; anything else is mixed into a plain
      # SyntaxNode via +extend+.
      def instantiate_node(node_type,*args)
        if node_type.respond_to? :new
          node_type.new(*args)
        else
          SyntaxNode.new(*args).extend(node_type)
        end
      end

      # True when +terminal+ matches the input exactly at +index+. With
      # +regex+ set, +terminal+ is a pattern source; compiled patterns are
      # cached per parse.
      def has_terminal?(terminal, regex, index)
        if regex
          rx = @regexps[terminal] ||= Regexp.new(terminal)
          input.index(rx, index) == index
        else
          input[index, terminal.size] == terminal
        end
      end

      # Returns the text matched by +regex+ exactly at +index+, or nil.
      def regex_match?(regex, index)
        rx = @regexps[regex] ||= Regexp.new(regex)
        if input.index(rx, index) == index
          $&
        else
          nil
        end
      end

      # Records that +expected_string+ failed to match at the current index.
      # Only failures at the furthest index seen so far are kept: earlier
      # ones are ignored, and a new furthest index discards the old list.
      # Always returns nil.
      def terminal_parse_failure(expected_string)
        return nil if index < max_terminal_failure_index
        if index > max_terminal_failure_index
          @max_terminal_failure_index = index
          @terminal_failures = []
        end
        @terminal_failures << [index, expected_string]
        return nil
      end
    end
  end
end