regex-treetop 1.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data/LICENSE +19 -0
  2. data/README.md +164 -0
  3. data/Rakefile +19 -0
  4. data/bin/tt +112 -0
  5. data/doc/contributing_and_planned_features.markdown +103 -0
  6. data/doc/grammar_composition.markdown +65 -0
  7. data/doc/index.markdown +90 -0
  8. data/doc/pitfalls_and_advanced_techniques.markdown +51 -0
  9. data/doc/semantic_interpretation.markdown +189 -0
  10. data/doc/site.rb +112 -0
  11. data/doc/sitegen.rb +65 -0
  12. data/doc/syntactic_recognition.markdown +100 -0
  13. data/doc/using_in_ruby.markdown +21 -0
  14. data/examples/lambda_calculus/arithmetic.rb +551 -0
  15. data/examples/lambda_calculus/arithmetic.treetop +97 -0
  16. data/examples/lambda_calculus/arithmetic_node_classes.rb +7 -0
  17. data/examples/lambda_calculus/arithmetic_test.rb +54 -0
  18. data/examples/lambda_calculus/lambda_calculus +0 -0
  19. data/examples/lambda_calculus/lambda_calculus.rb +718 -0
  20. data/examples/lambda_calculus/lambda_calculus.treetop +132 -0
  21. data/examples/lambda_calculus/lambda_calculus_node_classes.rb +5 -0
  22. data/examples/lambda_calculus/lambda_calculus_test.rb +89 -0
  23. data/examples/lambda_calculus/test_helper.rb +18 -0
  24. data/lib/treetop.rb +16 -0
  25. data/lib/treetop/bootstrap_gen_1_metagrammar.rb +45 -0
  26. data/lib/treetop/compiler.rb +6 -0
  27. data/lib/treetop/compiler/grammar_compiler.rb +44 -0
  28. data/lib/treetop/compiler/lexical_address_space.rb +17 -0
  29. data/lib/treetop/compiler/metagrammar.rb +3392 -0
  30. data/lib/treetop/compiler/metagrammar.treetop +454 -0
  31. data/lib/treetop/compiler/node_classes.rb +21 -0
  32. data/lib/treetop/compiler/node_classes/anything_symbol.rb +18 -0
  33. data/lib/treetop/compiler/node_classes/atomic_expression.rb +14 -0
  34. data/lib/treetop/compiler/node_classes/character_class.rb +28 -0
  35. data/lib/treetop/compiler/node_classes/choice.rb +31 -0
  36. data/lib/treetop/compiler/node_classes/declaration_sequence.rb +24 -0
  37. data/lib/treetop/compiler/node_classes/grammar.rb +28 -0
  38. data/lib/treetop/compiler/node_classes/inline_module.rb +27 -0
  39. data/lib/treetop/compiler/node_classes/nonterminal.rb +13 -0
  40. data/lib/treetop/compiler/node_classes/optional.rb +19 -0
  41. data/lib/treetop/compiler/node_classes/parenthesized_expression.rb +9 -0
  42. data/lib/treetop/compiler/node_classes/parsing_expression.rb +146 -0
  43. data/lib/treetop/compiler/node_classes/parsing_rule.rb +55 -0
  44. data/lib/treetop/compiler/node_classes/predicate.rb +45 -0
  45. data/lib/treetop/compiler/node_classes/predicate_block.rb +16 -0
  46. data/lib/treetop/compiler/node_classes/regex.rb +23 -0
  47. data/lib/treetop/compiler/node_classes/repetition.rb +55 -0
  48. data/lib/treetop/compiler/node_classes/sequence.rb +71 -0
  49. data/lib/treetop/compiler/node_classes/terminal.rb +20 -0
  50. data/lib/treetop/compiler/node_classes/transient_prefix.rb +9 -0
  51. data/lib/treetop/compiler/node_classes/treetop_file.rb +9 -0
  52. data/lib/treetop/compiler/ruby_builder.rb +113 -0
  53. data/lib/treetop/ruby_extensions.rb +2 -0
  54. data/lib/treetop/ruby_extensions/string.rb +42 -0
  55. data/lib/treetop/runtime.rb +5 -0
  56. data/lib/treetop/runtime/compiled_parser.rb +118 -0
  57. data/lib/treetop/runtime/interval_skip_list.rb +4 -0
  58. data/lib/treetop/runtime/interval_skip_list/head_node.rb +15 -0
  59. data/lib/treetop/runtime/interval_skip_list/interval_skip_list.rb +200 -0
  60. data/lib/treetop/runtime/interval_skip_list/node.rb +164 -0
  61. data/lib/treetop/runtime/syntax_node.rb +114 -0
  62. data/lib/treetop/runtime/terminal_parse_failure.rb +16 -0
  63. data/lib/treetop/runtime/terminal_syntax_node.rb +17 -0
  64. data/lib/treetop/version.rb +9 -0
  65. metadata +138 -0
@@ -0,0 +1,45 @@
1
module Treetop
  module Compiler
    # Code generation shared by the two syntactic predicates. The prefixed
    # expression is compiled as a subexpression and the generated code then
    # branches on whether it succeeded; subclasses decide which branch counts
    # as a match through +when_success+ and +when_failure+.
    class Predicate < ParsingExpression
      # Emits the predicate's code: the subexpression, followed by an
      # if/else on the subexpression's success.
      def compile(address, builder, parent_expression)
        super
        begin_comment(parent_expression)
        use_vars :result, :start_index
        obtain_new_subexpression_address
        guarded_expression = parent_expression.prefixed_expression
        guarded_expression.compile(subexpression_address, builder)
        builder.if__(subexpression_success?) do
          when_success
        end
        builder.else_ do
          when_failure
        end
        end_comment(parent_expression)
      end

      # Fails the predicate, passing the start index variable to the
      # inherited failure assignment.
      def assign_failure
        super(start_index_var)
      end

      # Succeeds: resets the index first, then assigns an epsilon node as
      # the result.
      def assign_success
        reset_index
        assign_result epsilon_node
      end
    end

    # Predicate that matches when its subexpression matches.
    class AndPredicate < Predicate
      def when_success
        assign_success
      end

      def when_failure
        assign_failure
      end
    end

    # Predicate that matches when its subexpression does NOT match.
    class NotPredicate < Predicate
      def when_success
        assign_failure
      end

      def when_failure
        assign_success
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module Treetop
  module Compiler
    # Compiles a predicate written as a Ruby block. The block's source text
    # is wrapped in a lambda that is called immediately in the generated code.
    class PredicateBlock < ParsingExpression
      def compile(index, builder, parent_expression = nil)
        super
        # REVISIT: This is distinctly dodgy, but since we can only be called
        # from two contexts, and it works in both of those, it stays for now
        # instead of the major refactor needed to provide a proper way of
        # accessing the parent's accumulator variable.
        #
        # Walk up the parent chain to the nearest node that exposes an
        # accumulator variable; the walk may run off the top and end at nil.
        ancestor = parent
        ancestor = ancestor.parent until !ancestor || ancestor.respond_to?(:accumulator_var)
        # With no such ancestor the lambda is called without arguments.
        lambda_argument = ancestor ? ancestor.accumulator_var : ""
        assign_result "lambda #{text_value}.call(#{lambda_argument})"
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Treetop
  module Compiler
    # Compiles a regular-expression terminal written as r(...) in a grammar.
    # The generated code calls regex_match? at the current index and, on a
    # match, builds a node spanning the matched text and advances @index.
    class Regex < AtomicExpression
      def compile(address, builder, parent_expression = nil)
        super

        # Strip the leading 'r(' and the trailing ')', then double every
        # backslash so the pattern survives being embedded in a %() string
        # literal. The replacement literal is three backslashes: gsub reads
        # the first two as one escaped backslash and keeps the trailing lone
        # one as-is, so each backslash in the source becomes two.
        pattern_source = text_value[2..-2].gsub("\\", "\\\\\\")
        rx = "%(#{pattern_source})"

        builder.if__("(rx_match = regex_match?(#{rx}, index))") do
          assign_result "instantiate_node(#{node_class_name},input, index...(index + rx_match.length))"
          extend_result_with_inline_module
          builder << "@index += rx_match.length"
        end
        builder.else_ do
          builder << "terminal_parse_failure('/' + #{rx} + '/')"
          assign_result 'nil'
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
module Treetop
  module Compiler
    # Code generation shared by the repetition operators. Emits a loop that
    # keeps matching the repeated expression, accumulating each successful
    # result and breaking out on the first failure. Subclasses emit the code
    # that turns the accumulated results into the final result.
    class Repetition < ParsingExpression
      def compile(address, builder, parent_expression)
        super
        element = parent_expression.atomic
        begin_comment(parent_expression)
        use_vars :result, :accumulator, :start_index

        builder.loop do
          obtain_new_subexpression_address
          element.compile(subexpression_address, builder)
          builder.if__(subexpression_success?) { accumulate_subexpression_result }
          builder.else_ { builder.break }
        end
      end

      # The inline module name is taken from the parent expression.
      def inline_module_name
        parent_expression.inline_module_name
      end

      # Emits code building a node that spans from the start index to the
      # current index with the accumulated results as its elements, then
      # extends it with the inline module.
      def assign_and_extend_result
        assign_result "instantiate_node(#{node_class_name},input, #{start_index_var}...index, #{accumulator_var})"
        extend_result_with_inline_module
      end
    end


    # Repetition that always yields a node, even with nothing accumulated.
    class ZeroOrMore < Repetition
      def compile(address, builder, parent_expression)
        super
        assign_and_extend_result
        end_comment(parent_expression)
      end
    end

    # Repetition that fails when nothing was accumulated.
    class OneOrMore < Repetition
      def compile(address, builder, parent_expression)
        super
        builder.if__("#{accumulator_var}.empty?") do
          reset_index
          assign_failure start_index_var
        end
        builder.else_ { assign_and_extend_result }
        end_comment(parent_expression)
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
module Treetop
  module Compiler
    # Compiles a sequence of expressions. The elements are compiled as nested
    # conditionals, so each element is only attempted when the previous one
    # succeeded, and every element result is accumulated.
    class Sequence < ParsingExpression
      def compile(address, builder, parent_expression = nil)
        super
        begin_comment(self)
        use_vars :result, :start_index, :accumulator
        compile_sequence_elements(sequence_elements)
        # The generated code treats the sequence as matched when the last
        # accumulated result is truthy.
        builder.if__("#{accumulator_var}.last") do
          assign_result "instantiate_node(#{node_class_name},input, #{start_index_var}...index, #{accumulator_var})"
          if sequence_element_accessor_module_name
            extend_result sequence_element_accessor_module_name
          end
          extend_result_with_inline_module
        end
        builder.else_ do
          reset_index
          assign_failure start_index_var
        end
        end_comment(self)
      end

      # Declared node class name, defaulting to 'SyntaxNode'.
      def node_class_name
        node_class_declarations.node_class_name || 'SyntaxNode'
      end

      # Compiles the first element, accumulates its result and, when more
      # elements follow, recurses inside an `if` on the first one's success.
      def compile_sequence_elements(elements)
        obtain_new_subexpression_address
        head = elements.first
        head.compile(subexpression_address, builder)
        accumulate_subexpression_result
        return unless elements.size > 1

        builder.if_(subexpression_success?) do
          compile_sequence_elements(elements[1..-1])
        end
      end

      # Lazily built accessor module for this sequence's elements.
      def sequence_element_accessor_module
        @sequence_element_accessor_module ||= SequenceElementAccessorModule.new(sequence_elements)
      end

      def sequence_element_accessor_module_name
        sequence_element_accessor_module.module_name
      end
    end

    # Generates a module with one accessor method per labelled sequence
    # element; each accessor returns the element at that position.
    class SequenceElementAccessorModule
      include InlineModuleMixin
      attr_reader :sequence_elements

      def initialize(sequence_elements)
        @sequence_elements = sequence_elements
      end

      def compile(index, builder, rule)
        super
        builder.module_declaration(module_name) do
          # Group elements by label so repeated labels can be told apart.
          elements_by_name = sequence_elements.group_by { |candidate| candidate.label_name.to_s }
          final_position = sequence_elements.size - 1
          sequence_elements.each_with_index do |element, position|
            next unless element.label_name

            same_label = elements_by_name[element.label_name.to_s]
            # A label used more than once gets a 1-based numeric suffix.
            suffix = same_label.size > 1 ? (same_label.index(element) + 1).to_s : ""
            label_name = element.label_name + suffix
            builder.method_declaration(label_name) do
              builder << "elements[#{position}]"
            end
            builder.newline unless position == final_position
          end
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Treetop
  module Compiler
    # Compiles a quoted string terminal. The generated code tests the input
    # with has_terminal? and, on a match, builds a node covering the matched
    # text and advances @index past it.
    class Terminal < AtomicExpression
      def compile(address, builder, parent_expression = nil)
        super
        # NOTE(review): the terminal's source text is eval'd to learn the
        # length of the string it denotes; this executes text taken from the
        # grammar, so grammars must come from a trusted source.
        literal_length = eval(text_value).length

        builder.if__("has_terminal?(#{text_value}, false, index)") do
          assign_result "instantiate_node(#{node_class_name},input, index...(index + #{literal_length}))"
          extend_result_with_inline_module
          builder << "@index += #{literal_length}"
        end
        builder.else_ do
          builder << "terminal_parse_failure(#{text_value})"
          assign_result 'nil'
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Treetop
  module Compiler
    # A prefix that generates no code of its own: compiling it just compiles
    # the parent's prefixed expression at the same address.
    class TransientPrefix < ParsingExpression
      def compile(address, builder, parent_expression)
        wrapped_expression = parent_expression.prefixed_expression
        wrapped_expression.compile(address, builder)
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Treetop
  module Compiler
    # Root node of a parsed grammar file.
    class TreetopFile < Runtime::SyntaxNode
      # Compiles every child element and returns the results joined into a
      # single string.
      def compile
        elements.map(&:compile).join
      end
    end
  end
end
@@ -0,0 +1,113 @@
1
module Treetop
  module Compiler
    # Accumulates generated Ruby source line by line in +ruby+, tracking the
    # current indentation level and handing out addresses from a
    # LexicalAddressSpace.
    class RubyBuilder

      attr_reader :level, :address_space, :ruby

      def initialize
        @level = 0
        @address_space = LexicalAddressSpace.new
        @ruby = ""
      end

      # Appends +ruby_line+ re-indented to the current level, followed by a
      # newline. Blank input is ignored and returns nil.
      def <<(ruby_line)
        return if ruby_line.blank?
        ruby << ruby_line.tabto(level) << "\n"
      end

      # Appends an empty line.
      def newline
        ruby << "\n"
      end

      # Runs the block with the level raised by +depth+, then lowers it again.
      def indented(depth = 2)
        self.in(depth)
        yield
        self.out(depth)
      end

      def class_declaration(name, &block)
        declaration('class', name, &block)
      end

      def module_declaration(name, &block)
        declaration('module', name, &block)
      end

      def method_declaration(name, &block)
        declaration('def', name, &block)
      end

      # Emits an assignment. When +left+ is an Array, both sides are joined
      # with ", " to form a parallel assignment (so +right+ must respond to
      # join as well).
      def assign(left, right)
        statement =
          if left.instance_of?(Array)
            "#{left.join(', ')} = #{right.join(', ')}"
          else
            "#{left} = #{right}"
          end
        self << statement
      end

      def extend(var, module_name)
        self << "#{var}.extend(#{module_name})"
      end

      def accumulate(left, right)
        self << "#{left} << #{right}"
      end

      # Opens an `if` WITHOUT emitting the closing `end`; meant to be
      # followed by else_, which closes it.
      def if__(condition, &block)
        self << "if #{condition}"
        indented(&block)
      end

      # Emits a complete `if ... end`.
      def if_(condition, &block)
        if__(condition, &block)
        self << 'end'
      end

      # Emits the `else` branch and the `end` closing the preceding if__.
      def else_(&block)
        self << 'else'
        indented(&block)
        self << 'end'
      end

      def loop(&block)
        self << 'loop do'
        indented(&block)
        self << 'end'
      end

      def break
        self << 'break'
      end

      # Raises the indentation level; returns self.
      def in(depth = 2)
        @level += depth
        self
      end

      # Lowers the indentation level; returns self.
      def out(depth = 2)
        @level -= depth
        self
      end

      def next_address
        address_space.next_address
      end

      def reset_addresses
        address_space.reset_addresses
      end

      private

      def indent
        " " * level
      end

      # Emits "<keyword> <name>", the indented block body, and "end".
      def declaration(keyword, name, &block)
        self << "#{keyword} #{name}"
        indented(&block)
        self << "end"
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ dir = File.dirname(__FILE__)
2
+ require "#{dir}/ruby_extensions/string"
@@ -0,0 +1,42 @@
1
# String helpers used by the compiler and runtime.
class String
  # 1-based column of the character at +index+, counted from the nearest
  # newline found at or before index - 1 (or from the start of the string).
  def column_of(index)
    return 1 if index == 0
    preceding_newline = rindex("\n", index - 1)
    preceding_newline ? index - preceding_newline : index + 1
  end

  # 1-based line number of the character at +index+.
  def line_of(index)
    self[0...index].count("\n") + 1
  end

  # Defined only when nothing else has provided it; true for the empty
  # string only.
  unless method_defined?(:blank?)
    def blank?
      empty?
    end
  end

  # The following methods are lifted from Facets 2.0.2

  # Shifts the whole string so that the first line containing a non-space
  # character starts at column +n+. Strings without such a line are
  # returned unchanged.
  def tabto(n)
    first_content = /^( *)\S/.match(self)
    return self unless first_content
    indent(n - first_content[1].length)
  end

  # Adds +n+ spaces to the start of every line, or for negative +n+ removes
  # up to that many leading spaces from every line.
  def indent(n)
    return gsub(/^/, ' ' * n) if n >= 0
    gsub(/^ {0,#{-n}}/, "")
  end

  # Converts a path-like snake_case name to a constant name:
  # "/" becomes "::" and each segment is camel-cased.
  def treetop_camelize
    namespaced = to_s.gsub(/\/(.?)/) { "::" + Regexp.last_match(1).upcase }
    namespaced.gsub(/(^|_)(.)/) { Regexp.last_match(2).upcase }
  end
end
@@ -0,0 +1,5 @@
1
+ dir = File.dirname(__FILE__)
2
+ require "#{dir}/runtime/compiled_parser"
3
+ require "#{dir}/runtime/syntax_node"
4
+ require "#{dir}/runtime/terminal_parse_failure"
5
+ require "#{dir}/runtime/interval_skip_list"
@@ -0,0 +1,118 @@
1
module Treetop
  module Runtime
    # Base class for generated parsers. A subclass is expected to provide a
    # +root+ reader and one +_nt_<rule>+ method per grammar rule; this class
    # supplies the parse entry point, failure reporting and the primitive
    # matchers the generated rule methods call.
    class CompiledParser
      include Treetop::Runtime

      attr_reader :input, :index, :max_terminal_failure_index
      attr_writer :root
      attr_accessor :consume_all_input
      alias :consume_all_input? :consume_all_input

      def initialize
        self.consume_all_input = true
      end

      # Parses +input+ starting at options[:index] (default 0) by calling the
      # root rule method.
      #
      # Returns the root rule's result, or nil when consume_all_input? is set
      # and the parse stopped before the end of the input.
      def parse(input, options = {})
        prepare_to_parse(input)
        @index = options[:index] if options[:index]
        result = send("_nt_#{root}")
        return nil if consume_all_input? && index != input.size
        result
      end

      # Index of the furthest terminal failure recorded so far.
      def failure_index
        max_terminal_failure_index
      end

      # 1-based line of the failure index (nil/false before any parse).
      def failure_line
        @terminal_failures && input.line_of(failure_index)
      end

      # 1-based column of the failure index (nil/false before any parse).
      def failure_column
        @terminal_failures && input.column_of(failure_index)
      end

      # Human-readable description of what was expected at the failure index,
      # or nil when no terminal failure has been recorded.
      def failure_reason
        failures = terminal_failures
        return nil if failures.empty?

        expectation =
          if failures.size == 1
            failures[0].expected_string
          else
            "one of #{failures.map { |f| f.expected_string }.uniq * ', '}"
          end
        "Expected " + expectation +
          " at line #{failure_line}, column #{failure_column} (byte #{failure_index + 1})" +
          " after #{input[index...failure_index]}"
      end

      # The failures recorded at the failure index, as TerminalParseFailure
      # objects.
      #
      # Failures are stored as cheap [index, expected_string] pairs while
      # parsing and converted in place on demand. The conversion skips
      # entries that are already converted, so calling this method (or
      # failure_reason) more than once is safe; previously the second call
      # tried to re-wrap the objects and raised. Returns an empty array
      # before the first parse.
      def terminal_failures
        return [] if @terminal_failures.nil?

        @terminal_failures.map! do |failure|
          failure.is_a?(TerminalParseFailure) ? failure : TerminalParseFailure.new(*failure)
        end
      end


      protected

      attr_reader :node_cache, :input_length
      attr_writer :index

      # Resets all per-parse state for a fresh run over +input+.
      def prepare_to_parse(input)
        @input = input
        @input_length = input.length
        reset_index
        @node_cache = Hash.new { |hash, key| hash[key] = Hash.new }
        @regexps = {}
        @terminal_failures = []
        @max_terminal_failure_index = 0
      end

      def reset_index
        @index = 0
      end

      # Matches any single character: returns a node for it and advances the
      # index, or records a terminal failure (returning nil) at end of input.
      def parse_anything(node_class = SyntaxNode, inline_module = nil)
        if index < input.length
          result = instantiate_node(node_class, input, index...(index + 1))
          result.extend(inline_module) if inline_module
          @index += 1
          result
        else
          terminal_parse_failure("any character")
        end
      end

      # Builds a node. +node_type+ may be a class (anything responding to
      # new) or a module, in which case a SyntaxNode is extended with it.
      def instantiate_node(node_type, *args)
        if node_type.respond_to? :new
          node_type.new(*args)
        else
          SyntaxNode.new(*args).extend(node_type)
        end
      end

      # True when +terminal+ occurs in the input exactly at +index+. With
      # +regex+ set, +terminal+ is a pattern source, compiled once per parse.
      def has_terminal?(terminal, regex, index)
        if regex
          rx = @regexps[terminal] ||= Regexp.new(terminal)
          input.index(rx, index) == index
        else
          input[index, terminal.size] == terminal
        end
      end

      # Returns the text matched by +regex+ (a pattern source, compiled once
      # per parse) when a match starts exactly at +index+, otherwise nil.
      def regex_match?(regex, index)
        rx = @regexps[regex] ||= Regexp.new(regex)
        if input.index(rx, index) == index
          $&
        else
          nil
        end
      end

      # Records that +expected_string+ was expected at the current index.
      # Only failures at the furthest index reached are kept: earlier ones
      # are ignored and a later one discards what was recorded before.
      # Always returns nil.
      def terminal_parse_failure(expected_string)
        return nil if index < max_terminal_failure_index
        if index > max_terminal_failure_index
          @max_terminal_failure_index = index
          @terminal_failures = []
        end
        @terminal_failures << [index, expected_string]
        return nil
      end
    end
  end
end