treetop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/treetop.rb +6 -0
- data/lib/treetop/api.rb +3 -0
- data/lib/treetop/api/load_grammar.rb +16 -0
- data/lib/treetop/api/malformed_grammar_exception.rb +9 -0
- data/lib/treetop/grammar.rb +7 -0
- data/lib/treetop/grammar/grammar.rb +48 -0
- data/lib/treetop/grammar/grammar_builder.rb +35 -0
- data/lib/treetop/grammar/parsing_expression_builder.rb +5 -0
- data/lib/treetop/grammar/parsing_expression_builder_helper.rb +121 -0
- data/lib/treetop/grammar/parsing_expressions.rb +18 -0
- data/lib/treetop/grammar/parsing_expressions/and_predicate.rb +17 -0
- data/lib/treetop/grammar/parsing_expressions/anything_symbol.rb +20 -0
- data/lib/treetop/grammar/parsing_expressions/character_class.rb +24 -0
- data/lib/treetop/grammar/parsing_expressions/node_instantiating_parsing_expression.rb +14 -0
- data/lib/treetop/grammar/parsing_expressions/node_propagating_parsing_expression.rb +4 -0
- data/lib/treetop/grammar/parsing_expressions/nonterminal_symbol.rb +42 -0
- data/lib/treetop/grammar/parsing_expressions/not_predicate.rb +18 -0
- data/lib/treetop/grammar/parsing_expressions/one_or_more.rb +12 -0
- data/lib/treetop/grammar/parsing_expressions/optional.rb +14 -0
- data/lib/treetop/grammar/parsing_expressions/ordered_choice.rb +27 -0
- data/lib/treetop/grammar/parsing_expressions/parsing_expression.rb +36 -0
- data/lib/treetop/grammar/parsing_expressions/predicate.rb +25 -0
- data/lib/treetop/grammar/parsing_expressions/repeating_parsing_expression.rb +29 -0
- data/lib/treetop/grammar/parsing_expressions/sequence.rb +41 -0
- data/lib/treetop/grammar/parsing_expressions/terminal_parsing_expression.rb +11 -0
- data/lib/treetop/grammar/parsing_expressions/terminal_symbol.rb +31 -0
- data/lib/treetop/grammar/parsing_expressions/zero_or_more.rb +11 -0
- data/lib/treetop/grammar/parsing_rule.rb +10 -0
- data/lib/treetop/metagrammar.rb +2 -0
- data/lib/treetop/metagrammar/metagrammar.rb +14 -0
- data/lib/treetop/metagrammar/metagrammar.treetop +320 -0
- data/lib/treetop/parser.rb +11 -0
- data/lib/treetop/parser/node_cache.rb +25 -0
- data/lib/treetop/parser/parse_cache.rb +17 -0
- data/lib/treetop/parser/parse_failure.rb +22 -0
- data/lib/treetop/parser/parse_result.rb +26 -0
- data/lib/treetop/parser/parser.rb +24 -0
- data/lib/treetop/parser/sequence_syntax_node.rb +14 -0
- data/lib/treetop/parser/syntax_node.rb +31 -0
- data/lib/treetop/parser/terminal_parse_failure.rb +18 -0
- data/lib/treetop/parser/terminal_syntax_node.rb +7 -0
- data/lib/treetop/protometagrammar.rb +16 -0
- data/lib/treetop/protometagrammar/anything_symbol_expression_builder.rb +13 -0
- data/lib/treetop/protometagrammar/block_expression_builder.rb +17 -0
- data/lib/treetop/protometagrammar/character_class_expression_builder.rb +25 -0
- data/lib/treetop/protometagrammar/grammar_expression_builder.rb +38 -0
- data/lib/treetop/protometagrammar/nonterminal_symbol_expression_builder.rb +45 -0
- data/lib/treetop/protometagrammar/ordered_choice_expression_builder.rb +21 -0
- data/lib/treetop/protometagrammar/parsing_rule_expression_builder.rb +23 -0
- data/lib/treetop/protometagrammar/parsing_rule_sequence_expression_builder.rb +14 -0
- data/lib/treetop/protometagrammar/prefix_expression_builder.rb +25 -0
- data/lib/treetop/protometagrammar/primary_expression_builder.rb +71 -0
- data/lib/treetop/protometagrammar/protometagrammar.rb +25 -0
- data/lib/treetop/protometagrammar/sequence_expression_builder.rb +37 -0
- data/lib/treetop/protometagrammar/suffix_expression_builder.rb +33 -0
- data/lib/treetop/protometagrammar/terminal_symbol_expression_builder.rb +52 -0
- data/lib/treetop/protometagrammar/trailing_block_expression_builder.rb +30 -0
- data/lib/treetop/ruby_extension.rb +11 -0
- metadata +110 -0
data/lib/treetop.rb
ADDED
data/lib/treetop/api.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
class Object
  # Loads the Treetop grammar stored at +treetop_file_path+ and publishes it
  # as a constant on Object named after the grammar.
  #
  # The ".treetop" extension is appended when the path does not already end
  # with it. The file's text is parsed with a parser obtained from
  # Metagrammar.new_parser.
  #
  # Returns the result of Object.const_set (the parsed grammar).
  # Raises MalformedGrammarException, built from the parse result's
  # nested_failures, when the parse does not succeed.
  def load_grammar(treetop_file_path)
    # \z anchors at the true end of the string; `$` would also match a
    # ".treetop" that is merely followed by a newline inside the path.
    treetop_file_path += ".treetop" unless treetop_file_path =~ /\.treetop\z/

    # Read the whole file first so the handle is released before parsing.
    grammar_source = File.open(treetop_file_path, 'r') { |grammar_file| grammar_file.read }
    result = Metagrammar.new_parser.parse(grammar_source)

    raise MalformedGrammarException.new(result.nested_failures) unless result.success?

    grammar = result.value
    Object.const_set(grammar.name, grammar)
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# Raised when a grammar cannot be parsed.
#
# +errors+ is the collection of failures handed to the constructor; each one
# is expected to respond to +expression+ and +index+, which are used to build
# the message.
#
# Inherits from StandardError (rather than Exception directly) so that a bare
# `rescue` handles it; `rescue Exception` and
# `rescue MalformedGrammarException` keep working as before.
class MalformedGrammarException < StandardError
  attr_reader :errors

  # errors - failures describing what was expected and where.
  def initialize(errors)
    @errors = errors
    super(message_for(errors))
  end

  private

  # Builds the human-readable message. An empty failure list gets a generic
  # message instead of crashing on errors.first being nil.
  def message_for(errors)
    return "Malformed grammar." if errors.empty?

    expected_expressions = errors.map(&:expression)
    "String matching #{expected_expressions.join(' or ')} expected at position #{errors.first.index}."
  end
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
dir = File.dirname(__FILE__)
|
2
|
+
require "#{dir}/grammar/grammar"
|
3
|
+
require "#{dir}/grammar/parsing_expression_builder_helper"
|
4
|
+
require "#{dir}/grammar/grammar_builder"
|
5
|
+
require "#{dir}/grammar/parsing_expression_builder"
|
6
|
+
require "#{dir}/grammar/parsing_expressions"
|
7
|
+
require "#{dir}/grammar/parsing_rule"
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Treetop
  # A named collection of parsing rules, indexed by the name of each rule's
  # nonterminal symbol. The first rule added becomes the root unless a root
  # has already been assigned.
  class Grammar
    attr_accessor :root, :builder, :name

    # name  - optional name for the grammar.
    # block - optional block; when given it is forwarded to #build.
    def initialize(name = nil, &block)
      @name = name
      @parsing_rules = {}
      @nonterminal_symbols = {}
      self.builder = GrammarBuilder.new(self)
      build(&block) unless block.nil?
    end

    # Returns a new Parser constructed with this grammar.
    def new_parser
      Parser.new(self)
    end

    # Returns the NonterminalSymbol registered under +ruby_sym+, creating and
    # remembering one on first use so repeated lookups share one instance.
    def nonterminal_symbol(ruby_sym)
      known = @nonterminal_symbols[ruby_sym]
      return known if known

      @nonterminal_symbols[ruby_sym] = NonterminalSymbol.new(ruby_sym, self)
    end

    # Registers a rule. Accepts either a ready-made rule object, or a
    # NonterminalSymbol together with its expression.
    # Returns the grammar's root after the (possible) root assignment.
    def add_parsing_rule(parsing_rule_or_nonterminal, expression = nil)
      rule = make_parsing_rule(parsing_rule_or_nonterminal, expression)
      symbol = rule.nonterminal_symbol
      @parsing_rules[symbol.name] = rule
      self.root ||= symbol
    end

    # Returns the parsing expression of the rule stored under the symbol's
    # name. Raises a RuntimeError when no such rule has been added.
    def get_parsing_expression(nonterminal_symbol)
      rule = @parsing_rules[nonterminal_symbol.name]
      raise "No parsing rule found named #{nonterminal_symbol}." unless rule

      rule.parsing_expression
    end

    # Hands +block+ to the grammar's builder.
    def build(&block)
      builder.build(&block)
    end

    private

    # Wraps a NonterminalSymbol and expression in a ParsingRule; any other
    # object is returned untouched and treated as an existing rule.
    def make_parsing_rule(rule_or_nonterminal, expression)
      return rule_or_nonterminal unless rule_or_nonterminal.is_a?(NonterminalSymbol)

      ParsingRule.new(rule_or_nonterminal, expression)
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Treetop
  # Evaluates grammar-definition blocks against a Grammar: the block passed
  # to #build runs with this builder as +self+, so #root, #rule and the
  # helpers mixed in from ParsingExpressionBuilderHelper are available in it.
  class GrammarBuilder
    attr_accessor :grammar
    include ParsingExpressionBuilderHelper

    # grammar - the grammar that receives the rules defined through this
    #           builder.
    def initialize(grammar)
      self.grammar = grammar
    end

    # Runs +block+ in the context of this builder.
    def build(&block)
      instance_eval(&block)
    end

    # Makes the nonterminal named +sym+ the grammar's root.
    def root(sym)
      grammar.root = grammar.nonterminal_symbol(sym)
    end

    # Adds a rule for +nonterminal_name+ whose expression is given either
    # directly or as a builder that produces it.
    #
    # Raises ArgumentError when the second argument is neither.
    def rule(nonterminal_name, expression_or_expression_builder)
      nonterminal_symbol = grammar.nonterminal_symbol(nonterminal_name)
      parsing_expression = parsing_expression_for(expression_or_expression_builder)

      grammar.add_parsing_rule(nonterminal_symbol, parsing_expression)
    end

    # Resolves the argument to a parsing expression: a ParsingExpression is
    # returned as-is; a ParsingExpressionBuilder is given this builder's
    # grammar and asked to build.
    #
    # Anything else is rejected up front. Without this branch the case
    # expression would yield nil and a rule with a nil expression would be
    # registered, only to fail later, far from the mistake.
    def parsing_expression_for(expression_or_expression_builder)
      case expression_or_expression_builder
      when ParsingExpression
        expression_or_expression_builder
      when ParsingExpressionBuilder
        expression_or_expression_builder.grammar = grammar
        expression_or_expression_builder.build
      else
        raise ArgumentError,
              "Expected a ParsingExpression or ParsingExpressionBuilder, " \
              "got #{expression_or_expression_builder.inspect}"
      end
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module Treetop
  # Helper methods for assembling parsing expressions. Including classes get
  # a +grammar+ accessor, which #nonterm uses to look up nonterminal symbols.
  module ParsingExpressionBuilderHelper
    attr_accessor :grammar

    # Returns the grammar's nonterminal symbol for +symbol+.
    def nonterm(symbol)
      grammar.nonterminal_symbol(symbol)
    end

    # Returns a TerminalSymbol for +string+.
    def term(string)
      TerminalSymbol.new(string)
    end

    # Coerces +object+ into a parsing expression:
    #   String            -> terminal symbol
    #   Symbol            -> nonterminal symbol
    #   ParsingExpression -> returned unchanged
    #   Array             -> array with every element coerced the same way
    #
    # When a block is given it is evaluated via node_class_eval on the
    # result. NOTE(review): an Array result does not respond to
    # node_class_eval, so a block combined with an Array raises
    # NoMethodError; callers in this module never do that.
    #
    # Raises RuntimeError for any other argument type.
    def exp(object, &block)
      expression = case object
                   when String then term(object)
                   when Symbol then nonterm(object)
                   when ParsingExpression then object
                   when Array then object.map { |elt| exp(elt) }
                   else
                     raise "Argument must be an instance of String, Symbol, ParsingExpression, or Array"
                   end
      expression.node_class_eval(&block) if block
      expression
    end

    # Returns a new AnythingSymbol.
    def any
      AnythingSymbol.new
    end

    # Returns a CharacterClass for the given class body (e.g. "a-z").
    def char_class(char_class_string)
      CharacterClass.new(char_class_string)
    end

    # Returns the and-predicate of the coerced expression.
    def andp(expression)
      exp(expression).and_predicate
    end

    # Returns the not-predicate of the coerced expression.
    def notp(expression)
      exp(expression).not_predicate
    end

    # Returns the optional form of the coerced expression.
    def optional(expression)
      exp(expression).optional
    end

    # Returns a Sequence of the coerced expressions; an optional block is
    # evaluated via node_class_eval on the sequence.
    def seq(*expressions, &block)
      sequence = Sequence.new(exp(expressions))
      sequence.node_class_eval(&block) if block
      sequence
    end

    # Returns an OrderedChoice over the coerced expressions.
    def choice(*expressions)
      OrderedChoice.new(exp(expressions))
    end

    # Returns the zero-or-more repetition of the coerced expression.
    def zero_or_more(expression)
      exp(expression).zero_or_more
    end

    # Returns the one-or-more repetition of the coerced expression.
    def one_or_more(expression)
      exp(expression).one_or_more
    end

    # Returns a sequence of a literal backslash followed by +character+.
    def escaped(character)
      seq('\\', character)
    end

    # Shorthand for n_or_more_delimited with n = 0.
    def zero_or_more_delimited(expression, delimiter, &block)
      n_or_more_delimited(0, expression, delimiter, &block)
    end

    # Shorthand for n_or_more_delimited with n = 2.
    def two_or_more_delimited(expression, delimiter, &block)
      n_or_more_delimited(2, expression, delimiter, &block)
    end

    # Builds a sequence of a head expression followed by repeated
    # (delimiter, expression) pairs.
    #
    # Only two properties of +n+ are used: n == 0 makes the head optional,
    # and n > 1 requires at least one delimited tail element. Larger values
    # of n are therefore not enforced beyond "two or more".
    #
    # The node class of the returned sequence gets an +elements+ method that
    # returns [] when the head node is epsilon, and otherwise the head node
    # followed by the tail node's elements. An optional block is evaluated on
    # the sequence's node class afterwards.
    def n_or_more_delimited(n, expression, delimiter, &block)
      expression = exp(expression)
      delimiter = exp(delimiter)

      delimited_sequence = seq(delimited_sequence_head(n, expression),
                               delimited_sequence_tail(n, expression, delimiter)) do
        def elements
          # Ask the superclass once and reuse the answer.
          parts = super
          return [] if parts[0].epsilon?
          [parts[0]] + parts[1].elements
        end
      end
      delimited_sequence.node_class_eval(&block) if block

      delimited_sequence
    end

    # Head of a delimited sequence: optional when n is 0, required otherwise.
    def delimited_sequence_head(n, expression)
      n == 0 ? optional(expression) : expression
    end

    # Tail of a delimited sequence: one-or-more (delimiter, expression) pairs
    # when n > 1, zero-or-more otherwise. The tail's node class gets an
    # +elements+ method that maps each pair node to its +element+.
    def delimited_sequence_tail(n, expression, delimiter)
      tail_element = delimited_sequence_tail_element(n, expression, delimiter)
      tail_elements = n > 1 ? one_or_more(tail_element) : zero_or_more(tail_element)

      tail_elements.node_class_eval do
        def elements
          super.map { |elt| elt.element }
        end
      end
      tail_elements
    end

    # A single (delimiter, expression) pair whose node exposes the expression
    # part as +element+. +n+ is accepted for signature symmetry and unused.
    def delimited_sequence_tail_element(n, expression, delimiter)
      seq(delimiter, expression) do
        def element
          elements[1]
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
dir = File.dirname(__FILE__)
|
2
|
+
require "#{dir}/parsing_expressions/parsing_expression"
|
3
|
+
require "#{dir}/parsing_expressions/node_instantiating_parsing_expression"
|
4
|
+
require "#{dir}/parsing_expressions/node_propagating_parsing_expression"
|
5
|
+
require "#{dir}/parsing_expressions/terminal_parsing_expression"
|
6
|
+
require "#{dir}/parsing_expressions/terminal_symbol"
|
7
|
+
require "#{dir}/parsing_expressions/anything_symbol"
|
8
|
+
require "#{dir}/parsing_expressions/character_class"
|
9
|
+
require "#{dir}/parsing_expressions/nonterminal_symbol"
|
10
|
+
require "#{dir}/parsing_expressions/sequence"
|
11
|
+
require "#{dir}/parsing_expressions/ordered_choice"
|
12
|
+
require "#{dir}/parsing_expressions/repeating_parsing_expression"
|
13
|
+
require "#{dir}/parsing_expressions/zero_or_more"
|
14
|
+
require "#{dir}/parsing_expressions/one_or_more"
|
15
|
+
require "#{dir}/parsing_expressions/optional"
|
16
|
+
require "#{dir}/parsing_expressions/predicate"
|
17
|
+
require "#{dir}/parsing_expressions/and_predicate"
|
18
|
+
require "#{dir}/parsing_expressions/not_predicate"
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Treetop
  # Predicate that mirrors the outcome of its child expression: it reports
  # success when the child succeeded and failure when the child failed.
  # Rendered as "&(...)".
  class AndPredicate < Predicate
    # Textual form: the child expression wrapped in "&( )".
    def to_s
      "&(#{expression})"
    end

    protected

    # Child matched: report success at +index+, carrying the child's result.
    def child_expression_success(index, input, result)
      success_at(index, input, [result])
    end

    # Child did not match: report failure at +index+, carrying the child's
    # result.
    def child_expression_failure(index, input, result)
      failure_at(index, [result])
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Treetop
  # Terminal expression, rendered as ".", that accepts whatever single
  # character sits at the requested position.
  class AnythingSymbol < TerminalParsingExpression
    def initialize
      super
    end

    # Returns a node covering the one-character interval beginning at
    # +start_index+, or a TerminalParseFailure when +start_index+ is not
    # inside +input+. +parser+ is not consulted.
    def parse_at(input, start_index, parser)
      return TerminalParseFailure.new(start_index, self) unless start_index < input.length

      one_character = start_index...(start_index + 1)
      node_class.new(input, one_character)
    end

    # Textual form of the expression.
    def to_s
      '.'
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Treetop
  # Terminal expression matching one character against a bracketed character
  # class such as "a-z".
  class CharacterClass < TerminalParsingExpression

    attr_reader :char_class_string, :prefix_regex

    # char_class_string - body of the character class, without the
    #                     surrounding brackets.
    def initialize(char_class_string)
      super()
      @char_class_string = char_class_string
      @prefix_regex = Regexp.new("[#{char_class_string}]")
      # Same pattern pinned to the search start with \G, so a failed match
      # at the requested position stops immediately instead of scanning the
      # remainder of the input for a later occurrence.
      @anchored_regex = Regexp.new("\\G(?:#{@prefix_regex.source})")
    end

    # Returns a node covering the one-character interval at +start_index+
    # when the character class matches there; otherwise a
    # TerminalParseFailure. +parser+ is not consulted.
    def parse_at(input, start_index, parser)
      if input.index(@anchored_regex, start_index) == start_index
        node_class.new(input, start_index...(start_index + 1))
      else
        TerminalParseFailure.new(start_index, self)
      end
    end

    # Textual form: the class body wrapped in brackets.
    def to_s
      "[#{char_class_string}]"
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module Treetop
  # Parsing expression that owns an anonymous node class. The class is
  # created on construction as a subclass of whatever +node_superclass+
  # returns (that method is not defined here).
  class NodeInstantiatingParsingExpression < ParsingExpression
    attr_reader :node_class

    def initialize
      @node_class = Class.new(node_superclass)
    end

    # Extends the node class with a string of code, a block, or both (the
    # string is evaluated first). Returns the block's class_eval result, or
    # nil when no block is given.
    def node_class_eval(string = nil, &block)
      node_class.class_eval(string) unless string.nil? || string == false
      block ? node_class.class_eval(&block) : nil
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Treetop
  # Named reference to a rule of a grammar. Parsing is delegated to the
  # expression the grammar has registered for this symbol, and results are
  # kept in the parser's node cache for this symbol, keyed by start index.
  class NonterminalSymbol < NodePropagatingParsingExpression
    attr_reader :name, :grammar

    # name    - identifier of the rule within +grammar+.
    # grammar - grammar that resolves this symbol to an expression.
    def initialize(name, grammar)
      @name = name
      @grammar = grammar
    end

    # Looks up the expression currently registered for this symbol.
    def parsing_expression
      grammar.get_parsing_expression(self)
    end

    # The symbol's name as a string.
    def to_s
      name.to_s
    end

    # Returns the cached result for +start_index+ when one exists; otherwise
    # parses, stores the outcome in the cache and returns what +store+
    # returns.
    def parse_at(input, start_index, parser)
      cache = parser.node_cache_for(self)
      remembered = cache[start_index]
      return remembered if remembered

      cache.store(parse_at_without_caching(input, start_index, parser))
    end

    protected

    # Parses with the underlying expression. A successful result is passed
    # through; a failed one is re-wrapped as a failure of this symbol at
    # +start_index+ that carries the original nested failures.
    def parse_at_without_caching(input, start_index, parser)
      outcome = parsing_expression.parse_at(input, start_index, parser)
      return outcome if outcome.success?

      failure_at(start_index, outcome.nested_failures)
    end

    # The parser's node cache dedicated to this symbol.
    def node_cache(parser)
      parser.node_cache_for(self)
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Treetop
  # Predicate that inverts the outcome of its child expression: it reports
  # failure when the child succeeded and success when the child failed.
  # Rendered as "!(...)".
  class NotPredicate < Predicate
    # Textual form: the child expression wrapped in "!( )".
    def to_s
      "!(#{expression})"
    end

    protected

    # Child matched: report failure at +index+, carrying the child's result.
    def child_expression_success(index, input, result)
      failure_at(index, [result])
    end

    # Child did not match: report success at +index+, carrying the child's
    # result.
    def child_expression_failure(index, input, result)
      success_at(index, input, [result])
    end
  end
end
|