treetop 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/lib/treetop.rb +6 -0
  2. data/lib/treetop/api.rb +3 -0
  3. data/lib/treetop/api/load_grammar.rb +16 -0
  4. data/lib/treetop/api/malformed_grammar_exception.rb +9 -0
  5. data/lib/treetop/grammar.rb +7 -0
  6. data/lib/treetop/grammar/grammar.rb +48 -0
  7. data/lib/treetop/grammar/grammar_builder.rb +35 -0
  8. data/lib/treetop/grammar/parsing_expression_builder.rb +5 -0
  9. data/lib/treetop/grammar/parsing_expression_builder_helper.rb +121 -0
  10. data/lib/treetop/grammar/parsing_expressions.rb +18 -0
  11. data/lib/treetop/grammar/parsing_expressions/and_predicate.rb +17 -0
  12. data/lib/treetop/grammar/parsing_expressions/anything_symbol.rb +20 -0
  13. data/lib/treetop/grammar/parsing_expressions/character_class.rb +24 -0
  14. data/lib/treetop/grammar/parsing_expressions/node_instantiating_parsing_expression.rb +14 -0
  15. data/lib/treetop/grammar/parsing_expressions/node_propagating_parsing_expression.rb +4 -0
  16. data/lib/treetop/grammar/parsing_expressions/nonterminal_symbol.rb +42 -0
  17. data/lib/treetop/grammar/parsing_expressions/not_predicate.rb +18 -0
  18. data/lib/treetop/grammar/parsing_expressions/one_or_more.rb +12 -0
  19. data/lib/treetop/grammar/parsing_expressions/optional.rb +14 -0
  20. data/lib/treetop/grammar/parsing_expressions/ordered_choice.rb +27 -0
  21. data/lib/treetop/grammar/parsing_expressions/parsing_expression.rb +36 -0
  22. data/lib/treetop/grammar/parsing_expressions/predicate.rb +25 -0
  23. data/lib/treetop/grammar/parsing_expressions/repeating_parsing_expression.rb +29 -0
  24. data/lib/treetop/grammar/parsing_expressions/sequence.rb +41 -0
  25. data/lib/treetop/grammar/parsing_expressions/terminal_parsing_expression.rb +11 -0
  26. data/lib/treetop/grammar/parsing_expressions/terminal_symbol.rb +31 -0
  27. data/lib/treetop/grammar/parsing_expressions/zero_or_more.rb +11 -0
  28. data/lib/treetop/grammar/parsing_rule.rb +10 -0
  29. data/lib/treetop/metagrammar.rb +2 -0
  30. data/lib/treetop/metagrammar/metagrammar.rb +14 -0
  31. data/lib/treetop/metagrammar/metagrammar.treetop +320 -0
  32. data/lib/treetop/parser.rb +11 -0
  33. data/lib/treetop/parser/node_cache.rb +25 -0
  34. data/lib/treetop/parser/parse_cache.rb +17 -0
  35. data/lib/treetop/parser/parse_failure.rb +22 -0
  36. data/lib/treetop/parser/parse_result.rb +26 -0
  37. data/lib/treetop/parser/parser.rb +24 -0
  38. data/lib/treetop/parser/sequence_syntax_node.rb +14 -0
  39. data/lib/treetop/parser/syntax_node.rb +31 -0
  40. data/lib/treetop/parser/terminal_parse_failure.rb +18 -0
  41. data/lib/treetop/parser/terminal_syntax_node.rb +7 -0
  42. data/lib/treetop/protometagrammar.rb +16 -0
  43. data/lib/treetop/protometagrammar/anything_symbol_expression_builder.rb +13 -0
  44. data/lib/treetop/protometagrammar/block_expression_builder.rb +17 -0
  45. data/lib/treetop/protometagrammar/character_class_expression_builder.rb +25 -0
  46. data/lib/treetop/protometagrammar/grammar_expression_builder.rb +38 -0
  47. data/lib/treetop/protometagrammar/nonterminal_symbol_expression_builder.rb +45 -0
  48. data/lib/treetop/protometagrammar/ordered_choice_expression_builder.rb +21 -0
  49. data/lib/treetop/protometagrammar/parsing_rule_expression_builder.rb +23 -0
  50. data/lib/treetop/protometagrammar/parsing_rule_sequence_expression_builder.rb +14 -0
  51. data/lib/treetop/protometagrammar/prefix_expression_builder.rb +25 -0
  52. data/lib/treetop/protometagrammar/primary_expression_builder.rb +71 -0
  53. data/lib/treetop/protometagrammar/protometagrammar.rb +25 -0
  54. data/lib/treetop/protometagrammar/sequence_expression_builder.rb +37 -0
  55. data/lib/treetop/protometagrammar/suffix_expression_builder.rb +33 -0
  56. data/lib/treetop/protometagrammar/terminal_symbol_expression_builder.rb +52 -0
  57. data/lib/treetop/protometagrammar/trailing_block_expression_builder.rb +30 -0
  58. data/lib/treetop/ruby_extension.rb +11 -0
  59. metadata +110 -0
@@ -0,0 +1,14 @@
1
module Treetop
  # Parsing expression that makes another expression optional. It is an
  # ordered choice between the wrapped expression and TerminalSymbol.epsilon,
  # in that order.
  class Optional < OrderedChoice
    # The expression that may or may not be present.
    attr_reader :expression

    # optional_expression - the parsing expression being made optional.
    def initialize(optional_expression)
      @expression = optional_expression
      super([optional_expression, TerminalSymbol.epsilon])
    end

    # Renders the wrapped expression in parentheses followed by "?".
    def to_s
      "(#{expression})?"
    end
  end
end
@@ -0,0 +1,27 @@
1
module Treetop
  # Tries each alternative in order at the same start index and returns the
  # result of the first alternative that succeeds.
  class OrderedChoice < NodePropagatingParsingExpression
    # The alternative parsing expressions, in the order they are tried.
    attr_reader :alternatives

    # alternatives - enumerable of parsing expressions to try in order.
    def initialize(alternatives)
      @alternatives = alternatives
    end

    # Renders the alternatives separated by " / " inside parentheses.
    def to_s
      rendered = alternatives.map { |alternative| alternative.to_s }
      parenthesize(rendered.join(" / "))
    end

    # Parses input at start_index with each alternative in turn.
    #
    # Returns the first successful result, after handing it the nested
    # failures of the alternatives that failed before it. When every
    # alternative fails, returns a failure at start_index built from all of
    # the failed results.
    def parse_at(input, start_index, parser)
      failed_results = []

      alternatives.each do |alternative|
        outcome = alternative.parse_at(input, start_index, parser)

        unless outcome.success?
          failed_results << outcome
          next
        end

        outcome.update_nested_failures(collect_nested_failures(failed_results))
        return outcome
      end

      failure_at(start_index, failed_results)
    end
  end
end
@@ -0,0 +1,36 @@
1
module Treetop
  # Base class holding the combinator helpers shared by parsing expressions,
  # plus the helpers used to build failures.
  class ParsingExpression
    # Wraps this expression in a ZeroOrMore.
    def zero_or_more
      ZeroOrMore.new(self)
    end

    # Wraps this expression in a OneOrMore.
    def one_or_more
      OneOrMore.new(self)
    end

    # Wraps this expression in an Optional.
    def optional
      Optional.new(self)
    end

    # Wraps this expression in an AndPredicate.
    def and_predicate
      AndPredicate.new(self)
    end

    # Wraps this expression in a NotPredicate.
    def not_predicate
      NotPredicate.new(self)
    end

    # Returns string surrounded by a pair of parentheses.
    def parenthesize(string)
      "(#{string})"
    end

    # Builds a ParseFailure at index carrying the nested failures gathered
    # from nested_results (none by default).
    def failure_at(index, nested_results = [])
      gathered = collect_nested_failures(nested_results)
      ParseFailure.new(index, gathered)
    end

    # Gathers the nested_failures of every result into a single flat array.
    def collect_nested_failures(results)
      results.map { |each_result| each_result.nested_failures }.flatten
    end

    protected :failure_at, :collect_nested_failures
  end
end
@@ -0,0 +1,25 @@
1
module Treetop
  # Common behaviour for predicate expressions: parse the wrapped expression
  # and dispatch on whether it succeeded.
  #
  # child_expression_success and child_expression_failure are not defined in
  # this class; presumably the concrete predicate subclasses supply them
  # (verify against AndPredicate / NotPredicate).
  class Predicate < NodePropagatingParsingExpression
    # The expression whose success or failure is being tested.
    attr_reader :expression

    # expression - the parsing expression wrapped by this predicate.
    def initialize(expression)
      @expression = expression
    end

    # Parses the wrapped expression at start_index and returns whatever the
    # matching child_expression_success / child_expression_failure hook
    # returns for that outcome.
    def parse_at(input, start_index, parser)
      child_result = expression.parse_at(input, start_index, parser)
      return child_expression_success(start_index, input, child_result) if child_result.success?

      child_expression_failure(start_index, input, child_result)
    end

    protected

    # Builds a SyntaxNode over the empty interval index...index, carrying the
    # nested failures gathered from nested_results.
    def success_at(index, input, nested_results)
      empty_interval = index...index
      SyntaxNode.new(input, empty_interval, collect_nested_failures(nested_results))
    end
  end
end
@@ -0,0 +1,29 @@
1
module Treetop
  # Shared implementation for repetition expressions. Subclasses decide, via
  # enough?(results), whether the number of matches collected is acceptable.
  class RepeatingParsingExpression < Sequence
    # The expression that is matched repeatedly.
    attr_reader :repeated_expression

    # repeated_expression - the parsing expression to repeat. The bare `super`
    # forwards this same argument to Sequence#initialize.
    def initialize(repeated_expression)
      super
      @repeated_expression = repeated_expression
    end

    # Matches repeated_expression as many times as possible starting at
    # start_index.
    #
    # Repetition stops at the first failed match. It also stops after a match
    # that does not advance the position: such a match would succeed at the
    # same index forever, so without this check the loop never terminates for
    # expressions that can match the empty string.
    #
    # Returns the node built by Sequence#success spanning all matches when
    # enough?(results) holds, otherwise a failure at start_index.
    def parse_at(input, start_index, parser)
      results = []
      next_index = start_index
      result = nil # declared here so it is still visible after the loop block

      loop do
        result = repeated_expression.parse_at(input, next_index, parser)
        break if result.failure?

        results << result
        reached = result.interval.end
        made_progress = reached > next_index
        next_index = reached
        break unless made_progress
      end

      # The terminating failure contributes its nested failures. A zero-width
      # success that ended the loop is already in results, so it is not
      # appended a second time.
      encountered = result.failure? ? results + [result] : results

      if enough? results
        success(input, start_index...next_index, results, encountered)
      else
        failure_at(start_index, encountered)
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module Treetop
  # Matches a list of parsing expressions one after another, each starting
  # where the previous one ended.
  class Sequence < NodeInstantiatingParsingExpression
    attr_reader :elements, :node_class

    # elements - the parsing expressions to match in order.
    def initialize(elements)
      super()
      @elements = elements
    end

    # Superclass reported for the nodes this expression instantiates.
    def node_superclass
      SequenceSyntaxNode
    end

    # Renders the elements separated by single spaces inside parentheses.
    def to_s
      rendered = @elements.map { |element| element.to_s }
      parenthesize(rendered.join(" "))
    end

    # Parses each element in turn, beginning at start_index.
    #
    # Returns a failure at start_index as soon as one element fails; the
    # failure is built from every result seen so far, including the failing
    # one. Otherwise returns a node covering start_index up to the end of the
    # last element's interval.
    def parse_at(input, start_index, parser)
      child_results = []
      cursor = start_index

      elements.each do |element|
        child_result = element.parse_at(input, cursor, parser)
        child_results << child_result
        return failure_at(start_index, child_results) if child_result.failure?

        cursor = child_result.interval.end
      end

      success(input, start_index...cursor, child_results, child_results)
    end

    protected

    # Instantiates node_class with the input, interval, child results and the
    # nested failures gathered from encountered_child_results.
    def success(input, interval, results, encountered_child_results)
      nested_failures = collect_nested_failures(encountered_child_results)
      node_class.new(input, interval, results, nested_failures)
    end
  end
end
@@ -0,0 +1,11 @@
1
module Treetop
  # Base class for node-instantiating expressions whose nodes use
  # TerminalSyntaxNode as their superclass.
  class TerminalParsingExpression < NodeInstantiatingParsingExpression
    # Takes no arguments and passes none on to the superclass initializer.
    def initialize
      super()
    end

    # Superclass reported for the nodes this expression instantiates.
    def node_superclass
      TerminalSyntaxNode
    end
  end
end
@@ -0,0 +1,31 @@
1
module Treetop
  # Terminal expression that matches one literal string (its prefix) at the
  # current position of the input.
  class TerminalSymbol < TerminalParsingExpression
    # The literal text this symbol matches.
    attr_accessor :prefix

    # Returns the memoized terminal symbol for the empty string.
    def self.epsilon
      @epsilon ||= new("")
    end

    # prefix - the literal string to match.
    def initialize(prefix)
      super()
      self.prefix = prefix
    end

    # True only when this symbol matches the empty string. Uses empty? so a
    # whitespace-only terminal such as " " is not reported as epsilon.
    def epsilon?
      prefix.empty?
    end

    # Renders the prefix wrapped in double quotes.
    def to_s
      "\"#{prefix}\""
    end

    # Matches prefix against input at start_index.
    #
    # Only the prefix-sized slice at start_index is compared, so a failed
    # match costs time proportional to the prefix rather than to the rest of
    # the input. A negative start_index never matches.
    #
    # Returns a node_class instance spanning the matched text, or a
    # TerminalParseFailure at start_index.
    def parse_at(input, start_index, parser)
      width = prefix.length

      if start_index >= 0 && input[start_index, width] == prefix
        node_class.new(input, start_index...(width + start_index))
      else
        TerminalParseFailure.new(start_index, self)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module Treetop
  # Repetition that accepts any number of matches, including none.
  class ZeroOrMore < RepeatingParsingExpression
    # Any number of matches is acceptable, so this is always true.
    def enough?(results)
      true
    end

    # Renders the repeated expression in parentheses followed by "*".
    def to_s
      "(#{repeated_expression})*"
    end
  end
end
@@ -0,0 +1,10 @@
1
module Treetop
  # Pairs a nonterminal symbol with the parsing expression given for it.
  class ParsingRule
    attr_reader :nonterminal_symbol, :parsing_expression

    # nonterminal_symbol - the symbol on the left-hand side of the rule.
    # parsing_expression - the expression on the right-hand side of the rule.
    def initialize(nonterminal_symbol, parsing_expression)
      @nonterminal_symbol, @parsing_expression = nonterminal_symbol, parsing_expression
    end
  end
end
@@ -0,0 +1,2 @@
1
+ dir = File.dirname(__FILE__)
2
+ require "#{dir}/metagrammar/metagrammar"
@@ -0,0 +1,14 @@
1
module Treetop
  # Bootstraps Treetop::Metagrammar: the text of metagrammar.treetop, which
  # sits next to this file, is parsed with a parser obtained from the
  # Protometagrammar, and the value of the parse result becomes the constant.
  metagrammar_parser = Protometagrammar.new.new_parser
  metagrammar_path = File.expand_path('metagrammar.treetop', File.dirname(__FILE__))

  result = metagrammar_parser.parse(File.read(metagrammar_path))
  raise 'unable to parse metagrammar' unless result.success?

  Metagrammar = result.value
end
@@ -0,0 +1,320 @@
1
grammar Metagrammar
  rule grammar
    ('grammar' space grammar_name? parsing_rule_sequence space? 'end') {
      # Builds a Grammar and adds every parsing rule of the sequence to it.
      def value
        grammar = Grammar.new(grammar_name)
        parsing_rules(grammar).each do |parsing_rule|
          grammar.add_parsing_rule(parsing_rule)
        end
        return grammar
      end

      # Value of the optional name element, or nil when the name was omitted.
      def grammar_name
        if elements[2].epsilon?
          nil
        else
          elements[2].value
        end
      end

      # Values of the parsing rule sequence, built against the given grammar.
      def parsing_rules(grammar)
        elements[3].value(grammar)
      end
    }
  end

  rule grammar_name
    ([A-Z] alphanumeric_char*) space {
      # The matched name, without the trailing space, as a symbol.
      def value
        elements[0].text_value.to_sym
      end
    }
  end

  rule parsing_rule_sequence
    (parsing_rule (space parsing_rule)*) {
      # Array holding the value of the first rule followed by the rest.
      def value(grammar)
        [head.value(grammar)] + tail_values(grammar)
      end

      def head
        elements[0]
      end

      def tail_elements
        elements[1].elements
      end

      # Each tail element is (space parsing_rule), so the rule is at index 1.
      def tail_values(grammar)
        tail_elements.collect {|tail_element| tail_element.elements[1].value(grammar)}
      end
    }
    /
    '' {
      # An empty sequence contributes no rules.
      def value(grammar)
        []
      end
    }
  end

  rule parsing_rule
    ('rule' space nonterminal_symbol space ordered_choice space 'end') {
      # Builds a ParsingRule from the symbol value and the expression value.
      def value(grammar)
        ParsingRule.new(nonterminal_symbol.value(grammar),
                        parsing_expression.value(grammar))
      end

      def nonterminal_symbol
        elements[2]
      end

      def parsing_expression
        elements[4]
      end
    }
  end

  rule ordered_choice
    (sequence (space? '/' space? sequence)+) {
      # Builds an OrderedChoice over the values of all alternatives.
      def value(grammar)
        OrderedChoice.new(alternatives(grammar))
      end

      def alternatives(grammar)
        [head.value(grammar)] + tail_values(grammar)
      end

      def head
        elements[0]
      end

      def tail_elements
        elements[1].elements
      end

      # Each tail element is (space? '/' space? sequence), so the sequence is at index 3.
      def tail_values(grammar)
        tail_elements.map {|tail_element| tail_element.elements[3].value(grammar)}
      end
    }
    /
    sequence
  end

  rule sequence
    (primary (space primary)+ trailing_block) {
      # Builds a Sequence of the element values and passes it through the trailing block.
      def value(grammar)
        trailing_block.value(Sequence.new(sequence_elements(grammar)))
      end

      def trailing_block
        elements[2]
      end

      def sequence_elements(grammar)
        [head.value(grammar)] + tail_values(grammar)
      end

      def head
        elements[0]
      end

      def tail_elements
        elements[1].elements
      end

      # Each tail element is (space primary), so the primary is at index 1.
      def tail_values(grammar)
        tail_elements.map {|tail_element| tail_element.elements[1].value(grammar)}
      end
    }
    /
    primary
  end

  rule parenthesized_ordered_choice
    '(' space? ordered_choice space? ')' trailing_block {
      # Evaluates the nested expression exactly once, so the object that passes
      # the kind check is the same object that receives the trailing block.
      # Evaluating it a second time would also double the work at every level
      # of parenthesis nesting.
      def value(grammar)
        nested_value = nested_expression.value(grammar)
        unless trailing_block.epsilon? || nested_value.kind_of?(NodeInstantiatingParsingExpression)
          raise "Blocks can only follow node-instantiating parsing expressions such as sequences and terminal symbols."
        end
        return trailing_block.value(nested_value)
      end

      def nested_expression
        elements[2]
      end

      def trailing_block
        elements[5]
      end
    }
  end

  rule primary
    (prefix? ((parenthesized_ordered_choice / terminal_symbol / nonterminal_symbol) suffix?)) {
      # Applies the suffix first and the prefix second, each only when present.
      def value(grammar)
        value = primary_expression.value(grammar)
        value = suffix.value(value) unless suffix.epsilon?
        value = prefix.value(value) unless prefix.epsilon?
        value
      end

      def prefix
        elements[0]
      end

      def primary_expression
        elements[1].elements[0]
      end

      def suffix
        elements[1].elements[1]
      end
    }
  end

  rule prefix
    '&' {
      def value(parsing_expression)
        parsing_expression.and_predicate
      end
    }
    /
    '!' {
      def value(parsing_expression)
        parsing_expression.not_predicate
      end
    }
  end

  rule suffix
    '*' {
      def value(parsing_expression)
        parsing_expression.zero_or_more
      end
    }
    /
    '+' {
      def value(parsing_expression)
        parsing_expression.one_or_more
      end
    }
    /
    '?' {
      def value(parsing_expression)
        parsing_expression.optional
      end
    }
  end

  rule nonterminal_symbol
    (!(keyword !alphanumeric_char) (alpha_char alphanumeric_char*)) {
      # Asks the grammar for the nonterminal symbol with the matched name.
      def value(grammar)
        grammar.nonterminal_symbol(name)
      end

      # Element 0 is the negative lookahead, so the name text is element 1.
      def name
        elements[1].text_value.to_sym
      end
    }
  end

  rule alpha_char
    [A-Za-z_]
  end

  rule alphanumeric_char
    alpha_char / [0-9]
  end

  rule terminal_symbol
    terminal_symbol_prefix trailing_block {
      # The grammar argument is accepted but not used here.
      def value(grammar = nil)
        trailing_block.value(terminal_symbol.value)
      end

      def terminal_symbol
        elements[0]
      end

      def trailing_block
        elements[1]
      end
    }
  end

  rule terminal_symbol_prefix
    single_quoted_string / double_quoted_string / character_class / anything_symbol
  end

  rule double_quoted_string
    ('"' (!'"' ('\"' / .))* '"') {
      # Builds a TerminalSymbol from the text between the quotes.
      def value
        TerminalSymbol.new(elements[1].text_value)
      end
    }
  end

  rule single_quoted_string
    ("'" (!"'" ("\'" / .))* "'") {
      # Builds a TerminalSymbol from the text between the quotes.
      def value
        TerminalSymbol.new(elements[1].text_value)
      end
    }
  end

  rule trailing_block
    space block {
      # Evaluates the block text via node_class_eval on the expression, then returns the expression.
      def value(parsing_expression)
        parsing_expression.node_class_eval(block.value)
        return parsing_expression
      end

      def block
        elements[1]
      end
    }
    /
    '' {
      # No block was given, so the expression is returned untouched.
      def value(parsing_expression)
        parsing_expression
      end
    }
  end

  rule block
    ('{' (block / ![{}] .)* '}') {
      # The text between the outermost pair of braces.
      def value
        elements[1].text_value
      end
    }
  end

  rule character_class
    ('[' (!']' ('\]'/.))+ ']') {
      # The grammar argument is accepted but not used here.
      def value(grammar = nil)
        CharacterClass.new(characters)
      end

      # The text between the square brackets.
      def characters
        elements[1].text_value
      end
    }
  end

  rule keyword
    'rule' / 'end'
  end

  rule anything_symbol
    '.' {
      # The grammar argument is accepted but not used here.
      def value(grammar = nil)
        AnythingSymbol.new
      end
    }
  end

  rule space
    [ \t\n\r]+
  end
end