Spectre 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/CHANGELOG +1 -0
  2. data/LICENSE +23 -0
  3. data/README +20 -0
  4. data/Rakefile +112 -0
  5. data/lib/spectre/base.rb +44 -0
  6. data/lib/spectre/base/closure.rb +96 -0
  7. data/lib/spectre/base/directive.rb +148 -0
  8. data/lib/spectre/base/grammar.rb +269 -0
  9. data/lib/spectre/base/inputiterator.rb +276 -0
  10. data/lib/spectre/base/node.rb +393 -0
  11. data/lib/spectre/base/operators.rb +342 -0
  12. data/lib/spectre/base/parser.rb +110 -0
  13. data/lib/spectre/generic.rb +115 -0
  14. data/lib/spectre/generic/directives.rb +246 -0
  15. data/lib/spectre/generic/negations.rb +68 -0
  16. data/lib/spectre/generic/primitives.rb +172 -0
  17. data/lib/spectre/generic/semanticaction.rb +43 -0
  18. data/lib/spectre/string.rb +57 -0
  19. data/lib/spectre/string/additionals.rb +80 -0
  20. data/lib/spectre/string/directives.rb +51 -0
  21. data/lib/spectre/string/inputiterator.rb +57 -0
  22. data/lib/spectre/string/primitives.rb +400 -0
  23. data/test/base/closure_tests.rb +108 -0
  24. data/test/base/grammar_tests.rb +97 -0
  25. data/test/base/operator_tests.rb +335 -0
  26. data/test/base/semanticaction_tests.rb +53 -0
  27. data/test/generic/directive_tests.rb +224 -0
  28. data/test/generic/negation_tests.rb +146 -0
  29. data/test/generic/primitive_tests.rb +99 -0
  30. data/test/string/POD2Parser_tests.rb +93 -0
  31. data/test/string/additional_tests.rb +43 -0
  32. data/test/string/directive_tests.rb +32 -0
  33. data/test/string/primitive_tests.rb +173 -0
  34. data/test/tests.rb +33 -0
  35. data/test/tutorial/funnymath_tests.rb +57 -0
  36. data/test/tutorial/html_tests.rb +171 -0
  37. data/test/tutorial/skipping_tests.rb +60 -0
  38. metadata +109 -0
@@ -0,0 +1,393 @@
1
+ # This is Spectre, a parser framework inspired by Boost.Spirit,
2
+ # which can be found at http://spirit.sourceforge.net/.
3
+ #
4
+ # If you want to find out more or need a tutorial, go to
5
+ # http://spectre.rubyforge.org/
6
+ # You'll find a nice wiki there!
7
+ #
8
+ # Author:: Fabian Streitel (karottenreibe)
9
+ # Copyright:: Copyright (c) 2009 Fabian Streitel
10
+ # License:: Boost Software License 1.0
11
+ # For further information regarding this license, you can go to
12
+ # http://www.boost.org/LICENSE_1_0.txt
13
+ # or read the file LICENSE distributed with this software.
14
+ # Homepage:: http://spectre.rubyforge.org/
15
+ # Git repo:: http://rubyforge.org/scm/?group_id=7618
16
+ #
17
+ # Keeps the Node class the parse tree is composed of.
18
+ #
19
+
20
+ module Spectre
21
+
22
+ ##
23
+ # A Node in the parse tree. Nodes keep the parsing tree consistent by applying
24
+ # some control mechanisms to the parsing process and taking away responsibility
25
+ # from the Parsers.
26
+ # They keep the Closures and parent-child relations between the Parsers as well as
27
+ # the symbol-identified Parsers of the Grammars.
28
+ #
29
class Node

  ##
  # The semantic actions that are associated with this Node - Array.
  attr_accessor :actions

  ##
  # The location the InputIterator was at, before the Parser started matching.
  attr_accessor :backtrace

  ##
  # The left child Node of this Node or nil if none.
  attr_accessor :left

  ##
  # The right child Node of this Node or nil if none.
  attr_accessor :right

  ##
  # The parent Node of this Node.
  attr_accessor :parent

  ##
  # The symbol-identified Parsers for this Node - Hash.
  attr_accessor :symbols

  ##
  # The Parser that resides in this Node.
  attr_accessor :parser

  ##
  # The policy that governs the Parser's behaviour - Hash.
  # - :union => :normal|:shortest|:longest -- Evaluation of unions: first, longest or shortest match
  # - :actions => true|false -- Whether to call semantic actions on successful match
  # - :min => _value_|nil -- Minimal value for successful match (parser dependent)
  # - :max => _value_|nil -- Maximal value for successful match (parser dependent)
  #
  # Defaults are:
  # - :union => :normal
  # - :actions => true
  # - :min => nil
  # - :max => nil
  #
  # Only the writer is generated here; the reader is the hand-written
  # +#policy+ method further down, which resolves inherited values.
  attr_writer :policy

  ##
  # Initializes the Node's +parser+ property and registers the Node with the
  # given Parser, as well as the +left+ and +right+ child.
  # Both children are converted with +#to_p+ and get this Node as their parent.
  #
  def initialize(parser, left = nil, right = nil)
    if left
      @left = left.to_p
      @left.parent = self
    end

    if right
      @right = right.to_p
      @right.parent = self
    end

    @parser, @symbols, @actions = parser, {}, []
    parser.node = self
  end

  ##
  # Initializes a new Node by +dup+ping the +other+ Node's Parser, as well as its Closure
  # and actions and the whole parse tree under it.
  # The symbols Hash and the actions Array are +dup+ped themselves, their elements are not.
  #
  def initialize_copy(other)
    if other.parser
      @parser = other.parser.dup
      @parser.node = self
    end
    # Only a closure set directly on +other+ is copied, not an inherited one.
    if other.closure?
      @closure = other.closure.dup
      @closure.node = self
      @closure.parser = @parser
    end
    if other.symbols
      @symbols = other.symbols.dup
    end
    if other.actions
      @actions = other.actions.dup
    end
    if other.left
      @left = other.left.dup
      @left.parent = self
    end
    if other.right
      @right = other.right.dup
      @right.parent = self
    end
  end

  ##
  # Unlike +#initialize_copy+ this method does not +dup+ the elements contained in the +other+
  # Node, but rather only copies references.
  # The backtrace will not be copied.
  # Note that +other.closure+ is used, so a closure inherited by +other+ from one
  # of its parents becomes this Node's own closure.
  #
  def shallow_copy(other)
    @parser, @closure, @symbols, @actions, @left, @right =
      other.parser, other.closure, other.symbols, other.actions, other.left, other.right
  end

  ##
  # Returns true, if this Node is the root Node, else false.
  #
  def root?
    @parent.nil?
  end

  ##
  # Returns true, if this Node is a leaf Node, else false.
  #
  def leaf?
    @left.nil? and @right.nil?
  end

  ##
  # Recursively ascends to the top of the parsing chain and returns the first
  # closure it finds or nil if none.
  #
  def closure
    @closure || ( @parent ? @parent.closure : nil )
  end

  ##
  # Returns true if _this_ Node has a closure set, i.e. unlike +#closure+ it will not query
  # the parent for its closure.
  #
  def closure?
    @closure ? true : false
  end

  ##
  # Sets the closure and registers the Node and its Parser with it.
  #
  def closure=(clos)
    @closure = clos
    clos.parser = @parser
    clos.node = self
  end

  ##
  # Returns the effective policy Hash of this Node.
  # If this Node has its own policy, missing keys are filled in with the defaults.
  # Otherwise the parent's effective policy (or the defaults for a root Node) is
  # used, with :min and :max reset to nil, i.e. those two are never inherited.
  #
  def policy
    # only set those if nothing has been specified
    std = { :union => :normal,
            :actions => true,
            :min => nil,
            :max => nil }
    # replace if not specified in THIS node.
    rep = { :min => nil,
            :max => nil }

    pol = @policy

    unless pol
      # +@parent.policy+ returns a freshly merged Hash and +std+ is a local,
      # so the in-place merge below never touches a stored policy.
      pol = @parent ? @parent.policy || std : std
      pol.merge! rep
    end

    # keep every value already present, only add the missing defaults
    pol.merge(std) { |k,o,n| o }
  end

  ##
  # If parsing fails, the parent of the Node will instruct it to backtrack.
  # I.e. it will return the InputIterator to the position before the failed
  # parsing attempt.
  # If no backtrace has been saved, nothing is done and nil is returned,
  # otherwise the result of +iter.to+ is returned.
  #
  def backtrack(iter)
    iter.to @backtrace.pos if @backtrace
  end

  ##
  # Saves the backtrace and calls +#scan+ on the Parser to do the actual parsing of the
  # InputIterator +iter+. This method will +backtrack+ automatically when the result of
  # the Parser is +nil+.
  #
  # If +pre_skip+ is +false+, +InputIterator#skip!+ will not be called before the Parser
  # invocation.
  # Please note: +pre_skip+ being +true+ does not mean that pre-skipping is done. Other
  # factors may prohibit that, e.g. a +LexemeDirective+.
  #
  # Returns the match, or nil if the Parser failed or the match violated the
  # :min/:max policy. Semantic actions are called with the match and the closure
  # only for a surviving match and only if the :actions policy is set.
  #
  def parse(iter, pre_skip = true)
    @backtrace = iter.dup

    iter.skip! if @parser.pre_skip? and pre_skip
    ret = @parser.scan iter
    backtrack iter unless ret

    pol = policy

    # NOTE(review): when a match is discarded by the :min/:max check below,
    # +iter+ is not rewound here; callers have to backtrack themselves.
    if ret and ret.value
      # min
      if pol[:min]
        ret = nil if pol[:min] > ret.value
      end

      # max if not reset
      if ret and pol[:max]
        ret = nil if pol[:max] < ret.value
      end
    # reset if value == nil and min or max specified
    elsif ret
      ret = nil if pol[:min] or pol[:max]
    end

    @actions ||= []
    @actions.each { |action|
      action.call ret, self.closure
    } if ret and pol[:actions]

    ret
  end

  ##
  # Converts this Node to a Node, i.e. returns +self+.
  #
  def to_p
    self
  end

  ##
  # Replaces this Node with +rep+ and returns the modified replacement Node.
  # If this is called during parsing process, the calling function should
  # +#backtrack+ and call +#parse+ for the replacement Node.
  #
  def replace_with(rep)
    rep.parent = @parent

    if @parent
      # take over whichever child slot of the parent this Node occupies
      if self == @parent.left then @parent.left = rep
      else @parent.right = rep
      end
    end

    rep
  end

  ##
  # Returns a multi-line String representing the parse-chain the current Parser is in,
  # i.e. the parent's +inspect+ output followed by this Node's.
  #
  def chain
    ( @parent ? @parent.inspect + "\n " : '' ) +
      self.inspect
  end

  ##
  # Tries to find the Parser referenced by the symbol +sym+ by walking up the
  # parse tree.
  # If a Parser is found, it is +#dup+ped and returned, nil otherwise.
  #
  def find(sym)
    ret = @symbols[sym] || ( @parent ? @parent.find(sym) : nil )
    ret = ret.dup if ret
    ret
  end

  ##
  # Adds a semantic action to the Node and returns the Node.
  # A semantic action is nothing but an object that implements the +call+ method (most simple
  # example being a +lambda+ block), which will accept two parameters:
  # The match made by the Parser and the Closure associated with the Parser.
  # A Symbol is wrapped in a +ClosureAction+.
  #
  def [](action)
    @actions ||= []

    if action.is_a? Symbol then @actions << ClosureAction.new(action)
    else @actions << action
    end

    self
  end

  ##
  # See Operators::Sequence.
  #
  def >>(right)
    Node.new Operators::Sequence.new, self, right.to_p
  end

  ##
  # See Operators::Union.
  #
  def |(right)
    Node.new Operators::Union.new, self, right.to_p
  end

  ##
  # See Operators::Intersection.
  #
  def &(right)
    Node.new Operators::Intersection.new, self, right.to_p
  end

  ##
  # See Operators::Difference.
  #
  def -(right)
    Node.new Operators::Difference.new, self, right.to_p
  end

  ##
  # See Operators::Xor.
  #
  def ^(right)
    # This used to build an Operators::Difference, which made +p1 ^ p2+
    # behave like +p1 - p2+.
    Node.new Operators::Xor.new, self, right.to_p
  end

  ##
  # See Operators::List.
  #
  def %(right)
    Node.new Operators::List.new, self, right.to_p
  end

  ##
  # See Operators::SequentialOr.
  #
  def **(right)
    Node.new Operators::SequentialOr.new, self, right.to_p
  end

  ##
  # See Operators::KleeneStar.
  # Takes no argument, so it is invoked as +node.*+.
  #
  def *
    Node.new Operators::KleeneStar.new, self
  end

  ##
  # See Operators::Positive.
  # Takes no argument, so it is invoked as +node.++.
  #
  def +
    Node.new Operators::Positive.new, self
  end

  ##
  # See Operators::Optional.
  #
  def -@
    Node.new Operators::Optional.new, self
  end

  ##
  # See Operators::Negation.
  #
  def ~@
    Node.new Operators::Negation.new, self
  end

  ##
  # Delegates to the +inspect+ method of the Parser residing in this Node.
  #
  def inspect
    @parser.inspect
  end

end
392
+ end
393
+
@@ -0,0 +1,342 @@
1
+ # This is Spectre, a parser framework inspired by Boost.Spirit,
2
+ # which can be found at http://spirit.sourceforge.net/.
3
+ #
4
+ # If you want to find out more or need a tutorial, go to
5
+ # http://spectre.rubyforge.org/
6
+ # You'll find a nice wiki there!
7
+ #
8
+ # Author:: Fabian Streitel (karottenreibe)
9
+ # Copyright:: Copyright (c) 2009 Fabian Streitel
10
+ # License:: Boost Software License 1.0
11
+ # For further information regarding this license, you can go to
12
+ # http://www.boost.org/LICENSE_1_0.txt
13
+ # or read the file LICENSE distributed with this software.
14
+ # Homepage:: http://spectre.rubyforge.org/
15
+ # Git repo:: http://rubyforge.org/scm/?group_id=7618
16
+ #
17
+ # Keeps the Operators used to compose Parsers/Nodes.
18
+ #
19
+
20
+ require 'spectre/base/parser'
21
+
22
+ module Spectre
23
+
24
+ ##
25
+ # Keeps the Parsers that are used as wrapper around other Parsers.
26
+ # These are implemented as operators of class Node.
27
+ #
28
+ module Operators
29
+
30
module Op
  include Parser

  # Operator parsers never request pre-skipping for themselves.
  def pre_skip?
    false
  end
end
34
+
35
+ ##
36
+ # Takes the two Nodes it gets as parameters and tries to match them one
37
+ # after the other.
38
+ # +Sequence.new p1, p2+ is equal to +p1 >> p2+.
39
+ #
40
class Sequence
  include Op

  # Parses the left child and, only if that matched, the right child.
  # Returns nil as soon as one of them fails, otherwise a match built from
  # the concatenation of both values.
  def scan iter
    head = @node.left.parse iter
    tail = @node.right.parse iter if head
    return nil unless head && tail

    create_match iter, iter.concat(head.value, tail.value)
  end

  # Renders the sequence as "(left >> right)".
  def inspect
    "(#{@node.left.inspect} >> #{@node.right.inspect})"
  end
end
57
+
58
+ ##
59
+ # Takes the two Nodes it gets as parameters and tries to match them one
60
+ # after the other, but if one of the two does not match, it doesn't matter.
61
+ # +SequentialOr.new p1, p2+ is equal to +p1 ** p2+.
62
+ #
63
class SequentialOr
  include Op

  # Rewrites +p1 ** p2+ into +(p1 >> -p2) | p2+ using copies of both children,
  # swaps the rewritten tree in for this Node and parses with it.
  def scan iter
    left_copy = @node.left.dup
    optional_right = -@node.right.dup
    rewritten = ( left_copy >> optional_right ) | @node.right.dup

    replacement = @node.replace_with rewritten
    create_match iter, replacement.parse(iter)
  end

  # Renders the operator as "(left ** right)".
  def inspect
    "(#{@node.left.inspect} ** #{@node.right.inspect})"
  end
end
76
+
77
+ ##
78
+ # Matches either the first or the second Node it receives
79
+ # as parameters.
80
+ # +Union.new p1, p2+ is equal to +p1 | p2+.
81
+ # This operator applies the tactics of short-circuiting, i.e.
82
+ # if p1 matches, p2 will not be tried at all.
83
+ #
84
class Union
  include Op

  # Tries the left child first. With the :normal union policy the right child
  # is only tried if the left one failed. With any other policy both children
  # are tried from the same position and the :longest or shortest match wins;
  # the iterator is then moved to where the winning child stopped.
  def scan iter
    first = @node.left.parse iter

    if @node.policy[:union] == :normal
      return create_match(iter, first) if first

      backtrack iter
      return create_match(iter, @node.right.parse(iter))
    end

    # remember where each alternative ended, rewinding in between
    fiter = iter.dup
    backtrack iter
    second = @node.right.parse iter
    siter = iter.dup
    backtrack iter

    return nil unless first || second

    chosen =
      if first && second
        shortest, longest = [[first, fiter], [second, siter]].minmax_by { |pair| pair[0].length }
        @node.policy[:union] == :longest ? longest : shortest
      elsif first
        [first, fiter]
      else
        [second, siter]
      end

    iter.to chosen[1].pos
    create_match iter, chosen[0]
  end

  # Renders the union as "(left | right)".
  def inspect
    "(#{@node.left.inspect} | #{@node.right.inspect})"
  end
end
126
+
127
+ ##
128
+ # Matches if both Nodes it receives as parameters match
129
+ # from the current position.
130
+ # Goes to the position in the input stream returned by the Node
131
+ # that matched the bigger input.
132
+ # +Intersection.new p1, p2+ is equal to +p1 & p2+.
133
+ #
134
class Intersection
  include Op

  # Parses both children from the same start position. Fails unless both
  # match; otherwise moves the iterator to the end of the longer match and
  # returns a match built from it.
  def scan iter
    first = @node.left.parse iter
    fiter = iter.dup
    backtrack iter
    return nil unless first

    second = @node.right.parse iter
    siter = iter.dup
    backtrack iter
    return nil unless second

    best_match, best_iter = [[first, fiter], [second, siter]].max_by { |pair| pair[0].length }
    iter.to best_iter.pos
    create_match iter, best_match
  end

  # Renders the intersection as "(left & right)".
  def inspect
    "(#{@node.left.inspect} & #{@node.right.inspect})"
  end
end
160
+
161
+ ##
162
+ # Matches if the first Node matches, but not the second or if
163
+ # both match and the first one's match is longer.
164
+ # +Difference.new p1, p2+ is equal to +p1 - p2+.
165
+ #
166
class Difference
  include Op

  # Parses both children from the same start position. Succeeds with the
  # left match if the right child failed or matched less than the left one.
  def scan iter
    first = @node.left.parse iter
    fiter = iter.dup
    backtrack iter
    return nil unless first

    second = @node.right.parse iter
    backtrack iter

    first_wins = !second || first.length > second.length
    return nil unless first_wins

    iter.to fiter.pos
    create_match iter, first
  end

  # Renders the difference as "(left - right)".
  def inspect
    "(#{@node.left.inspect} - #{@node.right.inspect})"
  end
end
190
+
191
+ ##
192
+ # Matches if either the first Node or the second matches, but
193
+ # not if both match.
194
+ # +Xor.new p1, p2+ is equal to +p1 ^ p2+.
195
+ #
196
class Xor
  include Op

  # Parses both children from the same start position and succeeds only if
  # exactly one of them matched; the iterator is moved to the end of that match.
  def scan iter
    first = @node.left.parse iter
    fiter = iter.dup
    backtrack iter
    second = @node.right.parse iter
    siter = iter.dup
    backtrack iter

    # both matched or both failed
    return nil if !first == !second

    match, reached = first ? [first, fiter] : [second, siter]
    iter.to reached.pos
    create_match iter, match
  end

  # Renders the operator as "(left ^ right)".
  def inspect
    "(#{@node.left.inspect} ^ #{@node.right.inspect})"
  end
end
222
+
223
+ ##
224
+ # Negates the Node it receives as a parameter.
225
+ # +Negation.new p+ is equal to +~p+.
226
+ # NOTE: This will only work for Nodes that implement the
227
+ # +negation+ singleton method that returns the class of the
228
+ # negation Parser to be used.
229
+ #
230
class Negation
  include Parser

  # Asks the child's Parser for its +negation+, wraps that in a new Node with
  # the child as left Node, swaps it in for this Node and parses with it.
  # Raises a RuntimeError if the child's Parser does not respond to +negation+.
  def scan iter
    inner = @node.left.parser
    unless inner.respond_to? :negation
      raise "no negation defined for parser class #{inner.class}."
    end

    negated = Node.new inner.negation
    negated.left = @node.left
    negated = @node.replace_with negated
    create_match iter, negated.parse(iter)
  end

  # Renders the negation as "(~child)".
  def inspect
    "(~#{@node.left.inspect})"
  end
end
246
+
247
+ ##
248
+ # Matches the given Node 0 or more times.
249
+ # +KleeneStar.new p+ is equal to +p.*+.
250
+ #
251
class KleeneStar
  include Op

  # Parses the child repeatedly until it fails, concatenating the values of
  # all matches onto the iterator's empty value. Always returns a match.
  def scan iter
    collected = iter.empty

    while ( hit = @node.left.parse(iter) )
      collected = iter.concat(collected, hit.value)
    end

    # rewind to where the last (failed) attempt of the child started
    @node.left.parser.backtrack iter
    create_match iter, collected
  end

  # Renders the operator as "(child.*)".
  def inspect
    "(#{@node.left.inspect}.*)"
  end
end
271
+
272
+ ##
273
+ # Matches the given Node 1 or more times.
274
+ # +Positive.new p+ is equal to +p.\++.
275
+ #
276
class Positive
  include Op

  # Parses the child once and fails if that does not match. Afterwards the
  # child is parsed repeatedly until it fails; the values of all matches are
  # concatenated and returned as one match.
  def scan iter
    ret = @node.left.parse iter
    return nil unless ret

    val = ret.value
    while ret
      ret = @node.left.parse iter
      val = iter.concat val, ret.value if ret
    end

    # Rewind to where the last (failed) attempt of the child started, exactly
    # like KleeneStar does. Node#parse does not rewind the iterator when it
    # discards a match because of the :min/:max policy, so without this the
    # rejected input would stay consumed.
    @node.left.parser.backtrack iter
    create_match iter, val
  end

  # Renders the operator as "(child.+)".
  def inspect
    '(' + @node.left.inspect + '.+)'
  end
end
296
+
297
+ ##
298
+ # Matches the given Node 0 or 1 times.
299
+ # +Optional.new p+ is equal to +-p+.
300
+ #
301
class Optional
  include Op

  # Parses the child; if it fails, rewinds and succeeds with the iterator's
  # empty value instead, so this parser always returns a match.
  def scan iter
    hit = @node.left.parse iter
    return create_match(iter, hit) if hit

    backtrack iter
    create_match iter, iter.empty
  end

  # Renders the operator as "(-child)".
  def inspect
    "(-#{@node.left.inspect})"
  end
end
318
+
319
+ ##
320
+ # Matches a list of tokens matched by the first Node,
321
+ # separated by tokens matched by the second Node.
322
+ # The first Node must not match the second.
323
+ # +List.new p1, p2+ is equal to +p1 % p2+.
324
+ #
325
class List
  include Op

  # Rewrites +p1 % p2+ into +p1 >> (p2 >> p1).*+ using copies of both children,
  # swaps the rewritten tree in for this Node and parses with it.
  def scan iter
    item = @node.left.dup
    separated_items = ( @node.right.dup >> @node.left.dup ).*

    replacement = @node.replace_with( item >> separated_items )
    create_match iter, replacement.parse(iter)
  end

  # Renders the operator as "(left % right)".
  def inspect
    '(' + @node.left.inspect + ' % ' + @node.right.inspect + ')'
  end
end
338
+
339
+ end
340
+
341
+ end
342
+