Spectre 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/CHANGELOG +1 -0
  2. data/LICENSE +23 -0
  3. data/README +20 -0
  4. data/Rakefile +112 -0
  5. data/lib/spectre/base.rb +44 -0
  6. data/lib/spectre/base/closure.rb +96 -0
  7. data/lib/spectre/base/directive.rb +148 -0
  8. data/lib/spectre/base/grammar.rb +269 -0
  9. data/lib/spectre/base/inputiterator.rb +276 -0
  10. data/lib/spectre/base/node.rb +393 -0
  11. data/lib/spectre/base/operators.rb +342 -0
  12. data/lib/spectre/base/parser.rb +110 -0
  13. data/lib/spectre/generic.rb +115 -0
  14. data/lib/spectre/generic/directives.rb +246 -0
  15. data/lib/spectre/generic/negations.rb +68 -0
  16. data/lib/spectre/generic/primitives.rb +172 -0
  17. data/lib/spectre/generic/semanticaction.rb +43 -0
  18. data/lib/spectre/string.rb +57 -0
  19. data/lib/spectre/string/additionals.rb +80 -0
  20. data/lib/spectre/string/directives.rb +51 -0
  21. data/lib/spectre/string/inputiterator.rb +57 -0
  22. data/lib/spectre/string/primitives.rb +400 -0
  23. data/test/base/closure_tests.rb +108 -0
  24. data/test/base/grammar_tests.rb +97 -0
  25. data/test/base/operator_tests.rb +335 -0
  26. data/test/base/semanticaction_tests.rb +53 -0
  27. data/test/generic/directive_tests.rb +224 -0
  28. data/test/generic/negation_tests.rb +146 -0
  29. data/test/generic/primitive_tests.rb +99 -0
  30. data/test/string/POD2Parser_tests.rb +93 -0
  31. data/test/string/additional_tests.rb +43 -0
  32. data/test/string/directive_tests.rb +32 -0
  33. data/test/string/primitive_tests.rb +173 -0
  34. data/test/tests.rb +33 -0
  35. data/test/tutorial/funnymath_tests.rb +57 -0
  36. data/test/tutorial/html_tests.rb +171 -0
  37. data/test/tutorial/skipping_tests.rb +60 -0
  38. metadata +109 -0
@@ -0,0 +1,393 @@
1
+ # This is Spectre, a parser framework inspired by Boost.Spirit,
2
+ # which can be found at http://spirit.sourceforge.net/.
3
+ #
4
+ # If you want to find out more or need a tutorial, go to
5
+ # http://spectre.rubyforge.org/
6
+ # You'll find a nice wiki there!
7
+ #
8
+ # Author:: Fabian Streitel (karottenreibe)
9
+ # Copyright:: Copyright (c) 2009 Fabian Streitel
10
+ # License:: Boost Software License 1.0
11
+ # For further information regarding this license, you can go to
12
+ # http://www.boost.org/LICENSE_1_0.txt
13
+ # or read the file LICENSE distributed with this software.
14
+ # Homepage:: http://spectre.rubyforge.org/
15
+ # Git repo:: http://rubyforge.org/scm/?group_id=7618
16
+ #
17
+ # Keeps the Node class the parse tree is composed of.
18
+ #
19
+
20
+ module Spectre
21
+
22
##
# A Node in the parse tree. Nodes keep the parsing tree consistent by applying
# some control mechanisms to the parsing process and taking away responsibility
# from the Parsers.
# They keep the Closures and parent-child relations between the Parsers as well as
# the symbol-identified Parsers of the Grammars.
#
class Node

  ##
  # Policy values used when neither this Node nor any of its ancestors
  # specifies a policy of its own.
  DEFAULT_POLICY = { :union => :normal,
                     :actions => true,
                     :min => nil,
                     :max => nil }.freeze

  ##
  # Policy entries that are never inherited from an ancestor: they are reset
  # to these values whenever the policy comes from the parent chain.
  UNINHERITED_POLICY = { :min => nil,
                         :max => nil }.freeze

  ##
  # The semantic actions that are associated with this Node - Array.
  attr_accessor :actions

  ##
  # The location the InputIterator was at, before the Parser started matching.
  attr_accessor :backtrace

  ##
  # The left child Node of this Node or nil if none.
  attr_accessor :left

  ##
  # The right child Node of this Node or nil if none.
  attr_accessor :right

  ##
  # The parent Node of this Node.
  attr_accessor :parent

  ##
  # The symbol-identified Parsers for this Node - Hash.
  attr_accessor :symbols

  ##
  # The Parser that resides in this Node.
  attr_accessor :parser

  ##
  # The policy that governs the Parser's behaviour - Hash.
  # - :union => :normal|:shortest|:longest -- Evaluation of unions: first, longest or shortest match
  # - :actions => true|false -- Whether to call semantic actions on successful match
  # - :min => _value_|nil -- Minimal value for successful match (parser dependent)
  # - :max => _value_|nil -- Maximal value for successful match (parser dependent)
  #
  # Defaults are:
  # - :union => :normal
  # - :actions => true
  # - :min => nil
  # - :max => nil
  #
  # Only the writer is generated here; the reader is the hand-written
  # +#policy+ method, which also resolves inheritance and defaults.
  attr_writer :policy

  ##
  # Initializes the Node's +parser+ property and registers the Node with the
  # given Parser, as well as the +left+ and +right+ child.
  # Both children are converted with +#to_p+ and get this Node as their parent.
  #
  def initialize parser, left = nil, right = nil
    if left
      @left = left.to_p
      @left.parent = self
    end

    if right
      @right = right.to_p
      @right.parent = self
    end

    @parser  = parser
    @symbols = {}
    @actions = []
    parser.node = self
  end

  ##
  # Initializes a new Node by +dup+ping the +other+ Node's Parser, as well as its Closure
  # and actions and the whole parse tree under it.
  # The backtrace will not be copied.
  #
  def initialize_copy other
    # +dup+ copies every instance variable before calling this hook, so the
    # backtrace has to be dropped explicitly to honour the contract above.
    @backtrace = nil

    if other.parser
      @parser = other.parser.dup
      @parser.node = self
    end

    if other.closure?
      @closure = other.closure.dup
      @closure.node = self
      @closure.parser = @parser
    end

    @symbols = other.symbols.dup if other.symbols
    @actions = other.actions.dup if other.actions

    if other.left
      @left = other.left.dup
      @left.parent = self
    end

    if other.right
      @right = other.right.dup
      @right.parent = self
    end
  end

  ##
  # Unlike +#initialize_copy+ this method does not +dup+ the elements contained in the +other+
  # Node, but rather only copies references.
  # The backtrace will not be copied.
  #
  def shallow_copy other
    @parser, @closure, @symbols, @actions, @left, @right =
      other.parser, other.closure, other.symbols, other.actions, other.left, other.right
  end

  ##
  # Returns true, if this Node is the root Node, else false.
  #
  def root?
    @parent.nil?
  end

  ##
  # Returns true, if this Node is a leaf Node, else false.
  #
  def leaf?
    @left.nil? && @right.nil?
  end

  ##
  # Recursively ascends to the top of the parsing chain and returns the first
  # closure it finds or nil if none.
  #
  def closure
    @closure || ( @parent ? @parent.closure : nil )
  end

  ##
  # Returns true if _this_ Node has a closure set, i.e. unlike +#closure+ it will not query
  # the parent for its closure.
  #
  def closure?
    @closure ? true : false
  end

  ##
  # Sets the closure and registers the Node and its Parser with it.
  # Passing +nil+ simply removes the closure from this Node.
  #
  def closure= clos
    @closure = clos
    return unless clos

    clos.parser = @parser
    clos.node = self
  end

  ##
  # Returns the effective policy of this Node as a new Hash.
  #
  # If a policy was set on this Node it is used, else the parent's effective
  # policy is taken, with +:min+ and +:max+ reset to +nil+ since those two are
  # never inherited. Keys that are still missing are filled in from the defaults.
  #
  def policy
    own = @policy

    unless own
      inherited = @parent ? ( @parent.policy || DEFAULT_POLICY ) : DEFAULT_POLICY
      # non-destructive merge, so the Hash handed out by the parent is left alone
      own = inherited.merge UNINHERITED_POLICY
    end

    # values specified in +own+ win over the defaults
    DEFAULT_POLICY.merge own
  end

  ##
  # If parsing fails, the parent of the Node will instruct it to backtrack.
  # I.e. it will return the InputIterator to the position before the failed
  # parsing attempt.
  # If no backtrace has been saved, nothing is done and +nil+ is returned,
  # else the result of +InputIterator#to+ is returned.
  #
  def backtrack iter
    iter.to @backtrace.pos if @backtrace
  end

  ##
  # Saves the backtrace and calls +#scan+ on the Parser to do the actual parsing of the
  # InputIterator +iter+. This method will +backtrack+ automatically when the result is
  # +nil+, be it because the Parser did not match or because the match was rejected by
  # the +:min+ / +:max+ policy.
  #
  # On a successful match the semantic actions are called with the match and the
  # closure, unless the +:actions+ policy is switched off.
  #
  # If +pre_skip+ is +false+, +InputIterator#skip!+ will not be called before the Parser
  # invocation.
  # Please note: +pre_skip+ being +true+ does not mean that pre-skipping is done. Other
  # factors may prohibit that, e.g. a +LexemeDirective+.
  #
  # Returns the match or +nil+.
  #
  def parse iter, pre_skip = true
    @backtrace = iter.dup
    @actions ||= []

    iter.skip! if @parser.pre_skip? && pre_skip
    ret = @parser.scan iter

    pol = policy
    ret = nil if ret && !within_limits?( ret, pol )

    unless ret
      # also undoes the input consumed by a match the policy rejected
      backtrack iter
      return ret
    end

    @actions.each { |action| action.call ret, closure } if pol[:actions]

    ret
  end

  ##
  # Converts this Node to a Node, i.e. returns +self+.
  #
  def to_p
    self
  end

  ##
  # Replaces this Node with +rep+ and returns the modified replacement Node.
  # If this is called during parsing process, the calling function should
  # +#backtrack+ and call +#parse+ for the replacement Node.
  #
  def replace_with rep
    rep.parent = @parent

    if @parent
      if self == @parent.left
        @parent.left = rep
      else
        @parent.right = rep
      end
    end

    rep
  end

  ##
  # Returns a String showing the parent's +inspect+ output (if there is a
  # parent) on one line and this Node's +inspect+ output on the next.
  #
  def chain
    prefix = @parent ? @parent.inspect + "\n " : ''
    prefix + inspect
  end

  ##
  # Tries to find the Parser referenced by the symbol +sym+ by walking up the
  # parse tree.
  # If a Parser is found, it is +#dup+ped and returned, nil otherwise.
  #
  def find sym
    found = @symbols[sym] || ( @parent ? @parent.find(sym) : nil )
    found ? found.dup : found
  end

  ##
  # Adds a semantic action to the Node and returns the Node.
  # A semantic action is nothing but an object that implements the +call+ method (most simple
  # example being a +lambda+ block), which will accept two parameters:
  # The match made by the Parser and the Closure associated with the Parser.
  # A Symbol is wrapped into a ClosureAction.
  #
  def [] action
    @actions ||= []
    @actions << ( action.is_a?(Symbol) ? ClosureAction.new(action) : action )
    self
  end

  ##
  # See Operators::Sequence.
  #
  def >> right
    Node.new Operators::Sequence.new, self, right.to_p
  end

  ##
  # See Operators::Union.
  #
  def | right
    Node.new Operators::Union.new, self, right.to_p
  end

  ##
  # See Operators::Intersection.
  #
  def & right
    Node.new Operators::Intersection.new, self, right.to_p
  end

  ##
  # See Operators::Difference.
  #
  def - right
    Node.new Operators::Difference.new, self, right.to_p
  end

  ##
  # See Operators::Xor.
  #
  def ^ right
    Node.new Operators::Xor.new, self, right.to_p
  end

  ##
  # See Operators::List.
  #
  def % right
    Node.new Operators::List.new, self, right.to_p
  end

  ##
  # See Operators::SequentialOr.
  #
  def ** right
    Node.new Operators::SequentialOr.new, self, right.to_p
  end

  ##
  # See Operators::KleeneStar.
  #
  def *
    Node.new Operators::KleeneStar.new, self
  end

  ##
  # See Operators::Positive.
  #
  def +
    Node.new Operators::Positive.new, self
  end

  ##
  # See Operators::Optional.
  #
  def -@
    Node.new Operators::Optional.new, self
  end

  ##
  # See Operators::Negation.
  #
  def ~@
    Node.new Operators::Negation.new, self
  end

  ##
  # Delegates to the +inspect+ method of the Parser in this Node.
  #
  def inspect
    @parser.inspect
  end

  private

  ##
  # Checks +match+ against the +:min+ and +:max+ entries of the policy +pol+.
  # A match without a value only passes if neither limit is specified.
  #
  def within_limits? match, pol
    min, max = pol[:min], pol[:max]
    value = match.value

    return !( min || max ) unless value
    return false if min && min > value
    return false if max && max < value

    true
  end

end
392
+ end
393
+
@@ -0,0 +1,342 @@
1
+ # This is Spectre, a parser framework inspired by Boost.Spirit,
2
+ # which can be found at http://spirit.sourceforge.net/.
3
+ #
4
+ # If you want to find out more or need a tutorial, go to
5
+ # http://spectre.rubyforge.org/
6
+ # You'll find a nice wiki there!
7
+ #
8
+ # Author:: Fabian Streitel (karottenreibe)
9
+ # Copyright:: Copyright (c) 2009 Fabian Streitel
10
+ # License:: Boost Software License 1.0
11
+ # For further information regarding this license, you can go to
12
+ # http://www.boost.org/LICENSE_1_0.txt
13
+ # or read the file LICENSE distributed with this software.
14
+ # Homepage:: http://spectre.rubyforge.org/
15
+ # Git repo:: http://rubyforge.org/scm/?group_id=7618
16
+ #
17
+ # Keeps the Operators used to compose Parsers/Nodes.
18
+ #
19
+
20
+ require 'spectre/base/parser'
21
+
22
+ module Spectre
23
+
24
+ ##
25
+ # Keeps the Parsers that are used as wrapper around other Parsers.
26
+ # These are implemented as operators of class Node.
27
+ #
28
+ module Operators
29
+
30
##
# Mixin shared by the operator Parsers: it pulls in Parser and
# answers +pre_skip?+ with +false+.
#
module Op
  include Parser

  def pre_skip?
    false
  end
end
34
+
35
##
# Parses the left Node and then the right Node of its own Node. Only if
# both match, a match holding the concatenation of the two values is made.
# +Sequence.new p1, p2+ is equal to +p1 >> p2+.
#
class Sequence
  include Op

  ##
  # Returns +nil+ as soon as one of the two children fails to parse.
  #
  def scan(iter)
    head = @node.left.parse(iter)
    return nil unless head

    tail = @node.right.parse(iter)
    return nil unless tail

    joined = iter.concat(head.value, tail.value)
    create_match(iter, joined)
  end

  def inspect
    "(#{@node.left.inspect} >> #{@node.right.inspect})"
  end
end
57
+
58
##
# Tries to match the two Nodes one after the other, where it is enough
# if only one of the two matches.
# This is done by replacing the own Node with <tt>(p1 >> -p2) | p2</tt>,
# built from copies of the two children, and parsing that replacement.
# +SequentialOr.new p1, p2+ is equal to +p1 ** p2+.
#
class SequentialOr
  include Op

  def scan(iter)
    lhs = @node.left.dup
    optional_rhs = -@node.right.dup
    replacement = (lhs >> optional_rhs) | @node.right.dup

    # the replacement takes this Node's place in the tree
    replacement = @node.replace_with(replacement)
    create_match(iter, replacement.parse(iter))
  end

  def inspect
    "(#{@node.left.inspect} ** #{@node.right.inspect})"
  end
end
76
+
77
##
# Matches either the left or the right Node.
# +Union.new p1, p2+ is equal to +p1 | p2+.
#
# With the +:union+ policy set to +:normal+ this operator short-circuits,
# i.e. if p1 matches, p2 will not be tried at all.
# With any other value both are tried; if both match, +:longest+ picks the
# match with the greater +length+, everything else the one with the smaller.
#
class Union
  include Op

  def scan(iter)
    first = @node.left.parse(iter)

    if union_mode == :normal
      return create_match(iter, first) if first

      backtrack iter
      return create_match(iter, @node.right.parse(iter))
    end

    # remember where each alternative ended, then rewind for the next step
    fiter = iter.dup
    backtrack iter
    second = @node.right.parse(iter)
    siter = iter.dup
    backtrack iter

    if first && second
      shortest, longest = [[first, fiter], [second, siter]].minmax_by { |pair| pair[0].length }
      match, position = union_mode == :longest ? longest : shortest
    elsif first
      match, position = first, fiter
    elsif second
      match, position = second, siter
    else
      return nil
    end

    iter.to position.pos
    create_match(iter, match)
  end

  def inspect
    "(#{@node.left.inspect} | #{@node.right.inspect})"
  end

  private

  ##
  # The +:union+ entry of the own Node's policy.
  #
  def union_mode
    @node.policy[:union]
  end
end
126
+
127
##
# Matches if both the left and the right Node match from the current
# position.
# The input is then moved to the position reached by the Node whose
# match has the greater +length+, and that match is used.
# +Intersection.new p1, p2+ is equal to +p1 & p2+.
#
class Intersection
  include Op

  def scan(iter)
    first = @node.left.parse(iter)
    fiter = iter.dup
    backtrack iter
    return nil unless first

    second = @node.right.parse(iter)
    siter = iter.dup
    backtrack iter
    return nil unless second

    longer, position = [[first, fiter], [second, siter]].max_by { |pair| pair[0].length }
    iter.to position.pos
    create_match(iter, longer)
  end

  def inspect
    "(#{@node.left.inspect} & #{@node.right.inspect})"
  end
end
160
+
161
##
# Matches if the left Node matches and the right one does not, or if
# both match and the left one's match is longer.
# +Difference.new p1, p2+ is equal to +p1 - p2+.
#
class Difference
  include Op

  def scan(iter)
    first = @node.left.parse(iter)
    fiter = iter.dup
    backtrack iter
    return nil unless first

    second = @node.right.parse(iter)
    backtrack iter

    # the right Node wins unless its match is strictly shorter
    beaten = second && !(first.length > second.length)
    return nil if beaten

    iter.to fiter.pos
    create_match(iter, first)
  end

  def inspect
    "(#{@node.left.inspect} - #{@node.right.inspect})"
  end
end
190
+
191
##
# Matches if exactly one of the two Nodes matches, i.e. it fails if
# both or none of them match.
# +Xor.new p1, p2+ is equal to +p1 ^ p2+.
#
class Xor
  include Op

  def scan(iter)
    # both sides are always tried from the same starting position
    first = @node.left.parse(iter)
    fiter = iter.dup
    backtrack iter
    second = @node.right.parse(iter)
    siter = iter.dup
    backtrack iter

    if first
      return nil if second
      match, position = first, fiter
    else
      return nil unless second
      match, position = second, siter
    end

    iter.to position.pos
    create_match(iter, match)
  end

  def inspect
    "(#{@node.left.inspect} ^ #{@node.right.inspect})"
  end
end
222
+
223
##
# Negates the Node it receives as a parameter.
# +Negation.new p+ is equal to +~p+.
# NOTE: This will only work if the Parser of that Node responds to
# +negation+; the result of that call is put into a new Node which
# replaces the own Node and is then parsed instead.
#
class Negation
  include Parser

  ##
  # Raises a RuntimeError if the wrapped Parser does not respond to +negation+.
  #
  def scan(iter)
    unless @node.left.parser.respond_to?(:negation)
      raise "no negation defined for parser class #{@node.left.parser.class}."
    end

    replacement = Node.new(@node.left.parser.negation)
    replacement.left = @node.left
    replacement = @node.replace_with(replacement)
    create_match(iter, replacement.parse(iter))
  end

  def inspect
    "(~#{@node.left.inspect})"
  end
end
246
+
247
##
# Matches the given Node 0 or more times and concatenates the values
# of all the matches, starting from the iterator's +empty+ value.
# +KleeneStar.new p+ is equal to +p.*+.
#
class KleeneStar
  include Op

  def scan(iter)
    collected = iter.empty

    while (match = @node.left.parse(iter))
      collected = iter.concat(collected, match.value)
    end

    # rewind whatever the last, failed attempt may have consumed
    @node.left.parser.backtrack iter
    create_match(iter, collected)
  end

  def inspect
    "(#{@node.left.inspect}.*)"
  end
end
271
+
272
##
# Matches the given Node 1 or more times and concatenates the values
# of all the matches. Fails if the Node does not match at least once.
# +Positive.new p+ is equal to +p.\++.
#
class Positive
  include Op

  def scan(iter)
    match = @node.left.parse(iter)
    return nil unless match

    collected = match.value
    while (match = @node.left.parse(iter))
      collected = iter.concat(collected, match.value)
    end

    create_match(iter, collected)
  end

  def inspect
    "(#{@node.left.inspect}.+)"
  end
end
296
+
297
##
# Matches the given Node 0 or 1 times. If the Node does not match,
# the input is rewound and the iterator's +empty+ value is matched.
# +Optional.new p+ is equal to +-p+.
#
class Optional
  include Op

  def scan(iter)
    match = @node.left.parse(iter)
    return create_match(iter, match) if match

    backtrack iter
    create_match(iter, iter.empty)
  end

  def inspect
    "(-#{@node.left.inspect})"
  end
end
318
+
319
##
# Matches a list of tokens matched by the first Node,
# separated by tokens matched by the second Node.
# The first Node must not match the second.
# This is done by replacing the own Node with <tt>p1 >> (p2 >> p1).*</tt>,
# built from copies of the two children, and parsing that replacement.
# +List.new p1, p2+ is equal to +p1 % p2+.
#
class List
  include Op

  def scan(iter)
    item = @node.left.dup
    separated_items = (@node.right.dup >> @node.left.dup).*

    # the replacement takes this Node's place in the tree
    replacement = @node.replace_with(item >> separated_items)
    create_match(iter, replacement.parse(iter))
  end

  def inspect
    '(' + @node.left.inspect + ' % ' + @node.right.inspect + ')'
  end
end
338
+
339
+ end
340
+
341
+ end
342
+