Spectre 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/CHANGELOG +1 -0
  2. data/LICENSE +23 -0
  3. data/README +20 -0
  4. data/Rakefile +112 -0
  5. data/lib/spectre/base.rb +44 -0
  6. data/lib/spectre/base/closure.rb +96 -0
  7. data/lib/spectre/base/directive.rb +148 -0
  8. data/lib/spectre/base/grammar.rb +269 -0
  9. data/lib/spectre/base/inputiterator.rb +276 -0
  10. data/lib/spectre/base/node.rb +393 -0
  11. data/lib/spectre/base/operators.rb +342 -0
  12. data/lib/spectre/base/parser.rb +110 -0
  13. data/lib/spectre/generic.rb +115 -0
  14. data/lib/spectre/generic/directives.rb +246 -0
  15. data/lib/spectre/generic/negations.rb +68 -0
  16. data/lib/spectre/generic/primitives.rb +172 -0
  17. data/lib/spectre/generic/semanticaction.rb +43 -0
  18. data/lib/spectre/string.rb +57 -0
  19. data/lib/spectre/string/additionals.rb +80 -0
  20. data/lib/spectre/string/directives.rb +51 -0
  21. data/lib/spectre/string/inputiterator.rb +57 -0
  22. data/lib/spectre/string/primitives.rb +400 -0
  23. data/test/base/closure_tests.rb +108 -0
  24. data/test/base/grammar_tests.rb +97 -0
  25. data/test/base/operator_tests.rb +335 -0
  26. data/test/base/semanticaction_tests.rb +53 -0
  27. data/test/generic/directive_tests.rb +224 -0
  28. data/test/generic/negation_tests.rb +146 -0
  29. data/test/generic/primitive_tests.rb +99 -0
  30. data/test/string/POD2Parser_tests.rb +93 -0
  31. data/test/string/additional_tests.rb +43 -0
  32. data/test/string/directive_tests.rb +32 -0
  33. data/test/string/primitive_tests.rb +173 -0
  34. data/test/tests.rb +33 -0
  35. data/test/tutorial/funnymath_tests.rb +57 -0
  36. data/test/tutorial/html_tests.rb +171 -0
  37. data/test/tutorial/skipping_tests.rb +60 -0
  38. metadata +109 -0
@@ -0,0 +1,269 @@
1
+ # This is Spectre, a parser framework inspired by Boost.Spirit,
2
+ # which can be found at http://spirit.sourceforge.net/.
3
+ #
4
+ # If you want to find out more or need a tutorial, go to
5
+ # http://spectre.rubyforge.org/
6
+ # You'll find a nice wiki there!
7
+ #
8
+ # Author:: Fabian Streitel (karottenreibe)
9
+ # Copyright:: Copyright (c) 2009 Fabian Streitel
10
+ # License:: Boost Software License 1.0
11
+ # For further information regarding this license, you can go to
12
+ # http://www.boost.org/LICENSE_1_0.txt
13
+ # or read the file LICENSE distributed with this software.
14
+ # Homepage:: http://spectre.rubyforge.org/
15
+ # Git repo:: http://rubyforge.org/scm/?group_id=7618
16
+ #
17
+ # Keeps the Grammar class that can hold recursive Parsers.
18
+ #
19
+
20
+ require 'rubygems'
21
+ require 'metaid'
22
+ require 'spectre/base/parser'
23
+ require 'spectre/base/node'
24
+
25
+ module Spectre
26
+
27
+ ##
28
+ # Provides lazy evaluation of the Parser name, so you can use it recursively and before
29
+ # specifying it.
30
+ #
31
class SymParser
  include Parser

  ##
  # Creates a SymParser that will look for the rule named +sym+ once it is
  # asked to scan.
  #
  def initialize(sym)
    @sym = sym
  end

  ##
  # Resolves the rule named +@sym+ through <tt>@node.find</tt>, hooks the found
  # rule in below <tt>@node</tt> and lets it parse the input +is+.
  # Raises a RuntimeError if no rule is registered under that name.
  # NOTE(review): <tt>@node</tt> is not assigned in this class; presumably the
  # Parser mixin provides it -- confirm.
  #
  def scan(is)
    found = @node.find(@sym) or raise "rule #{@sym.inspect} not found."
    found.parent = @node
    found.parse(is)
  end

  ##
  # Shows the referenced rule name, e.g. <tt>[<:name>]</tt>.
  #
  def inspect
    '[<' + @sym.inspect + '>]'
  end
end
52
+
53
+ ##
54
+ # Provides the functionality of dynamically defining rules inside Grammars.
55
+ # Must be mixed into a Parser.
56
+ #
57
module DynVarMixin

  ##
  # Converts +node+ with +to_p+, gives the result a freshly constructed
  # Closure and returns the converted parser.
  #
  def close(node)
    parser = node.to_p
    parser.closure = Closure.new
    parser
  end

  ##
  # Registers every +symbol+ => +parser+ pair of +hash+ in <tt>@node.symbols</tt>
  # so the rules can be referenced later on. Each value is converted with
  # +to_p+ before it is stored.
  #
  def rule(hash)
    hash.each { |name, definition| @node.symbols[name] = definition.to_p }
  end

  ##
  # Remembers +parser+ (after +to_p+ auto-conversion) as the rule the
  # Grammar starts parsing with.
  #
  def start_with(parser)
    @start_rule = parser.to_p
  end
end
86
+
87
+ ##
88
+ # If mixed into a class, it defines shortcut methods for all registered Parsers.
89
+ # Used by the Spectre standard parsers, e.g. +char('k')+ will be a shortcut for
90
+ # +CharParser.new('k')+.
91
+ # See std/std.rb for more details.
92
+ #
93
module ShortcutsMixin
  class << self
    ##
    # For each +name+ => +klass+ in +hsh+: Register the Parser +klass+ with the +name+ inside
    # the Grammar class, so that a new Parser of that +klass+ can be chained into a Grammar
    # simply by calling
    #   +name _arguments_+
    # inside the Grammar class definition.
    # +name+ must be a Symbol.
    #
    # Raises a RuntimeError if Grammar already has a singleton method called +name+.
    # Returns +hsh+.
    #
    def register_shortcut(hsh)
      hsh.each do |meth, klass|
        # singleton_methods returns Strings on Ruby 1.8 but Symbols on 1.9+;
        # compare as Strings so the guard works on both.
        taken = Grammar.singleton_methods.map { |name| name.to_s }
        raise "class Grammar already has a singleton method named '#{meth.to_s}'" if
          taken.include?(meth.to_s)

        # the shortcut builds the parser and runs the usual auto-conversion
        builder = lambda do |*args|
          klass.new(*args).to_p
        end

        # define it for the grammars
        self.class_def meth, &builder
      end
    end
  end
end
119
+
120
+ ##
121
+ # Provides an +inspect+ method for Grammar-like classes.
122
+ #
123
module GrammarInspectMixin
  ##
  # Renders the Grammar as <tt>[Grammar:...]</tt>. A bound Grammar lists every
  # entry of <tt>@node.symbols</tt> as <tt>{name => parser}</tt>, separated by
  # single spaces; an unbound one is shown as <tt>[Grammar:unbound]</tt>.
  #
  def inspect
    body =
      if @bound
        @node.symbols.map { |name, parser| "{#{name.inspect} => #{parser.inspect}}" }.join(' ')
      else
        'unbound'
      end

    "[Grammar:#{body}]"
  end
end
131
+
132
+ ##
133
+ # Chains several Parsers together to form a reusable unit and allows for recursion in
134
+ # Parser definition.
135
+ #
136
+ # To define a Grammar, you may use the generator methods to create Parsers and chain
137
+ # them together. You will then have to store the created top-level Parser in the Grammar
138
+ # by calling +start_with(parser)+.
139
+ #
140
+ # To do so, you first have to call +Grammar.new+, passing it a block that describes the
141
+ # Grammar's behaviour.
142
+ # The returned Grammar object can be supplied with arguments on runtime
143
+ # in order to customize its behaviour, e.g.:
144
+ #
145
+ # mayor = Grammar.new do |city, klass|
146
+ # start_with 'Mayor ' >> ( ~blank ).+ >> ", class #{klass}" >>
147
+ # ' from ' >> city.to_p
148
+ # end
149
+ #
150
+ #
151
+ # The thus created Grammar has to be bound to some arguments before
152
+ # it can actually be used to parse anything:
153
+ #
154
+ # mayor.bind( AnycharParser.new.+, 'A' )
155
+ #
156
+ # It will now parse any Mayor from any city of class 'A'.
157
+ # You can of course rebind the Grammar anytime (except during parsing):
158
+ # + mayor.bind( 'Boston', AnycharParser.new )+
159
+ # Now it will parse any Mayor of any class from 'Boston'.
160
+ #
161
+ # The only exception to the binding rule is a dynamic Grammar that takes
162
+ # no arguments. Such a Grammar will be bound right at instantiation time.
163
+ # Rebinding will have no effect whatsoever.
164
+ # NOTE: Due to an existing Ruby bug, you have to define such a Grammar with an empty
165
+ # argument block:
166
+ #
167
+ #
168
+ # chunky = Grammar.new do ||
169
+ # rule :bacon => ...
170
+ # end
171
+ #
172
+ #
173
+ # Otherwise it will not be automatically bound. This will change, as soon as bug #574
174
+ # is fixed (http://redmine.ruby-lang.org/issues/show/574).
175
+ #
176
+ # You may at any time store a parser inside a _rule_, like this:
177
+ #
178
+ #
179
+ # towns_folk = Grammar.new do ||
180
+ # start_with :person % ( 'from '.to_p >> :town >> ', ' )
181
+ # rule :person => :name >> blank.+ >> :name,
182
+ # :town => :name
183
+ # rule :name => ( ~blank ).+
184
+ # end
185
+ #
186
+ #
187
+ # As you can also see here, the rules are evaluated lazily, thus enabling
188
+ # you to use parsers recursively and before they have actually been defined.
189
+ #
190
+ # NOTE: As tempting as it may be, do NOT use instance variables to store Parsers,
191
+ # because if you use the Parser more than once, the backtrace of the first
192
+ # invocation of that Parser will be lost as soon as it is invoked a second time.
193
+ # Also the use of Closures will be broken.
194
+ # Storing the parsers as is described above will circumvent this problem by dupping
195
+ # the Parser each time it is invoked.
196
+ #
197
+ # If you'd like to provide Grammar-like functionality in your own class(es), you can
198
+ # receive some from the mixins DynVarMixin, ShortcutsMixin and GrammarInspectMixin.
199
+ #
200
class Grammar < Node

  # if we don't preserve the backtrack method, we'll run into an endless loop
  # (the order of these three statements matters: save, include, restore)
  alias_method :node_backtrack, :backtrack
  include Parser
  alias_method :backtrack, :node_backtrack

  include DynVarMixin
  include ShortcutsMixin
  include GrammarInspectMixin

  ##
  # Defines a new Grammar.
  # The passed +block+ will be executed once the returned Grammar's
  # +bind+ method is called. All of +bind+'s parameters will be
  # passed to the block.
  #
  # If the +block+ takes no arguments, it will be bound at instantiation time.
  # NOTE: Ruby bug 574 (http://redmine.ruby-lang.org/issues/show/574)
  #
  # Raises an ArgumentError if no +block+ is given, because there would be
  # nothing to bind later on.
  #
  # See Grammar for an example.
  #
  def initialize(&block)
    raise ArgumentError, "Grammar.new requires a block that describes the Grammar" unless block

    @dynamic = block
    @bound = false

    # nice little trick: we are node and parser in one
    # but for upwards compatibility, we will act as if it weren't so
    # from here on
    super(self)

    # a block without parameters needs no values, so bind it right away
    bind if block.arity == 0
  end

  ##
  # A Grammar already is a Parser, so the auto-conversion returns the Grammar itself.
  #
  def to_p
    self
  end

  ##
  # Binds a Grammar to a set of values supplied in the +args+.
  # Executes the block given to +#initialize+ in the context of this Grammar.
  # Returns the Grammar itself.
  #
  def bind(*args)
    instance_exec(*args, &@dynamic)
    @bound = true
    self
  end

  ##
  # Parses the InputIterator +iter+ with the Parsers defined in the Grammar.
  # The Grammar must have been bound before doing so and a start rule must have
  # been set; otherwise a RuntimeError is raised.
  #
  def scan(iter)
    raise "a dynamic Grammar must be bound to a value" unless @bound
    raise "you need to set a start rule" unless @start_rule

    # work on a copy of the start rule
    root = @start_rule.dup

    # sort into tree
    root.parent = @node
    @node.left = root

    # start parsing
    create_match(iter, root.parse(iter))
  end
end
263
+
264
+ ##
265
+ # The SymParser shortcut is +sym+.
266
+ ShortcutsMixin.register_shortcut :sym => SymParser
267
+
268
+ end
269
+
@@ -0,0 +1,276 @@
1
+ # This is Spectre, a parser framework inspired by Boost.Spirit,
2
+ # which can be found at http://spirit.sourceforge.net/.
3
+ #
4
+ # If you want to find out more or need a tutorial, go to
5
+ # http://spectre.rubyforge.org/
6
+ # You'll find a nice wiki there!
7
+ #
8
+ # Author:: Fabian Streitel (karottenreibe)
9
+ # Copyright:: Copyright (c) 2009 Fabian Streitel
10
+ # License:: Boost Software License 1.0
11
+ # For further information regarding this license, you can go to
12
+ # http://www.boost.org/LICENSE_1_0.txt
13
+ # or read the file LICENSE distributed with this software.
14
+ # Homepage:: http://spectre.rubyforge.org/
15
+ # Git repo:: http://rubyforge.org/scm/?group_id=7618
16
+ #
17
+ # Keeps the InputIterator class.
18
+ #
19
+
20
+ module Spectre
21
+
22
+ ##
23
+ # Used to access the input stream.
24
+ # The standard implementation works with Integers on Array or String-like structures and
25
+ # is a forward iterator. Jumping to a position behind the current one is only possible via
26
+ # a call to +#to+.
27
+ # To actually be able to use the InputIterator, you have to subclass it and implement the
28
+ # +#concat+ and +#empty+ methods.
29
+ # When implementing iterators for non-array-like data, you will also have to reimplement
30
+ # +#get+ and +#valid?+.
31
+ # When implementing iterators which do not rely on 0 based Integers, you will also have to
32
+ # reimplement +#\++, +#\+@+, +#-+, +#skip!+ and +#to+.
33
+ #
34
+ # = The Input =
35
+ #
36
+ # The input the InputIterator will traverse must have some properties, regardless of the Parsers
37
+ # used on it. It must be
38
+ # - comparable, i.e. it has to supply the standard comparison operators <, >, ==, != etc.
39
+ # - non-atomic, i.e. you must be able to split the input into pieces.
40
+ #
41
class InputIterator

  # Fallback transformation: hands every token back unchanged.
  IDENTITY_TRANSFORMATION = lambda { |token, _iter| token }

  # Fallback skipper: never skips anything. It accepts any arguments, because a
  # parameterless lambda rejects the (token, iterator) pair on Ruby 1.9+.
  NULL_SKIPPER = lambda { |*_ignored| nil }

  ##
  # The input this iterator works on.
  # The default is an array-like structure.
  attr_accessor :input

  ##
  # A skipper object that is called for every retrieved token from the input.
  # It is required to return either +nil+, which causes the token to be processed
  # normally, or a positive Integer, which describes how many tokens should be skipped
  # from and including the current one.
  #
  # The skipper is invoked via its +#call+ method, thus effectively enabling the
  # use of lambda blocks as skippers.
  # The object will be passed the token to process and the InputIterator as parameters.
  #
  # NOTE:
  # - The skipper should be set to +:default+ if the default is to be used
  # - The skipper should be +nil+ if it is required to let all tokens pass as valid
  # - Parsers may choose to ignore the skipper, e.g. the StringParser, which parses literal
  #   Strings, must ignore any (white space) skippers in order to function correctly
  # - The skipper must _not_ modify the InputIterator
  #
  # The reader is defined further down as +#skipper+.
  attr_writer :skipper

  ##
  # A transformation object that is called for every retrieved token from the input, unless the
  # skipper instructed the iterator not to process the token.
  # It may modify that token and is expected to return that modified token. The Parsers will then be
  # supplied with that token instead of the original one.
  #
  # The transformation is invoked via its +#call+ method, thus effectively enabling the
  # use of lambda blocks as transformations.
  # The object will be passed the token to process and the InputIterator as parameters.
  #
  # NOTE:
  # - The transformation should be set to +:default+ if the default is to be used
  # - The transformation should be +nil+ if it is required to leave all tokens unmodified
  # - Parsers should never ignore the transformation
  # - The transformation must _not_ modify the InputIterator
  #
  # The reader is defined further down as +#transformation+.
  attr_writer :transformation

  ##
  # The position this iterator is currently at.
  # The default is an Integer.
  attr_accessor :pos

  ##
  # Initializes the iterator to a +pos+ition on an +input+.
  # Skipper and transformation both start out as +:default+.
  #
  def initialize(input, pos = 0)
    @pos, @input, @transformation, @skipper = pos, input, :default, :default
  end

  ##
  # Copies the position and input reference from the +other+ iterator to initialize
  # this one.
  #
  def initialize_copy(other)
    @pos, @input = other.pos, other.input
  end

  ##
  # Returns the token at the current position and advances by one token
  # (plus any tokens that were skipped on the way).
  #
  def +@
    token, len = internal_get(@pos..@pos)
    @pos += len
    token
  end

  ##
  # Returns the next n tokens from (and including) the current position and advances
  # by n tokens (plus any tokens that were skipped on the way).
  #
  def +(n)
    tokens, len = internal_get(@pos..@pos + n - 1)
    @pos += len
    tokens
  end

  ##
  # Sets the iterator to point to address +n+. Will _not_ return the token at that position but
  # the modified iterator instead.
  #
  def to(n)
    @pos = n
    self
  end

  ##
  # Calculates the distance between the positions this iterator and the other +iter+ point to.
  # If they point to the same location, the distance will be 0, if this iterator points behind
  # +iter+, the distance will be positive, else negative.
  #
  # NOTE: This method must be used to correctly calculate Match length in Parsers.
  #
  def -(iter)
    @pos - iter.pos
  end

  ##
  # Whether or not this iterator points to a valid location in the input.
  # Takes the skipper into consideration: if only skippable tokens are left,
  # the location is not valid.
  #
  def valid?
    limit = @input.length
    return false if @pos >= limit

    pos = @pos
    # stop at the end of the input, so a skipper that also accepts the
    # empty/nil token found there cannot keep this loop running forever
    while pos < limit && (skip = skipper.call(@input[pos..pos], self))
      pos += skip
    end

    pos < limit
  end

  ##
  # If there are any skippable tokens from (and including) the current position, a call to this
  # method will cause the InputIterator to advance over them to the next non-skippable token.
  # Returns +nil+.
  #
  def skip!
    # the skipper gets a copy, so it cannot tamper with the position we are advancing
    snapshot = dup
    limit = @input.length

    while @pos < limit && (dist = skipper.call(@input[@pos..@pos], snapshot))
      @pos += dist
    end
  end

  ##
  # Returns the default skipper for this InputIterator class.
  # The default implementation simply returns +nil+.
  #
  def default_skipper
    nil
  end

  ##
  # Returns the default transformation for this InputIterator class.
  # The default implementation simply returns +nil+.
  #
  def default_transformation
    nil
  end

  ##
  # Returns the transformation, or the +#default_transformation+ if the transformation
  # is set to +:default+. Whenever that yields +nil+, a transformation that leaves the
  # token unmodified is returned instead.
  #
  def transformation
    chosen = @transformation == :default ? default_transformation : @transformation
    chosen || IDENTITY_TRANSFORMATION
  end

  ##
  # Returns the skipper, or the +#default_skipper+ if the skipper is set to +:default+.
  # Whenever that yields +nil+, a skipper that never skips is returned instead.
  #
  def skipper
    chosen = @skipper == :default ? default_skipper : @skipper
    chosen || NULL_SKIPPER
  end

  ##
  # Returns all of the input that has not yet been parsed, while ignoring the skipper.
  #
  def rest
    @input[@pos..-1]
  end

  ##
  # Retrieves the tokens from within the specified +range+ (default: the current
  # position only), with the transformation applied. Does not move the iterator.
  #
  def get(*args)
    internal_get(*args)[0]
  end

  ##
  # Executes the given +block+ with the skipper being set to +nil+ and passes
  # the iterator to it. The previous skipper is restored afterwards, even if
  # the +block+ raises. Returns the restored skipper setting, or +nil+ if no
  # +block+ was given.
  #
  def ignore_skipper(&block)
    return unless block_given?

    saved, @skipper = @skipper, nil
    begin
      yield self
    ensure
      @skipper = saved
    end

    saved
  end

  ##
  # Concatenates the two values and returns the result. The values will be of the
  # same type as the input. Must be able to handle nil as a value as well.
  # Must be implemented by a subclass.
  #
  def concat(val1, val2)
    nil
  end

  ##
  # Returns an empty object of the input type, e.g. an empty String or an empty Array.
  # Must be implemented by a subclass.
  #
  def empty
    nil
  end

  protected

  ##
  # Retrieves the tokens from within the specified +range+, with the transformation applied.
  # Returns +[tokens,len]+, where +len+ is the length of the retrieved input, including skipped
  # tokens.
  # NOTE: This method is intended for iterator internal use only. Use +#get+ instead.
  #
  def internal_get(range = (@pos..@pos))
    buf = empty
    pos = range.first
    wanted = range.count
    limit = @input.length

    while buf.length < wanted && pos < limit
      skip = skipper.call(@input[pos..pos], self)

      if skip
        pos += skip
      else
        buf = concat(buf, transformation.call(@input[pos..pos], self))
        pos += 1
      end
    end

    [buf, pos - range.first]
  end

end
274
+
275
+ end
276
+