Spectre 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/CHANGELOG +1 -0
  2. data/LICENSE +23 -0
  3. data/README +20 -0
  4. data/Rakefile +112 -0
  5. data/lib/spectre/base.rb +44 -0
  6. data/lib/spectre/base/closure.rb +96 -0
  7. data/lib/spectre/base/directive.rb +148 -0
  8. data/lib/spectre/base/grammar.rb +269 -0
  9. data/lib/spectre/base/inputiterator.rb +276 -0
  10. data/lib/spectre/base/node.rb +393 -0
  11. data/lib/spectre/base/operators.rb +342 -0
  12. data/lib/spectre/base/parser.rb +110 -0
  13. data/lib/spectre/generic.rb +115 -0
  14. data/lib/spectre/generic/directives.rb +246 -0
  15. data/lib/spectre/generic/negations.rb +68 -0
  16. data/lib/spectre/generic/primitives.rb +172 -0
  17. data/lib/spectre/generic/semanticaction.rb +43 -0
  18. data/lib/spectre/string.rb +57 -0
  19. data/lib/spectre/string/additionals.rb +80 -0
  20. data/lib/spectre/string/directives.rb +51 -0
  21. data/lib/spectre/string/inputiterator.rb +57 -0
  22. data/lib/spectre/string/primitives.rb +400 -0
  23. data/test/base/closure_tests.rb +108 -0
  24. data/test/base/grammar_tests.rb +97 -0
  25. data/test/base/operator_tests.rb +335 -0
  26. data/test/base/semanticaction_tests.rb +53 -0
  27. data/test/generic/directive_tests.rb +224 -0
  28. data/test/generic/negation_tests.rb +146 -0
  29. data/test/generic/primitive_tests.rb +99 -0
  30. data/test/string/POD2Parser_tests.rb +93 -0
  31. data/test/string/additional_tests.rb +43 -0
  32. data/test/string/directive_tests.rb +32 -0
  33. data/test/string/primitive_tests.rb +173 -0
  34. data/test/tests.rb +33 -0
  35. data/test/tutorial/funnymath_tests.rb +57 -0
  36. data/test/tutorial/html_tests.rb +171 -0
  37. data/test/tutorial/skipping_tests.rb +60 -0
  38. metadata +109 -0
@@ -0,0 +1,269 @@
1
+ # This is Spectre, a parser framework inspired by Boost.Spirit,
2
+ # which can be found at http://spirit.sourceforge.net/.
3
+ #
4
+ # If you want to find out more or need a tutorial, go to
5
+ # http://spectre.rubyforge.org/
6
+ # You'll find a nice wiki there!
7
+ #
8
+ # Author:: Fabian Streitel (karottenreibe)
9
+ # Copyright:: Copyright (c) 2009 Fabian Streitel
10
+ # License:: Boost Software License 1.0
11
+ # For further information regarding this license, you can go to
12
+ # http://www.boost.org/LICENSE_1_0.txt
13
+ # or read the file LICENSE distributed with this software.
14
+ # Homepage:: http://spectre.rubyforge.org/
15
+ # Git repo:: http://rubyforge.org/scm/?group_id=7618
16
+ #
17
+ # Keeps the Grammar class that can hold recursive Parsers.
18
+ #
19
+
20
+ require 'rubygems'
21
+ require 'metaid'
22
+ require 'spectre/base/parser'
23
+ require 'spectre/base/node'
24
+
25
+ module Spectre
26
+
27
+ ##
28
+ # Provides lazy evaluation of the Parser name, so you can use it recursively and before
29
+ # specifying it.
30
+ #
31
class SymParser
  include Parser

  ##
  # Remembers +sym+, the name of the rule this parser stands in for.
  # Nothing is looked up yet; resolution happens in +#scan+.
  #
  def initialize(sym)
    @sym = sym
  end

  ##
  # Resolves the remembered name through +@node.find+, attaches the
  # resulting rule below +@node+ and delegates parsing of +is+ to it.
  # Raises a RuntimeError if no rule is registered under the name.
  #
  # NOTE(review): +@node+ is not assigned anywhere in this class; it is
  # presumably provided by the Parser mixin -- confirm.
  #
  def scan(is)
    target = @node.find(@sym)
    raise "rule #{@sym.inspect} not found." unless target

    target.parent = @node
    target.parse(is)
  end

  ##
  # Short textual form: the inspected rule name wrapped in +[<+ ... +>]+.
  #
  def inspect
    '[<' + @sym.inspect + '>]'
  end
end
52
+
53
+ ##
54
+ # Provides the functionality of dynamically defining rules inside Grammars.
55
+ # Must be mixed into a Parser.
56
+ #
57
module DynVarMixin

  ##
  # Converts +node+ via +to_p+, equips the converted parser with a brand-new
  # Closure and hands that parser back.
  #
  def close(node)
    parser = node.to_p
    parser.closure = Closure.new
    parser
  end

  ##
  # Registers every +name+ => +definition+ pair of +hash+ in
  # +@node.symbols+, converting each definition with +to_p+ first.
  # Returns +hash+ itself (the result of +each+).
  #
  def rule(hash)
    hash.each { |name, definition| @node.symbols[name] = definition.to_p }
  end

  ##
  # Remembers the +to_p+ conversion of +parser+ in +@start_rule+ and
  # returns the converted parser.
  #
  def start_with(parser)
    @start_rule = parser.to_p
  end
end
86
+
87
+ ##
88
+ # If mixed into a class, it defines shortcut methods for all registered Parsers.
89
+ # Used by the Spectre standard parsers, e.g. +char('k')+ will be a shortcut for
90
+ # +CharParser.new('k')+.
91
+ # See std/std.rb for more details.
92
+ #
93
module ShortcutsMixin
  class << self
    ##
    # For each +name+ => +klass+ in +hsh+: defines an instance method
    # +name+ on this mixin, so that every class including it can write
    #   name _arguments_
    # to obtain +klass.new(_arguments_).to_p+.
    #
    # Raises a RuntimeError if Grammar already owns a singleton method of
    # that name. Returns +hsh+.
    #
    # +name+ should be a Symbol (Strings are converted).
    #
    def register_shortcut(hsh)
      hsh.each do |meth, klass|
        shortcut = meth.to_sym

        # singleton_methods lists Strings on Ruby 1.8 but Symbols on 1.9+;
        # comparing as Symbols keeps the guard effective on both.
        taken = Grammar.singleton_methods.any? { |existing| existing.to_sym == shortcut }
        raise "class Grammar already has a singleton method named '#{meth}'" if taken

        # define it for the grammars (self is the mixin module here)
        define_method(shortcut) do |*args|
          klass.new(*args).to_p
        end
      end
    end
  end
end
119
+
120
+ ##
121
+ # Provides an +inspect+ method for Grammar-like classes.
122
+ #
123
module GrammarInspectMixin
  ##
  # Renders the grammar as <tt>[Grammar:...]</tt>.
  # An unbound grammar (falsy +@bound+) shows as <tt>[Grammar:unbound]</tt>;
  # a bound one lists every entry of +@node.symbols+ as
  # <tt>{name => parser}</tt>, separated by single spaces.
  #
  def inspect
    return "[Grammar:unbound]" unless @bound

    entries = @node.symbols.map do |name, parser|
      "{#{name.inspect} => #{parser.inspect}}"
    end
    "[Grammar:#{entries.join(' ')}]"
  end
end
131
+
132
+ ##
133
+ # Chains several Parsers together to form a reusable unit and allows for recursion in
134
+ # Parser definition.
135
+ #
136
+ # To define a Grammar, you may use the generator methods to create Parsers and chain
137
+ # them together. You will then have to store the created top-level Parser in the Grammar
138
+ # by calling +start_with(parser)+.
139
+ #
140
+ # To do so, you first have to call +Grammar.new+, passing it a block that describes the
141
+ # Grammar's behaviour.
142
+ # The returned Grammar object can be supplied with arguments at runtime
143
+ # in order to customize its behaviour, e.g.:
144
+ #
145
+ # mayor = Grammar.new do |city, klass|
146
+ # start_with 'Mayor ' >> ( ~blank ).+ >> ", class #{klass}" >>
147
+ # ' from ' >> city.to_p
148
+ # end
149
+ #
150
+ #
151
+ # The thus created Grammar has to be bound to some arguments before
152
+ # it can actually be used to parse anything:
153
+ #
154
+ # mayor.bind( AnycharParser.new.+, 'A' )
155
+ #
156
+ # It will now parse any Mayor from any city of class 'A'.
157
+ # You can of course rebind the Grammar anytime (except during parsing):
158
+ # + mayor.bind( 'Boston', AnycharParser.new )+
159
+ # Now it will parse any Mayor of any class from 'Boston'.
160
+ #
161
+ # The only exception to the binding rule is a dynamic Grammar that takes
162
+ # no arguments. Such a Grammar will be bound right at instantiation time.
163
+ # Rebinding will have no effect whatsoever.
164
+ # NOTE: Due to an existing Ruby bug, you have to define such a Grammar with an empty
165
+ # argument block:
166
+ #
167
+ #
168
+ # chunky = Grammar.new do ||
169
+ # rule :bacon => ...
170
+ # end
171
+ #
172
+ #
173
+ # Otherwise it will not be automatically bound. This will change, as soon as bug #574
174
+ # is fixed (http://redmine.ruby-lang.org/issues/show/574).
175
+ #
176
+ # You may at any time store a parser inside a _rule_, like this:
177
+ #
178
+ #
179
+ # towns_folk = Grammar.new do ||
180
+ # start_with :person % ( 'from '.to_p >> :town >> ', ' )
181
+ # rule :person => :name >> blank.+ >> :name,
182
+ # :town => :name
183
+ # rule :name => ( ~blank ).+
184
+ # end
185
+ #
186
+ #
187
+ # As you can also see here, the rules are evaluated lazily, thus enabling
188
+ # you to use parsers recursively and before they have actually been defined.
189
+ #
190
+ # NOTE: As tempting as it may be, do NOT use instance variables to store Parsers,
191
+ # because if you use the Parser more than once, the backtrace of the first
192
+ # invocation of that Parser will be lost as soon as it is invoked a second time.
193
+ # Also the use of Closures will be broken.
194
+ # Storing the parsers as is described above will circumvent this problem by dupping
195
+ # the Parser each time it is invoked.
196
+ #
197
+ # If you'd like to provide Grammar-like functionality in your own class(es), you can
198
+ # receive some from the mixins DynVarMixin, ShortcutsMixin and GrammarInspectMixin.
199
+ #
200
class Grammar < Node

  # If we don't preserve the backtrack method, we'll run into an endless loop:
  # an included module sits between this class and Node in the lookup chain,
  # so a +backtrack+ coming from Parser would hide the inherited one.
  # The inherited implementation is saved under another name before the
  # include and reinstated as +backtrack+ afterwards.
  alias_method :node_backtrack, :backtrack
  include Parser
  alias_method :backtrack, :node_backtrack

  include DynVarMixin
  include ShortcutsMixin
  include GrammarInspectMixin

  ##
  # Defines a new Grammar.
  # The passed +block+ will be executed once the returned Grammar's
  # +bind+ method is called. All of +bind+'s parameters will be
  # passed to the block.
  #
  # If the +block+ reports an arity of 0, the Grammar is bound right away.
  # NOTE: Ruby bug 574 (http://redmine.ruby-lang.org/issues/show/574)
  #
  # Raises ArgumentError when no block is given, instead of failing later
  # with a NoMethodError on +nil+.
  #
  def initialize(&block)
    raise ArgumentError, "Grammar.new requires a block describing the grammar" unless block

    @dynamic = block
    @bound = false

    # nice little trick: we are node and parser in one
    # but for upwards compatibility, we will act as if it weren't so
    # from here on
    super(self)

    bind if block.arity == 0
  end

  ##
  # A Grammar already is a parser, so conversion returns the receiver.
  #
  def to_p
    self
  end

  ##
  # Binds the Grammar to the values supplied in +args+ by evaluating the
  # block given to +#initialize+ in the context of this instance.
  # Marks the Grammar as bound and returns +self+.
  #
  def bind(*args)
    instance_exec(*args, &@dynamic)
    @bound = true
    self
  end

  ##
  # Parses the InputIterator +iter+ with the Parsers defined in the Grammar.
  # Raises a RuntimeError if the Grammar has not been bound or if no start
  # rule was set with +start_with+.
  #
  # NOTE(review): the returned value comes from +create_match+, which is
  # defined outside this class -- confirm its contract there.
  #
  def scan(iter)
    raise "a dynamic Grammar must be bound to a value" unless @bound
    raise "you need to set a start rule" unless @start_rule

    # work on a copy so the stored start rule itself is left untouched
    root = @start_rule.dup

    # sort into tree
    root.parent = @node
    @node.left = root

    # start parsing
    create_match(iter, root.parse(iter))
  end
end
263
+
264
+ ##
265
+ # The SymParser shortcut is +sym+.
266
ShortcutsMixin.register_shortcut(:sym => SymParser)
267
+
268
+ end
269
+
@@ -0,0 +1,276 @@
1
+ # This is Spectre, a parser framework inspired by Boost.Spirit,
2
+ # which can be found at http://spirit.sourceforge.net/.
3
+ #
4
+ # If you want to find out more or need a tutorial, go to
5
+ # http://spectre.rubyforge.org/
6
+ # You'll find a nice wiki there!
7
+ #
8
+ # Author:: Fabian Streitel (karottenreibe)
9
+ # Copyright:: Copyright (c) 2009 Fabian Streitel
10
+ # License:: Boost Software License 1.0
11
+ # For further information regarding this license, you can go to
12
+ # http://www.boost.org/LICENSE_1_0.txt
13
+ # or read the file LICENSE distributed with this software.
14
+ # Homepage:: http://spectre.rubyforge.org/
15
+ # Git repo:: http://rubyforge.org/scm/?group_id=7618
16
+ #
17
+ # Keeps the InputIterator class.
18
+ #
19
+
20
+ module Spectre
21
+
22
+ ##
23
+ # Used to access the input stream.
24
+ # The standard implementation works with Integers on Array or String-like structures and
25
+ # is a forward iterator. Jumping to a position behind the current one is only possible via
26
+ # a call to +#to+.
27
+ # To actually be able to use the InputIterator, you have to subclass it and implement the
28
+ # +#concat+ and +#empty+ methods.
29
+ # When implementing iterators for non-array-like data, you will also have to reimplement
30
+ # +#get+ and +#valid?+.
31
+ # When implementing iterators which do not rely on 0 based Integers, you will also have to
32
+ # reimplement +#\++, +#\+@+, +#-+, +#skip!+ and +#to+.
33
+ #
34
+ # = The Input =
35
+ #
36
+ # The input the InputIterator will traverse must have some properties, regardless of the Parsers
37
+ # used on it. It must be
38
+ # - comparable, i.e. it has to supply the standard comparison operators <, >, ==, != etc.
39
+ # - non-atomic, i.e. you must be able to split the input into pieces.
40
+ #
41
class InputIterator

  ##
  # The input this iterator works on.
  # The default is an array-like structure.
  attr_accessor :input

  ##
  # A skipper object that is called for every retrieved token from the input.
  # It is required to return either +nil+, which causes the token to be processed
  # normally, or a positive Integer, which describes how many tokens should be
  # skipped from and including the current one (returning 0 would never advance).
  #
  # The skipper is invoked via its +#call+ method, thus effectively enabling the
  # use of lambda blocks as skippers.
  # The object will be passed the token to process and the InputIterator as parameters.
  #
  # NOTE:
  # - The skipper should be set to +:default+ if the default is to be used
  # - The skipper should be +nil+ if it is required to let all tokens pass as valid
  # - Parsers may choose to ignore the skipper, e.g. the StringParser, which parses literal
  #   Strings, must ignore any (white space) skippers in order to function correctly
  # - The skipper must _not_ modify the InputIterator
  #
  # Only the writer is generated here; the reader is +#skipper+ below.
  attr_writer :skipper

  ##
  # A transformation object that is called for every retrieved token from the input, unless the
  # skipper instructed the iterator not to process the token.
  # It may modify that token and is expected to return that modified token. The Parsers will
  # then be supplied with that token instead of the original one.
  #
  # The transformation is invoked via its +#call+ method, thus effectively enabling the
  # use of lambda blocks as transformations.
  # The object will be passed the token to process and the InputIterator as parameters.
  #
  # NOTE:
  # - The transformation should be set to +:default+ if the default is to be used
  # - The transformation should be +nil+ if it is required to leave all tokens unmodified
  # - Parsers should never ignore the transformation
  # - The transformation must _not_ modify the InputIterator
  #
  # Only the writer is generated here; the reader is +#transformation+ below.
  attr_writer :transformation

  ##
  # The position this iterator is currently at.
  # The default is an Integer.
  attr_accessor :pos

  ##
  # Initializes the iterator to a +pos+ition on an +input+.
  # Skipper and transformation both start out as +:default+.
  #
  def initialize(input, pos = 0)
    @pos, @input, @transformation, @skipper = pos, input, :default, :default
  end

  ##
  # Copies the position and input reference from the +other+ iterator to initialize
  # this one. Called by +dup+ / +clone+.
  #
  def initialize_copy(other)
    @pos, @input = other.pos, other.input
  end

  ##
  # Returns the token at the current position and advances by one token
  # (plus any tokens the skipper skipped on the way).
  #
  def +@
    token, len = internal_get(@pos..@pos)
    @pos += len
    token
  end

  ##
  # Returns the next +n+ tokens from (and including) the current position and advances
  # past them, including any skipped tokens in between.
  #
  def +(n)
    tokens, len = internal_get(@pos..(@pos + n - 1))
    @pos += len
    tokens
  end

  ##
  # Sets the iterator to point to address +n+. Will _not_ return the token at that position but
  # the modified iterator instead.
  #
  def to(n)
    @pos = n
    self
  end

  ##
  # Calculates the distance between the positions this iterator and the other +iter+ point to.
  # If they point to the same location, the distance will be 0, if this iterator points behind
  # +iter+, the distance will be positive, else negative.
  #
  # NOTE: This method must be used to correctly calculate Match length in Parsers.
  #
  def -(iter)
    @pos - iter.pos
  end

  ##
  # Whether or not this iterator points to a valid location in the input,
  # i.e. whether a non-skippable token remains at or after the current position.
  # The iterator itself is not moved.
  #
  def valid?
    limit = @input.length
    return false if @pos >= limit

    skip_fn = skipper
    pos = @pos
    # stop at the end of the input so the skipper is never asked about
    # tokens that do not exist
    while pos < limit && (skip = skip_fn.call(@input[pos..pos], self))
      pos += skip
    end

    pos < limit
  end

  ##
  # If there are any skippable tokens from (and including) the current position, a call to this
  # method will cause the InputIterator to advance over them to the next non-skippable token
  # or to the end of the input, whichever comes first. Returns +nil+.
  #
  def skip!
    # the skipper is handed a copy so it cannot move this iterator
    copy = dup
    skip_fn = skipper
    limit = @input.length
    while @pos < limit && (dist = skip_fn.call(@input[@pos..@pos], copy))
      @pos += dist
    end
  end

  ##
  # Returns the default skipper for this InputIterator class.
  # The default implementation simply returns +nil+.
  #
  def default_skipper
    nil
  end

  ##
  # Returns the default transformation for this InputIterator class.
  # The default implementation simply returns +nil+.
  #
  def default_transformation
    nil
  end

  ##
  # Returns the transformation to apply: the +#default_transformation+ if the
  # transformation is set to +:default+, else the configured one. When that
  # yields +nil+, an identity transformation is returned instead.
  #
  def transformation
    chosen = @transformation == :default ? default_transformation : @transformation
    chosen || lambda { |token, _iter| token }
  end

  ##
  # Returns the skipper to apply: the +#default_skipper+ if the skipper is set
  # to +:default+, else the configured one. When that yields +nil+, a skipper
  # that never skips is returned instead.
  #
  def skipper
    chosen = @skipper == :default ? default_skipper : @skipper
    # must accept (token, iterator): lambdas check their arity strictly
    chosen || lambda { |_token, _iter| nil }
  end

  ##
  # Returns all of the input that has not yet been parsed, while ignoring the skipper.
  #
  def rest
    @input[@pos..-1]
  end

  ##
  # Retrieves the tokens from within the specified range (default: the current
  # position only), with skipper and transformation applied.
  # Does not advance the iterator.
  #
  def get(*args)
    internal_get(*args)[0]
  end

  ##
  # Executes the given block with the skipper being set to +nil+, passing
  # this iterator to the block. The previous skipper setting is restored
  # afterwards, even if the block raises, and is also the return value.
  # Returns +nil+ immediately when no block is given.
  #
  def ignore_skipper
    return unless block_given?

    saved, @skipper = @skipper, nil
    begin
      yield self
    ensure
      @skipper = saved
    end
    saved
  end

  ##
  # Concatenates the two values and returns the result. The values will be of the
  # same type as the input. Must be able to handle nil as a value as well.
  # Must be implemented by a subclass; this base version returns +nil+.
  #
  def concat(val1, val2)
    nil
  end

  ##
  # Returns an empty object of the input type, e.g. an empty String or an empty Array.
  # Must be implemented by a subclass; this base version returns +nil+.
  #
  def empty
    nil
  end

  protected

  ##
  # Retrieves the tokens from within the specified +range+, with the transformation applied.
  # Returns +[tokens,len]+, where +len+ is the length of the retrieved input, including skipped
  # tokens. Reading stops early at the end of the input.
  # NOTE: This method is intended for iterator internal use only. Use +#get+ instead.
  #
  def internal_get(range = (@pos..@pos))
    buf = empty
    start = range.first
    # loop invariants, computed once
    wanted = range.count
    limit = @input.length
    skip_fn = skipper
    transform = transformation

    pos = start
    while buf.length < wanted && pos < limit
      skip = skip_fn.call(@input[pos..pos], self)

      if skip
        pos += skip
      else
        buf = concat(buf, transform.call(@input[pos..pos], self))
        pos += 1
      end
    end

    [buf, pos - start]
  end

end
274
+
275
+ end
276
+