digestr 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CONTRIBUTORS +3 -0
- data/LICENSE +20 -0
- data/README +136 -0
- data/Rakefile +420 -0
- data/TODO +10 -0
- data/install.rb +35 -0
- data/lib/xml/digester.rb +10 -0
- data/lib/xml/digestr.rb +810 -0
- data/tests/runner.rb +5 -0
- metadata +66 -0
data/TODO
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
= DigestR project -- Todo list
|
2
|
+
|
3
|
+
Most of the following ideas will probably be implemented at some point.
|
4
|
+
Send any suggestions or patches (preferably as/with tests) to:
|
5
|
+
|
6
|
+
rosco <at> roscopeco <dot> co <dot> uk
|
7
|
+
|
8
|
+
* Namespace support
|
9
|
+
* Many standard rule types remain to be implemented
|
10
|
+
|
data/install.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Installation script: copies every .rb file below ./lib into the Ruby
# site library directory (optionally prefixed by the DESTDIR environment
# variable, for staged/package installs).
require 'rbconfig'
require 'find'
# 'ftools' was removed from the standard library; FileUtils provides the
# equivalent mkdir_p / install operations.
require 'fileutils'

# The top-level Config constant no longer exists in current Rubies;
# RbConfig::CONFIG is the same table.
config = RbConfig::CONFIG

$ruby = config['ruby_install_name']
$sitedir = config["sitelibdir"]
unless $sitedir
  # No sitelibdir configured: derive one from the load path / libdir.
  version = config["MAJOR"] + "." + config["MINOR"]
  $libdir = File.join(config["libdir"], "ruby", version)
  $sitedir = $:.find { |x| x =~ /site_ruby/ }
  if !$sitedir
    $sitedir = File.join($libdir, "site_ruby")
  elsif !$sitedir.include?(version)
    # Plain substring test: matching a String against a String with !~
    # raises TypeError.
    $sitedir = File.join($sitedir, version)
  end
end

if (destdir = ENV['DESTDIR'])
  $sitedir = destdir + $sitedir
  FileUtils.mkdir_p($sitedir)
end

# The library files

files = Dir.chdir('lib') { Dir['**/*.rb'] }
files.each do |fn|
  # mkdir_p is a no-op when the directory already exists.
  FileUtils.mkdir_p(File.join($sitedir, File.dirname(fn)))
  FileUtils.install(File.join('lib', fn), File.join($sitedir, fn),
                    :mode => 0644, :verbose => true)
end
|
data/lib/xml/digester.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# === DigestR - Libxml2-based XML Digester similar to Jakarta Commons Digester.
|
2
|
+
#
|
3
|
+
# Copyright (c)2006 Ross Bamford (and contributors). All rights reserved.
|
4
|
+
# See LICENSE for copyright and license information.
|
5
|
+
#
|
6
|
+
# Main documentation in XML::Digester
|
7
|
+
#
|
8
|
+
# $Id: digester.rb 4 2006-04-17 19:05:32Z roscopeco $
|
9
|
+
|
10
|
+
require File.join(File.dirname(__FILE__),'digestr.rb')
|
data/lib/xml/digestr.rb
ADDED
@@ -0,0 +1,810 @@
|
|
1
|
+
# === DigestR - Libxml2-based XML Digester similar to Jakarta Commons Digester.
|
2
|
+
#
|
3
|
+
# Copyright (c)2006 Ross Bamford (and contributors). All rights reserved.
|
4
|
+
# See LICENSE for copyright and license information.
|
5
|
+
#
|
6
|
+
# Main documentation in XML::Digester
|
7
|
+
#
|
8
|
+
# $Id: digestr.rb 6 2006-04-18 01:15:49Z roscopeco $
|
9
|
+
|
10
|
+
require 'xml/libxml'
|
11
|
+
|
12
|
+
module XML #:nodoc:
|
13
|
+
|
14
|
+
# Processes XML input according to a series of rules applied prior
|
15
|
+
# to the parse beginning. A Digester instance wraps an instance
|
16
|
+
# of XML::SaxParser and uses its own event callbacks to trigger
|
17
|
+
# the configured rules as appropriate.
|
18
|
+
#
|
19
|
+
# This API is based on the Jakarta Commons Digester library
|
20
|
+
# (http://jakarta.apache.org/commons/digester), and is intended
|
21
|
+
# to provide similar semantics to that package in a pleasingly
|
22
|
+
# Rubyish manner.
|
23
|
+
#
|
24
|
+
# ==== Notes
|
25
|
+
#
|
26
|
+
# * It's not yet as fast as I'd like.
|
27
|
+
# * There is currently no namespace support.
|
28
|
+
class Digester
|
29
|
+
VERSION = "0.0.1"
|
30
|
+
|
31
|
+
# Raised to signal errors in rule processing when +pedantic+ is
|
32
|
+
# true.
|
33
|
+
class Error < RuntimeError; end
|
34
|
+
|
35
|
+
# Base-class for rule implementations. This class implements the
|
36
|
+
# standard (currently fnmatch-based) pattern matching logic and
|
37
|
+
# can be used as a base-class for custom rule implementations.
|
38
|
+
class RulesBase
  # The pattern this rule matches, always beginning with '/'.
  attr_reader :pattern

  # The digester this rule belongs to.
  attr_accessor :digester

  # Neighbouring rules in the digester's rule chain.
  attr_accessor :next, :prev #:nodoc:

  # Be sure to call through to here when subclassing RulesBase.
  #
  # Raises ArgumentError when no pattern is given. A pattern that does
  # not start with '/' gets one prepended.
  def initialize(pattern)
    raise ArgumentError, "No pattern given" unless pattern
    @pattern = (pattern[0, 1] == '/') ? pattern : '/' + pattern
  end

  # Event hook: start of a matching element. No-op by default.
  def begin(namespace, name, attrs)
  end

  # Event hook: body text encountered. No-op by default.
  def body(txt)
  end

  # Event hook: end of a matching element (rules are visited in reverse
  # order of registration). No-op by default.
  def end(namespace, name)
  end

  # Event hook: end of the parse (again in reverse order). No-op by default.
  def finish
  end
end
|
63
|
+
|
64
|
+
# Rule that executes the given block when matched. The
|
65
|
+
# supplied block should have a variable argument count,
|
66
|
+
# since it is used with different arguments by the
|
67
|
+
# different events, as follows:
|
68
|
+
#
|
69
|
+
# begin : { |:begin, digester, namespace, name, attrs| ... }
|
70
|
+
# body : { |:body, digester, txt| ... }
|
71
|
+
# end : { |:end, digester, namespace, name| ... }
|
72
|
+
# finish: { |:finish| ... }
|
73
|
+
class BlockRule < RulesBase
  # call-seq:
  #   BlockRule.new(pattern) { |*args| ... }
  #
  # Create a new BlockRule with the supplied block. Raises ArgumentError
  # if no block is given. The block receives the event name as its first
  # argument, followed by the event-specific arguments.
  def initialize(pattern, &blk)
    super(pattern)
    raise ArgumentError, "No block given" unless blk
    @blk = blk
  end

  def begin(namespace, name, attrs) #:nodoc:
    notify(:begin, @digester, namespace, name, attrs)
  end

  def body(txt) #:nodoc:
    notify(:body, @digester, txt)
  end

  def end(namespace, name) #:nodoc:
    notify(:end, @digester, namespace, name)
  end

  def finish #:nodoc:
    notify(:finish)
  end

  private

  # Forward an event and its arguments to the user block.
  def notify(*event_args)
    @blk.call(*event_args)
  end
end
|
99
|
+
|
100
|
+
# Rule that creates an instance of the specified class (or alternatively
|
101
|
+
# obtains the result of a call to specified method on the specified
|
102
|
+
# object, possibly with custom arguments) and pushes it onto the stack
|
103
|
+
# in the *begin* event, and pops it off again in the *end* event.
|
104
|
+
# This combines the functionality of both ObjectCreateRule and
|
105
|
+
# FactoryCreateRule from commons digester.
|
106
|
+
#
|
107
|
+
# It is also possible to supply a block, in which case it will be
|
108
|
+
# executed when the rule is triggered (in *begin*) and the result
|
109
|
+
# from that pushed. In this case, any class/message will be ignored.
|
110
|
+
class ObjectCreateRule < RulesBase
  # call-seq:
  #   ObjectCreateRule.new(pattern, klass, message = :new, *args)
  #   ObjectCreateRule.new(pattern, obj, message, *args)
  #   ObjectCreateRule.new(pattern) { ... }
  #
  # Create a new ObjectCreateRule with the specified class,
  # method call or block. When a block is given it takes precedence
  # over the class/message pair.
  def initialize(pattern, klass = Object, msg = :new, *args, &blk)
    super(pattern)
    @klass = klass
    @msg   = msg
    @args  = args
    @blk   = blk
  end

  # Builds the object (via the block if present, otherwise by sending
  # the message to the class/object) and pushes it on the digester stack.
  def begin(namespace, name, attrs) #:nodoc:
    created = @blk ? @blk.call(*@args) : @klass.send(@msg, *@args)
    @digester.push(created)
  end

  # Pops the object pushed in #begin.
  def end(namespace, name) #:nodoc:
    @digester.pop
  end
end
|
135
|
+
|
136
|
+
# Rule that sets attributes on the top object on the
|
137
|
+
# stack based on values from the current element's attributes.
|
138
|
+
# This rule often follows an ObjectCreateRule with the same pattern
|
139
|
+
# in order to configure the newly created object. The attributes
|
140
|
+
# are set in the *begin* event.
|
141
|
+
class SetPropertiesRule < RulesBase
  # call-seq:
  #   SetPropertiesRule.new(pattern, [mapping])
  #   SetPropertiesRule.new(pattern, [mapping]) { |target, attr, value| ... }
  #
  # Create a new SetPropertiesRule. The optional mapping parameter allows
  # a mapping between XML attribute names and the corresponding ruby
  # attribute name/type. It should quack like a hash, and return entries
  # of the form:
  #
  #   xml_name => ruby_name
  #   xml_name => coerce_type
  #   xml_name => [ruby_name, coerce_type]
  #
  # where +ruby_name+ is the name of the attribute that is to be set and
  # +coerce_type+ is the class the value should be coerced to (a coercion
  # method of the same name, e.g. Integer(), must be callable). The
  # mapping may mix entries of the three forms. A +ruby_name+ of
  # <tt>:nil</tt> suppresses the attribute.
  #
  # If a block is supplied, it is called (during *begin*) for each
  # attribute that is to be set, with the top stack object, the mapped
  # attribute name and the (possibly coerced) value.
  def initialize(pattern,
                 mapping = Hash.new { |h,k| h[k] = k },
                 &blk)
    super(pattern)
    @mapping = mapping
    @blk = blk
  end

  def begin(namespace, name, attrs) #:nodoc:
    top = @digester.peek
    return unless top

    attrs.each do |k, v|
      obj_attr, coerce = @mapping[k] || k

      # allow a single class to be specified if same
      # name is to be used
      if obj_attr.kind_of? Module
        coerce, obj_attr = obj_attr, k
      end

      v = send(coerce.name, v) if coerce && coerce != String

      next if obj_attr == :nil

      if @blk
        @blk.call(top, obj_attr, v)
      else
        begin
          top.send("#{obj_attr}=", v)
        rescue NoMethodError
          if @digester.pedantic?
            # Report the attribute that could not be set, not the
            # enclosing element's name.
            raise Error,
                  "Unknown property attribute: #{obj_attr} for #{top.inspect}"
          end
        end
      end
    end
  end
end
|
207
|
+
|
208
|
+
# Rule that sets a single ruby attribute on the top stack object. The
|
209
|
+
# attribute name and value to set are obtained from the specified XML
|
210
|
+
# attributes. The attribute is set in the *begin* event.
|
211
|
+
class SetPropertyRule < RulesBase
  # call-seq:
  #   SetPropertyRule.new(pattern, name_attr = 'name', value_attr = 'value', type = String)
  #   SetPropertyRule.new(pattern, name_attr = 'name', value_attr = 'value', type = String) { |target, attr, value| ... }
  #
  # Create a new SetPropertyRule that will set the ruby attribute named
  # by the +name_attr+ XML attribute to the value held in the
  # +value_attr+ XML attribute. Given, for example, a rule:
  #
  #   SetPropertyRule.new('*/a', 'foo_name', 'foo_val')
  #
  # And the following element:
  #
  #   <a foo_name='foo' foo_val='bar'/>
  #
  # The message sent to the top object on the stack would look like:
  #
  #   top.send(:foo=, 'bar')
  #
  # An optional coercion type (a class) may be given; the value is then
  # converted by calling the method named after that class. String
  # values are passed through unchanged.
  #
  # If a block is supplied, it is called (during *begin*) with the top
  # stack object, the attribute name and the (possibly coerced) value,
  # and is responsible for setting the attribute itself.
  def initialize(pattern, name_attr = 'name', value_attr = 'value', type = String, &blk)
    super(pattern)
    @name_attr  = name_attr
    @value_attr = value_attr
    @blk        = blk
    @type       = type
  end

  def begin(namespace, name, attrs) #:nodoc:
    dig = @digester
    name = attrs[@name_attr]
    return unless name

    value = attrs[@value_attr]

    if @type && @type != String
      begin
        value = send(@type.name, value)
      rescue ArgumentError, TypeError => e
        # Non-pedantic mode keeps the uncoerced value.
        raise Error, e.message if dig.pedantic?
      end
    end

    top = dig.peek
    return unless top

    if @blk
      @blk.call(top, name, value)
    else
      begin
        top.send("#{name}=", value)
      rescue NoMethodError
        if dig.pedantic?
          raise Error,
                "Unknown property attribute: #{name} for #{top.inspect}"
        end
        nil
      end
    end
  end
end
|
277
|
+
|
278
|
+
# Rule that allows links to be made between the top two stack objects
|
279
|
+
# using a supplied block. See SetNextRule and SetTopRule for common-case
|
280
|
+
# specializations of this class. Operates within the *end* event.
|
281
|
+
class LinkRule < RulesBase
  # call-seq:
  #   LinkRule.new(pattern) { |parent, child| ... }
  #
  # Create a new LinkRule that, when matched, passes the top two stack
  # elements to the supplied block. Raises ArgumentError without a block.
  #
  # 'parent' is the next-to-top stack element and 'child' is the top
  # element; the names merely reflect the common usage of this rule.
  def initialize(pattern, &blk)
    super(pattern)
    raise ArgumentError, "No block given" unless blk
    @blk = blk
  end

  # Calls the block when at least two objects are on the stack; otherwise
  # raises Error in pedantic mode and does nothing in lenient mode.
  def end(namespace, name) #:nodoc:
    stk = @digester.stack
    return @blk.call(stk[-2], stk[-1]) if stk.length > 1

    if @digester.pedantic?
      raise Error,
            "Cannot link: no parent on stack: #{digester.stack.inspect}"
    end
  end
end
|
310
|
+
|
311
|
+
# Rule that allows a parent -> child relationship to be created
|
312
|
+
# between the top two stack elements via a given method call.
|
313
|
+
# This is a specialized LinkRule that calls a method on the
|
314
|
+
# next-to-top stack element, passing in the top element as an argument.
|
315
|
+
# Often, the called method will have a name like 'add_child'.
|
316
|
+
#
|
317
|
+
# Like LinkRule, this rule operates in the *end* event. See also
|
318
|
+
# SetTopRule.
|
319
|
+
class SetNextRule < LinkRule
  # call-seq:
  #   SetNextRule.new(pattern, msg, *additional_args)
  #
  # Create a new SetNextRule that sends +msg+ to the next-to-top stack
  # object, passing the top object as the first argument followed by any
  # additional arguments supplied here. If the receiver lacks the method,
  # Error is raised in pedantic mode; otherwise the failure is ignored.
  def initialize(pattern, msg, *args)
    super(pattern) do |parent, child|
      begin
        parent.send(msg, child, *args)
      rescue NoMethodError
        raise Error, "Cannot SetNext: no method #{msg} for #{parent.inspect}" if digester.pedantic?
        nil
      end
    end
  end
end
|
341
|
+
|
342
|
+
# Rule that allows a child -> parent relationship to be created
|
343
|
+
# between the top two stack elements via a given method call.
|
344
|
+
# This is a specialized LinkRule that calls a method on the
|
345
|
+
# top stack element, passing in the next-to-top element as an
|
346
|
+
# argument. Often, the called method will have a name like
|
347
|
+
# 'set_parent'.
|
348
|
+
#
|
349
|
+
# Like LinkRule, this rule operates in the *end* event. See
|
350
|
+
# also SetNextRule.
|
351
|
+
class SetTopRule < LinkRule
  # call-seq:
  #   SetTopRule.new(pattern, msg, *additional_args)
  #
  # Create a new SetTopRule that sends +msg+ to the top stack object,
  # passing the next-to-top object as the first argument followed by any
  # additional arguments supplied here. If the top object lacks the
  # method, Error is raised in pedantic mode; otherwise the failure is
  # ignored and nil is returned.
  def initialize(pattern, msg, *args)
    super(pattern) do |parent, child|
      begin
        child.send(msg, parent, *args)
      rescue NoMethodError
        if digester.pedantic?
          # The message was sent to the child (top) object, so that is
          # the receiver to report.
          raise Error,
                "Cannot SetTop: no method #{msg} for #{child.inspect}"
        end
        nil
      end
    end
  end
end
|
373
|
+
|
374
|
+
# Rule that allows a given method to be called on an object
|
375
|
+
# on the stack, with arguments collected from the stack or
|
376
|
+
# XML attributes (possibly on nested elements) and element
|
377
|
+
# bodies using CallParamRule. The actual call is executed
|
378
|
+
# in the *end* event.
|
379
|
+
class CallMethodRule < RulesBase
  # call-seq:
  #   CallMethodRule.new(pattern, msg, target_ofs = 0, *extra_args)
  #   CallMethodRule.new(pattern) { |target| ... }
  #   CallMethodRule.new(pattern, nil, target_ofs, *extra_args) { |target, *args| ... }
  #
  # Create a new CallMethodRule that calls +msg+ on the object found
  # +target_ofs+ places below the top of the stack (0 is the top).
  # Any extra arguments are appended *after* the arguments collected by
  # nested CallParamRule matches, and the call happens in the *end* event.
  #
  # If a block is supplied it is called instead of the method and +msg+
  # is ignored. Raises ArgumentError when neither is given.
  def initialize(pattern, msg = nil, target_ofs = 0,*args, &blk)
    raise ArgumentError, "Either a message or block is required" unless msg || blk

    super(pattern)
    @msg        = msg
    @target_ofs = target_ofs
    @args       = args
    @blk        = blk
  end

  # Opens a fresh parameter frame for nested CallParamRules to fill.
  def begin(namespace, name, attrs) #:nodoc:
    @digester.rulestack.push([])
  end

  # Closes the parameter frame and performs the call.
  def end(namespace, name) #:nodoc:
    dig = @digester
    call_args = dig.rulestack.pop + @args
    target = dig.stack[-(@target_ofs + 1)]

    return @blk.call(target, *call_args) if @blk

    begin
      target.send(@msg, *call_args)
    rescue NoMethodError
      if dig.pedantic?
        raise Error,
              "Cannot CallMethod: no method '#{@msg}' on #{target.inspect}"
      end
    end
  end
end
|
425
|
+
|
426
|
+
# Rule that collects arguments for a previous CallMethodRule.
|
427
|
+
# CallParamRule is typically matched on elements nested within
|
428
|
+
# those that trigger CallMethodRule. Parameters are referenced
|
429
|
+
# by index and can take their value from XML attributes, element
|
430
|
+
# bodies, or objects on the digester's stack.
|
431
|
+
class CallParamRule < RulesBase
  EMPTYSTR = "" #:nodoc:

  # call-seq:
  #   CallParamRule.new(pattern, param_idx, attr_name, type = String)
  #   CallParamRule.new(pattern, param_idx, stack_index, type = String)
  #   CallParamRule.new(pattern, param_idx, nil, type = String)
  #   CallParamRule.new(pattern, param_idx = 0)
  #
  # Create a new CallParamRule that takes its parameter value from the
  # specified source: an XML attribute name, an Integer stack offset
  # (0 is the top of the stack), or the current element body if source
  # is +nil+. In the *end* event the value is stored at index
  # +param_idx+ of the parameter frame opened by the enclosing
  # CallMethodRule.
  #
  # If a class is supplied for +type+, the value is coerced by calling
  # the method named after that class (e.g. Integer()) before being
  # stored. Pass +nil+ to skip coercion.
  def initialize(pattern, param_idx = 0, source = nil, type = String)
    super(pattern)
    @param_idx, @type = param_idx, type
    @bodytxt = ""

    # Decide once, up front, how the argument will be fetched, so the
    # per-element event handlers do as little work as possible.
    @argproc = case source
               when nil
                 # element body; the buffer is mutated in place by #body
                 bt = @bodytxt
                 lambda { bt.strip }
               when Integer
                 # stack
                 stackidx = -(source+1)
                 lambda { @digester.stack[stackidx] }
               else
                 # attribute
                 srcs = source.to_s
                 lambda { @attrs[srcs] }
               end
  end

  def begin(namespace, name, attrs) #:nodoc:
    @attrs = attrs
  end

  def body(txt) #:nodoc:
    @bodytxt << txt
  end

  # Fetches, coerces and stores the argument. In pedantic mode raises
  # Error when coercion fails or when no CallMethodRule frame is open;
  # otherwise such problems are ignored.
  def end(namespace, name) #:nodoc:
    arg = @argproc.call

    if @type
      begin
        arg = send(@type.name, arg)
      rescue NoMethodError
        raise Error, "No coercion method for #{@type.name}" if @digester.pedantic?
      rescue ArgumentError, TypeError => e
        raise Error, e.message if @digester.pedantic?
      end
    end

    frame = @digester.rulestack.last
    if frame
      frame[@param_idx] = arg
    elsif @digester.pedantic?
      # No enclosing CallMethodRule matched, so there is nowhere to
      # store the parameter.
      raise Error, "Cannot CallParam: no enclosing CallMethodRule for #{@pattern}"
    end
  ensure
    # Always reset the body buffer, even when an error is raised, so
    # stale text cannot leak into the next matching element.
    @bodytxt.replace(EMPTYSTR)
  end
end
|
501
|
+
|
502
|
+
#### DIGESTER IMPL ####
|
503
|
+
|
504
|
+
# Obtain the XML::SaxParser used by this Digester
|
505
|
+
attr_reader :parser
|
506
|
+
|
507
|
+
# Obtain the _rulestack_ - an auxiliary stack provided for rule-specific
|
508
|
+
# state storage.
|
509
|
+
attr_reader :rulestack
|
510
|
+
|
511
|
+
# Obtain the path to the current element in the form: /a/b/c
|
512
|
+
attr_reader :current_path
|
513
|
+
|
514
|
+
# Determines whether rules are pedantic (e.g. raise Errors when
|
515
|
+
# some attributes cannot be matched, or when a method call fails).
|
516
|
+
attr_accessor :pedantic
|
517
|
+
alias :pedantic? :pedantic
|
518
|
+
|
519
|
+
# Obtain the _userstack_. This is the main stack used by the digester,
|
520
|
+
# and should be used read-only - you must use the mutators provided by
|
521
|
+
# this class (#push, #pop, etc) to ensure the digester state remains
|
522
|
+
# consistent.
|
523
|
+
attr_reader :userstack
|
524
|
+
alias :stack :userstack
|
525
|
+
|
526
|
+
# Create a new Digester. If a parser is supplied, be aware that
|
527
|
+
# some of its callbacks will be replaced:
|
528
|
+
#
|
529
|
+
# * on_start_document
|
530
|
+
# * on_start_element
|
531
|
+
# * on_characters
|
532
|
+
# * on_cdata_block
|
533
|
+
# * on_end_element
|
534
|
+
# * on_end_document
|
535
|
+
#
|
536
|
+
# Create a new Digester around +parser+, registering this digester's
# private cb_* methods as the parser's SAX callbacks. +pedantic+ is
# normalised to a strict boolean. The user stack starts out empty.
def initialize(pedantic = false, parser = XML::SaxParser.new)
  @parser = parser

  # parser hook => digester callback; character data and CDATA blocks
  # share one handler.
  [
    [:on_start_document, :cb_start_document],
    [:on_end_document,   :cb_end_document],
    [:on_start_element,  :cb_start_element],
    [:on_characters,     :cb_characters],
    [:on_cdata_block,    :cb_characters],
    [:on_end_element,    :cb_end_element]
  ].each do |hook, handler|
    @parser.send(hook, &method(handler))
  end

  @pedantic = !!pedantic
  @userstack = []
end
|
547
|
+
|
548
|
+
# call-seq:
|
549
|
+
# parse_string(xml) -> first_object
|
550
|
+
#
|
551
|
+
# Parse the specified XML string and trigger appropriate rules
|
552
|
+
# in this Digester. Returns the first element that was pushed
|
553
|
+
# onto the stack.
|
554
|
+
# Hand the XML text +xml+ to the parser and run the parse.
# Returns whatever do_parse returns (the first object pushed).
def parse_string(xml)
  source = xml
  @parser.string = source
  do_parse
end
|
558
|
+
|
559
|
+
# call-seq:
|
560
|
+
# parse_file(filename) -> first_object
|
561
|
+
#
|
562
|
+
# Parse the specified XML file and trigger appropriate rules
|
563
|
+
# in this Digester. Returns the first element that was pushed
|
564
|
+
# onto the stack.
|
565
|
+
# Point the parser at the file +filename+ and run the parse.
# Returns whatever do_parse returns (the first object pushed).
def parse_file(filename)
  path = filename
  @parser.filename = path
  do_parse
end
|
569
|
+
|
570
|
+
# Clear the user stack and reset the digester state.
|
571
|
+
# Clear the user stack and reset the digester state: empties the user
# stack in place, forgets the remembered first-pushed object (so a
# stale object cannot be returned by a later parse) and installs a
# fresh, empty rulestack.
def clear
  @userstack.clear
  @first = nil
  @rulestack = []
end
|
575
|
+
|
576
|
+
# Retrieve a reference to the top user stack element, without
|
577
|
+
# actually popping it from the stack.
|
578
|
+
# Return the top user stack element without removing it
# (nil when the stack is empty).
def peek
  @userstack[-1]
end
|
581
|
+
|
582
|
+
# Remove and return the top user stack element.
|
583
|
+
# Remove the top user stack element and return it
# (nil when the stack is empty).
def pop
  top = @userstack.pop
  top
end
|
586
|
+
|
587
|
+
# Push the supplied object +o+ onto the user stack.
|
588
|
+
# Push the supplied object +o+ onto the user stack. The first object
# pushed is remembered so the parse methods can return it.
# Returns the user stack.
def push(o)
  @first = o unless @first
  @userstack << o
end
|
592
|
+
|
593
|
+
# Obtain the name of the element currently being processed.
|
594
|
+
# Obtain the name of the element currently being processed, i.e. the
# last segment of the current path *without* its leading '/'.
# Returns nil when no element is open (no path yet, or an empty path).
def current_element
  path = @current_path
  return nil unless path

  slash = path.rindex('/')
  return nil unless slash

  # Skip the separator itself so that only the element name is returned.
  path.slice((slash + 1)..-1)
end
|
598
|
+
|
599
|
+
## ADD RULE METHODS ###
|
600
|
+
|
601
|
+
# Add the specified rule to this digester.
|
602
|
+
# Attach +rule+ to this digester and append it to the end of the
# doubly-linked rule chain. Returns the rule.
def add_rule(rule)
  rule.digester = self

  if @first_rule
    # Append after the current tail.
    tail = @last_rule
    tail.next = rule
    rule.prev = tail
  else
    # First rule registered: it is also the head of the chain.
    @first_rule = rule
  end

  @last_rule = rule
end
|
612
|
+
|
613
|
+
# call-seq:
|
614
|
+
# add_block(pattern) { |*args| ... }
|
615
|
+
#
|
616
|
+
# Add a new BlockRule with the supplied block. See BlockRule
|
617
|
+
# for details of the block's arguments.
|
618
|
+
# Build a BlockRule for +pattern+ around the given block and register
# it with this digester.
def add_block(pattern, &blk)
  rule = BlockRule.new(pattern, &blk)
  add_rule(rule)
end
|
621
|
+
|
622
|
+
# call-seq:
|
623
|
+
# add_object_create(pattern, klass)
|
624
|
+
# add_object_create(pattern, obj, message, *args)
|
625
|
+
# add_object_create(pattern) { ... }
|
626
|
+
#
|
627
|
+
# Add a new ObjectCreateRule with the specified class,
|
628
|
+
# method call or block.
|
629
|
+
# Build an ObjectCreateRule from the given class/object, message,
# arguments and optional block, and register it with this digester.
def add_object_create(pattern, klass = Object, msg = :new, *args, &blk)
  rule = ObjectCreateRule.new(pattern, klass, msg, *args, &blk)
  add_rule(rule)
end
|
632
|
+
|
633
|
+
# Compatibility alias for REXML-based xmldigester
|
634
|
+
alias :add_create_object :add_object_create
|
635
|
+
|
636
|
+
# call-seq:
|
637
|
+
# add_set_properties(pattern, [mapping])
|
638
|
+
# add_set_properties(pattern, [mapping]) { |target, attr, value| ... }
|
639
|
+
#
|
640
|
+
# Add a new SetPropertiesRule. See SetPropertiesRule for details
|
641
|
+
# of the optional mapping format.
|
642
|
+
# Build a SetPropertiesRule for +pattern+ and register it. When no
# mapping is supplied, a hash that maps every key to itself is used.
def add_set_properties(pattern,
                       mapping = Hash.new { |h,k| h[k] = k },
                       &blk)
  rule = SetPropertiesRule.new(pattern, mapping, &blk)
  add_rule(rule)
end
|
647
|
+
|
648
|
+
# call-seq:
|
649
|
+
# add_set_property(pattern, name_attr = 'name', value_attr = 'value', type = String)
|
650
|
+
# add_set_property(pattern, name_attr = 'name', value_attr = 'value', type = String) { |target, attr, value| ... }
|
651
|
+
#
|
652
|
+
# Add a new SetPropertyRule that will set the ruby attribute named by
|
653
|
+
# the 'name_attr' XML attribute to the value specified by the 'value_attr'
|
654
|
+
# XML attribute.
|
655
|
+
# Build a SetPropertyRule for +pattern+ and register it.
#
# +name_attr+ and +value_attr+ name the XML attributes that hold the
# ruby attribute name and its value (defaulting to 'name' and 'value');
# +type+ is the optional coercion class. A supplied block is forwarded
# to the rule, which calls it instead of invoking the setter.
def add_set_property(pattern, name_attr = 'name', value_attr = 'value', type = String, &blk)
  add_rule(SetPropertyRule.new(pattern, name_attr, value_attr, type, &blk))
end
|
658
|
+
|
659
|
+
# call-seq:
|
660
|
+
# add_link(pattern) { |parent, child| ... }
|
661
|
+
#
|
662
|
+
# Add a new LinkRule that, when matched, will pass the top two
|
663
|
+
# elements (in order - see below) to the supplied block.
|
664
|
+
# Build a LinkRule for +pattern+ around the given block and register it.
def add_link(pattern, &blk)
  rule = LinkRule.new(pattern, &blk)
  add_rule(rule)
end
|
667
|
+
|
668
|
+
# call-seq:
|
669
|
+
# add_set_next(pattern, msg, *additional_args)
|
670
|
+
#
|
671
|
+
# Add a new SetNextRule that will send the specified message
|
672
|
+
# to the next-to-top stack object, passing in the top object
|
673
|
+
# as the initial parameter, followed by any additional arguments
|
674
|
+
# supplied to this method.
|
675
|
+
# Build a SetNextRule for +pattern+ with the given message and extra
# arguments, and register it.
def add_set_next(pattern, msg, *args)
  rule = SetNextRule.new(pattern, msg, *args)
  add_rule(rule)
end
|
678
|
+
|
679
|
+
# call-seq:
|
680
|
+
# add_set_top(pattern, msg, *additional_args)
|
681
|
+
#
|
682
|
+
# Add a new SetTopRule that will send the specified message
|
683
|
+
# to the top stack object, passing in the next-to-top object
|
684
|
+
# as the initial parameter, followed by any additional arguments
|
685
|
+
# supplied to this method.
|
686
|
+
# Build a SetTopRule for +pattern+ with the given message and extra
# arguments, and register it.
def add_set_top(pattern, msg, *args)
  rule = SetTopRule.new(pattern, msg, *args)
  add_rule(rule)
end
|
689
|
+
|
690
|
+
# call-seq:
|
691
|
+
# add_call_method(pattern, msg, target_ofs = 0, *args)
|
692
|
+
# add_call_method(pattern) { |target| ... }
|
693
|
+
# add_call_method(pattern, nil, target_ofs, *args) { |target, *args| ... }
|
694
|
+
#
|
695
|
+
# Add a new CallMethodRule that will call the given method
|
696
|
+
# on the object at the given offset from the top of the stack
|
697
|
+
# (positive only, increasing distance from the stack top).
|
698
|
+
# Build a CallMethodRule for +pattern+ from the message, stack offset,
# extra arguments and optional block, and register it.
def add_call_method(pattern, msg = nil, target_ofs = 0,*args, &blk)
  rule = CallMethodRule.new(pattern, msg, target_ofs, *args, &blk)
  add_rule(rule)
end
|
701
|
+
|
702
|
+
# call-seq:
|
703
|
+
# add_call_param(pattern, param_idx, attr_name, type = String)
|
704
|
+
# add_call_param(pattern, param_idx, stack_index, type = String)
|
705
|
+
# add_call_param(pattern, param_idx, nil, type = String)
|
706
|
+
# add_call_param(pattern, param_idx = 0)
|
707
|
+
#
|
708
|
+
# Add a new CallParamRule that will take its parameter value
|
709
|
+
# from the specified source, or the current element body if
|
710
|
+
# source is +nil+.
|
711
|
+
# Build a CallParamRule for +pattern+ (parameter index, value source
# and coercion type) and register it.
def add_call_param(pattern, param_idx = 0, source = nil, type = String)
  rule = CallParamRule.new(pattern, param_idx, source, type)
  add_rule(rule)
end
|
714
|
+
|
715
|
+
# call-seq:
|
716
|
+
# add_call_param_body(pattern, param_idx, type = String)
|
717
|
+
# add_call_param_body(pattern, param_idx = 0)
|
718
|
+
#
|
719
|
+
# Add a new CallParamRule that will take its parameter value
|
720
|
+
# from the current element body. This just calls through to
|
721
|
+
# +add_call_param+ and is provided for xmldigester compatibility.
|
722
|
+
# Register a CallParamRule whose value is the element body, by
# delegating to add_call_param with a +nil+ source.
def add_call_param_body(pattern, param_idx = 0, type = String)
  body_source = nil
  add_call_param(pattern, param_idx, body_source, type)
end
|
725
|
+
|
726
|
+
# call-seq:
|
727
|
+
# add_call_param_attribute(pattern, param_idx, attr_name, type = String)
|
728
|
+
# add_call_param_attribute(pattern, param_idx, attr_name)
|
729
|
+
#
|
730
|
+
# Add a new CallParamRule that will take its parameter value
|
731
|
+
# from the named attribute on the current element. This just calls through to
|
732
|
+
# +add_call_param+ and is provided for xmldigester compatibility.
|
733
|
+
# Register a CallParamRule whose value comes from the XML attribute
# +attr_name+, by delegating to add_call_param.
def add_call_param_attribute(pattern, param_idx, attr_name, type = String)
  attribute_source = attr_name
  add_call_param(pattern, param_idx, attribute_source, type)
end
|
736
|
+
|
737
|
+
# call-seq:
|
738
|
+
# add_call_param_stack(pattern, param_idx, stack_ofs, type = nil)
|
739
|
+
# add_call_param_stack(pattern, param_idx = 0, stack_ofs = 0)
|
740
|
+
#
|
741
|
+
# Add a new CallParamRule that will take its parameter value
|
742
|
+
# from the specified stack element. The stack offset should be a positive
|
743
|
+
# integer indicating the depth of the target object - zero (the default)
|
744
|
+
# indicates the top of the stack.
|
745
|
+
#
|
746
|
+
# This just calls through to +add_call_param+ and is provided for
|
747
|
+
# xmldigester compatibility.
|
748
|
+
# Register a CallParamRule whose value is taken from the user stack,
# by delegating to add_call_param.
#
# +param_idx+ defaults to 0, as the documented call sequence promises.
# +stack_ofs+ is the depth of the source object (0 is the top of the
# stack). +type+ defaults to nil, which is passed through unchanged.
def add_call_param_stack(pattern, param_idx = 0, stack_ofs = 0, type = nil)
  add_call_param(pattern, param_idx, stack_ofs, type)
end
|
751
|
+
|
752
|
+
private
|
753
|
+
|
754
|
+
# Run the parser and return the first object pushed onto the user
# stack during the parse (nil if nothing was pushed).
#
# The remembered first object is cleared in an +ensure+ so that a parse
# that raises cannot leak a stale object into the next parse's result.
def do_parse
  @parser.parse
  @first
ensure
  @first = nil
end
|
760
|
+
|
761
|
+
#### PARSER CALLBACKS ####
|
762
|
+
# lots of duplicate code here, but factoring it out
|
763
|
+
# made it too slow (mainly dealing with send and varargs)...
|
764
|
+
# SAX callback: start of document. Resets the current element path to
# the empty string and installs a fresh, empty rulestack.
def cb_start_document
  @current_path = ""
  @rulestack = Array.new
end
|
768
|
+
|
769
|
+
# SAX callback: end of document. Calls +finish+ on every registered
# rule, walking the chain from the last rule back to the first.
# Does nothing when no rules have been registered.
def cb_end_document
  rule = @last_rule
  while rule
    rule.finish
    rule = rule.prev
  end
end
|
775
|
+
|
776
|
+
# SAX callback: start of element. Appends "/name" to the current path
# (in place) and fires +begin+ on every rule whose pattern fnmatches
# the new path, in registration order. Safe when no rules exist.
def cb_start_element(name, attrs)
  path = (@current_path << '/' << name)
  rule = @first_rule
  while rule
    rule.begin(nil, name, attrs) if File.fnmatch(rule.pattern, path)
    rule = rule.next
  end
end
|
785
|
+
|
786
|
+
# SAX callback: end of element. Fires +end+ on every rule whose pattern
# fnmatches the current path, walking from the last rule back to the
# first, then removes the final "/name" segment from the current path
# (in place). Safe when no rules exist or the path has no separator.
# Returns the current path.
def cb_end_element(name)
  path = @current_path
  rule = @last_rule
  while rule
    rule.end(nil, name) if File.fnmatch(rule.pattern, path)
    rule = rule.prev
  end

  # Drop everything from the last '/' onwards; nothing to do if the
  # path holds no separator.
  slash = path.rindex('/')
  path.slice!(slash..-1) if slash
  path
end
|
797
|
+
|
798
|
+
# SAX callback: character data (also registered for CDATA blocks).
# Passes +txt+ to +body+ on every rule whose pattern fnmatches the
# current path, in registration order. Safe when no rules exist.
def cb_characters(txt)
  path = @current_path
  rule = @first_rule
  while rule
    rule.body(txt) if File.fnmatch(rule.pattern, path)
    rule = rule.next
  end
end
|
807
|
+
end
|
808
|
+
|
809
|
+
Digestr = Digester
|
810
|
+
end
|