xmlcodec 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/XMLElement.rb ADDED
@@ -0,0 +1,489 @@
1
+ require 'XMLStreamObjectParser'
2
+ require 'XMLSubElements'
3
+
4
+ module XMLCodec
5
+ # This class should be inherited from to create classes that are able to
6
+ # import and export XML elements and their children. It provides three main
7
+ # functions: xmlattr, xmlsubel and xmlsubel_mult.
8
+ #
9
+ # To create an importer/exporter for a XML format all that's needed is to
10
+ # create a class for each of the elements and then declare their attributes and
11
+ # subelements.
12
+ #
13
+ # Two other functions have an important role. elname declares the name of the
14
+ # XML element the class represents. elwithvalue declares that the element
15
+ # has no subelements and includes only text content.
16
+ #
17
+ # After the class is defined import_xml can be used to import the content from
18
+ # a REXML Element or Document and create_xml can be used to create the XML DOM
19
+ # of the element as a child to a REXML Element or Document. For big documents
20
+ # these are usually too slow and memory hungry, using xml_text to export to
21
+ # XML and import_xml_text to import XML are probably better ideas.
22
+ # import_xml_text is just a utility function around XMLStreamObjectParser,
23
+ # which allows more flexible stream parsing of XML files while still using the
24
+ # same XMLElement objects.
25
+ #
26
+ # <b>WARNING</b>: This API is still very much a work in progress and very
27
+ # rough in certain places. Changes will surely be made.
28
class XMLElement
  # Whitespace emitted once per nesting level by the partial export.
  INDENT_STR = ' '

  # This is the hash that gives classes for the element names that are declared.
  # It is filled in by elname/elnames and queried by get_element_class.
  ElClasses = {}

  attr_accessor :element_id, :parent_id, :__xml_text
  attr_accessor :__parent

  # ------------------------------------------------------------------
  # Class level declarations. These are plain (public) singleton
  # methods: a bare +private+ does not apply to +def self.+ methods, and
  # the instance methods below call them on arbitrary classes of the
  # hierarchy.
  # ------------------------------------------------------------------

  # Class level variable to hold the list of subelements
  def self.xmlsubels
    @xmlsubels ||= []
  end

  # Class level variable to hold the list of subelements that are multiple
  def self.xmlsubelmultiples
    @xmlsubelmultiples ||= []
  end

  # Class level variable that holds the list of attributes
  def self.xmlattrs
    @xmlattrs ||= []
  end

  # Add a name as being a subelement (mult or single)
  def self._xmlsubel(name)
    xmlsubels << name
  end

  # Add a xmlsubel type attribute. Defines a reader called +name+ and a
  # writer that links the assigned element to its parent. A String or
  # Integer passed to the writer is first wrapped in the class registered
  # for +name+.
  def self.xmlsubel(name) #:doc:
    name = name.to_sym
    _xmlsubel(name)
    attr_reader name
    define_method("#{name}=") do |value|
      # Integer replaces Fixnum, which no longer exists in current Rubies.
      if value.is_a?(String) || value.is_a?(Integer)
        value = XMLElement.get_element_class(name).new(value)
      end
      value.__parent = self if value
      instance_variable_set("@#{name}", value)
    end
  end

  # Add a xmlsubel_mult type attribute. Defines a reader called +name+ that
  # returns an XMLSubElements container, created on first access and reused
  # afterwards.
  def self.xmlsubel_mult(name) #:doc:
    # Normalise to a symbol like xmlsubel does; get_subel looks names up as
    # symbols.
    name = name.to_sym
    _xmlsubel(name)
    xmlsubelmultiples << name
    ivar = "@#{name}"
    define_method(name) do
      # instance_variables returns symbols, so comparing against a string
      # never matched and the container was rebuilt on every call.
      unless instance_variable_defined?(ivar)
        instance_variable_set(ivar, XMLSubElements.new(self))
      end
      instance_variable_get(ivar)
    end
  end

  # Declare the class as having many subelements. Instances will have a
  # method called #subelements that will return an instance of XMLSubElements
  def self.xmlsubelements #:doc:
    define_method(:subelements) do
      @subelements ||= XMLSubElements.new(self)
    end
    define_method(:has_subelements?) { true }
  end

  # Add a xmlattr type attribute (wrapper around attr_accessor)
  def self.xmlattr(name) #:doc:
    xmlattrs << name
    attr_accessor name
  end

  # Sets the element name for the element
  def self.elname(name)
    elnames(name)
  end

  # Sets several element names for the element. The first one is the name
  # returned by #elname; all of them are registered in ElClasses.
  def self.elnames(*names)
    define_method(:elname) { names[0].to_sym }

    registered = get_elnames
    names.each do |n|
      registered << n
      ElClasses[n.to_sym] = self
    end
  end

  # Returns the list of element names for the element
  def self.get_elnames
    @elnames ||= []
  end

  # Set the element as having a value. The element will have an initializer
  # that takes a value as argument and an accessor named #value. This should
  # be used for elements that contain only text and no subelements
  def self.elwithvalue
    define_method(:hasvalue?) { true }
    define_method(:initialize) { |value| @value = value }
    attr_accessor :value
  end

  # Gets the class for a certain element name. Raises a RuntimeError when no
  # class has been declared for it.
  def self.get_element_class(name)
    cl = ElClasses[name.to_sym]
    raise "No class defined for element type: '#{name}'" unless cl
    cl
  end

  # Gets the possible element names for a certain element.
  def self.get_element_names(name)
    [name.to_s] + get_element_class(name).get_elnames
  end

  # Import the XML into an object from a REXML element. This call is recursive
  # and imports any subelements found into the corresponding objects. Nodes
  # that are neither text nor elements (comments, processing instructions)
  # are skipped; they have no element name to look a class up with.
  def self.import_xml(xmlel)
    xmlel = xmlel.root if xmlel.is_a? REXML::Document

    elements = []
    xmlel.to_a.each do |node|
      case node
      when REXML::Text
        elements << node.value
      when REXML::Element
        elements << get_element_class(node.name).import_xml(node)
      end
    end

    attributes = {}
    xmlel.attributes.each { |name, value| attributes[name] = value }

    new_with_content(attributes, elements)
  end

  # Import the XML directly from the text using XMLStreamObjectParser and
  # return the object built for the top element.
  def self.import_xml_text(text)
    # NOTE(review): ElClasses is handed over in the parser's listener slot;
    # a Hash answers to no el_<name> callback, so it acts as "no listener".
    parser = XMLStreamObjectParser.new(ElClasses)
    parser.parse(text)
    parser.top_element
  end

  # Create a new element passing it all the attributes, children and texts.
  # +attrs+ is a hash keyed by attribute name (strings); +children+ mixes
  # Strings (text) and already built element objects. The object is created
  # with +allocate+, so no initializer runs.
  def self.new_with_content(attrs, children)
    text_children, element_children = children.partition { |c| c.is_a? String }

    obj = allocate
    obj.add_attr(attrs)
    obj.add_subel(element_children)
    obj.add_texts(text_children)
    obj.add_subelements(children) if obj.has_subelements?
    obj
  end

  # ------------------------------------------------------------------
  # Public instance interface
  # ------------------------------------------------------------------

  # Remove the given subelement from the element
  def delete_element(element)
    each_subel do |a|
      value = send(a)
      if subel_mult? a
        value.delete_element(element)
      elsif value == element
        send("#{a}=", nil)
      end
    end

    # Go through the accessor: @subelements is unset until first use.
    subelements.delete_element(element) if has_subelements?
  end

  # Calculate the text indentation to use for this level. Returns a string
  # with the whitespace that should precede every line. Extra levels of
  # indentation can be passed so that a caller can calculate the whitespace
  # to indent an element X levels deeper than this one.
  def indentation(extra=0)
    INDENT_STR * (indent_level + extra)
  end

  # Method that checks if a given class has subelements. This is usually only
  # used when exporting stuff. Overridden by xmlsubelements.
  def has_subelements?; false; end

  # tests if the element is a value element as defined by 'elwithvalue'
  def hasvalue?
    false
  end

  # Creates the xml for the element inside the parent element. The parent
  # passed should be a REXML element or document. This call is recursive
  # creating the XML for any subelements. Returns the created REXML element.
  def create_xml(parent)
    xmlel = parent.add_element elname.to_s
    xmlel.text = self.value if hasvalue?
    create_xml_attr(xmlel)
    create_xml_subel(xmlel)
    create_xml_subelements(xmlel) if has_subelements?
    xmlel
  end

  # add the attributes passed as a hash to the element. Only declared
  # attributes are looked up; other keys of the hash are ignored.
  def add_attr(attrs)
    each_attr do |a|
      value = attrs[a.to_s]
      send("#{a}=", value) if value
    end
  end

  # add the text elements into the element
  def add_texts(texts)
    @value = texts.join if hasvalue?
  end

  # add the subelements into the element. Children whose class is not a
  # declared subelement of this element are ignored.
  def add_subel(children)
    children.each do |c|
      subel_name = get_subel(c.class)
      next unless subel_name

      if subel_mult? subel_name
        send(subel_name) << c
      else
        send("#{subel_name}=", c)
      end
    end
  end

  # If the class is one with many subelements import all of them into the
  # object.
  def add_subelements(all_children)
    all_children.each { |c| subelements << c }
  end

  # create the XML text of the element. This does not use REXML so should be
  # pretty fast.
  def xml_text
    str = create_open_tag
    str << XMLUtils::escape_xml(self.value) if hasvalue?
    each_subelement { |e| str << e.xml_text }
    str << create_close_tag
    str
  end

  # Export this element into a file. Will also start to export the parents of
  # the element. It's equivalent to calling start_partial_export followed by
  # end_partial_export.
  def partial_export(file)
    # NOTE(review): nothing happens, not even the closing tag, when the
    # export of this element has already been started - confirm intended.
    if not already_partial_exported?
      start_partial_export(file)
      end_partial_export(file)
    end
  end

  # Starts to export the element to a file. all the existing elements will be
  # exported. After calling this you should only add stuff that you will
  # export explicitly by calling partial_export or start_partial_export.
  def start_partial_export(file)
    return if already_partial_exported?

    # Set the flag first: the parent walks its children and would otherwise
    # export this element a second time.
    @already_partial_exported = true
    self.__parent.start_partial_export(file) if self.__parent

    file << indentation + create_open_tag + "\n"
    file << indentation(1) + XMLUtils::escape_xml(self.value) + "\n" if hasvalue?

    each_subelement { |e| e.partial_export(file) }
  end

  # Ends the partial exporting of the element. Closes any subelement still
  # open, writes the closing tag and detaches the element from its parent.
  def end_partial_export(file)
    return if already_partial_export_ended?

    @already_partial_export_ended = true
    each_subelement { |e| e.end_partial_export(file) }
    file << indentation + create_close_tag + "\n"
    self.__parent.delete_element(self) if self.__parent
  end

  private

  # Yields the object's class and then each superclass that is still a
  # XMLElement, most specific first.
  def each_element_class
    klass = self.class
    while klass <= XMLElement
      yield klass
      klass = klass.superclass
    end
  end

  # Iterates over all of the object's XML subelements, including the ones
  # declared by superclasses (own declarations first). Each name is yielded
  # once even when it is declared at several levels.
  def each_subel
    names = []
    each_element_class { |klass| names.concat(klass.xmlsubels) }
    names.uniq.each { |name| yield name }
  end

  # Iterate all the superclasses that are still children of XMLElement
  # and check if any of them have the subelement mult defined
  def subel_mult?(element)
    each_element_class do |klass|
      return true if klass.xmlsubelmultiples.include?(element)
    end
    false
  end

  # Iterate all the superclasses that are still children of XMLElement
  # and check if any of them have any subelements handled by this class.
  # Returns the first element name of +elclass+ as a symbol, or nil.
  def get_subel(elclass)
    names = elclass.get_elnames
    each_element_class do |klass|
      declared = klass.xmlsubels
      return names[0].to_sym if names.any? { |n| declared.include?(n.to_sym) }
    end
    nil
  end

  # Iterates over the object's XML attributes, walking all the superclasses
  # that are still children of XMLElement.
  def each_attr
    each_element_class do |klass|
      klass.xmlattrs.each { |name| yield name }
    end
  end

  # Creates the XML for the attributes
  def create_xml_attr(parent)
    each_attr do |a|
      value = send(a)
      parent.add_attribute(a.to_s, value) if value
    end
  end

  # returns a string with the opening tag for the element
  def create_open_tag
    attrs = {}
    each_attr do |a|
      value = send(a)
      attrs[a.to_s] = value if value
    end
    XMLUtils::create_open_tag(elname.to_s, attrs)
  end

  # returns a string with the closing tag for the element
  def create_close_tag
    "</#{elname}>"
  end

  # Creates the XML subelements
  def create_xml_subel(parent)
    each_subel do |a|
      value = send(a)
      value.create_xml(parent) if value
    end
  end

  # Create the XML of the SubElements
  def create_xml_subelements(parent)
    subelements.create_xml(parent)
  end

  # Have we already started the partial export of this element?
  def already_partial_exported?
    (@already_partial_exported ||= false)
  end

  # Have we already ended the partial export of this element?
  def already_partial_export_ended?
    (@already_partial_export_ended ||= false)
  end

  # Which level of indentation are we in? Counts the ancestors reachable
  # through __parent; the result is computed once and then reused.
  def indent_level
    @indent_level ||= begin
      level = 0
      curr = self
      level += 1 while (curr = curr.__parent)
      level
    end
  end

  # Iterate all of the subelements
  # We copy everything into an array and iterate that because #each doesn't
  # like it when elements are deleted while it's iterating.
  def each_subelement
    arr = []

    each_subel do |a|
      value = send(a)
      next unless value

      if subel_mult? a
        value.each { |e| arr << e }
      else
        arr << value
      end
    end

    # Go through the accessor: @subelements is unset until first use.
    subelements.each { |e| arr << e } if has_subelements?

    arr.each { |e| yield e }
  end
end
489
+ end
@@ -0,0 +1,136 @@
1
+ require "rexml/document"
2
+ require "XMLUtils"
3
+
4
+ module XMLCodec
5
+ # This class is used internally by the parser to store the information about
6
+ # each of the elements that gets created.
7
class XMLSOParserElement
  attr_reader :elclass, :consumed, :id

  # Create a new instance with the element name, a hash of attributes, its
  # import/export class, the parent element and its id.
  # The id is used to fill in element_id and parent_id in XMLElement so that
  # the parser's user can know what is the tree structure between objects.
  def initialize(name, attrs, elclass, parent, id)
    @name = name
    @attrs = attrs
    @elclass = elclass
    @parent = parent
    @id = id
    # Texts and child objects, in document order. (A Hash with a shared
    # default array used to be assigned here and immediately overwritten.)
    @children = []
    @object = nil
    @consumed = false
  end

  # Add a child element to the object
  def add_child(child)
    @children << child
  end

  # Get the actual object for the XML element, created using the elclass
  # passed to the constructor. This is cached so the object will only be
  # created once. All subsequent calls will return the same object, until
  # #consume drops it.
  def get_object
    return @object if @object

    @object = @elclass.new_with_content(@attrs, @children)
    if @parent
      @object.element_id = @id
      @object.parent_id = @parent.id
    end
    @object
  end

  # Consume the object so that it may be freed. The object will no longer
  # appear as a child of the parent object.
  def consume
    @consumed = true
    @object = nil
  end
end
52
+
53
+ # This is a XML Stream parser that returns ruby objects for whole elements.
54
+ # To do this a class must be defined as descending from XMLElement and having
55
+ # set elname or elnames. To use it all you have to do is define a listener
56
+ # that responds to methods of the form el_<element name> and define the
57
+ # importers for the elements as subclasses of XMLElement.
58
+ #
59
+ # The listener will be passed XMLSOParserElement objects. The two relevant
60
+ # methods for its use are XMLSOParserElement#get_object and
61
+ # XMLSOParserElement#consume.
62
class XMLStreamObjectParser
  # Create a new parser with a listener. The listener is optional; when
  # given, it is sent el_<element name> for every element it responds to.
  def initialize(listener=nil)
    @listener = listener
    # Stack of the elements currently open. The bottom entry is a
    # placeholder that collects whatever appears at the top level.
    @elements = [XMLSOParserElement.new(nil, nil, nil, nil, nil)]
    @id = 0
    @top_element = nil
  end

  # Parse the text with the stream parser calling the listener on any events
  # that it listens to.
  def parse(text)
    REXML::Document.parse_stream(text, self)
  end

  # Get the current top element of the parse. This is usually used to get the
  # root at the end of the parse. Returns nil when no element has been
  # closed yet.
  def top_element
    @top_element.get_object if @top_element
  end

  def tag_start(name, attrs) #:nodoc:
    # The argument list is evaluated before the push, so curr_element is
    # still the parent of the element being opened.
    @elements << XMLSOParserElement.new(name, attrs, get_elclass(name),
                                        curr_element, next_id)
  end

  def text(text) #:nodoc:
    curr_element.add_child(text)
  end

  def tag_end(name) #:nodoc:
    obj = curr_element

    callback = "el_" + name
    @listener.send(callback, obj) if @listener.respond_to?(callback)

    # An element consumed by the listener is not handed to its parent.
    unless obj.consumed
      parent = prev_element
      parent.add_child(obj.get_object) if parent
      @top_element = obj
    end

    @elements.pop
  end

  # Ignore everything except tags and text for now.
  # No respond_to_missing? is defined on purpose: it would change what the
  # parser answers to respond_to? probes made by the stream parser driving it.
  def method_missing(methId, *args) #:nodoc:
  end

  private

  # Hands out the next element id (1, 2, 3, ...).
  def next_id
    @id += 1
  end

  # Class declared for the element name; raises if there is none.
  def get_elclass(name)
    XMLCodec::XMLElement.get_element_class(name)
  end

  # Innermost element that is currently open.
  def curr_element
    @elements.last
  end

  # Element enclosing the current one.
  def prev_element
    @elements[-2]
  end
end
136
+ end