xmlcodec 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/XMLElement.rb ADDED
@@ -0,0 +1,489 @@
1
+ require 'XMLStreamObjectParser'
2
+ require 'XMLSubElements'
3
+
4
module XMLCodec
  # This class should be inherited from to create classes that are able to
  # import and export XML elements and their children. It provides three main
  # declarations: xmlattr, xmlsubel and xmlsubel_mult.
  #
  # To create an importer/exporter for an XML format all that's needed is to
  # create a class for each of the elements and then declare their attributes
  # and subelements.
  #
  # Two other declarations have an important role. elname declares the name of
  # the XML element the class represents. elwithvalue declares that the element
  # has no subelements and includes only text content.
  #
  # After the class is defined import_xml can be used to import the content
  # from a REXML Element or Document and create_xml can be used to create the
  # XML DOM of the element as a child of a REXML Element or Document. For big
  # documents these are usually too slow and memory hungry; using xml_text to
  # export to XML and import_xml_text to import XML are probably better ideas.
  # import_xml_text is just a utility function around XMLStreamObjectParser,
  # which allows more flexible stream parsing of XML files while still using
  # the same XMLElement objects.
  #
  # <b>WARNING</b>: This API is still very much a work in progress and very
  # rough in certain places. Changes will surely be made.
  class XMLElement
    INDENT_STR = ' '

    attr_accessor :element_id, :parent_id, :__xml_text
    attr_accessor :__parent

    private

    # NOTE: +private+ only affects instance methods. The <tt>def self.…</tt>
    # declarations in this section remain callable from subclass bodies.

    # Class level list of the subelement names declared on this class.
    def self.xmlsubels
      @xmlsubels ||= []
    end

    # Class level list of the subelement names that may occur multiple times.
    def self.xmlsubelmultiples
      @xmlsubelmultiples ||= []
    end

    # Class level list of the attribute names declared on this class.
    def self.xmlattrs
      @xmlattrs ||= []
    end

    # Registers +name+ as a subelement (single or multiple).
    def self._xmlsubel(name)
      xmlsubels << name
    end

    # Declares a single subelement called +name+. Defines a reader and a
    # writer. The writer wraps a String or Integer in the element class
    # registered for +name+ and records the receiver as the value's parent.
    def self.xmlsubel(name) #:doc:
      name = name.to_sym
      _xmlsubel(name)
      attr_reader name
      define_method("#{name}=") do |value|
        # Integer rather than Fixnum: Fixnum no longer exists in current Ruby.
        if value.is_a?(String) || value.is_a?(Integer)
          value = XMLCodec::XMLElement.get_element_class(name).new(value)
        end
        value.__parent = self if value
        instance_variable_set("@#{name}", value)
      end
    end

    # Declares a subelement called +name+ that may occur multiple times.
    # Defines a reader returning a lazily created XMLSubElements collection.
    def self.xmlsubel_mult(name) #:doc:
      # Stored as a Symbol, like xmlsubel does, because get_subel looks
      # subelement names up as symbols.
      name = name.to_sym
      _xmlsubel(name)
      xmlsubelmultiples << name
      ivar = "@#{name}"
      define_method(name) do
        # The collection must be created exactly once; instance_variables
        # holds Symbols on current Ruby, so a String lookup there never
        # matches and would rebuild (and thereby empty) it on every call.
        unless instance_variable_defined?(ivar)
          instance_variable_set(ivar, XMLSubElements.new(self))
        end
        instance_variable_get(ivar)
      end
    end

    # Yields the name of each subelement declared on the object's own class.
    # NOTE(review): unlike each_attr this does not walk the superclasses, so
    # subelements declared on a parent class are not yielded - confirm that
    # this is intended.
    def each_subel
      self.class.xmlsubels.each { |name| yield name }
    end

    # Walks the object's class and its superclasses, for as long as they are
    # XMLElement or descend from it, and returns true if any of them declared
    # +element+ with xmlsubel_mult, false otherwise.
    def subel_mult?(element)
      wanted = element.to_sym
      klass = self.class
      while klass.ancestors.include?(XMLCodec::XMLElement)
        return true if klass.xmlsubelmultiples.include?(wanted)
        klass = klass.superclass
      end
      false
    end

    # Walks the same class chain as subel_mult? looking for a declared
    # subelement whose name is one of the element names of +elclass+.
    # Returns the first element name of +elclass+ as a Symbol, or nil when
    # nothing matches.
    # NOTE(review): the first name is returned even when a later alias is the
    # one that matched - verify this is the desired accessor name.
    def get_subel(elclass)
      names = elclass.get_elnames
      klass = self.class
      while klass.ancestors.include?(XMLCodec::XMLElement)
        names.each do |name|
          return names[0].to_sym if klass.xmlsubels.include?(name.to_sym)
        end
        klass = klass.superclass
      end
      nil
    end

    # Yields the name of every XML attribute declared on the object's class
    # and on its superclasses up to and including XMLElement.
    def each_attr
      klass = self.class
      while klass.ancestors.include?(XMLCodec::XMLElement)
        klass.xmlattrs.each { |name| yield name }
        klass = klass.superclass
      end
    end

    # Adds every attribute that has a value to +parent+ by calling its
    # add_attribute method.
    def create_xml_attr(parent)
      each_attr do |a|
        value = send(a)
        parent.add_attribute(a.to_s, value) if value
      end
    end

    # Returns a string with the opening tag for the element, including the
    # attributes that have a value.
    def create_open_tag
      attrs = {}
      each_attr do |a|
        value = send(a)
        attrs[a.to_s] = value if value
      end
      XMLUtils::create_open_tag(elname.to_s, attrs)
    end

    # Returns a string with the closing tag for the element.
    def create_close_tag
      "</#{elname}>"
    end

    # Declares the class as having many subelements. Instances will have a
    # method called #subelements that returns a lazily created instance of
    # XMLSubElements, and #has_subelements? will return true.
    def self.xmlsubelements #:doc:
      define_method(:subelements) do
        @subelements ||= XMLSubElements.new(self)
      end
      define_method(:has_subelements?) { true }
    end

    # Declares an XML attribute called +name+ (wrapper around attr_accessor).
    def self.xmlattr(name) #:doc:
      xmlattrs << name
      attr_accessor name
    end

    # This is the hash that gives classes for the element names that are
    # declared. It is shared by all subclasses.
    ElClasses = {}

    # Sets the element name for the element.
    def self.elname(name)
      elnames(name)
    end

    # Sets several element names for the element. The first one is the name
    # returned by the #elname instance method; all of them are registered in
    # ElClasses as handled by this class.
    def self.elnames(*names)
      define_method(:elname) { names[0].to_sym }

      known = get_elnames
      names.each { |n| known << n }
      names.each { |n| ElClasses[n.to_sym] = self }
    end

    # Returns the list of element names for the element.
    def self.get_elnames
      @elnames ||= []
    end

    # Sets the element as having a value. The element will have an
    # initializer that takes a value as argument and an accessor named
    # #value. This should be used for elements that contain only text and no
    # subelements.
    def self.elwithvalue
      define_method(:hasvalue?) { true }
      define_method(:initialize) { |value| @value = value }
      attr_accessor :value
    end

    # Creates the XML of the declared subelements inside +parent+.
    def create_xml_subel(parent)
      each_subel do |a|
        value = send(a)
        value.create_xml(parent) if value
      end
    end

    # Creates the XML of the #subelements collection inside +parent+.
    def create_xml_subelements(parent)
      subelements.create_xml(parent)
    end

    # Have we already started the partial export of this element?
    def already_partial_exported?
      (@already_partial_exported ||= false)
    end

    # Have we already ended the partial export of this element?
    def already_partial_export_ended?
      (@already_partial_export_ended ||= false)
    end

    # Which level of indentation are we in? This is the number of __parent
    # links between this element and the top of the tree. The value is
    # computed on the first call and cached.
    def indent_level
      unless instance_variable_defined?(:@indent_level)
        level = 0
        curr = self
        level += 1 while (curr = curr.__parent)
        @indent_level = level
      end
      @indent_level
    end

    # Iterates all of the subelements.
    # We copy everything into an array and iterate that because #each doesn't
    # like it when elements are deleted while it's iterating.
    def each_subelement
      arr = []

      each_subel do |a|
        value = send(a)
        next unless value
        if subel_mult?(a)
          value.each { |e| arr << e }
        else
          arr << value
        end
      end

      # Go through the accessor: @subelements is only created on first use,
      # so it may still be unset here.
      subelements.each { |e| arr << e } if has_subelements?

      arr.each { |e| yield e }
    end

    public

    # Removes the given subelement from the element.
    def delete_element(element)
      each_subel do |a|
        value = send(a)
        if subel_mult?(a)
          value.delete_element(element)
        elsif value == element
          send("#{a}=", nil)
        end
      end

      # Accessor rather than @subelements, which may not have been created.
      subelements.delete_element(element) if has_subelements?
    end

    # Calculates the text indentation to use for this level. Returns a string
    # with the whitespace that should precede every line. Extra levels of
    # indentation can be passed so that a caller can calculate the whitespace
    # to indent an element X levels deeper than this one.
    def indentation(extra = 0)
      INDENT_STR * (indent_level + extra)
    end

    # Gets the class for a certain element name. Raises a RuntimeError when
    # no class has declared that name.
    def self.get_element_class(name)
      cl = ElClasses[name.to_sym]
      raise "No class defined for element type: '#{name}'" unless cl
      cl
    end

    # Gets the possible element names for a certain element: +name+ itself
    # followed by every name declared by its class.
    def self.get_element_names(name)
      cl = get_element_class(name)
      [name.to_s] + cl.get_elnames
    end

    # Tells whether the class was declared with xmlsubelements. This is
    # usually only used when exporting stuff.
    def has_subelements?
      false
    end

    # Tests if the element is a value element as defined by 'elwithvalue'.
    def hasvalue?
      false
    end

    # Creates the xml for the element inside the parent element. The parent
    # passed should be a REXML element or document. This call is recursive,
    # creating the XML for any subelements. Returns the element created.
    def create_xml(parent)
      xmlel = parent.add_element(elname.to_s)
      xmlel.text = value if hasvalue?
      create_xml_attr(xmlel)
      create_xml_subel(xmlel)
      create_xml_subelements(xmlel) if has_subelements?
      xmlel
    end

    # Imports the XML into an object from a REXML element (or the root of a
    # REXML document). This call is recursive and imports any subelements
    # found into the corresponding objects.
    def self.import_xml(xmlel)
      xmlel = xmlel.root if xmlel.is_a?(REXML::Document)

      elements = []
      xmlel.to_a.each do |e|
        if e.is_a?(REXML::Text)
          elements << e.value
        else
          elements << get_element_class(e.name).import_xml(e)
        end
      end

      attributes = {}
      xmlel.attributes.each do |name, value|
        attributes[name] = value
      end

      new_with_content(attributes, elements)
    end

    # Imports the XML directly from the text using XMLStreamObjectParser and
    # returns the parser's top element.
    # NOTE(review): ElClasses is handed to the parser in its listener slot;
    # confirm whether a listener-less parser was intended.
    def self.import_xml_text(text)
      parser = XMLStreamObjectParser.new(ElClasses)
      parser.parse(text)
      parser.top_element
    end

    # Creates a new element, without calling its initializer, and passes it
    # all the attributes (a hash keyed by attribute name) and children (a mix
    # of Strings for text and already built element objects).
    def self.new_with_content(attrs, children)
      text_children, element_children = children.partition { |c| c.is_a?(String) }

      obj = allocate
      obj.add_attr(attrs)
      obj.add_subel(element_children)
      obj.add_texts(text_children)
      obj.add_subelements(children) if obj.has_subelements?
      obj
    end

    # Adds the attributes passed as a hash (keyed by attribute name as a
    # String) to the element. Only declared attributes are read.
    def add_attr(attrs)
      each_attr do |a|
        value = attrs[a.to_s]
        send("#{a}=", value) if value
      end
    end

    # Adds the text children into the element. Only value elements keep
    # them, joined into a single string.
    def add_texts(texts)
      @value = texts.join if hasvalue?
    end

    # Adds the element children into the matching declared subelements.
    # Children whose class matches no declared subelement are skipped.
    def add_subel(children)
      children.each do |c|
        subel_name = get_subel(c.class)
        next unless subel_name
        if subel_mult?(subel_name)
          send(subel_name) << c
        else
          send("#{subel_name}=", c)
        end
      end
    end

    # If the class is one with many subelements import all of them into the
    # object.
    def add_subelements(all_children)
      all_children.each { |c| subelements << c }
    end

    # Creates the XML text of the element. This does not use REXML so should
    # be pretty fast.
    def xml_text
      str = create_open_tag
      str << XMLUtils::escape_xml(value) if hasvalue?

      each_subelement do |e|
        str << e.xml_text
      end

      str << create_close_tag
      str
    end

    # Exports this element into a file. Will also start to export the parents
    # of the element. It's equivalent to calling start_partial_export
    # followed by end_partial_export. Does nothing if the export of this
    # element was already started.
    def partial_export(file)
      return if already_partial_exported?

      start_partial_export(file)
      end_partial_export(file)
    end

    # Starts to export the element to a file. All the existing elements will
    # be exported. After calling this you should only add stuff that you will
    # export explicitly by calling partial_export or start_partial_export.
    def start_partial_export(file)
      return if already_partial_exported?

      # Flag first so that the recursion through parent and children below
      # cannot export this element a second time.
      @already_partial_exported = true
      __parent.start_partial_export(file) if __parent

      file << indentation + create_open_tag + "\n"

      if hasvalue?
        file << indentation(1) + XMLUtils::escape_xml(value) + "\n"
      end

      each_subelement do |e|
        e.partial_export(file)
      end
    end

    # Ends the partial exporting of the element: closes any subelement still
    # open, writes the closing tag and removes the element from its parent.
    def end_partial_export(file)
      return if already_partial_export_ended?

      @already_partial_export_ended = true

      each_subelement do |e|
        e.end_partial_export(file)
      end

      file << indentation + create_close_tag + "\n"

      __parent.delete_element(self) if __parent
    end
  end
end
@@ -0,0 +1,136 @@
1
+ require "rexml/document"
2
+ require "XMLUtils"
3
+
4
module XMLCodec
  # This class is used internally by the parser to store the information about
  # each of the elements that gets created.
  class XMLSOParserElement
    attr_reader :elclass, :consumed, :id

    # Creates a new instance with the element name, a hash of attributes, its
    # import/export class, the parent element and its id.
    # The id is used to fill in element_id and parent_id in XMLElement so that
    # the parser's user can know what the tree structure between objects is.
    def initialize(name, attrs, elclass, parent, id)
      @name = name
      @attrs = attrs
      @elclass = elclass
      @parent = parent
      @id = id
      # Text (Strings) and child objects, in the order they were added.
      @children = []
      @object = nil
      @consumed = false
    end

    # Adds a child (a text String or an already built object) to the element.
    def add_child(child)
      @children << child
    end

    # Gets the actual object for the XML element, created by calling
    # new_with_content on the elclass passed to the constructor. This is
    # cached so the object is only created once; subsequent calls return the
    # same object until #consume is called.
    def get_object
      unless @object
        @object = @elclass.new_with_content(@attrs, @children)
        if @parent
          @object.element_id = @id
          @object.parent_id = @parent.id
        end
      end
      @object
    end

    # Consumes the object so that it may be freed. The parser does not add a
    # consumed element as a child of its parent.
    def consume
      @consumed = true
      @object = nil
    end
  end

  # This is an XML stream parser that returns ruby objects for whole elements.
  # To do this a class must be defined as descending from XMLElement and having
  # set elname or elnames. To use it all you have to do is define a listener
  # that responds to methods of the form el_<element name> and define the
  # importers for the elements as subclasses of XMLElement.
  #
  # The listener will be passed XMLSOParserElement objects. The two relevant
  # methods for its use are XMLSOParserElement#get_object and
  # XMLSOParserElement#consume.
  class XMLStreamObjectParser
    # Creates a new parser with an optional listener.
    def initialize(listener = nil)
      @listener = listener
      @currel = 0
      # Index 0 holds a placeholder so that the root element always has a
      # parent entry to be attached to.
      @elements = [XMLSOParserElement.new(nil, nil, nil, nil, nil)]
      @id = 0
      @top_element = nil
    end

    private

    # Returns the next element id (ids start at 1).
    def next_id
      @id += 1
    end

    # Looks up the import/export class registered for the element +name+.
    def get_elclass(name)
      XMLCodec::XMLElement.get_element_class(name)
    end

    # The entry for the element that is currently open.
    def curr_element
      @elements[@currel]
    end

    # The entry for the parent of the element that is currently open.
    def prev_element
      @elements[@currel - 1]
    end

    public

    # Parses the text with the stream parser calling the listener on any
    # events that it listens to.
    def parse(text)
      REXML::Document.parse_stream(text, self)
    end

    # Gets the current top element of the parse. This is usually used to get
    # the root at the end of the parse. Returns nil if no element was closed
    # without being consumed.
    def top_element
      @top_element.get_object if @top_element
    end

    # Stream event: an element was opened. Pushes a new entry for it.
    def tag_start(name, attrs) #:nodoc:
      @elements << XMLSOParserElement.new(name, attrs, get_elclass(name),
                                          curr_element, next_id)
      @currel += 1
    end

    # Stream event: text was found inside the currently open element.
    def text(text) #:nodoc:
      curr_element.add_child(text)
    end

    # Stream event: an element was closed. Notifies the listener and, unless
    # the listener consumed the element, attaches the built object to the
    # parent entry and remembers the element as the current top element.
    def tag_end(name) #:nodoc:
      obj = curr_element

      handler = "el_" + name
      @listener.send(handler, obj) if @listener.respond_to?(handler)

      unless obj.consumed
        prev_element.add_child(obj.get_object) if prev_element
        @top_element = obj
      end

      @elements.pop
      @currel -= 1
    end

    # Ignore everything except tags and text for now.
    def method_missing(methId, *args) #:nodoc:
    end
  end
end