multi_xml 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of multi_xml might be problematic. Click here for more details.

@@ -0,0 +1,79 @@
1
+ require 'nokogiri' unless defined?(Nokogiri)
2
+
3
module MultiXml
  module Parsers
    # Use Nokogiri to parse XML.
    module Nokogiri #:nodoc:
      extend self

      # Parse an XML Document string or IO into a simple hash using
      # libxml / nokogiri.
      #
      # data::
      #   XML Document string or IO to parse. Anything that does not respond
      #   to +read+ (including +nil+) is wrapped in a StringIO.
      #
      # Returns an empty Hash when the input contains no characters at all,
      # otherwise the Hash built from the document's root element. Raises the
      # first error Nokogiri recorded while parsing, if there is one.
      def parse(data)
        data = StringIO.new(data || '') unless data.respond_to?(:read)

        # Peek at the first character to detect empty input, then push it
        # back so that Nokogiri still sees the complete stream.
        char = data.getc
        return {} if char.nil?

        data.ungetc(char)
        doc = ::Nokogiri::XML(data)
        raise doc.errors.first unless doc.errors.empty?
        doc.to_hash
      end

      module Conversions #:nodoc:
        module Document #:nodoc:
          # Convert the document to a Hash by converting its root node.
          def to_hash
            root.to_hash
          end
        end

        module Node #:nodoc:
          CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)

          # Convert this node (and, recursively, its child elements) to a
          # Hash and store it in +hash+ under the node's name.
          #
          # hash::
          #   Hash to merge the converted element into.
          #
          # Returns +hash+.
          def to_hash(hash={})
            node_hash = {}

            # Insert node hash into parent hash correctly: repeated names are
            # collected into an Array, the first occurrence is stored as is.
            case hash[name]
            when Array then hash[name] << node_hash
            when Hash  then hash[name] = [hash[name], node_hash]
            when nil   then hash[name] = node_hash
            end

            # Handle child elements; text and CDATA are concatenated under
            # CONTENT_ROOT.
            children.each do |c|
              if c.element?
                c.to_hash(node_hash)
              elsif c.text? || c.cdata?
                node_hash[CONTENT_ROOT] ||= ''
                node_hash[CONTENT_ROOT] << c.content
              end
            end

            # Remove content node if it is blank and there are child tags.
            # Whitespace-only text (e.g. the indentation between child
            # elements) counts as blank, hence the strip. This runs before
            # attributes are added, so the length check only sees the content
            # entry and child elements.
            if node_hash.length > 1 && node_hash[CONTENT_ROOT].to_s.strip.empty?
              node_hash.delete(CONTENT_ROOT)
            end

            # Handle attributes (assigned last, so an attribute replaces any
            # entry already stored under the same key).
            attribute_nodes.each { |a| node_hash[a.node_name] = a.value }

            hash
          end
        end
      end

      ::Nokogiri::XML::Document.send(:include, Conversions::Document)
      ::Nokogiri::XML::Node.send(:include, Conversions::Node)
    end
  end
end
@@ -0,0 +1,127 @@
1
+ require 'rexml/document' unless defined?(REXML::Document)
2
+
3
module MultiXml
  module Parsers
    # Use REXML to parse XML.
    module Rexml #:nodoc:
      extend self

      CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)

      # Parse an XML Document string or IO into a simple hash
      #
      # data::
      #   XML Document string or IO to parse. Anything that does not respond
      #   to +read+ (including +nil+) is wrapped in a StringIO.
      #
      # Returns an empty Hash when the input contains no characters at all,
      # otherwise a Hash keyed by the root element's name. Raises
      # REXML::ParseException when the parsed document has no root element.
      def parse(data)
        data = StringIO.new(data || '') unless data.respond_to?(:read)

        # Peek at the first character to detect empty input, then push it
        # back so that REXML still sees the complete stream.
        char = data.getc
        return {} if char.nil?

        data.ungetc(char)
        doc = REXML::Document.new(data)

        unless doc.root
          raise REXML::ParseException,
            "The document #{doc.to_s.inspect} does not have a valid root"
        end

        merge_element!({}, doc.root)
      end

      private

      # Convert an XML element and merge into the hash
      #
      # hash::
      #   Hash to merge the converted element into.
      # element::
      #   XML element to merge into hash
      def merge_element!(hash, element)
        merge!(hash, element.name, collapse(element))
      end

      # Actually converts an XML document element into a data structure.
      #
      # element::
      #   The document element to be collapsed.
      def collapse(element)
        hash = get_attributes(element)

        if element.has_elements?
          element.each_element { |child| merge_element!(hash, child) }
          # Text is only kept next to child elements when it is more than
          # whitespace.
          merge_texts!(hash, element) unless empty_content?(element)
          hash
        else
          merge_texts!(hash, element)
        end
      end

      # Merge all the texts of an element into the hash
      #
      # hash::
      #   Hash to add the converted element to.
      # element::
      #   XML element whose texts are to be merged into the hash
      def merge_texts!(hash, element)
        return hash unless element.has_text?

        # must use value to prevent double-escaping
        texts = element.texts.inject('') { |buffer, t| buffer << t.value }
        merge!(hash, CONTENT_ROOT, texts)
      end

      # Adds a new key/value pair to an existing Hash. If the key to be added
      # already exists and the existing value associated with key is not
      # an Array, it will be wrapped in an Array. Then the new value is
      # appended to that Array.
      #
      # hash::
      #   Hash to add key/value pair to.
      # key::
      #   Key to be added.
      # value::
      #   Value to be associated with key.
      #
      # Returns +hash+.
      def merge!(hash, key, value)
        if hash.key?(key)
          if hash[key].instance_of?(Array)
            hash[key] << value
          else
            hash[key] = [hash[key], value]
          end
        elsif value.instance_of?(Array)
          # Wrap a first Array value so that a later value for the same key
          # is appended next to it rather than into it.
          hash[key] = [value]
        else
          hash[key] = value
        end
        hash
      end

      # Converts the attributes array of an XML element into a hash.
      # Returns an empty Hash if node has no attributes.
      #
      # element::
      #   XML element to extract attributes from.
      def get_attributes(element)
        attributes = {}
        element.attributes.each { |n, v| attributes[n] = v }
        attributes
      end

      # Determines if a document element has no meaningful text content,
      # i.e. its texts are absent or consist only of whitespace.
      #
      # element::
      #   XML element to be checked.
      def empty_content?(element)
        element.texts.join.strip.empty?
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module MultiXml
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -3,12 +3,11 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
  require "multi_xml/version"
4
4
 
5
5
  Gem::Specification.new do |s|
6
- s.add_development_dependency("bundler", ["~> 1.0.2"])
7
- s.add_development_dependency("hpricot", ["~> 0.8.2"])
8
- s.add_development_dependency("libxml-ruby", ["~> 1.1.4"])
9
- s.add_development_dependency("nokogiri", ["~> 1.4.3"])
10
- s.add_development_dependency("rake", ["~> 0.8.7"])
11
- s.add_development_dependency("rspec", ["~> 1.3.0"])
6
+ s.add_development_dependency("bundler", "~> 1.0")
7
+ s.add_development_dependency("libxml-ruby", "~> 1.1")
8
+ s.add_development_dependency("nokogiri", "~> 1.4")
9
+ s.add_development_dependency("rake", "~> 0.8")
10
+ s.add_development_dependency("rspec", "~> 2.0")
12
11
  s.name = "multi_xml"
13
12
  s.version = MultiXml::VERSION
14
13
  s.platform = Gem::Platform::RUBY
@@ -7,42 +7,477 @@ class MockDecoder
7
7
  end
8
8
 
9
9
  describe "MultiXml" do
10
- context 'engines' do
11
- it 'should default to the best available gem' do
12
- pending
10
+ context "Parsers" do
11
+ it "should default to the best available gem" do
13
12
  require 'libxml'
14
- MultiXml.engine.name.should == 'MultiXml::Engines::Libxml'
13
+ MultiXml.parser.name.should == 'MultiXml::Parsers::Libxml'
15
14
  end
16
15
 
17
- it 'should be settable via a symbol' do
18
- pending
19
- MultiXml.engine = :libxml
20
- MultiXml.engine.name.should == 'MultiXml::Engines::Libxml'
16
+ it "should be settable via a symbol" do
17
+ MultiXml.parser = :libxml
18
+ MultiXml.parser.name.should == 'MultiXml::Parsers::Libxml'
21
19
  end
22
20
 
23
- it 'should be settable via a class' do
24
- MultiXml.engine = MockDecoder
25
- MultiXml.engine.name.should == 'MockDecoder'
21
+ it "should be settable via a class" do
22
+ MultiXml.parser = MockDecoder
23
+ MultiXml.parser.name.should == 'MockDecoder'
26
24
  end
27
25
  end
28
26
 
29
- %w(libxml nokogiri hpricot rexml).each do |engine|
30
- context engine do
27
+ Dir.glob('lib/multi_xml/parsers/**/*.rb').map{|file| File.basename(file, ".rb").split('_').map{|s| s.capitalize}.join('')}.each do |parser|
28
+ context "Parsers::#{parser}" do
31
29
  before do
32
30
  begin
33
- MultiXml.engine = engine
31
+ MultiXml.parser = parser
34
32
  rescue LoadError
35
- pending "Engine #{engine} couldn't be loaded (not installed?)"
33
+ pending "Parser #{parser} couldn't be loaded"
36
34
  end
37
35
  end
38
36
 
39
- describe '.parse' do
40
- it 'should properly parse some XML' do
41
- MultiXml.parse('<tag>This is the contents</tag>').should == {'tag' => 'This is the contents'}
37
+ describe ".parse" do
38
+ context "a blank string" do
39
+ before do
40
+ @string = ''
41
+ end
42
+
43
+ it "should return an empty Hash" do
44
+ MultiXml.parse(@string).should == {}
45
+ end
46
+ end
47
+
48
+ context "a whitespace string" do
49
+ before do
50
+ @string = ' '
51
+ end
52
+
53
+ it "should return an empty Hash" do
54
+ MultiXml.parse(@string).should == {}
55
+ end
56
+ end
57
+
58
+ context "a single-node document" do
59
+
60
+ before do
61
+ @string = '<user/>'
62
+ end
63
+
64
+ it "should parse correctly" do
65
+ MultiXml.parse(@string).should == {'user' => nil}
66
+ end
67
+
68
+ context "with CDATA" do
69
+ before do
70
+ @string = '<user><![CDATA[Erik Michaels-Ober]]></user>'
71
+ end
72
+
73
+ it "should parse correctly" do
74
+ MultiXml.parse(@string).should == {"user" => "Erik Michaels-Ober"}
75
+ end
76
+ end
77
+
78
+ context "with content" do
79
+ before do
80
+ @string = '<user>Erik Michaels-Ober</user>'
81
+ end
82
+
83
+ it "should parse correctly" do
84
+ MultiXml.parse(@string).should == {"user" => "Erik Michaels-Ober"}
85
+ end
86
+ end
87
+
88
+ context "with an attribute" do
89
+ before do
90
+ @string = '<user name="Erik Michaels-Ober"/>'
91
+ end
92
+
93
+ it "should parse correctly" do
94
+ MultiXml.parse(@string).should == {"user" => {"name" => "Erik Michaels-Ober"}}
95
+ end
96
+ end
97
+
98
+ context "with multiple attributes" do
99
+ before do
100
+ @string = '<user name="Erik Michaels-Ober" screen_name="sferik"/>'
101
+ end
102
+
103
+ it "should parse correctly" do
104
+ MultiXml.parse(@string).should == {"user" => {"name" => "Erik Michaels-Ober", "screen_name" => "sferik"}}
105
+ end
106
+ end
107
+
108
+ context "with :symbolize_keys => true" do
109
+ before do
110
+ @string = '<user name="Erik Michaels-Ober"/>'
111
+ end
112
+
113
+ it "should symbolize keys" do
114
+ MultiXml.parse(@string, :symbolize_keys => true).should == {:user => {:name => "Erik Michaels-Ober"}}
115
+ end
116
+ end
117
+
118
+ context "with an attribute type=\"boolean\"" do
119
+ %w(true false).each do |boolean|
120
+ context "when #{boolean}" do
121
+ it "should be #{boolean}" do
122
+ string = "<tag type=\"boolean\">#{boolean}</tag>"
123
+ MultiXml.parse(string)['tag'].should instance_eval("be_#{boolean}")
124
+ end
125
+ end
126
+ end
127
+
128
+ context "when 1" do
129
+ before do
130
+ @string = '<tag type="boolean">1</tag>'
131
+ end
132
+
133
+ it "should be true" do
134
+ MultiXml.parse(@string)['tag'].should be_true
135
+ end
136
+ end
137
+
138
+ context "when 0" do
139
+ before do
140
+ @string = '<tag type="boolean">0</tag>'
141
+ end
142
+
143
+ it "should be false" do
144
+ MultiXml.parse(@string)['tag'].should be_false
145
+ end
146
+ end
147
+ end
148
+
149
+ context "with an attribute type=\"integer\"" do
150
+ context "with a positive integer" do
151
+ before do
152
+ @string = '<tag type="integer">1</tag>'
153
+ end
154
+
155
+ it "should be a Fixnum" do
156
+ MultiXml.parse(@string)['tag'].should be_a(Fixnum)
157
+ end
158
+
159
+ it "should be the correct number" do
160
+ MultiXml.parse(@string)['tag'].should == 1
161
+ end
162
+ end
163
+
164
+ context "with a negative integer" do
165
+ before do
166
+ @string = '<tag type="integer">-1</tag>'
167
+ end
168
+
169
+ it "should be a Fixnum" do
170
+ MultiXml.parse(@string)['tag'].should be_a(Fixnum)
171
+ end
172
+
173
+ it "should be the correct number" do
174
+ MultiXml.parse(@string)['tag'].should == -1
175
+ end
176
+ end
177
+ end
178
+
179
+ context "with an attribute type=\"string\"" do
180
+ before do
181
+ @string = '<tag type="string"></tag>'
182
+ end
183
+
184
+ it "should be a String" do
185
+ MultiXml.parse(@string)['tag'].should be_a(String)
186
+ end
187
+
188
+ it "should be the correct string" do
189
+ MultiXml.parse(@string)['tag'].should == ""
190
+ end
191
+ end
192
+
193
+ context "with an attribute type=\"date\"" do
194
+ before do
195
+ @string = '<tag type="date">1970-01-01</tag>'
196
+ end
197
+
198
+ it "should be a Date" do
199
+ MultiXml.parse(@string)['tag'].should be_a(Date)
200
+ end
201
+
202
+ it "should be the correct date" do
203
+ MultiXml.parse(@string)['tag'].should == Date.parse('1970-01-01')
204
+ end
205
+ end
206
+
207
+ context "with an attribute type=\"datetime\"" do
208
+ before do
209
+ @string = '<tag type="datetime">1970-01-01 00:00</tag>'
210
+ end
211
+
212
+ it "should be a Time" do
213
+ MultiXml.parse(@string)['tag'].should be_a(Time)
214
+ end
215
+
216
+ it "should be the correct time" do
217
+ MultiXml.parse(@string)['tag'].should == Time.parse('1970-01-01 00:00')
218
+ end
219
+ end
220
+
221
# Exercises the camel-cased "dateTime" spelling named in this context's
# description; the lowercase "datetime" spelling has its own context.
context "with an attribute type=\"dateTime\"" do
  before do
    @string = '<tag type="dateTime">1970-01-01 00:00</tag>'
  end

  it "should be a Time" do
    MultiXml.parse(@string)['tag'].should be_a(Time)
  end

  it "should be the correct time" do
    MultiXml.parse(@string)['tag'].should == Time.parse('1970-01-01 00:00')
  end
end
234
+
235
+ context "with an attribute type=\"double\"" do
236
+ before do
237
+ @string = '<tag type="double">3.14159265358979</tag>'
238
+ end
239
+
240
+ it "should be a Float" do
241
+ MultiXml.parse(@string)['tag'].should be_a(Float)
242
+ end
243
+
244
+ it "should be the correct number" do
245
+ MultiXml.parse(@string)['tag'].should == 3.14159265358979
246
+ end
247
+ end
248
+
249
+ context "with an attribute type=\"decimal\"" do
250
+ before do
251
+ @string = '<tag type="decimal">3.14159265358979323846264338327950288419716939937510</tag>'
252
+ end
253
+
254
+ it "should be a BigDecimal" do
255
+ MultiXml.parse(@string)['tag'].should be_a(BigDecimal)
256
+ end
257
+
258
+ it "should be the correct number" do
259
+ MultiXml.parse(@string)['tag'].should == 3.14159265358979323846264338327950288419716939937510
260
+ end
261
+ end
262
+
263
+ context "with an attribute type=\"base64Binary\"" do
264
+ before do
265
+ @string = '<tag type="base64Binary">aW1hZ2UucG5n</tag>'
266
+ end
267
+
268
+ it "should be a String" do
269
+ MultiXml.parse(@string)['tag'].should be_a(String)
270
+ end
271
+
272
+ it "should be the correct string" do
273
+ MultiXml.parse(@string)['tag'].should == "image.png"
274
+ end
275
+ end
276
+
277
+ context "with an attribute type=\"yaml\"" do
278
+ before do
279
+ @string = "<tag type=\"yaml\">--- \n1: should be an integer\n:message: Have a nice day\narray: \n- should-have-dashes: true\n should_have_underscores: true\n</tag>"
280
+ end
281
+
282
+ it "should parse correctly" do
283
+ MultiXml.parse(@string)['tag'].should == {:message => "Have a nice day", 1 => "should be an integer", "array" => [{"should-have-dashes" => true, "should_have_underscores" => true}]}
284
+ end
285
+ end
286
+
287
+ context "with an attribute type=\"file\"" do
288
+ before do
289
+ @string = '<tag type="file" name="data.txt" content_type="text/plain">ZGF0YQ==</tag>'
290
+ end
291
+
292
+ it "should be a StringIO" do
293
+ MultiXml.parse(@string)['tag'].should be_a(StringIO)
294
+ end
295
+
296
+ it "should be decoded correctly" do
297
+ MultiXml.parse(@string)['tag'].string.should == 'data'
298
+ end
299
+
300
+ it "should have the correct file name" do
301
+ MultiXml.parse(@string)['tag'].original_filename.should == 'data.txt'
302
+ end
303
+
304
+ it "should have the correct content type" do
305
+ MultiXml.parse(@string)['tag'].content_type.should == 'text/plain'
306
+ end
307
+
308
+ context "with missing name and content type" do
309
+ before do
310
+ @string = '<tag type="file">ZGF0YQ==</tag>'
311
+ end
312
+
313
+ it "should be a StringIO" do
314
+ MultiXml.parse(@string)['tag'].should be_a(StringIO)
315
+ end
316
+
317
+ it "should be decoded correctly" do
318
+ MultiXml.parse(@string)['tag'].string.should == 'data'
319
+ end
320
+
321
+ it "should have the default file name" do
322
+ MultiXml.parse(@string)['tag'].original_filename.should == 'untitled'
323
+ end
324
+
325
+ it "should have the default content type" do
326
+ MultiXml.parse(@string)['tag'].content_type.should == 'application/octet-stream'
327
+ end
328
+ end
329
+ end
330
+
331
+ context "with an unrecognized attribute type" do
332
+ before do
333
+ @string = '<tag type="foo"/>'
334
+ end
335
+
336
+ it "should pass through the type" do
337
+ pending
338
+ MultiXml.parse(@string)['tag']['type'].should == 'foo'
339
+ end
340
+ end
341
+
342
+ %w(integer boolean date datetime yaml).each do |type|
343
+ context "with an empty attribute type=\"#{type}\"" do
344
+ before do
345
+ @string = "<tag type=\"#{type}\"/>"
346
+ end
347
+
348
+ it "should be nil" do
349
+ MultiXml.parse(@string)['tag'].should be_nil
350
+ end
351
+ end
352
+ end
353
+
354
+ context "with an empty attribute type=\"array\"" do
355
+ before do
356
+ @string = '<users type="array"/>'
357
+ end
358
+
359
+ it "should be an empty Array" do
360
+ MultiXml.parse(@string)['users'].should == []
361
+ end
362
+
363
+ context "with whitespace" do
364
+ before do
365
+ @string = '<users type="array"> </users>'
366
+ end
367
+
368
+ it "should be an empty Array" do
369
+ MultiXml.parse(@string)['users'].should == []
370
+ end
371
+ end
372
+ end
373
+
374
+ context "with XML entities" do
375
+ before do
376
+ @xml_entities = {
377
+ "<" => "&lt;",
378
+ ">" => "&gt;",
379
+ '"' => "&quot;",
380
+ "'" => "&apos;",
381
+ "&" => "&amp;"
382
+ }
383
+ end
384
+
385
+ context "in content" do
386
+ it "should unescape XML entities" do
387
+ @xml_entities.each do |key, value|
388
+ string = "<tag>#{value}</tag>"
389
+ MultiXml.parse(string)['tag'].should == key
390
+ end
391
+ end
392
+ end
393
+
394
+ context "in attribute" do
395
+ it "should unescape XML entities" do
396
+ @xml_entities.each do |key, value|
397
+ string = "<tag attribute=\"#{value}\"/>"
398
+ MultiXml.parse(string)['tag']['attribute'].should == key
399
+ end
400
+ end
401
+ end
402
+ end
403
+
404
+ context "with dasherized tag" do
405
+ before do
406
+ @string = '<tag-1/>'
407
+ end
408
+
409
+ it "should undasherize tag" do
410
+ MultiXml.parse(@string).keys.should include('tag_1')
411
+ end
412
+ end
413
+
414
+ context "with dasherized attribute" do
415
+ before do
416
+ @string = '<tag attribute-1="1"></tag>'
417
+ end
418
+
419
+ it "should undasherize attribute" do
420
+ MultiXml.parse(@string)['tag'].keys.should include('attribute_1')
421
+ end
422
+ end
42
423
  end
43
424
 
44
- it 'should allow for symbolization of keys' do
45
- MultiXml.parse('<tag>This is the contents</tag>', :symbolize_keys => true).should == {:tag => 'This is the contents'}
425
+ context "a document" do
426
+ context "with :symbolize_keys => true" do
427
+ before do
428
+ @string = '<user><name>Erik Michaels-Ober</name></user>'
429
+ end
430
+
431
+ it "should symbolize keys" do
432
+ MultiXml.parse(@string, :symbolize_keys => true).should == {:user => {:name => "Erik Michaels-Ober"}}
433
+ end
434
+ end
435
+
436
+ context "with children" do
437
+ before do
438
+ @string = '<root><user name="Erik Michaels-Ober"/></root>'
439
+ end
440
+
441
+ it "should parse correctly" do
442
+ MultiXml.parse(@string).should == {"root" => {"user" => {"name"=>"Erik Michaels-Ober"}}}
443
+ end
444
+
445
+ context "with text" do
446
+ before do
447
+ @string = '<user><name>Erik Michaels-Ober</name></user>'
448
+ end
449
+
450
+ it "should parse correctly" do
451
+ MultiXml.parse(@string).should == {"user" => {"name" => "Erik Michaels-Ober"}}
452
+ end
453
+ end
454
+
455
+ # Babies having babies
456
+ context "with children" do
457
+ before do
458
+ @string = '<root><user name="Erik Michaels-Ober"><status text="Hello"/></user></root>'
459
+ end
460
+
461
+ it "should parse correctly" do
462
+ MultiXml.parse(@string).should == {"root" => {"user" => {"name" => "Erik Michaels-Ober", "status" => {"text" => "Hello"}}}}
463
+ end
464
+ end
465
+ end
466
+
467
+ context "with sibling children" do
468
+ before do
469
+ @string = '<root><users>Erik Michaels-Ober</users><users>Wynn Netherland</users></root>'
470
+ end
471
+
472
+ it "should parse correctly" do
473
+ MultiXml.parse(@string).should == {"root" => {"users" => ["Erik Michaels-Ober", "Wynn Netherland"]}}
474
+ end
475
+
476
+ it "should make an Array of children" do
477
+ MultiXml.parse(@string)['root']['users'].should be_a(Array)
478
+ end
479
+
480
+ end
46
481
  end
47
482
  end
48
483
  end