multi_xml 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of multi_xml might be problematic. Click here for more details.

@@ -0,0 +1,79 @@
1
+ require 'nokogiri' unless defined?(Nokogiri)
2
+
3
+ module MultiXml
4
+ module Parsers
5
+ # Use Nokogiri to parse XML.
6
+ module Nokogiri #:nodoc:
7
+ extend self
8
+
9
+ # Parse an XML Document string or IO into a simple hash using libxml / nokogiri.
10
+ # data::
11
+ # XML Document string or IO to parse
12
# Parse an XML document into a simple hash using Nokogiri.
#
# data::
#   XML document to parse. Anything that does not respond to #read
#   (a String, or nil) is wrapped in a StringIO; nil is treated as ''.
#
# Returns an empty Hash when there is nothing to read, otherwise the
# result of calling #to_hash on the parsed Nokogiri document.
#
# Raises the first error recorded by Nokogiri if the document has any
# parse errors.
def parse(data)
  data = StringIO.new(data || '') unless data.respond_to?(:read)

  # Peek at the first character to detect empty input, then push it back
  # so the parser still sees the complete stream.
  char = data.getc
  return {} if char.nil?
  data.ungetc(char)

  doc = ::Nokogiri::XML(data)
  raise doc.errors.first unless doc.errors.empty?
  doc.to_hash
end
27
+
28
module Conversions #:nodoc:
  module Document #:nodoc:
    # Convert the document to a hash by converting its root node.
    def to_hash
      root.to_hash
    end
  end

  module Node #:nodoc:
    CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)

    # Convert this XML node (and, recursively, its child elements) to a
    # hash and merge it into +hash+ under the node's name.
    #
    # hash::
    #   Hash to merge the converted element into.
    #
    # Returns +hash+.
    def to_hash(hash={})
      node_hash = {}

      # Insert node hash into parent hash correctly: the first node with a
      # given name is stored directly, a second one turns the entry into
      # an Array, and later ones are appended to that Array.
      case hash[name]
      when Array then hash[name] << node_hash
      when Hash  then hash[name] = [hash[name], node_hash]
      when nil   then hash[name] = node_hash
      end

      # Handle child elements
      children.each do |c|
        if c.element?
          c.to_hash(node_hash)
        elsif c.text? || c.cdata?
          # Start from a fresh, unfrozen String so that appending works
          # even when string literals are frozen.
          (node_hash[CONTENT_ROOT] ||= ''.dup) << c.content
        end
      end

      # Remove content node if it is empty and there are other entries
      content = node_hash[CONTENT_ROOT]
      node_hash.delete(CONTENT_ROOT) if content && content.empty? && node_hash.length > 1

      # Handle attributes. They are written last, so an attribute replaces
      # any entry already stored under the same key.
      attribute_nodes.each { |a| node_hash[a.node_name] = a.value }

      hash
    end
  end
end
74
+
75
+ ::Nokogiri::XML::Document.send(:include, Conversions::Document)
76
+ ::Nokogiri::XML::Node.send(:include, Conversions::Node)
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,127 @@
1
+ require 'rexml/document' unless defined?(REXML::Document)
2
+
3
+ module MultiXml
4
+ module Parsers
5
+ # Use REXML to parse XML.
6
module Rexml #:nodoc:
  extend self

  CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)

  # Parse an XML Document string or IO into a simple hash
  #
  # data::
  #   XML Document string or IO to parse. Anything that does not respond
  #   to #read (a String, or nil) is wrapped in a StringIO; nil is
  #   treated as ''.
  #
  # Returns an empty Hash when there is nothing to read, otherwise a Hash
  # keyed by the root element's name.
  #
  # Raises REXML::ParseException when the parsed document has no root.
  def parse(data)
    data = StringIO.new(data || '') unless data.respond_to?(:read)

    # Peek at the first character to detect empty input, then push it
    # back so the parser still sees the complete stream.
    char = data.getc
    return {} if char.nil?
    data.ungetc(char)

    doc = REXML::Document.new(data)
    unless doc.root
      raise REXML::ParseException,
        "The document #{doc.to_s.inspect} does not have a valid root"
    end

    merge_element!({}, doc.root)
  end

  private

  # Convert an XML element and merge into the hash
  #
  # hash::
  #   Hash to merge the converted element into.
  # element::
  #   XML element to merge into hash
  def merge_element!(hash, element)
    merge!(hash, element.name, collapse(element))
  end

  # Actually converts an XML document element into a data structure.
  #
  # element::
  #   The document element to be collapsed.
  def collapse(element)
    hash = get_attributes(element)
    return merge_texts!(hash, element) unless element.has_elements?

    element.each_element { |child| merge_element!(hash, child) }
    # With child elements present, text is only kept when it is non-empty.
    merge_texts!(hash, element) unless empty_content?(element)
    hash
  end

  # Merge all the texts of an element into the hash
  #
  # hash::
  #   Hash to add the converted element to.
  # element::
  #   XML element whose texts are to be merged into the hash
  def merge_texts!(hash, element)
    return hash unless element.has_text?

    # must use value to prevent double-escaping
    merge!(hash, CONTENT_ROOT, element.texts.map { |t| t.value }.join)
  end

  # Adds a new key/value pair to an existing Hash. If the key to be added
  # already exists and the existing value associated with key is not
  # an Array, it will be wrapped in an Array. Then the new value is
  # appended to that Array.
  #
  # hash::
  #   Hash to add key/value pair to.
  # key::
  #   Key to be added.
  # value::
  #   Value to be associated with key.
  def merge!(hash, key, value)
    if hash.key?(key)
      if hash[key].instance_of?(Array)
        hash[key] << value
      else
        hash[key] = [hash[key], value]
      end
    elsif value.instance_of?(Array)
      # Wrap an Array value so a later merge appends to the outer Array
      # instead of to the value itself.
      hash[key] = [value]
    else
      hash[key] = value
    end
    hash
  end

  # Converts the attributes array of an XML element into a hash.
  # Returns an empty Hash if node has no attributes.
  #
  # element::
  #   XML element to extract attributes from.
  def get_attributes(element)
    attributes = {}
    element.attributes.each { |n, v| attributes[n] = v }
    attributes
  end

  # Determines if a document element has no text content, i.e. its texts
  # join to an empty string.
  #
  # element::
  #   XML element to be checked.
  def empty_content?(element)
    element.texts.join.empty?
  end
end
126
+ end
127
+ end
@@ -1,3 +1,3 @@
1
1
  module MultiXml
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -3,12 +3,11 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
  require "multi_xml/version"
4
4
 
5
5
  Gem::Specification.new do |s|
6
- s.add_development_dependency("bundler", ["~> 1.0.2"])
7
- s.add_development_dependency("hpricot", ["~> 0.8.2"])
8
- s.add_development_dependency("libxml-ruby", ["~> 1.1.4"])
9
- s.add_development_dependency("nokogiri", ["~> 1.4.3"])
10
- s.add_development_dependency("rake", ["~> 0.8.7"])
11
- s.add_development_dependency("rspec", ["~> 1.3.0"])
6
+ s.add_development_dependency("bundler", "~> 1.0")
7
+ s.add_development_dependency("libxml-ruby", "~> 1.1")
8
+ s.add_development_dependency("nokogiri", "~> 1.4")
9
+ s.add_development_dependency("rake", "~> 0.8")
10
+ s.add_development_dependency("rspec", "~> 2.0")
12
11
  s.name = "multi_xml"
13
12
  s.version = MultiXml::VERSION
14
13
  s.platform = Gem::Platform::RUBY
@@ -7,42 +7,477 @@ class MockDecoder
7
7
  end
8
8
 
9
9
  describe "MultiXml" do
10
- context 'engines' do
11
- it 'should default to the best available gem' do
12
- pending
10
+ context "Parsers" do
11
+ it "should default to the best available gem" do
13
12
  require 'libxml'
14
- MultiXml.engine.name.should == 'MultiXml::Engines::Libxml'
13
+ MultiXml.parser.name.should == 'MultiXml::Parsers::Libxml'
15
14
  end
16
15
 
17
- it 'should be settable via a symbol' do
18
- pending
19
- MultiXml.engine = :libxml
20
- MultiXml.engine.name.should == 'MultiXml::Engines::Libxml'
16
+ it "should be settable via a symbol" do
17
+ MultiXml.parser = :libxml
18
+ MultiXml.parser.name.should == 'MultiXml::Parsers::Libxml'
21
19
  end
22
20
 
23
- it 'should be settable via a class' do
24
- MultiXml.engine = MockDecoder
25
- MultiXml.engine.name.should == 'MockDecoder'
21
+ it "should be settable via a class" do
22
+ MultiXml.parser = MockDecoder
23
+ MultiXml.parser.name.should == 'MockDecoder'
26
24
  end
27
25
  end
28
26
 
29
- %w(libxml nokogiri hpricot rexml).each do |engine|
30
- context engine do
27
+ Dir.glob('lib/multi_xml/parsers/**/*.rb').map{|file| File.basename(file, ".rb").split('_').map{|s| s.capitalize}.join('')}.each do |parser|
28
+ context "Parsers::#{parser}" do
31
29
  before do
32
30
  begin
33
- MultiXml.engine = engine
31
+ MultiXml.parser = parser
34
32
  rescue LoadError
35
- pending "Engine #{engine} couldn't be loaded (not installed?)"
33
+ pending "Parser #{parser} couldn't be loaded"
36
34
  end
37
35
  end
38
36
 
39
- describe '.parse' do
40
- it 'should properly parse some XML' do
41
- MultiXml.parse('<tag>This is the contents</tag>').should == {'tag' => 'This is the contents'}
37
+ describe ".parse" do
38
+ context "a blank string" do
39
+ before do
40
+ @string = ''
41
+ end
42
+
43
+ it "should return an empty Hash" do
44
+ MultiXml.parse(@string).should == {}
45
+ end
46
+ end
47
+
48
+ context "a whitespace string" do
49
+ before do
50
+ @string = ' '
51
+ end
52
+
53
+ it "should return an empty Hash" do
54
+ MultiXml.parse(@string).should == {}
55
+ end
56
+ end
57
+
58
+ context "a single-node document" do
59
+
60
+ before do
61
+ @string = '<user/>'
62
+ end
63
+
64
+ it "should parse correctly" do
65
+ MultiXml.parse(@string).should == {'user' => nil}
66
+ end
67
+
68
+ context "with CDATA" do
69
+ before do
70
+ @string = '<user><![CDATA[Erik Michaels-Ober]]></user>'
71
+ end
72
+
73
+ it "should parse correctly" do
74
+ MultiXml.parse(@string).should == {"user" => "Erik Michaels-Ober"}
75
+ end
76
+ end
77
+
78
+ context "with content" do
79
+ before do
80
+ @string = '<user>Erik Michaels-Ober</user>'
81
+ end
82
+
83
+ it "should parse correctly" do
84
+ MultiXml.parse(@string).should == {"user" => "Erik Michaels-Ober"}
85
+ end
86
+ end
87
+
88
+ context "with an attribute" do
89
+ before do
90
+ @string = '<user name="Erik Michaels-Ober"/>'
91
+ end
92
+
93
+ it "should parse correctly" do
94
+ MultiXml.parse(@string).should == {"user" => {"name" => "Erik Michaels-Ober"}}
95
+ end
96
+ end
97
+
98
+ context "with multiple attributes" do
99
+ before do
100
+ @string = '<user name="Erik Michaels-Ober" screen_name="sferik"/>'
101
+ end
102
+
103
+ it "should parse correctly" do
104
+ MultiXml.parse(@string).should == {"user" => {"name" => "Erik Michaels-Ober", "screen_name" => "sferik"}}
105
+ end
106
+ end
107
+
108
+ context "with :symbolize_keys => true" do
109
+ before do
110
+ @string = '<user name="Erik Michaels-Ober"/>'
111
+ end
112
+
113
+ it "should symbolize keys" do
114
+ MultiXml.parse(@string, :symbolize_keys => true).should == {:user => {:name => "Erik Michaels-Ober"}}
115
+ end
116
+ end
117
+
118
+ context "with an attribute type=\"boolean\"" do
119
+ %w(true false).each do |boolean|
120
+ context "when #{boolean}" do
121
+ it "should be #{boolean}" do
122
+ string = "<tag type=\"boolean\">#{boolean}</tag>"
123
+ MultiXml.parse(string)['tag'].should instance_eval("be_#{boolean}")
124
+ end
125
+ end
126
+ end
127
+
128
+ context "when 1" do
129
+ before do
130
+ @string = '<tag type="boolean">1</tag>'
131
+ end
132
+
133
+ it "should be true" do
134
+ MultiXml.parse(@string)['tag'].should be_true
135
+ end
136
+ end
137
+
138
+ context "when 0" do
139
+ before do
140
+ @string = '<tag type="boolean">0</tag>'
141
+ end
142
+
143
+ it "should be false" do
144
+ MultiXml.parse(@string)['tag'].should be_false
145
+ end
146
+ end
147
+ end
148
+
149
context "with an attribute type=\"integer\"" do
  # These examples check for Integer rather than Fixnum: Fixnum was
  # deprecated in Ruby 2.4 and removed in Ruby 3.2, while Integer is a
  # valid expectation on every Ruby version.
  context "with a positive integer" do
    before do
      @string = '<tag type="integer">1</tag>'
    end

    it "should be an Integer" do
      MultiXml.parse(@string)['tag'].should be_a(Integer)
    end

    it "should be the correct number" do
      MultiXml.parse(@string)['tag'].should == 1
    end
  end

  context "with a negative integer" do
    before do
      @string = '<tag type="integer">-1</tag>'
    end

    it "should be an Integer" do
      MultiXml.parse(@string)['tag'].should be_a(Integer)
    end

    it "should be the correct number" do
      MultiXml.parse(@string)['tag'].should == -1
    end
  end
end
178
+
179
+ context "with an attribute type=\"string\"" do
180
+ before do
181
+ @string = '<tag type="string"></tag>'
182
+ end
183
+
184
+ it "should be a String" do
185
+ MultiXml.parse(@string)['tag'].should be_a(String)
186
+ end
187
+
188
+ it "should be the correct string" do
189
+ MultiXml.parse(@string)['tag'].should == ""
190
+ end
191
+ end
192
+
193
+ context "with an attribute type=\"date\"" do
194
+ before do
195
+ @string = '<tag type="date">1970-01-01</tag>'
196
+ end
197
+
198
+ it "should be a Date" do
199
+ MultiXml.parse(@string)['tag'].should be_a(Date)
200
+ end
201
+
202
+ it "should be the correct date" do
203
+ MultiXml.parse(@string)['tag'].should == Date.parse('1970-01-01')
204
+ end
205
+ end
206
+
207
+ context "with an attribute type=\"datetime\"" do
208
+ before do
209
+ @string = '<tag type="datetime">1970-01-01 00:00</tag>'
210
+ end
211
+
212
+ it "should be a Time" do
213
+ MultiXml.parse(@string)['tag'].should be_a(Time)
214
+ end
215
+
216
+ it "should be the correct time" do
217
+ MultiXml.parse(@string)['tag'].should == Time.parse('1970-01-01 00:00')
218
+ end
219
+ end
220
+
221
context "with an attribute type=\"dateTime\"" do
  before do
    # Camel-case "dateTime", as named by this context; the lower-case
    # "datetime" spelling is exercised by the preceding context.
    @string = '<tag type="dateTime">1970-01-01 00:00</tag>'
  end

  it "should be a Time" do
    MultiXml.parse(@string)['tag'].should be_a(Time)
  end

  it "should be the correct time" do
    MultiXml.parse(@string)['tag'].should == Time.parse('1970-01-01 00:00')
  end
end
234
+
235
+ context "with an attribute type=\"double\"" do
236
+ before do
237
+ @string = '<tag type="double">3.14159265358979</tag>'
238
+ end
239
+
240
+ it "should be a Float" do
241
+ MultiXml.parse(@string)['tag'].should be_a(Float)
242
+ end
243
+
244
+ it "should be the correct number" do
245
+ MultiXml.parse(@string)['tag'].should == 3.14159265358979
246
+ end
247
+ end
248
+
249
context "with an attribute type=\"decimal\"" do
  before do
    @string = '<tag type="decimal">3.14159265358979323846264338327950288419716939937510</tag>'
  end

  it "should be a BigDecimal" do
    MultiXml.parse(@string)['tag'].should be_a(BigDecimal)
  end

  it "should be the correct number" do
    # Compare against a BigDecimal built from the same digits; a Float
    # literal would be truncated to double precision before comparing.
    MultiXml.parse(@string)['tag'].should == BigDecimal('3.14159265358979323846264338327950288419716939937510')
  end
end
262
+
263
+ context "with an attribute type=\"base64Binary\"" do
264
+ before do
265
+ @string = '<tag type="base64Binary">aW1hZ2UucG5n</tag>'
266
+ end
267
+
268
+ it "should be a String" do
269
+ MultiXml.parse(@string)['tag'].should be_a(String)
270
+ end
271
+
272
+ it "should be the correct string" do
273
+ MultiXml.parse(@string)['tag'].should == "image.png"
274
+ end
275
+ end
276
+
277
+ context "with an attribute type=\"yaml\"" do
278
+ before do
279
+ @string = "<tag type=\"yaml\">--- \n1: should be an integer\n:message: Have a nice day\narray: \n- should-have-dashes: true\n should_have_underscores: true\n</tag>"
280
+ end
281
+
282
+ it "should parse correctly" do
283
+ MultiXml.parse(@string)['tag'].should == {:message => "Have a nice day", 1 => "should be an integer", "array" => [{"should-have-dashes" => true, "should_have_underscores" => true}]}
284
+ end
285
+ end
286
+
287
+ context "with an attribute type=\"file\"" do
288
+ before do
289
+ @string = '<tag type="file" name="data.txt" content_type="text/plain">ZGF0YQ==</tag>'
290
+ end
291
+
292
+ it "should be a StringIO" do
293
+ MultiXml.parse(@string)['tag'].should be_a(StringIO)
294
+ end
295
+
296
+ it "should be decoded correctly" do
297
+ MultiXml.parse(@string)['tag'].string.should == 'data'
298
+ end
299
+
300
+ it "should have the correct file name" do
301
+ MultiXml.parse(@string)['tag'].original_filename.should == 'data.txt'
302
+ end
303
+
304
+ it "should have the correct content type" do
305
+ MultiXml.parse(@string)['tag'].content_type.should == 'text/plain'
306
+ end
307
+
308
+ context "with missing name and content type" do
309
+ before do
310
+ @string = '<tag type="file">ZGF0YQ==</tag>'
311
+ end
312
+
313
+ it "should be a StringIO" do
314
+ MultiXml.parse(@string)['tag'].should be_a(StringIO)
315
+ end
316
+
317
+ it "should be decoded correctly" do
318
+ MultiXml.parse(@string)['tag'].string.should == 'data'
319
+ end
320
+
321
+ it "should have the default file name" do
322
+ MultiXml.parse(@string)['tag'].original_filename.should == 'untitled'
323
+ end
324
+
325
+ it "should have the default content type" do
326
+ MultiXml.parse(@string)['tag'].content_type.should == 'application/octet-stream'
327
+ end
328
+ end
329
+ end
330
+
331
+ context "with an unrecognized attribute type" do
332
+ before do
333
+ @string = '<tag type="foo"/>'
334
+ end
335
+
336
+ it "should pass through the type" do
337
+ pending
338
+ MultiXml.parse(@string)['tag']['type'].should == 'foo'
339
+ end
340
+ end
341
+
342
+ %w(integer boolean date datetime yaml).each do |type|
343
+ context "with an empty attribute type=\"#{type}\"" do
344
+ before do
345
+ @string = "<tag type=\"#{type}\"/>"
346
+ end
347
+
348
+ it "should be nil" do
349
+ MultiXml.parse(@string)['tag'].should be_nil
350
+ end
351
+ end
352
+ end
353
+
354
+ context "with an empty attribute type=\"array\"" do
355
+ before do
356
+ @string = '<users type="array"/>'
357
+ end
358
+
359
+ it "should be an empty Array" do
360
+ MultiXml.parse(@string)['users'].should == []
361
+ end
362
+
363
+ context "with whitespace" do
364
+ before do
365
+ @string = '<users type="array"> </users>'
366
+ end
367
+
368
+ it "should be an empty Array" do
369
+ MultiXml.parse(@string)['users'].should == []
370
+ end
371
+ end
372
+ end
373
+
374
+ context "with XML entities" do
375
+ before do
376
+ @xml_entities = {
377
+ "<" => "&lt;",
378
+ ">" => "&gt;",
379
+ '"' => "&quot;",
380
+ "'" => "&apos;",
381
+ "&" => "&amp;"
382
+ }
383
+ end
384
+
385
+ context "in content" do
386
+ it "should unescape XML entities" do
387
+ @xml_entities.each do |key, value|
388
+ string = "<tag>#{value}</tag>"
389
+ MultiXml.parse(string)['tag'].should == key
390
+ end
391
+ end
392
+ end
393
+
394
+ context "in attribute" do
395
+ it "should unescape XML entities" do
396
+ @xml_entities.each do |key, value|
397
+ string = "<tag attribute=\"#{value}\"/>"
398
+ MultiXml.parse(string)['tag']['attribute'].should == key
399
+ end
400
+ end
401
+ end
402
+ end
403
+
404
+ context "with dasherized tag" do
405
+ before do
406
+ @string = '<tag-1/>'
407
+ end
408
+
409
+ it "should undasherize tag" do
410
+ MultiXml.parse(@string).keys.should include('tag_1')
411
+ end
412
+ end
413
+
414
+ context "with dasherized attribute" do
415
+ before do
416
+ @string = '<tag attribute-1="1"></tag>'
417
+ end
418
+
419
+ it "should undasherize attribute" do
420
+ MultiXml.parse(@string)['tag'].keys.should include('attribute_1')
421
+ end
422
+ end
42
423
  end
43
424
 
44
- it 'should allow for symbolization of keys' do
45
- MultiXml.parse('<tag>This is the contents</tag>', :symbolize_keys => true).should == {:tag => 'This is the contents'}
425
+ context "a document" do
426
+ context "with :symbolize_keys => true" do
427
+ before do
428
+ @string = '<user><name>Erik Michaels-Ober</name></user>'
429
+ end
430
+
431
+ it "should symbolize keys" do
432
+ MultiXml.parse(@string, :symbolize_keys => true).should == {:user => {:name => "Erik Michaels-Ober"}}
433
+ end
434
+ end
435
+
436
+ context "with children" do
437
+ before do
438
+ @string = '<root><user name="Erik Michaels-Ober"/></root>'
439
+ end
440
+
441
+ it "should parse correctly" do
442
+ MultiXml.parse(@string).should == {"root" => {"user" => {"name"=>"Erik Michaels-Ober"}}}
443
+ end
444
+
445
+ context "with text" do
446
+ before do
447
+ @string = '<user><name>Erik Michaels-Ober</name></user>'
448
+ end
449
+
450
+ it "should parse correctly" do
451
+ MultiXml.parse(@string).should == {"user" => {"name" => "Erik Michaels-Ober"}}
452
+ end
453
+ end
454
+
455
+ # Babies having babies
456
+ context "with children" do
457
+ before do
458
+ @string = '<root><user name="Erik Michaels-Ober"><status text="Hello"/></user></root>'
459
+ end
460
+
461
+ it "should parse correctly" do
462
+ MultiXml.parse(@string).should == {"root" => {"user" => {"name" => "Erik Michaels-Ober", "status" => {"text" => "Hello"}}}}
463
+ end
464
+ end
465
+ end
466
+
467
+ context "with sibling children" do
468
+ before do
469
+ @string = '<root><users>Erik Michaels-Ober</users><users>Wynn Netherland</users></root>'
470
+ end
471
+
472
+ it "should parse correctly" do
473
+ MultiXml.parse(@string).should == {"root" => {"users" => ["Erik Michaels-Ober", "Wynn Netherland"]}}
474
+ end
475
+
476
+ it "should make an Array of children" do
477
+ MultiXml.parse(@string)['root']['users'].should be_a(Array)
478
+ end
479
+
480
+ end
46
481
  end
47
482
  end
48
483
  end