multi_xml 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of multi_xml might be problematic. Click here for more details.
- data/.autotest +1 -0
- data/.travis.yml +7 -0
- data/.yardopts +1 -1
- data/Gemfile +3 -3
- data/{LICENSE.mkd → LICENSE.md} +0 -0
- data/{README.mkd → README.md} +37 -7
- data/Rakefile +3 -1
- data/lib/multi_xml.rb +53 -44
- data/lib/multi_xml/parsers/libxml.rb +13 -62
- data/lib/multi_xml/parsers/libxml2_parser.rb +66 -0
- data/lib/multi_xml/parsers/nokogiri.rb +16 -63
- data/lib/multi_xml/parsers/rexml.rb +8 -20
- data/lib/multi_xml/version.rb +1 -1
- data/multi_xml.gemspec +20 -22
- data/spec/helper.rb +0 -1
- data/spec/multi_xml_spec.rb +14 -551
- data/spec/parser_shared_example.rb +550 -0
- metadata +80 -94
- data/lib/multi_xml/core_extensions.rb +0 -108
data/.autotest
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'autotest/bundler'
|
data/.travis.yml
ADDED
data/.yardopts
CHANGED
data/Gemfile
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
source
|
1
|
+
source 'http://rubygems.org'
|
2
2
|
|
3
3
|
group :development, :test do
|
4
|
-
gem 'libxml-ruby', "~>
|
5
|
-
gem 'nokogiri',
|
4
|
+
gem 'libxml-ruby', "~> 2.0", :require => nil, :platforms => :mri
|
5
|
+
gem 'nokogiri', "~> 1.4", :require => nil
|
6
6
|
end
|
7
7
|
|
8
8
|
gemspec
|
data/{LICENSE.mkd → LICENSE.md}
RENAMED
File without changes
|
data/{README.mkd → README.md}
RENAMED
@@ -4,11 +4,15 @@ A generic swappable back-end for XML parsing
|
|
4
4
|
|
5
5
|
Installation
|
6
6
|
------------
|
7
|
-
|
7
|
+
gem install multi_xml
|
8
8
|
|
9
9
|
Documentation
|
10
10
|
-------------
|
11
|
-
|
11
|
+
[http://rdoc.info/gems/multi_xml](http://rdoc.info/gems/multi_xml)
|
12
|
+
|
13
|
+
Continuous Integration
|
14
|
+
----------------------
|
15
|
+
[Build Status](http://travis-ci.org/sferik/multi_xml)
|
12
16
|
|
13
17
|
Usage Examples
|
14
18
|
--------------
|
@@ -50,15 +54,15 @@ Here are some ways *you* can contribute:
|
|
50
54
|
* by writing specifications
|
51
55
|
* by writing code (**no patch is too small**: fix typos, add comments, clean up inconsistent whitespace)
|
52
56
|
* by refactoring code
|
53
|
-
* by resolving [issues](
|
57
|
+
* by resolving [issues](https://github.com/sferik/multi_xml/issues)
|
54
58
|
* by reviewing patches
|
55
59
|
|
56
60
|
Submitting an Issue
|
57
61
|
-------------------
|
58
|
-
We use the [GitHub issue tracker](
|
62
|
+
We use the [GitHub issue tracker](https://github.com/sferik/multi_xml/issues) to track bugs and
|
59
63
|
features. Before submitting a bug report or feature request, check to make sure it hasn't already
|
60
64
|
been submitted. You can indicate support for an existing issue by voting it up. When submitting a
|
61
|
-
bug report, please include a [Gist](
|
65
|
+
bug report, please include a [Gist](https://gist.github.com/) that includes a stack trace and any
|
62
66
|
details that may be necessary to reproduce the bug, including your gem version, Ruby version, and
|
63
67
|
operating system. Ideally, a bug report should include a pull request with failing specs.
|
64
68
|
|
@@ -74,11 +78,37 @@ Submitting a Pull Request
|
|
74
78
|
8. Commit and push your changes.
|
75
79
|
9. Submit a pull request. Please do not include changes to the gemspec, version, or history file. (If you want to create your own version for some reason, please do so in a separate commit.)
|
76
80
|
|
81
|
+
Supported Rubies
|
82
|
+
----------------
|
83
|
+
This library aims to support and is [tested
|
84
|
+
against](http://travis-ci.org/sferik/multi_xml) the following Ruby
|
85
|
+
implementations:
|
86
|
+
|
87
|
+
* Ruby 1.8.7
|
88
|
+
* Ruby 1.9.1
|
89
|
+
* Ruby 1.9.2
|
90
|
+
* [Rubinius](http://rubini.us)
|
91
|
+
* [Ruby Enterprise Edition](http://www.rubyenterpriseedition.com/)
|
92
|
+
|
93
|
+
If something doesn't work on one of these interpreters, it should be considered
|
94
|
+
a bug.
|
95
|
+
|
96
|
+
This library may inadvertently work (or seem to work) on other Ruby
|
97
|
+
implementations; however, support will only be provided for the versions listed
|
98
|
+
above.
|
99
|
+
|
100
|
+
If you would like this library to support another Ruby version, you may
|
101
|
+
volunteer to be a maintainer. Being a maintainer entails making sure all tests
|
102
|
+
run and pass on that implementation. When something breaks on your
|
103
|
+
implementation, you will be personally responsible for providing patches in a
|
104
|
+
timely fashion. If critical issues for a particular implementation exist at the
|
105
|
+
time of a major release, support for that Ruby version may be dropped.
|
106
|
+
|
77
107
|
Inspiration
|
78
108
|
-----------
|
79
|
-
MultiXML was inspired by [MultiJSON](
|
109
|
+
MultiXML was inspired by [MultiJSON](https://github.com/intridea/multi_json/).
|
80
110
|
|
81
111
|
Copyright
|
82
112
|
---------
|
83
113
|
Copyright (c) 2010 Erik Michaels-Ober.
|
84
|
-
See [LICENSE](https://github.com/sferik/multi_xml/blob/master/LICENSE.mkd) for details.
|
114
|
+
See [LICENSE](https://github.com/sferik/multi_xml/blob/master/LICENSE.md) for details.
|
data/Rakefile
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
|
1
3
|
require 'bundler'
|
2
4
|
Bundler::GemHelper.install_tasks
|
3
5
|
|
@@ -10,7 +12,7 @@ task :default => :spec
|
|
10
12
|
namespace :doc do
|
11
13
|
require 'yard'
|
12
14
|
YARD::Rake::YardocTask.new do |task|
|
13
|
-
task.files = ['LICENSE.
|
15
|
+
task.files = ['LICENSE.md', 'lib/**/*.rb']
|
14
16
|
task.options = [
|
15
17
|
'--no-private',
|
16
18
|
'--protected',
|
data/lib/multi_xml.rb
CHANGED
@@ -1,47 +1,46 @@
|
|
1
1
|
require 'base64'
|
2
2
|
require 'bigdecimal'
|
3
3
|
require 'date'
|
4
|
-
require '
|
4
|
+
require 'stringio'
|
5
5
|
require 'time'
|
6
6
|
require 'yaml'
|
7
7
|
|
8
8
|
module MultiXml
|
9
9
|
class ParseError < StandardError; end
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
PARSING
|
40
|
-
|
41
|
-
|
42
|
-
)
|
43
|
-
end
|
11
|
+
REQUIREMENT_MAP = [
|
12
|
+
['libxml', :libxml],
|
13
|
+
['nokogiri', :nokogiri],
|
14
|
+
['rexml/document', :rexml]
|
15
|
+
] unless defined?(REQUIREMENT_MAP)
|
16
|
+
|
17
|
+
CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)
|
18
|
+
|
19
|
+
# TODO: use Time.xmlschema instead of Time.parse;
|
20
|
+
# use regexp instead of Date.parse
|
21
|
+
unless defined?(PARSING)
|
22
|
+
PARSING = {
|
23
|
+
'symbol' => Proc.new{|symbol| symbol.to_sym},
|
24
|
+
'date' => Proc.new{|date| Date.parse(date)},
|
25
|
+
'datetime' => Proc.new{|time| Time.parse(time).utc rescue DateTime.parse(time).utc},
|
26
|
+
'integer' => Proc.new{|integer| integer.to_i},
|
27
|
+
'float' => Proc.new{|float| float.to_f},
|
28
|
+
'decimal' => Proc.new{|number| BigDecimal(number)},
|
29
|
+
'boolean' => Proc.new{|boolean| !%w(0 false).include?(boolean.strip)},
|
30
|
+
'string' => Proc.new{|string| string.to_s},
|
31
|
+
'yaml' => Proc.new{|yaml| YAML::load(yaml) rescue yaml},
|
32
|
+
'base64Binary' => Proc.new{|binary| binary.unpack('m').first},
|
33
|
+
'binary' => Proc.new{|binary, entity| parse_binary(binary, entity)},
|
34
|
+
'file' => Proc.new{|file, entity| parse_file(file, entity)}
|
35
|
+
}
|
36
|
+
|
37
|
+
PARSING.update(
|
38
|
+
'double' => PARSING['float'],
|
39
|
+
'dateTime' => PARSING['datetime']
|
40
|
+
)
|
41
|
+
end
|
44
42
|
|
43
|
+
class << self
|
45
44
|
# Get the current parser class.
|
46
45
|
def parser
|
47
46
|
return @parser if @parser
|
@@ -85,13 +84,22 @@ module MultiXml
|
|
85
84
|
end
|
86
85
|
end
|
87
86
|
|
88
|
-
# Parse an XML string into Ruby.
|
87
|
+
# Parse an XML string or IO into Ruby.
|
89
88
|
#
|
90
89
|
# <b>Options</b>
|
91
90
|
#
|
92
91
|
# <tt>:symbolize_keys</tt> :: If true, will use symbols instead of strings for the keys.
|
93
92
|
def parse(xml, options={})
|
94
|
-
xml
|
93
|
+
xml ||= ''
|
94
|
+
|
95
|
+
xml.strip! if xml.respond_to?(:strip!)
|
96
|
+
|
97
|
+
xml = StringIO.new(xml) unless xml.respond_to?(:read)
|
98
|
+
|
99
|
+
char = xml.getc
|
100
|
+
return {} if char.nil?
|
101
|
+
xml.ungetc(char)
|
102
|
+
|
95
103
|
begin
|
96
104
|
hash = typecast_xml_value(undasherize_keys(parser.parse(xml))) || {}
|
97
105
|
rescue parser.parse_error => error
|
@@ -118,7 +126,7 @@ module MultiXml
|
|
118
126
|
private
|
119
127
|
|
120
128
|
# TODO: Add support for other encodings
|
121
|
-
def
|
129
|
+
def parse_binary(binary, entity) #:nodoc:
|
122
130
|
case entity['encoding']
|
123
131
|
when 'base64'
|
124
132
|
Base64.decode64(binary)
|
@@ -127,7 +135,7 @@ module MultiXml
|
|
127
135
|
end
|
128
136
|
end
|
129
137
|
|
130
|
-
def
|
138
|
+
def parse_file(file, entity)
|
131
139
|
f = StringIO.new(Base64.decode64(file))
|
132
140
|
f.extend(FileLike)
|
133
141
|
f.original_filename = entity['name']
|
@@ -172,13 +180,14 @@ module MultiXml
|
|
172
180
|
case value
|
173
181
|
when Hash
|
174
182
|
if value['type'] == 'array'
|
175
|
-
_, entries =
|
176
|
-
|
183
|
+
_, entries = value.detect {|key, _| key != 'type'}
|
184
|
+
|
185
|
+
if entries.nil? || (entries.is_a?(String) && entries.strip.empty?)
|
177
186
|
[]
|
178
187
|
else
|
179
188
|
case entries
|
180
189
|
when Array
|
181
|
-
entries.map{|
|
190
|
+
entries.map {|entry| typecast_xml_value(entry)}
|
182
191
|
when Hash
|
183
192
|
[typecast_xml_value(entries)]
|
184
193
|
else
|
@@ -195,7 +204,7 @@ module MultiXml
|
|
195
204
|
elsif value['type'] == 'string' && value['nil'] != 'true'
|
196
205
|
''
|
197
206
|
# blank or nil parsed values are represented by nil
|
198
|
-
elsif value.
|
207
|
+
elsif value.empty? || value['nil'] == 'true'
|
199
208
|
nil
|
200
209
|
# If the type is the only element which makes it then
|
201
210
|
# this still makes the value nil, except if type is
|
@@ -203,8 +212,8 @@ module MultiXml
|
|
203
212
|
elsif value['type'] && value.size == 1 && !value['type'].is_a?(Hash)
|
204
213
|
nil
|
205
214
|
else
|
206
|
-
xml_value = value.inject({}) do |hash, (
|
207
|
-
hash[
|
215
|
+
xml_value = value.inject({}) do |hash, (k, v)|
|
216
|
+
hash[k] = typecast_xml_value(v)
|
208
217
|
hash
|
209
218
|
end
|
210
219
|
|
@@ -1,79 +1,30 @@
|
|
1
1
|
require 'libxml' unless defined?(LibXML)
|
2
|
+
require 'multi_xml/parsers/libxml2_parser'
|
2
3
|
|
3
4
|
module MultiXml
|
4
5
|
module Parsers
|
5
6
|
module Libxml #:nodoc:
|
7
|
+
include Libxml2Parser
|
8
|
+
|
6
9
|
extend self
|
7
|
-
def parse_error; ::LibXML::XML::Error; end
|
8
10
|
|
9
|
-
|
10
|
-
# xml::
|
11
|
-
# XML Document string or IO to parse
|
12
|
-
def parse(xml)
|
13
|
-
if !xml.respond_to?(:read)
|
14
|
-
xml = StringIO.new(xml || '')
|
15
|
-
end
|
11
|
+
def parse_error() ::LibXML::XML::Error end
|
16
12
|
|
17
|
-
|
18
|
-
|
19
|
-
{}
|
20
|
-
else
|
21
|
-
xml.ungetc(char)
|
22
|
-
LibXML::XML::Parser.io(xml).parse.to_hash
|
23
|
-
end
|
13
|
+
def parse(xml)
|
14
|
+
node_to_hash(LibXML::XML::Parser.io(xml).parse.root)
|
24
15
|
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
16
|
|
29
|
-
|
30
|
-
|
31
|
-
module Document #:nodoc:
|
32
|
-
def to_hash
|
33
|
-
root.to_hash
|
17
|
+
def each_child(node, &block)
|
18
|
+
node.each_child &block
|
34
19
|
end
|
35
|
-
end
|
36
20
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
# Convert XML document to hash
|
41
|
-
#
|
42
|
-
# hash::
|
43
|
-
# Hash to merge the converted element into.
|
44
|
-
def to_hash(hash={})
|
45
|
-
node_hash = {}
|
46
|
-
|
47
|
-
# Insert node hash into parent hash correctly.
|
48
|
-
case hash[name]
|
49
|
-
when Array then hash[name] << node_hash
|
50
|
-
when Hash then hash[name] = [hash[name], node_hash]
|
51
|
-
when nil then hash[name] = node_hash
|
52
|
-
end
|
53
|
-
|
54
|
-
# Handle child elements
|
55
|
-
each_child do |c|
|
56
|
-
if c.element?
|
57
|
-
c.to_hash(node_hash)
|
58
|
-
elsif c.text? || c.cdata?
|
59
|
-
node_hash[CONTENT_ROOT] ||= ''
|
60
|
-
node_hash[CONTENT_ROOT] << c.content
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
# Remove content node if it is blank
|
65
|
-
if node_hash.length > 1 && node_hash[CONTENT_ROOT].blank?
|
66
|
-
node_hash.delete(CONTENT_ROOT)
|
67
|
-
end
|
68
|
-
|
69
|
-
# Handle attributes
|
70
|
-
each_attr { |a| node_hash[a.name] = a.value }
|
21
|
+
def each_attr(node, &block)
|
22
|
+
node.each_attr &block
|
23
|
+
end
|
71
24
|
|
72
|
-
|
25
|
+
def node_name(node)
|
26
|
+
node.name
|
73
27
|
end
|
74
28
|
end
|
75
29
|
end
|
76
30
|
end
|
77
|
-
|
78
|
-
LibXML::XML::Document.send(:include, LibXML::Conversions::Document)
|
79
|
-
LibXML::XML::Node.send(:include, LibXML::Conversions::Node)
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module MultiXml
|
2
|
+
module Parsers
|
3
|
+
module Libxml2Parser #:nodoc:
|
4
|
+
# Convert XML document to hash
|
5
|
+
#
|
6
|
+
# node::
|
7
|
+
# The XML node object to convert to a hash.
|
8
|
+
#
|
9
|
+
# hash::
|
10
|
+
# Hash to merge the converted element into.
|
11
|
+
def node_to_hash(node, hash={})
|
12
|
+
node_hash = {MultiXml::CONTENT_ROOT => ''}
|
13
|
+
|
14
|
+
name = node_name(node)
|
15
|
+
|
16
|
+
# Insert node hash into parent hash correctly.
|
17
|
+
case hash[name]
|
18
|
+
when Array then hash[name] << node_hash
|
19
|
+
when Hash then hash[name] = [hash[name], node_hash]
|
20
|
+
when nil then hash[name] = node_hash
|
21
|
+
end
|
22
|
+
|
23
|
+
# Handle child elements
|
24
|
+
each_child(node) do |c|
|
25
|
+
if c.element?
|
26
|
+
node_to_hash(c, node_hash)
|
27
|
+
elsif c.text? || c.cdata?
|
28
|
+
node_hash[MultiXml::CONTENT_ROOT] << c.content
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Remove content node if it is empty
|
33
|
+
if node_hash[MultiXml::CONTENT_ROOT].strip.empty?
|
34
|
+
node_hash.delete(MultiXml::CONTENT_ROOT)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Handle attributes
|
38
|
+
each_attr(node) {|a| node_hash[node_name(a)] = a.value }
|
39
|
+
|
40
|
+
hash
|
41
|
+
end
|
42
|
+
|
43
|
+
# Parse an XML Document IO into a simple hash.
|
44
|
+
# xml::
|
45
|
+
# XML Document IO to parse
|
46
|
+
def parse(xml)
|
47
|
+
raise NotImplementedError, "inheritor should define #{__method__}"
|
48
|
+
end
|
49
|
+
|
50
|
+
# :stopdoc:
|
51
|
+
private
|
52
|
+
|
53
|
+
def each_child(*args)
|
54
|
+
raise NotImplementedError, "inheritor should define #{__method__}"
|
55
|
+
end
|
56
|
+
|
57
|
+
def each_attr(*args)
|
58
|
+
raise NotImplementedError, "inheritor should define #{__method__}"
|
59
|
+
end
|
60
|
+
|
61
|
+
def node_name(*args)
|
62
|
+
raise NotImplementedError, "inheritor should define #{__method__}"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -1,79 +1,32 @@
|
|
1
1
|
require 'nokogiri' unless defined?(Nokogiri)
|
2
|
+
require 'multi_xml/parsers/libxml2_parser'
|
2
3
|
|
3
4
|
module MultiXml
|
4
5
|
module Parsers
|
5
6
|
module Nokogiri #:nodoc:
|
7
|
+
include Libxml2Parser
|
8
|
+
|
6
9
|
extend self
|
7
|
-
def parse_error; ::Nokogiri::XML::SyntaxError; end
|
8
10
|
|
9
|
-
|
10
|
-
# xml::
|
11
|
-
# XML Document string or IO to parse
|
12
|
-
def parse(xml)
|
13
|
-
if !xml.respond_to?(:read)
|
14
|
-
xml = StringIO.new(xml || '')
|
15
|
-
end
|
11
|
+
def parse_error() ::Nokogiri::XML::SyntaxError end
|
16
12
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
xml.ungetc(char)
|
22
|
-
doc = ::Nokogiri::XML(xml)
|
23
|
-
raise doc.errors.first if doc.errors.length > 0
|
24
|
-
doc.to_hash
|
25
|
-
end
|
13
|
+
def parse(xml)
|
14
|
+
doc = ::Nokogiri::XML(xml)
|
15
|
+
raise doc.errors.first if doc.errors.length > 0
|
16
|
+
node_to_hash(doc.root)
|
26
17
|
end
|
27
18
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
root.to_hash
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
module Node #:nodoc:
|
36
|
-
CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)
|
37
|
-
|
38
|
-
# Convert XML document to hash
|
39
|
-
#
|
40
|
-
# hash::
|
41
|
-
# Hash to merge the converted element into.
|
42
|
-
def to_hash(hash={})
|
43
|
-
node_hash = {}
|
44
|
-
|
45
|
-
# Insert node hash into parent hash correctly.
|
46
|
-
case hash[name]
|
47
|
-
when Array then hash[name] << node_hash
|
48
|
-
when Hash then hash[name] = [hash[name], node_hash]
|
49
|
-
when nil then hash[name] = node_hash
|
50
|
-
end
|
51
|
-
|
52
|
-
# Handle child elements
|
53
|
-
children.each do |c|
|
54
|
-
if c.element?
|
55
|
-
c.to_hash(node_hash)
|
56
|
-
elsif c.text? || c.cdata?
|
57
|
-
node_hash[CONTENT_ROOT] ||= ''
|
58
|
-
node_hash[CONTENT_ROOT] << c.content
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
# Remove content node if it is blank and there are child tags
|
63
|
-
if node_hash.length > 1 && node_hash[CONTENT_ROOT].blank?
|
64
|
-
node_hash.delete(CONTENT_ROOT)
|
65
|
-
end
|
66
|
-
|
67
|
-
# Handle attributes
|
68
|
-
attribute_nodes.each { |a| node_hash[a.node_name] = a.value }
|
19
|
+
def each_child(node, &block)
|
20
|
+
node.children.each &block
|
21
|
+
end
|
69
22
|
|
70
|
-
|
71
|
-
|
72
|
-
end
|
23
|
+
def each_attr(node, &block)
|
24
|
+
node.attribute_nodes.each &block
|
73
25
|
end
|
74
26
|
|
75
|
-
|
76
|
-
|
27
|
+
def node_name(node)
|
28
|
+
node.node_name
|
29
|
+
end
|
77
30
|
end
|
78
31
|
end
|
79
32
|
end
|