multi_xml 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of multi_xml might be problematic. Click here for more details.
- data/.autotest +1 -0
- data/.travis.yml +7 -0
- data/.yardopts +1 -1
- data/Gemfile +3 -3
- data/{LICENSE.mkd → LICENSE.md} +0 -0
- data/{README.mkd → README.md} +37 -7
- data/Rakefile +3 -1
- data/lib/multi_xml.rb +53 -44
- data/lib/multi_xml/parsers/libxml.rb +13 -62
- data/lib/multi_xml/parsers/libxml2_parser.rb +66 -0
- data/lib/multi_xml/parsers/nokogiri.rb +16 -63
- data/lib/multi_xml/parsers/rexml.rb +8 -20
- data/lib/multi_xml/version.rb +1 -1
- data/multi_xml.gemspec +20 -22
- data/spec/helper.rb +0 -1
- data/spec/multi_xml_spec.rb +14 -551
- data/spec/parser_shared_example.rb +550 -0
- metadata +80 -94
- data/lib/multi_xml/core_extensions.rb +0 -108
data/.autotest
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'autotest/bundler'
|
data/.travis.yml
ADDED
data/.yardopts
CHANGED
data/Gemfile
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
source
|
1
|
+
source 'http://rubygems.org'
|
2
2
|
|
3
3
|
group :development, :test do
|
4
|
-
gem 'libxml-ruby', "~>
|
5
|
-
gem 'nokogiri',
|
4
|
+
gem 'libxml-ruby', "~> 2.0", :require => nil, :platforms => :mri
|
5
|
+
gem 'nokogiri', "~> 1.4", :require => nil
|
6
6
|
end
|
7
7
|
|
8
8
|
gemspec
|
data/{LICENSE.mkd → LICENSE.md}
RENAMED
File without changes
|
data/{README.mkd → README.md}
RENAMED
@@ -4,11 +4,15 @@ A generic swappable back-end for XML parsing
|
|
4
4
|
|
5
5
|
Installation
|
6
6
|
------------
|
7
|
-
|
7
|
+
gem install multi_xml
|
8
8
|
|
9
9
|
Documentation
|
10
10
|
-------------
|
11
|
-
|
11
|
+
[http://rdoc.info/gems/multi_xml](http://rdoc.info/gems/multi_xml)
|
12
|
+
|
13
|
+
Continuous Integration
|
14
|
+
----------------------
|
15
|
+
[![Build Status](https://secure.travis-ci.org/sferik/multi_xml.png)](http://travis-ci.org/sferik/multi_xml)
|
12
16
|
|
13
17
|
Usage Examples
|
14
18
|
--------------
|
@@ -50,15 +54,15 @@ Here are some ways *you* can contribute:
|
|
50
54
|
* by writing specifications
|
51
55
|
* by writing code (**no patch is too small**: fix typos, add comments, clean up inconsistent whitespace)
|
52
56
|
* by refactoring code
|
53
|
-
* by resolving [issues](
|
57
|
+
* by resolving [issues](https://github.com/sferik/multi_xml/issues)
|
54
58
|
* by reviewing patches
|
55
59
|
|
56
60
|
Submitting an Issue
|
57
61
|
-------------------
|
58
|
-
We use the [GitHub issue tracker](
|
62
|
+
We use the [GitHub issue tracker](https://github.com/sferik/multi_xml/issues) to track bugs and
|
59
63
|
features. Before submitting a bug report or feature request, check to make sure it hasn't already
|
60
64
|
been submitted. You can indicate support for an existing issue by voting it up. When submitting a
|
61
|
-
bug report, please include a [Gist](
|
65
|
+
bug report, please include a [Gist](https://gist.github.com/) that includes a stack trace and any
|
62
66
|
details that may be necessary to reproduce the bug, including your gem version, Ruby version, and
|
63
67
|
operating system. Ideally, a bug report should include a pull request with failing specs.
|
64
68
|
|
@@ -74,11 +78,37 @@ Submitting a Pull Request
|
|
74
78
|
8. Commit and push your changes.
|
75
79
|
9. Submit a pull request. Please do not include changes to the gemspec, version, or history file. (If you want to create your own version for some reason, please do so in a separate commit.)
|
76
80
|
|
81
|
+
Supported Rubies
|
82
|
+
----------------
|
83
|
+
This library aims to support and is [tested
|
84
|
+
against](http://travis-ci.org/sferik/multi_xml) the following Ruby
|
85
|
+
implementations:
|
86
|
+
|
87
|
+
* Ruby 1.8.7
|
88
|
+
* Ruby 1.9.1
|
89
|
+
* Ruby 1.9.2
|
90
|
+
* [Rubinius](http://rubini.us)
|
91
|
+
* [Ruby Enterprise Edition](http://www.rubyenterpriseedition.com/)
|
92
|
+
|
93
|
+
If something doesn't work on one of these interpreters, it should be considered
|
94
|
+
a bug.
|
95
|
+
|
96
|
+
This library may inadvertently work (or seem to work) on other Ruby
|
97
|
+
implementations; however, support will only be provided for the versions listed
|
98
|
+
above.
|
99
|
+
|
100
|
+
If you would like this library to support another Ruby version, you may
|
101
|
+
volunteer to be a maintainer. Being a maintainer entails making sure all tests
|
102
|
+
run and pass on that implementation. When something breaks on your
|
103
|
+
implementation, you will be personally responsible for providing patches in a
|
104
|
+
timely fashion. If critical issues for a particular implementation exist at the
|
105
|
+
time of a major release, support for that Ruby version may be dropped.
|
106
|
+
|
77
107
|
Inspiration
|
78
108
|
-----------
|
79
|
-
MultiXML was inspired by [MultiJSON](
|
109
|
+
MultiXML was inspired by [MultiJSON](https://github.com/intridea/multi_json/).
|
80
110
|
|
81
111
|
Copyright
|
82
112
|
---------
|
83
113
|
Copyright (c) 2010 Erik Michaels-Ober.
|
84
|
-
See [LICENSE](https://github.com/sferik/multi_xml/blob/master/LICENSE.
|
114
|
+
See [LICENSE](https://github.com/sferik/multi_xml/blob/master/LICENSE.md) for details.
|
data/Rakefile
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
|
1
3
|
require 'bundler'
|
2
4
|
Bundler::GemHelper.install_tasks
|
3
5
|
|
@@ -10,7 +12,7 @@ task :default => :spec
|
|
10
12
|
namespace :doc do
|
11
13
|
require 'yard'
|
12
14
|
YARD::Rake::YardocTask.new do |task|
|
13
|
-
task.files = ['LICENSE.
|
15
|
+
task.files = ['LICENSE.md', 'lib/**/*.rb']
|
14
16
|
task.options = [
|
15
17
|
'--no-private',
|
16
18
|
'--protected',
|
data/lib/multi_xml.rb
CHANGED
@@ -1,47 +1,46 @@
|
|
1
1
|
require 'base64'
|
2
2
|
require 'bigdecimal'
|
3
3
|
require 'date'
|
4
|
-
require '
|
4
|
+
require 'stringio'
|
5
5
|
require 'time'
|
6
6
|
require 'yaml'
|
7
7
|
|
8
8
|
module MultiXml
|
9
9
|
class ParseError < StandardError; end
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
PARSING
|
40
|
-
|
41
|
-
|
42
|
-
)
|
43
|
-
end
|
11
|
+
REQUIREMENT_MAP = [
|
12
|
+
['libxml', :libxml],
|
13
|
+
['nokogiri', :nokogiri],
|
14
|
+
['rexml/document', :rexml]
|
15
|
+
] unless defined?(REQUIREMENT_MAP)
|
16
|
+
|
17
|
+
CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)
|
18
|
+
|
19
|
+
# TODO: use Time.xmlschema instead of Time.parse;
|
20
|
+
# use regexp instead of Date.parse
|
21
|
+
unless defined?(PARSING)
|
22
|
+
PARSING = {
|
23
|
+
'symbol' => Proc.new{|symbol| symbol.to_sym},
|
24
|
+
'date' => Proc.new{|date| Date.parse(date)},
|
25
|
+
'datetime' => Proc.new{|time| Time.parse(time).utc rescue DateTime.parse(time).utc},
|
26
|
+
'integer' => Proc.new{|integer| integer.to_i},
|
27
|
+
'float' => Proc.new{|float| float.to_f},
|
28
|
+
'decimal' => Proc.new{|number| BigDecimal(number)},
|
29
|
+
'boolean' => Proc.new{|boolean| !%w(0 false).include?(boolean.strip)},
|
30
|
+
'string' => Proc.new{|string| string.to_s},
|
31
|
+
'yaml' => Proc.new{|yaml| YAML::load(yaml) rescue yaml},
|
32
|
+
'base64Binary' => Proc.new{|binary| binary.unpack('m').first},
|
33
|
+
'binary' => Proc.new{|binary, entity| parse_binary(binary, entity)},
|
34
|
+
'file' => Proc.new{|file, entity| parse_file(file, entity)}
|
35
|
+
}
|
36
|
+
|
37
|
+
PARSING.update(
|
38
|
+
'double' => PARSING['float'],
|
39
|
+
'dateTime' => PARSING['datetime']
|
40
|
+
)
|
41
|
+
end
|
44
42
|
|
43
|
+
class << self
|
45
44
|
# Get the current parser class.
|
46
45
|
def parser
|
47
46
|
return @parser if @parser
|
@@ -85,13 +84,22 @@ module MultiXml
|
|
85
84
|
end
|
86
85
|
end
|
87
86
|
|
88
|
-
# Parse an XML string into Ruby.
|
87
|
+
# Parse an XML string or IO into Ruby.
|
89
88
|
#
|
90
89
|
# <b>Options</b>
|
91
90
|
#
|
92
91
|
# <tt>:symbolize_keys</tt> :: If true, will use symbols instead of strings for the keys.
|
93
92
|
def parse(xml, options={})
|
94
|
-
xml
|
93
|
+
xml ||= ''
|
94
|
+
|
95
|
+
xml.strip! if xml.respond_to?(:strip!)
|
96
|
+
|
97
|
+
xml = StringIO.new(xml) unless xml.respond_to?(:read)
|
98
|
+
|
99
|
+
char = xml.getc
|
100
|
+
return {} if char.nil?
|
101
|
+
xml.ungetc(char)
|
102
|
+
|
95
103
|
begin
|
96
104
|
hash = typecast_xml_value(undasherize_keys(parser.parse(xml))) || {}
|
97
105
|
rescue parser.parse_error => error
|
@@ -118,7 +126,7 @@ module MultiXml
|
|
118
126
|
private
|
119
127
|
|
120
128
|
# TODO: Add support for other encodings
|
121
|
-
def
|
129
|
+
def parse_binary(binary, entity) #:nodoc:
|
122
130
|
case entity['encoding']
|
123
131
|
when 'base64'
|
124
132
|
Base64.decode64(binary)
|
@@ -127,7 +135,7 @@ module MultiXml
|
|
127
135
|
end
|
128
136
|
end
|
129
137
|
|
130
|
-
def
|
138
|
+
def parse_file(file, entity)
|
131
139
|
f = StringIO.new(Base64.decode64(file))
|
132
140
|
f.extend(FileLike)
|
133
141
|
f.original_filename = entity['name']
|
@@ -172,13 +180,14 @@ module MultiXml
|
|
172
180
|
case value
|
173
181
|
when Hash
|
174
182
|
if value['type'] == 'array'
|
175
|
-
_, entries =
|
176
|
-
|
183
|
+
_, entries = value.detect {|key, _| key != 'type'}
|
184
|
+
|
185
|
+
if entries.nil? || (entries.is_a?(String) && entries.strip.empty?)
|
177
186
|
[]
|
178
187
|
else
|
179
188
|
case entries
|
180
189
|
when Array
|
181
|
-
entries.map{|
|
190
|
+
entries.map {|entry| typecast_xml_value(entry)}
|
182
191
|
when Hash
|
183
192
|
[typecast_xml_value(entries)]
|
184
193
|
else
|
@@ -195,7 +204,7 @@ module MultiXml
|
|
195
204
|
elsif value['type'] == 'string' && value['nil'] != 'true'
|
196
205
|
''
|
197
206
|
# blank or nil parsed values are represented by nil
|
198
|
-
elsif value.
|
207
|
+
elsif value.empty? || value['nil'] == 'true'
|
199
208
|
nil
|
200
209
|
# If the type is the only element which makes it then
|
201
210
|
# this still makes the value nil, except if type is
|
@@ -203,8 +212,8 @@ module MultiXml
|
|
203
212
|
elsif value['type'] && value.size == 1 && !value['type'].is_a?(Hash)
|
204
213
|
nil
|
205
214
|
else
|
206
|
-
xml_value = value.inject({}) do |hash, (
|
207
|
-
hash[
|
215
|
+
xml_value = value.inject({}) do |hash, (k, v)|
|
216
|
+
hash[k] = typecast_xml_value(v)
|
208
217
|
hash
|
209
218
|
end
|
210
219
|
|
@@ -1,79 +1,30 @@
|
|
1
1
|
require 'libxml' unless defined?(LibXML)
|
2
|
+
require 'multi_xml/parsers/libxml2_parser'
|
2
3
|
|
3
4
|
module MultiXml
|
4
5
|
module Parsers
|
5
6
|
module Libxml #:nodoc:
|
7
|
+
include Libxml2Parser
|
8
|
+
|
6
9
|
extend self
|
7
|
-
def parse_error; ::LibXML::XML::Error; end
|
8
10
|
|
9
|
-
|
10
|
-
# xml::
|
11
|
-
# XML Document string or IO to parse
|
12
|
-
def parse(xml)
|
13
|
-
if !xml.respond_to?(:read)
|
14
|
-
xml = StringIO.new(xml || '')
|
15
|
-
end
|
11
|
+
def parse_error() ::LibXML::XML::Error end
|
16
12
|
|
17
|
-
|
18
|
-
|
19
|
-
{}
|
20
|
-
else
|
21
|
-
xml.ungetc(char)
|
22
|
-
LibXML::XML::Parser.io(xml).parse.to_hash
|
23
|
-
end
|
13
|
+
def parse(xml)
|
14
|
+
node_to_hash(LibXML::XML::Parser.io(xml).parse.root)
|
24
15
|
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
16
|
|
29
|
-
|
30
|
-
|
31
|
-
module Document #:nodoc:
|
32
|
-
def to_hash
|
33
|
-
root.to_hash
|
17
|
+
def each_child(node, &block)
|
18
|
+
node.each_child &block
|
34
19
|
end
|
35
|
-
end
|
36
20
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
# Convert XML document to hash
|
41
|
-
#
|
42
|
-
# hash::
|
43
|
-
# Hash to merge the converted element into.
|
44
|
-
def to_hash(hash={})
|
45
|
-
node_hash = {}
|
46
|
-
|
47
|
-
# Insert node hash into parent hash correctly.
|
48
|
-
case hash[name]
|
49
|
-
when Array then hash[name] << node_hash
|
50
|
-
when Hash then hash[name] = [hash[name], node_hash]
|
51
|
-
when nil then hash[name] = node_hash
|
52
|
-
end
|
53
|
-
|
54
|
-
# Handle child elements
|
55
|
-
each_child do |c|
|
56
|
-
if c.element?
|
57
|
-
c.to_hash(node_hash)
|
58
|
-
elsif c.text? || c.cdata?
|
59
|
-
node_hash[CONTENT_ROOT] ||= ''
|
60
|
-
node_hash[CONTENT_ROOT] << c.content
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
# Remove content node if it is blank
|
65
|
-
if node_hash.length > 1 && node_hash[CONTENT_ROOT].blank?
|
66
|
-
node_hash.delete(CONTENT_ROOT)
|
67
|
-
end
|
68
|
-
|
69
|
-
# Handle attributes
|
70
|
-
each_attr { |a| node_hash[a.name] = a.value }
|
21
|
+
def each_attr(node, &block)
|
22
|
+
node.each_attr &block
|
23
|
+
end
|
71
24
|
|
72
|
-
|
25
|
+
def node_name(node)
|
26
|
+
node.name
|
73
27
|
end
|
74
28
|
end
|
75
29
|
end
|
76
30
|
end
|
77
|
-
|
78
|
-
LibXML::XML::Document.send(:include, LibXML::Conversions::Document)
|
79
|
-
LibXML::XML::Node.send(:include, LibXML::Conversions::Node)
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module MultiXml
|
2
|
+
module Parsers
|
3
|
+
module Libxml2Parser #:nodoc:
|
4
|
+
# Convert XML document to hash
|
5
|
+
#
|
6
|
+
# node::
|
7
|
+
# The XML node object to convert to a hash.
|
8
|
+
#
|
9
|
+
# hash::
|
10
|
+
# Hash to merge the converted element into.
|
11
|
+
def node_to_hash(node, hash={})
|
12
|
+
node_hash = {MultiXml::CONTENT_ROOT => ''}
|
13
|
+
|
14
|
+
name = node_name(node)
|
15
|
+
|
16
|
+
# Insert node hash into parent hash correctly.
|
17
|
+
case hash[name]
|
18
|
+
when Array then hash[name] << node_hash
|
19
|
+
when Hash then hash[name] = [hash[name], node_hash]
|
20
|
+
when nil then hash[name] = node_hash
|
21
|
+
end
|
22
|
+
|
23
|
+
# Handle child elements
|
24
|
+
each_child(node) do |c|
|
25
|
+
if c.element?
|
26
|
+
node_to_hash(c, node_hash)
|
27
|
+
elsif c.text? || c.cdata?
|
28
|
+
node_hash[MultiXml::CONTENT_ROOT] << c.content
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Remove content node if it is empty
|
33
|
+
if node_hash[MultiXml::CONTENT_ROOT].strip.empty?
|
34
|
+
node_hash.delete(MultiXml::CONTENT_ROOT)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Handle attributes
|
38
|
+
each_attr(node) {|a| node_hash[node_name(a)] = a.value }
|
39
|
+
|
40
|
+
hash
|
41
|
+
end
|
42
|
+
|
43
|
+
# Parse an XML Document IO into a simple hash.
|
44
|
+
# xml::
|
45
|
+
# XML Document IO to parse
|
46
|
+
def parse(xml)
|
47
|
+
raise NotImplementedError, "inheritor should define #{__method__}"
|
48
|
+
end
|
49
|
+
|
50
|
+
# :stopdoc:
|
51
|
+
private
|
52
|
+
|
53
|
+
def each_child(*args)
|
54
|
+
raise NotImplementedError, "inheritor should define #{__method__}"
|
55
|
+
end
|
56
|
+
|
57
|
+
def each_attr(*args)
|
58
|
+
raise NotImplementedError, "inheritor should define #{__method__}"
|
59
|
+
end
|
60
|
+
|
61
|
+
def node_name(*args)
|
62
|
+
raise NotImplementedError, "inheritor should define #{__method__}"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -1,79 +1,32 @@
|
|
1
1
|
require 'nokogiri' unless defined?(Nokogiri)
|
2
|
+
require 'multi_xml/parsers/libxml2_parser'
|
2
3
|
|
3
4
|
module MultiXml
|
4
5
|
module Parsers
|
5
6
|
module Nokogiri #:nodoc:
|
7
|
+
include Libxml2Parser
|
8
|
+
|
6
9
|
extend self
|
7
|
-
def parse_error; ::Nokogiri::XML::SyntaxError; end
|
8
10
|
|
9
|
-
|
10
|
-
# xml::
|
11
|
-
# XML Document string or IO to parse
|
12
|
-
def parse(xml)
|
13
|
-
if !xml.respond_to?(:read)
|
14
|
-
xml = StringIO.new(xml || '')
|
15
|
-
end
|
11
|
+
def parse_error() ::Nokogiri::XML::SyntaxError end
|
16
12
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
xml.ungetc(char)
|
22
|
-
doc = ::Nokogiri::XML(xml)
|
23
|
-
raise doc.errors.first if doc.errors.length > 0
|
24
|
-
doc.to_hash
|
25
|
-
end
|
13
|
+
def parse(xml)
|
14
|
+
doc = ::Nokogiri::XML(xml)
|
15
|
+
raise doc.errors.first if doc.errors.length > 0
|
16
|
+
node_to_hash(doc.root)
|
26
17
|
end
|
27
18
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
root.to_hash
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
module Node #:nodoc:
|
36
|
-
CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)
|
37
|
-
|
38
|
-
# Convert XML document to hash
|
39
|
-
#
|
40
|
-
# hash::
|
41
|
-
# Hash to merge the converted element into.
|
42
|
-
def to_hash(hash={})
|
43
|
-
node_hash = {}
|
44
|
-
|
45
|
-
# Insert node hash into parent hash correctly.
|
46
|
-
case hash[name]
|
47
|
-
when Array then hash[name] << node_hash
|
48
|
-
when Hash then hash[name] = [hash[name], node_hash]
|
49
|
-
when nil then hash[name] = node_hash
|
50
|
-
end
|
51
|
-
|
52
|
-
# Handle child elements
|
53
|
-
children.each do |c|
|
54
|
-
if c.element?
|
55
|
-
c.to_hash(node_hash)
|
56
|
-
elsif c.text? || c.cdata?
|
57
|
-
node_hash[CONTENT_ROOT] ||= ''
|
58
|
-
node_hash[CONTENT_ROOT] << c.content
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
# Remove content node if it is blank and there are child tags
|
63
|
-
if node_hash.length > 1 && node_hash[CONTENT_ROOT].blank?
|
64
|
-
node_hash.delete(CONTENT_ROOT)
|
65
|
-
end
|
66
|
-
|
67
|
-
# Handle attributes
|
68
|
-
attribute_nodes.each { |a| node_hash[a.node_name] = a.value }
|
19
|
+
def each_child(node, &block)
|
20
|
+
node.children.each &block
|
21
|
+
end
|
69
22
|
|
70
|
-
|
71
|
-
|
72
|
-
end
|
23
|
+
def each_attr(node, &block)
|
24
|
+
node.attribute_nodes.each &block
|
73
25
|
end
|
74
26
|
|
75
|
-
|
76
|
-
|
27
|
+
def node_name(node)
|
28
|
+
node.node_name
|
29
|
+
end
|
77
30
|
end
|
78
31
|
end
|
79
32
|
end
|