multi_xml 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of multi_xml might be problematic. Click here for more details.
- data/.gitignore +2 -1
- data/.rspec +2 -0
- data/Gemfile +2 -3
- data/Gemfile.lock +17 -9
- data/README.rdoc +6 -6
- data/Rakefile +6 -16
- data/lib/multi_xml.rb +218 -54
- data/lib/multi_xml/parsers/libxml.rb +79 -0
- data/lib/multi_xml/parsers/nokogiri.rb +79 -0
- data/lib/multi_xml/parsers/rexml.rb +127 -0
- data/lib/multi_xml/version.rb +1 -1
- data/multi_xml.gemspec +5 -6
- data/spec/multi_xml_spec.rb +455 -20
- data/spec/spec_helper.rb +1 -6
- metadata +27 -47
- data/lib/multi_xml/engines/rexml.rb +0 -221
- data/spec/spec.opts +0 -2
data/.rspec
ADDED
data/Gemfile
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
3
|
group :development, :test do
|
4
|
-
gem '
|
5
|
-
gem '
|
6
|
-
gem 'nokogiri', "~> 1.4.3", :require => nil
|
4
|
+
gem 'libxml-ruby', "~> 1.1", :require => nil
|
5
|
+
gem 'nokogiri', "~> 1.4", :require => nil
|
7
6
|
end
|
8
7
|
|
9
8
|
gemspec
|
data/Gemfile.lock
CHANGED
@@ -1,25 +1,33 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
multi_xml (0.0
|
4
|
+
multi_xml (0.1.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: http://rubygems.org/
|
8
8
|
specs:
|
9
|
-
|
9
|
+
diff-lcs (1.1.2)
|
10
10
|
libxml-ruby (1.1.4)
|
11
11
|
nokogiri (1.4.3.1)
|
12
12
|
rake (0.8.7)
|
13
|
-
rspec (
|
13
|
+
rspec (2.0.0)
|
14
|
+
rspec-core (= 2.0.0)
|
15
|
+
rspec-expectations (= 2.0.0)
|
16
|
+
rspec-mocks (= 2.0.0)
|
17
|
+
rspec-core (2.0.0)
|
18
|
+
rspec-expectations (2.0.0)
|
19
|
+
diff-lcs (>= 1.1.2)
|
20
|
+
rspec-mocks (2.0.0)
|
21
|
+
rspec-core (= 2.0.0)
|
22
|
+
rspec-expectations (= 2.0.0)
|
14
23
|
|
15
24
|
PLATFORMS
|
16
25
|
ruby
|
17
26
|
|
18
27
|
DEPENDENCIES
|
19
|
-
bundler (~> 1.0
|
20
|
-
|
21
|
-
libxml-ruby (~> 1.1.4)
|
28
|
+
bundler (~> 1.0)
|
29
|
+
libxml-ruby (~> 1.1)
|
22
30
|
multi_xml!
|
23
|
-
nokogiri (~> 1.4
|
24
|
-
rake (~> 0.8
|
25
|
-
rspec (~>
|
31
|
+
nokogiri (~> 1.4)
|
32
|
+
rake (~> 0.8)
|
33
|
+
rspec (~> 2.0)
|
data/README.rdoc
CHANGED
@@ -4,18 +4,18 @@ Lots of Ruby libraries utilize XML parsing in some form, and everyone has their
|
|
4
4
|
|
5
5
|
require 'multi_xml'
|
6
6
|
|
7
|
-
MultiXml.
|
7
|
+
MultiXml.parser = :nokogiri
|
8
8
|
MultiXml.parse('<tag>This is the contents</tag>') # parsed using Nokogiri
|
9
9
|
|
10
|
-
MultiXml.
|
11
|
-
MultiXml.
|
10
|
+
MultiXml.parser = :rexml
|
11
|
+
MultiXml.parser = MultiXml::Parsers::Rexml # equivalent to previous line
|
12
12
|
MultiXml.parse('<tag>This is the contents</tag>') # parsed using REXML
|
13
13
|
|
14
|
-
The <tt>
|
14
|
+
The <tt>parser</tt> setter takes either a symbol or a class (to allow for custom XML parsers) that responds to <tt>.parse</tt> at the class level.
|
15
15
|
|
16
|
-
MultiXML tries to have intelligent defaulting. That is, if you have any of the supported
|
16
|
+
MultiXML tries to have intelligent defaulting. That is, if you have any of the supported parsers already loaded, it will utilize them before attempting to load any. When loading, libraries are ordered by speed. First LibXML, then Nokogiri, then REXML.
|
17
17
|
|
18
|
-
==
|
18
|
+
== Inspiration
|
19
19
|
|
20
20
|
MultiXML was inspired by MultiJSON[http://github.com/intridea/multi_json/].
|
21
21
|
|
data/Rakefile
CHANGED
@@ -1,28 +1,18 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require 'rake/rdoctask'
|
5
|
-
require 'spec/rake/spectask'
|
1
|
+
require "bundler"
|
2
|
+
require "rake/rdoctask"
|
3
|
+
require "rspec/core/rake_task"
|
6
4
|
|
7
5
|
Bundler::GemHelper.install_tasks
|
8
6
|
|
9
7
|
Rake::RDocTask.new do |rdoc|
|
10
|
-
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
11
8
|
rdoc.rdoc_dir = 'rdoc'
|
12
|
-
rdoc.title = "multi_xml #{
|
9
|
+
rdoc.title = "multi_xml #{MultiXml::VERSION}"
|
13
10
|
rdoc.rdoc_files.include('README*')
|
14
11
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
15
12
|
end
|
16
13
|
|
17
|
-
|
18
|
-
|
19
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
20
|
-
end
|
21
|
-
|
22
|
-
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
23
|
-
spec.libs << 'lib' << 'spec'
|
24
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
25
|
-
spec.rcov = true
|
14
|
+
desc "Run all examples"
|
15
|
+
RSpec::Core::RakeTask.new(:spec) do |t|
|
26
16
|
end
|
27
17
|
|
28
18
|
task :default => :spec
|
data/lib/multi_xml.rb
CHANGED
@@ -1,65 +1,229 @@
|
|
1
|
+
require 'base64'
|
2
|
+
require 'bigdecimal'
|
3
|
+
require 'date'
|
4
|
+
require 'time'
|
5
|
+
require 'yaml'
|
6
|
+
|
1
7
|
module MultiXml
|
2
|
-
|
8
|
+
class << self
|
3
9
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
+
REQUIREMENT_MAP = [
|
11
|
+
['libxml', :libxml],
|
12
|
+
['nokogiri', :nokogiri],
|
13
|
+
['rexml/document', :rexml]
|
14
|
+
]
|
15
|
+
|
16
|
+
CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)
|
17
|
+
|
18
|
+
# TODO: use Time.xmlschema instead of Time.parse;
|
19
|
+
# use regexp instead of Date.parse
|
20
|
+
unless defined?(PARSING)
|
21
|
+
PARSING = {
|
22
|
+
'symbol' => Proc.new{|symbol| symbol.to_sym},
|
23
|
+
'date' => Proc.new{|date| ::Date.parse(date)},
|
24
|
+
'datetime' => Proc.new{|time| ::Time.parse(time).utc rescue ::DateTime.parse(time).utc},
|
25
|
+
'integer' => Proc.new{|integer| integer.to_i},
|
26
|
+
'float' => Proc.new{|float| float.to_f},
|
27
|
+
'decimal' => Proc.new{|number| BigDecimal(number)},
|
28
|
+
'boolean' => Proc.new{|boolean| !%w(0 false).include?(boolean.strip)},
|
29
|
+
'string' => Proc.new{|string| string.to_s},
|
30
|
+
'yaml' => Proc.new{|yaml| YAML::load(yaml) rescue yaml},
|
31
|
+
'base64Binary' => Proc.new{|binary| binary.unpack('m').first},
|
32
|
+
'binary' => Proc.new{|binary, entity| parse_binary(binary, entity)},
|
33
|
+
'file' => Proc.new{|file, entity| parse_file(file, entity)}
|
34
|
+
}
|
35
|
+
|
36
|
+
PARSING.update(
|
37
|
+
'double' => PARSING['float'],
|
38
|
+
'dateTime' => PARSING['datetime']
|
39
|
+
)
|
40
|
+
end
|
10
41
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
42
|
+
# Get the current parser class.
|
43
|
+
def parser
|
44
|
+
return @parser if @parser
|
45
|
+
self.parser = self.default_parser
|
46
|
+
@parser
|
47
|
+
end
|
48
|
+
|
49
|
+
# The default parser based on what you currently
|
50
|
+
# have loaded and installed. First checks to see
|
51
|
+
# if any parsers are already loaded, then checks
|
52
|
+
# to see which are installed if none are loaded.
|
53
|
+
def default_parser
|
54
|
+
return :libxml if defined?(::LibXML)
|
55
|
+
return :nokogiri if defined?(::Nokogiri)
|
56
|
+
|
57
|
+
REQUIREMENT_MAP.each do |(library, parser)|
|
58
|
+
begin
|
59
|
+
require library
|
60
|
+
return parser
|
61
|
+
rescue LoadError
|
62
|
+
next
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# Set the XML parser utilizing a symbol, string, or class.
|
68
|
+
# Supported by default are:
|
69
|
+
#
|
70
|
+
# * <tt>:libxml</tt>
|
71
|
+
# * <tt>:nokogiri</tt>
|
72
|
+
# * <tt>:rexml</tt>
|
73
|
+
def parser=(new_parser)
|
74
|
+
case new_parser
|
75
|
+
when String, Symbol
|
76
|
+
require "multi_xml/parsers/#{new_parser.to_s.downcase}"
|
77
|
+
@parser = MultiXml::Parsers.const_get("#{new_parser.to_s.split('_').map{|s| s.capitalize}.join('')}")
|
78
|
+
when Class, Module
|
79
|
+
@parser = new_parser
|
80
|
+
else
|
81
|
+
raise "Did not recognize your parser specification. Please specify either a symbol or a class."
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Parse an XML string into Ruby.
|
86
|
+
#
|
87
|
+
# <b>Options</b>
|
88
|
+
#
|
89
|
+
# <tt>:symbolize_keys</tt> :: If true, will use symbols instead of strings for the keys.
|
90
|
+
def parse(xml, options={})
|
91
|
+
xml.strip!
|
92
|
+
hash = typecast_xml_value(undasherize_keys(parser.parse(xml)))
|
93
|
+
hash = symbolize_keys(hash) if options[:symbolize_keys]
|
94
|
+
hash = {} if hash.nil?
|
95
|
+
hash
|
96
|
+
end
|
97
|
+
|
98
|
+
# This module decorates files with the <tt>original_filename</tt>
|
99
|
+
# and <tt>content_type</tt> methods.
|
100
|
+
module FileLike #:nodoc:
|
101
|
+
attr_writer :original_filename, :content_type
|
102
|
+
|
103
|
+
def original_filename
|
104
|
+
@original_filename || 'untitled'
|
105
|
+
end
|
106
|
+
|
107
|
+
def content_type
|
108
|
+
@content_type || 'application/octet-stream'
|
33
109
|
end
|
34
110
|
end
|
35
|
-
end
|
36
111
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
def engine=(new_engine)
|
45
|
-
case new_engine
|
46
|
-
when String, Symbol
|
47
|
-
require "multi_xml/engines/#{new_engine}"
|
48
|
-
@engine = MultiXml::Engines.const_get("#{new_engine.to_s.split('_').map{|s| s.capitalize}.join('')}")
|
49
|
-
when Class
|
50
|
-
@engine = new_engine
|
112
|
+
private
|
113
|
+
|
114
|
+
# TODO: Add support for other encodings
|
115
|
+
def self.parse_binary(binary, entity) #:nodoc:
|
116
|
+
case entity['encoding']
|
117
|
+
when 'base64'
|
118
|
+
Base64.decode64(binary)
|
51
119
|
else
|
52
|
-
|
120
|
+
binary
|
121
|
+
end
|
53
122
|
end
|
54
|
-
end
|
55
123
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
end
|
124
|
+
def self.parse_file(file, entity)
|
125
|
+
f = StringIO.new(Base64.decode64(file))
|
126
|
+
f.extend(FileLike)
|
127
|
+
f.original_filename = entity['name']
|
128
|
+
f.content_type = entity['content_type']
|
129
|
+
f
|
130
|
+
end
|
64
131
|
|
132
|
+
def symbolize_keys(hash)
|
133
|
+
hash.inject({}) do |result, (key, value)|
|
134
|
+
new_key = case key
|
135
|
+
when String
|
136
|
+
key.to_sym
|
137
|
+
else
|
138
|
+
key
|
139
|
+
end
|
140
|
+
new_value = case value
|
141
|
+
when Hash
|
142
|
+
symbolize_keys(value)
|
143
|
+
else
|
144
|
+
value
|
145
|
+
end
|
146
|
+
result[new_key] = new_value
|
147
|
+
result
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def undasherize_keys(params)
|
152
|
+
case params
|
153
|
+
when Hash
|
154
|
+
params.inject({}) do |hash, (key, value)|
|
155
|
+
hash[key.to_s.tr('-', '_')] = undasherize_keys(value)
|
156
|
+
hash
|
157
|
+
end
|
158
|
+
when Array
|
159
|
+
params.map{|value| undasherize_keys(value)}
|
160
|
+
else
|
161
|
+
params
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
def wrap(object)
|
166
|
+
if object.nil?
|
167
|
+
[]
|
168
|
+
elsif object.respond_to?(:to_ary)
|
169
|
+
object.to_ary
|
170
|
+
else
|
171
|
+
[object]
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
def typecast_xml_value(value)
|
176
|
+
case value
|
177
|
+
when Hash
|
178
|
+
if value['type'] == 'array'
|
179
|
+
_, entries = wrap(value.detect{|key, value| key != 'type'})
|
180
|
+
if entries.nil? || entries.strip == '' || (c = value[CONTENT_ROOT] && c.nil?)
|
181
|
+
[]
|
182
|
+
else
|
183
|
+
case entries
|
184
|
+
when Array
|
185
|
+
entries.map{|value| typecast_xml_value(value)}
|
186
|
+
when Hash
|
187
|
+
[typecast_xml_value(entries)]
|
188
|
+
else
|
189
|
+
raise "can't typecast #{entries.class.name}: #{entries.inspect}"
|
190
|
+
end
|
191
|
+
end
|
192
|
+
elsif value.has_key?(CONTENT_ROOT)
|
193
|
+
content = value[CONTENT_ROOT]
|
194
|
+
if block = PARSING[value['type']]
|
195
|
+
block.arity == 1 ? block.call(content) : block.call(content, value)
|
196
|
+
else
|
197
|
+
content
|
198
|
+
end
|
199
|
+
elsif value['type'] == 'string' && value['nil'] != 'true'
|
200
|
+
''
|
201
|
+
# blank or nil parsed values are represented by nil
|
202
|
+
elsif value.nil? || value.empty? || value['nil'] == 'true'
|
203
|
+
nil
|
204
|
+
# If 'type' is the only key in the hash, the value is
|
205
|
+
# still nil, unless 'type' is itself an XML node
|
206
|
+
# (i.e. value['type'] is a Hash).
|
207
|
+
elsif value['type'] && value.size == 1 && !value['type'].is_a?(::Hash)
|
208
|
+
nil
|
209
|
+
else
|
210
|
+
xml_value = value.inject({}) do |hash, (key, value)|
|
211
|
+
hash[key] = typecast_xml_value(value)
|
212
|
+
hash
|
213
|
+
end
|
214
|
+
|
215
|
+
# Turn {:files => {:file => #<StringIO>}} into {:files => #<StringIO>} so it is compatible with
|
216
|
+
# how multipart uploaded files from HTML appear
|
217
|
+
xml_value['file'].is_a?(StringIO) ? xml_value['file'] : xml_value
|
218
|
+
end
|
219
|
+
when Array
|
220
|
+
value.map!{|i| typecast_xml_value(i)}
|
221
|
+
value.length > 1 ? value : value.first
|
222
|
+
when String
|
223
|
+
value
|
224
|
+
else
|
225
|
+
raise "can't typecast #{value.class.name}: #{value.inspect}"
|
226
|
+
end
|
227
|
+
end
|
228
|
+
end
|
65
229
|
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'libxml' unless defined?(LibXML)
|
2
|
+
|
3
|
+
module MultiXml
|
4
|
+
module Parsers
|
5
|
+
# Use LibXML to parse XML.
|
6
|
+
module Libxml #:nodoc:
|
7
|
+
extend self
|
8
|
+
|
9
|
+
# Parse an XML Document string or IO into a simple hash using libxml.
|
10
|
+
# data::
|
11
|
+
# XML Document string or IO to parse
|
12
|
+
def parse(data)
|
13
|
+
if !data.respond_to?(:read)
|
14
|
+
data = StringIO.new(data || '')
|
15
|
+
end
|
16
|
+
|
17
|
+
char = data.getc
|
18
|
+
if char.nil?
|
19
|
+
{}
|
20
|
+
else
|
21
|
+
data.ungetc(char)
|
22
|
+
LibXML::XML::Parser.io(data).parse.to_hash
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
module LibXML #:nodoc:
|
30
|
+
module Conversions #:nodoc:
|
31
|
+
module Document #:nodoc:
|
32
|
+
def to_hash
|
33
|
+
root.to_hash
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
module Node #:nodoc:
|
38
|
+
CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)
|
39
|
+
|
40
|
+
# Convert XML document to hash
|
41
|
+
#
|
42
|
+
# hash::
|
43
|
+
# Hash to merge the converted element into.
|
44
|
+
def to_hash(hash={})
|
45
|
+
node_hash = {}
|
46
|
+
|
47
|
+
# Insert node hash into parent hash correctly.
|
48
|
+
case hash[name]
|
49
|
+
when Array then hash[name] << node_hash
|
50
|
+
when Hash then hash[name] = [hash[name], node_hash]
|
51
|
+
when nil then hash[name] = node_hash
|
52
|
+
end
|
53
|
+
|
54
|
+
# Handle child elements
|
55
|
+
each_child do |c|
|
56
|
+
if c.element?
|
57
|
+
c.to_hash(node_hash)
|
58
|
+
elsif c.text? || c.cdata?
|
59
|
+
node_hash[CONTENT_ROOT] ||= ''
|
60
|
+
node_hash[CONTENT_ROOT] << c.content
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
# Remove content node if it is blank
|
65
|
+
if node_hash.length > 1 && (node_hash[CONTENT_ROOT].nil? || node_hash[CONTENT_ROOT].empty?)
|
66
|
+
node_hash.delete(CONTENT_ROOT)
|
67
|
+
end
|
68
|
+
|
69
|
+
# Handle attributes
|
70
|
+
each_attr { |a| node_hash[a.name] = a.value }
|
71
|
+
|
72
|
+
hash
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
LibXML::XML::Document.send(:include, LibXML::Conversions::Document)
|
79
|
+
LibXML::XML::Node.send(:include, LibXML::Conversions::Node)
|