multi_xml 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CONTRIBUTING.md +5 -3
- data/README.md +15 -17
- data/lib/multi_xml.rb +80 -71
- data/lib/multi_xml/parsers/libxml.rb +5 -2
- data/lib/multi_xml/parsers/libxml2_parser.rb +18 -20
- data/lib/multi_xml/parsers/nokogiri.rb +6 -3
- data/lib/multi_xml/parsers/oga.rb +73 -0
- data/lib/multi_xml/parsers/ox.rb +8 -14
- data/lib/multi_xml/parsers/rexml.rb +13 -13
- data/lib/multi_xml/version.rb +43 -1
- data/multi_xml.gemspec +5 -10
- metadata +15 -55
- data.tar.gz.sig +0 -0
- data/Rakefile +0 -21
- data/spec/helper.rb +0 -17
- data/spec/multi_xml_spec.rb +0 -43
- data/spec/parser_shared_example.rb +0 -694
- data/spec/speed.rb +0 -63
- metadata.gz.sig +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: add5ff8df4d736d143b1d83c38c5f1b8b35d21cd
|
4
|
+
data.tar.gz: ec8e8a3657ff340a9a4abefa2561e44dfee8371d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1cb437e88276aa09e33c5b10a1e32bbf181ecae9543976ab13acc4ca458b00b1315a59f1e3e1cd95fd24e47559a85058585578ba16441ac83c98ffa81c5ee902
|
7
|
+
data.tar.gz: 1931768faabe059174c01e24d12f9b1c8bcc0c9717891ec9ba9098f55c98535ad2a0efab7dcc4bfd756a569f137bf86e4b45b652a182652e6a468393103de9f5
|
data/CONTRIBUTING.md
CHANGED
@@ -16,8 +16,10 @@ Here are some ways *you* can contribute:
|
|
16
16
|
* by refactoring code
|
17
17
|
* by resolving [issues][]
|
18
18
|
* by reviewing patches
|
19
|
+
* [financially][gittip]
|
19
20
|
|
20
21
|
[issues]: https://github.com/sferik/multi_xml/issues
|
22
|
+
[gittip]: https://www.gittip.com/sferik/
|
21
23
|
|
22
24
|
## Submitting an Issue
|
23
25
|
We use the [GitHub issue tracker][issues] to track bugs and features. Before
|
@@ -35,12 +37,12 @@ Ideally, a bug report should include a pull request with failing specs.
|
|
35
37
|
3. Add specs for your unimplemented feature or bug fix.
|
36
38
|
4. Run `bundle exec rake spec`. If your specs pass, return to step 3.
|
37
39
|
5. Implement your feature or bug fix.
|
38
|
-
6. Run `bundle exec rake
|
40
|
+
6. Run `bundle exec rake`. If your specs fail, return to step 5.
|
39
41
|
7. Run `open coverage/index.html`. If your changes are not completely covered
|
40
42
|
by your tests, return to step 3.
|
41
43
|
8. Add documentation for your feature or bug fix.
|
42
|
-
9. Run `bundle exec rake
|
43
|
-
back to step 8.
|
44
|
+
9. Run `bundle exec rake verify_measurements`. If your changes are not 100%
|
45
|
+
documented, go back to step 8.
|
44
46
|
10. Add, commit, and push your changes.
|
45
47
|
11. [Submit a pull request.][pr]
|
46
48
|
|
data/README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# MultiXML
|
2
2
|
|
3
|
-
[][gem]
|
4
|
+
[][travis]
|
5
|
+
[][gemnasium]
|
6
|
+
[][codeclimate]
|
7
|
+
[][coveralls]
|
8
8
|
|
9
9
|
[gem]: https://rubygems.org/gems/multi_xml
|
10
10
|
[travis]: http://travis-ci.org/sferik/multi_xml
|
@@ -17,16 +17,6 @@ A generic swappable back-end for XML parsing
|
|
17
17
|
## Installation
|
18
18
|
gem install multi_xml
|
19
19
|
|
20
|
-
To ensure the code you're installing hasn't been tampered with, it's
|
21
|
-
recommended that you verify the signature. To do this, you need to add my
|
22
|
-
public key as a trusted certificate (you only need to do this once):
|
23
|
-
|
24
|
-
gem cert --add <(curl -Ls https://raw.github.com/sferik/multi_xml/master/certs/sferik.pem)
|
25
|
-
|
26
|
-
Then, install the gem with the high security trust policy:
|
27
|
-
|
28
|
-
gem install multi_xml -P HighSecurity
|
29
|
-
|
30
20
|
## Documentation
|
31
21
|
[http://rdoc.info/gems/multi_xml][documentation]
|
32
22
|
|
@@ -55,6 +45,10 @@ MultiXml.parse('<tag>This is the contents</tag>') # Parsed using Nokogiri
|
|
55
45
|
MultiXml.parser = :rexml
|
56
46
|
MultiXml.parser = MultiXml::Parsers::Rexml # Same as above
|
57
47
|
MultiXml.parse('<tag>This is the contents</tag>') # Parsed using REXML
|
48
|
+
|
49
|
+
MultiXml.parser = :oga
|
50
|
+
MultiXml.parser = MultiXml::Parsers::Oga # Same as above
|
51
|
+
MultiXml.parse('<tag>This is the contents</tag>') # Parsed using Oga
|
58
52
|
```
|
59
53
|
The `parser` setter takes either a symbol or a class (to allow for custom XML
|
60
54
|
parsers) that responds to `.parse` at the class level.
|
@@ -68,10 +62,14 @@ then Nokogiri, and finally REXML.
|
|
68
62
|
This library aims to support and is [tested against][travis] the following Ruby
|
69
63
|
implementations:
|
70
64
|
|
71
|
-
* Ruby 1.8.7
|
72
|
-
* Ruby 1.9.2
|
73
65
|
* Ruby 1.9.3
|
74
66
|
* Ruby 2.0.0
|
67
|
+
* Ruby 2.1
|
68
|
+
* Ruby 2.2
|
69
|
+
* Ruby 2.3
|
70
|
+
* [JRuby 9000][jruby]
|
71
|
+
|
72
|
+
[jruby]: http://jruby.org/
|
75
73
|
|
76
74
|
If something doesn't work on one of these interpreters, it's a bug.
|
77
75
|
|
data/lib/multi_xml.rb
CHANGED
@@ -5,73 +5,78 @@ require 'stringio'
|
|
5
5
|
require 'time'
|
6
6
|
require 'yaml'
|
7
7
|
|
8
|
-
module MultiXml
|
8
|
+
module MultiXml # rubocop:disable ModuleLength
|
9
9
|
class ParseError < StandardError; end
|
10
|
+
class NoParserError < StandardError; end
|
10
11
|
class DisallowedTypeError < StandardError
|
11
12
|
def initialize(type)
|
12
13
|
super "Disallowed type attribute: #{type.inspect}"
|
13
14
|
end
|
14
15
|
end
|
15
16
|
|
16
|
-
REQUIREMENT_MAP
|
17
|
-
[
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
17
|
+
unless defined?(REQUIREMENT_MAP)
|
18
|
+
REQUIREMENT_MAP = [
|
19
|
+
['ox', :ox],
|
20
|
+
['libxml', :libxml],
|
21
|
+
['nokogiri', :nokogiri],
|
22
|
+
['rexml/document', :rexml],
|
23
|
+
['oga', :oga],
|
24
|
+
].freeze
|
25
|
+
end
|
22
26
|
|
23
27
|
CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)
|
24
28
|
|
25
29
|
unless defined?(PARSING)
|
30
|
+
float_proc = proc { |float| float.to_f }
|
31
|
+
datetime_proc = proc { |time| Time.parse(time).utc rescue DateTime.parse(time).utc } # rubocop:disable RescueModifier
|
32
|
+
|
26
33
|
PARSING = {
|
27
|
-
'symbol' =>
|
28
|
-
'date' =>
|
29
|
-
'datetime' =>
|
30
|
-
'
|
31
|
-
'
|
32
|
-
'
|
33
|
-
'
|
34
|
-
'
|
35
|
-
'
|
36
|
-
'
|
37
|
-
'
|
38
|
-
'
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
34
|
+
'symbol' => proc { |symbol| symbol.to_sym },
|
35
|
+
'date' => proc { |date| Date.parse(date) },
|
36
|
+
'datetime' => datetime_proc,
|
37
|
+
'dateTime' => datetime_proc,
|
38
|
+
'integer' => proc { |integer| integer.to_i },
|
39
|
+
'float' => float_proc,
|
40
|
+
'double' => float_proc,
|
41
|
+
'decimal' => proc { |number| BigDecimal(number) },
|
42
|
+
'boolean' => proc { |boolean| !%w(0 false).include?(boolean.strip) },
|
43
|
+
'string' => proc { |string| string.to_s },
|
44
|
+
'yaml' => proc { |yaml| YAML.load(yaml) rescue yaml }, # rubocop:disable RescueModifier
|
45
|
+
'base64Binary' => proc { |binary| ::Base64.decode64(binary) },
|
46
|
+
'binary' => proc { |binary, entity| parse_binary(binary, entity) },
|
47
|
+
'file' => proc { |file, entity| parse_file(file, entity) },
|
48
|
+
}.freeze
|
49
|
+
end
|
50
|
+
|
51
|
+
unless defined?(TYPE_NAMES)
|
52
|
+
TYPE_NAMES = {
|
53
|
+
'Symbol' => 'symbol',
|
54
|
+
'Integer' => 'integer',
|
55
|
+
'BigDecimal' => 'decimal',
|
56
|
+
'Float' => 'float',
|
57
|
+
'TrueClass' => 'boolean',
|
58
|
+
'FalseClass' => 'boolean',
|
59
|
+
'Date' => 'date',
|
60
|
+
'DateTime' => 'datetime',
|
61
|
+
'Time' => 'datetime',
|
62
|
+
'Array' => 'array',
|
63
|
+
'Hash' => 'hash',
|
64
|
+
}.freeze
|
45
65
|
end
|
46
66
|
|
47
|
-
|
48
|
-
'Symbol' => 'symbol',
|
49
|
-
'Fixnum' => 'integer',
|
50
|
-
'Bignum' => 'integer',
|
51
|
-
'BigDecimal' => 'decimal',
|
52
|
-
'Float' => 'float',
|
53
|
-
'TrueClass' => 'boolean',
|
54
|
-
'FalseClass' => 'boolean',
|
55
|
-
'Date' => 'date',
|
56
|
-
'DateTime' => 'datetime',
|
57
|
-
'Time' => 'datetime',
|
58
|
-
'Array' => 'array',
|
59
|
-
'Hash' => 'hash'
|
60
|
-
} unless defined?(TYPE_NAMES)
|
61
|
-
|
62
|
-
DISALLOWED_XML_TYPES = %w(symbol yaml)
|
67
|
+
DISALLOWED_XML_TYPES = %w(symbol yaml).freeze
|
63
68
|
|
64
69
|
DEFAULT_OPTIONS = {
|
65
70
|
:typecast_xml_value => true,
|
66
71
|
:disallowed_types => DISALLOWED_XML_TYPES,
|
67
|
-
:symbolize_keys => false
|
68
|
-
}
|
72
|
+
:symbolize_keys => false,
|
73
|
+
}.freeze
|
69
74
|
|
70
75
|
class << self
|
71
76
|
# Get the current parser class.
|
72
77
|
def parser
|
73
78
|
return @parser if defined?(@parser)
|
74
|
-
self.parser =
|
79
|
+
self.parser = default_parser
|
75
80
|
@parser
|
76
81
|
end
|
77
82
|
|
@@ -83,6 +88,7 @@ module MultiXml
|
|
83
88
|
return :ox if defined?(::Ox)
|
84
89
|
return :libxml if defined?(::LibXML)
|
85
90
|
return :nokogiri if defined?(::Nokogiri)
|
91
|
+
return :oga if defined?(::Oga)
|
86
92
|
|
87
93
|
REQUIREMENT_MAP.each do |library, parser|
|
88
94
|
begin
|
@@ -92,6 +98,7 @@ module MultiXml
|
|
92
98
|
next
|
93
99
|
end
|
94
100
|
end
|
101
|
+
raise(NoParserError.new("No XML parser detected. If you're using Rubinius and Bundler, try adding an XML parser to your Gemfile (e.g. libxml-ruby, nokogiri, or rubysl-rexml). For more information, see https://github.com/sferik/multi_xml/issues/42."))
|
95
102
|
end
|
96
103
|
|
97
104
|
# Set the XML parser utilizing a symbol, string, or class.
|
@@ -101,15 +108,16 @@ module MultiXml
|
|
101
108
|
# * <tt>:nokogiri</tt>
|
102
109
|
# * <tt>:ox</tt>
|
103
110
|
# * <tt>:rexml</tt>
|
111
|
+
# * <tt>:oga</tt>
|
104
112
|
def parser=(new_parser)
|
105
113
|
case new_parser
|
106
114
|
when String, Symbol
|
107
115
|
require "multi_xml/parsers/#{new_parser.to_s.downcase}"
|
108
|
-
@parser = MultiXml::Parsers.const_get(
|
116
|
+
@parser = MultiXml::Parsers.const_get(new_parser.to_s.split('_').collect(&:capitalize).join('').to_s)
|
109
117
|
when Class, Module
|
110
118
|
@parser = new_parser
|
111
119
|
else
|
112
|
-
raise
|
120
|
+
raise('Did not recognize your parser specification. Please specify either a symbol or a class.')
|
113
121
|
end
|
114
122
|
end
|
115
123
|
|
@@ -122,12 +130,12 @@ module MultiXml
|
|
122
130
|
# <tt>:disallowed_types</tt> :: Types to disallow from being typecasted. Defaults to `['yaml', 'symbol']`. Use `[]` to allow all types.
|
123
131
|
#
|
124
132
|
# <tt>:typecast_xml_value</tt> :: If true, won't typecast values for parsed document
|
125
|
-
def parse(xml, options={})
|
133
|
+
def parse(xml, options = {}) # rubocop:disable AbcSize, CyclomaticComplexity, MethodLength, PerceivedComplexity
|
126
134
|
xml ||= ''
|
127
135
|
|
128
136
|
options = DEFAULT_OPTIONS.merge(options)
|
129
137
|
|
130
|
-
xml.strip
|
138
|
+
xml = xml.strip if xml.respond_to?(:strip)
|
131
139
|
begin
|
132
140
|
xml = StringIO.new(xml) unless xml.respond_to?(:read)
|
133
141
|
|
@@ -140,7 +148,7 @@ module MultiXml
|
|
140
148
|
rescue DisallowedTypeError
|
141
149
|
raise
|
142
150
|
rescue parser.parse_error => error
|
143
|
-
raise
|
151
|
+
raise(ParseError, error.message, error.backtrace) # rubocop:disable RaiseArgs
|
144
152
|
end
|
145
153
|
hash = symbolize_keys(hash) if options[:symbolize_keys]
|
146
154
|
hash
|
@@ -160,7 +168,7 @@ module MultiXml
|
|
160
168
|
end
|
161
169
|
end
|
162
170
|
|
163
|
-
|
171
|
+
private
|
164
172
|
|
165
173
|
# TODO: Add support for other encodings
|
166
174
|
def parse_binary(binary, entity) #:nodoc:
|
@@ -187,7 +195,7 @@ module MultiXml
|
|
187
195
|
result.merge(key.to_sym => symbolize_keys(value))
|
188
196
|
end
|
189
197
|
when Array
|
190
|
-
params.
|
198
|
+
params.collect { |value| symbolize_keys(value) }
|
191
199
|
else
|
192
200
|
params
|
193
201
|
end
|
@@ -197,23 +205,23 @@ module MultiXml
|
|
197
205
|
case params
|
198
206
|
when Hash
|
199
207
|
params.inject({}) do |hash, (key, value)|
|
200
|
-
hash[key.to_s.tr('-', '_')] = undasherize_keys(value)
|
208
|
+
hash[key.to_s.tr('-'.freeze, '_'.freeze)] = undasherize_keys(value)
|
201
209
|
hash
|
202
210
|
end
|
203
211
|
when Array
|
204
|
-
params.
|
212
|
+
params.collect { |value| undasherize_keys(value) }
|
205
213
|
else
|
206
214
|
params
|
207
215
|
end
|
208
216
|
end
|
209
217
|
|
210
|
-
def typecast_xml_value(value, disallowed_types=nil)
|
218
|
+
def typecast_xml_value(value, disallowed_types = nil) # rubocop:disable AbcSize, CyclomaticComplexity, MethodLength, PerceivedComplexity
|
211
219
|
disallowed_types ||= DISALLOWED_XML_TYPES
|
212
220
|
|
213
221
|
case value
|
214
222
|
when Hash
|
215
223
|
if value.include?('type') && !value['type'].is_a?(Hash) && disallowed_types.include?(value['type'])
|
216
|
-
raise
|
224
|
+
raise(DisallowedTypeError.new(value['type']))
|
217
225
|
end
|
218
226
|
|
219
227
|
if value['type'] == 'array'
|
@@ -227,27 +235,29 @@ module MultiXml
|
|
227
235
|
|
228
236
|
# This attempt fails to consider the order that the detect method
|
229
237
|
# retrieves the entries.
|
230
|
-
#_, entries = value.detect {|key, _| key != 'type'}
|
238
|
+
# _, entries = value.detect {|key, _| key != 'type'}
|
231
239
|
|
232
240
|
# This approach ignores attribute entries that are not convertable
|
233
241
|
# to an Array which allows attributes to be ignored.
|
234
|
-
_, entries = value.detect {|k, v| k != 'type' && (v.is_a?(Array) || v.is_a?(Hash)) }
|
242
|
+
_, entries = value.detect { |k, v| k != 'type' && (v.is_a?(Array) || v.is_a?(Hash)) }
|
235
243
|
|
236
|
-
|
244
|
+
case entries
|
245
|
+
when NilClass
|
237
246
|
[]
|
247
|
+
when String
|
248
|
+
[] if entries.strip.empty?
|
249
|
+
when Array
|
250
|
+
entries.collect { |entry| typecast_xml_value(entry, disallowed_types) }
|
251
|
+
when Hash
|
252
|
+
[typecast_xml_value(entries, disallowed_types)]
|
238
253
|
else
|
239
|
-
|
240
|
-
when Array
|
241
|
-
entries.map {|entry| typecast_xml_value(entry, disallowed_types)}
|
242
|
-
when Hash
|
243
|
-
[typecast_xml_value(entries, disallowed_types)]
|
244
|
-
else
|
245
|
-
raise "can't typecast #{entries.class.name}: #{entries.inspect}"
|
246
|
-
end
|
254
|
+
raise("can't typecast #{entries.class.name}: #{entries.inspect}")
|
247
255
|
end
|
248
|
-
|
256
|
+
|
257
|
+
elsif value.key?(CONTENT_ROOT)
|
249
258
|
content = value[CONTENT_ROOT]
|
250
|
-
|
259
|
+
block = PARSING[value['type']]
|
260
|
+
if block
|
251
261
|
if block.arity == 1
|
252
262
|
value.delete('type') if PARSING[value['type']]
|
253
263
|
if value.keys.size > 1
|
@@ -283,14 +293,13 @@ module MultiXml
|
|
283
293
|
xml_value['file'].is_a?(StringIO) ? xml_value['file'] : xml_value
|
284
294
|
end
|
285
295
|
when Array
|
286
|
-
value.map!{|i| typecast_xml_value(i, disallowed_types)}
|
296
|
+
value.map! { |i| typecast_xml_value(i, disallowed_types) }
|
287
297
|
value.length > 1 ? value : value.first
|
288
298
|
when String
|
289
299
|
value
|
290
300
|
else
|
291
|
-
raise
|
301
|
+
raise("can't typecast #{value.class.name}: #{value.inspect}")
|
292
302
|
end
|
293
303
|
end
|
294
304
|
end
|
295
|
-
|
296
305
|
end
|
@@ -5,15 +5,18 @@ module MultiXml
|
|
5
5
|
module Parsers
|
6
6
|
module Libxml #:nodoc:
|
7
7
|
include Libxml2Parser
|
8
|
-
|
9
8
|
extend self
|
10
9
|
|
11
|
-
def parse_error
|
10
|
+
def parse_error
|
11
|
+
::LibXML::XML::Error
|
12
|
+
end
|
12
13
|
|
13
14
|
def parse(xml)
|
14
15
|
node_to_hash(LibXML::XML::Parser.io(xml).parse.root)
|
15
16
|
end
|
16
17
|
|
18
|
+
private
|
19
|
+
|
17
20
|
def each_child(node, &block)
|
18
21
|
node.each_child(&block)
|
19
22
|
end
|
@@ -8,16 +8,19 @@ module MultiXml
|
|
8
8
|
#
|
9
9
|
# hash::
|
10
10
|
# Hash to merge the converted element into.
|
11
|
-
def node_to_hash(node, hash={})
|
11
|
+
def node_to_hash(node, hash = {}) # rubocop:disable AbcSize, CyclomaticComplexity, MethodLength, PerceivedComplexity
|
12
12
|
node_hash = {MultiXml::CONTENT_ROOT => ''}
|
13
13
|
|
14
14
|
name = node_name(node)
|
15
15
|
|
16
16
|
# Insert node hash into parent hash correctly.
|
17
17
|
case hash[name]
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
when Array
|
19
|
+
hash[name] << node_hash
|
20
|
+
when Hash
|
21
|
+
hash[name] = [hash[name], node_hash]
|
22
|
+
when NilClass
|
23
|
+
hash[name] = node_hash
|
21
24
|
end
|
22
25
|
|
23
26
|
# Handle child elements
|
@@ -37,12 +40,8 @@ module MultiXml
|
|
37
40
|
# Handle attributes
|
38
41
|
each_attr(node) do |a|
|
39
42
|
key = node_name(a)
|
40
|
-
|
41
|
-
node_hash[key] =
|
42
|
-
[a.value, v]
|
43
|
-
else
|
44
|
-
a.value
|
45
|
-
end
|
43
|
+
v = node_hash[key]
|
44
|
+
node_hash[key] = (v ? [a.value, v] : a.value)
|
46
45
|
end
|
47
46
|
|
48
47
|
hash
|
@@ -51,23 +50,22 @@ module MultiXml
|
|
51
50
|
# Parse an XML Document IO into a simple hash.
|
52
51
|
# xml::
|
53
52
|
# XML Document IO to parse
|
54
|
-
def parse(
|
55
|
-
raise
|
53
|
+
def parse(_)
|
54
|
+
raise(NotImplementedError.new("inheritor should define #{__method__}"))
|
56
55
|
end
|
57
56
|
|
58
|
-
|
59
|
-
private
|
57
|
+
private
|
60
58
|
|
61
|
-
def each_child(*
|
62
|
-
raise
|
59
|
+
def each_child(*)
|
60
|
+
raise(NotImplementedError.new("inheritor should define #{__method__}"))
|
63
61
|
end
|
64
62
|
|
65
|
-
def each_attr(*
|
66
|
-
raise
|
63
|
+
def each_attr(*)
|
64
|
+
raise(NotImplementedError.new("inheritor should define #{__method__}"))
|
67
65
|
end
|
68
66
|
|
69
|
-
def node_name(*
|
70
|
-
raise
|
67
|
+
def node_name(*)
|
68
|
+
raise(NotImplementedError.new("inheritor should define #{__method__}"))
|
71
69
|
end
|
72
70
|
end
|
73
71
|
end
|