multi_xml 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mutant.yml +16 -0
- data/.rubocop.yml +14 -5
- data/CHANGELOG.md +8 -0
- data/Gemfile +12 -9
- data/README.md +1 -1
- data/Rakefile +35 -7
- data/Steepfile +22 -0
- data/lib/multi_xml/constants.rb +134 -0
- data/lib/multi_xml/errors.rb +93 -0
- data/lib/multi_xml/file_like.rb +62 -0
- data/lib/multi_xml/helpers.rb +228 -0
- data/lib/multi_xml/parsers/dom_parser.rb +97 -0
- data/lib/multi_xml/parsers/libxml.rb +35 -18
- data/lib/multi_xml/parsers/libxml_sax.rb +103 -0
- data/lib/multi_xml/parsers/nokogiri.rb +39 -22
- data/lib/multi_xml/parsers/nokogiri_sax.rb +102 -0
- data/lib/multi_xml/parsers/oga.rb +48 -51
- data/lib/multi_xml/parsers/ox.rb +99 -57
- data/lib/multi_xml/parsers/rexml.rb +84 -78
- data/lib/multi_xml/parsers/sax_handler.rb +117 -0
- data/lib/multi_xml/version.rb +5 -1
- data/lib/multi_xml.rb +173 -269
- data/sig/multi_xml.rbs +227 -0
- metadata +21 -5
- data/lib/multi_xml/parsers/libxml2_parser.rb +0 -70
data/lib/multi_xml.rb
CHANGED
|
@@ -3,309 +3,213 @@ require "date"
|
|
|
3
3
|
require "stringio"
|
|
4
4
|
require "time"
|
|
5
5
|
require "yaml"
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
].freeze
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
CONTENT_ROOT = "__content__".freeze unless defined?(CONTENT_ROOT)
|
|
29
|
-
|
|
30
|
-
unless defined?(PARSING)
|
|
31
|
-
float_proc = proc { |float| float.to_f }
|
|
32
|
-
datetime_proc = proc { |time| Time.parse(time).utc rescue DateTime.parse(time).utc } # rubocop:disable Style/RescueModifier
|
|
33
|
-
|
|
34
|
-
PARSING = {
|
|
35
|
-
"symbol" => proc { |symbol| symbol.to_sym },
|
|
36
|
-
"date" => proc { |date| Date.parse(date) },
|
|
37
|
-
"datetime" => datetime_proc,
|
|
38
|
-
"dateTime" => datetime_proc,
|
|
39
|
-
"integer" => proc { |integer| integer.to_i },
|
|
40
|
-
"float" => float_proc,
|
|
41
|
-
"double" => float_proc,
|
|
42
|
-
"decimal" => proc { |number| BigDecimal(number) },
|
|
43
|
-
"boolean" => proc { |boolean| !%w[0 false].include?(boolean.strip) },
|
|
44
|
-
"string" => proc { |string| string.to_s },
|
|
45
|
-
"yaml" => proc { |yaml| YAML.load(yaml) rescue yaml }, # rubocop:disable Style/RescueModifier
|
|
46
|
-
"base64Binary" => proc { |binary| base64_decode(binary) },
|
|
47
|
-
"binary" => proc { |binary, entity| parse_binary(binary, entity) },
|
|
48
|
-
"file" => proc { |file, entity| parse_file(file, entity) }
|
|
49
|
-
}.freeze
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
unless defined?(TYPE_NAMES)
|
|
53
|
-
TYPE_NAMES = {
|
|
54
|
-
"Symbol" => "symbol",
|
|
55
|
-
"Integer" => "integer",
|
|
56
|
-
"BigDecimal" => "decimal",
|
|
57
|
-
"Float" => "float",
|
|
58
|
-
"TrueClass" => "boolean",
|
|
59
|
-
"FalseClass" => "boolean",
|
|
60
|
-
"Date" => "date",
|
|
61
|
-
"DateTime" => "datetime",
|
|
62
|
-
"Time" => "datetime",
|
|
63
|
-
"Array" => "array",
|
|
64
|
-
"Hash" => "hash"
|
|
65
|
-
}.freeze
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
DISALLOWED_XML_TYPES = %w[symbol yaml].freeze
|
|
69
|
-
|
|
70
|
-
DEFAULT_OPTIONS = {
|
|
71
|
-
typecast_xml_value: true,
|
|
72
|
-
disallowed_types: DISALLOWED_XML_TYPES,
|
|
73
|
-
symbolize_keys: false
|
|
74
|
-
}.freeze
|
|
75
|
-
|
|
6
|
+
require_relative "multi_xml/constants"
|
|
7
|
+
require_relative "multi_xml/errors"
|
|
8
|
+
require_relative "multi_xml/file_like"
|
|
9
|
+
require_relative "multi_xml/helpers"
|
|
10
|
+
|
|
11
|
+
# A generic swappable back-end for parsing XML
|
|
12
|
+
#
|
|
13
|
+
# MultiXml provides a unified interface for XML parsing across different
|
|
14
|
+
# parser libraries. It automatically selects the best available parser
|
|
15
|
+
# (Ox, LibXML, Nokogiri, Oga, or REXML) and converts XML to Ruby hashes.
|
|
16
|
+
#
|
|
17
|
+
# @api public
|
|
18
|
+
# @example Parse XML
|
|
19
|
+
# MultiXml.parse('<root><name>John</name></root>')
|
|
20
|
+
# #=> {"root"=>{"name"=>"John"}}
|
|
21
|
+
#
|
|
22
|
+
# @example Set the parser
|
|
23
|
+
# MultiXml.parser = :nokogiri
|
|
24
|
+
module MultiXml
|
|
76
25
|
class << self
|
|
77
|
-
|
|
78
|
-
def parser
|
|
79
|
-
return @parser if defined?(@parser)
|
|
80
|
-
|
|
81
|
-
self.parser = default_parser
|
|
82
|
-
@parser
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
# The default parser based on what you currently
|
|
86
|
-
# have loaded and installed. First checks to see
|
|
87
|
-
# if any parsers are already loaded, then checks
|
|
88
|
-
# to see which are installed if none are loaded.
|
|
89
|
-
def default_parser
|
|
90
|
-
return :ox if defined?(::Ox)
|
|
91
|
-
return :libxml if defined?(::LibXML)
|
|
92
|
-
return :nokogiri if defined?(::Nokogiri)
|
|
93
|
-
return :oga if defined?(::Oga)
|
|
26
|
+
include Helpers
|
|
94
27
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
28
|
+
# Get the current XML parser module
|
|
29
|
+
#
|
|
30
|
+
# Returns the currently configured parser, auto-detecting one if not set.
|
|
31
|
+
# Parsers are checked in order of performance: Ox, LibXML, Nokogiri, Oga, REXML.
|
|
32
|
+
#
|
|
33
|
+
# @api public
|
|
34
|
+
# @return [Module] the current parser module
|
|
35
|
+
# @example Get current parser
|
|
36
|
+
# MultiXml.parser #=> MultiXml::Parsers::Ox
|
|
37
|
+
def parser
|
|
38
|
+
@parser ||= resolve_parser(detect_parser)
|
|
103
39
|
end
|
|
104
40
|
|
|
105
|
-
# Set the XML parser
|
|
106
|
-
# Supported by default are:
|
|
41
|
+
# Set the XML parser to use
|
|
107
42
|
#
|
|
108
|
-
#
|
|
109
|
-
#
|
|
110
|
-
#
|
|
111
|
-
#
|
|
112
|
-
#
|
|
43
|
+
# @api public
|
|
44
|
+
# @param new_parser [Symbol, String, Module] Parser specification
|
|
45
|
+
# - Symbol/String: :libxml, :nokogiri, :ox, :rexml, :oga
|
|
46
|
+
# - Module: Custom parser implementing parse(io) and parse_error
|
|
47
|
+
# @return [Module] the newly configured parser module
|
|
48
|
+
# @example Set parser by symbol
|
|
49
|
+
# MultiXml.parser = :nokogiri
|
|
50
|
+
# @example Set parser by module
|
|
51
|
+
# MultiXml.parser = MyCustomParser
|
|
113
52
|
def parser=(new_parser)
|
|
114
|
-
|
|
115
|
-
when String, Symbol
|
|
116
|
-
require "multi_xml/parsers/#{new_parser.to_s.downcase}"
|
|
117
|
-
@parser = MultiXml::Parsers.const_get(new_parser.to_s.split("_").collect(&:capitalize).join.to_s)
|
|
118
|
-
when Class, Module
|
|
119
|
-
@parser = new_parser
|
|
120
|
-
else
|
|
121
|
-
raise("Did not recognize your parser specification. Please specify either a symbol or a class.")
|
|
122
|
-
end
|
|
53
|
+
@parser = resolve_parser(new_parser)
|
|
123
54
|
end
|
|
124
55
|
|
|
125
|
-
# Parse
|
|
126
|
-
#
|
|
127
|
-
# <b>Options</b>
|
|
56
|
+
# Parse XML into a Ruby Hash
|
|
128
57
|
#
|
|
129
|
-
#
|
|
130
|
-
#
|
|
131
|
-
#
|
|
132
|
-
#
|
|
133
|
-
#
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
58
|
+
# @api public
|
|
59
|
+
# @param xml [String, IO] XML content as a string or IO-like object
|
|
60
|
+
# @param options [Hash] Parsing options
|
|
61
|
+
# @option options [Symbol, String, Module] :parser Parser to use for this call
|
|
62
|
+
# @option options [Boolean] :symbolize_keys Convert keys to symbols (default: false)
|
|
63
|
+
# @option options [Array<String>] :disallowed_types Types to reject (default: ['yaml', 'symbol'])
|
|
64
|
+
# @option options [Boolean] :typecast_xml_value Apply type conversions (default: true)
|
|
65
|
+
# @return [Hash] Parsed XML as nested hash
|
|
66
|
+
# @raise [ParseError] if XML is malformed
|
|
67
|
+
# @raise [DisallowedTypeError] if XML contains a disallowed type attribute
|
|
68
|
+
# @example Parse simple XML
|
|
69
|
+
# MultiXml.parse('<root><name>John</name></root>')
|
|
70
|
+
# #=> {"root"=>{"name"=>"John"}}
|
|
71
|
+
# @example Parse with symbolized keys
|
|
72
|
+
# MultiXml.parse('<root><name>John</name></root>', symbolize_keys: true)
|
|
73
|
+
# #=> {root: {name: "John"}}
|
|
74
|
+
def parse(xml, options = {})
|
|
137
75
|
options = DEFAULT_OPTIONS.merge(options)
|
|
76
|
+
xml_parser = options[:parser] ? resolve_parser(options.fetch(:parser)) : parser
|
|
138
77
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
xml = StringIO.new(xml) unless xml.respond_to?(:read)
|
|
78
|
+
io = normalize_input(xml)
|
|
79
|
+
return {} if io.eof?
|
|
142
80
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
hash = undasherize_keys(parser.parse(xml) || {})
|
|
149
|
-
hash = typecast_xml_value(hash, options[:disallowed_types]) if options[:typecast_xml_value]
|
|
150
|
-
rescue DisallowedTypeError
|
|
151
|
-
raise
|
|
152
|
-
rescue parser.parse_error => e
|
|
153
|
-
raise(ParseError, e.message, e.backtrace)
|
|
154
|
-
end
|
|
155
|
-
hash = symbolize_keys(hash) if options[:symbolize_keys]
|
|
156
|
-
hash
|
|
81
|
+
result = parse_with_error_handling(io, xml, xml_parser)
|
|
82
|
+
result = typecast_xml_value(result, options.fetch(:disallowed_types)) if options.fetch(:typecast_xml_value)
|
|
83
|
+
result = symbolize_keys(result) if options.fetch(:symbolize_keys)
|
|
84
|
+
result
|
|
157
85
|
end
|
|
158
86
|
|
|
159
|
-
|
|
160
|
-
# and <tt>content_type</tt> methods.
|
|
161
|
-
module FileLike # :nodoc:
|
|
162
|
-
attr_writer :original_filename, :content_type
|
|
163
|
-
|
|
164
|
-
def original_filename
|
|
165
|
-
@original_filename || "untitled"
|
|
166
|
-
end
|
|
87
|
+
private
|
|
167
88
|
|
|
168
|
-
|
|
169
|
-
|
|
89
|
+
# Resolve a parser specification to a module
|
|
90
|
+
#
|
|
91
|
+
# @api private
|
|
92
|
+
# @param spec [Symbol, String, Class, Module] Parser specification
|
|
93
|
+
# @return [Module] Resolved parser module
|
|
94
|
+
# @raise [RuntimeError] if spec is invalid
|
|
95
|
+
def resolve_parser(spec)
|
|
96
|
+
case spec
|
|
97
|
+
when String, Symbol then load_parser(spec)
|
|
98
|
+
when Module then spec
|
|
99
|
+
else raise "Invalid parser specification: expected Symbol, String, or Module"
|
|
170
100
|
end
|
|
171
101
|
end
|
|
172
102
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
#
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
end
|
|
103
|
+
# Load a parser by name
|
|
104
|
+
#
|
|
105
|
+
# @api private
|
|
106
|
+
# @param name [Symbol, String] Parser name
|
|
107
|
+
# @return [Module] Loaded parser module
|
|
108
|
+
def load_parser(name)
|
|
109
|
+
name = name.to_s.downcase
|
|
110
|
+
require "multi_xml/parsers/#{name}"
|
|
111
|
+
Parsers.const_get(camelize(name))
|
|
183
112
|
end
|
|
184
113
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
114
|
+
# Convert underscored string to CamelCase
|
|
115
|
+
#
|
|
116
|
+
# @api private
|
|
117
|
+
# @param name [String] Underscored string
|
|
118
|
+
# @return [String] CamelCased string
|
|
119
|
+
def camelize(name)
|
|
120
|
+
name.split("_").map(&:capitalize).join
|
|
191
121
|
end
|
|
192
122
|
|
|
193
|
-
|
|
194
|
-
|
|
123
|
+
# Detect the best available parser
|
|
124
|
+
#
|
|
125
|
+
# @api private
|
|
126
|
+
# @return [Symbol] Parser name
|
|
127
|
+
# @raise [NoParserError] if no parser is available
|
|
128
|
+
def detect_parser
|
|
129
|
+
find_loaded_parser || find_available_parser || raise_no_parser_error
|
|
195
130
|
end
|
|
196
131
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
132
|
+
# Parser constant names mapped to their symbols, in preference order
|
|
133
|
+
#
|
|
134
|
+
# @api private
|
|
135
|
+
LOADED_PARSER_CHECKS = {
|
|
136
|
+
Ox: :ox,
|
|
137
|
+
LibXML: :libxml,
|
|
138
|
+
Nokogiri: :nokogiri,
|
|
139
|
+
Oga: :oga
|
|
140
|
+
}.freeze
|
|
141
|
+
private_constant :LOADED_PARSER_CHECKS
|
|
142
|
+
|
|
143
|
+
# Find an already-loaded parser library
|
|
144
|
+
#
|
|
145
|
+
# @api private
|
|
146
|
+
# @return [Symbol, nil] Parser name or nil if none loaded
|
|
147
|
+
def find_loaded_parser
|
|
148
|
+
LOADED_PARSER_CHECKS.each do |const_name, parser_name|
|
|
149
|
+
return parser_name if const_defined?(const_name)
|
|
207
150
|
end
|
|
151
|
+
nil
|
|
208
152
|
end
|
|
209
153
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
when Array
|
|
218
|
-
params.collect { |value| undasherize_keys(value) }
|
|
219
|
-
else
|
|
220
|
-
params
|
|
154
|
+
# Try to load and find an available parser
|
|
155
|
+
#
|
|
156
|
+
# @api private
|
|
157
|
+
# @return [Symbol, nil] Parser name or nil if none available
|
|
158
|
+
def find_available_parser
|
|
159
|
+
PARSER_PREFERENCE.each do |library, parser_name|
|
|
160
|
+
return parser_name if try_require(library)
|
|
221
161
|
end
|
|
162
|
+
nil
|
|
222
163
|
end
|
|
223
164
|
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
# this commented-out suggestion helps to avoid the multiple attribute
|
|
236
|
-
# problem, but it breaks when there is only one item in the array.
|
|
237
|
-
#
|
|
238
|
-
# from: https://github.com/jnunemaker/httparty/issues/102
|
|
239
|
-
#
|
|
240
|
-
# _, entries = value.detect { |k, v| k != 'type' && v.is_a?(Array) }
|
|
241
|
-
|
|
242
|
-
# This attempt fails to consider the order that the detect method
|
|
243
|
-
# retrieves the entries.
|
|
244
|
-
# _, entries = value.detect {|key, _| key != 'type'}
|
|
165
|
+
# Attempt to require a library
|
|
166
|
+
#
|
|
167
|
+
# @api private
|
|
168
|
+
# @param library [String] Library to require
|
|
169
|
+
# @return [Boolean] true if successful, false if LoadError
|
|
170
|
+
def try_require(library)
|
|
171
|
+
require library
|
|
172
|
+
true
|
|
173
|
+
rescue LoadError
|
|
174
|
+
false
|
|
175
|
+
end
|
|
245
176
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
177
|
+
# Raise an error indicating no parser is available
|
|
178
|
+
#
|
|
179
|
+
# @api private
|
|
180
|
+
# @return [void]
|
|
181
|
+
# @raise [NoParserError] always
|
|
182
|
+
def raise_no_parser_error
|
|
183
|
+
raise NoParserError, <<~MSG.chomp
|
|
184
|
+
No XML parser detected. Install one of: ox, nokogiri, libxml-ruby, or oga.
|
|
185
|
+
See https://github.com/sferik/multi_xml for more information.
|
|
186
|
+
MSG
|
|
187
|
+
end
|
|
249
188
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
when Hash
|
|
258
|
-
[typecast_xml_value(entries, disallowed_types)]
|
|
259
|
-
else
|
|
260
|
-
raise("can't typecast #{entries.class.name}: #{entries.inspect}")
|
|
261
|
-
end
|
|
189
|
+
# Normalize input to an IO-like object
|
|
190
|
+
#
|
|
191
|
+
# @api private
|
|
192
|
+
# @param xml [String, IO] Input to normalize
|
|
193
|
+
# @return [IO] IO-like object
|
|
194
|
+
def normalize_input(xml)
|
|
195
|
+
return xml if xml.respond_to?(:read)
|
|
262
196
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
block = PARSING[value["type"]]
|
|
266
|
-
if block
|
|
267
|
-
if block.arity == 1
|
|
268
|
-
value.delete("type") if PARSING[value["type"]]
|
|
269
|
-
if value.keys.size > 1
|
|
270
|
-
value[CONTENT_ROOT] = block.call(content)
|
|
271
|
-
value
|
|
272
|
-
else
|
|
273
|
-
block.call(content)
|
|
274
|
-
end
|
|
275
|
-
else
|
|
276
|
-
block.call(content, value)
|
|
277
|
-
end
|
|
278
|
-
else
|
|
279
|
-
(value.keys.size > 1) ? value : content
|
|
280
|
-
end
|
|
281
|
-
elsif value["type"] == "string" && value["nil"] != "true"
|
|
282
|
-
""
|
|
283
|
-
# blank or nil parsed values are represented by nil
|
|
284
|
-
elsif value.empty? || value["nil"] == "true"
|
|
285
|
-
nil
|
|
286
|
-
# If the type is the only element which makes it then
|
|
287
|
-
# this still makes the value nil, except if type is
|
|
288
|
-
# a XML node(where type['value'] is a Hash)
|
|
289
|
-
elsif value["type"] && value.size == 1 && !value["type"].is_a?(Hash)
|
|
290
|
-
nil
|
|
291
|
-
else
|
|
292
|
-
xml_value = value.each_with_object({}) do |(k, v), hash|
|
|
293
|
-
hash[k] = typecast_xml_value(v, disallowed_types)
|
|
294
|
-
hash
|
|
295
|
-
end
|
|
197
|
+
StringIO.new(xml.to_s.strip)
|
|
198
|
+
end
|
|
296
199
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
200
|
+
# Parse XML with error handling and key normalization
|
|
201
|
+
#
|
|
202
|
+
# @api private
|
|
203
|
+
# @param io [IO] IO-like object containing XML
|
|
204
|
+
# @param original_input [String, IO] Original input for error reporting
|
|
205
|
+
# @param xml_parser [Module] Parser to use
|
|
206
|
+
# @return [Hash] Parsed XML with undasherized keys
|
|
207
|
+
# @raise [ParseError] if XML is malformed
|
|
208
|
+
def parse_with_error_handling(io, original_input, xml_parser)
|
|
209
|
+
undasherize_keys(xml_parser.parse(io) || {})
|
|
210
|
+
rescue xml_parser.parse_error => e
|
|
211
|
+
xml_string = original_input.respond_to?(:read) ? original_input.tap(&:rewind).read : original_input.to_s
|
|
212
|
+
raise(ParseError.new(e, xml: xml_string, cause: e))
|
|
309
213
|
end
|
|
310
214
|
end
|
|
311
215
|
end
|