multi_xml 0.6.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,45 +1,7 @@
1
1
  module MultiXml
2
- module Version
3
- module_function
4
-
5
- # @return [Integer]
6
- def major
7
- 0
8
- end
9
-
10
- # @return [Integer]
11
- def minor
12
- 6
13
- end
14
-
15
- # @return [Integer]
16
- def patch
17
- 0
18
- end
19
-
20
- # @return [Integer, NilClass]
21
- def pre
22
- nil
23
- end
24
-
25
- # @return [Hash]
26
- def to_h
27
- {
28
- :major => major,
29
- :minor => minor,
30
- :patch => patch,
31
- :pre => pre,
32
- }
33
- end
34
-
35
- # @return [Array]
36
- def to_a
37
- [major, minor, patch, pre].compact
38
- end
39
-
40
- # @return [String]
41
- def to_s
42
- to_a.join('.')
43
- end
44
- end
2
+ # The current version of MultiXml
3
+ #
4
+ # @api public
5
+ # @return [Gem::Version] the gem version
6
+ VERSION = Gem::Version.create("0.8.1")
45
7
  end
data/lib/multi_xml.rb CHANGED
@@ -1,305 +1,215 @@
1
- require 'base64'
2
- require 'bigdecimal'
3
- require 'date'
4
- require 'stringio'
5
- require 'time'
6
- require 'yaml'
7
-
8
- module MultiXml # rubocop:disable ModuleLength
9
- class ParseError < StandardError; end
10
- class NoParserError < StandardError; end
11
- class DisallowedTypeError < StandardError
12
- def initialize(type)
13
- super "Disallowed type attribute: #{type.inspect}"
14
- end
15
- end
16
-
17
- unless defined?(REQUIREMENT_MAP)
18
- REQUIREMENT_MAP = [
19
- ['ox', :ox],
20
- ['libxml', :libxml],
21
- ['nokogiri', :nokogiri],
22
- ['rexml/document', :rexml],
23
- ['oga', :oga],
24
- ].freeze
25
- end
26
-
27
- CONTENT_ROOT = '__content__'.freeze unless defined?(CONTENT_ROOT)
28
-
29
- unless defined?(PARSING)
30
- float_proc = proc { |float| float.to_f }
31
- datetime_proc = proc { |time| Time.parse(time).utc rescue DateTime.parse(time).utc } # rubocop:disable RescueModifier
32
-
33
- PARSING = {
34
- 'symbol' => proc { |symbol| symbol.to_sym },
35
- 'date' => proc { |date| Date.parse(date) },
36
- 'datetime' => datetime_proc,
37
- 'dateTime' => datetime_proc,
38
- 'integer' => proc { |integer| integer.to_i },
39
- 'float' => float_proc,
40
- 'double' => float_proc,
41
- 'decimal' => proc { |number| BigDecimal(number) },
42
- 'boolean' => proc { |boolean| !%w(0 false).include?(boolean.strip) },
43
- 'string' => proc { |string| string.to_s },
44
- 'yaml' => proc { |yaml| YAML.load(yaml) rescue yaml }, # rubocop:disable RescueModifier
45
- 'base64Binary' => proc { |binary| ::Base64.decode64(binary) },
46
- 'binary' => proc { |binary, entity| parse_binary(binary, entity) },
47
- 'file' => proc { |file, entity| parse_file(file, entity) },
48
- }.freeze
49
- end
50
-
51
- unless defined?(TYPE_NAMES)
52
- TYPE_NAMES = {
53
- 'Symbol' => 'symbol',
54
- 'Integer' => 'integer',
55
- 'BigDecimal' => 'decimal',
56
- 'Float' => 'float',
57
- 'TrueClass' => 'boolean',
58
- 'FalseClass' => 'boolean',
59
- 'Date' => 'date',
60
- 'DateTime' => 'datetime',
61
- 'Time' => 'datetime',
62
- 'Array' => 'array',
63
- 'Hash' => 'hash',
64
- }.freeze
65
- end
66
-
67
- DISALLOWED_XML_TYPES = %w(symbol yaml).freeze
68
-
69
- DEFAULT_OPTIONS = {
70
- :typecast_xml_value => true,
71
- :disallowed_types => DISALLOWED_XML_TYPES,
72
- :symbolize_keys => false,
73
- }.freeze
74
-
1
+ require "bigdecimal"
2
+ require "date"
3
+ require "stringio"
4
+ require "time"
5
+ require "yaml"
6
+ require_relative "multi_xml/constants"
7
+ require_relative "multi_xml/errors"
8
+ require_relative "multi_xml/file_like"
9
+ require_relative "multi_xml/helpers"
10
+
11
+ # A generic swappable back-end for parsing XML
12
+ #
13
+ # MultiXml provides a unified interface for XML parsing across different
14
+ # parser libraries. It automatically selects the best available parser
15
+ # (Ox, LibXML, Nokogiri, Oga, or REXML) and converts XML to Ruby hashes.
16
+ #
17
+ # @api public
18
+ # @example Parse XML
19
+ # MultiXml.parse('<root><name>John</name></root>')
20
+ # #=> {"root"=>{"name"=>"John"}}
21
+ #
22
+ # @example Set the parser
23
+ # MultiXml.parser = :nokogiri
24
+ module MultiXml
75
25
  class << self
76
- # Get the current parser class.
77
- def parser
78
- return @parser if defined?(@parser)
79
- self.parser = default_parser
80
- @parser
81
- end
26
+ include Helpers
82
27
 
83
- # The default parser based on what you currently
84
- # have loaded and installed. First checks to see
85
- # if any parsers are already loaded, then checks
86
- # to see which are installed if none are loaded.
87
- def default_parser
88
- return :ox if defined?(::Ox)
89
- return :libxml if defined?(::LibXML)
90
- return :nokogiri if defined?(::Nokogiri)
91
- return :oga if defined?(::Oga)
92
-
93
- REQUIREMENT_MAP.each do |library, parser|
94
- begin
95
- require library
96
- return parser
97
- rescue LoadError
98
- next
99
- end
100
- end
101
- raise(NoParserError.new("No XML parser detected. If you're using Rubinius and Bundler, try adding an XML parser to your Gemfile (e.g. libxml-ruby, nokogiri, or rubysl-rexml). For more information, see https://github.com/sferik/multi_xml/issues/42."))
28
+ # Get the current XML parser module
29
+ #
30
+ # Returns the currently configured parser, auto-detecting one if not set.
31
+ # Parsers are checked in order of performance: Ox, LibXML, Nokogiri, Oga, REXML.
32
+ #
33
+ # @api public
34
+ # @return [Module] the current parser module
35
+ # @example Get current parser
36
+ # MultiXml.parser #=> MultiXml::Parsers::Ox
37
+ def parser
38
+ @parser ||= resolve_parser(detect_parser)
102
39
  end
103
40
 
104
- # Set the XML parser utilizing a symbol, string, or class.
105
- # Supported by default are:
41
+ # Set the XML parser to use
106
42
  #
107
- # * <tt>:libxml</tt>
108
- # * <tt>:nokogiri</tt>
109
- # * <tt>:ox</tt>
110
- # * <tt>:rexml</tt>
111
- # * <tt>:oga</tt>
43
+ # @api public
44
+ # @param new_parser [Symbol, String, Module] Parser specification
45
+ # - Symbol/String: :libxml, :nokogiri, :ox, :rexml, :oga
46
+ # - Module: Custom parser implementing parse(io) and parse_error
47
+ # @return [Module] the newly configured parser module
48
+ # @example Set parser by symbol
49
+ # MultiXml.parser = :nokogiri
50
+ # @example Set parser by module
51
+ # MultiXml.parser = MyCustomParser
112
52
  def parser=(new_parser)
113
- case new_parser
114
- when String, Symbol
115
- require "multi_xml/parsers/#{new_parser.to_s.downcase}"
116
- @parser = MultiXml::Parsers.const_get(new_parser.to_s.split('_').collect(&:capitalize).join('').to_s)
117
- when Class, Module
118
- @parser = new_parser
119
- else
120
- raise('Did not recognize your parser specification. Please specify either a symbol or a class.')
121
- end
53
+ @parser = resolve_parser(new_parser)
122
54
  end
123
55
 
124
- # Parse an XML string or IO into Ruby.
125
- #
126
- # <b>Options</b>
56
+ # Parse XML into a Ruby Hash
127
57
  #
128
- # <tt>:symbolize_keys</tt> :: If true, will use symbols instead of strings for the keys.
129
- #
130
- # <tt>:disallowed_types</tt> :: Types to disallow from being typecasted. Defaults to `['yaml', 'symbol']`. Use `[]` to allow all types.
131
- #
132
- # <tt>:typecast_xml_value</tt> :: If true, won't typecast values for parsed document
133
- def parse(xml, options = {}) # rubocop:disable AbcSize, CyclomaticComplexity, MethodLength, PerceivedComplexity
134
- xml ||= ''
135
-
58
+ # @api public
59
+ # @param xml [String, IO] XML content as a string or IO-like object
60
+ # @param options [Hash] Parsing options
61
+ # @option options [Symbol, String, Module] :parser Parser to use for this call
62
+ # @option options [Boolean] :symbolize_keys Convert keys to symbols (default: false)
63
+ # @option options [Array<String>] :disallowed_types Types to reject (default: ['yaml', 'symbol'])
64
+ # @option options [Boolean] :typecast_xml_value Apply type conversions (default: true)
65
+ # @return [Hash] Parsed XML as nested hash
66
+ # @raise [ParseError] if XML is malformed
67
+ # @raise [DisallowedTypeError] if XML contains a disallowed type attribute
68
+ # @example Parse simple XML
69
+ # MultiXml.parse('<root><name>John</name></root>')
70
+ # #=> {"root"=>{"name"=>"John"}}
71
+ # @example Parse with symbolized keys
72
+ # MultiXml.parse('<root><name>John</name></root>', symbolize_keys: true)
73
+ # #=> {root: {name: "John"}}
74
+ def parse(xml, options = {})
136
75
  options = DEFAULT_OPTIONS.merge(options)
76
+ xml_parser = options[:parser] ? resolve_parser(options.fetch(:parser)) : parser
137
77
 
138
- xml = xml.strip if xml.respond_to?(:strip)
139
- begin
140
- xml = StringIO.new(xml) unless xml.respond_to?(:read)
78
+ io = normalize_input(xml)
79
+ return {} if io.eof?
141
80
 
142
- char = xml.getc
143
- return {} if char.nil?
144
- xml.ungetc(char)
145
-
146
- hash = undasherize_keys(parser.parse(xml) || {})
147
- hash = options[:typecast_xml_value] ? typecast_xml_value(hash, options[:disallowed_types]) : hash
148
- rescue DisallowedTypeError
149
- raise
150
- rescue parser.parse_error => error
151
- raise(ParseError, error.message, error.backtrace) # rubocop:disable RaiseArgs
152
- end
153
- hash = symbolize_keys(hash) if options[:symbolize_keys]
154
- hash
81
+ result = parse_with_error_handling(io, xml, xml_parser)
82
+ result = typecast_xml_value(result, options.fetch(:disallowed_types)) if options.fetch(:typecast_xml_value)
83
+ result = symbolize_keys(result) if options.fetch(:symbolize_keys)
84
+ result
155
85
  end
156
86
 
157
- # This module decorates files with the <tt>original_filename</tt>
158
- # and <tt>content_type</tt> methods.
159
- module FileLike #:nodoc:
160
- attr_writer :original_filename, :content_type
161
-
162
- def original_filename
163
- @original_filename || 'untitled'
164
- end
87
+ private
165
88
 
166
- def content_type
167
- @content_type || 'application/octet-stream'
89
+ # Resolve a parser specification to a module
90
+ #
91
+ # @api private
92
+ # @param spec [Symbol, String, Class, Module] Parser specification
93
+ # @return [Module] Resolved parser module
94
+ # @raise [RuntimeError] if spec is invalid
95
+ def resolve_parser(spec)
96
+ case spec
97
+ when String, Symbol then load_parser(spec)
98
+ when Module then spec
99
+ else raise "Invalid parser specification: expected Symbol, String, or Module"
168
100
  end
169
101
  end
170
102
 
171
- private
103
+ # Load a parser by name
104
+ #
105
+ # @api private
106
+ # @param name [Symbol, String] Parser name
107
+ # @return [Module] Loaded parser module
108
+ def load_parser(name)
109
+ name = name.to_s.downcase
110
+ require "multi_xml/parsers/#{name}"
111
+ Parsers.const_get(camelize(name))
112
+ end
172
113
 
173
- # TODO: Add support for other encodings
174
- def parse_binary(binary, entity) #:nodoc:
175
- case entity['encoding']
176
- when 'base64'
177
- Base64.decode64(binary)
178
- else
179
- binary
180
- end
114
+ # Convert underscored string to CamelCase
115
+ #
116
+ # @api private
117
+ # @param name [String] Underscored string
118
+ # @return [String] CamelCased string
119
+ def camelize(name)
120
+ name.split("_").map(&:capitalize).join
181
121
  end
182
122
 
183
- def parse_file(file, entity)
184
- f = StringIO.new(Base64.decode64(file))
185
- f.extend(FileLike)
186
- f.original_filename = entity['name']
187
- f.content_type = entity['content_type']
188
- f
123
+ # Detect the best available parser
124
+ #
125
+ # @api private
126
+ # @return [Symbol] Parser name
127
+ # @raise [NoParserError] if no parser is available
128
+ def detect_parser
129
+ find_loaded_parser || find_available_parser || raise_no_parser_error
189
130
  end
190
131
 
191
- def symbolize_keys(params)
192
- case params
193
- when Hash
194
- params.inject({}) do |result, (key, value)|
195
- result.merge(key.to_sym => symbolize_keys(value))
196
- end
197
- when Array
198
- params.collect { |value| symbolize_keys(value) }
199
- else
200
- params
132
+ # Parser constant names mapped to their symbols, in preference order
133
+ #
134
+ # @api private
135
+ LOADED_PARSER_CHECKS = {
136
+ Ox: :ox,
137
+ LibXML: :libxml,
138
+ Nokogiri: :nokogiri,
139
+ Oga: :oga
140
+ }.freeze
141
+ private_constant :LOADED_PARSER_CHECKS
142
+
143
+ # Find an already-loaded parser library
144
+ #
145
+ # @api private
146
+ # @return [Symbol, nil] Parser name or nil if none loaded
147
+ def find_loaded_parser
148
+ LOADED_PARSER_CHECKS.each do |const_name, parser_name|
149
+ return parser_name if const_defined?(const_name)
201
150
  end
151
+ nil
202
152
  end
203
153
 
204
- def undasherize_keys(params)
205
- case params
206
- when Hash
207
- params.inject({}) do |hash, (key, value)|
208
- hash[key.to_s.tr('-'.freeze, '_'.freeze)] = undasherize_keys(value)
209
- hash
210
- end
211
- when Array
212
- params.collect { |value| undasherize_keys(value) }
213
- else
214
- params
154
+ # Try to load and find an available parser
155
+ #
156
+ # @api private
157
+ # @return [Symbol, nil] Parser name or nil if none available
158
+ def find_available_parser
159
+ PARSER_PREFERENCE.each do |library, parser_name|
160
+ return parser_name if try_require(library)
215
161
  end
162
+ nil
216
163
  end
217
164
 
218
- def typecast_xml_value(value, disallowed_types = nil) # rubocop:disable AbcSize, CyclomaticComplexity, MethodLength, PerceivedComplexity
219
- disallowed_types ||= DISALLOWED_XML_TYPES
220
-
221
- case value
222
- when Hash
223
- if value.include?('type') && !value['type'].is_a?(Hash) && disallowed_types.include?(value['type'])
224
- raise(DisallowedTypeError.new(value['type']))
225
- end
226
-
227
- if value['type'] == 'array'
228
-
229
- # this commented-out suggestion helps to avoid the multiple attribute
230
- # problem, but it breaks when there is only one item in the array.
231
- #
232
- # from: https://github.com/jnunemaker/httparty/issues/102
233
- #
234
- # _, entries = value.detect { |k, v| k != 'type' && v.is_a?(Array) }
235
-
236
- # This attempt fails to consider the order that the detect method
237
- # retrieves the entries.
238
- # _, entries = value.detect {|key, _| key != 'type'}
165
+ # Attempt to require a library
166
+ #
167
+ # @api private
168
+ # @param library [String] Library to require
169
+ # @return [Boolean] true if successful, false if LoadError
170
+ def try_require(library)
171
+ require library
172
+ true
173
+ rescue LoadError
174
+ false
175
+ end
239
176
 
240
- # This approach ignores attribute entries that are not convertable
241
- # to an Array which allows attributes to be ignored.
242
- _, entries = value.detect { |k, v| k != 'type' && (v.is_a?(Array) || v.is_a?(Hash)) }
177
+ # Raise an error indicating no parser is available
178
+ #
179
+ # @api private
180
+ # @return [void]
181
+ # @raise [NoParserError] always
182
+ def raise_no_parser_error
183
+ raise NoParserError, <<~MSG.chomp
184
+ No XML parser detected. Install one of: ox, nokogiri, libxml-ruby, or oga.
185
+ See https://github.com/sferik/multi_xml for more information.
186
+ MSG
187
+ end
243
188
 
244
- case entries
245
- when NilClass
246
- []
247
- when String
248
- [] if entries.strip.empty?
249
- when Array
250
- entries.collect { |entry| typecast_xml_value(entry, disallowed_types) }
251
- when Hash
252
- [typecast_xml_value(entries, disallowed_types)]
253
- else
254
- raise("can't typecast #{entries.class.name}: #{entries.inspect}")
255
- end
189
+ # Normalize input to an IO-like object
190
+ #
191
+ # @api private
192
+ # @param xml [String, IO] Input to normalize
193
+ # @return [IO] IO-like object
194
+ def normalize_input(xml)
195
+ return xml if xml.respond_to?(:read)
256
196
 
257
- elsif value.key?(CONTENT_ROOT)
258
- content = value[CONTENT_ROOT]
259
- block = PARSING[value['type']]
260
- if block
261
- if block.arity == 1
262
- value.delete('type') if PARSING[value['type']]
263
- if value.keys.size > 1
264
- value[CONTENT_ROOT] = block.call(content)
265
- value
266
- else
267
- block.call(content)
268
- end
269
- else
270
- block.call(content, value)
271
- end
272
- else
273
- value.keys.size > 1 ? value : content
274
- end
275
- elsif value['type'] == 'string' && value['nil'] != 'true'
276
- ''
277
- # blank or nil parsed values are represented by nil
278
- elsif value.empty? || value['nil'] == 'true'
279
- nil
280
- # If the type is the only element which makes it then
281
- # this still makes the value nil, except if type is
282
- # a XML node(where type['value'] is a Hash)
283
- elsif value['type'] && value.size == 1 && !value['type'].is_a?(Hash)
284
- nil
285
- else
286
- xml_value = value.inject({}) do |hash, (k, v)|
287
- hash[k] = typecast_xml_value(v, disallowed_types)
288
- hash
289
- end
197
+ StringIO.new(xml.to_s.strip)
198
+ end
290
199
 
291
- # Turn {:files => {:file => #<StringIO>} into {:files => #<StringIO>} so it is compatible with
292
- # how multipart uploaded files from HTML appear
293
- xml_value['file'].is_a?(StringIO) ? xml_value['file'] : xml_value
294
- end
295
- when Array
296
- value.map! { |i| typecast_xml_value(i, disallowed_types) }
297
- value.length > 1 ? value : value.first
298
- when String
299
- value
300
- else
301
- raise("can't typecast #{value.class.name}: #{value.inspect}")
302
- end
200
+ # Parse XML with error handling and key normalization
201
+ #
202
+ # @api private
203
+ # @param io [IO] IO-like object containing XML
204
+ # @param original_input [String, IO] Original input for error reporting
205
+ # @param xml_parser [Module] Parser to use
206
+ # @return [Hash] Parsed XML with undasherized keys
207
+ # @raise [ParseError] if XML is malformed
208
+ def parse_with_error_handling(io, original_input, xml_parser)
209
+ undasherize_keys(xml_parser.parse(io) || {})
210
+ rescue xml_parser.parse_error => e
211
+ xml_string = original_input.respond_to?(:read) ? original_input.tap(&:rewind).read : original_input.to_s
212
+ raise(ParseError.new(e, xml: xml_string, cause: e))
303
213
  end
304
214
  end
305
215
  end