multi_xml 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mutant.yml +6 -1
- data/CHANGELOG.md +26 -0
- data/Gemfile +2 -1
- data/README.md +183 -39
- data/Rakefile +7 -0
- data/Steepfile +8 -1
- data/benchmark/overall_parser_benchmark.rb +5 -0
- data/benchmark.rb +1002 -0
- data/lib/multi_xml/concurrency.rb +31 -0
- data/lib/multi_xml/constants.rb +65 -20
- data/lib/multi_xml/deprecated.rb +35 -0
- data/lib/multi_xml/errors.rb +62 -8
- data/lib/multi_xml/file_like.rb +2 -2
- data/lib/multi_xml/helpers.rb +2 -2
- data/lib/multi_xml/options.rb +63 -0
- data/lib/multi_xml/options_normalization.rb +40 -0
- data/lib/multi_xml/parse_support.rb +113 -0
- data/lib/multi_xml/parser.rb +47 -0
- data/lib/multi_xml/parser_resolution.rb +150 -0
- data/lib/multi_xml/parsers/dom_parser.rb +107 -14
- data/lib/multi_xml/parsers/libxml.rb +36 -13
- data/lib/multi_xml/parsers/libxml_sax.rb +104 -19
- data/lib/multi_xml/parsers/nokogiri.rb +36 -13
- data/lib/multi_xml/parsers/nokogiri_sax.rb +47 -19
- data/lib/multi_xml/parsers/oga.rb +87 -15
- data/lib/multi_xml/parsers/ox.rb +120 -37
- data/lib/multi_xml/parsers/rexml.rb +104 -16
- data/lib/multi_xml/parsers/sax_handler.rb +84 -32
- data/lib/multi_xml/version.rb +3 -3
- data/lib/multi_xml.rb +137 -134
- data/sig/multi_xml.rbs +93 -16
- metadata +11 -2
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module MultiXML
  # Registry of the process-wide locks that serialize access to MultiXML's
  # mutable state. Every lock guards exactly one piece of state, and
  # callers acquire it by symbolic name through {.synchronize}, which lets
  # the registry constant itself remain private.
  #
  # @api private
  module Concurrency
    # Symbolic name => mutex instance. The name is what callers hand to
    # {.synchronize}.
    MUTEXES = {
      # Serializes the check-then-add pair that warn_deprecation_once
      # performs on the DEPRECATION_WARNINGS_SHOWN set in MultiXML.
      deprecation_warnings: Mutex.new
    }.freeze
    private_constant :MUTEXES

    class << self
      # Execute the given block while holding the lock registered under name
      #
      # @api private
      # @param name [Symbol] mutex identifier
      # @yield block to execute while holding the mutex
      # @return [Object] the block's return value
      # @raise [KeyError] when name does not match a known mutex
      def synchronize(name, &block)
        lock = MUTEXES.fetch(name)
        lock.synchronize(&block)
      end
    end
  end
end
|
data/lib/multi_xml/constants.rb
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
# Shared constants and converter lambdas used across parser backends.
|
|
2
|
+
module MultiXML
|
|
2
3
|
# Hash key for storing text content within element hashes
|
|
3
4
|
#
|
|
4
5
|
# @api public
|
|
5
6
|
# @return [String] the key "__content__" used for text content
|
|
6
7
|
# @example Accessing text content
|
|
7
|
-
# result =
|
|
8
|
+
# result = MultiXML.parse('<name>John</name>')
|
|
8
9
|
# result["name"] #=> "John" (simplified, but internally uses __content__)
|
|
9
10
|
TEXT_CONTENT_KEY = "__content__".freeze
|
|
10
11
|
|
|
@@ -46,31 +47,57 @@ module MultiXml
|
|
|
46
47
|
# FALSE_BOOLEAN_VALUES.include?("0") #=> true
|
|
47
48
|
FALSE_BOOLEAN_VALUES = Set.new(%w[0 false]).freeze
|
|
48
49
|
|
|
50
|
+
# Supported values for the :namespaces parse option
|
|
51
|
+
#
|
|
52
|
+
# @api public
|
|
53
|
+
# @return [Array<Symbol>] the valid namespace handling modes
|
|
54
|
+
# @example Parse with namespace preservation
|
|
55
|
+
# MultiXML.parse(xml, namespaces: :preserve)
|
|
56
|
+
NAMESPACE_MODES = %i[strip preserve].freeze
|
|
57
|
+
|
|
49
58
|
# Default parsing options
|
|
50
59
|
#
|
|
51
60
|
# @api public
|
|
52
61
|
# @return [Hash] default options for parse method
|
|
53
62
|
# @example View defaults
|
|
54
|
-
# DEFAULT_OPTIONS[:
|
|
63
|
+
# DEFAULT_OPTIONS[:symbolize_names] #=> false
|
|
55
64
|
DEFAULT_OPTIONS = {
|
|
56
65
|
typecast_xml_value: true,
|
|
57
66
|
disallowed_types: DISALLOWED_TYPES,
|
|
58
|
-
|
|
67
|
+
symbolize_names: false,
|
|
68
|
+
namespaces: :strip
|
|
59
69
|
}.freeze
|
|
60
70
|
|
|
61
71
|
# Parser libraries in preference order (fastest first)
|
|
62
72
|
#
|
|
73
|
+
# TruffleRuby's JIT favors pure-Ruby parsers and penalizes FFI-bound
|
|
74
|
+
# ones, so rexml jumps to the head of the list (after ox, which is
|
|
75
|
+
# filtered out of auto-detection by ParserResolution#skip_on_platform?)
|
|
76
|
+
# and nokogiri falls to last.
|
|
77
|
+
#
|
|
63
78
|
# @api public
|
|
64
79
|
# @return [Array<Array>] pairs of [require_path, parser_symbol]
|
|
65
80
|
# @example View parser order
|
|
66
81
|
# PARSER_PREFERENCE.first #=> ["ox", :ox]
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
[
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
82
|
+
# :nocov:
|
|
83
|
+
PARSER_PREFERENCE = (
  case RUBY_ENGINE
  when "truffleruby"
    # TruffleRuby ordering: rexml directly after ox, nokogiri last.
    [
      ["ox", :ox],
      ["rexml/document", :rexml],
      ["libxml-ruby", :libxml],
      ["oga", :oga],
      ["nokogiri", :nokogiri]
    ]
  else
    # Ordering for every other engine: rexml is the last resort.
    [
      ["ox", :ox],
      ["libxml-ruby", :libxml],
      ["nokogiri", :nokogiri],
      ["oga", :oga],
      ["rexml/document", :rexml]
    ]
  end
).freeze
|
|
100
|
+
# :nocov:
|
|
74
101
|
|
|
75
102
|
# Parses datetime strings, trying Time first then DateTime
|
|
76
103
|
#
|
|
@@ -79,9 +106,34 @@ module MultiXml
|
|
|
79
106
|
PARSE_DATETIME = lambda do |text|
  begin
    # First choice: Time.parse, normalized to UTC.
    Time.parse(text).utc
  rescue ArgumentError
    begin
      # Second choice: DateTime.parse, converted to a UTC Time.
      DateTime.parse(text).to_time.utc
    rescue ArgumentError, NoMethodError
      # Last resort: MultiXML's private ISO week-date parser.
      MultiXML.send(:parse_iso_week_datetime, text)
    end
  end
end
|
|
84
115
|
|
|
116
|
+
# Regex matching ISO week dates like YYYY-Www or YYYY-Www-d.
|
|
117
|
+
#
|
|
118
|
+
# @api private
|
|
119
|
+
ISO_WEEK_DATE = /\A(?<year>\d{4})-W(?<week>\d{2})(?:-(?<day>\d))?\z/
|
|
120
|
+
private_constant :ISO_WEEK_DATE
|
|
121
|
+
|
|
122
|
+
# Parse YYYY-Www[-d] ISO week dates into a UTC Time
#
# The captured fields are converted with an explicit base of 10. A bare
# Integer("08") treats the leading zero as an octal prefix and raises
# ArgumentError, which would reject ISO weeks 08 and 09 (and years such
# as "0800") even though they match ISO_WEEK_DATE.
#
# @api private
# @param string [String] ISO week date string
# @return [Time] UTC midnight for the given ISO week date
# @raise [ArgumentError] if the string is not a supported ISO week date
def self.parse_iso_week_datetime(string)
  match = ISO_WEEK_DATE.match(string)
  raise ArgumentError, "invalid date" unless match

  year = Integer(match[:year], 10)
  week = Integer(match[:week], 10)
  # The day component is optional and defaults to the first day of the week.
  day = Integer(match[:day] || "1", 10)

  date = Date.commercial(year, week, day)
  Time.utc(date.year, date.month, date.day)
end
|
|
135
|
+
private_class_method :parse_iso_week_datetime
|
|
136
|
+
|
|
85
137
|
# Creates a file-like StringIO from base64-encoded content
|
|
86
138
|
#
|
|
87
139
|
# @api private
|
|
@@ -105,26 +157,19 @@ module MultiXml
|
|
|
105
157
|
# @example Using a converter
|
|
106
158
|
# TYPE_CONVERTERS["integer"].call("42") #=> 42
|
|
107
159
|
TYPE_CONVERTERS = {
|
|
108
|
-
|
|
109
|
-
"symbol" => :to_sym.to_proc,
|
|
160
|
+
"symbol" => ->(s) { s.to_sym },
|
|
110
161
|
"string" => :to_s.to_proc,
|
|
111
162
|
"integer" => :to_i.to_proc,
|
|
112
163
|
"float" => :to_f.to_proc,
|
|
113
164
|
"double" => :to_f.to_proc,
|
|
114
165
|
"decimal" => ->(s) { BigDecimal(s) },
|
|
115
166
|
"boolean" => ->(s) { !FALSE_BOOLEAN_VALUES.include?(s.strip) },
|
|
116
|
-
|
|
117
|
-
# Date and time types
|
|
118
167
|
"date" => Date.method(:parse),
|
|
119
168
|
"datetime" => PARSE_DATETIME,
|
|
120
169
|
"dateTime" => PARSE_DATETIME,
|
|
121
|
-
|
|
122
|
-
# Binary types
|
|
123
170
|
"base64Binary" => ->(s) { s.unpack1("m") },
|
|
124
171
|
"binary" => ->(s, entity) { (entity["encoding"] == "base64") ? s.unpack1("m") : s },
|
|
125
172
|
"file" => FILE_CONVERTER,
|
|
126
|
-
|
|
127
|
-
# Structured types
|
|
128
173
|
"yaml" => lambda do |string|
|
|
129
174
|
YAML.safe_load(string, permitted_classes: [Symbol, Date, Time])
|
|
130
175
|
rescue ArgumentError, Psych::SyntaxError
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Deprecated public API kept around for one major release
|
|
2
|
+
#
|
|
3
|
+
# Each method here emits a one-time deprecation warning on first call and
|
|
4
|
+
# delegates to its current-API counterpart. The whole file is loaded by
|
|
5
|
+
# {MultiXML} so the deprecation surface stays out of the main module
|
|
6
|
+
# definition.
|
|
7
|
+
#
|
|
8
|
+
# @api private
|
|
9
|
+
module MultiXML
  # Register a deprecated singleton method that forwards to its replacement
  #
  # Calling the generated method first reports a one-time deprecation
  # notice that names the replacement, and then hands every positional
  # argument, keyword argument and block over to the replacement.
  #
  # @api private
  # @param name [Symbol] deprecated method name
  # @param replacement [Symbol] current-API method to delegate to
  # @return [Symbol] the defined method name
  # @example
  #   deprecate_alias :load, :parse
  def self.deprecate_alias(name, replacement)
    notice = "MultiXML.#{name} is deprecated and will be removed in v1.0. Use MultiXML.#{replacement} instead."

    define_singleton_method(name) do |*positional, **keywords, &blk|
      warn_deprecation_once(name, notice)
      public_send(replacement, *positional, **keywords, &blk)
    end
  end
  private_class_method :deprecate_alias

  deprecate_alias :load, :parse
end
|
data/lib/multi_xml/errors.rb
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
module
|
|
1
|
+
module MultiXML
|
|
2
2
|
# Raised when XML parsing fails
|
|
3
3
|
#
|
|
4
4
|
# Preserves the original XML and underlying cause for debugging.
|
|
@@ -6,8 +6,8 @@ module MultiXml
|
|
|
6
6
|
# @api public
|
|
7
7
|
# @example Catching a parse error
|
|
8
8
|
# begin
|
|
9
|
-
#
|
|
10
|
-
# rescue
|
|
9
|
+
# MultiXML.parse('<invalid>')
|
|
10
|
+
# rescue MultiXML::ParseError => e
|
|
11
11
|
# puts e.xml # The malformed XML
|
|
12
12
|
# puts e.cause # The underlying parser exception
|
|
13
13
|
# end
|
|
@@ -46,18 +46,72 @@ module MultiXml
|
|
|
46
46
|
|
|
47
47
|
# Raised when no XML parser library is available
|
|
48
48
|
#
|
|
49
|
-
# This error is raised when
|
|
49
|
+
# This error is raised when MultiXML cannot find any supported XML parser.
|
|
50
50
|
# Install one of: ox, nokogiri, libxml-ruby, or oga.
|
|
51
51
|
#
|
|
52
52
|
# @api public
|
|
53
53
|
# @example Catching the error
|
|
54
54
|
# begin
|
|
55
|
-
#
|
|
56
|
-
# rescue
|
|
55
|
+
# MultiXML.parse('<root/>')
|
|
56
|
+
# rescue MultiXML::NoParserError => e
|
|
57
57
|
# puts "Please install an XML parser gem"
|
|
58
58
|
# end
|
|
59
59
|
class NoParserError < StandardError; end
|
|
60
60
|
|
|
61
|
+
# Raised when a parser spec cannot be turned into a usable parser
#
# One typed error for every failure that happens before parsing even
# starts, so a single rescue clause covers all of them:
# - the spec has an invalid type (not a Symbol, String, or Module)
# - requiring the parser file raised LoadError
# - a custom parser does not satisfy the contract
#   (no .parse method or no parse_error method / ParseError constant)
#
# Matches the role of {MultiJSON::AdapterError}.
#
# @api public
# @example Catching a load error
#   begin
#     MultiXML.parser = :bogus
#   rescue MultiXML::ParserLoadError => e
#     puts e.message
#   end
class ParserLoadError < ArgumentError
  class << self
    # Wrap an original exception in a ParserLoadError
    #
    # The message embeds the original exception's class name and message,
    # so a reader of the ParserLoadError alone can tell a ``LoadError``
    # from an ``ArgumentError`` raised by the spec validator (or any other
    # class) without inspecting ``error.cause`` separately.
    #
    # @api public
    # @param original_exception [Exception] the original load error
    # @return [ParserLoadError] new error with formatted message
    # @example
    #   ParserLoadError.build(LoadError.new("cannot load such file"))
    def build(original_exception)
      description = "#{original_exception.class}: #{original_exception.message}"
      new("Did not recognize your parser specification (#{description}).", cause: original_exception)
    end
  end

  # Create a new ParserLoadError
  #
  # When a cause is supplied, its backtrace is copied onto this error.
  #
  # @api public
  # @param message [String, nil] error message
  # @param cause [Exception, nil] the original exception
  # @return [ParserLoadError] new error instance
  # @example
  #   ParserLoadError.new("Unknown parser", cause: original_error)
  def initialize(message = nil, cause: nil)
    super(message)
    return unless cause

    set_backtrace(cause.backtrace)
  end
end
|
|
114
|
+
|
|
61
115
|
# Raised when an XML type attribute is in the disallowed list
|
|
62
116
|
#
|
|
63
117
|
# By default, 'yaml' and 'symbol' types are disallowed for security reasons.
|
|
@@ -65,8 +119,8 @@ module MultiXml
|
|
|
65
119
|
# @api public
|
|
66
120
|
# @example Catching a disallowed type error
|
|
67
121
|
# begin
|
|
68
|
-
#
|
|
69
|
-
# rescue
|
|
122
|
+
# MultiXML.parse('<data type="yaml">--- :key</data>')
|
|
123
|
+
# rescue MultiXML::DisallowedTypeError => e
|
|
70
124
|
# puts e.type #=> "yaml"
|
|
71
125
|
# end
|
|
72
126
|
class DisallowedTypeError < StandardError
|
data/lib/multi_xml/file_like.rb
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
module
|
|
1
|
+
module MultiXML
|
|
2
2
|
# Mixin that provides file-like metadata to StringIO objects
|
|
3
3
|
#
|
|
4
4
|
# Used when parsing base64-encoded file content from XML.
|
|
@@ -7,7 +7,7 @@ module MultiXml
|
|
|
7
7
|
# @api public
|
|
8
8
|
# @example Extending a StringIO
|
|
9
9
|
# io = StringIO.new("file content")
|
|
10
|
-
# io.extend(
|
|
10
|
+
# io.extend(MultiXML::FileLike)
|
|
11
11
|
# io.original_filename = "document.pdf"
|
|
12
12
|
# io.content_type = "application/pdf"
|
|
13
13
|
module FileLike
|
data/lib/multi_xml/helpers.rb
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
module
|
|
1
|
+
module MultiXML
|
|
2
2
|
# Methods for transforming parsed XML hash structures
|
|
3
3
|
#
|
|
4
4
|
# These helper methods handle key transformation and type casting
|
|
@@ -193,7 +193,7 @@ module MultiXml
|
|
|
193
193
|
def transform_keys(data, &block)
|
|
194
194
|
case data
|
|
195
195
|
when Hash then data.each_with_object(
|
|
196
|
-
{} #: Hash[Symbol,
|
|
196
|
+
{} #: Hash[Symbol, MultiXML::xmlValue] # rubocop:disable Layout/LeadingCommentSpace
|
|
197
197
|
) { |(key, value), acc| acc[yield(key)] = transform_keys(value, &block) }
|
|
198
198
|
when Array then data.map { |item| transform_keys(item, &block) }
|
|
199
199
|
else data
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
module MultiXML
  # Mixin that stores and resolves configurable parse options
  #
  # The stored value may be a static hash or a callable (proc/lambda)
  # that produces one. The mixin is extended into MultiXML so callers
  # configure process-wide defaults via {MultiXML.parse_options=}.
  #
  # @api private
  module Options
    # Shared frozen hash returned when no parse options resolve.
    EMPTY_OPTIONS = {}.freeze

    # Set options for parse operations
    #
    # @api public
    # @param options [Hash, Proc] options hash or callable
    # @return [Hash, Proc] the options
    # @example
    #   MultiXML.parse_options = {symbolize_names: true}
    attr_writer :parse_options

    # Get options for parse operations
    #
    # A callable stored in ``@parse_options`` is invoked with ``args`` as
    # positional arguments (typically the call-site options hash). For a
    # plain hash, ``args`` is ignored.
    #
    # @api public
    # @param args [Array<Object>] forwarded to the callable, ignored otherwise
    # @return [Hash] resolved options hash
    # @example
    #   MultiXML.parse_options #=> {}
    def parse_options(*args)
      resolved = resolve_options(@parse_options, *args)
      resolved || EMPTY_OPTIONS
    end

    private

    # Turn a hash or callable into an options hash
    #
    # @api private
    # @param options [Hash, Proc, nil] options configuration
    # @param args [Array<Object>] arguments forwarded to a callable provider
    # @return [Hash, nil] resolved options hash
    def resolve_options(options, *args)
      if options.respond_to?(:call)
        invoke_callable(options, *args)
      elsif options.respond_to?(:to_hash)
        options.to_hash
      end
    end

    # Call an options provider
    #
    # @api private
    # @param callable [Proc] options provider
    # @param args [Array<Object>] arguments passed along unless the callable has arity zero
    # @return [Hash] options returned by the callable
    def invoke_callable(callable, *args)
      return callable.call if callable.arity.zero?

      callable.call(*args)
    end
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
module MultiXML
  # Normalization helpers for the options hash passed to {MultiXML.parse}
  #
  # Kept in a module of its own instead of {ParseSupport} (which is mixed
  # into MultiXML's singleton class), so that ``self`` inside these
  # methods is ``OptionsNormalization`` and not ``MultiXML``. Thanks to
  # that separation, mutation testing can tell
  # ``MultiXML.warn_deprecation_once(...)`` apart from
  # ``self.warn_deprecation_once(...)``.
  #
  # @api private
  module OptionsNormalization
    # Rewrite the deprecated ``:symbolize_keys`` option as ``:symbolize_names``
    #
    # The canonical name matches Ruby stdlib's ``JSON.parse`` and the
    # sister library MultiJSON. Whenever ``:symbolize_keys`` is present
    # the deprecation notice is handed to
    # ``MultiXML.warn_deprecation_once``. If the caller explicitly set
    # both names, the ``:symbolize_names`` value is kept and
    # ``:symbolize_keys`` is dropped without further notice.
    #
    # @api private
    # @param options [Hash] options layer to normalize
    # @return [Hash] a copy with ``:symbolize_keys`` translated, or the
    #   original hash when no translation is needed
    # @example
    #   MultiXML::OptionsNormalization.normalize_symbolize_option(symbolize_keys: true)
    def self.normalize_symbolize_option(options)
      return options unless options.key?(:symbolize_keys)

      notice = "The :symbolize_keys option is deprecated and will be removed in v1.0. Use :symbolize_names instead."
      MultiXML.warn_deprecation_once(:symbolize_keys_option, notice)

      # Work on a copy so the caller's hash is left untouched.
      options.dup.tap do |normalized|
        legacy_value = normalized.delete(:symbolize_keys)
        normalized[:symbolize_names] = legacy_value unless normalized.key?(:symbolize_names)
      end
    end
  end
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
module MultiXML
  # Internal helpers for parsing and post-processing XML
  #
  # @api private
  module ParseSupport
    private

    # Normalize input to an IO-like object
    #
    # Anything that already responds to ``read`` is returned unchanged;
    # everything else is stringified, stripped and wrapped in a StringIO.
    #
    # @api private
    # @param xml [String, IO] Input to normalize
    # @return [IO] IO-like object
    def normalize_input(xml)
      return xml if xml.respond_to?(:read)

      StringIO.new(xml.to_s.strip)
    end

    # Parse XML with error handling and key normalization
    #
    # @api private
    # @param io [IO] IO-like object containing XML
    # @param original_input [String, IO] Original input for error reporting
    # @param xml_parser [Module] Parser to use
    # @param namespaces [Symbol] Namespace handling mode
    # @return [Hash] Parsed XML (undasherized only when mode is :strip)
    # @raise [ParseError] if XML is malformed
    def parse_with_error_handling(io, original_input, xml_parser, namespaces)
      # A nil parser result is treated as an empty document.
      result = parse_with_namespaces_compatibility(io, xml_parser, namespaces) || {}
      (namespaces == :strip) ? undasherize_keys(result) : result
    rescue xml_parser.parse_error => e
      xml_string = extract_xml_for_error(original_input)
      raise(ParseError.new(e, xml: xml_string, cause: e))
    end

    # Call the parser while preserving legacy custom parser compatibility
    #
    # Parsers whose ``parse`` does not accept a ``namespaces:`` keyword are
    # called with the IO only.
    #
    # @api private
    # @param io [IO] IO-like object containing XML
    # @param xml_parser [Module] Parser to use
    # @param namespaces [Symbol] Namespace handling mode
    # @return [Hash, nil] Parsed XML result
    def parse_with_namespaces_compatibility(io, xml_parser, namespaces)
      if parser_supports_namespaces_keyword?(xml_parser)
        xml_parser.parse(io, namespaces: namespaces)
      else
        xml_parser.parse(io)
      end
    end

    # Validate the :namespaces mode option
    #
    # @api private
    # @param mode [Symbol] Namespace handling mode to validate
    # @return [Symbol] the validated mode
    # @raise [ArgumentError] if mode is not a recognized value
    def validate_namespaces_mode(mode)
      return mode if NAMESPACE_MODES.include?(mode)

      expected_modes = "[#{NAMESPACE_MODES.map(&:inspect).join(", ")}]"
      raise ArgumentError, "invalid :namespaces mode #{mode.inspect}; expected one of #{expected_modes}"
    end

    # Pick the parser to use for this call, honoring the :parser option
    #
    # @api private
    # @param options [Hash] Parsing options
    # @return [Module] Resolved parser module
    def resolve_parse_parser(options)
      options[:parser] ? resolve_parser(options.fetch(:parser)) : parser
    end

    # Check whether the parser accepts a `namespaces:` keyword
    #
    # A ``**rest`` keyword splat counts as accepting it.
    #
    # @api private
    # @param xml_parser [Module] Parser to inspect
    # @return [Boolean] true when the parser accepts `namespaces:`
    def parser_supports_namespaces_keyword?(xml_parser)
      xml_parser.public_method(:parse).parameters.any? do |kind, name|
        kind == :keyrest || (name == :namespaces && %i[key keyreq].include?(kind))
      end
    end

    # Extract the original XML for ParseError reporting
    #
    # Some parser backends mutate or close IO objects on error. JRuby's
    # Nokogiri path closes StringIO instances, so prefer rewind/read when
    # available but fall back to the underlying string buffer when present.
    #
    # This method runs while a ParseError is being built, so it must not
    # raise an unrelated exception that would mask the parse failure:
    # - inputs that expose ``read`` but not ``rewind`` are read as-is;
    # - non-seekable inputs (e.g. pipes, whose ``rewind`` raises
    #   ``Errno::ESPIPE``, a SystemCallError) use the same fallback as a
    #   closed IO.
    #
    # @api private
    # @param original_input [String, IO] original parse input
    # @return [String] XML payload for ParseError context
    def extract_xml_for_error(original_input)
      return original_input.to_s unless original_input.respond_to?(:read)

      original_input.rewind if original_input.respond_to?(:rewind)
      original_input.read
    rescue IOError, SystemCallError
      original_input.respond_to?(:string) ? original_input.string : original_input.to_s
    end

    # Apply typecasting and key-symbolization as configured
    #
    # @api private
    # @param result [Hash] Parsed hash
    # @param options [Hash] Parsing options
    # @return [Hash] Post-processed hash
    def apply_postprocessing(result, options)
      result = typecast_xml_value(result, options.fetch(:disallowed_types)) if options.fetch(:typecast_xml_value)
      result = symbolize_keys(result) if options.fetch(:symbolize_names)
      result
    end
  end
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
module MultiXML
  # Shared contract for XML parser implementations
  #
  # The built-in parsers ``extend`` this module and expose their XML
  # library's native parse-error class through a ``ParseError`` constant.
  # {#parse_error} reads that constant, which allows {MultiXML.parse} to
  # wrap backend-specific parse failures in a uniform way.
  #
  # Matches the role of {MultiJSON::Adapter}: custom parsers may adopt the
  # contract by extending this module, and parsers that define a
  # ``parse_error`` method directly remain supported.
  #
  # @example Writing a custom parser
  #   module MyParser
  #     extend MultiXML::Parser
  #
  #     ParseError = Class.new(StandardError)
  #
  #     def self.parse(io, namespaces: :strip)
  #       # parse io into a Hash, raising ParseError on failure
  #     end
  #   end
  #
  #   MultiXML.parser = MyParser
  #
  # @api public
  module Parser
    # Look up the parse-error class declared on the extending parser
    #
    # The constant is resolved with ``inherit: false`` so that a stray
    # top-level ``::ParseError`` in the host process (Racc defines one
    # when Nokogiri is loaded) is not picked up by mistake.
    #
    # @api public
    # @return [Class] the ParseError class declared on ``self``
    # @raise [ParserLoadError] when ``self`` doesn't define ParseError
    # @example
    #   MultiXML::Parsers::Nokogiri.parse_error
    #   #=> Nokogiri::XML::SyntaxError
    def parse_error
      begin
        declared = const_get(:ParseError, false)
      rescue NameError
        raise ParserLoadError, "Parser #{self} must define a ParseError constant"
      end

      declared
    end
  end
end
|