multi_xml 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mutant.yml +16 -0
- data/.rubocop.yml +14 -5
- data/CHANGELOG.md +8 -0
- data/Gemfile +12 -9
- data/README.md +1 -1
- data/Rakefile +35 -7
- data/Steepfile +22 -0
- data/lib/multi_xml/constants.rb +134 -0
- data/lib/multi_xml/errors.rb +93 -0
- data/lib/multi_xml/file_like.rb +62 -0
- data/lib/multi_xml/helpers.rb +228 -0
- data/lib/multi_xml/parsers/dom_parser.rb +97 -0
- data/lib/multi_xml/parsers/libxml.rb +35 -18
- data/lib/multi_xml/parsers/libxml_sax.rb +103 -0
- data/lib/multi_xml/parsers/nokogiri.rb +39 -22
- data/lib/multi_xml/parsers/nokogiri_sax.rb +102 -0
- data/lib/multi_xml/parsers/oga.rb +48 -51
- data/lib/multi_xml/parsers/ox.rb +99 -57
- data/lib/multi_xml/parsers/rexml.rb +84 -78
- data/lib/multi_xml/parsers/sax_handler.rb +117 -0
- data/lib/multi_xml/version.rb +5 -1
- data/lib/multi_xml.rb +173 -269
- data/sig/multi_xml.rbs +227 -0
- metadata +21 -5
- data/lib/multi_xml/parsers/libxml2_parser.rb +0 -70
data/sig/multi_xml.rbs
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# Type signatures for MultiXml
|
|
2
|
+
|
|
3
|
+
# Recursive type alias for parsed XML values
|
|
4
|
+
# XML parsing produces nested structures of hashes, arrays, and primitive values
|
|
5
|
+
type MultiXml::xmlValue = String
|
|
6
|
+
| Integer
|
|
7
|
+
| Float
|
|
8
|
+
| bool
|
|
9
|
+
| Symbol
|
|
10
|
+
| Time
|
|
11
|
+
| Date
|
|
12
|
+
| BigDecimal
|
|
13
|
+
| StringIO
|
|
14
|
+
| nil
|
|
15
|
+
| Array[MultiXml::xmlValue]
|
|
16
|
+
| Hash[String, MultiXml::xmlValue]
|
|
17
|
+
| Hash[Symbol, MultiXml::xmlValue]
|
|
18
|
+
|
|
19
|
+
# Type for hash with string keys used internally during parsing
|
|
20
|
+
type MultiXml::xmlHash = Hash[String, MultiXml::xmlValue]
|
|
21
|
+
|
|
22
|
+
# Interface for parser modules
|
|
23
|
+
interface MultiXml::_Parser
|
|
24
|
+
def parse: (StringIO io) -> MultiXml::xmlHash?
|
|
25
|
+
def parse_error: () -> singleton(Exception)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
module MultiXml
|
|
29
|
+
VERSION: Gem::Version
|
|
30
|
+
|
|
31
|
+
TEXT_CONTENT_KEY: String
|
|
32
|
+
|
|
33
|
+
RUBY_TYPE_TO_XML: Hash[String, String]
|
|
34
|
+
|
|
35
|
+
DISALLOWED_TYPES: Array[String]
|
|
36
|
+
|
|
37
|
+
FALSE_BOOLEAN_VALUES: Set[String]
|
|
38
|
+
|
|
39
|
+
DEFAULT_OPTIONS: Hash[Symbol, bool | Array[String]]
|
|
40
|
+
|
|
41
|
+
# Array of [library_name, parser_symbol] pairs
|
|
42
|
+
PARSER_PREFERENCE: Array[Array[String | Symbol]]
|
|
43
|
+
|
|
44
|
+
PARSE_DATETIME: ^(String) -> Time
|
|
45
|
+
|
|
46
|
+
# Lambda for creating file-like StringIO from base64 content
|
|
47
|
+
# Uses untyped for content because unpack1 returns various types
|
|
48
|
+
# Uses untyped for entity because hash values are xmlValue but we access specific String keys
|
|
49
|
+
FILE_CONVERTER: ^(untyped, untyped) -> StringIO
|
|
50
|
+
|
|
51
|
+
# Type converters keyed by XML type attribute string
|
|
52
|
+
# Uses untyped key because hash["type"] returns xmlValue, and Hash#[] with non-String returns nil
|
|
53
|
+
TYPE_CONVERTERS: Hash[untyped, Proc | Method]
|
|
54
|
+
|
|
55
|
+
LOADED_PARSER_CHECKS: Hash[Symbol, Symbol]
|
|
56
|
+
|
|
57
|
+
self.@parser: Module
|
|
58
|
+
|
|
59
|
+
extend Helpers
|
|
60
|
+
|
|
61
|
+
# Public API: Get the current XML parser module
|
|
62
|
+
def self.parser: () -> Module
|
|
63
|
+
|
|
64
|
+
# Public API: Set the XML parser to use
|
|
65
|
+
def self.parser=: (Symbol | String | Module new_parser) -> Module
|
|
66
|
+
|
|
67
|
+
# Public API: Parse XML into a Ruby Hash
|
|
68
|
+
# Uses untyped for options because values vary by key (:parser, :symbolize_keys, :disallowed_types, :typecast_xml_value)
|
|
69
|
+
def self.parse: (String | StringIO xml, ?Hash[Symbol, untyped] options) -> xmlHash
|
|
70
|
+
|
|
71
|
+
private
|
|
72
|
+
|
|
73
|
+
# Resolve a parser specification (Symbol, String, or Module) to a parser
|
|
74
|
+
def self.resolve_parser: (Symbol | String | Module spec) -> Module
|
|
75
|
+
|
|
76
|
+
# Load a parser module by name
|
|
77
|
+
def self.load_parser: (Symbol | String name) -> Module
|
|
78
|
+
|
|
79
|
+
# Convert snake_case to CamelCase
|
|
80
|
+
def self.camelize: (String name) -> String
|
|
81
|
+
|
|
82
|
+
# Detect the best available parser
|
|
83
|
+
def self.detect_parser: () -> (Symbol | String)
|
|
84
|
+
|
|
85
|
+
# Find an already-loaded parser library
|
|
86
|
+
def self.find_loaded_parser: () -> Symbol?
|
|
87
|
+
|
|
88
|
+
# Try to find an available parser by requiring libraries
|
|
89
|
+
def self.find_available_parser: () -> (String | Symbol | nil)
|
|
90
|
+
|
|
91
|
+
# Attempt to require a library, returning success/failure
|
|
92
|
+
# Kernel#require accepts String; library may be Symbol from PARSER_PREFERENCE (coerced at runtime)
|
|
93
|
+
def self.try_require: (untyped library) -> bool
|
|
94
|
+
|
|
95
|
+
# Raise NoParserError - never returns
|
|
96
|
+
def self.raise_no_parser_error: () -> bot
|
|
97
|
+
|
|
98
|
+
# Convert String to StringIO, pass through IO-like objects
|
|
99
|
+
# Uses respond_to?(:read) duck typing - returns input unchanged if IO-like
|
|
100
|
+
def self.normalize_input: (String | StringIO xml) -> untyped
|
|
101
|
+
|
|
102
|
+
# Parse with error handling and key normalization
|
|
103
|
+
# xml_parser implements _Parser interface; original_input uses respond_to? duck typing
|
|
104
|
+
def self.parse_with_error_handling: (StringIO io, untyped original_input, untyped xml_parser) -> xmlHash
|
|
105
|
+
|
|
106
|
+
module Helpers
|
|
107
|
+
# Recursively convert all hash keys to symbols
|
|
108
|
+
# Uses case/when type dispatch - Steep can't track flow narrowing
|
|
109
|
+
def self?.symbolize_keys: (untyped data) -> untyped
|
|
110
|
+
|
|
111
|
+
# Recursively convert dashes in hash keys to underscores
|
|
112
|
+
# Uses case/when type dispatch - Steep can't track flow narrowing
|
|
113
|
+
def self?.undasherize_keys: (untyped data) -> untyped
|
|
114
|
+
|
|
115
|
+
# Recursively typecast XML values based on type attributes
|
|
116
|
+
# Uses case/when type dispatch - Steep can't track flow narrowing
|
|
117
|
+
def self?.typecast_xml_value: (untyped value, ?Array[String] disallowed_types) -> xmlValue
|
|
118
|
+
|
|
119
|
+
# Typecast array elements and unwrap single-element arrays
|
|
120
|
+
def self?.typecast_array: (Array[xmlValue] array, Array[String] disallowed_types) -> xmlValue
|
|
121
|
+
|
|
122
|
+
# Typecast a hash based on its type attribute
|
|
123
|
+
def self?.typecast_hash: (xmlHash hash, Array[String] disallowed_types) -> xmlValue
|
|
124
|
+
|
|
125
|
+
# Check if a type is in the disallowed list
|
|
126
|
+
# Uses is_a?(Hash) guard then include? - Steep can't narrow xmlValue to String
|
|
127
|
+
def self?.disallowed_type?: (untyped type, Array[String] disallowed_types) -> boolish
|
|
128
|
+
|
|
129
|
+
# Convert a hash based on its type and content
|
|
130
|
+
def self?.convert_hash: (xmlHash hash, xmlValue type, Array[String] disallowed_types) -> xmlValue
|
|
131
|
+
|
|
132
|
+
# Typecast all child values in a hash
|
|
133
|
+
def self?.typecast_children: (xmlHash hash, Array[String] disallowed_types) -> (xmlHash | StringIO)
|
|
134
|
+
|
|
135
|
+
# Extract array entries from element with type="array"
|
|
136
|
+
def self?.extract_array_entries: (xmlHash hash, Array[String] disallowed_types) -> Array[xmlValue]
|
|
137
|
+
|
|
138
|
+
# Find array or hash entries in a hash, excluding the type key
|
|
139
|
+
# Returns xmlValue subset (Array or Hash) - uses is_a? that Steep can't narrow
|
|
140
|
+
def self?.find_array_entries: (xmlHash hash) -> untyped
|
|
141
|
+
|
|
142
|
+
# Wrap hash in array if needed and typecast all entries
|
|
143
|
+
def self?.wrap_and_typecast: (Array[xmlValue] | xmlHash entries, Array[String] disallowed_types) -> Array[xmlValue]
|
|
144
|
+
|
|
145
|
+
# Convert text content using type converters
|
|
146
|
+
# hash["type"] is xmlValue, used as Hash key - Steep requires String
|
|
147
|
+
def self?.convert_text_content: (xmlHash hash) -> xmlValue
|
|
148
|
+
|
|
149
|
+
# Unwrap value if hash has no other significant keys
|
|
150
|
+
def self?.unwrap_if_simple: (xmlHash hash, xmlValue value) -> (xmlValue | xmlHash)
|
|
151
|
+
|
|
152
|
+
# Check if a hash represents an empty value
|
|
153
|
+
def self?.empty_value?: (xmlHash hash, xmlValue type) -> bool
|
|
154
|
+
|
|
155
|
+
private
|
|
156
|
+
|
|
157
|
+
# Recursively transform hash keys using a block
|
|
158
|
+
# Block receives key (String) and returns transformed key
|
|
159
|
+
# Uses untyped because &:to_sym Proc type narrowing not supported by Steep
|
|
160
|
+
def self?.transform_keys: (untyped data) { (untyped) -> untyped } -> untyped
|
|
161
|
+
|
|
162
|
+
# Unwrap a file object from the result hash if present
|
|
163
|
+
def self?.unwrap_file_if_present: (xmlHash result) -> (xmlHash | StringIO)
|
|
164
|
+
|
|
165
|
+
# Apply a type converter to content
|
|
166
|
+
# Content is xmlValue (from hash.fetch) but typically String in practice
|
|
167
|
+
def self?.apply_converter: (xmlHash hash, untyped content, Proc | Method converter) -> xmlValue
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
module FileLike
|
|
171
|
+
DEFAULT_FILENAME: String
|
|
172
|
+
|
|
173
|
+
DEFAULT_CONTENT_TYPE: String
|
|
174
|
+
|
|
175
|
+
@original_filename: String?
|
|
176
|
+
|
|
177
|
+
@content_type: String?
|
|
178
|
+
|
|
179
|
+
attr_writer original_filename: String?
|
|
180
|
+
|
|
181
|
+
attr_writer content_type: String?
|
|
182
|
+
|
|
183
|
+
def original_filename: () -> String
|
|
184
|
+
|
|
185
|
+
def content_type: () -> String
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
# Represents a StringIO that has been extended with FileLike
|
|
189
|
+
# Used for file type conversions in XML parsing
|
|
190
|
+
class FileIO < StringIO
|
|
191
|
+
include FileLike
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
class ParseError < StandardError
|
|
195
|
+
@xml: String?
|
|
196
|
+
|
|
197
|
+
@cause: Exception?
|
|
198
|
+
|
|
199
|
+
attr_reader xml: String?
|
|
200
|
+
|
|
201
|
+
attr_reader cause: Exception?
|
|
202
|
+
|
|
203
|
+
# Message can be String (normal) or Exception (from parser errors), or nil for default
|
|
204
|
+
def initialize: (?(String | Exception | nil) message, ?xml: String?, ?cause: Exception?) -> void
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
class NoParserError < StandardError
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
class DisallowedTypeError < StandardError
|
|
211
|
+
@type: String
|
|
212
|
+
|
|
213
|
+
attr_reader type: String
|
|
214
|
+
|
|
215
|
+
def initialize: (String type) -> void
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
# Parsers module - parser implementations depend on optional external gems
|
|
219
|
+
module Parsers
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
# Stub for Psych::SyntaxError which is part of the yaml library
|
|
224
|
+
module Psych
|
|
225
|
+
class SyntaxError < ::StandardError
|
|
226
|
+
end
|
|
227
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: multi_xml
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.7.2
|
|
4
|
+
version: 0.8.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Erik Berlin
|
|
@@ -13,22 +13,29 @@ dependencies:
|
|
|
13
13
|
name: bigdecimal
|
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
|
15
15
|
requirements:
|
|
16
|
-
- - "~>"
|
|
16
|
+
- - ">="
|
|
17
17
|
- !ruby/object:Gem::Version
|
|
18
18
|
version: '3.1'
|
|
19
|
+
- - "<"
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '5'
|
|
19
22
|
type: :runtime
|
|
20
23
|
prerelease: false
|
|
21
24
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
25
|
requirements:
|
|
23
|
-
- - "~>"
|
|
26
|
+
- - ">="
|
|
24
27
|
- !ruby/object:Gem::Version
|
|
25
28
|
version: '3.1'
|
|
29
|
+
- - "<"
|
|
30
|
+
- !ruby/object:Gem::Version
|
|
31
|
+
version: '5'
|
|
26
32
|
email:
|
|
27
33
|
- sferik@gmail.com
|
|
28
34
|
executables: []
|
|
29
35
|
extensions: []
|
|
30
36
|
extra_rdoc_files: []
|
|
31
37
|
files:
|
|
38
|
+
- ".mutant.yml"
|
|
32
39
|
- ".rspec"
|
|
33
40
|
- ".rubocop.yml"
|
|
34
41
|
- ".yardopts"
|
|
@@ -38,14 +45,23 @@ files:
|
|
|
38
45
|
- LICENSE.md
|
|
39
46
|
- README.md
|
|
40
47
|
- Rakefile
|
|
48
|
+
- Steepfile
|
|
41
49
|
- lib/multi_xml.rb
|
|
50
|
+
- lib/multi_xml/constants.rb
|
|
51
|
+
- lib/multi_xml/errors.rb
|
|
52
|
+
- lib/multi_xml/file_like.rb
|
|
53
|
+
- lib/multi_xml/helpers.rb
|
|
54
|
+
- lib/multi_xml/parsers/dom_parser.rb
|
|
42
55
|
- lib/multi_xml/parsers/libxml.rb
|
|
43
|
-
- lib/multi_xml/parsers/libxml2_parser.rb
|
|
56
|
+
- lib/multi_xml/parsers/libxml_sax.rb
|
|
44
57
|
- lib/multi_xml/parsers/nokogiri.rb
|
|
58
|
+
- lib/multi_xml/parsers/nokogiri_sax.rb
|
|
45
59
|
- lib/multi_xml/parsers/oga.rb
|
|
46
60
|
- lib/multi_xml/parsers/ox.rb
|
|
47
61
|
- lib/multi_xml/parsers/rexml.rb
|
|
62
|
+
- lib/multi_xml/parsers/sax_handler.rb
|
|
48
63
|
- lib/multi_xml/version.rb
|
|
64
|
+
- sig/multi_xml.rbs
|
|
49
65
|
homepage: https://github.com/sferik/multi_xml
|
|
50
66
|
licenses:
|
|
51
67
|
- MIT
|
|
@@ -72,7 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
72
88
|
- !ruby/object:Gem::Version
|
|
73
89
|
version: '0'
|
|
74
90
|
requirements: []
|
|
75
|
-
rubygems_version:
|
|
91
|
+
rubygems_version: 4.0.3
|
|
76
92
|
specification_version: 4
|
|
77
93
|
summary: Provides swappable XML backends utilizing LibXML, Nokogiri, Ox, or REXML.
|
|
78
94
|
test_files: []
|
data/lib/multi_xml/parsers/libxml2_parser.rb
DELETED
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
module MultiXml
|
|
2
|
-
module Parsers
|
|
3
|
-
module Libxml2Parser # :nodoc:
|
|
4
|
-
# Convert XML document to hash
|
|
5
|
-
#
|
|
6
|
-
# node::
|
|
7
|
-
# The XML node object to convert to a hash.
|
|
8
|
-
#
|
|
9
|
-
# hash::
|
|
10
|
-
# Hash to merge the converted element into.
|
|
11
|
-
def node_to_hash(node, hash = {}) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
|
|
12
|
-
node_hash = {MultiXml::CONTENT_ROOT => ""}
|
|
13
|
-
|
|
14
|
-
name = node_name(node)
|
|
15
|
-
|
|
16
|
-
# Insert node hash into parent hash correctly.
|
|
17
|
-
case hash[name]
|
|
18
|
-
when Array
|
|
19
|
-
hash[name] << node_hash
|
|
20
|
-
when Hash
|
|
21
|
-
hash[name] = [hash[name], node_hash]
|
|
22
|
-
when NilClass
|
|
23
|
-
hash[name] = node_hash
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
# Handle child elements
|
|
27
|
-
each_child(node) do |c|
|
|
28
|
-
if c.element?
|
|
29
|
-
node_to_hash(c, node_hash)
|
|
30
|
-
elsif c.text? || c.cdata?
|
|
31
|
-
node_hash[MultiXml::CONTENT_ROOT] += c.content
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# Remove content node if it is empty
|
|
36
|
-
node_hash.delete(MultiXml::CONTENT_ROOT) if node_hash[MultiXml::CONTENT_ROOT].strip.empty?
|
|
37
|
-
|
|
38
|
-
# Handle attributes
|
|
39
|
-
each_attr(node) do |a|
|
|
40
|
-
key = node_name(a)
|
|
41
|
-
v = node_hash[key]
|
|
42
|
-
node_hash[key] = ((v) ? [a.value, v] : a.value)
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
hash
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
# Parse an XML Document IO into a simple hash.
|
|
49
|
-
# xml::
|
|
50
|
-
# XML Document IO to parse
|
|
51
|
-
def parse(_)
|
|
52
|
-
raise(NotImplementedError, "inheritor should define #{__method__}")
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
private
|
|
56
|
-
|
|
57
|
-
def each_child(*)
|
|
58
|
-
raise(NotImplementedError, "inheritor should define #{__method__}")
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
def each_attr(*)
|
|
62
|
-
raise(NotImplementedError, "inheritor should define #{__method__}")
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
def node_name(*)
|
|
66
|
-
raise(NotImplementedError, "inheritor should define #{__method__}")
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
|
-
end
|