multi_xml 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2915f9d6570c4a488028d398e2ea12da2caadbaa3c36441063d166d515d22f20
4
- data.tar.gz: f592c0844048c5dfd81d2b4a5caf4304ece166a8c609338522b4298b87d8d502
3
+ metadata.gz: a6a703a4614209e07bba561caf80be62e407c2168d7234ce8323b1cf10314a1a
4
+ data.tar.gz: f93e6d26aa75c0d357968bca0577b4584285941569a1f418eb843c70ede3aee2
5
5
  SHA512:
6
- metadata.gz: 60a07a4bb9ae555d9537c4b6dba844b9c64f37e4d769f41dea612577b98231c7afec2d34dd757623fc64bcad50486bb822cd73838a924fdc58e5417c0cf537ea
7
- data.tar.gz: cd2ea60bf6f5f0de466b177f2a3f9f73006d829b6d5484d5f1db2b641e5deceeb80580d55d19d7e6b92724e675afbf387bcc12e6c66d62e7da840faf4980335e
6
+ metadata.gz: f7ae3d51af07c83c76b976caaff6db7a0a2a9e305e82a4f66234aeb9f4566d2654c167d5578bebe8ba44bdeaccf343c60b49c52407728896c28a1ebd33c5cf06
7
+ data.tar.gz: ef1fee85543fa75f734322cf3d66483d3029d2811fdd42db83537e2e9cca331ed69771caa4b164665e37f42b97ac70918a238ddc0ed0c119d4680ab2fee9c41c
data/.mutant.yml ADDED
@@ -0,0 +1,16 @@
1
+ usage: opensource
2
+
3
+ integration:
4
+ name: minitest
5
+
6
+ includes:
7
+ - lib
8
+ - test
9
+
10
+ requires:
11
+ - multi_xml
12
+ - mutant/minitest/coverage
13
+
14
+ matcher:
15
+ subjects:
16
+ - MultiXml*
data/.rubocop.yml CHANGED
@@ -4,7 +4,7 @@ require:
4
4
  plugins:
5
5
  - rubocop-performance
6
6
  - rubocop-rake
7
- - rubocop-rspec
7
+ - rubocop-minitest
8
8
  - standard-performance
9
9
 
10
10
  AllCops:
@@ -15,6 +15,9 @@ Layout/ArgumentAlignment:
15
15
  EnforcedStyle: with_fixed_indentation
16
16
  IndentationWidth: 2
17
17
 
18
+ Layout/ArrayAlignment:
19
+ EnforcedStyle: with_fixed_indentation
20
+
18
21
  Layout/CaseIndentation:
19
22
  EnforcedStyle: end
20
23
 
@@ -37,20 +40,26 @@ Metrics/ParameterLists:
37
40
  Style/Alias:
38
41
  EnforcedStyle: prefer_alias_method
39
42
 
40
- Style/Documentation:
41
- Enabled: false
43
+ Style/EmptyMethod:
44
+ EnforcedStyle: expanded
42
45
 
43
46
  Style/FrozenStringLiteralComment:
44
47
  EnforcedStyle: never
45
48
 
46
- Style/OpenStructUse:
49
+ Style/RedundantConstantBase:
47
50
  Enabled: false
48
51
 
52
+ Style/RescueStandardError:
53
+ EnforcedStyle: implicit
54
+
49
55
  Style/StringLiterals:
50
56
  EnforcedStyle: double_quotes
51
57
 
52
58
  Style/StringLiteralsInInterpolation:
53
59
  EnforcedStyle: double_quotes
54
60
 
61
+ Style/SymbolProc:
62
+ Enabled: false
63
+
55
64
  Style/TernaryParentheses:
56
- EnforcedStyle: require_parentheses
65
+ EnforcedStyle: require_parentheses_when_complex
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ 0.8.0
2
+ -----
3
+ * [Add per-parse :parser option to MultiXml.parse](https://github.com/sferik/multi_xml/commit/eb0c1ccadd9026980ba8b6dd0128d6862dc361c4)
4
+ * [Add SAX parsers for Nokogiri and LibXML](https://github.com/sferik/multi_xml/commit/5d67fe6cae3c1ef2c306f1e83fc91b9accfcb724)
5
+ * [Fix inconsistent whitespace handling across parsers](https://github.com/sferik/multi_xml/commit/55aa23f1c401e66984ad1c7d753c1b4258bf0dfd)
6
+ * [Make parsing errors inspectable with cause and xml accessors](https://github.com/sferik/multi_xml/commit/f676f1b657f3352a80ac171d9b839e41ad52a14d)
7
+ * [Drop support for JRuby](https://github.com/sferik/multi_xml/commit/27895ca3918c681ad7ddaa57c5cae7b8340bd601)
8
+
1
9
  0.7.2
2
10
  -----
3
11
  * [Drop support for Ruby 3.1](https://github.com/sferik/multi_xml/commit/fab6288edd36c58a2b13e0206d8bed305fcb4a4b)
data/Gemfile CHANGED
@@ -6,16 +6,19 @@ gem "oga", ">= 2.3", require: nil
6
6
  gem "ox", require: nil, platforms: :ruby
7
7
  gem "rexml", require: nil
8
8
 
9
- gem "rake", ">= 13.2.1"
10
- gem "rspec", ">= 3.12"
11
- gem "rubocop", ">= 1.62.1"
12
- gem "rubocop-performance", ">= 1.20.2"
13
- gem "rubocop-rake", ">= 0.6"
14
- gem "rubocop-rspec", ">= 2.24"
9
+ gem "minitest", ">= 5.27"
10
+ gem "mutant-minitest", ">= 0.13.5"
11
+ gem "rake", ">= 13.3.1"
12
+ gem "rdoc", ">= 7.0.2"
13
+ gem "rubocop", ">= 1.81.7"
14
+ gem "rubocop-minitest", ">= 0.36"
15
+ gem "rubocop-performance", ">= 1.26.1"
16
+ gem "rubocop-rake", ">= 0.7.1"
15
17
  gem "simplecov", ">= 0.22"
16
- gem "standard", ">= 1.35.1"
17
- gem "standard-performance", ">= 1.3.1"
18
- gem "yard", ">= 0.9.36"
18
+ gem "standard", ">= 1.52"
19
+ gem "standard-performance", ">= 1.9"
20
+ gem "steep", ">= 1.10", platforms: :ruby
21
+ gem "yard", ">= 0.9.38"
19
22
  gem "yardstick", ">= 0.9.9"
20
23
 
21
24
  gemspec
data/README.md CHANGED
@@ -49,7 +49,7 @@ implementations:
49
49
  * 3.2
50
50
  * 3.3
51
51
  * 3.4
52
- * JRuby 10
52
+ * 4.0
53
53
 
54
54
  If something doesn't work on one of these versions, it's a bug.
55
55
 
data/Rakefile CHANGED
@@ -1,11 +1,17 @@
1
- require "bundler"
2
- Bundler::GemHelper.install_tasks
1
+ require "bundler/gem_tasks"
3
2
 
4
- require "rspec/core/rake_task"
5
- RSpec::Core::RakeTask.new(:spec)
3
+ # Override release task to skip gem push (handled by GitHub Actions with attestations)
4
+ Rake::Task["release"].clear
5
+ desc "Build gem and create tag (gem push handled by CI)"
6
+ task release: %w[build release:guard_clean release:source_control_push]
6
7
 
7
- task test: :spec
8
+ require "rake/testtask"
9
+ Rake::TestTask.new(:test) do |t|
10
+ t.libs << "test"
11
+ t.test_files = FileList["test/**/*_test.rb"]
12
+ end
8
13
 
14
+ require "standard/rake"
9
15
  require "rubocop/rake_task"
10
16
  RuboCop::RakeTask.new
11
17
 
@@ -27,7 +33,29 @@ end
27
33
 
28
34
  require "yardstick/rake/verify"
29
35
  Yardstick::Rake::Verify.new do |verify|
30
- verify.threshold = 48.8
36
+ verify.threshold = 100
37
+ end
38
+
39
+ # Steep requires native extensions not available on JRuby or Windows
40
+ unless RUBY_PLATFORM == "java" || Gem.win_platform?
41
+ require "steep/rake_task"
42
+ Steep::RakeTask.new
31
43
  end
32
44
 
33
- task default: %i[spec rubocop verify_measurements]
45
+ desc "Run linters"
46
+ task lint: %i[rubocop standard]
47
+
48
+ # Mutant uses fork() which is not available on Windows or JRuby
49
+ desc "Run mutation testing"
50
+ task :mutant do
51
+ if Gem.win_platform? || RUBY_PLATFORM == "java"
52
+ puts "Skipping mutant on Windows/JRuby (fork not supported)"
53
+ else
54
+ system("bundle", "exec", "mutant", "run") || exit(1)
55
+ end
56
+ end
57
+
58
+ default_tasks = %i[test lint verify_measurements mutant]
59
+ default_tasks << :steep unless RUBY_PLATFORM == "java" || Gem.win_platform?
60
+
61
+ task default: default_tasks
data/Steepfile ADDED
@@ -0,0 +1,22 @@
1
+ D = Steep::Diagnostic
2
+
3
+ target :lib do
4
+ signature "sig"
5
+
6
+ # Check core library files (excluding parser implementations that depend on optional gems)
7
+ check "lib/multi_xml.rb"
8
+ check "lib/multi_xml/constants.rb"
9
+ check "lib/multi_xml/errors.rb"
10
+ check "lib/multi_xml/file_like.rb"
11
+ check "lib/multi_xml/helpers.rb"
12
+ check "lib/multi_xml/version.rb"
13
+
14
+ # Use stdlib types
15
+ library "date"
16
+ library "time"
17
+ library "yaml"
18
+ library "bigdecimal"
19
+ library "stringio"
20
+
21
+ configure_code_diagnostics(D::Ruby.strict)
22
+ end
@@ -0,0 +1,134 @@
1
+ module MultiXml
2
+ # Hash key for storing text content within element hashes
3
+ #
4
+ # @api public
5
+ # @return [String] the key "__content__" used for text content
6
+ # @example Accessing text content
7
+ # result = MultiXml.parse('<name>John</name>')
8
+ # result["name"] #=> "John" (simplified, but internally uses __content__)
9
+ TEXT_CONTENT_KEY = "__content__".freeze
10
+
11
+ # Maps Ruby class names to XML type attribute values
12
+ #
13
+ # @api public
14
+ # @return [Hash{String => String}] mapping of Ruby class names to XML types
15
+ # @example Check XML type for a Ruby class
16
+ # RUBY_TYPE_TO_XML["Integer"] #=> "integer"
17
+ RUBY_TYPE_TO_XML = {
18
+ "Symbol" => "symbol",
19
+ "Integer" => "integer",
20
+ "BigDecimal" => "decimal",
21
+ "Float" => "float",
22
+ "TrueClass" => "boolean",
23
+ "FalseClass" => "boolean",
24
+ "Date" => "date",
25
+ "DateTime" => "datetime",
26
+ "Time" => "datetime",
27
+ "Array" => "array",
28
+ "Hash" => "hash"
29
+ }.freeze
30
+
31
+ # XML type attributes disallowed by default for security
32
+ #
33
+ # These types are blocked to prevent code execution vulnerabilities.
34
+ #
35
+ # @api public
36
+ # @return [Array<String>] list of disallowed type names
37
+ # @example Check default disallowed types
38
+ # DISALLOWED_TYPES #=> ["symbol", "yaml"]
39
+ DISALLOWED_TYPES = %w[symbol yaml].freeze
40
+
41
+ # String values that the "boolean" type converter treats as false
42
+ #
43
+ # @api public
44
+ # @return [Set<String>] values considered false
45
+ # @example Check false values
46
+ # FALSE_BOOLEAN_VALUES.include?("0") #=> true
47
+ FALSE_BOOLEAN_VALUES = Set.new(%w[0 false]).freeze
48
+
49
+ # Default parsing options
50
+ #
51
+ # @api public
52
+ # @return [Hash] default options for parse method
53
+ # @example View defaults
54
+ # DEFAULT_OPTIONS[:symbolize_keys] #=> false
55
+ DEFAULT_OPTIONS = {
56
+ typecast_xml_value: true,
57
+ disallowed_types: DISALLOWED_TYPES,
58
+ symbolize_keys: false
59
+ }.freeze
60
+
61
+ # Parser libraries in preference order (fastest first)
62
+ #
63
+ # @api public
64
+ # @return [Array<Array>] pairs of [require_path, parser_symbol]
65
+ # @example View parser order
66
+ # PARSER_PREFERENCE.first #=> ["ox", :ox]
67
+ PARSER_PREFERENCE = [
68
+ ["ox", :ox],
69
+ ["libxml", :libxml],
70
+ ["nokogiri", :nokogiri],
71
+ ["rexml/document", :rexml],
72
+ ["oga", :oga]
73
+ ].freeze
74
+
75
+ # Parses datetime strings, trying Time first and falling back to DateTime
76
+ #
77
+ # @api private
78
+ # @return [Proc] lambda that parses datetime strings
79
+ PARSE_DATETIME = lambda do |string|
80
+ Time.parse(string).utc
81
+ rescue ArgumentError
82
+ DateTime.parse(string).to_time.utc
83
+ end
84
+
85
+ # Creates a file-like StringIO from base64-encoded content
86
+ #
87
+ # @api private
88
+ # @return [Proc] lambda that creates file objects
89
+ FILE_CONVERTER = lambda do |content, entity|
90
+ StringIO.new(content.unpack1("m")).tap do |io|
91
+ io.extend(FileLike)
92
+ file_io = io # : FileIO
93
+ file_io.original_filename = entity["name"]
94
+ file_io.content_type = entity["content_type"]
95
+ end
96
+ end
97
+
98
+ # Type converters for XML type attributes
99
+ #
100
+ # Maps type attribute values to lambdas that convert string content.
101
+ # Converters with arity 2 receive the content and the full entity hash.
102
+ #
103
+ # @api public
104
+ # @return [Hash{String => Proc}] mapping of type names to converter procs
105
+ # @example Using a converter
106
+ # TYPE_CONVERTERS["integer"].call("42") #=> 42
107
+ TYPE_CONVERTERS = {
108
+ # Primitive types
109
+ "symbol" => :to_sym.to_proc,
110
+ "string" => :to_s.to_proc,
111
+ "integer" => :to_i.to_proc,
112
+ "float" => :to_f.to_proc,
113
+ "double" => :to_f.to_proc,
114
+ "decimal" => ->(s) { BigDecimal(s) },
115
+ "boolean" => ->(s) { !FALSE_BOOLEAN_VALUES.include?(s.strip) },
116
+
117
+ # Date and time types
118
+ "date" => Date.method(:parse),
119
+ "datetime" => PARSE_DATETIME,
120
+ "dateTime" => PARSE_DATETIME,
121
+
122
+ # Binary types
123
+ "base64Binary" => ->(s) { s.unpack1("m") },
124
+ "binary" => ->(s, entity) { (entity["encoding"] == "base64") ? s.unpack1("m") : s },
125
+ "file" => FILE_CONVERTER,
126
+
127
+ # Structured types
128
+ "yaml" => lambda do |string|
129
+ YAML.safe_load(string, permitted_classes: [Symbol, Date, Time])
130
+ rescue ArgumentError, Psych::SyntaxError
131
+ string
132
+ end
133
+ }.freeze
134
+ end
@@ -0,0 +1,93 @@
1
+ module MultiXml
2
+ # Raised when XML parsing fails
3
+ #
4
+ # Preserves the original XML and underlying cause for debugging.
5
+ #
6
+ # @api public
7
+ # @example Catching a parse error
8
+ # begin
9
+ # MultiXml.parse('<invalid>')
10
+ # rescue MultiXml::ParseError => e
11
+ # puts e.xml # The malformed XML
12
+ # puts e.cause # The underlying parser exception
13
+ # end
14
+ class ParseError < StandardError
15
+ # The original XML that failed to parse
16
+ #
17
+ # @api public
18
+ # @return [String, nil] the XML string that caused the error
19
+ # @example Access the failing XML
20
+ # error.xml #=> "<invalid>"
21
+ attr_reader :xml
22
+
23
+ # The underlying parser exception
24
+ #
25
+ # @api public
26
+ # @return [Exception, nil] the original exception from the parser
27
+ # @example Access the cause
28
+ # error.cause #=> #<Nokogiri::XML::SyntaxError: ...>
29
+ attr_reader :cause
30
+
31
+ # Create a new ParseError
32
+ #
33
+ # @api public
34
+ # @param message [String, nil] Error message
35
+ # @param xml [String, nil] The original XML that failed to parse
36
+ # @param cause [Exception, nil] The underlying parser exception
37
+ # @return [ParseError] the new error instance
38
+ # @example Create a parse error
39
+ # ParseError.new("Invalid XML", xml: "<bad>", cause: original_error)
40
+ def initialize(message = nil, xml: nil, cause: nil)
41
+ @xml = xml
42
+ @cause = cause
43
+ super(message)
44
+ end
45
+ end
46
+
47
+ # Raised when no XML parser library is available
48
+ #
49
+ # This error is raised when MultiXml cannot find any supported XML parser.
50
+ # Install one of: ox, libxml-ruby, nokogiri, rexml, or oga.
51
+ #
52
+ # @api public
53
+ # @example Catching the error
54
+ # begin
55
+ # MultiXml.parse('<root/>')
56
+ # rescue MultiXml::NoParserError => e
57
+ # puts "Please install an XML parser gem"
58
+ # end
59
+ class NoParserError < StandardError; end
60
+
61
+ # Raised when an XML type attribute is in the disallowed list
62
+ #
63
+ # By default, 'yaml' and 'symbol' types are disallowed for security reasons.
64
+ #
65
+ # @api public
66
+ # @example Catching a disallowed type error
67
+ # begin
68
+ # MultiXml.parse('<data type="yaml">--- :key</data>')
69
+ # rescue MultiXml::DisallowedTypeError => e
70
+ # puts e.type #=> "yaml"
71
+ # end
72
+ class DisallowedTypeError < StandardError
73
+ # The disallowed type that was encountered
74
+ #
75
+ # @api public
76
+ # @return [String] the type attribute value that was disallowed
77
+ # @example Access the disallowed type
78
+ # error.type #=> "yaml"
79
+ attr_reader :type
80
+
81
+ # Create a new DisallowedTypeError
82
+ #
83
+ # @api public
84
+ # @param type [String] The disallowed type attribute value
85
+ # @return [DisallowedTypeError] the new error instance
86
+ # @example Create a disallowed type error
87
+ # DisallowedTypeError.new("yaml")
88
+ def initialize(type)
89
+ @type = type
90
+ super("Disallowed type attribute: #{type.inspect}")
91
+ end
92
+ end
93
+ end
@@ -0,0 +1,62 @@
1
+ module MultiXml
2
+ # Mixin that provides file-like metadata to StringIO objects
3
+ #
4
+ # Used when parsing base64-encoded file content from XML.
5
+ # Adds original_filename and content_type attributes to StringIO.
6
+ #
7
+ # @api public
8
+ # @example Extending a StringIO
9
+ # io = StringIO.new("file content")
10
+ # io.extend(MultiXml::FileLike)
11
+ # io.original_filename = "document.pdf"
12
+ # io.content_type = "application/pdf"
13
+ module FileLike
14
+ # Default filename when none is specified
15
+ # @api public
16
+ # @return [String] the default filename "untitled"
17
+ DEFAULT_FILENAME = "untitled".freeze
18
+
19
+ # Default content type when none is specified
20
+ # @api public
21
+ # @return [String] the default MIME type "application/octet-stream"
22
+ DEFAULT_CONTENT_TYPE = "application/octet-stream".freeze
23
+
24
+ # Set the original filename
25
+ #
26
+ # @api public
27
+ # @param value [String] The filename to set
28
+ # @return [String] the filename that was set
29
+ # @example Set filename
30
+ # io.original_filename = "report.pdf"
31
+ attr_writer :original_filename
32
+
33
+ # Set the content type
34
+ #
35
+ # @api public
36
+ # @param value [String] The MIME type to set
37
+ # @return [String] the content type that was set
38
+ # @example Set content type
39
+ # io.content_type = "application/pdf"
40
+ attr_writer :content_type
41
+
42
+ # Get the original filename
43
+ #
44
+ # @api public
45
+ # @return [String] the original filename or "untitled" if not set
46
+ # @example Get filename
47
+ # io.original_filename #=> "document.pdf"
48
+ def original_filename
49
+ @original_filename || DEFAULT_FILENAME
50
+ end
51
+
52
+ # Get the content type
53
+ #
54
+ # @api public
55
+ # @return [String] the content type or "application/octet-stream" if not set
56
+ # @example Get content type
57
+ # io.content_type #=> "application/pdf"
58
+ def content_type
59
+ @content_type || DEFAULT_CONTENT_TYPE
60
+ end
61
+ end
62
+ end