moxml 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +117 -66
- data/Gemfile +1 -0
- data/README.adoc +11 -9
- data/Rakefile +3 -1
- data/docs/_pages/configuration.adoc +22 -19
- data/docs/_tutorials/namespace-handling.adoc +5 -5
- data/lib/moxml/adapter/base.rb +8 -3
- data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
- data/lib/moxml/adapter/customized_libxml.rb +18 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +2 -2
- data/lib/moxml/adapter/customized_oga.rb +10 -0
- data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
- data/lib/moxml/adapter/customized_ox.rb +12 -0
- data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
- data/lib/moxml/adapter/customized_rexml/formatter.rb +2 -0
- data/lib/moxml/adapter/customized_rexml.rb +11 -0
- data/lib/moxml/adapter/headed_ox.rb +9 -3
- data/lib/moxml/adapter/libxml.rb +76 -62
- data/lib/moxml/adapter/nokogiri.rb +4 -5
- data/lib/moxml/adapter/oga.rb +50 -26
- data/lib/moxml/adapter/ox.rb +189 -41
- data/lib/moxml/adapter/rexml.rb +27 -8
- data/lib/moxml/attribute.rb +3 -0
- data/lib/moxml/builder.rb +1 -0
- data/lib/moxml/config.rb +7 -7
- data/lib/moxml/document.rb +5 -1
- data/lib/moxml/document_builder.rb +37 -31
- data/lib/moxml/element.rb +13 -5
- data/lib/moxml/entity_registry.rb +36 -0
- data/lib/moxml/node.rb +23 -2
- data/lib/moxml/node_set.rb +43 -15
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +1 -1
- data/spec/integration/shared_examples/edge_cases.rb +3 -0
- data/spec/moxml/adapter/oga_spec.rb +62 -0
- data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
- data/spec/moxml/allocation_benchmark_spec.rb +96 -0
- data/spec/moxml/allocation_guard_spec.rb +282 -0
- data/spec/moxml/builder_spec.rb +22 -0
- data/spec/moxml/config_spec.rb +11 -11
- data/spec/moxml/doctype_spec.rb +41 -0
- data/spec/moxml/lazy_parse_spec.rb +115 -0
- data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
- data/spec/moxml/node_cache_spec.rb +110 -0
- data/spec/moxml/node_set_cache_spec.rb +90 -0
- data/spec/moxml/xml_utils_spec.rb +32 -0
- data/spec/support/allocation_helper.rb +165 -0
- data/spec/support/w3c_namespace_helpers.rb +2 -1
- metadata +15 -2
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
require "support/allocation_helper"
|
|
5
|
+
|
|
6
|
+
# NodeSet wrap caching correctness tests — these run in CI by default.
|
|
7
|
+
# Verifies that NodeSet per-index wrap caching works correctly across adapters.
|
|
8
|
+
RSpec.describe "Moxml NodeSet wrap caching" do
|
|
9
|
+
shared_examples "cached NodeSet wraps" do |adapter_name|
|
|
10
|
+
let(:ctx) { Moxml::Context.new(adapter_name) }
|
|
11
|
+
let(:xml) { "<root><a/><b/><c/></root>" }
|
|
12
|
+
let(:doc) { ctx.parse(xml) }
|
|
13
|
+
|
|
14
|
+
describe "NodeSet#each caching" do
|
|
15
|
+
it "returns the same wrapper object on repeated iteration" do
|
|
16
|
+
root = doc.root
|
|
17
|
+
first_pass = root.children.to_a
|
|
18
|
+
second_pass = root.children.to_a
|
|
19
|
+
# Since children itself is cached, the same NodeSet is returned.
|
|
20
|
+
# Within that NodeSet, wrapped nodes should be cached.
|
|
21
|
+
first_pass.each_with_index do |node, i|
|
|
22
|
+
expect(node).to equal(second_pass[i])
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
it "returns consistent node names" do
|
|
27
|
+
children = doc.root.children
|
|
28
|
+
names = children.map(&:name)
|
|
29
|
+
expect(names).to eq(%w[a b c])
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
describe "NodeSet#[] caching" do
|
|
34
|
+
it "returns the same wrapper for the same index" do
|
|
35
|
+
children = doc.root.children
|
|
36
|
+
first = children[0]
|
|
37
|
+
second = children[0]
|
|
38
|
+
expect(first).to equal(second)
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
it "returns the same wrapper from #[] as from #each" do
|
|
42
|
+
children = doc.root.children
|
|
43
|
+
from_each = children.to_a[1]
|
|
44
|
+
from_index = children[1]
|
|
45
|
+
expect(from_each).to equal(from_index)
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
describe "NodeSet#first/#last caching" do
|
|
50
|
+
it "returns the same wrapper from #first as from #[0]" do
|
|
51
|
+
children = doc.root.children
|
|
52
|
+
expect(children.first).to equal(children[0])
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
it "returns the same wrapper from #last as from #[-1]" do
|
|
56
|
+
children = doc.root.children
|
|
57
|
+
last_idx = children.size - 1
|
|
58
|
+
expect(children.last).to equal(children[last_idx])
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
describe "NodeSet mutation" do
|
|
63
|
+
it "appends to cache correctly" do
|
|
64
|
+
ns = doc.root.children
|
|
65
|
+
initial_size = ns.size
|
|
66
|
+
ns << ctx.parse("<d/>").root
|
|
67
|
+
expect(ns.size).to eq(initial_size + 1)
|
|
68
|
+
expect(ns[initial_size].name).to eq("d")
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
it "deletes from cache correctly" do
|
|
72
|
+
ns = doc.root.children
|
|
73
|
+
first_child = ns[0]
|
|
74
|
+
ns.delete(first_child)
|
|
75
|
+
expect(ns.size).to eq(2)
|
|
76
|
+
expect(ns[0].name).to eq("b")
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
AllocationHelper::GUARDED_ADAPTERS.each do |adapter_name|
|
|
82
|
+
describe "#{adapter_name} adapter" do
|
|
83
|
+
before(:all) do
|
|
84
|
+
skip("#{adapter_name} adapter not available") unless AllocationHelper.adapter_available?(adapter_name)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it_behaves_like "cached NodeSet wraps", adapter_name
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
@@ -46,4 +46,36 @@ RSpec.describe Moxml::XmlUtils do
|
|
|
46
46
|
"Invalid XML element name: 123invalid")
|
|
47
47
|
end
|
|
48
48
|
end
|
|
49
|
+
|
|
50
|
+
describe "#validate_prefix" do
|
|
51
|
+
it "accepts valid NCName prefixes" do
|
|
52
|
+
expect { utils.validate_prefix("xs") }.not_to raise_error
|
|
53
|
+
expect { utils.validate_prefix("my-ns") }.not_to raise_error
|
|
54
|
+
expect { utils.validate_prefix("a1") }.not_to raise_error
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
it "accepts prefixes containing dots (valid NCName NameChar)" do
|
|
58
|
+
expect { utils.validate_prefix("abc_1.0") }.not_to raise_error
|
|
59
|
+
expect { utils.validate_prefix("xmlns_1.0") }.not_to raise_error
|
|
60
|
+
expect { utils.validate_prefix("v2.0.1") }.not_to raise_error
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
it "rejects prefixes starting with a digit" do
|
|
64
|
+
expect do
|
|
65
|
+
utils.validate_prefix("1abc")
|
|
66
|
+
end.to raise_error(Moxml::ValidationError, /Invalid namespace prefix/)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
it "rejects prefixes containing colons" do
|
|
70
|
+
expect do
|
|
71
|
+
utils.validate_prefix("a:b")
|
|
72
|
+
end.to raise_error(Moxml::ValidationError, /Invalid namespace prefix/)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
it "rejects empty prefixes" do
|
|
76
|
+
expect do
|
|
77
|
+
utils.validate_prefix("")
|
|
78
|
+
end.to raise_error(Moxml::ValidationError, /Invalid namespace prefix/)
|
|
79
|
+
end
|
|
80
|
+
end
|
|
49
81
|
end
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
# Shared helper for allocation guard specs.
|
|
6
|
+
#
|
|
7
|
+
# Provides:
|
|
8
|
+
# - Precise allocation counting via GC.stat
|
|
9
|
+
# - Per-adapter threshold configuration
|
|
10
|
+
# - Adapter availability checks
|
|
11
|
+
# - Optional StackProf diagnostic on guard failure
|
|
12
|
+
module AllocationHelper
|
|
13
|
+
# Adapters to guard in CI (ordered by importance).
|
|
14
|
+
# Skip REXML/LibXML — not used in production.
|
|
15
|
+
GUARDED_ADAPTERS = %i[nokogiri ox headed_ox oga].freeze
|
|
16
|
+
|
|
17
|
+
# Per-adapter allocation thresholds.
|
|
18
|
+
# Format: { operation => { adapter => max_allocations } }
|
|
19
|
+
#
|
|
20
|
+
# Thresholds calibrated at ~2x measured baseline (2026-04-18).
|
|
21
|
+
# All lazy-parse adapters (nokogiri, ox, headed_ox) share similar profiles.
|
|
22
|
+
# OGA is pure Ruby so naturally allocates more.
|
|
23
|
+
THRESHOLDS = {
|
|
24
|
+
# Parse a 100-element document (no subsequent access).
|
|
25
|
+
# Measured: nokogiri=299, ox=1003, headed_ox=1001, oga=8732
|
|
26
|
+
parse_100: {
|
|
27
|
+
nokogiri: 600,
|
|
28
|
+
ox: 2500,
|
|
29
|
+
headed_ox: 2500,
|
|
30
|
+
oga: 18_000,
|
|
31
|
+
},
|
|
32
|
+
# Parse a 50-element document.
|
|
33
|
+
# Measured: nokogiri=148, ox=501, headed_ox=501, oga=4365
|
|
34
|
+
parse_50: {
|
|
35
|
+
nokogiri: 300,
|
|
36
|
+
ox: 1200,
|
|
37
|
+
headed_ox: 1200,
|
|
38
|
+
oga: 9000,
|
|
39
|
+
},
|
|
40
|
+
# Access root.name after parse (lazy wrapping overhead).
|
|
41
|
+
# Measured: nokogiri=317, ox=1013, headed_ox=1009, oga=8673
|
|
42
|
+
parse_and_root: {
|
|
43
|
+
nokogiri: 700,
|
|
44
|
+
ox: 2500,
|
|
45
|
+
headed_ox: 2500,
|
|
46
|
+
oga: 18_000,
|
|
47
|
+
},
|
|
48
|
+
# First access to children (NodeSet construction).
|
|
49
|
+
first_children_access: {
|
|
50
|
+
nokogiri: 200,
|
|
51
|
+
ox: 200,
|
|
52
|
+
headed_ox: 200,
|
|
53
|
+
oga: 300,
|
|
54
|
+
},
|
|
55
|
+
# Second access to children (should be ~0 — cached).
|
|
56
|
+
# Measured: all adapters = 1-3
|
|
57
|
+
cached_children_access: {
|
|
58
|
+
nokogiri: 5,
|
|
59
|
+
ox: 5,
|
|
60
|
+
headed_ox: 5,
|
|
61
|
+
oga: 5,
|
|
62
|
+
},
|
|
63
|
+
# Second access to attributes (should be ~0 — cached).
|
|
64
|
+
# Measured: all adapters = 1
|
|
65
|
+
cached_attributes_access: {
|
|
66
|
+
nokogiri: 5,
|
|
67
|
+
ox: 5,
|
|
68
|
+
headed_ox: 5,
|
|
69
|
+
oga: 5,
|
|
70
|
+
},
|
|
71
|
+
# Second iteration of NodeSet (wrap cache hit).
|
|
72
|
+
# Measured: all adapters = 2
|
|
73
|
+
cached_iteration: {
|
|
74
|
+
nokogiri: 10,
|
|
75
|
+
ox: 10,
|
|
76
|
+
headed_ox: 10,
|
|
77
|
+
oga: 10,
|
|
78
|
+
},
|
|
79
|
+
# Parse + serialize round-trip (50 elements).
|
|
80
|
+
# Measured: nokogiri=222, ox=893, headed_ox=882, oga=9523
|
|
81
|
+
round_trip: {
|
|
82
|
+
nokogiri: 500,
|
|
83
|
+
ox: 2000,
|
|
84
|
+
headed_ox: 2000,
|
|
85
|
+
oga: 20_000,
|
|
86
|
+
},
|
|
87
|
+
# Ratio of allocations for 200-element vs 100-element parse.
|
|
88
|
+
# Must be <= max (linear growth). Quadratic would be > 4x.
|
|
89
|
+
# Measured: nokogiri=2.01, ox=2.0, headed_ox=2.0, oga=1.99
|
|
90
|
+
scalability_ratio: {
|
|
91
|
+
nokogiri: 2.5,
|
|
92
|
+
ox: 2.5,
|
|
93
|
+
headed_ox: 2.5,
|
|
94
|
+
oga: 2.5,
|
|
95
|
+
},
|
|
96
|
+
}.freeze
|
|
97
|
+
|
|
98
|
+
class << self
|
|
99
|
+
# Count object allocations during a block.
|
|
100
|
+
# Uses GC.stat[:total_allocated_objects] for precision.
|
|
101
|
+
def count_allocations
|
|
102
|
+
GC.start
|
|
103
|
+
GC.disable
|
|
104
|
+
before = GC.stat[:total_allocated_objects] || ObjectSpace.count_objects[:TOTAL]
|
|
105
|
+
result = yield
|
|
106
|
+
after = GC.stat[:total_allocated_objects] || ObjectSpace.count_objects[:TOTAL]
|
|
107
|
+
after - before
|
|
108
|
+
ensure
|
|
109
|
+
GC.enable
|
|
110
|
+
result
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Check if an adapter is available for testing.
|
|
114
|
+
def adapter_available?(adapter_name)
|
|
115
|
+
ctx = Moxml::Context.new(adapter_name)
|
|
116
|
+
ctx.parse("<root/>")
|
|
117
|
+
true
|
|
118
|
+
rescue StandardError
|
|
119
|
+
false
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Get the allocation threshold for an adapter + operation.
|
|
123
|
+
def threshold(adapter_name, operation)
|
|
124
|
+
THRESHOLDS.dig(operation, adapter_name) ||
|
|
125
|
+
raise(ArgumentError, "No threshold for #{adapter_name}/#{operation}")
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Run StackProf and return top hotspots as a diagnostic string.
|
|
129
|
+
# Tries :obj mode first (allocation profiling), falls back to :wall.
|
|
130
|
+
def profile_allocations(&block)
|
|
131
|
+
require "stackprof"
|
|
132
|
+
|
|
133
|
+
# :obj mode tracks object allocations but requires platform support.
|
|
134
|
+
# :wall mode tracks wall-clock time — less precise but always available.
|
|
135
|
+
result = begin
|
|
136
|
+
StackProf.run(mode: :obj, &block)
|
|
137
|
+
rescue ArgumentError
|
|
138
|
+
StackProf.run(mode: :wall, &block)
|
|
139
|
+
end
|
|
140
|
+
return nil unless result
|
|
141
|
+
|
|
142
|
+
frames = result[:frames]
|
|
143
|
+
total_samples = result[:samples]
|
|
144
|
+
|
|
145
|
+
hotspots = frames.sort_by { |_, f| -f[:samples] }.first(10)
|
|
146
|
+
lines = ["StackProf hotspot (#{total_samples} total samples):"]
|
|
147
|
+
hotspots.each do |name, frame|
|
|
148
|
+
pct = (frame[:samples].to_f / total_samples * 100).round(1)
|
|
149
|
+
lines << " #{pct}% #{name} (#{frame[:samples]} samples)"
|
|
150
|
+
end
|
|
151
|
+
lines.join("\n")
|
|
152
|
+
rescue LoadError
|
|
153
|
+
"StackProf not available — add gem 'stackprof' to Gemfile"
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# Generate a test XML document with N elements.
|
|
159
|
+
# Each element has 2 attributes and nested text content.
|
|
160
|
+
def generate_xml(element_count)
|
|
161
|
+
inner = Array.new(element_count) do |i|
|
|
162
|
+
"<elem#{i % 10} id=\"#{i}\" type=\"t#{i % 3}\">text#{i}</elem#{i % 10}>"
|
|
163
|
+
end.join
|
|
164
|
+
"<root>#{inner}</root>"
|
|
165
|
+
end
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
require "rexml/document"
|
|
4
4
|
|
|
5
|
-
W3C_NS_FIXTURES_DIR = File.expand_path("../fixtures/w3c/namespaces/1.0", __dir__)
|
|
5
|
+
W3C_NS_FIXTURES_DIR = File.expand_path("../fixtures/w3c/namespaces/1.0",
|
|
6
|
+
__dir__)
|
|
6
7
|
|
|
7
8
|
# Parse the test catalog to get test metadata
|
|
8
9
|
def load_w3c_namespace_tests
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: moxml
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.13
|
|
4
|
+
version: 0.1.15
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: |
|
|
14
14
|
Moxml is a unified XML manipulation library that provides a common API
|
|
@@ -99,18 +99,25 @@ files:
|
|
|
99
99
|
- lib/moxml.rb
|
|
100
100
|
- lib/moxml/adapter.rb
|
|
101
101
|
- lib/moxml/adapter/base.rb
|
|
102
|
+
- lib/moxml/adapter/customized_libxml.rb
|
|
102
103
|
- lib/moxml/adapter/customized_libxml/cdata.rb
|
|
103
104
|
- lib/moxml/adapter/customized_libxml/comment.rb
|
|
104
105
|
- lib/moxml/adapter/customized_libxml/declaration.rb
|
|
105
106
|
- lib/moxml/adapter/customized_libxml/element.rb
|
|
107
|
+
- lib/moxml/adapter/customized_libxml/entity_reference.rb
|
|
106
108
|
- lib/moxml/adapter/customized_libxml/node.rb
|
|
107
109
|
- lib/moxml/adapter/customized_libxml/processing_instruction.rb
|
|
108
110
|
- lib/moxml/adapter/customized_libxml/text.rb
|
|
111
|
+
- lib/moxml/adapter/customized_oga.rb
|
|
109
112
|
- lib/moxml/adapter/customized_oga/xml_declaration.rb
|
|
110
113
|
- lib/moxml/adapter/customized_oga/xml_generator.rb
|
|
114
|
+
- lib/moxml/adapter/customized_ox.rb
|
|
111
115
|
- lib/moxml/adapter/customized_ox/attribute.rb
|
|
116
|
+
- lib/moxml/adapter/customized_ox/entity_reference.rb
|
|
112
117
|
- lib/moxml/adapter/customized_ox/namespace.rb
|
|
113
118
|
- lib/moxml/adapter/customized_ox/text.rb
|
|
119
|
+
- lib/moxml/adapter/customized_rexml.rb
|
|
120
|
+
- lib/moxml/adapter/customized_rexml/entity_reference.rb
|
|
114
121
|
- lib/moxml/adapter/customized_rexml/formatter.rb
|
|
115
122
|
- lib/moxml/adapter/headed_ox.rb
|
|
116
123
|
- lib/moxml/adapter/libxml.rb
|
|
@@ -290,6 +297,8 @@ files:
|
|
|
290
297
|
- spec/moxml/adapter/shared_examples/.gitkeep
|
|
291
298
|
- spec/moxml/adapter/shared_examples/adapter_contract.rb
|
|
292
299
|
- spec/moxml/adapter_spec.rb
|
|
300
|
+
- spec/moxml/allocation_benchmark_spec.rb
|
|
301
|
+
- spec/moxml/allocation_guard_spec.rb
|
|
293
302
|
- spec/moxml/attribute_spec.rb
|
|
294
303
|
- spec/moxml/builder_spec.rb
|
|
295
304
|
- spec/moxml/cdata_spec.rb
|
|
@@ -304,9 +313,12 @@ files:
|
|
|
304
313
|
- spec/moxml/element_spec.rb
|
|
305
314
|
- spec/moxml/entity_registry_spec.rb
|
|
306
315
|
- spec/moxml/error_spec.rb
|
|
316
|
+
- spec/moxml/lazy_parse_spec.rb
|
|
307
317
|
- spec/moxml/moxml_spec.rb
|
|
308
318
|
- spec/moxml/namespace_spec.rb
|
|
309
319
|
- spec/moxml/namespace_uri_validation_spec.rb
|
|
320
|
+
- spec/moxml/node_cache_spec.rb
|
|
321
|
+
- spec/moxml/node_set_cache_spec.rb
|
|
310
322
|
- spec/moxml/node_set_spec.rb
|
|
311
323
|
- spec/moxml/node_spec.rb
|
|
312
324
|
- spec/moxml/processing_instruction_spec.rb
|
|
@@ -342,6 +354,7 @@ files:
|
|
|
342
354
|
- spec/performance/thread_safety_spec.rb
|
|
343
355
|
- spec/performance/xpath_benchmark_spec.rb
|
|
344
356
|
- spec/spec_helper.rb
|
|
357
|
+
- spec/support/allocation_helper.rb
|
|
345
358
|
- spec/support/w3c_namespace_helpers.rb
|
|
346
359
|
- spec/support/xml_matchers.rb
|
|
347
360
|
- spec/unit/rexml_isolated_test.rb
|