moxml 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +117 -66
  3. data/Gemfile +1 -0
  4. data/README.adoc +11 -9
  5. data/Rakefile +3 -1
  6. data/docs/_pages/configuration.adoc +22 -19
  7. data/docs/_tutorials/namespace-handling.adoc +5 -5
  8. data/lib/moxml/adapter/base.rb +8 -3
  9. data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
  10. data/lib/moxml/adapter/customized_libxml.rb +18 -0
  11. data/lib/moxml/adapter/customized_oga/xml_generator.rb +2 -2
  12. data/lib/moxml/adapter/customized_oga.rb +10 -0
  13. data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
  14. data/lib/moxml/adapter/customized_ox.rb +12 -0
  15. data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
  16. data/lib/moxml/adapter/customized_rexml/formatter.rb +2 -0
  17. data/lib/moxml/adapter/customized_rexml.rb +11 -0
  18. data/lib/moxml/adapter/headed_ox.rb +9 -3
  19. data/lib/moxml/adapter/libxml.rb +76 -62
  20. data/lib/moxml/adapter/nokogiri.rb +4 -5
  21. data/lib/moxml/adapter/oga.rb +50 -26
  22. data/lib/moxml/adapter/ox.rb +189 -41
  23. data/lib/moxml/adapter/rexml.rb +27 -8
  24. data/lib/moxml/attribute.rb +3 -0
  25. data/lib/moxml/builder.rb +1 -0
  26. data/lib/moxml/config.rb +7 -7
  27. data/lib/moxml/document.rb +5 -1
  28. data/lib/moxml/document_builder.rb +37 -31
  29. data/lib/moxml/element.rb +13 -5
  30. data/lib/moxml/entity_registry.rb +36 -0
  31. data/lib/moxml/node.rb +23 -2
  32. data/lib/moxml/node_set.rb +43 -15
  33. data/lib/moxml/version.rb +1 -1
  34. data/lib/moxml/xml_utils.rb +1 -1
  35. data/spec/integration/shared_examples/edge_cases.rb +3 -0
  36. data/spec/moxml/adapter/oga_spec.rb +62 -0
  37. data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
  38. data/spec/moxml/allocation_benchmark_spec.rb +96 -0
  39. data/spec/moxml/allocation_guard_spec.rb +282 -0
  40. data/spec/moxml/builder_spec.rb +22 -0
  41. data/spec/moxml/config_spec.rb +11 -11
  42. data/spec/moxml/doctype_spec.rb +41 -0
  43. data/spec/moxml/lazy_parse_spec.rb +115 -0
  44. data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
  45. data/spec/moxml/node_cache_spec.rb +110 -0
  46. data/spec/moxml/node_set_cache_spec.rb +90 -0
  47. data/spec/moxml/xml_utils_spec.rb +32 -0
  48. data/spec/support/allocation_helper.rb +165 -0
  49. data/spec/support/w3c_namespace_helpers.rb +2 -1
  50. metadata +15 -2
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+ require "support/allocation_helper"
5
+
6
+ # NodeSet wrap caching correctness tests — these run in CI by default.
7
+ # Verifies that NodeSet per-index wrap caching works correctly across adapters.
8
+ RSpec.describe "Moxml NodeSet wrap caching" do
9
+ shared_examples "cached NodeSet wraps" do |adapter_name|
10
+ let(:ctx) { Moxml::Context.new(adapter_name) }
11
+ let(:xml) { "<root><a/><b/><c/></root>" }
12
+ let(:doc) { ctx.parse(xml) }
13
+
14
+ describe "NodeSet#each caching" do
15
+ it "returns the same wrapper object on repeated iteration" do
16
+ root = doc.root
17
+ first_pass = root.children.to_a
18
+ second_pass = root.children.to_a
19
+ # Since children itself is cached, the same NodeSet is returned.
20
+ # Within that NodeSet, wrapped nodes should be cached.
21
+ first_pass.each_with_index do |node, i|
22
+ expect(node).to equal(second_pass[i])
23
+ end
24
+ end
25
+
26
+ it "returns consistent node names" do
27
+ children = doc.root.children
28
+ names = children.map(&:name)
29
+ expect(names).to eq(%w[a b c])
30
+ end
31
+ end
32
+
33
+ describe "NodeSet#[] caching" do
34
+ it "returns the same wrapper for the same index" do
35
+ children = doc.root.children
36
+ first = children[0]
37
+ second = children[0]
38
+ expect(first).to equal(second)
39
+ end
40
+
41
+ it "returns the same wrapper from #[] as from #each" do
42
+ children = doc.root.children
43
+ from_each = children.to_a[1]
44
+ from_index = children[1]
45
+ expect(from_each).to equal(from_index)
46
+ end
47
+ end
48
+
49
+ describe "NodeSet#first/#last caching" do
50
+ it "returns the same wrapper from #first as from #[0]" do
51
+ children = doc.root.children
52
+ expect(children.first).to equal(children[0])
53
+ end
54
+
55
+ it "returns the same wrapper from #last as from #[-1]" do
56
+ children = doc.root.children
57
+ last_idx = children.size - 1
58
+ expect(children.last).to equal(children[last_idx])
59
+ end
60
+ end
61
+
62
+ describe "NodeSet mutation" do
63
+ it "appends to cache correctly" do
64
+ ns = doc.root.children
65
+ initial_size = ns.size
66
+ ns << ctx.parse("<d/>").root
67
+ expect(ns.size).to eq(initial_size + 1)
68
+ expect(ns[initial_size].name).to eq("d")
69
+ end
70
+
71
+ it "deletes from cache correctly" do
72
+ ns = doc.root.children
73
+ first_child = ns[0]
74
+ ns.delete(first_child)
75
+ expect(ns.size).to eq(2)
76
+ expect(ns[0].name).to eq("b")
77
+ end
78
+ end
79
+ end
80
+
81
+ AllocationHelper::GUARDED_ADAPTERS.each do |adapter_name|
82
+ describe "#{adapter_name} adapter" do
83
+ before(:all) do
84
+ skip("#{adapter_name} adapter not available") unless AllocationHelper.adapter_available?(adapter_name)
85
+ end
86
+
87
+ it_behaves_like "cached NodeSet wraps", adapter_name
88
+ end
89
+ end
90
+ end
@@ -46,4 +46,36 @@ RSpec.describe Moxml::XmlUtils do
46
46
  "Invalid XML element name: 123invalid")
47
47
  end
48
48
  end
49
+
50
+ describe "#validate_prefix" do
51
+ it "accepts valid NCName prefixes" do
52
+ expect { utils.validate_prefix("xs") }.not_to raise_error
53
+ expect { utils.validate_prefix("my-ns") }.not_to raise_error
54
+ expect { utils.validate_prefix("a1") }.not_to raise_error
55
+ end
56
+
57
+ it "accepts prefixes containing dots (valid NCName NameChar)" do
58
+ expect { utils.validate_prefix("abc_1.0") }.not_to raise_error
59
+ expect { utils.validate_prefix("xmlns_1.0") }.not_to raise_error
60
+ expect { utils.validate_prefix("v2.0.1") }.not_to raise_error
61
+ end
62
+
63
+ it "rejects prefixes starting with a digit" do
64
+ expect do
65
+ utils.validate_prefix("1abc")
66
+ end.to raise_error(Moxml::ValidationError, /Invalid namespace prefix/)
67
+ end
68
+
69
+ it "rejects prefixes containing colons" do
70
+ expect do
71
+ utils.validate_prefix("a:b")
72
+ end.to raise_error(Moxml::ValidationError, /Invalid namespace prefix/)
73
+ end
74
+
75
+ it "rejects empty prefixes" do
76
+ expect do
77
+ utils.validate_prefix("")
78
+ end.to raise_error(Moxml::ValidationError, /Invalid namespace prefix/)
79
+ end
80
+ end
49
81
  end
@@ -0,0 +1,165 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ # Shared helper for allocation guard specs.
6
+ #
7
+ # Provides:
8
+ # - Precise allocation counting via GC.stat
9
+ # - Per-adapter threshold configuration
10
+ # - Adapter availability checks
11
+ # - Optional StackProf diagnostic on guard failure
12
+ module AllocationHelper
13
+ # Adapters to guard in CI (ordered by importance).
14
+ # Skip REXML/LibXML — not used in production.
15
+ GUARDED_ADAPTERS = %i[nokogiri ox headed_ox oga].freeze
16
+
17
+ # Per-adapter allocation thresholds.
18
+ # Format: { operation => { adapter => max_allocations } }
19
+ #
20
+ # Thresholds calibrated at ~2x measured baseline (2026-04-18).
21
+ # All lazy-parse adapters (nokogiri, ox, headed_ox) share similar profiles.
22
+ # OGA is pure Ruby so naturally allocates more.
23
+ THRESHOLDS = {
24
+ # Parse a 100-element document (no subsequent access).
25
+ # Measured: nokogiri=299, ox=1003, headed_ox=1001, oga=8732
26
+ parse_100: {
27
+ nokogiri: 600,
28
+ ox: 2500,
29
+ headed_ox: 2500,
30
+ oga: 18_000,
31
+ },
32
+ # Parse a 50-element document.
33
+ # Measured: nokogiri=148, ox=501, headed_ox=501, oga=4365
34
+ parse_50: {
35
+ nokogiri: 300,
36
+ ox: 1200,
37
+ headed_ox: 1200,
38
+ oga: 9000,
39
+ },
40
+ # Access root.name after parse (lazy wrapping overhead).
41
+ # Measured: nokogiri=317, ox=1013, headed_ox=1009, oga=8673
42
+ parse_and_root: {
43
+ nokogiri: 700,
44
+ ox: 2500,
45
+ headed_ox: 2500,
46
+ oga: 18_000,
47
+ },
48
+ # First access to children (NodeSet construction).
49
+ first_children_access: {
50
+ nokogiri: 200,
51
+ ox: 200,
52
+ headed_ox: 200,
53
+ oga: 300,
54
+ },
55
+ # Second access to children (should be ~0 — cached).
56
+ # Measured: all adapters = 1-3
57
+ cached_children_access: {
58
+ nokogiri: 5,
59
+ ox: 5,
60
+ headed_ox: 5,
61
+ oga: 5,
62
+ },
63
+ # Second access to attributes (should be ~0 — cached).
64
+ # Measured: all adapters = 1
65
+ cached_attributes_access: {
66
+ nokogiri: 5,
67
+ ox: 5,
68
+ headed_ox: 5,
69
+ oga: 5,
70
+ },
71
+ # Second iteration of NodeSet (wrap cache hit).
72
+ # Measured: all adapters = 2
73
+ cached_iteration: {
74
+ nokogiri: 10,
75
+ ox: 10,
76
+ headed_ox: 10,
77
+ oga: 10,
78
+ },
79
+ # Parse + serialize round-trip (50 elements).
80
+ # Measured: nokogiri=222, ox=893, headed_ox=882, oga=9523
81
+ round_trip: {
82
+ nokogiri: 500,
83
+ ox: 2000,
84
+ headed_ox: 2000,
85
+ oga: 20_000,
86
+ },
87
+ # Ratio of allocations for 200-element vs 100-element parse.
88
+ # Must be <= max. Linear growth gives ~2x; quadratic growth would give ~4x.
89
+ # Measured: nokogiri=2.01, ox=2.0, headed_ox=2.0, oga=1.99
90
+ scalability_ratio: {
91
+ nokogiri: 2.5,
92
+ ox: 2.5,
93
+ headed_ox: 2.5,
94
+ oga: 2.5,
95
+ },
96
+ }.freeze
97
+
98
+ class << self
99
+ # Count object allocations during a block and return that count (not the block's value).
100
+ # Uses GC.stat[:total_allocated_objects] for precision.
101
+ def count_allocations
102
+ GC.start
103
+ GC.disable
104
+ before = GC.stat[:total_allocated_objects] || ObjectSpace.count_objects[:TOTAL]
105
+ result = yield
106
+ after = GC.stat[:total_allocated_objects] || ObjectSpace.count_objects[:TOTAL]
107
+ after - before
108
+ ensure
109
+ GC.enable
110
+ result
111
+ end
112
+
113
+ # Check if an adapter is available for testing.
114
+ def adapter_available?(adapter_name)
115
+ ctx = Moxml::Context.new(adapter_name)
116
+ ctx.parse("<root/>")
117
+ true
118
+ rescue StandardError
119
+ false
120
+ end
121
+
122
+ # Get the allocation threshold for an adapter + operation.
123
+ def threshold(adapter_name, operation)
124
+ THRESHOLDS.dig(operation, adapter_name) ||
125
+ raise(ArgumentError, "No threshold for #{adapter_name}/#{operation}")
126
+ end
127
+
128
+ # Run StackProf and return top hotspots as a diagnostic string.
129
+ # Tries :obj mode first (allocation profiling), falls back to :wall.
130
+ def profile_allocations(&block)
131
+ require "stackprof"
132
+
133
+ # :obj mode tracks object allocations but requires platform support.
134
+ # :wall mode tracks wall-clock time — less precise but always available.
135
+ result = begin
136
+ StackProf.run(mode: :obj, &block)
137
+ rescue ArgumentError
138
+ StackProf.run(mode: :wall, &block)
139
+ end
140
+ return nil unless result
141
+
142
+ frames = result[:frames]
143
+ total_samples = result[:samples]
144
+
145
+ hotspots = frames.sort_by { |_, f| -f[:samples] }.first(10)
146
+ lines = ["StackProf hotspot (#{total_samples} total samples):"]
147
+ hotspots.each do |name, frame|
148
+ pct = (frame[:samples].to_f / total_samples * 100).round(1)
149
+ lines << " #{pct}% #{name} (#{frame[:samples]} samples)"
150
+ end
151
+ lines.join("\n")
152
+ rescue LoadError
153
+ "StackProf not available — add gem 'stackprof' to Gemfile"
154
+ end
155
+ end
156
+ end
157
+
158
+ # Generate a test XML document with N elements.
159
+ # Each element has 2 attributes (id, type) and a text child; tag names cycle through elem0..elem9.
160
+ def generate_xml(element_count)
161
+ inner = Array.new(element_count) do |i|
162
+ "<elem#{i % 10} id=\"#{i}\" type=\"t#{i % 3}\">text#{i}</elem#{i % 10}>"
163
+ end.join
164
+ "<root>#{inner}</root>"
165
+ end
@@ -2,7 +2,8 @@
2
2
 
3
3
  require "rexml/document"
4
4
 
5
- W3C_NS_FIXTURES_DIR = File.expand_path("../fixtures/w3c/namespaces/1.0", __dir__)
5
+ W3C_NS_FIXTURES_DIR = File.expand_path("../fixtures/w3c/namespaces/1.0",
6
+ __dir__)
6
7
 
7
8
  # Parse the test catalog to get test metadata
8
9
  def load_w3c_namespace_tests
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: moxml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.1.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-04-20 00:00:00.000000000 Z
11
+ date: 2026-04-22 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  Moxml is a unified XML manipulation library that provides a common API
@@ -99,18 +99,25 @@ files:
99
99
  - lib/moxml.rb
100
100
  - lib/moxml/adapter.rb
101
101
  - lib/moxml/adapter/base.rb
102
+ - lib/moxml/adapter/customized_libxml.rb
102
103
  - lib/moxml/adapter/customized_libxml/cdata.rb
103
104
  - lib/moxml/adapter/customized_libxml/comment.rb
104
105
  - lib/moxml/adapter/customized_libxml/declaration.rb
105
106
  - lib/moxml/adapter/customized_libxml/element.rb
107
+ - lib/moxml/adapter/customized_libxml/entity_reference.rb
106
108
  - lib/moxml/adapter/customized_libxml/node.rb
107
109
  - lib/moxml/adapter/customized_libxml/processing_instruction.rb
108
110
  - lib/moxml/adapter/customized_libxml/text.rb
111
+ - lib/moxml/adapter/customized_oga.rb
109
112
  - lib/moxml/adapter/customized_oga/xml_declaration.rb
110
113
  - lib/moxml/adapter/customized_oga/xml_generator.rb
114
+ - lib/moxml/adapter/customized_ox.rb
111
115
  - lib/moxml/adapter/customized_ox/attribute.rb
116
+ - lib/moxml/adapter/customized_ox/entity_reference.rb
112
117
  - lib/moxml/adapter/customized_ox/namespace.rb
113
118
  - lib/moxml/adapter/customized_ox/text.rb
119
+ - lib/moxml/adapter/customized_rexml.rb
120
+ - lib/moxml/adapter/customized_rexml/entity_reference.rb
114
121
  - lib/moxml/adapter/customized_rexml/formatter.rb
115
122
  - lib/moxml/adapter/headed_ox.rb
116
123
  - lib/moxml/adapter/libxml.rb
@@ -290,6 +297,8 @@ files:
290
297
  - spec/moxml/adapter/shared_examples/.gitkeep
291
298
  - spec/moxml/adapter/shared_examples/adapter_contract.rb
292
299
  - spec/moxml/adapter_spec.rb
300
+ - spec/moxml/allocation_benchmark_spec.rb
301
+ - spec/moxml/allocation_guard_spec.rb
293
302
  - spec/moxml/attribute_spec.rb
294
303
  - spec/moxml/builder_spec.rb
295
304
  - spec/moxml/cdata_spec.rb
@@ -304,9 +313,12 @@ files:
304
313
  - spec/moxml/element_spec.rb
305
314
  - spec/moxml/entity_registry_spec.rb
306
315
  - spec/moxml/error_spec.rb
316
+ - spec/moxml/lazy_parse_spec.rb
307
317
  - spec/moxml/moxml_spec.rb
308
318
  - spec/moxml/namespace_spec.rb
309
319
  - spec/moxml/namespace_uri_validation_spec.rb
320
+ - spec/moxml/node_cache_spec.rb
321
+ - spec/moxml/node_set_cache_spec.rb
310
322
  - spec/moxml/node_set_spec.rb
311
323
  - spec/moxml/node_spec.rb
312
324
  - spec/moxml/processing_instruction_spec.rb
@@ -342,6 +354,7 @@ files:
342
354
  - spec/performance/thread_safety_spec.rb
343
355
  - spec/performance/xpath_benchmark_spec.rb
344
356
  - spec/spec_helper.rb
357
+ - spec/support/allocation_helper.rb
345
358
  - spec/support/w3c_namespace_helpers.rb
346
359
  - spec/support/xml_matchers.rb
347
360
  - spec/unit/rexml_isolated_test.rb