moxml 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +117 -66
  3. data/Gemfile +1 -0
  4. data/README.adoc +11 -9
  5. data/Rakefile +3 -1
  6. data/docs/_pages/configuration.adoc +22 -19
  7. data/docs/_tutorials/namespace-handling.adoc +5 -5
  8. data/lib/moxml/adapter/base.rb +8 -3
  9. data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
  10. data/lib/moxml/adapter/customized_libxml.rb +18 -0
  11. data/lib/moxml/adapter/customized_oga/xml_generator.rb +2 -2
  12. data/lib/moxml/adapter/customized_oga.rb +10 -0
  13. data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
  14. data/lib/moxml/adapter/customized_ox.rb +12 -0
  15. data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
  16. data/lib/moxml/adapter/customized_rexml/formatter.rb +2 -0
  17. data/lib/moxml/adapter/customized_rexml.rb +11 -0
  18. data/lib/moxml/adapter/headed_ox.rb +9 -3
  19. data/lib/moxml/adapter/libxml.rb +76 -62
  20. data/lib/moxml/adapter/nokogiri.rb +4 -5
  21. data/lib/moxml/adapter/oga.rb +50 -26
  22. data/lib/moxml/adapter/ox.rb +189 -41
  23. data/lib/moxml/adapter/rexml.rb +27 -8
  24. data/lib/moxml/attribute.rb +3 -0
  25. data/lib/moxml/builder.rb +1 -0
  26. data/lib/moxml/config.rb +7 -7
  27. data/lib/moxml/document.rb +5 -1
  28. data/lib/moxml/document_builder.rb +37 -31
  29. data/lib/moxml/element.rb +13 -5
  30. data/lib/moxml/entity_registry.rb +36 -0
  31. data/lib/moxml/node.rb +23 -2
  32. data/lib/moxml/node_set.rb +43 -15
  33. data/lib/moxml/version.rb +1 -1
  34. data/lib/moxml/xml_utils.rb +1 -1
  35. data/spec/integration/shared_examples/edge_cases.rb +3 -0
  36. data/spec/moxml/adapter/oga_spec.rb +62 -0
  37. data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
  38. data/spec/moxml/allocation_benchmark_spec.rb +96 -0
  39. data/spec/moxml/allocation_guard_spec.rb +282 -0
  40. data/spec/moxml/builder_spec.rb +22 -0
  41. data/spec/moxml/config_spec.rb +11 -11
  42. data/spec/moxml/doctype_spec.rb +41 -0
  43. data/spec/moxml/lazy_parse_spec.rb +115 -0
  44. data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
  45. data/spec/moxml/node_cache_spec.rb +110 -0
  46. data/spec/moxml/node_set_cache_spec.rb +90 -0
  47. data/spec/moxml/xml_utils_spec.rb +32 -0
  48. data/spec/support/allocation_helper.rb +165 -0
  49. data/spec/support/w3c_namespace_helpers.rb +2 -1
  50. metadata +15 -2
@@ -13,15 +13,13 @@ module Moxml
13
13
  @current_doc = context.create_document(native_doc)
14
14
 
15
15
  # Transfer has_declaration flag if present
16
- if native_doc.respond_to?(:instance_variable_get) &&
17
- native_doc.instance_variable_defined?(:@moxml_has_declaration)
16
+ if native_doc.instance_variable_defined?(:@moxml_has_declaration)
18
17
  has_declaration = native_doc.instance_variable_get(:@moxml_has_declaration)
19
18
  @current_doc.has_xml_declaration = has_declaration
20
19
  end
21
20
 
22
21
  # Transfer DOCTYPE from parsed document if it exists
23
- if native_doc.respond_to?(:instance_variable_get) &&
24
- native_doc.instance_variable_defined?(:@moxml_doctype)
22
+ if native_doc.instance_variable_defined?(:@moxml_doctype)
25
23
  doctype = native_doc.instance_variable_get(:@moxml_doctype)
26
24
  if doctype
27
25
  @current_doc.native.instance_variable_set(:@moxml_doctype,
@@ -70,43 +68,54 @@ module Moxml
70
68
  content = adapter.text_content(node)
71
69
 
72
70
  # Check if we should restore entity references for this text
73
- if context.config.restore_entities && content.to_s =~ /[<>&"']/
71
+ if context.config.restore_entities && text_has_restorable_entities?(content)
74
72
  restore_entities_in_text(content)
75
73
  else
76
74
  @node_stack.last&.add_child(Text.new(prepared, context))
77
75
  end
78
76
  end
79
77
 
78
+ def text_has_restorable_entities?(content)
79
+ return false unless content
80
+
81
+ registry = context.entity_registry
82
+ codepoints = registry.restorable_codepoints
83
+ content.each_char do |char|
84
+ return true if codepoints.include?(char.ord)
85
+ end
86
+ false
87
+ end
88
+
80
89
  def restore_entities_in_text(content)
81
90
  parent = @node_stack.last
82
91
  return unless parent
83
92
 
84
- # Characters that should potentially be entity-encoded
85
- # Per W3C XML spec, these characters have special meaning
86
- entity_chars = {
87
- "<" => "lt",
88
- ">" => "gt",
89
- "&" => "amp",
90
- '"' => "quot",
91
- "'" => "apos",
92
- }
93
-
94
- # Process character by character
95
- chars = content.to_s.chars
96
- chars.each do |char|
97
- codepoint = char.ord
98
- entity_name = context.entity_registry.primary_name_for_codepoint(codepoint)
93
+ registry = context.entity_registry
94
+ config = context.config
95
+ buffer = +""
99
96
 
100
- if entity_name && entity_chars.value?(entity_name)
101
- # This character should be an entity reference
102
- entity_node = adapter.create_entity_reference(entity_name)
103
- parent.add_child(EntityReference.new(entity_node, context))
97
+ content.to_s.each_char do |char|
98
+ codepoint = char.ord
99
+ name = registry.primary_name_for_codepoint(codepoint)
100
+
101
+ if name && registry.should_restore?(codepoint, config: config)
102
+ # Flush buffered text before the entity
103
+ unless buffer.empty?
104
+ parent.add_child(Text.new(adapter.create_text(buffer), context))
105
+ buffer.clear
106
+ end
107
+ parent.add_child(
108
+ EntityReference.new(adapter.create_entity_reference(name), context),
109
+ )
104
110
  else
105
- # Regular character
106
- text_node = adapter.create_text(char)
107
- parent.add_child(Text.new(text_node, context))
111
+ buffer << char
108
112
  end
109
113
  end
114
+
115
+ # Flush remaining buffer
116
+ unless buffer.empty?
117
+ parent.add_child(Text.new(adapter.create_text(buffer), context))
118
+ end
110
119
  end
111
120
 
112
121
  def visit_cdata(node)
@@ -135,10 +144,7 @@ module Moxml
135
144
  end
136
145
 
137
146
  def visit_children(node)
138
- node_children = children(node).dup
139
- node_children.each do |child|
140
- visit_node(child)
141
- end
147
+ children(node).each { |child| visit_node(child) }
142
148
  end
143
149
 
144
150
  def node_type(node)
data/lib/moxml/element.rb CHANGED
@@ -42,6 +42,7 @@ module Moxml
42
42
 
43
43
  def []=(name, value)
44
44
  adapter.set_attribute(@native, name, normalize_xml_value(value))
45
+ @attributes_cache = nil
45
46
  end
46
47
 
47
48
  def [](name)
@@ -64,19 +65,23 @@ module Moxml
64
65
  end
65
66
 
66
67
  def attributes
67
- adapter.attributes(@native).map do |attr|
68
- Attribute.new(attr, context)
68
+ @attributes ||= adapter.attributes(@native).map do |attr|
69
+ a = Attribute.new(attr, context)
70
+ a.instance_variable_set(:@parent_node, self)
71
+ a
69
72
  end
70
73
  end
71
74
 
72
75
  def remove_attribute(name)
73
76
  adapter.remove_attribute(@native, name)
77
+ @attributes_cache = nil
74
78
  self
75
79
  end
76
80
 
77
81
  def add_namespace(prefix, uri)
78
82
  adapter.create_namespace(@native, prefix, uri,
79
- namespace_uri_mode: context.config.namespace_uri_mode)
83
+ namespace_validation_mode: context.config.namespace_validation_mode)
84
+ @namespaces_cache = nil
80
85
  self
81
86
  rescue ValidationError => e
82
87
  # Re-raise as NamespaceError, provide attributes for error context
@@ -103,15 +108,16 @@ module Moxml
103
108
  adapter.set_namespace(
104
109
  @native,
105
110
  adapter.create_namespace(@native, *ns_or_hash.to_a.first,
106
- namespace_uri_mode: context.config.namespace_uri_mode),
111
+ namespace_validation_mode: context.config.namespace_validation_mode),
107
112
  )
108
113
  else
109
114
  adapter.set_namespace(@native, ns_or_hash&.native)
110
115
  end
116
+ @namespaces_cache = nil
111
117
  end
112
118
 
113
119
  def namespaces
114
- adapter.namespace_definitions(@native).map do |ns|
120
+ @namespaces ||= adapter.namespace_definitions(@native).map do |ns|
115
121
  Namespace.new(ns, context)
116
122
  end
117
123
  end
@@ -136,6 +142,7 @@ module Moxml
136
142
 
137
143
  def text=(content)
138
144
  adapter.set_text_content(@native, normalize_xml_value(content))
145
+ invalidate_children_cache!
139
146
  end
140
147
 
141
148
  def inner_text
@@ -149,6 +156,7 @@ module Moxml
149
156
  def inner_xml=(xml)
150
157
  doc = context.parse("<root>#{xml}</root>")
151
158
  adapter.replace_children(@native, doc.root.children.map(&:native))
159
+ invalidate_children_cache!
152
160
  end
153
161
 
154
162
  # Fluent interface methods
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "json"
4
+ require "set"
4
5
 
5
6
  module Moxml
6
7
  # EntityRegistry maintains a knowledge base of XML entity definitions.
@@ -26,6 +27,9 @@ module Moxml
26
27
  # W3C entity data file name
27
28
  ENTITY_DATA_FILE = "w3c_entities.json"
28
29
 
30
+ # Standard XML predefined entities (XML spec §4.6)
31
+ STANDARD_CODEPOINTS = Set[0x26, 0x3C, 0x3E, 0x22, 0x27].freeze
32
+
29
33
  class << self
30
34
  # Get the raw entity data from the bundled JSON source
31
35
  # @return [Hash{String => String}] entity name to character mapping
@@ -153,6 +157,38 @@ module Moxml
153
157
  @by_codepoint[codepoint]&.first
154
158
  end
155
159
 
160
+ # Check if a codepoint is one of the 5 standard XML predefined entities
161
+ # @param codepoint [Integer] Unicode codepoint
162
+ # @return [Boolean]
163
+ def standard_entity?(codepoint)
164
+ STANDARD_CODEPOINTS.include?(codepoint)
165
+ end
166
+
167
+ # Determine if an entity reference should be restored for a codepoint.
168
+ # Standard XML entities are always restored (required by XML spec).
169
+ # Non-standard entities are only restored when restore_entities is enabled.
170
+ # @param codepoint [Integer] Unicode codepoint
171
+ # @param config [Moxml::Config] configuration object
172
+ # @return [Boolean]
173
+ def should_restore?(codepoint, config:)
174
+ name = primary_name_for_codepoint(codepoint)
175
+ return false unless name
176
+ return true if standard_entity?(codepoint)
177
+
178
+ config.restore_entities
179
+ end
180
+
181
+ # Returns the set of codepoints that could potentially be restored as entities.
182
+ # Used by DocumentBuilder for O(1) fast-path checks.
183
+ # @return [Set<Integer>]
184
+ def restorable_codepoints
185
+ @restorable_codepoints ||= if @by_name.empty?
186
+ STANDARD_CODEPOINTS
187
+ else
188
+ Set.new(@by_name.values).freeze
189
+ end
190
+ end
191
+
156
192
  # Register additional entities
157
193
  # @param entities [Hash{String => Integer}] name => codepoint mapping
158
194
  # @return [self]
data/lib/moxml/node.rb CHANGED
@@ -16,8 +16,8 @@ module Moxml
16
16
 
17
17
  def initialize(native, context)
18
18
  @context = context
19
- # @native = adapter.patch_node(native)
20
19
  @native = native
20
+ @parent_node = nil
21
21
  end
22
22
 
23
23
  def document
@@ -29,9 +29,10 @@ module Moxml
29
29
  end
30
30
 
31
31
  def children
32
- NodeSet.new(
32
+ @children ||= NodeSet.new(
33
33
  adapter.children(@native).map { adapter.patch_node(_1, @native) },
34
34
  context,
35
+ self,
35
36
  )
36
37
  end
37
38
 
@@ -46,29 +47,37 @@ module Moxml
46
47
  def add_child(node)
47
48
  node = prepare_node(node)
48
49
  adapter.add_child(@native, node.native)
50
+ node.instance_variable_set(:@parent_node, self)
51
+ invalidate_children_cache!
49
52
  self
50
53
  end
51
54
 
52
55
  def add_previous_sibling(node)
53
56
  node = prepare_node(node)
54
57
  adapter.add_previous_sibling(@native, node.native)
58
+ invalidate_parent_children_cache!
55
59
  self
56
60
  end
57
61
 
58
62
  def add_next_sibling(node)
59
63
  node = prepare_node(node)
60
64
  adapter.add_next_sibling(@native, node.native)
65
+ invalidate_parent_children_cache!
61
66
  self
62
67
  end
63
68
 
64
69
  def remove
70
+ invalidate_parent_children_cache!
65
71
  adapter.remove(@native)
72
+ invalidate_children_cache!
66
73
  self
67
74
  end
68
75
 
69
76
  def replace(node)
70
77
  node = prepare_node(node)
78
+ invalidate_parent_children_cache!
71
79
  adapter.replace(@native, node.native)
80
+ invalidate_children_cache!
72
81
  self
73
82
  end
74
83
 
@@ -229,6 +238,18 @@ module Moxml
229
238
  context.config.adapter
230
239
  end
231
240
 
241
+ # Invalidate cached children. Called by the mutation methods
242
+ # on Node and by Element#text= / Element#inner_xml=.
243
+ def invalidate_children_cache!
244
+ @children = nil
245
+ end
246
+
247
+ # Invalidate parent's cached children when this node
248
+ # is removed/replaced from its parent's child list.
249
+ def invalidate_parent_children_cache!
250
+ @parent_node&.invalidate_children_cache!
251
+ end
252
+
232
253
  private
233
254
 
234
255
  def prepare_node(node)
@@ -6,60 +6,72 @@ module Moxml
6
6
 
7
7
  attr_reader :nodes, :context
8
8
 
9
- def initialize(nodes, context)
9
+ def initialize(nodes, context, parent_node = nil)
10
10
  @nodes = Array(nodes)
11
11
  @context = context
12
+ @wrapped = Array.new(@nodes.size)
13
+ @parent_node = parent_node
12
14
  end
13
15
 
14
16
  def each
15
17
  return to_enum(:each) unless block_given?
16
18
 
17
- nodes.each { |node| yield Moxml::Node.wrap(node, context) }
19
+ @nodes.each_with_index do |node, i|
20
+ @wrapped[i] ||= wrap_with_parent(node)
21
+ yield @wrapped[i]
22
+ end
18
23
  self
19
24
  end
20
25
 
21
26
  def [](index)
22
27
  case index
23
28
  when Integer
24
- Moxml::Node.wrap(nodes[index], context)
29
+ actual = index.negative? ? @nodes.size + index : index
30
+ return nil unless actual >= 0 && actual < @nodes.size
31
+
32
+ @wrapped[actual] ||= wrap_with_parent(@nodes[actual])
25
33
  when Range
26
- NodeSet.new(nodes[index], context)
34
+ self.class.new(@nodes[index], @context)
27
35
  end
28
36
  end
29
37
 
30
38
  def first(n = nil)
31
39
  if n.nil?
32
- Moxml::Node.wrap(nodes.first, context)
40
+ @nodes.empty? ? nil : self[0]
33
41
  else
34
- nodes.first(n).map { |node| Moxml::Node.wrap(node, context) }
42
+ n.times.filter_map { |i| self[i] }
35
43
  end
36
44
  end
37
45
 
38
46
  def last
39
- Moxml::Node.wrap(nodes.last, context)
47
+ @nodes.empty? ? nil : self[@nodes.size - 1]
40
48
  end
41
49
 
42
50
  def empty?
43
- nodes.empty?
51
+ @nodes.empty?
44
52
  end
45
53
 
46
54
  def size
47
- nodes.size
55
+ @nodes.size
48
56
  end
49
57
  alias length size
50
58
 
51
59
  def to_a
52
- map { |node| node }
60
+ @nodes.each_with_index do |_node, i|
61
+ @wrapped[i] ||= wrap_with_parent(@nodes[i])
62
+ end
63
+ @wrapped.compact
53
64
  end
54
65
 
55
66
  def +(other)
56
- self.class.new(nodes + other.nodes, context)
67
+ self.class.new(@nodes + other.nodes, @context)
57
68
  end
58
69
 
59
70
  def <<(node)
60
71
  # If it's a wrapped Moxml node, unwrap to native before storing
61
72
  native_node = node.respond_to?(:native) ? node.native : node
62
73
  @nodes << native_node
74
+ @wrapped << nil
63
75
  self
64
76
  end
65
77
  alias push <<
@@ -78,14 +90,14 @@ module Moxml
78
90
  true
79
91
  end
80
92
  end
81
- self.class.new(unique_natives, context)
93
+ self.class.new(unique_natives, @context)
82
94
  end
83
95
 
84
96
  def ==(other)
85
97
  self.class == other.class &&
86
98
  length == other.length &&
87
- nodes.each_with_index.all? do |node, index|
88
- Moxml::Node.wrap(node, context) == other[index]
99
+ @nodes.each_with_index.all? do |_node, index|
100
+ self[index] == other[index]
89
101
  end
90
102
  end
91
103
 
@@ -103,8 +115,24 @@ module Moxml
103
115
  def delete(node)
104
116
  # If it's a wrapped Moxml node, unwrap to native
105
117
  native_node = node.respond_to?(:native) ? node.native : node
106
- @nodes.delete(native_node)
118
+ idx = @nodes.index(native_node)
119
+ if idx
120
+ @nodes.delete_at(idx)
121
+ @wrapped.delete_at(idx)
122
+ else
123
+ @nodes.delete(native_node)
124
+ end
107
125
  self
108
126
  end
127
+
128
+ private
129
+
130
+ def wrap_with_parent(native_node)
131
+ wrapped = Moxml::Node.wrap(native_node, @context)
132
+ if @parent_node && wrapped
133
+ wrapped.instance_variable_set(:@parent_node, @parent_node)
134
+ end
135
+ wrapped
136
+ end
109
137
  end
110
138
  end
data/lib/moxml/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Moxml
4
- VERSION = "0.1.13"
4
+ VERSION = "0.1.15"
5
5
  end
@@ -81,7 +81,7 @@ module Moxml
81
81
  end
82
82
 
83
83
  def validate_prefix(prefix)
84
- return if prefix.match?(/\A[a-zA-Z_][\w-]*\z/)
84
+ return if prefix.match?(/\A[a-zA-Z_][\w.-]*\z/)
85
85
 
86
86
  raise ValidationError, "Invalid namespace prefix: #{prefix}"
87
87
  end
@@ -95,6 +95,9 @@ RSpec.shared_examples "Moxml Edge Cases" do
95
95
  if context.config.adapter_name == :libxml
96
96
  skip "LibXML cannot query empty default namespace with XPath (documented limitation)"
97
97
  end
98
+ if context.config.adapter_name == :nokogiri
99
+ skip "Nokogiri XPath does not support querying empty namespace with xmlns prefix mapping"
100
+ end
98
101
  xml = <<~XML
99
102
  <root xmlns="http://default1.org">
100
103
  <child xmlns="http://default2.org">
@@ -12,6 +12,22 @@ RSpec.describe Moxml::Adapter::Oga do
12
12
 
13
13
  it_behaves_like "xml adapter"
14
14
 
15
+ describe "serialization" do
16
+ it "does not duplicate XML declarations when declaration nodes repeat" do
17
+ context = Moxml::Context.new(:oga)
18
+ doc = context.create_document
19
+
20
+ doc.add_child(doc.create_declaration("1.0", "UTF-8"))
21
+ doc.add_child(doc.create_declaration("1.0", "UTF-8"))
22
+ doc.add_child(doc.create_element("root"))
23
+
24
+ serialized = doc.to_xml
25
+
26
+ expect(serialized.scan("<?xml").size).to eq(1)
27
+ expect(serialized).to include("<root></root>")
28
+ end
29
+ end
30
+
15
31
  describe "entity handling" do
16
32
  it "preserves non-breaking space through parse and serialize round-trip" do
17
33
  xml = "<root>Item&nbsp;One</root>"
@@ -103,4 +119,50 @@ RSpec.describe Moxml::Adapter::Oga do
103
119
  expect(serialized).not_to include("\x01")
104
120
  end
105
121
  end
122
+
123
+ describe "doctype handling" do
124
+ it "correctly parses PUBLIC doctype" do
125
+ xml = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html/>'
126
+ doc = described_class.parse(xml)
127
+ doctype = doc.children.find { |c| c.is_a?(Moxml::Doctype) }
128
+
129
+ expect(doctype.name).to eq("html")
130
+ expect(doctype.external_id).to eq("-//W3C//DTD XHTML 1.0 Strict//EN")
131
+ expect(doctype.system_id).to eq("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")
132
+ end
133
+
134
+ it "correctly parses SYSTEM doctype" do
135
+ xml = '<!DOCTYPE config SYSTEM "config.dtd"><config/>'
136
+ doc = described_class.parse(xml)
137
+ doctype = doc.children.find { |c| c.is_a?(Moxml::Doctype) }
138
+
139
+ expect(doctype.name).to eq("config")
140
+ expect(doctype.external_id).to be_nil
141
+ expect(doctype.system_id).to eq("config.dtd")
142
+ end
143
+
144
+ it "correctly parses simple doctype" do
145
+ xml = "<!DOCTYPE html><html/>"
146
+ doc = described_class.parse(xml)
147
+ doctype = doc.children.find { |c| c.is_a?(Moxml::Doctype) }
148
+
149
+ expect(doctype.name).to eq("html")
150
+ expect(doctype.external_id).to be_nil
151
+ expect(doctype.system_id).to be_nil
152
+ end
153
+
154
+ it "round-trips PUBLIC doctype" do
155
+ xml = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html/>'
156
+ doc = described_class.parse(xml)
157
+
158
+ expect(doc.to_xml).to include('PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"')
159
+ end
160
+
161
+ it "round-trips SYSTEM doctype" do
162
+ xml = '<!DOCTYPE config SYSTEM "config.dtd"><config/>'
163
+ doc = described_class.parse(xml)
164
+
165
+ expect(doc.to_xml).to include('SYSTEM "config.dtd"')
166
+ end
167
+ end
106
168
  end
@@ -4,18 +4,7 @@
4
4
  # A better way is to run it through Moxml wrappers
5
5
  RSpec.shared_examples "xml adapter" do
6
6
  let(:xml) do
7
- <<~XML
8
- <?xml version="1.0"?>
9
- <root xmlns="http://example.org" xmlns:x="http://example.org/x">
10
- <child id="1">Text</child>
11
- <child id="2"/>
12
- <x:special>
13
- <![CDATA[Some <special> text]]>
14
- <!-- A comment -->
15
- <?pi target?>
16
- </x:special>
17
- </root>
18
- XML
7
+ '<?xml version="1.0"?><root xmlns="http://example.org" xmlns:x="http://example.org/x"><child id="1">Text</child><child id="2"/><x:special><![CDATA[Some <special> text]]><!-- A comment --><?pi target?></x:special></root>'
19
8
  end
20
9
 
21
10
  describe ".parse" do
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+ require "support/allocation_helper"
5
+
6
+ # Detailed allocation benchmarks — only run with RUN_PERFORMANCE=1.
7
+ # These measure exact allocation counts and compare across adapters.
8
+ RSpec.describe "Moxml allocation benchmarks", :performance do
9
+ shared_examples "reduced allocations" do |adapter_name|
10
+ let(:ctx) { Moxml::Context.new(adapter_name) }
11
+
12
+ it "parse allocates fewer objects than a 100-element baseline" do
13
+ xml = generate_xml(100)
14
+ allocs = AllocationHelper.count_allocations { ctx.parse(xml) }
15
+ # Before lazy parse: ~18,000 allocations for 100 elements via DocumentBuilder
16
+ # After lazy parse: should be dramatically less (document wrapper + root only)
17
+ expect(allocs).to be < 5000,
18
+ "Expected <5000 allocations for 100-element parse, got #{allocs}"
19
+ end
20
+
21
+ it "parse + root access is allocation-efficient" do
22
+ xml = generate_xml(50)
23
+ allocs = AllocationHelper.count_allocations do
24
+ doc = ctx.parse(xml)
25
+ doc.root.name
26
+ end
27
+ expect(allocs).to be < 2000,
28
+ "Expected <2000 allocations for parse + root.name, got #{allocs}"
29
+ end
30
+
31
+ it "children access is cached (repeated calls don't increase allocations)" do
32
+ xml = "<root><a/><b/><c/></root>"
33
+ doc = ctx.parse(xml)
34
+ root = doc.root
35
+
36
+ allocs1 = AllocationHelper.count_allocations { root.children.to_a }
37
+ allocs2 = AllocationHelper.count_allocations { root.children.to_a }
38
+
39
+ # Second call must not allocate more objects than the first, because children are cached
40
+ expect(allocs2).to be <= allocs1,
41
+ "Second children.to_a (#{allocs2}) should allocate <= first (#{allocs1})"
42
+ end
43
+
44
+ it "attributes access is cached" do
45
+ xml = '<root a="1" b="2" c="3"><child d="4"/></root>'
46
+ doc = ctx.parse(xml)
47
+ root = doc.root
48
+
49
+ allocs1 = AllocationHelper.count_allocations { root.attributes }
50
+ allocs2 = AllocationHelper.count_allocations { root.attributes }
51
+
52
+ expect(allocs2).to be <= allocs1,
53
+ "Second attributes call (#{allocs2}) should allocate <= first (#{allocs1})"
54
+ end
55
+
56
+ it "namespaces access is cached" do
57
+ xml = '<root xmlns:a="http://a.com" xmlns:b="http://b.com"><a:child/></root>'
58
+ doc = ctx.parse(xml)
59
+ root = doc.root
60
+
61
+ allocs1 = AllocationHelper.count_allocations { root.namespaces }
62
+ allocs2 = AllocationHelper.count_allocations { root.namespaces }
63
+
64
+ expect(allocs2).to be <= allocs1,
65
+ "Second namespaces call (#{allocs2}) should allocate <= first (#{allocs1})"
66
+ end
67
+
68
+ it "NodeSet iteration is cached (second iteration allocates less)" do
69
+ xml = generate_xml(20)
70
+ doc = ctx.parse(xml)
71
+ root = doc.root
72
+
73
+ allocs1 = AllocationHelper.count_allocations do
74
+ root.children.each do |_c|
75
+ end
76
+ end
77
+ allocs2 = AllocationHelper.count_allocations do
78
+ root.children.each do |_c|
79
+ end
80
+ end
81
+
82
+ expect(allocs2).to be <= allocs1,
83
+ "Second NodeSet iteration (#{allocs2}) should allocate <= first (#{allocs1})"
84
+ end
85
+ end
86
+
87
+ AllocationHelper::GUARDED_ADAPTERS.each do |adapter_name|
88
+ describe "#{adapter_name} adapter" do
89
+ before(:all) do
90
+ skip("#{adapter_name} adapter not available") unless AllocationHelper.adapter_available?(adapter_name)
91
+ end
92
+
93
+ it_behaves_like "reduced allocations", adapter_name
94
+ end
95
+ end
96
+ end