moxml 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +117 -66
  3. data/Gemfile +1 -0
  4. data/README.adoc +11 -9
  5. data/Rakefile +34 -1
  6. data/TODO.remaining/1-entity-reference-adapter-support.md +157 -0
  7. data/TODO.remaining/2-entity-restoration-model-driven.md +169 -0
  8. data/TODO.remaining/3-entity-reference-test-coverage.md +170 -0
  9. data/TODO.remaining/4-lenient-entities-mode.md +106 -0
  10. data/TODO.remaining/5-fixture-integrity.md +65 -0
  11. data/TODO.remaining/6-ox-element-ordering-bug.md +36 -0
  12. data/TODO.remaining/7-headed-ox-limitations.md +95 -0
  13. data/TODO.remaining/8-xpath-predicate-gaps.md +68 -0
  14. data/TODO.remaining/9-cleanup-hygiene.md +42 -0
  15. data/TODO.remaining/README.md +54 -0
  16. data/benchmarks/generate_report.rb +1 -1
  17. data/docs/_pages/configuration.adoc +22 -19
  18. data/docs/_tutorials/namespace-handling.adoc +5 -5
  19. data/lib/moxml/adapter/base.rb +22 -3
  20. data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
  21. data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
  22. data/lib/moxml/adapter/customized_libxml.rb +18 -0
  23. data/lib/moxml/adapter/customized_oga.rb +10 -0
  24. data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
  25. data/lib/moxml/adapter/customized_ox.rb +12 -0
  26. data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
  27. data/lib/moxml/adapter/customized_rexml/formatter.rb +44 -20
  28. data/lib/moxml/adapter/customized_rexml.rb +11 -0
  29. data/lib/moxml/adapter/headed_ox.rb +37 -14
  30. data/lib/moxml/adapter/libxml.rb +233 -119
  31. data/lib/moxml/adapter/nokogiri.rb +22 -11
  32. data/lib/moxml/adapter/oga.rb +64 -25
  33. data/lib/moxml/adapter/ox.rb +198 -42
  34. data/lib/moxml/adapter/rexml.rb +64 -13
  35. data/lib/moxml/attribute.rb +3 -0
  36. data/lib/moxml/builder.rb +78 -24
  37. data/lib/moxml/config.rb +24 -7
  38. data/lib/moxml/declaration.rb +4 -2
  39. data/lib/moxml/document.rb +8 -1
  40. data/lib/moxml/document_builder.rb +44 -37
  41. data/lib/moxml/element.rb +18 -5
  42. data/lib/moxml/entity_registry.rb +51 -1
  43. data/lib/moxml/native_attachment.rb +65 -0
  44. data/lib/moxml/node.rb +39 -50
  45. data/lib/moxml/node_set.rb +43 -15
  46. data/lib/moxml/version.rb +1 -1
  47. data/lib/moxml/xml_utils.rb +1 -1
  48. data/lib/moxml/xpath/compiler.rb +4 -1
  49. data/lib/moxml.rb +1 -0
  50. data/scripts/format_xml.rb +16 -0
  51. data/scripts/pretty_format_xml.rb +14 -0
  52. data/spec/consistency/round_trip_spec.rb +3 -30
  53. data/spec/integration/all_adapters_spec.rb +1 -0
  54. data/spec/integration/headed_ox_integration_spec.rb +0 -2
  55. data/spec/integration/shared_examples/edge_cases.rb +7 -4
  56. data/spec/integration/shared_examples/integration_workflows.rb +3 -3
  57. data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +1 -1
  58. data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
  59. data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +1 -1
  60. data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
  61. data/spec/moxml/adapter/oga_spec.rb +46 -0
  62. data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
  63. data/spec/moxml/allocation_benchmark_spec.rb +96 -0
  64. data/spec/moxml/allocation_guard_spec.rb +282 -0
  65. data/spec/moxml/builder_spec.rb +256 -0
  66. data/spec/moxml/config_spec.rb +11 -11
  67. data/spec/moxml/doctype_spec.rb +41 -0
  68. data/spec/moxml/lazy_parse_spec.rb +115 -0
  69. data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
  70. data/spec/moxml/node_cache_spec.rb +110 -0
  71. data/spec/moxml/node_set_cache_spec.rb +90 -0
  72. data/spec/moxml/xml_utils_spec.rb +32 -0
  73. data/spec/moxml/xpath/axes_spec.rb +1 -1
  74. data/spec/moxml/xpath/compiler_spec.rb +2 -2
  75. data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
  76. data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
  77. data/spec/performance/memory_usage_spec.rb +0 -4
  78. data/spec/support/allocation_helper.rb +165 -0
  79. data/spec/support/w3c_namespace_helpers.rb +2 -1
  80. metadata +29 -2
data/lib/moxml/builder.rb CHANGED
@@ -2,7 +2,10 @@
2
2
 
3
3
  module Moxml
4
4
  class Builder
5
+ RESERVED_METHOD_PATTERN = /\A(to_|as_json|marshal_|inspect|freeze|dup|clone)/
6
+
5
7
  attr_reader :document
8
+ alias_method :doc, :document
6
9
 
7
10
  def initialize(context)
8
11
  @context = context
@@ -21,33 +24,18 @@ module Moxml
21
24
  )
22
25
  end
23
26
 
24
- def element(name, attributes = {}, &block)
25
- el = @document.create_element(name)
26
-
27
- attributes.each do |key, value|
28
- if key.to_s == "xmlns"
29
- # Handle default namespace
30
- el.add_namespace(nil, value.to_s)
31
- elsif key.to_s.start_with?("xmlns:")
32
- # Handle prefixed namespace
33
- prefix = key.to_s.sub("xmlns:", "")
34
- el.add_namespace(prefix, value.to_s)
35
- else
36
- # Regular attribute
37
- el[key] = value
38
- end
27
+ # When called with a String name: creates element via instance_eval (DSL block context).
28
+ # When called with a Hash (e.g., element(name: "foo")): creates <element> tag
29
+ # via yield — handles collision where "element" is both a builder method
30
+ # and a valid XML tag name (XSD/RelaxNG).
31
+ def element(name_or_attrs = nil, attributes = {}, &block)
32
+ if name_or_attrs.is_a?(Hash)
33
+ return create_element_node("element", name_or_attrs, block: block, eval_block: false)
39
34
  end
40
35
 
41
- @current.add_child(el)
42
-
43
- if block
44
- previous = @current
45
- @current = el
46
- instance_eval(&block)
47
- @current = previous
48
- end
36
+ raise ArgumentError, "element requires a tag name" if name_or_attrs.nil?
49
37
 
50
- el
38
+ create_element_node(name_or_attrs, attributes, block: block, eval_block: true)
51
39
  end
52
40
 
53
41
  def text(content)
@@ -102,5 +90,71 @@ module Moxml
102
90
  el.namespace = { prefix => namespace_uri } if prefix
103
91
  el
104
92
  end
93
+
94
+ # Dynamic element creation DSL.
95
+ # xml.schema(attrs) { } creates <schema> with those attributes.
96
+ # Uses yield so blocks preserve the caller's self context.
97
+ # Supported call shapes: (), (String), (Hash), (String, Hash).
98
+ def method_missing(method_name, *args, &block)
99
+ return super if RESERVED_METHOD_PATTERN.match?(method_name.to_s)
100
+
101
+ text_content = args.first.is_a?(String) ? args.shift : nil
102
+ attrs = args.first.is_a?(Hash) ? args.shift : {}
103
+
104
+ raise ArgumentError, "unexpected arguments for #{method_name}: #{args.inspect}" unless args.empty?
105
+
106
+ if text_content && block
107
+ raise ArgumentError, "#{method_name}: cannot combine text content with a block"
108
+ end
109
+
110
+ # Strip trailing underscore to allow reserved Ruby method names as tags
111
+ # (e.g., type_, class_, id_ become <type>, <class>, <id>)
112
+ tag_name = method_name.to_s.chomp("_")
113
+
114
+ create_element_node(tag_name, attrs, text_content: text_content,
115
+ block: block, eval_block: false)
116
+ end
117
+
118
+ def respond_to_missing?(method_name, _include_private = false)
119
+ return super if RESERVED_METHOD_PATTERN.match?(method_name.to_s)
120
+
121
+ true
122
+ end
123
+
124
+ private
125
+
126
+ # Single method for all element creation.
127
+ # eval_block: true → instance_eval (build DSL context)
128
+ # eval_block: false → yield (preserves caller's self)
129
+ def create_element_node(tag_name, attrs = {}, text_content: nil, block: nil, eval_block: true)
130
+ el = @document.create_element(tag_name)
131
+
132
+ attrs.each do |key, value|
133
+ if key.to_s == "xmlns"
134
+ el.add_namespace(nil, value.to_s)
135
+ elsif key.to_s.start_with?("xmlns:")
136
+ prefix = key.to_s.sub("xmlns:", "")
137
+ el.add_namespace(prefix, value.to_s)
138
+ else
139
+ el[key] = value
140
+ end
141
+ end
142
+
143
+ @current.add_child(el)
144
+
145
+ el.add_child(@document.create_text(text_content)) if text_content
146
+
147
+ if block
148
+ previous = @current
149
+ @current = el
150
+ begin
151
+ eval_block ? instance_eval(&block) : block.call
152
+ ensure
153
+ @current = previous
154
+ end
155
+ end
156
+
157
+ el
158
+ end
105
159
  end
106
160
  end
data/lib/moxml/config.rb CHANGED
@@ -24,7 +24,12 @@ module Moxml
24
24
  end
25
25
  end
26
26
 
27
- NAMESPACE_URI_MODES = %i[strict lenient].freeze
27
+ NAMESPACE_VALIDATION_MODES = %i[strict lenient].freeze
28
+
29
+ # Entity restoration modes:
30
+ # - :lenient (default) — restore any known entity from the registry
31
+ # - :strict — only restore DTD-declared entities (falls back to lenient until DTD parsing is implemented)
32
+ ENTITY_RESTORATION_MODES = %i[strict lenient].freeze
28
33
 
29
34
  attr_reader :adapter_name
30
35
  attr_accessor :strict_parsing,
@@ -35,7 +40,8 @@ module Moxml
35
40
  :preload_entity_sets,
36
41
  :entity_load_mode,
37
42
  :entity_provider,
38
- :namespace_uri_mode
43
+ :namespace_validation_mode,
44
+ :entity_restoration_mode
39
45
 
40
46
  def initialize(adapter_name = nil, strict_parsing = nil,
41
47
  default_encoding = nil)
@@ -49,7 +55,8 @@ module Moxml
49
55
  @preload_entity_sets = []
50
56
  @entity_load_mode = :required
51
57
  @entity_provider = nil
52
- @namespace_uri_mode = :strict
58
+ @namespace_validation_mode = :strict
59
+ @entity_restoration_mode = :lenient
53
60
  end
54
61
 
55
62
  def adapter=(name)
@@ -86,14 +93,24 @@ module Moxml
86
93
  @entity_load_mode = mode
87
94
  end
88
95
 
89
- def namespace_uri_mode=(mode)
96
+ def namespace_validation_mode=(mode)
97
+ mode = mode.to_sym
98
+ unless NAMESPACE_VALIDATION_MODES.include?(mode)
99
+ raise ArgumentError,
100
+ "Invalid namespace_validation_mode: #{mode}. Must be one of: #{NAMESPACE_VALIDATION_MODES.join(', ')}"
101
+ end
102
+
103
+ @namespace_validation_mode = mode
104
+ end
105
+
106
+ def entity_restoration_mode=(mode)
90
107
  mode = mode.to_sym
91
- unless NAMESPACE_URI_MODES.include?(mode)
108
+ unless ENTITY_RESTORATION_MODES.include?(mode)
92
109
  raise ArgumentError,
93
- "Invalid namespace_uri_mode: #{mode}. Must be one of: #{NAMESPACE_URI_MODES.join(', ')}"
110
+ "Invalid entity_restoration_mode: #{mode}. Must be one of: #{ENTITY_RESTORATION_MODES.join(', ')}"
94
111
  end
95
112
 
96
- @namespace_uri_mode = mode
113
+ @entity_restoration_mode = mode
97
114
  end
98
115
 
99
116
  # Backward compatibility: convert old boolean to new symbol
data/lib/moxml/declaration.rb CHANGED
@@ -35,9 +35,11 @@ module Moxml
35
35
 
36
36
  def remove
37
37
  # Mark document as having no declaration when declaration is removed
38
- # Store on native document so all wrappers see it
38
+ # Store in adapter's attachment map so all wrappers see it
39
39
  native_doc = adapter.document(@native)
40
- native_doc&.instance_variable_set(:@moxml_has_declaration, false)
40
+ if native_doc && adapter.respond_to?(:attachments)
41
+ adapter.attachments.set(native_doc, :has_declaration, false)
42
+ end
41
43
 
42
44
  super
43
45
  end
data/lib/moxml/document.rb CHANGED
@@ -26,11 +26,13 @@ module Moxml
26
26
 
27
27
  def root=(element)
28
28
  adapter.set_root(@native, element.native)
29
+ element.parent_node = self
30
+ invalidate_children_cache!
29
31
  end
30
32
 
31
33
  def root
32
34
  root_element = adapter.root(@native)
33
- root_element ? Element.wrap(root_element, context) : nil
35
+ root_element ? Element.new(root_element, context) : nil
34
36
  end
35
37
 
36
38
  def create_element(name)
@@ -90,7 +92,12 @@ module Moxml
90
92
  raise Error, "Document already has a root element"
91
93
  else
92
94
  adapter.add_child(@native, node.native)
95
+ # Refresh native for adapters where identity changes (e.g., LibXML doc.root=)
96
+ refreshed = adapter.actual_native(node.native, @native)
97
+ node.refresh_native!(refreshed) if refreshed && refreshed != node.native
93
98
  end
99
+ node.parent_node = self
100
+ invalidate_children_cache!
94
101
  self
95
102
  end
96
103
 
data/lib/moxml/document_builder.rb CHANGED
@@ -12,20 +12,19 @@ module Moxml
12
12
  def build(native_doc)
13
13
  @current_doc = context.create_document(native_doc)
14
14
 
15
- # Transfer has_declaration flag if present
16
- if native_doc.respond_to?(:instance_variable_get) &&
17
- native_doc.instance_variable_defined?(:@moxml_has_declaration)
18
- has_declaration = native_doc.instance_variable_get(:@moxml_has_declaration)
15
+ # Transfer has_declaration flag if present in attachments
16
+ if adapter.respond_to?(:attachments) &&
17
+ adapter.attachments.key?(native_doc, :has_declaration)
18
+ has_declaration = adapter.attachments.get(native_doc, :has_declaration)
19
19
  @current_doc.has_xml_declaration = has_declaration
20
20
  end
21
21
 
22
- # Transfer DOCTYPE from parsed document if it exists
23
- if native_doc.respond_to?(:instance_variable_get) &&
24
- native_doc.instance_variable_defined?(:@moxml_doctype)
25
- doctype = native_doc.instance_variable_get(:@moxml_doctype)
22
+ # Transfer DOCTYPE from parsed document if it exists in attachments
23
+ if adapter.respond_to?(:attachments) &&
24
+ adapter.attachments.key?(native_doc, :doctype)
25
+ doctype = adapter.attachments.get(native_doc, :doctype)
26
26
  if doctype
27
- @current_doc.native.instance_variable_set(:@moxml_doctype,
28
- doctype)
27
+ adapter.attachments.set(@current_doc.native, :doctype, doctype)
29
28
  end
30
29
  end
31
30
 
@@ -70,43 +69,54 @@ module Moxml
70
69
  content = adapter.text_content(node)
71
70
 
72
71
  # Check if we should restore entity references for this text
73
- if context.config.restore_entities && content.to_s =~ /[<>&"']/
72
+ if context.config.restore_entities && text_has_restorable_entities?(content)
74
73
  restore_entities_in_text(content)
75
74
  else
76
75
  @node_stack.last&.add_child(Text.new(prepared, context))
77
76
  end
78
77
  end
79
78
 
79
+ def text_has_restorable_entities?(content)
80
+ return false unless content
81
+
82
+ registry = context.entity_registry
83
+ codepoints = registry.restorable_codepoints
84
+ content.each_char do |char|
85
+ return true if codepoints.include?(char.ord)
86
+ end
87
+ false
88
+ end
89
+
80
90
  def restore_entities_in_text(content)
81
91
  parent = @node_stack.last
82
92
  return unless parent
83
93
 
84
- # Characters that should potentially be entity-encoded
85
- # Per W3C XML spec, these characters have special meaning
86
- entity_chars = {
87
- "<" => "lt",
88
- ">" => "gt",
89
- "&" => "amp",
90
- '"' => "quot",
91
- "'" => "apos",
92
- }
93
-
94
- # Process character by character
95
- chars = content.to_s.chars
96
- chars.each do |char|
97
- codepoint = char.ord
98
- entity_name = context.entity_registry.primary_name_for_codepoint(codepoint)
94
+ registry = context.entity_registry
95
+ config = context.config
96
+ buffer = +""
99
97
 
100
- if entity_name && entity_chars.value?(entity_name)
101
- # This character should be an entity reference
102
- entity_node = adapter.create_entity_reference(entity_name)
103
- parent.add_child(EntityReference.new(entity_node, context))
98
+ content.to_s.each_char do |char|
99
+ codepoint = char.ord
100
+ name = registry.primary_name_for_codepoint(codepoint)
101
+
102
+ if name && registry.should_restore?(codepoint, config: config)
103
+ # Flush buffered text before the entity
104
+ unless buffer.empty?
105
+ parent.add_child(Text.new(adapter.create_text(buffer), context))
106
+ buffer.clear
107
+ end
108
+ parent.add_child(
109
+ EntityReference.new(adapter.create_entity_reference(name), context),
110
+ )
104
111
  else
105
- # Regular character
106
- text_node = adapter.create_text(char)
107
- parent.add_child(Text.new(text_node, context))
112
+ buffer << char
108
113
  end
109
114
  end
115
+
116
+ # Flush remaining buffer
117
+ unless buffer.empty?
118
+ parent.add_child(Text.new(adapter.create_text(buffer), context))
119
+ end
110
120
  end
111
121
 
112
122
  def visit_cdata(node)
@@ -135,10 +145,7 @@ module Moxml
135
145
  end
136
146
 
137
147
  def visit_children(node)
138
- node_children = children(node).dup
139
- node_children.each do |child|
140
- visit_node(child)
141
- end
148
+ children(node).each { |child| visit_node(child) }
142
149
  end
143
150
 
144
151
  def node_type(node)
data/lib/moxml/element.rb CHANGED
@@ -42,6 +42,7 @@ module Moxml
42
42
 
43
43
  def []=(name, value)
44
44
  adapter.set_attribute(@native, name, normalize_xml_value(value))
45
+ @attributes = nil
45
46
  end
46
47
 
47
48
  def [](name)
@@ -64,19 +65,23 @@ module Moxml
64
65
  end
65
66
 
66
67
  def attributes
67
- adapter.attributes(@native).map do |attr|
68
- Attribute.new(attr, context)
68
+ @attributes ||= adapter.attributes(@native).map do |attr|
69
+ a = Attribute.new(attr, context)
70
+ a.parent_node = self
71
+ a
69
72
  end
70
73
  end
71
74
 
72
75
  def remove_attribute(name)
73
76
  adapter.remove_attribute(@native, name)
77
+ @attributes = nil
74
78
  self
75
79
  end
76
80
 
77
81
  def add_namespace(prefix, uri)
78
82
  adapter.create_namespace(@native, prefix, uri,
79
- namespace_uri_mode: context.config.namespace_uri_mode)
83
+ namespace_validation_mode: context.config.namespace_validation_mode)
84
+ @namespaces = nil
80
85
  self
81
86
  rescue ValidationError => e
82
87
  # Re-raise as NamespaceError, provide attributes for error context
@@ -103,15 +108,16 @@ module Moxml
103
108
  adapter.set_namespace(
104
109
  @native,
105
110
  adapter.create_namespace(@native, *ns_or_hash.to_a.first,
106
- namespace_uri_mode: context.config.namespace_uri_mode),
111
+ namespace_validation_mode: context.config.namespace_validation_mode),
107
112
  )
108
113
  else
109
114
  adapter.set_namespace(@native, ns_or_hash&.native)
110
115
  end
116
+ @namespaces = nil
111
117
  end
112
118
 
113
119
  def namespaces
114
- adapter.namespace_definitions(@native).map do |ns|
120
+ @namespaces ||= adapter.namespace_definitions(@native).map do |ns|
115
121
  Namespace.new(ns, context)
116
122
  end
117
123
  end
@@ -136,6 +142,7 @@ module Moxml
136
142
 
137
143
  def text=(content)
138
144
  adapter.set_text_content(@native, normalize_xml_value(content))
145
+ invalidate_children_cache!
139
146
  end
140
147
 
141
148
  def inner_text
@@ -149,6 +156,7 @@ module Moxml
149
156
  def inner_xml=(xml)
150
157
  doc = context.parse("<root>#{xml}</root>")
151
158
  adapter.replace_children(@native, doc.root.children.map(&:native))
159
+ invalidate_children_cache!
152
160
  end
153
161
 
154
162
  # Fluent interface methods
@@ -192,5 +200,10 @@ module Moxml
192
200
  def nodes
193
201
  children
194
202
  end
203
+
204
+ # Called by Attribute#remove to invalidate the cached attributes
205
+ def invalidate_attribute_cache!
206
+ @attributes = nil
207
+ end
195
208
  end
196
209
  end
data/lib/moxml/entity_registry.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "json"
4
+ require "set"
4
5
 
5
6
  module Moxml
6
7
  # EntityRegistry maintains a knowledge base of XML entity definitions.
@@ -26,6 +27,9 @@ module Moxml
26
27
  # W3C entity data file name
27
28
  ENTITY_DATA_FILE = "w3c_entities.json"
28
29
 
30
+ # Standard XML predefined entities (XML spec §4.6)
31
+ STANDARD_CODEPOINTS = Set[0x26, 0x3C, 0x3E, 0x22, 0x27].freeze
32
+
29
33
  class << self
30
34
  # Get the raw entity data from the bundled JSON source
31
35
  # @return [Hash{String => String}] entity name to character mapping
@@ -150,7 +154,53 @@ module Moxml
150
154
  # @param codepoint [Integer] Unicode codepoint
151
155
  # @return [String, nil] primary entity name or nil
152
156
  def primary_name_for_codepoint(codepoint)
153
- @by_codepoint[codepoint]&.first
157
+ names = @by_codepoint[codepoint]
158
+ return nil unless names&.any?
159
+ # Prefer lowercase names (e.g., "amp" over "AMP") for XML compatibility
160
+ names.find { |n| n == n.downcase } || names.first
161
+ end
162
+
163
+ # Check if a codepoint is one of the 5 standard XML predefined entities
164
+ # @param codepoint [Integer] Unicode codepoint
165
+ # @return [Boolean]
166
+ def standard_entity?(codepoint)
167
+ STANDARD_CODEPOINTS.include?(codepoint)
168
+ end
169
+
170
+ # Determine if an entity reference should be restored for a codepoint.
171
+ # Standard XML entities are always restored (required by XML spec).
172
+ # Non-standard entities are only restored when restore_entities is enabled.
173
+ # @param codepoint [Integer] Unicode codepoint
174
+ # @param config [Moxml::Config] configuration object
175
+ # @return [Boolean]
176
+ def should_restore?(codepoint, config:)
177
+ name = primary_name_for_codepoint(codepoint)
178
+ return false unless name
179
+ return true if standard_entity?(codepoint)
180
+
181
+ return false unless config.restore_entities
182
+
183
+ case config.entity_restoration_mode
184
+ when :lenient
185
+ # Any known entity from the registry
186
+ true
187
+ when :strict
188
+ # Only DTD-declared entities (falls back to lenient until DTD parsing is implemented)
189
+ true
190
+ else
191
+ false
192
+ end
193
+ end
194
+
195
+ # Returns the set of codepoints that could potentially be restored as entities.
196
+ # Used by DocumentBuilder for O(1) fast-path checks.
197
+ # @return [Set<Integer>]
198
+ def restorable_codepoints
199
+ @restorable_codepoints ||= if @by_name.empty?
200
+ STANDARD_CODEPOINTS
201
+ else
202
+ Set.new(@by_name.values).freeze
203
+ end
154
204
  end
155
205
 
156
206
  # Register additional entities
data/lib/moxml/native_attachment.rb ADDED
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ # Stores Moxml-specific state associated with native adapter objects
5
+ # without polluting their internals.
6
+ #
7
+ # Uses object_id as key with GC finalizer cleanup to prevent memory leaks.
8
+ # Thread-safe via Monitor (reentrant-safe).
9
+ #
10
+ # Replaces the anti-pattern of using instance_variable_set/get on
11
+ # foreign library objects (Nokogiri, REXML, Oga, Ox, LibXML nodes).
12
+ #
13
+ # @example
14
+ # attachments = NativeAttachment.new
15
+ # attachments.set(native_element, :entity_refs, [])
16
+ # refs = attachments.get(native_element, :entity_refs)
17
+ # attachments.key?(native_element, :doctype) #=> false
18
+ class NativeAttachment
19
+ def initialize
20
+ @data = {}
21
+ @finalizer_registered = {}
22
+ @monitor = Monitor.new
23
+ end
24
+
25
+ def get(native, key)
26
+ @monitor.synchronize { @data[native.object_id]&.[](key) }
27
+ end
28
+
29
+ def set(native, key, value)
30
+ id = native.object_id
31
+ @monitor.synchronize do
32
+ @data[id] ||= {}
33
+ @data[id][key] = value
34
+ register_finalizer(native, id) unless @finalizer_registered[id]
35
+ end
36
+ end
37
+
38
+ def key?(native, key)
39
+ @monitor.synchronize { @data[native.object_id]&.key?(key) || false }
40
+ end
41
+
42
+ def delete(native, key)
43
+ @monitor.synchronize { @data[native.object_id]&.delete(key) }
44
+ end
45
+
46
+ private
47
+
48
+ def register_finalizer(native, id)
49
+ @finalizer_registered[id] = true
50
+ ObjectSpace.define_finalizer(native, finalizer_for(id))
51
+ end
52
+
53
+ def finalizer_for(id)
54
+ data = @data
55
+ registered = @finalizer_registered
56
+ # Finalizers must NOT use Mutex/Monitor (can't be called from trap context).
57
+ # Direct Hash operations are safe here since finalizers run sequentially
58
+ # and the GC'd object's id won't be accessed by any other thread.
59
+ proc do
60
+ data.delete(id)
61
+ registered.delete(id)
62
+ end
63
+ end
64
+ end
65
+ end