moxml 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +117 -66
- data/Gemfile +1 -0
- data/README.adoc +11 -9
- data/Rakefile +34 -1
- data/TODO.remaining/1-entity-reference-adapter-support.md +157 -0
- data/TODO.remaining/2-entity-restoration-model-driven.md +169 -0
- data/TODO.remaining/3-entity-reference-test-coverage.md +170 -0
- data/TODO.remaining/4-lenient-entities-mode.md +106 -0
- data/TODO.remaining/5-fixture-integrity.md +65 -0
- data/TODO.remaining/6-ox-element-ordering-bug.md +36 -0
- data/TODO.remaining/7-headed-ox-limitations.md +95 -0
- data/TODO.remaining/8-xpath-predicate-gaps.md +68 -0
- data/TODO.remaining/9-cleanup-hygiene.md +42 -0
- data/TODO.remaining/README.md +54 -0
- data/benchmarks/generate_report.rb +1 -1
- data/docs/_pages/configuration.adoc +22 -19
- data/docs/_tutorials/namespace-handling.adoc +5 -5
- data/lib/moxml/adapter/base.rb +22 -3
- data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
- data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
- data/lib/moxml/adapter/customized_libxml.rb +18 -0
- data/lib/moxml/adapter/customized_oga.rb +10 -0
- data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
- data/lib/moxml/adapter/customized_ox.rb +12 -0
- data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
- data/lib/moxml/adapter/customized_rexml/formatter.rb +44 -20
- data/lib/moxml/adapter/customized_rexml.rb +11 -0
- data/lib/moxml/adapter/headed_ox.rb +37 -14
- data/lib/moxml/adapter/libxml.rb +233 -119
- data/lib/moxml/adapter/nokogiri.rb +22 -11
- data/lib/moxml/adapter/oga.rb +64 -25
- data/lib/moxml/adapter/ox.rb +198 -42
- data/lib/moxml/adapter/rexml.rb +64 -13
- data/lib/moxml/attribute.rb +3 -0
- data/lib/moxml/builder.rb +78 -24
- data/lib/moxml/config.rb +24 -7
- data/lib/moxml/declaration.rb +4 -2
- data/lib/moxml/document.rb +8 -1
- data/lib/moxml/document_builder.rb +44 -37
- data/lib/moxml/element.rb +18 -5
- data/lib/moxml/entity_registry.rb +51 -1
- data/lib/moxml/native_attachment.rb +65 -0
- data/lib/moxml/node.rb +39 -50
- data/lib/moxml/node_set.rb +43 -15
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +4 -1
- data/lib/moxml.rb +1 -0
- data/scripts/format_xml.rb +16 -0
- data/scripts/pretty_format_xml.rb +14 -0
- data/spec/consistency/round_trip_spec.rb +3 -30
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/headed_ox_integration_spec.rb +0 -2
- data/spec/integration/shared_examples/edge_cases.rb +7 -4
- data/spec/integration/shared_examples/integration_workflows.rb +3 -3
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +1 -1
- data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +1 -1
- data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
- data/spec/moxml/adapter/oga_spec.rb +46 -0
- data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
- data/spec/moxml/allocation_benchmark_spec.rb +96 -0
- data/spec/moxml/allocation_guard_spec.rb +282 -0
- data/spec/moxml/builder_spec.rb +256 -0
- data/spec/moxml/config_spec.rb +11 -11
- data/spec/moxml/doctype_spec.rb +41 -0
- data/spec/moxml/lazy_parse_spec.rb +115 -0
- data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
- data/spec/moxml/node_cache_spec.rb +110 -0
- data/spec/moxml/node_set_cache_spec.rb +90 -0
- data/spec/moxml/xml_utils_spec.rb +32 -0
- data/spec/moxml/xpath/axes_spec.rb +1 -1
- data/spec/moxml/xpath/compiler_spec.rb +2 -2
- data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
- data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
- data/spec/performance/memory_usage_spec.rb +0 -4
- data/spec/support/allocation_helper.rb +165 -0
- data/spec/support/w3c_namespace_helpers.rb +2 -1
- metadata +29 -2
data/lib/moxml/builder.rb
CHANGED
|
@@ -2,7 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
module Moxml
|
|
4
4
|
class Builder
|
|
5
|
+
RESERVED_METHOD_PATTERN = /\A(to_|as_json|marshal_|inspect|freeze|dup|clone)/
|
|
6
|
+
|
|
5
7
|
attr_reader :document
|
|
8
|
+
alias_method :doc, :document
|
|
6
9
|
|
|
7
10
|
def initialize(context)
|
|
8
11
|
@context = context
|
|
@@ -21,33 +24,18 @@ module Moxml
|
|
|
21
24
|
)
|
|
22
25
|
end
|
|
23
26
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
elsif key.to_s.start_with?("xmlns:")
|
|
32
|
-
# Handle prefixed namespace
|
|
33
|
-
prefix = key.to_s.sub("xmlns:", "")
|
|
34
|
-
el.add_namespace(prefix, value.to_s)
|
|
35
|
-
else
|
|
36
|
-
# Regular attribute
|
|
37
|
-
el[key] = value
|
|
38
|
-
end
|
|
27
|
+
# When called with a String name: creates element via instance_eval (DSL block context).
|
|
28
|
+
# When called with a Hash (e.g., element(name: "foo")): creates <element> tag
|
|
29
|
+
# via yield — handles collision where "element" is both a builder method
|
|
30
|
+
# and a valid XML tag name (XSD/RelaxNG).
|
|
31
|
+
def element(name_or_attrs = nil, attributes = {}, &block)
|
|
32
|
+
if name_or_attrs.is_a?(Hash)
|
|
33
|
+
return create_element_node("element", name_or_attrs, block: block, eval_block: false)
|
|
39
34
|
end
|
|
40
35
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
if block
|
|
44
|
-
previous = @current
|
|
45
|
-
@current = el
|
|
46
|
-
instance_eval(&block)
|
|
47
|
-
@current = previous
|
|
48
|
-
end
|
|
36
|
+
raise ArgumentError, "element requires a tag name" if name_or_attrs.nil?
|
|
49
37
|
|
|
50
|
-
|
|
38
|
+
create_element_node(name_or_attrs, attributes, block: block, eval_block: true)
|
|
51
39
|
end
|
|
52
40
|
|
|
53
41
|
def text(content)
|
|
@@ -102,5 +90,71 @@ module Moxml
|
|
|
102
90
|
el.namespace = { prefix => namespace_uri } if prefix
|
|
103
91
|
el
|
|
104
92
|
end
|
|
93
|
+
|
|
94
|
+
# Dynamic element creation DSL.
|
|
95
|
+
# xml.schema(attrs) { } creates <schema> with those attributes.
|
|
96
|
+
# Uses yield so blocks preserve the caller's self context.
|
|
97
|
+
# Supported call shapes: (), (String), (Hash), (String, Hash).
|
|
98
|
+
def method_missing(method_name, *args, &block)
|
|
99
|
+
return super if RESERVED_METHOD_PATTERN.match?(method_name.to_s)
|
|
100
|
+
|
|
101
|
+
text_content = args.first.is_a?(String) ? args.shift : nil
|
|
102
|
+
attrs = args.first.is_a?(Hash) ? args.shift : {}
|
|
103
|
+
|
|
104
|
+
raise ArgumentError, "unexpected arguments for #{method_name}: #{args.inspect}" unless args.empty?
|
|
105
|
+
|
|
106
|
+
if text_content && block
|
|
107
|
+
raise ArgumentError, "#{method_name}: cannot combine text content with a block"
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Strip trailing underscore to allow reserved Ruby method names as tags
|
|
111
|
+
# (e.g., type_, class_, id_ become <type>, <class>, <id>)
|
|
112
|
+
tag_name = method_name.to_s.chomp("_")
|
|
113
|
+
|
|
114
|
+
create_element_node(tag_name, attrs, text_content: text_content,
|
|
115
|
+
block: block, eval_block: false)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def respond_to_missing?(method_name, _include_private = false)
|
|
119
|
+
return super if RESERVED_METHOD_PATTERN.match?(method_name.to_s)
|
|
120
|
+
|
|
121
|
+
true
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
private
|
|
125
|
+
|
|
126
|
+
# Single method for all element creation.
|
|
127
|
+
# eval_block: true → instance_eval (build DSL context)
|
|
128
|
+
# eval_block: false → yield (preserves caller's self)
|
|
129
|
+
def create_element_node(tag_name, attrs = {}, text_content: nil, block: nil, eval_block: true)
|
|
130
|
+
el = @document.create_element(tag_name)
|
|
131
|
+
|
|
132
|
+
attrs.each do |key, value|
|
|
133
|
+
if key.to_s == "xmlns"
|
|
134
|
+
el.add_namespace(nil, value.to_s)
|
|
135
|
+
elsif key.to_s.start_with?("xmlns:")
|
|
136
|
+
prefix = key.to_s.sub("xmlns:", "")
|
|
137
|
+
el.add_namespace(prefix, value.to_s)
|
|
138
|
+
else
|
|
139
|
+
el[key] = value
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
@current.add_child(el)
|
|
144
|
+
|
|
145
|
+
el.add_child(@document.create_text(text_content)) if text_content
|
|
146
|
+
|
|
147
|
+
if block
|
|
148
|
+
previous = @current
|
|
149
|
+
@current = el
|
|
150
|
+
begin
|
|
151
|
+
eval_block ? instance_eval(&block) : block.call
|
|
152
|
+
ensure
|
|
153
|
+
@current = previous
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
el
|
|
158
|
+
end
|
|
105
159
|
end
|
|
106
160
|
end
|
data/lib/moxml/config.rb
CHANGED
|
@@ -24,7 +24,12 @@ module Moxml
|
|
|
24
24
|
end
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
NAMESPACE_VALIDATION_MODES = %i[strict lenient].freeze
|
|
28
|
+
|
|
29
|
+
# Entity restoration modes:
|
|
30
|
+
# - :lenient (default) — restore any known entity from the registry
|
|
31
|
+
# - :strict — only restore DTD-declared entities (falls back to lenient until DTD parsing is implemented)
|
|
32
|
+
ENTITY_RESTORATION_MODES = %i[strict lenient].freeze
|
|
28
33
|
|
|
29
34
|
attr_reader :adapter_name
|
|
30
35
|
attr_accessor :strict_parsing,
|
|
@@ -35,7 +40,8 @@ module Moxml
|
|
|
35
40
|
:preload_entity_sets,
|
|
36
41
|
:entity_load_mode,
|
|
37
42
|
:entity_provider,
|
|
38
|
-
:
|
|
43
|
+
:namespace_validation_mode,
|
|
44
|
+
:entity_restoration_mode
|
|
39
45
|
|
|
40
46
|
def initialize(adapter_name = nil, strict_parsing = nil,
|
|
41
47
|
default_encoding = nil)
|
|
@@ -49,7 +55,8 @@ module Moxml
|
|
|
49
55
|
@preload_entity_sets = []
|
|
50
56
|
@entity_load_mode = :required
|
|
51
57
|
@entity_provider = nil
|
|
52
|
-
@
|
|
58
|
+
@namespace_validation_mode = :strict
|
|
59
|
+
@entity_restoration_mode = :lenient
|
|
53
60
|
end
|
|
54
61
|
|
|
55
62
|
def adapter=(name)
|
|
@@ -86,14 +93,24 @@ module Moxml
|
|
|
86
93
|
@entity_load_mode = mode
|
|
87
94
|
end
|
|
88
95
|
|
|
89
|
-
def
|
|
96
|
+
def namespace_validation_mode=(mode)
|
|
97
|
+
mode = mode.to_sym
|
|
98
|
+
unless NAMESPACE_VALIDATION_MODES.include?(mode)
|
|
99
|
+
raise ArgumentError,
|
|
100
|
+
"Invalid namespace_validation_mode: #{mode}. Must be one of: #{NAMESPACE_VALIDATION_MODES.join(', ')}"
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
@namespace_validation_mode = mode
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def entity_restoration_mode=(mode)
|
|
90
107
|
mode = mode.to_sym
|
|
91
|
-
unless
|
|
108
|
+
unless ENTITY_RESTORATION_MODES.include?(mode)
|
|
92
109
|
raise ArgumentError,
|
|
93
|
-
"Invalid
|
|
110
|
+
"Invalid entity_restoration_mode: #{mode}. Must be one of: #{ENTITY_RESTORATION_MODES.join(', ')}"
|
|
94
111
|
end
|
|
95
112
|
|
|
96
|
-
@
|
|
113
|
+
@entity_restoration_mode = mode
|
|
97
114
|
end
|
|
98
115
|
|
|
99
116
|
# Backward compatibility: convert old boolean to new symbol
|
data/lib/moxml/declaration.rb
CHANGED
|
@@ -35,9 +35,11 @@ module Moxml
|
|
|
35
35
|
|
|
36
36
|
def remove
|
|
37
37
|
# Mark document as having no declaration when declaration is removed
|
|
38
|
-
# Store
|
|
38
|
+
# Store in adapter's attachment map so all wrappers see it
|
|
39
39
|
native_doc = adapter.document(@native)
|
|
40
|
-
native_doc
|
|
40
|
+
if native_doc && adapter.respond_to?(:attachments)
|
|
41
|
+
adapter.attachments.set(native_doc, :has_declaration, false)
|
|
42
|
+
end
|
|
41
43
|
|
|
42
44
|
super
|
|
43
45
|
end
|
data/lib/moxml/document.rb
CHANGED
|
@@ -26,11 +26,13 @@ module Moxml
|
|
|
26
26
|
|
|
27
27
|
def root=(element)
|
|
28
28
|
adapter.set_root(@native, element.native)
|
|
29
|
+
element.parent_node = self
|
|
30
|
+
invalidate_children_cache!
|
|
29
31
|
end
|
|
30
32
|
|
|
31
33
|
def root
|
|
32
34
|
root_element = adapter.root(@native)
|
|
33
|
-
root_element ? Element.
|
|
35
|
+
root_element ? Element.new(root_element, context) : nil
|
|
34
36
|
end
|
|
35
37
|
|
|
36
38
|
def create_element(name)
|
|
@@ -90,7 +92,12 @@ module Moxml
|
|
|
90
92
|
raise Error, "Document already has a root element"
|
|
91
93
|
else
|
|
92
94
|
adapter.add_child(@native, node.native)
|
|
95
|
+
# Refresh native for adapters where identity changes (e.g., LibXML doc.root=)
|
|
96
|
+
refreshed = adapter.actual_native(node.native, @native)
|
|
97
|
+
node.refresh_native!(refreshed) if refreshed && refreshed != node.native
|
|
93
98
|
end
|
|
99
|
+
node.parent_node = self
|
|
100
|
+
invalidate_children_cache!
|
|
94
101
|
self
|
|
95
102
|
end
|
|
96
103
|
|
data/lib/moxml/document_builder.rb
CHANGED
|
@@ -12,20 +12,19 @@ module Moxml
|
|
|
12
12
|
def build(native_doc)
|
|
13
13
|
@current_doc = context.create_document(native_doc)
|
|
14
14
|
|
|
15
|
-
# Transfer has_declaration flag if present
|
|
16
|
-
if
|
|
17
|
-
|
|
18
|
-
has_declaration =
|
|
15
|
+
# Transfer has_declaration flag if present in attachments
|
|
16
|
+
if adapter.respond_to?(:attachments) &&
|
|
17
|
+
adapter.attachments.key?(native_doc, :has_declaration)
|
|
18
|
+
has_declaration = adapter.attachments.get(native_doc, :has_declaration)
|
|
19
19
|
@current_doc.has_xml_declaration = has_declaration
|
|
20
20
|
end
|
|
21
21
|
|
|
22
|
-
# Transfer DOCTYPE from parsed document if it exists
|
|
23
|
-
if
|
|
24
|
-
|
|
25
|
-
doctype =
|
|
22
|
+
# Transfer DOCTYPE from parsed document if it exists in attachments
|
|
23
|
+
if adapter.respond_to?(:attachments) &&
|
|
24
|
+
adapter.attachments.key?(native_doc, :doctype)
|
|
25
|
+
doctype = adapter.attachments.get(native_doc, :doctype)
|
|
26
26
|
if doctype
|
|
27
|
-
@current_doc.native
|
|
28
|
-
doctype)
|
|
27
|
+
adapter.attachments.set(@current_doc.native, :doctype, doctype)
|
|
29
28
|
end
|
|
30
29
|
end
|
|
31
30
|
|
|
@@ -70,43 +69,54 @@ module Moxml
|
|
|
70
69
|
content = adapter.text_content(node)
|
|
71
70
|
|
|
72
71
|
# Check if we should restore entity references for this text
|
|
73
|
-
if context.config.restore_entities && content
|
|
72
|
+
if context.config.restore_entities && text_has_restorable_entities?(content)
|
|
74
73
|
restore_entities_in_text(content)
|
|
75
74
|
else
|
|
76
75
|
@node_stack.last&.add_child(Text.new(prepared, context))
|
|
77
76
|
end
|
|
78
77
|
end
|
|
79
78
|
|
|
79
|
+
def text_has_restorable_entities?(content)
|
|
80
|
+
return false unless content
|
|
81
|
+
|
|
82
|
+
registry = context.entity_registry
|
|
83
|
+
codepoints = registry.restorable_codepoints
|
|
84
|
+
content.each_char do |char|
|
|
85
|
+
return true if codepoints.include?(char.ord)
|
|
86
|
+
end
|
|
87
|
+
false
|
|
88
|
+
end
|
|
89
|
+
|
|
80
90
|
def restore_entities_in_text(content)
|
|
81
91
|
parent = @node_stack.last
|
|
82
92
|
return unless parent
|
|
83
93
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
"<" => "lt",
|
|
88
|
-
">" => "gt",
|
|
89
|
-
"&" => "amp",
|
|
90
|
-
'"' => "quot",
|
|
91
|
-
"'" => "apos",
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
# Process character by character
|
|
95
|
-
chars = content.to_s.chars
|
|
96
|
-
chars.each do |char|
|
|
97
|
-
codepoint = char.ord
|
|
98
|
-
entity_name = context.entity_registry.primary_name_for_codepoint(codepoint)
|
|
94
|
+
registry = context.entity_registry
|
|
95
|
+
config = context.config
|
|
96
|
+
buffer = +""
|
|
99
97
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
98
|
+
content.to_s.each_char do |char|
|
|
99
|
+
codepoint = char.ord
|
|
100
|
+
name = registry.primary_name_for_codepoint(codepoint)
|
|
101
|
+
|
|
102
|
+
if name && registry.should_restore?(codepoint, config: config)
|
|
103
|
+
# Flush buffered text before the entity
|
|
104
|
+
unless buffer.empty?
|
|
105
|
+
parent.add_child(Text.new(adapter.create_text(buffer), context))
|
|
106
|
+
buffer.clear
|
|
107
|
+
end
|
|
108
|
+
parent.add_child(
|
|
109
|
+
EntityReference.new(adapter.create_entity_reference(name), context),
|
|
110
|
+
)
|
|
104
111
|
else
|
|
105
|
-
|
|
106
|
-
text_node = adapter.create_text(char)
|
|
107
|
-
parent.add_child(Text.new(text_node, context))
|
|
112
|
+
buffer << char
|
|
108
113
|
end
|
|
109
114
|
end
|
|
115
|
+
|
|
116
|
+
# Flush remaining buffer
|
|
117
|
+
unless buffer.empty?
|
|
118
|
+
parent.add_child(Text.new(adapter.create_text(buffer), context))
|
|
119
|
+
end
|
|
110
120
|
end
|
|
111
121
|
|
|
112
122
|
def visit_cdata(node)
|
|
@@ -135,10 +145,7 @@ module Moxml
|
|
|
135
145
|
end
|
|
136
146
|
|
|
137
147
|
def visit_children(node)
|
|
138
|
-
|
|
139
|
-
node_children.each do |child|
|
|
140
|
-
visit_node(child)
|
|
141
|
-
end
|
|
148
|
+
children(node).each { |child| visit_node(child) }
|
|
142
149
|
end
|
|
143
150
|
|
|
144
151
|
def node_type(node)
|
data/lib/moxml/element.rb
CHANGED
|
@@ -42,6 +42,7 @@ module Moxml
|
|
|
42
42
|
|
|
43
43
|
def []=(name, value)
|
|
44
44
|
adapter.set_attribute(@native, name, normalize_xml_value(value))
|
|
45
|
+
@attributes = nil
|
|
45
46
|
end
|
|
46
47
|
|
|
47
48
|
def [](name)
|
|
@@ -64,19 +65,23 @@ module Moxml
|
|
|
64
65
|
end
|
|
65
66
|
|
|
66
67
|
def attributes
|
|
67
|
-
adapter.attributes(@native).map do |attr|
|
|
68
|
-
Attribute.new(attr, context)
|
|
68
|
+
@attributes ||= adapter.attributes(@native).map do |attr|
|
|
69
|
+
a = Attribute.new(attr, context)
|
|
70
|
+
a.parent_node = self
|
|
71
|
+
a
|
|
69
72
|
end
|
|
70
73
|
end
|
|
71
74
|
|
|
72
75
|
def remove_attribute(name)
|
|
73
76
|
adapter.remove_attribute(@native, name)
|
|
77
|
+
@attributes = nil
|
|
74
78
|
self
|
|
75
79
|
end
|
|
76
80
|
|
|
77
81
|
def add_namespace(prefix, uri)
|
|
78
82
|
adapter.create_namespace(@native, prefix, uri,
|
|
79
|
-
|
|
83
|
+
namespace_validation_mode: context.config.namespace_validation_mode)
|
|
84
|
+
@namespaces = nil
|
|
80
85
|
self
|
|
81
86
|
rescue ValidationError => e
|
|
82
87
|
# Re-raise as NamespaceError, provide attributes for error context
|
|
@@ -103,15 +108,16 @@ module Moxml
|
|
|
103
108
|
adapter.set_namespace(
|
|
104
109
|
@native,
|
|
105
110
|
adapter.create_namespace(@native, *ns_or_hash.to_a.first,
|
|
106
|
-
|
|
111
|
+
namespace_validation_mode: context.config.namespace_validation_mode),
|
|
107
112
|
)
|
|
108
113
|
else
|
|
109
114
|
adapter.set_namespace(@native, ns_or_hash&.native)
|
|
110
115
|
end
|
|
116
|
+
@namespaces = nil
|
|
111
117
|
end
|
|
112
118
|
|
|
113
119
|
def namespaces
|
|
114
|
-
adapter.namespace_definitions(@native).map do |ns|
|
|
120
|
+
@namespaces ||= adapter.namespace_definitions(@native).map do |ns|
|
|
115
121
|
Namespace.new(ns, context)
|
|
116
122
|
end
|
|
117
123
|
end
|
|
@@ -136,6 +142,7 @@ module Moxml
|
|
|
136
142
|
|
|
137
143
|
def text=(content)
|
|
138
144
|
adapter.set_text_content(@native, normalize_xml_value(content))
|
|
145
|
+
invalidate_children_cache!
|
|
139
146
|
end
|
|
140
147
|
|
|
141
148
|
def inner_text
|
|
@@ -149,6 +156,7 @@ module Moxml
|
|
|
149
156
|
def inner_xml=(xml)
|
|
150
157
|
doc = context.parse("<root>#{xml}</root>")
|
|
151
158
|
adapter.replace_children(@native, doc.root.children.map(&:native))
|
|
159
|
+
invalidate_children_cache!
|
|
152
160
|
end
|
|
153
161
|
|
|
154
162
|
# Fluent interface methods
|
|
@@ -192,5 +200,10 @@ module Moxml
|
|
|
192
200
|
def nodes
|
|
193
201
|
children
|
|
194
202
|
end
|
|
203
|
+
|
|
204
|
+
# Called by Attribute#remove to invalidate the cached attributes
|
|
205
|
+
def invalidate_attribute_cache!
|
|
206
|
+
@attributes = nil
|
|
207
|
+
end
|
|
195
208
|
end
|
|
196
209
|
end
|
data/lib/moxml/entity_registry.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
+
require "set"
|
|
4
5
|
|
|
5
6
|
module Moxml
|
|
6
7
|
# EntityRegistry maintains a knowledge base of XML entity definitions.
|
|
@@ -26,6 +27,9 @@ module Moxml
|
|
|
26
27
|
# W3C entity data file name
|
|
27
28
|
ENTITY_DATA_FILE = "w3c_entities.json"
|
|
28
29
|
|
|
30
|
+
# Standard XML predefined entities (XML spec §4.6)
|
|
31
|
+
STANDARD_CODEPOINTS = Set[0x26, 0x3C, 0x3E, 0x22, 0x27].freeze
|
|
32
|
+
|
|
29
33
|
class << self
|
|
30
34
|
# Get the raw entity data from the bundled JSON source
|
|
31
35
|
# @return [Hash{String => String}] entity name to character mapping
|
|
@@ -150,7 +154,53 @@ module Moxml
|
|
|
150
154
|
# @param codepoint [Integer] Unicode codepoint
|
|
151
155
|
# @return [String, nil] primary entity name or nil
|
|
152
156
|
def primary_name_for_codepoint(codepoint)
|
|
153
|
-
@by_codepoint[codepoint]
|
|
157
|
+
names = @by_codepoint[codepoint]
|
|
158
|
+
return nil unless names&.any?
|
|
159
|
+
# Prefer lowercase names (e.g., "amp" over "AMP") for XML compatibility
|
|
160
|
+
names.find { |n| n == n.downcase } || names.first
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
# Check if a codepoint is one of the 5 standard XML predefined entities
|
|
164
|
+
# @param codepoint [Integer] Unicode codepoint
|
|
165
|
+
# @return [Boolean]
|
|
166
|
+
def standard_entity?(codepoint)
|
|
167
|
+
STANDARD_CODEPOINTS.include?(codepoint)
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
# Determine if an entity reference should be restored for a codepoint.
|
|
171
|
+
# Standard XML entities are always restored (required by XML spec).
|
|
172
|
+
# Non-standard entities are only restored when restore_entities is enabled.
|
|
173
|
+
# @param codepoint [Integer] Unicode codepoint
|
|
174
|
+
# @param config [Moxml::Config] configuration object
|
|
175
|
+
# @return [Boolean]
|
|
176
|
+
def should_restore?(codepoint, config:)
|
|
177
|
+
name = primary_name_for_codepoint(codepoint)
|
|
178
|
+
return false unless name
|
|
179
|
+
return true if standard_entity?(codepoint)
|
|
180
|
+
|
|
181
|
+
return false unless config.restore_entities
|
|
182
|
+
|
|
183
|
+
case config.entity_restoration_mode
|
|
184
|
+
when :lenient
|
|
185
|
+
# Any known entity from the registry
|
|
186
|
+
true
|
|
187
|
+
when :strict
|
|
188
|
+
# Only DTD-declared entities (falls back to lenient until DTD parsing is implemented)
|
|
189
|
+
true
|
|
190
|
+
else
|
|
191
|
+
false
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
# Returns the set of codepoints that could potentially be restored as entities.
|
|
196
|
+
# Used by DocumentBuilder for O(1) fast-path checks.
|
|
197
|
+
# @return [Set<Integer>]
|
|
198
|
+
def restorable_codepoints
|
|
199
|
+
@restorable_codepoints ||= if @by_name.empty?
|
|
200
|
+
STANDARD_CODEPOINTS
|
|
201
|
+
else
|
|
202
|
+
Set.new(@by_name.values).freeze
|
|
203
|
+
end
|
|
154
204
|
end
|
|
155
205
|
|
|
156
206
|
# Register additional entities
|
data/lib/moxml/native_attachment.rb
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
# Stores Moxml-specific state associated with native adapter objects
|
|
5
|
+
# without polluting their internals.
|
|
6
|
+
#
|
|
7
|
+
# Uses object_id as key with GC finalizer cleanup to prevent memory leaks.
|
|
8
|
+
# Thread-safe via Monitor (reentrant-safe).
|
|
9
|
+
#
|
|
10
|
+
# Replaces the anti-pattern of using instance_variable_set/get on
|
|
11
|
+
# foreign library objects (Nokogiri, REXML, Oga, Ox, LibXML nodes).
|
|
12
|
+
#
|
|
13
|
+
# @example
|
|
14
|
+
# attachments = NativeAttachment.new
|
|
15
|
+
# attachments.set(native_element, :entity_refs, [])
|
|
16
|
+
# refs = attachments.get(native_element, :entity_refs)
|
|
17
|
+
# attachments.key?(native_element, :doctype) #=> false
|
|
18
|
+
class NativeAttachment
|
|
19
|
+
def initialize
|
|
20
|
+
@data = {}
|
|
21
|
+
@finalizer_registered = {}
|
|
22
|
+
@monitor = Monitor.new
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def get(native, key)
|
|
26
|
+
@monitor.synchronize { @data[native.object_id]&.[](key) }
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def set(native, key, value)
|
|
30
|
+
id = native.object_id
|
|
31
|
+
@monitor.synchronize do
|
|
32
|
+
@data[id] ||= {}
|
|
33
|
+
@data[id][key] = value
|
|
34
|
+
register_finalizer(native, id) unless @finalizer_registered[id]
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def key?(native, key)
|
|
39
|
+
@monitor.synchronize { @data[native.object_id]&.key?(key) || false }
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def delete(native, key)
|
|
43
|
+
@monitor.synchronize { @data[native.object_id]&.delete(key) }
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
private
|
|
47
|
+
|
|
48
|
+
def register_finalizer(native, id)
|
|
49
|
+
@finalizer_registered[id] = true
|
|
50
|
+
ObjectSpace.define_finalizer(native, finalizer_for(id))
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def finalizer_for(id)
|
|
54
|
+
data = @data
|
|
55
|
+
registered = @finalizer_registered
|
|
56
|
+
# Finalizers must NOT use Mutex/Monitor (can't be called from trap context).
|
|
57
|
+
# Direct Hash operations are safe here since finalizers run sequentially
|
|
58
|
+
# and the GC'd object's id won't be accessed by any other thread.
|
|
59
|
+
proc do
|
|
60
|
+
data.delete(id)
|
|
61
|
+
registered.delete(id)
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|