moxml 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +117 -66
- data/Gemfile +1 -0
- data/README.adoc +11 -9
- data/Rakefile +34 -1
- data/TODO.remaining/1-entity-reference-adapter-support.md +157 -0
- data/TODO.remaining/2-entity-restoration-model-driven.md +169 -0
- data/TODO.remaining/3-entity-reference-test-coverage.md +170 -0
- data/TODO.remaining/4-lenient-entities-mode.md +106 -0
- data/TODO.remaining/5-fixture-integrity.md +65 -0
- data/TODO.remaining/6-ox-element-ordering-bug.md +36 -0
- data/TODO.remaining/7-headed-ox-limitations.md +95 -0
- data/TODO.remaining/8-xpath-predicate-gaps.md +68 -0
- data/TODO.remaining/9-cleanup-hygiene.md +42 -0
- data/TODO.remaining/README.md +54 -0
- data/benchmarks/generate_report.rb +1 -1
- data/docs/_pages/configuration.adoc +22 -19
- data/docs/_tutorials/namespace-handling.adoc +5 -5
- data/lib/moxml/adapter/base.rb +22 -3
- data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
- data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
- data/lib/moxml/adapter/customized_libxml.rb +18 -0
- data/lib/moxml/adapter/customized_oga.rb +10 -0
- data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
- data/lib/moxml/adapter/customized_ox.rb +12 -0
- data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
- data/lib/moxml/adapter/customized_rexml/formatter.rb +44 -20
- data/lib/moxml/adapter/customized_rexml.rb +11 -0
- data/lib/moxml/adapter/headed_ox.rb +37 -14
- data/lib/moxml/adapter/libxml.rb +233 -119
- data/lib/moxml/adapter/nokogiri.rb +22 -11
- data/lib/moxml/adapter/oga.rb +64 -25
- data/lib/moxml/adapter/ox.rb +198 -42
- data/lib/moxml/adapter/rexml.rb +64 -13
- data/lib/moxml/attribute.rb +3 -0
- data/lib/moxml/builder.rb +78 -24
- data/lib/moxml/config.rb +24 -7
- data/lib/moxml/declaration.rb +4 -2
- data/lib/moxml/document.rb +8 -1
- data/lib/moxml/document_builder.rb +44 -37
- data/lib/moxml/element.rb +18 -5
- data/lib/moxml/entity_registry.rb +51 -1
- data/lib/moxml/native_attachment.rb +65 -0
- data/lib/moxml/node.rb +39 -50
- data/lib/moxml/node_set.rb +43 -15
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +4 -1
- data/lib/moxml.rb +1 -0
- data/scripts/format_xml.rb +16 -0
- data/scripts/pretty_format_xml.rb +14 -0
- data/spec/consistency/round_trip_spec.rb +3 -30
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/headed_ox_integration_spec.rb +0 -2
- data/spec/integration/shared_examples/edge_cases.rb +7 -4
- data/spec/integration/shared_examples/integration_workflows.rb +3 -3
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +1 -1
- data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +1 -1
- data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
- data/spec/moxml/adapter/oga_spec.rb +46 -0
- data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
- data/spec/moxml/allocation_benchmark_spec.rb +96 -0
- data/spec/moxml/allocation_guard_spec.rb +282 -0
- data/spec/moxml/builder_spec.rb +256 -0
- data/spec/moxml/config_spec.rb +11 -11
- data/spec/moxml/doctype_spec.rb +41 -0
- data/spec/moxml/lazy_parse_spec.rb +115 -0
- data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
- data/spec/moxml/node_cache_spec.rb +110 -0
- data/spec/moxml/node_set_cache_spec.rb +90 -0
- data/spec/moxml/xml_utils_spec.rb +32 -0
- data/spec/moxml/xpath/axes_spec.rb +1 -1
- data/spec/moxml/xpath/compiler_spec.rb +2 -2
- data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
- data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
- data/spec/performance/memory_usage_spec.rb +0 -4
- data/spec/support/allocation_helper.rb +165 -0
- data/spec/support/w3c_namespace_helpers.rb +2 -1
- metadata +29 -2
data/lib/moxml/adapter/libxml.rb
CHANGED
|
@@ -2,13 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "base"
|
|
4
4
|
require "libxml"
|
|
5
|
-
require_relative "customized_libxml
|
|
6
|
-
require_relative "customized_libxml/element"
|
|
7
|
-
require_relative "customized_libxml/text"
|
|
8
|
-
require_relative "customized_libxml/comment"
|
|
9
|
-
require_relative "customized_libxml/cdata"
|
|
10
|
-
require_relative "customized_libxml/processing_instruction"
|
|
11
|
-
require_relative "customized_libxml/declaration"
|
|
5
|
+
require_relative "customized_libxml"
|
|
12
6
|
|
|
13
7
|
module Moxml
|
|
14
8
|
module Adapter
|
|
@@ -44,6 +38,10 @@ module Moxml
|
|
|
44
38
|
end
|
|
45
39
|
|
|
46
40
|
class << self
|
|
41
|
+
def attachments
|
|
42
|
+
@attachments ||= Moxml::NativeAttachment.new
|
|
43
|
+
end
|
|
44
|
+
|
|
47
45
|
def set_root(doc, element)
|
|
48
46
|
doc.root = element
|
|
49
47
|
end
|
|
@@ -52,7 +50,7 @@ module Moxml
|
|
|
52
50
|
# LibXML doesn't preserve DOCTYPE during parsing, so we need to extract it manually
|
|
53
51
|
xml_string = if xml.is_a?(String)
|
|
54
52
|
xml
|
|
55
|
-
elsif xml.
|
|
53
|
+
elsif xml.is_a?(IO) || xml.is_a?(StringIO)
|
|
56
54
|
xml.read
|
|
57
55
|
else
|
|
58
56
|
xml.to_s
|
|
@@ -67,7 +65,7 @@ module Moxml
|
|
|
67
65
|
parser.parse
|
|
68
66
|
rescue ::LibXML::XML::Error => e
|
|
69
67
|
if options[:strict]
|
|
70
|
-
line = e.
|
|
68
|
+
line = e.line
|
|
71
69
|
raise Moxml::ParseError.new(
|
|
72
70
|
e.message,
|
|
73
71
|
line: line,
|
|
@@ -91,7 +89,7 @@ module Moxml
|
|
|
91
89
|
external_id,
|
|
92
90
|
system_id,
|
|
93
91
|
)
|
|
94
|
-
|
|
92
|
+
attachments.set(native_doc, :doctype, doctype_wrapper)
|
|
95
93
|
end
|
|
96
94
|
|
|
97
95
|
ctx = _context || Context.new(:libxml)
|
|
@@ -116,8 +114,12 @@ module Moxml
|
|
|
116
114
|
# Parse
|
|
117
115
|
parser.parse
|
|
118
116
|
rescue ::LibXML::XML::Error => e
|
|
119
|
-
line = e.
|
|
120
|
-
column =
|
|
117
|
+
line = e.line
|
|
118
|
+
column = begin
|
|
119
|
+
e.column
|
|
120
|
+
rescue StandardError
|
|
121
|
+
nil
|
|
122
|
+
end
|
|
121
123
|
error = Moxml::ParseError.new(e.message, line: line, column: column)
|
|
122
124
|
handler.on_error(error)
|
|
123
125
|
end
|
|
@@ -135,6 +137,14 @@ module Moxml
|
|
|
135
137
|
CustomizedLibxml::Text.new(native)
|
|
136
138
|
end
|
|
137
139
|
|
|
140
|
+
def create_native_entity_reference(name)
|
|
141
|
+
CustomizedLibxml::EntityReference.new(name)
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def entity_reference_name(node)
|
|
145
|
+
node.name if node.is_a?(CustomizedLibxml::EntityReference)
|
|
146
|
+
end
|
|
147
|
+
|
|
138
148
|
def create_native_cdata(content, _owner_doc = nil)
|
|
139
149
|
native = ::LibXML::XML::Node.new_cdata(content.to_s)
|
|
140
150
|
CustomizedLibxml::Cdata.new(native)
|
|
@@ -174,10 +184,11 @@ module Moxml
|
|
|
174
184
|
if node.is_a?(CustomizedLibxml::ProcessingInstruction)
|
|
175
185
|
return :processing_instruction
|
|
176
186
|
end
|
|
187
|
+
return :entity_reference if node.is_a?(CustomizedLibxml::EntityReference)
|
|
177
188
|
return :doctype if node.is_a?(DoctypeWrapper)
|
|
178
189
|
|
|
179
190
|
# Unwrap if needed
|
|
180
|
-
native_node = node
|
|
191
|
+
native_node = unpatch_node(node)
|
|
181
192
|
|
|
182
193
|
case native_node.node_type
|
|
183
194
|
when ::LibXML::XML::Node::DOCUMENT_NODE
|
|
@@ -266,22 +277,8 @@ module Moxml
|
|
|
266
277
|
result = []
|
|
267
278
|
|
|
268
279
|
# Include DOCTYPE if present
|
|
269
|
-
|
|
270
|
-
if
|
|
271
|
-
doctype_wrapper = native_node.instance_variable_get(:@moxml_doctype)
|
|
272
|
-
result << doctype_wrapper if doctype_wrapper
|
|
273
|
-
elsif native_node.respond_to?(:dtd) && native_node.dtd
|
|
274
|
-
# Otherwise check dtd property directly
|
|
275
|
-
dtd = native_node.dtd
|
|
276
|
-
# Wrap in DoctypeWrapper for consistency
|
|
277
|
-
doctype_wrapper = DoctypeWrapper.new(
|
|
278
|
-
native_node,
|
|
279
|
-
dtd.name,
|
|
280
|
-
dtd.external_id,
|
|
281
|
-
dtd.system_id,
|
|
282
|
-
)
|
|
283
|
-
result << doctype_wrapper
|
|
284
|
-
end
|
|
280
|
+
doctype_wrapper = attachments.get(native_node, :doctype)
|
|
281
|
+
result << doctype_wrapper if doctype_wrapper
|
|
285
282
|
|
|
286
283
|
return result unless native_node.root
|
|
287
284
|
|
|
@@ -289,15 +286,21 @@ module Moxml
|
|
|
289
286
|
return result
|
|
290
287
|
end
|
|
291
288
|
|
|
292
|
-
return [] unless native_node.children?
|
|
293
|
-
|
|
294
289
|
result = []
|
|
295
|
-
native_node.
|
|
296
|
-
|
|
297
|
-
|
|
290
|
+
if native_node.children?
|
|
291
|
+
native_node.each_child do |child|
|
|
292
|
+
# Skip whitespace-only text nodes
|
|
293
|
+
next if child.text? && child.content.to_s.strip.empty?
|
|
298
294
|
|
|
299
|
-
|
|
295
|
+
result << patch_node(child)
|
|
296
|
+
end
|
|
300
297
|
end
|
|
298
|
+
|
|
299
|
+
# Include any EntityReference wrappers stored on the document
|
|
300
|
+
doc = native_node.doc
|
|
301
|
+
entity_refs = doc ? lookup_entity_refs(doc, native_node) : nil
|
|
302
|
+
result.concat(entity_refs) if entity_refs
|
|
303
|
+
|
|
301
304
|
result
|
|
302
305
|
end
|
|
303
306
|
|
|
@@ -350,7 +353,7 @@ module Moxml
|
|
|
350
353
|
def attributes(element)
|
|
351
354
|
native_elem = unpatch_node(element)
|
|
352
355
|
return [] unless native_elem
|
|
353
|
-
unless native_elem.
|
|
356
|
+
unless native_elem.is_a?(::LibXML::XML::Node) && native_elem.element?
|
|
354
357
|
return []
|
|
355
358
|
end
|
|
356
359
|
return [] unless native_elem.attributes?
|
|
@@ -368,7 +371,7 @@ module Moxml
|
|
|
368
371
|
|
|
369
372
|
def attribute_namespace(attr)
|
|
370
373
|
return nil unless attr
|
|
371
|
-
return nil unless attr.
|
|
374
|
+
return nil unless attr.is_a?(::LibXML::XML::Attr)
|
|
372
375
|
|
|
373
376
|
attr.ns
|
|
374
377
|
end
|
|
@@ -498,11 +501,22 @@ module Moxml
|
|
|
498
501
|
native_elem = unpatch_node(element)
|
|
499
502
|
native_child = unpatch_node(child)
|
|
500
503
|
|
|
504
|
+
# EntityReference wrappers can't go in LibXML's native tree.
|
|
505
|
+
# Store on the document (stable identity) keyed by element.
|
|
506
|
+
# LibXML creates new Ruby wrappers on each access, so element
|
|
507
|
+
# object_id is unstable — we look up via == comparison.
|
|
508
|
+
if child.is_a?(CustomizedLibxml::EntityReference)
|
|
509
|
+
doc = native_elem.is_a?(::LibXML::XML::Document) ? native_elem : native_elem.doc
|
|
510
|
+
store_entity_ref_on_doc(doc, native_elem, child)
|
|
511
|
+
append_child_sequence_on_doc(doc, native_elem, :eref)
|
|
512
|
+
return
|
|
513
|
+
end
|
|
514
|
+
|
|
501
515
|
# For LibXML: if parent has a DEFAULT namespace (nil/empty prefix) and child is an element without a namespace,
|
|
502
516
|
# explicitly set the child's namespace to match the parent's for XPath compatibility
|
|
503
517
|
# NOTE: Prefixed namespaces are NOT inherited, only default namespaces
|
|
504
|
-
if native_elem.
|
|
505
|
-
native_child.
|
|
518
|
+
if native_elem.is_a?(::LibXML::XML::Node) && native_elem.namespaces&.namespace &&
|
|
519
|
+
native_child.is_a?(::LibXML::XML::Node) && native_child.element? &&
|
|
506
520
|
(!native_child.namespaces.namespace || native_child.namespaces.namespace.href.to_s.empty?)
|
|
507
521
|
|
|
508
522
|
parent_ns = native_elem.namespaces.namespace
|
|
@@ -515,32 +529,32 @@ module Moxml
|
|
|
515
529
|
if native_elem.is_a?(::LibXML::XML::Document)
|
|
516
530
|
# For Declaration wrappers, store them for serialization
|
|
517
531
|
if child.is_a?(CustomizedLibxml::Declaration)
|
|
518
|
-
|
|
532
|
+
attachments.set(native_elem, :declaration, child)
|
|
519
533
|
# Also store reference to parent document in the declaration
|
|
520
|
-
child.
|
|
534
|
+
child.parent_doc = native_elem
|
|
521
535
|
return
|
|
522
536
|
end
|
|
523
537
|
|
|
524
538
|
# For DOCTYPE wrappers, store them for serialization
|
|
525
539
|
if child.is_a?(DoctypeWrapper)
|
|
526
|
-
|
|
540
|
+
attachments.set(native_elem, :doctype, child)
|
|
527
541
|
return
|
|
528
542
|
end
|
|
529
543
|
|
|
530
544
|
# For document-level PIs, store them for serialization
|
|
531
545
|
if child.is_a?(CustomizedLibxml::ProcessingInstruction)
|
|
532
|
-
pis =
|
|
546
|
+
pis = attachments.get(native_elem, :pis) || []
|
|
533
547
|
pis << child
|
|
534
|
-
|
|
548
|
+
attachments.set(native_elem, :pis, pis)
|
|
535
549
|
return
|
|
536
550
|
end
|
|
537
551
|
|
|
538
552
|
# For text nodes added to document, store them for serialization
|
|
539
553
|
# Documents can't have text children in LibXML
|
|
540
554
|
if child.is_a?(CustomizedLibxml::Text)
|
|
541
|
-
texts =
|
|
555
|
+
texts = attachments.get(native_elem, :texts) || []
|
|
542
556
|
texts << child
|
|
543
|
-
|
|
557
|
+
attachments.set(native_elem, :texts, texts)
|
|
544
558
|
return
|
|
545
559
|
end
|
|
546
560
|
|
|
@@ -548,15 +562,66 @@ module Moxml
|
|
|
548
562
|
if native_elem.root.nil? && node_type(native_child) == :element
|
|
549
563
|
# Set as root element
|
|
550
564
|
native_elem.root = native_child
|
|
565
|
+
# Flag for actual_native to refresh the wrapper's native reference
|
|
566
|
+
attachments.set(native_elem, :_pending_root_refresh, native_child.object_id)
|
|
551
567
|
elsif native_elem.root
|
|
552
568
|
# Document has root, add to it instead
|
|
553
569
|
import_and_add(native_elem.doc, native_elem.root, native_child)
|
|
554
570
|
end
|
|
555
571
|
else
|
|
556
572
|
import_and_add(native_elem.doc, native_elem, native_child)
|
|
573
|
+
doc = native_elem.doc || native_elem
|
|
574
|
+
append_child_sequence_on_doc(doc, native_elem, :native)
|
|
557
575
|
end
|
|
558
576
|
end
|
|
559
577
|
|
|
578
|
+
# Store entity ref on the document (stable identity).
|
|
579
|
+
# LibXML element wrappers are ephemeral, so we use == to find matching elements.
|
|
580
|
+
def store_entity_ref_on_doc(doc, element, ref)
|
|
581
|
+
pairs = attachments.get(doc, :_entity_ref_pairs) || []
|
|
582
|
+
pair = pairs.find { |elem, _| elem == element }
|
|
583
|
+
if pair
|
|
584
|
+
pair[1] << ref
|
|
585
|
+
else
|
|
586
|
+
pairs << [element, [ref]]
|
|
587
|
+
end
|
|
588
|
+
attachments.set(doc, :_entity_ref_pairs, pairs)
|
|
589
|
+
end
|
|
590
|
+
|
|
591
|
+
# Look up entity refs for an element from the document
|
|
592
|
+
def lookup_entity_refs(doc, element)
|
|
593
|
+
pairs = attachments.get(doc, :_entity_ref_pairs)
|
|
594
|
+
return nil unless pairs
|
|
595
|
+
pair = pairs.find { |elem, _| elem == element }
|
|
596
|
+
pair&.last
|
|
597
|
+
end
|
|
598
|
+
|
|
599
|
+
# Track child order on the document (stable identity)
|
|
600
|
+
def append_child_sequence_on_doc(doc, element, type)
|
|
601
|
+
pairs = attachments.get(doc, :_child_seq_pairs) || []
|
|
602
|
+
pair = pairs.find { |elem, _| elem == element }
|
|
603
|
+
if pair
|
|
604
|
+
pair[1] << type
|
|
605
|
+
else
|
|
606
|
+
pairs << [element, [type]]
|
|
607
|
+
end
|
|
608
|
+
attachments.set(doc, :_child_seq_pairs, pairs)
|
|
609
|
+
end
|
|
610
|
+
|
|
611
|
+
# Look up child sequence for an element from the document
|
|
612
|
+
def lookup_child_sequence(doc, element)
|
|
613
|
+
pairs = attachments.get(doc, :_child_seq_pairs)
|
|
614
|
+
return nil unless pairs
|
|
615
|
+
pair = pairs.find { |elem, _| elem == element }
|
|
616
|
+
pair&.last
|
|
617
|
+
end
|
|
618
|
+
|
|
619
|
+
def append_child_sequence(element, type)
|
|
620
|
+
seq = attachments.get(element, :child_sequence) || []
|
|
621
|
+
seq << type
|
|
622
|
+
attachments.set(element, :child_sequence, seq)
|
|
623
|
+
end
|
|
624
|
+
|
|
560
625
|
def add_previous_sibling(node, sibling)
|
|
561
626
|
return unless node && sibling
|
|
562
627
|
|
|
@@ -566,11 +631,11 @@ module Moxml
|
|
|
566
631
|
# Special handling for document-level processing instructions
|
|
567
632
|
# When adding a PI as sibling to root element, store it on document
|
|
568
633
|
if sibling.is_a?(CustomizedLibxml::ProcessingInstruction) &&
|
|
569
|
-
native_node.
|
|
634
|
+
native_node.is_a?(::LibXML::XML::Node) && native_node.doc
|
|
570
635
|
doc = native_node.doc
|
|
571
|
-
pis =
|
|
636
|
+
pis = attachments.get(doc, :pis) || []
|
|
572
637
|
pis << sibling
|
|
573
|
-
|
|
638
|
+
attachments.set(doc, :pis, pis)
|
|
574
639
|
return
|
|
575
640
|
end
|
|
576
641
|
|
|
@@ -588,16 +653,7 @@ module Moxml
|
|
|
588
653
|
def remove(node)
|
|
589
654
|
# Handle Declaration wrapper - mark as removed on document
|
|
590
655
|
if node.is_a?(CustomizedLibxml::Declaration)
|
|
591
|
-
|
|
592
|
-
# We need to find which document it's stored on and mark it as removed
|
|
593
|
-
# This is a bit tricky since the Declaration's native is its own internal doc
|
|
594
|
-
# We rely on the fact that when a declaration is added to a document,
|
|
595
|
-
# the document stores a reference to it in @moxml_declaration
|
|
596
|
-
# So we need to clear that reference and mark it as removed
|
|
597
|
-
|
|
598
|
-
# Since we can't easily find the parent document from the Declaration,
|
|
599
|
-
# we'll set a flag on the Declaration itself
|
|
600
|
-
node.instance_variable_set(:@removed, true)
|
|
656
|
+
node.removed = true
|
|
601
657
|
return
|
|
602
658
|
end
|
|
603
659
|
|
|
@@ -624,7 +680,7 @@ module Moxml
|
|
|
624
680
|
next_sibling = native_node.next
|
|
625
681
|
|
|
626
682
|
# Import if needed for cross-document operations
|
|
627
|
-
parent_doc = parent.
|
|
683
|
+
parent_doc = parent.is_a?(::LibXML::XML::Node) ? parent.doc : nil
|
|
628
684
|
|
|
629
685
|
# Use import_and_add to properly handle document adoption
|
|
630
686
|
import_and_add(parent_doc, parent, native_new)
|
|
@@ -651,7 +707,7 @@ module Moxml
|
|
|
651
707
|
native_elem.each_child(&:remove!)
|
|
652
708
|
|
|
653
709
|
# Get the element's document for importing
|
|
654
|
-
doc = native_elem.
|
|
710
|
+
doc = native_elem.is_a?(::LibXML::XML::Node) ? native_elem.doc : nil
|
|
655
711
|
|
|
656
712
|
children.each do |c|
|
|
657
713
|
native_c = unpatch_node(c)
|
|
@@ -662,6 +718,8 @@ module Moxml
|
|
|
662
718
|
end
|
|
663
719
|
|
|
664
720
|
def text_content(node)
|
|
721
|
+
return "" if node.is_a?(CustomizedLibxml::EntityReference)
|
|
722
|
+
|
|
665
723
|
native_node = unpatch_node(node)
|
|
666
724
|
return nil unless native_node
|
|
667
725
|
|
|
@@ -782,7 +840,7 @@ module Moxml
|
|
|
782
840
|
def namespace_definitions(node)
|
|
783
841
|
native_node = unpatch_node(node)
|
|
784
842
|
return [] unless native_node
|
|
785
|
-
return [] unless native_node.
|
|
843
|
+
return [] unless native_node.is_a?(::LibXML::XML::Node)
|
|
786
844
|
|
|
787
845
|
native_node.namespaces.map do |ns|
|
|
788
846
|
ns
|
|
@@ -835,15 +893,8 @@ module Moxml
|
|
|
835
893
|
|
|
836
894
|
def serialize(node, options = {})
|
|
837
895
|
# FIRST: Check if node is any kind of wrapper with custom to_xml
|
|
838
|
-
if node.
|
|
839
|
-
|
|
840
|
-
return node.to_xml if node.is_a?(CustomizedLibxml::Declaration)
|
|
841
|
-
|
|
842
|
-
# Other wrappers - check they're not native LibXML nodes
|
|
843
|
-
unless node.is_a?(::LibXML::XML::Node) ||
|
|
844
|
-
node.is_a?(::LibXML::XML::Document)
|
|
845
|
-
return node.to_xml
|
|
846
|
-
end
|
|
896
|
+
if node.is_a?(CustomizedLibxml::Node) || node.is_a?(DoctypeWrapper)
|
|
897
|
+
return node.to_xml
|
|
847
898
|
end
|
|
848
899
|
|
|
849
900
|
native_node = unpatch_node(node)
|
|
@@ -863,12 +914,10 @@ module Moxml
|
|
|
863
914
|
|
|
864
915
|
if should_include_decl
|
|
865
916
|
# Check if declaration was explicitly managed
|
|
866
|
-
|
|
867
|
-
|
|
917
|
+
decl = attachments.get(native_node, :declaration)
|
|
918
|
+
if decl
|
|
868
919
|
# Only output declaration if it exists and wasn't removed
|
|
869
|
-
|
|
870
|
-
output << decl.to_xml
|
|
871
|
-
end
|
|
920
|
+
output << decl.to_xml unless decl.removed
|
|
872
921
|
else
|
|
873
922
|
# No declaration stored - create default
|
|
874
923
|
version = native_node.version || "1.0"
|
|
@@ -883,39 +932,33 @@ module Moxml
|
|
|
883
932
|
encoding_val,
|
|
884
933
|
nil, # No standalone by default
|
|
885
934
|
)
|
|
886
|
-
|
|
935
|
+
attachments.set(native_node, :declaration, decl)
|
|
887
936
|
output << decl.to_xml
|
|
888
937
|
end
|
|
889
938
|
end
|
|
890
939
|
|
|
891
940
|
# Add DOCTYPE if stored on document
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
output << doctype_wrapper.to_xml
|
|
897
|
-
end
|
|
941
|
+
doctype_wrapper = attachments.get(native_node, :doctype)
|
|
942
|
+
if doctype_wrapper
|
|
943
|
+
output << "\n" unless output.empty?
|
|
944
|
+
output << doctype_wrapper.to_xml
|
|
898
945
|
end
|
|
899
946
|
|
|
900
947
|
# Add document-level processing instructions if stored
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
output << pi.to_xml
|
|
907
|
-
end
|
|
948
|
+
pis = attachments.get(native_node, :pis)
|
|
949
|
+
if pis && !pis.empty?
|
|
950
|
+
pis.each do |pi|
|
|
951
|
+
output << "\n" unless output.empty?
|
|
952
|
+
output << pi.to_xml
|
|
908
953
|
end
|
|
909
954
|
end
|
|
910
955
|
|
|
911
956
|
# Add text nodes if stored (for documents without root)
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
output << text.to_xml
|
|
918
|
-
end
|
|
957
|
+
texts = attachments.get(native_node, :texts)
|
|
958
|
+
if texts && !texts.empty?
|
|
959
|
+
texts.each do |text|
|
|
960
|
+
output << "\n" unless output.empty?
|
|
961
|
+
output << text.to_xml
|
|
919
962
|
end
|
|
920
963
|
end
|
|
921
964
|
|
|
@@ -1033,7 +1076,7 @@ module Moxml
|
|
|
1033
1076
|
return nil unless node
|
|
1034
1077
|
|
|
1035
1078
|
# Unwrap if wrapped
|
|
1036
|
-
native_node = node
|
|
1079
|
+
native_node = unpatch_node(node)
|
|
1037
1080
|
|
|
1038
1081
|
# LibXML is strict about document ownership
|
|
1039
1082
|
# Create brand new NATIVE nodes that are document-independent
|
|
@@ -1057,7 +1100,7 @@ module Moxml
|
|
|
1057
1100
|
# new_node.line = node.line
|
|
1058
1101
|
|
|
1059
1102
|
# Copy and set namespace definitions FIRST
|
|
1060
|
-
if native_node.
|
|
1103
|
+
if native_node.is_a?(::LibXML::XML::Node)
|
|
1061
1104
|
# First, copy all namespace definitions
|
|
1062
1105
|
native_node.namespaces.each do |ns|
|
|
1063
1106
|
::LibXML::XML::Namespace.new(
|
|
@@ -1144,7 +1187,12 @@ module Moxml
|
|
|
1144
1187
|
|
|
1145
1188
|
def unpatch_node(node)
|
|
1146
1189
|
# Unwrap to get native LibXML node
|
|
1147
|
-
|
|
1190
|
+
case node
|
|
1191
|
+
when CustomizedLibxml::Node, CustomizedLibxml::Declaration, DoctypeWrapper
|
|
1192
|
+
node.native
|
|
1193
|
+
else
|
|
1194
|
+
node
|
|
1195
|
+
end
|
|
1148
1196
|
end
|
|
1149
1197
|
|
|
1150
1198
|
def prepare_for_new_document(node, target_doc)
|
|
@@ -1156,13 +1204,35 @@ module Moxml
|
|
|
1156
1204
|
duplicate_node(node)
|
|
1157
1205
|
end
|
|
1158
1206
|
|
|
1207
|
+
def has_declaration?(native_doc, wrapper)
|
|
1208
|
+
decl = attachments.get(native_doc, :declaration)
|
|
1209
|
+
if decl
|
|
1210
|
+
!decl.removed
|
|
1211
|
+
else
|
|
1212
|
+
wrapper.has_xml_declaration
|
|
1213
|
+
end
|
|
1214
|
+
end
|
|
1215
|
+
|
|
1216
|
+
# LibXML's doc.root= creates a new Ruby wrapper with different object_id.
|
|
1217
|
+
# Return the actual root node so attachments are stored on the correct object.
|
|
1218
|
+
def actual_native(child_native, parent_native)
|
|
1219
|
+
if parent_native.is_a?(::LibXML::XML::Document)
|
|
1220
|
+
pending = attachments.get(parent_native, :_pending_root_refresh)
|
|
1221
|
+
if pending && pending == child_native.object_id
|
|
1222
|
+
attachments.delete(parent_native, :_pending_root_refresh)
|
|
1223
|
+
return parent_native.root
|
|
1224
|
+
end
|
|
1225
|
+
end
|
|
1226
|
+
child_native
|
|
1227
|
+
end
|
|
1228
|
+
|
|
1159
1229
|
private
|
|
1160
1230
|
|
|
1161
1231
|
def serialize_element(elem)
|
|
1162
1232
|
output = "<#{elem.name}"
|
|
1163
1233
|
|
|
1164
1234
|
# Add namespace definitions (only on this element, not ancestors)
|
|
1165
|
-
if elem.
|
|
1235
|
+
if elem.is_a?(::LibXML::XML::Node)
|
|
1166
1236
|
seen_ns = {}
|
|
1167
1237
|
elem.namespaces.each do |ns|
|
|
1168
1238
|
prefix = ns.prefix
|
|
@@ -1203,6 +1273,12 @@ module Moxml
|
|
|
1203
1273
|
output << serialize_node(child)
|
|
1204
1274
|
end
|
|
1205
1275
|
end
|
|
1276
|
+
|
|
1277
|
+
# Append any EntityReference wrappers stored on the document
|
|
1278
|
+
doc = elem.doc
|
|
1279
|
+
entity_refs = doc ? lookup_entity_refs(doc, elem) : nil
|
|
1280
|
+
entity_refs&.each { |ref| output << ref.to_xml }
|
|
1281
|
+
|
|
1206
1282
|
output << "</#{elem.name}>"
|
|
1207
1283
|
|
|
1208
1284
|
output
|
|
@@ -1210,11 +1286,12 @@ module Moxml
|
|
|
1210
1286
|
|
|
1211
1287
|
def serialize_node(node)
|
|
1212
1288
|
# Check if node is a wrapper with to_xml method
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1289
|
+
case node
|
|
1290
|
+
when CustomizedLibxml::ProcessingInstruction,
|
|
1291
|
+
CustomizedLibxml::Comment,
|
|
1292
|
+
CustomizedLibxml::Cdata,
|
|
1293
|
+
CustomizedLibxml::Text,
|
|
1294
|
+
CustomizedLibxml::EntityReference
|
|
1218
1295
|
return node.to_xml
|
|
1219
1296
|
end
|
|
1220
1297
|
|
|
@@ -1269,7 +1346,7 @@ module Moxml
|
|
|
1269
1346
|
raise unless e.message.include?("different documents")
|
|
1270
1347
|
|
|
1271
1348
|
# Get the target document - either from parameter or element
|
|
1272
|
-
target_doc = doc || (element.
|
|
1349
|
+
target_doc = doc || (element.is_a?(::LibXML::XML::Node) ? element.doc : nil)
|
|
1273
1350
|
|
|
1274
1351
|
if target_doc
|
|
1275
1352
|
# Use deep import to ensure all descendants are included
|
|
@@ -1329,11 +1406,11 @@ module Moxml
|
|
|
1329
1406
|
# Include namespace definitions:
|
|
1330
1407
|
# - On root element (include_ns = true), output ALL namespace definitions
|
|
1331
1408
|
# - On child elements, output namespace definitions that override parent namespaces
|
|
1332
|
-
if elem.
|
|
1409
|
+
if elem.is_a?(::LibXML::XML::Node) && elem.namespaces.respond_to?(:definitions)
|
|
1333
1410
|
# Get parent's namespace definitions to detect overrides
|
|
1334
|
-
parent_ns_defs = if !include_ns && elem.
|
|
1411
|
+
parent_ns_defs = if !include_ns && elem.parent && !elem.parent.is_a?(::LibXML::XML::Document)
|
|
1335
1412
|
parent_namespaces = {}
|
|
1336
|
-
if elem.parent.
|
|
1413
|
+
if elem.parent.is_a?(::LibXML::XML::Node)
|
|
1337
1414
|
elem.parent.namespaces.each do |ns|
|
|
1338
1415
|
parent_namespaces[ns.prefix] = ns.href
|
|
1339
1416
|
end
|
|
@@ -1381,17 +1458,54 @@ module Moxml
|
|
|
1381
1458
|
end
|
|
1382
1459
|
end
|
|
1383
1460
|
|
|
1461
|
+
# Check for entity refs stored on the document
|
|
1462
|
+
# LibXML element wrappers are ephemeral, so look up via == comparison
|
|
1463
|
+
doc = elem.doc
|
|
1464
|
+
entity_refs = doc ? lookup_entity_refs(doc, elem) : nil
|
|
1465
|
+
child_sequence = doc ? lookup_child_sequence(doc, elem) : nil
|
|
1466
|
+
|
|
1384
1467
|
# Always use verbose format <tag></tag> for consistency with other adapters
|
|
1385
1468
|
output << ">"
|
|
1386
|
-
|
|
1469
|
+
|
|
1470
|
+
if entity_refs && !entity_refs.empty? && child_sequence
|
|
1471
|
+
# Interleave native children with entity refs using tracked sequence
|
|
1472
|
+
native_children = []
|
|
1473
|
+
if elem.children?
|
|
1474
|
+
elem.each_child { |c| native_children << c unless c.text? && c.content.to_s.strip.empty? }
|
|
1475
|
+
end
|
|
1476
|
+
|
|
1477
|
+
eref_idx = 0
|
|
1478
|
+
native_idx = 0
|
|
1479
|
+
child_sequence.each do |type|
|
|
1480
|
+
case type
|
|
1481
|
+
when :native
|
|
1482
|
+
if native_idx < native_children.size
|
|
1483
|
+
child = native_children[native_idx]
|
|
1484
|
+
native_idx += 1
|
|
1485
|
+
wrapped_child = patch_node(child)
|
|
1486
|
+
output << if wrapped_child.is_a?(CustomizedLibxml::Node) && !wrapped_child.is_a?(CustomizedLibxml::Element)
|
|
1487
|
+
wrapped_child.to_xml
|
|
1488
|
+
elsif child.element?
|
|
1489
|
+
serialize_element_with_namespaces(child, false)
|
|
1490
|
+
else
|
|
1491
|
+
serialize_node(child)
|
|
1492
|
+
end
|
|
1493
|
+
end
|
|
1494
|
+
when :eref
|
|
1495
|
+
if eref_idx < entity_refs.size
|
|
1496
|
+
output << entity_refs[eref_idx].to_xml
|
|
1497
|
+
eref_idx += 1
|
|
1498
|
+
end
|
|
1499
|
+
end
|
|
1500
|
+
end
|
|
1501
|
+
elsif elem.children?
|
|
1387
1502
|
elem.each_child do |child|
|
|
1388
1503
|
# Skip whitespace-only text nodes
|
|
1389
1504
|
next if child.text? && child.content.to_s.strip.empty?
|
|
1390
1505
|
|
|
1391
1506
|
# Wrap the child and serialize
|
|
1392
1507
|
wrapped_child = patch_node(child)
|
|
1393
|
-
output << if wrapped_child.
|
|
1394
|
-
!wrapped_child.is_a?(::LibXML::XML::Node)
|
|
1508
|
+
output << if wrapped_child.is_a?(CustomizedLibxml::Node) && !wrapped_child.is_a?(CustomizedLibxml::Element)
|
|
1395
1509
|
# Use wrapper's to_xml for proper serialization
|
|
1396
1510
|
wrapped_child.to_xml
|
|
1397
1511
|
elsif child.element?
|
|
@@ -1421,7 +1535,7 @@ module Moxml
|
|
|
1421
1535
|
else
|
|
1422
1536
|
# Walk up to root first
|
|
1423
1537
|
current = node
|
|
1424
|
-
current = current.parent while current.
|
|
1538
|
+
current = current.parent while current.is_a?(::LibXML::XML::Node) && current.parent && !current.parent.is_a?(::LibXML::XML::Document)
|
|
1425
1539
|
current
|
|
1426
1540
|
end
|
|
1427
1541
|
|
|
@@ -1435,7 +1549,7 @@ module Moxml
|
|
|
1435
1549
|
|
|
1436
1550
|
def collect_ns_from_subtree(node, ns_defs)
|
|
1437
1551
|
# Collect namespaces defined on this node
|
|
1438
|
-
if node.
|
|
1552
|
+
if node.is_a?(::LibXML::XML::Node)
|
|
1439
1553
|
node.namespaces.each do |ns|
|
|
1440
1554
|
prefix = ns.prefix
|
|
1441
1555
|
uri = ns.href
|
|
@@ -1453,7 +1567,7 @@ module Moxml
|
|
|
1453
1567
|
|
|
1454
1568
|
# Also check if this element has an active namespace (inherited or own)
|
|
1455
1569
|
# This catches cases where elements inherit namespaces from parents
|
|
1456
|
-
if node.
|
|
1570
|
+
if node.is_a?(::LibXML::XML::Node) && node.namespaces.respond_to?(:namespace)
|
|
1457
1571
|
active_ns = node.namespaces.namespace
|
|
1458
1572
|
if active_ns
|
|
1459
1573
|
prefix = active_ns.prefix
|
|
@@ -1469,7 +1583,7 @@ module Moxml
|
|
|
1469
1583
|
end
|
|
1470
1584
|
|
|
1471
1585
|
# Recursively collect from children
|
|
1472
|
-
return unless node.
|
|
1586
|
+
return unless node.is_a?(::LibXML::XML::Node) && node.children?
|
|
1473
1587
|
|
|
1474
1588
|
node.each_child do |child|
|
|
1475
1589
|
collect_ns_from_subtree(child, ns_defs) if child.element?
|
|
@@ -1493,12 +1607,12 @@ module Moxml
|
|
|
1493
1607
|
# Search element and ancestors for namespace with given prefix
|
|
1494
1608
|
current = element
|
|
1495
1609
|
while current
|
|
1496
|
-
if current.
|
|
1610
|
+
if current.is_a?(::LibXML::XML::Node)
|
|
1497
1611
|
current.namespaces.each do |ns|
|
|
1498
1612
|
return ns if ns.prefix == prefix
|
|
1499
1613
|
end
|
|
1500
1614
|
end
|
|
1501
|
-
current = current.
|
|
1615
|
+
current = current.is_a?(::LibXML::XML::Node) ? current.parent : nil
|
|
1502
1616
|
end
|
|
1503
1617
|
nil
|
|
1504
1618
|
end
|