moxml 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +117 -66
  3. data/Gemfile +1 -0
  4. data/README.adoc +11 -9
  5. data/Rakefile +34 -1
  6. data/TODO.remaining/1-entity-reference-adapter-support.md +157 -0
  7. data/TODO.remaining/2-entity-restoration-model-driven.md +169 -0
  8. data/TODO.remaining/3-entity-reference-test-coverage.md +170 -0
  9. data/TODO.remaining/4-lenient-entities-mode.md +106 -0
  10. data/TODO.remaining/5-fixture-integrity.md +65 -0
  11. data/TODO.remaining/6-ox-element-ordering-bug.md +36 -0
  12. data/TODO.remaining/7-headed-ox-limitations.md +95 -0
  13. data/TODO.remaining/8-xpath-predicate-gaps.md +68 -0
  14. data/TODO.remaining/9-cleanup-hygiene.md +42 -0
  15. data/TODO.remaining/README.md +54 -0
  16. data/benchmarks/generate_report.rb +1 -1
  17. data/docs/_pages/configuration.adoc +22 -19
  18. data/docs/_tutorials/namespace-handling.adoc +5 -5
  19. data/lib/moxml/adapter/base.rb +22 -3
  20. data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
  21. data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
  22. data/lib/moxml/adapter/customized_libxml.rb +18 -0
  23. data/lib/moxml/adapter/customized_oga.rb +10 -0
  24. data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
  25. data/lib/moxml/adapter/customized_ox.rb +12 -0
  26. data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
  27. data/lib/moxml/adapter/customized_rexml/formatter.rb +44 -20
  28. data/lib/moxml/adapter/customized_rexml.rb +11 -0
  29. data/lib/moxml/adapter/headed_ox.rb +37 -14
  30. data/lib/moxml/adapter/libxml.rb +233 -119
  31. data/lib/moxml/adapter/nokogiri.rb +22 -11
  32. data/lib/moxml/adapter/oga.rb +64 -25
  33. data/lib/moxml/adapter/ox.rb +198 -42
  34. data/lib/moxml/adapter/rexml.rb +64 -13
  35. data/lib/moxml/attribute.rb +3 -0
  36. data/lib/moxml/builder.rb +78 -24
  37. data/lib/moxml/config.rb +24 -7
  38. data/lib/moxml/declaration.rb +4 -2
  39. data/lib/moxml/document.rb +8 -1
  40. data/lib/moxml/document_builder.rb +44 -37
  41. data/lib/moxml/element.rb +18 -5
  42. data/lib/moxml/entity_registry.rb +51 -1
  43. data/lib/moxml/native_attachment.rb +65 -0
  44. data/lib/moxml/node.rb +39 -50
  45. data/lib/moxml/node_set.rb +43 -15
  46. data/lib/moxml/version.rb +1 -1
  47. data/lib/moxml/xml_utils.rb +1 -1
  48. data/lib/moxml/xpath/compiler.rb +4 -1
  49. data/lib/moxml.rb +1 -0
  50. data/scripts/format_xml.rb +16 -0
  51. data/scripts/pretty_format_xml.rb +14 -0
  52. data/spec/consistency/round_trip_spec.rb +3 -30
  53. data/spec/integration/all_adapters_spec.rb +1 -0
  54. data/spec/integration/headed_ox_integration_spec.rb +0 -2
  55. data/spec/integration/shared_examples/edge_cases.rb +7 -4
  56. data/spec/integration/shared_examples/integration_workflows.rb +3 -3
  57. data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +1 -1
  58. data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
  59. data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +1 -1
  60. data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
  61. data/spec/moxml/adapter/oga_spec.rb +46 -0
  62. data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
  63. data/spec/moxml/allocation_benchmark_spec.rb +96 -0
  64. data/spec/moxml/allocation_guard_spec.rb +282 -0
  65. data/spec/moxml/builder_spec.rb +256 -0
  66. data/spec/moxml/config_spec.rb +11 -11
  67. data/spec/moxml/doctype_spec.rb +41 -0
  68. data/spec/moxml/lazy_parse_spec.rb +115 -0
  69. data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
  70. data/spec/moxml/node_cache_spec.rb +110 -0
  71. data/spec/moxml/node_set_cache_spec.rb +90 -0
  72. data/spec/moxml/xml_utils_spec.rb +32 -0
  73. data/spec/moxml/xpath/axes_spec.rb +1 -1
  74. data/spec/moxml/xpath/compiler_spec.rb +2 -2
  75. data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
  76. data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
  77. data/spec/performance/memory_usage_spec.rb +0 -4
  78. data/spec/support/allocation_helper.rb +165 -0
  79. data/spec/support/w3c_namespace_helpers.rb +2 -1
  80. metadata +29 -2
@@ -2,13 +2,7 @@
2
2
 
3
3
  require_relative "base"
4
4
  require "libxml"
5
- require_relative "customized_libxml/node"
6
- require_relative "customized_libxml/element"
7
- require_relative "customized_libxml/text"
8
- require_relative "customized_libxml/comment"
9
- require_relative "customized_libxml/cdata"
10
- require_relative "customized_libxml/processing_instruction"
11
- require_relative "customized_libxml/declaration"
5
+ require_relative "customized_libxml"
12
6
 
13
7
  module Moxml
14
8
  module Adapter
@@ -44,6 +38,10 @@ module Moxml
44
38
  end
45
39
 
46
40
  class << self
41
+ def attachments
42
+ @attachments ||= Moxml::NativeAttachment.new
43
+ end
44
+
47
45
  def set_root(doc, element)
48
46
  doc.root = element
49
47
  end
@@ -52,7 +50,7 @@ module Moxml
52
50
  # LibXML doesn't preserve DOCTYPE during parsing, so we need to extract it manually
53
51
  xml_string = if xml.is_a?(String)
54
52
  xml
55
- elsif xml.respond_to?(:read)
53
+ elsif xml.is_a?(IO) || xml.is_a?(StringIO)
56
54
  xml.read
57
55
  else
58
56
  xml.to_s
@@ -67,7 +65,7 @@ module Moxml
67
65
  parser.parse
68
66
  rescue ::LibXML::XML::Error => e
69
67
  if options[:strict]
70
- line = e.respond_to?(:line) ? e.line : nil
68
+ line = e.line
71
69
  raise Moxml::ParseError.new(
72
70
  e.message,
73
71
  line: line,
@@ -91,7 +89,7 @@ module Moxml
91
89
  external_id,
92
90
  system_id,
93
91
  )
94
- native_doc.instance_variable_set(:@moxml_doctype, doctype_wrapper)
92
+ attachments.set(native_doc, :doctype, doctype_wrapper)
95
93
  end
96
94
 
97
95
  ctx = _context || Context.new(:libxml)
@@ -116,8 +114,12 @@ module Moxml
116
114
  # Parse
117
115
  parser.parse
118
116
  rescue ::LibXML::XML::Error => e
119
- line = e.respond_to?(:line) ? e.line : nil
120
- column = e.respond_to?(:column) ? e.column : nil
117
+ line = e.line
118
+ column = begin
119
+ e.column
120
+ rescue StandardError
121
+ nil
122
+ end
121
123
  error = Moxml::ParseError.new(e.message, line: line, column: column)
122
124
  handler.on_error(error)
123
125
  end
@@ -135,6 +137,14 @@ module Moxml
135
137
  CustomizedLibxml::Text.new(native)
136
138
  end
137
139
 
140
+ def create_native_entity_reference(name)
141
+ CustomizedLibxml::EntityReference.new(name)
142
+ end
143
+
144
+ def entity_reference_name(node)
145
+ node.name if node.is_a?(CustomizedLibxml::EntityReference)
146
+ end
147
+
138
148
  def create_native_cdata(content, _owner_doc = nil)
139
149
  native = ::LibXML::XML::Node.new_cdata(content.to_s)
140
150
  CustomizedLibxml::Cdata.new(native)
@@ -174,10 +184,11 @@ module Moxml
174
184
  if node.is_a?(CustomizedLibxml::ProcessingInstruction)
175
185
  return :processing_instruction
176
186
  end
187
+ return :entity_reference if node.is_a?(CustomizedLibxml::EntityReference)
177
188
  return :doctype if node.is_a?(DoctypeWrapper)
178
189
 
179
190
  # Unwrap if needed
180
- native_node = node.respond_to?(:native) ? node.native : node
191
+ native_node = unpatch_node(node)
181
192
 
182
193
  case native_node.node_type
183
194
  when ::LibXML::XML::Node::DOCUMENT_NODE
@@ -266,22 +277,8 @@ module Moxml
266
277
  result = []
267
278
 
268
279
  # Include DOCTYPE if present
269
- # First check if we stored it as instance variable (from parse)
270
- if native_node.instance_variable_defined?(:@moxml_doctype)
271
- doctype_wrapper = native_node.instance_variable_get(:@moxml_doctype)
272
- result << doctype_wrapper if doctype_wrapper
273
- elsif native_node.respond_to?(:dtd) && native_node.dtd
274
- # Otherwise check dtd property directly
275
- dtd = native_node.dtd
276
- # Wrap in DoctypeWrapper for consistency
277
- doctype_wrapper = DoctypeWrapper.new(
278
- native_node,
279
- dtd.name,
280
- dtd.external_id,
281
- dtd.system_id,
282
- )
283
- result << doctype_wrapper
284
- end
280
+ doctype_wrapper = attachments.get(native_node, :doctype)
281
+ result << doctype_wrapper if doctype_wrapper
285
282
 
286
283
  return result unless native_node.root
287
284
 
@@ -289,15 +286,21 @@ module Moxml
289
286
  return result
290
287
  end
291
288
 
292
- return [] unless native_node.children?
293
-
294
289
  result = []
295
- native_node.each_child do |child|
296
- # Skip whitespace-only text nodes
297
- next if child.text? && child.content.to_s.strip.empty?
290
+ if native_node.children?
291
+ native_node.each_child do |child|
292
+ # Skip whitespace-only text nodes
293
+ next if child.text? && child.content.to_s.strip.empty?
298
294
 
299
- result << patch_node(child)
295
+ result << patch_node(child)
296
+ end
300
297
  end
298
+
299
+ # Include any EntityReference wrappers stored on the document
300
+ doc = native_node.doc
301
+ entity_refs = doc ? lookup_entity_refs(doc, native_node) : nil
302
+ result.concat(entity_refs) if entity_refs
303
+
301
304
  result
302
305
  end
303
306
 
@@ -350,7 +353,7 @@ module Moxml
350
353
  def attributes(element)
351
354
  native_elem = unpatch_node(element)
352
355
  return [] unless native_elem
353
- unless native_elem.respond_to?(:element?) && native_elem.element?
356
+ unless native_elem.is_a?(::LibXML::XML::Node) && native_elem.element?
354
357
  return []
355
358
  end
356
359
  return [] unless native_elem.attributes?
@@ -368,7 +371,7 @@ module Moxml
368
371
 
369
372
  def attribute_namespace(attr)
370
373
  return nil unless attr
371
- return nil unless attr.respond_to?(:ns)
374
+ return nil unless attr.is_a?(::LibXML::XML::Attr)
372
375
 
373
376
  attr.ns
374
377
  end
@@ -498,11 +501,22 @@ module Moxml
498
501
  native_elem = unpatch_node(element)
499
502
  native_child = unpatch_node(child)
500
503
 
504
+ # EntityReference wrappers can't go in LibXML's native tree.
505
+ # Store on the document (stable identity) keyed by element.
506
+ # LibXML creates new Ruby wrappers on each access, so element
507
+ # object_id is unstable — we look up via == comparison.
508
+ if child.is_a?(CustomizedLibxml::EntityReference)
509
+ doc = native_elem.is_a?(::LibXML::XML::Document) ? native_elem : native_elem.doc
510
+ store_entity_ref_on_doc(doc, native_elem, child)
511
+ append_child_sequence_on_doc(doc, native_elem, :eref)
512
+ return
513
+ end
514
+
501
515
  # For LibXML: if parent has a DEFAULT namespace (nil/empty prefix) and child is an element without a namespace,
502
516
  # explicitly set the child's namespace to match the parent's for XPath compatibility
503
517
  # NOTE: Prefixed namespaces are NOT inherited, only default namespaces
504
- if native_elem.respond_to?(:namespaces) && native_elem.namespaces&.namespace &&
505
- native_child.respond_to?(:namespaces) && native_child.element? &&
518
+ if native_elem.is_a?(::LibXML::XML::Node) && native_elem.namespaces&.namespace &&
519
+ native_child.is_a?(::LibXML::XML::Node) && native_child.element? &&
506
520
  (!native_child.namespaces.namespace || native_child.namespaces.namespace.href.to_s.empty?)
507
521
 
508
522
  parent_ns = native_elem.namespaces.namespace
@@ -515,32 +529,32 @@ module Moxml
515
529
  if native_elem.is_a?(::LibXML::XML::Document)
516
530
  # For Declaration wrappers, store them for serialization
517
531
  if child.is_a?(CustomizedLibxml::Declaration)
518
- native_elem.instance_variable_set(:@moxml_declaration, child)
532
+ attachments.set(native_elem, :declaration, child)
519
533
  # Also store reference to parent document in the declaration
520
- child.instance_variable_set(:@parent_doc, native_elem)
534
+ child.parent_doc = native_elem
521
535
  return
522
536
  end
523
537
 
524
538
  # For DOCTYPE wrappers, store them for serialization
525
539
  if child.is_a?(DoctypeWrapper)
526
- native_elem.instance_variable_set(:@moxml_doctype, child)
540
+ attachments.set(native_elem, :doctype, child)
527
541
  return
528
542
  end
529
543
 
530
544
  # For document-level PIs, store them for serialization
531
545
  if child.is_a?(CustomizedLibxml::ProcessingInstruction)
532
- pis = native_elem.instance_variable_get(:@moxml_pis) || []
546
+ pis = attachments.get(native_elem, :pis) || []
533
547
  pis << child
534
- native_elem.instance_variable_set(:@moxml_pis, pis)
548
+ attachments.set(native_elem, :pis, pis)
535
549
  return
536
550
  end
537
551
 
538
552
  # For text nodes added to document, store them for serialization
539
553
  # Documents can't have text children in LibXML
540
554
  if child.is_a?(CustomizedLibxml::Text)
541
- texts = native_elem.instance_variable_get(:@moxml_texts) || []
555
+ texts = attachments.get(native_elem, :texts) || []
542
556
  texts << child
543
- native_elem.instance_variable_set(:@moxml_texts, texts)
557
+ attachments.set(native_elem, :texts, texts)
544
558
  return
545
559
  end
546
560
 
@@ -548,15 +562,66 @@ module Moxml
548
562
  if native_elem.root.nil? && node_type(native_child) == :element
549
563
  # Set as root element
550
564
  native_elem.root = native_child
565
+ # Flag for actual_native to refresh the wrapper's native reference
566
+ attachments.set(native_elem, :_pending_root_refresh, native_child.object_id)
551
567
  elsif native_elem.root
552
568
  # Document has root, add to it instead
553
569
  import_and_add(native_elem.doc, native_elem.root, native_child)
554
570
  end
555
571
  else
556
572
  import_and_add(native_elem.doc, native_elem, native_child)
573
+ doc = native_elem.doc || native_elem
574
+ append_child_sequence_on_doc(doc, native_elem, :native)
557
575
  end
558
576
  end
559
577
 
578
+ # Store entity ref on the document (stable identity).
579
+ # LibXML element wrappers are ephemeral, so we use == to find matching elements.
580
+ def store_entity_ref_on_doc(doc, element, ref)
581
+ pairs = attachments.get(doc, :_entity_ref_pairs) || []
582
+ pair = pairs.find { |elem, _| elem == element }
583
+ if pair
584
+ pair[1] << ref
585
+ else
586
+ pairs << [element, [ref]]
587
+ end
588
+ attachments.set(doc, :_entity_ref_pairs, pairs)
589
+ end
590
+
591
+ # Look up entity refs for an element from the document
592
+ def lookup_entity_refs(doc, element)
593
+ pairs = attachments.get(doc, :_entity_ref_pairs)
594
+ return nil unless pairs
595
+ pair = pairs.find { |elem, _| elem == element }
596
+ pair&.last
597
+ end
598
+
599
+ # Track child order on the document (stable identity)
600
+ def append_child_sequence_on_doc(doc, element, type)
601
+ pairs = attachments.get(doc, :_child_seq_pairs) || []
602
+ pair = pairs.find { |elem, _| elem == element }
603
+ if pair
604
+ pair[1] << type
605
+ else
606
+ pairs << [element, [type]]
607
+ end
608
+ attachments.set(doc, :_child_seq_pairs, pairs)
609
+ end
610
+
611
+ # Look up child sequence for an element from the document
612
+ def lookup_child_sequence(doc, element)
613
+ pairs = attachments.get(doc, :_child_seq_pairs)
614
+ return nil unless pairs
615
+ pair = pairs.find { |elem, _| elem == element }
616
+ pair&.last
617
+ end
618
+
619
+ def append_child_sequence(element, type)
620
+ seq = attachments.get(element, :child_sequence) || []
621
+ seq << type
622
+ attachments.set(element, :child_sequence, seq)
623
+ end
624
+
560
625
  def add_previous_sibling(node, sibling)
561
626
  return unless node && sibling
562
627
 
@@ -566,11 +631,11 @@ module Moxml
566
631
  # Special handling for document-level processing instructions
567
632
  # When adding a PI as sibling to root element, store it on document
568
633
  if sibling.is_a?(CustomizedLibxml::ProcessingInstruction) &&
569
- native_node.respond_to?(:doc) && native_node.doc
634
+ native_node.is_a?(::LibXML::XML::Node) && native_node.doc
570
635
  doc = native_node.doc
571
- pis = doc.instance_variable_get(:@moxml_pis) || []
636
+ pis = attachments.get(doc, :pis) || []
572
637
  pis << sibling
573
- doc.instance_variable_set(:@moxml_pis, pis)
638
+ attachments.set(doc, :pis, pis)
574
639
  return
575
640
  end
576
641
 
@@ -588,16 +653,7 @@ module Moxml
588
653
  def remove(node)
589
654
  # Handle Declaration wrapper - mark as removed on document
590
655
  if node.is_a?(CustomizedLibxml::Declaration)
591
- # The Declaration wrapper is stored on the actual document
592
- # We need to find which document it's stored on and mark it as removed
593
- # This is a bit tricky since the Declaration's native is its own internal doc
594
- # We rely on the fact that when a declaration is added to a document,
595
- # the document stores a reference to it in @moxml_declaration
596
- # So we need to clear that reference and mark it as removed
597
-
598
- # Since we can't easily find the parent document from the Declaration,
599
- # we'll set a flag on the Declaration itself
600
- node.instance_variable_set(:@removed, true)
656
+ node.removed = true
601
657
  return
602
658
  end
603
659
 
@@ -624,7 +680,7 @@ module Moxml
624
680
  next_sibling = native_node.next
625
681
 
626
682
  # Import if needed for cross-document operations
627
- parent_doc = parent.respond_to?(:doc) ? parent.doc : nil
683
+ parent_doc = parent.is_a?(::LibXML::XML::Node) ? parent.doc : nil
628
684
 
629
685
  # Use import_and_add to properly handle document adoption
630
686
  import_and_add(parent_doc, parent, native_new)
@@ -651,7 +707,7 @@ module Moxml
651
707
  native_elem.each_child(&:remove!)
652
708
 
653
709
  # Get the element's document for importing
654
- doc = native_elem.respond_to?(:doc) ? native_elem.doc : nil
710
+ doc = native_elem.is_a?(::LibXML::XML::Node) ? native_elem.doc : nil
655
711
 
656
712
  children.each do |c|
657
713
  native_c = unpatch_node(c)
@@ -662,6 +718,8 @@ module Moxml
662
718
  end
663
719
 
664
720
  def text_content(node)
721
+ return "" if node.is_a?(CustomizedLibxml::EntityReference)
722
+
665
723
  native_node = unpatch_node(node)
666
724
  return nil unless native_node
667
725
 
@@ -782,7 +840,7 @@ module Moxml
782
840
  def namespace_definitions(node)
783
841
  native_node = unpatch_node(node)
784
842
  return [] unless native_node
785
- return [] unless native_node.respond_to?(:namespaces)
843
+ return [] unless native_node.is_a?(::LibXML::XML::Node)
786
844
 
787
845
  native_node.namespaces.map do |ns|
788
846
  ns
@@ -835,15 +893,8 @@ module Moxml
835
893
 
836
894
  def serialize(node, options = {})
837
895
  # FIRST: Check if node is any kind of wrapper with custom to_xml
838
- if node.respond_to?(:to_xml)
839
- # Declaration wrapper
840
- return node.to_xml if node.is_a?(CustomizedLibxml::Declaration)
841
-
842
- # Other wrappers - check they're not native LibXML nodes
843
- unless node.is_a?(::LibXML::XML::Node) ||
844
- node.is_a?(::LibXML::XML::Document)
845
- return node.to_xml
846
- end
896
+ if node.is_a?(CustomizedLibxml::Node) || node.is_a?(DoctypeWrapper)
897
+ return node.to_xml
847
898
  end
848
899
 
849
900
  native_node = unpatch_node(node)
@@ -863,12 +914,10 @@ module Moxml
863
914
 
864
915
  if should_include_decl
865
916
  # Check if declaration was explicitly managed
866
- if native_node.instance_variable_defined?(:@moxml_declaration)
867
- decl = native_node.instance_variable_get(:@moxml_declaration)
917
+ decl = attachments.get(native_node, :declaration)
918
+ if decl
868
919
  # Only output declaration if it exists and wasn't removed
869
- if decl && !decl.instance_variable_get(:@removed)
870
- output << decl.to_xml
871
- end
920
+ output << decl.to_xml unless decl.removed
872
921
  else
873
922
  # No declaration stored - create default
874
923
  version = native_node.version || "1.0"
@@ -883,39 +932,33 @@ module Moxml
883
932
  encoding_val,
884
933
  nil, # No standalone by default
885
934
  )
886
- native_node.instance_variable_set(:@moxml_declaration, decl)
935
+ attachments.set(native_node, :declaration, decl)
887
936
  output << decl.to_xml
888
937
  end
889
938
  end
890
939
 
891
940
  # Add DOCTYPE if stored on document
892
- if native_node.instance_variable_defined?(:@moxml_doctype)
893
- doctype_wrapper = native_node.instance_variable_get(:@moxml_doctype)
894
- if doctype_wrapper
895
- output << "\n" unless output.empty?
896
- output << doctype_wrapper.to_xml
897
- end
941
+ doctype_wrapper = attachments.get(native_node, :doctype)
942
+ if doctype_wrapper
943
+ output << "\n" unless output.empty?
944
+ output << doctype_wrapper.to_xml
898
945
  end
899
946
 
900
947
  # Add document-level processing instructions if stored
901
- if native_node.instance_variable_defined?(:@moxml_pis)
902
- pis = native_node.instance_variable_get(:@moxml_pis)
903
- if pis && !pis.empty?
904
- pis.each do |pi|
905
- output << "\n" unless output.empty?
906
- output << pi.to_xml
907
- end
948
+ pis = attachments.get(native_node, :pis)
949
+ if pis && !pis.empty?
950
+ pis.each do |pi|
951
+ output << "\n" unless output.empty?
952
+ output << pi.to_xml
908
953
  end
909
954
  end
910
955
 
911
956
  # Add text nodes if stored (for documents without root)
912
- if native_node.instance_variable_defined?(:@moxml_texts)
913
- texts = native_node.instance_variable_get(:@moxml_texts)
914
- if texts && !texts.empty?
915
- texts.each do |text|
916
- output << "\n" unless output.empty?
917
- output << text.to_xml
918
- end
957
+ texts = attachments.get(native_node, :texts)
958
+ if texts && !texts.empty?
959
+ texts.each do |text|
960
+ output << "\n" unless output.empty?
961
+ output << text.to_xml
919
962
  end
920
963
  end
921
964
 
@@ -1033,7 +1076,7 @@ module Moxml
1033
1076
  return nil unless node
1034
1077
 
1035
1078
  # Unwrap if wrapped
1036
- native_node = node.respond_to?(:native) ? node.native : node
1079
+ native_node = unpatch_node(node)
1037
1080
 
1038
1081
  # LibXML is strict about document ownership
1039
1082
  # Create brand new NATIVE nodes that are document-independent
@@ -1057,7 +1100,7 @@ module Moxml
1057
1100
  # new_node.line = node.line
1058
1101
 
1059
1102
  # Copy and set namespace definitions FIRST
1060
- if native_node.respond_to?(:namespaces)
1103
+ if native_node.is_a?(::LibXML::XML::Node)
1061
1104
  # First, copy all namespace definitions
1062
1105
  native_node.namespaces.each do |ns|
1063
1106
  ::LibXML::XML::Namespace.new(
@@ -1144,7 +1187,12 @@ module Moxml
1144
1187
 
1145
1188
  def unpatch_node(node)
1146
1189
  # Unwrap to get native LibXML node
1147
- node.respond_to?(:native) ? node.native : node
1190
+ case node
1191
+ when CustomizedLibxml::Node, CustomizedLibxml::Declaration, DoctypeWrapper
1192
+ node.native
1193
+ else
1194
+ node
1195
+ end
1148
1196
  end
1149
1197
 
1150
1198
  def prepare_for_new_document(node, target_doc)
@@ -1156,13 +1204,35 @@ module Moxml
1156
1204
  duplicate_node(node)
1157
1205
  end
1158
1206
 
1207
+ def has_declaration?(native_doc, wrapper)
1208
+ decl = attachments.get(native_doc, :declaration)
1209
+ if decl
1210
+ !decl.removed
1211
+ else
1212
+ wrapper.has_xml_declaration
1213
+ end
1214
+ end
1215
+
1216
+ # LibXML's doc.root= creates a new Ruby wrapper with different object_id.
1217
+ # Return the actual root node so attachments are stored on the correct object.
1218
+ def actual_native(child_native, parent_native)
1219
+ if parent_native.is_a?(::LibXML::XML::Document)
1220
+ pending = attachments.get(parent_native, :_pending_root_refresh)
1221
+ if pending && pending == child_native.object_id
1222
+ attachments.delete(parent_native, :_pending_root_refresh)
1223
+ return parent_native.root
1224
+ end
1225
+ end
1226
+ child_native
1227
+ end
1228
+
1159
1229
  private
1160
1230
 
1161
1231
  def serialize_element(elem)
1162
1232
  output = "<#{elem.name}"
1163
1233
 
1164
1234
  # Add namespace definitions (only on this element, not ancestors)
1165
- if elem.respond_to?(:namespaces)
1235
+ if elem.is_a?(::LibXML::XML::Node)
1166
1236
  seen_ns = {}
1167
1237
  elem.namespaces.each do |ns|
1168
1238
  prefix = ns.prefix
@@ -1203,6 +1273,12 @@ module Moxml
1203
1273
  output << serialize_node(child)
1204
1274
  end
1205
1275
  end
1276
+
1277
+ # Append any EntityReference wrappers stored on the document
1278
+ doc = elem.doc
1279
+ entity_refs = doc ? lookup_entity_refs(doc, elem) : nil
1280
+ entity_refs&.each { |ref| output << ref.to_xml }
1281
+
1206
1282
  output << "</#{elem.name}>"
1207
1283
 
1208
1284
  output
@@ -1210,11 +1286,12 @@ module Moxml
1210
1286
 
1211
1287
  def serialize_node(node)
1212
1288
  # Check if node is a wrapper with to_xml method
1213
- if node.respond_to?(:to_xml) &&
1214
- (node.is_a?(CustomizedLibxml::ProcessingInstruction) ||
1215
- node.is_a?(CustomizedLibxml::Comment) ||
1216
- node.is_a?(CustomizedLibxml::Cdata) ||
1217
- node.is_a?(CustomizedLibxml::Text))
1289
+ case node
1290
+ when CustomizedLibxml::ProcessingInstruction,
1291
+ CustomizedLibxml::Comment,
1292
+ CustomizedLibxml::Cdata,
1293
+ CustomizedLibxml::Text,
1294
+ CustomizedLibxml::EntityReference
1218
1295
  return node.to_xml
1219
1296
  end
1220
1297
 
@@ -1269,7 +1346,7 @@ module Moxml
1269
1346
  raise unless e.message.include?("different documents")
1270
1347
 
1271
1348
  # Get the target document - either from parameter or element
1272
- target_doc = doc || (element.respond_to?(:doc) ? element.doc : nil)
1349
+ target_doc = doc || (element.is_a?(::LibXML::XML::Node) ? element.doc : nil)
1273
1350
 
1274
1351
  if target_doc
1275
1352
  # Use deep import to ensure all descendants are included
@@ -1329,11 +1406,11 @@ module Moxml
1329
1406
  # Include namespace definitions:
1330
1407
  # - On root element (include_ns = true), output ALL namespace definitions
1331
1408
  # - On child elements, output namespace definitions that override parent namespaces
1332
- if elem.respond_to?(:namespaces) && elem.namespaces.respond_to?(:definitions)
1409
+ if elem.is_a?(::LibXML::XML::Node) && elem.namespaces.respond_to?(:definitions)
1333
1410
  # Get parent's namespace definitions to detect overrides
1334
- parent_ns_defs = if !include_ns && elem.respond_to?(:parent) && elem.parent && !elem.parent.is_a?(::LibXML::XML::Document)
1411
+ parent_ns_defs = if !include_ns && elem.parent && !elem.parent.is_a?(::LibXML::XML::Document)
1335
1412
  parent_namespaces = {}
1336
- if elem.parent.respond_to?(:namespaces)
1413
+ if elem.parent.is_a?(::LibXML::XML::Node)
1337
1414
  elem.parent.namespaces.each do |ns|
1338
1415
  parent_namespaces[ns.prefix] = ns.href
1339
1416
  end
@@ -1381,17 +1458,54 @@ module Moxml
1381
1458
  end
1382
1459
  end
1383
1460
 
1461
+ # Check for entity refs stored on the document
1462
+ # LibXML element wrappers are ephemeral, so look up via == comparison
1463
+ doc = elem.doc
1464
+ entity_refs = doc ? lookup_entity_refs(doc, elem) : nil
1465
+ child_sequence = doc ? lookup_child_sequence(doc, elem) : nil
1466
+
1384
1467
  # Always use verbose format <tag></tag> for consistency with other adapters
1385
1468
  output << ">"
1386
- if elem.children?
1469
+
1470
+ if entity_refs && !entity_refs.empty? && child_sequence
1471
+ # Interleave native children with entity refs using tracked sequence
1472
+ native_children = []
1473
+ if elem.children?
1474
+ elem.each_child { |c| native_children << c unless c.text? && c.content.to_s.strip.empty? }
1475
+ end
1476
+
1477
+ eref_idx = 0
1478
+ native_idx = 0
1479
+ child_sequence.each do |type|
1480
+ case type
1481
+ when :native
1482
+ if native_idx < native_children.size
1483
+ child = native_children[native_idx]
1484
+ native_idx += 1
1485
+ wrapped_child = patch_node(child)
1486
+ output << if wrapped_child.is_a?(CustomizedLibxml::Node) && !wrapped_child.is_a?(CustomizedLibxml::Element)
1487
+ wrapped_child.to_xml
1488
+ elsif child.element?
1489
+ serialize_element_with_namespaces(child, false)
1490
+ else
1491
+ serialize_node(child)
1492
+ end
1493
+ end
1494
+ when :eref
1495
+ if eref_idx < entity_refs.size
1496
+ output << entity_refs[eref_idx].to_xml
1497
+ eref_idx += 1
1498
+ end
1499
+ end
1500
+ end
1501
+ elsif elem.children?
1387
1502
  elem.each_child do |child|
1388
1503
  # Skip whitespace-only text nodes
1389
1504
  next if child.text? && child.content.to_s.strip.empty?
1390
1505
 
1391
1506
  # Wrap the child and serialize
1392
1507
  wrapped_child = patch_node(child)
1393
- output << if wrapped_child.respond_to?(:to_xml) &&
1394
- !wrapped_child.is_a?(::LibXML::XML::Node)
1508
+ output << if wrapped_child.is_a?(CustomizedLibxml::Node) && !wrapped_child.is_a?(CustomizedLibxml::Element)
1395
1509
  # Use wrapper's to_xml for proper serialization
1396
1510
  wrapped_child.to_xml
1397
1511
  elsif child.element?
@@ -1421,7 +1535,7 @@ module Moxml
1421
1535
  else
1422
1536
  # Walk up to root first
1423
1537
  current = node
1424
- current = current.parent while current.respond_to?(:parent) && current.parent && !current.parent.is_a?(::LibXML::XML::Document)
1538
+ current = current.parent while current.is_a?(::LibXML::XML::Node) && current.parent && !current.parent.is_a?(::LibXML::XML::Document)
1425
1539
  current
1426
1540
  end
1427
1541
 
@@ -1435,7 +1549,7 @@ module Moxml
1435
1549
 
1436
1550
  def collect_ns_from_subtree(node, ns_defs)
1437
1551
  # Collect namespaces defined on this node
1438
- if node.respond_to?(:namespaces)
1552
+ if node.is_a?(::LibXML::XML::Node)
1439
1553
  node.namespaces.each do |ns|
1440
1554
  prefix = ns.prefix
1441
1555
  uri = ns.href
@@ -1453,7 +1567,7 @@ module Moxml
1453
1567
 
1454
1568
  # Also check if this element has an active namespace (inherited or own)
1455
1569
  # This catches cases where elements inherit namespaces from parents
1456
- if node.respond_to?(:namespaces) && node.namespaces.respond_to?(:namespace)
1570
+ if node.is_a?(::LibXML::XML::Node) && node.namespaces.respond_to?(:namespace)
1457
1571
  active_ns = node.namespaces.namespace
1458
1572
  if active_ns
1459
1573
  prefix = active_ns.prefix
@@ -1469,7 +1583,7 @@ module Moxml
1469
1583
  end
1470
1584
 
1471
1585
  # Recursively collect from children
1472
- return unless node.respond_to?(:children?) && node.children?
1586
+ return unless node.is_a?(::LibXML::XML::Node) && node.children?
1473
1587
 
1474
1588
  node.each_child do |child|
1475
1589
  collect_ns_from_subtree(child, ns_defs) if child.element?
@@ -1493,12 +1607,12 @@ module Moxml
1493
1607
  # Search element and ancestors for namespace with given prefix
1494
1608
  current = element
1495
1609
  while current
1496
- if current.respond_to?(:namespaces)
1610
+ if current.is_a?(::LibXML::XML::Node)
1497
1611
  current.namespaces.each do |ns|
1498
1612
  return ns if ns.prefix == prefix
1499
1613
  end
1500
1614
  end
1501
- current = current.respond_to?(:parent) ? current.parent : nil
1615
+ current = current.is_a?(::LibXML::XML::Node) ? current.parent : nil
1502
1616
  end
1503
1617
  nil
1504
1618
  end