moxml 0.1.15 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/.rubocop_todo.yml +49 -133
- data/README.adoc +18 -0
- data/Rakefile +31 -0
- data/benchmarks/generate_report.rb +1 -1
- data/lib/moxml/adapter/base.rb +79 -8
- data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +42 -20
- data/lib/moxml/adapter/headed_ox.rb +30 -12
- data/lib/moxml/adapter/libxml.rb +181 -68
- data/lib/moxml/adapter/nokogiri.rb +33 -11
- data/lib/moxml/adapter/oga.rb +51 -96
- data/lib/moxml/adapter/ox.rb +79 -21
- data/lib/moxml/adapter/rexml.rb +64 -11
- data/lib/moxml/attribute.rb +7 -1
- data/lib/moxml/builder.rb +77 -24
- data/lib/moxml/config.rb +18 -1
- data/lib/moxml/declaration.rb +4 -2
- data/lib/moxml/document.rb +5 -2
- data/lib/moxml/document_builder.rb +9 -8
- data/lib/moxml/element.rb +22 -13
- data/lib/moxml/entity_registry.rb +16 -2
- data/lib/moxml/native_attachment.rb +65 -0
- data/lib/moxml/node.rb +21 -50
- data/lib/moxml/node_set.rb +1 -1
- data/lib/moxml/text.rb +6 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +44 -22
- data/lib/moxml/xpath/parser.rb +12 -7
- data/lib/moxml.rb +1 -0
- data/scripts/format_xml.rb +16 -0
- data/scripts/pretty_format_xml.rb +14 -0
- data/spec/consistency/round_trip_spec.rb +3 -30
- data/spec/integration/all_adapters_spec.rb +2 -0
- data/spec/integration/headed_ox_integration_spec.rb +0 -2
- data/spec/integration/shared_examples/edge_cases.rb +3 -9
- data/spec/integration/shared_examples/entity_reference_whitespace.rb +122 -0
- data/spec/integration/shared_examples/integration_workflows.rb +3 -3
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
- data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
- data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
- data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
- data/spec/moxml/builder_spec.rb +249 -0
- data/spec/moxml/entity_preservation_spec.rb +130 -0
- data/spec/moxml/entity_reference_spec.rb +114 -0
- data/spec/moxml/entity_registry_spec.rb +68 -0
- data/spec/moxml/xpath/axes_spec.rb +0 -1
- data/spec/moxml/xpath/compiler_spec.rb +0 -2
- data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
- data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
- data/spec/performance/memory_usage_spec.rb +0 -4
- metadata +10 -2
|
@@ -30,8 +30,9 @@ module Moxml
|
|
|
30
30
|
# ~176K allocations per 100-element parse). Lazy parse defers wrapper
|
|
31
31
|
# creation until nodes are accessed, matching Ox adapter behavior.
|
|
32
32
|
def parse(xml, options = {}, _context = nil)
|
|
33
|
+
processed_xml = preprocess_entities(xml)
|
|
33
34
|
native_doc = begin
|
|
34
|
-
result = ::Ox.parse(
|
|
35
|
+
result = ::Ox.parse(processed_xml)
|
|
35
36
|
|
|
36
37
|
# result can be either Document or Element
|
|
37
38
|
if result.is_a?(::Ox::Document)
|
|
@@ -60,10 +61,10 @@ module Moxml
|
|
|
60
61
|
#
|
|
61
62
|
# This overrides the Ox adapter's xpath method which uses locate().
|
|
62
63
|
#
|
|
63
|
-
# @param
|
|
64
|
+
# @param node Starting node (native or wrapped)
|
|
64
65
|
# @param [String] expression XPath expression
|
|
65
66
|
# @param [Hash] namespaces Namespace prefix mappings
|
|
66
|
-
# @return [
|
|
67
|
+
# @return [Array, Object] Native node array or scalar value
|
|
67
68
|
def xpath(node, expression, namespaces = {})
|
|
68
69
|
# If we receive a native node, wrap it first
|
|
69
70
|
# Document#xpath passes @native, but our compiled XPath needs Moxml nodes
|
|
@@ -85,16 +86,33 @@ module Moxml
|
|
|
85
86
|
# Execute on the node (now guaranteed to be wrapped Moxml node)
|
|
86
87
|
result = proc.call(node)
|
|
87
88
|
|
|
88
|
-
#
|
|
89
|
+
# Return native arrays for Node#xpath to wrap, scalars directly.
|
|
90
|
+
# The adapter contract: xpath() returns Array<native> | scalar.
|
|
89
91
|
case result
|
|
90
92
|
when Array
|
|
91
|
-
#
|
|
92
|
-
#
|
|
93
|
-
|
|
94
|
-
|
|
93
|
+
# XPath engine returns wrapped Moxml::Node objects.
|
|
94
|
+
# Extract native nodes and deduplicate by object identity.
|
|
95
|
+
native_nodes = result.map { |n| n.is_a?(Moxml::Node) ? n.native : n }
|
|
96
|
+
seen = {}
|
|
97
|
+
native_nodes.select do |native|
|
|
98
|
+
id = native.object_id
|
|
99
|
+
if seen[id]
|
|
100
|
+
false
|
|
101
|
+
else
|
|
102
|
+
seen[id] = true
|
|
103
|
+
end
|
|
104
|
+
end
|
|
95
105
|
when NodeSet
|
|
96
|
-
#
|
|
97
|
-
|
|
106
|
+
# NodeSet from intermediate evaluation - extract natives and deduplicate
|
|
107
|
+
seen = {}
|
|
108
|
+
result.to_a.map(&:native).select do |native|
|
|
109
|
+
id = native.object_id
|
|
110
|
+
if seen[id]
|
|
111
|
+
false
|
|
112
|
+
else
|
|
113
|
+
seen[id] = true
|
|
114
|
+
end
|
|
115
|
+
end
|
|
98
116
|
else
|
|
99
117
|
# Scalar values (string, number, boolean) - return as-is
|
|
100
118
|
result
|
|
@@ -113,10 +131,10 @@ module Moxml
|
|
|
113
131
|
# @param [Moxml::Node] node Starting node
|
|
114
132
|
# @param [String] expression XPath expression
|
|
115
133
|
# @param [Hash] namespaces Namespace prefix mappings
|
|
116
|
-
# @return [
|
|
134
|
+
# @return [Object, nil] First native node or scalar value
|
|
117
135
|
def at_xpath(node, expression, namespaces = {})
|
|
118
136
|
result = xpath(node, expression, namespaces)
|
|
119
|
-
result.is_a?(
|
|
137
|
+
result.is_a?(Array) ? result.first : result
|
|
120
138
|
end
|
|
121
139
|
|
|
122
140
|
# Check if XPath is supported
|
data/lib/moxml/adapter/libxml.rb
CHANGED
|
@@ -38,6 +38,10 @@ module Moxml
|
|
|
38
38
|
end
|
|
39
39
|
|
|
40
40
|
class << self
|
|
41
|
+
def attachments
|
|
42
|
+
@attachments ||= Moxml::NativeAttachment.new
|
|
43
|
+
end
|
|
44
|
+
|
|
41
45
|
def set_root(doc, element)
|
|
42
46
|
doc.root = element
|
|
43
47
|
end
|
|
@@ -52,6 +56,11 @@ module Moxml
|
|
|
52
56
|
xml.to_s
|
|
53
57
|
end
|
|
54
58
|
|
|
59
|
+
# Preprocess entities before parsing.
|
|
60
|
+
# This converts the string to UTF-8; LibXML will use the encoding
|
|
61
|
+
# parameter or XML declaration for byte interpretation.
|
|
62
|
+
xml_string = preprocess_entities(xml_string)
|
|
63
|
+
|
|
55
64
|
# Extract DOCTYPE before parsing
|
|
56
65
|
doctype_match = xml_string.match(/<!DOCTYPE\s+(\S+)(?:\s+PUBLIC\s+"([^"]+)"\s+"([^"]+)"| \s+SYSTEM\s+"([^"]+)")?\s*>/i)
|
|
57
66
|
|
|
@@ -85,7 +94,7 @@ module Moxml
|
|
|
85
94
|
external_id,
|
|
86
95
|
system_id,
|
|
87
96
|
)
|
|
88
|
-
|
|
97
|
+
attachments.set(native_doc, :doctype, doctype_wrapper)
|
|
89
98
|
end
|
|
90
99
|
|
|
91
100
|
ctx = _context || Context.new(:libxml)
|
|
@@ -273,10 +282,8 @@ module Moxml
|
|
|
273
282
|
result = []
|
|
274
283
|
|
|
275
284
|
# Include DOCTYPE if present
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
result << doctype_wrapper if doctype_wrapper
|
|
279
|
-
end
|
|
285
|
+
doctype_wrapper = attachments.get(native_node, :doctype)
|
|
286
|
+
result << doctype_wrapper if doctype_wrapper
|
|
280
287
|
|
|
281
288
|
return result unless native_node.root
|
|
282
289
|
|
|
@@ -284,18 +291,19 @@ module Moxml
|
|
|
284
291
|
return result
|
|
285
292
|
end
|
|
286
293
|
|
|
287
|
-
return [] unless native_node.children?
|
|
288
|
-
|
|
289
294
|
result = []
|
|
290
|
-
native_node.
|
|
291
|
-
|
|
292
|
-
|
|
295
|
+
if native_node.children?
|
|
296
|
+
native_node.each_child do |child|
|
|
297
|
+
# Skip whitespace-only text nodes
|
|
298
|
+
next if child.text? && child.content.to_s.strip.empty?
|
|
293
299
|
|
|
294
|
-
|
|
300
|
+
result << patch_node(child)
|
|
301
|
+
end
|
|
295
302
|
end
|
|
296
303
|
|
|
297
|
-
# Include any EntityReference wrappers stored
|
|
298
|
-
|
|
304
|
+
# Include any EntityReference wrappers stored on the document
|
|
305
|
+
doc = native_node.doc
|
|
306
|
+
entity_refs = doc ? lookup_entity_refs(doc, native_node) : nil
|
|
299
307
|
result.concat(entity_refs) if entity_refs
|
|
300
308
|
|
|
301
309
|
result
|
|
@@ -499,11 +507,13 @@ module Moxml
|
|
|
499
507
|
native_child = unpatch_node(child)
|
|
500
508
|
|
|
501
509
|
# EntityReference wrappers can't go in LibXML's native tree.
|
|
502
|
-
# Store
|
|
510
|
+
# Store on the document (stable identity) keyed by element.
|
|
511
|
+
# LibXML creates new Ruby wrappers on each access, so element
|
|
512
|
+
# object_id is unstable — we look up via == comparison.
|
|
503
513
|
if child.is_a?(CustomizedLibxml::EntityReference)
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
514
|
+
doc = native_elem.is_a?(::LibXML::XML::Document) ? native_elem : native_elem.doc
|
|
515
|
+
store_entity_ref_on_doc(doc, native_elem, child)
|
|
516
|
+
append_child_sequence_on_doc(doc, native_elem, :eref)
|
|
507
517
|
return
|
|
508
518
|
end
|
|
509
519
|
|
|
@@ -524,32 +534,32 @@ module Moxml
|
|
|
524
534
|
if native_elem.is_a?(::LibXML::XML::Document)
|
|
525
535
|
# For Declaration wrappers, store them for serialization
|
|
526
536
|
if child.is_a?(CustomizedLibxml::Declaration)
|
|
527
|
-
|
|
537
|
+
attachments.set(native_elem, :declaration, child)
|
|
528
538
|
# Also store reference to parent document in the declaration
|
|
529
|
-
child.
|
|
539
|
+
child.parent_doc = native_elem
|
|
530
540
|
return
|
|
531
541
|
end
|
|
532
542
|
|
|
533
543
|
# For DOCTYPE wrappers, store them for serialization
|
|
534
544
|
if child.is_a?(DoctypeWrapper)
|
|
535
|
-
|
|
545
|
+
attachments.set(native_elem, :doctype, child)
|
|
536
546
|
return
|
|
537
547
|
end
|
|
538
548
|
|
|
539
549
|
# For document-level PIs, store them for serialization
|
|
540
550
|
if child.is_a?(CustomizedLibxml::ProcessingInstruction)
|
|
541
|
-
pis =
|
|
551
|
+
pis = attachments.get(native_elem, :pis) || []
|
|
542
552
|
pis << child
|
|
543
|
-
|
|
553
|
+
attachments.set(native_elem, :pis, pis)
|
|
544
554
|
return
|
|
545
555
|
end
|
|
546
556
|
|
|
547
557
|
# For text nodes added to document, store them for serialization
|
|
548
558
|
# Documents can't have text children in LibXML
|
|
549
559
|
if child.is_a?(CustomizedLibxml::Text)
|
|
550
|
-
texts =
|
|
560
|
+
texts = attachments.get(native_elem, :texts) || []
|
|
551
561
|
texts << child
|
|
552
|
-
|
|
562
|
+
attachments.set(native_elem, :texts, texts)
|
|
553
563
|
return
|
|
554
564
|
end
|
|
555
565
|
|
|
@@ -557,13 +567,64 @@ module Moxml
|
|
|
557
567
|
if native_elem.root.nil? && node_type(native_child) == :element
|
|
558
568
|
# Set as root element
|
|
559
569
|
native_elem.root = native_child
|
|
570
|
+
# Flag for actual_native to refresh the wrapper's native reference
|
|
571
|
+
attachments.set(native_elem, :_pending_root_refresh, native_child.object_id)
|
|
560
572
|
elsif native_elem.root
|
|
561
573
|
# Document has root, add to it instead
|
|
562
574
|
import_and_add(native_elem.doc, native_elem.root, native_child)
|
|
563
575
|
end
|
|
564
576
|
else
|
|
565
577
|
import_and_add(native_elem.doc, native_elem, native_child)
|
|
578
|
+
doc = native_elem.doc || native_elem
|
|
579
|
+
append_child_sequence_on_doc(doc, native_elem, :native)
|
|
580
|
+
end
|
|
581
|
+
end
|
|
582
|
+
|
|
583
|
+
# Store entity ref on the document (stable identity).
|
|
584
|
+
# LibXML element wrappers are ephemeral, so we use == to find matching elements.
|
|
585
|
+
def store_entity_ref_on_doc(doc, element, ref)
|
|
586
|
+
pairs = attachments.get(doc, :_entity_ref_pairs) || []
|
|
587
|
+
pair = pairs.find { |elem, _| elem == element }
|
|
588
|
+
if pair
|
|
589
|
+
pair[1] << ref
|
|
590
|
+
else
|
|
591
|
+
pairs << [element, [ref]]
|
|
566
592
|
end
|
|
593
|
+
attachments.set(doc, :_entity_ref_pairs, pairs)
|
|
594
|
+
end
|
|
595
|
+
|
|
596
|
+
# Look up entity refs for an element from the document
|
|
597
|
+
def lookup_entity_refs(doc, element)
|
|
598
|
+
pairs = attachments.get(doc, :_entity_ref_pairs)
|
|
599
|
+
return nil unless pairs
|
|
600
|
+
pair = pairs.find { |elem, _| elem == element }
|
|
601
|
+
pair&.last
|
|
602
|
+
end
|
|
603
|
+
|
|
604
|
+
# Track child order on the document (stable identity)
|
|
605
|
+
def append_child_sequence_on_doc(doc, element, type)
|
|
606
|
+
pairs = attachments.get(doc, :_child_seq_pairs) || []
|
|
607
|
+
pair = pairs.find { |elem, _| elem == element }
|
|
608
|
+
if pair
|
|
609
|
+
pair[1] << type
|
|
610
|
+
else
|
|
611
|
+
pairs << [element, [type]]
|
|
612
|
+
end
|
|
613
|
+
attachments.set(doc, :_child_seq_pairs, pairs)
|
|
614
|
+
end
|
|
615
|
+
|
|
616
|
+
# Look up child sequence for an element from the document
|
|
617
|
+
def lookup_child_sequence(doc, element)
|
|
618
|
+
pairs = attachments.get(doc, :_child_seq_pairs)
|
|
619
|
+
return nil unless pairs
|
|
620
|
+
pair = pairs.find { |elem, _| elem == element }
|
|
621
|
+
pair&.last
|
|
622
|
+
end
|
|
623
|
+
|
|
624
|
+
def append_child_sequence(element, type)
|
|
625
|
+
seq = attachments.get(element, :child_sequence) || []
|
|
626
|
+
seq << type
|
|
627
|
+
attachments.set(element, :child_sequence, seq)
|
|
567
628
|
end
|
|
568
629
|
|
|
569
630
|
def add_previous_sibling(node, sibling)
|
|
@@ -577,9 +638,9 @@ module Moxml
|
|
|
577
638
|
if sibling.is_a?(CustomizedLibxml::ProcessingInstruction) &&
|
|
578
639
|
native_node.is_a?(::LibXML::XML::Node) && native_node.doc
|
|
579
640
|
doc = native_node.doc
|
|
580
|
-
pis =
|
|
641
|
+
pis = attachments.get(doc, :pis) || []
|
|
581
642
|
pis << sibling
|
|
582
|
-
|
|
643
|
+
attachments.set(doc, :pis, pis)
|
|
583
644
|
return
|
|
584
645
|
end
|
|
585
646
|
|
|
@@ -597,16 +658,7 @@ module Moxml
|
|
|
597
658
|
def remove(node)
|
|
598
659
|
# Handle Declaration wrapper - mark as removed on document
|
|
599
660
|
if node.is_a?(CustomizedLibxml::Declaration)
|
|
600
|
-
|
|
601
|
-
# We need to find which document it's stored on and mark it as removed
|
|
602
|
-
# This is a bit tricky since the Declaration's native is its own internal doc
|
|
603
|
-
# We rely on the fact that when a declaration is added to a document,
|
|
604
|
-
# the document stores a reference to it in @moxml_declaration
|
|
605
|
-
# So we need to clear that reference and mark it as removed
|
|
606
|
-
|
|
607
|
-
# Since we can't easily find the parent document from the Declaration,
|
|
608
|
-
# we'll set a flag on the Declaration itself
|
|
609
|
-
node.instance_variable_set(:@removed, true)
|
|
661
|
+
node.removed = true
|
|
610
662
|
return
|
|
611
663
|
end
|
|
612
664
|
|
|
@@ -795,9 +847,17 @@ module Moxml
|
|
|
795
847
|
return [] unless native_node
|
|
796
848
|
return [] unless native_node.is_a?(::LibXML::XML::Node)
|
|
797
849
|
|
|
798
|
-
native_node.namespaces
|
|
799
|
-
|
|
800
|
-
|
|
850
|
+
namespaces = native_node.namespaces
|
|
851
|
+
return [] unless namespaces
|
|
852
|
+
|
|
853
|
+
namespace_list =
|
|
854
|
+
if namespaces.respond_to?(:definitions)
|
|
855
|
+
namespaces.definitions
|
|
856
|
+
else
|
|
857
|
+
namespaces
|
|
858
|
+
end
|
|
859
|
+
|
|
860
|
+
namespace_list.to_a
|
|
801
861
|
end
|
|
802
862
|
|
|
803
863
|
# Doctype accessor methods
|
|
@@ -867,12 +927,10 @@ module Moxml
|
|
|
867
927
|
|
|
868
928
|
if should_include_decl
|
|
869
929
|
# Check if declaration was explicitly managed
|
|
870
|
-
|
|
871
|
-
|
|
930
|
+
decl = attachments.get(native_node, :declaration)
|
|
931
|
+
if decl
|
|
872
932
|
# Only output declaration if it exists and wasn't removed
|
|
873
|
-
|
|
874
|
-
output << decl.to_xml
|
|
875
|
-
end
|
|
933
|
+
output << decl.to_xml unless decl.removed
|
|
876
934
|
else
|
|
877
935
|
# No declaration stored - create default
|
|
878
936
|
version = native_node.version || "1.0"
|
|
@@ -887,39 +945,33 @@ module Moxml
|
|
|
887
945
|
encoding_val,
|
|
888
946
|
nil, # No standalone by default
|
|
889
947
|
)
|
|
890
|
-
|
|
948
|
+
attachments.set(native_node, :declaration, decl)
|
|
891
949
|
output << decl.to_xml
|
|
892
950
|
end
|
|
893
951
|
end
|
|
894
952
|
|
|
895
953
|
# Add DOCTYPE if stored on document
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
output << doctype_wrapper.to_xml
|
|
901
|
-
end
|
|
954
|
+
doctype_wrapper = attachments.get(native_node, :doctype)
|
|
955
|
+
if doctype_wrapper
|
|
956
|
+
output << "\n" unless output.empty?
|
|
957
|
+
output << doctype_wrapper.to_xml
|
|
902
958
|
end
|
|
903
959
|
|
|
904
960
|
# Add document-level processing instructions if stored
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
output << pi.to_xml
|
|
911
|
-
end
|
|
961
|
+
pis = attachments.get(native_node, :pis)
|
|
962
|
+
if pis && !pis.empty?
|
|
963
|
+
pis.each do |pi|
|
|
964
|
+
output << "\n" unless output.empty?
|
|
965
|
+
output << pi.to_xml
|
|
912
966
|
end
|
|
913
967
|
end
|
|
914
968
|
|
|
915
969
|
# Add text nodes if stored (for documents without root)
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
output << text.to_xml
|
|
922
|
-
end
|
|
970
|
+
texts = attachments.get(native_node, :texts)
|
|
971
|
+
if texts && !texts.empty?
|
|
972
|
+
texts.each do |text|
|
|
973
|
+
output << "\n" unless output.empty?
|
|
974
|
+
output << text.to_xml
|
|
923
975
|
end
|
|
924
976
|
end
|
|
925
977
|
|
|
@@ -1165,6 +1217,28 @@ module Moxml
|
|
|
1165
1217
|
duplicate_node(node)
|
|
1166
1218
|
end
|
|
1167
1219
|
|
|
1220
|
+
def has_declaration?(native_doc, wrapper)
|
|
1221
|
+
decl = attachments.get(native_doc, :declaration)
|
|
1222
|
+
if decl
|
|
1223
|
+
!decl.removed
|
|
1224
|
+
else
|
|
1225
|
+
wrapper.has_xml_declaration
|
|
1226
|
+
end
|
|
1227
|
+
end
|
|
1228
|
+
|
|
1229
|
+
# LibXML's doc.root= creates a new Ruby wrapper with different object_id.
|
|
1230
|
+
# Return the actual root node so attachments are stored on the correct object.
|
|
1231
|
+
def actual_native(child_native, parent_native)
|
|
1232
|
+
if parent_native.is_a?(::LibXML::XML::Document)
|
|
1233
|
+
pending = attachments.get(parent_native, :_pending_root_refresh)
|
|
1234
|
+
if pending && pending == child_native.object_id
|
|
1235
|
+
attachments.delete(parent_native, :_pending_root_refresh)
|
|
1236
|
+
return parent_native.root
|
|
1237
|
+
end
|
|
1238
|
+
end
|
|
1239
|
+
child_native
|
|
1240
|
+
end
|
|
1241
|
+
|
|
1168
1242
|
private
|
|
1169
1243
|
|
|
1170
1244
|
def serialize_element(elem)
|
|
@@ -1213,8 +1287,9 @@ module Moxml
|
|
|
1213
1287
|
end
|
|
1214
1288
|
end
|
|
1215
1289
|
|
|
1216
|
-
# Append any EntityReference wrappers stored on
|
|
1217
|
-
|
|
1290
|
+
# Append any EntityReference wrappers stored on the document
|
|
1291
|
+
doc = elem.doc
|
|
1292
|
+
entity_refs = doc ? lookup_entity_refs(doc, elem) : nil
|
|
1218
1293
|
entity_refs&.each { |ref| output << ref.to_xml }
|
|
1219
1294
|
|
|
1220
1295
|
output << "</#{elem.name}>"
|
|
@@ -1396,9 +1471,47 @@ module Moxml
|
|
|
1396
1471
|
end
|
|
1397
1472
|
end
|
|
1398
1473
|
|
|
1474
|
+
# Check for entity refs stored on the document
|
|
1475
|
+
# LibXML element wrappers are ephemeral, so look up via == comparison
|
|
1476
|
+
doc = elem.doc
|
|
1477
|
+
entity_refs = doc ? lookup_entity_refs(doc, elem) : nil
|
|
1478
|
+
child_sequence = doc ? lookup_child_sequence(doc, elem) : nil
|
|
1479
|
+
|
|
1399
1480
|
# Always use verbose format <tag></tag> for consistency with other adapters
|
|
1400
1481
|
output << ">"
|
|
1401
|
-
|
|
1482
|
+
|
|
1483
|
+
if entity_refs && !entity_refs.empty? && child_sequence
|
|
1484
|
+
# Interleave native children with entity refs using tracked sequence
|
|
1485
|
+
native_children = []
|
|
1486
|
+
if elem.children?
|
|
1487
|
+
elem.each_child { |c| native_children << c unless c.text? && c.content.to_s.strip.empty? }
|
|
1488
|
+
end
|
|
1489
|
+
|
|
1490
|
+
eref_idx = 0
|
|
1491
|
+
native_idx = 0
|
|
1492
|
+
child_sequence.each do |type|
|
|
1493
|
+
case type
|
|
1494
|
+
when :native
|
|
1495
|
+
if native_idx < native_children.size
|
|
1496
|
+
child = native_children[native_idx]
|
|
1497
|
+
native_idx += 1
|
|
1498
|
+
wrapped_child = patch_node(child)
|
|
1499
|
+
output << if wrapped_child.is_a?(CustomizedLibxml::Node) && !wrapped_child.is_a?(CustomizedLibxml::Element)
|
|
1500
|
+
wrapped_child.to_xml
|
|
1501
|
+
elsif child.element?
|
|
1502
|
+
serialize_element_with_namespaces(child, false)
|
|
1503
|
+
else
|
|
1504
|
+
serialize_node(child)
|
|
1505
|
+
end
|
|
1506
|
+
end
|
|
1507
|
+
when :eref
|
|
1508
|
+
if eref_idx < entity_refs.size
|
|
1509
|
+
output << entity_refs[eref_idx].to_xml
|
|
1510
|
+
eref_idx += 1
|
|
1511
|
+
end
|
|
1512
|
+
end
|
|
1513
|
+
end
|
|
1514
|
+
elsif elem.children?
|
|
1402
1515
|
elem.each_child do |child|
|
|
1403
1516
|
# Skip whitespace-only text nodes
|
|
1404
1517
|
next if child.text? && child.content.to_s.strip.empty?
|
|
@@ -7,19 +7,27 @@ module Moxml
|
|
|
7
7
|
module Adapter
|
|
8
8
|
class Nokogiri < Base
|
|
9
9
|
class << self
|
|
10
|
+
def attachments
|
|
11
|
+
@attachments ||= Moxml::NativeAttachment.new
|
|
12
|
+
end
|
|
13
|
+
|
|
10
14
|
def set_root(doc, element)
|
|
11
15
|
doc.root = element
|
|
12
16
|
end
|
|
13
17
|
|
|
14
18
|
def parse(xml, options = {}, _context = nil)
|
|
19
|
+
processed_xml = preprocess_entities(xml)
|
|
20
|
+
|
|
21
|
+
# preprocess_entities always returns UTF-8, so tell Nokogiri to
|
|
22
|
+
# parse as UTF-8 regardless of any original encoding option.
|
|
15
23
|
native_doc = begin
|
|
16
24
|
if options[:fragment]
|
|
17
|
-
::Nokogiri::XML::DocumentFragment.parse(
|
|
25
|
+
::Nokogiri::XML::DocumentFragment.parse(processed_xml) do |config|
|
|
18
26
|
config.strict.nonet
|
|
19
27
|
config.recover unless options[:strict]
|
|
20
28
|
end
|
|
21
29
|
else
|
|
22
|
-
::Nokogiri::XML(
|
|
30
|
+
::Nokogiri::XML(processed_xml, nil, "UTF-8") do |config|
|
|
23
31
|
config.strict.nonet
|
|
24
32
|
config.recover unless options[:strict]
|
|
25
33
|
end
|
|
@@ -176,10 +184,16 @@ module Moxml
|
|
|
176
184
|
def children(node)
|
|
177
185
|
node.children.reject do |child|
|
|
178
186
|
child.text? && child.content.strip.empty? &&
|
|
179
|
-
!(child.previous_sibling.nil? && child.next_sibling.nil?)
|
|
187
|
+
!(child.previous_sibling.nil? && child.next_sibling.nil?) &&
|
|
188
|
+
!adjacent_to_entity_reference?(child)
|
|
180
189
|
end
|
|
181
190
|
end
|
|
182
191
|
|
|
192
|
+
def adjacent_to_entity_reference?(node)
|
|
193
|
+
node.previous_sibling.is_a?(::Nokogiri::XML::EntityReference) ||
|
|
194
|
+
node.next_sibling.is_a?(::Nokogiri::XML::EntityReference)
|
|
195
|
+
end
|
|
196
|
+
|
|
183
197
|
def replace_children(node, new_children)
|
|
184
198
|
node.children.unlink
|
|
185
199
|
new_children.each { |child| add_child(node, child) }
|
|
@@ -241,8 +255,8 @@ module Moxml
|
|
|
241
255
|
encoding = declaration_attribute(child, "encoding")
|
|
242
256
|
standalone = declaration_attribute(child, "standalone")
|
|
243
257
|
|
|
244
|
-
#
|
|
245
|
-
|
|
258
|
+
# Store declaration state in attachment map
|
|
259
|
+
attachments.set(element, :xml_decl, {
|
|
246
260
|
version: version,
|
|
247
261
|
encoding: encoding,
|
|
248
262
|
standalone: standalone,
|
|
@@ -273,7 +287,7 @@ module Moxml
|
|
|
273
287
|
node.name == "xml" &&
|
|
274
288
|
node.parent.is_a?(::Nokogiri::XML::Document)
|
|
275
289
|
# Clear document's xml_decl when removing declaration
|
|
276
|
-
node.parent
|
|
290
|
+
attachments.set(node.parent, :xml_decl, nil)
|
|
277
291
|
end
|
|
278
292
|
|
|
279
293
|
node.remove
|
|
@@ -284,7 +298,7 @@ module Moxml
|
|
|
284
298
|
end
|
|
285
299
|
|
|
286
300
|
def text_content(node)
|
|
287
|
-
node.text
|
|
301
|
+
node.text.to_s
|
|
288
302
|
end
|
|
289
303
|
|
|
290
304
|
def inner_text(node)
|
|
@@ -387,12 +401,12 @@ module Moxml
|
|
|
387
401
|
# Handle declaration option
|
|
388
402
|
# Priority:
|
|
389
403
|
# 1. Explicit no_declaration option
|
|
390
|
-
# 2. Check
|
|
404
|
+
# 2. Check attachment-stored xml_decl (when remove is called, this becomes nil)
|
|
391
405
|
if options.key?(:no_declaration)
|
|
392
406
|
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if options[:no_declaration]
|
|
393
|
-
elsif
|
|
394
|
-
#
|
|
395
|
-
xml_decl =
|
|
407
|
+
elsif attachments.key?(node, :xml_decl)
|
|
408
|
+
# State stored in attachment - if nil, declaration was removed
|
|
409
|
+
xml_decl = attachments.get(node, :xml_decl)
|
|
396
410
|
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if xml_decl.nil?
|
|
397
411
|
end
|
|
398
412
|
|
|
@@ -403,6 +417,14 @@ module Moxml
|
|
|
403
417
|
)
|
|
404
418
|
end
|
|
405
419
|
|
|
420
|
+
def has_declaration?(native_doc, wrapper)
|
|
421
|
+
if attachments.key?(native_doc, :xml_decl)
|
|
422
|
+
!attachments.get(native_doc, :xml_decl).nil?
|
|
423
|
+
else
|
|
424
|
+
wrapper.has_xml_declaration
|
|
425
|
+
end
|
|
426
|
+
end
|
|
427
|
+
|
|
406
428
|
private
|
|
407
429
|
|
|
408
430
|
def build_declaration_attrs(version, encoding, standalone)
|