moxml 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +117 -66
- data/Gemfile +1 -0
- data/README.adoc +11 -9
- data/Rakefile +3 -1
- data/docs/_pages/configuration.adoc +22 -19
- data/docs/_tutorials/namespace-handling.adoc +5 -5
- data/lib/moxml/adapter/base.rb +8 -3
- data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
- data/lib/moxml/adapter/customized_libxml.rb +18 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +2 -2
- data/lib/moxml/adapter/customized_oga.rb +10 -0
- data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
- data/lib/moxml/adapter/customized_ox.rb +12 -0
- data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
- data/lib/moxml/adapter/customized_rexml/formatter.rb +2 -0
- data/lib/moxml/adapter/customized_rexml.rb +11 -0
- data/lib/moxml/adapter/headed_ox.rb +9 -3
- data/lib/moxml/adapter/libxml.rb +76 -62
- data/lib/moxml/adapter/nokogiri.rb +4 -5
- data/lib/moxml/adapter/oga.rb +50 -26
- data/lib/moxml/adapter/ox.rb +189 -41
- data/lib/moxml/adapter/rexml.rb +27 -8
- data/lib/moxml/attribute.rb +3 -0
- data/lib/moxml/builder.rb +1 -0
- data/lib/moxml/config.rb +7 -7
- data/lib/moxml/document.rb +5 -1
- data/lib/moxml/document_builder.rb +37 -31
- data/lib/moxml/element.rb +13 -5
- data/lib/moxml/entity_registry.rb +36 -0
- data/lib/moxml/node.rb +23 -2
- data/lib/moxml/node_set.rb +43 -15
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +1 -1
- data/spec/integration/shared_examples/edge_cases.rb +3 -0
- data/spec/moxml/adapter/oga_spec.rb +62 -0
- data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
- data/spec/moxml/allocation_benchmark_spec.rb +96 -0
- data/spec/moxml/allocation_guard_spec.rb +282 -0
- data/spec/moxml/builder_spec.rb +22 -0
- data/spec/moxml/config_spec.rb +11 -11
- data/spec/moxml/doctype_spec.rb +41 -0
- data/spec/moxml/lazy_parse_spec.rb +115 -0
- data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
- data/spec/moxml/node_cache_spec.rb +110 -0
- data/spec/moxml/node_set_cache_spec.rb +90 -0
- data/spec/moxml/xml_utils_spec.rb +32 -0
- data/spec/support/allocation_helper.rb +165 -0
- data/spec/support/w3c_namespace_helpers.rb +2 -1
- metadata +15 -2
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
module CustomizedRexml
|
|
6
|
+
class EntityReference
|
|
7
|
+
attr_reader :name
|
|
8
|
+
|
|
9
|
+
def initialize(name)
|
|
10
|
+
@name = name
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def ==(other)
|
|
14
|
+
other.is_a?(self.class) && @name == other.name
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
@@ -25,17 +25,23 @@ module Moxml
|
|
|
25
25
|
#
|
|
26
26
|
class HeadedOx < Ox
|
|
27
27
|
class << self
|
|
28
|
-
# Override parse to use
|
|
29
|
-
|
|
28
|
+
# Override parse to use lazy wrapping like the Ox adapter.
|
|
29
|
+
# Previously used DocumentBuilder (eager tree construction causing
|
|
30
|
+
# ~176K allocations per 100-element parse). Lazy parse defers wrapper
|
|
31
|
+
# creation until nodes are accessed, matching Ox adapter behavior.
|
|
32
|
+
def parse(xml, options = {}, _context = nil)
|
|
30
33
|
native_doc = begin
|
|
31
34
|
result = ::Ox.parse(xml)
|
|
32
35
|
|
|
33
36
|
# result can be either Document or Element
|
|
34
37
|
if result.is_a?(::Ox::Document)
|
|
38
|
+
assign_parents(result)
|
|
39
|
+
validate_single_root(result) if options[:strict]
|
|
35
40
|
result
|
|
36
41
|
else
|
|
37
42
|
doc = ::Ox::Document.new
|
|
38
43
|
doc << result
|
|
44
|
+
assign_parents(doc)
|
|
39
45
|
doc
|
|
40
46
|
end
|
|
41
47
|
rescue ::Ox::ParseError => e
|
|
@@ -47,7 +53,7 @@ module Moxml
|
|
|
47
53
|
|
|
48
54
|
# Use provided context if available, otherwise create new one
|
|
49
55
|
ctx = _context || Context.new(:headed_ox)
|
|
50
|
-
|
|
56
|
+
Document.new(native_doc, ctx)
|
|
51
57
|
end
|
|
52
58
|
|
|
53
59
|
# Execute XPath query using Moxml's XPath engine
|
data/lib/moxml/adapter/libxml.rb
CHANGED
|
@@ -2,13 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "base"
|
|
4
4
|
require "libxml"
|
|
5
|
-
require_relative "customized_libxml
|
|
6
|
-
require_relative "customized_libxml/element"
|
|
7
|
-
require_relative "customized_libxml/text"
|
|
8
|
-
require_relative "customized_libxml/comment"
|
|
9
|
-
require_relative "customized_libxml/cdata"
|
|
10
|
-
require_relative "customized_libxml/processing_instruction"
|
|
11
|
-
require_relative "customized_libxml/declaration"
|
|
5
|
+
require_relative "customized_libxml"
|
|
12
6
|
|
|
13
7
|
module Moxml
|
|
14
8
|
module Adapter
|
|
@@ -52,7 +46,7 @@ module Moxml
|
|
|
52
46
|
# LibXML doesn't preserve DOCTYPE during parsing, so we need to extract it manually
|
|
53
47
|
xml_string = if xml.is_a?(String)
|
|
54
48
|
xml
|
|
55
|
-
elsif xml.
|
|
49
|
+
elsif xml.is_a?(IO) || xml.is_a?(StringIO)
|
|
56
50
|
xml.read
|
|
57
51
|
else
|
|
58
52
|
xml.to_s
|
|
@@ -67,7 +61,7 @@ module Moxml
|
|
|
67
61
|
parser.parse
|
|
68
62
|
rescue ::LibXML::XML::Error => e
|
|
69
63
|
if options[:strict]
|
|
70
|
-
line = e.
|
|
64
|
+
line = e.line
|
|
71
65
|
raise Moxml::ParseError.new(
|
|
72
66
|
e.message,
|
|
73
67
|
line: line,
|
|
@@ -116,8 +110,12 @@ module Moxml
|
|
|
116
110
|
# Parse
|
|
117
111
|
parser.parse
|
|
118
112
|
rescue ::LibXML::XML::Error => e
|
|
119
|
-
line = e.
|
|
120
|
-
column =
|
|
113
|
+
line = e.line
|
|
114
|
+
column = begin
|
|
115
|
+
e.column
|
|
116
|
+
rescue StandardError
|
|
117
|
+
nil
|
|
118
|
+
end
|
|
121
119
|
error = Moxml::ParseError.new(e.message, line: line, column: column)
|
|
122
120
|
handler.on_error(error)
|
|
123
121
|
end
|
|
@@ -135,6 +133,14 @@ module Moxml
|
|
|
135
133
|
CustomizedLibxml::Text.new(native)
|
|
136
134
|
end
|
|
137
135
|
|
|
136
|
+
def create_native_entity_reference(name)
|
|
137
|
+
CustomizedLibxml::EntityReference.new(name)
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def entity_reference_name(node)
|
|
141
|
+
node.name if node.is_a?(CustomizedLibxml::EntityReference)
|
|
142
|
+
end
|
|
143
|
+
|
|
138
144
|
def create_native_cdata(content, _owner_doc = nil)
|
|
139
145
|
native = ::LibXML::XML::Node.new_cdata(content.to_s)
|
|
140
146
|
CustomizedLibxml::Cdata.new(native)
|
|
@@ -174,10 +180,11 @@ module Moxml
|
|
|
174
180
|
if node.is_a?(CustomizedLibxml::ProcessingInstruction)
|
|
175
181
|
return :processing_instruction
|
|
176
182
|
end
|
|
183
|
+
return :entity_reference if node.is_a?(CustomizedLibxml::EntityReference)
|
|
177
184
|
return :doctype if node.is_a?(DoctypeWrapper)
|
|
178
185
|
|
|
179
186
|
# Unwrap if needed
|
|
180
|
-
native_node = node
|
|
187
|
+
native_node = unpatch_node(node)
|
|
181
188
|
|
|
182
189
|
case native_node.node_type
|
|
183
190
|
when ::LibXML::XML::Node::DOCUMENT_NODE
|
|
@@ -266,21 +273,9 @@ module Moxml
|
|
|
266
273
|
result = []
|
|
267
274
|
|
|
268
275
|
# Include DOCTYPE if present
|
|
269
|
-
# First check if we stored it as instance variable (from parse)
|
|
270
276
|
if native_node.instance_variable_defined?(:@moxml_doctype)
|
|
271
277
|
doctype_wrapper = native_node.instance_variable_get(:@moxml_doctype)
|
|
272
278
|
result << doctype_wrapper if doctype_wrapper
|
|
273
|
-
elsif native_node.respond_to?(:dtd) && native_node.dtd
|
|
274
|
-
# Otherwise check dtd property directly
|
|
275
|
-
dtd = native_node.dtd
|
|
276
|
-
# Wrap in DoctypeWrapper for consistency
|
|
277
|
-
doctype_wrapper = DoctypeWrapper.new(
|
|
278
|
-
native_node,
|
|
279
|
-
dtd.name,
|
|
280
|
-
dtd.external_id,
|
|
281
|
-
dtd.system_id,
|
|
282
|
-
)
|
|
283
|
-
result << doctype_wrapper
|
|
284
279
|
end
|
|
285
280
|
|
|
286
281
|
return result unless native_node.root
|
|
@@ -298,6 +293,11 @@ module Moxml
|
|
|
298
293
|
|
|
299
294
|
result << patch_node(child)
|
|
300
295
|
end
|
|
296
|
+
|
|
297
|
+
# Include any EntityReference wrappers stored alongside native children
|
|
298
|
+
entity_refs = native_node.instance_variable_get(:@moxml_entity_refs)
|
|
299
|
+
result.concat(entity_refs) if entity_refs
|
|
300
|
+
|
|
301
301
|
result
|
|
302
302
|
end
|
|
303
303
|
|
|
@@ -350,7 +350,7 @@ module Moxml
|
|
|
350
350
|
def attributes(element)
|
|
351
351
|
native_elem = unpatch_node(element)
|
|
352
352
|
return [] unless native_elem
|
|
353
|
-
unless native_elem.
|
|
353
|
+
unless native_elem.is_a?(::LibXML::XML::Node) && native_elem.element?
|
|
354
354
|
return []
|
|
355
355
|
end
|
|
356
356
|
return [] unless native_elem.attributes?
|
|
@@ -368,7 +368,7 @@ module Moxml
|
|
|
368
368
|
|
|
369
369
|
def attribute_namespace(attr)
|
|
370
370
|
return nil unless attr
|
|
371
|
-
return nil unless attr.
|
|
371
|
+
return nil unless attr.is_a?(::LibXML::XML::Attr)
|
|
372
372
|
|
|
373
373
|
attr.ns
|
|
374
374
|
end
|
|
@@ -498,11 +498,20 @@ module Moxml
|
|
|
498
498
|
native_elem = unpatch_node(element)
|
|
499
499
|
native_child = unpatch_node(child)
|
|
500
500
|
|
|
501
|
+
# EntityReference wrappers can't go in LibXML's native tree.
|
|
502
|
+
# Store alongside native children via instance variable.
|
|
503
|
+
if child.is_a?(CustomizedLibxml::EntityReference)
|
|
504
|
+
refs = native_elem.instance_variable_get(:@moxml_entity_refs) || []
|
|
505
|
+
refs << child
|
|
506
|
+
native_elem.instance_variable_set(:@moxml_entity_refs, refs)
|
|
507
|
+
return
|
|
508
|
+
end
|
|
509
|
+
|
|
501
510
|
# For LibXML: if parent has a DEFAULT namespace (nil/empty prefix) and child is an element without a namespace,
|
|
502
511
|
# explicitly set the child's namespace to match the parent's for XPath compatibility
|
|
503
512
|
# NOTE: Prefixed namespaces are NOT inherited, only default namespaces
|
|
504
|
-
if native_elem.
|
|
505
|
-
native_child.
|
|
513
|
+
if native_elem.is_a?(::LibXML::XML::Node) && native_elem.namespaces&.namespace &&
|
|
514
|
+
native_child.is_a?(::LibXML::XML::Node) && native_child.element? &&
|
|
506
515
|
(!native_child.namespaces.namespace || native_child.namespaces.namespace.href.to_s.empty?)
|
|
507
516
|
|
|
508
517
|
parent_ns = native_elem.namespaces.namespace
|
|
@@ -566,7 +575,7 @@ module Moxml
|
|
|
566
575
|
# Special handling for document-level processing instructions
|
|
567
576
|
# When adding a PI as sibling to root element, store it on document
|
|
568
577
|
if sibling.is_a?(CustomizedLibxml::ProcessingInstruction) &&
|
|
569
|
-
native_node.
|
|
578
|
+
native_node.is_a?(::LibXML::XML::Node) && native_node.doc
|
|
570
579
|
doc = native_node.doc
|
|
571
580
|
pis = doc.instance_variable_get(:@moxml_pis) || []
|
|
572
581
|
pis << sibling
|
|
@@ -624,7 +633,7 @@ module Moxml
|
|
|
624
633
|
next_sibling = native_node.next
|
|
625
634
|
|
|
626
635
|
# Import if needed for cross-document operations
|
|
627
|
-
parent_doc = parent.
|
|
636
|
+
parent_doc = parent.is_a?(::LibXML::XML::Node) ? parent.doc : nil
|
|
628
637
|
|
|
629
638
|
# Use import_and_add to properly handle document adoption
|
|
630
639
|
import_and_add(parent_doc, parent, native_new)
|
|
@@ -651,7 +660,7 @@ module Moxml
|
|
|
651
660
|
native_elem.each_child(&:remove!)
|
|
652
661
|
|
|
653
662
|
# Get the element's document for importing
|
|
654
|
-
doc = native_elem.
|
|
663
|
+
doc = native_elem.is_a?(::LibXML::XML::Node) ? native_elem.doc : nil
|
|
655
664
|
|
|
656
665
|
children.each do |c|
|
|
657
666
|
native_c = unpatch_node(c)
|
|
@@ -662,6 +671,8 @@ module Moxml
|
|
|
662
671
|
end
|
|
663
672
|
|
|
664
673
|
def text_content(node)
|
|
674
|
+
return "" if node.is_a?(CustomizedLibxml::EntityReference)
|
|
675
|
+
|
|
665
676
|
native_node = unpatch_node(node)
|
|
666
677
|
return nil unless native_node
|
|
667
678
|
|
|
@@ -782,7 +793,7 @@ module Moxml
|
|
|
782
793
|
def namespace_definitions(node)
|
|
783
794
|
native_node = unpatch_node(node)
|
|
784
795
|
return [] unless native_node
|
|
785
|
-
return [] unless native_node.
|
|
796
|
+
return [] unless native_node.is_a?(::LibXML::XML::Node)
|
|
786
797
|
|
|
787
798
|
native_node.namespaces.map do |ns|
|
|
788
799
|
ns
|
|
@@ -835,15 +846,8 @@ module Moxml
|
|
|
835
846
|
|
|
836
847
|
def serialize(node, options = {})
|
|
837
848
|
# FIRST: Check if node is any kind of wrapper with custom to_xml
|
|
838
|
-
if node.
|
|
839
|
-
|
|
840
|
-
return node.to_xml if node.is_a?(CustomizedLibxml::Declaration)
|
|
841
|
-
|
|
842
|
-
# Other wrappers - check they're not native LibXML nodes
|
|
843
|
-
unless node.is_a?(::LibXML::XML::Node) ||
|
|
844
|
-
node.is_a?(::LibXML::XML::Document)
|
|
845
|
-
return node.to_xml
|
|
846
|
-
end
|
|
849
|
+
if node.is_a?(CustomizedLibxml::Node) || node.is_a?(DoctypeWrapper)
|
|
850
|
+
return node.to_xml
|
|
847
851
|
end
|
|
848
852
|
|
|
849
853
|
native_node = unpatch_node(node)
|
|
@@ -1033,7 +1037,7 @@ module Moxml
|
|
|
1033
1037
|
return nil unless node
|
|
1034
1038
|
|
|
1035
1039
|
# Unwrap if wrapped
|
|
1036
|
-
native_node = node
|
|
1040
|
+
native_node = unpatch_node(node)
|
|
1037
1041
|
|
|
1038
1042
|
# LibXML is strict about document ownership
|
|
1039
1043
|
# Create brand new NATIVE nodes that are document-independent
|
|
@@ -1057,7 +1061,7 @@ module Moxml
|
|
|
1057
1061
|
# new_node.line = node.line
|
|
1058
1062
|
|
|
1059
1063
|
# Copy and set namespace definitions FIRST
|
|
1060
|
-
if native_node.
|
|
1064
|
+
if native_node.is_a?(::LibXML::XML::Node)
|
|
1061
1065
|
# First, copy all namespace definitions
|
|
1062
1066
|
native_node.namespaces.each do |ns|
|
|
1063
1067
|
::LibXML::XML::Namespace.new(
|
|
@@ -1144,7 +1148,12 @@ module Moxml
|
|
|
1144
1148
|
|
|
1145
1149
|
def unpatch_node(node)
|
|
1146
1150
|
# Unwrap to get native LibXML node
|
|
1147
|
-
|
|
1151
|
+
case node
|
|
1152
|
+
when CustomizedLibxml::Node, CustomizedLibxml::Declaration, DoctypeWrapper
|
|
1153
|
+
node.native
|
|
1154
|
+
else
|
|
1155
|
+
node
|
|
1156
|
+
end
|
|
1148
1157
|
end
|
|
1149
1158
|
|
|
1150
1159
|
def prepare_for_new_document(node, target_doc)
|
|
@@ -1162,7 +1171,7 @@ module Moxml
|
|
|
1162
1171
|
output = "<#{elem.name}"
|
|
1163
1172
|
|
|
1164
1173
|
# Add namespace definitions (only on this element, not ancestors)
|
|
1165
|
-
if elem.
|
|
1174
|
+
if elem.is_a?(::LibXML::XML::Node)
|
|
1166
1175
|
seen_ns = {}
|
|
1167
1176
|
elem.namespaces.each do |ns|
|
|
1168
1177
|
prefix = ns.prefix
|
|
@@ -1203,6 +1212,11 @@ module Moxml
|
|
|
1203
1212
|
output << serialize_node(child)
|
|
1204
1213
|
end
|
|
1205
1214
|
end
|
|
1215
|
+
|
|
1216
|
+
# Append any EntityReference wrappers stored on this element
|
|
1217
|
+
entity_refs = elem.instance_variable_get(:@moxml_entity_refs)
|
|
1218
|
+
entity_refs&.each { |ref| output << ref.to_xml }
|
|
1219
|
+
|
|
1206
1220
|
output << "</#{elem.name}>"
|
|
1207
1221
|
|
|
1208
1222
|
output
|
|
@@ -1210,11 +1224,12 @@ module Moxml
|
|
|
1210
1224
|
|
|
1211
1225
|
def serialize_node(node)
|
|
1212
1226
|
# Check if node is a wrapper with to_xml method
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1227
|
+
case node
|
|
1228
|
+
when CustomizedLibxml::ProcessingInstruction,
|
|
1229
|
+
CustomizedLibxml::Comment,
|
|
1230
|
+
CustomizedLibxml::Cdata,
|
|
1231
|
+
CustomizedLibxml::Text,
|
|
1232
|
+
CustomizedLibxml::EntityReference
|
|
1218
1233
|
return node.to_xml
|
|
1219
1234
|
end
|
|
1220
1235
|
|
|
@@ -1269,7 +1284,7 @@ module Moxml
|
|
|
1269
1284
|
raise unless e.message.include?("different documents")
|
|
1270
1285
|
|
|
1271
1286
|
# Get the target document - either from parameter or element
|
|
1272
|
-
target_doc = doc || (element.
|
|
1287
|
+
target_doc = doc || (element.is_a?(::LibXML::XML::Node) ? element.doc : nil)
|
|
1273
1288
|
|
|
1274
1289
|
if target_doc
|
|
1275
1290
|
# Use deep import to ensure all descendants are included
|
|
@@ -1329,11 +1344,11 @@ module Moxml
|
|
|
1329
1344
|
# Include namespace definitions:
|
|
1330
1345
|
# - On root element (include_ns = true), output ALL namespace definitions
|
|
1331
1346
|
# - On child elements, output namespace definitions that override parent namespaces
|
|
1332
|
-
if elem.
|
|
1347
|
+
if elem.is_a?(::LibXML::XML::Node) && elem.namespaces.respond_to?(:definitions)
|
|
1333
1348
|
# Get parent's namespace definitions to detect overrides
|
|
1334
|
-
parent_ns_defs = if !include_ns && elem.
|
|
1349
|
+
parent_ns_defs = if !include_ns && elem.parent && !elem.parent.is_a?(::LibXML::XML::Document)
|
|
1335
1350
|
parent_namespaces = {}
|
|
1336
|
-
if elem.parent.
|
|
1351
|
+
if elem.parent.is_a?(::LibXML::XML::Node)
|
|
1337
1352
|
elem.parent.namespaces.each do |ns|
|
|
1338
1353
|
parent_namespaces[ns.prefix] = ns.href
|
|
1339
1354
|
end
|
|
@@ -1390,8 +1405,7 @@ module Moxml
|
|
|
1390
1405
|
|
|
1391
1406
|
# Wrap the child and serialize
|
|
1392
1407
|
wrapped_child = patch_node(child)
|
|
1393
|
-
output << if wrapped_child.
|
|
1394
|
-
!wrapped_child.is_a?(::LibXML::XML::Node)
|
|
1408
|
+
output << if wrapped_child.is_a?(CustomizedLibxml::Node) && !wrapped_child.is_a?(CustomizedLibxml::Element)
|
|
1395
1409
|
# Use wrapper's to_xml for proper serialization
|
|
1396
1410
|
wrapped_child.to_xml
|
|
1397
1411
|
elsif child.element?
|
|
@@ -1421,7 +1435,7 @@ module Moxml
|
|
|
1421
1435
|
else
|
|
1422
1436
|
# Walk up to root first
|
|
1423
1437
|
current = node
|
|
1424
|
-
current = current.parent while current.
|
|
1438
|
+
current = current.parent while current.is_a?(::LibXML::XML::Node) && current.parent && !current.parent.is_a?(::LibXML::XML::Document)
|
|
1425
1439
|
current
|
|
1426
1440
|
end
|
|
1427
1441
|
|
|
@@ -1435,7 +1449,7 @@ module Moxml
|
|
|
1435
1449
|
|
|
1436
1450
|
def collect_ns_from_subtree(node, ns_defs)
|
|
1437
1451
|
# Collect namespaces defined on this node
|
|
1438
|
-
if node.
|
|
1452
|
+
if node.is_a?(::LibXML::XML::Node)
|
|
1439
1453
|
node.namespaces.each do |ns|
|
|
1440
1454
|
prefix = ns.prefix
|
|
1441
1455
|
uri = ns.href
|
|
@@ -1453,7 +1467,7 @@ module Moxml
|
|
|
1453
1467
|
|
|
1454
1468
|
# Also check if this element has an active namespace (inherited or own)
|
|
1455
1469
|
# This catches cases where elements inherit namespaces from parents
|
|
1456
|
-
if node.
|
|
1470
|
+
if node.is_a?(::LibXML::XML::Node) && node.namespaces.respond_to?(:namespace)
|
|
1457
1471
|
active_ns = node.namespaces.namespace
|
|
1458
1472
|
if active_ns
|
|
1459
1473
|
prefix = active_ns.prefix
|
|
@@ -1469,7 +1483,7 @@ module Moxml
|
|
|
1469
1483
|
end
|
|
1470
1484
|
|
|
1471
1485
|
# Recursively collect from children
|
|
1472
|
-
return unless node.
|
|
1486
|
+
return unless node.is_a?(::LibXML::XML::Node) && node.children?
|
|
1473
1487
|
|
|
1474
1488
|
node.each_child do |child|
|
|
1475
1489
|
collect_ns_from_subtree(child, ns_defs) if child.element?
|
|
@@ -1493,12 +1507,12 @@ module Moxml
|
|
|
1493
1507
|
# Search element and ancestors for namespace with given prefix
|
|
1494
1508
|
current = element
|
|
1495
1509
|
while current
|
|
1496
|
-
if current.
|
|
1510
|
+
if current.is_a?(::LibXML::XML::Node)
|
|
1497
1511
|
current.namespaces.each do |ns|
|
|
1498
1512
|
return ns if ns.prefix == prefix
|
|
1499
1513
|
end
|
|
1500
1514
|
end
|
|
1501
|
-
current = current.
|
|
1515
|
+
current = current.is_a?(::LibXML::XML::Node) ? current.parent : nil
|
|
1502
1516
|
end
|
|
1503
1517
|
nil
|
|
1504
1518
|
end
|
|
@@ -31,7 +31,7 @@ module Moxml
|
|
|
31
31
|
|
|
32
32
|
# Use provided context if available, otherwise create new one
|
|
33
33
|
ctx = _context || Context.new(:nokogiri)
|
|
34
|
-
|
|
34
|
+
Document.new(native_doc, ctx)
|
|
35
35
|
end
|
|
36
36
|
|
|
37
37
|
# SAX parsing implementation for Nokogiri
|
|
@@ -47,7 +47,7 @@ module Moxml
|
|
|
47
47
|
parser = ::Nokogiri::XML::SAX::Parser.new(bridge)
|
|
48
48
|
|
|
49
49
|
# Parse
|
|
50
|
-
if xml.
|
|
50
|
+
if xml.is_a?(IO) || xml.is_a?(StringIO)
|
|
51
51
|
parser.parse(xml)
|
|
52
52
|
else
|
|
53
53
|
parser.parse(xml.to_s)
|
|
@@ -202,7 +202,7 @@ module Moxml
|
|
|
202
202
|
end
|
|
203
203
|
|
|
204
204
|
def root(document)
|
|
205
|
-
document.
|
|
205
|
+
document.is_a?(::Nokogiri::XML::Document) ? document.root : document.children.first
|
|
206
206
|
end
|
|
207
207
|
|
|
208
208
|
def attribute_element(attr)
|
|
@@ -390,8 +390,7 @@ module Moxml
|
|
|
390
390
|
# 2. Check Nokogiri's internal @xml_decl (when remove is called, this becomes nil)
|
|
391
391
|
if options.key?(:no_declaration)
|
|
392
392
|
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if options[:no_declaration]
|
|
393
|
-
elsif node.
|
|
394
|
-
node.instance_variable_defined?(:@xml_decl)
|
|
393
|
+
elsif node.instance_variable_defined?(:@xml_decl)
|
|
395
394
|
# Nokogiri's internal state - if nil, declaration was removed
|
|
396
395
|
xml_decl = node.instance_variable_get(:@xml_decl)
|
|
397
396
|
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if xml_decl.nil?
|
data/lib/moxml/adapter/oga.rb
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "base"
|
|
4
|
-
require_relative "customized_oga
|
|
5
|
-
require_relative "customized_oga/xml_declaration"
|
|
4
|
+
require_relative "customized_oga"
|
|
6
5
|
require "oga"
|
|
7
6
|
|
|
8
7
|
module Moxml
|
|
@@ -46,7 +45,7 @@ module Moxml
|
|
|
46
45
|
def sax_parse(xml, handler)
|
|
47
46
|
bridge = OgaSAXBridge.new(handler)
|
|
48
47
|
|
|
49
|
-
xml_string = xml.
|
|
48
|
+
xml_string = xml.is_a?(IO) || xml.is_a?(StringIO) ? xml.read : xml.to_s
|
|
50
49
|
|
|
51
50
|
# Manually call start_document (Oga doesn't)
|
|
52
51
|
handler.on_start_document
|
|
@@ -72,6 +71,17 @@ module Moxml
|
|
|
72
71
|
::Oga::XML::Text.new(text: encode_entity_markers(content))
|
|
73
72
|
end
|
|
74
73
|
|
|
74
|
+
def create_native_entity_reference(name)
|
|
75
|
+
text = ::Oga::XML::Text.new
|
|
76
|
+
text.text = "#{ENTITY_MARKER}#{name};"
|
|
77
|
+
text.instance_variable_set(:@moxml_entity_name, name)
|
|
78
|
+
text
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def entity_reference_name(node)
|
|
82
|
+
node.instance_variable_get(:@moxml_entity_name)
|
|
83
|
+
end
|
|
84
|
+
|
|
75
85
|
def create_native_cdata(content, _owner_doc = nil)
|
|
76
86
|
::Oga::XML::Cdata.new(text: content)
|
|
77
87
|
end
|
|
@@ -132,10 +142,9 @@ module Moxml
|
|
|
132
142
|
end
|
|
133
143
|
|
|
134
144
|
def namespace(element)
|
|
135
|
-
|
|
145
|
+
case element
|
|
146
|
+
when ::Oga::XML::Element, ::Oga::XML::Attribute
|
|
136
147
|
element.namespace
|
|
137
|
-
elsif element.respond_to?(:namespaces)
|
|
138
|
-
element.namespaces.values.last
|
|
139
148
|
end
|
|
140
149
|
rescue NoMethodError
|
|
141
150
|
# Oga attributes fail with NoMethodError:
|
|
@@ -150,7 +159,12 @@ module Moxml
|
|
|
150
159
|
def node_type(node)
|
|
151
160
|
case node
|
|
152
161
|
when ::Oga::XML::Element then :element
|
|
153
|
-
when ::Oga::XML::Text
|
|
162
|
+
when ::Oga::XML::Text
|
|
163
|
+
if node.instance_variable_get(:@moxml_entity_name)
|
|
164
|
+
:entity_reference
|
|
165
|
+
else
|
|
166
|
+
:text
|
|
167
|
+
end
|
|
154
168
|
when ::Oga::XML::Cdata then :cdata
|
|
155
169
|
when ::Oga::XML::Comment then :comment
|
|
156
170
|
when ::Oga::XML::Attribute then :attribute
|
|
@@ -178,7 +192,7 @@ module Moxml
|
|
|
178
192
|
node.doctype].compact
|
|
179
193
|
end
|
|
180
194
|
|
|
181
|
-
return all_children unless node.
|
|
195
|
+
return all_children unless node.is_a?(::Oga::XML::Node) || node.is_a?(::Oga::XML::Document)
|
|
182
196
|
|
|
183
197
|
all_children + node.children.reject do |child|
|
|
184
198
|
child.is_a?(::Oga::XML::Text) &&
|
|
@@ -188,7 +202,7 @@ module Moxml
|
|
|
188
202
|
end
|
|
189
203
|
|
|
190
204
|
def parent(node)
|
|
191
|
-
node.parent if node.
|
|
205
|
+
node.parent if node.is_a?(::Oga::XML::Node)
|
|
192
206
|
end
|
|
193
207
|
|
|
194
208
|
def next_sibling(node)
|
|
@@ -215,7 +229,7 @@ module Moxml
|
|
|
215
229
|
end
|
|
216
230
|
|
|
217
231
|
def attributes(element)
|
|
218
|
-
return [] unless element.
|
|
232
|
+
return [] unless element.is_a?(::Oga::XML::Element)
|
|
219
233
|
|
|
220
234
|
# remove attributes-namespaces
|
|
221
235
|
element.attributes.reject do |attr|
|
|
@@ -316,10 +330,9 @@ module Moxml
|
|
|
316
330
|
end
|
|
317
331
|
|
|
318
332
|
def inner_text(node)
|
|
319
|
-
text = if node.
|
|
333
|
+
text = if node.is_a?(::Oga::XML::Element)
|
|
320
334
|
node.inner_text
|
|
321
335
|
else
|
|
322
|
-
# Oga::XML::Text node for example
|
|
323
336
|
node.text
|
|
324
337
|
end
|
|
325
338
|
restore_entity_markers(text)
|
|
@@ -327,7 +340,7 @@ module Moxml
|
|
|
327
340
|
|
|
328
341
|
def set_text_content(node, content)
|
|
329
342
|
encoded = encode_entity_markers(content)
|
|
330
|
-
if node.
|
|
343
|
+
if node.is_a?(::Oga::XML::Element)
|
|
331
344
|
node.inner_text = encoded
|
|
332
345
|
else
|
|
333
346
|
node.text = encoded
|
|
@@ -370,22 +383,32 @@ module Moxml
|
|
|
370
383
|
end
|
|
371
384
|
|
|
372
385
|
def namespace_definitions(node)
|
|
373
|
-
return [] unless node.
|
|
386
|
+
return [] unless node.is_a?(::Oga::XML::Element)
|
|
374
387
|
|
|
375
388
|
node.namespaces.values
|
|
376
389
|
end
|
|
377
390
|
|
|
378
391
|
# Doctype accessor methods
|
|
392
|
+
# Note: Oga stores SYSTEM identifier in public_id for SYSTEM doctypes.
|
|
393
|
+
# See: Oga::XML::Doctype puts SYSTEM dtd in public_id, system_id is nil.
|
|
379
394
|
def doctype_name(native)
|
|
380
395
|
native.name
|
|
381
396
|
end
|
|
382
397
|
|
|
383
398
|
def doctype_external_id(native)
|
|
384
|
-
native.
|
|
399
|
+
if native.type == "SYSTEM"
|
|
400
|
+
nil
|
|
401
|
+
else
|
|
402
|
+
native.public_id
|
|
403
|
+
end
|
|
385
404
|
end
|
|
386
405
|
|
|
387
406
|
def doctype_system_id(native)
|
|
388
|
-
native.
|
|
407
|
+
if native.type == "SYSTEM"
|
|
408
|
+
native.public_id
|
|
409
|
+
else
|
|
410
|
+
native.system_id
|
|
411
|
+
end
|
|
389
412
|
end
|
|
390
413
|
|
|
391
414
|
def xpath(node, expression, namespaces = nil)
|
|
@@ -479,7 +502,7 @@ module Moxml
|
|
|
479
502
|
|
|
480
503
|
if should_include_decl && !node.xml_declaration && !has_existing_declaration
|
|
481
504
|
# Need to add declaration - create default one
|
|
482
|
-
output =
|
|
505
|
+
output = []
|
|
483
506
|
output << '<?xml version="1.0" encoding="UTF-8"?>'
|
|
484
507
|
output << "\n"
|
|
485
508
|
|
|
@@ -491,10 +514,10 @@ module Moxml
|
|
|
491
514
|
output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
|
|
492
515
|
end
|
|
493
516
|
|
|
494
|
-
return output
|
|
517
|
+
return output.join
|
|
495
518
|
elsif !should_include_decl
|
|
496
519
|
# Skip xml_declaration
|
|
497
|
-
output =
|
|
520
|
+
output = []
|
|
498
521
|
|
|
499
522
|
# Serialize doctype if present
|
|
500
523
|
output << node.doctype.to_xml << "\n" if node.doctype
|
|
@@ -506,7 +529,7 @@ module Moxml
|
|
|
506
529
|
output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
|
|
507
530
|
end
|
|
508
531
|
|
|
509
|
-
return output
|
|
532
|
+
return output.join
|
|
510
533
|
end
|
|
511
534
|
end
|
|
512
535
|
|
|
@@ -514,19 +537,20 @@ module Moxml
|
|
|
514
537
|
# But first check if we need to handle declaration specially
|
|
515
538
|
if node.is_a?(::Oga::XML::Document) && node.xml_declaration
|
|
516
539
|
# Document has declaration - use custom handling to avoid duplicates
|
|
517
|
-
output =
|
|
540
|
+
output = []
|
|
541
|
+
xml_declaration_serialized = false
|
|
518
542
|
|
|
519
543
|
# Serialize children, but skip XmlDeclaration if it would cause duplication
|
|
520
544
|
node.children.each do |child|
|
|
521
|
-
|
|
522
|
-
if
|
|
523
|
-
|
|
524
|
-
|
|
545
|
+
xml_declaration = child.is_a?(::Oga::XML::XmlDeclaration)
|
|
546
|
+
next if xml_declaration && xml_declaration_serialized
|
|
547
|
+
|
|
548
|
+
xml_declaration_serialized = true if xml_declaration
|
|
525
549
|
|
|
526
550
|
output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
|
|
527
551
|
end
|
|
528
552
|
|
|
529
|
-
output
|
|
553
|
+
output.join
|
|
530
554
|
else
|
|
531
555
|
# Normal case - use XmlGenerator directly
|
|
532
556
|
::Moxml::Adapter::CustomizedOga::XmlGenerator.new(node).to_xml
|