moxml 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +117 -66
  3. data/Gemfile +1 -0
  4. data/README.adoc +11 -9
  5. data/Rakefile +3 -1
  6. data/docs/_pages/configuration.adoc +22 -19
  7. data/docs/_tutorials/namespace-handling.adoc +5 -5
  8. data/lib/moxml/adapter/base.rb +8 -3
  9. data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
  10. data/lib/moxml/adapter/customized_libxml.rb +18 -0
  11. data/lib/moxml/adapter/customized_oga/xml_generator.rb +2 -2
  12. data/lib/moxml/adapter/customized_oga.rb +10 -0
  13. data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
  14. data/lib/moxml/adapter/customized_ox.rb +12 -0
  15. data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
  16. data/lib/moxml/adapter/customized_rexml/formatter.rb +2 -0
  17. data/lib/moxml/adapter/customized_rexml.rb +11 -0
  18. data/lib/moxml/adapter/headed_ox.rb +9 -3
  19. data/lib/moxml/adapter/libxml.rb +76 -62
  20. data/lib/moxml/adapter/nokogiri.rb +4 -5
  21. data/lib/moxml/adapter/oga.rb +50 -26
  22. data/lib/moxml/adapter/ox.rb +189 -41
  23. data/lib/moxml/adapter/rexml.rb +27 -8
  24. data/lib/moxml/attribute.rb +3 -0
  25. data/lib/moxml/builder.rb +1 -0
  26. data/lib/moxml/config.rb +7 -7
  27. data/lib/moxml/document.rb +5 -1
  28. data/lib/moxml/document_builder.rb +37 -31
  29. data/lib/moxml/element.rb +13 -5
  30. data/lib/moxml/entity_registry.rb +36 -0
  31. data/lib/moxml/node.rb +23 -2
  32. data/lib/moxml/node_set.rb +43 -15
  33. data/lib/moxml/version.rb +1 -1
  34. data/lib/moxml/xml_utils.rb +1 -1
  35. data/spec/integration/shared_examples/edge_cases.rb +3 -0
  36. data/spec/moxml/adapter/oga_spec.rb +62 -0
  37. data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
  38. data/spec/moxml/allocation_benchmark_spec.rb +96 -0
  39. data/spec/moxml/allocation_guard_spec.rb +282 -0
  40. data/spec/moxml/builder_spec.rb +22 -0
  41. data/spec/moxml/config_spec.rb +11 -11
  42. data/spec/moxml/doctype_spec.rb +41 -0
  43. data/spec/moxml/lazy_parse_spec.rb +115 -0
  44. data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
  45. data/spec/moxml/node_cache_spec.rb +110 -0
  46. data/spec/moxml/node_set_cache_spec.rb +90 -0
  47. data/spec/moxml/xml_utils_spec.rb +32 -0
  48. data/spec/support/allocation_helper.rb +165 -0
  49. data/spec/support/w3c_namespace_helpers.rb +2 -1
  50. metadata +15 -2
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ module CustomizedRexml
6
+ class EntityReference
7
+ attr_reader :name
8
+
9
+ def initialize(name)
10
+ @name = name
11
+ end
12
+
13
+ def ==(other)
14
+ other.is_a?(self.class) && @name == other.name
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -19,6 +19,8 @@ module Moxml
19
19
  case node
20
20
  when ::REXML::XMLDecl
21
21
  write_declaration(node, output)
22
+ when ::Moxml::Adapter::CustomizedRexml::EntityReference
23
+ output << "&#{node.name};"
22
24
  else
23
25
  super
24
26
  end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ module CustomizedRexml
6
+ autoload :EntityReference,
7
+ "moxml/adapter/customized_rexml/entity_reference"
8
+ autoload :Formatter, "moxml/adapter/customized_rexml/formatter"
9
+ end
10
+ end
11
+ end
@@ -25,17 +25,23 @@ module Moxml
25
25
  #
26
26
  class HeadedOx < Ox
27
27
  class << self
28
- # Override parse to use HeadedOx context instead of Ox context
29
- def parse(xml, _options = {}, _context = nil)
28
+ # Override parse to use lazy wrapping like the Ox adapter.
29
+ # Previously used DocumentBuilder (eager tree construction causing
30
+ # ~176K allocations per 100-element parse). Lazy parse defers wrapper
31
+ # creation until nodes are accessed, matching Ox adapter behavior.
32
+ def parse(xml, options = {}, _context = nil)
30
33
  native_doc = begin
31
34
  result = ::Ox.parse(xml)
32
35
 
33
36
  # result can be either Document or Element
34
37
  if result.is_a?(::Ox::Document)
38
+ assign_parents(result)
39
+ validate_single_root(result) if options[:strict]
35
40
  result
36
41
  else
37
42
  doc = ::Ox::Document.new
38
43
  doc << result
44
+ assign_parents(doc)
39
45
  doc
40
46
  end
41
47
  rescue ::Ox::ParseError => e
@@ -47,7 +53,7 @@ module Moxml
47
53
 
48
54
  # Use provided context if available, otherwise create new one
49
55
  ctx = _context || Context.new(:headed_ox)
50
- DocumentBuilder.new(ctx).build(native_doc)
56
+ Document.new(native_doc, ctx)
51
57
  end
52
58
 
53
59
  # Execute XPath query using Moxml's XPath engine
@@ -2,13 +2,7 @@
2
2
 
3
3
  require_relative "base"
4
4
  require "libxml"
5
- require_relative "customized_libxml/node"
6
- require_relative "customized_libxml/element"
7
- require_relative "customized_libxml/text"
8
- require_relative "customized_libxml/comment"
9
- require_relative "customized_libxml/cdata"
10
- require_relative "customized_libxml/processing_instruction"
11
- require_relative "customized_libxml/declaration"
5
+ require_relative "customized_libxml"
12
6
 
13
7
  module Moxml
14
8
  module Adapter
@@ -52,7 +46,7 @@ module Moxml
52
46
  # LibXML doesn't preserve DOCTYPE during parsing, so we need to extract it manually
53
47
  xml_string = if xml.is_a?(String)
54
48
  xml
55
- elsif xml.respond_to?(:read)
49
+ elsif xml.is_a?(IO) || xml.is_a?(StringIO)
56
50
  xml.read
57
51
  else
58
52
  xml.to_s
@@ -67,7 +61,7 @@ module Moxml
67
61
  parser.parse
68
62
  rescue ::LibXML::XML::Error => e
69
63
  if options[:strict]
70
- line = e.respond_to?(:line) ? e.line : nil
64
+ line = e.line
71
65
  raise Moxml::ParseError.new(
72
66
  e.message,
73
67
  line: line,
@@ -116,8 +110,12 @@ module Moxml
116
110
  # Parse
117
111
  parser.parse
118
112
  rescue ::LibXML::XML::Error => e
119
- line = e.respond_to?(:line) ? e.line : nil
120
- column = e.respond_to?(:column) ? e.column : nil
113
+ line = e.line
114
+ column = begin
115
+ e.column
116
+ rescue StandardError
117
+ nil
118
+ end
121
119
  error = Moxml::ParseError.new(e.message, line: line, column: column)
122
120
  handler.on_error(error)
123
121
  end
@@ -135,6 +133,14 @@ module Moxml
135
133
  CustomizedLibxml::Text.new(native)
136
134
  end
137
135
 
136
+ def create_native_entity_reference(name)
137
+ CustomizedLibxml::EntityReference.new(name)
138
+ end
139
+
140
+ def entity_reference_name(node)
141
+ node.name if node.is_a?(CustomizedLibxml::EntityReference)
142
+ end
143
+
138
144
  def create_native_cdata(content, _owner_doc = nil)
139
145
  native = ::LibXML::XML::Node.new_cdata(content.to_s)
140
146
  CustomizedLibxml::Cdata.new(native)
@@ -174,10 +180,11 @@ module Moxml
174
180
  if node.is_a?(CustomizedLibxml::ProcessingInstruction)
175
181
  return :processing_instruction
176
182
  end
183
+ return :entity_reference if node.is_a?(CustomizedLibxml::EntityReference)
177
184
  return :doctype if node.is_a?(DoctypeWrapper)
178
185
 
179
186
  # Unwrap if needed
180
- native_node = node.respond_to?(:native) ? node.native : node
187
+ native_node = unpatch_node(node)
181
188
 
182
189
  case native_node.node_type
183
190
  when ::LibXML::XML::Node::DOCUMENT_NODE
@@ -266,21 +273,9 @@ module Moxml
266
273
  result = []
267
274
 
268
275
  # Include DOCTYPE if present
269
- # First check if we stored it as instance variable (from parse)
270
276
  if native_node.instance_variable_defined?(:@moxml_doctype)
271
277
  doctype_wrapper = native_node.instance_variable_get(:@moxml_doctype)
272
278
  result << doctype_wrapper if doctype_wrapper
273
- elsif native_node.respond_to?(:dtd) && native_node.dtd
274
- # Otherwise check dtd property directly
275
- dtd = native_node.dtd
276
- # Wrap in DoctypeWrapper for consistency
277
- doctype_wrapper = DoctypeWrapper.new(
278
- native_node,
279
- dtd.name,
280
- dtd.external_id,
281
- dtd.system_id,
282
- )
283
- result << doctype_wrapper
284
279
  end
285
280
 
286
281
  return result unless native_node.root
@@ -298,6 +293,11 @@ module Moxml
298
293
 
299
294
  result << patch_node(child)
300
295
  end
296
+
297
+ # Include any EntityReference wrappers stored alongside native children
298
+ entity_refs = native_node.instance_variable_get(:@moxml_entity_refs)
299
+ result.concat(entity_refs) if entity_refs
300
+
301
301
  result
302
302
  end
303
303
 
@@ -350,7 +350,7 @@ module Moxml
350
350
  def attributes(element)
351
351
  native_elem = unpatch_node(element)
352
352
  return [] unless native_elem
353
- unless native_elem.respond_to?(:element?) && native_elem.element?
353
+ unless native_elem.is_a?(::LibXML::XML::Node) && native_elem.element?
354
354
  return []
355
355
  end
356
356
  return [] unless native_elem.attributes?
@@ -368,7 +368,7 @@ module Moxml
368
368
 
369
369
  def attribute_namespace(attr)
370
370
  return nil unless attr
371
- return nil unless attr.respond_to?(:ns)
371
+ return nil unless attr.is_a?(::LibXML::XML::Attr)
372
372
 
373
373
  attr.ns
374
374
  end
@@ -498,11 +498,20 @@ module Moxml
498
498
  native_elem = unpatch_node(element)
499
499
  native_child = unpatch_node(child)
500
500
 
501
+ # EntityReference wrappers can't go in LibXML's native tree.
502
+ # Store alongside native children via instance variable.
503
+ if child.is_a?(CustomizedLibxml::EntityReference)
504
+ refs = native_elem.instance_variable_get(:@moxml_entity_refs) || []
505
+ refs << child
506
+ native_elem.instance_variable_set(:@moxml_entity_refs, refs)
507
+ return
508
+ end
509
+
501
510
  # For LibXML: if parent has a DEFAULT namespace (nil/empty prefix) and child is an element without a namespace,
502
511
  # explicitly set the child's namespace to match the parent's for XPath compatibility
503
512
  # NOTE: Prefixed namespaces are NOT inherited, only default namespaces
504
- if native_elem.respond_to?(:namespaces) && native_elem.namespaces&.namespace &&
505
- native_child.respond_to?(:namespaces) && native_child.element? &&
513
+ if native_elem.is_a?(::LibXML::XML::Node) && native_elem.namespaces&.namespace &&
514
+ native_child.is_a?(::LibXML::XML::Node) && native_child.element? &&
506
515
  (!native_child.namespaces.namespace || native_child.namespaces.namespace.href.to_s.empty?)
507
516
 
508
517
  parent_ns = native_elem.namespaces.namespace
@@ -566,7 +575,7 @@ module Moxml
566
575
  # Special handling for document-level processing instructions
567
576
  # When adding a PI as sibling to root element, store it on document
568
577
  if sibling.is_a?(CustomizedLibxml::ProcessingInstruction) &&
569
- native_node.respond_to?(:doc) && native_node.doc
578
+ native_node.is_a?(::LibXML::XML::Node) && native_node.doc
570
579
  doc = native_node.doc
571
580
  pis = doc.instance_variable_get(:@moxml_pis) || []
572
581
  pis << sibling
@@ -624,7 +633,7 @@ module Moxml
624
633
  next_sibling = native_node.next
625
634
 
626
635
  # Import if needed for cross-document operations
627
- parent_doc = parent.respond_to?(:doc) ? parent.doc : nil
636
+ parent_doc = parent.is_a?(::LibXML::XML::Node) ? parent.doc : nil
628
637
 
629
638
  # Use import_and_add to properly handle document adoption
630
639
  import_and_add(parent_doc, parent, native_new)
@@ -651,7 +660,7 @@ module Moxml
651
660
  native_elem.each_child(&:remove!)
652
661
 
653
662
  # Get the element's document for importing
654
- doc = native_elem.respond_to?(:doc) ? native_elem.doc : nil
663
+ doc = native_elem.is_a?(::LibXML::XML::Node) ? native_elem.doc : nil
655
664
 
656
665
  children.each do |c|
657
666
  native_c = unpatch_node(c)
@@ -662,6 +671,8 @@ module Moxml
662
671
  end
663
672
 
664
673
  def text_content(node)
674
+ return "" if node.is_a?(CustomizedLibxml::EntityReference)
675
+
665
676
  native_node = unpatch_node(node)
666
677
  return nil unless native_node
667
678
 
@@ -782,7 +793,7 @@ module Moxml
782
793
  def namespace_definitions(node)
783
794
  native_node = unpatch_node(node)
784
795
  return [] unless native_node
785
- return [] unless native_node.respond_to?(:namespaces)
796
+ return [] unless native_node.is_a?(::LibXML::XML::Node)
786
797
 
787
798
  native_node.namespaces.map do |ns|
788
799
  ns
@@ -835,15 +846,8 @@ module Moxml
835
846
 
836
847
  def serialize(node, options = {})
837
848
  # FIRST: Check if node is any kind of wrapper with custom to_xml
838
- if node.respond_to?(:to_xml)
839
- # Declaration wrapper
840
- return node.to_xml if node.is_a?(CustomizedLibxml::Declaration)
841
-
842
- # Other wrappers - check they're not native LibXML nodes
843
- unless node.is_a?(::LibXML::XML::Node) ||
844
- node.is_a?(::LibXML::XML::Document)
845
- return node.to_xml
846
- end
849
+ if node.is_a?(CustomizedLibxml::Node) || node.is_a?(DoctypeWrapper)
850
+ return node.to_xml
847
851
  end
848
852
 
849
853
  native_node = unpatch_node(node)
@@ -1033,7 +1037,7 @@ module Moxml
1033
1037
  return nil unless node
1034
1038
 
1035
1039
  # Unwrap if wrapped
1036
- native_node = node.respond_to?(:native) ? node.native : node
1040
+ native_node = unpatch_node(node)
1037
1041
 
1038
1042
  # LibXML is strict about document ownership
1039
1043
  # Create brand new NATIVE nodes that are document-independent
@@ -1057,7 +1061,7 @@ module Moxml
1057
1061
  # new_node.line = node.line
1058
1062
 
1059
1063
  # Copy and set namespace definitions FIRST
1060
- if native_node.respond_to?(:namespaces)
1064
+ if native_node.is_a?(::LibXML::XML::Node)
1061
1065
  # First, copy all namespace definitions
1062
1066
  native_node.namespaces.each do |ns|
1063
1067
  ::LibXML::XML::Namespace.new(
@@ -1144,7 +1148,12 @@ module Moxml
1144
1148
 
1145
1149
  def unpatch_node(node)
1146
1150
  # Unwrap to get native LibXML node
1147
- node.respond_to?(:native) ? node.native : node
1151
+ case node
1152
+ when CustomizedLibxml::Node, CustomizedLibxml::Declaration, DoctypeWrapper
1153
+ node.native
1154
+ else
1155
+ node
1156
+ end
1148
1157
  end
1149
1158
 
1150
1159
  def prepare_for_new_document(node, target_doc)
@@ -1162,7 +1171,7 @@ module Moxml
1162
1171
  output = "<#{elem.name}"
1163
1172
 
1164
1173
  # Add namespace definitions (only on this element, not ancestors)
1165
- if elem.respond_to?(:namespaces)
1174
+ if elem.is_a?(::LibXML::XML::Node)
1166
1175
  seen_ns = {}
1167
1176
  elem.namespaces.each do |ns|
1168
1177
  prefix = ns.prefix
@@ -1203,6 +1212,11 @@ module Moxml
1203
1212
  output << serialize_node(child)
1204
1213
  end
1205
1214
  end
1215
+
1216
+ # Append any EntityReference wrappers stored on this element
1217
+ entity_refs = elem.instance_variable_get(:@moxml_entity_refs)
1218
+ entity_refs&.each { |ref| output << ref.to_xml }
1219
+
1206
1220
  output << "</#{elem.name}>"
1207
1221
 
1208
1222
  output
@@ -1210,11 +1224,12 @@ module Moxml
1210
1224
 
1211
1225
  def serialize_node(node)
1212
1226
  # Check if node is a wrapper with to_xml method
1213
- if node.respond_to?(:to_xml) &&
1214
- (node.is_a?(CustomizedLibxml::ProcessingInstruction) ||
1215
- node.is_a?(CustomizedLibxml::Comment) ||
1216
- node.is_a?(CustomizedLibxml::Cdata) ||
1217
- node.is_a?(CustomizedLibxml::Text))
1227
+ case node
1228
+ when CustomizedLibxml::ProcessingInstruction,
1229
+ CustomizedLibxml::Comment,
1230
+ CustomizedLibxml::Cdata,
1231
+ CustomizedLibxml::Text,
1232
+ CustomizedLibxml::EntityReference
1218
1233
  return node.to_xml
1219
1234
  end
1220
1235
 
@@ -1269,7 +1284,7 @@ module Moxml
1269
1284
  raise unless e.message.include?("different documents")
1270
1285
 
1271
1286
  # Get the target document - either from parameter or element
1272
- target_doc = doc || (element.respond_to?(:doc) ? element.doc : nil)
1287
+ target_doc = doc || (element.is_a?(::LibXML::XML::Node) ? element.doc : nil)
1273
1288
 
1274
1289
  if target_doc
1275
1290
  # Use deep import to ensure all descendants are included
@@ -1329,11 +1344,11 @@ module Moxml
1329
1344
  # Include namespace definitions:
1330
1345
  # - On root element (include_ns = true), output ALL namespace definitions
1331
1346
  # - On child elements, output namespace definitions that override parent namespaces
1332
- if elem.respond_to?(:namespaces) && elem.namespaces.respond_to?(:definitions)
1347
+ if elem.is_a?(::LibXML::XML::Node) && elem.namespaces.respond_to?(:definitions)
1333
1348
  # Get parent's namespace definitions to detect overrides
1334
- parent_ns_defs = if !include_ns && elem.respond_to?(:parent) && elem.parent && !elem.parent.is_a?(::LibXML::XML::Document)
1349
+ parent_ns_defs = if !include_ns && elem.parent && !elem.parent.is_a?(::LibXML::XML::Document)
1335
1350
  parent_namespaces = {}
1336
- if elem.parent.respond_to?(:namespaces)
1351
+ if elem.parent.is_a?(::LibXML::XML::Node)
1337
1352
  elem.parent.namespaces.each do |ns|
1338
1353
  parent_namespaces[ns.prefix] = ns.href
1339
1354
  end
@@ -1390,8 +1405,7 @@ module Moxml
1390
1405
 
1391
1406
  # Wrap the child and serialize
1392
1407
  wrapped_child = patch_node(child)
1393
- output << if wrapped_child.respond_to?(:to_xml) &&
1394
- !wrapped_child.is_a?(::LibXML::XML::Node)
1408
+ output << if wrapped_child.is_a?(CustomizedLibxml::Node) && !wrapped_child.is_a?(CustomizedLibxml::Element)
1395
1409
  # Use wrapper's to_xml for proper serialization
1396
1410
  wrapped_child.to_xml
1397
1411
  elsif child.element?
@@ -1421,7 +1435,7 @@ module Moxml
1421
1435
  else
1422
1436
  # Walk up to root first
1423
1437
  current = node
1424
- current = current.parent while current.respond_to?(:parent) && current.parent && !current.parent.is_a?(::LibXML::XML::Document)
1438
+ current = current.parent while current.is_a?(::LibXML::XML::Node) && current.parent && !current.parent.is_a?(::LibXML::XML::Document)
1425
1439
  current
1426
1440
  end
1427
1441
 
@@ -1435,7 +1449,7 @@ module Moxml
1435
1449
 
1436
1450
  def collect_ns_from_subtree(node, ns_defs)
1437
1451
  # Collect namespaces defined on this node
1438
- if node.respond_to?(:namespaces)
1452
+ if node.is_a?(::LibXML::XML::Node)
1439
1453
  node.namespaces.each do |ns|
1440
1454
  prefix = ns.prefix
1441
1455
  uri = ns.href
@@ -1453,7 +1467,7 @@ module Moxml
1453
1467
 
1454
1468
  # Also check if this element has an active namespace (inherited or own)
1455
1469
  # This catches cases where elements inherit namespaces from parents
1456
- if node.respond_to?(:namespaces) && node.namespaces.respond_to?(:namespace)
1470
+ if node.is_a?(::LibXML::XML::Node) && node.namespaces.respond_to?(:namespace)
1457
1471
  active_ns = node.namespaces.namespace
1458
1472
  if active_ns
1459
1473
  prefix = active_ns.prefix
@@ -1469,7 +1483,7 @@ module Moxml
1469
1483
  end
1470
1484
 
1471
1485
  # Recursively collect from children
1472
- return unless node.respond_to?(:children?) && node.children?
1486
+ return unless node.is_a?(::LibXML::XML::Node) && node.children?
1473
1487
 
1474
1488
  node.each_child do |child|
1475
1489
  collect_ns_from_subtree(child, ns_defs) if child.element?
@@ -1493,12 +1507,12 @@ module Moxml
1493
1507
  # Search element and ancestors for namespace with given prefix
1494
1508
  current = element
1495
1509
  while current
1496
- if current.respond_to?(:namespaces)
1510
+ if current.is_a?(::LibXML::XML::Node)
1497
1511
  current.namespaces.each do |ns|
1498
1512
  return ns if ns.prefix == prefix
1499
1513
  end
1500
1514
  end
1501
- current = current.respond_to?(:parent) ? current.parent : nil
1515
+ current = current.is_a?(::LibXML::XML::Node) ? current.parent : nil
1502
1516
  end
1503
1517
  nil
1504
1518
  end
@@ -31,7 +31,7 @@ module Moxml
31
31
 
32
32
  # Use provided context if available, otherwise create new one
33
33
  ctx = _context || Context.new(:nokogiri)
34
- DocumentBuilder.new(ctx).build(native_doc)
34
+ Document.new(native_doc, ctx)
35
35
  end
36
36
 
37
37
  # SAX parsing implementation for Nokogiri
@@ -47,7 +47,7 @@ module Moxml
47
47
  parser = ::Nokogiri::XML::SAX::Parser.new(bridge)
48
48
 
49
49
  # Parse
50
- if xml.respond_to?(:read)
50
+ if xml.is_a?(IO) || xml.is_a?(StringIO)
51
51
  parser.parse(xml)
52
52
  else
53
53
  parser.parse(xml.to_s)
@@ -202,7 +202,7 @@ module Moxml
202
202
  end
203
203
 
204
204
  def root(document)
205
- document.respond_to?(:root) ? document.root : document.children.first
205
+ document.is_a?(::Nokogiri::XML::Document) ? document.root : document.children.first
206
206
  end
207
207
 
208
208
  def attribute_element(attr)
@@ -390,8 +390,7 @@ module Moxml
390
390
  # 2. Check Nokogiri's internal @xml_decl (when remove is called, this becomes nil)
391
391
  if options.key?(:no_declaration)
392
392
  save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if options[:no_declaration]
393
- elsif node.respond_to?(:instance_variable_get) &&
394
- node.instance_variable_defined?(:@xml_decl)
393
+ elsif node.instance_variable_defined?(:@xml_decl)
395
394
  # Nokogiri's internal state - if nil, declaration was removed
396
395
  xml_decl = node.instance_variable_get(:@xml_decl)
397
396
  save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if xml_decl.nil?
@@ -1,8 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "base"
4
- require_relative "customized_oga/xml_generator"
5
- require_relative "customized_oga/xml_declaration"
4
+ require_relative "customized_oga"
6
5
  require "oga"
7
6
 
8
7
  module Moxml
@@ -46,7 +45,7 @@ module Moxml
46
45
  def sax_parse(xml, handler)
47
46
  bridge = OgaSAXBridge.new(handler)
48
47
 
49
- xml_string = xml.respond_to?(:read) ? xml.read : xml.to_s
48
+ xml_string = xml.is_a?(IO) || xml.is_a?(StringIO) ? xml.read : xml.to_s
50
49
 
51
50
  # Manually call start_document (Oga doesn't)
52
51
  handler.on_start_document
@@ -72,6 +71,17 @@ module Moxml
72
71
  ::Oga::XML::Text.new(text: encode_entity_markers(content))
73
72
  end
74
73
 
74
+ def create_native_entity_reference(name)
75
+ text = ::Oga::XML::Text.new
76
+ text.text = "#{ENTITY_MARKER}#{name};"
77
+ text.instance_variable_set(:@moxml_entity_name, name)
78
+ text
79
+ end
80
+
81
+ def entity_reference_name(node)
82
+ node.instance_variable_get(:@moxml_entity_name)
83
+ end
84
+
75
85
  def create_native_cdata(content, _owner_doc = nil)
76
86
  ::Oga::XML::Cdata.new(text: content)
77
87
  end
@@ -132,10 +142,9 @@ module Moxml
132
142
  end
133
143
 
134
144
  def namespace(element)
135
- if element.respond_to?(:namespace)
145
+ case element
146
+ when ::Oga::XML::Element, ::Oga::XML::Attribute
136
147
  element.namespace
137
- elsif element.respond_to?(:namespaces)
138
- element.namespaces.values.last
139
148
  end
140
149
  rescue NoMethodError
141
150
  # Oga attributes fail with NoMethodError:
@@ -150,7 +159,12 @@ module Moxml
150
159
  def node_type(node)
151
160
  case node
152
161
  when ::Oga::XML::Element then :element
153
- when ::Oga::XML::Text then :text
162
+ when ::Oga::XML::Text
163
+ if node.instance_variable_get(:@moxml_entity_name)
164
+ :entity_reference
165
+ else
166
+ :text
167
+ end
154
168
  when ::Oga::XML::Cdata then :cdata
155
169
  when ::Oga::XML::Comment then :comment
156
170
  when ::Oga::XML::Attribute then :attribute
@@ -178,7 +192,7 @@ module Moxml
178
192
  node.doctype].compact
179
193
  end
180
194
 
181
- return all_children unless node.respond_to?(:children)
195
+ return all_children unless node.is_a?(::Oga::XML::Node) || node.is_a?(::Oga::XML::Document)
182
196
 
183
197
  all_children + node.children.reject do |child|
184
198
  child.is_a?(::Oga::XML::Text) &&
@@ -188,7 +202,7 @@ module Moxml
188
202
  end
189
203
 
190
204
  def parent(node)
191
- node.parent if node.respond_to?(:parent)
205
+ node.parent if node.is_a?(::Oga::XML::Node)
192
206
  end
193
207
 
194
208
  def next_sibling(node)
@@ -215,7 +229,7 @@ module Moxml
215
229
  end
216
230
 
217
231
  def attributes(element)
218
- return [] unless element.respond_to?(:attributes)
232
+ return [] unless element.is_a?(::Oga::XML::Element)
219
233
 
220
234
  # remove attributes-namespaces
221
235
  element.attributes.reject do |attr|
@@ -316,10 +330,9 @@ module Moxml
316
330
  end
317
331
 
318
332
  def inner_text(node)
319
- text = if node.respond_to?(:inner_text)
333
+ text = if node.is_a?(::Oga::XML::Element)
320
334
  node.inner_text
321
335
  else
322
- # Oga::XML::Text node for example
323
336
  node.text
324
337
  end
325
338
  restore_entity_markers(text)
@@ -327,7 +340,7 @@ module Moxml
327
340
 
328
341
  def set_text_content(node, content)
329
342
  encoded = encode_entity_markers(content)
330
- if node.respond_to?(:inner_text=)
343
+ if node.is_a?(::Oga::XML::Element)
331
344
  node.inner_text = encoded
332
345
  else
333
346
  node.text = encoded
@@ -370,22 +383,32 @@ module Moxml
370
383
  end
371
384
 
372
385
  def namespace_definitions(node)
373
- return [] unless node.respond_to?(:namespaces)
386
+ return [] unless node.is_a?(::Oga::XML::Element)
374
387
 
375
388
  node.namespaces.values
376
389
  end
377
390
 
378
391
  # Doctype accessor methods
392
+ # Note: Oga stores SYSTEM identifier in public_id for SYSTEM doctypes.
393
+ # See: Oga::XML::Doctype puts SYSTEM dtd in public_id, system_id is nil.
379
394
  def doctype_name(native)
380
395
  native.name
381
396
  end
382
397
 
383
398
  def doctype_external_id(native)
384
- native.public_id
399
+ if native.type == "SYSTEM"
400
+ nil
401
+ else
402
+ native.public_id
403
+ end
385
404
  end
386
405
 
387
406
  def doctype_system_id(native)
388
- native.system_id
407
+ if native.type == "SYSTEM"
408
+ native.public_id
409
+ else
410
+ native.system_id
411
+ end
389
412
  end
390
413
 
391
414
  def xpath(node, expression, namespaces = nil)
@@ -479,7 +502,7 @@ module Moxml
479
502
 
480
503
  if should_include_decl && !node.xml_declaration && !has_existing_declaration
481
504
  # Need to add declaration - create default one
482
- output = +""
505
+ output = []
483
506
  output << '<?xml version="1.0" encoding="UTF-8"?>'
484
507
  output << "\n"
485
508
 
@@ -491,10 +514,10 @@ module Moxml
491
514
  output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
492
515
  end
493
516
 
494
- return output
517
+ return output.join
495
518
  elsif !should_include_decl
496
519
  # Skip xml_declaration
497
- output = +""
520
+ output = []
498
521
 
499
522
  # Serialize doctype if present
500
523
  output << node.doctype.to_xml << "\n" if node.doctype
@@ -506,7 +529,7 @@ module Moxml
506
529
  output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
507
530
  end
508
531
 
509
- return output
532
+ return output.join
510
533
  end
511
534
  end
512
535
 
@@ -514,19 +537,20 @@ module Moxml
514
537
  # But first check if we need to handle declaration specially
515
538
  if node.is_a?(::Oga::XML::Document) && node.xml_declaration
516
539
  # Document has declaration - use custom handling to avoid duplicates
517
- output = +""
540
+ output = []
541
+ xml_declaration_serialized = false
518
542
 
519
543
  # Serialize children, but skip XmlDeclaration if it would cause duplication
520
544
  node.children.each do |child|
521
- # Check if this would cause duplication by seeing if we already have one in output
522
- if child.is_a?(::Oga::XML::XmlDeclaration) && output.include?("<?xml")
523
- next # Skip duplicate declaration
524
- end
545
+ xml_declaration = child.is_a?(::Oga::XML::XmlDeclaration)
546
+ next if xml_declaration && xml_declaration_serialized
547
+
548
+ xml_declaration_serialized = true if xml_declaration
525
549
 
526
550
  output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
527
551
  end
528
552
 
529
- output
553
+ output.join
530
554
  else
531
555
  # Normal case - use XmlGenerator directly
532
556
  ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(node).to_xml