moxml 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +10 -10
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +238 -40
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +45 -0
  21. data/docs/_guides/modifying-xml.adoc +293 -0
  22. data/docs/_guides/parsing-xml.adoc +231 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_pages/adapter-compatibility.adoc +369 -0
  26. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  27. data/docs/_pages/adapters/index.adoc +98 -0
  28. data/docs/_pages/adapters/libxml.adoc +286 -0
  29. data/docs/_pages/adapters/nokogiri.adoc +252 -0
  30. data/docs/_pages/adapters/oga.adoc +292 -0
  31. data/docs/_pages/adapters/ox.adoc +55 -0
  32. data/docs/_pages/adapters/rexml.adoc +293 -0
  33. data/docs/_pages/best-practices.adoc +430 -0
  34. data/docs/_pages/compatibility.adoc +468 -0
  35. data/docs/_pages/configuration.adoc +251 -0
  36. data/docs/_pages/error-handling.adoc +350 -0
  37. data/docs/_pages/headed-ox-limitations.adoc +558 -0
  38. data/docs/_pages/headed-ox.adoc +1025 -0
  39. data/docs/_pages/index.adoc +35 -0
  40. data/docs/_pages/installation.adoc +141 -0
  41. data/docs/_pages/node-api-reference.adoc +50 -0
  42. data/docs/_pages/performance.adoc +36 -0
  43. data/docs/_pages/quick-start.adoc +244 -0
  44. data/docs/_pages/thread-safety.adoc +29 -0
  45. data/docs/_references/document-api.adoc +408 -0
  46. data/docs/_references/index.adoc +48 -0
  47. data/docs/_tutorials/basic-usage.adoc +268 -0
  48. data/docs/_tutorials/builder-pattern.adoc +343 -0
  49. data/docs/_tutorials/index.adoc +33 -0
  50. data/docs/_tutorials/namespace-handling.adoc +325 -0
  51. data/docs/_tutorials/xpath-queries.adoc +359 -0
  52. data/docs/index.adoc +122 -0
  53. data/examples/README.md +124 -0
  54. data/examples/api_client/README.md +424 -0
  55. data/examples/api_client/api_client.rb +394 -0
  56. data/examples/api_client/example_response.xml +48 -0
  57. data/examples/headed_ox_example/README.md +90 -0
  58. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  59. data/examples/rss_parser/README.md +194 -0
  60. data/examples/rss_parser/example_feed.xml +93 -0
  61. data/examples/rss_parser/rss_parser.rb +189 -0
  62. data/examples/sax_parsing/README.md +50 -0
  63. data/examples/sax_parsing/data_extractor.rb +75 -0
  64. data/examples/sax_parsing/example.xml +21 -0
  65. data/examples/sax_parsing/large_file.rb +78 -0
  66. data/examples/sax_parsing/simple_parser.rb +55 -0
  67. data/examples/web_scraper/README.md +352 -0
  68. data/examples/web_scraper/example_page.html +201 -0
  69. data/examples/web_scraper/web_scraper.rb +312 -0
  70. data/lib/moxml/adapter/base.rb +107 -28
  71. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  72. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  73. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  74. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  75. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  76. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  77. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  78. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  79. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
  80. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
  81. data/lib/moxml/adapter/headed_ox.rb +161 -0
  82. data/lib/moxml/adapter/libxml.rb +1548 -0
  83. data/lib/moxml/adapter/nokogiri.rb +121 -9
  84. data/lib/moxml/adapter/oga.rb +123 -12
  85. data/lib/moxml/adapter/ox.rb +282 -26
  86. data/lib/moxml/adapter/rexml.rb +127 -20
  87. data/lib/moxml/adapter.rb +21 -4
  88. data/lib/moxml/attribute.rb +6 -0
  89. data/lib/moxml/builder.rb +40 -4
  90. data/lib/moxml/config.rb +8 -3
  91. data/lib/moxml/context.rb +39 -1
  92. data/lib/moxml/doctype.rb +13 -1
  93. data/lib/moxml/document.rb +39 -6
  94. data/lib/moxml/document_builder.rb +27 -5
  95. data/lib/moxml/element.rb +71 -2
  96. data/lib/moxml/error.rb +175 -6
  97. data/lib/moxml/node.rb +94 -3
  98. data/lib/moxml/node_set.rb +34 -0
  99. data/lib/moxml/sax/block_handler.rb +194 -0
  100. data/lib/moxml/sax/element_handler.rb +124 -0
  101. data/lib/moxml/sax/handler.rb +113 -0
  102. data/lib/moxml/sax.rb +31 -0
  103. data/lib/moxml/version.rb +1 -1
  104. data/lib/moxml/xml_utils/encoder.rb +4 -4
  105. data/lib/moxml/xml_utils.rb +7 -4
  106. data/lib/moxml/xpath/ast/node.rb +159 -0
  107. data/lib/moxml/xpath/cache.rb +91 -0
  108. data/lib/moxml/xpath/compiler.rb +1768 -0
  109. data/lib/moxml/xpath/context.rb +26 -0
  110. data/lib/moxml/xpath/conversion.rb +124 -0
  111. data/lib/moxml/xpath/engine.rb +52 -0
  112. data/lib/moxml/xpath/errors.rb +101 -0
  113. data/lib/moxml/xpath/lexer.rb +304 -0
  114. data/lib/moxml/xpath/parser.rb +485 -0
  115. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  116. data/lib/moxml/xpath/ruby/node.rb +193 -0
  117. data/lib/moxml/xpath.rb +37 -0
  118. data/lib/moxml.rb +5 -2
  119. data/moxml.gemspec +3 -1
  120. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  121. data/spec/consistency/README.md +77 -0
  122. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  123. data/spec/examples/README.md +75 -0
  124. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  125. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  126. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  127. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  128. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  129. data/spec/integration/README.md +71 -0
  130. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  131. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  132. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  133. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  134. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  135. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  136. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  137. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  138. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  139. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
  140. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  141. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  142. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  143. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  144. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  145. data/spec/moxml/README.md +41 -0
  146. data/spec/moxml/adapter/.gitkeep +0 -0
  147. data/spec/moxml/adapter/README.md +61 -0
  148. data/spec/moxml/adapter/base_spec.rb +27 -0
  149. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  150. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  151. data/spec/moxml/adapter/ox_spec.rb +9 -8
  152. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  153. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  154. data/spec/moxml/adapter_spec.rb +16 -0
  155. data/spec/moxml/attribute_spec.rb +30 -0
  156. data/spec/moxml/builder_spec.rb +33 -0
  157. data/spec/moxml/cdata_spec.rb +31 -0
  158. data/spec/moxml/comment_spec.rb +31 -0
  159. data/spec/moxml/config_spec.rb +3 -3
  160. data/spec/moxml/context_spec.rb +28 -0
  161. data/spec/moxml/declaration_spec.rb +36 -0
  162. data/spec/moxml/doctype_spec.rb +33 -0
  163. data/spec/moxml/document_builder_spec.rb +30 -0
  164. data/spec/moxml/document_spec.rb +105 -0
  165. data/spec/moxml/element_spec.rb +143 -0
  166. data/spec/moxml/error_spec.rb +266 -22
  167. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  168. data/spec/moxml/namespace_spec.rb +32 -0
  169. data/spec/moxml/node_set_spec.rb +39 -0
  170. data/spec/moxml/node_spec.rb +37 -0
  171. data/spec/moxml/processing_instruction_spec.rb +34 -0
  172. data/spec/moxml/sax_spec.rb +1067 -0
  173. data/spec/moxml/text_spec.rb +31 -0
  174. data/spec/moxml/version_spec.rb +14 -0
  175. data/spec/moxml/xml_utils/.gitkeep +0 -0
  176. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  177. data/spec/moxml/xml_utils_spec.rb +49 -0
  178. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  179. data/spec/moxml/xpath/axes_spec.rb +296 -0
  180. data/spec/moxml/xpath/cache_spec.rb +358 -0
  181. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  182. data/spec/moxml/xpath/context_spec.rb +210 -0
  183. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  184. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  185. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  186. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  187. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  188. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  189. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  190. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  191. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  192. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  193. data/spec/moxml/xpath/parser_spec.rb +364 -0
  194. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  195. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  196. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  197. data/spec/moxml/xpath_spec.rb +77 -0
  198. data/spec/performance/README.md +83 -0
  199. data/spec/performance/benchmark_spec.rb +64 -0
  200. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
  201. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  202. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  203. data/spec/spec_helper.rb +58 -1
  204. data/spec/support/xml_matchers.rb +1 -1
  205. metadata +176 -34
  206. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  207. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  208. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  209. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  210. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -0,0 +1,1548 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base"
4
+ require "libxml"
5
+ require_relative "customized_libxml/node"
6
+ require_relative "customized_libxml/element"
7
+ require_relative "customized_libxml/text"
8
+ require_relative "customized_libxml/comment"
9
+ require_relative "customized_libxml/cdata"
10
+ require_relative "customized_libxml/processing_instruction"
11
+ require_relative "customized_libxml/declaration"
12
+
13
+ module Moxml
14
+ module Adapter
15
+ class Libxml < Base
16
# Lightweight stand-in for a DOCTYPE declaration.
#
# Keeps the DOCTYPE name and identifiers together with the document they
# are associated with, and can render itself back to markup.
class DoctypeWrapper
  attr_reader :native_doc
  attr_accessor :name, :external_id, :system_id

  # @param doc [Object] document this DOCTYPE is associated with
  # @param name [String] DOCTYPE name
  # @param external_id [String, nil] public identifier
  # @param system_id [String, nil] system identifier
  def initialize(doc, name, external_id, system_id)
    @native_doc = doc
    @name = name
    @external_id = external_id
    @system_id = system_id
  end

  # Provide native method to match adapter pattern
  #
  # @return [Object] the associated document (same as #native_doc)
  def native
    @native_doc
  end

  # Renders the declaration as markup.
  #
  # A non-empty public identifier yields the PUBLIC form (followed by the
  # system identifier whenever it is non-nil); otherwise a non-empty
  # system identifier yields the SYSTEM form; otherwise only the name is
  # emitted.
  #
  # The pieces are collected and joined instead of appended to a string
  # literal, so this also works where interpolated literals are frozen by
  # the file's frozen_string_literal pragma (Ruby < 3.0).
  #
  # @return [String]
  def to_xml
    pieces = ["<!DOCTYPE #{@name}"]
    if @external_id && !@external_id.empty?
      pieces << " PUBLIC \"#{@external_id}\""
      pieces << " \"#{@system_id}\"" if @system_id
    elsif @system_id && !@system_id.empty?
      pieces << " SYSTEM \"#{@system_id}\""
    end
    pieces << ">"
    pieces.join
  end
end
45
+
46
+ class << self
47
# Installs +element+ as the root node of +doc+.
#
# @param doc [Object] document responding to #root=
# @param element [Object] node to become the root
# @return [Object] the element that was assigned
def set_root(doc, element)
  doc.root = element
  element
end
50
+
51
# Parses XML into a document built on top of a native LibXML document.
#
# LibXML doesn't preserve DOCTYPE during parsing, so the declaration is
# extracted from the raw text first and attached to the native document
# as @moxml_doctype.
#
# @param xml [String, #read, #to_s] XML source; objects responding to
#   #read are read, anything else is converted with #to_s
# @param options [Hash] only :strict is consulted here
# @return [Object] whatever DocumentBuilder#build returns for the native document
# @raise [Moxml::ParseError] when LibXML reports an error and
#   options[:strict] is truthy; otherwise an empty document is used
def parse(xml, options = {})
  xml_string = if xml.is_a?(String)
                 xml
               elsif xml.respond_to?(:read)
                 xml.read
               else
                 xml.to_s
               end

  # Extract DOCTYPE before parsing. Both alternatives start with \s+, so
  # a SYSTEM declaration separated by a single space is recognised too.
  doctype_match = xml_string.match(
    /<!DOCTYPE\s+(\S+)(?:\s+PUBLIC\s+"([^"]+)"\s+"([^"]+)"|\s+SYSTEM\s+"([^"]+)")?\s*>/i,
  )

  native_doc = begin
    parser = ::LibXML::XML::Parser.string(xml_string)
    parser.parse
  rescue ::LibXML::XML::Error => e
    if options[:strict]
      line = e.respond_to?(:line) ? e.line : nil
      raise Moxml::ParseError.new(
        e.message,
        line: line,
        column: nil,
        source: xml_string[0..100],
      )
    end
    # Return empty document for non-strict mode
    create_document
  end

  # Store DOCTYPE if found
  if doctype_match
    name = doctype_match[1]
    external_id = doctype_match[2]
    # Group 3 is the system id of a PUBLIC declaration, group 4 that of
    # a SYSTEM declaration.
    system_id = doctype_match[3] || doctype_match[4]

    doctype_wrapper = DoctypeWrapper.new(
      native_doc,
      name,
      external_id,
      system_id,
    )
    native_doc.instance_variable_set(:@moxml_doctype, doctype_wrapper)
  end

  DocumentBuilder.new(Context.new(:libxml)).build(native_doc)
end
99
+
100
# SAX parsing implementation for LibXML
#
# LibXML errors are not raised to the caller; they are converted to a
# Moxml::ParseError and passed to handler.on_error.
#
# @param xml [String, IO] XML to parse; objects responding to #read are
#   read (mirroring .parse), anything else is converted with #to_s
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
# @return [void]
def sax_parse(xml, handler)
  # An IO's #to_s is its inspect string, not its content, so read it.
  xml_string = if xml.is_a?(String)
                 xml
               elsif xml.respond_to?(:read)
                 xml.read
               else
                 xml.to_s
               end

  # Create bridge that translates LibXML SAX to Moxml SAX
  bridge = LibXMLSAXBridge.new(handler)

  parser = ::LibXML::XML::SaxParser.string(xml_string)
  parser.callbacks = bridge
  parser.parse
rescue ::LibXML::XML::Error => e
  line = e.respond_to?(:line) ? e.line : nil
  column = e.respond_to?(:column) ? e.column : nil
  error = Moxml::ParseError.new(e.message, line: line, column: column)
  handler.on_error(error)
end
123
+
124
# Returns a brand-new LibXML document; the argument is ignored.
#
# @return [::LibXML::XML::Document]
def create_document(_native_doc = nil)
  document_class = ::LibXML::XML::Document
  document_class.new
end
127
+
128
# Builds a LibXML node whose name is the stringified +name+.
#
# @param name [#to_s]
def create_native_element(name)
  tag = name.to_s
  ::LibXML::XML::Node.new(tag)
end
131
+
132
# Creates a LibXML text node from the stringified +content+ and returns
# it wrapped in CustomizedLibxml::Text.
#
# @param content [#to_s]
def create_native_text(content)
  CustomizedLibxml::Text.new(::LibXML::XML::Node.new_text(content.to_s))
end
136
+
137
# Creates a LibXML CDATA node from the stringified +content+ and returns
# it wrapped in CustomizedLibxml::Cdata.
#
# @param content [#to_s]
def create_native_cdata(content)
  CustomizedLibxml::Cdata.new(::LibXML::XML::Node.new_cdata(content.to_s))
end
141
+
142
# Creates a LibXML comment node from the stringified +content+ and
# returns it wrapped in CustomizedLibxml::Comment.
#
# @param content [#to_s]
def create_native_comment(content)
  CustomizedLibxml::Comment.new(::LibXML::XML::Node.new_comment(content.to_s))
end
146
+
147
# Creates a LibXML processing-instruction node and returns it wrapped in
# CustomizedLibxml::ProcessingInstruction.
#
# @param target [#to_s] PI target
# @param content [#to_s] PI content
def create_native_processing_instruction(target, content)
  pi_node = ::LibXML::XML::Node.new_pi(target.to_s, content.to_s)
  CustomizedLibxml::ProcessingInstruction.new(pi_node)
end
151
+
152
# Builds a Declaration wrapper around a freshly created document,
# passing the declaration values through unchanged.
#
# @param version [Object]
# @param encoding [Object]
# @param standalone [Object]
# @return [CustomizedLibxml::Declaration]
def create_native_declaration(version, encoding, standalone)
  CustomizedLibxml::Declaration.new(create_document, version, encoding, standalone)
end
157
+
158
# Builds a DoctypeWrapper around a freshly created document.
#
# No LibXML::XML::Dtd is constructed; the values are simply kept on the
# wrapper. The name is always stringified, the identifiers only when
# they are not nil.
#
# @param name [#to_s]
# @param external_id [#to_s, nil]
# @param system_id [#to_s, nil]
# @return [DoctypeWrapper]
def create_native_doctype(name, external_id, system_id)
  host_doc = create_document
  doctype_name = name.to_s
  public_id = external_id&.to_s
  system = system_id&.to_s
  DoctypeWrapper.new(host_doc, doctype_name, public_id, system)
end
164
+
165
# Classifies +node+ as one of the adapter's node-type symbols.
#
# Wrapper objects are recognised by their class first. Anything else is
# unwrapped (when it responds to #native) and classified by LibXML's
# numeric node type.
#
# @param node [Object, nil]
# @return [Symbol] :unknown for nil/false or an unrecognised node type
def node_type(node)
  return :unknown unless node

  # Handle wrapper classes
  case node
  when CustomizedLibxml::Element then return :element
  when CustomizedLibxml::Text then return :text
  when CustomizedLibxml::Cdata then return :cdata
  when CustomizedLibxml::Comment then return :comment
  when CustomizedLibxml::ProcessingInstruction then return :processing_instruction
  when DoctypeWrapper then return :doctype
  end

  # Unwrap if needed
  raw = node.respond_to?(:native) ? node.native : node

  kinds = ::LibXML::XML::Node
  {
    kinds::DOCUMENT_NODE => :document,
    kinds::ELEMENT_NODE => :element,
    kinds::TEXT_NODE => :text,
    kinds::CDATA_SECTION_NODE => :cdata,
    kinds::COMMENT_NODE => :comment,
    kinds::ATTRIBUTE_NODE => :attribute,
    kinds::PI_NODE => :processing_instruction,
    kinds::DTD_NODE => :doctype,
  }.fetch(raw.node_type, :unknown)
end
202
+
203
# Returns the name of the unwrapped node, or nil when unwrapping yields nil.
def node_name(node)
  raw = unpatch_node(node)
  return nil if raw.nil?

  raw.name
end
207
+
208
# Renames the unwrapped node to the stringified +name+; does nothing
# when unwrapping yields no node.
def set_node_name(node, name)
  raw = unpatch_node(node)
  return unless raw

  raw.name = name.to_s
end
212
+
213
# Reads one attribute of an XML declaration.
#
# Declaration wrappers are asked directly. Any other object is treated
# as a native document: its encoding goes through encoding_to_string
# and its standalone flag is reported as "yes" or nil.
#
# @param node [CustomizedLibxml::Declaration, Object, nil]
# @param name [String] "version", "encoding" or "standalone"
# @return [Object, nil] nil for a missing node or any other name
def declaration_attribute(node, name)
  return nil unless node

  wrapper = node.is_a?(CustomizedLibxml::Declaration)

  case name
  when "version"
    node.version
  when "encoding"
    return node.encoding if wrapper

    native_encoding = node.encoding
    native_encoding ? encoding_to_string(native_encoding) : nil
  when "standalone"
    # The wrapper returns "yes", "no", or nil
    return node.standalone if wrapper

    node.standalone? ? "yes" : nil
  end
end
239
+
240
# Writes one attribute of an XML declaration.
#
# Only Declaration wrappers are modified; nil and native documents are
# left untouched. Names other than "version", "encoding" and
# "standalone" are ignored.
#
# @param node [CustomizedLibxml::Declaration, Object, nil]
# @param name [String]
# @param value [Object] passed to the wrapper unchanged
def set_declaration_attribute(node, name, value)
  return unless node
  return unless node.is_a?(CustomizedLibxml::Declaration)

  case name
  when "version" then node.version = value
  when "encoding" then node.encoding = value
  when "standalone" then node.standalone = value # the wrapper handles conversion
  end
end
258
+
259
# Lists the children of a node as patched nodes.
#
# For a document the result is the DOCTYPE (the one stored in
# @moxml_doctype when that variable is defined, otherwise one built
# from the document's dtd) followed by the root element, when present.
# For any other node whitespace-only text children are left out.
#
# @param node [Object]
# @return [Array]
def children(node)
  raw = unpatch_node(node)
  return [] unless raw

  unless raw.is_a?(::LibXML::XML::Document)
    return [] unless raw.children?

    kept = []
    raw.each_child do |kid|
      blank_text = kid.text? && kid.content.to_s.strip.empty?
      kept << patch_node(kid) unless blank_text
    end
    return kept
  end

  # Document: it doesn't have a children? method, so assemble by hand.
  listing = []
  if raw.instance_variable_defined?(:@moxml_doctype)
    stored = raw.instance_variable_get(:@moxml_doctype)
    listing << stored if stored
  elsif raw.respond_to?(:dtd) && raw.dtd
    dtd = raw.dtd
    # Wrap in DoctypeWrapper for consistency
    listing << DoctypeWrapper.new(raw, dtd.name, dtd.external_id, dtd.system_id)
  end

  root_node = raw.root
  listing << patch_node(root_node) if root_node
  listing
end
302
+
303
# Returns the patched parent of +node+, or nil when there is none.
def parent(node)
  owner = unpatch_node(node)&.parent
  return nil unless owner

  patch_node(owner)
end
308
+
309
# Returns the next sibling of +node+ (patched), stepping over
# whitespace-only text nodes; nil when none remains.
def next_sibling(node)
  candidate = unpatch_node(node)&.next
  while candidate && candidate.text? && candidate.content.to_s.strip.empty?
    candidate = candidate.next
  end
  return nil unless candidate

  patch_node(candidate)
end
320
+
321
# Returns the previous sibling of +node+ (patched), stepping over
# whitespace-only text nodes; nil when none remains.
def previous_sibling(node)
  candidate = unpatch_node(node)&.prev
  while candidate && candidate.text? && candidate.content.to_s.strip.empty?
    candidate = candidate.prev
  end
  return nil unless candidate

  patch_node(candidate)
end
332
+
333
# Returns the #doc of the unwrapped node, or nil when unwrapping yields nil.
def document(node)
  raw = unpatch_node(node)
  return nil if raw.nil?

  raw.doc
end
337
+
338
# Returns the #root of the unwrapped document (not patched), or nil
# when unwrapping yields nil.
def root(document)
  raw_doc = unpatch_node(document)
  return nil if raw_doc.nil?

  raw_doc.root
end
342
+
343
# Lists the attributes of an element, leaving out every attribute whose
# name starts with "xmlns".
#
# @param element [Object]
# @return [Array] empty for a missing node, a non-element, or an
#   element without attributes
def attributes(element)
  raw = unpatch_node(element)
  return [] unless raw

  is_element = raw.respond_to?(:element?) && raw.element?
  return [] unless is_element && raw.attributes?

  kept = []
  raw.each_attr do |attribute|
    next if attribute.name.to_s.start_with?("xmlns")

    kept << attribute
  end
  kept
end
357
+
358
# Returns the #parent of +attr+, or nil when +attr+ is nil.
def attribute_element(attr)
  return nil if attr.nil?

  attr.parent
end
361
+
362
# Returns attr.ns, or nil when +attr+ is missing or has no #ns method.
def attribute_namespace(attr)
  attr.ns if attr && attr.respond_to?(:ns)
end
368
+
369
# Sets an attribute on an element and returns the attribute object.
#
# A name of the form "prefix:local" is resolved through
# find_namespace_by_prefix. When the prefix resolves, the attribute is
# (re)created as a namespaced attribute; otherwise, and for plain names,
# the value is assigned under the literal name.
#
# @param element [Object]
# @param name [#to_s]
# @param value [#to_s]
# @return [Object, nil] the attribute, or nil when there is no element
def set_attribute(element, name, value)
  native_elem = unpatch_node(element)
  return unless native_elem

  name_str = name.to_s
  value_str = value.to_s

  ns = nil
  local_name = nil
  if name_str.include?(":")
    prefix, local_name = name_str.split(":", 2)
    ns = find_namespace_by_prefix(native_elem, prefix)
  end

  if ns
    attrs = native_elem.attributes
    # Remove the previous value first so the element never carries the
    # attribute twice. The lookup goes by namespace URI + local name
    # (presumably how LibXML stores namespaced attributes -- the
    # prefixed name alone does not find them); the literal-name lookup
    # is kept as a fallback.
    existing = attrs.get_attribute_ns(ns.href, local_name) ||
               attrs.get_attribute(name_str)
    existing&.remove!

    # Attr.new(node, name, value, ns) attaches the attribute to the
    # element; the new attribute is the return value.
    ::LibXML::XML::Attr.new(native_elem, local_name, value_str, ns)
  else
    # Plain name, or a prefix with no matching namespace
    native_elem[name_str] = value_str
    native_elem.attributes.get_attribute(name_str)
  end
end
406
+
407
# Looks up an attribute object by name.
#
# The returned attribute gains a singleton #to_xml that renders it as
# name="value" with &, <, > and " escaped in the value.
#
# @param element [Object]
# @param name [#to_s]
# @return [Object, nil] nil when the element is missing, has no
#   attributes, or has no attribute of that name
def get_attribute(element, name)
  native_elem = unpatch_node(element)
  return nil unless native_elem
  return nil unless native_elem.attributes?

  attr = native_elem.attributes.get_attribute(name.to_s)
  return nil unless attr

  # Extend the attribute with to_xml method for proper escaping.
  # Inside the block +value+ is the attribute's own value, while +name+
  # is this method's argument captured by the closure.
  attr.define_singleton_method(:to_xml) do
    escaped = value.to_s
                   .gsub("&", "&amp;")
                   .gsub("<", "&lt;")
                   .gsub(">", "&gt;")
                   .gsub("\"", "&quot;")
    # The value must be double-quoted: that is what makes the &quot;
    # escaping above meaningful and the output valid attribute markup.
    "#{name}=\"#{escaped}\""
  end
  attr
end
426
+
427
# Returns the value of an attribute, or nil when it cannot be found.
#
# The element is first indexed with the name as given. If that yields
# nothing and the name contains ":", the attributes are scanned for one
# whose name equals the local part or the full name and whose namespace
# prefix matches -- or, failing the prefix match, whose name equals the
# full prefixed name.
#
# @param element [Object]
# @param name [#to_s]
# @return [Object, nil]
def get_attribute_value(element, name)
  raw = unpatch_node(element)
  return nil unless raw

  qualified = name.to_s
  direct = raw[qualified]
  return direct if direct
  return nil unless qualified.include?(":")

  prefix, local_name = qualified.split(":", 2)
  return nil unless raw.attributes?

  raw.each_attr do |candidate|
    exact = candidate.name == qualified
    next unless exact || candidate.name == local_name

    prefix_matches = candidate.ns && candidate.ns.prefix == prefix
    return candidate.value if prefix_matches || exact
  end

  nil
end
456
+
457
# Removes the attribute called +name+ from the element, if the element
# exists, has attributes, and has one of that name.
def remove_attribute(element, name)
  raw = unpatch_node(element)
  return unless raw
  return unless raw.attributes?

  raw.attributes.get_attribute(name.to_s)&.remove!
end
465
+
466
# Renames an attribute by removing it from its element and assigning
# the same value under the new name.
#
# Name, value and parent are read before anything is removed.
#
# @param attribute [Object, nil]
# @param new_name [#to_s]
# @return [Object, nil] the attribute found under the new name; nil
#   when the attribute is missing or has no parent
def set_attribute_name(attribute, new_name)
  return unless attribute

  previous_name = attribute.name
  carried_value = attribute.value
  owner = attribute.parent
  return unless owner

  stale = owner.attributes.get_attribute(previous_name)
  stale&.remove!

  owner[new_name.to_s] = carried_value
  owner.attributes.get_attribute(new_name.to_s)
end
486
+
487
# Appends +child+ to +element+ (an element or a document).
#
# Returns nil immediately unless both arguments are truthy. Both are
# unwrapped with unpatch_node before use.
#
# Document targets: Declaration, DoctypeWrapper, ProcessingInstruction
# and Text wrappers are not inserted into the native tree; they are
# recorded on the document in @moxml_declaration, @moxml_doctype,
# @moxml_pis and @moxml_texts respectively, and the method returns.
# Any other child becomes the root when the document has none and the
# child is an element, or is appended to the existing root.
#
# Non-document targets: the child is handed to import_and_add together
# with the target's document.
+ def add_child(element, child)
488
+ return unless element && child
489
+
490
+ # Unwrap both element and child
491
+ native_elem = unpatch_node(element)
492
+ native_child = unpatch_node(child)
493
+
494
+ # For LibXML: if parent has a DEFAULT namespace (nil/empty prefix) and child is an element without a namespace,
495
+ # explicitly set the child's namespace to match the parent's for XPath compatibility
496
+ # NOTE: Prefixed namespaces are NOT inherited, only default namespaces
# A child counts as "without a namespace" when it has none or when its
# namespace href is empty.
497
+ if native_elem.respond_to?(:namespaces) && native_elem.namespaces&.namespace &&
498
+ native_child.respond_to?(:namespaces) && native_child.element? &&
499
+ (!native_child.namespaces.namespace || native_child.namespaces.namespace.href.to_s.empty?)
500
+
501
+ parent_ns = native_elem.namespaces.namespace
502
+ # Only set child's namespace if parent's namespace is DEFAULT (nil or empty prefix)
503
+ if parent_ns.prefix.nil? || parent_ns.prefix.to_s.empty?
504
+ native_child.namespaces.namespace = parent_ns
505
+ end
506
+ end
507
+
508
+ if native_elem.is_a?(::LibXML::XML::Document)
509
+ # For Declaration wrappers, store them for serialization
510
+ if child.is_a?(CustomizedLibxml::Declaration)
511
+ native_elem.instance_variable_set(:@moxml_declaration, child)
512
+ # Also store reference to parent document in the declaration
513
+ child.instance_variable_set(:@parent_doc, native_elem)
514
+ return
515
+ end
516
+
517
+ # For DOCTYPE wrappers, store them for serialization
# A previously stored DOCTYPE is overwritten.
518
+ if child.is_a?(DoctypeWrapper)
519
+ native_elem.instance_variable_set(:@moxml_doctype, child)
520
+ return
521
+ end
522
+
523
+ # For document-level PIs, store them for serialization
524
+ if child.is_a?(CustomizedLibxml::ProcessingInstruction)
525
+ pis = native_elem.instance_variable_get(:@moxml_pis) || []
526
+ pis << child
527
+ native_elem.instance_variable_set(:@moxml_pis, pis)
528
+ return
529
+ end
530
+
531
+ # For text nodes added to document, store them for serialization
532
+ # Documents can't have text children in LibXML
533
+ if child.is_a?(CustomizedLibxml::Text)
534
+ texts = native_elem.instance_variable_get(:@moxml_texts) || []
535
+ texts << child
536
+ native_elem.instance_variable_set(:@moxml_texts, texts)
537
+ return
538
+ end
539
+
540
+ # For documents, check if adding the first root element
# NOTE(review): when the document has no root and the child is not an
# element (e.g. a comment), neither branch below runs and the child is
# dropped without notice -- confirm this is intended.
541
+ if native_elem.root.nil? && node_type(native_child) == :element
542
+ # Set as root element
543
+ native_elem.root = native_child
544
+ elsif native_elem.root
545
+ # Document has root, add to it instead
# NOTE(review): native_elem is a Document here, yet #doc is called on
# it -- confirm that documents respond to #doc.
546
+ import_and_add(native_elem.doc, native_elem.root, native_child)
547
+ end
548
+ else
549
+ import_and_add(native_elem.doc, native_elem, native_child)
550
+ end
551
+ end
552
+
553
# Inserts +sibling+ immediately before +node+.
#
# A processing instruction placed before the document's root element is
# a document-level PI: it is not inserted into the native tree but
# appended to the document's @moxml_pis list. Everything else --
# including a processing instruction placed before a non-root node --
# is a regular native insertion.
#
# @param node [Object] reference node
# @param sibling [Object] node to insert
# @return [Object, nil] nil when either argument is missing or the PI
#   was recorded on the document
def add_previous_sibling(node, sibling)
  return unless node && sibling

  native_node = unpatch_node(node)
  native_sibling = unpatch_node(sibling)

  if sibling.is_a?(CustomizedLibxml::ProcessingInstruction) &&
     native_node.respond_to?(:doc) && native_node.doc
    doc = native_node.doc
    doc_root = doc.root
    # Only hoist to the document when the reference node really is the
    # root element; a PI next to a nested node stays in place.
    if doc_root && doc_root == native_node
      pis = doc.instance_variable_get(:@moxml_pis) || []
      pis << sibling
      doc.instance_variable_set(:@moxml_pis, pis)
      return
    end
  end

  native_node.prev = native_sibling
end
572
+
573
# Inserts +sibling+ immediately after +node+; does nothing when either
# argument is missing.
def add_next_sibling(node, sibling)
  return if !node || !sibling

  anchor = unpatch_node(node)
  anchor.next = unpatch_node(sibling)
end
580
+
581
# Detaches +node+.
#
# A Declaration wrapper is not part of the native tree, and the
# document holding it cannot easily be found from the wrapper, so the
# wrapper itself is flagged with @removed = true. Any other node is
# unwrapped and removed natively.
#
# @param node [Object]
# @return [Object, nil] nil for declarations; otherwise the result of
#   the native remove! (nil when unwrapping yields nil)
def remove(node)
  if node.is_a?(CustomizedLibxml::Declaration)
    node.instance_variable_set(:@removed, true)
    nil
  else
    unpatch_node(node)&.remove!
  end
end
600
+
601
# Replaces +node+ with +new_node+ inside node's parent.
#
# Returns nil without doing anything when the old node has no parent or
# the new node unwraps to nothing.
#
# Text-for-text replacement takes a shortcut through parent.content=.
# Every other case appends the new node to the parent via
# import_and_add, repositions it next to the old node's former
# siblings, and finally removes the old node.
+ def replace(node, new_node)
602
+ native_node = unpatch_node(node)
603
+ native_new = unpatch_node(new_node)
604
+ parent = native_node&.parent
605
+ return unless parent && native_new
606
+
607
+ # Special handling for text nodes - LibXML's sibling manipulation
608
+ # doesn't work reliably for text nodes. Instead, use parent.content
609
+ # for text-to-text replacement
# NOTE(review): assigning parent.content presumably replaces everything
# inside the parent, not only this text node -- confirm the behaviour
# for parents with mixed content (text plus element siblings).
610
+ if native_node.text? && native_new.text?
611
+ parent.content = native_new.content
612
+ return
613
+ end
614
+
615
+ # Save the prev/next siblings before removing
616
+ prev_sibling = native_node.prev
617
+ next_sibling = native_node.next
618
+
619
+ # Import if needed for cross-document operations
620
+ parent_doc = parent.respond_to?(:doc) ? parent.doc : nil
621
+
622
+ # Use import_and_add to properly handle document adoption
623
+ import_and_add(parent_doc, parent, native_new)
624
+
625
+ # Now adjust the position - move new node to where old node was
# When the old node had both neighbours, both assignments below run.
# NOTE(review): native_new is the node passed to import_and_add; if
# that helper inserts a copy rather than this object, the moves below
# act on the wrong node -- verify against import_and_add.
626
+ if prev_sibling
627
+ # Insert after the previous sibling
628
+ prev_sibling.next = native_new
629
+ end
630
+ if next_sibling
631
+ # Insert before the next sibling
632
+ next_sibling.prev = native_new
633
+ end
634
+
635
+ # Finally remove the old node
636
+ native_node.remove!
637
+ end
638
+
639
# Replaces every child of +element+ with the given +children+.
#
# Existing children are removed first; each new child is then unwrapped and
# attached through import_and_add using the element's document (when the
# element exposes one).
#
# @param element [Object] wrapper or native element
# @param children [Enumerable] wrapper or native nodes to attach, in order
# @return [Object, nil] nil when the element is missing, otherwise the
#   result of iterating +children+
def replace_children(element, children)
  target = unpatch_node(element)
  return unless target

  target.each_child { |existing| existing.remove! }

  owner_doc = (target.doc if target.respond_to?(:doc))
  children.each do |child|
    import_and_add(owner_doc, target, unpatch_node(child))
  end
end
656
+
657
# Reads the content of a node.
#
# @param node [Object, nil] wrapper or native node
# @return [Object, nil] the native node's +content+, or nil when there is
#   no node
def text_content(node)
  native = unpatch_node(node)
  native ? native.content : nil
end
663
+
664
# Concatenates the content of the node's direct text children.
# Non-text children are ignored; descendants are not visited.
#
# @param node [Object, nil] wrapper or native node
# @return [String] joined text, or "" when the node is missing or childless
def inner_text(node)
  native = unpatch_node(node)
  return "" if !native || !native.children?

  native.enum_for(:each_child).select(&:text?).map(&:content).join
end
675
+
676
# Overwrites a node's content with the string form of +content+.
#
# @param node [Object, nil] wrapper or native node
# @param content [#to_s] new content
# @return [String, nil] the assigned string, or nil when there is no node
def set_text_content(node, content)
  native = unpatch_node(node)
  return unless native

  native.content = content.to_s
end
680
+
681
# Returns the content of a CDATA node with the five predefined XML entities
# decoded. (The original author notes that LibXML may hand the content back
# escaped.)
#
# "&amp;" is decoded last so that text such as "&amp;lt;" ends up as "&lt;"
# rather than "<".
#
# @param node [Object, nil] wrapper or native CDATA node
# @return [String, nil] decoded content, or nil when unavailable
def cdata_content(node)
  raw = unpatch_node(node)&.content
  return nil unless raw

  [
    ["&quot;", '"'],
    ["&apos;", "'"],
    ["&lt;", "<"],
    ["&gt;", ">"],
    ["&amp;", "&"],
  ].reduce(raw) { |text, (entity, char)| text.gsub(entity, char) }
end
693
+
694
# Stores +content+ on a CDATA node verbatim (no escaping is applied here).
#
# @param node [Object, nil] wrapper or native CDATA node
# @param content [#to_s] new content
# @return [String, nil] the assigned string, or nil when there is no node
def set_cdata_content(node, content)
  native = unpatch_node(node)
  return unless native

  native.content = content.to_s
end
699
+
700
# Reads the content of a comment node.
#
# @param node [Object, nil] wrapper or native comment node
# @return [Object, nil] the native node's +content+, or nil for a nil node
def comment_content(node)
  unpatch_node(node)&.content
end
704
+
705
# Overwrites a comment node's content with the string form of +content+.
#
# @param node [Object, nil] wrapper or native comment node
# @param content [#to_s] new content
# @return [String, nil] the assigned string, or nil when there is no node
def set_comment_content(node, content)
  native = unpatch_node(node)
  return unless native

  native.content = content.to_s
end
709
+
710
# Reads the target of a processing instruction, which is the native node's
# +name+.
#
# @param node [Object, nil] wrapper or native processing-instruction node
# @return [Object, nil] the target, or nil for a nil node
def processing_instruction_target(node)
  unpatch_node(node)&.name
end
714
+
715
# Returns the data of a processing instruction with the five predefined XML
# entities decoded. (The original author notes that LibXML may hand the
# content back escaped.)
#
# "&amp;" is decoded last so already-escaped entity text is only unwrapped
# one level.
#
# @param node [Object, nil] wrapper or native processing-instruction node
# @return [String, nil] decoded content, or nil when unavailable
def processing_instruction_content(node)
  raw = unpatch_node(node)&.content
  return nil unless raw

  decoded = raw
  {
    "&quot;" => '"',
    "&apos;" => "'",
    "&lt;" => "<",
    "&gt;" => ">",
    "&amp;" => "&",
  }.each { |entity, char| decoded = decoded.gsub(entity, char) }
  decoded
end
727
+
728
# Stores +content+ on a processing-instruction node as given; per the
# original note, any escaping is left to LibXML.
#
# @param node [Object, nil] wrapper or native processing-instruction node
# @param content [#to_s] new content
# @return [String, nil] the assigned string, or nil when there is no node
def set_processing_instruction_content(node, content)
  native = unpatch_node(node)
  return unless native

  native.content = content.to_s
end
733
+
734
# Declares a namespace on +element+.
#
# An empty or nil +prefix+ creates a default namespace (nil prefix) and also
# makes it the element's own namespace.
#
# @param element [Object, nil] wrapper or native element
# @param prefix [#to_s, nil] namespace prefix
# @param uri [#to_s] namespace URI
# @return [LibXML::XML::Namespace, nil] the new namespace, or nil when there
#   is no element
def create_native_namespace(element, prefix, uri)
  native = unpatch_node(element)
  return nil unless native

  prefix_text = prefix.to_s
  is_default = prefix_text.empty?

  created = ::LibXML::XML::Namespace.new(
    native,
    is_default ? nil : prefix_text,
    uri.to_s,
  )
  native.namespaces.namespace = created if is_default
  created
end
749
+
750
# Makes +ns+ the element's own namespace.
#
# @param element [Object, nil] wrapper or native element
# @param ns [Object, nil] native namespace object
# @return [Object, nil] +ns+, or nil when either argument is missing
def set_namespace(element, ns)
  native = unpatch_node(element)
  return if !native || !ns

  native.namespaces.namespace = ns
end
756
+
757
# Returns the namespace set on the element itself (the +namespace+ of its
# namespaces collection). No ancestor lookup is performed here.
#
# @param element [Object, nil] wrapper or native element
# @return [Object, nil] the element's namespace, or nil when the element or
#   its namespaces collection is missing
def namespace(element)
  native = unpatch_node(element)
  native ? native.namespaces&.namespace : nil
end
766
+
767
# @param namespace [Object, nil] native namespace object
# @return [Object, nil] the namespace's prefix, or nil for a nil namespace
def namespace_prefix(namespace)
  namespace.nil? ? nil : namespace.prefix
end
770
+
771
# @param namespace [Object, nil] native namespace object
# @return [Object, nil] the namespace's +href+, or nil for a nil namespace
def namespace_uri(namespace)
  namespace.nil? ? nil : namespace.href
end
774
+
775
# Lists the namespaces yielded by the node's namespaces collection.
#
# @param node [Object, nil] wrapper or native node
# @return [Array] namespace objects; empty when the node is missing or has
#   no +namespaces+ accessor
def namespace_definitions(node)
  native = unpatch_node(node)
  return [] if !native || !native.respond_to?(:namespaces)

  native.namespaces.map(&:itself)
end
784
+
785
# Evaluates an XPath expression against +node+ and wraps every result.
#
# The prefix map comes from build_xpath_namespaces; it is passed to the
# native +find+ only when it is not empty.
#
# @param node [Object, nil] wrapper or native context node
# @param expression [String] XPath expression
# @param namespaces [Hash, nil] extra prefix => URI bindings
# @return [Array] wrapped result nodes; [] when there is no context node
# @raise [Moxml::XPathError] when LibXML raises an error during evaluation
def xpath(node, expression, namespaces = nil)
  context = unpatch_node(node)
  return [] unless context

  prefixes = build_xpath_namespaces(context, namespaces)
  find_args = prefixes.empty? ? [expression] : [expression, prefixes]

  context.find(*find_args).to_a.map { |hit| patch_node(hit) }
rescue ::LibXML::XML::Error => e
  raise Moxml::XPathError.new(
    e.message,
    expression: expression,
    adapter: "LibXML",
    node: node,
  )
end
809
+
810
# Returns the first node matched by an XPath expression.
#
# @param node [Object, nil] wrapper or native context node
# @param expression [String] XPath expression
# @param namespaces [Hash, nil] extra prefix => URI bindings
# @return [Object, nil] the first wrapped match, or nil when nothing matched
def at_xpath(node, expression, namespaces = nil)
  xpath(node, expression, namespaces)&.first
end
814
+
815
# Serializes a node or document to an XML string.
#
# Objects that respond to +to_xml+ and are either a
# CustomizedLibxml::Declaration or not a native LibXML node/document
# serialize themselves. Native non-document nodes go through
# serialize_element_with_namespaces. Documents are assembled in this order:
# declaration, stored doctype (@moxml_doctype), stored processing
# instructions (@moxml_pis), stored texts (@moxml_texts), root element.
#
# Options read here: :no_declaration, :encoding, :indent.
#
# Side effect: when a document has no @moxml_declaration yet and a
# declaration is requested, a default one is created and stored on the
# document.
#
# @param node [Object, nil] wrapper, native node or native document
# @param options [Hash] serialization options
# @return [String] the XML text ("" when there is no node)
def serialize(node, options = {})
  if node.respond_to?(:to_xml)
    self_serializing =
      node.is_a?(CustomizedLibxml::Declaration) ||
      !(node.is_a?(::LibXML::XML::Node) || node.is_a?(::LibXML::XML::Document))
    return node.to_xml if self_serializing
  end

  native = unpatch_node(node)
  return "" unless native
  return serialize_element_with_namespaces(native, true) unless native.is_a?(::LibXML::XML::Document)

  buffer = +""

  # Appends a chunk, separated from earlier output by a newline.
  start_line = lambda do |source|
    buffer << "\n" unless buffer.empty?
    buffer << source.to_xml
  end

  # Reads a document-level slot only when it has been defined.
  stored = lambda do |slot|
    native.instance_variable_get(slot) if native.instance_variable_defined?(slot)
  end

  unless options[:no_declaration]
    if native.instance_variable_defined?(:@moxml_declaration)
      # Explicitly managed declaration: emit unless absent or flagged removed.
      declaration = native.instance_variable_get(:@moxml_declaration)
      buffer << declaration.to_xml if declaration && !declaration.instance_variable_get(:@removed)
    else
      # Nothing stored yet: build a default without a standalone flag.
      declaration = CustomizedLibxml::Declaration.new(
        native,
        native.version || "1.0",
        options[:encoding] || encoding_to_string(native.encoding) || "UTF-8",
        nil,
      )
      native.instance_variable_set(:@moxml_declaration, declaration)
      buffer << declaration.to_xml
    end
  end

  doctype = stored.call(:@moxml_doctype)
  start_line.call(doctype) if doctype

  %i[@moxml_pis @moxml_texts].each do |slot|
    (stored.call(slot) || []).each { |extra| start_line.call(extra) }
  end

  root = native.root
  if root
    markup = serialize_element_with_namespaces(root, true)

    if options[:indent]&.positive?
      # The indented form is always preceded by a newline.
      buffer << "\n" << indent_xml(add_newlines_to_xml(markup), options[:indent])
    else
      buffer << "\n" unless buffer.empty?
      buffer << markup
    end
  end

  buffer
end
915
+
916
+ def add_newlines_to_xml(xml_string)
917
+ # Add newlines between XML elements for proper indentation
918
+ # But don't add newlines between opening and immediate closing tags (e.g., <tag></tag>)
919
+ # And most importantly, don't add newlines inside CDATA sections
920
+
921
+ # First, protect CDATA sections by replacing them with placeholders
922
+ # Manual scanning guarantees O(n) complexity with no backtracking (ReDoS-safe)
923
+ cdata_sections = []
924
+ result = +""
925
+ pos = 0
926
+
927
+ loop do
928
+ # Find next CDATA start
929
+ cdata_start = xml_string.index("<![CDATA[", pos)
930
+
931
+ if cdata_start
932
+ # Copy everything before CDATA
933
+ result << xml_string[pos...cdata_start]
934
+
935
+ # Find CDATA end
936
+ cdata_content_start = cdata_start + 9 # Length of "<![CDATA["
937
+ cdata_end = xml_string.index("]]>", cdata_content_start)
938
+
939
+ if cdata_end
940
+ # Extract full CDATA including markers
941
+ full_cdata_end = cdata_end + 3 # Include "]]>"
942
+ cdata_section = xml_string[cdata_start...full_cdata_end]
943
+
944
+ # Store and add placeholder
945
+ cdata_sections << cdata_section
946
+ result << "__CDATA_PLACEHOLDER_#{cdata_sections.length - 1}__"
947
+
948
+ # Continue after this CDATA
949
+ pos = full_cdata_end
950
+ else
951
+ # Malformed CDATA (no closing "]]>") - copy as-is
952
+ result << xml_string[cdata_start..]
953
+ break
954
+ end
955
+ else
956
+ # No more CDATA sections - copy rest
957
+ result << xml_string[pos..]
958
+ break
959
+ end
960
+ end
961
+
962
+ protected = result
963
+
964
+ # Add newlines between elements (but not in CDATA - already protected)
965
+ with_newlines = protected.gsub(%r{(<[^>]+)>(?=<(?!/))}, "\\1>\n")
966
+
967
+ # Restore CDATA sections
968
+ cdata_sections.each_with_index do |cdata, index|
969
+ with_newlines.sub!("__CDATA_PLACEHOLDER_#{index}__", cdata)
970
+ end
971
+
972
+ with_newlines
973
+ end
974
+
975
# Indents XML that already has one construct per line.
#
# Each non-blank line is stripped and prefixed with +indent_size+ spaces per
# nesting level. A line starting with "</" drops one level before it is
# written (never below zero). A line raises the level for the lines after it
# only when it starts with "<", is not a closing tag, declaration/PI ("<?")
# or "<!" construct, does not end with "/>", and contains no "</".
#
# @param xml_string [String] XML text, one construct per line
# @param indent_size [Integer] spaces per nesting level
# @return [String] the indented lines joined with "\n"
def indent_xml(xml_string, indent_size)
  depth = 0

  rows = xml_string.each_line.each_with_object([]) do |raw, acc|
    text = raw.strip
    next if text.empty?

    depth = [depth - 1, 0].max if text.start_with?("</")
    acc << ((" " * (indent_size * depth)) + text)

    opens_scope = text.start_with?("<") &&
                  !text.start_with?("</", "<?", "<!") &&
                  !text.end_with?("/>") &&
                  !text.include?("</")
    depth += 1 if opens_scope
  end

  rows.join("\n")
end
1001
+
1002
# Builds a fresh copy of +node+ that is not attached to the original's
# document (the original author notes LibXML is strict about document
# ownership).
#
# Per node_type:
# - :doctype  -> a new DoctypeWrapper on a new document (any other doctype
#                object is returned unchanged)
# - :element  -> a new native element with namespaces, attributes and
#                children copied recursively; whitespace-only text children
#                are dropped
# - :text, :cdata, :comment, :processing_instruction -> a new native node
#   with the same content (and name for processing instructions)
# - anything else -> +dup+ of the native node
#
# @param node [Object, nil] wrapper or native node
# @return [Object, nil] the copy, or nil for a nil node
def duplicate_node(node)
  return nil unless node

  source = node.respond_to?(:native) ? node.native : node
  kind = node_type(node)

  if kind == :doctype
    return node unless node.is_a?(DoctypeWrapper)

    return DoctypeWrapper.new(
      create_document,
      node.name,
      node.external_id,
      node.system_id,
    )
  end

  return ::LibXML::XML::Node.new_text(source.content) if kind == :text
  return ::LibXML::XML::Node.new_cdata(source.content) if kind == :cdata
  return ::LibXML::XML::Node.new_comment(source.content) if kind == :comment
  if kind == :processing_instruction
    return ::LibXML::XML::Node.new_pi(source.name, source.content)
  end
  # Unknown node kinds fall back to a plain dup.
  return source.dup unless kind == :element

  copy = ::LibXML::XML::Node.new(source.name)

  # Namespaces go first so that prefixed attributes can resolve afterwards.
  if source.respond_to?(:namespaces)
    source.namespaces.each do |ns|
      ::LibXML::XML::Namespace.new(copy, ns.prefix, ns.href)
    end

    own_ns = source.namespaces.namespace
    if own_ns
      # Point the copy at its own freshly created equivalent namespace.
      copy.namespaces.each do |candidate|
        next unless candidate.prefix == own_ns.prefix && candidate.href == own_ns.href

        copy.namespaces.namespace = candidate
        break
      end
    end
  end

  if source.attributes?
    source.each_attr do |attr|
      qualified = attr.ns&.prefix ? "#{attr.ns.prefix}:#{attr.name}" : attr.name
      copy[qualified] = attr.value
    end
  end

  if source.children?
    source.each_child do |child|
      next if child.text? && child.content.to_s.strip.empty?

      copy << duplicate_node(child)
    end
  end

  copy
end
1093
+
1094
# Wraps a native LibXML node in the matching CustomizedLibxml wrapper class.
#
# nil and objects that are already CustomizedLibxml::Node instances are
# returned untouched, as are nodes whose type has no wrapper here.
#
# @param node [Object, nil] native node (or existing wrapper)
# @param _parent [Object, nil] unused
# @return [Object, nil] the wrapper, or +node+ itself when not wrapped
def patch_node(node, _parent = nil)
  return node if node.nil? || node.is_a?(CustomizedLibxml::Node)

  wrapper_class =
    case node_type(node)
    when :element then CustomizedLibxml::Element
    when :text then CustomizedLibxml::Text
    when :cdata then CustomizedLibxml::Cdata
    when :comment then CustomizedLibxml::Comment
    when :processing_instruction then CustomizedLibxml::ProcessingInstruction
    end

  wrapper_class ? wrapper_class.new(node) : node
end
1114
+
1115
# Unwraps a node: anything responding to +native+ yields that value, every
# other object (including nil) is returned as-is.
#
# @param node [Object, nil] wrapper or native node
# @return [Object, nil] the native node
def unpatch_node(node)
  return node.native if node.respond_to?(:native)

  node
end
1119
+
1120
# Readies +node+ for insertion into +target_doc+ by duplicating it; the
# original author notes that importing alone leaves references to the
# source document behind.
#
# @param node [Object, nil] wrapper or native node
# @param target_doc [Object, nil] destination document (only checked for
#   presence)
# @return [Object, nil] a duplicate of the node, or +node+ unchanged when
#   either argument is missing
def prepare_for_new_document(node, target_doc)
  node && target_doc ? duplicate_node(node) : node
end
1128
+
1129
+ private
1130
+
1131
# Serializes a native element, its own namespace declarations, its
# attributes and its children.
#
# Only the first declaration per prefix is written. Attributes whose name
# starts with "xmlns" are skipped. Elements are always written in the long
# form "<tag></tag>", and whitespace-only text children are omitted.
#
# @param elem [Object] native element
# @return [String] the element's XML
def serialize_element(elem)
  markup = +"<#{elem.name}"

  if elem.respond_to?(:namespaces)
    written_prefixes = []
    elem.namespaces.definitions.each do |ns|
      prefix = ns.prefix
      uri = ns.href
      next if written_prefixes.include?(prefix)

      written_prefixes << prefix
      declaration =
        if prefix.nil? || prefix.empty?
          " xmlns=\"#{escape_xml(uri)}\""
        else
          " xmlns:#{prefix}=\"#{escape_xml(uri)}\""
        end
      markup << declaration
    end
  end

  if elem.attributes?
    elem.each_attr do |attr|
      next if attr.name.start_with?("xmlns")

      # Prefixed attributes keep their prefix in the output name.
      qualified = attr.ns&.prefix ? "#{attr.ns.prefix}:#{attr.name}" : attr.name
      markup << " #{qualified}=\"#{escape_xml(attr.value)}\""
    end
  end

  markup << ">"
  if elem.children?
    elem.each_child do |child|
      next if child.text? && child.content.to_s.strip.empty?

      markup << serialize_node(child)
    end
  end
  markup << "</#{elem.name}>"

  markup
end
1180
+
1181
# Serializes a single non-document node.
#
# Processing-instruction, comment, CDATA and text wrappers that respond to
# +to_xml+ serialize themselves. Native nodes are dispatched on their
# +node_type+; unrecognised types fall back to +to_s+.
#
# @param node [Object] wrapper or native node
# @return [String] the node's XML
def serialize_node(node)
  if node.respond_to?(:to_xml)
    self_serializing = [
      CustomizedLibxml::ProcessingInstruction,
      CustomizedLibxml::Comment,
      CustomizedLibxml::Cdata,
      CustomizedLibxml::Text,
    ]
    return node.to_xml if self_serializing.any? { |klass| node.is_a?(klass) }
  end

  type = node.node_type
  if type == ::LibXML::XML::Node::ELEMENT_NODE
    serialize_element(node)
  elsif type == ::LibXML::XML::Node::TEXT_NODE
    escape_text(node.content)
  elsif type == ::LibXML::XML::Node::CDATA_SECTION_NODE
    "<![CDATA[#{node.content}]]>"
  elsif type == ::LibXML::XML::Node::COMMENT_NODE
    "<!-- #{node.content} -->"
  elsif type == ::LibXML::XML::Node::PI_NODE
    "<?#{node.name} #{node.content}?>"
  else
    node.to_s
  end
end
1206
+
1207
# Escapes "&", "<" and ">" for use in XML text content.
#
# @param text [#to_s] raw text
# @return [String] escaped text
def escape_text(text)
  text.to_s.gsub(/[&<>]/, "&" => "&amp;", "<" => "&lt;", ">" => "&gt;")
end
1213
+
1214
# Escapes "&", "<", ">" and the double quote, making the text safe inside a
# double-quoted attribute value.
#
# @param text [#to_s] raw text
# @return [String] escaped text
def escape_xml(text)
  replacements = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", "\"" => "&quot;" }
  text.to_s.gsub(/[&<>"]/, replacements)
end
1221
+
1222
# Escapes "&", "<", ">" and the double quote in an attribute value.
#
# @param value [#to_s] raw attribute value
# @return [String] escaped value
def escape_attribute_value(value)
  entity_for = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", "\"" => "&quot;" }
  value.to_s.gsub(/[&<>"]/) { |char| entity_for[char] }
end
1230
+
1231
# Appends +child+ to +element+, recovering from LibXML's cross-document
# error.
#
# The plain append is tried first. If LibXML raises an error whose message
# mentions "different documents", the child is deep-imported into the target
# document (+doc+, or the element's own document) and the import is
# appended. With no target document available, a duplicate of the child is
# appended instead.
#
# @param doc [Object, nil] preferred target document
# @param element [Object, nil] native parent node
# @param child [Object, nil] native node to append
# @return [Object, nil] result of the append, or nil when an argument is
#   missing
# @raise [LibXML::XML::Error] any LibXML error other than the
#   cross-document one
def import_and_add(doc, element, child)
  return unless element && child

  element << child
rescue ::LibXML::XML::Error => e
  raise unless e.message.include?("different documents")

  target_doc = doc || (element.doc if element.respond_to?(:doc))
  # Deep import (second argument true) brings all descendants along; without
  # a document the node is copied instead.
  adopted = target_doc ? target_doc.import(child, true) : duplicate_node(child)
  element << adopted
end
1258
+
1259
# Converts a LibXML encoding constant into its name.
#
# Strings are passed through untouched; constants without a mapping here
# yield "UTF-8".
#
# @param encoding [Object, String, nil] LibXML::XML::Encoding constant or
#   an encoding name
# @return [String, nil] the encoding name, or nil when no encoding is given
def encoding_to_string(encoding)
  return nil unless encoding
  return encoding if encoding.is_a?(String)

  codes = ::LibXML::XML::Encoding
  names = [
    [codes::UTF_8, "UTF-8"],
    [codes::ISO_8859_1, "ISO-8859-1"],
    [codes::UTF_16LE, "UTF-16LE"],
    [codes::UTF_16BE, "UTF-16BE"],
    [codes::UCS_2, "UCS-2"],
  ]

  hit = names.find { |code, _label| code === encoding }
  hit ? hit.last : "UTF-8"
end
1278
+
1279
# Converts an encoding name into the matching LibXML encoding constant.
#
# Matching is case-insensitive and treats "-" and "_" alike, so "utf-8",
# "UTF_8" and "utf8" are equivalent. Every name produced by
# encoding_to_string is recognised here (including "UCS-2"), so the two
# methods round-trip. Unrecognised names yield the UTF-8 constant.
#
# @param str [String, Symbol, nil] encoding name
# @return [Object, nil] LibXML::XML::Encoding constant, or nil when no name
#   is given
def string_to_encoding(str)
  return nil unless str

  # to_s lets Symbols through; Symbol#upcase would otherwise return a Symbol,
  # which has no #tr.
  case str.to_s.upcase.tr("-", "_")
  when "UTF_8", "UTF8"
    ::LibXML::XML::Encoding::UTF_8
  when "ISO_8859_1", "ISO88591"
    ::LibXML::XML::Encoding::ISO_8859_1
  when "UTF_16LE", "UTF16LE"
    ::LibXML::XML::Encoding::UTF_16LE
  when "UTF_16BE", "UTF16BE"
    ::LibXML::XML::Encoding::UTF_16BE
  when "UCS_2", "UCS2"
    ::LibXML::XML::Encoding::UCS_2
  else
    ::LibXML::XML::Encoding::UTF_8
  end
end
1295
+
1296
# Serializes an element while controlling which namespace declarations are
# written.
#
# With +include_ns+ true (used for the top element) every declaration on the
# element is written. With +include_ns+ false a declaration is written only
# when its prefix is also bound on the parent with a different URI. Only the
# first declaration per prefix is considered for output.
#
# Attributes whose name starts with "xmlns" are skipped. Elements are always
# written in the long form "<tag></tag>"; whitespace-only text children are
# omitted. Each child is wrapped via patch_node: non-native wrappers that
# respond to +to_xml+ serialize themselves, child elements recurse with
# +include_ns+ false, everything else goes through serialize_node.
#
# @param elem [Object] native element
# @param include_ns [Boolean] whether to write all declarations
# @return [String] the element's XML
def serialize_element_with_namespaces(elem, include_ns = true)
  markup = +"<#{elem.name}"

  if elem.respond_to?(:namespaces) && elem.namespaces.respond_to?(:definitions)
    # Prefix => URI bindings visible on the parent, used to spot overrides.
    inherited = {}
    if !include_ns && elem.respond_to?(:parent) && elem.parent &&
       elem.parent.respond_to?(:namespaces)
      elem.parent.namespaces.each { |ns| inherited[ns.prefix] = ns.href }
    end

    written_prefixes = []
    elem.namespaces.definitions.each do |ns|
      prefix = ns.prefix
      uri = ns.href
      next if written_prefixes.include?(prefix)

      overrides_parent = inherited.key?(prefix) && inherited[prefix] != uri
      next unless include_ns || overrides_parent

      written_prefixes << prefix
      declaration =
        if prefix.nil? || prefix.empty?
          " xmlns=\"#{escape_xml(uri)}\""
        else
          " xmlns:#{prefix}=\"#{escape_xml(uri)}\""
        end
      markup << declaration
    end
  end

  if elem.attributes?
    elem.each_attr do |attr|
      next if attr.name.start_with?("xmlns")

      # Prefixed attributes keep their prefix in the output name.
      qualified = attr.ns&.prefix ? "#{attr.ns.prefix}:#{attr.name}" : attr.name
      markup << " #{qualified}=\"#{escape_xml(attr.value)}\""
    end
  end

  markup << ">"
  if elem.children?
    elem.each_child do |child|
      next if child.text? && child.content.to_s.strip.empty?

      wrapped = patch_node(child)
      rendered =
        if wrapped.respond_to?(:to_xml) && !wrapped.is_a?(::LibXML::XML::Node)
          wrapped.to_xml
        elsif child.element?
          serialize_element_with_namespaces(child, false)
        else
          serialize_node(child)
        end
      markup << rendered
    end
  end
  markup << "</#{elem.name}>"

  markup
end
1379
+
1380
# Collapses formatting whitespace: whitespace between adjacent tags is
# dropped, then every remaining newline (with the whitespace following it)
# is removed.
#
# @param xml_string [String] formatted XML
# @return [String] a new string without the formatting whitespace
def remove_indentation(xml_string)
  tags_joined = xml_string.gsub(/>\s+</, "><")
  tags_joined.gsub(/\n\s*/, "")
end
1384
+
1385
# Gathers prefix => URI bindings from the whole tree that +node+ belongs to.
#
# For a document the scan starts at its root; for any other node it starts
# at the topmost ancestor that is not a document. The bindings themselves
# are collected by collect_ns_from_subtree.
#
# @param node [Object] native node or document
# @return [Hash] collected bindings; empty when there is no start node
def collect_namespace_definitions(node)
  bindings = {}

  if node.is_a?(::LibXML::XML::Document)
    start = node.root
  else
    # Climb until the parent is missing or is the document itself.
    start = node
    while start.respond_to?(:parent)
      above = start.parent
      break if !above || above.is_a?(::LibXML::XML::Document)

      start = above
    end
  end

  collect_ns_from_subtree(start, bindings) if start
  bindings
end
1405
+
1406
# Records namespace bindings found on +node+ and, recursively, on its
# element children into +ns_defs+.
#
# A binding is only added when its key is not present yet, so the first
# binding encountered for a prefix wins. Default namespaces (nil or empty
# prefix) are stored under the key "xmlns". Both the namespaces yielded by
# the node's collection and the node's own active namespace are considered.
#
# @param node [Object] native node
# @param ns_defs [Hash] accumulator, mutated in place
# @return [Object, nil] not meaningful; use +ns_defs+
def collect_ns_from_subtree(node, ns_defs)
  register = lambda do |ns|
    prefix = ns.prefix
    uri = ns.href
    key = prefix.nil? || prefix.empty? ? "xmlns" : prefix
    ns_defs[key] = uri unless ns_defs.key?(key)
  end

  node.namespaces.each { |ns| register.call(ns) } if node.respond_to?(:namespaces)

  if node.respond_to?(:namespaces) && node.namespaces.respond_to?(:namespace)
    active = node.namespaces.namespace
    register.call(active) if active
  end

  return unless node.respond_to?(:children?) && node.children?

  node.each_child do |child|
    collect_ns_from_subtree(child, ns_defs) if child.element?
  end
end
1448
+
1449
# Builds the prefix => URI map handed to LibXML for an XPath query.
#
# Starts from the bindings collected from the node's tree; caller-supplied
# bindings are merged on top and win on conflicts.
#
# @param node [Object] native context node
# @param user_namespaces [Hash, nil] caller-supplied bindings
# @return [Hash] combined bindings
def build_xpath_namespaces(node, user_namespaces)
  collected = collect_namespace_definitions(node)
  return collected if !user_namespaces || user_namespaces.empty?

  collected.merge(user_namespaces)
end
1460
+
1461
# Looks up the namespace bound to +prefix+, starting at +element+ and moving
# up through its ancestors; the nearest match wins.
#
# @param element [Object, nil] native element to start from
# @param prefix [String, nil] prefix to look for
# @return [Object, nil] the matching namespace, or nil when none is found
def find_namespace_by_prefix(element, prefix)
  return nil unless element

  if element.respond_to?(:namespaces)
    element.namespaces.each { |ns| return ns if ns.prefix == prefix }
  end

  return nil unless element.respond_to?(:parent)

  find_namespace_by_prefix(element.parent, prefix)
end
1474
+ end
1475
+
1476
# Bridge between LibXML SAX and Moxml SAX
#
# Translates LibXML::XML::SaxParser events to Moxml::SAX::Handler events
#
# @private
class LibXMLSAXBridge
  include ::LibXML::XML::SaxParser::Callbacks

  # @param handler [Object] Moxml SAX handler that receives the translated
  #   events
  def initialize(handler)
    @handler = handler
  end

  # Map LibXML events to Moxml events

  def on_start_document
    @handler.on_start_document
  end

  def on_end_document
    @handler.on_end_document
  end

  # Splits the raw attribute list into regular attributes and namespace
  # declarations before forwarding the event.
  #
  # Only "xmlns" (default namespace, stored under a nil prefix) and names of
  # the form "xmlns:prefix" are treated as declarations. Any other name,
  # even one that merely begins with the letters "xmlns", is passed through
  # as a regular attribute.
  #
  # @param name [#to_s] element name
  # @param attributes [Hash, nil] attribute name => value pairs
  def on_start_element(name, attributes)
    attr_hash = {}
    ns_hash = {}

    attributes&.each do |attr_name, attr_value|
      key = attr_name.to_s
      if key == "xmlns"
        ns_hash[nil] = attr_value
      elsif key.start_with?("xmlns:")
        ns_hash[key.delete_prefix("xmlns:")] = attr_value
      else
        attr_hash[key] = attr_value
      end
    end

    @handler.on_start_element(name.to_s, attr_hash, ns_hash)
  end

  def on_end_element(name)
    @handler.on_end_element(name.to_s)
  end

  def on_characters(chars)
    @handler.on_characters(chars)
  end

  def on_cdata_block(content)
    @handler.on_cdata(content)
  end

  def on_comment(msg)
    @handler.on_comment(msg)
  end

  # Forwards a processing instruction; missing data is passed on as "".
  def on_processing_instruction(target, data)
    @handler.on_processing_instruction(target, data || "")
  end

  # Wraps the parser's message in a Moxml::ParseError before forwarding.
  def on_error(msg)
    @handler.on_error(Moxml::ParseError.new(msg))
  end
end
1546
+ end
1547
+ end
1548
+ end