moxml 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +10 -10
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +224 -43
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +51 -0
  21. data/docs/_guides/modifying-xml.adoc +292 -0
  22. data/docs/_guides/parsing-xml.adoc +230 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_guides/xml-declaration.adoc +450 -0
  26. data/docs/_pages/adapter-compatibility.adoc +369 -0
  27. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  28. data/docs/_pages/adapters/index.adoc +97 -0
  29. data/docs/_pages/adapters/libxml.adoc +285 -0
  30. data/docs/_pages/adapters/nokogiri.adoc +251 -0
  31. data/docs/_pages/adapters/oga.adoc +291 -0
  32. data/docs/_pages/adapters/ox.adoc +56 -0
  33. data/docs/_pages/adapters/rexml.adoc +292 -0
  34. data/docs/_pages/best-practices.adoc +429 -0
  35. data/docs/_pages/compatibility.adoc +467 -0
  36. data/docs/_pages/configuration.adoc +250 -0
  37. data/docs/_pages/error-handling.adoc +349 -0
  38. data/docs/_pages/headed-ox-limitations.adoc +574 -0
  39. data/docs/_pages/headed-ox.adoc +1025 -0
  40. data/docs/_pages/index.adoc +35 -0
  41. data/docs/_pages/installation.adoc +140 -0
  42. data/docs/_pages/node-api-reference.adoc +49 -0
  43. data/docs/_pages/performance.adoc +35 -0
  44. data/docs/_pages/quick-start.adoc +243 -0
  45. data/docs/_pages/thread-safety.adoc +28 -0
  46. data/docs/_references/document-api.adoc +407 -0
  47. data/docs/_references/index.adoc +48 -0
  48. data/docs/_tutorials/basic-usage.adoc +267 -0
  49. data/docs/_tutorials/builder-pattern.adoc +342 -0
  50. data/docs/_tutorials/index.adoc +33 -0
  51. data/docs/_tutorials/namespace-handling.adoc +324 -0
  52. data/docs/_tutorials/xpath-queries.adoc +358 -0
  53. data/docs/index.adoc +122 -0
  54. data/examples/README.md +124 -0
  55. data/examples/api_client/README.md +424 -0
  56. data/examples/api_client/api_client.rb +394 -0
  57. data/examples/api_client/example_response.xml +48 -0
  58. data/examples/headed_ox_example/README.md +90 -0
  59. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  60. data/examples/rss_parser/README.md +194 -0
  61. data/examples/rss_parser/example_feed.xml +93 -0
  62. data/examples/rss_parser/rss_parser.rb +189 -0
  63. data/examples/sax_parsing/README.md +50 -0
  64. data/examples/sax_parsing/data_extractor.rb +75 -0
  65. data/examples/sax_parsing/example.xml +21 -0
  66. data/examples/sax_parsing/large_file.rb +78 -0
  67. data/examples/sax_parsing/simple_parser.rb +55 -0
  68. data/examples/web_scraper/README.md +352 -0
  69. data/examples/web_scraper/example_page.html +201 -0
  70. data/examples/web_scraper/web_scraper.rb +312 -0
  71. data/lib/moxml/adapter/base.rb +107 -28
  72. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  73. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  74. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  75. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  76. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  77. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  78. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  79. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  80. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
  81. data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
  82. data/lib/moxml/adapter/headed_ox.rb +161 -0
  83. data/lib/moxml/adapter/libxml.rb +1564 -0
  84. data/lib/moxml/adapter/nokogiri.rb +156 -9
  85. data/lib/moxml/adapter/oga.rb +190 -15
  86. data/lib/moxml/adapter/ox.rb +322 -28
  87. data/lib/moxml/adapter/rexml.rb +157 -28
  88. data/lib/moxml/adapter.rb +21 -4
  89. data/lib/moxml/attribute.rb +6 -0
  90. data/lib/moxml/builder.rb +40 -4
  91. data/lib/moxml/config.rb +8 -3
  92. data/lib/moxml/context.rb +57 -2
  93. data/lib/moxml/declaration.rb +9 -0
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +53 -6
  96. data/lib/moxml/document_builder.rb +34 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +155 -4
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1770 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_preservation_spec.rb +217 -0
  164. data/spec/moxml/declaration_spec.rb +36 -0
  165. data/spec/moxml/doctype_spec.rb +33 -0
  166. data/spec/moxml/document_builder_spec.rb +30 -0
  167. data/spec/moxml/document_spec.rb +105 -0
  168. data/spec/moxml/element_spec.rb +143 -0
  169. data/spec/moxml/error_spec.rb +266 -22
  170. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  171. data/spec/moxml/namespace_spec.rb +32 -0
  172. data/spec/moxml/node_set_spec.rb +39 -0
  173. data/spec/moxml/node_spec.rb +37 -0
  174. data/spec/moxml/processing_instruction_spec.rb +34 -0
  175. data/spec/moxml/sax_spec.rb +1067 -0
  176. data/spec/moxml/text_spec.rb +31 -0
  177. data/spec/moxml/version_spec.rb +14 -0
  178. data/spec/moxml/xml_utils/.gitkeep +0 -0
  179. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  180. data/spec/moxml/xml_utils_spec.rb +49 -0
  181. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  182. data/spec/moxml/xpath/axes_spec.rb +296 -0
  183. data/spec/moxml/xpath/cache_spec.rb +358 -0
  184. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  185. data/spec/moxml/xpath/context_spec.rb +210 -0
  186. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  187. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  188. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  189. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  190. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  191. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  192. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  193. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  194. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  195. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  196. data/spec/moxml/xpath/parser_spec.rb +364 -0
  197. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  198. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  199. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  200. data/spec/moxml/xpath_spec.rb +77 -0
  201. data/spec/performance/README.md +83 -0
  202. data/spec/performance/benchmark_spec.rb +64 -0
  203. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
  204. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  205. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  206. data/spec/spec_helper.rb +58 -1
  207. data/spec/support/xml_matchers.rb +1 -1
  208. metadata +178 -34
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -0,0 +1,1564 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base"
4
+ require "libxml"
5
+ require_relative "customized_libxml/node"
6
+ require_relative "customized_libxml/element"
7
+ require_relative "customized_libxml/text"
8
+ require_relative "customized_libxml/comment"
9
+ require_relative "customized_libxml/cdata"
10
+ require_relative "customized_libxml/processing_instruction"
11
+ require_relative "customized_libxml/declaration"
12
+
13
+ module Moxml
14
+ module Adapter
15
+ class Libxml < Base
16
# Value holder for DOCTYPE information (name, public/external id and
# system id) together with the document it is associated with.
class DoctypeWrapper
  attr_reader :native_doc
  attr_accessor :name, :external_id, :system_id

  # @param doc the document this DOCTYPE is associated with
  # @param name the DOCTYPE name
  # @param external_id the PUBLIC identifier, or nil
  # @param system_id the SYSTEM identifier, or nil
  def initialize(doc, name, external_id, system_id)
    @native_doc = doc
    @name = name
    @external_id = external_id
    @system_id = system_id
  end

  # Exposes the stored document under the name +native+ as well, so the
  # wrapper can be unwrapped the same way as other adapter wrappers.
  alias native native_doc

  # Serializes the DOCTYPE declaration.
  #
  # A non-empty external id produces a PUBLIC declaration (followed by the
  # system id whenever one is set); otherwise a non-empty system id
  # produces a SYSTEM declaration; otherwise only the name is emitted.
  #
  # @return [String]
  def to_xml
    pieces = ["<!DOCTYPE #{@name}"]
    if @external_id && !@external_id.empty?
      pieces << " PUBLIC \"#{@external_id}\""
      pieces << " \"#{@system_id}\"" if @system_id
    elsif @system_id && !@system_id.empty?
      pieces << " SYSTEM \"#{@system_id}\""
    end
    pieces << ">"
    pieces.join
  end
end
45
+
46
+ class << self
47
# Makes +element+ the root of +doc+ by assigning to the document's
# +root=+ writer.
#
# @param doc native document
# @param element native element that becomes the root
# @return the assigned element
def set_root(doc, element)
  doc.root = element
end
50
+
51
# Parses XML text into a Moxml document backed by LibXML.
#
# The DOCTYPE is captured with a regular expression before parsing and
# stored on the native document as @moxml_doctype, because (per the
# original author's note) LibXML does not preserve it during parsing.
#
# @param xml [String, #read, #to_s] XML source; objects responding to
#   +read+ are read, anything else is converted with +to_s+
# @param options [Hash] only +:strict+ is consulted here
# @return the object produced by DocumentBuilder#build
# @raise [Moxml::ParseError] when +options[:strict]+ is truthy and LibXML
#   reports a parse error
def parse(xml, options = {})
  xml_string = if xml.is_a?(String)
                 xml
               elsif xml.respond_to?(:read)
                 xml.read
               else
                 xml.to_s
               end

  # Capture groups: 1 = DOCTYPE name, 2 = PUBLIC id, 3 = system id of a
  # PUBLIC declaration, 4 = system id of a SYSTEM declaration.
  # The SYSTEM alternative used to start with a literal space in front of
  # \s+, which required at least two whitespace characters and therefore
  # missed the common `<!DOCTYPE root SYSTEM "x.dtd">` form.
  doctype_match = xml_string.match(
    /<!DOCTYPE\s+(\S+)(?:\s+PUBLIC\s+"([^"]+)"\s+"([^"]+)"|\s+SYSTEM\s+"([^"]+)")?\s*>/i,
  )

  native_doc = begin
    ::LibXML::XML::Parser.string(xml_string).parse
  rescue ::LibXML::XML::Error => e
    if options[:strict]
      line = e.respond_to?(:line) ? e.line : nil
      raise Moxml::ParseError.new(
        e.message,
        line: line,
        column: nil,
        source: xml_string[0..100],
      )
    end
    # Non-strict mode: swallow the error and continue with an empty document
    create_document
  end

  if doctype_match
    doctype_wrapper = DoctypeWrapper.new(
      native_doc,
      doctype_match[1],
      doctype_match[2],
      doctype_match[3] || doctype_match[4],
    )
    native_doc.instance_variable_set(:@moxml_doctype, doctype_wrapper)
  end

  DocumentBuilder.new(Context.new(:libxml)).build(native_doc)
end
99
+
100
# SAX parsing implementation for LibXML.
#
# LibXML SAX callbacks are routed through a LibXMLSAXBridge wrapping the
# given handler. A LibXML parse error is not raised to the caller; it is
# converted to a Moxml::ParseError and passed to +handler.on_error+.
#
# @param xml [String, IO] XML to parse; objects responding to +read+ are
#   read (previously they were converted with +to_s+, which for an IO
#   yields its inspect string rather than its content)
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
# @return [void]
def sax_parse(xml, handler)
  # Bridge that translates LibXML SAX events to Moxml SAX events
  bridge = LibXMLSAXBridge.new(handler)

  source = xml.respond_to?(:read) ? xml.read : xml.to_s
  parser = ::LibXML::XML::SaxParser.string(source)
  parser.callbacks = bridge
  parser.parse
rescue ::LibXML::XML::Error => e
  line = e.respond_to?(:line) ? e.line : nil
  column = e.respond_to?(:column) ? e.column : nil
  error = Moxml::ParseError.new(e.message, line: line, column: column)
  handler.on_error(error)
end
123
+
124
# Builds a new, empty LibXML document.
#
# @param _native_doc ignored
# @return [::LibXML::XML::Document]
def create_document(_native_doc = nil)
  ::LibXML::XML::Document.new
end
127
+
128
# Builds a bare LibXML node named after +name+ (converted with +to_s+).
#
# @param name [#to_s] element name
# @return [::LibXML::XML::Node]
def create_native_element(name)
  element_name = name.to_s
  ::LibXML::XML::Node.new(element_name)
end
131
+
132
# Builds a LibXML text node and returns it inside the adapter's Text wrapper.
#
# @param content [#to_s] text content
# @return [CustomizedLibxml::Text]
def create_native_text(content)
  CustomizedLibxml::Text.new(::LibXML::XML::Node.new_text(content.to_s))
end
136
+
137
# Builds a LibXML CDATA node and returns it inside the adapter's Cdata wrapper.
#
# @param content [#to_s] CDATA content
# @return [CustomizedLibxml::Cdata]
def create_native_cdata(content)
  CustomizedLibxml::Cdata.new(::LibXML::XML::Node.new_cdata(content.to_s))
end
141
+
142
# Builds a LibXML comment node and returns it inside the adapter's
# Comment wrapper.
#
# @param content [#to_s] comment text
# @return [CustomizedLibxml::Comment]
def create_native_comment(content)
  CustomizedLibxml::Comment.new(::LibXML::XML::Node.new_comment(content.to_s))
end
146
+
147
# Builds a LibXML processing-instruction node and returns it inside the
# adapter's ProcessingInstruction wrapper.
#
# @param target [#to_s] PI target
# @param content [#to_s] PI content
# @return [CustomizedLibxml::ProcessingInstruction]
def create_native_processing_instruction(target, content)
  pi_node = ::LibXML::XML::Node.new_pi(target.to_s, content.to_s)
  CustomizedLibxml::ProcessingInstruction.new(pi_node)
end
151
+
152
# Builds a Declaration wrapper around a freshly created document, passing
# the version, encoding and standalone values through unchanged.
#
# @return [CustomizedLibxml::Declaration]
def create_native_declaration(version, encoding, standalone)
  CustomizedLibxml::Declaration.new(create_document, version, encoding, standalone)
end
157
+
158
# Builds a DoctypeWrapper around a freshly created document. The values
# are kept in the wrapper rather than in a LibXML::XML::Dtd (the original
# author notes that Dtd.new has an awkward parameter order).
#
# @param name [#to_s] DOCTYPE name
# @param external_id [#to_s, nil] PUBLIC identifier; nil stays nil
# @param system_id [#to_s, nil] SYSTEM identifier; nil stays nil
# @return [DoctypeWrapper]
def create_native_doctype(name, external_id, system_id)
  owner = create_document
  DoctypeWrapper.new(owner, name.to_s, external_id&.to_s, system_id&.to_s)
end
164
+
165
# Maps a node (adapter wrapper or native LibXML object) to a Moxml node
# type symbol.
#
# Adapter wrapper classes are recognised first, in a fixed order; any
# other object is unwrapped via +native+ when it responds to it and then
# classified by its LibXML +node_type+ constant.
#
# @param node wrapper or native node; nil/false yields :unknown
# @return [Symbol] :document, :element, :text, :cdata, :comment,
#   :attribute, :processing_instruction, :doctype or :unknown
def node_type(node)
  return :unknown unless node

  case node
  when CustomizedLibxml::Element then return :element
  when CustomizedLibxml::Text then return :text
  when CustomizedLibxml::Cdata then return :cdata
  when CustomizedLibxml::Comment then return :comment
  when CustomizedLibxml::ProcessingInstruction then return :processing_instruction
  when DoctypeWrapper then return :doctype
  end

  raw = node.respond_to?(:native) ? node.native : node

  case raw.node_type
  when ::LibXML::XML::Node::DOCUMENT_NODE then :document
  when ::LibXML::XML::Node::ELEMENT_NODE then :element
  when ::LibXML::XML::Node::TEXT_NODE then :text
  when ::LibXML::XML::Node::CDATA_SECTION_NODE then :cdata
  when ::LibXML::XML::Node::COMMENT_NODE then :comment
  when ::LibXML::XML::Node::ATTRIBUTE_NODE then :attribute
  when ::LibXML::XML::Node::PI_NODE then :processing_instruction
  when ::LibXML::XML::Node::DTD_NODE then :doctype
  else :unknown
  end
end
202
+
203
# Returns the name of the unwrapped node, or nil when unwrapping yields nil.
def node_name(node)
  unpatch_node(node)&.name
end
207
+
208
# Renames the unwrapped node to +name+ (converted with +to_s+). Does
# nothing when unwrapping yields nil.
#
# @return [String, nil] the new name, or nil when nothing was renamed
def set_node_name(node, name)
  target = unpatch_node(node)
  return unless target

  target.name = name.to_s
end
212
+
213
# Reads one attribute of an XML declaration.
#
# For a CustomizedLibxml::Declaration the wrapper's own readers are used
# ("standalone" is returned as the wrapper reports it). For any other
# object — treated as a native document — the encoding is converted with
# +encoding_to_string+ and "standalone" is "yes" or nil.
#
# @param node declaration wrapper or native document
# @param name [String] "version", "encoding" or "standalone"
# @return the attribute value, or nil for a missing node or unknown name
def declaration_attribute(node, name)
  return nil unless node

  wrapper = node.is_a?(CustomizedLibxml::Declaration)

  case name
  when "version"
    node.version
  when "encoding"
    return node.encoding if wrapper

    enc = node.encoding
    enc ? encoding_to_string(enc) : nil
  when "standalone"
    return node.standalone if wrapper

    node.standalone? ? "yes" : nil
  end
end
239
+
240
# Writes one attribute of an XML declaration.
#
# Only CustomizedLibxml::Declaration wrappers are modified; any other
# node (including nil and native documents, which the original author
# describes as read-only) is left untouched. The value is handed to the
# wrapper's writer as-is.
#
# @param name [String] "version", "encoding" or "standalone"
def set_declaration_attribute(node, name, value)
  return unless node.is_a?(CustomizedLibxml::Declaration)

  case name
  when "version" then node.version = value
  when "encoding" then node.encoding = value
  when "standalone" then node.standalone = value
  end
end
258
+
259
# Lists the children of a node.
#
# For a document the result is the DOCTYPE (the wrapper stored in
# @moxml_doctype if that variable is defined, otherwise one built from
# the document's +dtd+) followed by the patched root element, each only
# when present. For any other node, every child is patched and returned
# except whitespace-only text nodes.
#
# @return [Array]
def children(node)
  native_node = unpatch_node(node)
  return [] unless native_node

  if native_node.is_a?(::LibXML::XML::Document)
    nodes = []

    if native_node.instance_variable_defined?(:@moxml_doctype)
      # Stored by this adapter (e.g. during parse)
      stored = native_node.instance_variable_get(:@moxml_doctype)
      nodes << stored if stored
    elsif native_node.respond_to?(:dtd) && native_node.dtd
      dtd = native_node.dtd
      nodes << DoctypeWrapper.new(native_node, dtd.name, dtd.external_id, dtd.system_id)
    end

    root_node = native_node.root
    nodes << patch_node(root_node) if root_node
    return nodes
  end

  return [] unless native_node.children?

  kept = []
  native_node.each_child do |child|
    blank_text = child.text? && child.content.to_s.strip.empty?
    kept << patch_node(child) unless blank_text
  end
  kept
end
302
+
303
# Returns the patched parent of the unwrapped node, or nil when there is
# no node or no parent.
def parent(node)
  owner = unpatch_node(node)&.parent
  return nil unless owner

  patch_node(owner)
end
308
+
309
# Returns the closest following sibling that is not a whitespace-only
# text node, patched; nil when there is none.
def next_sibling(node)
  candidate = unpatch_node(node)&.next
  # Walk forward past whitespace-only text nodes
  while candidate&.text? && candidate.content.to_s.strip.empty?
    candidate = candidate.next
  end
  candidate ? patch_node(candidate) : nil
end
320
+
321
# Returns the closest preceding sibling that is not a whitespace-only
# text node, patched; nil when there is none.
def previous_sibling(node)
  candidate = unpatch_node(node)&.prev
  # Walk backward past whitespace-only text nodes
  while candidate&.text? && candidate.content.to_s.strip.empty?
    candidate = candidate.prev
  end
  candidate ? patch_node(candidate) : nil
end
332
+
333
# Returns the native document a node belongs to. A document is returned
# as itself; any other node reports its +doc+. nil in, nil out.
def document(node)
  native_node = unpatch_node(node)
  return nil unless native_node

  native_node.is_a?(::LibXML::XML::Document) ? native_node : native_node.doc
end
343
+
344
# Returns the root of the unwrapped document (unpatched), or nil when
# unwrapping yields nil.
def root(document)
  unpatch_node(document)&.root
end
348
+
349
# Collects the native attributes of an element, leaving out every
# attribute whose name starts with "xmlns".
#
# @return [Array] empty when the node is missing, is not an element, or
#   reports no attributes
def attributes(element)
  native_elem = unpatch_node(element)
  return [] unless native_elem
  return [] unless native_elem.respond_to?(:element?) && native_elem.element?
  return [] unless native_elem.attributes?

  collected = []
  native_elem.each_attr do |attr|
    next if attr.name.to_s.start_with?("xmlns")

    collected << attr
  end
  collected
end
363
+
364
# Returns the parent reported by the attribute, or nil for a nil attribute.
def attribute_element(attr)
  return nil if attr.nil?

  attr.parent
end
367
+
368
# Returns the attribute's +ns+, or nil when the attribute is missing or
# does not respond to +ns+.
def attribute_namespace(attr)
  attr.ns if attr && attr.respond_to?(:ns)
end
374
+
375
# Sets an attribute on an element and returns the native attribute.
#
# A name of the form "prefix:local" is treated as namespaced when
# +find_namespace_by_prefix+ resolves the prefix for this element; the
# attribute is then created with that namespace. In every other case the
# attribute is written through +[]=+ under the name as given.
#
# @param element wrapper or native element
# @param name [#to_s] attribute name, optionally prefixed
# @param value [#to_s] attribute value
# @return the native attribute, or nil when the element unwraps to nil
def set_attribute(element, name, value)
  native_elem = unpatch_node(element)
  return unless native_elem

  name_str = name.to_s
  value_str = value.to_s

  prefix, local_name = name_str.split(":", 2) if name_str.include?(":")
  ns = prefix ? find_namespace_by_prefix(native_elem, prefix) : nil

  unless ns
    # Not namespaced, or the prefix is unknown: set as a regular attribute
    native_elem[name_str] = value_str
    return native_elem.attributes.get_attribute(name_str)
  end

  # Remove whatever this call is about to replace, so no duplicate is left:
  # an attribute stored under the literal qualified name, and the
  # namespaced attribute itself. The latter is looked up by namespace
  # href + local name, because a lookup by the prefixed name does not
  # find an attribute that is stored under its local name.
  # NOTE(review): assumes find_namespace_by_prefix returns a
  # LibXML::XML::Namespace (responds to href) — confirm against the helper.
  attrs = native_elem.attributes
  attrs.get_attribute(name_str)&.remove!
  attrs.get_attribute_ns(ns.href, local_name)&.remove!

  # Attr.new(node, name, value, ns) creates the attribute on the element
  ::LibXML::XML::Attr.new(native_elem, local_name, value_str, ns)
end
412
+
413
# Looks up a native attribute by name and equips it with a +to_xml+
# singleton method that escapes &, <, > and " in the value.
#
# NOTE(review): +to_xml+ renders `name = value` without quotes around the
# value; that output is preserved here unchanged — confirm it is what
# callers expect.
#
# @param element wrapper or native element
# @param name [#to_s] attribute name
# @return the native attribute, or nil when the element or attribute is
#   missing
def get_attribute(element, name)
  native_elem = unpatch_node(element)
  return nil unless native_elem&.attributes?

  attr = native_elem.attributes.get_attribute(name.to_s)
  return nil unless attr

  entities = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", "\"" => "&quot;" }
  # Inside the block, `value` is the attribute's own reader, while `name`
  # is this method's parameter captured by the closure.
  attr.define_singleton_method(:to_xml) do
    escaped = value.to_s.gsub(/[&<>"]/, entities)
    "#{name} = #{escaped}"
  end
  attr
end
432
+
433
# Reads an attribute value by name.
#
# The element is first asked directly via +[]+. If that yields nothing
# and the name has the form "prefix:local", the attributes are scanned
# for one whose local name matches and whose namespace prefix equals the
# requested prefix, or whose name equals the full qualified name.
#
# @return [String, nil]
def get_attribute_value(element, name)
  native_elem = unpatch_node(element)
  return nil unless native_elem

  qualified = name.to_s
  direct = native_elem[qualified]
  return direct if direct
  return nil unless qualified.include?(":")

  prefix, local_name = qualified.split(":", 2)
  return nil unless native_elem.attributes?

  native_elem.each_attr do |attr|
    literal_match = attr.name == qualified
    next unless attr.name == local_name || literal_match

    return attr.value if (attr.ns && attr.ns.prefix == prefix) || literal_match
  end

  nil
end
462
+
463
# Removes the attribute called +name+ from the element, if the element
# exists, reports attributes, and has an attribute under that name.
def remove_attribute(element, name)
  native_elem = unpatch_node(element)
  return unless native_elem&.attributes?

  native_elem.attributes.get_attribute(name.to_s)&.remove!
end
471
+
472
# Renames an attribute. The original author notes that LibXML attributes
# cannot be renamed in place, so the old attribute is removed from its
# element and a new one with the same value is set under +new_name+.
#
# @param attribute native attribute (responds to name, value, parent)
# @param new_name [#to_s] new attribute name
# @return the newly created attribute, or nil when the attribute or its
#   parent element is missing
def set_attribute_name(attribute, new_name)
  return unless attribute

  # Capture everything needed before the old attribute is removed
  previous_name = attribute.name
  kept_value = attribute.value
  owner = attribute.parent
  return unless owner

  owner.attributes.get_attribute(previous_name)&.remove!

  renamed = new_name.to_s
  owner[renamed] = kept_value
  owner.attributes.get_attribute(renamed)
end
492
+
493
# Adds +child+ to +element+.
#
# Element parents: the child is attached through +import_and_add+. Before
# that, if the parent's own namespace is a default namespace (nil or
# empty prefix) and the child is an element without a namespace (or with
# an empty href), the child is given the parent's namespace — the
# original author does this for XPath compatibility. Prefixed namespaces
# are not propagated.
#
# Document parents: Declaration, DoctypeWrapper, ProcessingInstruction
# and Text wrappers are not inserted into the tree; they are recorded in
# instance variables on the native document (@moxml_declaration,
# @moxml_doctype, @moxml_pis, @moxml_texts), which the original comments
# describe as being kept for serialization. An element added to a
# document without a root becomes the root; anything else is added under
# the existing root. A non-element added to a rootless document is
# ignored.
#
# @param element wrapper or native parent (element or document)
# @param child wrapper or native child
def add_child(element, child)
  return unless element && child

  native_elem = unpatch_node(element)
  native_child = unpatch_node(child)

  if native_elem.respond_to?(:namespaces) && native_elem.namespaces&.namespace &&
     native_child.respond_to?(:namespaces) && native_child.element? &&
     (!native_child.namespaces.namespace || native_child.namespaces.namespace.href.to_s.empty?)

    parent_ns = native_elem.namespaces.namespace
    # Only a DEFAULT namespace (nil or empty prefix) is propagated
    if parent_ns.prefix.nil? || parent_ns.prefix.to_s.empty?
      native_child.namespaces.namespace = parent_ns
    end
  end

  unless native_elem.is_a?(::LibXML::XML::Document)
    return import_and_add(native_elem.doc, native_elem, native_child)
  end

  if child.is_a?(CustomizedLibxml::Declaration)
    native_elem.instance_variable_set(:@moxml_declaration, child)
    # Back-reference from the declaration to the document holding it
    child.instance_variable_set(:@parent_doc, native_elem)
    return
  end

  if child.is_a?(DoctypeWrapper)
    native_elem.instance_variable_set(:@moxml_doctype, child)
    return
  end

  if child.is_a?(CustomizedLibxml::ProcessingInstruction)
    pis = native_elem.instance_variable_get(:@moxml_pis) || []
    pis << child
    native_elem.instance_variable_set(:@moxml_pis, pis)
    return
  end

  # Per the original note, documents can't have text children in LibXML
  if child.is_a?(CustomizedLibxml::Text)
    texts = native_elem.instance_variable_get(:@moxml_texts) || []
    texts << child
    native_elem.instance_variable_set(:@moxml_texts, texts)
    return
  end

  if native_elem.root.nil? && node_type(native_child) == :element
    native_elem.root = native_child
  elsif native_elem.root
    # native_elem IS the document here, so it is passed directly; the
    # previous code asked the document for `.doc`, which the libxml-ruby
    # Document API does not appear to provide.
    import_and_add(native_elem, native_elem.root, native_child)
  end
end
558
+
559
# Inserts +sibling+ immediately before +node+.
#
# A ProcessingInstruction wrapper whose target node belongs to a document
# is not inserted into the tree; it is appended to the document's
# @moxml_pis list instead.
# NOTE(review): the original comment says this special case is meant for
# PIs placed next to the root element, but the condition does not check
# that +node+ is the root — confirm whether nested PIs should be handled
# this way too.
def add_previous_sibling(node, sibling)
  return unless node && sibling

  native_node = unpatch_node(node)
  native_sibling = unpatch_node(sibling)

  document_level_pi = sibling.is_a?(CustomizedLibxml::ProcessingInstruction) &&
                      native_node.respond_to?(:doc) && native_node.doc

  unless document_level_pi
    native_node.prev = native_sibling
    return native_sibling
  end

  owner_doc = native_node.doc
  recorded = owner_doc.instance_variable_get(:@moxml_pis) || []
  recorded << sibling
  owner_doc.instance_variable_set(:@moxml_pis, recorded)
  nil
end
578
+
579
# Inserts +sibling+ immediately after +node+ by assigning the unwrapped
# sibling to the unwrapped node's +next=+ writer. Does nothing when
# either argument is missing.
def add_next_sibling(node, sibling)
  return unless node && sibling

  unpatch_node(node).next = unpatch_node(sibling)
end
586
+
587
# Removes a node.
#
# A Declaration wrapper is only flagged: its @removed instance variable
# is set to true and nothing else is touched (the original author notes
# that the owning document is not easy to reach from here, so the flag
# lives on the wrapper itself). Any other node is unwrapped and detached
# with +remove!+.
def remove(node)
  unless node.is_a?(CustomizedLibxml::Declaration)
    return unpatch_node(node)&.remove!
  end

  node.instance_variable_set(:@removed, true)
  nil
end
606
+
607
# Replaces +node+ with +new_node+ under the same parent.
#
# Text-for-text replacement is done by overwriting the parent's content
# with the new text (the original author notes that sibling manipulation
# is unreliable for text nodes in LibXML). Otherwise the new node is
# attached via +import_and_add+, repositioned next to the old node's
# former neighbours, and the old node is removed.
#
# Nothing happens when the old node has no parent or the new node
# unwraps to nil.
def replace(node, new_node)
  old_native = unpatch_node(node)
  new_native = unpatch_node(new_node)
  container = old_native&.parent
  return unless container && new_native

  if old_native.text? && new_native.text?
    container.content = new_native.content
    return
  end

  # Remember the neighbours before anything moves
  before = old_native.prev
  after = old_native.next

  owner_doc = container.respond_to?(:doc) ? container.doc : nil
  import_and_add(owner_doc, container, new_native)

  # Move the new node to where the old one sits
  before.next = new_native if before
  after.prev = new_native if after

  old_native.remove!
end
644
+
645
# Swaps out every child of +element+ for the given +children+.
#
# Existing children are removed first; each new child is unwrapped and then
# attached through +import_and_add+, which receives the element's document
# (when the element exposes one).
#
# @param element [Object] wrapped or native parent element
# @param children [Enumerable] wrapped or native nodes to attach, in order
# @return [Enumerable, nil] +children+, or nil when the element is missing
def replace_children(element, children)
  target = unpatch_node(element)
  return unless target

  target.each_child { |existing| existing.remove! }

  owner_doc = target.respond_to?(:doc) ? target.doc : nil
  children.each do |child|
    import_and_add(owner_doc, target, unpatch_node(child))
  end
end
662
+
663
# Reads the native +content+ of +node+.
#
# @param node [Object] wrapped or native node
# @return [String, nil] the content, or nil when there is no native node
def text_content(node)
  native = unpatch_node(node)
  native ? native.content : nil
end
669
+
670
# Concatenates the content of the direct text children of +node+.
#
# Only children for which +text?+ is true contribute; other child kinds are
# skipped and descendants are not visited.
#
# @param node [Object] wrapped or native node
# @return [String] joined text, "" when the node is missing or childless
def inner_text(node)
  native = unpatch_node(node)
  return "" if !native || !native.children?

  pieces = []
  native.each_child { |kid| pieces << kid.content if kid.text? }
  pieces.join
end
681
+
682
# Assigns +content+ (converted with +to_s+) to the native node's content.
#
# @param node [Object] wrapped or native node
# @param content [#to_s] new text
# @return [String, nil] the assigned string, nil when there is no native node
def set_text_content(node, content)
  target = unpatch_node(node)
  return unless target

  target.content = content.to_s
end
686
+
687
# Returns the CDATA node's content with the five predefined XML entities
# decoded.
#
# The replacements run in a fixed order with "&amp;" last, so an input such
# as "&amp;lt;" decodes to "&lt;" rather than "<".
#
# @param node [Object] wrapped or native CDATA node
# @return [String, nil] decoded content, nil when no content is available
def cdata_content(node)
  raw = unpatch_node(node)&.content
  return nil unless raw

  [
    ["&quot;", '"'],
    ["&apos;", "'"],
    ["&lt;", "<"],
    ["&gt;", ">"],
    ["&amp;", "&"],
  ].reduce(raw) { |text, (entity, char)| text.gsub(entity, char) }
end
699
+
700
# Stores +content+ (via +to_s+) on the native CDATA node without applying
# any escaping in this method.
#
# @param node [Object] wrapped or native CDATA node
# @param content [#to_s] new CDATA text
# @return [String, nil] the assigned string, nil when there is no native node
def set_cdata_content(node, content)
  target = unpatch_node(node)
  return unless target

  target.content = content.to_s
end
705
+
706
# Reads the native +content+ of a comment node.
#
# @param node [Object] wrapped or native comment node
# @return [String, nil] the content, nil when the unwrapped node is nil
def comment_content(node)
  unpatch_node(node)&.content
end
710
+
711
# Assigns +content+ (converted with +to_s+) to the native comment node.
#
# @param node [Object] wrapped or native comment node
# @param content [#to_s] new comment text
# @return [String, nil] the assigned string, nil when there is no native node
def set_comment_content(node, content)
  target = unpatch_node(node)
  return unless target

  target.content = content.to_s
end
715
+
716
# Returns the target of a processing instruction, which is read from the
# native node's +name+.
#
# @param node [Object] wrapped or native processing-instruction node
# @return [String, nil] the target, nil when the unwrapped node is nil
def processing_instruction_target(node)
  unpatch_node(node)&.name
end
720
+
721
# Returns the processing instruction's content with the five predefined XML
# entities decoded.
#
# "&amp;" is decoded last so that already-escaped entity text (for example
# "&amp;quot;") is only unescaped one level.
#
# @param node [Object] wrapped or native processing-instruction node
# @return [String, nil] decoded content, nil when no content is available
def processing_instruction_content(node)
  raw = unpatch_node(node)&.content
  return nil unless raw

  decoded = raw
  {
    "&quot;" => '"',
    "&apos;" => "'",
    "&lt;" => "<",
    "&gt;" => ">",
    "&amp;" => "&",
  }.each do |entity, char|
    decoded = decoded.gsub(entity, char)
  end
  decoded
end
733
+
734
# Stores +content+ (via +to_s+) as the native processing instruction's
# content; no escaping is applied by this method itself.
#
# @param node [Object] wrapped or native processing-instruction node
# @param content [#to_s] new instruction data
# @return [String, nil] the assigned string, nil when there is no native node
def set_processing_instruction_content(node, content)
  target = unpatch_node(node)
  return unless target

  target.content = content.to_s
end
739
+
740
# Declares a namespace on +element+ and returns the native namespace object.
#
# An empty or nil +prefix+ is passed to LibXML as nil (a default namespace)
# and, in that case only, the new namespace is also assigned as the
# element's own namespace.
#
# @param element [Object] wrapped or native element
# @param prefix [#to_s, nil] namespace prefix; blank means default namespace
# @param uri [#to_s] namespace URI
# @return [::LibXML::XML::Namespace, nil] nil when there is no native element
def create_native_namespace(element, prefix, uri)
  native_elem = unpatch_node(element)
  return nil unless native_elem

  prefix_text = prefix.to_s
  default_ns = prefix_text.empty?

  ns = ::LibXML::XML::Namespace.new(
    native_elem,
    default_ns ? nil : prefix_text,
    uri.to_s,
  )
  native_elem.namespaces.namespace = ns if default_ns
  ns
end
755
+
756
# Makes +ns+ the element's own namespace.
#
# @param element [Object] wrapped or native element
# @param ns [Object] native namespace object
# @return [Object, nil] +ns+, or nil when either argument is missing
def set_namespace(element, ns)
  native_elem = unpatch_node(element)
  native_elem.namespaces.namespace = ns if native_elem && ns
end
762
+
763
# Returns the namespace assigned to +element+ itself, as reported by the
# native +namespaces.namespace+; ancestors are not consulted here.
#
# @param element [Object] wrapped or native element
# @return [Object, nil] native namespace, nil when absent
def namespace(element)
  native_elem = unpatch_node(element)
  native_elem ? native_elem.namespaces&.namespace : nil
end
772
+
773
# @param namespace [Object, nil] native namespace
# @return [String, nil] the namespace's prefix, nil for a nil namespace
def namespace_prefix(namespace)
  namespace.nil? ? nil : namespace.prefix
end
776
+
777
# @param namespace [Object, nil] native namespace
# @return [String, nil] the namespace's href, nil for a nil namespace
def namespace_uri(namespace)
  namespace.nil? ? nil : namespace.href
end
780
+
781
# Lists the namespaces yielded by iterating the node's native +namespaces+
# collection.
#
# @param node [Object] wrapped or native node
# @return [Array] namespaces in iteration order; [] when the node is missing
#   or does not expose +namespaces+
def namespace_definitions(node)
  native = unpatch_node(node)
  return [] unless native && native.respond_to?(:namespaces)

  native.namespaces.each_with_object([]) { |ns, found| found << ns }
end
790
+
791
# Evaluates an XPath +expression+ relative to +node+.
#
# The namespace context comes from +build_xpath_namespaces+; when that
# context is empty the native +find+ is called without it. Every hit is
# wrapped with +patch_node+.
#
# @param node [Object] wrapped or native context node
# @param expression [String] XPath expression
# @param namespaces [Hash, nil] extra prefix => URI registrations
# @return [Array] wrapped results, [] when there is no native node
# @raise [Moxml::XPathError] when LibXML raises ::LibXML::XML::Error
def xpath(node, expression, namespaces = nil)
  context_node = unpatch_node(node)
  return [] unless context_node

  registered = build_xpath_namespaces(context_node, namespaces)
  hits = if registered.empty?
           context_node.find(expression)
         else
           context_node.find(expression, registered)
         end

  hits.to_a.map { |hit| patch_node(hit) }
rescue ::LibXML::XML::Error => e
  raise Moxml::XPathError.new(
    e.message,
    expression: expression,
    adapter: "LibXML",
    node: node,
  )
end
815
+
816
# Returns the first node matched by +expression+, using +xpath+.
#
# @param node [Object] wrapped or native context node
# @param expression [String] XPath expression
# @param namespaces [Hash, nil] extra prefix => URI registrations
# @return [Object, nil] first wrapped match, nil when nothing matched
def at_xpath(node, expression, namespaces = nil)
  xpath(node, expression, namespaces)&.first
end
820
+
821
# Serializes +node+ to an XML string.
#
# Resolution order:
# 1. Declaration wrappers, and any non-native object responding to
#    +to_xml+, render themselves.
# 2. Non-document native nodes go through
#    +serialize_element_with_namespaces+.
# 3. Documents are assembled from: the XML declaration, then the pieces
#    stored on the document in @moxml_doctype, @moxml_pis and @moxml_texts,
#    then the root element. Pieces are separated by a single "\n".
#
# Side effect: when a declaration is wanted and the document has no
# @moxml_declaration instance variable yet, a default declaration is built
# and stored there.
#
# Fix over the previous version: with +:indent+ set, a "\n" was prepended to
# the root even when nothing had been emitted before it (for example with
# +no_declaration: true+), producing a leading blank line. The separator is
# now added only between pieces, matching the non-indented path.
#
# @param node [Object] wrapped or native node/document
# @param options [Hash] supports :no_declaration (Boolean), :encoding
#   (String, used only for a newly created default declaration) and :indent
#   (Integer, indentation applied to the root element when positive)
# @return [String] serialized XML, "" when there is no native node
def serialize(node, options = {})
  if node.respond_to?(:to_xml)
    return node.to_xml if node.is_a?(CustomizedLibxml::Declaration)

    unless node.is_a?(::LibXML::XML::Node) ||
        node.is_a?(::LibXML::XML::Document)
      return node.to_xml
    end
  end

  native_node = unpatch_node(node)
  return "" unless native_node

  unless native_node.is_a?(::LibXML::XML::Document)
    return serialize_element_with_namespaces(native_node, true)
  end

  output = +""
  # Appends a piece, separated from earlier output by exactly one newline.
  append = lambda do |piece|
    output << "\n" unless output.empty?
    output << piece
  end

  # The declaration is included unless :no_declaration is present and truthy.
  include_declaration = !options.key?(:no_declaration) ||
    !options[:no_declaration]

  if include_declaration
    if native_node.instance_variable_defined?(:@moxml_declaration)
      # Explicitly managed declaration: honour nil and the @removed flag.
      decl = native_node.instance_variable_get(:@moxml_declaration)
      output << decl.to_xml if decl && !decl.instance_variable_get(:@removed)
    else
      version = native_node.version || "1.0"
      encoding_val = options[:encoding] ||
        encoding_to_string(native_node.encoding) ||
        "UTF-8"

      # No standalone attribute unless it was set explicitly elsewhere.
      decl = CustomizedLibxml::Declaration.new(
        native_node, version, encoding_val, nil
      )
      native_node.instance_variable_set(:@moxml_declaration, decl)
      output << decl.to_xml
    end
  end

  doctype_wrapper = native_node.instance_variable_get(:@moxml_doctype)
  append.call(doctype_wrapper.to_xml) if doctype_wrapper

  (native_node.instance_variable_get(:@moxml_pis) || []).each do |pi|
    append.call(pi.to_xml)
  end

  (native_node.instance_variable_get(:@moxml_texts) || []).each do |text|
    append.call(text.to_xml)
  end

  if native_node.root
    root_output = serialize_element_with_namespaces(native_node.root, true)

    if options[:indent]&.positive?
      root_output = indent_xml(
        add_newlines_to_xml(root_output),
        options[:indent],
      )
    end

    append.call(root_output)
  end

  output
end
930
+
931
# Inserts a newline after every tag that is directly followed by another
# opening tag, leaving CDATA sections untouched.
#
# A tag followed by a closing tag (for example "<a></a>") stays on one line.
# CDATA sections are swapped for placeholders before the substitution and
# restored afterwards; the scan uses String#index only, so it is linear.
# An unterminated CDATA start is copied through unprotected.
#
# Fix over the previous version: sections were restored with
# sub!(placeholder, cdata), which treats backslash sequences in the
# replacement string (such as "\\\\", "\\0", "\\1") as escapes and so
# corrupted CDATA containing backslashes. The block form inserts the section
# verbatim.
#
# @param xml_string [String] compact XML
# @return [String] XML with newlines between adjacent tags
def add_newlines_to_xml(xml_string)
  cdata_sections = []
  shielded = +""
  pos = 0

  loop do
    cdata_start = xml_string.index("<![CDATA[", pos)
    unless cdata_start
      # No further CDATA: copy the remainder.
      shielded << xml_string[pos..]
      break
    end

    shielded << xml_string[pos...cdata_start]

    # 9 is the length of "<![CDATA[".
    cdata_end = xml_string.index("]]>", cdata_start + 9)
    unless cdata_end
      # Malformed CDATA (no closing "]]>"): copy as-is and stop scanning.
      shielded << xml_string[cdata_start..]
      break
    end

    section_end = cdata_end + 3 # include "]]>"
    cdata_sections << xml_string[cdata_start...section_end]
    shielded << "__CDATA_PLACEHOLDER_#{cdata_sections.length - 1}__"
    pos = section_end
  end

  with_newlines = shielded.gsub(%r{(<[^>]+)>(?=<(?!/))}, "\\1>\n")

  cdata_sections.each_with_index do |cdata, index|
    # Block form: the replacement is taken literally, no escape processing.
    with_newlines.sub!("__CDATA_PLACEHOLDER_#{index}__") { cdata }
  end

  with_newlines
end
989
+
990
# Indents XML that already has one tag group per line.
#
# Each non-blank line is stripped and prefixed with indent_size * depth
# spaces. Depth drops (never below zero) before a line starting with "</"
# and rises after a line that opens an element without closing it on the
# same line; self-closing tags, "<?..." and "<!..." lines leave it unchanged.
#
# @param xml_string [String] XML with newlines between tags
# @param indent_size [Integer] spaces per nesting level
# @return [String] indented lines joined with "\n"
def indent_xml(xml_string, indent_size)
  depth = 0

  rendered = xml_string.each_line.map(&:strip).reject(&:empty?).map do |text|
    depth = [depth - 1, 0].max if text.start_with?("</")

    indented = (" " * (indent_size * depth)) + text

    opens_element = text.start_with?("<") &&
      !text.start_with?("</", "<?", "<!") &&
      !text.end_with?("/>") &&
      !text.include?("</")
    depth += 1 if opens_element

    indented
  end

  rendered.join("\n")
end
1016
+
1017
# Builds a new native node carrying the same data as +node+, so the copy can
# be attached to another document.
#
# Behaviour per +node_type+:
# - :doctype  -> a new DoctypeWrapper with the same name and ids (a doctype
#   that is not a DoctypeWrapper is returned unchanged)
# - :element  -> a new element with copied namespaces, attributes and
#   recursively duplicated children; whitespace-only text children are
#   dropped
# - :text / :cdata / :comment / :processing_instruction -> a new node of the
#   same kind built from the native name/content
# - anything else -> the native node's +dup+
#
# @param node [Object, nil] wrapped or native node
# @return [Object, nil] the copy, nil when +node+ is nil/false
def duplicate_node(node)
  return nil unless node

  native_node = node.respond_to?(:native) ? node.native : node

  case node_type(node)
  when :doctype
    return node unless node.is_a?(DoctypeWrapper)

    DoctypeWrapper.new(
      create_document,
      node.name,
      node.external_id,
      node.system_id,
    )
  when :element
    copy = ::LibXML::XML::Node.new(native_node.name)

    # Namespaces go first so prefixed attributes can be resolved afterwards.
    if native_node.respond_to?(:namespaces)
      native_node.namespaces.each do |ns|
        ::LibXML::XML::Namespace.new(copy, ns.prefix, ns.href)
      end

      own_ns = native_node.namespaces.namespace
      if own_ns
        # Re-select the element's own namespace among the copies just made.
        copy.namespaces.each do |candidate|
          next unless candidate.prefix == own_ns.prefix &&
            candidate.href == own_ns.href

          copy.namespaces.namespace = candidate
          break
        end
      end
    end

    if native_node.attributes?
      native_node.each_attr do |attr|
        attr_prefix = attr.ns&.prefix
        qualified = attr_prefix ? "#{attr_prefix}:#{attr.name}" : attr.name
        copy[qualified] = attr.value
      end
    end

    if native_node.children?
      native_node.each_child do |child|
        next if child.text? && child.content.to_s.strip.empty?

        copy << duplicate_node(child)
      end
    end

    copy
  when :text
    ::LibXML::XML::Node.new_text(native_node.content)
  when :cdata
    ::LibXML::XML::Node.new_cdata(native_node.content)
  when :comment
    ::LibXML::XML::Node.new_comment(native_node.content)
  when :processing_instruction
    ::LibXML::XML::Node.new_pi(native_node.name, native_node.content)
  else
    native_node.dup
  end
end
1108
+
1109
# Wraps a native LibXML node in the matching CustomizedLibxml wrapper.
#
# nil and objects that already are CustomizedLibxml::Node instances are
# returned as they are; node types without a dedicated wrapper are returned
# unwrapped.
#
# @param node [Object, nil] native node or existing wrapper
# @param _parent [Object, nil] unused
# @return [Object, nil] wrapper instance or the original object
def patch_node(node, _parent = nil)
  return node if node.nil? || node.is_a?(CustomizedLibxml::Node)

  wrapper_class = case node_type(node)
                  when :element then CustomizedLibxml::Element
                  when :text then CustomizedLibxml::Text
                  when :cdata then CustomizedLibxml::Cdata
                  when :comment then CustomizedLibxml::Comment
                  when :processing_instruction
                    CustomizedLibxml::ProcessingInstruction
                  end

  wrapper_class ? wrapper_class.new(node) : node
end
1129
+
1130
# Returns the native node behind a wrapper.
#
# @param node [Object, nil] anything; objects responding to +native+ are
#   unwrapped, everything else (including nil) is returned unchanged
# @return [Object, nil]
def unpatch_node(node)
  return node unless node.respond_to?(:native)

  node.native
end
1134
+
1135
# Produces a node that can be attached to +target_doc+.
#
# For this adapter that always means a fresh copy from +duplicate_node+;
# +target_doc+ is only checked for presence.
#
# @param node [Object, nil] wrapped or native node
# @param target_doc [Object, nil] destination document
# @return [Object, nil] the duplicate, or +node+ itself when either argument
#   is missing
def prepare_for_new_document(node, target_doc)
  node && target_doc ? duplicate_node(node) : node
end
1143
+
1144
+ private
1145
+
1146
# Renders a native element as "<name ...>children</name>".
#
# Emits, in order: the element's own namespace definitions (first
# occurrence per prefix), its attributes (names starting with "xmlns" are
# skipped; prefixed attributes keep their prefix), and its children via
# +serialize_node+. Whitespace-only text children are omitted and the
# element is always written with an explicit closing tag.
#
# @param elem [Object] native element
# @return [String]
def serialize_element(elem)
  parts = ["<#{elem.name}"]

  if elem.respond_to?(:namespaces)
    emitted = {}
    elem.namespaces.definitions.each do |ns|
      prefix = ns.prefix
      next if emitted.key?(prefix)

      emitted[prefix] = true
      parts << if prefix.nil? || prefix.empty?
                 " xmlns=\"#{escape_xml(ns.href)}\""
               else
                 " xmlns:#{prefix}=\"#{escape_xml(ns.href)}\""
               end
    end
  end

  if elem.attributes?
    elem.each_attr do |attr|
      next if attr.name.start_with?("xmlns")

      attr_prefix = attr.ns&.prefix
      qualified = attr_prefix ? "#{attr_prefix}:#{attr.name}" : attr.name
      parts << " #{qualified}=\"#{escape_xml(attr.value)}\""
    end
  end

  parts << ">"
  if elem.children?
    elem.each_child do |child|
      next if child.text? && child.content.to_s.strip.empty?

      parts << serialize_node(child)
    end
  end
  parts << "</#{elem.name}>"

  parts.join
end
1195
+
1196
# Renders a single non-document node.
#
# ProcessingInstruction, Comment, Cdata and Text wrappers that respond to
# +to_xml+ render themselves. Native nodes are dispatched on +node_type+;
# unknown types fall back to +to_s+.
#
# NOTE(review): the native comment branch pads the content with one space on
# each side - confirm that this matches what the wrappers emit.
#
# @param node [Object] wrapper or native node
# @return [String]
def serialize_node(node)
  self_rendering = node.respond_to?(:to_xml) && [
    CustomizedLibxml::ProcessingInstruction,
    CustomizedLibxml::Comment,
    CustomizedLibxml::Cdata,
    CustomizedLibxml::Text,
  ].any? { |wrapper| node.is_a?(wrapper) }
  return node.to_xml if self_rendering

  kinds = ::LibXML::XML::Node
  case node.node_type
  when kinds::ELEMENT_NODE then serialize_element(node)
  when kinds::TEXT_NODE then escape_text(node.content)
  when kinds::CDATA_SECTION_NODE then "<![CDATA[#{node.content}]]>"
  when kinds::COMMENT_NODE then "<!-- #{node.content} -->"
  when kinds::PI_NODE then "<?#{node.name} #{node.content}?>"
  else node.to_s
  end
end
1221
+
1222
# Escapes "&", "<" and ">" for use in element text.
#
# "&" is replaced first so the entities produced for "<" and ">" are not
# escaped a second time.
#
# @param text [#to_s]
# @return [String]
def escape_text(text)
  [["&", "&amp;"], ["<", "&lt;"], [">", "&gt;"]]
    .reduce(text.to_s) { |escaped, (char, entity)| escaped.gsub(char, entity) }
end
1228
+
1229
# Escapes "&", "<", ">" and the double quote, making the result safe inside
# a double-quoted attribute value.
#
# "&" is replaced first so the generated entities are not escaped again.
#
# @param text [#to_s]
# @return [String]
def escape_xml(text)
  escaped = text.to_s
  {
    "&" => "&amp;",
    "<" => "&lt;",
    ">" => "&gt;",
    "\"" => "&quot;",
  }.each do |char, entity|
    escaped = escaped.gsub(char, entity)
  end
  escaped
end
1236
+
1237
# Escapes "&", "<", ">" and the double quote in an attribute value.
#
# "&" is handled first so the entities inserted afterwards stay intact.
#
# @param value [#to_s]
# @return [String]
def escape_attribute_value(value)
  replacements = [
    ["&", "&amp;"],
    ["<", "&lt;"],
    [">", "&gt;"],
    ["\"", "&quot;"],
  ]
  replacements.inject(value.to_s) do |escaped, (char, entity)|
    escaped.gsub(char, entity)
  end
end
1245
+
1246
# Appends +child+ to +element+, importing or copying it when LibXML refuses
# because the two nodes belong to different documents.
#
# The plain append is tried first. Only a ::LibXML::XML::Error whose message
# mentions "different documents" is handled; any other error is re-raised.
# With a target document available (+doc+, else +element.doc+) the child is
# imported into it; otherwise a detached copy from +duplicate_node+ is
# appended instead.
#
# Fix over the previous version: the import was called as
# import(child, true). libxml-ruby documents Document#import as taking the
# node only (and copying it deeply), so the extra argument would raise
# ArgumentError on exactly the path this method exists to handle.
#
# @param doc [Object, nil] target document, may be nil
# @param element [Object, nil] native parent element
# @param child [Object, nil] native node to append
# @return [Object, nil] result of the native append, nil when skipped
# @raise [::LibXML::XML::Error] for errors other than cross-document ones
def import_and_add(doc, element, child)
  return unless element && child

  begin
    element << child
  rescue ::LibXML::XML::Error => e
    raise unless e.message.include?("different documents")

    target_doc = doc || (element.respond_to?(:doc) ? element.doc : nil)
    adoptable = target_doc ? target_doc.import(child) : duplicate_node(child)
    element << adoptable
  end
end
1273
+
1274
# Converts a LibXML encoding constant to its textual name.
#
# Strings pass through unchanged and nil stays nil. Constants other than the
# five listed below are reported as "UTF-8".
#
# @param encoding [Integer, String, nil] ::LibXML::XML::Encoding constant
# @return [String, nil]
def encoding_to_string(encoding)
  return nil unless encoding
  return encoding if encoding.is_a?(String)

  known = ::LibXML::XML::Encoding
  pair = [
    [known::UTF_8, "UTF-8"],
    [known::ISO_8859_1, "ISO-8859-1"],
    [known::UTF_16LE, "UTF-16LE"],
    [known::UTF_16BE, "UTF-16BE"],
    [known::UCS_2, "UCS-2"],
  ].find { |constant, _label| constant === encoding }

  pair ? pair.last : "UTF-8"
end
1293
+
1294
# Converts an encoding name to the matching ::LibXML::XML::Encoding constant.
#
# Matching ignores case and treats "-" and "_" alike, and also accepts the
# compact spellings without separators. Unrecognised names map to UTF_8.
#
# Changes over the previous version:
# - "UCS-2" is recognised, so the name produced by +encoding_to_string+ for
#   UCS_2 converts back to the same constant instead of UTF_8.
# - The argument is converted with +to_s+, so Symbols no longer raise
#   NoMethodError.
#
# @param str [String, Symbol, nil] encoding name
# @return [Integer, nil] encoding constant, nil when +str+ is nil/false
def string_to_encoding(str)
  return nil unless str

  case str.to_s.upcase.tr("-", "_")
  when "UTF_8", "UTF8"
    ::LibXML::XML::Encoding::UTF_8
  when "ISO_8859_1", "ISO88591"
    ::LibXML::XML::Encoding::ISO_8859_1
  when "UTF_16LE", "UTF16LE"
    ::LibXML::XML::Encoding::UTF_16LE
  when "UTF_16BE", "UTF16BE"
    ::LibXML::XML::Encoding::UTF_16BE
  when "UCS_2", "UCS2"
    ::LibXML::XML::Encoding::UCS_2
  else
    ::LibXML::XML::Encoding::UTF_8
  end
end
1310
+
1311
# Renders an element and its subtree while controlling which namespace
# declarations are written.
#
# With +include_ns+ true every namespace definition of the element is
# emitted (first occurrence per prefix). With +include_ns+ false a
# definition is emitted only when the parent element already has the same
# prefix bound to a different URI.
#
# Attributes whose name starts with "xmlns" are skipped; prefixed attributes
# keep their prefix. Whitespace-only text children are dropped. Each child
# is passed through +patch_node+: non-native wrappers that respond to
# +to_xml+ render themselves, native elements recurse with
# +include_ns = false+, and everything else goes through +serialize_node+.
#
# NOTE(review): with +include_ns+ false a prefix first declared on this
# element (absent from the parent) is not written - confirm that this path
# is only reached for elements whose declarations are inherited.
#
# @param elem [Object] native element
# @param include_ns [Boolean] whether to write all own namespace definitions
# @return [String]
def serialize_element_with_namespaces(elem, include_ns = true)
  parts = ["<#{elem.name}"]

  if elem.respond_to?(:namespaces) && elem.namespaces.respond_to?(:definitions)
    # prefix => URI as seen from the parent element; only gathered when
    # overrides have to be detected.
    inherited = {}
    if !include_ns && elem.respond_to?(:parent) && elem.parent &&
        !elem.parent.is_a?(::LibXML::XML::Document) &&
        elem.parent.respond_to?(:namespaces)
      elem.parent.namespaces.each { |ns| inherited[ns.prefix] = ns.href }
    end

    emitted = {}
    elem.namespaces.definitions.each do |ns|
      prefix = ns.prefix
      uri = ns.href
      next if emitted.key?(prefix)

      overrides_parent = inherited.key?(prefix) && inherited[prefix] != uri
      next unless include_ns || overrides_parent

      emitted[prefix] = true
      parts << if prefix.nil? || prefix.empty?
                 " xmlns=\"#{escape_xml(uri)}\""
               else
                 " xmlns:#{prefix}=\"#{escape_xml(uri)}\""
               end
    end
  end

  if elem.attributes?
    elem.each_attr do |attr|
      next if attr.name.start_with?("xmlns")

      attr_prefix = attr.ns&.prefix
      qualified = attr_prefix ? "#{attr_prefix}:#{attr.name}" : attr.name
      parts << " #{qualified}=\"#{escape_xml(attr.value)}\""
    end
  end

  # Elements are always written with an explicit closing tag.
  parts << ">"
  if elem.children?
    elem.each_child do |child|
      next if child.text? && child.content.to_s.strip.empty?

      wrapped = patch_node(child)
      parts << if wrapped.respond_to?(:to_xml) &&
          !wrapped.is_a?(::LibXML::XML::Node)
                 wrapped.to_xml
               elsif child.element?
                 serialize_element_with_namespaces(child, false)
               else
                 serialize_node(child)
               end
    end
  end
  parts << "</#{elem.name}>"

  parts.join
end
1394
+
1395
# Collapses formatting whitespace: whitespace between adjacent tags is
# removed first, then every remaining newline together with the whitespace
# following it.
#
# @param xml_string [String]
# @return [String]
def remove_indentation(xml_string)
  without_gaps = xml_string.gsub(/>\s+</, "><")
  without_gaps.gsub(/\n\s*/, "")
end
1399
+
1400
# Gathers prefix => URI registrations for the whole tree that +node+
# belongs to.
#
# For a document the scan starts at its root; for any other node the
# topmost ancestor below the document is located first. The actual
# collection is delegated to +collect_ns_from_subtree+.
#
# @param node [Object] native node or document
# @return [Hash{String => String}] collected registrations, {} without root
def collect_namespace_definitions(node)
  ns_defs = {}

  if node.is_a?(::LibXML::XML::Document)
    root = node.root
  else
    root = node
    # Climb until the parent is missing or is the document itself.
    loop do
      break unless root.respond_to?(:parent)

      above = root.parent
      break if !above || above.is_a?(::LibXML::XML::Document)

      root = above
    end
  end

  collect_ns_from_subtree(root, ns_defs) if root
  ns_defs
end
1420
+
1421
# Records the namespaces found on +node+ and on its element descendants in
# +ns_defs+ (mutated in place).
#
# For every node, the namespaces yielded by +namespaces.each+ are recorded
# first, followed by the node's own active namespace. A nil or empty prefix
# is stored under the key "xmlns". The first URI seen for a key wins; later
# ones are ignored.
#
# @param node [Object] native node
# @param ns_defs [Hash{String => String}] accumulator
# @return [Hash, nil] +ns_defs+ when the node has children, otherwise nil
def collect_ns_from_subtree(node, ns_defs)
  remember = lambda do |ns|
    prefix = ns.prefix
    key = prefix.nil? || prefix.empty? ? "xmlns" : prefix
    ns_defs[key] = ns.href unless ns_defs.key?(key)
  end

  if node.respond_to?(:namespaces)
    node.namespaces.each { |ns| remember.call(ns) }

    if node.namespaces.respond_to?(:namespace)
      active_ns = node.namespaces.namespace
      remember.call(active_ns) if active_ns
    end
  end

  return unless node.respond_to?(:children?) && node.children?

  node.each_child do |child|
    collect_ns_from_subtree(child, ns_defs) if child.element?
  end
  ns_defs
end
1464
+
1465
# Builds the namespace context handed to LibXML's XPath engine.
#
# Namespaces collected from the node's tree form the base; entries in
# +user_namespaces+ are merged on top and win on conflicts.
#
# @param node [Object] native context node
# @param user_namespaces [Hash, nil] caller-supplied prefix => URI pairs
# @return [Hash] namespace context
def build_xpath_namespaces(node, user_namespaces)
  collected = collect_namespace_definitions(node)
  return collected if !user_namespaces || user_namespaces.empty?

  collected.merge(user_namespaces)
end
1476
+
1477
# Looks up a namespace by prefix, starting at +element+ and walking up
# through its ancestors.
#
# @param element [Object, nil] native starting node
# @param prefix [String, nil] prefix to match exactly
# @return [Object, nil] first namespace with that prefix, nil if none
def find_namespace_by_prefix(element, prefix)
  cursor = element

  loop do
    break unless cursor

    if cursor.respond_to?(:namespaces)
      cursor.namespaces.each do |candidate|
        return candidate if candidate.prefix == prefix
      end
    end

    cursor = cursor.respond_to?(:parent) ? cursor.parent : nil
  end

  nil
end
1490
+ end
1491
+
1492
+ # Bridge between LibXML SAX and Moxml SAX
1493
+ #
1494
+ # Translates LibXML::XML::SaxParser events to Moxml::SAX::Handler events
1495
+ #
1496
+ # @private
1497
class LibXMLSAXBridge
  include ::LibXML::XML::SaxParser::Callbacks

  # @param handler [Object] receiver of the translated events; it must
  #   implement the on_* methods invoked below
  def initialize(handler)
    @handler = handler
  end

  # Forwards the start-of-document event unchanged.
  def on_start_document
    @handler.on_start_document
  end

  # Forwards the end-of-document event unchanged.
  def on_end_document
    @handler.on_end_document
  end

  # Splits the attribute list into ordinary attributes and namespace
  # declarations before forwarding the event.
  #
  # Only "xmlns" (default namespace, stored under the nil key) and names of
  # the form "xmlns:prefix" count as declarations. Earlier, every name that
  # merely started with "xmlns" (for example "xmlnsfoo") was misfiled as a
  # namespace declaration and lost from the attribute hash.
  #
  # @param name [#to_s] element name
  # @param attributes [Hash, nil] attribute name => value pairs
  def on_start_element(name, attributes)
    attr_hash = {}
    ns_hash = {}

    attributes&.each do |attr_name, attr_value|
      key = attr_name.to_s
      if key == "xmlns"
        ns_hash[nil] = attr_value
      elsif key.start_with?("xmlns:")
        ns_hash[key.delete_prefix("xmlns:")] = attr_value
      else
        attr_hash[key] = attr_value
      end
    end

    @handler.on_start_element(name.to_s, attr_hash, ns_hash)
  end

  # @param name [#to_s] element name
  def on_end_element(name)
    @handler.on_end_element(name.to_s)
  end

  # @param chars [String] character data
  def on_characters(chars)
    @handler.on_characters(chars)
  end

  # LibXML reports CDATA as on_cdata_block; the handler event is on_cdata.
  #
  # @param content [String] CDATA text
  def on_cdata_block(content)
    @handler.on_cdata(content)
  end

  # @param msg [String] comment text
  def on_comment(msg)
    @handler.on_comment(msg)
  end

  # Missing instruction data is forwarded as an empty string.
  #
  # @param target [String] processing-instruction target
  # @param data [String, nil] processing-instruction data
  def on_processing_instruction(target, data)
    @handler.on_processing_instruction(target, data || "")
  end

  # Wraps the parser's message in a Moxml::ParseError and hands it to the
  # handler; nothing is raised here.
  #
  # @param msg [String] parser error message
  def on_error(msg)
    @handler.on_error(Moxml::ParseError.new(msg))
  end
end
1562
+ end
1563
+ end
1564
+ end