moxml 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +10 -10
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +224 -43
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +51 -0
  21. data/docs/_guides/modifying-xml.adoc +292 -0
  22. data/docs/_guides/parsing-xml.adoc +230 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_guides/xml-declaration.adoc +450 -0
  26. data/docs/_pages/adapter-compatibility.adoc +369 -0
  27. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  28. data/docs/_pages/adapters/index.adoc +97 -0
  29. data/docs/_pages/adapters/libxml.adoc +285 -0
  30. data/docs/_pages/adapters/nokogiri.adoc +251 -0
  31. data/docs/_pages/adapters/oga.adoc +291 -0
  32. data/docs/_pages/adapters/ox.adoc +56 -0
  33. data/docs/_pages/adapters/rexml.adoc +292 -0
  34. data/docs/_pages/best-practices.adoc +429 -0
  35. data/docs/_pages/compatibility.adoc +467 -0
  36. data/docs/_pages/configuration.adoc +250 -0
  37. data/docs/_pages/error-handling.adoc +349 -0
  38. data/docs/_pages/headed-ox-limitations.adoc +574 -0
  39. data/docs/_pages/headed-ox.adoc +1025 -0
  40. data/docs/_pages/index.adoc +35 -0
  41. data/docs/_pages/installation.adoc +140 -0
  42. data/docs/_pages/node-api-reference.adoc +49 -0
  43. data/docs/_pages/performance.adoc +35 -0
  44. data/docs/_pages/quick-start.adoc +243 -0
  45. data/docs/_pages/thread-safety.adoc +28 -0
  46. data/docs/_references/document-api.adoc +407 -0
  47. data/docs/_references/index.adoc +48 -0
  48. data/docs/_tutorials/basic-usage.adoc +267 -0
  49. data/docs/_tutorials/builder-pattern.adoc +342 -0
  50. data/docs/_tutorials/index.adoc +33 -0
  51. data/docs/_tutorials/namespace-handling.adoc +324 -0
  52. data/docs/_tutorials/xpath-queries.adoc +358 -0
  53. data/docs/index.adoc +122 -0
  54. data/examples/README.md +124 -0
  55. data/examples/api_client/README.md +424 -0
  56. data/examples/api_client/api_client.rb +394 -0
  57. data/examples/api_client/example_response.xml +48 -0
  58. data/examples/headed_ox_example/README.md +90 -0
  59. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  60. data/examples/rss_parser/README.md +194 -0
  61. data/examples/rss_parser/example_feed.xml +93 -0
  62. data/examples/rss_parser/rss_parser.rb +189 -0
  63. data/examples/sax_parsing/README.md +50 -0
  64. data/examples/sax_parsing/data_extractor.rb +75 -0
  65. data/examples/sax_parsing/example.xml +21 -0
  66. data/examples/sax_parsing/large_file.rb +78 -0
  67. data/examples/sax_parsing/simple_parser.rb +55 -0
  68. data/examples/web_scraper/README.md +352 -0
  69. data/examples/web_scraper/example_page.html +201 -0
  70. data/examples/web_scraper/web_scraper.rb +312 -0
  71. data/lib/moxml/adapter/base.rb +107 -28
  72. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  73. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  74. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  75. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  76. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  77. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  78. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  79. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  80. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
  81. data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
  82. data/lib/moxml/adapter/headed_ox.rb +161 -0
  83. data/lib/moxml/adapter/libxml.rb +1564 -0
  84. data/lib/moxml/adapter/nokogiri.rb +156 -9
  85. data/lib/moxml/adapter/oga.rb +190 -15
  86. data/lib/moxml/adapter/ox.rb +322 -28
  87. data/lib/moxml/adapter/rexml.rb +157 -28
  88. data/lib/moxml/adapter.rb +21 -4
  89. data/lib/moxml/attribute.rb +6 -0
  90. data/lib/moxml/builder.rb +40 -4
  91. data/lib/moxml/config.rb +8 -3
  92. data/lib/moxml/context.rb +57 -2
  93. data/lib/moxml/declaration.rb +9 -0
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +53 -6
  96. data/lib/moxml/document_builder.rb +34 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +155 -4
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1770 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_preservation_spec.rb +217 -0
  164. data/spec/moxml/declaration_spec.rb +36 -0
  165. data/spec/moxml/doctype_spec.rb +33 -0
  166. data/spec/moxml/document_builder_spec.rb +30 -0
  167. data/spec/moxml/document_spec.rb +105 -0
  168. data/spec/moxml/element_spec.rb +143 -0
  169. data/spec/moxml/error_spec.rb +266 -22
  170. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  171. data/spec/moxml/namespace_spec.rb +32 -0
  172. data/spec/moxml/node_set_spec.rb +39 -0
  173. data/spec/moxml/node_spec.rb +37 -0
  174. data/spec/moxml/processing_instruction_spec.rb +34 -0
  175. data/spec/moxml/sax_spec.rb +1067 -0
  176. data/spec/moxml/text_spec.rb +31 -0
  177. data/spec/moxml/version_spec.rb +14 -0
  178. data/spec/moxml/xml_utils/.gitkeep +0 -0
  179. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  180. data/spec/moxml/xml_utils_spec.rb +49 -0
  181. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  182. data/spec/moxml/xpath/axes_spec.rb +296 -0
  183. data/spec/moxml/xpath/cache_spec.rb +358 -0
  184. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  185. data/spec/moxml/xpath/context_spec.rb +210 -0
  186. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  187. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  188. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  189. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  190. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  191. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  192. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  193. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  194. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  195. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  196. data/spec/moxml/xpath/parser_spec.rb +364 -0
  197. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  198. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  199. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  200. data/spec/moxml/xpath_spec.rb +77 -0
  201. data/spec/performance/README.md +83 -0
  202. data/spec/performance/benchmark_spec.rb +64 -0
  203. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
  204. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  205. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  206. data/spec/spec_helper.rb +58 -1
  207. data/spec/support/xml_matchers.rb +1 -1
  208. metadata +178 -34
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -2,12 +2,13 @@
2
2
 
3
3
  require_relative "base"
4
4
  require "ox"
5
+ require "stringio"
5
6
  require_relative "customized_ox/text"
6
7
  require_relative "customized_ox/attribute"
7
8
  require_relative "customized_ox/namespace"
8
9
 
9
10
  # insert :parent methods to all Ox classes inherit the Node class
10
- ::Ox::Node.attr_accessor :parent
11
+ Ox::Node.attr_accessor :parent
11
12
  module Moxml
12
13
  module Adapter
13
14
  class Ox < Base
@@ -29,12 +30,37 @@ module Moxml
29
30
  doc
30
31
  end
31
32
  rescue ::Ox::ParseError => e
32
- raise Moxml::ParseError, e.message
33
+ raise Moxml::ParseError.new(
34
+ e.message,
35
+ source: xml.is_a?(String) ? xml[0..100] : nil,
36
+ )
33
37
  end
34
38
 
35
39
  DocumentBuilder.new(Context.new(:ox)).build(native_doc)
36
40
  end
37
41
 
42
+ # SAX parsing implementation for Ox
43
+ #
44
+ # @param xml [String, IO] XML to parse
45
+ # @param handler [Moxml::SAX::Handler] Moxml SAX handler
46
+ # @return [void]
47
+ def sax_parse(xml, handler)
48
+ # Create bridge that translates Ox SAX to Moxml SAX
49
+ bridge = OxSAXBridge.new(handler)
50
+
51
+ # Parse using Ox's SAX parser
52
+ xml_string = xml.respond_to?(:read) ? xml.read : xml.to_s
53
+
54
+ begin
55
+ ::Ox.sax_parse(bridge, StringIO.new(xml_string))
56
+ # Ox doesn't automatically call end_document, so we do it manually
57
+ bridge.end_document
58
+ rescue ::Ox::ParseError => e
59
+ error = Moxml::ParseError.new(e.message)
60
+ handler.on_error(error)
61
+ end
62
+ end
63
+
38
64
  def create_document(native_doc = nil)
39
65
  attrs = native_doc&.attributes || {}
40
66
  ::Ox::Document.new(**attrs)
@@ -60,7 +86,7 @@ module Moxml
60
86
 
61
87
  def create_native_doctype(name, external_id, system_id)
62
88
  ::Ox::DocType.new(
63
- "#{name} PUBLIC \"#{external_id}\" \"#{system_id}\""
89
+ "#{name} PUBLIC \"#{external_id}\" \"#{system_id}\"",
64
90
  )
65
91
  end
66
92
 
@@ -87,7 +113,8 @@ module Moxml
87
113
  end
88
114
 
89
115
  def create_native_namespace(element, prefix, uri)
90
- ns = ::Moxml::Adapter::CustomizedOx::Namespace.new(prefix, uri, element)
116
+ ns = ::Moxml::Adapter::CustomizedOx::Namespace.new(prefix, uri,
117
+ element)
91
118
  set_attribute(element, ns.expanded_prefix, uri)
92
119
  ns
93
120
  end
@@ -97,8 +124,12 @@ module Moxml
97
124
 
98
125
  prefix = ns.prefix
99
126
  # attributes don't have attributes but can have a namespace prefix
100
- set_attribute(element, ns.expanded_prefix, ns.uri) if element.respond_to?(:attributes)
101
- element.name = [prefix, element.name.delete_prefix("xmlns:")].compact.join(":")
127
+ if element.respond_to?(:attributes)
128
+ set_attribute(element, ns.expanded_prefix,
129
+ ns.uri)
130
+ end
131
+ element.name = [prefix,
132
+ element.name.delete_prefix("xmlns:")].compact.join(":")
102
133
  namespace(element)
103
134
  end
104
135
 
@@ -151,9 +182,14 @@ module Moxml
151
182
  end
152
183
 
153
184
  def node_name(node)
154
- node.value
155
- rescue StandardError
156
- node.name
185
+ name = begin
186
+ node.value
187
+ rescue StandardError
188
+ node.name
189
+ end
190
+
191
+ # Strip namespace prefix if present
192
+ name.to_s.split(":", 2).last
157
193
  end
158
194
 
159
195
  def set_node_name(node, name)
@@ -172,8 +208,12 @@ module Moxml
172
208
  new_node =
173
209
  case node
174
210
  # it can be either attribute or namespace
175
- when Array then ::Moxml::Adapter::CustomizedOx::Attribute.new(node.first, node.last)
176
- when Hash then ::Moxml::Adapter::CustomizedOx::Attribute.new(node.keys.first, node.values.first)
211
+ when Array then ::Moxml::Adapter::CustomizedOx::Attribute.new(
212
+ node.first, node.last
213
+ )
214
+ when Hash then ::Moxml::Adapter::CustomizedOx::Attribute.new(
215
+ node.keys.first, node.values.first
216
+ )
177
217
  when String then ::Moxml::Adapter::CustomizedOx::Text.new(node)
178
218
  else node
179
219
  end
@@ -186,7 +226,8 @@ module Moxml
186
226
  def unpatch_node(node)
187
227
  case node
188
228
  # it can be either attribute or namespace
189
- when ::Moxml::Adapter::CustomizedOx::Attribute then [node.name, node.value]
229
+ when ::Moxml::Adapter::CustomizedOx::Attribute then [node.name,
230
+ node.value]
190
231
  # when ::Moxml::Adapter::CustomizedOx::Attribute then { node.name => node.value }
191
232
  when ::Moxml::Adapter::CustomizedOx::Text then node.value
192
233
  else node
@@ -230,15 +271,18 @@ module Moxml
230
271
  end
231
272
 
232
273
  def attributes(element)
233
- return [] unless element.respond_to?(:attributes) && element.attributes
274
+ unless element.respond_to?(:attributes) && element.attributes
275
+ return []
276
+ end
234
277
 
235
- element.attributes.map do |name, value|
236
- next if name.start_with?("xmlns")
278
+ element.attributes.filter_map do |name, value|
279
+ next if name.to_s.start_with?("xmlns")
237
280
 
281
+ # Ensure value is passed correctly - Ox stores with symbol keys
238
282
  ::Moxml::Adapter::CustomizedOx::Attribute.new(
239
- name, value, element
283
+ name.to_s, value, element
240
284
  )
241
- end.compact
285
+ end
242
286
  end
243
287
 
244
288
  def attribute_element(attribute)
@@ -280,10 +324,15 @@ module Moxml
280
324
 
281
325
  def get_attribute(element, name)
282
326
  return unless element.respond_to?(:attributes) && element.attributes
283
- return unless element.attributes.key?(name.to_s) || element.attributes.key?(name.to_s.to_sym)
327
+ unless element.attributes.key?(name.to_s) || element.attributes.key?(name.to_s.to_sym)
328
+ return
329
+ end
330
+
331
+ # Ox stores attributes with symbol keys, so try both string and symbol
332
+ value = element.attributes[name.to_s] || element.attributes[name.to_s.to_sym]
284
333
 
285
334
  ::Moxml::Adapter::CustomizedOx::Attribute.new(
286
- name.to_s, element.attributes[name], element
335
+ name.to_s, value, element
287
336
  )
288
337
  end
289
338
 
@@ -299,6 +348,24 @@ module Moxml
299
348
  end
300
349
 
301
350
  def add_child(element, child)
351
+ # Special handling for declarations on Ox documents
352
+ if element.is_a?(::Ox::Document) && child.is_a?(::Ox::Instruct) && child.target == "xml"
353
+ # Transfer declaration attributes to document
354
+ element.attributes ||= {}
355
+ if child.attributes["version"]
356
+ element.attributes[:version] =
357
+ child.attributes["version"]
358
+ end
359
+ if child.attributes["encoding"]
360
+ element.attributes[:encoding] =
361
+ child.attributes["encoding"]
362
+ end
363
+ if child.attributes["standalone"]
364
+ element.attributes[:standalone] =
365
+ child.attributes["standalone"]
366
+ end
367
+ end
368
+
302
369
  child.parent = element if child.respond_to?(:parent)
303
370
  element.nodes ||= []
304
371
  element.nodes << child
@@ -331,11 +398,22 @@ module Moxml
331
398
 
332
399
  return unless parent(node)
333
400
 
334
- parent(node).nodes.delete(node)
401
+ # Special handling for declarations on Ox documents
402
+ if parent(node).is_a?(::Ox::Document) && node.is_a?(::Ox::Instruct) && node.target == "xml"
403
+ # Clear declaration attributes from document
404
+ doc = parent(node)
405
+ doc.attributes&.delete(:version)
406
+ doc.attributes&.delete(:encoding)
407
+ doc.attributes&.delete(:standalone)
408
+ end
409
+
410
+ parent(node).nodes.delete(unpatch_node(node))
335
411
  end
336
412
 
337
413
  def replace(node, new_node)
338
- return node.replace(new_node) if node.is_a?(String) && new_node.is_a?(String)
414
+ if node.is_a?(String) && new_node.is_a?(String)
415
+ return node.replace(new_node)
416
+ end
339
417
  # There are other cases:
340
418
  # when node is a String and new_node isn't
341
419
  # when node isn't a String, and new_node is a String
@@ -357,10 +435,14 @@ module Moxml
357
435
  end
358
436
 
359
437
  def text_content(node)
438
+ return "" if node.nil?
439
+
360
440
  case node
361
441
  when String then node.to_s
362
442
  when ::Moxml::Adapter::CustomizedOx::Text then node.value
363
443
  else
444
+ return "" unless node.respond_to?(:nodes)
445
+
364
446
  node.nodes.map do |n|
365
447
  text_content(n)
366
448
  end.join
@@ -428,9 +510,38 @@ module Moxml
428
510
  end.values
429
511
  end
430
512
 
431
- def xpath(node, expression, _namespaces = {})
432
- # locate has a different syntax
433
- node.locate(expression)
513
+ def xpath(node, expression, namespaces = {})
514
+ # Translate common XPath patterns to Ox locate() syntax
515
+ locate_expr = translate_xpath_to_locate(expression, namespaces)
516
+
517
+ # Ox's locate() works differently on documents vs elements
518
+ # For relative descendant searches on elements, we need special handling
519
+ if expression.start_with?(".//") && node.is_a?(::Ox::Element)
520
+ # Manually search descendants for relative paths from elements
521
+ element_name = locate_expr.sub("?/", "")
522
+ results = []
523
+ traverse(node) do |n|
524
+ next unless n.is_a?(::Ox::Element)
525
+
526
+ results << n if n.name == element_name || element_name.empty?
527
+ end
528
+ return results.map do |n|
529
+ patch_node(n, find_parent_in_tree(n, node))
530
+ end
531
+ end
532
+
533
+ # Use Ox's locate method for other cases
534
+ results = node.locate(locate_expr)
535
+
536
+ # Wrap results and set their parents by finding them in the tree
537
+ results.map { |n| patch_node(n, find_parent_in_tree(n, node)) }
538
+ rescue StandardError => e
539
+ raise Moxml::XPathError.new(
540
+ "XPath translation failed: #{e.message}",
541
+ expression: expression,
542
+ adapter: "Ox",
543
+ node: node,
544
+ )
434
545
  end
435
546
 
436
547
  def at_xpath(node, expression, namespaces = {})
@@ -440,9 +551,24 @@ module Moxml
440
551
  def serialize(node, options = {})
441
552
  output = ""
442
553
  if node.is_a?(::Ox::Document)
443
- # add declaration
444
- decl = create_native_declaration(node[:version], node[:encoding], node[:standalone])
445
- output = ::Ox.dump(::Ox::Document.new << decl).strip
554
+ # Check if we should include declaration
555
+ # Priority: explicit option > document attributes
556
+ should_include_decl = if options.key?(:no_declaration)
557
+ !options[:no_declaration]
558
+ else
559
+ # Check if document has declaration attributes
560
+ node[:version] || node[:encoding] || node[:standalone]
561
+ end
562
+
563
+ # Only add declaration if should_include_decl is true
564
+ if should_include_decl
565
+ version = node[:version] || "1.0"
566
+ encoding = options[:encoding] || node[:encoding]
567
+ standalone = node[:standalone]
568
+
569
+ decl = create_native_declaration(version, encoding, standalone)
570
+ output = ::Ox.dump(::Ox::Document.new << decl).strip
571
+ end
446
572
  end
447
573
 
448
574
  ox_options = {
@@ -450,13 +576,76 @@ module Moxml
450
576
  # with_xml: true,
451
577
  with_instructions: true,
452
578
  encoding: options[:encoding],
453
- no_empty: options[:expand_empty]
579
+ no_empty: options[:expand_empty],
454
580
  }
455
581
  output + ::Ox.dump(node, ox_options)
456
582
  end
457
583
 
458
584
  private
459
585
 
586
+ # Translate a subset of XPath to Ox locate() syntax
587
+ # Supports: //element, /path/to/element, .//element, element[@attr]
588
+ # Note: Ox locate() doesn't support namespace prefixes in the path
589
+ def translate_xpath_to_locate(xpath, namespaces = {})
590
+ expr = xpath.dup
591
+
592
+ # Strip namespace prefixes from element names
593
+ # XPath: //ns:element → locate: element
594
+ if namespaces && !namespaces.empty?
595
+ namespaces.each_key do |prefix|
596
+ expr = expr.gsub("/#{prefix}:", "/")
597
+ expr = expr.gsub("/*#{prefix}:", "/*")
598
+ expr = expr.gsub("//*#{prefix}:", "//")
599
+ expr = expr.gsub("//#{prefix}:", "//")
600
+ expr = expr.gsub("///#{prefix}:", "///")
601
+ end
602
+ end
603
+
604
+ # Remove any remaining namespace prefixes
605
+ # Use possessive quantifier to prevent ReDoS
606
+ expr = expr.gsub(/[a-zA-Z_][\w-]*+:/, "")
607
+
608
+ # Remove attribute predicates for now - we'll filter manually
609
+ # Save the attribute name if present
610
+ expr = expr.gsub(/\[@(\w+)\]/, "")
611
+
612
+ # XPath: //element → locate: ?/element (any depth)
613
+ # Note: In Ox, ?/ means "any path"
614
+ expr = expr.sub(%r{^//}, "?/") if expr.start_with?("//")
615
+
616
+ # XPath: .//element → locate: ?/element (relative any depth)
617
+ # For relative paths from an element, we still use ?/ which searches
618
+ # descendants
619
+ expr = expr.sub(%r{^\.//}, "?/") if expr.start_with?(".//")
620
+
621
+ # XPath: /root/child → locate: root/child (absolute path)
622
+ # Remove leading / for Ox
623
+ expr = expr.sub(%r{^/}, "")
624
+
625
+ # XPath: ./element → locate: element (direct child, just remove ./)
626
+ expr.sub(%r{^\./}, "")
627
+ end
628
+
629
+ # Find the actual parent of a node by searching the tree
630
+ def find_parent_in_tree(target_node, search_root)
631
+ # Start from the document root if we have a document
632
+ root = search_root.is_a?(::Ox::Document) ? search_root : document(search_root)
633
+
634
+ result = nil
635
+ traverse(root) do |node|
636
+ next unless node.respond_to?(:nodes)
637
+
638
+ node.nodes&.each do |child|
639
+ if child.equal?(target_node)
640
+ result = node
641
+ break
642
+ end
643
+ end
644
+ break if result
645
+ end
646
+ result
647
+ end
648
+
460
649
  def traverse(node, &block)
461
650
  return unless node
462
651
 
@@ -467,5 +656,110 @@ module Moxml
467
656
  end
468
657
  end
469
658
  end
659
+
660
+ # Bridge between Ox SAX and Moxml SAX
661
+ #
662
+ # Translates Ox::Sax events to Moxml::SAX::Handler events.
663
+ # Ox has a unique SAX pattern where attributes are delivered AFTER start_element.
664
+ #
665
+ # @private
666
+ class OxSAXBridge
667
+ def initialize(handler)
668
+ @handler = handler
669
+ @pending_attrs = {}
670
+ @pending_element_name = nil
671
+ @element_started = false
672
+ @document_started = false
673
+ end
674
+
675
+ # Ox delivers attributes AFTER start_element
676
+ def attr(name, value)
677
+ @pending_attrs[name] = value
678
+ end
679
+
680
+ # Called when element starts (but attributes come AFTER this)
681
+ def start_element(name)
682
+ # If we had a previous element waiting, we need to finalize it first
683
+ if @pending_element_name
684
+ finalize_pending_element
685
+ end
686
+
687
+ # Store this element name (convert symbol to string)
688
+ @pending_element_name = name.to_s
689
+ @element_started = true
690
+
691
+ # Call on_start_document if this is the first element
692
+ unless @document_started
693
+ @handler.on_start_document
694
+ @document_started = true
695
+ end
696
+ end
697
+
698
+ def end_element(name)
699
+ # Finalize any pending element before ending
700
+ if @pending_element_name
701
+ finalize_pending_element
702
+ end
703
+
704
+ # Convert symbol to string
705
+ @handler.on_end_element(name.to_s)
706
+ end
707
+
708
+ # Ox only has text() - no separate CDATA, comment, or PI events
709
+ def text(string)
710
+ # Finalize any pending element before text
711
+ if @pending_element_name
712
+ finalize_pending_element
713
+ end
714
+
715
+ @handler.on_characters(string)
716
+ end
717
+
718
+ def error(message, line, column)
719
+ error = Moxml::ParseError.new(message, line: line, column: column)
720
+ @handler.on_error(error)
721
+ end
722
+
723
+ # Called at end of parsing (not automatically by Ox)
724
+ def end_document
725
+ # Finalize any pending element
726
+ if @pending_element_name
727
+ finalize_pending_element
728
+ end
729
+
730
+ @handler.on_end_document if @document_started
731
+ end
732
+
733
+ private
734
+
735
+ def finalize_pending_element
736
+ # Separate namespace declarations from regular attributes
737
+ attr_hash = {}
738
+ namespaces_hash = {}
739
+
740
+ @pending_attrs.each do |attr_name, attr_value|
741
+ if attr_name.to_s.start_with?("xmlns")
742
+ # Namespace declaration
743
+ prefix = if attr_name.to_s == "xmlns"
744
+ nil
745
+ else
746
+ attr_name.to_s.sub(
747
+ "xmlns:", ""
748
+ )
749
+ end
750
+ namespaces_hash[prefix] = attr_value
751
+ else
752
+ attr_hash[attr_name.to_s] = attr_value
753
+ end
754
+ end
755
+
756
+ @handler.on_start_element(@pending_element_name, attr_hash,
757
+ namespaces_hash)
758
+
759
+ # Clear for next element
760
+ @pending_attrs = {}
761
+ @pending_element_name = nil
762
+ end
763
+ end
470
764
  end
471
765
  end