moxml 0.1.7 → 0.1.8

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.
Files changed (212)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +10 -10
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +238 -40
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +45 -0
  21. data/docs/_guides/modifying-xml.adoc +293 -0
  22. data/docs/_guides/parsing-xml.adoc +231 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_pages/adapter-compatibility.adoc +369 -0
  26. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  27. data/docs/_pages/adapters/index.adoc +98 -0
  28. data/docs/_pages/adapters/libxml.adoc +286 -0
  29. data/docs/_pages/adapters/nokogiri.adoc +252 -0
  30. data/docs/_pages/adapters/oga.adoc +292 -0
  31. data/docs/_pages/adapters/ox.adoc +55 -0
  32. data/docs/_pages/adapters/rexml.adoc +293 -0
  33. data/docs/_pages/best-practices.adoc +430 -0
  34. data/docs/_pages/compatibility.adoc +468 -0
  35. data/docs/_pages/configuration.adoc +251 -0
  36. data/docs/_pages/error-handling.adoc +350 -0
  37. data/docs/_pages/headed-ox-limitations.adoc +558 -0
  38. data/docs/_pages/headed-ox.adoc +1025 -0
  39. data/docs/_pages/index.adoc +35 -0
  40. data/docs/_pages/installation.adoc +141 -0
  41. data/docs/_pages/node-api-reference.adoc +50 -0
  42. data/docs/_pages/performance.adoc +36 -0
  43. data/docs/_pages/quick-start.adoc +244 -0
  44. data/docs/_pages/thread-safety.adoc +29 -0
  45. data/docs/_references/document-api.adoc +408 -0
  46. data/docs/_references/index.adoc +48 -0
  47. data/docs/_tutorials/basic-usage.adoc +268 -0
  48. data/docs/_tutorials/builder-pattern.adoc +343 -0
  49. data/docs/_tutorials/index.adoc +33 -0
  50. data/docs/_tutorials/namespace-handling.adoc +325 -0
  51. data/docs/_tutorials/xpath-queries.adoc +359 -0
  52. data/docs/index.adoc +122 -0
  53. data/examples/README.md +124 -0
  54. data/examples/api_client/README.md +424 -0
  55. data/examples/api_client/api_client.rb +394 -0
  56. data/examples/api_client/example_response.xml +48 -0
  57. data/examples/headed_ox_example/README.md +90 -0
  58. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  59. data/examples/rss_parser/README.md +194 -0
  60. data/examples/rss_parser/example_feed.xml +93 -0
  61. data/examples/rss_parser/rss_parser.rb +189 -0
  62. data/examples/sax_parsing/README.md +50 -0
  63. data/examples/sax_parsing/data_extractor.rb +75 -0
  64. data/examples/sax_parsing/example.xml +21 -0
  65. data/examples/sax_parsing/large_file.rb +78 -0
  66. data/examples/sax_parsing/simple_parser.rb +55 -0
  67. data/examples/web_scraper/README.md +352 -0
  68. data/examples/web_scraper/example_page.html +201 -0
  69. data/examples/web_scraper/web_scraper.rb +312 -0
  70. data/lib/moxml/adapter/base.rb +107 -28
  71. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  72. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  73. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  74. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  75. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  76. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  77. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  78. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  79. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
  80. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
  81. data/lib/moxml/adapter/headed_ox.rb +161 -0
  82. data/lib/moxml/adapter/libxml.rb +1548 -0
  83. data/lib/moxml/adapter/nokogiri.rb +121 -9
  84. data/lib/moxml/adapter/oga.rb +123 -12
  85. data/lib/moxml/adapter/ox.rb +282 -26
  86. data/lib/moxml/adapter/rexml.rb +127 -20
  87. data/lib/moxml/adapter.rb +21 -4
  88. data/lib/moxml/attribute.rb +6 -0
  89. data/lib/moxml/builder.rb +40 -4
  90. data/lib/moxml/config.rb +8 -3
  91. data/lib/moxml/context.rb +39 -1
  92. data/lib/moxml/doctype.rb +13 -1
  93. data/lib/moxml/document.rb +39 -6
  94. data/lib/moxml/document_builder.rb +27 -5
  95. data/lib/moxml/element.rb +71 -2
  96. data/lib/moxml/error.rb +175 -6
  97. data/lib/moxml/node.rb +94 -3
  98. data/lib/moxml/node_set.rb +34 -0
  99. data/lib/moxml/sax/block_handler.rb +194 -0
  100. data/lib/moxml/sax/element_handler.rb +124 -0
  101. data/lib/moxml/sax/handler.rb +113 -0
  102. data/lib/moxml/sax.rb +31 -0
  103. data/lib/moxml/version.rb +1 -1
  104. data/lib/moxml/xml_utils/encoder.rb +4 -4
  105. data/lib/moxml/xml_utils.rb +7 -4
  106. data/lib/moxml/xpath/ast/node.rb +159 -0
  107. data/lib/moxml/xpath/cache.rb +91 -0
  108. data/lib/moxml/xpath/compiler.rb +1768 -0
  109. data/lib/moxml/xpath/context.rb +26 -0
  110. data/lib/moxml/xpath/conversion.rb +124 -0
  111. data/lib/moxml/xpath/engine.rb +52 -0
  112. data/lib/moxml/xpath/errors.rb +101 -0
  113. data/lib/moxml/xpath/lexer.rb +304 -0
  114. data/lib/moxml/xpath/parser.rb +485 -0
  115. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  116. data/lib/moxml/xpath/ruby/node.rb +193 -0
  117. data/lib/moxml/xpath.rb +37 -0
  118. data/lib/moxml.rb +5 -2
  119. data/moxml.gemspec +3 -1
  120. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  121. data/spec/consistency/README.md +77 -0
  122. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  123. data/spec/examples/README.md +75 -0
  124. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  125. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  126. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  127. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  128. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  129. data/spec/integration/README.md +71 -0
  130. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  131. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  132. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  133. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  134. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  135. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  136. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  137. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  138. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  139. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
  140. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  141. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  142. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  143. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  144. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  145. data/spec/moxml/README.md +41 -0
  146. data/spec/moxml/adapter/.gitkeep +0 -0
  147. data/spec/moxml/adapter/README.md +61 -0
  148. data/spec/moxml/adapter/base_spec.rb +27 -0
  149. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  150. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  151. data/spec/moxml/adapter/ox_spec.rb +9 -8
  152. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  153. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  154. data/spec/moxml/adapter_spec.rb +16 -0
  155. data/spec/moxml/attribute_spec.rb +30 -0
  156. data/spec/moxml/builder_spec.rb +33 -0
  157. data/spec/moxml/cdata_spec.rb +31 -0
  158. data/spec/moxml/comment_spec.rb +31 -0
  159. data/spec/moxml/config_spec.rb +3 -3
  160. data/spec/moxml/context_spec.rb +28 -0
  161. data/spec/moxml/declaration_spec.rb +36 -0
  162. data/spec/moxml/doctype_spec.rb +33 -0
  163. data/spec/moxml/document_builder_spec.rb +30 -0
  164. data/spec/moxml/document_spec.rb +105 -0
  165. data/spec/moxml/element_spec.rb +143 -0
  166. data/spec/moxml/error_spec.rb +266 -22
  167. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  168. data/spec/moxml/namespace_spec.rb +32 -0
  169. data/spec/moxml/node_set_spec.rb +39 -0
  170. data/spec/moxml/node_spec.rb +37 -0
  171. data/spec/moxml/processing_instruction_spec.rb +34 -0
  172. data/spec/moxml/sax_spec.rb +1067 -0
  173. data/spec/moxml/text_spec.rb +31 -0
  174. data/spec/moxml/version_spec.rb +14 -0
  175. data/spec/moxml/xml_utils/.gitkeep +0 -0
  176. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  177. data/spec/moxml/xml_utils_spec.rb +49 -0
  178. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  179. data/spec/moxml/xpath/axes_spec.rb +296 -0
  180. data/spec/moxml/xpath/cache_spec.rb +358 -0
  181. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  182. data/spec/moxml/xpath/context_spec.rb +210 -0
  183. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  184. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  185. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  186. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  187. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  188. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  189. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  190. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  191. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  192. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  193. data/spec/moxml/xpath/parser_spec.rb +364 -0
  194. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  195. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  196. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  197. data/spec/moxml/xpath_spec.rb +77 -0
  198. data/spec/performance/README.md +83 -0
  199. data/spec/performance/benchmark_spec.rb +64 -0
  200. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
  201. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  202. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  203. data/spec/spec_helper.rb +58 -1
  204. data/spec/support/xml_matchers.rb +1 -1
  205. metadata +176 -34
  206. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  207. data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  208. data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  209. data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  210. data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  211. data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  212. data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -2,12 +2,13 @@
2
2
 
3
3
  require_relative "base"
4
4
  require "ox"
5
+ require "stringio"
5
6
  require_relative "customized_ox/text"
6
7
  require_relative "customized_ox/attribute"
7
8
  require_relative "customized_ox/namespace"
8
9
 
9
10
  # add a :parent accessor to Ox::Node so that every Ox class inheriting from it gets one
10
- ::Ox::Node.attr_accessor :parent
11
+ Ox::Node.attr_accessor :parent
11
12
  module Moxml
12
13
  module Adapter
13
14
  class Ox < Base
@@ -29,12 +30,37 @@ module Moxml
29
30
  doc
30
31
  end
31
32
  rescue ::Ox::ParseError => e
32
- raise Moxml::ParseError, e.message
33
+ raise Moxml::ParseError.new(
34
+ e.message,
35
+ source: xml.is_a?(String) ? xml[0..100] : nil,
36
+ )
33
37
  end
34
38
 
35
39
  DocumentBuilder.new(Context.new(:ox)).build(native_doc)
36
40
  end
37
41
 
42
+ # SAX parsing implementation for Ox
43
+ #
44
+ # @param xml [String, IO] XML to parse
45
+ # @param handler [Moxml::SAX::Handler] Moxml SAX handler
46
+ # @return [void]
47
+ def sax_parse(xml, handler)
48
+ # Create bridge that translates Ox SAX to Moxml SAX
49
+ bridge = OxSAXBridge.new(handler)
50
+
51
+ # Parse using Ox's SAX parser
52
+ xml_string = xml.respond_to?(:read) ? xml.read : xml.to_s
53
+
54
+ begin
55
+ ::Ox.sax_parse(bridge, StringIO.new(xml_string))
56
+ # Ox doesn't automatically call end_document, so we do it manually
57
+ bridge.end_document
58
+ rescue ::Ox::ParseError => e
59
+ error = Moxml::ParseError.new(e.message)
60
+ handler.on_error(error)
61
+ end
62
+ end
63
+
38
64
  def create_document(native_doc = nil)
39
65
  attrs = native_doc&.attributes || {}
40
66
  ::Ox::Document.new(**attrs)
@@ -60,7 +86,7 @@ module Moxml
60
86
 
61
87
  def create_native_doctype(name, external_id, system_id)
62
88
  ::Ox::DocType.new(
63
- "#{name} PUBLIC \"#{external_id}\" \"#{system_id}\""
89
+ "#{name} PUBLIC \"#{external_id}\" \"#{system_id}\"",
64
90
  )
65
91
  end
66
92
 
@@ -87,7 +113,8 @@ module Moxml
87
113
  end
88
114
 
89
115
  def create_native_namespace(element, prefix, uri)
90
- ns = ::Moxml::Adapter::CustomizedOx::Namespace.new(prefix, uri, element)
116
+ ns = ::Moxml::Adapter::CustomizedOx::Namespace.new(prefix, uri,
117
+ element)
91
118
  set_attribute(element, ns.expanded_prefix, uri)
92
119
  ns
93
120
  end
@@ -97,8 +124,12 @@ module Moxml
97
124
 
98
125
  prefix = ns.prefix
99
126
  # attributes don't have attributes but can have a namespace prefix
100
- set_attribute(element, ns.expanded_prefix, ns.uri) if element.respond_to?(:attributes)
101
- element.name = [prefix, element.name.delete_prefix("xmlns:")].compact.join(":")
127
+ if element.respond_to?(:attributes)
128
+ set_attribute(element, ns.expanded_prefix,
129
+ ns.uri)
130
+ end
131
+ element.name = [prefix,
132
+ element.name.delete_prefix("xmlns:")].compact.join(":")
102
133
  namespace(element)
103
134
  end
104
135
 
@@ -151,9 +182,14 @@ module Moxml
151
182
  end
152
183
 
153
184
  def node_name(node)
154
- node.value
155
- rescue StandardError
156
- node.name
185
+ name = begin
186
+ node.value
187
+ rescue StandardError
188
+ node.name
189
+ end
190
+
191
+ # Strip namespace prefix if present
192
+ name.to_s.split(":", 2).last
157
193
  end
158
194
 
159
195
  def set_node_name(node, name)
@@ -172,8 +208,12 @@ module Moxml
172
208
  new_node =
173
209
  case node
174
210
  # it can be either attribute or namespace
175
- when Array then ::Moxml::Adapter::CustomizedOx::Attribute.new(node.first, node.last)
176
- when Hash then ::Moxml::Adapter::CustomizedOx::Attribute.new(node.keys.first, node.values.first)
211
+ when Array then ::Moxml::Adapter::CustomizedOx::Attribute.new(
212
+ node.first, node.last
213
+ )
214
+ when Hash then ::Moxml::Adapter::CustomizedOx::Attribute.new(
215
+ node.keys.first, node.values.first
216
+ )
177
217
  when String then ::Moxml::Adapter::CustomizedOx::Text.new(node)
178
218
  else node
179
219
  end
@@ -186,7 +226,8 @@ module Moxml
186
226
  def unpatch_node(node)
187
227
  case node
188
228
  # it can be either attribute or namespace
189
- when ::Moxml::Adapter::CustomizedOx::Attribute then [node.name, node.value]
229
+ when ::Moxml::Adapter::CustomizedOx::Attribute then [node.name,
230
+ node.value]
190
231
  # when ::Moxml::Adapter::CustomizedOx::Attribute then { node.name => node.value }
191
232
  when ::Moxml::Adapter::CustomizedOx::Text then node.value
192
233
  else node
@@ -230,15 +271,18 @@ module Moxml
230
271
  end
231
272
 
232
273
  def attributes(element)
233
- return [] unless element.respond_to?(:attributes) && element.attributes
274
+ unless element.respond_to?(:attributes) && element.attributes
275
+ return []
276
+ end
234
277
 
235
- element.attributes.map do |name, value|
236
- next if name.start_with?("xmlns")
278
+ element.attributes.filter_map do |name, value|
279
+ next if name.to_s.start_with?("xmlns")
237
280
 
281
+ # Ensure value is passed correctly - Ox stores with symbol keys
238
282
  ::Moxml::Adapter::CustomizedOx::Attribute.new(
239
- name, value, element
283
+ name.to_s, value, element
240
284
  )
241
- end.compact
285
+ end
242
286
  end
243
287
 
244
288
  def attribute_element(attribute)
@@ -280,10 +324,15 @@ module Moxml
280
324
 
281
325
  def get_attribute(element, name)
282
326
  return unless element.respond_to?(:attributes) && element.attributes
283
- return unless element.attributes.key?(name.to_s) || element.attributes.key?(name.to_s.to_sym)
327
+ unless element.attributes.key?(name.to_s) || element.attributes.key?(name.to_s.to_sym)
328
+ return
329
+ end
330
+
331
+ # Ox stores attributes with symbol keys, so try both string and symbol
332
+ value = element.attributes[name.to_s] || element.attributes[name.to_s.to_sym]
284
333
 
285
334
  ::Moxml::Adapter::CustomizedOx::Attribute.new(
286
- name.to_s, element.attributes[name], element
335
+ name.to_s, value, element
287
336
  )
288
337
  end
289
338
 
@@ -331,11 +380,13 @@ module Moxml
331
380
 
332
381
  return unless parent(node)
333
382
 
334
- parent(node).nodes.delete(node)
383
+ parent(node).nodes.delete(unpatch_node(node))
335
384
  end
336
385
 
337
386
  def replace(node, new_node)
338
- return node.replace(new_node) if node.is_a?(String) && new_node.is_a?(String)
387
+ if node.is_a?(String) && new_node.is_a?(String)
388
+ return node.replace(new_node)
389
+ end
339
390
  # There are other cases:
340
391
  # when node is a String and new_node isn't
341
392
  # when node isn't a String, and new_node is a String
@@ -357,10 +408,14 @@ module Moxml
357
408
  end
358
409
 
359
410
  def text_content(node)
411
+ return "" if node.nil?
412
+
360
413
  case node
361
414
  when String then node.to_s
362
415
  when ::Moxml::Adapter::CustomizedOx::Text then node.value
363
416
  else
417
+ return "" unless node.respond_to?(:nodes)
418
+
364
419
  node.nodes.map do |n|
365
420
  text_content(n)
366
421
  end.join
@@ -428,9 +483,38 @@ module Moxml
428
483
  end.values
429
484
  end
430
485
 
431
- def xpath(node, expression, _namespaces = {})
432
- # locate has a different syntax
433
- node.locate(expression)
486
+ def xpath(node, expression, namespaces = {})
487
+ # Translate common XPath patterns to Ox locate() syntax
488
+ locate_expr = translate_xpath_to_locate(expression, namespaces)
489
+
490
+ # Ox's locate() works differently on documents vs elements
491
+ # For relative descendant searches on elements, we need special handling
492
+ if expression.start_with?(".//") && node.is_a?(::Ox::Element)
493
+ # Manually search descendants for relative paths from elements
494
+ element_name = locate_expr.sub("?/", "")
495
+ results = []
496
+ traverse(node) do |n|
497
+ next unless n.is_a?(::Ox::Element)
498
+
499
+ results << n if n.name == element_name || element_name.empty?
500
+ end
501
+ return results.map do |n|
502
+ patch_node(n, find_parent_in_tree(n, node))
503
+ end
504
+ end
505
+
506
+ # Use Ox's locate method for other cases
507
+ results = node.locate(locate_expr)
508
+
509
+ # Wrap results and set their parents by finding them in the tree
510
+ results.map { |n| patch_node(n, find_parent_in_tree(n, node)) }
511
+ rescue StandardError => e
512
+ raise Moxml::XPathError.new(
513
+ "XPath translation failed: #{e.message}",
514
+ expression: expression,
515
+ adapter: "Ox",
516
+ node: node,
517
+ )
434
518
  end
435
519
 
436
520
  def at_xpath(node, expression, namespaces = {})
@@ -441,7 +525,11 @@ module Moxml
441
525
  output = ""
442
526
  if node.is_a?(::Ox::Document)
443
527
  # add declaration
444
- decl = create_native_declaration(node[:version], node[:encoding], node[:standalone])
528
+ version = node[:version] || "1.0"
529
+ encoding = options[:encoding] || node[:encoding]
530
+ standalone = node[:standalone]
531
+
532
+ decl = create_native_declaration(version, encoding, standalone)
445
533
  output = ::Ox.dump(::Ox::Document.new << decl).strip
446
534
  end
447
535
 
@@ -450,13 +538,76 @@ module Moxml
450
538
  # with_xml: true,
451
539
  with_instructions: true,
452
540
  encoding: options[:encoding],
453
- no_empty: options[:expand_empty]
541
+ no_empty: options[:expand_empty],
454
542
  }
455
543
  output + ::Ox.dump(node, ox_options)
456
544
  end
457
545
 
458
546
  private
459
547
 
548
+ # Translate a subset of XPath to Ox locate() syntax
549
+ # Supports: //element, /path/to/element, .//element, element[@attr]
550
+ # Note: Ox locate() doesn't support namespace prefixes in the path
551
+ def translate_xpath_to_locate(xpath, namespaces = {})
552
+ expr = xpath.dup
553
+
554
+ # Strip namespace prefixes from element names
555
+ # XPath: //ns:element → locate: element
556
+ if namespaces && !namespaces.empty?
557
+ namespaces.each_key do |prefix|
558
+ expr = expr.gsub("/#{prefix}:", "/")
559
+ expr = expr.gsub("/*#{prefix}:", "/*")
560
+ expr = expr.gsub("//*#{prefix}:", "//")
561
+ expr = expr.gsub("//#{prefix}:", "//")
562
+ expr = expr.gsub("///#{prefix}:", "///")
563
+ end
564
+ end
565
+
566
+ # Remove any remaining namespace prefixes
567
+ # Use possessive quantifier to prevent ReDoS
568
+ expr = expr.gsub(/[a-zA-Z_][\w-]*+:/, "")
569
+
570
+ # Remove attribute predicates such as [@attr] from the path passed to locate()
571
+ # NOTE: the attribute name is discarded by this substitution, not saved
572
+ expr = expr.gsub(/\[@(\w+)\]/, "")
573
+
574
+ # XPath: //element → locate: ?/element (any depth)
575
+ # Note: In Ox, ?/ means "any path"
576
+ expr = expr.sub(%r{^//}, "?/") if expr.start_with?("//")
577
+
578
+ # XPath: .//element → locate: ?/element (relative any depth)
579
+ # For relative paths from an element, we still use ?/ which searches
580
+ # descendants
581
+ expr = expr.sub(%r{^\.//}, "?/") if expr.start_with?(".//")
582
+
583
+ # XPath: /root/child → locate: root/child (absolute path)
584
+ # Remove leading / for Ox
585
+ expr = expr.sub(%r{^/}, "")
586
+
587
+ # XPath: ./element → locate: element (direct child, just remove ./)
588
+ expr.sub(%r{^\./}, "")
589
+ end
590
+
591
+ # Find the actual parent of a node by searching the tree
592
+ def find_parent_in_tree(target_node, search_root)
593
+ # Start from the document root if we have a document
594
+ root = search_root.is_a?(::Ox::Document) ? search_root : document(search_root)
595
+
596
+ result = nil
597
+ traverse(root) do |node|
598
+ next unless node.respond_to?(:nodes)
599
+
600
+ node.nodes&.each do |child|
601
+ if child.equal?(target_node)
602
+ result = node
603
+ break
604
+ end
605
+ end
606
+ break if result
607
+ end
608
+ result
609
+ end
610
+
460
611
  def traverse(node, &block)
461
612
  return unless node
462
613
 
@@ -467,5 +618,110 @@ module Moxml
467
618
  end
468
619
  end
469
620
  end
621
+
622
+ # Bridge between Ox SAX and Moxml SAX
623
+ #
624
+ # Translates Ox::Sax events to Moxml::SAX::Handler events.
625
+ # Ox has a unique SAX pattern where attributes are delivered AFTER start_element.
626
+ #
627
+ # @private
628
+ class OxSAXBridge
629
+ def initialize(handler)
630
+ @handler = handler
631
+ @pending_attrs = {}
632
+ @pending_element_name = nil
633
+ @element_started = false
634
+ @document_started = false
635
+ end
636
+
637
+ # Ox delivers attributes AFTER start_element
638
+ def attr(name, value)
639
+ @pending_attrs[name] = value
640
+ end
641
+
642
+ # Called when element starts (but attributes come AFTER this)
643
+ def start_element(name)
644
+ # If we had a previous element waiting, we need to finalize it first
645
+ if @pending_element_name
646
+ finalize_pending_element
647
+ end
648
+
649
+ # Store this element name (convert symbol to string)
650
+ @pending_element_name = name.to_s
651
+ @element_started = true
652
+
653
+ # Call on_start_document if this is the first element
654
+ unless @document_started
655
+ @handler.on_start_document
656
+ @document_started = true
657
+ end
658
+ end
659
+
660
+ def end_element(name)
661
+ # Finalize any pending element before ending
662
+ if @pending_element_name
663
+ finalize_pending_element
664
+ end
665
+
666
+ # Convert symbol to string
667
+ @handler.on_end_element(name.to_s)
668
+ end
669
+
670
+ # Only text() is bridged here - no CDATA, comment, or PI callbacks are implemented
671
+ def text(string)
672
+ # Finalize any pending element before text
673
+ if @pending_element_name
674
+ finalize_pending_element
675
+ end
676
+
677
+ @handler.on_characters(string)
678
+ end
679
+
680
+ def error(message, line, column)
681
+ error = Moxml::ParseError.new(message, line: line, column: column)
682
+ @handler.on_error(error)
683
+ end
684
+
685
+ # Called at end of parsing (not automatically by Ox)
686
+ def end_document
687
+ # Finalize any pending element
688
+ if @pending_element_name
689
+ finalize_pending_element
690
+ end
691
+
692
+ @handler.on_end_document if @document_started
693
+ end
694
+
695
+ private
696
+
697
+ def finalize_pending_element
698
+ # Separate namespace declarations from regular attributes
699
+ attr_hash = {}
700
+ namespaces_hash = {}
701
+
702
+ @pending_attrs.each do |attr_name, attr_value|
703
+ if attr_name.to_s.start_with?("xmlns")
704
+ # Namespace declaration
705
+ prefix = if attr_name.to_s == "xmlns"
706
+ nil
707
+ else
708
+ attr_name.to_s.sub(
709
+ "xmlns:", ""
710
+ )
711
+ end
712
+ namespaces_hash[prefix] = attr_value
713
+ else
714
+ attr_hash[attr_name.to_s] = attr_value
715
+ end
716
+ end
717
+
718
+ @handler.on_start_element(@pending_element_name, attr_hash,
719
+ namespaces_hash)
720
+
721
+ # Clear for next element
722
+ @pending_attrs = {}
723
+ @pending_element_name = nil
724
+ end
725
+ end
470
726
  end
471
727
  end
@@ -14,13 +14,41 @@ module Moxml
14
14
  native_doc = begin
15
15
  ::REXML::Document.new(xml)
16
16
  rescue ::REXML::ParseException => e
17
- raise Moxml::ParseError.new(e.message, line: e.line) if options[:strict]
18
-
17
+ if options[:strict]
18
+ raise Moxml::ParseError.new(
19
+ e.message,
20
+ line: e.line,
21
+ source: xml.is_a?(String) ? xml[0..100] : nil,
22
+ )
23
+ end
19
24
  create_document
20
25
  end
21
26
  DocumentBuilder.new(Context.new(:rexml)).build(native_doc)
22
27
  end
23
28
 
29
+ # SAX parsing implementation for REXML
30
+ #
31
+ # @param xml [String, IO] XML to parse
32
+ # @param handler [Moxml::SAX::Handler] Moxml SAX handler
33
+ # @return [void]
34
+ def sax_parse(xml, handler)
35
+ require "rexml/parsers/sax2parser"
36
+ require "rexml/source"
37
+ require "stringio"
38
+
39
+ bridge = REXMLSAX2Bridge.new(handler)
40
+
41
+ xml_string = xml.respond_to?(:read) ? xml.read : xml.to_s
42
+ source = ::REXML::IOSource.new(StringIO.new(xml_string))
43
+
44
+ parser = ::REXML::Parsers::SAX2Parser.new(source)
45
+ parser.listen(bridge)
46
+ parser.parse
47
+ rescue ::REXML::ParseException => e
48
+ error = Moxml::ParseError.new(e.message, line: e.line)
49
+ handler.on_error(error)
50
+ end
51
+
24
52
  def create_document(_native_doc = nil)
25
53
  ::REXML::Document.new
26
54
  end
@@ -55,10 +83,10 @@ module Moxml
55
83
 
56
84
  parts = [name]
57
85
  if external_id
58
- parts.concat(["PUBLIC", %("#{external_id}")])
86
+ parts.push("PUBLIC", %("#{external_id}"))
59
87
  parts << %("#{system_id}") if system_id
60
88
  elsif system_id
61
- parts.concat(["SYSTEM", %("#{system_id}")])
89
+ parts.push("SYSTEM", %("#{system_id}"))
62
90
  end
63
91
 
64
92
  ::REXML::DocType.new(parts.join(" "))
@@ -189,7 +217,7 @@ module Moxml
189
217
 
190
218
  # Only return non-namespace attributes
191
219
  element.attributes.values
192
- .reject { |attr| attr.prefix.to_s.start_with?("xmlns") }
220
+ .reject { |attr| attr.prefix.to_s.start_with?("xmlns") }
193
221
  end
194
222
 
195
223
  def attribute_element(attribute)
@@ -197,8 +225,8 @@ module Moxml
197
225
  end
198
226
 
199
227
  def set_attribute(element, name, value)
200
- element.attributes[name&.to_s] = value
201
- ::REXML::Attribute.new(name&.to_s, value.to_s, element)
228
+ element.attributes[name&.to_s] = value&.to_s
229
+ element.attributes.get_attribute(name&.to_s)
202
230
  end
203
231
 
204
232
  def set_attribute_name(attribute, name)
@@ -327,8 +355,8 @@ module Moxml
327
355
  def inner_text(node)
328
356
  # Get direct text children only, filter duplicates
329
357
  text_children = node.children
330
- .select { _1.is_a?(::REXML::Text) }
331
- .uniq(&:object_id)
358
+ .select { _1.is_a?(::REXML::Text) }
359
+ .uniq(&:object_id)
332
360
  text_children.map(&:value).join
333
361
  end
334
362
 
@@ -353,7 +381,10 @@ module Moxml
353
381
  # add a namespace prefix to the element name AND a namespace definition
354
382
  def set_namespace(element, ns)
355
383
  prefix = ns.name.to_s.empty? ? "xmlns" : ns.name.to_s
356
- element.add_namespace(prefix, ns.value) if element.respond_to?(:add_namespace)
384
+ if element.respond_to?(:add_namespace)
385
+ element.add_namespace(prefix,
386
+ ns.value)
387
+ end
357
388
  element.name = "#{prefix}:#{element.name}"
358
389
  owner = element.is_a?(::REXML::Attribute) ? element.element : element
359
390
  ::REXML::Attribute.new(prefix, ns.value, owner)
@@ -405,7 +436,12 @@ module Moxml
405
436
  def xpath(node, expression, _namespaces = {})
406
437
  node.get_elements(expression).to_a
407
438
  rescue ::REXML::ParseException => e
408
- raise Moxml::XPathError, e.message
439
+ raise Moxml::XPathError.new(
440
+ e.message,
441
+ expression: expression,
442
+ adapter: "REXML",
443
+ node: node,
444
+ )
409
445
  end
410
446
 
411
447
  def at_xpath(node, expression, namespaces = {})
@@ -414,11 +450,12 @@ module Moxml
414
450
  end
415
451
 
416
452
  def serialize(node, options = {})
417
- output = String.new
453
+ output = +""
418
454
 
419
455
  if node.is_a?(::REXML::Document)
420
456
  # Always include XML declaration
421
- decl = node.xml_decl || ::REXML::XMLDecl.new("1.0", options[:encoding] || "UTF-8")
457
+ decl = node.xml_decl || ::REXML::XMLDecl.new("1.0",
458
+ options[:encoding] || "UTF-8")
422
459
  decl.encoding = options[:encoding] if options[:encoding]
423
460
  output << "<?xml"
424
461
  output << %( version="#{decl.version}") if decl.version
@@ -427,20 +464,22 @@ module Moxml
427
464
  output << "?>"
428
465
  # output << "\n"
429
466
 
430
- if node.doctype
431
- node.doctype.write(output)
432
- # output << "\n"
433
- end
467
+ # output << "\n"
468
+ node.doctype&.write(output)
434
469
 
435
470
  # Write top-level non-element children (processing instructions, CDATA, comments, text)
436
471
  node.children.each do |child|
437
- next unless [::REXML::Instruction, ::REXML::CData, ::REXML::Comment, ::REXML::Text].include?(child.class)
472
+ next unless [::REXML::Instruction, ::REXML::CData,
473
+ ::REXML::Comment, ::REXML::Text].include?(child.class)
438
474
 
439
475
  write_with_formatter(child, output, options[:indent] || 2)
440
476
  # output << "\n"
441
477
  end
442
478
 
443
- write_with_formatter(node.root, output, options[:indent] || 2) if node.root
479
+ if node.root
480
+ write_with_formatter(node.root, output,
481
+ options[:indent] || 2)
482
+ end
444
483
  else
445
484
  write_with_formatter(node, output, options[:indent] || 2)
446
485
  end
@@ -452,11 +491,79 @@ module Moxml
452
491
 
453
492
  def write_with_formatter(node, output, indent = 2)
454
493
  formatter = ::Moxml::Adapter::CustomizedRexml::Formatter.new(
455
- indentation: indent, self_close_empty: false
494
+ indentation: indent, self_close_empty: false,
456
495
  )
457
496
  formatter.write(node, output)
458
497
  end
459
498
  end
460
499
  end
500
+
501
+ # Bridge between REXML SAX2 and Moxml SAX
502
+ #
503
+ # Translates REXML::SAX2Parser events to Moxml::SAX::Handler events
504
+ #
505
+ # @private
506
+ class REXMLSAX2Bridge
507
+ def initialize(handler)
508
+ @handler = handler
509
+ end
510
+
511
+ # REXML splits element name into uri/localname/qname
512
+ def start_element(_uri, _localname, qname, attributes)
513
+ # Convert REXML attributes to hash
514
+ attr_hash = {}
515
+ ns_hash = {}
516
+
517
+ attributes.each do |name, value|
518
+ if name.to_s.start_with?("xmlns")
519
+ # Namespace declaration
520
+ prefix = name.to_s == "xmlns" ? nil : name.to_s.sub("xmlns:", "")
521
+ ns_hash[prefix] = value
522
+ else
523
+ attr_hash[name.to_s] = value
524
+ end
525
+ end
526
+
527
+ # Use qname (qualified name) for element name
528
+ @handler.on_start_element(qname, attr_hash, ns_hash)
529
+ end
530
+
531
+ def end_element(_uri, _localname, qname)
532
+ @handler.on_end_element(qname)
533
+ end
534
+
535
+ def characters(text)
536
+ @handler.on_characters(text)
537
+ end
538
+
539
+ def cdata(content)
540
+ @handler.on_cdata(content)
541
+ end
542
+
543
+ def comment(text)
544
+ @handler.on_comment(text)
545
+ end
546
+
547
+ def processing_instruction(target, data)
548
+ @handler.on_processing_instruction(target, data || "")
549
+ end
550
+
551
+ def start_document
552
+ @handler.on_start_document
553
+ end
554
+
555
+ def end_document
556
+ @handler.on_end_document
557
+ end
558
+
559
+ # REXML calls these but we don't need to handle them
560
+ def xmldecl(version, encoding, standalone)
561
+ # XML declaration - we don't need to do anything
562
+ end
563
+
564
+ def progress(position)
565
+ # Progress callback - we don't need to do anything
566
+ end
567
+ end
461
568
  end
462
569
  end