moxml 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +10 -10
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +224 -43
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +51 -0
  21. data/docs/_guides/modifying-xml.adoc +292 -0
  22. data/docs/_guides/parsing-xml.adoc +230 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_guides/xml-declaration.adoc +450 -0
  26. data/docs/_pages/adapter-compatibility.adoc +369 -0
  27. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  28. data/docs/_pages/adapters/index.adoc +97 -0
  29. data/docs/_pages/adapters/libxml.adoc +285 -0
  30. data/docs/_pages/adapters/nokogiri.adoc +251 -0
  31. data/docs/_pages/adapters/oga.adoc +291 -0
  32. data/docs/_pages/adapters/ox.adoc +56 -0
  33. data/docs/_pages/adapters/rexml.adoc +292 -0
  34. data/docs/_pages/best-practices.adoc +429 -0
  35. data/docs/_pages/compatibility.adoc +467 -0
  36. data/docs/_pages/configuration.adoc +250 -0
  37. data/docs/_pages/error-handling.adoc +349 -0
  38. data/docs/_pages/headed-ox-limitations.adoc +574 -0
  39. data/docs/_pages/headed-ox.adoc +1025 -0
  40. data/docs/_pages/index.adoc +35 -0
  41. data/docs/_pages/installation.adoc +140 -0
  42. data/docs/_pages/node-api-reference.adoc +49 -0
  43. data/docs/_pages/performance.adoc +35 -0
  44. data/docs/_pages/quick-start.adoc +243 -0
  45. data/docs/_pages/thread-safety.adoc +28 -0
  46. data/docs/_references/document-api.adoc +407 -0
  47. data/docs/_references/index.adoc +48 -0
  48. data/docs/_tutorials/basic-usage.adoc +267 -0
  49. data/docs/_tutorials/builder-pattern.adoc +342 -0
  50. data/docs/_tutorials/index.adoc +33 -0
  51. data/docs/_tutorials/namespace-handling.adoc +324 -0
  52. data/docs/_tutorials/xpath-queries.adoc +358 -0
  53. data/docs/index.adoc +122 -0
  54. data/examples/README.md +124 -0
  55. data/examples/api_client/README.md +424 -0
  56. data/examples/api_client/api_client.rb +394 -0
  57. data/examples/api_client/example_response.xml +48 -0
  58. data/examples/headed_ox_example/README.md +90 -0
  59. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  60. data/examples/rss_parser/README.md +194 -0
  61. data/examples/rss_parser/example_feed.xml +93 -0
  62. data/examples/rss_parser/rss_parser.rb +189 -0
  63. data/examples/sax_parsing/README.md +50 -0
  64. data/examples/sax_parsing/data_extractor.rb +75 -0
  65. data/examples/sax_parsing/example.xml +21 -0
  66. data/examples/sax_parsing/large_file.rb +78 -0
  67. data/examples/sax_parsing/simple_parser.rb +55 -0
  68. data/examples/web_scraper/README.md +352 -0
  69. data/examples/web_scraper/example_page.html +201 -0
  70. data/examples/web_scraper/web_scraper.rb +312 -0
  71. data/lib/moxml/adapter/base.rb +107 -28
  72. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  73. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  74. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  75. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  76. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  77. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  78. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  79. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  80. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
  81. data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
  82. data/lib/moxml/adapter/headed_ox.rb +161 -0
  83. data/lib/moxml/adapter/libxml.rb +1564 -0
  84. data/lib/moxml/adapter/nokogiri.rb +156 -9
  85. data/lib/moxml/adapter/oga.rb +190 -15
  86. data/lib/moxml/adapter/ox.rb +322 -28
  87. data/lib/moxml/adapter/rexml.rb +157 -28
  88. data/lib/moxml/adapter.rb +21 -4
  89. data/lib/moxml/attribute.rb +6 -0
  90. data/lib/moxml/builder.rb +40 -4
  91. data/lib/moxml/config.rb +8 -3
  92. data/lib/moxml/context.rb +57 -2
  93. data/lib/moxml/declaration.rb +9 -0
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +53 -6
  96. data/lib/moxml/document_builder.rb +34 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +155 -4
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1770 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_preservation_spec.rb +217 -0
  164. data/spec/moxml/declaration_spec.rb +36 -0
  165. data/spec/moxml/doctype_spec.rb +33 -0
  166. data/spec/moxml/document_builder_spec.rb +30 -0
  167. data/spec/moxml/document_spec.rb +105 -0
  168. data/spec/moxml/element_spec.rb +143 -0
  169. data/spec/moxml/error_spec.rb +266 -22
  170. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  171. data/spec/moxml/namespace_spec.rb +32 -0
  172. data/spec/moxml/node_set_spec.rb +39 -0
  173. data/spec/moxml/node_spec.rb +37 -0
  174. data/spec/moxml/processing_instruction_spec.rb +34 -0
  175. data/spec/moxml/sax_spec.rb +1067 -0
  176. data/spec/moxml/text_spec.rb +31 -0
  177. data/spec/moxml/version_spec.rb +14 -0
  178. data/spec/moxml/xml_utils/.gitkeep +0 -0
  179. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  180. data/spec/moxml/xml_utils_spec.rb +49 -0
  181. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  182. data/spec/moxml/xpath/axes_spec.rb +296 -0
  183. data/spec/moxml/xpath/cache_spec.rb +358 -0
  184. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  185. data/spec/moxml/xpath/context_spec.rb +210 -0
  186. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  187. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  188. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  189. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  190. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  191. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  192. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  193. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  194. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  195. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  196. data/spec/moxml/xpath/parser_spec.rb +364 -0
  197. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  198. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  199. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  200. data/spec/moxml/xpath_spec.rb +77 -0
  201. data/spec/performance/README.md +83 -0
  202. data/spec/performance/benchmark_spec.rb +64 -0
  203. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
  204. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  205. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  206. data/spec/spec_helper.rb +58 -1
  207. data/spec/support/xml_matchers.rb +1 -1
  208. metadata +178 -34
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -25,12 +25,37 @@ module Moxml
25
25
  end
26
26
  end
27
27
  rescue ::Nokogiri::XML::SyntaxError => e
28
- raise Moxml::ParseError.new(e.message, line: e.line, column: e.column)
28
+ raise Moxml::ParseError.new(e.message, line: e.line,
29
+ column: e.column)
29
30
  end
30
31
 
31
32
  DocumentBuilder.new(Context.new(:nokogiri)).build(native_doc)
32
33
  end
33
34
 
35
+ # SAX parsing implementation for Nokogiri
36
+ #
37
+ # @param xml [String, IO] XML to parse
38
+ # @param handler [Moxml::SAX::Handler] Moxml SAX handler
39
+ # @return [void]
40
+ def sax_parse(xml, handler)
41
+ # Create bridge that translates Nokogiri SAX to Moxml SAX
42
+ bridge = NokogiriSAXBridge.new(handler)
43
+
44
+ # Create Nokogiri SAX parser
45
+ parser = ::Nokogiri::XML::SAX::Parser.new(bridge)
46
+
47
+ # Parse
48
+ if xml.respond_to?(:read)
49
+ parser.parse(xml)
50
+ else
51
+ parser.parse(xml.to_s)
52
+ end
53
+ rescue ::Nokogiri::XML::SyntaxError => e
54
+ error = Moxml::ParseError.new(e.message, line: e.line,
55
+ column: e.column)
56
+ handler.on_error(error)
57
+ end
58
+
34
59
  def create_document(_native_doc = nil)
35
60
  ::Nokogiri::XML::Document.new
36
61
  end
@@ -39,7 +64,7 @@ module Moxml
39
64
  # document fragments are weird and should be used with caution:
40
65
  # https://github.com/sparklemotion/nokogiri/issues/572
41
66
  ::Nokogiri::XML::DocumentFragment.new(
42
- ::Nokogiri::XML::Document.new
67
+ ::Nokogiri::XML::Document.new,
43
68
  )
44
69
  end
45
70
 
@@ -75,7 +100,7 @@ module Moxml
75
100
  ::Nokogiri::XML::ProcessingInstruction.new(
76
101
  create_document,
77
102
  "xml",
78
- build_declaration_attrs(version, encoding, standalone)
103
+ build_declaration_attrs(version, encoding, standalone),
79
104
  )
80
105
  end
81
106
 
@@ -196,6 +221,23 @@ module Moxml
196
221
  end
197
222
 
198
223
  def add_child(element, child)
224
+ # Special handling for declarations on Nokogiri documents
225
+ if element.is_a?(::Nokogiri::XML::Document) &&
226
+ child.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
227
+ child.name == "xml"
228
+ # Set document's xml_decl property
229
+ version = declaration_attribute(child, "version") || "1.0"
230
+ encoding = declaration_attribute(child, "encoding")
231
+ standalone = declaration_attribute(child, "standalone")
232
+
233
+ # Nokogiri's xml_decl can only be set via instance variable
234
+ element.instance_variable_set(:@xml_decl, {
235
+ version: version,
236
+ encoding: encoding,
237
+ standalone: standalone,
238
+ }.compact)
239
+ end
240
+
199
241
  if node_type(child) == :doctype
200
242
  # avoid exceptions: cannot reparent Nokogiri::XML::DTD there
201
243
  element.create_internal_subset(
@@ -215,6 +257,14 @@ module Moxml
215
257
  end
216
258
 
217
259
  def remove(node)
260
+ # Special handling for declarations on Nokogiri documents
261
+ if node.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
262
+ node.name == "xml" &&
263
+ node.parent.is_a?(::Nokogiri::XML::Document)
264
+ # Clear document's xml_decl when removing declaration
265
+ node.parent.instance_variable_set(:@xml_decl, nil)
266
+ end
267
+
218
268
  node.remove
219
269
  end
220
270
 
@@ -274,27 +324,53 @@ module Moxml
274
324
  def xpath(node, expression, namespaces = nil)
275
325
  node.xpath(expression, namespaces).to_a
276
326
  rescue ::Nokogiri::XML::XPath::SyntaxError => e
277
- raise Moxml::XPathError, e.message
327
+ raise Moxml::XPathError.new(
328
+ e.message,
329
+ expression: expression,
330
+ adapter: "Nokogiri",
331
+ node: node,
332
+ )
278
333
  end
279
334
 
280
335
  def at_xpath(node, expression, namespaces = nil)
281
336
  node.at_xpath(expression, namespaces)
282
337
  rescue ::Nokogiri::XML::XPath::SyntaxError => e
283
- raise Moxml::XPathError, e.message
338
+ raise Moxml::XPathError.new(
339
+ e.message,
340
+ expression: expression,
341
+ adapter: "Nokogiri",
342
+ node: node,
343
+ )
284
344
  end
285
345
 
286
346
  def serialize(node, options = {})
287
347
  save_options = ::Nokogiri::XML::Node::SaveOptions::AS_XML
288
348
 
289
349
  # Don't force expand empty elements if they're really empty
290
- save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS if options[:expand_empty]
291
- save_options |= ::Nokogiri::XML::Node::SaveOptions::FORMAT if options[:indent].to_i.positive?
292
- save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if options[:no_declaration]
350
+ if options[:expand_empty]
351
+ save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
352
+ end
353
+ if options[:indent].to_i.positive?
354
+ save_options |= ::Nokogiri::XML::Node::SaveOptions::FORMAT
355
+ end
356
+
357
+ # Handle declaration option
358
+ # Priority:
359
+ # 1. Explicit no_declaration option
360
+ # 2. Check Nokogiri's internal @xml_decl (when remove is called, this becomes nil)
361
+ if options.key?(:no_declaration)
362
+ save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if options[:no_declaration]
363
+ elsif node.respond_to?(:instance_variable_get) &&
364
+ node.instance_variable_defined?(:@xml_decl)
365
+ # Nokogiri's internal state - if nil, declaration was removed
366
+ xml_decl = node.instance_variable_get(:@xml_decl)
367
+ save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if xml_decl.nil?
368
+ end
293
369
 
294
370
  node.to_xml(
295
371
  indent: options[:indent],
296
372
  encoding: options[:encoding],
297
- save_with: save_options
373
+ save_with: save_options,
298
374
  )
299
375
  end
300
376
 
@@ -316,6 +392,77 @@ module Moxml
316
392
  end
317
393
  end
318
394
  end
395
+
396
+ # Bridge between Nokogiri SAX and Moxml SAX
397
+ #
398
+ # Translates Nokogiri::XML::SAX::Document events to Moxml::SAX::Handler events
399
+ #
400
+ # @private
401
+ class NokogiriSAXBridge < ::Nokogiri::XML::SAX::Document
402
+ def initialize(handler)
403
+ super()
404
+ @handler = handler
405
+ end
406
+
407
+ # Map Nokogiri events to Moxml events
408
+
409
+ def start_document
410
+ @handler.on_start_document
411
+ end
412
+
413
+ def end_document
414
+ @handler.on_end_document
415
+ end
416
+
417
+ def start_element(name, attributes = [])
418
+ # Convert Nokogiri attributes array to hash
419
+ attr_hash = {}
420
+ namespaces_hash = {}
421
+
422
+ attributes.each do |attr|
423
+ attr_name = attr[0]
424
+ attr_value = attr[1]
425
+
426
+ if attr_name.start_with?("xmlns")
427
+ # Namespace declaration
428
+ prefix = attr_name == "xmlns" ? nil : attr_name.sub("xmlns:", "")
429
+ namespaces_hash[prefix] = attr_value
430
+ else
431
+ attr_hash[attr_name] = attr_value
432
+ end
433
+ end
434
+
435
+ @handler.on_start_element(name, attr_hash, namespaces_hash)
436
+ end
437
+
438
+ def end_element(name)
439
+ @handler.on_end_element(name)
440
+ end
441
+
442
+ def characters(string)
443
+ @handler.on_characters(string)
444
+ end
445
+
446
+ def cdata_block(string)
447
+ @handler.on_cdata(string)
448
+ end
449
+
450
+ def comment(string)
451
+ @handler.on_comment(string)
452
+ end
453
+
454
+ def processing_instruction(target, data)
455
+ @handler.on_processing_instruction(target, data || "")
456
+ end
457
+
458
+ def error(string)
459
+ @handler.on_error(Moxml::ParseError.new(string))
460
+ end
461
+
462
+ def warning(string)
463
+ @handler.on_warning(string)
464
+ end
465
+ end
319
466
  end
320
467
  end
321
468
  end
@@ -10,7 +10,10 @@ module Moxml
10
10
  class Oga < Base
11
11
  class << self
12
12
  def set_root(doc, element)
13
- doc.children.clear # Clear any existing children
13
+ # Clear existing root element if any - Oga's NodeSet needs special handling
14
+ # We need to manually remove elements since NodeSet doesn't support clear or delete_if
15
+ elements_to_remove = doc.children.select { |child| child.is_a?(::Oga::XML::Element) }
16
+ elements_to_remove.each { |elem| doc.children.delete(elem) }
14
17
  doc.children << element
15
18
  end
16
19
 
@@ -18,12 +21,37 @@ module Moxml
18
21
  native_doc = begin
19
22
  ::Oga.parse_xml(xml, strict: options[:strict])
20
23
  rescue LL::ParserError => e
21
- raise Moxml::ParseError, e.message
24
+ raise Moxml::ParseError.new(
25
+ e.message,
26
+ source: xml.is_a?(String) ? xml[0..100] : nil,
27
+ )
22
28
  end
23
29
 
24
30
  DocumentBuilder.new(Context.new(:oga)).build(native_doc)
25
31
  end
26
32
 
33
+ # SAX parsing implementation for Oga
34
+ #
35
+ # @param xml [String, IO] XML to parse
36
+ # @param handler [Moxml::SAX::Handler] Moxml SAX handler
37
+ # @return [void]
38
+ def sax_parse(xml, handler)
39
+ bridge = OgaSAXBridge.new(handler)
40
+
41
+ xml_string = xml.respond_to?(:read) ? xml.read : xml.to_s
42
+
43
+ # Manually call start_document (Oga doesn't)
44
+ handler.on_start_document
45
+
46
+ ::Oga.sax_parse_xml(bridge, xml_string)
47
+
48
+ # Manually call end_document (Oga doesn't)
49
+ handler.on_end_document
50
+ rescue StandardError => e
51
+ error = Moxml::ParseError.new(e.message)
52
+ handler.on_error(error)
53
+ end
54
+
27
55
  def create_document(_native_doc = nil)
28
56
  ::Oga::XML::Document.new
29
57
  end
@@ -46,7 +74,7 @@ module Moxml
46
74
 
47
75
  def create_native_doctype(name, external_id, system_id)
48
76
  ::Oga::XML::Doctype.new(
49
- name: name, public_id: external_id, system_id: system_id, type: "PUBLIC"
77
+ name: name, public_id: external_id, system_id: system_id, type: "PUBLIC",
50
78
  )
51
79
  end
52
80
 
@@ -58,19 +86,23 @@ module Moxml
58
86
  attrs = {
59
87
  version: version,
60
88
  encoding: encoding,
61
- standalone: standalone
89
+ standalone: standalone,
62
90
  }.compact
63
91
  ::Moxml::Adapter::CustomizedOga::XmlDeclaration.new(attrs)
64
92
  end
65
93
 
66
94
  def declaration_attribute(declaration, attr_name)
67
- return unless ::Moxml::Declaration::ALLOWED_ATTRIBUTES.include?(attr_name.to_s)
95
+ unless ::Moxml::Declaration::ALLOWED_ATTRIBUTES.include?(attr_name.to_s)
96
+ return
97
+ end
68
98
 
69
99
  declaration.public_send(attr_name)
70
100
  end
71
101
 
72
102
  def set_declaration_attribute(declaration, attr_name, value)
73
- return unless ::Moxml::Declaration::ALLOWED_ATTRIBUTES.include?(attr_name.to_s)
103
+ unless ::Moxml::Declaration::ALLOWED_ATTRIBUTES.include?(attr_name.to_s)
104
+ return
105
+ end
74
106
 
75
107
  declaration.public_send("#{attr_name}=", value)
76
108
  end
@@ -80,7 +112,8 @@ module Moxml
80
112
  return ns unless ns.nil?
81
113
 
82
114
  # Oga creates an attribute and registers a namespace
83
- set_attribute(element, [::Oga::XML::Element::XMLNS_PREFIX, prefix].compact.join(":"), uri)
115
+ set_attribute(element,
116
+ [::Oga::XML::Element::XMLNS_PREFIX, prefix].compact.join(":"), uri)
84
117
  element.register_namespace(prefix, uri)
85
118
  ::Oga::XML::Namespace.new(name: prefix, uri: uri)
86
119
  end
@@ -131,7 +164,10 @@ module Moxml
131
164
  def children(node)
132
165
  all_children = []
133
166
 
134
- all_children += [node.xml_declaration, node.doctype].compact if node.is_a?(::Oga::XML::Document)
167
+ if node.is_a?(::Oga::XML::Document)
168
+ all_children += [node.xml_declaration,
169
+ node.doctype].compact
170
+ end
135
171
 
136
172
  return all_children unless node.respond_to?(:children)
137
173
 
@@ -180,12 +216,15 @@ module Moxml
180
216
 
181
217
  def set_attribute(element, name, value)
182
218
  namespace_name = nil
183
- namespace_name, name = name.to_s.split(":", 2) if name.to_s.include?(":")
219
+ if name.to_s.include?(":")
220
+ namespace_name, name = name.to_s.split(":",
221
+ 2)
222
+ end
184
223
 
185
224
  attr = ::Oga::XML::Attribute.new(
186
225
  name: name.to_s,
187
226
  namespace_name: namespace_name,
188
- value: value.to_s
227
+ value: value.to_s,
189
228
  )
190
229
  element.add_attribute(attr)
191
230
  end
@@ -211,6 +250,13 @@ module Moxml
211
250
  child_or_text
212
251
  end
213
252
 
253
+ # Special handling for declarations on Oga documents
254
+ if element.is_a?(::Oga::XML::Document) &&
255
+ child.is_a?(::Oga::XML::XmlDeclaration)
256
+ # Set as document's xml_declaration
257
+ element.instance_variable_set(:@xml_declaration, child)
258
+ end
259
+
214
260
  element.children << child
215
261
  end
216
262
 
@@ -237,6 +283,13 @@ module Moxml
237
283
  end
238
284
 
239
285
  def remove(node)
286
+ # Special handling for declarations on Oga documents
287
+ if node.is_a?(::Oga::XML::XmlDeclaration) &&
288
+ node.parent.is_a?(::Oga::XML::Document)
289
+ # Clear document's xml_declaration when removing declaration
290
+ node.parent.instance_variable_set(:@xml_declaration, nil)
291
+ end
292
+
240
293
  node.remove
241
294
  end
242
295
 
@@ -313,22 +366,144 @@ module Moxml
313
366
  end
314
367
 
315
368
  def xpath(node, expression, namespaces = nil)
316
- node.xpath(expression, {}, namespaces: namespaces&.transform_keys(&:to_s)).to_a
369
+ node.xpath(expression, {},
370
+ namespaces: namespaces&.transform_keys(&:to_s)).to_a
317
371
  rescue ::LL::ParserError => e
318
- raise Moxml::XPathError, e.message
372
+ raise Moxml::XPathError.new(
373
+ e.message,
374
+ expression: expression,
375
+ adapter: "Oga",
376
+ node: node,
377
+ )
319
378
  end
320
379
 
321
380
  def at_xpath(node, expression, namespaces = nil)
322
381
  node.at_xpath(expression, namespaces: namespaces)
323
382
  rescue ::Oga::XPath::Error => e
324
- raise Moxml::XPathError, e.message
383
+ raise Moxml::XPathError.new(
384
+ e.message,
385
+ expression: expression,
386
+ adapter: "Oga",
387
+ node: node,
388
+ )
325
389
  end
326
390
 
327
- def serialize(node, _options = {})
328
- # Expand empty tags, encode attributes, etc
391
+ def serialize(node, options = {})
392
+ # Oga's XmlGenerator doesn't support options directly
393
+ # We need to handle declaration options ourselves for Document nodes
394
+ if node.is_a?(::Oga::XML::Document)
395
+ # Check if we should include declaration
396
+ # Priority: explicit option > existence of xml_declaration node
397
+ should_include_decl = if options.key?(:no_declaration)
398
+ !options[:no_declaration]
399
+ elsif options.key?(:declaration)
400
+ options[:declaration]
401
+ else
402
+ # Default: include if document has xml_declaration node
403
+ node.xml_declaration ? true : false
404
+ end
405
+
406
+ if should_include_decl && !node.xml_declaration
407
+ # Need to add declaration - create default one
408
+ output = +""
409
+ output << '<?xml version="1.0" encoding="UTF-8"?>'
410
+ output << "\n"
411
+
412
+ # Serialize doctype if present
413
+ output << node.doctype.to_xml << "\n" if node.doctype
414
+
415
+ # Serialize children
416
+ node.children.each do |child|
417
+ output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
418
+ end
419
+
420
+ return output
421
+ elsif !should_include_decl
422
+ # Skip xml_declaration
423
+ output = +""
424
+
425
+ # Serialize doctype if present
426
+ output << node.doctype.to_xml << "\n" if node.doctype
427
+
428
+ # Serialize root and other children
429
+ node.children.each do |child|
430
+ next if child.is_a?(::Oga::XML::XmlDeclaration)
431
+
432
+ output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
433
+ end
434
+
435
+ return output
436
+ end
437
+ end
438
+
439
+ # Default: use XmlGenerator
329
440
  ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(node).to_xml
330
441
  end
331
442
  end
332
443
  end
444
+
445
+ # Bridge between Oga SAX and Moxml SAX
446
+ #
447
+ # Translates Oga SAX events to Moxml::SAX::Handler events.
448
+ # Oga has different event naming and namespace as first param.
449
+ #
450
+ # @private
451
+ class OgaSAXBridge
452
+ def initialize(handler)
453
+ @handler = handler
454
+ end
455
+
456
+ # Oga: on_element(namespace, name, attributes)
457
+ # namespace may be nil
458
+ # attributes is an array of [name, value] pairs
459
+ def on_element(namespace, name, attributes)
460
+ # Build full qualified name if namespace present
461
+ element_name = namespace ? "#{namespace}:#{name}" : name
462
+
463
+ # Convert Oga attributes to hash
464
+ attr_hash = {}
465
+ ns_hash = {}
466
+
467
+ # Oga delivers attributes as array of [name, value] pairs
468
+ attributes.each do |attr_name, attr_value|
469
+ if attr_name.to_s.start_with?("xmlns")
470
+ prefix = if attr_name.to_s == "xmlns"
471
+ nil
472
+ else
473
+ attr_name.to_s.sub(
474
+ "xmlns:", ""
475
+ )
476
+ end
477
+ ns_hash[prefix] = attr_value
478
+ else
479
+ attr_hash[attr_name.to_s] = attr_value
480
+ end
481
+ end
482
+
483
+ @handler.on_start_element(element_name, attr_hash, ns_hash)
484
+ end
485
+
486
+ # Oga: after_element(namespace, name)
487
+ def after_element(namespace, name)
488
+ element_name = namespace ? "#{namespace}:#{name}" : name
489
+ @handler.on_end_element(element_name)
490
+ end
491
+
492
+ def on_text(text)
493
+ @handler.on_characters(text)
494
+ end
495
+
496
+ def on_cdata(text)
497
+ @handler.on_cdata(text)
498
+ end
499
+
500
+ def on_comment(text)
501
+ @handler.on_comment(text)
502
+ end
503
+
504
+ def on_processing_instruction(name, text)
505
+ @handler.on_processing_instruction(name, text || "")
506
+ end
507
+ end
333
508
  end
334
509
  end