moxml 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +10 -10
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +224 -43
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +51 -0
  21. data/docs/_guides/modifying-xml.adoc +292 -0
  22. data/docs/_guides/parsing-xml.adoc +230 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_guides/xml-declaration.adoc +450 -0
  26. data/docs/_pages/adapter-compatibility.adoc +369 -0
  27. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  28. data/docs/_pages/adapters/index.adoc +97 -0
  29. data/docs/_pages/adapters/libxml.adoc +285 -0
  30. data/docs/_pages/adapters/nokogiri.adoc +251 -0
  31. data/docs/_pages/adapters/oga.adoc +291 -0
  32. data/docs/_pages/adapters/ox.adoc +56 -0
  33. data/docs/_pages/adapters/rexml.adoc +292 -0
  34. data/docs/_pages/best-practices.adoc +429 -0
  35. data/docs/_pages/compatibility.adoc +467 -0
  36. data/docs/_pages/configuration.adoc +250 -0
  37. data/docs/_pages/error-handling.adoc +349 -0
  38. data/docs/_pages/headed-ox-limitations.adoc +574 -0
  39. data/docs/_pages/headed-ox.adoc +1025 -0
  40. data/docs/_pages/index.adoc +35 -0
  41. data/docs/_pages/installation.adoc +140 -0
  42. data/docs/_pages/node-api-reference.adoc +49 -0
  43. data/docs/_pages/performance.adoc +35 -0
  44. data/docs/_pages/quick-start.adoc +243 -0
  45. data/docs/_pages/thread-safety.adoc +28 -0
  46. data/docs/_references/document-api.adoc +407 -0
  47. data/docs/_references/index.adoc +48 -0
  48. data/docs/_tutorials/basic-usage.adoc +267 -0
  49. data/docs/_tutorials/builder-pattern.adoc +342 -0
  50. data/docs/_tutorials/index.adoc +33 -0
  51. data/docs/_tutorials/namespace-handling.adoc +324 -0
  52. data/docs/_tutorials/xpath-queries.adoc +358 -0
  53. data/docs/index.adoc +122 -0
  54. data/examples/README.md +124 -0
  55. data/examples/api_client/README.md +424 -0
  56. data/examples/api_client/api_client.rb +394 -0
  57. data/examples/api_client/example_response.xml +48 -0
  58. data/examples/headed_ox_example/README.md +90 -0
  59. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  60. data/examples/rss_parser/README.md +194 -0
  61. data/examples/rss_parser/example_feed.xml +93 -0
  62. data/examples/rss_parser/rss_parser.rb +189 -0
  63. data/examples/sax_parsing/README.md +50 -0
  64. data/examples/sax_parsing/data_extractor.rb +75 -0
  65. data/examples/sax_parsing/example.xml +21 -0
  66. data/examples/sax_parsing/large_file.rb +78 -0
  67. data/examples/sax_parsing/simple_parser.rb +55 -0
  68. data/examples/web_scraper/README.md +352 -0
  69. data/examples/web_scraper/example_page.html +201 -0
  70. data/examples/web_scraper/web_scraper.rb +312 -0
  71. data/lib/moxml/adapter/base.rb +107 -28
  72. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  73. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  74. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  75. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  76. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  77. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  78. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  79. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  80. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
  81. data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
  82. data/lib/moxml/adapter/headed_ox.rb +161 -0
  83. data/lib/moxml/adapter/libxml.rb +1564 -0
  84. data/lib/moxml/adapter/nokogiri.rb +156 -9
  85. data/lib/moxml/adapter/oga.rb +190 -15
  86. data/lib/moxml/adapter/ox.rb +322 -28
  87. data/lib/moxml/adapter/rexml.rb +157 -28
  88. data/lib/moxml/adapter.rb +21 -4
  89. data/lib/moxml/attribute.rb +6 -0
  90. data/lib/moxml/builder.rb +40 -4
  91. data/lib/moxml/config.rb +8 -3
  92. data/lib/moxml/context.rb +57 -2
  93. data/lib/moxml/declaration.rb +9 -0
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +53 -6
  96. data/lib/moxml/document_builder.rb +34 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +155 -4
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1770 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_preservation_spec.rb +217 -0
  164. data/spec/moxml/declaration_spec.rb +36 -0
  165. data/spec/moxml/doctype_spec.rb +33 -0
  166. data/spec/moxml/document_builder_spec.rb +30 -0
  167. data/spec/moxml/document_spec.rb +105 -0
  168. data/spec/moxml/element_spec.rb +143 -0
  169. data/spec/moxml/error_spec.rb +266 -22
  170. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  171. data/spec/moxml/namespace_spec.rb +32 -0
  172. data/spec/moxml/node_set_spec.rb +39 -0
  173. data/spec/moxml/node_spec.rb +37 -0
  174. data/spec/moxml/processing_instruction_spec.rb +34 -0
  175. data/spec/moxml/sax_spec.rb +1067 -0
  176. data/spec/moxml/text_spec.rb +31 -0
  177. data/spec/moxml/version_spec.rb +14 -0
  178. data/spec/moxml/xml_utils/.gitkeep +0 -0
  179. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  180. data/spec/moxml/xml_utils_spec.rb +49 -0
  181. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  182. data/spec/moxml/xpath/axes_spec.rb +296 -0
  183. data/spec/moxml/xpath/cache_spec.rb +358 -0
  184. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  185. data/spec/moxml/xpath/context_spec.rb +210 -0
  186. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  187. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  188. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  189. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  190. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  191. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  192. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  193. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  194. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  195. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  196. data/spec/moxml/xpath/parser_spec.rb +364 -0
  197. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  198. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  199. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  200. data/spec/moxml/xpath_spec.rb +77 -0
  201. data/spec/performance/README.md +83 -0
  202. data/spec/performance/benchmark_spec.rb +64 -0
  203. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
  204. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  205. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  206. data/spec/spec_helper.rb +58 -1
  207. data/spec/support/xml_matchers.rb +1 -1
  208. metadata +178 -34
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -14,13 +14,42 @@ module Moxml
14
14
  native_doc = begin
15
15
  ::REXML::Document.new(xml)
16
16
  rescue ::REXML::ParseException => e
17
- raise Moxml::ParseError.new(e.message, line: e.line) if options[:strict]
18
-
17
+ if options[:strict]
18
+ raise Moxml::ParseError.new(
19
+ e.message,
20
+ line: e.line,
21
+ source: xml.is_a?(String) ? xml[0..100] : nil,
22
+ )
23
+ end
19
24
  create_document
20
25
  end
26
+
21
27
  DocumentBuilder.new(Context.new(:rexml)).build(native_doc)
22
28
  end
23
29
 
30
+ # SAX parsing implementation for REXML
31
+ #
32
+ # @param xml [String, IO] XML to parse
33
+ # @param handler [Moxml::SAX::Handler] Moxml SAX handler
34
+ # @return [void]
35
+ def sax_parse(xml, handler)
36
+ require "rexml/parsers/sax2parser"
37
+ require "rexml/source"
38
+ require "stringio"
39
+
40
+ bridge = REXMLSAX2Bridge.new(handler)
41
+
42
+ xml_string = xml.respond_to?(:read) ? xml.read : xml.to_s
43
+ source = ::REXML::IOSource.new(StringIO.new(xml_string))
44
+
45
+ parser = ::REXML::Parsers::SAX2Parser.new(source)
46
+ parser.listen(bridge)
47
+ parser.parse
48
+ rescue ::REXML::ParseException => e
49
+ error = Moxml::ParseError.new(e.message, line: e.line)
50
+ handler.on_error(error)
51
+ end
52
+
24
53
  def create_document(_native_doc = nil)
25
54
  ::REXML::Document.new
26
55
  end
@@ -55,10 +84,10 @@ module Moxml
55
84
 
56
85
  parts = [name]
57
86
  if external_id
58
- parts.concat(["PUBLIC", %("#{external_id}")])
87
+ parts.push("PUBLIC", %("#{external_id}"))
59
88
  parts << %("#{system_id}") if system_id
60
89
  elsif system_id
61
- parts.concat(["SYSTEM", %("#{system_id}")])
90
+ parts.push("SYSTEM", %("#{system_id}"))
62
91
  end
63
92
 
64
93
  ::REXML::DocType.new(parts.join(" "))
@@ -189,7 +218,7 @@ module Moxml
189
218
 
190
219
  # Only return non-namespace attributes
191
220
  element.attributes.values
192
- .reject { |attr| attr.prefix.to_s.start_with?("xmlns") }
221
+ .reject { |attr| attr.prefix.to_s.start_with?("xmlns") }
193
222
  end
194
223
 
195
224
  def attribute_element(attribute)
@@ -197,8 +226,8 @@ module Moxml
197
226
  end
198
227
 
199
228
  def set_attribute(element, name, value)
200
- element.attributes[name&.to_s] = value
201
- ::REXML::Attribute.new(name&.to_s, value.to_s, element)
229
+ element.attributes[name&.to_s] = value&.to_s
230
+ element.attributes.get_attribute(name&.to_s)
202
231
  end
203
232
 
204
233
  def set_attribute_name(attribute, name)
@@ -227,6 +256,12 @@ module Moxml
227
256
  end
228
257
 
229
258
  def add_child(element, child)
259
+ # Special handling for declarations on REXML documents
260
+ if element.is_a?(::REXML::Document) && child.is_a?(::REXML::XMLDecl)
261
+ # Set document's xml_decl directly
262
+ element.instance_variable_set(:@xml_declaration, child)
263
+ end
264
+
230
265
  case child
231
266
  when String
232
267
  element.add_text(child)
@@ -251,6 +286,12 @@ module Moxml
251
286
  end
252
287
 
253
288
  def remove(node)
289
+ # Special handling for declarations on REXML documents
290
+ if node.is_a?(::REXML::XMLDecl) && node.parent.is_a?(::REXML::Document)
291
+ # Clear document's xml_declaration when removing declaration
292
+ node.parent.instance_variable_set(:@xml_declaration, nil)
293
+ end
294
+
254
295
  node.remove
255
296
  end
256
297
 
@@ -327,8 +368,8 @@ module Moxml
327
368
  def inner_text(node)
328
369
  # Get direct text children only, filter duplicates
329
370
  text_children = node.children
330
- .select { _1.is_a?(::REXML::Text) }
331
- .uniq(&:object_id)
371
+ .select { _1.is_a?(::REXML::Text) }
372
+ .uniq(&:object_id)
332
373
  text_children.map(&:value).join
333
374
  end
334
375
 
@@ -353,7 +394,10 @@ module Moxml
353
394
  # add a namespace prefix to the element name AND a namespace definition
354
395
  def set_namespace(element, ns)
355
396
  prefix = ns.name.to_s.empty? ? "xmlns" : ns.name.to_s
356
- element.add_namespace(prefix, ns.value) if element.respond_to?(:add_namespace)
397
+ if element.respond_to?(:add_namespace)
398
+ element.add_namespace(prefix,
399
+ ns.value)
400
+ end
357
401
  element.name = "#{prefix}:#{element.name}"
358
402
  owner = element.is_a?(::REXML::Attribute) ? element.element : element
359
403
  ::REXML::Attribute.new(prefix, ns.value, owner)
@@ -405,7 +449,12 @@ module Moxml
405
449
  def xpath(node, expression, _namespaces = {})
406
450
  node.get_elements(expression).to_a
407
451
  rescue ::REXML::ParseException => e
408
- raise Moxml::XPathError, e.message
452
+ raise Moxml::XPathError.new(
453
+ e.message,
454
+ expression: expression,
455
+ adapter: "REXML",
456
+ node: node,
457
+ )
409
458
  end
410
459
 
411
460
  def at_xpath(node, expression, namespaces = {})
@@ -414,33 +463,45 @@ module Moxml
414
463
  end
415
464
 
416
465
  def serialize(node, options = {})
417
- output = String.new
466
+ output = +""
418
467
 
419
468
  if node.is_a?(::REXML::Document)
420
- # Always include XML declaration
421
- decl = node.xml_decl || ::REXML::XMLDecl.new("1.0", options[:encoding] || "UTF-8")
422
- decl.encoding = options[:encoding] if options[:encoding]
423
- output << "<?xml"
424
- output << %( version="#{decl.version}") if decl.version
425
- output << %( encoding="#{decl.encoding}") if decl.encoding
426
- output << %( standalone="#{decl.standalone}") if decl.standalone
427
- output << "?>"
428
- # output << "\n"
429
-
430
- if node.doctype
431
- node.doctype.write(output)
432
- # output << "\n"
469
+ # Check if we should include declaration
470
+ # Priority: explicit option > check if document has xml_decl
471
+ should_include_decl = if options.key?(:no_declaration)
472
+ !options[:no_declaration]
473
+ else
474
+ # Include declaration only if document has xml_decl
475
+ !node.xml_decl.nil?
476
+ end
477
+
478
+ # Include XML declaration only if should_include_decl and xml_decl exists
479
+ if should_include_decl && node.xml_decl
480
+ decl = node.xml_decl
481
+ decl.encoding = options[:encoding] if options[:encoding]
482
+ output << "<?xml"
483
+ output << %( version="#{decl.version}") if decl.version
484
+ output << %( encoding="#{decl.encoding}") if decl.encoding
485
+ output << %( standalone="#{decl.standalone}") if decl.standalone
486
+ output << "?>"
433
487
  end
434
488
 
489
+ # output << "\n"
490
+ node.doctype&.write(output)
491
+
435
492
  # Write processing instructions
436
493
  node.children.each do |child|
437
- next unless [::REXML::Instruction, ::REXML::CData, ::REXML::Comment, ::REXML::Text].include?(child.class)
494
+ next unless [::REXML::Instruction, ::REXML::CData,
495
+ ::REXML::Comment, ::REXML::Text].include?(child.class)
438
496
 
439
497
  write_with_formatter(child, output, options[:indent] || 2)
440
498
  # output << "\n"
441
499
  end
442
500
 
443
- write_with_formatter(node.root, output, options[:indent] || 2) if node.root
501
+ if node.root
502
+ write_with_formatter(node.root, output,
503
+ options[:indent] || 2)
504
+ end
444
505
  else
445
506
  write_with_formatter(node, output, options[:indent] || 2)
446
507
  end
@@ -452,11 +513,79 @@ module Moxml
452
513
 
453
514
  def write_with_formatter(node, output, indent = 2)
454
515
  formatter = ::Moxml::Adapter::CustomizedRexml::Formatter.new(
455
- indentation: indent, self_close_empty: false
516
+ indentation: indent, self_close_empty: false,
456
517
  )
457
518
  formatter.write(node, output)
458
519
  end
459
520
  end
460
521
  end
522
+
523
+ # Bridge between REXML SAX2 and Moxml SAX
524
+ #
525
+ # Translates REXML::SAX2Parser events to Moxml::SAX::Handler events
526
+ #
527
+ # @private
528
+ class REXMLSAX2Bridge
529
+ def initialize(handler)
530
+ @handler = handler
531
+ end
532
+
533
+ # REXML splits element name into uri/localname/qname
534
+ def start_element(_uri, _localname, qname, attributes)
535
+ # Convert REXML attributes to hash
536
+ attr_hash = {}
537
+ ns_hash = {}
538
+
539
+ attributes.each do |name, value|
540
+ if name.to_s.start_with?("xmlns")
541
+ # Namespace declaration
542
+ prefix = name.to_s == "xmlns" ? nil : name.to_s.sub("xmlns:", "")
543
+ ns_hash[prefix] = value
544
+ else
545
+ attr_hash[name.to_s] = value
546
+ end
547
+ end
548
+
549
+ # Use qname (qualified name) for element name
550
+ @handler.on_start_element(qname, attr_hash, ns_hash)
551
+ end
552
+
553
+ def end_element(_uri, _localname, qname)
554
+ @handler.on_end_element(qname)
555
+ end
556
+
557
+ def characters(text)
558
+ @handler.on_characters(text)
559
+ end
560
+
561
+ def cdata(content)
562
+ @handler.on_cdata(content)
563
+ end
564
+
565
+ def comment(text)
566
+ @handler.on_comment(text)
567
+ end
568
+
569
+ def processing_instruction(target, data)
570
+ @handler.on_processing_instruction(target, data || "")
571
+ end
572
+
573
+ def start_document
574
+ @handler.on_start_document
575
+ end
576
+
577
+ def end_document
578
+ @handler.on_end_document
579
+ end
580
+
581
+ # REXML calls these but we don't need to handle them
582
+ def xmldecl(version, encoding, standalone)
583
+ # XML declaration - we don't need to do anything
584
+ end
585
+
586
+ def progress(position)
587
+ # Progress callback - we don't need to do anything
588
+ end
589
+ end
461
590
  end
462
591
  end
data/lib/moxml/adapter.rb CHANGED
@@ -4,14 +4,26 @@ require_relative "adapter/base"
4
4
 
5
5
  module Moxml
6
6
  module Adapter
7
- AVALIABLE_ADAPTERS = %i[nokogiri oga rexml ox].freeze
7
+ AVALIABLE_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
8
8
 
9
9
  class << self
10
10
  def load(name)
11
11
  require_adapter(name)
12
- const_get(name.to_s.capitalize)
12
+ # Handle special case for headed_ox -> HeadedOx
13
+ const_name = case name
14
+ when :headed_ox
15
+ "HeadedOx"
16
+ else
17
+ name.to_s.capitalize
18
+ end
19
+ const_get(const_name)
13
20
  rescue LoadError => e
14
- raise LoadError, "Could not load #{name} adapter. Please ensure the #{name} gem is available: #{e.message}"
21
+ raise Moxml::AdapterError.new(
22
+ "Could not load #{name} adapter. Please ensure the #{name} gem is installed",
23
+ adapter: name,
24
+ operation: "load",
25
+ native_error: e,
26
+ )
15
27
  end
16
28
 
17
29
  private
@@ -23,7 +35,12 @@ module Moxml
23
35
  require name.to_s
24
36
  require "#{__dir__}/adapter/#{name}"
25
37
  rescue LoadError => e
26
- raise LoadError, "Failed to load #{name} adapter: #{e.message}"
38
+ raise Moxml::AdapterError.new(
39
+ "Failed to load #{name} adapter",
40
+ adapter: name,
41
+ operation: "require",
42
+ native_error: e,
43
+ )
27
44
  end
28
45
  end
29
46
  end
@@ -18,6 +18,12 @@ module Moxml
18
18
  adapter.set_attribute_value(@native, new_value)
19
19
  end
20
20
 
21
+ # XPath conversion compatibility - attributes need .text method
22
+ # that returns their value for XPath comparisons
23
+ def text
24
+ value
25
+ end
26
+
21
27
  def namespace
22
28
  ns = adapter.namespace(@native)
23
29
  ns && Namespace.new(ns, context)
data/lib/moxml/builder.rb CHANGED
@@ -15,7 +15,7 @@ module Moxml
15
15
 
16
16
  def declaration(version: "1.0", encoding: "UTF-8", standalone: nil)
17
17
  @current.add_child(
18
- @document.create_declaration(version, encoding, standalone)
18
+ @document.create_declaration(version, encoding, standalone),
19
19
  )
20
20
  end
21
21
 
@@ -23,12 +23,22 @@ module Moxml
23
23
  el = @document.create_element(name)
24
24
 
25
25
  attributes.each do |key, value|
26
- el[key] = value
26
+ if key.to_s == "xmlns"
27
+ # Handle default namespace
28
+ el.add_namespace(nil, value.to_s)
29
+ elsif key.to_s.start_with?("xmlns:")
30
+ # Handle prefixed namespace
31
+ prefix = key.to_s.sub("xmlns:", "")
32
+ el.add_namespace(prefix, value.to_s)
33
+ else
34
+ # Regular attribute
35
+ el[key] = value
36
+ end
27
37
  end
28
38
 
29
39
  @current.add_child(el)
30
40
 
31
- if block_given?
41
+ if block
32
42
  previous = @current
33
43
  @current = el
34
44
  instance_eval(&block)
@@ -52,7 +62,7 @@ module Moxml
52
62
 
53
63
  def processing_instruction(target, content)
54
64
  @current.add_child(
55
- @document.create_processing_instruction(target, content)
65
+ @document.create_processing_instruction(target, content),
56
66
  )
57
67
  end
58
68
 
@@ -60,5 +70,31 @@ module Moxml
60
70
  @current.add_namespace(prefix, uri)
61
71
  @namespaces[prefix] = uri
62
72
  end
73
+
74
+ # Convenience method for DOCTYPE
75
+ def doctype(name, external_id = nil, system_id = nil)
76
+ @current.add_child(
77
+ @document.create_doctype(name, external_id, system_id),
78
+ )
79
+ end
80
+
81
+ # Batch element creation
82
+ def elements(element_specs)
83
+ element_specs.each do |name, content_or_attrs|
84
+ if content_or_attrs.is_a?(Hash)
85
+ element(name, content_or_attrs)
86
+ else
87
+ element(name) { text(content_or_attrs) }
88
+ end
89
+ end
90
+ end
91
+
92
+ # Helper for creating namespaced elements
93
+ def ns_element(namespace_uri, name, attributes = {}, &block)
94
+ el = element(name, attributes, &block)
95
+ prefix = @namespaces.key(namespace_uri)
96
+ el.namespace = { prefix => namespace_uri } if prefix
97
+ el
98
+ end
63
99
  end
64
100
  end
data/lib/moxml/config.rb CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Moxml
4
4
  class Config
5
- VALID_ADAPTERS = %i[nokogiri oga rexml ox].freeze
5
+ VALID_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
6
6
  DEFAULT_ADAPTER = VALID_ADAPTERS.first
7
7
 
8
8
  class << self
@@ -23,7 +23,8 @@ module Moxml
23
23
  :entity_encoding,
24
24
  :default_indent
25
25
 
26
- def initialize(adapter_name = nil, strict_parsing = nil, default_encoding = nil)
26
+ def initialize(adapter_name = nil, strict_parsing = nil,
27
+ default_encoding = nil)
27
28
  self.adapter = adapter_name || Config.default.adapter_name
28
29
  @strict_parsing = strict_parsing || Config.default.strict_parsing
29
30
  @default_encoding = default_encoding || Config.default.default_encoding
@@ -37,7 +38,11 @@ module Moxml
37
38
  @adapter = nil
38
39
 
39
40
  unless VALID_ADAPTERS.include?(name)
40
- raise ArgumentError, "Invalid adapter: #{name}. Valid adapters are: #{VALID_ADAPTERS.join(", ")}"
41
+ raise Moxml::AdapterError.new(
42
+ "Invalid adapter: #{name}",
43
+ adapter: name,
44
+ operation: "set_adapter",
45
+ )
41
46
  end
42
47
 
43
48
  @adapter_name = name
data/lib/moxml/context.rb CHANGED
@@ -13,7 +13,62 @@ module Moxml
13
13
  end
14
14
 
15
15
  def parse(xml, options = {})
16
- config.adapter.parse(xml, default_options.merge(options))
16
+ # Detect if input has XML declaration
17
+ xml_string = if xml.respond_to?(:read)
18
+ xml.read.tap do
19
+ xml.rewind if xml.respond_to?(:rewind)
20
+ end
21
+ else
22
+ xml.to_s
23
+ end
24
+ has_declaration = xml_string.strip.start_with?("<?xml")
25
+
26
+ # Parse with adapter (without declaration info - adapters don't need it)
27
+ parsed_options = default_options.merge(options)
28
+ doc = config.adapter.parse(xml_string, parsed_options)
29
+
30
+ # Set declaration flag on Document wrapper (proper OOP)
31
+ doc.has_xml_declaration = has_declaration if doc.is_a?(Document)
32
+
33
+ doc
34
+ end
35
+
36
+ # Parse XML using SAX (event-driven) parsing
37
+ #
38
+ # SAX parsing is memory-efficient and suitable for large XML files.
39
+ # Provide either a handler object or a block with DSL.
40
+ #
41
+ # @param xml [String, IO] XML string or IO object to parse
42
+ # @param handler [Moxml::SAX::Handler, nil] Handler object (optional if block given)
43
+ # @yield [block] DSL block for defining handlers (optional if handler given)
44
+ # @return [void]
45
+ # @raise [ArgumentError] if neither handler nor block is provided
46
+ #
47
+ # @example With handler object
48
+ # handler = MyHandler.new
49
+ # context.sax_parse(xml_string, handler)
50
+ #
51
+ # @example With block
52
+ # context.sax_parse(xml_string) do
53
+ # start_element { |name, attrs| puts name }
54
+ # characters { |text| puts text }
55
+ # end
56
+ #
57
+ def sax_parse(xml, handler = nil, &block)
58
+ # Load SAX module if not already loaded
59
+ require_relative "sax" unless defined?(Moxml::SAX)
60
+
61
+ # Create block handler if block given
62
+ handler ||= SAX::BlockHandler.new(&block) if block
63
+
64
+ # Validate handler
65
+ raise ArgumentError, "Handler or block required" unless handler
66
+ unless handler.is_a?(SAX::Handler)
67
+ raise ArgumentError, "Handler must inherit from Moxml::SAX::Handler"
68
+ end
69
+
70
+ # Delegate to adapter
71
+ config.adapter.sax_parse(xml, handler)
17
72
  end
18
73
 
19
74
  private
@@ -22,7 +77,7 @@ module Moxml
22
77
  {
23
78
  encoding: config.default_encoding,
24
79
  strict: config.strict_parsing,
25
- indent: config.default_indent
80
+ indent: config.default_indent,
26
81
  }
27
82
  end
28
83
  end
@@ -33,6 +33,15 @@ module Moxml
33
33
  adapter.set_declaration_attribute(@native, "standalone", new_standalone)
34
34
  end
35
35
 
36
+ def remove
37
+ # Mark document as having no declaration when declaration is removed
38
+ # Store on native document so all wrappers see it
39
+ native_doc = adapter.document(@native)
40
+ native_doc&.instance_variable_set(:@moxml_has_declaration, false)
41
+
42
+ super
43
+ end
44
+
36
45
  def declaration?
37
46
  true
38
47
  end
data/lib/moxml/doctype.rb CHANGED
@@ -1,5 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Moxml
4
- class Doctype < Node; end
4
+ class Doctype < Node
5
+ def name
6
+ adapter.doctype_name(@native)
7
+ end
8
+
9
+ def external_id
10
+ adapter.doctype_external_id(@native)
11
+ end
12
+
13
+ def system_id
14
+ adapter.doctype_system_id(@native)
15
+ end
16
+ end
5
17
  end
@@ -12,6 +12,17 @@ require_relative "doctype"
12
12
 
13
13
  module Moxml
14
14
  class Document < Node
15
+ attr_accessor :has_xml_declaration
16
+
17
+ def initialize(native, context)
18
+ super
19
+ @has_xml_declaration = false
20
+ end
21
+
22
+ def document
23
+ self
24
+ end
25
+
15
26
  def root=(element)
16
27
  adapter.set_root(@native, element.native)
17
28
  end
@@ -40,18 +51,19 @@ module Moxml
40
51
  def create_doctype(name, external_id, system_id)
41
52
  Doctype.new(
42
53
  adapter.create_doctype(name, external_id, system_id),
43
- context
54
+ context,
44
55
  )
45
56
  end
46
57
 
47
58
  def create_processing_instruction(target, content)
48
59
  ProcessingInstruction.new(
49
60
  adapter.create_processing_instruction(target, content),
50
- context
61
+ context,
51
62
  )
52
63
  end
53
64
 
54
- def create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil)
65
+ def create_declaration(version = "1.0", encoding = "UTF-8",
66
+ standalone = nil)
55
67
  decl = adapter.create_declaration(version, encoding, standalone)
56
68
  Declaration.new(decl, context)
57
69
  end
@@ -60,10 +72,14 @@ module Moxml
60
72
  node = prepare_node(node)
61
73
 
62
74
  if node.is_a?(Declaration)
75
+ # Mark that document now has a declaration
76
+ @has_xml_declaration = true
77
+
63
78
  if children.empty?
64
79
  adapter.add_child(@native, node.native)
65
80
  else
66
- adapter.add_previous_sibling(adapter.children(@native).first, node.native)
81
+ adapter.add_previous_sibling(adapter.children(@native).first,
82
+ node.native)
67
83
  end
68
84
  elsif root && !node.is_a?(ProcessingInstruction) && !node.is_a?(Comment)
69
85
  raise Error, "Document already has a root element"
@@ -74,8 +90,21 @@ module Moxml
74
90
  end
75
91
 
76
92
  def xpath(expression, namespaces = nil)
77
- native_nodes = adapter.xpath(@native, expression, namespaces)
78
- NodeSet.new(native_nodes, context)
93
+ result = adapter.xpath(@native, expression, namespaces)
94
+
95
+ # Handle different result types:
96
+ # - Scalar values (from functions): return directly
97
+ # - NodeSet: already wrapped, return directly
98
+ # - Array: wrap in NodeSet
99
+ case result
100
+ when NodeSet, Float, String, TrueClass, FalseClass, NilClass
101
+ result
102
+ when Array
103
+ NodeSet.new(result, context)
104
+ else
105
+ # For other types, try to wrap in NodeSet
106
+ NodeSet.new(result, context)
107
+ end
79
108
  end
80
109
 
81
110
  def at_xpath(expression, namespaces = nil)
@@ -83,5 +112,23 @@ module Moxml
83
112
  Node.wrap(native_node, context)
84
113
  end
85
114
  end
115
+
116
+ # Quick element creation and addition
117
+ def add_element(name, attributes = {}, &block)
118
+ elem = create_element(name)
119
+ attributes.each { |k, v| elem[k] = v }
120
+ add_child(elem)
121
+ block&.call(elem)
122
+ elem
123
+ end
124
+
125
+ # Convenience find methods
126
+ def find(xpath)
127
+ at_xpath(xpath)
128
+ end
129
+
130
+ def find_all(xpath)
131
+ xpath(xpath).to_a
132
+ end
86
133
  end
87
134
  end