moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
data/lib/moxml/adapter/rexml.rb
CHANGED
|
@@ -14,13 +14,42 @@ module Moxml
|
|
|
14
14
|
native_doc = begin
|
|
15
15
|
::REXML::Document.new(xml)
|
|
16
16
|
rescue ::REXML::ParseException => e
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
if options[:strict]
|
|
18
|
+
raise Moxml::ParseError.new(
|
|
19
|
+
e.message,
|
|
20
|
+
line: e.line,
|
|
21
|
+
source: xml.is_a?(String) ? xml[0..100] : nil,
|
|
22
|
+
)
|
|
23
|
+
end
|
|
19
24
|
create_document
|
|
20
25
|
end
|
|
26
|
+
|
|
21
27
|
DocumentBuilder.new(Context.new(:rexml)).build(native_doc)
|
|
22
28
|
end
|
|
23
29
|
|
|
30
|
+
# SAX parsing implementation for REXML
|
|
31
|
+
#
|
|
32
|
+
# @param xml [String, IO] XML to parse
|
|
33
|
+
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
|
|
34
|
+
# @return [void]
|
|
35
|
+
def sax_parse(xml, handler)
|
|
36
|
+
require "rexml/parsers/sax2parser"
|
|
37
|
+
require "rexml/source"
|
|
38
|
+
require "stringio"
|
|
39
|
+
|
|
40
|
+
bridge = REXMLSAX2Bridge.new(handler)
|
|
41
|
+
|
|
42
|
+
xml_string = xml.respond_to?(:read) ? xml.read : xml.to_s
|
|
43
|
+
source = ::REXML::IOSource.new(StringIO.new(xml_string))
|
|
44
|
+
|
|
45
|
+
parser = ::REXML::Parsers::SAX2Parser.new(source)
|
|
46
|
+
parser.listen(bridge)
|
|
47
|
+
parser.parse
|
|
48
|
+
rescue ::REXML::ParseException => e
|
|
49
|
+
error = Moxml::ParseError.new(e.message, line: e.line)
|
|
50
|
+
handler.on_error(error)
|
|
51
|
+
end
|
|
52
|
+
|
|
24
53
|
def create_document(_native_doc = nil)
|
|
25
54
|
::REXML::Document.new
|
|
26
55
|
end
|
|
@@ -55,10 +84,10 @@ module Moxml
|
|
|
55
84
|
|
|
56
85
|
parts = [name]
|
|
57
86
|
if external_id
|
|
58
|
-
parts.
|
|
87
|
+
parts.push("PUBLIC", %("#{external_id}"))
|
|
59
88
|
parts << %("#{system_id}") if system_id
|
|
60
89
|
elsif system_id
|
|
61
|
-
parts.
|
|
90
|
+
parts.push("SYSTEM", %("#{system_id}"))
|
|
62
91
|
end
|
|
63
92
|
|
|
64
93
|
::REXML::DocType.new(parts.join(" "))
|
|
@@ -189,7 +218,7 @@ module Moxml
|
|
|
189
218
|
|
|
190
219
|
# Only return non-namespace attributes
|
|
191
220
|
element.attributes.values
|
|
192
|
-
|
|
221
|
+
.reject { |attr| attr.prefix.to_s.start_with?("xmlns") }
|
|
193
222
|
end
|
|
194
223
|
|
|
195
224
|
def attribute_element(attribute)
|
|
@@ -197,8 +226,8 @@ module Moxml
|
|
|
197
226
|
end
|
|
198
227
|
|
|
199
228
|
def set_attribute(element, name, value)
|
|
200
|
-
element.attributes[name&.to_s] = value
|
|
201
|
-
|
|
229
|
+
element.attributes[name&.to_s] = value&.to_s
|
|
230
|
+
element.attributes.get_attribute(name&.to_s)
|
|
202
231
|
end
|
|
203
232
|
|
|
204
233
|
def set_attribute_name(attribute, name)
|
|
@@ -227,6 +256,12 @@ module Moxml
|
|
|
227
256
|
end
|
|
228
257
|
|
|
229
258
|
def add_child(element, child)
|
|
259
|
+
# Special handling for declarations on REXML documents
|
|
260
|
+
if element.is_a?(::REXML::Document) && child.is_a?(::REXML::XMLDecl)
|
|
261
|
+
# Set document's xml_decl directly
|
|
262
|
+
element.instance_variable_set(:@xml_declaration, child)
|
|
263
|
+
end
|
|
264
|
+
|
|
230
265
|
case child
|
|
231
266
|
when String
|
|
232
267
|
element.add_text(child)
|
|
@@ -251,6 +286,12 @@ module Moxml
|
|
|
251
286
|
end
|
|
252
287
|
|
|
253
288
|
def remove(node)
|
|
289
|
+
# Special handling for declarations on REXML documents
|
|
290
|
+
if node.is_a?(::REXML::XMLDecl) && node.parent.is_a?(::REXML::Document)
|
|
291
|
+
# Clear document's xml_declaration when removing declaration
|
|
292
|
+
node.parent.instance_variable_set(:@xml_declaration, nil)
|
|
293
|
+
end
|
|
294
|
+
|
|
254
295
|
node.remove
|
|
255
296
|
end
|
|
256
297
|
|
|
@@ -327,8 +368,8 @@ module Moxml
|
|
|
327
368
|
def inner_text(node)
|
|
328
369
|
# Get direct text children only, filter duplicates
|
|
329
370
|
text_children = node.children
|
|
330
|
-
|
|
331
|
-
|
|
371
|
+
.select { _1.is_a?(::REXML::Text) }
|
|
372
|
+
.uniq(&:object_id)
|
|
332
373
|
text_children.map(&:value).join
|
|
333
374
|
end
|
|
334
375
|
|
|
@@ -353,7 +394,10 @@ module Moxml
|
|
|
353
394
|
# add a namespace prefix to the element name AND a namespace definition
|
|
354
395
|
def set_namespace(element, ns)
|
|
355
396
|
prefix = ns.name.to_s.empty? ? "xmlns" : ns.name.to_s
|
|
356
|
-
|
|
397
|
+
if element.respond_to?(:add_namespace)
|
|
398
|
+
element.add_namespace(prefix,
|
|
399
|
+
ns.value)
|
|
400
|
+
end
|
|
357
401
|
element.name = "#{prefix}:#{element.name}"
|
|
358
402
|
owner = element.is_a?(::REXML::Attribute) ? element.element : element
|
|
359
403
|
::REXML::Attribute.new(prefix, ns.value, owner)
|
|
@@ -405,7 +449,12 @@ module Moxml
|
|
|
405
449
|
def xpath(node, expression, _namespaces = {})
|
|
406
450
|
node.get_elements(expression).to_a
|
|
407
451
|
rescue ::REXML::ParseException => e
|
|
408
|
-
raise Moxml::XPathError
|
|
452
|
+
raise Moxml::XPathError.new(
|
|
453
|
+
e.message,
|
|
454
|
+
expression: expression,
|
|
455
|
+
adapter: "REXML",
|
|
456
|
+
node: node,
|
|
457
|
+
)
|
|
409
458
|
end
|
|
410
459
|
|
|
411
460
|
def at_xpath(node, expression, namespaces = {})
|
|
@@ -414,33 +463,45 @@ module Moxml
|
|
|
414
463
|
end
|
|
415
464
|
|
|
416
465
|
def serialize(node, options = {})
|
|
417
|
-
output =
|
|
466
|
+
output = +""
|
|
418
467
|
|
|
419
468
|
if node.is_a?(::REXML::Document)
|
|
420
|
-
#
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
if node.
|
|
431
|
-
node.
|
|
432
|
-
|
|
469
|
+
# Check if we should include declaration
|
|
470
|
+
# Priority: explicit option > check if document has xml_decl
|
|
471
|
+
should_include_decl = if options.key?(:no_declaration)
|
|
472
|
+
!options[:no_declaration]
|
|
473
|
+
else
|
|
474
|
+
# Include declaration only if document has xml_decl
|
|
475
|
+
!node.xml_decl.nil?
|
|
476
|
+
end
|
|
477
|
+
|
|
478
|
+
# Include XML declaration only if should_include_decl and xml_decl exists
|
|
479
|
+
if should_include_decl && node.xml_decl
|
|
480
|
+
decl = node.xml_decl
|
|
481
|
+
decl.encoding = options[:encoding] if options[:encoding]
|
|
482
|
+
output << "<?xml"
|
|
483
|
+
output << %( version="#{decl.version}") if decl.version
|
|
484
|
+
output << %( encoding="#{decl.encoding}") if decl.encoding
|
|
485
|
+
output << %( standalone="#{decl.standalone}") if decl.standalone
|
|
486
|
+
output << "?>"
|
|
433
487
|
end
|
|
434
488
|
|
|
489
|
+
# output << "\n"
|
|
490
|
+
node.doctype&.write(output)
|
|
491
|
+
|
|
435
492
|
# Write processing instructions
|
|
436
493
|
node.children.each do |child|
|
|
437
|
-
next unless [::REXML::Instruction, ::REXML::CData,
|
|
494
|
+
next unless [::REXML::Instruction, ::REXML::CData,
|
|
495
|
+
::REXML::Comment, ::REXML::Text].include?(child.class)
|
|
438
496
|
|
|
439
497
|
write_with_formatter(child, output, options[:indent] || 2)
|
|
440
498
|
# output << "\n"
|
|
441
499
|
end
|
|
442
500
|
|
|
443
|
-
|
|
501
|
+
if node.root
|
|
502
|
+
write_with_formatter(node.root, output,
|
|
503
|
+
options[:indent] || 2)
|
|
504
|
+
end
|
|
444
505
|
else
|
|
445
506
|
write_with_formatter(node, output, options[:indent] || 2)
|
|
446
507
|
end
|
|
@@ -452,11 +513,79 @@ module Moxml
|
|
|
452
513
|
|
|
453
514
|
def write_with_formatter(node, output, indent = 2)
|
|
454
515
|
formatter = ::Moxml::Adapter::CustomizedRexml::Formatter.new(
|
|
455
|
-
indentation: indent, self_close_empty: false
|
|
516
|
+
indentation: indent, self_close_empty: false,
|
|
456
517
|
)
|
|
457
518
|
formatter.write(node, output)
|
|
458
519
|
end
|
|
459
520
|
end
|
|
460
521
|
end
|
|
522
|
+
|
|
523
|
+
# Bridge between REXML SAX2 and Moxml SAX
|
|
524
|
+
#
|
|
525
|
+
# Translates REXML::SAX2Parser events to Moxml::SAX::Handler events
|
|
526
|
+
#
|
|
527
|
+
# @private
|
|
528
|
+
class REXMLSAX2Bridge
|
|
529
|
+
def initialize(handler)
|
|
530
|
+
@handler = handler
|
|
531
|
+
end
|
|
532
|
+
|
|
533
|
+
# REXML splits element name into uri/localname/qname
|
|
534
|
+
def start_element(_uri, _localname, qname, attributes)
|
|
535
|
+
# Convert REXML attributes to hash
|
|
536
|
+
attr_hash = {}
|
|
537
|
+
ns_hash = {}
|
|
538
|
+
|
|
539
|
+
attributes.each do |name, value|
|
|
540
|
+
if name.to_s.start_with?("xmlns")
|
|
541
|
+
# Namespace declaration
|
|
542
|
+
prefix = name.to_s == "xmlns" ? nil : name.to_s.sub("xmlns:", "")
|
|
543
|
+
ns_hash[prefix] = value
|
|
544
|
+
else
|
|
545
|
+
attr_hash[name.to_s] = value
|
|
546
|
+
end
|
|
547
|
+
end
|
|
548
|
+
|
|
549
|
+
# Use qname (qualified name) for element name
|
|
550
|
+
@handler.on_start_element(qname, attr_hash, ns_hash)
|
|
551
|
+
end
|
|
552
|
+
|
|
553
|
+
def end_element(_uri, _localname, qname)
|
|
554
|
+
@handler.on_end_element(qname)
|
|
555
|
+
end
|
|
556
|
+
|
|
557
|
+
def characters(text)
|
|
558
|
+
@handler.on_characters(text)
|
|
559
|
+
end
|
|
560
|
+
|
|
561
|
+
def cdata(content)
|
|
562
|
+
@handler.on_cdata(content)
|
|
563
|
+
end
|
|
564
|
+
|
|
565
|
+
def comment(text)
|
|
566
|
+
@handler.on_comment(text)
|
|
567
|
+
end
|
|
568
|
+
|
|
569
|
+
def processing_instruction(target, data)
|
|
570
|
+
@handler.on_processing_instruction(target, data || "")
|
|
571
|
+
end
|
|
572
|
+
|
|
573
|
+
def start_document
|
|
574
|
+
@handler.on_start_document
|
|
575
|
+
end
|
|
576
|
+
|
|
577
|
+
def end_document
|
|
578
|
+
@handler.on_end_document
|
|
579
|
+
end
|
|
580
|
+
|
|
581
|
+
# REXML calls these but we don't need to handle them
|
|
582
|
+
def xmldecl(version, encoding, standalone)
|
|
583
|
+
# XML declaration - we don't need to do anything
|
|
584
|
+
end
|
|
585
|
+
|
|
586
|
+
def progress(position)
|
|
587
|
+
# Progress callback - we don't need to do anything
|
|
588
|
+
end
|
|
589
|
+
end
|
|
461
590
|
end
|
|
462
591
|
end
|
data/lib/moxml/adapter.rb
CHANGED
|
@@ -4,14 +4,26 @@ require_relative "adapter/base"
|
|
|
4
4
|
|
|
5
5
|
module Moxml
|
|
6
6
|
module Adapter
|
|
7
|
-
AVALIABLE_ADAPTERS = %i[nokogiri oga rexml ox].freeze
|
|
7
|
+
AVALIABLE_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
|
|
8
8
|
|
|
9
9
|
class << self
|
|
10
10
|
def load(name)
|
|
11
11
|
require_adapter(name)
|
|
12
|
-
|
|
12
|
+
# Handle special case for headed_ox -> HeadedOx
|
|
13
|
+
const_name = case name
|
|
14
|
+
when :headed_ox
|
|
15
|
+
"HeadedOx"
|
|
16
|
+
else
|
|
17
|
+
name.to_s.capitalize
|
|
18
|
+
end
|
|
19
|
+
const_get(const_name)
|
|
13
20
|
rescue LoadError => e
|
|
14
|
-
raise
|
|
21
|
+
raise Moxml::AdapterError.new(
|
|
22
|
+
"Could not load #{name} adapter. Please ensure the #{name} gem is installed",
|
|
23
|
+
adapter: name,
|
|
24
|
+
operation: "load",
|
|
25
|
+
native_error: e,
|
|
26
|
+
)
|
|
15
27
|
end
|
|
16
28
|
|
|
17
29
|
private
|
|
@@ -23,7 +35,12 @@ module Moxml
|
|
|
23
35
|
require name.to_s
|
|
24
36
|
require "#{__dir__}/adapter/#{name}"
|
|
25
37
|
rescue LoadError => e
|
|
26
|
-
raise
|
|
38
|
+
raise Moxml::AdapterError.new(
|
|
39
|
+
"Failed to load #{name} adapter",
|
|
40
|
+
adapter: name,
|
|
41
|
+
operation: "require",
|
|
42
|
+
native_error: e,
|
|
43
|
+
)
|
|
27
44
|
end
|
|
28
45
|
end
|
|
29
46
|
end
|
data/lib/moxml/attribute.rb
CHANGED
|
@@ -18,6 +18,12 @@ module Moxml
|
|
|
18
18
|
adapter.set_attribute_value(@native, new_value)
|
|
19
19
|
end
|
|
20
20
|
|
|
21
|
+
# XPath conversion compatibility - attributes need .text method
|
|
22
|
+
# that returns their value for XPath comparisons
|
|
23
|
+
def text
|
|
24
|
+
value
|
|
25
|
+
end
|
|
26
|
+
|
|
21
27
|
def namespace
|
|
22
28
|
ns = adapter.namespace(@native)
|
|
23
29
|
ns && Namespace.new(ns, context)
|
data/lib/moxml/builder.rb
CHANGED
|
@@ -15,7 +15,7 @@ module Moxml
|
|
|
15
15
|
|
|
16
16
|
def declaration(version: "1.0", encoding: "UTF-8", standalone: nil)
|
|
17
17
|
@current.add_child(
|
|
18
|
-
@document.create_declaration(version, encoding, standalone)
|
|
18
|
+
@document.create_declaration(version, encoding, standalone),
|
|
19
19
|
)
|
|
20
20
|
end
|
|
21
21
|
|
|
@@ -23,12 +23,22 @@ module Moxml
|
|
|
23
23
|
el = @document.create_element(name)
|
|
24
24
|
|
|
25
25
|
attributes.each do |key, value|
|
|
26
|
-
|
|
26
|
+
if key.to_s == "xmlns"
|
|
27
|
+
# Handle default namespace
|
|
28
|
+
el.add_namespace(nil, value.to_s)
|
|
29
|
+
elsif key.to_s.start_with?("xmlns:")
|
|
30
|
+
# Handle prefixed namespace
|
|
31
|
+
prefix = key.to_s.sub("xmlns:", "")
|
|
32
|
+
el.add_namespace(prefix, value.to_s)
|
|
33
|
+
else
|
|
34
|
+
# Regular attribute
|
|
35
|
+
el[key] = value
|
|
36
|
+
end
|
|
27
37
|
end
|
|
28
38
|
|
|
29
39
|
@current.add_child(el)
|
|
30
40
|
|
|
31
|
-
if
|
|
41
|
+
if block
|
|
32
42
|
previous = @current
|
|
33
43
|
@current = el
|
|
34
44
|
instance_eval(&block)
|
|
@@ -52,7 +62,7 @@ module Moxml
|
|
|
52
62
|
|
|
53
63
|
def processing_instruction(target, content)
|
|
54
64
|
@current.add_child(
|
|
55
|
-
@document.create_processing_instruction(target, content)
|
|
65
|
+
@document.create_processing_instruction(target, content),
|
|
56
66
|
)
|
|
57
67
|
end
|
|
58
68
|
|
|
@@ -60,5 +70,31 @@ module Moxml
|
|
|
60
70
|
@current.add_namespace(prefix, uri)
|
|
61
71
|
@namespaces[prefix] = uri
|
|
62
72
|
end
|
|
73
|
+
|
|
74
|
+
# Convenience method for DOCTYPE
|
|
75
|
+
def doctype(name, external_id = nil, system_id = nil)
|
|
76
|
+
@current.add_child(
|
|
77
|
+
@document.create_doctype(name, external_id, system_id),
|
|
78
|
+
)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Batch element creation
|
|
82
|
+
def elements(element_specs)
|
|
83
|
+
element_specs.each do |name, content_or_attrs|
|
|
84
|
+
if content_or_attrs.is_a?(Hash)
|
|
85
|
+
element(name, content_or_attrs)
|
|
86
|
+
else
|
|
87
|
+
element(name) { text(content_or_attrs) }
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Helper for creating namespaced elements
|
|
93
|
+
def ns_element(namespace_uri, name, attributes = {}, &block)
|
|
94
|
+
el = element(name, attributes, &block)
|
|
95
|
+
prefix = @namespaces.key(namespace_uri)
|
|
96
|
+
el.namespace = { prefix => namespace_uri } if prefix
|
|
97
|
+
el
|
|
98
|
+
end
|
|
63
99
|
end
|
|
64
100
|
end
|
data/lib/moxml/config.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
module Moxml
|
|
4
4
|
class Config
|
|
5
|
-
VALID_ADAPTERS = %i[nokogiri oga rexml ox].freeze
|
|
5
|
+
VALID_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
|
|
6
6
|
DEFAULT_ADAPTER = VALID_ADAPTERS.first
|
|
7
7
|
|
|
8
8
|
class << self
|
|
@@ -23,7 +23,8 @@ module Moxml
|
|
|
23
23
|
:entity_encoding,
|
|
24
24
|
:default_indent
|
|
25
25
|
|
|
26
|
-
def initialize(adapter_name = nil, strict_parsing = nil,
|
|
26
|
+
def initialize(adapter_name = nil, strict_parsing = nil,
|
|
27
|
+
default_encoding = nil)
|
|
27
28
|
self.adapter = adapter_name || Config.default.adapter_name
|
|
28
29
|
@strict_parsing = strict_parsing || Config.default.strict_parsing
|
|
29
30
|
@default_encoding = default_encoding || Config.default.default_encoding
|
|
@@ -37,7 +38,11 @@ module Moxml
|
|
|
37
38
|
@adapter = nil
|
|
38
39
|
|
|
39
40
|
unless VALID_ADAPTERS.include?(name)
|
|
40
|
-
raise
|
|
41
|
+
raise Moxml::AdapterError.new(
|
|
42
|
+
"Invalid adapter: #{name}",
|
|
43
|
+
adapter: name,
|
|
44
|
+
operation: "set_adapter",
|
|
45
|
+
)
|
|
41
46
|
end
|
|
42
47
|
|
|
43
48
|
@adapter_name = name
|
data/lib/moxml/context.rb
CHANGED
|
@@ -13,7 +13,62 @@ module Moxml
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
def parse(xml, options = {})
|
|
16
|
-
|
|
16
|
+
# Detect if input has XML declaration
|
|
17
|
+
xml_string = if xml.respond_to?(:read)
|
|
18
|
+
xml.read.tap do
|
|
19
|
+
xml.rewind if xml.respond_to?(:rewind)
|
|
20
|
+
end
|
|
21
|
+
else
|
|
22
|
+
xml.to_s
|
|
23
|
+
end
|
|
24
|
+
has_declaration = xml_string.strip.start_with?("<?xml")
|
|
25
|
+
|
|
26
|
+
# Parse with adapter (without declaration info - adapters don't need it)
|
|
27
|
+
parsed_options = default_options.merge(options)
|
|
28
|
+
doc = config.adapter.parse(xml_string, parsed_options)
|
|
29
|
+
|
|
30
|
+
# Set declaration flag on Document wrapper (proper OOP)
|
|
31
|
+
doc.has_xml_declaration = has_declaration if doc.is_a?(Document)
|
|
32
|
+
|
|
33
|
+
doc
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Parse XML using SAX (event-driven) parsing
|
|
37
|
+
#
|
|
38
|
+
# SAX parsing is memory-efficient and suitable for large XML files.
|
|
39
|
+
# Provide either a handler object or a block with DSL.
|
|
40
|
+
#
|
|
41
|
+
# @param xml [String, IO] XML string or IO object to parse
|
|
42
|
+
# @param handler [Moxml::SAX::Handler, nil] Handler object (optional if block given)
|
|
43
|
+
# @yield [block] DSL block for defining handlers (optional if handler given)
|
|
44
|
+
# @return [void]
|
|
45
|
+
# @raise [ArgumentError] if neither handler nor block is provided
|
|
46
|
+
#
|
|
47
|
+
# @example With handler object
|
|
48
|
+
# handler = MyHandler.new
|
|
49
|
+
# context.sax_parse(xml_string, handler)
|
|
50
|
+
#
|
|
51
|
+
# @example With block
|
|
52
|
+
# context.sax_parse(xml_string) do
|
|
53
|
+
# start_element { |name, attrs| puts name }
|
|
54
|
+
# characters { |text| puts text }
|
|
55
|
+
# end
|
|
56
|
+
#
|
|
57
|
+
def sax_parse(xml, handler = nil, &block)
|
|
58
|
+
# Load SAX module if not already loaded
|
|
59
|
+
require_relative "sax" unless defined?(Moxml::SAX)
|
|
60
|
+
|
|
61
|
+
# Create block handler if block given
|
|
62
|
+
handler ||= SAX::BlockHandler.new(&block) if block
|
|
63
|
+
|
|
64
|
+
# Validate handler
|
|
65
|
+
raise ArgumentError, "Handler or block required" unless handler
|
|
66
|
+
unless handler.is_a?(SAX::Handler)
|
|
67
|
+
raise ArgumentError, "Handler must inherit from Moxml::SAX::Handler"
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Delegate to adapter
|
|
71
|
+
config.adapter.sax_parse(xml, handler)
|
|
17
72
|
end
|
|
18
73
|
|
|
19
74
|
private
|
|
@@ -22,7 +77,7 @@ module Moxml
|
|
|
22
77
|
{
|
|
23
78
|
encoding: config.default_encoding,
|
|
24
79
|
strict: config.strict_parsing,
|
|
25
|
-
indent: config.default_indent
|
|
80
|
+
indent: config.default_indent,
|
|
26
81
|
}
|
|
27
82
|
end
|
|
28
83
|
end
|
data/lib/moxml/declaration.rb
CHANGED
|
@@ -33,6 +33,15 @@ module Moxml
|
|
|
33
33
|
adapter.set_declaration_attribute(@native, "standalone", new_standalone)
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
+
def remove
|
|
37
|
+
# Mark document as having no declaration when declaration is removed
|
|
38
|
+
# Store on native document so all wrappers see it
|
|
39
|
+
native_doc = adapter.document(@native)
|
|
40
|
+
native_doc&.instance_variable_set(:@moxml_has_declaration, false)
|
|
41
|
+
|
|
42
|
+
super
|
|
43
|
+
end
|
|
44
|
+
|
|
36
45
|
def declaration?
|
|
37
46
|
true
|
|
38
47
|
end
|
data/lib/moxml/doctype.rb
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Moxml
|
|
4
|
-
class Doctype < Node
|
|
4
|
+
class Doctype < Node
|
|
5
|
+
def name
|
|
6
|
+
adapter.doctype_name(@native)
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def external_id
|
|
10
|
+
adapter.doctype_external_id(@native)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def system_id
|
|
14
|
+
adapter.doctype_system_id(@native)
|
|
15
|
+
end
|
|
16
|
+
end
|
|
5
17
|
end
|
data/lib/moxml/document.rb
CHANGED
|
@@ -12,6 +12,17 @@ require_relative "doctype"
|
|
|
12
12
|
|
|
13
13
|
module Moxml
|
|
14
14
|
class Document < Node
|
|
15
|
+
attr_accessor :has_xml_declaration
|
|
16
|
+
|
|
17
|
+
def initialize(native, context)
|
|
18
|
+
super
|
|
19
|
+
@has_xml_declaration = false
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def document
|
|
23
|
+
self
|
|
24
|
+
end
|
|
25
|
+
|
|
15
26
|
def root=(element)
|
|
16
27
|
adapter.set_root(@native, element.native)
|
|
17
28
|
end
|
|
@@ -40,18 +51,19 @@ module Moxml
|
|
|
40
51
|
def create_doctype(name, external_id, system_id)
|
|
41
52
|
Doctype.new(
|
|
42
53
|
adapter.create_doctype(name, external_id, system_id),
|
|
43
|
-
context
|
|
54
|
+
context,
|
|
44
55
|
)
|
|
45
56
|
end
|
|
46
57
|
|
|
47
58
|
def create_processing_instruction(target, content)
|
|
48
59
|
ProcessingInstruction.new(
|
|
49
60
|
adapter.create_processing_instruction(target, content),
|
|
50
|
-
context
|
|
61
|
+
context,
|
|
51
62
|
)
|
|
52
63
|
end
|
|
53
64
|
|
|
54
|
-
def create_declaration(version = "1.0", encoding = "UTF-8",
|
|
65
|
+
def create_declaration(version = "1.0", encoding = "UTF-8",
|
|
66
|
+
standalone = nil)
|
|
55
67
|
decl = adapter.create_declaration(version, encoding, standalone)
|
|
56
68
|
Declaration.new(decl, context)
|
|
57
69
|
end
|
|
@@ -60,10 +72,14 @@ module Moxml
|
|
|
60
72
|
node = prepare_node(node)
|
|
61
73
|
|
|
62
74
|
if node.is_a?(Declaration)
|
|
75
|
+
# Mark that document now has a declaration
|
|
76
|
+
@has_xml_declaration = true
|
|
77
|
+
|
|
63
78
|
if children.empty?
|
|
64
79
|
adapter.add_child(@native, node.native)
|
|
65
80
|
else
|
|
66
|
-
adapter.add_previous_sibling(adapter.children(@native).first,
|
|
81
|
+
adapter.add_previous_sibling(adapter.children(@native).first,
|
|
82
|
+
node.native)
|
|
67
83
|
end
|
|
68
84
|
elsif root && !node.is_a?(ProcessingInstruction) && !node.is_a?(Comment)
|
|
69
85
|
raise Error, "Document already has a root element"
|
|
@@ -74,8 +90,21 @@ module Moxml
|
|
|
74
90
|
end
|
|
75
91
|
|
|
76
92
|
def xpath(expression, namespaces = nil)
|
|
77
|
-
|
|
78
|
-
|
|
93
|
+
result = adapter.xpath(@native, expression, namespaces)
|
|
94
|
+
|
|
95
|
+
# Handle different result types:
|
|
96
|
+
# - Scalar values (from functions): return directly
|
|
97
|
+
# - NodeSet: already wrapped, return directly
|
|
98
|
+
# - Array: wrap in NodeSet
|
|
99
|
+
case result
|
|
100
|
+
when NodeSet, Float, String, TrueClass, FalseClass, NilClass
|
|
101
|
+
result
|
|
102
|
+
when Array
|
|
103
|
+
NodeSet.new(result, context)
|
|
104
|
+
else
|
|
105
|
+
# For other types, try to wrap in NodeSet
|
|
106
|
+
NodeSet.new(result, context)
|
|
107
|
+
end
|
|
79
108
|
end
|
|
80
109
|
|
|
81
110
|
def at_xpath(expression, namespaces = nil)
|
|
@@ -83,5 +112,23 @@ module Moxml
|
|
|
83
112
|
Node.wrap(native_node, context)
|
|
84
113
|
end
|
|
85
114
|
end
|
|
115
|
+
|
|
116
|
+
# Quick element creation and addition
|
|
117
|
+
def add_element(name, attributes = {}, &block)
|
|
118
|
+
elem = create_element(name)
|
|
119
|
+
attributes.each { |k, v| elem[k] = v }
|
|
120
|
+
add_child(elem)
|
|
121
|
+
block&.call(elem)
|
|
122
|
+
elem
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Convenience find methods
|
|
126
|
+
def find(xpath)
|
|
127
|
+
at_xpath(xpath)
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def find_all(xpath)
|
|
131
|
+
xpath(xpath).to_a
|
|
132
|
+
end
|
|
86
133
|
end
|
|
87
134
|
end
|