moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
module CustomizedLibxml
|
|
6
|
+
# Base wrapper class for LibXML nodes
|
|
7
|
+
#
|
|
8
|
+
# This wrapper hides LibXML's strict document ownership model,
|
|
9
|
+
# allowing nodes to be moved between documents transparently.
|
|
10
|
+
# Similar pattern to Ox adapter's customized classes.
|
|
11
|
+
class Node
  # The wrapped native node object.
  attr_reader :native

  # @param native_node [Object] native node to wrap
  def initialize(native_node)
    @native = native_node
  end

  # Equality is decided by the wrapped native nodes.
  #
  # Anything responding to +native+ is unwrapped first; any other object is
  # compared against the native node directly. +nil+/+false+ never match.
  #
  # @param other [Object] wrapper or native node to compare with
  # @return [Boolean] result of comparing the native nodes with ==
  def ==(other)
    return false unless other

    counterpart = other
    counterpart = other.native if other.respond_to?(:native)
    @native == counterpart
  end

  alias eql? ==

  # Hash of the wrapped native node, so it stays in step with #eql?.
  #
  # @return [Integer]
  def hash
    @native.hash
  end

  # Whether the native node exposes a non-nil +doc+.
  #
  # @return [Boolean]
  def document_present?
    return false unless @native.respond_to?(:doc)

    !@native.doc.nil?
  end

  # The native node's +doc+, or nil when none is present.
  #
  # @return [Object, nil]
  def document
    document_present? ? @native.doc : nil
  end
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "node"
|
|
4
|
+
|
|
5
|
+
module Moxml
|
|
6
|
+
module Adapter
|
|
7
|
+
module CustomizedLibxml
|
|
8
|
+
# Wrapper for LibXML processing instruction nodes
|
|
9
|
+
class ProcessingInstruction < Node
  # Serialize the node as an XML processing instruction.
  #
  # The native content is treated as entity-escaped (the adapter notes that
  # LibXML escapes it automatically), so the five predefined XML entities are
  # converted back to literal characters before the instruction is emitted.
  #
  # @return [String] "<?target content?>", or "<?target?>" when the content
  #   is nil or empty
  def to_xml
    target = @native.name
    content = @native.content

    if content && !content.empty?
      # "&amp;" is undone last so that an escaped ampersand followed by
      # entity-like text (e.g. "&amp;lt;") becomes "&lt;" and not "<".
      unescaped = content.gsub("&quot;", '"')
                         .gsub("&apos;", "'")
                         .gsub("&lt;", "<")
                         .gsub("&gt;", ">")
                         .gsub("&amp;", "&")
      "<?#{target} #{unescaped}?>"
    else
      "<?#{target}?>"
    end
  end
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "node"
|
|
4
|
+
|
|
5
|
+
module Moxml
|
|
6
|
+
module Adapter
|
|
7
|
+
module CustomizedLibxml
|
|
8
|
+
# Wrapper for LibXML text nodes
|
|
9
|
+
class Text < Node
  # @return [String] content of the wrapped native node
  def to_s
    @native.content
  end

  # @return [String] content of the wrapped native node (same as #to_s)
  def text
    @native.content
  end

  # Serialize the text node as XML.
  #
  # The native content is used as the already-escaped form; the only change
  # is turning "&quot;" back into a literal double quote, because quotes do
  # not need escaping inside text nodes (only inside attribute values).
  #
  # @return [String] content with "&quot;" replaced by '"'
  def to_xml
    @native.content.gsub("&quot;", '"')
  end
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -95,7 +95,7 @@ module Moxml
|
|
|
95
95
|
input&.gsub(
|
|
96
96
|
::Oga::XML::Entities::ENCODE_ATTRIBUTE_REGEXP,
|
|
97
97
|
# Keep apostrophes in attributes
|
|
98
|
-
::Oga::XML::Entities::ENCODE_ATTRIBUTE_MAPPING.merge("'" => "'")
|
|
98
|
+
::Oga::XML::Entities::ENCODE_ATTRIBUTE_MAPPING.merge("'" => "'"),
|
|
99
99
|
)
|
|
100
100
|
end
|
|
101
101
|
end
|
|
@@ -9,11 +9,15 @@ module Moxml
|
|
|
9
9
|
def initialize(attr_name, value, parent = nil)
|
|
10
10
|
self.name = attr_name
|
|
11
11
|
@parent = parent
|
|
12
|
+
@value = value # Explicitly set @value
|
|
12
13
|
super(value)
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
def name=(new_name)
|
|
16
|
-
|
|
17
|
+
if new_name.to_s.include?(":")
|
|
18
|
+
@prefix, new_name = new_name.to_s.split(":",
|
|
19
|
+
2)
|
|
20
|
+
end
|
|
17
21
|
|
|
18
22
|
@name = new_name
|
|
19
23
|
end
|
|
@@ -21,6 +25,29 @@ module Moxml
|
|
|
21
25
|
def expanded_name
|
|
22
26
|
[prefix, name].compact.join(":")
|
|
23
27
|
end
|
|
28
|
+
|
|
29
|
+
# Expose the value stored in Ox::Node
|
|
30
|
+
# Ox stores attribute values using @value instance variable
|
|
31
|
+
def value
  @value
end

# Serialize the attribute as name="value" with the value XML-escaped.
#
# The value is converted with to_s (so nil becomes an empty string) and the
# five predefined XML entities are substituted. "&" is replaced first so the
# ampersands introduced by the later replacements are not escaped again.
#
# @return [String] the expanded name, an equals sign and the escaped value
#   wrapped in double quotes
def to_xml
  escaped_value = @value.to_s
                        .gsub("&", "&amp;")
                        .gsub("<", "&lt;")
                        .gsub(">", "&gt;")
                        .gsub('"', "&quot;")
                        .gsub("'", "&apos;")

  "#{expanded_name}=\"#{escaped_value}\""
end

# String conversion yields the same text as #to_xml.
#
# @return [String]
def to_s
  to_xml
end
|
|
24
51
|
end
|
|
25
52
|
end
|
|
26
53
|
end
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require "rexml/formatters/pretty"
|
|
2
4
|
|
|
3
5
|
module Moxml
|
|
@@ -36,7 +38,7 @@ module Moxml
|
|
|
36
38
|
|
|
37
39
|
# Check for mixed content
|
|
38
40
|
has_text = node.children.any? { |c| c.is_a?(::REXML::Text) && !c.to_s.strip.empty? }
|
|
39
|
-
has_elements = node.children.any?
|
|
41
|
+
has_elements = node.children.any?(::REXML::Element)
|
|
40
42
|
mixed = has_text && has_elements
|
|
41
43
|
|
|
42
44
|
# Handle children based on content type
|
|
@@ -46,8 +48,8 @@ module Moxml
|
|
|
46
48
|
node.children.each_with_index do |child, _index|
|
|
47
49
|
# Skip insignificant whitespace
|
|
48
50
|
next if child.is_a?(::REXML::Text) &&
|
|
49
|
-
|
|
50
|
-
|
|
51
|
+
child.to_s.strip.empty? &&
|
|
52
|
+
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
51
53
|
|
|
52
54
|
# Indent non-text nodes in non-mixed content
|
|
53
55
|
# if !mixed && !child.is_a?(::REXML::Text)
|
|
@@ -95,7 +97,7 @@ module Moxml
|
|
|
95
97
|
def find_significant_sibling(node, direction)
|
|
96
98
|
method = direction == :next ? :next_sibling : :previous_sibling
|
|
97
99
|
sibling = node.send(method)
|
|
98
|
-
sibling = sibling.send(method) while sibling
|
|
100
|
+
sibling = sibling.send(method) while sibling.is_a?(::REXML::Text) && sibling.to_s.strip.empty?
|
|
99
101
|
sibling
|
|
100
102
|
end
|
|
101
103
|
|
|
@@ -144,7 +146,9 @@ module Moxml
|
|
|
144
146
|
def write_declaration(node, output)
|
|
145
147
|
output << "<?xml"
|
|
146
148
|
output << %( version="#{node.version}") if node.version
|
|
147
|
-
|
|
149
|
+
if node.writeencoding
|
|
150
|
+
output << %( encoding="#{node.encoding.to_s.upcase}")
|
|
151
|
+
end
|
|
148
152
|
output << %( standalone="#{node.standalone}") if node.standalone
|
|
149
153
|
output << "?>"
|
|
150
154
|
# output << "\n"
|
|
@@ -155,13 +159,14 @@ module Moxml
|
|
|
155
159
|
node.attributes.each do |name, attr|
|
|
156
160
|
next unless name.to_s.start_with?("xmlns:") || name.to_s == "xmlns"
|
|
157
161
|
|
|
158
|
-
|
|
162
|
+
# convert the default namespace
|
|
163
|
+
name = "xmlns" if name.to_s == "xmlns:"
|
|
159
164
|
value = attr.respond_to?(:value) ? attr.value : attr
|
|
160
165
|
output << " #{name}=\"#{value}\""
|
|
161
166
|
end
|
|
162
167
|
|
|
163
168
|
# Then write regular attributes
|
|
164
|
-
node.attributes.each do |name, attr|
|
|
169
|
+
node.attributes.each do |name, attr| # rubocop:disable Style/CombinableLoops
|
|
165
170
|
next if name.to_s.start_with?("xmlns:") || name.to_s == "xmlns"
|
|
166
171
|
|
|
167
172
|
output << " "
|
|
@@ -175,7 +180,7 @@ module Moxml
|
|
|
175
180
|
value = attr.respond_to?(:value) ? attr.value : attr
|
|
176
181
|
output << escape_attribute_value(value.to_s)
|
|
177
182
|
output << "\""
|
|
178
|
-
end
|
|
183
|
+
end # rubocop:enable Style/CombinableLoops
|
|
179
184
|
end
|
|
180
185
|
|
|
181
186
|
def escape_attribute_value(value)
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "ox"
|
|
4
|
+
require_relative "../xpath"
|
|
5
|
+
# Force load XPath modules (autoload doesn't work well with relative requires in examples)
|
|
6
|
+
require_relative "../xpath/parser"
|
|
7
|
+
require_relative "../xpath/compiler"
|
|
8
|
+
|
|
9
|
+
module Moxml
|
|
10
|
+
module Adapter
|
|
11
|
+
# HeadedOx adapter - combines Ox's fast parsing with Moxml's XPath engine.
|
|
12
|
+
#
|
|
13
|
+
# This adapter uses:
|
|
14
|
+
# - Ox for XML parsing (fast C-based parser)
|
|
15
|
+
# - Moxml::XPath engine for comprehensive XPath 1.0 support
|
|
16
|
+
#
|
|
17
|
+
# Unlike the standard Ox adapter which has limited XPath support through
|
|
18
|
+
# Ox's locate() method, HeadedOx provides full XPath 1.0 functionality
|
|
19
|
+
# including all axes, predicates, and 27 standard functions.
|
|
20
|
+
#
|
|
21
|
+
# @example
|
|
22
|
+
# context = Moxml.new(:headed_ox)
|
|
23
|
+
# doc = context.parse(xml_string)
|
|
24
|
+
# results = doc.xpath('//book[@price < 10]/title')
|
|
25
|
+
#
|
|
26
|
+
class HeadedOx < Ox
  class << self
    # Parse +xml+ with Ox and build a document bound to a :headed_ox context
    # (rather than the :ox context).
    #
    # When ::Ox.parse returns something other than an ::Ox::Document, the
    # result is appended to a fresh ::Ox::Document before building. Only the
    # Ox parsing step is covered by the error translation below.
    #
    # @param xml [Object] input handed to ::Ox.parse
    # @param _options [Hash] accepted but not used
    # @return [Object] whatever DocumentBuilder#build returns
    # @raise [Moxml::ParseError] when Ox raises ::Ox::ParseError; for String
    #   input the error's :source is xml[0..100], otherwise nil
    def parse(xml, _options = {})
      native_doc =
        begin
          parsed = ::Ox.parse(xml)
          if parsed.is_a?(::Ox::Document)
            parsed
          else
            ::Ox::Document.new.tap { |holder| holder << parsed }
          end
        rescue ::Ox::ParseError => e
          snippet = xml.is_a?(String) ? xml[0..100] : nil
          raise Moxml::ParseError.new(e.message, source: snippet)
        end

      builder = DocumentBuilder.new(Context.new(:headed_ox))
      builder.build(native_doc)
    end

    # Run an XPath expression through Moxml's XPath parser and compiler.
    #
    # A +node+ that is not a Moxml::Node is wrapped with a new :headed_ox
    # context first, since the compiled expression is called with a wrapped
    # node.
    #
    # @param node [Moxml::Node, Object] starting node (wrapped or native)
    # @param expression [String] XPath expression
    # @param namespaces [Hash] namespace prefix mappings
    # @return [Moxml::NodeSet, Object] Array and NodeSet results come back
    #   de-duplicated via uniq_by_native; anything else is returned unchanged
    # @raise [Moxml::XPathError] wrapping any StandardError raised on the way
    def xpath(node, expression, namespaces = {})
      node = Node.wrap(node, Context.new(:headed_ox)) unless node.is_a?(Moxml::Node)

      compiled = XPath::Compiler.compile_with_cache(
        XPath::Parser.parse(expression),
        namespaces: namespaces,
      )
      outcome = compiled.call(node)

      case outcome
      when Array then NodeSet.new(outcome, node.context).uniq_by_native
      when NodeSet then outcome.uniq_by_native
      else outcome
      end
    rescue StandardError => e
      raise Moxml::XPathError.new(
        "XPath execution failed: #{e.message}",
        expression: expression,
        adapter: "HeadedOx",
        node: node,
      )
    end

    # Run an XPath expression and return a single result.
    #
    # @param node [Moxml::Node, Object] starting node
    # @param expression [String] XPath expression
    # @param namespaces [Hash] namespace prefix mappings
    # @return [Object, nil] +first+ of a NodeSet result, otherwise the result
    #   of #xpath unchanged
    def at_xpath(node, expression, namespaces = {})
      found = xpath(node, expression, namespaces)
      return found unless found.is_a?(NodeSet)

      found.first
    end

    # @return [Boolean] always true for this adapter
    def xpath_supported?
      true
    end

    # Capability flags reported by this adapter.
    #
    # @return [Hash] a new hash on every call
    def capabilities
      # Parsing capabilities (inherited from Ox)
      parsing = {
        sax_parsing: true,
        namespace_aware: true,
        namespace_support: :partial,
        dtd_support: true,
        parsing_speed: :fast,
      }

      # XPath capabilities (provided by Moxml's XPath engine)
      xpath_engine = {
        xpath_support: :full,
        xpath_full: true,
        # 6 of 13 axes: child, descendant, descendant-or-self, self, attribute, parent
        xpath_axes: :partial,
        # All 27 XPath 1.0 functions
        xpath_functions: :complete,
        xpath_predicates: true,
        xpath_namespaces: true,
        xpath_variables: true,
      }

      # Serialization capabilities (inherited from Ox)
      serialization = { namespace_serialization: true, pretty_print: true }

      # Known limitations
      limitations = { schema_validation: false, xslt_support: false }

      # Key order matches the grouping above, with the core :parse flag first.
      { parse: true, **parsing, **xpath_engine, **serialization, **limitations }
    end
  end
end
|
|
160
|
+
end
|
|
161
|
+
end
|