moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -0,0 +1,1564 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "base"
|
|
4
|
+
require "libxml"
|
|
5
|
+
require_relative "customized_libxml/node"
|
|
6
|
+
require_relative "customized_libxml/element"
|
|
7
|
+
require_relative "customized_libxml/text"
|
|
8
|
+
require_relative "customized_libxml/comment"
|
|
9
|
+
require_relative "customized_libxml/cdata"
|
|
10
|
+
require_relative "customized_libxml/processing_instruction"
|
|
11
|
+
require_relative "customized_libxml/declaration"
|
|
12
|
+
|
|
13
|
+
module Moxml
|
|
14
|
+
module Adapter
|
|
15
|
+
class Libxml < Base
|
|
16
|
+
# Wrapper class to store DOCTYPE information
|
|
17
|
+
class DoctypeWrapper
  attr_reader :native_doc
  attr_accessor :name, :external_id, :system_id

  # @param doc [Object] document this DOCTYPE is associated with
  # @param name [String] name written right after "<!DOCTYPE"
  # @param external_id [String, nil] identifier emitted after PUBLIC
  # @param system_id [String, nil] identifier emitted after SYSTEM
  #   (or after the PUBLIC identifier)
  def initialize(doc, name, external_id, system_id)
    @native_doc = doc
    @name = name
    @external_id = external_id
    @system_id = system_id
  end

  # Same object as #native_doc, exposed under the accessor name the
  # adapter uses when unwrapping nodes.
  alias native native_doc

  # Serializes the stored values as a <!DOCTYPE ...> declaration.
  #
  # @return [String]
  def to_xml
    "<!DOCTYPE #{@name}#{identifier_clause}>"
  end

  private

  # Builds the PUBLIC/SYSTEM portion of the declaration; empty when
  # neither identifier is usable.
  def identifier_clause
    if @external_id && !@external_id.empty?
      clause = " PUBLIC \"#{@external_id}\""
      # The system identifier is appended whenever it is non-nil.
      @system_id ? "#{clause} \"#{@system_id}\"" : clause
    elsif @system_id && !@system_id.empty?
      " SYSTEM \"#{@system_id}\""
    else
      ""
    end
  end
end
|
|
45
|
+
|
|
46
|
+
class << self
|
|
47
|
+
# Installs +element+ as the root node of +doc+ through the document's
# own root= writer.
#
# @param doc [#root=] document receiving the root
# @param element [Object] node to become the root
def set_root(doc, element)
  doc.root = element
end
|
|
50
|
+
|
|
51
|
+
def parse(xml, options = {})
|
|
52
|
+
# LibXML doesn't preserve DOCTYPE during parsing, so we need to extract it manually
|
|
53
|
+
xml_string = if xml.is_a?(String)
|
|
54
|
+
xml
|
|
55
|
+
elsif xml.respond_to?(:read)
|
|
56
|
+
xml.read
|
|
57
|
+
else
|
|
58
|
+
xml.to_s
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Extract DOCTYPE before parsing
|
|
62
|
+
doctype_match = xml_string.match(/<!DOCTYPE\s+(\S+)(?:\s+PUBLIC\s+"([^"]+)"\s+"([^"]+)"| \s+SYSTEM\s+"([^"]+)")?\s*>/i)
|
|
63
|
+
|
|
64
|
+
native_doc = begin
|
|
65
|
+
# Handle both string and file inputs
|
|
66
|
+
parser = ::LibXML::XML::Parser.string(xml_string)
|
|
67
|
+
parser.parse
|
|
68
|
+
rescue ::LibXML::XML::Error => e
|
|
69
|
+
if options[:strict]
|
|
70
|
+
line = e.respond_to?(:line) ? e.line : nil
|
|
71
|
+
raise Moxml::ParseError.new(
|
|
72
|
+
e.message,
|
|
73
|
+
line: line,
|
|
74
|
+
column: nil,
|
|
75
|
+
source: xml_string[0..100],
|
|
76
|
+
)
|
|
77
|
+
end
|
|
78
|
+
# Return empty document for non-strict mode
|
|
79
|
+
create_document
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# Store DOCTYPE if found
|
|
83
|
+
if doctype_match
|
|
84
|
+
name = doctype_match[1]
|
|
85
|
+
external_id = doctype_match[2]
|
|
86
|
+
system_id = doctype_match[3] || doctype_match[4]
|
|
87
|
+
|
|
88
|
+
doctype_wrapper = DoctypeWrapper.new(
|
|
89
|
+
native_doc,
|
|
90
|
+
name,
|
|
91
|
+
external_id,
|
|
92
|
+
system_id,
|
|
93
|
+
)
|
|
94
|
+
native_doc.instance_variable_set(:@moxml_doctype, doctype_wrapper)
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
DocumentBuilder.new(Context.new(:libxml)).build(native_doc)
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# SAX parsing implementation for LibXML
|
|
101
|
+
#
|
|
102
|
+
# @param xml [String, IO] XML to parse
|
|
103
|
+
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
|
|
104
|
+
# @return [void]
|
|
105
|
+
def sax_parse(xml, handler)
  # Bridge that translates LibXML SAX callbacks into Moxml SAX events
  bridge = LibXMLSAXBridge.new(handler)

  # Read IO-like inputs, as parse does. Calling #to_s on an IO would
  # hand the parser the object's inspect string instead of its XML.
  source = xml.respond_to?(:read) ? xml.read : xml.to_s

  parser = ::LibXML::XML::SaxParser.string(source)
  parser.callbacks = bridge
  parser.parse
rescue ::LibXML::XML::Error => e
  # LibXML failures are reported through the handler, not raised.
  line = e.respond_to?(:line) ? e.line : nil
  column = e.respond_to?(:column) ? e.column : nil
  error = Moxml::ParseError.new(e.message, line: line, column: column)
  handler.on_error(error)
end
|
|
123
|
+
|
|
124
|
+
# Builds a fresh, empty LibXML document. The optional argument is
# accepted but not used.
#
# @return [::LibXML::XML::Document]
def create_document(_native_doc = nil)
  ::LibXML::XML::Document.new
end
|
|
127
|
+
|
|
128
|
+
# Creates a bare LibXML node named after +name+ (stringified).
#
# @param name [#to_s] element name
# @return [::LibXML::XML::Node]
def create_native_element(name)
  element_name = name.to_s
  ::LibXML::XML::Node.new(element_name)
end
|
|
131
|
+
|
|
132
|
+
# Creates a LibXML text node and returns it inside the Text wrapper.
#
# @param content [#to_s] text content
# @return [CustomizedLibxml::Text]
def create_native_text(content)
  CustomizedLibxml::Text.new(::LibXML::XML::Node.new_text(content.to_s))
end
|
|
136
|
+
|
|
137
|
+
# Creates a LibXML CDATA node and returns it inside the Cdata wrapper.
#
# @param content [#to_s] CDATA content
# @return [CustomizedLibxml::Cdata]
def create_native_cdata(content)
  CustomizedLibxml::Cdata.new(::LibXML::XML::Node.new_cdata(content.to_s))
end
|
|
141
|
+
|
|
142
|
+
# Creates a LibXML comment node and returns it inside the Comment wrapper.
#
# @param content [#to_s] comment text
# @return [CustomizedLibxml::Comment]
def create_native_comment(content)
  CustomizedLibxml::Comment.new(::LibXML::XML::Node.new_comment(content.to_s))
end
|
|
146
|
+
|
|
147
|
+
# Creates a LibXML processing-instruction node and returns it inside
# the ProcessingInstruction wrapper.
#
# @param target [#to_s] PI target
# @param content [#to_s] PI content
# @return [CustomizedLibxml::ProcessingInstruction]
def create_native_processing_instruction(target, content)
  pi_node = ::LibXML::XML::Node.new_pi(target.to_s, content.to_s)
  CustomizedLibxml::ProcessingInstruction.new(pi_node)
end
|
|
151
|
+
|
|
152
|
+
# Wraps the given declaration values, together with a newly created
# empty document, in a Declaration wrapper.
#
# @return [CustomizedLibxml::Declaration]
def create_native_declaration(version, encoding, standalone)
  CustomizedLibxml::Declaration.new(create_document, version, encoding, standalone)
end
|
|
157
|
+
|
|
158
|
+
# Builds a DoctypeWrapper holding the supplied values directly, rather
# than going through LibXML::XML::Dtd.new. The wrapper is paired with a
# newly created empty document.
#
# @param name [#to_s] DOCTYPE name
# @param external_id [#to_s, nil] stringified unless nil
# @param system_id [#to_s, nil] stringified unless nil
# @return [DoctypeWrapper]
def create_native_doctype(name, external_id, system_id)
  public_id = external_id&.to_s
  system_literal = system_id&.to_s
  DoctypeWrapper.new(create_document, name.to_s, public_id, system_literal)
end
|
|
164
|
+
|
|
165
|
+
# Classifies a node as one of Moxml's node-type symbols.
#
# @param node [Object, nil] wrapper or native node
# @return [Symbol] :document, :element, :text, :cdata, :comment,
#   :attribute, :processing_instruction, :doctype or :unknown
def node_type(node)
  return :unknown unless node

  # Moxml's own wrappers are recognised by class first.
  case node
  when CustomizedLibxml::Element then return :element
  when CustomizedLibxml::Text then return :text
  when CustomizedLibxml::Cdata then return :cdata
  when CustomizedLibxml::Comment then return :comment
  when CustomizedLibxml::ProcessingInstruction then return :processing_instruction
  when DoctypeWrapper then return :doctype
  end

  # Anything else exposing #native is unwrapped before inspection.
  raw = node.respond_to?(:native) ? node.native : node
  kinds = ::LibXML::XML::Node

  case raw.node_type
  when kinds::DOCUMENT_NODE then :document
  when kinds::ELEMENT_NODE then :element
  when kinds::TEXT_NODE then :text
  when kinds::CDATA_SECTION_NODE then :cdata
  when kinds::COMMENT_NODE then :comment
  when kinds::ATTRIBUTE_NODE then :attribute
  when kinds::PI_NODE then :processing_instruction
  when kinds::DTD_NODE then :doctype
  else :unknown
  end
end
|
|
202
|
+
|
|
203
|
+
# Returns the name of the unwrapped node, or nil when unwrapping
# yields nil.
def node_name(node)
  unpatch_node(node)&.name
end
|
|
207
|
+
|
|
208
|
+
# Renames the unwrapped node to +name+ (stringified); does nothing
# when unwrapping yields nil.
def set_node_name(node, name)
  target = unpatch_node(node)
  return unless target

  target.name = name.to_s
end
|
|
212
|
+
|
|
213
|
+
# Reads "version", "encoding" or "standalone" from a declaration.
#
# Declaration wrappers return their stored values unchanged. Any other
# node is treated as a native document: its encoding goes through
# encoding_to_string, and standalone is reported as "yes" or nil.
#
# @param node [Object, nil]
# @param name [String] attribute name; other names yield nil
# @return [Object, nil]
def declaration_attribute(node, name)
  return nil unless node

  wrapped = node.is_a?(CustomizedLibxml::Declaration)

  case name
  when "version"
    node.version
  when "encoding"
    enc = node.encoding
    return enc if wrapped

    encoding_to_string(enc) if enc
  when "standalone"
    # The wrapper returns "yes", "no", or nil
    return node.standalone if wrapped

    node.standalone? ? "yes" : nil
  end
end
|
|
239
|
+
|
|
240
|
+
# Writes "version", "encoding" or "standalone" on a Declaration
# wrapper. Anything that is not a Declaration wrapper (native documents
# included) is left untouched, as are unrecognised attribute names.
def set_declaration_attribute(node, name, value)
  return unless node && node.is_a?(CustomizedLibxml::Declaration)

  case name
  when "version" then node.version = value
  when "encoding" then node.encoding = value
  # The value is handed over as-is; the Declaration handles conversion
  when "standalone" then node.standalone = value
  end
end
|
|
258
|
+
|
|
259
|
+
# Lists the children of a node.
#
# For a document the list holds the DOCTYPE (the wrapper stored in
# @moxml_doctype if that variable is defined, otherwise one built from
# the document's dtd) followed by the patched root element. For any
# other node, whitespace-only text children are left out.
#
# @return [Array]
def children(node)
  native_node = unpatch_node(node)
  return [] unless native_node

  if native_node.is_a?(::LibXML::XML::Document)
    listing = []

    if native_node.instance_variable_defined?(:@moxml_doctype)
      # Stored wrapper wins, even over a dtd exposed by the document
      stored = native_node.instance_variable_get(:@moxml_doctype)
      listing << stored if stored
    elsif native_node.respond_to?(:dtd) && native_node.dtd
      dtd = native_node.dtd
      listing << DoctypeWrapper.new(native_node, dtd.name, dtd.external_id, dtd.system_id)
    end

    root_node = native_node.root
    listing << patch_node(root_node) if root_node
    return listing
  end

  return [] unless native_node.children?

  kept = []
  native_node.each_child do |child|
    blank_text = child.text? && child.content.to_s.strip.empty?
    kept << patch_node(child) unless blank_text
  end
  kept
end
|
|
302
|
+
|
|
303
|
+
# Returns the patched parent of the unwrapped node, or nil when there
# is no node or no parent.
def parent(node)
  parent_node = unpatch_node(node)&.parent
  return nil unless parent_node

  patch_node(parent_node)
end
|
|
308
|
+
|
|
309
|
+
# Returns the nearest following sibling that is not a whitespace-only
# text node (patched), or nil when none exists.
def next_sibling(node)
  candidate = unpatch_node(node)&.next
  # Step over whitespace-only text nodes
  candidate = candidate.next while candidate && candidate.text? && candidate.content.to_s.strip.empty?
  candidate && patch_node(candidate)
end
|
|
320
|
+
|
|
321
|
+
# Returns the nearest preceding sibling that is not a whitespace-only
# text node (patched), or nil when none exists.
def previous_sibling(node)
  candidate = unpatch_node(node)&.prev
  # Step back over whitespace-only text nodes
  candidate = candidate.prev while candidate && candidate.text? && candidate.content.to_s.strip.empty?
  candidate && patch_node(candidate)
end
|
|
332
|
+
|
|
333
|
+
# Returns the document a node belongs to. A document is returned
# as-is; any other node answers with its #doc.
def document(node)
  native_node = unpatch_node(node)
  return nil unless native_node

  native_node.is_a?(::LibXML::XML::Document) ? native_node : native_node.doc
end
|
|
343
|
+
|
|
344
|
+
# Returns the root of the unwrapped document, or nil when unwrapping
# yields nil.
def root(document)
  unpatch_node(document)&.root
end
|
|
348
|
+
|
|
349
|
+
# Collects an element's attributes, leaving out any whose name starts
# with "xmlns". Non-elements and elements without attributes give [].
#
# @return [Array]
def attributes(element)
  native_elem = unpatch_node(element)
  return [] unless native_elem
  return [] unless native_elem.respond_to?(:element?) && native_elem.element?
  return [] unless native_elem.attributes?

  collected = []
  native_elem.each_attr do |attr|
    next if attr.name.to_s.start_with?("xmlns")

    collected << attr
  end
  collected
end
|
|
363
|
+
|
|
364
|
+
# Returns the attribute's #parent, or nil for a nil attribute.
def attribute_element(attr)
  return nil if attr.nil?

  attr.parent
end
|
|
367
|
+
|
|
368
|
+
# Returns the attribute's #ns when it offers one; nil for a missing
# attribute or one that does not respond to #ns.
def attribute_namespace(attr)
  attr.ns if attr && attr.respond_to?(:ns)
end
|
|
374
|
+
|
|
375
|
+
# Sets an attribute on an element and returns the attribute object.
#
# A "prefix:local" name whose prefix resolves through
# find_namespace_by_prefix replaces any existing attribute of that
# name with a new namespaced LibXML::XML::Attr. Every other name is
# written through the element's []= and then looked up again.
def set_attribute(element, name, value)
  native_elem = unpatch_node(element)
  return unless native_elem

  qualified = name.to_s
  text = value.to_s

  if qualified.include?(":")
    prefix, local_name = qualified.split(":", 2)
    ns = find_namespace_by_prefix(native_elem, prefix)

    if ns
      # Drop the existing attribute first, then create the namespaced
      # one: Attr.new(node, name, value, ns)
      native_elem.attributes.get_attribute(qualified)&.remove!
      return ::LibXML::XML::Attr.new(native_elem, local_name, text, ns)
    end
  end

  # No prefix, or prefix without a matching namespace: plain attribute
  native_elem[qualified] = text
  native_elem.attributes.get_attribute(qualified)
end
|
|
412
|
+
|
|
413
|
+
# Looks up an attribute by name and gives the returned object a #to_xml
# method that serializes it as name="value" with the value escaped.
#
# @param element [Object] wrapper or native element
# @param name [#to_s] attribute name
# @return [Object, nil] the attribute, or nil when the element is
#   missing, has no attributes, or has no attribute of that name
def get_attribute(element, name)
  native_elem = unpatch_node(element)
  return nil unless native_elem
  return nil unless native_elem.attributes?

  attr = native_elem.attributes.get_attribute(name.to_s)
  return nil unless attr

  # Inside the block, `value` is the attribute's own method, while
  # `name` is the name this method was called with.
  attr.define_singleton_method(:to_xml) do
    # Single pass over the four markup characters, so an ampersand
    # produced by one replacement is never escaped again.
    escaped = value.to_s.gsub(
      /[&<>"]/,
      "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", "\"" => "&quot;",
    )
    "#{name}=\"#{escaped}\""
  end
  attr
end
|
|
432
|
+
|
|
433
|
+
# Returns an attribute's value by name.
#
# The element's [] lookup is tried first. If that finds nothing and the
# name has the form "prefix:local", the attributes are scanned for one
# named either +local+ or the full name: it matches when its namespace
# prefix equals +prefix+, or when its name is the full prefixed name.
#
# @return [Object, nil]
def get_attribute_value(element, name)
  native_elem = unpatch_node(element)
  return nil unless native_elem

  key = name.to_s
  direct = native_elem[key]
  return direct if direct
  return nil unless key.include?(":") && native_elem.attributes?

  prefix, local_name = key.split(":", 2)
  native_elem.each_attr do |attr|
    full_match = attr.name == key
    next unless full_match || attr.name == local_name

    prefix_match = attr.ns && attr.ns.prefix == prefix
    return attr.value if prefix_match || full_match
  end

  nil
end
|
|
462
|
+
|
|
463
|
+
# Removes the named attribute from an element, if the element exists,
# has attributes, and has one of that name.
def remove_attribute(element, name)
  native_elem = unpatch_node(element)
  return unless native_elem && native_elem.attributes?

  native_elem.attributes.get_attribute(name.to_s)&.remove!
end
|
|
471
|
+
|
|
472
|
+
# Renames an attribute by working on its owning element: the old
# attribute is removed and a new one with the same value is written
# under the new name.
#
# @return [Object, nil] the attribute found under the new name, or nil
#   when the attribute is missing or has no parent element
def set_attribute_name(attribute, new_name)
  return unless attribute

  # Capture everything before the old attribute is removed
  previous_name, current_value, owner = attribute.name, attribute.value, attribute.parent
  return unless owner

  renamed = new_name.to_s
  owner.attributes.get_attribute(previous_name)&.remove!
  owner[renamed] = current_value
  owner.attributes.get_attribute(renamed)
end
|
|
492
|
+
|
|
493
|
+
# Adds +child+ under +element+.
#
# When the parent carries a default namespace (nil or empty prefix) and
# the child is an element without one, the child is given the parent's
# namespace for XPath compatibility. Prefixed namespaces are not
# inherited.
#
# When the parent is a document, declarations, DOCTYPE wrappers,
# processing instructions and text wrappers are not inserted into the
# tree. They are stored on the document in instance variables
# (@moxml_declaration, @moxml_doctype, @moxml_pis, @moxml_texts).
# Other children become the root if there is none, or are added under
# the existing root.
def add_child(element, child)
  return unless element && child

  native_elem = unpatch_node(element)
  native_child = unpatch_node(child)

  inherits_namespace =
    native_elem.respond_to?(:namespaces) && native_elem.namespaces&.namespace &&
    native_child.respond_to?(:namespaces) && native_child.element? &&
    (!native_child.namespaces.namespace || native_child.namespaces.namespace.href.to_s.empty?)

  if inherits_namespace
    parent_ns = native_elem.namespaces.namespace
    # Only a default namespace is propagated to the child
    default_ns = parent_ns.prefix.nil? || parent_ns.prefix.to_s.empty?
    native_child.namespaces.namespace = parent_ns if default_ns
  end

  # Ordinary parent: import the child and attach it directly
  return import_and_add(native_elem.doc, native_elem, native_child) unless native_elem.is_a?(::LibXML::XML::Document)

  case child
  when CustomizedLibxml::Declaration
    native_elem.instance_variable_set(:@moxml_declaration, child)
    # The declaration also keeps a reference to its parent document
    child.instance_variable_set(:@parent_doc, native_elem)
    return
  when DoctypeWrapper
    native_elem.instance_variable_set(:@moxml_doctype, child)
    return
  when CustomizedLibxml::ProcessingInstruction
    stored_pis = native_elem.instance_variable_get(:@moxml_pis) || []
    native_elem.instance_variable_set(:@moxml_pis, stored_pis << child)
    return
  when CustomizedLibxml::Text
    # Documents can't have text children in LibXML
    stored_texts = native_elem.instance_variable_get(:@moxml_texts) || []
    native_elem.instance_variable_set(:@moxml_texts, stored_texts << child)
    return
  end

  if native_elem.root.nil? && node_type(native_child) == :element
    # First element added to the document becomes its root
    native_elem.root = native_child
  elsif native_elem.root
    # A root already exists, so the child goes underneath it
    import_and_add(native_elem.doc, native_elem.root, native_child)
  end
end
|
|
558
|
+
|
|
559
|
+
# Inserts +sibling+ immediately before +node+.
#
# A processing-instruction wrapper is not linked into the tree when the
# target node belongs to a document: it is appended to the document's
# @moxml_pis list instead, and the method returns nil.
def add_previous_sibling(node, sibling)
  return if !node || !sibling

  target = unpatch_node(node)
  incoming = unpatch_node(sibling)

  if sibling.is_a?(CustomizedLibxml::ProcessingInstruction)
    owner = target.doc if target.respond_to?(:doc)
    if owner
      # Keep the wrapper (not the native node) on the owning document.
      queued = owner.instance_variable_get(:@moxml_pis) || []
      owner.instance_variable_set(:@moxml_pis, queued << sibling)
      return
    end
  end

  target.prev = incoming
end
|
|
578
|
+
|
|
579
|
+
# Inserts +sibling+ immediately after +node+; does nothing when either
# argument is missing.
def add_next_sibling(node, sibling)
  if node && sibling
    unpatch_node(node).next = unpatch_node(sibling)
  end
end
|
|
586
|
+
|
|
587
|
+
# Detaches +node+ from its tree.
#
# Declaration wrappers are not unlinked: they are flagged with @removed
# (which #serialize checks before emitting a stored declaration), and nil
# is returned. Any other node is unwrapped and removed natively.
def remove(node)
  unless node.is_a?(CustomizedLibxml::Declaration)
    return unpatch_node(node)&.remove!
  end

  node.instance_variable_set(:@removed, true)
  nil
end
|
|
606
|
+
|
|
607
|
+
# Replaces +node+ with +new_node+ at the same position under its parent.
#
# Text-for-text replacement is done by overwriting the parent's content,
# because LibXML sibling manipulation is unreliable for text nodes. For
# everything else the new node is first added to the parent (importing it
# when it comes from another document), then re-linked next to the old
# node's neighbours, and finally the old node is removed.
def replace(node, new_node)
  old_native = unpatch_node(node)
  replacement = unpatch_node(new_node)
  container = old_native&.parent
  return if !container || !replacement

  if old_native.text? && replacement.text?
    container.content = replacement.content
    return
  end

  # Capture the neighbours before the tree is touched.
  before = old_native.prev
  after = old_native.next

  owner_doc = (container.doc if container.respond_to?(:doc))
  import_and_add(owner_doc, container, replacement)

  # Move the new node into the slot the old node occupied.
  before.next = replacement if before
  after.prev = replacement if after

  old_native.remove!
end
|
|
644
|
+
|
|
645
|
+
# Removes every existing child of +element+ and appends +children+ in order.
# Each new child goes through import_and_add so cross-document nodes are
# handled there.
def replace_children(element, children)
  host = unpatch_node(element)
  return unless host

  host.each_child { |existing| existing.remove! }

  owner_doc = (host.doc if host.respond_to?(:doc))
  children.each { |kid| import_and_add(owner_doc, host, unpatch_node(kid)) }
end
|
|
662
|
+
|
|
663
|
+
# Returns the native node's content, or nil when there is no node.
def text_content(node)
  native = unpatch_node(node)
  native ? native.content : nil
end
|
|
669
|
+
|
|
670
|
+
# Concatenates the content of the node's direct text children only
# (text inside nested elements is not included). Returns "" when the node
# is missing or has no children.
def inner_text(node)
  native = unpatch_node(node)
  return "" unless native && native.children?

  pieces = []
  native.each_child { |child| pieces << child.content if child.text? }
  pieces.join
end
|
|
681
|
+
|
|
682
|
+
# Overwrites the node's content with the string form of +content+.
# No-op when the node is missing.
def set_text_content(node, content)
  native = unpatch_node(node)
  return unless native

  native.content = content.to_s
end
|
|
686
|
+
|
|
687
|
+
# Returns the text held by a CDATA node with XML entities decoded.
#
# LibXML may hand CDATA content back entity-escaped, so the five predefined
# XML entities are decoded here. "&amp;" is decoded last so that an escaped
# ampersand (e.g. "&amp;lt;") becomes "&lt;" rather than being decoded twice.
#
# @param node [Object] wrapper or native CDATA node (may be nil)
# @return [String, nil] decoded content, or nil when there is no node/content
def cdata_content(node)
  content = unpatch_node(node)&.content
  return nil unless content

  content.gsub("&quot;", '"')
         .gsub("&apos;", "'")
         .gsub("&lt;", "<")
         .gsub("&gt;", ">")
         .gsub("&amp;", "&")
end
|
|
699
|
+
|
|
700
|
+
# Stores +content+ (as a string) on the CDATA node verbatim; CDATA text is
# deliberately not escaped here. No-op when the node is missing.
def set_cdata_content(node, content)
  native = unpatch_node(node)
  return unless native

  native.content = content.to_s
end
|
|
705
|
+
|
|
706
|
+
# Returns the comment node's content, or nil when there is no node.
def comment_content(node)
  unpatch_node(node)&.content
end
|
|
710
|
+
|
|
711
|
+
# Overwrites the comment node's content with the string form of +content+.
# No-op when the node is missing.
def set_comment_content(node, content)
  native = unpatch_node(node)
  return unless native

  native.content = content.to_s
end
|
|
715
|
+
|
|
716
|
+
# Returns the processing instruction's target (the native node's name),
# or nil when there is no node.
def processing_instruction_target(node)
  unpatch_node(node)&.name
end
|
|
720
|
+
|
|
721
|
+
# Returns the processing instruction's content with XML entities decoded.
#
# LibXML may return the content entity-escaped, so the five predefined XML
# entities are decoded. "&amp;" is decoded last so an escaped ampersand
# (e.g. "&amp;gt;") yields "&gt;" instead of being decoded twice.
#
# @param node [Object] wrapper or native PI node (may be nil)
# @return [String, nil] decoded content, or nil when there is no node/content
def processing_instruction_content(node)
  content = unpatch_node(node)&.content
  return nil unless content

  content.gsub("&quot;", '"')
         .gsub("&apos;", "'")
         .gsub("&lt;", "<")
         .gsub("&gt;", ">")
         .gsub("&amp;", "&")
end
|
|
733
|
+
|
|
734
|
+
# Stores the raw string form of +content+ on the PI node (LibXML applies its
# own escaping). No-op when the node is missing.
def set_processing_instruction_content(node, content)
  native = unpatch_node(node)
  return unless native

  native.content = content.to_s
end
|
|
739
|
+
|
|
740
|
+
# Declares a namespace on +element+ and returns the native namespace object.
#
# A nil/empty +prefix+ declares a default namespace; in that case the new
# namespace is also made the element's own namespace. Returns nil when the
# element is missing.
def create_native_namespace(element, prefix, uri)
  host = unpatch_node(element)
  return nil unless host

  prefix_text = prefix.to_s
  is_default = prefix_text.empty?

  declared = ::LibXML::XML::Namespace.new(
    host,
    is_default ? nil : prefix_text,
    uri.to_s,
  )
  host.namespaces.namespace = declared if is_default

  declared
end
|
|
755
|
+
|
|
756
|
+
# Makes +ns+ the element's own namespace. No-op when either the element or
# the namespace is missing.
def set_namespace(element, ns)
  host = unpatch_node(element)
  if host && ns
    host.namespaces.namespace = ns
  end
end
|
|
762
|
+
|
|
763
|
+
# Returns the namespace assigned to the element itself (not namespaces that
# are merely in scope from ancestors), or nil when the element is missing.
def namespace(element)
  host = unpatch_node(element)
  host ? host.namespaces&.namespace : nil
end
|
|
772
|
+
|
|
773
|
+
# Returns the namespace's prefix, or nil for a nil namespace.
def namespace_prefix(namespace)
  namespace.prefix unless namespace.nil?
end
|
|
776
|
+
|
|
777
|
+
# Returns the namespace's URI (its href), or nil for a nil namespace.
def namespace_uri(namespace)
  namespace.href unless namespace.nil?
end
|
|
780
|
+
|
|
781
|
+
# Returns an array of the namespace objects yielded by the node's
# +namespaces+ collection; [] when the node is missing or has no such
# collection.
def namespace_definitions(node)
  native = unpatch_node(node)
  return [] unless native && native.respond_to?(:namespaces)

  native.namespaces.map(&:itself)
end
|
|
790
|
+
|
|
791
|
+
# Evaluates an XPath +expression+ relative to +node+ and returns the wrapped
# matches as an array ([] when the node is missing).
#
# LibXML needs every prefix used in the expression to be registered, so a
# namespace context is built from the document plus the caller-supplied
# +namespaces+. LibXML errors are re-raised as Moxml::XPathError.
def xpath(node, expression, namespaces = nil)
  native = unpatch_node(node)
  return [] unless native

  context = build_xpath_namespaces(native, namespaces)
  find_args = context.empty? ? [expression] : [expression, context]

  native.find(*find_args).to_a.map { |match| patch_node(match) }
rescue ::LibXML::XML::Error => e
  raise Moxml::XPathError.new(
    e.message,
    expression: expression,
    adapter: "LibXML",
    node: node,
  )
end
|
|
815
|
+
|
|
816
|
+
# Returns the first node matching the XPath +expression+, or nil when
# nothing matches.
def at_xpath(node, expression, namespaces = nil)
  xpath(node, expression, namespaces)&.first
end
|
|
820
|
+
|
|
821
|
+
# Serializes +node+ to an XML string.
#
# Wrappers that provide their own to_xml (declarations and any non-native
# object) serialize themselves. Native documents are assembled piece by
# piece: declaration, stored DOCTYPE, stored document-level processing
# instructions, stored text nodes, then the root element. Any other native
# node goes through serialize_element_with_namespaces.
#
# Options read here:
#   :no_declaration - truthy suppresses the XML declaration
#   :encoding       - encoding used when a default declaration is created
#   :indent         - positive number of spaces per level for the root tree
def serialize(node, options = {})
  if node.respond_to?(:to_xml)
    is_native = node.is_a?(::LibXML::XML::Node) ||
                node.is_a?(::LibXML::XML::Document)
    return node.to_xml if node.is_a?(CustomizedLibxml::Declaration) || !is_native
  end

  native = unpatch_node(node)
  return "" unless native
  return serialize_element_with_namespaces(native, true) unless native.is_a?(::LibXML::XML::Document)

  buffer = +""
  # Appends a fragment, separated by a newline from whatever precedes it.
  on_new_line = lambda do |fragment|
    buffer << "\n" unless buffer.empty?
    buffer << fragment
  end

  # The declaration is included unless :no_declaration is present and truthy.
  suppress_decl = options.key?(:no_declaration) && options[:no_declaration]
  unless suppress_decl
    if native.instance_variable_defined?(:@moxml_declaration)
      # Explicitly managed declaration: emit unless cleared or flagged removed.
      stored = native.instance_variable_get(:@moxml_declaration)
      buffer << stored.to_xml if stored && !stored.instance_variable_get(:@removed)
    else
      # Nothing stored yet: build a default one (no standalone attribute)
      # and remember it on the document.
      fallback = CustomizedLibxml::Declaration.new(
        native,
        native.version || "1.0",
        options[:encoding] || encoding_to_string(native.encoding) || "UTF-8",
        nil,
      )
      native.instance_variable_set(:@moxml_declaration, fallback)
      buffer << fallback.to_xml
    end
  end

  doctype = native.instance_variable_get(:@moxml_doctype)
  on_new_line.call(doctype.to_xml) if doctype

  (native.instance_variable_get(:@moxml_pis) || []).each do |pi|
    on_new_line.call(pi.to_xml)
  end

  (native.instance_variable_get(:@moxml_texts) || []).each do |text|
    on_new_line.call(text.to_xml)
  end

  root = native.root
  if root
    rendered = serialize_element_with_namespaces(root, true)

    if options[:indent]&.positive?
      # Break the markup into one tag per line, then indent it.
      buffer << "\n" << indent_xml(add_newlines_to_xml(rendered), options[:indent])
    else
      on_new_line.call(rendered)
    end
  end

  buffer
end
|
|
930
|
+
|
|
931
|
+
def add_newlines_to_xml(xml_string)
|
|
932
|
+
# Add newlines between XML elements for proper indentation
|
|
933
|
+
# But don't add newlines between opening and immediate closing tags (e.g., <tag></tag>)
|
|
934
|
+
# And most importantly, don't add newlines inside CDATA sections
|
|
935
|
+
|
|
936
|
+
# First, protect CDATA sections by replacing them with placeholders
|
|
937
|
+
# Manual scanning guarantees O(n) complexity with no backtracking (ReDoS-safe)
|
|
938
|
+
cdata_sections = []
|
|
939
|
+
result = +""
|
|
940
|
+
pos = 0
|
|
941
|
+
|
|
942
|
+
loop do
|
|
943
|
+
# Find next CDATA start
|
|
944
|
+
cdata_start = xml_string.index("<![CDATA[", pos)
|
|
945
|
+
|
|
946
|
+
if cdata_start
|
|
947
|
+
# Copy everything before CDATA
|
|
948
|
+
result << xml_string[pos...cdata_start]
|
|
949
|
+
|
|
950
|
+
# Find CDATA end
|
|
951
|
+
cdata_content_start = cdata_start + 9 # Length of "<![CDATA["
|
|
952
|
+
cdata_end = xml_string.index("]]>", cdata_content_start)
|
|
953
|
+
|
|
954
|
+
if cdata_end
|
|
955
|
+
# Extract full CDATA including markers
|
|
956
|
+
full_cdata_end = cdata_end + 3 # Include "]]>"
|
|
957
|
+
cdata_section = xml_string[cdata_start...full_cdata_end]
|
|
958
|
+
|
|
959
|
+
# Store and add placeholder
|
|
960
|
+
cdata_sections << cdata_section
|
|
961
|
+
result << "__CDATA_PLACEHOLDER_#{cdata_sections.length - 1}__"
|
|
962
|
+
|
|
963
|
+
# Continue after this CDATA
|
|
964
|
+
pos = full_cdata_end
|
|
965
|
+
else
|
|
966
|
+
# Malformed CDATA (no closing "]]>") - copy as-is
|
|
967
|
+
result << xml_string[cdata_start..]
|
|
968
|
+
break
|
|
969
|
+
end
|
|
970
|
+
else
|
|
971
|
+
# No more CDATA sections - copy rest
|
|
972
|
+
result << xml_string[pos..]
|
|
973
|
+
break
|
|
974
|
+
end
|
|
975
|
+
end
|
|
976
|
+
|
|
977
|
+
protected = result
|
|
978
|
+
|
|
979
|
+
# Add newlines between elements (but not in CDATA - already protected)
|
|
980
|
+
with_newlines = protected.gsub(%r{(<[^>]+)>(?=<(?!/))}, "\\1>\n")
|
|
981
|
+
|
|
982
|
+
# Restore CDATA sections
|
|
983
|
+
cdata_sections.each_with_index do |cdata, index|
|
|
984
|
+
with_newlines.sub!("__CDATA_PLACEHOLDER_#{index}__", cdata)
|
|
985
|
+
end
|
|
986
|
+
|
|
987
|
+
with_newlines
|
|
988
|
+
end
|
|
989
|
+
|
|
990
|
+
# Re-indents XML that already has one tag per line.
#
# Each non-blank line is stripped and prefixed with indent_size spaces per
# nesting level. A line starting with "</" lowers the level before it is
# written (never below zero); a line raises the level afterwards only when
# it is a plain opening tag - not self-closing, not "<?...", not "<!...",
# and not containing a closing tag on the same line.
def indent_xml(xml_string, indent_size)
  depth = 0

  rendered = xml_string.each_line.map(&:strip).reject(&:empty?).map do |tag_line|
    closing = tag_line.start_with?("</")
    depth = [depth - 1, 0].max if closing

    indented = (" " * (indent_size * depth)) + tag_line

    opens_scope = tag_line.start_with?("<") && !closing &&
                  !tag_line.end_with?("/>") && !tag_line.start_with?("<?") &&
                  !tag_line.start_with?("<!") && !tag_line.include?("</")
    depth += 1 if opens_scope

    indented
  end

  rendered.join("\n")
end
|
|
1016
|
+
|
|
1017
|
+
# Builds a fresh, document-independent copy of +node+.
#
# LibXML is strict about document ownership, so instead of relying on dup
# the copy is rebuilt from native constructors according to node type:
# - doctype: a new DoctypeWrapper with the same name and ids
# - element: new node with namespace declarations, own namespace,
#   attributes and (recursively) children; whitespace-only text children
#   are dropped
# - text / cdata / comment / processing instruction: new native node with
#   the same content
# - anything else: native dup as a fallback
#
# Returns nil when +node+ is nil/false.
def duplicate_node(node)
  return nil unless node

  source = node.respond_to?(:native) ? node.native : node

  case node_type(node)
  when :doctype
    # Anything but a DoctypeWrapper is unexpected here; hand it back as is.
    return node unless node.is_a?(DoctypeWrapper)

    DoctypeWrapper.new(
      create_document,
      node.name,
      node.external_id,
      node.system_id,
    )
  when :element
    copy = ::LibXML::XML::Node.new(source.name)

    # Namespaces first, so namespaced attributes resolve afterwards.
    if source.respond_to?(:namespaces)
      source.namespaces.each do |ns|
        ::LibXML::XML::Namespace.new(copy, ns.prefix, ns.href)
      end

      own_ns = source.namespaces.namespace
      if own_ns
        # Point the copy at the matching namespace that was just recreated.
        twin = copy.namespaces.find do |ns|
          ns.prefix == own_ns.prefix && ns.href == own_ns.href
        end
        copy.namespaces.namespace = twin if twin
      end
    end

    if source.attributes?
      source.each_attr do |attr|
        # Namespaced attributes are written with their prefixed name.
        qualified = attr.ns&.prefix ? "#{attr.ns.prefix}:#{attr.name}" : attr.name
        copy[qualified] = attr.value
      end
    end

    if source.children?
      source.each_child do |child|
        next if child.text? && child.content.to_s.strip.empty?

        copy << duplicate_node(child)
      end
    end

    copy
  when :text
    ::LibXML::XML::Node.new_text(source.content)
  when :cdata
    ::LibXML::XML::Node.new_cdata(source.content)
  when :comment
    ::LibXML::XML::Node.new_comment(source.content)
  when :processing_instruction
    ::LibXML::XML::Node.new_pi(source.name, source.content)
  else
    source.dup
  end
end
|
|
1108
|
+
|
|
1109
|
+
# Wraps a native LibXML node in the matching CustomizedLibxml wrapper.
# nil, already-wrapped nodes and node types without a wrapper are returned
# unchanged. The second argument is unused.
def patch_node(node, _parent = nil)
  return node if node.nil? || node.is_a?(CustomizedLibxml::Node)

  wrapper_class =
    case node_type(node)
    when :element then CustomizedLibxml::Element
    when :text then CustomizedLibxml::Text
    when :cdata then CustomizedLibxml::Cdata
    when :comment then CustomizedLibxml::Comment
    when :processing_instruction then CustomizedLibxml::ProcessingInstruction
    end

  wrapper_class ? wrapper_class.new(node) : node
end
|
|
1129
|
+
|
|
1130
|
+
# Returns the underlying native node: the object's +native+ when it offers
# one, otherwise the object itself.
def unpatch_node(node)
  return node unless node.respond_to?(:native)

  node.native
end
|
|
1134
|
+
|
|
1135
|
+
# Produces a node that can safely be attached to +target_doc+.
#
# For LibXML every node is duplicated, because nodes from a parsed document
# keep references to it and importing alone is not enough. When either
# argument is missing, +node+ is returned untouched.
def prepare_for_new_document(node, target_doc)
  if node && target_doc
    duplicate_node(node)
  else
    node
  end
end
|
|
1143
|
+
|
|
1144
|
+
private
|
|
1145
|
+
|
|
1146
|
+
# Serializes a native element to markup, emitting only the namespace
# declarations defined on the element itself, then its attributes, then its
# children. Elements are always written in the long form (<tag></tag>) for
# consistency with other adapters; whitespace-only text children are
# skipped.
def serialize_element(elem)
  markup = "<#{elem.name}"

  if elem.respond_to?(:namespaces)
    emitted = {}
    elem.namespaces.definitions.each do |ns|
      prefix = ns.prefix
      next if emitted.key?(prefix)

      emitted[prefix] = true
      declaration = (prefix.nil? || prefix.empty?) ? "xmlns" : "xmlns:#{prefix}"
      markup << " #{declaration}=\"#{escape_xml(ns.href)}\""
    end
  end

  if elem.attributes?
    elem.each_attr do |attr|
      next if attr.name.start_with?("xmlns")

      # Namespaced attributes are written with their prefix.
      qualified = attr.ns&.prefix ? "#{attr.ns.prefix}:#{attr.name}" : attr.name
      markup << " #{qualified}=\"#{escape_xml(attr.value)}\""
    end
  end

  markup << ">"
  if elem.children?
    elem.each_child do |child|
      next if child.text? && child.content.to_s.strip.empty?

      markup << serialize_node(child)
    end
  end

  markup << "</#{elem.name}>"
end
|
|
1195
|
+
|
|
1196
|
+
# Serializes a single node.
#
# PI, comment, CDATA and text wrappers render themselves through to_xml.
# Native nodes are rendered according to their LibXML node type; unknown
# types fall back to to_s.
def serialize_node(node)
  self_rendering = node.respond_to?(:to_xml) && [
    CustomizedLibxml::ProcessingInstruction,
    CustomizedLibxml::Comment,
    CustomizedLibxml::Cdata,
    CustomizedLibxml::Text,
  ].any? { |wrapper| node.is_a?(wrapper) }
  return node.to_xml if self_rendering

  case node.node_type
  when ::LibXML::XML::Node::ELEMENT_NODE then serialize_element(node)
  when ::LibXML::XML::Node::TEXT_NODE then escape_text(node.content)
  when ::LibXML::XML::Node::CDATA_SECTION_NODE then "<![CDATA[#{node.content}]]>"
  when ::LibXML::XML::Node::COMMENT_NODE then "<!-- #{node.content} -->"
  when ::LibXML::XML::Node::PI_NODE then "<?#{node.name} #{node.content}?>"
  else node.to_s
  end
end
|
|
1221
|
+
|
|
1222
|
+
# Escapes character data for use as XML text content.
#
# "&" is replaced first so the ampersands introduced by the later
# replacements are not escaped again. Quotes are left alone: they are only
# significant inside attribute values.
#
# @param text [#to_s] raw text (nil becomes "")
# @return [String] text with &, < and > replaced by entities
def escape_text(text)
  text.to_s
      .gsub("&", "&amp;")
      .gsub("<", "&lt;")
      .gsub(">", "&gt;")
end
|
|
1228
|
+
|
|
1229
|
+
# Escapes a value for use inside a double-quoted XML attribute.
#
# "&" is replaced first so the ampersands introduced by the later
# replacements are not escaped again.
#
# @param text [#to_s] raw value (nil becomes "")
# @return [String] value with &, <, > and " replaced by entities
def escape_xml(text)
  text.to_s
      .gsub("&", "&amp;")
      .gsub("<", "&lt;")
      .gsub(">", "&gt;")
      .gsub("\"", "&quot;")
end
|
|
1236
|
+
|
|
1237
|
+
# Escapes an attribute value for use inside double quotes.
#
# "&" is replaced first so the ampersands introduced by the later
# replacements are not escaped again.
#
# @param value [#to_s] raw attribute value (nil becomes "")
# @return [String] value with &, <, > and " replaced by entities
def escape_attribute_value(value)
  value.to_s
       .gsub("&", "&amp;")
       .gsub("<", "&lt;")
       .gsub(">", "&gt;")
       .gsub("\"", "&quot;")
end
|
|
1245
|
+
|
|
1246
|
+
# Appends +child+ to +element+, coping with LibXML's cross-document check.
#
# The append is attempted directly. If LibXML rejects it with a
# "different documents" error, the child is deep-imported into the target
# document (+doc+, or the element's own document) and the import is
# appended; when no target document exists, a deep copy of the child is
# appended instead. Any other LibXML error is re-raised.
def import_and_add(doc, element, child)
  return unless element && child

  element << child
rescue ::LibXML::XML::Error => e
  raise unless e.message.include?("different documents")

  owner = doc || (element.respond_to?(:doc) ? element.doc : nil)
  # Deep import keeps all descendants; without a document, copy instead.
  adopted = owner ? owner.import(child, true) : duplicate_node(child)
  element << adopted
end
|
|
1273
|
+
|
|
1274
|
+
# Converts a LibXML encoding constant to its name.
#
# nil/false gives nil and strings are returned as they are. Constants
# other than the ones listed below are reported as "UTF-8".
def encoding_to_string(encoding)
  return nil unless encoding
  return encoding if encoding.is_a?(String)

  case encoding
  when ::LibXML::XML::Encoding::UTF_8 then "UTF-8"
  when ::LibXML::XML::Encoding::ISO_8859_1 then "ISO-8859-1"
  when ::LibXML::XML::Encoding::UTF_16LE then "UTF-16LE"
  when ::LibXML::XML::Encoding::UTF_16BE then "UTF-16BE"
  when ::LibXML::XML::Encoding::UCS_2 then "UCS-2"
  else "UTF-8"
  end
end
|
|
1293
|
+
|
|
1294
|
+
# Converts an encoding name to the LibXML encoding constant.
#
# The name is matched case-insensitively, with "-" and "_" treated alike.
# nil/false gives nil; unrecognized names map to UTF-8.
def string_to_encoding(str)
  return nil unless str

  normalized = str.upcase.tr("-", "_")

  case normalized
  when "UTF_8", "UTF8" then ::LibXML::XML::Encoding::UTF_8
  when "ISO_8859_1", "ISO88591" then ::LibXML::XML::Encoding::ISO_8859_1
  when "UTF_16LE", "UTF16LE" then ::LibXML::XML::Encoding::UTF_16LE
  when "UTF_16BE", "UTF16BE" then ::LibXML::XML::Encoding::UTF_16BE
  else ::LibXML::XML::Encoding::UTF_8
  end
end
|
|
1310
|
+
|
|
1311
|
+
# Serializes an element and its subtree with explicit control over which
# namespace declarations are written.
#
# With include_ns = true (used for the root) every declaration defined on
# the element is emitted. For child elements (include_ns = false) a
# declaration is emitted only when it rebinds a prefix the parent already
# has to a different URI. Elements are always written in the long form
# (<tag></tag>); whitespace-only text children are skipped.
def serialize_element_with_namespaces(elem, include_ns = true)
  markup = "<#{elem.name}"

  if elem.respond_to?(:namespaces) && elem.namespaces.respond_to?(:definitions)
    # prefix => URI as seen from the parent, used to detect overrides.
    inherited = {}
    if !include_ns && elem.respond_to?(:parent) && elem.parent &&
       !elem.parent.is_a?(::LibXML::XML::Document) &&
       elem.parent.respond_to?(:namespaces)
      elem.parent.namespaces.each { |ns| inherited[ns.prefix] = ns.href }
    end

    emitted = {}
    elem.namespaces.definitions.each do |ns|
      prefix = ns.prefix
      uri = ns.href
      next if emitted.key?(prefix)

      rebinds_parent = inherited.key?(prefix) && inherited[prefix] != uri
      next unless include_ns || rebinds_parent

      emitted[prefix] = true
      declaration = (prefix.nil? || prefix.empty?) ? "xmlns" : "xmlns:#{prefix}"
      markup << " #{declaration}=\"#{escape_xml(uri)}\""
    end
  end

  if elem.attributes?
    elem.each_attr do |attr|
      next if attr.name.start_with?("xmlns")

      # Namespaced attributes are written with their prefix.
      qualified = attr.ns&.prefix ? "#{attr.ns.prefix}:#{attr.name}" : attr.name
      markup << " #{qualified}=\"#{escape_xml(attr.value)}\""
    end
  end

  markup << ">"
  if elem.children?
    elem.each_child do |child|
      next if child.text? && child.content.to_s.strip.empty?

      wrapped = patch_node(child)
      rendered =
        if wrapped.respond_to?(:to_xml) && !wrapped.is_a?(::LibXML::XML::Node)
          # A real wrapper knows how to render itself.
          wrapped.to_xml
        elsif child.element?
          serialize_element_with_namespaces(child, false)
        else
          serialize_node(child)
        end
      markup << rendered
    end
  end

  markup << "</#{elem.name}>"
end
|
|
1394
|
+
|
|
1395
|
+
# Collapses formatted XML: whitespace between adjacent tags is dropped,
# then every remaining newline is removed together with the whitespace
# that follows it.
def remove_indentation(xml_string)
  tags_joined = xml_string.gsub(/>\s+</, "><")
  tags_joined.gsub(/\n\s*/, "")
end
|
|
1399
|
+
|
|
1400
|
+
# Collects a prefix => URI map of the namespaces found anywhere in the tree
# that contains +node+.
#
# The scan starts at the document's root element; for a non-document node
# the topmost ancestor below the document is located first. An empty hash
# is returned when there is no root.
def collect_namespace_definitions(node)
  found = {}

  if node.is_a?(::LibXML::XML::Document)
    top = node.root
  else
    top = node
    # Climb until the parent is missing or is the document itself.
    while top.respond_to?(:parent) && top.parent &&
          !top.parent.is_a?(::LibXML::XML::Document)
      top = top.parent
    end
  end

  collect_ns_from_subtree(top, found) if top
  found
end
|
|
1420
|
+
|
|
1421
|
+
# Recursively gathers namespace prefix => URI pairs from +node+ and its
# element descendants into +ns_defs+.
#
# For each visited node, every namespace yielded by +node.namespaces+ is
# registered first, followed by the node's active namespace
# (+node.namespaces.namespace+) when that accessor exists. Nodes that do
# not respond to +namespaces+ or +children?+ contribute nothing.
#
# @param node [Object] node to scan
# @param ns_defs [Hash] accumulator, mutated in place
# @return [Hash] the same +ns_defs+ object, also for nodes without children
def collect_ns_from_subtree(node, ns_defs)
  if node.respond_to?(:namespaces)
    scope = node.namespaces
    scope.each { |ns| store_first_seen_namespace(ns_defs, ns) }

    # The active namespace may be inherited from a parent element.
    if scope.respond_to?(:namespace)
      active_ns = scope.namespace
      store_first_seen_namespace(ns_defs, active_ns) if active_ns
    end
  end

  if node.respond_to?(:children?) && node.children?
    node.each_child do |child|
      collect_ns_from_subtree(child, ns_defs) if child.element?
    end
  end

  ns_defs
end

# Registers +ns+ in +ns_defs+ unless its key is already present.
#
# A nil or empty prefix (default namespace) is stored under "xmlns".
# The first URI recorded for a key is kept; later ones are ignored.
def store_first_seen_namespace(ns_defs, ns)
  prefix = ns.prefix
  key = prefix.nil? || prefix.empty? ? "xmlns" : prefix
  ns_defs[key] = ns.href unless ns_defs.key?(key)
end
|
|
1464
|
+
|
|
1465
|
+
# Assembles the namespace mapping used for an XPath query.
#
# The definitions collected from the tree around +node+ form the base;
# entries in +user_namespaces+ are merged on top and replace collected
# entries that share the same key.
#
# @param node [Object] node the query is evaluated against
# @param user_namespaces [Hash, nil] caller-supplied prefix => URI pairs
# @return [Hash] merged namespace context
def build_xpath_namespaces(node, user_namespaces)
  collected = collect_namespace_definitions(node)
  return collected if !user_namespaces || user_namespaces.empty?

  collected.merge(user_namespaces)
end
|
|
1476
|
+
|
|
1477
|
+
# Looks up the namespace bound to +prefix+, starting at +element+ and
# moving outwards through its ancestors.
#
# Each visited node that responds to +namespaces+ is searched in order
# and the first namespace whose prefix equals +prefix+ is returned. The
# walk ends when a node has no +parent+ method or its parent is nil.
#
# @param element [Object, nil] node where the search starts
# @param prefix [String, nil] prefix to match
# @return [Object, nil] the matching namespace, or nil when none matches
def find_namespace_by_prefix(element, prefix)
  node = element
  while node
    if node.respond_to?(:namespaces)
      node.namespaces.each { |ns| return ns if ns.prefix == prefix }
    end
    break unless node.respond_to?(:parent)

    node = node.parent
  end
  nil
end
|
|
1490
|
+
end
|
|
1491
|
+
|
|
1492
|
+
# Bridge between LibXML SAX and Moxml SAX
#
# Translates LibXML::XML::SaxParser events to Moxml::SAX::Handler events.
# Every callback forwards to the wrapped handler; only element starts,
# processing instructions and errors are reshaped on the way.
#
# @private
class LibXMLSAXBridge
  include ::LibXML::XML::SaxParser::Callbacks

  # @param handler [Object] receiver of the translated events
  def initialize(handler)
    @handler = handler
  end

  # Map LibXML events to Moxml events

  def on_start_document
    @handler.on_start_document
  end

  def on_end_document
    @handler.on_end_document
  end

  # Splits the incoming attribute pairs into plain attributes and
  # namespace declarations before forwarding the event.
  #
  # Only the exact name "xmlns" (stored under a nil prefix) and names
  # beginning with "xmlns:" count as namespace declarations. Any other
  # attribute, including one that merely starts with the letters
  # "xmlns", is forwarded as an ordinary attribute.
  #
  # @param name [#to_s] element name
  # @param attributes [#each, nil] name/value pairs; nil is treated as none
  def on_start_element(name, attributes)
    attr_hash = {}
    ns_hash = {}

    attributes&.each do |attr_name, attr_value|
      key = attr_name.to_s
      if key == "xmlns"
        # Default namespace declaration
        ns_hash[nil] = attr_value
      elsif key.start_with?("xmlns:")
        # Prefixed namespace declaration
        ns_hash[key.sub("xmlns:", "")] = attr_value
      else
        attr_hash[key] = attr_value
      end
    end

    @handler.on_start_element(name.to_s, attr_hash, ns_hash)
  end

  def on_end_element(name)
    @handler.on_end_element(name.to_s)
  end

  def on_characters(chars)
    @handler.on_characters(chars)
  end

  def on_cdata_block(content)
    @handler.on_cdata(content)
  end

  def on_comment(msg)
    @handler.on_comment(msg)
  end

  # Missing instruction data is forwarded as an empty string.
  def on_processing_instruction(target, data)
    @handler.on_processing_instruction(target, data || "")
  end

  # Wraps the message in a Moxml::ParseError before handing it on.
  def on_error(msg)
    @handler.on_error(Moxml::ParseError.new(msg))
  end
end
|
|
1562
|
+
end
|
|
1563
|
+
end
|
|
1564
|
+
end
|