moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
data/lib/moxml/adapter/ox.rb
CHANGED
|
@@ -2,12 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "base"
|
|
4
4
|
require "ox"
|
|
5
|
+
require "stringio"
|
|
5
6
|
require_relative "customized_ox/text"
|
|
6
7
|
require_relative "customized_ox/attribute"
|
|
7
8
|
require_relative "customized_ox/namespace"
|
|
8
9
|
|
|
9
10
|
# insert :parent methods to all Ox classes inherit the Node class
|
|
10
|
-
|
|
11
|
+
Ox::Node.attr_accessor :parent
|
|
11
12
|
module Moxml
|
|
12
13
|
module Adapter
|
|
13
14
|
class Ox < Base
|
|
@@ -29,12 +30,37 @@ module Moxml
|
|
|
29
30
|
doc
|
|
30
31
|
end
|
|
31
32
|
rescue ::Ox::ParseError => e
|
|
32
|
-
raise Moxml::ParseError
|
|
33
|
+
raise Moxml::ParseError.new(
|
|
34
|
+
e.message,
|
|
35
|
+
source: xml.is_a?(String) ? xml[0..100] : nil,
|
|
36
|
+
)
|
|
33
37
|
end
|
|
34
38
|
|
|
35
39
|
DocumentBuilder.new(Context.new(:ox)).build(native_doc)
|
|
36
40
|
end
|
|
37
41
|
|
|
42
|
+
# SAX parsing implementation for Ox
|
|
43
|
+
#
|
|
44
|
+
# @param xml [String, IO] XML to parse
|
|
45
|
+
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
|
|
46
|
+
# @return [void]
|
|
47
|
+
def sax_parse(xml, handler)
|
|
48
|
+
# Create bridge that translates Ox SAX to Moxml SAX
|
|
49
|
+
bridge = OxSAXBridge.new(handler)
|
|
50
|
+
|
|
51
|
+
# Parse using Ox's SAX parser
|
|
52
|
+
xml_string = xml.respond_to?(:read) ? xml.read : xml.to_s
|
|
53
|
+
|
|
54
|
+
begin
|
|
55
|
+
::Ox.sax_parse(bridge, StringIO.new(xml_string))
|
|
56
|
+
# Ox doesn't automatically call end_document, so we do it manually
|
|
57
|
+
bridge.end_document
|
|
58
|
+
rescue ::Ox::ParseError => e
|
|
59
|
+
error = Moxml::ParseError.new(e.message)
|
|
60
|
+
handler.on_error(error)
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
38
64
|
def create_document(native_doc = nil)
|
|
39
65
|
attrs = native_doc&.attributes || {}
|
|
40
66
|
::Ox::Document.new(**attrs)
|
|
@@ -60,7 +86,7 @@ module Moxml
|
|
|
60
86
|
|
|
61
87
|
def create_native_doctype(name, external_id, system_id)
|
|
62
88
|
::Ox::DocType.new(
|
|
63
|
-
"#{name} PUBLIC \"#{external_id}\" \"#{system_id}\""
|
|
89
|
+
"#{name} PUBLIC \"#{external_id}\" \"#{system_id}\"",
|
|
64
90
|
)
|
|
65
91
|
end
|
|
66
92
|
|
|
@@ -87,7 +113,8 @@ module Moxml
|
|
|
87
113
|
end
|
|
88
114
|
|
|
89
115
|
def create_native_namespace(element, prefix, uri)
|
|
90
|
-
ns = ::Moxml::Adapter::CustomizedOx::Namespace.new(prefix, uri,
|
|
116
|
+
ns = ::Moxml::Adapter::CustomizedOx::Namespace.new(prefix, uri,
|
|
117
|
+
element)
|
|
91
118
|
set_attribute(element, ns.expanded_prefix, uri)
|
|
92
119
|
ns
|
|
93
120
|
end
|
|
@@ -97,8 +124,12 @@ module Moxml
|
|
|
97
124
|
|
|
98
125
|
prefix = ns.prefix
|
|
99
126
|
# attributes don't have attributes but can have a namespace prefix
|
|
100
|
-
|
|
101
|
-
|
|
127
|
+
if element.respond_to?(:attributes)
|
|
128
|
+
set_attribute(element, ns.expanded_prefix,
|
|
129
|
+
ns.uri)
|
|
130
|
+
end
|
|
131
|
+
element.name = [prefix,
|
|
132
|
+
element.name.delete_prefix("xmlns:")].compact.join(":")
|
|
102
133
|
namespace(element)
|
|
103
134
|
end
|
|
104
135
|
|
|
@@ -151,9 +182,14 @@ module Moxml
|
|
|
151
182
|
end
|
|
152
183
|
|
|
153
184
|
def node_name(node)
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
185
|
+
name = begin
|
|
186
|
+
node.value
|
|
187
|
+
rescue StandardError
|
|
188
|
+
node.name
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
# Strip namespace prefix if present
|
|
192
|
+
name.to_s.split(":", 2).last
|
|
157
193
|
end
|
|
158
194
|
|
|
159
195
|
def set_node_name(node, name)
|
|
@@ -172,8 +208,12 @@ module Moxml
|
|
|
172
208
|
new_node =
|
|
173
209
|
case node
|
|
174
210
|
# it can be either attribute or namespace
|
|
175
|
-
when Array then ::Moxml::Adapter::CustomizedOx::Attribute.new(
|
|
176
|
-
|
|
211
|
+
when Array then ::Moxml::Adapter::CustomizedOx::Attribute.new(
|
|
212
|
+
node.first, node.last
|
|
213
|
+
)
|
|
214
|
+
when Hash then ::Moxml::Adapter::CustomizedOx::Attribute.new(
|
|
215
|
+
node.keys.first, node.values.first
|
|
216
|
+
)
|
|
177
217
|
when String then ::Moxml::Adapter::CustomizedOx::Text.new(node)
|
|
178
218
|
else node
|
|
179
219
|
end
|
|
@@ -186,7 +226,8 @@ module Moxml
|
|
|
186
226
|
def unpatch_node(node)
|
|
187
227
|
case node
|
|
188
228
|
# it can be either attribute or namespace
|
|
189
|
-
when ::Moxml::Adapter::CustomizedOx::Attribute then [node.name,
|
|
229
|
+
when ::Moxml::Adapter::CustomizedOx::Attribute then [node.name,
|
|
230
|
+
node.value]
|
|
190
231
|
# when ::Moxml::Adapter::CustomizedOx::Attribute then { node.name => node.value }
|
|
191
232
|
when ::Moxml::Adapter::CustomizedOx::Text then node.value
|
|
192
233
|
else node
|
|
@@ -230,15 +271,18 @@ module Moxml
|
|
|
230
271
|
end
|
|
231
272
|
|
|
232
273
|
def attributes(element)
|
|
233
|
-
|
|
274
|
+
unless element.respond_to?(:attributes) && element.attributes
|
|
275
|
+
return []
|
|
276
|
+
end
|
|
234
277
|
|
|
235
|
-
element.attributes.
|
|
236
|
-
next if name.start_with?("xmlns")
|
|
278
|
+
element.attributes.filter_map do |name, value|
|
|
279
|
+
next if name.to_s.start_with?("xmlns")
|
|
237
280
|
|
|
281
|
+
# Ensure value is passed correctly - Ox stores with symbol keys
|
|
238
282
|
::Moxml::Adapter::CustomizedOx::Attribute.new(
|
|
239
|
-
name, value, element
|
|
283
|
+
name.to_s, value, element
|
|
240
284
|
)
|
|
241
|
-
end
|
|
285
|
+
end
|
|
242
286
|
end
|
|
243
287
|
|
|
244
288
|
def attribute_element(attribute)
|
|
@@ -280,10 +324,15 @@ module Moxml
|
|
|
280
324
|
|
|
281
325
|
def get_attribute(element, name)
|
|
282
326
|
return unless element.respond_to?(:attributes) && element.attributes
|
|
283
|
-
|
|
327
|
+
unless element.attributes.key?(name.to_s) || element.attributes.key?(name.to_s.to_sym)
|
|
328
|
+
return
|
|
329
|
+
end
|
|
330
|
+
|
|
331
|
+
# Ox stores attributes with symbol keys, so try both string and symbol
|
|
332
|
+
value = element.attributes[name.to_s] || element.attributes[name.to_s.to_sym]
|
|
284
333
|
|
|
285
334
|
::Moxml::Adapter::CustomizedOx::Attribute.new(
|
|
286
|
-
name.to_s,
|
|
335
|
+
name.to_s, value, element
|
|
287
336
|
)
|
|
288
337
|
end
|
|
289
338
|
|
|
@@ -299,6 +348,24 @@ module Moxml
|
|
|
299
348
|
end
|
|
300
349
|
|
|
301
350
|
def add_child(element, child)
|
|
351
|
+
# Special handling for declarations on Ox documents
|
|
352
|
+
if element.is_a?(::Ox::Document) && child.is_a?(::Ox::Instruct) && child.target == "xml"
|
|
353
|
+
# Transfer declaration attributes to document
|
|
354
|
+
element.attributes ||= {}
|
|
355
|
+
if child.attributes["version"]
|
|
356
|
+
element.attributes[:version] =
|
|
357
|
+
child.attributes["version"]
|
|
358
|
+
end
|
|
359
|
+
if child.attributes["encoding"]
|
|
360
|
+
element.attributes[:encoding] =
|
|
361
|
+
child.attributes["encoding"]
|
|
362
|
+
end
|
|
363
|
+
if child.attributes["standalone"]
|
|
364
|
+
element.attributes[:standalone] =
|
|
365
|
+
child.attributes["standalone"]
|
|
366
|
+
end
|
|
367
|
+
end
|
|
368
|
+
|
|
302
369
|
child.parent = element if child.respond_to?(:parent)
|
|
303
370
|
element.nodes ||= []
|
|
304
371
|
element.nodes << child
|
|
@@ -331,11 +398,22 @@ module Moxml
|
|
|
331
398
|
|
|
332
399
|
return unless parent(node)
|
|
333
400
|
|
|
334
|
-
|
|
401
|
+
# Special handling for declarations on Ox documents
|
|
402
|
+
if parent(node).is_a?(::Ox::Document) && node.is_a?(::Ox::Instruct) && node.target == "xml"
|
|
403
|
+
# Clear declaration attributes from document
|
|
404
|
+
doc = parent(node)
|
|
405
|
+
doc.attributes&.delete(:version)
|
|
406
|
+
doc.attributes&.delete(:encoding)
|
|
407
|
+
doc.attributes&.delete(:standalone)
|
|
408
|
+
end
|
|
409
|
+
|
|
410
|
+
parent(node).nodes.delete(unpatch_node(node))
|
|
335
411
|
end
|
|
336
412
|
|
|
337
413
|
def replace(node, new_node)
|
|
338
|
-
|
|
414
|
+
if node.is_a?(String) && new_node.is_a?(String)
|
|
415
|
+
return node.replace(new_node)
|
|
416
|
+
end
|
|
339
417
|
# There are other cases:
|
|
340
418
|
# when node is a String and new_node isn't
|
|
341
419
|
# when node isn't a String, and new_node is a String
|
|
@@ -357,10 +435,14 @@ module Moxml
|
|
|
357
435
|
end
|
|
358
436
|
|
|
359
437
|
def text_content(node)
|
|
438
|
+
return "" if node.nil?
|
|
439
|
+
|
|
360
440
|
case node
|
|
361
441
|
when String then node.to_s
|
|
362
442
|
when ::Moxml::Adapter::CustomizedOx::Text then node.value
|
|
363
443
|
else
|
|
444
|
+
return "" unless node.respond_to?(:nodes)
|
|
445
|
+
|
|
364
446
|
node.nodes.map do |n|
|
|
365
447
|
text_content(n)
|
|
366
448
|
end.join
|
|
@@ -428,9 +510,38 @@ module Moxml
|
|
|
428
510
|
end.values
|
|
429
511
|
end
|
|
430
512
|
|
|
431
|
-
def xpath(node, expression,
|
|
432
|
-
#
|
|
433
|
-
|
|
513
|
+
def xpath(node, expression, namespaces = {})
|
|
514
|
+
# Translate common XPath patterns to Ox locate() syntax
|
|
515
|
+
locate_expr = translate_xpath_to_locate(expression, namespaces)
|
|
516
|
+
|
|
517
|
+
# Ox's locate() works differently on documents vs elements
|
|
518
|
+
# For relative descendant searches on elements, we need special handling
|
|
519
|
+
if expression.start_with?(".//") && node.is_a?(::Ox::Element)
|
|
520
|
+
# Manually search descendants for relative paths from elements
|
|
521
|
+
element_name = locate_expr.sub("?/", "")
|
|
522
|
+
results = []
|
|
523
|
+
traverse(node) do |n|
|
|
524
|
+
next unless n.is_a?(::Ox::Element)
|
|
525
|
+
|
|
526
|
+
results << n if n.name == element_name || element_name.empty?
|
|
527
|
+
end
|
|
528
|
+
return results.map do |n|
|
|
529
|
+
patch_node(n, find_parent_in_tree(n, node))
|
|
530
|
+
end
|
|
531
|
+
end
|
|
532
|
+
|
|
533
|
+
# Use Ox's locate method for other cases
|
|
534
|
+
results = node.locate(locate_expr)
|
|
535
|
+
|
|
536
|
+
# Wrap results and set their parents by finding them in the tree
|
|
537
|
+
results.map { |n| patch_node(n, find_parent_in_tree(n, node)) }
|
|
538
|
+
rescue StandardError => e
|
|
539
|
+
raise Moxml::XPathError.new(
|
|
540
|
+
"XPath translation failed: #{e.message}",
|
|
541
|
+
expression: expression,
|
|
542
|
+
adapter: "Ox",
|
|
543
|
+
node: node,
|
|
544
|
+
)
|
|
434
545
|
end
|
|
435
546
|
|
|
436
547
|
def at_xpath(node, expression, namespaces = {})
|
|
@@ -440,9 +551,24 @@ module Moxml
|
|
|
440
551
|
def serialize(node, options = {})
|
|
441
552
|
output = ""
|
|
442
553
|
if node.is_a?(::Ox::Document)
|
|
443
|
-
#
|
|
444
|
-
|
|
445
|
-
|
|
554
|
+
# Check if we should include declaration
|
|
555
|
+
# Priority: explicit option > document attributes
|
|
556
|
+
should_include_decl = if options.key?(:no_declaration)
|
|
557
|
+
!options[:no_declaration]
|
|
558
|
+
else
|
|
559
|
+
# Check if document has declaration attributes
|
|
560
|
+
node[:version] || node[:encoding] || node[:standalone]
|
|
561
|
+
end
|
|
562
|
+
|
|
563
|
+
# Only add declaration if should_include_decl is true
|
|
564
|
+
if should_include_decl
|
|
565
|
+
version = node[:version] || "1.0"
|
|
566
|
+
encoding = options[:encoding] || node[:encoding]
|
|
567
|
+
standalone = node[:standalone]
|
|
568
|
+
|
|
569
|
+
decl = create_native_declaration(version, encoding, standalone)
|
|
570
|
+
output = ::Ox.dump(::Ox::Document.new << decl).strip
|
|
571
|
+
end
|
|
446
572
|
end
|
|
447
573
|
|
|
448
574
|
ox_options = {
|
|
@@ -450,13 +576,76 @@ module Moxml
|
|
|
450
576
|
# with_xml: true,
|
|
451
577
|
with_instructions: true,
|
|
452
578
|
encoding: options[:encoding],
|
|
453
|
-
no_empty: options[:expand_empty]
|
|
579
|
+
no_empty: options[:expand_empty],
|
|
454
580
|
}
|
|
455
581
|
output + ::Ox.dump(node, ox_options)
|
|
456
582
|
end
|
|
457
583
|
|
|
458
584
|
private
|
|
459
585
|
|
|
586
|
+
# Translate a subset of XPath to Ox locate() syntax
|
|
587
|
+
# Supports: //element, /path/to/element, .//element, element[@attr]
|
|
588
|
+
# Note: Ox locate() doesn't support namespace prefixes in the path
|
|
589
|
+
def translate_xpath_to_locate(xpath, namespaces = {})
|
|
590
|
+
expr = xpath.dup
|
|
591
|
+
|
|
592
|
+
# Strip namespace prefixes from element names
|
|
593
|
+
# XPath: //ns:element → locate: element
|
|
594
|
+
if namespaces && !namespaces.empty?
|
|
595
|
+
namespaces.each_key do |prefix|
|
|
596
|
+
expr = expr.gsub("/#{prefix}:", "/")
|
|
597
|
+
expr = expr.gsub("/*#{prefix}:", "/*")
|
|
598
|
+
expr = expr.gsub("//*#{prefix}:", "//")
|
|
599
|
+
expr = expr.gsub("//#{prefix}:", "//")
|
|
600
|
+
expr = expr.gsub("///#{prefix}:", "///")
|
|
601
|
+
end
|
|
602
|
+
end
|
|
603
|
+
|
|
604
|
+
# Remove any remaining namespace prefixes
|
|
605
|
+
# Use possessive quantifier to prevent ReDoS
|
|
606
|
+
expr = expr.gsub(/[a-zA-Z_][\w-]*+:/, "")
|
|
607
|
+
|
|
608
|
+
# Remove attribute predicates for now - we'll filter manually
|
|
609
|
+
# Save the attribute name if present
|
|
610
|
+
expr = expr.gsub(/\[@(\w+)\]/, "")
|
|
611
|
+
|
|
612
|
+
# XPath: //element → locate: ?/element (any depth)
|
|
613
|
+
# Note: In Ox, ?/ means "any path"
|
|
614
|
+
expr = expr.sub(%r{^//}, "?/") if expr.start_with?("//")
|
|
615
|
+
|
|
616
|
+
# XPath: .//element → locate: ?/element (relative any depth)
|
|
617
|
+
# For relative paths from an element, we still use ?/ which searches
|
|
618
|
+
# descendants
|
|
619
|
+
expr = expr.sub(%r{^\.//}, "?/") if expr.start_with?(".//")
|
|
620
|
+
|
|
621
|
+
# XPath: /root/child → locate: root/child (absolute path)
|
|
622
|
+
# Remove leading / for Ox
|
|
623
|
+
expr = expr.sub(%r{^/}, "")
|
|
624
|
+
|
|
625
|
+
# XPath: ./element → locate: element (direct child, just remove ./)
|
|
626
|
+
expr.sub(%r{^\./}, "")
|
|
627
|
+
end
|
|
628
|
+
|
|
629
|
+
# Find the actual parent of a node by searching the tree
|
|
630
|
+
def find_parent_in_tree(target_node, search_root)
|
|
631
|
+
# Start from the document root if we have a document
|
|
632
|
+
root = search_root.is_a?(::Ox::Document) ? search_root : document(search_root)
|
|
633
|
+
|
|
634
|
+
result = nil
|
|
635
|
+
traverse(root) do |node|
|
|
636
|
+
next unless node.respond_to?(:nodes)
|
|
637
|
+
|
|
638
|
+
node.nodes&.each do |child|
|
|
639
|
+
if child.equal?(target_node)
|
|
640
|
+
result = node
|
|
641
|
+
break
|
|
642
|
+
end
|
|
643
|
+
end
|
|
644
|
+
break if result
|
|
645
|
+
end
|
|
646
|
+
result
|
|
647
|
+
end
|
|
648
|
+
|
|
460
649
|
def traverse(node, &block)
|
|
461
650
|
return unless node
|
|
462
651
|
|
|
@@ -467,5 +656,110 @@ module Moxml
|
|
|
467
656
|
end
|
|
468
657
|
end
|
|
469
658
|
end
|
|
659
|
+
|
|
660
|
+
# Bridge between Ox SAX and Moxml SAX
|
|
661
|
+
#
|
|
662
|
+
# Translates Ox::Sax events to Moxml::SAX::Handler events.
|
|
663
|
+
# Ox has a unique SAX pattern where attributes are delivered AFTER start_element.
|
|
664
|
+
#
|
|
665
|
+
# @private
|
|
666
|
+
class OxSAXBridge
|
|
667
|
+
def initialize(handler)
|
|
668
|
+
@handler = handler
|
|
669
|
+
@pending_attrs = {}
|
|
670
|
+
@pending_element_name = nil
|
|
671
|
+
@element_started = false
|
|
672
|
+
@document_started = false
|
|
673
|
+
end
|
|
674
|
+
|
|
675
|
+
# Ox delivers attributes AFTER start_element
|
|
676
|
+
def attr(name, value)
|
|
677
|
+
@pending_attrs[name] = value
|
|
678
|
+
end
|
|
679
|
+
|
|
680
|
+
# Called when element starts (but attributes come AFTER this)
|
|
681
|
+
def start_element(name)
|
|
682
|
+
# If we had a previous element waiting, we need to finalize it first
|
|
683
|
+
if @pending_element_name
|
|
684
|
+
finalize_pending_element
|
|
685
|
+
end
|
|
686
|
+
|
|
687
|
+
# Store this element name (convert symbol to string)
|
|
688
|
+
@pending_element_name = name.to_s
|
|
689
|
+
@element_started = true
|
|
690
|
+
|
|
691
|
+
# Call on_start_document if this is the first element
|
|
692
|
+
unless @document_started
|
|
693
|
+
@handler.on_start_document
|
|
694
|
+
@document_started = true
|
|
695
|
+
end
|
|
696
|
+
end
|
|
697
|
+
|
|
698
|
+
def end_element(name)
|
|
699
|
+
# Finalize any pending element before ending
|
|
700
|
+
if @pending_element_name
|
|
701
|
+
finalize_pending_element
|
|
702
|
+
end
|
|
703
|
+
|
|
704
|
+
# Convert symbol to string
|
|
705
|
+
@handler.on_end_element(name.to_s)
|
|
706
|
+
end
|
|
707
|
+
|
|
708
|
+
# Ox only has text() - no separate CDATA, comment, or PI events
|
|
709
|
+
def text(string)
|
|
710
|
+
# Finalize any pending element before text
|
|
711
|
+
if @pending_element_name
|
|
712
|
+
finalize_pending_element
|
|
713
|
+
end
|
|
714
|
+
|
|
715
|
+
@handler.on_characters(string)
|
|
716
|
+
end
|
|
717
|
+
|
|
718
|
+
def error(message, line, column)
|
|
719
|
+
error = Moxml::ParseError.new(message, line: line, column: column)
|
|
720
|
+
@handler.on_error(error)
|
|
721
|
+
end
|
|
722
|
+
|
|
723
|
+
# Called at end of parsing (not automatically by Ox)
|
|
724
|
+
def end_document
|
|
725
|
+
# Finalize any pending element
|
|
726
|
+
if @pending_element_name
|
|
727
|
+
finalize_pending_element
|
|
728
|
+
end
|
|
729
|
+
|
|
730
|
+
@handler.on_end_document if @document_started
|
|
731
|
+
end
|
|
732
|
+
|
|
733
|
+
private
|
|
734
|
+
|
|
735
|
+
def finalize_pending_element
|
|
736
|
+
# Separate namespace declarations from regular attributes
|
|
737
|
+
attr_hash = {}
|
|
738
|
+
namespaces_hash = {}
|
|
739
|
+
|
|
740
|
+
@pending_attrs.each do |attr_name, attr_value|
|
|
741
|
+
if attr_name.to_s.start_with?("xmlns")
|
|
742
|
+
# Namespace declaration
|
|
743
|
+
prefix = if attr_name.to_s == "xmlns"
|
|
744
|
+
nil
|
|
745
|
+
else
|
|
746
|
+
attr_name.to_s.sub(
|
|
747
|
+
"xmlns:", ""
|
|
748
|
+
)
|
|
749
|
+
end
|
|
750
|
+
namespaces_hash[prefix] = attr_value
|
|
751
|
+
else
|
|
752
|
+
attr_hash[attr_name.to_s] = attr_value
|
|
753
|
+
end
|
|
754
|
+
end
|
|
755
|
+
|
|
756
|
+
@handler.on_start_element(@pending_element_name, attr_hash,
|
|
757
|
+
namespaces_hash)
|
|
758
|
+
|
|
759
|
+
# Clear for next element
|
|
760
|
+
@pending_attrs = {}
|
|
761
|
+
@pending_element_name = nil
|
|
762
|
+
end
|
|
763
|
+
end
|
|
470
764
|
end
|
|
471
765
|
end
|