moxml 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +238 -40
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +45 -0
- data/docs/_guides/modifying-xml.adoc +293 -0
- data/docs/_guides/parsing-xml.adoc +231 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +98 -0
- data/docs/_pages/adapters/libxml.adoc +286 -0
- data/docs/_pages/adapters/nokogiri.adoc +252 -0
- data/docs/_pages/adapters/oga.adoc +292 -0
- data/docs/_pages/adapters/ox.adoc +55 -0
- data/docs/_pages/adapters/rexml.adoc +293 -0
- data/docs/_pages/best-practices.adoc +430 -0
- data/docs/_pages/compatibility.adoc +468 -0
- data/docs/_pages/configuration.adoc +251 -0
- data/docs/_pages/error-handling.adoc +350 -0
- data/docs/_pages/headed-ox-limitations.adoc +558 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +141 -0
- data/docs/_pages/node-api-reference.adoc +50 -0
- data/docs/_pages/performance.adoc +36 -0
- data/docs/_pages/quick-start.adoc +244 -0
- data/docs/_pages/thread-safety.adoc +29 -0
- data/docs/_references/document-api.adoc +408 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +268 -0
- data/docs/_tutorials/builder-pattern.adoc +343 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +325 -0
- data/docs/_tutorials/xpath-queries.adoc +359 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1548 -0
- data/lib/moxml/adapter/nokogiri.rb +121 -9
- data/lib/moxml/adapter/oga.rb +123 -12
- data/lib/moxml/adapter/ox.rb +282 -26
- data/lib/moxml/adapter/rexml.rb +127 -20
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +39 -1
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +39 -6
- data/lib/moxml/document_builder.rb +27 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +94 -3
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1768 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +176 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
data/lib/moxml/adapter/ox.rb
CHANGED
|
@@ -2,12 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "base"
|
|
4
4
|
require "ox"
|
|
5
|
+
require "stringio"
|
|
5
6
|
require_relative "customized_ox/text"
|
|
6
7
|
require_relative "customized_ox/attribute"
|
|
7
8
|
require_relative "customized_ox/namespace"
|
|
8
9
|
|
|
9
10
|
# add a :parent accessor to Ox::Node so that every Ox class inheriting from it can track its parent
|
|
10
|
-
|
|
11
|
+
Ox::Node.attr_accessor :parent
|
|
11
12
|
module Moxml
|
|
12
13
|
module Adapter
|
|
13
14
|
class Ox < Base
|
|
@@ -29,12 +30,37 @@ module Moxml
|
|
|
29
30
|
doc
|
|
30
31
|
end
|
|
31
32
|
rescue ::Ox::ParseError => e
|
|
32
|
-
raise Moxml::ParseError
|
|
33
|
+
raise Moxml::ParseError.new(
|
|
34
|
+
e.message,
|
|
35
|
+
source: xml.is_a?(String) ? xml[0..100] : nil,
|
|
36
|
+
)
|
|
33
37
|
end
|
|
34
38
|
|
|
35
39
|
DocumentBuilder.new(Context.new(:ox)).build(native_doc)
|
|
36
40
|
end
|
|
37
41
|
|
|
42
|
+
# SAX parsing implementation for Ox
#
# Ox SAX callbacks are received by an OxSAXBridge, which re-emits them as
# Moxml SAX handler events.
#
# @param xml [String, IO] XML to parse; input responding to +read+ is
#   handed to Ox as-is, anything else is converted with +to_s+ and wrapped
#   in a StringIO
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
# @return [void]
def sax_parse(xml, handler)
  # Create bridge that translates Ox SAX to Moxml SAX
  bridge = OxSAXBridge.new(handler)

  # Give IO-like input to Ox directly instead of reading it into one
  # String and re-wrapping it; only plain strings need a StringIO.
  io = xml.respond_to?(:read) ? xml : StringIO.new(xml.to_s)

  begin
    ::Ox.sax_parse(bridge, io)
    # Ox doesn't automatically call end_document, so we do it manually
    bridge.end_document
  rescue ::Ox::ParseError => e
    # Parse failures are reported through the handler, not raised
    handler.on_error(Moxml::ParseError.new(e.message))
  end
end
|
|
63
|
+
|
|
38
64
|
def create_document(native_doc = nil)
|
|
39
65
|
attrs = native_doc&.attributes || {}
|
|
40
66
|
::Ox::Document.new(**attrs)
|
|
@@ -60,7 +86,7 @@ module Moxml
|
|
|
60
86
|
|
|
61
87
|
def create_native_doctype(name, external_id, system_id)
|
|
62
88
|
::Ox::DocType.new(
|
|
63
|
-
"#{name} PUBLIC \"#{external_id}\" \"#{system_id}\""
|
|
89
|
+
"#{name} PUBLIC \"#{external_id}\" \"#{system_id}\"",
|
|
64
90
|
)
|
|
65
91
|
end
|
|
66
92
|
|
|
@@ -87,7 +113,8 @@ module Moxml
|
|
|
87
113
|
end
|
|
88
114
|
|
|
89
115
|
def create_native_namespace(element, prefix, uri)
|
|
90
|
-
ns = ::Moxml::Adapter::CustomizedOx::Namespace.new(prefix, uri,
|
|
116
|
+
ns = ::Moxml::Adapter::CustomizedOx::Namespace.new(prefix, uri,
|
|
117
|
+
element)
|
|
91
118
|
set_attribute(element, ns.expanded_prefix, uri)
|
|
92
119
|
ns
|
|
93
120
|
end
|
|
@@ -97,8 +124,12 @@ module Moxml
|
|
|
97
124
|
|
|
98
125
|
prefix = ns.prefix
|
|
99
126
|
# attributes don't have attributes but can have a namespace prefix
|
|
100
|
-
|
|
101
|
-
|
|
127
|
+
if element.respond_to?(:attributes)
|
|
128
|
+
set_attribute(element, ns.expanded_prefix,
|
|
129
|
+
ns.uri)
|
|
130
|
+
end
|
|
131
|
+
element.name = [prefix,
|
|
132
|
+
element.name.delete_prefix("xmlns:")].compact.join(":")
|
|
102
133
|
namespace(element)
|
|
103
134
|
end
|
|
104
135
|
|
|
@@ -151,9 +182,14 @@ module Moxml
|
|
|
151
182
|
end
|
|
152
183
|
|
|
153
184
|
def node_name(node)
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
185
|
+
name = begin
|
|
186
|
+
node.value
|
|
187
|
+
rescue StandardError
|
|
188
|
+
node.name
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
# Strip namespace prefix if present
|
|
192
|
+
name.to_s.split(":", 2).last
|
|
157
193
|
end
|
|
158
194
|
|
|
159
195
|
def set_node_name(node, name)
|
|
@@ -172,8 +208,12 @@ module Moxml
|
|
|
172
208
|
new_node =
|
|
173
209
|
case node
|
|
174
210
|
# it can be either attribute or namespace
|
|
175
|
-
when Array then ::Moxml::Adapter::CustomizedOx::Attribute.new(
|
|
176
|
-
|
|
211
|
+
when Array then ::Moxml::Adapter::CustomizedOx::Attribute.new(
|
|
212
|
+
node.first, node.last
|
|
213
|
+
)
|
|
214
|
+
when Hash then ::Moxml::Adapter::CustomizedOx::Attribute.new(
|
|
215
|
+
node.keys.first, node.values.first
|
|
216
|
+
)
|
|
177
217
|
when String then ::Moxml::Adapter::CustomizedOx::Text.new(node)
|
|
178
218
|
else node
|
|
179
219
|
end
|
|
@@ -186,7 +226,8 @@ module Moxml
|
|
|
186
226
|
def unpatch_node(node)
|
|
187
227
|
case node
|
|
188
228
|
# it can be either attribute or namespace
|
|
189
|
-
when ::Moxml::Adapter::CustomizedOx::Attribute then [node.name,
|
|
229
|
+
when ::Moxml::Adapter::CustomizedOx::Attribute then [node.name,
|
|
230
|
+
node.value]
|
|
190
231
|
# when ::Moxml::Adapter::CustomizedOx::Attribute then { node.name => node.value }
|
|
191
232
|
when ::Moxml::Adapter::CustomizedOx::Text then node.value
|
|
192
233
|
else node
|
|
@@ -230,15 +271,18 @@ module Moxml
|
|
|
230
271
|
end
|
|
231
272
|
|
|
232
273
|
def attributes(element)
|
|
233
|
-
|
|
274
|
+
unless element.respond_to?(:attributes) && element.attributes
|
|
275
|
+
return []
|
|
276
|
+
end
|
|
234
277
|
|
|
235
|
-
element.attributes.
|
|
236
|
-
next if name.start_with?("xmlns")
|
|
278
|
+
element.attributes.filter_map do |name, value|
|
|
279
|
+
next if name.to_s.start_with?("xmlns")
|
|
237
280
|
|
|
281
|
+
# Ensure value is passed correctly - Ox stores with symbol keys
|
|
238
282
|
::Moxml::Adapter::CustomizedOx::Attribute.new(
|
|
239
|
-
name, value, element
|
|
283
|
+
name.to_s, value, element
|
|
240
284
|
)
|
|
241
|
-
end
|
|
285
|
+
end
|
|
242
286
|
end
|
|
243
287
|
|
|
244
288
|
def attribute_element(attribute)
|
|
@@ -280,10 +324,15 @@ module Moxml
|
|
|
280
324
|
|
|
281
325
|
def get_attribute(element, name)
|
|
282
326
|
return unless element.respond_to?(:attributes) && element.attributes
|
|
283
|
-
|
|
327
|
+
unless element.attributes.key?(name.to_s) || element.attributes.key?(name.to_s.to_sym)
|
|
328
|
+
return
|
|
329
|
+
end
|
|
330
|
+
|
|
331
|
+
# Ox stores attributes with symbol keys, so try both string and symbol
|
|
332
|
+
value = element.attributes[name.to_s] || element.attributes[name.to_s.to_sym]
|
|
284
333
|
|
|
285
334
|
::Moxml::Adapter::CustomizedOx::Attribute.new(
|
|
286
|
-
name.to_s,
|
|
335
|
+
name.to_s, value, element
|
|
287
336
|
)
|
|
288
337
|
end
|
|
289
338
|
|
|
@@ -331,11 +380,13 @@ module Moxml
|
|
|
331
380
|
|
|
332
381
|
return unless parent(node)
|
|
333
382
|
|
|
334
|
-
parent(node).nodes.delete(node)
|
|
383
|
+
parent(node).nodes.delete(unpatch_node(node))
|
|
335
384
|
end
|
|
336
385
|
|
|
337
386
|
def replace(node, new_node)
|
|
338
|
-
|
|
387
|
+
if node.is_a?(String) && new_node.is_a?(String)
|
|
388
|
+
return node.replace(new_node)
|
|
389
|
+
end
|
|
339
390
|
# There are other cases:
|
|
340
391
|
# when node is a String and new_node isn't
|
|
341
392
|
# when node isn't a String, and new_node is a String
|
|
@@ -357,10 +408,14 @@ module Moxml
|
|
|
357
408
|
end
|
|
358
409
|
|
|
359
410
|
def text_content(node)
|
|
411
|
+
return "" if node.nil?
|
|
412
|
+
|
|
360
413
|
case node
|
|
361
414
|
when String then node.to_s
|
|
362
415
|
when ::Moxml::Adapter::CustomizedOx::Text then node.value
|
|
363
416
|
else
|
|
417
|
+
return "" unless node.respond_to?(:nodes)
|
|
418
|
+
|
|
364
419
|
node.nodes.map do |n|
|
|
365
420
|
text_content(n)
|
|
366
421
|
end.join
|
|
@@ -428,9 +483,38 @@ module Moxml
|
|
|
428
483
|
end.values
|
|
429
484
|
end
|
|
430
485
|
|
|
431
|
-
def xpath(node, expression,
|
|
432
|
-
#
|
|
433
|
-
|
|
486
|
+
def xpath(node, expression, namespaces = {})
|
|
487
|
+
# Translate common XPath patterns to Ox locate() syntax
|
|
488
|
+
locate_expr = translate_xpath_to_locate(expression, namespaces)
|
|
489
|
+
|
|
490
|
+
# Ox's locate() works differently on documents vs elements
|
|
491
|
+
# For relative descendant searches on elements, we need special handling
|
|
492
|
+
if expression.start_with?(".//") && node.is_a?(::Ox::Element)
|
|
493
|
+
# Manually search descendants for relative paths from elements
|
|
494
|
+
element_name = locate_expr.sub("?/", "")
|
|
495
|
+
results = []
|
|
496
|
+
traverse(node) do |n|
|
|
497
|
+
next unless n.is_a?(::Ox::Element)
|
|
498
|
+
|
|
499
|
+
results << n if n.name == element_name || element_name.empty?
|
|
500
|
+
end
|
|
501
|
+
return results.map do |n|
|
|
502
|
+
patch_node(n, find_parent_in_tree(n, node))
|
|
503
|
+
end
|
|
504
|
+
end
|
|
505
|
+
|
|
506
|
+
# Use Ox's locate method for other cases
|
|
507
|
+
results = node.locate(locate_expr)
|
|
508
|
+
|
|
509
|
+
# Wrap results and set their parents by finding them in the tree
|
|
510
|
+
results.map { |n| patch_node(n, find_parent_in_tree(n, node)) }
|
|
511
|
+
rescue StandardError => e
|
|
512
|
+
raise Moxml::XPathError.new(
|
|
513
|
+
"XPath translation failed: #{e.message}",
|
|
514
|
+
expression: expression,
|
|
515
|
+
adapter: "Ox",
|
|
516
|
+
node: node,
|
|
517
|
+
)
|
|
434
518
|
end
|
|
435
519
|
|
|
436
520
|
def at_xpath(node, expression, namespaces = {})
|
|
@@ -441,7 +525,11 @@ module Moxml
|
|
|
441
525
|
output = ""
|
|
442
526
|
if node.is_a?(::Ox::Document)
|
|
443
527
|
# add declaration
|
|
444
|
-
|
|
528
|
+
version = node[:version] || "1.0"
|
|
529
|
+
encoding = options[:encoding] || node[:encoding]
|
|
530
|
+
standalone = node[:standalone]
|
|
531
|
+
|
|
532
|
+
decl = create_native_declaration(version, encoding, standalone)
|
|
445
533
|
output = ::Ox.dump(::Ox::Document.new << decl).strip
|
|
446
534
|
end
|
|
447
535
|
|
|
@@ -450,13 +538,76 @@ module Moxml
|
|
|
450
538
|
# with_xml: true,
|
|
451
539
|
with_instructions: true,
|
|
452
540
|
encoding: options[:encoding],
|
|
453
|
-
no_empty: options[:expand_empty]
|
|
541
|
+
no_empty: options[:expand_empty],
|
|
454
542
|
}
|
|
455
543
|
output + ::Ox.dump(node, ox_options)
|
|
456
544
|
end
|
|
457
545
|
|
|
458
546
|
private
|
|
459
547
|
|
|
548
|
+
# Translate a subset of XPath to Ox locate() syntax
#
# Supports: //element, /path/to/element, .//element, ./element and
# element[@attr]. An [@attr] predicate is removed from the expression and
# its attribute name is discarded here, so it does not restrict the result.
# Note: Ox locate() doesn't support namespace prefixes in the path, so
# prefixes are stripped from the expression.
#
# @param xpath [String] XPath expression (not modified)
# @param namespaces [Hash, nil] prefix => URI map; only the keys are used
# @return [String] expression to pass to Ox locate()
def translate_xpath_to_locate(xpath, namespaces = {})
  expr = xpath.dup

  # Strip the declared namespace prefixes from path steps
  # XPath: //ns:element → locate: element
  # Done literally per prefix so prefixes the generic pattern below does
  # not cover (e.g. ones containing ".") are still removed.
  (namespaces || {}).each_key do |prefix|
    expr = expr.gsub("/#{prefix}:", "/").gsub("/*#{prefix}:", "/*")
  end

  # Remove any remaining namespace prefixes
  # Use possessive quantifier to prevent ReDoS
  expr = expr.gsub(/[a-zA-Z_][\w-]*+:/, "")

  # Drop attribute-presence predicates; locate() is given the bare path
  expr = expr.gsub(/\[@(\w+)\]/, "")

  # The rewrites below are anchored with \A so they only ever apply to the
  # very start of the expression, never to the start of a later line.

  # XPath: //element → locate: ?/element (any depth)
  # Note: In Ox, ?/ means "any path"
  expr = expr.sub(%r{\A//}, "?/")

  # XPath: .//element → locate: ?/element (relative any depth)
  expr = expr.sub(%r{\A\.//}, "?/")

  # XPath: /root/child → locate: root/child (absolute path)
  # Remove leading / for Ox
  expr = expr.sub(%r{\A/}, "")

  # XPath: ./element → locate: element (direct child, just remove ./)
  expr.sub(%r{\A\./}, "")
end
|
|
590
|
+
|
|
591
|
+
# Find the node that holds +target_node+ among its child nodes
#
# Children are compared by object identity (equal?), not by value.
#
# @param target_node [Object] node whose parent is wanted
# @param search_root [Object] an Ox document, or a node whose document is
#   looked up via document()
# @return [Object, nil] the first visited node containing target_node, or
#   nil when no node contains it
def find_parent_in_tree(target_node, search_root)
  # Scan from the document; resolve it first when given a non-document node
  tree = search_root.is_a?(::Ox::Document) ? search_root : document(search_root)

  found = nil
  traverse(tree) do |candidate|
    next unless candidate.respond_to?(:nodes)

    children = candidate.nodes
    next unless children&.any? { |child| child.equal?(target_node) }

    found = candidate
    # Stop the whole traversal at the first match
    break
  end
  found
end
|
|
610
|
+
|
|
460
611
|
def traverse(node, &block)
|
|
461
612
|
return unless node
|
|
462
613
|
|
|
@@ -467,5 +618,110 @@ module Moxml
|
|
|
467
618
|
end
|
|
468
619
|
end
|
|
469
620
|
end
|
|
621
|
+
|
|
622
|
+
# Bridge between Ox SAX and Moxml SAX
#
# Translates Ox::Sax events to Moxml::SAX::Handler events.
# Ox has a unique SAX pattern where attributes are delivered AFTER
# start_element, so a started element is held as "pending" and only
# reported to the handler once the next event shows that its attribute
# list is complete.
#
# Only start_element, attr, end_element, text and error are bridged; this
# class defines no cdata, comment or instruct callbacks, so no such events
# reach the Moxml handler.
#
# @private
class OxSAXBridge
  # @param handler [Moxml::SAX::Handler] receiver of the translated events
  def initialize(handler)
    @handler = handler
    @pending_attrs = {}
    @pending_element_name = nil
    @document_started = false
  end

  # Ox delivers attributes AFTER start_element
  #
  # Attributes that arrive while no element is pending do not belong to an
  # element (presumably those of an XML declaration - confirm against
  # Ox::Sax) and are ignored, so they cannot leak onto the next element.
  def attr(name, value)
    return unless @pending_element_name

    @pending_attrs[name] = value
  end

  # Called when element starts (but attributes come AFTER this)
  def start_element(name)
    # A previous element still waiting for attributes is complete now
    finalize_pending_element

    # Store this element name (convert symbol to string)
    @pending_element_name = name.to_s

    # Call on_start_document if this is the first element
    return if @document_started

    @handler.on_start_document
    @document_started = true
  end

  def end_element(name)
    # Finalize any pending element before ending
    finalize_pending_element

    # Convert symbol to string
    @handler.on_end_element(name.to_s)
  end

  # Character data; the only content callback this bridge forwards
  def text(string)
    # Finalize any pending element before text
    finalize_pending_element

    @handler.on_characters(string)
  end

  # Wraps an Ox error report in a Moxml::ParseError for the handler
  def error(message, line, column)
    error = Moxml::ParseError.new(message, line: line, column: column)
    @handler.on_error(error)
  end

  # Called at end of parsing (not automatically by Ox)
  def end_document
    # Finalize any pending element
    finalize_pending_element

    # No on_end_document without a matching on_start_document
    @handler.on_end_document if @document_started
  end

  private

  # Reports the pending element, if any, to the handler and clears it.
  def finalize_pending_element
    return unless @pending_element_name

    # Separate namespace declarations from regular attributes
    attr_hash = {}
    namespaces_hash = {}

    @pending_attrs.each do |attr_name, attr_value|
      key = attr_name.to_s
      if key.start_with?("xmlns")
        # Namespace declaration; the default namespace is keyed by nil
        prefix = key == "xmlns" ? nil : key.sub("xmlns:", "")
        namespaces_hash[prefix] = attr_value
      else
        attr_hash[key] = attr_value
      end
    end

    @handler.on_start_element(@pending_element_name, attr_hash,
                              namespaces_hash)

    # Clear for next element
    @pending_attrs = {}
    @pending_element_name = nil
  end
end
|
|
470
726
|
end
|
|
471
727
|
end
|
data/lib/moxml/adapter/rexml.rb
CHANGED
|
@@ -14,13 +14,41 @@ module Moxml
|
|
|
14
14
|
native_doc = begin
|
|
15
15
|
::REXML::Document.new(xml)
|
|
16
16
|
rescue ::REXML::ParseException => e
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
if options[:strict]
|
|
18
|
+
raise Moxml::ParseError.new(
|
|
19
|
+
e.message,
|
|
20
|
+
line: e.line,
|
|
21
|
+
source: xml.is_a?(String) ? xml[0..100] : nil,
|
|
22
|
+
)
|
|
23
|
+
end
|
|
19
24
|
create_document
|
|
20
25
|
end
|
|
21
26
|
DocumentBuilder.new(Context.new(:rexml)).build(native_doc)
|
|
22
27
|
end
|
|
23
28
|
|
|
29
|
+
# SAX parsing implementation for REXML
#
# Feeds the input to a REXML SAX2 parser whose events are received by a
# REXMLSAX2Bridge wrapping the given handler. A REXML::ParseException is
# not raised to the caller; it is passed to handler.on_error as a
# Moxml::ParseError carrying the message and line.
#
# @param xml [String, IO] XML to parse; input responding to +read+ is read
#   in full, anything else is converted with +to_s+
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
# @return [void]
def sax_parse(xml, handler)
  # Loaded here, on first use, rather than at file load time
  require "rexml/parsers/sax2parser"
  require "rexml/source"
  require "stringio"

  listener = REXMLSAX2Bridge.new(handler)

  content = if xml.respond_to?(:read)
              xml.read
            else
              xml.to_s
            end

  ::REXML::Parsers::SAX2Parser
    .new(::REXML::IOSource.new(StringIO.new(content)))
    .tap { |sax| sax.listen(listener) }
    .parse
rescue ::REXML::ParseException => e
  handler.on_error(Moxml::ParseError.new(e.message, line: e.line))
end
|
|
51
|
+
|
|
24
52
|
def create_document(_native_doc = nil)
|
|
25
53
|
::REXML::Document.new
|
|
26
54
|
end
|
|
@@ -55,10 +83,10 @@ module Moxml
|
|
|
55
83
|
|
|
56
84
|
parts = [name]
|
|
57
85
|
if external_id
|
|
58
|
-
parts.
|
|
86
|
+
parts.push("PUBLIC", %("#{external_id}"))
|
|
59
87
|
parts << %("#{system_id}") if system_id
|
|
60
88
|
elsif system_id
|
|
61
|
-
parts.
|
|
89
|
+
parts.push("SYSTEM", %("#{system_id}"))
|
|
62
90
|
end
|
|
63
91
|
|
|
64
92
|
::REXML::DocType.new(parts.join(" "))
|
|
@@ -189,7 +217,7 @@ module Moxml
|
|
|
189
217
|
|
|
190
218
|
# Only return non-namespace attributes
|
|
191
219
|
element.attributes.values
|
|
192
|
-
|
|
220
|
+
.reject { |attr| attr.prefix.to_s.start_with?("xmlns") }
|
|
193
221
|
end
|
|
194
222
|
|
|
195
223
|
def attribute_element(attribute)
|
|
@@ -197,8 +225,8 @@ module Moxml
|
|
|
197
225
|
end
|
|
198
226
|
|
|
199
227
|
def set_attribute(element, name, value)
|
|
200
|
-
element.attributes[name&.to_s] = value
|
|
201
|
-
|
|
228
|
+
element.attributes[name&.to_s] = value&.to_s
|
|
229
|
+
element.attributes.get_attribute(name&.to_s)
|
|
202
230
|
end
|
|
203
231
|
|
|
204
232
|
def set_attribute_name(attribute, name)
|
|
@@ -327,8 +355,8 @@ module Moxml
|
|
|
327
355
|
def inner_text(node)
|
|
328
356
|
# Get direct text children only, filter duplicates
|
|
329
357
|
text_children = node.children
|
|
330
|
-
|
|
331
|
-
|
|
358
|
+
.select { _1.is_a?(::REXML::Text) }
|
|
359
|
+
.uniq(&:object_id)
|
|
332
360
|
text_children.map(&:value).join
|
|
333
361
|
end
|
|
334
362
|
|
|
@@ -353,7 +381,10 @@ module Moxml
|
|
|
353
381
|
# add a namespace prefix to the element name AND a namespace definition
|
|
354
382
|
def set_namespace(element, ns)
|
|
355
383
|
prefix = ns.name.to_s.empty? ? "xmlns" : ns.name.to_s
|
|
356
|
-
|
|
384
|
+
if element.respond_to?(:add_namespace)
|
|
385
|
+
element.add_namespace(prefix,
|
|
386
|
+
ns.value)
|
|
387
|
+
end
|
|
357
388
|
element.name = "#{prefix}:#{element.name}"
|
|
358
389
|
owner = element.is_a?(::REXML::Attribute) ? element.element : element
|
|
359
390
|
::REXML::Attribute.new(prefix, ns.value, owner)
|
|
@@ -405,7 +436,12 @@ module Moxml
|
|
|
405
436
|
def xpath(node, expression, _namespaces = {})
|
|
406
437
|
node.get_elements(expression).to_a
|
|
407
438
|
rescue ::REXML::ParseException => e
|
|
408
|
-
raise Moxml::XPathError
|
|
439
|
+
raise Moxml::XPathError.new(
|
|
440
|
+
e.message,
|
|
441
|
+
expression: expression,
|
|
442
|
+
adapter: "REXML",
|
|
443
|
+
node: node,
|
|
444
|
+
)
|
|
409
445
|
end
|
|
410
446
|
|
|
411
447
|
def at_xpath(node, expression, namespaces = {})
|
|
@@ -414,11 +450,12 @@ module Moxml
|
|
|
414
450
|
end
|
|
415
451
|
|
|
416
452
|
def serialize(node, options = {})
|
|
417
|
-
output =
|
|
453
|
+
output = +""
|
|
418
454
|
|
|
419
455
|
if node.is_a?(::REXML::Document)
|
|
420
456
|
# Always include XML declaration
|
|
421
|
-
decl = node.xml_decl || ::REXML::XMLDecl.new("1.0",
|
|
457
|
+
decl = node.xml_decl || ::REXML::XMLDecl.new("1.0",
|
|
458
|
+
options[:encoding] || "UTF-8")
|
|
422
459
|
decl.encoding = options[:encoding] if options[:encoding]
|
|
423
460
|
output << "<?xml"
|
|
424
461
|
output << %( version="#{decl.version}") if decl.version
|
|
@@ -427,20 +464,22 @@ module Moxml
|
|
|
427
464
|
output << "?>"
|
|
428
465
|
# output << "\n"
|
|
429
466
|
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
# output << "\n"
|
|
433
|
-
end
|
|
467
|
+
# output << "\n"
|
|
468
|
+
node.doctype&.write(output)
|
|
434
469
|
|
|
435
470
|
# Write processing instructions
|
|
436
471
|
node.children.each do |child|
|
|
437
|
-
next unless [::REXML::Instruction, ::REXML::CData,
|
|
472
|
+
next unless [::REXML::Instruction, ::REXML::CData,
|
|
473
|
+
::REXML::Comment, ::REXML::Text].include?(child.class)
|
|
438
474
|
|
|
439
475
|
write_with_formatter(child, output, options[:indent] || 2)
|
|
440
476
|
# output << "\n"
|
|
441
477
|
end
|
|
442
478
|
|
|
443
|
-
|
|
479
|
+
if node.root
|
|
480
|
+
write_with_formatter(node.root, output,
|
|
481
|
+
options[:indent] || 2)
|
|
482
|
+
end
|
|
444
483
|
else
|
|
445
484
|
write_with_formatter(node, output, options[:indent] || 2)
|
|
446
485
|
end
|
|
@@ -452,11 +491,79 @@ module Moxml
|
|
|
452
491
|
|
|
453
492
|
def write_with_formatter(node, output, indent = 2)
|
|
454
493
|
formatter = ::Moxml::Adapter::CustomizedRexml::Formatter.new(
|
|
455
|
-
indentation: indent, self_close_empty: false
|
|
494
|
+
indentation: indent, self_close_empty: false,
|
|
456
495
|
)
|
|
457
496
|
formatter.write(node, output)
|
|
458
497
|
end
|
|
459
498
|
end
|
|
460
499
|
end
|
|
500
|
+
|
|
501
|
+
# Bridge between REXML SAX2 and Moxml SAX
|
|
502
|
+
#
|
|
503
|
+
# Translates REXML::SAX2Parser events to Moxml::SAX::Handler events
|
|
504
|
+
#
|
|
505
|
+
# @private
|
|
506
|
+
class REXMLSAX2Bridge
|
|
507
|
+
def initialize(handler)
|
|
508
|
+
@handler = handler
|
|
509
|
+
end
|
|
510
|
+
|
|
511
|
+
# REXML splits element name into uri/localname/qname
|
|
512
|
+
def start_element(_uri, _localname, qname, attributes)
|
|
513
|
+
# Convert REXML attributes to hash
|
|
514
|
+
attr_hash = {}
|
|
515
|
+
ns_hash = {}
|
|
516
|
+
|
|
517
|
+
attributes.each do |name, value|
|
|
518
|
+
if name.to_s.start_with?("xmlns")
|
|
519
|
+
# Namespace declaration
|
|
520
|
+
prefix = name.to_s == "xmlns" ? nil : name.to_s.sub("xmlns:", "")
|
|
521
|
+
ns_hash[prefix] = value
|
|
522
|
+
else
|
|
523
|
+
attr_hash[name.to_s] = value
|
|
524
|
+
end
|
|
525
|
+
end
|
|
526
|
+
|
|
527
|
+
# Use qname (qualified name) for element name
|
|
528
|
+
@handler.on_start_element(qname, attr_hash, ns_hash)
|
|
529
|
+
end
|
|
530
|
+
|
|
531
|
+
def end_element(_uri, _localname, qname)
|
|
532
|
+
@handler.on_end_element(qname)
|
|
533
|
+
end
|
|
534
|
+
|
|
535
|
+
def characters(text)
|
|
536
|
+
@handler.on_characters(text)
|
|
537
|
+
end
|
|
538
|
+
|
|
539
|
+
def cdata(content)
|
|
540
|
+
@handler.on_cdata(content)
|
|
541
|
+
end
|
|
542
|
+
|
|
543
|
+
def comment(text)
|
|
544
|
+
@handler.on_comment(text)
|
|
545
|
+
end
|
|
546
|
+
|
|
547
|
+
def processing_instruction(target, data)
|
|
548
|
+
@handler.on_processing_instruction(target, data || "")
|
|
549
|
+
end
|
|
550
|
+
|
|
551
|
+
def start_document
|
|
552
|
+
@handler.on_start_document
|
|
553
|
+
end
|
|
554
|
+
|
|
555
|
+
def end_document
|
|
556
|
+
@handler.on_end_document
|
|
557
|
+
end
|
|
558
|
+
|
|
559
|
+
# REXML calls these but we don't need to handle them
|
|
560
|
+
def xmldecl(version, encoding, standalone)
|
|
561
|
+
# XML declaration - we don't need to do anything
|
|
562
|
+
end
|
|
563
|
+
|
|
564
|
+
def progress(position)
|
|
565
|
+
# Progress callback - we don't need to do anything
|
|
566
|
+
end
|
|
567
|
+
end
|
|
461
568
|
end
|
|
462
569
|
end
|