moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -25,12 +25,37 @@ module Moxml
|
|
|
25
25
|
end
|
|
26
26
|
end
|
|
27
27
|
rescue ::Nokogiri::XML::SyntaxError => e
|
|
28
|
-
raise Moxml::ParseError.new(e.message, line: e.line,
|
|
28
|
+
raise Moxml::ParseError.new(e.message, line: e.line,
|
|
29
|
+
column: e.column)
|
|
29
30
|
end
|
|
30
31
|
|
|
31
32
|
DocumentBuilder.new(Context.new(:nokogiri)).build(native_doc)
|
|
32
33
|
end
|
|
33
34
|
|
|
35
|
+
# SAX parsing implementation for Nokogiri
|
|
36
|
+
#
|
|
37
|
+
# @param xml [String, IO] XML to parse
|
|
38
|
+
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
|
|
39
|
+
# @return [void]
|
|
40
|
+
def sax_parse(xml, handler)
|
|
41
|
+
# Create bridge that translates Nokogiri SAX to Moxml SAX
|
|
42
|
+
bridge = NokogiriSAXBridge.new(handler)
|
|
43
|
+
|
|
44
|
+
# Create Nokogiri SAX parser
|
|
45
|
+
parser = ::Nokogiri::XML::SAX::Parser.new(bridge)
|
|
46
|
+
|
|
47
|
+
# Parse
|
|
48
|
+
if xml.respond_to?(:read)
|
|
49
|
+
parser.parse(xml)
|
|
50
|
+
else
|
|
51
|
+
parser.parse(xml.to_s)
|
|
52
|
+
end
|
|
53
|
+
rescue ::Nokogiri::XML::SyntaxError => e
|
|
54
|
+
error = Moxml::ParseError.new(e.message, line: e.line,
|
|
55
|
+
column: e.column)
|
|
56
|
+
handler.on_error(error)
|
|
57
|
+
end
|
|
58
|
+
|
|
34
59
|
def create_document(_native_doc = nil)
|
|
35
60
|
::Nokogiri::XML::Document.new
|
|
36
61
|
end
|
|
@@ -39,7 +64,7 @@ module Moxml
|
|
|
39
64
|
# document fragments are weird and should be used with caution:
|
|
40
65
|
# https://github.com/sparklemotion/nokogiri/issues/572
|
|
41
66
|
::Nokogiri::XML::DocumentFragment.new(
|
|
42
|
-
::Nokogiri::XML::Document.new
|
|
67
|
+
::Nokogiri::XML::Document.new,
|
|
43
68
|
)
|
|
44
69
|
end
|
|
45
70
|
|
|
@@ -75,7 +100,7 @@ module Moxml
|
|
|
75
100
|
::Nokogiri::XML::ProcessingInstruction.new(
|
|
76
101
|
create_document,
|
|
77
102
|
"xml",
|
|
78
|
-
build_declaration_attrs(version, encoding, standalone)
|
|
103
|
+
build_declaration_attrs(version, encoding, standalone),
|
|
79
104
|
)
|
|
80
105
|
end
|
|
81
106
|
|
|
@@ -196,6 +221,23 @@ module Moxml
|
|
|
196
221
|
end
|
|
197
222
|
|
|
198
223
|
def add_child(element, child)
|
|
224
|
+
# Special handling for declarations on Nokogiri documents
|
|
225
|
+
if element.is_a?(::Nokogiri::XML::Document) &&
|
|
226
|
+
child.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
|
|
227
|
+
child.name == "xml"
|
|
228
|
+
# Set document's xml_decl property
|
|
229
|
+
version = declaration_attribute(child, "version") || "1.0"
|
|
230
|
+
encoding = declaration_attribute(child, "encoding")
|
|
231
|
+
standalone = declaration_attribute(child, "standalone")
|
|
232
|
+
|
|
233
|
+
# Nokogiri's xml_decl can only be set via instance variable
|
|
234
|
+
element.instance_variable_set(:@xml_decl, {
|
|
235
|
+
version: version,
|
|
236
|
+
encoding: encoding,
|
|
237
|
+
standalone: standalone,
|
|
238
|
+
}.compact)
|
|
239
|
+
end
|
|
240
|
+
|
|
199
241
|
if node_type(child) == :doctype
|
|
200
242
|
# avoid exceptions: cannot reparent Nokogiri::XML::DTD there
|
|
201
243
|
element.create_internal_subset(
|
|
@@ -215,6 +257,14 @@ module Moxml
|
|
|
215
257
|
end
|
|
216
258
|
|
|
217
259
|
def remove(node)
|
|
260
|
+
# Special handling for declarations on Nokogiri documents
|
|
261
|
+
if node.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
|
|
262
|
+
node.name == "xml" &&
|
|
263
|
+
node.parent.is_a?(::Nokogiri::XML::Document)
|
|
264
|
+
# Clear document's xml_decl when removing declaration
|
|
265
|
+
node.parent.instance_variable_set(:@xml_decl, nil)
|
|
266
|
+
end
|
|
267
|
+
|
|
218
268
|
node.remove
|
|
219
269
|
end
|
|
220
270
|
|
|
@@ -274,27 +324,53 @@ module Moxml
|
|
|
274
324
|
def xpath(node, expression, namespaces = nil)
|
|
275
325
|
node.xpath(expression, namespaces).to_a
|
|
276
326
|
rescue ::Nokogiri::XML::XPath::SyntaxError => e
|
|
277
|
-
raise Moxml::XPathError
|
|
327
|
+
raise Moxml::XPathError.new(
|
|
328
|
+
e.message,
|
|
329
|
+
expression: expression,
|
|
330
|
+
adapter: "Nokogiri",
|
|
331
|
+
node: node,
|
|
332
|
+
)
|
|
278
333
|
end
|
|
279
334
|
|
|
280
335
|
def at_xpath(node, expression, namespaces = nil)
|
|
281
336
|
node.at_xpath(expression, namespaces)
|
|
282
337
|
rescue ::Nokogiri::XML::XPath::SyntaxError => e
|
|
283
|
-
raise Moxml::XPathError
|
|
338
|
+
raise Moxml::XPathError.new(
|
|
339
|
+
e.message,
|
|
340
|
+
expression: expression,
|
|
341
|
+
adapter: "Nokogiri",
|
|
342
|
+
node: node,
|
|
343
|
+
)
|
|
284
344
|
end
|
|
285
345
|
|
|
286
346
|
def serialize(node, options = {})
|
|
287
347
|
save_options = ::Nokogiri::XML::Node::SaveOptions::AS_XML
|
|
288
348
|
|
|
289
349
|
# Don't force expand empty elements if they're really empty
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
350
|
+
if options[:expand_empty]
|
|
351
|
+
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
|
|
352
|
+
end
|
|
353
|
+
if options[:indent].to_i.positive?
|
|
354
|
+
save_options |= ::Nokogiri::XML::Node::SaveOptions::FORMAT
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
# Handle declaration option
|
|
358
|
+
# Priority:
|
|
359
|
+
# 1. Explicit no_declaration option
|
|
360
|
+
# 2. Check Nokogiri's internal @xml_decl (when remove is called, this becomes nil)
|
|
361
|
+
if options.key?(:no_declaration)
|
|
362
|
+
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if options[:no_declaration]
|
|
363
|
+
elsif node.respond_to?(:instance_variable_get) &&
|
|
364
|
+
node.instance_variable_defined?(:@xml_decl)
|
|
365
|
+
# Nokogiri's internal state - if nil, declaration was removed
|
|
366
|
+
xml_decl = node.instance_variable_get(:@xml_decl)
|
|
367
|
+
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if xml_decl.nil?
|
|
368
|
+
end
|
|
293
369
|
|
|
294
370
|
node.to_xml(
|
|
295
371
|
indent: options[:indent],
|
|
296
372
|
encoding: options[:encoding],
|
|
297
|
-
save_with: save_options
|
|
373
|
+
save_with: save_options,
|
|
298
374
|
)
|
|
299
375
|
end
|
|
300
376
|
|
|
@@ -316,6 +392,77 @@ module Moxml
|
|
|
316
392
|
end
|
|
317
393
|
end
|
|
318
394
|
end
|
|
395
|
+
|
|
396
|
+
# Bridge between Nokogiri SAX and Moxml SAX
|
|
397
|
+
#
|
|
398
|
+
# Translates Nokogiri::XML::SAX::Document events to Moxml::SAX::Handler events
|
|
399
|
+
#
|
|
400
|
+
# @private
|
|
401
|
+
class NokogiriSAXBridge < ::Nokogiri::XML::SAX::Document
|
|
402
|
+
def initialize(handler)
|
|
403
|
+
super()
|
|
404
|
+
@handler = handler
|
|
405
|
+
end
|
|
406
|
+
|
|
407
|
+
# Map Nokogiri events to Moxml events
|
|
408
|
+
|
|
409
|
+
def start_document
|
|
410
|
+
@handler.on_start_document
|
|
411
|
+
end
|
|
412
|
+
|
|
413
|
+
def end_document
|
|
414
|
+
@handler.on_end_document
|
|
415
|
+
end
|
|
416
|
+
|
|
417
|
+
def start_element(name, attributes = [])
|
|
418
|
+
# Convert Nokogiri attributes array to hash
|
|
419
|
+
attr_hash = {}
|
|
420
|
+
namespaces_hash = {}
|
|
421
|
+
|
|
422
|
+
attributes.each do |attr|
|
|
423
|
+
attr_name = attr[0]
|
|
424
|
+
attr_value = attr[1]
|
|
425
|
+
|
|
426
|
+
if attr_name.start_with?("xmlns")
|
|
427
|
+
# Namespace declaration
|
|
428
|
+
prefix = attr_name == "xmlns" ? nil : attr_name.sub("xmlns:", "")
|
|
429
|
+
namespaces_hash[prefix] = attr_value
|
|
430
|
+
else
|
|
431
|
+
attr_hash[attr_name] = attr_value
|
|
432
|
+
end
|
|
433
|
+
end
|
|
434
|
+
|
|
435
|
+
@handler.on_start_element(name, attr_hash, namespaces_hash)
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
def end_element(name)
|
|
439
|
+
@handler.on_end_element(name)
|
|
440
|
+
end
|
|
441
|
+
|
|
442
|
+
def characters(string)
|
|
443
|
+
@handler.on_characters(string)
|
|
444
|
+
end
|
|
445
|
+
|
|
446
|
+
def cdata_block(string)
|
|
447
|
+
@handler.on_cdata(string)
|
|
448
|
+
end
|
|
449
|
+
|
|
450
|
+
def comment(string)
|
|
451
|
+
@handler.on_comment(string)
|
|
452
|
+
end
|
|
453
|
+
|
|
454
|
+
def processing_instruction(target, data)
|
|
455
|
+
@handler.on_processing_instruction(target, data || "")
|
|
456
|
+
end
|
|
457
|
+
|
|
458
|
+
def error(string)
|
|
459
|
+
@handler.on_error(Moxml::ParseError.new(string))
|
|
460
|
+
end
|
|
461
|
+
|
|
462
|
+
def warning(string)
|
|
463
|
+
@handler.on_warning(string)
|
|
464
|
+
end
|
|
465
|
+
end
|
|
319
466
|
end
|
|
320
467
|
end
|
|
321
468
|
end
|
data/lib/moxml/adapter/oga.rb
CHANGED
|
@@ -10,7 +10,10 @@ module Moxml
|
|
|
10
10
|
class Oga < Base
|
|
11
11
|
class << self
|
|
12
12
|
def set_root(doc, element)
|
|
13
|
-
|
|
13
|
+
# Clear existing root element if any - Oga's NodeSet needs special handling
|
|
14
|
+
# We need to manually remove elements since NodeSet doesn't support clear or delete_if
|
|
15
|
+
elements_to_remove = doc.children.select { |child| child.is_a?(::Oga::XML::Element) }
|
|
16
|
+
elements_to_remove.each { |elem| doc.children.delete(elem) }
|
|
14
17
|
doc.children << element
|
|
15
18
|
end
|
|
16
19
|
|
|
@@ -18,12 +21,37 @@ module Moxml
|
|
|
18
21
|
native_doc = begin
|
|
19
22
|
::Oga.parse_xml(xml, strict: options[:strict])
|
|
20
23
|
rescue LL::ParserError => e
|
|
21
|
-
raise Moxml::ParseError
|
|
24
|
+
raise Moxml::ParseError.new(
|
|
25
|
+
e.message,
|
|
26
|
+
source: xml.is_a?(String) ? xml[0..100] : nil,
|
|
27
|
+
)
|
|
22
28
|
end
|
|
23
29
|
|
|
24
30
|
DocumentBuilder.new(Context.new(:oga)).build(native_doc)
|
|
25
31
|
end
|
|
26
32
|
|
|
33
|
+
# SAX parsing implementation for Oga
|
|
34
|
+
#
|
|
35
|
+
# @param xml [String, IO] XML to parse
|
|
36
|
+
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
|
|
37
|
+
# @return [void]
|
|
38
|
+
def sax_parse(xml, handler)
|
|
39
|
+
bridge = OgaSAXBridge.new(handler)
|
|
40
|
+
|
|
41
|
+
xml_string = xml.respond_to?(:read) ? xml.read : xml.to_s
|
|
42
|
+
|
|
43
|
+
# Manually call start_document (Oga doesn't)
|
|
44
|
+
handler.on_start_document
|
|
45
|
+
|
|
46
|
+
::Oga.sax_parse_xml(bridge, xml_string)
|
|
47
|
+
|
|
48
|
+
# Manually call end_document (Oga doesn't)
|
|
49
|
+
handler.on_end_document
|
|
50
|
+
rescue StandardError => e
|
|
51
|
+
error = Moxml::ParseError.new(e.message)
|
|
52
|
+
handler.on_error(error)
|
|
53
|
+
end
|
|
54
|
+
|
|
27
55
|
def create_document(_native_doc = nil)
|
|
28
56
|
::Oga::XML::Document.new
|
|
29
57
|
end
|
|
@@ -46,7 +74,7 @@ module Moxml
|
|
|
46
74
|
|
|
47
75
|
def create_native_doctype(name, external_id, system_id)
|
|
48
76
|
::Oga::XML::Doctype.new(
|
|
49
|
-
name: name, public_id: external_id, system_id: system_id, type: "PUBLIC"
|
|
77
|
+
name: name, public_id: external_id, system_id: system_id, type: "PUBLIC",
|
|
50
78
|
)
|
|
51
79
|
end
|
|
52
80
|
|
|
@@ -58,19 +86,23 @@ module Moxml
|
|
|
58
86
|
attrs = {
|
|
59
87
|
version: version,
|
|
60
88
|
encoding: encoding,
|
|
61
|
-
standalone: standalone
|
|
89
|
+
standalone: standalone,
|
|
62
90
|
}.compact
|
|
63
91
|
::Moxml::Adapter::CustomizedOga::XmlDeclaration.new(attrs)
|
|
64
92
|
end
|
|
65
93
|
|
|
66
94
|
def declaration_attribute(declaration, attr_name)
|
|
67
|
-
|
|
95
|
+
unless ::Moxml::Declaration::ALLOWED_ATTRIBUTES.include?(attr_name.to_s)
|
|
96
|
+
return
|
|
97
|
+
end
|
|
68
98
|
|
|
69
99
|
declaration.public_send(attr_name)
|
|
70
100
|
end
|
|
71
101
|
|
|
72
102
|
def set_declaration_attribute(declaration, attr_name, value)
|
|
73
|
-
|
|
103
|
+
unless ::Moxml::Declaration::ALLOWED_ATTRIBUTES.include?(attr_name.to_s)
|
|
104
|
+
return
|
|
105
|
+
end
|
|
74
106
|
|
|
75
107
|
declaration.public_send("#{attr_name}=", value)
|
|
76
108
|
end
|
|
@@ -80,7 +112,8 @@ module Moxml
|
|
|
80
112
|
return ns unless ns.nil?
|
|
81
113
|
|
|
82
114
|
# Oga creates an attribute and registers a namespace
|
|
83
|
-
set_attribute(element,
|
|
115
|
+
set_attribute(element,
|
|
116
|
+
[::Oga::XML::Element::XMLNS_PREFIX, prefix].compact.join(":"), uri)
|
|
84
117
|
element.register_namespace(prefix, uri)
|
|
85
118
|
::Oga::XML::Namespace.new(name: prefix, uri: uri)
|
|
86
119
|
end
|
|
@@ -131,7 +164,10 @@ module Moxml
|
|
|
131
164
|
def children(node)
|
|
132
165
|
all_children = []
|
|
133
166
|
|
|
134
|
-
|
|
167
|
+
if node.is_a?(::Oga::XML::Document)
|
|
168
|
+
all_children += [node.xml_declaration,
|
|
169
|
+
node.doctype].compact
|
|
170
|
+
end
|
|
135
171
|
|
|
136
172
|
return all_children unless node.respond_to?(:children)
|
|
137
173
|
|
|
@@ -180,12 +216,15 @@ module Moxml
|
|
|
180
216
|
|
|
181
217
|
def set_attribute(element, name, value)
|
|
182
218
|
namespace_name = nil
|
|
183
|
-
|
|
219
|
+
if name.to_s.include?(":")
|
|
220
|
+
namespace_name, name = name.to_s.split(":",
|
|
221
|
+
2)
|
|
222
|
+
end
|
|
184
223
|
|
|
185
224
|
attr = ::Oga::XML::Attribute.new(
|
|
186
225
|
name: name.to_s,
|
|
187
226
|
namespace_name: namespace_name,
|
|
188
|
-
value: value.to_s
|
|
227
|
+
value: value.to_s,
|
|
189
228
|
)
|
|
190
229
|
element.add_attribute(attr)
|
|
191
230
|
end
|
|
@@ -211,6 +250,13 @@ module Moxml
|
|
|
211
250
|
child_or_text
|
|
212
251
|
end
|
|
213
252
|
|
|
253
|
+
# Special handling for declarations on Oga documents
|
|
254
|
+
if element.is_a?(::Oga::XML::Document) &&
|
|
255
|
+
child.is_a?(::Oga::XML::XmlDeclaration)
|
|
256
|
+
# Set as document's xml_declaration
|
|
257
|
+
element.instance_variable_set(:@xml_declaration, child)
|
|
258
|
+
end
|
|
259
|
+
|
|
214
260
|
element.children << child
|
|
215
261
|
end
|
|
216
262
|
|
|
@@ -237,6 +283,13 @@ module Moxml
|
|
|
237
283
|
end
|
|
238
284
|
|
|
239
285
|
def remove(node)
|
|
286
|
+
# Special handling for declarations on Oga documents
|
|
287
|
+
if node.is_a?(::Oga::XML::XmlDeclaration) &&
|
|
288
|
+
node.parent.is_a?(::Oga::XML::Document)
|
|
289
|
+
# Clear document's xml_declaration when removing declaration
|
|
290
|
+
node.parent.instance_variable_set(:@xml_declaration, nil)
|
|
291
|
+
end
|
|
292
|
+
|
|
240
293
|
node.remove
|
|
241
294
|
end
|
|
242
295
|
|
|
@@ -313,22 +366,144 @@ module Moxml
|
|
|
313
366
|
end
|
|
314
367
|
|
|
315
368
|
def xpath(node, expression, namespaces = nil)
|
|
316
|
-
node.xpath(expression, {},
|
|
369
|
+
node.xpath(expression, {},
|
|
370
|
+
namespaces: namespaces&.transform_keys(&:to_s)).to_a
|
|
317
371
|
rescue ::LL::ParserError => e
|
|
318
|
-
raise Moxml::XPathError
|
|
372
|
+
raise Moxml::XPathError.new(
|
|
373
|
+
e.message,
|
|
374
|
+
expression: expression,
|
|
375
|
+
adapter: "Oga",
|
|
376
|
+
node: node,
|
|
377
|
+
)
|
|
319
378
|
end
|
|
320
379
|
|
|
321
380
|
def at_xpath(node, expression, namespaces = nil)
|
|
322
381
|
node.at_xpath(expression, namespaces: namespaces)
|
|
323
382
|
rescue ::Oga::XPath::Error => e
|
|
324
|
-
raise Moxml::XPathError
|
|
383
|
+
raise Moxml::XPathError.new(
|
|
384
|
+
e.message,
|
|
385
|
+
expression: expression,
|
|
386
|
+
adapter: "Oga",
|
|
387
|
+
node: node,
|
|
388
|
+
)
|
|
325
389
|
end
|
|
326
390
|
|
|
327
|
-
def serialize(node,
|
|
328
|
-
#
|
|
391
|
+
def serialize(node, options = {})
|
|
392
|
+
# Oga's XmlGenerator doesn't support options directly
|
|
393
|
+
# We need to handle declaration options ourselves for Document nodes
|
|
394
|
+
if node.is_a?(::Oga::XML::Document)
|
|
395
|
+
# Check if we should include declaration
|
|
396
|
+
# Priority: explicit option > existence of xml_declaration node
|
|
397
|
+
should_include_decl = if options.key?(:no_declaration)
|
|
398
|
+
!options[:no_declaration]
|
|
399
|
+
elsif options.key?(:declaration)
|
|
400
|
+
options[:declaration]
|
|
401
|
+
else
|
|
402
|
+
# Default: include if document has xml_declaration node
|
|
403
|
+
node.xml_declaration ? true : false
|
|
404
|
+
end
|
|
405
|
+
|
|
406
|
+
if should_include_decl && !node.xml_declaration
|
|
407
|
+
# Need to add declaration - create default one
|
|
408
|
+
output = +""
|
|
409
|
+
output << '<?xml version="1.0" encoding="UTF-8"?>'
|
|
410
|
+
output << "\n"
|
|
411
|
+
|
|
412
|
+
# Serialize doctype if present
|
|
413
|
+
output << node.doctype.to_xml << "\n" if node.doctype
|
|
414
|
+
|
|
415
|
+
# Serialize children
|
|
416
|
+
node.children.each do |child|
|
|
417
|
+
output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
|
|
418
|
+
end
|
|
419
|
+
|
|
420
|
+
return output
|
|
421
|
+
elsif !should_include_decl
|
|
422
|
+
# Skip xml_declaration
|
|
423
|
+
output = +""
|
|
424
|
+
|
|
425
|
+
# Serialize doctype if present
|
|
426
|
+
output << node.doctype.to_xml << "\n" if node.doctype
|
|
427
|
+
|
|
428
|
+
# Serialize root and other children
|
|
429
|
+
node.children.each do |child|
|
|
430
|
+
next if child.is_a?(::Oga::XML::XmlDeclaration)
|
|
431
|
+
|
|
432
|
+
output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
|
|
433
|
+
end
|
|
434
|
+
|
|
435
|
+
return output
|
|
436
|
+
end
|
|
437
|
+
end
|
|
438
|
+
|
|
439
|
+
# Default: use XmlGenerator
|
|
329
440
|
::Moxml::Adapter::CustomizedOga::XmlGenerator.new(node).to_xml
|
|
330
441
|
end
|
|
331
442
|
end
|
|
332
443
|
end
|
|
444
|
+
|
|
445
|
+
# Bridge between Oga SAX and Moxml SAX
|
|
446
|
+
#
|
|
447
|
+
# Translates Oga SAX events to Moxml::SAX::Handler events.
|
|
448
|
+
# Oga has different event naming and namespace as first param.
|
|
449
|
+
#
|
|
450
|
+
# @private
|
|
451
|
+
class OgaSAXBridge
|
|
452
|
+
def initialize(handler)
|
|
453
|
+
@handler = handler
|
|
454
|
+
end
|
|
455
|
+
|
|
456
|
+
# Oga: on_element(namespace, name, attributes)
|
|
457
|
+
# namespace may be nil
|
|
458
|
+
# attributes is an array of [name, value] pairs
|
|
459
|
+
def on_element(namespace, name, attributes)
|
|
460
|
+
# Build full qualified name if namespace present
|
|
461
|
+
element_name = namespace ? "#{namespace}:#{name}" : name
|
|
462
|
+
|
|
463
|
+
# Convert Oga attributes to hash
|
|
464
|
+
attr_hash = {}
|
|
465
|
+
ns_hash = {}
|
|
466
|
+
|
|
467
|
+
# Oga delivers attributes as array of [name, value] pairs
|
|
468
|
+
attributes.each do |attr_name, attr_value|
|
|
469
|
+
if attr_name.to_s.start_with?("xmlns")
|
|
470
|
+
prefix = if attr_name.to_s == "xmlns"
|
|
471
|
+
nil
|
|
472
|
+
else
|
|
473
|
+
attr_name.to_s.sub(
|
|
474
|
+
"xmlns:", ""
|
|
475
|
+
)
|
|
476
|
+
end
|
|
477
|
+
ns_hash[prefix] = attr_value
|
|
478
|
+
else
|
|
479
|
+
attr_hash[attr_name.to_s] = attr_value
|
|
480
|
+
end
|
|
481
|
+
end
|
|
482
|
+
|
|
483
|
+
@handler.on_start_element(element_name, attr_hash, ns_hash)
|
|
484
|
+
end
|
|
485
|
+
|
|
486
|
+
# Oga: after_element(namespace, name)
|
|
487
|
+
def after_element(namespace, name)
|
|
488
|
+
element_name = namespace ? "#{namespace}:#{name}" : name
|
|
489
|
+
@handler.on_end_element(element_name)
|
|
490
|
+
end
|
|
491
|
+
|
|
492
|
+
def on_text(text)
|
|
493
|
+
@handler.on_characters(text)
|
|
494
|
+
end
|
|
495
|
+
|
|
496
|
+
def on_cdata(text)
|
|
497
|
+
@handler.on_cdata(text)
|
|
498
|
+
end
|
|
499
|
+
|
|
500
|
+
def on_comment(text)
|
|
501
|
+
@handler.on_comment(text)
|
|
502
|
+
end
|
|
503
|
+
|
|
504
|
+
def on_processing_instruction(name, text)
|
|
505
|
+
@handler.on_processing_instruction(name, text || "")
|
|
506
|
+
end
|
|
507
|
+
end
|
|
333
508
|
end
|
|
334
509
|
end
|