moxml 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +12 -4
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +238 -40
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +45 -0
- data/docs/_guides/modifying-xml.adoc +293 -0
- data/docs/_guides/parsing-xml.adoc +231 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +98 -0
- data/docs/_pages/adapters/libxml.adoc +286 -0
- data/docs/_pages/adapters/nokogiri.adoc +252 -0
- data/docs/_pages/adapters/oga.adoc +292 -0
- data/docs/_pages/adapters/ox.adoc +55 -0
- data/docs/_pages/adapters/rexml.adoc +293 -0
- data/docs/_pages/best-practices.adoc +430 -0
- data/docs/_pages/compatibility.adoc +468 -0
- data/docs/_pages/configuration.adoc +251 -0
- data/docs/_pages/error-handling.adoc +350 -0
- data/docs/_pages/headed-ox-limitations.adoc +558 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +141 -0
- data/docs/_pages/node-api-reference.adoc +50 -0
- data/docs/_pages/performance.adoc +36 -0
- data/docs/_pages/quick-start.adoc +244 -0
- data/docs/_pages/thread-safety.adoc +29 -0
- data/docs/_references/document-api.adoc +408 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +268 -0
- data/docs/_tutorials/builder-pattern.adoc +343 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +325 -0
- data/docs/_tutorials/xpath-queries.adoc +359 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -3
- data/lib/moxml/adapter/customized_ox/namespace.rb +0 -2
- data/lib/moxml/adapter/customized_ox/text.rb +0 -2
- data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1548 -0
- data/lib/moxml/adapter/nokogiri.rb +121 -9
- data/lib/moxml/adapter/oga.rb +123 -12
- data/lib/moxml/adapter/ox.rb +283 -27
- data/lib/moxml/adapter/rexml.rb +127 -20
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +39 -1
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +39 -6
- data/lib/moxml/document_builder.rb +27 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +94 -3
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1768 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +176 -35
- data/lib/ox/node.rb +0 -9
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -25,12 +25,37 @@ module Moxml
|
|
|
25
25
|
end
|
|
26
26
|
end
|
|
27
27
|
rescue ::Nokogiri::XML::SyntaxError => e
|
|
28
|
-
raise Moxml::ParseError.new(e.message, line: e.line,
|
|
28
|
+
raise Moxml::ParseError.new(e.message, line: e.line,
|
|
29
|
+
column: e.column)
|
|
29
30
|
end
|
|
30
31
|
|
|
31
32
|
DocumentBuilder.new(Context.new(:nokogiri)).build(native_doc)
|
|
32
33
|
end
|
|
33
34
|
|
|
35
|
+
# SAX parsing implementation for Nokogiri
|
|
36
|
+
#
|
|
37
|
+
# @param xml [String, IO] XML to parse
|
|
38
|
+
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
|
|
39
|
+
# @return [void]
|
|
40
|
+
def sax_parse(xml, handler)
|
|
41
|
+
# Create bridge that translates Nokogiri SAX to Moxml SAX
|
|
42
|
+
bridge = NokogiriSAXBridge.new(handler)
|
|
43
|
+
|
|
44
|
+
# Create Nokogiri SAX parser
|
|
45
|
+
parser = ::Nokogiri::XML::SAX::Parser.new(bridge)
|
|
46
|
+
|
|
47
|
+
# Parse
|
|
48
|
+
if xml.respond_to?(:read)
|
|
49
|
+
parser.parse(xml)
|
|
50
|
+
else
|
|
51
|
+
parser.parse(xml.to_s)
|
|
52
|
+
end
|
|
53
|
+
rescue ::Nokogiri::XML::SyntaxError => e
|
|
54
|
+
error = Moxml::ParseError.new(e.message, line: e.line,
|
|
55
|
+
column: e.column)
|
|
56
|
+
handler.on_error(error)
|
|
57
|
+
end
|
|
58
|
+
|
|
34
59
|
def create_document(_native_doc = nil)
|
|
35
60
|
::Nokogiri::XML::Document.new
|
|
36
61
|
end
|
|
@@ -39,7 +64,7 @@ module Moxml
|
|
|
39
64
|
# document fragments are weird and should be used with caution:
|
|
40
65
|
# https://github.com/sparklemotion/nokogiri/issues/572
|
|
41
66
|
::Nokogiri::XML::DocumentFragment.new(
|
|
42
|
-
::Nokogiri::XML::Document.new
|
|
67
|
+
::Nokogiri::XML::Document.new,
|
|
43
68
|
)
|
|
44
69
|
end
|
|
45
70
|
|
|
@@ -75,7 +100,7 @@ module Moxml
|
|
|
75
100
|
::Nokogiri::XML::ProcessingInstruction.new(
|
|
76
101
|
create_document,
|
|
77
102
|
"xml",
|
|
78
|
-
build_declaration_attrs(version, encoding, standalone)
|
|
103
|
+
build_declaration_attrs(version, encoding, standalone),
|
|
79
104
|
)
|
|
80
105
|
end
|
|
81
106
|
|
|
@@ -274,27 +299,43 @@ module Moxml
|
|
|
274
299
|
def xpath(node, expression, namespaces = nil)
|
|
275
300
|
node.xpath(expression, namespaces).to_a
|
|
276
301
|
rescue ::Nokogiri::XML::XPath::SyntaxError => e
|
|
277
|
-
raise Moxml::XPathError
|
|
302
|
+
raise Moxml::XPathError.new(
|
|
303
|
+
e.message,
|
|
304
|
+
expression: expression,
|
|
305
|
+
adapter: "Nokogiri",
|
|
306
|
+
node: node,
|
|
307
|
+
)
|
|
278
308
|
end
|
|
279
309
|
|
|
280
310
|
def at_xpath(node, expression, namespaces = nil)
|
|
281
311
|
node.at_xpath(expression, namespaces)
|
|
282
312
|
rescue ::Nokogiri::XML::XPath::SyntaxError => e
|
|
283
|
-
raise Moxml::XPathError
|
|
313
|
+
raise Moxml::XPathError.new(
|
|
314
|
+
e.message,
|
|
315
|
+
expression: expression,
|
|
316
|
+
adapter: "Nokogiri",
|
|
317
|
+
node: node,
|
|
318
|
+
)
|
|
284
319
|
end
|
|
285
320
|
|
|
286
321
|
def serialize(node, options = {})
|
|
287
322
|
save_options = ::Nokogiri::XML::Node::SaveOptions::AS_XML
|
|
288
323
|
|
|
289
324
|
# Don't force expand empty elements if they're really empty
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
325
|
+
if options[:expand_empty]
|
|
326
|
+
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
|
|
327
|
+
end
|
|
328
|
+
if options[:indent].to_i.positive?
|
|
329
|
+
save_options |= ::Nokogiri::XML::Node::SaveOptions::FORMAT
|
|
330
|
+
end
|
|
331
|
+
if options[:no_declaration]
|
|
332
|
+
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
|
|
333
|
+
end
|
|
293
334
|
|
|
294
335
|
node.to_xml(
|
|
295
336
|
indent: options[:indent],
|
|
296
337
|
encoding: options[:encoding],
|
|
297
|
-
save_with: save_options
|
|
338
|
+
save_with: save_options,
|
|
298
339
|
)
|
|
299
340
|
end
|
|
300
341
|
|
|
@@ -316,6 +357,77 @@ module Moxml
|
|
|
316
357
|
end
|
|
317
358
|
end
|
|
318
359
|
end
|
|
360
|
+
|
|
361
|
+
# Bridge between Nokogiri SAX and Moxml SAX
|
|
362
|
+
#
|
|
363
|
+
# Translates Nokogiri::XML::SAX::Document events to Moxml::SAX::Handler events
|
|
364
|
+
#
|
|
365
|
+
# @private
|
|
366
|
+
class NokogiriSAXBridge < ::Nokogiri::XML::SAX::Document
|
|
367
|
+
def initialize(handler)
|
|
368
|
+
super()
|
|
369
|
+
@handler = handler
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
# Map Nokogiri events to Moxml events
|
|
373
|
+
|
|
374
|
+
def start_document
|
|
375
|
+
@handler.on_start_document
|
|
376
|
+
end
|
|
377
|
+
|
|
378
|
+
def end_document
|
|
379
|
+
@handler.on_end_document
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
def start_element(name, attributes = [])
|
|
383
|
+
# Convert Nokogiri attributes array to hash
|
|
384
|
+
attr_hash = {}
|
|
385
|
+
namespaces_hash = {}
|
|
386
|
+
|
|
387
|
+
attributes.each do |attr|
|
|
388
|
+
attr_name = attr[0]
|
|
389
|
+
attr_value = attr[1]
|
|
390
|
+
|
|
391
|
+
if attr_name.start_with?("xmlns")
|
|
392
|
+
# Namespace declaration
|
|
393
|
+
prefix = attr_name == "xmlns" ? nil : attr_name.sub("xmlns:", "")
|
|
394
|
+
namespaces_hash[prefix] = attr_value
|
|
395
|
+
else
|
|
396
|
+
attr_hash[attr_name] = attr_value
|
|
397
|
+
end
|
|
398
|
+
end
|
|
399
|
+
|
|
400
|
+
@handler.on_start_element(name, attr_hash, namespaces_hash)
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
def end_element(name)
|
|
404
|
+
@handler.on_end_element(name)
|
|
405
|
+
end
|
|
406
|
+
|
|
407
|
+
def characters(string)
|
|
408
|
+
@handler.on_characters(string)
|
|
409
|
+
end
|
|
410
|
+
|
|
411
|
+
def cdata_block(string)
|
|
412
|
+
@handler.on_cdata(string)
|
|
413
|
+
end
|
|
414
|
+
|
|
415
|
+
def comment(string)
|
|
416
|
+
@handler.on_comment(string)
|
|
417
|
+
end
|
|
418
|
+
|
|
419
|
+
def processing_instruction(target, data)
|
|
420
|
+
@handler.on_processing_instruction(target, data || "")
|
|
421
|
+
end
|
|
422
|
+
|
|
423
|
+
def error(string)
|
|
424
|
+
@handler.on_error(Moxml::ParseError.new(string))
|
|
425
|
+
end
|
|
426
|
+
|
|
427
|
+
def warning(string)
|
|
428
|
+
@handler.on_warning(string)
|
|
429
|
+
end
|
|
430
|
+
end
|
|
319
431
|
end
|
|
320
432
|
end
|
|
321
433
|
end
|
data/lib/moxml/adapter/oga.rb
CHANGED
|
@@ -18,12 +18,37 @@ module Moxml
|
|
|
18
18
|
native_doc = begin
|
|
19
19
|
::Oga.parse_xml(xml, strict: options[:strict])
|
|
20
20
|
rescue LL::ParserError => e
|
|
21
|
-
raise Moxml::ParseError
|
|
21
|
+
raise Moxml::ParseError.new(
|
|
22
|
+
e.message,
|
|
23
|
+
source: xml.is_a?(String) ? xml[0..100] : nil,
|
|
24
|
+
)
|
|
22
25
|
end
|
|
23
26
|
|
|
24
27
|
DocumentBuilder.new(Context.new(:oga)).build(native_doc)
|
|
25
28
|
end
|
|
26
29
|
|
|
30
|
+
# SAX parsing implementation for Oga
|
|
31
|
+
#
|
|
32
|
+
# @param xml [String, IO] XML to parse
|
|
33
|
+
# @param handler [Moxml::SAX::Handler] Moxml SAX handler
|
|
34
|
+
# @return [void]
|
|
35
|
+
def sax_parse(xml, handler)
|
|
36
|
+
bridge = OgaSAXBridge.new(handler)
|
|
37
|
+
|
|
38
|
+
xml_string = xml.respond_to?(:read) ? xml.read : xml.to_s
|
|
39
|
+
|
|
40
|
+
# Manually call start_document (Oga doesn't)
|
|
41
|
+
handler.on_start_document
|
|
42
|
+
|
|
43
|
+
::Oga.sax_parse_xml(bridge, xml_string)
|
|
44
|
+
|
|
45
|
+
# Manually call end_document (Oga doesn't)
|
|
46
|
+
handler.on_end_document
|
|
47
|
+
rescue StandardError => e
|
|
48
|
+
error = Moxml::ParseError.new(e.message)
|
|
49
|
+
handler.on_error(error)
|
|
50
|
+
end
|
|
51
|
+
|
|
27
52
|
def create_document(_native_doc = nil)
|
|
28
53
|
::Oga::XML::Document.new
|
|
29
54
|
end
|
|
@@ -46,7 +71,7 @@ module Moxml
|
|
|
46
71
|
|
|
47
72
|
def create_native_doctype(name, external_id, system_id)
|
|
48
73
|
::Oga::XML::Doctype.new(
|
|
49
|
-
name: name, public_id: external_id, system_id: system_id, type: "PUBLIC"
|
|
74
|
+
name: name, public_id: external_id, system_id: system_id, type: "PUBLIC",
|
|
50
75
|
)
|
|
51
76
|
end
|
|
52
77
|
|
|
@@ -58,19 +83,23 @@ module Moxml
|
|
|
58
83
|
attrs = {
|
|
59
84
|
version: version,
|
|
60
85
|
encoding: encoding,
|
|
61
|
-
standalone: standalone
|
|
86
|
+
standalone: standalone,
|
|
62
87
|
}.compact
|
|
63
88
|
::Moxml::Adapter::CustomizedOga::XmlDeclaration.new(attrs)
|
|
64
89
|
end
|
|
65
90
|
|
|
66
91
|
def declaration_attribute(declaration, attr_name)
|
|
67
|
-
|
|
92
|
+
unless ::Moxml::Declaration::ALLOWED_ATTRIBUTES.include?(attr_name.to_s)
|
|
93
|
+
return
|
|
94
|
+
end
|
|
68
95
|
|
|
69
96
|
declaration.public_send(attr_name)
|
|
70
97
|
end
|
|
71
98
|
|
|
72
99
|
def set_declaration_attribute(declaration, attr_name, value)
|
|
73
|
-
|
|
100
|
+
unless ::Moxml::Declaration::ALLOWED_ATTRIBUTES.include?(attr_name.to_s)
|
|
101
|
+
return
|
|
102
|
+
end
|
|
74
103
|
|
|
75
104
|
declaration.public_send("#{attr_name}=", value)
|
|
76
105
|
end
|
|
@@ -80,7 +109,8 @@ module Moxml
|
|
|
80
109
|
return ns unless ns.nil?
|
|
81
110
|
|
|
82
111
|
# Oga creates an attribute and registers a namespace
|
|
83
|
-
set_attribute(element,
|
|
112
|
+
set_attribute(element,
|
|
113
|
+
[::Oga::XML::Element::XMLNS_PREFIX, prefix].compact.join(":"), uri)
|
|
84
114
|
element.register_namespace(prefix, uri)
|
|
85
115
|
::Oga::XML::Namespace.new(name: prefix, uri: uri)
|
|
86
116
|
end
|
|
@@ -131,7 +161,10 @@ module Moxml
|
|
|
131
161
|
def children(node)
|
|
132
162
|
all_children = []
|
|
133
163
|
|
|
134
|
-
|
|
164
|
+
if node.is_a?(::Oga::XML::Document)
|
|
165
|
+
all_children += [node.xml_declaration,
|
|
166
|
+
node.doctype].compact
|
|
167
|
+
end
|
|
135
168
|
|
|
136
169
|
return all_children unless node.respond_to?(:children)
|
|
137
170
|
|
|
@@ -180,12 +213,15 @@ module Moxml
|
|
|
180
213
|
|
|
181
214
|
def set_attribute(element, name, value)
|
|
182
215
|
namespace_name = nil
|
|
183
|
-
|
|
216
|
+
if name.to_s.include?(":")
|
|
217
|
+
namespace_name, name = name.to_s.split(":",
|
|
218
|
+
2)
|
|
219
|
+
end
|
|
184
220
|
|
|
185
221
|
attr = ::Oga::XML::Attribute.new(
|
|
186
222
|
name: name.to_s,
|
|
187
223
|
namespace_name: namespace_name,
|
|
188
|
-
value: value.to_s
|
|
224
|
+
value: value.to_s,
|
|
189
225
|
)
|
|
190
226
|
element.add_attribute(attr)
|
|
191
227
|
end
|
|
@@ -313,15 +349,26 @@ module Moxml
|
|
|
313
349
|
end
|
|
314
350
|
|
|
315
351
|
def xpath(node, expression, namespaces = nil)
|
|
316
|
-
node.xpath(expression, {},
|
|
352
|
+
node.xpath(expression, {},
|
|
353
|
+
namespaces: namespaces&.transform_keys(&:to_s)).to_a
|
|
317
354
|
rescue ::LL::ParserError => e
|
|
318
|
-
raise Moxml::XPathError
|
|
355
|
+
raise Moxml::XPathError.new(
|
|
356
|
+
e.message,
|
|
357
|
+
expression: expression,
|
|
358
|
+
adapter: "Oga",
|
|
359
|
+
node: node,
|
|
360
|
+
)
|
|
319
361
|
end
|
|
320
362
|
|
|
321
363
|
def at_xpath(node, expression, namespaces = nil)
|
|
322
364
|
node.at_xpath(expression, namespaces: namespaces)
|
|
323
365
|
rescue ::Oga::XPath::Error => e
|
|
324
|
-
raise Moxml::XPathError
|
|
366
|
+
raise Moxml::XPathError.new(
|
|
367
|
+
e.message,
|
|
368
|
+
expression: expression,
|
|
369
|
+
adapter: "Oga",
|
|
370
|
+
node: node,
|
|
371
|
+
)
|
|
325
372
|
end
|
|
326
373
|
|
|
327
374
|
def serialize(node, _options = {})
|
|
@@ -330,5 +377,69 @@ module Moxml
|
|
|
330
377
|
end
|
|
331
378
|
end
|
|
332
379
|
end
|
|
380
|
+
|
|
381
|
+
# Bridge between Oga SAX and Moxml SAX
|
|
382
|
+
#
|
|
383
|
+
# Translates Oga SAX events to Moxml::SAX::Handler events.
|
|
384
|
+
# Oga has different event naming and namespace as first param.
|
|
385
|
+
#
|
|
386
|
+
# @private
|
|
387
|
+
class OgaSAXBridge
|
|
388
|
+
def initialize(handler)
|
|
389
|
+
@handler = handler
|
|
390
|
+
end
|
|
391
|
+
|
|
392
|
+
# Oga: on_element(namespace, name, attributes)
|
|
393
|
+
# namespace may be nil
|
|
394
|
+
# attributes is an array of [name, value] pairs
|
|
395
|
+
def on_element(namespace, name, attributes)
|
|
396
|
+
# Build full qualified name if namespace present
|
|
397
|
+
element_name = namespace ? "#{namespace}:#{name}" : name
|
|
398
|
+
|
|
399
|
+
# Convert Oga attributes to hash
|
|
400
|
+
attr_hash = {}
|
|
401
|
+
ns_hash = {}
|
|
402
|
+
|
|
403
|
+
# Oga delivers attributes as array of [name, value] pairs
|
|
404
|
+
attributes.each do |attr_name, attr_value|
|
|
405
|
+
if attr_name.to_s.start_with?("xmlns")
|
|
406
|
+
prefix = if attr_name.to_s == "xmlns"
|
|
407
|
+
nil
|
|
408
|
+
else
|
|
409
|
+
attr_name.to_s.sub(
|
|
410
|
+
"xmlns:", ""
|
|
411
|
+
)
|
|
412
|
+
end
|
|
413
|
+
ns_hash[prefix] = attr_value
|
|
414
|
+
else
|
|
415
|
+
attr_hash[attr_name.to_s] = attr_value
|
|
416
|
+
end
|
|
417
|
+
end
|
|
418
|
+
|
|
419
|
+
@handler.on_start_element(element_name, attr_hash, ns_hash)
|
|
420
|
+
end
|
|
421
|
+
|
|
422
|
+
# Oga: after_element(namespace, name)
|
|
423
|
+
def after_element(namespace, name)
|
|
424
|
+
element_name = namespace ? "#{namespace}:#{name}" : name
|
|
425
|
+
@handler.on_end_element(element_name)
|
|
426
|
+
end
|
|
427
|
+
|
|
428
|
+
def on_text(text)
|
|
429
|
+
@handler.on_characters(text)
|
|
430
|
+
end
|
|
431
|
+
|
|
432
|
+
def on_cdata(text)
|
|
433
|
+
@handler.on_cdata(text)
|
|
434
|
+
end
|
|
435
|
+
|
|
436
|
+
def on_comment(text)
|
|
437
|
+
@handler.on_comment(text)
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
def on_processing_instruction(name, text)
|
|
441
|
+
@handler.on_processing_instruction(name, text || "")
|
|
442
|
+
end
|
|
443
|
+
end
|
|
333
444
|
end
|
|
334
445
|
end
|