canon 0.2.9 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +21 -22
- data/Rakefile +25 -2
- data/lib/canon/cache.rb +18 -27
- data/lib/canon/cli.rb +0 -3
- data/lib/canon/commands/diff_command.rb +0 -6
- data/lib/canon/commands/format_command.rb +0 -4
- data/lib/canon/commands.rb +9 -0
- data/lib/canon/comparison/child_realignment.rb +0 -2
- data/lib/canon/comparison/compare_profile.rb +30 -36
- data/lib/canon/comparison/comparison_result.rb +0 -2
- data/lib/canon/comparison/diff_node_builder.rb +353 -0
- data/lib/canon/comparison/dimensions/dimension.rb +51 -0
- data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
- data/lib/canon/comparison/dimensions/registry.rb +101 -60
- data/lib/canon/comparison/dimensions.rb +15 -46
- data/lib/canon/comparison/html_comparator.rb +20 -141
- data/lib/canon/comparison/html_compare_profile.rb +15 -18
- data/lib/canon/comparison/json_comparator.rb +4 -165
- data/lib/canon/comparison/json_parser.rb +0 -2
- data/lib/canon/comparison/markup_comparator.rb +14 -210
- data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
- data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
- data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
- data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
- data/lib/canon/comparison/match_options.rb +13 -88
- data/lib/canon/comparison/node_inspector.rb +13 -48
- data/lib/canon/comparison/pipeline.rb +269 -0
- data/lib/canon/comparison/profile_definition.rb +0 -2
- data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
- data/lib/canon/comparison/strategies.rb +16 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +19 -5
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
- data/lib/canon/comparison/xml_comparator/node_parser.rb +2 -6
- data/lib/canon/comparison/xml_comparator.rb +4 -492
- data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
- data/lib/canon/comparison/xml_node_comparison.rb +4 -119
- data/lib/canon/comparison/yaml_comparator.rb +0 -3
- data/lib/canon/comparison.rb +144 -267
- data/lib/canon/config/config_dsl.rb +159 -0
- data/lib/canon/config/env_provider.rb +0 -3
- data/lib/canon/config/env_schema.rb +48 -58
- data/lib/canon/config/profile_loader.rb +0 -1
- data/lib/canon/config.rb +116 -468
- data/lib/canon/diff/diff_block_builder.rb +0 -2
- data/lib/canon/diff/diff_classifier.rb +0 -5
- data/lib/canon/diff/diff_context.rb +0 -2
- data/lib/canon/diff/diff_context_builder.rb +0 -2
- data/lib/canon/diff/diff_line_builder.rb +2 -3
- data/lib/canon/diff/diff_node_enricher.rb +0 -4
- data/lib/canon/diff/diff_node_mapper.rb +10 -12
- data/lib/canon/diff/diff_report_builder.rb +0 -4
- data/lib/canon/diff/formatting_detector.rb +3 -3
- data/lib/canon/diff/node_serializer.rb +0 -7
- data/lib/canon/diff/xml_serialization_formatter.rb +0 -3
- data/lib/canon/diff.rb +39 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +20 -17
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +119 -3
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -5
- data/lib/canon/diff_formatter/debug_output.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +27 -61
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +26 -29
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
- data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
- data/lib/canon/diff_formatter.rb +26 -20
- data/lib/canon/formatters/html4_formatter.rb +0 -2
- data/lib/canon/formatters/html5_formatter.rb +0 -2
- data/lib/canon/formatters/html_formatter.rb +0 -3
- data/lib/canon/formatters/json_formatter.rb +0 -1
- data/lib/canon/formatters/xml_formatter.rb +0 -4
- data/lib/canon/formatters/yaml_formatter.rb +0 -1
- data/lib/canon/formatters.rb +16 -0
- data/lib/canon/html/data_model.rb +1 -11
- data/lib/canon/html.rb +4 -3
- data/lib/canon/options/cli_generator.rb +0 -2
- data/lib/canon/options/registry.rb +0 -2
- data/lib/canon/options.rb +9 -0
- data/lib/canon/pretty_printer/html.rb +0 -1
- data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
- data/lib/canon/pretty_printer.rb +12 -0
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters.rb +14 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
- data/lib/canon/tree_diff/core/node_signature.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +12 -5
- data/lib/canon/tree_diff/core.rb +17 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
- data/lib/canon/tree_diff/matchers.rb +15 -0
- data/lib/canon/tree_diff/operation_converter.rb +7 -15
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
- data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +6 -5
- data/lib/canon/tree_diff/operations.rb +13 -0
- data/lib/canon/tree_diff.rb +26 -27
- data/lib/canon/validators/base_validator.rb +5 -10
- data/lib/canon/validators/html_validator.rb +2 -8
- data/lib/canon/validators/json_validator.rb +0 -1
- data/lib/canon/validators/xml_validator.rb +2 -8
- data/lib/canon/validators/yaml_validator.rb +0 -1
- data/lib/canon/validators.rb +12 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +0 -4
- data/lib/canon/xml/data_model.rb +5 -15
- data/lib/canon/xml/line_range_mapper.rb +0 -2
- data/lib/canon/xml/nodes/attribute_node.rb +0 -2
- data/lib/canon/xml/nodes/comment_node.rb +0 -2
- data/lib/canon/xml/nodes/element_node.rb +0 -2
- data/lib/canon/xml/nodes/namespace_node.rb +0 -2
- data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
- data/lib/canon/xml/nodes/root_node.rb +0 -2
- data/lib/canon/xml/nodes/text_node.rb +0 -2
- data/lib/canon/xml/nodes.rb +19 -0
- data/lib/canon/xml/processor.rb +0 -5
- data/lib/canon/xml/sax_builder.rb +1 -8
- data/lib/canon/xml/whitespace_normalizer.rb +2 -2
- data/lib/canon/xml.rb +33 -0
- data/lib/canon/xml_backend.rb +50 -14
- data/lib/canon/xml_parsing.rb +32 -18
- data/lib/canon.rb +25 -15
- data/lib/tasks/performance.rake +0 -58
- data/lib/tasks/performance_comparator.rb +132 -65
- data/lib/tasks/performance_helpers.rb +4 -249
- data/lib/tasks/performance_report.rb +309 -0
- metadata +28 -15
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
- data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
- data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -270
data/lib/canon/tree_diff.rb
CHANGED
|
@@ -1,33 +1,32 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Canon
|
|
4
|
+
# Semantic tree-diff algorithm — distinct from the DOM positional
|
|
5
|
+
# diff in {Canon::Comparison}.
|
|
6
|
+
#
|
|
7
|
+
# This module computes signature-based tree matches and produces
|
|
8
|
+
# INSERT/DELETE/UPDATE/MOVE operations. Sub-namespaces:
|
|
9
|
+
#
|
|
10
|
+
# * Core — TreeNode, Matching, NodeSignature, NodeWeight,
|
|
11
|
+
# AttributeComparator, XmlEntityDecoder
|
|
12
|
+
# * Matchers — HashMatcher, SimilarityMatcher, StructuralPropagator,
|
|
13
|
+
# UniversalMatcher
|
|
14
|
+
# * Operations — Operation, OperationDetector
|
|
15
|
+
# * Adapters — format-specific tree adapters (XML, JSON, HTML, YAML)
|
|
16
|
+
# * OperationConverterHelpers — MetadataEnricher, ReasonBuilder,
|
|
17
|
+
# PostProcessor, UpdateChangeHandler
|
|
18
|
+
#
|
|
19
|
+
# Top-level entry points: OperationConverter and TreeDiffIntegrator.
|
|
20
|
+
#
|
|
21
|
+
# All children are autoloaded — never `require_relative` them.
|
|
4
22
|
module TreeDiff
|
|
5
|
-
|
|
23
|
+
autoload :Adapters, "canon/tree_diff/adapters"
|
|
24
|
+
autoload :Core, "canon/tree_diff/core"
|
|
25
|
+
autoload :Matchers, "canon/tree_diff/matchers"
|
|
26
|
+
autoload :OperationConverter, "canon/tree_diff/operation_converter"
|
|
27
|
+
autoload :OperationConverterHelpers,
|
|
28
|
+
"canon/tree_diff/operation_converter_helpers"
|
|
29
|
+
autoload :Operations, "canon/tree_diff/operations"
|
|
30
|
+
autoload :TreeDiffIntegrator, "canon/tree_diff/tree_diff_integrator"
|
|
6
31
|
end
|
|
7
32
|
end
|
|
8
|
-
|
|
9
|
-
# Load core components
|
|
10
|
-
require_relative "tree_diff/core/tree_node"
|
|
11
|
-
require_relative "tree_diff/core/node_signature"
|
|
12
|
-
require_relative "tree_diff/core/node_weight"
|
|
13
|
-
require_relative "tree_diff/core/matching"
|
|
14
|
-
|
|
15
|
-
# Load matchers
|
|
16
|
-
require_relative "tree_diff/matchers/hash_matcher"
|
|
17
|
-
require_relative "tree_diff/matchers/similarity_matcher"
|
|
18
|
-
require_relative "tree_diff/matchers/structural_propagator"
|
|
19
|
-
require_relative "tree_diff/matchers/universal_matcher"
|
|
20
|
-
|
|
21
|
-
# Load operations
|
|
22
|
-
require_relative "tree_diff/operations/operation"
|
|
23
|
-
require_relative "tree_diff/operations/operation_detector"
|
|
24
|
-
require_relative "tree_diff/operation_converter"
|
|
25
|
-
|
|
26
|
-
# Load adapters
|
|
27
|
-
require_relative "tree_diff/adapters/xml_adapter"
|
|
28
|
-
require_relative "tree_diff/adapters/json_adapter"
|
|
29
|
-
require_relative "tree_diff/adapters/html_adapter"
|
|
30
|
-
require_relative "tree_diff/adapters/yaml_adapter"
|
|
31
|
-
|
|
32
|
-
# Load integrator
|
|
33
|
-
require_relative "tree_diff/tree_diff_integrator"
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../errors"
|
|
4
|
-
|
|
5
3
|
module Canon
|
|
6
4
|
module Validators
|
|
7
5
|
# Base class for all input validators
|
|
@@ -29,17 +27,14 @@ module Canon
|
|
|
29
27
|
line = nil
|
|
30
28
|
column = nil
|
|
31
29
|
|
|
32
|
-
|
|
33
|
-
if error.respond_to?(:line)
|
|
30
|
+
if error.is_a?(Nokogiri::XML::SyntaxError)
|
|
34
31
|
line = error.line
|
|
32
|
+
column = error.column
|
|
35
33
|
elsif error.message =~ /line[:\s]+(\d+)/i
|
|
36
34
|
line = ::Regexp.last_match(1).to_i
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
column = error.column
|
|
41
|
-
elsif error.message =~ /column[:\s]+(\d+)/i
|
|
42
|
-
column = ::Regexp.last_match(1).to_i
|
|
35
|
+
if error.message =~ /column[:\s]+(\d+)/i
|
|
36
|
+
column = ::Regexp.last_match(1).to_i
|
|
37
|
+
end
|
|
43
38
|
end
|
|
44
39
|
|
|
45
40
|
{ line: line, column: column }
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
|
-
require_relative "base_validator"
|
|
5
4
|
|
|
6
5
|
module Canon
|
|
7
6
|
module Validators
|
|
@@ -94,13 +93,8 @@ module Canon
|
|
|
94
93
|
#
|
|
95
94
|
# @param error [Nokogiri::XML::SyntaxError] The syntax error
|
|
96
95
|
# @return [String, nil] Additional details about the error
|
|
97
|
-
def self.extract_details(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
details = error.errors.map(&:message).reject do |msg|
|
|
101
|
-
msg == error.message
|
|
102
|
-
end
|
|
103
|
-
details.join("; ") unless details.empty?
|
|
96
|
+
def self.extract_details(_error)
|
|
97
|
+
nil
|
|
104
98
|
end
|
|
105
99
|
|
|
106
100
|
# Build error details from multiple errors
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
|
-
require_relative "base_validator"
|
|
5
4
|
|
|
6
5
|
module Canon
|
|
7
6
|
module Validators
|
|
@@ -38,13 +37,8 @@ module Canon
|
|
|
38
37
|
#
|
|
39
38
|
# @param error [Nokogiri::XML::SyntaxError] The syntax error
|
|
40
39
|
# @return [String, nil] Additional details about the error
|
|
41
|
-
def self.extract_details(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
details = error.errors.map(&:message).reject do |msg|
|
|
45
|
-
msg == error.message
|
|
46
|
-
end
|
|
47
|
-
details.join("; ") unless details.empty?
|
|
40
|
+
def self.extract_details(_error)
|
|
41
|
+
nil
|
|
48
42
|
end
|
|
49
43
|
|
|
50
44
|
private_class_method :extract_details
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
# Format-specific validators that raise {Error} on invalid input.
|
|
5
|
+
module Validators
|
|
6
|
+
autoload :BaseValidator, "canon/validators/base_validator"
|
|
7
|
+
autoload :HtmlValidator, "canon/validators/html_validator"
|
|
8
|
+
autoload :JsonValidator, "canon/validators/json_validator"
|
|
9
|
+
autoload :XmlValidator, "canon/validators/xml_validator"
|
|
10
|
+
autoload :YamlValidator, "canon/validators/yaml_validator"
|
|
11
|
+
end
|
|
12
|
+
end
|
data/lib/canon/version.rb
CHANGED
data/lib/canon/xml/c14n.rb
CHANGED
data/lib/canon/xml/data_model.rb
CHANGED
|
@@ -2,16 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
4
|
require "set"
|
|
5
|
-
require_relative "../data_model"
|
|
6
|
-
require_relative "../xml_backend"
|
|
7
|
-
require_relative "../xml_parsing"
|
|
8
|
-
require_relative "nodes/root_node"
|
|
9
|
-
require_relative "nodes/element_node"
|
|
10
|
-
require_relative "nodes/namespace_node"
|
|
11
|
-
require_relative "nodes/attribute_node"
|
|
12
|
-
require_relative "nodes/text_node"
|
|
13
|
-
require_relative "nodes/comment_node"
|
|
14
|
-
require_relative "nodes/processing_instruction_node"
|
|
15
5
|
|
|
16
6
|
module Canon
|
|
17
7
|
module Xml
|
|
@@ -139,7 +129,7 @@ module Canon
|
|
|
139
129
|
def self.build_from_nokogiri(nokogiri_doc, preserve_whitespace: false)
|
|
140
130
|
root = Nodes::RootNode.new
|
|
141
131
|
|
|
142
|
-
if nokogiri_doc.
|
|
132
|
+
if nokogiri_doc.is_a?(Nokogiri::XML::Document) && nokogiri_doc.root
|
|
143
133
|
root.add_child(build_element_node(nokogiri_doc.root,
|
|
144
134
|
preserve_whitespace: preserve_whitespace))
|
|
145
135
|
nokogiri_doc.children.each do |child|
|
|
@@ -275,7 +265,7 @@ preserve_whitespace: false)
|
|
|
275
265
|
def self.build_from_moxml(moxml_doc, preserve_whitespace: false)
|
|
276
266
|
root = Nodes::RootNode.new
|
|
277
267
|
|
|
278
|
-
if moxml_doc.
|
|
268
|
+
if moxml_doc.is_a?(Moxml::Document) && moxml_doc.root
|
|
279
269
|
root.add_child(build_moxml_element_node(moxml_doc.root,
|
|
280
270
|
preserve_whitespace: preserve_whitespace))
|
|
281
271
|
end
|
|
@@ -327,7 +317,7 @@ preserve_whitespace: false)
|
|
|
327
317
|
element.add_namespace(ns_node)
|
|
328
318
|
end
|
|
329
319
|
|
|
330
|
-
unless element.
|
|
320
|
+
unless element.namespace_nodes.any? do |n|
|
|
331
321
|
n.prefix == "xml"
|
|
332
322
|
end
|
|
333
323
|
element.add_namespace(Nodes::NamespaceNode.new(
|
|
@@ -348,7 +338,7 @@ preserve_whitespace: false)
|
|
|
348
338
|
end
|
|
349
339
|
|
|
350
340
|
def self.build_moxml_text_node(moxml_text, preserve_whitespace: false)
|
|
351
|
-
content = moxml_text.
|
|
341
|
+
content = moxml_text.content
|
|
352
342
|
|
|
353
343
|
if !preserve_whitespace && content.strip.empty? && moxml_text.parent.is_a?(Moxml::Element)
|
|
354
344
|
return nil
|
|
@@ -358,7 +348,7 @@ preserve_whitespace: false)
|
|
|
358
348
|
end
|
|
359
349
|
|
|
360
350
|
def self.build_moxml_comment_node(moxml_comment)
|
|
361
|
-
Nodes::CommentNode.new(value: moxml_comment.
|
|
351
|
+
Nodes::CommentNode.new(value: moxml_comment.content)
|
|
362
352
|
end
|
|
363
353
|
|
|
364
354
|
def self.build_moxml_pi_node(moxml_pi)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Xml
|
|
5
|
+
# XPath data model node types. All nodes inherit from
|
|
6
|
+
# {Canon::Xml::Node}. Children are autoloaded — never
|
|
7
|
+
# `require_relative` them.
|
|
8
|
+
module Nodes
|
|
9
|
+
autoload :AttributeNode, "canon/xml/nodes/attribute_node"
|
|
10
|
+
autoload :CommentNode, "canon/xml/nodes/comment_node"
|
|
11
|
+
autoload :ElementNode, "canon/xml/nodes/element_node"
|
|
12
|
+
autoload :NamespaceNode, "canon/xml/nodes/namespace_node"
|
|
13
|
+
autoload :ProcessingInstructionNode,
|
|
14
|
+
"canon/xml/nodes/processing_instruction_node"
|
|
15
|
+
autoload :RootNode, "canon/xml/nodes/root_node"
|
|
16
|
+
autoload :TextNode, "canon/xml/nodes/text_node"
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
data/lib/canon/xml/processor.rb
CHANGED
|
@@ -1,13 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
|
-
require_relative "nodes/root_node"
|
|
5
|
-
require_relative "nodes/element_node"
|
|
6
|
-
require_relative "nodes/namespace_node"
|
|
7
|
-
require_relative "nodes/attribute_node"
|
|
8
|
-
require_relative "nodes/text_node"
|
|
9
|
-
require_relative "nodes/comment_node"
|
|
10
|
-
require_relative "nodes/processing_instruction_node"
|
|
11
4
|
|
|
12
5
|
module Canon
|
|
13
6
|
module Xml
|
|
@@ -28,7 +21,7 @@ module Canon
|
|
|
28
21
|
# For C14N, use strip_doctype: true to avoid DTD default attribute expansion:
|
|
29
22
|
# root = SaxBuilder.parse(xml_string, strip_doctype: true)
|
|
30
23
|
#
|
|
31
|
-
class SaxBuilder < Nokogiri::XML::SAX::Document
|
|
24
|
+
class SaxBuilder < (RUBY_ENGINE == "opal" ? Object : Nokogiri::XML::SAX::Document)
|
|
32
25
|
# Parse XML string and return Canon::Xml::Node tree
|
|
33
26
|
#
|
|
34
27
|
# @param xml_string [String] XML content to parse
|
|
@@ -43,9 +43,9 @@ module Canon
|
|
|
43
43
|
# @param node [Moxml::Node] Node to check
|
|
44
44
|
# @return [Boolean] true if node is whitespace-only and should be ignored
|
|
45
45
|
def inter_element_whitespace?(node)
|
|
46
|
-
return false unless node.
|
|
46
|
+
return false unless node.is_a?(Nokogiri::XML::Text) || node.is_a?(Moxml::Text)
|
|
47
47
|
|
|
48
|
-
text = node.
|
|
48
|
+
text = node.is_a?(Moxml::Text) ? node.content.to_s : node.content.to_s
|
|
49
49
|
text.strip.empty?
|
|
50
50
|
end
|
|
51
51
|
|
data/lib/canon/xml.rb
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
# Native XML data model, parsing, and DOM utilities.
|
|
5
|
+
#
|
|
6
|
+
# This namespace holds Canon's own XML representation (independent of
|
|
7
|
+
# Nokogiri/Moxml), including:
|
|
8
|
+
# * the XPath data model nodes (Canon::Xml::Node and Canon::Xml::Nodes::*)
|
|
9
|
+
# * the SAX builder that produces them
|
|
10
|
+
# * element matching, line-range mapping, xpath, C14N, processors
|
|
11
|
+
# * canonicalization (C14n) and serialization helpers
|
|
12
|
+
#
|
|
13
|
+
# All children are autoloaded from this file. The nested Nodes namespace
|
|
14
|
+
# is itself a sibling and is autoloaded on first reference to
|
|
15
|
+
# Canon::Xml::Nodes.
|
|
16
|
+
module Xml
|
|
17
|
+
autoload :AttributeHandler, "canon/xml/attribute_handler"
|
|
18
|
+
autoload :C14n, "canon/xml/c14n"
|
|
19
|
+
autoload :CharacterEncoder, "canon/xml/character_encoder"
|
|
20
|
+
autoload :DataModel, "canon/xml/data_model"
|
|
21
|
+
autoload :ElementMatcher, "canon/xml/element_matcher"
|
|
22
|
+
autoload :LineRangeMapper, "canon/xml/line_range_mapper"
|
|
23
|
+
autoload :NamespaceHandler, "canon/xml/namespace_handler"
|
|
24
|
+
autoload :NamespaceHelper, "canon/xml/namespace_helper"
|
|
25
|
+
autoload :Node, "canon/xml/node"
|
|
26
|
+
autoload :Nodes, "canon/xml/nodes"
|
|
27
|
+
autoload :Processor, "canon/xml/processor"
|
|
28
|
+
autoload :SaxBuilder, "canon/xml/sax_builder"
|
|
29
|
+
autoload :WhitespaceNormalizer, "canon/xml/whitespace_normalizer"
|
|
30
|
+
autoload :XmlBaseHandler, "canon/xml/xml_base_handler"
|
|
31
|
+
autoload :XPathEngine, "canon/xml/xpath_engine"
|
|
32
|
+
end
|
|
33
|
+
end
|
data/lib/canon/xml_backend.rb
CHANGED
|
@@ -1,20 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Canon
|
|
4
|
-
# Centralized XML backend detection for Canon.
|
|
5
|
-
#
|
|
6
|
-
# Canon supports two XML backends:
|
|
7
|
-
# - :nokogiri — MRI with Nokogiri installed (default, existing code path)
|
|
8
|
-
# - :moxml — Opal runtime or MRI without Nokogiri (uses Oga via moxml)
|
|
9
|
-
#
|
|
10
|
-
# The active backend is determined once at load time and cached.
|
|
11
|
-
# All XML-related code should check `Canon::XmlBackend.moxml?` or
|
|
12
|
-
# `Canon::XmlBackend.nokogiri?` to select the appropriate code path.
|
|
13
|
-
#
|
|
14
|
-
# This module intentionally does NOT wrap Nokogiri through moxml.
|
|
15
|
-
# Each backend path is independent — the Nokogiri path is the existing
|
|
16
|
-
# battle-tested code; the moxml path is a parallel implementation for
|
|
17
|
-
# environments where Nokogiri is unavailable.
|
|
18
4
|
module XmlBackend
|
|
19
5
|
class << self
|
|
20
6
|
def active
|
|
@@ -33,6 +19,56 @@ module Canon
|
|
|
33
19
|
@active = nil
|
|
34
20
|
end
|
|
35
21
|
|
|
22
|
+
# Whether the node is a document fragment (any variant).
|
|
23
|
+
def document_fragment?(node)
|
|
24
|
+
if nokogiri?
|
|
25
|
+
node.is_a?(Nokogiri::XML::DocumentFragment) ||
|
|
26
|
+
node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
|
|
27
|
+
node.is_a?(Nokogiri::HTML5::DocumentFragment)
|
|
28
|
+
else
|
|
29
|
+
false
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Whether the node is an HTML document (any variant).
|
|
34
|
+
def html_document?(node)
|
|
35
|
+
if nokogiri?
|
|
36
|
+
node.is_a?(Nokogiri::HTML::Document) ||
|
|
37
|
+
node.is_a?(Nokogiri::HTML4::Document) ||
|
|
38
|
+
node.is_a?(Nokogiri::HTML5::Document)
|
|
39
|
+
else
|
|
40
|
+
false
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Detect HTML version from a Nokogiri node.
|
|
45
|
+
# Returns :html5 or :html4. Defaults to :html5 for non-Nokogiri nodes.
|
|
46
|
+
def html_version_from_node(node)
|
|
47
|
+
if nokogiri?
|
|
48
|
+
if node.is_a?(Nokogiri::HTML5::Document) ||
|
|
49
|
+
node.is_a?(Nokogiri::HTML5::DocumentFragment)
|
|
50
|
+
:html5
|
|
51
|
+
elsif node.is_a?(Nokogiri::HTML4::Document) ||
|
|
52
|
+
node.is_a?(Nokogiri::HTML4::DocumentFragment)
|
|
53
|
+
:html4
|
|
54
|
+
else
|
|
55
|
+
:html5
|
|
56
|
+
end
|
|
57
|
+
else
|
|
58
|
+
:html5
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Parse an HTML string into an XML fragment.
|
|
63
|
+
def xml_fragment(html_string)
|
|
64
|
+
if nokogiri?
|
|
65
|
+
Nokogiri::XML.fragment(html_string)
|
|
66
|
+
else
|
|
67
|
+
raise Canon::Error,
|
|
68
|
+
"HTML fragment parsing requires the Nokogiri backend"
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
36
72
|
private
|
|
37
73
|
|
|
38
74
|
def detect
|
data/lib/canon/xml_parsing.rb
CHANGED
|
@@ -14,7 +14,7 @@ module Canon
|
|
|
14
14
|
module XmlParsing
|
|
15
15
|
class << self
|
|
16
16
|
def moxml_context
|
|
17
|
-
@moxml_context ||= Moxml.new(:oga)
|
|
17
|
+
@moxml_context ||= Moxml.new(RUBY_ENGINE == "opal" ? :rexml : :oga)
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
# --- Parsing ---
|
|
@@ -47,10 +47,15 @@ module Canon
|
|
|
47
47
|
end
|
|
48
48
|
|
|
49
49
|
# --- Type checks (backend-safe) ---
|
|
50
|
+
#
|
|
51
|
+
# Both Nokogiri and Moxml are loaded as dependencies. XmlBackend
|
|
52
|
+
# determines which is used for *parsing*, but nodes from either
|
|
53
|
+
# library may flow through comparison code (e.g. tests, format
|
|
54
|
+
# detection). Under Nokogiri backend, both types are checked.
|
|
50
55
|
|
|
51
56
|
def document?(obj)
|
|
52
57
|
if XmlBackend.nokogiri?
|
|
53
|
-
obj.is_a?(Nokogiri::XML::Document)
|
|
58
|
+
obj.is_a?(Nokogiri::XML::Document) || obj.is_a?(Moxml::Document)
|
|
54
59
|
else
|
|
55
60
|
obj.is_a?(Moxml::Document)
|
|
56
61
|
end
|
|
@@ -58,7 +63,7 @@ module Canon
|
|
|
58
63
|
|
|
59
64
|
def xml_node?(obj)
|
|
60
65
|
if XmlBackend.nokogiri?
|
|
61
|
-
obj.is_a?(Nokogiri::XML::Node)
|
|
66
|
+
obj.is_a?(Nokogiri::XML::Node) || obj.is_a?(Moxml::Node)
|
|
62
67
|
else
|
|
63
68
|
obj.is_a?(Moxml::Node)
|
|
64
69
|
end
|
|
@@ -66,7 +71,7 @@ module Canon
|
|
|
66
71
|
|
|
67
72
|
def element?(node)
|
|
68
73
|
if XmlBackend.nokogiri?
|
|
69
|
-
node.is_a?(Nokogiri::XML::Element)
|
|
74
|
+
node.is_a?(Nokogiri::XML::Element) || node.is_a?(Moxml::Element)
|
|
70
75
|
else
|
|
71
76
|
node.is_a?(Moxml::Element)
|
|
72
77
|
end
|
|
@@ -74,7 +79,7 @@ module Canon
|
|
|
74
79
|
|
|
75
80
|
def text_node?(node)
|
|
76
81
|
if XmlBackend.nokogiri?
|
|
77
|
-
node.is_a?(Nokogiri::XML::Text)
|
|
82
|
+
node.is_a?(Nokogiri::XML::Text) || node.is_a?(Moxml::Text)
|
|
78
83
|
else
|
|
79
84
|
node.is_a?(Moxml::Text)
|
|
80
85
|
end
|
|
@@ -82,7 +87,7 @@ module Canon
|
|
|
82
87
|
|
|
83
88
|
def comment?(node)
|
|
84
89
|
if XmlBackend.nokogiri?
|
|
85
|
-
node.is_a?(Nokogiri::XML::Comment)
|
|
90
|
+
node.is_a?(Nokogiri::XML::Comment) || node.is_a?(Moxml::Comment)
|
|
86
91
|
else
|
|
87
92
|
node.is_a?(Moxml::Comment)
|
|
88
93
|
end
|
|
@@ -90,7 +95,7 @@ module Canon
|
|
|
90
95
|
|
|
91
96
|
def cdata?(node)
|
|
92
97
|
if XmlBackend.nokogiri?
|
|
93
|
-
node.is_a?(Nokogiri::XML::CDATA)
|
|
98
|
+
node.is_a?(Nokogiri::XML::CDATA) || node.is_a?(Moxml::Cdata)
|
|
94
99
|
else
|
|
95
100
|
node.is_a?(Moxml::Cdata)
|
|
96
101
|
end
|
|
@@ -98,7 +103,7 @@ module Canon
|
|
|
98
103
|
|
|
99
104
|
def processing_instruction?(node)
|
|
100
105
|
if XmlBackend.nokogiri?
|
|
101
|
-
node.is_a?(Nokogiri::XML::ProcessingInstruction)
|
|
106
|
+
node.is_a?(Nokogiri::XML::ProcessingInstruction) || node.is_a?(Moxml::ProcessingInstruction)
|
|
102
107
|
else
|
|
103
108
|
node.is_a?(Moxml::ProcessingInstruction)
|
|
104
109
|
end
|
|
@@ -108,7 +113,7 @@ module Canon
|
|
|
108
113
|
if XmlBackend.nokogiri?
|
|
109
114
|
obj.is_a?(Nokogiri::XML::DocumentFragment)
|
|
110
115
|
else
|
|
111
|
-
|
|
116
|
+
false
|
|
112
117
|
end
|
|
113
118
|
end
|
|
114
119
|
|
|
@@ -142,7 +147,14 @@ module Canon
|
|
|
142
147
|
if XmlBackend.nokogiri?
|
|
143
148
|
node.is_a?(Nokogiri::XML::Node) ? node.content : node.to_s
|
|
144
149
|
else
|
|
145
|
-
|
|
150
|
+
case node
|
|
151
|
+
when Moxml::Text, Moxml::Cdata, Moxml::Comment
|
|
152
|
+
node.content.to_s
|
|
153
|
+
when Moxml::Node
|
|
154
|
+
node.text.to_s
|
|
155
|
+
else
|
|
156
|
+
node.to_s
|
|
157
|
+
end
|
|
146
158
|
end
|
|
147
159
|
end
|
|
148
160
|
|
|
@@ -252,17 +264,19 @@ module Canon
|
|
|
252
264
|
node.to_xml
|
|
253
265
|
end
|
|
254
266
|
|
|
255
|
-
def moxml_canonicalize(
|
|
256
|
-
|
|
267
|
+
def moxml_canonicalize(_node, _options)
|
|
268
|
+
raise Canon::Error,
|
|
269
|
+
"C14N canonicalization is not supported by the moxml backend. " \
|
|
270
|
+
"Use the Nokogiri backend or a different preprocessing option."
|
|
257
271
|
end
|
|
258
272
|
|
|
259
273
|
def moxml_node_type(node)
|
|
260
|
-
return :element if node.
|
|
261
|
-
return :text if node.
|
|
262
|
-
return :comment if node.
|
|
263
|
-
return :cdata if node.
|
|
264
|
-
return :document if node.
|
|
265
|
-
return :processing_instruction if node.
|
|
274
|
+
return :element if node.element?
|
|
275
|
+
return :text if node.text?
|
|
276
|
+
return :comment if node.comment?
|
|
277
|
+
return :cdata if node.cdata?
|
|
278
|
+
return :document if node.document?
|
|
279
|
+
return :processing_instruction if node.processing_instruction?
|
|
266
280
|
|
|
267
281
|
nil
|
|
268
282
|
end
|