canon 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +163 -67
- data/README.adoc +400 -7
- data/docs/Gemfile +9 -0
- data/docs/INDEX.adoc +99 -182
- data/docs/_config.yml +100 -0
- data/docs/advanced/diff-classification.adoc +547 -0
- data/docs/advanced/diff-pipeline.adoc +358 -0
- data/docs/advanced/index.adoc +214 -0
- data/docs/advanced/semantic-diff-report.adoc +390 -0
- data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
- data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
- data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
- data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
- data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
- data/docs/features/diff-formatting/display-filtering.adoc +472 -0
- data/docs/features/diff-formatting/index.adoc +140 -0
- data/docs/features/environment-configuration/index.adoc +327 -0
- data/docs/features/environment-configuration/override-system.adoc +436 -0
- data/docs/features/environment-configuration/size-limits.adoc +273 -0
- data/docs/features/index.adoc +173 -0
- data/docs/features/input-validation/index.adoc +521 -0
- data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
- data/docs/features/match-options/html-policies.adoc +312 -0
- data/docs/features/match-options/index.adoc +621 -0
- data/docs/getting-started/index.adoc +83 -0
- data/docs/getting-started/quick-start.adoc +76 -0
- data/docs/guides/choosing-configuration.adoc +689 -0
- data/docs/guides/index.adoc +181 -0
- data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
- data/docs/interfaces/index.adoc +101 -0
- data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
- data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
- data/docs/lychee.toml +65 -0
- data/docs/reference/cli-options.adoc +418 -0
- data/docs/reference/environment-variables.adoc +375 -0
- data/docs/reference/index.adoc +204 -0
- data/docs/reference/options-across-interfaces.adoc +417 -0
- data/docs/understanding/algorithms/dom-diff.adoc +389 -0
- data/docs/understanding/algorithms/index.adoc +314 -0
- data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
- data/docs/understanding/architecture.adoc +447 -0
- data/docs/understanding/comparison-pipeline.adoc +317 -0
- data/docs/understanding/formats/html.adoc +380 -0
- data/docs/understanding/formats/index.adoc +261 -0
- data/docs/understanding/formats/json.adoc +390 -0
- data/docs/understanding/formats/xml.adoc +366 -0
- data/docs/understanding/formats/yaml.adoc +504 -0
- data/docs/understanding/index.adoc +130 -0
- data/lib/canon/cli.rb +42 -1
- data/lib/canon/commands/diff_command.rb +108 -23
- data/lib/canon/comparison/compare_profile.rb +101 -0
- data/lib/canon/comparison/comparison_result.rb +41 -2
- data/lib/canon/comparison/html_comparator.rb +292 -71
- data/lib/canon/comparison/html_compare_profile.rb +117 -0
- data/lib/canon/comparison/match_options.rb +42 -4
- data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
- data/lib/canon/comparison/xml_comparator.rb +695 -91
- data/lib/canon/comparison.rb +207 -2
- data/lib/canon/config/env_provider.rb +71 -0
- data/lib/canon/config/env_schema.rb +58 -0
- data/lib/canon/config/override_resolver.rb +55 -0
- data/lib/canon/config/type_converter.rb +59 -0
- data/lib/canon/config.rb +158 -29
- data/lib/canon/data_model.rb +29 -0
- data/lib/canon/diff/diff_classifier.rb +74 -14
- data/lib/canon/diff/diff_context_builder.rb +41 -0
- data/lib/canon/diff/diff_line.rb +18 -2
- data/lib/canon/diff/diff_node.rb +18 -3
- data/lib/canon/diff/diff_node_mapper.rb +71 -12
- data/lib/canon/diff/formatting_detector.rb +53 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
- data/lib/canon/diff_formatter/debug_output.rb +7 -1
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
- data/lib/canon/diff_formatter/legend.rb +42 -0
- data/lib/canon/diff_formatter.rb +78 -9
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html_formatter_base.rb +35 -1
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/yaml_formatter.rb +3 -0
- data/lib/canon/html/data_model.rb +229 -0
- data/lib/canon/html.rb +9 -0
- data/lib/canon/options/cli_generator.rb +70 -0
- data/lib/canon/options/registry.rb +234 -0
- data/lib/canon/rspec_matchers.rb +34 -13
- data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
- data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
- data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
- data/lib/canon/tree_diff/core/matching.rb +241 -0
- data/lib/canon/tree_diff/core/node_signature.rb +164 -0
- data/lib/canon/tree_diff/core/node_weight.rb +135 -0
- data/lib/canon/tree_diff/core/tree_node.rb +450 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
- data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
- data/lib/canon/tree_diff/operation_converter.rb +631 -0
- data/lib/canon/tree_diff/operations/operation.rb +92 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
- data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
- data/lib/canon/tree_diff.rb +33 -0
- data/lib/canon/validators/json_validator.rb +3 -1
- data/lib/canon/validators/yaml_validator.rb +3 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +22 -23
- data/lib/canon/xml/element_matcher.rb +128 -20
- data/lib/canon/xml/namespace_helper.rb +110 -0
- data/lib/canon.rb +3 -0
- metadata +81 -23
- data/_config.yml +0 -116
- data/docs/ADVANCED_TOPICS.adoc +0 -20
- data/docs/BASIC_USAGE.adoc +0 -16
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/docs/DIFF_FORMATTING.adoc +0 -540
- data/docs/FORMATS.adoc +0 -447
- data/docs/INPUT_VALIDATION.adoc +0 -477
- data/docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/docs/MATCH_OPTIONS.adoc +0 -719
- data/docs/MODES.adoc +0 -432
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/docs/OPTIONS.adoc +0 -1387
- data/docs/PREPROCESSING.adoc +0 -491
- data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
- data/docs/UNDERSTANDING_CANON.adoc +0 -17
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module TreeDiff
|
|
5
|
+
# TreeDiffIntegrator provides integration between Canon's DOM diff system
# and the new semantic tree diff system.
#
# This class orchestrates:
# - Format-specific adapter selection
# - Tree conversion from parsed documents
# - Per-tree node count limiting
# - Tree matching via UniversalMatcher
# - Operation detection
# - Results formatting
#
# @example XML tree diff
#   integrator = TreeDiffIntegrator.new(format: :xml)
#   result = integrator.diff(doc1, doc2)
#   result[:operations]  # => [Operation(...), ...]
#
class TreeDiffIntegrator
  # Similarity threshold given to the matcher when options carry none.
  DEFAULT_SIMILARITY_THRESHOLD = 0.95

  # Attribute-order mode given to the matcher when options carry none.
  DEFAULT_ATTRIBUTE_ORDER = :ignore

  # Per-tree node limit used when options carry no :max_node_count.
  DEFAULT_MAX_NODE_COUNT = 10_000

  attr_reader :format, :adapter, :matcher, :match_options

  # Initialize integrator for a specific format
  #
  # The full options hash is kept and handed unchanged to the adapter and,
  # later, to the operation detector; only the keys listed below are read
  # here.
  #
  # @param format [Symbol] Format type (:xml, :html, :html4, :html5, :json, :yaml)
  # @param options [Hash, nil] Configuration options (match options from
  #   Canon::Comparison); nil is treated as an empty hash
  # @option options [Float] :similarity_threshold Threshold for similarity matching (default: 0.95)
  # @option options [Boolean] :hash_matching Enable hash matching phase (default: true)
  # @option options [Boolean] :similarity_matching Enable similarity matching phase (default: true)
  # @option options [Boolean] :propagation Enable propagation phase (default: true)
  # @option options [Symbol] :attribute_order How to compare attributes (default: :ignore)
  # @option options [Integer] :max_node_count Per-tree node limit (default: 10_000);
  #   zero or a negative value turns the check off
  # @raise [ArgumentError] if the format is not supported
  def initialize(format:, options: {})
    options ||= {}

    @format = format
    @options = options
    @match_options = options # Store full match options for downstream use

    # Initialize format-specific adapter WITH match options
    @adapter = create_adapter(format, options)

    # The matcher receives a plain positional hash holding only its own settings
    matcher_options = {
      similarity_threshold: options[:similarity_threshold] || DEFAULT_SIMILARITY_THRESHOLD,
      hash_matching: options.fetch(:hash_matching, true),
      similarity_matching: options.fetch(:similarity_matching, true),
      propagation: options.fetch(:propagation, true),
      attribute_order: options[:attribute_order] || DEFAULT_ATTRIBUTE_ORDER,
    }
    @matcher = Matchers::UniversalMatcher.new(matcher_options)
  end

  # Perform tree diff on two documents
  #
  # @param doc1 [Object] First document (format-specific)
  # @param doc2 [Object] Second document (format-specific)
  # @return [Hash] Diff results with :operations, :matching, :statistics
  #   and :trees (a Hash with :tree1 and :tree2)
  # @raise [Canon::SizeLimitExceededError] if either tree exceeds the node limit
  def diff(doc1, doc2)
    # Convert and size-check one document at a time, so an oversized first
    # document is rejected before any work is spent on the second one
    tree1 = build_tree(doc1)
    tree2 = build_tree(doc2)

    # Match trees
    matching = @matcher.match(tree1, tree2)

    # Detect operations with match_options for proper normalization
    detector = Operations::OperationDetector.new(tree1, tree2, matching,
                                                 @match_options)
    operations = detector.detect

    # Return comprehensive results
    {
      operations: operations,
      matching: matching,
      statistics: @matcher.statistics,
      trees: { tree1: tree1, tree2: tree2 },
    }
  end

  # Check if two documents are semantically equivalent
  #
  # @param doc1 [Object] First document
  # @param doc2 [Object] Second document
  # @return [Boolean] true if no operations detected
  def equivalent?(doc1, doc2)
    diff(doc1, doc2)[:operations].empty?
  end

  private

  # Convert a document to a tree and enforce the node count limit on it
  #
  # @param doc [Object] Document (format-specific)
  # @return [Object] Tree produced by the adapter
  # @raise [Canon::SizeLimitExceededError] if node count exceeds limit
  def build_tree(doc)
    tree = @adapter.to_tree(doc)
    check_node_count_limit(tree)
    tree
  end

  # Create format-specific adapter
  #
  # @param format [Symbol] Format type
  # @param match_options [Hash] Match options for text/attribute normalization
  # @return [Object] Adapter instance
  # @raise [ArgumentError] if the format is not supported
  def create_adapter(format, match_options = {})
    case format
    when :xml
      Adapters::XMLAdapter.new(match_options: match_options)
    when :html, :html4, :html5
      Adapters::HTMLAdapter.new(match_options: match_options)
    when :json
      Adapters::JSONAdapter.new(match_options: match_options)
    when :yaml
      Adapters::YAMLAdapter.new(match_options: match_options)
    else
      raise ArgumentError, "Unsupported format: #{format}. " \
                           "Supported formats: :xml, :html, :html4, :html5, :json, :yaml"
    end
  end

  # Check if tree node count exceeds configured limit
  #
  # @param tree [TreeNode] Root node of tree
  # @raise [Canon::SizeLimitExceededError] if node count exceeds limit
  def check_node_count_limit(tree)
    node_count = tree.size
    max_count = get_max_node_count

    # A zero or negative limit means "no limit"
    return unless max_count&.positive?
    return if node_count <= max_count

    raise Canon::SizeLimitExceededError.new(:node_count, node_count,
                                            max_count)
  end

  # Get max node count limit
  #
  # The same limit applies to every format: the value from options when
  # present (and truthy), otherwise DEFAULT_MAX_NODE_COUNT.
  #
  # @return [Integer] Max node count
  def get_max_node_count
    @options[:max_node_count] || DEFAULT_MAX_NODE_COUNT
  end
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module TreeDiff
|
|
5
|
+
# Tree diff module for semantic object tree diffing
|
|
6
|
+
end
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
# Load core components
|
|
10
|
+
require_relative "tree_diff/core/tree_node"
|
|
11
|
+
require_relative "tree_diff/core/node_signature"
|
|
12
|
+
require_relative "tree_diff/core/node_weight"
|
|
13
|
+
require_relative "tree_diff/core/matching"
|
|
14
|
+
|
|
15
|
+
# Load matchers
|
|
16
|
+
require_relative "tree_diff/matchers/hash_matcher"
|
|
17
|
+
require_relative "tree_diff/matchers/similarity_matcher"
|
|
18
|
+
require_relative "tree_diff/matchers/structural_propagator"
|
|
19
|
+
require_relative "tree_diff/matchers/universal_matcher"
|
|
20
|
+
|
|
21
|
+
# Load operations
|
|
22
|
+
require_relative "tree_diff/operations/operation"
|
|
23
|
+
require_relative "tree_diff/operations/operation_detector"
|
|
24
|
+
require_relative "tree_diff/operation_converter"
|
|
25
|
+
|
|
26
|
+
# Load adapters
|
|
27
|
+
require_relative "tree_diff/adapters/xml_adapter"
|
|
28
|
+
require_relative "tree_diff/adapters/json_adapter"
|
|
29
|
+
require_relative "tree_diff/adapters/html_adapter"
|
|
30
|
+
require_relative "tree_diff/adapters/yaml_adapter"
|
|
31
|
+
|
|
32
|
+
# Load integrator
|
|
33
|
+
require_relative "tree_diff/tree_diff_integrator"
|
|
@@ -17,7 +17,9 @@ module Canon
|
|
|
17
17
|
# @raise [Canon::ValidationError] If JSON is malformed
|
|
18
18
|
# @return [void]
|
|
19
19
|
def self.validate!(input)
|
|
20
|
-
return if input.nil?
|
|
20
|
+
return if input.nil?
|
|
21
|
+
return if input.is_a?(Hash) || input.is_a?(Array) # Already parsed
|
|
22
|
+
return if input.strip.empty?
|
|
21
23
|
|
|
22
24
|
JSON.parse(input)
|
|
23
25
|
rescue JSON::ParserError => e
|
|
@@ -19,7 +19,9 @@ module Canon
|
|
|
19
19
|
# @raise [Canon::ValidationError] If YAML is malformed
|
|
20
20
|
# @return [void]
|
|
21
21
|
def self.validate!(input)
|
|
22
|
-
return if input.nil?
|
|
22
|
+
return if input.nil?
|
|
23
|
+
return if input.is_a?(Hash) || input.is_a?(Array) # Already parsed
|
|
24
|
+
return if input.strip.empty?
|
|
23
25
|
|
|
24
26
|
YAML.safe_load(input, permitted_classes: [Symbol, Date, Time])
|
|
25
27
|
rescue Psych::SyntaxError => e
|
data/lib/canon/version.rb
CHANGED
data/lib/canon/xml/data_model.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require "nokogiri"
|
|
4
4
|
require "set"
|
|
5
|
+
require_relative "../data_model"
|
|
5
6
|
require_relative "nodes/root_node"
|
|
6
7
|
require_relative "nodes/element_node"
|
|
7
8
|
require_relative "nodes/namespace_node"
|
|
@@ -13,8 +14,11 @@ require_relative "nodes/processing_instruction_node"
|
|
|
13
14
|
module Canon
|
|
14
15
|
module Xml
|
|
15
16
|
# Builds XPath data model from XML
|
|
16
|
-
class DataModel
|
|
17
|
+
class DataModel < Canon::DataModel
|
|
17
18
|
# Build XPath data model from XML string
|
|
19
|
+
#
|
|
20
|
+
# @param xml_string [String] XML content to parse
|
|
21
|
+
# @return [Nodes::RootNode] Root of the data model tree
|
|
18
22
|
def self.from_xml(xml_string)
|
|
19
23
|
# Parse with Nokogiri
|
|
20
24
|
doc = Nokogiri::XML(xml_string) do |config|
|
|
@@ -29,24 +33,19 @@ module Canon
|
|
|
29
33
|
build_from_nokogiri(doc)
|
|
30
34
|
end
|
|
31
35
|
|
|
32
|
-
#
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# @return [Nodes::RootNode] Root of the data model tree
|
|
37
|
-
def self.from_html(html_string, version: :html4)
|
|
38
|
-
# Parse with Nokogiri using appropriate HTML parser
|
|
39
|
-
doc = if version == :html5
|
|
40
|
-
Nokogiri::HTML5.fragment(html_string)
|
|
41
|
-
else
|
|
42
|
-
Nokogiri::HTML4.fragment(html_string)
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# HTML doesn't have strict namespace requirements like XML,
|
|
46
|
-
# so skip the relative namespace URI check
|
|
36
|
+
# Parse an XML string into the data model.
#
# Alias for from_xml, kept for compatibility with the base class interface.
#
# @param xml_string [String] XML content to parse
# @return [Nodes::RootNode] Root of the data model tree
def self.parse(xml_string)
from_xml(xml_string)
end
|
|
47
40
|
|
|
48
|
-
|
|
49
|
-
|
|
41
|
+
# Serialize XML node to string
#
# @param node [Nodes::RootNode, Nodes::ElementNode] Node to serialize
# @return [String] Result of calling +to_s+ on the node
def self.serialize(node)
# Placeholder for the base class interface: the intent is to delegate to
# the existing XML serialization; for now this only calls +to_s+.
# NOTE(review): whether the node classes' +to_s+ emits XML is not visible
# here - confirm before relying on the output.
node.to_s
end
|
|
51
50
|
|
|
52
51
|
# Check for relative namespace URIs (prohibited by C14N 1.1)
|
|
@@ -184,9 +183,7 @@ module Canon
|
|
|
184
183
|
|
|
185
184
|
# Build attribute nodes for an element
|
|
186
185
|
def self.build_attribute_nodes(nokogiri_element, element)
|
|
187
|
-
nokogiri_element.attributes.
|
|
188
|
-
next if name.start_with?("xmlns")
|
|
189
|
-
|
|
186
|
+
nokogiri_element.attributes.each_value do |attr|
|
|
190
187
|
attr_node = Nodes::AttributeNode.new(
|
|
191
188
|
name: attr.name,
|
|
192
189
|
value: attr.value,
|
|
@@ -199,9 +196,11 @@ module Canon
|
|
|
199
196
|
|
|
200
197
|
# Build text node from Nokogiri text node
|
|
201
198
|
def self.build_text_node(nokogiri_text)
|
|
202
|
-
#
|
|
203
|
-
#
|
|
199
|
+
# XML text nodes: preserve all content including whitespace
|
|
200
|
+
# Unlike HTML, XML treats all whitespace as significant
|
|
204
201
|
content = nokogiri_text.content
|
|
202
|
+
|
|
203
|
+
# Skip empty text nodes between elements (common formatting whitespace)
|
|
205
204
|
return nil if content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element)
|
|
206
205
|
|
|
207
206
|
# Nokogiri already handles CDATA conversion and entity resolution
|
|
@@ -55,19 +55,63 @@ module Canon
|
|
|
55
55
|
# Default attributes used to identify elements
|
|
56
56
|
DEFAULT_IDENTITY_ATTRS = %w[id ref name key].freeze
|
|
57
57
|
|
|
58
|
-
#
|
|
59
|
-
|
|
58
|
+
# Represents the result of matching an element across two DOM trees
#
# A MatchResult indicates whether an element was found in both trees
# (matched), only in the first tree (deleted), or only in the second
# tree (inserted).
#
# == Attributes
#
# - status: Symbol indicating match type (:matched, :deleted, :inserted)
# - elem1: Element from first tree (nil if inserted)
# - elem2: Element from second tree (nil if deleted)
# - path: Array of element names showing location in tree
# - pos1: Integer index of elem1 in its parent's children (nil if inserted)
# - pos2: Integer index of elem2 in its parent's children (nil if deleted)
#
# == Position Change Detection
#
# When status is :matched and pos1 != pos2, the element has moved positions.
# This is tracked as a semantic difference via the :element_position dimension.
#
class MatchResult
  attr_reader :status, :elem1, :elem2, :path, :pos1, :pos2

  # @param status [Symbol] Match status (:matched, :deleted, :inserted)
  # @param elem1 [Object, nil] Element from first tree
  # @param elem2 [Object, nil] Element from second tree
  # @param path [Array<String>] Element path in tree
  # @param pos1 [Integer, nil] Position index in first tree
  # @param pos2 [Integer, nil] Position index in second tree
  def initialize(status:, elem1:, elem2:, path:, pos1: nil, pos2: nil)
    @status = status
    @elem1 = elem1
    @elem2 = elem2
    @path = path
    @pos1 = pos1
    @pos2 = pos2
  end

  # @return [Boolean] true if element found in both trees
  def matched?
    status == :matched
  end

  # @return [Boolean] true if element only in second tree
  def inserted?
    status == :inserted
  end

  # @return [Boolean] true if element only in first tree
  def deleted?
    status == :deleted
  end

  # A position change can only be reported for a matched element whose
  # position is known in both trees. Always returns a real boolean (never
  # nil), also when a position is missing.
  #
  # @return [Boolean] true if element moved to different position
  def position_changed?
    return false unless matched?
    return false if pos1.nil? || pos2.nil?

    pos1 != pos2
  end
end
|
|
72
116
|
|
|
73
117
|
def initialize(identity_attrs: DEFAULT_IDENTITY_ATTRS)
|
|
@@ -105,13 +149,32 @@ module Canon
|
|
|
105
149
|
map1.each do |identity, elem1|
|
|
106
150
|
if map2.key?(identity)
|
|
107
151
|
elem2 = map2[identity]
|
|
108
|
-
|
|
109
|
-
|
|
152
|
+
|
|
153
|
+
# Build path with namespace information for clarity
|
|
154
|
+
elem_path_with_ns = if elem1.namespace_uri && !elem1.namespace_uri.empty?
|
|
155
|
+
path + ["{#{elem1.namespace_uri}}#{elem1.name}"]
|
|
156
|
+
else
|
|
157
|
+
path + [elem1.name]
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Track positions
|
|
161
|
+
pos1 = elems1.index(elem1)
|
|
162
|
+
pos2 = elems2.index(elem2)
|
|
163
|
+
|
|
164
|
+
@matches << MatchResult.new(
|
|
165
|
+
status: :matched,
|
|
166
|
+
elem1: elem1,
|
|
167
|
+
elem2: elem2,
|
|
168
|
+
path: elem_path_with_ns,
|
|
169
|
+
pos1: pos1,
|
|
170
|
+
pos2: pos2,
|
|
171
|
+
)
|
|
172
|
+
|
|
110
173
|
matched1.add(elem1)
|
|
111
174
|
matched2.add(elem2)
|
|
112
175
|
|
|
113
176
|
# Recursively match children
|
|
114
|
-
match_children(elem1.children, elem2.children,
|
|
177
|
+
match_children(elem1.children, elem2.children, elem_path_with_ns)
|
|
115
178
|
end
|
|
116
179
|
end
|
|
117
180
|
|
|
@@ -125,44 +188,89 @@ module Canon
|
|
|
125
188
|
unmatched1.each do |elem1|
|
|
126
189
|
next if matched1.include?(elem1)
|
|
127
190
|
|
|
128
|
-
|
|
129
|
-
|
|
191
|
+
elem_path_with_ns = if elem1.namespace_uri && !elem1.namespace_uri.empty?
|
|
192
|
+
path + ["{#{elem1.namespace_uri}}#{elem1.name}"]
|
|
193
|
+
else
|
|
194
|
+
path + [elem1.name]
|
|
195
|
+
end
|
|
196
|
+
pos1 = elems1.index(elem1)
|
|
197
|
+
|
|
198
|
+
@matches << MatchResult.new(
|
|
199
|
+
status: :deleted,
|
|
200
|
+
elem1: elem1,
|
|
201
|
+
elem2: nil,
|
|
202
|
+
path: elem_path_with_ns,
|
|
203
|
+
pos1: pos1,
|
|
204
|
+
pos2: nil,
|
|
205
|
+
)
|
|
130
206
|
end
|
|
131
207
|
|
|
132
208
|
unmatched2.each do |elem2|
|
|
133
209
|
next if matched2.include?(elem2)
|
|
134
210
|
|
|
135
|
-
|
|
136
|
-
|
|
211
|
+
elem_path_with_ns = if elem2.namespace_uri && !elem2.namespace_uri.empty?
|
|
212
|
+
path + ["{#{elem2.namespace_uri}}#{elem2.name}"]
|
|
213
|
+
else
|
|
214
|
+
path + [elem2.name]
|
|
215
|
+
end
|
|
216
|
+
pos2 = elems2.index(elem2)
|
|
217
|
+
|
|
218
|
+
@matches << MatchResult.new(
|
|
219
|
+
status: :inserted,
|
|
220
|
+
elem1: nil,
|
|
221
|
+
elem2: elem2,
|
|
222
|
+
path: elem_path_with_ns,
|
|
223
|
+
pos1: nil,
|
|
224
|
+
pos2: pos2,
|
|
225
|
+
)
|
|
137
226
|
end
|
|
138
227
|
end
|
|
139
228
|
|
|
140
229
|
# Match remaining elements by name and position
|
|
141
230
|
def match_by_position(elems1, elems2, path, matched1, matched2)
|
|
142
|
-
# Group by element name
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
# For each name, match by position
|
|
147
|
-
by_name1.each do |name, list1|
|
|
148
|
-
next unless by_name2.key?(name)
|
|
231
|
+
# Group by element name AND namespace_uri
|
|
232
|
+
by_identity1 = elems1.group_by { |e| [e.name, e.namespace_uri] }
|
|
233
|
+
by_identity2 = elems2.group_by { |e| [e.name, e.namespace_uri] }
|
|
149
234
|
|
|
150
|
-
|
|
235
|
+
# For each name+namespace combination, match by position
|
|
236
|
+
by_identity1.each do |identity, list1|
|
|
237
|
+
next unless by_identity2.key?(identity)
|
|
151
238
|
|
|
152
239
|
# Match pairs by position
|
|
240
|
+
list2 = by_identity2[identity]
|
|
241
|
+
name = identity[0] # Extract name from [name, namespace_uri] tuple
|
|
242
|
+
namespace_uri = identity[1] # Extract namespace_uri
|
|
243
|
+
|
|
153
244
|
[list1.length, list2.length].min.times do |i|
|
|
154
245
|
elem1 = list1[i]
|
|
155
246
|
elem2 = list2[i]
|
|
156
247
|
|
|
157
248
|
next if matched1.include?(elem1) || matched2.include?(elem2)
|
|
158
249
|
|
|
159
|
-
|
|
160
|
-
|
|
250
|
+
# Build path with namespace information for clarity
|
|
251
|
+
elem_path_with_ns = if namespace_uri && !namespace_uri.empty?
|
|
252
|
+
path + ["{#{namespace_uri}}#{name}"]
|
|
253
|
+
else
|
|
254
|
+
path + [name]
|
|
255
|
+
end
|
|
256
|
+
|
|
257
|
+
# Track positions in original element lists
|
|
258
|
+
pos1 = elems1.index(elem1)
|
|
259
|
+
pos2 = elems2.index(elem2)
|
|
260
|
+
|
|
261
|
+
@matches << MatchResult.new(
|
|
262
|
+
status: :matched,
|
|
263
|
+
elem1: elem1,
|
|
264
|
+
elem2: elem2,
|
|
265
|
+
path: elem_path_with_ns,
|
|
266
|
+
pos1: pos1,
|
|
267
|
+
pos2: pos2,
|
|
268
|
+
)
|
|
161
269
|
matched1.add(elem1)
|
|
162
270
|
matched2.add(elem2)
|
|
163
271
|
|
|
164
272
|
# Recursively match children
|
|
165
|
-
match_children(elem1.children, elem2.children,
|
|
273
|
+
match_children(elem1.children, elem2.children, elem_path_with_ns)
|
|
166
274
|
end
|
|
167
275
|
end
|
|
168
276
|
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Xml
|
|
5
|
+
# Helper module for formatting namespace information in diff output
#
# Every method is a stateless module-level function. Nodes passed in only
# need to respond to +name+ and +namespace_uri+.
module NamespaceHelper
  # Format a namespace URI for display in diff output
  #
  # @param namespace_uri [String, nil] The namespace URI to format
  # @return [String] Formatted namespace string
  #
  # @example Empty namespace
  #   format_namespace(nil)  #=> "ns:[{blank}]"
  #   format_namespace("")   #=> "ns:[{blank}]"
  #
  # @example Populated namespace
  #   format_namespace("http://example.com")  #=> "ns:[http://example.com]"
  def self.format_namespace(namespace_uri)
    blank = namespace_uri.nil? || namespace_uri.empty?
    blank ? "ns:[{blank}]" : "ns:[#{namespace_uri}]"
  end

  # Determine the type of mismatch between two nodes
  #
  # A nil namespace and an empty-string namespace count as the same
  # namespace. If either node is missing, :none is returned.
  #
  # @param node1 [Object] First node (ElementNode or AttributeNode)
  # @param node2 [Object] Second node (ElementNode or AttributeNode)
  # @return [Symbol] Type of mismatch (:name, :namespace, :both, :none)
  def self.mismatch_type(node1, node2)
    return :none unless node1 && node2

    name_differs = node1.name != node2.name
    namespace_differs = normalize_namespace(node1.namespace_uri) !=
      normalize_namespace(node2.namespace_uri)

    if name_differs && namespace_differs
      :both
    elsif name_differs
      :name
    elsif namespace_differs
      :namespace
    else
      :none
    end
  end

  # Generate a mismatch message for element differences
  #
  # @param node1 [ElementNode] First element
  # @param node2 [ElementNode] Second element
  # @return [String] Human-readable mismatch message
  def self.element_mismatch_message(node1, node2)
    build_mismatch_message("element", "elements differ", node1, node2)
  end

  # Generate a mismatch message for attribute differences
  #
  # @param node1 [AttributeNode] First attribute
  # @param node2 [AttributeNode] Second attribute
  # @return [String] Human-readable mismatch message
  def self.attribute_mismatch_message(node1, node2)
    build_mismatch_message("attribute", "attributes differ", node1, node2)
  end

  # Normalize namespace URI for comparison
  #
  # @param namespace_uri [String, nil] Namespace URI
  # @return [String] Normalized namespace (empty string for nil)
  def self.normalize_namespace(namespace_uri)
    namespace_uri.to_s
  end

  # Shared message builder behind the element and attribute variants,
  # which differ only in the noun used and in the fallback text.
  #
  # @param kind [String] Noun placed in the message ("element" or "attribute")
  # @param fallback [String] Message used when no name/namespace mismatch is found
  # @param node1 [Object, nil] First node
  # @param node2 [Object, nil] Second node
  # @return [String] Human-readable mismatch message
  def self.build_mismatch_message(kind, fallback, node1, node2)
    case mismatch_type(node1, node2)
    when :name
      # Namespaces agree here, so showing the first node's one is enough
      "mismatched #{kind} name: '#{node1.name}' vs '#{node2.name}' " \
        "(#{format_namespace(node1.namespace_uri)})"
    when :namespace
      "mismatched #{kind} namespace: '#{node1.name}' " \
        "(#{format_namespace(node1.namespace_uri)} vs " \
        "#{format_namespace(node2.namespace_uri)})"
    when :both
      "mismatched #{kind} name and namespace: " \
        "'#{node1.name}' (#{format_namespace(node1.namespace_uri)}) vs " \
        "'#{node2.name}' (#{format_namespace(node2.namespace_uri)})"
    else
      fallback
    end
  end

  private_class_method :normalize_namespace, :build_mismatch_message
end
|
|
109
|
+
end
|
|
110
|
+
end
|
data/lib/canon.rb
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "canon/version"
|
|
4
4
|
require_relative "canon/errors"
|
|
5
|
+
require_relative "canon/config"
|
|
6
|
+
require_relative "canon/data_model"
|
|
7
|
+
require_relative "canon/html"
|
|
5
8
|
require_relative "canon/formatters/xml_formatter"
|
|
6
9
|
require_relative "canon/formatters/yaml_formatter"
|
|
7
10
|
require_relative "canon/formatters/json_formatter"
|