canon 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +25 -135
- data/README.adoc +13 -13
- data/docs/.lycheeignore +69 -0
- data/docs/advanced/extending-canon.adoc +193 -0
- data/docs/internals/diffnode-enrichment.adoc +611 -0
- data/docs/internals/index.adoc +251 -0
- data/docs/lychee.toml +13 -6
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +250 -0
- data/docs/understanding/architecture.adoc +749 -33
- data/docs/understanding/comparison-pipeline.adoc +122 -0
- data/false_positive_analysis.txt +0 -0
- data/file1.html +1 -0
- data/file2.html +1 -0
- data/lib/canon/cache.rb +129 -0
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
- data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
- data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
- data/lib/canon/comparison/dimensions/registry.rb +77 -0
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
- data/lib/canon/comparison/dimensions.rb +54 -0
- data/lib/canon/comparison/format_detector.rb +86 -0
- data/lib/canon/comparison/html_comparator.rb +51 -18
- data/lib/canon/comparison/html_parser.rb +80 -0
- data/lib/canon/comparison/json_comparator.rb +12 -0
- data/lib/canon/comparison/json_parser.rb +19 -0
- data/lib/canon/comparison/markup_comparator.rb +293 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +143 -0
- data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
- data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
- data/lib/canon/comparison/match_options.rb +68 -463
- data/lib/canon/comparison/profile_definition.rb +149 -0
- data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +189 -0
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +74 -0
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +95 -0
- data/lib/canon/comparison/xml_comparator.rb +52 -664
- data/lib/canon/comparison/xml_node_comparison.rb +297 -0
- data/lib/canon/comparison/xml_parser.rb +19 -0
- data/lib/canon/comparison/yaml_comparator.rb +3 -3
- data/lib/canon/comparison.rb +265 -110
- data/lib/canon/diff/diff_node.rb +32 -2
- data/lib/canon/diff/node_serializer.rb +191 -0
- data/lib/canon/diff/path_builder.rb +143 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
- data/lib/canon/diff_formatter.rb +1 -1
- data/lib/canon/rspec_matchers.rb +1 -1
- data/lib/canon/tree_diff/operation_converter.rb +92 -338
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
- data/lib/canon/version.rb +1 -1
- data/old-docs/ADVANCED_TOPICS.adoc +20 -0
- data/old-docs/BASIC_USAGE.adoc +16 -0
- data/old-docs/CHARACTER_VISUALIZATION.adoc +567 -0
- data/old-docs/CLI.adoc +497 -0
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
- data/old-docs/DIFF_ARCHITECTURE.adoc +435 -0
- data/old-docs/DIFF_FORMATTING.adoc +540 -0
- data/old-docs/DIFF_PARAMETERS.adoc +261 -0
- data/old-docs/DOM_DIFF.adoc +1017 -0
- data/old-docs/ENV_CONFIG.adoc +876 -0
- data/old-docs/FORMATS.adoc +867 -0
- data/old-docs/INPUT_VALIDATION.adoc +477 -0
- data/old-docs/MATCHER_BEHAVIOR.adoc +90 -0
- data/old-docs/MATCH_ARCHITECTURE.adoc +463 -0
- data/old-docs/MATCH_OPTIONS.adoc +912 -0
- data/old-docs/MODES.adoc +432 -0
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
- data/old-docs/OPTIONS.adoc +1387 -0
- data/old-docs/PREPROCESSING.adoc +491 -0
- data/old-docs/README.old.adoc +2831 -0
- data/old-docs/RSPEC.adoc +814 -0
- data/old-docs/RUBY_API.adoc +485 -0
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +646 -0
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +765 -0
- data/old-docs/STRING_COMPARE.adoc +345 -0
- data/old-docs/TMP.adoc +3384 -0
- data/old-docs/TREE_DIFF.adoc +1080 -0
- data/old-docs/UNDERSTANDING_CANON.adoc +17 -0
- data/old-docs/VERBOSE.adoc +482 -0
- data/old-docs/VISUALIZATION_MAP.adoc +625 -0
- data/old-docs/WHITESPACE_TREATMENT.adoc +1155 -0
- data/scripts/analyze_current_state.rb +85 -0
- data/scripts/analyze_false_positives.rb +114 -0
- data/scripts/analyze_remaining_failures.rb +105 -0
- data/scripts/compare_current_failures.rb +95 -0
- data/scripts/compare_dom_tree_diff.rb +158 -0
- data/scripts/compare_failures.rb +151 -0
- data/scripts/debug_attribute_extraction.rb +66 -0
- data/scripts/debug_blocks_839.rb +115 -0
- data/scripts/debug_meta_matching.rb +52 -0
- data/scripts/debug_p_matching.rb +192 -0
- data/scripts/debug_signature_matching.rb +118 -0
- data/scripts/debug_sourcecode_124.rb +32 -0
- data/scripts/debug_whitespace_sensitive.rb +192 -0
- data/scripts/extract_false_positives.rb +138 -0
- data/scripts/find_actual_false_positives.rb +125 -0
- data/scripts/investigate_all_false_positives.rb +161 -0
- data/scripts/investigate_batch1.rb +127 -0
- data/scripts/investigate_classification.rb +150 -0
- data/scripts/investigate_classification_detailed.rb +190 -0
- data/scripts/investigate_common_failures.rb +342 -0
- data/scripts/investigate_false_negative.rb +80 -0
- data/scripts/investigate_false_positive.rb +83 -0
- data/scripts/investigate_false_positives.rb +227 -0
- data/scripts/investigate_false_positives_batch.rb +163 -0
- data/scripts/investigate_mixed_content.rb +125 -0
- data/scripts/investigate_remaining_16.rb +214 -0
- data/scripts/run_single_test.rb +29 -0
- data/scripts/test_all_false_positives.rb +95 -0
- data/scripts/test_attribute_details.rb +61 -0
- data/scripts/test_both_algorithms.rb +49 -0
- data/scripts/test_both_simple.rb +49 -0
- data/scripts/test_enhanced_semantic_output.rb +125 -0
- data/scripts/test_readme_examples.rb +131 -0
- data/scripts/test_semantic_tree_diff.rb +99 -0
- data/scripts/test_semantic_ux_improvements.rb +135 -0
- data/scripts/test_single_false_positive.rb +119 -0
- data/scripts/test_size_limits.rb +99 -0
- data/test_html_1.html +21 -0
- data/test_html_2.html +21 -0
- data/test_nokogiri.rb +33 -0
- data/test_normalize.rb +45 -0
- metadata +123 -2
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
module XmlComparatorHelpers
|
|
6
|
+
# Child comparison service for XML nodes
|
|
7
|
+
#
|
|
8
|
+
# Handles comparison of child nodes using both semantic matching (ElementMatcher)
|
|
9
|
+
# and simple positional comparison. Delegates back to the comparator for
|
|
10
|
+
# individual node comparisons.
|
|
11
|
+
#
|
|
12
|
+
# This module encapsulates the complex child comparison logic, making the
|
|
13
|
+
# main XmlComparator cleaner and more maintainable.
|
|
14
|
+
module ChildComparison
|
|
15
|
+
class << self
|
|
16
|
+
# Compare the (filtered) children of two parent nodes.
#
# Children are first filtered through the comparator. When every remaining
# child on both sides is an element node the pairing is done semantically via
# ElementMatcher; otherwise children are paired by position.
#
# @param node1 [Object] First parent node
# @param node2 [Object] Second parent node
# @param comparator [XmlComparator] The comparator instance for delegation
# @param opts [Hash] Comparison options
# @param child_opts [Hash] Options for child comparison
# @param diff_children [Boolean] Whether to diff children
# @param differences [Array] Array to collect differences
# @return [Integer] Comparison result code
def compare(node1, node2, comparator, opts, child_opts, diff_children, differences)
  kids1 = comparator.send(:filter_children, node1.children, opts)
  kids2 = comparator.send(:filter_children, node2.children, opts)

  # Nothing left on either side after filtering: nothing to compare.
  return Comparison::EQUIVALENT if kids1.empty? && kids2.empty?

  # node1 is handed on as the parent used for parent-level differences.
  shared_args = [kids1, kids2, node1, comparator,
                 opts, child_opts, diff_children, differences]

  return use_element_matcher_comparison(*shared_args) if can_use_element_matcher?(kids1, kids2)

  use_positional_comparison(*shared_args)
end
|
|
45
|
+
|
|
46
|
+
private
|
|
47
|
+
|
|
48
|
+
# Decide whether the semantic (ElementMatcher) path applies.
#
# It applies only when both lists are non-empty and every child on both sides
# is a Canon::Xml::Node whose node_type is :element.
#
# @param children1 [Array] Filtered children of the first parent
# @param children2 [Array] Filtered children of the second parent
# @return [Boolean] true when semantic matching can be used
def can_use_element_matcher?(children1, children2)
  return false if children1.empty? || children2.empty?

  element_node = lambda do |child|
    child.is_a?(Canon::Xml::Node) && child.node_type == :element
  end

  children1.all?(&element_node) && children2.all?(&element_node)
end
|
|
57
|
+
|
|
58
|
+
# Pair children semantically with ElementMatcher and compare the pairs.
#
# @param children1 [Array] Filtered children of the first parent
# @param children2 [Array] Filtered children of the second parent
# @param parent_node [Object] Parent used when reporting a parent-level difference
# @param comparator [XmlComparator] The comparator instance for delegation
# @param opts [Hash] Comparison options
# @param child_opts [Hash] Options for child comparison
# @param diff_children [Boolean] Whether to diff children
# @param differences [Array] Array to collect differences
# @return [Integer] Comparison result code
def use_element_matcher_comparison(children1, children2, parent_node, comparator,
                                   opts, child_opts, diff_children, differences)
  # Required at call time rather than at file load.
  require_relative "../../xml/element_matcher"
  require_relative "../../xml/nodes/root_node"

  # match_trees is given two roots, so each child list is copied into a
  # throwaway RootNode.
  root1, root2 = [children1, children2].map do |kids|
    Canon::Xml::Nodes::RootNode.new.tap do |root|
      root.instance_variable_set(:@children, kids.dup)
    end
  end

  # Keep only pairings whose non-nil sides are direct children.
  direct_matches = Canon::Xml::ElementMatcher.new.match_trees(root1, root2).select do |m|
    [[m.elem1, children1], [m.elem2, children2]].all? do |elem, kids|
      elem.nil? || kids.include?(elem)
    end
  end

  # No usable pairing although children exist: report one difference on the parent.
  if direct_matches.empty? && !(children1.empty? && children2.empty?)
    comparator.send(:add_difference, parent_node, parent_node,
                    Comparison::MISSING_NODE, Comparison::MISSING_NODE,
                    :text_content, opts, differences)
    return Comparison::UNEQUAL_ELEMENTS
  end

  process_matches(direct_matches, children1, children2, parent_node, comparator,
                  opts, child_opts, diff_children, differences)
end
|
|
91
|
+
|
|
92
|
+
# Turn ElementMatcher results into differences and an overall result code.
#
# For each match:
# * :matched  - optionally records a position change, then compares the pair
#   through the comparator (child_opts is passed for both option arguments).
# * :deleted  - records an :element_structure difference (elem1 only).
# * :inserted - records an :element_structure difference (elem2 only).
#
# A position change is recorded unless match_opts[:element_position] is
# :ignore, and makes the result unequal only when the behavior is :strict.
# A missing opts[:match_opts] or missing :element_position is treated as :strict.
#
# @param matches [Array] Match objects (status, elem1, elem2, pos1, pos2, position_changed?)
# @param _children1 [Array] Unused
# @param _children2 [Array] Unused
# @param _parent_node [Object] Unused
# @param comparator [XmlComparator] The comparator instance for delegation
# @param opts [Hash] Comparison options
# @param child_opts [Hash] Options for child comparison
# @param diff_children [Boolean] Whether to diff children
# @param differences [Array] Array to collect differences
# @return [Integer] Comparison::EQUIVALENT or Comparison::UNEQUAL_ELEMENTS
def process_matches(matches, _children1, _children2, _parent_node, comparator,
                    opts, child_opts, diff_children, differences)
  all_equivalent = true

  matches.each do |match|
    case match.status
    when :matched
      if match.position_changed?
        # opts[:match_opts] may be absent; do not call [] on nil.
        match_opts = opts[:match_opts] || {}
        position_behavior = match_opts[:element_position] || :strict

        if position_behavior != :ignore
          comparator.send(:add_difference, match.elem1, match.elem2,
                          "position #{match.pos1}", "position #{match.pos2}",
                          :element_position, opts, differences)
          all_equivalent = false if position_behavior == :strict
        end
      end

      # Content/attribute comparison of the matched pair.
      result = comparator.send(:compare_nodes, match.elem1, match.elem2,
                               child_opts, child_opts, diff_children, differences)
      all_equivalent = false unless result == Comparison::EQUIVALENT
    when :deleted
      # Present in the first tree only.
      comparator.send(:add_difference, match.elem1, nil,
                      Comparison::MISSING_NODE, Comparison::MISSING_NODE,
                      :element_structure, opts, differences)
      all_equivalent = false
    when :inserted
      # Present in the second tree only.
      comparator.send(:add_difference, nil, match.elem2,
                      Comparison::MISSING_NODE, Comparison::MISSING_NODE,
                      :element_structure, opts, differences)
      all_equivalent = false
    end
  end

  all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ELEMENTS
end
|
|
137
|
+
|
|
138
|
+
# Compare children pairwise by index.
#
# A length mismatch is reported once on the parent and ends the comparison.
# Otherwise each pair is compared through the comparator; the code of the
# last non-equivalent pair is returned.
#
# @param children1 [Array] Filtered children of the first parent
# @param children2 [Array] Filtered children of the second parent
# @param parent_node [Object] Parent used when reporting a length mismatch
# @param comparator [XmlComparator] The comparator instance for delegation
# @param opts [Hash] Comparison options
# @param child_opts [Hash] Options for child comparison
# @param diff_children [Boolean] Whether to diff children
# @param differences [Array] Array to collect differences
# @return [Integer] Comparison result code
def use_positional_comparison(children1, children2, parent_node, comparator,
                              opts, child_opts, diff_children, differences)
  if children1.length != children2.length
    mismatch_dimension = determine_dimension_for_mismatch(children1, children2, comparator)
    comparator.send(:add_difference, parent_node, parent_node,
                    Comparison::MISSING_NODE, Comparison::MISSING_NODE,
                    mismatch_dimension, opts, differences)
    return Comparison::MISSING_NODE
  end

  children1.each_with_index.reduce(Comparison::EQUIVALENT) do |outcome, (left, index)|
    verdict = comparator.send(:compare_nodes, left, children2[index],
                              child_opts, child_opts, diff_children, differences)
    verdict == Comparison::EQUIVALENT ? outcome : verdict
  end
end
|
|
160
|
+
|
|
161
|
+
# Pick the dimension to report for a child-count mismatch.
#
# Walks both lists in step. The first position where the node types differ
# decides (using the child from children1); if the shared prefix agrees, the
# first surplus child decides. Falls back to :text_content.
#
# @param children1 [Array] Children of the first parent
# @param children2 [Array] Children of the second parent
# @param comparator [XmlComparator] Supplies same_node_type? and determine_node_dimension
# @return [Symbol] Dimension for the difference
def determine_dimension_for_mismatch(children1, children2, comparator)
  shared = [children1.length, children2.length].min

  type_clash = (0...shared).find do |index|
    !comparator.send(:same_node_type?, children1[index], children2[index])
  end
  return comparator.send(:determine_node_dimension, children1[type_clash]) if type_clash

  if children2.length > shared
    # Surplus child on the second side
    comparator.send(:determine_node_dimension, children2[shared])
  elsif children1.length > shared
    # Surplus child on the first side
    comparator.send(:determine_node_dimension, children1[shared])
  else
    :text_content
  end
end
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../diff/diff_node"
|
|
4
|
+
require_relative "../../diff/path_builder"
|
|
5
|
+
require_relative "../../diff/node_serializer"
|
|
6
|
+
|
|
7
|
+
module Canon
|
|
8
|
+
module Comparison
|
|
9
|
+
# Builder for creating enriched DiffNode objects
|
|
10
|
+
# Handles path building, serialization, and attribute extraction
|
|
11
|
+
class DiffNodeBuilder
|
|
12
|
+
# Build an enriched DiffNode
#
# @param node1 [Object, nil] First node
# @param node2 [Object, nil] Second node
# @param diff1 [String] Difference type for node1
# @param diff2 [String] Difference type for node2
# @param dimension [Symbol] The match dimension causing this difference
# @return [DiffNode] Enriched DiffNode
# @raise [ArgumentError] if dimension is nil
def self.build(node1:, node2:, diff1:, diff2:, dimension:, **_opts)
  raise ArgumentError, "dimension required for DiffNode" if dimension.nil?

  core = {
    node1: node1,
    node2: node2,
    dimension: dimension,
    reason: build_reason(node1, node2, diff1, diff2, dimension),
  }

  # Path, serialized content and attributes are merged in as extra keywords.
  Canon::Diff::DiffNode.new(**core, **enrich_metadata(node1, node2))
end
|
|
41
|
+
|
|
42
|
+
# Build a human-readable reason for a difference
#
# The plain form is "<diff1> vs <diff2>". When the dimension is :text_content,
# one side is nil, and the remaining node exposes both name and namespace_uri,
# the element name (and its namespace, if non-empty) is prefixed.
#
# @param node1 [Object, nil] First node
# @param node2 [Object, nil] Second node
# @param diff1 [String] Difference type for node1
# @param diff2 [String] Difference type for node2
# @param dimension [Symbol] The dimension of the difference
# @return [String] Human-readable reason
def self.build_reason(node1, node2, diff1, diff2, dimension)
  summary = "#{diff1} vs #{diff2}"
  one_sided = dimension == :text_content && (node1.nil? || node2.nil?)
  return summary unless one_sided

  present = node1 || node2
  return summary unless present.respond_to?(:name) && present.respond_to?(:namespace_uri)

  uri = present.namespace_uri
  suffix = uri.nil? || uri.empty? ? "" : " (namespace: #{uri})"
  "element '#{present.name}'#{suffix}: #{summary}"
end
|
|
67
|
+
|
|
68
|
+
# Collect presentation metadata for a pair of nodes.
#
# The path is taken from node1, or from node2 when node1 is nil; serialized
# content and attributes are gathered for each side separately.
#
# @param node1 [Object, nil] First node
# @param node2 [Object, nil] Second node
# @return [Hash] Keys :path, :serialized_before, :serialized_after,
#   :attributes_before, :attributes_after
def self.enrich_metadata(node1, node2)
  location = build_path(node1 || node2)
  text_before = serialize(node1)
  text_after = serialize(node2)
  attrs_before = extract_attributes(node1)
  attrs_after = extract_attributes(node2)

  {
    path: location,
    serialized_before: text_before,
    serialized_after: text_after,
    attributes_before: attrs_before,
    attributes_after: attrs_after,
  }
end
|
|
83
|
+
|
|
84
|
+
# Build the path for a node via Canon::Diff::PathBuilder (format: :document).
#
# @param node [Object, nil] Node to build path for
# @return [String, nil] Path from PathBuilder, or nil when node is nil
def self.build_path(node)
  node.nil? ? nil : Canon::Diff::PathBuilder.build(node, format: :document)
end
|
|
93
|
+
|
|
94
|
+
# Serialize a node via Canon::Diff::NodeSerializer.
#
# @param node [Object, nil] Node to serialize
# @return [String, nil] Serialized content, or nil when node is nil
def self.serialize(node)
  node.nil? ? nil : Canon::Diff::NodeSerializer.serialize(node)
end
|
|
103
|
+
|
|
104
|
+
# Extract a node's attributes via Canon::Diff::NodeSerializer.
#
# @param node [Object, nil] Node to extract attributes from
# @return [Hash, nil] Attributes hash, or nil when node is nil
def self.extract_attributes(node)
  node.nil? ? nil : Canon::Diff::NodeSerializer.extract_attributes(node)
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
module XmlComparatorHelpers
|
|
6
|
+
# Namespace declaration comparison logic
|
|
7
|
+
# Handles comparison of xmlns and xmlns:* attributes
|
|
8
|
+
class NamespaceComparator
|
|
9
|
+
# Compare namespace declarations between two nodes
#
# Prefixes are classified as missing (declared only on node1), extra
# (declared only on node2) or changed (declared on both with different URIs).
# Any of these yields a single namespace difference.
#
# @param node1 [Object] First node
# @param node2 [Object] Second node
# @param opts [Hash] Comparison options
# @param differences [Array] Array to append differences to
# @return [Integer] Comparison::EQUIVALENT or Comparison::UNEQUAL_ATTRIBUTES
def self.compare(node1, node2, opts, differences)
  ns_decls1 = extract_declarations(node1)
  ns_decls2 = extract_declarations(node2)

  missing = ns_decls1.keys - ns_decls2.keys # In node1 but not node2
  extra = ns_decls2.keys - ns_decls1.keys   # In node2 but not node1

  # Test key presence, not truthiness of the URI: a prefix declared on both
  # sides whose second URI is nil is still a change, and would otherwise be
  # counted neither as missing/extra nor as changed.
  changed = ns_decls1.select do |prefix, uri|
    ns_decls2.key?(prefix) && ns_decls2[prefix] != uri
  end.keys

  return Comparison::EQUIVALENT if missing.empty? && extra.empty? && changed.empty?

  add_namespace_difference(node1, node2, missing, extra, changed,
                           opts, differences)
  Comparison::UNEQUAL_ATTRIBUTES
end
|
|
36
|
+
|
|
37
|
+
# Extract namespace declarations from a node
#
# Nodes exposing namespace_nodes are read from there. Otherwise the node's
# attribute_nodes (or attributes, when attribute_nodes is unavailable) are
# scanned, using the Array or Hash-like extractor as appropriate.
#
# @param node [Object] Node to extract namespace declarations from
# @return [Hash] Hash of prefix => URI mappings
def self.extract_declarations(node)
  collected = {}

  if node.respond_to?(:namespace_nodes)
    return extract_from_namespace_nodes(node.namespace_nodes, collected)
  end

  attrs = if node.respond_to?(:attribute_nodes)
            node.attribute_nodes
          else
            node.attributes
          end

  if attrs.is_a?(Array)
    extract_from_array_attributes(attrs, collected)
  else
    extract_from_hash_attributes(attrs, collected)
  end

  collected
end
|
|
63
|
+
|
|
64
|
+
# Extract declarations from a list of namespace nodes
#
# The xml prefix bound to http://www.w3.org/XML/1998/namespace is skipped.
# A nil prefix is stored under the empty string.
#
# @param namespace_nodes [Array] Array of NamespaceNode objects
# @param declarations [Hash] Output hash to populate
# @return [Hash] Declarations hash
def self.extract_from_namespace_nodes(namespace_nodes, declarations)
  namespace_nodes.each_with_object(declarations) do |ns_node, found|
    implicit_xml = ns_node.prefix == "xml" &&
                   ns_node.uri == "http://www.w3.org/XML/1998/namespace"
    next if implicit_xml

    found[ns_node.prefix || ""] = ns_node.uri
  end
end
|
|
80
|
+
|
|
81
|
+
# Extract declarations from an array of attribute objects
#
# Each attribute is read through name and value. "xmlns" is stored under the
# empty prefix, "xmlns:foo" under "foo".
#
# @param raw_attrs [Array] Array of AttributeNode objects
# @param declarations [Hash] Output hash to populate
# @return [Hash] Declarations hash
def self.extract_from_array_attributes(raw_attrs, declarations)
  raw_attrs.each_with_object(declarations) do |attribute, found|
    attr_name = attribute.name
    attr_value = attribute.value
    next unless namespace_declaration?(attr_name)

    key = attr_name == "xmlns" ? "" : attr_name.split(":", 2)[1]
    found[key] = attr_value
  end
end
|
|
100
|
+
|
|
101
|
+
# Extract declarations from Hash-like attributes
#
# Two entry shapes are handled:
# * key is the attribute name (String) and val carries the value, either as
#   an object responding to value or as something converted with to_s;
# * key is the attribute object itself and val is nil, in which case both
#   the name and the value are read from the key.
#
# "xmlns" is stored under the empty prefix, "xmlns:foo" under "foo".
#
# @param raw_attrs [Hash] Hash-like attributes
# @param declarations [Hash] Output hash to populate
# @return [Hash] Declarations hash
def self.extract_from_hash_attributes(raw_attrs, declarations)
  raw_attrs.each do |key, val|
    name = if key.is_a?(String)
             key
           elsif key.respond_to?(:name)
             key.name
           else
             key.to_s
           end
    next unless namespace_declaration?(name)

    value = if val.respond_to?(:value)
              val.value
            elsif val.nil? && key.respond_to?(:value)
              # The attribute object arrived as the key with no separate value:
              # take the URI from it instead of turning nil into "".
              key.value.to_s
            else
              val.to_s
            end

    # Extract prefix: "xmlns" -> "", "xmlns:xmi" -> "xmi"
    prefix = name == "xmlns" ? "" : name.split(":", 2)[1]
    declarations[prefix] = value
  end

  declarations
end
|
|
132
|
+
|
|
133
|
+
# Tell whether an attribute name declares a namespace
#
# @param attr_name [String] Attribute name
# @return [Boolean] true for "xmlns" and for names starting with "xmlns:"
def self.namespace_declaration?(attr_name)
  return true if attr_name == "xmlns"

  attr_name.start_with?("xmlns:")
end
|
|
140
|
+
|
|
141
|
+
# Add a namespace declaration difference
#
# Builds one DiffNode with dimension :namespace_declarations through
# DiffNodeBuilder and appends it to differences. The reason text is produced
# by DiffNodeBuilder; the missing/extra/changed lists are accepted for
# interface compatibility but are not used here.
#
# @param node1 [Object] First node
# @param node2 [Object] Second node
# @param missing [Array] Missing prefixes (currently unused)
# @param extra [Array] Extra prefixes (currently unused)
# @param changed [Array] Changed prefixes (currently unused)
# @param opts [Hash] Options, forwarded to DiffNodeBuilder.build as keywords
# @param differences [Array] Array to append difference to
def self.add_namespace_difference(node1, node2, missing, extra,
                                  changed, opts, differences)
  # Import DiffNodeBuilder to avoid circular dependency
  require_relative "diff_node_builder"

  diff_node = DiffNodeBuilder.build(
    node1: node1,
    node2: node2,
    diff1: Comparison::UNEQUAL_ATTRIBUTES,
    diff2: Comparison::UNEQUAL_ATTRIBUTES,
    dimension: :namespace_declarations,
    **opts,
  )
  differences << diff_node if diff_node
end
|
|
183
|
+
end
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../xml/c14n"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Comparison
|
|
7
|
+
module XmlComparatorHelpers
|
|
8
|
+
# Node parser with preprocessing support
|
|
9
|
+
# Handles conversion of strings and various node types to Canon::Xml::Node
|
|
10
|
+
class NodeParser
|
|
11
|
+
# Turn the input into a Canon::Xml::Node
#
# * A Canon::Xml::Node is returned unchanged.
# * Any other non-String object goes through convert_from_node
#   (preprocessing is not applied on this path).
# * A String is preprocessed and then parsed with Canon::Xml::DataModel.
#
# @param node [String, Object] Node to parse
# @param preprocessing [Symbol] Preprocessing mode (:none, :normalize, :c14n, :format)
# @return [Canon::Xml::Node] Parsed node
def self.parse(node, preprocessing = :none)
  return node if node.is_a?(Canon::Xml::Node)
  return convert_from_node(node) unless node.is_a?(String)

  Canon::Xml::DataModel.from_xml(apply_preprocessing(node, preprocessing))
end
|
|
32
|
+
|
|
33
|
+
# Apply preprocessing transformation to XML string
#
# * :normalize - strips every line, drops lines that end up empty, and joins
#   the rest with "\n"
# * :c14n      - Canon::Xml::C14n.canonicalize without comments
# * :format    - Canon.format as :xml
# * anything else - the string is returned untouched
#
# @param xml_string [String] XML string to preprocess
# @param preprocessing [Symbol] Preprocessing mode
# @return [String] Preprocessed XML string
def self.apply_preprocessing(xml_string, preprocessing)
  case preprocessing
  when :normalize
    kept = xml_string.lines.each_with_object([]) do |line, acc|
      trimmed = line.strip
      acc << trimmed unless trimmed.empty?
    end
    kept.join("\n")
  when :c14n then Canon::Xml::C14n.canonicalize(xml_string, with_comments: false)
  when :format then Canon.format(xml_string, :xml)
  else xml_string
  end
end
|
|
54
|
+
|
|
55
|
+
# Convert a foreign node to Canon::Xml::Node
#
# The node is serialized with to_xml when available, otherwise with to_s,
# and the resulting string is parsed through Canon::Xml::DataModel.
#
# @param node [Object] Nokogiri or Moxml node
# @return [Canon::Xml::Node] Converted node
# @raise [Canon::Error] if the node responds to neither to_xml nor to_s
def self.convert_from_node(node)
  serializer = %i[to_xml to_s].find { |candidate| node.respond_to?(candidate) }

  unless serializer
    raise Canon::Error,
          "Unable to convert node to string: #{node.class}"
  end

  Canon::Xml::DataModel.from_xml(node.public_send(serializer))
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
module XmlComparatorHelpers
|
|
6
|
+
# Node type comparison strategy for XML nodes
|
|
7
|
+
#
|
|
8
|
+
# Handles dispatching comparison logic based on node type.
|
|
9
|
+
# Supports both Canon::Xml::Node (with symbolic node_type) and
|
|
10
|
+
# Moxml/Nokogiri nodes (with predicate methods like element?, text?, etc.)
|
|
11
|
+
#
|
|
12
|
+
# This module encapsulates the complex node type detection and dispatch
|
|
13
|
+
# logic, making the main XmlComparator cleaner and more maintainable.
|
|
14
|
+
module NodeTypeComparator
|
|
15
|
+
class << self
|
|
16
|
+
# Compare two nodes by dispatching to the appropriate comparison method
#
# When both nodes respond to node_type and both values are Symbols, dispatch
# is by symbolic type; in every other case (including integer node types)
# dispatch is by predicate methods such as element? and text?.
#
# @param node1 [Object] First node
# @param node2 [Object] Second node
# @param comparator [XmlComparator] The comparator instance for method delegation
# @param opts [Hash] Comparison options
# @param child_opts [Hash] Options for child comparison
# @param diff_children [Boolean] Whether to diff children
# @param differences [Array] Array to collect differences
# @return [Integer] Comparison result code
def compare(node1, node2, comparator, opts, child_opts, diff_children, differences)
  pair = [node1, node2]
  symbolic = pair.all? { |node| node.respond_to?(:node_type) } &&
             pair.all? { |node| node.node_type.is_a?(Symbol) }

  forwarded = [node1, node2, comparator, opts, child_opts, diff_children, differences]
  return compare_by_symbolic_type(*forwarded) if symbolic

  compare_by_predicate_methods(*forwarded)
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
# Dispatch on node1's symbolic node_type
#
# :root and :element are compared with child options; :text, :cdata,
# :comment and :processing_instruction are compared with opts and
# differences only. Any other type is reported as equivalent.
#
# @param node1 [Object] First node (its node_type selects the handler)
# @param node2 [Object] Second node
# @param comparator [XmlComparator] Receives the selected compare_* call
# @param opts [Hash] Comparison options
# @param child_opts [Hash] Options for child comparison
# @param diff_children [Boolean] Whether to diff children
# @param differences [Array] Array to collect differences
# @return [Integer] Comparison result code
def compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
                             diff_children, differences)
  deep = [opts, child_opts, diff_children, differences]
  shallow = [opts, differences]

  handler, tail =
    case node1.node_type
    when :root then [:compare_children, deep]
    when :element then [:compare_element_nodes, deep]
    when :text, :cdata then [:compare_text_nodes, shallow]
    when :comment then [:compare_comment_nodes, shallow]
    when :processing_instruction then [:compare_processing_instruction_nodes, shallow]
    end

  return Comparison::EQUIVALENT unless handler

  comparator.send(handler, node1, node2, *tail)
end
|
|
66
|
+
|
|
67
|
+
# Dispatch on node1's predicate methods
#
# Predicates are tried in this order: element?, text?, comment?, cdata?,
# processing_instruction?. If none applies and node1 responds to root, the
# pair is compared as documents. Anything else is reported as equivalent.
#
# @param node1 [Object] First node (its predicates select the handler)
# @param node2 [Object] Second node
# @param comparator [XmlComparator] Receives the selected compare_* call
# @param opts [Hash] Comparison options
# @param child_opts [Hash] Options for child comparison
# @param diff_children [Boolean] Whether to diff children
# @param differences [Array] Array to collect differences
# @return [Integer] Comparison result code
def compare_by_predicate_methods(node1, node2, comparator, opts, child_opts,
                                 diff_children, differences)
  deep = [opts, child_opts, diff_children, differences]
  shallow = [opts, differences]

  routes = [
    [:element?, :compare_element_nodes, deep],
    [:text?, :compare_text_nodes, shallow],
    [:comment?, :compare_comment_nodes, shallow],
    [:cdata?, :compare_text_nodes, shallow],
    [:processing_instruction?, :compare_processing_instruction_nodes, shallow],
  ]

  route = routes.find do |predicate, _handler, _tail|
    node1.respond_to?(predicate) && node1.public_send(predicate)
  end

  if route
    _predicate, handler, tail = route
    comparator.send(handler, node1, node2, *tail)
  elsif node1.respond_to?(:root)
    # Document node (Moxml/Nokogiri - legacy path)
    comparator.send(:compare_document_nodes, node1, node2, *deep)
  else
    Comparison::EQUIVALENT
  end
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|