canon 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +25 -135
- data/README.adoc +13 -13
- data/docs/.lycheeignore +69 -0
- data/docs/advanced/extending-canon.adoc +193 -0
- data/docs/internals/diffnode-enrichment.adoc +611 -0
- data/docs/internals/index.adoc +251 -0
- data/docs/lychee.toml +13 -6
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +250 -0
- data/docs/understanding/architecture.adoc +749 -33
- data/docs/understanding/comparison-pipeline.adoc +122 -0
- data/false_positive_analysis.txt +0 -0
- data/file1.html +1 -0
- data/file2.html +1 -0
- data/lib/canon/cache.rb +129 -0
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
- data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
- data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
- data/lib/canon/comparison/dimensions/registry.rb +77 -0
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
- data/lib/canon/comparison/dimensions.rb +54 -0
- data/lib/canon/comparison/format_detector.rb +86 -0
- data/lib/canon/comparison/html_comparator.rb +51 -18
- data/lib/canon/comparison/html_parser.rb +80 -0
- data/lib/canon/comparison/json_comparator.rb +12 -0
- data/lib/canon/comparison/json_parser.rb +19 -0
- data/lib/canon/comparison/markup_comparator.rb +293 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +143 -0
- data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
- data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
- data/lib/canon/comparison/match_options.rb +68 -463
- data/lib/canon/comparison/profile_definition.rb +149 -0
- data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +189 -0
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +74 -0
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +95 -0
- data/lib/canon/comparison/xml_comparator.rb +52 -664
- data/lib/canon/comparison/xml_node_comparison.rb +297 -0
- data/lib/canon/comparison/xml_parser.rb +19 -0
- data/lib/canon/comparison/yaml_comparator.rb +3 -3
- data/lib/canon/comparison.rb +265 -110
- data/lib/canon/diff/diff_node.rb +32 -2
- data/lib/canon/diff/node_serializer.rb +191 -0
- data/lib/canon/diff/path_builder.rb +143 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
- data/lib/canon/diff_formatter.rb +1 -1
- data/lib/canon/rspec_matchers.rb +1 -1
- data/lib/canon/tree_diff/operation_converter.rb +92 -338
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
- data/lib/canon/version.rb +1 -1
- data/old-docs/ADVANCED_TOPICS.adoc +20 -0
- data/old-docs/BASIC_USAGE.adoc +16 -0
- data/old-docs/CHARACTER_VISUALIZATION.adoc +567 -0
- data/old-docs/CLI.adoc +497 -0
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
- data/old-docs/DIFF_ARCHITECTURE.adoc +435 -0
- data/old-docs/DIFF_FORMATTING.adoc +540 -0
- data/old-docs/DIFF_PARAMETERS.adoc +261 -0
- data/old-docs/DOM_DIFF.adoc +1017 -0
- data/old-docs/ENV_CONFIG.adoc +876 -0
- data/old-docs/FORMATS.adoc +867 -0
- data/old-docs/INPUT_VALIDATION.adoc +477 -0
- data/old-docs/MATCHER_BEHAVIOR.adoc +90 -0
- data/old-docs/MATCH_ARCHITECTURE.adoc +463 -0
- data/old-docs/MATCH_OPTIONS.adoc +912 -0
- data/old-docs/MODES.adoc +432 -0
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
- data/old-docs/OPTIONS.adoc +1387 -0
- data/old-docs/PREPROCESSING.adoc +491 -0
- data/old-docs/README.old.adoc +2831 -0
- data/old-docs/RSPEC.adoc +814 -0
- data/old-docs/RUBY_API.adoc +485 -0
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +646 -0
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +765 -0
- data/old-docs/STRING_COMPARE.adoc +345 -0
- data/old-docs/TMP.adoc +3384 -0
- data/old-docs/TREE_DIFF.adoc +1080 -0
- data/old-docs/UNDERSTANDING_CANON.adoc +17 -0
- data/old-docs/VERBOSE.adoc +482 -0
- data/old-docs/VISUALIZATION_MAP.adoc +625 -0
- data/old-docs/WHITESPACE_TREATMENT.adoc +1155 -0
- data/scripts/analyze_current_state.rb +85 -0
- data/scripts/analyze_false_positives.rb +114 -0
- data/scripts/analyze_remaining_failures.rb +105 -0
- data/scripts/compare_current_failures.rb +95 -0
- data/scripts/compare_dom_tree_diff.rb +158 -0
- data/scripts/compare_failures.rb +151 -0
- data/scripts/debug_attribute_extraction.rb +66 -0
- data/scripts/debug_blocks_839.rb +115 -0
- data/scripts/debug_meta_matching.rb +52 -0
- data/scripts/debug_p_matching.rb +192 -0
- data/scripts/debug_signature_matching.rb +118 -0
- data/scripts/debug_sourcecode_124.rb +32 -0
- data/scripts/debug_whitespace_sensitive.rb +192 -0
- data/scripts/extract_false_positives.rb +138 -0
- data/scripts/find_actual_false_positives.rb +125 -0
- data/scripts/investigate_all_false_positives.rb +161 -0
- data/scripts/investigate_batch1.rb +127 -0
- data/scripts/investigate_classification.rb +150 -0
- data/scripts/investigate_classification_detailed.rb +190 -0
- data/scripts/investigate_common_failures.rb +342 -0
- data/scripts/investigate_false_negative.rb +80 -0
- data/scripts/investigate_false_positive.rb +83 -0
- data/scripts/investigate_false_positives.rb +227 -0
- data/scripts/investigate_false_positives_batch.rb +163 -0
- data/scripts/investigate_mixed_content.rb +125 -0
- data/scripts/investigate_remaining_16.rb +214 -0
- data/scripts/run_single_test.rb +29 -0
- data/scripts/test_all_false_positives.rb +95 -0
- data/scripts/test_attribute_details.rb +61 -0
- data/scripts/test_both_algorithms.rb +49 -0
- data/scripts/test_both_simple.rb +49 -0
- data/scripts/test_enhanced_semantic_output.rb +125 -0
- data/scripts/test_readme_examples.rb +131 -0
- data/scripts/test_semantic_tree_diff.rb +99 -0
- data/scripts/test_semantic_ux_improvements.rb +135 -0
- data/scripts/test_single_false_positive.rb +119 -0
- data/scripts/test_size_limits.rb +99 -0
- data/test_html_1.html +21 -0
- data/test_html_2.html +21 -0
- data/test_nokogiri.rb +33 -0
- data/test_normalize.rb +45 -0
- metadata +123 -2
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "match_options"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Comparison
|
|
7
|
+
# Profile definition DSL with full validation
|
|
8
|
+
#
|
|
9
|
+
# Provides a clean, validated way to define custom comparison profiles.
|
|
10
|
+
# Catches errors at definition time with clear, actionable messages.
|
|
11
|
+
#
|
|
12
|
+
# @example Define a custom profile
|
|
13
|
+
# Canon::Comparison.define_profile(:my_custom) do
|
|
14
|
+
# text_content :normalize
|
|
15
|
+
# comments :ignore
|
|
16
|
+
# preprocessing :rendered
|
|
17
|
+
# end
|
|
18
|
+
class ProfileDefinition
  # All valid dimensions for XML/HTML comparison
  # These must match MatchOptions::Xml::MATCH_DIMENSIONS
  VALID_DIMENSIONS = %i[
    text_content
    structural_whitespace
    attribute_presence
    attribute_order
    attribute_values
    element_position
    comments
  ].freeze

  # Behaviors valid for each dimension
  # Maps dimension name to array of valid behavior symbols
  DIMENSION_BEHAVIORS = {
    text_content: %i[strict normalize ignore],
    structural_whitespace: %i[strict normalize ignore],
    attribute_presence: %i[strict ignore],
    attribute_order: %i[strict ignore],
    attribute_values: %i[strict strip compact normalize ignore],
    element_position: %i[strict ignore],
    comments: %i[strict ignore],
  }.freeze

  attr_reader :name, :settings

  # Initialize a new, empty profile definition
  #
  # @param name [Symbol] Profile name
  def initialize(name)
    @name = name
    @settings = {}
  end

  # Define a profile using DSL syntax
  #
  # The block is evaluated with the new definition as +self+, so the
  # DSL methods below can be called without a receiver. Dimension
  # settings are validated once the block has finished.
  #
  # @param name [Symbol] Profile name
  # @yield DSL block for defining the profile
  # @return [Hash] Profile settings hash
  # @raise [ProfileError] if profile definition is invalid
  def self.define(name, &block)
    definition = new(name)
    definition.instance_eval(&block) if block
    definition.validate!
    definition.to_h
  end

  # Create one DSL method per dimension. Each method only records the
  # requested behavior; the value is checked later by #validate!.
  VALID_DIMENSIONS.each do |dimension|
    define_method(dimension) do |behavior|
      @settings[dimension] = behavior
    end
  end

  # Set preprocessing mode
  #
  # @param mode [Symbol] Preprocessing mode
  # @raise [ProfileError] if mode is not listed in
  #   MatchOptions::PREPROCESSING_OPTIONS
  def preprocessing(mode)
    unless MatchOptions::PREPROCESSING_OPTIONS.include?(mode)
      raise ProfileError,
            "Invalid preprocessing mode: #{mode}. " \
            "Valid options: #{MatchOptions::PREPROCESSING_OPTIONS.join(', ')}"
    end

    @settings[:preprocessing] = mode
  end

  # Enable/disable semantic diff
  #
  # The value is stored as given; it is not validated.
  #
  # @param enabled [Boolean] Whether to enable semantic diff (default: true)
  def semantic_diff(enabled: true)
    @settings[:semantic_diff] = enabled
  end

  # Set similarity threshold for semantic matching
  #
  # @param value [Numeric] Threshold between 0 and 1 (inclusive)
  # @raise [ProfileError] if value is not a real number in that range
  def similarity_threshold(value)
    # Complex numbers are Numeric but define no ordering operators, so
    # they must be rejected before the range comparison is attempted;
    # otherwise the caller gets a NoMethodError instead of ProfileError.
    unless value.is_a?(Numeric) && value.real? && value >= 0 && value <= 1
      raise ProfileError,
            "Similarity threshold must be between 0 and 1, got: #{value}"
    end

    @settings[:similarity_threshold] = value
  end

  # Validate the profile definition
  #
  # Only settings whose key is a dimension are checked here; the
  # preprocessing mode and similarity threshold are validated by their
  # own setters at the time they are called.
  #
  # @raise [ProfileError] if a dimension has an invalid behavior
  def validate!
    @settings.each do |key, value|
      validate_dimension!(key, value) if VALID_DIMENSIONS.include?(key)
    end
  end

  # Convert to hash
  #
  # @return [Hash] Shallow copy of the profile settings
  def to_h
    @settings.dup
  end

  private

  # Validate a dimension setting
  #
  # @param dimension [Symbol] Dimension name
  # @param behavior [Symbol] Behavior value
  # @raise [ProfileError] if dimension or behavior is invalid
  def validate_dimension!(dimension, behavior)
    unless DIMENSION_BEHAVIORS.key?(dimension)
      raise ProfileError,
            "Unknown dimension: #{dimension}. " \
            "Valid dimensions: #{VALID_DIMENSIONS.join(', ')}"
    end

    valid_behaviors = DIMENSION_BEHAVIORS[dimension]
    unless valid_behaviors.include?(behavior)
      raise ProfileError,
            "Invalid behavior '#{behavior}' for dimension '#{dimension}'. " \
            "Valid behaviors: #{valid_behaviors.join(', ')}"
    end
  end
end
|
|
145
|
+
|
|
146
|
+
# Custom error for profile definition issues
|
|
147
|
+
class ProfileError < Error
  # Raised by ProfileDefinition when a profile setting fails validation.
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
# Ruby Object Comparison Utilities
|
|
6
|
+
#
|
|
7
|
+
# Provides public comparison methods for Ruby objects (Hash, Array, primitives).
|
|
8
|
+
# This module extracts shared comparison logic that was previously
|
|
9
|
+
# accessed via send() from YamlComparator.
|
|
10
|
+
module RubyObjectComparator
  class << self
    # Recursively compare two Ruby values (Hash, Array, primitives) as
    # produced for JSON/YAML comparison.
    #
    # @param obj1 [Object] First object
    # @param obj2 [Object] Second object
    # @param opts [Hash] Comparison options
    # @param differences [Array] Array to append differences to
    # @param path [String] Current path in the object structure
    # @return [Symbol] Comparison result constant
    def compare_objects(obj1, obj2, opts, differences, path)
      # Values must be of exactly the same class to be compared further
      unless obj1.instance_of?(obj2.class)
        add_difference(path, obj1, obj2, Comparison::UNEQUAL_TYPES,
                       opts, differences)
        return Comparison::UNEQUAL_TYPES
      end

      if obj1.is_a?(Hash)
        return compare_hashes(obj1, obj2, opts, differences, path)
      end
      if obj1.is_a?(Array)
        return compare_arrays(obj1, obj2, opts, differences, path)
      end

      primitive = [NilClass, TrueClass, FalseClass, Numeric, String,
                   Symbol].any? { |klass| obj1.is_a?(klass) }
      if primitive
        return compare_primitives(obj1, obj2, opts, differences, path)
      end

      # Any other kind of object falls back to plain equality
      return Comparison::EQUIVALENT if obj1 == obj2

      add_difference(path, obj1, obj2, Comparison::UNEQUAL_PRIMITIVES,
                     opts, differences)
      Comparison::UNEQUAL_PRIMITIVES
    end

    # Compare two hashes key by key.
    #
    # When match options are present and :key_order is :strict, a pure
    # reordering of the same keys is reported as a key-order difference
    # and ends the comparison of this hash. With any other :key_order
    # value the keys are sorted by their string form before comparing.
    #
    # @param hash1 [Hash] First hash
    # @param hash2 [Hash] Second hash
    # @param opts [Hash] Comparison options
    # @param differences [Array] Array to append differences to
    # @param path [String] Current path in the object structure
    # @return [Symbol] Comparison result constant
    def compare_hashes(hash1, hash2, opts, differences, path)
      keys1 = hash1.keys
      keys2 = hash2.keys
      match_opts = opts[:match_opts]

      if match_opts
        if match_opts[:key_order] == :strict
          reordered = (keys1 != keys2) &&
            (keys1.sort_by(&:to_s) == keys2.sort_by(&:to_s))
          if reordered
            add_difference(child_path(path, "(key order)"), keys1, keys2,
                           Comparison::UNEQUAL_HASH_KEY_ORDER, opts,
                           differences)
            return Comparison::UNEQUAL_HASH_KEY_ORDER
          end
        else
          keys1 = keys1.sort_by(&:to_s)
          keys2 = keys2.sort_by(&:to_s)
        end
      end

      # Keys present on one side only
      only_in_first = keys1 - keys2
      only_in_second = keys2 - keys1

      only_in_first.each do |key|
        add_difference(child_path(path, key), hash1[key], nil,
                       Comparison::MISSING_HASH_KEY, opts, differences)
      end
      only_in_second.each do |key|
        add_difference(child_path(path, key), nil, hash2[key],
                       Comparison::MISSING_HASH_KEY, opts, differences)
      end

      # Every shared key is visited (no short-circuit) so that all
      # nested differences get recorded.
      outcomes = (keys1 & keys2).map do |key|
        compare_objects(hash1[key], hash2[key], opts, differences,
                        child_path(path, key))
      end

      unless outcomes.all? { |outcome| outcome == Comparison::EQUIVALENT }
        return Comparison::UNEQUAL_HASH_VALUES
      end

      if only_in_first.empty? && only_in_second.empty?
        Comparison::EQUIVALENT
      else
        Comparison::MISSING_HASH_KEY
      end
    end

    # Compare two arrays element by element.
    #
    # @param arr1 [Array] First array
    # @param arr2 [Array] Second array
    # @param opts [Hash] Comparison options
    # @param differences [Array] Array to append differences to
    # @param path [String] Current path in the object structure
    # @return [Symbol] Comparison result constant
    def compare_arrays(arr1, arr2, opts, differences, path)
      if arr1.length != arr2.length
        add_difference(path, arr1, arr2, Comparison::UNEQUAL_ARRAY_LENGTHS,
                       opts, differences)
        return Comparison::UNEQUAL_ARRAY_LENGTHS
      end

      # Visit every index so that all element differences get recorded
      outcomes = arr1.each_with_index.map do |left, index|
        compare_objects(left, arr2[index], opts, differences,
                        "#{path}[#{index}]")
      end

      if outcomes.all? { |outcome| outcome == Comparison::EQUIVALENT }
        Comparison::EQUIVALENT
      else
        Comparison::UNEQUAL_ARRAY_ELEMENTS
      end
    end

    # Compare two primitive values with ==.
    #
    # @param val1 [Object] First value
    # @param val2 [Object] Second value
    # @param opts [Hash] Comparison options
    # @param differences [Array] Array to append differences to
    # @param path [String] Current path in the object structure
    # @return [Symbol] Comparison result constant
    def compare_primitives(val1, val2, opts, differences, path)
      return Comparison::EQUIVALENT if val1 == val2

      add_difference(path, val1, val2, Comparison::UNEQUAL_PRIMITIVES,
                     opts, differences)
      Comparison::UNEQUAL_PRIMITIVES
    end

    # Record a difference entry; does nothing unless opts[:verbose] is set.
    #
    # @param path [String] Path to the difference
    # @param obj1 [Object] First object
    # @param obj2 [Object] Second object
    # @param diff_code [Symbol] Difference code
    # @param opts [Hash] Comparison options
    # @param differences [Array] Array to append difference to
    def add_difference(path, obj1, obj2, diff_code, opts, differences)
      if opts[:verbose]
        differences << { path: path, value1: obj1, value2: obj2,
                         difference: diff_code }
      end
    end

    private

    # Build the dotted path of a hash entry below +path+.
    def child_path(path, key)
      path.empty? ? key.to_s : "#{path}.#{key}"
    end
  end
end
|
|
179
|
+
end
|
|
180
|
+
end
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require_relative "base_match_strategy"
|
|
4
4
|
require_relative "../../tree_diff/tree_diff_integrator"
|
|
5
5
|
require_relative "../../tree_diff/operation_converter"
|
|
6
|
+
require_relative "../xml_node_comparison"
|
|
6
7
|
|
|
7
8
|
module Canon
|
|
8
9
|
module Comparison
|
|
@@ -125,10 +126,9 @@ module Canon
|
|
|
125
126
|
# @return [Array<String>] Preprocessed strings
|
|
126
127
|
def preprocess_xml(doc1, doc2)
|
|
127
128
|
# Serialize XML to string
|
|
128
|
-
# Use
|
|
129
|
+
# Use XmlNodeComparison's serializer for Canon::Xml::Node
|
|
129
130
|
xml1 = if doc1.is_a?(Canon::Xml::Node)
|
|
130
|
-
|
|
131
|
-
XmlComparator.send(:serialize_node_to_xml, doc1)
|
|
131
|
+
XmlNodeComparison.serialize_node_to_xml(doc1)
|
|
132
132
|
elsif doc1.respond_to?(:to_xml)
|
|
133
133
|
doc1.to_xml
|
|
134
134
|
else
|
|
@@ -136,8 +136,7 @@ module Canon
|
|
|
136
136
|
end
|
|
137
137
|
|
|
138
138
|
xml2 = if doc2.is_a?(Canon::Xml::Node)
|
|
139
|
-
|
|
140
|
-
XmlComparator.send(:serialize_node_to_xml, doc2)
|
|
139
|
+
XmlNodeComparison.serialize_node_to_xml(doc2)
|
|
141
140
|
elsif doc2.respond_to?(:to_xml)
|
|
142
141
|
doc2.to_xml
|
|
143
142
|
else
|
|
@@ -161,12 +160,11 @@ module Canon
|
|
|
161
160
|
# @param doc2 [Object] Second HTML document
|
|
162
161
|
# @return [Array<String>] Preprocessed strings
|
|
163
162
|
def preprocess_html(doc1, doc2)
|
|
164
|
-
# For Canon::Xml::Node, use
|
|
163
|
+
# For Canon::Xml::Node, use XmlNodeComparison's serializer
|
|
165
164
|
# For XML::DocumentFragment (from parse_node_as_fragment), use to_s
|
|
166
165
|
# to avoid Nokogiri auto-inserting meta tags during to_html serialization
|
|
167
166
|
html1 = if doc1.is_a?(Canon::Xml::Node)
|
|
168
|
-
|
|
169
|
-
XmlComparator.send(:serialize_node_to_xml, doc1)
|
|
167
|
+
XmlNodeComparison.serialize_node_to_xml(doc1)
|
|
170
168
|
elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
|
|
171
169
|
doc1.to_s
|
|
172
170
|
elsif doc1.respond_to?(:to_html)
|
|
@@ -176,8 +174,7 @@ module Canon
|
|
|
176
174
|
end
|
|
177
175
|
|
|
178
176
|
html2 = if doc2.is_a?(Canon::Xml::Node)
|
|
179
|
-
|
|
180
|
-
XmlComparator.send(:serialize_node_to_xml, doc2)
|
|
177
|
+
XmlNodeComparison.serialize_node_to_xml(doc2)
|
|
181
178
|
elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
|
|
182
179
|
doc2.to_s
|
|
183
180
|
elsif doc2.respond_to?(:to_html)
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
module XmlComparatorHelpers
|
|
6
|
+
# Attribute comparison logic
|
|
7
|
+
# Handles comparison of attribute sets with filtering and ordering
|
|
8
|
+
class AttributeComparator
  class << self
    # Compare the attribute sets of two nodes.
    #
    # Raw attributes are read via #attribute_nodes when the node offers
    # it and via #attributes otherwise, then passed through
    # AttributeFilter before being compared.
    #
    # @param node1 [Object] First node
    # @param node2 [Object] Second node
    # @param opts [Hash] Comparison options
    # @param differences [Array] Array to append differences to
    # @return [Symbol] Comparison result
    def compare(node1, node2, opts, differences)
      raw1, raw2 = [node1, node2].map do |node|
        node.respond_to?(:attribute_nodes) ? node.attribute_nodes : node.attributes
      end

      attrs1 = XmlComparatorHelpers::AttributeFilter.filter(raw1, opts)
      attrs2 = XmlComparatorHelpers::AttributeFilter.filter(raw2, opts)

      # Attribute order defaults to :strict when not configured
      strict = (opts[:match_opts][:attribute_order] || :strict) == :strict

      keys1 = attrs1.keys.map(&:to_s)
      keys2 = attrs2.keys.map(&:to_s)

      if strict
        compare_strict_order(node1, node2, attrs1, attrs2, keys1, keys2,
                             opts, differences)
      else
        compare_flexible_order(node1, node2, attrs1, attrs2, keys1, keys2,
                               opts, differences)
      end
    end

    # Compare with strict attribute ordering.
    #
    # @param node1 [Object] First node
    # @param node2 [Object] Second node
    # @param attrs1 [Hash] First node's attributes
    # @param attrs2 [Hash] Second node's attributes
    # @param keys1 [Array<String>] First node's attribute keys
    # @param keys2 [Array<String>] Second node's attribute keys
    # @param opts [Hash] Comparison options
    # @param differences [Array] Array to append differences to
    # @return [Symbol] Comparison result
    def compare_strict_order(node1, node2, attrs1, attrs2, keys1, keys2, opts,
                             differences)
      # Identical key sequence: only the values remain to be checked
      if keys1 == keys2
        return compare_attribute_values(node1, node2, attrs1, attrs2, opts,
                                        differences)
      end

      # Same keys in another order is an attribute_order difference;
      # anything else means an attribute is present on one side only.
      code, dimension =
        if keys1.sort == keys2.sort
          [Comparison::UNEQUAL_ATTRIBUTES, :attribute_order]
        else
          [Comparison::MISSING_ATTRIBUTE, :attribute_presence]
        end

      add_attribute_difference(n1: node1, n2: node2,
                               diff1: code, diff2: code,
                               dimension: dimension,
                               opts: opts,
                               differences: differences)
      code
    end

    # Compare with flexible attribute ordering.
    #
    # @param node1 [Object] First node
    # @param node2 [Object] Second node
    # @param attrs1 [Hash] First node's attributes
    # @param attrs2 [Hash] Second node's attributes
    # @param keys1 [Array<String>] First node's attribute keys
    # @param keys2 [Array<String>] Second node's attribute keys
    # @param opts [Hash] Comparison options
    # @param differences [Array] Array to append differences to
    # @return [Symbol] Comparison result
    def compare_flexible_order(node1, node2, attrs1, attrs2, keys1, keys2, opts,
                               differences)
      # A pure reordering is still recorded in verbose mode, but it
      # does not affect the returned result.
      reordered = keys1 != keys2 && keys1.sort == keys2.sort
      if reordered && opts[:verbose]
        add_attribute_difference(n1: node1, n2: node2,
                                 diff1: Comparison::UNEQUAL_ATTRIBUTES,
                                 diff2: Comparison::UNEQUAL_ATTRIBUTES,
                                 dimension: :attribute_order,
                                 opts: opts,
                                 differences: differences)
      end

      # Order both attribute hashes by name so position is irrelevant
      sorted1 = attrs1.sort_by { |key, _value| key.to_s }.to_h
      sorted2 = attrs2.sort_by { |key, _value| key.to_s }.to_h

      names1 = sorted1.keys.map(&:to_s).sort
      names2 = sorted2.keys.map(&:to_s).sort
      if names1 != names2
        add_attribute_difference(n1: node1, n2: node2,
                                 diff1: Comparison::MISSING_ATTRIBUTE,
                                 diff2: Comparison::MISSING_ATTRIBUTE,
                                 dimension: :attribute_presence,
                                 opts: opts,
                                 differences: differences)
        return Comparison::MISSING_ATTRIBUTE
      end

      compare_attribute_values(node1, node2, sorted1, sorted2, opts,
                               differences)
    end

    # Compare attribute values; stops at the first mismatch.
    #
    # @param node1 [Object] First node
    # @param node2 [Object] Second node
    # @param attrs1 [Hash] First node's attributes
    # @param attrs2 [Hash] Second node's attributes
    # @param opts [Hash] Comparison options
    # @param differences [Array] Array to append differences to
    # @return [Symbol] Comparison result
    def compare_attribute_values(node1, node2, attrs1, attrs2, opts,
                                 differences)
      matching = attrs1.all? { |name, value| attrs2[name] == value }
      return Comparison::EQUIVALENT if matching

      add_attribute_difference(n1: node1, n2: node2,
                               diff1: Comparison::UNEQUAL_ATTRIBUTES,
                               diff2: Comparison::UNEQUAL_ATTRIBUTES,
                               dimension: :attribute_values,
                               opts: opts,
                               differences: differences)
      Comparison::UNEQUAL_ATTRIBUTES
    end

    # Build a diff node for an attribute difference and append it.
    #
    # Any extra keyword arguments are forwarded unchanged to
    # DiffNodeBuilder.build; nothing is appended when the builder
    # returns nil or false.
    #
    # @param n1 [Object] First node
    # @param n2 [Object] Second node
    # @param diff1 [String] Difference type for node1
    # @param diff2 [String] Difference type for node2
    # @param dimension [Symbol] The match dimension
    # @param opts [Hash] Options
    # @param differences [Array] Array to append difference to
    def add_attribute_difference(n1:, n2:, diff1:, diff2:,
                                 dimension:, differences:, **opts)
      # Loaded lazily here to avoid a circular dependency
      require_relative "diff_node_builder"

      built = Canon::Comparison::DiffNodeBuilder.build(
        node1: n1,
        node2: n2,
        diff1: diff1,
        diff2: diff2,
        dimension: dimension,
        **opts,
      )
      differences << built if built
    end
  end
end
|
|
175
|
+
end
|
|
176
|
+
end
|
|
177
|
+
end
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../match_options"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Comparison
|
|
7
|
+
module XmlComparatorHelpers
|
|
8
|
+
# Attribute filtering logic
|
|
9
|
+
# Handles filtering of attributes based on options and match settings
|
|
10
|
+
class AttributeFilter
|
|
11
|
+
# Filter attributes based on options
|
|
12
|
+
#
|
|
13
|
+
# @param attributes [Array, Hash] Raw attributes
|
|
14
|
+
# @param opts [Hash] Comparison options
|
|
15
|
+
# @return [Hash] Filtered attributes
|
|
16
|
+
def self.filter(attributes, opts)
  resolved = opts[:match_opts]

  # An Array is the Canon::Xml::Node format (AttributeNode objects);
  # anything else is treated as the Hash-like Nokogiri/Moxml format.
  {}.tap do |kept|
    if attributes.is_a?(Array)
      filter_array_attributes(attributes, opts, resolved, kept)
    else
      filter_hash_attributes(attributes, opts, resolved, kept)
    end
  end
end
|
|
30
|
+
|
|
31
|
+
# Filter array-format attributes (Canon::Xml::Node)
|
|
32
|
+
#
|
|
33
|
+
# @param attributes [Array] Array of AttributeNode objects
|
|
34
|
+
# @param opts [Hash] Comparison options
|
|
35
|
+
# @param match_opts [Hash] Resolved match options
|
|
36
|
+
# @param filtered [Hash] Output hash to populate
|
|
37
|
+
def self.filter_array_attributes(attributes, opts, match_opts, filtered)
|
|
38
|
+
attributes.each do |attr|
|
|
39
|
+
name = attr.name
|
|
40
|
+
value = attr.value
|
|
41
|
+
|
|
42
|
+
# Skip namespace declarations - they're handled separately
|
|
43
|
+
next if namespace_declaration?(name)
|
|
44
|
+
|
|
45
|
+
# Skip if attribute name should be ignored
|
|
46
|
+
next if ignore_by_name?(name, opts)
|
|
47
|
+
|
|
48
|
+
# Skip if attribute content should be ignored
|
|
49
|
+
next if ignore_by_content?(value, opts)
|
|
50
|
+
|
|
51
|
+
# Apply match options for attribute values
|
|
52
|
+
behavior = match_opts[:attribute_values] || :strict
|
|
53
|
+
value = MatchOptions.process_attribute_value(value, behavior)
|
|
54
|
+
|
|
55
|
+
filtered[name] = value
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Filter hash-format attributes (Nokogiri/Moxml)
|
|
60
|
+
#
|
|
61
|
+
# @param attributes [Hash] Hash-like attributes
|
|
62
|
+
# @param opts [Hash] Comparison options
|
|
63
|
+
# @param match_opts [Hash] Resolved match options
|
|
64
|
+
# @param filtered [Hash] Output hash to populate
|
|
65
|
+
def self.filter_hash_attributes(attributes, opts, match_opts, filtered)
|
|
66
|
+
attributes.each do |key, val|
|
|
67
|
+
# Normalize key and value
|
|
68
|
+
name, value = normalize_attribute_pair(key, val)
|
|
69
|
+
|
|
70
|
+
# Skip namespace declarations - they're handled separately
|
|
71
|
+
next if namespace_declaration?(name)
|
|
72
|
+
|
|
73
|
+
# Skip if attribute name should be ignored
|
|
74
|
+
next if ignore_by_name?(name, opts)
|
|
75
|
+
|
|
76
|
+
# Skip if attribute content should be ignored
|
|
77
|
+
next if ignore_by_content?(value, opts)
|
|
78
|
+
|
|
79
|
+
# Apply match options for attribute values
|
|
80
|
+
behavior = match_opts[:attribute_values] || :strict
|
|
81
|
+
value = MatchOptions.process_attribute_value(value, behavior)
|
|
82
|
+
|
|
83
|
+
filtered[name] = value
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# Normalize attribute key-value pair from different formats
|
|
88
|
+
#
|
|
89
|
+
# @param key [Object] Attribute key (String or Attribute object)
|
|
90
|
+
# @param val [Object] Attribute value
|
|
91
|
+
# @return [Array<String, String>] Normalized [name, value] pair
|
|
92
|
+
def self.normalize_attribute_pair(key, val)
|
|
93
|
+
if key.is_a?(String)
|
|
94
|
+
# Nokogiri format: key=name (String), val=attr object
|
|
95
|
+
name = key
|
|
96
|
+
value = val.respond_to?(:value) ? val.value : val.to_s
|
|
97
|
+
else
|
|
98
|
+
# Moxml format: key=attr object, val=nil
|
|
99
|
+
name = key.respond_to?(:name) ? key.name : key.to_s
|
|
100
|
+
value = key.respond_to?(:value) ? key.value : key.to_s
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
[name, value]
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# Check if attribute should be ignored by name
|
|
107
|
+
#
|
|
108
|
+
# @param name [String] Attribute name
|
|
109
|
+
# @param opts [Hash] Comparison options
|
|
110
|
+
# @return [Boolean] true if should ignore
|
|
111
|
+
def self.ignore_by_name?(name, opts)
|
|
112
|
+
opts[:ignore_attrs_by_name].any? { |pattern| name.include?(pattern) }
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Check if attribute should be ignored by content
|
|
116
|
+
#
|
|
117
|
+
# @param value [String] Attribute value
|
|
118
|
+
# @param opts [Hash] Comparison options
|
|
119
|
+
# @return [Boolean] true if should ignore
|
|
120
|
+
def self.ignore_by_content?(value, opts)
|
|
121
|
+
opts[:ignore_attr_content].any? do |pattern|
|
|
122
|
+
value.to_s.include?(pattern)
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Check if an attribute name is a namespace declaration
|
|
127
|
+
#
|
|
128
|
+
# @param attr_name [String] Attribute name
|
|
129
|
+
# @return [Boolean] true if it's a namespace declaration
|
|
130
|
+
def self.namespace_declaration?(attr_name)
|
|
131
|
+
attr_name == "xmlns" || attr_name.start_with?("xmlns:")
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
end
|