canon 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +25 -135
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/advanced/extending-canon.adoc +193 -0
  6. data/docs/internals/diffnode-enrichment.adoc +611 -0
  7. data/docs/internals/index.adoc +251 -0
  8. data/docs/lychee.toml +13 -6
  9. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +250 -0
  10. data/docs/understanding/architecture.adoc +749 -33
  11. data/docs/understanding/comparison-pipeline.adoc +122 -0
  12. data/false_positive_analysis.txt +0 -0
  13. data/file1.html +1 -0
  14. data/file2.html +1 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +86 -0
  27. data/lib/canon/comparison/html_comparator.rb +51 -18
  28. data/lib/canon/comparison/html_parser.rb +80 -0
  29. data/lib/canon/comparison/json_comparator.rb +12 -0
  30. data/lib/canon/comparison/json_parser.rb +19 -0
  31. data/lib/canon/comparison/markup_comparator.rb +293 -0
  32. data/lib/canon/comparison/match_options/base_resolver.rb +143 -0
  33. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  34. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  35. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  36. data/lib/canon/comparison/match_options.rb +68 -463
  37. data/lib/canon/comparison/profile_definition.rb +149 -0
  38. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  39. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  40. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  41. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  42. data/lib/canon/comparison/xml_comparator/child_comparison.rb +189 -0
  43. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  44. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  45. data/lib/canon/comparison/xml_comparator/node_parser.rb +74 -0
  46. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +95 -0
  47. data/lib/canon/comparison/xml_comparator.rb +52 -664
  48. data/lib/canon/comparison/xml_node_comparison.rb +297 -0
  49. data/lib/canon/comparison/xml_parser.rb +19 -0
  50. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  51. data/lib/canon/comparison.rb +265 -110
  52. data/lib/canon/diff/diff_node.rb +32 -2
  53. data/lib/canon/diff/node_serializer.rb +191 -0
  54. data/lib/canon/diff/path_builder.rb +143 -0
  55. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  56. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  57. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  58. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  59. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  60. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  61. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  64. data/lib/canon/diff_formatter.rb +1 -1
  65. data/lib/canon/rspec_matchers.rb +1 -1
  66. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  67. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  68. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  69. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  70. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  71. data/lib/canon/version.rb +1 -1
  72. data/old-docs/ADVANCED_TOPICS.adoc +20 -0
  73. data/old-docs/BASIC_USAGE.adoc +16 -0
  74. data/old-docs/CHARACTER_VISUALIZATION.adoc +567 -0
  75. data/old-docs/CLI.adoc +497 -0
  76. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  77. data/old-docs/DIFF_ARCHITECTURE.adoc +435 -0
  78. data/old-docs/DIFF_FORMATTING.adoc +540 -0
  79. data/old-docs/DIFF_PARAMETERS.adoc +261 -0
  80. data/old-docs/DOM_DIFF.adoc +1017 -0
  81. data/old-docs/ENV_CONFIG.adoc +876 -0
  82. data/old-docs/FORMATS.adoc +867 -0
  83. data/old-docs/INPUT_VALIDATION.adoc +477 -0
  84. data/old-docs/MATCHER_BEHAVIOR.adoc +90 -0
  85. data/old-docs/MATCH_ARCHITECTURE.adoc +463 -0
  86. data/old-docs/MATCH_OPTIONS.adoc +912 -0
  87. data/old-docs/MODES.adoc +432 -0
  88. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  89. data/old-docs/OPTIONS.adoc +1387 -0
  90. data/old-docs/PREPROCESSING.adoc +491 -0
  91. data/old-docs/README.old.adoc +2831 -0
  92. data/old-docs/RSPEC.adoc +814 -0
  93. data/old-docs/RUBY_API.adoc +485 -0
  94. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +646 -0
  95. data/old-docs/SEMANTIC_TREE_DIFF.adoc +765 -0
  96. data/old-docs/STRING_COMPARE.adoc +345 -0
  97. data/old-docs/TMP.adoc +3384 -0
  98. data/old-docs/TREE_DIFF.adoc +1080 -0
  99. data/old-docs/UNDERSTANDING_CANON.adoc +17 -0
  100. data/old-docs/VERBOSE.adoc +482 -0
  101. data/old-docs/VISUALIZATION_MAP.adoc +625 -0
  102. data/old-docs/WHITESPACE_TREATMENT.adoc +1155 -0
  103. data/scripts/analyze_current_state.rb +85 -0
  104. data/scripts/analyze_false_positives.rb +114 -0
  105. data/scripts/analyze_remaining_failures.rb +105 -0
  106. data/scripts/compare_current_failures.rb +95 -0
  107. data/scripts/compare_dom_tree_diff.rb +158 -0
  108. data/scripts/compare_failures.rb +151 -0
  109. data/scripts/debug_attribute_extraction.rb +66 -0
  110. data/scripts/debug_blocks_839.rb +115 -0
  111. data/scripts/debug_meta_matching.rb +52 -0
  112. data/scripts/debug_p_matching.rb +192 -0
  113. data/scripts/debug_signature_matching.rb +118 -0
  114. data/scripts/debug_sourcecode_124.rb +32 -0
  115. data/scripts/debug_whitespace_sensitive.rb +192 -0
  116. data/scripts/extract_false_positives.rb +138 -0
  117. data/scripts/find_actual_false_positives.rb +125 -0
  118. data/scripts/investigate_all_false_positives.rb +161 -0
  119. data/scripts/investigate_batch1.rb +127 -0
  120. data/scripts/investigate_classification.rb +150 -0
  121. data/scripts/investigate_classification_detailed.rb +190 -0
  122. data/scripts/investigate_common_failures.rb +342 -0
  123. data/scripts/investigate_false_negative.rb +80 -0
  124. data/scripts/investigate_false_positive.rb +83 -0
  125. data/scripts/investigate_false_positives.rb +227 -0
  126. data/scripts/investigate_false_positives_batch.rb +163 -0
  127. data/scripts/investigate_mixed_content.rb +125 -0
  128. data/scripts/investigate_remaining_16.rb +214 -0
  129. data/scripts/run_single_test.rb +29 -0
  130. data/scripts/test_all_false_positives.rb +95 -0
  131. data/scripts/test_attribute_details.rb +61 -0
  132. data/scripts/test_both_algorithms.rb +49 -0
  133. data/scripts/test_both_simple.rb +49 -0
  134. data/scripts/test_enhanced_semantic_output.rb +125 -0
  135. data/scripts/test_readme_examples.rb +131 -0
  136. data/scripts/test_semantic_tree_diff.rb +99 -0
  137. data/scripts/test_semantic_ux_improvements.rb +135 -0
  138. data/scripts/test_single_false_positive.rb +119 -0
  139. data/scripts/test_size_limits.rb +99 -0
  140. data/test_html_1.html +21 -0
  141. data/test_html_2.html +21 -0
  142. data/test_nokogiri.rb +33 -0
  143. data/test_normalize.rb +45 -0
  144. metadata +123 -2
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "match_options"
4
+
5
+ module Canon
6
+ module Comparison
7
+ # Profile definition DSL with full validation
8
+ #
9
+ # Provides a clean, validated way to define custom comparison profiles.
10
+ # Catches errors at definition time with clear, actionable messages.
11
+ #
12
+ # @example Define a custom profile
13
+ # Canon::Comparison.define_profile(:my_custom) do
14
+ # text_content :normalize
15
+ # comments :ignore
16
+ # preprocessing :rendered
17
+ # end
18
+ class ProfileDefinition
19
+ # All valid dimensions for XML/HTML comparison
20
+ # These must match MatchOptions::Xml::MATCH_DIMENSIONS
21
+ VALID_DIMENSIONS = %i[
22
+ text_content
23
+ structural_whitespace
24
+ attribute_presence
25
+ attribute_order
26
+ attribute_values
27
+ element_position
28
+ comments
29
+ ].freeze
30
+
31
+ # Behaviors valid for each dimension
32
+ # Maps dimension name to array of valid behavior symbols
33
+ DIMENSION_BEHAVIORS = {
34
+ text_content: %i[strict normalize ignore],
35
+ structural_whitespace: %i[strict normalize ignore],
36
+ attribute_presence: %i[strict ignore],
37
+ attribute_order: %i[strict ignore],
38
+ attribute_values: %i[strict strip compact normalize ignore],
39
+ element_position: %i[strict ignore],
40
+ comments: %i[strict ignore],
41
+ }.freeze
42
+
43
+ attr_reader :name, :settings
44
+
45
+ # Initialize a new profile definition
46
+ #
47
+ # @param name [Symbol] Profile name
48
+ def initialize(name)
49
+ @name = name
50
+ @settings = {}
51
+ end
52
+
53
+ # Define a profile using DSL syntax
54
+ #
55
+ # @param name [Symbol] Profile name
56
+ # @yield [ProfileDefinition] DSL block for defining profile
57
+ # @return [Hash] Profile settings hash
58
+ # @raise [ProfileError] if profile definition is invalid
59
+ def self.define(name, &block)
60
+ definition = new(name)
61
+ definition.instance_eval(&block) if block
62
+ definition.validate!
63
+ definition.to_h
64
+ end
65
+
66
+ # Create DSL methods for each dimension
67
+ VALID_DIMENSIONS.each do |dimension|
68
+ define_method(dimension) do |behavior|
69
+ @settings[dimension] = behavior
70
+ end
71
+ end
72
+
73
+ # Set preprocessing mode
74
+ #
75
+ # @param mode [Symbol] Preprocessing mode
76
+ # @raise [ProfileError] if mode is invalid
77
+ def preprocessing(mode)
78
+ unless MatchOptions::PREPROCESSING_OPTIONS.include?(mode)
79
+ raise ProfileError,
80
+ "Invalid preprocessing mode: #{mode}. " \
81
+ "Valid options: #{MatchOptions::PREPROCESSING_OPTIONS.join(', ')}"
82
+ end
83
+
84
+ @settings[:preprocessing] = mode
85
+ end
86
+
87
+ # Enable/disable semantic diff
88
+ #
89
+ # @param enabled [Boolean] Whether to enable semantic diff (default: true)
90
+ def semantic_diff(enabled: true)
91
+ @settings[:semantic_diff] = enabled
92
+ end
93
+
94
+ # Set similarity threshold for semantic matching
95
+ #
96
+ # @param value [Numeric] Threshold between 0 and 1
97
+ # @raise [ProfileError] if value is out of range
98
+ def similarity_threshold(value)
99
+ unless value.is_a?(Numeric) && value >= 0 && value <= 1
100
+ raise ProfileError,
101
+ "Similarity threshold must be between 0 and 1, got: #{value}"
102
+ end
103
+
104
+ @settings[:similarity_threshold] = value
105
+ end
106
+
107
+ # Validate the profile definition
108
+ #
109
+ # @raise [ProfileError] if profile definition is invalid
110
+ def validate!
111
+ @settings.each do |key, value|
112
+ validate_dimension!(key, value) if VALID_DIMENSIONS.include?(key)
113
+ end
114
+ end
115
+
116
+ # Convert to hash
117
+ #
118
+ # @return [Hash] Profile settings
119
+ def to_h
120
+ @settings.dup
121
+ end
122
+
123
+ private
124
+
125
+ # Validate a dimension setting
126
+ #
127
+ # @param dimension [Symbol] Dimension name
128
+ # @param behavior [Symbol] Behavior value
129
+ # @raise [ProfileError] if dimension or behavior is invalid
130
+ def validate_dimension!(dimension, behavior)
131
+ unless DIMENSION_BEHAVIORS.key?(dimension)
132
+ raise ProfileError,
133
+ "Unknown dimension: #{dimension}. " \
134
+ "Valid dimensions: #{VALID_DIMENSIONS.join(', ')}"
135
+ end
136
+
137
+ valid_behaviors = DIMENSION_BEHAVIORS[dimension]
138
+ unless valid_behaviors.include?(behavior)
139
+ raise ProfileError,
140
+ "Invalid behavior '#{behavior}' for dimension '#{dimension}'. " \
141
+ "Valid behaviors: #{valid_behaviors.join(', ')}"
142
+ end
143
+ end
144
+ end
145
+
146
+ # Custom error for profile definition issues
147
+ class ProfileError < Error; end
148
+ end
149
+ end
@@ -0,0 +1,180 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ # Ruby Object Comparison Utilities
6
+ #
7
+ # Provides public comparison methods for Ruby objects (Hash, Array, primitives).
8
+ # This module extracts shared comparison logic that was previously
9
+ # accessed via send() from YamlComparator.
10
+ module RubyObjectComparator
11
+ # Compare Ruby objects (Hash, Array, primitives) for JSON/YAML
12
+ #
13
+ # @param obj1 [Object] First object
14
+ # @param obj2 [Object] Second object
15
+ # @param opts [Hash] Comparison options
16
+ # @param differences [Array] Array to append differences to
17
+ # @param path [String] Current path in the object structure
18
+ # @return [Symbol] Comparison result constant
19
+ def self.compare_objects(obj1, obj2, opts, differences, path)
20
+ # Check for type mismatch
21
+ unless obj1.instance_of?(obj2.class)
22
+ add_difference(path, obj1, obj2, Comparison::UNEQUAL_TYPES,
23
+ opts, differences)
24
+ return Comparison::UNEQUAL_TYPES
25
+ end
26
+
27
+ case obj1
28
+ when Hash
29
+ compare_hashes(obj1, obj2, opts, differences, path)
30
+ when Array
31
+ compare_arrays(obj1, obj2, opts, differences, path)
32
+ when NilClass, TrueClass, FalseClass, Numeric, String, Symbol
33
+ compare_primitives(obj1, obj2, opts, differences, path)
34
+ else
35
+ # Fallback to equality comparison
36
+ if obj1 == obj2
37
+ Comparison::EQUIVALENT
38
+ else
39
+ add_difference(path, obj1, obj2,
40
+ Comparison::UNEQUAL_PRIMITIVES, opts,
41
+ differences)
42
+ Comparison::UNEQUAL_PRIMITIVES
43
+ end
44
+ end
45
+ end
46
+
47
+ # Compare two hashes
48
+ #
49
+ # @param hash1 [Hash] First hash
50
+ # @param hash2 [Hash] Second hash
51
+ # @param opts [Hash] Comparison options
52
+ # @param differences [Array] Array to append differences to
53
+ # @param path [String] Current path in the object structure
54
+ # @return [Symbol] Comparison result constant
55
+ def self.compare_hashes(hash1, hash2, opts, differences, path)
56
+ keys1 = hash1.keys
57
+ keys2 = hash2.keys
58
+
59
+ # Sort keys if order should be ignored (based on match options)
60
+ match_opts = opts[:match_opts]
61
+ if match_opts && match_opts[:key_order] != :strict
62
+ keys1 = keys1.sort_by(&:to_s)
63
+ keys2 = keys2.sort_by(&:to_s)
64
+ elsif match_opts && match_opts[:key_order] == :strict
65
+ # Strict mode: key order matters
66
+ # Check if keys are in same order
67
+ # Keys are different or in different order
68
+ # First check if it's just ordering (same keys, different order)
69
+ if (keys1 != keys2) && (keys1.sort_by(&:to_s) == keys2.sort_by(&:to_s))
70
+ # Same keys, different order - this is a key_order difference
71
+ key_path = path.empty? ? "(key order)" : "#{path}.(key order)"
72
+ add_difference(key_path, keys1, keys2,
73
+ Comparison::UNEQUAL_HASH_KEY_ORDER, opts, differences)
74
+ return Comparison::UNEQUAL_HASH_KEY_ORDER
75
+ end
76
+ end
77
+
78
+ # Check for missing keys
79
+ missing_in_second = keys1 - keys2
80
+ missing_in_first = keys2 - keys1
81
+
82
+ missing_in_second.each do |key|
83
+ key_path = path.empty? ? key.to_s : "#{path}.#{key}"
84
+ add_difference(key_path, hash1[key], nil,
85
+ Comparison::MISSING_HASH_KEY, opts, differences)
86
+ end
87
+
88
+ missing_in_first.each do |key|
89
+ key_path = path.empty? ? key.to_s : "#{path}.#{key}"
90
+ add_difference(key_path, nil, hash2[key],
91
+ Comparison::MISSING_HASH_KEY, opts, differences)
92
+ end
93
+
94
+ has_missing_keys = !missing_in_first.empty? || !missing_in_second.empty?
95
+
96
+ # Compare common keys
97
+ common_keys = keys1 & keys2
98
+ all_equivalent = true
99
+ common_keys.each do |key|
100
+ key_path = path.empty? ? key.to_s : "#{path}.#{key}"
101
+ result = compare_objects(hash1[key], hash2[key], opts,
102
+ differences, key_path)
103
+ all_equivalent = false unless result == Comparison::EQUIVALENT
104
+ end
105
+
106
+ # Return appropriate status
107
+ return Comparison::MISSING_HASH_KEY if has_missing_keys && all_equivalent
108
+ return Comparison::UNEQUAL_HASH_VALUES unless all_equivalent
109
+
110
+ has_missing_keys ? Comparison::MISSING_HASH_KEY : Comparison::EQUIVALENT
111
+ end
112
+
113
+ # Compare two arrays
114
+ #
115
+ # @param arr1 [Array] First array
116
+ # @param arr2 [Array] Second array
117
+ # @param opts [Hash] Comparison options
118
+ # @param differences [Array] Array to append differences to
119
+ # @param path [String] Current path in the object structure
120
+ # @return [Symbol] Comparison result constant
121
+ def self.compare_arrays(arr1, arr2, opts, differences, path)
122
+ unless arr1.length == arr2.length
123
+ add_difference(path, arr1, arr2,
124
+ Comparison::UNEQUAL_ARRAY_LENGTHS, opts,
125
+ differences)
126
+ return Comparison::UNEQUAL_ARRAY_LENGTHS
127
+ end
128
+
129
+ all_equivalent = true
130
+ arr1.each_with_index do |elem1, index|
131
+ elem2 = arr2[index]
132
+ elem_path = "#{path}[#{index}]"
133
+ result = compare_objects(elem1, elem2, opts, differences,
134
+ elem_path)
135
+ all_equivalent = false unless result == Comparison::EQUIVALENT
136
+ end
137
+
138
+ all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ARRAY_ELEMENTS
139
+ end
140
+
141
+ # Compare primitive values
142
+ #
143
+ # @param val1 [Object] First value
144
+ # @param val2 [Object] Second value
145
+ # @param opts [Hash] Comparison options
146
+ # @param differences [Array] Array to append differences to
147
+ # @param path [String] Current path in the object structure
148
+ # @return [Symbol] Comparison result constant
149
+ def self.compare_primitives(val1, val2, opts, differences, path)
150
+ if val1 == val2
151
+ Comparison::EQUIVALENT
152
+ else
153
+ add_difference(path, val1, val2,
154
+ Comparison::UNEQUAL_PRIMITIVES, opts,
155
+ differences)
156
+ Comparison::UNEQUAL_PRIMITIVES
157
+ end
158
+ end
159
+
160
+ # Add a Ruby object difference
161
+ #
162
+ # @param path [String] Path to the difference
163
+ # @param obj1 [Object] First object
164
+ # @param obj2 [Object] Second object
165
+ # @param diff_code [Symbol] Difference code
166
+ # @param opts [Hash] Comparison options
167
+ # @param differences [Array] Array to append difference to
168
+ def self.add_difference(path, obj1, obj2, diff_code, opts, differences)
169
+ return unless opts[:verbose]
170
+
171
+ differences << {
172
+ path: path,
173
+ value1: obj1,
174
+ value2: obj2,
175
+ difference: diff_code,
176
+ }
177
+ end
178
+ end
179
+ end
180
+ end
@@ -3,6 +3,7 @@
3
3
  require_relative "base_match_strategy"
4
4
  require_relative "../../tree_diff/tree_diff_integrator"
5
5
  require_relative "../../tree_diff/operation_converter"
6
+ require_relative "../xml_node_comparison"
6
7
 
7
8
  module Canon
8
9
  module Comparison
@@ -125,10 +126,9 @@ module Canon
125
126
  # @return [Array<String>] Preprocessed strings
126
127
  def preprocess_xml(doc1, doc2)
127
128
  # Serialize XML to string
128
- # Use XmlComparator's serializer for Canon::Xml::Node
129
+ # Use XmlNodeComparison's serializer for Canon::Xml::Node
129
130
  xml1 = if doc1.is_a?(Canon::Xml::Node)
130
- require_relative "../xml_comparator"
131
- XmlComparator.send(:serialize_node_to_xml, doc1)
131
+ XmlNodeComparison.serialize_node_to_xml(doc1)
132
132
  elsif doc1.respond_to?(:to_xml)
133
133
  doc1.to_xml
134
134
  else
@@ -136,8 +136,7 @@ module Canon
136
136
  end
137
137
 
138
138
  xml2 = if doc2.is_a?(Canon::Xml::Node)
139
- require_relative "../xml_comparator"
140
- XmlComparator.send(:serialize_node_to_xml, doc2)
139
+ XmlNodeComparison.serialize_node_to_xml(doc2)
141
140
  elsif doc2.respond_to?(:to_xml)
142
141
  doc2.to_xml
143
142
  else
@@ -161,12 +160,11 @@ module Canon
161
160
  # @param doc2 [Object] Second HTML document
162
161
  # @return [Array<String>] Preprocessed strings
163
162
  def preprocess_html(doc1, doc2)
164
- # For Canon::Xml::Node, use XmlComparator's serializer
163
+ # For Canon::Xml::Node, use XmlNodeComparison's serializer
165
164
  # For XML::DocumentFragment (from parse_node_as_fragment), use to_s
166
165
  # to avoid Nokogiri auto-inserting meta tags during to_html serialization
167
166
  html1 = if doc1.is_a?(Canon::Xml::Node)
168
- require_relative "../xml_comparator"
169
- XmlComparator.send(:serialize_node_to_xml, doc1)
167
+ XmlNodeComparison.serialize_node_to_xml(doc1)
170
168
  elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
171
169
  doc1.to_s
172
170
  elsif doc1.respond_to?(:to_html)
@@ -176,8 +174,7 @@ module Canon
176
174
  end
177
175
 
178
176
  html2 = if doc2.is_a?(Canon::Xml::Node)
179
- require_relative "../xml_comparator"
180
- XmlComparator.send(:serialize_node_to_xml, doc2)
177
+ XmlNodeComparison.serialize_node_to_xml(doc2)
181
178
  elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
182
179
  doc2.to_s
183
180
  elsif doc2.respond_to?(:to_html)
@@ -0,0 +1,177 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ module XmlComparatorHelpers
6
+ # Attribute comparison logic
7
+ # Handles comparison of attribute sets with filtering and ordering
8
+ class AttributeComparator
9
+ # Compare attribute sets between two nodes
10
+ #
11
+ # @param node1 [Object] First node
12
+ # @param node2 [Object] Second node
13
+ # @param opts [Hash] Comparison options
14
+ # @param differences [Array] Array to append differences to
15
+ # @return [Symbol] Comparison result
16
+ def self.compare(node1, node2, opts, differences)
17
+ # Get attributes using the appropriate method for each node type
18
+ raw_attrs1 = node1.respond_to?(:attribute_nodes) ? node1.attribute_nodes : node1.attributes
19
+ raw_attrs2 = node2.respond_to?(:attribute_nodes) ? node2.attribute_nodes : node2.attributes
20
+
21
+ attrs1 = XmlComparatorHelpers::AttributeFilter.filter(raw_attrs1,
22
+ opts)
23
+ attrs2 = XmlComparatorHelpers::AttributeFilter.filter(raw_attrs2,
24
+ opts)
25
+
26
+ match_opts = opts[:match_opts]
27
+ attribute_order_behavior = match_opts[:attribute_order] || :strict
28
+
29
+ # Check attribute order if not ignored
30
+ keys1 = attrs1.keys.map(&:to_s)
31
+ keys2 = attrs2.keys.map(&:to_s)
32
+
33
+ if attribute_order_behavior == :strict
34
+ compare_strict_order(node1, node2, attrs1, attrs2, keys1, keys2, opts,
35
+ differences)
36
+ else
37
+ compare_flexible_order(node1, node2, attrs1, attrs2, keys1, keys2, opts,
38
+ differences)
39
+ end
40
+ end
41
+
42
+ # Compare with strict attribute ordering
43
+ #
44
+ # @param node1 [Object] First node
45
+ # @param node2 [Object] Second node
46
+ # @param attrs1 [Hash] First node's attributes
47
+ # @param attrs2 [Hash] Second node's attributes
48
+ # @param keys1 [Array<String>] First node's attribute keys
49
+ # @param keys2 [Array<String>] Second node's attribute keys
50
+ # @param opts [Hash] Comparison options
51
+ # @param differences [Array] Array to append differences to
52
+ # @return [Symbol] Comparison result
53
+ def self.compare_strict_order(node1, node2, attrs1, attrs2, keys1, keys2, opts,
54
+ differences)
55
+ if keys1 != keys2
56
+ # Keys are different or in different order
57
+ if keys1.sort == keys2.sort
58
+ # Same keys, different order - attribute_order difference
59
+ add_attribute_difference(n1: node1, n2: node2,
60
+ diff1: Comparison::UNEQUAL_ATTRIBUTES,
61
+ diff2: Comparison::UNEQUAL_ATTRIBUTES,
62
+ dimension: :attribute_order,
63
+ opts: opts,
64
+ differences: differences)
65
+ return Comparison::UNEQUAL_ATTRIBUTES
66
+ else
67
+ # Different keys - attribute_presence difference
68
+ add_attribute_difference(n1: node1, n2: node2,
69
+ diff1: Comparison::MISSING_ATTRIBUTE,
70
+ diff2: Comparison::MISSING_ATTRIBUTE,
71
+ dimension: :attribute_presence,
72
+ opts: opts,
73
+ differences: differences)
74
+ return Comparison::MISSING_ATTRIBUTE
75
+ end
76
+ end
77
+
78
+ # Order matches, check values
79
+ compare_attribute_values(node1, node2, attrs1, attrs2, opts,
80
+ differences)
81
+ end
82
+
83
+ # Compare with flexible attribute ordering
84
+ #
85
+ # @param node1 [Object] First node
86
+ # @param node2 [Object] Second node
87
+ # @param attrs1 [Hash] First node's attributes
88
+ # @param attrs2 [Hash] Second node's attributes
89
+ # @param keys1 [Array<String>] First node's attribute keys
90
+ # @param keys2 [Array<String>] Second node's attribute keys
91
+ # @param opts [Hash] Comparison options
92
+ # @param differences [Array] Array to append differences to
93
+ # @return [Symbol] Comparison result
94
+ def self.compare_flexible_order(node1, node2, attrs1, attrs2, keys1, keys2, opts,
95
+ differences)
96
+ # Check if order differs (but keys are the same) - track as informative
97
+ if keys1 != keys2 && keys1.sort == keys2.sort && opts[:verbose]
98
+ add_attribute_difference(n1: node1, n2: node2,
99
+ diff1: Comparison::UNEQUAL_ATTRIBUTES,
100
+ diff2: Comparison::UNEQUAL_ATTRIBUTES,
101
+ dimension: :attribute_order,
102
+ opts: opts,
103
+ differences: differences)
104
+ end
105
+
106
+ # Sort attributes so order doesn't matter for comparison
107
+ attrs1 = attrs1.sort_by { |k, _v| k.to_s }.to_h
108
+ attrs2 = attrs2.sort_by { |k, _v| k.to_s }.to_h
109
+
110
+ unless attrs1.keys.map(&:to_s).sort == attrs2.keys.map(&:to_s).sort
111
+ add_attribute_difference(n1: node1, n2: node2,
112
+ diff1: Comparison::MISSING_ATTRIBUTE,
113
+ diff2: Comparison::MISSING_ATTRIBUTE,
114
+ dimension: :attribute_presence,
115
+ opts: opts,
116
+ differences: differences)
117
+ return Comparison::MISSING_ATTRIBUTE
118
+ end
119
+
120
+ compare_attribute_values(node1, node2, attrs1, attrs2, opts,
121
+ differences)
122
+ end
123
+
124
+ # Compare attribute values
125
+ #
126
+ # @param node1 [Object] First node
127
+ # @param node2 [Object] Second node
128
+ # @param attrs1 [Hash] First node's attributes
129
+ # @param attrs2 [Hash] Second node's attributes
130
+ # @param opts [Hash] Comparison options
131
+ # @param differences [Array] Array to append differences to
132
+ # @return [Symbol] Comparison result
133
+ def self.compare_attribute_values(node1, node2, attrs1, attrs2, opts,
134
+ differences)
135
+ attrs1.each do |name, value|
136
+ unless attrs2[name] == value
137
+ add_attribute_difference(n1: node1, n2: node2,
138
+ diff1: Comparison::UNEQUAL_ATTRIBUTES,
139
+ diff2: Comparison::UNEQUAL_ATTRIBUTES,
140
+ dimension: :attribute_values,
141
+ opts: opts,
142
+ differences: differences)
143
+ return Comparison::UNEQUAL_ATTRIBUTES
144
+ end
145
+ end
146
+
147
+ Comparison::EQUIVALENT
148
+ end
149
+
150
+ # Add an attribute difference
151
+ #
152
+ # @param n1 [Object] First node
153
+ # @param n2 [Object] Second node
154
+ # @param diff1 [String] Difference type for node1
155
+ # @param diff2 [String] Difference type for node2
156
+ # @param dimension [Symbol] The match dimension
157
+ # @param opts [Hash] Options
158
+ # @param differences [Array] Array to append difference to
159
+ def self.add_attribute_difference(n1:, n2:, diff1:, diff2:,
160
+ dimension:, differences:, **opts)
161
+ # Import DiffNodeBuilder to avoid circular dependency
162
+ require_relative "diff_node_builder"
163
+
164
+ diff_node = Canon::Comparison::DiffNodeBuilder.build(
165
+ node1: n1,
166
+ node2: n2,
167
+ diff1: diff1,
168
+ diff2: diff2,
169
+ dimension: dimension,
170
+ **opts,
171
+ )
172
+ differences << diff_node if diff_node
173
+ end
174
+ end
175
+ end
176
+ end
177
+ end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../match_options"
4
+
5
+ module Canon
6
+ module Comparison
7
+ module XmlComparatorHelpers
8
+ # Attribute filtering logic
9
+ # Handles filtering of attributes based on options and match settings
10
+ class AttributeFilter
11
+ # Filter attributes based on options
12
+ #
13
+ # @param attributes [Array, Hash] Raw attributes
14
+ # @param opts [Hash] Comparison options
15
+ # @return [Hash] Filtered attributes
16
+ def self.filter(attributes, opts)
17
+ filtered = {}
18
+ match_opts = opts[:match_opts]
19
+
20
+ # Handle Canon::Xml::Node attribute format (array of AttributeNode)
21
+ if attributes.is_a?(Array)
22
+ filter_array_attributes(attributes, opts, match_opts, filtered)
23
+ else
24
+ # Handle Nokogiri and Moxml attribute formats (Hash-like)
25
+ filter_hash_attributes(attributes, opts, match_opts, filtered)
26
+ end
27
+
28
+ filtered
29
+ end
30
+
31
+ # Filter array-format attributes (Canon::Xml::Node)
32
+ #
33
+ # @param attributes [Array] Array of AttributeNode objects
34
+ # @param opts [Hash] Comparison options
35
+ # @param match_opts [Hash] Resolved match options
36
+ # @param filtered [Hash] Output hash to populate
37
+ def self.filter_array_attributes(attributes, opts, match_opts, filtered)
38
+ attributes.each do |attr|
39
+ name = attr.name
40
+ value = attr.value
41
+
42
+ # Skip namespace declarations - they're handled separately
43
+ next if namespace_declaration?(name)
44
+
45
+ # Skip if attribute name should be ignored
46
+ next if ignore_by_name?(name, opts)
47
+
48
+ # Skip if attribute content should be ignored
49
+ next if ignore_by_content?(value, opts)
50
+
51
+ # Apply match options for attribute values
52
+ behavior = match_opts[:attribute_values] || :strict
53
+ value = MatchOptions.process_attribute_value(value, behavior)
54
+
55
+ filtered[name] = value
56
+ end
57
+ end
58
+
59
+ # Filter hash-format attributes (Nokogiri/Moxml)
60
+ #
61
+ # @param attributes [Hash] Hash-like attributes
62
+ # @param opts [Hash] Comparison options
63
+ # @param match_opts [Hash] Resolved match options
64
+ # @param filtered [Hash] Output hash to populate
65
+ def self.filter_hash_attributes(attributes, opts, match_opts, filtered)
66
+ attributes.each do |key, val|
67
+ # Normalize key and value
68
+ name, value = normalize_attribute_pair(key, val)
69
+
70
+ # Skip namespace declarations - they're handled separately
71
+ next if namespace_declaration?(name)
72
+
73
+ # Skip if attribute name should be ignored
74
+ next if ignore_by_name?(name, opts)
75
+
76
+ # Skip if attribute content should be ignored
77
+ next if ignore_by_content?(value, opts)
78
+
79
+ # Apply match options for attribute values
80
+ behavior = match_opts[:attribute_values] || :strict
81
+ value = MatchOptions.process_attribute_value(value, behavior)
82
+
83
+ filtered[name] = value
84
+ end
85
+ end
86
+
87
+ # Normalize attribute key-value pair from different formats
88
+ #
89
+ # @param key [Object] Attribute key (String or Attribute object)
90
+ # @param val [Object] Attribute value
91
+ # @return [Array<String, String>] Normalized [name, value] pair
92
+ def self.normalize_attribute_pair(key, val)
93
+ if key.is_a?(String)
94
+ # Nokogiri format: key=name (String), val=attr object
95
+ name = key
96
+ value = val.respond_to?(:value) ? val.value : val.to_s
97
+ else
98
+ # Moxml format: key=attr object, val=nil
99
+ name = key.respond_to?(:name) ? key.name : key.to_s
100
+ value = key.respond_to?(:value) ? key.value : key.to_s
101
+ end
102
+
103
+ [name, value]
104
+ end
105
+
106
+ # Check if attribute should be ignored by name
107
+ #
108
+ # @param name [String] Attribute name
109
+ # @param opts [Hash] Comparison options
110
+ # @return [Boolean] true if should ignore
111
+ def self.ignore_by_name?(name, opts)
112
+ opts[:ignore_attrs_by_name].any? { |pattern| name.include?(pattern) }
113
+ end
114
+
115
+ # Check if attribute should be ignored by content
116
+ #
117
+ # @param value [String] Attribute value
118
+ # @param opts [Hash] Comparison options
119
+ # @return [Boolean] true if should ignore
120
+ def self.ignore_by_content?(value, opts)
121
+ opts[:ignore_attr_content].any? do |pattern|
122
+ value.to_s.include?(pattern)
123
+ end
124
+ end
125
+
126
+ # Check if an attribute name is a namespace declaration
127
+ #
128
+ # @param attr_name [String] Attribute name
129
+ # @return [Boolean] true if it's a namespace declaration
130
+ def self.namespace_declaration?(attr_name)
131
+ attr_name == "xmlns" || attr_name.start_with?("xmlns:")
132
+ end
133
+ end
134
+ end
135
+ end
136
+ end