canon 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +69 -92
- data/README.adoc +13 -13
- data/docs/.lycheeignore +69 -0
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +82 -2
- data/docs/advanced/extending-canon.adoc +193 -0
- data/docs/features/match-options/index.adoc +239 -1
- data/docs/internals/diffnode-enrichment.adoc +611 -0
- data/docs/internals/index.adoc +251 -0
- data/docs/lychee.toml +13 -6
- data/docs/understanding/architecture.adoc +749 -33
- data/docs/understanding/comparison-pipeline.adoc +122 -0
- data/lib/canon/cache.rb +129 -0
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
- data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
- data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
- data/lib/canon/comparison/dimensions/registry.rb +77 -0
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
- data/lib/canon/comparison/dimensions.rb +54 -0
- data/lib/canon/comparison/format_detector.rb +87 -0
- data/lib/canon/comparison/html_comparator.rb +70 -26
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/html_parser.rb +80 -0
- data/lib/canon/comparison/json_comparator.rb +12 -0
- data/lib/canon/comparison/json_parser.rb +19 -0
- data/lib/canon/comparison/markup_comparator.rb +293 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
- data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
- data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
- data/lib/canon/comparison/match_options.rb +68 -463
- data/lib/canon/comparison/profile_definition.rb +149 -0
- data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
- data/lib/canon/comparison/xml_comparator.rb +97 -684
- data/lib/canon/comparison/xml_node_comparison.rb +319 -0
- data/lib/canon/comparison/xml_parser.rb +19 -0
- data/lib/canon/comparison/yaml_comparator.rb +3 -3
- data/lib/canon/comparison.rb +265 -110
- data/lib/canon/diff/diff_classifier.rb +101 -2
- data/lib/canon/diff/diff_node.rb +32 -2
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/diff/node_serializer.rb +191 -0
- data/lib/canon/diff/path_builder.rb +143 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
- data/lib/canon/diff_formatter.rb +1 -1
- data/lib/canon/rspec_matchers.rb +38 -9
- data/lib/canon/tree_diff/operation_converter.rb +92 -338
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +48 -2
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "match_options"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Comparison
|
|
7
|
+
# Profile definition DSL with full validation
|
|
8
|
+
#
|
|
9
|
+
# Provides a clean, validated way to define custom comparison profiles.
|
|
10
|
+
# Catches errors at definition time with clear, actionable messages.
|
|
11
|
+
#
|
|
12
|
+
# @example Define a custom profile
|
|
13
|
+
# Canon::Comparison.define_profile(:my_custom) do
|
|
14
|
+
# text_content :normalize
|
|
15
|
+
# comments :ignore
|
|
16
|
+
# preprocessing :rendered
|
|
17
|
+
# end
|
|
18
|
+
class ProfileDefinition
  # Behaviors accepted by each XML/HTML comparison dimension.
  #
  # Maps dimension name to the list of valid behavior symbols. Both the
  # hash and the per-dimension lists are frozen so the validation tables
  # cannot be mutated at runtime.
  # The keys must match MatchOptions::Xml::MATCH_DIMENSIONS.
  DIMENSION_BEHAVIORS = {
    text_content: %i[strict normalize ignore],
    structural_whitespace: %i[strict normalize ignore],
    attribute_presence: %i[strict ignore],
    attribute_order: %i[strict ignore],
    attribute_values: %i[strict strip compact normalize ignore],
    element_position: %i[strict ignore],
    comments: %i[strict ignore],
  }.each_value(&:freeze).freeze

  # All valid dimensions for XML/HTML comparison.
  #
  # Derived from DIMENSION_BEHAVIORS so the two constants cannot drift
  # apart when a dimension is added or removed.
  VALID_DIMENSIONS = DIMENSION_BEHAVIORS.keys.freeze

  attr_reader :name, :settings

  # Initialize a new, empty profile definition
  #
  # @param name [Symbol] Profile name
  def initialize(name)
    @name = name
    @settings = {}
  end

  # Define a profile using DSL syntax
  #
  # The block is evaluated with instance_eval, so the DSL methods below
  # (dimension setters, preprocessing, semantic_diff,
  # similarity_threshold) are called without an explicit receiver.
  #
  # @param name [Symbol] Profile name
  # @yield [ProfileDefinition] DSL block for defining profile
  # @return [Hash] Profile settings hash (a copy of the collected settings)
  # @raise [ProfileError] if profile definition is invalid
  def self.define(name, &block)
    definition = new(name)
    definition.instance_eval(&block) if block
    definition.validate!
    definition.to_h
  end

  # Create one DSL setter per dimension, e.g. `text_content :normalize`.
  # The behavior is stored as given; it is checked later by #validate!.
  VALID_DIMENSIONS.each do |dimension|
    define_method(dimension) do |behavior|
      @settings[dimension] = behavior
    end
  end

  # Set preprocessing mode
  #
  # @param mode [Symbol] Preprocessing mode
  # @raise [ProfileError] if mode is not in MatchOptions::PREPROCESSING_OPTIONS
  def preprocessing(mode)
    unless MatchOptions::PREPROCESSING_OPTIONS.include?(mode)
      raise ProfileError,
            "Invalid preprocessing mode: #{mode}. " \
            "Valid options: #{MatchOptions::PREPROCESSING_OPTIONS.join(', ')}"
    end

    @settings[:preprocessing] = mode
  end

  # Enable/disable semantic diff
  #
  # @param enabled [Boolean] Whether to enable semantic diff (default: true)
  def semantic_diff(enabled: true)
    @settings[:semantic_diff] = enabled
  end

  # Set similarity threshold for semantic matching
  #
  # @param value [Numeric] Threshold between 0 and 1 (inclusive)
  # @raise [ProfileError] if value is not a real number within 0..1
  def similarity_threshold(value)
    # `real?` rejects Complex, which is Numeric but does not implement
    # >= / <=; without it such input would surface as NoMethodError
    # instead of the documented ProfileError.
    in_range = value.is_a?(Numeric) && value.real? && value >= 0 && value <= 1
    unless in_range
      raise ProfileError,
            "Similarity threshold must be between 0 and 1, got: #{value}"
    end

    @settings[:similarity_threshold] = value
  end

  # Validate the profile definition
  #
  # Only dimension settings are checked here; preprocessing and
  # similarity_threshold are validated by their own setters.
  #
  # @raise [ProfileError] if a dimension has an unsupported behavior
  def validate!
    @settings.each do |key, value|
      validate_dimension!(key, value) if DIMENSION_BEHAVIORS.key?(key)
    end
  end

  # Convert to hash
  #
  # @return [Hash] Shallow copy of the profile settings
  def to_h
    @settings.dup
  end

  private

  # Validate a dimension setting
  #
  # @param dimension [Symbol] Dimension name
  # @param behavior [Symbol] Behavior value
  # @raise [ProfileError] if dimension or behavior is invalid
  def validate_dimension!(dimension, behavior)
    valid_behaviors = DIMENSION_BEHAVIORS[dimension]
    unless valid_behaviors
      raise ProfileError,
            "Unknown dimension: #{dimension}. " \
            "Valid dimensions: #{VALID_DIMENSIONS.join(', ')}"
    end

    return if valid_behaviors.include?(behavior)

    raise ProfileError,
          "Invalid behavior '#{behavior}' for dimension '#{dimension}'. " \
          "Valid behaviors: #{valid_behaviors.join(', ')}"
  end
end
|
|
145
|
+
|
|
146
|
+
# Raised when a profile definition is invalid: an unknown preprocessing mode,
# an out-of-range similarity threshold, or an unsupported dimension/behavior.
|
|
147
|
+
class ProfileError < Error; end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
# Ruby Object Comparison Utilities
|
|
6
|
+
#
|
|
7
|
+
# Provides public comparison methods for Ruby objects (Hash, Array, primitives).
|
|
8
|
+
# This module extracts shared comparison logic that was previously
|
|
9
|
+
# accessed via send() from YamlComparator.
|
|
10
|
+
module RubyObjectComparator
  # Compare Ruby objects (Hash, Array, primitives) for JSON/YAML
  #
  # Objects whose classes are not exactly the same are reported as a type
  # mismatch. Hashes and arrays are compared recursively; everything else
  # is compared with ==.
  #
  # @param obj1 [Object] First object
  # @param obj2 [Object] Second object
  # @param opts [Hash] Comparison options (:verbose, :match_opts)
  # @param differences [Array] Array to append differences to
  # @param path [String] Current path in the object structure
  # @return [Symbol] Comparison result constant
  def self.compare_objects(obj1, obj2, opts, differences, path)
    unless obj1.instance_of?(obj2.class)
      add_difference(path, obj1, obj2, Comparison::UNEQUAL_TYPES,
                     opts, differences)
      return Comparison::UNEQUAL_TYPES
    end

    case obj1
    when Hash
      compare_hashes(obj1, obj2, opts, differences, path)
    when Array
      compare_arrays(obj1, obj2, opts, differences, path)
    else
      # Scalars (nil, booleans, numbers, strings, symbols) and any other
      # object type share the same equality-based comparison.
      compare_primitives(obj1, obj2, opts, differences, path)
    end
  end

  # Compare two hashes
  #
  # With match_opts[:key_order] == :strict, two hashes holding the same
  # keys in a different order are reported as a key-order difference and
  # their values are not compared. For any other key_order value the keys
  # are sorted by their string form before comparison. Without match_opts
  # the keys are used in insertion order and no order check is made.
  #
  # @param hash1 [Hash] First hash
  # @param hash2 [Hash] Second hash
  # @param opts [Hash] Comparison options
  # @param differences [Array] Array to append differences to
  # @param path [String] Current path in the object structure
  # @return [Symbol] Comparison result constant
  def self.compare_hashes(hash1, hash2, opts, differences, path)
    keys1 = hash1.keys
    keys2 = hash2.keys

    match_opts = opts[:match_opts]
    if match_opts && match_opts[:key_order] == :strict
      if reordered_keys?(keys1, keys2)
        add_difference(child_path(path, "(key order)"), keys1, keys2,
                       Comparison::UNEQUAL_HASH_KEY_ORDER, opts, differences)
        return Comparison::UNEQUAL_HASH_KEY_ORDER
      end
    elsif match_opts
      keys1 = keys1.sort_by(&:to_s)
      keys2 = keys2.sort_by(&:to_s)
    end

    has_missing_keys = report_missing_keys(hash1, hash2, keys1, keys2,
                                           opts, differences, path)
    values_equivalent = compare_common_keys(hash1, hash2, keys1 & keys2,
                                            opts, differences, path)

    # A value mismatch takes precedence over a missing key.
    return Comparison::UNEQUAL_HASH_VALUES unless values_equivalent

    has_missing_keys ? Comparison::MISSING_HASH_KEY : Comparison::EQUIVALENT
  end

  # Compare two arrays
  #
  # Arrays of different length are reported once and not compared
  # element by element.
  #
  # @param arr1 [Array] First array
  # @param arr2 [Array] Second array
  # @param opts [Hash] Comparison options
  # @param differences [Array] Array to append differences to
  # @param path [String] Current path in the object structure
  # @return [Symbol] Comparison result constant
  def self.compare_arrays(arr1, arr2, opts, differences, path)
    unless arr1.length == arr2.length
      add_difference(path, arr1, arr2,
                     Comparison::UNEQUAL_ARRAY_LENGTHS, opts,
                     differences)
      return Comparison::UNEQUAL_ARRAY_LENGTHS
    end

    # Visit every pair (no short-circuit) so all differences are recorded.
    all_equivalent = true
    arr1.each_with_index do |elem1, index|
      result = compare_objects(elem1, arr2[index], opts, differences,
                               "#{path}[#{index}]")
      all_equivalent = false unless result == Comparison::EQUIVALENT
    end

    all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ARRAY_ELEMENTS
  end

  # Compare primitive values
  #
  # @param val1 [Object] First value
  # @param val2 [Object] Second value
  # @param opts [Hash] Comparison options
  # @param differences [Array] Array to append differences to
  # @param path [String] Current path in the object structure
  # @return [Symbol] Comparison result constant
  def self.compare_primitives(val1, val2, opts, differences, path)
    return Comparison::EQUIVALENT if val1 == val2

    add_difference(path, val1, val2,
                   Comparison::UNEQUAL_PRIMITIVES, opts,
                   differences)
    Comparison::UNEQUAL_PRIMITIVES
  end

  # Add a Ruby object difference
  #
  # Differences are only recorded when opts[:verbose] is truthy.
  #
  # @param path [String] Path to the difference
  # @param obj1 [Object] First object
  # @param obj2 [Object] Second object
  # @param diff_code [Symbol] Difference code
  # @param opts [Hash] Comparison options
  # @param differences [Array] Array to append difference to
  def self.add_difference(path, obj1, obj2, diff_code, opts, differences)
    return unless opts[:verbose]

    differences << {
      path: path,
      value1: obj1,
      value2: obj2,
      difference: diff_code,
    }
  end

  # Build the dotted path of a hash entry below +path+.
  def self.child_path(path, key)
    path.empty? ? key.to_s : "#{path}.#{key}"
  end
  private_class_method :child_path

  # True when both key lists hold the same keys but in a different order.
  def self.reordered_keys?(keys1, keys2)
    keys1 != keys2 && keys1.sort_by(&:to_s) == keys2.sort_by(&:to_s)
  end
  private_class_method :reordered_keys?

  # Record keys present in only one hash (first-only keys are recorded
  # before second-only keys); returns true if any were found.
  def self.report_missing_keys(hash1, hash2, keys1, keys2, opts,
                               differences, path)
    only_in_first = keys1 - keys2
    only_in_second = keys2 - keys1

    only_in_first.each do |key|
      add_difference(child_path(path, key), hash1[key], nil,
                     Comparison::MISSING_HASH_KEY, opts, differences)
    end
    only_in_second.each do |key|
      add_difference(child_path(path, key), nil, hash2[key],
                     Comparison::MISSING_HASH_KEY, opts, differences)
    end

    !(only_in_first.empty? && only_in_second.empty?)
  end
  private_class_method :report_missing_keys

  # Compare the values of every shared key; returns true when all match.
  def self.compare_common_keys(hash1, hash2, common_keys, opts,
                               differences, path)
    all_equivalent = true
    common_keys.each do |key|
      result = compare_objects(hash1[key], hash2[key], opts,
                               differences, child_path(path, key))
      all_equivalent = false unless result == Comparison::EQUIVALENT
    end
    all_equivalent
  end
  private_class_method :compare_common_keys
end
|
|
179
|
+
end
|
|
180
|
+
end
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require_relative "base_match_strategy"
|
|
4
4
|
require_relative "../../tree_diff/tree_diff_integrator"
|
|
5
5
|
require_relative "../../tree_diff/operation_converter"
|
|
6
|
+
require_relative "../xml_node_comparison"
|
|
6
7
|
|
|
7
8
|
module Canon
|
|
8
9
|
module Comparison
|
|
@@ -125,10 +126,9 @@ module Canon
|
|
|
125
126
|
# @return [Array<String>] Preprocessed strings
|
|
126
127
|
def preprocess_xml(doc1, doc2)
|
|
127
128
|
# Serialize XML to string
|
|
128
|
-
# Use
|
|
129
|
+
# Use XmlNodeComparison's serializer for Canon::Xml::Node
|
|
129
130
|
xml1 = if doc1.is_a?(Canon::Xml::Node)
|
|
130
|
-
|
|
131
|
-
XmlComparator.send(:serialize_node_to_xml, doc1)
|
|
131
|
+
XmlNodeComparison.serialize_node_to_xml(doc1)
|
|
132
132
|
elsif doc1.respond_to?(:to_xml)
|
|
133
133
|
doc1.to_xml
|
|
134
134
|
else
|
|
@@ -136,8 +136,7 @@ module Canon
|
|
|
136
136
|
end
|
|
137
137
|
|
|
138
138
|
xml2 = if doc2.is_a?(Canon::Xml::Node)
|
|
139
|
-
|
|
140
|
-
XmlComparator.send(:serialize_node_to_xml, doc2)
|
|
139
|
+
XmlNodeComparison.serialize_node_to_xml(doc2)
|
|
141
140
|
elsif doc2.respond_to?(:to_xml)
|
|
142
141
|
doc2.to_xml
|
|
143
142
|
else
|
|
@@ -161,12 +160,11 @@ module Canon
|
|
|
161
160
|
# @param doc2 [Object] Second HTML document
|
|
162
161
|
# @return [Array<String>] Preprocessed strings
|
|
163
162
|
def preprocess_html(doc1, doc2)
|
|
164
|
-
# For Canon::Xml::Node, use
|
|
163
|
+
# For Canon::Xml::Node, use XmlNodeComparison's serializer
|
|
165
164
|
# For XML::DocumentFragment (from parse_node_as_fragment), use to_s
|
|
166
165
|
# to avoid Nokogiri auto-inserting meta tags during to_html serialization
|
|
167
166
|
html1 = if doc1.is_a?(Canon::Xml::Node)
|
|
168
|
-
|
|
169
|
-
XmlComparator.send(:serialize_node_to_xml, doc1)
|
|
167
|
+
XmlNodeComparison.serialize_node_to_xml(doc1)
|
|
170
168
|
elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
|
|
171
169
|
doc1.to_s
|
|
172
170
|
elsif doc1.respond_to?(:to_html)
|
|
@@ -176,8 +174,7 @@ module Canon
|
|
|
176
174
|
end
|
|
177
175
|
|
|
178
176
|
html2 = if doc2.is_a?(Canon::Xml::Node)
|
|
179
|
-
|
|
180
|
-
XmlComparator.send(:serialize_node_to_xml, doc2)
|
|
177
|
+
XmlNodeComparison.serialize_node_to_xml(doc2)
|
|
181
178
|
elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
|
|
182
179
|
doc2.to_s
|
|
183
180
|
elsif doc2.respond_to?(:to_html)
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
# Whitespace sensitivity utilities for element-level control
|
|
6
|
+
#
|
|
7
|
+
# This module provides logic to determine whether whitespace should be
|
|
8
|
+
# preserved during comparison based on:
|
|
9
|
+
# - Format-specific defaults (HTML has built-in sensitive elements)
|
|
10
|
+
# - User-configured whitelist (elements that care about whitespace)
|
|
11
|
+
# - User-configured blacklist (elements that don't care about whitespace)
|
|
12
|
+
# - xml:space attribute in the document itself
|
|
13
|
+
# - respect_xml_space flag (whether to honor or override xml:space)
|
|
14
|
+
#
|
|
15
|
+
# == Priority Order
|
|
16
|
+
#
|
|
17
|
+
# 1. respect_xml_space: false → User config only (ignore xml:space)
|
|
18
|
+
# 2. User whitelist → Use whitelist (user explicitly declared)
|
|
19
|
+
# 3. Format defaults → HTML: [:pre, :code, :textarea, :script, :style], XML: []
|
|
20
|
+
# 4. User blacklist → Remove from defaults/whitelist
|
|
21
|
+
# 5. xml:space="preserve" → Element is sensitive
|
|
22
|
+
# 6. xml:space="default" → Use steps 1-4
|
|
23
|
+
#
|
|
24
|
+
# == Usage
|
|
25
|
+
#
|
|
26
|
+
# WhitespaceSensitivity.element_sensitive?(node, opts)
|
|
27
|
+
# => true if whitespace should be preserved for this element
|
|
28
|
+
module WhitespaceSensitivity
  # Namespace URI under which the xml:space attribute is stored on
  # Canon::Xml::Nodes::ElementNode attribute nodes.
  XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace"

  # Formats treated as HTML when choosing default sensitive elements.
  HTML_FORMATS = %i[html html4 html5].freeze

  # Elements whose whitespace is preserved by default for HTML formats.
  HTML_SENSITIVE_ELEMENTS = %i[pre code textarea script style].freeze

  # Shared empty default list (XML and unknown formats).
  NO_SENSITIVE_ELEMENTS = [].freeze

  private_constant :XML_NAMESPACE_URI, :HTML_FORMATS,
                   :HTML_SENSITIVE_ELEMENTS, :NO_SENSITIVE_ELEMENTS

  class << self
    # Check if an element is whitespace-sensitive based on configuration
    #
    # Decision order, evaluated against the parent element of +node+:
    # 1. respect_xml_space is false → only the user whitelist counts
    # 2. xml:space="preserve"       → sensitive
    # 3. otherwise (including xml:space="default") → format defaults
    #    plus whitelist, minus blacklist
    #
    # NOTE(review): only the direct parent's xml:space attribute is
    # inspected; values declared on more distant ancestors are not
    # consulted here.
    #
    # @param node [Object] The node whose parent element is checked
    # @param opts [Hash] Comparison options containing match_opts
    # @return [Boolean] true if whitespace should be preserved for this element
    def element_sensitive?(node, opts)
      match_opts = opts[:match_opts]
      return false unless match_opts
      return false unless text_node_parent?(node)

      parent = node.parent

      unless respect_xml_space?(match_opts)
        return user_config_sensitive?(parent, match_opts)
      end

      return true if xml_space_preserve?(parent)

      # xml:space="default" asks for the application's default handling,
      # so it takes the same path as an element without the attribute.
      # (It previously forced `false`, overriding the whitelist and the
      # HTML defaults.)
      configured_sensitive?(parent, match_opts)
    end

    # Check if whitespace-only text node should be filtered
    #
    # @param node [Object] The text node to check
    # @param opts [Hash] Comparison options
    # @return [Boolean] true if node should be preserved (not filtered)
    def preserve_whitespace_node?(node, opts)
      return false unless node.respond_to?(:parent)
      return false unless node.parent

      element_sensitive?(node, opts)
    end

    # Get format-specific default sensitive elements
    #
    # This is the SINGLE SOURCE OF TRUTH for default whitespace-sensitive
    # elements. All other code should use this method to get the list.
    # The returned array is frozen.
    #
    # @param match_opts [Hash] Resolved match options (:format, default :xml)
    # @return [Array<Symbol>] Default sensitive element names
    def format_default_sensitive_elements(match_opts)
      format = match_opts[:format] || :xml

      # XML (and any unrecognised format) has no default sensitive
      # elements - purely user-controlled.
      HTML_FORMATS.include?(format) ? HTML_SENSITIVE_ELEMENTS : NO_SENSITIVE_ELEMENTS
    end

    # Check if an element is in the default sensitive list for its format
    #
    # @param element_name [String, Symbol] The element name to check
    # @param match_opts [Hash] Resolved match options
    # @return [Boolean] true if element is in default sensitive list
    def default_sensitive_element?(element_name, match_opts)
      format_default_sensitive_elements(match_opts)
        .include?(element_name.to_sym)
    end

    private

    # Check if we should respect xml:space attribute
    #
    # @param match_opts [Hash] Resolved match options
    # @return [Boolean] value of :respect_xml_space, true when the key is absent
    def respect_xml_space?(match_opts)
      match_opts.fetch(:respect_xml_space, true)
    end

    # Check if xml:space="preserve" is set
    #
    # @param element [Object] The element to check
    # @return [Boolean] true if xml:space="preserve"
    def xml_space_preserve?(element)
      xml_space?(element, "preserve")
    end

    # Check if xml:space="default" is set
    #
    # Not needed by element_sensitive? (the "default" value falls through
    # to the configured behavior); kept so the predicate stays available.
    #
    # @param element [Object] The element to check
    # @return [Boolean] true if xml:space="default"
    def xml_space_default?(element)
      xml_space?(element, "default")
    end

    # Check whether the element carries xml:space with the given value
    #
    # Canon element nodes store the attribute with name "space" under the
    # XML namespace; other nodes are queried with element["xml:space"]
    # when they respond to [].
    #
    # @param element [Object] The element to check
    # @param expected [String] Attribute value to look for
    # @return [Boolean] true if xml:space equals +expected+
    def xml_space?(element, expected)
      if element.is_a?(Canon::Xml::Nodes::ElementNode)
        element.attribute_nodes.any? do |attr|
          attr.name == "space" &&
            attr.namespace_uri == XML_NAMESPACE_URI &&
            attr.value == expected
        end
      elsif element.respond_to?(:[])
        element["xml:space"] == expected
      else
        false
      end
    end

    # Check sensitivity based on user configuration only
    #
    # @param element [Object] The element to check
    # @param match_opts [Hash] Resolved match options
    # @return [Boolean] true if element is in whitelist
    def user_config_sensitive?(element, match_opts)
      whitelist = match_opts[:whitespace_sensitive_elements]
      return false unless whitelist

      whitelist.include?(element.name.to_sym)
    end

    # Check sensitivity based on user config + format defaults
    #
    # Equivalent to membership in (defaults + whitelist) - blacklist,
    # evaluated without building an intermediate collection.
    #
    # @param element [Object] The element to check
    # @param match_opts [Hash] Resolved match options
    # @return [Boolean] true if element should be sensitive
    def configured_sensitive?(element, match_opts)
      name = element.name.to_sym

      # Blacklist removes from everything, so it is checked first.
      blacklist = match_opts[:whitespace_insensitive_elements]
      return false if blacklist && blacklist.include?(name)

      whitelist = match_opts[:whitespace_sensitive_elements]
      return true if whitelist && whitelist.include?(name)

      format_default_sensitive_elements(match_opts).include?(name)
    end

    # Check if node has a parent that's an element (not document root)
    #
    # @param node [Object] The node to check
    # @return [Boolean] true if node has an element parent
    def text_node_parent?(node)
      return false unless node.respond_to?(:parent)

      parent = node.parent
      return false unless parent
      return true if parent.respond_to?(:element?) && parent.element?

      # Nokogiri compatibility
      parent.respond_to?(:node_type) && parent.node_type == :element
    end
  end
end
|
|
207
|
+
end
|
|
208
|
+
end
|