canon 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +12 -22
- data/Rakefile +5 -2
- data/lib/canon/cache.rb +3 -1
- data/lib/canon/cli.rb +0 -3
- data/lib/canon/commands/diff_command.rb +0 -6
- data/lib/canon/commands/format_command.rb +0 -4
- data/lib/canon/commands.rb +9 -0
- data/lib/canon/comparison/child_realignment.rb +0 -2
- data/lib/canon/comparison/compare_profile.rb +30 -36
- data/lib/canon/comparison/comparison_result.rb +0 -2
- data/lib/canon/comparison/diff_node_builder.rb +353 -0
- data/lib/canon/comparison/dimensions/dimension.rb +51 -0
- data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
- data/lib/canon/comparison/dimensions/registry.rb +101 -60
- data/lib/canon/comparison/dimensions.rb +15 -46
- data/lib/canon/comparison/html_comparator.rb +18 -141
- data/lib/canon/comparison/html_compare_profile.rb +15 -18
- data/lib/canon/comparison/json_comparator.rb +4 -165
- data/lib/canon/comparison/json_parser.rb +0 -2
- data/lib/canon/comparison/markup_comparator.rb +14 -210
- data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
- data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
- data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
- data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
- data/lib/canon/comparison/match_options.rb +13 -88
- data/lib/canon/comparison/pipeline.rb +269 -0
- data/lib/canon/comparison/profile_definition.rb +0 -2
- data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
- data/lib/canon/comparison/strategies.rb +16 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
- data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
- data/lib/canon/comparison/xml_comparator.rb +4 -492
- data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
- data/lib/canon/comparison/xml_node_comparison.rb +4 -119
- data/lib/canon/comparison/yaml_comparator.rb +0 -3
- data/lib/canon/comparison.rb +143 -266
- data/lib/canon/config/config_dsl.rb +159 -0
- data/lib/canon/config/env_provider.rb +0 -3
- data/lib/canon/config/env_schema.rb +48 -58
- data/lib/canon/config/profile_loader.rb +0 -1
- data/lib/canon/config.rb +116 -468
- data/lib/canon/diff/diff_block_builder.rb +0 -2
- data/lib/canon/diff/diff_classifier.rb +0 -5
- data/lib/canon/diff/diff_context.rb +0 -2
- data/lib/canon/diff/diff_context_builder.rb +0 -2
- data/lib/canon/diff/diff_line_builder.rb +0 -3
- data/lib/canon/diff/diff_node_enricher.rb +0 -4
- data/lib/canon/diff/diff_node_mapper.rb +0 -4
- data/lib/canon/diff/diff_report_builder.rb +0 -4
- data/lib/canon/diff/formatting_detector.rb +0 -1
- data/lib/canon/diff/node_serializer.rb +0 -7
- data/lib/canon/diff.rb +39 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/debug_output.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
- data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
- data/lib/canon/diff_formatter.rb +11 -9
- data/lib/canon/formatters/html4_formatter.rb +0 -2
- data/lib/canon/formatters/html5_formatter.rb +0 -2
- data/lib/canon/formatters/html_formatter.rb +0 -3
- data/lib/canon/formatters/json_formatter.rb +0 -1
- data/lib/canon/formatters/xml_formatter.rb +0 -4
- data/lib/canon/formatters/yaml_formatter.rb +0 -1
- data/lib/canon/formatters.rb +16 -0
- data/lib/canon/html/data_model.rb +0 -10
- data/lib/canon/html.rb +4 -3
- data/lib/canon/options/cli_generator.rb +0 -2
- data/lib/canon/options/registry.rb +0 -2
- data/lib/canon/options.rb +9 -0
- data/lib/canon/pretty_printer/html.rb +0 -1
- data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
- data/lib/canon/pretty_printer.rb +12 -0
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters.rb +14 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
- data/lib/canon/tree_diff/core/node_signature.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +12 -5
- data/lib/canon/tree_diff/core.rb +17 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
- data/lib/canon/tree_diff/matchers.rb +15 -0
- data/lib/canon/tree_diff/operation_converter.rb +0 -8
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
- data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
- data/lib/canon/tree_diff/operations.rb +13 -0
- data/lib/canon/tree_diff.rb +26 -27
- data/lib/canon/validators/base_validator.rb +0 -2
- data/lib/canon/validators/html_validator.rb +0 -1
- data/lib/canon/validators/json_validator.rb +0 -1
- data/lib/canon/validators/xml_validator.rb +0 -1
- data/lib/canon/validators/yaml_validator.rb +0 -1
- data/lib/canon/validators.rb +12 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +0 -4
- data/lib/canon/xml/data_model.rb +0 -10
- data/lib/canon/xml/line_range_mapper.rb +0 -2
- data/lib/canon/xml/nodes/attribute_node.rb +0 -2
- data/lib/canon/xml/nodes/comment_node.rb +0 -2
- data/lib/canon/xml/nodes/element_node.rb +0 -2
- data/lib/canon/xml/nodes/namespace_node.rb +0 -2
- data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
- data/lib/canon/xml/nodes/root_node.rb +0 -2
- data/lib/canon/xml/nodes/text_node.rb +0 -2
- data/lib/canon/xml/nodes.rb +19 -0
- data/lib/canon/xml/processor.rb +0 -5
- data/lib/canon/xml/sax_builder.rb +0 -7
- data/lib/canon/xml.rb +33 -0
- data/lib/canon/xml_backend.rb +50 -14
- data/lib/canon/xml_parsing.rb +4 -2
- data/lib/canon.rb +25 -15
- data/lib/tasks/performance.rake +0 -58
- data/lib/tasks/performance_comparator.rb +132 -65
- data/lib/tasks/performance_helpers.rb +4 -249
- data/lib/tasks/performance_report.rb +309 -0
- metadata +24 -11
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
- data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
- data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "match_options/base_resolver"
|
|
4
|
-
require_relative "match_options/xml_resolver"
|
|
5
|
-
require_relative "match_options/json_resolver"
|
|
6
|
-
require_relative "match_options/yaml_resolver"
|
|
7
|
-
|
|
8
3
|
module Canon
|
|
9
4
|
module Comparison
|
|
10
5
|
# Matching Options for Canon Comparison
|
|
@@ -41,12 +36,6 @@ module Canon
|
|
|
41
36
|
@options[:preprocessing]
|
|
42
37
|
end
|
|
43
38
|
|
|
44
|
-
# Check if semantic diff is enabled
|
|
45
|
-
# @return [Boolean] true if semantic diff is enabled
|
|
46
|
-
def semantic_diff?
|
|
47
|
-
@options[:semantic_diff] == true
|
|
48
|
-
end
|
|
49
|
-
|
|
50
39
|
def to_h
|
|
51
40
|
@options.dup
|
|
52
41
|
end
|
|
@@ -54,6 +43,11 @@ module Canon
|
|
|
54
43
|
|
|
55
44
|
# Module containing match option utilities and format-specific modules
|
|
56
45
|
module MatchOptions
|
|
46
|
+
autoload :BaseResolver, "canon/comparison/match_options/base_resolver"
|
|
47
|
+
autoload :JsonResolver, "canon/comparison/match_options/json_resolver"
|
|
48
|
+
autoload :XmlResolver, "canon/comparison/match_options/xml_resolver"
|
|
49
|
+
autoload :YamlResolver, "canon/comparison/match_options/yaml_resolver"
|
|
50
|
+
|
|
57
51
|
# Preprocessing options - what to do before comparison
|
|
58
52
|
PREPROCESSING_OPTIONS = %i[none c14n normalize format rendered].freeze
|
|
59
53
|
|
|
@@ -90,30 +84,15 @@ module Canon
|
|
|
90
84
|
|
|
91
85
|
# Normalize text by collapsing whitespace and trimming
|
|
92
86
|
# Mimics HTML whitespace collapsing
|
|
93
|
-
#
|
|
94
|
-
# Handles both ASCII and Unicode whitespace characters including:
|
|
95
|
-
# - Regular space (U+0020)
|
|
96
|
-
# - Non-breaking space (U+00A0)
|
|
97
|
-
# - Other Unicode whitespace per \p{Space}
|
|
98
|
-
#
|
|
99
|
-
# @param text [String] Text to normalize
|
|
100
|
-
# @return [String] Normalized text
|
|
101
87
|
def normalize_text(text)
|
|
102
88
|
return "" if text.nil?
|
|
103
89
|
|
|
104
90
|
text.to_s
|
|
105
|
-
.gsub(/[\p{Space}
|
|
91
|
+
.gsub(/[\p{Space} ]+/, " ") # Collapse all whitespace to single space
|
|
106
92
|
.strip # Remove leading/trailing whitespace
|
|
107
93
|
end
|
|
108
94
|
|
|
109
95
|
# Normalize text preserving Unicode whitespace type distinctions.
|
|
110
|
-
#
|
|
111
|
-
# Only ASCII whitespace (space, tab, newline, etc.) is collapsed.
|
|
112
|
-
# Unicode whitespace (NBSP, ideographic space, etc.) is preserved,
|
|
113
|
-
# so different whitespace types remain distinguishable.
|
|
114
|
-
#
|
|
115
|
-
# @param text [String] Text to normalize
|
|
116
|
-
# @return [String] Normalized text with preserved whitespace types
|
|
117
96
|
def normalize_text_preserving_type(text)
|
|
118
97
|
return "" if text.nil?
|
|
119
98
|
|
|
@@ -123,10 +102,6 @@ module Canon
|
|
|
123
102
|
end
|
|
124
103
|
|
|
125
104
|
# Process attribute value according to match behavior
|
|
126
|
-
#
|
|
127
|
-
# @param value [String] Attribute value to process
|
|
128
|
-
# @param behavior [Symbol] Match behavior (:strict, :strip, :compact, :normalize, :ignore)
|
|
129
|
-
# @return [String] Processed value
|
|
130
105
|
def process_attribute_value(value, behavior)
|
|
131
106
|
case behavior
|
|
132
107
|
when :strict
|
|
@@ -134,7 +109,7 @@ module Canon
|
|
|
134
109
|
when :strip
|
|
135
110
|
value.to_s.strip
|
|
136
111
|
when :compact
|
|
137
|
-
value.to_s.gsub(/[\p{Space}
|
|
112
|
+
value.to_s.gsub(/[\p{Space} ]+/, " ")
|
|
138
113
|
when :normalize
|
|
139
114
|
normalize_text(value)
|
|
140
115
|
when :ignore
|
|
@@ -147,16 +122,8 @@ module Canon
|
|
|
147
122
|
|
|
148
123
|
# XML/HTML-specific matching options
|
|
149
124
|
module Xml
|
|
150
|
-
#
|
|
151
|
-
MATCH_DIMENSIONS = %i[
|
|
152
|
-
text_content
|
|
153
|
-
structural_whitespace
|
|
154
|
-
attribute_presence
|
|
155
|
-
attribute_order
|
|
156
|
-
attribute_values
|
|
157
|
-
element_position
|
|
158
|
-
comments
|
|
159
|
-
].freeze
|
|
125
|
+
# Single source of truth: derived from the DimensionSet in Registry.
|
|
126
|
+
MATCH_DIMENSIONS = Dimensions::Registry.for(:xml).names.freeze
|
|
160
127
|
|
|
161
128
|
# Expose FORMAT_DEFAULTS from XmlResolver (for backward compatibility)
|
|
162
129
|
FORMAT_DEFAULTS = MatchOptions::XmlResolver.const_get(:FORMAT_DEFAULTS)
|
|
@@ -165,27 +132,18 @@ module Canon
|
|
|
165
132
|
MATCH_PROFILES = MatchOptions::XmlResolver.const_get(:MATCH_PROFILES)
|
|
166
133
|
|
|
167
134
|
class << self
|
|
168
|
-
# Delegate to XmlResolver
|
|
169
135
|
def resolve(**kwargs)
|
|
170
136
|
MatchOptions::XmlResolver.resolve(**kwargs)
|
|
171
137
|
end
|
|
172
138
|
|
|
173
|
-
# Delegate to XmlResolver
|
|
174
139
|
def get_profile_options(profile)
|
|
175
140
|
MatchOptions::XmlResolver.get_profile_options(profile)
|
|
176
141
|
end
|
|
177
142
|
|
|
178
|
-
# Get valid match dimensions for XML/HTML
|
|
179
|
-
#
|
|
180
|
-
# @return [Array<Symbol>] Valid dimensions
|
|
181
143
|
def match_dimensions
|
|
182
144
|
MatchOptions::XmlResolver.match_dimensions
|
|
183
145
|
end
|
|
184
146
|
|
|
185
|
-
# Get format-specific default options
|
|
186
|
-
#
|
|
187
|
-
# @param format [Symbol] Format type
|
|
188
|
-
# @return [Hash] Default options for the format
|
|
189
147
|
def format_defaults(format)
|
|
190
148
|
MatchOptions::XmlResolver.format_defaults(format)
|
|
191
149
|
end
|
|
@@ -194,41 +152,25 @@ module Canon
|
|
|
194
152
|
|
|
195
153
|
# JSON-specific matching options
|
|
196
154
|
module Json
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
text_content
|
|
200
|
-
structural_whitespace
|
|
201
|
-
key_order
|
|
202
|
-
].freeze
|
|
203
|
-
|
|
204
|
-
# Expose FORMAT_DEFAULTS from JsonResolver (for backward compatibility)
|
|
155
|
+
MATCH_DIMENSIONS = Dimensions::Registry.for(:json).names.freeze
|
|
156
|
+
|
|
205
157
|
FORMAT_DEFAULTS = MatchOptions::JsonResolver.const_get(:FORMAT_DEFAULTS)
|
|
206
158
|
|
|
207
|
-
# Expose MATCH_PROFILES from JsonResolver (for backward compatibility)
|
|
208
159
|
MATCH_PROFILES = MatchOptions::JsonResolver.const_get(:MATCH_PROFILES)
|
|
209
160
|
|
|
210
161
|
class << self
|
|
211
|
-
# Delegate to JsonResolver
|
|
212
162
|
def resolve(**kwargs)
|
|
213
163
|
MatchOptions::JsonResolver.resolve(**kwargs)
|
|
214
164
|
end
|
|
215
165
|
|
|
216
|
-
# Delegate to JsonResolver
|
|
217
166
|
def get_profile_options(profile)
|
|
218
167
|
MatchOptions::JsonResolver.get_profile_options(profile)
|
|
219
168
|
end
|
|
220
169
|
|
|
221
|
-
# Get valid match dimensions for JSON
|
|
222
|
-
#
|
|
223
|
-
# @return [Array<Symbol>] Valid dimensions
|
|
224
170
|
def match_dimensions
|
|
225
171
|
MatchOptions::JsonResolver.match_dimensions
|
|
226
172
|
end
|
|
227
173
|
|
|
228
|
-
# Get format-specific default options
|
|
229
|
-
#
|
|
230
|
-
# @param format [Symbol] Format type
|
|
231
|
-
# @return [Hash] Default options for the format
|
|
232
174
|
def format_defaults(format)
|
|
233
175
|
MatchOptions::JsonResolver.format_defaults(format)
|
|
234
176
|
end
|
|
@@ -237,42 +179,25 @@ module Canon
|
|
|
237
179
|
|
|
238
180
|
# YAML-specific matching options
|
|
239
181
|
module Yaml
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
text_content
|
|
243
|
-
structural_whitespace
|
|
244
|
-
key_order
|
|
245
|
-
comments
|
|
246
|
-
].freeze
|
|
247
|
-
|
|
248
|
-
# Expose FORMAT_DEFAULTS from YamlResolver (for backward compatibility)
|
|
182
|
+
MATCH_DIMENSIONS = Dimensions::Registry.for(:yaml).names.freeze
|
|
183
|
+
|
|
249
184
|
FORMAT_DEFAULTS = MatchOptions::YamlResolver.const_get(:FORMAT_DEFAULTS)
|
|
250
185
|
|
|
251
|
-
# Expose MATCH_PROFILES from YamlResolver (for backward compatibility)
|
|
252
186
|
MATCH_PROFILES = MatchOptions::YamlResolver.const_get(:MATCH_PROFILES)
|
|
253
187
|
|
|
254
188
|
class << self
|
|
255
|
-
# Delegate to YamlResolver
|
|
256
189
|
def resolve(**kwargs)
|
|
257
190
|
MatchOptions::YamlResolver.resolve(**kwargs)
|
|
258
191
|
end
|
|
259
192
|
|
|
260
|
-
# Delegate to YamlResolver
|
|
261
193
|
def get_profile_options(profile)
|
|
262
194
|
MatchOptions::YamlResolver.get_profile_options(profile)
|
|
263
195
|
end
|
|
264
196
|
|
|
265
|
-
# Get valid match dimensions for YAML
|
|
266
|
-
#
|
|
267
|
-
# @return [Array<Symbol>] Valid dimensions
|
|
268
197
|
def match_dimensions
|
|
269
198
|
MatchOptions::YamlResolver.match_dimensions
|
|
270
199
|
end
|
|
271
200
|
|
|
272
|
-
# Get format-specific default options
|
|
273
|
-
#
|
|
274
|
-
# @param format [Symbol] Format type
|
|
275
|
-
# @return [Hash] Default options for the format
|
|
276
201
|
def format_defaults(format)
|
|
277
202
|
MatchOptions::YamlResolver.format_defaults(format)
|
|
278
203
|
end
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
# Shared comparison pipeline helpers used by both algorithms.
|
|
6
|
+
#
|
|
7
|
+
# Both `dom_diff` and `semantic_diff` need to:
|
|
8
|
+
# - detect document format from inputs (with optional hint)
|
|
9
|
+
# - validate that the two formats are comparable
|
|
10
|
+
# - merge global config-sourced profile / options into the opts hash
|
|
11
|
+
# - capture original-string snapshots before parsing mutates inputs
|
|
12
|
+
# - parse both inputs through the format-specific comparator
|
|
13
|
+
#
|
|
14
|
+
# These steps are pure pipeline mechanics — they have nothing to do with
|
|
15
|
+
# the comparison algorithm itself. Keeping them here ensures the two
|
|
16
|
+
# algorithm entrypoints cannot drift out of sync (see lutaml/canon
|
|
17
|
+
# "Two Comparison Algorithms — Distinct by Design" in CLAUDE.md —
|
|
18
|
+
# the algorithm cores stay separate; only shared infrastructure is
|
|
19
|
+
# consolidated).
|
|
20
|
+
module Pipeline
|
|
21
|
+
# Formats whose Canon::Config exposes a match profile / options.
|
|
22
|
+
CONFIG_BACKED_FORMATS = %i[xml html json yaml string].freeze
|
|
23
|
+
|
|
24
|
+
# Cross-format compatibility groups. DOM comparison accepts these
|
|
25
|
+
# pairings because both sides parse to the same Ruby structure.
|
|
26
|
+
# Semantic comparison does not — it requires exact format match.
|
|
27
|
+
COMPATIBLE_FORMAT_GROUPS = [
|
|
28
|
+
%i[json ruby_object].freeze,
|
|
29
|
+
%i[yaml ruby_object].freeze,
|
|
30
|
+
].freeze
|
|
31
|
+
|
|
32
|
+
class << self
|
|
33
|
+
# Detect formats for both inputs, honouring an explicit hint.
|
|
34
|
+
#
|
|
35
|
+
# @param obj1 [Object] First input
|
|
36
|
+
# @param obj2 [Object] Second input
|
|
37
|
+
# @param format_hint [Symbol, nil] Explicit format override
|
|
38
|
+
# @return [Array<Symbol, Symbol>] Detected or hinted formats
|
|
39
|
+
def detect_formats(obj1, obj2, format_hint)
|
|
40
|
+
return [format_hint, format_hint] if format_hint
|
|
41
|
+
|
|
42
|
+
[FormatDetector.detect(obj1), FormatDetector.detect(obj2)]
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# True when the two formats can be compared by the DOM algorithm.
|
|
46
|
+
#
|
|
47
|
+
# DOM allows `ruby_object` to be compared against `json` or `yaml`
|
|
48
|
+
# because both sides parse to the same Ruby structure. Semantic
|
|
49
|
+
# comparison does not allow this — it requires exact format match.
|
|
50
|
+
#
|
|
51
|
+
# @param format1 [Symbol]
|
|
52
|
+
# @param format2 [Symbol]
|
|
53
|
+
# @param strict [Boolean] When true, require exact match (semantic)
|
|
54
|
+
# @return [Boolean]
|
|
55
|
+
def formats_compatible?(format1, format2, strict: false)
|
|
56
|
+
return true if format1 == format2
|
|
57
|
+
return false if strict
|
|
58
|
+
|
|
59
|
+
COMPATIBLE_FORMAT_GROUPS.any? do |group|
|
|
60
|
+
group.include?(format1) && group.include?(format2)
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Raise a helpful error if formats are incompatible.
|
|
65
|
+
#
|
|
66
|
+
# @param format1 [Symbol]
|
|
67
|
+
# @param format2 [Symbol]
|
|
68
|
+
# @param strict [Boolean] Passed to {formats_compatible?}
|
|
69
|
+
# @raise [Canon::CompareFormatMismatchError]
|
|
70
|
+
# @return [void]
|
|
71
|
+
def validate_compatible!(format1, format2, strict: false)
|
|
72
|
+
return if formats_compatible?(format1, format2, strict: strict)
|
|
73
|
+
|
|
74
|
+
raise Canon::CompareFormatMismatchError.new(format1, format2)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Merge global config-sourced profile and options into `opts`.
|
|
78
|
+
#
|
|
79
|
+
# Reads `Canon::Config.instance.<format>.match` for a global
|
|
80
|
+
# `profile` and `profile_options`, and merges them into a copy of
|
|
81
|
+
# the supplied opts hash. Caller-supplied values always win:
|
|
82
|
+
# config-derived `profile_options` extend rather than replace
|
|
83
|
+
# caller-supplied `global_options`.
|
|
84
|
+
#
|
|
85
|
+
# Returns the original opts hash unchanged when the format is not
|
|
86
|
+
# config-backed (e.g. `:ruby_object`).
|
|
87
|
+
#
|
|
88
|
+
# @param format [Symbol]
|
|
89
|
+
# @param opts [Hash] Caller opts (will not be mutated)
|
|
90
|
+
# @return [Hash] New opts hash with config globals merged in
|
|
91
|
+
def resolve_config(format, opts)
|
|
92
|
+
return opts unless CONFIG_BACKED_FORMATS.include?(format)
|
|
93
|
+
|
|
94
|
+
format_config = Canon::Config.instance.public_send(format)
|
|
95
|
+
match_config = format_config.match
|
|
96
|
+
profile = match_config.profile
|
|
97
|
+
profile_opts = match_config.profile_options
|
|
98
|
+
|
|
99
|
+
resolved = opts.dup
|
|
100
|
+
if resolved[:global_profile].nil? && profile
|
|
101
|
+
resolved[:global_profile] = profile
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
if profile_opts.any?
|
|
105
|
+
resolved[:global_options] = merge_profile_options(
|
|
106
|
+
resolved[:global_options], profile_opts
|
|
107
|
+
)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
resolved
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Capture pre-parse string snapshots for diff display.
|
|
114
|
+
#
|
|
115
|
+
# Parsing (especially HTML) can mutate inputs, so originals must
|
|
116
|
+
# be captured before any parsing happens. Strings pass through
|
|
117
|
+
# unchanged; parsed nodes are serialized (to_html or XmlParsing.serialize).
|
|
118
|
+
#
|
|
119
|
+
# @param obj1 [Object]
|
|
120
|
+
# @param obj2 [Object]
|
|
121
|
+
# @return [Array<String, String>] Captured original strings
|
|
122
|
+
def capture_originals(obj1, obj2)
|
|
123
|
+
[extract_original_string(obj1), extract_original_string(obj2)]
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Parse both inputs through the format-specific comparator.
|
|
127
|
+
#
|
|
128
|
+
# Delegates to `XmlComparator`, `HtmlComparator`, `JsonComparator`,
|
|
129
|
+
# or `YamlComparator` based on format. Uses `Cache` so the same
|
|
130
|
+
# string is not re-parsed across runs.
|
|
131
|
+
#
|
|
132
|
+
# @param obj1 [Object]
|
|
133
|
+
# @param obj2 [Object]
|
|
134
|
+
# @param format [Symbol]
|
|
135
|
+
# @param match_opts_hash [Hash] Resolved match options
|
|
136
|
+
# @return [Array<Object, Object>] Parsed documents
|
|
137
|
+
def parse_pair(obj1, obj2, format, match_opts_hash)
|
|
138
|
+
preprocessing = match_opts_hash[:preprocessing] || :none
|
|
139
|
+
|
|
140
|
+
case format
|
|
141
|
+
when :xml
|
|
142
|
+
[
|
|
143
|
+
parse_with_cache(obj1, format, preprocessing) do |doc|
|
|
144
|
+
XmlComparator.parse(doc, preprocessing)
|
|
145
|
+
end,
|
|
146
|
+
parse_with_cache(obj2, format, preprocessing) do |doc|
|
|
147
|
+
XmlComparator.parse(doc, preprocessing)
|
|
148
|
+
end,
|
|
149
|
+
]
|
|
150
|
+
when :html, :html4, :html5
|
|
151
|
+
[
|
|
152
|
+
parse_with_cache(obj1, format, preprocessing) do |doc|
|
|
153
|
+
HtmlComparator.parse(doc, preprocessing)
|
|
154
|
+
end,
|
|
155
|
+
parse_with_cache(obj2, format, preprocessing) do |doc|
|
|
156
|
+
HtmlComparator.parse(doc, preprocessing)
|
|
157
|
+
end,
|
|
158
|
+
]
|
|
159
|
+
when :json
|
|
160
|
+
[
|
|
161
|
+
parse_with_cache(obj1, format, :none) do |doc|
|
|
162
|
+
JsonComparator.parse(doc)
|
|
163
|
+
end,
|
|
164
|
+
parse_with_cache(obj2, format, :none) do |doc|
|
|
165
|
+
JsonComparator.parse(doc)
|
|
166
|
+
end,
|
|
167
|
+
]
|
|
168
|
+
when :yaml
|
|
169
|
+
[
|
|
170
|
+
parse_with_cache(obj1, format, :none) do |doc|
|
|
171
|
+
YamlComparator.parse(doc)
|
|
172
|
+
end,
|
|
173
|
+
parse_with_cache(obj2, format, :none) do |doc|
|
|
174
|
+
YamlComparator.parse(doc)
|
|
175
|
+
end,
|
|
176
|
+
]
|
|
177
|
+
else
|
|
178
|
+
[obj1, obj2]
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# Pre-parse HTML strings through `HtmlParser.parse(_, :html5)`.
|
|
183
|
+
#
|
|
184
|
+
# The DOM comparator needs HTML4 and HTML5 inputs to share HTML's
|
|
185
|
+
# whitespace-sensitivity semantics, which means routing both
|
|
186
|
+
# through Nokogiri::HTML5.fragment up front (issue #118).
|
|
187
|
+
# The semantic comparator does not need this — it uses Canon's
|
|
188
|
+
# own HTML data model downstream — so this helper is opt-in.
|
|
189
|
+
#
|
|
190
|
+
# Returns the inputs unchanged if they are not strings.
|
|
191
|
+
#
|
|
192
|
+
# @param obj1 [Object]
|
|
193
|
+
# @param obj2 [Object]
|
|
194
|
+
# @return [Array<Object, Object>] Potentially pre-parsed HTML inputs
|
|
195
|
+
def preparse_html_pair(obj1, obj2)
|
|
196
|
+
[
|
|
197
|
+
html_string?(obj1) ? HtmlParser.parse(obj1, :html5) : obj1,
|
|
198
|
+
html_string?(obj2) ? HtmlParser.parse(obj2, :html5) : obj2,
|
|
199
|
+
]
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
# True when the input is a String (any String is treated as HTML here).
|
|
203
|
+
#
|
|
204
|
+
# @param obj [Object]
|
|
205
|
+
# @return [Boolean]
|
|
206
|
+
def html_string?(obj)
|
|
207
|
+
obj.is_a?(String)
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
private
|
|
211
|
+
|
|
212
|
+
# Merge caller-supplied global_options with config profile_opts.
|
|
213
|
+
#
|
|
214
|
+
# Caller values win on key conflict; profile_opts fill in gaps.
|
|
215
|
+
# `MatchConfig#profile_options` already returns a fresh hash
|
|
216
|
+
# (via `Hash#except`), so we can return it directly without dup.
|
|
217
|
+
#
|
|
218
|
+
# @param existing [Hash, nil] Caller-supplied options
|
|
219
|
+
# @param profile_opts [Hash] Config-sourced options
|
|
220
|
+
# @return [Hash] Merged hash
|
|
221
|
+
def merge_profile_options(existing, profile_opts)
|
|
222
|
+
return profile_opts if existing.nil?
|
|
223
|
+
|
|
224
|
+
profile_opts.merge(existing)
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
# Parse a single document with cache lookup.
|
|
228
|
+
#
|
|
229
|
+
# @param doc [Object] Document (string or already-parsed)
|
|
230
|
+
# @param format [Symbol] Document format
|
|
231
|
+
# @param preprocessing [Symbol] Preprocessing option
|
|
232
|
+
# @yield Block to parse the document if not cached
|
|
233
|
+
# @return [Object] Parsed document
|
|
234
|
+
def parse_with_cache(doc, format, preprocessing)
|
|
235
|
+
return doc unless doc.is_a?(String)
|
|
236
|
+
|
|
237
|
+
Cache.fetch(:document_parse,
|
|
238
|
+
Cache.key_for_document(doc, format, preprocessing)) do # rubocop:disable Lint/UselessDefaultValueArgument
|
|
239
|
+
yield doc
|
|
240
|
+
end
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
# Extract a string snapshot from various input types.
|
|
244
|
+
#
|
|
245
|
+
# Strings pass through; Nokogiri documents use to_html; Canon and
|
|
246
|
+
# other XML nodes go through XmlParsing.serialize; everything else
|
|
247
|
+
# falls back to to_s.
|
|
248
|
+
#
|
|
249
|
+
# @param obj [Object]
|
|
250
|
+
# @return [String] String snapshot
|
|
251
|
+
def extract_original_string(obj)
|
|
252
|
+
case obj
|
|
253
|
+
when String
|
|
254
|
+
obj
|
|
255
|
+
when Nokogiri::XML::Document, Nokogiri::HTML::Document,
|
|
256
|
+
Nokogiri::XML::DocumentFragment, Nokogiri::HTML::DocumentFragment
|
|
257
|
+
obj.to_html
|
|
258
|
+
else
|
|
259
|
+
if Canon::XmlParsing.xml_node?(obj) || obj.is_a?(Canon::Xml::Node)
|
|
260
|
+
Canon::XmlParsing.serialize(obj)
|
|
261
|
+
else
|
|
262
|
+
obj.to_s
|
|
263
|
+
end
|
|
264
|
+
end
|
|
265
|
+
end
|
|
266
|
+
end
|
|
267
|
+
end
|
|
268
|
+
end
|
|
269
|
+
end
|
|
@@ -1,72 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "base_match_strategy"
|
|
4
|
-
|
|
5
3
|
module Canon
|
|
6
4
|
module Comparison
|
|
7
5
|
module Strategies
|
|
8
6
|
# Factory for creating match strategies
|
|
9
7
|
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
# @example Create a strategy
|
|
15
|
-
# strategy = MatchStrategyFactory.create(
|
|
16
|
-
# format: :xml,
|
|
17
|
-
# match_options: { semantic_diff: true }
|
|
18
|
-
# )
|
|
19
|
-
# differences = strategy.match(doc1, doc2)
|
|
20
|
-
#
|
|
8
|
+
# After semantic dispatch normalization, this factory is only called
|
|
9
|
+
# with semantic_diff: true. DOM matching is handled directly by
|
|
10
|
+
# the format comparators (XmlComparator, HtmlComparator, etc.).
|
|
21
11
|
class MatchStrategyFactory
|
|
22
|
-
# Create appropriate match strategy
|
|
23
|
-
#
|
|
24
|
-
# Examines match options to determine which strategy to use:
|
|
25
|
-
# - If semantic_diff is enabled: SemanticTreeMatchStrategy
|
|
26
|
-
# - Otherwise (default): DomMatchStrategy
|
|
27
|
-
#
|
|
28
|
-
# Future strategies can be added here by checking additional
|
|
29
|
-
# options and returning the appropriate strategy class.
|
|
30
|
-
#
|
|
31
|
-
# @param format [Symbol] Document format (:xml, :html, :json, :yaml)
|
|
32
|
-
# @param match_options [Hash] Match options
|
|
33
|
-
# @option match_options [Boolean] :semantic_diff Use semantic tree matching
|
|
34
|
-
# @return [BaseMatchStrategy] Instantiated strategy
|
|
35
|
-
#
|
|
36
|
-
# @example DOM matching (default)
|
|
37
|
-
# strategy = MatchStrategyFactory.create(
|
|
38
|
-
# format: :xml,
|
|
39
|
-
# match_options: {}
|
|
40
|
-
# )
|
|
41
|
-
# # Returns DomMatchStrategy
|
|
42
|
-
#
|
|
43
|
-
# @example Semantic tree matching
|
|
44
|
-
# strategy = MatchStrategyFactory.create(
|
|
45
|
-
# format: :xml,
|
|
46
|
-
# match_options: { semantic_diff: true }
|
|
47
|
-
# )
|
|
48
|
-
# # Returns SemanticTreeMatchStrategy
|
|
49
|
-
#
|
|
50
12
|
def self.create(format:, match_options:)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
match_options: match_options)
|
|
56
|
-
else
|
|
57
|
-
# Default to DOM matching
|
|
58
|
-
require_relative "dom_match_strategy"
|
|
59
|
-
DomMatchStrategy.new(format: format, match_options: match_options)
|
|
13
|
+
unless match_options[:semantic_diff]
|
|
14
|
+
raise ArgumentError,
|
|
15
|
+
"MatchStrategyFactory requires semantic_diff: true; " \
|
|
16
|
+
"DOM matching is handled by format comparators directly"
|
|
60
17
|
end
|
|
61
18
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
# elsif match_options[:hybrid_diff]
|
|
65
|
-
# require_relative "hybrid_match_strategy"
|
|
66
|
-
# HybridMatchStrategy.new(format, match_options)
|
|
67
|
-
# elsif match_options[:fuzzy_diff]
|
|
68
|
-
# require_relative "fuzzy_match_strategy"
|
|
69
|
-
# FuzzyMatchStrategy.new(format, match_options)
|
|
19
|
+
SemanticTreeMatchStrategy.new(format: format,
|
|
20
|
+
match_options: match_options)
|
|
70
21
|
end
|
|
71
22
|
end
|
|
72
23
|
end
|
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "base_match_strategy"
|
|
4
|
-
require_relative "../../tree_diff/tree_diff_integrator"
|
|
5
|
-
require_relative "../../tree_diff/operation_converter"
|
|
6
|
-
require_relative "../xml_node_comparison"
|
|
7
|
-
|
|
8
3
|
module Canon
|
|
9
4
|
module Comparison
|
|
10
5
|
module Strategies
|
|
@@ -126,7 +121,7 @@ module Canon
|
|
|
126
121
|
# @return [Array<String>] Preprocessed strings
|
|
127
122
|
def preprocess_xml(doc1, doc2)
|
|
128
123
|
xml1 = if doc1.is_a?(Canon::Xml::Node)
|
|
129
|
-
|
|
124
|
+
Canon::Diff::NodeSerializer.serialize(doc1)
|
|
130
125
|
elsif Canon::XmlParsing.xml_node?(doc1)
|
|
131
126
|
Canon::XmlParsing.serialize(doc1)
|
|
132
127
|
else
|
|
@@ -134,7 +129,7 @@ module Canon
|
|
|
134
129
|
end
|
|
135
130
|
|
|
136
131
|
xml2 = if doc2.is_a?(Canon::Xml::Node)
|
|
137
|
-
|
|
132
|
+
Canon::Diff::NodeSerializer.serialize(doc2)
|
|
138
133
|
elsif Canon::XmlParsing.xml_node?(doc2)
|
|
139
134
|
Canon::XmlParsing.serialize(doc2)
|
|
140
135
|
else
|
|
@@ -162,7 +157,7 @@ module Canon
|
|
|
162
157
|
# For XML::DocumentFragment (from parse_node_as_fragment), use to_s
|
|
163
158
|
# to avoid Nokogiri auto-inserting meta tags during to_html serialization
|
|
164
159
|
html1 = if doc1.is_a?(Canon::Xml::Node)
|
|
165
|
-
|
|
160
|
+
Canon::Diff::NodeSerializer.serialize(doc1)
|
|
166
161
|
elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
|
|
167
162
|
doc1.to_s
|
|
168
163
|
elsif Canon::XmlParsing.xml_node?(doc1)
|
|
@@ -172,7 +167,7 @@ module Canon
|
|
|
172
167
|
end
|
|
173
168
|
|
|
174
169
|
html2 = if doc2.is_a?(Canon::Xml::Node)
|
|
175
|
-
|
|
170
|
+
Canon::Diff::NodeSerializer.serialize(doc2)
|
|
176
171
|
elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
|
|
177
172
|
doc2.to_s
|
|
178
173
|
elsif Canon::XmlParsing.xml_node?(doc2)
|
|
@@ -194,7 +189,6 @@ module Canon
|
|
|
194
189
|
# @param doc2 [Object] Second JSON document
|
|
195
190
|
# @return [Array<String>] Preprocessed strings
|
|
196
191
|
def preprocess_json(doc1, doc2)
|
|
197
|
-
require_relative "../../formatters/json_formatter"
|
|
198
192
|
[Canon.format(doc1, :json), Canon.format(doc2, :json)]
|
|
199
193
|
end
|
|
200
194
|
|
|
@@ -206,7 +200,6 @@ module Canon
|
|
|
206
200
|
# @param doc2 [Object] Second YAML document
|
|
207
201
|
# @return [Array<String>] Preprocessed strings
|
|
208
202
|
def preprocess_yaml(doc1, doc2)
|
|
209
|
-
require_relative "../../formatters/yaml_formatter"
|
|
210
203
|
[Canon.format(doc1, :yaml), Canon.format(doc2, :yaml)]
|
|
211
204
|
end
|
|
212
205
|
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
# Match strategy framework. Children are autoloaded — never
|
|
6
|
+
# `require_relative` them.
|
|
7
|
+
module Strategies
|
|
8
|
+
autoload :BaseMatchStrategy,
|
|
9
|
+
"canon/comparison/strategies/base_match_strategy"
|
|
10
|
+
autoload :MatchStrategyFactory,
|
|
11
|
+
"canon/comparison/strategies/match_strategy_factory"
|
|
12
|
+
autoload :SemanticTreeMatchStrategy,
|
|
13
|
+
"canon/comparison/strategies/semantic_tree_match_strategy"
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -158,9 +158,6 @@ differences)
|
|
|
158
158
|
# @param differences [Array] Array to append difference to
|
|
159
159
|
def self.add_attribute_difference(n1:, n2:, diff1:, diff2:,
|
|
160
160
|
dimension:, differences:, **opts)
|
|
161
|
-
# Import DiffNodeBuilder to avoid circular dependency
|
|
162
|
-
require_relative "diff_node_builder"
|
|
163
|
-
|
|
164
161
|
diff_node = Canon::Comparison::DiffNodeBuilder.build(
|
|
165
162
|
node1: n1,
|
|
166
163
|
node2: n2,
|