canon 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +163 -67
- data/README.adoc +400 -7
- data/docs/Gemfile +9 -0
- data/docs/INDEX.adoc +99 -182
- data/docs/_config.yml +100 -0
- data/docs/advanced/diff-classification.adoc +547 -0
- data/docs/advanced/diff-pipeline.adoc +358 -0
- data/docs/advanced/index.adoc +214 -0
- data/docs/advanced/semantic-diff-report.adoc +390 -0
- data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
- data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
- data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
- data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
- data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
- data/docs/features/diff-formatting/display-filtering.adoc +472 -0
- data/docs/features/diff-formatting/index.adoc +140 -0
- data/docs/features/environment-configuration/index.adoc +327 -0
- data/docs/features/environment-configuration/override-system.adoc +436 -0
- data/docs/features/environment-configuration/size-limits.adoc +273 -0
- data/docs/features/index.adoc +173 -0
- data/docs/features/input-validation/index.adoc +521 -0
- data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
- data/docs/features/match-options/html-policies.adoc +312 -0
- data/docs/features/match-options/index.adoc +621 -0
- data/docs/getting-started/index.adoc +83 -0
- data/docs/getting-started/quick-start.adoc +76 -0
- data/docs/guides/choosing-configuration.adoc +689 -0
- data/docs/guides/index.adoc +181 -0
- data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
- data/docs/interfaces/index.adoc +101 -0
- data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
- data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
- data/docs/lychee.toml +65 -0
- data/docs/reference/cli-options.adoc +418 -0
- data/docs/reference/environment-variables.adoc +375 -0
- data/docs/reference/index.adoc +204 -0
- data/docs/reference/options-across-interfaces.adoc +417 -0
- data/docs/understanding/algorithms/dom-diff.adoc +389 -0
- data/docs/understanding/algorithms/index.adoc +314 -0
- data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
- data/docs/understanding/architecture.adoc +447 -0
- data/docs/understanding/comparison-pipeline.adoc +317 -0
- data/docs/understanding/formats/html.adoc +380 -0
- data/docs/understanding/formats/index.adoc +261 -0
- data/docs/understanding/formats/json.adoc +390 -0
- data/docs/understanding/formats/xml.adoc +366 -0
- data/docs/understanding/formats/yaml.adoc +504 -0
- data/docs/understanding/index.adoc +130 -0
- data/lib/canon/cli.rb +42 -1
- data/lib/canon/commands/diff_command.rb +108 -23
- data/lib/canon/comparison/compare_profile.rb +101 -0
- data/lib/canon/comparison/comparison_result.rb +41 -2
- data/lib/canon/comparison/html_comparator.rb +292 -71
- data/lib/canon/comparison/html_compare_profile.rb +117 -0
- data/lib/canon/comparison/match_options.rb +42 -4
- data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
- data/lib/canon/comparison/xml_comparator.rb +695 -91
- data/lib/canon/comparison.rb +207 -2
- data/lib/canon/config/env_provider.rb +71 -0
- data/lib/canon/config/env_schema.rb +58 -0
- data/lib/canon/config/override_resolver.rb +55 -0
- data/lib/canon/config/type_converter.rb +59 -0
- data/lib/canon/config.rb +158 -29
- data/lib/canon/data_model.rb +29 -0
- data/lib/canon/diff/diff_classifier.rb +74 -14
- data/lib/canon/diff/diff_context_builder.rb +41 -0
- data/lib/canon/diff/diff_line.rb +18 -2
- data/lib/canon/diff/diff_node.rb +18 -3
- data/lib/canon/diff/diff_node_mapper.rb +71 -12
- data/lib/canon/diff/formatting_detector.rb +53 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
- data/lib/canon/diff_formatter/debug_output.rb +7 -1
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
- data/lib/canon/diff_formatter/legend.rb +42 -0
- data/lib/canon/diff_formatter.rb +78 -9
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html_formatter_base.rb +35 -1
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/yaml_formatter.rb +3 -0
- data/lib/canon/html/data_model.rb +229 -0
- data/lib/canon/html.rb +9 -0
- data/lib/canon/options/cli_generator.rb +70 -0
- data/lib/canon/options/registry.rb +234 -0
- data/lib/canon/rspec_matchers.rb +34 -13
- data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
- data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
- data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
- data/lib/canon/tree_diff/core/matching.rb +241 -0
- data/lib/canon/tree_diff/core/node_signature.rb +164 -0
- data/lib/canon/tree_diff/core/node_weight.rb +135 -0
- data/lib/canon/tree_diff/core/tree_node.rb +450 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
- data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
- data/lib/canon/tree_diff/operation_converter.rb +631 -0
- data/lib/canon/tree_diff/operations/operation.rb +92 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
- data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
- data/lib/canon/tree_diff.rb +33 -0
- data/lib/canon/validators/json_validator.rb +3 -1
- data/lib/canon/validators/yaml_validator.rb +3 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +22 -23
- data/lib/canon/xml/element_matcher.rb +128 -20
- data/lib/canon/xml/namespace_helper.rb +110 -0
- data/lib/canon.rb +3 -0
- metadata +81 -23
- data/_config.yml +0 -116
- data/docs/ADVANCED_TOPICS.adoc +0 -20
- data/docs/BASIC_USAGE.adoc +0 -16
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/docs/DIFF_FORMATTING.adoc +0 -540
- data/docs/FORMATS.adoc +0 -447
- data/docs/INPUT_VALIDATION.adoc +0 -477
- data/docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/docs/MATCH_OPTIONS.adoc +0 -719
- data/docs/MODES.adoc +0 -432
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/docs/OPTIONS.adoc +0 -1387
- data/docs/PREPROCESSING.adoc +0 -491
- data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
- data/docs/UNDERSTANDING_CANON.adoc +0 -17
|
@@ -15,11 +15,12 @@ module Canon
|
|
|
15
15
|
# Wrapper class for resolved match options
|
|
16
16
|
# Provides convenient methods for accessing behaviors by dimension
|
|
17
17
|
class ResolvedMatchOptions
|
|
18
|
-
attr_reader :options, :format
|
|
18
|
+
attr_reader :options, :format, :compare_profile
|
|
19
19
|
|
|
20
|
-
def initialize(options, format:)
|
|
20
|
+
def initialize(options, format:, compare_profile: nil)
|
|
21
21
|
@options = options
|
|
22
22
|
@format = format
|
|
23
|
+
@compare_profile = compare_profile
|
|
23
24
|
end
|
|
24
25
|
|
|
25
26
|
# Get the behavior for a specific dimension
|
|
@@ -35,6 +36,12 @@ module Canon
|
|
|
35
36
|
@options[:preprocessing]
|
|
36
37
|
end
|
|
37
38
|
|
|
39
|
+
# Check if semantic diff is enabled
|
|
40
|
+
# @return [Boolean] true if semantic diff is enabled
|
|
41
|
+
def semantic_diff?
|
|
42
|
+
@options[:semantic_diff] == true
|
|
43
|
+
end
|
|
44
|
+
|
|
38
45
|
def to_h
|
|
39
46
|
@options.dup
|
|
40
47
|
end
|
|
@@ -116,8 +123,13 @@ module Canon
|
|
|
116
123
|
text_content
|
|
117
124
|
structural_whitespace
|
|
118
125
|
attribute_presence
|
|
126
|
+
attribute_order
|
|
119
127
|
attribute_values
|
|
128
|
+
element_position
|
|
120
129
|
comments
|
|
130
|
+
element_structure
|
|
131
|
+
element_position
|
|
132
|
+
element_hierarchy
|
|
121
133
|
].freeze
|
|
122
134
|
|
|
123
135
|
# Format-specific defaults
|
|
@@ -127,7 +139,9 @@ module Canon
|
|
|
127
139
|
text_content: :normalize,
|
|
128
140
|
structural_whitespace: :normalize,
|
|
129
141
|
attribute_presence: :strict,
|
|
142
|
+
attribute_order: :ignore,
|
|
130
143
|
attribute_values: :strict,
|
|
144
|
+
element_position: :ignore,
|
|
131
145
|
comments: :ignore,
|
|
132
146
|
},
|
|
133
147
|
xml: {
|
|
@@ -135,7 +149,9 @@ module Canon
|
|
|
135
149
|
text_content: :strict,
|
|
136
150
|
structural_whitespace: :strict,
|
|
137
151
|
attribute_presence: :strict,
|
|
152
|
+
attribute_order: :ignore,
|
|
138
153
|
attribute_values: :strict,
|
|
154
|
+
element_position: :strict,
|
|
139
155
|
comments: :strict,
|
|
140
156
|
},
|
|
141
157
|
}.freeze
|
|
@@ -148,7 +164,9 @@ module Canon
|
|
|
148
164
|
text_content: :strict,
|
|
149
165
|
structural_whitespace: :strict,
|
|
150
166
|
attribute_presence: :strict,
|
|
167
|
+
attribute_order: :strict,
|
|
151
168
|
attribute_values: :strict,
|
|
169
|
+
element_position: :strict,
|
|
152
170
|
comments: :strict,
|
|
153
171
|
},
|
|
154
172
|
|
|
@@ -159,7 +177,9 @@ module Canon
|
|
|
159
177
|
text_content: :normalize,
|
|
160
178
|
structural_whitespace: :normalize,
|
|
161
179
|
attribute_presence: :strict,
|
|
180
|
+
attribute_order: :strict,
|
|
162
181
|
attribute_values: :strict,
|
|
182
|
+
element_position: :strict,
|
|
163
183
|
comments: :ignore,
|
|
164
184
|
},
|
|
165
185
|
|
|
@@ -170,7 +190,9 @@ module Canon
|
|
|
170
190
|
text_content: :normalize,
|
|
171
191
|
structural_whitespace: :normalize,
|
|
172
192
|
attribute_presence: :strict,
|
|
193
|
+
attribute_order: :strict,
|
|
173
194
|
attribute_values: :normalize,
|
|
195
|
+
element_position: :ignore,
|
|
174
196
|
comments: :ignore,
|
|
175
197
|
},
|
|
176
198
|
|
|
@@ -180,7 +202,9 @@ module Canon
|
|
|
180
202
|
text_content: :normalize,
|
|
181
203
|
structural_whitespace: :normalize,
|
|
182
204
|
attribute_presence: :strict,
|
|
205
|
+
attribute_order: :strict,
|
|
183
206
|
attribute_values: :strict,
|
|
207
|
+
element_position: :ignore,
|
|
184
208
|
comments: :ignore,
|
|
185
209
|
},
|
|
186
210
|
|
|
@@ -191,7 +215,9 @@ module Canon
|
|
|
191
215
|
text_content: :normalize,
|
|
192
216
|
structural_whitespace: :ignore,
|
|
193
217
|
attribute_presence: :strict,
|
|
218
|
+
attribute_order: :ignore,
|
|
194
219
|
attribute_values: :normalize,
|
|
220
|
+
element_position: :ignore,
|
|
195
221
|
comments: :ignore,
|
|
196
222
|
},
|
|
197
223
|
|
|
@@ -201,7 +227,9 @@ module Canon
|
|
|
201
227
|
text_content: :normalize,
|
|
202
228
|
structural_whitespace: :ignore,
|
|
203
229
|
attribute_presence: :strict,
|
|
230
|
+
attribute_order: :ignore,
|
|
204
231
|
attribute_values: :normalize,
|
|
232
|
+
element_position: :ignore,
|
|
205
233
|
comments: :ignore,
|
|
206
234
|
},
|
|
207
235
|
}.freeze
|
|
@@ -293,9 +321,19 @@ module Canon
|
|
|
293
321
|
|
|
294
322
|
# Validate match options
|
|
295
323
|
def validate_match_options!(match_options)
|
|
324
|
+
# Special options that don't need validation as dimensions
|
|
325
|
+
special_options = %i[
|
|
326
|
+
preprocessing
|
|
327
|
+
semantic_diff
|
|
328
|
+
similarity_threshold
|
|
329
|
+
hash_matching
|
|
330
|
+
similarity_matching
|
|
331
|
+
propagation
|
|
332
|
+
]
|
|
333
|
+
|
|
296
334
|
match_options.each do |dimension, behavior|
|
|
297
|
-
# Skip
|
|
298
|
-
next if dimension
|
|
335
|
+
# Skip special options (validated elsewhere or passed through)
|
|
336
|
+
next if special_options.include?(dimension)
|
|
299
337
|
|
|
300
338
|
unless MATCH_DIMENSIONS.include?(dimension)
|
|
301
339
|
raise Canon::Error,
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
module Strategies
|
|
6
|
+
# Abstract base class for match strategies
#
# All match strategies must inherit from this class and implement:
# - match(doc1, doc2) → Array<DiffNode>
# - preprocess_for_display(doc1, doc2) → [String, String]
#
# This provides a common interface for different matching algorithms,
# enabling the Strategy Pattern for extensible comparison methods.
#
# @example Create a custom match strategy
#   class MyMatchStrategy < BaseMatchStrategy
#     def match(doc1, doc2)
#       # Custom matching logic
#       # Must return Array<Canon::Diff::DiffNode>
#     end
#
#     def preprocess_for_display(doc1, doc2)
#       # Format documents for diff display
#       # Must return [String, String]
#     end
#   end
#
class BaseMatchStrategy
  attr_reader :format, :match_options

  # Initialize strategy
  #
  # @param format [Symbol] Document format (:xml, :html, :json, :yaml)
  # @param match_options [Hash] Match options for comparison
  def initialize(format:, match_options:)
    @format = format
    @match_options = match_options
  end

  # Perform matching and return DiffNodes
  #
  # This is the core method that implements the matching algorithm.
  # All strategies must implement this to produce DiffNodes that
  # flow through the standard diff rendering pipeline.
  #
  # @param doc1 [Object] First document
  # @param doc2 [Object] Second document
  # @return [Array<Canon::Diff::DiffNode>] Array of differences
  # @raise [NotImplementedError] If not implemented by subclass
  def match(doc1, doc2)
    raise NotImplementedError,
          "#{self.class} must implement #match(doc1, doc2)"
  end

  # Preprocess documents for display in diff output
  #
  # This method formats the documents into strings suitable for
  # line-by-line diff display. The format must be consistent across
  # all strategies for the same format to ensure the diff rendering
  # pipeline produces correct output.
  #
  # @param doc1 [Object] First document
  # @param doc2 [Object] Second document
  # @return [Array<String>] Preprocessed [doc1_string, doc2_string]
  # @raise [NotImplementedError] If not implemented by subclass
  def preprocess_for_display(doc1, doc2)
    raise NotImplementedError,
          "#{self.class} must implement #preprocess_for_display(doc1, doc2)"
  end

  # Optional metadata to include in ComparisonResult
  #
  # Subclasses can override this to provide algorithm-specific
  # metadata such as statistics, configuration, etc.
  #
  # @return [Hash] Additional metadata (empty in the base class)
  def metadata
    {}
  end

  # Algorithm name derived from class name
  #
  # Automatically generates algorithm identifier from class name.
  # For example:
  # - DomMatchStrategy → :dom
  # - SemanticTreeMatchStrategy → :semantic_tree
  #
  # Anonymous subclasses (created with Class.new) have no name of their
  # own, so the nearest named ancestor class supplies the identifier.
  #
  # @return [Symbol] Algorithm identifier
  def algorithm_name
    # Module#name is nil for anonymous classes; Object is always a named
    # ancestor, so this lookup cannot come back empty.
    named_class = self.class.ancestors.find do |klass|
      klass.is_a?(Class) && klass.name
    end

    named_class.name.split("::").last
      .gsub("MatchStrategy", "")
      .gsub(/([A-Z])/, '_\1')
      .downcase
      .delete_prefix("_") # drop the underscore added before the first capital
      .to_sym
  end
end
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "base_match_strategy"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Comparison
|
|
7
|
+
module Strategies
|
|
8
|
+
# Factory for creating match strategies
#
# Selects the appropriate match strategy based on match options.
# This provides a single point for strategy instantiation and enables
# easy extension with new matching algorithms.
#
# @example Create a strategy
#   strategy = MatchStrategyFactory.create(
#     format: :xml,
#     match_options: { semantic_diff: true }
#   )
#   differences = strategy.match(doc1, doc2)
#
class MatchStrategyFactory
  # Create appropriate match strategy
  #
  # Examines match options to determine which strategy to use:
  # - If semantic_diff is truthy: SemanticTreeMatchStrategy
  # - Otherwise (default): DomMatchStrategy
  #
  # The strategy file is loaded on demand, so only the selected
  # algorithm's code is required.
  #
  # To add a strategy, insert an `elsif` branch before the `else`,
  # using the same keyword-argument constructor as the others, e.g.:
  #
  #   elsif match_options[:hybrid_diff]
  #     require_relative "hybrid_match_strategy"
  #     HybridMatchStrategy.new(format: format,
  #                             match_options: match_options)
  #
  # @param format [Symbol] Document format (:xml, :html, :json, :yaml)
  # @param match_options [Hash] Match options
  # @option match_options [Boolean] :semantic_diff Use semantic tree matching
  # @return [BaseMatchStrategy] Instantiated strategy
  #
  # @example DOM matching (default)
  #   strategy = MatchStrategyFactory.create(
  #     format: :xml,
  #     match_options: {}
  #   )
  #   # Returns DomMatchStrategy
  #
  # @example Semantic tree matching
  #   strategy = MatchStrategyFactory.create(
  #     format: :xml,
  #     match_options: { semantic_diff: true }
  #   )
  #   # Returns SemanticTreeMatchStrategy
  #
  def self.create(format:, match_options:)
    if match_options[:semantic_diff]
      require_relative "semantic_tree_match_strategy"
      SemanticTreeMatchStrategy.new(format: format,
                                    match_options: match_options)
    else
      # Default to DOM matching.
      # NOTE(review): confirm dom_match_strategy.rb ships next to this
      # file; if it is absent this default branch raises LoadError.
      require_relative "dom_match_strategy"
      DomMatchStrategy.new(format: format, match_options: match_options)
    end
  end
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "base_match_strategy"
|
|
4
|
+
require_relative "../../tree_diff/tree_diff_integrator"
|
|
5
|
+
require_relative "../../tree_diff/operation_converter"
|
|
6
|
+
|
|
7
|
+
module Canon
|
|
8
|
+
module Comparison
|
|
9
|
+
module Strategies
|
|
10
|
+
# Semantic tree matching strategy
#
# Uses TreeDiffIntegrator for intelligent structure-aware matching.
# This strategy:
# 1. Converts documents to tree representation
# 2. Performs semantic matching via TreeDiffIntegrator
# 3. Converts Operations to DiffNodes via OperationConverter
# 4. Returns DiffNodes that flow through standard rendering pipeline
#
# Key difference from DOM matching: Uses tree-based structural
# similarity and edit distance for matching instead of simple
# node-by-node comparison.
#
# @example Use semantic tree matching
#   strategy = SemanticTreeMatchStrategy.new(
#     format: :xml,
#     match_options: match_options
#   )
#   diff_nodes = strategy.match(doc1, doc2)
#
class SemanticTreeMatchStrategy < BaseMatchStrategy
  # Perform semantic tree matching
  #
  # Stores the integrator's :statistics entry so #metadata can report
  # it afterwards.
  #
  # @param doc1 [Object] First document (Nokogiri node, Hash, etc.)
  # @param doc2 [Object] Second document
  # @return [Array<Canon::Diff::DiffNode>] Array of differences
  def match(doc1, doc2)
    # The integrator result is read as a Hash with :statistics and
    # :operations entries.
    result = create_integrator.diff(doc1, doc2)

    @statistics = result[:statistics]

    # Operations are converted so that proper DiffNodes are returned,
    # as the rendering pipeline expects.
    convert_operations_to_diff_nodes(result[:operations])
  end

  # Preprocess documents for display
  #
  # IMPORTANT: This must use the SAME format as DomMatchStrategy
  # to ensure consistent diff rendering.
  #
  # @param doc1 [Object] First document
  # @param doc2 [Object] Second document
  # @return [Array<String>] Preprocessed [doc1_string, doc2_string]
  # @raise [ArgumentError] If the strategy's format is not supported
  def preprocess_for_display(doc1, doc2)
    case @format
    when :xml
      preprocess_xml(doc1, doc2)
    when :html, :html4, :html5
      preprocess_html(doc1, doc2)
    when :json
      preprocess_json(doc1, doc2)
    when :yaml
      preprocess_yaml(doc1, doc2)
    else
      raise ArgumentError, "Unsupported format: #{@format}"
    end
  end

  # Include tree diff statistics in metadata
  #
  # :tree_diff_statistics is nil until #match has been called.
  #
  # @return [Hash] Metadata including statistics
  def metadata
    {
      tree_diff_statistics: @statistics,
      tree_diff_enabled: true,
    }
  end

  private

  # Create TreeDiffIntegrator with options
  #
  # Options missing from match_options fall back to: similarity
  # threshold 0.95, all three matching phases enabled, and attribute
  # order ignored.
  #
  # @return [Canon::TreeDiff::TreeDiffIntegrator] Configured integrator
  def create_integrator
    Canon::TreeDiff::TreeDiffIntegrator.new(
      format: @format,
      options: {
        similarity_threshold: @match_options[:similarity_threshold] || 0.95,
        hash_matching: @match_options.fetch(:hash_matching, true),
        similarity_matching: @match_options.fetch(:similarity_matching,
                                                  true),
        propagation: @match_options.fetch(:propagation, true),
        attribute_order: @match_options[:attribute_order] || :ignore,
      },
    )
  end

  # Convert Operations to DiffNodes using OperationConverter
  #
  # This is crucial - it ensures we produce proper DiffNodes with:
  # - Correct dimension mapping
  # - Normative/informative classification
  # - Proper node extraction from TreeNodes
  #
  # @param operations [Array<Operation>] Operations from tree diff
  # @return [Array<Canon::Diff::DiffNode>] Converted DiffNodes
  def convert_operations_to_diff_nodes(operations)
    converter = Canon::TreeDiff::OperationConverter.new(
      format: @format,
      match_options: @match_options,
    )

    converter.convert(operations)
  end

  # Preprocess XML documents
  #
  # Uses simple line break insertion (same as DOM diff)
  # NOT Canon.format() which adds full indentation
  #
  # @param doc1 [Object] First XML document
  # @param doc2 [Object] Second XML document
  # @return [Array<String>] Preprocessed strings
  def preprocess_xml(doc1, doc2)
    # MUST match DOM diff preprocessing EXACTLY (xml_comparator.rb:106-109)
    [doc1, doc2].map { |doc| break_adjacent_tags(serialize_xml(doc)) }
  end

  # Preprocess HTML documents
  #
  # Uses native HTML serialization with line break insertion
  # (same as DOM diff) to ensure proper line-by-line display
  #
  # @param doc1 [Object] First HTML document
  # @param doc2 [Object] Second HTML document
  # @return [Array<String>] Preprocessed strings
  def preprocess_html(doc1, doc2)
    # Simple line breaks are used instead of Canon.format so the
    # display matches the DOM diff format.
    [doc1, doc2].map { |doc| break_adjacent_tags(serialize_html(doc)) }
  end

  # Preprocess JSON documents
  #
  # Uses Canon formatter for consistent formatting
  #
  # @param doc1 [Object] First JSON document
  # @param doc2 [Object] Second JSON document
  # @return [Array<String>] Preprocessed strings
  def preprocess_json(doc1, doc2)
    require_relative "../../formatters/json_formatter"
    [Canon.format(doc1, :json), Canon.format(doc2, :json)]
  end

  # Preprocess YAML documents
  #
  # Uses Canon formatter for consistent formatting
  #
  # @param doc1 [Object] First YAML document
  # @param doc2 [Object] Second YAML document
  # @return [Array<String>] Preprocessed strings
  def preprocess_yaml(doc1, doc2)
    require_relative "../../formatters/yaml_formatter"
    [Canon.format(doc1, :yaml), Canon.format(doc2, :yaml)]
  end

  # Serialize one XML document to a string.
  #
  # Canon::Xml::Node goes through XmlComparator's serializer; anything
  # else uses #to_xml when available and #to_s otherwise.
  #
  # @param doc [Object] XML document
  # @return [String] Serialized XML
  def serialize_xml(doc)
    if doc.is_a?(Canon::Xml::Node)
      serialize_canon_node(doc)
    elsif doc.respond_to?(:to_xml)
      doc.to_xml
    else
      doc.to_s
    end
  end

  # Serialize one HTML document to a string.
  #
  # For Canon::Xml::Node, use XmlComparator's serializer.
  # For XML::DocumentFragment (from parse_node_as_fragment), use to_s
  # to avoid Nokogiri auto-inserting meta tags during to_html
  # serialization. The fragment check must come before the #to_html
  # check for that reason.
  #
  # @param doc [Object] HTML document
  # @return [String] Serialized HTML
  def serialize_html(doc)
    if doc.is_a?(Canon::Xml::Node)
      serialize_canon_node(doc)
    elsif doc.is_a?(Nokogiri::XML::DocumentFragment)
      doc.to_s
    elsif doc.respond_to?(:to_html)
      doc.to_html
    else
      doc.to_s
    end
  end

  # Serialize a Canon::Xml::Node with XmlComparator's serializer.
  #
  # The comparator is loaded on demand, and the serializer is invoked
  # through #send exactly as before.
  #
  # @param node [Canon::Xml::Node] Node to serialize
  # @return [String] Serialized markup
  def serialize_canon_node(node)
    require_relative "../xml_comparator"
    XmlComparator.send(:serialize_node_to_xml, node)
  end

  # Insert a newline between directly adjacent tags ("><").
  #
  # @param markup [String] Serialized XML or HTML
  # @return [String] Markup with one tag boundary per line break
  def break_adjacent_tags(markup)
    markup.gsub(/></, ">\n<")
  end
end
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
end
|