canon 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +163 -67
- data/README.adoc +400 -7
- data/docs/Gemfile +9 -0
- data/docs/INDEX.adoc +99 -182
- data/docs/_config.yml +100 -0
- data/docs/advanced/diff-classification.adoc +547 -0
- data/docs/advanced/diff-pipeline.adoc +358 -0
- data/docs/advanced/index.adoc +214 -0
- data/docs/advanced/semantic-diff-report.adoc +390 -0
- data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
- data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
- data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
- data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
- data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
- data/docs/features/diff-formatting/display-filtering.adoc +472 -0
- data/docs/features/diff-formatting/index.adoc +140 -0
- data/docs/features/environment-configuration/index.adoc +327 -0
- data/docs/features/environment-configuration/override-system.adoc +436 -0
- data/docs/features/environment-configuration/size-limits.adoc +273 -0
- data/docs/features/index.adoc +173 -0
- data/docs/features/input-validation/index.adoc +521 -0
- data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
- data/docs/features/match-options/html-policies.adoc +312 -0
- data/docs/features/match-options/index.adoc +621 -0
- data/docs/getting-started/index.adoc +83 -0
- data/docs/getting-started/quick-start.adoc +76 -0
- data/docs/guides/choosing-configuration.adoc +689 -0
- data/docs/guides/index.adoc +181 -0
- data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
- data/docs/interfaces/index.adoc +101 -0
- data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
- data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
- data/docs/lychee.toml +65 -0
- data/docs/reference/cli-options.adoc +418 -0
- data/docs/reference/environment-variables.adoc +375 -0
- data/docs/reference/index.adoc +204 -0
- data/docs/reference/options-across-interfaces.adoc +417 -0
- data/docs/understanding/algorithms/dom-diff.adoc +389 -0
- data/docs/understanding/algorithms/index.adoc +314 -0
- data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
- data/docs/understanding/architecture.adoc +447 -0
- data/docs/understanding/comparison-pipeline.adoc +317 -0
- data/docs/understanding/formats/html.adoc +380 -0
- data/docs/understanding/formats/index.adoc +261 -0
- data/docs/understanding/formats/json.adoc +390 -0
- data/docs/understanding/formats/xml.adoc +366 -0
- data/docs/understanding/formats/yaml.adoc +504 -0
- data/docs/understanding/index.adoc +130 -0
- data/lib/canon/cli.rb +42 -1
- data/lib/canon/commands/diff_command.rb +108 -23
- data/lib/canon/comparison/compare_profile.rb +101 -0
- data/lib/canon/comparison/comparison_result.rb +41 -2
- data/lib/canon/comparison/html_comparator.rb +292 -71
- data/lib/canon/comparison/html_compare_profile.rb +117 -0
- data/lib/canon/comparison/match_options.rb +42 -4
- data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
- data/lib/canon/comparison/xml_comparator.rb +695 -91
- data/lib/canon/comparison.rb +207 -2
- data/lib/canon/config/env_provider.rb +71 -0
- data/lib/canon/config/env_schema.rb +58 -0
- data/lib/canon/config/override_resolver.rb +55 -0
- data/lib/canon/config/type_converter.rb +59 -0
- data/lib/canon/config.rb +158 -29
- data/lib/canon/data_model.rb +29 -0
- data/lib/canon/diff/diff_classifier.rb +74 -14
- data/lib/canon/diff/diff_context_builder.rb +41 -0
- data/lib/canon/diff/diff_line.rb +18 -2
- data/lib/canon/diff/diff_node.rb +18 -3
- data/lib/canon/diff/diff_node_mapper.rb +71 -12
- data/lib/canon/diff/formatting_detector.rb +53 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
- data/lib/canon/diff_formatter/debug_output.rb +7 -1
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
- data/lib/canon/diff_formatter/legend.rb +42 -0
- data/lib/canon/diff_formatter.rb +78 -9
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html_formatter_base.rb +35 -1
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/yaml_formatter.rb +3 -0
- data/lib/canon/html/data_model.rb +229 -0
- data/lib/canon/html.rb +9 -0
- data/lib/canon/options/cli_generator.rb +70 -0
- data/lib/canon/options/registry.rb +234 -0
- data/lib/canon/rspec_matchers.rb +34 -13
- data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
- data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
- data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
- data/lib/canon/tree_diff/core/matching.rb +241 -0
- data/lib/canon/tree_diff/core/node_signature.rb +164 -0
- data/lib/canon/tree_diff/core/node_weight.rb +135 -0
- data/lib/canon/tree_diff/core/tree_node.rb +450 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
- data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
- data/lib/canon/tree_diff/operation_converter.rb +631 -0
- data/lib/canon/tree_diff/operations/operation.rb +92 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
- data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
- data/lib/canon/tree_diff.rb +33 -0
- data/lib/canon/validators/json_validator.rb +3 -1
- data/lib/canon/validators/yaml_validator.rb +3 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +22 -23
- data/lib/canon/xml/element_matcher.rb +128 -20
- data/lib/canon/xml/namespace_helper.rb +110 -0
- data/lib/canon.rb +3 -0
- metadata +81 -23
- data/_config.yml +0 -116
- data/docs/ADVANCED_TOPICS.adoc +0 -20
- data/docs/BASIC_USAGE.adoc +0 -16
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/docs/DIFF_FORMATTING.adoc +0 -540
- data/docs/FORMATS.adoc +0 -447
- data/docs/INPUT_VALIDATION.adoc +0 -477
- data/docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/docs/MATCH_OPTIONS.adoc +0 -719
- data/docs/MODES.adoc +0 -432
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/docs/OPTIONS.adoc +0 -1387
- data/docs/PREPROCESSING.adoc +0 -491
- data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
- data/docs/UNDERSTANDING_CANON.adoc +0 -17
|
@@ -0,0 +1,631 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../diff/diff_node"
|
|
4
|
+
require_relative "../comparison/match_options"
|
|
5
|
+
|
|
6
|
+
module Canon
|
|
7
|
+
module TreeDiff
|
|
8
|
+
# Converts TreeDiff Operations to DiffNodes for integration with Canon's
|
|
9
|
+
# existing diff pipeline.
|
|
10
|
+
#
|
|
11
|
+
# This class bridges the semantic tree diff system with Canon's DOM-based
|
|
12
|
+
# diff architecture by mapping operations to match dimensions and creating
|
|
13
|
+
# DiffNode objects that can be processed by the standard diff formatter.
|
|
14
|
+
#
|
|
15
|
+
# @example Convert operations to diff nodes
|
|
16
|
+
# converter = OperationConverter.new(format: :xml, match_options: opts)
|
|
17
|
+
# diff_nodes = converter.convert(operations)
|
|
18
|
+
#
|
|
19
|
+
class OperationConverter
|
|
20
|
+
# Default match dimension for every tree-diff operation type.
# The :update entry is only a fallback: updates are split per changed aspect
# (attribute values, attribute order, text, element name) during conversion.
OPERATION_TO_DIMENSION = {
  insert: :element_structure,
  delete: :element_structure,
  update: :text_content,
  move: :element_position,
  merge: :element_structure,
  split: :element_structure,
  upgrade: :element_hierarchy,
  downgrade: :element_hierarchy,
}.freeze

# Element names regarded as metadata/presentation markup. Differences that
# touch these elements are reported as informative, i.e. they do not affect
# semantic equivalence.
METADATA_ELEMENTS = %w[
  semx
  fmt-concept fmt-name fmt-title fmt-xref fmt-eref
  fmt-termref fmt-element-name fmt-link
  autonum
  meta link base title style script
].freeze
|
|
39
|
+
|
|
40
|
+
# format is the document format symbol given to the constructor;
# match_options is the ResolvedMatchOptions wrapper built from it.
attr_reader :format, :match_options

# Build a converter for one document format.
#
# The raw match options are resolved by the MatchOptions module that
# corresponds to the format (Xml for :xml and all HTML flavours, Json,
# Yaml) and then wrapped in a ResolvedMatchOptions instance.
#
# @param format [Symbol] Document format (:xml, :html, :html4, :html5, :json, :yaml)
# @param match_options [Hash] Match options for determining normative/informative
# @raise [ArgumentError] if the format is not one of the supported symbols
def initialize(format:, match_options: {})
  @format = format

  resolver =
    case format
    when :xml, :html, :html4, :html5 then Canon::Comparison::MatchOptions::Xml
    when :json then Canon::Comparison::MatchOptions::Json
    when :yaml then Canon::Comparison::MatchOptions::Yaml
    else raise ArgumentError, "Unknown format: #{format}"
    end

  resolved = resolver.resolve(format: format, match: match_options)

  @match_options = Canon::Comparison::ResolvedMatchOptions.new(
    resolved,
    format: format,
  )
end
|
|
76
|
+
|
|
77
|
+
# Turn a list of tree-diff operations into DiffNodes.
#
# Every operation is converted individually (an update may expand into
# several nodes, hence the flattening); the combined list is then scanned
# for delete/insert pairs that differ only in attribute order.
#
# @param operations [Array<Operation>] Operations to convert
# @return [Array<DiffNode>] Converted diff nodes
def convert(operations)
  converted = operations.flat_map { |op| convert_operation(op) }
  detect_attribute_order_diffs(converted)
end
|
|
89
|
+
|
|
90
|
+
private
|
|
91
|
+
|
|
92
|
+
# Dispatch one operation to the converter matching its type.
#
# @param operation [Operation] Operation to convert
# @return [DiffNode, Array<DiffNode>] A single node, or a list of nodes
#   for update operations
# @raise [ArgumentError] if the operation type is not supported
def convert_operation(operation)
  kind = operation.type
  supported = %i[insert delete update move merge split upgrade downgrade]

  # Only symbols from the fixed list above may reach the dynamic dispatch
  raise ArgumentError, "Unknown operation type: #{kind}" unless supported.include?(kind)

  send(:"convert_#{kind}", operation)
end
|
|
118
|
+
|
|
119
|
+
# Build the DiffNode for an INSERT: nothing on the left, the inserted
# node on the right, classified under :element_structure.
#
# @param operation [Operation] Insert operation
# @return [DiffNode] Diff node representing insertion
def convert_insert(operation)
  inserted = extract_source_node(operation[:node])

  Canon::Diff::DiffNode.new(
    node1: nil,
    node2: inserted,
    dimension: :element_structure,
    reason: build_insert_reason(operation),
  ).tap do |diff_node|
    # Metadata elements never count as normative differences
    diff_node.normative = !metadata_element?(inserted) && determine_normative(:element_structure)
  end
end
|
|
136
|
+
|
|
137
|
+
# Build the DiffNode for a DELETE: the removed node on the left, nothing
# on the right, classified under :element_structure.
#
# @param operation [Operation] Delete operation
# @return [DiffNode] Diff node representing deletion
def convert_delete(operation)
  removed = extract_source_node(operation[:node])

  Canon::Diff::DiffNode.new(
    node1: removed,
    node2: nil,
    dimension: :element_structure,
    reason: build_delete_reason(operation),
  ).tap do |diff_node|
    # Metadata elements never count as normative differences
    diff_node.normative = !metadata_element?(removed) && determine_normative(:element_structure)
  end
end
|
|
154
|
+
|
|
155
|
+
# Convert an UPDATE operation into one DiffNode per changed aspect.
#
# Each recognised key in operation[:changes] (:attributes,
# :attribute_order, :value, :label) produces its own DiffNode, in that
# order, so every dimension can be classified independently. When none of
# the keys is present a single generic :text_content node is returned.
#
# @param operation [Operation] Update operation (read through #[] for
#   :node1, :node2 and :changes)
# @return [Array<DiffNode>] Diff nodes representing updates (never empty)
def convert_update(operation)
  node1 = extract_source_node(operation[:node1])
  node2 = extract_source_node(operation[:node2])

  # :changes may be a bare flag (or nil) instead of a Hash of details
  changes = operation[:changes]
  changes = {} unless changes.is_a?(Hash)

  # Differences on metadata elements are always informative
  is_metadata = metadata_element?(node1) || metadata_element?(node2)

  updates = []
  updates << [:attribute_values, attribute_values_reason(changes[:attributes])] if changes.key?(:attributes)
  updates << [:attribute_order, attribute_order_reason(changes[:attribute_order])] if changes.key?(:attribute_order)
  updates << [:text_content, text_content_reason(changes[:value])] if changes.key?(:value)
  updates << [:element_structure, element_name_reason(changes[:label])] if changes.key?(:label)

  # Nothing specific was reported: fall back to a generic content update
  updates << [:text_content, "content differs"] if updates.empty?

  updates.map do |dimension, reason|
    diff_node = Canon::Diff::DiffNode.new(
      node1: node1,
      node2: node2,
      dimension: dimension,
      reason: reason,
    )
    diff_node.normative = is_metadata ? false : determine_normative(dimension)
    diff_node
  end
end

# True when a change entry carries { old: ..., new: ... } details rather
# than being a bare flag such as true.
def update_detail?(change)
  change.is_a?(Hash) && change.key?(:old)
end

# Reason text for an attribute-value change; a missing side is treated as
# an empty attribute set.
def attribute_values_reason(change)
  return "attribute values differ" unless update_detail?(change)

  build_attribute_diff_details(change[:old] || {}, change[:new] || {})
end

# Reason text for an attribute-order change; a missing side is rendered as
# an empty list instead of raising.
def attribute_order_reason(change)
  return "attribute order differs" unless update_detail?(change)

  old_order = Array(change[:old]).join(", ")
  new_order = Array(change[:new]).join(", ")
  "Attribute order changed: [#{old_order}] → [#{new_order}]"
end

# Reason text for a text change, with both sides shortened to 40 chars.
def text_content_reason(change)
  return "text content differs" unless update_detail?(change)

  preview_old = truncate_for_reason((change[:old] || "").to_s, 40)
  preview_new = truncate_for_reason((change[:new] || "").to_s, 40)
  "Text content changed: \"#{preview_old}\" → \"#{preview_new}\""
end

# Reason text for an element rename.
def element_name_reason(change)
  return "element name differs" unless update_detail?(change)

  "Element name changed: <#{change[:old]}> → <#{change[:new]}>"
end
|
|
274
|
+
|
|
275
|
+
# Build the DiffNode for a MOVE, classified under :element_position.
#
# @param operation [Operation] Move operation
# @return [DiffNode] Diff node representing move
def convert_move(operation)
  origin = extract_source_node(operation[:node1])
  target = extract_source_node(operation[:node2])

  Canon::Diff::DiffNode.new(
    node1: origin,
    node2: target,
    dimension: :element_position,
    reason: build_move_reason(operation),
  ).tap do |diff_node|
    # A move involving a metadata element on either side is informative
    informative = metadata_element?(origin) || metadata_element?(target)
    diff_node.normative = !informative && determine_normative(:element_position)
  end
end
|
|
294
|
+
|
|
295
|
+
# Build the DiffNode for a MERGE (several nodes combined into one).
#
# Only the first source node is kept as node1; the merged result becomes
# node2. The reason records how many nodes were merged (0 when the
# operation carries no :nodes list).
#
# @param operation [Operation] Merge operation
# @return [DiffNode] Diff node representing merge
def convert_merge(operation)
  sources = operation[:nodes]

  merged = Canon::Diff::DiffNode.new(
    node1: extract_source_node(sources&.first),
    node2: extract_source_node(operation[:result]),
    dimension: :element_structure,
    reason: "merged #{sources&.length || 0} nodes",
  )
  # Merges are structural changes and are always treated as normative
  merged.normative = true
  merged
end
|
|
314
|
+
|
|
315
|
+
# Build the DiffNode for a SPLIT (one node divided into several).
#
# The original node becomes node1; only the first split result is kept as
# node2. The reason records how many nodes resulted (0 when the operation
# carries no :results list).
#
# @param operation [Operation] Split operation
# @return [DiffNode] Diff node representing split
def convert_split(operation)
  pieces = operation[:results]

  split = Canon::Diff::DiffNode.new(
    node1: extract_source_node(operation[:node]),
    node2: extract_source_node(pieces&.first),
    dimension: :element_structure,
    reason: "split into #{pieces&.length || 0} nodes",
  )
  # Splits are structural changes and are always treated as normative
  split.normative = true
  split
end
|
|
334
|
+
|
|
335
|
+
# Build the DiffNode for an UPGRADE (node promoted, i.e. depth decreased),
# classified under :element_hierarchy.
#
# @param operation [Operation] Upgrade operation
# @return [DiffNode] Diff node representing upgrade
def convert_upgrade(operation)
  Canon::Diff::DiffNode.new(
    node1: extract_source_node(operation[:node1]),
    node2: extract_source_node(operation[:node2]),
    dimension: :element_hierarchy,
    reason: "promoted to higher level",
  ).tap do |diff_node|
    diff_node.normative = determine_normative(:element_hierarchy)
  end
end
|
|
352
|
+
|
|
353
|
+
# Build the DiffNode for a DOWNGRADE (node demoted, i.e. depth increased),
# classified under :element_hierarchy.
#
# @param operation [Operation] Downgrade operation
# @return [DiffNode] Diff node representing downgrade
def convert_downgrade(operation)
  Canon::Diff::DiffNode.new(
    node1: extract_source_node(operation[:node1]),
    node2: extract_source_node(operation[:node2]),
    dimension: :element_hierarchy,
    reason: "demoted to lower level",
  ).tap do |diff_node|
    diff_node.normative = determine_normative(:element_hierarchy)
  end
end
|
|
370
|
+
|
|
371
|
+
# Unwrap the underlying source node from a tree node wrapper.
#
# Objects that expose #source_node are unwrapped; anything else is
# returned untouched, and nil stays nil.
#
# @param tree_node [TreeNode, nil] Tree node wrapper
# @return [Object, nil] Source node (Nokogiri, Hash, etc.)
def extract_source_node(tree_node)
  return if tree_node.nil?
  return tree_node.source_node if tree_node.respond_to?(:source_node)

  tree_node
end
|
|
380
|
+
|
|
381
|
+
# Pick the single match dimension that best describes an update.
#
# The keys of operation[:changes] are checked in a fixed priority order:
# :attribute_order, :attributes, :value, :label. The first key present
# decides the dimension, so :attribute_order wins even when other aspects
# changed as well. A missing or non-Hash :changes value (for example a
# bare true flag) is treated as "no details" and yields :text_content.
#
# @param operation [Operation] Update operation (read through #[] for :changes)
# @return [Symbol] Match dimension
def determine_update_dimension(operation)
  changes = operation[:changes]
  # :changes may be a boolean flag rather than a Hash; treat it as empty
  changes = {} unless changes.is_a?(Hash)

  if changes.key?(:attribute_order)
    :attribute_order
  elsif changes.key?(:attributes)
    :attribute_values
  elsif changes.key?(:value)
    :text_content
  elsif changes.key?(:label)
    # Element name changed (rare)
    :element_structure
  else
    # Generic update without details
    :text_content
  end
end
|
|
406
|
+
|
|
407
|
+
# Decide whether a difference in the given dimension is normative.
#
# The resolved match options are asked for the behavior configured for the
# dimension; only :ignore makes the difference informative.
#
# @param dimension [Symbol] Match dimension
# @return [Boolean] true if normative (should be shown)
def determine_normative(dimension)
  @match_options.behavior_for(dimension) != :ignore
end
|
|
419
|
+
|
|
420
|
+
# Describe an INSERT operation.
#
# When the operation's node exposes #label, the reason carries either the
# operation's :content preview or, failing that, the label in angle
# brackets. Otherwise a bare "Element inserted" is returned.
#
# @param operation [Operation] Operation
# @return [String] Reason description
def build_insert_reason(operation)
  inserted = operation[:node]
  preview = operation[:content]
  return "Element inserted" unless inserted.respond_to?(:label)

  "Element inserted: #{preview || "<#{inserted.label}>"}"
end
|
|
435
|
+
|
|
436
|
+
# Describe a DELETE operation.
#
# When the operation's node exposes #label, the reason carries either the
# operation's :content preview or, failing that, the label in angle
# brackets. Otherwise a bare "Element deleted" is returned.
#
# @param operation [Operation] Operation
# @return [String] Reason description
def build_delete_reason(operation)
  removed = operation[:node]
  preview = operation[:content]
  return "Element deleted" unless removed.respond_to?(:label)

  "Element deleted: #{preview || "<#{removed.label}>"}"
end
|
|
451
|
+
|
|
452
|
+
# Describe an UPDATE operation using its :change_type, falling back to
# "content" when none is given.
#
# @param operation [Operation] Operation
# @return [String] Reason description
def build_update_reason(operation)
  "updated #{operation[:change_type] || 'content'}"
end
|
|
460
|
+
|
|
461
|
+
# Describe a MOVE operation, naming both positions when the operation
# provides :from_position and :to_position.
#
# @param operation [Operation] Operation
# @return [String] Reason description
def build_move_reason(operation)
  origin = operation[:from_position]
  destination = operation[:to_position]
  return "moved to different position" unless origin && destination

  "moved from position #{origin} to #{destination}"
end
|
|
475
|
+
|
|
476
|
+
# Detect DELETE/INSERT pairs that differ only in attribute order and
# reclassify both nodes under the :attribute_order dimension.
#
# A delete (node1 only) is paired with the first still-unpaired insert
# (node2 only) that has the same element name, the same attributes
# regardless of order, and the same text/child structure. Each insert is
# consumed once it has been paired, so several identical deletes are
# matched with distinct inserts. No position check is performed.
#
# The given DiffNodes are modified in place.
#
# @param diff_nodes [Array<DiffNode>] Diff nodes to process
# @return [Array<DiffNode>] The same array, after reclassification
def detect_attribute_order_diffs(diff_nodes)
  deletes = diff_nodes.select { |dn| dn.node1 && !dn.node2 }
  unpaired_inserts = diff_nodes.select { |dn| !dn.node1 && dn.node2 }

  deletes.each do |delete_node|
    removed = delete_node.node1
    next unless attribute_bearing_node?(removed)

    match_index = unpaired_inserts.index do |insert_node|
      added = insert_node.node2
      attribute_bearing_node?(added) &&
        removed.name == added.name &&
        attributes_equal_ignoring_order?(removed.attributes, added.attributes) &&
        nodes_same_except_attr_order?(removed, added)
    end
    next unless match_index

    # Consume the insert so it cannot be paired with another delete
    matching_insert = unpaired_inserts.delete_at(match_index)

    reclassify_as_attribute_order(delete_node, removed)
    reclassify_as_attribute_order(matching_insert, matching_insert.node2)
  end

  diff_nodes
end

# True when the node exposes #name and a non-empty #attributes collection;
# a node without attributes cannot take part in an attribute-order diff.
def attribute_bearing_node?(node)
  return false unless node.respond_to?(:name) && node.respond_to?(:attributes)

  attributes = node.attributes
  !(attributes.nil? || attributes.empty?)
end

# Re-label a diff node as an attribute-order difference. Metadata elements
# stay informative, matching how the other conversions treat them.
def reclassify_as_attribute_order(diff_node, source_node)
  diff_node.dimension = :attribute_order
  diff_node.reason = "attribute order changed"
  diff_node.normative = metadata_element?(source_node) ? false : determine_normative(:attribute_order)
end
|
|
527
|
+
|
|
528
|
+
# Check whether two attribute collections hold the same name/value pairs,
# regardless of the order in which the attributes appear.
#
# Hash equality in Ruby already ignores insertion order, so no sorting is
# needed (sorting would also raise when keys are not mutually comparable).
# Values that expose #value are compared by that value rather than by
# object identity.
# NOTE(review): this assumes attribute objects (e.g. Nokogiri attrs) from
# different documents should match on their string value - confirm against
# the node types actually passed in.
#
# @param attrs1 [Hash, #to_h, nil] First attribute collection
# @param attrs2 [Hash, #to_h, nil] Second attribute collection
# @return [Boolean] True if attributes are equal (ignoring order)
def attributes_equal_ignoring_order?(attrs1, attrs2)
  return true if attrs1.nil? && attrs2.nil?
  return false if attrs1.nil? || attrs2.nil?

  plain = lambda do |attrs|
    pairs = attrs.respond_to?(:to_h) ? attrs.to_h : attrs
    next pairs unless pairs.is_a?(Hash)

    pairs.transform_values { |value| value.respond_to?(:value) ? value.value : value }
  end

  plain.call(attrs1) == plain.call(attrs2)
end
|
|
544
|
+
|
|
545
|
+
# Shallow check that two nodes carry the same content apart from their
# attributes: equal #text, the same number of children, and pairwise
# equal child names. Children are not compared recursively.
#
# @param node1 [Nokogiri::XML::Node] First node
# @param node2 [Nokogiri::XML::Node] Second node
# @return [Boolean] True if nodes are same except attribute order
def nodes_same_except_attr_order?(node1, node2)
  return false unless node1.text == node2.text

  kids1 = node1.children
  kids2 = node2.children
  return false unless kids1.length == kids2.length
  return true unless kids1.any?

  kids1.zip(kids2).all? { |left, right| left.name == right.name }
end
|
|
566
|
+
|
|
567
|
+
# Tell whether a node is one of the metadata/presentation elements listed
# in METADATA_ELEMENTS.
#
# The element name is taken from #label when the node has one (TreeNode),
# otherwise from #name (Nokogiri node). nil and objects exposing neither
# reader are never metadata.
#
# @param node [Object] Node to check (could be TreeNode or Nokogiri node)
# @return [Boolean] true if node is a metadata element
def metadata_element?(node)
  return false if node.nil?

  reader = %i[label name].find { |candidate| node.respond_to?(candidate) }
  return false unless reader

  METADATA_ELEMENTS.include?(node.public_send(reader))
end
|
|
586
|
+
|
|
587
|
+
# Build a human-readable summary of how two attribute hashes differ.
#
# Attribute names present only in the old hash are listed as "Missing",
# names present only in the new hash as "Extra", and names present in both
# with unequal values as "Changed" (each value shortened to 20 chars).
# A nil hash is treated as having no attributes.
#
# Plain Array set operations are used so the method does not depend on the
# Set library having been loaded.
#
# @param old_attrs [Hash, nil] Old attributes
# @param new_attrs [Hash, nil] New attributes
# @return [String] Detailed reason
def build_attribute_diff_details(old_attrs, new_attrs)
  old_attrs ||= {}
  new_attrs ||= {}

  old_keys = old_attrs.keys
  new_keys = new_attrs.keys

  missing = old_keys - new_keys
  extra = new_keys - old_keys
  changed = (old_keys & new_keys).reject { |key| old_attrs[key] == new_attrs[key] }

  parts = []
  parts << "Missing: #{missing.join(', ')}" unless missing.empty?
  parts << "Extra: #{extra.join(', ')}" unless extra.empty?
  unless changed.empty?
    summaries = changed.map do |key|
      before = truncate_for_reason(old_attrs[key], 20)
      after = truncate_for_reason(new_attrs[key], 20)
      "#{key}=\"#{before}\" → \"#{after}\""
    end
    parts << "Changed: #{summaries.join(', ')}"
  end

  parts.empty? ? "Attribute values differ" : "Attributes differ (#{parts.join('; ')})"
end
|
|
615
|
+
|
|
616
|
+
# Shorten text for use inside a reason message.
#
# Text longer than max_length is cut and suffixed with "..." so that the
# result is exactly max_length characters. When max_length is too small to
# hold the ellipsis (below 3) the text is hard-cut instead, so the result
# never exceeds the limit.
#
# @param text [#to_s, nil] Text to truncate (nil yields "")
# @param max_length [Integer] Maximum length of the returned string
# @return [String] Truncated text
def truncate_for_reason(text, max_length)
  return "" if text.nil?

  text = text.to_s
  return text if text.length <= max_length

  # A negative range end would count from the end of the string and return
  # more than max_length characters, so handle tiny limits separately
  return text[0, [max_length, 0].max] if max_length < 3

  "#{text[0...(max_length - 3)]}..."
end
|
|
629
|
+
end
|
|
630
|
+
end
|
|
631
|
+
end
|