canon 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +69 -92
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/Gemfile +1 -0
  6. data/docs/_config.yml +90 -1
  7. data/docs/advanced/diff-classification.adoc +82 -2
  8. data/docs/advanced/extending-canon.adoc +193 -0
  9. data/docs/features/match-options/index.adoc +239 -1
  10. data/docs/internals/diffnode-enrichment.adoc +611 -0
  11. data/docs/internals/index.adoc +251 -0
  12. data/docs/lychee.toml +13 -6
  13. data/docs/understanding/architecture.adoc +749 -33
  14. data/docs/understanding/comparison-pipeline.adoc +122 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +87 -0
  27. data/lib/canon/comparison/html_comparator.rb +70 -26
  28. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  29. data/lib/canon/comparison/html_parser.rb +80 -0
  30. data/lib/canon/comparison/json_comparator.rb +12 -0
  31. data/lib/canon/comparison/json_parser.rb +19 -0
  32. data/lib/canon/comparison/markup_comparator.rb +293 -0
  33. data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
  34. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  35. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  36. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  37. data/lib/canon/comparison/match_options.rb +68 -463
  38. data/lib/canon/comparison/profile_definition.rb +149 -0
  39. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  40. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  41. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  42. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  43. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  44. data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
  45. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  46. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  47. data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
  48. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
  49. data/lib/canon/comparison/xml_comparator.rb +97 -684
  50. data/lib/canon/comparison/xml_node_comparison.rb +319 -0
  51. data/lib/canon/comparison/xml_parser.rb +19 -0
  52. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  53. data/lib/canon/comparison.rb +265 -110
  54. data/lib/canon/diff/diff_classifier.rb +101 -2
  55. data/lib/canon/diff/diff_node.rb +32 -2
  56. data/lib/canon/diff/formatting_detector.rb +1 -1
  57. data/lib/canon/diff/node_serializer.rb +191 -0
  58. data/lib/canon/diff/path_builder.rb +143 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  61. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  62. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  64. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  65. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  66. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  67. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  68. data/lib/canon/diff_formatter.rb +1 -1
  69. data/lib/canon/rspec_matchers.rb +38 -9
  70. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  71. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  72. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  73. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  74. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  75. data/lib/canon/version.rb +1 -1
  76. data/lib/canon/xml/data_model.rb +24 -13
  77. metadata +48 -2
@@ -287,7 +287,7 @@ module Canon
287
287
  # @return [String] Formatted diff output
288
288
  def format_comparison_result(comparison_result, expected, actual)
289
289
  # Detect format from expected content
290
- format = Canon::Comparison.send(:detect_format, expected)
290
+ format = Canon::Comparison::FormatDetector.detect(expected)
291
291
 
292
292
  formatter_options = {
293
293
  use_color: @use_color,
@@ -63,6 +63,15 @@ module Canon
63
63
  self
64
64
  end
65
65
 
66
+ # Chain method for setting match options
67
+ # @param match_opts [Hash] match options
68
+ # @return [SerializationMatcher] self for chaining
69
+ def with_match(**match_opts)
70
+ @match ||= {}
71
+ @match = @match.merge(match_opts)
72
+ self
73
+ end
74
+
66
75
  def matches?(target)
67
76
  @target = target
68
77
 
@@ -130,7 +139,7 @@ module Canon
130
139
  else
131
140
  # Fall back to detection only if format not provided
132
141
  begin
133
- detected_format = Canon::Comparison.send(:detect_format, @expected)
142
+ detected_format = Canon::Comparison::FormatDetector.detect(@expected)
134
143
  detected_format.to_s.upcase
135
144
  rescue StandardError
136
145
  "CONTENT"
@@ -252,12 +261,22 @@ module Canon
252
261
  diff_algorithm: diff_algorithm)
253
262
  end
254
263
 
255
- def be_yaml_equivalent_to(expected)
256
- SerializationMatcher.new(expected, :yaml)
264
+ def be_yaml_equivalent_to(expected, match_profile: nil, match: nil,
265
+ preprocessing: nil, diff_algorithm: nil)
266
+ SerializationMatcher.new(expected, :yaml,
267
+ match_profile: match_profile,
268
+ match: match,
269
+ preprocessing: preprocessing,
270
+ diff_algorithm: diff_algorithm)
257
271
  end
258
272
 
259
- def be_json_equivalent_to(expected)
260
- SerializationMatcher.new(expected, :json)
273
+ def be_json_equivalent_to(expected, match_profile: nil, match: nil,
274
+ preprocessing: nil, diff_algorithm: nil)
275
+ SerializationMatcher.new(expected, :json,
276
+ match_profile: match_profile,
277
+ match: match,
278
+ preprocessing: preprocessing,
279
+ diff_algorithm: diff_algorithm)
261
280
  end
262
281
 
263
282
  def be_html_equivalent_to(expected, match_profile: nil, match: nil,
@@ -287,12 +306,22 @@ module Canon
287
306
  diff_algorithm: diff_algorithm)
288
307
  end
289
308
 
290
- def be_equivalent_to(expected)
291
- SerializationMatcher.new(expected, nil)
309
+ def be_equivalent_to(expected, match_profile: nil, match: nil,
310
+ preprocessing: nil, diff_algorithm: nil)
311
+ SerializationMatcher.new(expected, nil,
312
+ match_profile: match_profile,
313
+ match: match,
314
+ preprocessing: preprocessing,
315
+ diff_algorithm: diff_algorithm)
292
316
  end
293
317
 
294
- def be_string_equivalent_to(expected)
295
- SerializationMatcher.new(expected, :string)
318
+ def be_string_equivalent_to(expected, match_profile: nil, match: nil,
319
+ preprocessing: nil, diff_algorithm: nil)
320
+ SerializationMatcher.new(expected, :string,
321
+ match_profile: match_profile,
322
+ match: match,
323
+ preprocessing: preprocessing,
324
+ diff_algorithm: diff_algorithm)
296
325
  end
297
326
 
298
327
  if defined?(::RSpec) && ::RSpec.respond_to?(:configure)
@@ -2,6 +2,11 @@
2
2
 
3
3
  require_relative "../diff/diff_node"
4
4
  require_relative "../comparison/match_options"
5
+ # OperationConverter helper modules
6
+ require_relative "operation_converter_helpers/metadata_enricher"
7
+ require_relative "operation_converter_helpers/reason_builder"
8
+ require_relative "operation_converter_helpers/post_processor"
9
+ require_relative "operation_converter_helpers/update_change_handler"
5
10
 
6
11
  module Canon
7
12
  module TreeDiff
@@ -84,7 +89,10 @@ module Canon
84
89
  end
85
90
 
86
91
  # Post-process to detect attribute-order-only differences
87
- detect_attribute_order_diffs(diff_nodes)
92
+ OperationConverterHelpers::PostProcessor.detect_attribute_order_diffs(
93
+ diff_nodes,
94
+ ->(dimension) { determine_normative(dimension) },
95
+ )
88
96
  end
89
97
 
90
98
  private
@@ -121,13 +129,19 @@ module Canon
121
129
  # @param operation [Operation] Insert operation
122
130
  # @return [DiffNode] Diff node representing insertion
123
131
  def convert_insert(operation)
124
- node2 = extract_source_node(operation[:node])
132
+ tree_node2 = operation[:node] # TreeNode from adapter
133
+ node2 = extract_source_node(tree_node2)
134
+
135
+ # Enrich with path and serialized content
136
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(nil,
137
+ tree_node2, @format)
125
138
 
126
139
  diff_node = Canon::Diff::DiffNode.new(
127
140
  node1: nil,
128
141
  node2: node2,
129
142
  dimension: :element_structure,
130
- reason: build_insert_reason(operation),
143
+ reason: OperationConverterHelpers::ReasonBuilder.build_insert_reason(operation),
144
+ **metadata,
131
145
  )
132
146
  # Metadata elements are informative (don't affect equivalence)
133
147
  diff_node.normative = metadata_element?(node2) ? false : determine_normative(:element_structure)
@@ -139,13 +153,20 @@ module Canon
139
153
  # @param operation [Operation] Delete operation
140
154
  # @return [DiffNode] Diff node representing deletion
141
155
  def convert_delete(operation)
142
- node1 = extract_source_node(operation[:node])
156
+ tree_node1 = operation[:node] # TreeNode from adapter
157
+ node1 = extract_source_node(tree_node1)
158
+
159
+ # Enrich with path and serialized content
160
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
161
+ tree_node1, nil, @format
162
+ )
143
163
 
144
164
  diff_node = Canon::Diff::DiffNode.new(
145
165
  node1: node1,
146
166
  node2: nil,
147
167
  dimension: :element_structure,
148
- reason: build_delete_reason(operation),
168
+ reason: OperationConverterHelpers::ReasonBuilder.build_delete_reason(operation),
169
+ **metadata,
149
170
  )
150
171
  # Metadata elements are informative (don't affect equivalence)
151
172
  diff_node.normative = metadata_element?(node1) ? false : determine_normative(:element_structure)
@@ -159,117 +180,26 @@ module Canon
159
180
  # @param operation [Operation] Update operation
160
181
  # @return [Array<DiffNode>] Diff nodes representing updates
161
182
  def convert_update(operation)
162
- node1 = extract_source_node(operation[:node1])
163
- node2 = extract_source_node(operation[:node2])
164
- changes = operation[:changes]
183
+ tree_node1 = operation[:node1] # TreeNode from adapter
184
+ tree_node2 = operation[:node2] # TreeNode from adapter
165
185
 
166
- # Handle case where changes is a boolean or non-hash value
167
- changes = {} unless changes.is_a?(Hash)
186
+ # Enrich with path and serialized content (shared by all DiffNodes from this operation)
187
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
188
+ tree_node1, tree_node2, @format
189
+ )
168
190
 
169
191
  # Check if nodes are metadata elements
192
+ node1 = extract_source_node(tree_node1)
193
+ node2 = extract_source_node(tree_node2)
170
194
  is_metadata = metadata_element?(node1) || metadata_element?(node2)
171
195
 
172
- diff_nodes = []
173
-
174
- # Create separate DiffNode for each change dimension
175
- # This ensures each dimension can be classified independently
176
-
177
- if changes.key?(:attributes)
178
- # Attribute value differences
179
- # Changes can be either true (flag) or { old: ..., new: ... } (detailed)
180
- if changes[:attributes].is_a?(Hash) && changes[:attributes].key?(:old)
181
- old_attrs = changes[:attributes][:old]
182
- new_attrs = changes[:attributes][:new]
183
- diff_details = build_attribute_diff_details(old_attrs, new_attrs)
184
- else
185
- diff_details = "attribute values differ"
186
- end
187
-
188
- diff_node = Canon::Diff::DiffNode.new(
189
- node1: node1,
190
- node2: node2,
191
- dimension: :attribute_values,
192
- reason: diff_details,
193
- )
194
- diff_node.normative = is_metadata ? false : determine_normative(:attribute_values)
195
- diff_nodes << diff_node
196
- end
197
-
198
- if changes.key?(:attribute_order)
199
- # Attribute order differences
200
- if changes[:attribute_order].is_a?(Hash) && changes[:attribute_order].key?(:old)
201
- old_order = changes[:attribute_order][:old]
202
- new_order = changes[:attribute_order][:new]
203
- reason = "Attribute order changed: [#{old_order.join(', ')}] → [#{new_order.join(', ')}]"
204
- else
205
- reason = "attribute order differs"
206
- end
207
-
208
- diff_node = Canon::Diff::DiffNode.new(
209
- node1: node1,
210
- node2: node2,
211
- dimension: :attribute_order,
212
- reason: reason,
213
- )
214
- diff_node.normative = is_metadata ? false : determine_normative(:attribute_order)
215
- diff_nodes << diff_node
216
- end
217
-
218
- if changes.key?(:value)
219
- # Text content differences
220
- if changes[:value].is_a?(Hash) && changes[:value].key?(:old)
221
- old_val = changes[:value][:old] || ""
222
- new_val = changes[:value][:new] || ""
223
- preview_old = truncate_for_reason(old_val.to_s, 40)
224
- preview_new = truncate_for_reason(new_val.to_s, 40)
225
- reason = "Text content changed: \"#{preview_old}\" → \"#{preview_new}\""
226
- else
227
- reason = "text content differs"
228
- end
229
-
230
- diff_node = Canon::Diff::DiffNode.new(
231
- node1: node1,
232
- node2: node2,
233
- dimension: :text_content,
234
- reason: reason,
235
- )
236
- diff_node.normative = is_metadata ? false : determine_normative(:text_content)
237
- diff_nodes << diff_node
238
- end
239
-
240
- if changes.key?(:label)
241
- # Element name differences
242
- if changes[:label].is_a?(Hash) && changes[:label].key?(:old)
243
- old_label = changes[:label][:old]
244
- new_label = changes[:label][:new]
245
- reason = "Element name changed: <#{old_label}> → <#{new_label}>"
246
- else
247
- reason = "element name differs"
248
- end
249
-
250
- diff_node = Canon::Diff::DiffNode.new(
251
- node1: node1,
252
- node2: node2,
253
- dimension: :element_structure,
254
- reason: reason,
255
- )
256
- diff_node.normative = is_metadata ? false : determine_normative(:element_structure)
257
- diff_nodes << diff_node
258
- end
259
-
260
- # If no specific changes detected, create a generic update
261
- if diff_nodes.empty?
262
- diff_node = Canon::Diff::DiffNode.new(
263
- node1: node1,
264
- node2: node2,
265
- dimension: :text_content,
266
- reason: "content differs",
267
- )
268
- diff_node.normative = is_metadata ? false : determine_normative(:text_content)
269
- diff_nodes << diff_node
270
- end
271
-
272
- diff_nodes
196
+ # Use UpdateChangeHandler to process different change types
197
+ OperationConverterHelpers::UpdateChangeHandler.convert(
198
+ operation,
199
+ metadata,
200
+ is_metadata,
201
+ ->(dimension) { determine_normative(dimension) },
202
+ )
273
203
  end
274
204
 
275
205
  # Convert MOVE operation to DiffNode
@@ -277,14 +207,22 @@ module Canon
277
207
  # @param operation [Operation] Move operation
278
208
  # @return [DiffNode] Diff node representing move
279
209
  def convert_move(operation)
280
- node1 = extract_source_node(operation[:node1])
281
- node2 = extract_source_node(operation[:node2])
210
+ tree_node1 = operation[:node1]
211
+ tree_node2 = operation[:node2]
212
+ node1 = extract_source_node(tree_node1)
213
+ node2 = extract_source_node(tree_node2)
214
+
215
+ # Enrich with path and serialized content
216
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
217
+ tree_node1, tree_node2, @format
218
+ )
282
219
 
283
220
  diff_node = Canon::Diff::DiffNode.new(
284
221
  node1: node1,
285
222
  node2: node2,
286
223
  dimension: :element_position,
287
- reason: build_move_reason(operation),
224
+ reason: OperationConverterHelpers::ReasonBuilder.build_move_reason(operation),
225
+ **metadata,
288
226
  )
289
227
  # Metadata elements are informative (don't affect equivalence)
290
228
  is_metadata = metadata_element?(node1) || metadata_element?(node2)
@@ -299,14 +237,22 @@ module Canon
299
237
  def convert_merge(operation)
300
238
  # Merge combines multiple nodes into one
301
239
  # node1 represents the source nodes, node2 is the merged result
302
- node1 = extract_source_node(operation[:nodes]&.first)
303
- node2 = extract_source_node(operation[:result])
240
+ tree_node1 = operation[:nodes]&.first
241
+ tree_node2 = operation[:result]
242
+ node1 = extract_source_node(tree_node1)
243
+ node2 = extract_source_node(tree_node2)
244
+
245
+ # Enrich with path and serialized content
246
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
247
+ tree_node1, tree_node2, @format
248
+ )
304
249
 
305
250
  diff_node = Canon::Diff::DiffNode.new(
306
251
  node1: node1,
307
252
  node2: node2,
308
253
  dimension: :element_structure,
309
254
  reason: "merged #{operation[:nodes]&.length || 0} nodes",
255
+ **metadata,
310
256
  )
311
257
  diff_node.normative = true # Merges are structural changes, always normative
312
258
  diff_node
@@ -319,14 +265,22 @@ module Canon
319
265
  def convert_split(operation)
320
266
  # Split divides one node into multiple
321
267
  # node1 is the original, node2 represents the split results
322
- node1 = extract_source_node(operation[:node])
323
- node2 = extract_source_node(operation[:results]&.first)
268
+ tree_node1 = operation[:node]
269
+ tree_node2 = operation[:results]&.first
270
+ node1 = extract_source_node(tree_node1)
271
+ node2 = extract_source_node(tree_node2)
272
+
273
+ # Enrich with path and serialized content
274
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
275
+ tree_node1, tree_node2, @format
276
+ )
324
277
 
325
278
  diff_node = Canon::Diff::DiffNode.new(
326
279
  node1: node1,
327
280
  node2: node2,
328
281
  dimension: :element_structure,
329
282
  reason: "split into #{operation[:results]&.length || 0} nodes",
283
+ **metadata,
330
284
  )
331
285
  diff_node.normative = true # Splits are structural changes, always normative
332
286
  diff_node
@@ -337,14 +291,22 @@ module Canon
337
291
  # @param operation [Operation] Upgrade operation
338
292
  # @return [DiffNode] Diff node representing upgrade
339
293
  def convert_upgrade(operation)
340
- node1 = extract_source_node(operation[:node1])
341
- node2 = extract_source_node(operation[:node2])
294
+ tree_node1 = operation[:node1]
295
+ tree_node2 = operation[:node2]
296
+ node1 = extract_source_node(tree_node1)
297
+ node2 = extract_source_node(tree_node2)
298
+
299
+ # Enrich with path and serialized content
300
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
301
+ tree_node1, tree_node2, @format
302
+ )
342
303
 
343
304
  diff_node = Canon::Diff::DiffNode.new(
344
305
  node1: node1,
345
306
  node2: node2,
346
307
  dimension: :element_hierarchy,
347
308
  reason: "promoted to higher level",
309
+ **metadata,
348
310
  )
349
311
  diff_node.normative = determine_normative(:element_hierarchy)
350
312
  diff_node
@@ -355,14 +317,22 @@ module Canon
355
317
  # @param operation [Operation] Downgrade operation
356
318
  # @return [DiffNode] Diff node representing downgrade
357
319
  def convert_downgrade(operation)
358
- node1 = extract_source_node(operation[:node1])
359
- node2 = extract_source_node(operation[:node2])
320
+ tree_node1 = operation[:node1]
321
+ tree_node2 = operation[:node2]
322
+ node1 = extract_source_node(tree_node1)
323
+ node2 = extract_source_node(tree_node2)
324
+
325
+ # Enrich with path and serialized content
326
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
327
+ tree_node1, tree_node2, @format
328
+ )
360
329
 
361
330
  diff_node = Canon::Diff::DiffNode.new(
362
331
  node1: node1,
363
332
  node2: node2,
364
333
  dimension: :element_hierarchy,
365
334
  reason: "demoted to lower level",
335
+ **metadata,
366
336
  )
367
337
  diff_node.normative = determine_normative(:element_hierarchy)
368
338
  diff_node
@@ -378,32 +348,6 @@ module Canon
378
348
  tree_node.respond_to?(:source_node) ? tree_node.source_node : tree_node
379
349
  end
380
350
 
381
- # Determine update dimension based on what changed
382
- #
383
- # @param operation [Operation] Update operation
384
- # @return [Symbol] Match dimension
385
- def determine_update_dimension(operation)
386
- changes = operation[:changes] || {}
387
-
388
- # Check what actually changed
389
- if changes.key?(:attribute_order)
390
- # Only attribute order changed
391
- :attribute_order
392
- elsif changes.key?(:attributes)
393
- # Attribute values changed
394
- :attribute_values
395
- elsif changes.key?(:value)
396
- # Text content changed
397
- :text_content
398
- elsif changes.key?(:label)
399
- # Element name changed (rare)
400
- :element_structure
401
- else
402
- # Default to text_content for generic updates
403
- :text_content
404
- end
405
- end
406
-
407
351
  # Determine if a diff is normative based on match options
408
352
  #
409
353
  # @param dimension [Symbol] Match dimension
@@ -417,153 +361,6 @@ module Canon
417
361
  behavior != :ignore
418
362
  end
419
363
 
420
- # Build reason string for INSERT operation
421
- #
422
- # @param operation [Operation] Operation
423
- # @return [String] Reason description
424
- def build_insert_reason(operation)
425
- node = operation[:node]
426
- content = operation[:content]
427
-
428
- if node.respond_to?(:label)
429
- # Include content preview for clarity
430
- "Element inserted: #{content || "<#{node.label}>"}"
431
- else
432
- "Element inserted"
433
- end
434
- end
435
-
436
- # Build reason string for DELETE operation
437
- #
438
- # @param operation [Operation] Operation
439
- # @return [String] Reason description
440
- def build_delete_reason(operation)
441
- node = operation[:node]
442
- content = operation[:content]
443
-
444
- if node.respond_to?(:label)
445
- # Include content preview for clarity
446
- "Element deleted: #{content || "<#{node.label}>"}"
447
- else
448
- "Element deleted"
449
- end
450
- end
451
-
452
- # Build reason string for UPDATE operation
453
- #
454
- # @param operation [Operation] Operation
455
- # @return [String] Reason description
456
- def build_update_reason(operation)
457
- change_type = operation[:change_type] || "content"
458
- "updated #{change_type}"
459
- end
460
-
461
- # Build reason string for MOVE operation
462
- #
463
- # @param operation [Operation] Operation
464
- # @return [String] Reason description
465
- def build_move_reason(operation)
466
- from_pos = operation[:from_position]
467
- to_pos = operation[:to_position]
468
-
469
- if from_pos && to_pos
470
- "moved from position #{from_pos} to #{to_pos}"
471
- else
472
- "moved to different position"
473
- end
474
- end
475
-
476
- # Detect INSERT/DELETE pairs that differ only in attribute order
477
- # and reclassify them to use the attribute_order dimension
478
- #
479
- # @param diff_nodes [Array<DiffNode>] Diff nodes to process
480
- # @return [Array<DiffNode>] Processed diff nodes
481
- def detect_attribute_order_diffs(diff_nodes)
482
- # Group nodes by parent and element type
483
- deletes = diff_nodes.select { |dn| dn.node1 && !dn.node2 }
484
- inserts = diff_nodes.select { |dn| !dn.node1 && dn.node2 }
485
-
486
- # For each DELETE, try to find a matching INSERT
487
- deletes.each do |delete_node|
488
- node1 = delete_node.node1
489
- next unless node1.respond_to?(:name) && node1.respond_to?(:attributes)
490
-
491
- # Skip if node has no attributes (can't be attribute order diff)
492
- next if node1.attributes.nil? || node1.attributes.empty?
493
-
494
- # Find inserts with same element name at same position
495
- matching_insert = inserts.find do |insert_node|
496
- node2 = insert_node.node2
497
- next false unless node2.respond_to?(:name) && node2.respond_to?(:attributes)
498
- next false unless node1.name == node2.name
499
-
500
- # Must have attributes to differ in order
501
- next false if node2.attributes.nil? || node2.attributes.empty?
502
-
503
- # Check if they differ only in attribute order
504
- next false unless attributes_equal_ignoring_order?(
505
- node1.attributes, node2.attributes
506
- )
507
-
508
- # Ensure same content (text and children structure)
509
- nodes_same_except_attr_order?(node1, node2)
510
- end
511
-
512
- next unless matching_insert
513
-
514
- # Found an attribute-order-only difference
515
- # Reclassify both nodes to use attribute_order dimension
516
- delete_node.dimension = :attribute_order
517
- delete_node.reason = "attribute order changed"
518
- delete_node.normative = determine_normative(:attribute_order)
519
-
520
- matching_insert.dimension = :attribute_order
521
- matching_insert.reason = "attribute order changed"
522
- matching_insert.normative = determine_normative(:attribute_order)
523
- end
524
-
525
- diff_nodes
526
- end
527
-
528
- # Check if two attribute hashes are equal ignoring order
529
- #
530
- # @param attrs1 [Hash] First attribute hash
531
- # @param attrs2 [Hash] Second attribute hash
532
- # @return [Boolean] True if attributes are equal (ignoring order)
533
- def attributes_equal_ignoring_order?(attrs1, attrs2)
534
- return true if attrs1.nil? && attrs2.nil?
535
- return false if attrs1.nil? || attrs2.nil?
536
-
537
- # Convert to hashes if needed
538
- attrs1 = attrs1.to_h if attrs1.respond_to?(:to_h)
539
- attrs2 = attrs2.to_h if attrs2.respond_to?(:to_h)
540
-
541
- # Compare as sets (order-independent)
542
- attrs1.sort.to_h == attrs2.sort.to_h
543
- end
544
-
545
- # Check if two nodes are the same except for attribute order
546
- #
547
- # @param node1 [Nokogiri::XML::Node] First node
548
- # @param node2 [Nokogiri::XML::Node] Second node
549
- # @return [Boolean] True if nodes are same except attribute order
550
- def nodes_same_except_attr_order?(node1, node2)
551
- # Same text content
552
- return false if node1.text != node2.text
553
-
554
- # Same number of children
555
- return false if node1.children.length != node2.children.length
556
-
557
- # If has children, they should have same structure
558
- if node1.children.any?
559
- node1.children.zip(node2.children).all? do |child1, child2|
560
- child1.name == child2.name
561
- end
562
- else
563
- true
564
- end
565
- end
566
-
567
364
  # Check if a node is a metadata/presentation element
568
365
  #
569
366
  # @param node [Object] Node to check (could be TreeNode or Nokogiri node)
@@ -583,49 +380,6 @@ module Canon
583
380
  # Check if it's in our metadata elements list
584
381
  METADATA_ELEMENTS.include?(element_name)
585
382
  end
586
-
587
- # Build detailed reason for attribute differences
588
- #
589
- # @param old_attrs [Hash] Old attributes
590
- # @param new_attrs [Hash] New attributes
591
- # @return [String] Detailed reason
592
- def build_attribute_diff_details(old_attrs, new_attrs)
593
- old_keys = Set.new(old_attrs.keys)
594
- new_keys = Set.new(new_attrs.keys)
595
-
596
- missing = old_keys - new_keys
597
- extra = new_keys - old_keys
598
- changed = (old_keys & new_keys).reject do |k|
599
- old_attrs[k] == new_attrs[k]
600
- end
601
-
602
- parts = []
603
- parts << "Missing: #{missing.to_a.join(', ')}" if missing.any?
604
- parts << "Extra: #{extra.to_a.join(', ')}" if extra.any?
605
- if changed.any?
606
- parts << "Changed: #{changed.map do |k|
607
- "#{k}=\"#{truncate_for_reason(old_attrs[k],
608
- 20)}\" → \"#{truncate_for_reason(new_attrs[k],
609
- 20)}\""
610
- end.join(', ')}"
611
- end
612
-
613
- parts.any? ? "Attributes differ (#{parts.join('; ')})" : "Attribute values differ"
614
- end
615
-
616
- # Truncate text for reason messages
617
- #
618
- # @param text [String] Text to truncate
619
- # @param max_length [Integer] Maximum length
620
- # @return [String] Truncated text
621
- def truncate_for_reason(text, max_length)
622
- return "" if text.nil?
623
-
624
- text = text.to_s
625
- return text if text.length <= max_length
626
-
627
- "#{text[0...max_length - 3]}..."
628
- end
629
383
  end
630
384
  end
631
385
  end