canon 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +25 -135
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/advanced/extending-canon.adoc +193 -0
  6. data/docs/internals/diffnode-enrichment.adoc +611 -0
  7. data/docs/internals/index.adoc +251 -0
  8. data/docs/lychee.toml +13 -6
  9. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +250 -0
  10. data/docs/understanding/architecture.adoc +749 -33
  11. data/docs/understanding/comparison-pipeline.adoc +122 -0
  12. data/false_positive_analysis.txt +0 -0
  13. data/file1.html +1 -0
  14. data/file2.html +1 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +86 -0
  27. data/lib/canon/comparison/html_comparator.rb +51 -18
  28. data/lib/canon/comparison/html_parser.rb +80 -0
  29. data/lib/canon/comparison/json_comparator.rb +12 -0
  30. data/lib/canon/comparison/json_parser.rb +19 -0
  31. data/lib/canon/comparison/markup_comparator.rb +293 -0
  32. data/lib/canon/comparison/match_options/base_resolver.rb +143 -0
  33. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  34. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  35. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  36. data/lib/canon/comparison/match_options.rb +68 -463
  37. data/lib/canon/comparison/profile_definition.rb +149 -0
  38. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  39. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  40. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  41. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  42. data/lib/canon/comparison/xml_comparator/child_comparison.rb +189 -0
  43. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  44. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  45. data/lib/canon/comparison/xml_comparator/node_parser.rb +74 -0
  46. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +95 -0
  47. data/lib/canon/comparison/xml_comparator.rb +52 -664
  48. data/lib/canon/comparison/xml_node_comparison.rb +297 -0
  49. data/lib/canon/comparison/xml_parser.rb +19 -0
  50. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  51. data/lib/canon/comparison.rb +265 -110
  52. data/lib/canon/diff/diff_node.rb +32 -2
  53. data/lib/canon/diff/node_serializer.rb +191 -0
  54. data/lib/canon/diff/path_builder.rb +143 -0
  55. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  56. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  57. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  58. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  59. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  60. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  61. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  64. data/lib/canon/diff_formatter.rb +1 -1
  65. data/lib/canon/rspec_matchers.rb +1 -1
  66. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  67. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  68. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  69. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  70. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  71. data/lib/canon/version.rb +1 -1
  72. data/old-docs/ADVANCED_TOPICS.adoc +20 -0
  73. data/old-docs/BASIC_USAGE.adoc +16 -0
  74. data/old-docs/CHARACTER_VISUALIZATION.adoc +567 -0
  75. data/old-docs/CLI.adoc +497 -0
  76. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  77. data/old-docs/DIFF_ARCHITECTURE.adoc +435 -0
  78. data/old-docs/DIFF_FORMATTING.adoc +540 -0
  79. data/old-docs/DIFF_PARAMETERS.adoc +261 -0
  80. data/old-docs/DOM_DIFF.adoc +1017 -0
  81. data/old-docs/ENV_CONFIG.adoc +876 -0
  82. data/old-docs/FORMATS.adoc +867 -0
  83. data/old-docs/INPUT_VALIDATION.adoc +477 -0
  84. data/old-docs/MATCHER_BEHAVIOR.adoc +90 -0
  85. data/old-docs/MATCH_ARCHITECTURE.adoc +463 -0
  86. data/old-docs/MATCH_OPTIONS.adoc +912 -0
  87. data/old-docs/MODES.adoc +432 -0
  88. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  89. data/old-docs/OPTIONS.adoc +1387 -0
  90. data/old-docs/PREPROCESSING.adoc +491 -0
  91. data/old-docs/README.old.adoc +2831 -0
  92. data/old-docs/RSPEC.adoc +814 -0
  93. data/old-docs/RUBY_API.adoc +485 -0
  94. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +646 -0
  95. data/old-docs/SEMANTIC_TREE_DIFF.adoc +765 -0
  96. data/old-docs/STRING_COMPARE.adoc +345 -0
  97. data/old-docs/TMP.adoc +3384 -0
  98. data/old-docs/TREE_DIFF.adoc +1080 -0
  99. data/old-docs/UNDERSTANDING_CANON.adoc +17 -0
  100. data/old-docs/VERBOSE.adoc +482 -0
  101. data/old-docs/VISUALIZATION_MAP.adoc +625 -0
  102. data/old-docs/WHITESPACE_TREATMENT.adoc +1155 -0
  103. data/scripts/analyze_current_state.rb +85 -0
  104. data/scripts/analyze_false_positives.rb +114 -0
  105. data/scripts/analyze_remaining_failures.rb +105 -0
  106. data/scripts/compare_current_failures.rb +95 -0
  107. data/scripts/compare_dom_tree_diff.rb +158 -0
  108. data/scripts/compare_failures.rb +151 -0
  109. data/scripts/debug_attribute_extraction.rb +66 -0
  110. data/scripts/debug_blocks_839.rb +115 -0
  111. data/scripts/debug_meta_matching.rb +52 -0
  112. data/scripts/debug_p_matching.rb +192 -0
  113. data/scripts/debug_signature_matching.rb +118 -0
  114. data/scripts/debug_sourcecode_124.rb +32 -0
  115. data/scripts/debug_whitespace_sensitive.rb +192 -0
  116. data/scripts/extract_false_positives.rb +138 -0
  117. data/scripts/find_actual_false_positives.rb +125 -0
  118. data/scripts/investigate_all_false_positives.rb +161 -0
  119. data/scripts/investigate_batch1.rb +127 -0
  120. data/scripts/investigate_classification.rb +150 -0
  121. data/scripts/investigate_classification_detailed.rb +190 -0
  122. data/scripts/investigate_common_failures.rb +342 -0
  123. data/scripts/investigate_false_negative.rb +80 -0
  124. data/scripts/investigate_false_positive.rb +83 -0
  125. data/scripts/investigate_false_positives.rb +227 -0
  126. data/scripts/investigate_false_positives_batch.rb +163 -0
  127. data/scripts/investigate_mixed_content.rb +125 -0
  128. data/scripts/investigate_remaining_16.rb +214 -0
  129. data/scripts/run_single_test.rb +29 -0
  130. data/scripts/test_all_false_positives.rb +95 -0
  131. data/scripts/test_attribute_details.rb +61 -0
  132. data/scripts/test_both_algorithms.rb +49 -0
  133. data/scripts/test_both_simple.rb +49 -0
  134. data/scripts/test_enhanced_semantic_output.rb +125 -0
  135. data/scripts/test_readme_examples.rb +131 -0
  136. data/scripts/test_semantic_tree_diff.rb +99 -0
  137. data/scripts/test_semantic_ux_improvements.rb +135 -0
  138. data/scripts/test_single_false_positive.rb +119 -0
  139. data/scripts/test_size_limits.rb +99 -0
  140. data/test_html_1.html +21 -0
  141. data/test_html_2.html +21 -0
  142. data/test_nokogiri.rb +33 -0
  143. data/test_normalize.rb +45 -0
  144. metadata +123 -2
@@ -287,7 +287,7 @@ module Canon
287
287
  # @return [String] Formatted diff output
288
288
  def format_comparison_result(comparison_result, expected, actual)
289
289
  # Detect format from expected content
290
- format = Canon::Comparison.send(:detect_format, expected)
290
+ format = Canon::Comparison::FormatDetector.detect(expected)
291
291
 
292
292
  formatter_options = {
293
293
  use_color: @use_color,
@@ -130,7 +130,7 @@ module Canon
130
130
  else
131
131
  # Fall back to detection only if format not provided
132
132
  begin
133
- detected_format = Canon::Comparison.send(:detect_format, @expected)
133
+ detected_format = Canon::Comparison::FormatDetector.detect(@expected)
134
134
  detected_format.to_s.upcase
135
135
  rescue StandardError
136
136
  "CONTENT"
@@ -2,6 +2,11 @@
2
2
 
3
3
  require_relative "../diff/diff_node"
4
4
  require_relative "../comparison/match_options"
5
+ # OperationConverter helper modules
6
+ require_relative "operation_converter_helpers/metadata_enricher"
7
+ require_relative "operation_converter_helpers/reason_builder"
8
+ require_relative "operation_converter_helpers/post_processor"
9
+ require_relative "operation_converter_helpers/update_change_handler"
5
10
 
6
11
  module Canon
7
12
  module TreeDiff
@@ -84,7 +89,10 @@ module Canon
84
89
  end
85
90
 
86
91
  # Post-process to detect attribute-order-only differences
87
- detect_attribute_order_diffs(diff_nodes)
92
+ OperationConverterHelpers::PostProcessor.detect_attribute_order_diffs(
93
+ diff_nodes,
94
+ ->(dimension) { determine_normative(dimension) },
95
+ )
88
96
  end
89
97
 
90
98
  private
@@ -121,13 +129,19 @@ module Canon
121
129
  # @param operation [Operation] Insert operation
122
130
  # @return [DiffNode] Diff node representing insertion
123
131
  def convert_insert(operation)
124
- node2 = extract_source_node(operation[:node])
132
+ tree_node2 = operation[:node] # TreeNode from adapter
133
+ node2 = extract_source_node(tree_node2)
134
+
135
+ # Enrich with path and serialized content
136
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(nil,
137
+ tree_node2, @format)
125
138
 
126
139
  diff_node = Canon::Diff::DiffNode.new(
127
140
  node1: nil,
128
141
  node2: node2,
129
142
  dimension: :element_structure,
130
- reason: build_insert_reason(operation),
143
+ reason: OperationConverterHelpers::ReasonBuilder.build_insert_reason(operation),
144
+ **metadata,
131
145
  )
132
146
  # Metadata elements are informative (don't affect equivalence)
133
147
  diff_node.normative = metadata_element?(node2) ? false : determine_normative(:element_structure)
@@ -139,13 +153,20 @@ module Canon
139
153
  # @param operation [Operation] Delete operation
140
154
  # @return [DiffNode] Diff node representing deletion
141
155
  def convert_delete(operation)
142
- node1 = extract_source_node(operation[:node])
156
+ tree_node1 = operation[:node] # TreeNode from adapter
157
+ node1 = extract_source_node(tree_node1)
158
+
159
+ # Enrich with path and serialized content
160
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
161
+ tree_node1, nil, @format
162
+ )
143
163
 
144
164
  diff_node = Canon::Diff::DiffNode.new(
145
165
  node1: node1,
146
166
  node2: nil,
147
167
  dimension: :element_structure,
148
- reason: build_delete_reason(operation),
168
+ reason: OperationConverterHelpers::ReasonBuilder.build_delete_reason(operation),
169
+ **metadata,
149
170
  )
150
171
  # Metadata elements are informative (don't affect equivalence)
151
172
  diff_node.normative = metadata_element?(node1) ? false : determine_normative(:element_structure)
@@ -159,117 +180,26 @@ module Canon
159
180
  # @param operation [Operation] Update operation
160
181
  # @return [Array<DiffNode>] Diff nodes representing updates
161
182
  def convert_update(operation)
162
- node1 = extract_source_node(operation[:node1])
163
- node2 = extract_source_node(operation[:node2])
164
- changes = operation[:changes]
183
+ tree_node1 = operation[:node1] # TreeNode from adapter
184
+ tree_node2 = operation[:node2] # TreeNode from adapter
165
185
 
166
- # Handle case where changes is a boolean or non-hash value
167
- changes = {} unless changes.is_a?(Hash)
186
+ # Enrich with path and serialized content (shared by all DiffNodes from this operation)
187
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
188
+ tree_node1, tree_node2, @format
189
+ )
168
190
 
169
191
  # Check if nodes are metadata elements
192
+ node1 = extract_source_node(tree_node1)
193
+ node2 = extract_source_node(tree_node2)
170
194
  is_metadata = metadata_element?(node1) || metadata_element?(node2)
171
195
 
172
- diff_nodes = []
173
-
174
- # Create separate DiffNode for each change dimension
175
- # This ensures each dimension can be classified independently
176
-
177
- if changes.key?(:attributes)
178
- # Attribute value differences
179
- # Changes can be either true (flag) or { old: ..., new: ... } (detailed)
180
- if changes[:attributes].is_a?(Hash) && changes[:attributes].key?(:old)
181
- old_attrs = changes[:attributes][:old]
182
- new_attrs = changes[:attributes][:new]
183
- diff_details = build_attribute_diff_details(old_attrs, new_attrs)
184
- else
185
- diff_details = "attribute values differ"
186
- end
187
-
188
- diff_node = Canon::Diff::DiffNode.new(
189
- node1: node1,
190
- node2: node2,
191
- dimension: :attribute_values,
192
- reason: diff_details,
193
- )
194
- diff_node.normative = is_metadata ? false : determine_normative(:attribute_values)
195
- diff_nodes << diff_node
196
- end
197
-
198
- if changes.key?(:attribute_order)
199
- # Attribute order differences
200
- if changes[:attribute_order].is_a?(Hash) && changes[:attribute_order].key?(:old)
201
- old_order = changes[:attribute_order][:old]
202
- new_order = changes[:attribute_order][:new]
203
- reason = "Attribute order changed: [#{old_order.join(', ')}] → [#{new_order.join(', ')}]"
204
- else
205
- reason = "attribute order differs"
206
- end
207
-
208
- diff_node = Canon::Diff::DiffNode.new(
209
- node1: node1,
210
- node2: node2,
211
- dimension: :attribute_order,
212
- reason: reason,
213
- )
214
- diff_node.normative = is_metadata ? false : determine_normative(:attribute_order)
215
- diff_nodes << diff_node
216
- end
217
-
218
- if changes.key?(:value)
219
- # Text content differences
220
- if changes[:value].is_a?(Hash) && changes[:value].key?(:old)
221
- old_val = changes[:value][:old] || ""
222
- new_val = changes[:value][:new] || ""
223
- preview_old = truncate_for_reason(old_val.to_s, 40)
224
- preview_new = truncate_for_reason(new_val.to_s, 40)
225
- reason = "Text content changed: \"#{preview_old}\" → \"#{preview_new}\""
226
- else
227
- reason = "text content differs"
228
- end
229
-
230
- diff_node = Canon::Diff::DiffNode.new(
231
- node1: node1,
232
- node2: node2,
233
- dimension: :text_content,
234
- reason: reason,
235
- )
236
- diff_node.normative = is_metadata ? false : determine_normative(:text_content)
237
- diff_nodes << diff_node
238
- end
239
-
240
- if changes.key?(:label)
241
- # Element name differences
242
- if changes[:label].is_a?(Hash) && changes[:label].key?(:old)
243
- old_label = changes[:label][:old]
244
- new_label = changes[:label][:new]
245
- reason = "Element name changed: <#{old_label}> → <#{new_label}>"
246
- else
247
- reason = "element name differs"
248
- end
249
-
250
- diff_node = Canon::Diff::DiffNode.new(
251
- node1: node1,
252
- node2: node2,
253
- dimension: :element_structure,
254
- reason: reason,
255
- )
256
- diff_node.normative = is_metadata ? false : determine_normative(:element_structure)
257
- diff_nodes << diff_node
258
- end
259
-
260
- # If no specific changes detected, create a generic update
261
- if diff_nodes.empty?
262
- diff_node = Canon::Diff::DiffNode.new(
263
- node1: node1,
264
- node2: node2,
265
- dimension: :text_content,
266
- reason: "content differs",
267
- )
268
- diff_node.normative = is_metadata ? false : determine_normative(:text_content)
269
- diff_nodes << diff_node
270
- end
271
-
272
- diff_nodes
196
+ # Use UpdateChangeHandler to process different change types
197
+ OperationConverterHelpers::UpdateChangeHandler.convert(
198
+ operation,
199
+ metadata,
200
+ is_metadata,
201
+ ->(dimension) { determine_normative(dimension) },
202
+ )
273
203
  end
274
204
 
275
205
  # Convert MOVE operation to DiffNode
@@ -277,14 +207,22 @@ module Canon
277
207
  # @param operation [Operation] Move operation
278
208
  # @return [DiffNode] Diff node representing move
279
209
  def convert_move(operation)
280
- node1 = extract_source_node(operation[:node1])
281
- node2 = extract_source_node(operation[:node2])
210
+ tree_node1 = operation[:node1]
211
+ tree_node2 = operation[:node2]
212
+ node1 = extract_source_node(tree_node1)
213
+ node2 = extract_source_node(tree_node2)
214
+
215
+ # Enrich with path and serialized content
216
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
217
+ tree_node1, tree_node2, @format
218
+ )
282
219
 
283
220
  diff_node = Canon::Diff::DiffNode.new(
284
221
  node1: node1,
285
222
  node2: node2,
286
223
  dimension: :element_position,
287
- reason: build_move_reason(operation),
224
+ reason: OperationConverterHelpers::ReasonBuilder.build_move_reason(operation),
225
+ **metadata,
288
226
  )
289
227
  # Metadata elements are informative (don't affect equivalence)
290
228
  is_metadata = metadata_element?(node1) || metadata_element?(node2)
@@ -299,14 +237,22 @@ module Canon
299
237
  def convert_merge(operation)
300
238
  # Merge combines multiple nodes into one
301
239
  # node1 represents the source nodes, node2 is the merged result
302
- node1 = extract_source_node(operation[:nodes]&.first)
303
- node2 = extract_source_node(operation[:result])
240
+ tree_node1 = operation[:nodes]&.first
241
+ tree_node2 = operation[:result]
242
+ node1 = extract_source_node(tree_node1)
243
+ node2 = extract_source_node(tree_node2)
244
+
245
+ # Enrich with path and serialized content
246
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
247
+ tree_node1, tree_node2, @format
248
+ )
304
249
 
305
250
  diff_node = Canon::Diff::DiffNode.new(
306
251
  node1: node1,
307
252
  node2: node2,
308
253
  dimension: :element_structure,
309
254
  reason: "merged #{operation[:nodes]&.length || 0} nodes",
255
+ **metadata,
310
256
  )
311
257
  diff_node.normative = true # Merges are structural changes, always normative
312
258
  diff_node
@@ -319,14 +265,22 @@ module Canon
319
265
  def convert_split(operation)
320
266
  # Split divides one node into multiple
321
267
  # node1 is the original, node2 represents the split results
322
- node1 = extract_source_node(operation[:node])
323
- node2 = extract_source_node(operation[:results]&.first)
268
+ tree_node1 = operation[:node]
269
+ tree_node2 = operation[:results]&.first
270
+ node1 = extract_source_node(tree_node1)
271
+ node2 = extract_source_node(tree_node2)
272
+
273
+ # Enrich with path and serialized content
274
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
275
+ tree_node1, tree_node2, @format
276
+ )
324
277
 
325
278
  diff_node = Canon::Diff::DiffNode.new(
326
279
  node1: node1,
327
280
  node2: node2,
328
281
  dimension: :element_structure,
329
282
  reason: "split into #{operation[:results]&.length || 0} nodes",
283
+ **metadata,
330
284
  )
331
285
  diff_node.normative = true # Splits are structural changes, always normative
332
286
  diff_node
@@ -337,14 +291,22 @@ module Canon
337
291
  # @param operation [Operation] Upgrade operation
338
292
  # @return [DiffNode] Diff node representing upgrade
339
293
  def convert_upgrade(operation)
340
- node1 = extract_source_node(operation[:node1])
341
- node2 = extract_source_node(operation[:node2])
294
+ tree_node1 = operation[:node1]
295
+ tree_node2 = operation[:node2]
296
+ node1 = extract_source_node(tree_node1)
297
+ node2 = extract_source_node(tree_node2)
298
+
299
+ # Enrich with path and serialized content
300
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
301
+ tree_node1, tree_node2, @format
302
+ )
342
303
 
343
304
  diff_node = Canon::Diff::DiffNode.new(
344
305
  node1: node1,
345
306
  node2: node2,
346
307
  dimension: :element_hierarchy,
347
308
  reason: "promoted to higher level",
309
+ **metadata,
348
310
  )
349
311
  diff_node.normative = determine_normative(:element_hierarchy)
350
312
  diff_node
@@ -355,14 +317,22 @@ module Canon
355
317
  # @param operation [Operation] Downgrade operation
356
318
  # @return [DiffNode] Diff node representing downgrade
357
319
  def convert_downgrade(operation)
358
- node1 = extract_source_node(operation[:node1])
359
- node2 = extract_source_node(operation[:node2])
320
+ tree_node1 = operation[:node1]
321
+ tree_node2 = operation[:node2]
322
+ node1 = extract_source_node(tree_node1)
323
+ node2 = extract_source_node(tree_node2)
324
+
325
+ # Enrich with path and serialized content
326
+ metadata = OperationConverterHelpers::MetadataEnricher.enrich(
327
+ tree_node1, tree_node2, @format
328
+ )
360
329
 
361
330
  diff_node = Canon::Diff::DiffNode.new(
362
331
  node1: node1,
363
332
  node2: node2,
364
333
  dimension: :element_hierarchy,
365
334
  reason: "demoted to lower level",
335
+ **metadata,
366
336
  )
367
337
  diff_node.normative = determine_normative(:element_hierarchy)
368
338
  diff_node
@@ -378,32 +348,6 @@ module Canon
378
348
  tree_node.respond_to?(:source_node) ? tree_node.source_node : tree_node
379
349
  end
380
350
 
381
- # Determine update dimension based on what changed
382
- #
383
- # @param operation [Operation] Update operation
384
- # @return [Symbol] Match dimension
385
- def determine_update_dimension(operation)
386
- changes = operation[:changes] || {}
387
-
388
- # Check what actually changed
389
- if changes.key?(:attribute_order)
390
- # Only attribute order changed
391
- :attribute_order
392
- elsif changes.key?(:attributes)
393
- # Attribute values changed
394
- :attribute_values
395
- elsif changes.key?(:value)
396
- # Text content changed
397
- :text_content
398
- elsif changes.key?(:label)
399
- # Element name changed (rare)
400
- :element_structure
401
- else
402
- # Default to text_content for generic updates
403
- :text_content
404
- end
405
- end
406
-
407
351
  # Determine if a diff is normative based on match options
408
352
  #
409
353
  # @param dimension [Symbol] Match dimension
@@ -417,153 +361,6 @@ module Canon
417
361
  behavior != :ignore
418
362
  end
419
363
 
420
- # Build reason string for INSERT operation
421
- #
422
- # @param operation [Operation] Operation
423
- # @return [String] Reason description
424
- def build_insert_reason(operation)
425
- node = operation[:node]
426
- content = operation[:content]
427
-
428
- if node.respond_to?(:label)
429
- # Include content preview for clarity
430
- "Element inserted: #{content || "<#{node.label}>"}"
431
- else
432
- "Element inserted"
433
- end
434
- end
435
-
436
- # Build reason string for DELETE operation
437
- #
438
- # @param operation [Operation] Operation
439
- # @return [String] Reason description
440
- def build_delete_reason(operation)
441
- node = operation[:node]
442
- content = operation[:content]
443
-
444
- if node.respond_to?(:label)
445
- # Include content preview for clarity
446
- "Element deleted: #{content || "<#{node.label}>"}"
447
- else
448
- "Element deleted"
449
- end
450
- end
451
-
452
- # Build reason string for UPDATE operation
453
- #
454
- # @param operation [Operation] Operation
455
- # @return [String] Reason description
456
- def build_update_reason(operation)
457
- change_type = operation[:change_type] || "content"
458
- "updated #{change_type}"
459
- end
460
-
461
- # Build reason string for MOVE operation
462
- #
463
- # @param operation [Operation] Operation
464
- # @return [String] Reason description
465
- def build_move_reason(operation)
466
- from_pos = operation[:from_position]
467
- to_pos = operation[:to_position]
468
-
469
- if from_pos && to_pos
470
- "moved from position #{from_pos} to #{to_pos}"
471
- else
472
- "moved to different position"
473
- end
474
- end
475
-
476
- # Detect INSERT/DELETE pairs that differ only in attribute order
477
- # and reclassify them to use the attribute_order dimension
478
- #
479
- # @param diff_nodes [Array<DiffNode>] Diff nodes to process
480
- # @return [Array<DiffNode>] Processed diff nodes
481
- def detect_attribute_order_diffs(diff_nodes)
482
- # Group nodes by parent and element type
483
- deletes = diff_nodes.select { |dn| dn.node1 && !dn.node2 }
484
- inserts = diff_nodes.select { |dn| !dn.node1 && dn.node2 }
485
-
486
- # For each DELETE, try to find a matching INSERT
487
- deletes.each do |delete_node|
488
- node1 = delete_node.node1
489
- next unless node1.respond_to?(:name) && node1.respond_to?(:attributes)
490
-
491
- # Skip if node has no attributes (can't be attribute order diff)
492
- next if node1.attributes.nil? || node1.attributes.empty?
493
-
494
- # Find inserts with same element name at same position
495
- matching_insert = inserts.find do |insert_node|
496
- node2 = insert_node.node2
497
- next false unless node2.respond_to?(:name) && node2.respond_to?(:attributes)
498
- next false unless node1.name == node2.name
499
-
500
- # Must have attributes to differ in order
501
- next false if node2.attributes.nil? || node2.attributes.empty?
502
-
503
- # Check if they differ only in attribute order
504
- next false unless attributes_equal_ignoring_order?(
505
- node1.attributes, node2.attributes
506
- )
507
-
508
- # Ensure same content (text and children structure)
509
- nodes_same_except_attr_order?(node1, node2)
510
- end
511
-
512
- next unless matching_insert
513
-
514
- # Found an attribute-order-only difference
515
- # Reclassify both nodes to use attribute_order dimension
516
- delete_node.dimension = :attribute_order
517
- delete_node.reason = "attribute order changed"
518
- delete_node.normative = determine_normative(:attribute_order)
519
-
520
- matching_insert.dimension = :attribute_order
521
- matching_insert.reason = "attribute order changed"
522
- matching_insert.normative = determine_normative(:attribute_order)
523
- end
524
-
525
- diff_nodes
526
- end
527
-
528
- # Check if two attribute hashes are equal ignoring order
529
- #
530
- # @param attrs1 [Hash] First attribute hash
531
- # @param attrs2 [Hash] Second attribute hash
532
- # @return [Boolean] True if attributes are equal (ignoring order)
533
- def attributes_equal_ignoring_order?(attrs1, attrs2)
534
- return true if attrs1.nil? && attrs2.nil?
535
- return false if attrs1.nil? || attrs2.nil?
536
-
537
- # Convert to hashes if needed
538
- attrs1 = attrs1.to_h if attrs1.respond_to?(:to_h)
539
- attrs2 = attrs2.to_h if attrs2.respond_to?(:to_h)
540
-
541
- # Compare as sets (order-independent)
542
- attrs1.sort.to_h == attrs2.sort.to_h
543
- end
544
-
545
- # Check if two nodes are the same except for attribute order
546
- #
547
- # @param node1 [Nokogiri::XML::Node] First node
548
- # @param node2 [Nokogiri::XML::Node] Second node
549
- # @return [Boolean] True if nodes are same except attribute order
550
- def nodes_same_except_attr_order?(node1, node2)
551
- # Same text content
552
- return false if node1.text != node2.text
553
-
554
- # Same number of children
555
- return false if node1.children.length != node2.children.length
556
-
557
- # If has children, they should have same structure
558
- if node1.children.any?
559
- node1.children.zip(node2.children).all? do |child1, child2|
560
- child1.name == child2.name
561
- end
562
- else
563
- true
564
- end
565
- end
566
-
567
364
  # Check if a node is a metadata/presentation element
568
365
  #
569
366
  # @param node [Object] Node to check (could be TreeNode or Nokogiri node)
@@ -583,49 +380,6 @@ module Canon
583
380
  # Check if it's in our metadata elements list
584
381
  METADATA_ELEMENTS.include?(element_name)
585
382
  end
586
-
587
- # Build detailed reason for attribute differences
588
- #
589
- # @param old_attrs [Hash] Old attributes
590
- # @param new_attrs [Hash] New attributes
591
- # @return [String] Detailed reason
592
- def build_attribute_diff_details(old_attrs, new_attrs)
593
- old_keys = Set.new(old_attrs.keys)
594
- new_keys = Set.new(new_attrs.keys)
595
-
596
- missing = old_keys - new_keys
597
- extra = new_keys - old_keys
598
- changed = (old_keys & new_keys).reject do |k|
599
- old_attrs[k] == new_attrs[k]
600
- end
601
-
602
- parts = []
603
- parts << "Missing: #{missing.to_a.join(', ')}" if missing.any?
604
- parts << "Extra: #{extra.to_a.join(', ')}" if extra.any?
605
- if changed.any?
606
- parts << "Changed: #{changed.map do |k|
607
- "#{k}=\"#{truncate_for_reason(old_attrs[k],
608
- 20)}\" → \"#{truncate_for_reason(new_attrs[k],
609
- 20)}\""
610
- end.join(', ')}"
611
- end
612
-
613
- parts.any? ? "Attributes differ (#{parts.join('; ')})" : "Attribute values differ"
614
- end
615
-
616
- # Truncate text for reason messages
617
- #
618
- # @param text [String] Text to truncate
619
- # @param max_length [Integer] Maximum length
620
- # @return [String] Truncated text
621
- def truncate_for_reason(text, max_length)
622
- return "" if text.nil?
623
-
624
- text = text.to_s
625
- return text if text.length <= max_length
626
-
627
- "#{text[0...max_length - 3]}..."
628
- end
629
383
  end
630
384
  end
631
385
  end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../diff/path_builder"
4
+ require_relative "../../diff/node_serializer"
5
+
6
+ module Canon
7
+ module TreeDiff
8
+ module OperationConverterHelpers
9
+ # Metadata enrichment for DiffNodes
10
+ # Handles path building, serialization, and attribute extraction
11
+ module MetadataEnricher
12
+ # Enrich DiffNode with canonical path, serialized content, and attributes
13
+ # This extracts presentation-ready metadata from TreeNodes for Stage 4 rendering
14
+ #
15
+ # @param tree_node1 [Canon::TreeDiff::Core::TreeNode, nil] First tree node
16
+ # @param tree_node2 [Canon::TreeDiff::Core::TreeNode, nil] Second tree node
17
+ # @param format [Symbol] Document format
18
+ # @return [Hash] Enriched metadata hash
19
+ def self.enrich(tree_node1, tree_node2, format)
20
+ {
21
+ path: build_path(tree_node1 || tree_node2, format),
22
+ serialized_before: serialize(tree_node1),
23
+ serialized_after: serialize(tree_node2),
24
+ attributes_before: extract_attributes(tree_node1),
25
+ attributes_after: extract_attributes(tree_node2),
26
+ }
27
+ end
28
+
29
+ # Build canonical path for a TreeNode
30
+ #
31
+ # @param tree_node [Canon::TreeDiff::Core::TreeNode] Tree node
32
+ # @param format [Symbol] Document format
33
+ # @return [String, nil] Canonical path with ordinal indices
34
+ def self.build_path(tree_node, format)
35
+ return nil if tree_node.nil?
36
+
37
+ Canon::Diff::PathBuilder.build(tree_node,
38
+ format: format == :xml ? :document : :fragment)
39
+ end
40
+
41
+ # Serialize a TreeNode's source node to string
42
+ #
43
+ # @param tree_node [Canon::TreeDiff::Core::TreeNode, nil] Tree node
44
+ # @return [String, nil] Serialized content
45
+ def self.serialize(tree_node)
46
+ return nil if tree_node.nil?
47
+
48
+ # Extract source node from TreeNode
49
+ source = if tree_node.respond_to?(:source_node)
50
+ tree_node.source_node
51
+ else
52
+ tree_node
53
+ end
54
+
55
+ Canon::Diff::NodeSerializer.serialize(source)
56
+ end
57
+
58
+ # Extract attributes from a TreeNode
59
+ #
60
+ # @param tree_node [Canon::TreeDiff::Core::TreeNode, nil] Tree node
61
+ # @return [Hash, nil] Attributes hash
62
+ def self.extract_attributes(tree_node)
63
+ return nil if tree_node.nil?
64
+
65
+ # Use TreeNode's attributes directly (already normalized by adapter)
66
+ tree_node.respond_to?(:attributes) ? (tree_node.attributes || {}) : {}
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end