canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -0,0 +1,631 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../diff/diff_node"
4
+ require_relative "../comparison/match_options"
5
+
6
+ module Canon
7
+ module TreeDiff
8
+ # Converts TreeDiff Operations to DiffNodes for integration with Canon's
9
+ # existing diff pipeline.
10
+ #
11
+ # This class bridges the semantic tree diff system with Canon's DOM-based
12
+ # diff architecture by mapping operations to match dimensions and creating
13
+ # DiffNode objects that can be processed by the standard diff formatter.
14
+ #
15
+ # @example Convert operations to diff nodes
16
+ # converter = OperationConverter.new(format: :xml, match_options: opts)
17
+ # diff_nodes = converter.convert(operations)
18
+ #
19
+ class OperationConverter
20
# Match dimension associated with each tree-diff operation type.
OPERATION_TO_DIMENSION = {
  # A node was added or removed.
  insert: :element_structure,
  delete: :element_structure,

  # A node changed in place. :text_content is only the default; the
  # dimension is refined according to what actually changed.
  update: :text_content,

  # A node changed position.
  move: :element_position,

  # Nodes were combined or divided.
  merge: :element_structure,
  split: :element_structure,

  # A node changed depth in the tree.
  upgrade: :element_hierarchy,
  downgrade: :element_hierarchy,
}.freeze

# Names of metadata/presentation elements. Differences on these elements
# are treated as informative: they do not affect semantic equivalence.
METADATA_ELEMENTS = %w[
  semx
  fmt-concept fmt-name fmt-title fmt-xref fmt-eref
  fmt-termref fmt-element-name fmt-link
  autonum
  meta link base title style script
].freeze
39
+
40
+ attr_reader :format, :match_options
41
+
42
# Build a converter for one document format.
#
# The raw match options are resolved by the MatchOptions module that
# belongs to the format (XML and every HTML variant share the Xml module),
# and the resolved hash is wrapped in a ResolvedMatchOptions, which is what
# #match_options exposes afterwards.
#
# @param format [Symbol] Document format (:xml, :html, :html4, :html5,
#   :json or :yaml)
# @param match_options [Hash] Match options for determining
#   normative/informative; handed to the resolver as +match:+
# @raise [ArgumentError] if +format+ is none of the symbols above
def initialize(format:, match_options: {})
  @format = format

  resolver = case format
             when :xml, :html, :html4, :html5
               Canon::Comparison::MatchOptions::Xml
             when :json
               Canon::Comparison::MatchOptions::Json
             when :yaml
               Canon::Comparison::MatchOptions::Yaml
             else
               raise ArgumentError, "Unknown format: #{format}"
             end

  resolved = resolver.resolve(format: format, match: match_options)

  @match_options = Canon::Comparison::ResolvedMatchOptions.new(
    resolved,
    format: format,
  )
end
76
+
77
# Turn a list of tree-diff operations into a flat list of DiffNodes.
#
# Every operation is converted on its own (an operation may expand to
# several nodes, hence the flattening); the combined list is then passed
# through the attribute-order post-processing step.
#
# @param operations [Array<Operation>] Operations to convert
# @return [Array<DiffNode>] Converted diff nodes
def convert(operations)
  converted = operations.flat_map { |op| convert_operation(op) }
  detect_attribute_order_diffs(converted)
end
89
+
90
+ private
91
+
92
# Dispatch one operation to the converter for its type.
#
# @param operation [Operation] Operation to convert
# @return [DiffNode, Array<DiffNode>] A single diff node, or an array of
#   diff nodes for :update operations
# @raise [ArgumentError] if the operation type is not recognised
def convert_operation(operation)
  handlers = {
    insert: :convert_insert,
    delete: :convert_delete,
    update: :convert_update,
    move: :convert_move,
    merge: :convert_merge,
    split: :convert_split,
    upgrade: :convert_upgrade,
    downgrade: :convert_downgrade,
  }

  kind = operation.type
  handler = handlers[kind]
  raise ArgumentError, "Unknown operation type: #{kind}" unless handler

  # The converters are private, so they are invoked through #send.
  send(handler, operation)
end
118
+
119
# Build the DiffNode for an INSERT operation.
#
# The inserted node becomes node2; node1 is nil because nothing
# corresponds to it on the other side.
#
# @param operation [Operation] Insert operation
# @return [DiffNode] Diff node representing insertion
def convert_insert(operation)
  added = extract_source_node(operation[:node])

  Canon::Diff::DiffNode.new(
    node1: nil,
    node2: added,
    dimension: :element_structure,
    reason: build_insert_reason(operation),
  ).tap do |diff_node|
    # Metadata elements never count as normative; everything else follows
    # the match options for the element_structure dimension.
    diff_node.normative = !metadata_element?(added) &&
      determine_normative(:element_structure)
  end
end
136
+
137
# Build the DiffNode for a DELETE operation.
#
# The deleted node becomes node1; node2 is nil because nothing
# corresponds to it on the other side.
#
# @param operation [Operation] Delete operation
# @return [DiffNode] Diff node representing deletion
def convert_delete(operation)
  removed = extract_source_node(operation[:node])

  Canon::Diff::DiffNode.new(
    node1: removed,
    node2: nil,
    dimension: :element_structure,
    reason: build_delete_reason(operation),
  ).tap do |diff_node|
    # Metadata elements never count as normative; everything else follows
    # the match options for the element_structure dimension.
    diff_node.normative = !metadata_element?(removed) &&
      determine_normative(:element_structure)
  end
end
154
+
155
# Convert UPDATE operation to DiffNode(s)
#
# One DiffNode is produced per changed aspect so that each dimension can be
# classified independently:
#
# * +:attributes+      -> dimension :attribute_values
# * +:attribute_order+ -> dimension :attribute_order
# * +:value+           -> dimension :text_content
# * +:label+           -> dimension :element_structure
#
# When none of these keys is present (or +changes+ is not a Hash at all) a
# single generic :text_content node with reason "content differs" is
# returned. Nodes touching a metadata element are always informative.
#
# @param operation [Operation] Update operation
# @return [Array<DiffNode>] Diff nodes representing updates (never empty)
def convert_update(operation)
  node1 = extract_source_node(operation[:node1])
  node2 = extract_source_node(operation[:node2])

  # `changes` may be a bare flag (e.g. true) rather than a Hash of details.
  changes = operation[:changes]
  changes = {} unless changes.is_a?(Hash)

  is_metadata = metadata_element?(node1) || metadata_element?(node2)

  # [dimension, reason] pairs, in the order the nodes are emitted.
  specs = []
  if changes.key?(:attributes)
    specs << [:attribute_values, update_attributes_reason(changes[:attributes])]
  end
  if changes.key?(:attribute_order)
    specs << [:attribute_order, update_attribute_order_reason(changes[:attribute_order])]
  end
  specs << [:text_content, update_text_reason(changes[:value])] if changes.key?(:value)
  specs << [:element_structure, update_label_reason(changes[:label])] if changes.key?(:label)

  # No specific change recorded: fall back to a generic update.
  specs << [:text_content, "content differs"] if specs.empty?

  specs.map do |dimension, reason|
    diff_node = Canon::Diff::DiffNode.new(
      node1: node1,
      node2: node2,
      dimension: dimension,
      reason: reason,
    )
    diff_node.normative = is_metadata ? false : determine_normative(dimension)
    diff_node
  end
end

# True when a change entry carries { old: ..., new: ... } details instead
# of being a plain flag such as +true+.
def detailed_update_change?(change)
  change.is_a?(Hash) && change.key?(:old)
end

# Reason text for an attribute-value change.
def update_attributes_reason(change)
  return "attribute values differ" unless detailed_update_change?(change)

  # A missing side is treated as "no attributes" instead of raising.
  build_attribute_diff_details(change[:old] || {}, change[:new] || {})
end

# Reason text for an attribute-order change.
def update_attribute_order_reason(change)
  return "attribute order differs" unless detailed_update_change?(change)

  # Array() tolerates a nil side, which previously raised NoMethodError.
  old_order = Array(change[:old]).join(", ")
  new_order = Array(change[:new]).join(", ")
  "Attribute order changed: [#{old_order}] → [#{new_order}]"
end

# Reason text for a text-content change (each side previewed, max 40 chars).
def update_text_reason(change)
  return "text content differs" unless detailed_update_change?(change)

  preview_old = truncate_for_reason((change[:old] || "").to_s, 40)
  preview_new = truncate_for_reason((change[:new] || "").to_s, 40)
  "Text content changed: \"#{preview_old}\" → \"#{preview_new}\""
end

# Reason text for an element-name change.
def update_label_reason(change)
  return "element name differs" unless detailed_update_change?(change)

  "Element name changed: <#{change[:old]}> → <#{change[:new]}>"
end
274
+
275
# Build the DiffNode for a MOVE operation.
#
# @param operation [Operation] Move operation
# @return [DiffNode] Diff node representing move
def convert_move(operation)
  origin = extract_source_node(operation[:node1])
  target = extract_source_node(operation[:node2])

  Canon::Diff::DiffNode.new(
    node1: origin,
    node2: target,
    dimension: :element_position,
    reason: build_move_reason(operation),
  ).tap do |diff_node|
    # A move involving a metadata element on either side is informative.
    involves_metadata = metadata_element?(origin) || metadata_element?(target)
    diff_node.normative = !involves_metadata &&
      determine_normative(:element_position)
  end
end
294
+
295
# Build the DiffNode for a MERGE operation.
#
# A merge combines several nodes into one. Only the first source node is
# kept as node1; the merged result is node2. The reason states how many
# nodes were merged (0 when the operation carries no node list).
#
# @param operation [Operation] Merge operation
# @return [DiffNode] Diff node representing merge
def convert_merge(operation)
  sources = operation[:nodes]

  Canon::Diff::DiffNode.new(
    node1: extract_source_node(sources&.first),
    node2: extract_source_node(operation[:result]),
    dimension: :element_structure,
    reason: "merged #{sources&.length || 0} nodes",
  ).tap do |diff_node|
    # Merges are structural changes, always normative.
    diff_node.normative = true
  end
end
314
+
315
# Build the DiffNode for a SPLIT operation.
#
# A split divides one node into several. The original node is node1; only
# the first resulting node is kept as node2. The reason states how many
# nodes resulted (0 when the operation carries no result list).
#
# @param operation [Operation] Split operation
# @return [DiffNode] Diff node representing split
def convert_split(operation)
  pieces = operation[:results]

  Canon::Diff::DiffNode.new(
    node1: extract_source_node(operation[:node]),
    node2: extract_source_node(pieces&.first),
    dimension: :element_structure,
    reason: "split into #{pieces&.length || 0} nodes",
  ).tap do |diff_node|
    # Splits are structural changes, always normative.
    diff_node.normative = true
  end
end
334
+
335
# Build the DiffNode for an UPGRADE operation (promote/decrease depth).
#
# Unlike insert/delete/move, no metadata-element check is applied here:
# normativity depends only on the element_hierarchy match option.
#
# @param operation [Operation] Upgrade operation
# @return [DiffNode] Diff node representing upgrade
def convert_upgrade(operation)
  Canon::Diff::DiffNode.new(
    node1: extract_source_node(operation[:node1]),
    node2: extract_source_node(operation[:node2]),
    dimension: :element_hierarchy,
    reason: "promoted to higher level",
  ).tap do |diff_node|
    diff_node.normative = determine_normative(:element_hierarchy)
  end
end
352
+
353
# Build the DiffNode for a DOWNGRADE operation (demote/increase depth).
#
# Unlike insert/delete/move, no metadata-element check is applied here:
# normativity depends only on the element_hierarchy match option.
#
# @param operation [Operation] Downgrade operation
# @return [DiffNode] Diff node representing downgrade
def convert_downgrade(operation)
  Canon::Diff::DiffNode.new(
    node1: extract_source_node(operation[:node1]),
    node2: extract_source_node(operation[:node2]),
    dimension: :element_hierarchy,
    reason: "demoted to lower level",
  ).tap do |diff_node|
    diff_node.normative = determine_normative(:element_hierarchy)
  end
end
370
+
371
# Unwrap a tree node to the node it was built from.
#
# Objects that expose #source_node are unwrapped; anything else is handed
# back untouched, and nil stays nil.
#
# @param tree_node [TreeNode, nil] Tree node wrapper
# @return [Object, nil] Source node (Nokogiri, Hash, etc.)
def extract_source_node(tree_node)
  return nil if tree_node.nil?
  return tree_node unless tree_node.respond_to?(:source_node)

  tree_node.source_node
end
380
+
381
# Determine update dimension based on what changed
#
# The first key present in the operation's +changes+ wins, checked in this
# order: +:attribute_order+, +:attributes+, +:value+, +:label+. Anything
# else falls back to :text_content.
#
# +changes+ is only inspected when it is a Hash. A bare flag such as +true+
# or a missing value is treated as "no details" (the same guard
# #convert_update applies) instead of raising NoMethodError on +key?+.
#
# @param operation [Operation] Update operation
# @return [Symbol] Match dimension
def determine_update_dimension(operation)
  changes = operation[:changes]
  changes = {} unless changes.is_a?(Hash)

  # change key => dimension, listed in priority order
  dimension_by_change = {
    attribute_order: :attribute_order,
    attributes: :attribute_values,
    value: :text_content,
    label: :element_structure, # element name changed (rare)
  }

  matched = dimension_by_change.find { |change_key, _| changes.key?(change_key) }
  matched ? matched.last : :text_content
end
406
+
407
# Decide whether a difference in the given dimension is normative.
#
# The resolved match options are asked for the behavior configured for the
# dimension. Only the :ignore behavior makes a difference informative (not
# shown by default); every other behavior makes it normative (shown).
#
# @param dimension [Symbol] Match dimension
# @return [Boolean] true if normative (should be shown)
def determine_normative(dimension)
  ignored = @match_options.behavior_for(dimension) == :ignore
  !ignored
end
419
+
420
# Describe an INSERT operation in words.
#
# When the operation's node exposes #label, the message ends with the
# operation's content preview if one is present, otherwise with the label
# in angle brackets. Without a labelled node the message is generic.
#
# @param operation [Operation] Operation
# @return [String] Reason description
def build_insert_reason(operation)
  node = operation[:node]
  return "Element inserted" unless node.respond_to?(:label)

  preview = operation[:content] || "<#{node.label}>"
  "Element inserted: #{preview}"
end
435
+
436
# Describe a DELETE operation in words.
#
# When the operation's node exposes #label, the message ends with the
# operation's content preview if one is present, otherwise with the label
# in angle brackets. Without a labelled node the message is generic.
#
# @param operation [Operation] Operation
# @return [String] Reason description
def build_delete_reason(operation)
  node = operation[:node]
  return "Element deleted" unless node.respond_to?(:label)

  preview = operation[:content] || "<#{node.label}>"
  "Element deleted: #{preview}"
end
451
+
452
# Describe an UPDATE operation in words.
#
# Uses the operation's +:change_type+ entry, or the word "content" when
# the operation does not provide one.
#
# @param operation [Operation] Operation
# @return [String] Reason description
def build_update_reason(operation)
  "updated #{operation[:change_type] || 'content'}"
end
460
+
461
# Describe a MOVE operation in words.
#
# Both positions are quoted when the operation provides +:from_position+
# and +:to_position+; otherwise a generic message is returned.
#
# @param operation [Operation] Operation
# @return [String] Reason description
def build_move_reason(operation)
  origin = operation[:from_position]
  destination = operation[:to_position]
  return "moved to different position" unless origin && destination

  "moved from position #{origin} to #{destination}"
end
475
+
476
# Detect INSERT/DELETE pairs that differ only in attribute order
# and reclassify them to use the attribute_order dimension
#
# A delete-like node (node1 only) is paired with the first still-unpaired
# insert-like node (node2 only) that has the same element name, equal
# attributes regardless of order, and the same text and child element
# names. Both nodes of a pair get dimension :attribute_order, the reason
# "attribute order changed", and a normative flag taken from the match
# options for that dimension.
#
# Each insert is paired at most once. Previously a matched insert stayed in
# the candidate pool, so several identical deletes could all pair with one
# insert and a genuine deletion was relabelled as an attribute-order change.
#
# NOTE(review): neither parent nor position is compared, only name,
# attributes, text and child names — confirm that is intended.
#
# @param diff_nodes [Array<DiffNode>] Diff nodes to process (mutated in place)
# @return [Array<DiffNode>] The same array that was passed in
def detect_attribute_order_diffs(diff_nodes)
  deletes = diff_nodes.select { |dn| dn.node1 && !dn.node2 }
  unpaired_inserts = diff_nodes.select { |dn| !dn.node1 && dn.node2 }

  deletes.each do |delete_node|
    break if unpaired_inserts.empty?

    removed = delete_node.node1
    # Without attributes there is no attribute order to differ in.
    next unless node_with_attributes?(removed)

    match_index = unpaired_inserts.index do |insert_node|
      added = insert_node.node2

      node_with_attributes?(added) &&
        removed.name == added.name &&
        attributes_equal_ignoring_order?(removed.attributes, added.attributes) &&
        nodes_same_except_attr_order?(removed, added)
    end
    next unless match_index

    # Remove by index (not by equality) so exactly this insert is consumed.
    matching_insert = unpaired_inserts.delete_at(match_index)

    [delete_node, matching_insert].each do |diff_node|
      diff_node.dimension = :attribute_order
      diff_node.reason = "attribute order changed"
      diff_node.normative = determine_normative(:attribute_order)
    end
  end

  diff_nodes
end

# True when +node+ exposes #name and #attributes and has at least one
# attribute.
def node_with_attributes?(node)
  return false unless node.respond_to?(:name) && node.respond_to?(:attributes)

  attrs = node.attributes
  !(attrs.nil? || attrs.empty?)
end
527
+
528
# Check if two attribute hashes are equal ignoring order
#
# Hash equality in Ruby does not depend on insertion order, so the
# collections are compared directly once converted with #to_h. The earlier
# sort-before-compare step added nothing and raised ArgumentError whenever
# the keys were not mutually comparable (e.g. a Symbol and a String key).
#
# NOTE(review): values are compared with ==. If the attribute values are
# objects whose == is identity-based rather than value-based, two equal
# looking attribute sets will not match — confirm what #attributes returns
# for the node types used here.
#
# @param attrs1 [Hash, #to_h, nil] First attribute hash
# @param attrs2 [Hash, #to_h, nil] Second attribute hash
# @return [Boolean] True if attributes are equal (ignoring order); two nils
#   are equal, nil never equals a non-nil value
def attributes_equal_ignoring_order?(attrs1, attrs2)
  return true if attrs1.nil? && attrs2.nil?
  return false if attrs1.nil? || attrs2.nil?

  # Convert to hashes if needed
  attrs1 = attrs1.to_h if attrs1.respond_to?(:to_h)
  attrs2 = attrs2.to_h if attrs2.respond_to?(:to_h)

  attrs1 == attrs2
end
544
+
545
# Check whether two nodes agree in everything this method inspects other
# than their attributes: text, number of children, and the names of the
# children position by position. Children are compared by name only, not
# recursively.
#
# @param node1 [Nokogiri::XML::Node] First node
# @param node2 [Nokogiri::XML::Node] Second node
# @return [Boolean] True if nodes are same except attribute order
def nodes_same_except_attr_order?(node1, node2)
  return false unless node1.text == node2.text
  return false unless node1.children.length == node2.children.length

  # Childless nodes have nothing further to compare.
  return true unless node1.children.any?

  node1.children.zip(node2.children).all? do |left, right|
    left.name == right.name
  end
end
566
+
567
# Tell whether a node is one of the METADATA_ELEMENTS.
#
# The element name is read from #label when the node has one (TreeNode),
# otherwise from #name (Nokogiri node). nil and objects offering neither
# reader are never metadata elements.
#
# @param node [Object] Node to check (could be TreeNode or Nokogiri node)
# @return [Boolean] true if node is a metadata element
def metadata_element?(node)
  return false if node.nil?

  # #label takes precedence over #name when both exist.
  reader = %i[label name].find { |candidate| node.respond_to?(candidate) }
  return false unless reader

  METADATA_ELEMENTS.include?(node.public_send(reader))
end
586
+
587
# Build detailed reason for attribute differences
#
# Reports, in this order, the attribute names only present in the old set
# ("Missing"), the names only present in the new set ("Extra"), and the
# names present in both whose values differ ("Changed", each value
# previewed with at most 20 characters).
#
# Plain Array set operations are used instead of Set: this file never
# requires "set", so the constant is not guaranteed to be loaded on Ruby
# versions that do not autoload it. A nil attribute hash is treated as
# empty instead of raising NoMethodError.
#
# @param old_attrs [Hash, nil] Old attributes
# @param new_attrs [Hash, nil] New attributes
# @return [String] Detailed reason, or "Attribute values differ" when no
#   specific difference is found
def build_attribute_diff_details(old_attrs, new_attrs)
  old_attrs ||= {}
  new_attrs ||= {}

  old_keys = old_attrs.keys
  new_keys = new_attrs.keys

  missing = old_keys - new_keys
  extra = new_keys - old_keys
  changed = (old_keys & new_keys).reject do |key|
    old_attrs[key] == new_attrs[key]
  end

  parts = []
  parts << "Missing: #{missing.join(', ')}" unless missing.empty?
  parts << "Extra: #{extra.join(', ')}" unless extra.empty?
  unless changed.empty?
    value_changes = changed.map do |key|
      before = truncate_for_reason(old_attrs[key], 20)
      after = truncate_for_reason(new_attrs[key], 20)
      "#{key}=\"#{before}\" → \"#{after}\""
    end
    parts << "Changed: #{value_changes.join(', ')}"
  end

  parts.any? ? "Attributes differ (#{parts.join('; ')})" : "Attribute values differ"
end
615
+
616
# Truncate text for reason messages
#
# Text that fits is returned unchanged. Longer text is cut so that the
# result, including a trailing "...", is exactly +max_length+ characters.
# When +max_length+ is below 3 there is no room for the ellipsis, so the
# text is hard-cut to +max_length+ characters (empty for zero or negative
# limits). Previously such limits produced a negative range end and a
# result longer than the limit.
#
# @param text [String, #to_s, nil] Text to truncate (nil yields "")
# @param max_length [Integer] Maximum length
# @return [String] Truncated text
def truncate_for_reason(text, max_length)
  return "" if text.nil?

  text = text.to_s
  return text if text.length <= max_length

  # No room for "...": plain cut, never longer than the limit.
  return text[0, [max_length, 0].max] if max_length < 3

  "#{text[0...max_length - 3]}..."
end
629
+ end
630
+ end
631
+ end