canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "paint"
4
+ require_relative "../xml/namespace_helper"
4
5
 
5
6
  module Canon
6
7
  class DiffFormatter
@@ -16,6 +17,14 @@ module Canon
16
17
  def format_report(differences, use_color: true)
17
18
  return "" if differences.empty?
18
19
 
20
+ # Group differences by normative status
21
+ normative = differences.select do |diff|
22
+ diff.respond_to?(:normative?) ? diff.normative? : true
23
+ end
24
+ informative = differences.select do |diff|
25
+ diff.respond_to?(:normative?) && !diff.normative?
26
+ end
27
+
19
28
  output = []
20
29
  output << ""
21
30
  output << colorize("=" * 70, :cyan, use_color, bold: true)
@@ -24,10 +33,33 @@ module Canon
24
33
  )
25
34
  output << colorize("=" * 70, :cyan, use_color, bold: true)
26
35
 
27
- differences.each_with_index do |diff, i|
36
+ # Show normative differences first
37
+ if normative.any?
28
38
  output << ""
29
- output << format_single_diff(diff, i + 1, differences.length,
30
- use_color)
39
+ output << colorize(
40
+ "┌─ NORMATIVE DIFFERENCES (#{normative.length}) ─┐", :green, use_color, bold: true
41
+ )
42
+
43
+ normative.each_with_index do |diff, i|
44
+ output << ""
45
+ output << format_single_diff(diff, i + 1, normative.length,
46
+ use_color, section: "NORMATIVE")
47
+ end
48
+ end
49
+
50
+ # Show informative differences second
51
+ if informative.any?
52
+ output << ""
53
+ output << ""
54
+ output << colorize(
55
+ "┌─ INFORMATIVE DIFFERENCES (#{informative.length}) ─┐", :yellow, use_color, bold: true
56
+ )
57
+
58
+ informative.each_with_index do |diff, i|
59
+ output << ""
60
+ output << format_single_diff(diff, i + 1, informative.length,
61
+ use_color, section: "INFORMATIVE")
62
+ end
31
63
  end
32
64
 
33
65
  output << ""
@@ -40,15 +72,15 @@ module Canon
40
72
  private
41
73
 
42
74
  # Format a single difference with dimension-specific details
43
- def format_single_diff(diff, number, total, use_color)
75
+ def format_single_diff(diff, number, total, use_color, section: nil)
44
76
  output = []
45
77
 
46
78
  # Header - handle both DiffNode and Hash
47
- status = if diff.respond_to?(:normative?)
48
- diff.normative? ? "NORMATIVE" : "INFORMATIVE"
49
- else
50
- "NORMATIVE" # Hash diffs are always normative
51
- end
79
+ status = section || (if diff.respond_to?(:normative?)
80
+ diff.normative? ? "NORMATIVE" : "INFORMATIVE"
81
+ else
82
+ "NORMATIVE" # Hash diffs are always normative
83
+ end)
52
84
  status_color = status == "NORMATIVE" ? :green : :yellow
53
85
  output << colorize("🔍 DIFFERENCE ##{number}/#{total} [#{status}]",
54
86
  status_color, use_color, bold: true)
@@ -182,10 +214,18 @@ module Canon
182
214
  dimension = diff.respond_to?(:dimension) ? diff.dimension : nil
183
215
 
184
216
  case dimension
217
+ when :element_structure
218
+ format_element_structure_details(diff, use_color)
185
219
  when :attribute_presence
186
220
  format_attribute_presence_details(diff, use_color)
187
221
  when :attribute_values
188
222
  format_attribute_values_details(diff, use_color)
223
+ when :attribute_order
224
+ format_attribute_order_details(diff, use_color)
225
+ when :namespace_uri
226
+ format_namespace_uri_details(diff, use_color)
227
+ when :namespace_declarations
228
+ format_namespace_declarations_details(diff, use_color)
189
229
  when :text_content
190
230
  format_text_content_details(diff, use_color)
191
231
  when :structural_whitespace
@@ -197,6 +237,225 @@ module Canon
197
237
  end
198
238
  end
199
239
 
240
# Build the display triple for a :namespace_uri difference.
#
# Each node's namespace URI (nil when the node has no #namespace_uri)
# is rendered through Canon::Xml::NamespaceHelper.format_namespace.
# Both detail lines carry the same element label: node1's name when
# node1 responds to #name, otherwise node2's name, otherwise "element".
#
# @param diff [#node1, #node2] difference being described
# @param use_color [Boolean] forwarded to colorize
# @return [Array<String>] [detail1, detail2, changes]
def format_namespace_uri_details(diff, use_color)
  before = diff.node1
  after = diff.node2

  before_uri = before.respond_to?(:namespace_uri) ? before.namespace_uri : nil
  before_label = Canon::Xml::NamespaceHelper.format_namespace(before_uri)

  after_uri = after.respond_to?(:namespace_uri) ? after.namespace_uri : nil
  after_label = Canon::Xml::NamespaceHelper.format_namespace(after_uri)

  tag = if before.respond_to?(:name)
          before.name
        elsif after.respond_to?(:name)
          after.name
        else
          "element"
        end

  before_line = "<#{tag}> #{colorize(before_label, :cyan, use_color)}"
  after_line = "<#{tag}> #{colorize(after_label, :cyan, use_color)}"

  removed_part = colorize(before_label, :red, use_color)
  added_part = colorize(after_label, :green, use_color)
  summary = "Namespace differs: #{removed_part} → #{added_part}"

  [before_line, after_line, summary]
end
271
+
272
# Format namespace_declarations dimension details
#
# Compares the xmlns declarations of both nodes (as returned by
# extract_namespace_declarations_from_node) and reports which prefixes
# were removed, added, or re-bound to a different URI.
#
# @param diff [#node1, #node2] difference being described
# @param use_color [Boolean] forwarded to colorize
# @return [Array<String>] [detail1, detail2, changes]; changes is an
#   empty string when the two declaration sets are identical
def format_namespace_declarations_details(diff, use_color)
  node1 = diff.node1
  node2 = diff.node2

  ns_decls1 = extract_namespace_declarations_from_node(node1)
  ns_decls2 = extract_namespace_declarations_from_node(node2)

  element_name = if node1.respond_to?(:name)
                   node1.name
                 elsif node2.respond_to?(:name)
                   node2.name
                 else
                   "element"
                 end

  detail1 = namespace_declarations_detail(element_name, ns_decls1, :red,
                                          use_color)
  detail2 = namespace_declarations_detail(element_name, ns_decls2, :green,
                                          use_color)

  removed = ns_decls1.keys - ns_decls2.keys # In node1 but not node2
  added = ns_decls2.keys - ns_decls1.keys   # In node2 but not node1
  # key? (not truthiness) so a prefix that is present on both sides is
  # still reported when its URI on the node2 side is nil.
  changed = ns_decls1.keys.select do |prefix|
    ns_decls2.key?(prefix) && ns_decls2[prefix] != ns_decls1[prefix]
  end

  changes_parts = []
  unless removed.empty?
    removed_str = removed.map do |prefix|
      colorize("-#{xmlns_attribute_name(prefix)}=\"#{ns_decls1[prefix]}\"",
               :red, use_color)
    end.join(", ")
    changes_parts << "Removed: #{removed_str}"
  end
  unless added.empty?
    added_str = added.map do |prefix|
      colorize("+#{xmlns_attribute_name(prefix)}=\"#{ns_decls2[prefix]}\"",
               :green, use_color)
    end.join(", ")
    changes_parts << "Added: #{added_str}"
  end
  unless changed.empty?
    changed_str = changed.map do |prefix|
      label = colorize(xmlns_attribute_name(prefix), :cyan, use_color)
      "#{label}: \"#{ns_decls1[prefix]}\" → \"#{ns_decls2[prefix]}\""
    end.join(", ")
    changes_parts << "Changed: #{changed_str}"
  end

  [detail1, detail2, changes_parts.join(" | ")]
end

# Attribute name that declares the given prefix: "xmlns" for the default
# namespace (empty or nil prefix), "xmlns:<prefix>" otherwise.
def xmlns_attribute_name(prefix)
  prefix.to_s.empty? ? "xmlns" : "xmlns:#{prefix}"
end

# One detail line: the element label followed by its declarations, or a
# colorized "(no namespace declarations)" placeholder when there are none.
def namespace_declarations_detail(element_name, declarations, empty_color,
                                  use_color)
  if declarations.empty?
    placeholder = colorize("(no namespace declarations)", empty_color,
                           use_color)
    return "<#{element_name}> #{placeholder}"
  end

  rendered = declarations.map do |prefix, uri|
    "#{xmlns_attribute_name(prefix)}=\"#{uri}\""
  end.join(" ")
  "<#{element_name}> #{rendered}"
end
350
+
351
# Extract namespace declarations from a node (helper for formatter)
#
# Three sources are tried in order:
# 1. #namespace_nodes - each entry's prefix/uri is used, skipping the
#    implicit "xml" prefix bound to the XML namespace URI.
# 2. #attribute_nodes - entries are read through #name / #value.
# 3. #attributes      - either an Array of attribute objects, or an
#    each-able of (key, value) pairs where the key is the attribute name
#    (String) or the attribute object itself.
# For sources 2 and 3 only attributes named "xmlns" or "xmlns:<prefix>"
# are kept.
#
# @param node [Object, nil] Node to extract namespace declarations from
# @return [Hash] prefix => URI mappings; the default namespace uses ""
def extract_namespace_declarations_from_node(node)
  return {} if node.nil?

  if node.respond_to?(:namespace_nodes)
    declarations = {}
    node.namespace_nodes.each do |ns|
      # Skip the implicit xml namespace (always present)
      next if ns.prefix == "xml" && ns.uri == "http://www.w3.org/XML/1998/namespace"

      declarations[ns.prefix || ""] = ns.uri
    end
    return declarations
  end

  raw_attrs = if node.respond_to?(:attribute_nodes)
                node.attribute_nodes
              elsif node.respond_to?(:attributes)
                node.attributes
              end
  # No attribute accessor, or the accessor returned nil: nothing declared.
  return {} if raw_attrs.nil?

  # Normalise every supported attribute shape to [name, value] pairs so the
  # xmlns filter below exists only once.
  pairs = []
  if raw_attrs.is_a?(Array)
    raw_attrs.each { |attr| pairs << [attr.name, attr.value] }
  else
    raw_attrs.each do |key, val|
      pairs << if key.is_a?(String)
                 # key is the attribute name, val the attribute object
                 [key, val.respond_to?(:value) ? val.value : val.to_s]
               else
                 # key is the attribute object itself, val is unused
                 [key.respond_to?(:name) ? key.name : key.to_s,
                  key.respond_to?(:value) ? key.value : key.to_s]
               end
    end
  end

  pairs.each_with_object({}) do |(name, value), declarations|
    name = name.to_s # a nil name is simply "not an xmlns attribute"
    next unless name == "xmlns" || name.start_with?("xmlns:")

    # Extract prefix: "xmlns" -> "", "xmlns:xmi" -> "xmi"
    prefix = name == "xmlns" ? "" : name.split(":", 2)[1]
    declarations[prefix] = value
  end
end
416
+
417
# Format element_structure dimension details (INSERT/DELETE operations)
#
# Which nodes are nil decides the operation:
# * node1 nil, node2 present -> insertion
# * node1 present, node2 nil -> deletion
# * both present             -> structural change (or an element type
#                               change when the two names differ)
# * both nil                 -> reported as unknown (shouldn't happen)
#
# @param diff [#node1, #node2] difference being described
# @param use_color [Boolean] forwarded to colorize
# @return [Array<String>] [detail1, detail2, changes]
def format_element_structure_details(diff, use_color)
  node1 = diff.node1
  node2 = diff.node2

  if node1.nil? && node2.nil?
    ["(nil)", "(nil)", "Unknown structural change"]
  elsif node1.nil?
    # INSERT operation - show content preview of the new element
    [colorize("(not present)", :red, use_color),
     extract_content_preview(node2, 50),
     "Element inserted"]
  elsif node2.nil?
    # DELETE operation - show content preview of the removed element
    [extract_content_preview(node1, 50),
     colorize("(not present)", :green, use_color),
     "Element deleted"]
  else
    # STRUCTURAL CHANGE (both nodes present) - show both previews
    name1 = node1.respond_to?(:name) ? node1.name : "element"
    name2 = node2.respond_to?(:name) ? node2.name : "element"

    changes = if name1 == name2
                "Element structure changed"
              else
                "Element type changed: #{name1} → #{name2}"
              end

    [extract_content_preview(node1, 50),
     extract_content_preview(node2, 50),
     changes]
  end
end
458
+
200
459
  # Format attribute_presence dimension details
201
460
  def format_attribute_presence_details(diff, use_color)
202
461
  node1 = diff.node1
@@ -240,26 +499,42 @@ module Canon
240
499
  node1 = diff.node1
241
500
  node2 = diff.node2
242
501
 
243
- # Find which attribute has different value
244
- differing_attr = find_differing_attribute(node1, node2)
245
-
246
- if differing_attr
247
- val1 = get_attribute_value(node1, differing_attr)
248
- val2 = get_attribute_value(node2, differing_attr)
249
-
250
- detail1 = "<#{node1.name}> #{colorize(differing_attr, :cyan,
251
- use_color)}=\"#{escape_quotes(val1)}\""
252
- detail2 = "<#{node2.name}> #{colorize(differing_attr, :cyan,
253
- use_color)}=\"#{escape_quotes(val2)}\""
502
+ # Find ALL attributes with different values
503
+ differing_attrs = find_all_differing_attributes(node1, node2)
504
+
505
+ if differing_attrs.any?
506
+ # Show element name with all differing attributes
507
+ attrs1_str = differing_attrs.map do |attr|
508
+ val1 = get_attribute_value(node1, attr)
509
+ "#{colorize(attr, :cyan, use_color)}=\"#{escape_quotes(val1)}\""
510
+ end.join(" ")
511
+
512
+ attrs2_str = differing_attrs.map do |attr|
513
+ val2 = get_attribute_value(node2, attr)
514
+ "#{colorize(attr, :cyan, use_color)}=\"#{escape_quotes(val2)}\""
515
+ end.join(" ")
516
+
517
+ detail1 = "<#{node1.name}> #{attrs1_str}"
518
+ detail2 = "<#{node2.name}> #{attrs2_str}"
519
+
520
+ # List all attribute changes
521
+ changes_parts = differing_attrs.map do |attr|
522
+ val1 = get_attribute_value(node1, attr)
523
+ val2 = get_attribute_value(node2, attr)
524
+
525
+ if val1.empty? && !val2.empty?
526
+ "#{colorize(attr, :cyan,
527
+ use_color)}: (added) → \"#{escape_quotes(val2)}\""
528
+ elsif !val1.empty? && val2.empty?
529
+ "#{colorize(attr, :cyan,
530
+ use_color)}: \"#{escape_quotes(val1)}\" → (removed)"
531
+ else
532
+ "#{colorize(attr, :cyan,
533
+ use_color)}: \"#{escape_quotes(val1)}\" → \"#{escape_quotes(val2)}\""
534
+ end
535
+ end
254
536
 
255
- # Analyze the difference
256
- changes = if val1.strip == val2.strip && val1 != val2
257
- "Whitespace difference only"
258
- elsif val1.gsub(/\s+/, " ") == val2.gsub(/\s+/, " ")
259
- "Whitespace normalization difference"
260
- else
261
- "Value changed"
262
- end
537
+ changes = changes_parts.join("; ")
263
538
 
264
539
  [detail1, detail2, changes]
265
540
  else
@@ -268,7 +543,32 @@ module Canon
268
543
  end
269
544
  end
270
545
 
271
- # Format text_content dimension details
546
# Format attribute_order dimension details
#
# Lists each node's attribute names in the order returned by
# get_attribute_names_in_order. A node that does not respond to #name
# (including nil) borrows the other node's name, falling back to
# "element", so a one-sided diff is still rendered instead of raising.
#
# @param diff [#node1, #node2] difference being described
# @param use_color [Boolean] forwarded to colorize
# @return [Array<String>] [detail1, detail2, changes]
def format_attribute_order_details(diff, use_color)
  node1 = diff.node1
  node2 = diff.node2

  # Format as ordered list
  attrs1_str = "[#{get_attribute_names_in_order(node1).join(', ')}]"
  attrs2_str = "[#{get_attribute_names_in_order(node2).join(', ')}]"

  names = [node1, node2].map { |node| node.name if node.respond_to?(:name) }
  name1 = names[0] || names[1] || "element"
  name2 = names[1] || names[0] || "element"

  detail1 = "<#{name1}> attributes in order: " \
            "#{colorize(attrs1_str, :cyan, use_color)}"
  detail2 = "<#{name2}> attributes in order: " \
            "#{colorize(attrs2_str, :cyan, use_color)}"

  changes = "Attribute order changed: #{attrs1_str} → #{attrs2_str}"

  [detail1, detail2, changes]
end
570
+
571
+ # Format text content dimension details
272
572
  def format_text_content_details(diff, use_color)
273
573
  node1 = diff.node1
274
574
  node2 = diff.node2
@@ -280,13 +580,46 @@ module Canon
280
580
  preview1 = truncate_text(text1, 100)
281
581
  preview2 = truncate_text(text2, 100)
282
582
 
283
- element_name = node1.respond_to?(:name) ? node1.name : "(text)"
284
-
285
- detail1 = "<#{element_name}> \"#{escape_quotes(preview1)}\""
286
- detail2 = "<#{element_name}> \"#{escape_quotes(preview2)}\""
583
+ # Get element names - for text nodes, use parent element name
584
+ # When one node is nil, use the other's name for context
585
+ element_name1 = get_element_name_for_display(node1)
586
+ element_name2 = get_element_name_for_display(node2)
587
+
588
+ # If one shows nil-node, try to use the other's name for context
589
+ if element_name1.include?("nil") && !element_name2.include?("nil")
590
+ # Use node2's name as a hint for what node1 should be
591
+ element_name1 = element_name2
592
+ elsif element_name2.include?("nil") && !element_name1.include?("nil")
593
+ # Use node1's name as a hint for what node2 should be
594
+ element_name2 = element_name1
595
+ end
287
596
 
288
- # Check if inside whitespace-preserving element
289
- changes = if inside_preserve_element?(node1) || inside_preserve_element?(node2)
597
+ # Get namespace URIs
598
+ ns1 = get_namespace_uri_for_display(node1)
599
+ ns2 = get_namespace_uri_for_display(node2)
600
+
601
+ # Build namespace display strings using NamespaceHelper
602
+ ns1_info = if ns1 && !ns1.empty?
603
+ " #{Canon::Xml::NamespaceHelper.format_namespace(ns1)}"
604
+ else
605
+ ""
606
+ end
607
+
608
+ ns2_info = if ns2 && !ns2.empty?
609
+ " #{Canon::Xml::NamespaceHelper.format_namespace(ns2)}"
610
+ else
611
+ ""
612
+ end
613
+
614
+ detail1 = "<#{element_name1}>#{ns1_info} \"#{escape_quotes(preview1)}\""
615
+ detail2 = "<#{element_name2}>#{ns2_info} \"#{escape_quotes(preview2)}\""
616
+
617
+ # Check if diff contains namespace information in reason
618
+ # If so, display it prominently
619
+ changes = if diff.respond_to?(:reason) && diff.reason&.include?("namespace")
620
+ diff.reason
621
+ # Check if inside whitespace-preserving element
622
+ elsif inside_preserve_element?(node1) || inside_preserve_element?(node2)
290
623
  colorize("⚠️ Whitespace preserved", :yellow, use_color,
291
624
  bold: true) +
292
625
  " (inside <pre>, <code>, etc. - whitespace is significant)"
@@ -297,7 +630,7 @@ module Canon
297
630
  [detail1, detail2, changes]
298
631
  end
299
632
 
300
- # Format structural_whitespace dimension details
633
+ # Format structural whitespace dimension details
301
634
  def format_structural_whitespace_details(diff, _use_color)
302
635
  node1 = diff.node1
303
636
  node2 = diff.node2
@@ -387,42 +720,142 @@ module Canon
387
720
 
388
721
  # Helper: Get attribute names from a node
389
722
  def get_attribute_names(node)
723
+ # Handle Canon::Xml::Nodes::ElementNode (uses attribute_nodes array)
724
+ if node.respond_to?(:attribute_nodes) && node.attribute_nodes.is_a?(Array)
725
+ return node.attribute_nodes.map(&:qname).sort
726
+ end
727
+
390
728
  return [] unless node.respond_to?(:attributes)
391
729
 
392
- node.attributes.map do |key, _val|
393
- if key.is_a?(String)
394
- key
395
- else
396
- (key.respond_to?(:name) ? key.name : key.to_s)
397
- end
398
- end.sort
730
+ attrs = node.attributes
731
+
732
+ # Handle Moxml::Element (attributes is an Array)
733
+ if attrs.is_a?(Array)
734
+ attrs.map do |attr|
735
+ if attr.respond_to?(:qname)
736
+ attr.qname
737
+ elsif attr.respond_to?(:name)
738
+ attr.name
739
+ else
740
+ attr.to_s
741
+ end
742
+ end.sort
743
+ # Handle Nokogiri nodes (attributes is a Hash)
744
+ else
745
+ attrs.map do |key, val|
746
+ # Get the qualified name (with prefix if present)
747
+ if val.respond_to?(:namespace) && val.namespace&.prefix
748
+ "#{val.namespace.prefix}:#{val.name}"
749
+ else
750
+ val.respond_to?(:name) ? val.name : key.to_s
751
+ end
752
+ end.sort
753
+ end
399
754
  end
400
755
 
401
- # Helper: Find which attribute has different value
402
- def find_differing_attribute(node1, node2)
403
- return nil unless node1.respond_to?(:attributes) && node2.respond_to?(:attributes)
756
+ # Helper: Find ALL attributes with different values
757
+ def find_all_differing_attributes(node1, node2)
758
+ return [] unless node1.respond_to?(:attributes) && node2.respond_to?(:attributes)
404
759
 
405
760
  attrs1 = get_attributes_hash(node1)
406
761
  attrs2 = get_attributes_hash(node2)
407
762
 
408
- # Find first attribute with different value
409
- common_keys = attrs1.keys & attrs2.keys
410
- common_keys.find { |key| attrs1[key] != attrs2[key] }
763
+ # Find all attributes with different values
764
+ all_keys = (attrs1.keys + attrs2.keys).uniq
765
+ all_keys.reject do |key|
766
+ attrs1[key] == attrs2[key]
767
+ end
768
+ end
769
+
770
# Helper: Get attribute names in document order (not sorted)
#
# Sources, in order of preference:
# 1. #attribute_nodes returning an Array - each entry is named by #qname
#    when it has one; otherwise by "<prefix>:<name>" when it exposes a
#    prefixed #namespace; otherwise by #name.
# 2. #attributes returning an Array - #qname, then #name, then #to_s.
# 3. #attributes yielding (key, value) pairs - the value's namespace
#    prefix is checked FIRST so that a String key (the bare attribute
#    name) does not hide the qualified name; then the String key; then
#    the key's own #name.
#
# @param node [Object] node to inspect; anything without attribute
#   accessors yields []
# @return [Array<String>] attribute names in the order the node yields them
def get_attribute_names_in_order(node)
  attribute_nodes = node.respond_to?(:attribute_nodes) ? node.attribute_nodes : nil
  if attribute_nodes.is_a?(Array)
    return attribute_nodes.map do |attr|
      if attr.respond_to?(:qname)
        attr.qname
      else
        # Entries without #qname previously raised NoMethodError here.
        prefixed_attribute_name(attr) ||
          (attr.respond_to?(:name) ? attr.name : attr.to_s)
      end
    end
  end

  return [] unless node.respond_to?(:attributes)

  attrs = node.attributes

  if attrs.is_a?(Array)
    attrs.map do |attr|
      if attr.respond_to?(:qname)
        attr.qname
      elsif attr.respond_to?(:name)
        attr.name
      else
        attr.to_s
      end
    end
  else
    attrs.map do |key, val|
      qualified = prefixed_attribute_name(val)
      if qualified
        qualified
      elsif key.is_a?(String)
        key
      else
        key.respond_to?(:name) ? key.name : key.to_s
      end
    end
  end
end

# "<prefix>:<name>" for an attribute object whose #namespace has a prefix;
# nil when the object has no #name/#namespace or the namespace is unprefixed.
def prefixed_attribute_name(attr)
  return nil unless attr.respond_to?(:namespace) && attr.respond_to?(:name)

  prefix = attr.namespace&.prefix
  prefix ? "#{prefix}:#{attr.name}" : nil
end
412
810
 
413
811
  # Helper: Get attributes as hash
414
812
  def get_attributes_hash(node)
813
+ # Handle Canon::Xml::Nodes::ElementNode (uses attribute_nodes array)
814
+ if node.respond_to?(:attribute_nodes) && node.attribute_nodes.is_a?(Array)
815
+ hash = {}
816
+ node.attribute_nodes.each do |attr|
817
+ hash[attr.qname] = attr.value
818
+ end
819
+ return hash
820
+ end
821
+
415
822
  return {} unless node.respond_to?(:attributes)
416
823
 
417
824
  hash = {}
418
- node.attributes.each do |key, val|
419
- name = if key.is_a?(String)
420
- key
421
- else
422
- (key.respond_to?(:name) ? key.name : key.to_s)
423
- end
424
- value = val.respond_to?(:value) ? val.value : val.to_s
425
- hash[name] = value
825
+ attrs = node.attributes
826
+
827
+ # Handle Moxml::Element (attributes is an Array of Moxml::Attribute)
828
+ if attrs.is_a?(Array)
829
+ attrs.each do |attr|
830
+ # Use qname for AttributeNode objects (includes prefix)
831
+ name = if attr.respond_to?(:qname)
832
+ attr.qname
833
+ elsif attr.respond_to?(:name)
834
+ attr.name
835
+ else
836
+ attr.to_s
837
+ end
838
+ value = if attr.respond_to?(:value)
839
+ attr.value
840
+ elsif attr.respond_to?(:native) && attr.native.respond_to?(:value)
841
+ attr.native.value
842
+ else
843
+ attr.to_s
844
+ end
845
+ hash[name] = value
846
+ end
847
+ # Handle Nokogiri nodes (attributes is a Hash)
848
+ else
849
+ attrs.each do |key, val|
850
+ # Get the qualified name (with prefix if present)
851
+ name = if val.respond_to?(:namespace) && val.namespace&.prefix
852
+ "#{val.namespace.prefix}:#{val.name}"
853
+ else
854
+ val.respond_to?(:name) ? val.name : key.to_s
855
+ end
856
+ value = val.respond_to?(:value) ? val.value : val.to_s
857
+ hash[name] = value
858
+ end
426
859
  end
427
860
  hash
428
861
  end
@@ -446,6 +879,139 @@ module Canon
446
879
  end
447
880
  end
448
881
 
882
# Helper: Get element name for display
#
# Resolution order:
# * nil node                      -> "(nil-node)"
# * node whose name is nil        -> "(nil-name:<node_type>)", or
#                                    "(nil-name:<ClassName>)" when no
#                                    node type is available (debug aid)
# * ordinary element name         -> that name
# * text / document style nodes   -> name of the nearest named ancestor
#                                    (at most 5 levels up), else "(text)"
#                                    for text nodes or "(no-name)"
# * anything else (e.g. empty name on a non-text node) -> name.to_s
#
# Accessor calls on nodes are wrapped so an accessor that raises a
# StandardError is treated as "no value" rather than aborting formatting.
#
# @param node [Object, nil] node to label
# @return [String] label to show between angle brackets
def get_element_name_for_display(node)
  return "(nil-node)" if node.nil?

  node_name = safe_node_attribute(node, :name)
  return nil_name_placeholder(node) if node_name.nil?

  name_str = node_name.to_s
  placeholder = placeholder_node_name?(name_str)
  return name_str unless name_str.empty? || placeholder

  is_text_node = text_node_for_display?(node, name_str)
  # Empty name on a non-text node: nothing better to show than the name.
  return name_str unless is_text_node || placeholder

  nearest_named_ancestor(node) || (is_text_node ? "(text)" : "(no-name)")
end

# Calls +attribute+ on +node+ when the node responds to it; returns nil
# when it does not respond or the call raises a StandardError.
def safe_node_attribute(node, attribute)
  return nil unless node.respond_to?(attribute)

  begin
    node.public_send(attribute)
  rescue StandardError
    nil
  end
end

# True for names that denote a text or document node, not an element.
def placeholder_node_name?(name)
  ["#text", "text", "#document", "document"].include?(name)
end

# Diagnostic label for a node whose name is nil: prefers the node type,
# falls back to the last segment of the class name.
def nil_name_placeholder(node)
  type = safe_node_attribute(node, :node_type)
  return "(nil-name:#{type})" if type

  class_info = node.class.name&.split("::")&.last || "UnknownClass"
  "(nil-name:#{class_info})"
end

# Whether +node+ should be treated as a text node. When the node exposes
# #node_type that answer is final; otherwise the name, then the class
# name, is consulted.
def text_node_for_display?(node, name_str)
  if node.respond_to?(:node_type)
    begin
      node.node_type == :text
    rescue StandardError
      false
    end
  elsif ["#text", "text"].include?(name_str)
    true
  elsif node.class.name
    # "Text" also matches class names containing "TextNode"
    node.class.name.include?("Text")
  else
    false
  end
end

# Walks up #parent links (at most +max_depth+ ancestors) and returns the
# first ancestor name that is non-empty and not a text/document
# placeholder; nil when none is found.
def nearest_named_ancestor(node, max_depth = 5)
  parent = safe_node_attribute(node, :parent)

  max_depth.times do
    break unless parent

    parent_name = safe_node_attribute(parent, :name)
    if parent_name && !parent_name.to_s.empty? &&
        !placeholder_node_name?(parent_name.to_s)
      return parent_name.to_s
    end

    parent = safe_node_attribute(parent, :parent)
  end

  nil
end
987
+
988
# Helper: Get namespace URI for display
#
# A node counts as a text node when its #node_type is :text or, for nodes
# without #node_type, when its class name contains "Text". Text nodes
# report their parent's #namespace_uri; every other node reports its own.
#
# @param node [Object, nil] node to inspect
# @return [Object, nil] whatever #namespace_uri returns, or nil when the
#   node (or, for text nodes, its parent) does not provide one
def get_namespace_uri_for_display(node)
  return nil if node.nil?

  is_text_node = if node.respond_to?(:node_type)
                   node.node_type == :text
                 else
                   # "Text" also matches class names containing "TextNode";
                   # anonymous classes have a nil name and never match.
                   node.class.name.to_s.include?("Text")
                 end

  owner = node
  if is_text_node
    # For text nodes, the namespace comes from the parent element
    owner = node.respond_to?(:parent) ? node.parent : nil
  end

  owner.namespace_uri if owner.respond_to?(:namespace_uri)
end
1014
+
449
1015
  # Helper: Truncate text to max length
450
1016
  def truncate_text(text, max_length)
451
1017
  return text if text.length <= max_length
@@ -535,6 +1101,57 @@ module Canon
535
1101
  end
536
1102
  end
537
1103
 
1104
# Helper: Extract content preview from a node
#
# The preview is built from up to three space-separated parts:
# * "<name>" when the node responds to #name
# * "[id=".." class=".." name=".." type=".."]" for whichever of those
#   four attributes are present and non-empty (values over 20 characters
#   are shortened)
# * the stripped text in quotes (shortened beyond 40 characters), or,
#   when there is no non-blank text, "(N children)" if the node has any
#
# Whitespace-only text counts as "no text", so an element that only has
# child elements still shows its child count.
#
# @param node [Object, nil] node to preview
# @param max_length [Integer] maximum length of the returned string
# @return [String] preview, "(nil)" for a nil node
def extract_content_preview(node, max_length = 50)
  return "(nil)" if node.nil?

  parts = []

  # Add element name
  parts << "<#{node.name}>" if node.respond_to?(:name)

  # Add key attributes (id, class, name, type)
  if node.respond_to?(:attributes) && node.attributes&.any?
    attrs_hash = get_attributes_hash(node)

    key_attr_strs = %w[id class name type].map do |key|
      val = attrs_hash[key]
      next if val.nil? || val.empty?

      # Truncate long attribute values
      val_preview = val.length > 20 ? "#{val[0..17]}..." : val
      "#{key}=\"#{val_preview}\""
    end.compact

    parts << "[#{key_attr_strs.join(' ')}]" unless key_attr_strs.empty?
  end

  # Add text content preview, or the child count when there is no
  # meaningful (non-whitespace) text
  text_preview = get_node_text(node).to_s.strip
  if !text_preview.empty?
    text_preview = "#{text_preview[0..37]}..." if text_preview.length > 40
    parts << "\"#{text_preview}\""
  elsif node.respond_to?(:children) && node.children&.any?
    parts << "(#{node.children.length} children)"
  end

  result = parts.join(" ")
  return result unless result.length > max_length

  # Clamp so a max_length below 3 cannot produce a negative slice end
  "#{result[0...[max_length - 3, 0].max]}..."
end
1154
+
538
1155
  # Helper: Colorize text
539
1156
  def colorize(text, color, use_color, bold: false)
540
1157
  return text unless use_color