canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ # Abstract base class for format-specific data models
5
+ # Provides common interface for parsing and serializing documents
6
+ class DataModel
7
+ class << self
8
+ # Parse input into data model
9
+ # Must be implemented by subclasses
10
+ #
11
+ # @param input [String] Input content to parse
12
+ # @return [Object] Parsed data model representation
13
+ # @raise [NotImplementedError] if not implemented by subclass
14
+ def parse(input)
15
+ raise NotImplementedError, "#{self} must implement #parse"
16
+ end
17
+
18
+ # Serialize data model node
19
+ # Must be implemented by subclasses
20
+ #
21
+ # @param node [Object] Node to serialize
22
+ # @return [String] Serialized representation
23
+ # @raise [NotImplementedError] if not implemented by subclass
24
+ def serialize(node)
25
+ raise NotImplementedError, "#{self} must implement #serialize"
26
+ end
27
+ end
28
+ end
29
+ end
@@ -1,22 +1,54 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "formatting_detector"
4
+ require_relative "../comparison/compare_profile"
5
+
3
6
  module Canon
4
7
  module Diff
5
8
  # Classifies DiffNodes as normative (affects equivalence) or informative (doesn't affect equivalence)
6
9
  # based on the match options in effect
7
10
  class DiffClassifier
8
- attr_reader :match_options
11
+ attr_reader :match_options, :profile
9
12
 
10
13
  # @param match_options [Canon::Comparison::ResolvedMatchOptions] The match options
11
14
  def initialize(match_options)
12
15
  @match_options = match_options
16
+ # Use the compare_profile from ResolvedMatchOptions if available (e.g., HtmlCompareProfile)
17
+ # Otherwise create a base CompareProfile
18
+ @profile = if match_options.respond_to?(:compare_profile) && match_options.compare_profile
19
+ match_options.compare_profile
20
+ else
21
+ Canon::Comparison::CompareProfile.new(match_options)
22
+ end
13
23
  end
14
24
 
15
25
  # Classify a single DiffNode as normative or informative
26
+ # Hierarchy: formatting-only < informative < normative
27
+ # CompareProfile determines base classification, FormattingDetector refines informative differences
16
28
  # @param diff_node [DiffNode] The diff node to classify
17
- # @return [DiffNode] The same diff node with normative attribute set
29
+ # @return [DiffNode] The same diff node with normative/formatting attributes set
18
30
  def classify(diff_node)
19
- diff_node.normative = normative_for_dimension?(diff_node.dimension)
31
+ # FIRST: Determine if this dimension is normative based on CompareProfile
32
+ # This respects the policy settings (strict/normalize/ignore)
33
+ is_normative = profile.normative_dimension?(diff_node.dimension)
34
+
35
+ # SECOND: Check if FormattingDetector should be consulted
36
+ # Only check for formatting-only when dimension is NOT normative
37
+ # This ensures strict mode differences remain normative
38
+ should_check_formatting = !is_normative &&
39
+ profile.supports_formatting_detection?(diff_node.dimension)
40
+
41
+ # If we should check formatting, see if it's formatting-only
42
+ if should_check_formatting && formatting_only_diff?(diff_node)
43
+ diff_node.formatting = true
44
+ diff_node.normative = false
45
+ return diff_node
46
+ end
47
+
48
+ # Otherwise, use the normative determination from CompareProfile
49
+ diff_node.formatting = false
50
+ diff_node.normative = is_normative
51
+
20
52
  diff_node
21
53
  end
22
54
 
@@ -29,17 +61,45 @@ module Canon
29
61
 
30
62
  private
31
63
 
32
- # Determine if a difference in a given dimension is normative
33
- # @param dimension [Symbol] The match dimension
34
- # @return [Boolean] true if differences in this dimension are normative
35
- def normative_for_dimension?(dimension)
36
- behavior = match_options.behavior_for(dimension)
37
-
38
- # :ignore → informative (difference doesn't matter)
39
- # :strict or :normalize → normative (difference persisted through matching)
40
- # Note: If a DiffNode exists, it means the comparison FAILED even after
41
- # applying normalization, so it's a real (normative) difference
42
- behavior != :ignore
64
+ # Check if a DiffNode represents a formatting-only difference
65
+ # @param diff_node [DiffNode] The diff node to check
66
+ # @return [Boolean] true if formatting-only
67
+ def formatting_only_diff?(diff_node)
68
+ text1 = extract_text_content(diff_node.node1)
69
+ text2 = extract_text_content(diff_node.node2)
70
+
71
+ FormattingDetector.formatting_only?(text1, text2)
72
+ end
73
+
74
+ # Extract text content from a node for formatting comparison
75
+ # @param node [Object] The node to extract text from
76
+ # @return [String, nil] The text content or nil
77
+ def extract_text_content(node)
78
+ return nil if node.nil?
79
+
80
+ # For TextNode with value attribute (Canon::Xml::Nodes::TextNode)
81
+ return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
82
+
83
+ # For XML/HTML nodes with text_content method
84
+ return node.text_content if node.respond_to?(:text_content)
85
+
86
+ # For nodes with text method
87
+ return node.text if node.respond_to?(:text)
88
+
89
+ # For nodes with content method
90
+ return node.content if node.respond_to?(:content)
91
+
92
+ # For nodes with value method (other types)
93
+ return node.value if node.respond_to?(:value)
94
+
95
+ # For simple text nodes or strings
96
+ return node.to_s if node.is_a?(String)
97
+
98
+ # For other node types, try to_s
99
+ node.to_s
100
+ rescue StandardError
101
+ # If extraction fails, return nil (not formatting-only)
102
+ nil
43
103
  end
44
104
  end
45
105
  end
@@ -41,6 +41,9 @@ grouping_lines: nil)
41
41
  create_context_for_blocks(block_group)
42
42
  end
43
43
 
44
+ # Merge overlapping contexts to avoid duplicate line display
45
+ contexts = merge_overlapping_contexts(contexts)
46
+
44
47
  # Filter out all-informative contexts if show_diffs was :normative
45
48
  # Note: The filtering based on show_diffs happens at the block level
46
49
  # in DiffBlockBuilder, so we don't need to re-filter here.
@@ -51,6 +54,44 @@ grouping_lines: nil)
51
54
 
52
55
  private
53
56
 
57
+ # Merge overlapping contexts into single contexts
58
+ # When contexts have overlapping line ranges, combine them
59
+ def merge_overlapping_contexts(contexts)
60
+ return contexts if contexts.empty?
61
+
62
+ # Sort by start_idx
63
+ sorted = contexts.sort_by(&:start_idx)
64
+ merged = [sorted.first]
65
+
66
+ sorted[1..].each do |context|
67
+ last = merged.last
68
+
69
+ # Check if contexts overlap (including touching contexts)
70
+ if context.start_idx <= last.end_idx + 1
71
+ # Merge: extend the range and combine blocks
72
+ new_end = [last.end_idx, context.end_idx].max
73
+ combined_blocks = (last.blocks + context.blocks).uniq
74
+
75
+ # Extract combined lines
76
+ combined_lines = @all_lines[last.start_idx..new_end]
77
+
78
+ # Replace last context with merged one
79
+ merged[-1] = DiffContext.new(
80
+ start_line: last.start_idx,
81
+ end_line: new_end,
82
+ blocks: combined_blocks,
83
+ lines: combined_lines,
84
+ normative: last.normative? || context.normative?,
85
+ )
86
+ else
87
+ # No overlap, add as separate context
88
+ merged << context
89
+ end
90
+ end
91
+
92
+ merged
93
+ end
94
+
54
95
  # Group blocks that are close together
55
96
  def group_nearby_blocks(blocks, max_gap)
56
97
  return [] if blocks.empty?
@@ -11,17 +11,22 @@ module Canon
11
11
  # @param content [String] The text content of the line
12
12
  # @param type [Symbol] The type of line (:unchanged, :added, :removed, :changed)
13
13
  # @param diff_node [DiffNode, nil] The semantic diff node this line belongs to
14
- def initialize(line_number:, content:, type:, diff_node: nil)
14
+ # @param formatting [Boolean] Whether this is a formatting-only difference
15
+ def initialize(line_number:, content:, type:, diff_node: nil,
16
+ formatting: false)
15
17
  @line_number = line_number
16
18
  @content = content
17
19
  @type = type
18
20
  @diff_node = diff_node
21
+ @formatting = formatting
19
22
  end
20
23
 
21
24
  # @return [Boolean] true if this line represents a normative difference
22
25
  # If diff_node is nil (not linked to any semantic difference), the line
23
26
  # is considered informative (cosmetic/unchanged)
27
+ # Formatting-only diffs are never normative
24
28
  def normative?
29
+ return false if formatting?
25
30
  return false if diff_node.nil?
26
31
 
27
32
  diff_node.normative?
@@ -29,12 +34,20 @@ module Canon
29
34
 
30
35
  # @return [Boolean] true if this line represents an informative-only difference
31
36
  # If diff_node is nil (not linked), it's not informative either (it's unchanged/cosmetic)
37
+ # Formatting-only diffs are never informative
32
38
  def informative?
39
+ return false if formatting?
33
40
  return false if diff_node.nil?
34
41
 
35
42
  diff_node.informative?
36
43
  end
37
44
 
45
+ # @return [Boolean] true if this line represents a formatting-only difference
46
+ # Formatting diffs are purely cosmetic (whitespace, line breaks) with no semantic meaning
47
+ def formatting?
48
+ @formatting == true
49
+ end
50
+
38
51
  # @return [Boolean] true if this line is unchanged
39
52
  def unchanged?
40
53
  type == :unchanged
@@ -62,6 +75,8 @@ module Canon
62
75
  type: type,
63
76
  diff_node: diff_node&.to_h,
64
77
  normative: normative?,
78
+ informative: informative?,
79
+ formatting: formatting?,
65
80
  }
66
81
  end
67
82
 
@@ -70,7 +85,8 @@ module Canon
70
85
  line_number == other.line_number &&
71
86
  content == other.content &&
72
87
  type == other.type &&
73
- diff_node == other.diff_node
88
+ diff_node == other.diff_node &&
89
+ @formatting == other.instance_variable_get(:@formatting)
74
90
  end
75
91
  end
76
92
  end
@@ -6,8 +6,8 @@ module Canon
6
6
  # This is created during the Comparison Layer and carries information about
7
7
  # which dimension caused the difference and whether it's normative or informative
8
8
  class DiffNode
9
- attr_reader :node1, :node2, :dimension, :reason
10
- attr_accessor :normative
9
+ attr_reader :node1, :node2
10
+ attr_accessor :dimension, :reason, :normative, :formatting
11
11
 
12
12
  # @param node1 [Object] The first node being compared
13
13
  # @param node2 [Object] The second node being compared
@@ -21,18 +21,31 @@ module Canon
21
21
  @dimension = dimension
22
22
  @reason = reason
23
23
  @normative = nil # Will be set by DiffClassifier
24
+ @formatting = nil # Will be set by DiffClassifier
24
25
  end
25
26
 
26
27
  # @return [Boolean] true if this diff is normative (affects equivalence)
28
+ # Formatting-only diffs are never normative
27
29
  def normative?
30
+ return false if formatting?
31
+
28
32
  @normative == true
29
33
  end
30
34
 
31
35
  # @return [Boolean] true if this diff is informative only (doesn't affect equivalence)
36
+ # Formatting-only diffs are never informative
32
37
  def informative?
38
+ return false if formatting?
39
+
33
40
  @normative == false
34
41
  end
35
42
 
43
+ # @return [Boolean] true if this diff is formatting-only (purely cosmetic)
44
+ # Formatting diffs are whitespace/line break differences with no semantic meaning
45
+ def formatting?
46
+ @formatting == true
47
+ end
48
+
36
49
  def to_h
37
50
  {
38
51
  node1: node1,
@@ -40,6 +53,7 @@ module Canon
40
53
  dimension: dimension,
41
54
  reason: reason,
42
55
  normative: normative,
56
+ formatting: formatting,
43
57
  }
44
58
  end
45
59
 
@@ -49,7 +63,8 @@ module Canon
49
63
  node2 == other.node2 &&
50
64
  dimension == other.dimension &&
51
65
  reason == other.reason &&
52
- normative == other.normative
66
+ normative == other.normative &&
67
+ formatting == other.formatting
53
68
  end
54
69
  end
55
70
  end
@@ -61,31 +61,75 @@ module Canon
61
61
  diff_node: nil,
62
62
  )
63
63
  when "-"
64
+ # Find the diff node for this line
65
+ node = shared_informative_node || find_diff_node_for_line(
66
+ line_num, lines1, :removed
67
+ )
68
+
69
+ # Check if this is formatting-only:
70
+ # 1. First check if the DiffNode itself is marked as formatting-only
71
+ # 2. Otherwise, check line-level formatting
72
+ formatting = if node.respond_to?(:formatting?) && node.formatting?
73
+ true
74
+ else
75
+ formatting_only_line?(
76
+ change.old_element, ""
77
+ )
78
+ end
79
+
64
80
  DiffLine.new(
65
81
  line_number: line_num,
66
82
  content: change.old_element,
67
83
  type: :removed,
68
- diff_node: shared_informative_node || find_diff_node_for_line(
69
- line_num, lines1, :removed
70
- ),
84
+ diff_node: node,
85
+ formatting: formatting,
71
86
  )
72
87
  when "+"
88
+ # Find the diff node for this line
89
+ node = shared_informative_node || find_diff_node_for_line(
90
+ line_num, lines2, :added
91
+ )
92
+
93
+ # Check if this is formatting-only:
94
+ # 1. First check if the DiffNode itself is marked as formatting-only
95
+ # 2. Otherwise, check line-level formatting
96
+ formatting = if node.respond_to?(:formatting?) && node.formatting?
97
+ true
98
+ else
99
+ formatting_only_line?("",
100
+ change.new_element)
101
+ end
102
+
73
103
  DiffLine.new(
74
104
  line_number: line_num,
75
105
  content: change.new_element,
76
106
  type: :added,
77
- diff_node: shared_informative_node || find_diff_node_for_line(
78
- line_num, lines2, :added
79
- ),
107
+ diff_node: node,
108
+ formatting: formatting,
80
109
  )
81
110
  when "!"
111
+ # Find the diff node for this line
112
+ node = shared_informative_node || find_diff_node_for_line(
113
+ line_num, lines2, :changed
114
+ )
115
+
116
+ # Check if this is formatting-only:
117
+ # 1. First check if the DiffNode itself is marked as formatting-only
118
+ # 2. Otherwise, check line-level formatting
119
+ formatting = if node.respond_to?(:formatting?) && node.formatting?
120
+ true
121
+ else
122
+ formatting_only_line?(
123
+ change.old_element, change.new_element
124
+ )
125
+ end
126
+
82
127
  DiffLine.new(
83
128
  line_number: line_num,
84
129
  content: change.new_element,
85
130
  type: :changed,
86
- diff_node: shared_informative_node || find_diff_node_for_line(
87
- line_num, lines2, :changed
88
- ),
131
+ diff_node: node,
132
+ formatting: formatting,
89
133
  )
90
134
  end
91
135
 
@@ -98,6 +142,15 @@ module Canon
98
142
 
99
143
  private
100
144
 
145
+ # Check if two lines differ only in formatting (whitespace)
146
+ # @param line1 [String] First line
147
+ # @param line2 [String] Second line
148
+ # @return [Boolean] true if formatting-only difference
149
+ def formatting_only_line?(line1, line2)
150
+ require_relative "formatting_detector"
151
+ FormattingDetector.formatting_only?(line1, line2)
152
+ end
153
+
101
154
  # Find the DiffNode associated with a line
102
155
  # Uses element name matching for precise line-level linking
103
156
  def find_diff_node_for_line(line_num, lines, change_type)
@@ -125,9 +178,15 @@ module Canon
125
178
  end
126
179
 
127
180
  nodes_to_check.any? do |node|
128
- next unless node.respond_to?(:name)
129
-
130
- node.name == line_element_name
181
+ # Check if the node itself has the matching name
182
+ if node.respond_to?(:name) && node.name == line_element_name
183
+ true
184
+ # Check if the node's parent has the matching name (for TextNode diffs)
185
+ elsif node.respond_to?(:parent) && node.parent.respond_to?(:name) && node.parent.name == line_element_name
186
+ true
187
+ else
188
+ false
189
+ end
131
190
  end
132
191
  end
133
192
  end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Diff
5
+ # Detects if differences between lines are formatting-only
6
+ # (whitespace, line breaks) with no semantic content changes
7
+ class FormattingDetector
8
+ # Detect if two lines differ only in formatting
9
+ #
10
+ # @param line1 [String, nil] First line to compare
11
+ # @param line2 [String, nil] Second line to compare
12
+ # @return [Boolean] true if lines differ only in formatting
13
+ def self.formatting_only?(line1, line2)
14
+ # If both are nil or empty, not a formatting diff
15
+ return false if blank?(line1) && blank?(line2)
16
+
17
+ # If only one is blank, it's not just formatting
18
+ return false if blank?(line1) || blank?(line2)
19
+
20
+ # Compare normalized versions
21
+ normalize_for_comparison(line1) == normalize_for_comparison(line2)
22
+ end
23
+
24
+ # Aggressive normalization for formatting comparison
25
+ # Collapses all whitespace to single space and strips
26
+ # Also normalizes whitespace around tag delimiters
27
+ #
28
+ # @param line [String, nil] Line to normalize
29
+ # @return [String] Normalized line
30
+ def self.normalize_for_comparison(line)
31
+ return "" if line.nil?
32
+
33
+ # Collapse all whitespace (spaces, tabs, newlines) to single space
34
+ normalized = line.gsub(/\s+/, " ").strip
35
+
36
+ # Normalize whitespace around tag delimiters
37
+ # Remove spaces before > and after <
38
+ normalized = normalized.gsub(/\s+>/, ">") # "div >" -> "div>"
39
+ normalized.gsub(/<\s+/, "<") # "< div" -> "<div"
40
+ end
41
+
42
+ # Check if a line is blank (nil or whitespace-only)
43
+ #
44
+ # @param line [String, nil] Line to check
45
+ # @return [Boolean] true if blank
46
+ def self.blank?(line)
47
+ line.nil? || line.strip.empty?
48
+ end
49
+
50
+ private_class_method :normalize_for_comparison, :blank?
51
+ end
52
+ end
53
+ end
@@ -68,6 +68,35 @@ module Canon
68
68
 
69
69
  protected
70
70
 
71
+ # Filter differences for display based on show_diffs setting
72
+ #
73
+ # @param differences [Array<Canon::Diff::DiffNode>] Array of differences
74
+ # @return [Array<Canon::Diff::DiffNode>] Filtered differences
75
+ def filter_differences_for_display(differences)
76
+ return differences if @show_diffs.nil? || @show_diffs == :all
77
+
78
+ differences.select do |diff|
79
+ # Handle both DiffNode objects and legacy Hash format
80
+ is_normative = if diff.respond_to?(:normative?)
81
+ diff.normative?
82
+ elsif diff.is_a?(Hash) && diff.key?(:normative)
83
+ diff[:normative]
84
+ else
85
+ # Default to normative if unknown
86
+ true
87
+ end
88
+
89
+ case @show_diffs
90
+ when :normative
91
+ is_normative
92
+ when :informative
93
+ !is_normative
94
+ else
95
+ true # Unknown value, show all
96
+ end
97
+ end
98
+ end
99
+
71
100
  # Build hunks from diff with context lines
72
101
  #
73
102
  # @param diffs [Array] LCS diff array
@@ -252,6 +281,7 @@ module Canon
252
281
  # @return [String] Formatted context
253
282
  def format_context(context, diffs, base_line1, base_line2)
254
283
  output = []
284
+ max_lines = get_max_diff_lines
255
285
 
256
286
  (context.start_idx..context.end_idx).each do |idx|
257
287
  change = diffs[idx]
@@ -275,6 +305,18 @@ module Canon
275
305
  change.old_element,
276
306
  change.new_element)
277
307
  end
308
+
309
+ # Check if we've exceeded the line limit
310
+ if max_lines&.positive? && output.size >= max_lines
311
+ output << ""
312
+ output << colorize(
313
+ "... Output truncated at #{max_lines} lines ...", :yellow, :bold
314
+ )
315
+ output << colorize(
316
+ "Increase limit via CANON_MAX_DIFF_LINES or config.diff.max_diff_lines", :yellow
317
+ )
318
+ break
319
+ end
278
320
  end
279
321
 
280
322
  output.join("\n")
@@ -299,19 +341,22 @@ module Canon
299
341
  #
300
342
  # @param old_num [Integer, nil] Line number in old file
301
343
  # @param new_num [Integer, nil] Line number in new file
302
- # @param marker [String] Diff marker (' ', '-', '+', '~')
344
+ # @param marker [String] Diff marker (' ', '-', '+', '<', '>', '[', ']')
303
345
  # @param content [String] Line content
304
346
  # @param color [Symbol, nil] Color for diff lines
305
347
  # @param informative [Boolean] Whether this is an informative diff
348
+ # @param formatting [Boolean] Whether this is a formatting-only diff
306
349
  # @return [String] Formatted line
307
350
  def format_unified_line(old_num, new_num, marker, content, color = nil,
308
- informative: false)
351
+ informative: false, formatting: false)
309
352
  old_str = old_num ? "%4d" % old_num : " "
310
353
  new_str = new_num ? "%4d" % new_num : " "
311
- marker_part = "#{marker} "
312
354
 
313
- # For informative diffs, use cyan color
314
- effective_color = informative ? :cyan : color
355
+ # Formatting and informative diffs use directional colors already passed in
356
+ # No need to override since callers set the correct color
357
+ effective_color = color
358
+
359
+ marker_part = "#{marker} "
315
360
 
316
361
  visualized_content = if effective_color
317
362
  apply_visualization(content, effective_color)
@@ -401,6 +446,16 @@ module Canon
401
446
  visual
402
447
  end
403
448
  end
449
+
450
+ # Get max diff lines limit
451
+ #
452
+ # @return [Integer, nil] Max diff output lines
453
+ def get_max_diff_lines
454
+ # Try to get from config if available
455
+ config = Canon::Config.instance
456
+ # Default to 10,000 if config not available
457
+ config&.xml&.diff&.max_diff_lines || 10_000
458
+ end
404
459
  end
405
460
  end
406
461
  end