RubyGems - canon - Versions diffs - 0.1.8 → 0.1.10 - Mend

canon 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101)

checksums.yaml +4 -4
data/.rubocop_todo.yml +83 -22
data/docs/Gemfile +1 -0
data/docs/_config.yml +90 -1
data/docs/advanced/diff-classification.adoc +196 -24
data/docs/features/match-options/index.adoc +239 -1
data/lib/canon/comparison/format_detector.rb +2 -1
data/lib/canon/comparison/html_comparator.rb +19 -8
data/lib/canon/comparison/html_compare_profile.rb +8 -2
data/lib/canon/comparison/markup_comparator.rb +109 -2
data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
data/lib/canon/comparison/xml_comparator.rb +240 -23
data/lib/canon/comparison/xml_node_comparison.rb +25 -3
data/lib/canon/diff/diff_classifier.rb +119 -5
data/lib/canon/diff/formatting_detector.rb +1 -1
data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
data/lib/canon/rspec_matchers.rb +37 -8
data/lib/canon/version.rb +1 -1
data/lib/canon/xml/data_model.rb +24 -13
metadata +4 -78
data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
data/false_positive_analysis.txt +0 -0
data/file1.html +0 -1
data/file2.html +0 -1
data/old-docs/ADVANCED_TOPICS.adoc +0 -20
data/old-docs/BASIC_USAGE.adoc +0 -16
data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
data/old-docs/CLI.adoc +0 -497
data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
data/old-docs/DIFF_FORMATTING.adoc +0 -540
data/old-docs/DIFF_PARAMETERS.adoc +0 -261
data/old-docs/DOM_DIFF.adoc +0 -1017
data/old-docs/ENV_CONFIG.adoc +0 -876
data/old-docs/FORMATS.adoc +0 -867
data/old-docs/INPUT_VALIDATION.adoc +0 -477
data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
data/old-docs/MATCH_OPTIONS.adoc +0 -912
data/old-docs/MODES.adoc +0 -432
data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
data/old-docs/OPTIONS.adoc +0 -1387
data/old-docs/PREPROCESSING.adoc +0 -491
data/old-docs/README.old.adoc +0 -2831
data/old-docs/RSPEC.adoc +0 -814
data/old-docs/RUBY_API.adoc +0 -485
data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
data/old-docs/STRING_COMPARE.adoc +0 -345
data/old-docs/TMP.adoc +0 -3384
data/old-docs/TREE_DIFF.adoc +0 -1080
data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
data/old-docs/VERBOSE.adoc +0 -482
data/old-docs/VISUALIZATION_MAP.adoc +0 -625
data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
data/scripts/analyze_current_state.rb +0 -85
data/scripts/analyze_false_positives.rb +0 -114
data/scripts/analyze_remaining_failures.rb +0 -105
data/scripts/compare_current_failures.rb +0 -95
data/scripts/compare_dom_tree_diff.rb +0 -158
data/scripts/compare_failures.rb +0 -151
data/scripts/debug_attribute_extraction.rb +0 -66
data/scripts/debug_blocks_839.rb +0 -115
data/scripts/debug_meta_matching.rb +0 -52
data/scripts/debug_p_matching.rb +0 -192
data/scripts/debug_signature_matching.rb +0 -118
data/scripts/debug_sourcecode_124.rb +0 -32
data/scripts/debug_whitespace_sensitive.rb +0 -192
data/scripts/extract_false_positives.rb +0 -138
data/scripts/find_actual_false_positives.rb +0 -125
data/scripts/investigate_all_false_positives.rb +0 -161
data/scripts/investigate_batch1.rb +0 -127
data/scripts/investigate_classification.rb +0 -150
data/scripts/investigate_classification_detailed.rb +0 -190
data/scripts/investigate_common_failures.rb +0 -342
data/scripts/investigate_false_negative.rb +0 -80
data/scripts/investigate_false_positive.rb +0 -83
data/scripts/investigate_false_positives.rb +0 -227
data/scripts/investigate_false_positives_batch.rb +0 -163
data/scripts/investigate_mixed_content.rb +0 -125
data/scripts/investigate_remaining_16.rb +0 -214
data/scripts/run_single_test.rb +0 -29
data/scripts/test_all_false_positives.rb +0 -95
data/scripts/test_attribute_details.rb +0 -61
data/scripts/test_both_algorithms.rb +0 -49
data/scripts/test_both_simple.rb +0 -49
data/scripts/test_enhanced_semantic_output.rb +0 -125
data/scripts/test_readme_examples.rb +0 -131
data/scripts/test_semantic_tree_diff.rb +0 -99
data/scripts/test_semantic_ux_improvements.rb +0 -135
data/scripts/test_single_false_positive.rb +0 -119
data/scripts/test_size_limits.rb +0 -99
data/test_html_1.html +0 -21
data/test_html_2.html +0 -21
data/test_nokogiri.rb +0 -33
data/test_normalize.rb +0 -45

data/lib/canon/comparison/xml_comparator.rb CHANGED

@@ -18,6 +18,8 @@ require_relative "xml_comparator/namespace_comparator"
 require_relative "xml_comparator/node_type_comparator"
 require_relative "xml_comparator/child_comparison"
 require_relative "xml_comparator/diff_node_builder"
+# Whitespace sensitivity module
+require_relative "whitespace_sensitivity"
 module Canon
   module Comparison
@@ -90,9 +92,15 @@ module Canon
           # Create child_opts with resolved options
           child_opts = opts.merge(child_opts)
+          # Determine if we should preserve whitespace during parsing
+          # When structural_whitespace is :strict, preserve all whitespace-only text nodes
+          preserve_whitespace = match_opts_hash[:structural_whitespace] == :strict
           # Parse nodes if they are strings, applying preprocessing if needed
-          node1 = parse_node(n1, match_opts_hash[:preprocessing])
-          node2 = parse_node(n2, match_opts_hash[:preprocessing])
+          node1 = parse_node(n1, match_opts_hash[:preprocessing],
+                             preserve_whitespace: preserve_whitespace)
+          node2 = parse_node(n2, match_opts_hash[:preprocessing],
+                             preserve_whitespace: preserve_whitespace)
           # Store original strings for line diff display (before preprocessing)
           original1 = if n1.is_a?(String)
@@ -209,8 +217,9 @@ module Canon
         # Parse a node from string or return as-is
         # Applies preprocessing transformation before parsing if specified
         # Delegates to NodeParser module
-        def parse_node(node, preprocessing = :none)
-          XmlComparatorHelpers::NodeParser.parse(node, preprocessing)
+        def parse_node(node, preprocessing = :none, preserve_whitespace: false)
+          XmlComparatorHelpers::NodeParser.parse(node, preprocessing,
+                                                 preserve_whitespace: preserve_whitespace)
         end
         # Main comparison dispatcher
@@ -331,7 +340,8 @@ module Canon
           # For HTML, check if text node is inside whitespace-preserving element
           # If so, always use strict comparison regardless of text_content setting
-          if should_preserve_whitespace_strictly?(n1, n2)
+          sensitive_element = should_preserve_whitespace_strictly?(n1, n2, opts)
+          if sensitive_element
             behavior = :strict
           end
@@ -344,15 +354,23 @@ module Canon
           # Determine the correct dimension for this difference
           # - If text_content is :strict, ALL differences use :text_content dimension
-          # - If text_content is :normalize, whitespace-only diffs use :structural_whitespace
+          # - If text_content is :normalize, whitespace-only diffs could use :structural_whitespace
+          #   but we keep :text_content to ensure correct classification behavior
           # - Otherwise use :text_content
-          dimension = if behavior == :normalize && whitespace_only_difference?(
-            text1, text2
-          )
-                        :structural_whitespace
-                      else
-                        :text_content
-                      end
+          # However, if element is whitespace-sensitive (like <pre> in HTML),
+          # always use :text_content dimension regardless of behavior
+          #
+          # NOTE: We keep the dimension as :text_content even for whitespace-only diffs
+          # when text_content: :normalize. This ensures that the classification uses
+          # the text_content behavior (:normalize) instead of structural_whitespace
+          # behavior (:strict for XML), which would incorrectly mark the diff as normative.
+          if sensitive_element
+          # Whitespace-sensitive element: always use :text_content dimension
+          else
+            # Always use :text_content for text differences
+            # This ensures correct classification based on text_content behavior
+          end
+          dimension = :text_content
           # Create DiffNode in verbose mode when raw content differs
           # This ensures informative diffs are created even for :ignore/:normalize
@@ -368,17 +386,23 @@ module Canon
         # Check if whitespace should be preserved strictly for these text nodes
         # This applies to HTML elements like pre, code, textarea, script, style
-        def should_preserve_whitespace_strictly?(n1, n2)
-          # Only applies to Nokogiri nodes (HTML)
-          return false unless n1.respond_to?(:parent) && n2.respond_to?(:parent)
-          return false unless n1.parent.respond_to?(:name) && n2.parent.respond_to?(:name)
+        # and elements with xml:space="preserve" or in user-configured whitelist
+        def should_preserve_whitespace_strictly?(n1, n2, opts)
+          # Use WhitespaceSensitivity module to check if element is sensitive
+          # Check both n1 and n2 - if either is in a sensitive element, preserve strictly
+          if n1.respond_to?(:parent)
+            sensitivity_opts = { match_opts: opts[:match_opts] }
+            return true if WhitespaceSensitivity.element_sensitive?(n1,
+                                                                    sensitivity_opts)
+          end
-          # Elements where whitespace must be preserved in HTML
-          preserve_elements = %w[pre code textarea script style]
+          if n2.respond_to?(:parent)
+            sensitivity_opts = { match_opts: opts[:match_opts] }
+            return true if WhitespaceSensitivity.element_sensitive?(n2,
+                                                                    sensitivity_opts)
+          end
-          # Check if either node is inside a whitespace-preserving element
-          in_preserve_element?(n1, preserve_elements) ||
-            in_preserve_element?(n2, preserve_elements)
+          false
         end
         # Check if a node is inside a whitespace-preserving element
@@ -469,7 +493,8 @@ module Canon
         #
         # Delegates to ChildComparison module which handles both ElementMatcher
         # (semantic matching) and simple positional comparison.
-        def compare_children(n1, n2, opts, child_opts, diff_children, differences)
+        def compare_children(n1, n2, opts, child_opts, diff_children,
+differences)
           XmlComparatorHelpers::ChildComparison.compare(
             n1, n2, self, opts, child_opts, diff_children, differences
           )
@@ -543,9 +568,201 @@ module Canon
             end
           end
+          # For attribute presence differences, show what attributes differ
+          if dimension == :attribute_presence
+            attrs1 = extract_attributes(node1)
+            attrs2 = extract_attributes(node2)
+            return build_attribute_diff_reason(attrs1, attrs2)
+          end
+          # For text content differences, show the actual text (truncated if needed)
+          if dimension == :text_content
+            text1 = extract_text_from_node(node1)
+            text2 = extract_text_from_node(node2)
+            return build_text_diff_reason(text1, text2)
+          end
           "#{diff1} vs #{diff2}"
         end
+        # Build a clear reason message for attribute presence differences
+        #
+        # @param attrs1 [Hash, nil] First node's attributes
+        # @param attrs2 [Hash, nil] Second node's attributes
+        # @return [String] Clear explanation of the attribute difference
+        def build_attribute_diff_reason(attrs1, attrs2)
+          return "#{attrs1&.keys&.size || 0} vs #{attrs2&.keys&.size || 0} attributes" unless attrs1 && attrs2
+          require "set"
+          keys1 = attrs1.keys.to_set
+          keys2 = attrs2.keys.to_set
+          only_in_first = keys1 - keys2
+          only_in_second = keys2 - keys1
+          common = keys1 & keys2
+          # Check if values differ for common keys
+          different_values = common.reject { |k| attrs1[k] == attrs2[k] }
+          parts = []
+          parts << "only in first: #{only_in_first.to_a.sort.join(', ')}" if only_in_first.any?
+          parts << "only in second: #{only_in_second.to_a.sort.join(', ')}" if only_in_second.any?
+          parts << "different values: #{different_values.sort.join(', ')}" if different_values.any?
+          if parts.empty?
+            "#{keys1.size} vs #{keys2.size} attributes (same names)"
+          else
+            parts.join("; ")
+          end
+        end
+        # Extract text from a node for diff reason
+        #
+        # @param node [Object, nil] Node to extract text from
+        # @return [String, nil] Text content or nil
+        def extract_text_from_node(node)
+          return nil if node.nil?
+          # For Canon::Xml::Nodes::TextNode
+          return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
+          # For XML/HTML nodes with text_content method
+          return node.text_content if node.respond_to?(:text_content)
+          # For nodes with text method
+          return node.text if node.respond_to?(:text)
+          # For nodes with content method (Moxml::Text)
+          return node.content if node.respond_to?(:content)
+          # For nodes with value method (other types)
+          return node.value if node.respond_to?(:value)
+          # For simple text nodes or strings
+          return node.to_s if node.is_a?(String)
+          # For other node types, try to_s
+          node.to_s
+        rescue StandardError
+          nil
+        end
+        # Build a clear reason message for text content differences
+        #
+        # @param text1 [String, nil] First text content
+        # @param text2 [String, nil] Second text content
+        # @return [String] Clear explanation of the text difference
+        def build_text_diff_reason(text1, text2)
+          # Handle nil cases
+          return "missing vs '#{truncate_text(text2)}'" if text1.nil? && text2
+          return "'#{truncate_text(text2)}' vs missing" if text1 && text2.nil?
+          return "both missing" if text1.nil? && text2.nil?
+          # Check if both are whitespace-only
+          if whitespace_only?(text1) && whitespace_only?(text2)
+            return "whitespace: #{describe_whitespace(text1)} vs #{describe_whitespace(text2)}"
+          end
+          # Show text with visible whitespace markers
+          # Use escaped representations for clarity: \n for newline, \t for tab, · for spaces
+          vis1 = visualize_whitespace(text1)
+          vis2 = visualize_whitespace(text2)
+          "Text: \"#{vis1}\" vs \"#{vis2}\""
+        end
+        # Check if text is only whitespace
+        #
+        # @param text [String] Text to check
+        # @return [Boolean] true if whitespace-only
+        def whitespace_only?(text)
+          return false if text.nil?
+          text.to_s.strip.empty?
+        end
+        # Make whitespace visible in text content
+        # Uses the existing character visualization map from DiffFormatter (single source of truth)
+        #
+        # @param text [String] Text to visualize
+        # @return [String] Text with visible whitespace markers
+        def visualize_whitespace(text)
+          return "" if text.nil?
+          # Use the character map loader as the single source of truth
+          viz_map = character_visualization_map
+          # Replace each character with its visualization
+          text.chars.map { |char| viz_map[char] || char }.join
+        end
+        # Get the character visualization map (lazy-loaded to avoid circular dependency)
+        #
+        # @return [Hash] Character to visualization symbol mapping
+        def character_visualization_map
+          @character_visualization_map ||= begin
+            # Load the YAML file directly to avoid circular dependency
+            require "yaml"
+            lib_root = File.expand_path("../..", __dir__)
+            yaml_path = File.join(lib_root,
+                                  "canon/diff_formatter/character_map.yml")
+            data = YAML.load_file(yaml_path)
+            # Build visualization map from the YAML data
+            visualization_map = {}
+            data["characters"].each do |char_data|
+              # Get the character from either unicode code point or character field
+              char = if char_data["unicode"]
+                       # Convert hex string to character
+                       [char_data["unicode"].to_i(16)].pack("U")
+                     else
+                       # Use character field directly (handles \n, \t, etc.)
+                       char_data["character"]
+                     end
+              vis = char_data["visualization"]
+              visualization_map[char] = vis
+            end
+            visualization_map
+          end
+        end
+        # Describe whitespace content in a readable way
+        #
+        # @param text [String] Whitespace text
+        # @return [String] Description like "4 chars (2 newlines, 2 spaces)"
+        def describe_whitespace(text)
+          return "0 chars" if text.nil? || text.empty?
+          char_count = text.length
+          newline_count = text.count("\n")
+          space_count = text.count(" ")
+          tab_count = text.count("\t")
+          parts = []
+          parts << "#{newline_count} newlines" if newline_count.positive?
+          parts << "#{space_count} spaces" if space_count.positive?
+          parts << "#{tab_count} tabs" if tab_count.positive?
+          description = parts.join(", ")
+          "#{char_count} chars (#{description})"
+        end
+        # Truncate text for display in reason messages
+        #
+        # @param text [String] Text to truncate
+        # @param max_length [Integer] Maximum length
+        # @return [String] Truncated text
+        def truncate_text(text, max_length = 40)
+          return "" if text.nil?
+          text = text.to_s
+          return text if text.length <= max_length
+          "#{text[0...max_length]}..."
+        end
         # Compare namespace declarations (xmlns and xmlns:* attributes)
         # Delegates to XmlComparatorHelpers::NamespaceComparator
         def compare_namespace_declarations(n1, n2, opts, differences)

data/lib/canon/comparison/xml_node_comparison.rb CHANGED

@@ -139,9 +139,13 @@ diff_children, differences)
         # Check structural_whitespace match option
         match_opts = opts[:match_opts]
-        # Filter out whitespace-only text nodes
-        if match_opts && %i[ignore
-                            normalize].include?(match_opts[:structural_whitespace]) && text_node?(node)
+        return false unless match_opts
+        # Filter out whitespace-only text nodes based on structural_whitespace setting
+        # - :ignore or :normalize: Filter all whitespace-only text nodes
+        # - :strict: Preserve all whitespace-only text nodes (don't filter any)
+        if text_node?(node) && %i[ignore
+                                  normalize].include?(match_opts[:structural_whitespace])
           text = node_text(node)
           return true if MatchOptions.normalize_text(text).empty?
         end
@@ -184,6 +188,24 @@ diff_children, differences)
           node.respond_to?(:node_type) && node.node_type == :text
       end
+      # Extract text content from a node
+      #
+      # @param node [Object] Node to extract text from
+      # @return [String] Text content
+      def self.node_text(node)
+        return "" unless node
+        if node.respond_to?(:content)
+          node.content.to_s
+        elsif node.respond_to?(:text)
+          node.text.to_s
+        elsif node.respond_to?(:value)
+          node.value.to_s
+        else
+          ""
+        end
+      end
       # Dispatch by Canon::Xml::Node type
       def self.dispatch_canon_node_type(node1, node2, opts, child_opts,
 diff_children, differences)

data/lib/canon/diff/diff_classifier.rb CHANGED

@@ -1,12 +1,19 @@
 # frozen_string_literal: true
 require_relative "formatting_detector"
+require_relative "xml_serialization_formatter"
 require_relative "../comparison/compare_profile"
+require_relative "../comparison/whitespace_sensitivity"
 module Canon
   module Diff
     # Classifies DiffNodes as normative (affects equivalence) or informative (doesn't affect equivalence)
     # based on the match options in effect
+    #
+    # Classification hierarchy (three distinct kinds of differences):
+    # 1. Serialization formatting: XML syntax differences (always non-normative)
+    # 2. Content formatting: Whitespace differences in content (non-normative when normalized)
+    # 3. Normative: Semantic content differences (affect equivalence)
     class DiffClassifier
       attr_reader :match_options, :profile
@@ -24,15 +31,46 @@ module Canon
       # Classify a single DiffNode as normative or informative
       # Hierarchy: formatting-only < informative < normative
-      # CompareProfile determines base classification, FormattingDetector refines informative differences
+      # CompareProfile determines base classification, XmlSerializationFormatter handles serialization formatting
       # @param diff_node [DiffNode] The diff node to classify
       # @return [DiffNode] The same diff node with normative/formatting attributes set
       def classify(diff_node)
-        # FIRST: Determine if this dimension is normative based on CompareProfile
+        # FIRST: Check for XML serialization-level formatting differences
+        # These are ALWAYS non-normative (formatting-only) regardless of match options
+        # Examples: self-closing tags (<tag/>) vs explicit closing tags (<tag></tag>)
+        if XmlSerializationFormatter.serialization_formatting?(diff_node)
+          diff_node.formatting = true
+          diff_node.normative = false
+          return diff_node
+        end
+        # SECOND: Handle content-level formatting for text_content with :normalize behavior
+        # When text_content is :normalize and the difference is formatting-only,
+        # it should be marked as non-normative (informative)
+        # This ensures that verbose and non-verbose modes give consistent results
+        #
+        # EXCEPTION: If the text node is inside a whitespace-sensitive element
+        # (like <pre>, <code>, <textarea> in HTML), don't apply formatting detection
+        # because whitespace should be preserved in these elements
+        #
+        # This check must come BEFORE normative_dimension? is called,
+        # because normative_dimension? returns true for text_content: :normalize
+        # (since the dimension affects equivalence), which would prevent formatting
+        # detection from being applied.
+        if diff_node.dimension == :text_content &&
+            profile.send(:behavior_for, :text_content) == :normalize &&
+            !inside_whitespace_sensitive_element?(diff_node) &&
+            formatting_only_diff?(diff_node)
+          diff_node.formatting = true
+          diff_node.normative = false
+          return diff_node
+        end
+        # THIRD: Determine if this dimension is normative based on CompareProfile
         # This respects the policy settings (strict/normalize/ignore)
         is_normative = profile.normative_dimension?(diff_node.dimension)
-        # SECOND: Check if FormattingDetector should be consulted
+        # FOURTH: Check if FormattingDetector should be consulted for non-normative dimensions
         # Only check for formatting-only when dimension is NOT normative
         # This ensures strict mode differences remain normative
         should_check_formatting = !is_normative &&
@@ -45,7 +83,7 @@ module Canon
           return diff_node
         end
-        # Otherwise, use the normative determination from CompareProfile
+        # FIFTH: Apply the normative determination from CompareProfile
         diff_node.formatting = false
         diff_node.normative = is_normative
@@ -65,10 +103,59 @@ module Canon
       # @param diff_node [DiffNode] The diff node to check
       # @return [Boolean] true if formatting-only
       def formatting_only_diff?(diff_node)
+        # Only apply formatting detection to actual text content differences
+        # If the nodes are not text nodes (e.g., element nodes), don't apply formatting detection
+        node1 = diff_node.node1
+        node2 = diff_node.node2
+        # Check if both nodes are text nodes
+        # If not, this is not a formatting-only difference
+        return false unless text_node?(node1) && text_node?(node2)
         text1 = extract_text_content(diff_node.node1)
         text2 = extract_text_content(diff_node.node2)
-        FormattingDetector.formatting_only?(text1, text2)
+        # For text_content dimension, use normalized text comparison
+        # This handles cases like "" vs "   " (both normalize to "")
+        if diff_node.dimension == :text_content
+          normalized_equivalent?(text1, text2)
+        else
+          FormattingDetector.formatting_only?(text1, text2)
+        end
+      end
+      # Check if two texts are equivalent after normalization
+      # This detects formatting-only differences where normalized texts match
+      # @param text1 [String, nil] First text
+      # @param text2 [String, nil] Second text
+      # @return [Boolean] true if normalized texts are equivalent
+      def normalized_equivalent?(text1, text2)
+        return false if text1.nil? && text2.nil?
+        return false if text1.nil? || text2.nil?
+        # Use MatchOptions.normalize_text for consistency
+        normalized1 = Canon::Comparison::MatchOptions.normalize_text(text1)
+        normalized2 = Canon::Comparison::MatchOptions.normalize_text(text2)
+        # If normalized texts are equivalent but originals are different,
+        # it's a formatting-only difference
+        normalized1 == normalized2 && text1 != text2
+      end
+      # Check if the text node is inside a whitespace-sensitive element
+      # @param diff_node [DiffNode] The diff node to check
+      # @return [Boolean] true if inside a whitespace-sensitive element
+      def inside_whitespace_sensitive_element?(diff_node)
+        # Get the text node (not the parent element)
+        node = diff_node.node1 || diff_node.node2
+        return false unless node
+        # WhitespaceSensitivity.element_sensitive? expects a text node
+        # and checks its parent element
+        # We need to pass the full options structure with :match_opts key
+        opts = { match_opts: @match_options.options }
+        Canon::Comparison::WhitespaceSensitivity.element_sensitive?(node, opts)
       end
       # Extract text content from a node for formatting comparison
@@ -101,6 +188,33 @@ module Canon
         # If extraction fails, return nil (not formatting-only)
         nil
       end
+      # Check if a node is a text node
+      # @param node [Object] The node to check
+      # @return [Boolean] true if the node is a text node
+      def text_node?(node)
+        return false if node.nil?
+        # Canon::Xml::Nodes::TextNode
+        return true if node.is_a?(Canon::Xml::Nodes::TextNode)
+        # Nokogiri text nodes (node_type returns integer constant like 3)
+        return true if node.respond_to?(:node_type) &&
+          node.node_type.is_a?(Integer) &&
+          node.node_type == Nokogiri::XML::Node::TEXT_NODE
+        # Moxml text nodes (node_type returns symbol)
+        return true if node.respond_to?(:node_type) && node.node_type == :text
+        # String
+        return true if node.is_a?(String)
+        # Test doubles or objects with text node-like interface
+        # Check if it has a value method (contains text content)
+        return true if node.respond_to?(:value)
+        false
+      end
     end
   end
 end

data/lib/canon/diff/formatting_detector.rb CHANGED

@@ -11,7 +11,7 @@ module Canon
       # @param line2 [String, nil] Second line to compare
       # @return [Boolean] true if lines differ only in formatting
       def self.formatting_only?(line1, line2)
-        # If both are nil or empty, not a formatting diff
+        # If both are nil or empty, not a formatting diff (no difference)
         return false if blank?(line1) && blank?(line2)
         # If only one is blank, it's not just formatting