RubyGems - canon - Versions diffs - 0.2.3 → 0.2.4 - Mend

canon 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

checksums.yaml +4 -4
data/.rubocop_todo.yml +31 -149
data/README.adoc +9 -0
data/docs/advanced/semantic-diff-report.adoc +31 -0
data/docs/features/configuration-profiles.adoc +4 -2
data/docs/features/match-options/html-policies.adoc +2 -0
data/docs/features/match-options/index.adoc +40 -0
data/docs/guides/choosing-configuration.adoc +12 -1
data/docs/reference/cli-options.adoc +3 -0
data/docs/reference/options-across-interfaces.adoc +7 -1
data/docs/understanding/formats/html.adoc +9 -2
data/lib/canon/cli.rb +4 -0
data/lib/canon/commands/diff_command.rb +1 -0
data/lib/canon/comparison/comparison_result.rb +79 -0
data/lib/canon/comparison/html_comparator.rb +92 -11
data/lib/canon/comparison/markup_comparator.rb +19 -0
data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
data/lib/canon/comparison/match_options.rb +23 -2
data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
data/lib/canon/comparison/xml_comparator/child_comparison.rb +6 -0
data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
data/lib/canon/comparison/xml_comparator.rb +80 -4
data/lib/canon/comparison/xml_node_comparison.rb +29 -3
data/lib/canon/comparison.rb +84 -22
data/lib/canon/config/env_schema.rb +2 -1
data/lib/canon/config/profiles/metanorma.yml +3 -0
data/lib/canon/config.rb +51 -5
data/lib/canon/diff/diff_classifier.rb +18 -2
data/lib/canon/diff/diff_line_builder.rb +9 -8
data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +65 -17
data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +17 -0
data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
data/lib/canon/diff_formatter.rb +57 -173
data/lib/canon/html/data_model.rb +10 -4
data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
data/lib/canon/version.rb +1 -1
data/lib/canon/xml/c14n.rb +59 -5
data/lib/canon/xml/element_matcher.rb +3 -0
data/lib/canon/xml/node.rb +8 -1
data/lib/canon/xml/nodes/comment_node.rb +4 -0
data/lib/canon/xml/nodes/element_node.rb +4 -0
data/lib/canon/xml/nodes/text_node.rb +4 -0
data/lib/canon/xml/sax_builder.rb +11 -2
data/lib/canon/xml/xpath_engine.rb +238 -0
metadata +6 -2

data/lib/canon/comparison/html_comparator.rb CHANGED Viewed

@@ -13,6 +13,7 @@ require_relative "../diff/diff_classifier"
 require_relative "strategies/match_strategy_factory"
 require_relative "../html/data_model"
 require_relative "xml_node_comparison"
+require_relative "xml_comparator/diff_node_builder"
 # Whitespace sensitivity module (single source of truth for sensitive elements)
 require_relative "whitespace_sensitivity"
@@ -172,10 +173,42 @@ module Canon
         # @param node2 [Object] Second node
         # @return [Boolean] true if both are document fragments
         def fragment_nodes?(node1, node2)
-          (node1.is_a?(Nokogiri::HTML4::DocumentFragment) ||
-           node1.is_a?(Nokogiri::XML::DocumentFragment)) &&
-            (node2.is_a?(Nokogiri::HTML4::DocumentFragment) ||
-             node2.is_a?(Nokogiri::XML::DocumentFragment))
+          fragment_node?(node1) && fragment_node?(node2)
+        end
+        # Check if a single node is a recognised document fragment.
+        # All three Nokogiri fragment types (XML, HTML4, HTML5) must be
+        # accepted: dom_diff routes html/html4/html5 input through
+        # Nokogiri::HTML5.fragment per #118.
+        def fragment_node?(node)
+          node.is_a?(Nokogiri::XML::DocumentFragment) ||
+            node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
+            node.is_a?(Nokogiri::HTML5::DocumentFragment)
+        end
+        # Record a DiffNode for a fragment-level child-count mismatch.
+        # Each surplus child becomes its own MISSING_NODE diff so the
+        # downstream report shows what was added or removed.
+        def record_fragment_length_mismatch(_node1, _node2, children1,
+                                            children2, differences)
+          longer, shorter, side = if children1.length > children2.length
+                                    [children1, children2, :removed]
+                                  else
+                                    [children2, children1, :added]
+                                  end
+          longer[shorter.length...].each do |orphan|
+            n1 = side == :removed ? orphan : nil
+            n2 = side == :removed ? nil    : orphan
+            differences <<
+              Canon::Comparison::DiffNodeBuilder.build(
+                node1: n1,
+                node2: n2,
+                diff1: Comparison::MISSING_NODE,
+                diff2: Comparison::MISSING_NODE,
+                dimension: :element_structure,
+              )
+          end
         end
         # Compare children of document fragments
@@ -196,6 +229,13 @@ module Canon
           children2 = XmlNodeComparison.filter_children(all_children2, opts)
           if children1.length != children2.length
+            # Record the length mismatch as a DiffNode so verbose mode
+            # surfaces it. Without this, equivalent? wraps an empty
+            # differences array and incorrectly reports the inputs as
+            # equivalent.
+            record_fragment_length_mismatch(node1, node2,
+                                            children1, children2,
+                                            differences)
             return Comparison::UNEQUAL_ELEMENTS
           elsif children1.empty?
             return Comparison::EQUIVALENT
@@ -291,10 +331,12 @@ module Canon
                           node.to_html
                         end
-          # Use XML fragment parser to preserve structure without auto-generated elements
-          # This avoids both HTML4's meta tag insertion and HTML5's tag stripping
-          # See: https://stackoverflow.com/questions/25998824/stop-nokogiri-from-adding-doctype-and-meta-tags
-          frag = Nokogiri::XML.fragment(html_string)
+          # Use XML fragment parser to preserve structure without auto-generated elements.
+          # Decode HTML named entities (&nbsp; etc.) to UTF-8 characters since XML
+          # parser only understands the five XML entities.
+          frag = Nokogiri::XML.fragment(
+            decode_html_named_entities(html_string),
+          )
           # Apply preprocessing if needed
           if preprocessing == :rendered
@@ -448,8 +490,12 @@ module Canon
                         end
           # Parse as Nokogiri fragment for DOM comparison
-          # Use XML fragment parser to avoid auto-inserted meta tags
-          frag = Nokogiri::XML.fragment(html_string)
+          # Use XML fragment parser to avoid auto-inserted meta tags.
+          # Decode HTML named entities (&nbsp; etc.) to UTF-8 characters since
+          # XML parser only understands the five XML entities.
+          frag = Nokogiri::XML.fragment(
+            decode_html_named_entities(html_string),
+          )
           # Apply post-parsing filtering for :normalize, :format, and :rendered preprocessing
           if %i[normalize format rendered].include?(preprocessing)
@@ -496,6 +542,33 @@ module Canon
         # Detect HTML version from content
         #
+        # Decode HTML named entities to their UTF-8 character equivalents.
+        # This is a targeted replacement that only changes entity references,
+        # preserving all tag structure. Needed because Nokogiri::XML.fragment
+        # only understands the five XML entities (&amp; &lt; &gt; &quot; &apos;).
+        #
+        # @param str [String] HTML string possibly containing named entities
+        # @return [String] String with named entities replaced by UTF-8 chars
+        def decode_html_named_entities(str)
+          return str unless str.include?("&")
+          str.gsub(/&nbsp;/i, "\u00A0")
+            .gsub(/&ensp;/i, "\u2002")
+            .gsub(/&emsp;/i, "\u2003")
+            .gsub(/&thinsp;/i, "\u2009")
+            .gsub(/&copy;/i, "\u00A9")
+            .gsub(/&reg;/i, "\u00AE")
+            .gsub(/&trade;/i, "\u2122")
+            .gsub(/&mdash;/i, "\u2014")
+            .gsub(/&ndash;/i, "\u2013")
+            .gsub(/&lsquo;/i, "\u2018")
+            .gsub(/&rsquo;/i, "\u2019")
+            .gsub(/&ldquo;/i, "\u201C")
+            .gsub(/&rdquo;/i, "\u201D")
+            .gsub(/&bull;/i, "\u2022")
+            .gsub(/&hellip;/i, "\u2026")
+        end
         # @param content [String] HTML content
         # @return [Symbol] :html5 or :html4
         def detect_html_version(content)
@@ -721,8 +794,16 @@ compare_profile = nil)
             parent = text_node.parent
             next if ancestor_preserves_whitespace?(parent, preserve_whitespace)
+            content = text_node.content
+            # NBSP (U+00A0) is never insignificant — don't remove
+            next if content.include?("\u00A0")
+            # Whitespace between inline siblings is significant — don't remove
+            next if WhitespaceSensitivity.inline_whitespace_significant?(text_node)
             # Remove if the text is only whitespace (after normalization)
-            if text_node.content.strip.empty?
+            if content.strip.empty?
               text_node.remove
             end
           end

data/lib/canon/comparison/markup_comparator.rb CHANGED Viewed

@@ -182,6 +182,25 @@ module Canon
           return false unless text_node?(node) && node.parent
           return false unless MatchOptions.normalize_text(node_text(node)).empty?
+          # NBSP (U+00A0) is never insignificant whitespace —
+          # it always renders as a visible non-breaking space.
+          # For HTML: always preserve NBSP nodes.
+          # For XML with whitespace_type: :strict: preserve NBSP nodes so
+          # different Unicode whitespace types remain distinguishable.
+          format = opts[:format] || match_opts[:format]
+          whitespace_type = match_opts[:whitespace_type] || :strict
+          if (%i[html html4
+                 html5].include?(format) || whitespace_type == :strict) && WhitespaceSensitivity.contains_nbsp?(node_text(node))
+            return false
+          end
+          if %i[html html4
+                html5].include?(format) && WhitespaceSensitivity.inline_whitespace_significant?(node)
+            # Whitespace between inline element siblings is semantically
+            # significant (renders as a visible gap) and must not be stripped.
+            return false
+          end
           return true unless WhitespaceSensitivity.whitespace_preserved?(
             node.parent, match_opts
           )

data/lib/canon/comparison/match_options/base_resolver.rb CHANGED Viewed

@@ -146,6 +146,7 @@ module Canon
               respect_xml_space
               pretty_printed_expected
               pretty_printed_received
+              whitespace_type
             ]
             match_options.each do |dimension, behavior|

data/lib/canon/comparison/match_options/xml_resolver.rb CHANGED Viewed

@@ -24,6 +24,7 @@ module Canon
             attribute_values: :strict,
             element_position: :ignore,
             comments: :ignore,
+            whitespace_type: :strict,
           },
           xml: {
             preprocessing: :none,
@@ -34,6 +35,7 @@ module Canon
             attribute_values: :strict,
             element_position: :strict,
             comments: :strict,
+            whitespace_type: :strict,
           },
         }.freeze
@@ -51,6 +53,7 @@ module Canon
             attribute_values: :strict,
             element_position: :strict,
             comments: :strict,
+            whitespace_type: :strict,
           },
           # Rendered: Match rendered output (HTML default)
@@ -64,6 +67,7 @@ module Canon
             attribute_values: :strict,
             element_position: :strict,
             comments: :ignore,
+            whitespace_type: :strict,
           },
           # HTML4: Match HTML4 rendered output
@@ -77,6 +81,7 @@ module Canon
             attribute_values: :normalize,
             element_position: :ignore,
             comments: :ignore,
+            whitespace_type: :strict,
           },
           # HTML5: Match HTML5 rendered output (same as rendered)
@@ -89,6 +94,7 @@ module Canon
             attribute_values: :strict,
             element_position: :ignore,
             comments: :ignore,
+            whitespace_type: :strict,
           },
           # Spec-friendly: Formatting doesn't matter
@@ -102,6 +108,7 @@ module Canon
             attribute_values: :normalize,
             element_position: :ignore,
             comments: :ignore,
+            whitespace_type: :strict,
           },
           # Content-only: Only content matters
@@ -114,6 +121,7 @@ module Canon
             attribute_values: :normalize,
             element_position: :ignore,
             comments: :ignore,
+            whitespace_type: :strict,
           },
         }.freeze

data/lib/canon/comparison/match_options.rb CHANGED Viewed

@@ -69,13 +69,18 @@ module Canon
         # @param text1 [String] First text
         # @param text2 [String] Second text
         # @param behavior [Symbol] Match behavior (:strict, :normalize, :ignore)
+        # @param whitespace_type [Symbol] Whitespace type handling (:strict, :normalize)
         # @return [Boolean] true if texts match according to behavior
-        def match_text?(text1, text2, behavior)
+        def match_text?(text1, text2, behavior, whitespace_type: :strict)
           case behavior
           when :strict
             text1 == text2
           when :normalize
-            normalize_text(text1) == normalize_text(text2)
+            if whitespace_type == :normalize
+              normalize_text(text1) == normalize_text(text2)
+            else
+              normalize_text_preserving_type(text1) == normalize_text_preserving_type(text2)
+            end
           when :ignore
             true
           else
@@ -101,6 +106,22 @@ module Canon
             .strip # Remove leading/trailing whitespace
         end
+        # Normalize text preserving Unicode whitespace type distinctions.
+        #
+        # Only ASCII whitespace (space, tab, newline, etc.) is collapsed.
+        # Unicode whitespace (NBSP, ideographic space, etc.) is preserved,
+        # so different whitespace types remain distinguishable.
+        #
+        # @param text [String] Text to normalize
+        # @return [String] Normalized text with preserved whitespace types
+        def normalize_text_preserving_type(text)
+          return "" if text.nil?
+          text.to_s
+            .gsub(/[ \t\r\n\f\v]+/, " ") # Collapse only ASCII whitespace
+            .strip
+        end
         # Process attribute value according to match behavior
         #
         # @param value [String] Attribute value to process

data/lib/canon/comparison/whitespace_sensitivity.rb CHANGED Viewed

@@ -50,6 +50,15 @@ module Canon
       # HTML elements where every whitespace character is significant.
       HTML_PRESERVE_ELEMENTS = %w[pre code textarea script style].freeze
+      # HTML inline elements — whitespace between these is semantically
+      # significant (renders as a visible space).  Whitespace-only text
+      # nodes that sit between two inline siblings must not be stripped.
+      INLINE_ELEMENTS = %w[
+        a abbr acronym b bdo big br button cite code dfn em i img input kbd
+        label map object output q s samp select small span strong sub sup
+        time tt u var wbr
+      ].freeze
       class << self
         # Classify the whitespace behaviour for an element using ancestor walk.
         #
@@ -213,6 +222,69 @@ module Canon
             .include?(element_name.to_sym)
         end
+        # Check if a whitespace-only text node sits between two inline element
+        # siblings, making the whitespace semantically significant.
+        #
+        # In HTML rendering, a space between <span>A</span> <span>B</span>
+        # produces visible output.  Stripping such nodes produces false
+        # equivalence.
+        #
+        # Works with any parent type (element, DocumentFragment, RootNode)
+        # since the check is about sibling context, not parent type.
+        #
+        # @param text_node [Object] Text node (Nokogiri or Canon::Xml::Node)
+        # @return [Boolean] true if whitespace is between inline siblings
+        def inline_whitespace_significant?(text_node)
+          return false unless text_node.respond_to?(:parent)
+          parent = text_node.parent
+          return false unless parent
+          return false unless parent.respond_to?(:children)
+          siblings = parent.children
+          idx = siblings.index(text_node)
+          return false unless idx
+          # Look at the IMMEDIATE non-whitespace-text neighbour on each
+          # side. Whitespace at a block boundary is collapsed per CSS,
+          # so both immediate neighbours must be inline for the
+          # whitespace to be significant. Walking all siblings (the
+          # earlier behaviour) misclassified whitespace at a block
+          # boundary as significant whenever any inline element existed
+          # elsewhere among the siblings.
+          prev_neighbour = nearest_non_whitespace_sibling(siblings, idx, -1)
+          next_neighbour = nearest_non_whitespace_sibling(siblings, idx,  1)
+          inline_element?(prev_neighbour) && inline_element?(next_neighbour)
+        end
+        # Walk outward from +idx+ in +direction+ (+1 forward, -1 back),
+        # skipping whitespace-only text nodes, and return the first
+        # non-whitespace sibling found.  Returns nil if none.
+        def nearest_non_whitespace_sibling(siblings, idx, direction)
+          i = idx + direction
+          while i >= 0 && i < siblings.length
+            s = siblings[i]
+            unless s.respond_to?(:text?) && s.text? &&
+                s.respond_to?(:content) && s.content.to_s.strip.empty?
+              return s
+            end
+            i += direction
+          end
+          nil
+        end
+        # Check if text content contains a non-breaking space (U+00A0).
+        # NBSP is NOT collapsible whitespace in HTML — it always renders as
+        # a visible space and must never be stripped.
+        #
+        # @param text [String] Text content to check
+        # @return [Boolean] true if text contains U+00A0
+        def contains_nbsp?(text)
+          text.to_s.include?("\u00A0")
+        end
         private
         # Build the Set of preserve whitespace element names (strings).
@@ -336,6 +408,30 @@ module Canon
           # Nokogiri compatibility
           parent.respond_to?(:node_type) && parent.node_type == :element
         end
+        # Get the parent element of a text node, or nil.
+        # Works with both Nokogiri and Canon::Xml::Node types.
+        def parent_element_of(text_node)
+          return nil unless text_node.respond_to?(:parent)
+          parent = text_node.parent
+          return nil unless parent
+          if parent.is_a?(Canon::Xml::Nodes::ElementNode)
+            parent
+          elsif parent.respond_to?(:element?) && parent.element?
+            parent
+          elsif parent.respond_to?(:node_type) && parent.node_type == :element
+            parent
+          end
+        end
+        # Check if a node is an HTML inline element.
+        def inline_element?(node)
+          return false unless node.respond_to?(:name)
+          INLINE_ELEMENTS.include?(node.name.to_s.downcase)
+        end
       end
     end
   end

data/lib/canon/comparison/xml_comparator/child_comparison.rb CHANGED Viewed

@@ -28,6 +28,9 @@ module Canon
           # @return [Integer] Comparison result code
           def compare(node1, node2, comparator, opts, child_opts,
 diff_children, differences)
+            # FAST PATH: Object identity - same object means equivalent children
+            return Comparison::EQUIVALENT if node1.equal?(node2)
             # Apply side-specific pretty-print heuristic when either flag is set:
             # pretty_printed_expected → drop \n-starting whitespace nodes from node1
             # pretty_printed_received → drop \n-starting whitespace nodes from node2
@@ -43,6 +46,9 @@ diff_children, differences)
             # Quick check: if both have no children, they're equivalent
             return Comparison::EQUIVALENT if children1.empty? && children2.empty?
+            # FAST PATH: Identical children arrays mean equivalent subtrees
+            return Comparison::EQUIVALENT if children1.equal?(children2)
             # Check if we can use ElementMatcher (requires Canon::Xml::DataModel nodes)
             if can_use_element_matcher?(children1, children2)
               use_element_matcher_comparison(children1, children2, node1, comparator,

data/lib/canon/comparison/xml_comparator/node_parser.rb CHANGED Viewed

@@ -14,15 +14,18 @@ module Canon
         # @param node [String, Object] Node to parse
         # @param preprocessing [Symbol] Preprocessing mode (:none, :normalize, :c14n, :format)
         # @param preserve_whitespace [Boolean] Whether to preserve whitespace-only text nodes
+        # @param parser [Symbol] Parser backend (:sax or :dom, default from config)
         # @return [Canon::Xml::Node] Parsed node
-        def self.parse(node, preprocessing = :none, preserve_whitespace: false)
+        def self.parse(node, preprocessing = :none, preserve_whitespace: false,
+                       parser: nil)
           # If already a Canon::Xml::Node, return as-is
           return node if node.is_a?(Canon::Xml::Node)
           # If it's a Nokogiri or Moxml node, convert to DataModel
           unless node.is_a?(String)
             return convert_from_node(node,
-                                     preserve_whitespace: preserve_whitespace)
+                                     preserve_whitespace: preserve_whitespace,
+                                     parser: parser)
           end
           # Normalize encoding before preprocessing (UTF-16 strings can't use strip, etc.)
@@ -31,9 +34,17 @@ module Canon
           # Apply preprocessing to XML string before parsing
           xml_string = apply_preprocessing(node, preprocessing).strip
-          # Use Canon::Xml::DataModel for parsing to get Canon::Xml::Node instances
-          Canon::Xml::DataModel.from_xml(xml_string,
+          # Select parser backend
+          resolved_parser = parser || resolve_parser_config
+          if resolved_parser == :sax
+            require_relative "../../xml/sax_builder"
+            Canon::Xml::SaxBuilder.parse(xml_string,
                                          preserve_whitespace: preserve_whitespace)
+          else
+            Canon::Xml::DataModel.from_xml(xml_string,
+                                           preserve_whitespace: preserve_whitespace)
+          end
         end
         # Apply preprocessing transformation to XML string
@@ -62,9 +73,18 @@ module Canon
         #
         # @param node [Object] Nokogiri or Moxml node
         # @param preserve_whitespace [Boolean] Whether to preserve whitespace-only text nodes
+        # @param parser [Symbol, nil] Parser backend override
         # @return [Canon::Xml::Node] Converted node
-        def self.convert_from_node(node, preserve_whitespace: false)
-          # Convert to XML string then parse through DataModel
+        def self.convert_from_node(node, preserve_whitespace: false,
+parser: nil)
+          # FAST PATH: Convert Nokogiri/Moxml nodes directly without string round-trip
+          if defined?(Nokogiri::XML::Node) && node.is_a?(Nokogiri::XML::Node)
+            return Canon::Xml::DataModel.build_from_nokogiri(
+              node, preserve_whitespace: preserve_whitespace
+            )
+          end
+          # SLOW PATH: Fallback to string serialization for unknown node types
           xml_str = if node.respond_to?(:to_xml)
                       node.to_xml
                     elsif node.respond_to?(:to_s)
@@ -73,8 +93,26 @@ module Canon
                       raise Canon::Error,
                             "Unable to convert node to string: #{node.class}"
                     end
-          Canon::Xml::DataModel.from_xml(xml_str,
+          resolved_parser = parser || resolve_parser_config
+          if resolved_parser == :sax
+            require_relative "../../xml/sax_builder"
+            Canon::Xml::SaxBuilder.parse(xml_str,
                                          preserve_whitespace: preserve_whitespace)
+          else
+            Canon::Xml::DataModel.from_xml(xml_str,
+                                           preserve_whitespace: preserve_whitespace)
+          end
+        end
+        # Resolve parser config from global config
+        #
+        # @return [Symbol] :sax or :dom
+        def self.resolve_parser_config
+          Canon::Config.instance.xml.diff.parser
+        rescue StandardError
+          :sax
         end
       end
     end

data/lib/canon/comparison/xml_comparator.rb CHANGED Viewed

@@ -63,6 +63,18 @@ module Canon
         # @return [Boolean, Array] true if equivalent, or array of diffs if
         #   verbose
         def equivalent?(n1, n2, opts = {}, child_opts = {})
+          # FAST PATH: Object identity - same object is always equivalent
+          # Skip when semantic_diff is requested (caller needs tree diff metadata)
+          if n1.equal?(n2) && !opts.dig(:match, :semantic_diff)
+            return build_trivial_equivalent_result(n1, n2, opts)
+          end
+          # FAST PATH: String content equality - identical strings are equivalent
+          # Skip in verbose mode since caller may need full metadata (e.g. tree_diff statistics)
+          if !opts[:verbose] && n1.is_a?(String) && n2.is_a?(String) && n1 == n2
+            return true
+          end
           opts = DEFAULT_OPTS.merge(opts)
           # Resolve match options with format-specific defaults
@@ -92,8 +104,15 @@ module Canon
           # Create child_opts with resolved options
           child_opts = opts.merge(child_opts)
-          # Determine if we should preserve whitespace during parsing
-          # When structural_whitespace is :strict, preserve all whitespace-only text nodes
+          # Determine if we should preserve whitespace during parsing.
+          # Only structural_whitespace: :strict forces whitespace-only text
+          # nodes to survive parsing.  whitespace_type is about distinguishing
+          # Unicode whitespace *types* in surviving text-node content, and
+          # does NOT require indent text nodes to be kept — libxml's NOBLANKS
+          # only strips pure-ASCII whitespace-only nodes, so NBSP-only nodes
+          # survive regardless.  Coupling whitespace_type: :strict to
+          # parsing-time preservation made pretty-printed fixtures produce
+          # spurious element-position diffs (issue #112).
           preserve_whitespace = match_opts_hash[:structural_whitespace] == :strict
           # Parse nodes if they are strings, applying preprocessing if needed
@@ -218,8 +237,57 @@ module Canon
                                                  preserve_whitespace: preserve_whitespace)
         end
+        # Build result for trivially equivalent inputs (same object or identical strings)
+        #
+        # Returns plain `true` in non-verbose mode, or a ComparisonResult in verbose mode.
+        #
+        # @param n1 [Object] First input
+        # @param n2 [Object] Second input
+        # @param opts [Hash] Raw options (before merge with DEFAULT_OPTS)
+        # @return [Boolean, ComparisonResult]
+        def build_trivial_equivalent_result(n1, n2, opts)
+          return true unless opts[:verbose]
+          # Parse nodes for verbose display
+          preserve_whitespace = true
+          node1 = parse_node(n1, :none,
+                             preserve_whitespace: preserve_whitespace)
+          node2 = parse_node(n2, :none,
+                             preserve_whitespace: preserve_whitespace)
+          preprocessed = [
+            serialize_node(node1).gsub("><", ">\n<"),
+            serialize_node(node2).gsub("><", ">\n<"),
+          ]
+          original1 = if n1.is_a?(String)
+                        n1
+                      elsif n1.respond_to?(:to_xml)
+                        n1.to_xml
+                      else
+                        n1.to_s
+                      end
+          original2 = if n2.is_a?(String)
+                        n2
+                      elsif n2.respond_to?(:to_xml)
+                        n2.to_xml
+                      else
+                        n2.to_s
+                      end
+          ComparisonResult.new(
+            differences: [],
+            preprocessed_strings: preprocessed,
+            original_strings: [original1, original2],
+            format: :xml,
+            match_options: {},
+            algorithm: :dom,
+          )
+        end
         # Main comparison dispatcher
         def compare_nodes(n1, n2, opts, child_opts, diff_children, differences)
+          # FAST PATH: Object identity - same object is always equivalent
+          return Comparison::EQUIVALENT if n1.equal?(n2)
           # Handle DocumentFragment nodes - compare their children instead
           if n1.is_a?(Nokogiri::XML::DocumentFragment) &&
               n2.is_a?(Nokogiri::XML::DocumentFragment)
@@ -380,8 +448,10 @@ module Canon
           raw_differs = text1 != text2
           # Check if matches according to behavior
+          whitespace_type = match_opts[:whitespace_type] || :strict
           matches_per_behavior = MatchOptions.match_text?(text1, text2,
-                                                          behavior)
+                                                          behavior,
+                                                          whitespace_type: whitespace_type)
           # Determine the correct dimension for this difference
           # - If text_content is :strict, ALL differences use :text_content dimension
@@ -599,7 +669,13 @@ differences)
                         end
               return "element '#{node.name}'#{ns_info}: #{diff1} vs #{diff2}"
             elsif node.respond_to?(:name) && !node.respond_to?(:namespace_uri)
-              return "element missing: #{node}"
+              # TextNode and other nodes without namespace_uri
+              display = if node.respond_to?(:value) && node.node_type == :text
+                          "\"#{truncate_text(node.value)}\""
+                        else
+                          node.name.to_s
+                        end
+              return "element missing: #{display}"
             end
           end