RubyGems - canon - Versions diffs - 0.1.7 → 0.1.8 - Mend

canon 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144) hide show

checksums.yaml +4 -4
data/.rubocop_todo.yml +25 -135
data/README.adoc +13 -13
data/docs/.lycheeignore +69 -0
data/docs/advanced/extending-canon.adoc +193 -0
data/docs/internals/diffnode-enrichment.adoc +611 -0
data/docs/internals/index.adoc +251 -0
data/docs/lychee.toml +13 -6
data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +250 -0
data/docs/understanding/architecture.adoc +749 -33
data/docs/understanding/comparison-pipeline.adoc +122 -0
data/false_positive_analysis.txt +0 -0
data/file1.html +1 -0
data/file2.html +1 -0
data/lib/canon/cache.rb +129 -0
data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
data/lib/canon/comparison/dimensions/registry.rb +77 -0
data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
data/lib/canon/comparison/dimensions.rb +54 -0
data/lib/canon/comparison/format_detector.rb +86 -0
data/lib/canon/comparison/html_comparator.rb +51 -18
data/lib/canon/comparison/html_parser.rb +80 -0
data/lib/canon/comparison/json_comparator.rb +12 -0
data/lib/canon/comparison/json_parser.rb +19 -0
data/lib/canon/comparison/markup_comparator.rb +293 -0
data/lib/canon/comparison/match_options/base_resolver.rb +143 -0
data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
data/lib/canon/comparison/match_options.rb +68 -463
data/lib/canon/comparison/profile_definition.rb +149 -0
data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
data/lib/canon/comparison/xml_comparator/child_comparison.rb +189 -0
data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
data/lib/canon/comparison/xml_comparator/node_parser.rb +74 -0
data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +95 -0
data/lib/canon/comparison/xml_comparator.rb +52 -664
data/lib/canon/comparison/xml_node_comparison.rb +297 -0
data/lib/canon/comparison/xml_parser.rb +19 -0
data/lib/canon/comparison/yaml_comparator.rb +3 -3
data/lib/canon/comparison.rb +265 -110
data/lib/canon/diff/diff_node.rb +32 -2
data/lib/canon/diff/node_serializer.rb +191 -0
data/lib/canon/diff/path_builder.rb +143 -0
data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
data/lib/canon/diff_formatter.rb +1 -1
data/lib/canon/rspec_matchers.rb +1 -1
data/lib/canon/tree_diff/operation_converter.rb +92 -338
data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
data/lib/canon/version.rb +1 -1
data/old-docs/ADVANCED_TOPICS.adoc +20 -0
data/old-docs/BASIC_USAGE.adoc +16 -0
data/old-docs/CHARACTER_VISUALIZATION.adoc +567 -0
data/old-docs/CLI.adoc +497 -0
data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
data/old-docs/DIFF_ARCHITECTURE.adoc +435 -0
data/old-docs/DIFF_FORMATTING.adoc +540 -0
data/old-docs/DIFF_PARAMETERS.adoc +261 -0
data/old-docs/DOM_DIFF.adoc +1017 -0
data/old-docs/ENV_CONFIG.adoc +876 -0
data/old-docs/FORMATS.adoc +867 -0
data/old-docs/INPUT_VALIDATION.adoc +477 -0
data/old-docs/MATCHER_BEHAVIOR.adoc +90 -0
data/old-docs/MATCH_ARCHITECTURE.adoc +463 -0
data/old-docs/MATCH_OPTIONS.adoc +912 -0
data/old-docs/MODES.adoc +432 -0
data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
data/old-docs/OPTIONS.adoc +1387 -0
data/old-docs/PREPROCESSING.adoc +491 -0
data/old-docs/README.old.adoc +2831 -0
data/old-docs/RSPEC.adoc +814 -0
data/old-docs/RUBY_API.adoc +485 -0
data/old-docs/SEMANTIC_DIFF_REPORT.adoc +646 -0
data/old-docs/SEMANTIC_TREE_DIFF.adoc +765 -0
data/old-docs/STRING_COMPARE.adoc +345 -0
data/old-docs/TMP.adoc +3384 -0
data/old-docs/TREE_DIFF.adoc +1080 -0
data/old-docs/UNDERSTANDING_CANON.adoc +17 -0
data/old-docs/VERBOSE.adoc +482 -0
data/old-docs/VISUALIZATION_MAP.adoc +625 -0
data/old-docs/WHITESPACE_TREATMENT.adoc +1155 -0
data/scripts/analyze_current_state.rb +85 -0
data/scripts/analyze_false_positives.rb +114 -0
data/scripts/analyze_remaining_failures.rb +105 -0
data/scripts/compare_current_failures.rb +95 -0
data/scripts/compare_dom_tree_diff.rb +158 -0
data/scripts/compare_failures.rb +151 -0
data/scripts/debug_attribute_extraction.rb +66 -0
data/scripts/debug_blocks_839.rb +115 -0
data/scripts/debug_meta_matching.rb +52 -0
data/scripts/debug_p_matching.rb +192 -0
data/scripts/debug_signature_matching.rb +118 -0
data/scripts/debug_sourcecode_124.rb +32 -0
data/scripts/debug_whitespace_sensitive.rb +192 -0
data/scripts/extract_false_positives.rb +138 -0
data/scripts/find_actual_false_positives.rb +125 -0
data/scripts/investigate_all_false_positives.rb +161 -0
data/scripts/investigate_batch1.rb +127 -0
data/scripts/investigate_classification.rb +150 -0
data/scripts/investigate_classification_detailed.rb +190 -0
data/scripts/investigate_common_failures.rb +342 -0
data/scripts/investigate_false_negative.rb +80 -0
data/scripts/investigate_false_positive.rb +83 -0
data/scripts/investigate_false_positives.rb +227 -0
data/scripts/investigate_false_positives_batch.rb +163 -0
data/scripts/investigate_mixed_content.rb +125 -0
data/scripts/investigate_remaining_16.rb +214 -0
data/scripts/run_single_test.rb +29 -0
data/scripts/test_all_false_positives.rb +95 -0
data/scripts/test_attribute_details.rb +61 -0
data/scripts/test_both_algorithms.rb +49 -0
data/scripts/test_both_simple.rb +49 -0
data/scripts/test_enhanced_semantic_output.rb +125 -0
data/scripts/test_readme_examples.rb +131 -0
data/scripts/test_semantic_tree_diff.rb +99 -0
data/scripts/test_semantic_ux_improvements.rb +135 -0
data/scripts/test_single_false_positive.rb +119 -0
data/scripts/test_size_limits.rb +99 -0
data/test_html_1.html +21 -0
data/test_html_2.html +21 -0
data/test_nokogiri.rb +33 -0
data/test_normalize.rb +45 -0
metadata +123 -2

data/lib/canon/comparison/html_comparator.rb CHANGED Viewed

@@ -2,6 +2,7 @@
 require "nokogiri"
 require_relative "../comparison" # Load base module with constants first
+require_relative "markup_comparator"
 require_relative "xml_comparator"
 require_relative "match_options"
 require_relative "comparison_result"
@@ -11,12 +12,15 @@ require_relative "../diff/diff_node"
 require_relative "../diff/diff_classifier"
 require_relative "strategies/match_strategy_factory"
 require_relative "../html/data_model"
+require_relative "xml_node_comparison"
 module Canon
   module Comparison
     # HTML comparison class
     # Handles comparison of HTML nodes with various options
-    class HtmlComparator
+    #
+    # Inherits shared comparison functionality from MarkupComparator.
+    class HtmlComparator < MarkupComparator
       # Default comparison options for HTML
       DEFAULT_OPTS = {
         # Structural filtering options
@@ -108,6 +112,9 @@ module Canon
           # DocumentFragment nodes need special handling - compare their children
           # instead of the fragment nodes themselves
+          # This is a SAFETY CHECK for legacy cases where Nokogiri nodes might still be used
+          # The main path (parse_node) now returns Canon::Xml::Nodes::RootNode, so this
+          # check should rarely trigger, but we keep it for robustness
           if (node1.is_a?(Nokogiri::HTML4::DocumentFragment) ||
               node1.is_a?(Nokogiri::XML::DocumentFragment)) &&
               (node2.is_a?(Nokogiri::HTML4::DocumentFragment) ||
@@ -117,10 +124,8 @@ module Canon
             all_children2 = node2.children.to_a
             # Filter children based on match options (e.g., ignore comments)
-            children1 = XmlComparator.send(:filter_children, all_children1,
-                                           opts)
-            children2 = XmlComparator.send(:filter_children, all_children2,
-                                           opts)
+            children1 = XmlNodeComparison.filter_children(all_children1, opts)
+            children2 = XmlNodeComparison.filter_children(all_children2, opts)
             if children1.length != children2.length
               result = Comparison::UNEQUAL_ELEMENTS
@@ -130,9 +135,10 @@ module Canon
               # Compare each pair of children
               result = Comparison::EQUIVALENT
               children1.zip(children2).each do |child1, child2|
-                child_result = XmlComparator.send(:compare_nodes, child1, child2,
-                                                  opts, child_opts, diff_children,
-                                                  differences)
+                child_result = XmlNodeComparison.compare_nodes(child1, child2,
+                                                               opts, child_opts,
+                                                               diff_children,
+                                                               differences)
                 if child_result != Comparison::EQUIVALENT
                   result = child_result
                   break
@@ -140,8 +146,9 @@ module Canon
               end
             end
           else
-            result = XmlComparator.send(:compare_nodes, node1, node2, opts,
-                                        child_opts, diff_children, differences)
+            result = XmlNodeComparison.compare_nodes(node1, node2, opts,
+                                                     child_opts, diff_children,
+                                                     differences)
           end
           # Classify DiffNodes as normative/informative if we have verbose output
@@ -287,7 +294,16 @@ module Canon
                         end
           # Strip DOCTYPE for consistent parsing
-          html_string = html_string.gsub(/<!DOCTYPE[^>]*>/i, "").strip
+          # Use non-regex approach to avoid ReDoS vulnerability
+          # DOCTYPE declarations end with first > character
+          doctype_start = html_string =~ /<!DOCTYPE/i
+          if doctype_start
+            doctype_end = html_string.index(">", doctype_start)
+            html_string = html_string[0...doctype_start] + html_string[(doctype_end + 1)..] if doctype_end
+            html_string.strip!
+          else
+            html_string = html_string.strip
+          end
           # Apply preprocessing to HTML string before parsing
           processed_html = case preprocessing
@@ -313,8 +329,15 @@ module Canon
         # Parse a node from string or return as-is
         # Applies preprocessing transformation before parsing if specified
-        # For DOM comparison, returns Nokogiri nodes (not Canon::Xml::Node)
+        # Returns Nokogiri nodes for DOM comparison (preserves original behavior)
         def parse_node(node, preprocessing = :none, match_opts = {})
+          # If already a Canon::Xml::Node, convert to Nokogiri for DOM path
+          if node.is_a?(Canon::Xml::Node)
+            # Canon nodes used in semantic diff path, convert to Nokogiri for DOM path
+            xml_str = Canon::Xml::DataModel.serialize(node)
+            node = xml_str
+          end
           # If already a Nokogiri node, check for incompatible XML documents
           unless node.is_a?(String)
             # Detect if this is an XML document (not HTML)
@@ -357,7 +380,15 @@ module Canon
           # Strip DOCTYPE declarations from HTML strings
           # This normalizes parsed HTML (which includes DOCTYPE) with raw HTML strings
-          node = node.gsub(/<!DOCTYPE[^>]*>/i, "").strip
+          # Use non-regex approach to avoid ReDoS vulnerability
+          doctype_start = node =~ /<!DOCTYPE/i
+          if doctype_start
+            doctype_end = node.index(">", doctype_start)
+            node = node[0...doctype_start] + node[(doctype_end + 1)..] if doctype_end
+            node.strip!
+          else
+            node = node.strip
+          end
           # Apply preprocessing to HTML string before parsing
           html_string = case preprocessing
@@ -380,10 +411,12 @@ module Canon
           # Use XML fragment parser to avoid auto-inserted meta tags
           frag = Nokogiri::XML.fragment(html_string)
-          # Apply :rendered preprocessing if needed
-          if preprocessing == :rendered
+          # Apply post-parsing filtering for :normalize, :format, and :rendered preprocessing
+          if %i[normalize format rendered].include?(preprocessing)
             normalize_html_style_script_comments(frag)
-            normalize_rendered_whitespace(frag, match_opts)
+            if preprocessing == :rendered
+              normalize_rendered_whitespace(frag, match_opts)
+            end
             remove_whitespace_only_text_nodes(frag)
           end
@@ -461,9 +494,9 @@ module Canon
         # @param node [Canon::Xml::Node, Nokogiri::HTML::Document] Parsed node
         # @return [String] Serialized HTML string
         def serialize_for_display(node)
-          # Use XmlComparator's serializer for Canon::Xml::Node
+          # Use XmlNodeComparison's serializer for Canon::Xml::Node
           if node.is_a?(Canon::Xml::Node)
-            XmlComparator.send(:serialize_node_to_xml, node)
+            XmlNodeComparison.serialize_node_to_xml(node)
           elsif node.respond_to?(:to_html)
             node.to_html
           elsif node.respond_to?(:to_xml)

data/lib/canon/comparison/html_parser.rb ADDED Viewed

@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+require "nokogiri"
+module Canon
+  module Comparison
+    # HTML parsing service with version detection and fragment support
+    #
+    # Provides HTML parsing capabilities with automatic HTML4/HTML5 version
+    # detection. Handles both full documents and fragments.
+    #
+    # @example Parse HTML string
+    #   HtmlParser.parse("<div>content</div>", :html5)
+    #
+    # @example Auto-detect and parse
+    #   HtmlParser.detect_and_parse("<!DOCTYPE html><html>...</html>")
+    class HtmlParser
+      class << self
+        # Parse HTML string into Nokogiri document with the correct parser
+        #
+        # @param content [String, Object] Content to parse (returns as-is if not a string)
+        # @param format [Symbol] HTML format (:html, :html4, :html5)
+        # @return [Nokogiri::HTML::Document, Nokogiri::HTML5::Document, Nokogiri::HTML::DocumentFragment, Object]
+        def parse(content, format)
+          return content unless content.is_a?(String)
+          return content if already_parsed?(content)
+          begin
+            case format
+            when :html5
+              Nokogiri::HTML5.fragment(content)
+            when :html4
+              Nokogiri::HTML4.fragment(content)
+            when :html
+              detect_and_parse(content)
+            else
+              content
+            end
+          rescue StandardError
+            # Fallback to raw string if parsing fails (maintains backward compatibility)
+            content
+          end
+        end
+        # Check if content is already a parsed HTML document/fragment
+        #
+        # @param content [Object] Content to check
+        # @return [Boolean] true if already parsed
+        def already_parsed?(content)
+          content.is_a?(Nokogiri::HTML::Document) ||
+            content.is_a?(Nokogiri::HTML5::Document) ||
+            content.is_a?(Nokogiri::HTML::DocumentFragment) ||
+            content.is_a?(Nokogiri::HTML5::DocumentFragment)
+        end
+        # Detect HTML version from content and parse with appropriate parser
+        #
+        # @param content [String] HTML content to parse
+        # @return [Nokogiri::HTML::DocumentFragment] Parsed fragment
+        def detect_and_parse(content)
+          version = detect_version(content)
+          if version == :html5
+            Nokogiri::HTML5.fragment(content)
+          else
+            Nokogiri::HTML4.fragment(content)
+          end
+        end
+        # Detect HTML version from content string
+        #
+        # @param content [String] HTML content
+        # @return [Symbol] :html5 or :html4
+        def detect_version(content)
+          # Check for HTML5 DOCTYPE (case-insensitive)
+          content.include?("<!DOCTYPE html>") ? :html5 : :html4
+        end
+      end
+    end
+  end
+end

data/lib/canon/comparison/json_comparator.rb CHANGED Viewed

@@ -125,6 +125,18 @@ module Canon
           if match_opts[:key_order] != :strict
             keys1 = keys1.sort_by(&:to_s)
             keys2 = keys2.sort_by(&:to_s)
+          elsif keys1 != keys2
+            # Strict mode: key order matters
+            # Check if keys are in same order
+            # Keys are different or in different order
+            # First check if it's just ordering (same keys, different order)
+            if keys1.sort_by(&:to_s) == keys2.sort_by(&:to_s)
+              # Same keys, different order - this is a key_order difference
+              key_path = path.empty? ? "(key order)" : "#{path}.(key order)"
+              add_ruby_difference(key_path, keys1, keys2,
+                                  Comparison::UNEQUAL_HASH_KEY_ORDER, opts, differences)
+              return Comparison::UNEQUAL_HASH_KEY_ORDER
+            end
           end
           # Check for missing keys

data/lib/canon/comparison/json_parser.rb ADDED Viewed

@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+module Canon
+  module Comparison
+    # Public API for JSON parsing operations
+    # Provides access to parsing functionality without using send()
+    class JsonParser
+      # Parse an object to Ruby object
+      #
+      # @param obj [String, Hash, Array] Object to parse
+      # @return [Hash, Array] Parsed Ruby object
+      def self.parse_json(obj)
+        # Delegate to JsonComparator's private method via public API
+        require_relative "json_comparator"
+        JsonComparator.parse_json(obj)
+      end
+    end
+  end
+end

data/lib/canon/comparison/markup_comparator.rb ADDED Viewed

@@ -0,0 +1,293 @@
+# frozen_string_literal: true
+require_relative "../comparison" # Load base module with constants
+require_relative "../diff/diff_node"
+require_relative "../diff/path_builder"
+module Canon
+  module Comparison
+    # Base class for markup document comparison (XML, HTML)
+    #
+    # Provides shared comparison functionality for markup documents,
+    # including node type checking, text extraction, filtering,
+    # and difference creation.
+    #
+    # Format-specific comparators (XmlComparator, HtmlComparator)
+    # inherit from this class and add format-specific behavior.
+    class MarkupComparator
+      class << self
+        # Add a difference to the differences array
+        #
+        # Creates a DiffNode with enriched metadata including path,
+        # serialized content, and attributes for Stage 4 rendering.
+        #
+        # @param node1 [Object, nil] First node
+        # @param node2 [Object, nil] Second node
+        # @param diff1 [Symbol] Difference type for node1
+        # @param diff2 [Symbol] Difference type for node2
+        # @param dimension [Symbol] The match dimension causing this difference
+        # @param _opts [Hash] Options (unused but kept for interface compatibility)
+        # @param differences [Array] Array to append difference to
+        def add_difference(node1, node2, diff1, diff2, dimension, _opts,
+                           differences)
+          # All differences must be DiffNode objects (OO architecture)
+          if dimension.nil?
+            raise ArgumentError,
+                  "dimension required for DiffNode"
+          end
+          # Build informative reason message
+          reason = build_difference_reason(node1, node2, diff1, diff2,
+                                           dimension)
+          # Enrich with path, serialized content, and attributes for Stage 4 rendering
+          metadata = enrich_diff_metadata(node1, node2)
+          diff_node = Canon::Diff::DiffNode.new(
+            node1: node1,
+            node2: node2,
+            dimension: dimension,
+            reason: reason,
+            **metadata,
+          )
+          differences << diff_node
+        end
+        # Enrich DiffNode with canonical path, serialized content, and attributes
+        # This extracts presentation-ready metadata from nodes for Stage 4 rendering
+        #
+        # @param node1 [Object, nil] First node
+        # @param node2 [Object, nil] Second node
+        # @return [Hash] Enriched metadata hash
+        def enrich_diff_metadata(node1, node2)
+          {
+            path: build_path_for_node(node1 || node2),
+            serialized_before: serialize_node(node1),
+            serialized_after: serialize_node(node2),
+            attributes_before: extract_attributes(node1),
+            attributes_after: extract_attributes(node2),
+          }
+        end
+        # Build canonical path for a node
+        #
+        # @param node [Object] Node to build path for
+        # @return [String, nil] Canonical path with ordinal indices
+        def build_path_for_node(node)
+          return nil if node.nil?
+          Canon::Diff::PathBuilder.build(node, format: :document)
+        end
+        # Serialize a node to string for display
+        #
+        # @param node [Object, nil] Node to serialize
+        # @return [String, nil] Serialized content
+        def serialize_node(node)
+          return nil if node.nil?
+          # Canon::Xml::Node types
+          if node.is_a?(Canon::Xml::Nodes::RootNode)
+            # Serialize all children of root
+            node.children.map { |child| serialize_node(child) }.join
+          elsif node.is_a?(Canon::Xml::Nodes::ElementNode)
+            serialize_element_node(node)
+          elsif node.is_a?(Canon::Xml::Nodes::TextNode)
+            node.value
+          elsif node.is_a?(Canon::Xml::Nodes::CommentNode)
+            "<!--#{node.value}-->"
+          elsif node.is_a?(Canon::Xml::Nodes::ProcessingInstructionNode)
+            "<?#{node.target} #{node.data}?>"
+          elsif node.respond_to?(:to_xml)
+            node.to_xml
+          elsif node.respond_to?(:to_html)
+            node.to_html
+          else
+            node.to_s
+          end
+        end
+        # Extract attributes from a node
+        #
+        # @param node [Object, nil] Node to extract attributes from
+        # @return [Hash, nil] Hash of attribute name => value pairs
+        def extract_attributes(node)
+          return nil if node.nil?
+          # Canon::Xml::Node ElementNode
+          if node.is_a?(Canon::Xml::Nodes::ElementNode)
+            node.attribute_nodes.each_with_object({}) do |attr, hash|
+              hash[attr.name] = attr.value
+            end
+          # Nokogiri nodes
+          elsif node.respond_to?(:attributes)
+            node.attributes.each_with_object({}) do |(_, attr), hash|
+              hash[attr.name] = attr.value
+            end
+          else
+            {}
+          end
+        end
+        # Filter children based on options
+        #
+        # Removes nodes that should be excluded from comparison based on
+        # options like :ignore_nodes, :ignore_comments, etc.
+        #
+        # @param children [Array] Array of child nodes
+        # @param opts [Hash] Comparison options
+        # @return [Array] Filtered array of children
+        def filter_children(children, opts)
+          children.reject do |child|
+            node_excluded?(child, opts)
+          end
+        end
+        # Check if node should be excluded from comparison
+        #
+        # @param node [Object] Node to check
+        # @param opts [Hash] Comparison options
+        # @return [Boolean] true if node should be excluded
+        def node_excluded?(node, opts)
+          return false if node.nil?
+          return true if opts[:ignore_nodes]&.include?(node)
+          return true if opts[:ignore_comments] && comment_node?(node)
+          return true if opts[:ignore_text_nodes] && text_node?(node)
+          # Check structural_whitespace match option
+          match_opts = opts[:match_opts]
+          # Filter out whitespace-only text nodes
+          if match_opts && %i[ignore
+                              normalize].include?(match_opts[:structural_whitespace]) && text_node?(node)
+            text = node_text(node)
+            return true if MatchOptions.normalize_text(text).empty?
+          end
+          false
+        end
+        # Check if two nodes are the same type
+        #
+        # @param node1 [Object] First node
+        # @param node2 [Object] Second node
+        # @return [Boolean] true if nodes are same type
+        def same_node_type?(node1, node2)
+          return false if node1.class != node2.class
+          # For Nokogiri/Canon::Xml nodes, check node type
+          if node1.respond_to?(:node_type) && node2.respond_to?(:node_type)
+            node1.node_type == node2.node_type
+          else
+            true
+          end
+        end
+        # Check if a node is a comment node
+        #
+        # @param node [Object] Node to check
+        # @return [Boolean] true if node is a comment
+        def comment_node?(node)
+          node.respond_to?(:comment?) && node.comment? ||
+            node.respond_to?(:node_type) && node.node_type == :comment
+        end
+        # Check if a node is a text node
+        #
+        # @param node [Object] Node to check
+        # @return [Boolean] true if node is a text node
+        def text_node?(node)
+          node.respond_to?(:text?) && node.text? &&
+            !node.respond_to?(:element?) ||
+            node.respond_to?(:node_type) && node.node_type == :text
+        end
+        # Get text content from a node
+        #
+        # @param node [Object] Node to get text from
+        # @return [String] Text content
+        def node_text(node)
+          # Canon::Xml::Node TextNode uses .value
+          if node.respond_to?(:value)
+            node.value.to_s
+          # Nokogiri nodes use .content
+          elsif node.respond_to?(:content)
+            node.content.to_s
+          else
+            node.to_s
+          end
+        end
+        # Check if difference between two texts is only whitespace
+        #
+        # @param text1 [String] First text
+        # @param text2 [String] Second text
+        # @return [Boolean] true if difference is only in whitespace
+        def whitespace_only_difference?(text1, text2)
+          # Normalize both texts (collapse/trim whitespace)
+          norm1 = MatchOptions.normalize_text(text1)
+          norm2 = MatchOptions.normalize_text(text2)
+          # If normalized texts are the same, the difference was only whitespace
+          norm1 == norm2
+        end
+        # Build a human-readable reason for a difference
+        #
+        # @param node1 [Object, nil] First node
+        # @param node2 [Object, nil] Second node
+        # @param diff1 [Symbol] Difference type for node1
+        # @param diff2 [Symbol] Difference type for node2
+        # @param dimension [Symbol] The dimension of the difference
+        # @return [String] Human-readable reason
+        def build_difference_reason(_node1, _node2, diff1, diff2, dimension)
+          # Default reason - can be overridden in subclasses
+          "Difference in #{dimension}: #{diff1} vs #{diff2}"
+        end
+        # Serialize an element node to string
+        #
+        # @param node [Canon::Xml::Nodes::ElementNode] Element node
+        # @return [String] Serialized element
+        def serialize_element_node(node)
+          attrs = node.attribute_nodes.map do |a|
+            " #{a.name}=\"#{a.value}\""
+          end.join
+          children_xml = node.children.map { |c| serialize_node(c) }.join
+          if children_xml.empty?
+            "<#{node.name}#{attrs}/>"
+          else
+            "<#{node.name}#{attrs}>#{children_xml}</#{node.name}>"
+          end
+        end
+        # Determine the appropriate dimension for a node type
+        #
+        # @param node [Object] The node to check
+        # @return [Symbol] The dimension symbol
+        def determine_node_dimension(node)
+          # Canon::Xml::Node types
+          if node.respond_to?(:node_type) && node.node_type.is_a?(Symbol)
+            case node.node_type
+            when :comment then :comments
+            when :text, :cdata then :text_content
+            when :processing_instruction then :processing_instructions
+            else :text_content
+            end
+          # Moxml/Nokogiri types
+          elsif node.respond_to?(:comment?) && node.comment?
+            :comments
+          elsif node.respond_to?(:text?) && node.text?
+            :text_content
+          elsif node.respond_to?(:cdata?) && node.cdata?
+            :text_content
+          elsif node.respond_to?(:processing_instruction?) && node.processing_instruction?
+            :processing_instructions
+          else
+            :text_content
+          end
+        end
+      end
+    end
+  end
+end