RubyGems - canon - Versions diffs - 0.2.8 → 0.2.11 - Mend

canon 0.2.8 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

checksums.yaml +4 -4
data/.rspec-opal +7 -0
data/.rubocop_todo.yml +25 -73
data/Rakefile +37 -0
data/lib/canon/cache.rb +16 -27
data/lib/canon/cli.rb +1 -1
data/lib/canon/color_detector.rb +3 -5
data/lib/canon/comparison/compare_profile.rb +1 -4
data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
data/lib/canon/comparison/format_detector.rb +29 -20
data/lib/canon/comparison/html_comparator.rb +20 -29
data/lib/canon/comparison/html_compare_profile.rb +3 -10
data/lib/canon/comparison/html_parser.rb +1 -1
data/lib/canon/comparison/json_comparator.rb +8 -0
data/lib/canon/comparison/node_inspector.rb +117 -86
data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +19 -2
data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +40 -8
data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
data/lib/canon/comparison/xml_comparator/node_parser.rb +14 -13
data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
data/lib/canon/comparison/xml_comparator.rb +63 -85
data/lib/canon/comparison/xml_node_comparison.rb +15 -15
data/lib/canon/comparison/yaml_comparator.rb +8 -0
data/lib/canon/comparison.rb +24 -24
data/lib/canon/config/profile_loader.rb +13 -13
data/lib/canon/config.rb +29 -5
data/lib/canon/diff/diff_classifier.rb +7 -41
data/lib/canon/diff/diff_line.rb +1 -1
data/lib/canon/diff/diff_line_builder.rb +2 -0
data/lib/canon/diff/diff_node_enricher.rb +22 -24
data/lib/canon/diff/diff_node_mapper.rb +10 -8
data/lib/canon/diff/formatting_detector.rb +3 -2
data/lib/canon/diff/node_serializer.rb +23 -30
data/lib/canon/diff/path_builder.rb +24 -37
data/lib/canon/diff/source_locator.rb +0 -3
data/lib/canon/diff/xml_serialization_formatter.rb +8 -84
data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
data/lib/canon/diff_formatter/by_object/base_formatter.rb +23 -17
data/lib/canon/diff_formatter/by_object/xml_formatter.rb +127 -11
data/lib/canon/diff_formatter/by_object_formatter.rb +2 -6
data/lib/canon/diff_formatter/debug_output.rb +12 -24
data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +3 -3
data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +26 -27
data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
data/lib/canon/diff_formatter/legend.rb +2 -2
data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
data/lib/canon/diff_formatter/theme.rb +4 -4
data/lib/canon/diff_formatter.rb +17 -13
data/lib/canon/formatters/html_formatter.rb +1 -1
data/lib/canon/formatters/html_formatter_base.rb +1 -1
data/lib/canon/formatters/xml_formatter.rb +7 -32
data/lib/canon/html/data_model.rb +2 -2
data/lib/canon/pretty_printer/html.rb +1 -1
data/lib/canon/pretty_printer/xml.rb +16 -7
data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
data/lib/canon/rspec_matchers.rb +2 -2
data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
data/lib/canon/tree_diff/core/tree_node.rb +1 -3
data/lib/canon/tree_diff/operation_converter.rb +7 -7
data/lib/canon/tree_diff/operations/operation_detector.rb +4 -0
data/lib/canon/validators/base_validator.rb +5 -8
data/lib/canon/validators/html_validator.rb +3 -8
data/lib/canon/validators/xml_validator.rb +3 -8
data/lib/canon/version.rb +1 -1
data/lib/canon/xml/data_model.rb +132 -138
data/lib/canon/xml/namespace_helper.rb +5 -0
data/lib/canon/xml/node.rb +2 -1
data/lib/canon/xml/nodes/root_node.rb +4 -0
data/lib/canon/xml/nodes/text_node.rb +6 -1
data/lib/canon/xml/sax_builder.rb +5 -7
data/lib/canon/xml/whitespace_normalizer.rb +2 -2
data/lib/canon/xml_backend.rb +49 -0
data/lib/canon/xml_parsing.rb +283 -0
data/lib/canon.rb +3 -1
data/lib/tasks/benchmark_runner.rb +1 -1
data/lib/tasks/performance_helpers.rb +1 -1
metadata +9 -6

data/lib/canon/comparison/html_comparator.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-require "nokogiri"
+require "nokogiri" unless RUBY_ENGINE == "opal"
 require_relative "../comparison" # Load base module with constants first
 require_relative "markup_comparator"
 require_relative "xml_comparator"
@@ -167,6 +167,11 @@ module Canon
           end
         end
+        # Public parsing API for external callers
+        def parse(html, preprocessing = :none)
+          parse_node_for_semantic(html, preprocessing)
+        end
         private
         # Check if both nodes are document fragments
@@ -337,13 +342,10 @@ module Canon
           # Convert to string if needed
           html_string = if html.is_a?(String)
                           html
-                        elsif html.respond_to?(:to_html)
+                        elsif Canon::XmlParsing.xml_node?(html)
                           html.to_html
-                        elsif html.respond_to?(:to_s)
-                          html.to_s
                         else
-                          raise Canon::Error,
-                                "Unable to convert HTML to string: #{html.class}"
+                          html.to_s
                         end
           # Strip DOCTYPE for consistent parsing
@@ -492,22 +494,18 @@ module Canon
         end
         def find_and_normalize_style_script(node)
-          return unless node.respond_to?(:children)
+          return unless node.is_a?(Canon::Xml::Node)
           node.children.each do |child|
             next unless child.is_a?(Canon::Xml::Nodes::ElementNode)
             # If this is a style or script element, normalize its text content
             if %w[style script].include?(child.name.downcase)
-              # Get text children and remove HTML comments from them
               child.children.each do |text_child|
                 next unless text_child.is_a?(Canon::Xml::Nodes::TextNode)
-                # Remove HTML comments from text content without using regex
-                # to avoid ReDoS/incomplete sanitization vulnerabilities
                 normalized = remove_html_comments(text_child.value)
-                # Update the text value
-                text_child.instance_variable_set(:@value, normalized)
+                text_child.value = normalized
               end
             end
@@ -560,6 +558,8 @@ module Canon
           end
         end
+        public :detect_html_version
         # Detect HTML version from node
         #
         # @param node [Canon::Xml::Node, Nokogiri::XML::Node] HTML node
@@ -584,13 +584,10 @@ module Canon
         # @param node [Canon::Xml::Node, Nokogiri::HTML::Document] Parsed node
         # @return [String] Serialized HTML string
         def serialize_for_display(node)
-          # Use XmlNodeComparison's serializer for Canon::Xml::Node
           if node.is_a?(Canon::Xml::Node)
             XmlNodeComparison.serialize_node_to_xml(node)
-          elsif node.respond_to?(:to_html)
-            node.to_html
-          elsif node.respond_to?(:to_xml)
-            node.to_xml
+          elsif Canon::XmlParsing.xml_node?(node)
+            Canon::XmlBackend.nokogiri? ? node.to_html : Canon::XmlParsing.serialize(node)
           else
             node.to_s
           end
@@ -605,16 +602,11 @@ module Canon
           if html.is_a?(String)
             html
           elsif html.is_a?(Canon::Xml::Node)
-            # Serialize Canon nodes to string
             Canon::Xml::DataModel.serialize(html)
-          elsif html.respond_to?(:to_html)
-            # Nokogiri nodes - use to_html to preserve formatting
-            html.to_html
-          elsif html.respond_to?(:to_s)
-            html.to_s
+          elsif Canon::XmlParsing.xml_node?(html)
+            Canon::XmlBackend.nokogiri? ? html.to_html : html.to_s
           else
-            raise Canon::Error,
-                  "Unable to extract original string from: #{html.class}"
+            html.to_s
           end
         end
@@ -727,11 +719,10 @@ compare_profile = nil)
         # Check if any ancestor of the given node preserves whitespace
         def ancestor_preserves_whitespace?(node, preserve_list)
           current = node
-          while current.respond_to?(:name)
+          while current.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(current)
             return true if preserve_list.include?(current.name.downcase)
-            # Stop at document root - documents don't have parents
-            break if current.is_a?(Nokogiri::XML::Document)
+            break if Canon::XmlParsing.document?(current)
             current = current.parent
           end
@@ -811,7 +802,7 @@ compare_profile = nil)
           end
           # Check if it's a fragment that contains XML processing instructions
-          if node.respond_to?(:children) && node.children.any? do |child|
+          if (node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)) && node.children.any? do |child|
             child.is_a?(Nokogiri::XML::ProcessingInstruction) &&
                 child.name == "xml"
           end

data/lib/canon/comparison/html_compare_profile.rb CHANGED Viewed

@@ -48,9 +48,8 @@ module Canon
             # If key exists, check if it's :strict
             return match_options[:comments] == :strict
-          elsif match_options.respond_to?(:behavior_for)
+          elsif match_options.is_a?(ResolvedMatchOptions)
             behavior = behavior_for(dimension)
-            # In HTML, only :strict makes comments affect equivalence
             return behavior == :strict
           end
           # Default: comments don't affect equivalence in HTML
@@ -106,14 +105,8 @@ module Canon
       def has_explicit_option?(dimension)
         if match_options.is_a?(Hash)
           match_options.key?(dimension)
-        elsif match_options.respond_to?(:[])
-          # For ResolvedMatchOptions, check if key exists
-          begin
-            match_options[dimension]
-            true
-          rescue StandardError
-            false
-          end
+        elsif match_options.is_a?(ResolvedMatchOptions)
+          !match_options.options[dimension].nil?
         else
           false
         end

data/lib/canon/comparison/html_parser.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-require "nokogiri"
+require "nokogiri" unless RUBY_ENGINE == "opal"
 module Canon
   module Comparison

data/lib/canon/comparison/json_comparator.rb CHANGED Viewed

@@ -26,6 +26,14 @@ module Canon
       }.freeze
       class << self
+        # Parse JSON from string or return as-is
+        #
+        # @param obj [String, Hash, Array] JSON string or parsed object
+        # @return [Object] Parsed JSON object
+        def parse(obj)
+          parse_json(obj)
+        end
         # Compare two JSON objects for equivalence
         #
         # @param json1 [String, Hash, Array] First JSON

data/lib/canon/comparison/node_inspector.rb CHANGED Viewed

@@ -4,94 +4,76 @@ module Canon
   module Comparison
     # Single source of truth for cross-backend node type operations.
     #
-    # The comparison pipeline handles nodes from two backends:
+    # The comparison pipeline handles nodes from multiple sources:
     # * Canon::Xml::Node (+ RootNode, ElementNode, TextNode, etc.) —
     #   custom DOM built by SAX builder and DataModel.
-    # * Nokogiri::XML::Node (+ subclasses) — native Nokogiri nodes used
-    #   by the HTML comparator and some legacy paths.
+    # * Canon::TreeDiff::Core::TreeNode — semantic tree diff nodes.
+    # * Backend-specific nodes (Nokogiri or Moxml) — live parsed nodes.
     #
-    # Every method here dispatches on type via +case/when+ (+is_a?+).
-    # No +respond_to?+ — the types are known at every call site.
+    # Architecture: NodeInspector handles Canon-native types (Canon::Xml::Node,
+    # TreeNode) directly, then delegates ALL backend-specific queries to
+    # XmlParsing. No Moxml/Nokogiri constants are referenced here — that
+    # knowledge lives exclusively in XmlParsing.
     module NodeInspector
-      CANON_TEXT_TYPE = :text
-      NOKOGIRI_TEXT_TYPE = defined?(Nokogiri::XML::Node::TEXT_NODE) ? Nokogiri::XML::Node::TEXT_NODE : 3
+      # --- Type predicates ---
-      # True when +node+ is a text node (whitespace, content, etc.).
       def self.text_node?(node)
-        case node
-        when Canon::Xml::Node
-          node.node_type == CANON_TEXT_TYPE
-        when Nokogiri::XML::Node
-          node.node_type == NOKOGIRI_TEXT_TYPE
-        else
-          false
-        end
-      end
+        return false unless node
+        return node.node_type == :text if node.is_a?(Canon::Xml::Node)
-      # Extract the text content of +node+ as a String.
-      def self.text_content(node)
-        case node
-        when Canon::Xml::Node
-          node.value.to_s
-        when Nokogiri::XML::Node
-          node.content.to_s
-        else
-          node.to_s
-        end
+        XmlParsing.text_node?(node)
       end
-      # True when +node+ is a text node whose content is whitespace-only.
-      # Empty-string text nodes return false — those represent genuine
-      # empty-vs-content asymmetry, not pretty-print indentation.
-      def self.whitespace_only_text?(node)
-        return false unless text_node?(node)
+      def self.element_node?(node)
+        return false unless node
+        return node.node_type == :element if node.is_a?(Canon::Xml::Node)
-        text = text_content(node)
-        !text.empty? && text.strip.empty?
+        XmlParsing.element?(node)
       end
-      # True when +node+ is a comment node.
-      # For HTML, also detects comments that Nokogiri parses as TEXT nodes
-      # (content like "<!-- comment -->" or escaped "<\\!-- comment -->").
       def self.comment_node?(node)
-        case node
-        when Canon::Xml::Node
-          node.node_type == :comment
-        when Nokogiri::XML::Node
-          return true if node.comment?
+        return false unless node
+        return node.node_type == :comment if node.is_a?(Canon::Xml::Node)
+        if XmlBackend.nokogiri?
+          return true if node.is_a?(Nokogiri::XML::Node) && node.comment?
           # HTML comments are parsed as TEXT nodes by Nokogiri
-          if node.text?
+          if node.is_a?(Nokogiri::XML::Node) && node.text?
             text_stripped = text_content(node).to_s.strip.gsub("\\", "")
             return true if text_stripped.start_with?("<!--") && text_stripped.end_with?("-->")
           end
           false
         else
-          false
+          XmlParsing.comment?(node)
         end
       end
-      # True when +node+ is an element node.
-      def self.element_node?(node)
-        case node
-        when Canon::Xml::Node
-          node.node_type == :element
-        when Nokogiri::XML::Node
-          node.element?
-        else
-          false
-        end
+      def self.document?(node)
+        return node.node_type == :root if node.is_a?(Canon::Xml::Node)
+        XmlParsing.document?(node)
       end
-      # Classify +node+ as a noise node and return the diff dimension
-      # it should be reported under, or +nil+ if it is structural content.
-      #
-      # Noise nodes (whitespace-only text, comments) are realigned past
-      # during child comparison so that content nodes line up correctly
-      # across sides.
-      #
-      # @param node [Object] DOM node to classify
-      # @return [Symbol, nil] +:whitespace_adjacency+, +:comments+, or +nil+
+      def self.document_fragment?(node)
+        return false unless node
+        return false unless node.is_a?(Canon::Xml::Nodes::RootNode)
+        node.fragment?
+      end
+      # True when +node+ is a text node whose content is whitespace-only.
+      # Empty-string text nodes return false — those represent genuine
+      # empty-vs-content asymmetry, not pretty-print indentation.
+      def self.whitespace_only_text?(node)
+        return false unless text_node?(node)
+        text = text_content(node)
+        !text.empty? && text.strip.empty?
+      end
+      # --- Noise classification ---
       def self.noise_dimension_for(node)
         if whitespace_only_text?(node)
           :whitespace_adjacency
@@ -100,37 +82,86 @@ module Canon
         end
       end
-      # True when +node+ is a noise node (whitespace-only text or comment).
-      # Convenience wrapper around +noise_dimension_for+.
-      #
-      # @param node [Object] DOM node to check
-      # @return [Boolean]
       def self.noise_node?(node)
         !noise_dimension_for(node).nil?
       end
-      # Extract parse-time errors carried on a node or its owning document.
-      # Returns an Array of Strings.
-      def self.parse_errors(node)
-        case node
-        when nil
-          []
-        when Canon::Xml::Node
-          errors = node.parse_errors
-          Array(errors).map(&:to_s)
-        when Nokogiri::XML::Document, Nokogiri::HTML5::Document
-          Array(node.errors).map(&:to_s)
+      # --- Node queries ---
+      def self.name(node)
+        return nil unless node
+        return node.name if node.is_a?(Canon::Xml::Node)
+        return node.label if node.is_a?(Canon::TreeDiff::Core::TreeNode)
+        XmlParsing.name(node)
+      end
+      def self.parent(node)
+        return nil unless node
+        return node.parent if node.is_a?(Canon::Xml::Node)
+        return node.parent if node.is_a?(Canon::TreeDiff::Core::TreeNode)
+        XmlParsing.parent(node)
+      end
+      def self.children(node)
+        return [] unless node
+        return node.children if node.is_a?(Canon::Xml::Node)
+        return node.children || [] if node.is_a?(Canon::TreeDiff::Core::TreeNode)
+        XmlParsing.children(node)
+      end
+      def self.text_content(node)
+        return node.value.to_s if node.is_a?(Canon::Xml::Nodes::TextNode)
+        return node.text_content.to_s if node.is_a?(Canon::Xml::Node)
+        XmlParsing.text_content(node).to_s
+      end
+      def self.node_type(node)
+        return nil unless node
+        return node.node_type if node.is_a?(Canon::Xml::Node)
+        return node.type&.to_sym if node.is_a?(Canon::TreeDiff::Core::TreeNode)
+        XmlParsing.node_type(node)
+      end
+      def self.attribute_value(node, attr_name)
+        return nil unless node
+        if node.is_a?(Canon::Xml::Nodes::ElementNode)
+          attr = node.attribute_nodes.find { |a| a.name == attr_name.to_s }
+          attr&.value
+        elsif node.is_a?(Canon::Xml::Node)
+          nil
         else
-          []
+          XmlParsing.attribute_value(node, attr_name)
+        end
+      end
+      def self.namespace_uri(node)
+        return nil unless node
+        if node.is_a?(Canon::Xml::Node)
+          node.is_a?(Canon::Xml::Nodes::ElementNode) ? node.namespace_uri : nil
+        else
+          XmlParsing.namespace_uri(node)
         end
       end
-      # Return the parent node of +node+, or nil when +node+ is not a
-      # recognised DOM backend type or has no parent.
-      def self.parent_of(node)
-        case node
-        when Canon::Xml::Node, Nokogiri::XML::Node
-          node.parent
+      def self.parse_errors(node)
+        return [] if node.nil?
+        return Array(node.parse_errors).map(&:to_s) if node.is_a?(Canon::Xml::Node)
+        if XmlBackend.nokogiri?
+          if node.is_a?(Nokogiri::XML::Document) || node.is_a?(Nokogiri::HTML5::Document)
+            Array(node.errors).map(&:to_s)
+          else
+            []
+          end
+        else
+          []
         end
       end
     end

data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb CHANGED Viewed

@@ -125,20 +125,18 @@ module Canon
         # @param doc2 [Object] Second XML document
         # @return [Array<String>] Preprocessed strings
         def preprocess_xml(doc1, doc2)
-          # Serialize XML to string
-          # Use XmlNodeComparison's serializer for Canon::Xml::Node
           xml1 = if doc1.is_a?(Canon::Xml::Node)
                    XmlNodeComparison.serialize_node_to_xml(doc1)
-                 elsif doc1.respond_to?(:to_xml)
-                   doc1.to_xml
+                 elsif Canon::XmlParsing.xml_node?(doc1)
+                   Canon::XmlParsing.serialize(doc1)
                  else
                    doc1.to_s
                  end
           xml2 = if doc2.is_a?(Canon::Xml::Node)
                    XmlNodeComparison.serialize_node_to_xml(doc2)
-                 elsif doc2.respond_to?(:to_xml)
-                   doc2.to_xml
+                 elsif Canon::XmlParsing.xml_node?(doc2)
+                   Canon::XmlParsing.serialize(doc2)
                  else
                    doc2.to_s
                  end
@@ -167,7 +165,7 @@ module Canon
                     XmlNodeComparison.serialize_node_to_xml(doc1)
                   elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
                     doc1.to_s
-                  elsif doc1.respond_to?(:to_html)
+                  elsif Canon::XmlParsing.xml_node?(doc1)
                     doc1.to_html
                   else
                     doc1.to_s
@@ -177,7 +175,7 @@ module Canon
                     XmlNodeComparison.serialize_node_to_xml(doc2)
                   elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
                     doc2.to_s
-                  elsif doc2.respond_to?(:to_html)
+                  elsif Canon::XmlParsing.xml_node?(doc2)
                     doc2.to_html
                   else
                     doc2.to_s