RubyGems - canon - Versions diffs - 0.1.8 → 0.1.10 - Mend

canon 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101)

checksums.yaml +4 -4
data/.rubocop_todo.yml +83 -22
data/docs/Gemfile +1 -0
data/docs/_config.yml +90 -1
data/docs/advanced/diff-classification.adoc +196 -24
data/docs/features/match-options/index.adoc +239 -1
data/lib/canon/comparison/format_detector.rb +2 -1
data/lib/canon/comparison/html_comparator.rb +19 -8
data/lib/canon/comparison/html_compare_profile.rb +8 -2
data/lib/canon/comparison/markup_comparator.rb +109 -2
data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
data/lib/canon/comparison/xml_comparator.rb +240 -23
data/lib/canon/comparison/xml_node_comparison.rb +25 -3
data/lib/canon/diff/diff_classifier.rb +119 -5
data/lib/canon/diff/formatting_detector.rb +1 -1
data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
data/lib/canon/rspec_matchers.rb +37 -8
data/lib/canon/version.rb +1 -1
data/lib/canon/xml/data_model.rb +24 -13
metadata +4 -78
data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
data/false_positive_analysis.txt +0 -0
data/file1.html +0 -1
data/file2.html +0 -1
data/old-docs/ADVANCED_TOPICS.adoc +0 -20
data/old-docs/BASIC_USAGE.adoc +0 -16
data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
data/old-docs/CLI.adoc +0 -497
data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
data/old-docs/DIFF_FORMATTING.adoc +0 -540
data/old-docs/DIFF_PARAMETERS.adoc +0 -261
data/old-docs/DOM_DIFF.adoc +0 -1017
data/old-docs/ENV_CONFIG.adoc +0 -876
data/old-docs/FORMATS.adoc +0 -867
data/old-docs/INPUT_VALIDATION.adoc +0 -477
data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
data/old-docs/MATCH_OPTIONS.adoc +0 -912
data/old-docs/MODES.adoc +0 -432
data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
data/old-docs/OPTIONS.adoc +0 -1387
data/old-docs/PREPROCESSING.adoc +0 -491
data/old-docs/README.old.adoc +0 -2831
data/old-docs/RSPEC.adoc +0 -814
data/old-docs/RUBY_API.adoc +0 -485
data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
data/old-docs/STRING_COMPARE.adoc +0 -345
data/old-docs/TMP.adoc +0 -3384
data/old-docs/TREE_DIFF.adoc +0 -1080
data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
data/old-docs/VERBOSE.adoc +0 -482
data/old-docs/VISUALIZATION_MAP.adoc +0 -625
data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
data/scripts/analyze_current_state.rb +0 -85
data/scripts/analyze_false_positives.rb +0 -114
data/scripts/analyze_remaining_failures.rb +0 -105
data/scripts/compare_current_failures.rb +0 -95
data/scripts/compare_dom_tree_diff.rb +0 -158
data/scripts/compare_failures.rb +0 -151
data/scripts/debug_attribute_extraction.rb +0 -66
data/scripts/debug_blocks_839.rb +0 -115
data/scripts/debug_meta_matching.rb +0 -52
data/scripts/debug_p_matching.rb +0 -192
data/scripts/debug_signature_matching.rb +0 -118
data/scripts/debug_sourcecode_124.rb +0 -32
data/scripts/debug_whitespace_sensitive.rb +0 -192
data/scripts/extract_false_positives.rb +0 -138
data/scripts/find_actual_false_positives.rb +0 -125
data/scripts/investigate_all_false_positives.rb +0 -161
data/scripts/investigate_batch1.rb +0 -127
data/scripts/investigate_classification.rb +0 -150
data/scripts/investigate_classification_detailed.rb +0 -190
data/scripts/investigate_common_failures.rb +0 -342
data/scripts/investigate_false_negative.rb +0 -80
data/scripts/investigate_false_positive.rb +0 -83
data/scripts/investigate_false_positives.rb +0 -227
data/scripts/investigate_false_positives_batch.rb +0 -163
data/scripts/investigate_mixed_content.rb +0 -125
data/scripts/investigate_remaining_16.rb +0 -214
data/scripts/run_single_test.rb +0 -29
data/scripts/test_all_false_positives.rb +0 -95
data/scripts/test_attribute_details.rb +0 -61
data/scripts/test_both_algorithms.rb +0 -49
data/scripts/test_both_simple.rb +0 -49
data/scripts/test_enhanced_semantic_output.rb +0 -125
data/scripts/test_readme_examples.rb +0 -131
data/scripts/test_semantic_tree_diff.rb +0 -99
data/scripts/test_semantic_ux_improvements.rb +0 -135
data/scripts/test_single_false_positive.rb +0 -119
data/scripts/test_size_limits.rb +0 -99
data/test_html_1.html +0 -21
data/test_html_2.html +0 -21
data/test_nokogiri.rb +0 -33
data/test_normalize.rb +0 -45

data/lib/canon/diff/xml_serialization_formatter.rb ADDED

@@ -0,0 +1,153 @@
+# frozen_string_literal: true
+module Canon
+  module Diff
+    # Detects and classifies XML serialization-level formatting differences.
+    #
+    # Serialization-level formatting differences are differences in XML syntax
+    # that do not affect the semantic content of the document. These differences
+    # arise from different valid ways to serialize the same semantic content.
+    #
+    # These differences are ALWAYS non-normative (formatting-only) regardless
+    # of match options, because they are purely syntactic variations.
+    #
+    # Examples:
+    # - Self-closing vs explicit closing tags: <tag/> vs <tag></tag>
+    # - Attribute quote style: attr="value" vs attr='value' (parser-normalized)
+    # - Whitespace within tags: <tag a="1" b="2"> vs <tag a="1"  b="2"> (parser-normalized)
+    #
+    # Note: Some serialization differences are normalized away by XML parsers
+    # (attribute quotes, tag spacing). This class focuses on differences that
+    # survive parsing and comparison, such as self-closing vs explicit closing.
+    class XmlSerializationFormatter
+      # Detect if a diff node represents an XML serialization formatting difference.
+      #
+      # Serialization formatting differences are ALWAYS non-normative because they
+      # represent different valid serializations of the same semantic content.
+      #
+      # @param diff_node [DiffNode] The diff node to check
+      # @return [Boolean] true if this is a serialization formatting difference
+      def self.serialization_formatting?(diff_node)
+        # Currently only handles text_content dimension
+        # Future: add detection for other dimensions
+        return false unless diff_node.dimension == :text_content
+        empty_text_content_serialization_diff?(diff_node)
+      end
+      # Check if a text_content difference is from XML serialization format.
+      #
+      # Specifically detects self-closing tags (<tag/>) vs explicit closing tags
+      # (<tag></tag>), which create different text node structures:
+      # - Self-closing: no text node (nil)
+      # - Explicit closing: empty or whitespace-only text node ("", " ", "\n", etc.)
+      #
+      # Per XML standards, these forms are semantically equivalent.
+      #
+      # @param diff_node [DiffNode] The diff node to check
+      # @return [Boolean] true if this is a serialization formatting difference
+      def self.empty_text_content_serialization_diff?(diff_node)
+        return false unless diff_node.dimension == :text_content
+        node1 = diff_node.node1
+        node2 = diff_node.node2
+        # Both nodes are nil - no actual difference, not a serialization formatting diff
+        return false if node1.nil? && node2.nil?
+        # Only one is nil (e.g., one doc has self-closing, other has text)
+        # If the non-nil one is blank, it's still serialization formatting
+        if node1.nil? || node2.nil?
+          non_nil = node1 || node2
+          return false unless text_node?(non_nil)
+          text = extract_text_content(non_nil)
+          return blank?(text)
+        end
+        # Both must be text nodes
+        return false unless text_node?(node1) && text_node?(node2)
+        text1 = extract_text_content(node1)
+        text2 = extract_text_content(node2)
+        # Check if both texts are blank/whitespace-only
+        # This indicates self-closing vs explicit closing tag syntax
+        blank?(text1) && blank?(text2)
+      end
+      # Check if a value is blank (nil or whitespace-only)
+      # @param value [String, nil] Value to check
+      # @return [Boolean] true if blank
+      def self.blank?(value)
+        value.nil? ||
+          (value.respond_to?(:empty?) && value.empty?) ||
+          (value.respond_to?(:strip) && value.strip.empty?)
+      end
+      # Check if a node is a text node
+      # @param node [Object] The node to check
+      # @return [Boolean] true if the node is a text node
+      def self.text_node?(node)
+        return false if node.nil?
+        # Canon::Xml::Nodes::TextNode
+        return true if node.is_a?(Canon::Xml::Nodes::TextNode)
+        # Moxml::Text (check before generic node_type check)
+        return true if node.is_a?(Moxml::Text)
+        # Nokogiri text nodes (node_type returns integer constant like 3)
+        return true if node.respond_to?(:node_type) &&
+          node.node_type.is_a?(Integer) &&
+          node.node_type == Nokogiri::XML::Node::TEXT_NODE
+        # Moxml text nodes (node_type returns symbol) - for when using Moxml adapters
+        return true if node.respond_to?(:node_type) && node.node_type == :text
+        # String
+        return true if node.is_a?(String)
+        # Test doubles or objects with text node-like interface
+        # Check if it has a value method (contains text content)
+        return true if node.respond_to?(:value)
+        false
+      end
+      # Extract text content from a node
+      # @param node [Object] The node to extract text from
+      # @return [String, nil] The text content or nil
+      def self.extract_text_content(node)
+        return nil if node.nil?
+        # For TextNode with value attribute (Canon::Xml::Nodes::TextNode)
+        return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
+        # For XML/HTML nodes with text_content method
+        return node.text_content if node.respond_to?(:text_content)
+        # For nodes with content method (try before text, as Moxml::Text.text returns "")
+        return node.content if node.respond_to?(:content)
+        # For nodes with text method
+        return node.text if node.respond_to?(:text)
+        # For nodes with value method (other types)
+        return node.value if node.respond_to?(:value)
+        # For simple text nodes or strings
+        return node.to_s if node.is_a?(String)
+        # For other node types, try to_s
+        node.to_s
+      rescue StandardError
+        # If extraction fails, return nil (not a serialization difference)
+        nil
+      end
+      private_class_method :blank?, :text_node?, :extract_text_content,
+                           :empty_text_content_serialization_diff?
+    end
+  end
+end

data/lib/canon/rspec_matchers.rb CHANGED

@@ -63,6 +63,15 @@ module Canon
         self
       end
+      # Chain method for setting match options
+      # @param match_opts [Hash] match options
+      # @return [SerializationMatcher] self for chaining
+      def with_match(**match_opts)
+        @match ||= {}
+        @match = @match.merge(match_opts)
+        self
+      end
       def matches?(target)
         @target = target
@@ -252,12 +261,22 @@ module Canon
                                diff_algorithm: diff_algorithm)
     end
-    def be_yaml_equivalent_to(expected)
-      SerializationMatcher.new(expected, :yaml)
+    def be_yaml_equivalent_to(expected, match_profile: nil, match: nil,
+                              preprocessing: nil, diff_algorithm: nil)
+      SerializationMatcher.new(expected, :yaml,
+                               match_profile: match_profile,
+                               match: match,
+                               preprocessing: preprocessing,
+                               diff_algorithm: diff_algorithm)
     end
-    def be_json_equivalent_to(expected)
-      SerializationMatcher.new(expected, :json)
+    def be_json_equivalent_to(expected, match_profile: nil, match: nil,
+                              preprocessing: nil, diff_algorithm: nil)
+      SerializationMatcher.new(expected, :json,
+                               match_profile: match_profile,
+                               match: match,
+                               preprocessing: preprocessing,
+                               diff_algorithm: diff_algorithm)
     end
     def be_html_equivalent_to(expected, match_profile: nil, match: nil,
@@ -287,12 +306,22 @@ module Canon
                                diff_algorithm: diff_algorithm)
     end
-    def be_equivalent_to(expected)
-      SerializationMatcher.new(expected, nil)
+    def be_equivalent_to(expected, match_profile: nil, match: nil,
+                         preprocessing: nil, diff_algorithm: nil)
+      SerializationMatcher.new(expected, nil,
+                               match_profile: match_profile,
+                               match: match,
+                               preprocessing: preprocessing,
+                               diff_algorithm: diff_algorithm)
     end
-    def be_string_equivalent_to(expected)
-      SerializationMatcher.new(expected, :string)
+    def be_string_equivalent_to(expected, match_profile: nil, match: nil,
+                                 preprocessing: nil, diff_algorithm: nil)
+      SerializationMatcher.new(expected, :string,
+                               match_profile: match_profile,
+                               match: match,
+                               preprocessing: preprocessing,
+                               diff_algorithm: diff_algorithm)
     end
     if defined?(::RSpec) && ::RSpec.respond_to?(:configure)

data/lib/canon/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Canon
-  VERSION = "0.1.8"
+  VERSION = "0.1.10"
 end

data/lib/canon/xml/data_model.rb CHANGED

@@ -18,8 +18,9 @@ module Canon
       # Build XPath data model from XML string
       #
       # @param xml_string [String] XML content to parse
+      # @param preserve_whitespace [Boolean] Whether to preserve whitespace-only text nodes
       # @return [Nodes::RootNode] Root of the data model tree
-      def self.from_xml(xml_string)
+      def self.from_xml(xml_string, preserve_whitespace: false)
         # Parse with Nokogiri
         doc = Nokogiri::XML(xml_string) do |config|
           config.nonet     # Disable network access
@@ -30,7 +31,7 @@ module Canon
         check_for_relative_namespace_uris(doc)
         # Convert to XPath data model
-        build_from_nokogiri(doc)
+        build_from_nokogiri(doc, preserve_whitespace: preserve_whitespace)
       end
       # Alias for compatibility with base class interface
@@ -74,19 +75,21 @@ module Canon
       # Build XPath data model from Nokogiri document or fragment
       # rubocop:disable Metrics/MethodLength
-      def self.build_from_nokogiri(nokogiri_doc)
+      def self.build_from_nokogiri(nokogiri_doc, preserve_whitespace: false)
         root = Nodes::RootNode.new
         if nokogiri_doc.respond_to?(:root) && nokogiri_doc.root
           # For Documents (XML, HTML4, HTML5, Moxml): process the root element
-          root.add_child(build_element_node(nokogiri_doc.root))
+          root.add_child(build_element_node(nokogiri_doc.root,
+                                            preserve_whitespace: preserve_whitespace))
           # Process PIs and comments outside doc element
           nokogiri_doc.children.each do |child|
             next if child == nokogiri_doc.root
             next if child.is_a?(Nokogiri::XML::DTD)
-            node = build_node_from_nokogiri(child)
+            node = build_node_from_nokogiri(child,
+                                            preserve_whitespace: preserve_whitespace)
             root.add_child(node) if node
           end
         else
@@ -95,7 +98,8 @@ module Canon
           nokogiri_doc.children.each do |child|
             next if child.is_a?(Nokogiri::XML::DTD)
-            node = build_node_from_nokogiri(child)
+            node = build_node_from_nokogiri(child,
+                                            preserve_whitespace: preserve_whitespace)
             root.add_child(node) if node
           end
         end
@@ -104,12 +108,15 @@ module Canon
       end
       # Build node from Nokogiri node
-      def self.build_node_from_nokogiri(nokogiri_node)
+      def self.build_node_from_nokogiri(nokogiri_node,
+preserve_whitespace: false)
         case nokogiri_node
         when Nokogiri::XML::Element
-          build_element_node(nokogiri_node)
+          build_element_node(nokogiri_node,
+                             preserve_whitespace: preserve_whitespace)
         when Nokogiri::XML::Text
-          build_text_node(nokogiri_node)
+          build_text_node(nokogiri_node,
+                          preserve_whitespace: preserve_whitespace)
         when Nokogiri::XML::Comment
           build_comment_node(nokogiri_node)
         when Nokogiri::XML::ProcessingInstruction
@@ -119,7 +126,7 @@ module Canon
       # Build element node from Nokogiri element
       # rubocop:disable Metrics/MethodLength
-      def self.build_element_node(nokogiri_element)
+      def self.build_element_node(nokogiri_element, preserve_whitespace: false)
         element = Nodes::ElementNode.new(
           name: nokogiri_element.name,
           namespace_uri: nokogiri_element.namespace&.href,
@@ -134,7 +141,8 @@ module Canon
         # Build child nodes
         nokogiri_element.children.each do |child|
-          node = build_node_from_nokogiri(child)
+          node = build_node_from_nokogiri(child,
+                                          preserve_whitespace: preserve_whitespace)
           element.add_child(node) if node
         end
@@ -195,13 +203,16 @@ module Canon
       end
       # Build text node from Nokogiri text node
-      def self.build_text_node(nokogiri_text)
+      def self.build_text_node(nokogiri_text, preserve_whitespace: false)
         # XML text nodes: preserve all content including whitespace
         # Unlike HTML, XML treats all whitespace as significant
         content = nokogiri_text.content
         # Skip empty text nodes between elements (common formatting whitespace)
-        return nil if content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element)
+        # UNLESS preserve_whitespace is true (for structural_whitespace: :strict)
+        if !preserve_whitespace && content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element)
+          return nil
+        end
         # Nokogiri already handles CDATA conversion and entity resolution
         Nodes::TextNode.new(value: content)

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: canon
 version: !ruby/object:Gem::Version
-  version: 0.1.8
+  version: 0.1.10
 platform: ruby
 authors:
 - Ribose Inc.
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2026-01-18 00:00:00.000000000 Z
+date: 2026-01-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: diff-lcs
@@ -174,7 +174,6 @@ files:
 - docs/internals/diffnode-enrichment.adoc
 - docs/internals/index.adoc
 - docs/lychee.toml
-- docs/plans/2025-01-17-html-parser-selection-fix.adoc
 - docs/reference/cli-options.adoc
 - docs/reference/environment-variables.adoc
 - docs/reference/index.adoc
@@ -191,9 +190,6 @@ files:
 - docs/understanding/formats/yaml.adoc
 - docs/understanding/index.adoc
 - exe/canon
-- false_positive_analysis.txt
-- file1.html
-- file2.html
 - lib/canon.rb
 - lib/canon/cache.rb
 - lib/canon/cli.rb
@@ -230,6 +226,7 @@ files:
 - lib/canon/comparison/strategies/base_match_strategy.rb
 - lib/canon/comparison/strategies/match_strategy_factory.rb
 - lib/canon/comparison/strategies/semantic_tree_match_strategy.rb
+- lib/canon/comparison/whitespace_sensitivity.rb
 - lib/canon/comparison/xml_comparator.rb
 - lib/canon/comparison/xml_comparator/attribute_comparator.rb
 - lib/canon/comparison/xml_comparator/attribute_filter.rb
@@ -260,6 +257,7 @@ files:
 - lib/canon/diff/formatting_detector.rb
 - lib/canon/diff/node_serializer.rb
 - lib/canon/diff/path_builder.rb
+- lib/canon/diff/xml_serialization_formatter.rb
 - lib/canon/diff_formatter.rb
 - lib/canon/diff_formatter/by_line/base_formatter.rb
 - lib/canon/diff_formatter/by_line/html_formatter.rb
@@ -344,79 +342,7 @@ files:
 - lib/canon/xml/whitespace_normalizer.rb
 - lib/canon/xml/xml_base_handler.rb
 - lib/xml-c14n.rb
-- old-docs/ADVANCED_TOPICS.adoc
-- old-docs/BASIC_USAGE.adoc
-- old-docs/CHARACTER_VISUALIZATION.adoc
-- old-docs/CLI.adoc
-- old-docs/CUSTOMIZING_BEHAVIOR.adoc
-- old-docs/DIFF_ARCHITECTURE.adoc
-- old-docs/DIFF_FORMATTING.adoc
-- old-docs/DIFF_PARAMETERS.adoc
-- old-docs/DOM_DIFF.adoc
-- old-docs/ENV_CONFIG.adoc
-- old-docs/FORMATS.adoc
-- old-docs/INPUT_VALIDATION.adoc
-- old-docs/MATCHER_BEHAVIOR.adoc
-- old-docs/MATCH_ARCHITECTURE.adoc
-- old-docs/MATCH_OPTIONS.adoc
-- old-docs/MODES.adoc
-- old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc
-- old-docs/OPTIONS.adoc
-- old-docs/PREPROCESSING.adoc
-- old-docs/README.old.adoc
-- old-docs/RSPEC.adoc
-- old-docs/RUBY_API.adoc
-- old-docs/SEMANTIC_DIFF_REPORT.adoc
-- old-docs/SEMANTIC_TREE_DIFF.adoc
-- old-docs/STRING_COMPARE.adoc
-- old-docs/TMP.adoc
-- old-docs/TREE_DIFF.adoc
-- old-docs/UNDERSTANDING_CANON.adoc
-- old-docs/VERBOSE.adoc
-- old-docs/VISUALIZATION_MAP.adoc
-- old-docs/WHITESPACE_TREATMENT.adoc
-- scripts/analyze_current_state.rb
-- scripts/analyze_false_positives.rb
-- scripts/analyze_remaining_failures.rb
-- scripts/compare_current_failures.rb
-- scripts/compare_dom_tree_diff.rb
-- scripts/compare_failures.rb
-- scripts/debug_attribute_extraction.rb
-- scripts/debug_blocks_839.rb
-- scripts/debug_meta_matching.rb
-- scripts/debug_p_matching.rb
-- scripts/debug_signature_matching.rb
-- scripts/debug_sourcecode_124.rb
-- scripts/debug_whitespace_sensitive.rb
-- scripts/extract_false_positives.rb
-- scripts/find_actual_false_positives.rb
-- scripts/investigate_all_false_positives.rb
-- scripts/investigate_batch1.rb
-- scripts/investigate_classification.rb
-- scripts/investigate_classification_detailed.rb
-- scripts/investigate_common_failures.rb
-- scripts/investigate_false_negative.rb
-- scripts/investigate_false_positive.rb
-- scripts/investigate_false_positives.rb
-- scripts/investigate_false_positives_batch.rb
-- scripts/investigate_mixed_content.rb
-- scripts/investigate_remaining_16.rb
-- scripts/run_single_test.rb
-- scripts/test_all_false_positives.rb
-- scripts/test_attribute_details.rb
-- scripts/test_both_algorithms.rb
-- scripts/test_both_simple.rb
-- scripts/test_enhanced_semantic_output.rb
-- scripts/test_readme_examples.rb
-- scripts/test_semantic_tree_diff.rb
-- scripts/test_semantic_ux_improvements.rb
-- scripts/test_single_false_positive.rb
-- scripts/test_size_limits.rb
 - sig/xml/c14n.rbs
-- test_html_1.html
-- test_html_2.html
-- test_nokogiri.rb
-- test_normalize.rb
 homepage: https://github.com/lutaml/canon
 licenses:
 - BSD-2-Clause