RubyGems - canon - Versions diffs - 0.1.15 → 0.1.16 - Mend

canon 0.1.15 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

checksums.yaml +4 -4
data/.rubocop_todo.yml +19 -83
data/README.adoc +57 -0
data/Rakefile +2 -0
data/docs/features/index.adoc +10 -0
data/docs/features/performance.adoc +161 -0
data/lib/canon/comparison/html_comparator.rb +45 -11
data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +7 -0
data/lib/canon/comparison/xml_comparator.rb +38 -0
data/lib/canon/diff/formatting_detector.rb +10 -4
data/lib/canon/diff_formatter/diff_detail_formatter.rb +2 -1
data/lib/canon/tree_diff/core/tree_node.rb +6 -3
data/lib/canon/version.rb +1 -1
data/lib/canon/xml/sax_builder.rb +360 -0
data/lib/tasks/benchmark_runner.rb +610 -0
data/lib/tasks/performance.rake +81 -0
data/lib/tasks/performance_comparator.rb +100 -0
data/lib/tasks/performance_helpers.rb +219 -0
metadata +8 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: '04860609f8d3300ccebf84a0f2208510600dcfaac4b3f54f698eb2de7eed0493'
-  data.tar.gz: fc50abe2a915d7d7ff1cd630c0a5a50849b6fd5780664cb6bb419300b2388743
+  metadata.gz: 0eb3c717365f052953d3deaf83a897112709c1a6084b472b99ddfdc2c9e43b67
+  data.tar.gz: fe4b2b513193b87692cd1fcb11569898e69c6818bea08ae3dccc753ad935f6e0
 SHA512:
-  metadata.gz: d1bcc3ad7439fdd7f65627c53cd0dc4b92d781bdbdaf330d2bb8ecc89b3319a4fc02e78de8e961cdb69854661881db39bf59c664210c7a59b7a011355b1db71b
-  data.tar.gz: cca1b3f2eee48054b5431118350e64acb46f485096cd8c88798e0935210985d63eb93de4727a42d17cfaad8ef03c55e7d1745922500f3a3495a791d86fd60676
+  metadata.gz: 2c6d351b873ebb745c5abcdb2ff6cdbcf4ce53da1ad7f070c0b1eefeeeb776e315fa62c8d82c24b216e6e93cf5ac1790ebe3c6a171a142036ef0abc356d5a9e6
+  data.tar.gz: 6c0228d16e387e2a7919786cb57636e5c3183f0a1a1e119684fb0e01122a5ac23ffc08849f9df55bf413495024de30c4bf2e420172e13a9197d48b30636f845a

data/.rubocop_todo.yml CHANGED Viewed

@@ -1,64 +1,24 @@
 # This configuration was generated by
 # `rubocop --auto-gen-config`
-# on 2026-02-17 14:18:53 UTC using RuboCop version 1.81.7.
+# on 2026-03-21 03:07:35 UTC using RuboCop version 1.85.1.
 # The point is for the user to remove these configuration records
 # one by one as the offenses are removed from the code base.
 # Note that changes in the inspected code, or installation of new
 # versions of RuboCop, may require this file to be generated again.
 # Offense count: 1
-# Configuration parameters: Severity.
 Gemspec/RequiredRubyVersion:
   Exclude:
     - 'canon.gemspec'
-# Offense count: 1
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: EnforcedStyle, IndentationWidth.
-# SupportedStyles: with_first_argument, with_fixed_indentation
-Layout/ArgumentAlignment:
-  Exclude:
-    - 'lib/canon/xml/element_matcher.rb'
-# Offense count: 23
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: EnforcedStyleAlignWith.
-# SupportedStylesAlignWith: either, start_of_block, start_of_line
-Layout/BlockAlignment:
-  Exclude:
-    - 'spec/canon/fixtures/isodoc_spec.rb'
-    - 'spec/canon/table_class_attribute_bug_spec.rb'
-# Offense count: 23
+# Offense count: 773
 # This cop supports safe autocorrection (--autocorrect).
-Layout/BlockEndNewline:
-  Exclude:
-    - 'spec/canon/fixtures/isodoc_spec.rb'
-    - 'spec/canon/table_class_attribute_bug_spec.rb'
-# Offense count: 46
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: Width, AllowedPatterns.
-Layout/IndentationWidth:
-  Exclude:
-    - 'spec/canon/fixtures/isodoc_spec.rb'
-    - 'spec/canon/table_class_attribute_bug_spec.rb'
-# Offense count: 780
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
+# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
 # URISchemes: http, https
 Layout/LineLength:
   Enabled: false
-# Offense count: 1
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: AllowInHeredoc.
-Layout/TrailingWhitespace:
-  Exclude:
-    - 'lib/canon/xml/element_matcher.rb'
-# Offense count: 48
+# Offense count: 49
 # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
 Lint/DuplicateBranch:
   Enabled: false
@@ -87,45 +47,44 @@ Lint/UnreachableCode:
   Exclude:
     - 'lib/canon/diff_formatter/debug_output.rb'
-# Offense count: 7
+# Offense count: 6
 # This cop supports safe autocorrection (--autocorrect).
 # Configuration parameters: AllowUnusedKeywordArguments, IgnoreEmptyMethods, IgnoreNotImplementedMethods, NotImplementedExceptions.
 # NotImplementedExceptions: NotImplementedError
 Lint/UnusedMethodArgument:
   Exclude:
-    - 'lib/canon/comparison.rb'
     - 'lib/canon/diff/path_builder.rb'
     - 'lib/canon/diff_formatter/by_line/base_formatter.rb'
     - 'lib/canon/diff_formatter/by_line/xml_formatter.rb'
     - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
-# Offense count: 215
+# Offense count: 229
 # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
 Metrics/AbcSize:
   Enabled: false
-# Offense count: 21
+# Offense count: 22
 # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
 # AllowedMethods: refine
 Metrics/BlockLength:
   Max: 84
-# Offense count: 183
+# Offense count: 187
 # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
 Metrics/CyclomaticComplexity:
   Enabled: false
-# Offense count: 369
+# Offense count: 394
 # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
 Metrics/MethodLength:
-  Max: 115
+  Max: 95
-# Offense count: 44
+# Offense count: 45
 # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
 Metrics/ParameterLists:
   Max: 9
-# Offense count: 149
+# Offense count: 154
 # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
 Metrics/PerceivedComplexity:
   Enabled: false
@@ -139,16 +98,6 @@ Naming/MethodParameterName:
     - 'lib/canon/comparison/xml_comparator/attribute_comparator.rb'
     - 'lib/canon/xml/namespace_handler.rb'
-# Offense count: 1
-# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros, UseSorbetSigs.
-# NamePrefix: is_, has_, have_, does_
-# ForbiddenPrefixes: is_, has_, have_, does_
-# AllowedMethods: is_a?
-# MethodDefinitionMacros: define_method, define_singleton_method
-Naming/PredicatePrefix:
-  Exclude:
-    - 'lib/canon/comparison/html_comparator.rb'
 # Offense count: 6
 # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
 # SupportedStyles: snake_case, normalcase, non_integer
@@ -159,13 +108,12 @@ Naming/VariableNumber:
     - 'lib/canon/comparison/markup_comparator.rb'
     - 'lib/canon/comparison/xml_comparator/diff_node_builder.rb'
-# Offense count: 13
+# Offense count: 2
 # Configuration parameters: MinSize.
 Performance/CollectionLiteralInLoop:
   Exclude:
     - 'lib/canon/comparison/html_comparator.rb'
     - 'lib/canon/xml/xml_base_handler.rb'
-    - 'spec/canon/table_class_attribute_bug_spec.rb'
 # Offense count: 68
 # Configuration parameters: Prefixes, AllowedPatterns.
@@ -184,10 +132,10 @@ RSpec/DescribeMethod:
     - 'spec/canon/comparison/multiple_differences_spec.rb'
     - 'spec/canon/diff_formatter/character_map_customization_spec.rb'
-# Offense count: 696
+# Offense count: 695
 # Configuration parameters: CountAsOne.
 RSpec/ExampleLength:
-  Max: 67
+  Max: 43
 # Offense count: 8
 # This cop supports safe autocorrection (--autocorrect).
@@ -240,7 +188,7 @@ RSpec/MultipleDescribes:
 RSpec/MultipleExpectations:
   Max: 15
-# Offense count: 70
+# Offense count: 71
 # Configuration parameters: AllowSubject.
 RSpec/MultipleMemoizedHelpers:
   Max: 13
@@ -259,13 +207,12 @@ RSpec/NamedSubject:
 RSpec/NestedGroups:
   Max: 4
-# Offense count: 11
+# Offense count: 10
 # Configuration parameters: AllowedPatterns.
 # AllowedPatterns: ^expect_, ^assert_
 RSpec/NoExpectationExample:
   Exclude:
     - 'spec/canon/context_grouping_spec.rb'
-    - 'spec/canon/fixtures/isodoc_spec.rb'
     - 'spec/canon/informative_diffs_debug_spec.rb'
     - 'spec/canon/isodoc_blockquotes_spec.rb'
     - 'spec/canon/match_scenarios_spec.rb'
@@ -283,28 +230,17 @@ RSpec/SpecFilePathFormat:
     - 'spec/canon/yaml/formatter_spec.rb'
     - 'spec/xml_c14n_spec.rb'
-# Offense count: 120
+# Offense count: 126
 # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
 RSpec/VerifiedDoubles:
   Exclude:
+    - 'spec/canon/comparison/diff_node_builder_spec.rb'
     - 'spec/canon/comparison/whitespace_sensitivity_spec.rb'
     - 'spec/canon/diff/diff_classifier_spec.rb'
     - 'spec/canon/diff/path_builder_spec.rb'
     - 'spec/canon/diff/xml_serialization_formatter_spec.rb'
     - 'spec/canon/tree_diff/operation_converter_spec.rb'
-# Offense count: 44
-# This cop supports safe autocorrection (--autocorrect).
-# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
-# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
-# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
-# FunctionalMethods: let, let!, subject, watch
-# AllowedMethods: lambda, proc, it
-Style/BlockDelimiters:
-  Exclude:
-    - 'spec/canon/fixtures/isodoc_spec.rb'
-    - 'spec/canon/table_class_attribute_bug_spec.rb'
 # Offense count: 1
 # This cop supports safe autocorrection (--autocorrect).
 # Configuration parameters: EnforcedStyle, AllowComments.

data/README.adoc CHANGED Viewed

@@ -901,6 +901,63 @@ After checking out the repo, run `bin/setup` to install dependencies. Then run
 `rake spec` to run the tests. You can also run `bin/console` for an interactive
 prompt.
+== Performance
+Canon includes a comprehensive performance benchmarking system to prevent regressions in XML/HTML parsing and comparison algorithms.
+=== Running Benchmarks
+[source,bash]
+----
+# Run all benchmarks (10s per test, takes ~5 minutes)
+bundle exec rake performance:run
+# Quick benchmark (2s per test, takes ~30 seconds)
+bundle exec rake performance:quick
+# Compare against main branch (for PRs)
+bundle exec rake performance:compare
+# Run specific category
+bundle exec rake performance:category[xml_parsing]
+# Output formats
+bundle exec rake performance:json
+bundle exec rake performance:yaml
+----
+=== Benchmark Categories
+* **XML Parsing**: DOM vs SAX, simple vs large documents
+* **HTML Parsing**: Simple vs complex (with scripts/tables)
+* **XML/HTML Comparison**: Identical, similar, different documents
+* **Format Canonicalization**: XML C14N, JSON, YAML
+=== SAX Parser
+Canon includes a SAX-based XML parser (`Canon::Xml::SaxBuilder`) that provides
+significantly faster XML parsing by avoiding intermediate Nokogiri DOM trees.
+[source,ruby]
+----
+require 'canon/xml/sax_builder'
+# Parse XML directly to Canon::Xml::Node tree
+root = Canon::Xml::SaxBuilder.parse(xml_string)
+# For C14N (strips DOCTYPE to avoid DTD default attribute expansion)
+root = Canon::Xml::SaxBuilder.parse(xml_string, strip_doctype: true)
+----
+Performance improvement: ~6x faster than DOM parsing + conversion for large documents.
+=== CI Integration
+Performance benchmarks run automatically on:
+* **Pull requests**: Compares against `main` branch and fails if regression exceeds 10%
+* **Main branch pushes**: Logs baseline metrics for performance tracking
 == Contributing
 Bug reports and pull requests are welcome on GitHub at

data/Rakefile CHANGED Viewed

@@ -9,4 +9,6 @@ require "rubocop/rake_task"
 RuboCop::RakeTask.new
+Dir.glob("lib/tasks/**/*.rake").each { |r| load r }
 task default: %i[spec rubocop]

data/docs/features/index.adoc CHANGED Viewed

@@ -92,6 +92,16 @@ Error handling and validation.
 * Format detection
 * Error messages
+=== Performance
+link:performance/[**Performance**]::
+Performance benchmarking and optimization.
++
+* SAX-based XML parser
+* Benchmark categories
+* CI integration
+* Regression detection
 == Quick Configuration Examples
 === Test-Friendly Comparison

data/docs/features/performance.adoc ADDED Viewed

@@ -0,0 +1,161 @@
+---
+layout: default
+title: Performance
+nav_order: 100
+---
+= Performance
+Canon includes a comprehensive performance benchmarking system to prevent regressions in XML/HTML parsing and comparison algorithms.
+== Running Benchmarks
+[source,bash]
+----
+# Run all benchmarks (10s per test, ~5 minutes total)
+bundle exec rake performance:run
+# Quick benchmark (2s per test, ~30 seconds)
+bundle exec rake performance:quick
+# Compare against main branch (for PRs, fails on regression)
+bundle exec rake performance:compare
+# Run specific category
+bundle exec rake performance:category[xml_parsing]
+# Output in different formats
+bundle exec rake performance:json
+bundle exec rake performance:yaml
+----
+== Benchmark Categories
+=== XML Parsing
+* **DOM (simple)**: Standard Nokogiri DOM parsing
+* **SAX (simple)**: SAX-based streaming parser
+* **DOM (large)**: Large document DOM parsing
+* **SAX (large)**: Large document SAX parsing
+The SAX parser is typically ~20-50% faster than DOM for XML parsing.
+=== HTML Parsing
+* **Simple HTML**: Basic HTML document parsing
+* **Complex HTML**: HTML with scripts, styles, and tables
+=== XML/HTML Comparison
+* **Identical**: Comparing the same document to itself
+* **Similar**: Comparing documents with minor differences
+* **Different**: Comparing documents with different namespaces/structure
+=== Format Canonicalization
+* **XML C14N**: W3C Canonical XML
+* **JSON**: JSON formatting
+* **YAML**: YAML formatting
+== SAX Parser
+Canon includes a SAX-based XML parser (`Canon::Xml::SaxBuilder`) that provides
+significantly faster XML parsing by avoiding intermediate Nokogiri DOM trees.
+=== How It Works
+Traditional parsing:
+[source]
+----
+XML String → Nokogiri DOM (~60ms) → Canon::Xml::Node (~1200ms) = ~1260ms
+----
+SAX parsing:
+[source]
+----
+XML String → Nokogiri SAX → Canon::Xml::Node (~200ms) = ~200ms
+----
+=== Usage
+[source,ruby]
+----
+require 'canon/xml/sax_builder'
+# Parse XML directly to Canon::Xml::Node tree
+root = Canon::Xml::SaxBuilder.parse(xml_string)
+# With options
+root = Canon::Xml::SaxBuilder.parse(xml_string,
+  preserve_whitespace: true,
+  strip_doctype: true  # For C14N compatibility
+)
+----
+=== Options
+`preserve_whitespace`:: Keep whitespace-only text nodes (default: false)
+`strip_doctype`:: Remove DOCTYPE declaration (for C14N, avoids DTD default attribute expansion)
+== CI Integration
+Performance benchmarks run automatically on:
+=== Pull Requests
+When a PR is opened, the performance workflow compares benchmarks against the `main` branch.
+If any benchmark regresses by more than 10%, the check fails with a clear error message.
+[source,bash]
+----
+# CI output shows comparison
+Comparing against: Previous branch (main).
+Threshold: 10% regression allowed
+XML Parsing: SAX (large)
+  base: 1042.30 IPS
+  curr: 1285.20 IPS
+  change: +23.31%
+  ✅ OK
+----
+=== Main Branch
+On pushes to `main`, benchmarks run to log baseline metrics for performance tracking.
+== Threshold Configuration
+The default regression threshold is 10%. Configure via rake task:
+[source,bash]
+----
+# Custom threshold (e.g., 5%)
+RUBYOPT="-rbenchmark-runner" bundle exec rake performance:compare
+----
+Or modify `lib/tasks/performance_comparator.rb`:
+[source,ruby]
+----
+DEFAULT_THRESHOLD = 0.10 # 10%
+----
+== Adding New Benchmarks
+Add test methods to `lib/tasks/benchmark_runner.rb`:
+[source,ruby]
+----
+# In BENCHMARKS hash
+BENCHMARKS = {
+  xml_parsing: [
+    # ... existing tests ...
+    { name: "New Test", method: :my_new_test, desc: "Description" },
+  ],
+}.freeze
+# Add test method
+def my_new_test
+  xml = DataGenerator.generate_xml(items: @items)
+  measure { Canon::Xml::SaxBuilder.parse(xml) }
+end
+----

data/lib/canon/comparison/html_comparator.rb CHANGED Viewed

@@ -471,8 +471,9 @@ module Canon
               child.children.each do |text_child|
                 next unless text_child.is_a?(Canon::Xml::Nodes::TextNode)
-                # Remove HTML comments from text content
-                normalized = text_child.value.gsub(/<!--.*?-->/m, "").strip
+                # Remove HTML comments from text content without using regex
+                # to avoid ReDoS/incomplete sanitization vulnerabilities
+                normalized = remove_html_comments(text_child.value)
                 # Update the text value
                 text_child.instance_variable_set(:@value, normalized)
               end
@@ -562,13 +563,9 @@ module Canon
         # Also removes whitespace-only CDATA children that Nokogiri creates
         def normalize_html_style_script_comments(doc)
           doc.css("style, script").each do |element|
-            # Remove HTML comments from style/script content
-            # SAFE: This regex operates on already-parsed DOM element content,
-            # not on raw user input. The non-greedy .*? correctly matches
-            # comment boundaries. Any remaining <!-- would be literal text
-            # (not a comment), which is safe in this context.
-            # CodeQL false positive: see https://github.com/github/codeql/issues/XXXX
-            normalized = element.content.gsub(/<!--.*?-->/m, "").strip
+            # Remove HTML comments from style/script content without using regex
+            # to avoid ReDoS/incomplete sanitization vulnerabilities
+            normalized = remove_html_comments(element.content)
             if normalized.empty?
               # Remove all children (including whitespace-only CDATA nodes)
@@ -579,6 +576,43 @@ module Canon
           end
         end
+        # Remove HTML comments from a string without using regex
+        # This avoids ReDoS and incomplete sanitization vulnerabilities
+        #
+        # @param text [String] Text potentially containing HTML comments
+        # @return [String] Text with HTML comments removed
+        def remove_html_comments(text)
+          return "" if text.nil?
+          result = +""
+          pos = 0
+          while pos < text.length
+            # Look for comment start
+            comment_start = text.index("<!--", pos)
+            if comment_start.nil?
+              # No more comments, append rest of text
+              result << text[pos..]
+              break
+            end
+            # Append text before comment
+            result << text[pos...comment_start]
+            # Look for comment end
+            comment_end = text.index("-->", comment_start + 4)
+            if comment_end.nil?
+              # Unclosed comment, skip the rest
+              break
+            end
+            # Move past the comment
+            pos = comment_end + 3
+          end
+          result.strip
+        end
         # Normalize whitespace in text nodes according to HTML rendering rules
         # In HTML rendering, sequences of whitespace (spaces, tabs, newlines)
         # collapse to a single space, except in elements where whitespace is
@@ -621,8 +655,8 @@ compare_profile = nil)
             next if ancestor_preserves_whitespace?(parent, preserve_whitespace)
             # Collapse whitespace sequences (spaces, tabs, newlines) to single
-            # space
-            normalized = text_node.content.gsub(/\s+/, " ")
+            # space - use tr/squeeze to avoid ReDoS vulnerability from gsub(/\s+/)
+            normalized = text_node.content.tr("\t\n\r\f\v", " ").squeeze(" ")
             # Trim leading/trailing whitespace if appropriate
             normalized = normalized.strip if should_trim_text_node?(text_node)

data/lib/canon/comparison/xml_comparator/diff_node_builder.rb CHANGED Viewed

@@ -77,6 +77,13 @@ module Canon
           return build_text_difference_reason(text1, text2)
         end
+        # For attribute order differences, show the actual attribute names
+        if dimension == :attribute_order
+          attrs1 = extract_attributes(node1)&.keys || []
+          attrs2 = extract_attributes(node2)&.keys || []
+          return "Attribute order changed: [#{attrs1.join(', ')}] → [#{attrs2.join(', ')}]"
+        end
         # Default reason
         "#{diff1} vs #{diff2}"
       end

data/lib/canon/comparison/xml_comparator.rb CHANGED Viewed

@@ -615,9 +615,47 @@ differences)
             return build_text_diff_reason(text1, text2)
           end
+          # For attribute value differences, show the actual values
+          if dimension == :attribute_values
+            attrs1 = extract_attributes(node1)
+            attrs2 = extract_attributes(node2)
+            return build_attribute_value_diff_reason(attrs1, attrs2)
+          end
+          # For attribute order differences, show the actual attribute names
+          if dimension == :attribute_order
+            attrs1 = extract_attributes(node1)&.keys || []
+            attrs2 = extract_attributes(node2)&.keys || []
+            return "Attribute order changed: [#{attrs1.join(', ')}] → [#{attrs2.join(', ')}]"
+          end
           "#{diff1} vs #{diff2}"
         end
+        # Build a clear reason message for attribute value differences
+        #
+        # @param attrs1 [Hash, nil] First node's attributes
+        # @param attrs2 [Hash, nil] Second node's attributes
+        # @return [String] Clear explanation of the attribute value difference
+        def build_attribute_value_diff_reason(attrs1, attrs2)
+          return "missing vs present attributes" unless attrs1 && attrs2
+          require "set"
+          keys1 = attrs1.keys.to_set
+          keys2 = attrs2.keys.to_set
+          common = keys1 & keys2
+          different_values = common.reject { |k| attrs1[k] == attrs2[k] }
+          return "all attribute values match" if different_values.empty?
+          parts = different_values.map do |k|
+            "#{k}: #{attrs1[k].inspect} vs #{attrs2[k].inspect}"
+          end
+          parts.join("; ")
+        end
         # Build a clear reason message for attribute presence differences
         #
         # @param attrs1 [Hash, nil] First node's attributes

data/lib/canon/diff/formatting_detector.rb CHANGED Viewed

@@ -31,12 +31,18 @@ module Canon
         return "" if line.nil?
         # Collapse all whitespace (spaces, tabs, newlines) to single space
-        normalized = line.gsub(/\s+/, " ").strip
+        # Avoid regex to prevent ReDoS vulnerability - use String methods
+        normalized = line.strip.tr("\t\n\r\f\v", " ").squeeze(" ")
         # Normalize whitespace around tag delimiters
-        # Remove spaces before > and after <
-        normalized = normalized.gsub(/\s+>/, ">") # "div >" -> "div>"
-        normalized.gsub(/<\s+/, "<") # "< div" -> "<div"
+        # Remove spaces before > and after < (avoid regex for ReDoS safety)
+        while normalized.include?(" >")
+          normalized = normalized.gsub(" >", ">")
+        end
+        while normalized.include?("< ")
+          normalized = normalized.gsub("< ", "<")
+        end
+        normalized
       end
       # Check if a line is blank (nil or whitespace-only)

data/lib/canon/diff_formatter/diff_detail_formatter.rb CHANGED Viewed

@@ -113,7 +113,8 @@ module Canon
           # show reason if available
           if diff.respond_to?(:reason) && diff.reason
             output << "#{colorize('Reason:', :cyan, use_color,
-                                  bold: true)}  #{colorize(diff.reason, :yellow, use_color)}"
+                                  bold: true)}  #{colorize(diff.reason,
+                                                           :yellow, use_color)}"
           end
           output << ""

data/lib/canon/tree_diff/core/tree_node.rb CHANGED Viewed

@@ -307,9 +307,12 @@ module Canon
           # Add value
           result << "value:#{value}" if value
-          # Add attributes
-          attributes.each do |key, val|
-            result << "attr:#{key}=#{val}"
+          # Add attributes (key only, not values)
+          # This ensures nodes differing only in attribute VALUES still get matched
+          # and are then reported as attribute_updates rather than structural differences
+          # NOTE: The value differences are detected separately in detect_changes
+          attributes.each_key do |key|
+            result << "attr:#{key}"
           end
           # Add child labels