canon 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +69 -92
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/Gemfile +1 -0
  6. data/docs/_config.yml +90 -1
  7. data/docs/advanced/diff-classification.adoc +82 -2
  8. data/docs/advanced/extending-canon.adoc +193 -0
  9. data/docs/features/match-options/index.adoc +239 -1
  10. data/docs/internals/diffnode-enrichment.adoc +611 -0
  11. data/docs/internals/index.adoc +251 -0
  12. data/docs/lychee.toml +13 -6
  13. data/docs/understanding/architecture.adoc +749 -33
  14. data/docs/understanding/comparison-pipeline.adoc +122 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +87 -0
  27. data/lib/canon/comparison/html_comparator.rb +70 -26
  28. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  29. data/lib/canon/comparison/html_parser.rb +80 -0
  30. data/lib/canon/comparison/json_comparator.rb +12 -0
  31. data/lib/canon/comparison/json_parser.rb +19 -0
  32. data/lib/canon/comparison/markup_comparator.rb +293 -0
  33. data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
  34. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  35. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  36. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  37. data/lib/canon/comparison/match_options.rb +68 -463
  38. data/lib/canon/comparison/profile_definition.rb +149 -0
  39. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  40. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  41. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  42. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  43. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  44. data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
  45. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  46. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  47. data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
  48. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
  49. data/lib/canon/comparison/xml_comparator.rb +97 -684
  50. data/lib/canon/comparison/xml_node_comparison.rb +319 -0
  51. data/lib/canon/comparison/xml_parser.rb +19 -0
  52. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  53. data/lib/canon/comparison.rb +265 -110
  54. data/lib/canon/diff/diff_classifier.rb +101 -2
  55. data/lib/canon/diff/diff_node.rb +32 -2
  56. data/lib/canon/diff/formatting_detector.rb +1 -1
  57. data/lib/canon/diff/node_serializer.rb +191 -0
  58. data/lib/canon/diff/path_builder.rb +143 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  61. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  62. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  64. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  65. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  66. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  67. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  68. data/lib/canon/diff_formatter.rb +1 -1
  69. data/lib/canon/rspec_matchers.rb +38 -9
  70. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  71. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  72. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  73. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  74. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  75. data/lib/canon/version.rb +1 -1
  76. data/lib/canon/xml/data_model.rb +24 -13
  77. metadata +48 -2
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "nokogiri"
4
4
  require_relative "../comparison" # Load base module with constants first
5
+ require_relative "markup_comparator"
5
6
  require_relative "xml_comparator"
6
7
  require_relative "match_options"
7
8
  require_relative "comparison_result"
@@ -11,12 +12,17 @@ require_relative "../diff/diff_node"
11
12
  require_relative "../diff/diff_classifier"
12
13
  require_relative "strategies/match_strategy_factory"
13
14
  require_relative "../html/data_model"
15
+ require_relative "xml_node_comparison"
16
+ # Whitespace sensitivity module (single source of truth for sensitive elements)
17
+ require_relative "whitespace_sensitivity"
14
18
 
15
19
  module Canon
16
20
  module Comparison
17
21
  # HTML comparison class
18
22
  # Handles comparison of HTML nodes with various options
19
- class HtmlComparator
23
+ #
24
+ # Inherits shared comparison functionality from MarkupComparator.
25
+ class HtmlComparator < MarkupComparator
20
26
  # Default comparison options for HTML
21
27
  DEFAULT_OPTS = {
22
28
  # Structural filtering options
@@ -108,6 +114,9 @@ module Canon
108
114
 
109
115
  # DocumentFragment nodes need special handling - compare their children
110
116
  # instead of the fragment nodes themselves
117
+ # This is a SAFETY CHECK for legacy cases where Nokogiri nodes might still be used
118
+ # The main path (parse_node) now returns Canon::Xml::Nodes::RootNode, so this
119
+ # check should rarely trigger, but we keep it for robustness
111
120
  if (node1.is_a?(Nokogiri::HTML4::DocumentFragment) ||
112
121
  node1.is_a?(Nokogiri::XML::DocumentFragment)) &&
113
122
  (node2.is_a?(Nokogiri::HTML4::DocumentFragment) ||
@@ -117,10 +126,8 @@ module Canon
117
126
  all_children2 = node2.children.to_a
118
127
 
119
128
  # Filter children based on match options (e.g., ignore comments)
120
- children1 = XmlComparator.send(:filter_children, all_children1,
121
- opts)
122
- children2 = XmlComparator.send(:filter_children, all_children2,
123
- opts)
129
+ children1 = XmlNodeComparison.filter_children(all_children1, opts)
130
+ children2 = XmlNodeComparison.filter_children(all_children2, opts)
124
131
 
125
132
  if children1.length != children2.length
126
133
  result = Comparison::UNEQUAL_ELEMENTS
@@ -130,9 +137,10 @@ module Canon
130
137
  # Compare each pair of children
131
138
  result = Comparison::EQUIVALENT
132
139
  children1.zip(children2).each do |child1, child2|
133
- child_result = XmlComparator.send(:compare_nodes, child1, child2,
134
- opts, child_opts, diff_children,
135
- differences)
140
+ child_result = XmlNodeComparison.compare_nodes(child1, child2,
141
+ opts, child_opts,
142
+ diff_children,
143
+ differences)
136
144
  if child_result != Comparison::EQUIVALENT
137
145
  result = child_result
138
146
  break
@@ -140,8 +148,9 @@ module Canon
140
148
  end
141
149
  end
142
150
  else
143
- result = XmlComparator.send(:compare_nodes, node1, node2, opts,
144
- child_opts, diff_children, differences)
151
+ result = XmlNodeComparison.compare_nodes(node1, node2, opts,
152
+ child_opts, diff_children,
153
+ differences)
145
154
  end
146
155
 
147
156
  # Classify DiffNodes as normative/informative if we have verbose output
@@ -287,7 +296,16 @@ module Canon
287
296
  end
288
297
 
289
298
  # Strip DOCTYPE for consistent parsing
290
- html_string = html_string.gsub(/<!DOCTYPE[^>]*>/i, "").strip
299
+ # Use non-regex approach to avoid ReDoS vulnerability
300
+ # DOCTYPE declarations end with first > character
301
+ doctype_start = html_string =~ /<!DOCTYPE/i
302
+ if doctype_start
303
+ doctype_end = html_string.index(">", doctype_start)
304
+ html_string = html_string[0...doctype_start] + html_string[(doctype_end + 1)..] if doctype_end
305
+ html_string.strip!
306
+ else
307
+ html_string = html_string.strip
308
+ end
291
309
 
292
310
  # Apply preprocessing to HTML string before parsing
293
311
  processed_html = case preprocessing
@@ -313,8 +331,15 @@ module Canon
313
331
 
314
332
  # Parse a node from string or return as-is
315
333
  # Applies preprocessing transformation before parsing if specified
316
- # For DOM comparison, returns Nokogiri nodes (not Canon::Xml::Node)
334
+ # Returns Nokogiri nodes for DOM comparison (preserves original behavior)
317
335
  def parse_node(node, preprocessing = :none, match_opts = {})
336
+ # If already a Canon::Xml::Node, convert to Nokogiri for DOM path
337
+ if node.is_a?(Canon::Xml::Node)
338
+ # Canon nodes used in semantic diff path, convert to Nokogiri for DOM path
339
+ xml_str = Canon::Xml::DataModel.serialize(node)
340
+ node = xml_str
341
+ end
342
+
318
343
  # If already a Nokogiri node, check for incompatible XML documents
319
344
  unless node.is_a?(String)
320
345
  # Detect if this is an XML document (not HTML)
@@ -357,7 +382,15 @@ module Canon
357
382
 
358
383
  # Strip DOCTYPE declarations from HTML strings
359
384
  # This normalizes parsed HTML (which includes DOCTYPE) with raw HTML strings
360
- node = node.gsub(/<!DOCTYPE[^>]*>/i, "").strip
385
+ # Use non-regex approach to avoid ReDoS vulnerability
386
+ doctype_start = node =~ /<!DOCTYPE/i
387
+ if doctype_start
388
+ doctype_end = node.index(">", doctype_start)
389
+ node = node[0...doctype_start] + node[(doctype_end + 1)..] if doctype_end
390
+ node.strip!
391
+ else
392
+ node = node.strip
393
+ end
361
394
 
362
395
  # Apply preprocessing to HTML string before parsing
363
396
  html_string = case preprocessing
@@ -380,10 +413,12 @@ module Canon
380
413
  # Use XML fragment parser to avoid auto-inserted meta tags
381
414
  frag = Nokogiri::XML.fragment(html_string)
382
415
 
383
- # Apply :rendered preprocessing if needed
384
- if preprocessing == :rendered
416
+ # Apply post-parsing filtering for :normalize, :format, and :rendered preprocessing
417
+ if %i[normalize format rendered].include?(preprocessing)
385
418
  normalize_html_style_script_comments(frag)
386
- normalize_rendered_whitespace(frag, match_opts)
419
+ if preprocessing == :rendered
420
+ normalize_rendered_whitespace(frag, match_opts)
421
+ end
387
422
  remove_whitespace_only_text_nodes(frag)
388
423
  end
389
424
 
@@ -461,9 +496,9 @@ module Canon
461
496
  # @param node [Canon::Xml::Node, Nokogiri::HTML::Document] Parsed node
462
497
  # @return [String] Serialized HTML string
463
498
  def serialize_for_display(node)
464
- # Use XmlComparator's serializer for Canon::Xml::Node
499
+ # Use XmlNodeComparison's serializer for Canon::Xml::Node
465
500
  if node.is_a?(Canon::Xml::Node)
466
- XmlComparator.send(:serialize_node_to_xml, node)
501
+ XmlNodeComparison.serialize_node_to_xml(node)
467
502
  elsif node.respond_to?(:to_html)
468
503
  node.to_html
469
504
  elsif node.respond_to?(:to_xml)
@@ -509,16 +544,22 @@ compare_profile = nil)
509
544
  return if match_opts[:text_content] == :strict
510
545
 
511
546
  # Elements where whitespace is significant - don't normalize
512
- # Use profile if available, otherwise use default list
547
+ # SINGLE SOURCE OF TRUTH: WhitespaceSensitivity.format_default_sensitive_elements
548
+ # This ensures consistency between preprocessing and comparison logic
549
+ # SINGLE SOURCE OF TRUTH: WhitespaceSensitivity.format_default_sensitive_elements
550
+ # This ensures consistency between preprocessing and comparison logic
513
551
  preserve_whitespace = if compare_profile.is_a?(HtmlCompareProfile)
514
552
  # Profile handles HTML-specific whitespace rules
515
- %w[pre code textarea script
516
- style].select do |elem|
517
- compare_profile.preserve_whitespace?(elem)
518
- end
553
+ # Get default list and filter by profile
554
+ WhitespaceSensitivity
555
+ .format_default_sensitive_elements(match_opts)
556
+ .select do |elem|
557
+ compare_profile.preserve_whitespace?(elem.to_s)
558
+ end
559
+ .map(&:to_s)
519
560
  else
520
- # Fallback to default list
521
- %w[pre code textarea script style]
561
+ # Use default list from WhitespaceSensitivity (single source of truth)
562
+ WhitespaceSensitivity.format_default_sensitive_elements(match_opts).map(&:to_s)
522
563
  end
523
564
 
524
565
  # Walk all text nodes
@@ -574,9 +615,12 @@ compare_profile = nil)
574
615
  #
575
616
  # CRITICAL: Do NOT remove whitespace-only text nodes from whitespace-sensitive
576
617
  # elements like <pre>, <code>, <textarea>, <script>, <style>
618
+ #
619
+ # SINGLE SOURCE OF TRUTH: Uses WhitespaceSensitivity.format_default_sensitive_elements
577
620
  def remove_whitespace_only_text_nodes(doc)
578
621
  # Elements where whitespace is significant - don't remove whitespace-only nodes
579
- preserve_whitespace = %w[pre code textarea script style]
622
+ # SINGLE SOURCE OF TRUTH: WhitespaceSensitivity.format_default_sensitive_elements
623
+ preserve_whitespace = WhitespaceSensitivity.format_default_sensitive_elements(format: :html).map(&:to_s)
580
624
 
581
625
  doc.xpath(".//text()").each do |text_node|
582
626
  # CRITICAL: Skip if this text node is inside a whitespace-preserving element
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "compare_profile"
4
+ # Whitespace sensitivity module (single source of truth for sensitive elements)
5
+ require_relative "whitespace_sensitivity"
4
6
 
5
7
  module Canon
6
8
  module Comparison
@@ -82,9 +84,13 @@ module Canon
82
84
  private
83
85
 
84
86
  # Elements where whitespace is semantically significant in HTML
85
- # @return [Array<String>] List of element names
87
+ #
88
+ # SINGLE SOURCE OF TRUTH: Delegates to WhitespaceSensitivity.format_default_sensitive_elements
89
+ # This ensures consistency across the codebase.
90
+ #
91
+ # @return [Array<String>] List of element names (as strings)
86
92
  def whitespace_sensitive_elements
87
- %w[pre code textarea script style]
93
+ WhitespaceSensitivity.format_default_sensitive_elements(format: @html_version).map(&:to_s)
88
94
  end
89
95
 
90
96
  # Check if a dimension is explicitly set to :strict
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+
5
module Canon
  module Comparison
    # HTML parsing service with version detection and fragment support.
    #
    # Parses HTML strings into Nokogiri fragments, choosing between the HTML4
    # and HTML5 parsers either explicitly (via +format+) or by inspecting the
    # content for an HTML5 doctype.
    #
    # @example Parse HTML string
    #   HtmlParser.parse("<div>content</div>", :html5)
    #
    # @example Auto-detect and parse
    #   HtmlParser.detect_and_parse("<!DOCTYPE html><html>...</html>")
    class HtmlParser
      # Bare HTML5 doctype: "<!DOCTYPE html>" in any letter case, with
      # arbitrary whitespace before "html" and optional whitespace before ">".
      # Legacy doctypes carrying a PUBLIC/SYSTEM identifier do not match.
      HTML5_DOCTYPE_PATTERN = /<!DOCTYPE\s+html\s*>/i

      class << self
        # Parse an HTML string into a Nokogiri fragment with the parser that
        # corresponds to +format+.
        #
        # Non-String input is handed back untouched, which also covers
        # documents and fragments that were parsed earlier. An unrecognised
        # +format+ returns the string unchanged.
        #
        # @param content [String, Object] Content to parse
        # @param format [Symbol] HTML format (:html, :html4, :html5)
        # @return [Nokogiri::HTML::DocumentFragment, Nokogiri::HTML5::DocumentFragment, Object]
        #   the parsed fragment, or +content+ itself when nothing was parsed
        def parse(content, format)
          return content unless content.is_a?(String)

          begin
            case format
            when :html5
              Nokogiri::HTML5.fragment(content)
            when :html4
              Nokogiri::HTML4.fragment(content)
            when :html
              detect_and_parse(content)
            else
              content
            end
          rescue StandardError
            # Deliberate best-effort: fall back to the raw string if parsing
            # fails (maintains backward compatibility).
            content
          end
        end

        # Check if content is already a parsed HTML document/fragment.
        #
        # @param content [Object] Content to check
        # @return [Boolean] true if already parsed
        def already_parsed?(content)
          content.is_a?(Nokogiri::HTML::Document) ||
            content.is_a?(Nokogiri::HTML5::Document) ||
            content.is_a?(Nokogiri::HTML::DocumentFragment) ||
            content.is_a?(Nokogiri::HTML5::DocumentFragment)
        end

        # Detect the HTML version from the content and parse it with the
        # matching fragment parser.
        #
        # @param content [String] HTML content to parse
        # @return [Nokogiri::HTML::DocumentFragment, Nokogiri::HTML5::DocumentFragment]
        #   Parsed fragment
        def detect_and_parse(content)
          if detect_version(content) == :html5
            Nokogiri::HTML5.fragment(content)
          else
            Nokogiri::HTML4.fragment(content)
          end
        end

        # Detect the HTML version from a content string.
        #
        # The content counts as HTML5 when it contains the bare HTML5 doctype;
        # the check ignores letter case ("<!doctype html>" qualifies).
        # Everything else, including content without any doctype, is reported
        # as HTML4.
        #
        # @param content [String] HTML content
        # @return [Symbol] :html5 or :html4
        def detect_version(content)
          content.match?(HTML5_DOCTYPE_PATTERN) ? :html5 : :html4
        end
      end
    end
  end
end
@@ -125,6 +125,18 @@ module Canon
125
125
  if match_opts[:key_order] != :strict
126
126
  keys1 = keys1.sort_by(&:to_s)
127
127
  keys2 = keys2.sort_by(&:to_s)
128
+ elsif keys1 != keys2
129
+ # Strict mode: key order matters
130
+ # Check if keys are in same order
131
+ # Keys are different or in different order
132
+ # First check if it's just ordering (same keys, different order)
133
+ if keys1.sort_by(&:to_s) == keys2.sort_by(&:to_s)
134
+ # Same keys, different order - this is a key_order difference
135
+ key_path = path.empty? ? "(key order)" : "#{path}.(key order)"
136
+ add_ruby_difference(key_path, keys1, keys2,
137
+ Comparison::UNEQUAL_HASH_KEY_ORDER, opts, differences)
138
+ return Comparison::UNEQUAL_HASH_KEY_ORDER
139
+ end
128
140
  end
129
141
 
130
142
  # Check for missing keys
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Comparison
    # Public entry point for JSON parsing.
    # Lets callers reach the parsing functionality without using send().
    class JsonParser
      class << self
        # Parse an object by handing it to JsonComparator.parse_json.
        #
        # @param obj [String, Hash, Array] Object to parse
        # @return [Hash, Array] Parsed Ruby object (whatever
        #   JsonComparator.parse_json returns)
        def parse_json(obj)
          # json_comparator is required here, at call time, rather than when
          # this file is loaded.
          require_relative "json_comparator"
          JsonComparator.parse_json(obj)
        end
      end
    end
  end
end
@@ -0,0 +1,293 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../comparison" # Load base module with constants
4
+ require_relative "../diff/diff_node"
5
+ require_relative "../diff/path_builder"
6
+
7
module Canon
  module Comparison
    # Base class for markup document comparison (XML, HTML).
    #
    # Collects the class-level helpers shared by the format-specific
    # comparators: node classification, text extraction, child filtering,
    # node serialization and DiffNode creation. XmlComparator and
    # HtmlComparator inherit from it and layer format-specific behavior on top.
    class MarkupComparator
      class << self
        # Record a difference by appending a DiffNode to +differences+.
        #
        # The DiffNode carries a reason string plus the enriched metadata
        # (path, serialized content, attributes) produced by
        # enrich_diff_metadata.
        #
        # @param node1 [Object, nil] First node
        # @param node2 [Object, nil] Second node
        # @param diff1 [Symbol] Difference type for node1
        # @param diff2 [Symbol] Difference type for node2
        # @param dimension [Symbol] The match dimension causing this difference
        # @param _opts [Hash] Options (unused but kept for interface compatibility)
        # @param differences [Array] Array to append the difference to
        # @raise [ArgumentError] if +dimension+ is nil
        def add_difference(node1, node2, diff1, diff2, dimension, _opts,
                           differences)
          raise ArgumentError, "dimension required for DiffNode" if dimension.nil?

          why = build_difference_reason(node1, node2, diff1, diff2, dimension)
          extras = enrich_diff_metadata(node1, node2)

          differences << Canon::Diff::DiffNode.new(
            node1: node1,
            node2: node2,
            dimension: dimension,
            reason: why,
            **extras,
          )
        end

        # Gather presentation metadata for a pair of nodes.
        #
        # The path is taken from node1, falling back to node2 when node1 is nil.
        #
        # @param node1 [Object, nil] First node
        # @param node2 [Object, nil] Second node
        # @return [Hash] Hash with :path, :serialized_before, :serialized_after,
        #   :attributes_before and :attributes_after
        def enrich_diff_metadata(node1, node2)
          anchor = node1 || node2

          {
            path: build_path_for_node(anchor),
            serialized_before: serialize_node(node1),
            serialized_after: serialize_node(node2),
            attributes_before: extract_attributes(node1),
            attributes_after: extract_attributes(node2),
          }
        end

        # Build the path for a node via Canon::Diff::PathBuilder.
        #
        # @param node [Object, nil] Node to build the path for
        # @return [String, nil] Path from PathBuilder, or nil for a nil node
        def build_path_for_node(node)
          Canon::Diff::PathBuilder.build(node, format: :document) unless node.nil?
        end

        # Serialize a node to a string for display.
        #
        # Canon::Xml node classes are serialized by hand; any other object is
        # asked for to_xml, then to_html, and finally to_s.
        #
        # @param node [Object, nil] Node to serialize
        # @return [String, nil] Serialized content, or nil for a nil node
        def serialize_node(node)
          return if node.nil?

          case node
          when Canon::Xml::Nodes::RootNode
            # A root has no markup of its own: concatenate its children.
            node.children.map { |child| serialize_node(child) }.join
          when Canon::Xml::Nodes::ElementNode
            serialize_element_node(node)
          when Canon::Xml::Nodes::TextNode
            node.value
          when Canon::Xml::Nodes::CommentNode
            "<!--#{node.value}-->"
          when Canon::Xml::Nodes::ProcessingInstructionNode
            "<?#{node.target} #{node.data}?>"
          else
            if node.respond_to?(:to_xml)
              node.to_xml
            elsif node.respond_to?(:to_html)
              node.to_html
            else
              node.to_s
            end
          end
        end

        # Collect a node's attributes into a name => value hash.
        #
        # @param node [Object, nil] Node to extract attributes from
        # @return [Hash, nil] Attribute name => value pairs; an empty hash for
        #   objects without attributes; nil for a nil node
        def extract_attributes(node)
          return if node.nil?

          collected = {}
          if node.is_a?(Canon::Xml::Nodes::ElementNode)
            node.attribute_nodes.each do |attr|
              collected[attr.name] = attr.value
            end
          elsif node.respond_to?(:attributes)
            # Nokogiri-style nodes: attributes yields (key, attribute) pairs.
            node.attributes.each do |_key, attr|
              collected[attr.name] = attr.value
            end
          end
          collected
        end

        # Drop the children that node_excluded? rejects for the given options.
        #
        # @param children [Array] Array of child nodes
        # @param opts [Hash] Comparison options
        # @return [Array] Children that remain part of the comparison
        def filter_children(children, opts)
          children.reject { |child| node_excluded?(child, opts) }
        end

        # Decide whether a node is left out of the comparison.
        #
        # A node is excluded when it is listed in opts[:ignore_nodes], when it
        # is a comment and opts[:ignore_comments] is set, when it is a text
        # node and opts[:ignore_text_nodes] is set, or when it is a
        # whitespace-only text node and the :structural_whitespace match option
        # is :ignore or :normalize.
        #
        # @param node [Object] Node to check
        # @param opts [Hash] Comparison options
        # @return [Boolean] true if node should be excluded
        def node_excluded?(node, opts)
          return false if node.nil?

          explicitly_ignored =
            opts[:ignore_nodes]&.include?(node) ||
            (opts[:ignore_comments] && comment_node?(node)) ||
            (opts[:ignore_text_nodes] && text_node?(node))
          return true if explicitly_ignored

          settings = opts[:match_opts]
          return false unless settings
          return false unless %i[ignore normalize].include?(settings[:structural_whitespace])
          return false unless text_node?(node)

          # Text that normalizes to an empty string is whitespace-only.
          return true if MatchOptions.normalize_text(node_text(node)).empty?

          false
        end

        # Check whether two nodes are of the same kind.
        #
        # The Ruby classes must match; when both objects expose node_type,
        # those must match as well.
        #
        # @param node1 [Object] First node
        # @param node2 [Object] Second node
        # @return [Boolean] true if nodes are same type
        def same_node_type?(node1, node2)
          return false unless node1.class == node2.class
          return true unless node1.respond_to?(:node_type) && node2.respond_to?(:node_type)

          node1.node_type == node2.node_type
        end

        # Check if a node is a comment node, either through a comment?
        # predicate or through node_type == :comment.
        #
        # @param node [Object] Node to check
        # @return [Boolean] truthy if node is a comment
        def comment_node?(node)
          (node.respond_to?(:comment?) && node.comment?) ||
            (node.respond_to?(:node_type) && node.node_type == :comment)
        end

        # Check if a node is a text node.
        #
        # NOTE(review): an object that responds to both text? and element?
        # fails the first test and is only recognised via node_type == :text;
        # confirm that this is intended for every supported node library.
        #
        # @param node [Object] Node to check
        # @return [Boolean] truthy if node is a text node
        def text_node?(node)
          (node.respond_to?(:text?) && node.text? && !node.respond_to?(:element?)) ||
            (node.respond_to?(:node_type) && node.node_type == :text)
        end

        # Get the text content of a node.
        #
        # Tries value first, then content, and falls back to to_s.
        #
        # @param node [Object] Node to get text from
        # @return [String] Text content
        def node_text(node)
          return node.value.to_s if node.respond_to?(:value)
          return node.content.to_s if node.respond_to?(:content)

          node.to_s
        end

        # Check if two texts are equal once both have been passed through
        # MatchOptions.normalize_text.
        #
        # @param text1 [String] First text
        # @param text2 [String] Second text
        # @return [Boolean] true if the normalized texts are equal
        def whitespace_only_difference?(text1, text2)
          MatchOptions.normalize_text(text1) == MatchOptions.normalize_text(text2)
        end

        # Build a human-readable reason for a difference.
        #
        # Default wording; subclasses may override it.
        #
        # @param _node1 [Object, nil] First node (unused here)
        # @param _node2 [Object, nil] Second node (unused here)
        # @param diff1 [Symbol] Difference type for node1
        # @param diff2 [Symbol] Difference type for node2
        # @param dimension [Symbol] The dimension of the difference
        # @return [String] Human-readable reason
        def build_difference_reason(_node1, _node2, diff1, diff2, dimension)
          "Difference in #{dimension}: #{diff1} vs #{diff2}"
        end

        # Serialize an element node, its attributes and its children.
        #
        # An element whose children serialize to nothing is written in
        # self-closing form. Attribute values are inserted as-is.
        #
        # @param node [Canon::Xml::Nodes::ElementNode] Element node
        # @return [String] Serialized element
        def serialize_element_node(node)
          attribute_markup = node.attribute_nodes.map do |attr|
            " #{attr.name}=\"#{attr.value}\""
          end.join
          inner = node.children.map { |child| serialize_node(child) }.join

          return "<#{node.name}#{attribute_markup}/>" if inner.empty?

          "<#{node.name}#{attribute_markup}>#{inner}</#{node.name}>"
        end

        # Determine the match dimension that applies to a node.
        #
        # Nodes with a Symbol node_type are classified by that symbol; other
        # nodes are classified through their comment?/text?/cdata?/
        # processing_instruction? predicates. Anything unrecognised maps to
        # :text_content.
        #
        # @param node [Object] The node to check
        # @return [Symbol] :comments, :processing_instructions or :text_content
        def determine_node_dimension(node)
          if node.respond_to?(:node_type) && node.node_type.is_a?(Symbol)
            case node.node_type
            when :comment then return :comments
            when :processing_instruction then return :processing_instructions
            else return :text_content
            end
          end

          # Predicate-based nodes; the order of the checks is significant.
          return :comments if node.respond_to?(:comment?) && node.comment?
          return :text_content if node.respond_to?(:text?) && node.text?
          return :text_content if node.respond_to?(:cdata?) && node.cdata?
          if node.respond_to?(:processing_instruction?) && node.processing_instruction?
            return :processing_instructions
          end

          :text_content
        end
      end
    end
  end
end