canon 0.2.7 → 0.2.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/.rspec-opal +7 -0
  3. data/.rubocop_todo.yml +16 -61
  4. data/README.adoc +5 -0
  5. data/Rakefile +17 -0
  6. data/docs/features/diff-formatting/comment-asymmetry.adoc +160 -0
  7. data/lib/canon/cli.rb +1 -1
  8. data/lib/canon/color_detector.rb +3 -5
  9. data/lib/canon/comparison/child_realignment.rb +140 -0
  10. data/lib/canon/comparison/compare_profile.rb +1 -4
  11. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
  12. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
  13. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
  14. data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
  15. data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
  16. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
  17. data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
  18. data/lib/canon/comparison/format_detector.rb +29 -20
  19. data/lib/canon/comparison/html_comparator.rb +36 -75
  20. data/lib/canon/comparison/html_compare_profile.rb +3 -10
  21. data/lib/canon/comparison/html_parser.rb +1 -1
  22. data/lib/canon/comparison/json_comparator.rb +8 -0
  23. data/lib/canon/comparison/node_inspector.rb +150 -58
  24. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
  25. data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
  26. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
  27. data/lib/canon/comparison/xml_comparator/child_comparison.rb +32 -77
  28. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +43 -8
  29. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
  30. data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
  31. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
  32. data/lib/canon/comparison/xml_comparator.rb +89 -83
  33. data/lib/canon/comparison/xml_node_comparison.rb +15 -15
  34. data/lib/canon/comparison/yaml_comparator.rb +8 -0
  35. data/lib/canon/comparison.rb +25 -23
  36. data/lib/canon/config/profile_loader.rb +13 -13
  37. data/lib/canon/config.rb +29 -5
  38. data/lib/canon/diff/diff_classifier.rb +16 -42
  39. data/lib/canon/diff/diff_line.rb +1 -1
  40. data/lib/canon/diff/diff_node_enricher.rb +22 -24
  41. data/lib/canon/diff/node_serializer.rb +23 -30
  42. data/lib/canon/diff/path_builder.rb +24 -37
  43. data/lib/canon/diff/source_locator.rb +0 -3
  44. data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
  45. data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
  46. data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
  47. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
  48. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
  49. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
  50. data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
  51. data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
  52. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
  53. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
  54. data/lib/canon/diff_formatter/debug_output.rb +12 -24
  55. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
  56. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
  57. data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
  58. data/lib/canon/diff_formatter/legend.rb +2 -2
  59. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
  60. data/lib/canon/diff_formatter/theme.rb +4 -4
  61. data/lib/canon/diff_formatter.rb +2 -2
  62. data/lib/canon/formatters/html_formatter.rb +1 -1
  63. data/lib/canon/formatters/html_formatter_base.rb +1 -1
  64. data/lib/canon/formatters/xml_formatter.rb +7 -32
  65. data/lib/canon/html/data_model.rb +1 -1
  66. data/lib/canon/pretty_printer/html.rb +1 -1
  67. data/lib/canon/pretty_printer/xml.rb +16 -7
  68. data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
  69. data/lib/canon/rspec_matchers.rb +2 -2
  70. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  71. data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
  72. data/lib/canon/tree_diff/core/tree_node.rb +1 -3
  73. data/lib/canon/validators/html_validator.rb +1 -1
  74. data/lib/canon/validators/xml_validator.rb +1 -1
  75. data/lib/canon/version.rb +1 -1
  76. data/lib/canon/xml/data_model.rb +131 -137
  77. data/lib/canon/xml/namespace_helper.rb +5 -0
  78. data/lib/canon/xml/node.rb +2 -1
  79. data/lib/canon/xml/nodes/root_node.rb +4 -0
  80. data/lib/canon/xml/nodes/text_node.rb +6 -1
  81. data/lib/canon/xml/sax_builder.rb +4 -6
  82. data/lib/canon/xml_backend.rb +49 -0
  83. data/lib/canon/xml_parsing.rb +271 -0
  84. data/lib/canon.rb +3 -1
  85. data/lib/tasks/benchmark_runner.rb +1 -1
  86. data/lib/tasks/performance_helpers.rb +1 -1
  87. metadata +7 -2
@@ -53,7 +53,7 @@ module Canon
53
53
  # For deleted/inserted nodes, include namespace information if available
54
54
  if dimension == :text_content && (node1.nil? || node2.nil?)
55
55
  node = node1 || node2
56
- if node.is_a?(Canon::Xml::Node) || node.is_a?(Nokogiri::XML::Node)
56
+ if node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)
57
57
  ns = node.namespace_uri
58
58
  ns_info = if ns.nil? || ns.empty?
59
59
  ""
@@ -86,14 +86,22 @@ module Canon
86
86
  return "Attribute order changed: [#{attrs1.join(', ')}] → [#{attrs2.join(', ')}]"
87
87
  end
88
88
 
89
+ # For asymmetric comment nodes (#144), name the side that carries
90
+ # the comment and surface the comment text rather than reusing
91
+ # the generic "element structure mismatch" wording.
92
+ if dimension == :comments
93
+ comment_reason = build_comment_difference_reason(node1, node2)
94
+ return comment_reason if comment_reason
95
+ end
96
+
89
97
  # Default reason
90
98
  if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
91
99
  "element structure mismatch (children differ)"
92
100
  elsif dimension == :element_structure &&
93
101
  diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
94
102
  diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
95
- (node1.is_a?(Canon::Xml::Node) || node1.is_a?(Nokogiri::XML::Node)) &&
96
- (node2.is_a?(Canon::Xml::Node) || node2.is_a?(Nokogiri::XML::Node)) &&
103
+ (node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
104
+ (node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
97
105
  node1.name && node2.name && node1.name != node2.name
98
106
  "different element name (<#{node1.name}> vs <#{node2.name}>)"
99
107
  else
@@ -190,12 +198,14 @@ module Canon
190
198
  node.value
191
199
  when Canon::Xml::Node
192
200
  node.text_content
193
- when Nokogiri::XML::Node
194
- node.content.to_s
195
- when String
196
- node
197
201
  else
198
- node.to_s
202
+ if Canon::XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Node)
203
+ node.content.to_s
204
+ elsif Canon::XmlParsing.xml_node?(node)
205
+ Canon::XmlParsing.text_content(node)
206
+ else
207
+ node.to_s
208
+ end
199
209
  end
200
210
  rescue StandardError
201
211
  nil
@@ -217,6 +227,31 @@ module Canon
217
227
  "'#{truncate(text1)}' vs '#{truncate(text2)}'"
218
228
  end
219
229
 
230
+ # Build a Reason line for a +:comments+ diff. Returns +nil+ when
231
+ # neither side carries a comment (caller falls back to default).
232
+ def self.build_comment_difference_reason(node1, node2)
233
+ cm1 = node1 && Canon::Comparison::NodeInspector.comment_node?(node1)
234
+ cm2 = node2 && Canon::Comparison::NodeInspector.comment_node?(node2)
235
+
236
+ return nil unless cm1 || cm2
237
+
238
+ if cm1 && !cm2
239
+ "Comment present on EXPECTED only: " \
240
+ "<!--#{truncate(comment_text(node1))}-->"
241
+ elsif cm2 && !cm1
242
+ "Comment present on ACTUAL only: " \
243
+ "<!--#{truncate(comment_text(node2))}-->"
244
+ else
245
+ t1 = truncate(comment_text(node1))
246
+ t2 = truncate(comment_text(node2))
247
+ "Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
248
+ end
249
+ end
250
+
251
+ def self.comment_text(node)
252
+ Canon::Comparison::NodeInspector.text_content(node).to_s
253
+ end
254
+
220
255
  # Truncate text for display in reason messages
221
256
  #
222
257
  # @param text [String] Text to truncate
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../../xml/namespace_helper"
4
+
3
5
  module Canon
4
6
  module Comparison
5
7
  module XmlComparatorHelpers
@@ -41,20 +43,20 @@ module Canon
41
43
  def self.extract_declarations(node)
42
44
  declarations = {}
43
45
 
44
- # Handle Canon::Xml::Node (uses namespace_nodes)
45
- if node.respond_to?(:namespace_nodes)
46
- return extract_from_namespace_nodes(node.namespace_nodes,
47
- declarations)
48
- end
46
+ if node.is_a?(Canon::Xml::Node)
47
+ if node.namespace_nodes
48
+ return extract_from_namespace_nodes(node.namespace_nodes,
49
+ declarations)
50
+ end
49
51
 
50
- # Handle Nokogiri/Moxml nodes (use attributes)
51
- raw_attrs = node.respond_to?(:attribute_nodes) ? node.attribute_nodes : node.attributes
52
+ raw_attrs = node.attribute_nodes
53
+ else
54
+ raw_attrs = node.attributes
55
+ end
52
56
 
53
- # Handle Canon::Xml::Node attribute format (array of AttributeNode)
54
57
  if raw_attrs.is_a?(Array)
55
58
  extract_from_array_attributes(raw_attrs, declarations)
56
59
  else
57
- # Handle Nokogiri and Moxml attribute formats (Hash-like)
58
60
  extract_from_hash_attributes(raw_attrs, declarations)
59
61
  end
60
62
 
@@ -105,23 +107,11 @@ module Canon
105
107
  # @return [Hash] Declarations hash
106
108
  def self.extract_from_hash_attributes(raw_attrs, declarations)
107
109
  raw_attrs.each do |key, val|
108
- # Normalize key and value
109
- name = if key.is_a?(String)
110
- # Nokogiri format: key=name (String), val=attr object
111
- key
112
- else
113
- # Moxml format: key=attr object, val=nil
114
- key.respond_to?(:name) ? key.name : key.to_s
115
- end
110
+ name = key.is_a?(String) ? key : key.name
116
111
 
117
112
  if namespace_declaration?(name)
118
- value = if val.respond_to?(:value)
119
- val.value
120
- else
121
- val.to_s
122
- end
113
+ value = val.is_a?(String) ? val : val.value
123
114
 
124
- # Extract prefix: "xmlns" -> "", "xmlns:xmi" -> "xmi"
125
115
  prefix = name == "xmlns" ? "" : name.split(":", 2)[1]
126
116
  declarations[prefix] = value
127
117
  end
@@ -130,12 +120,8 @@ module Canon
130
120
  declarations
131
121
  end
132
122
 
133
- # Check if an attribute name is a namespace declaration
134
- #
135
- # @param attr_name [String] Attribute name
136
- # @return [Boolean] true if it's a namespace declaration
137
123
  def self.namespace_declaration?(attr_name)
138
- attr_name == "xmlns" || attr_name.start_with?("xmlns:")
124
+ Canon::Xml::NamespaceHelper.namespace_declaration?(attr_name)
139
125
  end
140
126
 
141
127
  # Add a namespace declaration difference
@@ -77,21 +77,22 @@ module Canon
77
77
  # @return [Canon::Xml::Node] Converted node
78
78
  def self.convert_from_node(node, preserve_whitespace: false,
79
79
  parser: nil)
80
- # FAST PATH: Convert Nokogiri/Moxml nodes directly without string round-trip
81
- if defined?(Nokogiri::XML::Node) && node.is_a?(Nokogiri::XML::Node)
82
- return Canon::Xml::DataModel.build_from_nokogiri(
80
+ if Canon::XmlBackend.nokogiri?
81
+ if node.is_a?(Nokogiri::XML::Node)
82
+ return Canon::Xml::DataModel.build_from_nokogiri(
83
+ node, preserve_whitespace: preserve_whitespace
84
+ )
85
+ end
86
+ elsif node.is_a?(Moxml::Node)
87
+ return Canon::Xml::DataModel.build_from_moxml(
83
88
  node, preserve_whitespace: preserve_whitespace
84
89
  )
85
90
  end
86
91
 
87
- # SLOW PATH: Fallback to string serialization for unknown node types
88
- xml_str = if node.respond_to?(:to_xml)
89
- node.to_xml
90
- elsif node.respond_to?(:to_s)
91
- node.to_s
92
+ xml_str = if node.is_a?(String)
93
+ node
92
94
  else
93
- raise Canon::Error,
94
- "Unable to convert node to string: #{node.class}"
95
+ node.to_xml
95
96
  end
96
97
 
97
98
  resolved_parser = parser || resolve_parser_config
@@ -112,7 +113,7 @@ parser: nil)
112
113
  def self.resolve_parser_config
113
114
  Canon::Config.instance.xml.diff.parser
114
115
  rescue StandardError
115
- :sax
116
+ Canon::XmlBackend.nokogiri? ? :sax : :dom
116
117
  end
117
118
  end
118
119
  end
@@ -7,90 +7,62 @@ module Canon
7
7
  #
8
8
  # Handles dispatching comparison logic based on node type.
9
9
  # Supports both Canon::Xml::Node (with symbolic node_type) and
10
- # Moxml/Nokogiri nodes (with predicate methods like element?, text?, etc.)
11
- #
12
- # This module encapsulates the complex node type detection and dispatch
13
- # logic, making the main XmlComparator cleaner and more maintainable.
10
+ # backend nodes (Nokogiri/Moxml) via XmlParsing type checks.
14
11
  module NodeTypeComparator
15
12
  class << self
16
- # Compare two nodes by dispatching to appropriate comparison method
17
- #
18
- # @param node1 [Object] First node
19
- # @param node2 [Object] Second node
20
- # @param comparator [XmlComparator] The comparator instance for method delegation
21
- # @param opts [Hash] Comparison options
22
- # @param child_opts [Hash] Options for child comparison
23
- # @param diff_children [Boolean] Whether to diff children
24
- # @param differences [Array] Array to collect differences
25
- # @return [Integer] Comparison result code
26
13
  def compare(node1, node2, comparator, opts, child_opts,
27
14
  diff_children, differences)
28
- # Dispatch based on node type
29
- # Canon::Xml::Node types use .node_type method that returns symbols
30
- # Nokogiri also has .node_type but returns integers, so check for Symbol
31
- if node1.respond_to?(:node_type) && node2.respond_to?(:node_type) &&
32
- node1.node_type.is_a?(Symbol) && node2.node_type.is_a?(Symbol)
15
+ if node1.is_a?(Canon::Xml::Node) && node2.is_a?(Canon::Xml::Node)
33
16
  compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
34
17
  diff_children, differences)
35
- # Moxml/Nokogiri types use .element?, .text?, etc. methods
36
18
  else
37
- compare_by_predicate_methods(node1, node2, comparator, opts, child_opts,
38
- diff_children, differences)
19
+ compare_by_backend_type(node1, node2, comparator, opts, child_opts,
20
+ diff_children, differences)
39
21
  end
40
22
  end
41
23
 
42
24
  private
43
25
 
44
- # Compare nodes using symbolic node_type (Canon::Xml::Node)
45
26
  def compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
46
27
  diff_children, differences)
47
28
  case node1.node_type
48
29
  when :root
49
- comparator.send(:compare_children, node1, node2, opts, child_opts,
50
- diff_children, differences)
30
+ comparator.compare_children(node1, node2, opts, child_opts,
31
+ diff_children, differences)
51
32
  when :element
52
- comparator.send(:compare_element_nodes, node1, node2, opts, child_opts,
53
- diff_children, differences)
33
+ comparator.compare_element_nodes(node1, node2, opts, child_opts,
34
+ diff_children, differences)
54
35
  when :text
55
- comparator.send(:compare_text_nodes, node1, node2, opts,
56
- differences)
36
+ comparator.compare_text_nodes(node1, node2, opts, differences)
57
37
  when :comment
58
- comparator.send(:compare_comment_nodes, node1, node2, opts,
59
- differences)
38
+ comparator.compare_comment_nodes(node1, node2, opts, differences)
60
39
  when :cdata
61
- comparator.send(:compare_text_nodes, node1, node2, opts,
62
- differences)
40
+ comparator.compare_text_nodes(node1, node2, opts, differences)
63
41
  when :processing_instruction
64
- comparator.send(:compare_processing_instruction_nodes, node1, node2, opts,
65
- differences)
42
+ comparator.compare_processing_instruction_nodes(node1, node2, opts,
43
+ differences)
66
44
  else
67
45
  Comparison::EQUIVALENT
68
46
  end
69
47
  end
70
48
 
71
- # Compare nodes using predicate methods (Moxml/Nokogiri)
72
- def compare_by_predicate_methods(node1, node2, comparator, opts, child_opts,
73
- diff_children, differences)
74
- if node1.respond_to?(:element?) && node1.element?
75
- comparator.send(:compare_element_nodes, node1, node2, opts, child_opts,
76
- diff_children, differences)
77
- elsif node1.respond_to?(:text?) && node1.text?
78
- comparator.send(:compare_text_nodes, node1, node2, opts,
79
- differences)
80
- elsif node1.respond_to?(:comment?) && node1.comment?
81
- comparator.send(:compare_comment_nodes, node1, node2, opts,
82
- differences)
83
- elsif node1.respond_to?(:cdata?) && node1.cdata?
84
- comparator.send(:compare_text_nodes, node1, node2, opts,
85
- differences)
86
- elsif node1.respond_to?(:processing_instruction?) &&
87
- node1.processing_instruction?
88
- comparator.send(:compare_processing_instruction_nodes, node1, node2, opts,
89
- differences)
90
- elsif node1.respond_to?(:root)
91
- # Document node (Moxml/Nokogiri - legacy path)
92
- comparator.send(:compare_document_nodes, node1, node2, opts, child_opts,
93
- diff_children, differences)
49
+ def compare_by_backend_type(node1, node2, comparator, opts, child_opts,
50
+ diff_children, differences)
51
+ if Canon::XmlParsing.element?(node1)
52
+ comparator.compare_element_nodes(node1, node2, opts, child_opts,
53
+ diff_children, differences)
54
+ elsif Canon::XmlParsing.text_node?(node1)
55
+ comparator.compare_text_nodes(node1, node2, opts, differences)
56
+ elsif Canon::XmlParsing.comment?(node1)
57
+ comparator.compare_comment_nodes(node1, node2, opts, differences)
58
+ elsif Canon::XmlParsing.cdata?(node1)
59
+ comparator.compare_text_nodes(node1, node2, opts, differences)
60
+ elsif Canon::XmlParsing.processing_instruction?(node1)
61
+ comparator.compare_processing_instruction_nodes(node1, node2, opts,
62
+ differences)
63
+ elsif Canon::XmlParsing.document?(node1)
64
+ comparator.compare_document_nodes(node1, node2, opts, child_opts,
65
+ diff_children, differences)
94
66
  else
95
67
  Comparison::EQUIVALENT
96
68
  end
@@ -122,16 +122,8 @@ module Canon
122
122
  preserve_whitespace: preserve_whitespace)
123
123
 
124
124
  # Store original strings for line diff display (before preprocessing)
125
- original1 = if n1.is_a?(String)
126
- n1
127
- else
128
- (n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
129
- end
130
- original2 = if n2.is_a?(String)
131
- n2
132
- else
133
- (n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
134
- end
125
+ original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
126
+ original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
135
127
 
136
128
  differences = []
137
129
  diff_children = opts[:diff_children] || false
@@ -187,16 +179,9 @@ module Canon
187
179
  # @return [Boolean, ComparisonResult] Result of tree diff comparison
188
180
  def perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
189
181
  # Store original strings for line diff display (before preprocessing)
190
- original1 = if n1.is_a?(String)
191
- n1
192
- else
193
- (n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
194
- end
195
- original2 = if n2.is_a?(String)
196
- n2
197
- else
198
- (n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
199
- end
182
+ # Store original strings for line diff display (before preprocessing)
183
+ original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
184
+ original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
200
185
 
201
186
  # Parse to Canon::Xml::Node (preserves preprocessing)
202
187
  node1 = parse_node(n1, match_opts_hash[:preprocessing])
@@ -262,20 +247,8 @@ module Canon
262
247
  serialize_node(node1).gsub("><", ">\n<"),
263
248
  serialize_node(node2).gsub("><", ">\n<"),
264
249
  ]
265
- original1 = if n1.is_a?(String)
266
- n1
267
- elsif n1.respond_to?(:to_xml)
268
- n1.to_xml
269
- else
270
- n1.to_s
271
- end
272
- original2 = if n2.is_a?(String)
273
- n2
274
- elsif n2.respond_to?(:to_xml)
275
- n2.to_xml
276
- else
277
- n2.to_s
278
- end
250
+ original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
251
+ original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
279
252
 
280
253
  ComparisonResult.new(
281
254
  differences: [],
@@ -289,14 +262,20 @@ module Canon
289
262
 
290
263
  public
291
264
 
265
+ # Public parsing API for external callers
266
+ def parse(node, preprocessing = :none, preserve_whitespace: false)
267
+ parse_node(node, preprocessing,
268
+ preserve_whitespace: preserve_whitespace)
269
+ end
270
+
292
271
  # Main comparison dispatcher
293
272
  def compare_nodes(n1, n2, opts, child_opts, diff_children, differences)
294
273
  # FAST PATH: Object identity - same object is always equivalent
295
274
  return Comparison::EQUIVALENT if n1.equal?(n2)
296
275
 
297
276
  # Handle DocumentFragment nodes - compare their children instead
298
- if n1.is_a?(Nokogiri::XML::DocumentFragment) &&
299
- n2.is_a?(Nokogiri::XML::DocumentFragment)
277
+ if Canon::XmlParsing.document_fragment?(n1) &&
278
+ Canon::XmlParsing.document_fragment?(n2)
300
279
  children1 = n1.children.to_a
301
280
  children2 = n2.children.to_a
302
281
 
@@ -392,8 +371,8 @@ module Canon
392
371
  end
393
372
 
394
373
  # Compare namespace URIs - elements with different namespaces are different elements
395
- ns1 = n1.respond_to?(:namespace_uri) ? n1.namespace_uri : nil
396
- ns2 = n2.respond_to?(:namespace_uri) ? n2.namespace_uri : nil
374
+ ns1 = Canon::XmlParsing.namespace_uri(n1)
375
+ ns2 = Canon::XmlParsing.namespace_uri(n2)
397
376
 
398
377
  unless ns1 == ns2
399
378
  # Create descriptive reason showing the actual namespace URIs
@@ -410,18 +389,30 @@ module Canon
410
389
  return Comparison::UNEQUAL_ELEMENTS
411
390
  end
412
391
 
392
+ # Track the worst result across namespace, attribute, and children
393
+ # comparisons. Do NOT return early on attribute/namespace mismatches —
394
+ # children must still be compared so structural differences in the
395
+ # subtree are reported. Early returns caused the comparator to skip
396
+ # entire subtrees when a root or intermediate element had different
397
+ # attributes, missing all nested structural changes.
398
+ worst_result = Comparison::EQUIVALENT
399
+
413
400
  # Compare namespace declarations (xmlns and xmlns:* attributes)
414
401
  ns_result = compare_namespace_declarations(n1, n2, opts, differences)
415
- return ns_result unless ns_result == Comparison::EQUIVALENT
402
+ worst_result = ns_result unless ns_result == Comparison::EQUIVALENT
416
403
 
417
404
  # Compare attributes
418
405
  attr_result = compare_attribute_sets(n1, n2, opts, differences)
419
- return attr_result unless attr_result == Comparison::EQUIVALENT
406
+ worst_result = attr_result unless attr_result == Comparison::EQUIVALENT
420
407
 
421
408
  # Compare children if not ignored
422
- return Comparison::EQUIVALENT if opts[:ignore_children]
409
+ unless opts[:ignore_children]
410
+ child_result = compare_children(n1, n2, opts, child_opts,
411
+ diff_children, differences)
412
+ worst_result = child_result unless child_result == Comparison::EQUIVALENT
413
+ end
423
414
 
424
- compare_children(n1, n2, opts, child_opts, diff_children, differences)
415
+ worst_result
425
416
  end
426
417
 
427
418
  # Compare attribute sets
@@ -500,7 +491,7 @@ module Canon
500
491
  def should_preserve_whitespace_strictly?(n1, n2, opts)
501
492
  # Check both n1 and n2 - if either is in a preserve whitespace element, preserve strictly
502
493
  [n1, n2].each do |node|
503
- next unless node.respond_to?(:parent)
494
+ next unless Canon::XmlParsing.xml_node?(node) || node.is_a?(Canon::Xml::Node)
504
495
 
505
496
  parent = node.parent
506
497
  next unless parent
@@ -516,15 +507,12 @@ module Canon
516
507
  # Check if a node is inside a whitespace-preserving element
517
508
  def in_preserve_element?(node, preserve_list)
518
509
  current = node.parent
519
- while current.respond_to?(:name)
510
+ while Canon::XmlParsing.xml_node?(current) || current.is_a?(Canon::Xml::Node)
520
511
  return true if preserve_list.include?(current.name.downcase)
521
512
 
522
- # Stop at document root
523
- break if current.is_a?(Nokogiri::XML::Document) ||
524
- current.is_a?(Nokogiri::HTML4::Document) ||
525
- current.is_a?(Nokogiri::HTML5::Document)
513
+ break if Canon::XmlParsing.document?(current)
526
514
 
527
- current = current.parent if current.respond_to?(:parent)
515
+ current = current.parent
528
516
  break unless current
529
517
  end
530
518
  false
@@ -567,8 +555,8 @@ module Canon
567
555
  return Comparison::UNEQUAL_NODES_TYPES
568
556
  end
569
557
 
570
- content1 = n1.respond_to?(:content) ? n1.content.to_s.strip : ""
571
- content2 = n2.respond_to?(:content) ? n2.content.to_s.strip : ""
558
+ content1 = Canon::XmlParsing.xml_node?(n1) ? n1.content.to_s.strip : ""
559
+ content2 = Canon::XmlParsing.xml_node?(n2) ? n2.content.to_s.strip : ""
572
560
 
573
561
  if content1 == content2
574
562
  Comparison::EQUIVALENT
@@ -618,17 +606,19 @@ differences)
618
606
  depth = 0
619
607
 
620
608
  while current && depth < max_depth
621
- if current.respond_to?(:name) && current.name
622
- path.unshift(current.name)
623
- end
609
+ n = if current.is_a?(Canon::Xml::Node)
610
+ current.name
611
+ elsif Canon::XmlParsing.xml_node?(current)
612
+ current.name
613
+ end
614
+ path.unshift(n) if n
624
615
 
625
- break unless current.respond_to?(:parent)
616
+ break unless Canon::XmlParsing.xml_node?(current) || current.is_a?(Canon::Xml::Node)
626
617
 
627
618
  current = current.parent
628
619
  depth += 1
629
620
 
630
- # Stop at document root
631
- break if current.respond_to?(:root)
621
+ break if Canon::XmlParsing.document?(current)
632
622
  end
633
623
 
634
624
  path
@@ -665,8 +655,8 @@ differences)
665
655
  # For deleted/inserted nodes, include namespace information if available
666
656
  if dimension == :text_content && (node1.nil? || node2.nil?)
667
657
  node = node1 || node2
668
- if node.respond_to?(:name) && node.respond_to?(:namespace_uri)
669
- ns = node.namespace_uri
658
+ if Canon::XmlParsing.xml_node?(node)
659
+ ns = Canon::XmlParsing.namespace_uri(node)
670
660
  ns_info = if ns.nil? || ns.empty?
671
661
  ""
672
662
  else
@@ -674,9 +664,8 @@ differences)
674
664
  end
675
665
  label = Canon::Comparison.code_pair_label(diff1, diff2)
676
666
  return "element '#{node.name}'#{ns_info}: #{label}"
677
- elsif node.respond_to?(:name) && !node.respond_to?(:namespace_uri)
678
- # TextNode and other nodes without namespace_uri
679
- display = if node.respond_to?(:value) && node.node_type == :text
667
+ elsif node.is_a?(Canon::Xml::Node)
668
+ display = if node.is_a?(Canon::Xml::Nodes::TextNode)
680
669
  "\"#{truncate_text(node.value)}\""
681
670
  else
682
671
  node.name.to_s
@@ -703,6 +692,10 @@ differences)
703
692
  return build_whitespace_adjacency_reason(node1, node2)
704
693
  end
705
694
 
695
+ if dimension == :comments
696
+ return build_comments_reason(node1, node2)
697
+ end
698
+
706
699
  # For attribute values differences, show the actual values
707
700
  if dimension == :attribute_values
708
701
  attrs1 = extract_attributes(node1)
@@ -722,8 +715,8 @@ differences)
722
715
  elsif dimension == :element_structure &&
723
716
  diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
724
717
  diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
725
- (node1.is_a?(Canon::Xml::Node) || node1.is_a?(Nokogiri::XML::Node)) &&
726
- (node2.is_a?(Canon::Xml::Node) || node2.is_a?(Nokogiri::XML::Node)) &&
718
+ (node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
719
+ (node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
727
720
  node1.name && node2.name && node1.name != node2.name
728
721
  # Most common case: differing element names. Surface the
729
722
  # actual names rather than a generic "elements differ".
@@ -794,27 +787,16 @@ differences)
794
787
  # @return [String, nil] Text content or nil
795
788
  def extract_text_from_node(node)
796
789
  return nil if node.nil?
797
-
798
- # For Canon::Xml::Nodes::TextNode
799
- return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
800
-
801
- # For XML/HTML nodes with text_content method
802
- return node.text_content if node.respond_to?(:text_content)
803
-
804
- # For nodes with text method
805
- return node.text if node.respond_to?(:text)
806
-
807
- # For nodes with content method (Moxml::Text)
808
- return node.content if node.respond_to?(:content)
809
-
810
- # For nodes with value method (other types)
811
- return node.value if node.respond_to?(:value)
812
-
813
- # For simple text nodes or strings
814
790
  return node.to_s if node.is_a?(String)
815
791
 
816
- # For other node types, try to_s
817
- node.to_s
792
+ case node
793
+ when Canon::Xml::Nodes::TextNode
794
+ node.value
795
+ when Canon::Xml::Node
796
+ node.text_content
797
+ else
798
+ Canon::XmlParsing.xml_node?(node) ? Canon::XmlParsing.text_content(node).to_s : node.to_s
799
+ end
818
800
  rescue StandardError
819
801
  nil
820
802
  end
@@ -934,6 +916,30 @@ differences)
934
916
  false
935
917
  end
936
918
 
919
+ # Build a Reason line for a +:comments+ diff (#144).
920
+ # Names the side that carries the comment and surfaces the
921
+ # comment text.
922
+ def build_comments_reason(node1, node2)
923
+ cm1 = node1 && NodeInspector.comment_node?(node1)
924
+ cm2 = node2 && NodeInspector.comment_node?(node2)
925
+
926
+ if cm1 && !cm2
927
+ "Comment present on EXPECTED only: <!--#{truncate_text(comment_text(node1))}-->"
928
+ elsif cm2 && !cm1
929
+ "Comment present on ACTUAL only: <!--#{truncate_text(comment_text(node2))}-->"
930
+ elsif cm1 && cm2
931
+ t1 = truncate_text(comment_text(node1))
932
+ t2 = truncate_text(comment_text(node2))
933
+ "Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
934
+ else
935
+ "element structure mismatch (children differ)"
936
+ end
937
+ end
938
+
939
+ def comment_text(node)
940
+ NodeInspector.text_content(node).to_s
941
+ end
942
+
937
943
  # Check if text is only whitespace
938
944
  #
939
945
  # @param text [String] Text to check