canon 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Comparison
    # Single source of truth for cross-backend node type operations.
    #
    # The comparison pipeline handles nodes from two backends:
    #   * Canon::Xml::Node (+ RootNode, ElementNode, TextNode, etc.) —
    #     custom DOM built by SAX builder and DataModel.
    #   * Nokogiri::XML::Node (+ subclasses) — native Nokogiri nodes used
    #     by the HTML comparator and some legacy paths.
    #
    # Every method here dispatches on type via +case/when+ (+is_a?+).
    # No +respond_to?+ — the types are known at every call site.
    # Objects of any other class get a neutral answer (+false+, +[]+ or
    # their +to_s+) rather than an exception.
    module NodeInspector
      # +node_type+ value that marks a Canon text node.
      CANON_TEXT_TYPE = :text

      # +node_type+ value that marks a Nokogiri text node. The literal 3 is
      # used when Nokogiri is not loaded at the time this file is evaluated.
      NOKOGIRI_TEXT_TYPE =
        if defined?(Nokogiri::XML::Node::TEXT_NODE)
          Nokogiri::XML::Node::TEXT_NODE
        else
          3
        end

      # True when +node+ is a text node (whitespace, content, etc.).
      #
      # @param node [Object] node from either backend, or anything else
      # @return [Boolean] false for objects of neither backend
      def self.text_node?(node)
        case node
        when Canon::Xml::Node
          node.node_type == CANON_TEXT_TYPE
        when Nokogiri::XML::Node
          node.node_type == NOKOGIRI_TEXT_TYPE
        else
          false
        end
      end

      # Extract the text content of +node+ as a String.
      #
      # Canon nodes contribute +value+, Nokogiri nodes +content+; any other
      # object is converted with +to_s+. A nil value/content becomes "".
      #
      # @param node [Object]
      # @return [String]
      def self.text_content(node)
        case node
        when Canon::Xml::Node
          node.value.to_s
        when Nokogiri::XML::Node
          node.content.to_s
        else
          node.to_s
        end
      end

      # True when +node+ is a text node whose content is whitespace-only.
      # Empty-string text nodes return false — those represent genuine
      # empty-vs-content asymmetry, not pretty-print indentation.
      #
      # @param node [Object]
      # @return [Boolean]
      def self.whitespace_only_text?(node)
        return false unless text_node?(node)

        text = text_content(node)
        !text.empty? && text.strip.empty?
      end

      # True when +node+ is a comment node.
      # For HTML, also detects comments that Nokogiri parses as TEXT nodes
      # (content like "<!-- comment -->" or escaped "<\\!-- comment -->").
      #
      # @param node [Object]
      # @return [Boolean]
      def self.comment_node?(node)
        case node
        when Canon::Xml::Node
          node.node_type == :comment
        when Nokogiri::XML::Node
          return true if node.comment?
          return false unless node.text?

          # Text that, once surrounding whitespace and backslashes are
          # removed, is wrapped in comment delimiters counts as a comment.
          # text_content already returns a String, so no extra to_s here.
          stripped = text_content(node).strip.gsub("\\", "")
          stripped.start_with?("<!--") && stripped.end_with?("-->")
        else
          false
        end
      end

      # True when +node+ is an element node.
      #
      # @param node [Object]
      # @return [Boolean]
      def self.element_node?(node)
        case node
        when Canon::Xml::Node
          node.node_type == :element
        when Nokogiri::XML::Node
          node.element?
        else
          false
        end
      end

      # Extract parse-time errors as an Array of Strings.
      #
      # Canon nodes report whatever their +parse_errors+ accessor returns;
      # Nokogiri documents report +errors+. Everything else — nil,
      # non-document Nokogiri nodes, unrelated objects — yields [].
      #
      # Only Nokogiri::XML::Document is named in the dispatch:
      # Nokogiri::HTML5::Document inherits from it (through
      # HTML4::Document), so it is still matched, and not referencing the
      # HTML5 constant avoids a NameError on Nokogiri builds that do not
      # define it.
      #
      # @param node [Object, nil]
      # @return [Array<String>]
      def self.parse_errors(node)
        case node
        when nil
          []
        when Canon::Xml::Node
          Array(node.parse_errors).map(&:to_s)
        when Nokogiri::XML::Document
          Array(node.errors).map(&:to_s)
        else
          []
        end
      end
    end
  end
end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../node_inspector"
4
+
3
5
  module Canon
4
6
  module Comparison
5
7
  module XmlComparatorHelpers
@@ -27,7 +29,7 @@ module Canon
27
29
  # @param differences [Array] Array to collect differences
28
30
  # @return [Integer] Comparison result code
29
31
  def compare(node1, node2, comparator, opts, child_opts,
30
- diff_children, differences)
32
+ diff_children, differences)
31
33
  # FAST PATH: Object identity - same object means equivalent children
32
34
  return Comparison::EQUIVALENT if node1.equal?(node2)
33
35
 
@@ -40,8 +42,8 @@ diff_children, differences)
40
42
  opts1 = XmlNodeComparison.opts_for_side(opts, :expected)
41
43
  opts2 = XmlNodeComparison.opts_for_side(opts, :received)
42
44
 
43
- children1 = comparator.send(:filter_children, node1.children, opts1)
44
- children2 = comparator.send(:filter_children, node2.children, opts2)
45
+ children1 = comparator.filter_children(node1.children, opts1)
46
+ children2 = comparator.filter_children(node2.children, opts2)
45
47
 
46
48
  # Quick check: if both have no children, they're equivalent
47
49
  return Comparison::EQUIVALENT if children1.empty? && children2.empty?
@@ -97,9 +99,9 @@ diff_children, differences)
97
99
 
98
100
  # If no matches and children exist, they're all different
99
101
  if matches.empty? && (!children1.empty? || !children2.empty?)
100
- comparator.send(:add_difference, parent_node, parent_node,
101
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
102
- :text_content, opts, differences)
102
+ comparator.add_difference(parent_node, parent_node,
103
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
104
+ :text_content, opts, differences)
103
105
  return Comparison::UNEQUAL_ELEMENTS
104
106
  end
105
107
 
@@ -122,30 +124,30 @@ diff_children, differences)
122
124
 
123
125
  # Only create DiffNode if element_position is not :ignore
124
126
  if position_behavior != :ignore
125
- comparator.send(:add_difference, match.elem1, match.elem2,
126
- "position #{match.pos1}", "position #{match.pos2}",
127
- :element_position, opts, differences)
127
+ comparator.add_difference(match.elem1, match.elem2,
128
+ "position #{match.pos1}", "position #{match.pos2}",
129
+ :element_position, opts, differences)
128
130
  all_equivalent = false if position_behavior == :strict
129
131
  end
130
132
  end
131
133
 
132
134
  # Compare the matched elements for content/attribute differences
133
- result = comparator.send(:compare_nodes, match.elem1, match.elem2,
134
- child_opts, child_opts, diff_children, differences)
135
+ result = comparator.compare_nodes(match.elem1, match.elem2,
136
+ child_opts, child_opts, diff_children, differences)
135
137
  all_equivalent = false unless result == Comparison::EQUIVALENT
136
138
 
137
139
  when :deleted
138
140
  # Element present in first tree but not second
139
- comparator.send(:add_difference, match.elem1, nil,
140
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
141
- :element_structure, opts, differences)
141
+ comparator.add_difference(match.elem1, nil,
142
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
143
+ :element_structure, opts, differences)
142
144
  all_equivalent = false
143
145
 
144
146
  when :inserted
145
147
  # Element present in second tree but not first
146
- comparator.send(:add_difference, nil, match.elem2,
147
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
148
- :element_structure, opts, differences)
148
+ comparator.add_difference(nil, match.elem2,
149
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
150
+ :element_structure, opts, differences)
149
151
  all_equivalent = false
150
152
  end
151
153
  end
@@ -153,9 +155,16 @@ diff_children, differences)
153
155
  all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ELEMENTS
154
156
  end
155
157
 
156
- # Use simple positional comparison for children
158
+ # Use simple positional comparison for children, with
159
+ # whitespace-asymmetry-aware re-alignment. When positional
160
+ # +zip()+ would pair a whitespace-only text node on one side
161
+ # against a content node on the other, treat the whitespace
162
+ # node as a single-side gap: emit one +:whitespace_adjacency+
163
+ # diff anchored at the whitespace node and advance only the
164
+ # cursor carrying the whitespace, so the next iteration aligns
165
+ # content against content. See lutaml/canon#137.
157
166
  def use_positional_comparison(
158
- children1, children2, _parent_node, comparator,
167
+ children1, children2, parent_node, comparator,
159
168
  opts, child_opts, diff_children, differences
160
169
  )
161
170
  has_mismatch = false
@@ -163,53 +172,120 @@ diff_children, differences)
163
172
  # Length check
164
173
  unless children1.length == children2.length
165
174
  has_mismatch = true
166
- dimension = determine_dimension_for_mismatch(
167
- children1, children2, comparator
175
+
176
+ ws_asymmetric = asymmetric_whitespace_explains_length_diff?(
177
+ children1, children2
168
178
  )
169
179
 
170
- mismatched_children, children1, children2 =
171
- determine_mismatch_children(
180
+ if ws_asymmetric
181
+ dimension = nil
182
+ mismatched_children = []
183
+ else
184
+ dimension = determine_dimension_for_mismatch(
172
185
  children1, children2, comparator
173
186
  )
187
+ mismatched_children, children1, children2 =
188
+ determine_mismatch_children(
189
+ children1, children2, comparator
190
+ )
191
+ end
174
192
 
175
193
  if mismatched_children.empty?
176
- comparator.send(:add_difference, parent_node, parent_node,
177
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
178
- dimension, opts, differences)
194
+ unless ws_asymmetric
195
+ comparator.add_difference(parent_node, parent_node,
196
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
197
+ dimension, opts, differences)
198
+ end
179
199
  else
180
200
  mismatched_children.each do |child|
181
- if children1.length > children2.length # rubocop:disable Metrics/BlockNesting
182
- comparator.send(:add_difference, child, nil,
183
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
184
- dimension, opts, differences)
201
+ child_dim = comparator.determine_node_dimension(child)
202
+ if children1.length > children2.length
203
+ comparator.add_difference(child, nil,
204
+ Comparison::MISSING_NODE,
205
+ Comparison::MISSING_NODE,
206
+ child_dim, opts, differences)
185
207
  else
186
- comparator.send(:add_difference, nil, child,
187
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
188
- dimension, opts, differences)
208
+ comparator.add_difference(nil, child,
209
+ Comparison::MISSING_NODE,
210
+ Comparison::MISSING_NODE,
211
+ child_dim, opts, differences)
189
212
  end
190
213
  end
191
214
  end
192
- # Continue comparing children to find deeper differences like attribute values
193
- # Use zip to compare up to the shorter length
194
215
  end
195
216
 
196
- # Compare children pairwise by position
197
217
  result = has_mismatch ? Comparison::UNEQUAL_ELEMENTS : Comparison::EQUIVALENT
198
- children1.zip(children2).each do |child1, child2|
199
- # Skip if one is nil (due to different lengths)
200
- next if child1.nil? || child2.nil?
218
+ walk_result = walk_children_with_realignment(
219
+ children1, children2, comparator,
220
+ child_opts, diff_children, opts, differences
221
+ )
222
+ result = walk_result unless walk_result == Comparison::EQUIVALENT
223
+ result
224
+ end
201
225
 
202
- child_result = comparator.send(:compare_nodes, child1, child2,
203
- child_opts, child_opts, diff_children, differences)
226
# Two-cursor walk over two child lists that re-aligns past asymmetric
# whitespace-only text nodes.
#
# At every step the nodes under both cursors are inspected:
#   * exactly one of them is whitespace-only text — one
#     +:whitespace_adjacency+ difference is recorded for the pair and only
#     the cursor holding the whitespace advances, so the other node is
#     retried against the next candidate;
#   * otherwise the pair goes to +comparator.compare_nodes+ and both
#     cursors advance.
#
# The walk ends as soon as either list is exhausted; nodes left over on
# the longer side are not compared here. A nil slot in either list is
# skipped.
#
# @param children1 [#length, #[]] children of the first node
# @param children2 [#length, #[]] children of the second node
# @param comparator [#add_difference, #compare_nodes]
# @param child_opts options passed (for both sides) to +compare_nodes+
# @param diff_children passed through to +compare_nodes+
# @param opts options passed to +add_difference+
# @param differences collection that receives recorded differences
# @return the result code of the last non-equivalent step, or
#   Comparison::EQUIVALENT when every step was equivalent
def walk_children_with_realignment(
  children1, children2, comparator,
  child_opts, diff_children, opts, differences
)
  result = Comparison::EQUIVALENT
  i = 0
  j = 0

  # Once one side runs out there is nothing left to pair, so stop instead
  # of stepping through the remainder of the other side.
  while i < children1.length && j < children2.length
    c1 = children1[i]
    c2 = children2[j]

    # Advance past an empty slot on the side that actually holds it, so
    # the walk always makes progress.
    if c1.nil?
      i += 1
      next
    elsif c2.nil?
      j += 1
      next
    end

    ws1 = NodeInspector.whitespace_only_text?(c1)
    ws2 = NodeInspector.whitespace_only_text?(c2)

    # Truthy only when exactly one of the two nodes is whitespace-only.
    one_sided_ws = ws1 ? !ws2 : ws2

    if one_sided_ws
      comparator.add_difference(c1, c2,
                                Comparison::UNEQUAL_TEXT_CONTENTS,
                                Comparison::UNEQUAL_TEXT_CONTENTS,
                                :whitespace_adjacency, opts, differences)
      result = Comparison::UNEQUAL_TEXT_CONTENTS

      # Step over the whitespace only; the content node stays in place.
      if ws1
        i += 1
      else
        j += 1
      end
      next
    end

    child_result = comparator.compare_nodes(c1, c2,
                                            child_opts, child_opts,
                                            diff_children, differences)
    result = child_result unless child_result == Comparison::EQUIVALENT
    i += 1
    j += 1
  end

  result
end
212
280
 
281
# True when the length difference between the two child arrays
# is fully explained by asymmetric whitespace-only text nodes, i.e. both
# lists hold the same number of nodes that are NOT whitespace-only text.
#
# The method does not check that the lengths actually differ; it only
# compares the counts of non-whitespace nodes.
#
# @param children1 [Enumerable] children of the first node
# @param children2 [Enumerable] children of the second node
# @return [Boolean]
def asymmetric_whitespace_explains_length_diff?(children1, children2)
  # Count in place rather than materialising filtered copies.
  content_count = lambda do |children|
    children.count { |c| !NodeInspector.whitespace_only_text?(c) }
  end

  content_count.call(children1) == content_count.call(children2)
end
288
+
213
289
  # Determine dimension for length mismatch
214
290
  def determine_dimension_for_mismatch(children1, children2, comparator)
215
291
  dimension = :text_content # default
@@ -219,22 +295,17 @@ diff_children, differences)
219
295
  (0...max_len).each do |i|
220
296
  if i >= children1.length
221
297
  # Extra child in children2
222
- dimension = comparator.send(:determine_node_dimension,
223
- children2[i])
298
+ dimension = comparator.determine_node_dimension(children2[i])
224
299
  break
225
300
  elsif i >= children2.length
226
301
  # Extra child in children1
227
- dimension = comparator.send(:determine_node_dimension,
228
- children1[i])
302
+ dimension = comparator.determine_node_dimension(children1[i])
229
303
  break
230
- elsif !comparator.send(:same_node_type?, children1[i],
231
- children2[i])
304
+ elsif !comparator.same_node_type?(children1[i], children2[i])
232
305
  # Different node types at same position
233
306
  # Check both nodes - if either is a comment, use :comments dimension
234
- dim1 = comparator.send(:determine_node_dimension,
235
- children1[i])
236
- dim2 = comparator.send(:determine_node_dimension,
237
- children2[i])
307
+ dim1 = comparator.determine_node_dimension(children1[i])
308
+ dim2 = comparator.determine_node_dimension(children2[i])
238
309
  dimension = [dim1, dim2].include?(:comments) ? :comments : dim1
239
310
  break
240
311
  end
@@ -256,7 +327,7 @@ diff_children, differences)
256
327
  end
257
328
 
258
329
  smaller_set_names = smaller_set.filter_map do |c|
259
- next nil unless c.respond_to?(:name)
330
+ next nil unless c.is_a?(Canon::Xml::Node) || c.is_a?(Nokogiri::XML::Node)
260
331
  # Exclude generic node-type names (e.g. "#text") that are
261
332
  # shared by all text nodes and cannot be used for matching.
262
333
  next nil if c.name.start_with?("#")
@@ -271,7 +342,8 @@ diff_children, differences)
271
342
  # If the smaller set has no child at this position,
272
343
  # consider it a mismatch
273
344
  mismatch_children << larger_set[i]
274
- elsif larger_set[i].respond_to?(:name) &&
345
+ elsif (larger_set[i].is_a?(Canon::Xml::Node) ||
346
+ larger_set[i].is_a?(Nokogiri::XML::Node)) &&
275
347
  !larger_set[i].name.start_with?("#") &&
276
348
  !smaller_set_names.include?(larger_set[i].name)
277
349
  # If the name of the node is not found in the smaller set,
@@ -4,6 +4,7 @@ require "set"
4
4
  require_relative "../../diff/diff_node"
5
5
  require_relative "../../diff/path_builder"
6
6
  require_relative "../../diff/node_serializer"
7
+ require_relative "../node_inspector"
7
8
 
8
9
  module Canon
9
10
  module Comparison
@@ -52,14 +53,15 @@ module Canon
52
53
  # For deleted/inserted nodes, include namespace information if available
53
54
  if dimension == :text_content && (node1.nil? || node2.nil?)
54
55
  node = node1 || node2
55
- if node.respond_to?(:name) && node.respond_to?(:namespace_uri)
56
+ if node.is_a?(Canon::Xml::Node) || node.is_a?(Nokogiri::XML::Node)
56
57
  ns = node.namespace_uri
57
58
  ns_info = if ns.nil? || ns.empty?
58
59
  ""
59
60
  else
60
61
  " (namespace: #{ns})"
61
62
  end
62
- return "element '#{node.name}'#{ns_info}: #{diff1} vs #{diff2}"
63
+ label = Canon::Comparison.code_pair_label(diff1, diff2)
64
+ return "element '#{node.name}'#{ns_info}: #{label}"
63
65
  end
64
66
  end
65
67
 
@@ -87,8 +89,15 @@ module Canon
87
89
  # Default reason
88
90
  if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
89
91
  "element structure mismatch (children differ)"
92
+ elsif dimension == :element_structure &&
93
+ diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
94
+ diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
95
+ (node1.is_a?(Canon::Xml::Node) || node1.is_a?(Nokogiri::XML::Node)) &&
96
+ (node2.is_a?(Canon::Xml::Node) || node2.is_a?(Nokogiri::XML::Node)) &&
97
+ node1.name && node2.name && node1.name != node2.name
98
+ "different element name (<#{node1.name}> vs <#{node2.name}>)"
90
99
  else
91
- "#{diff1} vs #{diff2}"
100
+ Canon::Comparison.code_pair_label(diff1, diff2)
92
101
  end
93
102
  end
94
103
 
@@ -176,26 +185,18 @@ module Canon
176
185
  def self.extract_text_content(node)
177
186
  return nil if node.nil?
178
187
 
179
- # For Canon::Xml::Nodes::TextNode
180
- return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
181
-
182
- # For XML/HTML nodes with text_content method
183
- return node.text_content if node.respond_to?(:text_content)
184
-
185
- # For nodes with text method
186
- return node.text if node.respond_to?(:text)
187
-
188
- # For nodes with content method (Moxml::Text)
189
- return node.content if node.respond_to?(:content)
190
-
191
- # For nodes with value method (other types)
192
- return node.value if node.respond_to?(:value)
193
-
194
- # For simple text nodes or strings
195
- return node.to_s if node.is_a?(String)
196
-
197
- # For other node types, try to_s
198
- node.to_s
188
+ case node
189
+ when Canon::Xml::Nodes::TextNode
190
+ node.value
191
+ when Canon::Xml::Node
192
+ node.text_content
193
+ when Nokogiri::XML::Node
194
+ node.content.to_s
195
+ when String
196
+ node
197
+ else
198
+ node.to_s
199
+ end
199
200
  rescue StandardError
200
201
  nil
201
202
  end
@@ -160,6 +160,8 @@ module Canon
160
160
  format: :xml,
161
161
  match_options: match_opts_hash,
162
162
  algorithm: :dom,
163
+ parse_errors_expected: Comparison.parse_errors_for(node1),
164
+ parse_errors_received: Comparison.parse_errors_for(node2),
163
165
  )
164
166
  elsif result != Comparison::EQUIVALENT && !differences.empty?
165
167
  # Non-verbose mode: check equivalence
@@ -222,6 +224,8 @@ module Canon
222
224
  format: :xml,
223
225
  match_options: match_opts_hash.merge(strategy.metadata),
224
226
  algorithm: :semantic,
227
+ parse_errors_expected: Comparison.parse_errors_for(node1),
228
+ parse_errors_received: Comparison.parse_errors_for(node2),
225
229
  )
226
230
  else
227
231
  # Simple boolean result - equivalent if no normative differences
@@ -283,6 +287,8 @@ module Canon
283
287
  )
284
288
  end
285
289
 
290
+ public
291
+
286
292
  # Main comparison dispatcher
287
293
  def compare_nodes(n1, n2, opts, child_opts, diff_children, differences)
288
294
  # FAST PATH: Object identity - same object is always equivalent
@@ -373,7 +379,6 @@ module Canon
373
379
  end
374
380
 
375
381
  # Public comparison methods - exposed for XmlNodeComparison module
376
- public
377
382
 
378
383
  # Compare two element nodes
379
384
  def compare_element_nodes(n1, n2, opts, child_opts, diff_children,
@@ -667,7 +672,8 @@ differences)
667
672
  else
668
673
  " (namespace: #{ns})"
669
674
  end
670
- return "element '#{node.name}'#{ns_info}: #{diff1} vs #{diff2}"
675
+ label = Canon::Comparison.code_pair_label(diff1, diff2)
676
+ return "element '#{node.name}'#{ns_info}: #{label}"
671
677
  elsif node.respond_to?(:name) && !node.respond_to?(:namespace_uri)
672
678
  # TextNode and other nodes without namespace_uri
673
679
  display = if node.respond_to?(:value) && node.node_type == :text
@@ -693,6 +699,10 @@ differences)
693
699
  return build_text_diff_reason(text1, text2)
694
700
  end
695
701
 
702
+ if dimension == :whitespace_adjacency
703
+ return build_whitespace_adjacency_reason(node1, node2)
704
+ end
705
+
696
706
  # For attribute values differences, show the actual values
697
707
  if dimension == :attribute_values
698
708
  attrs1 = extract_attributes(node1)
@@ -709,8 +719,17 @@ differences)
709
719
 
710
720
  if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
711
721
  "element structure mismatch (children differ)"
722
+ elsif dimension == :element_structure &&
723
+ diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
724
+ diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
725
+ (node1.is_a?(Canon::Xml::Node) || node1.is_a?(Nokogiri::XML::Node)) &&
726
+ (node2.is_a?(Canon::Xml::Node) || node2.is_a?(Nokogiri::XML::Node)) &&
727
+ node1.name && node2.name && node1.name != node2.name
728
+ # Most common case: differing element names. Surface the
729
+ # actual names rather than a generic "elements differ".
730
+ "different element name (<#{node1.name}> vs <#{node2.name}>)"
712
731
  else
713
- "#{diff1} vs #{diff2}"
732
+ Canon::Comparison.code_pair_label(diff1, diff2)
714
733
  end
715
734
  end
716
735
 
@@ -824,6 +843,81 @@ differences)
824
843
  "Text: \"#{vis1}\" vs \"#{vis2}\""
825
844
  end
826
845
 
846
# Compose the Reason line for a +:whitespace_adjacency+ diff (#137).
#
# Exactly one node of the pair is expected to be whitespace-only text.
# The message says where that whitespace sits relative to its content
# neighbours, which side carries it (node1 is reported as EXPECTED,
# node2 as ACTUAL) and shows the whitespace through
# +visualize_whitespace+. When both nodes or neither node is
# whitespace-only, the regular text-diff reason is returned instead.
def build_whitespace_adjacency_reason(node1, node2)
  text1 = extract_text_from_node(node1)
  text2 = extract_text_from_node(node2)

  first_is_ws = NodeInspector.whitespace_only_text?(node1)
  second_is_ws = NodeInspector.whitespace_only_text?(node2)

  # Truthy only when exactly one side is whitespace-only.
  one_sided = first_is_ws ? !second_is_ws : second_is_ws
  return build_text_diff_reason(text1, text2) unless one_sided

  ws_node, ws_text, content_text, present_side, absent_side =
    if first_is_ws
      [node1, text1, text2, "EXPECTED", "ACTUAL"]
    else
      [node2, text2, text1, "ACTUAL", "EXPECTED"]
    end

  direction = whitespace_partner_direction(ws_node)
  ws_vis = visualize_whitespace(ws_text)
  content_vis =
    if content_text
      visualize_whitespace(truncate_text(content_text))
    else
      "(none)"
    end

  "Whitespace #{direction} \"#{content_vis}\": " \
    "present on #{present_side} (\"#{ws_vis}\"), absent on #{absent_side}"
end
883
+
884
# Where the partner content sits relative to the whitespace node, worded
# from the partner's point of view:
#   * "before"      — a non-whitespace sibling follows the whitespace;
#   * "after"       — none follows, but one precedes it;
#   * "adjacent to" — fallback when the node is of neither backend type,
#                     has no parent, is not found among the parent's
#                     children, or has no non-whitespace sibling at all.
def whitespace_partner_direction(ws_node)
  fallback = "adjacent to"

  known_backend =
    case ws_node
    when Canon::Xml::Node, Nokogiri::XML::Node then true
    else false
    end
  return fallback unless known_backend

  parent = ws_node.parent
  return fallback if parent.nil?

  siblings = parent.children
  position = siblings.index(ws_node)
  return fallback unless position

  # Following siblings take precedence over preceding ones.
  return "before" if non_ws_sibling_exists?(siblings, position, 1)
  return "after" if non_ws_sibling_exists?(siblings, position, -1)

  fallback
end
907
+
908
def non_ws_sibling_exists?(siblings, idx, direction)
  # Scan away from idx in steps of +direction+ until the index leaves the
  # list. The first sibling that is not a blank (empty or whitespace-only)
  # text node ends the search with true.
  cursor = idx + direction

  until cursor.negative? || cursor >= siblings.length
    sibling = siblings[cursor]

    return true unless NodeInspector.text_node?(sibling)
    return true unless NodeInspector.text_content(sibling).strip.empty?

    cursor += direction
  end

  false
end
920
+
827
921
  # Check if text is only whitespace
828
922
  #
829
923
  # @param text [String] Text to check