canon 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../node_inspector"
4
+
3
5
  module Canon
4
6
  module Comparison
5
7
  module XmlComparatorHelpers
@@ -27,7 +29,7 @@ module Canon
27
29
  # @param differences [Array] Array to collect differences
28
30
  # @return [Integer] Comparison result code
29
31
  def compare(node1, node2, comparator, opts, child_opts,
30
- diff_children, differences)
32
+ diff_children, differences)
31
33
  # FAST PATH: Object identity - same object means equivalent children
32
34
  return Comparison::EQUIVALENT if node1.equal?(node2)
33
35
 
@@ -40,8 +42,8 @@ diff_children, differences)
40
42
  opts1 = XmlNodeComparison.opts_for_side(opts, :expected)
41
43
  opts2 = XmlNodeComparison.opts_for_side(opts, :received)
42
44
 
43
- children1 = comparator.send(:filter_children, node1.children, opts1)
44
- children2 = comparator.send(:filter_children, node2.children, opts2)
45
+ children1 = comparator.filter_children(node1.children, opts1)
46
+ children2 = comparator.filter_children(node2.children, opts2)
45
47
 
46
48
  # Quick check: if both have no children, they're equivalent
47
49
  return Comparison::EQUIVALENT if children1.empty? && children2.empty?
@@ -97,9 +99,9 @@ diff_children, differences)
97
99
 
98
100
  # If no matches and children exist, they're all different
99
101
  if matches.empty? && (!children1.empty? || !children2.empty?)
100
- comparator.send(:add_difference, parent_node, parent_node,
101
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
102
- :text_content, opts, differences)
102
+ comparator.add_difference(parent_node, parent_node,
103
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
104
+ :text_content, opts, differences)
103
105
  return Comparison::UNEQUAL_ELEMENTS
104
106
  end
105
107
 
@@ -122,30 +124,30 @@ diff_children, differences)
122
124
 
123
125
  # Only create DiffNode if element_position is not :ignore
124
126
  if position_behavior != :ignore
125
- comparator.send(:add_difference, match.elem1, match.elem2,
126
- "position #{match.pos1}", "position #{match.pos2}",
127
- :element_position, opts, differences)
127
+ comparator.add_difference(match.elem1, match.elem2,
128
+ "position #{match.pos1}", "position #{match.pos2}",
129
+ :element_position, opts, differences)
128
130
  all_equivalent = false if position_behavior == :strict
129
131
  end
130
132
  end
131
133
 
132
134
  # Compare the matched elements for content/attribute differences
133
- result = comparator.send(:compare_nodes, match.elem1, match.elem2,
134
- child_opts, child_opts, diff_children, differences)
135
+ result = comparator.compare_nodes(match.elem1, match.elem2,
136
+ child_opts, child_opts, diff_children, differences)
135
137
  all_equivalent = false unless result == Comparison::EQUIVALENT
136
138
 
137
139
  when :deleted
138
140
  # Element present in first tree but not second
139
- comparator.send(:add_difference, match.elem1, nil,
140
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
141
- :element_structure, opts, differences)
141
+ comparator.add_difference(match.elem1, nil,
142
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
143
+ :element_structure, opts, differences)
142
144
  all_equivalent = false
143
145
 
144
146
  when :inserted
145
147
  # Element present in second tree but not first
146
- comparator.send(:add_difference, nil, match.elem2,
147
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
148
- :element_structure, opts, differences)
148
+ comparator.add_difference(nil, match.elem2,
149
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
150
+ :element_structure, opts, differences)
149
151
  all_equivalent = false
150
152
  end
151
153
  end
@@ -153,9 +155,16 @@ diff_children, differences)
153
155
  all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ELEMENTS
154
156
  end
155
157
 
156
- # Use simple positional comparison for children
158
+ # Use simple positional comparison for children, with
159
+ # whitespace-asymmetry-aware re-alignment. When positional
160
+ # +zip()+ would pair a whitespace-only text node on one side
161
+ # against a content node on the other, treat the whitespace
162
+ # node as a single-side gap: emit one +:whitespace_adjacency+
163
+ # diff anchored at the whitespace node and advance only the
164
+ # cursor carrying the whitespace, so the next iteration aligns
165
+ # content against content. See lutaml/canon#137.
157
166
  def use_positional_comparison(
158
- children1, children2, _parent_node, comparator,
167
+ children1, children2, parent_node, comparator,
159
168
  opts, child_opts, diff_children, differences
160
169
  )
161
170
  has_mismatch = false
@@ -163,53 +172,120 @@ diff_children, differences)
163
172
  # Length check
164
173
  unless children1.length == children2.length
165
174
  has_mismatch = true
166
- dimension = determine_dimension_for_mismatch(
167
- children1, children2, comparator
175
+
176
+ ws_asymmetric = asymmetric_whitespace_explains_length_diff?(
177
+ children1, children2
168
178
  )
169
179
 
170
- mismatched_children, children1, children2 =
171
- determine_mismatch_children(
180
+ if ws_asymmetric
181
+ dimension = nil
182
+ mismatched_children = []
183
+ else
184
+ dimension = determine_dimension_for_mismatch(
172
185
  children1, children2, comparator
173
186
  )
187
+ mismatched_children, children1, children2 =
188
+ determine_mismatch_children(
189
+ children1, children2, comparator
190
+ )
191
+ end
174
192
 
175
193
  if mismatched_children.empty?
176
- comparator.send(:add_difference, parent_node, parent_node,
177
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
178
- dimension, opts, differences)
194
+ unless ws_asymmetric
195
+ comparator.add_difference(parent_node, parent_node,
196
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
197
+ dimension, opts, differences)
198
+ end
179
199
  else
180
200
  mismatched_children.each do |child|
181
- if children1.length > children2.length # rubocop:disable Metrics/BlockNesting
182
- comparator.send(:add_difference, child, nil,
183
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
184
- dimension, opts, differences)
201
+ child_dim = comparator.determine_node_dimension(child)
202
+ if children1.length > children2.length
203
+ comparator.add_difference(child, nil,
204
+ Comparison::MISSING_NODE,
205
+ Comparison::MISSING_NODE,
206
+ child_dim, opts, differences)
185
207
  else
186
- comparator.send(:add_difference, nil, child,
187
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
188
- dimension, opts, differences)
208
+ comparator.add_difference(nil, child,
209
+ Comparison::MISSING_NODE,
210
+ Comparison::MISSING_NODE,
211
+ child_dim, opts, differences)
189
212
  end
190
213
  end
191
214
  end
192
- # Continue comparing children to find deeper differences like attribute values
193
- # Use zip to compare up to the shorter length
194
215
  end
195
216
 
196
- # Compare children pairwise by position
197
217
  result = has_mismatch ? Comparison::UNEQUAL_ELEMENTS : Comparison::EQUIVALENT
198
- children1.zip(children2).each do |child1, child2|
199
- # Skip if one is nil (due to different lengths)
200
- next if child1.nil? || child2.nil?
218
+ walk_result = walk_children_with_realignment(
219
+ children1, children2, comparator,
220
+ child_opts, diff_children, opts, differences
221
+ )
222
+ result = walk_result unless walk_result == Comparison::EQUIVALENT
223
+ result
224
+ end
201
225
 
202
- child_result = comparator.send(:compare_nodes, child1, child2,
203
- child_opts, child_opts, diff_children, differences)
226
# Walk two child lists with independent cursors, re-aligning past
# whitespace-only text nodes that appear on one side only.
#
# For each pair under the cursors:
# * a +nil+ entry on either side skips the pair (both cursors advance);
# * exactly one whitespace-only text node records a single
#   +:whitespace_adjacency+ difference and advances only the cursor
#   that carries the whitespace;
# * anything else is handed to +comparator.compare_nodes+ and both
#   cursors advance.
#
# The walk stops as soon as either list is exhausted; nodes left over on
# the longer side are not visited and produce no difference here.
# NOTE(review): a trailing whitespace-only node therefore gets no
# +:whitespace_adjacency+ entry from this method - confirm the caller
# reports that case.
#
# @param children1 [Array] children from the first (expected) side
# @param children2 [Array] children from the second (received) side
# @param comparator [Object] must respond to +add_difference+ and
#   +compare_nodes+
# @param child_opts [Hash] options forwarded to +compare_nodes+
# @param diff_children [Boolean] forwarded to +compare_nodes+
# @param opts [Hash] options forwarded to +add_difference+
# @param differences [Array] collector handed to the comparator
# @return [Integer] +Comparison::EQUIVALENT+ when every visited pair was
#   equivalent, otherwise the code of the last non-equivalent step
def walk_children_with_realignment(
  children1, children2, comparator,
  child_opts, diff_children, opts, differences
)
  result = Comparison::EQUIVALENT
  i = 0
  j = 0

  # Bounded by both cursors: once one side runs out there is no partner
  # left to compare, so draining the other side would be a no-op.
  while i < children1.length && j < children2.length
    c1 = children1[i]
    c2 = children2[j]

    # A nil entry cannot be compared; skip the pair and keep moving so
    # the walk always terminates.
    if c1.nil? || c2.nil?
      i += 1
      j += 1
      next
    end

    ws1 = NodeInspector.whitespace_only_text?(c1)
    ws2 = NodeInspector.whitespace_only_text?(c2)

    # Whitespace on exactly one side: report it once and step over it.
    if !ws1 != !ws2
      comparator.add_difference(c1, c2,
                                Comparison::UNEQUAL_TEXT_CONTENTS,
                                Comparison::UNEQUAL_TEXT_CONTENTS,
                                :whitespace_adjacency, opts, differences)
      result = Comparison::UNEQUAL_TEXT_CONTENTS
      if ws1
        i += 1
      else
        j += 1
      end
      next
    end

    child_result = comparator.compare_nodes(c1, c2,
                                            child_opts, child_opts,
                                            diff_children, differences)
    result = child_result unless child_result == Comparison::EQUIVALENT
    i += 1
    j += 1
  end

  result
end
212
280
 
281
# Tell whether the two child lists contain the same number of nodes once
# whitespace-only text nodes are left out of the tally, i.e. whether any
# difference in length comes from whitespace-only text nodes alone.
#
# @param children1 [Enumerable] children from the first side
# @param children2 [Enumerable] children from the second side
# @return [Boolean] true when the non-whitespace node counts match
def asymmetric_whitespace_explains_length_diff?(children1, children2)
  content_counts = [children1, children2].map do |children|
    children.count { |child| !NodeInspector.whitespace_only_text?(child) }
  end

  content_counts.first == content_counts.last
end
288
+
213
289
  # Determine dimension for length mismatch
214
290
  def determine_dimension_for_mismatch(children1, children2, comparator)
215
291
  dimension = :text_content # default
@@ -219,22 +295,17 @@ diff_children, differences)
219
295
  (0...max_len).each do |i|
220
296
  if i >= children1.length
221
297
  # Extra child in children2
222
- dimension = comparator.send(:determine_node_dimension,
223
- children2[i])
298
+ dimension = comparator.determine_node_dimension(children2[i])
224
299
  break
225
300
  elsif i >= children2.length
226
301
  # Extra child in children1
227
- dimension = comparator.send(:determine_node_dimension,
228
- children1[i])
302
+ dimension = comparator.determine_node_dimension(children1[i])
229
303
  break
230
- elsif !comparator.send(:same_node_type?, children1[i],
231
- children2[i])
304
+ elsif !comparator.same_node_type?(children1[i], children2[i])
232
305
  # Different node types at same position
233
306
  # Check both nodes - if either is a comment, use :comments dimension
234
- dim1 = comparator.send(:determine_node_dimension,
235
- children1[i])
236
- dim2 = comparator.send(:determine_node_dimension,
237
- children2[i])
307
+ dim1 = comparator.determine_node_dimension(children1[i])
308
+ dim2 = comparator.determine_node_dimension(children2[i])
238
309
  dimension = [dim1, dim2].include?(:comments) ? :comments : dim1
239
310
  break
240
311
  end
@@ -256,7 +327,7 @@ diff_children, differences)
256
327
  end
257
328
 
258
329
  smaller_set_names = smaller_set.filter_map do |c|
259
- next nil unless c.respond_to?(:name)
330
+ next nil unless c.is_a?(Canon::Xml::Node) || c.is_a?(Nokogiri::XML::Node)
260
331
  # Exclude generic node-type names (e.g. "#text") that are
261
332
  # shared by all text nodes and cannot be used for matching.
262
333
  next nil if c.name.start_with?("#")
@@ -271,7 +342,8 @@ diff_children, differences)
271
342
  # If the smaller set has no child at this position,
272
343
  # consider it a mismatch
273
344
  mismatch_children << larger_set[i]
274
- elsif larger_set[i].respond_to?(:name) &&
345
+ elsif (larger_set[i].is_a?(Canon::Xml::Node) ||
346
+ larger_set[i].is_a?(Nokogiri::XML::Node)) &&
275
347
  !larger_set[i].name.start_with?("#") &&
276
348
  !smaller_set_names.include?(larger_set[i].name)
277
349
  # If the name of the node is not found in the smaller set,
@@ -4,6 +4,7 @@ require "set"
4
4
  require_relative "../../diff/diff_node"
5
5
  require_relative "../../diff/path_builder"
6
6
  require_relative "../../diff/node_serializer"
7
+ require_relative "../node_inspector"
7
8
 
8
9
  module Canon
9
10
  module Comparison
@@ -52,14 +53,15 @@ module Canon
52
53
  # For deleted/inserted nodes, include namespace information if available
53
54
  if dimension == :text_content && (node1.nil? || node2.nil?)
54
55
  node = node1 || node2
55
- if node.respond_to?(:name) && node.respond_to?(:namespace_uri)
56
+ if node.is_a?(Canon::Xml::Node) || node.is_a?(Nokogiri::XML::Node)
56
57
  ns = node.namespace_uri
57
58
  ns_info = if ns.nil? || ns.empty?
58
59
  ""
59
60
  else
60
61
  " (namespace: #{ns})"
61
62
  end
62
- return "element '#{node.name}'#{ns_info}: #{diff1} vs #{diff2}"
63
+ label = Canon::Comparison.code_pair_label(diff1, diff2)
64
+ return "element '#{node.name}'#{ns_info}: #{label}"
63
65
  end
64
66
  end
65
67
 
@@ -87,8 +89,15 @@ module Canon
87
89
  # Default reason
88
90
  if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
89
91
  "element structure mismatch (children differ)"
92
+ elsif dimension == :element_structure &&
93
+ diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
94
+ diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
95
+ (node1.is_a?(Canon::Xml::Node) || node1.is_a?(Nokogiri::XML::Node)) &&
96
+ (node2.is_a?(Canon::Xml::Node) || node2.is_a?(Nokogiri::XML::Node)) &&
97
+ node1.name && node2.name && node1.name != node2.name
98
+ "different element name (<#{node1.name}> vs <#{node2.name}>)"
90
99
  else
91
- "#{diff1} vs #{diff2}"
100
+ Canon::Comparison.code_pair_label(diff1, diff2)
92
101
  end
93
102
  end
94
103
 
@@ -176,26 +185,18 @@ module Canon
176
185
  def self.extract_text_content(node)
177
186
  return nil if node.nil?
178
187
 
179
- # For Canon::Xml::Nodes::TextNode
180
- return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
181
-
182
- # For XML/HTML nodes with text_content method
183
- return node.text_content if node.respond_to?(:text_content)
184
-
185
- # For nodes with text method
186
- return node.text if node.respond_to?(:text)
187
-
188
- # For nodes with content method (Moxml::Text)
189
- return node.content if node.respond_to?(:content)
190
-
191
- # For nodes with value method (other types)
192
- return node.value if node.respond_to?(:value)
193
-
194
- # For simple text nodes or strings
195
- return node.to_s if node.is_a?(String)
196
-
197
- # For other node types, try to_s
198
- node.to_s
188
+ case node
189
+ when Canon::Xml::Nodes::TextNode
190
+ node.value
191
+ when Canon::Xml::Node
192
+ node.text_content
193
+ when Nokogiri::XML::Node
194
+ node.content.to_s
195
+ when String
196
+ node
197
+ else
198
+ node.to_s
199
+ end
199
200
  rescue StandardError
200
201
  nil
201
202
  end
@@ -160,6 +160,8 @@ module Canon
160
160
  format: :xml,
161
161
  match_options: match_opts_hash,
162
162
  algorithm: :dom,
163
+ parse_errors_expected: Comparison.parse_errors_for(node1),
164
+ parse_errors_received: Comparison.parse_errors_for(node2),
163
165
  )
164
166
  elsif result != Comparison::EQUIVALENT && !differences.empty?
165
167
  # Non-verbose mode: check equivalence
@@ -222,6 +224,8 @@ module Canon
222
224
  format: :xml,
223
225
  match_options: match_opts_hash.merge(strategy.metadata),
224
226
  algorithm: :semantic,
227
+ parse_errors_expected: Comparison.parse_errors_for(node1),
228
+ parse_errors_received: Comparison.parse_errors_for(node2),
225
229
  )
226
230
  else
227
231
  # Simple boolean result - equivalent if no normative differences
@@ -283,6 +287,8 @@ module Canon
283
287
  )
284
288
  end
285
289
 
290
+ public
291
+
286
292
  # Main comparison dispatcher
287
293
  def compare_nodes(n1, n2, opts, child_opts, diff_children, differences)
288
294
  # FAST PATH: Object identity - same object is always equivalent
@@ -373,7 +379,6 @@ module Canon
373
379
  end
374
380
 
375
381
  # Public comparison methods - exposed for XmlNodeComparison module
376
- public
377
382
 
378
383
  # Compare two element nodes
379
384
  def compare_element_nodes(n1, n2, opts, child_opts, diff_children,
@@ -667,7 +672,8 @@ differences)
667
672
  else
668
673
  " (namespace: #{ns})"
669
674
  end
670
- return "element '#{node.name}'#{ns_info}: #{diff1} vs #{diff2}"
675
+ label = Canon::Comparison.code_pair_label(diff1, diff2)
676
+ return "element '#{node.name}'#{ns_info}: #{label}"
671
677
  elsif node.respond_to?(:name) && !node.respond_to?(:namespace_uri)
672
678
  # TextNode and other nodes without namespace_uri
673
679
  display = if node.respond_to?(:value) && node.node_type == :text
@@ -693,6 +699,10 @@ differences)
693
699
  return build_text_diff_reason(text1, text2)
694
700
  end
695
701
 
702
+ if dimension == :whitespace_adjacency
703
+ return build_whitespace_adjacency_reason(node1, node2)
704
+ end
705
+
696
706
  # For attribute values differences, show the actual values
697
707
  if dimension == :attribute_values
698
708
  attrs1 = extract_attributes(node1)
@@ -709,8 +719,17 @@ differences)
709
719
 
710
720
  if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
711
721
  "element structure mismatch (children differ)"
722
+ elsif dimension == :element_structure &&
723
+ diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
724
+ diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
725
+ (node1.is_a?(Canon::Xml::Node) || node1.is_a?(Nokogiri::XML::Node)) &&
726
+ (node2.is_a?(Canon::Xml::Node) || node2.is_a?(Nokogiri::XML::Node)) &&
727
+ node1.name && node2.name && node1.name != node2.name
728
+ # Most common case: differing element names. Surface the
729
+ # actual names rather than a generic "elements differ".
730
+ "different element name (<#{node1.name}> vs <#{node2.name}>)"
712
731
  else
713
- "#{diff1} vs #{diff2}"
732
+ Canon::Comparison.code_pair_label(diff1, diff2)
714
733
  end
715
734
  end
716
735
 
@@ -824,6 +843,78 @@ differences)
824
843
  "Text: \"#{vis1}\" vs \"#{vis2}\""
825
844
  end
826
845
 
846
# Compose the Reason line for a +:whitespace_adjacency+ difference (#137).
#
# The line states which side (EXPECTED for +node1+, ACTUAL for +node2+)
# carries the whitespace-only text, where that whitespace sits relative
# to its content neighbours, and shows both the whitespace and the
# opposing content through +visualize_whitespace+. When both nodes or
# neither node is whitespace-only text, the regular text-diff reason is
# returned instead.
#
# @param node1 [Object] node from the expected side
# @param node2 [Object] node from the received side
# @return [String] human-readable reason
def build_whitespace_adjacency_reason(node1, node2)
  expected_text = extract_text_from_node(node1)
  actual_text = extract_text_from_node(node2)

  first_blank = NodeInspector.whitespace_only_text?(node1)
  second_blank = NodeInspector.whitespace_only_text?(node2)

  # Not a one-sided whitespace case: describe it as a plain text diff.
  if !first_blank == !second_blank
    return build_text_diff_reason(expected_text, actual_text)
  end

  blank_node, blank_text, other_text, holder, misser =
    if first_blank
      [node1, expected_text, actual_text, "EXPECTED", "ACTUAL"]
    else
      [node2, actual_text, expected_text, "ACTUAL", "EXPECTED"]
    end

  where = whitespace_adjacency_position(blank_node)
  shown_blank = visualize_whitespace(blank_text)
  shown_other = if other_text
                  visualize_whitespace(truncate_text(other_text))
                else
                  "(none)"
                end

  "Whitespace #{where} \"#{shown_other}\": " \
    "present on #{holder} (\"#{shown_blank}\"), absent on #{misser}"
end
883
+
884
# Classify where a whitespace node sits among its parent's children.
#
# @param ws_node [Object] the whitespace node to locate
# @return [Symbol] +:surrounding+ when +sibling_with_content?+ reports
#   content on both sides, +:following+ when only before, +:preceding+
#   when only after, and +:isolated+ otherwise (including when the node
#   is not a Canon/Nokogiri node, has no parent, or is not found among
#   the parent's children)
def whitespace_adjacency_position(ws_node)
  known_node = ws_node.is_a?(Canon::Xml::Node) ||
    ws_node.is_a?(Nokogiri::XML::Node)
  return :isolated unless known_node

  container = ws_node.parent
  return :isolated if container.nil?

  peers = container.children
  at = peers.index(ws_node)
  return :isolated unless at

  content_before = sibling_with_content?(peers, at, -1)
  content_after = sibling_with_content?(peers, at, 1)

  return :surrounding if content_before && content_after
  return :following if content_before
  return :preceding if content_after

  :isolated
end
904
+
905
# Scan +siblings+ away from +idx+ in steps of +direction+ and report
# whether any node other than a whitespace-only text node is met before
# running off either end of the list.
#
# @param siblings [Array] sibling nodes, indexable by position
# @param idx [Integer] position of the starting node (not inspected)
# @param direction [Integer] step added to the position on each move
#   (callers in this file pass -1 or 1)
# @return [Boolean] true when a content-bearing sibling is found
def sibling_with_content?(siblings, idx, direction)
  pos = idx + direction

  while pos.between?(0, siblings.length - 1)
    candidate = siblings[pos]
    blank_text = NodeInspector.text_node?(candidate) &&
      NodeInspector.text_content(candidate).strip.empty?
    return true unless blank_text

    pos += direction
  end

  false
end
917
+
827
918
  # Check if text is only whitespace
828
919
  #
829
920
  # @param text [String] Text to check