canon 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +4 -0
- data/docs/advanced/diff-classification.adoc +16 -0
- data/docs/advanced/semantic-diff-report.adoc +65 -0
- data/docs/features/diff-formatting/index.adoc +5 -0
- data/docs/features/diff-formatting/whitespace-adjacency.adoc +218 -0
- data/docs/reference/environment-variables.adoc +3 -1
- data/lib/canon/comparison/comparison_result.rb +16 -2
- data/lib/canon/comparison/html_comparator.rb +4 -0
- data/lib/canon/comparison/markup_comparator.rb +49 -71
- data/lib/canon/comparison/node_inspector.rb +103 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +127 -55
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +24 -23
- data/lib/canon/comparison/xml_comparator.rb +97 -3
- data/lib/canon/comparison/xml_node_comparison.rb +37 -81
- data/lib/canon/comparison.rb +59 -0
- data/lib/canon/diff/diff_classifier.rb +37 -39
- data/lib/canon/diff/xml_serialization_formatter.rb +27 -42
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +119 -9
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +75 -4
- data/lib/canon/diff_formatter.rb +71 -2
- data/lib/canon/pretty_printer/html.rb +76 -14
- data/lib/canon/pretty_printer/html_void_elements.rb +20 -0
- data/lib/canon/pretty_printer/xml_normalized.rb +10 -3
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +13 -1
- data/lib/canon/xml/node.rb +15 -0
- data/lib/canon/xml/sax_builder.rb +18 -0
- metadata +5 -2
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
  module Comparison
    # Single source of truth for cross-backend node type operations.
    #
    # The comparison pipeline handles nodes from two backends:
    # * Canon::Xml::Node (+ RootNode, ElementNode, TextNode, etc.) —
    #   custom DOM built by SAX builder and DataModel.
    # * Nokogiri::XML::Node (+ subclasses) — native Nokogiri nodes used
    #   by the HTML comparator and some legacy paths.
    #
    # Every method here dispatches on type via +case/when+ (+is_a?+).
    # No +respond_to?+ — the types are known at every call site.
    # Objects belonging to neither backend (including +nil+) are never
    # an error: each predicate answers +false+ for them.
    module NodeInspector
      # Value of +node_type+ reported by Canon text nodes.
      CANON_TEXT_TYPE = :text

      # Value of +node_type+ reported by Nokogiri text nodes. Falls back
      # to the literal 3 when the Nokogiri constant is not loaded yet.
      NOKOGIRI_TEXT_TYPE = defined?(Nokogiri::XML::Node::TEXT_NODE) ? Nokogiri::XML::Node::TEXT_NODE : 3

      # True when +node+ is a text node (whitespace, content, etc.).
      #
      # @param node [Object] node from either backend, or anything else
      # @return [Boolean]
      def self.text_node?(node)
        case node
        when Canon::Xml::Node
          node.node_type == CANON_TEXT_TYPE
        when Nokogiri::XML::Node
          node.node_type == NOKOGIRI_TEXT_TYPE
        else
          false
        end
      end

      # Extract the text content of +node+ as a String.
      #
      # Canon nodes are read through +value+, Nokogiri nodes through
      # +content+; any other object is stringified with +to_s+.
      #
      # @param node [Object]
      # @return [String]
      def self.text_content(node)
        case node
        when Canon::Xml::Node
          node.value.to_s
        when Nokogiri::XML::Node
          node.content.to_s
        else
          node.to_s
        end
      end

      # True when +node+ is a text node whose content is whitespace-only.
      # Empty-string text nodes return false — those represent genuine
      # empty-vs-content asymmetry, not pretty-print indentation.
      #
      # @param node [Object]
      # @return [Boolean]
      def self.whitespace_only_text?(node)
        return false unless text_node?(node)

        text = text_content(node)
        !text.empty? && text.strip.empty?
      end

      # True when +node+ is a comment node.
      # For HTML, also detects comments that Nokogiri parses as TEXT nodes
      # (content like "<!-- comment -->" or escaped "<\\!-- comment -->").
      #
      # @param node [Object]
      # @return [Boolean]
      def self.comment_node?(node)
        case node
        when Canon::Xml::Node
          node.node_type == :comment
        when Nokogiri::XML::Node
          return true if node.comment?
          return false unless node.text?

          # Backslashes are dropped before matching so the escaped
          # "<\!--" spelling is recognised as well. +text_content+
          # already returns a String, so no further coercion is needed.
          stripped = text_content(node).strip.gsub("\\", "")
          stripped.start_with?("<!--") && stripped.end_with?("-->")
        else
          false
        end
      end

      # True when +node+ is an element node.
      #
      # @param node [Object]
      # @return [Boolean]
      def self.element_node?(node)
        case node
        when Canon::Xml::Node
          node.node_type == :element
        when Nokogiri::XML::Node
          node.element?
        else
          false
        end
      end

      # Extract parse-time errors carried on +node+.
      #
      # Canon nodes are asked via +parse_errors+; on the Nokogiri side
      # only Document instances are consulted (via +errors+) — a
      # non-document Nokogiri node yields an empty list.
      #
      # Matching on Nokogiri::XML::Document alone is sufficient:
      # Nokogiri's HTML4/HTML5 document classes inherit from it, and not
      # naming Nokogiri::HTML5 keeps this method from raising NameError
      # on builds where that constant is not defined.
      #
      # @param node [Object, nil]
      # @return [Array<String>]
      def self.parse_errors(node)
        case node
        when nil
          []
        when Canon::Xml::Node
          Array(node.parse_errors).map(&:to_s)
        when Nokogiri::XML::Document
          Array(node.errors).map(&:to_s)
        else
          []
        end
      end
    end
  end
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../node_inspector"
|
|
4
|
+
|
|
3
5
|
module Canon
|
|
4
6
|
module Comparison
|
|
5
7
|
module XmlComparatorHelpers
|
|
@@ -27,7 +29,7 @@ module Canon
|
|
|
27
29
|
# @param differences [Array] Array to collect differences
|
|
28
30
|
# @return [Integer] Comparison result code
|
|
29
31
|
def compare(node1, node2, comparator, opts, child_opts,
|
|
30
|
-
diff_children, differences)
|
|
32
|
+
diff_children, differences)
|
|
31
33
|
# FAST PATH: Object identity - same object means equivalent children
|
|
32
34
|
return Comparison::EQUIVALENT if node1.equal?(node2)
|
|
33
35
|
|
|
@@ -40,8 +42,8 @@ diff_children, differences)
|
|
|
40
42
|
opts1 = XmlNodeComparison.opts_for_side(opts, :expected)
|
|
41
43
|
opts2 = XmlNodeComparison.opts_for_side(opts, :received)
|
|
42
44
|
|
|
43
|
-
children1 = comparator.
|
|
44
|
-
children2 = comparator.
|
|
45
|
+
children1 = comparator.filter_children(node1.children, opts1)
|
|
46
|
+
children2 = comparator.filter_children(node2.children, opts2)
|
|
45
47
|
|
|
46
48
|
# Quick check: if both have no children, they're equivalent
|
|
47
49
|
return Comparison::EQUIVALENT if children1.empty? && children2.empty?
|
|
@@ -97,9 +99,9 @@ diff_children, differences)
|
|
|
97
99
|
|
|
98
100
|
# If no matches and children exist, they're all different
|
|
99
101
|
if matches.empty? && (!children1.empty? || !children2.empty?)
|
|
100
|
-
comparator.
|
|
101
|
-
|
|
102
|
-
|
|
102
|
+
comparator.add_difference(parent_node, parent_node,
|
|
103
|
+
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
104
|
+
:text_content, opts, differences)
|
|
103
105
|
return Comparison::UNEQUAL_ELEMENTS
|
|
104
106
|
end
|
|
105
107
|
|
|
@@ -122,30 +124,30 @@ diff_children, differences)
|
|
|
122
124
|
|
|
123
125
|
# Only create DiffNode if element_position is not :ignore
|
|
124
126
|
if position_behavior != :ignore
|
|
125
|
-
comparator.
|
|
126
|
-
|
|
127
|
-
|
|
127
|
+
comparator.add_difference(match.elem1, match.elem2,
|
|
128
|
+
"position #{match.pos1}", "position #{match.pos2}",
|
|
129
|
+
:element_position, opts, differences)
|
|
128
130
|
all_equivalent = false if position_behavior == :strict
|
|
129
131
|
end
|
|
130
132
|
end
|
|
131
133
|
|
|
132
134
|
# Compare the matched elements for content/attribute differences
|
|
133
|
-
result = comparator.
|
|
134
|
-
|
|
135
|
+
result = comparator.compare_nodes(match.elem1, match.elem2,
|
|
136
|
+
child_opts, child_opts, diff_children, differences)
|
|
135
137
|
all_equivalent = false unless result == Comparison::EQUIVALENT
|
|
136
138
|
|
|
137
139
|
when :deleted
|
|
138
140
|
# Element present in first tree but not second
|
|
139
|
-
comparator.
|
|
140
|
-
|
|
141
|
-
|
|
141
|
+
comparator.add_difference(match.elem1, nil,
|
|
142
|
+
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
143
|
+
:element_structure, opts, differences)
|
|
142
144
|
all_equivalent = false
|
|
143
145
|
|
|
144
146
|
when :inserted
|
|
145
147
|
# Element present in second tree but not first
|
|
146
|
-
comparator.
|
|
147
|
-
|
|
148
|
-
|
|
148
|
+
comparator.add_difference(nil, match.elem2,
|
|
149
|
+
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
150
|
+
:element_structure, opts, differences)
|
|
149
151
|
all_equivalent = false
|
|
150
152
|
end
|
|
151
153
|
end
|
|
@@ -153,9 +155,16 @@ diff_children, differences)
|
|
|
153
155
|
all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ELEMENTS
|
|
154
156
|
end
|
|
155
157
|
|
|
156
|
-
# Use simple positional comparison for children
|
|
158
|
+
# Use simple positional comparison for children, with
|
|
159
|
+
# whitespace-asymmetry-aware re-alignment. When positional
|
|
160
|
+
# +zip()+ would pair a whitespace-only text node on one side
|
|
161
|
+
# against a content node on the other, treat the whitespace
|
|
162
|
+
# node as a single-side gap: emit one +:whitespace_adjacency+
|
|
163
|
+
# diff anchored at the whitespace node and advance only the
|
|
164
|
+
# cursor carrying the whitespace, so the next iteration aligns
|
|
165
|
+
# content against content. See lutaml/canon#137.
|
|
157
166
|
def use_positional_comparison(
|
|
158
|
-
children1, children2,
|
|
167
|
+
children1, children2, parent_node, comparator,
|
|
159
168
|
opts, child_opts, diff_children, differences
|
|
160
169
|
)
|
|
161
170
|
has_mismatch = false
|
|
@@ -163,53 +172,120 @@ diff_children, differences)
|
|
|
163
172
|
# Length check
|
|
164
173
|
unless children1.length == children2.length
|
|
165
174
|
has_mismatch = true
|
|
166
|
-
|
|
167
|
-
|
|
175
|
+
|
|
176
|
+
ws_asymmetric = asymmetric_whitespace_explains_length_diff?(
|
|
177
|
+
children1, children2
|
|
168
178
|
)
|
|
169
179
|
|
|
170
|
-
|
|
171
|
-
|
|
180
|
+
if ws_asymmetric
|
|
181
|
+
dimension = nil
|
|
182
|
+
mismatched_children = []
|
|
183
|
+
else
|
|
184
|
+
dimension = determine_dimension_for_mismatch(
|
|
172
185
|
children1, children2, comparator
|
|
173
186
|
)
|
|
187
|
+
mismatched_children, children1, children2 =
|
|
188
|
+
determine_mismatch_children(
|
|
189
|
+
children1, children2, comparator
|
|
190
|
+
)
|
|
191
|
+
end
|
|
174
192
|
|
|
175
193
|
if mismatched_children.empty?
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
194
|
+
unless ws_asymmetric
|
|
195
|
+
comparator.add_difference(parent_node, parent_node,
|
|
196
|
+
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
197
|
+
dimension, opts, differences)
|
|
198
|
+
end
|
|
179
199
|
else
|
|
180
200
|
mismatched_children.each do |child|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
201
|
+
child_dim = comparator.determine_node_dimension(child)
|
|
202
|
+
if children1.length > children2.length
|
|
203
|
+
comparator.add_difference(child, nil,
|
|
204
|
+
Comparison::MISSING_NODE,
|
|
205
|
+
Comparison::MISSING_NODE,
|
|
206
|
+
child_dim, opts, differences)
|
|
185
207
|
else
|
|
186
|
-
comparator.
|
|
187
|
-
|
|
188
|
-
|
|
208
|
+
comparator.add_difference(nil, child,
|
|
209
|
+
Comparison::MISSING_NODE,
|
|
210
|
+
Comparison::MISSING_NODE,
|
|
211
|
+
child_dim, opts, differences)
|
|
189
212
|
end
|
|
190
213
|
end
|
|
191
214
|
end
|
|
192
|
-
# Continue comparing children to find deeper differences like attribute values
|
|
193
|
-
# Use zip to compare up to the shorter length
|
|
194
215
|
end
|
|
195
216
|
|
|
196
|
-
# Compare children pairwise by position
|
|
197
217
|
result = has_mismatch ? Comparison::UNEQUAL_ELEMENTS : Comparison::EQUIVALENT
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
218
|
+
walk_result = walk_children_with_realignment(
|
|
219
|
+
children1, children2, comparator,
|
|
220
|
+
child_opts, diff_children, opts, differences
|
|
221
|
+
)
|
|
222
|
+
result = walk_result unless walk_result == Comparison::EQUIVALENT
|
|
223
|
+
result
|
|
224
|
+
end
|
|
201
225
|
|
|
202
|
-
|
|
203
|
-
|
|
226
|
+
# Two-cursor walk over paired children that re-aligns past
# asymmetric whitespace-only text nodes.
#
# Each step looks at one child per side:
# * exactly one of the pair is whitespace-only text — record a
#   +:whitespace_adjacency+ difference and advance only the cursor
#   holding the whitespace, so content meets content next time;
# * otherwise — delegate to +comparator.compare_nodes+ and advance
#   both cursors.
#
# Returns the most recent non-equivalent result seen (later results
# overwrite earlier ones; no severity ranking is applied), or
# +Comparison::EQUIVALENT+ when every step was equivalent.
#
# NOTE(review): once one side is exhausted the remaining children of
# the other side are skipped without recording anything here, even
# when they are whitespace-only text — confirm the caller reports
# them.
def walk_children_with_realignment(
  children1, children2, comparator,
  child_opts, diff_children, opts, differences
)
  result = Comparison::EQUIVALENT
  i = 0
  j = 0

  while i < children1.length || j < children2.length
    c1 = children1[i]
    c2 = children2[j]

    # One side has run out: step over the leftover on the other side.
    if c1.nil?
      j += 1
      next
    elsif c2.nil?
      i += 1
      next
    end

    ws1 = NodeInspector.whitespace_only_text?(c1)
    ws2 = NodeInspector.whitespace_only_text?(c2)

    # Both predicates are strict booleans, so "differ" means exactly
    # one side is whitespace-only.
    if ws1 != ws2
      comparator.add_difference(c1, c2,
                                Comparison::UNEQUAL_TEXT_CONTENTS,
                                Comparison::UNEQUAL_TEXT_CONTENTS,
                                :whitespace_adjacency, opts, differences)
      result = Comparison::UNEQUAL_TEXT_CONTENTS
      if ws1
        i += 1
      else
        j += 1
      end
      next
    end

    child_result = comparator.compare_nodes(c1, c2,
                                            child_opts, child_opts,
                                            diff_children, differences)
    result = child_result unless child_result == Comparison::EQUIVALENT
    i += 1
    j += 1
  end

  result
end
|
|
212
280
|
|
|
281
|
+
# True when the length difference between the two child arrays
# is fully explained by asymmetric whitespace-only text nodes,
# i.e. both sides hold the same number of children that are NOT
# whitespace-only text.
#
# Only the counts are compared — the kind and order of the
# remaining children are not inspected here.
#
# @param children1 [Enumerable] children from the first tree
# @param children2 [Enumerable] children from the second tree
# @return [Boolean]
def asymmetric_whitespace_explains_length_diff?(children1, children2)
  # Count in place instead of building filtered copies of both lists.
  content_count = lambda do |children|
    children.count { |c| !NodeInspector.whitespace_only_text?(c) }
  end

  content_count.call(children1) == content_count.call(children2)
end
|
|
288
|
+
|
|
213
289
|
# Determine dimension for length mismatch
|
|
214
290
|
def determine_dimension_for_mismatch(children1, children2, comparator)
|
|
215
291
|
dimension = :text_content # default
|
|
@@ -219,22 +295,17 @@ diff_children, differences)
|
|
|
219
295
|
(0...max_len).each do |i|
|
|
220
296
|
if i >= children1.length
|
|
221
297
|
# Extra child in children2
|
|
222
|
-
dimension = comparator.
|
|
223
|
-
children2[i])
|
|
298
|
+
dimension = comparator.determine_node_dimension(children2[i])
|
|
224
299
|
break
|
|
225
300
|
elsif i >= children2.length
|
|
226
301
|
# Extra child in children1
|
|
227
|
-
dimension = comparator.
|
|
228
|
-
children1[i])
|
|
302
|
+
dimension = comparator.determine_node_dimension(children1[i])
|
|
229
303
|
break
|
|
230
|
-
elsif !comparator.
|
|
231
|
-
children2[i])
|
|
304
|
+
elsif !comparator.same_node_type?(children1[i], children2[i])
|
|
232
305
|
# Different node types at same position
|
|
233
306
|
# Check both nodes - if either is a comment, use :comments dimension
|
|
234
|
-
dim1 = comparator.
|
|
235
|
-
|
|
236
|
-
dim2 = comparator.send(:determine_node_dimension,
|
|
237
|
-
children2[i])
|
|
307
|
+
dim1 = comparator.determine_node_dimension(children1[i])
|
|
308
|
+
dim2 = comparator.determine_node_dimension(children2[i])
|
|
238
309
|
dimension = [dim1, dim2].include?(:comments) ? :comments : dim1
|
|
239
310
|
break
|
|
240
311
|
end
|
|
@@ -256,7 +327,7 @@ diff_children, differences)
|
|
|
256
327
|
end
|
|
257
328
|
|
|
258
329
|
smaller_set_names = smaller_set.filter_map do |c|
|
|
259
|
-
next nil unless c.
|
|
330
|
+
next nil unless c.is_a?(Canon::Xml::Node) || c.is_a?(Nokogiri::XML::Node)
|
|
260
331
|
# Exclude generic node-type names (e.g. "#text") that are
|
|
261
332
|
# shared by all text nodes and cannot be used for matching.
|
|
262
333
|
next nil if c.name.start_with?("#")
|
|
@@ -271,7 +342,8 @@ diff_children, differences)
|
|
|
271
342
|
# If the smaller set has no child at this position,
|
|
272
343
|
# consider it a mismatch
|
|
273
344
|
mismatch_children << larger_set[i]
|
|
274
|
-
elsif larger_set[i].
|
|
345
|
+
elsif (larger_set[i].is_a?(Canon::Xml::Node) ||
|
|
346
|
+
larger_set[i].is_a?(Nokogiri::XML::Node)) &&
|
|
275
347
|
!larger_set[i].name.start_with?("#") &&
|
|
276
348
|
!smaller_set_names.include?(larger_set[i].name)
|
|
277
349
|
# If the name of the node is not found in the smaller set,
|
|
@@ -4,6 +4,7 @@ require "set"
|
|
|
4
4
|
require_relative "../../diff/diff_node"
|
|
5
5
|
require_relative "../../diff/path_builder"
|
|
6
6
|
require_relative "../../diff/node_serializer"
|
|
7
|
+
require_relative "../node_inspector"
|
|
7
8
|
|
|
8
9
|
module Canon
|
|
9
10
|
module Comparison
|
|
@@ -52,14 +53,15 @@ module Canon
|
|
|
52
53
|
# For deleted/inserted nodes, include namespace information if available
|
|
53
54
|
if dimension == :text_content && (node1.nil? || node2.nil?)
|
|
54
55
|
node = node1 || node2
|
|
55
|
-
if node.
|
|
56
|
+
if node.is_a?(Canon::Xml::Node) || node.is_a?(Nokogiri::XML::Node)
|
|
56
57
|
ns = node.namespace_uri
|
|
57
58
|
ns_info = if ns.nil? || ns.empty?
|
|
58
59
|
""
|
|
59
60
|
else
|
|
60
61
|
" (namespace: #{ns})"
|
|
61
62
|
end
|
|
62
|
-
|
|
63
|
+
label = Canon::Comparison.code_pair_label(diff1, diff2)
|
|
64
|
+
return "element '#{node.name}'#{ns_info}: #{label}"
|
|
63
65
|
end
|
|
64
66
|
end
|
|
65
67
|
|
|
@@ -87,8 +89,15 @@ module Canon
|
|
|
87
89
|
# Default reason
|
|
88
90
|
if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
|
|
89
91
|
"element structure mismatch (children differ)"
|
|
92
|
+
elsif dimension == :element_structure &&
|
|
93
|
+
diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
94
|
+
diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
95
|
+
(node1.is_a?(Canon::Xml::Node) || node1.is_a?(Nokogiri::XML::Node)) &&
|
|
96
|
+
(node2.is_a?(Canon::Xml::Node) || node2.is_a?(Nokogiri::XML::Node)) &&
|
|
97
|
+
node1.name && node2.name && node1.name != node2.name
|
|
98
|
+
"different element name (<#{node1.name}> vs <#{node2.name}>)"
|
|
90
99
|
else
|
|
91
|
-
|
|
100
|
+
Canon::Comparison.code_pair_label(diff1, diff2)
|
|
92
101
|
end
|
|
93
102
|
end
|
|
94
103
|
|
|
@@ -176,26 +185,18 @@ module Canon
|
|
|
176
185
|
def self.extract_text_content(node)
|
|
177
186
|
return nil if node.nil?
|
|
178
187
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
# For nodes with value method (other types)
|
|
192
|
-
return node.value if node.respond_to?(:value)
|
|
193
|
-
|
|
194
|
-
# For simple text nodes or strings
|
|
195
|
-
return node.to_s if node.is_a?(String)
|
|
196
|
-
|
|
197
|
-
# For other node types, try to_s
|
|
198
|
-
node.to_s
|
|
188
|
+
case node
|
|
189
|
+
when Canon::Xml::Nodes::TextNode
|
|
190
|
+
node.value
|
|
191
|
+
when Canon::Xml::Node
|
|
192
|
+
node.text_content
|
|
193
|
+
when Nokogiri::XML::Node
|
|
194
|
+
node.content.to_s
|
|
195
|
+
when String
|
|
196
|
+
node
|
|
197
|
+
else
|
|
198
|
+
node.to_s
|
|
199
|
+
end
|
|
199
200
|
rescue StandardError
|
|
200
201
|
nil
|
|
201
202
|
end
|
|
@@ -160,6 +160,8 @@ module Canon
|
|
|
160
160
|
format: :xml,
|
|
161
161
|
match_options: match_opts_hash,
|
|
162
162
|
algorithm: :dom,
|
|
163
|
+
parse_errors_expected: Comparison.parse_errors_for(node1),
|
|
164
|
+
parse_errors_received: Comparison.parse_errors_for(node2),
|
|
163
165
|
)
|
|
164
166
|
elsif result != Comparison::EQUIVALENT && !differences.empty?
|
|
165
167
|
# Non-verbose mode: check equivalence
|
|
@@ -222,6 +224,8 @@ module Canon
|
|
|
222
224
|
format: :xml,
|
|
223
225
|
match_options: match_opts_hash.merge(strategy.metadata),
|
|
224
226
|
algorithm: :semantic,
|
|
227
|
+
parse_errors_expected: Comparison.parse_errors_for(node1),
|
|
228
|
+
parse_errors_received: Comparison.parse_errors_for(node2),
|
|
225
229
|
)
|
|
226
230
|
else
|
|
227
231
|
# Simple boolean result - equivalent if no normative differences
|
|
@@ -283,6 +287,8 @@ module Canon
|
|
|
283
287
|
)
|
|
284
288
|
end
|
|
285
289
|
|
|
290
|
+
public
|
|
291
|
+
|
|
286
292
|
# Main comparison dispatcher
|
|
287
293
|
def compare_nodes(n1, n2, opts, child_opts, diff_children, differences)
|
|
288
294
|
# FAST PATH: Object identity - same object is always equivalent
|
|
@@ -373,7 +379,6 @@ module Canon
|
|
|
373
379
|
end
|
|
374
380
|
|
|
375
381
|
# Public comparison methods - exposed for XmlNodeComparison module
|
|
376
|
-
public
|
|
377
382
|
|
|
378
383
|
# Compare two element nodes
|
|
379
384
|
def compare_element_nodes(n1, n2, opts, child_opts, diff_children,
|
|
@@ -667,7 +672,8 @@ differences)
|
|
|
667
672
|
else
|
|
668
673
|
" (namespace: #{ns})"
|
|
669
674
|
end
|
|
670
|
-
|
|
675
|
+
label = Canon::Comparison.code_pair_label(diff1, diff2)
|
|
676
|
+
return "element '#{node.name}'#{ns_info}: #{label}"
|
|
671
677
|
elsif node.respond_to?(:name) && !node.respond_to?(:namespace_uri)
|
|
672
678
|
# TextNode and other nodes without namespace_uri
|
|
673
679
|
display = if node.respond_to?(:value) && node.node_type == :text
|
|
@@ -693,6 +699,10 @@ differences)
|
|
|
693
699
|
return build_text_diff_reason(text1, text2)
|
|
694
700
|
end
|
|
695
701
|
|
|
702
|
+
if dimension == :whitespace_adjacency
|
|
703
|
+
return build_whitespace_adjacency_reason(node1, node2)
|
|
704
|
+
end
|
|
705
|
+
|
|
696
706
|
# For attribute values differences, show the actual values
|
|
697
707
|
if dimension == :attribute_values
|
|
698
708
|
attrs1 = extract_attributes(node1)
|
|
@@ -709,8 +719,17 @@ differences)
|
|
|
709
719
|
|
|
710
720
|
if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
|
|
711
721
|
"element structure mismatch (children differ)"
|
|
722
|
+
elsif dimension == :element_structure &&
|
|
723
|
+
diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
724
|
+
diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
725
|
+
(node1.is_a?(Canon::Xml::Node) || node1.is_a?(Nokogiri::XML::Node)) &&
|
|
726
|
+
(node2.is_a?(Canon::Xml::Node) || node2.is_a?(Nokogiri::XML::Node)) &&
|
|
727
|
+
node1.name && node2.name && node1.name != node2.name
|
|
728
|
+
# Most common case: differing element names. Surface the
|
|
729
|
+
# actual names rather than a generic "elements differ".
|
|
730
|
+
"different element name (<#{node1.name}> vs <#{node2.name}>)"
|
|
712
731
|
else
|
|
713
|
-
|
|
732
|
+
Canon::Comparison.code_pair_label(diff1, diff2)
|
|
714
733
|
end
|
|
715
734
|
end
|
|
716
735
|
|
|
@@ -824,6 +843,81 @@ differences)
|
|
|
824
843
|
"Text: \"#{vis1}\" vs \"#{vis2}\""
|
|
825
844
|
end
|
|
826
845
|
|
|
846
|
+
# Build a Reason line for a +:whitespace_adjacency+ diff (#137).
# Names which side carries the whitespace, the adjacency position
# relative to content neighbours, and surfaces the whitespace
# with visible markers.
#
# +node1+ is labelled "EXPECTED" and +node2+ "ACTUAL". When the pair
# is not "exactly one side is whitespace-only text", the generic
# text-diff reason is returned instead.
#
# @param node1 [Object, nil] node from the expected side
# @param node2 [Object, nil] node from the received side
# @return [String]
def build_whitespace_adjacency_reason(node1, node2)
  text1 = extract_text_from_node(node1)
  text2 = extract_text_from_node(node2)

  # Evaluate each predicate once; both are strict booleans.
  first_is_ws = NodeInspector.whitespace_only_text?(node1)
  second_is_ws = NodeInspector.whitespace_only_text?(node2)

  # Both or neither whitespace-only: not an adjacency asymmetry.
  return build_text_diff_reason(text1, text2) if first_is_ws == second_is_ws

  if first_is_ws
    ws_node = node1
    ws_text = text1
    content_text = text2
    present_side = "EXPECTED"
    absent_side = "ACTUAL"
  else
    ws_node = node2
    ws_text = text2
    content_text = text1
    present_side = "ACTUAL"
    absent_side = "EXPECTED"
  end

  direction = whitespace_partner_direction(ws_node)
  ws_vis = visualize_whitespace(ws_text)
  # The partner may carry no text at all; show a placeholder then.
  content_vis = content_text ? visualize_whitespace(truncate_text(content_text)) : "(none)"

  "Whitespace #{direction} \"#{content_vis}\": " \
    "present on #{present_side} (\"#{ws_vis}\"), absent on #{absent_side}"
end
|
|
883
|
+
|
|
884
|
+
# Direction of the partner content relative to the whitespace node,
# phrased from the partner's point of view: "before" when the
# whitespace immediately precedes its next non-whitespace sibling
# (the alignment partner on the other side), "after" when the
# whitespace trails the previous non-whitespace sibling, or
# "adjacent to" as a degenerate fallback when neither neighbour
# exists.
#
# The fallback is also used when +ws_node+ belongs to neither node
# backend, has no parent, or cannot be located among its parent's
# children. A following neighbour takes precedence over a preceding
# one.
#
# @param ws_node [Object] the whitespace-only text node
# @return [String] "before", "after" or "adjacent to"
def whitespace_partner_direction(ws_node)
  fallback = "adjacent to"
  return fallback unless ws_node.is_a?(Canon::Xml::Node) ||
    ws_node.is_a?(Nokogiri::XML::Node)

  parent = ws_node.parent
  return fallback if parent.nil?

  siblings = parent.children
  idx = siblings.index(ws_node)
  return fallback unless idx

  if non_ws_sibling_exists?(siblings, idx, 1)
    "before"
  elsif non_ws_sibling_exists?(siblings, idx, -1)
    "after"
  else
    fallback
  end
end
|
|
907
|
+
|
|
908
|
+
# True when, walking away from +idx+ in steps of +direction+, some
# sibling is anything other than a blank text node.
#
# Unlike NodeInspector.whitespace_only_text?, an empty text node
# counts as blank here and is stepped over.
#
# @param siblings [#[], #length] ordered children of the parent
# @param idx [Integer] position of the starting node in +siblings+
# @param direction [Integer] +1 to look forward, -1 to look backward
# @return [Boolean]
def non_ws_sibling_exists?(siblings, idx, direction)
  first = idx + direction
  # Nothing to inspect when the first candidate is already outside
  # the sibling list (also covers an empty list).
  return false unless first.between?(0, siblings.length - 1)

  last = direction.positive? ? siblings.length - 1 : 0
  first.step(by: direction, to: last).any? do |position|
    sibling = siblings[position]
    blank_text = NodeInspector.text_node?(sibling) &&
      NodeInspector.text_content(sibling).strip.empty?
    !blank_text
  end
end
|
|
920
|
+
|
|
827
921
|
# Check if text is only whitespace
|
|
828
922
|
#
|
|
829
923
|
# @param text [String] Text to check
|