canon 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec-opal +7 -0
- data/.rubocop_todo.yml +16 -61
- data/README.adoc +5 -0
- data/Rakefile +17 -0
- data/docs/features/diff-formatting/comment-asymmetry.adoc +160 -0
- data/lib/canon/cli.rb +1 -1
- data/lib/canon/color_detector.rb +3 -5
- data/lib/canon/comparison/child_realignment.rb +140 -0
- data/lib/canon/comparison/compare_profile.rb +1 -4
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
- data/lib/canon/comparison/format_detector.rb +29 -20
- data/lib/canon/comparison/html_comparator.rb +36 -75
- data/lib/canon/comparison/html_compare_profile.rb +3 -10
- data/lib/canon/comparison/html_parser.rb +1 -1
- data/lib/canon/comparison/json_comparator.rb +8 -0
- data/lib/canon/comparison/node_inspector.rb +150 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
- data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +32 -77
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +43 -8
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
- data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
- data/lib/canon/comparison/xml_comparator.rb +89 -83
- data/lib/canon/comparison/xml_node_comparison.rb +15 -15
- data/lib/canon/comparison/yaml_comparator.rb +8 -0
- data/lib/canon/comparison.rb +25 -23
- data/lib/canon/config/profile_loader.rb +13 -13
- data/lib/canon/config.rb +29 -5
- data/lib/canon/diff/diff_classifier.rb +16 -42
- data/lib/canon/diff/diff_line.rb +1 -1
- data/lib/canon/diff/diff_node_enricher.rb +22 -24
- data/lib/canon/diff/node_serializer.rb +23 -30
- data/lib/canon/diff/path_builder.rb +24 -37
- data/lib/canon/diff/source_locator.rb +0 -3
- data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
- data/lib/canon/diff_formatter/debug_output.rb +12 -24
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
- data/lib/canon/diff_formatter/legend.rb +2 -2
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
- data/lib/canon/diff_formatter/theme.rb +4 -4
- data/lib/canon/diff_formatter.rb +2 -2
- data/lib/canon/formatters/html_formatter.rb +1 -1
- data/lib/canon/formatters/html_formatter_base.rb +1 -1
- data/lib/canon/formatters/xml_formatter.rb +7 -32
- data/lib/canon/html/data_model.rb +1 -1
- data/lib/canon/pretty_printer/html.rb +1 -1
- data/lib/canon/pretty_printer/xml.rb +16 -7
- data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
- data/lib/canon/rspec_matchers.rb +2 -2
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +1 -3
- data/lib/canon/validators/html_validator.rb +1 -1
- data/lib/canon/validators/xml_validator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +131 -137
- data/lib/canon/xml/namespace_helper.rb +5 -0
- data/lib/canon/xml/node.rb +2 -1
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +6 -1
- data/lib/canon/xml/sax_builder.rb +4 -6
- data/lib/canon/xml_backend.rb +49 -0
- data/lib/canon/xml_parsing.rb +271 -0
- data/lib/canon.rb +3 -1
- data/lib/tasks/benchmark_runner.rb +1 -1
- data/lib/tasks/performance_helpers.rb +1 -1
- metadata +7 -2
|
@@ -53,7 +53,7 @@ module Canon
|
|
|
53
53
|
# For deleted/inserted nodes, include namespace information if available
|
|
54
54
|
if dimension == :text_content && (node1.nil? || node2.nil?)
|
|
55
55
|
node = node1 || node2
|
|
56
|
-
if node.is_a?(Canon::Xml::Node) ||
|
|
56
|
+
if node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)
|
|
57
57
|
ns = node.namespace_uri
|
|
58
58
|
ns_info = if ns.nil? || ns.empty?
|
|
59
59
|
""
|
|
@@ -86,14 +86,22 @@ module Canon
|
|
|
86
86
|
return "Attribute order changed: [#{attrs1.join(', ')}] → [#{attrs2.join(', ')}]"
|
|
87
87
|
end
|
|
88
88
|
|
|
89
|
+
# For asymmetric comment nodes (#144), name the side that carries
|
|
90
|
+
# the comment and surface the comment text rather than reusing
|
|
91
|
+
# the generic "element structure mismatch" wording.
|
|
92
|
+
if dimension == :comments
|
|
93
|
+
comment_reason = build_comment_difference_reason(node1, node2)
|
|
94
|
+
return comment_reason if comment_reason
|
|
95
|
+
end
|
|
96
|
+
|
|
89
97
|
# Default reason
|
|
90
98
|
if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
|
|
91
99
|
"element structure mismatch (children differ)"
|
|
92
100
|
elsif dimension == :element_structure &&
|
|
93
101
|
diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
94
102
|
diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
95
|
-
(node1.is_a?(Canon::Xml::Node) ||
|
|
96
|
-
(node2.is_a?(Canon::Xml::Node) ||
|
|
103
|
+
(node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
|
|
104
|
+
(node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
|
|
97
105
|
node1.name && node2.name && node1.name != node2.name
|
|
98
106
|
"different element name (<#{node1.name}> vs <#{node2.name}>)"
|
|
99
107
|
else
|
|
@@ -190,12 +198,14 @@ module Canon
|
|
|
190
198
|
node.value
|
|
191
199
|
when Canon::Xml::Node
|
|
192
200
|
node.text_content
|
|
193
|
-
when Nokogiri::XML::Node
|
|
194
|
-
node.content.to_s
|
|
195
|
-
when String
|
|
196
|
-
node
|
|
197
201
|
else
|
|
198
|
-
node.
|
|
202
|
+
if Canon::XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Node)
|
|
203
|
+
node.content.to_s
|
|
204
|
+
elsif Canon::XmlParsing.xml_node?(node)
|
|
205
|
+
Canon::XmlParsing.text_content(node)
|
|
206
|
+
else
|
|
207
|
+
node.to_s
|
|
208
|
+
end
|
|
199
209
|
end
|
|
200
210
|
rescue StandardError
|
|
201
211
|
nil
|
|
@@ -217,6 +227,31 @@ module Canon
|
|
|
217
227
|
"'#{truncate(text1)}' vs '#{truncate(text2)}'"
|
|
218
228
|
end
|
|
219
229
|
|
|
230
|
+
# Build a Reason line for a +:comments+ diff. Returns +nil+ when
|
|
231
|
+
# neither side carries a comment (caller falls back to default).
|
|
232
|
+
def self.build_comment_difference_reason(node1, node2)
|
|
233
|
+
cm1 = node1 && Canon::Comparison::NodeInspector.comment_node?(node1)
|
|
234
|
+
cm2 = node2 && Canon::Comparison::NodeInspector.comment_node?(node2)
|
|
235
|
+
|
|
236
|
+
return nil unless cm1 || cm2
|
|
237
|
+
|
|
238
|
+
if cm1 && !cm2
|
|
239
|
+
"Comment present on EXPECTED only: " \
|
|
240
|
+
"<!--#{truncate(comment_text(node1))}-->"
|
|
241
|
+
elsif cm2 && !cm1
|
|
242
|
+
"Comment present on ACTUAL only: " \
|
|
243
|
+
"<!--#{truncate(comment_text(node2))}-->"
|
|
244
|
+
else
|
|
245
|
+
t1 = truncate(comment_text(node1))
|
|
246
|
+
t2 = truncate(comment_text(node2))
|
|
247
|
+
"Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
def self.comment_text(node)
|
|
252
|
+
Canon::Comparison::NodeInspector.text_content(node).to_s
|
|
253
|
+
end
|
|
254
|
+
|
|
220
255
|
# Truncate text for display in reason messages
|
|
221
256
|
#
|
|
222
257
|
# @param text [String] Text to truncate
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../../xml/namespace_helper"
|
|
4
|
+
|
|
3
5
|
module Canon
|
|
4
6
|
module Comparison
|
|
5
7
|
module XmlComparatorHelpers
|
|
@@ -41,20 +43,20 @@ module Canon
|
|
|
41
43
|
def self.extract_declarations(node)
|
|
42
44
|
declarations = {}
|
|
43
45
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
46
|
+
if node.is_a?(Canon::Xml::Node)
|
|
47
|
+
if node.namespace_nodes
|
|
48
|
+
return extract_from_namespace_nodes(node.namespace_nodes,
|
|
49
|
+
declarations)
|
|
50
|
+
end
|
|
49
51
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
+
raw_attrs = node.attribute_nodes
|
|
53
|
+
else
|
|
54
|
+
raw_attrs = node.attributes
|
|
55
|
+
end
|
|
52
56
|
|
|
53
|
-
# Handle Canon::Xml::Node attribute format (array of AttributeNode)
|
|
54
57
|
if raw_attrs.is_a?(Array)
|
|
55
58
|
extract_from_array_attributes(raw_attrs, declarations)
|
|
56
59
|
else
|
|
57
|
-
# Handle Nokogiri and Moxml attribute formats (Hash-like)
|
|
58
60
|
extract_from_hash_attributes(raw_attrs, declarations)
|
|
59
61
|
end
|
|
60
62
|
|
|
@@ -105,23 +107,11 @@ module Canon
|
|
|
105
107
|
# @return [Hash] Declarations hash
|
|
106
108
|
def self.extract_from_hash_attributes(raw_attrs, declarations)
|
|
107
109
|
raw_attrs.each do |key, val|
|
|
108
|
-
|
|
109
|
-
name = if key.is_a?(String)
|
|
110
|
-
# Nokogiri format: key=name (String), val=attr object
|
|
111
|
-
key
|
|
112
|
-
else
|
|
113
|
-
# Moxml format: key=attr object, val=nil
|
|
114
|
-
key.respond_to?(:name) ? key.name : key.to_s
|
|
115
|
-
end
|
|
110
|
+
name = key.is_a?(String) ? key : key.name
|
|
116
111
|
|
|
117
112
|
if namespace_declaration?(name)
|
|
118
|
-
value =
|
|
119
|
-
val.value
|
|
120
|
-
else
|
|
121
|
-
val.to_s
|
|
122
|
-
end
|
|
113
|
+
value = val.is_a?(String) ? val : val.value
|
|
123
114
|
|
|
124
|
-
# Extract prefix: "xmlns" -> "", "xmlns:xmi" -> "xmi"
|
|
125
115
|
prefix = name == "xmlns" ? "" : name.split(":", 2)[1]
|
|
126
116
|
declarations[prefix] = value
|
|
127
117
|
end
|
|
@@ -130,12 +120,8 @@ module Canon
|
|
|
130
120
|
declarations
|
|
131
121
|
end
|
|
132
122
|
|
|
133
|
-
# Check if an attribute name is a namespace declaration
|
|
134
|
-
#
|
|
135
|
-
# @param attr_name [String] Attribute name
|
|
136
|
-
# @return [Boolean] true if it's a namespace declaration
|
|
137
123
|
def self.namespace_declaration?(attr_name)
|
|
138
|
-
|
|
124
|
+
Canon::Xml::NamespaceHelper.namespace_declaration?(attr_name)
|
|
139
125
|
end
|
|
140
126
|
|
|
141
127
|
# Add a namespace declaration difference
|
|
@@ -77,21 +77,22 @@ module Canon
|
|
|
77
77
|
# @return [Canon::Xml::Node] Converted node
|
|
78
78
|
def self.convert_from_node(node, preserve_whitespace: false,
|
|
79
79
|
parser: nil)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
80
|
+
if Canon::XmlBackend.nokogiri?
|
|
81
|
+
if node.is_a?(Nokogiri::XML::Node)
|
|
82
|
+
return Canon::Xml::DataModel.build_from_nokogiri(
|
|
83
|
+
node, preserve_whitespace: preserve_whitespace
|
|
84
|
+
)
|
|
85
|
+
end
|
|
86
|
+
elsif node.is_a?(Moxml::Node)
|
|
87
|
+
return Canon::Xml::DataModel.build_from_moxml(
|
|
83
88
|
node, preserve_whitespace: preserve_whitespace
|
|
84
89
|
)
|
|
85
90
|
end
|
|
86
91
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
node.to_xml
|
|
90
|
-
elsif node.respond_to?(:to_s)
|
|
91
|
-
node.to_s
|
|
92
|
+
xml_str = if node.is_a?(String)
|
|
93
|
+
node
|
|
92
94
|
else
|
|
93
|
-
|
|
94
|
-
"Unable to convert node to string: #{node.class}"
|
|
95
|
+
node.to_xml
|
|
95
96
|
end
|
|
96
97
|
|
|
97
98
|
resolved_parser = parser || resolve_parser_config
|
|
@@ -112,7 +113,7 @@ parser: nil)
|
|
|
112
113
|
def self.resolve_parser_config
|
|
113
114
|
Canon::Config.instance.xml.diff.parser
|
|
114
115
|
rescue StandardError
|
|
115
|
-
:sax
|
|
116
|
+
Canon::XmlBackend.nokogiri? ? :sax : :dom
|
|
116
117
|
end
|
|
117
118
|
end
|
|
118
119
|
end
|
|
@@ -7,90 +7,62 @@ module Canon
|
|
|
7
7
|
#
|
|
8
8
|
# Handles dispatching comparison logic based on node type.
|
|
9
9
|
# Supports both Canon::Xml::Node (with symbolic node_type) and
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
# This module encapsulates the complex node type detection and dispatch
|
|
13
|
-
# logic, making the main XmlComparator cleaner and more maintainable.
|
|
10
|
+
# backend nodes (Nokogiri/Moxml) via XmlParsing type checks.
|
|
14
11
|
module NodeTypeComparator
|
|
15
12
|
class << self
|
|
16
|
-
# Compare two nodes by dispatching to appropriate comparison method
|
|
17
|
-
#
|
|
18
|
-
# @param node1 [Object] First node
|
|
19
|
-
# @param node2 [Object] Second node
|
|
20
|
-
# @param comparator [XmlComparator] The comparator instance for method delegation
|
|
21
|
-
# @param opts [Hash] Comparison options
|
|
22
|
-
# @param child_opts [Hash] Options for child comparison
|
|
23
|
-
# @param diff_children [Boolean] Whether to diff children
|
|
24
|
-
# @param differences [Array] Array to collect differences
|
|
25
|
-
# @return [Integer] Comparison result code
|
|
26
13
|
def compare(node1, node2, comparator, opts, child_opts,
|
|
27
14
|
diff_children, differences)
|
|
28
|
-
|
|
29
|
-
# Canon::Xml::Node types use .node_type method that returns symbols
|
|
30
|
-
# Nokogiri also has .node_type but returns integers, so check for Symbol
|
|
31
|
-
if node1.respond_to?(:node_type) && node2.respond_to?(:node_type) &&
|
|
32
|
-
node1.node_type.is_a?(Symbol) && node2.node_type.is_a?(Symbol)
|
|
15
|
+
if node1.is_a?(Canon::Xml::Node) && node2.is_a?(Canon::Xml::Node)
|
|
33
16
|
compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
|
|
34
17
|
diff_children, differences)
|
|
35
|
-
# Moxml/Nokogiri types use .element?, .text?, etc. methods
|
|
36
18
|
else
|
|
37
|
-
|
|
38
|
-
|
|
19
|
+
compare_by_backend_type(node1, node2, comparator, opts, child_opts,
|
|
20
|
+
diff_children, differences)
|
|
39
21
|
end
|
|
40
22
|
end
|
|
41
23
|
|
|
42
24
|
private
|
|
43
25
|
|
|
44
|
-
# Compare nodes using symbolic node_type (Canon::Xml::Node)
|
|
45
26
|
def compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
|
|
46
27
|
diff_children, differences)
|
|
47
28
|
case node1.node_type
|
|
48
29
|
when :root
|
|
49
|
-
comparator.
|
|
50
|
-
|
|
30
|
+
comparator.compare_children(node1, node2, opts, child_opts,
|
|
31
|
+
diff_children, differences)
|
|
51
32
|
when :element
|
|
52
|
-
comparator.
|
|
53
|
-
|
|
33
|
+
comparator.compare_element_nodes(node1, node2, opts, child_opts,
|
|
34
|
+
diff_children, differences)
|
|
54
35
|
when :text
|
|
55
|
-
comparator.
|
|
56
|
-
differences)
|
|
36
|
+
comparator.compare_text_nodes(node1, node2, opts, differences)
|
|
57
37
|
when :comment
|
|
58
|
-
comparator.
|
|
59
|
-
differences)
|
|
38
|
+
comparator.compare_comment_nodes(node1, node2, opts, differences)
|
|
60
39
|
when :cdata
|
|
61
|
-
comparator.
|
|
62
|
-
differences)
|
|
40
|
+
comparator.compare_text_nodes(node1, node2, opts, differences)
|
|
63
41
|
when :processing_instruction
|
|
64
|
-
comparator.
|
|
65
|
-
|
|
42
|
+
comparator.compare_processing_instruction_nodes(node1, node2, opts,
|
|
43
|
+
differences)
|
|
66
44
|
else
|
|
67
45
|
Comparison::EQUIVALENT
|
|
68
46
|
end
|
|
69
47
|
end
|
|
70
48
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
elsif
|
|
81
|
-
comparator.
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
comparator.send(:compare_processing_instruction_nodes, node1, node2, opts,
|
|
89
|
-
differences)
|
|
90
|
-
elsif node1.respond_to?(:root)
|
|
91
|
-
# Document node (Moxml/Nokogiri - legacy path)
|
|
92
|
-
comparator.send(:compare_document_nodes, node1, node2, opts, child_opts,
|
|
93
|
-
diff_children, differences)
|
|
49
|
+
def compare_by_backend_type(node1, node2, comparator, opts, child_opts,
|
|
50
|
+
diff_children, differences)
|
|
51
|
+
if Canon::XmlParsing.element?(node1)
|
|
52
|
+
comparator.compare_element_nodes(node1, node2, opts, child_opts,
|
|
53
|
+
diff_children, differences)
|
|
54
|
+
elsif Canon::XmlParsing.text_node?(node1)
|
|
55
|
+
comparator.compare_text_nodes(node1, node2, opts, differences)
|
|
56
|
+
elsif Canon::XmlParsing.comment?(node1)
|
|
57
|
+
comparator.compare_comment_nodes(node1, node2, opts, differences)
|
|
58
|
+
elsif Canon::XmlParsing.cdata?(node1)
|
|
59
|
+
comparator.compare_text_nodes(node1, node2, opts, differences)
|
|
60
|
+
elsif Canon::XmlParsing.processing_instruction?(node1)
|
|
61
|
+
comparator.compare_processing_instruction_nodes(node1, node2, opts,
|
|
62
|
+
differences)
|
|
63
|
+
elsif Canon::XmlParsing.document?(node1)
|
|
64
|
+
comparator.compare_document_nodes(node1, node2, opts, child_opts,
|
|
65
|
+
diff_children, differences)
|
|
94
66
|
else
|
|
95
67
|
Comparison::EQUIVALENT
|
|
96
68
|
end
|
|
@@ -122,16 +122,8 @@ module Canon
|
|
|
122
122
|
preserve_whitespace: preserve_whitespace)
|
|
123
123
|
|
|
124
124
|
# Store original strings for line diff display (before preprocessing)
|
|
125
|
-
original1 =
|
|
126
|
-
|
|
127
|
-
else
|
|
128
|
-
(n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
|
|
129
|
-
end
|
|
130
|
-
original2 = if n2.is_a?(String)
|
|
131
|
-
n2
|
|
132
|
-
else
|
|
133
|
-
(n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
|
|
134
|
-
end
|
|
125
|
+
original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
|
|
126
|
+
original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
|
|
135
127
|
|
|
136
128
|
differences = []
|
|
137
129
|
diff_children = opts[:diff_children] || false
|
|
@@ -187,16 +179,9 @@ module Canon
|
|
|
187
179
|
# @return [Boolean, ComparisonResult] Result of tree diff comparison
|
|
188
180
|
def perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
|
|
189
181
|
# Store original strings for line diff display (before preprocessing)
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
(n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
|
|
194
|
-
end
|
|
195
|
-
original2 = if n2.is_a?(String)
|
|
196
|
-
n2
|
|
197
|
-
else
|
|
198
|
-
(n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
|
|
199
|
-
end
|
|
182
|
+
# Store original strings for line diff display (before preprocessing)
|
|
183
|
+
original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
|
|
184
|
+
original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
|
|
200
185
|
|
|
201
186
|
# Parse to Canon::Xml::Node (preserves preprocessing)
|
|
202
187
|
node1 = parse_node(n1, match_opts_hash[:preprocessing])
|
|
@@ -262,20 +247,8 @@ module Canon
|
|
|
262
247
|
serialize_node(node1).gsub("><", ">\n<"),
|
|
263
248
|
serialize_node(node2).gsub("><", ">\n<"),
|
|
264
249
|
]
|
|
265
|
-
original1 =
|
|
266
|
-
|
|
267
|
-
elsif n1.respond_to?(:to_xml)
|
|
268
|
-
n1.to_xml
|
|
269
|
-
else
|
|
270
|
-
n1.to_s
|
|
271
|
-
end
|
|
272
|
-
original2 = if n2.is_a?(String)
|
|
273
|
-
n2
|
|
274
|
-
elsif n2.respond_to?(:to_xml)
|
|
275
|
-
n2.to_xml
|
|
276
|
-
else
|
|
277
|
-
n2.to_s
|
|
278
|
-
end
|
|
250
|
+
original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
|
|
251
|
+
original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
|
|
279
252
|
|
|
280
253
|
ComparisonResult.new(
|
|
281
254
|
differences: [],
|
|
@@ -289,14 +262,20 @@ module Canon
|
|
|
289
262
|
|
|
290
263
|
public
|
|
291
264
|
|
|
265
|
+
# Public parsing API for external callers
|
|
266
|
+
def parse(node, preprocessing = :none, preserve_whitespace: false)
|
|
267
|
+
parse_node(node, preprocessing,
|
|
268
|
+
preserve_whitespace: preserve_whitespace)
|
|
269
|
+
end
|
|
270
|
+
|
|
292
271
|
# Main comparison dispatcher
|
|
293
272
|
def compare_nodes(n1, n2, opts, child_opts, diff_children, differences)
|
|
294
273
|
# FAST PATH: Object identity - same object is always equivalent
|
|
295
274
|
return Comparison::EQUIVALENT if n1.equal?(n2)
|
|
296
275
|
|
|
297
276
|
# Handle DocumentFragment nodes - compare their children instead
|
|
298
|
-
if
|
|
299
|
-
|
|
277
|
+
if Canon::XmlParsing.document_fragment?(n1) &&
|
|
278
|
+
Canon::XmlParsing.document_fragment?(n2)
|
|
300
279
|
children1 = n1.children.to_a
|
|
301
280
|
children2 = n2.children.to_a
|
|
302
281
|
|
|
@@ -392,8 +371,8 @@ module Canon
|
|
|
392
371
|
end
|
|
393
372
|
|
|
394
373
|
# Compare namespace URIs - elements with different namespaces are different elements
|
|
395
|
-
ns1 =
|
|
396
|
-
ns2 =
|
|
374
|
+
ns1 = Canon::XmlParsing.namespace_uri(n1)
|
|
375
|
+
ns2 = Canon::XmlParsing.namespace_uri(n2)
|
|
397
376
|
|
|
398
377
|
unless ns1 == ns2
|
|
399
378
|
# Create descriptive reason showing the actual namespace URIs
|
|
@@ -410,18 +389,30 @@ module Canon
|
|
|
410
389
|
return Comparison::UNEQUAL_ELEMENTS
|
|
411
390
|
end
|
|
412
391
|
|
|
392
|
+
# Track the worst result across namespace, attribute, and children
|
|
393
|
+
# comparisons. Do NOT return early on attribute/namespace mismatches —
|
|
394
|
+
# children must still be compared so structural differences in the
|
|
395
|
+
# subtree are reported. Early returns caused the comparator to skip
|
|
396
|
+
# entire subtrees when a root or intermediate element had different
|
|
397
|
+
# attributes, missing all nested structural changes.
|
|
398
|
+
worst_result = Comparison::EQUIVALENT
|
|
399
|
+
|
|
413
400
|
# Compare namespace declarations (xmlns and xmlns:* attributes)
|
|
414
401
|
ns_result = compare_namespace_declarations(n1, n2, opts, differences)
|
|
415
|
-
|
|
402
|
+
worst_result = ns_result unless ns_result == Comparison::EQUIVALENT
|
|
416
403
|
|
|
417
404
|
# Compare attributes
|
|
418
405
|
attr_result = compare_attribute_sets(n1, n2, opts, differences)
|
|
419
|
-
|
|
406
|
+
worst_result = attr_result unless attr_result == Comparison::EQUIVALENT
|
|
420
407
|
|
|
421
408
|
# Compare children if not ignored
|
|
422
|
-
|
|
409
|
+
unless opts[:ignore_children]
|
|
410
|
+
child_result = compare_children(n1, n2, opts, child_opts,
|
|
411
|
+
diff_children, differences)
|
|
412
|
+
worst_result = child_result unless child_result == Comparison::EQUIVALENT
|
|
413
|
+
end
|
|
423
414
|
|
|
424
|
-
|
|
415
|
+
worst_result
|
|
425
416
|
end
|
|
426
417
|
|
|
427
418
|
# Compare attribute sets
|
|
@@ -500,7 +491,7 @@ module Canon
|
|
|
500
491
|
def should_preserve_whitespace_strictly?(n1, n2, opts)
|
|
501
492
|
# Check both n1 and n2 - if either is in a preserve whitespace element, preserve strictly
|
|
502
493
|
[n1, n2].each do |node|
|
|
503
|
-
next unless node.
|
|
494
|
+
next unless Canon::XmlParsing.xml_node?(node) || node.is_a?(Canon::Xml::Node)
|
|
504
495
|
|
|
505
496
|
parent = node.parent
|
|
506
497
|
next unless parent
|
|
@@ -516,15 +507,12 @@ module Canon
|
|
|
516
507
|
# Check if a node is inside a whitespace-preserving element
|
|
517
508
|
def in_preserve_element?(node, preserve_list)
|
|
518
509
|
current = node.parent
|
|
519
|
-
while current.
|
|
510
|
+
while Canon::XmlParsing.xml_node?(current) || current.is_a?(Canon::Xml::Node)
|
|
520
511
|
return true if preserve_list.include?(current.name.downcase)
|
|
521
512
|
|
|
522
|
-
|
|
523
|
-
break if current.is_a?(Nokogiri::XML::Document) ||
|
|
524
|
-
current.is_a?(Nokogiri::HTML4::Document) ||
|
|
525
|
-
current.is_a?(Nokogiri::HTML5::Document)
|
|
513
|
+
break if Canon::XmlParsing.document?(current)
|
|
526
514
|
|
|
527
|
-
current = current.parent
|
|
515
|
+
current = current.parent
|
|
528
516
|
break unless current
|
|
529
517
|
end
|
|
530
518
|
false
|
|
@@ -567,8 +555,8 @@ module Canon
|
|
|
567
555
|
return Comparison::UNEQUAL_NODES_TYPES
|
|
568
556
|
end
|
|
569
557
|
|
|
570
|
-
content1 =
|
|
571
|
-
content2 =
|
|
558
|
+
content1 = Canon::XmlParsing.xml_node?(n1) ? n1.content.to_s.strip : ""
|
|
559
|
+
content2 = Canon::XmlParsing.xml_node?(n2) ? n2.content.to_s.strip : ""
|
|
572
560
|
|
|
573
561
|
if content1 == content2
|
|
574
562
|
Comparison::EQUIVALENT
|
|
@@ -618,17 +606,19 @@ differences)
|
|
|
618
606
|
depth = 0
|
|
619
607
|
|
|
620
608
|
while current && depth < max_depth
|
|
621
|
-
if current.
|
|
622
|
-
|
|
623
|
-
|
|
609
|
+
n = if current.is_a?(Canon::Xml::Node)
|
|
610
|
+
current.name
|
|
611
|
+
elsif Canon::XmlParsing.xml_node?(current)
|
|
612
|
+
current.name
|
|
613
|
+
end
|
|
614
|
+
path.unshift(n) if n
|
|
624
615
|
|
|
625
|
-
break unless current.
|
|
616
|
+
break unless Canon::XmlParsing.xml_node?(current) || current.is_a?(Canon::Xml::Node)
|
|
626
617
|
|
|
627
618
|
current = current.parent
|
|
628
619
|
depth += 1
|
|
629
620
|
|
|
630
|
-
|
|
631
|
-
break if current.respond_to?(:root)
|
|
621
|
+
break if Canon::XmlParsing.document?(current)
|
|
632
622
|
end
|
|
633
623
|
|
|
634
624
|
path
|
|
@@ -665,8 +655,8 @@ differences)
|
|
|
665
655
|
# For deleted/inserted nodes, include namespace information if available
|
|
666
656
|
if dimension == :text_content && (node1.nil? || node2.nil?)
|
|
667
657
|
node = node1 || node2
|
|
668
|
-
if
|
|
669
|
-
ns =
|
|
658
|
+
if Canon::XmlParsing.xml_node?(node)
|
|
659
|
+
ns = Canon::XmlParsing.namespace_uri(node)
|
|
670
660
|
ns_info = if ns.nil? || ns.empty?
|
|
671
661
|
""
|
|
672
662
|
else
|
|
@@ -674,9 +664,8 @@ differences)
|
|
|
674
664
|
end
|
|
675
665
|
label = Canon::Comparison.code_pair_label(diff1, diff2)
|
|
676
666
|
return "element '#{node.name}'#{ns_info}: #{label}"
|
|
677
|
-
elsif node.
|
|
678
|
-
|
|
679
|
-
display = if node.respond_to?(:value) && node.node_type == :text
|
|
667
|
+
elsif node.is_a?(Canon::Xml::Node)
|
|
668
|
+
display = if node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
680
669
|
"\"#{truncate_text(node.value)}\""
|
|
681
670
|
else
|
|
682
671
|
node.name.to_s
|
|
@@ -703,6 +692,10 @@ differences)
|
|
|
703
692
|
return build_whitespace_adjacency_reason(node1, node2)
|
|
704
693
|
end
|
|
705
694
|
|
|
695
|
+
if dimension == :comments
|
|
696
|
+
return build_comments_reason(node1, node2)
|
|
697
|
+
end
|
|
698
|
+
|
|
706
699
|
# For attribute values differences, show the actual values
|
|
707
700
|
if dimension == :attribute_values
|
|
708
701
|
attrs1 = extract_attributes(node1)
|
|
@@ -722,8 +715,8 @@ differences)
|
|
|
722
715
|
elsif dimension == :element_structure &&
|
|
723
716
|
diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
724
717
|
diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
725
|
-
(node1.is_a?(Canon::Xml::Node) ||
|
|
726
|
-
(node2.is_a?(Canon::Xml::Node) ||
|
|
718
|
+
(node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
|
|
719
|
+
(node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
|
|
727
720
|
node1.name && node2.name && node1.name != node2.name
|
|
728
721
|
# Most common case: differing element names. Surface the
|
|
729
722
|
# actual names rather than a generic "elements differ".
|
|
@@ -794,27 +787,16 @@ differences)
|
|
|
794
787
|
# @return [String, nil] Text content or nil
|
|
795
788
|
def extract_text_from_node(node)
|
|
796
789
|
return nil if node.nil?
|
|
797
|
-
|
|
798
|
-
# For Canon::Xml::Nodes::TextNode
|
|
799
|
-
return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
800
|
-
|
|
801
|
-
# For XML/HTML nodes with text_content method
|
|
802
|
-
return node.text_content if node.respond_to?(:text_content)
|
|
803
|
-
|
|
804
|
-
# For nodes with text method
|
|
805
|
-
return node.text if node.respond_to?(:text)
|
|
806
|
-
|
|
807
|
-
# For nodes with content method (Moxml::Text)
|
|
808
|
-
return node.content if node.respond_to?(:content)
|
|
809
|
-
|
|
810
|
-
# For nodes with value method (other types)
|
|
811
|
-
return node.value if node.respond_to?(:value)
|
|
812
|
-
|
|
813
|
-
# For simple text nodes or strings
|
|
814
790
|
return node.to_s if node.is_a?(String)
|
|
815
791
|
|
|
816
|
-
|
|
817
|
-
|
|
792
|
+
case node
|
|
793
|
+
when Canon::Xml::Nodes::TextNode
|
|
794
|
+
node.value
|
|
795
|
+
when Canon::Xml::Node
|
|
796
|
+
node.text_content
|
|
797
|
+
else
|
|
798
|
+
Canon::XmlParsing.xml_node?(node) ? Canon::XmlParsing.text_content(node).to_s : node.to_s
|
|
799
|
+
end
|
|
818
800
|
rescue StandardError
|
|
819
801
|
nil
|
|
820
802
|
end
|
|
@@ -934,6 +916,30 @@ differences)
|
|
|
934
916
|
false
|
|
935
917
|
end
|
|
936
918
|
|
|
919
|
+
# Build a Reason line for a +:comments+ diff (#144).
|
|
920
|
+
# Names the side that carries the comment and surfaces the
|
|
921
|
+
# comment text.
|
|
922
|
+
def build_comments_reason(node1, node2)
|
|
923
|
+
cm1 = node1 && NodeInspector.comment_node?(node1)
|
|
924
|
+
cm2 = node2 && NodeInspector.comment_node?(node2)
|
|
925
|
+
|
|
926
|
+
if cm1 && !cm2
|
|
927
|
+
"Comment present on EXPECTED only: <!--#{truncate_text(comment_text(node1))}-->"
|
|
928
|
+
elsif cm2 && !cm1
|
|
929
|
+
"Comment present on ACTUAL only: <!--#{truncate_text(comment_text(node2))}-->"
|
|
930
|
+
elsif cm1 && cm2
|
|
931
|
+
t1 = truncate_text(comment_text(node1))
|
|
932
|
+
t2 = truncate_text(comment_text(node2))
|
|
933
|
+
"Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
|
|
934
|
+
else
|
|
935
|
+
"element structure mismatch (children differ)"
|
|
936
|
+
end
|
|
937
|
+
end
|
|
938
|
+
|
|
939
|
+
def comment_text(node)
|
|
940
|
+
NodeInspector.text_content(node).to_s
|
|
941
|
+
end
|
|
942
|
+
|
|
937
943
|
# Check if text is only whitespace
|
|
938
944
|
#
|
|
939
945
|
# @param text [String] Text to check
|