canon 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec-opal +7 -0
- data/.rubocop_todo.yml +14 -71
- data/Rakefile +17 -0
- data/lib/canon/cli.rb +1 -1
- data/lib/canon/color_detector.rb +3 -5
- data/lib/canon/comparison/compare_profile.rb +1 -4
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
- data/lib/canon/comparison/format_detector.rb +29 -20
- data/lib/canon/comparison/html_comparator.rb +18 -29
- data/lib/canon/comparison/html_compare_profile.rb +3 -10
- data/lib/canon/comparison/html_parser.rb +1 -1
- data/lib/canon/comparison/json_comparator.rb +8 -0
- data/lib/canon/comparison/node_inspector.rb +146 -80
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
- data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
- data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
- data/lib/canon/comparison/xml_comparator.rb +61 -83
- data/lib/canon/comparison/xml_node_comparison.rb +15 -15
- data/lib/canon/comparison/yaml_comparator.rb +8 -0
- data/lib/canon/comparison.rb +23 -23
- data/lib/canon/config/profile_loader.rb +13 -13
- data/lib/canon/config.rb +29 -5
- data/lib/canon/diff/diff_classifier.rb +7 -41
- data/lib/canon/diff/diff_line.rb +1 -1
- data/lib/canon/diff/diff_node_enricher.rb +22 -24
- data/lib/canon/diff/node_serializer.rb +23 -30
- data/lib/canon/diff/path_builder.rb +24 -37
- data/lib/canon/diff/source_locator.rb +0 -3
- data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
- data/lib/canon/diff_formatter/debug_output.rb +12 -24
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
- data/lib/canon/diff_formatter/legend.rb +2 -2
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
- data/lib/canon/diff_formatter/theme.rb +4 -4
- data/lib/canon/diff_formatter.rb +2 -2
- data/lib/canon/formatters/html_formatter.rb +1 -1
- data/lib/canon/formatters/html_formatter_base.rb +1 -1
- data/lib/canon/formatters/xml_formatter.rb +7 -32
- data/lib/canon/html/data_model.rb +1 -1
- data/lib/canon/pretty_printer/html.rb +1 -1
- data/lib/canon/pretty_printer/xml.rb +16 -7
- data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
- data/lib/canon/rspec_matchers.rb +2 -2
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +1 -3
- data/lib/canon/validators/html_validator.rb +1 -1
- data/lib/canon/validators/xml_validator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +131 -137
- data/lib/canon/xml/namespace_helper.rb +5 -0
- data/lib/canon/xml/node.rb +2 -1
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +6 -1
- data/lib/canon/xml/sax_builder.rb +4 -6
- data/lib/canon/xml_backend.rb +49 -0
- data/lib/canon/xml_parsing.rb +271 -0
- data/lib/canon.rb +3 -1
- data/lib/tasks/benchmark_runner.rb +1 -1
- data/lib/tasks/performance_helpers.rb +1 -1
- metadata +5 -2
|
@@ -77,21 +77,22 @@ module Canon
|
|
|
77
77
|
# @return [Canon::Xml::Node] Converted node
|
|
78
78
|
def self.convert_from_node(node, preserve_whitespace: false,
|
|
79
79
|
parser: nil)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
80
|
+
if Canon::XmlBackend.nokogiri?
|
|
81
|
+
if node.is_a?(Nokogiri::XML::Node)
|
|
82
|
+
return Canon::Xml::DataModel.build_from_nokogiri(
|
|
83
|
+
node, preserve_whitespace: preserve_whitespace
|
|
84
|
+
)
|
|
85
|
+
end
|
|
86
|
+
elsif node.is_a?(Moxml::Node)
|
|
87
|
+
return Canon::Xml::DataModel.build_from_moxml(
|
|
83
88
|
node, preserve_whitespace: preserve_whitespace
|
|
84
89
|
)
|
|
85
90
|
end
|
|
86
91
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
node.to_xml
|
|
90
|
-
elsif node.respond_to?(:to_s)
|
|
91
|
-
node.to_s
|
|
92
|
+
xml_str = if node.is_a?(String)
|
|
93
|
+
node
|
|
92
94
|
else
|
|
93
|
-
|
|
94
|
-
"Unable to convert node to string: #{node.class}"
|
|
95
|
+
node.to_xml
|
|
95
96
|
end
|
|
96
97
|
|
|
97
98
|
resolved_parser = parser || resolve_parser_config
|
|
@@ -112,7 +113,7 @@ parser: nil)
|
|
|
112
113
|
def self.resolve_parser_config
|
|
113
114
|
Canon::Config.instance.xml.diff.parser
|
|
114
115
|
rescue StandardError
|
|
115
|
-
:sax
|
|
116
|
+
Canon::XmlBackend.nokogiri? ? :sax : :dom
|
|
116
117
|
end
|
|
117
118
|
end
|
|
118
119
|
end
|
|
@@ -7,90 +7,62 @@ module Canon
|
|
|
7
7
|
#
|
|
8
8
|
# Handles dispatching comparison logic based on node type.
|
|
9
9
|
# Supports both Canon::Xml::Node (with symbolic node_type) and
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
# This module encapsulates the complex node type detection and dispatch
|
|
13
|
-
# logic, making the main XmlComparator cleaner and more maintainable.
|
|
10
|
+
# backend nodes (Nokogiri/Moxml) via XmlParsing type checks.
|
|
14
11
|
module NodeTypeComparator
|
|
15
12
|
class << self
|
|
16
|
-
# Compare two nodes by dispatching to appropriate comparison method
|
|
17
|
-
#
|
|
18
|
-
# @param node1 [Object] First node
|
|
19
|
-
# @param node2 [Object] Second node
|
|
20
|
-
# @param comparator [XmlComparator] The comparator instance for method delegation
|
|
21
|
-
# @param opts [Hash] Comparison options
|
|
22
|
-
# @param child_opts [Hash] Options for child comparison
|
|
23
|
-
# @param diff_children [Boolean] Whether to diff children
|
|
24
|
-
# @param differences [Array] Array to collect differences
|
|
25
|
-
# @return [Integer] Comparison result code
|
|
26
13
|
def compare(node1, node2, comparator, opts, child_opts,
|
|
27
14
|
diff_children, differences)
|
|
28
|
-
|
|
29
|
-
# Canon::Xml::Node types use .node_type method that returns symbols
|
|
30
|
-
# Nokogiri also has .node_type but returns integers, so check for Symbol
|
|
31
|
-
if node1.respond_to?(:node_type) && node2.respond_to?(:node_type) &&
|
|
32
|
-
node1.node_type.is_a?(Symbol) && node2.node_type.is_a?(Symbol)
|
|
15
|
+
if node1.is_a?(Canon::Xml::Node) && node2.is_a?(Canon::Xml::Node)
|
|
33
16
|
compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
|
|
34
17
|
diff_children, differences)
|
|
35
|
-
# Moxml/Nokogiri types use .element?, .text?, etc. methods
|
|
36
18
|
else
|
|
37
|
-
|
|
38
|
-
|
|
19
|
+
compare_by_backend_type(node1, node2, comparator, opts, child_opts,
|
|
20
|
+
diff_children, differences)
|
|
39
21
|
end
|
|
40
22
|
end
|
|
41
23
|
|
|
42
24
|
private
|
|
43
25
|
|
|
44
|
-
# Compare nodes using symbolic node_type (Canon::Xml::Node)
|
|
45
26
|
def compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
|
|
46
27
|
diff_children, differences)
|
|
47
28
|
case node1.node_type
|
|
48
29
|
when :root
|
|
49
|
-
comparator.
|
|
50
|
-
|
|
30
|
+
comparator.compare_children(node1, node2, opts, child_opts,
|
|
31
|
+
diff_children, differences)
|
|
51
32
|
when :element
|
|
52
|
-
comparator.
|
|
53
|
-
|
|
33
|
+
comparator.compare_element_nodes(node1, node2, opts, child_opts,
|
|
34
|
+
diff_children, differences)
|
|
54
35
|
when :text
|
|
55
|
-
comparator.
|
|
56
|
-
differences)
|
|
36
|
+
comparator.compare_text_nodes(node1, node2, opts, differences)
|
|
57
37
|
when :comment
|
|
58
|
-
comparator.
|
|
59
|
-
differences)
|
|
38
|
+
comparator.compare_comment_nodes(node1, node2, opts, differences)
|
|
60
39
|
when :cdata
|
|
61
|
-
comparator.
|
|
62
|
-
differences)
|
|
40
|
+
comparator.compare_text_nodes(node1, node2, opts, differences)
|
|
63
41
|
when :processing_instruction
|
|
64
|
-
comparator.
|
|
65
|
-
|
|
42
|
+
comparator.compare_processing_instruction_nodes(node1, node2, opts,
|
|
43
|
+
differences)
|
|
66
44
|
else
|
|
67
45
|
Comparison::EQUIVALENT
|
|
68
46
|
end
|
|
69
47
|
end
|
|
70
48
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
elsif
|
|
81
|
-
comparator.
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
comparator.send(:compare_processing_instruction_nodes, node1, node2, opts,
|
|
89
|
-
differences)
|
|
90
|
-
elsif node1.respond_to?(:root)
|
|
91
|
-
# Document node (Moxml/Nokogiri - legacy path)
|
|
92
|
-
comparator.send(:compare_document_nodes, node1, node2, opts, child_opts,
|
|
93
|
-
diff_children, differences)
|
|
49
|
+
def compare_by_backend_type(node1, node2, comparator, opts, child_opts,
|
|
50
|
+
diff_children, differences)
|
|
51
|
+
if Canon::XmlParsing.element?(node1)
|
|
52
|
+
comparator.compare_element_nodes(node1, node2, opts, child_opts,
|
|
53
|
+
diff_children, differences)
|
|
54
|
+
elsif Canon::XmlParsing.text_node?(node1)
|
|
55
|
+
comparator.compare_text_nodes(node1, node2, opts, differences)
|
|
56
|
+
elsif Canon::XmlParsing.comment?(node1)
|
|
57
|
+
comparator.compare_comment_nodes(node1, node2, opts, differences)
|
|
58
|
+
elsif Canon::XmlParsing.cdata?(node1)
|
|
59
|
+
comparator.compare_text_nodes(node1, node2, opts, differences)
|
|
60
|
+
elsif Canon::XmlParsing.processing_instruction?(node1)
|
|
61
|
+
comparator.compare_processing_instruction_nodes(node1, node2, opts,
|
|
62
|
+
differences)
|
|
63
|
+
elsif Canon::XmlParsing.document?(node1)
|
|
64
|
+
comparator.compare_document_nodes(node1, node2, opts, child_opts,
|
|
65
|
+
diff_children, differences)
|
|
94
66
|
else
|
|
95
67
|
Comparison::EQUIVALENT
|
|
96
68
|
end
|
|
@@ -122,16 +122,8 @@ module Canon
|
|
|
122
122
|
preserve_whitespace: preserve_whitespace)
|
|
123
123
|
|
|
124
124
|
# Store original strings for line diff display (before preprocessing)
|
|
125
|
-
original1 =
|
|
126
|
-
|
|
127
|
-
else
|
|
128
|
-
(n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
|
|
129
|
-
end
|
|
130
|
-
original2 = if n2.is_a?(String)
|
|
131
|
-
n2
|
|
132
|
-
else
|
|
133
|
-
(n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
|
|
134
|
-
end
|
|
125
|
+
original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
|
|
126
|
+
original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
|
|
135
127
|
|
|
136
128
|
differences = []
|
|
137
129
|
diff_children = opts[:diff_children] || false
|
|
@@ -187,16 +179,9 @@ module Canon
|
|
|
187
179
|
# @return [Boolean, ComparisonResult] Result of tree diff comparison
|
|
188
180
|
def perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
|
|
189
181
|
# Store original strings for line diff display (before preprocessing)
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
(n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
|
|
194
|
-
end
|
|
195
|
-
original2 = if n2.is_a?(String)
|
|
196
|
-
n2
|
|
197
|
-
else
|
|
198
|
-
(n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
|
|
199
|
-
end
|
|
182
|
+
# Store original strings for line diff display (before preprocessing)
|
|
183
|
+
original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
|
|
184
|
+
original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
|
|
200
185
|
|
|
201
186
|
# Parse to Canon::Xml::Node (preserves preprocessing)
|
|
202
187
|
node1 = parse_node(n1, match_opts_hash[:preprocessing])
|
|
@@ -262,20 +247,8 @@ module Canon
|
|
|
262
247
|
serialize_node(node1).gsub("><", ">\n<"),
|
|
263
248
|
serialize_node(node2).gsub("><", ">\n<"),
|
|
264
249
|
]
|
|
265
|
-
original1 =
|
|
266
|
-
|
|
267
|
-
elsif n1.respond_to?(:to_xml)
|
|
268
|
-
n1.to_xml
|
|
269
|
-
else
|
|
270
|
-
n1.to_s
|
|
271
|
-
end
|
|
272
|
-
original2 = if n2.is_a?(String)
|
|
273
|
-
n2
|
|
274
|
-
elsif n2.respond_to?(:to_xml)
|
|
275
|
-
n2.to_xml
|
|
276
|
-
else
|
|
277
|
-
n2.to_s
|
|
278
|
-
end
|
|
250
|
+
original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
|
|
251
|
+
original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
|
|
279
252
|
|
|
280
253
|
ComparisonResult.new(
|
|
281
254
|
differences: [],
|
|
@@ -289,14 +262,20 @@ module Canon
|
|
|
289
262
|
|
|
290
263
|
public
|
|
291
264
|
|
|
265
|
+
# Public parsing API for external callers
|
|
266
|
+
def parse(node, preprocessing = :none, preserve_whitespace: false)
|
|
267
|
+
parse_node(node, preprocessing,
|
|
268
|
+
preserve_whitespace: preserve_whitespace)
|
|
269
|
+
end
|
|
270
|
+
|
|
292
271
|
# Main comparison dispatcher
|
|
293
272
|
def compare_nodes(n1, n2, opts, child_opts, diff_children, differences)
|
|
294
273
|
# FAST PATH: Object identity - same object is always equivalent
|
|
295
274
|
return Comparison::EQUIVALENT if n1.equal?(n2)
|
|
296
275
|
|
|
297
276
|
# Handle DocumentFragment nodes - compare their children instead
|
|
298
|
-
if
|
|
299
|
-
|
|
277
|
+
if Canon::XmlParsing.document_fragment?(n1) &&
|
|
278
|
+
Canon::XmlParsing.document_fragment?(n2)
|
|
300
279
|
children1 = n1.children.to_a
|
|
301
280
|
children2 = n2.children.to_a
|
|
302
281
|
|
|
@@ -392,8 +371,8 @@ module Canon
|
|
|
392
371
|
end
|
|
393
372
|
|
|
394
373
|
# Compare namespace URIs - elements with different namespaces are different elements
|
|
395
|
-
ns1 =
|
|
396
|
-
ns2 =
|
|
374
|
+
ns1 = Canon::XmlParsing.namespace_uri(n1)
|
|
375
|
+
ns2 = Canon::XmlParsing.namespace_uri(n2)
|
|
397
376
|
|
|
398
377
|
unless ns1 == ns2
|
|
399
378
|
# Create descriptive reason showing the actual namespace URIs
|
|
@@ -410,18 +389,30 @@ module Canon
|
|
|
410
389
|
return Comparison::UNEQUAL_ELEMENTS
|
|
411
390
|
end
|
|
412
391
|
|
|
392
|
+
# Track the worst result across namespace, attribute, and children
|
|
393
|
+
# comparisons. Do NOT return early on attribute/namespace mismatches —
|
|
394
|
+
# children must still be compared so structural differences in the
|
|
395
|
+
# subtree are reported. Early returns caused the comparator to skip
|
|
396
|
+
# entire subtrees when a root or intermediate element had different
|
|
397
|
+
# attributes, missing all nested structural changes.
|
|
398
|
+
worst_result = Comparison::EQUIVALENT
|
|
399
|
+
|
|
413
400
|
# Compare namespace declarations (xmlns and xmlns:* attributes)
|
|
414
401
|
ns_result = compare_namespace_declarations(n1, n2, opts, differences)
|
|
415
|
-
|
|
402
|
+
worst_result = ns_result unless ns_result == Comparison::EQUIVALENT
|
|
416
403
|
|
|
417
404
|
# Compare attributes
|
|
418
405
|
attr_result = compare_attribute_sets(n1, n2, opts, differences)
|
|
419
|
-
|
|
406
|
+
worst_result = attr_result unless attr_result == Comparison::EQUIVALENT
|
|
420
407
|
|
|
421
408
|
# Compare children if not ignored
|
|
422
|
-
|
|
409
|
+
unless opts[:ignore_children]
|
|
410
|
+
child_result = compare_children(n1, n2, opts, child_opts,
|
|
411
|
+
diff_children, differences)
|
|
412
|
+
worst_result = child_result unless child_result == Comparison::EQUIVALENT
|
|
413
|
+
end
|
|
423
414
|
|
|
424
|
-
|
|
415
|
+
worst_result
|
|
425
416
|
end
|
|
426
417
|
|
|
427
418
|
# Compare attribute sets
|
|
@@ -500,7 +491,7 @@ module Canon
|
|
|
500
491
|
def should_preserve_whitespace_strictly?(n1, n2, opts)
|
|
501
492
|
# Check both n1 and n2 - if either is in a preserve whitespace element, preserve strictly
|
|
502
493
|
[n1, n2].each do |node|
|
|
503
|
-
next unless node.
|
|
494
|
+
next unless Canon::XmlParsing.xml_node?(node) || node.is_a?(Canon::Xml::Node)
|
|
504
495
|
|
|
505
496
|
parent = node.parent
|
|
506
497
|
next unless parent
|
|
@@ -516,15 +507,12 @@ module Canon
|
|
|
516
507
|
# Check if a node is inside a whitespace-preserving element
|
|
517
508
|
def in_preserve_element?(node, preserve_list)
|
|
518
509
|
current = node.parent
|
|
519
|
-
while current.
|
|
510
|
+
while Canon::XmlParsing.xml_node?(current) || current.is_a?(Canon::Xml::Node)
|
|
520
511
|
return true if preserve_list.include?(current.name.downcase)
|
|
521
512
|
|
|
522
|
-
|
|
523
|
-
break if current.is_a?(Nokogiri::XML::Document) ||
|
|
524
|
-
current.is_a?(Nokogiri::HTML4::Document) ||
|
|
525
|
-
current.is_a?(Nokogiri::HTML5::Document)
|
|
513
|
+
break if Canon::XmlParsing.document?(current)
|
|
526
514
|
|
|
527
|
-
current = current.parent
|
|
515
|
+
current = current.parent
|
|
528
516
|
break unless current
|
|
529
517
|
end
|
|
530
518
|
false
|
|
@@ -567,8 +555,8 @@ module Canon
|
|
|
567
555
|
return Comparison::UNEQUAL_NODES_TYPES
|
|
568
556
|
end
|
|
569
557
|
|
|
570
|
-
content1 =
|
|
571
|
-
content2 =
|
|
558
|
+
content1 = Canon::XmlParsing.xml_node?(n1) ? n1.content.to_s.strip : ""
|
|
559
|
+
content2 = Canon::XmlParsing.xml_node?(n2) ? n2.content.to_s.strip : ""
|
|
572
560
|
|
|
573
561
|
if content1 == content2
|
|
574
562
|
Comparison::EQUIVALENT
|
|
@@ -618,17 +606,19 @@ differences)
|
|
|
618
606
|
depth = 0
|
|
619
607
|
|
|
620
608
|
while current && depth < max_depth
|
|
621
|
-
if current.
|
|
622
|
-
|
|
623
|
-
|
|
609
|
+
n = if current.is_a?(Canon::Xml::Node)
|
|
610
|
+
current.name
|
|
611
|
+
elsif Canon::XmlParsing.xml_node?(current)
|
|
612
|
+
current.name
|
|
613
|
+
end
|
|
614
|
+
path.unshift(n) if n
|
|
624
615
|
|
|
625
|
-
break unless current.
|
|
616
|
+
break unless Canon::XmlParsing.xml_node?(current) || current.is_a?(Canon::Xml::Node)
|
|
626
617
|
|
|
627
618
|
current = current.parent
|
|
628
619
|
depth += 1
|
|
629
620
|
|
|
630
|
-
|
|
631
|
-
break if current.respond_to?(:root)
|
|
621
|
+
break if Canon::XmlParsing.document?(current)
|
|
632
622
|
end
|
|
633
623
|
|
|
634
624
|
path
|
|
@@ -665,8 +655,8 @@ differences)
|
|
|
665
655
|
# For deleted/inserted nodes, include namespace information if available
|
|
666
656
|
if dimension == :text_content && (node1.nil? || node2.nil?)
|
|
667
657
|
node = node1 || node2
|
|
668
|
-
if
|
|
669
|
-
ns =
|
|
658
|
+
if Canon::XmlParsing.xml_node?(node)
|
|
659
|
+
ns = Canon::XmlParsing.namespace_uri(node)
|
|
670
660
|
ns_info = if ns.nil? || ns.empty?
|
|
671
661
|
""
|
|
672
662
|
else
|
|
@@ -674,9 +664,8 @@ differences)
|
|
|
674
664
|
end
|
|
675
665
|
label = Canon::Comparison.code_pair_label(diff1, diff2)
|
|
676
666
|
return "element '#{node.name}'#{ns_info}: #{label}"
|
|
677
|
-
elsif node.
|
|
678
|
-
|
|
679
|
-
display = if node.respond_to?(:value) && node.node_type == :text
|
|
667
|
+
elsif node.is_a?(Canon::Xml::Node)
|
|
668
|
+
display = if node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
680
669
|
"\"#{truncate_text(node.value)}\""
|
|
681
670
|
else
|
|
682
671
|
node.name.to_s
|
|
@@ -726,8 +715,8 @@ differences)
|
|
|
726
715
|
elsif dimension == :element_structure &&
|
|
727
716
|
diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
728
717
|
diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
729
|
-
(node1.is_a?(Canon::Xml::Node) ||
|
|
730
|
-
(node2.is_a?(Canon::Xml::Node) ||
|
|
718
|
+
(node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
|
|
719
|
+
(node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
|
|
731
720
|
node1.name && node2.name && node1.name != node2.name
|
|
732
721
|
# Most common case: differing element names. Surface the
|
|
733
722
|
# actual names rather than a generic "elements differ".
|
|
@@ -798,27 +787,16 @@ differences)
|
|
|
798
787
|
# @return [String, nil] Text content or nil
|
|
799
788
|
def extract_text_from_node(node)
|
|
800
789
|
return nil if node.nil?
|
|
801
|
-
|
|
802
|
-
# For Canon::Xml::Nodes::TextNode
|
|
803
|
-
return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
804
|
-
|
|
805
|
-
# For XML/HTML nodes with text_content method
|
|
806
|
-
return node.text_content if node.respond_to?(:text_content)
|
|
807
|
-
|
|
808
|
-
# For nodes with text method
|
|
809
|
-
return node.text if node.respond_to?(:text)
|
|
810
|
-
|
|
811
|
-
# For nodes with content method (Moxml::Text)
|
|
812
|
-
return node.content if node.respond_to?(:content)
|
|
813
|
-
|
|
814
|
-
# For nodes with value method (other types)
|
|
815
|
-
return node.value if node.respond_to?(:value)
|
|
816
|
-
|
|
817
|
-
# For simple text nodes or strings
|
|
818
790
|
return node.to_s if node.is_a?(String)
|
|
819
791
|
|
|
820
|
-
|
|
821
|
-
|
|
792
|
+
case node
|
|
793
|
+
when Canon::Xml::Nodes::TextNode
|
|
794
|
+
node.value
|
|
795
|
+
when Canon::Xml::Node
|
|
796
|
+
node.text_content
|
|
797
|
+
else
|
|
798
|
+
Canon::XmlParsing.xml_node?(node) ? Canon::XmlParsing.text_content(node).to_s : node.to_s
|
|
799
|
+
end
|
|
822
800
|
rescue StandardError
|
|
823
801
|
nil
|
|
824
802
|
end
|
|
@@ -27,8 +27,8 @@ module Canon
|
|
|
27
27
|
def self.compare_nodes(node1, node2, opts, child_opts, diff_children,
|
|
28
28
|
differences)
|
|
29
29
|
# Handle DocumentFragment nodes - compare their children instead
|
|
30
|
-
if
|
|
31
|
-
|
|
30
|
+
if Canon::XmlParsing.document_fragment?(node1) &&
|
|
31
|
+
Canon::XmlParsing.document_fragment?(node2)
|
|
32
32
|
return compare_document_fragments(node1, node2, opts, child_opts,
|
|
33
33
|
diff_children, differences)
|
|
34
34
|
end
|
|
@@ -285,10 +285,14 @@ diff_children, differences)
|
|
|
285
285
|
return false if node1.class != node2.class
|
|
286
286
|
|
|
287
287
|
case node1
|
|
288
|
-
when Canon::Xml::Node
|
|
288
|
+
when Canon::Xml::Node
|
|
289
289
|
node1.node_type == node2.node_type
|
|
290
290
|
else
|
|
291
|
-
|
|
291
|
+
if Canon::XmlBackend.nokogiri?
|
|
292
|
+
node1.is_a?(Nokogiri::XML::Node) && node1.node_type == node2.node_type
|
|
293
|
+
else
|
|
294
|
+
Canon::XmlParsing.xml_node?(node1) && Canon::XmlParsing.node_type(node1) == Canon::XmlParsing.node_type(node2)
|
|
295
|
+
end
|
|
292
296
|
end
|
|
293
297
|
end
|
|
294
298
|
|
|
@@ -305,7 +309,7 @@ diff_children, differences)
|
|
|
305
309
|
def self.comment_node?(node, check_children: false)
|
|
306
310
|
return true if NodeInspector.comment_node?(node)
|
|
307
311
|
|
|
308
|
-
if check_children &&
|
|
312
|
+
if check_children && Canon::XmlParsing.element?(node) && !Canon::XmlParsing.children(node).empty?
|
|
309
313
|
node.children.any? { |child| NodeInspector.comment_node?(child) }
|
|
310
314
|
else
|
|
311
315
|
false
|
|
@@ -360,24 +364,20 @@ diff_children, differences)
|
|
|
360
364
|
# Dispatch by legacy Nokogiri/Moxml node type
|
|
361
365
|
def self.dispatch_legacy_node_type(node1, node2, opts, child_opts,
|
|
362
366
|
diff_children, differences)
|
|
363
|
-
# Import XmlComparator to use its comparison methods
|
|
364
367
|
require_relative "xml_comparator"
|
|
365
368
|
|
|
366
|
-
|
|
367
|
-
when Nokogiri::XML::Document
|
|
369
|
+
if Canon::XmlParsing.document?(node1)
|
|
368
370
|
XmlComparator.compare_document_nodes(node1, node2, opts, child_opts,
|
|
369
371
|
diff_children, differences)
|
|
370
|
-
|
|
371
|
-
if
|
|
372
|
+
elsif Canon::XmlParsing.xml_node?(node1)
|
|
373
|
+
if Canon::XmlParsing.element?(node1)
|
|
372
374
|
XmlComparator.compare_element_nodes(node1, node2, opts, child_opts,
|
|
373
375
|
diff_children, differences)
|
|
374
|
-
elsif node1.
|
|
376
|
+
elsif Canon::XmlParsing.text_node?(node1) || Canon::XmlParsing.cdata?(node1)
|
|
375
377
|
XmlComparator.compare_text_nodes(node1, node2, opts, differences)
|
|
376
|
-
elsif
|
|
378
|
+
elsif Canon::XmlParsing.comment?(node1)
|
|
377
379
|
XmlComparator.compare_comment_nodes(node1, node2, opts, differences)
|
|
378
|
-
elsif
|
|
379
|
-
XmlComparator.compare_text_nodes(node1, node2, opts, differences)
|
|
380
|
-
elsif node1.processing_instruction?
|
|
380
|
+
elsif Canon::XmlParsing.processing_instruction?(node1)
|
|
381
381
|
XmlComparator.compare_processing_instruction_nodes(node1, node2,
|
|
382
382
|
opts, differences)
|
|
383
383
|
else
|
|
@@ -27,6 +27,14 @@ module Canon
|
|
|
27
27
|
}.freeze
|
|
28
28
|
|
|
29
29
|
class << self
|
|
30
|
+
# Parse YAML from string or return as-is
|
|
31
|
+
#
|
|
32
|
+
# @param obj [String, Hash, Array] YAML string or parsed object
|
|
33
|
+
# @return [Object] Parsed YAML object
|
|
34
|
+
def parse(obj)
|
|
35
|
+
parse_yaml(obj)
|
|
36
|
+
end
|
|
37
|
+
|
|
30
38
|
# Compare two YAML objects for equivalence
|
|
31
39
|
#
|
|
32
40
|
# @param yaml1 [String, Hash, Array] First YAML
|
data/lib/canon/comparison.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "moxml"
|
|
4
|
-
require "nokogiri"
|
|
4
|
+
require "nokogiri" if Canon::XmlBackend.nokogiri?
|
|
5
5
|
require_relative "xml/whitespace_normalizer"
|
|
6
6
|
require_relative "comparison/xml_comparator"
|
|
7
7
|
require_relative "comparison/html_comparator"
|
|
@@ -316,7 +316,8 @@ module Canon
|
|
|
316
316
|
|
|
317
317
|
# Get global config options if not defined in opts
|
|
318
318
|
# This is needed because semantic_diff doesn't go through dom_diff's config handling
|
|
319
|
-
if !(opts[:match_profile] || opts[:global_options]) &&
|
|
319
|
+
if !(opts[:match_profile] || opts[:global_options]) && %i[xml html json
|
|
320
|
+
yaml string].include?(format1)
|
|
320
321
|
format_config = Canon::Config.instance.public_send(format1)
|
|
321
322
|
if format_config.match.profile
|
|
322
323
|
opts[:match_profile] =
|
|
@@ -333,7 +334,8 @@ module Canon
|
|
|
333
334
|
|
|
334
335
|
# Also read diff options from config (e.g., max_node_count for large documents)
|
|
335
336
|
# This is independent of match options and needs to be passed to TreeDiffIntegrator
|
|
336
|
-
if !match_opts_hash[:max_node_count] &&
|
|
337
|
+
if !match_opts_hash[:max_node_count] && %i[xml html json yaml
|
|
338
|
+
string].include?(format1)
|
|
337
339
|
diff_max_node = Canon::Config.instance.public_send(format1).diff.max_node_count
|
|
338
340
|
if diff_max_node > 10_000
|
|
339
341
|
match_opts_hash[:max_node_count] =
|
|
@@ -564,43 +566,39 @@ module Canon
|
|
|
564
566
|
|
|
565
567
|
case format
|
|
566
568
|
when :xml
|
|
567
|
-
# Delegate to XmlComparator's
|
|
568
|
-
# Adapter now handles Canon::Xml::Node directly
|
|
569
|
+
# Delegate to XmlComparator's parse - returns Canon::Xml::Node
|
|
569
570
|
doc1 = parse_with_cache(obj1, format, preprocessing) do |doc|
|
|
570
|
-
XmlComparator.
|
|
571
|
+
XmlComparator.parse(doc, preprocessing)
|
|
571
572
|
end
|
|
572
573
|
doc2 = parse_with_cache(obj2, format, preprocessing) do |doc|
|
|
573
|
-
XmlComparator.
|
|
574
|
+
XmlComparator.parse(doc, preprocessing)
|
|
574
575
|
end
|
|
575
576
|
[doc1, doc2]
|
|
576
577
|
when :html, :html4, :html5
|
|
577
|
-
# Delegate to HtmlComparator's parse_node_for_semantic for Canon::Xml::Node
|
|
578
578
|
[
|
|
579
579
|
parse_with_cache(obj1, format, preprocessing) do |doc|
|
|
580
|
-
HtmlComparator.
|
|
580
|
+
HtmlComparator.parse(doc, preprocessing)
|
|
581
581
|
end,
|
|
582
582
|
parse_with_cache(obj2, format, preprocessing) do |doc|
|
|
583
|
-
HtmlComparator.
|
|
583
|
+
HtmlComparator.parse(doc, preprocessing)
|
|
584
584
|
end,
|
|
585
585
|
]
|
|
586
586
|
when :json
|
|
587
|
-
# Delegate to JsonComparator's parse_json
|
|
588
587
|
[
|
|
589
588
|
parse_with_cache(obj1, format, :none) do |doc|
|
|
590
|
-
JsonComparator.
|
|
589
|
+
JsonComparator.parse(doc)
|
|
591
590
|
end,
|
|
592
591
|
parse_with_cache(obj2, format, :none) do |doc|
|
|
593
|
-
JsonComparator.
|
|
592
|
+
JsonComparator.parse(doc)
|
|
594
593
|
end,
|
|
595
594
|
]
|
|
596
595
|
when :yaml
|
|
597
|
-
# Delegate to YamlComparator's parse_yaml
|
|
598
596
|
[
|
|
599
597
|
parse_with_cache(obj1, format, :none) do |doc|
|
|
600
|
-
YamlComparator.
|
|
598
|
+
YamlComparator.parse(doc)
|
|
601
599
|
end,
|
|
602
600
|
parse_with_cache(obj2, format, :none) do |doc|
|
|
603
|
-
YamlComparator.
|
|
601
|
+
YamlComparator.parse(doc)
|
|
604
602
|
end,
|
|
605
603
|
]
|
|
606
604
|
else
|
|
@@ -651,12 +649,10 @@ module Canon
|
|
|
651
649
|
obj
|
|
652
650
|
when Nokogiri::XML::Document, Nokogiri::HTML::Document,
|
|
653
651
|
Nokogiri::XML::DocumentFragment, Nokogiri::HTML::DocumentFragment
|
|
654
|
-
obj.
|
|
652
|
+
obj.to_html
|
|
655
653
|
else
|
|
656
|
-
if obj.
|
|
657
|
-
obj
|
|
658
|
-
elsif obj.respond_to?(:to_xml)
|
|
659
|
-
obj.to_xml
|
|
654
|
+
if Canon::XmlParsing.xml_node?(obj) || obj.is_a?(Canon::Xml::Node)
|
|
655
|
+
Canon::XmlParsing.serialize(obj)
|
|
660
656
|
else
|
|
661
657
|
obj.to_s
|
|
662
658
|
end
|
|
@@ -667,7 +663,11 @@ module Canon
|
|
|
667
663
|
def serialize_document(doc, format)
|
|
668
664
|
case format
|
|
669
665
|
when :xml, :html, :html4, :html5
|
|
670
|
-
|
|
666
|
+
if Canon::XmlParsing.xml_node?(doc) || doc.is_a?(Canon::Xml::Node)
|
|
667
|
+
Canon::XmlParsing.serialize(doc)
|
|
668
|
+
else
|
|
669
|
+
doc.to_s
|
|
670
|
+
end
|
|
671
671
|
when :json
|
|
672
672
|
require "json"
|
|
673
673
|
JSON.pretty_generate(doc)
|
|
@@ -750,7 +750,7 @@ module Canon
|
|
|
750
750
|
|
|
751
751
|
# get match_profile if it is not defined in options
|
|
752
752
|
# but defined in config
|
|
753
|
-
if
|
|
753
|
+
if %i[xml html json yaml string].include?(comparison_format)
|
|
754
754
|
format_config = Canon::Config.instance.public_send(comparison_format)
|
|
755
755
|
if opts[:global_profile].nil? && format_config.match.profile
|
|
756
756
|
# Config-sourced profile has *global* priority (applied before
|