canon 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +12 -22
- data/Rakefile +5 -2
- data/lib/canon/cache.rb +3 -1
- data/lib/canon/cli.rb +0 -3
- data/lib/canon/commands/diff_command.rb +0 -6
- data/lib/canon/commands/format_command.rb +0 -4
- data/lib/canon/commands.rb +9 -0
- data/lib/canon/comparison/child_realignment.rb +0 -2
- data/lib/canon/comparison/compare_profile.rb +30 -36
- data/lib/canon/comparison/comparison_result.rb +0 -2
- data/lib/canon/comparison/diff_node_builder.rb +353 -0
- data/lib/canon/comparison/dimensions/dimension.rb +51 -0
- data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
- data/lib/canon/comparison/dimensions/registry.rb +101 -60
- data/lib/canon/comparison/dimensions.rb +15 -46
- data/lib/canon/comparison/html_comparator.rb +18 -141
- data/lib/canon/comparison/html_compare_profile.rb +15 -18
- data/lib/canon/comparison/json_comparator.rb +4 -165
- data/lib/canon/comparison/json_parser.rb +0 -2
- data/lib/canon/comparison/markup_comparator.rb +14 -210
- data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
- data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
- data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
- data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
- data/lib/canon/comparison/match_options.rb +13 -88
- data/lib/canon/comparison/pipeline.rb +269 -0
- data/lib/canon/comparison/profile_definition.rb +0 -2
- data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
- data/lib/canon/comparison/strategies.rb +16 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
- data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
- data/lib/canon/comparison/xml_comparator.rb +4 -492
- data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
- data/lib/canon/comparison/xml_node_comparison.rb +4 -119
- data/lib/canon/comparison/yaml_comparator.rb +0 -3
- data/lib/canon/comparison.rb +143 -266
- data/lib/canon/config/config_dsl.rb +159 -0
- data/lib/canon/config/env_provider.rb +0 -3
- data/lib/canon/config/env_schema.rb +48 -58
- data/lib/canon/config/profile_loader.rb +0 -1
- data/lib/canon/config.rb +116 -468
- data/lib/canon/diff/diff_block_builder.rb +0 -2
- data/lib/canon/diff/diff_classifier.rb +0 -5
- data/lib/canon/diff/diff_context.rb +0 -2
- data/lib/canon/diff/diff_context_builder.rb +0 -2
- data/lib/canon/diff/diff_line_builder.rb +0 -3
- data/lib/canon/diff/diff_node_enricher.rb +0 -4
- data/lib/canon/diff/diff_node_mapper.rb +0 -4
- data/lib/canon/diff/diff_report_builder.rb +0 -4
- data/lib/canon/diff/formatting_detector.rb +0 -1
- data/lib/canon/diff/node_serializer.rb +0 -7
- data/lib/canon/diff.rb +39 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/debug_output.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
- data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
- data/lib/canon/diff_formatter.rb +11 -9
- data/lib/canon/formatters/html4_formatter.rb +0 -2
- data/lib/canon/formatters/html5_formatter.rb +0 -2
- data/lib/canon/formatters/html_formatter.rb +0 -3
- data/lib/canon/formatters/json_formatter.rb +0 -1
- data/lib/canon/formatters/xml_formatter.rb +0 -4
- data/lib/canon/formatters/yaml_formatter.rb +0 -1
- data/lib/canon/formatters.rb +16 -0
- data/lib/canon/html/data_model.rb +0 -10
- data/lib/canon/html.rb +4 -3
- data/lib/canon/options/cli_generator.rb +0 -2
- data/lib/canon/options/registry.rb +0 -2
- data/lib/canon/options.rb +9 -0
- data/lib/canon/pretty_printer/html.rb +0 -1
- data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
- data/lib/canon/pretty_printer.rb +12 -0
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters.rb +14 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
- data/lib/canon/tree_diff/core/node_signature.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +12 -5
- data/lib/canon/tree_diff/core.rb +17 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
- data/lib/canon/tree_diff/matchers.rb +15 -0
- data/lib/canon/tree_diff/operation_converter.rb +0 -8
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
- data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
- data/lib/canon/tree_diff/operations.rb +13 -0
- data/lib/canon/tree_diff.rb +26 -27
- data/lib/canon/validators/base_validator.rb +0 -2
- data/lib/canon/validators/html_validator.rb +0 -1
- data/lib/canon/validators/json_validator.rb +0 -1
- data/lib/canon/validators/xml_validator.rb +0 -1
- data/lib/canon/validators/yaml_validator.rb +0 -1
- data/lib/canon/validators.rb +12 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +0 -4
- data/lib/canon/xml/data_model.rb +0 -10
- data/lib/canon/xml/line_range_mapper.rb +0 -2
- data/lib/canon/xml/nodes/attribute_node.rb +0 -2
- data/lib/canon/xml/nodes/comment_node.rb +0 -2
- data/lib/canon/xml/nodes/element_node.rb +0 -2
- data/lib/canon/xml/nodes/namespace_node.rb +0 -2
- data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
- data/lib/canon/xml/nodes/root_node.rb +0 -2
- data/lib/canon/xml/nodes/text_node.rb +0 -2
- data/lib/canon/xml/nodes.rb +19 -0
- data/lib/canon/xml/processor.rb +0 -5
- data/lib/canon/xml/sax_builder.rb +0 -7
- data/lib/canon/xml.rb +33 -0
- data/lib/canon/xml_backend.rb +50 -14
- data/lib/canon/xml_parsing.rb +4 -2
- data/lib/canon.rb +25 -15
- data/lib/tasks/performance.rake +0 -58
- data/lib/tasks/performance_comparator.rb +132 -65
- data/lib/tasks/performance_helpers.rb +4 -249
- data/lib/tasks/performance_report.rb +309 -0
- metadata +24 -11
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
- data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
- data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../node_inspector"
|
|
4
|
-
|
|
5
3
|
module Canon
|
|
6
4
|
module Comparison
|
|
7
5
|
module XmlComparatorHelpers
|
|
@@ -38,7 +36,6 @@ module Canon
|
|
|
38
36
|
# pretty_printed_received → drop \n-starting whitespace nodes from node2
|
|
39
37
|
# The ephemeral _pretty_print_side_active flag is consumed by node_excluded?
|
|
40
38
|
# and must NOT be forwarded into recursive compare_nodes calls.
|
|
41
|
-
require_relative "../xml_node_comparison"
|
|
42
39
|
opts1 = XmlNodeComparison.opts_for_side(opts, :expected)
|
|
43
40
|
opts2 = XmlNodeComparison.opts_for_side(opts, :received)
|
|
44
41
|
|
|
@@ -78,9 +75,6 @@ module Canon
|
|
|
78
75
|
# Use ElementMatcher for semantic comparison
|
|
79
76
|
def use_element_matcher_comparison(children1, children2, parent_node, comparator,
|
|
80
77
|
opts, child_opts, diff_children, differences)
|
|
81
|
-
require_relative "../../xml/element_matcher"
|
|
82
|
-
require_relative "../../xml/nodes/root_node"
|
|
83
|
-
|
|
84
78
|
# Create temporary RootNode wrappers
|
|
85
79
|
temp_root1 = Canon::Xml::Nodes::RootNode.new
|
|
86
80
|
temp_root1.children = children1.dup
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../../xml/namespace_helper"
|
|
4
|
-
|
|
5
3
|
module Canon
|
|
6
4
|
module Comparison
|
|
7
5
|
module XmlComparatorHelpers
|
|
@@ -153,10 +151,7 @@ changed, opts, differences)
|
|
|
153
151
|
end.join(', ')}"
|
|
154
152
|
end
|
|
155
153
|
|
|
156
|
-
|
|
157
|
-
require_relative "diff_node_builder"
|
|
158
|
-
|
|
159
|
-
diff_node = DiffNodeBuilder.build(
|
|
154
|
+
diff_node = Canon::Comparison::DiffNodeBuilder.build(
|
|
160
155
|
node1: node1,
|
|
161
156
|
node2: node2,
|
|
162
157
|
diff1: Comparison::UNEQUAL_ATTRIBUTES,
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../../xml/c14n"
|
|
4
|
-
|
|
5
3
|
module Canon
|
|
6
4
|
module Comparison
|
|
7
5
|
module XmlComparatorHelpers
|
|
@@ -38,7 +36,6 @@ module Canon
|
|
|
38
36
|
resolved_parser = parser || resolve_parser_config
|
|
39
37
|
|
|
40
38
|
if resolved_parser == :sax && RUBY_ENGINE != "opal"
|
|
41
|
-
require_relative "../../xml/sax_builder"
|
|
42
39
|
Canon::Xml::SaxBuilder.parse(xml_string,
|
|
43
40
|
preserve_whitespace: preserve_whitespace)
|
|
44
41
|
else
|
|
@@ -98,7 +95,6 @@ parser: nil)
|
|
|
98
95
|
resolved_parser = parser || resolve_parser_config
|
|
99
96
|
|
|
100
97
|
if resolved_parser == :sax && RUBY_ENGINE != "opal"
|
|
101
|
-
require_relative "../../xml/sax_builder"
|
|
102
98
|
Canon::Xml::SaxBuilder.parse(xml_str,
|
|
103
99
|
preserve_whitespace: preserve_whitespace)
|
|
104
100
|
else
|
|
@@ -1,26 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../xml/c14n"
|
|
4
|
-
require_relative "markup_comparator"
|
|
5
|
-
require_relative "match_options"
|
|
6
|
-
require_relative "../diff/diff_node"
|
|
7
|
-
require_relative "../diff/diff_classifier"
|
|
8
|
-
require_relative "../diff/path_builder"
|
|
9
|
-
require_relative "../diff/node_serializer"
|
|
10
|
-
require_relative "comparison_result"
|
|
11
|
-
require_relative "../tree_diff"
|
|
12
|
-
require_relative "strategies/match_strategy_factory"
|
|
13
|
-
# XmlComparator modules
|
|
14
|
-
require_relative "xml_comparator/node_parser"
|
|
15
|
-
require_relative "xml_comparator/attribute_filter"
|
|
16
|
-
require_relative "xml_comparator/attribute_comparator"
|
|
17
|
-
require_relative "xml_comparator/namespace_comparator"
|
|
18
|
-
require_relative "xml_comparator/node_type_comparator"
|
|
19
|
-
require_relative "xml_comparator/child_comparison"
|
|
20
|
-
require_relative "xml_comparator/diff_node_builder"
|
|
21
|
-
# Whitespace sensitivity module
|
|
22
|
-
require_relative "whitespace_sensitivity"
|
|
23
|
-
|
|
24
3
|
module Canon
|
|
25
4
|
module Comparison
|
|
26
5
|
# XML comparison class
|
|
@@ -64,8 +43,7 @@ module Canon
|
|
|
64
43
|
# verbose
|
|
65
44
|
def equivalent?(n1, n2, opts = {}, child_opts = {})
|
|
66
45
|
# FAST PATH: Object identity - same object is always equivalent
|
|
67
|
-
|
|
68
|
-
if n1.equal?(n2) && !opts.dig(:match, :semantic_diff)
|
|
46
|
+
if n1.equal?(n2)
|
|
69
47
|
return build_trivial_equivalent_result(n1, n2, opts)
|
|
70
48
|
end
|
|
71
49
|
|
|
@@ -96,11 +74,6 @@ module Canon
|
|
|
96
74
|
# Store resolved match options hash for use in comparison logic
|
|
97
75
|
opts[:match_opts] = match_opts_hash
|
|
98
76
|
|
|
99
|
-
# Use tree diff if semantic_diff option is enabled
|
|
100
|
-
if match_opts.semantic_diff?
|
|
101
|
-
return perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
|
|
102
|
-
end
|
|
103
|
-
|
|
104
77
|
# Create child_opts with resolved options
|
|
105
78
|
child_opts = opts.merge(child_opts)
|
|
106
79
|
|
|
@@ -170,54 +143,6 @@ module Canon
|
|
|
170
143
|
|
|
171
144
|
private
|
|
172
145
|
|
|
173
|
-
# Perform semantic tree diff using SemanticTreeMatchStrategy
|
|
174
|
-
#
|
|
175
|
-
# @param n1 [String, Moxml::Node] First node
|
|
176
|
-
# @param n2 [String, Moxml::Node] Second node
|
|
177
|
-
# @param opts [Hash] Comparison options
|
|
178
|
-
# @param match_opts_hash [Hash] Resolved match options
|
|
179
|
-
# @return [Boolean, ComparisonResult] Result of tree diff comparison
|
|
180
|
-
def perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
|
|
181
|
-
# Store original strings for line diff display (before preprocessing)
|
|
182
|
-
# Store original strings for line diff display (before preprocessing)
|
|
183
|
-
original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
|
|
184
|
-
original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
|
|
185
|
-
|
|
186
|
-
# Parse to Canon::Xml::Node (preserves preprocessing)
|
|
187
|
-
node1 = parse_node(n1, match_opts_hash[:preprocessing])
|
|
188
|
-
node2 = parse_node(n2, match_opts_hash[:preprocessing])
|
|
189
|
-
|
|
190
|
-
# Create strategy using factory
|
|
191
|
-
strategy = Strategies::MatchStrategyFactory.create(
|
|
192
|
-
format: :xml,
|
|
193
|
-
match_options: match_opts_hash,
|
|
194
|
-
)
|
|
195
|
-
|
|
196
|
-
# Pass Canon::Xml::Node directly - XML adapter now handles it
|
|
197
|
-
differences = strategy.match(node1, node2)
|
|
198
|
-
|
|
199
|
-
# Return based on verbose mode
|
|
200
|
-
if opts[:verbose]
|
|
201
|
-
# Get preprocessed strings for display
|
|
202
|
-
preprocessed = strategy.preprocess_for_display(node1, node2)
|
|
203
|
-
|
|
204
|
-
# Return ComparisonResult with strategy metadata
|
|
205
|
-
ComparisonResult.new(
|
|
206
|
-
differences: differences,
|
|
207
|
-
preprocessed_strings: preprocessed,
|
|
208
|
-
original_strings: [original1, original2],
|
|
209
|
-
format: :xml,
|
|
210
|
-
match_options: match_opts_hash.merge(strategy.metadata),
|
|
211
|
-
algorithm: :semantic,
|
|
212
|
-
parse_errors_expected: Comparison.parse_errors_for(node1),
|
|
213
|
-
parse_errors_received: Comparison.parse_errors_for(node2),
|
|
214
|
-
)
|
|
215
|
-
else
|
|
216
|
-
# Simple boolean result - equivalent if no normative differences
|
|
217
|
-
differences.none?(&:normative?)
|
|
218
|
-
end
|
|
219
|
-
end
|
|
220
|
-
|
|
221
146
|
# Parse a node from string or return as-is
|
|
222
147
|
# Applies preprocessing transformation before parsing if specified
|
|
223
148
|
# Delegates to NodeParser module
|
|
@@ -346,8 +271,6 @@ module Canon
|
|
|
346
271
|
# @param node2 [Object] Second node
|
|
347
272
|
# @return [Boolean] true if exactly one node is a comment
|
|
348
273
|
def comment_vs_non_comment_comparison?(node1, node2)
|
|
349
|
-
require_relative "xml_node_comparison"
|
|
350
|
-
|
|
351
274
|
node1_comment = XmlNodeComparison
|
|
352
275
|
.comment_node?(node1, check_children: true)
|
|
353
276
|
node2_comment = XmlNodeComparison
|
|
@@ -375,15 +298,12 @@ module Canon
|
|
|
375
298
|
ns2 = Canon::XmlParsing.namespace_uri(n2)
|
|
376
299
|
|
|
377
300
|
unless ns1 == ns2
|
|
378
|
-
|
|
379
|
-
ns1_display = ns1.nil? || ns1.empty? ? "(no namespace)" : ns1
|
|
380
|
-
ns2_display = ns2.nil? || ns2.empty? ? "(no namespace)" : ns2
|
|
381
|
-
|
|
382
|
-
diff_node = Canon::Diff::DiffNode.new(
|
|
301
|
+
diff_node = Canon::Comparison::DiffNodeBuilder.build(
|
|
383
302
|
node1: n1,
|
|
384
303
|
node2: n2,
|
|
304
|
+
diff1: Comparison::UNEQUAL_ELEMENTS,
|
|
305
|
+
diff2: Comparison::UNEQUAL_ELEMENTS,
|
|
385
306
|
dimension: :namespace_uri,
|
|
386
|
-
reason: "namespace '#{ns1_display}' vs '#{ns2_display}' on element '#{n1.name}'",
|
|
387
307
|
)
|
|
388
308
|
differences << diff_node if opts[:verbose]
|
|
389
309
|
return Comparison::UNEQUAL_ELEMENTS
|
|
@@ -624,414 +544,6 @@ differences)
|
|
|
624
544
|
path
|
|
625
545
|
end
|
|
626
546
|
|
|
627
|
-
# Serialize a node to string for display
|
|
628
|
-
#
|
|
629
|
-
# @param node [Object, nil] Node to serialize
|
|
630
|
-
# @return [String, nil] Serialized content
|
|
631
|
-
def serialize_node(node)
|
|
632
|
-
return nil if node.nil?
|
|
633
|
-
|
|
634
|
-
Canon::Diff::NodeSerializer.serialize(node)
|
|
635
|
-
end
|
|
636
|
-
|
|
637
|
-
# Extract attributes from a node as a normalized hash
|
|
638
|
-
#
|
|
639
|
-
# @param node [Object, nil] Node to extract attributes from
|
|
640
|
-
# @return [Hash, nil] Normalized attributes hash
|
|
641
|
-
def extract_attributes(node)
|
|
642
|
-
return nil if node.nil?
|
|
643
|
-
|
|
644
|
-
Canon::Diff::NodeSerializer.extract_attributes(node)
|
|
645
|
-
end
|
|
646
|
-
|
|
647
|
-
# Build a human-readable reason for a difference
|
|
648
|
-
# @param node1 [Object] First node
|
|
649
|
-
# @param node2 [Object] Second node
|
|
650
|
-
# @param diff1 [String] Difference type for node1
|
|
651
|
-
# @param diff2 [String] Difference type for node2
|
|
652
|
-
# @param dimension [Symbol] The dimension of the difference
|
|
653
|
-
# @return [String] Human-readable reason
|
|
654
|
-
def build_difference_reason(node1, node2, diff1, diff2, dimension)
|
|
655
|
-
# For deleted/inserted nodes, include namespace information if available
|
|
656
|
-
if dimension == :text_content && (node1.nil? || node2.nil?)
|
|
657
|
-
node = node1 || node2
|
|
658
|
-
if Canon::XmlParsing.xml_node?(node)
|
|
659
|
-
ns = Canon::XmlParsing.namespace_uri(node)
|
|
660
|
-
ns_info = if ns.nil? || ns.empty?
|
|
661
|
-
""
|
|
662
|
-
else
|
|
663
|
-
" (namespace: #{ns})"
|
|
664
|
-
end
|
|
665
|
-
label = Canon::Comparison.code_pair_label(diff1, diff2)
|
|
666
|
-
return "element '#{node.name}'#{ns_info}: #{label}"
|
|
667
|
-
elsif node.is_a?(Canon::Xml::Node)
|
|
668
|
-
display = if node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
669
|
-
"\"#{truncate_text(node.value)}\""
|
|
670
|
-
else
|
|
671
|
-
node.name.to_s
|
|
672
|
-
end
|
|
673
|
-
return "element missing: #{display}"
|
|
674
|
-
end
|
|
675
|
-
end
|
|
676
|
-
|
|
677
|
-
# For attribute presence differences, show what attributes differ
|
|
678
|
-
if dimension == :attribute_presence
|
|
679
|
-
attrs1 = extract_attributes(node1)
|
|
680
|
-
attrs2 = extract_attributes(node2)
|
|
681
|
-
return build_attribute_diff_reason(attrs1, attrs2)
|
|
682
|
-
end
|
|
683
|
-
|
|
684
|
-
# For text content differences, show the actual text (truncated if needed)
|
|
685
|
-
if dimension == :text_content
|
|
686
|
-
text1 = extract_text_from_node(node1)
|
|
687
|
-
text2 = extract_text_from_node(node2)
|
|
688
|
-
return build_text_diff_reason(text1, text2)
|
|
689
|
-
end
|
|
690
|
-
|
|
691
|
-
if dimension == :whitespace_adjacency
|
|
692
|
-
return build_whitespace_adjacency_reason(node1, node2)
|
|
693
|
-
end
|
|
694
|
-
|
|
695
|
-
if dimension == :comments
|
|
696
|
-
return build_comments_reason(node1, node2)
|
|
697
|
-
end
|
|
698
|
-
|
|
699
|
-
# For attribute values differences, show the actual values
|
|
700
|
-
if dimension == :attribute_values
|
|
701
|
-
attrs1 = extract_attributes(node1)
|
|
702
|
-
attrs2 = extract_attributes(node2)
|
|
703
|
-
return build_attribute_value_diff_reason(attrs1, attrs2)
|
|
704
|
-
end
|
|
705
|
-
|
|
706
|
-
# For attribute order differences, show the actual attribute names
|
|
707
|
-
if dimension == :attribute_order
|
|
708
|
-
attrs1 = extract_attributes(node1)&.keys || []
|
|
709
|
-
attrs2 = extract_attributes(node2)&.keys || []
|
|
710
|
-
return "Attribute order changed: [#{attrs1.join(', ')}] → [#{attrs2.join(', ')}]"
|
|
711
|
-
end
|
|
712
|
-
|
|
713
|
-
if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
|
|
714
|
-
"element structure mismatch (children differ)"
|
|
715
|
-
elsif dimension == :element_structure &&
|
|
716
|
-
diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
717
|
-
diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
718
|
-
(node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
|
|
719
|
-
(node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
|
|
720
|
-
node1.name && node2.name && node1.name != node2.name
|
|
721
|
-
# Most common case: differing element names. Surface the
|
|
722
|
-
# actual names rather than a generic "elements differ".
|
|
723
|
-
"different element name (<#{node1.name}> vs <#{node2.name}>)"
|
|
724
|
-
else
|
|
725
|
-
Canon::Comparison.code_pair_label(diff1, diff2)
|
|
726
|
-
end
|
|
727
|
-
end
|
|
728
|
-
|
|
729
|
-
# Build a clear reason message for attribute value differences
|
|
730
|
-
#
|
|
731
|
-
# @param attrs1 [Hash, nil] First node's attributes
|
|
732
|
-
# @param attrs2 [Hash, nil] Second node's attributes
|
|
733
|
-
# @return [String] Clear explanation of the attribute value difference
|
|
734
|
-
def build_attribute_value_diff_reason(attrs1, attrs2)
|
|
735
|
-
return "missing vs present attributes" unless attrs1 && attrs2
|
|
736
|
-
|
|
737
|
-
require "set"
|
|
738
|
-
keys1 = attrs1.keys.to_set
|
|
739
|
-
keys2 = attrs2.keys.to_set
|
|
740
|
-
|
|
741
|
-
common = keys1 & keys2
|
|
742
|
-
different_values = common.reject { |k| attrs1[k] == attrs2[k] }
|
|
743
|
-
|
|
744
|
-
return "all attribute values match" if different_values.empty?
|
|
745
|
-
|
|
746
|
-
parts = different_values.map do |k|
|
|
747
|
-
"#{k}: #{attrs1[k].inspect} vs #{attrs2[k].inspect}"
|
|
748
|
-
end
|
|
749
|
-
|
|
750
|
-
parts.join("; ")
|
|
751
|
-
end
|
|
752
|
-
|
|
753
|
-
# Build a clear reason message for attribute presence differences
|
|
754
|
-
#
|
|
755
|
-
# @param attrs1 [Hash, nil] First node's attributes
|
|
756
|
-
# @param attrs2 [Hash, nil] Second node's attributes
|
|
757
|
-
# @return [String] Clear explanation of the attribute difference
|
|
758
|
-
def build_attribute_diff_reason(attrs1, attrs2)
|
|
759
|
-
return "#{attrs1&.keys&.size || 0} vs #{attrs2&.keys&.size || 0} attributes" unless attrs1 && attrs2
|
|
760
|
-
|
|
761
|
-
require "set"
|
|
762
|
-
keys1 = attrs1.keys.to_set
|
|
763
|
-
keys2 = attrs2.keys.to_set
|
|
764
|
-
|
|
765
|
-
only_in_first = keys1 - keys2
|
|
766
|
-
only_in_second = keys2 - keys1
|
|
767
|
-
common = keys1 & keys2
|
|
768
|
-
|
|
769
|
-
# Check if values differ for common keys
|
|
770
|
-
different_values = common.reject { |k| attrs1[k] == attrs2[k] }
|
|
771
|
-
|
|
772
|
-
parts = []
|
|
773
|
-
parts << "only in first: #{only_in_first.to_a.sort.join(', ')}" if only_in_first.any?
|
|
774
|
-
parts << "only in second: #{only_in_second.to_a.sort.join(', ')}" if only_in_second.any?
|
|
775
|
-
parts << "different values: #{different_values.sort.join(', ')}" if different_values.any?
|
|
776
|
-
|
|
777
|
-
if parts.empty?
|
|
778
|
-
"#{keys1.size} vs #{keys2.size} attributes (same names)"
|
|
779
|
-
else
|
|
780
|
-
parts.join("; ")
|
|
781
|
-
end
|
|
782
|
-
end
|
|
783
|
-
|
|
784
|
-
# Extract text from a node for diff reason
|
|
785
|
-
#
|
|
786
|
-
# @param node [Object, nil] Node to extract text from
|
|
787
|
-
# @return [String, nil] Text content or nil
|
|
788
|
-
def extract_text_from_node(node)
|
|
789
|
-
return nil if node.nil?
|
|
790
|
-
return node.to_s if node.is_a?(String)
|
|
791
|
-
|
|
792
|
-
case node
|
|
793
|
-
when Canon::Xml::Nodes::TextNode
|
|
794
|
-
node.value
|
|
795
|
-
when Canon::Xml::Node
|
|
796
|
-
node.text_content
|
|
797
|
-
else
|
|
798
|
-
Canon::XmlParsing.xml_node?(node) ? Canon::XmlParsing.text_content(node).to_s : node.to_s
|
|
799
|
-
end
|
|
800
|
-
rescue StandardError
|
|
801
|
-
nil
|
|
802
|
-
end
|
|
803
|
-
|
|
804
|
-
# Build a clear reason message for text content differences
|
|
805
|
-
#
|
|
806
|
-
# @param text1 [String, nil] First text content
|
|
807
|
-
# @param text2 [String, nil] Second text content
|
|
808
|
-
# @return [String] Clear explanation of the text difference
|
|
809
|
-
def build_text_diff_reason(text1, text2)
|
|
810
|
-
# Handle nil cases
|
|
811
|
-
return "missing vs '#{truncate_text(text2)}'" if text1.nil? && text2
|
|
812
|
-
return "'#{truncate_text(text2)}' vs missing" if text1 && text2.nil?
|
|
813
|
-
return "both missing" if text1.nil? && text2.nil?
|
|
814
|
-
|
|
815
|
-
# Check if both are whitespace-only
|
|
816
|
-
if whitespace_only?(text1) && whitespace_only?(text2)
|
|
817
|
-
return "whitespace: #{describe_whitespace(text1)} vs #{describe_whitespace(text2)}"
|
|
818
|
-
end
|
|
819
|
-
|
|
820
|
-
# Show text with visible whitespace markers
|
|
821
|
-
# Use escaped representations for clarity: \n for newline, \t for tab, · for spaces
|
|
822
|
-
vis1 = visualize_whitespace(text1)
|
|
823
|
-
vis2 = visualize_whitespace(text2)
|
|
824
|
-
|
|
825
|
-
"Text: \"#{vis1}\" vs \"#{vis2}\""
|
|
826
|
-
end
|
|
827
|
-
|
|
828
|
-
# Build a Reason line for a +:whitespace_adjacency+ diff (#137).
|
|
829
|
-
# Names which side carries the whitespace, the adjacency position
|
|
830
|
-
# relative to content neighbours, and surfaces the whitespace
|
|
831
|
-
# with visible markers.
|
|
832
|
-
def build_whitespace_adjacency_reason(node1, node2)
|
|
833
|
-
text1 = extract_text_from_node(node1)
|
|
834
|
-
text2 = extract_text_from_node(node2)
|
|
835
|
-
|
|
836
|
-
ni = NodeInspector
|
|
837
|
-
ws_on_first = ni.whitespace_only_text?(node1) &&
|
|
838
|
-
!ni.whitespace_only_text?(node2)
|
|
839
|
-
ws_on_second = ni.whitespace_only_text?(node2) &&
|
|
840
|
-
!ni.whitespace_only_text?(node1)
|
|
841
|
-
|
|
842
|
-
if ws_on_first
|
|
843
|
-
ws_text = text1
|
|
844
|
-
content_text = text2
|
|
845
|
-
present_side = "EXPECTED"
|
|
846
|
-
absent_side = "ACTUAL"
|
|
847
|
-
ws_node = node1
|
|
848
|
-
elsif ws_on_second
|
|
849
|
-
ws_text = text2
|
|
850
|
-
content_text = text1
|
|
851
|
-
present_side = "ACTUAL"
|
|
852
|
-
absent_side = "EXPECTED"
|
|
853
|
-
ws_node = node2
|
|
854
|
-
else
|
|
855
|
-
return build_text_diff_reason(text1, text2)
|
|
856
|
-
end
|
|
857
|
-
|
|
858
|
-
ws_vis = visualize_whitespace(ws_text)
|
|
859
|
-
|
|
860
|
-
if content_text.nil? || content_text.strip.empty?
|
|
861
|
-
# Partner content extracts to "" / whitespace-only — naming it
|
|
862
|
-
# in the Reason ("Whitespace before \"\"") gives the reader
|
|
863
|
-
# nothing. Fall back to the parent element name so the
|
|
864
|
-
# diff carries structural context (issue #112's contract,
|
|
865
|
-
# extended from :text_content to :whitespace_adjacency).
|
|
866
|
-
parent_label = whitespace_adjacency_parent_label(ws_node)
|
|
867
|
-
"Whitespace inside #{parent_label}: " \
|
|
868
|
-
"present on #{present_side} (\"#{ws_vis}\"), absent on #{absent_side}"
|
|
869
|
-
else
|
|
870
|
-
direction = whitespace_partner_direction(ws_node)
|
|
871
|
-
content_vis = visualize_whitespace(truncate_text(content_text))
|
|
872
|
-
"Whitespace #{direction} \"#{content_vis}\": " \
|
|
873
|
-
"present on #{present_side} (\"#{ws_vis}\"), absent on #{absent_side}"
|
|
874
|
-
end
|
|
875
|
-
end
|
|
876
|
-
|
|
877
|
-
def whitespace_adjacency_parent_label(ws_node)
|
|
878
|
-
parent = NodeInspector.parent(ws_node)
|
|
879
|
-
return "(unknown parent)" unless parent
|
|
880
|
-
|
|
881
|
-
name = parent.name
|
|
882
|
-
name && !name.empty? ? "<#{name}>" : "(unknown parent)"
|
|
883
|
-
end
|
|
884
|
-
|
|
885
|
-
# Direction of the partner content relative to the whitespace node,
|
|
886
|
-
# phrased from the partner's point of view: "before" when the
|
|
887
|
-
# whitespace immediately precedes its next non-whitespace sibling
|
|
888
|
-
# (the alignment partner on the other side), "after" when the
|
|
889
|
-
# whitespace trails the previous non-whitespace sibling, or
|
|
890
|
-
# "adjacent to" as a degenerate fallback when neither neighbour
|
|
891
|
-
# exists.
|
|
892
|
-
def whitespace_partner_direction(ws_node)
|
|
893
|
-
parent = NodeInspector.parent(ws_node)
|
|
894
|
-
return "adjacent to" unless parent
|
|
895
|
-
|
|
896
|
-
siblings = parent.children
|
|
897
|
-
idx = siblings.index(ws_node)
|
|
898
|
-
return "adjacent to" unless idx
|
|
899
|
-
|
|
900
|
-
if non_ws_sibling_exists?(siblings, idx, 1) then "before"
|
|
901
|
-
elsif non_ws_sibling_exists?(siblings, idx, -1) then "after"
|
|
902
|
-
else "adjacent to"
|
|
903
|
-
end
|
|
904
|
-
end
|
|
905
|
-
|
|
906
|
-
def non_ws_sibling_exists?(siblings, idx, direction)
|
|
907
|
-
i = idx + direction
|
|
908
|
-
while i >= 0 && i < siblings.length
|
|
909
|
-
s = siblings[i]
|
|
910
|
-
is_ws_text = NodeInspector.text_node?(s) &&
|
|
911
|
-
NodeInspector.text_content(s).strip.empty?
|
|
912
|
-
return true unless is_ws_text
|
|
913
|
-
|
|
914
|
-
i += direction
|
|
915
|
-
end
|
|
916
|
-
false
|
|
917
|
-
end
|
|
918
|
-
|
|
919
|
-
# Build a Reason line for a +:comments+ diff (#144).
|
|
920
|
-
# Names the side that carries the comment and surfaces the
|
|
921
|
-
# comment text.
|
|
922
|
-
def build_comments_reason(node1, node2)
|
|
923
|
-
cm1 = node1 && NodeInspector.comment_node?(node1)
|
|
924
|
-
cm2 = node2 && NodeInspector.comment_node?(node2)
|
|
925
|
-
|
|
926
|
-
if cm1 && !cm2
|
|
927
|
-
"Comment present on EXPECTED only: <!--#{truncate_text(comment_text(node1))}-->"
|
|
928
|
-
elsif cm2 && !cm1
|
|
929
|
-
"Comment present on ACTUAL only: <!--#{truncate_text(comment_text(node2))}-->"
|
|
930
|
-
elsif cm1 && cm2
|
|
931
|
-
t1 = truncate_text(comment_text(node1))
|
|
932
|
-
t2 = truncate_text(comment_text(node2))
|
|
933
|
-
"Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
|
|
934
|
-
else
|
|
935
|
-
"element structure mismatch (children differ)"
|
|
936
|
-
end
|
|
937
|
-
end
|
|
938
|
-
|
|
939
|
-
def comment_text(node)
|
|
940
|
-
NodeInspector.text_content(node).to_s
|
|
941
|
-
end
|
|
942
|
-
|
|
943
|
-
# Check if text is only whitespace
|
|
944
|
-
#
|
|
945
|
-
# @param text [String] Text to check
|
|
946
|
-
# @return [Boolean] true if whitespace-only
|
|
947
|
-
def whitespace_only?(text)
|
|
948
|
-
return false if text.nil?
|
|
949
|
-
|
|
950
|
-
text.to_s.strip.empty?
|
|
951
|
-
end
|
|
952
|
-
|
|
953
|
-
# Make whitespace visible in text content
|
|
954
|
-
# Uses the existing character visualization map from DiffFormatter (single source of truth)
|
|
955
|
-
#
|
|
956
|
-
# @param text [String] Text to visualize
|
|
957
|
-
# @return [String] Text with visible whitespace markers
|
|
958
|
-
def visualize_whitespace(text)
|
|
959
|
-
return "" if text.nil?
|
|
960
|
-
|
|
961
|
-
# Use the character map loader as the single source of truth
|
|
962
|
-
viz_map = character_visualization_map
|
|
963
|
-
|
|
964
|
-
# Replace each character with its visualization
|
|
965
|
-
text.chars.map { |char| viz_map[char] || char }.join
|
|
966
|
-
end
|
|
967
|
-
|
|
968
|
-
# Get the character visualization map (lazy-loaded to avoid circular dependency)
|
|
969
|
-
#
|
|
970
|
-
# @return [Hash] Character to visualization symbol mapping
|
|
971
|
-
def character_visualization_map
|
|
972
|
-
@character_visualization_map ||= begin
|
|
973
|
-
# Load the YAML file directly to avoid circular dependency
|
|
974
|
-
require "yaml"
|
|
975
|
-
lib_root = File.expand_path("../..", __dir__)
|
|
976
|
-
yaml_path = File.join(lib_root,
|
|
977
|
-
"canon/diff_formatter/character_map.yml")
|
|
978
|
-
data = YAML.load_file(yaml_path)
|
|
979
|
-
|
|
980
|
-
# Build visualization map from the YAML data
|
|
981
|
-
visualization_map = {}
|
|
982
|
-
data["characters"].each do |char_data|
|
|
983
|
-
# Get the character from either unicode code point or character field
|
|
984
|
-
char = if char_data["unicode"]
|
|
985
|
-
# Convert hex string to character
|
|
986
|
-
[char_data["unicode"].to_i(16)].pack("U")
|
|
987
|
-
else
|
|
988
|
-
# Use character field directly (handles \n, \t, etc.)
|
|
989
|
-
char_data["character"]
|
|
990
|
-
end
|
|
991
|
-
|
|
992
|
-
vis = char_data["visualization"]
|
|
993
|
-
visualization_map[char] = vis
|
|
994
|
-
end
|
|
995
|
-
|
|
996
|
-
visualization_map
|
|
997
|
-
end
|
|
998
|
-
end
|
|
999
|
-
|
|
1000
|
-
# Describe whitespace content in a readable way
|
|
1001
|
-
#
|
|
1002
|
-
# @param text [String] Whitespace text
|
|
1003
|
-
# @return [String] Description like "4 chars (2 newlines, 2 spaces)"
|
|
1004
|
-
def describe_whitespace(text)
|
|
1005
|
-
return "0 chars" if text.nil? || text.empty?
|
|
1006
|
-
|
|
1007
|
-
char_count = text.length
|
|
1008
|
-
newline_count = text.count("\n")
|
|
1009
|
-
space_count = text.count(" ")
|
|
1010
|
-
tab_count = text.count("\t")
|
|
1011
|
-
|
|
1012
|
-
parts = []
|
|
1013
|
-
parts << "#{newline_count} newlines" if newline_count.positive?
|
|
1014
|
-
parts << "#{space_count} spaces" if space_count.positive?
|
|
1015
|
-
parts << "#{tab_count} tabs" if tab_count.positive?
|
|
1016
|
-
|
|
1017
|
-
description = parts.join(", ")
|
|
1018
|
-
"#{char_count} chars (#{description})"
|
|
1019
|
-
end
|
|
1020
|
-
|
|
1021
|
-
# Truncate text for display in reason messages
|
|
1022
|
-
#
|
|
1023
|
-
# @param text [String] Text to truncate
|
|
1024
|
-
# @param max_length [Integer] Maximum length
|
|
1025
|
-
# @return [String] Truncated text
|
|
1026
|
-
def truncate_text(text, max_length = 40)
|
|
1027
|
-
return "" if text.nil?
|
|
1028
|
-
|
|
1029
|
-
text = text.to_s
|
|
1030
|
-
return text if text.length <= max_length
|
|
1031
|
-
|
|
1032
|
-
"#{text[0...max_length]}..."
|
|
1033
|
-
end
|
|
1034
|
-
|
|
1035
547
|
# Compare namespace declarations (xmlns and xmlns:* attributes)
|
|
1036
548
|
# Delegates to XmlComparatorHelpers::NamespaceComparator
|
|
1037
549
|
def compare_namespace_declarations(n1, n2, opts, differences)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
# Helper classes used by {XmlComparator}. Children are autoloaded —
|
|
6
|
+
# never `require_relative` them.
|
|
7
|
+
module XmlComparatorHelpers
|
|
8
|
+
autoload :AttributeComparator,
|
|
9
|
+
"canon/comparison/xml_comparator/attribute_comparator"
|
|
10
|
+
autoload :AttributeFilter,
|
|
11
|
+
"canon/comparison/xml_comparator/attribute_filter"
|
|
12
|
+
autoload :ChildComparison,
|
|
13
|
+
"canon/comparison/xml_comparator/child_comparison"
|
|
14
|
+
autoload :NamespaceComparator,
|
|
15
|
+
"canon/comparison/xml_comparator/namespace_comparator"
|
|
16
|
+
autoload :NodeParser, "canon/comparison/xml_comparator/node_parser"
|
|
17
|
+
autoload :NodeTypeComparator,
|
|
18
|
+
"canon/comparison/xml_comparator/node_type_comparator"
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|