canon 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +12 -22
  3. data/Rakefile +5 -2
  4. data/lib/canon/cache.rb +3 -1
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +18 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/pipeline.rb +269 -0
  28. data/lib/canon/comparison/profile_definition.rb +0 -2
  29. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  30. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  31. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  32. data/lib/canon/comparison/strategies.rb +16 -0
  33. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
  34. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  35. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  36. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  37. data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
  38. data/lib/canon/comparison/xml_comparator.rb +4 -492
  39. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  40. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  41. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  42. data/lib/canon/comparison.rb +143 -266
  43. data/lib/canon/config/config_dsl.rb +159 -0
  44. data/lib/canon/config/env_provider.rb +0 -3
  45. data/lib/canon/config/env_schema.rb +48 -58
  46. data/lib/canon/config/profile_loader.rb +0 -1
  47. data/lib/canon/config.rb +116 -468
  48. data/lib/canon/diff/diff_block_builder.rb +0 -2
  49. data/lib/canon/diff/diff_classifier.rb +0 -5
  50. data/lib/canon/diff/diff_context.rb +0 -2
  51. data/lib/canon/diff/diff_context_builder.rb +0 -2
  52. data/lib/canon/diff/diff_line_builder.rb +0 -3
  53. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  54. data/lib/canon/diff/diff_node_mapper.rb +0 -4
  55. data/lib/canon/diff/diff_report_builder.rb +0 -4
  56. data/lib/canon/diff/formatting_detector.rb +0 -1
  57. data/lib/canon/diff/node_serializer.rb +0 -7
  58. data/lib/canon/diff.rb +39 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  61. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  62. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  63. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  64. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
  66. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  67. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
  68. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  70. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
  71. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
  72. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  74. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  75. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  76. data/lib/canon/diff_formatter.rb +11 -9
  77. data/lib/canon/formatters/html4_formatter.rb +0 -2
  78. data/lib/canon/formatters/html5_formatter.rb +0 -2
  79. data/lib/canon/formatters/html_formatter.rb +0 -3
  80. data/lib/canon/formatters/json_formatter.rb +0 -1
  81. data/lib/canon/formatters/xml_formatter.rb +0 -4
  82. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  83. data/lib/canon/formatters.rb +16 -0
  84. data/lib/canon/html/data_model.rb +0 -10
  85. data/lib/canon/html.rb +4 -3
  86. data/lib/canon/options/cli_generator.rb +0 -2
  87. data/lib/canon/options/registry.rb +0 -2
  88. data/lib/canon/options.rb +9 -0
  89. data/lib/canon/pretty_printer/html.rb +0 -1
  90. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  91. data/lib/canon/pretty_printer.rb +12 -0
  92. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  93. data/lib/canon/tree_diff/adapters.rb +14 -0
  94. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  95. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  96. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  97. data/lib/canon/tree_diff/core.rb +17 -0
  98. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  99. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  100. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  101. data/lib/canon/tree_diff/matchers.rb +15 -0
  102. data/lib/canon/tree_diff/operation_converter.rb +0 -8
  103. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  104. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  105. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  106. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  107. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  108. data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
  109. data/lib/canon/tree_diff/operations.rb +13 -0
  110. data/lib/canon/tree_diff.rb +26 -27
  111. data/lib/canon/validators/base_validator.rb +0 -2
  112. data/lib/canon/validators/html_validator.rb +0 -1
  113. data/lib/canon/validators/json_validator.rb +0 -1
  114. data/lib/canon/validators/xml_validator.rb +0 -1
  115. data/lib/canon/validators/yaml_validator.rb +0 -1
  116. data/lib/canon/validators.rb +12 -0
  117. data/lib/canon/version.rb +1 -1
  118. data/lib/canon/xml/c14n.rb +0 -4
  119. data/lib/canon/xml/data_model.rb +0 -10
  120. data/lib/canon/xml/line_range_mapper.rb +0 -2
  121. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  122. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  123. data/lib/canon/xml/nodes/element_node.rb +0 -2
  124. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  125. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  126. data/lib/canon/xml/nodes/root_node.rb +0 -2
  127. data/lib/canon/xml/nodes/text_node.rb +0 -2
  128. data/lib/canon/xml/nodes.rb +19 -0
  129. data/lib/canon/xml/processor.rb +0 -5
  130. data/lib/canon/xml/sax_builder.rb +0 -7
  131. data/lib/canon/xml.rb +33 -0
  132. data/lib/canon/xml_backend.rb +50 -14
  133. data/lib/canon/xml_parsing.rb +4 -2
  134. data/lib/canon.rb +25 -15
  135. data/lib/tasks/performance.rake +0 -58
  136. data/lib/tasks/performance_comparator.rb +132 -65
  137. data/lib/tasks/performance_helpers.rb +4 -249
  138. data/lib/tasks/performance_report.rb +309 -0
  139. metadata +24 -11
  140. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  141. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  142. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  143. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  144. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  145. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  146. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  147. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  148. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node_inspector"
4
-
5
3
  module Canon
6
4
  module Comparison
7
5
  module XmlComparatorHelpers
@@ -38,7 +36,6 @@ module Canon
38
36
  # pretty_printed_received → drop \n-starting whitespace nodes from node2
39
37
  # The ephemeral _pretty_print_side_active flag is consumed by node_excluded?
40
38
  # and must NOT be forwarded into recursive compare_nodes calls.
41
- require_relative "../xml_node_comparison"
42
39
  opts1 = XmlNodeComparison.opts_for_side(opts, :expected)
43
40
  opts2 = XmlNodeComparison.opts_for_side(opts, :received)
44
41
 
@@ -78,9 +75,6 @@ module Canon
78
75
  # Use ElementMatcher for semantic comparison
79
76
  def use_element_matcher_comparison(children1, children2, parent_node, comparator,
80
77
  opts, child_opts, diff_children, differences)
81
- require_relative "../../xml/element_matcher"
82
- require_relative "../../xml/nodes/root_node"
83
-
84
78
  # Create temporary RootNode wrappers
85
79
  temp_root1 = Canon::Xml::Nodes::RootNode.new
86
80
  temp_root1.children = children1.dup
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../../xml/namespace_helper"
4
-
5
3
  module Canon
6
4
  module Comparison
7
5
  module XmlComparatorHelpers
@@ -153,10 +151,7 @@ changed, opts, differences)
153
151
  end.join(', ')}"
154
152
  end
155
153
 
156
- # Import DiffNodeBuilder to avoid circular dependency
157
- require_relative "diff_node_builder"
158
-
159
- diff_node = DiffNodeBuilder.build(
154
+ diff_node = Canon::Comparison::DiffNodeBuilder.build(
160
155
  node1: node1,
161
156
  node2: node2,
162
157
  diff1: Comparison::UNEQUAL_ATTRIBUTES,
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../../xml/c14n"
4
-
5
3
  module Canon
6
4
  module Comparison
7
5
  module XmlComparatorHelpers
@@ -38,7 +36,6 @@ module Canon
38
36
  resolved_parser = parser || resolve_parser_config
39
37
 
40
38
  if resolved_parser == :sax && RUBY_ENGINE != "opal"
41
- require_relative "../../xml/sax_builder"
42
39
  Canon::Xml::SaxBuilder.parse(xml_string,
43
40
  preserve_whitespace: preserve_whitespace)
44
41
  else
@@ -98,7 +95,6 @@ parser: nil)
98
95
  resolved_parser = parser || resolve_parser_config
99
96
 
100
97
  if resolved_parser == :sax && RUBY_ENGINE != "opal"
101
- require_relative "../../xml/sax_builder"
102
98
  Canon::Xml::SaxBuilder.parse(xml_str,
103
99
  preserve_whitespace: preserve_whitespace)
104
100
  else
@@ -1,26 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../xml/c14n"
4
- require_relative "markup_comparator"
5
- require_relative "match_options"
6
- require_relative "../diff/diff_node"
7
- require_relative "../diff/diff_classifier"
8
- require_relative "../diff/path_builder"
9
- require_relative "../diff/node_serializer"
10
- require_relative "comparison_result"
11
- require_relative "../tree_diff"
12
- require_relative "strategies/match_strategy_factory"
13
- # XmlComparator modules
14
- require_relative "xml_comparator/node_parser"
15
- require_relative "xml_comparator/attribute_filter"
16
- require_relative "xml_comparator/attribute_comparator"
17
- require_relative "xml_comparator/namespace_comparator"
18
- require_relative "xml_comparator/node_type_comparator"
19
- require_relative "xml_comparator/child_comparison"
20
- require_relative "xml_comparator/diff_node_builder"
21
- # Whitespace sensitivity module
22
- require_relative "whitespace_sensitivity"
23
-
24
3
  module Canon
25
4
  module Comparison
26
5
  # XML comparison class
@@ -64,8 +43,7 @@ module Canon
64
43
  # verbose
65
44
  def equivalent?(n1, n2, opts = {}, child_opts = {})
66
45
  # FAST PATH: Object identity - same object is always equivalent
67
- # Skip when semantic_diff is requested (caller needs tree diff metadata)
68
- if n1.equal?(n2) && !opts.dig(:match, :semantic_diff)
46
+ if n1.equal?(n2)
69
47
  return build_trivial_equivalent_result(n1, n2, opts)
70
48
  end
71
49
 
@@ -96,11 +74,6 @@ module Canon
96
74
  # Store resolved match options hash for use in comparison logic
97
75
  opts[:match_opts] = match_opts_hash
98
76
 
99
- # Use tree diff if semantic_diff option is enabled
100
- if match_opts.semantic_diff?
101
- return perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
102
- end
103
-
104
77
  # Create child_opts with resolved options
105
78
  child_opts = opts.merge(child_opts)
106
79
 
@@ -170,54 +143,6 @@ module Canon
170
143
 
171
144
  private
172
145
 
173
- # Perform semantic tree diff using SemanticTreeMatchStrategy
174
- #
175
- # @param n1 [String, Moxml::Node] First node
176
- # @param n2 [String, Moxml::Node] Second node
177
- # @param opts [Hash] Comparison options
178
- # @param match_opts_hash [Hash] Resolved match options
179
- # @return [Boolean, ComparisonResult] Result of tree diff comparison
180
- def perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
181
- # Store original strings for line diff display (before preprocessing)
182
- # Store original strings for line diff display (before preprocessing)
183
- original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
184
- original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
185
-
186
- # Parse to Canon::Xml::Node (preserves preprocessing)
187
- node1 = parse_node(n1, match_opts_hash[:preprocessing])
188
- node2 = parse_node(n2, match_opts_hash[:preprocessing])
189
-
190
- # Create strategy using factory
191
- strategy = Strategies::MatchStrategyFactory.create(
192
- format: :xml,
193
- match_options: match_opts_hash,
194
- )
195
-
196
- # Pass Canon::Xml::Node directly - XML adapter now handles it
197
- differences = strategy.match(node1, node2)
198
-
199
- # Return based on verbose mode
200
- if opts[:verbose]
201
- # Get preprocessed strings for display
202
- preprocessed = strategy.preprocess_for_display(node1, node2)
203
-
204
- # Return ComparisonResult with strategy metadata
205
- ComparisonResult.new(
206
- differences: differences,
207
- preprocessed_strings: preprocessed,
208
- original_strings: [original1, original2],
209
- format: :xml,
210
- match_options: match_opts_hash.merge(strategy.metadata),
211
- algorithm: :semantic,
212
- parse_errors_expected: Comparison.parse_errors_for(node1),
213
- parse_errors_received: Comparison.parse_errors_for(node2),
214
- )
215
- else
216
- # Simple boolean result - equivalent if no normative differences
217
- differences.none?(&:normative?)
218
- end
219
- end
220
-
221
146
  # Parse a node from string or return as-is
222
147
  # Applies preprocessing transformation before parsing if specified
223
148
  # Delegates to NodeParser module
@@ -346,8 +271,6 @@ module Canon
346
271
  # @param node2 [Object] Second node
347
272
  # @return [Boolean] true if exactly one node is a comment
348
273
  def comment_vs_non_comment_comparison?(node1, node2)
349
- require_relative "xml_node_comparison"
350
-
351
274
  node1_comment = XmlNodeComparison
352
275
  .comment_node?(node1, check_children: true)
353
276
  node2_comment = XmlNodeComparison
@@ -375,15 +298,12 @@ module Canon
375
298
  ns2 = Canon::XmlParsing.namespace_uri(n2)
376
299
 
377
300
  unless ns1 == ns2
378
- # Create descriptive reason showing the actual namespace URIs
379
- ns1_display = ns1.nil? || ns1.empty? ? "(no namespace)" : ns1
380
- ns2_display = ns2.nil? || ns2.empty? ? "(no namespace)" : ns2
381
-
382
- diff_node = Canon::Diff::DiffNode.new(
301
+ diff_node = Canon::Comparison::DiffNodeBuilder.build(
383
302
  node1: n1,
384
303
  node2: n2,
304
+ diff1: Comparison::UNEQUAL_ELEMENTS,
305
+ diff2: Comparison::UNEQUAL_ELEMENTS,
385
306
  dimension: :namespace_uri,
386
- reason: "namespace '#{ns1_display}' vs '#{ns2_display}' on element '#{n1.name}'",
387
307
  )
388
308
  differences << diff_node if opts[:verbose]
389
309
  return Comparison::UNEQUAL_ELEMENTS
@@ -624,414 +544,6 @@ differences)
624
544
  path
625
545
  end
626
546
 
627
- # Serialize a node to string for display
628
- #
629
- # @param node [Object, nil] Node to serialize
630
- # @return [String, nil] Serialized content
631
- def serialize_node(node)
632
- return nil if node.nil?
633
-
634
- Canon::Diff::NodeSerializer.serialize(node)
635
- end
636
-
637
- # Extract attributes from a node as a normalized hash
638
- #
639
- # @param node [Object, nil] Node to extract attributes from
640
- # @return [Hash, nil] Normalized attributes hash
641
- def extract_attributes(node)
642
- return nil if node.nil?
643
-
644
- Canon::Diff::NodeSerializer.extract_attributes(node)
645
- end
646
-
647
- # Build a human-readable reason for a difference
648
- # @param node1 [Object] First node
649
- # @param node2 [Object] Second node
650
- # @param diff1 [String] Difference type for node1
651
- # @param diff2 [String] Difference type for node2
652
- # @param dimension [Symbol] The dimension of the difference
653
- # @return [String] Human-readable reason
654
- def build_difference_reason(node1, node2, diff1, diff2, dimension)
655
- # For deleted/inserted nodes, include namespace information if available
656
- if dimension == :text_content && (node1.nil? || node2.nil?)
657
- node = node1 || node2
658
- if Canon::XmlParsing.xml_node?(node)
659
- ns = Canon::XmlParsing.namespace_uri(node)
660
- ns_info = if ns.nil? || ns.empty?
661
- ""
662
- else
663
- " (namespace: #{ns})"
664
- end
665
- label = Canon::Comparison.code_pair_label(diff1, diff2)
666
- return "element '#{node.name}'#{ns_info}: #{label}"
667
- elsif node.is_a?(Canon::Xml::Node)
668
- display = if node.is_a?(Canon::Xml::Nodes::TextNode)
669
- "\"#{truncate_text(node.value)}\""
670
- else
671
- node.name.to_s
672
- end
673
- return "element missing: #{display}"
674
- end
675
- end
676
-
677
- # For attribute presence differences, show what attributes differ
678
- if dimension == :attribute_presence
679
- attrs1 = extract_attributes(node1)
680
- attrs2 = extract_attributes(node2)
681
- return build_attribute_diff_reason(attrs1, attrs2)
682
- end
683
-
684
- # For text content differences, show the actual text (truncated if needed)
685
- if dimension == :text_content
686
- text1 = extract_text_from_node(node1)
687
- text2 = extract_text_from_node(node2)
688
- return build_text_diff_reason(text1, text2)
689
- end
690
-
691
- if dimension == :whitespace_adjacency
692
- return build_whitespace_adjacency_reason(node1, node2)
693
- end
694
-
695
- if dimension == :comments
696
- return build_comments_reason(node1, node2)
697
- end
698
-
699
- # For attribute values differences, show the actual values
700
- if dimension == :attribute_values
701
- attrs1 = extract_attributes(node1)
702
- attrs2 = extract_attributes(node2)
703
- return build_attribute_value_diff_reason(attrs1, attrs2)
704
- end
705
-
706
- # For attribute order differences, show the actual attribute names
707
- if dimension == :attribute_order
708
- attrs1 = extract_attributes(node1)&.keys || []
709
- attrs2 = extract_attributes(node2)&.keys || []
710
- return "Attribute order changed: [#{attrs1.join(', ')}] → [#{attrs2.join(', ')}]"
711
- end
712
-
713
- if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
714
- "element structure mismatch (children differ)"
715
- elsif dimension == :element_structure &&
716
- diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
717
- diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
718
- (node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
719
- (node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
720
- node1.name && node2.name && node1.name != node2.name
721
- # Most common case: differing element names. Surface the
722
- # actual names rather than a generic "elements differ".
723
- "different element name (<#{node1.name}> vs <#{node2.name}>)"
724
- else
725
- Canon::Comparison.code_pair_label(diff1, diff2)
726
- end
727
- end
728
-
729
- # Build a clear reason message for attribute value differences
730
- #
731
- # @param attrs1 [Hash, nil] First node's attributes
732
- # @param attrs2 [Hash, nil] Second node's attributes
733
- # @return [String] Clear explanation of the attribute value difference
734
- def build_attribute_value_diff_reason(attrs1, attrs2)
735
- return "missing vs present attributes" unless attrs1 && attrs2
736
-
737
- require "set"
738
- keys1 = attrs1.keys.to_set
739
- keys2 = attrs2.keys.to_set
740
-
741
- common = keys1 & keys2
742
- different_values = common.reject { |k| attrs1[k] == attrs2[k] }
743
-
744
- return "all attribute values match" if different_values.empty?
745
-
746
- parts = different_values.map do |k|
747
- "#{k}: #{attrs1[k].inspect} vs #{attrs2[k].inspect}"
748
- end
749
-
750
- parts.join("; ")
751
- end
752
-
753
- # Build a clear reason message for attribute presence differences
754
- #
755
- # @param attrs1 [Hash, nil] First node's attributes
756
- # @param attrs2 [Hash, nil] Second node's attributes
757
- # @return [String] Clear explanation of the attribute difference
758
- def build_attribute_diff_reason(attrs1, attrs2)
759
- return "#{attrs1&.keys&.size || 0} vs #{attrs2&.keys&.size || 0} attributes" unless attrs1 && attrs2
760
-
761
- require "set"
762
- keys1 = attrs1.keys.to_set
763
- keys2 = attrs2.keys.to_set
764
-
765
- only_in_first = keys1 - keys2
766
- only_in_second = keys2 - keys1
767
- common = keys1 & keys2
768
-
769
- # Check if values differ for common keys
770
- different_values = common.reject { |k| attrs1[k] == attrs2[k] }
771
-
772
- parts = []
773
- parts << "only in first: #{only_in_first.to_a.sort.join(', ')}" if only_in_first.any?
774
- parts << "only in second: #{only_in_second.to_a.sort.join(', ')}" if only_in_second.any?
775
- parts << "different values: #{different_values.sort.join(', ')}" if different_values.any?
776
-
777
- if parts.empty?
778
- "#{keys1.size} vs #{keys2.size} attributes (same names)"
779
- else
780
- parts.join("; ")
781
- end
782
- end
783
-
784
- # Extract text from a node for diff reason
785
- #
786
- # @param node [Object, nil] Node to extract text from
787
- # @return [String, nil] Text content or nil
788
- def extract_text_from_node(node)
789
- return nil if node.nil?
790
- return node.to_s if node.is_a?(String)
791
-
792
- case node
793
- when Canon::Xml::Nodes::TextNode
794
- node.value
795
- when Canon::Xml::Node
796
- node.text_content
797
- else
798
- Canon::XmlParsing.xml_node?(node) ? Canon::XmlParsing.text_content(node).to_s : node.to_s
799
- end
800
- rescue StandardError
801
- nil
802
- end
803
-
804
- # Build a clear reason message for text content differences
805
- #
806
- # @param text1 [String, nil] First text content
807
- # @param text2 [String, nil] Second text content
808
- # @return [String] Clear explanation of the text difference
809
- def build_text_diff_reason(text1, text2)
810
- # Handle nil cases
811
- return "missing vs '#{truncate_text(text2)}'" if text1.nil? && text2
812
- return "'#{truncate_text(text2)}' vs missing" if text1 && text2.nil?
813
- return "both missing" if text1.nil? && text2.nil?
814
-
815
- # Check if both are whitespace-only
816
- if whitespace_only?(text1) && whitespace_only?(text2)
817
- return "whitespace: #{describe_whitespace(text1)} vs #{describe_whitespace(text2)}"
818
- end
819
-
820
- # Show text with visible whitespace markers
821
- # Use escaped representations for clarity: \n for newline, \t for tab, · for spaces
822
- vis1 = visualize_whitespace(text1)
823
- vis2 = visualize_whitespace(text2)
824
-
825
- "Text: \"#{vis1}\" vs \"#{vis2}\""
826
- end
827
-
828
- # Build a Reason line for a +:whitespace_adjacency+ diff (#137).
829
- # Names which side carries the whitespace, the adjacency position
830
- # relative to content neighbours, and surfaces the whitespace
831
- # with visible markers.
832
- def build_whitespace_adjacency_reason(node1, node2)
833
- text1 = extract_text_from_node(node1)
834
- text2 = extract_text_from_node(node2)
835
-
836
- ni = NodeInspector
837
- ws_on_first = ni.whitespace_only_text?(node1) &&
838
- !ni.whitespace_only_text?(node2)
839
- ws_on_second = ni.whitespace_only_text?(node2) &&
840
- !ni.whitespace_only_text?(node1)
841
-
842
- if ws_on_first
843
- ws_text = text1
844
- content_text = text2
845
- present_side = "EXPECTED"
846
- absent_side = "ACTUAL"
847
- ws_node = node1
848
- elsif ws_on_second
849
- ws_text = text2
850
- content_text = text1
851
- present_side = "ACTUAL"
852
- absent_side = "EXPECTED"
853
- ws_node = node2
854
- else
855
- return build_text_diff_reason(text1, text2)
856
- end
857
-
858
- ws_vis = visualize_whitespace(ws_text)
859
-
860
- if content_text.nil? || content_text.strip.empty?
861
- # Partner content extracts to "" / whitespace-only — naming it
862
- # in the Reason ("Whitespace before \"\"") gives the reader
863
- # nothing. Fall back to the parent element name so the
864
- # diff carries structural context (issue #112's contract,
865
- # extended from :text_content to :whitespace_adjacency).
866
- parent_label = whitespace_adjacency_parent_label(ws_node)
867
- "Whitespace inside #{parent_label}: " \
868
- "present on #{present_side} (\"#{ws_vis}\"), absent on #{absent_side}"
869
- else
870
- direction = whitespace_partner_direction(ws_node)
871
- content_vis = visualize_whitespace(truncate_text(content_text))
872
- "Whitespace #{direction} \"#{content_vis}\": " \
873
- "present on #{present_side} (\"#{ws_vis}\"), absent on #{absent_side}"
874
- end
875
- end
876
-
877
- def whitespace_adjacency_parent_label(ws_node)
878
- parent = NodeInspector.parent(ws_node)
879
- return "(unknown parent)" unless parent
880
-
881
- name = parent.name
882
- name && !name.empty? ? "<#{name}>" : "(unknown parent)"
883
- end
884
-
885
- # Direction of the partner content relative to the whitespace node,
886
- # phrased from the partner's point of view: "before" when the
887
- # whitespace immediately precedes its next non-whitespace sibling
888
- # (the alignment partner on the other side), "after" when the
889
- # whitespace trails the previous non-whitespace sibling, or
890
- # "adjacent to" as a degenerate fallback when neither neighbour
891
- # exists.
892
- def whitespace_partner_direction(ws_node)
893
- parent = NodeInspector.parent(ws_node)
894
- return "adjacent to" unless parent
895
-
896
- siblings = parent.children
897
- idx = siblings.index(ws_node)
898
- return "adjacent to" unless idx
899
-
900
- if non_ws_sibling_exists?(siblings, idx, 1) then "before"
901
- elsif non_ws_sibling_exists?(siblings, idx, -1) then "after"
902
- else "adjacent to"
903
- end
904
- end
905
-
906
- def non_ws_sibling_exists?(siblings, idx, direction)
907
- i = idx + direction
908
- while i >= 0 && i < siblings.length
909
- s = siblings[i]
910
- is_ws_text = NodeInspector.text_node?(s) &&
911
- NodeInspector.text_content(s).strip.empty?
912
- return true unless is_ws_text
913
-
914
- i += direction
915
- end
916
- false
917
- end
918
-
919
- # Build a Reason line for a +:comments+ diff (#144).
920
- # Names the side that carries the comment and surfaces the
921
- # comment text.
922
- def build_comments_reason(node1, node2)
923
- cm1 = node1 && NodeInspector.comment_node?(node1)
924
- cm2 = node2 && NodeInspector.comment_node?(node2)
925
-
926
- if cm1 && !cm2
927
- "Comment present on EXPECTED only: <!--#{truncate_text(comment_text(node1))}-->"
928
- elsif cm2 && !cm1
929
- "Comment present on ACTUAL only: <!--#{truncate_text(comment_text(node2))}-->"
930
- elsif cm1 && cm2
931
- t1 = truncate_text(comment_text(node1))
932
- t2 = truncate_text(comment_text(node2))
933
- "Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
934
- else
935
- "element structure mismatch (children differ)"
936
- end
937
- end
938
-
939
- def comment_text(node)
940
- NodeInspector.text_content(node).to_s
941
- end
942
-
943
- # Check if text is only whitespace
944
- #
945
- # @param text [String] Text to check
946
- # @return [Boolean] true if whitespace-only
947
- def whitespace_only?(text)
948
- return false if text.nil?
949
-
950
- text.to_s.strip.empty?
951
- end
952
-
953
- # Make whitespace visible in text content
954
- # Uses the existing character visualization map from DiffFormatter (single source of truth)
955
- #
956
- # @param text [String] Text to visualize
957
- # @return [String] Text with visible whitespace markers
958
- def visualize_whitespace(text)
959
- return "" if text.nil?
960
-
961
- # Use the character map loader as the single source of truth
962
- viz_map = character_visualization_map
963
-
964
- # Replace each character with its visualization
965
- text.chars.map { |char| viz_map[char] || char }.join
966
- end
967
-
968
- # Get the character visualization map (lazy-loaded to avoid circular dependency)
969
- #
970
- # @return [Hash] Character to visualization symbol mapping
971
- def character_visualization_map
972
- @character_visualization_map ||= begin
973
- # Load the YAML file directly to avoid circular dependency
974
- require "yaml"
975
- lib_root = File.expand_path("../..", __dir__)
976
- yaml_path = File.join(lib_root,
977
- "canon/diff_formatter/character_map.yml")
978
- data = YAML.load_file(yaml_path)
979
-
980
- # Build visualization map from the YAML data
981
- visualization_map = {}
982
- data["characters"].each do |char_data|
983
- # Get the character from either unicode code point or character field
984
- char = if char_data["unicode"]
985
- # Convert hex string to character
986
- [char_data["unicode"].to_i(16)].pack("U")
987
- else
988
- # Use character field directly (handles \n, \t, etc.)
989
- char_data["character"]
990
- end
991
-
992
- vis = char_data["visualization"]
993
- visualization_map[char] = vis
994
- end
995
-
996
- visualization_map
997
- end
998
- end
999
-
1000
- # Describe whitespace content in a readable way
1001
- #
1002
- # @param text [String] Whitespace text
1003
- # @return [String] Description like "4 chars (2 newlines, 2 spaces)"
1004
- def describe_whitespace(text)
1005
- return "0 chars" if text.nil? || text.empty?
1006
-
1007
- char_count = text.length
1008
- newline_count = text.count("\n")
1009
- space_count = text.count(" ")
1010
- tab_count = text.count("\t")
1011
-
1012
- parts = []
1013
- parts << "#{newline_count} newlines" if newline_count.positive?
1014
- parts << "#{space_count} spaces" if space_count.positive?
1015
- parts << "#{tab_count} tabs" if tab_count.positive?
1016
-
1017
- description = parts.join(", ")
1018
- "#{char_count} chars (#{description})"
1019
- end
1020
-
1021
- # Truncate text for display in reason messages
1022
- #
1023
- # @param text [String] Text to truncate
1024
- # @param max_length [Integer] Maximum length
1025
- # @return [String] Truncated text
1026
- def truncate_text(text, max_length = 40)
1027
- return "" if text.nil?
1028
-
1029
- text = text.to_s
1030
- return text if text.length <= max_length
1031
-
1032
- "#{text[0...max_length]}..."
1033
- end
1034
-
1035
547
  # Compare namespace declarations (xmlns and xmlns:* attributes)
1036
548
  # Delegates to XmlComparatorHelpers::NamespaceComparator
1037
549
  def compare_namespace_declarations(n1, n2, opts, differences)
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ # Helper classes used by {XmlComparator}. Children are autoloaded —
6
+ # never `require_relative` them.
7
+ module XmlComparatorHelpers
8
+ autoload :AttributeComparator,
9
+ "canon/comparison/xml_comparator/attribute_comparator"
10
+ autoload :AttributeFilter,
11
+ "canon/comparison/xml_comparator/attribute_filter"
12
+ autoload :ChildComparison,
13
+ "canon/comparison/xml_comparator/child_comparison"
14
+ autoload :NamespaceComparator,
15
+ "canon/comparison/xml_comparator/namespace_comparator"
16
+ autoload :NodeParser, "canon/comparison/xml_comparator/node_parser"
17
+ autoload :NodeTypeComparator,
18
+ "canon/comparison/xml_comparator/node_type_comparator"
19
+ end
20
+ end
21
+ end