canon 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +25 -135
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/advanced/extending-canon.adoc +193 -0
  6. data/docs/internals/diffnode-enrichment.adoc +611 -0
  7. data/docs/internals/index.adoc +251 -0
  8. data/docs/lychee.toml +13 -6
  9. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +250 -0
  10. data/docs/understanding/architecture.adoc +749 -33
  11. data/docs/understanding/comparison-pipeline.adoc +122 -0
  12. data/false_positive_analysis.txt +0 -0
  13. data/file1.html +1 -0
  14. data/file2.html +1 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +86 -0
  27. data/lib/canon/comparison/html_comparator.rb +51 -18
  28. data/lib/canon/comparison/html_parser.rb +80 -0
  29. data/lib/canon/comparison/json_comparator.rb +12 -0
  30. data/lib/canon/comparison/json_parser.rb +19 -0
  31. data/lib/canon/comparison/markup_comparator.rb +293 -0
  32. data/lib/canon/comparison/match_options/base_resolver.rb +143 -0
  33. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  34. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  35. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  36. data/lib/canon/comparison/match_options.rb +68 -463
  37. data/lib/canon/comparison/profile_definition.rb +149 -0
  38. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  39. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  40. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  41. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  42. data/lib/canon/comparison/xml_comparator/child_comparison.rb +189 -0
  43. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  44. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  45. data/lib/canon/comparison/xml_comparator/node_parser.rb +74 -0
  46. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +95 -0
  47. data/lib/canon/comparison/xml_comparator.rb +52 -664
  48. data/lib/canon/comparison/xml_node_comparison.rb +297 -0
  49. data/lib/canon/comparison/xml_parser.rb +19 -0
  50. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  51. data/lib/canon/comparison.rb +265 -110
  52. data/lib/canon/diff/diff_node.rb +32 -2
  53. data/lib/canon/diff/node_serializer.rb +191 -0
  54. data/lib/canon/diff/path_builder.rb +143 -0
  55. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  56. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  57. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  58. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  59. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  60. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  61. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  64. data/lib/canon/diff_formatter.rb +1 -1
  65. data/lib/canon/rspec_matchers.rb +1 -1
  66. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  67. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  68. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  69. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  70. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  71. data/lib/canon/version.rb +1 -1
  72. data/old-docs/ADVANCED_TOPICS.adoc +20 -0
  73. data/old-docs/BASIC_USAGE.adoc +16 -0
  74. data/old-docs/CHARACTER_VISUALIZATION.adoc +567 -0
  75. data/old-docs/CLI.adoc +497 -0
  76. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  77. data/old-docs/DIFF_ARCHITECTURE.adoc +435 -0
  78. data/old-docs/DIFF_FORMATTING.adoc +540 -0
  79. data/old-docs/DIFF_PARAMETERS.adoc +261 -0
  80. data/old-docs/DOM_DIFF.adoc +1017 -0
  81. data/old-docs/ENV_CONFIG.adoc +876 -0
  82. data/old-docs/FORMATS.adoc +867 -0
  83. data/old-docs/INPUT_VALIDATION.adoc +477 -0
  84. data/old-docs/MATCHER_BEHAVIOR.adoc +90 -0
  85. data/old-docs/MATCH_ARCHITECTURE.adoc +463 -0
  86. data/old-docs/MATCH_OPTIONS.adoc +912 -0
  87. data/old-docs/MODES.adoc +432 -0
  88. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  89. data/old-docs/OPTIONS.adoc +1387 -0
  90. data/old-docs/PREPROCESSING.adoc +491 -0
  91. data/old-docs/README.old.adoc +2831 -0
  92. data/old-docs/RSPEC.adoc +814 -0
  93. data/old-docs/RUBY_API.adoc +485 -0
  94. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +646 -0
  95. data/old-docs/SEMANTIC_TREE_DIFF.adoc +765 -0
  96. data/old-docs/STRING_COMPARE.adoc +345 -0
  97. data/old-docs/TMP.adoc +3384 -0
  98. data/old-docs/TREE_DIFF.adoc +1080 -0
  99. data/old-docs/UNDERSTANDING_CANON.adoc +17 -0
  100. data/old-docs/VERBOSE.adoc +482 -0
  101. data/old-docs/VISUALIZATION_MAP.adoc +625 -0
  102. data/old-docs/WHITESPACE_TREATMENT.adoc +1155 -0
  103. data/scripts/analyze_current_state.rb +85 -0
  104. data/scripts/analyze_false_positives.rb +114 -0
  105. data/scripts/analyze_remaining_failures.rb +105 -0
  106. data/scripts/compare_current_failures.rb +95 -0
  107. data/scripts/compare_dom_tree_diff.rb +158 -0
  108. data/scripts/compare_failures.rb +151 -0
  109. data/scripts/debug_attribute_extraction.rb +66 -0
  110. data/scripts/debug_blocks_839.rb +115 -0
  111. data/scripts/debug_meta_matching.rb +52 -0
  112. data/scripts/debug_p_matching.rb +192 -0
  113. data/scripts/debug_signature_matching.rb +118 -0
  114. data/scripts/debug_sourcecode_124.rb +32 -0
  115. data/scripts/debug_whitespace_sensitive.rb +192 -0
  116. data/scripts/extract_false_positives.rb +138 -0
  117. data/scripts/find_actual_false_positives.rb +125 -0
  118. data/scripts/investigate_all_false_positives.rb +161 -0
  119. data/scripts/investigate_batch1.rb +127 -0
  120. data/scripts/investigate_classification.rb +150 -0
  121. data/scripts/investigate_classification_detailed.rb +190 -0
  122. data/scripts/investigate_common_failures.rb +342 -0
  123. data/scripts/investigate_false_negative.rb +80 -0
  124. data/scripts/investigate_false_positive.rb +83 -0
  125. data/scripts/investigate_false_positives.rb +227 -0
  126. data/scripts/investigate_false_positives_batch.rb +163 -0
  127. data/scripts/investigate_mixed_content.rb +125 -0
  128. data/scripts/investigate_remaining_16.rb +214 -0
  129. data/scripts/run_single_test.rb +29 -0
  130. data/scripts/test_all_false_positives.rb +95 -0
  131. data/scripts/test_attribute_details.rb +61 -0
  132. data/scripts/test_both_algorithms.rb +49 -0
  133. data/scripts/test_both_simple.rb +49 -0
  134. data/scripts/test_enhanced_semantic_output.rb +125 -0
  135. data/scripts/test_readme_examples.rb +131 -0
  136. data/scripts/test_semantic_tree_diff.rb +99 -0
  137. data/scripts/test_semantic_ux_improvements.rb +135 -0
  138. data/scripts/test_single_false_positive.rb +119 -0
  139. data/scripts/test_size_limits.rb +99 -0
  140. data/test_html_1.html +21 -0
  141. data/test_html_2.html +21 -0
  142. data/test_nokogiri.rb +33 -0
  143. data/test_normalize.rb +45 -0
  144. metadata +123 -2
@@ -0,0 +1,189 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Comparison
    module XmlComparatorHelpers
      # Child comparison service for XML nodes.
      #
      # Compares the filtered children of two parent nodes, either by pairing
      # them through Canon::Xml::ElementMatcher or by plain position. Filtering,
      # per-node comparison and difference recording are all delegated back to
      # the comparator, whose helpers are invoked with +send+.
      module ChildComparison
        class << self
          # Compare the children of two nodes.
          #
          # @param node1 [Object] First parent node (must respond to +children+)
          # @param node2 [Object] Second parent node (must respond to +children+)
          # @param comparator [XmlComparator] The comparator instance for delegation
          # @param opts [Hash] Comparison options
          # @param child_opts [Hash] Options for child comparison
          # @param diff_children [Boolean] Whether to diff children
          # @param differences [Array] Array to collect differences
          # @return [Object] One of the Comparison result constants
          def compare(node1, node2, comparator, opts, child_opts, diff_children, differences)
            children1 = comparator.send(:filter_children, node1.children, opts)
            children2 = comparator.send(:filter_children, node2.children, opts)

            # Quick check: if both have no children, they're equivalent
            return Comparison::EQUIVALENT if children1.empty? && children2.empty?

            if can_use_element_matcher?(children1, children2)
              use_element_matcher_comparison(children1, children2, node1, comparator,
                                             opts, child_opts, diff_children, differences)
            else
              use_positional_comparison(children1, children2, node1, comparator,
                                        opts, child_opts, diff_children, differences)
            end
          end

          private

          # Check if ElementMatcher can be used for these children.
          #
          # True only when both lists are non-empty and every child is a
          # Canon::Xml::Node whose node_type is :element.
          def can_use_element_matcher?(children1, children2)
            !children1.empty? && !children2.empty? &&
              children1.all? { |c| c.is_a?(Canon::Xml::Node) && c.node_type == :element } &&
              children2.all? { |c| c.is_a?(Canon::Xml::Node) && c.node_type == :element }
          end

          # Pair children with ElementMatcher and compare the resulting matches.
          def use_element_matcher_comparison(children1, children2, parent_node, comparator,
                                             opts, child_opts, diff_children, differences)
            # Loaded lazily, only when this strategy is actually taken.
            require_relative "../../xml/element_matcher"
            require_relative "../../xml/nodes/root_node"

            # Wrap copies of each child list in a temporary RootNode so the
            # matcher can be handed two trees.
            temp_root1 = Canon::Xml::Nodes::RootNode.new
            temp_root1.instance_variable_set(:@children, children1.dup)

            temp_root2 = Canon::Xml::Nodes::RootNode.new
            temp_root2.instance_variable_set(:@children, children2.dup)

            matcher = Canon::Xml::ElementMatcher.new
            matches = matcher.match_trees(temp_root1, temp_root2)

            # Keep only matches whose elements are direct children
            matches = matches.select do |m|
              (m.elem1.nil? || children1.include?(m.elem1)) &&
                (m.elem2.nil? || children2.include?(m.elem2))
            end

            # If no matches and children exist, they're all different
            if matches.empty? && (!children1.empty? || !children2.empty?)
              comparator.send(:add_difference, parent_node, parent_node,
                              Comparison::MISSING_NODE, Comparison::MISSING_NODE,
                              :text_content, opts, differences)
              return Comparison::UNEQUAL_ELEMENTS
            end

            process_matches(matches, children1, children2, parent_node, comparator,
                            opts, child_opts, diff_children, differences)
          end

          # Walk the ElementMatcher results, recording differences.
          #
          # Returns Comparison::EQUIVALENT only when no match produced a
          # difference that counts; otherwise Comparison::UNEQUAL_ELEMENTS.
          def process_matches(matches, _children1, _children2, _parent_node, comparator,
                              opts, child_opts, diff_children, differences)
            all_equivalent = true

            matches.each do |match|
              case match.status
              when :matched
                if match.position_changed?
                  position_behavior = element_position_behavior(opts)

                  # Only record a difference if element_position is not :ignore;
                  # only :strict makes the move count as inequality.
                  if position_behavior != :ignore
                    comparator.send(:add_difference, match.elem1, match.elem2,
                                    "position #{match.pos1}", "position #{match.pos2}",
                                    :element_position, opts, differences)
                    all_equivalent = false if position_behavior == :strict
                  end
                end

                # Compare the matched elements themselves; child_opts is passed
                # as both the options and the child options for this level.
                result = comparator.send(:compare_nodes, match.elem1, match.elem2,
                                         child_opts, child_opts, diff_children, differences)
                all_equivalent = false unless result == Comparison::EQUIVALENT

              when :deleted
                # Element present in first tree but not second
                comparator.send(:add_difference, match.elem1, nil,
                                Comparison::MISSING_NODE, Comparison::MISSING_NODE,
                                :element_structure, opts, differences)
                all_equivalent = false

              when :inserted
                # Element present in second tree but not first
                comparator.send(:add_difference, nil, match.elem2,
                                Comparison::MISSING_NODE, Comparison::MISSING_NODE,
                                :element_structure, opts, differences)
                all_equivalent = false
              end
            end

            all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ELEMENTS
          end

          # Resolve the :element_position behaviour from opts[:match_opts].
          #
          # Falls back to :strict when :match_opts is absent or has no
          # :element_position entry, so a missing :match_opts no longer raises.
          def element_position_behavior(opts)
            match_opts = opts[:match_opts] || {}
            match_opts[:element_position] || :strict
          end

          # Compare children pairwise by index.
          #
          # A length mismatch records a single difference on the parent and
          # returns Comparison::MISSING_NODE without comparing any pair.
          def use_positional_comparison(children1, children2, parent_node, comparator,
                                        opts, child_opts, diff_children, differences)
            unless children1.length == children2.length
              dimension = determine_dimension_for_mismatch(children1, children2, comparator)
              comparator.send(:add_difference, parent_node, parent_node,
                              Comparison::MISSING_NODE, Comparison::MISSING_NODE,
                              dimension, opts, differences)
              return Comparison::MISSING_NODE
            end

            # Every pair is compared; the last non-equivalent result wins.
            result = Comparison::EQUIVALENT
            children1.zip(children2).each do |child1, child2|
              child_result = comparator.send(:compare_nodes, child1, child2,
                                             child_opts, child_opts, diff_children, differences)
              result = child_result unless child_result == Comparison::EQUIVALENT
            end

            result
          end

          # Pick the dimension to report for a child-count mismatch.
          #
          # Scans index by index and uses the first extra child, or the first
          # position whose node types differ; defaults to :text_content.
          def determine_dimension_for_mismatch(children1, children2, comparator)
            dimension = :text_content # default

            max_len = [children1.length, children2.length].max
            (0...max_len).each do |i|
              if i >= children1.length
                # Extra child in children2
                dimension = comparator.send(:determine_node_dimension, children2[i])
                break
              elsif i >= children2.length
                # Extra child in children1
                dimension = comparator.send(:determine_node_dimension, children1[i])
                break
              elsif !comparator.send(:same_node_type?, children1[i], children2[i])
                # Different node types at same position
                dimension = comparator.send(:determine_node_dimension, children1[i])
                break
              end
            end

            dimension
          end
        end
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../diff/diff_node"
4
+ require_relative "../../diff/path_builder"
5
+ require_relative "../../diff/node_serializer"
6
+
7
module Canon
  module Comparison
    # Factory for DiffNode objects carrying presentation metadata:
    # a path, serialized before/after content and before/after attributes.
    class DiffNodeBuilder
      class << self
        # Build an enriched DiffNode.
        #
        # @param node1 [Object, nil] First node
        # @param node2 [Object, nil] Second node
        # @param diff1 [Object] Difference type for node1
        # @param diff2 [Object] Difference type for node2
        # @param dimension [Symbol] The match dimension causing this difference
        # @param _opts [Hash] Any further keywords are accepted and ignored
        # @return [Canon::Diff::DiffNode] The enriched DiffNode
        # @raise [ArgumentError] if dimension is nil
        def build(node1:, node2:, diff1:, diff2:, dimension:, **_opts)
          raise ArgumentError, "dimension required for DiffNode" if dimension.nil?

          explanation = build_reason(node1, node2, diff1, diff2, dimension)
          extras = enrich_metadata(node1, node2)

          Canon::Diff::DiffNode.new(
            node1: node1,
            node2: node2,
            dimension: dimension,
            reason: explanation,
            **extras,
          )
        end

        # Build a human-readable reason for a difference.
        #
        # The element name (and namespace, when non-empty) is prepended only
        # for the :text_content dimension when one side is nil and the
        # remaining node responds to both +name+ and +namespace_uri+.
        #
        # @param node1 [Object, nil] First node
        # @param node2 [Object, nil] Second node
        # @param diff1 [Object] Difference type for node1
        # @param diff2 [Object] Difference type for node2
        # @param dimension [Symbol] The dimension of the difference
        # @return [String] Human-readable reason
        def build_reason(node1, node2, diff1, diff2, dimension)
          one_sided = node1.nil? || node2.nil?
          survivor = node1 || node2
          named = dimension == :text_content && one_sided &&
                  survivor.respond_to?(:name) && survivor.respond_to?(:namespace_uri)
          return "#{diff1} vs #{diff2}" unless named

          uri = survivor.namespace_uri
          suffix = uri.nil? || uri.empty? ? "" : " (namespace: #{uri})"
          "element '#{survivor.name}'#{suffix}: #{diff1} vs #{diff2}"
        end

        # Collect the metadata keywords handed to DiffNode.new.
        #
        # @param node1 [Object, nil] First node
        # @param node2 [Object, nil] Second node
        # @return [Hash] path, serialized_before/after, attributes_before/after
        def enrich_metadata(node1, node2)
          anchor = node1 || node2

          {
            path: build_path(anchor),
            serialized_before: serialize(node1),
            serialized_after: serialize(node2),
            attributes_before: extract_attributes(node1),
            attributes_after: extract_attributes(node2),
          }
        end

        # Build the path for a node via Canon::Diff::PathBuilder.
        #
        # @param node [Object, nil] Node to build path for
        # @return [Object, nil] PathBuilder result, or nil for a nil node
        def build_path(node)
          node.nil? ? nil : Canon::Diff::PathBuilder.build(node, format: :document)
        end

        # Serialize a node via Canon::Diff::NodeSerializer.
        #
        # @param node [Object, nil] Node to serialize
        # @return [Object, nil] Serializer result, or nil for a nil node
        def serialize(node)
          node.nil? ? nil : Canon::Diff::NodeSerializer.serialize(node)
        end

        # Extract a node's attributes via Canon::Diff::NodeSerializer.
        #
        # @param node [Object, nil] Node to extract attributes from
        # @return [Object, nil] Serializer result, or nil for a nil node
        def extract_attributes(node)
          node.nil? ? nil : Canon::Diff::NodeSerializer.extract_attributes(node)
        end
      end
    end
  end
end
@@ -0,0 +1,186 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Comparison
    module XmlComparatorHelpers
      # Namespace declaration comparison logic.
      # Handles comparison of xmlns and xmlns:* declarations, keyed by prefix
      # (the default namespace is stored under the empty-string prefix).
      class NamespaceComparator
        # Compare namespace declarations between two nodes.
        #
        # @param node1 [Object] First node
        # @param node2 [Object] Second node
        # @param opts [Hash] Comparison options (forwarded to DiffNodeBuilder)
        # @param differences [Array] Array to append differences to
        # @return [Object] Comparison::EQUIVALENT or Comparison::UNEQUAL_ATTRIBUTES
        def self.compare(node1, node2, opts, differences)
          ns_decls1 = extract_declarations(node1)
          ns_decls2 = extract_declarations(node2)

          missing = ns_decls1.keys - ns_decls2.keys # In node1 but not node2
          extra = ns_decls2.keys - ns_decls1.keys   # In node2 but not node1
          # key? (not truthiness) so a prefix present on both sides is still
          # reported when the second side's value is nil.
          changed = ns_decls1.select do |prefix, uri|
            ns_decls2.key?(prefix) && ns_decls2[prefix] != uri
          end.keys

          return Comparison::EQUIVALENT if missing.empty? && extra.empty? && changed.empty?

          add_namespace_difference(node1, node2, missing, extra, changed,
                                   opts, differences)
          Comparison::UNEQUAL_ATTRIBUTES
        end

        # Extract namespace declarations from a node.
        #
        # Prefers +namespace_nodes+ when the node offers it; otherwise reads
        # +attribute_nodes+ (if available) or +attributes+ and keeps only the
        # xmlns / xmlns:* entries.
        #
        # @param node [Object] Node to extract namespace declarations from
        # @return [Hash] Hash of prefix => URI mappings
        def self.extract_declarations(node)
          declarations = {}

          if node.respond_to?(:namespace_nodes)
            return extract_from_namespace_nodes(node.namespace_nodes,
                                                declarations)
          end

          raw_attrs = node.respond_to?(:attribute_nodes) ? node.attribute_nodes : node.attributes

          if raw_attrs.is_a?(Array)
            extract_from_array_attributes(raw_attrs, declarations)
          else
            extract_from_hash_attributes(raw_attrs, declarations)
          end

          declarations
        end

        # Extract from a list of namespace node objects.
        #
        # @param namespace_nodes [Array] Objects responding to +prefix+ and +uri+
        # @param declarations [Hash] Output hash to populate
        # @return [Hash] Declarations hash
        def self.extract_from_namespace_nodes(namespace_nodes, declarations)
          namespace_nodes.each do |ns|
            # Skip the implicit xml namespace
            next if ns.prefix == "xml" && ns.uri == "http://www.w3.org/XML/1998/namespace"

            declarations[ns.prefix || ""] = ns.uri
          end

          declarations
        end

        # Extract from array-format attributes.
        #
        # @param raw_attrs [Array] Objects responding to +name+ and +value+
        # @param declarations [Hash] Output hash to populate
        # @return [Hash] Declarations hash
        def self.extract_from_array_attributes(raw_attrs, declarations)
          raw_attrs.each do |attr|
            name = attr.name
            next unless namespace_declaration?(name)

            declarations[declaration_prefix(name)] = attr.value
          end

          declarations
        end

        # Extract from hash-like attributes.
        #
        # Two iteration shapes are handled: (name String, attribute object)
        # pairs, and a lone attribute object with a nil second element. In the
        # latter case the value is read from the attribute object itself;
        # previously it was taken from the nil and became "".
        #
        # @param raw_attrs [#each] Hash-like attributes
        # @param declarations [Hash] Output hash to populate
        # @return [Hash] Declarations hash
        def self.extract_from_hash_attributes(raw_attrs, declarations)
          raw_attrs.each do |key, val|
            name = if key.is_a?(String)
                     key
                   else
                     key.respond_to?(:name) ? key.name : key.to_s
                   end
            next unless namespace_declaration?(name)

            value = if val.respond_to?(:value)
                      val.value
                    elsif val.nil? && key.respond_to?(:value)
                      # Lone attribute object: the key carries the value
                      key.value
                    else
                      val.to_s
                    end

            declarations[declaration_prefix(name)] = value
          end

          declarations
        end

        # Check if an attribute name is a namespace declaration.
        #
        # @param attr_name [String] Attribute name
        # @return [Boolean] true for "xmlns" or any "xmlns:*" name
        def self.namespace_declaration?(attr_name)
          attr_name == "xmlns" || attr_name.start_with?("xmlns:")
        end

        # Prefix declared by an xmlns attribute name.
        #
        # @param attr_name [String] "xmlns" or "xmlns:prefix"
        # @return [String] "" for "xmlns", otherwise the text after the first colon
        def self.declaration_prefix(attr_name)
          attr_name == "xmlns" ? "" : attr_name.split(":", 2)[1]
        end

        # Summarise prefix changes as "removed: …", "added: …", "changed: …".
        #
        # NOTE(review): nothing consumes this summary yet. The original built
        # the same strings inline and discarded them, and the keywords passed
        # to DiffNodeBuilder.build do not include it. Confirm whether it
        # should be wired into the DiffNode reason.
        #
        # @param missing [Array<String>] Prefixes only on the first node
        # @param extra [Array<String>] Prefixes only on the second node
        # @param changed [Array<String>] Prefixes whose URI differs
        # @return [Array<String>] One entry per non-empty category
        def self.describe_changes(missing, extra, changed)
          { "removed" => missing, "added" => extra, "changed" => changed }
            .reject { |_label, prefixes| prefixes.empty? }
            .map do |label, prefixes|
              names = prefixes.map { |p| p.empty? ? "xmlns" : "xmlns:#{p}" }
              "#{label}: #{names.join(', ')}"
            end
        end

        # Record a namespace declaration difference.
        #
        # @param node1 [Object] First node
        # @param node2 [Object] Second node
        # @param missing [Array] Missing prefixes (currently not forwarded)
        # @param extra [Array] Extra prefixes (currently not forwarded)
        # @param changed [Array] Changed prefixes (currently not forwarded)
        # @param opts [Hash] Options, splatted into DiffNodeBuilder.build
        # @param differences [Array] Array to append difference to
        def self.add_namespace_difference(node1, node2, _missing, _extra,
                                          _changed, opts, differences)
          # Required here rather than at file top to avoid a circular dependency
          require_relative "diff_node_builder"

          diff_node = DiffNodeBuilder.build(
            node1: node1,
            node2: node2,
            diff1: Comparison::UNEQUAL_ATTRIBUTES,
            diff2: Comparison::UNEQUAL_ATTRIBUTES,
            dimension: :namespace_declarations,
            **opts,
          )
          differences << diff_node if diff_node
        end
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../xml/c14n"
4
+
5
module Canon
  module Comparison
    module XmlComparatorHelpers
      # Turns comparison inputs into whatever Canon::Xml::DataModel.from_xml
      # returns, optionally preprocessing XML strings first.
      class NodeParser
        class << self
          # Parse a node from string, or pass a Canon::Xml::Node through.
          #
          # Preprocessing is applied to String input only; any other
          # non-Canon object is converted via {convert_from_node} and the
          # preprocessing argument is not used for it.
          #
          # @param node [String, Object] Node to parse
          # @param preprocessing [Symbol] Preprocessing mode (:none, :normalize, :c14n, :format)
          # @return [Object] The given Canon::Xml::Node, or the DataModel parse result
          def parse(node, preprocessing = :none)
            return node if node.is_a?(Canon::Xml::Node)
            return convert_from_node(node) unless node.is_a?(String)

            prepared = apply_preprocessing(node, preprocessing)
            Canon::Xml::DataModel.from_xml(prepared)
          end

          # Apply a preprocessing transformation to an XML string.
          #
          # @param xml_string [String] XML string to preprocess
          # @param preprocessing [Symbol] Preprocessing mode
          # @return [String] Preprocessed XML string (unchanged for :none or
          #   any unrecognized mode)
          def apply_preprocessing(xml_string, preprocessing)
            case preprocessing
            when :normalize
              # Strip each line, drop the blank ones, rejoin with newlines
              stripped = xml_string.each_line.map(&:strip)
              stripped.reject(&:empty?).join("\n")
            when :c14n
              Canon::Xml::C14n.canonicalize(xml_string, with_comments: false)
            when :format
              Canon.format(xml_string, :xml)
            else
              xml_string
            end
          end

          # Convert a non-String, non-Canon node by serializing and re-parsing.
          #
          # Uses +to_xml+ when available, otherwise +to_s+. The error branch
          # is only reached by objects that respond to neither.
          #
          # @param node [Object] Node to convert
          # @return [Object] The DataModel parse result
          # @raise [Canon::Error] if the node responds to neither method
          def convert_from_node(node)
            serializer = %i[to_xml to_s].find { |name| node.respond_to?(name) }
            if serializer.nil?
              raise Canon::Error,
                    "Unable to convert node to string: #{node.class}"
            end

            Canon::Xml::DataModel.from_xml(node.public_send(serializer))
          end
        end
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Comparison
    module XmlComparatorHelpers
      # Routes a node pair to the comparator method that matches the first
      # node's type.
      #
      # Two detection styles are supported: nodes whose +node_type+ is a
      # Symbol, and nodes exposing predicate methods (+element?+, +text?+, …).
      # The comparator's methods are invoked with +send+.
      module NodeTypeComparator
        # node_type Symbol => [comparator method, takes child arguments?]
        SYMBOLIC_HANDLERS = {
          root: [:compare_children, true],
          element: [:compare_element_nodes, true],
          text: [:compare_text_nodes, false],
          comment: [:compare_comment_nodes, false],
          cdata: [:compare_text_nodes, false],
          processing_instruction: [:compare_processing_instruction_nodes, false],
        }.freeze

        # [predicate, comparator method, takes child arguments?], tried in order
        PREDICATE_HANDLERS = [
          [:element?, :compare_element_nodes, true],
          [:text?, :compare_text_nodes, false],
          [:comment?, :compare_comment_nodes, false],
          [:cdata?, :compare_text_nodes, false],
          [:processing_instruction?, :compare_processing_instruction_nodes, false],
        ].freeze

        private_constant :SYMBOLIC_HANDLERS, :PREDICATE_HANDLERS

        class << self
          # Compare two nodes by dispatching to the appropriate comparator method.
          #
          # The symbolic route is taken only when both nodes respond to
          # +node_type+ and both values are Symbols (an Integer node_type
          # falls through to the predicate route).
          #
          # @param node1 [Object] First node
          # @param node2 [Object] Second node
          # @param comparator [XmlComparator] The comparator instance for method delegation
          # @param opts [Hash] Comparison options
          # @param child_opts [Hash] Options for child comparison
          # @param diff_children [Boolean] Whether to diff children
          # @param differences [Array] Array to collect differences
          # @return [Object] Whatever the selected comparator method returns,
          #   or Comparison::EQUIVALENT when no handler applies
          def compare(node1, node2, comparator, opts, child_opts, diff_children, differences)
            pair = [node1, node2]
            symbolic = pair.all? { |n| n.respond_to?(:node_type) } &&
                       pair.all? { |n| n.node_type.is_a?(Symbol) }
            route = symbolic ? :compare_by_symbolic_type : :compare_by_predicate_methods

            send(route, node1, node2, comparator, opts, child_opts,
                 diff_children, differences)
          end

          private

          # Dispatch on node1's Symbol node_type.
          def compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
                                       diff_children, differences)
            handler, with_children = SYMBOLIC_HANDLERS[node1.node_type]
            return Comparison::EQUIVALENT if handler.nil?

            forward(comparator, handler, with_children, node1, node2, opts,
                    child_opts, diff_children, differences)
          end

          # Dispatch on the first predicate node1 answers truthily; nodes that
          # match none but respond to +root+ are treated as documents.
          def compare_by_predicate_methods(node1, node2, comparator, opts, child_opts,
                                           diff_children, differences)
            _predicate, handler, with_children = PREDICATE_HANDLERS.find do |predicate, _method, _flag|
              node1.respond_to?(predicate) && node1.public_send(predicate)
            end

            if handler
              forward(comparator, handler, with_children, node1, node2, opts,
                      child_opts, diff_children, differences)
            elsif node1.respond_to?(:root)
              comparator.send(:compare_document_nodes, node1, node2, opts, child_opts,
                              diff_children, differences)
            else
              Comparison::EQUIVALENT
            end
          end

          # Invoke a comparator method with either the long argument list
          # (including child options) or the short one.
          def forward(comparator, handler, with_children, node1, node2, opts,
                      child_opts, diff_children, differences)
            if with_children
              comparator.send(handler, node1, node2, opts, child_opts,
                              diff_children, differences)
            else
              comparator.send(handler, node1, node2, opts, differences)
            end
          end
        end
      end
    end
  end
end