canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module TreeDiff
    # TreeDiffIntegrator provides integration between Canon's DOM diff system
    # and the semantic tree diff system.
    #
    # This class orchestrates:
    # - Format-specific adapter selection
    # - Tree conversion from parsed documents
    # - Tree matching via UniversalMatcher
    # - Operation detection
    # - Results formatting
    #
    # @example XML tree diff
    #   integrator = TreeDiffIntegrator.new(format: :xml)
    #   result = integrator.diff(doc1, doc2)
    #   result[:operations] # => [Operation(...), ...]
    #
    class TreeDiffIntegrator
      # Similarity threshold handed to the matcher when the caller gives none.
      DEFAULT_SIMILARITY_THRESHOLD = 0.95

      # Node-count ceiling applied when the caller gives no :max_node_count.
      DEFAULT_MAX_NODE_COUNT = 10_000

      attr_reader :format, :adapter, :matcher, :match_options

      # Initialize integrator for a specific format
      #
      # @param format [Symbol] Format type (:xml, :html, :html4, :html5,
      #   :json, :yaml)
      # @param options [Hash] Configuration options (match options from
      #   Canon::Comparison)
      # @option options [Float] :similarity_threshold Threshold for similarity
      #   matching (default: 0.95)
      # @option options [Boolean] :hash_matching Enable hash matching phase
      #   (default: true)
      # @option options [Boolean] :similarity_matching Enable similarity
      #   matching phase (default: true)
      # @option options [Boolean] :propagation Enable propagation phase
      #   (default: true)
      # @option options [Symbol] :text_content How to compare text
      #   (:strict, :normalize)
      # @option options [Symbol] :attribute_order How to compare attributes
      #   (:strict, :ignore; default: :ignore)
      # @option options [Integer] :max_node_count Per-tree node limit
      #   (default: 10_000; a non-positive value disables the check)
      # @raise [ArgumentError] if the format has no adapter
      def initialize(format:, options: {})
        @format = format
        @options = options
        # The same hash is kept under a second name because it is also passed
        # on to the operation detector as its match options.
        @match_options = options

        # The adapter receives the full option hash as its match options.
        @adapter = create_adapter(format, options)

        # Only the matcher-related keys are forwarded to the matcher.
        matcher_options = {
          similarity_threshold: options[:similarity_threshold] ||
            DEFAULT_SIMILARITY_THRESHOLD,
          hash_matching: options.fetch(:hash_matching, true),
          similarity_matching: options.fetch(:similarity_matching, true),
          propagation: options.fetch(:propagation, true),
          attribute_order: options[:attribute_order] || :ignore,
        }
        @matcher = Matchers::UniversalMatcher.new(matcher_options)
      end

      # Perform tree diff on two documents
      #
      # @param doc1 [Object] First document (format-specific)
      # @param doc2 [Object] Second document (format-specific)
      # @return [Hash] Diff results with :operations, :matching, :statistics
      #   and :trees
      # @raise [Canon::SizeLimitExceededError] if either tree exceeds the
      #   configured node count
      def diff(doc1, doc2)
        # Convert and size-check one document at a time, so an oversized first
        # document is rejected before the second one is converted.
        tree1 = @adapter.to_tree(doc1)
        check_node_count_limit(tree1)

        tree2 = @adapter.to_tree(doc2)
        check_node_count_limit(tree2)

        # Match trees
        matching = @matcher.match(tree1, tree2)

        # Detect operations, passing the match options along to the detector
        detector = Operations::OperationDetector.new(tree1, tree2, matching,
                                                     @match_options)
        operations = detector.detect

        {
          operations: operations,
          matching: matching,
          statistics: @matcher.statistics,
          trees: { tree1: tree1, tree2: tree2 },
        }
      end

      # Check if two documents are semantically equivalent
      #
      # @param doc1 [Object] First document
      # @param doc2 [Object] Second document
      # @return [Boolean] true if no operations detected
      def equivalent?(doc1, doc2)
        diff(doc1, doc2)[:operations].empty?
      end

      private

      # Create format-specific adapter
      #
      # @param format [Symbol] Format type
      # @param match_options [Hash] Match options handed to the adapter
      # @return [Object] Adapter instance
      # @raise [ArgumentError] if the format is not supported
      def create_adapter(format, match_options = {})
        case format
        when :xml
          Adapters::XMLAdapter.new(match_options: match_options)
        when :html, :html4, :html5
          Adapters::HTMLAdapter.new(match_options: match_options)
        when :json
          Adapters::JSONAdapter.new(match_options: match_options)
        when :yaml
          Adapters::YAMLAdapter.new(match_options: match_options)
        else
          raise ArgumentError, "Unsupported format: #{format}. " \
                               "Supported formats: :xml, :html, :html4, :html5, :json, :yaml"
        end
      end

      # Check if tree node count exceeds configured limit
      #
      # @param tree [TreeNode] Root node of tree
      # @raise [Canon::SizeLimitExceededError] if node count exceeds limit
      def check_node_count_limit(tree)
        node_count = tree.size
        max_count = get_max_node_count

        # A missing or non-positive limit turns the check off.
        return unless max_count&.positive?
        return if node_count <= max_count

        raise Canon::SizeLimitExceededError.new(:node_count, node_count,
                                                max_count)
      end

      # Get the max node count limit
      #
      # @return [Integer] the :max_node_count option when given, otherwise
      #   DEFAULT_MAX_NODE_COUNT
      def get_max_node_count
        @options[:max_node_count] || DEFAULT_MAX_NODE_COUNT
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module TreeDiff
5
+ # Tree diff module for semantic object tree diffing
6
+ end
7
+ end
8
+
9
+ # Load core components
10
+ require_relative "tree_diff/core/tree_node"
11
+ require_relative "tree_diff/core/node_signature"
12
+ require_relative "tree_diff/core/node_weight"
13
+ require_relative "tree_diff/core/matching"
14
+
15
+ # Load matchers
16
+ require_relative "tree_diff/matchers/hash_matcher"
17
+ require_relative "tree_diff/matchers/similarity_matcher"
18
+ require_relative "tree_diff/matchers/structural_propagator"
19
+ require_relative "tree_diff/matchers/universal_matcher"
20
+
21
+ # Load operations
22
+ require_relative "tree_diff/operations/operation"
23
+ require_relative "tree_diff/operations/operation_detector"
24
+ require_relative "tree_diff/operation_converter"
25
+
26
+ # Load adapters
27
+ require_relative "tree_diff/adapters/xml_adapter"
28
+ require_relative "tree_diff/adapters/json_adapter"
29
+ require_relative "tree_diff/adapters/html_adapter"
30
+ require_relative "tree_diff/adapters/yaml_adapter"
31
+
32
+ # Load integrator
33
+ require_relative "tree_diff/tree_diff_integrator"
@@ -17,7 +17,9 @@ module Canon
17
17
  # @raise [Canon::ValidationError] If JSON is malformed
18
18
  # @return [void]
19
19
  def self.validate!(input)
20
- return if input.nil? || input.strip.empty?
20
+ return if input.nil?
21
+ return if input.is_a?(Hash) || input.is_a?(Array) # Already parsed
22
+ return if input.strip.empty?
21
23
 
22
24
  JSON.parse(input)
23
25
  rescue JSON::ParserError => e
@@ -19,7 +19,9 @@ module Canon
19
19
  # @raise [Canon::ValidationError] If YAML is malformed
20
20
  # @return [void]
21
21
  def self.validate!(input)
22
- return if input.nil? || input.strip.empty?
22
+ return if input.nil?
23
+ return if input.is_a?(Hash) || input.is_a?(Array) # Already parsed
24
+ return if input.strip.empty?
23
25
 
24
26
  YAML.safe_load(input, permitted_classes: [Symbol, Date, Time])
25
27
  rescue Psych::SyntaxError => e
data/lib/canon/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Canon
4
- VERSION = "0.1.6"
4
+ VERSION = "0.1.7"
5
5
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "nokogiri"
4
4
  require "set"
5
+ require_relative "../data_model"
5
6
  require_relative "nodes/root_node"
6
7
  require_relative "nodes/element_node"
7
8
  require_relative "nodes/namespace_node"
@@ -13,8 +14,11 @@ require_relative "nodes/processing_instruction_node"
13
14
  module Canon
14
15
  module Xml
15
16
  # Builds XPath data model from XML
16
- class DataModel
17
+ class DataModel < Canon::DataModel
17
18
  # Build XPath data model from XML string
19
+ #
20
+ # @param xml_string [String] XML content to parse
21
+ # @return [Nodes::RootNode] Root of the data model tree
18
22
  def self.from_xml(xml_string)
19
23
  # Parse with Nokogiri
20
24
  doc = Nokogiri::XML(xml_string) do |config|
@@ -29,24 +33,19 @@ module Canon
29
33
  build_from_nokogiri(doc)
30
34
  end
31
35
 
32
- # Build XPath data model from HTML string
33
- #
34
- # @param html_string [String] HTML content to parse
35
- # @param version [Symbol] HTML version (:html4 or :html5)
36
- # @return [Nodes::RootNode] Root of the data model tree
37
- def self.from_html(html_string, version: :html4)
38
- # Parse with Nokogiri using appropriate HTML parser
39
- doc = if version == :html5
40
- Nokogiri::HTML5.fragment(html_string)
41
- else
42
- Nokogiri::HTML4.fragment(html_string)
43
- end
44
-
45
- # HTML doesn't have strict namespace requirements like XML,
46
- # so skip the relative namespace URI check
36
+ # Alias for compatibility with base class interface
37
+ def self.parse(xml_string)
38
+ from_xml(xml_string)
39
+ end
47
40
 
48
- # Convert to XPath data model (reuse XML infrastructure)
49
- build_from_nokogiri(doc)
41
+ # Serialize XML node to string
42
+ #
43
+ # @param node [Nodes::RootNode, Nodes::ElementNode] Node to serialize
44
+ # @return [String] Serialized XML string
45
+ def self.serialize(node)
46
+ # Implementation will delegate to existing XML serialization
47
+ # This is a placeholder for the base class interface
48
+ node.to_s
50
49
  end
51
50
 
52
51
  # Check for relative namespace URIs (prohibited by C14N 1.1)
@@ -184,9 +183,7 @@ module Canon
184
183
 
185
184
  # Build attribute nodes for an element
186
185
  def self.build_attribute_nodes(nokogiri_element, element)
187
- nokogiri_element.attributes.each do |name, attr|
188
- next if name.start_with?("xmlns")
189
-
186
+ nokogiri_element.attributes.each_value do |attr|
190
187
  attr_node = Nodes::AttributeNode.new(
191
188
  name: attr.name,
192
189
  value: attr.value,
@@ -199,9 +196,11 @@ module Canon
199
196
 
200
197
  # Build text node from Nokogiri text node
201
198
  def self.build_text_node(nokogiri_text)
202
- # Skip text nodes that are only whitespace between elements
203
- # unless they have significant content
199
+ # XML text nodes: preserve all content including whitespace
200
+ # Unlike HTML, XML treats all whitespace as significant
204
201
  content = nokogiri_text.content
202
+
203
+ # Skip empty text nodes between elements (common formatting whitespace)
205
204
  return nil if content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element)
206
205
 
207
206
  # Nokogiri already handles CDATA conversion and entity resolution
@@ -55,19 +55,63 @@ module Canon
55
55
  # Default attributes used to identify elements
56
56
  DEFAULT_IDENTITY_ATTRS = %w[id ref name key].freeze
57
57
 
58
- # Match result for an element
59
- MatchResult = Struct.new(:status, :elem1, :elem2, :path) do
58
# Represents the result of matching an element across two DOM trees
#
# A MatchResult indicates whether an element was found in both trees
# (matched), only in the first tree (deleted), or only in the second
# tree (inserted).
#
# == Attributes
#
# - status: Symbol indicating match type (:matched, :deleted, :inserted)
# - elem1: Element from first tree (nil if inserted)
# - elem2: Element from second tree (nil if deleted)
# - path: Array of element names showing location in tree
# - pos1: Integer index of elem1 in its parent's children (nil if inserted)
# - pos2: Integer index of elem2 in its parent's children (nil if deleted)
#
# == Position Change Detection
#
# When status is :matched and pos1 != pos2, the element has moved positions.
#
class MatchResult
  attr_reader :status, :elem1, :elem2, :path, :pos1, :pos2

  # @param status [Symbol] Match status (:matched, :deleted, :inserted)
  # @param elem1 [Object, nil] Element from first tree
  # @param elem2 [Object, nil] Element from second tree
  # @param path [Array<String>] Element path in tree
  # @param pos1 [Integer, nil] Position index in first tree
  # @param pos2 [Integer, nil] Position index in second tree
  def initialize(status:, elem1:, elem2:, path:, pos1: nil, pos2: nil)
    @status = status
    @elem1 = elem1
    @elem2 = elem2
    @path = path
    @pos1 = pos1
    @pos2 = pos2
  end

  # @return [Boolean] true if element found in both trees
  def matched?
    status == :matched
  end

  # @return [Boolean] true if element only in second tree
  def inserted?
    status == :inserted
  end

  # @return [Boolean] true if element only in first tree
  def deleted?
    status == :deleted
  end

  # A move can only be reported for a matched element whose position is
  # known on both sides; every other case answers false (never nil), so the
  # predicate always yields a real boolean.
  #
  # @return [Boolean] true if element moved to different position
  def position_changed?
    return false unless matched? && pos1 && pos2

    pos1 != pos2
  end
end
72
116
 
73
117
  def initialize(identity_attrs: DEFAULT_IDENTITY_ATTRS)
@@ -105,13 +149,32 @@ module Canon
105
149
  map1.each do |identity, elem1|
106
150
  if map2.key?(identity)
107
151
  elem2 = map2[identity]
108
- elem_path = path + [elem1.name]
109
- @matches << MatchResult.new(:matched, elem1, elem2, elem_path)
152
+
153
+ # Build path with namespace information for clarity
154
+ elem_path_with_ns = if elem1.namespace_uri && !elem1.namespace_uri.empty?
155
+ path + ["{#{elem1.namespace_uri}}#{elem1.name}"]
156
+ else
157
+ path + [elem1.name]
158
+ end
159
+
160
+ # Track positions
161
+ pos1 = elems1.index(elem1)
162
+ pos2 = elems2.index(elem2)
163
+
164
+ @matches << MatchResult.new(
165
+ status: :matched,
166
+ elem1: elem1,
167
+ elem2: elem2,
168
+ path: elem_path_with_ns,
169
+ pos1: pos1,
170
+ pos2: pos2,
171
+ )
172
+
110
173
  matched1.add(elem1)
111
174
  matched2.add(elem2)
112
175
 
113
176
  # Recursively match children
114
- match_children(elem1.children, elem2.children, elem_path)
177
+ match_children(elem1.children, elem2.children, elem_path_with_ns)
115
178
  end
116
179
  end
117
180
 
@@ -125,44 +188,89 @@ module Canon
125
188
  unmatched1.each do |elem1|
126
189
  next if matched1.include?(elem1)
127
190
 
128
- elem_path = path + [elem1.name]
129
- @matches << MatchResult.new(:deleted, elem1, nil, elem_path)
191
+ elem_path_with_ns = if elem1.namespace_uri && !elem1.namespace_uri.empty?
192
+ path + ["{#{elem1.namespace_uri}}#{elem1.name}"]
193
+ else
194
+ path + [elem1.name]
195
+ end
196
+ pos1 = elems1.index(elem1)
197
+
198
+ @matches << MatchResult.new(
199
+ status: :deleted,
200
+ elem1: elem1,
201
+ elem2: nil,
202
+ path: elem_path_with_ns,
203
+ pos1: pos1,
204
+ pos2: nil,
205
+ )
130
206
  end
131
207
 
132
208
  unmatched2.each do |elem2|
133
209
  next if matched2.include?(elem2)
134
210
 
135
- elem_path = path + [elem2.name]
136
- @matches << MatchResult.new(:inserted, nil, elem2, elem_path)
211
+ elem_path_with_ns = if elem2.namespace_uri && !elem2.namespace_uri.empty?
212
+ path + ["{#{elem2.namespace_uri}}#{elem2.name}"]
213
+ else
214
+ path + [elem2.name]
215
+ end
216
+ pos2 = elems2.index(elem2)
217
+
218
+ @matches << MatchResult.new(
219
+ status: :inserted,
220
+ elem1: nil,
221
+ elem2: elem2,
222
+ path: elem_path_with_ns,
223
+ pos1: nil,
224
+ pos2: pos2,
225
+ )
137
226
  end
138
227
  end
139
228
 
140
229
  # Match remaining elements by name and position
141
230
  def match_by_position(elems1, elems2, path, matched1, matched2)
142
- # Group by element name
143
- by_name1 = elems1.group_by(&:name)
144
- by_name2 = elems2.group_by(&:name)
145
-
146
- # For each name, match by position
147
- by_name1.each do |name, list1|
148
- next unless by_name2.key?(name)
231
+ # Group by element name AND namespace_uri
232
+ by_identity1 = elems1.group_by { |e| [e.name, e.namespace_uri] }
233
+ by_identity2 = elems2.group_by { |e| [e.name, e.namespace_uri] }
149
234
 
150
- list2 = by_name2[name]
235
+ # For each name+namespace combination, match by position
236
+ by_identity1.each do |identity, list1|
237
+ next unless by_identity2.key?(identity)
151
238
 
152
239
  # Match pairs by position
240
+ list2 = by_identity2[identity]
241
+ name = identity[0] # Extract name from [name, namespace_uri] tuple
242
+ namespace_uri = identity[1] # Extract namespace_uri
243
+
153
244
  [list1.length, list2.length].min.times do |i|
154
245
  elem1 = list1[i]
155
246
  elem2 = list2[i]
156
247
 
157
248
  next if matched1.include?(elem1) || matched2.include?(elem2)
158
249
 
159
- elem_path = path + [name]
160
- @matches << MatchResult.new(:matched, elem1, elem2, elem_path)
250
+ # Build path with namespace information for clarity
251
+ elem_path_with_ns = if namespace_uri && !namespace_uri.empty?
252
+ path + ["{#{namespace_uri}}#{name}"]
253
+ else
254
+ path + [name]
255
+ end
256
+
257
+ # Track positions in original element lists
258
+ pos1 = elems1.index(elem1)
259
+ pos2 = elems2.index(elem2)
260
+
261
+ @matches << MatchResult.new(
262
+ status: :matched,
263
+ elem1: elem1,
264
+ elem2: elem2,
265
+ path: elem_path_with_ns,
266
+ pos1: pos1,
267
+ pos2: pos2,
268
+ )
161
269
  matched1.add(elem1)
162
270
  matched2.add(elem2)
163
271
 
164
272
  # Recursively match children
165
- match_children(elem1.children, elem2.children, elem_path)
273
+ match_children(elem1.children, elem2.children, elem_path_with_ns)
166
274
  end
167
275
  end
168
276
  end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Xml
    # Helper module for formatting namespace information in diff output
    module NamespaceHelper
      # Format a namespace URI for display in diff output
      #
      # @param namespace_uri [String, nil] The namespace URI to format
      # @return [String] Formatted namespace string
      #
      # @example Empty namespace
      #   format_namespace(nil) #=> "ns:[{blank}]"
      #   format_namespace("")  #=> "ns:[{blank}]"
      #
      # @example Populated namespace
      #   format_namespace("http://example.com") #=> "ns:[http://example.com]"
      def self.format_namespace(namespace_uri)
        if namespace_uri.nil? || namespace_uri.empty?
          "ns:[{blank}]"
        else
          "ns:[#{namespace_uri}]"
        end
      end

      # Determine the type of mismatch between two nodes
      #
      # A nil namespace and an empty-string namespace are treated as equal.
      #
      # @param node1 [Object] First node (responds to #name, #namespace_uri)
      # @param node2 [Object] Second node (responds to #name, #namespace_uri)
      # @return [Symbol] Type of mismatch (:name, :namespace, :both, :none);
      #   :none is also returned when either node is nil
      def self.mismatch_type(node1, node2)
        return :none unless node1 && node2

        name_differs = node1.name != node2.name
        namespace_differs = normalize_namespace(node1.namespace_uri) !=
          normalize_namespace(node2.namespace_uri)

        if name_differs && namespace_differs
          :both
        elsif name_differs
          :name
        elsif namespace_differs
          :namespace
        else
          :none
        end
      end

      # Generate a mismatch message for element differences
      #
      # @param node1 [ElementNode] First element
      # @param node2 [ElementNode] Second element
      # @return [String] Human-readable mismatch message
      def self.element_mismatch_message(node1, node2)
        mismatch_message("element", node1, node2)
      end

      # Generate a mismatch message for attribute differences
      #
      # @param node1 [AttributeNode] First attribute
      # @param node2 [AttributeNode] Second attribute
      # @return [String] Human-readable mismatch message
      def self.attribute_mismatch_message(node1, node2)
        mismatch_message("attribute", node1, node2)
      end

      # Normalize namespace URI for comparison
      #
      # @param namespace_uri [String, nil] Namespace URI
      # @return [String] Normalized namespace (empty string for nil)
      def self.normalize_namespace(namespace_uri)
        namespace_uri.to_s
      end

      # Build the mismatch message shared by elements and attributes; the two
      # public wrappers differ only in the noun used in the text.
      #
      # @param kind [String] Singular noun naming the node kind
      #   ("element" or "attribute")
      # @param node1 [Object] First node
      # @param node2 [Object] Second node
      # @return [String] Human-readable mismatch message
      def self.mismatch_message(kind, node1, node2)
        case mismatch_type(node1, node2)
        when :name
          # Namespaces agree here, so showing node1's namespace covers both.
          "mismatched #{kind} name: '#{node1.name}' vs '#{node2.name}' " \
            "(#{format_namespace(node1.namespace_uri)})"
        when :namespace
          "mismatched #{kind} namespace: '#{node1.name}' " \
            "(#{format_namespace(node1.namespace_uri)} vs " \
            "#{format_namespace(node2.namespace_uri)})"
        when :both
          "mismatched #{kind} name and namespace: " \
            "'#{node1.name}' (#{format_namespace(node1.namespace_uri)}) vs " \
            "'#{node2.name}' (#{format_namespace(node2.namespace_uri)})"
        else
          # Reached for :none, including the case where a node is nil.
          "#{kind}s differ"
        end
      end

      private_class_method :normalize_namespace, :mismatch_message
    end
  end
end
data/lib/canon.rb CHANGED
@@ -2,6 +2,9 @@
2
2
 
3
3
  require_relative "canon/version"
4
4
  require_relative "canon/errors"
5
+ require_relative "canon/config"
6
+ require_relative "canon/data_model"
7
+ require_relative "canon/html"
5
8
  require_relative "canon/formatters/xml_formatter"
6
9
  require_relative "canon/formatters/yaml_formatter"
7
10
  require_relative "canon/formatters/json_formatter"