canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -5,6 +5,8 @@ require_relative "match_options"
5
5
  require_relative "../diff/diff_node"
6
6
  require_relative "../diff/diff_classifier"
7
7
  require_relative "comparison_result"
8
+ require_relative "../tree_diff"
9
+ require_relative "strategies/match_strategy_factory"
8
10
 
9
11
  module Canon
10
12
  module Comparison
@@ -67,6 +69,11 @@ module Canon
67
69
  # Store resolved match options hash for use in comparison logic
68
70
  opts[:match_opts] = match_opts_hash
69
71
 
72
+ # Use tree diff if semantic_diff option is enabled
73
+ if match_opts.semantic_diff?
74
+ return perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
75
+ end
76
+
70
77
  # Create child_opts with resolved options
71
78
  child_opts = opts.merge(child_opts)
72
79
 
@@ -74,6 +81,18 @@ module Canon
74
81
  node1 = parse_node(n1, match_opts_hash[:preprocessing])
75
82
  node2 = parse_node(n2, match_opts_hash[:preprocessing])
76
83
 
84
+ # Store original strings for line diff display (before preprocessing)
85
+ original1 = if n1.is_a?(String)
86
+ n1
87
+ else
88
+ (n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
89
+ end
90
+ original2 = if n2.is_a?(String)
91
+ n2
92
+ else
93
+ (n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
94
+ end
95
+
77
96
  differences = []
78
97
  diff_children = opts[:diff_children] || false
79
98
 
@@ -89,33 +108,110 @@ module Canon
89
108
  end
90
109
 
91
110
  if opts[:verbose]
92
- # Return ComparisonResult for proper equivalence checking
93
- # Format XMLfor line-by-line display by adding line breaks between elements
94
- xml1 = node1.respond_to?(:to_xml) ? node1.to_xml : node1.to_s
95
- xml2 = node2.respond_to?(:to_xml) ? node2.to_xml : node2.to_s
96
-
111
+ # Serialize parsed nodes for consistent formatting
112
+ # This ensures both sides formatted identically, showing only real differences
97
113
  preprocessed = [
98
- xml1.gsub(/></, ">\n<"),
99
- xml2.gsub(/></, ">\n<"),
114
+ serialize_node_to_xml(node1).gsub(/></, ">\n<"),
115
+ serialize_node_to_xml(node2).gsub(/></, ">\n<"),
100
116
  ]
101
117
 
102
118
  ComparisonResult.new(
103
119
  differences: differences,
104
120
  preprocessed_strings: preprocessed,
121
+ original_strings: [original1, original2],
105
122
  format: :xml,
106
123
  match_options: match_opts_hash,
124
+ algorithm: :dom,
107
125
  )
126
+ elsif result != Comparison::EQUIVALENT && !differences.empty?
127
+ # Non-verbose mode: check equivalence
128
+ # If comparison found differences, classify them to determine if normative
129
+ classifier = Canon::Diff::DiffClassifier.new(match_opts)
130
+ classifier.classify_all(differences.select do |d|
131
+ d.is_a?(Canon::Diff::DiffNode)
132
+ end)
133
+ # Equivalent if no normative differences (matches semantic algorithm)
134
+ differences.none?(&:normative?)
108
135
  else
136
+ # Either equivalent or no differences tracked
109
137
  result == Comparison::EQUIVALENT
110
138
  end
111
139
  end
112
140
 
113
141
  private
114
142
 
143
+ # Perform semantic tree diff using SemanticTreeMatchStrategy
144
+ #
145
+ # @param n1 [String, Moxml::Node] First node
146
+ # @param n2 [String, Moxml::Node] Second node
147
+ # @param opts [Hash] Comparison options
148
+ # @param match_opts_hash [Hash] Resolved match options
149
+ # @return [Boolean, ComparisonResult] Result of tree diff comparison
150
+ def perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
151
+ # Store original strings for line diff display (before preprocessing)
152
+ original1 = if n1.is_a?(String)
153
+ n1
154
+ else
155
+ (n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
156
+ end
157
+ original2 = if n2.is_a?(String)
158
+ n2
159
+ else
160
+ (n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
161
+ end
162
+
163
+ # Parse to Canon::Xml::Node (preserves preprocessing)
164
+ node1 = parse_node(n1, match_opts_hash[:preprocessing])
165
+ node2 = parse_node(n2, match_opts_hash[:preprocessing])
166
+
167
+ # Create strategy using factory
168
+ strategy = Strategies::MatchStrategyFactory.create(
169
+ format: :xml,
170
+ match_options: match_opts_hash,
171
+ )
172
+
173
+ # Pass Canon::Xml::Node directly - XML adapter now handles it
174
+ differences = strategy.match(node1, node2)
175
+
176
+ # Return based on verbose mode
177
+ if opts[:verbose]
178
+ # Get preprocessed strings for display
179
+ preprocessed = strategy.preprocess_for_display(node1, node2)
180
+
181
+ # Return ComparisonResult with strategy metadata
182
+ ComparisonResult.new(
183
+ differences: differences,
184
+ preprocessed_strings: preprocessed,
185
+ original_strings: [original1, original2],
186
+ format: :xml,
187
+ match_options: match_opts_hash.merge(strategy.metadata),
188
+ algorithm: :semantic,
189
+ )
190
+ else
191
+ # Simple boolean result - equivalent if no normative differences
192
+ differences.none?(&:normative?)
193
+ end
194
+ end
195
+
115
196
  # Parse a node from string or return as-is
116
197
  # Applies preprocessing transformation before parsing if specified
117
198
  def parse_node(node, preprocessing = :none)
118
- return node unless node.is_a?(String)
199
+ # If already a Canon::Xml::Node, return as-is
200
+ return node if node.is_a?(Canon::Xml::Node)
201
+
202
+ # If it's a Nokogiri or Moxml node, convert to DataModel
203
+ unless node.is_a?(String)
204
+ # Convert to XML string then parse through DataModel
205
+ xml_str = if node.respond_to?(:to_xml)
206
+ node.to_xml
207
+ elsif node.respond_to?(:to_s)
208
+ node.to_s
209
+ else
210
+ raise Canon::Error,
211
+ "Unable to convert node to string: #{node.class}"
212
+ end
213
+ return Canon::Xml::DataModel.from_xml(xml_str)
214
+ end
119
215
 
120
216
  # Apply preprocessing to XML string before parsing
121
217
  xml_string = case preprocessing
@@ -134,8 +230,8 @@ module Canon
134
230
  node
135
231
  end
136
232
 
137
- # Use Moxml for XML parsing
138
- Moxml.new.parse(xml_string)
233
+ # Use Canon::Xml::DataModel for parsing to get Canon::Xml::Node instances
234
+ Canon::Xml::DataModel.from_xml(xml_string)
139
235
  end
140
236
 
141
237
  # Main comparison dispatcher
@@ -159,10 +255,7 @@ module Canon
159
255
  children1.zip(children2).each do |child1, child2|
160
256
  child_result = compare_nodes(child1, child2, opts, child_opts,
161
257
  diff_children, differences)
162
- if child_result != Comparison::EQUIVALENT
163
- result = child_result
164
- break
165
- end
258
+ result = child_result unless child_result == Comparison::EQUIVALENT
166
259
  end
167
260
  return result
168
261
  end
@@ -187,7 +280,30 @@ module Canon
187
280
  end
188
281
 
189
282
  # Dispatch based on node type
190
- if n1.respond_to?(:element?) && n1.element?
283
+ # Canon::Xml::Node types use .node_type method that returns symbols
284
+ # Nokogiri also has .node_type but returns integers, so check for Symbol
285
+ if n1.respond_to?(:node_type) && n2.respond_to?(:node_type) &&
286
+ n1.node_type.is_a?(Symbol) && n2.node_type.is_a?(Symbol)
287
+ case n1.node_type
288
+ when :root
289
+ compare_children(n1, n2, opts, child_opts, diff_children,
290
+ differences)
291
+ when :element
292
+ compare_element_nodes(n1, n2, opts, child_opts, diff_children,
293
+ differences)
294
+ when :text
295
+ compare_text_nodes(n1, n2, opts, differences)
296
+ when :comment
297
+ compare_comment_nodes(n1, n2, opts, differences)
298
+ when :cdata
299
+ compare_text_nodes(n1, n2, opts, differences)
300
+ when :processing_instruction
301
+ compare_processing_instruction_nodes(n1, n2, opts, differences)
302
+ else
303
+ Comparison::EQUIVALENT
304
+ end
305
+ # Moxml/Nokogiri types use .element?, .text?, etc. methods
306
+ elsif n1.respond_to?(:element?) && n1.element?
191
307
  compare_element_nodes(n1, n2, opts, child_opts, diff_children,
192
308
  differences)
193
309
  elsif n1.respond_to?(:text?) && n1.text?
@@ -200,7 +316,7 @@ module Canon
200
316
  n1.processing_instruction?
201
317
  compare_processing_instruction_nodes(n1, n2, opts, differences)
202
318
  elsif n1.respond_to?(:root)
203
- # Document node
319
+ # Document node (Moxml/Nokogiri - legacy path)
204
320
  compare_document_nodes(n1, n2, opts, child_opts, diff_children,
205
321
  differences)
206
322
  else
@@ -214,11 +330,34 @@ module Canon
214
330
  # Compare element names
215
331
  unless n1.name == n2.name
216
332
  add_difference(n1, n2, Comparison::UNEQUAL_ELEMENTS,
217
- Comparison::UNEQUAL_ELEMENTS, :text_content, opts,
333
+ Comparison::UNEQUAL_ELEMENTS, :element_structure, opts,
218
334
  differences)
219
335
  return Comparison::UNEQUAL_ELEMENTS
220
336
  end
221
337
 
338
+ # Compare namespace URIs - elements with different namespaces are different elements
339
+ ns1 = n1.respond_to?(:namespace_uri) ? n1.namespace_uri : nil
340
+ ns2 = n2.respond_to?(:namespace_uri) ? n2.namespace_uri : nil
341
+
342
+ unless ns1 == ns2
343
+ # Create descriptive reason showing the actual namespace URIs
344
+ ns1_display = ns1.nil? || ns1.empty? ? "(no namespace)" : ns1
345
+ ns2_display = ns2.nil? || ns2.empty? ? "(no namespace)" : ns2
346
+
347
+ diff_node = Canon::Diff::DiffNode.new(
348
+ node1: n1,
349
+ node2: n2,
350
+ dimension: :namespace_uri,
351
+ reason: "namespace '#{ns1_display}' vs '#{ns2_display}' on element '#{n1.name}'",
352
+ )
353
+ differences << diff_node if opts[:verbose]
354
+ return Comparison::UNEQUAL_ELEMENTS
355
+ end
356
+
357
+ # Compare namespace declarations (xmlns and xmlns:* attributes)
358
+ ns_result = compare_namespace_declarations(n1, n2, opts, differences)
359
+ return ns_result unless ns_result == Comparison::EQUIVALENT
360
+
222
361
  # Compare attributes
223
362
  attr_result = compare_attribute_sets(n1, n2, opts, differences)
224
363
  return attr_result unless attr_result == Comparison::EQUIVALENT
@@ -231,20 +370,67 @@ module Canon
231
370
 
232
371
  # Compare attribute sets
233
372
  def compare_attribute_sets(n1, n2, opts, differences)
234
- attrs1 = filter_attributes(n1.attributes, opts)
235
- attrs2 = filter_attributes(n2.attributes, opts)
373
+ # Get attributes using the appropriate method for each node type
374
+ raw_attrs1 = n1.respond_to?(:attribute_nodes) ? n1.attribute_nodes : n1.attributes
375
+ raw_attrs2 = n2.respond_to?(:attribute_nodes) ? n2.attribute_nodes : n2.attributes
236
376
 
237
- # Always sort attributes since attribute order doesn't matter in XML/HTML
238
- attrs1 = attrs1.sort_by { |k, _v| k.to_s }.to_h
239
- attrs2 = attrs2.sort_by { |k, _v| k.to_s }.to_h
377
+ attrs1 = filter_attributes(raw_attrs1, opts)
378
+ attrs2 = filter_attributes(raw_attrs2, opts)
240
379
 
241
- unless attrs1.keys.map(&:to_s).sort == attrs2.keys.map(&:to_s).sort
242
- add_difference(n1, n2, Comparison::MISSING_ATTRIBUTE,
243
- Comparison::MISSING_ATTRIBUTE,
244
- :attribute_presence, opts, differences)
245
- return Comparison::MISSING_ATTRIBUTE
246
- end
380
+ match_opts = opts[:match_opts]
381
+ attribute_order_behavior = match_opts[:attribute_order] || :strict
382
+
383
+ # Check attribute order if not ignored
384
+ keys1 = attrs1.keys.map(&:to_s)
385
+ keys2 = attrs2.keys.map(&:to_s)
386
+ if attribute_order_behavior == :strict
387
+ # Strict mode: attribute order matters
388
+ # Check if keys are in same order
389
+
390
+ if keys1 != keys2
391
+ # Keys are different or in different order
392
+ # First check if it's just ordering (same keys, different order)
393
+ if keys1.sort == keys2.sort
394
+ # Same keys, different order - this is an attribute_order difference
395
+ add_difference(n1, n2, Comparison::UNEQUAL_ATTRIBUTES,
396
+ Comparison::UNEQUAL_ATTRIBUTES,
397
+ :attribute_order, opts, differences)
398
+ return Comparison::UNEQUAL_ATTRIBUTES
399
+ else
400
+ # Different keys - this is attribute_presence difference
401
+ add_difference(n1, n2, Comparison::MISSING_ATTRIBUTE,
402
+ Comparison::MISSING_ATTRIBUTE,
403
+ :attribute_presence, opts, differences)
404
+ return Comparison::MISSING_ATTRIBUTE
405
+ end
406
+ end
407
+
408
+ # Order matches, now check values in order
409
+ else
410
+ # Ignore/normalize mode: attribute order doesn't affect equivalence
411
+ # But in verbose mode, we should still track order differences as informative
412
+
413
+ # Check if order differs (but keys are the same)
414
+ if keys1 != keys2 && keys1.sort == keys2.sort && opts[:verbose]
415
+ # Same keys, different order - create informative DiffNode
416
+ # This allows line diffs to be properly classified as informative
417
+ add_difference(n1, n2, Comparison::UNEQUAL_ATTRIBUTES,
418
+ Comparison::UNEQUAL_ATTRIBUTES,
419
+ :attribute_order, opts, differences)
420
+ end
421
+
422
+ # Sort attributes so order doesn't matter for comparison
423
+ attrs1 = attrs1.sort_by { |k, _v| k.to_s }.to_h
424
+ attrs2 = attrs2.sort_by { |k, _v| k.to_s }.to_h
247
425
 
426
+ unless attrs1.keys.map(&:to_s).sort == attrs2.keys.map(&:to_s).sort
427
+ add_difference(n1, n2, Comparison::MISSING_ATTRIBUTE,
428
+ Comparison::MISSING_ATTRIBUTE,
429
+ :attribute_presence, opts, differences)
430
+ return Comparison::MISSING_ATTRIBUTE
431
+ end
432
+
433
+ end
248
434
  attrs1.each do |name, value|
249
435
  unless attrs2[name] == value
250
436
  add_difference(n1, n2, Comparison::UNEQUAL_ATTRIBUTES,
@@ -262,32 +448,57 @@ module Canon
262
448
  filtered = {}
263
449
  match_opts = opts[:match_opts]
264
450
 
265
- attributes.each do |key, val|
266
- # Handle both Nokogiri and Moxml attribute formats:
451
+ # Handle Canon::Xml::Node attribute format (array of AttributeNode)
452
+ if attributes.is_a?(Array)
453
+ attributes.each do |attr|
454
+ name = attr.name
455
+ value = attr.value
456
+
457
+ # Skip namespace declarations - they're handled separately
458
+ next if is_namespace_declaration?(name)
459
+
460
+ # Skip if attribute name should be ignored
461
+ next if should_ignore_attr_by_name?(name, opts)
462
+
463
+ # Skip if attribute content should be ignored
464
+ next if should_ignore_attr_content?(value, opts)
465
+
466
+ # Apply match options for attribute values
467
+ behavior = match_opts[:attribute_values] || :strict
468
+ value = MatchOptions.process_attribute_value(value, behavior)
469
+
470
+ filtered[name] = value
471
+ end
472
+ else
473
+ # Handle Nokogiri and Moxml attribute formats (Hash-like):
267
474
  # - Nokogiri: key is String name, val is Nokogiri::XML::Attr object
268
475
  # - Moxml: key is Moxml::Attribute object, val is nil
476
+ attributes.each do |key, val|
477
+ if key.is_a?(String)
478
+ # Nokogiri format: key=name (String), val=attr object
479
+ name = key
480
+ value = val.respond_to?(:value) ? val.value : val.to_s
481
+ else
482
+ # Moxml format: key=attr object, val=nil
483
+ name = key.respond_to?(:name) ? key.name : key.to_s
484
+ value = key.respond_to?(:value) ? key.value : key.to_s
485
+ end
269
486
 
270
- if key.is_a?(String)
271
- # Nokogiri format: key=name (String), val=attr object
272
- name = key
273
- value = val.respond_to?(:value) ? val.value : val.to_s
274
- else
275
- # Moxml format: key=attr object, val=nil
276
- name = key.respond_to?(:name) ? key.name : key.to_s
277
- value = key.respond_to?(:value) ? key.value : key.to_s
278
- end
487
+ # Skip namespace declarations - they're handled separately
488
+ next if is_namespace_declaration?(name)
279
489
 
280
- # Skip if attribute name should be ignored
281
- next if should_ignore_attr_by_name?(name, opts)
490
+ # Skip if attribute name should be ignored
491
+ next if should_ignore_attr_by_name?(name, opts)
282
492
 
283
- # Skip if attribute content should be ignored
284
- next if should_ignore_attr_content?(value, opts)
493
+ # Skip if attribute content should be ignored
494
+ next if should_ignore_attr_content?(value, opts)
285
495
 
286
- # Apply match options for attribute values
287
- behavior = match_opts[:attribute_values] || :strict
288
- value = MatchOptions.process_attribute_value(value, behavior)
496
+ # Apply match options for attribute values
497
+ behavior = match_opts[:attribute_values] || :strict
498
+ value = MatchOptions.process_attribute_value(value, behavior)
289
499
 
290
- filtered[name] = value
500
+ filtered[name] = value
501
+ end
291
502
  end
292
503
 
293
504
  filtered
@@ -324,26 +535,35 @@ module Canon
324
535
  behavior = :strict
325
536
  end
326
537
 
327
- if MatchOptions.match_text?(text1, text2, behavior)
328
- Comparison::EQUIVALENT
329
- else
330
- # Determine the correct dimension for this difference
331
- # - If text_content is :strict, ALL differences use :text_content dimension
332
- # - If text_content is :normalize, whitespace-only diffs use :structural_whitespace
333
- # - Otherwise use :text_content
334
- dimension = if behavior == :normalize && whitespace_only_difference?(
335
- text1, text2
336
- )
337
- :structural_whitespace
338
- else
339
- :text_content
340
- end
538
+ # Check if raw content differs
539
+ raw_differs = text1 != text2
540
+
541
+ # Check if matches according to behavior
542
+ matches_per_behavior = MatchOptions.match_text?(text1, text2,
543
+ behavior)
341
544
 
545
+ # Determine the correct dimension for this difference
546
+ # - If text_content is :strict, ALL differences use :text_content dimension
547
+ # - If text_content is :normalize, whitespace-only diffs use :structural_whitespace
548
+ # - Otherwise use :text_content
549
+ dimension = if behavior == :normalize && whitespace_only_difference?(
550
+ text1, text2
551
+ )
552
+ :structural_whitespace
553
+ else
554
+ :text_content
555
+ end
556
+
557
+ # Create DiffNode in verbose mode when raw content differs
558
+ # This ensures informative diffs are created even for :ignore/:normalize
559
+ if raw_differs && opts[:verbose]
342
560
  add_difference(n1, n2, Comparison::UNEQUAL_TEXT_CONTENTS,
343
561
  Comparison::UNEQUAL_TEXT_CONTENTS, dimension,
344
562
  opts, differences)
345
- Comparison::UNEQUAL_TEXT_CONTENTS
346
563
  end
564
+
565
+ # Return based on whether behavior makes difference acceptable
566
+ matches_per_behavior ? Comparison::EQUIVALENT : Comparison::UNEQUAL_TEXT_CONTENTS
347
567
  end
348
568
 
349
569
  # Check if the difference between two texts is only whitespace-related
@@ -396,18 +616,25 @@ module Canon
396
616
  match_opts = opts[:match_opts]
397
617
  behavior = match_opts[:comments]
398
618
 
399
- # If comments are ignored, consider them equivalent
400
- return Comparison::EQUIVALENT if behavior == :ignore
619
+ # Canon::Xml::Node CommentNode uses .value, Nokogiri uses .content
620
+ content1 = node_text(n1)
621
+ content2 = node_text(n2)
401
622
 
402
- content1 = n1.content.to_s
403
- content2 = n2.content.to_s
623
+ # Check if content differs
624
+ contents_differ = content1 != content2
404
625
 
405
- if MatchOptions.match_text?(content1, content2, behavior)
406
- Comparison::EQUIVALENT
407
- else
626
+ # Create DiffNode in verbose mode when content differs
627
+ # This ensures informative diffs are created even for :ignore behavior
628
+ if contents_differ && opts[:verbose]
408
629
  add_difference(n1, n2, Comparison::UNEQUAL_COMMENTS,
409
630
  Comparison::UNEQUAL_COMMENTS, :comments, opts,
410
631
  differences)
632
+ end
633
+
634
+ # Return based on behavior and whether content matches
635
+ if behavior == :ignore || !contents_differ
636
+ Comparison::EQUIVALENT
637
+ else
411
638
  Comparison::UNEQUAL_COMMENTS
412
639
  end
413
640
  end
@@ -451,25 +678,155 @@ module Canon
451
678
  differences)
452
679
  end
453
680
 
454
- # Compare children of two nodes
681
+ # Compare children of two nodes using semantic matching
682
+ #
683
+ # Uses ElementMatcher to pair children semantically (by identity attributes
684
+ # or position), then compares matched pairs and detects position changes.
455
685
  def compare_children(n1, n2, opts, child_opts, diff_children,
456
686
  differences)
457
687
  children1 = filter_children(n1.children, opts)
458
688
  children2 = filter_children(n2.children, opts)
459
689
 
460
- unless children1.length == children2.length
461
- add_difference(n1, n2, Comparison::MISSING_NODE,
462
- Comparison::MISSING_NODE, :text_content, opts, differences)
463
- return Comparison::MISSING_NODE
690
+ # Quick check: if both have no children, they're equivalent
691
+ return Comparison::EQUIVALENT if children1.empty? && children2.empty?
692
+
693
+ # Check if we can use ElementMatcher (requires Canon::Xml::DataModel nodes)
694
+ # ElementMatcher expects nodes with .node_type method that returns symbols
695
+ # and only works with element nodes (filters out text, comment, etc.)
696
+ can_use_matcher = children1.all? do |c|
697
+ c.is_a?(Canon::Xml::Node) && c.node_type == :element
698
+ end &&
699
+ children2.all? { |c| c.is_a?(Canon::Xml::Node) && c.node_type == :element }
700
+
701
+ if can_use_matcher && !children1.empty? && !children2.empty?
702
+ # Use ElementMatcher for semantic matching with position tracking
703
+ use_element_matcher_comparison(children1, children2, n1, opts,
704
+ child_opts, diff_children, differences)
705
+ else
706
+ # Fall back to simple positional comparison for Moxml/Nokogiri nodes
707
+ # Length check
708
+ unless children1.length == children2.length
709
+ # Determine dimension based on type of first differing child
710
+ # When lengths differ, find which child is missing/extra
711
+ dimension = :text_content # default
712
+
713
+ # Compare position by position to find first difference
714
+ max_len = [children1.length, children2.length].max
715
+ (0...max_len).each do |i|
716
+ if i >= children1.length
717
+ # Extra child in children2
718
+ dimension = determine_node_dimension(children2[i])
719
+ break
720
+ elsif i >= children2.length
721
+ # Extra child in children1
722
+ dimension = determine_node_dimension(children1[i])
723
+ break
724
+ elsif !same_node_type?(children1[i], children2[i])
725
+ # Different node types at same position
726
+ dimension = determine_node_dimension(children1[i])
727
+ break
728
+ end
729
+ end
730
+
731
+ add_difference(n1, n2, Comparison::MISSING_NODE,
732
+ Comparison::MISSING_NODE, dimension, opts,
733
+ differences)
734
+ return Comparison::MISSING_NODE
735
+ end
736
+
737
+ # Compare children pairwise by position
738
+ result = Comparison::EQUIVALENT
739
+ children1.zip(children2).each do |child1, child2|
740
+ child_result = compare_nodes(child1, child2, child_opts, child_opts,
741
+ diff_children, differences)
742
+ result = child_result unless child_result == Comparison::EQUIVALENT
743
+ end
744
+
745
+ result
464
746
  end
747
+ end
748
+
749
+ # Use ElementMatcher for semantic comparison (Canon::Xml::DataModel nodes)
750
+ def use_element_matcher_comparison(children1, children2, parent_node,
751
+ opts, child_opts, diff_children,
752
+ differences)
753
+ require_relative "../xml/element_matcher"
465
754
 
466
- children1.zip(children2).each do |child1, child2|
467
- result = compare_nodes(child1, child2, child_opts, child_opts,
468
- diff_children, differences)
469
- return result unless result == Comparison::EQUIVALENT
755
+ # Create temporary RootNode wrappers to use ElementMatcher
756
+ # Don't modify parent pointers - just set @children directly
757
+ require_relative "../xml/nodes/root_node"
758
+
759
+ temp_root1 = Canon::Xml::Nodes::RootNode.new
760
+ temp_root1.instance_variable_set(:@children, children1.dup)
761
+
762
+ temp_root2 = Canon::Xml::Nodes::RootNode.new
763
+ temp_root2.instance_variable_set(:@children, children2.dup)
764
+
765
+ matcher = Canon::Xml::ElementMatcher.new
766
+ matches = matcher.match_trees(temp_root1, temp_root2)
767
+
768
+ # Filter matches to only include direct children
769
+ # match_trees returns ALL descendants, but we only want direct children
770
+ matches = matches.select do |m|
771
+ (m.elem1.nil? || children1.include?(m.elem1)) &&
772
+ (m.elem2.nil? || children2.include?(m.elem2))
470
773
  end
471
774
 
472
- Comparison::EQUIVALENT
775
+ # If no matches and children exist, they're all different
776
+ if matches.empty? && (!children1.empty? || !children2.empty?)
777
+ add_difference(parent_node, parent_node, Comparison::MISSING_NODE,
778
+ Comparison::MISSING_NODE, :text_content, opts,
779
+ differences)
780
+ return Comparison::UNEQUAL_ELEMENTS
781
+ end
782
+
783
+ all_equivalent = true
784
+
785
+ matches.each do |match|
786
+ case match.status
787
+ when :matched
788
+ # Check if element position changed
789
+ if match.position_changed?
790
+ match_opts = opts[:match_opts]
791
+ position_behavior = match_opts[:element_position] || :strict
792
+
793
+ # Only create DiffNode if element_position is not :ignore
794
+ if position_behavior != :ignore
795
+ add_difference(
796
+ match.elem1,
797
+ match.elem2,
798
+ "position #{match.pos1}",
799
+ "position #{match.pos2}",
800
+ :element_position,
801
+ opts,
802
+ differences,
803
+ )
804
+ all_equivalent = false if position_behavior == :strict
805
+ end
806
+ end
807
+
808
+ # Compare the matched elements for content/attribute differences
809
+ result = compare_nodes(match.elem1, match.elem2, child_opts,
810
+ child_opts, diff_children, differences)
811
+ all_equivalent = false unless result == Comparison::EQUIVALENT
812
+
813
+ when :deleted
814
+ # Element present in first tree but not second
815
+ add_difference(match.elem1, nil, Comparison::MISSING_NODE,
816
+ Comparison::MISSING_NODE, :element_structure, opts,
817
+ differences)
818
+ all_equivalent = false
819
+
820
+ when :inserted
821
+ # Element present in second tree but not first
822
+ add_difference(nil, match.elem2, Comparison::MISSING_NODE,
823
+ Comparison::MISSING_NODE, :element_structure, opts,
824
+ differences)
825
+ all_equivalent = false
826
+ end
827
+ end
828
+
829
+ all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ELEMENTS
473
830
  end
474
831
 
475
832
  # Filter children based on options
@@ -483,20 +840,28 @@ module Canon
483
840
  def node_excluded?(node, opts)
484
841
  match_opts = opts[:match_opts]
485
842
 
486
- # Ignore comments based on match options
487
- if node.respond_to?(:comment?) && node.comment? && (match_opts[:comments] == :ignore)
488
- return true
843
+ # Determine node type
844
+ # Canon::Xml::Node uses node_type that returns Symbol
845
+ # Nokogiri uses node_type that returns Integer, so check for Symbol first
846
+ if node.respond_to?(:node_type) && node.node_type.is_a?(Symbol)
847
+ node.node_type == :comment
848
+ else
849
+ node.respond_to?(:comment?) && node.comment?
489
850
  end
490
851
 
852
+ is_text = if node.respond_to?(:node_type) && node.node_type.is_a?(Symbol)
853
+ node.node_type == :text
854
+ else
855
+ node.respond_to?(:text?) && node.text?
856
+ end
857
+
491
858
  # Ignore text nodes if specified
492
- return true if opts[:ignore_text_nodes] &&
493
- node.respond_to?(:text?) && node.text?
859
+ return true if opts[:ignore_text_nodes] && is_text
494
860
 
495
861
  # Ignore whitespace-only text nodes based on structural_whitespace
496
862
  # Both :ignore and :normalize should filter out whitespace-only nodes
497
863
  if %i[ignore
498
- normalize].include?(match_opts[:structural_whitespace]) &&
499
- node.respond_to?(:text?) && node.text?
864
+ normalize].include?(match_opts[:structural_whitespace]) && is_text
500
865
  text = node_text(node)
501
866
  return true if MatchOptions.normalize_text(text).empty?
502
867
  end
@@ -504,8 +869,40 @@ module Canon
504
869
  false
505
870
  end
506
871
 
872
+ # Determine the appropriate dimension for a node type
873
+ # @param node [Object] The node to check
874
+ # @return [Symbol] The dimension symbol
875
def determine_node_dimension(node)
  # Canon::Xml::Node: node_type is a Symbol naming the node kind. Anything
  # that is not a comment or processing instruction maps to :text_content.
  if node.respond_to?(:node_type) && node.node_type.is_a?(Symbol)
    by_type = {
      comment: :comments,
      processing_instruction: :processing_instructions,
    }
    return by_type.fetch(node.node_type, :text_content)
  end

  # Moxml/Nokogiri: probe the type predicates in the same order as before
  # (comment, text, cdata, processing instruction).
  return :comments if node.respond_to?(:comment?) && node.comment?

  textual = (node.respond_to?(:text?) && node.text?) ||
            (node.respond_to?(:cdata?) && node.cdata?)
  return :text_content if textual

  if node.respond_to?(:processing_instruction?) && node.processing_instruction?
    return :processing_instructions
  end

  # Unknown node kinds fall back to the text dimension.
  :text_content
end
897
+
507
898
  # Check if two nodes are the same type
508
899
  def same_node_type?(n1, n2)
900
+ # Canon::Xml::Node types - check node_type method
901
+ if n1.respond_to?(:node_type) && n2.respond_to?(:node_type)
902
+ return n1.node_type == n2.node_type
903
+ end
904
+
905
+ # Moxml/Nokogiri types - check individual type methods
509
906
  return true if n1.respond_to?(:element?) && n1.element? &&
510
907
  n2.respond_to?(:element?) && n2.element?
511
908
  return true if n1.respond_to?(:text?) && n1.text? &&
@@ -525,7 +922,10 @@ module Canon
525
922
 
526
923
  # Get text content from a node
527
924
  def node_text(node)
528
- if node.respond_to?(:content)
925
+ # Canon::Xml::Node TextNode uses .value
926
+ if node.respond_to?(:value)
927
+ node.value.to_s
928
+ elsif node.respond_to?(:content)
529
929
  node.content.to_s
530
930
  elsif node.respond_to?(:text)
531
931
  node.text.to_s
@@ -534,6 +934,66 @@ module Canon
534
934
  end
535
935
  end
536
936
 
937
+ # Extract element path for context (best effort)
938
+ # @param node [Object] Node to extract path from
939
+ # @return [Array<String>] Path components
940
def extract_element_path(node)
  # Collects the names found while walking from +node+ up through its
  # parents, outermost name first. The walk visits at most 20 nodes.
  hop_limit = 20
  segments = []
  cursor = node

  hop_limit.times do
    break unless cursor

    label = cursor.name if cursor.respond_to?(:name)
    segments.unshift(label) if label

    # Nothing to climb to when the node has no parent accessor.
    break unless cursor.respond_to?(:parent)

    cursor = cursor.parent

    # Stop at document root: an ancestor answering #root is not recorded.
    break if cursor.respond_to?(:root)
  end

  segments
end
962
+
963
+ # Serialize a node to XML string
964
+ # @param node [Canon::Xml::Node, Object] Node to serialize
965
+ # @return [String] XML string representation
966
def serialize_node_to_xml(node)
  # Renders +node+ as an XML string. Root nodes emit their children,
  # elements emit a tag with attributes (self-closing when the children
  # render to nothing), and comments and processing instructions emit their
  # own markup. Any other object falls back to #to_xml, then #to_s.
  #
  # Text and attribute values are escaped so that "&", "<", ">" and (inside
  # attributes) '"' cannot produce malformed markup.
  # NOTE(review): assumes node values are stored unescaped - confirm against
  # the Canon::Xml data model.
  case node
  when Canon::Xml::Nodes::RootNode
    node.children.map { |child| serialize_node_to_xml(child) }.join
  when Canon::Xml::Nodes::ElementNode
    attrs = node.attribute_nodes.map do |a|
      " #{a.name}=\"#{escape_xml_markup(a.value, attribute: true)}\""
    end.join
    children_xml = node.children.map { |c| serialize_node_to_xml(c) }.join

    if children_xml.empty?
      "<#{node.name}#{attrs}/>"
    else
      "<#{node.name}#{attrs}>#{children_xml}</#{node.name}>"
    end
  when Canon::Xml::Nodes::TextNode
    escape_xml_markup(node.value)
  when Canon::Xml::Nodes::CommentNode
    "<!--#{node.value}-->"
  when Canon::Xml::Nodes::ProcessingInstructionNode
    "<?#{node.target} #{node.data}?>"
  else
    node.respond_to?(:to_xml) ? node.to_xml : node.to_s
  end
end

# Escapes XML markup characters in +text+; also escapes double quotes when
# the value is going inside a double-quoted attribute.
def escape_xml_markup(text, attribute: false)
  escaped = text.to_s.gsub(/[&<>]/,
                           { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;" })
  attribute ? escaped.gsub('"', "&quot;") : escaped
end
996
+
537
997
  # Add a difference to the differences array
538
998
  # @param node1 [Object] First node
539
999
  # @param node2 [Object] Second node
@@ -542,24 +1002,168 @@ module Canon
542
1002
  # @param dimension [Symbol] The match dimension causing this difference
543
1003
  # @param opts [Hash] Options
544
1004
  # @param differences [Array] Array to append difference to
545
def add_difference(node1, node2, diff1, diff2, dimension, _opts,
                   differences)
  # Appends one Canon::Diff::DiffNode describing the difference to
  # +differences+ and returns that array. The options argument is accepted
  # but not read; every call records a difference.
  # Raises ArgumentError when +dimension+ is nil, because every difference
  # must be a DiffNode with a dimension.
  raise ArgumentError, "dimension required for DiffNode" if dimension.nil?

  differences << Canon::Diff::DiffNode.new(
    node1: node1,
    node2: node2,
    dimension: dimension,
    # Human-readable explanation built from both sides of the difference.
    reason: build_difference_reason(node1, node2, diff1, diff2, dimension),
  )
end
1025
+
1026
+ # Build a human-readable reason for a difference
1027
+ # @param node1 [Object] First node
1028
+ # @param node2 [Object] Second node
1029
+ # @param diff1 [String] Difference type for node1
1030
+ # @param diff2 [String] Difference type for node2
1031
+ # @param dimension [Symbol] The dimension of the difference
1032
+ # @return [String] Human-readable reason
1033
def build_difference_reason(node1, node2, diff1, diff2, dimension)
  # Default wording: the two difference labels side by side.
  generic = "#{diff1} vs #{diff2}"

  # The richer wording is only used for :text_content differences where one
  # side is absent.
  one_sided = node1.nil? || node2.nil?
  return generic unless dimension == :text_content && one_sided

  # Whichever side exists supplies the element name and namespace.
  subject = node1 || node2
  unless subject.respond_to?(:name) && subject.respond_to?(:namespace_uri)
    return generic
  end

  uri = subject.namespace_uri
  suffix = uri.nil? || uri.empty? ? "" : " (namespace: #{uri})"
  "element '#{subject.name}'#{suffix}: #{generic}"
end
1050
+
1051
+ # Compare namespace declarations (xmlns and xmlns:* attributes)
1052
+ # @param n1 [Object] First node
1053
+ # @param n2 [Object] Second node
1054
+ # @param opts [Hash] Options
1055
+ # @param differences [Array] Array to append differences to
1056
+ # @return [Symbol] Comparison result
1057
def compare_namespace_declarations(n1, n2, opts, differences)
  # Compares the namespace declarations of two nodes. Returns
  # Comparison::EQUIVALENT when they match; otherwise records a single
  # :namespace_declarations difference and returns
  # Comparison::UNEQUAL_ATTRIBUTES.
  #
  # NOTE(review): an earlier draft assembled a "removed/added/changed"
  # description here but never passed it anywhere, so that unused work has
  # been removed. Confirm whether the description was meant to reach the
  # recorded difference.
  decls1 = extract_namespace_declarations(n1)
  decls2 = extract_namespace_declarations(n2)

  removed = decls1.keys - decls2.keys # declared on n1 only
  added = decls2.keys - decls1.keys # declared on n2 only
  # A prefix counts as changed when both sides declare it with different URIs.
  changed = decls1.any? do |prefix, uri|
    decls2[prefix] && decls2[prefix] != uri
  end

  return Comparison::EQUIVALENT if removed.empty? && added.empty? && !changed

  add_difference(
    n1,
    n2,
    Comparison::UNEQUAL_ATTRIBUTES,
    Comparison::UNEQUAL_ATTRIBUTES,
    :namespace_declarations,
    opts,
    differences,
  )
  Comparison::UNEQUAL_ATTRIBUTES
end
1102
+
1103
+ # Extract namespace declarations from a node
1104
+ # @param node [Object] Node to extract namespace declarations from
1105
+ # @return [Hash] Hash of prefix => URI mappings
1106
def extract_namespace_declarations(node)
  found = {}

  # Canon::Xml::Node: namespace nodes are exposed directly.
  if node.respond_to?(:namespace_nodes)
    node.namespace_nodes.each do |ns|
      # The implicit xml namespace is always present, so it is not reported.
      implicit_xml = ns.prefix == "xml" &&
                     ns.uri == "http://www.w3.org/XML/1998/namespace"
      found[ns.prefix || ""] = ns.uri unless implicit_xml
    end
    return found
  end

  # Stores one attribute if it is a namespace declaration.
  # Prefix mapping: "xmlns" -> "", "xmlns:xmi" -> "xmi".
  record = lambda do |name, value|
    next unless is_namespace_declaration?(name)

    found[name == "xmlns" ? "" : name.split(":", 2)[1]] = value
  end

  # Other nodes: declarations are read from the raw attributes.
  raw_attrs = node.respond_to?(:attribute_nodes) ? node.attribute_nodes : node.attributes

  if raw_attrs.is_a?(Array)
    # Array of attribute objects answering #name and #value.
    raw_attrs.each { |attr| record.call(attr.name, attr.value) }
  else
    # Hash-like collections (Nokogiri and Moxml formats).
    raw_attrs.each do |key, val|
      if key.is_a?(String)
        # Nokogiri format: key is the name, val is the attribute object.
        record.call(key, val.respond_to?(:value) ? val.value : val.to_s)
      else
        # Moxml format: key is the attribute object, val is nil.
        record.call(key.respond_to?(:name) ? key.name : key.to_s,
                    key.respond_to?(:value) ? key.value : key.to_s)
      end
    end
  end

  found
end
1160
+
1161
+ # Check if an attribute name is a namespace declaration
1162
+ # @param attr_name [String] Attribute name
1163
+ # @return [Boolean] true if it's a namespace declaration
1164
def is_namespace_declaration?(attr_name)
  # The bare default-namespace declaration.
  return true if attr_name == "xmlns"

  # Prefixed declarations such as "xmlns:xmi".
  attr_name.start_with?("xmlns:")
end
563
1167
  end
564
1168
  end
565
1169
  end