canon 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/.rspec-opal +7 -0
  3. data/.rubocop_todo.yml +14 -71
  4. data/Rakefile +17 -0
  5. data/lib/canon/cli.rb +1 -1
  6. data/lib/canon/color_detector.rb +3 -5
  7. data/lib/canon/comparison/compare_profile.rb +1 -4
  8. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
  9. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
  10. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
  11. data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
  12. data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
  13. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
  14. data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
  15. data/lib/canon/comparison/format_detector.rb +29 -20
  16. data/lib/canon/comparison/html_comparator.rb +18 -29
  17. data/lib/canon/comparison/html_compare_profile.rb +3 -10
  18. data/lib/canon/comparison/html_parser.rb +1 -1
  19. data/lib/canon/comparison/json_comparator.rb +8 -0
  20. data/lib/canon/comparison/node_inspector.rb +146 -80
  21. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
  22. data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
  23. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
  24. data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
  25. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
  26. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
  27. data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
  28. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
  29. data/lib/canon/comparison/xml_comparator.rb +61 -83
  30. data/lib/canon/comparison/xml_node_comparison.rb +15 -15
  31. data/lib/canon/comparison/yaml_comparator.rb +8 -0
  32. data/lib/canon/comparison.rb +23 -23
  33. data/lib/canon/config/profile_loader.rb +13 -13
  34. data/lib/canon/config.rb +29 -5
  35. data/lib/canon/diff/diff_classifier.rb +7 -41
  36. data/lib/canon/diff/diff_line.rb +1 -1
  37. data/lib/canon/diff/diff_node_enricher.rb +22 -24
  38. data/lib/canon/diff/node_serializer.rb +23 -30
  39. data/lib/canon/diff/path_builder.rb +24 -37
  40. data/lib/canon/diff/source_locator.rb +0 -3
  41. data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
  42. data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
  43. data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
  44. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
  45. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
  46. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
  47. data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
  48. data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
  49. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
  50. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
  51. data/lib/canon/diff_formatter/debug_output.rb +12 -24
  52. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
  53. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
  54. data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
  55. data/lib/canon/diff_formatter/legend.rb +2 -2
  56. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
  57. data/lib/canon/diff_formatter/theme.rb +4 -4
  58. data/lib/canon/diff_formatter.rb +2 -2
  59. data/lib/canon/formatters/html_formatter.rb +1 -1
  60. data/lib/canon/formatters/html_formatter_base.rb +1 -1
  61. data/lib/canon/formatters/xml_formatter.rb +7 -32
  62. data/lib/canon/html/data_model.rb +1 -1
  63. data/lib/canon/pretty_printer/html.rb +1 -1
  64. data/lib/canon/pretty_printer/xml.rb +16 -7
  65. data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
  66. data/lib/canon/rspec_matchers.rb +2 -2
  67. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  68. data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
  69. data/lib/canon/tree_diff/core/tree_node.rb +1 -3
  70. data/lib/canon/validators/html_validator.rb +1 -1
  71. data/lib/canon/validators/xml_validator.rb +1 -1
  72. data/lib/canon/version.rb +1 -1
  73. data/lib/canon/xml/data_model.rb +131 -137
  74. data/lib/canon/xml/namespace_helper.rb +5 -0
  75. data/lib/canon/xml/node.rb +2 -1
  76. data/lib/canon/xml/nodes/root_node.rb +4 -0
  77. data/lib/canon/xml/nodes/text_node.rb +6 -1
  78. data/lib/canon/xml/sax_builder.rb +4 -6
  79. data/lib/canon/xml_backend.rb +49 -0
  80. data/lib/canon/xml_parsing.rb +271 -0
  81. data/lib/canon.rb +3 -1
  82. data/lib/tasks/benchmark_runner.rb +1 -1
  83. data/lib/tasks/performance_helpers.rb +1 -1
  84. metadata +5 -2
@@ -3,41 +3,6 @@
3
3
  module Canon
4
4
  module Comparison
5
5
  # Whitespace sensitivity utilities for element-level control
6
- #
7
- # This module provides three-way classification of whitespace behaviour
8
- # at the element level:
9
- #
10
- # * **:preserve** — every whitespace character is significant. `" "` ≠ `"\n"`.
11
- # Configured via +preserve_whitespace_elements+ (HTML default: pre, code,
12
- # textarea, script, style; XML default: none).
13
- #
14
- # * **:collapse** — presence ≠ absence, but all whitespace forms are
15
- # equivalent: `" "` == `"\n "`. Configured via +collapse_whitespace_elements+
16
- # (HTML default: p, li, dt, dd, td, th, h1-h6, caption, figcaption, label,
17
- # legend, summary, blockquote, address; XML default: none).
18
- #
19
- # * **:strip** — all whitespace is structural formatting noise and is
20
- # dropped. Default for XML; HTML elements not in the above lists.
21
- #
22
- # Classification is **ancestor-based**: the closest matching ancestor
23
- # determines the class. The strip blacklist (+strip_whitespace_elements+)
24
- # overrides any sensitive ancestor.
25
- #
26
- # == Priority Order
27
- #
28
- # 1. respect_xml_space: false → User config only (ignore xml:space)
29
- # 2. Ancestor walk (strip blacklist wins; then preserve; then collapse)
30
- # 3. xml:space="preserve" → preserve
31
- # 4. xml:space="default" → use configured behaviour
32
- # 5. Format defaults (HTML: collapse for most elements; XML: strip)
33
- #
34
- # == Usage
35
- #
36
- # WhitespaceSensitivity.classify_element(element, match_opts)
37
- # => :preserve, :collapse, or :strip
38
- #
39
- # WhitespaceSensitivity.element_sensitive?(node, opts)
40
- # => true if whitespace should be preserved (preserve or collapse)
41
6
  module WhitespaceSensitivity
42
7
  # HTML mixed-content "leaf block" elements where whitespace presence
43
8
  # matters but all forms are equivalent (CSS block whitespace collapsing).
@@ -61,80 +26,52 @@ module Canon
61
26
 
62
27
  class << self
63
28
  # Classify the whitespace behaviour for an element using ancestor walk.
64
- #
65
- # @param element [Object] The element node to classify
66
- # @param match_opts [Hash] Resolved match options
67
- # @return [Symbol] :preserve, :collapse, or :strip
68
29
  def classify_element(element, match_opts)
69
30
  return :strip unless element
70
- return :strip unless element.respond_to?(:name)
31
+ return :strip unless node_name(element)
71
32
 
72
33
  preserve_set = resolved_preserve_elements_set(match_opts)
73
34
  collapse_set = resolved_collapse_elements_set(match_opts)
74
35
  strip_set = resolved_strip_elements_set(match_opts)
75
36
 
76
- # Ancestor walk: start at the element itself, walk up.
77
- # Strip blacklist wins over any sensitive ancestor.
78
37
  walk_ancestor_classification(element, preserve_set, collapse_set,
79
- strip_set, match_opts)
38
+ strip_set)
80
39
  end
81
40
 
82
41
  # Check if an element is whitespace-sensitive based on configuration.
83
- # Returns true for :preserve or :collapse classification.
84
- #
85
- # @param node [Object] The element node to check
86
- # @param opts [Hash] Comparison options containing match_opts
87
- # @return [Boolean] true if whitespace should be preserved for this element
88
42
  def element_sensitive?(node, opts)
89
43
  match_opts = opts[:match_opts]
90
44
  return false unless match_opts
91
45
  return false unless text_node_parent?(node)
92
46
 
93
- parent = node.parent
47
+ parent = node_parent(node)
94
48
 
95
- # 1. Check if we should ignore xml:space (user override)
96
49
  unless respect_xml_space?(match_opts)
97
50
  return user_config_sensitive?(parent, match_opts)
98
51
  end
99
52
 
100
- # 2. Check xml:space="preserve" (document declaration)
101
53
  return true if xml_space_preserve?(parent)
102
-
103
- # 3. Check xml:space="default" (use configured behavior)
104
54
  return false if xml_space_default?(parent)
105
55
 
106
- # 4. Three-way classification (ancestor-based)
107
56
  classification = classify_element(parent, match_opts)
108
57
  %i[preserve collapse].include?(classification)
109
58
  end
110
59
 
111
60
  # Check if whitespace-only text node should be filtered
112
- #
113
- # @param node [Object] The text node to check
114
- # @param opts [Hash] Comparison options
115
- # @return [Boolean] true if node should be preserved (not filtered)
116
61
  def preserve_whitespace_node?(node, opts)
117
- return false unless node.respond_to?(:parent)
118
- return false unless node.parent
62
+ parent = node_parent(node)
63
+ return false unless parent
119
64
 
120
65
  element_sensitive?(node, opts)
121
66
  end
122
67
 
123
68
  # Return the whitespace class for a text node used during comparison.
124
- #
125
- # :preserve → preserve all whitespace character-by-character
126
- # :collapse → preserve presence (normalize to single space)
127
- # :strip → drop whitespace-only text nodes
128
- #
129
- # @param node [Object] Text node to classify
130
- # @param opts [Hash] Comparison options containing match_opts
131
- # @return [Symbol] :preserve, :collapse, or :strip
132
69
  def classify_text_node(node, opts)
133
70
  match_opts = opts[:match_opts]
134
71
  return :strip unless match_opts
135
72
  return :strip unless text_node_parent?(node)
136
73
 
137
- parent = node.parent
74
+ parent = node_parent(node)
138
75
 
139
76
  unless respect_xml_space?(match_opts)
140
77
  return user_config_sensitive?(parent,
@@ -148,15 +85,6 @@ module Canon
148
85
  end
149
86
 
150
87
  # Check if structural whitespace is preserved (not stripped) for an element.
151
- #
152
- # Uses the same priority chain as element_sensitive? / classify_text_node:
153
- # 1. xml:space="preserve" → always preserved
154
- # 2. xml:space="default" → use configured behaviour
155
- # 3. ancestor-walk classification (strip = dropped)
156
- #
157
- # @param element [Object] Element node to check
158
- # @param match_opts [Hash] Resolved match options
159
- # @return [Boolean] true if whitespace is preserved (not stripped)
160
88
  def whitespace_preserved?(element, match_opts)
161
89
  if respect_xml_space?(match_opts)
162
90
  return true if xml_space_preserve?(element)
@@ -167,27 +95,14 @@ module Canon
167
95
  %i[preserve collapse].include?(classification)
168
96
  end
169
97
 
170
- # Get resolved list of preserve whitespace element names (strings).
171
- #
172
- # @param match_opts [Hash] Resolved match options
173
- # @return [Array<String>] Preserve element names
174
98
  def resolved_preserve_elements(match_opts)
175
99
  resolved_preserve_elements_set(match_opts).to_a
176
100
  end
177
101
 
178
- # Get resolved list of collapse whitespace element names (strings).
179
- #
180
- # @param match_opts [Hash] Resolved match options
181
- # @return [Array<String>] Collapse element names
182
102
  def resolved_collapse_elements(match_opts)
183
103
  resolved_collapse_elements_set(match_opts).to_a
184
104
  end
185
105
 
186
- # Get format-specific default preserve (exact-whitespace) elements.
187
- # This is the SINGLE SOURCE OF TRUTH for default preserve-whitespace elements.
188
- #
189
- # @param match_opts [Hash] Resolved match options
190
- # @return [Array<Symbol>] Default preserve element names
191
106
  def format_default_preserve_elements(match_opts)
192
107
  format = match_opts[:format] || :xml
193
108
  case format
@@ -198,10 +113,6 @@ module Canon
198
113
  end
199
114
  end
200
115
 
201
- # Get format-specific default collapse elements.
202
- #
203
- # @param match_opts [Hash] Resolved match options
204
- # @return [Array<Symbol>] Default collapse element names
205
116
  def format_default_collapse_elements(match_opts)
206
117
  format = match_opts[:format] || :xml
207
118
  case format
@@ -212,61 +123,32 @@ module Canon
212
123
  end
213
124
  end
214
125
 
215
- # Check if an element is in the default sensitive list for its format
216
- #
217
- # @param element_name [String, Symbol] The element name to check
218
- # @param match_opts [Hash] Resolved match options
219
- # @return [Boolean] true if element is in default sensitive list
220
126
  def default_sensitive_element?(element_name, match_opts)
221
127
  format_default_preserve_elements(match_opts)
222
128
  .include?(element_name.to_sym)
223
129
  end
224
130
 
225
- # Check if a whitespace-only text node sits between two inline element
131
+ # Check if whitespace-only text node sits between two inline element
226
132
  # siblings, making the whitespace semantically significant.
227
- #
228
- # In HTML rendering, a space between <span>A</span> <span>B</span>
229
- # produces visible output. Stripping such nodes produces false
230
- # equivalence.
231
- #
232
- # Works with any parent type (element, DocumentFragment, RootNode)
233
- # since the check is about sibling context, not parent type.
234
- #
235
- # @param text_node [Object] Text node (Nokogiri or Canon::Xml::Node)
236
- # @return [Boolean] true if whitespace is between inline siblings
237
133
  def inline_whitespace_significant?(text_node)
238
- return false unless text_node.respond_to?(:parent)
239
-
240
- parent = text_node.parent
134
+ parent = NodeInspector.parent(text_node)
241
135
  return false unless parent
242
- return false unless parent.respond_to?(:children)
243
136
 
244
- siblings = parent.children
137
+ siblings = NodeInspector.children(parent)
245
138
  idx = siblings.index(text_node)
246
139
  return false unless idx
247
140
 
248
- # Look at the IMMEDIATE non-whitespace-text neighbour on each
249
- # side. Whitespace at a block boundary is collapsed per CSS,
250
- # so both immediate neighbours must be inline for the
251
- # whitespace to be significant. Walking all siblings (the
252
- # earlier behaviour) misclassified whitespace at a block
253
- # boundary as significant whenever any inline element existed
254
- # elsewhere among the siblings.
255
141
  prev_neighbour = nearest_non_whitespace_sibling(siblings, idx, -1)
256
142
  next_neighbour = nearest_non_whitespace_sibling(siblings, idx, 1)
257
143
 
258
144
  inline_element?(prev_neighbour) && inline_element?(next_neighbour)
259
145
  end
260
146
 
261
- # Walk outward from +idx+ in +direction+ (+1 forward, -1 back),
262
- # skipping whitespace-only text nodes, and return the first
263
- # non-whitespace sibling found. Returns nil if none.
264
147
  def nearest_non_whitespace_sibling(siblings, idx, direction)
265
148
  i = idx + direction
266
149
  while i >= 0 && i < siblings.length
267
150
  s = siblings[i]
268
- unless s.respond_to?(:text?) && s.text? &&
269
- s.respond_to?(:content) && s.content.to_s.strip.empty?
151
+ unless whitespace_text_node?(s)
270
152
  return s
271
153
  end
272
154
 
@@ -275,19 +157,32 @@ module Canon
275
157
  nil
276
158
  end
277
159
 
278
- # Check if text content contains a non-breaking space (U+00A0).
279
- # NBSP is NOT collapsible whitespace in HTML — it always renders as
280
- # a visible space and must never be stripped.
281
- #
282
- # @param text [String] Text content to check
283
- # @return [Boolean] true if text contains U+00A0
284
160
  def contains_nbsp?(text)
285
- text.to_s.include?("\u00A0")
161
+ text.to_s.include?(" ")
286
162
  end
287
163
 
288
164
  private
289
165
 
290
- # Build the Set of preserve whitespace element names (strings).
166
+ def whitespace_text_node?(node)
167
+ NodeInspector.whitespace_only_text?(node)
168
+ end
169
+
170
+ def node_name(node)
171
+ NodeInspector.name(node)
172
+ end
173
+
174
+ def node_parent(node)
175
+ NodeInspector.parent(node)
176
+ end
177
+
178
+ def node_children(node)
179
+ NodeInspector.children(node)
180
+ end
181
+
182
+ def element?(node)
183
+ NodeInspector.element_node?(node)
184
+ end
185
+
291
186
  def resolved_preserve_elements_set(match_opts)
292
187
  set = Set.new(format_default_preserve_elements(match_opts).map(&:to_s))
293
188
 
@@ -295,12 +190,10 @@ module Canon
295
190
  set |= match_opts[:preserve_whitespace_elements].map(&:to_s)
296
191
  end
297
192
 
298
- # Remove blacklisted elements
299
193
  strip_set = resolved_strip_elements_set(match_opts)
300
194
  set.reject { |e| strip_set.include?(e) }.to_set
301
195
  end
302
196
 
303
- # Build the Set of collapse whitespace element names (strings).
304
197
  def resolved_collapse_elements_set(match_opts)
305
198
  set = Set.new(format_default_collapse_elements(match_opts).map(&:to_s))
306
199
 
@@ -308,49 +201,36 @@ module Canon
308
201
  set |= match_opts[:collapse_whitespace_elements].map(&:to_s)
309
202
  end
310
203
 
311
- # Remove blacklisted elements
312
204
  strip_set = resolved_strip_elements_set(match_opts)
313
205
  set.reject { |e| strip_set.include?(e) }.to_set
314
206
  end
315
207
 
316
- # Build the Set of strip (blacklist) element names (strings).
317
208
  def resolved_strip_elements_set(match_opts)
318
209
  raw = match_opts[:strip_whitespace_elements]
319
210
  Set.new((raw || []).map(&:to_s))
320
211
  end
321
212
 
322
- # Perform the ancestor walk classification.
323
- # The element itself is checked first, then its ancestors.
324
- # Strip blacklist wins over any sensitive ancestor.
325
213
  def walk_ancestor_classification(element, preserve_set, collapse_set,
326
- strip_set, _match_opts)
214
+ strip_set)
327
215
  current = element
328
- while current.respond_to?(:name)
329
- name = current.name.to_s
216
+ while current
217
+ name = node_name(current)
218
+ break unless name
330
219
 
331
- return :strip if strip_set.include?(name)
332
- return :preserve if preserve_set.include?(name)
333
- return :collapse if collapse_set.include?(name)
220
+ return :strip if strip_set.include?(name.to_s)
221
+ return :preserve if preserve_set.include?(name.to_s)
222
+ return :collapse if collapse_set.include?(name.to_s)
334
223
 
335
- # Walk up
336
- break unless current.respond_to?(:parent)
337
-
338
- parent = current.parent
224
+ parent = node_parent(current)
339
225
  break if parent.nil?
340
- break unless parent.respond_to?(:name)
341
- break if parent == current # guard infinite loop
226
+ break if parent == current
342
227
 
343
228
  current = parent
344
229
  end
345
230
 
346
- # No matching ancestor — whitespace sensitivity is always opt-in.
347
- # Elements not in any list are strip regardless of format.
348
- # (HTML_COLLAPSE_ELEMENTS are already merged into the collapse_set
349
- # by resolved_collapse_elements_set, so they are found during the walk.)
350
231
  :strip
351
232
  end
352
233
 
353
- # Check if we should respect xml:space attribute
354
234
  def respect_xml_space?(match_opts)
355
235
  if match_opts.key?(:respect_xml_space)
356
236
  match_opts[:respect_xml_space]
@@ -359,7 +239,6 @@ module Canon
359
239
  end
360
240
  end
361
241
 
362
- # Check if xml:space="preserve" is set
363
242
  def xml_space_preserve?(element)
364
243
  if element.is_a?(Canon::Xml::Nodes::ElementNode)
365
244
  element.attribute_nodes.any? do |attr|
@@ -367,14 +246,12 @@ module Canon
367
246
  attr.namespace_uri == "http://www.w3.org/XML/1998/namespace" &&
368
247
  attr.value == "preserve"
369
248
  end
370
- elsif element.respond_to?(:[])
371
- element["xml:space"] == "preserve"
372
249
  else
373
- false
250
+ Canon::XmlParsing.attribute_value(element,
251
+ "xml:space") == "preserve"
374
252
  end
375
253
  end
376
254
 
377
- # Check if xml:space="default" is set
378
255
  def xml_space_default?(element)
379
256
  if element.is_a?(Canon::Xml::Nodes::ElementNode)
380
257
  element.attribute_nodes.any? do |attr|
@@ -382,55 +259,40 @@ module Canon
382
259
  attr.namespace_uri == "http://www.w3.org/XML/1998/namespace" &&
383
260
  attr.value == "default"
384
261
  end
385
- elsif element.respond_to?(:[])
386
- element["xml:space"] == "default"
387
262
  else
388
- false
263
+ Canon::XmlParsing.attribute_value(element, "xml:space") == "default"
389
264
  end
390
265
  end
391
266
 
392
- # Check sensitivity based on user configuration (binary, no ancestor)
393
267
  def user_config_sensitive?(element, match_opts)
394
268
  list = match_opts[:preserve_whitespace_elements]
395
269
  return false unless list
396
270
 
397
- list.map(&:to_s).include?(element.name.to_s)
271
+ name = node_name(element)
272
+ return false unless name
273
+
274
+ list.map(&:to_s).include?(name.to_s)
398
275
  end
399
276
 
400
- # Check if node has a parent that's an element (not document root)
401
277
  def text_node_parent?(node)
402
- return false unless node.respond_to?(:parent)
403
- return false unless node.parent
404
-
405
- parent = node.parent
406
- return true if parent.respond_to?(:element?) && parent.element?
278
+ parent = node_parent(node)
279
+ return false unless parent
407
280
 
408
- # Nokogiri compatibility
409
- parent.respond_to?(:node_type) && parent.node_type == :element
281
+ element?(parent)
410
282
  end
411
283
 
412
- # Get the parent element of a text node, or nil.
413
- # Works with both Nokogiri and Canon::Xml::Node types.
414
284
  def parent_element_of(text_node)
415
- return nil unless text_node.respond_to?(:parent)
416
-
417
- parent = text_node.parent
285
+ parent = node_parent(text_node)
418
286
  return nil unless parent
419
287
 
420
- if parent.is_a?(Canon::Xml::Nodes::ElementNode)
421
- parent
422
- elsif parent.respond_to?(:element?) && parent.element?
423
- parent
424
- elsif parent.respond_to?(:node_type) && parent.node_type == :element
425
- parent
426
- end
288
+ parent if element?(parent)
427
289
  end
428
290
 
429
- # Check if a node is an HTML inline element.
430
291
  def inline_element?(node)
431
- return false unless node.respond_to?(:name)
292
+ name = node_name(node)
293
+ return false unless name
432
294
 
433
- INLINE_ELEMENTS.include?(node.name.to_s.downcase)
295
+ INLINE_ELEMENTS.include?(name.to_s.downcase)
434
296
  end
435
297
  end
436
298
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "../match_options"
4
+ require_relative "../../xml/namespace_helper"
4
5
 
5
6
  module Canon
6
7
  module Comparison
@@ -91,13 +92,11 @@ module Canon
91
92
  # @return [Array<String, String>] Normalized [name, value] pair
92
93
  def self.normalize_attribute_pair(key, val)
93
94
  if key.is_a?(String)
94
- # Nokogiri format: key=name (String), val=attr object
95
95
  name = key
96
- value = val.respond_to?(:value) ? val.value : val.to_s
96
+ value = val.is_a?(String) ? val : val.value
97
97
  else
98
- # Moxml format: key=attr object, val=nil
99
- name = key.respond_to?(:name) ? key.name : key.to_s
100
- value = key.respond_to?(:value) ? key.value : key.to_s
98
+ name = key.name
99
+ value = key.value
101
100
  end
102
101
 
103
102
  [name, value]
@@ -123,12 +122,8 @@ module Canon
123
122
  end
124
123
  end
125
124
 
126
- # Check if an attribute name is a namespace declaration
127
- #
128
- # @param attr_name [String] Attribute name
129
- # @return [Boolean] true if it's a namespace declaration
130
125
  def self.namespace_declaration?(attr_name)
131
- attr_name == "xmlns" || attr_name.start_with?("xmlns:")
126
+ Canon::Xml::NamespaceHelper.namespace_declaration?(attr_name)
132
127
  end
133
128
  end
134
129
  end
@@ -83,10 +83,10 @@ module Canon
83
83
 
84
84
  # Create temporary RootNode wrappers
85
85
  temp_root1 = Canon::Xml::Nodes::RootNode.new
86
- temp_root1.instance_variable_set(:@children, children1.dup)
86
+ temp_root1.children = children1.dup
87
87
 
88
88
  temp_root2 = Canon::Xml::Nodes::RootNode.new
89
- temp_root2.instance_variable_set(:@children, children2.dup)
89
+ temp_root2.children = children2.dup
90
90
 
91
91
  matcher = Canon::Xml::ElementMatcher.new
92
92
  matches = matcher.match_trees(temp_root1, temp_root2)
@@ -282,7 +282,7 @@ module Canon
282
282
  end
283
283
 
284
284
  smaller_set_names = smaller_set.filter_map do |c|
285
- next nil unless c.is_a?(Canon::Xml::Node) || c.is_a?(Nokogiri::XML::Node)
285
+ next nil unless c.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(c)
286
286
  # Exclude generic node-type names (e.g. "#text") that are
287
287
  # shared by all text nodes and cannot be used for matching.
288
288
  next nil if c.name.start_with?("#")
@@ -298,7 +298,7 @@ module Canon
298
298
  # consider it a mismatch
299
299
  mismatch_children << larger_set[i]
300
300
  elsif (larger_set[i].is_a?(Canon::Xml::Node) ||
301
- larger_set[i].is_a?(Nokogiri::XML::Node)) &&
301
+ Canon::XmlParsing.xml_node?(larger_set[i])) &&
302
302
  !larger_set[i].name.start_with?("#") &&
303
303
  !smaller_set_names.include?(larger_set[i].name)
304
304
  # If the name of the node is not found in the smaller set,
@@ -53,7 +53,7 @@ module Canon
53
53
  # For deleted/inserted nodes, include namespace information if available
54
54
  if dimension == :text_content && (node1.nil? || node2.nil?)
55
55
  node = node1 || node2
56
- if node.is_a?(Canon::Xml::Node) || node.is_a?(Nokogiri::XML::Node)
56
+ if node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)
57
57
  ns = node.namespace_uri
58
58
  ns_info = if ns.nil? || ns.empty?
59
59
  ""
@@ -100,8 +100,8 @@ module Canon
100
100
  elsif dimension == :element_structure &&
101
101
  diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
102
102
  diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
103
- (node1.is_a?(Canon::Xml::Node) || node1.is_a?(Nokogiri::XML::Node)) &&
104
- (node2.is_a?(Canon::Xml::Node) || node2.is_a?(Nokogiri::XML::Node)) &&
103
+ (node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
104
+ (node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
105
105
  node1.name && node2.name && node1.name != node2.name
106
106
  "different element name (<#{node1.name}> vs <#{node2.name}>)"
107
107
  else
@@ -198,12 +198,14 @@ module Canon
198
198
  node.value
199
199
  when Canon::Xml::Node
200
200
  node.text_content
201
- when Nokogiri::XML::Node
202
- node.content.to_s
203
- when String
204
- node
205
201
  else
206
- node.to_s
202
+ if Canon::XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Node)
203
+ node.content.to_s
204
+ elsif Canon::XmlParsing.xml_node?(node)
205
+ Canon::XmlParsing.text_content(node)
206
+ else
207
+ node.to_s
208
+ end
207
209
  end
208
210
  rescue StandardError
209
211
  nil
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../../xml/namespace_helper"
4
+
3
5
  module Canon
4
6
  module Comparison
5
7
  module XmlComparatorHelpers
@@ -41,20 +43,20 @@ module Canon
41
43
  def self.extract_declarations(node)
42
44
  declarations = {}
43
45
 
44
- # Handle Canon::Xml::Node (uses namespace_nodes)
45
- if node.respond_to?(:namespace_nodes)
46
- return extract_from_namespace_nodes(node.namespace_nodes,
47
- declarations)
48
- end
46
+ if node.is_a?(Canon::Xml::Node)
47
+ if node.namespace_nodes
48
+ return extract_from_namespace_nodes(node.namespace_nodes,
49
+ declarations)
50
+ end
49
51
 
50
- # Handle Nokogiri/Moxml nodes (use attributes)
51
- raw_attrs = node.respond_to?(:attribute_nodes) ? node.attribute_nodes : node.attributes
52
+ raw_attrs = node.attribute_nodes
53
+ else
54
+ raw_attrs = node.attributes
55
+ end
52
56
 
53
- # Handle Canon::Xml::Node attribute format (array of AttributeNode)
54
57
  if raw_attrs.is_a?(Array)
55
58
  extract_from_array_attributes(raw_attrs, declarations)
56
59
  else
57
- # Handle Nokogiri and Moxml attribute formats (Hash-like)
58
60
  extract_from_hash_attributes(raw_attrs, declarations)
59
61
  end
60
62
 
@@ -105,23 +107,11 @@ module Canon
105
107
  # @return [Hash] Declarations hash
106
108
  def self.extract_from_hash_attributes(raw_attrs, declarations)
107
109
  raw_attrs.each do |key, val|
108
- # Normalize key and value
109
- name = if key.is_a?(String)
110
- # Nokogiri format: key=name (String), val=attr object
111
- key
112
- else
113
- # Moxml format: key=attr object, val=nil
114
- key.respond_to?(:name) ? key.name : key.to_s
115
- end
110
+ name = key.is_a?(String) ? key : key.name
116
111
 
117
112
  if namespace_declaration?(name)
118
- value = if val.respond_to?(:value)
119
- val.value
120
- else
121
- val.to_s
122
- end
113
+ value = val.is_a?(String) ? val : val.value
123
114
 
124
- # Extract prefix: "xmlns" -> "", "xmlns:xmi" -> "xmi"
125
115
  prefix = name == "xmlns" ? "" : name.split(":", 2)[1]
126
116
  declarations[prefix] = value
127
117
  end
@@ -130,12 +120,8 @@ module Canon
130
120
  declarations
131
121
  end
132
122
 
133
- # Check if an attribute name is a namespace declaration
134
- #
135
- # @param attr_name [String] Attribute name
136
- # @return [Boolean] true if it's a namespace declaration
137
123
  def self.namespace_declaration?(attr_name)
138
- attr_name == "xmlns" || attr_name.start_with?("xmlns:")
124
+ Canon::Xml::NamespaceHelper.namespace_declaration?(attr_name)
139
125
  end
140
126
 
141
127
  # Add a namespace declaration difference