canon 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +83 -22
  3. data/docs/Gemfile +1 -0
  4. data/docs/_config.yml +90 -1
  5. data/docs/advanced/diff-classification.adoc +196 -24
  6. data/docs/features/match-options/index.adoc +239 -1
  7. data/lib/canon/comparison/format_detector.rb +2 -1
  8. data/lib/canon/comparison/html_comparator.rb +19 -8
  9. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  10. data/lib/canon/comparison/markup_comparator.rb +109 -2
  11. data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
  12. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  13. data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
  14. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
  15. data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
  16. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
  17. data/lib/canon/comparison/xml_comparator.rb +240 -23
  18. data/lib/canon/comparison/xml_node_comparison.rb +25 -3
  19. data/lib/canon/diff/diff_classifier.rb +119 -5
  20. data/lib/canon/diff/formatting_detector.rb +1 -1
  21. data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
  22. data/lib/canon/rspec_matchers.rb +37 -8
  23. data/lib/canon/version.rb +1 -1
  24. data/lib/canon/xml/data_model.rb +24 -13
  25. metadata +4 -78
  26. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
  27. data/false_positive_analysis.txt +0 -0
  28. data/file1.html +0 -1
  29. data/file2.html +0 -1
  30. data/old-docs/ADVANCED_TOPICS.adoc +0 -20
  31. data/old-docs/BASIC_USAGE.adoc +0 -16
  32. data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
  33. data/old-docs/CLI.adoc +0 -497
  34. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  35. data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
  36. data/old-docs/DIFF_FORMATTING.adoc +0 -540
  37. data/old-docs/DIFF_PARAMETERS.adoc +0 -261
  38. data/old-docs/DOM_DIFF.adoc +0 -1017
  39. data/old-docs/ENV_CONFIG.adoc +0 -876
  40. data/old-docs/FORMATS.adoc +0 -867
  41. data/old-docs/INPUT_VALIDATION.adoc +0 -477
  42. data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
  43. data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
  44. data/old-docs/MATCH_OPTIONS.adoc +0 -912
  45. data/old-docs/MODES.adoc +0 -432
  46. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  47. data/old-docs/OPTIONS.adoc +0 -1387
  48. data/old-docs/PREPROCESSING.adoc +0 -491
  49. data/old-docs/README.old.adoc +0 -2831
  50. data/old-docs/RSPEC.adoc +0 -814
  51. data/old-docs/RUBY_API.adoc +0 -485
  52. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
  53. data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
  54. data/old-docs/STRING_COMPARE.adoc +0 -345
  55. data/old-docs/TMP.adoc +0 -3384
  56. data/old-docs/TREE_DIFF.adoc +0 -1080
  57. data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
  58. data/old-docs/VERBOSE.adoc +0 -482
  59. data/old-docs/VISUALIZATION_MAP.adoc +0 -625
  60. data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
  61. data/scripts/analyze_current_state.rb +0 -85
  62. data/scripts/analyze_false_positives.rb +0 -114
  63. data/scripts/analyze_remaining_failures.rb +0 -105
  64. data/scripts/compare_current_failures.rb +0 -95
  65. data/scripts/compare_dom_tree_diff.rb +0 -158
  66. data/scripts/compare_failures.rb +0 -151
  67. data/scripts/debug_attribute_extraction.rb +0 -66
  68. data/scripts/debug_blocks_839.rb +0 -115
  69. data/scripts/debug_meta_matching.rb +0 -52
  70. data/scripts/debug_p_matching.rb +0 -192
  71. data/scripts/debug_signature_matching.rb +0 -118
  72. data/scripts/debug_sourcecode_124.rb +0 -32
  73. data/scripts/debug_whitespace_sensitive.rb +0 -192
  74. data/scripts/extract_false_positives.rb +0 -138
  75. data/scripts/find_actual_false_positives.rb +0 -125
  76. data/scripts/investigate_all_false_positives.rb +0 -161
  77. data/scripts/investigate_batch1.rb +0 -127
  78. data/scripts/investigate_classification.rb +0 -150
  79. data/scripts/investigate_classification_detailed.rb +0 -190
  80. data/scripts/investigate_common_failures.rb +0 -342
  81. data/scripts/investigate_false_negative.rb +0 -80
  82. data/scripts/investigate_false_positive.rb +0 -83
  83. data/scripts/investigate_false_positives.rb +0 -227
  84. data/scripts/investigate_false_positives_batch.rb +0 -163
  85. data/scripts/investigate_mixed_content.rb +0 -125
  86. data/scripts/investigate_remaining_16.rb +0 -214
  87. data/scripts/run_single_test.rb +0 -29
  88. data/scripts/test_all_false_positives.rb +0 -95
  89. data/scripts/test_attribute_details.rb +0 -61
  90. data/scripts/test_both_algorithms.rb +0 -49
  91. data/scripts/test_both_simple.rb +0 -49
  92. data/scripts/test_enhanced_semantic_output.rb +0 -125
  93. data/scripts/test_readme_examples.rb +0 -131
  94. data/scripts/test_semantic_tree_diff.rb +0 -99
  95. data/scripts/test_semantic_ux_improvements.rb +0 -135
  96. data/scripts/test_single_false_positive.rb +0 -119
  97. data/scripts/test_size_limits.rb +0 -99
  98. data/test_html_1.html +0 -21
  99. data/test_html_2.html +0 -21
  100. data/test_nokogiri.rb +0 -33
  101. data/test_normalize.rb +0 -45
@@ -45,10 +45,37 @@ Match dimensions are orthogonal aspects that can be configured independently.
45
45
 
46
46
  `:strict`:: Text must match exactly, character-for-character including all whitespace
47
47
 
48
- `:normalize`:: Whitespace is normalized (collapsed/trimmed) before comparison
48
+ `:normalize`:: Whitespace is normalized (collapsed/trimmed) before comparison.
49
+ Formatting-only differences (e.g., extra spaces around text) are classified as
50
+ *informative* rather than normative. This means documents with only whitespace
51
+ differences in text content are considered equivalent.
49
52
 
50
53
  `:ignore`:: Text content is completely ignored in comparison
51
54
 
55
+ .Using text_content: :normalize
56
+ [example]
57
+ ====
58
+ [source,ruby]
59
+ ----
60
+ # These are equivalent with :normalize
61
+ # Whitespace differences are formatting-only (informative)
62
+ Canon.equivalent?(
63
+ '<p> text </p>',
64
+ '<p>text</p>',
65
+ match: { text_content: :normalize }
66
+ )
67
+ # => true
68
+
69
+ # These differ in :strict mode
70
+ Canon.equivalent?(
71
+ '<p> text </p>',
72
+ '<p>text</p>',
73
+ match: { text_content: :strict }
74
+ )
75
+ # => false
76
+ ----
77
+ ====
78
+
52
79
  === structural_whitespace
53
80
 
54
81
  **Applies to**: All formats
@@ -63,6 +90,200 @@ Match dimensions are orthogonal aspects that can be configured independently.
63
90
 
64
91
  `:ignore`:: Structural whitespace is completely ignored
65
92
 
93
+
94
+ === Whitespace sensitivity at element level
95
+
96
+ ==== General
97
+
98
+ In XML, whitespace sensitivity can vary by schema and element:
99
+
100
+ * Elements that apply `xml:space="preserve"` are whitespace-sensitive.
101
+
102
+ * Other elements may be defined as sensitive by schema (e.g.
103
+ `<xs:whiteSpace value="preserve"/>` in XML Schema) or unannounced conventions, such as
104
+ for mixed content.
105
+
106
+ In HTML, elements like `<pre>` and `<code>` preserve whitespace, while others
107
+ like `<div>` and `<p>` do not.
108
+
109
+ In the unannounced cases, the developer must indicate which elements are
110
+ whitespace-sensitive.
111
+
112
+ In Canon, you can control whitespace sensitivity at the element level using
113
+ `structural_whitespace: :strict` or `text_content: :normalize`.
114
+
115
+ Element-level sensitivity controls both:
116
+
117
+ * `structural_whitespace`: Whether whitespace between child elements of the element is
118
+ preserved
119
+
120
+ * `text_content`: Whether whitespace within text nodes of the element is
121
+ normalized
122
+
123
+ Options for controlling element-level sensitivity include:
124
+
125
+ * **xml:space attribute** - XML standard for declaring whitespace sensitivity in documents
126
+ * **whitelist/blacklist options** - User-specified element lists
127
+ * **Format defaults** - HTML has built-in sensitive elements
128
+ * **respect_xml_space option** - Control whether xml:space is honored
129
+
130
+ For elements marked as sensitive, whitespace differences are always normative.
131
+
132
+ For non-sensitive elements using `text_content: :normalize`, whitespace
133
+ differences are classified as formatting-only (informative).
134
+
135
+
136
+ ==== xml:space attribute support
137
+
138
+ The `xml:space` attribute is the XML standard way to declare whitespace
139
+ sensitivity in XML instance documents:
140
+
141
+ [source,xml]
142
+ ----
143
+ <!-- Preserve whitespace in this element -->
144
+ <code xml:space="preserve">
145
+ Indentation and newlines matter here
146
+ </code>
147
+
148
+ <!-- Use default behavior -->
149
+ <text xml:space="default">
150
+ Whitespace handling follows configured behavior
151
+ </text>
152
+ ----
153
+
154
+ ==== Whitelist and blacklist options
155
+
156
+ You can explicitly specify which elements are whitespace-sensitive:
157
+
158
+ [source,ruby]
159
+ ----
160
+ # Specify elements that preserve whitespace
161
+ Canon::Comparison.equivalent?(xml1, xml2,
162
+ match: {
163
+ structural_whitespace: :strict,
164
+ whitespace_sensitive_elements: [:pre, :code, :sample],
165
+ whitespace_insensitive_elements: [:p, :div] # Override defaults/whitelist
166
+ }
167
+ )
168
+ ----
169
+
170
+ ==== respect_xml_space option
171
+
172
+ Control whether xml:space attributes in the document are honored:
173
+
174
+ [source,ruby]
175
+ ----
176
+ # Honor xml:space (default)
177
+ Canon::Comparison.equivalent?(xml1, xml2,
178
+ match: {
179
+ structural_whitespace: :strict,
180
+ respect_xml_space: true # Use xml:space attributes in document
181
+ }
182
+ )
183
+
184
+ # Ignore xml:space, use only user configuration
185
+ Canon::Comparison.equivalent?(xml1, xml2,
186
+ match: {
187
+ structural_whitespace: :strict,
188
+ respect_xml_space: false # Override document declarations
189
+ }
190
+ )
191
+ ----
192
+
193
+ ==== Priority order
194
+
195
+ When determining if an element is whitespace-sensitive, Canon uses this priority:
196
+
197
+ [source]
198
+ ----
199
+ 1. respect_xml_space: false → User config only (ignore xml:space)
200
+
201
+ 2. User whitelist → Use whitelist (user explicitly declared)
202
+
203
+ 3. Format defaults → HTML: [:pre, :code, :textarea, :script, :style], XML: []
204
+
205
+ 4. User blacklist → Remove from defaults/whitelist
206
+
207
+ 5. xml:space="preserve" → Element is sensitive
208
+
209
+ 6. xml:space="default" → Use steps 1-4
210
+ ----
211
+
212
+ ==== Format-specific defaults
213
+
214
+ **HTML**:: `[:pre, :code, :textarea, :script, :style]` - These elements are treated as whitespace-sensitive by default for HTML
215
+ **XML**:: `[]` - No default whitespace-sensitive elements, purely user-controlled
216
+
217
+ ==== Examples
218
+
219
+ .Using xml:space attribute
220
+ [source,ruby]
221
+ ----
222
+ xml1 = '<root><code xml:space="preserve"> indented </code></root>'
223
+ xml2 = '<root><code xml:space="preserve">indented</code></root>'
224
+
225
+ # These are NOT equivalent (whitespace matters in xml:space="preserve")
226
+ Canon::Comparison.equivalent?(xml1, xml2,
227
+ match: { structural_whitespace: :strict }
228
+ )
229
+ # => false
230
+ ----
231
+
232
+ .Using whitelist
233
+ [source,ruby]
234
+ ----
235
+ # Make <p> elements whitespace-sensitive
236
+ Canon::Comparison.equivalent?(xml1, xml2,
237
+ match: {
238
+ structural_whitespace: :strict,
239
+ whitespace_sensitive_elements: [:p, :pre]
240
+ }
241
+ )
242
+ ----
243
+
244
+ .Overriding HTML defaults
245
+ [source,ruby]
246
+ ----
247
+ # Make <script> NOT whitespace-sensitive (override HTML default)
248
+ Canon::Comparison.equivalent?(html1, html2,
249
+ format: :html,
250
+ match: {
251
+ structural_whitespace: :strict,
252
+ whitespace_insensitive_elements: [:script]
253
+ }
254
+ )
255
+ ----
256
+
257
+ .Using text_content: :normalize with whitespace_insensitive_elements
258
+ [source,ruby]
259
+ ----
260
+ # HTML defaults: [:pre, :code, :textarea, :script, :style]
261
+ # Excluding :code means it's no longer whitespace-sensitive
262
+ html1 = '<root><pre> indented </pre><code> code </code></root>'
263
+ html2 = '<root><pre> indented </pre><code>code</code></root>'
264
+
265
+ # With :code blacklisted, whitespace in <code> is normalized (formatting-only)
266
+ # HTML uses text_content: :normalize by default
267
+ Canon::Comparison.equivalent?(html1, html2,
268
+ format: :html,
269
+ match: {
270
+ whitespace_insensitive_elements: [:code],
271
+ }
272
+ )
273
+ # => true (whitespace differences in <code> are formatting-only)
274
+
275
+ # Without blacklisting, <code> is sensitive (whitespace matters)
276
+ Canon::Comparison.equivalent?(html1, html2,
277
+ format: :html,
278
+ match: {
279
+ structural_whitespace: :strict,
280
+ }
281
+ )
282
+ # => false (whitespace in <code> is normative)
283
+ ----
284
+
285
+
286
+
66
287
  === attribute_whitespace
67
288
 
68
289
  **Applies to**: XML, HTML only
@@ -414,6 +635,23 @@ expect(actual).to be_xml_equivalent_to(expected,
414
635
  element_position: :ignore,
415
636
  element_hierarchy: :ignore
416
637
  )
638
+
639
+ # Element-level whitespace sensitivity
640
+ expect(actual).to be_xml_equivalent_to(expected,
641
+ match: { structural_whitespace: :strict }
642
+ )
643
+ .with_options(
644
+ whitespace_sensitive_elements: [:pre, :code, :sample],
645
+ respect_xml_space: true
646
+ )
647
+
648
+ # Override HTML default whitespace-sensitive elements
649
+ expect(html).to be_html_equivalent_to(expected,
650
+ match: { structural_whitespace: :strict }
651
+ )
652
+ .with_options(
653
+ whitespace_insensitive_elements: [:script, :style]
654
+ )
417
655
  ====
418
656
 
419
657
  == Comments dimension
@@ -72,7 +72,8 @@ module Canon
72
72
  return :json if trimmed.start_with?("{", "[")
73
73
 
74
74
  # HTML indicators
75
- return :html if trimmed.start_with?("<!DOCTYPE html", "<html", "<HTML")
75
+ return :html if trimmed.start_with?("<!DOCTYPE html", "<html",
76
+ "<HTML")
76
77
 
77
78
  # XML indicators - must start with < and end with >
78
79
  return :xml if trimmed.start_with?("<") && trimmed.end_with?(">")
@@ -13,6 +13,8 @@ require_relative "../diff/diff_classifier"
13
13
  require_relative "strategies/match_strategy_factory"
14
14
  require_relative "../html/data_model"
15
15
  require_relative "xml_node_comparison"
16
+ # Whitespace sensitivity module (single source of truth for sensitive elements)
17
+ require_relative "whitespace_sensitivity"
16
18
 
17
19
  module Canon
18
20
  module Comparison
@@ -542,16 +544,22 @@ compare_profile = nil)
542
544
  return if match_opts[:text_content] == :strict
543
545
 
544
546
  # Elements where whitespace is significant - don't normalize
545
- # Use profile if available, otherwise use default list
547
+ # SINGLE SOURCE OF TRUTH: WhitespaceSensitivity.format_default_sensitive_elements
548
+ # This ensures consistency between preprocessing and comparison logic
549
+ # SINGLE SOURCE OF TRUTH: WhitespaceSensitivity.format_default_sensitive_elements
550
+ # This ensures consistency between preprocessing and comparison logic
546
551
  preserve_whitespace = if compare_profile.is_a?(HtmlCompareProfile)
547
552
  # Profile handles HTML-specific whitespace rules
548
- %w[pre code textarea script
549
- style].select do |elem|
550
- compare_profile.preserve_whitespace?(elem)
551
- end
553
+ # Get default list and filter by profile
554
+ WhitespaceSensitivity
555
+ .format_default_sensitive_elements(match_opts)
556
+ .select do |elem|
557
+ compare_profile.preserve_whitespace?(elem.to_s)
558
+ end
559
+ .map(&:to_s)
552
560
  else
553
- # Fallback to default list
554
- %w[pre code textarea script style]
561
+ # Use default list from WhitespaceSensitivity (single source of truth)
562
+ WhitespaceSensitivity.format_default_sensitive_elements(match_opts).map(&:to_s)
555
563
  end
556
564
 
557
565
  # Walk all text nodes
@@ -607,9 +615,12 @@ compare_profile = nil)
607
615
  #
608
616
  # CRITICAL: Do NOT remove whitespace-only text nodes from whitespace-sensitive
609
617
  # elements like <pre>, <code>, <textarea>, <script>, <style>
618
+ #
619
+ # SINGLE SOURCE OF TRUTH: Uses WhitespaceSensitivity.format_default_sensitive_elements
610
620
  def remove_whitespace_only_text_nodes(doc)
611
621
  # Elements where whitespace is significant - don't remove whitespace-only nodes
612
- preserve_whitespace = %w[pre code textarea script style]
622
+ # SINGLE SOURCE OF TRUTH: WhitespaceSensitivity.format_default_sensitive_elements
623
+ preserve_whitespace = WhitespaceSensitivity.format_default_sensitive_elements(format: :html).map(&:to_s)
613
624
 
614
625
  doc.xpath(".//text()").each do |text_node|
615
626
  # CRITICAL: Skip if this text node is inside a whitespace-preserving element
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "compare_profile"
4
+ # Whitespace sensitivity module (single source of truth for sensitive elements)
5
+ require_relative "whitespace_sensitivity"
4
6
 
5
7
  module Canon
6
8
  module Comparison
@@ -82,9 +84,13 @@ module Canon
82
84
  private
83
85
 
84
86
  # Elements where whitespace is semantically significant in HTML
85
- # @return [Array<String>] List of element names
87
+ #
88
+ # SINGLE SOURCE OF TRUTH: Delegates to WhitespaceSensitivity.format_default_sensitive_elements
89
+ # This ensures consistency across the codebase.
90
+ #
91
+ # @return [Array<String>] List of element names (as strings)
86
92
  def whitespace_sensitive_elements
87
- %w[pre code textarea script style]
93
+ WhitespaceSensitivity.format_default_sensitive_elements(format: @html_version).map(&:to_s)
88
94
  end
89
95
 
90
96
  # Check if a dimension is explicitly set to :strict
@@ -239,9 +239,116 @@ module Canon
239
239
  # @param diff2 [Symbol] Difference type for node2
240
240
  # @param dimension [Symbol] The dimension of the difference
241
241
  # @return [String] Human-readable reason
242
- def build_difference_reason(_node1, _node2, diff1, diff2, dimension)
242
+ def build_difference_reason(node1, node2, diff1, diff2, dimension)
243
+ # For attribute presence differences, show what attributes differ
244
+ if dimension == :attribute_presence
245
+ attrs1 = extract_attributes(node1)
246
+ attrs2 = extract_attributes(node2)
247
+ return build_attribute_difference_reason(attrs1, attrs2)
248
+ end
249
+
250
+ # For text content differences, show the actual text (truncated if needed)
251
+ if dimension == :text_content
252
+ text1 = extract_text_content_from_node(node1)
253
+ text2 = extract_text_content_from_node(node2)
254
+ return build_text_difference_reason(text1, text2)
255
+ end
256
+
243
257
  # Default reason - can be overridden in subclasses
244
- "Difference in #{dimension}: #{diff1} vs #{diff2}"
258
+ "#{diff1} vs #{diff2}"
259
+ end
260
+
261
+ # Build a clear reason message for attribute presence differences
262
+ # Shows which attributes are only in node1, only in node2, or have different values
263
+ #
264
+ # @param attrs1 [Hash, nil] First node's attributes
265
+ # @param attrs2 [Hash, nil] Second node's attributes
266
+ # @return [String] Clear explanation of the attribute difference
267
+ def build_attribute_difference_reason(attrs1, attrs2)
268
+ return "#{attrs1&.keys&.size || 0} vs #{attrs2&.keys&.size || 0} attributes" unless attrs1 && attrs2
269
+
270
+ require "set"
271
+ keys1 = attrs1.keys.to_set
272
+ keys2 = attrs2.keys.to_set
273
+
274
+ only_in_1 = keys1 - keys2
275
+ only_in_2 = keys2 - keys1
276
+ common = keys1 & keys2
277
+
278
+ # Check if values differ for common keys
279
+ different_values = common.reject { |k| attrs1[k] == attrs2[k] }
280
+
281
+ parts = []
282
+ parts << "only in first: #{only_in_1.to_a.sort.join(', ')}" if only_in_1.any?
283
+ parts << "only in second: #{only_in_2.to_a.sort.join(', ')}" if only_in_2.any?
284
+ parts << "different values: #{different_values.sort.join(', ')}" if different_values.any?
285
+
286
+ if parts.empty?
287
+ "#{keys1.size} vs #{keys2.size} attributes (same names)"
288
+ else
289
+ parts.join("; ")
290
+ end
291
+ end
292
+
293
+ # Extract text content from a node for diff reason
294
+ #
295
+ # @param node [Object, nil] Node to extract text from
296
+ # @return [String, nil] Text content or nil
297
+ def extract_text_content_from_node(node)
298
+ return nil if node.nil?
299
+
300
+ # For Canon::Xml::Nodes::TextNode
301
+ return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
302
+
303
+ # For XML/HTML nodes with text_content method
304
+ return node.text_content if node.respond_to?(:text_content)
305
+
306
+ # For nodes with text method
307
+ return node.text if node.respond_to?(:text)
308
+
309
+ # For nodes with content method (Moxml::Text)
310
+ return node.content if node.respond_to?(:content)
311
+
312
+ # For nodes with value method (other types)
313
+ return node.value if node.respond_to?(:value)
314
+
315
+ # For simple text nodes or strings
316
+ return node.to_s if node.is_a?(String)
317
+
318
+ # For other node types, try to_s
319
+ node.to_s
320
+ rescue StandardError
321
+ nil
322
+ end
323
+
324
+ # Build a clear reason message for text content differences
325
+ # Shows the actual text content (truncated if too long)
326
+ #
327
+ # @param text1 [String, nil] First text content
328
+ # @param text2 [String, nil] Second text content
329
+ # @return [String] Clear explanation of the text difference
330
+ def build_text_difference_reason(text1, text2)
331
+ # Handle nil cases
332
+ return "missing vs '#{truncate_text(text2)}'" if text1.nil? && text2
333
+ return "'#{truncate_text(text1)}' vs missing" if text1 && text2.nil?
334
+ return "both missing" if text1.nil? && text2.nil?
335
+
336
+ # Both have content - show truncated versions
337
+ "'#{truncate_text(text1)}' vs '#{truncate_text(text2)}'"
338
+ end
339
+
340
+ # Truncate text for display in reason messages
341
+ #
342
+ # @param text [String] Text to truncate
343
+ # @param max_length [Integer] Maximum length
344
+ # @return [String] Truncated text
345
+ def truncate_text(text, max_length = 40)
346
+ return "" if text.nil?
347
+
348
+ text = text.to_s
349
+ return text if text.length <= max_length
350
+
351
+ "#{text[0...max_length]}..."
245
352
  end
246
353
 
247
354
  # Serialize an element node to string
@@ -27,6 +27,9 @@ module Canon
27
27
  # Start with format-specific defaults
28
28
  options = format_defaults(format).dup
29
29
 
30
+ # Store format for later use (e.g., WhitespaceSensitivity needs it)
31
+ options[:format] = format
32
+
30
33
  # Apply global profile if specified
31
34
  if global_profile
32
35
  profile_opts = get_profile_options(global_profile)
@@ -111,12 +114,16 @@ module Canon
111
114
  def validate_match_options!(match_options)
112
115
  # Special options that don't need validation as dimensions
113
116
  special_options = %i[
117
+ format
114
118
  preprocessing
115
119
  semantic_diff
116
120
  similarity_threshold
117
121
  hash_matching
118
122
  similarity_matching
119
123
  propagation
124
+ whitespace_sensitive_elements
125
+ whitespace_insensitive_elements
126
+ respect_xml_space
120
127
  ]
121
128
 
122
129
  match_options.each do |dimension, behavior|