canon 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +69 -92
- data/README.adoc +13 -13
- data/docs/.lycheeignore +69 -0
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +82 -2
- data/docs/advanced/extending-canon.adoc +193 -0
- data/docs/features/match-options/index.adoc +239 -1
- data/docs/internals/diffnode-enrichment.adoc +611 -0
- data/docs/internals/index.adoc +251 -0
- data/docs/lychee.toml +13 -6
- data/docs/understanding/architecture.adoc +749 -33
- data/docs/understanding/comparison-pipeline.adoc +122 -0
- data/lib/canon/cache.rb +129 -0
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
- data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
- data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
- data/lib/canon/comparison/dimensions/registry.rb +77 -0
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
- data/lib/canon/comparison/dimensions.rb +54 -0
- data/lib/canon/comparison/format_detector.rb +87 -0
- data/lib/canon/comparison/html_comparator.rb +70 -26
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/html_parser.rb +80 -0
- data/lib/canon/comparison/json_comparator.rb +12 -0
- data/lib/canon/comparison/json_parser.rb +19 -0
- data/lib/canon/comparison/markup_comparator.rb +293 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
- data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
- data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
- data/lib/canon/comparison/match_options.rb +68 -463
- data/lib/canon/comparison/profile_definition.rb +149 -0
- data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
- data/lib/canon/comparison/xml_comparator.rb +97 -684
- data/lib/canon/comparison/xml_node_comparison.rb +319 -0
- data/lib/canon/comparison/xml_parser.rb +19 -0
- data/lib/canon/comparison/yaml_comparator.rb +3 -3
- data/lib/canon/comparison.rb +265 -110
- data/lib/canon/diff/diff_classifier.rb +101 -2
- data/lib/canon/diff/diff_node.rb +32 -2
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/diff/node_serializer.rb +191 -0
- data/lib/canon/diff/path_builder.rb +143 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
- data/lib/canon/diff_formatter.rb +1 -1
- data/lib/canon/rspec_matchers.rb +38 -9
- data/lib/canon/tree_diff/operation_converter.rb +92 -338
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +48 -2
|
@@ -229,6 +229,69 @@ result = Canon::Comparison.equivalent?(
|
|
|
229
229
|
----
|
|
230
230
|
====
|
|
231
231
|
|
|
232
|
+
==== Text Content
|
|
233
|
+
|
|
234
|
+
* **`:strict` behavior** → Normative
|
|
235
|
+
- Text must match exactly, including all whitespace
|
|
236
|
+
- Any text difference causes non-equivalence
|
|
237
|
+
|
|
238
|
+
* **`:normalize` behavior** → Normative (after normalization) or Informative (if formatting-only)
|
|
239
|
+
- Whitespace is normalized (collapsed/trimmed) before comparison
|
|
240
|
+
- If normalized texts match but originals differ, classified as formatting-only (informative)
|
|
241
|
+
- This ensures that whitespace-only differences don't affect equivalence
|
|
242
|
+
- Element-level sensitivity is respected (e.g., `<pre>`, `<code>` preserve whitespace)
|
|
243
|
+
|
|
244
|
+
* **`:ignore` behavior** → Informative
|
|
245
|
+
- Text content differences are tracked but don't affect equivalence
|
|
246
|
+
|
|
247
|
+
.Example: Text content with normalize behavior
|
|
248
|
+
====
|
|
249
|
+
[source,ruby]
|
|
250
|
+
----
|
|
251
|
+
# Formatting-only difference - normalized texts match
|
|
252
|
+
xml1 = '<p>Hello world</p>'
|
|
253
|
+
xml2 = '<p>Hello  world</p>'
|
|
254
|
+
|
|
255
|
+
result = Canon::Comparison.equivalent?(
|
|
256
|
+
xml1, xml2,
|
|
257
|
+
match: { text_content: :normalize }
|
|
258
|
+
)
|
|
259
|
+
# => true (extra space is formatting-only, classified as informative)
|
|
260
|
+
|
|
261
|
+
# With `verbose: true`, a result object is returned and the difference shows as informative
|
|
262
|
+
result.differences.first.normative?
|
|
263
|
+
# => false
|
|
264
|
+
result.differences.first.formatting?
|
|
265
|
+
# => true
|
|
266
|
+
----
====
|
|
267
|
+
|
|
268
|
+
.Using text_content: :normalize with element-level sensitivity
|
|
269
|
+
====
|
|
270
|
+
[source,ruby]
|
|
271
|
+
----
|
|
272
|
+
# HTML defaults: <code> is whitespace-sensitive
|
|
273
|
+
html1 = '<code> indented </code><p> text </p>'
|
|
274
|
+
html2 = '<code>indented</code><p>text</p>'
|
|
275
|
+
|
|
276
|
+
# With <code> blacklisted from sensitive elements
|
|
277
|
+
Canon::Comparison.equivalent?(html1, html2,
|
|
278
|
+
format: :html,
|
|
279
|
+
match: {
|
|
280
|
+
whitespace_insensitive_elements: [:code],
|
|
281
|
+
}
|
|
282
|
+
)
|
|
283
|
+
# => true
|
|
284
|
+
# - <code> whitespace: formatting-only (informative)
|
|
285
|
+
# - <p> whitespace: formatting-only (informative)
|
|
286
|
+
|
|
287
|
+
# Without blacklisting (default HTML behavior)
|
|
288
|
+
Canon::Comparison.equivalent?(html1, html2, format: :html)
|
|
289
|
+
# => false
|
|
290
|
+
# - <code> whitespace: normative (sensitive element)
|
|
291
|
+
# - <p> whitespace: formatting-only (informative)
|
|
292
|
+
----
|
|
293
|
+
====
|
|
294
|
+
|
|
232
295
|
=== FormattingDetector Integration
|
|
233
296
|
|
|
234
297
|
For dimensions that support it (`:text_content`, `:structural_whitespace`),
|
|
@@ -262,12 +325,23 @@ The [`CompareProfile`](../../lib/canon/comparison/compare_profile.rb) class prov
|
|
|
262
325
|
* `affects_equivalence?(dimension)` - Does this dimension affect equivalence?
|
|
263
326
|
* `supports_formatting_detection?(dimension)` - Can this dimension have formatting-only diffs?
|
|
264
327
|
|
|
265
|
-
The [`DiffClassifier`](../../lib/canon/diff/diff_classifier.rb) uses CompareProfile to classify:
|
|
328
|
+
The [`DiffClassifier`](../../lib/canon/diff/diff_classifier.rb) uses CompareProfile to classify differences, with special handling for `text_content: :normalize`:
|
|
266
329
|
|
|
267
330
|
[source,ruby]
|
|
268
331
|
----
|
|
269
332
|
def classify(diff_node)
|
|
270
|
-
#
|
|
333
|
+
# SPECIAL CASE: text_content with :normalize behavior
|
|
334
|
+
# Formatting-only differences (whitespace-only) are marked as non-normative
|
|
335
|
+
if diff_node.dimension == :text_content &&
|
|
336
|
+
profile.send(:behavior_for, :text_content) == :normalize &&
|
|
337
|
+
!inside_whitespace_sensitive_element?(diff_node) &&
|
|
338
|
+
formatting_only_diff?(diff_node)
|
|
339
|
+
diff_node.formatting = true
|
|
340
|
+
diff_node.normative = false
|
|
341
|
+
return diff_node
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
# Standard classification flow
|
|
271
345
|
is_normative = profile.normative_dimension?(diff_node.dimension)
|
|
272
346
|
|
|
273
347
|
# Only check formatting for non-normative dimensions
|
|
@@ -284,6 +358,12 @@ def classify(diff_node)
|
|
|
284
358
|
end
|
|
285
359
|
----
|
|
286
360
|
|
|
361
|
+
The key distinction for `text_content: :normalize`:
|
|
362
|
+
|
|
363
|
+
* **Formatting-only detection**: Uses the `normalized_equivalent?` method to compare normalized texts
|
|
364
|
+
* **Element sensitivity**: Respects element-level whitespace sensitivity (`<pre>`, `<code>`, etc.)
|
|
365
|
+
* **Result**: Whitespace-only differences are classified as *informative* (non-normative) when using `:normalize`
|
|
366
|
+
|
|
287
367
|
== Visual Indicators
|
|
288
368
|
|
|
289
369
|
=== Normative Diffs
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Extending Canon
|
|
3
|
+
parent: Advanced
|
|
4
|
+
nav_order: 8
|
|
5
|
+
---
|
|
6
|
+
= Extending Canon
|
|
7
|
+
|
|
8
|
+
== Purpose
|
|
9
|
+
|
|
10
|
+
This document explains how to extend Canon with custom functionality, including creating custom comparators, formatters, and adapters for different document formats.
|
|
11
|
+
|
|
12
|
+
== Overview
|
|
13
|
+
|
|
14
|
+
Canon is designed to be extensible at multiple layers:
|
|
15
|
+
|
|
16
|
+
* **Layer 1**: Custom preprocessing/normalization
|
|
17
|
+
* **Layer 2**: Custom comparison algorithms
|
|
18
|
+
* **Layer 3**: Custom match options and dimensions
|
|
19
|
+
* **Layer 4**: Custom diff formatters and renderers
|
|
20
|
+
|
|
21
|
+
== Adapter Pattern
|
|
22
|
+
|
|
23
|
+
Canon uses an adapter pattern to work with different parsing libraries (Nokogiri, Moxml, etc.).
|
|
24
|
+
|
|
25
|
+
=== Adapter Structure
|
|
26
|
+
|
|
27
|
+
[source,ruby]
|
|
28
|
+
----
|
|
29
|
+
module Canon
|
|
30
|
+
module Adapters
|
|
31
|
+
class NokogiriAdapter
|
|
32
|
+
def parse(input)
|
|
33
|
+
# Parse with Nokogiri
|
|
34
|
+
Nokogiri::XML(input)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def serialize(node)
|
|
38
|
+
# Serialize with Nokogiri
|
|
39
|
+
node.to_xml
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
----
|
|
45
|
+
|
|
46
|
+
=== Creating a Custom Adapter
|
|
47
|
+
|
|
48
|
+
To add support for a new parsing library:
|
|
49
|
+
|
|
50
|
+
1. Create an adapter class that implements `parse` and `serialize` methods
|
|
51
|
+
2. Register the adapter with the format system
|
|
52
|
+
3. Add tests for the new adapter
|
|
53
|
+
|
|
54
|
+
== Custom Comparators
|
|
55
|
+
|
|
56
|
+
=== Creating a Custom Comparison Algorithm
|
|
57
|
+
|
|
58
|
+
[source,ruby]
|
|
59
|
+
----
|
|
60
|
+
module Canon
|
|
61
|
+
module Comparison
|
|
62
|
+
class CustomComparator < BaseComparator
|
|
63
|
+
def compare(node1, node2, opts)
|
|
64
|
+
# Your comparison logic here
|
|
65
|
+
differences = []
|
|
66
|
+
|
|
67
|
+
# Your algorithm implementation
|
|
68
|
+
|
|
69
|
+
differences
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
----
|
|
75
|
+
|
|
76
|
+
=== Registering Your Algorithm
|
|
77
|
+
|
|
78
|
+
[source,ruby]
|
|
79
|
+
----
|
|
80
|
+
Canon::Comparison.register_algorithm(:custom, CustomComparator)
|
|
81
|
+
----
|
|
82
|
+
|
|
83
|
+
Then use it:
|
|
84
|
+
|
|
85
|
+
[source,ruby]
|
|
86
|
+
----
|
|
87
|
+
Canon::Comparison.equivalent?(doc1, doc2, diff_algorithm: :custom)
|
|
88
|
+
----
|
|
89
|
+
|
|
90
|
+
== Custom Formatters
|
|
91
|
+
|
|
92
|
+
=== Creating a Custom Diff Formatter
|
|
93
|
+
|
|
94
|
+
[source,ruby]
|
|
95
|
+
----
|
|
96
|
+
module Canon
|
|
97
|
+
class DiffFormatter
|
|
98
|
+
class CustomFormatter
|
|
99
|
+
def format(differences, opts)
|
|
100
|
+
# Your formatting logic here
|
|
101
|
+
formatted_output = ""
|
|
102
|
+
|
|
103
|
+
differences.each do |diff|
|
|
104
|
+
formatted_output += format_difference(diff, opts)
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
formatted_output
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
----
|
|
113
|
+
|
|
114
|
+
=== Using Your Formatter
|
|
115
|
+
|
|
116
|
+
[source,ruby]
|
|
117
|
+
----
|
|
118
|
+
result = Canon::Comparison.equivalent?(doc1, doc2, verbose: true)
|
|
119
|
+
|
|
120
|
+
formatter = Canon::DiffFormatter::CustomFormatter.new
|
|
121
|
+
output = formatter.format(result.differences, use_color: true)
|
|
122
|
+
puts output
|
|
123
|
+
----
|
|
124
|
+
|
|
125
|
+
== Custom Match Options
|
|
126
|
+
|
|
127
|
+
=== Defining Custom Dimensions
|
|
128
|
+
|
|
129
|
+
[source,ruby]
|
|
130
|
+
----
|
|
131
|
+
module Canon
|
|
132
|
+
module Comparison
|
|
133
|
+
class CustomDimension
|
|
134
|
+
def self.key
|
|
135
|
+
:custom_dimension
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
def self.compare(node1, node2, behavior, opts)
|
|
139
|
+
# Your comparison logic for this dimension
|
|
140
|
+
case behavior
|
|
141
|
+
when :strict
|
|
142
|
+
node1 == node2
|
|
143
|
+
when :normalize
|
|
144
|
+
normalize(node1) == normalize(node2)
|
|
145
|
+
when :ignore
|
|
146
|
+
true
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
----
|
|
153
|
+
|
|
154
|
+
Register your dimension:
|
|
155
|
+
|
|
156
|
+
[source,ruby]
|
|
157
|
+
----
|
|
158
|
+
Canon::Comparison.register_dimension(CustomDimension)
|
|
159
|
+
----
|
|
160
|
+
|
|
161
|
+
== Best Practices
|
|
162
|
+
|
|
163
|
+
=== Testing Your Extensions
|
|
164
|
+
|
|
165
|
+
1. Write comprehensive tests for your extensions
|
|
166
|
+
2. Use the existing test helpers and fixtures
|
|
167
|
+
3. Test edge cases and error conditions
|
|
168
|
+
|
|
169
|
+
=== Performance Considerations
|
|
170
|
+
|
|
171
|
+
1. Cache expensive computations
|
|
172
|
+
2. Use lazy evaluation where appropriate
|
|
173
|
+
3. Avoid unnecessary node cloning
|
|
174
|
+
|
|
175
|
+
=== Error Handling
|
|
176
|
+
|
|
177
|
+
1. Provide clear error messages
|
|
178
|
+
2. Use Canon's error classes consistently
|
|
179
|
+
3. Document error conditions
|
|
180
|
+
|
|
181
|
+
== Examples
|
|
182
|
+
|
|
183
|
+
See the source code for examples of:
|
|
184
|
+
|
|
185
|
+
* link:xml-comparator[DOM Comparator implementation]
|
|
186
|
+
* link:semantic-tree-diff-internals[Semantic Diff implementation]
|
|
187
|
+
* link:diff-formatting/[Diff Formatter implementations]
|
|
188
|
+
|
|
189
|
+
== See Also
|
|
190
|
+
|
|
191
|
+
* link:../understanding/architecture.adoc[Architecture] - 4-layer architecture overview
|
|
192
|
+
* link:../features/diff-formatting/[Diff Formatting] - Layer 4 rendering options
|
|
193
|
+
* link:diff-pipeline[Comparison Pipeline] - Technical pipeline details
|
|
@@ -45,10 +45,37 @@ Match dimensions are orthogonal aspects that can be configured independently.
|
|
|
45
45
|
|
|
46
46
|
`:strict`:: Text must match exactly, character-for-character including all whitespace
|
|
47
47
|
|
|
48
|
-
`:normalize`:: Whitespace is normalized (collapsed/trimmed) before comparison
|
|
48
|
+
`:normalize`:: Whitespace is normalized (collapsed/trimmed) before comparison.
|
|
49
|
+
Formatting-only differences (e.g., extra spaces around text) are classified as
|
|
50
|
+
*informative* rather than normative. This means documents with only whitespace
|
|
51
|
+
differences in text content are considered equivalent.
|
|
49
52
|
|
|
50
53
|
`:ignore`:: Text content is completely ignored in comparison
|
|
51
54
|
|
|
55
|
+
.Using text_content: :normalize
|
|
56
|
+
[example]
|
|
57
|
+
====
|
|
58
|
+
[source,ruby]
|
|
59
|
+
----
|
|
60
|
+
# These are equivalent with :normalize
|
|
61
|
+
# Whitespace differences are formatting-only (informative)
|
|
62
|
+
Canon.equivalent?(
|
|
63
|
+
'<p> text </p>',
|
|
64
|
+
'<p>text</p>',
|
|
65
|
+
match: { text_content: :normalize }
|
|
66
|
+
)
|
|
67
|
+
# => true
|
|
68
|
+
|
|
69
|
+
# These differ in :strict mode
|
|
70
|
+
Canon.equivalent?(
|
|
71
|
+
'<p> text </p>',
|
|
72
|
+
'<p>text</p>',
|
|
73
|
+
match: { text_content: :strict }
|
|
74
|
+
)
|
|
75
|
+
# => false
|
|
76
|
+
----
|
|
77
|
+
====
|
|
78
|
+
|
|
52
79
|
=== structural_whitespace
|
|
53
80
|
|
|
54
81
|
**Applies to**: All formats
|
|
@@ -63,6 +90,200 @@ Match dimensions are orthogonal aspects that can be configured independently.
|
|
|
63
90
|
|
|
64
91
|
`:ignore`:: Structural whitespace is completely ignored
|
|
65
92
|
|
|
93
|
+
|
|
94
|
+
=== Whitespace sensitivity at element level
|
|
95
|
+
|
|
96
|
+
==== General
|
|
97
|
+
|
|
98
|
+
In XML, whitespace sensitivity can vary by schema and element:
|
|
99
|
+
|
|
100
|
+
* Elements that apply `xml:space="preserve"` are whitespace-sensitive.
|
|
101
|
+
|
|
102
|
+
* Other elements may be defined as sensitive by schema (e.g.
|
|
103
|
+
`xs:whiteSpace value="preserve"` in XML Schema) or unannounced conventions, such as
|
|
104
|
+
for mixed content.
|
|
105
|
+
|
|
106
|
+
In HTML, elements like `<pre>` and `<code>` preserve whitespace, while others
|
|
107
|
+
like `<div>` and `<p>` do not.
|
|
108
|
+
|
|
109
|
+
In the unannounced cases, the developer must indicate which elements are
|
|
110
|
+
whitespace-sensitive.
|
|
111
|
+
|
|
112
|
+
In Canon, you can control whitespace sensitivity at the element level using
|
|
113
|
+
`structural_whitespace: :strict` or `text_content: :normalize`.
|
|
114
|
+
|
|
115
|
+
Element-level sensitivity controls both:
|
|
116
|
+
|
|
117
|
+
* `structural_whitespace`: Whether whitespace between child elements of the element is
|
|
118
|
+
preserved
|
|
119
|
+
|
|
120
|
+
* `text_content`: Whether whitespace within text nodes of the element is
|
|
121
|
+
normalized
|
|
122
|
+
|
|
123
|
+
Options for controlling element-level sensitivity include:
|
|
124
|
+
|
|
125
|
+
* **xml:space attribute** - XML standard for declaring whitespace sensitivity in documents
|
|
126
|
+
* **whitelist/blacklist options** - User-specified element lists
|
|
127
|
+
* **Format defaults** - HTML has built-in sensitive elements
|
|
128
|
+
* **respect_xml_space option** - Control whether xml:space is honored
|
|
129
|
+
|
|
130
|
+
For elements marked as sensitive, whitespace differences are always normative.
|
|
131
|
+
|
|
132
|
+
For non-sensitive elements using `text_content: :normalize`, whitespace
|
|
133
|
+
differences are classified as formatting-only (informative).
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
==== xml:space attribute support
|
|
137
|
+
|
|
138
|
+
The `xml:space` attribute is the XML standard way to declare whitespace
|
|
139
|
+
sensitivity in XML instance documents:
|
|
140
|
+
|
|
141
|
+
[source,xml]
|
|
142
|
+
----
|
|
143
|
+
<!-- Preserve whitespace in this element -->
|
|
144
|
+
<code xml:space="preserve">
|
|
145
|
+
Indentation and newlines matter here
|
|
146
|
+
</code>
|
|
147
|
+
|
|
148
|
+
<!-- Use default behavior -->
|
|
149
|
+
<text xml:space="default">
|
|
150
|
+
Whitespace handling follows configured behavior
|
|
151
|
+
</text>
|
|
152
|
+
----
|
|
153
|
+
|
|
154
|
+
==== Whitelist and blacklist options
|
|
155
|
+
|
|
156
|
+
You can explicitly specify which elements are whitespace-sensitive:
|
|
157
|
+
|
|
158
|
+
[source,ruby]
|
|
159
|
+
----
|
|
160
|
+
# Specify elements that preserve whitespace
|
|
161
|
+
Canon::Comparison.equivalent?(xml1, xml2,
|
|
162
|
+
match: {
|
|
163
|
+
structural_whitespace: :strict,
|
|
164
|
+
whitespace_sensitive_elements: [:pre, :code, :sample],
|
|
165
|
+
whitespace_insensitive_elements: [:p, :div] # Override defaults/whitelist
|
|
166
|
+
}
|
|
167
|
+
)
|
|
168
|
+
----
|
|
169
|
+
|
|
170
|
+
==== respect_xml_space option
|
|
171
|
+
|
|
172
|
+
Control whether xml:space attributes in the document are honored:
|
|
173
|
+
|
|
174
|
+
[source,ruby]
|
|
175
|
+
----
|
|
176
|
+
# Honor xml:space (default)
|
|
177
|
+
Canon::Comparison.equivalent?(xml1, xml2,
|
|
178
|
+
match: {
|
|
179
|
+
structural_whitespace: :strict,
|
|
180
|
+
respect_xml_space: true # Use xml:space attributes in document
|
|
181
|
+
}
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
# Ignore xml:space, use only user configuration
|
|
185
|
+
Canon::Comparison.equivalent?(xml1, xml2,
|
|
186
|
+
match: {
|
|
187
|
+
structural_whitespace: :strict,
|
|
188
|
+
respect_xml_space: false # Override document declarations
|
|
189
|
+
}
|
|
190
|
+
)
|
|
191
|
+
----
|
|
192
|
+
|
|
193
|
+
==== Priority order
|
|
194
|
+
|
|
195
|
+
When determining if an element is whitespace-sensitive, Canon uses this priority:
|
|
196
|
+
|
|
197
|
+
[source]
|
|
198
|
+
----
|
|
199
|
+
1. respect_xml_space: false → User config only (ignore xml:space)
|
|
200
|
+
↓
|
|
201
|
+
2. User whitelist → Use whitelist (user explicitly declared)
|
|
202
|
+
↓
|
|
203
|
+
3. Format defaults → HTML: [:pre, :textarea, :script, :style], XML: []
|
|
204
|
+
↓
|
|
205
|
+
4. User blacklist → Remove from defaults/whitelist
|
|
206
|
+
↓
|
|
207
|
+
5. xml:space="preserve" → Element is sensitive
|
|
208
|
+
↓
|
|
209
|
+
6. xml:space="default" → Use steps 1-4
|
|
210
|
+
----
|
|
211
|
+
|
|
212
|
+
==== Format-specific defaults
|
|
213
|
+
|
|
214
|
+
**HTML**:: `[:pre, :textarea, :script, :style]` - These elements preserve whitespace by HTML specification
|
|
215
|
+
**XML**:: `[]` - No default whitespace-sensitive elements, purely user-controlled
|
|
216
|
+
|
|
217
|
+
==== Examples
|
|
218
|
+
|
|
219
|
+
.Using xml:space attribute
|
|
220
|
+
[source,ruby]
|
|
221
|
+
----
|
|
222
|
+
xml1 = '<root><code xml:space="preserve"> indented </code></root>'
|
|
223
|
+
xml2 = '<root><code xml:space="preserve">indented</code></root>'
|
|
224
|
+
|
|
225
|
+
# These are NOT equivalent (whitespace matters in xml:space="preserve")
|
|
226
|
+
Canon::Comparison.equivalent?(xml1, xml2,
|
|
227
|
+
match: { structural_whitespace: :strict }
|
|
228
|
+
)
|
|
229
|
+
# => false
|
|
230
|
+
----
|
|
231
|
+
|
|
232
|
+
.Using whitelist
|
|
233
|
+
[source,ruby]
|
|
234
|
+
----
|
|
235
|
+
# Make <p> elements whitespace-sensitive
|
|
236
|
+
Canon::Comparison.equivalent?(xml1, xml2,
|
|
237
|
+
match: {
|
|
238
|
+
structural_whitespace: :strict,
|
|
239
|
+
whitespace_sensitive_elements: [:p, :pre]
|
|
240
|
+
}
|
|
241
|
+
)
|
|
242
|
+
----
|
|
243
|
+
|
|
244
|
+
.Overriding HTML defaults
|
|
245
|
+
[source,ruby]
|
|
246
|
+
----
|
|
247
|
+
# Make <script> NOT whitespace-sensitive (override HTML default)
|
|
248
|
+
Canon::Comparison.equivalent?(html1, html2,
|
|
249
|
+
format: :html,
|
|
250
|
+
match: {
|
|
251
|
+
structural_whitespace: :strict,
|
|
252
|
+
whitespace_insensitive_elements: [:script]
|
|
253
|
+
}
|
|
254
|
+
)
|
|
255
|
+
----
|
|
256
|
+
|
|
257
|
+
.Using text_content: :normalize with whitespace_insensitive_elements
|
|
258
|
+
[source,ruby]
|
|
259
|
+
----
|
|
260
|
+
# HTML defaults: [:pre, :code, :textarea, :script, :style]
|
|
261
|
+
# Excluding :code means it's no longer whitespace-sensitive
|
|
262
|
+
html1 = '<root><pre> indented </pre><code> code </code></root>'
|
|
263
|
+
html2 = '<root><pre> indented </pre><code>code</code></root>'
|
|
264
|
+
|
|
265
|
+
# With :code blacklisted, whitespace in <code> is normalized (formatting-only)
|
|
266
|
+
# HTML uses text_content: :normalize by default
|
|
267
|
+
Canon::Comparison.equivalent?(html1, html2,
|
|
268
|
+
format: :html,
|
|
269
|
+
match: {
|
|
270
|
+
whitespace_insensitive_elements: [:code],
|
|
271
|
+
}
|
|
272
|
+
)
|
|
273
|
+
# => true (whitespace differences in <code> are formatting-only)
|
|
274
|
+
|
|
275
|
+
# Without blacklisting, <code> is sensitive (whitespace matters)
|
|
276
|
+
Canon::Comparison.equivalent?(html1, html2,
|
|
277
|
+
format: :html,
|
|
278
|
+
match: {
|
|
279
|
+
structural_whitespace: :strict,
|
|
280
|
+
}
|
|
281
|
+
)
|
|
282
|
+
# => false (whitespace in <code> is normative)
|
|
283
|
+
----
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
|
|
66
287
|
=== attribute_whitespace
|
|
67
288
|
|
|
68
289
|
**Applies to**: XML, HTML only
|
|
@@ -414,6 +635,23 @@ expect(actual).to be_xml_equivalent_to(expected,
|
|
|
414
635
|
element_position: :ignore,
|
|
415
636
|
element_hierarchy: :ignore
|
|
416
637
|
)
|
|
638
|
+
|
|
639
|
+
# Element-level whitespace sensitivity
|
|
640
|
+
expect(actual).to be_xml_equivalent_to(expected,
|
|
641
|
+
match: { structural_whitespace: :strict }
|
|
642
|
+
)
|
|
643
|
+
.with_options(
|
|
644
|
+
whitespace_sensitive_elements: [:pre, :code, :sample],
|
|
645
|
+
respect_xml_space: true
|
|
646
|
+
)
|
|
647
|
+
|
|
648
|
+
# Override HTML default whitespace-sensitive elements
|
|
649
|
+
expect(html).to be_html_equivalent_to(expected,
|
|
650
|
+
match: { structural_whitespace: :strict }
|
|
651
|
+
)
|
|
652
|
+
.with_options(
|
|
653
|
+
whitespace_insensitive_elements: [:script, :style]
|
|
654
|
+
)
|
|
417
655
|
====
|
|
418
656
|
|
|
419
657
|
== Comments dimension
|