canon 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +83 -22
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +196 -24
- data/docs/features/match-options/index.adoc +239 -1
- data/lib/canon/comparison/format_detector.rb +2 -1
- data/lib/canon/comparison/html_comparator.rb +19 -8
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/markup_comparator.rb +109 -2
- data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
- data/lib/canon/comparison/xml_comparator.rb +240 -23
- data/lib/canon/comparison/xml_node_comparison.rb +25 -3
- data/lib/canon/diff/diff_classifier.rb +119 -5
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
- data/lib/canon/rspec_matchers.rb +37 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +4 -78
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
- data/false_positive_analysis.txt +0 -0
- data/file1.html +0 -1
- data/file2.html +0 -1
- data/old-docs/ADVANCED_TOPICS.adoc +0 -20
- data/old-docs/BASIC_USAGE.adoc +0 -16
- data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
- data/old-docs/CLI.adoc +0 -497
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/old-docs/DIFF_FORMATTING.adoc +0 -540
- data/old-docs/DIFF_PARAMETERS.adoc +0 -261
- data/old-docs/DOM_DIFF.adoc +0 -1017
- data/old-docs/ENV_CONFIG.adoc +0 -876
- data/old-docs/FORMATS.adoc +0 -867
- data/old-docs/INPUT_VALIDATION.adoc +0 -477
- data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
- data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/old-docs/MATCH_OPTIONS.adoc +0 -912
- data/old-docs/MODES.adoc +0 -432
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/old-docs/OPTIONS.adoc +0 -1387
- data/old-docs/PREPROCESSING.adoc +0 -491
- data/old-docs/README.old.adoc +0 -2831
- data/old-docs/RSPEC.adoc +0 -814
- data/old-docs/RUBY_API.adoc +0 -485
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
- data/old-docs/STRING_COMPARE.adoc +0 -345
- data/old-docs/TMP.adoc +0 -3384
- data/old-docs/TREE_DIFF.adoc +0 -1080
- data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
- data/old-docs/VERBOSE.adoc +0 -482
- data/old-docs/VISUALIZATION_MAP.adoc +0 -625
- data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
- data/scripts/analyze_current_state.rb +0 -85
- data/scripts/analyze_false_positives.rb +0 -114
- data/scripts/analyze_remaining_failures.rb +0 -105
- data/scripts/compare_current_failures.rb +0 -95
- data/scripts/compare_dom_tree_diff.rb +0 -158
- data/scripts/compare_failures.rb +0 -151
- data/scripts/debug_attribute_extraction.rb +0 -66
- data/scripts/debug_blocks_839.rb +0 -115
- data/scripts/debug_meta_matching.rb +0 -52
- data/scripts/debug_p_matching.rb +0 -192
- data/scripts/debug_signature_matching.rb +0 -118
- data/scripts/debug_sourcecode_124.rb +0 -32
- data/scripts/debug_whitespace_sensitive.rb +0 -192
- data/scripts/extract_false_positives.rb +0 -138
- data/scripts/find_actual_false_positives.rb +0 -125
- data/scripts/investigate_all_false_positives.rb +0 -161
- data/scripts/investigate_batch1.rb +0 -127
- data/scripts/investigate_classification.rb +0 -150
- data/scripts/investigate_classification_detailed.rb +0 -190
- data/scripts/investigate_common_failures.rb +0 -342
- data/scripts/investigate_false_negative.rb +0 -80
- data/scripts/investigate_false_positive.rb +0 -83
- data/scripts/investigate_false_positives.rb +0 -227
- data/scripts/investigate_false_positives_batch.rb +0 -163
- data/scripts/investigate_mixed_content.rb +0 -125
- data/scripts/investigate_remaining_16.rb +0 -214
- data/scripts/run_single_test.rb +0 -29
- data/scripts/test_all_false_positives.rb +0 -95
- data/scripts/test_attribute_details.rb +0 -61
- data/scripts/test_both_algorithms.rb +0 -49
- data/scripts/test_both_simple.rb +0 -49
- data/scripts/test_enhanced_semantic_output.rb +0 -125
- data/scripts/test_readme_examples.rb +0 -131
- data/scripts/test_semantic_tree_diff.rb +0 -99
- data/scripts/test_semantic_ux_improvements.rb +0 -135
- data/scripts/test_single_false_positive.rb +0 -119
- data/scripts/test_size_limits.rb +0 -99
- data/test_html_1.html +0 -21
- data/test_html_2.html +0 -21
- data/test_nokogiri.rb +0 -33
- data/test_normalize.rb +0 -45
data/old-docs/MATCH_OPTIONS.adoc
DELETED
|
@@ -1,912 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
layout: default
|
|
3
|
-
title: Match Options
|
|
4
|
-
nav_order: 30
|
|
5
|
-
parent: Customizing Behavior
|
|
6
|
-
---
|
|
7
|
-
= Match options
|
|
8
|
-
:toc:
|
|
9
|
-
:toclevels: 3
|
|
10
|
-
|
|
11
|
-
== Scope
|
|
12
|
-
|
|
13
|
-
This document provides a complete reference for Canon's match options,
|
|
14
|
-
including match dimensions, behaviors, and predefined profiles.
|
|
15
|
-
|
|
16
|
-
Match options control Phase 2 (semantic matching) of Canon's comparison
|
|
17
|
-
architecture. See link:MATCH_ARCHITECTURE[Match architecture] for the
|
|
18
|
-
complete flow.
|
|
19
|
-
|
|
20
|
-
== General
|
|
21
|
-
|
|
22
|
-
Match options control which aspects of documents are compared and how
|
|
23
|
-
strictly they are compared. Canon provides:
|
|
24
|
-
|
|
25
|
-
* **Match dimensions**: Independent aspects of documents (text, whitespace,
|
|
26
|
-
attributes, etc.)
|
|
27
|
-
* **Dimension behaviors**: How each dimension is compared (`:strict`,
|
|
28
|
-
`:normalize`, `:ignore`)
|
|
29
|
-
* **Match profiles**: Predefined combinations for common scenarios
|
|
30
|
-
|
|
31
|
-
== Match dimensions
|
|
32
|
-
|
|
33
|
-
Match dimensions are orthogonal aspects that can be configured independently.
|
|
34
|
-
|
|
35
|
-
=== text_content
|
|
36
|
-
|
|
37
|
-
**Applies to**: All formats
|
|
38
|
-
|
|
39
|
-
**Purpose**: Controls how text content within elements/values is compared.
|
|
40
|
-
|
|
41
|
-
**Behaviors**:
|
|
42
|
-
|
|
43
|
-
`:strict`:: Text must match exactly, character-for-character including all
|
|
44
|
-
whitespace
|
|
45
|
-
|
|
46
|
-
`:normalize`:: Whitespace is normalized (collapsed/trimmed) before comparison
|
|
47
|
-
|
|
48
|
-
`:ignore`:: Text content is completely ignored in comparison
|
|
49
|
-
|
|
50
|
-
.text_content examples
|
|
51
|
-
[example]
|
|
52
|
-
====
|
|
53
|
-
**Input**:
|
|
54
|
-
|
|
55
|
-
[source,xml]
|
|
56
|
-
----
|
|
57
|
-
<!-- File 1 -->
|
|
58
|
-
<message>Hello   World</message>
|
|
59
|
-
|
|
60
|
-
<!-- File 2 -->
|
|
61
|
-
<message>Hello World</message>
|
|
62
|
-
----
|
|
63
|
-
|
|
64
|
-
**Results**:
|
|
65
|
-
|
|
66
|
-
* `:strict` → Different (whitespace differs: 3 spaces vs 1 space)
|
|
67
|
-
* `:normalize` → Equivalent (both normalize to "Hello World")
|
|
68
|
-
* `:ignore` → Equivalent (text content ignored, structure matches)
|
|
69
|
-
====
|
|
70
|
-
|
|
71
|
-
=== structural_whitespace
|
|
72
|
-
|
|
73
|
-
**Applies to**: All formats
|
|
74
|
-
|
|
75
|
-
**Purpose**: Controls how whitespace between elements (indentation, newlines)
|
|
76
|
-
is handled.
|
|
77
|
-
|
|
78
|
-
**Behaviors**:
|
|
79
|
-
|
|
80
|
-
`:strict`:: All structural whitespace must match exactly
|
|
81
|
-
|
|
82
|
-
`:normalize`:: Structural whitespace is normalized
|
|
83
|
-
|
|
84
|
-
`:ignore`:: Structural whitespace is completely ignored
|
|
85
|
-
|
|
86
|
-
.structural_whitespace examples
|
|
87
|
-
[example]
|
|
88
|
-
====
|
|
89
|
-
**Input**:
|
|
90
|
-
|
|
91
|
-
[source,xml]
|
|
92
|
-
----
|
|
93
|
-
<!-- File 1 -->
|
|
94
|
-
<root>
|
|
95
|
-
<item>A</item>
|
|
96
|
-
<item>B</item>
|
|
97
|
-
</root>
|
|
98
|
-
|
|
99
|
-
<!-- File 2 -->
|
|
100
|
-
<root><item>A</item><item>B</item></root>
|
|
101
|
-
----
|
|
102
|
-
|
|
103
|
-
**Results**:
|
|
104
|
-
|
|
105
|
-
* `:strict` → Different (indentation and newlines differ)
|
|
106
|
-
* `:normalize` → Equivalent (whitespace between elements normalized)
|
|
107
|
-
* `:ignore` → Equivalent (only element structure compared)
|
|
108
|
-
====
|
|
109
|
-
|
|
110
|
-
=== attribute_whitespace
|
|
111
|
-
|
|
112
|
-
**Applies to**: XML, HTML only
|
|
113
|
-
|
|
114
|
-
**Purpose**: Controls how whitespace in attribute values is handled.
|
|
115
|
-
|
|
116
|
-
**Behaviors**:
|
|
117
|
-
|
|
118
|
-
`:strict`:: Attribute value whitespace must match exactly
|
|
119
|
-
|
|
120
|
-
`:normalize`:: Whitespace in attribute values is normalized
|
|
121
|
-
|
|
122
|
-
`:ignore`:: Whitespace in attribute values is ignored
|
|
123
|
-
|
|
124
|
-
.attribute_whitespace examples
|
|
125
|
-
[example]
|
|
126
|
-
====
|
|
127
|
-
**Input**:
|
|
128
|
-
|
|
129
|
-
[source,xml]
|
|
130
|
-
----
|
|
131
|
-
<!-- File 1 -->
|
|
132
|
-
<div class="item  active">Content</div>
|
|
133
|
-
|
|
134
|
-
<!-- File 2 -->
|
|
135
|
-
<div class="item active">Content</div>
|
|
136
|
-
----
|
|
137
|
-
|
|
138
|
-
**Results**:
|
|
139
|
-
|
|
140
|
-
* `:strict` → Different (2 spaces vs 1 space)
|
|
141
|
-
* `:normalize` → Equivalent ("item  active" normalizes to "item active")
|
|
142
|
-
* `:ignore` → Equivalent (only attribute presence compared)
|
|
143
|
-
|
|
144
|
-
**HTML `class` attribute special handling**:
|
|
145
|
-
|
|
146
|
-
HTML's `class` attribute is space-separated, so normalization is particularly
|
|
147
|
-
useful:
|
|
148
|
-
|
|
149
|
-
[source,html]
|
|
150
|
-
----
|
|
151
|
-
<!-- These are equivalent with :normalize -->
|
|
152
|
-
<div class="btn  primary   active">Click</div>
|
|
153
|
-
<div class="btn primary active">Click</div>
|
|
154
|
-
----
|
|
155
|
-
====
|
|
156
|
-
|
|
157
|
-
=== attribute_order
|
|
158
|
-
|
|
159
|
-
**Applies to**: XML, HTML only
|
|
160
|
-
|
|
161
|
-
**Purpose**: Controls whether attribute order matters.
|
|
162
|
-
|
|
163
|
-
**Behaviors**:
|
|
164
|
-
|
|
165
|
-
`:strict`:: Attributes must appear in the same order
|
|
166
|
-
|
|
167
|
-
`:ignore`:: Attribute order doesn't matter (set-based comparison)
|
|
168
|
-
|
|
169
|
-
.attribute_order examples
|
|
170
|
-
[example]
|
|
171
|
-
====
|
|
172
|
-
**Input**:
|
|
173
|
-
|
|
174
|
-
[source,xml]
|
|
175
|
-
----
|
|
176
|
-
<!-- File 1 -->
|
|
177
|
-
<element id="123" class="active" data-value="test"/>
|
|
178
|
-
|
|
179
|
-
<!-- File 2 -->
|
|
180
|
-
<element class="active" data-value="test" id="123"/>
|
|
181
|
-
----
|
|
182
|
-
|
|
183
|
-
**Results**:
|
|
184
|
-
|
|
185
|
-
* `:strict` → Different (attribute order differs)
|
|
186
|
-
* `:ignore` → Equivalent (same attributes present, unordered comparison)
|
|
187
|
-
|
|
188
|
-
**HTML default**:
|
|
189
|
-
|
|
190
|
-
HTML attributes are inherently unordered per the HTML specification, so the
|
|
191
|
-
default for HTML is `:ignore`:
|
|
192
|
-
|
|
193
|
-
[source,html]
|
|
194
|
-
----
|
|
195
|
-
<!-- These are always equivalent for HTML -->
|
|
196
|
-
<input type="text" id="name" class="form-control">
|
|
197
|
-
<input class="form-control" id="name" type="text">
|
|
198
|
-
----
|
|
199
|
-
====
|
|
200
|
-
|
|
201
|
-
=== attribute_values
|
|
202
|
-
|
|
203
|
-
**Applies to**: XML, HTML only
|
|
204
|
-
|
|
205
|
-
**Purpose**: Controls how attribute values are compared.
|
|
206
|
-
|
|
207
|
-
**Behaviors**:
|
|
208
|
-
|
|
209
|
-
`:strict`:: Attribute values must match exactly
|
|
210
|
-
|
|
211
|
-
`:normalize`:: Whitespace in values is normalized
|
|
212
|
-
|
|
213
|
-
`:ignore`:: Only attribute presence is checked, values ignored
|
|
214
|
-
|
|
215
|
-
.attribute_values examples
|
|
216
|
-
[example]
|
|
217
|
-
====
|
|
218
|
-
**Input**:
|
|
219
|
-
|
|
220
|
-
[source,xml]
|
|
221
|
-
----
|
|
222
|
-
<!-- File 1 -->
|
|
223
|
-
<element id="123" class="normative"/>
|
|
224
|
-
|
|
225
|
-
<!-- File 2 -->
|
|
226
|
-
<element id="456" class="informative"/>
|
|
227
|
-
----
|
|
228
|
-
|
|
229
|
-
**Results**:
|
|
230
|
-
|
|
231
|
-
* `:strict` → Different (attribute values differ)
|
|
232
|
-
* `:normalize` → Different (values still differ after normalization)
|
|
233
|
-
* `:ignore` → Equivalent (both have `id` and `class` attributes, values
|
|
234
|
-
ignored)
|
|
235
|
-
|
|
236
|
-
**Use case**: Useful when you want to verify that certain attributes exist
|
|
237
|
-
but don't care about their specific values (e.g., testing that generated IDs
|
|
238
|
-
are present).
|
|
239
|
-
====
|
|
240
|
-
|
|
241
|
-
=== key_order
|
|
242
|
-
|
|
243
|
-
**Applies to**: JSON, YAML only
|
|
244
|
-
|
|
245
|
-
**Purpose**: Controls whether object key order matters.
|
|
246
|
-
|
|
247
|
-
**Behaviors**:
|
|
248
|
-
|
|
249
|
-
`:strict`:: Keys must appear in the same order
|
|
250
|
-
|
|
251
|
-
`:ignore`:: Key order doesn't matter (unordered comparison)
|
|
252
|
-
|
|
253
|
-
.key_order examples
|
|
254
|
-
[example]
|
|
255
|
-
====
|
|
256
|
-
**JSON input**:
|
|
257
|
-
|
|
258
|
-
[source,json]
|
|
259
|
-
----
|
|
260
|
-
// File 1
|
|
261
|
-
{
|
|
262
|
-
"name": "John",
|
|
263
|
-
"age": 30,
|
|
264
|
-
"city": "NYC"
|
|
265
|
-
}
|
|
266
|
-
|
|
267
|
-
// File 2
|
|
268
|
-
{
|
|
269
|
-
"city": "NYC",
|
|
270
|
-
"name": "John",
|
|
271
|
-
"age": 30
|
|
272
|
-
}
|
|
273
|
-
----
|
|
274
|
-
|
|
275
|
-
**Results**:
|
|
276
|
-
|
|
277
|
-
* `:strict` → Different (key order differs)
|
|
278
|
-
* `:ignore` → Equivalent (same keys and values, unordered)
|
|
279
|
-
|
|
280
|
-
**YAML input**:
|
|
281
|
-
|
|
282
|
-
[source,yaml]
|
|
283
|
-
----
|
|
284
|
-
# File 1
|
|
285
|
-
name: John
|
|
286
|
-
age: 30
|
|
287
|
-
city: NYC
|
|
288
|
-
|
|
289
|
-
# File 2
|
|
290
|
-
city: NYC
|
|
291
|
-
name: John
|
|
292
|
-
age: 30
|
|
293
|
-
----
|
|
294
|
-
|
|
295
|
-
**Results**:
|
|
296
|
-
|
|
297
|
-
* `:strict` → Different (key order differs)
|
|
298
|
-
* `:ignore` → Equivalent (same structure and values)
|
|
299
|
-
====
|
|
300
|
-
|
|
301
|
-
=== comments
|
|
302
|
-
|
|
303
|
-
**Applies to**: XML, HTML, YAML (JSON doesn't support comments in standard
|
|
304
|
-
spec)
|
|
305
|
-
|
|
306
|
-
**Purpose**: Controls how comments are compared.
|
|
307
|
-
|
|
308
|
-
**Behaviors**:
|
|
309
|
-
|
|
310
|
-
`:strict`:: Comments must match exactly (including whitespace)
|
|
311
|
-
|
|
312
|
-
`:normalize`:: Whitespace in comments is normalized
|
|
313
|
-
|
|
314
|
-
`:ignore`:: Comments are completely ignored
|
|
315
|
-
|
|
316
|
-
.comments examples
|
|
317
|
-
[example]
|
|
318
|
-
====
|
|
319
|
-
**XML input**:
|
|
320
|
-
|
|
321
|
-
[source,xml]
|
|
322
|
-
----
|
|
323
|
-
<!-- File 1 -->
|
|
324
|
-
<root>
|
|
325
|
-
<!-- This is a comment -->
|
|
326
|
-
<element>Value</element>
|
|
327
|
-
</root>
|
|
328
|
-
|
|
329
|
-
<!-- File 2 -->
|
|
330
|
-
<root>
|
|
331
|
-
<element>Value</element>
|
|
332
|
-
</root>
|
|
333
|
-
----
|
|
334
|
-
|
|
335
|
-
**Results**:
|
|
336
|
-
|
|
337
|
-
* `:strict` → Different (File 1 has a comment, File 2 doesn't)
|
|
338
|
-
* `:normalize` → Different (still different, comment present vs absent)
|
|
339
|
-
* `:ignore` → Equivalent (comments ignored, structure matches)
|
|
340
|
-
|
|
341
|
-
**YAML input**:
|
|
342
|
-
|
|
343
|
-
[source,yaml]
|
|
344
|
-
----
|
|
345
|
-
# File 1
|
|
346
|
-
# Configuration file
|
|
347
|
-
name: test
|
|
348
|
-
# Database settings
|
|
349
|
-
database: prod
|
|
350
|
-
|
|
351
|
-
# File 2
|
|
352
|
-
name: test
|
|
353
|
-
database: prod
|
|
354
|
-
----
|
|
355
|
-
|
|
356
|
-
**Results**:
|
|
357
|
-
|
|
358
|
-
* `:strict` → Different (comments differ)
|
|
359
|
-
* `:normalize` → Different (comments still differ)
|
|
360
|
-
* `:ignore` → Equivalent (comments ignored)
|
|
361
|
-
====
|
|
362
|
-
|
|
363
|
-
=== element_structure
|
|
364
|
-
|
|
365
|
-
**Applies to**: All formats (primarily used with semantic diff algorithm)
|
|
366
|
-
|
|
367
|
-
**Purpose**: Controls how element/node type changes are handled during
|
|
368
|
-
semantic tree comparison.
|
|
369
|
-
|
|
370
|
-
**Behaviors**:
|
|
371
|
-
|
|
372
|
-
`:strict`:: Element type changes are treated as differences
|
|
373
|
-
|
|
374
|
-
`:ignore`:: Element type changes are ignored if content is similar
|
|
375
|
-
|
|
376
|
-
**Note**: This dimension is primarily used by the semantic diff algorithm to
|
|
377
|
-
detect structural changes like element upgrades/downgrades (e.g., `<p>` to
|
|
378
|
-
`<div>`).
|
|
379
|
-
|
|
380
|
-
.element_structure examples
|
|
381
|
-
[example]
|
|
382
|
-
====
|
|
383
|
-
**XML input**:
|
|
384
|
-
|
|
385
|
-
[source,xml]
|
|
386
|
-
----
|
|
387
|
-
<!-- File 1 -->
|
|
388
|
-
<document>
|
|
389
|
-
<paragraph>Text content</paragraph>
|
|
390
|
-
</document>
|
|
391
|
-
|
|
392
|
-
<!-- File 2 -->
|
|
393
|
-
<document>
|
|
394
|
-
<section>Text content</section>
|
|
395
|
-
</document>
|
|
396
|
-
----
|
|
397
|
-
|
|
398
|
-
**Results with semantic algorithm**:
|
|
399
|
-
|
|
400
|
-
* `:strict` → Different (element types differ: paragraph vs section)
|
|
401
|
-
* `:ignore` → Potentially equivalent (if content matches, type change ignored)
|
|
402
|
-
|
|
403
|
-
**Use case**: Useful when refactoring markup where element names change but
|
|
404
|
-
semantic content remains the same.
|
|
405
|
-
====
|
|
406
|
-
|
|
407
|
-
=== element_position
|
|
408
|
-
|
|
409
|
-
**Applies to**: All formats (primarily used with semantic diff algorithm)
|
|
410
|
-
|
|
411
|
-
**Purpose**: Controls how element position/order changes are detected and
|
|
412
|
-
reported.
|
|
413
|
-
|
|
414
|
-
**Behaviors**:
|
|
415
|
-
|
|
416
|
-
`:strict`:: Element positions must match exactly
|
|
417
|
-
|
|
418
|
-
`:ignore`:: Element reordering is allowed if content matches
|
|
419
|
-
|
|
420
|
-
**Note**: This dimension enables the semantic diff algorithm to detect move
|
|
421
|
-
operations when elements are reordered.
|
|
422
|
-
|
|
423
|
-
.element_position examples
|
|
424
|
-
[example]
|
|
425
|
-
====
|
|
426
|
-
**XML input**:
|
|
427
|
-
|
|
428
|
-
[source,xml]
|
|
429
|
-
----
|
|
430
|
-
<!-- File 1 -->
|
|
431
|
-
<list>
|
|
432
|
-
<item id="a">First</item>
|
|
433
|
-
<item id="b">Second</item>
|
|
434
|
-
<item id="c">Third</item>
|
|
435
|
-
</list>
|
|
436
|
-
|
|
437
|
-
<!-- File 2 -->
|
|
438
|
-
<list>
|
|
439
|
-
<item id="b">Second</item>
|
|
440
|
-
<item id="a">First</item>
|
|
441
|
-
<item id="c">Third</item>
|
|
442
|
-
</list>
|
|
443
|
-
----
|
|
444
|
-
|
|
445
|
-
**Results with semantic algorithm**:
|
|
446
|
-
|
|
447
|
-
* `:strict` → Different (items a and b are in different positions)
|
|
448
|
-
* `:ignore` → Equivalent (same items present, order doesn't matter)
|
|
449
|
-
|
|
450
|
-
**Use case**: Useful when testing JSON arrays or XML lists where order may
|
|
451
|
-
vary but content is equivalent.
|
|
452
|
-
====
|
|
453
|
-
|
|
454
|
-
=== element_hierarchy
|
|
455
|
-
|
|
456
|
-
**Applies to**: All formats (primarily used with semantic diff algorithm)
|
|
457
|
-
|
|
458
|
-
**Purpose**: Controls how hierarchical structure changes are detected, such
|
|
459
|
-
as elements being moved to different parent nodes.
|
|
460
|
-
|
|
461
|
-
**Behaviors**:
|
|
462
|
-
|
|
463
|
-
`:strict`:: Elements must maintain exact parent-child relationships
|
|
464
|
-
|
|
465
|
-
`:ignore`:: Elements can move between parents if content matches
|
|
466
|
-
|
|
467
|
-
**Note**: This dimension enables the semantic diff algorithm to detect when
|
|
468
|
-
elements are reorganized into different hierarchical structures.
|
|
469
|
-
|
|
470
|
-
.element_hierarchy examples
|
|
471
|
-
[example]
|
|
472
|
-
====
|
|
473
|
-
**XML input**:
|
|
474
|
-
|
|
475
|
-
[source,xml]
|
|
476
|
-
----
|
|
477
|
-
<!-- File 1 -->
|
|
478
|
-
<document>
|
|
479
|
-
<section>
|
|
480
|
-
<note>Important information</note>
|
|
481
|
-
</section>
|
|
482
|
-
</document>
|
|
483
|
-
|
|
484
|
-
<!-- File 2 -->
|
|
485
|
-
<document>
|
|
486
|
-
<note>Important information</note>
|
|
487
|
-
<section>
|
|
488
|
-
</section>
|
|
489
|
-
</document>
|
|
490
|
-
----
|
|
491
|
-
|
|
492
|
-
**Results with semantic algorithm**:
|
|
493
|
-
|
|
494
|
-
* `:strict` → Different (note moved from section child to document child)
|
|
495
|
-
* `:ignore` → Potentially equivalent (note content preserved, hierarchy
|
|
496
|
-
change ignored)
|
|
497
|
-
|
|
498
|
-
**Use case**: Useful when restructuring documents where content blocks move
|
|
499
|
-
between sections but the content itself remains unchanged.
|
|
500
|
-
====
|
|
501
|
-
|
|
502
|
-
== Match profiles
|
|
503
|
-
|
|
504
|
-
Profiles are predefined combinations of dimension settings for common
|
|
505
|
-
scenarios.
|
|
506
|
-
|
|
507
|
-
=== strict
|
|
508
|
-
|
|
509
|
-
**Purpose**: Exact matching - all dimensions use `:strict` behavior.
|
|
510
|
-
|
|
511
|
-
**When to use**:
|
|
512
|
-
|
|
513
|
-
* Character-perfect matching required
|
|
514
|
-
* Testing exact serializer output
|
|
515
|
-
* Verifying formatting compliance
|
|
516
|
-
* Maximum strictness needed
|
|
517
|
-
|
|
518
|
-
**Settings**:
|
|
519
|
-
|
|
520
|
-
[source,ruby]
|
|
521
|
-
----
|
|
522
|
-
{
|
|
523
|
-
preprocessing: :none,
|
|
524
|
-
text_content: :strict,
|
|
525
|
-
structural_whitespace: :strict,
|
|
526
|
-
attribute_whitespace: :strict,
|
|
527
|
-
attribute_order: :strict,
|
|
528
|
-
attribute_values: :strict,
|
|
529
|
-
key_order: :strict,
|
|
530
|
-
comments: :strict,
|
|
531
|
-
element_structure: :strict,
|
|
532
|
-
element_position: :strict,
|
|
533
|
-
element_hierarchy: :strict
|
|
534
|
-
}
|
|
535
|
-
----
|
|
536
|
-
|
|
537
|
-
.strict profile usage
|
|
538
|
-
[example]
|
|
539
|
-
====
|
|
540
|
-
[source,ruby]
|
|
541
|
-
----
|
|
542
|
-
Canon::Comparison.equivalent?(doc1, doc2,
|
|
543
|
-
match_profile: :strict
|
|
544
|
-
)
|
|
545
|
-
----
|
|
546
|
-
|
|
547
|
-
Every aspect must match exactly.
|
|
548
|
-
====
|
|
549
|
-
|
|
550
|
-
=== rendered
|
|
551
|
-
|
|
552
|
-
**Purpose**: Mimics how browsers/CSS engines render content.
|
|
553
|
-
|
|
554
|
-
**When to use**:
|
|
555
|
-
|
|
556
|
-
* Comparing HTML rendered output
|
|
557
|
-
* Formatting doesn't affect display
|
|
558
|
-
* Testing web page generation
|
|
559
|
-
* Browser-equivalent comparison
|
|
560
|
-
|
|
561
|
-
**Settings**:
|
|
562
|
-
|
|
563
|
-
[source,ruby]
|
|
564
|
-
----
|
|
565
|
-
{
|
|
566
|
-
preprocessing: :none,
|
|
567
|
-
text_content: :normalize,
|
|
568
|
-
structural_whitespace: :normalize,
|
|
569
|
-
attribute_whitespace: :normalize,
|
|
570
|
-
attribute_order: :ignore,
|
|
571
|
-
attribute_values: :strict,
|
|
572
|
-
key_order: :ignore,
|
|
573
|
-
comments: :ignore,
|
|
574
|
-
element_structure: :strict,
|
|
575
|
-
element_position: :strict,
|
|
576
|
-
element_hierarchy: :strict
|
|
577
|
-
}
|
|
578
|
-
----
|
|
579
|
-
|
|
580
|
-
.rendered profile usage
|
|
581
|
-
[example]
|
|
582
|
-
====
|
|
583
|
-
[source,ruby]
|
|
584
|
-
----
|
|
585
|
-
Canon::Comparison.equivalent?(html1, html2,
|
|
586
|
-
match_profile: :rendered
|
|
587
|
-
)
|
|
588
|
-
----
|
|
589
|
-
|
|
590
|
-
Focuses on how content would appear in a browser.
|
|
591
|
-
====
|
|
592
|
-
|
|
593
|
-
=== spec_friendly
|
|
594
|
-
|
|
595
|
-
**Purpose**: Test-friendly comparison that ignores most formatting
|
|
596
|
-
differences.
|
|
597
|
-
|
|
598
|
-
**When to use**:
|
|
599
|
-
|
|
600
|
-
* Writing RSpec tests
|
|
601
|
-
* Testing semantic correctness
|
|
602
|
-
* Ignoring pretty-printing differences
|
|
603
|
-
* Most common test scenario
|
|
604
|
-
|
|
605
|
-
**Settings**:
|
|
606
|
-
|
|
607
|
-
[source,ruby]
|
|
608
|
-
----
|
|
609
|
-
{
|
|
610
|
-
preprocessing: :normalize,
|
|
611
|
-
text_content: :normalize,
|
|
612
|
-
structural_whitespace: :ignore,
|
|
613
|
-
attribute_whitespace: :normalize,
|
|
614
|
-
attribute_order: :ignore,
|
|
615
|
-
attribute_values: :strict,
|
|
616
|
-
key_order: :ignore,
|
|
617
|
-
comments: :ignore,
|
|
618
|
-
element_structure: :strict,
|
|
619
|
-
element_position: :ignore,
|
|
620
|
-
element_hierarchy: :strict
|
|
621
|
-
}
|
|
622
|
-
----
|
|
623
|
-
|
|
624
|
-
.spec_friendly profile usage
|
|
625
|
-
[example]
|
|
626
|
-
====
|
|
627
|
-
[source,ruby]
|
|
628
|
-
----
|
|
629
|
-
Canon::Comparison.equivalent?(doc1, doc2,
|
|
630
|
-
match_profile: :spec_friendly
|
|
631
|
-
)
|
|
632
|
-
----
|
|
633
|
-
|
|
634
|
-
Focuses on content, not formatting.
|
|
635
|
-
====
|
|
636
|
-
|
|
637
|
-
=== content_only
|
|
638
|
-
|
|
639
|
-
**Purpose**: Only semantic content matters - maximum tolerance for formatting.
|
|
640
|
-
|
|
641
|
-
**When to use**:
|
|
642
|
-
|
|
643
|
-
* Only care about data, not presentation
|
|
644
|
-
* Maximum flexibility needed
|
|
645
|
-
* Comparing across different formats
|
|
646
|
-
* Structural equivalence only
|
|
647
|
-
|
|
648
|
-
**Settings**:
|
|
649
|
-
|
|
650
|
-
[source,ruby]
|
|
651
|
-
----
|
|
652
|
-
{
|
|
653
|
-
preprocessing: :normalize,
|
|
654
|
-
text_content: :normalize,
|
|
655
|
-
structural_whitespace: :ignore,
|
|
656
|
-
attribute_whitespace: :ignore,
|
|
657
|
-
attribute_order: :ignore,
|
|
658
|
-
attribute_values: :ignore,
|
|
659
|
-
key_order: :ignore,
|
|
660
|
-
comments: :ignore,
|
|
661
|
-
element_structure: :ignore,
|
|
662
|
-
element_position: :ignore,
|
|
663
|
-
element_hierarchy: :ignore
|
|
664
|
-
}
|
|
665
|
-
----
|
|
666
|
-
|
|
667
|
-
.content_only profile usage
|
|
668
|
-
[example]
|
|
669
|
-
====
|
|
670
|
-
[source,ruby]
|
|
671
|
-
----
|
|
672
|
-
Canon::Comparison.equivalent?(doc1, doc2,
|
|
673
|
-
match_profile: :content_only
|
|
674
|
-
)
|
|
675
|
-
----
|
|
676
|
-
|
|
677
|
-
Maximum tolerance, content focus only.
|
|
678
|
-
====
|
|
679
|
-
|
|
680
|
-
== Format defaults
|
|
681
|
-
|
|
682
|
-
Each format has sensible defaults based on typical usage:
|
|
683
|
-
|
|
684
|
-
[cols="1,1,1,1,1"]
|
|
685
|
-
|===
|
|
686
|
-
|Dimension |XML |HTML |JSON |YAML
|
|
687
|
-
|
|
688
|
-
|`text_content`
|
|
689
|
-
|`:strict`
|
|
690
|
-
|`:normalize`
|
|
691
|
-
|`:strict`
|
|
692
|
-
|`:strict`
|
|
693
|
-
|
|
694
|
-
|`structural_whitespace`
|
|
695
|
-
|`:strict`
|
|
696
|
-
|`:normalize`
|
|
697
|
-
|`:strict`
|
|
698
|
-
|`:strict`
|
|
699
|
-
|
|
700
|
-
|`attribute_whitespace`
|
|
701
|
-
|`:strict`
|
|
702
|
-
|`:normalize`
|
|
703
|
-
|—
|
|
704
|
-
|—
|
|
705
|
-
|
|
706
|
-
|`attribute_order`
|
|
707
|
-
|`:ignore`
|
|
708
|
-
|`:ignore`
|
|
709
|
-
|—
|
|
710
|
-
|—
|
|
711
|
-
|
|
712
|
-
|`attribute_values`
|
|
713
|
-
|`:strict`
|
|
714
|
-
|`:strict`
|
|
715
|
-
|—
|
|
716
|
-
|—
|
|
717
|
-
|
|
718
|
-
|`key_order`
|
|
719
|
-
|—
|
|
720
|
-
|—
|
|
721
|
-
|`:strict`
|
|
722
|
-
|`:strict`
|
|
723
|
-
|
|
724
|
-
|`comments`
|
|
725
|
-
|`:strict`
|
|
726
|
-
|`:ignore`
|
|
727
|
-
|—
|
|
728
|
-
|`:strict`
|
|
729
|
-
|
|
730
|
-
|`element_structure`
|
|
731
|
-
|`:strict`
|
|
732
|
-
|`:strict`
|
|
733
|
-
|`:strict`
|
|
734
|
-
|`:strict`
|
|
735
|
-
|
|
736
|
-
|`element_position`
|
|
737
|
-
|`:strict`
|
|
738
|
-
|`:strict`
|
|
739
|
-
|`:strict`
|
|
740
|
-
|`:strict`
|
|
741
|
-
|
|
742
|
-
|`element_hierarchy`
|
|
743
|
-
|`:strict`
|
|
744
|
-
|`:strict`
|
|
745
|
-
|`:strict`
|
|
746
|
-
|`:strict`
|
|
747
|
-
|===
|
|
748
|
-
|
|
749
|
-
== Configuration precedence
|
|
750
|
-
|
|
751
|
-
When options are specified in multiple places, Canon resolves them using this
|
|
752
|
-
hierarchy (highest to lowest priority):
|
|
753
|
-
|
|
754
|
-
[source]
|
|
755
|
-
----
|
|
756
|
-
1. Per-comparison explicit options (highest)
|
|
757
|
-
↓
|
|
758
|
-
2. Per-comparison profile
|
|
759
|
-
↓
|
|
760
|
-
3. Global configuration explicit options
|
|
761
|
-
↓
|
|
762
|
-
4. Global configuration profile
|
|
763
|
-
↓
|
|
764
|
-
5. Format defaults (lowest)
|
|
765
|
-
----
|
|
766
|
-
|
|
767
|
-
.Precedence example
|
|
768
|
-
[example]
|
|
769
|
-
====
|
|
770
|
-
**Global configuration**:
|
|
771
|
-
|
|
772
|
-
[source,ruby]
|
|
773
|
-
----
|
|
774
|
-
Canon::RSpecMatchers.configure do |config|
|
|
775
|
-
config.xml.match.profile = :spec_friendly
|
|
776
|
-
config.xml.match.options = { comments: :strict }
|
|
777
|
-
end
|
|
778
|
-
----
|
|
779
|
-
|
|
780
|
-
The `:spec_friendly` profile sets:
|
|
781
|
-
|
|
782
|
-
* `text_content: :normalize`
|
|
783
|
-
* `structural_whitespace: :ignore`
|
|
784
|
-
* `comments: :ignore`
|
|
785
|
-
|
|
786
|
-
But the explicit `comments: :strict` overrides the profile setting.
|
|
787
|
-
|
|
788
|
-
**Per-test usage**:
|
|
789
|
-
|
|
790
|
-
[source,ruby]
|
|
791
|
-
----
|
|
792
|
-
expect(actual).to be_xml_equivalent_to(expected)
|
|
793
|
-
.with_profile(:rendered)
|
|
794
|
-
.with_options(structural_whitespace: :ignore)
|
|
795
|
-
----
|
|
796
|
-
|
|
797
|
-
**Final resolved options**:
|
|
798
|
-
|
|
799
|
-
* `text_content: :normalize` (from `:rendered` per-test profile)
|
|
800
|
-
* `structural_whitespace: :ignore` (from per-test explicit option)
|
|
801
|
-
* `comments: :ignore` (from `:rendered` per-test profile, which outranks the global explicit `comments: :strict`)
|
|
802
|
-
* Other dimensions use `:rendered` profile or format defaults
|
|
803
|
-
====
|
|
804
|
-
|
|
805
|
-
== Usage
|
|
806
|
-
|
|
807
|
-
=== Ruby API
|
|
808
|
-
|
|
809
|
-
[source,ruby]
|
|
810
|
-
----
|
|
811
|
-
# Use specific dimensions
|
|
812
|
-
Canon::Comparison.equivalent?(doc1, doc2,
|
|
813
|
-
match: {
|
|
814
|
-
text_content: :normalize,
|
|
815
|
-
structural_whitespace: :ignore,
|
|
816
|
-
comments: :ignore
|
|
817
|
-
}
|
|
818
|
-
)
|
|
819
|
-
|
|
820
|
-
# Use a profile
|
|
821
|
-
Canon::Comparison.equivalent?(doc1, doc2,
|
|
822
|
-
match_profile: :spec_friendly
|
|
823
|
-
)
|
|
824
|
-
|
|
825
|
-
# Profile with dimension overrides
|
|
826
|
-
Canon::Comparison.equivalent?(doc1, doc2,
|
|
827
|
-
match_profile: :spec_friendly,
|
|
828
|
-
match: {
|
|
829
|
-
comments: :strict # Override profile
|
|
830
|
-
}
|
|
831
|
-
)
|
|
832
|
-
|
|
833
|
-
# Use semantic dimensions
|
|
834
|
-
Canon::Comparison.equivalent?(doc1, doc2,
|
|
835
|
-
diff_algorithm: :semantic,
|
|
836
|
-
match: {
|
|
837
|
-
element_position: :ignore,
|
|
838
|
-
element_hierarchy: :ignore
|
|
839
|
-
}
|
|
840
|
-
)
|
|
841
|
-
----
|
|
842
|
-
|
|
843
|
-
=== CLI
|
|
844
|
-
|
|
845
|
-
[source,bash]
|
|
846
|
-
----
|
|
847
|
-
# Use profile
|
|
848
|
-
$ canon diff file1.xml file2.xml \
|
|
849
|
-
--match-profile spec_friendly \
|
|
850
|
-
--verbose
|
|
851
|
-
|
|
852
|
-
# Override specific dimensions
|
|
853
|
-
$ canon diff file1.xml file2.xml \
|
|
854
|
-
--text-content normalize \
|
|
855
|
-
--structural-whitespace ignore \
|
|
856
|
-
--verbose
|
|
857
|
-
|
|
858
|
-
# Combine profile with overrides
|
|
859
|
-
$ canon diff file1.xml file2.xml \
|
|
860
|
-
--match-profile spec_friendly \
|
|
861
|
-
--comments strict \
|
|
862
|
-
--verbose
|
|
863
|
-
|
|
864
|
-
# Use semantic algorithm with flexible positioning
|
|
865
|
-
$ canon diff file1.xml file2.xml \
|
|
866
|
-
--diff-algorithm semantic \
|
|
867
|
-
--element-position ignore \
|
|
868
|
-
--verbose
|
|
869
|
-
----
|
|
870
|
-
|
|
871
|
-
=== RSpec
|
|
872
|
-
|
|
873
|
-
[source,ruby]
|
|
874
|
-
----
|
|
875
|
-
# Global configuration
|
|
876
|
-
Canon::RSpecMatchers.configure do |config|
|
|
877
|
-
config.xml.match.profile = :spec_friendly
|
|
878
|
-
config.xml.match.options = {
|
|
879
|
-
text_content: :normalize,
|
|
880
|
-
comments: :ignore
|
|
881
|
-
}
|
|
882
|
-
end
|
|
883
|
-
|
|
884
|
-
# Per-test override
|
|
885
|
-
expect(actual).to be_xml_equivalent_to(expected)
|
|
886
|
-
.with_profile(:strict)
|
|
887
|
-
|
|
888
|
-
# Per-test dimension override
|
|
889
|
-
expect(actual).to be_xml_equivalent_to(expected)
|
|
890
|
-
.with_options(
|
|
891
|
-
structural_whitespace: :strict,
|
|
892
|
-
text_content: :strict
|
|
893
|
-
)
|
|
894
|
-
|
|
895
|
-
# Semantic algorithm with flexible hierarchy
|
|
896
|
-
expect(actual).to be_xml_equivalent_to(expected,
|
|
897
|
-
diff_algorithm: :semantic
|
|
898
|
-
)
|
|
899
|
-
.with_options(
|
|
900
|
-
element_position: :ignore,
|
|
901
|
-
element_hierarchy: :ignore
|
|
902
|
-
)
|
|
903
|
-
----
|
|
904
|
-
|
|
905
|
-
== See also
|
|
906
|
-
|
|
907
|
-
* link:MATCH_ARCHITECTURE[Match architecture]
|
|
908
|
-
* link:PREPROCESSING[Preprocessing options]
|
|
909
|
-
* link:FORMATS[Format support]
|
|
910
|
-
* link:RUBY_API[Ruby API documentation]
|
|
911
|
-
* link:CLI[Command-line interface]
|
|
912
|
-
* link:RSPEC[RSpec matchers]
|