canon 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +83 -22
  3. data/docs/Gemfile +1 -0
  4. data/docs/_config.yml +90 -1
  5. data/docs/advanced/diff-classification.adoc +196 -24
  6. data/docs/features/match-options/index.adoc +239 -1
  7. data/lib/canon/comparison/format_detector.rb +2 -1
  8. data/lib/canon/comparison/html_comparator.rb +19 -8
  9. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  10. data/lib/canon/comparison/markup_comparator.rb +109 -2
  11. data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
  12. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  13. data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
  14. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
  15. data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
  16. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
  17. data/lib/canon/comparison/xml_comparator.rb +240 -23
  18. data/lib/canon/comparison/xml_node_comparison.rb +25 -3
  19. data/lib/canon/diff/diff_classifier.rb +119 -5
  20. data/lib/canon/diff/formatting_detector.rb +1 -1
  21. data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
  22. data/lib/canon/rspec_matchers.rb +37 -8
  23. data/lib/canon/version.rb +1 -1
  24. data/lib/canon/xml/data_model.rb +24 -13
  25. metadata +4 -78
  26. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
  27. data/false_positive_analysis.txt +0 -0
  28. data/file1.html +0 -1
  29. data/file2.html +0 -1
  30. data/old-docs/ADVANCED_TOPICS.adoc +0 -20
  31. data/old-docs/BASIC_USAGE.adoc +0 -16
  32. data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
  33. data/old-docs/CLI.adoc +0 -497
  34. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  35. data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
  36. data/old-docs/DIFF_FORMATTING.adoc +0 -540
  37. data/old-docs/DIFF_PARAMETERS.adoc +0 -261
  38. data/old-docs/DOM_DIFF.adoc +0 -1017
  39. data/old-docs/ENV_CONFIG.adoc +0 -876
  40. data/old-docs/FORMATS.adoc +0 -867
  41. data/old-docs/INPUT_VALIDATION.adoc +0 -477
  42. data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
  43. data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
  44. data/old-docs/MATCH_OPTIONS.adoc +0 -912
  45. data/old-docs/MODES.adoc +0 -432
  46. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  47. data/old-docs/OPTIONS.adoc +0 -1387
  48. data/old-docs/PREPROCESSING.adoc +0 -491
  49. data/old-docs/README.old.adoc +0 -2831
  50. data/old-docs/RSPEC.adoc +0 -814
  51. data/old-docs/RUBY_API.adoc +0 -485
  52. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
  53. data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
  54. data/old-docs/STRING_COMPARE.adoc +0 -345
  55. data/old-docs/TMP.adoc +0 -3384
  56. data/old-docs/TREE_DIFF.adoc +0 -1080
  57. data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
  58. data/old-docs/VERBOSE.adoc +0 -482
  59. data/old-docs/VISUALIZATION_MAP.adoc +0 -625
  60. data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
  61. data/scripts/analyze_current_state.rb +0 -85
  62. data/scripts/analyze_false_positives.rb +0 -114
  63. data/scripts/analyze_remaining_failures.rb +0 -105
  64. data/scripts/compare_current_failures.rb +0 -95
  65. data/scripts/compare_dom_tree_diff.rb +0 -158
  66. data/scripts/compare_failures.rb +0 -151
  67. data/scripts/debug_attribute_extraction.rb +0 -66
  68. data/scripts/debug_blocks_839.rb +0 -115
  69. data/scripts/debug_meta_matching.rb +0 -52
  70. data/scripts/debug_p_matching.rb +0 -192
  71. data/scripts/debug_signature_matching.rb +0 -118
  72. data/scripts/debug_sourcecode_124.rb +0 -32
  73. data/scripts/debug_whitespace_sensitive.rb +0 -192
  74. data/scripts/extract_false_positives.rb +0 -138
  75. data/scripts/find_actual_false_positives.rb +0 -125
  76. data/scripts/investigate_all_false_positives.rb +0 -161
  77. data/scripts/investigate_batch1.rb +0 -127
  78. data/scripts/investigate_classification.rb +0 -150
  79. data/scripts/investigate_classification_detailed.rb +0 -190
  80. data/scripts/investigate_common_failures.rb +0 -342
  81. data/scripts/investigate_false_negative.rb +0 -80
  82. data/scripts/investigate_false_positive.rb +0 -83
  83. data/scripts/investigate_false_positives.rb +0 -227
  84. data/scripts/investigate_false_positives_batch.rb +0 -163
  85. data/scripts/investigate_mixed_content.rb +0 -125
  86. data/scripts/investigate_remaining_16.rb +0 -214
  87. data/scripts/run_single_test.rb +0 -29
  88. data/scripts/test_all_false_positives.rb +0 -95
  89. data/scripts/test_attribute_details.rb +0 -61
  90. data/scripts/test_both_algorithms.rb +0 -49
  91. data/scripts/test_both_simple.rb +0 -49
  92. data/scripts/test_enhanced_semantic_output.rb +0 -125
  93. data/scripts/test_readme_examples.rb +0 -131
  94. data/scripts/test_semantic_tree_diff.rb +0 -99
  95. data/scripts/test_semantic_ux_improvements.rb +0 -135
  96. data/scripts/test_single_false_positive.rb +0 -119
  97. data/scripts/test_size_limits.rb +0 -99
  98. data/test_html_1.html +0 -21
  99. data/test_html_2.html +0 -21
  100. data/test_nokogiri.rb +0 -33
  101. data/test_normalize.rb +0 -45
@@ -1,912 +0,0 @@
1
- ---
2
- layout: default
3
- title: Match Options
4
- nav_order: 30
5
- parent: Customizing Behavior
6
- ---
7
- = Match options
8
- :toc:
9
- :toclevels: 3
10
-
11
- == Scope
12
-
13
- This document provides a complete reference for Canon's match options,
14
- including match dimensions, behaviors, and predefined profiles.
15
-
16
- Match options control Phase 2 (semantic matching) of Canon's comparison
17
- architecture. See link:MATCH_ARCHITECTURE[Match architecture] for the
18
- complete flow.
19
-
20
- == General
21
-
22
- Match options control which aspects of documents are compared and how
23
- strictly they are compared. Canon provides:
24
-
25
- * **Match dimensions**: Independent aspects of documents (text, whitespace,
26
- attributes, etc.)
27
- * **Dimension behaviors**: How each dimension is compared (`:strict`,
28
- `:normalize`, `:ignore`)
29
- * **Match profiles**: Predefined combinations for common scenarios
30
-
31
- == Match dimensions
32
-
33
- Match dimensions are orthogonal aspects that can be configured independently.
34
-
35
- === text_content
36
-
37
- **Applies to**: All formats
38
-
39
- **Purpose**: Controls how text content within elements/values is compared.
40
-
41
- **Behaviors**:
42
-
43
- `:strict`:: Text must match exactly, character-for-character including all
44
- whitespace
45
-
46
- `:normalize`:: Whitespace is normalized (collapsed/trimmed) before comparison
47
-
48
- `:ignore`:: Text content is completely ignored in comparison
49
-
50
- .text_content examples
51
- [example]
52
- ====
53
- **Input**:
54
-
55
- [source,xml]
56
- ----
57
- <!-- File 1 -->
58
- <message>Hello   World</message>
59
-
60
- <!-- File 2 -->
61
- <message>Hello World</message>
62
- ----
63
-
64
- **Results**:
65
-
66
- * `:strict` → Different (whitespace differs: 3 spaces vs 1 space)
67
- * `:normalize` → Equivalent (both normalize to "Hello World")
68
- * `:ignore` → Equivalent (text content ignored, structure matches)
69
- ====
70
-
71
- === structural_whitespace
72
-
73
- **Applies to**: All formats
74
-
75
- **Purpose**: Controls how whitespace between elements (indentation, newlines)
76
- is handled.
77
-
78
- **Behaviors**:
79
-
80
- `:strict`:: All structural whitespace must match exactly
81
-
82
- `:normalize`:: Structural whitespace is normalized
83
-
84
- `:ignore`:: Structural whitespace is completely ignored
85
-
86
- .structural_whitespace examples
87
- [example]
88
- ====
89
- **Input**:
90
-
91
- [source,xml]
92
- ----
93
- <!-- File 1 -->
94
- <root>
95
-   <item>A</item>
96
-   <item>B</item>
97
- </root>
98
-
99
- <!-- File 2 -->
100
- <root><item>A</item><item>B</item></root>
101
- ----
102
-
103
- **Results**:
104
-
105
- * `:strict` → Different (indentation and newlines differ)
106
- * `:normalize` → Equivalent (whitespace between elements normalized)
107
- * `:ignore` → Equivalent (only element structure compared)
108
- ====
109
-
110
- === attribute_whitespace
111
-
112
- **Applies to**: XML, HTML only
113
-
114
- **Purpose**: Controls how whitespace in attribute values is handled.
115
-
116
- **Behaviors**:
117
-
118
- `:strict`:: Attribute value whitespace must match exactly
119
-
120
- `:normalize`:: Whitespace in attribute values is normalized
121
-
122
- `:ignore`:: Whitespace in attribute values is ignored
123
-
124
- .attribute_whitespace examples
125
- [example]
126
- ====
127
- **Input**:
128
-
129
- [source,xml]
130
- ----
131
- <!-- File 1 -->
132
- <div class="item  active">Content</div>
133
-
134
- <!-- File 2 -->
135
- <div class="item active">Content</div>
136
- ----
137
-
138
- **Results**:
139
-
140
- * `:strict` → Different (2 spaces vs 1 space)
141
- * `:normalize` → Equivalent ("item  active" normalizes to "item active")
142
- * `:ignore` → Equivalent (whitespace in attribute values is ignored)
143
-
144
- **HTML `class` attribute special handling**:
145
-
146
- HTML's `class` attribute is space-separated, so normalization is particularly
147
- useful:
148
-
149
- [source,html]
150
- ----
151
- <!-- These are equivalent with :normalize -->
152
- <div class="btn  primary   active">Click</div>
153
- <div class="btn primary active">Click</div>
154
- ----
155
- ====
156
-
157
- === attribute_order
158
-
159
- **Applies to**: XML, HTML only
160
-
161
- **Purpose**: Controls whether attribute order matters.
162
-
163
- **Behaviors**:
164
-
165
- `:strict`:: Attributes must appear in the same order
166
-
167
- `:ignore`:: Attribute order doesn't matter (set-based comparison)
168
-
169
- .attribute_order examples
170
- [example]
171
- ====
172
- **Input**:
173
-
174
- [source,xml]
175
- ----
176
- <!-- File 1 -->
177
- <element id="123" class="active" data-value="test"/>
178
-
179
- <!-- File 2 -->
180
- <element class="active" data-value="test" id="123"/>
181
- ----
182
-
183
- **Results**:
184
-
185
- * `:strict` → Different (attribute order differs)
186
- * `:ignore` → Equivalent (same attributes present, unordered comparison)
187
-
188
- **HTML default**:
189
-
190
- HTML attributes are inherently unordered per the HTML specification, so the
191
- default for HTML is `:ignore`:
192
-
193
- [source,html]
194
- ----
195
- <!-- These are equivalent under the HTML default (attribute_order: :ignore) -->
196
- <input type="text" id="name" class="form-control">
197
- <input class="form-control" id="name" type="text">
198
- ----
199
- ====
200
-
201
- === attribute_values
202
-
203
- **Applies to**: XML, HTML only
204
-
205
- **Purpose**: Controls how attribute values are compared.
206
-
207
- **Behaviors**:
208
-
209
- `:strict`:: Attribute values must match exactly
210
-
211
- `:normalize`:: Whitespace in values is normalized
212
-
213
- `:ignore`:: Only attribute presence is checked, values ignored
214
-
215
- .attribute_values examples
216
- [example]
217
- ====
218
- **Input**:
219
-
220
- [source,xml]
221
- ----
222
- <!-- File 1 -->
223
- <element id="123" class="normative"/>
224
-
225
- <!-- File 2 -->
226
- <element id="456" class="informative"/>
227
- ----
228
-
229
- **Results**:
230
-
231
- * `:strict` → Different (attribute values differ)
232
- * `:normalize` → Different (values still differ after normalization)
233
- * `:ignore` → Equivalent (both have `id` and `class` attributes, values
234
- ignored)
235
-
236
- **Use case**: Useful when you want to verify that certain attributes exist
237
- but don't care about their specific values (e.g., testing that generated IDs
238
- are present).
239
- ====
240
-
241
- === key_order
242
-
243
- **Applies to**: JSON, YAML only
244
-
245
- **Purpose**: Controls whether object key order matters.
246
-
247
- **Behaviors**:
248
-
249
- `:strict`:: Keys must appear in the same order
250
-
251
- `:ignore`:: Key order doesn't matter (unordered comparison)
252
-
253
- .key_order examples
254
- [example]
255
- ====
256
- **JSON input**:
257
-
258
- [source,json]
259
- ----
260
- // File 1
261
- {
262
- "name": "John",
263
- "age": 30,
264
- "city": "NYC"
265
- }
266
-
267
- // File 2
268
- {
269
- "city": "NYC",
270
- "name": "John",
271
- "age": 30
272
- }
273
- ----
274
-
275
- **Results**:
276
-
277
- * `:strict` → Different (key order differs)
278
- * `:ignore` → Equivalent (same keys and values, unordered)
279
-
280
- **YAML input**:
281
-
282
- [source,yaml]
283
- ----
284
- # File 1
285
- name: John
286
- age: 30
287
- city: NYC
288
-
289
- # File 2
290
- city: NYC
291
- name: John
292
- age: 30
293
- ----
294
-
295
- **Results**:
296
-
297
- * `:strict` → Different (key order differs)
298
- * `:ignore` → Equivalent (same structure and values)
299
- ====
300
-
301
- === comments
302
-
303
- **Applies to**: XML, HTML, YAML (JSON doesn't support comments in the standard
304
- spec)
305
-
306
- **Purpose**: Controls how comments are compared.
307
-
308
- **Behaviors**:
309
-
310
- `:strict`:: Comments must match exactly (including whitespace)
311
-
312
- `:normalize`:: Whitespace in comments is normalized
313
-
314
- `:ignore`:: Comments are completely ignored
315
-
316
- .comments examples
317
- [example]
318
- ====
319
- **XML input**:
320
-
321
- [source,xml]
322
- ----
323
- <!-- File 1 -->
324
- <root>
325
- <!-- This is a comment -->
326
- <element>Value</element>
327
- </root>
328
-
329
- <!-- File 2 -->
330
- <root>
331
- <element>Value</element>
332
- </root>
333
- ----
334
-
335
- **Results**:
336
-
337
- * `:strict` → Different (File 1 has a comment, File 2 doesn't)
338
- * `:normalize` → Different (still different, comment present vs absent)
339
- * `:ignore` → Equivalent (comments ignored, structure matches)
340
-
341
- **YAML input**:
342
-
343
- [source,yaml]
344
- ----
345
- # File 1
346
- # Configuration file
347
- name: test
348
- # Database settings
349
- database: prod
350
-
351
- # File 2
352
- name: test
353
- database: prod
354
- ----
355
-
356
- **Results**:
357
-
358
- * `:strict` → Different (comments differ)
359
- * `:normalize` → Different (comments still differ)
360
- * `:ignore` → Equivalent (comments ignored)
361
- ====
362
-
363
- === element_structure
364
-
365
- **Applies to**: All formats (primarily used with the semantic diff algorithm)
366
-
367
- **Purpose**: Controls how element/node type changes are handled during
368
- semantic tree comparison.
369
-
370
- **Behaviors**:
371
-
372
- `:strict`:: Element type changes are treated as differences
373
-
374
- `:ignore`:: Element type changes are ignored if content is similar
375
-
376
- **Note**: This dimension is primarily used by the semantic diff algorithm to
377
- detect structural changes like element upgrades/downgrades (e.g., `<p>` to
378
- `<div>`).
379
-
380
- .element_structure examples
381
- [example]
382
- ====
383
- **XML input**:
384
-
385
- [source,xml]
386
- ----
387
- <!-- File 1 -->
388
- <document>
389
- <paragraph>Text content</paragraph>
390
- </document>
391
-
392
- <!-- File 2 -->
393
- <document>
394
- <section>Text content</section>
395
- </document>
396
- ----
397
-
398
- **Results with semantic algorithm**:
399
-
400
- * `:strict` → Different (element types differ: paragraph vs section)
401
- * `:ignore` → Potentially equivalent (if content matches, type change ignored)
402
-
403
- **Use case**: Useful when refactoring markup where element names change but
404
- semantic content remains the same.
405
- ====
406
-
407
- === element_position
408
-
409
- **Applies to**: All formats (primarily used with the semantic diff algorithm)
410
-
411
- **Purpose**: Controls how element position/order changes are detected and
412
- reported.
413
-
414
- **Behaviors**:
415
-
416
- `:strict`:: Element positions must match exactly
417
-
418
- `:ignore`:: Element reordering is allowed if content matches
419
-
420
- **Note**: This dimension enables the semantic diff algorithm to detect move
421
- operations when elements are reordered.
422
-
423
- .element_position examples
424
- [example]
425
- ====
426
- **XML input**:
427
-
428
- [source,xml]
429
- ----
430
- <!-- File 1 -->
431
- <list>
432
- <item id="a">First</item>
433
- <item id="b">Second</item>
434
- <item id="c">Third</item>
435
- </list>
436
-
437
- <!-- File 2 -->
438
- <list>
439
- <item id="b">Second</item>
440
- <item id="a">First</item>
441
- <item id="c">Third</item>
442
- </list>
443
- ----
444
-
445
- **Results with semantic algorithm**:
446
-
447
- * `:strict` → Different (items a and b are in different positions)
448
- * `:ignore` → Equivalent (same items present, order doesn't matter)
449
-
450
- **Use case**: Useful when testing JSON arrays or XML lists where order may
451
- vary but content is equivalent.
452
- ====
453
-
454
- === element_hierarchy
455
-
456
- **Applies to**: All formats (primarily used with the semantic diff algorithm)
457
-
458
- **Purpose**: Controls how hierarchical structure changes are detected, such
459
- as elements being moved to different parent nodes.
460
-
461
- **Behaviors**:
462
-
463
- `:strict`:: Elements must maintain exact parent-child relationships
464
-
465
- `:ignore`:: Elements can move between parents if content matches
466
-
467
- **Note**: This dimension enables the semantic diff algorithm to detect when
468
- elements are reorganized into different hierarchical structures.
469
-
470
- .element_hierarchy examples
471
- [example]
472
- ====
473
- **XML input**:
474
-
475
- [source,xml]
476
- ----
477
- <!-- File 1 -->
478
- <document>
479
- <section>
480
- <note>Important information</note>
481
- </section>
482
- </document>
483
-
484
- <!-- File 2 -->
485
- <document>
486
- <note>Important information</note>
487
- <section>
488
- </section>
489
- </document>
490
- ----
491
-
492
- **Results with semantic algorithm**:
493
-
494
- * `:strict` → Different (note moved from section child to document child)
495
- * `:ignore` → Potentially equivalent (note content preserved, hierarchy
496
- change ignored)
497
-
498
- **Use case**: Useful when restructuring documents where content blocks move
499
- between sections but the content itself remains unchanged.
500
- ====
501
-
502
- == Match profiles
503
-
504
- Profiles are predefined combinations of dimension settings for common
505
- scenarios.
506
-
507
- === strict
508
-
509
- **Purpose**: Exact matching - all dimensions use `:strict` behavior.
510
-
511
- **When to use**:
512
-
513
- * Character-perfect matching required
514
- * Testing exact serializer output
515
- * Verifying formatting compliance
516
- * Maximum strictness needed
517
-
518
- **Settings**:
519
-
520
- [source,ruby]
521
- ----
522
- {
523
- preprocessing: :none,
524
- text_content: :strict,
525
- structural_whitespace: :strict,
526
- attribute_whitespace: :strict,
527
- attribute_order: :strict,
528
- attribute_values: :strict,
529
- key_order: :strict,
530
- comments: :strict,
531
- element_structure: :strict,
532
- element_position: :strict,
533
- element_hierarchy: :strict
534
- }
535
- ----
536
-
537
- .strict profile usage
538
- [example]
539
- ====
540
- [source,ruby]
541
- ----
542
- Canon::Comparison.equivalent?(doc1, doc2,
543
- match_profile: :strict
544
- )
545
- ----
546
-
547
- Every aspect must match exactly.
548
- ====
549
-
550
- === rendered
551
-
552
- **Purpose**: Mimics how browsers/CSS engines render content.
553
-
554
- **When to use**:
555
-
556
- * Comparing HTML rendered output
557
- * Formatting doesn't affect display
558
- * Testing web page generation
559
- * Browser-equivalent comparison
560
-
561
- **Settings**:
562
-
563
- [source,ruby]
564
- ----
565
- {
566
- preprocessing: :none,
567
- text_content: :normalize,
568
- structural_whitespace: :normalize,
569
- attribute_whitespace: :normalize,
570
- attribute_order: :ignore,
571
- attribute_values: :strict,
572
- key_order: :ignore,
573
- comments: :ignore,
574
- element_structure: :strict,
575
- element_position: :strict,
576
- element_hierarchy: :strict
577
- }
578
- ----
579
-
580
- .rendered profile usage
581
- [example]
582
- ====
583
- [source,ruby]
584
- ----
585
- Canon::Comparison.equivalent?(html1, html2,
586
- match_profile: :rendered
587
- )
588
- ----
589
-
590
- Focuses on how content would appear in a browser.
591
- ====
592
-
593
- === spec_friendly
594
-
595
- **Purpose**: Test-friendly comparison that ignores most formatting
596
- differences.
597
-
598
- **When to use**:
599
-
600
- * Writing RSpec tests
601
- * Testing semantic correctness
602
- * Ignoring pretty-printing differences
603
- * Most common test scenario
604
-
605
- **Settings**:
606
-
607
- [source,ruby]
608
- ----
609
- {
610
- preprocessing: :normalize,
611
- text_content: :normalize,
612
- structural_whitespace: :ignore,
613
- attribute_whitespace: :normalize,
614
- attribute_order: :ignore,
615
- attribute_values: :strict,
616
- key_order: :ignore,
617
- comments: :ignore,
618
- element_structure: :strict,
619
- element_position: :ignore,
620
- element_hierarchy: :strict
621
- }
622
- ----
623
-
624
- .spec_friendly profile usage
625
- [example]
626
- ====
627
- [source,ruby]
628
- ----
629
- Canon::Comparison.equivalent?(doc1, doc2,
630
- match_profile: :spec_friendly
631
- )
632
- ----
633
-
634
- Focuses on content, not formatting.
635
- ====
636
-
637
- === content_only
638
-
639
- **Purpose**: Only semantic content matters - maximum tolerance for formatting.
640
-
641
- **When to use**:
642
-
643
- * Only care about data, not presentation
644
- * Maximum flexibility needed
645
- * Comparing across different formats
646
- * Structural equivalence only
647
-
648
- **Settings**:
649
-
650
- [source,ruby]
651
- ----
652
- {
653
- preprocessing: :normalize,
654
- text_content: :normalize,
655
- structural_whitespace: :ignore,
656
- attribute_whitespace: :ignore,
657
- attribute_order: :ignore,
658
- attribute_values: :ignore,
659
- key_order: :ignore,
660
- comments: :ignore,
661
- element_structure: :ignore,
662
- element_position: :ignore,
663
- element_hierarchy: :ignore
664
- }
665
- ----
666
-
667
- .content_only profile usage
668
- [example]
669
- ====
670
- [source,ruby]
671
- ----
672
- Canon::Comparison.equivalent?(doc1, doc2,
673
- match_profile: :content_only
674
- )
675
- ----
676
-
677
- Maximum tolerance, content focus only.
678
- ====
679
-
680
- == Format defaults
681
-
682
- Each format has sensible defaults based on typical usage:
683
-
684
- [cols="1,1,1,1,1"]
685
- |===
686
- |Dimension |XML |HTML |JSON |YAML
687
-
688
- |`text_content`
689
- |`:strict`
690
- |`:normalize`
691
- |`:strict`
692
- |`:strict`
693
-
694
- |`structural_whitespace`
695
- |`:strict`
696
- |`:normalize`
697
- |`:strict`
698
- |`:strict`
699
-
700
- |`attribute_whitespace`
701
- |`:strict`
702
- |`:normalize`
703
- |—
704
- |—
705
-
706
- |`attribute_order`
707
- |`:ignore`
708
- |`:ignore`
709
- |—
710
- |—
711
-
712
- |`attribute_values`
713
- |`:strict`
714
- |`:strict`
715
- |—
716
- |—
717
-
718
- |`key_order`
719
- |—
720
- |—
721
- |`:strict`
722
- |`:strict`
723
-
724
- |`comments`
725
- |`:strict`
726
- |`:ignore`
727
- |—
728
- |`:strict`
729
-
730
- |`element_structure`
731
- |`:strict`
732
- |`:strict`
733
- |`:strict`
734
- |`:strict`
735
-
736
- |`element_position`
737
- |`:strict`
738
- |`:strict`
739
- |`:strict`
740
- |`:strict`
741
-
742
- |`element_hierarchy`
743
- |`:strict`
744
- |`:strict`
745
- |`:strict`
746
- |`:strict`
747
- |===
748
-
749
- == Configuration precedence
750
-
751
- When options are specified in multiple places, Canon resolves them using this
752
- hierarchy (highest to lowest priority):
753
-
754
- [source]
755
- ----
756
- 1. Per-comparison explicit options (highest)
757
-
758
- 2. Per-comparison profile
759
-
760
- 3. Global configuration explicit options
761
-
762
- 4. Global configuration profile
763
-
764
- 5. Format defaults (lowest)
765
- ----
766
-
767
- .Precedence example
768
- [example]
769
- ====
770
- **Global configuration**:
771
-
772
- [source,ruby]
773
- ----
774
- Canon::RSpecMatchers.configure do |config|
775
- config.xml.match.profile = :spec_friendly
776
- config.xml.match.options = { comments: :strict }
777
- end
778
- ----
779
-
780
- The `:spec_friendly` profile sets:
781
-
782
- * `text_content: :normalize`
783
- * `structural_whitespace: :ignore`
784
- * `comments: :ignore`
785
-
786
- But the explicit `comments: :strict` overrides the profile setting.
787
-
788
- **Per-test usage**:
789
-
790
- [source,ruby]
791
- ----
792
- expect(actual).to be_xml_equivalent_to(expected)
793
- .with_profile(:rendered)
794
- .with_options(structural_whitespace: :ignore)
795
- ----
796
-
797
- **Final resolved options**:
798
-
799
- * `text_content: :normalize` (from `:rendered` per-test profile)
800
- * `structural_whitespace: :ignore` (from per-test explicit option)
801
- * `comments: :ignore` (from `:rendered` per-test profile, which outranks the global explicit `comments: :strict`)
802
- * Other dimensions use `:rendered` profile or format defaults
803
- ====
804
-
805
- == Usage
806
-
807
- === Ruby API
808
-
809
- [source,ruby]
810
- ----
811
- # Use specific dimensions
812
- Canon::Comparison.equivalent?(doc1, doc2,
813
- match: {
814
- text_content: :normalize,
815
- structural_whitespace: :ignore,
816
- comments: :ignore
817
- }
818
- )
819
-
820
- # Use a profile
821
- Canon::Comparison.equivalent?(doc1, doc2,
822
- match_profile: :spec_friendly
823
- )
824
-
825
- # Profile with dimension overrides
826
- Canon::Comparison.equivalent?(doc1, doc2,
827
- match_profile: :spec_friendly,
828
- match: {
829
- comments: :strict # Override profile
830
- }
831
- )
832
-
833
- # Use semantic dimensions
834
- Canon::Comparison.equivalent?(doc1, doc2,
835
- diff_algorithm: :semantic,
836
- match: {
837
- element_position: :ignore,
838
- element_hierarchy: :ignore
839
- }
840
- )
841
- ----
842
-
843
- === CLI
844
-
845
- [source,bash]
846
- ----
847
- # Use profile
848
- $ canon diff file1.xml file2.xml \
849
- --match-profile spec_friendly \
850
- --verbose
851
-
852
- # Override specific dimensions
853
- $ canon diff file1.xml file2.xml \
854
- --text-content normalize \
855
- --structural-whitespace ignore \
856
- --verbose
857
-
858
- # Combine profile with overrides
859
- $ canon diff file1.xml file2.xml \
860
- --match-profile spec_friendly \
861
- --comments strict \
862
- --verbose
863
-
864
- # Use semantic algorithm with flexible positioning
865
- $ canon diff file1.xml file2.xml \
866
- --diff-algorithm semantic \
867
- --element-position ignore \
868
- --verbose
869
- ----
870
-
871
- === RSpec
872
-
873
- [source,ruby]
874
- ----
875
- # Global configuration
876
- Canon::RSpecMatchers.configure do |config|
877
- config.xml.match.profile = :spec_friendly
878
- config.xml.match.options = {
879
- text_content: :normalize,
880
- comments: :ignore
881
- }
882
- end
883
-
884
- # Per-test override
885
- expect(actual).to be_xml_equivalent_to(expected)
886
- .with_profile(:strict)
887
-
888
- # Per-test dimension override
889
- expect(actual).to be_xml_equivalent_to(expected)
890
- .with_options(
891
- structural_whitespace: :strict,
892
- text_content: :strict
893
- )
894
-
895
- # Semantic algorithm with flexible hierarchy
896
- expect(actual).to be_xml_equivalent_to(expected,
897
- diff_algorithm: :semantic
898
- )
899
- .with_options(
900
- element_position: :ignore,
901
- element_hierarchy: :ignore
902
- )
903
- ----
904
-
905
- == See also
906
-
907
- * link:MATCH_ARCHITECTURE[Match architecture]
908
- * link:PREPROCESSING[Preprocessing options]
909
- * link:FORMATS[Format support]
910
- * link:RUBY_API[Ruby API documentation]
911
- * link:CLI[Command-line interface]
912
- * link:RSPEC[RSpec matchers]