canon 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +112 -25
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +82 -2
- data/docs/features/match-options/index.adoc +239 -1
- data/lib/canon/comparison/format_detector.rb +2 -1
- data/lib/canon/comparison/html_comparator.rb +19 -8
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
- data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
- data/lib/canon/comparison/xml_comparator.rb +48 -23
- data/lib/canon/comparison/xml_node_comparison.rb +25 -3
- data/lib/canon/diff/diff_classifier.rb +101 -2
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/rspec_matchers.rb +37 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +3 -78
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
- data/false_positive_analysis.txt +0 -0
- data/file1.html +0 -1
- data/file2.html +0 -1
- data/old-docs/ADVANCED_TOPICS.adoc +0 -20
- data/old-docs/BASIC_USAGE.adoc +0 -16
- data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
- data/old-docs/CLI.adoc +0 -497
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/old-docs/DIFF_FORMATTING.adoc +0 -540
- data/old-docs/DIFF_PARAMETERS.adoc +0 -261
- data/old-docs/DOM_DIFF.adoc +0 -1017
- data/old-docs/ENV_CONFIG.adoc +0 -876
- data/old-docs/FORMATS.adoc +0 -867
- data/old-docs/INPUT_VALIDATION.adoc +0 -477
- data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
- data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/old-docs/MATCH_OPTIONS.adoc +0 -912
- data/old-docs/MODES.adoc +0 -432
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/old-docs/OPTIONS.adoc +0 -1387
- data/old-docs/PREPROCESSING.adoc +0 -491
- data/old-docs/README.old.adoc +0 -2831
- data/old-docs/RSPEC.adoc +0 -814
- data/old-docs/RUBY_API.adoc +0 -485
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
- data/old-docs/STRING_COMPARE.adoc +0 -345
- data/old-docs/TMP.adoc +0 -3384
- data/old-docs/TREE_DIFF.adoc +0 -1080
- data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
- data/old-docs/VERBOSE.adoc +0 -482
- data/old-docs/VISUALIZATION_MAP.adoc +0 -625
- data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
- data/scripts/analyze_current_state.rb +0 -85
- data/scripts/analyze_false_positives.rb +0 -114
- data/scripts/analyze_remaining_failures.rb +0 -105
- data/scripts/compare_current_failures.rb +0 -95
- data/scripts/compare_dom_tree_diff.rb +0 -158
- data/scripts/compare_failures.rb +0 -151
- data/scripts/debug_attribute_extraction.rb +0 -66
- data/scripts/debug_blocks_839.rb +0 -115
- data/scripts/debug_meta_matching.rb +0 -52
- data/scripts/debug_p_matching.rb +0 -192
- data/scripts/debug_signature_matching.rb +0 -118
- data/scripts/debug_sourcecode_124.rb +0 -32
- data/scripts/debug_whitespace_sensitive.rb +0 -192
- data/scripts/extract_false_positives.rb +0 -138
- data/scripts/find_actual_false_positives.rb +0 -125
- data/scripts/investigate_all_false_positives.rb +0 -161
- data/scripts/investigate_batch1.rb +0 -127
- data/scripts/investigate_classification.rb +0 -150
- data/scripts/investigate_classification_detailed.rb +0 -190
- data/scripts/investigate_common_failures.rb +0 -342
- data/scripts/investigate_false_negative.rb +0 -80
- data/scripts/investigate_false_positive.rb +0 -83
- data/scripts/investigate_false_positives.rb +0 -227
- data/scripts/investigate_false_positives_batch.rb +0 -163
- data/scripts/investigate_mixed_content.rb +0 -125
- data/scripts/investigate_remaining_16.rb +0 -214
- data/scripts/run_single_test.rb +0 -29
- data/scripts/test_all_false_positives.rb +0 -95
- data/scripts/test_attribute_details.rb +0 -61
- data/scripts/test_both_algorithms.rb +0 -49
- data/scripts/test_both_simple.rb +0 -49
- data/scripts/test_enhanced_semantic_output.rb +0 -125
- data/scripts/test_readme_examples.rb +0 -131
- data/scripts/test_semantic_tree_diff.rb +0 -99
- data/scripts/test_semantic_ux_improvements.rb +0 -135
- data/scripts/test_single_false_positive.rb +0 -119
- data/scripts/test_size_limits.rb +0 -99
- data/test_html_1.html +0 -21
- data/test_html_2.html +0 -21
- data/test_nokogiri.rb +0 -33
- data/test_normalize.rb +0 -45
data/old-docs/FORMATS.adoc
DELETED
|
@@ -1,867 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
layout: default
|
|
3
|
-
title: Format Support
|
|
4
|
-
nav_order: 20
|
|
5
|
-
parent: Understanding Canon
|
|
6
|
-
---
|
|
7
|
-
= Format support
|
|
8
|
-
:toc:
|
|
9
|
-
:toclevels: 3
|
|
10
|
-
|
|
11
|
-
== Scope
|
|
12
|
-
|
|
13
|
-
This document describes Canon's support for XML, HTML, JSON, and YAML formats,
|
|
14
|
-
including canonicalization rules, format detection, and format-specific
|
|
15
|
-
features.
|
|
16
|
-
|
|
17
|
-
For usage examples, see link:RUBY_API[Ruby API], link:CLI[CLI], or
|
|
18
|
-
link:RSPEC[RSpec documentation].
|
|
19
|
-
|
|
20
|
-
== General
|
|
21
|
-
|
|
22
|
-
Canon provides unified canonicalization and comparison for four serialization
|
|
23
|
-
formats. Each format has specific rules and defaults optimized for its typical
|
|
24
|
-
usage.
|
|
25
|
-
|
|
26
|
-
== XML format
|
|
27
|
-
|
|
28
|
-
=== Canonicalization
|
|
29
|
-
|
|
30
|
-
Canon implements the https://www.w3.org/TR/xml-c14n11/[W3C Canonical XML
|
|
31
|
-
Version 1.1] specification.
|
|
32
|
-
|
|
33
|
-
**Key features:**
|
|
34
|
-
|
|
35
|
-
* Namespace declaration ordering (lexicographic by prefix)
|
|
36
|
-
* Attribute ordering (lexicographic by namespace URI, then local name)
|
|
37
|
-
* Character encoding normalization to UTF-8
|
|
38
|
-
* Special character encoding in text and attributes
|
|
39
|
-
* Removal of superfluous namespace declarations
|
|
40
|
-
* Support for xml:base, xml:lang, xml:space, and xml:id attributes
|
|
41
|
-
* Processing instruction and comment handling
|
|
42
|
-
* Document subset support with attribute inheritance
|
|
43
|
-
|
|
44
|
-
.XML canonicalization example
|
|
45
|
-
[example]
|
|
46
|
-
====
|
|
47
|
-
[source,ruby]
|
|
48
|
-
----
|
|
49
|
-
xml = <<~XML
|
|
50
|
-
<root xmlns:b="http://b.com" xmlns:a="http://a.com">
|
|
51
|
-
<item b:attr="2" a:attr="1">
|
|
52
|
-
Text content
|
|
53
|
-
</item>
|
|
54
|
-
</root>
|
|
55
|
-
XML
|
|
56
|
-
|
|
57
|
-
Canon.format(xml, :xml)
|
|
58
|
-
# => Namespace prefixes sorted, attributes sorted, whitespace normalized
|
|
59
|
-
----
|
|
60
|
-
====
|
|
61
|
-
|
|
62
|
-
=== Format defaults
|
|
63
|
-
|
|
64
|
-
[cols="1,1"]
|
|
65
|
-
|===
|
|
66
|
-
|Dimension |Default Behavior
|
|
67
|
-
|
|
68
|
-
|`text_content`
|
|
69
|
-
|`:strict`
|
|
70
|
-
|
|
71
|
-
|`structural_whitespace`
|
|
72
|
-
|`:strict`
|
|
73
|
-
|
|
74
|
-
|`attribute_whitespace`
|
|
75
|
-
|`:strict`
|
|
76
|
-
|
|
77
|
-
|`attribute_order`
|
|
78
|
-
|`:ignore`
|
|
79
|
-
|
|
80
|
-
|`attribute_values`
|
|
81
|
-
|`:strict`
|
|
82
|
-
|
|
83
|
-
|`comments`
|
|
84
|
-
|`:strict`
|
|
85
|
-
|===
|
|
86
|
-
|
|
87
|
-
Default diff mode: `:by_object` (tree-based semantic diff)
|
|
88
|
-
|
|
89
|
-
NOTE: XML `attribute_order` defaults to `:ignore` because the XML specification
|
|
90
|
-
states that attribute order is not significant. Use the `strict` profile if you
|
|
91
|
-
need to enforce specific attribute ordering.
|
|
92
|
-
|
|
93
|
-
=== Match profiles for XML
|
|
94
|
-
|
|
95
|
-
Canon provides predefined profiles optimized for XML documents. Each profile
|
|
96
|
-
configures preprocessing, match options, diff algorithm, and formatting.
|
|
97
|
-
|
|
98
|
-
==== strict profile
|
|
99
|
-
|
|
100
|
-
**Purpose**: Character-perfect XML matching
|
|
101
|
-
|
|
102
|
-
**Configuration**:
|
|
103
|
-
|
|
104
|
-
[source,ruby]
|
|
105
|
-
----
|
|
106
|
-
{
|
|
107
|
-
preprocessing: :none,
|
|
108
|
-
diff_algorithm: :dom, # DOM-based positional diff
|
|
109
|
-
diff_mode: :by_object, # Tree-based diff output
|
|
110
|
-
match: {
|
|
111
|
-
text_content: :strict,
|
|
112
|
-
structural_whitespace: :strict,
|
|
113
|
-
attribute_whitespace: :strict,
|
|
114
|
-
attribute_order: :strict,
|
|
115
|
-
attribute_values: :strict,
|
|
116
|
-
comments: :strict
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
----
|
|
120
|
-
|
|
121
|
-
**Use when**: Testing exact serializer output, verifying XML formatting
|
|
122
|
-
compliance, character-perfect matching required.
|
|
123
|
-
|
|
124
|
-
==== rendered profile
|
|
125
|
-
|
|
126
|
-
**Purpose**: Browser-rendered equivalence
|
|
127
|
-
|
|
128
|
-
**Configuration**:
|
|
129
|
-
|
|
130
|
-
[source,ruby]
|
|
131
|
-
----
|
|
132
|
-
{
|
|
133
|
-
preprocessing: :none,
|
|
134
|
-
diff_algorithm: :dom,
|
|
135
|
-
diff_mode: :by_line, # Line-based diff output
|
|
136
|
-
match: {
|
|
137
|
-
text_content: :normalize,
|
|
138
|
-
structural_whitespace: :normalize,
|
|
139
|
-
attribute_whitespace: :normalize,
|
|
140
|
-
attribute_order: :ignore,
|
|
141
|
-
attribute_values: :strict,
|
|
142
|
-
comments: :ignore
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
----
|
|
146
|
-
|
|
147
|
-
**Use when**: Comparing how content would render (XHTML), ignoring formatting
|
|
148
|
-
that doesn't affect display.
|
|
149
|
-
|
|
150
|
-
==== spec_friendly profile
|
|
151
|
-
|
|
152
|
-
**Purpose**: Test-friendly comparison for RSpec
|
|
153
|
-
|
|
154
|
-
**Configuration**:
|
|
155
|
-
|
|
156
|
-
[source,ruby]
|
|
157
|
-
----
|
|
158
|
-
{
|
|
159
|
-
preprocessing: :normalize, # Applies whitespace normalization
|
|
160
|
-
diff_algorithm: :dom,
|
|
161
|
-
diff_mode: :by_object,
|
|
162
|
-
match: {
|
|
163
|
-
text_content: :normalize,
|
|
164
|
-
structural_whitespace: :ignore,
|
|
165
|
-
attribute_whitespace: :normalize,
|
|
166
|
-
attribute_order: :ignore,
|
|
167
|
-
attribute_values: :strict,
|
|
168
|
-
comments: :ignore
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
----
|
|
172
|
-
|
|
173
|
-
**Use when**: Writing RSpec tests, testing semantic correctness, ignoring
|
|
174
|
-
pretty-printing differences. Most common for testing.
|
|
175
|
-
|
|
176
|
-
==== content_only profile
|
|
177
|
-
|
|
178
|
-
**Purpose**: Maximum tolerance - only data matters
|
|
179
|
-
|
|
180
|
-
**Configuration**:
|
|
181
|
-
|
|
182
|
-
[source,ruby]
|
|
183
|
-
----
|
|
184
|
-
{
|
|
185
|
-
preprocessing: :normalize,
|
|
186
|
-
diff_algorithm: :dom,
|
|
187
|
-
diff_mode: :by_object,
|
|
188
|
-
match: {
|
|
189
|
-
text_content: :normalize,
|
|
190
|
-
structural_whitespace: :ignore,
|
|
191
|
-
attribute_whitespace: :ignore,
|
|
192
|
-
attribute_order: :ignore,
|
|
193
|
-
attribute_values: :ignore,
|
|
194
|
-
comments: :ignore
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
----
|
|
198
|
-
|
|
199
|
-
**Use when**: Only structural equivalence needed, maximum flexibility for
|
|
200
|
-
formatting differences.
|
|
201
|
-
|
|
202
|
-
=== XML-specific features
|
|
203
|
-
|
|
204
|
-
**Comment handling**: XML comments are omitted from canonical form unless
|
|
205
|
-
`--with-comments` is explicitly set.
|
|
206
|
-
|
|
207
|
-
**Namespace normalization**: Namespace declarations are sorted and duplicate
|
|
208
|
-
declarations are removed.
|
|
209
|
-
|
|
210
|
-
**xml: attributes**: Special attributes like `xml:lang`, `xml:space`, `xml:id`,
|
|
211
|
-
and `xml:base` are properly handled per specification.
|
|
212
|
-
|
|
213
|
-
== HTML format
|
|
214
|
-
|
|
215
|
-
=== Canonicalization
|
|
216
|
-
|
|
217
|
-
Canon supports HTML 4, HTML5, and XHTML with automatic format detection.
|
|
218
|
-
|
|
219
|
-
**Key features:**
|
|
220
|
-
|
|
221
|
-
* Automatic HTML vs XHTML detection
|
|
222
|
-
* HTML5 parser for modern HTML
|
|
223
|
-
* XML parser for XHTML
|
|
224
|
-
* Consistent attribute ordering
|
|
225
|
-
* Whitespace normalization
|
|
226
|
-
* Comment handling in `<style>` and `<script>` tags
|
|
227
|
-
|
|
228
|
-
.HTML canonicalization example
|
|
229
|
-
[example]
|
|
230
|
-
====
|
|
231
|
-
[source,ruby]
|
|
232
|
-
----
|
|
233
|
-
html = <<~HTML
|
|
234
|
-
<!DOCTYPE html>
|
|
235
|
-
<html>
|
|
236
|
-
<body>
|
|
237
|
-
<div class="foo" id="bar">
|
|
238
|
-
Content
|
|
239
|
-
</div>
|
|
240
|
-
</body>
|
|
241
|
-
</html>
|
|
242
|
-
HTML
|
|
243
|
-
|
|
244
|
-
Canon.format(html, :html)
|
|
245
|
-
# => Normalized structure with consistent formatting
|
|
246
|
-
----
|
|
247
|
-
====
|
|
248
|
-
|
|
249
|
-
=== Format defaults
|
|
250
|
-
|
|
251
|
-
[cols="1,1"]
|
|
252
|
-
|===
|
|
253
|
-
|Dimension |Default Behavior
|
|
254
|
-
|
|
255
|
-
|`text_content`
|
|
256
|
-
|`:normalize`
|
|
257
|
-
|
|
258
|
-
|`structural_whitespace`
|
|
259
|
-
|`:normalize`
|
|
260
|
-
|
|
261
|
-
|`attribute_whitespace`
|
|
262
|
-
|`:normalize`
|
|
263
|
-
|
|
264
|
-
|`attribute_order`
|
|
265
|
-
|`:ignore`
|
|
266
|
-
|
|
267
|
-
|`attribute_values`
|
|
268
|
-
|`:strict`
|
|
269
|
-
|
|
270
|
-
|`comments`
|
|
271
|
-
|`:ignore`
|
|
272
|
-
|===
|
|
273
|
-
|
|
274
|
-
Default diff mode: `:by_line` (line-based diff)
|
|
275
|
-
|
|
276
|
-
=== Match profiles for HTML
|
|
277
|
-
|
|
278
|
-
Canon provides predefined profiles optimized for HTML documents. Each profile
|
|
279
|
-
configures preprocessing, match options, diff algorithm, and formatting.
|
|
280
|
-
|
|
281
|
-
==== strict profile
|
|
282
|
-
|
|
283
|
-
**Purpose**: Character-perfect HTML matching
|
|
284
|
-
|
|
285
|
-
**Configuration**:
|
|
286
|
-
|
|
287
|
-
[source,ruby]
|
|
288
|
-
----
|
|
289
|
-
{
|
|
290
|
-
preprocessing: :none,
|
|
291
|
-
diff_algorithm: :dom,
|
|
292
|
-
diff_mode: :by_line, # Line-based diff output (HTML default)
|
|
293
|
-
match: {
|
|
294
|
-
text_content: :strict,
|
|
295
|
-
structural_whitespace: :strict,
|
|
296
|
-
attribute_whitespace: :strict,
|
|
297
|
-
attribute_order: :strict,
|
|
298
|
-
attribute_values: :strict,
|
|
299
|
-
comments: :strict
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
----
|
|
303
|
-
|
|
304
|
-
**Use when**: Testing exact HTML formatter output, verifying HTML formatting
|
|
305
|
-
compliance.
|
|
306
|
-
|
|
307
|
-
==== rendered profile
|
|
308
|
-
|
|
309
|
-
**Purpose**: Browser-rendered equivalence (most common for HTML)
|
|
310
|
-
|
|
311
|
-
**Configuration**:
|
|
312
|
-
|
|
313
|
-
[source,ruby]
|
|
314
|
-
----
|
|
315
|
-
{
|
|
316
|
-
preprocessing: :none,
|
|
317
|
-
diff_algorithm: :dom,
|
|
318
|
-
diff_mode: :by_line,
|
|
319
|
-
match: {
|
|
320
|
-
text_content: :normalize,
|
|
321
|
-
structural_whitespace: :normalize,
|
|
322
|
-
attribute_whitespace: :normalize,
|
|
323
|
-
attribute_order: :ignore, # HTML attributes are unordered
|
|
324
|
-
attribute_values: :strict,
|
|
325
|
-
comments: :ignore
|
|
326
|
-
}
|
|
327
|
-
}
|
|
328
|
-
----
|
|
329
|
-
|
|
330
|
-
**Use when**: Comparing HTML as browsers render it, testing web page output,
|
|
331
|
-
ignoring formatting that doesn't affect display. This is the recommended
|
|
332
|
-
profile for most HTML comparisons.
|
|
333
|
-
|
|
334
|
-
==== spec_friendly profile
|
|
335
|
-
|
|
336
|
-
**Purpose**: Test-friendly comparison for RSpec
|
|
337
|
-
|
|
338
|
-
**Configuration**:
|
|
339
|
-
|
|
340
|
-
[source,ruby]
|
|
341
|
-
----
|
|
342
|
-
{
|
|
343
|
-
preprocessing: :normalize,
|
|
344
|
-
diff_algorithm: :dom,
|
|
345
|
-
diff_mode: :by_object, # Tree-based for better test output
|
|
346
|
-
match: {
|
|
347
|
-
text_content: :normalize,
|
|
348
|
-
structural_whitespace: :ignore,
|
|
349
|
-
attribute_whitespace: :normalize,
|
|
350
|
-
attribute_order: :ignore,
|
|
351
|
-
attribute_values: :strict,
|
|
352
|
-
comments: :ignore
|
|
353
|
-
}
|
|
354
|
-
}
|
|
355
|
-
----
|
|
356
|
-
|
|
357
|
-
**Use when**: Writing RSpec tests for HTML generation, testing semantic HTML
|
|
358
|
-
correctness.
|
|
359
|
-
|
|
360
|
-
==== content_only profile
|
|
361
|
-
|
|
362
|
-
**Purpose**: Maximum tolerance - only structure matters
|
|
363
|
-
|
|
364
|
-
**Configuration**:
|
|
365
|
-
|
|
366
|
-
[source,ruby]
|
|
367
|
-
----
|
|
368
|
-
{
|
|
369
|
-
preprocessing: :normalize,
|
|
370
|
-
diff_algorithm: :dom,
|
|
371
|
-
diff_mode: :by_object,
|
|
372
|
-
match: {
|
|
373
|
-
text_content: :normalize,
|
|
374
|
-
structural_whitespace: :ignore,
|
|
375
|
-
attribute_whitespace: :ignore,
|
|
376
|
-
attribute_order: :ignore,
|
|
377
|
-
attribute_values: :ignore,
|
|
378
|
-
comments: :ignore
|
|
379
|
-
}
|
|
380
|
-
}
|
|
381
|
-
----
|
|
382
|
-
|
|
383
|
-
**Use when**: Only HTML structure needs to match, maximum flexibility for all
|
|
384
|
-
formatting and attribute differences.
|
|
385
|
-
|
|
386
|
-
=== HTML-specific features
|
|
387
|
-
|
|
388
|
-
**Format detection**: Automatically detects HTML5, HTML4, or XHTML based on
|
|
389
|
-
DOCTYPE and structure.
|
|
390
|
-
|
|
391
|
-
**Whitespace handling**: HTML whitespace is collapsed per CSS rendering rules.
|
|
392
|
-
Empty text nodes between elements are removed.
|
|
393
|
-
|
|
394
|
-
**Attribute order**: HTML attributes are inherently unordered per the HTML
|
|
395
|
-
specification, so default is `:ignore`.
|
|
396
|
-
|
|
397
|
-
**Special tags**: Comments in `<style>` and `<script>` tags are normalized
|
|
398
|
-
specially to handle CSS/JavaScript syntax.
|
|
399
|
-
|
|
400
|
-
== JSON format
|
|
401
|
-
|
|
402
|
-
=== Canonicalization
|
|
403
|
-
|
|
404
|
-
Canon provides JSON canonicalization with sorted keys at all nesting levels.
|
|
405
|
-
|
|
406
|
-
**Key features:**
|
|
407
|
-
|
|
408
|
-
* Alphabetically sorted object keys
|
|
409
|
-
* Consistent indentation (configurable)
|
|
410
|
-
* Proper escape sequences
|
|
411
|
-
* No trailing commas
|
|
412
|
-
* Unicode normalization
|
|
413
|
-
|
|
414
|
-
.JSON canonicalization example
|
|
415
|
-
[example]
|
|
416
|
-
====
|
|
417
|
-
[source,ruby]
|
|
418
|
-
----
|
|
419
|
-
json = '{"z":3,"a":1,"nested":{"y":2,"x":1}}'
|
|
420
|
-
|
|
421
|
-
Canon.format(json, :json)
|
|
422
|
-
# => {"a":1,"nested":{"x":1,"y":2},"z":3}
|
|
423
|
-
# Keys sorted at all levels
|
|
424
|
-
----
|
|
425
|
-
====
|
|
426
|
-
|
|
427
|
-
=== Format defaults
|
|
428
|
-
|
|
429
|
-
[cols="1,1"]
|
|
430
|
-
|===
|
|
431
|
-
|Dimension |Default Behavior
|
|
432
|
-
|
|
433
|
-
|`text_content`
|
|
434
|
-
|`:strict`
|
|
435
|
-
|
|
436
|
-
|`structural_whitespace`
|
|
437
|
-
|`:strict`
|
|
438
|
-
|
|
439
|
-
|`key_order`
|
|
440
|
-
|`:strict`
|
|
441
|
-
|===
|
|
442
|
-
|
|
443
|
-
Default diff mode: `:by_object` (tree-based semantic diff)
|
|
444
|
-
|
|
445
|
-
=== Match profiles for JSON
|
|
446
|
-
|
|
447
|
-
Canon provides predefined profiles optimized for JSON documents. Each profile
|
|
448
|
-
configures preprocessing, match options, diff algorithm, and formatting.
|
|
449
|
-
|
|
450
|
-
==== strict profile
|
|
451
|
-
|
|
452
|
-
**Purpose**: Character-perfect JSON matching
|
|
453
|
-
|
|
454
|
-
**Configuration**:
|
|
455
|
-
|
|
456
|
-
[source,ruby]
|
|
457
|
-
----
|
|
458
|
-
{
|
|
459
|
-
preprocessing: :none,
|
|
460
|
-
diff_algorithm: :dom,
|
|
461
|
-
diff_mode: :by_object, # Tree-based diff output (JSON default)
|
|
462
|
-
match: {
|
|
463
|
-
text_content: :strict,
|
|
464
|
-
structural_whitespace: :strict,
|
|
465
|
-
key_order: :strict
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
----
|
|
469
|
-
|
|
470
|
-
**Use when**: Testing exact JSON serializer output, verifying JSON formatting
|
|
471
|
-
compliance.
|
|
472
|
-
|
|
473
|
-
==== rendered profile
|
|
474
|
-
|
|
475
|
-
**Purpose**: Normalized JSON comparison
|
|
476
|
-
|
|
477
|
-
**Configuration**:
|
|
478
|
-
|
|
479
|
-
[source,ruby]
|
|
480
|
-
----
|
|
481
|
-
{
|
|
482
|
-
preprocessing: :none,
|
|
483
|
-
diff_algorithm: :dom,
|
|
484
|
-
diff_mode: :by_object,
|
|
485
|
-
match: {
|
|
486
|
-
text_content: :normalize,
|
|
487
|
-
structural_whitespace: :normalize,
|
|
488
|
-
key_order: :ignore # Allow unordered object keys
|
|
489
|
-
}
|
|
490
|
-
}
|
|
491
|
-
----
|
|
492
|
-
|
|
493
|
-
**Use when**: Comparing JSON data where key order and whitespace don't matter.
|
|
494
|
-
|
|
495
|
-
==== spec_friendly profile
|
|
496
|
-
|
|
497
|
-
**Purpose**: Test-friendly comparison for RSpec
|
|
498
|
-
|
|
499
|
-
**Configuration**:
|
|
500
|
-
|
|
501
|
-
[source,ruby]
|
|
502
|
-
----
|
|
503
|
-
{
|
|
504
|
-
preprocessing: :normalize,
|
|
505
|
-
diff_algorithm: :dom,
|
|
506
|
-
diff_mode: :by_object,
|
|
507
|
-
match: {
|
|
508
|
-
text_content: :normalize,
|
|
509
|
-
structural_whitespace: :ignore,
|
|
510
|
-
key_order: :ignore
|
|
511
|
-
}
|
|
512
|
-
}
|
|
513
|
-
----
|
|
514
|
-
|
|
515
|
-
**Use when**: Writing RSpec tests for JSON generation, testing semantic JSON
|
|
516
|
-
correctness. Most common for JSON testing.
|
|
517
|
-
|
|
518
|
-
==== content_only profile
|
|
519
|
-
|
|
520
|
-
**Purpose**: Maximum tolerance - only values matter
|
|
521
|
-
|
|
522
|
-
**Configuration**:
|
|
523
|
-
|
|
524
|
-
[source,ruby]
|
|
525
|
-
----
|
|
526
|
-
{
|
|
527
|
-
preprocessing: :normalize,
|
|
528
|
-
diff_algorithm: :dom,
|
|
529
|
-
diff_mode: :by_object,
|
|
530
|
-
match: {
|
|
531
|
-
text_content: :normalize,
|
|
532
|
-
structural_whitespace: :ignore,
|
|
533
|
-
key_order: :ignore
|
|
534
|
-
}
|
|
535
|
-
}
|
|
536
|
-
----
|
|
537
|
-
|
|
538
|
-
**Use when**: Only JSON structure and values need to match, maximum flexibility
|
|
539
|
-
for formatting and key order.
|
|
540
|
-
|
|
541
|
-
=== JSON-specific features
|
|
542
|
-
|
|
543
|
-
**Key ordering**: Object keys are sorted alphabetically for consistent
|
|
544
|
-
comparison.
|
|
545
|
-
|
|
546
|
-
**Type preservation**: Distinguishes between numbers, strings, booleans, and
|
|
547
|
-
null.
|
|
548
|
-
|
|
549
|
-
**Nested structures**: Handles deeply nested objects and arrays.
|
|
550
|
-
|
|
551
|
-
**No comments**: Standard JSON does not support comments.
|
|
552
|
-
|
|
553
|
-
== YAML format
|
|
554
|
-
|
|
555
|
-
=== Canonicalization
|
|
556
|
-
|
|
557
|
-
Canon provides YAML canonicalization with sorted keys and standard formatting.
|
|
558
|
-
|
|
559
|
-
**Key features:**
|
|
560
|
-
|
|
561
|
-
* Alphabetically sorted mapping keys
|
|
562
|
-
* Consistent indentation
|
|
563
|
-
* Standard YAML 1.2 format
|
|
564
|
-
* Comment preservation (optional)
|
|
565
|
-
* Anchor and alias handling
|
|
566
|
-
|
|
567
|
-
.YAML canonicalization example
|
|
568
|
-
[example]
|
|
569
|
-
====
|
|
570
|
-
[source,ruby]
|
|
571
|
-
----
|
|
572
|
-
yaml = <<~YAML
|
|
573
|
-
z: 3
|
|
574
|
-
a: 1
|
|
575
|
-
nested:
|
|
576
|
-
y: 2
|
|
577
|
-
x: 1
|
|
578
|
-
YAML
|
|
579
|
-
|
|
580
|
-
Canon.format(yaml, :yaml)
|
|
581
|
-
# => Keys sorted at all levels
|
|
582
|
-
----
|
|
583
|
-
====
|
|
584
|
-
|
|
585
|
-
=== Format defaults
|
|
586
|
-
|
|
587
|
-
[cols="1,1"]
|
|
588
|
-
|===
|
|
589
|
-
|Dimension |Default Behavior
|
|
590
|
-
|
|
591
|
-
|`text_content`
|
|
592
|
-
|`:strict`
|
|
593
|
-
|
|
594
|
-
|`structural_whitespace`
|
|
595
|
-
|`:strict`
|
|
596
|
-
|
|
597
|
-
|`key_order`
|
|
598
|
-
|`:strict`
|
|
599
|
-
|
|
600
|
-
|`comments`
|
|
601
|
-
|`:strict`
|
|
602
|
-
|===
|
|
603
|
-
|
|
604
|
-
Default diff mode: `:by_object` (tree-based semantic diff)
|
|
605
|
-
|
|
606
|
-
=== Match profiles for YAML
|
|
607
|
-
|
|
608
|
-
Canon provides predefined profiles optimized for YAML documents. Each profile
|
|
609
|
-
configures preprocessing, match options, diff algorithm, and formatting.
|
|
610
|
-
|
|
611
|
-
==== strict profile
|
|
612
|
-
|
|
613
|
-
**Purpose**: Character-perfect YAML matching
|
|
614
|
-
|
|
615
|
-
**Configuration**:
|
|
616
|
-
|
|
617
|
-
[source,ruby]
|
|
618
|
-
----
|
|
619
|
-
{
|
|
620
|
-
preprocessing: :none,
|
|
621
|
-
diff_algorithm: :dom,
|
|
622
|
-
diff_mode: :by_object, # Tree-based diff output (YAML default)
|
|
623
|
-
match: {
|
|
624
|
-
text_content: :strict,
|
|
625
|
-
structural_whitespace: :strict,
|
|
626
|
-
key_order: :strict,
|
|
627
|
-
comments: :strict
|
|
628
|
-
}
|
|
629
|
-
}
|
|
630
|
-
----
|
|
631
|
-
|
|
632
|
-
**Use when**: Testing exact YAML serializer output, verifying YAML formatting
|
|
633
|
-
compliance.
|
|
634
|
-
|
|
635
|
-
==== rendered profile
|
|
636
|
-
|
|
637
|
-
**Purpose**: Normalized YAML comparison
|
|
638
|
-
|
|
639
|
-
**Configuration**:
|
|
640
|
-
|
|
641
|
-
[source,ruby]
|
|
642
|
-
----
|
|
643
|
-
{
|
|
644
|
-
preprocessing: :none,
|
|
645
|
-
diff_algorithm: :dom,
|
|
646
|
-
diff_mode: :by_object,
|
|
647
|
-
match: {
|
|
648
|
-
text_content: :normalize,
|
|
649
|
-
structural_whitespace: :normalize,
|
|
650
|
-
key_order: :ignore, # Allow unordered mapping keys
|
|
651
|
-
comments: :ignore
|
|
652
|
-
}
|
|
653
|
-
}
|
|
654
|
-
----
|
|
655
|
-
|
|
656
|
-
**Use when**: Comparing YAML data where key order, whitespace, and comments
|
|
657
|
-
don't matter.
|
|
658
|
-
|
|
659
|
-
==== spec_friendly profile
|
|
660
|
-
|
|
661
|
-
**Purpose**: Test-friendly comparison for RSpec
|
|
662
|
-
|
|
663
|
-
**Configuration**:
|
|
664
|
-
|
|
665
|
-
[source,ruby]
|
|
666
|
-
----
|
|
667
|
-
{
|
|
668
|
-
preprocessing: :normalize,
|
|
669
|
-
diff_algorithm: :dom,
|
|
670
|
-
diff_mode: :by_object,
|
|
671
|
-
match: {
|
|
672
|
-
text_content: :normalize,
|
|
673
|
-
structural_whitespace: :ignore,
|
|
674
|
-
key_order: :ignore,
|
|
675
|
-
comments: :ignore
|
|
676
|
-
}
|
|
677
|
-
}
|
|
678
|
-
----
|
|
679
|
-
|
|
680
|
-
**Use when**: Writing RSpec tests for YAML generation, testing semantic YAML
|
|
681
|
-
correctness. Most common for YAML testing.
|
|
682
|
-
|
|
683
|
-
==== content_only profile
|
|
684
|
-
|
|
685
|
-
**Purpose**: Maximum tolerance - only values matter
|
|
686
|
-
|
|
687
|
-
**Configuration**:
|
|
688
|
-
|
|
689
|
-
[source,ruby]
|
|
690
|
-
----
|
|
691
|
-
{
|
|
692
|
-
preprocessing: :normalize,
|
|
693
|
-
diff_algorithm: :dom,
|
|
694
|
-
diff_mode: :by_object,
|
|
695
|
-
match: {
|
|
696
|
-
text_content: :normalize,
|
|
697
|
-
structural_whitespace: :ignore,
|
|
698
|
-
key_order: :ignore,
|
|
699
|
-
comments: :ignore
|
|
700
|
-
}
|
|
701
|
-
}
|
|
702
|
-
----
|
|
703
|
-
|
|
704
|
-
**Use when**: Only YAML structure and values need to match, maximum flexibility
|
|
705
|
-
for formatting, key order, and comments.
|
|
706
|
-
|
|
707
|
-
=== YAML-specific features
|
|
708
|
-
|
|
709
|
-
**Comment support**: YAML comments are preserved and can be compared.
|
|
710
|
-
|
|
711
|
-
**Key ordering**: Mapping keys are sorted alphabetically for consistent output.
|
|
712
|
-
|
|
713
|
-
**Type detection**: YAML's rich type system is preserved (strings, numbers,
|
|
714
|
-
booleans, dates, etc.).
|
|
715
|
-
|
|
716
|
-
**Anchors and aliases**: YAML anchors (`&`) and aliases (`*`) are properly
|
|
717
|
-
handled.
|
|
718
|
-
|
|
719
|
-
== Format detection
|
|
720
|
-
|
|
721
|
-
Canon automatically detects format based on file extensions:
|
|
722
|
-
|
|
723
|
-
[cols="1,1"]
|
|
724
|
-
|===
|
|
725
|
-
|Extension |Format
|
|
726
|
-
|
|
727
|
-
|`.xml`
|
|
728
|
-
|XML
|
|
729
|
-
|
|
730
|
-
|`.html`, `.htm`
|
|
731
|
-
|HTML
|
|
732
|
-
|
|
733
|
-
|`.json`
|
|
734
|
-
|JSON
|
|
735
|
-
|
|
736
|
-
|`.yaml`, `.yml`
|
|
737
|
-
|YAML
|
|
738
|
-
|===
|
|
739
|
-
|
|
740
|
-
You can override auto-detection by explicitly specifying the format:
|
|
741
|
-
|
|
742
|
-
.Explicit format specification
|
|
743
|
-
[example]
|
|
744
|
-
====
|
|
745
|
-
[source,ruby]
|
|
746
|
-
----
|
|
747
|
-
# Ruby API
|
|
748
|
-
Canon.format(content, :xml)
|
|
749
|
-
|
|
750
|
-
# CLI
|
|
751
|
-
$ canon format file.txt --format xml
|
|
752
|
-
|
|
753
|
-
# Comparison
|
|
754
|
-
Canon::Comparison.equivalent?(doc1, doc2, format: :xml)
|
|
755
|
-
----
|
|
756
|
-
====
|
|
757
|
-
|
|
758
|
-
== Format comparison matrix
|
|
759
|
-
|
|
760
|
-
[cols="1,1,1,1,1"]
|
|
761
|
-
|===
|
|
762
|
-
|Feature |XML |HTML |JSON |YAML
|
|
763
|
-
|
|
764
|
-
|Canonicalization standard
|
|
765
|
-
|W3C C14N 1.1
|
|
766
|
-
|Custom
|
|
767
|
-
|Custom
|
|
768
|
-
|YAML 1.2
|
|
769
|
-
|
|
770
|
-
|Comment support
|
|
771
|
-
|Yes
|
|
772
|
-
|Yes
|
|
773
|
-
|No
|
|
774
|
-
|Yes
|
|
775
|
-
|
|
776
|
-
|Attribute/key ordering
|
|
777
|
-
|Ignored default
|
|
778
|
-
|Ignored default
|
|
779
|
-
|Strict default
|
|
780
|
-
|Strict default
|
|
781
|
-
|
|
782
|
-
|Default diff mode
|
|
783
|
-
|by-object
|
|
784
|
-
|by-line
|
|
785
|
-
|by-object
|
|
786
|
-
|by-object
|
|
787
|
-
|
|
788
|
-
|Whitespace handling
|
|
789
|
-
|Strict default
|
|
790
|
-
|Normalized default
|
|
791
|
-
|Strict default
|
|
792
|
-
|Strict default
|
|
793
|
-
|
|
794
|
-
|Namespace support
|
|
795
|
-
|Yes
|
|
796
|
-
|Limited (XHTML)
|
|
797
|
-
|No
|
|
798
|
-
|No
|
|
799
|
-
|===
|
|
800
|
-
|
|
801
|
-
== Working with multiple formats
|
|
802
|
-
|
|
803
|
-
Canon's unified API works consistently across all formats:
|
|
804
|
-
|
|
805
|
-
.Unified API examples
|
|
806
|
-
[example]
|
|
807
|
-
====
|
|
808
|
-
[source,ruby]
|
|
809
|
-
----
|
|
810
|
-
# Format any content
|
|
811
|
-
Canon.format(xml_content, :xml)
|
|
812
|
-
Canon.format(html_content, :html)
|
|
813
|
-
Canon.format(json_content, :json)
|
|
814
|
-
Canon.format(yaml_content, :yaml)
|
|
815
|
-
|
|
816
|
-
# Compare any format
|
|
817
|
-
Canon::Comparison.equivalent?(xml1, xml2)
|
|
818
|
-
Canon::Comparison.equivalent?(html1, html2)
|
|
819
|
-
Canon::Comparison.equivalent?(json1, json2)
|
|
820
|
-
Canon::Comparison.equivalent?(yaml1, yaml2)
|
|
821
|
-
|
|
822
|
-
# RSpec matchers
|
|
823
|
-
expect(actual_xml).to be_xml_equivalent_to(expected_xml)
|
|
824
|
-
expect(actual_html).to be_html_equivalent_to(expected_html)
|
|
825
|
-
expect(actual_json).to be_json_equivalent_to(expected_json)
|
|
826
|
-
expect(actual_yaml).to be_yaml_equivalent_to(expected_yaml)
|
|
827
|
-
----
|
|
828
|
-
====
|
|
829
|
-
|
|
830
|
-
== Format-specific comparators
|
|
831
|
-
|
|
832
|
-
You can use format-specific comparator classes directly:
|
|
833
|
-
|
|
834
|
-
.Format-specific comparators
|
|
835
|
-
[example]
|
|
836
|
-
====
|
|
837
|
-
[source,ruby]
|
|
838
|
-
----
|
|
839
|
-
# XML comparator
|
|
840
|
-
Canon::Comparison::XmlComparator.equivalent?(xml1, xml2,
|
|
841
|
-
match: { attribute_order: :ignore }
|
|
842
|
-
)
|
|
843
|
-
|
|
844
|
-
# HTML comparator
|
|
845
|
-
Canon::Comparison::HtmlComparator.equivalent?(html1, html2,
|
|
846
|
-
match_profile: :rendered
|
|
847
|
-
)
|
|
848
|
-
|
|
849
|
-
# JSON comparator
|
|
850
|
-
Canon::Comparison::JsonComparator.equivalent?(json1, json2,
|
|
851
|
-
match: { key_order: :ignore }
|
|
852
|
-
)
|
|
853
|
-
|
|
854
|
-
# YAML comparator
|
|
855
|
-
Canon::Comparison::YamlComparator.equivalent?(yaml1, yaml2,
|
|
856
|
-
match: { comments: :ignore }
|
|
857
|
-
)
|
|
858
|
-
----
|
|
859
|
-
====
|
|
860
|
-
|
|
861
|
-
== See also
|
|
862
|
-
|
|
863
|
-
* link:RUBY_API[Ruby API documentation]
|
|
864
|
-
* link:CLI[Command-line interface]
|
|
865
|
-
* link:MATCH_OPTIONS[Match options reference]
|
|
866
|
-
* link:MODES[Diff modes]
|
|
867
|
-
* link:PREPROCESSING[Preprocessing options]
|