canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
data/docs/FORMATS.adoc ADDED
@@ -0,0 +1,447 @@
1
+ ---
2
+ layout: default
3
+ title: Format Support
4
+ nav_order: 20
5
+ parent: Understanding Canon
6
+ ---
7
+ = Format support
8
+ :toc:
9
+ :toclevels: 3
10
+
11
+ == Scope
12
+
13
+ This document describes Canon's support for XML, HTML, JSON, and YAML formats,
14
+ including canonicalization rules, format detection, and format-specific
15
+ features.
16
+
17
+ For usage examples, see link:RUBY_API[Ruby API], link:CLI[CLI], or
18
+ link:RSPEC[RSpec documentation].
19
+
20
+ == General
21
+
22
+ Canon provides unified canonicalization and comparison for four serialization
23
+ formats. Each format has specific rules and defaults optimized for its typical
24
+ usage.
25
+
26
+ == XML format
27
+
28
+ === Canonicalization
29
+
30
+ Canon implements the https://www.w3.org/TR/xml-c14n11/[W3C Canonical XML
31
+ Version 1.1] specification.
32
+
33
+ **Key features:**
34
+
35
+ * Namespace declaration ordering (lexicographic by prefix)
36
+ * Attribute ordering (lexicographic by namespace URI, then local name)
37
+ * Character encoding normalization to UTF-8
38
+ * Special character encoding in text and attributes
39
+ * Removal of superfluous namespace declarations
40
+ * Support for xml:base, xml:lang, xml:space, and xml:id attributes
41
+ * Processing instruction and comment handling
42
+ * Document subset support with attribute inheritance
43
+
44
+ .XML canonicalization example
45
+ [example]
46
+ ====
47
+ [source,ruby]
48
+ ----
49
+ xml = <<~XML
50
+ <root xmlns:b="http://b.com" xmlns:a="http://a.com">
51
+ <item b:attr="2" a:attr="1">
52
+ Text content
53
+ </item>
54
+ </root>
55
+ XML
56
+
57
+ Canon.format(xml, :xml)
58
+ # => Namespace prefixes sorted, attributes sorted, whitespace normalized
59
+ ----
60
+ ====
61
+
62
+ === Format defaults
63
+
64
+ [cols="1,1"]
65
+ |===
66
+ |Dimension |Default Behavior
67
+
68
+ |`text_content`
69
+ |`:strict`
70
+
71
+ |`structural_whitespace`
72
+ |`:strict`
73
+
74
+ |`attribute_whitespace`
75
+ |`:strict`
76
+
77
+ |`attribute_order`
78
+ |`:strict`
79
+
80
+ |`attribute_values`
81
+ |`:strict`
82
+
83
+ |`comments`
84
+ |`:strict`
85
+ |===
86
+
87
+ Default diff mode: `:by_object` (tree-based semantic diff)
88
+
89
+ === XML-specific features
90
+
91
+ **Comment handling**: XML comments are removed from the canonical form unless
92
+ `--with-comments` is explicitly set.
93
+
94
+ **Namespace normalization**: Namespace declarations are sorted and duplicate
95
+ declarations are removed.
96
+
97
+ **xml: attributes**: Special attributes like `xml:lang`, `xml:space`, `xml:id`,
98
+ and `xml:base` are properly handled per specification.
99
+
100
+ == HTML format
101
+
102
+ === Canonicalization
103
+
104
+ Canon supports HTML 4, HTML5, and XHTML with automatic format detection.
105
+
106
+ **Key features:**
107
+
108
+ * Automatic HTML vs XHTML detection
109
+ * HTML5 parser for modern HTML
110
+ * XML parser for XHTML
111
+ * Consistent attribute ordering
112
+ * Whitespace normalization
113
+ * Comment handling in `<style>` and `<script>` tags
114
+
115
+ .HTML canonicalization example
116
+ [example]
117
+ ====
118
+ [source,ruby]
119
+ ----
120
+ html = <<~HTML
121
+ <!DOCTYPE html>
122
+ <html>
123
+ <body>
124
+ <div class="foo" id="bar">
125
+ Content
126
+ </div>
127
+ </body>
128
+ </html>
129
+ HTML
130
+
131
+ Canon.format(html, :html)
132
+ # => Normalized structure with consistent formatting
133
+ ----
134
+ ====
135
+
136
+ === Format defaults
137
+
138
+ [cols="1,1"]
139
+ |===
140
+ |Dimension |Default Behavior
141
+
142
+ |`text_content`
143
+ |`:normalize`
144
+
145
+ |`structural_whitespace`
146
+ |`:normalize`
147
+
148
+ |`attribute_whitespace`
149
+ |`:normalize`
150
+
151
+ |`attribute_order`
152
+ |`:ignore`
153
+
154
+ |`attribute_values`
155
+ |`:strict`
156
+
157
+ |`comments`
158
+ |`:ignore`
159
+ |===
160
+
161
+ Default diff mode: `:by_line` (line-based diff)
162
+
163
+ === HTML-specific features
164
+
165
+ **Format detection**: Automatically detects HTML5, HTML4, or XHTML based on
166
+ DOCTYPE and structure.
167
+
168
+ **Whitespace handling**: HTML whitespace is collapsed per CSS rendering rules.
169
+ Empty text nodes between elements are removed.
170
+
171
+ **Attribute order**: HTML attributes are inherently unordered per the HTML
172
+ specification, so default is `:ignore`.
173
+
174
+ **Special tags**: Comments in `<style>` and `<script>` tags are normalized
175
+ specially to handle CSS/JavaScript syntax.
176
+
177
+ == JSON format
178
+
179
+ === Canonicalization
180
+
181
+ Canon provides JSON canonicalization with sorted keys at all nesting levels.
182
+
183
+ **Key features:**
184
+
185
+ * Alphabetically sorted object keys
186
+ * Consistent indentation (configurable)
187
+ * Proper escape sequences
188
+ * No trailing commas
189
+ * Unicode normalization
190
+
191
+ .JSON canonicalization example
192
+ [example]
193
+ ====
194
+ [source,ruby]
195
+ ----
196
+ json = '{"z":3,"a":1,"nested":{"y":2,"x":1}}'
197
+
198
+ Canon.format(json, :json)
199
+ # => {"a":1,"nested":{"x":1,"y":2},"z":3}
200
+ # Keys sorted at all levels
201
+ ----
202
+ ====
203
+
204
+ === Format defaults
205
+
206
+ [cols="1,1"]
207
+ |===
208
+ |Dimension |Default Behavior
209
+
210
+ |`text_content`
211
+ |`:strict`
212
+
213
+ |`structural_whitespace`
214
+ |`:strict`
215
+
216
+ |`key_order`
217
+ |`:strict`
218
+ |===
219
+
220
+ Default diff mode: `:by_object` (tree-based semantic diff)
221
+
222
+ === JSON-specific features
223
+
224
+ **Key ordering**: Object keys are sorted alphabetically for consistent
225
+ comparison.
226
+
227
+ **Type preservation**: Distinguishes between numbers, strings, booleans, and
228
+ null.
229
+
230
+ **Nested structures**: Handles deeply nested objects and arrays.
231
+
232
+ **No comments**: Standard JSON does not support comments.
233
+
234
+ == YAML format
235
+
236
+ === Canonicalization
237
+
238
+ Canon provides YAML canonicalization with sorted keys and standard formatting.
239
+
240
+ **Key features:**
241
+
242
+ * Alphabetically sorted mapping keys
243
+ * Consistent indentation
244
+ * Standard YAML 1.2 format
245
+ * Comment preservation (optional)
246
+ * Anchor and alias handling
247
+
248
+ .YAML canonicalization example
249
+ [example]
250
+ ====
251
+ [source,ruby]
252
+ ----
253
+ yaml = <<~YAML
254
+ z: 3
255
+ a: 1
256
+ nested:
257
+ y: 2
258
+ x: 1
259
+ YAML
260
+
261
+ Canon.format(yaml, :yaml)
262
+ # => Keys sorted at all levels
263
+ ----
264
+ ====
265
+
266
+ === Format defaults
267
+
268
+ [cols="1,1"]
269
+ |===
270
+ |Dimension |Default Behavior
271
+
272
+ |`text_content`
273
+ |`:strict`
274
+
275
+ |`structural_whitespace`
276
+ |`:strict`
277
+
278
+ |`key_order`
279
+ |`:strict`
280
+
281
+ |`comments`
282
+ |`:strict`
283
+ |===
284
+
285
+ Default diff mode: `:by_object` (tree-based semantic diff)
286
+
287
+ === YAML-specific features
288
+
289
+ **Comment support**: YAML comments are preserved and can be compared.
290
+
291
+ **Key ordering**: Mapping keys are sorted alphabetically for consistent output.
292
+
293
+ **Type detection**: YAML's rich type system is preserved (strings, numbers,
294
+ booleans, dates, etc.).
295
+
296
+ **Anchors and aliases**: YAML anchors (`&`) and aliases (`*`) are properly
297
+ handled.
298
+
299
+ == Format detection
300
+
301
+ Canon automatically detects the format based on the file extension:
302
+
303
+ [cols="1,1"]
304
+ |===
305
+ |Extension |Format
306
+
307
+ |`.xml`
308
+ |XML
309
+
310
+ |`.html`, `.htm`
311
+ |HTML
312
+
313
+ |`.json`
314
+ |JSON
315
+
316
+ |`.yaml`, `.yml`
317
+ |YAML
318
+ |===
319
+
320
+ You can override auto-detection by explicitly specifying the format:
321
+
322
+ .Explicit format specification
323
+ [example]
324
+ ====
325
+ [source,ruby]
326
+ ----
327
+ # Ruby API
328
+ Canon.format(content, :xml)
329
+
330
+ # CLI
331
+ $ canon format file.txt --format xml
332
+
333
+ # Comparison
334
+ Canon::Comparison.equivalent?(doc1, doc2, format: :xml)
335
+ ----
336
+ ====
337
+
338
+ == Format comparison matrix
339
+
340
+ [cols="1,1,1,1,1"]
341
+ |===
342
+ |Feature |XML |HTML |JSON |YAML
343
+
344
+ |Canonicalization standard
345
+ |W3C C14N 1.1
346
+ |Custom
347
+ |Custom
348
+ |YAML 1.2
349
+
350
+ |Comment support
351
+ |Yes
352
+ |Yes
353
+ |No
354
+ |Yes
355
+
356
+ |Attribute/key ordering
357
+ |Strict default
358
+ |Ignored default
359
+ |Strict default
360
+ |Strict default
361
+
362
+ |Default diff mode
363
+ |by-object
364
+ |by-line
365
+ |by-object
366
+ |by-object
367
+
368
+ |Whitespace handling
369
+ |Strict default
370
+ |Normalized default
371
+ |Strict default
372
+ |Strict default
373
+
374
+ |Namespace support
375
+ |Yes
376
+ |Limited (XHTML)
377
+ |No
378
+ |No
379
+ |===
380
+
381
+ == Working with multiple formats
382
+
383
+ Canon's unified API works consistently across all formats:
384
+
385
+ .Unified API examples
386
+ [example]
387
+ ====
388
+ [source,ruby]
389
+ ----
390
+ # Format any content
391
+ Canon.format(xml_content, :xml)
392
+ Canon.format(html_content, :html)
393
+ Canon.format(json_content, :json)
394
+ Canon.format(yaml_content, :yaml)
395
+
396
+ # Compare any format
397
+ Canon::Comparison.equivalent?(xml1, xml2)
398
+ Canon::Comparison.equivalent?(html1, html2)
399
+ Canon::Comparison.equivalent?(json1, json2)
400
+ Canon::Comparison.equivalent?(yaml1, yaml2)
401
+
402
+ # RSpec matchers
403
+ expect(actual_xml).to be_xml_equivalent_to(expected_xml)
404
+ expect(actual_html).to be_html_equivalent_to(expected_html)
405
+ expect(actual_json).to be_json_equivalent_to(expected_json)
406
+ expect(actual_yaml).to be_yaml_equivalent_to(expected_yaml)
407
+ ----
408
+ ====
409
+
410
+ == Format-specific comparators
411
+
412
+ You can use format-specific comparator classes directly:
413
+
414
+ .Format-specific comparators
415
+ [example]
416
+ ====
417
+ [source,ruby]
418
+ ----
419
+ # XML comparator
420
+ Canon::Comparison::XmlComparator.equivalent?(xml1, xml2,
421
+ match: { attribute_order: :ignore }
422
+ )
423
+
424
+ # HTML comparator
425
+ Canon::Comparison::HtmlComparator.equivalent?(html1, html2,
426
+ match_profile: :rendered
427
+ )
428
+
429
+ # JSON comparator
430
+ Canon::Comparison::JsonComparator.equivalent?(json1, json2,
431
+ match: { key_order: :ignore }
432
+ )
433
+
434
+ # YAML comparator
435
+ Canon::Comparison::YamlComparator.equivalent?(yaml1, yaml2,
436
+ match: { comments: :ignore }
437
+ )
438
+ ----
439
+ ====
440
+
441
+ == See also
442
+
443
+ * link:RUBY_API[Ruby API documentation]
444
+ * link:CLI[Command-line interface]
445
+ * link:MATCH_OPTIONS[Match options reference]
446
+ * link:MODES[Diff modes]
447
+ * link:PREPROCESSING[Preprocessing options]
data/docs/INDEX.adoc ADDED
@@ -0,0 +1,222 @@
1
+ ---
2
+ layout: default
3
+ title: Documentation Index
4
+ nav_order: 1
5
+ has_children: false
6
+ ---
7
+ = Index
8
+ :toc:
9
+ :toclevels: 2
10
+
11
+ == Documentation overview
12
+
13
+ This index organizes all Canon documentation by complexity, progressing from
14
+ basic usage to advanced technical topics.
15
+
16
+ == Level 1: Getting started
17
+
18
+ Start here if you're new to Canon:
19
+
20
+ * **link:../README[README]** - Project overview, installation, and quick
21
+ start examples
22
+
23
+ == Level 2: Basic usage
24
+
25
+ Learn how to use Canon through different interfaces (choose based on your
26
+ needs):
27
+
28
+ * **link:RUBY_API[Ruby API]** - Using Canon from Ruby code
29
+ * **link:CLI[Command-line interface]** - Terminal commands and options
30
+ * **link:RSPEC[RSpec matchers]** - Testing with Canon in RSpec
31
+
32
+ == Level 3: Understanding Canon
33
+
34
+ Learn how Canon works internally:
35
+
36
+ * **link:FORMATS[Format support]** - XML, HTML, JSON, YAML
37
+ canonicalization
38
+ * **link:MODES[Diff modes]** - By-line vs by-object comparison modes
39
+ * **link:MATCH_ARCHITECTURE[Match architecture]** - Three-phase
40
+ comparison flow
41
+
42
+ == Level 4: Customizing behavior
43
+
44
+ Configure Canon for your specific needs:
45
+
46
+ * **link:MATCH_OPTIONS[Match options]** - Match dimensions and profiles
47
+ * **link:PREPROCESSING[Preprocessing]** - Document normalization options
48
+ * **link:DIFF_FORMATTING[Diff formatting]** - Customizing diff output
49
+ * **link:INPUT_VALIDATION[Input validation]** - Error handling
50
+ * **link:CHARACTER_VISUALIZATION[Character visualization]** - Whitespace
51
+ visibility
52
+
53
+ == Level 5: Advanced topics
54
+
55
+ For developers and advanced users:
56
+
57
+ * **link:VERBOSE[Verbose mode]** - Two-tier diff output architecture
58
+ * **link:SEMANTIC_DIFF_REPORT[Semantic diff report]** - Detailed report
59
+ format
60
+ * **link:NORMATIVE_INFORMATIVE_DIFFS[Normative vs informative diffs]** - Diff
61
+ classification
62
+ * **link:DIFF_ARCHITECTURE[Diff architecture]** - Six-layer technical
63
+ pipeline
64
+
65
+ == Documentation by topic
66
+
67
+ === Canonicalization and formatting
68
+
69
+ * link:FORMATS[Format support] - Format-specific canonicalization rules
70
+ * link:RUBY_API#formatting[Ruby API: Formatting]
71
+ * link:CLI#format-command[CLI: Format command]
72
+
73
+ === Comparison and matching
74
+
75
+ * link:MATCH_ARCHITECTURE[Match architecture] - Three-phase comparison
76
+ * link:MATCH_OPTIONS[Match options] - Dimensions and profiles
77
+ * link:PREPROCESSING[Preprocessing] - Document normalization
78
+ * link:RUBY_API#comparison[Ruby API: Comparison]
79
+ * link:CLI#diff-command[CLI: Diff command]
80
+
81
+ === Diff output
82
+
83
+ * link:MODES[Diff modes] - By-line vs by-object
84
+ * link:DIFF_FORMATTING[Diff formatting] - Output customization
85
+ * link:CHARACTER_VISUALIZATION[Character visualization] - Whitespace
86
+ visibility
87
+ * link:VERBOSE[Verbose mode] - Detailed diff output
88
+ * link:SEMANTIC_DIFF_REPORT[Semantic diff report] - Report structure
89
+
90
+ === Testing
91
+
92
+ * link:RSPEC[RSpec matchers] - Testing guide
93
+ * link:MATCH_OPTIONS#match-profiles[Match profiles] - Test-friendly
94
+ profiles
95
+ * link:INPUT_VALIDATION[Input validation] - Error handling in tests
96
+
97
+ === Technical architecture
98
+
99
+ * link:MATCH_ARCHITECTURE[Match architecture] - Comparison phases
100
+ * link:DIFF_ARCHITECTURE[Diff architecture] - Six-layer pipeline
101
+ * link:NORMATIVE_INFORMATIVE_DIFFS[Normative vs informative diffs] - Classification
102
+ system
103
+
104
+ == Quick reference
105
+
106
+ === Common tasks
107
+
108
+ **Format a document**::
109
+ * Ruby: link:RUBY_API#formatting[Formatting section]
110
+ * CLI: link:CLI#format-command[Format command]
111
+
112
+ **Compare documents**::
113
+ * Ruby: link:RUBY_API#comparison[Comparison section]
114
+ * CLI: link:CLI#diff-command[Diff command]
115
+ * RSpec: link:RSPEC#basic-usage[Basic usage]
116
+
117
+ **Configure comparison behavior**::
118
+ * link:MATCH_OPTIONS[Match options] - Dimensions and profiles
119
+ * link:PREPROCESSING[Preprocessing] - Normalization options
120
+
121
+ **Customize diff output**::
122
+ * link:DIFF_FORMATTING[Diff formatting] - Colors, context, grouping
123
+ * link:MODES[Diff modes] - By-line or by-object
124
+
125
+ **Debug test failures**::
126
+ * link:RSPEC#troubleshooting[RSpec troubleshooting]
127
+ * link:CHARACTER_VISUALIZATION[Character visualization]
128
+ * link:INPUT_VALIDATION[Input validation]
129
+
130
+ === By format
131
+
132
+ **XML**::
133
+ * link:FORMATS#xml-format[XML format details]
134
+ * link:RUBY_API[Ruby API] (all examples include XML)
135
+ * link:MODES[Diff modes] (by-object default, by-line optional)
136
+
137
+ **HTML**::
138
+ * link:FORMATS#html-format[HTML format details]
139
+ * link:MODES#by-line-mode[By-line mode] (HTML always uses this)
140
+
141
+ **JSON**::
142
+ * link:FORMATS#json-format[JSON format details]
143
+ * link:MODES#by-object-mode[By-object mode] (JSON default)
144
+
145
+ **YAML**::
146
+ * link:FORMATS#yaml-format[YAML format details]
147
+ * link:MODES#by-object-mode[By-object mode] (YAML default)
148
+
149
+ == Contributing to documentation
150
+
151
+ === Documentation style guidelines
152
+
153
+ When updating Canon documentation, follow these principles:
154
+
155
+ **Heading style**::
156
+ * Use sentence-case for all headings (e.g., "Match architecture", not "Match
157
+ Architecture")
158
+ * Use descriptive, clear headings that indicate content scope
159
+
160
+ **Structure**::
161
+ * Begin with a Scope section explaining what the document covers
162
+ * Include a General section for background and key concepts
163
+ * Organize content into logical sections with clear hierarchy
164
+ * Use a "See also" section at the end for cross-references
165
+
166
+ **MECE principle**::
167
+ * **Mutually Exclusive**: Each document covers a distinct topic without
168
+ overlap
169
+ * **Collectively Exhaustive**: All topics covered without gaps
170
+ * Avoid duplicate information across documents
171
+
172
+ **Examples and code**::
173
+ * Wrap examples with `[example]` and `====` delimiters
174
+ * Use `[source,lang]` with `----` delimiters for code blocks
175
+ * Provide clear example titles describing what is shown
176
+ * Include explanation after code examples
177
+ * Use practical, real-world examples when possible
178
+
179
+ **Lists**::
180
+ * Separate lists from surrounding content with blank lines before and after
181
+ * Ordered lists: Use `. ` flush with the line beginning
182
+ * Unordered lists: Use `* ` flush with the line beginning
183
+ * Second-level ordered: Use `.. `
184
+ * Second-level unordered: Use `** `
185
+ * Definition lists: Use `term:: description` format
186
+
187
+ **Line length and formatting**::
188
+ * Wrap lines at 80 characters
189
+ * Exceptions: Cross-references, formulas, and code blocks
190
+ * No "hanging paragraphs" - if needed, create a "General" subsection
191
+
192
+ **Cross-references**::
193
+ * Link to related documents using `link:DOCUMENT[Link text]`
194
+ * Include a "See also" section listing related documentation
195
+ * Reference specific sections using anchors where appropriate
196
+
197
+ **File organization**::
198
+ * Each file uses `:toc:` and `:toclevels:` (normally `3`) for navigation
199
+ * Keep files focused on a single topic
200
+ * Aim for 200-500 lines per document (except comprehensive references)
201
+
202
+ === Content guidelines
203
+
204
+ **Clarity**::
205
+ * Write in clear, technical prose
206
+ * Define terms when first introduced
207
+ * Use consistent terminology throughout
208
+
209
+ **Completeness**::
210
+ * Provide complete syntax definitions
211
+ * Include a "Where," legend explaining syntax elements
212
+ * Show both basic and advanced usage
213
+
214
+ **Accuracy**::
215
+ * Verify all code examples work correctly
216
+ * Ensure cross-references point to existing documents
217
+ * Keep documentation synchronized with code changes
218
+
219
+ **Accessibility**::
220
+ * Write for users with varying expertise levels
221
+ * Progress from simple to complex topics
222
+ * Link to prerequisite knowledge