canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -0,0 +1,504 @@
1
+ ---
2
+ title: YAML Format
3
+ parent: Format Support
4
+ grand_parent: Understanding
5
+ nav_order: 4
6
+ ---
7
+ = YAML format
8
+ :toc:
9
+ :toclevels: 3
10
+
11
+ == Purpose
12
+
13
+ This page describes Canon's YAML format support, including canonicalization with sorted keys, comment preservation, anchors and aliases, and YAML-specific features.
14
+
15
+ == Canonicalization
16
+
17
+ Canon provides YAML canonicalization with sorted keys and standard formatting.
18
+
19
+ **Key features:**
20
+
21
+ * Alphabetically sorted mapping keys
22
+ * Consistent indentation
23
+ * Standard YAML 1.2 format
24
+ * Comment preservation (optional)
25
+ * Anchor and alias handling
26
+
27
+ .YAML canonicalization example
28
+ [example]
29
+ ====
30
+ [source,ruby]
31
+ ----
32
+ yaml = <<~YAML
33
+ z: 3
34
+ a: 1
35
+ nested:
36
+ y: 2
37
+ x: 1
38
+ YAML
39
+
40
+ Canon.format(yaml, :yaml)
41
+ # => Keys sorted at all levels
42
+ ----
43
+ ====
44
+
45
+ == Format defaults
46
+
47
+ [cols="1,1"]
48
+ |===
49
+ |Dimension |Default Behavior
50
+
51
+ |`text_content`
52
+ |`:strict`
53
+
54
+ |`structural_whitespace`
55
+ |`:strict`
56
+
57
+ |`key_order`
58
+ |`:strict`
59
+
60
+ |`comments`
61
+ |`:strict`
62
+ |===
63
+
64
+ Default diff mode: `:by_object` (tree-based semantic diff)
65
+
66
+ == Match profiles for YAML
67
+
68
+ Canon provides predefined profiles optimized for YAML documents. Each profile configures preprocessing, match options, diff algorithm, and formatting.
69
+
70
+ === strict profile
71
+
72
+ **Purpose**: Character-perfect YAML matching
73
+
74
+ **Configuration**:
75
+
76
+ [source,ruby]
77
+ ----
78
+ {
79
+ preprocessing: :none,
80
+ diff_algorithm: :dom,
81
+ diff_mode: :by_object, # Tree-based diff output (YAML default)
82
+ match: {
83
+ text_content: :strict,
84
+ structural_whitespace: :strict,
85
+ key_order: :strict,
86
+ comments: :strict
87
+ }
88
+ }
89
+ ----
90
+
91
+ **Use when**: Testing exact YAML serializer output, verifying YAML formatting compliance.
92
+
93
+ === rendered profile
94
+
95
+ **Purpose**: Normalized YAML comparison
96
+
97
+ **Configuration**:
98
+
99
+ [source,ruby]
100
+ ----
101
+ {
102
+ preprocessing: :none,
103
+ diff_algorithm: :dom,
104
+ diff_mode: :by_object,
105
+ match: {
106
+ text_content: :normalize,
107
+ structural_whitespace: :normalize,
108
+ key_order: :ignore, # Allow unordered mapping keys
109
+ comments: :ignore
110
+ }
111
+ }
112
+ ----
113
+
114
+ **Use when**: Comparing YAML data where key order, whitespace, and comments don't matter.
115
+
116
+ === spec_friendly profile
117
+
118
+ **Purpose**: Test-friendly comparison for RSpec
119
+
120
+ **Configuration**:
121
+
122
+ [source,ruby]
123
+ ----
124
+ {
125
+ preprocessing: :normalize,
126
+ diff_algorithm: :dom,
127
+ diff_mode: :by_object,
128
+ match: {
129
+ text_content: :normalize,
130
+ structural_whitespace: :ignore,
131
+ key_order: :ignore,
132
+ comments: :ignore
133
+ }
134
+ }
135
+ ----
136
+
137
+ **Use when**: Writing RSpec tests for YAML generation, testing semantic YAML correctness. Most common for YAML testing.
138
+
139
+ === content_only profile
140
+
141
+ **Purpose**: Maximum tolerance - only values matter
142
+
143
+ **Configuration**:
144
+
145
+ [source,ruby]
146
+ ----
147
+ {
148
+ preprocessing: :normalize,
149
+ diff_algorithm: :dom,
150
+ diff_mode: :by_object,
151
+ match: {
152
+ text_content: :normalize,
153
+ structural_whitespace: :ignore,
154
+ key_order: :ignore,
155
+ comments: :ignore
156
+ }
157
+ }
158
+ ----
159
+
160
+ **Use when**: Only YAML structure and values need to match, maximum flexibility for formatting, key order, and comments.
161
+
162
+ == YAML-specific features
163
+
164
+ === Comment support
165
+
166
+ YAML comments are preserved and can be compared.
167
+
168
+ .Comment handling example
169
+ [example]
170
+ ====
171
+ [source,yaml]
172
+ ----
173
+ # Configuration file
174
+ name: test
175
+ # Database settings
176
+ database:
177
+ host: localhost
178
+ port: 5432
179
+ ----
180
+
181
+ Comments can be preserved or ignored using the `comments` dimension:
182
+
183
+ [source,ruby]
184
+ ----
185
+ # Preserve comments
186
+ Canon::Comparison.equivalent?(yaml1, yaml2,
187
+ match: { comments: :strict }
188
+ )
189
+
190
+ # Ignore comments
191
+ Canon::Comparison.equivalent?(yaml1, yaml2,
192
+ match: { comments: :ignore }
193
+ )
194
+ ----
195
+ ====
196
+
197
+ === Key ordering
198
+
199
+ Mapping keys are sorted alphabetically for consistent output.
200
+
201
+ .Key ordering example
202
+ [example]
203
+ ====
204
+ [source,yaml]
205
+ ----
206
+ # Unordered input
207
+ name: Alice
208
+ age: 30
209
+ city: NYC
210
+
211
+ # Canonicalized output (keys sorted)
212
+ age: 30
213
+ city: NYC
214
+ name: Alice
215
+ ----
216
+ ====
217
+
218
+ === Type detection
219
+
220
+ YAML's rich type system is preserved (strings, numbers, booleans, dates, etc.).
221
+
222
+ .Type detection example
223
+ [example]
224
+ ====
225
+ [source,yaml]
226
+ ----
227
+ string: "123"
228
+ number: 123
229
+ boolean: true
230
+ null_value: null
231
+ date: 2024-01-01
232
+ float: 123.45
233
+ unquoted: hello
234
+ ----
235
+
236
+ YAML automatically detects types:
237
+ * `123` (number) ≠ `"123"` (string)
238
+ * `true` (boolean) ≠ `"true"` (string)
239
+ * `null` ≠ `"null"` (string)
240
+ * `2024-01-01` (date object) ≠ `"2024-01-01"` (string)
241
+ ====
242
+
243
+ === Anchors and aliases
244
+
245
+ YAML anchors (`&`) and aliases (`*`) are properly handled.
246
+
247
+ .Anchors and aliases example
248
+ [example]
249
+ ====
250
+ [source,yaml]
251
+ ----
252
+ defaults: &defaults
253
+ timeout: 30
254
+ retries: 3
255
+
256
+ production:
257
+ <<: *defaults
258
+ host: prod.example.com
259
+
260
+ development:
261
+ <<: *defaults
262
+ host: dev.example.com
263
+ ----
264
+
265
+ Canon correctly expands anchors and aliases during comparison:
266
+
267
+ [source,yaml]
268
+ ----
269
+ # Expanded equivalent
270
+ production:
271
+ timeout: 30
272
+ retries: 3
273
+ host: prod.example.com
274
+
275
+ development:
276
+ timeout: 30
277
+ retries: 3
278
+ host: dev.example.com
279
+ ----
280
+ ====
281
+
282
+ === Multi-line strings
283
+
284
+ YAML supports multiple styles for multi-line strings.
285
+
286
+ .Multi-line string styles
287
+ [example]
288
+ ====
289
+ [source,yaml]
290
+ ----
291
+ # Literal block scalar (preserves newlines)
292
+ literal: |
293
+ Line 1
294
+ Line 2
295
+ Line 3
296
+
297
+ # Folded block scalar (folds newlines to spaces)
298
+ folded: >
299
+ This is a very long line that will be
300
+ folded into a single line with spaces
301
+ replacing the newlines.
302
+
303
+ # Double-quoted single-line string
304
+ plain: "This is a plain string"
305
+ ----
306
+
307
+ These are treated as different values unless `text_content: :normalize` is used.
308
+ ====
309
+
310
+ == Usage examples
311
+
312
+ === Basic YAML comparison
313
+
314
+ [source,ruby]
315
+ ----
316
+ yaml1 = File.read("config1.yml")
317
+ yaml2 = File.read("config2.yml")
318
+
319
+ Canon::Comparison.equivalent?(yaml1, yaml2)
320
+ ----
321
+
322
+ === Ignoring comments and key order
323
+
324
+ [source,ruby]
325
+ ----
326
+ Canon::Comparison.equivalent?(yaml1, yaml2,
327
+ match: {
328
+ key_order: :ignore,
329
+ comments: :ignore
330
+ }
331
+ )
332
+ ----
333
+
334
+ === Test-friendly YAML comparison
335
+
336
+ [source,ruby]
337
+ ----
338
+ expect(actual_yaml).to be_yaml_equivalent_to(expected_yaml)
339
+ .with_profile(:spec_friendly)
340
+ ----
341
+
342
+ === Using YAML comparator directly
343
+
344
+ [source,ruby]
345
+ ----
346
+ Canon::Comparison::YamlComparator.equivalent?(yaml1, yaml2,
347
+ match: { comments: :ignore }
348
+ )
349
+ ----
350
+
351
+ === CLI usage
352
+
353
+ [source,bash]
354
+ ----
355
+ # Basic comparison
356
+ canon diff config1.yml config2.yml --verbose
357
+
358
+ # Ignore comments and key order
359
+ canon diff file1.yml file2.yml \
360
+ --match-profile spec_friendly \
361
+ --verbose
362
+ ----
363
+
364
+ == Common YAML comparison scenarios
365
+
366
+ === Configuration file comparison
367
+
368
+ [source,ruby]
369
+ ----
370
+ # Compare config files ignoring formatting
371
+ Canon::Comparison.equivalent?(config1, config2,
372
+ match_profile: :spec_friendly,
373
+ verbose: true
374
+ )
375
+ ----
376
+
377
+ === CI/CD configuration comparison
378
+
379
+ [source,ruby]
380
+ ----
381
+ # Compare workflow files with comments ignored
382
+ Canon::Comparison.equivalent?(workflow1, workflow2,
383
+ match: {
384
+ comments: :ignore,
385
+ key_order: :ignore
386
+ },
387
+ verbose: true
388
+ )
389
+ ----
390
+
391
+ === Sequence (array) ordering
392
+
393
+ .Array order example
394
+ [example]
395
+ ====
396
+ [source,yaml]
397
+ ----
398
+ # File 1
399
+ items:
400
+ - one
401
+ - two
402
+ - three
403
+
404
+ # File 2
405
+ items:
406
+ - three
407
+ - two
408
+ - one
409
+ ----
410
+
411
+ These are **NOT** equivalent because YAML sequences (arrays) are ordered, just like JSON arrays.
412
+ ====
413
+
414
+ == YAML quirks and edge cases
415
+
416
+ === Boolean interpretation
417
+
418
+ .Boolean interpretation
419
+ [example]
420
+ ====
421
+ [source,yaml]
422
+ ----
423
+ # All of these are boolean true under YAML 1.1
424
+ value1: true
425
+ value2: True
426
+ value3: TRUE
427
+ value4: yes
428
+ value5: Yes
429
+ value6: YES
430
+ value7: on
431
+ value8: On
432
+ value9: ON
433
+ ----
434
+
435
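+ YAML 1.1 treats every value above as boolean true. YAML 1.2 (which Canon uses) is stricter: under its core schema only `true` and `false` (including the case variants `True`/`TRUE` and `False`/`FALSE`) are booleans, so `yes`, `on`, and similar words are ordinary strings.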
436
+ ====
437
+
438
+ === Numeric strings
439
+
440
+ .Numeric strings
441
+ [example]
442
+ ====
443
+ [source,yaml]
444
+ ----
445
+ # Number
446
+ port: 8080
447
+
448
+ # String (quoted)
449
+ port: "8080"
450
+
451
+ # String (with non-numeric character)
452
+ port: 8080a
453
+ ----
454
+
455
+ Quoting forces string interpretation.
456
+ ====
457
+
458
+ === Empty values
459
+
460
+ .Empty values
461
+ [example]
462
+ ====
463
+ [source,yaml]
464
+ ----
465
+ # Different meanings
466
+ key1: # null (no value)
467
+ key2: "" # empty string
468
+ key3: null # explicit null
469
+ key4: [] # empty array
470
+ key5: {} # empty object
471
+ ----
472
+
473
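+ In YAML, `key1` (no value) and `key3` (explicit `null`) both parse as null and are equivalent to each other. The empty string, empty array, and empty object are each distinct values: they are not equivalent to null or to one another.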
474
+ ====
475
+
476
+ === Indentation sensitivity
477
+
478
+ .Indentation example
479
+ [example]
480
+ ====
481
+ [source,yaml]
482
+ ----
483
+ # Valid YAML
484
+ parent:
485
+ child1: value1
486
+ child2: value2
487
+
488
+ # Invalid YAML (inconsistent indentation)
489
+ parent:
490
+ child1: value1
491
+ child2: value2 # Wrong indentation!
492
+ ----
493
+
494
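+ YAML is sensitive to indentation: inconsistent indentation, as in the second example, makes the document invalid, so it cannot be parsed or compared. For valid documents that differ only in minor indentation, use `structural_whitespace: :ignore`.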
495
+ ====
496
+
497
+ == See also
498
+
499
+ * link:../comparison-pipeline.adoc[Comparison Pipeline] - Understanding the 4 layers
500
+ * link:../../features/match-options/[Match Options] - All matching options
501
+ * link:../../guides/choosing-configuration.adoc[Choosing Configuration] - Decision guide
502
+ * link:index.adoc[Format Support] - Overview of all formats
503
+ * link:json.adoc[JSON Format] - JSON-specific features (similar to YAML)
504
+ * link:xml.adoc[XML Format] - XML-specific features
@@ -0,0 +1,130 @@
1
+ ---
2
+ layout: default
3
+ title: Understanding Canon
4
+ nav_order: 4
5
+ has_children: true
6
+ ---
7
+ = Understanding Canon
8
+
9
+ Learn how Canon works internally and the principles behind its design.
10
+
11
+ == Overview
12
+
13
+ This section explains Canon's architecture, algorithms, and design decisions. Understanding these concepts will help you use Canon more effectively and troubleshoot issues when they arise.
14
+
15
+ == What You'll Learn
16
+
17
+ link:architecture[**Architecture**]::
18
+ High-level system overview, module responsibilities, and design principles (MECE, orchestrator pattern).
19
+
20
+ link:formats/[**Format Support**]::
21
+ How Canon handles XML, HTML, JSON, and YAML, including format-specific canonicalization rules and comparison behaviors.
22
+
23
+ link:algorithms/[**Comparison Algorithms**]::
24
+ The two comparison strategies: DOM Diff (stable, position-based) and Semantic Tree Diff (experimental, structure-aware).
25
+
26
+ link:diff-modes/[**Diff Output Modes**]::
27
+ How differences are displayed: by-line (traditional) vs by-object (tree-based semantic).
28
+
29
+ == The Comparison Pipeline
30
+
31
+ Canon's comparison system has 4 distinct layers:
32
+
33
+ [source]
34
+ ----
35
+ ┌─────────────────────────────────────────────────┐
36
+ │ Layer 1: Preprocessing │
37
+ │ Options: none, c14n, normalize, format │
38
+ └─────────────────────────────────────────────────┘
39
+
40
+ ┌─────────────────────────────────────────────────┐
41
+ │ Layer 2: Match Algorithm Selection │
42
+ │ Options: dom, semantic │
43
+ └─────────────────────────────────────────────────┘
44
+
45
+ ┌─────────────────────────────────────────────────┐
46
+ │ Layer 3: Match Options │
47
+ │ • Match dimensions (what to compare) │
48
+ │ • Match profiles (preset combinations) │
49
+ └─────────────────────────────────────────────────┘
50
+
51
+ ┌─────────────────────────────────────────────────┐
52
+ │ Layer 4: Diff Formatting │
53
+ │ • Diff mode (by-line, by-object) │
54
+ │ • Colors, symbols, context │
55
+ └─────────────────────────────────────────────────┘
56
+ ----
57
+
58
+ See link:../features/[Features] for detailed configuration of each layer.
59
+
60
+ == Key Concepts
61
+
62
+ === Canonicalization
63
+
64
+ Converting documents to a standardized, normalized form where semantically equivalent documents produce identical output.
65
+
66
+ Benefits:
67
+
68
+ * Enables reliable comparison
69
+ * Simplifies digital signatures
70
+ * Facilitates caching and deduplication
71
+
72
+ See link:formats/[Format Support] for format-specific canonicalization rules.
73
+
74
+ === Semantic Comparison
75
+
76
+ Comparing documents based on their meaning and structure, not just textual representation.
77
+
78
+ Examples:
79
+
80
+ * Element order doesn't matter (unless explicitly required)
81
+ * Whitespace between elements is insignificant
82
+ * Comments can be ignored
83
+ * Attribute order is flexible
84
+
85
+ See link:../features/match-options/[Match Options] for controlling semantic comparison.
86
+
87
+ === Diff Algorithms
88
+
89
+ **DOM Diff** (default):
90
+
91
+ * Position-based element matching
92
+ * Fast and stable
93
+ * Traditional line-by-line output
94
+ * No move detection
95
+
96
+ **Semantic Tree Diff** (experimental):
97
+
98
+ * Signature-based tree matching
99
+ * Detects moves, merges, splits
100
+ * Operation-based output (INSERT, DELETE, UPDATE, MOVE)
101
+ * Slower but more intelligent
102
+
103
+ See link:algorithms/[Comparison Algorithms] for details.
104
+
105
+ == Design Principles
106
+
107
+ **MECE Organization**::
108
+ Canon follows Mutually Exclusive, Collectively Exhaustive principles. Each module has a clear, non-overlapping responsibility.
109
+
110
+ **Orchestrator Pattern**::
111
+ High-level orchestrators coordinate specialized workers. This makes the codebase modular and testable.
112
+
113
+ **Progressive Disclosure**::
114
+ Simple operations require minimal configuration. Advanced features are available when needed.
115
+
116
+ **Format Agnostic Core**::
117
+ Core comparison logic is independent of specific formats. Format adapters handle format-specific details.
118
+
119
+ == Next Steps
120
+
121
+ * Read link:architecture[Architecture] for the big picture
122
+ * Explore link:algorithms/[Comparison Algorithms] to understand matching strategies
123
+ * Check link:formats/[Format Support] for format-specific behaviors
124
+ * Review link:diff-modes/[Diff Modes] to understand output options
125
+
126
+ == See Also
127
+
128
+ * link:../features/[Features] - Customizing Canon's behavior
129
+ * link:../advanced/[Advanced Topics] - Deep dives into internals
130
+ * link:../interfaces/[Interfaces] - How to use Canon
data/lib/canon/cli.rb CHANGED
@@ -3,6 +3,7 @@
3
3
  require "thor"
4
4
  require_relative "commands/format_command"
5
5
  require_relative "commands/diff_command"
6
+ require_relative "options/registry"
6
7
 
7
8
  module Canon
8
9
  # Command-line interface for Canon
@@ -74,6 +75,14 @@ module Canon
74
75
  explicitly specified with --format (for both files) or --format1 and
75
76
  --format2 (for different formats).
76
77
 
78
+ Diff Algorithms:
79
+ - dom: DOM-based positional comparison (default)
80
+ - semantic: Tree-based semantic diff with move/insert/delete/update operations
81
+
82
+ Diff Modes:
83
+ - by_object: Semantic tree-based diff (default for JSON/YAML/XML)
84
+ - by_line: Line-by-line diff (default for HTML)
85
+
77
86
  Match Profiles:
78
87
  - strict: Exact matching (all whitespace significant)
79
88
  - rendered: Mimics browser/CSS rendering (HTML default)
@@ -91,6 +100,15 @@ module Canon
91
100
  # Basic semantic comparison (uses format defaults)
92
101
  $ canon diff file1.xml file2.xml
93
102
 
103
+ # Use semantic tree diff algorithm
104
+ $ canon diff file1.xml file2.xml --diff-algorithm semantic
105
+
106
+ # Use DOM algorithm with by-line mode
107
+ $ canon diff file1.xml file2.xml --diff-algorithm dom --diff-mode by_line
108
+
109
+ # Use semantic algorithm with by-object mode
110
+ $ canon diff file1.json file2.json --diff-algorithm semantic --diff-mode by_object
111
+
94
112
  # Use match profile for test-friendly comparison
95
113
  $ canon diff file1.xml file2.xml --match-profile spec_friendly
96
114
 
@@ -131,10 +149,20 @@ module Canon
131
149
  type: :boolean,
132
150
  default: false,
133
151
  desc: "Show detailed differences"
152
+ method_option :diff_algorithm,
153
+ aliases: "-a",
154
+ type: :string,
155
+ enum: %w[dom semantic],
156
+ default: "dom",
157
+ desc: "Diff algorithm: dom (positional) or semantic (tree-based)"
158
+ method_option :diff_mode,
159
+ type: :string,
160
+ enum: %w[by_line by_object],
161
+ desc: "Diff output mode: by_line or by_object (default: format-specific)"
134
162
  method_option :by_line,
135
163
  type: :boolean,
136
164
  default: false,
137
- desc: "Use line-by-line diff for XML (default: by-object)"
165
+ desc: "DEPRECATED: Use --diff-mode by_line instead"
138
166
  # New match options
139
167
  method_option :match_profile,
140
168
  aliases: "-p",
@@ -157,6 +185,14 @@ module Canon
157
185
  type: :string,
158
186
  enum: %w[strict normalize ignore],
159
187
  desc: "Attribute whitespace matching (XML/HTML only): strict, normalize, or ignore"
188
+ method_option :attribute_order,
189
+ type: :string,
190
+ enum: %w[strict ignore],
191
+ desc: "Attribute ordering (XML/HTML only): strict or ignore"
192
+ method_option :attribute_values,
193
+ type: :string,
194
+ enum: %w[strict normalize ignore],
195
+ desc: "Attribute value matching (XML/HTML only): strict, normalize, or ignore"
160
196
  method_option :key_order,
161
197
  type: :string,
162
198
  enum: %w[strict ignore],
@@ -165,6 +201,11 @@ module Canon
165
201
  type: :string,
166
202
  enum: %w[strict normalize ignore],
167
203
  desc: "Comment matching: strict, normalize, or ignore"
204
+ method_option :show_diffs,
205
+ type: :string,
206
+ enum: %w[all normative informative],
207
+ default: "all",
208
+ desc: "Control which diffs to display: all, normative, or informative"
168
209
  method_option :context_lines,
169
210
  type: :numeric,
170
211
  default: 3,