canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,540 @@
1
+ ---
2
+ layout: default
3
+ title: Diff Formatting
4
+ nav_order: 32
5
+ parent: Customizing Behavior
6
+ ---
7
+ = Diff formatting
8
+ :toc:
9
+ :toclevels: 3
10
+
11
+ == Scope
12
+
13
+ This document describes Canon's diff formatting options that control how
14
+ differences are displayed. Diff formatting is Phase 3 of Canon's comparison
15
+ architecture.
16
+
17
+ For match architecture, see link:MATCH_ARCHITECTURE[Match architecture].
18
+
19
+ For character visualization, see link:CHARACTER_VISUALIZATION[Character
20
+ visualization].
21
+
22
+ == General
23
+
24
+ When documents differ, Canon formats the differences for human review with
25
+ syntax highlighting, context lines, and whitespace visualization.
26
+
27
+ Diff formatting options control:
28
+
29
+ * Color output
30
+ * Context lines around changes
31
+ * Grouping of nearby changes
32
+ * Diff mode (by-line vs by-object)
33
+
34
+ == Diff formatting options
35
+
36
+ === use_color
37
+
38
+ **Purpose**: Enable/disable ANSI color codes in diff output.
39
+
40
+ **Type**: Boolean
41
+
42
+ **Default**: `true`
43
+
44
+ **When to use**:
45
+
46
+ * Enable (`true`): Terminal viewing, interactive use
47
+ * Disable (`false`): Piping to files, CI environments, plain text output
48
+
49
+ **Color scheme**:
50
+
51
+ * **Red**: Deletions/removed content (`-` markers)
52
+ * **Green**: Additions/inserted content (`+` markers)
53
+ * **Yellow**: Line numbers, structure markers, pipe separators
54
+ * **Cyan**: Informative diffs (when shown)
55
+ * **Default terminal color**: Unchanged context lines
56
+
57
+ .use_color examples
58
+ [example]
59
+ ====
60
+ [source,ruby]
61
+ ----
62
+ # Ruby API - enable colors
63
+ Canon::Comparison.equivalent?(doc1, doc2,
64
+ verbose: true,
65
+ diff: { use_color: true }
66
+ )
67
+
68
+ # Ruby API - disable colors for file output
69
+ result = Canon::Comparison.equivalent?(doc1, doc2,
70
+ verbose: true,
71
+ diff: { use_color: false }
72
+ )
73
+ File.write('diff.txt', result[:diff])
74
+
75
+ # RSpec - disable for CI
76
+ Canon::RSpecMatchers.configure do |config|
77
+ config.xml.diff.use_color = !ENV['CI']
78
+ end
79
+ ----
80
+
81
+ CLI:
82
+
83
+ [source,bash]
84
+ ----
85
+ # Enable colors (default)
86
+ $ canon diff file1.xml file2.xml --verbose
87
+
88
+ # Disable colors
89
+ $ canon diff file1.xml file2.xml --no-color --verbose
90
+
91
+ # Pipe to file
92
+ $ canon diff file1.xml file2.xml --no-color --verbose > diff.txt
93
+ ----
94
+ ====
95
+
96
+ === context_lines
97
+
98
+ **Purpose**: Number of unchanged lines to show around each change for context.
99
+
100
+ **Type**: Integer
101
+
102
+ **Default**: `3`
103
+
104
+ **Range**: `0` to any positive integer
105
+
106
+ **Effect**: Higher values show more surrounding context; lower values show
107
+ less, and `0` shows only the changed lines.
108
+
109
+ .context_lines examples
110
+ [example]
111
+ ====
112
+ **With `context_lines: 3` (default)**:
113
+
114
+ [source]
115
+ ----
116
+ 2| | <document>
117
+ 3| | <preface>
118
+ 4| - | <foreword id="fwd">
119
+ 4| + | <foreword displayorder="2" id="fwd">
120
+ 5| | <p>First paragraph</p>
121
+ 6| | </foreword>
122
+ 7| | </preface>
123
+ ----
124
+
125
+ Shows up to 3 lines before and after the change.
126
+
127
+ **With `context_lines: 1`**:
128
+
129
+ [source]
130
+ ----
131
+ 3| | <preface>
132
+ 4| - | <foreword id="fwd">
133
+ 4| + | <foreword displayorder="2" id="fwd">
134
+ 5| | <p>First paragraph</p>
135
+ ----
136
+
137
+ Shows only 1 line before and after.
138
+
139
+ **With `context_lines: 0`**:
140
+
141
+ [source]
142
+ ----
143
+ 4| - | <foreword id="fwd">
144
+ 4| + | <foreword displayorder="2" id="fwd">
145
+ ----
146
+
147
+ Shows only the changed lines.
148
+
149
+ **Usage**:
150
+
151
+ [source,ruby]
152
+ ----
153
+ # Ruby API
154
+ Canon::Comparison.equivalent?(doc1, doc2,
155
+ verbose: true,
156
+ diff: { context_lines: 5 }
157
+ )
158
+
159
+ # RSpec
160
+ Canon::RSpecMatchers.configure do |config|
161
+ config.xml.diff.context_lines = 5
162
+ end
163
+ ----
164
+
165
+ CLI:
166
+
167
+ [source,bash]
168
+ ----
169
+ $ canon diff file1.xml file2.xml --context-lines 5 --verbose
170
+ ----
171
+ ====
172
+
173
+ === diff_grouping_lines
174
+
175
+ **Purpose**: Maximum line distance between separate changes for them to be
176
+ grouped into a single context block.
177
+
178
+ **Type**: Integer or `nil`
179
+
180
+ **Default**: `nil` (no grouping)
181
+
182
+ **Effect**: When set, changes within N lines of each other are grouped into
183
+ context blocks with a header showing the number of diffs.
184
+
185
+ .diff_grouping_lines examples
186
+ [example]
187
+ ====
188
+ **Without grouping** (`nil`):
189
+
190
+ [source]
191
+ ----
192
+ Context block (line 4):
193
+ 4| - | <foreword id="fwd">
194
+ 4| + | <foreword displayorder="2" id="fwd">
195
+
196
+ Context block (line 10):
197
+ 10| + | <p>New content</p>
198
+ ----
199
+
200
+ Each change is a separate block.
201
+
202
+ **With `diff_grouping_lines: 10`**:
203
+
204
+ [source]
205
+ ----
206
+ Context block has 2 diffs (lines 4-10):
207
+ 4| - | <foreword id="fwd">
208
+ 4| + | <foreword displayorder="2" id="fwd">
209
+ 5| | <p>First paragraph</p>
210
+ 6| | </foreword>
211
+ ...
212
+ 10| + | <p>New content</p>
213
+ 11| | </clause>
214
+ ----
215
+
216
+ Changes within 10 lines grouped together.
217
+
218
+ **Usage**:
219
+
220
+ [source,ruby]
221
+ ----
222
+ # Ruby API
223
+ Canon::Comparison.equivalent?(doc1, doc2,
224
+ verbose: true,
225
+ diff: { grouping_lines: 10 }
226
+ )
227
+
228
+ # RSpec
229
+ Canon::RSpecMatchers.configure do |config|
230
+ config.xml.diff.grouping_lines = 10
231
+ end
232
+ ----
233
+
234
+ CLI:
235
+
236
+ [source,bash]
237
+ ----
238
+ $ canon diff file1.xml file2.xml \
239
+ --diff-grouping-lines 10 \
240
+ --verbose
241
+ ----
242
+ ====
243
+
244
+ === mode
245
+
246
+ **Purpose**: Select the diff mode: `by_line` or `by_object`.
247
+
248
+ **Type**: Symbol (`:by_line` or `:by_object`)
249
+
250
+ **Default**: Format-dependent (HTML: `:by_line`, XML/JSON/YAML:
251
+ `:by_object`)
252
+
253
+ **Effect**: Changes how differences are calculated and displayed.
254
+
255
+ See link:MODES[Diff modes] for complete details.
256
+
257
+ .mode examples
258
+ [example]
259
+ ====
260
+ [source,ruby]
261
+ ----
262
+ # Ruby API - force by-line mode for XML
263
+ Canon::Comparison.equivalent?(xml1, xml2,
264
+ verbose: true,
265
+ diff: { mode: :by_line }
266
+ )
267
+
268
+ # RSpec
269
+ Canon::RSpecMatchers.configure do |config|
270
+ config.xml.diff.mode = :by_line
271
+ end
272
+ ----
273
+
274
+ CLI:
275
+
276
+ [source,bash]
277
+ ----
278
+ $ canon diff file1.xml file2.xml --by-line --verbose
279
+ ----
280
+ ====
281
+
282
+ == Enhanced diff features
283
+
284
+ When `use_color` is enabled, Canon provides several enhancements to make
285
+ diffs more readable.
286
+
287
+ === Color-coded line numbers
288
+
289
+ **Purpose**: Distinguish structural elements from content changes.
290
+
291
+ **Color scheme**:
292
+
293
+ * **Yellow**: Line numbers and pipe separators
294
+ * **Red**: Deletion markers (`-`) and removed content
295
+ * **Green**: Addition markers (`+`) and inserted content
296
+ * **Default terminal color**: Unchanged context lines
297
+
298
+ .Color-coded output example
299
+ [example]
300
+ ====
301
+ In a colored terminal:
302
+
303
+ [source]
304
+ ----
305
+ 5| 5 | <p>First paragraph</p> # Yellow numbers/pipes, default text
306
+ 6| -| <old>Text</old> # Yellow numbers/pipes, red marker/content
307
+ | 6+| <new>Text</new> # Yellow numbers/pipes, green marker/content
308
+ ----
309
+
310
+ Colors help distinguish:
311
+
312
+ * Diff structure (line numbers, pipes) - yellow
313
+ * Removed content - red
314
+ * Added content - green
315
+ * Unchanged content - your terminal's default color
316
+ ====
317
+
318
+ === Whitespace visualization
319
+
320
+ **Purpose**: Make invisible whitespace and special characters visible in
321
+ diffs.
322
+
323
+ **Scope**: Applied only to diff lines (additions, deletions, changes), not
324
+ context lines.
325
+
326
+ **Default characters visualized**:
327
+
328
+ * Regular space (U+0020) → `░` (Light Shade)
329
+ * Tab (U+0009) → `⇥` (Rightwards Arrow to Bar)
330
+ * Non-breaking space (U+00A0) → `␣` (Open Box)
331
+ * Line feed (U+000A) → `↵` (Downwards Arrow with Corner Leftwards)
332
+ * Zero-width space (U+200B) → `→` (Rightwards Arrow)
333
+
334
+ See link:CHARACTER_VISUALIZATION[Character visualization] for complete
335
+ character map and customization.
336
+
337
+ .Whitespace visualization example
338
+ [example]
339
+ ====
340
+ [source]
341
+ ----
342
+ # Space added before the element content
343
+ 10| -| <tag>Value</tag> # No space
344
+ | 10+| <tag>░Value</tag> # Space added (green light shade)
345
+
346
+ # Tab character
347
+ 15| -| <tag>⇥Value</tag> # Tab (red arrow-to-bar)
348
+ | 15+| <tag>░░Value</tag> # Two spaces (green light shades)
349
+
350
+ # Non-breaking space
351
+ 20| -| <tag>Value░</tag> # Regular space (red light shade)
352
+ | 20+| <tag>Value␣</tag> # Non-breaking space (green open box)
353
+ ----
354
+
355
+ Visualization makes invisible differences visible.
356
+ ====
357
+
358
+ === Non-ASCII detection
359
+
360
+ **Purpose**: Alert users when diffs contain non-ASCII characters that might
361
+ cause unexpected comparison failures.
362
+
363
+ **When shown**: When Canon detects non-ASCII characters (Unicode codepoint >
364
+ U+007F) in a diff block.
365
+
366
+ **Format**: Yellow warning with specific characters and Unicode codepoints.
367
+
368
+ .Non-ASCII warning example
369
+ [example]
370
+ ====
371
+ [source]
372
+ ----
373
+ Context block has 1 diff (line 10):
374
+ (WARNING: non-ASCII characters detected in diff: [' ' (U+00A0, shown as: ␣)])
375
+
376
+ 10| -| <p>Hello world</p> # U+0020 (regular space)
377
+ | 10+| <p>Hello␣world</p> # U+00A0 (non-breaking space)
378
+ ----
379
+
380
+ The warning shows:
381
+
382
+ * Which non-ASCII characters were found
383
+ * Their Unicode codepoints
384
+ * How they're visualized in the diff
385
+
386
+ **Common non-ASCII characters**:
387
+
388
+ * Non-breaking space (U+00A0) - from web copy-paste
389
+ * Em dash (U+2014) - from word processors
390
+ * Smart quotes (U+2018-U+201D) - from text editors
391
+ ====
392
+
393
+ == Configuration across interfaces
394
+
395
+ === Ruby API
396
+
397
+ [source,ruby]
398
+ ----
399
+ Canon::Comparison.equivalent?(doc1, doc2,
400
+ verbose: true,
401
+ diff: {
402
+ mode: :by_line,
403
+ use_color: true,
404
+ context_lines: 5,
405
+ grouping_lines: 10
406
+ }
407
+ )
408
+ ----
409
+
410
+ === CLI
411
+
412
+ [source,bash]
413
+ ----
414
+ $ canon diff file1.xml file2.xml \
415
+ --verbose \
416
+ --by-line \
417
+ --color \
418
+ --context-lines 5 \
419
+ --diff-grouping-lines 10
420
+ ----
421
+
422
+ === RSpec
423
+
424
+ [source,ruby]
425
+ ----
426
+ Canon::RSpecMatchers.configure do |config|
427
+ # Format-specific configuration
428
+ config.xml.diff.mode = :by_line
429
+ config.xml.diff.use_color = true
430
+ config.xml.diff.context_lines = 5
431
+ config.xml.diff.grouping_lines = 10
432
+
433
+ config.html.diff.mode = :by_line
434
+ config.html.diff.use_color = true
435
+
436
+ config.json.diff.mode = :by_object
437
+ config.json.diff.context_lines = 3
438
+ end
439
+ ----
440
+
441
+ == Combining formatting options
442
+
443
+ .Optimal settings for different scenarios
444
+ [example]
445
+ ====
446
+ **Interactive terminal review**:
447
+
448
+ [source,ruby]
449
+ ----
450
+ diff: {
451
+ use_color: true,
452
+ context_lines: 5, # More context
453
+ grouping_lines: 10 # Group nearby changes
454
+ }
455
+ ----
456
+
457
+ **CI/automated testing**:
458
+
459
+ [source,ruby]
460
+ ----
461
+ diff: {
462
+ use_color: false, # No ANSI codes
463
+ context_lines: 3, # Standard context
464
+ grouping_lines: nil # No grouping
465
+ }
466
+ ----
467
+
468
+ **Minimal diff output**:
469
+
470
+ [source,ruby]
471
+ ----
472
+ diff: {
473
+ use_color: false,
474
+ context_lines: 0, # Only changes
475
+ grouping_lines: nil
476
+ }
477
+ ----
478
+
479
+ **Maximum detail**:
480
+
481
+ [source,ruby]
482
+ ----
483
+ diff: {
484
+ use_color: true,
485
+ context_lines: 10, # Lots of context
486
+ grouping_lines: 5, # Group close changes
487
+ mode: :by_line # Line-level detail
488
+ }
489
+ ----
490
+ ====
491
+
492
+ == Troubleshooting
493
+
494
+ === Colors not showing
495
+
496
+ **Problem**: Diff output shows escape codes instead of colors.
497
+
498
+ **Solutions**:
499
+
500
+ * Ensure terminal supports ANSI colors
501
+ * Check if output is being piped (colors auto-disabled)
502
+ * Manually disable colors: `use_color: false` (or `--no-color` on the CLI)
503
+
504
+ === Too much context
505
+
506
+ **Problem**: Diff shows too many unchanged lines.
507
+
508
+ **Solution**: Reduce `context_lines`:
509
+
510
+ [source,ruby]
511
+ ----
512
+ diff: { context_lines: 1 }
513
+ ----
514
+
515
+ === Changes too scattered
516
+
517
+ **Problem**: Many small separate diff blocks.
518
+
519
+ **Solution**: Use `grouping_lines`:
520
+
521
+ [source,ruby]
522
+ ----
523
+ diff: { grouping_lines: 10 }
524
+ ----
525
+
526
+ === Whitespace not visible
527
+
528
+ **Problem**: Can't see whitespace differences.
529
+
530
+ **Solution**: Ensure `use_color: true` (whitespace visualization requires
531
+ colors).
532
+
533
+ == See also
534
+
535
+ * link:MODES[Diff modes]
536
+ * link:CHARACTER_VISUALIZATION[Character visualization]
537
+ * link:MATCH_ARCHITECTURE[Match architecture]
538
+ * link:RUBY_API[Ruby API documentation]
539
+ * link:CLI[Command-line interface]
540
+ * link:RSPEC[RSpec matchers]