canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
data/docs/CLI.adoc ADDED
@@ -0,0 +1,493 @@
1
+ ---
2
+ layout: default
3
+ title: Command-Line Interface
4
+ nav_order: 11
5
+ parent: Basic Usage
6
+ ---
7
+ = Canon command-line interface
8
+ :toc:
9
+ :toclevels: 3
10
+
11
+ == Scope
12
+
13
+ This document describes Canon's command-line interface (CLI). The `canon`
14
+ command provides file formatting and comparison capabilities.
15
+
16
+ For Ruby API usage, see link:RUBY_API[Ruby API documentation].
17
+
18
+ For RSpec testing, see link:RSPEC[RSpec documentation].
19
+
20
+ == General
21
+
22
+ After installing the Canon gem, the `canon` command becomes available in your
23
+ shell. It provides two main commands:
24
+
25
+ * `canon format` - Format files in XML, HTML, JSON, or YAML
26
+ * `canon diff` - Compare files semantically
27
+
28
+ == Installation
29
+
30
+ [source,bash]
31
+ ----
32
+ $ gem install canon
33
+ $ canon --help
34
+ ----
35
+
36
+ == Format command
37
+
38
+ The `format` command formats files in canonical or pretty-print mode.
39
+
40
+ === Syntax
41
+
42
+ [source,bash]
43
+ ----
44
+ canon format FILE [OPTIONS]
45
+ ----
46
+
47
+ === Output modes
48
+
49
+ `pretty` (default):: Human-readable output with indentation (2 spaces)
50
+ `c14n`:: Canonical form without indentation (compact)
51
+
52
+ === Options
53
+
54
+ `-f, --format FORMAT`:: Specify format: `xml`, `html`, `json`, or `yaml`
55
+ (auto-detected from extension if not specified)
56
+
57
+ `-m, --mode MODE`:: Output mode: `pretty` (default) or `c14n`
58
+
59
+ `-i, --indent N`:: Indentation spaces for pretty mode (default: 2)
60
+
61
+ `--indent-type TYPE`:: Indentation type: `space` (default) or `tab`
62
+
63
+ `-o, --output FILE`:: Write output to file instead of stdout
64
+
65
+ `-c, --with-comments`:: Include comments in canonical XML output
66
+
67
+ === Format detection
68
+
69
+ When `--format` is not specified, Canon detects the format from file extension:
70
+
71
+ [cols="1,1"]
72
+ |===
73
+ |File Extension |Detected Format
74
+
75
+ |`.xml`
76
+ |XML
77
+
78
+ |`.html`, `.htm`
79
+ |HTML
80
+
81
+ |`.json`
82
+ |JSON
83
+
84
+ |`.yaml`, `.yml`
85
+ |YAML
86
+ |===
87
+
88
+ === Examples
89
+
90
+ .Pretty-print with default settings
91
+ [example]
92
+ ====
93
+ [source,bash]
94
+ ----
95
+ $ canon format input.xml
96
+ <?xml version="1.0" encoding="UTF-8"?>
97
+ <root>
98
+ <a>1</a>
99
+ <b>2</b>
100
+ </root>
101
+ ----
102
+ ====
103
+
104
+ .Canonical mode (compact)
105
+ [example]
106
+ ====
107
+ [source,bash]
108
+ ----
109
+ $ canon format input.xml --mode c14n
110
+ <root><a>1</a><b>2</b></root>
111
+ ----
112
+ ====
113
+
114
+ .Custom indentation
115
+ [example]
116
+ ====
117
+ [source,bash]
118
+ ----
119
+ # 4-space indentation
120
+ $ canon format input.xml --mode pretty --indent 4
121
+
122
+ # Tab indentation
123
+ $ canon format input.xml --indent-type tab
124
+
125
+ # JSON with 4 spaces
126
+ $ canon format data.json --indent 4
127
+ ----
128
+ ====
129
+
130
+ .Specify format explicitly
131
+ [example]
132
+ ====
133
+ [source,bash]
134
+ ----
135
+ # Format a .txt file as XML
136
+ $ canon format data.txt --format xml
137
+ ----
138
+ ====
139
+
140
+ .Save to file
141
+ [example]
142
+ ====
143
+ [source,bash]
144
+ ----
145
+ $ canon format input.xml --output formatted.xml
146
+
147
+ # Or use shell redirection
148
+ $ canon format input.xml > formatted.xml
149
+ ----
150
+ ====
151
+
152
+ .Include XML comments in canonical output
153
+ [example]
154
+ ====
155
+ [source,bash]
156
+ ----
157
+ $ canon format doc.xml --mode c14n --with-comments
158
+ ----
159
+ ====
160
+
161
+ .Format different file types
162
+ [example]
163
+ ====
164
+ [source,bash]
165
+ ----
166
+ # HTML files
167
+ $ canon format page.html
168
+ $ canon format page.html --mode c14n
169
+
170
+ # JSON files
171
+ $ canon format config.json
172
+ $ canon format config.json --indent 4
173
+
174
+ # YAML files
175
+ $ canon format data.yaml
176
+ ----
177
+ ====
178
+
179
+ == Diff command
180
+
181
+ The `diff` command performs semantic comparison of files.
182
+
183
+ === Syntax
184
+
185
+ [source,bash]
186
+ ----
187
+ canon diff FILE1 FILE2 [OPTIONS]
188
+ ----
189
+
190
+ === Diff modes
191
+
192
+ Canon supports two diff modes optimized for different use cases:
193
+
194
+ `by-object`:: (default for XML/JSON/YAML) Semantic tree-based diff showing
195
+ structural changes
196
+
197
+ `by-line`:: (default for HTML, optional for XML) Line-by-line diff after
198
+ canonicalization
199
+
200
+ See link:MODES[Diff modes] for details.
201
+
202
+ === Format options
203
+
204
+ `-f, --format FORMAT`:: Format for both files: `xml`, `html`, `json`, or
205
+ `yaml` (auto-detected from extension if not specified)
206
+
207
+ `--format1 FORMAT`:: Format of first file (when comparing different formats)
208
+
209
+ `--format2 FORMAT`:: Format of second file (when comparing different formats)
210
+
211
+ === Comparison options
212
+
213
+ `-v, --verbose`:: Show detailed differences (default: just show if files
214
+ differ)
215
+
216
+ `--by-line`:: Use line-by-line diff for XML (default: by-object mode)
217
+
218
+ `--text-content BEHAVIOR`:: How to compare text content: `strict`,
219
+ `normalize`, or `ignore`
220
+
221
+ `--structural-whitespace BEHAVIOR`:: How to handle whitespace between
222
+ elements: `strict`, `normalize`, or `ignore`
223
+
224
+ `--attribute-whitespace BEHAVIOR`:: How to handle whitespace in attribute
225
+ values: `strict`, `normalize`, or `ignore` (XML/HTML only)
226
+
227
+ `--attribute-order BEHAVIOR`:: Whether attribute order matters: `strict` or
228
+ `ignore` (XML/HTML only)
229
+
230
+ `--attribute-values BEHAVIOR`:: How to compare attribute values: `strict`,
231
+ `normalize`, or `ignore` (XML/HTML only)
232
+
233
+ `--key-order BEHAVIOR`:: Whether key order matters: `strict` or `ignore`
234
+ (JSON/YAML only)
235
+
236
+ `--comments BEHAVIOR`:: How to handle comments: `strict`, `normalize`, or
237
+ `ignore`
238
+
239
+ `--match-profile PROFILE`:: Use predefined match profile: `strict`,
240
+ `rendered`, `spec_friendly`, or `content_only`
241
+
242
+ See link:MATCH_OPTIONS[Match options] for detailed dimension reference.
243
+
244
+ === Output options
245
+
246
+ `--color` / `--no-color`:: Enable/disable colored output (default: enabled)
247
+
248
+ `--context-lines N`:: Number of context lines around changes (default: 3)
249
+
250
+ `--diff-grouping-lines N`:: Group changes within N lines of each other into a single block
251
+
252
+ See link:DIFF_FORMATTING[Diff formatting] for details.
253
+
254
+ === Exit codes
255
+
256
+ * `0` - Files are semantically equivalent
257
+ * `1` - Files are semantically different
258
+ * Other - Error occurred
259
+
260
+ === Examples
261
+
262
+ .Basic comparison
263
+ [example]
264
+ ====
265
+ [source,bash]
266
+ ----
267
+ # Compare two JSON files
268
+ $ canon diff config1.json config2.json
269
+ Files are semantically different
270
+
271
+ # Compare two XML files
272
+ $ canon diff file1.xml file2.xml
273
+ ✅ Files are semantically equivalent
274
+ ----
275
+ ====
276
+
277
+ .Verbose mode with detailed diff
278
+ [example]
279
+ ====
280
+ [source,bash]
281
+ ----
282
+ $ canon diff config1.json config2.json --verbose
283
+ Visual Diff:
284
+ ├── settings.debug:
285
+ │ ├── - true
286
+ │ └── + false
287
+ └── version:
288
+ ├── - "1.0.0"
289
+ └── + "2.0.0"
290
+ ----
291
+ ====
292
+
293
+ .XML comparison with by-line mode
294
+ [example]
295
+ ====
296
+ [source,bash]
297
+ ----
298
+ $ canon diff document1.xml document2.xml --by-line --verbose
299
+ Line-by-line diff:
300
+ 4 - | <foreword id="fwd">
301
+ 4 + | <foreword displayorder="2" id="fwd">
302
+ 5 | <p>First paragraph</p>
303
+ 10 + | <p>New content</p>
304
+ 11 | </clause>
305
+ ----
306
+ ====
307
+
308
+ .HTML comparison
309
+ [example]
310
+ ====
311
+ [source,bash]
312
+ ----
313
+ $ canon diff page1.html page2.html --verbose
314
+ Line-by-line diff:
315
+ 4 - | <title>My Page</title>
316
+ 4 + | <title>My Updated Page</title>
317
+ 7 - | <div class="header">
318
+ 7 + | <nav class="header">
319
+ ----
320
+ ====
321
+
322
+ .Using match profiles
323
+ [example]
324
+ ====
325
+ [source,bash]
326
+ ----
327
+ # Use spec_friendly profile
328
+ $ canon diff file1.xml file2.xml \
329
+ --match-profile spec_friendly \
330
+ --verbose
331
+
332
+ # Use rendered profile for HTML
333
+ $ canon diff page1.html page2.html \
334
+ --match-profile rendered \
335
+ --verbose
336
+
337
+ # Use strict profile (exact matching)
338
+ $ canon diff file1.xml file2.xml \
339
+ --match-profile strict \
340
+ --verbose
341
+ ----
342
+ ====
343
+
344
+ .Customize match dimensions
345
+ [example]
346
+ ====
347
+ [source,bash]
348
+ ----
349
+ # Normalize text content, ignore whitespace
350
+ $ canon diff file1.xml file2.xml \
351
+ --text-content normalize \
352
+ --structural-whitespace ignore \
353
+ --verbose
354
+
355
+ # Ignore comments and attribute order
356
+ $ canon diff file1.xml file2.xml \
357
+ --comments ignore \
358
+ --attribute-order ignore \
359
+ --verbose
360
+
361
+ # Multiple dimension overrides
362
+ $ canon diff file1.xml file2.xml \
363
+ --text-content normalize \
364
+ --structural-whitespace ignore \
365
+ --attribute-whitespace normalize \
366
+ --comments ignore \
367
+ --verbose
368
+ ----
369
+ ====
370
+
371
+ .Combine profile with dimension overrides
372
+ [example]
373
+ ====
374
+ [source,bash]
375
+ ----
376
+ # Use spec_friendly but require strict comments
377
+ $ canon diff file1.xml file2.xml \
378
+ --match-profile spec_friendly \
379
+ --comments strict \
380
+ --verbose
381
+ ----
382
+ ====
383
+
384
+ .Customize diff output
385
+ [example]
386
+ ====
387
+ [source,bash]
388
+ ----
389
+ # Show more context lines
390
+ $ canon diff file1.xml file2.xml \
391
+ --verbose \
392
+ --context-lines 5
393
+
394
+ # Group nearby changes
395
+ $ canon diff file1.xml file2.xml \
396
+ --verbose \
397
+ --diff-grouping-lines 10
398
+
399
+ # Disable colors when redirecting output to a file
400
+ $ canon diff file1.xml file2.xml \
401
+ --verbose \
402
+ --no-color > diff.txt
403
+
404
+ # Combine diff options
405
+ $ canon diff file1.xml file2.xml \
406
+ --verbose \
407
+ --context-lines 5 \
408
+ --diff-grouping-lines 2 \
409
+ --no-color
410
+ ----
411
+ ====
412
+
413
+ .Compare different formats
414
+ [example]
415
+ ====
416
+ [source,bash]
417
+ ----
418
+ # Compare JSON with YAML (must have same structure)
419
+ $ canon diff config.json config.yaml \
420
+ --format1 json \
421
+ --format2 yaml \
422
+ --verbose
423
+ ----
424
+ ====
425
+
426
+ .JSON/YAML comparison examples
427
+ [example]
428
+ ====
429
+ [source,bash]
430
+ ----
431
+ # JSON comparison (uses by-object mode by default)
432
+ $ canon diff config1.json config2.json --verbose
433
+
434
+ # YAML comparison with key order ignored
435
+ $ canon diff data1.yaml data2.yaml \
436
+ --key-order ignore \
437
+ --verbose
438
+
439
+ # Show 10 context lines for large config files
440
+ $ canon diff large-config1.json large-config2.json \
441
+ --verbose \
442
+ --context-lines 10
443
+ ----
444
+ ====
445
+
446
+ .Shell integration
447
+ [example]
448
+ ====
449
+ [source,bash]
450
+ ----
451
+ # Use in scripts
452
+ if canon diff expected.xml actual.xml; then
453
+ echo "Files match!"
454
+ else
455
+ echo "Files differ"
456
+ canon diff expected.xml actual.xml --verbose
457
+ fi
458
+
459
+ # Generate diff report
460
+ canon diff file1.xml file2.xml --verbose --no-color > diff-report.txt
461
+
462
+ # Compare with process substitution
463
+ canon diff <(curl https://example.com/api/v1) \
464
+ <(curl https://example.com/api/v2) \
465
+ --format json \
466
+ --verbose
467
+ ----
468
+ ====
469
+
470
+ == Help command
471
+
472
+ Get help on available commands and options:
473
+
474
+ [source,bash]
475
+ ----
476
+ # General help
477
+ $ canon help
478
+
479
+ # Command-specific help
480
+ $ canon help format
481
+ $ canon help diff
482
+
483
+ # Show version
484
+ $ canon --version
485
+ ----
486
+
487
+ == See also
488
+
489
+ * link:RUBY_API[Ruby API documentation]
490
+ * link:RSPEC[RSpec matchers]
491
+ * link:MATCH_OPTIONS[Match options reference]
492
+ * link:MODES[Diff modes]
493
+ * link:DIFF_FORMATTING[Diff formatting options]
data/docs/CUSTOMIZING_BEHAVIOR.adoc ADDED
@@ -0,0 +1,19 @@
1
+ ---
2
+ layout: default
3
+ title: Customizing Behavior
4
+ nav_order: 4
5
+ has_children: true
6
+ ---
7
+ = Customizing behavior
8
+
9
+ Configure Canon for your specific needs:
10
+
11
+ * **link:MATCH_OPTIONS[Match options]** - Match dimensions and profiles
12
+ * **link:PREPROCESSING[Preprocessing]** - Document normalization options
13
+ * **link:DIFF_FORMATTING[Diff formatting]** - Customizing diff output
14
+ * **link:INPUT_VALIDATION[Input validation]** - Error handling
15
+ * **link:CHARACTER_VISUALIZATION[Character visualization]** - Whitespace
16
+ visibility
17
+
18
+ These documents cover Canon's configuration options for fine-tuning comparison
19
+ behavior and diff output.