canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
data/docs/VERBOSE.adoc ADDED
@@ -0,0 +1,482 @@
1
+ ---
2
+ layout: default
3
+ title: Verbose Mode
4
+ nav_order: 40
5
+ parent: Advanced Topics
6
+ ---
7
+ = Canon verbose mode guide
8
+ :toc:
9
+ :toclevels: 3
10
+
11
+ == General
12
+
13
+ Canon provides a two-tier verbose output architecture for debugging
14
+ comparison failures:
15
+
16
+ * **Semantic Diff Report**: Shown in verbose mode whenever differences exist - provides
17
+ actionable, dimension-specific details for each difference
18
+ * **CANON VERBOSE tables**: Extra detailed option tables shown only when
19
+ `CANON_VERBOSE=1` environment variable is set
20
+
21
+ This progressive disclosure ensures developers get useful information by
22
+ default, with additional debugging details available when needed.
23
+
24
+ == Architecture
25
+
26
+ The output architecture follows a clear three-tier structure:
27
+
28
+ [source]
29
+ ----
30
+ ╔═════════════════════════════════════════════════════════════════════╗
31
+ ║ CANON VERBOSE MODE OUTPUT ARCHITECTURE ║
32
+ ╚═════════════════════════════════════════════════════════════════════╝
33
+
34
+ When verbose: true is used:
35
+
36
+ ┌─────────────────────────────────────────────────────────────────────┐
37
+ │ TIER 1: CANON VERBOSE Tables (ONLY if CANON_VERBOSE=1) │
38
+ │ │
39
+ │ ┌────────────────────────────────────────────────────────────────┐ │
40
+ │ │ Match Options Table │ │
41
+ │ │ • Shows preprocessing behavior │ │
42
+ │ │ • Shows dimension behaviors (strict/normalize/ignore) │ │
43
+ │ │ • Explains what each setting means │ │
44
+ │ └────────────────────────────────────────────────────────────────┘ │
45
+ │ ┌────────────────────────────────────────────────────────────────┐ │
46
+ │ │ Formatter Options Table │ │
47
+ │ │ • Shows mode (by_line vs by_object) │ │
48
+ │ │ • Shows context_lines, diff_grouping_lines │ │
49
+ │ │ • Shows show_diffs filter setting │ │
50
+ │ └────────────────────────────────────────────────────────────────┘ │
51
+ │ ┌────────────────────────────────────────────────────────────────┐ │
52
+ │ │ Comparison Result Summary │ │
53
+ │ │ • Equivalent? (YES/NO) │ │
54
+ │ │ • Normative/Informative/Total diff counts │ │
55
+ │ └────────────────────────────────────────────────────────────────┘ │
56
+ └─────────────────────────────────────────────────────────────────────┘
57
+
58
+
59
+ ┌─────────────────────────────────────────────────────────────────────┐
60
+ │ TIER 2: Semantic Diff Report (ALWAYS if diffs exist) │
61
+ │ │
62
+ │ For each difference: │
63
+ │ • XPath location (e.g., /html/body/div/table/pre/text) │
64
+ │ • Dimension classification (attribute_presence, text_content) │
65
+ │ • Specific changes (Added: +xmlns:v, +xmlns:o) │
66
+ │ • Normative/Informative status │
67
+ │ • Dimension-specific formatting │
68
+ └─────────────────────────────────────────────────────────────────────┘
69
+
70
+
71
+ ┌─────────────────────────────────────────────────────────────────────┐
72
+ │ TIER 3: Detailed Diff (ALWAYS) │
73
+ │ │
74
+ │ Either: │
75
+ │ • Line-by-line diff (for HTML, or with --by-line flag) │
76
+ │ • Object tree diff (for XML/JSON/YAML by default) │
77
+ └─────────────────────────────────────────────────────────────────────┘
78
+ ----
79
+
80
+ === Output flow
81
+
82
+ The `DiffFormatter.format_comparison_result()` method orchestrates the
83
+ output:
84
+
85
+ . Check if `CANON_VERBOSE=1` → Render option tables
86
+ . Check if differences exist → Render Semantic Diff Report
87
+ . Always render detailed diff (by-line or by-object)
88
+
89
+ == Semantic diff report
90
+
91
+ === General
92
+
93
+ The Semantic Diff Report is the core verbose output, always shown when
94
+ differences exist. It provides dimension-specific, actionable details for
95
+ each difference.
96
+
97
+ Unlike the detailed diff (which shows every changed line), the Semantic
98
+ Diff Report shows a high-level summary of WHAT changed and WHY it matters.
99
+
100
+ === Output format
101
+
102
+ [example]
103
+ ====
104
+ [source]
105
+ ----
106
+ ======================================================================
107
+ SEMANTIC DIFF REPORT (1 difference)
108
+ ======================================================================
109
+
110
+ 🔍 DIFFERENCE #1/1 [NORMATIVE]
111
+ ──────────────────────────────────────────────────────────────────────
112
+ Dimension: attribute_presence
113
+ Location: /html
114
+
115
+ ⊖ Expected (File 1):
116
+ <html> with 2 attributes: lang, xmlns:epub
117
+
118
+ ⊕ Actual (File 2):
119
+ <html> with 6 attributes: lang, xmlns:epub, xmlns:m, xmlns:o,
120
+ xmlns:v, xmlns:w
121
+
122
+ ✨ Changes:
123
+ Added: +xmlns:m, +xmlns:o, +xmlns:v, +xmlns:w
124
+
125
+ ======================================================================
126
+ ----
127
+ ====
128
+
129
+ === Format structure
130
+
131
+ Each difference displays:
132
+
133
+ * **Status indicator**: `[NORMATIVE]` (green) or `[INFORMATIVE]` (yellow)
134
+ * **Dimension**: Which aspect differs (colorized in magenta)
135
+ * **Location**: XPath for XML/HTML, path for JSON/YAML (colorized in blue)
136
+ * **Expected section**: What was in File 1 (red heading, bold)
137
+ * **Actual section**: What was in File 2 (green heading, bold)
138
+ * **Changes summary**: Actionable description of the difference (yellow,
139
+ bold)
140
+
141
+ === Dimension-specific formats
142
+
143
+ ==== Attribute presence differences
144
+
145
+ For missing or extra attributes:
146
+
147
+ [example]
148
+ ====
149
+ [source]
150
+ ----
151
+ Dimension: attribute_presence
152
+ Location: /html/body/p
153
+
154
+ ⊖ Expected: <p> with 2 attributes: id, lang
155
+ ⊕ Actual: <p> with 4 attributes: id, lang, data-value, aria-label
156
+
157
+ ✨ Changes: Added: +data-value, +aria-label
158
+ ----
159
+ ====
160
+
161
+ Shows:
162
+
163
+ * Element name (`<p>`)
164
+ * How many attributes each has
165
+ * Which attributes were added (green with `+` prefix) or removed (red with
166
+ `-` prefix)
167
+
168
+ ==== Attribute value differences
169
+
170
+ For differing attribute values:
171
+
172
+ [example]
173
+ ====
174
+ [source]
175
+ ----
176
+ Dimension: attribute_values
177
+ Location: /html/body/div
178
+
179
+ ⊖ Expected: <div> class=" container fluid "
180
+ ⊕ Actual: <div> class="container fluid"
181
+
182
+ ✨ Changes: Whitespace normalization difference
183
+ ----
184
+ ====
185
+
186
+ Shows:
187
+
188
+ * Which specific attribute differs (highlighted in cyan)
189
+ * Exact values on both sides
190
+ * Analysis: "Whitespace difference only", "Whitespace normalization
191
+ difference", or "Value changed"
192
+
193
+ ==== Text content differences
194
+
195
+ For text that differs:
196
+
197
+ [example]
198
+ ====
199
+ [source]
200
+ ----
201
+ Dimension: text_content
202
+ Location: /html/body/div/table/tbody/tr/td/pre/text
203
+
204
+ ⊖ Expected: <text> "
205
+ puts \"Hello, world.\"
206
+ "
207
+ ⊕ Actual: <text> "puts \"Hello, world.\" "
208
+
209
+ ✨ Changes: ⚠️ Whitespace preserved (inside <pre>, <code>, etc. -
210
+ whitespace is significant)
211
+ ----
212
+ ====
213
+
214
+ Shows:
215
+
216
+ * Text preview (truncated at 100 characters)
217
+ * Special warning if inside `<pre>`, `<code>`, `<textarea>`, `<script>`,
218
+ or `<style>` elements (where whitespace is significant)
219
+
220
+ ==== Structural whitespace differences
221
+
222
+ For whitespace-only differences (usually informative):
223
+
224
+ [example]
225
+ ====
226
+ [source]
227
+ ----
228
+ Dimension: structural_whitespace
229
+ Location: /root/p
230
+
231
+ ⊖ Expected: <p> "hello␣␣world"
232
+ ⊕ Actual: <p> "hello␣world"
233
+
234
+ ✨ Changes: Whitespace-only difference (informative)
235
+ ----
236
+ ====
237
+
238
+ Shows:
239
+
240
+ * Whitespace visualized: `␣` for space, `→` for tab, `↵` for newline
241
+ * Marked as `[INFORMATIVE]` (yellow)
242
+
243
+ ==== JSON/YAML differences
244
+
245
+ For JSON/YAML path-based differences:
246
+
247
+ [example]
248
+ ====
249
+ [source]
250
+ ----
251
+ Dimension: 15
252
+ Location: user.email
253
+
254
+ ⊖ Expected: user.email = "alice@example.com"
255
+ ⊕ Actual: user.email = "bob@example.com"
256
+
257
+ ✨ Changes: Value changed
258
+ ----
259
+ ====
260
+
261
+ == CANON VERBOSE mode
262
+
263
+ === General
264
+
265
+ CANON VERBOSE mode adds detailed option tables BEFORE the Semantic Diff
266
+ Report. These tables help understand:
267
+
268
+ * What match options are in effect
269
+ * How the diff formatter is configured
270
+ * Statistics about the comparison result
271
+
272
+ To enable, set the `CANON_VERBOSE` environment variable:
273
+
274
+ [source,bash]
275
+ ----
276
+ CANON_VERBOSE=1 bundle exec rspec spec/my_failing_spec.rb:123
277
+ ----
278
+
279
+ === Match options table
280
+
281
+ Shows preprocessing and dimension behaviors:
282
+
283
+ [example]
284
+ ====
285
+ [source]
286
+ ----
287
+ ╭────────────────────────────────────────────────────────────────────╮
288
+ │ Match Options (HTML) │
289
+ ├────────────────────┬───────────┬────────────────────────────────────┤
290
+ │ Dimension │ Behavior │ Meaning │
291
+ ├────────────────────┼───────────┼────────────────────────────────────┤
292
+ │ preprocessing │ rendered │ As browser-rendered (compacted wh… │
293
+ │ text_content │ normalize │ Normalized then compared (normative… │
294
+ │ structural_whit… │ ignore │ Differences IGNORED (informative) │
295
+ │ attribute_presence │ strict │ Must match exactly (normative) │
296
+ │ attribute_values │ normalize │ Normalized then compared (normative… │
297
+ │ comments │ ignore │ Differences IGNORED (informative) │
298
+ ╰────────────────────┴───────────┴────────────────────────────────────╯
299
+ ----
300
+ ====
301
+
302
+ Preprocessing behaviors:
303
+
304
+ * `:none` - No preprocessing (compare as-is)
305
+ * `:c14n` - Canonicalize (XML C14N normalization)
306
+ * `:normalize` - Normalize (collapse whitespace, trim lines)
307
+ * `:format` - Pretty-format (consistent indentation)
308
+ * `:rendered` - As browser-rendered (compacted whitespace, to_html)
309
+
310
+ Dimension behaviors:
311
+
312
+ * `:ignore` - Differences IGNORED (informative, won't fail test)
313
+ * `:strict` - Must match exactly (normative, will fail test)
314
+ * `:normalize` - Normalized then compared (normative if different after
315
+ normalization)
316
+ * `:strip` - Strip leading/trailing whitespace only
317
+ * `:compact` - Collapse whitespace runs to single space
318
+
319
+ === Formatter options table
320
+
321
+ Shows diff formatting settings:
322
+
323
+ [example]
324
+ ====
325
+ [source]
326
+ ----
327
+ ╭────────────────────────────────────────────────────────────────────╮
328
+ │ Formatter Options │
329
+ ├─────────────────────┬─────────┬────────────────────────────────────┤
330
+ │ Option │ Value │ Impact │
331
+ ├─────────────────────┼─────────┼─────────────────────────────────────┤
332
+ │ mode │ by_line │ Line-by-line diff │
333
+ │ context_lines │ 3 │ 3 lines of context around diffs │
334
+ │ show_diffs │ all │ Show all diffs (normative + informative) │
335
+ ╰─────────────────────┴─────────┴────────────────────────────────────╯
336
+ ----
337
+ ====
338
+
339
+ === Comparison result summary
340
+
341
+ Shows diff statistics:
342
+
343
+ [example]
344
+ ====
345
+ [source]
346
+ ----
347
+ ╭─────────────────────────────────────────────────────────────────────╮
348
+ │ Comparison Result Summary │
349
+ ├────────────────┬─────────┬──────────────────────────────────────────┤
350
+ │ Equivalent? │ ✗ NO │ Documents have semantic differences │
351
+ │ Normative Diffs │ 1 diffs │ Semantic differences that matter │
352
+ │ Informative Diffs │ 0 │ Textual/formatting differences (ignored) │
353
+ │ Total Diffs │ 1 │ All differences found │
354
+ ╰────────────────┴─────────┴──────────────────────────────────────────╯
355
+ ----
356
+ ====
357
+
358
+ == Usage
359
+
360
+ === Using in RSpec matchers
361
+
362
+ Verbose mode is activated by using `verbose: true` in the comparison:
363
+
364
+ [source,ruby]
365
+ ----
366
+ result = Canon::Comparison::XmlComparator.equivalent?(
367
+ xml1,
368
+ xml2,
369
+ verbose: true
370
+ )
371
+ # Returns ComparisonResult object
372
+ # Semantic Diff Report shown if differences exist
373
+ ----
374
+
375
+ With RSpec matchers, verbose mode is automatic on test failure:
376
+
377
+ [source,ruby]
378
+ ----
379
+ # Semantic Diff Report automatically shown on failure
380
+ expect(actual_html).to be_html4_equivalent_to(expected_html)
381
+ ----
382
+
383
+ To enable CANON VERBOSE tables:
384
+
385
+ [source,bash]
386
+ ----
387
+ CANON_VERBOSE=1 bundle exec rspec spec/my_spec.rb:123
388
+ ----
389
+
390
+ === Using via CLI
391
+
392
+ [source,bash]
393
+ ----
394
+ # Verbose mode (shows Semantic Diff Report)
395
+ canon diff file1.xml file2.xml --verbose
396
+
397
+ # With CANON VERBOSE tables
398
+ CANON_VERBOSE=1 canon diff file1.xml file2.xml --verbose
399
+ ----
400
+
401
+ === Configuration
402
+
403
+ You can enable CANON VERBOSE mode permanently for a project:
404
+
405
+ [source,ruby]
406
+ ----
407
+ # In spec/spec_helper.rb
408
+ ENV['CANON_VERBOSE'] = '1' if ENV['DEBUG']
409
+
410
+ # Or in your test
411
+ before(:each) do
412
+ ENV['CANON_VERBOSE'] = '1'
413
+ end
414
+ ----
415
+
416
+ == Implementation
417
+
418
+ === DiffDetailFormatter module
419
+
420
+ Location: `lib/canon/diff_formatter/diff_detail_formatter.rb`
421
+
422
+ Responsible for:
423
+
424
+ * Formatting the Semantic Diff Report
425
+ * Dispatching to dimension-specific formatters
426
+ * Extracting XPath/JSON paths
427
+ * Detecting whitespace-preserving elements (`<pre>`, `<code>`, etc.)
428
+ * Colorizing output
429
+
430
+ Key methods:
431
+
432
+ * `format_report(differences)` - Main entry point
433
+ * `format_attribute_presence_details()` - Format attribute presence diffs
434
+ * `format_attribute_values_details()` - Format attribute value diffs
435
+ * `format_text_content_details()` - Format text content diffs
436
+ * `extract_xpath(node)` - Extract XPath with safety limits
437
+ * `inside_preserve_element?(node)` - Detect whitespace preservation
438
+
439
+ === DebugOutput module
440
+
441
+ Location: `lib/canon/diff_formatter/debug_output.rb`
442
+
443
+ Responsible for:
444
+
445
+ * Rendering CANON VERBOSE option tables
446
+ * Checking if `CANON_VERBOSE=1` is set
447
+ * Formatting match options with descriptions
448
+ * Formatting formatter options with impact
449
+ * Formatting comparison summary statistics
450
+
451
+ Key methods:
452
+
453
+ * `verbose_tables_only()` - Returns CANON VERBOSE tables or empty string
454
+ * `format_match_options_table()` - Render match options as table
455
+ * `format_formatter_options_table()` - Render formatter options as table
456
+ * `format_comparison_summary()` - Render result summary as table
457
+
458
+ === DiffFormatter integration
459
+
460
+ Location: `lib/canon/diff_formatter.rb`
461
+
462
+ The `format_comparison_result()` method orchestrates output:
463
+
464
+ [source,ruby]
465
+ ----
466
+ def format_comparison_result(comparison_result, expected, actual)
467
+ output = []
468
+
469
+ # 1. CANON VERBOSE tables (ONLY if CANON_VERBOSE=1)
470
+ output << DebugOutput.verbose_tables_only(...)
471
+
472
+ # 2. Semantic Diff Report (ALWAYS if diffs exist)
473
+ output << DiffDetailFormatter.format_report(...)
474
+
475
+ # 3. Detailed diff (ALWAYS)
476
+ output << format(differences, ...)
477
+
478
+ output.compact.join("\n")
479
+ end
480
+ ----
481
+
482
+ This ensures the correct output order and separation of concerns.
data/exe/canon ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "canon"
5
+ require "canon/cli"
6
+
7
+ Canon::Cli.start(ARGV)
data/lib/canon/cli.rb ADDED
@@ -0,0 +1,179 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+ require_relative "commands/format_command"
5
+ require_relative "commands/diff_command"
6
+
7
+ module Canon
8
+ # Command-line interface for Canon
9
+ class Cli < Thor
10
+ def self.exit_on_failure?
11
+ true
12
+ end
13
+
14
+ desc "format FILE",
15
+ "Canonicalize or pretty-print a file (XML, JSON, or YAML)"
16
+ long_desc <<~DESC
17
+ Canonicalize or pretty-print a file in XML, JSON, or YAML format.
18
+
19
+ The format is auto-detected from the file extension (.xml, .json, .yaml, .yml),
20
+ or can be explicitly specified with --format.
21
+
22
+ Mode options:
23
+ - pretty (default): Pretty-printed with indentation
24
+ - c14n: Canonical XML 1.1 for XML, canonical form for JSON/YAML/HTML
25
+
26
+ Examples:
27
+
28
+ $ canon format input.xml
29
+ $ canon format input.xml --mode pretty --indent 4
30
+ $ canon format input.json --output output.json
31
+ $ canon format data.xml --with-comments
32
+ $ canon format file.txt --format xml
33
+ DESC
34
+ method_option :format,
35
+ aliases: "-f",
36
+ type: :string,
37
+ enum: %w[xml json yaml html],
38
+ desc: "Format type (xml, json, yaml, or html)"
39
+ method_option :mode,
40
+ aliases: "-m",
41
+ type: :string,
42
+ enum: %w[c14n pretty],
43
+ default: "pretty",
44
+ desc: "Output mode: c14n (canonical) or pretty (indented)"
45
+ method_option :indent,
46
+ aliases: "-i",
47
+ type: :numeric,
48
+ default: 2,
49
+ desc: "Indentation amount for pretty mode (default: 2)"
50
+ method_option :indent_type,
51
+ type: :string,
52
+ enum: %w[space tab],
53
+ default: "space",
54
+ desc: "Indentation type: space or tab (default: space)"
55
+ method_option :output,
56
+ aliases: "-o",
57
+ type: :string,
58
+ desc: "Output file (default: stdout)"
59
+ method_option :with_comments,
60
+ aliases: "-c",
61
+ type: :boolean,
62
+ default: false,
63
+ desc: "Include comments in canonical XML output"
64
+ def format(file)
65
+ Commands::FormatCommand.new(options).run(file)
66
+ end
67
+
68
+ desc "diff FILE1 FILE2", "Compare two files semantically"
69
+ long_desc <<~DESC
70
+ Compare two files using semantic comparison (not text-based line diffs).
71
+
72
+ Supports XML, HTML, JSON, and YAML formats with intelligent structural
73
+ comparison. The format is auto-detected from file extensions, or can be
74
+ explicitly specified with --format (for both files) or --format1 and
75
+ --format2 (for different formats).
76
+
77
+ Match Profiles:
78
+ - strict: Exact matching (all whitespace significant)
79
+ - rendered: Mimics browser/CSS rendering (HTML default)
80
+ - spec_friendly: Ignores formatting differences (test-friendly)
81
+ - content_only: Ignores all structural differences
82
+
83
+ Preprocessing Options:
84
+ - none: No preprocessing (default)
85
+ - c14n: Canonicalize before comparison
86
+ - normalize: Normalize whitespace before comparison
87
+ - format: Pretty-print before comparison
88
+
89
+ Examples:
90
+
91
+ # Basic semantic comparison (uses format defaults)
92
+ $ canon diff file1.xml file2.xml
93
+
94
+ # Use match profile for test-friendly comparison
95
+ $ canon diff file1.xml file2.xml --match-profile spec_friendly
96
+
97
+ # Preprocess with normalization, then compare
98
+ $ canon diff file1.xml file2.xml --preprocessing normalize
99
+
100
+ # Match text content flexibly but keep structural whitespace strict
101
+ $ canon diff file1.xml file2.xml --text-content normalize --structural-whitespace strict
102
+
103
+ # Verbose mode with detailed differences
104
+ $ canon diff file1.json file2.json --verbose
105
+
106
+ # Compare different formats (same structure)
107
+ $ canon diff config.json config.yaml --format1 json --format2 yaml
108
+
109
+ # Disable color output
110
+ $ canon diff file1.xml file2.xml --no-color
111
+ DESC
112
+ method_option :format,
113
+ aliases: "-f",
114
+ type: :string,
115
+ enum: %w[xml html json yaml],
116
+ desc: "Format type for both files"
117
+ method_option :format1,
118
+ type: :string,
119
+ enum: %w[xml html json yaml],
120
+ desc: "Format type for first file"
121
+ method_option :format2,
122
+ type: :string,
123
+ enum: %w[xml html json yaml],
124
+ desc: "Format type for second file"
125
+ method_option :color,
126
+ type: :boolean,
127
+ default: true,
128
+ desc: "Colorize diff output"
129
+ method_option :verbose,
130
+ aliases: "-v",
131
+ type: :boolean,
132
+ default: false,
133
+ desc: "Show detailed differences"
134
+ method_option :by_line,
135
+ type: :boolean,
136
+ default: false,
137
+ desc: "Use line-by-line diff for XML (default: by-object)"
138
+ # New match options
139
+ method_option :match_profile,
140
+ aliases: "-p",
141
+ type: :string,
142
+ enum: %w[strict rendered spec_friendly content_only],
143
+ desc: "Match profile: strict, rendered, spec_friendly, or content_only"
144
+ method_option :preprocessing,
145
+ type: :string,
146
+ enum: %w[none c14n normalize format],
147
+ desc: "Preprocessing: none, c14n, normalize, or format"
148
+ method_option :text_content,
149
+ type: :string,
150
+ enum: %w[strict normalize ignore],
151
+ desc: "Text content matching: strict, normalize, or ignore"
152
+ method_option :structural_whitespace,
153
+ type: :string,
154
+ enum: %w[strict normalize ignore],
155
+ desc: "Structural whitespace matching: strict, normalize, or ignore"
156
+ method_option :attribute_whitespace,
157
+ type: :string,
158
+ enum: %w[strict normalize ignore],
159
+ desc: "Attribute whitespace matching (XML/HTML only): strict, normalize, or ignore"
160
+ method_option :key_order,
161
+ type: :string,
162
+ enum: %w[strict ignore],
163
+ desc: "Key ordering (JSON/YAML only): strict or ignore"
164
+ method_option :comments,
165
+ type: :string,
166
+ enum: %w[strict normalize ignore],
167
+ desc: "Comment matching: strict, normalize, or ignore"
168
+ method_option :context_lines,
169
+ type: :numeric,
170
+ default: 3,
171
+ desc: "Number of context lines around changes (default: 3)"
172
+ method_option :diff_grouping_lines,
173
+ type: :numeric,
174
+ desc: "Group diffs within N lines into context blocks (default: no grouping)"
175
+ def diff(file1, file2)
176
+ Commands::DiffCommand.new(options).run(file1, file2)
177
+ end
178
+ end
179
+ end