canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
data/docs/MODES.adoc ADDED
@@ -0,0 +1,432 @@
1
+ ---
2
+ layout: default
3
+ title: Diff Modes
4
+ nav_order: 21
5
+ parent: Understanding Canon
6
+ ---
7
+ = Diff modes
8
+ :toc:
9
+ :toclevels: 3
10
+
11
+ == Scope
12
+
13
+ This document explains Canon's two diff modes: by-line and by-object. Each
14
+ mode is optimized for different use cases and document formats.
15
+
16
+ For usage examples, see link:RUBY_API[Ruby API], link:CLI[CLI], or
17
+ link:RSPEC[RSpec documentation].
18
+
19
+ == General
20
+
21
+ Canon provides two distinct diff modes that present differences in different
22
+ ways:
23
+
24
+ * **by-line**: Traditional line-by-line diff after canonicalization
25
+ * **by-object**: Tree-based semantic diff showing structural changes
26
+
27
+ The choice of mode affects how differences are calculated, displayed, and
28
+ what information is emphasized.
29
+
30
+ == by-line mode
31
+
32
+ === Purpose
33
+
34
+ Show differences as traditional line-by-line changes, similar to `diff` or
35
+ `git diff`. Best for reviewing textual changes and understanding exact line
36
+ modifications.
37
+
38
+ === How it works
39
+
40
+ . Canonicalize both documents
41
+ . Perform line-by-line comparison using Longest Common Subsequence (LCS)
42
+ . For XML: Apply DOM-guided semantic matching to pair corresponding elements
43
+ . Display with line numbers, diff markers, and context
44
+
45
+ === When to use
46
+
47
+ **Best for:**
48
+
49
+ * HTML documents where line-level changes matter
50
+ * Reviewing markup structure changes
51
+ * Understanding exact textual differences
52
+ * When you need full document context
53
+ * Debugging formatting issues
54
+
55
+ **Default for:**
56
+
57
+ * HTML format (always uses by-line mode)
58
+ * XML format, but only when the `--by-line` flag is specified (XML otherwise defaults to by-object mode)
59
+
60
+ === Output format
61
+
62
+ [source]
63
+ ----
64
+ 4| - | <foreword id="fwd">
65
+ 4| + | <foreword displayorder="2" id="fwd">
66
+ 5| | <p>First paragraph</p>
67
+ ...
68
+ 10| + | <p>New content</p>
69
+ 11| | </clause>
70
+ ----
71
+
72
+ Where:
73
+
74
+ * Left column: Line number in first document
75
+ * Middle column: Line number in second document
76
+ * Marker: `-` (removed), `+` (added), ` ` (unchanged)
77
+ * Right column: Line content
78
+
79
+ === Features
80
+
81
+ **DOM-guided semantic matching** (XML only)::
82
+ Elements are matched by identity attributes (`id`, `ref`, `name`, `key`) and
83
+ element paths, ensuring corresponding elements are compared even when at
84
+ different line positions.
85
+
86
+ **Token-level highlighting**::
87
+ Within changed lines, individual tokens (element names, attribute names,
88
+ values) are highlighted to show exactly what changed.
89
+
90
+ **Context lines**::
91
+ Shows unchanged lines around changes for context (configurable via
92
+ `context_lines`).
93
+
94
+ **Whitespace visualization**::
95
+ Makes invisible characters visible when `use_color` is enabled.
96
+
97
+ .by-line mode example
98
+ [example]
99
+ ====
100
+ [source,ruby]
101
+ ----
102
+ # Ruby API - XML with by-line mode
103
+ result = Canon::Comparison.equivalent?(xml1, xml2,
104
+ verbose: true,
105
+ diff: { mode: :by_line }
106
+ )
107
+
108
+ # CLI
109
+ $ canon diff document1.xml document2.xml --by-line --verbose
110
+ ----
111
+
112
+ Output shows line-by-line changes with full context:
113
+
114
+ [source]
115
+ ----
116
+ 1| | <?xml version="1.0" encoding="UTF-8"?>
117
+ 2| | <document>
118
+ 3| | <preface>
119
+ 4| - | <foreword id="fwd">
120
+ 4| + | <foreword displayorder="2" id="fwd">
121
+ 5| | <p>First paragraph</p>
122
+ 6| | </foreword>
123
+ ----
124
+ ====
125
+
126
+ == by-object mode
127
+
128
+ === Purpose
129
+
130
+ Show differences as structured tree changes, focusing on what changed in the
131
+ document's semantic structure rather than line-by-line text.
132
+
133
+ === How it works
134
+
135
+ . Parse both documents into object trees (DOM for XML/HTML, objects for
136
+ JSON/YAML)
137
+ . Perform semantic tree comparison
138
+ . Identify additions, deletions, and modifications
139
+ . Display as visual tree showing only changes
140
+
141
+ === When to use
142
+
143
+ **Best for:**
144
+
145
+ * JSON and YAML documents (configuration files, API responses)
146
+ * XML when you care about structural changes
147
+ * Understanding what values changed
148
+ * Reviewing configuration differences
149
+ * Comparing API responses
150
+
151
+ **Default for:**
152
+
153
+ * JSON format
154
+ * YAML format
155
+ * XML format (unless `--by-line` is specified)
156
+
157
+ === Output format
158
+
159
+ [source]
160
+ ----
161
+ Visual Diff:
162
+ ├── settings.debug:
163
+ │ ├── - true
164
+ │ └── + false
165
+ └── version:
166
+ ├── - "1.0.0"
167
+ └── + "2.0.0"
168
+ ----
169
+
170
+ The tree shows:
171
+
172
+ * `├──` Tree structure using box-drawing characters
173
+ * `│` Vertical connector for a branch; the label before each `:` (e.g., `settings.debug`) is the path to the changed value
174
+ * `-` Removed/old value (red)
175
+ * `+` Added/new value (green)
176
+
177
+ === Features
178
+
179
+ **Structural focus**::
180
+ Shows only what changed in the object hierarchy, ignoring formatting.
181
+
182
+ **Path display**::
183
+ Full path to each change (e.g., `settings.debug`, `user.address.city`).
184
+
185
+ **Value highlighting**::
186
+ Clear before/after values with color coding.
187
+
188
+ **Compact output**::
189
+ Omits unchanged parts of the structure.
190
+
191
+ .by-object mode example
192
+ [example]
193
+ ====
194
+ [source,ruby]
195
+ ----
196
+ # Ruby API - JSON with by-object mode (default)
197
+ result = Canon::Comparison.equivalent?(json1, json2,
198
+ verbose: true
199
+ # mode: :by_object is default for JSON
200
+ )
201
+
202
+ # CLI
203
+ $ canon diff config1.json config2.json --verbose
204
+ ----
205
+
206
+ Output shows semantic changes in tree form:
207
+
208
+ [source]
209
+ ----
210
+ Visual Diff:
211
+ ├── database.host:
212
+ │ ├── - "localhost"
213
+ │ └── + "production.db.example.com"
214
+ ├── database.port:
215
+ │ ├── - 5432
216
+ │ └── + 5433
217
+ └── logging.level:
218
+ ├── - "info"
219
+ └── + "debug"
220
+ ----
221
+ ====
222
+
223
+ == Mode comparison
224
+
225
+ [cols="1,1,1"]
226
+ |===
227
+ |Aspect |by-line |by-object
228
+
229
+ |**View**
230
+ |Line-by-line text
231
+ |Tree structure
232
+
233
+ |**Focus**
234
+ |Textual changes
235
+ |Semantic changes
236
+
237
+ |**Shows**
238
+ |All lines with context
239
+ |Only changed values
240
+
241
+ |**Best for**
242
+ |HTML, markup review
243
+ |JSON, YAML, config files
244
+
245
+ |**Default formats**
246
+ |HTML
247
+ |XML, JSON, YAML
248
+
249
+ |**Whitespace**
250
+ |Visualized in diff lines
251
+ |Not shown (normalized)
252
+
253
+ |**Context**
254
+ |Surrounding lines
255
+ |Object hierarchy path
256
+
257
+ |**Output size**
258
+ |Larger (full context)
259
+ |Smaller (changes only)
260
+ |===
261
+
262
+ == Choosing the right mode
263
+
264
+ === Use by-line mode when
265
+
266
+ * Reviewing HTML markup changes
267
+ * Debugging formatting or whitespace issues
268
+ * You need to see exact line positions
269
+ * You want full document context
270
+ * Working with primarily text-based formats
271
+ * You need output that matches traditional diff tools
272
+
273
+ === Use by-object mode when
274
+
275
+ * Comparing configuration files (JSON/YAML)
276
+ * Reviewing API response differences
277
+ * You want to focus on which values changed
278
+ * You don't care about formatting
279
+ * You want compact diff output
280
+ * Working with nested data structures
281
+
282
+ == Format-specific defaults
283
+
284
+ [cols="1,1,2"]
285
+ |===
286
+ |Format |Default Mode |Rationale
287
+
288
+ |**HTML**
289
+ |by-line
290
+ |Markup structure matters; line-level changes important
291
+
292
+ |**XML**
293
+ |by-object
294
+ |Semantic structure focus; DOM-based comparison more meaningful
295
+
296
+ |**JSON**
297
+ |by-object
298
+ |Object graph comparison; structure over formatting
299
+
300
+ |**YAML**
301
+ |by-object
302
+ |Configuration focus; value changes matter most
303
+ |===
304
+
305
+ == Configuration
306
+
307
+ === Ruby API
308
+
309
+ [source,ruby]
310
+ ----
311
+ # Specify mode explicitly
312
+ Canon::Comparison.equivalent?(doc1, doc2,
313
+ verbose: true,
314
+ diff: { mode: :by_line }
315
+ )
316
+
317
+ # Global configuration for RSpec
318
+ Canon::RSpecMatchers.configure do |config|
319
+ config.xml.diff.mode = :by_line
320
+ config.json.diff.mode = :by_object
321
+ end
322
+ ----
323
+
324
+ === CLI
325
+
326
+ [source,bash]
327
+ ----
328
+ # Force by-line mode for XML
329
+ $ canon diff file1.xml file2.xml --by-line --verbose
330
+
331
+ # by-object is default for JSON
332
+ $ canon diff config1.json config2.json --verbose
333
+ ----
334
+
335
+ === RSpec
336
+
337
+ [source,ruby]
338
+ ----
339
+ # Global configuration
340
+ Canon::RSpecMatchers.configure do |config|
341
+ config.xml.diff.mode = :by_line
342
+ end
343
+
344
+ # Per-test call: options such as `verbose` are passed to the matcher here
345
+ expect(actual).to be_xml_equivalent_to(expected, verbose: true)
346
+ # Uses global config mode
347
+ ----
348
+
349
+ == Advanced features
350
+
351
+ === by-line mode advanced features
352
+
353
+ **DOM-guided semantic matching** (XML only)::
354
+ Matches elements across documents using identity attributes, ensuring
355
+ corresponding elements are compared even when at different positions.
356
+
357
+ **Token highlighting**::
358
+ Within changed lines, highlights specific tokens that differ (element names,
359
+ attributes, values).
360
+
361
+ **Grouped contexts**::
362
+ Groups nearby changes into context blocks with `diff_grouping_lines`.
363
+
364
+ .XML DOM-guided matching example
365
+ [example]
366
+ ====
367
+ When elements are reordered but have unique IDs, by-line mode matches them
368
+ semantically:
369
+
370
+ [source,xml]
371
+ ----
372
+ <!-- File 1 -->
373
+ <items>
374
+ <item id="1" name="First"/>
375
+ <item id="2" name="Second"/>
376
+ </items>
377
+
378
+ <!-- File 2 -->
379
+ <items>
380
+ <item id="2" name="Second"/>
381
+ <item id="1" name="First Edited"/>
382
+ </items>
383
+ ----
384
+
385
+ The diff shows only the actual change:
386
+
387
+ [source]
388
+ ----
389
+ Visual Diff:
390
+ └── items.item[id="1"].name:
391
+ ├── - "First"
392
+ └── + "First Edited"
393
+ ----
394
+
395
+ Elements are matched by their `id` attribute, so the change in position is ignored.
396
+ ====
397
+
398
+ === by-object mode advanced features
399
+
400
+ **Deep nesting support**::
401
+ Handles arbitrarily nested structures with clear path display.
402
+
403
+ **Type-aware comparison**::
404
+ Distinguishes between different data types (string "1" vs number 1).
405
+
406
+ **Array handling**::
407
+ Shows array element changes with index notation.
408
+
409
+ .Nested structure example
410
+ [example]
411
+ ====
412
+ [source]
413
+ ----
414
+ Visual Diff:
415
+ ├── users[0].profile.settings.notifications.email:
416
+ │ ├── - true
417
+ │ └── + false
418
+ └── users[1].name:
419
+ ├── - "John Doe"
420
+ └── + "Jane Doe"
421
+ ----
422
+
423
+ Each change is shown with a clear path into the deeply nested structure.
424
+ ====
425
+
426
+ == See also
427
+
428
+ * link:RUBY_API[Ruby API documentation]
429
+ * link:CLI[Command-line interface]
430
+ * link:RSPEC[RSpec matchers]
431
+ * link:DIFF_FORMATTING[Diff formatting options]
432
+ * link:MATCH_ARCHITECTURE[Match architecture]
data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc ADDED
@@ -0,0 +1,219 @@
1
+ ---
2
+ layout: default
3
+ title: Normative vs Informative Diffs
4
+ nav_order: 42
5
+ parent: Advanced Topics
6
+ ---
7
+ = Canon Normative vs Informative Diffs Architecture
8
+ :toc:
9
+ :toclevels: 3
10
+
11
+ == Overview
12
+
13
+ Canon distinguishes between two types of differences when comparing documents:
14
+
15
+ * **Normative diffs**: Semantic differences that affect the match result based on your match options
16
+ * **Informative diffs**: Textual differences that don't affect the match (semantically equivalent per match options)
17
+
18
+ This allows you to focus on differences that matter while optionally viewing formatting-only changes.
19
+
20
+ == Architecture
21
+
22
+ ----
23
+ ╔═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗
24
+ ║ CANON DIFF ARCHITECTURE ║
25
+ ║ Normative vs Informative Differences ║
26
+ ╚═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝
27
+
28
+ ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
29
+ │ COMPARISON LAYER: Semantic Analysis │
30
+ │ │
31
+ │ Input: Two documents (HTML, XML, JSON, YAML) │
32
+ │ Output: Semantic differences between document trees │
33
+ │ │
34
+ │ ┌────────────────────┐ ┌────────────────────┐ ┌────────────────────┐ ┌──────────────────────────────────────┐ │
35
+ │ │ Tree Comparator │ │ Tree Comparator │ │ Tree Comparator │ │ Match Options │ │
36
+ │ │ (HTML/XML) │ │ (JSON) │ │ (YAML) │ │──────────────────────────────────────│ │
37
+ │ └─────────┬──────────┘ └─────────┬──────────┘ └─────────┬──────────┘ │ Define what matters: │ │
38
+ │ └───────────────────────────┴───────────────────────┘ │ • text_content │ │
39
+ │ │ │ • structural_whitespace │ │
40
+ │ ▼ │ • attribute_whitespace │ │
41
+ │ Creates DiffNodes (semantic differences) │ • comments │ │
42
+ │ │ │ • key_order │ │
43
+ │ │ │ │ │
44
+ │ ┌─────────────────────────────────────┴───────────────────────────────────┐ │ Each dimension has behavior: │ │
45
+ │ │ DiffNode │ │ • :strict → must match exactly │ │
46
+ │ │ Represents one semantic difference │ │ • :normalize → match after cleanup │ │
47
+ │ │──────────────────────────────────────────────────────────────────────────│ │ • :ignore → don't care │ │
48
+ │ │ • References nodes in both trees │◄───Classified──┤ │ │
49
+ │ │ • Has a dimension (what differs) │ based on │ Classification: │ │
50
+ │ │ • Has a reason code │ │ If dimension is :ignore │ │
51
+ │ │ • Marked as normative or informative │ │ → INFORMATIVE diff │ │
52
+ │ └──────────────────────────────────────────────────────────────────────────┘ │ If dimension is :strict or :normalize│ │
53
+ │ │ → NORMATIVE diff │ │
54
+ │ Normative diff = Difference that affects the match result └──────────────────────────────────────┘ │
55
+ │ Informative diff = Difference that doesn't affect the match (semantically equivalent) │
56
+ └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
57
+
58
+ ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
59
+ │ RENDERING LAYER: Text Display │
60
+ │ │
61
+ │ Input: DiffNodes from comparison, original text documents │
62
+ │ Output: Formatted diff with line numbers and markers │
63
+ │ │
64
+ │ ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │
65
+ │ │ STEP 1: Map Semantic Diffs to Text Lines │ │
66
+ │ │ │ │
67
+ │ │ DiffNodeMapper bridges semantic differences to text representation: │ │
68
+ │ │ • Uses line-by-line text diff (Diff::LCS) │ │
69
+ │ │ • Maps each changed line to its corresponding DiffNode │ │
70
+ │ │ • Creates DiffLine objects that link text ↔ semantics │ │
71
+ │ │ │ │
72
+ │ │ DiffLine │ │
73
+ │ │ ──────── │ │
74
+ │ │ • Line numbers in both documents │ │
75
+ │ │ • Content of the line │ │
76
+ │ │ • Reference to DiffNode (may be nil for purely textual changes) │ │
77
+ │ │ • Inherits normative/informative status from DiffNode │ │
78
+ │ └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ │
79
+ │ │ │
80
+ │ ▼ │
81
+ │ ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │
82
+ │ │ STEP 2: Group Lines into Blocks │ │
83
+ │ │ │ │
84
+ │ │ DiffBlock │ │
85
+ │ │ ────────── │ │
86
+ │ │ • Contiguous run of changed lines │ │
87
+ │ │ • Contains one or more DiffLines │ │
88
+ │ │ • Has reference to DiffNode if block maps to single semantic change │ │
89
+ │ │ • Normative if any contained line is normative │ │
90
+ │ └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ │
91
+ │ │ │
92
+ │ ▼ │
93
+ │ ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │
94
+ │ │ STEP 3: Filter Based on Rendering Options │ │
95
+ │ │ │ │
96
+ │ │ show_diffs option controls which blocks are displayed: │ │
97
+ │ │ • :normative → Show only blocks with normative differences │ │
98
+ │ │ • :informative → Show only blocks with informative differences │ │
99
+ │ │ • :all → Show all differences │ │
100
+ │ └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ │
101
+ │ │ │
102
+ │ ▼ │
103
+ │ ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │
104
+ │ │ STEP 4: Add Context and Group for Display │ │
105
+ │ │ │ │
106
+ │ │ DiffContext │ │
107
+ │ │ ─────────── │ │
108
+ │ │ • One or more DiffBlocks grouped by proximity │ │
109
+ │ │ • Includes surrounding unchanged lines for context │ │
110
+ │ │ • Controlled by context_lines and diff_grouping_lines options │ │
111
+ │ └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ │
112
+ │ │ │
113
+ │ ▼ │
114
+ │ ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │
115
+ │ │ STEP 5: Format for Display │ │
116
+ │ │ │ │
117
+ │ │ Normative differences: Informative differences: │ │
118
+ │ │ 18| - <div class="TOC" id="_"> 18| ~ <div class="TOC" id="_"> │ │
119
+ │ │ 18| + <div id="_" class="TOC"> 18| ~ <div id="_" class="TOC"> │ │
120
+ │ │ ▲ ▲ │ │
121
+ │ │ Red/Green markers Cyan marker │ │
122
+ │ │ (semantic difference) (textual difference only) │ │
123
+ │ └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ │
124
+ └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
125
+ ----
126
+
127
+ == Example: Attribute Order
128
+
129
+ Consider comparing these two HTML elements:
130
+
131
+ [source,html]
132
+ ----
133
+ <!-- Document 1 -->
134
+ <div class="TOC" id="_">
135
+
136
+ <!-- Document 2 -->
137
+ <div id="_" class="TOC">
138
+ ----
139
+
140
+ === Comparison Layer
141
+
142
+ The comparator detects that attribute order differs and creates a `DiffNode`:
143
+
144
+ * `dimension`: `:attribute_whitespace`
145
+ * `reason`: `ATTRIBUTE_ORDER_DIFFERS`
146
+
147
+ === Classification
148
+
149
+ The `DiffNode` is classified based on your match options:
150
+
151
+ * If `attribute_whitespace: :strict` → **NORMATIVE** (order matters to you)
152
+ ** Display: `18| - <div class="TOC" id="_">` (red)
153
+ ** Display: `18| + <div id="_" class="TOC">` (green)
154
+
155
+ * If `attribute_whitespace: :normalize` → **INFORMATIVE** (order doesn't matter)
156
+ ** Display: `18| ~ <div class="TOC" id="_">` (cyan)
157
+ ** Display: `18| ~ <div id="_" class="TOC">` (cyan)
158
+
159
+ === Rendering Options
160
+
161
+ Control which diffs are displayed with `show_diffs`:
162
+
163
+ * `:normative` → Skip this block when it is classified as informative (for example, under `attribute_whitespace: :normalize`)
164
+ * `:informative` → Show with `~` marker in cyan
165
+ * `:all` → Show all diffs
166
+
167
+ == Usage
168
+
169
+ === RSpec Matchers
170
+
171
+ [source,ruby]
172
+ ----
173
+ # Show only normative diffs (default)
174
+ expect(actual).to be_html4_equivalent_to(expected, show_diffs: :normative)
175
+
176
+ # Show only informative diffs
177
+ expect(actual).to be_html4_equivalent_to(expected, show_diffs: :informative)
178
+
179
+ # Show all diffs
180
+ expect(actual).to be_html4_equivalent_to(expected, show_diffs: :all)
181
+ ----
182
+
183
+ === CLI
184
+
185
+ [source,bash]
186
+ ----
187
+ # Show only normative diffs (default)
188
+ canon diff file1.html file2.html
189
+
190
+ # Show informative diffs
191
+ canon diff file1.html file2.html --show-diffs=informative
192
+
193
+ # Show all diffs
194
+ canon diff file1.html file2.html --show-diffs=all
195
+ ----
196
+
197
+ == Implementation Components
198
+
199
+ === New Classes
200
+
201
+ * `Canon::Diff::DiffNode` - Represents a semantic difference
202
+ * `Canon::Diff::DiffLine` - Represents a changed text line
203
+ * `Canon::Diff::DiffNodeMapper` - Bridges semantic and textual differences
204
+ * `Canon::Diff::DiffClassifier` - Classifies diffs as normative/informative
205
+
206
+ === Enhanced Classes
207
+
208
+ * `Canon::Diff::DiffBlock` - Add `diff_lines`, `diff_node`, `normative?`
209
+ * `Canon::Diff::DiffContext` - Add `has_normative_diffs?`, `has_informative_diffs?`
210
+ * `Canon::DiffFormatter` - Add `show_diffs` option
211
+ * Line formatters - Support `~` marker and cyan color for informative diffs
212
+
213
+ === Comparators
214
+
215
+ Update `XmlComparator`, `JsonComparator`, `YamlComparator` to:
216
+
217
+ * Track dimension for each difference
218
+ * Create `DiffNode` objects instead of simple hashes
219
+ * Store differences with semantic context