canon 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +163 -67
- data/README.adoc +400 -7
- data/docs/Gemfile +9 -0
- data/docs/INDEX.adoc +99 -182
- data/docs/_config.yml +100 -0
- data/docs/advanced/diff-classification.adoc +547 -0
- data/docs/advanced/diff-pipeline.adoc +358 -0
- data/docs/advanced/index.adoc +214 -0
- data/docs/advanced/semantic-diff-report.adoc +390 -0
- data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
- data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
- data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
- data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
- data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
- data/docs/features/diff-formatting/display-filtering.adoc +472 -0
- data/docs/features/diff-formatting/index.adoc +140 -0
- data/docs/features/environment-configuration/index.adoc +327 -0
- data/docs/features/environment-configuration/override-system.adoc +436 -0
- data/docs/features/environment-configuration/size-limits.adoc +273 -0
- data/docs/features/index.adoc +173 -0
- data/docs/features/input-validation/index.adoc +521 -0
- data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
- data/docs/features/match-options/html-policies.adoc +312 -0
- data/docs/features/match-options/index.adoc +621 -0
- data/docs/getting-started/index.adoc +83 -0
- data/docs/getting-started/quick-start.adoc +76 -0
- data/docs/guides/choosing-configuration.adoc +689 -0
- data/docs/guides/index.adoc +181 -0
- data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
- data/docs/interfaces/index.adoc +101 -0
- data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
- data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
- data/docs/lychee.toml +65 -0
- data/docs/reference/cli-options.adoc +418 -0
- data/docs/reference/environment-variables.adoc +375 -0
- data/docs/reference/index.adoc +204 -0
- data/docs/reference/options-across-interfaces.adoc +417 -0
- data/docs/understanding/algorithms/dom-diff.adoc +389 -0
- data/docs/understanding/algorithms/index.adoc +314 -0
- data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
- data/docs/understanding/architecture.adoc +447 -0
- data/docs/understanding/comparison-pipeline.adoc +317 -0
- data/docs/understanding/formats/html.adoc +380 -0
- data/docs/understanding/formats/index.adoc +261 -0
- data/docs/understanding/formats/json.adoc +390 -0
- data/docs/understanding/formats/xml.adoc +366 -0
- data/docs/understanding/formats/yaml.adoc +504 -0
- data/docs/understanding/index.adoc +130 -0
- data/lib/canon/cli.rb +42 -1
- data/lib/canon/commands/diff_command.rb +108 -23
- data/lib/canon/comparison/compare_profile.rb +101 -0
- data/lib/canon/comparison/comparison_result.rb +41 -2
- data/lib/canon/comparison/html_comparator.rb +292 -71
- data/lib/canon/comparison/html_compare_profile.rb +117 -0
- data/lib/canon/comparison/match_options.rb +42 -4
- data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
- data/lib/canon/comparison/xml_comparator.rb +695 -91
- data/lib/canon/comparison.rb +207 -2
- data/lib/canon/config/env_provider.rb +71 -0
- data/lib/canon/config/env_schema.rb +58 -0
- data/lib/canon/config/override_resolver.rb +55 -0
- data/lib/canon/config/type_converter.rb +59 -0
- data/lib/canon/config.rb +158 -29
- data/lib/canon/data_model.rb +29 -0
- data/lib/canon/diff/diff_classifier.rb +74 -14
- data/lib/canon/diff/diff_context_builder.rb +41 -0
- data/lib/canon/diff/diff_line.rb +18 -2
- data/lib/canon/diff/diff_node.rb +18 -3
- data/lib/canon/diff/diff_node_mapper.rb +71 -12
- data/lib/canon/diff/formatting_detector.rb +53 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
- data/lib/canon/diff_formatter/debug_output.rb +7 -1
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
- data/lib/canon/diff_formatter/legend.rb +42 -0
- data/lib/canon/diff_formatter.rb +78 -9
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html_formatter_base.rb +35 -1
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/yaml_formatter.rb +3 -0
- data/lib/canon/html/data_model.rb +229 -0
- data/lib/canon/html.rb +9 -0
- data/lib/canon/options/cli_generator.rb +70 -0
- data/lib/canon/options/registry.rb +234 -0
- data/lib/canon/rspec_matchers.rb +34 -13
- data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
- data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
- data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
- data/lib/canon/tree_diff/core/matching.rb +241 -0
- data/lib/canon/tree_diff/core/node_signature.rb +164 -0
- data/lib/canon/tree_diff/core/node_weight.rb +135 -0
- data/lib/canon/tree_diff/core/tree_node.rb +450 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
- data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
- data/lib/canon/tree_diff/operation_converter.rb +631 -0
- data/lib/canon/tree_diff/operations/operation.rb +92 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
- data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
- data/lib/canon/tree_diff.rb +33 -0
- data/lib/canon/validators/json_validator.rb +3 -1
- data/lib/canon/validators/yaml_validator.rb +3 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +22 -23
- data/lib/canon/xml/element_matcher.rb +128 -20
- data/lib/canon/xml/namespace_helper.rb +110 -0
- data/lib/canon.rb +3 -0
- metadata +81 -23
- data/_config.yml +0 -116
- data/docs/ADVANCED_TOPICS.adoc +0 -20
- data/docs/BASIC_USAGE.adoc +0 -16
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/docs/DIFF_FORMATTING.adoc +0 -540
- data/docs/FORMATS.adoc +0 -447
- data/docs/INPUT_VALIDATION.adoc +0 -477
- data/docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/docs/MATCH_OPTIONS.adoc +0 -719
- data/docs/MODES.adoc +0 -432
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/docs/OPTIONS.adoc +0 -1387
- data/docs/PREPROCESSING.adoc +0 -491
- data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
- data/docs/UNDERSTANDING_CANON.adoc +0 -17
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
---
|
|
2
|
+
layout: default
|
|
3
|
+
title: Diff Pipeline Architecture
|
|
4
|
+
parent: Advanced
|
|
5
|
+
nav_order: 3
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
:toc:
|
|
9
|
+
:toclevels: 3
|
|
10
|
+
|
|
11
|
+
== Purpose
|
|
12
|
+
|
|
13
|
+
Canon's diff system follows a strict separation of concerns with each layer having a single, well-defined responsibility. This architecture ensures clean code, maintainability, and testability.
|
|
14
|
+
|
|
15
|
+
== Pipeline Layers
|
|
16
|
+
|
|
17
|
+
Canon processes comparisons through six distinct layers:
|
|
18
|
+
|
|
19
|
+
[source]
|
|
20
|
+
----
|
|
21
|
+
Input Documents
|
|
22
|
+
↓
|
|
23
|
+
[1] Comparison → Creates semantic differences (DiffNodes)
|
|
24
|
+
↓
|
|
25
|
+
[2] Classification → Marks as normative/informative
|
|
26
|
+
↓
|
|
27
|
+
[3] Mapping → Maps semantic diffs to text lines
|
|
28
|
+
↓
|
|
29
|
+
[4] Blocking → Groups contiguous changed lines
|
|
30
|
+
↓
|
|
31
|
+
[5] Contexting → Adds surrounding unchanged lines
|
|
32
|
+
↓
|
|
33
|
+
[6] Formatting → Renders to colored output
|
|
34
|
+
↓
|
|
35
|
+
Display Output
|
|
36
|
+
----
|
|
37
|
+
|
|
38
|
+
== Layer 1: Comparison
|
|
39
|
+
|
|
40
|
+
**Responsibility**: Create semantic differences
|
|
41
|
+
|
|
42
|
+
**Input**: Two documents (doc1, doc2) + match options
|
|
43
|
+
|
|
44
|
+
**Process**: Compare DOM/JSON trees semantically
|
|
45
|
+
|
|
46
|
+
**Output**: Array of DiffNode objects
|
|
47
|
+
|
|
48
|
+
[source,ruby]
|
|
49
|
+
----
|
|
50
|
+
# DiffNode structure
|
|
51
|
+
{
|
|
52
|
+
node1: Element from doc1,
|
|
53
|
+
node2: Element from doc2,
|
|
54
|
+
dimension: :text_content | :attribute_order | etc.,
|
|
55
|
+
reason: "Description of difference",
|
|
56
|
+
normative: nil # Set by classifier
|
|
57
|
+
}
|
|
58
|
+
----
|
|
59
|
+
|
|
60
|
+
**Key Point**: Comparators only detect **semantic** differences based on match options. They don't know about text lines or formatting.
|
|
61
|
+
|
|
62
|
+
== Layer 2: Classification
|
|
63
|
+
|
|
64
|
+
**Responsibility**: Mark differences as normative or informative
|
|
65
|
+
|
|
66
|
+
**Input**: DiffNode array + match options
|
|
67
|
+
|
|
68
|
+
**Process**: Check each dimension's behavior (`:strict`, `:normalize`, `:ignore`)
|
|
69
|
+
|
|
70
|
+
**Output**: Same DiffNodes with `normative` flag set
|
|
71
|
+
|
|
72
|
+
[source,ruby]
|
|
73
|
+
----
|
|
74
|
+
classifier = DiffClassifier.new(match_options)
|
|
75
|
+
diff_nodes.each do |node|
|
|
76
|
+
behavior = match_options.behavior_for(node.dimension)
|
|
77
|
+
node.normative = (behavior != :ignore)
|
|
78
|
+
end
|
|
79
|
+
----
|
|
80
|
+
|
|
81
|
+
**Classification Rules**:
|
|
82
|
+
* `:ignore` → informative (cyan, doesn't affect equivalence)
|
|
83
|
+
* `:strict` or `:normalize` → normative (red/green, affects equivalence)
|
|
84
|
+
|
|
85
|
+
== Layer 3: Mapping
|
|
86
|
+
|
|
87
|
+
**Responsibility**: Map semantic diffs to text line positions
|
|
88
|
+
|
|
89
|
+
**Input**: DiffNode array + original text documents
|
|
90
|
+
|
|
91
|
+
**Process**:
|
|
92
|
+
1. Run text diff (Diff::LCS) on original strings
|
|
93
|
+
2. For each changed line, find corresponding DiffNode
|
|
94
|
+
3. Create DiffLine linking line ↔ DiffNode
|
|
95
|
+
4. Inherit normative/informative from DiffNode
|
|
96
|
+
|
|
97
|
+
**Output**: Array of DiffLine objects
|
|
98
|
+
|
|
99
|
+
[source,ruby]
|
|
100
|
+
----
|
|
101
|
+
# DiffLine structure
|
|
102
|
+
{
|
|
103
|
+
line_number: 5,
|
|
104
|
+
content: "<p>Changed text</p>",
|
|
105
|
+
type: :changed, # :added, :removed, :unchanged
|
|
106
|
+
diff_node: DiffNode reference,
|
|
107
|
+
normative: true # from diff_node
|
|
108
|
+
}
|
|
109
|
+
----
|
|
110
|
+
|
|
111
|
+
**Key Point**: This layer bridges semantic differences to their textual representation.
|
|
112
|
+
|
|
113
|
+
== Layer 4: Blocking
|
|
114
|
+
|
|
115
|
+
**Responsibility**: Group contiguous changed lines into blocks
|
|
116
|
+
|
|
117
|
+
**Input**: DiffLine array + `show_diffs` option
|
|
118
|
+
|
|
119
|
+
**Process**:
|
|
120
|
+
1. Identify runs of consecutive changed lines
|
|
121
|
+
2. Create DiffBlock for each run
|
|
122
|
+
3. Set `block.normative` based on contained lines
|
|
123
|
+
4. Filter blocks by `show_diffs` setting
|
|
124
|
+
|
|
125
|
+
**Output**: Array of DiffBlock objects
|
|
126
|
+
|
|
127
|
+
[source,ruby]
|
|
128
|
+
----
|
|
129
|
+
# DiffBlock structure
|
|
130
|
+
{
|
|
131
|
+
start_idx: 10,
|
|
132
|
+
end_idx: 15,
|
|
133
|
+
types: ['-', '+'],
|
|
134
|
+
diff_lines: [DiffLine, ...],
|
|
135
|
+
diff_node: DiffNode (if all from same node),
|
|
136
|
+
normative: true # true if ANY line is normative
|
|
137
|
+
}
|
|
138
|
+
----
|
|
139
|
+
|
|
140
|
+
**Filtering**:
|
|
141
|
+
* `show_diffs: :normative` → keep only normative blocks
|
|
142
|
+
* `show_diffs: :informative` → keep only informative blocks
|
|
143
|
+
* `show_diffs: :all` → keep all blocks
|
|
144
|
+
|
|
145
|
+
== Layer 5: Contexting
|
|
146
|
+
|
|
147
|
+
**Responsibility**: Add surrounding context lines
|
|
148
|
+
|
|
149
|
+
**Input**: DiffBlock array + context/grouping options
|
|
150
|
+
|
|
151
|
+
**Process**:
|
|
152
|
+
1. Group nearby blocks (within `diff_grouping_lines`)
|
|
153
|
+
2. Expand each group with `context_lines` before/after
|
|
154
|
+
3. Create DiffContext for each group
|
|
155
|
+
|
|
156
|
+
**Output**: Array of DiffContext objects
|
|
157
|
+
|
|
158
|
+
[source,ruby]
|
|
159
|
+
----
|
|
160
|
+
# DiffContext structure
|
|
161
|
+
{
|
|
162
|
+
start_idx: 7, # includes context before
|
|
163
|
+
end_idx: 18, # includes context after
|
|
164
|
+
blocks: [DiffBlock, ...]
|
|
165
|
+
}
|
|
166
|
+
----
|
|
167
|
+
|
|
168
|
+
**Key Point**: This layer controls how much unchanged content is shown around changes.
|
|
169
|
+
|
|
170
|
+
== Layer 6: Formatting
|
|
171
|
+
|
|
172
|
+
**Responsibility**: Render to colored string output
|
|
173
|
+
|
|
174
|
+
**Input**: Array of DiffContext objects
|
|
175
|
+
|
|
176
|
+
**Process**:
|
|
177
|
+
* Apply line numbers
|
|
178
|
+
* Add color codes (red/green/cyan)
|
|
179
|
+
* Visualize whitespace characters
|
|
180
|
+
* Format for terminal display
|
|
181
|
+
|
|
182
|
+
**Output**: Formatted string ready for display
|
|
183
|
+
|
|
184
|
+
**Key Point**: Formatters are pure display - no business logic, no filtering, no decisions.
|
|
185
|
+
|
|
186
|
+
== Data Flow Example
|
|
187
|
+
|
|
188
|
+
=== Scenario: Attribute order normalized away
|
|
189
|
+
|
|
190
|
+
[source]
|
|
191
|
+
----
|
|
192
|
+
Input:
|
|
193
|
+
doc1: <div class="TOC" id="_">
|
|
194
|
+
doc2: <div id="_" class="TOC">
|
|
195
|
+
|
|
196
|
+
match_options: { attribute_order: :ignore }
|
|
197
|
+
|
|
198
|
+
Layer 1 - Comparison:
|
|
199
|
+
XmlComparator sees attribute order differs
|
|
200
|
+
BUT match option is :ignore
|
|
201
|
+
→ NO DiffNode created (semantically equivalent)
|
|
202
|
+
|
|
203
|
+
Layer 2 - Classification:
|
|
204
|
+
No DiffNodes to classify
|
|
205
|
+
→ Skip
|
|
206
|
+
|
|
207
|
+
Layer 3 - Mapping:
|
|
208
|
+
No DiffNodes to map
|
|
209
|
+
→ No DiffLines created
|
|
210
|
+
|
|
211
|
+
Layer 4 - Blocking:
|
|
212
|
+
No DiffLines to block
|
|
213
|
+
→ No DiffBlocks created
|
|
214
|
+
|
|
215
|
+
Layer 5 - Contexting:
|
|
216
|
+
No DiffBlocks to contextualize
|
|
217
|
+
→ No DiffContexts created
|
|
218
|
+
|
|
219
|
+
Layer 6 - Formatting:
|
|
220
|
+
No contexts to format
|
|
221
|
+
→ Returns empty string (no diff shown)
|
|
222
|
+
|
|
223
|
+
Result: Files are equivalent, no output
|
|
224
|
+
----
|
|
225
|
+
|
|
226
|
+
=== Scenario: Real text difference
|
|
227
|
+
|
|
228
|
+
[source]
|
|
229
|
+
----
|
|
230
|
+
Input:
|
|
231
|
+
doc1: <p>Test 1</p>
|
|
232
|
+
doc2: <p>Test 2</p>
|
|
233
|
+
|
|
234
|
+
match_options: { text_content: :strict }
|
|
235
|
+
|
|
236
|
+
Layer 1 - Comparison:
|
|
237
|
+
XmlComparator finds text differs
|
|
238
|
+
Creates: DiffNode(dimension: :text_content)
|
|
239
|
+
|
|
240
|
+
Layer 2 - Classification:
|
|
241
|
+
text_content: :strict → normative
|
|
242
|
+
Sets: diff_node.normative = true
|
|
243
|
+
|
|
244
|
+
Layer 3 - Mapping:
|
|
245
|
+
Maps to line 1 (changed)
|
|
246
|
+
Creates: DiffLine(type: :changed, normative: true)
|
|
247
|
+
|
|
248
|
+
Layer 4 - Blocking:
|
|
249
|
+
Groups line into block
|
|
250
|
+
Creates: DiffBlock([DiffLine], normative: true)
|
|
251
|
+
Filter: show_diffs: :normative → keeps block
|
|
252
|
+
|
|
253
|
+
Layer 5 - Contexting:
|
|
254
|
+
Adds context lines (0 before, 0 after if short file)
|
|
255
|
+
Creates: DiffContext([DiffBlock])
|
|
256
|
+
|
|
257
|
+
Layer 6 - Formatting:
|
|
258
|
+
Renders with colors:
|
|
259
|
+
1| - | <p>Test 1</p>
|
|
260
|
+
| 1+ | <p>Test 2</p>
|
|
261
|
+
|
|
262
|
+
Result: Files differ, diff shown in red/green
|
|
263
|
+
----
|
|
264
|
+
|
|
265
|
+
== Class Responsibilities
|
|
266
|
+
|
|
267
|
+
=== Comparison Layer
|
|
268
|
+
|
|
269
|
+
link:../../lib/canon/comparison/xml_comparator.rb[`XmlComparator`]:: Compares DOM nodes semantically, creates DiffNodes
|
|
270
|
+
|
|
271
|
+
link:../../lib/canon/diff/diff_classifier.rb[`DiffClassifier`]:: Classifies DiffNodes as normative/informative
|
|
272
|
+
|
|
273
|
+
=== Processing Layers
|
|
274
|
+
|
|
275
|
+
link:../../lib/canon/diff/diff_node_mapper.rb[`DiffNodeMapper`]:: Maps semantic diffs to text line positions
|
|
276
|
+
|
|
277
|
+
link:../../lib/canon/diff/diff_block_builder.rb[`DiffBlockBuilder`]:: Groups contiguous lines into blocks, filters by show_diffs
|
|
278
|
+
|
|
279
|
+
link:../../lib/canon/diff/diff_context_builder.rb[`DiffContextBuilder`]:: Adds context lines, groups nearby blocks
|
|
280
|
+
|
|
281
|
+
link:../../lib/canon/diff/diff_report_builder.rb[`DiffReportBuilder`]:: Orchestrates the full pipeline
|
|
282
|
+
|
|
283
|
+
=== Formatting Layer
|
|
284
|
+
|
|
285
|
+
link:../../lib/canon/diff_formatter/by_line/xml_formatter.rb[`ByLine::XmlFormatter`]:: Renders line-by-line XML diffs
|
|
286
|
+
|
|
287
|
+
link:../../lib/canon/diff_formatter/by_line/html_formatter.rb[`ByLine::HtmlFormatter`]:: Renders line-by-line HTML diffs
|
|
288
|
+
|
|
289
|
+
link:../../lib/canon/diff_formatter/by_object/xml_formatter.rb[`ByObject::XmlFormatter`]:: Renders tree-based XML diffs
|
|
290
|
+
|
|
291
|
+
== Key Principles
|
|
292
|
+
|
|
293
|
+
=== Single Responsibility
|
|
294
|
+
|
|
295
|
+
Each class does ONE thing:
|
|
296
|
+
|
|
297
|
+
* **Comparator**: Compares → DiffNodes
|
|
298
|
+
* **Classifier**: Classifies → normative flags
|
|
299
|
+
* **Mapper**: Maps nodes → lines
|
|
300
|
+
* **BlockBuilder**: Groups lines → blocks
|
|
301
|
+
* **ContextBuilder**: Adds context → contexts
|
|
302
|
+
* **Formatter**: Renders → string
|
|
303
|
+
|
|
304
|
+
=== Separation of Concerns
|
|
305
|
+
|
|
306
|
+
**Business Logic** (Layers 1-5):
|
|
307
|
+
* Lives in `lib/canon/diff/` and `lib/canon/comparison/`
|
|
308
|
+
* No knowledge of rendering or colors
|
|
309
|
+
* Pure data transformations
|
|
310
|
+
|
|
311
|
+
**Presentation** (Layer 6):
|
|
312
|
+
* Lives in `lib/canon/diff_formatter/`
|
|
313
|
+
* No business logic
|
|
314
|
+
* Just renders what it's given
|
|
315
|
+
|
|
316
|
+
=== Information Expert
|
|
317
|
+
|
|
318
|
+
Each object knows about its own data:
|
|
319
|
+
|
|
320
|
+
* `DiffNode.normative?` - knows if semantically different
|
|
321
|
+
* `DiffLine.normative?` - knows via its DiffNode
|
|
322
|
+
* `DiffBlock.normative?` - knows via its DiffLines
|
|
323
|
+
* `DiffContext` - knows about its blocks
|
|
324
|
+
|
|
325
|
+
=== Tell, Don't Ask
|
|
326
|
+
|
|
327
|
+
Don't ask objects for data to make decisions elsewhere:
|
|
328
|
+
|
|
329
|
+
[source,ruby]
|
|
330
|
+
----
|
|
331
|
+
# BAD (Ask)
|
|
332
|
+
if diff_node.dimension == :attribute_order &&
|
|
333
|
+
match_options[:attribute_order] == :ignore
|
|
334
|
+
# make decision here
|
|
335
|
+
end
|
|
336
|
+
|
|
337
|
+
# GOOD (Tell)
|
|
338
|
+
if diff_node.normative?
|
|
339
|
+
# decision already made
|
|
340
|
+
end
|
|
341
|
+
----
|
|
342
|
+
|
|
343
|
+
== Benefits
|
|
344
|
+
|
|
345
|
+
**Testability**: Each layer can be tested independently
|
|
346
|
+
|
|
347
|
+
**Maintainability**: Clear responsibilities, easy to understand
|
|
348
|
+
|
|
349
|
+
**Extensibility**: Easy to add new filtering, grouping, or rendering strategies
|
|
350
|
+
|
|
351
|
+
**Correctness**: When no DiffNodes are produced (all differences normalized away), the entire pipeline produces no output
|
|
352
|
+
|
|
353
|
+
== See Also
|
|
354
|
+
|
|
355
|
+
* link:diff-classification.html[Diff Classification] - Normative vs informative
|
|
356
|
+
* link:semantic-diff-report.html[Semantic Diff Report] - High-level diff summary
|
|
357
|
+
* link:../understanding/comparison-pipeline.html[Comparison Pipeline] - User-facing overview
|
|
358
|
+
* link:../understanding/architecture.html[Architecture] - System design
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
---
|
|
2
|
+
layout: default
|
|
3
|
+
title: Advanced Topics
|
|
4
|
+
nav_order: 6
|
|
5
|
+
has_children: true
|
|
6
|
+
---
|
|
7
|
+
= Advanced Topics
|
|
8
|
+
|
|
9
|
+
Deep technical documentation for developers and advanced users.
|
|
10
|
+
|
|
11
|
+
== Overview
|
|
12
|
+
|
|
13
|
+
This section provides detailed technical information about Canon's internal algorithms, architectures, and extension points. Read this section if you:
|
|
14
|
+
|
|
15
|
+
* Need to understand Canon's implementation details
|
|
16
|
+
* Want to contribute to Canon development
|
|
17
|
+
* Are debugging complex comparison issues
|
|
18
|
+
* Plan to extend Canon with custom comparators
|
|
19
|
+
|
|
20
|
+
== What You'll Learn
|
|
21
|
+
|
|
22
|
+
link:dom-diff-internals[**DOM Diff Internals**]::
|
|
23
|
+
Deep dive into Canon's default DOM diff algorithm: position-based matching, operation detection, and diff generation.
|
|
24
|
+
|
|
25
|
+
link:semantic-tree-diff-internals[**Semantic Tree Diff Internals**]::
|
|
26
|
+
How the experimental semantic tree diff works: signature calculation, similarity matching, and operation classification.
|
|
27
|
+
|
|
28
|
+
link:verbose-mode-architecture[**Verbose Mode Architecture**]::
|
|
29
|
+
The two-tier diff output system: normative vs informative diffs, and how verbose mode enriches output.
|
|
30
|
+
|
|
31
|
+
link:diff-classification[**Diff Classification System**]::
|
|
32
|
+
How Canon classifies differences as normative (structural) or informative (presentational).
|
|
33
|
+
|
|
34
|
+
link:diff-pipeline[**Diff Pipeline Architecture**]::
|
|
35
|
+
The six-layer technical pipeline from input to formatted output.
|
|
36
|
+
|
|
37
|
+
link:extending-canon[**Extending Canon**]::
|
|
38
|
+
How to create custom comparators, formatters, and match strategies.
|
|
39
|
+
|
|
40
|
+
== Algorithm Deep Dives
|
|
41
|
+
|
|
42
|
+
=== DOM Diff Algorithm
|
|
43
|
+
|
|
44
|
+
The DOM diff algorithm is Canon's stable, well-tested comparison strategy.
|
|
45
|
+
|
|
46
|
+
**Key Components**:
|
|
47
|
+
|
|
48
|
+
* Position-based element matching
|
|
49
|
+
* Line-by-line comparison
|
|
50
|
+
* Context-aware grouping
|
|
51
|
+
* Character-level visualization
|
|
52
|
+
|
|
53
|
+
**When to use**:
|
|
54
|
+
|
|
55
|
+
* Similar documents with minor differences
|
|
56
|
+
* Traditional diff output needed
|
|
57
|
+
* Performance is critical
|
|
58
|
+
* Stable, predictable behavior required
|
|
59
|
+
|
|
60
|
+
See link:dom-diff-internals[DOM Diff Internals] for implementation details.
|
|
61
|
+
|
|
62
|
+
=== Semantic Tree Diff Algorithm
|
|
63
|
+
|
|
64
|
+
The semantic tree diff is an experimental algorithm that understands document structure.
|
|
65
|
+
|
|
66
|
+
**Key Components**:
|
|
67
|
+
|
|
68
|
+
* Signature-based node matching
|
|
69
|
+
* Similarity scoring
|
|
70
|
+
* Operation detection (INSERT, DELETE, UPDATE, MOVE)
|
|
71
|
+
* Structural propagation
|
|
72
|
+
|
|
73
|
+
**When to use**:
|
|
74
|
+
|
|
75
|
+
* Documents with significant restructuring
|
|
76
|
+
* Move detection needed
|
|
77
|
+
* Operation-based analysis desired
|
|
78
|
+
* Willing to accept experimental status
|
|
79
|
+
|
|
80
|
+
See link:semantic-tree-diff-internals[Semantic Tree Diff Internals] for implementation details.
|
|
81
|
+
|
|
82
|
+
== Architecture Patterns
|
|
83
|
+
|
|
84
|
+
=== The Orchestrator Pattern
|
|
85
|
+
|
|
86
|
+
Canon uses orchestrators to coordinate specialized workers:
|
|
87
|
+
|
|
88
|
+
[source,ruby]
|
|
89
|
+
----
|
|
90
|
+
class Comparison
|
|
91
|
+
def self.compare(doc1, doc2, options = {})
|
|
92
|
+
# Orchestrate: validation → parsing → matching → diffing
|
|
93
|
+
validator.validate!(doc1, doc2)
|
|
94
|
+
parsed1 = parser.parse(doc1)
|
|
95
|
+
parsed2 = parser.parse(doc2)
|
|
96
|
+
matcher.match(parsed1, parsed2, options)
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
----
|
|
100
|
+
|
|
101
|
+
Benefits:
|
|
102
|
+
|
|
103
|
+
* Clear separation of concerns
|
|
104
|
+
* Easy to test individual components
|
|
105
|
+
* Simple to add new formats or algorithms
|
|
106
|
+
|
|
107
|
+
=== The Adapter Pattern
|
|
108
|
+
|
|
109
|
+
Format-specific details are handled by adapters:
|
|
110
|
+
|
|
111
|
+
[source,ruby]
|
|
112
|
+
----
|
|
113
|
+
module TreeDiff
|
|
114
|
+
module Adapters
|
|
115
|
+
class XmlAdapter
|
|
116
|
+
def create_tree_node(element)
|
|
117
|
+
# XML-specific tree node creation
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
class JsonAdapter
|
|
122
|
+
def create_tree_node(object)
|
|
123
|
+
# JSON-specific tree node creation
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
----
|
|
129
|
+
|
|
130
|
+
See link:extending-canon#adapter-pattern[Extending Canon: Adapter Pattern] for details.
|
|
131
|
+
|
|
132
|
+
== Extension Points
|
|
133
|
+
|
|
134
|
+
Canon provides several extension points:
|
|
135
|
+
|
|
136
|
+
**Custom Comparators**::
|
|
137
|
+
Implement format-specific comparison logic.
|
|
138
|
+
|
|
139
|
+
**Custom Formatters**::
|
|
140
|
+
Create new output formats for diffs.
|
|
141
|
+
|
|
142
|
+
**Custom Match Strategies**::
|
|
143
|
+
Define custom matching algorithms.
|
|
144
|
+
|
|
145
|
+
**Custom Preprocessors**::
|
|
146
|
+
Add new preprocessing transformations.
|
|
147
|
+
|
|
148
|
+
See link:extending-canon[Extending Canon] for implementation guides.
|
|
149
|
+
|
|
150
|
+
== Performance Considerations
|
|
151
|
+
|
|
152
|
+
=== Algorithm Performance
|
|
153
|
+
|
|
154
|
+
**DOM Diff**:
|
|
155
|
+
|
|
156
|
+
* Time complexity: O(n) for similar documents
|
|
157
|
+
* Space complexity: O(n)
|
|
158
|
+
* Best for: Documents with <10,000 nodes
|
|
159
|
+
|
|
160
|
+
**Semantic Tree Diff**:
|
|
161
|
+
|
|
162
|
+
* Time complexity: O(n²) in worst case
|
|
163
|
+
* Space complexity: O(n)
|
|
164
|
+
* Best for: Documents with <1,000 nodes
|
|
165
|
+
|
|
166
|
+
=== Optimization Strategies
|
|
167
|
+
|
|
168
|
+
* Use size limits to prevent hangs
|
|
169
|
+
* Enable preprocessing for normalized comparison
|
|
170
|
+
* Choose appropriate diff algorithm
|
|
171
|
+
* Configure context lines wisely
|
|
172
|
+
|
|
173
|
+
See link:../features/environment-configuration/size-limits[Size Limits] for configuration.
|
|
174
|
+
|
|
175
|
+
== Debugging Canon
|
|
176
|
+
|
|
177
|
+
=== Enable Verbose Logging
|
|
178
|
+
|
|
179
|
+
[source,ruby]
|
|
180
|
+
----
|
|
181
|
+
ENV['CANON_DEBUG'] = 'true'
|
|
182
|
+
result = Canon::Comparison.compare(doc1, doc2, verbose: true)
|
|
183
|
+
----
|
|
184
|
+
|
|
185
|
+
=== Inspect Internal Structures
|
|
186
|
+
|
|
187
|
+
[source,ruby]
|
|
188
|
+
----
|
|
189
|
+
result = Canon::Comparison.compare(doc1, doc2)
|
|
190
|
+
puts result.operations # For semantic diff
|
|
191
|
+
puts result.diff_report # Detailed report
|
|
192
|
+
----
|
|
193
|
+
|
|
194
|
+
=== Use Character Visualization
|
|
195
|
+
|
|
196
|
+
[source,ruby]
|
|
197
|
+
----
|
|
198
|
+
result = Canon::Comparison.compare(doc1, doc2,
|
|
199
|
+
verbose: true,
|
|
200
|
+
visualize_whitespace: true
|
|
201
|
+
)
|
|
202
|
+
----
|
|
203
|
+
|
|
204
|
+
== Next Steps
|
|
205
|
+
|
|
206
|
+
* Read link:dom-diff-internals[DOM Diff Internals] to understand the default algorithm
|
|
207
|
+
* Explore link:semantic-tree-diff-internals[Semantic Tree Diff] for advanced matching
|
|
208
|
+
* Check link:extending-canon[Extending Canon] to add custom functionality
|
|
209
|
+
|
|
210
|
+
== See Also
|
|
211
|
+
|
|
212
|
+
* link:../understanding/algorithms/[Comparison Algorithms] - High-level algorithm overview
|
|
213
|
+
* link:../features/[Features] - Configuring Canon's behavior
|
|
214
|
+
* link:../reference/[Reference] - Complete API documentation
|