canon 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +163 -67
- data/README.adoc +400 -7
- data/docs/Gemfile +9 -0
- data/docs/INDEX.adoc +99 -182
- data/docs/_config.yml +100 -0
- data/docs/advanced/diff-classification.adoc +547 -0
- data/docs/advanced/diff-pipeline.adoc +358 -0
- data/docs/advanced/index.adoc +214 -0
- data/docs/advanced/semantic-diff-report.adoc +390 -0
- data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
- data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
- data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
- data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
- data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
- data/docs/features/diff-formatting/display-filtering.adoc +472 -0
- data/docs/features/diff-formatting/index.adoc +140 -0
- data/docs/features/environment-configuration/index.adoc +327 -0
- data/docs/features/environment-configuration/override-system.adoc +436 -0
- data/docs/features/environment-configuration/size-limits.adoc +273 -0
- data/docs/features/index.adoc +173 -0
- data/docs/features/input-validation/index.adoc +521 -0
- data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
- data/docs/features/match-options/html-policies.adoc +312 -0
- data/docs/features/match-options/index.adoc +621 -0
- data/docs/getting-started/index.adoc +83 -0
- data/docs/getting-started/quick-start.adoc +76 -0
- data/docs/guides/choosing-configuration.adoc +689 -0
- data/docs/guides/index.adoc +181 -0
- data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
- data/docs/interfaces/index.adoc +101 -0
- data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
- data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
- data/docs/lychee.toml +65 -0
- data/docs/reference/cli-options.adoc +418 -0
- data/docs/reference/environment-variables.adoc +375 -0
- data/docs/reference/index.adoc +204 -0
- data/docs/reference/options-across-interfaces.adoc +417 -0
- data/docs/understanding/algorithms/dom-diff.adoc +389 -0
- data/docs/understanding/algorithms/index.adoc +314 -0
- data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
- data/docs/understanding/architecture.adoc +447 -0
- data/docs/understanding/comparison-pipeline.adoc +317 -0
- data/docs/understanding/formats/html.adoc +380 -0
- data/docs/understanding/formats/index.adoc +261 -0
- data/docs/understanding/formats/json.adoc +390 -0
- data/docs/understanding/formats/xml.adoc +366 -0
- data/docs/understanding/formats/yaml.adoc +504 -0
- data/docs/understanding/index.adoc +130 -0
- data/lib/canon/cli.rb +42 -1
- data/lib/canon/commands/diff_command.rb +108 -23
- data/lib/canon/comparison/compare_profile.rb +101 -0
- data/lib/canon/comparison/comparison_result.rb +41 -2
- data/lib/canon/comparison/html_comparator.rb +292 -71
- data/lib/canon/comparison/html_compare_profile.rb +117 -0
- data/lib/canon/comparison/match_options.rb +42 -4
- data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
- data/lib/canon/comparison/xml_comparator.rb +695 -91
- data/lib/canon/comparison.rb +207 -2
- data/lib/canon/config/env_provider.rb +71 -0
- data/lib/canon/config/env_schema.rb +58 -0
- data/lib/canon/config/override_resolver.rb +55 -0
- data/lib/canon/config/type_converter.rb +59 -0
- data/lib/canon/config.rb +158 -29
- data/lib/canon/data_model.rb +29 -0
- data/lib/canon/diff/diff_classifier.rb +74 -14
- data/lib/canon/diff/diff_context_builder.rb +41 -0
- data/lib/canon/diff/diff_line.rb +18 -2
- data/lib/canon/diff/diff_node.rb +18 -3
- data/lib/canon/diff/diff_node_mapper.rb +71 -12
- data/lib/canon/diff/formatting_detector.rb +53 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
- data/lib/canon/diff_formatter/debug_output.rb +7 -1
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
- data/lib/canon/diff_formatter/legend.rb +42 -0
- data/lib/canon/diff_formatter.rb +78 -9
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html_formatter_base.rb +35 -1
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/yaml_formatter.rb +3 -0
- data/lib/canon/html/data_model.rb +229 -0
- data/lib/canon/html.rb +9 -0
- data/lib/canon/options/cli_generator.rb +70 -0
- data/lib/canon/options/registry.rb +234 -0
- data/lib/canon/rspec_matchers.rb +34 -13
- data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
- data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
- data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
- data/lib/canon/tree_diff/core/matching.rb +241 -0
- data/lib/canon/tree_diff/core/node_signature.rb +164 -0
- data/lib/canon/tree_diff/core/node_weight.rb +135 -0
- data/lib/canon/tree_diff/core/tree_node.rb +450 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
- data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
- data/lib/canon/tree_diff/operation_converter.rb +631 -0
- data/lib/canon/tree_diff/operations/operation.rb +92 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
- data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
- data/lib/canon/tree_diff.rb +33 -0
- data/lib/canon/validators/json_validator.rb +3 -1
- data/lib/canon/validators/yaml_validator.rb +3 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +22 -23
- data/lib/canon/xml/element_matcher.rb +128 -20
- data/lib/canon/xml/namespace_helper.rb +110 -0
- data/lib/canon.rb +3 -0
- metadata +81 -23
- data/_config.yml +0 -116
- data/docs/ADVANCED_TOPICS.adoc +0 -20
- data/docs/BASIC_USAGE.adoc +0 -16
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/docs/DIFF_FORMATTING.adoc +0 -540
- data/docs/FORMATS.adoc +0 -447
- data/docs/INPUT_VALIDATION.adoc +0 -477
- data/docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/docs/MATCH_OPTIONS.adoc +0 -719
- data/docs/MODES.adoc +0 -432
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/docs/OPTIONS.adoc +0 -1387
- data/docs/PREPROCESSING.adoc +0 -491
- data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
- data/docs/UNDERSTANDING_CANON.adoc +0 -17
|
@@ -0,0 +1,533 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Algorithm-Specific Output
|
|
3
|
+
parent: Diff Formatting
|
|
4
|
+
grand_parent: Features
|
|
5
|
+
nav_order: 2
|
|
6
|
+
---
|
|
7
|
+
= Algorithm-Specific Output
|
|
8
|
+
|
|
9
|
+
== Purpose
|
|
10
|
+
|
|
11
|
+
Different comparison algorithms produce fundamentally different types of output. Understanding these differences is essential for choosing the right diff mode and interpreting results correctly.
|
|
12
|
+
|
|
13
|
+
This page explains the output formats of the DOM and Semantic algorithms and provides guidance on choosing the appropriate diff mode for each.
|
|
14
|
+
|
|
15
|
+
== Key Concept
|
|
16
|
+
|
|
17
|
+
**Each algorithm has a natural output format**:
|
|
18
|
+
|
|
19
|
+
* **DOM algorithm** generates line-based differences (natural fit: `by_line`)
|
|
20
|
+
* **Semantic algorithm** generates operation-based differences (natural fit: `by_object`)
|
|
21
|
+
|
|
22
|
+
Both algorithms support both diff modes, but the natural fit provides the most useful output.
|
|
23
|
+
|
|
24
|
+
== DOM Algorithm Output
|
|
25
|
+
|
|
26
|
+
=== Natural Output Format
|
|
27
|
+
|
|
28
|
+
The DOM algorithm produces **line-based differences** showing positional changes:
|
|
29
|
+
|
|
30
|
+
**Characteristics**:
|
|
31
|
+
* Each line is compared at its position
|
|
32
|
+
* Shows additions, deletions, and modifications
|
|
33
|
+
* Traditional diff format (similar to `git diff`)
|
|
34
|
+
* No move detection
|
|
35
|
+
|
|
36
|
+
**Output Structure**:
|
|
37
|
+
[source]
|
|
38
|
+
----
|
|
39
|
+
- <line removed at position>
|
|
40
|
+
+ <line added at position>
|
|
41
|
+
<unchanged line>
|
|
42
|
+
<unchanged line>
|
|
43
|
+
- <another removed line>
|
|
44
|
+
+ <another added line>
|
|
45
|
+
----
|
|
46
|
+
|
|
47
|
+
=== DOM with `by_line` Mode (Natural Fit)
|
|
48
|
+
|
|
49
|
+
This is the recommended mode for the DOM algorithm.
|
|
50
|
+
|
|
51
|
+
[source,ruby]
|
|
52
|
+
----
|
|
53
|
+
result = Canon::Comparison.equivalent?(doc1, doc2,
|
|
54
|
+
diff_algorithm: :dom,
|
|
55
|
+
diff_mode: :by_line,
|
|
56
|
+
verbose: true
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
puts result.diff
|
|
60
|
+
----
|
|
61
|
+
|
|
62
|
+
**Example Output**:
|
|
63
|
+
[source,diff]
|
|
64
|
+
----
|
|
65
|
+
<book>
|
|
66
|
+
- <title>Old Title</title>
|
|
67
|
+
+ <title>New Title</title>
|
|
68
|
+
<author>John Doe</author>
|
|
69
|
+
- <year>2020</year>
|
|
70
|
+
+ <year>2024</year>
|
|
71
|
+
</book>
|
|
72
|
+
----
|
|
73
|
+
|
|
74
|
+
**Best For**:
|
|
75
|
+
* Code review workflows
|
|
76
|
+
* Understanding line-by-line changes
|
|
77
|
+
* Traditional diff tools integration
|
|
78
|
+
* Quick visual scanning
|
|
79
|
+
|
|
80
|
+
=== DOM with `by_object` Mode
|
|
81
|
+
|
|
82
|
+
The DOM algorithm can also produce tree-based output, though it's less natural.
|
|
83
|
+
|
|
84
|
+
[source,ruby]
|
|
85
|
+
----
|
|
86
|
+
result = Canon::Comparison.equivalent?(doc1, doc2,
|
|
87
|
+
diff_algorithm: :dom,
|
|
88
|
+
diff_mode: :by_object,
|
|
89
|
+
verbose: true
|
|
90
|
+
)
|
|
91
|
+
----
|
|
92
|
+
|
|
93
|
+
**Example Output**:
|
|
94
|
+
[source]
|
|
95
|
+
----
|
|
96
|
+
book
|
|
97
|
+
title
|
|
98
|
+
- Old Title
|
|
99
|
+
+ New Title
|
|
100
|
+
author
|
|
101
|
+
= John Doe
|
|
102
|
+
year
|
|
103
|
+
- 2020
|
|
104
|
+
+ 2024
|
|
105
|
+
----
|
|
106
|
+
|
|
107
|
+
**Characteristics**:
|
|
108
|
+
* Shows tree structure
|
|
109
|
+
* Still position-based (no moves)
|
|
110
|
+
* Can be useful for a structured view
|
|
111
|
+
* No semantic operations
|
|
112
|
+
|
|
113
|
+
**Use When**:
|
|
114
|
+
* You want tree structure visualization
|
|
115
|
+
* Working with deeply nested documents
|
|
116
|
+
* Need hierarchical context
|
|
117
|
+
|
|
118
|
+
== Semantic Algorithm Output
|
|
119
|
+
|
|
120
|
+
=== Natural Output Format
|
|
121
|
+
|
|
122
|
+
The Semantic algorithm produces **operation-based differences** showing semantic changes:
|
|
123
|
+
|
|
124
|
+
**Characteristics**:
|
|
125
|
+
* Detects INSERT, DELETE, UPDATE, MOVE operations
|
|
126
|
+
* Understands structural changes
|
|
127
|
+
* Shows element paths
|
|
128
|
+
* Provides operation statistics
|
|
129
|
+
|
|
130
|
+
**Output Structure**:
|
|
131
|
+
[source]
|
|
132
|
+
----
|
|
133
|
+
INSERT: <path> <content>
|
|
134
|
+
DELETE: <path> <content>
|
|
135
|
+
UPDATE: <path> <old> -> <new>
|
|
136
|
+
MOVE: <from-path> -> <to-path>
|
|
137
|
+
----
|
|
138
|
+
|
|
139
|
+
=== Semantic with `by_object` Mode (Natural Fit)
|
|
140
|
+
|
|
141
|
+
This is the recommended mode for the Semantic algorithm.
|
|
142
|
+
|
|
143
|
+
[source,ruby]
|
|
144
|
+
----
|
|
145
|
+
result = Canon::Comparison.equivalent?(doc1, doc2,
|
|
146
|
+
diff_algorithm: :semantic,
|
|
147
|
+
diff_mode: :by_object,
|
|
148
|
+
verbose: true
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
puts result.operations
|
|
152
|
+
puts result.statistics
|
|
153
|
+
----
|
|
154
|
+
|
|
155
|
+
**Example Output**:
|
|
156
|
+
[source]
|
|
157
|
+
----
|
|
158
|
+
UPDATE: book/title: "Old Title" -> "New Title"
|
|
159
|
+
UPDATE: book/year: "2020" -> "2024"
|
|
160
|
+
MOVE: book/chapter[2] -> book/chapter[1]
|
|
161
|
+
|
|
162
|
+
Statistics:
|
|
163
|
+
INSERT: 0
|
|
164
|
+
DELETE: 0
|
|
165
|
+
UPDATE: 2
|
|
166
|
+
MOVE: 1
|
|
167
|
+
----
|
|
168
|
+
|
|
169
|
+
**Best For**:
|
|
170
|
+
* Understanding semantic changes
|
|
171
|
+
* Tracking content evolution
|
|
172
|
+
* Detecting restructuring
|
|
173
|
+
* Operation-level analysis
|
|
174
|
+
|
|
175
|
+
=== Semantic with `by_line` Mode
|
|
176
|
+
|
|
177
|
+
The Semantic algorithm can also produce line-based output for traditional workflows.
|
|
178
|
+
|
|
179
|
+
[source,ruby]
|
|
180
|
+
----
|
|
181
|
+
result = Canon::Comparison.equivalent?(doc1, doc2,
|
|
182
|
+
diff_algorithm: :semantic,
|
|
183
|
+
diff_mode: :by_line,
|
|
184
|
+
verbose: true
|
|
185
|
+
)
|
|
186
|
+
----
|
|
187
|
+
|
|
188
|
+
**Example Output**:
|
|
189
|
+
[source,diff]
|
|
190
|
+
----
|
|
191
|
+
<book>
|
|
192
|
+
~ <title>New Title</title> [UPDATE]
|
|
193
|
+
<author>John Doe</author>
|
|
194
|
+
~ <year>2024</year> [UPDATE]
|
|
195
|
+
→ <chapter id="2"> [MOVE from position 2]
|
|
196
|
+
<chapter id="1">
|
|
197
|
+
</book>
|
|
198
|
+
----
|
|
199
|
+
|
|
200
|
+
**Characteristics**:
|
|
201
|
+
* Traditional line-based format
|
|
202
|
+
* Annotated with operation types
|
|
203
|
+
* Shows move indicators
|
|
204
|
+
* Combines both worlds
|
|
205
|
+
|
|
206
|
+
**Use When**:
|
|
207
|
+
* Need traditional diff format
|
|
208
|
+
* Want operation annotations
|
|
209
|
+
* Integrating with line-based tools
|
|
210
|
+
* Users are familiar with `git diff`
|
|
211
|
+
|
|
212
|
+
== Output Comparison Table
|
|
213
|
+
|
|
214
|
+
[cols="2,3,3"]
|
|
215
|
+
|===
|
|
216
|
+
|Feature |DOM Output |Semantic Output
|
|
217
|
+
|
|
218
|
+
|**Primary Format**
|
|
219
|
+
|Line-based positional differences
|
|
220
|
+
|Operation-based semantic changes
|
|
221
|
+
|
|
222
|
+
|**Move Detection**
|
|
223
|
+
|No (shows as DELETE + INSERT)
|
|
224
|
+
|Yes (shows as MOVE operation)
|
|
225
|
+
|
|
226
|
+
|**Operation Types**
|
|
227
|
+
|Add, Remove, Modify
|
|
228
|
+
|INSERT, DELETE, UPDATE, MOVE
|
|
229
|
+
|
|
230
|
+
|**Best Diff Mode**
|
|
231
|
+
|`by_line`
|
|
232
|
+
|`by_object`
|
|
233
|
+
|
|
234
|
+
|**Statistics**
|
|
235
|
+
|Line counts
|
|
236
|
+
|Operation counts
|
|
237
|
+
|
|
238
|
+
|**Path Information**
|
|
239
|
+
|Line numbers
|
|
240
|
+
|Element paths
|
|
241
|
+
|
|
242
|
+
|**Performance**
|
|
243
|
+
|Fast
|
|
244
|
+
|Slower
|
|
245
|
+
|
|
246
|
+
|**Best For**
|
|
247
|
+
|Traditional workflows
|
|
248
|
+
|Semantic analysis
|
|
249
|
+
|===
|
|
250
|
+
|
|
251
|
+
== Choosing the Right Combination
|
|
252
|
+
|
|
253
|
+
=== Recommended Configurations
|
|
254
|
+
|
|
255
|
+
[cols="2,2,2,3"]
|
|
256
|
+
|===
|
|
257
|
+
|Algorithm |Diff Mode |Use Case |Output Type
|
|
258
|
+
|
|
259
|
+
|DOM
|
|
260
|
+
|by_line
|
|
261
|
+
|Code review, quick diffs
|
|
262
|
+
|Traditional line-based diff
|
|
263
|
+
|
|
264
|
+
|DOM
|
|
265
|
+
|by_object
|
|
266
|
+
|Structured document view
|
|
267
|
+
|Tree view without operations
|
|
268
|
+
|
|
269
|
+
|Semantic
|
|
270
|
+
|by_object
|
|
271
|
+
|Semantic analysis
|
|
272
|
+
|Operation-based tree diff
|
|
273
|
+
|
|
274
|
+
|Semantic
|
|
275
|
+
|by_line
|
|
276
|
+
|Traditional format with operations
|
|
277
|
+
|Annotated line-based diff
|
|
278
|
+
|===
|
|
279
|
+
|
|
280
|
+
== Detailed Examples
|
|
281
|
+
|
|
282
|
+
=== Example 1: Simple Text Change
|
|
283
|
+
|
|
284
|
+
**Input Documents**:
|
|
285
|
+
[source,xml]
|
|
286
|
+
----
|
|
287
|
+
<!-- doc1.xml -->
|
|
288
|
+
<message>Hello World</message>
|
|
289
|
+
|
|
290
|
+
<!-- doc2.xml -->
|
|
291
|
+
<message>Hello Universe</message>
|
|
292
|
+
----
|
|
293
|
+
|
|
294
|
+
**DOM by_line Output**:
|
|
295
|
+
[source,diff]
|
|
296
|
+
----
|
|
297
|
+
- <message>Hello World</message>
|
|
298
|
+
+ <message>Hello Universe</message>
|
|
299
|
+
----
|
|
300
|
+
|
|
301
|
+
**Semantic by_object Output**:
|
|
302
|
+
[source]
|
|
303
|
+
----
|
|
304
|
+
UPDATE: message: "Hello World" -> "Hello Universe"
|
|
305
|
+
----
|
|
306
|
+
|
|
307
|
+
=== Example 2: Element Reordering
|
|
308
|
+
|
|
309
|
+
**Input Documents**:
|
|
310
|
+
[source,xml]
|
|
311
|
+
----
|
|
312
|
+
<!-- doc1.xml -->
|
|
313
|
+
<book>
|
|
314
|
+
<title>Canon</title>
|
|
315
|
+
<author>John</author>
|
|
316
|
+
</book>
|
|
317
|
+
|
|
318
|
+
<!-- doc2.xml -->
|
|
319
|
+
<book>
|
|
320
|
+
<author>John</author>
|
|
321
|
+
<title>Canon</title>
|
|
322
|
+
</book>
|
|
323
|
+
----
|
|
324
|
+
|
|
325
|
+
**DOM by_line Output**:
|
|
326
|
+
[source,diff]
|
|
327
|
+
----
|
|
328
|
+
<book>
|
|
329
|
+
- <title>Canon</title>
|
|
330
|
+
<author>John</author>
|
|
331
|
+
+ <title>Canon</title>
|
|
332
|
+
</book>
|
|
333
|
+
----
|
|
334
|
+
|
|
335
|
+
**Semantic by_object Output**:
|
|
336
|
+
[source]
|
|
337
|
+
----
|
|
338
|
+
MOVE: book/title -> book/title (from position 1 to position 2)
|
|
339
|
+
|
|
340
|
+
Statistics:
|
|
341
|
+
INSERT: 0
|
|
342
|
+
DELETE: 0
|
|
343
|
+
UPDATE: 0
|
|
344
|
+
MOVE: 1
|
|
345
|
+
----
|
|
346
|
+
|
|
347
|
+
=== Example 3: Complex Restructuring
|
|
348
|
+
|
|
349
|
+
**Input Documents**:
|
|
350
|
+
[source,xml]
|
|
351
|
+
----
|
|
352
|
+
<!-- doc1.xml -->
|
|
353
|
+
<doc>
|
|
354
|
+
<section id="1">
|
|
355
|
+
<para>Text A</para>
|
|
356
|
+
<para>Text B</para>
|
|
357
|
+
</section>
|
|
358
|
+
<section id="2">
|
|
359
|
+
<para>Text C</para>
|
|
360
|
+
</section>
|
|
361
|
+
</doc>
|
|
362
|
+
|
|
363
|
+
<!-- doc2.xml -->
|
|
364
|
+
<doc>
|
|
365
|
+
<section id="2">
|
|
366
|
+
<para>Text C</para>
|
|
367
|
+
<para>Text B</para>
|
|
368
|
+
</section>
|
|
369
|
+
<section id="1">
|
|
370
|
+
<para>Text A</para>
|
|
371
|
+
</section>
|
|
372
|
+
</doc>
|
|
373
|
+
----
|
|
374
|
+
|
|
375
|
+
**DOM by_line Output** (shows many line changes):
|
|
376
|
+
[source,diff]
|
|
377
|
+
----
|
|
378
|
+
<doc>
|
|
379
|
+
- <section id="1">
|
|
380
|
+
- <para>Text A</para>
|
|
381
|
+
- <para>Text B</para>
|
|
382
|
+
- </section>
|
|
383
|
+
<section id="2">
|
|
384
|
+
<para>Text C</para>
|
|
385
|
+
+ <para>Text B</para>
|
|
386
|
+
</section>
|
|
387
|
+
+ <section id="1">
|
|
388
|
+
+ <para>Text A</para>
|
|
389
|
+
+ </section>
|
|
390
|
+
</doc>
|
|
391
|
+
----
|
|
392
|
+
|
|
393
|
+
**Semantic by_object Output** (shows semantic operations):
|
|
394
|
+
[source]
|
|
395
|
+
----
|
|
396
|
+
MOVE: doc/section[@id='1'] -> doc/section[@id='1'] (position 1 to 2)
|
|
397
|
+
MOVE: doc/section[@id='1']/para[2] -> doc/section[@id='2']/para[2]
|
|
398
|
+
|
|
399
|
+
Statistics:
|
|
400
|
+
INSERT: 0
|
|
401
|
+
DELETE: 0
|
|
402
|
+
UPDATE: 0
|
|
403
|
+
MOVE: 2
|
|
404
|
+
----
|
|
405
|
+
|
|
406
|
+
== Accessing Algorithm-Specific Output
|
|
407
|
+
|
|
408
|
+
=== Ruby API
|
|
409
|
+
|
|
410
|
+
[source,ruby]
|
|
411
|
+
----
|
|
412
|
+
result = Canon::Comparison.equivalent?(doc1, doc2,
|
|
413
|
+
diff_algorithm: :semantic,
|
|
414
|
+
diff_mode: :by_object,
|
|
415
|
+
verbose: true
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
# Check which algorithm was used
|
|
419
|
+
case result.diff_algorithm
|
|
420
|
+
when :dom
|
|
421
|
+
puts result.diff_lines # Array of diff lines
|
|
422
|
+
puts result.line_count # Statistics
|
|
423
|
+
when :semantic
|
|
424
|
+
puts result.operations # Array of operations
|
|
425
|
+
puts result.statistics # Operation counts
|
|
426
|
+
puts result.tree_diff # Tree representation
|
|
427
|
+
end
|
|
428
|
+
----
|
|
429
|
+
|
|
430
|
+
=== Operation Objects (Semantic Only)
|
|
431
|
+
|
|
432
|
+
[source,ruby]
|
|
433
|
+
----
|
|
434
|
+
result.operations.each do |op|
|
|
435
|
+
puts "Type: #{op.type}" # :insert, :delete, :update, :move
|
|
436
|
+
puts "Path: #{op.path}" # Element path
|
|
437
|
+
puts "Old: #{op.old_value}" # For UPDATE
|
|
438
|
+
puts "New: #{op.new_value}" # For UPDATE/INSERT
|
|
439
|
+
puts "From: #{op.from_path}" # For MOVE
|
|
440
|
+
puts "To: #{op.to_path}" # For MOVE
|
|
441
|
+
end
|
|
442
|
+
----
|
|
443
|
+
|
|
444
|
+
== Performance Implications
|
|
445
|
+
|
|
446
|
+
=== DOM Output Generation
|
|
447
|
+
|
|
448
|
+
* **Speed**: Fast (linear with document size)
|
|
449
|
+
* **Memory**: Low (line-by-line processing)
|
|
450
|
+
* **Scaling**: Handles large documents well
|
|
451
|
+
|
|
452
|
+
=== Semantic Output Generation
|
|
453
|
+
|
|
454
|
+
* **Speed**: Slower (quadratic worst case)
|
|
455
|
+
* **Memory**: Higher (tree structures in memory)
|
|
456
|
+
* **Scaling**: Best for smaller documents (< 10KB)
|
|
457
|
+
|
|
458
|
+
== Common Patterns
|
|
459
|
+
|
|
460
|
+
=== Pattern 1: Quick Diff Review
|
|
461
|
+
|
|
462
|
+
[source,ruby]
|
|
463
|
+
----
|
|
464
|
+
# Fast line-based diff
|
|
465
|
+
Canon::Comparison.equivalent?(expected, actual,
|
|
466
|
+
diff_algorithm: :dom,
|
|
467
|
+
diff_mode: :by_line,
|
|
468
|
+
verbose: true,
|
|
469
|
+
use_color: true
|
|
470
|
+
)
|
|
471
|
+
----
|
|
472
|
+
|
|
473
|
+
=== Pattern 2: Semantic Analysis
|
|
474
|
+
|
|
475
|
+
[source,ruby]
|
|
476
|
+
----
|
|
477
|
+
# Detailed operation analysis
|
|
478
|
+
result = Canon::Comparison.equivalent?(doc1, doc2,
|
|
479
|
+
diff_algorithm: :semantic,
|
|
480
|
+
diff_mode: :by_object,
|
|
481
|
+
verbose: true
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
# Analyze operations
|
|
485
|
+
puts "Total changes: #{result.statistics.total}"
|
|
486
|
+
puts "Moves detected: #{result.statistics.moves}"
|
|
487
|
+
----
|
|
488
|
+
|
|
489
|
+
=== Pattern 3: Hybrid View
|
|
490
|
+
|
|
491
|
+
[source,ruby]
|
|
492
|
+
----
|
|
493
|
+
# Use semantic algorithm but traditional output
|
|
494
|
+
Canon::Comparison.equivalent?(doc1, doc2,
|
|
495
|
+
diff_algorithm: :semantic,
|
|
496
|
+
diff_mode: :by_line, # Traditional format
|
|
497
|
+
verbose: true,
|
|
498
|
+
use_color: true
|
|
499
|
+
)
|
|
500
|
+
# Get operation annotations in traditional diff
|
|
501
|
+
----
|
|
502
|
+
|
|
503
|
+
== Migration Guide
|
|
504
|
+
|
|
505
|
+
=== Migrating Output Format
|
|
506
|
+
|
|
507
|
+
When switching algorithms, update your output expectations:
|
|
508
|
+
|
|
509
|
+
[source,ruby]
|
|
510
|
+
----
|
|
511
|
+
# Before: DOM algorithm
|
|
512
|
+
result = Canon::Comparison.equivalent?(doc1, doc2,
|
|
513
|
+
diff_algorithm: :dom,
|
|
514
|
+
diff_mode: :by_line
|
|
515
|
+
)
|
|
516
|
+
# Expect: result.diff_lines
|
|
517
|
+
|
|
518
|
+
# After: Semantic algorithm
|
|
519
|
+
result = Canon::Comparison.equivalent?(doc1, doc2,
|
|
520
|
+
diff_algorithm: :semantic,
|
|
521
|
+
diff_mode: :by_object
|
|
522
|
+
)
|
|
523
|
+
# Expect: result.operations, result.statistics
|
|
524
|
+
----
|
|
525
|
+
|
|
526
|
+
== See Also
|
|
527
|
+
|
|
528
|
+
* link:index.adoc[Diff Formatting Overview]
|
|
529
|
+
* link:diff-modes.adoc[Diff Modes] - by_line vs by_object details
|
|
530
|
+
* link:../match-options/algorithm-specific-behavior.adoc[Algorithm-Specific Behavior] - How algorithms work
|
|
531
|
+
* link:../../understanding/algorithms/index.adoc[Algorithms] - Detailed algorithm documentation
|
|
532
|
+
* link:colors-and-symbols.adoc[Colors and Symbols] - Visual formatting options
|
|
533
|
+
* link:../../guides/choosing-configuration.adoc[Choosing Configuration] - Decision guide
|
data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc}
RENAMED
|
@@ -1,42 +1,28 @@
|
|
|
1
1
|
---
|
|
2
|
-
layout: default
|
|
3
2
|
title: Character Visualization
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
parent: Diff Formatting
|
|
4
|
+
grand_parent: Features
|
|
5
|
+
nav_order: 4
|
|
6
6
|
---
|
|
7
|
-
=
|
|
7
|
+
= Character visualization
|
|
8
8
|
:toc:
|
|
9
9
|
:toclevels: 3
|
|
10
10
|
|
|
11
|
-
==
|
|
11
|
+
== Purpose
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
system, which makes invisible characters visible in diff output.
|
|
13
|
+
Canon's character visualization system makes invisible characters (spaces, tabs, zero-width characters) visible in diff output, helping you quickly identify whitespace differences that cause test failures.
|
|
15
14
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
== General
|
|
19
|
-
|
|
20
|
-
When comparing documents, invisible characters like spaces, tabs, and
|
|
21
|
-
zero-width characters can cause mysterious test failures. Canon's character
|
|
22
|
-
visualization makes these characters visible in diff output, helping you
|
|
23
|
-
quickly identify the exact difference.
|
|
24
|
-
|
|
25
|
-
Visualization is **CJK-safe**, using Unicode symbols that don't conflict with
|
|
26
|
-
Chinese, Japanese, or Korean text.
|
|
15
|
+
Visualization is **CJK-safe**, using Unicode symbols that don't conflict with Chinese, Japanese, or Korean text.
|
|
27
16
|
|
|
28
17
|
== When visualization is applied
|
|
29
18
|
|
|
30
|
-
Character visualization is applied **only to diff lines** (additions,
|
|
31
|
-
deletions, and changes), not to context lines (unchanged lines). This ensures:
|
|
19
|
+
Character visualization is applied **only to diff lines** (additions, deletions, and changes), not to context lines (unchanged lines). This ensures:
|
|
32
20
|
|
|
33
21
|
* Context lines display content in original form
|
|
34
22
|
* Only actual changes show visualization
|
|
35
23
|
* Differences are easier to spot
|
|
36
24
|
|
|
37
|
-
Within changed lines showing token-level diffs, unchanged tokens are displayed
|
|
38
|
-
in the terminal's default color (not red/green) to distinguish them from
|
|
39
|
-
actual changes.
|
|
25
|
+
Within changed lines showing token-level diffs, unchanged tokens are displayed in the terminal's default color (not red/green) to distinguish them from actual changes.
|
|
40
26
|
|
|
41
27
|
== Default character map
|
|
42
28
|
|
|
@@ -261,8 +247,7 @@ Canon provides a comprehensive CJK-safe character mapping.
|
|
|
261
247
|
|
|
262
248
|
== CJK safety
|
|
263
249
|
|
|
264
|
-
The visualization characters are specifically chosen to avoid conflicts with
|
|
265
|
-
CJK text:
|
|
250
|
+
The visualization characters are specifically chosen to avoid conflicts with CJK text:
|
|
266
251
|
|
|
267
252
|
**Avoided characters**:
|
|
268
253
|
|
|
@@ -290,8 +275,7 @@ CJK text:
|
|
|
290
275
|
| 10+| <tag>░Value</tag> # Space added (green light shade)
|
|
291
276
|
----
|
|
292
277
|
|
|
293
|
-
The `░` symbol clearly shows a regular space was added between `<tag>` and
|
|
294
|
-
`Value`.
|
|
278
|
+
The `░` symbol clearly shows a regular space was added between `<tag>` and `Value`.
|
|
295
279
|
====
|
|
296
280
|
|
|
297
281
|
=== Tab vs spaces
|
|
@@ -305,8 +289,7 @@ The `░` symbol clearly shows a regular space was added between `<tag>` and
|
|
|
305
289
|
| 15+| <tag>░░Value</tag> # Two spaces (green light shades)
|
|
306
290
|
----
|
|
307
291
|
|
|
308
|
-
The difference between a tab (`⇥`) and two spaces (`░░`) is immediately
|
|
309
|
-
visible.
|
|
292
|
+
The difference between a tab (`⇥`) and two spaces (`░░`) is immediately visible.
|
|
310
293
|
====
|
|
311
294
|
|
|
312
295
|
=== Non-breaking space
|
|
@@ -330,8 +313,7 @@ With visualization:
|
|
|
330
313
|
| 4+| <foreword␣id="fwd"> # Non-breaking space (U+00A0)
|
|
331
314
|
----
|
|
332
315
|
|
|
333
|
-
The different symbols (`░` vs `␣`) clearly show that one uses a regular space
|
|
334
|
-
while the other uses a non-breaking space, likely from copying from a web page.
|
|
316
|
+
The different symbols (`░` vs `␣`) clearly show that one uses a regular space while the other uses a non-breaking space, likely from copying from a web page.
|
|
335
317
|
====
|
|
336
318
|
|
|
337
319
|
=== Zero-width space
|
|
@@ -358,26 +340,11 @@ The diff shows:
|
|
|
358
340
|
The rightwards arrow (`→`) reveals the presence of a zero-width space.
|
|
359
341
|
====
|
|
360
342
|
|
|
361
|
-
=== Mixed invisible characters
|
|
362
|
-
|
|
363
|
-
.Multiple whitespace types
|
|
364
|
-
[example]
|
|
365
|
-
====
|
|
366
|
-
[source]
|
|
367
|
-
----
|
|
368
|
-
30| -| <p>Text▬more</p> # Em space (red black rectangle)
|
|
369
|
-
| 30+| <p>Text░more</p> # Regular space (green light shade)
|
|
370
|
-
----
|
|
371
|
-
|
|
372
|
-
Different space types shown with different symbols.
|
|
373
|
-
====
|
|
374
|
-
|
|
375
343
|
== Real-world scenarios
|
|
376
344
|
|
|
377
345
|
=== Web copy-paste
|
|
378
346
|
|
|
379
|
-
**Problem**: Text copied from web pages often contains non-breaking spaces
|
|
380
|
-
(U+00A0) instead of regular spaces.
|
|
347
|
+
**Problem**: Text copied from web pages often contains non-breaking spaces (U+00A0) instead of regular spaces.
|
|
381
348
|
|
|
382
349
|
.Detection example
|
|
383
350
|
[example]
|
|
@@ -393,8 +360,7 @@ The `␣` symbol immediately identifies the non-breaking space.
|
|
|
393
360
|
|
|
394
361
|
=== Smart quotes
|
|
395
362
|
|
|
396
|
-
**Problem**: Text editors may automatically convert straight quotes to curly
|
|
397
|
-
quotes.
|
|
363
|
+
**Problem**: Text editors may automatically convert straight quotes to curly quotes.
|
|
398
364
|
|
|
399
365
|
.Detection example
|
|
400
366
|
[example]
|
|
@@ -446,9 +412,6 @@ formatter = Canon::DiffFormatter.new(
|
|
|
446
412
|
use_color: true,
|
|
447
413
|
visualization_map: custom_map
|
|
448
414
|
)
|
|
449
|
-
|
|
450
|
-
# The custom map merges with defaults, so unspecified
|
|
451
|
-
# characters still use the default visualization
|
|
452
415
|
----
|
|
453
416
|
|
|
454
417
|
=== When to customize
|
|
@@ -468,8 +431,7 @@ formatter = Canon::DiffFormatter.new(
|
|
|
468
431
|
|
|
469
432
|
== Configuration
|
|
470
433
|
|
|
471
|
-
Character visualization is automatically enabled when `use_color: true` and
|
|
472
|
-
applies across all Canon interfaces.
|
|
434
|
+
Character visualization is automatically enabled when `use_color: true` and applies across all Canon interfaces.
|
|
473
435
|
|
|
474
436
|
=== Enabling/disabling
|
|
475
437
|
|
|
@@ -511,10 +473,10 @@ Canon::Comparison.equivalent?(doc1, doc2,
|
|
|
511
473
|
[source,bash]
|
|
512
474
|
----
|
|
513
475
|
# Enable (default)
|
|
514
|
-
|
|
476
|
+
canon diff file1.xml file2.xml --verbose
|
|
515
477
|
|
|
516
478
|
# Disable
|
|
517
|
-
|
|
479
|
+
canon diff file1.xml file2.xml --no-color --verbose
|
|
518
480
|
----
|
|
519
481
|
====
|
|
520
482
|
|
|
@@ -556,12 +518,11 @@ end
|
|
|
556
518
|
|
|
557
519
|
**Problem**: Visualization conflicts with CJK text.
|
|
558
520
|
|
|
559
|
-
**Solution**: Canon's defaults are CJK-safe. If using custom map, avoid the
|
|
560
|
-
characters listed in "CJK safety" section.
|
|
521
|
+
**Solution**: Canon's defaults are CJK-safe. If using a custom map, avoid the characters listed in the "CJK safety" section.
|
|
561
522
|
|
|
562
523
|
== See also
|
|
563
524
|
|
|
564
|
-
* link:
|
|
565
|
-
* link:
|
|
566
|
-
* link
|
|
567
|
-
* link
|
|
525
|
+
* link:index.adoc[Diff Formatting] - Overview of formatting options
|
|
526
|
+
* link:colors-and-symbols.adoc[Colors and Symbols] - Color scheme details
|
|
527
|
+
* link:../../interfaces/cli/index.adoc[CLI Interface] - Command-line usage
|
|
528
|
+
* link:../../interfaces/ruby-api/index.adoc[Ruby API] - Programmatic usage
|