canon 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -1
- data/.rubocop_todo.yml +276 -7
- data/README.adoc +203 -138
- data/_config.yml +116 -0
- data/docs/ADVANCED_TOPICS.adoc +20 -0
- data/docs/BASIC_USAGE.adoc +16 -0
- data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
- data/docs/CLI.adoc +493 -0
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
- data/docs/DIFF_ARCHITECTURE.adoc +435 -0
- data/docs/DIFF_FORMATTING.adoc +540 -0
- data/docs/FORMATS.adoc +447 -0
- data/docs/INDEX.adoc +222 -0
- data/docs/INPUT_VALIDATION.adoc +477 -0
- data/docs/MATCH_ARCHITECTURE.adoc +463 -0
- data/docs/MATCH_OPTIONS.adoc +719 -0
- data/docs/MODES.adoc +432 -0
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
- data/docs/OPTIONS.adoc +1387 -0
- data/docs/PREPROCESSING.adoc +491 -0
- data/docs/RSPEC.adoc +605 -0
- data/docs/RUBY_API.adoc +478 -0
- data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
- data/docs/UNDERSTANDING_CANON.adoc +17 -0
- data/docs/VERBOSE.adoc +482 -0
- data/exe/canon +7 -0
- data/lib/canon/cli.rb +179 -0
- data/lib/canon/commands/diff_command.rb +195 -0
- data/lib/canon/commands/format_command.rb +113 -0
- data/lib/canon/comparison/base_comparator.rb +39 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +410 -0
- data/lib/canon/comparison/json_comparator.rb +212 -0
- data/lib/canon/comparison/match_options.rb +616 -0
- data/lib/canon/comparison/xml_comparator.rb +566 -0
- data/lib/canon/comparison/yaml_comparator.rb +93 -0
- data/lib/canon/comparison.rb +239 -0
- data/lib/canon/config.rb +172 -0
- data/lib/canon/diff/diff_block.rb +71 -0
- data/lib/canon/diff/diff_block_builder.rb +105 -0
- data/lib/canon/diff/diff_classifier.rb +46 -0
- data/lib/canon/diff/diff_context.rb +85 -0
- data/lib/canon/diff/diff_context_builder.rb +107 -0
- data/lib/canon/diff/diff_line.rb +77 -0
- data/lib/canon/diff/diff_node.rb +56 -0
- data/lib/canon/diff/diff_node_mapper.rb +148 -0
- data/lib/canon/diff/diff_report.rb +133 -0
- data/lib/canon/diff/diff_report_builder.rb +62 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
- data/lib/canon/diff_formatter/character_map.yml +197 -0
- data/lib/canon/diff_formatter/debug_output.rb +431 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
- data/lib/canon/diff_formatter/legend.rb +141 -0
- data/lib/canon/diff_formatter.rb +520 -0
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html4_formatter.rb +17 -0
- data/lib/canon/formatters/html5_formatter.rb +17 -0
- data/lib/canon/formatters/html_formatter.rb +37 -0
- data/lib/canon/formatters/html_formatter_base.rb +163 -0
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/xml_formatter.rb +20 -55
- data/lib/canon/formatters/yaml_formatter.rb +4 -1
- data/lib/canon/pretty_printer/html.rb +57 -0
- data/lib/canon/pretty_printer/json.rb +25 -0
- data/lib/canon/pretty_printer/xml.rb +29 -0
- data/lib/canon/rspec_matchers.rb +222 -80
- data/lib/canon/validators/base_validator.rb +49 -0
- data/lib/canon/validators/html_validator.rb +138 -0
- data/lib/canon/validators/json_validator.rb +89 -0
- data/lib/canon/validators/xml_validator.rb +53 -0
- data/lib/canon/validators/yaml_validator.rb +73 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/attribute_handler.rb +80 -0
- data/lib/canon/xml/c14n.rb +36 -0
- data/lib/canon/xml/character_encoder.rb +38 -0
- data/lib/canon/xml/data_model.rb +225 -0
- data/lib/canon/xml/element_matcher.rb +196 -0
- data/lib/canon/xml/line_range_mapper.rb +158 -0
- data/lib/canon/xml/namespace_handler.rb +86 -0
- data/lib/canon/xml/node.rb +32 -0
- data/lib/canon/xml/nodes/attribute_node.rb +54 -0
- data/lib/canon/xml/nodes/comment_node.rb +23 -0
- data/lib/canon/xml/nodes/element_node.rb +56 -0
- data/lib/canon/xml/nodes/namespace_node.rb +38 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
- data/lib/canon/xml/nodes/root_node.rb +16 -0
- data/lib/canon/xml/nodes/text_node.rb +23 -0
- data/lib/canon/xml/processor.rb +151 -0
- data/lib/canon/xml/whitespace_normalizer.rb +72 -0
- data/lib/canon/xml/xml_base_handler.rb +188 -0
- data/lib/canon.rb +14 -3
- metadata +116 -21
|
@@ -0,0 +1,567 @@
|
|
|
1
|
+
---
|
|
2
|
+
layout: default
|
|
3
|
+
title: Character Visualization
|
|
4
|
+
nav_order: 34
|
|
5
|
+
parent: Customizing Behavior
|
|
6
|
+
---
|
|
7
|
+
= Canon character visualization
|
|
8
|
+
:toc:
|
|
9
|
+
:toclevels: 3
|
|
10
|
+
|
|
11
|
+
== Scope
|
|
12
|
+
|
|
13
|
+
This document describes Canon's whitespace and special character visualization
|
|
14
|
+
system, which makes invisible characters visible in diff output.
|
|
15
|
+
|
|
16
|
+
For diff formatting options, see link:DIFF_FORMATTING[Diff formatting].
|
|
17
|
+
|
|
18
|
+
== General
|
|
19
|
+
|
|
20
|
+
When comparing documents, invisible characters like spaces, tabs, and
|
|
21
|
+
zero-width characters can cause mysterious test failures. Canon's character
|
|
22
|
+
visualization makes these characters visible in diff output, helping you
|
|
23
|
+
quickly identify the exact difference.
|
|
24
|
+
|
|
25
|
+
Visualization is **CJK-safe**, using Unicode symbols that don't conflict with
|
|
26
|
+
Chinese, Japanese, or Korean text.
|
|
27
|
+
|
|
28
|
+
== When visualization is applied
|
|
29
|
+
|
|
30
|
+
Character visualization is applied **only to diff lines** (additions,
|
|
31
|
+
deletions, and changes), not to context lines (unchanged lines). This ensures:
|
|
32
|
+
|
|
33
|
+
* Context lines display content in original form
|
|
34
|
+
* Only actual changes show visualization
|
|
35
|
+
* Differences are easier to spot
|
|
36
|
+
|
|
37
|
+
Within changed lines showing token-level diffs, unchanged tokens are displayed
|
|
38
|
+
in the terminal's default color (not red/green) to distinguish them from
|
|
39
|
+
actual changes.
|
|
40
|
+
|
|
41
|
+
== Default character map
|
|
42
|
+
|
|
43
|
+
Canon provides a comprehensive CJK-safe character mapping.
|
|
44
|
+
|
|
45
|
+
=== Common whitespace
|
|
46
|
+
|
|
47
|
+
[cols="1,1,1,2"]
|
|
48
|
+
|===
|
|
49
|
+
|Character |Unicode |Symbol |Description
|
|
50
|
+
|
|
51
|
+
|Regular space
|
|
52
|
+
|U+0020
|
|
53
|
+
|`░`
|
|
54
|
+
|Light Shade (U+2591)
|
|
55
|
+
|
|
56
|
+
|Tab
|
|
57
|
+
|U+0009
|
|
58
|
+
|`⇥`
|
|
59
|
+
|Rightwards Arrow to Bar (U+21E5)
|
|
60
|
+
|
|
61
|
+
|Non-breaking space
|
|
62
|
+
|U+00A0
|
|
63
|
+
|`␣`
|
|
64
|
+
|Open Box (U+2423)
|
|
65
|
+
|===
|
|
66
|
+
|
|
67
|
+
=== Line endings
|
|
68
|
+
|
|
69
|
+
[cols="1,1,1,2"]
|
|
70
|
+
|===
|
|
71
|
+
|Character |Unicode |Symbol |Description
|
|
72
|
+
|
|
73
|
+
|Line feed (LF)
|
|
74
|
+
|U+000A
|
|
75
|
+
|`↵`
|
|
76
|
+
|Downwards Arrow with Corner Leftwards (U+21B5)
|
|
77
|
+
|
|
78
|
+
|Carriage return (CR)
|
|
79
|
+
|U+000D
|
|
80
|
+
|`⏎`
|
|
81
|
+
|Return Symbol (U+23CE)
|
|
82
|
+
|
|
83
|
+
|Windows line ending (CRLF)
|
|
84
|
+
|U+000D U+000A
|
|
85
|
+
|`↵`
|
|
86
|
+
|Downwards Arrow with Corner Leftwards (U+21B5)
|
|
87
|
+
|
|
88
|
+
|Next line (NEL)
|
|
89
|
+
|U+0085
|
|
90
|
+
|`⏎`
|
|
91
|
+
|Return Symbol (U+23CE)
|
|
92
|
+
|
|
93
|
+
|Line separator
|
|
94
|
+
|U+2028
|
|
95
|
+
|`⤓`
|
|
96
|
+
|Downwards Arrow to Bar (U+2913)
|
|
97
|
+
|
|
98
|
+
|Paragraph separator
|
|
99
|
+
|U+2029
|
|
100
|
+
|`⤓`
|
|
101
|
+
|Downwards Arrow to Bar (U+2913)
|
|
102
|
+
|===
|
|
103
|
+
|
|
104
|
+
=== Unicode spaces
|
|
105
|
+
|
|
106
|
+
[cols="1,1,1,2"]
|
|
107
|
+
|===
|
|
108
|
+
|Character |Unicode |Symbol |Description
|
|
109
|
+
|
|
110
|
+
|En space
|
|
111
|
+
|U+2002
|
|
112
|
+
|`▭`
|
|
113
|
+
|White Rectangle (U+25AD)
|
|
114
|
+
|
|
115
|
+
|Em space
|
|
116
|
+
|U+2003
|
|
117
|
+
|`▬`
|
|
118
|
+
|Black Rectangle (U+25AC)
|
|
119
|
+
|
|
120
|
+
|Four-per-em space
|
|
121
|
+
|U+2005
|
|
122
|
+
|`⏓`
|
|
123
|
+
|Metrical Short Over Long (U+23D3)
|
|
124
|
+
|
|
125
|
+
|Six-per-em space
|
|
126
|
+
|U+2006
|
|
127
|
+
|`⏕`
|
|
128
|
+
|Metrical Two Shorts Over Long (U+23D5)
|
|
129
|
+
|
|
130
|
+
|Thin space
|
|
131
|
+
|U+2009
|
|
132
|
+
|`▯`
|
|
133
|
+
|White Vertical Rectangle (U+25AF)
|
|
134
|
+
|
|
135
|
+
|Hair space
|
|
136
|
+
|U+200A
|
|
137
|
+
|`▮`
|
|
138
|
+
|Black Vertical Rectangle (U+25AE)
|
|
139
|
+
|
|
140
|
+
|Figure space
|
|
141
|
+
|U+2007
|
|
142
|
+
|`□`
|
|
143
|
+
|White Square (U+25A1)
|
|
144
|
+
|
|
145
|
+
|Narrow no-break space
|
|
146
|
+
|U+202F
|
|
147
|
+
|`▫`
|
|
148
|
+
|White Small Square (U+25AB)
|
|
149
|
+
|
|
150
|
+
|Medium mathematical space
|
|
151
|
+
|U+205F
|
|
152
|
+
|`▭`
|
|
153
|
+
|White Rectangle (U+25AD)
|
|
154
|
+
|
|
155
|
+
|Ideographic space
|
|
156
|
+
|U+3000
|
|
157
|
+
|`⎵`
|
|
158
|
+
|Bottom Square Bracket (U+23B5)
|
|
159
|
+
|
|
160
|
+
|Ideographic half fill space
|
|
161
|
+
|U+303F
|
|
162
|
+
|`⏑`
|
|
163
|
+
|Metrical Breve (U+23D1)
|
|
164
|
+
|===
|
|
165
|
+
|
|
166
|
+
=== Zero-width characters
|
|
167
|
+
|
|
168
|
+
[cols="1,1,1,2"]
|
|
169
|
+
|===
|
|
170
|
+
|Character |Unicode |Symbol |Description
|
|
171
|
+
|
|
172
|
+
|Zero-width space
|
|
173
|
+
|U+200B
|
|
174
|
+
|`→`
|
|
175
|
+
|Rightwards Arrow (U+2192)
|
|
176
|
+
|
|
177
|
+
|Zero-width non-joiner
|
|
178
|
+
|U+200C
|
|
179
|
+
|`↛`
|
|
180
|
+
|Rightwards Arrow with Stroke (U+219B)
|
|
181
|
+
|
|
182
|
+
|Zero-width joiner
|
|
183
|
+
|U+200D
|
|
184
|
+
|`⇢`
|
|
185
|
+
|Rightwards Dashed Arrow (U+21E2)
|
|
186
|
+
|
|
187
|
+
|Zero-width no-break space (BOM)
|
|
188
|
+
|U+FEFF
|
|
189
|
+
|`⇨`
|
|
190
|
+
|Rightwards White Arrow (U+21E8)
|
|
191
|
+
|===
|
|
192
|
+
|
|
193
|
+
=== Bidirectional/RTL markers
|
|
194
|
+
|
|
195
|
+
[cols="1,1,1,2"]
|
|
196
|
+
|===
|
|
197
|
+
|Character |Unicode |Symbol |Description
|
|
198
|
+
|
|
199
|
+
|Left-to-right mark
|
|
200
|
+
|U+200E
|
|
201
|
+
|`⟹`
|
|
202
|
+
|Long Rightwards Double Arrow (U+27F9)
|
|
203
|
+
|
|
204
|
+
|Right-to-left mark
|
|
205
|
+
|U+200F
|
|
206
|
+
|`⟸`
|
|
207
|
+
|Long Leftwards Double Arrow (U+27F8)
|
|
208
|
+
|
|
209
|
+
|LTR embedding
|
|
210
|
+
|U+202A
|
|
211
|
+
|`⇒`
|
|
212
|
+
|Rightwards Double Arrow (U+21D2)
|
|
213
|
+
|
|
214
|
+
|RTL embedding
|
|
215
|
+
|U+202B
|
|
216
|
+
|`⇐`
|
|
217
|
+
|Leftwards Double Arrow (U+21D0)
|
|
218
|
+
|
|
219
|
+
|Pop directional formatting
|
|
220
|
+
|U+202C
|
|
221
|
+
|`↔`
|
|
222
|
+
|Left Right Arrow (U+2194)
|
|
223
|
+
|
|
224
|
+
|LTR override
|
|
225
|
+
|U+202D
|
|
226
|
+
|`⇉`
|
|
227
|
+
|Rightwards Paired Arrows (U+21C9)
|
|
228
|
+
|
|
229
|
+
|RTL override
|
|
230
|
+
|U+202E
|
|
231
|
+
|`⇇`
|
|
232
|
+
|Leftwards Paired Arrows (U+21C7)
|
|
233
|
+
|===
|
|
234
|
+
|
|
235
|
+
=== Control characters
|
|
236
|
+
|
|
237
|
+
[cols="1,1,1,2"]
|
|
238
|
+
|===
|
|
239
|
+
|Character |Unicode |Symbol |Description
|
|
240
|
+
|
|
241
|
+
|Null
|
|
242
|
+
|U+0000
|
|
243
|
+
|`␀`
|
|
244
|
+
|Symbol for Null (U+2400)
|
|
245
|
+
|
|
246
|
+
|Soft hyphen
|
|
247
|
+
|U+00AD
|
|
248
|
+
|`‐`
|
|
249
|
+
|Hyphen (U+2010)
|
|
250
|
+
|
|
251
|
+
|Backspace
|
|
252
|
+
|U+0008
|
|
253
|
+
|`␈`
|
|
254
|
+
|Symbol for Backspace (U+2408)
|
|
255
|
+
|
|
256
|
+
|Delete
|
|
257
|
+
|U+007F
|
|
258
|
+
|`␡`
|
|
259
|
+
|Symbol for Delete (U+2421)
|
|
260
|
+
|===
|
|
261
|
+
|
|
262
|
+
== CJK safety
|
|
263
|
+
|
|
264
|
+
The visualization characters are specifically chosen to avoid conflicts with
|
|
265
|
+
CJK text:
|
|
266
|
+
|
|
267
|
+
**Avoided characters**:
|
|
268
|
+
|
|
269
|
+
* **No middle dots** (`·`) - commonly used as separators in CJK
|
|
270
|
+
* **No bullets** (`∙`) - used in CJK lists
|
|
271
|
+
* **No circles** (`◌◍◎`) - look similar to CJK characters like ○ ●
|
|
272
|
+
* **No small dots** (`⋅`) - conflict with CJK punctuation
|
|
273
|
+
|
|
274
|
+
**Used instead**:
|
|
275
|
+
|
|
276
|
+
* Box characters (`□▭▬▯▮▫`) for various space types
|
|
277
|
+
* Arrow symbols (`→↛⇢⇨⟹⟸⇒⇐`) for zero-width and directional characters
|
|
278
|
+
* Control Pictures block symbols (`␀␈␡`) for control characters
|
|
279
|
+
|
|
280
|
+
== Examples in use
|
|
281
|
+
|
|
282
|
+
=== Space added
|
|
283
|
+
|
|
284
|
+
.Regular space added
|
|
285
|
+
[example]
|
|
286
|
+
====
|
|
287
|
+
[source]
|
|
288
|
+
----
|
|
289
|
+
10| -| <tag>Value</tag> # No space
|
|
290
|
+
| 10+| <tag>░Value</tag> # Space added (green light shade)
|
|
291
|
+
----
|
|
292
|
+
|
|
293
|
+
The `░` symbol clearly shows a regular space was added between `<tag>` and
|
|
294
|
+
`Value`.
|
|
295
|
+
====
|
|
296
|
+
|
|
297
|
+
=== Tab vs spaces
|
|
298
|
+
|
|
299
|
+
.Tab replaced with spaces
|
|
300
|
+
[example]
|
|
301
|
+
====
|
|
302
|
+
[source]
|
|
303
|
+
----
|
|
304
|
+
15| -| <tag>⇥Value</tag> # Tab (red arrow-to-bar)
|
|
305
|
+
| 15+| <tag>░░Value</tag> # Two spaces (green light shades)
|
|
306
|
+
----
|
|
307
|
+
|
|
308
|
+
The difference between a tab (`⇥`) and two spaces (`░░`) is immediately
|
|
309
|
+
visible.
|
|
310
|
+
====
|
|
311
|
+
|
|
312
|
+
=== Non-breaking space
|
|
313
|
+
|
|
314
|
+
.Non-breaking space from web copy-paste
|
|
315
|
+
[example]
|
|
316
|
+
====
|
|
317
|
+
Without visualization, these look identical:
|
|
318
|
+
|
|
319
|
+
[source,xml]
|
|
320
|
+
----
|
|
321
|
+
<foreword id="fwd">
|
|
322
|
+
<foreword id="fwd">
|
|
323
|
+
----
|
|
324
|
+
|
|
325
|
+
With visualization:
|
|
326
|
+
|
|
327
|
+
[source]
|
|
328
|
+
----
|
|
329
|
+
4| -| <foreword░id="fwd"> # Regular space (U+0020)
|
|
330
|
+
| 4+| <foreword␣id="fwd"> # Non-breaking space (U+00A0)
|
|
331
|
+
----
|
|
332
|
+
|
|
333
|
+
The different symbols (`░` vs `␣`) clearly show that one uses a regular space
|
|
334
|
+
while the other uses a non-breaking space, likely copied from a web page.
|
|
335
|
+
====
|
|
336
|
+
|
|
337
|
+
=== Zero-width space
|
|
338
|
+
|
|
339
|
+
.Zero-width space (completely invisible)
|
|
340
|
+
[example]
|
|
341
|
+
====
|
|
342
|
+
Zero-width characters are invisible but affect comparison:
|
|
343
|
+
|
|
344
|
+
[source,xml]
|
|
345
|
+
----
|
|
346
|
+
<item>Widget</item>
|
|
347
|
+
<item>Widget</item> <!-- Contains U+200B zero-width space after "Widget" -->
|
|
348
|
+
----
|
|
349
|
+
|
|
350
|
+
The diff shows:
|
|
351
|
+
|
|
352
|
+
[source]
|
|
353
|
+
----
|
|
354
|
+
5| -| <item>Widget</item>
|
|
355
|
+
| 5+| <item>Widget→</item> # Zero-width space visualized as →
|
|
356
|
+
----
|
|
357
|
+
|
|
358
|
+
The rightwards arrow (`→`) reveals the presence of a zero-width space.
|
|
359
|
+
====
|
|
360
|
+
|
|
361
|
+
=== Mixed invisible characters
|
|
362
|
+
|
|
363
|
+
.Multiple whitespace types
|
|
364
|
+
[example]
|
|
365
|
+
====
|
|
366
|
+
[source]
|
|
367
|
+
----
|
|
368
|
+
30| -| <p>Text▬more</p> # Em space (red black rectangle)
|
|
369
|
+
| 30+| <p>Text░more</p> # Regular space (green light shade)
|
|
370
|
+
----
|
|
371
|
+
|
|
372
|
+
Different space types shown with different symbols.
|
|
373
|
+
====
|
|
374
|
+
|
|
375
|
+
== Real-world scenarios
|
|
376
|
+
|
|
377
|
+
=== Web copy-paste
|
|
378
|
+
|
|
379
|
+
**Problem**: Text copied from web pages often contains non-breaking spaces
|
|
380
|
+
(U+00A0) instead of regular spaces.
|
|
381
|
+
|
|
382
|
+
.Detection example
|
|
383
|
+
[example]
|
|
384
|
+
====
|
|
385
|
+
[source]
|
|
386
|
+
----
|
|
387
|
+
4| -| <p>Hello░world</p> # U+0020 (regular space)
|
|
388
|
+
| 4+| <p>Hello␣world</p> # U+00A0 (non-breaking space)
|
|
389
|
+
----
|
|
390
|
+
|
|
391
|
+
The `␣` symbol immediately identifies the non-breaking space.
|
|
392
|
+
====
|
|
393
|
+
|
|
394
|
+
=== Smart quotes
|
|
395
|
+
|
|
396
|
+
**Problem**: Text editors may automatically convert straight quotes to curly
|
|
397
|
+
quotes.
|
|
398
|
+
|
|
399
|
+
.Detection example
|
|
400
|
+
[example]
|
|
401
|
+
====
|
|
402
|
+
[source]
|
|
403
|
+
----
|
|
404
|
+
10| -| <title>John's Book</title> # Straight apostrophe
|
|
405
|
+
| 10+| <title>John’s Book</title> # Curly apostrophe (U+2019)
|
|
406
|
+
----
|
|
407
|
+
|
|
408
|
+
The non-ASCII warning will alert you to the smart quote.
|
|
409
|
+
====
|
|
410
|
+
|
|
411
|
+
=== Template generation
|
|
412
|
+
|
|
413
|
+
**Problem**: Generated output has invisible character differences.
|
|
414
|
+
|
|
415
|
+
.Detection example
|
|
416
|
+
[example]
|
|
417
|
+
====
|
|
418
|
+
[source]
|
|
419
|
+
----
|
|
420
|
+
20| -| <item>Value→</item> # Zero-width space present
|
|
421
|
+
| 20+| <item>Value</item> # No zero-width space
|
|
422
|
+
----
|
|
423
|
+
|
|
424
|
+
The `→` symbol reveals the zero-width space in generated content.
|
|
425
|
+
====
|
|
426
|
+
|
|
427
|
+
== Customizing character visualization
|
|
428
|
+
|
|
429
|
+
You can customize the visualization map for specific needs.
|
|
430
|
+
|
|
431
|
+
=== Custom map
|
|
432
|
+
|
|
433
|
+
[source,ruby]
|
|
434
|
+
----
|
|
435
|
+
require 'canon/diff_formatter'
|
|
436
|
+
|
|
437
|
+
# Create custom visualization map
|
|
438
|
+
custom_map = Canon::DiffFormatter.merge_visualization_map({
|
|
439
|
+
' ' => '·', # Use middle dot for spaces (if not using CJK)
|
|
440
|
+
"\t" => '→', # Use simple arrow for tabs
|
|
441
|
+
"\u200B" => '⚠' # Warning symbol for zero-width space
|
|
442
|
+
})
|
|
443
|
+
|
|
444
|
+
# Use custom map with formatter
|
|
445
|
+
formatter = Canon::DiffFormatter.new(
|
|
446
|
+
use_color: true,
|
|
447
|
+
visualization_map: custom_map
|
|
448
|
+
)
|
|
449
|
+
|
|
450
|
+
# The custom map merges with defaults, so unspecified
|
|
451
|
+
# characters still use the default visualization
|
|
452
|
+
----
|
|
453
|
+
|
|
454
|
+
=== When to customize
|
|
455
|
+
|
|
456
|
+
**Use custom visualization when**:
|
|
457
|
+
|
|
458
|
+
* Working with non-CJK text exclusively
|
|
459
|
+
* Preferring simpler symbols
|
|
460
|
+
* Needing specific character highlighting
|
|
461
|
+
* Integrating with existing tools
|
|
462
|
+
|
|
463
|
+
**Keep defaults when**:
|
|
464
|
+
|
|
465
|
+
* Working with CJK text
|
|
466
|
+
* Maximum compatibility needed
|
|
467
|
+
* Standard behavior preferred
|
|
468
|
+
|
|
469
|
+
== Configuration
|
|
470
|
+
|
|
471
|
+
Character visualization is automatically enabled when `use_color: true` is set and
|
|
472
|
+
applies across all Canon interfaces.
|
|
473
|
+
|
|
474
|
+
=== Enabling/disabling
|
|
475
|
+
|
|
476
|
+
Visualization is tied to color output:
|
|
477
|
+
|
|
478
|
+
[source,ruby]
|
|
479
|
+
----
|
|
480
|
+
# Enable (visualization active)
|
|
481
|
+
diff: { use_color: true }
|
|
482
|
+
|
|
483
|
+
# Disable (no visualization)
|
|
484
|
+
diff: { use_color: false }
|
|
485
|
+
----
|
|
486
|
+
|
|
487
|
+
=== Interface configuration
|
|
488
|
+
|
|
489
|
+
.Ruby API
|
|
490
|
+
[example]
|
|
491
|
+
====
|
|
492
|
+
[source,ruby]
|
|
493
|
+
----
|
|
494
|
+
# Visualization enabled by default
|
|
495
|
+
Canon::Comparison.equivalent?(doc1, doc2,
|
|
496
|
+
verbose: true,
|
|
497
|
+
diff: { use_color: true } # Visualization active
|
|
498
|
+
)
|
|
499
|
+
|
|
500
|
+
# Disable for plain text
|
|
501
|
+
Canon::Comparison.equivalent?(doc1, doc2,
|
|
502
|
+
verbose: true,
|
|
503
|
+
diff: { use_color: false } # No visualization
|
|
504
|
+
)
|
|
505
|
+
----
|
|
506
|
+
====
|
|
507
|
+
|
|
508
|
+
.CLI
|
|
509
|
+
[example]
|
|
510
|
+
====
|
|
511
|
+
[source,bash]
|
|
512
|
+
----
|
|
513
|
+
# Enable (default)
|
|
514
|
+
$ canon diff file1.xml file2.xml --verbose
|
|
515
|
+
|
|
516
|
+
# Disable
|
|
517
|
+
$ canon diff file1.xml file2.xml --no-color --verbose
|
|
518
|
+
----
|
|
519
|
+
====
|
|
520
|
+
|
|
521
|
+
.RSpec
|
|
522
|
+
[example]
|
|
523
|
+
====
|
|
524
|
+
[source,ruby]
|
|
525
|
+
----
|
|
526
|
+
Canon::RSpecMatchers.configure do |config|
|
|
527
|
+
# Enable for local development
|
|
528
|
+
config.xml.diff.use_color = !ENV['CI']
|
|
529
|
+
end
|
|
530
|
+
----
|
|
531
|
+
====
|
|
532
|
+
|
|
533
|
+
== Troubleshooting
|
|
534
|
+
|
|
535
|
+
=== Visualization not showing
|
|
536
|
+
|
|
537
|
+
**Problem**: Invisible characters not visualized.
|
|
538
|
+
|
|
539
|
+
**Solutions**:
|
|
540
|
+
|
|
541
|
+
* Ensure `use_color: true`
|
|
542
|
+
* Check terminal supports Unicode
|
|
543
|
+
* Verify the characters are in diff lines (not context lines)
|
|
544
|
+
|
|
545
|
+
=== Wrong symbols displayed
|
|
546
|
+
|
|
547
|
+
**Problem**: Symbols appear garbled or as boxes.
|
|
548
|
+
|
|
549
|
+
**Solutions**:
|
|
550
|
+
|
|
551
|
+
* Use terminal with Unicode support
|
|
552
|
+
* Install Unicode-compatible font
|
|
553
|
+
* Check terminal encoding (should be UTF-8)
|
|
554
|
+
|
|
555
|
+
=== CJK text affected
|
|
556
|
+
|
|
557
|
+
**Problem**: Visualization conflicts with CJK text.
|
|
558
|
+
|
|
559
|
+
**Solution**: Canon's defaults are CJK-safe. If using a custom map, avoid the
|
|
560
|
+
characters listed in the "CJK safety" section.
|
|
561
|
+
|
|
562
|
+
== See also
|
|
563
|
+
|
|
564
|
+
* link:DIFF_FORMATTING[Diff formatting]
|
|
565
|
+
* link:MODES[Diff modes]
|
|
566
|
+
* link:MATCH_ARCHITECTURE[Match architecture]
|
|
567
|
+
* link:RUBY_API[Ruby API documentation]
|