canon 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +112 -25
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +82 -2
- data/docs/features/match-options/index.adoc +239 -1
- data/lib/canon/comparison/format_detector.rb +2 -1
- data/lib/canon/comparison/html_comparator.rb +19 -8
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
- data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
- data/lib/canon/comparison/xml_comparator.rb +48 -23
- data/lib/canon/comparison/xml_node_comparison.rb +25 -3
- data/lib/canon/diff/diff_classifier.rb +101 -2
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/rspec_matchers.rb +37 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +3 -78
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
- data/false_positive_analysis.txt +0 -0
- data/file1.html +0 -1
- data/file2.html +0 -1
- data/old-docs/ADVANCED_TOPICS.adoc +0 -20
- data/old-docs/BASIC_USAGE.adoc +0 -16
- data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
- data/old-docs/CLI.adoc +0 -497
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/old-docs/DIFF_FORMATTING.adoc +0 -540
- data/old-docs/DIFF_PARAMETERS.adoc +0 -261
- data/old-docs/DOM_DIFF.adoc +0 -1017
- data/old-docs/ENV_CONFIG.adoc +0 -876
- data/old-docs/FORMATS.adoc +0 -867
- data/old-docs/INPUT_VALIDATION.adoc +0 -477
- data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
- data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/old-docs/MATCH_OPTIONS.adoc +0 -912
- data/old-docs/MODES.adoc +0 -432
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/old-docs/OPTIONS.adoc +0 -1387
- data/old-docs/PREPROCESSING.adoc +0 -491
- data/old-docs/README.old.adoc +0 -2831
- data/old-docs/RSPEC.adoc +0 -814
- data/old-docs/RUBY_API.adoc +0 -485
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
- data/old-docs/STRING_COMPARE.adoc +0 -345
- data/old-docs/TMP.adoc +0 -3384
- data/old-docs/TREE_DIFF.adoc +0 -1080
- data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
- data/old-docs/VERBOSE.adoc +0 -482
- data/old-docs/VISUALIZATION_MAP.adoc +0 -625
- data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
- data/scripts/analyze_current_state.rb +0 -85
- data/scripts/analyze_false_positives.rb +0 -114
- data/scripts/analyze_remaining_failures.rb +0 -105
- data/scripts/compare_current_failures.rb +0 -95
- data/scripts/compare_dom_tree_diff.rb +0 -158
- data/scripts/compare_failures.rb +0 -151
- data/scripts/debug_attribute_extraction.rb +0 -66
- data/scripts/debug_blocks_839.rb +0 -115
- data/scripts/debug_meta_matching.rb +0 -52
- data/scripts/debug_p_matching.rb +0 -192
- data/scripts/debug_signature_matching.rb +0 -118
- data/scripts/debug_sourcecode_124.rb +0 -32
- data/scripts/debug_whitespace_sensitive.rb +0 -192
- data/scripts/extract_false_positives.rb +0 -138
- data/scripts/find_actual_false_positives.rb +0 -125
- data/scripts/investigate_all_false_positives.rb +0 -161
- data/scripts/investigate_batch1.rb +0 -127
- data/scripts/investigate_classification.rb +0 -150
- data/scripts/investigate_classification_detailed.rb +0 -190
- data/scripts/investigate_common_failures.rb +0 -342
- data/scripts/investigate_false_negative.rb +0 -80
- data/scripts/investigate_false_positive.rb +0 -83
- data/scripts/investigate_false_positives.rb +0 -227
- data/scripts/investigate_false_positives_batch.rb +0 -163
- data/scripts/investigate_mixed_content.rb +0 -125
- data/scripts/investigate_remaining_16.rb +0 -214
- data/scripts/run_single_test.rb +0 -29
- data/scripts/test_all_false_positives.rb +0 -95
- data/scripts/test_attribute_details.rb +0 -61
- data/scripts/test_both_algorithms.rb +0 -49
- data/scripts/test_both_simple.rb +0 -49
- data/scripts/test_enhanced_semantic_output.rb +0 -125
- data/scripts/test_readme_examples.rb +0 -131
- data/scripts/test_semantic_tree_diff.rb +0 -99
- data/scripts/test_semantic_ux_improvements.rb +0 -135
- data/scripts/test_single_false_positive.rb +0 -119
- data/scripts/test_size_limits.rb +0 -99
- data/test_html_1.html +0 -21
- data/test_html_2.html +0 -21
- data/test_nokogiri.rb +0 -33
- data/test_normalize.rb +0 -45
|
@@ -1,261 +0,0 @@
|
|
|
1
|
-
== Diff renderer
|
|
2
|
-
|
|
3
|
-
=== General
|
|
4
|
-
|
|
5
|
-
A diff renderer is responsible for taking a diff report and producing a
|
|
6
|
-
human-readable representation of the differences between two text files.
|
|
7
|
-
|
|
8
|
-
Canon provides a built-in diff renderer that produces a colored
|
|
9
|
-
diff output, highlighting the differences between the two files,
|
|
10
|
-
in either a by-object or by-line format.
|
|
11
|
-
|
|
12
|
-
=== Concepts
|
|
13
|
-
|
|
14
|
-
==== Diff report
|
|
15
|
-
|
|
16
|
-
A diff report is a structured representation of the complete differences between
|
|
17
|
-
two text files. It is composed of multiple diff contexts.
|
|
18
|
-
|
|
19
|
-
A diff report serves as the input to a diff renderer, which processes the report
|
|
20
|
-
to generate a human-readable diff output.
|
|
21
|
-
|
|
22
|
-
A diff report is generated by a comparison engine that analyzes the two text
|
|
23
|
-
files and identifies the differences between them.
|
|
24
|
-
|
|
25
|
-
==== Diff context
|
|
26
|
-
|
|
27
|
-
A diff context is a representation of a group of diff blocks with surrounding
|
|
28
|
-
grouping lines.
|
|
29
|
-
|
|
30
|
-
When the number of grouping lines is set to 0, each diff block is treated as its
|
|
31
|
-
own context.
|
|
32
|
-
|
|
33
|
-
When the number of grouping lines is greater than 0, multiple diff blocks that are close to each other can be grouped together into a single context.
|
|
34
|
-
|
|
35
|
-
==== Diff block
|
|
36
|
-
|
|
37
|
-
A diff block is a representation of a contiguous block of changes.
|
|
38
|
-
|
|
39
|
-
In typical line-based diffing, a diff block consists of a run of consecutive lines that have been added, removed, or modified.
|
|
40
|
-
|
|
41
|
-
In Canon, which uses semantic diffing, a diff block is a representation of a
|
|
42
|
-
contiguous block of changes, which may be a many-to-many mapping of changed
|
|
43
|
-
lines depending on the nature of the change.
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
=== Parameters
|
|
47
|
-
|
|
48
|
-
Canon diff renderers support the following parameters:
|
|
49
|
-
|
|
50
|
-
==== Parameters
|
|
51
|
-
|
|
52
|
-
The following table shows all available diff formatting parameters and their
|
|
53
|
-
availability across interfaces:
|
|
54
|
-
|
|
55
|
-
[cols="1,1,1,1,2,1"]
|
|
56
|
-
|===
|
|
57
|
-
|Parameter |RSpec |CLI |Ruby API |Description |Default
|
|
58
|
-
|
|
59
|
-
|`use_color`
|
|
60
|
-
|✓
|
|
61
|
-
|✓
|
|
62
|
-
|✓
|
|
63
|
-
|Enable/disable colored output
|
|
64
|
-
|`true`
|
|
65
|
-
|
|
66
|
-
|`diff_mode`
|
|
67
|
-
|✓
|
|
68
|
-
|✓
|
|
69
|
-
|✓
|
|
70
|
-
|Comparison mode: `:by_object` or `:by_line`
|
|
71
|
-
|`:by_line` (RSpec), `:by_object` (XML/JSON/YAML)
|
|
72
|
-
|
|
73
|
-
|`context_lines`
|
|
74
|
-
|✓
|
|
75
|
-
|✓
|
|
76
|
-
|✓
|
|
77
|
-
|Number of unchanged lines to show around each change
|
|
78
|
-
|`3`
|
|
79
|
-
|
|
80
|
-
|`diff_grouping_lines`
|
|
81
|
-
|✓
|
|
82
|
-
|✓
|
|
83
|
-
|✓
|
|
84
|
-
|Maximum line distance to group separate diffs into context blocks
|
|
85
|
-
|`10`
|
|
86
|
-
|===
|
|
87
|
-
|
|
88
|
-
==== Use color
|
|
89
|
-
|
|
90
|
-
`use_color: <boolean>` default: `true`
|
|
91
|
-
|
|
92
|
-
Specifies whether to produce colored diff output using ANSI color codes.
|
|
93
|
-
|
|
94
|
-
When `use_color` is `true`, the diff output includes ANSI color codes to
|
|
95
|
-
enhance readability by visually distinguishing different types of changes.
|
|
96
|
-
|
|
97
|
-
* Type: Boolean
|
|
98
|
-
* Default: `true`
|
|
99
|
-
* Colors used:
|
|
100
|
-
** Red: Deletions/removed content
|
|
101
|
-
** Green: Additions/inserted content
|
|
102
|
-
** Yellow: Modified content
|
|
103
|
-
** Cyan: Element names and structure
|
|
104
|
-
|
|
105
|
-
When `use_color` is `false`:
|
|
106
|
-
|
|
107
|
-
* Line numbers and pipes are plain text
|
|
108
|
-
* Whitespace is not visualized (remains invisible)
|
|
109
|
-
* Unicode legend is still shown (but without color)
|
|
110
|
-
* Content changes are shown without color highlighting
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
**Purpose**: Improve readability by distinguishing structural elements from
|
|
114
|
-
content changes.
|
|
115
|
-
|
|
116
|
-
When color mode is enabled (`use_color: true`), the diff formatter uses a
|
|
117
|
-
consistent color scheme:
|
|
118
|
-
|
|
119
|
-
* **Yellow**: Line numbers and pipe separators
|
|
120
|
-
* **Red**: Deletion markers (`-`) and removed content
|
|
121
|
-
* **Green**: Addition markers (`+`) and inserted content
|
|
122
|
-
* **Default terminal color**: Unchanged context lines (no ANSI codes applied)
|
|
123
|
-
|
|
124
|
-
This color scheme helps differentiate between:
|
|
125
|
-
|
|
126
|
-
* The diff structure (line numbers, pipes)
|
|
127
|
-
* Content that was removed (red)
|
|
128
|
-
* Content that was added (green)
|
|
129
|
-
* Content that stayed the same (your terminal's default color)
|
|
130
|
-
|
|
131
|
-
.Example colored diff output
|
|
132
|
-
[example]
|
|
133
|
-
In a colored terminal, a typical diff line appears as:
|
|
134
|
-
|
|
135
|
-
[source]
|
|
136
|
-
----
|
|
137
|
-
5| 5 | <p>First paragraph</p> # Context line (yellow numbers/pipes, default text)
|
|
138
|
-
6| -| <old>Text</old> # Deletion (yellow numbers/pipes, red marker/content)
|
|
139
|
-
| 6+| <new>Text</new> # Addition (yellow numbers/pipes, green marker/content)
|
|
140
|
-
----
|
|
141
|
-
|
|
142
|
-
Where:
|
|
143
|
-
|
|
144
|
-
* Line numbers (`5`, `6`) are in yellow
|
|
145
|
-
* Pipe separators (`|`) are in yellow
|
|
146
|
-
* Markers (`-`, `+`) are in red/green respectively
|
|
147
|
-
* Changed content is highlighted in red (deletions) or green (additions)
|
|
148
|
-
* Unchanged content uses your terminal's default color (no forced white/black)
|
|
149
|
-
|
|
150
|
-
**Why this matters**: When running tests with RSpec, the framework initially sets
|
|
151
|
-
output to red. Canon's diff formatter explicitly resets colors to prevent RSpec's
|
|
152
|
-
red from bleeding into the diff output, ensuring consistent and readable diffs.
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
==== Diff mode
|
|
156
|
-
|
|
157
|
-
`diff_mode: <string>` default: `by_line`
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
==== Context lines
|
|
161
|
-
|
|
162
|
-
`context_lines: <number>` default: `3`
|
|
163
|
-
|
|
164
|
-
Specifies the number of context lines to show before and after each diff block.
|
|
165
|
-
|
|
166
|
-
Usage:
|
|
167
|
-
|
|
168
|
-
[source,ruby]
|
|
169
|
-
----
|
|
170
|
-
renderer = Canon::DiffFormatter::Renderer.new(context_lines: 5)
|
|
171
|
-
diff_output = renderer.render(diff_report)
|
|
172
|
-
----
|
|
173
|
-
|
|
174
|
-
.Example of XML line-by-line diff with context lines set to 3
|
|
175
|
-
[example]
|
|
176
|
-
====
|
|
177
|
-
There are 3 context lines before and after the diff block:
|
|
178
|
-
|
|
179
|
-
[source]
|
|
180
|
-
----
|
|
181
|
-
Line-by-line diff (XML mode):
|
|
182
|
-
Character Visualization Legend:
|
|
183
|
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
184
|
-
Whitespace:
|
|
185
|
-
'␣': U+00A0 (' ') NO-Break-Space
|
|
186
|
-
|
|
187
|
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
188
|
-
|
|
189
|
-
29| 29 | <eref bibitemid="_" citeas="ISO 639-2" id="_" type="inline">
|
|
190
|
-
30| 30 | </eref>
|
|
191
|
-
31| 31 | <semx element="eref" source="_">
|
|
192
|
-
32| - | <fmt-eref░bibitemid="_"░citeas="ISO░639-2"░type="inline">ISO\u00a0639-2</fmt-eref>
|
|
193
|
-
| 32+ | <fmt-eref░bibitemid="_"░citeas="ISO░639-2"░type="inline">ISO␣639-2</fmt-eref>
|
|
194
|
-
33| 33 | </semx>
|
|
195
|
-
34| 34 | </p>
|
|
196
|
-
35| 35 | </clause>
|
|
197
|
-
----
|
|
198
|
-
====
|
|
199
|
-
|
|
200
|
-
==== Grouping lines
|
|
201
|
-
|
|
202
|
-
`diff_grouping_lines: <number>` default: `0`
|
|
203
|
-
|
|
204
|
-
Specifies the maximum line distance within which nearby diff blocks are coalesced into a
|
|
205
|
-
single context.
|
|
206
|
-
|
|
207
|
-
The algorithm groups diff blocks as long as the distance between the previous
|
|
208
|
-
block's end and the next block's start is less than or equal to the grouping
|
|
209
|
-
lines setting.
|
|
210
|
-
|
|
211
|
-
The default value of `0` means that each diff block is treated as its own
|
|
212
|
-
context.
|
|
213
|
-
|
|
214
|
-
When set to `5`, for example, any two diff blocks that are within 5 lines of
|
|
215
|
-
each other will be grouped together into a single context.
|
|
216
|
-
|
|
217
|
-
.Example of XML line-by-line diff with grouping lines set to 10
|
|
218
|
-
[example]
|
|
219
|
-
====
|
|
220
|
-
Here, multiple diff blocks are grouped together into a single context because
|
|
221
|
-
they are within 10 lines of each other.
|
|
222
|
-
|
|
223
|
-
[source]
|
|
224
|
-
----
|
|
225
|
-
Line-by-line diff (XML mode):
|
|
226
|
-
Character Visualization Legend:
|
|
227
|
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
228
|
-
Whitespace:
|
|
229
|
-
'␣': U+00A0 (' ') NO-Break-Space
|
|
230
|
-
|
|
231
|
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
232
|
-
|
|
233
|
-
22| 22 | <span class="fmt-element-name">Figure</span>
|
|
234
|
-
23| 23 | <semx element="autonum" source="B1">1</semx>
|
|
235
|
-
24| 24 | </span>
|
|
236
|
-
25| - | <span░class="fmt-caption-delim">\u00a0—░</span>
|
|
237
|
-
| 25+ | <span░class="fmt-caption-delim">␣—░</span>
|
|
238
|
-
26| 26 | <semx element="name" source="_">First</semx>
|
|
239
|
-
27| 27 | </fmt-name>
|
|
240
|
-
28| 28 | <fmt-xref-label>
|
|
241
|
-
|
|
242
|
-
57| 57 | <span class="fmt-element-name">Figure</span>
|
|
243
|
-
58| 58 | <semx element="autonum" source="B2">2</semx>
|
|
244
|
-
59| 59 | </span>
|
|
245
|
-
60| - | <span░class="fmt-caption-delim">\u00a0—░</span>
|
|
246
|
-
| 60+ | <span░class="fmt-caption-delim">␣—░</span>
|
|
247
|
-
61| 61 | <semx element="name" source="_">Second</semx>
|
|
248
|
-
62| 62 | </fmt-name>
|
|
249
|
-
63| 63 | <fmt-xref-label>
|
|
250
|
-
|
|
251
|
-
100| 100 | <span class="fmt-element-name">Figure</span>
|
|
252
|
-
101| 101 | <semx element="autonum" source="B3">3</semx>
|
|
253
|
-
102| 102 | </span>
|
|
254
|
-
103| - | <span░class="fmt-caption-delim">\u00a0—░</span>
|
|
255
|
-
| 103+ | <span░class="fmt-caption-delim">␣—░</span>
|
|
256
|
-
104| 104 | <semx element="name" source="_">Third</semx>
|
|
257
|
-
105| 105 | </fmt-name>
|
|
258
|
-
106| 106 | <fmt-xref-label>
|
|
259
|
-
----
|
|
260
|
-
====
|
|
261
|
-
|