canon 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +31 -149
- data/README.adoc +9 -0
- data/docs/advanced/semantic-diff-report.adoc +96 -0
- data/docs/features/configuration-profiles.adoc +4 -2
- data/docs/features/diff-formatting/index.adoc +3 -0
- data/docs/features/diff-formatting/whitespace-adjacency.adoc +140 -0
- data/docs/features/match-options/html-policies.adoc +2 -0
- data/docs/features/match-options/index.adoc +40 -0
- data/docs/guides/choosing-configuration.adoc +12 -1
- data/docs/reference/cli-options.adoc +3 -0
- data/docs/reference/environment-variables.adoc +3 -1
- data/docs/reference/options-across-interfaces.adoc +7 -1
- data/docs/understanding/formats/html.adoc +9 -2
- data/lib/canon/cli.rb +4 -0
- data/lib/canon/commands/diff_command.rb +1 -0
- data/lib/canon/comparison/comparison_result.rb +95 -2
- data/lib/canon/comparison/html_comparator.rb +96 -11
- data/lib/canon/comparison/markup_comparator.rb +68 -71
- data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
- data/lib/canon/comparison/match_options.rb +23 -2
- data/lib/canon/comparison/node_inspector.rb +103 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +133 -55
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +24 -23
- data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
- data/lib/canon/comparison/xml_comparator.rb +174 -7
- data/lib/canon/comparison/xml_node_comparison.rb +48 -66
- data/lib/canon/comparison.rb +143 -22
- data/lib/canon/config/env_schema.rb +2 -1
- data/lib/canon/config/profiles/metanorma.yml +3 -0
- data/lib/canon/config.rb +51 -5
- data/lib/canon/diff/diff_classifier.rb +55 -41
- data/lib/canon/diff/diff_line_builder.rb +9 -8
- data/lib/canon/diff/xml_serialization_formatter.rb +27 -42
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
- data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
- data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +184 -26
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +92 -4
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
- data/lib/canon/diff_formatter.rb +128 -175
- data/lib/canon/html/data_model.rb +10 -4
- data/lib/canon/pretty_printer/html.rb +76 -14
- data/lib/canon/pretty_printer/html_void_elements.rb +20 -0
- data/lib/canon/pretty_printer/xml_normalized.rb +10 -3
- data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
- data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +59 -5
- data/lib/canon/xml/data_model.rb +13 -1
- data/lib/canon/xml/element_matcher.rb +3 -0
- data/lib/canon/xml/node.rb +23 -1
- data/lib/canon/xml/nodes/comment_node.rb +4 -0
- data/lib/canon/xml/nodes/element_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +4 -0
- data/lib/canon/xml/sax_builder.rb +29 -2
- data/lib/canon/xml/xpath_engine.rb +238 -0
- metadata +9 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 615e3154c89a9850e86c39852201e5573b461ac62d52cc423523e444ace301f7
|
|
4
|
+
data.tar.gz: 37ee00969f0682dde670168fbd7888294edda612220bfbebb7c950efbcb76aa2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bce4239ab6a471edd896fd3b54def4e57e21714078cb3631b55363b50646349a6923eed1e208e5706c3319d3e7a2ae75f2db698ffe853c0e03a754d76c856679
|
|
7
|
+
data.tar.gz: 1441bd5412658d9d2b975e3889fc95bfd080dec2b89b731f71e191f5ca7bbc7e0a8aa63e787916781bd5e653732c16d5c03b0d3fc3b967a3b653a2a735e62636
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-04-
|
|
3
|
+
# on 2026-04-27 09:48:55 UTC using RuboCop version 1.86.0.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -11,125 +11,38 @@ Gemspec/RequiredRubyVersion:
|
|
|
11
11
|
Exclude:
|
|
12
12
|
- 'canon.gemspec'
|
|
13
13
|
|
|
14
|
-
# Offense count:
|
|
14
|
+
# Offense count: 30
|
|
15
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
16
16
|
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
17
17
|
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
18
18
|
Layout/ArgumentAlignment:
|
|
19
19
|
Exclude:
|
|
20
|
-
- 'lib/canon/comparison/whitespace_sensitivity.rb'
|
|
21
20
|
- 'lib/canon/comparison/xml_comparator.rb'
|
|
22
|
-
- '
|
|
23
|
-
- 'lib/canon/config.rb'
|
|
24
|
-
- 'lib/canon/diff/diff_classifier.rb'
|
|
25
|
-
- 'lib/canon/diff_formatter.rb'
|
|
26
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
|
|
27
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
28
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
29
|
-
- 'spec/canon/config/profile_spec.rb'
|
|
30
|
-
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
31
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
32
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
33
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
21
|
+
- 'spec/canon/comparison/html4_html5_whitespace_parity_spec.rb'
|
|
34
22
|
|
|
35
|
-
# Offense count:
|
|
36
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
37
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
38
|
-
# SupportedStyles: with_first_element, with_fixed_indentation
|
|
39
|
-
Layout/ArrayAlignment:
|
|
40
|
-
Exclude:
|
|
41
|
-
- 'lib/canon/comparison/match_options/base_resolver.rb'
|
|
42
|
-
- 'lib/canon/comparison/match_options/xml_resolver.rb'
|
|
43
|
-
- 'spec/canon/config/profile_spec.rb'
|
|
44
|
-
|
|
45
|
-
# Offense count: 16
|
|
23
|
+
# Offense count: 1
|
|
46
24
|
# This cop supports safe autocorrection (--autocorrect).
|
|
47
25
|
# Configuration parameters: EnforcedStyleAlignWith.
|
|
48
26
|
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
49
27
|
Layout/BlockAlignment:
|
|
50
28
|
Exclude:
|
|
51
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/
|
|
52
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
53
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
54
|
-
- 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
|
|
55
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
56
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
57
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
29
|
+
- 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
|
|
58
30
|
|
|
59
|
-
# Offense count:
|
|
31
|
+
# Offense count: 1
|
|
60
32
|
# This cop supports safe autocorrection (--autocorrect).
|
|
61
33
|
Layout/BlockEndNewline:
|
|
62
34
|
Exclude:
|
|
63
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/
|
|
64
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
65
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
66
|
-
- 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
|
|
67
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
68
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
69
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
70
|
-
|
|
71
|
-
# Offense count: 5
|
|
72
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
73
|
-
Layout/ClosingParenthesisIndentation:
|
|
74
|
-
Exclude:
|
|
75
|
-
- 'lib/canon/config/profile_loader.rb'
|
|
76
|
-
- 'lib/canon/diff/diff_classifier.rb'
|
|
77
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
35
|
+
- 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
|
|
78
36
|
|
|
79
37
|
# Offense count: 2
|
|
80
38
|
# This cop supports safe autocorrection (--autocorrect).
|
|
81
|
-
Layout/ElseAlignment:
|
|
82
|
-
Exclude:
|
|
83
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
|
|
84
|
-
|
|
85
|
-
# Offense count: 2
|
|
86
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
87
|
-
# Configuration parameters: EnforcedStyleAlignWith.
|
|
88
|
-
# SupportedStylesAlignWith: keyword, variable, start_of_line
|
|
89
|
-
Layout/EndAlignment:
|
|
90
|
-
Exclude:
|
|
91
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
|
|
92
|
-
|
|
93
|
-
# Offense count: 5
|
|
94
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
95
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
96
|
-
# SupportedStyles: consistent, consistent_relative_to_receiver, special_for_inner_method_call, special_for_inner_method_call_in_parentheses
|
|
97
|
-
Layout/FirstArgumentIndentation:
|
|
98
|
-
Exclude:
|
|
99
|
-
- 'lib/canon/config/profile_loader.rb'
|
|
100
|
-
- 'lib/canon/diff/diff_classifier.rb'
|
|
101
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
102
|
-
|
|
103
|
-
# Offense count: 30
|
|
104
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
105
|
-
# Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
|
|
106
|
-
# SupportedHashRocketStyles: key, separator, table
|
|
107
|
-
# SupportedColonStyles: key, separator, table
|
|
108
|
-
# SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
|
|
109
|
-
Layout/HashAlignment:
|
|
110
|
-
Exclude:
|
|
111
|
-
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
112
|
-
- 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
|
|
113
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
114
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
115
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
116
|
-
|
|
117
|
-
# Offense count: 36
|
|
118
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
119
39
|
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
120
40
|
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
121
41
|
Layout/IndentationWidth:
|
|
122
42
|
Exclude:
|
|
123
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/
|
|
124
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
|
|
125
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
126
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
127
|
-
- 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
|
|
128
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
129
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
130
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
43
|
+
- 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
|
|
131
44
|
|
|
132
|
-
# Offense count:
|
|
45
|
+
# Offense count: 1347
|
|
133
46
|
# This cop supports safe autocorrection (--autocorrect).
|
|
134
47
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
135
48
|
# URISchemes: http, https
|
|
@@ -138,20 +51,12 @@ Layout/LineLength:
|
|
|
138
51
|
|
|
139
52
|
# Offense count: 2
|
|
140
53
|
# This cop supports safe autocorrection (--autocorrect).
|
|
141
|
-
# Configuration parameters: EnforcedStyle.
|
|
142
|
-
# SupportedStyles: symmetrical, new_line, same_line
|
|
143
|
-
Layout/MultilineMethodCallBraceLayout:
|
|
144
|
-
Exclude:
|
|
145
|
-
- 'lib/canon/config/profile_loader.rb'
|
|
146
|
-
- 'lib/canon/diff/diff_classifier.rb'
|
|
147
|
-
|
|
148
|
-
# Offense count: 57
|
|
149
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
150
54
|
# Configuration parameters: AllowInHeredoc.
|
|
151
55
|
Layout/TrailingWhitespace:
|
|
152
|
-
|
|
56
|
+
Exclude:
|
|
57
|
+
- 'lib/canon/comparison/xml_comparator.rb'
|
|
153
58
|
|
|
154
|
-
# Offense count:
|
|
59
|
+
# Offense count: 58
|
|
155
60
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
156
61
|
Lint/DuplicateBranch:
|
|
157
62
|
Enabled: false
|
|
@@ -196,7 +101,7 @@ Lint/UselessConstantScoping:
|
|
|
196
101
|
Exclude:
|
|
197
102
|
- 'lib/canon/diff_formatter/theme.rb'
|
|
198
103
|
|
|
199
|
-
# Offense count:
|
|
104
|
+
# Offense count: 322
|
|
200
105
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
201
106
|
Metrics/AbcSize:
|
|
202
107
|
Enabled: false
|
|
@@ -207,32 +112,32 @@ Metrics/AbcSize:
|
|
|
207
112
|
Metrics/BlockLength:
|
|
208
113
|
Max: 92
|
|
209
114
|
|
|
210
|
-
# Offense count:
|
|
115
|
+
# Offense count: 1
|
|
211
116
|
# Configuration parameters: CountBlocks, CountModifierForms.
|
|
212
117
|
Metrics/BlockNesting:
|
|
213
118
|
Max: 4
|
|
214
119
|
|
|
215
|
-
# Offense count:
|
|
120
|
+
# Offense count: 281
|
|
216
121
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
217
122
|
Metrics/CyclomaticComplexity:
|
|
218
123
|
Enabled: false
|
|
219
124
|
|
|
220
|
-
# Offense count:
|
|
125
|
+
# Offense count: 517
|
|
221
126
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
222
127
|
Metrics/MethodLength:
|
|
223
128
|
Max: 146
|
|
224
129
|
|
|
225
|
-
# Offense count:
|
|
130
|
+
# Offense count: 56
|
|
226
131
|
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
|
227
132
|
Metrics/ParameterLists:
|
|
228
133
|
Max: 10
|
|
229
134
|
|
|
230
|
-
# Offense count:
|
|
135
|
+
# Offense count: 225
|
|
231
136
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
232
137
|
Metrics/PerceivedComplexity:
|
|
233
138
|
Enabled: false
|
|
234
139
|
|
|
235
|
-
# Offense count:
|
|
140
|
+
# Offense count: 30
|
|
236
141
|
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
|
237
142
|
# AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
|
|
238
143
|
Naming/MethodParameterName:
|
|
@@ -260,13 +165,13 @@ Performance/CollectionLiteralInLoop:
|
|
|
260
165
|
- 'lib/canon/xml/xml_base_handler.rb'
|
|
261
166
|
- 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
|
|
262
167
|
|
|
263
|
-
# Offense count:
|
|
168
|
+
# Offense count: 85
|
|
264
169
|
# Configuration parameters: Prefixes, AllowedPatterns.
|
|
265
170
|
# Prefixes: when, with, without
|
|
266
171
|
RSpec/ContextWording:
|
|
267
172
|
Enabled: false
|
|
268
173
|
|
|
269
|
-
# Offense count:
|
|
174
|
+
# Offense count: 43
|
|
270
175
|
# Configuration parameters: IgnoredMetadata.
|
|
271
176
|
RSpec/DescribeClass:
|
|
272
177
|
Enabled: false
|
|
@@ -277,7 +182,7 @@ RSpec/DescribeMethod:
|
|
|
277
182
|
- 'spec/canon/comparison/multiple_differences_spec.rb'
|
|
278
183
|
- 'spec/canon/diff_formatter/character_map_customization_spec.rb'
|
|
279
184
|
|
|
280
|
-
# Offense count:
|
|
185
|
+
# Offense count: 847
|
|
281
186
|
# Configuration parameters: CountAsOne.
|
|
282
187
|
RSpec/ExampleLength:
|
|
283
188
|
Max: 44
|
|
@@ -291,6 +196,12 @@ RSpec/ExpectActual:
|
|
|
291
196
|
- 'spec/canon/rspec_matchers_spec.rb'
|
|
292
197
|
- 'spec/canon/string_matcher_spec.rb'
|
|
293
198
|
|
|
199
|
+
# Offense count: 7
|
|
200
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
201
|
+
RSpec/IncludeExamples:
|
|
202
|
+
Exclude:
|
|
203
|
+
- 'spec/canon/comparison/html4_html5_whitespace_parity_spec.rb'
|
|
204
|
+
|
|
294
205
|
# Offense count: 177
|
|
295
206
|
# Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
|
|
296
207
|
RSpec/IndexedLet:
|
|
@@ -329,7 +240,7 @@ RSpec/MultipleDescribes:
|
|
|
329
240
|
Exclude:
|
|
330
241
|
- 'spec/canon/comparison/match_options_spec.rb'
|
|
331
242
|
|
|
332
|
-
# Offense count:
|
|
243
|
+
# Offense count: 694
|
|
333
244
|
RSpec/MultipleExpectations:
|
|
334
245
|
Max: 15
|
|
335
246
|
|
|
@@ -347,7 +258,7 @@ RSpec/NamedSubject:
|
|
|
347
258
|
- 'spec/canon/pretty_printer/json_spec.rb'
|
|
348
259
|
- 'spec/canon/pretty_printer/xml_spec.rb'
|
|
349
260
|
|
|
350
|
-
# Offense count:
|
|
261
|
+
# Offense count: 53
|
|
351
262
|
# Configuration parameters: AllowedGroups.
|
|
352
263
|
RSpec/NestedGroups:
|
|
353
264
|
Max: 4
|
|
@@ -381,7 +292,7 @@ RSpec/SpecFilePathFormat:
|
|
|
381
292
|
- 'spec/canon/yaml/formatter_spec.rb'
|
|
382
293
|
- 'spec/xml_c14n_spec.rb'
|
|
383
294
|
|
|
384
|
-
# Offense count:
|
|
295
|
+
# Offense count: 134
|
|
385
296
|
# Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
|
|
386
297
|
RSpec/VerifiedDoubles:
|
|
387
298
|
Exclude:
|
|
@@ -393,23 +304,6 @@ RSpec/VerifiedDoubles:
|
|
|
393
304
|
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
394
305
|
- 'spec/canon/tree_diff/operation_converter_spec.rb'
|
|
395
306
|
|
|
396
|
-
# Offense count: 25
|
|
397
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
398
|
-
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
399
|
-
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
400
|
-
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
401
|
-
# FunctionalMethods: let, let!, subject, watch
|
|
402
|
-
# AllowedMethods: lambda, proc, it
|
|
403
|
-
Style/BlockDelimiters:
|
|
404
|
-
Exclude:
|
|
405
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
|
|
406
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
407
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
408
|
-
- 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
|
|
409
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
410
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
411
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
412
|
-
|
|
413
307
|
# Offense count: 1
|
|
414
308
|
# This cop supports safe autocorrection (--autocorrect).
|
|
415
309
|
# Configuration parameters: EnforcedStyle, AllowComments.
|
|
@@ -432,18 +326,6 @@ Style/IdenticalConditionalBranches:
|
|
|
432
326
|
- 'lib/canon/diff_formatter/by_object/base_formatter.rb'
|
|
433
327
|
- 'lib/canon/diff_formatter/legend.rb'
|
|
434
328
|
|
|
435
|
-
# Offense count: 3
|
|
436
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
437
|
-
Style/MultilineIfModifier:
|
|
438
|
-
Exclude:
|
|
439
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
440
|
-
|
|
441
|
-
# Offense count: 2
|
|
442
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
443
|
-
Style/MultilineTernaryOperator:
|
|
444
|
-
Exclude:
|
|
445
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
|
|
446
|
-
|
|
447
329
|
# Offense count: 1
|
|
448
330
|
# Configuration parameters: AllowedMethods.
|
|
449
331
|
# AllowedMethods: respond_to_missing?
|
data/README.adoc
CHANGED
|
@@ -770,6 +770,15 @@ Each dimension can have one of three behaviors:
|
|
|
770
770
|
* **`:normalize`**: Differences are normalized; only semantic changes are normative
|
|
771
771
|
* **`:ignore`**: Differences are informative only (don't affect equivalence)
|
|
772
772
|
|
|
773
|
+
In addition, the `whitespace_type` option controls how Unicode whitespace
|
|
774
|
+
characters are compared:
|
|
775
|
+
|
|
776
|
+
* **`whitespace_type: :strict`** (default): Different whitespace types (space,
|
|
777
|
+
NBSP, ideographic space, etc.) are detected as differences — useful for catching
|
|
778
|
+
accidental insertion of wrong whitespace.
|
|
779
|
+
* **`whitespace_type: :normalize`**: All Unicode whitespace types are treated as
|
|
780
|
+
equivalent.
|
|
781
|
+
|
|
773
782
|
.Example: Whitespace handling
|
|
774
783
|
[example]
|
|
775
784
|
====
|
|
@@ -14,6 +14,39 @@ The Semantic Diff Report provides dimension-specific, actionable details for eac
|
|
|
14
14
|
|
|
15
15
|
The report is automatically shown in verbose mode when differences exist, appearing before the detailed diff output.
|
|
16
16
|
|
|
17
|
+
== Parse errors
|
|
18
|
+
|
|
19
|
+
When Canon's underlying parser (libxml for XML, HTML5 for HTML) reports errors during input parsing, Canon surfaces them at the top of the diff report in a banner section before any per-difference output. The banner names the offending side and warns that the diff below describes the parsed tree, not the input — content the parser could not represent has been silently dropped from the comparison tree.
|
|
20
|
+
|
|
21
|
+
This is purely a transparency feature: Canon does not modify the parse to "fix" invalid input. The user is responsible for deciding whether the parse failure was expected (e.g. testing legacy fixtures during a migration) or symptomatic of an upstream bug.
|
|
22
|
+
|
|
23
|
+
.Example: Banner for a duplicate-attribute FATAL on the received side
|
|
24
|
+
[example]
|
|
25
|
+
====
|
|
26
|
+
[source]
|
|
27
|
+
----
|
|
28
|
+
======================================================================
|
|
29
|
+
⚠️ PARSE ERRORS
|
|
30
|
+
======================================================================
|
|
31
|
+
Received side:
|
|
32
|
+
Attribute xml:lang redefined
|
|
33
|
+
|
|
34
|
+
⚠️ The diff below describes the parsed tree, not the input.
|
|
35
|
+
Content that the parser could not represent has been
|
|
36
|
+
dropped and may appear as "missing" in the report.
|
|
37
|
+
======================================================================
|
|
38
|
+
----
|
|
39
|
+
====
|
|
40
|
+
|
|
41
|
+
Common triggers in HTML / XHTML round-trips:
|
|
42
|
+
|
|
43
|
+
* Duplicate attributes (XML strict; HTML5 permissive — only XML mode triggers a banner)
|
|
44
|
+
* Stray processing instructions in fragment context
|
|
45
|
+
* Malformed namespace declarations
|
|
46
|
+
* DOCTYPE in unexpected positions
|
|
47
|
+
|
|
48
|
+
The banner is rendered when `Canon::Comparison::ComparisonResult#parse_errors?` is true. Programmatic callers can read `parse_errors_expected` and `parse_errors_received` directly off the result.
|
|
49
|
+
|
|
17
50
|
== Key Features
|
|
18
51
|
|
|
19
52
|
* XPath locations for XML/HTML elements
|
|
@@ -148,6 +181,69 @@ Location: /html/body/div/table/tbody/tr/td/pre/text
|
|
|
148
181
|
|
|
149
182
|
The warning appears for text inside whitespace-preserving elements where Canon automatically switches to strict mode.
|
|
150
183
|
|
|
184
|
+
==== Parent-context fallback for ambiguous text diffs
|
|
185
|
+
|
|
186
|
+
For a `text_content` difference, Canon normally renders the two sides as JSON-quoted strings.
|
|
187
|
+
When both sides would collapse to the same (or a visually indistinguishable) short string -- both empty (`""`), both whitespace-only, or both yielding identical extracted text even though the surrounding DOM differs -- that rendering conveys nothing.
|
|
188
|
+
|
|
189
|
+
In this case Canon instead serializes each side's *parent element* compactly and visualizes whitespace (`·` for space, `→` for tab, `¬` for newline, `<NBSP>` for non-breaking space) so the structural contrast is visible.
|
|
190
|
+
|
|
191
|
+
.Example: Ambiguous empty-vs-whitespace text diff
|
|
192
|
+
[example]
|
|
193
|
+
====
|
|
194
|
+
[source]
|
|
195
|
+
----
|
|
196
|
+
🔍 DIFFERENCE #1/1 [NORMATIVE]
|
|
197
|
+
──────────────────────────────────────────────────────────────────────
|
|
198
|
+
Dimension: text_content
|
|
199
|
+
Location: /#document[0]/fmt-title[0]/span[0]/span/text()[0]
|
|
200
|
+
Reason: Text: "¬······:¬······"
|
|
201
|
+
vs.: ":"
|
|
202
|
+
|
|
203
|
+
⊖ Expected (File 1):
|
|
204
|
+
<span·class="fmt-caption-delim">¬······:¬······<tab/>¬···</span>
|
|
205
|
+
⊕ Actual (File 2):
|
|
206
|
+
<span·class="fmt-caption-delim">:<tab/></span>
|
|
207
|
+
|
|
208
|
+
✨ Changes:
|
|
209
|
+
Content differs: <span·class="fmt-caption-delim">¬······:¬······<tab/>¬···</span> → <span·class="fmt-caption-delim">:<tab/></span>
|
|
210
|
+
----
|
|
211
|
+
====
|
|
212
|
+
|
|
213
|
+
This fallback is implemented in `Canon::DiffFormatter::DiffDetailFormatterHelpers::DimensionFormatter.format_text_content_details` and only triggers when `TextUtils.ambiguous_text_pair?` returns `true` _and_ at least one side has a parent element to render.
|
|
214
|
+
|
|
215
|
+
==== One-sided text diffs (added or removed text nodes)
|
|
216
|
+
|
|
217
|
+
When a `text_content` difference carries a text node on one side and `nil` on the other (issue #125) -- the shape that fragment-length mismatches and child-comparison emit when a text-node child is missing -- the renderer mirrors `element_structure`: the missing side reads `(not present)`, and the present side reads the text-node content (whitespace-visualised) plus a brief parent open-tag hint for context. The full ancestor subtree is *not* dumped; only the immediate parent's opening tag is shown, so a missing whitespace text node cannot make the diff look like the entire ancestor differs.
|
|
218
|
+
|
|
219
|
+
.Example: Whitespace text node missing on the received side
|
|
220
|
+
[example]
|
|
221
|
+
====
|
|
222
|
+
[source]
|
|
223
|
+
----
|
|
224
|
+
🔍 DIFFERENCE #1/1 [NORMATIVE]
|
|
225
|
+
──────────────────────────────────────────────────────────────────────
|
|
226
|
+
Dimension: text_content
|
|
227
|
+
Reason: element missing: text
|
|
228
|
+
|
|
229
|
+
⊖ Expected (File 1):
|
|
230
|
+
text "¬············" in <div id="A">
|
|
231
|
+
⊕ Actual (File 2):
|
|
232
|
+
(not present)
|
|
233
|
+
|
|
234
|
+
✨ Changes:
|
|
235
|
+
Text removed: text "¬············" in <div id="A">
|
|
236
|
+
----
|
|
237
|
+
====
|
|
238
|
+
|
|
239
|
+
The `Changes:` line uses `Text removed:` or `Text added:` to mirror the `Element removed:` / `Element added:` phrasing of `element_structure`.
|
|
240
|
+
|
|
241
|
+
==== Element-shaped diffs misclassified as text_content
|
|
242
|
+
|
|
243
|
+
In rare cases an upstream comparator may emit an *element*-shaped one-sided diff under `dimension: :text_content`. Without a guard, the one-sided text formatter would call `raw_text_value` on the element (which returns `""` for an empty element such as `<br/>`) and render `text "" in <parent>` -- meaningless when an element is what's actually missing.
|
|
244
|
+
|
|
245
|
+
The formatter detects element-shaped present-side nodes (Canon `ElementNode` or Nokogiri `Element`) and delegates to `format_element_structure_details`, so the rendered output reads `<br/>` and `Element removed:` rather than `text ""` and `Text removed:`. This is defence in depth -- the construction-side fix in `XmlComparatorHelpers::ChildComparison` ensures element orphans are now tagged `:element_structure` at source -- but a misclassified diff still renders meaningfully if any path slips through.
|
|
246
|
+
|
|
151
247
|
=== Structural Whitespace
|
|
152
248
|
|
|
153
249
|
Shows whitespace-only differences (usually informative).
|
|
@@ -29,8 +29,10 @@ variant can extend a base profile with only the differences.
|
|
|
29
29
|
|
|
30
30
|
| `:metanorma`
|
|
31
31
|
| Standard Metanorma spec configuration. Sets preprocessing to `:format`,
|
|
32
|
-
match profile to `:spec_friendly`,
|
|
33
|
-
|
|
32
|
+
match profile to `:spec_friendly`, `whitespace_type` to `:normalize`
|
|
33
|
+
(so that Unicode whitespace variants like space vs NBSP are treated as
|
|
34
|
+
equivalent for backward compatibility), diff algorithm to `:dom`,
|
|
35
|
+
canonical display format, normalized pretty-print display preprocessing,
|
|
34
36
|
and XML-specific whitespace element lists.
|
|
35
37
|
|
|
36
38
|
| `:metanorma_debug`
|
|
@@ -28,6 +28,9 @@ Canon's diff formatting includes:
|
|
|
28
28
|
* **Context and grouping**: Control how much surrounding context to show
|
|
29
29
|
* **Algorithm-specific output**: Different output styles for different diff
|
|
30
30
|
algorithms
|
|
31
|
+
* **Whitespace adjacency**: Stray whitespace-only text nodes are anchored at
|
|
32
|
+
themselves instead of cascading into mismatches against neighbouring
|
|
33
|
+
content (link:./whitespace-adjacency.adoc[details])
|
|
31
34
|
|
|
32
35
|
== Available formatting options
|
|
33
36
|
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Whitespace adjacency in diff reports
|
|
3
|
+
parent: Diff Formatting
|
|
4
|
+
nav_order: 8
|
|
5
|
+
---
|
|
6
|
+
= Whitespace adjacency in diff reports
|
|
7
|
+
:toc:
|
|
8
|
+
:toclevels: 2
|
|
9
|
+
|
|
10
|
+
== Purpose
|
|
11
|
+
|
|
12
|
+
Canon's diff reports anchor whitespace-only text nodes that have no
|
|
13
|
+
counterpart on the other side to a dedicated `:whitespace_adjacency`
|
|
14
|
+
dimension instead of letting them cascade into 3-4 misaligned
|
|
15
|
+
`:text_content` mismatches against neighbouring content nodes.
|
|
16
|
+
|
|
17
|
+
This is a *report-only* contract — equivalence verdicts are unchanged.
|
|
18
|
+
Inputs that were non-equivalent before this feature remain non-equivalent;
|
|
19
|
+
only the *shape* of the diff report changes.
|
|
20
|
+
|
|
21
|
+
== The problem
|
|
22
|
+
|
|
23
|
+
Consider an HTML fragment compared as `be_html_equivalent_to`:
|
|
24
|
+
|
|
25
|
+
[source,html]
|
|
26
|
+
----
|
|
27
|
+
<!-- expected -->
|
|
28
|
+
<p>
|
|
29
|
+
<span>ISO </span>
|
|
30
|
+
<span>20483</span>
|
|
31
|
+
,
|
|
32
|
+
<i>Cereals and pulses</i>
|
|
33
|
+
</p>
|
|
34
|
+
|
|
35
|
+
<!-- actual -->
|
|
36
|
+
<p><span>ISO </span><span>20483</span>, <i>Cereals and pulses</i></p>
|
|
37
|
+
----
|
|
38
|
+
|
|
39
|
+
Both render identically in a browser — the indentation is structural HTML
|
|
40
|
+
formatting, not content. Before this feature, the diff report contained
|
|
41
|
+
four entries:
|
|
42
|
+
|
|
43
|
+
[source]
|
|
44
|
+
----
|
|
45
|
+
DIFFERENCE #1 — element_structure: parent <p> "missing children"
|
|
46
|
+
DIFFERENCE #2 — text_content: "" vs "20483" (visualised: ↵░░░░)
|
|
47
|
+
DIFFERENCE #3 — text_content: "20483" vs ","
|
|
48
|
+
DIFFERENCE #4 — text_content: "," vs "Cereals and pulses"
|
|
49
|
+
----
|
|
50
|
+
|
|
51
|
+
The cascade comes from positional `zip()` alignment in
|
|
52
|
+
`Canon::Comparison::XmlComparatorHelpers::ChildComparison`: with the
|
|
53
|
+
expected side carrying extra whitespace-only text nodes and the actual
|
|
54
|
+
side carrying none, every child slides by one slot and gets paired
|
|
55
|
+
against the wrong neighbour.
|
|
56
|
+
|
|
57
|
+
== The contract
|
|
58
|
+
|
|
59
|
+
When `ChildComparison` aligns child sequences and encounters a
|
|
60
|
+
whitespace-only text node on one side paired against a non-whitespace
|
|
61
|
+
node on the other, it:
|
|
62
|
+
|
|
63
|
+
1. Treats the whitespace node as a *single-side gap* in the alignment.
|
|
64
|
+
2. Emits one `:whitespace_adjacency` diff entry anchored at the
|
|
65
|
+
whitespace node itself (not at its mis-paired neighbour).
|
|
66
|
+
3. Advances only the cursor that carries the whitespace, so the next
|
|
67
|
+
iteration aligns content against content.
|
|
68
|
+
|
|
69
|
+
The asymmetric whitespace still produces a non-equivalent verdict — the
|
|
70
|
+
`:whitespace_adjacency` dimension is classified as normative
|
|
71
|
+
unconditionally — so any test that previously failed on whitespace
|
|
72
|
+
asymmetry continues to fail.
|
|
73
|
+
|
|
74
|
+
Under the new contract, the cascade above collapses to:
|
|
75
|
+
|
|
76
|
+
[source]
|
|
77
|
+
----
|
|
78
|
+
DIFFERENCE #1 — whitespace_adjacency: Whitespace surrounding "20483":
|
|
79
|
+
present on EXPECTED ("↵░░"), absent on ACTUAL
|
|
80
|
+
DIFFERENCE #2 — whitespace_adjacency: Whitespace surrounding ",":
|
|
81
|
+
present on EXPECTED ("↵░░"), absent on ACTUAL
|
|
82
|
+
DIFFERENCE #3 — text_content: "↵░░,↵░░" vs ", "
|
|
83
|
+
----
|
|
84
|
+
|
|
85
|
+
== Adjacency positions
|
|
86
|
+
|
|
87
|
+
The Reason line names the adjacency position of the whitespace node
|
|
88
|
+
relative to its non-whitespace siblings:
|
|
89
|
+
|
|
90
|
+
`:preceding`:: Whitespace at the start of its parent (no non-whitespace
|
|
91
|
+
sibling before it, has one after it).
|
|
92
|
+
|
|
93
|
+
`:following`:: Whitespace at the end of its parent (has a non-whitespace
|
|
94
|
+
sibling before it, none after).
|
|
95
|
+
|
|
96
|
+
`:surrounding`:: Sandwiched between two non-whitespace siblings.
|
|
97
|
+
|
|
98
|
+
`:isolated`:: No non-whitespace siblings at all (degenerate; rarely
|
|
99
|
+
emitted).
|
|
100
|
+
|
|
101
|
+
== What this contract does NOT do
|
|
102
|
+
|
|
103
|
+
* **Does not change equivalence outcomes.** A non-equivalent comparison
|
|
104
|
+
before the fix for issue #137 remains non-equivalent after it — only the diff-report shape
|
|
105
|
+
changes.
|
|
106
|
+
* **Does not silently filter whitespace.** The asymmetric whitespace is
|
|
107
|
+
always reported; it is just labelled `:whitespace_adjacency` and
|
|
108
|
+
anchored at the whitespace node, instead of cascading as
|
|
109
|
+
`:text_content` against unrelated content nodes.
|
|
110
|
+
* **Does not affect symmetric whitespace.** When both sides carry
|
|
111
|
+
parallel whitespace-only nodes, those compare normally
|
|
112
|
+
(no `:whitespace_adjacency` entry, no cascade).
|
|
113
|
+
|
|
114
|
+
== Where it runs
|
|
115
|
+
|
|
116
|
+
The contract is implemented as a re-alignment walk inside
|
|
117
|
+
`Canon::Comparison::XmlComparatorHelpers::ChildComparison.use_positional_comparison`.
|
|
118
|
+
It activates whenever the existing positional `zip()` alignment would
|
|
119
|
+
pair a whitespace-only text node against a content node — that is, in
|
|
120
|
+
every whitespace context where the upstream filter has not already
|
|
121
|
+
dropped the whitespace nodes.
|
|
122
|
+
|
|
123
|
+
For elements where whitespace is preserved by configuration
|
|
124
|
+
(`preserve_whitespace_elements`) the upstream filter does not drop
|
|
125
|
+
indentation, and the re-alignment walk surfaces every asymmetric
|
|
126
|
+
whitespace node as a single normative `:whitespace_adjacency` diff.
|
|
127
|
+
|
|
128
|
+
== Related
|
|
129
|
+
|
|
130
|
+
* link:../../advanced/diff-classification.adoc[Diff classification] —
|
|
131
|
+
Normative vs informative differences.
|
|
132
|
+
* link:../match-options/index.adoc[Match options] — Configuring
|
|
133
|
+
`preserve_whitespace_elements`, `collapse_whitespace_elements`, and
|
|
134
|
+
`strip_whitespace_elements`.
|
|
135
|
+
|
|
136
|
+
== History
|
|
137
|
+
|
|
138
|
+
The cascade behaviour was reported in
|
|
139
|
+
https://github.com/lutaml/canon/issues/137[issue #137]. The fix landed
|
|
140
|
+
as a report-only re-alignment in PR #138.
|
|
@@ -44,6 +44,8 @@ Canon automatically detects HTML version:
|
|
|
44
44
|
|
|
45
45
|
Detection is based on DOCTYPE or parsing mode.
|
|
46
46
|
|
|
47
|
+
NOTE: Whitespace sensitivity does not differ between HTML4 and HTML5 — both apply HTML's content-model whitespace rules. `be_html4_equivalent_to` and `be_html5_equivalent_to` therefore agree on whether two inputs are whitespace-equivalent. Differences between the matchers are limited to genuine HTML4/HTML5 distinctions such as case sensitivity. Internally Canon parses both via `Nokogiri::HTML5` to share the content-model logic.
|
|
48
|
+
|
|
47
49
|
=== Whitespace Preservation
|
|
48
50
|
|
|
49
51
|
Certain HTML elements require strict whitespace preservation regardless of the
|