canon 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +31 -149
- data/README.adoc +9 -0
- data/docs/advanced/semantic-diff-report.adoc +31 -0
- data/docs/features/configuration-profiles.adoc +4 -2
- data/docs/features/match-options/html-policies.adoc +2 -0
- data/docs/features/match-options/index.adoc +40 -0
- data/docs/guides/choosing-configuration.adoc +12 -1
- data/docs/reference/cli-options.adoc +3 -0
- data/docs/reference/options-across-interfaces.adoc +7 -1
- data/docs/understanding/formats/html.adoc +9 -2
- data/lib/canon/cli.rb +4 -0
- data/lib/canon/commands/diff_command.rb +1 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +92 -11
- data/lib/canon/comparison/markup_comparator.rb +19 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
- data/lib/canon/comparison/match_options.rb +23 -2
- data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +6 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
- data/lib/canon/comparison/xml_comparator.rb +80 -4
- data/lib/canon/comparison/xml_node_comparison.rb +29 -3
- data/lib/canon/comparison.rb +84 -22
- data/lib/canon/config/env_schema.rb +2 -1
- data/lib/canon/config/profiles/metanorma.yml +3 -0
- data/lib/canon/config.rb +51 -5
- data/lib/canon/diff/diff_classifier.rb +18 -2
- data/lib/canon/diff/diff_line_builder.rb +9 -8
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
- data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
- data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +65 -17
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +17 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
- data/lib/canon/diff_formatter.rb +57 -173
- data/lib/canon/html/data_model.rb +10 -4
- data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
- data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +59 -5
- data/lib/canon/xml/element_matcher.rb +3 -0
- data/lib/canon/xml/node.rb +8 -1
- data/lib/canon/xml/nodes/comment_node.rb +4 -0
- data/lib/canon/xml/nodes/element_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +4 -0
- data/lib/canon/xml/sax_builder.rb +11 -2
- data/lib/canon/xml/xpath_engine.rb +238 -0
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c24b7c541d6159a3c261d389c0d41b85f954cd4152d88d9ca4748d9a3ceb34ef
|
|
4
|
+
data.tar.gz: 1de985c950b90c6979432b7b0bd1ed1b469240456fb7fd985a3d7f6929448b83
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 719eefd6be6d642503adb82e50609983fe9082ec8c7efe34c5e6cf27bfdc8065edc05b7ae75a959db8e5fe117f0ec67d71d81006d342a1c01f2846b4aa54b196
|
|
7
|
+
data.tar.gz: 32a1bece85afd8265f158fdea547de08759773ba8a1e574ca72e42c79f6f59b02ed881cc4ba4bb78e54d135c9f4362100c8d409d2ee08b0eaa3561b13652296c
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-04-
|
|
3
|
+
# on 2026-04-27 09:48:55 UTC using RuboCop version 1.86.0.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -11,125 +11,38 @@ Gemspec/RequiredRubyVersion:
|
|
|
11
11
|
Exclude:
|
|
12
12
|
- 'canon.gemspec'
|
|
13
13
|
|
|
14
|
-
# Offense count:
|
|
14
|
+
# Offense count: 30
|
|
15
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
16
16
|
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
17
17
|
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
18
18
|
Layout/ArgumentAlignment:
|
|
19
19
|
Exclude:
|
|
20
|
-
- 'lib/canon/comparison/whitespace_sensitivity.rb'
|
|
21
20
|
- 'lib/canon/comparison/xml_comparator.rb'
|
|
22
|
-
- '
|
|
23
|
-
- 'lib/canon/config.rb'
|
|
24
|
-
- 'lib/canon/diff/diff_classifier.rb'
|
|
25
|
-
- 'lib/canon/diff_formatter.rb'
|
|
26
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
|
|
27
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
28
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
29
|
-
- 'spec/canon/config/profile_spec.rb'
|
|
30
|
-
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
31
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
32
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
33
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
21
|
+
- 'spec/canon/comparison/html4_html5_whitespace_parity_spec.rb'
|
|
34
22
|
|
|
35
|
-
# Offense count:
|
|
36
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
37
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
38
|
-
# SupportedStyles: with_first_element, with_fixed_indentation
|
|
39
|
-
Layout/ArrayAlignment:
|
|
40
|
-
Exclude:
|
|
41
|
-
- 'lib/canon/comparison/match_options/base_resolver.rb'
|
|
42
|
-
- 'lib/canon/comparison/match_options/xml_resolver.rb'
|
|
43
|
-
- 'spec/canon/config/profile_spec.rb'
|
|
44
|
-
|
|
45
|
-
# Offense count: 16
|
|
23
|
+
# Offense count: 1
|
|
46
24
|
# This cop supports safe autocorrection (--autocorrect).
|
|
47
25
|
# Configuration parameters: EnforcedStyleAlignWith.
|
|
48
26
|
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
49
27
|
Layout/BlockAlignment:
|
|
50
28
|
Exclude:
|
|
51
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/
|
|
52
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
53
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
54
|
-
- 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
|
|
55
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
56
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
57
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
29
|
+
- 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
|
|
58
30
|
|
|
59
|
-
# Offense count:
|
|
31
|
+
# Offense count: 1
|
|
60
32
|
# This cop supports safe autocorrection (--autocorrect).
|
|
61
33
|
Layout/BlockEndNewline:
|
|
62
34
|
Exclude:
|
|
63
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/
|
|
64
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
65
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
66
|
-
- 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
|
|
67
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
68
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
69
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
70
|
-
|
|
71
|
-
# Offense count: 5
|
|
72
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
73
|
-
Layout/ClosingParenthesisIndentation:
|
|
74
|
-
Exclude:
|
|
75
|
-
- 'lib/canon/config/profile_loader.rb'
|
|
76
|
-
- 'lib/canon/diff/diff_classifier.rb'
|
|
77
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
35
|
+
- 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
|
|
78
36
|
|
|
79
37
|
# Offense count: 2
|
|
80
38
|
# This cop supports safe autocorrection (--autocorrect).
|
|
81
|
-
Layout/ElseAlignment:
|
|
82
|
-
Exclude:
|
|
83
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
|
|
84
|
-
|
|
85
|
-
# Offense count: 2
|
|
86
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
87
|
-
# Configuration parameters: EnforcedStyleAlignWith.
|
|
88
|
-
# SupportedStylesAlignWith: keyword, variable, start_of_line
|
|
89
|
-
Layout/EndAlignment:
|
|
90
|
-
Exclude:
|
|
91
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
|
|
92
|
-
|
|
93
|
-
# Offense count: 5
|
|
94
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
95
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
96
|
-
# SupportedStyles: consistent, consistent_relative_to_receiver, special_for_inner_method_call, special_for_inner_method_call_in_parentheses
|
|
97
|
-
Layout/FirstArgumentIndentation:
|
|
98
|
-
Exclude:
|
|
99
|
-
- 'lib/canon/config/profile_loader.rb'
|
|
100
|
-
- 'lib/canon/diff/diff_classifier.rb'
|
|
101
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
102
|
-
|
|
103
|
-
# Offense count: 30
|
|
104
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
105
|
-
# Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
|
|
106
|
-
# SupportedHashRocketStyles: key, separator, table
|
|
107
|
-
# SupportedColonStyles: key, separator, table
|
|
108
|
-
# SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
|
|
109
|
-
Layout/HashAlignment:
|
|
110
|
-
Exclude:
|
|
111
|
-
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
112
|
-
- 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
|
|
113
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
114
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
115
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
116
|
-
|
|
117
|
-
# Offense count: 36
|
|
118
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
119
39
|
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
120
40
|
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
121
41
|
Layout/IndentationWidth:
|
|
122
42
|
Exclude:
|
|
123
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/
|
|
124
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
|
|
125
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
126
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
127
|
-
- 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
|
|
128
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
129
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
130
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
43
|
+
- 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
|
|
131
44
|
|
|
132
|
-
# Offense count:
|
|
45
|
+
# Offense count: 1347
|
|
133
46
|
# This cop supports safe autocorrection (--autocorrect).
|
|
134
47
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
135
48
|
# URISchemes: http, https
|
|
@@ -138,20 +51,12 @@ Layout/LineLength:
|
|
|
138
51
|
|
|
139
52
|
# Offense count: 2
|
|
140
53
|
# This cop supports safe autocorrection (--autocorrect).
|
|
141
|
-
# Configuration parameters: EnforcedStyle.
|
|
142
|
-
# SupportedStyles: symmetrical, new_line, same_line
|
|
143
|
-
Layout/MultilineMethodCallBraceLayout:
|
|
144
|
-
Exclude:
|
|
145
|
-
- 'lib/canon/config/profile_loader.rb'
|
|
146
|
-
- 'lib/canon/diff/diff_classifier.rb'
|
|
147
|
-
|
|
148
|
-
# Offense count: 57
|
|
149
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
150
54
|
# Configuration parameters: AllowInHeredoc.
|
|
151
55
|
Layout/TrailingWhitespace:
|
|
152
|
-
|
|
56
|
+
Exclude:
|
|
57
|
+
- 'lib/canon/comparison/xml_comparator.rb'
|
|
153
58
|
|
|
154
|
-
# Offense count:
|
|
59
|
+
# Offense count: 58
|
|
155
60
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
156
61
|
Lint/DuplicateBranch:
|
|
157
62
|
Enabled: false
|
|
@@ -196,7 +101,7 @@ Lint/UselessConstantScoping:
|
|
|
196
101
|
Exclude:
|
|
197
102
|
- 'lib/canon/diff_formatter/theme.rb'
|
|
198
103
|
|
|
199
|
-
# Offense count:
|
|
104
|
+
# Offense count: 322
|
|
200
105
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
201
106
|
Metrics/AbcSize:
|
|
202
107
|
Enabled: false
|
|
@@ -207,32 +112,32 @@ Metrics/AbcSize:
|
|
|
207
112
|
Metrics/BlockLength:
|
|
208
113
|
Max: 92
|
|
209
114
|
|
|
210
|
-
# Offense count:
|
|
115
|
+
# Offense count: 1
|
|
211
116
|
# Configuration parameters: CountBlocks, CountModifierForms.
|
|
212
117
|
Metrics/BlockNesting:
|
|
213
118
|
Max: 4
|
|
214
119
|
|
|
215
|
-
# Offense count:
|
|
120
|
+
# Offense count: 281
|
|
216
121
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
217
122
|
Metrics/CyclomaticComplexity:
|
|
218
123
|
Enabled: false
|
|
219
124
|
|
|
220
|
-
# Offense count:
|
|
125
|
+
# Offense count: 517
|
|
221
126
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
222
127
|
Metrics/MethodLength:
|
|
223
128
|
Max: 146
|
|
224
129
|
|
|
225
|
-
# Offense count:
|
|
130
|
+
# Offense count: 56
|
|
226
131
|
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
|
227
132
|
Metrics/ParameterLists:
|
|
228
133
|
Max: 10
|
|
229
134
|
|
|
230
|
-
# Offense count:
|
|
135
|
+
# Offense count: 225
|
|
231
136
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
232
137
|
Metrics/PerceivedComplexity:
|
|
233
138
|
Enabled: false
|
|
234
139
|
|
|
235
|
-
# Offense count:
|
|
140
|
+
# Offense count: 30
|
|
236
141
|
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
|
237
142
|
# AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
|
|
238
143
|
Naming/MethodParameterName:
|
|
@@ -260,13 +165,13 @@ Performance/CollectionLiteralInLoop:
|
|
|
260
165
|
- 'lib/canon/xml/xml_base_handler.rb'
|
|
261
166
|
- 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
|
|
262
167
|
|
|
263
|
-
# Offense count:
|
|
168
|
+
# Offense count: 85
|
|
264
169
|
# Configuration parameters: Prefixes, AllowedPatterns.
|
|
265
170
|
# Prefixes: when, with, without
|
|
266
171
|
RSpec/ContextWording:
|
|
267
172
|
Enabled: false
|
|
268
173
|
|
|
269
|
-
# Offense count:
|
|
174
|
+
# Offense count: 43
|
|
270
175
|
# Configuration parameters: IgnoredMetadata.
|
|
271
176
|
RSpec/DescribeClass:
|
|
272
177
|
Enabled: false
|
|
@@ -277,7 +182,7 @@ RSpec/DescribeMethod:
|
|
|
277
182
|
- 'spec/canon/comparison/multiple_differences_spec.rb'
|
|
278
183
|
- 'spec/canon/diff_formatter/character_map_customization_spec.rb'
|
|
279
184
|
|
|
280
|
-
# Offense count:
|
|
185
|
+
# Offense count: 847
|
|
281
186
|
# Configuration parameters: CountAsOne.
|
|
282
187
|
RSpec/ExampleLength:
|
|
283
188
|
Max: 44
|
|
@@ -291,6 +196,12 @@ RSpec/ExpectActual:
|
|
|
291
196
|
- 'spec/canon/rspec_matchers_spec.rb'
|
|
292
197
|
- 'spec/canon/string_matcher_spec.rb'
|
|
293
198
|
|
|
199
|
+
# Offense count: 7
|
|
200
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
201
|
+
RSpec/IncludeExamples:
|
|
202
|
+
Exclude:
|
|
203
|
+
- 'spec/canon/comparison/html4_html5_whitespace_parity_spec.rb'
|
|
204
|
+
|
|
294
205
|
# Offense count: 177
|
|
295
206
|
# Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
|
|
296
207
|
RSpec/IndexedLet:
|
|
@@ -329,7 +240,7 @@ RSpec/MultipleDescribes:
|
|
|
329
240
|
Exclude:
|
|
330
241
|
- 'spec/canon/comparison/match_options_spec.rb'
|
|
331
242
|
|
|
332
|
-
# Offense count:
|
|
243
|
+
# Offense count: 694
|
|
333
244
|
RSpec/MultipleExpectations:
|
|
334
245
|
Max: 15
|
|
335
246
|
|
|
@@ -347,7 +258,7 @@ RSpec/NamedSubject:
|
|
|
347
258
|
- 'spec/canon/pretty_printer/json_spec.rb'
|
|
348
259
|
- 'spec/canon/pretty_printer/xml_spec.rb'
|
|
349
260
|
|
|
350
|
-
# Offense count:
|
|
261
|
+
# Offense count: 53
|
|
351
262
|
# Configuration parameters: AllowedGroups.
|
|
352
263
|
RSpec/NestedGroups:
|
|
353
264
|
Max: 4
|
|
@@ -381,7 +292,7 @@ RSpec/SpecFilePathFormat:
|
|
|
381
292
|
- 'spec/canon/yaml/formatter_spec.rb'
|
|
382
293
|
- 'spec/xml_c14n_spec.rb'
|
|
383
294
|
|
|
384
|
-
# Offense count:
|
|
295
|
+
# Offense count: 134
|
|
385
296
|
# Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
|
|
386
297
|
RSpec/VerifiedDoubles:
|
|
387
298
|
Exclude:
|
|
@@ -393,23 +304,6 @@ RSpec/VerifiedDoubles:
|
|
|
393
304
|
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
394
305
|
- 'spec/canon/tree_diff/operation_converter_spec.rb'
|
|
395
306
|
|
|
396
|
-
# Offense count: 25
|
|
397
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
398
|
-
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
399
|
-
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
400
|
-
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
401
|
-
# FunctionalMethods: let, let!, subject, watch
|
|
402
|
-
# AllowedMethods: lambda, proc, it
|
|
403
|
-
Style/BlockDelimiters:
|
|
404
|
-
Exclude:
|
|
405
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
|
|
406
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
407
|
-
- 'spec/canon/config/profile_loader_spec.rb'
|
|
408
|
-
- 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
|
|
409
|
-
- 'spec/canon/diff_formatter/pretty_diff_spec.rb'
|
|
410
|
-
- 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
|
|
411
|
-
- 'spec/canon/pretty_printer/xml_normalized_spec.rb'
|
|
412
|
-
|
|
413
307
|
# Offense count: 1
|
|
414
308
|
# This cop supports safe autocorrection (--autocorrect).
|
|
415
309
|
# Configuration parameters: EnforcedStyle, AllowComments.
|
|
@@ -432,18 +326,6 @@ Style/IdenticalConditionalBranches:
|
|
|
432
326
|
- 'lib/canon/diff_formatter/by_object/base_formatter.rb'
|
|
433
327
|
- 'lib/canon/diff_formatter/legend.rb'
|
|
434
328
|
|
|
435
|
-
# Offense count: 3
|
|
436
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
437
|
-
Style/MultilineIfModifier:
|
|
438
|
-
Exclude:
|
|
439
|
-
- 'lib/canon/pretty_printer/xml_normalized.rb'
|
|
440
|
-
|
|
441
|
-
# Offense count: 2
|
|
442
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
443
|
-
Style/MultilineTernaryOperator:
|
|
444
|
-
Exclude:
|
|
445
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
|
|
446
|
-
|
|
447
329
|
# Offense count: 1
|
|
448
330
|
# Configuration parameters: AllowedMethods.
|
|
449
331
|
# AllowedMethods: respond_to_missing?
|
data/README.adoc
CHANGED
|
@@ -770,6 +770,15 @@ Each dimension can have one of three behaviors:
|
|
|
770
770
|
* **`:normalize`**: Differences are normalized; only semantic changes are normative
|
|
771
771
|
* **`:ignore`**: Differences are informative only (don't affect equivalence)
|
|
772
772
|
|
|
773
|
+
In addition, the `whitespace_type` option controls how Unicode whitespace
|
|
774
|
+
characters are compared:
|
|
775
|
+
|
|
776
|
+
* **`whitespace_type: :strict`** (default): Different whitespace types (space,
|
|
777
|
+
NBSP, ideographic space, etc.) are detected as differences — useful for catching
|
|
778
|
+
accidental insertion of wrong whitespace.
|
|
779
|
+
* **`whitespace_type: :normalize`**: All Unicode whitespace types are treated as
|
|
780
|
+
equivalent.
|
|
781
|
+
|
|
773
782
|
.Example: Whitespace handling
|
|
774
783
|
[example]
|
|
775
784
|
====
|
|
@@ -148,6 +148,37 @@ Location: /html/body/div/table/tbody/tr/td/pre/text
|
|
|
148
148
|
|
|
149
149
|
The warning appears for text inside whitespace-preserving elements where Canon automatically switches to strict mode.
|
|
150
150
|
|
|
151
|
+
==== Parent-context fallback for ambiguous text diffs
|
|
152
|
+
|
|
153
|
+
For a `text_content` difference, Canon normally renders the two sides as JSON-quoted strings.
|
|
154
|
+
When both sides would collapse to the same (or visually indistinguishable) short string -- both empty (`""`), both whitespace-only, or both equal on the text-node extraction even though the surrounding DOM differs -- that rendering conveys nothing.
|
|
155
|
+
|
|
156
|
+
In this case Canon instead serializes each side's *parent element* compactly and visualizes whitespace (`·` for space, `→` for tab, `¬` for newline, `<NBSP>` for non-breaking space) so the structural contrast is visible.
|
|
157
|
+
|
|
158
|
+
.Example: Ambiguous empty-vs-whitespace text diff
|
|
159
|
+
[example]
|
|
160
|
+
====
|
|
161
|
+
[source]
|
|
162
|
+
----
|
|
163
|
+
🔍 DIFFERENCE #1/1 [NORMATIVE]
|
|
164
|
+
──────────────────────────────────────────────────────────────────────
|
|
165
|
+
Dimension: text_content
|
|
166
|
+
Location: /#document[0]/fmt-title[0]/span[0]/span/text()[0]
|
|
167
|
+
Reason: Text: "¬······:¬······"
|
|
168
|
+
vs.: ":"
|
|
169
|
+
|
|
170
|
+
⊖ Expected (File 1):
|
|
171
|
+
<span·class="fmt-caption-delim">¬······:¬······<tab/>¬···</span>
|
|
172
|
+
⊕ Actual (File 2):
|
|
173
|
+
<span·class="fmt-caption-delim">:<tab/></span>
|
|
174
|
+
|
|
175
|
+
✨ Changes:
|
|
176
|
+
Content differs: <span·class="fmt-caption-delim">¬······:¬······<tab/>¬···</span> → <span·class="fmt-caption-delim">:<tab/></span>
|
|
177
|
+
----
|
|
178
|
+
====
|
|
179
|
+
|
|
180
|
+
This fallback is implemented in `Canon::DiffFormatter::DiffDetailFormatterHelpers::DimensionFormatter.format_text_content_details` and only triggers when `TextUtils.ambiguous_text_pair?` returns `true` _and_ at least one side has a parent element to render.
|
|
181
|
+
|
|
151
182
|
=== Structural Whitespace
|
|
152
183
|
|
|
153
184
|
Shows whitespace-only differences (usually informative).
|
|
@@ -29,8 +29,10 @@ variant can extend a base profile with only the differences.
|
|
|
29
29
|
|
|
30
30
|
| `:metanorma`
|
|
31
31
|
| Standard Metanorma spec configuration. Sets preprocessing to `:format`,
|
|
32
|
-
match profile to `:spec_friendly`,
|
|
33
|
-
|
|
32
|
+
match profile to `:spec_friendly`, `whitespace_type` to `:normalize`
|
|
33
|
+
(so that Unicode whitespace variants like space vs NBSP are treated as
|
|
34
|
+
equivalent for backward compatibility), diff algorithm to `:dom`,
|
|
35
|
+
canonical display format, normalized pretty-print display preprocessing,
|
|
34
36
|
and XML-specific whitespace element lists.
|
|
35
37
|
|
|
36
38
|
| `:metanorma_debug`
|
|
@@ -44,6 +44,8 @@ Canon automatically detects HTML version:
|
|
|
44
44
|
|
|
45
45
|
Detection is based on DOCTYPE or parsing mode.
|
|
46
46
|
|
|
47
|
+
NOTE: Whitespace sensitivity does not differ between HTML4 and HTML5 — both apply HTML's content-model whitespace rules. `be_html4_equivalent_to` and `be_html5_equivalent_to` therefore agree on whether two inputs are whitespace-equivalent. Differences between the matchers are limited to genuine HTML4/HTML5 distinctions such as case sensitivity. Internally Canon parses both via `Nokogiri::HTML5` to share the content-model logic.
|
|
48
|
+
|
|
47
49
|
=== Whitespace Preservation
|
|
48
50
|
|
|
49
51
|
Certain HTML elements require strict whitespace preservation regardless of the
|
|
@@ -92,6 +92,46 @@ Canon.equivalent?(
|
|
|
92
92
|
`:ignore`:: Structural whitespace is completely ignored
|
|
93
93
|
|
|
94
94
|
|
|
95
|
+
=== whitespace_type
|
|
96
|
+
|
|
97
|
+
**Applies to**: XML, HTML
|
|
98
|
+
|
|
99
|
+
**Purpose**: Controls whether different Unicode whitespace characters (space, NBSP, ideographic space, etc.) are treated as equivalent or distinct.
|
|
100
|
+
|
|
101
|
+
**Behaviors**:
|
|
102
|
+
|
|
103
|
+
`:strict`:: (default) Different Unicode whitespace types are significant.
|
|
104
|
+
Space (U+0020) and NBSP (U+00A0) are treated as different characters.
|
|
105
|
+
This is useful for catching accidental insertion of wrong whitespace types
|
|
106
|
+
(e.g., a pasted NBSP where a regular space was intended).
|
|
107
|
+
|
|
108
|
+
`:normalize`:: All Unicode whitespace characters are collapsed to a single space
|
|
109
|
+
before comparison. Space, NBSP, ideographic space (U+3000), and other Unicode
|
|
110
|
+
whitespace characters are treated as equivalent.
|
|
111
|
+
|
|
112
|
+
.Using whitespace_type: :strict (default)
|
|
113
|
+
[example]
|
|
114
|
+
====
|
|
115
|
+
[source,ruby]
|
|
116
|
+
----
|
|
117
|
+
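# By default, space (U+0020) and NBSP (U+00A0) are different; the two inputs below differ only in that (visually identical) separator character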
|
|
118
|
+
xml1 = '<root><span>ISO</span> <span>712</span></root>'
|
|
119
|
+
xml2 = '<root><span>ISO</span> <span>712</span></root>'
|
|
120
|
+
|
|
121
|
+
Canon::Comparison.equivalent?(xml1, xml2,
|
|
122
|
+
match_profile: :spec_friendly
|
|
123
|
+
)
|
|
124
|
+
# => false (NBSP detected as different from space)
|
|
125
|
+
|
|
126
|
+
# Opt into treating all whitespace types as equivalent
|
|
127
|
+
Canon::Comparison.equivalent?(xml1, xml2,
|
|
128
|
+
match_profile: :spec_friendly,
|
|
129
|
+
match: { whitespace_type: :normalize }
|
|
130
|
+
)
|
|
131
|
+
# => true
|
|
132
|
+
----
|
|
133
|
+
====
|
|
134
|
+
|
|
95
135
|
=== Whitespace sensitivity at element level
|
|
96
136
|
|
|
97
137
|
==== General
|
|
@@ -210,13 +210,24 @@ Canon::Comparison.equivalent?(doc1, doc2,
|
|
|
210
210
|
structural_whitespace: :ignore, # ignore, normalize, strict
|
|
211
211
|
attribute_order: :ignore, # ignore, strict (XML/HTML)
|
|
212
212
|
attribute_values: :normalize, # normalize, strict, ignore
|
|
213
|
-
comments: :ignore
|
|
213
|
+
comments: :ignore, # ignore, normalize, strict
|
|
214
|
+
whitespace_type: :strict # strict (default), normalize
|
|
214
215
|
}
|
|
215
216
|
)
|
|
216
217
|
----
|
|
217
218
|
|
|
218
219
|
**Remember**: Match options behave differently with each algorithm! See link:../features/match-options/algorithm-specific-behavior.adoc[Algorithm-Specific Behavior].
|
|
219
220
|
|
|
221
|
+
==== Whitespace Type Sensitivity
|
|
222
|
+
|
|
223
|
+
By default, Canon distinguishes between different Unicode whitespace types
|
|
224
|
+
(e.g. regular space U+0020 vs non-breaking space U+00A0 vs ideographic space
|
|
225
|
+
U+3000). This catches accidental insertion of wrong whitespace characters.
|
|
226
|
+
|
|
227
|
+
Use `whitespace_type: :normalize` when all Unicode whitespace variants should
|
|
228
|
+
be treated as equivalent (e.g. when output from different tools may use
|
|
229
|
+
different whitespace types for the same visual result).
|
|
230
|
+
|
|
220
231
|
=== Layer 4: Diff Formatting
|
|
221
232
|
|
|
222
233
|
**Question**: How should differences be displayed?
|
|
@@ -145,6 +145,9 @@ Individual dimension control (overrides profile settings):
|
|
|
145
145
|
|
|
146
146
|
|`--comments BEHAVIOR`
|
|
147
147
|
|Comments: `strict`, `normalize`, `ignore`
|
|
148
|
+
|
|
149
|
+
|`--whitespace-type BEHAVIOR`
|
|
150
|
+
|Whitespace type sensitivity: `strict` (default), `normalize`
|
|
148
151
|
|===
|
|
149
152
|
|
|
150
153
|
See link:../features/match-options/[Match Options] for details.
|
|
@@ -223,9 +223,15 @@ Profile values: `strict`, `rendered`, `spec_friendly`, `content_only`
|
|
|
223
223
|
|`match: { element_hierarchy: :strict }`
|
|
224
224
|
|`config.canon.xml.match.options = { element_hierarchy: :strict }`
|
|
225
225
|
|`CANON_ELEMENT_HIERARCHY=strict`
|
|
226
|
+
|
|
227
|
+
|Whitespace Type
|
|
228
|
+
|`--whitespace-type normalize`
|
|
229
|
+
|`match: { whitespace_type: :normalize }`
|
|
230
|
+
|`config.canon.xml.match.options = { whitespace_type: :normalize }`
|
|
231
|
+
|`CANON_WHITESPACE_TYPE=normalize`
|
|
226
232
|
|===
|
|
227
233
|
|
|
228
|
-
Values: `strict`, `normalize`, `ignore` (or `strict`, `ignore` for structure/position/hierarchy)
|
|
234
|
+
Values: `strict`, `normalize`, `ignore` (only `strict` or `ignore` for structure/position/hierarchy). `whitespace_type` accepts `strict` (default) or `normalize`.
|
|
229
235
|
|
|
230
236
|
==== XML/HTML-Specific Match Dimensions
|
|
231
237
|
|
|
@@ -19,7 +19,7 @@ Canon supports HTML 4, HTML5, and XHTML with automatic format detection.
|
|
|
19
19
|
**Key features:**
|
|
20
20
|
|
|
21
21
|
* Automatic HTML vs XHTML detection
|
|
22
|
-
* HTML5 parser for
|
|
22
|
+
* HTML5 parser for HTML input regardless of declared version (HTML4 and HTML5 share the same content model and parsing whitespace rules — see <<html4-html5-parity>>)
|
|
23
23
|
* XML parser for XHTML
|
|
24
24
|
* Consistent attribute ordering
|
|
25
25
|
* Whitespace normalization
|
|
@@ -203,9 +203,16 @@ Automatically detects HTML5, HTML4, or XHTML based on DOCTYPE and structure.
|
|
|
203
203
|
----
|
|
204
204
|
====
|
|
205
205
|
|
|
206
|
+
[[html4-html5-parity]]
|
|
207
|
+
=== HTML4 / HTML5 parity
|
|
208
|
+
|
|
209
|
+
`be_html4_equivalent_to` and `be_html5_equivalent_to` apply the same whitespace-sensitivity rules. Whitespace sensitivity is a property of HTML's content model and is identical across the two HTML versions, so any input that compares equivalent under one matcher must compare equivalent under the other.
|
|
210
|
+
|
|
211
|
+
Internally, both matchers parse input via `Nokogiri::HTML5.fragment`. (Earlier releases routed `:html` and `:html4` through `Nokogiri::XML.fragment`, which silently applied XML whitespace rules — meaning `be_html4_equivalent_to` could reject inputs that `be_html5_equivalent_to` correctly accepted.) See https://github.com/lutaml/canon/issues/118 for the full background.
|
|
212
|
+
|
|
206
213
|
=== Whitespace handling
|
|
207
214
|
|
|
208
|
-
HTML whitespace is collapsed per CSS rendering rules. Empty text nodes between elements are removed.
|
|
215
|
+
HTML whitespace is collapsed per CSS rendering rules. Empty text nodes between elements are removed. Whitespace-only text between two adjacent inline elements (`<span>A</span> <span>B</span>`) is preserved because it renders as a visible space; whitespace at a block boundary (between an inline element and a block element, or between two block siblings) is collapsed.
|
|
209
216
|
|
|
210
217
|
.Whitespace handling example
|
|
211
218
|
[example]
|
data/lib/canon/cli.rb
CHANGED
|
@@ -218,6 +218,10 @@ module Canon
|
|
|
218
218
|
type: :string,
|
|
219
219
|
enum: %w[strict normalize ignore],
|
|
220
220
|
desc: "Comment matching: strict, normalize, or ignore"
|
|
221
|
+
method_option :whitespace_type,
|
|
222
|
+
type: :string,
|
|
223
|
+
enum: %w[strict normalize],
|
|
224
|
+
desc: "Whitespace type sensitivity: strict (default) or normalize"
|
|
221
225
|
method_option :show_diffs,
|
|
222
226
|
type: :string,
|
|
223
227
|
enum: %w[all normative informative],
|
|
@@ -84,6 +84,30 @@ html_version: nil, match_options: nil, algorithm: :dom, original_strings: nil)
|
|
|
84
84
|
@match_options&.[](:tree_diff_operations) || []
|
|
85
85
|
end
|
|
86
86
|
|
|
87
|
+
# Generate a human-readable summary of the first difference.
|
|
88
|
+
#
|
|
89
|
+
# When documents are equivalent, returns "Equivalent".
|
|
90
|
+
# When they differ, returns a single-line string with the first normative
|
|
91
|
+
# (or first informative) difference location and reason.
|
|
92
|
+
#
|
|
93
|
+
# @return [String] Summary string
|
|
94
|
+
def summary
|
|
95
|
+
return "Equivalent" if equivalent?
|
|
96
|
+
|
|
97
|
+
diff = normative_differences.first || informative_differences.first ||
|
|
98
|
+
@differences.first # rubocop:disable Layout/MultilineOperationIndentation
|
|
99
|
+
|
|
100
|
+
return "Not equivalent" unless diff
|
|
101
|
+
|
|
102
|
+
if diff.is_a?(Canon::Diff::DiffNode)
|
|
103
|
+
summarize_diff_node(diff)
|
|
104
|
+
elsif diff.is_a?(Hash)
|
|
105
|
+
summarize_legacy_hash(diff)
|
|
106
|
+
else
|
|
107
|
+
"Not equivalent"
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
|
|
87
111
|
# Generate formatted diff output
|
|
88
112
|
#
|
|
89
113
|
# @param use_color [Boolean] Whether to use ANSI color codes
|
|
@@ -116,6 +140,61 @@ show_diffs: :all, diff_mode: :separate, legacy_terminal: false)
|
|
|
116
140
|
html_version: @html_version,
|
|
117
141
|
)
|
|
118
142
|
end
|
|
143
|
+
|
|
144
|
+
private
|
|
145
|
+
|
|
146
|
+
# Format a single DiffNode into a summary string.
|
|
147
|
+
#
|
|
148
|
+
# @param diff [DiffNode] The difference to summarize
|
|
149
|
+
# @return [String] Human-readable summary
|
|
150
|
+
def summarize_diff_node(diff)
|
|
151
|
+
parts = ["Not equivalent:"]
|
|
152
|
+
|
|
153
|
+
# rubocop:disable Layout/SpaceBeforeInterpolation,Style/ConditionalAssignment
|
|
154
|
+
if diff.path
|
|
155
|
+
parts << "#{diff.reason} at #{diff.path}"
|
|
156
|
+
else
|
|
157
|
+
parts << diff.reason.to_s
|
|
158
|
+
end
|
|
159
|
+
# rubocop:enable Layout/SpaceBeforeInterpolation,Style/ConditionalAssignment
|
|
160
|
+
|
|
161
|
+
if diff.serialized_before && diff.serialized_after
|
|
162
|
+
before_preview = truncate_preview(diff.serialized_before)
|
|
163
|
+
after_preview = truncate_preview(diff.serialized_after)
|
|
164
|
+
parts << "(#{before_preview} vs #{after_preview})"
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
parts.join(" ")
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
# Format a legacy Hash difference into a summary string.
|
|
171
|
+
#
|
|
172
|
+
# @param diff [Hash] Legacy difference hash with :diff_code_description, :path, :value1, :value2
|
|
173
|
+
# @return [String] Human-readable summary
|
|
174
|
+
def summarize_legacy_hash(diff)
|
|
175
|
+
parts = ["Not equivalent:"]
|
|
176
|
+
parts << "#{diff[:diff_code_description]} at #{diff[:path]}" if diff[:path]
|
|
177
|
+
|
|
178
|
+
if diff[:value1] && diff[:value2]
|
|
179
|
+
parts << "(#{truncate_preview(diff[:value1].to_s)} vs #{truncate_preview(diff[:value2].to_s)})"
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
parts.size > 1 ? parts.join(" ") : "Not equivalent: values differ"
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
# Truncate a string for preview display.
|
|
186
|
+
#
|
|
187
|
+
# @param text [String] Text to truncate
|
|
188
|
+
# @param max_len [Integer] Maximum length of the returned preview, including the trailing "..." (default 40)
|
|
189
|
+
# @return [String] Truncated text with ellipsis if needed
|
|
190
|
+
def truncate_preview(text, max_len = 40)
|
|
191
|
+
stripped = text.strip.gsub(/\s+/, " ")
|
|
192
|
+
if stripped.length > max_len
|
|
193
|
+
"#{stripped[0...(max_len - 3)]}..."
|
|
194
|
+
else
|
|
195
|
+
stripped
|
|
196
|
+
end
|
|
197
|
+
end
|
|
119
198
|
end
|
|
120
199
|
end
|
|
121
200
|
end
|