canon 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +83 -22
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +196 -24
- data/docs/features/match-options/index.adoc +239 -1
- data/lib/canon/comparison/format_detector.rb +2 -1
- data/lib/canon/comparison/html_comparator.rb +19 -8
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/markup_comparator.rb +109 -2
- data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
- data/lib/canon/comparison/xml_comparator.rb +240 -23
- data/lib/canon/comparison/xml_node_comparison.rb +25 -3
- data/lib/canon/diff/diff_classifier.rb +119 -5
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
- data/lib/canon/rspec_matchers.rb +37 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +4 -78
- data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
- data/false_positive_analysis.txt +0 -0
- data/file1.html +0 -1
- data/file2.html +0 -1
- data/old-docs/ADVANCED_TOPICS.adoc +0 -20
- data/old-docs/BASIC_USAGE.adoc +0 -16
- data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
- data/old-docs/CLI.adoc +0 -497
- data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/old-docs/DIFF_FORMATTING.adoc +0 -540
- data/old-docs/DIFF_PARAMETERS.adoc +0 -261
- data/old-docs/DOM_DIFF.adoc +0 -1017
- data/old-docs/ENV_CONFIG.adoc +0 -876
- data/old-docs/FORMATS.adoc +0 -867
- data/old-docs/INPUT_VALIDATION.adoc +0 -477
- data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
- data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/old-docs/MATCH_OPTIONS.adoc +0 -912
- data/old-docs/MODES.adoc +0 -432
- data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/old-docs/OPTIONS.adoc +0 -1387
- data/old-docs/PREPROCESSING.adoc +0 -491
- data/old-docs/README.old.adoc +0 -2831
- data/old-docs/RSPEC.adoc +0 -814
- data/old-docs/RUBY_API.adoc +0 -485
- data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
- data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
- data/old-docs/STRING_COMPARE.adoc +0 -345
- data/old-docs/TMP.adoc +0 -3384
- data/old-docs/TREE_DIFF.adoc +0 -1080
- data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
- data/old-docs/VERBOSE.adoc +0 -482
- data/old-docs/VISUALIZATION_MAP.adoc +0 -625
- data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
- data/scripts/analyze_current_state.rb +0 -85
- data/scripts/analyze_false_positives.rb +0 -114
- data/scripts/analyze_remaining_failures.rb +0 -105
- data/scripts/compare_current_failures.rb +0 -95
- data/scripts/compare_dom_tree_diff.rb +0 -158
- data/scripts/compare_failures.rb +0 -151
- data/scripts/debug_attribute_extraction.rb +0 -66
- data/scripts/debug_blocks_839.rb +0 -115
- data/scripts/debug_meta_matching.rb +0 -52
- data/scripts/debug_p_matching.rb +0 -192
- data/scripts/debug_signature_matching.rb +0 -118
- data/scripts/debug_sourcecode_124.rb +0 -32
- data/scripts/debug_whitespace_sensitive.rb +0 -192
- data/scripts/extract_false_positives.rb +0 -138
- data/scripts/find_actual_false_positives.rb +0 -125
- data/scripts/investigate_all_false_positives.rb +0 -161
- data/scripts/investigate_batch1.rb +0 -127
- data/scripts/investigate_classification.rb +0 -150
- data/scripts/investigate_classification_detailed.rb +0 -190
- data/scripts/investigate_common_failures.rb +0 -342
- data/scripts/investigate_false_negative.rb +0 -80
- data/scripts/investigate_false_positive.rb +0 -83
- data/scripts/investigate_false_positives.rb +0 -227
- data/scripts/investigate_false_positives_batch.rb +0 -163
- data/scripts/investigate_mixed_content.rb +0 -125
- data/scripts/investigate_remaining_16.rb +0 -214
- data/scripts/run_single_test.rb +0 -29
- data/scripts/test_all_false_positives.rb +0 -95
- data/scripts/test_attribute_details.rb +0 -61
- data/scripts/test_both_algorithms.rb +0 -49
- data/scripts/test_both_simple.rb +0 -49
- data/scripts/test_enhanced_semantic_output.rb +0 -125
- data/scripts/test_readme_examples.rb +0 -131
- data/scripts/test_semantic_tree_diff.rb +0 -99
- data/scripts/test_semantic_ux_improvements.rb +0 -135
- data/scripts/test_single_false_positive.rb +0 -119
- data/scripts/test_size_limits.rb +0 -99
- data/test_html_1.html +0 -21
- data/test_html_2.html +0 -21
- data/test_nokogiri.rb +0 -33
- data/test_normalize.rb +0 -45
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4f9d0e9c0c1bc9f213d837f480d3d9a26ce11505691ff48b63907e7a4abd530e
|
|
4
|
+
data.tar.gz: aa591a7682cede5f23a8dcb8b8eb8f7616d849bc5f9cad1aa2038463ee9c52b0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6c0af5461fff1d1cd1347ba57681bc671cda71d55d62efd328ac9424ef10b8329ec877ccf43f9ff78e83a54ca03df1026e160b259396caac7bd2704227ef01b1
|
|
7
|
+
data.tar.gz: 8803713442225ae16c0c6c9c03c9cff55dd27dc6b96f5254ee5f814a29b7ad7b5ef6eafd0cd6a58d17f070a2609154476215147d595a01a69586ca7de8608a7f
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-01-
|
|
3
|
+
# on 2026-01-21 01:26:28 UTC using RuboCop version 1.81.7.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -12,27 +12,51 @@ Gemspec/RequiredRubyVersion:
|
|
|
12
12
|
Exclude:
|
|
13
13
|
- 'canon.gemspec'
|
|
14
14
|
|
|
15
|
-
# Offense count:
|
|
15
|
+
# Offense count: 16
|
|
16
16
|
# This cop supports safe autocorrection (--autocorrect).
|
|
17
17
|
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
18
18
|
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
19
19
|
Layout/ArgumentAlignment:
|
|
20
20
|
Exclude:
|
|
21
|
-
- 'lib/canon/comparison.rb'
|
|
21
|
+
- 'lib/canon/comparison/xml_comparator.rb'
|
|
22
|
+
- 'lib/canon/diff/xml_serialization_formatter.rb'
|
|
23
|
+
- 'spec/canon/diff/xml_serialization_formatter_spec.rb'
|
|
24
|
+
|
|
25
|
+
# Offense count: 1
|
|
26
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
27
|
+
# Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
|
|
28
|
+
# SupportedHashRocketStyles: key, separator, table
|
|
29
|
+
# SupportedColonStyles: key, separator, table
|
|
30
|
+
# SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
|
|
31
|
+
Layout/HashAlignment:
|
|
32
|
+
Exclude:
|
|
33
|
+
- 'test_verify_equivalent.rb'
|
|
22
34
|
|
|
23
|
-
# Offense count:
|
|
35
|
+
# Offense count: 709
|
|
24
36
|
# This cop supports safe autocorrection (--autocorrect).
|
|
25
37
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
|
|
26
38
|
# URISchemes: http, https
|
|
27
39
|
Layout/LineLength:
|
|
28
40
|
Enabled: false
|
|
29
41
|
|
|
30
|
-
# Offense count:
|
|
42
|
+
# Offense count: 4
|
|
43
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
44
|
+
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
45
|
+
# SupportedStyles: aligned, indented
|
|
46
|
+
Layout/MultilineOperationIndentation:
|
|
47
|
+
Exclude:
|
|
48
|
+
- 'lib/canon/diff/diff_classifier.rb'
|
|
49
|
+
- 'lib/canon/diff/xml_serialization_formatter.rb'
|
|
50
|
+
|
|
51
|
+
# Offense count: 17
|
|
31
52
|
# This cop supports safe autocorrection (--autocorrect).
|
|
32
53
|
# Configuration parameters: AllowInHeredoc.
|
|
33
54
|
Layout/TrailingWhitespace:
|
|
34
55
|
Exclude:
|
|
35
|
-
- 'lib/canon/comparison.rb'
|
|
56
|
+
- 'lib/canon/comparison/xml_comparator.rb'
|
|
57
|
+
- 'lib/canon/diff/xml_serialization_formatter.rb'
|
|
58
|
+
- 'spec/canon/diff/xml_serialization_formatter_spec.rb'
|
|
59
|
+
- 'test_verify_equivalent.rb'
|
|
36
60
|
|
|
37
61
|
# Offense count: 48
|
|
38
62
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
@@ -74,38 +98,38 @@ Lint/UnusedMethodArgument:
|
|
|
74
98
|
- 'lib/canon/diff_formatter/by_line/xml_formatter.rb'
|
|
75
99
|
- 'lib/canon/diff_formatter/by_object/base_formatter.rb'
|
|
76
100
|
|
|
77
|
-
# Offense count:
|
|
101
|
+
# Offense count: 207
|
|
78
102
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
79
103
|
Metrics/AbcSize:
|
|
80
104
|
Enabled: false
|
|
81
105
|
|
|
82
|
-
# Offense count:
|
|
106
|
+
# Offense count: 20
|
|
83
107
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
|
|
84
108
|
# AllowedMethods: refine
|
|
85
109
|
Metrics/BlockLength:
|
|
86
110
|
Max: 84
|
|
87
111
|
|
|
88
|
-
# Offense count:
|
|
112
|
+
# Offense count: 176
|
|
89
113
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
90
114
|
Metrics/CyclomaticComplexity:
|
|
91
115
|
Enabled: false
|
|
92
116
|
|
|
93
|
-
# Offense count:
|
|
117
|
+
# Offense count: 360
|
|
94
118
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
95
119
|
Metrics/MethodLength:
|
|
96
120
|
Max: 110
|
|
97
121
|
|
|
98
|
-
# Offense count:
|
|
122
|
+
# Offense count: 45
|
|
99
123
|
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
|
100
124
|
Metrics/ParameterLists:
|
|
101
125
|
Max: 9
|
|
102
126
|
|
|
103
|
-
# Offense count:
|
|
127
|
+
# Offense count: 142
|
|
104
128
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
105
129
|
Metrics/PerceivedComplexity:
|
|
106
130
|
Enabled: false
|
|
107
131
|
|
|
108
|
-
# Offense count:
|
|
132
|
+
# Offense count: 28
|
|
109
133
|
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
|
110
134
|
# AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
|
|
111
135
|
Naming/MethodParameterName:
|
|
@@ -113,7 +137,6 @@ Naming/MethodParameterName:
|
|
|
113
137
|
- 'lib/canon/comparison/xml_comparator.rb'
|
|
114
138
|
- 'lib/canon/comparison/xml_comparator/attribute_comparator.rb'
|
|
115
139
|
- 'lib/canon/xml/namespace_handler.rb'
|
|
116
|
-
- 'scripts/investigate_all_false_positives.rb'
|
|
117
140
|
|
|
118
141
|
# Offense count: 1
|
|
119
142
|
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros, UseSorbetSigs.
|
|
@@ -125,13 +148,15 @@ Naming/PredicatePrefix:
|
|
|
125
148
|
Exclude:
|
|
126
149
|
- 'lib/canon/comparison/html_comparator.rb'
|
|
127
150
|
|
|
128
|
-
# Offense count:
|
|
151
|
+
# Offense count: 6
|
|
129
152
|
# Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
|
|
130
153
|
# SupportedStyles: snake_case, normalcase, non_integer
|
|
131
154
|
# AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
|
|
132
155
|
Naming/VariableNumber:
|
|
133
156
|
Exclude:
|
|
134
157
|
- 'lib/canon/comparison/json_comparator.rb'
|
|
158
|
+
- 'lib/canon/comparison/markup_comparator.rb'
|
|
159
|
+
- 'lib/canon/comparison/xml_comparator/diff_node_builder.rb'
|
|
135
160
|
|
|
136
161
|
# Offense count: 2
|
|
137
162
|
# Configuration parameters: MinSize.
|
|
@@ -140,7 +165,7 @@ Performance/CollectionLiteralInLoop:
|
|
|
140
165
|
- 'lib/canon/comparison/html_comparator.rb'
|
|
141
166
|
- 'lib/canon/xml/xml_base_handler.rb'
|
|
142
167
|
|
|
143
|
-
# Offense count:
|
|
168
|
+
# Offense count: 64
|
|
144
169
|
# Configuration parameters: Prefixes, AllowedPatterns.
|
|
145
170
|
# Prefixes: when, with, without
|
|
146
171
|
RSpec/ContextWording:
|
|
@@ -157,7 +182,7 @@ RSpec/DescribeMethod:
|
|
|
157
182
|
- 'spec/canon/comparison/multiple_differences_spec.rb'
|
|
158
183
|
- 'spec/canon/diff_formatter/character_map_customization_spec.rb'
|
|
159
184
|
|
|
160
|
-
# Offense count:
|
|
185
|
+
# Offense count: 675
|
|
161
186
|
# Configuration parameters: CountAsOne.
|
|
162
187
|
RSpec/ExampleLength:
|
|
163
188
|
Max: 67
|
|
@@ -171,7 +196,7 @@ RSpec/ExpectActual:
|
|
|
171
196
|
- 'spec/canon/rspec_matchers_spec.rb'
|
|
172
197
|
- 'spec/canon/string_matcher_spec.rb'
|
|
173
198
|
|
|
174
|
-
# Offense count:
|
|
199
|
+
# Offense count: 175
|
|
175
200
|
# Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
|
|
176
201
|
RSpec/IndexedLet:
|
|
177
202
|
Exclude:
|
|
@@ -208,11 +233,11 @@ RSpec/MultipleDescribes:
|
|
|
208
233
|
Exclude:
|
|
209
234
|
- 'spec/canon/comparison/match_options_spec.rb'
|
|
210
235
|
|
|
211
|
-
# Offense count:
|
|
236
|
+
# Offense count: 518
|
|
212
237
|
RSpec/MultipleExpectations:
|
|
213
238
|
Max: 15
|
|
214
239
|
|
|
215
|
-
# Offense count:
|
|
240
|
+
# Offense count: 69
|
|
216
241
|
# Configuration parameters: AllowSubject.
|
|
217
242
|
RSpec/MultipleMemoizedHelpers:
|
|
218
243
|
Max: 13
|
|
@@ -226,7 +251,7 @@ RSpec/NamedSubject:
|
|
|
226
251
|
- 'spec/canon/pretty_printer/json_spec.rb'
|
|
227
252
|
- 'spec/canon/pretty_printer/xml_spec.rb'
|
|
228
253
|
|
|
229
|
-
# Offense count:
|
|
254
|
+
# Offense count: 37
|
|
230
255
|
# Configuration parameters: AllowedGroups.
|
|
231
256
|
RSpec/NestedGroups:
|
|
232
257
|
Max: 4
|
|
@@ -254,14 +279,24 @@ RSpec/SpecFilePathFormat:
|
|
|
254
279
|
- 'spec/canon/yaml/formatter_spec.rb'
|
|
255
280
|
- 'spec/xml_c14n_spec.rb'
|
|
256
281
|
|
|
257
|
-
# Offense count:
|
|
282
|
+
# Offense count: 120
|
|
258
283
|
# Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
|
|
259
284
|
RSpec/VerifiedDoubles:
|
|
260
285
|
Exclude:
|
|
286
|
+
- 'spec/canon/comparison/whitespace_sensitivity_spec.rb'
|
|
261
287
|
- 'spec/canon/diff/diff_classifier_spec.rb'
|
|
262
288
|
- 'spec/canon/diff/path_builder_spec.rb'
|
|
289
|
+
- 'spec/canon/diff/xml_serialization_formatter_spec.rb'
|
|
263
290
|
- 'spec/canon/tree_diff/operation_converter_spec.rb'
|
|
264
291
|
|
|
292
|
+
# Offense count: 1
|
|
293
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
294
|
+
# Configuration parameters: EnforcedStyle, AllowComments.
|
|
295
|
+
# SupportedStyles: empty, nil, both
|
|
296
|
+
Style/EmptyElse:
|
|
297
|
+
Exclude:
|
|
298
|
+
- 'lib/canon/comparison/xml_comparator.rb'
|
|
299
|
+
|
|
265
300
|
# Offense count: 3
|
|
266
301
|
# Configuration parameters: MinBranchesCount.
|
|
267
302
|
Style/HashLikeCase:
|
|
@@ -276,9 +311,35 @@ Style/IdenticalConditionalBranches:
|
|
|
276
311
|
- 'lib/canon/diff_formatter/by_object/base_formatter.rb'
|
|
277
312
|
- 'lib/canon/diff_formatter/legend.rb'
|
|
278
313
|
|
|
314
|
+
# Offense count: 2
|
|
315
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
316
|
+
# Configuration parameters: InverseMethods, InverseBlocks.
|
|
317
|
+
Style/InverseMethods:
|
|
318
|
+
Exclude:
|
|
319
|
+
- 'lib/canon/comparison/markup_comparator.rb'
|
|
320
|
+
- 'lib/canon/comparison/xml_comparator/diff_node_builder.rb'
|
|
321
|
+
|
|
279
322
|
# Offense count: 1
|
|
280
323
|
# Configuration parameters: AllowedMethods.
|
|
281
324
|
# AllowedMethods: respond_to_missing?
|
|
282
325
|
Style/OptionalBooleanParameter:
|
|
283
326
|
Exclude:
|
|
284
327
|
- 'lib/canon/diff_formatter/debug_output.rb'
|
|
328
|
+
|
|
329
|
+
# Offense count: 3
|
|
330
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
331
|
+
# Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
|
|
332
|
+
# SupportedStyles: single_quotes, double_quotes
|
|
333
|
+
Style/StringLiterals:
|
|
334
|
+
Exclude:
|
|
335
|
+
- 'lib/canon/comparison/markup_comparator.rb'
|
|
336
|
+
- 'lib/canon/comparison/xml_comparator/diff_node_builder.rb'
|
|
337
|
+
- 'test_verify_equivalent.rb'
|
|
338
|
+
|
|
339
|
+
# Offense count: 12
|
|
340
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
341
|
+
# Configuration parameters: EnforcedStyleForMultiline.
|
|
342
|
+
# SupportedStylesForMultiline: comma, consistent_comma, diff_comma, no_comma
|
|
343
|
+
Style/TrailingCommaInArguments:
|
|
344
|
+
Exclude:
|
|
345
|
+
- 'spec/canon/diff/xml_serialization_formatter_spec.rb'
|
data/docs/Gemfile
CHANGED
data/docs/_config.yml
CHANGED
|
@@ -12,8 +12,13 @@ repository: lutaml/canon
|
|
|
12
12
|
|
|
13
13
|
# Theme
|
|
14
14
|
theme: just-the-docs
|
|
15
|
+
remote_theme: just-the-docs/just-the-docs@v0.7.0
|
|
15
16
|
color_scheme: light
|
|
16
17
|
|
|
18
|
+
# Logo (uncomment if you have a logo)
|
|
19
|
+
# logo: "/assets/images/logo.svg"
|
|
20
|
+
# favicon_ico: "/assets/images/favicon.ico"
|
|
21
|
+
|
|
17
22
|
# AsciiDoc support
|
|
18
23
|
asciidoc: {}
|
|
19
24
|
asciidoctor:
|
|
@@ -63,10 +68,36 @@ heading_anchors: true
|
|
|
63
68
|
# Footer
|
|
64
69
|
footer_content: 'Copyright © 2025 Ribose. Distributed under the <a href="https://github.com/lutaml/canon/blob/main/LICENSE.txt">BSD 2-Clause License</a>.'
|
|
65
70
|
|
|
71
|
+
# Footer last edit timestamp
|
|
72
|
+
last_edit_timestamp: true
|
|
73
|
+
last_edit_time_format: "%b %e %Y at %I:%M %p"
|
|
74
|
+
|
|
75
|
+
# Enable code copy button
|
|
76
|
+
enable_copy_code_button: true
|
|
77
|
+
|
|
78
|
+
# Callouts
|
|
79
|
+
callouts_level: quiet
|
|
80
|
+
callouts:
|
|
81
|
+
highlight:
|
|
82
|
+
color: yellow
|
|
83
|
+
important:
|
|
84
|
+
title: Important
|
|
85
|
+
color: blue
|
|
86
|
+
new:
|
|
87
|
+
title: New
|
|
88
|
+
color: green
|
|
89
|
+
note:
|
|
90
|
+
title: Note
|
|
91
|
+
color: purple
|
|
92
|
+
warning:
|
|
93
|
+
title: Warning
|
|
94
|
+
color: red
|
|
95
|
+
|
|
66
96
|
# Plugins
|
|
67
97
|
plugins:
|
|
68
98
|
- jekyll-asciidoc
|
|
69
99
|
- jekyll-seo-tag
|
|
100
|
+
- jekyll-sitemap
|
|
70
101
|
|
|
71
102
|
# Markdown settings (for any markdown files)
|
|
72
103
|
markdown: kramdown
|
|
@@ -75,6 +106,60 @@ kramdown:
|
|
|
75
106
|
hard_wrap: false
|
|
76
107
|
syntax_highlighter: rouge
|
|
77
108
|
|
|
109
|
+
# Collections for organizing content
|
|
110
|
+
collections:
|
|
111
|
+
# Core documentation pages (getting-started, interfaces, etc.)
|
|
112
|
+
pages:
|
|
113
|
+
permalink: "/:path/"
|
|
114
|
+
output: true
|
|
115
|
+
|
|
116
|
+
# Feature documentation
|
|
117
|
+
features:
|
|
118
|
+
permalink: "/:collection/:path/"
|
|
119
|
+
output: true
|
|
120
|
+
|
|
121
|
+
# Understanding/internal documentation
|
|
122
|
+
understanding:
|
|
123
|
+
permalink: "/:collection/:path/"
|
|
124
|
+
output: true
|
|
125
|
+
|
|
126
|
+
# Advanced topics
|
|
127
|
+
advanced:
|
|
128
|
+
permalink: "/:collection/:path/"
|
|
129
|
+
output: true
|
|
130
|
+
|
|
131
|
+
# Guides (task-oriented tutorials)
|
|
132
|
+
guides:
|
|
133
|
+
permalink: "/:collection/:path/"
|
|
134
|
+
output: true
|
|
135
|
+
|
|
136
|
+
# Reference documentation
|
|
137
|
+
reference:
|
|
138
|
+
permalink: "/:collection/:path/"
|
|
139
|
+
output: true
|
|
140
|
+
|
|
141
|
+
# Just the Docs collection configuration
|
|
142
|
+
just_the_docs:
|
|
143
|
+
collections:
|
|
144
|
+
pages:
|
|
145
|
+
name: Pages
|
|
146
|
+
nav_fold: false
|
|
147
|
+
features:
|
|
148
|
+
name: Features
|
|
149
|
+
nav_fold: true
|
|
150
|
+
understanding:
|
|
151
|
+
name: Understanding
|
|
152
|
+
nav_fold: true
|
|
153
|
+
advanced:
|
|
154
|
+
name: Advanced
|
|
155
|
+
nav_fold: true
|
|
156
|
+
guides:
|
|
157
|
+
name: Guides
|
|
158
|
+
nav_fold: true
|
|
159
|
+
reference:
|
|
160
|
+
name: Reference
|
|
161
|
+
nav_fold: true
|
|
162
|
+
|
|
78
163
|
# Defaults
|
|
79
164
|
defaults:
|
|
80
165
|
- scope:
|
|
@@ -83,6 +168,10 @@ defaults:
|
|
|
83
168
|
values:
|
|
84
169
|
layout: default
|
|
85
170
|
|
|
171
|
+
# Include additional files
|
|
172
|
+
include:
|
|
173
|
+
- "*.adoc"
|
|
174
|
+
|
|
86
175
|
# Exclude from processing
|
|
87
176
|
exclude:
|
|
88
177
|
- Gemfile
|
|
@@ -97,4 +186,4 @@ exclude:
|
|
|
97
186
|
- .git
|
|
98
187
|
- .gitignore
|
|
99
188
|
|
|
100
|
-
permalink: pretty
|
|
189
|
+
permalink: pretty
|
|
@@ -80,14 +80,20 @@ Classification depends on `attribute_order` setting:
|
|
|
80
80
|
│ │
|
|
81
81
|
│ DiffClassifier examines each DiffNode: │
|
|
82
82
|
│ │
|
|
83
|
-
│
|
|
84
|
-
│
|
|
83
|
+
│ 1. Serialization-level formatting (XmlSerializationFormatter) │
|
|
84
|
+
│ → XML syntax differences: <tag/> vs <tag></tag> │
|
|
85
|
+
│ → ALWAYS formatting-only (non-normative) │
|
|
85
86
|
│ │
|
|
86
|
-
│
|
|
87
|
-
│
|
|
88
|
-
│
|
|
89
|
-
│ → NORMATIVE (difference matters) │
|
|
87
|
+
│ 2. Content-level formatting (text_content: :normalize) │
|
|
88
|
+
│ → Whitespace differences in content │
|
|
89
|
+
│ → Formatting-only when normalized content matches │
|
|
90
90
|
│ │
|
|
91
|
+
│ 3. CompareProfile policy (normative vs informative) │
|
|
92
|
+
│ → behavior == :ignore → INFORMATIVE │
|
|
93
|
+
│ → behavior == :strict → NORMATIVE │
|
|
94
|
+
│ → behavior == :normalize → Check content normalization │
|
|
95
|
+
│ │
|
|
96
|
+
│ Sets diff_node.formatting = true/false │
|
|
91
97
|
│ Sets diff_node.normative = true/false │
|
|
92
98
|
└───────────────────────────────────┬───────────────────────────────┘
|
|
93
99
|
↓
|
|
@@ -102,6 +108,27 @@ Classification depends on `attribute_order` setting:
|
|
|
102
108
|
└──────────────────────────────────────────────────────────────────┘
|
|
103
109
|
----
|
|
104
110
|
|
|
111
|
+
=== Three-Level Classification System
|
|
112
|
+
|
|
113
|
+
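Canon classifies differences at **three levels** (formatting, informative, and normative); because formatting is further split into serialization-level and content-level, this yields the four kinds listed below: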
|
|
114
|
+
|
|
115
|
+
| Kind | `formatting:` | `normative:` | Meaning | Examples |
|
|
116
|
+
|------|---------------|--------------|---------|----------|
|
|
117
|
+
| **Serialization formatting** | `true` | `false` | XML syntax differences | `<tag/>` vs `<tag></tag>` |
|
|
118
|
+
| **Content formatting** | `true` | `false` | Whitespace in content | `Hello  world` (two spaces) vs `Hello world` |
|
|
119
|
+
| **Informative** | `false` | `false` | Tracked but doesn't affect equivalence | Attribute order (when `:ignore`) |
|
|
120
|
+
| **Normative** | `false` | `true` | Affects equivalence | Different words, missing elements |
|
|
121
|
+
|
|
122
|
+
**Key distinction**:
|
|
123
|
+
|
|
124
|
+
* **Serialization-level formatting**: XML syntax differences that are ALWAYS non-normative regardless of match options, because they represent different valid serializations of the same semantic content. Detected by `XmlSerializationFormatter`.
|
|
125
|
+
|
|
126
|
+
* **Content-level formatting**: Whitespace differences in document content. These are formatting-only (non-normative) when normalized content matches (using `text_content: :normalize`).
|
|
127
|
+
|
|
128
|
+
* **Informative**: Differences tracked for reference but don't affect equivalence (when behavior is `:ignore`).
|
|
129
|
+
|
|
130
|
+
* **Normative**: Semantic content differences that affect equivalence (when behavior is `:strict` or when normalized content differs).
|
|
131
|
+
|
|
105
132
|
== CompareProfile-Based Classification
|
|
106
133
|
|
|
107
134
|
=== Overview
|
|
@@ -120,22 +147,42 @@ DiffNode → DiffClassifier → CompareProfile → normative?
|
|
|
120
147
|
|
|
121
148
|
=== Classification Hierarchy
|
|
122
149
|
|
|
123
|
-
Canon uses a
|
|
150
|
+
Canon uses a **multi-level hierarchy** for classifying differences:
|
|
151
|
+
|
|
152
|
+
[source]
|
|
153
|
+
----
|
|
154
|
+
DiffNode → DiffClassifier → XmlSerializationFormatter → serialization formatting?
|
|
155
|
+
↓
|
|
156
|
+
CompareProfile → normative dimension?
|
|
157
|
+
↓
|
|
158
|
+
FormattingDetector → formatting-only?
|
|
159
|
+
↓
|
|
160
|
+
Final classification
|
|
161
|
+
----
|
|
162
|
+
|
|
163
|
+
**Classification priority (from highest to lowest specificity)**:
|
|
164
|
+
|
|
165
|
+
1. **Serialization-level formatting** (highest priority)
|
|
166
|
+
- XML syntax differences: `<tag/>` vs `<tag></tag>`
|
|
167
|
+
- Detected by `XmlSerializationFormatter`
|
|
168
|
+
- **ALWAYS** `formatting: true, normative: false`
|
|
169
|
+
- Bypasses all other classification logic
|
|
124
170
|
|
|
125
|
-
|
|
126
|
-
-
|
|
127
|
-
-
|
|
128
|
-
-
|
|
171
|
+
2. **Content-level formatting**
|
|
172
|
+
- Whitespace differences in document content
|
|
173
|
+
- Detected by `FormattingDetector` when `text_content: :normalize`
|
|
174
|
+
- `formatting: true, normative: false` when normalized content matches
|
|
175
|
+
- Respects element-level whitespace sensitivity
|
|
129
176
|
|
|
130
|
-
|
|
177
|
+
3. **Informative** (based on `:ignore` behavior)
|
|
131
178
|
- Tracked but doesn't affect equivalence
|
|
132
|
-
-
|
|
133
|
-
-
|
|
179
|
+
- `formatting: false, normative: false`
|
|
180
|
+
- Example: Attribute order when `attribute_order: :ignore`
|
|
134
181
|
|
|
135
|
-
|
|
182
|
+
4. **Normative** (based on `:strict` behavior or content mismatch)
|
|
136
183
|
- Affects equivalence
|
|
137
|
-
-
|
|
138
|
-
-
|
|
184
|
+
- `formatting: false, normative: true`
|
|
185
|
+
- Example: Different words, missing elements
|
|
139
186
|
|
|
140
187
|
=== Format-Specific Policies
|
|
141
188
|
|
|
@@ -229,6 +276,97 @@ result = Canon::Comparison.equivalent?(
|
|
|
229
276
|
----
|
|
230
277
|
====
|
|
231
278
|
|
|
279
|
+
==== Text Content
|
|
280
|
+
|
|
281
|
+
* **`:strict` behavior** → Normative
|
|
282
|
+
- Text must match exactly, including all whitespace
|
|
283
|
+
- Any text difference causes non-equivalence
|
|
284
|
+
|
|
285
|
+
* **`:normalize` behavior** → Normative (if the texts still differ after normalization) or formatting-only (if only whitespace differs)
|
|
286
|
+
- Whitespace is normalized (collapsed/trimmed) before comparison
|
|
287
|
+
- If the normalized texts match but the originals differ, the difference is classified as formatting-only (non-normative)
|
|
288
|
+
- This ensures that whitespace-only differences don't affect equivalence
|
|
289
|
+
- Element-level sensitivity is respected (e.g., `<pre>`, `<code>` preserve whitespace)
|
|
290
|
+
|
|
291
|
+
* **`:ignore` behavior** → Informative
|
|
292
|
+
- Text content differences tracked but don't affect equivalence
|
|
293
|
+
|
|
294
|
+
.Example: Text content with normalize behavior
|
|
295
|
+
====
|
|
296
|
+
[source,ruby]
|
|
297
|
+
----
|
|
298
|
+
# Formatting-only difference - normalized texts match
|
|
299
|
+
xml1 = '<p>Hello  world</p>'
|
|
300
|
+
xml2 = '<p>Hello world</p>'
|
|
301
|
+
|
|
302
|
+
result = Canon::Comparison.equivalent?(
|
|
303
|
+
xml1, xml2,
|
|
304
|
+
match: { text_content: :normalize }
|
|
305
|
+
)
|
|
306
|
+
# => true (extra space is formatting-only, classified as informative)
|
|
307
|
+
|
|
308
|
+
# Shows as informative in verbose output
|
|
309
|
+
result.differences.first.normative?
|
|
310
|
+
# => false
|
|
311
|
+
result.differences.first.formatting?
|
|
312
|
+
# => true
|
|
313
|
+
----
====
|
|
314
|
+
|
|
315
|
+
.Using text_content: :normalize with element-level sensitivity
|
|
316
|
+
====
|
|
317
|
+
[source,ruby]
|
|
318
|
+
----
|
|
319
|
+
# HTML defaults: <code> is whitespace-sensitive
|
|
320
|
+
html1 = '<code> indented </code><p> text </p>'
|
|
321
|
+
html2 = '<code>indented</code><p>text</p>'
|
|
322
|
+
|
|
323
|
+
# With <code> blacklisted from sensitive elements
|
|
324
|
+
Canon::Comparison.equivalent?(html1, html2,
|
|
325
|
+
format: :html,
|
|
326
|
+
match: {
|
|
327
|
+
whitespace_insensitive_elements: [:code],
|
|
328
|
+
}
|
|
329
|
+
)
|
|
330
|
+
# => true
|
|
331
|
+
# - <code> whitespace: formatting-only (informative)
|
|
332
|
+
# - <p> whitespace: formatting-only (informative)
|
|
333
|
+
|
|
334
|
+
# Without blacklisting (default HTML behavior)
|
|
335
|
+
Canon::Comparison.equivalent?(html1, html2, format: :html)
|
|
336
|
+
# => false
|
|
337
|
+
# - <code> whitespace: normative (sensitive element)
|
|
338
|
+
# - <p> whitespace: formatting-only (informative)
|
|
339
|
+
----
|
|
340
|
+
====
|
|
341
|
+
|
|
342
|
+
.Self-closing vs explicit closing tags
|
|
343
|
+
====
|
|
344
|
+
Per XML standards, `<tag/>` and `<tag></tag>` are semantically equivalent (both represent empty elements). Canon classifies differences in serialization format as **formatting-only** (non-normative):
|
|
345
|
+
|
|
346
|
+
[source,ruby]
|
|
347
|
+
----
|
|
348
|
+
# Self-closing vs explicit closing - always equivalent
|
|
349
|
+
xml1 = '<svg><rect x="10" y="10"/></svg>'
|
|
350
|
+
xml2 = '<svg><rect x="10" y="10"></rect></svg>'
|
|
351
|
+
|
|
352
|
+
Canon::Comparison.equivalent?(xml1, xml2, format: :xml)
|
|
353
|
+
# => true
|
|
354
|
+
|
|
355
|
+
# Empty/whitespace-only text nodes from serialisation are formatting-only
|
|
356
|
+
result = Canon::Comparison.equivalent?(xml1, xml2, format: :xml, verbose: true)
|
|
357
|
+
result.differences.each do |diff|
|
|
358
|
+
if diff.dimension == :text_content
|
|
359
|
+
puts "Normative: #{diff.normative?}" # => false
|
|
360
|
+
puts "Formatting: #{diff.formatting?}" # => true
|
|
361
|
+
end
|
|
362
|
+
end
|
|
363
|
+
----
|
|
364
|
+
|
|
365
|
+
This applies regardless of the `text_content` behavior setting, as these differences are purely serialization format variations (similar to attribute order).
|
|
366
|
+
|
|
367
|
+
The key insight: empty or whitespace-only text nodes created by different serialization styles (`<tag/>` vs `<tag></tag>`) are always classified as **formatting-only**, not normative.
|
|
368
|
+
====
|
|
369
|
+
|
|
232
370
|
=== FormattingDetector Integration
|
|
233
371
|
|
|
234
372
|
For dimensions that support it (`:text_content`, `:structural_whitespace`),
|
|
@@ -256,21 +394,48 @@ With `:normalize` mode:
|
|
|
256
394
|
|
|
257
395
|
=== Implementation Details
|
|
258
396
|
|
|
259
|
-
The
|
|
397
|
+
The classification system uses three main classes:
|
|
260
398
|
|
|
261
|
-
*
|
|
262
|
-
|
|
263
|
-
|
|
399
|
+
* **`XmlSerializationFormatter`** - Detects XML serialization-level formatting differences
|
|
400
|
+
- Self-closing vs explicit closing tags: `<tag/>` vs `<tag></tag>`
|
|
401
|
+
- Always returns `formatting: true, normative: false`
|
|
402
|
+
- These differences are ALWAYS non-normative regardless of match options
|
|
264
403
|
|
|
265
|
-
|
|
404
|
+
* **`CompareProfile`** - Determines dimension behavior and policy
|
|
405
|
+
- `normative_dimension?(dimension)` - Is this dimension normative?
|
|
406
|
+
- `affects_equivalence?(dimension)` - Does this dimension affect equivalence?
|
|
407
|
+
- `supports_formatting_detection?(dimension)` - Can this dimension have formatting-only diffs?
|
|
408
|
+
|
|
409
|
+
* **`DiffClassifier`** - Orchestrates classification using the above
|
|
410
|
+
- First checks `XmlSerializationFormatter` for serialization formatting
|
|
411
|
+
- Then handles content-level formatting (text_content: :normalize)
|
|
412
|
+
- Finally applies `CompareProfile` policy for normative vs informative
|
|
266
413
|
|
|
267
414
|
[source,ruby]
|
|
268
415
|
----
|
|
269
416
|
def classify(diff_node)
|
|
270
|
-
# Check
|
|
417
|
+
# FIRST: Check for XML serialization-level formatting differences
|
|
418
|
+
# These are ALWAYS non-normative (formatting-only) regardless of match options
|
|
419
|
+
if XmlSerializationFormatter.serialization_formatting?(diff_node)
|
|
420
|
+
diff_node.formatting = true
|
|
421
|
+
diff_node.normative = false
|
|
422
|
+
return diff_node
|
|
423
|
+
end
|
|
424
|
+
|
|
425
|
+
# SECOND: Handle content-level formatting for text_content with :normalize
|
|
426
|
+
if diff_node.dimension == :text_content &&
|
|
427
|
+
profile.send(:behavior_for, :text_content) == :normalize &&
|
|
428
|
+
!inside_whitespace_sensitive_element?(diff_node) &&
|
|
429
|
+
formatting_only_diff?(diff_node)
|
|
430
|
+
diff_node.formatting = true
|
|
431
|
+
diff_node.normative = false
|
|
432
|
+
return diff_node
|
|
433
|
+
end
|
|
434
|
+
|
|
435
|
+
# THIRD: Apply CompareProfile policy
|
|
271
436
|
is_normative = profile.normative_dimension?(diff_node.dimension)
|
|
272
437
|
|
|
273
|
-
#
|
|
438
|
+
# FOURTH: Check FormattingDetector for non-normative dimensions
|
|
274
439
|
if !is_normative && profile.supports_formatting_detection?(diff_node.dimension)
|
|
275
440
|
if formatting_only_diff?(diff_node)
|
|
276
441
|
diff_node.formatting = true
|
|
@@ -279,11 +444,18 @@ def classify(diff_node)
|
|
|
279
444
|
end
|
|
280
445
|
end
|
|
281
446
|
|
|
447
|
+
# FIFTH: Apply normative determination
|
|
282
448
|
diff_node.normative = is_normative
|
|
283
449
|
diff_node
|
|
284
450
|
end
|
|
285
451
|
----
|
|
286
452
|
|
|
453
|
+
The key distinction for `text_content: :normalize`:
|
|
454
|
+
|
|
455
|
+
* **Formatting-only detection**: Uses `normalized_equivalent?` method to compare normalized texts
|
|
456
|
+
* **Element sensitivity**: Respects element-level whitespace sensitivity (`<pre>`, `<code>`, etc.)
|
|
457
|
+
* **Result**: Whitespace-only differences are classified as *formatting-only* (non-normative) when using `:normalize`
|
|
458
|
+
|
|
287
459
|
== Visual Indicators
|
|
288
460
|
|
|
289
461
|
=== Normative Diffs
|