canon 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +83 -22
  3. data/docs/Gemfile +1 -0
  4. data/docs/_config.yml +90 -1
  5. data/docs/advanced/diff-classification.adoc +196 -24
  6. data/docs/features/match-options/index.adoc +239 -1
  7. data/lib/canon/comparison/format_detector.rb +2 -1
  8. data/lib/canon/comparison/html_comparator.rb +19 -8
  9. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  10. data/lib/canon/comparison/markup_comparator.rb +109 -2
  11. data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
  12. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  13. data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
  14. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
  15. data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
  16. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
  17. data/lib/canon/comparison/xml_comparator.rb +240 -23
  18. data/lib/canon/comparison/xml_node_comparison.rb +25 -3
  19. data/lib/canon/diff/diff_classifier.rb +119 -5
  20. data/lib/canon/diff/formatting_detector.rb +1 -1
  21. data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
  22. data/lib/canon/rspec_matchers.rb +37 -8
  23. data/lib/canon/version.rb +1 -1
  24. data/lib/canon/xml/data_model.rb +24 -13
  25. metadata +4 -78
  26. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
  27. data/false_positive_analysis.txt +0 -0
  28. data/file1.html +0 -1
  29. data/file2.html +0 -1
  30. data/old-docs/ADVANCED_TOPICS.adoc +0 -20
  31. data/old-docs/BASIC_USAGE.adoc +0 -16
  32. data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
  33. data/old-docs/CLI.adoc +0 -497
  34. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  35. data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
  36. data/old-docs/DIFF_FORMATTING.adoc +0 -540
  37. data/old-docs/DIFF_PARAMETERS.adoc +0 -261
  38. data/old-docs/DOM_DIFF.adoc +0 -1017
  39. data/old-docs/ENV_CONFIG.adoc +0 -876
  40. data/old-docs/FORMATS.adoc +0 -867
  41. data/old-docs/INPUT_VALIDATION.adoc +0 -477
  42. data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
  43. data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
  44. data/old-docs/MATCH_OPTIONS.adoc +0 -912
  45. data/old-docs/MODES.adoc +0 -432
  46. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  47. data/old-docs/OPTIONS.adoc +0 -1387
  48. data/old-docs/PREPROCESSING.adoc +0 -491
  49. data/old-docs/README.old.adoc +0 -2831
  50. data/old-docs/RSPEC.adoc +0 -814
  51. data/old-docs/RUBY_API.adoc +0 -485
  52. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
  53. data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
  54. data/old-docs/STRING_COMPARE.adoc +0 -345
  55. data/old-docs/TMP.adoc +0 -3384
  56. data/old-docs/TREE_DIFF.adoc +0 -1080
  57. data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
  58. data/old-docs/VERBOSE.adoc +0 -482
  59. data/old-docs/VISUALIZATION_MAP.adoc +0 -625
  60. data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
  61. data/scripts/analyze_current_state.rb +0 -85
  62. data/scripts/analyze_false_positives.rb +0 -114
  63. data/scripts/analyze_remaining_failures.rb +0 -105
  64. data/scripts/compare_current_failures.rb +0 -95
  65. data/scripts/compare_dom_tree_diff.rb +0 -158
  66. data/scripts/compare_failures.rb +0 -151
  67. data/scripts/debug_attribute_extraction.rb +0 -66
  68. data/scripts/debug_blocks_839.rb +0 -115
  69. data/scripts/debug_meta_matching.rb +0 -52
  70. data/scripts/debug_p_matching.rb +0 -192
  71. data/scripts/debug_signature_matching.rb +0 -118
  72. data/scripts/debug_sourcecode_124.rb +0 -32
  73. data/scripts/debug_whitespace_sensitive.rb +0 -192
  74. data/scripts/extract_false_positives.rb +0 -138
  75. data/scripts/find_actual_false_positives.rb +0 -125
  76. data/scripts/investigate_all_false_positives.rb +0 -161
  77. data/scripts/investigate_batch1.rb +0 -127
  78. data/scripts/investigate_classification.rb +0 -150
  79. data/scripts/investigate_classification_detailed.rb +0 -190
  80. data/scripts/investigate_common_failures.rb +0 -342
  81. data/scripts/investigate_false_negative.rb +0 -80
  82. data/scripts/investigate_false_positive.rb +0 -83
  83. data/scripts/investigate_false_positives.rb +0 -227
  84. data/scripts/investigate_false_positives_batch.rb +0 -163
  85. data/scripts/investigate_mixed_content.rb +0 -125
  86. data/scripts/investigate_remaining_16.rb +0 -214
  87. data/scripts/run_single_test.rb +0 -29
  88. data/scripts/test_all_false_positives.rb +0 -95
  89. data/scripts/test_attribute_details.rb +0 -61
  90. data/scripts/test_both_algorithms.rb +0 -49
  91. data/scripts/test_both_simple.rb +0 -49
  92. data/scripts/test_enhanced_semantic_output.rb +0 -125
  93. data/scripts/test_readme_examples.rb +0 -131
  94. data/scripts/test_semantic_tree_diff.rb +0 -99
  95. data/scripts/test_semantic_ux_improvements.rb +0 -135
  96. data/scripts/test_single_false_positive.rb +0 -119
  97. data/scripts/test_size_limits.rb +0 -99
  98. data/test_html_1.html +0 -21
  99. data/test_html_2.html +0 -21
  100. data/test_nokogiri.rb +0 -33
  101. data/test_normalize.rb +0 -45
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1d94be550a90d23eb695f46579b13fa327434993b89a995b3c95ba658a143fb9
4
- data.tar.gz: 2ac083712aa9d0153aa2e0898186a7cbf669e775368378c9de8de6c42c52a257
3
+ metadata.gz: 4f9d0e9c0c1bc9f213d837f480d3d9a26ce11505691ff48b63907e7a4abd530e
4
+ data.tar.gz: aa591a7682cede5f23a8dcb8b8eb8f7616d849bc5f9cad1aa2038463ee9c52b0
5
5
  SHA512:
6
- metadata.gz: 45b4502c83bfd367c5933e66f610de529b87f27d5cfef18c0fe808bcbc91e20c1c3c0e32ba36ed6163a70de37c5ae3a1023c67fed586491e7eea5f7c621e2769
7
- data.tar.gz: 3713856d8dfdfb4164cfd9e8bdcd4dcbdce7b303619d703b7defe58d09c5468eb3acc5f32435f3184d35f20b3a6f7470c2960d8e5cef21ba720ebd7dc44ccfbf
6
+ metadata.gz: 6c0af5461fff1d1cd1347ba57681bc671cda71d55d62efd328ac9424ef10b8329ec877ccf43f9ff78e83a54ca03df1026e160b259396caac7bd2704227ef01b1
7
+ data.tar.gz: 8803713442225ae16c0c6c9c03c9cff55dd27dc6b96f5254ee5f814a29b7ad7b5ef6eafd0cd6a58d17f070a2609154476215147d595a01a69586ca7de8608a7f
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-01-17 14:46:16 UTC using RuboCop version 1.81.7.
3
+ # on 2026-01-21 01:26:28 UTC using RuboCop version 1.81.7.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -12,27 +12,51 @@ Gemspec/RequiredRubyVersion:
12
12
  Exclude:
13
13
  - 'canon.gemspec'
14
14
 
15
- # Offense count: 1
15
+ # Offense count: 16
16
16
  # This cop supports safe autocorrection (--autocorrect).
17
17
  # Configuration parameters: EnforcedStyle, IndentationWidth.
18
18
  # SupportedStyles: with_first_argument, with_fixed_indentation
19
19
  Layout/ArgumentAlignment:
20
20
  Exclude:
21
- - 'lib/canon/comparison.rb'
21
+ - 'lib/canon/comparison/xml_comparator.rb'
22
+ - 'lib/canon/diff/xml_serialization_formatter.rb'
23
+ - 'spec/canon/diff/xml_serialization_formatter_spec.rb'
24
+
25
+ # Offense count: 1
26
+ # This cop supports safe autocorrection (--autocorrect).
27
+ # Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
28
+ # SupportedHashRocketStyles: key, separator, table
29
+ # SupportedColonStyles: key, separator, table
30
+ # SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
31
+ Layout/HashAlignment:
32
+ Exclude:
33
+ - 'test_verify_equivalent.rb'
22
34
 
23
- # Offense count: 697
35
+ # Offense count: 709
24
36
  # This cop supports safe autocorrection (--autocorrect).
25
37
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
26
38
  # URISchemes: http, https
27
39
  Layout/LineLength:
28
40
  Enabled: false
29
41
 
30
- # Offense count: 1
42
+ # Offense count: 4
43
+ # This cop supports safe autocorrection (--autocorrect).
44
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
45
+ # SupportedStyles: aligned, indented
46
+ Layout/MultilineOperationIndentation:
47
+ Exclude:
48
+ - 'lib/canon/diff/diff_classifier.rb'
49
+ - 'lib/canon/diff/xml_serialization_formatter.rb'
50
+
51
+ # Offense count: 17
31
52
  # This cop supports safe autocorrection (--autocorrect).
32
53
  # Configuration parameters: AllowInHeredoc.
33
54
  Layout/TrailingWhitespace:
34
55
  Exclude:
35
- - 'lib/canon/comparison.rb'
56
+ - 'lib/canon/comparison/xml_comparator.rb'
57
+ - 'lib/canon/diff/xml_serialization_formatter.rb'
58
+ - 'spec/canon/diff/xml_serialization_formatter_spec.rb'
59
+ - 'test_verify_equivalent.rb'
36
60
 
37
61
  # Offense count: 48
38
62
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
@@ -74,38 +98,38 @@ Lint/UnusedMethodArgument:
74
98
  - 'lib/canon/diff_formatter/by_line/xml_formatter.rb'
75
99
  - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
76
100
 
77
- # Offense count: 225
101
+ # Offense count: 207
78
102
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
79
103
  Metrics/AbcSize:
80
104
  Enabled: false
81
105
 
82
- # Offense count: 27
106
+ # Offense count: 20
83
107
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
84
108
  # AllowedMethods: refine
85
109
  Metrics/BlockLength:
86
110
  Max: 84
87
111
 
88
- # Offense count: 178
112
+ # Offense count: 176
89
113
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
90
114
  Metrics/CyclomaticComplexity:
91
115
  Enabled: false
92
116
 
93
- # Offense count: 376
117
+ # Offense count: 360
94
118
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
95
119
  Metrics/MethodLength:
96
120
  Max: 110
97
121
 
98
- # Offense count: 39
122
+ # Offense count: 45
99
123
  # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
100
124
  Metrics/ParameterLists:
101
125
  Max: 9
102
126
 
103
- # Offense count: 143
127
+ # Offense count: 142
104
128
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
105
129
  Metrics/PerceivedComplexity:
106
130
  Enabled: false
107
131
 
108
- # Offense count: 29
132
+ # Offense count: 28
109
133
  # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
110
134
  # AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
111
135
  Naming/MethodParameterName:
@@ -113,7 +137,6 @@ Naming/MethodParameterName:
113
137
  - 'lib/canon/comparison/xml_comparator.rb'
114
138
  - 'lib/canon/comparison/xml_comparator/attribute_comparator.rb'
115
139
  - 'lib/canon/xml/namespace_handler.rb'
116
- - 'scripts/investigate_all_false_positives.rb'
117
140
 
118
141
  # Offense count: 1
119
142
  # Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros, UseSorbetSigs.
@@ -125,13 +148,15 @@ Naming/PredicatePrefix:
125
148
  Exclude:
126
149
  - 'lib/canon/comparison/html_comparator.rb'
127
150
 
128
- # Offense count: 2
151
+ # Offense count: 6
129
152
  # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
130
153
  # SupportedStyles: snake_case, normalcase, non_integer
131
154
  # AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
132
155
  Naming/VariableNumber:
133
156
  Exclude:
134
157
  - 'lib/canon/comparison/json_comparator.rb'
158
+ - 'lib/canon/comparison/markup_comparator.rb'
159
+ - 'lib/canon/comparison/xml_comparator/diff_node_builder.rb'
135
160
 
136
161
  # Offense count: 2
137
162
  # Configuration parameters: MinSize.
@@ -140,7 +165,7 @@ Performance/CollectionLiteralInLoop:
140
165
  - 'lib/canon/comparison/html_comparator.rb'
141
166
  - 'lib/canon/xml/xml_base_handler.rb'
142
167
 
143
- # Offense count: 62
168
+ # Offense count: 64
144
169
  # Configuration parameters: Prefixes, AllowedPatterns.
145
170
  # Prefixes: when, with, without
146
171
  RSpec/ContextWording:
@@ -157,7 +182,7 @@ RSpec/DescribeMethod:
157
182
  - 'spec/canon/comparison/multiple_differences_spec.rb'
158
183
  - 'spec/canon/diff_formatter/character_map_customization_spec.rb'
159
184
 
160
- # Offense count: 624
185
+ # Offense count: 675
161
186
  # Configuration parameters: CountAsOne.
162
187
  RSpec/ExampleLength:
163
188
  Max: 67
@@ -171,7 +196,7 @@ RSpec/ExpectActual:
171
196
  - 'spec/canon/rspec_matchers_spec.rb'
172
197
  - 'spec/canon/string_matcher_spec.rb'
173
198
 
174
- # Offense count: 171
199
+ # Offense count: 175
175
200
  # Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
176
201
  RSpec/IndexedLet:
177
202
  Exclude:
@@ -208,11 +233,11 @@ RSpec/MultipleDescribes:
208
233
  Exclude:
209
234
  - 'spec/canon/comparison/match_options_spec.rb'
210
235
 
211
- # Offense count: 515
236
+ # Offense count: 518
212
237
  RSpec/MultipleExpectations:
213
238
  Max: 15
214
239
 
215
- # Offense count: 66
240
+ # Offense count: 69
216
241
  # Configuration parameters: AllowSubject.
217
242
  RSpec/MultipleMemoizedHelpers:
218
243
  Max: 13
@@ -226,7 +251,7 @@ RSpec/NamedSubject:
226
251
  - 'spec/canon/pretty_printer/json_spec.rb'
227
252
  - 'spec/canon/pretty_printer/xml_spec.rb'
228
253
 
229
- # Offense count: 30
254
+ # Offense count: 37
230
255
  # Configuration parameters: AllowedGroups.
231
256
  RSpec/NestedGroups:
232
257
  Max: 4
@@ -254,14 +279,24 @@ RSpec/SpecFilePathFormat:
254
279
  - 'spec/canon/yaml/formatter_spec.rb'
255
280
  - 'spec/xml_c14n_spec.rb'
256
281
 
257
- # Offense count: 94
282
+ # Offense count: 120
258
283
  # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
259
284
  RSpec/VerifiedDoubles:
260
285
  Exclude:
286
+ - 'spec/canon/comparison/whitespace_sensitivity_spec.rb'
261
287
  - 'spec/canon/diff/diff_classifier_spec.rb'
262
288
  - 'spec/canon/diff/path_builder_spec.rb'
289
+ - 'spec/canon/diff/xml_serialization_formatter_spec.rb'
263
290
  - 'spec/canon/tree_diff/operation_converter_spec.rb'
264
291
 
292
+ # Offense count: 1
293
+ # This cop supports safe autocorrection (--autocorrect).
294
+ # Configuration parameters: EnforcedStyle, AllowComments.
295
+ # SupportedStyles: empty, nil, both
296
+ Style/EmptyElse:
297
+ Exclude:
298
+ - 'lib/canon/comparison/xml_comparator.rb'
299
+
265
300
  # Offense count: 3
266
301
  # Configuration parameters: MinBranchesCount.
267
302
  Style/HashLikeCase:
@@ -276,9 +311,35 @@ Style/IdenticalConditionalBranches:
276
311
  - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
277
312
  - 'lib/canon/diff_formatter/legend.rb'
278
313
 
314
+ # Offense count: 2
315
+ # This cop supports unsafe autocorrection (--autocorrect-all).
316
+ # Configuration parameters: InverseMethods, InverseBlocks.
317
+ Style/InverseMethods:
318
+ Exclude:
319
+ - 'lib/canon/comparison/markup_comparator.rb'
320
+ - 'lib/canon/comparison/xml_comparator/diff_node_builder.rb'
321
+
279
322
  # Offense count: 1
280
323
  # Configuration parameters: AllowedMethods.
281
324
  # AllowedMethods: respond_to_missing?
282
325
  Style/OptionalBooleanParameter:
283
326
  Exclude:
284
327
  - 'lib/canon/diff_formatter/debug_output.rb'
328
+
329
+ # Offense count: 3
330
+ # This cop supports safe autocorrection (--autocorrect).
331
+ # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
332
+ # SupportedStyles: single_quotes, double_quotes
333
+ Style/StringLiterals:
334
+ Exclude:
335
+ - 'lib/canon/comparison/markup_comparator.rb'
336
+ - 'lib/canon/comparison/xml_comparator/diff_node_builder.rb'
337
+ - 'test_verify_equivalent.rb'
338
+
339
+ # Offense count: 12
340
+ # This cop supports safe autocorrection (--autocorrect).
341
+ # Configuration parameters: EnforcedStyleForMultiline.
342
+ # SupportedStylesForMultiline: comma, consistent_comma, diff_comma, no_comma
343
+ Style/TrailingCommaInArguments:
344
+ Exclude:
345
+ - 'spec/canon/diff/xml_serialization_formatter_spec.rb'
data/docs/Gemfile CHANGED
@@ -6,4 +6,5 @@ gem "just-the-docs"
6
6
 
7
7
  group :jekyll_plugins do
8
8
  gem "jekyll-seo-tag"
9
+ gem "jekyll-sitemap"
9
10
  end
data/docs/_config.yml CHANGED
@@ -12,8 +12,13 @@ repository: lutaml/canon
12
12
 
13
13
  # Theme
14
14
  theme: just-the-docs
15
+ remote_theme: just-the-docs/just-the-docs@v0.7.0
15
16
  color_scheme: light
16
17
 
18
+ # Logo (uncomment if you have a logo)
19
+ # logo: "/assets/images/logo.svg"
20
+ # favicon_ico: "/assets/images/favicon.ico"
21
+
17
22
  # AsciiDoc support
18
23
  asciidoc: {}
19
24
  asciidoctor:
@@ -63,10 +68,36 @@ heading_anchors: true
63
68
  # Footer
64
69
  footer_content: 'Copyright &copy; 2025 Ribose. Distributed under the <a href="https://github.com/lutaml/canon/blob/main/LICENSE.txt">BSD 2-Clause License</a>.'
65
70
 
71
+ # Footer last edit timestamp
72
+ last_edit_timestamp: true
73
+ last_edit_time_format: "%b %e %Y at %I:%M %p"
74
+
75
+ # Enable code copy button
76
+ enable_copy_code_button: true
77
+
78
+ # Callouts
79
+ callouts_level: quiet
80
+ callouts:
81
+ highlight:
82
+ color: yellow
83
+ important:
84
+ title: Important
85
+ color: blue
86
+ new:
87
+ title: New
88
+ color: green
89
+ note:
90
+ title: Note
91
+ color: purple
92
+ warning:
93
+ title: Warning
94
+ color: red
95
+
66
96
  # Plugins
67
97
  plugins:
68
98
  - jekyll-asciidoc
69
99
  - jekyll-seo-tag
100
+ - jekyll-sitemap
70
101
 
71
102
  # Markdown settings (for any markdown files)
72
103
  markdown: kramdown
@@ -75,6 +106,60 @@ kramdown:
75
106
  hard_wrap: false
76
107
  syntax_highlighter: rouge
77
108
 
109
+ # Collections for organizing content
110
+ collections:
111
+ # Core documentation pages (getting-started, interfaces, etc.)
112
+ pages:
113
+ permalink: "/:path/"
114
+ output: true
115
+
116
+ # Feature documentation
117
+ features:
118
+ permalink: "/:collection/:path/"
119
+ output: true
120
+
121
+ # Understanding/internal documentation
122
+ understanding:
123
+ permalink: "/:collection/:path/"
124
+ output: true
125
+
126
+ # Advanced topics
127
+ advanced:
128
+ permalink: "/:collection/:path/"
129
+ output: true
130
+
131
+ # Guides (task-oriented tutorials)
132
+ guides:
133
+ permalink: "/:collection/:path/"
134
+ output: true
135
+
136
+ # Reference documentation
137
+ reference:
138
+ permalink: "/:collection/:path/"
139
+ output: true
140
+
141
+ # Just the Docs collection configuration
142
+ just_the_docs:
143
+ collections:
144
+ pages:
145
+ name: Pages
146
+ nav_fold: false
147
+ features:
148
+ name: Features
149
+ nav_fold: true
150
+ understanding:
151
+ name: Understanding
152
+ nav_fold: true
153
+ advanced:
154
+ name: Advanced
155
+ nav_fold: true
156
+ guides:
157
+ name: Guides
158
+ nav_fold: true
159
+ reference:
160
+ name: Reference
161
+ nav_fold: true
162
+
78
163
  # Defaults
79
164
  defaults:
80
165
  - scope:
@@ -83,6 +168,10 @@ defaults:
83
168
  values:
84
169
  layout: default
85
170
 
171
+ # Include additional files
172
+ include:
173
+ - "*.adoc"
174
+
86
175
  # Exclude from processing
87
176
  exclude:
88
177
  - Gemfile
@@ -97,4 +186,4 @@ exclude:
97
186
  - .git
98
187
  - .gitignore
99
188
 
100
- permalink: pretty
189
+ permalink: pretty
data/docs/advanced/diff-classification.adoc CHANGED
@@ -80,14 +80,20 @@ Classification depends on `attribute_order` setting:
80
80
  │ │
81
81
  │ DiffClassifier examines each DiffNode: │
82
82
  │ │
83
- For each dimension:
84
- behavior = match_options.behavior_for(dimension)
83
+ 1. Serialization-level formatting (XmlSerializationFormatter)
84
+ XML syntax differences: <tag/> vs <tag></tag>
85
+ │ → ALWAYS formatting-only (non-normative) │
85
86
  │ │
86
- if behavior == :ignore
87
- INFORMATIVE (difference doesn't matter)
88
- else # :strict or :normalize
89
- │ → NORMATIVE (difference matters) │
87
+ 2. Content-level formatting (text_content: :normalize)
88
+ Whitespace differences in content
89
+ Formatting-only when normalized content matches
90
90
  │ │
91
+ │ 3. CompareProfile policy (normative vs informative) │
92
+ │ → behavior == :ignore → INFORMATIVE │
93
+ │ → behavior == :strict → NORMATIVE │
94
+ │ → behavior == :normalize → Check content normalization │
95
+ │ │
96
+ │ Sets diff_node.formatting = true/false │
91
97
  │ Sets diff_node.normative = true/false │
92
98
  └───────────────────────────────────┬───────────────────────────────┘
93
99
 
@@ -102,6 +108,27 @@ Classification depends on `attribute_order` setting:
102
108
  └──────────────────────────────────────────────────────────────────┘
103
109
  ----
104
110
 
111
+ === Three-Level Classification System
112
+
113
+ Canon distinguishes between **four kinds of differences**, grouped into three levels (formatting, informative, normative):
114
+
115
+ | Kind | `formatting:` | `normative:` | Meaning | Examples |
116
+ |------|---------------|--------------|---------|----------|
117
+ | **Serialization formatting** | `true` | `false` | XML syntax differences | `<tag/>` vs `<tag></tag>` |
118
+ | **Content formatting** | `true` | `false` | Whitespace in content | `Hello  world` vs `Hello world` |
119
+ | **Informative** | `false` | `false` | Tracked but doesn't affect equivalence | Attribute order (when `:ignore`) |
120
+ | **Normative** | `false` | `true` | Affects equivalence | Different words, missing elements |
121
+
122
+ **Key distinction**:
123
+
124
+ * **Serialization-level formatting**: XML syntax differences that are ALWAYS non-normative regardless of match options, because they represent different valid serializations of the same semantic content. Detected by `XmlSerializationFormatter`.
125
+
126
+ * **Content-level formatting**: Whitespace differences in document content. These are formatting-only (non-normative) when normalized content matches (using `text_content: :normalize`).
127
+
128
+ * **Informative**: Differences tracked for reference but don't affect equivalence (when behavior is `:ignore`).
129
+
130
+ * **Normative**: Semantic content differences that affect equivalence (when behavior is `:strict` or when normalized content differs).
131
+
105
132
  == CompareProfile-Based Classification
106
133
 
107
134
  === Overview
@@ -120,22 +147,42 @@ DiffNode → DiffClassifier → CompareProfile → normative?
120
147
 
121
148
  === Classification Hierarchy
122
149
 
123
- Canon uses a three-level hierarchy for classifying differences:
150
+ Canon uses a **multi-level hierarchy** for classifying differences:
151
+
152
+ [source]
153
+ ----
154
+ DiffNode → DiffClassifier → XmlSerializationFormatter → serialization formatting?
155
+
156
+ CompareProfile → normative dimension?
157
+
158
+ FormattingDetector → formatting-only?
159
+
160
+ Final classification
161
+ ----
162
+
163
+ **Classification priority (from highest to lowest specificity)**:
164
+
165
+ 1. **Serialization-level formatting** (highest priority)
166
+ - XML syntax differences: `<tag/>` vs `<tag></tag>`
167
+ - Detected by `XmlSerializationFormatter`
168
+ - **ALWAYS** `formatting: true, normative: false`
169
+ - Bypasses all other classification logic
124
170
 
125
- 1. **Formatting-only** (lowest priority)
126
- - Pure whitespace/formatting differences
127
- - Normalized content is identical
128
- - Markers: `[` and `]` in diff output
171
+ 2. **Content-level formatting**
172
+ - Whitespace differences in document content
173
+ - Detected by `FormattingDetector` when `text_content: :normalize`
174
+ - `formatting: true, normative: false` when normalized content matches
175
+ - Respects element-level whitespace sensitivity
129
176
 
130
- 2. **Informative** (medium priority)
177
+ 3. **Informative** (based on `:ignore` behavior)
131
178
  - Tracked but doesn't affect equivalence
132
- - Based on behavior `:ignore`
133
- - Markers: `<` and `>` in diff output
179
+ - `formatting: false, normative: false`
180
+ - Example: Attribute order when `attribute_order: :ignore`
134
181
 
135
- 3. **Normative** (highest priority)
182
+ 4. **Normative** (based on `:strict` behavior or content mismatch)
136
183
  - Affects equivalence
137
- - Based on behavior `:strict`
138
- - Markers: `-` and `+` in diff output
184
+ - `formatting: false, normative: true`
185
+ - Example: Different words, missing elements
139
186
 
140
187
  === Format-Specific Policies
141
188
 
@@ -229,6 +276,97 @@ result = Canon::Comparison.equivalent?(
229
276
  ----
230
277
  ====
231
278
 
279
+ ==== Text Content
280
+
281
+ * **`:strict` behavior** → Normative
282
+ - Text must match exactly, including all whitespace
283
+ - Any text difference causes non-equivalence
284
+
285
+ * **`:normalize` behavior** → Normative (after normalization) or Informative (if formatting-only)
286
+ - Whitespace is normalized (collapsed/trimmed) before comparison
287
+ - If normalized texts match but originals differ, classified as formatting-only (informative)
288
+ - This ensures that whitespace-only differences don't affect equivalence
289
+ - Element-level sensitivity is respected (e.g., `<pre>`, `<code>` preserve whitespace)
290
+
291
+ * **`:ignore` behavior** → Informative
292
+ - Text content differences tracked but don't affect equivalence
293
+
294
+ .Example: Text content with normalize behavior
295
+ ====
296
+ [source,ruby]
297
+ ----
298
+ # Formatting-only difference - normalized texts match
299
+ xml1 = '<p>Hello  world</p>'
300
+ xml2 = '<p>Hello world</p>'
301
+
302
+ result = Canon::Comparison.equivalent?(
303
+ xml1, xml2,
304
+ match: { text_content: :normalize }
305
+ )
306
+ # => true (extra space is formatting-only, classified as informative)
307
+
308
+ # Shows as informative in verbose output
309
+ result.differences.first.normative?
310
+ # => false
311
+ result.differences.first.formatting?
312
+ # => true
313
+ ----
314
+
315
+ .Using text_content: :normalize with element-level sensitivity
316
+ ====
317
+ [source,ruby]
318
+ ----
319
+ # HTML defaults: <code> is whitespace-sensitive
320
+ html1 = '<code> indented </code><p> text </p>'
321
+ html2 = '<code>indented</code><p>text</p>'
322
+
323
+ # With <code> blacklisted from sensitive elements
324
+ Canon::Comparison.equivalent?(html1, html2,
325
+ format: :html,
326
+ match: {
327
+ whitespace_insensitive_elements: [:code],
328
+ }
329
+ )
330
+ # => true
331
+ # - <code> whitespace: formatting-only (informative)
332
+ # - <p> whitespace: formatting-only (informative)
333
+
334
+ # Without blacklisting (default HTML behavior)
335
+ Canon::Comparison.equivalent?(html1, html2, format: :html)
336
+ # => false
337
+ # - <code> whitespace: normative (sensitive element)
338
+ # - <p> whitespace: formatting-only (informative)
339
+ ----
340
+ ====
341
+
342
+ .Self-closing vs explicit closing tags
343
+ ====
344
+ Per XML standards, `<tag/>` and `<tag></tag>` are semantically equivalent (both represent empty elements). Canon classifies differences in serialisation format as **formatting-only** (non-normative):
345
+
346
+ [source,ruby]
347
+ ----
348
+ # Self-closing vs explicit closing - always equivalent
349
+ xml1 = '<svg><rect x="10" y="10"/></svg>'
350
+ xml2 = '<svg><rect x="10" y="10"></rect></svg>'
351
+
352
+ Canon::Comparison.equivalent?(xml1, xml2, format: :xml)
353
+ # => true
354
+
355
+ # Empty/whitespace-only text nodes from serialisation are formatting-only
356
+ result = Canon::Comparison.equivalent?(xml1, xml2, format: :xml, verbose: true)
357
+ result.differences.each do |diff|
358
+ if diff.dimension == :text_content
359
+ puts "Normative: #{diff.normative?}" # => false
360
+ puts "Formatting: #{diff.formatting?}" # => true
361
+ end
362
+ end
363
+ ----
364
+
365
+ This applies regardless of `text_content` behavior setting, as these differences are purely serialisation format variations (similar to attribute order).
366
+
367
+ The key insight: empty or whitespace-only text nodes created by different serialisation styles (`<tag/>` vs `<tag></tag>`) are always classified as **formatting-only**, not normative.
368
+ ====
369
+
232
370
  === FormattingDetector Integration
233
371
 
234
372
  For dimensions that support it (`:text_content`, `:structural_whitespace`),
@@ -256,21 +394,48 @@ With `:normalize` mode:
256
394
 
257
395
  === Implementation Details
258
396
 
259
- The [`CompareProfile`](../../lib/canon/comparison/compare_profile.rb) class provides:
397
+ The classification system uses three main classes:
260
398
 
261
- * `normative_dimension?(dimension)` - Is this dimension normative?
262
- * `affects_equivalence?(dimension)` - Does this dimension affect equivalence?
263
- * `supports_formatting_detection?(dimension)` - Can this dimension have formatting-only diffs?
399
+ * **`XmlSerializationFormatter`** - Detects XML serialization-level formatting differences
400
+ - Self-closing vs explicit closing tags: `<tag/>` vs `<tag></tag>`
401
+ - Always returns `formatting: true, normative: false`
402
+ - These differences are ALWAYS non-normative regardless of match options
264
403
 
265
- The [`DiffClassifier`](../../lib/canon/diff/diff_classifier.rb) uses CompareProfile to classify:
404
+ * **`CompareProfile`** - Determines dimension behavior and policy
405
+ - `normative_dimension?(dimension)` - Is this dimension normative?
406
+ - `affects_equivalence?(dimension)` - Does this dimension affect equivalence?
407
+ - `supports_formatting_detection?(dimension)` - Can this dimension have formatting-only diffs?
408
+
409
+ * **`DiffClassifier`** - Orchestrates classification using the above
410
+ - First checks `XmlSerializationFormatter` for serialization formatting
411
+ - Then handles content-level formatting (text_content: :normalize)
412
+ - Finally applies `CompareProfile` policy for normative vs informative
266
413
 
267
414
  [source,ruby]
268
415
  ----
269
416
  def classify(diff_node)
270
- # Check normative status based on policy
417
+ # FIRST: Check for XML serialization-level formatting differences
418
+ # These are ALWAYS non-normative (formatting-only) regardless of match options
419
+ if XmlSerializationFormatter.serialization_formatting?(diff_node)
420
+ diff_node.formatting = true
421
+ diff_node.normative = false
422
+ return diff_node
423
+ end
424
+
425
+ # SECOND: Handle content-level formatting for text_content with :normalize
426
+ if diff_node.dimension == :text_content &&
427
+ profile.send(:behavior_for, :text_content) == :normalize &&
428
+ !inside_whitespace_sensitive_element?(diff_node) &&
429
+ formatting_only_diff?(diff_node)
430
+ diff_node.formatting = true
431
+ diff_node.normative = false
432
+ return diff_node
433
+ end
434
+
435
+ # THIRD: Apply CompareProfile policy
271
436
  is_normative = profile.normative_dimension?(diff_node.dimension)
272
437
 
273
- # Only check formatting for non-normative dimensions
438
+ # FOURTH: Check FormattingDetector for non-normative dimensions
274
439
  if !is_normative && profile.supports_formatting_detection?(diff_node.dimension)
275
440
  if formatting_only_diff?(diff_node)
276
441
  diff_node.formatting = true
@@ -279,11 +444,18 @@ def classify(diff_node)
279
444
  end
280
445
  end
281
446
 
447
+ # FIFTH: Apply normative determination
282
448
  diff_node.normative = is_normative
283
449
  diff_node
284
450
  end
285
451
  ----
286
452
 
453
+ The key distinction for `text_content: :normalize`:
454
+
455
+ * **Formatting-only detection**: Uses `normalized_equivalent?` method to compare normalized texts
456
+ * **Element sensitivity**: Respects element-level whitespace sensitivity (`<pre>`, `<code>`, etc.)
457
+ * **Result**: Whitespace-only differences are classified as *formatting-only* (non-normative) when using `:normalize`
458
+
287
459
  == Visual Indicators
288
460
 
289
461
  === Normative Diffs