canon 0.1.23 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +155 -30
  3. data/docs/INDEX.adoc +4 -0
  4. data/docs/advanced/diff-classification.adoc +3 -2
  5. data/docs/features/configuration-profiles.adoc +288 -0
  6. data/docs/features/diff-formatting/character-visualization.adoc +153 -454
  7. data/docs/features/diff-formatting/display-filtering.adoc +44 -0
  8. data/docs/features/diff-formatting/display-preprocessing.adoc +656 -0
  9. data/docs/features/diff-formatting/index.adoc +47 -0
  10. data/docs/features/diff-formatting/pretty-diff-mode.adoc +154 -0
  11. data/docs/features/environment-configuration/override-system.adoc +10 -3
  12. data/docs/features/index.adoc +9 -0
  13. data/docs/features/match-options/index.adoc +32 -42
  14. data/docs/features/match-options/pretty-printed-fixtures.adoc +270 -0
  15. data/docs/guides/choosing-configuration.adoc +22 -0
  16. data/docs/reference/environment-variables.adoc +121 -1
  17. data/docs/reference/options-across-interfaces.adoc +182 -2
  18. data/lib/canon/cli.rb +20 -0
  19. data/lib/canon/commands/diff_command.rb +7 -2
  20. data/lib/canon/commands/format_command.rb +1 -1
  21. data/lib/canon/comparison/html_comparator.rb +20 -15
  22. data/lib/canon/comparison/html_compare_profile.rb +4 -4
  23. data/lib/canon/comparison/markup_comparator.rb +12 -3
  24. data/lib/canon/comparison/match_options/base_resolver.rb +29 -7
  25. data/lib/canon/comparison/match_options/json_resolver.rb +9 -0
  26. data/lib/canon/comparison/match_options/xml_resolver.rb +16 -2
  27. data/lib/canon/comparison/match_options/yaml_resolver.rb +10 -0
  28. data/lib/canon/comparison/match_options.rb +4 -1
  29. data/lib/canon/comparison/whitespace_sensitivity.rb +189 -137
  30. data/lib/canon/comparison/xml_comparator/child_comparison.rb +21 -4
  31. data/lib/canon/comparison/xml_comparator.rb +14 -12
  32. data/lib/canon/comparison/xml_node_comparison.rb +51 -6
  33. data/lib/canon/comparison.rb +52 -9
  34. data/lib/canon/config/env_schema.rb +32 -4
  35. data/lib/canon/config/override_resolver.rb +16 -3
  36. data/lib/canon/config/profile_loader.rb +135 -0
  37. data/lib/canon/config/profiles/metanorma.yml +74 -0
  38. data/lib/canon/config/profiles/metanorma_debug.yml +8 -0
  39. data/lib/canon/config/type_converter.rb +8 -0
  40. data/lib/canon/config.rb +469 -5
  41. data/lib/canon/diff/diff_classifier.rb +41 -11
  42. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +48 -17
  43. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +58 -0
  44. data/lib/canon/diff_formatter/diff_detail_formatter.rb +22 -7
  45. data/lib/canon/diff_formatter.rb +493 -36
  46. data/lib/canon/pretty_printer/xml_normalized.rb +395 -0
  47. data/lib/canon/rspec_matchers.rb +36 -0
  48. data/lib/canon/version.rb +1 -1
  49. data/lib/canon/xml/nodes/namespace_node.rb +4 -0
  50. data/lib/canon/xml/nodes/processing_instruction_node.rb +4 -0
  51. data/lib/canon/xml/nodes/root_node.rb +4 -0
  52. data/lib/canon/xml/nodes/text_node.rb +4 -0
  53. data/lib/tasks/performance_helpers.rb +2 -2
  54. metadata +24 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '08086e588946bb0f5f39a8235984058b5a5e8aea956b9652e0e366ff84500232'
4
- data.tar.gz: bb078d67c1a68da1ca88b3d29fde336e291b0ff95186ade125ee1ccfb1a82de4
3
+ metadata.gz: 10234a14be49f993f58acac6d82bb6e8ca409d309bcd8446296f32f60ad6380f
4
+ data.tar.gz: 80e29ddf981d17f0fcd5812921f6ec1fbdf056b3ae673dc4d3d94c9979cddc21
5
5
  SHA512:
6
- metadata.gz: 35f7888d948f86fa4b504b060ee6d9976dc38b32232e8c86b9b8986ee08448cdad265fe4d091e810e5f63f0b0702e125317e07d15761b5cd6404905190546234
7
- data.tar.gz: 4f57564289fdff8c73020f3ed9f9184b363688062cf1b89baea79f0d25c57905bcbc225b5e946afeb30305b91402b557ae65ce61b99a0d63b8bb1c569ec2d57f
6
+ metadata.gz: bc63b5a8dae56a06781d8588809e8356eea618fdcdb13b933ea4c8a3c4f221201c05070a3ce100945c9f81ef6f122b73ff6ac723441aa2b3cbcc5fd31e8fee63
7
+ data.tar.gz: fbcb9ceb782f7847e6f2c6afba275f7d68da410d253b6f34f263b5c91803a3fbaff031de9b2dda5b8787fc37d0c82e1cea97d3ae1489d507eaad969168a107a1
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-04-01 00:03:16 UTC using RuboCop version 1.86.0.
3
+ # on 2026-04-12 07:40:40 UTC using RuboCop version 1.86.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -11,39 +11,147 @@ Gemspec/RequiredRubyVersion:
11
11
  Exclude:
12
12
  - 'canon.gemspec'
13
13
 
14
- # Offense count: 2
14
+ # Offense count: 49
15
+ # This cop supports safe autocorrection (--autocorrect).
16
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
17
+ # SupportedStyles: with_first_argument, with_fixed_indentation
18
+ Layout/ArgumentAlignment:
19
+ Exclude:
20
+ - 'lib/canon/comparison/whitespace_sensitivity.rb'
21
+ - 'lib/canon/comparison/xml_comparator.rb'
22
+ - 'lib/canon/comparison/xml_node_comparison.rb'
23
+ - 'lib/canon/config.rb'
24
+ - 'lib/canon/diff/diff_classifier.rb'
25
+ - 'lib/canon/diff_formatter.rb'
26
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
27
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
28
+ - 'spec/canon/config/profile_loader_spec.rb'
29
+ - 'spec/canon/config/profile_spec.rb'
30
+ - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
31
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
32
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
33
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
34
+
35
+ # Offense count: 3
36
+ # This cop supports safe autocorrection (--autocorrect).
37
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
38
+ # SupportedStyles: with_first_element, with_fixed_indentation
39
+ Layout/ArrayAlignment:
40
+ Exclude:
41
+ - 'lib/canon/comparison/match_options/base_resolver.rb'
42
+ - 'lib/canon/comparison/match_options/xml_resolver.rb'
43
+ - 'spec/canon/config/profile_spec.rb'
44
+
45
+ # Offense count: 16
15
46
  # This cop supports safe autocorrection (--autocorrect).
16
47
  # Configuration parameters: EnforcedStyleAlignWith.
17
48
  # SupportedStylesAlignWith: either, start_of_block, start_of_line
18
49
  Layout/BlockAlignment:
19
50
  Exclude:
20
- - 'lib/canon/tree_diff/matchers/hash_matcher.rb'
21
- - 'spec/canon/hash_matcher_regression_spec.rb'
51
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
52
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
53
+ - 'spec/canon/config/profile_loader_spec.rb'
54
+ - 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
55
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
56
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
57
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
22
58
 
23
- # Offense count: 2
59
+ # Offense count: 16
24
60
  # This cop supports safe autocorrection (--autocorrect).
25
61
  Layout/BlockEndNewline:
26
62
  Exclude:
27
- - 'lib/canon/tree_diff/matchers/hash_matcher.rb'
28
- - 'spec/canon/hash_matcher_regression_spec.rb'
63
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
64
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
65
+ - 'spec/canon/config/profile_loader_spec.rb'
66
+ - 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
67
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
68
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
69
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
29
70
 
30
- # Offense count: 4
71
+ # Offense count: 5
72
+ # This cop supports safe autocorrection (--autocorrect).
73
+ Layout/ClosingParenthesisIndentation:
74
+ Exclude:
75
+ - 'lib/canon/config/profile_loader.rb'
76
+ - 'lib/canon/diff/diff_classifier.rb'
77
+ - 'spec/canon/config/profile_loader_spec.rb'
78
+
79
+ # Offense count: 2
80
+ # This cop supports safe autocorrection (--autocorrect).
81
+ Layout/ElseAlignment:
82
+ Exclude:
83
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
84
+
85
+ # Offense count: 2
86
+ # This cop supports safe autocorrection (--autocorrect).
87
+ # Configuration parameters: EnforcedStyleAlignWith.
88
+ # SupportedStylesAlignWith: keyword, variable, start_of_line
89
+ Layout/EndAlignment:
90
+ Exclude:
91
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
92
+
93
+ # Offense count: 5
94
+ # This cop supports safe autocorrection (--autocorrect).
95
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
96
+ # SupportedStyles: consistent, consistent_relative_to_receiver, special_for_inner_method_call, special_for_inner_method_call_in_parentheses
97
+ Layout/FirstArgumentIndentation:
98
+ Exclude:
99
+ - 'lib/canon/config/profile_loader.rb'
100
+ - 'lib/canon/diff/diff_classifier.rb'
101
+ - 'spec/canon/config/profile_loader_spec.rb'
102
+
103
+ # Offense count: 30
104
+ # This cop supports safe autocorrection (--autocorrect).
105
+ # Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
106
+ # SupportedHashRocketStyles: key, separator, table
107
+ # SupportedColonStyles: key, separator, table
108
+ # SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
109
+ Layout/HashAlignment:
110
+ Exclude:
111
+ - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
112
+ - 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
113
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
114
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
115
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
116
+
117
+ # Offense count: 36
31
118
  # This cop supports safe autocorrection (--autocorrect).
32
119
  # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
33
120
  # SupportedStylesAlignWith: start_of_line, relative_to_receiver
34
121
  Layout/IndentationWidth:
35
122
  Exclude:
36
- - 'lib/canon/tree_diff/matchers/hash_matcher.rb'
37
- - 'spec/canon/hash_matcher_regression_spec.rb'
123
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
124
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
125
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
126
+ - 'spec/canon/config/profile_loader_spec.rb'
127
+ - 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
128
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
129
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
130
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
38
131
 
39
- # Offense count: 1103
132
+ # Offense count: 1375
40
133
  # This cop supports safe autocorrection (--autocorrect).
41
134
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
42
135
  # URISchemes: http, https
43
136
  Layout/LineLength:
44
137
  Enabled: false
45
138
 
46
- # Offense count: 55
139
+ # Offense count: 2
140
+ # This cop supports safe autocorrection (--autocorrect).
141
+ # Configuration parameters: EnforcedStyle.
142
+ # SupportedStyles: symmetrical, new_line, same_line
143
+ Layout/MultilineMethodCallBraceLayout:
144
+ Exclude:
145
+ - 'lib/canon/config/profile_loader.rb'
146
+ - 'lib/canon/diff/diff_classifier.rb'
147
+
148
+ # Offense count: 57
149
+ # This cop supports safe autocorrection (--autocorrect).
150
+ # Configuration parameters: AllowInHeredoc.
151
+ Layout/TrailingWhitespace:
152
+ Enabled: false
153
+
154
+ # Offense count: 56
47
155
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
48
156
  Lint/DuplicateBranch:
49
157
  Enabled: false
@@ -88,7 +196,7 @@ Lint/UselessConstantScoping:
88
196
  Exclude:
89
197
  - 'lib/canon/diff_formatter/theme.rb'
90
198
 
91
- # Offense count: 298
199
+ # Offense count: 309
92
200
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
93
201
  Metrics/AbcSize:
94
202
  Enabled: false
@@ -99,27 +207,27 @@ Metrics/AbcSize:
99
207
  Metrics/BlockLength:
100
208
  Max: 92
101
209
 
102
- # Offense count: 1
210
+ # Offense count: 3
103
211
  # Configuration parameters: CountBlocks, CountModifierForms.
104
212
  Metrics/BlockNesting:
105
213
  Max: 4
106
214
 
107
- # Offense count: 262
215
+ # Offense count: 272
108
216
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
109
217
  Metrics/CyclomaticComplexity:
110
218
  Enabled: false
111
219
 
112
- # Offense count: 485
220
+ # Offense count: 498
113
221
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
114
222
  Metrics/MethodLength:
115
223
  Max: 146
116
224
 
117
- # Offense count: 56
225
+ # Offense count: 58
118
226
  # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
119
227
  Metrics/ParameterLists:
120
228
  Max: 10
121
229
 
122
- # Offense count: 213
230
+ # Offense count: 219
123
231
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
124
232
  Metrics/PerceivedComplexity:
125
233
  Enabled: false
@@ -152,13 +260,13 @@ Performance/CollectionLiteralInLoop:
152
260
  - 'lib/canon/xml/xml_base_handler.rb'
153
261
  - 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
154
262
 
155
- # Offense count: 71
263
+ # Offense count: 82
156
264
  # Configuration parameters: Prefixes, AllowedPatterns.
157
265
  # Prefixes: when, with, without
158
266
  RSpec/ContextWording:
159
267
  Enabled: false
160
268
 
161
- # Offense count: 34
269
+ # Offense count: 37
162
270
  # Configuration parameters: IgnoredMetadata.
163
271
  RSpec/DescribeClass:
164
272
  Enabled: false
@@ -169,7 +277,7 @@ RSpec/DescribeMethod:
169
277
  - 'spec/canon/comparison/multiple_differences_spec.rb'
170
278
  - 'spec/canon/diff_formatter/character_map_customization_spec.rb'
171
279
 
172
- # Offense count: 741
280
+ # Offense count: 804
173
281
  # Configuration parameters: CountAsOne.
174
282
  RSpec/ExampleLength:
175
283
  Max: 44
@@ -183,7 +291,7 @@ RSpec/ExpectActual:
183
291
  - 'spec/canon/rspec_matchers_spec.rb'
184
292
  - 'spec/canon/string_matcher_spec.rb'
185
293
 
186
- # Offense count: 175
294
+ # Offense count: 177
187
295
  # Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
188
296
  RSpec/IndexedLet:
189
297
  Exclude:
@@ -221,14 +329,14 @@ RSpec/MultipleDescribes:
221
329
  Exclude:
222
330
  - 'spec/canon/comparison/match_options_spec.rb'
223
331
 
224
- # Offense count: 595
332
+ # Offense count: 654
225
333
  RSpec/MultipleExpectations:
226
334
  Max: 15
227
335
 
228
- # Offense count: 71
336
+ # Offense count: 93
229
337
  # Configuration parameters: AllowSubject.
230
338
  RSpec/MultipleMemoizedHelpers:
231
- Max: 13
339
+ Max: 16
232
340
 
233
341
  # Offense count: 17
234
342
  # Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
@@ -239,7 +347,7 @@ RSpec/NamedSubject:
239
347
  - 'spec/canon/pretty_printer/json_spec.rb'
240
348
  - 'spec/canon/pretty_printer/xml_spec.rb'
241
349
 
242
- # Offense count: 40
350
+ # Offense count: 50
243
351
  # Configuration parameters: AllowedGroups.
244
352
  RSpec/NestedGroups:
245
353
  Max: 4
@@ -273,7 +381,7 @@ RSpec/SpecFilePathFormat:
273
381
  - 'spec/canon/yaml/formatter_spec.rb'
274
382
  - 'spec/xml_c14n_spec.rb'
275
383
 
276
- # Offense count: 128
384
+ # Offense count: 131
277
385
  # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
278
386
  RSpec/VerifiedDoubles:
279
387
  Exclude:
@@ -285,7 +393,7 @@ RSpec/VerifiedDoubles:
285
393
  - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
286
394
  - 'spec/canon/tree_diff/operation_converter_spec.rb'
287
395
 
288
- # Offense count: 2
396
+ # Offense count: 25
289
397
  # This cop supports safe autocorrection (--autocorrect).
290
398
  # Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
291
399
  # SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
@@ -294,8 +402,13 @@ RSpec/VerifiedDoubles:
294
402
  # AllowedMethods: lambda, proc, it
295
403
  Style/BlockDelimiters:
296
404
  Exclude:
297
- - 'lib/canon/tree_diff/matchers/hash_matcher.rb'
298
- - 'spec/canon/hash_matcher_regression_spec.rb'
405
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
406
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
407
+ - 'spec/canon/config/profile_loader_spec.rb'
408
+ - 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
409
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
410
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
411
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
299
412
 
300
413
  # Offense count: 1
301
414
  # This cop supports safe autocorrection (--autocorrect).
@@ -319,6 +432,18 @@ Style/IdenticalConditionalBranches:
319
432
  - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
320
433
  - 'lib/canon/diff_formatter/legend.rb'
321
434
 
435
+ # Offense count: 3
436
+ # This cop supports safe autocorrection (--autocorrect).
437
+ Style/MultilineIfModifier:
438
+ Exclude:
439
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
440
+
441
+ # Offense count: 2
442
+ # This cop supports safe autocorrection (--autocorrect).
443
+ Style/MultilineTernaryOperator:
444
+ Exclude:
445
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
446
+
322
447
  # Offense count: 1
323
448
  # Configuration parameters: AllowedMethods.
324
449
  # AllowedMethods: respond_to_missing?
data/docs/INDEX.adoc CHANGED
@@ -98,6 +98,7 @@ Choose your interface:
98
98
  === Level 4: Master Features (1 hour)
99
99
 
100
100
  . link:features/match-options/[Match options and profiles]
101
+ . link:features/configuration-profiles/[Configuration profiles] - YAML-based presets with element-level whitespace control
101
102
  . link:features/canonicalization/[Format-specific canonicalization]
102
103
  . link:features/diff-formatting/[Customizing diff output]
103
104
 
@@ -111,6 +112,7 @@ Choose your interface:
111
112
 
112
113
  * link:reference/options-across-interfaces[Options Across Interfaces] - Same options in CLI, Ruby API, and RSpec
113
114
  * link:reference/environment-variables[Environment Variables] - Configure Canon via ENV variables
115
+ * link:features/configuration-profiles/[Configuration Profiles] - YAML presets with element-level whitespace control
114
116
  * link:features/match-options/profiles[Match Profiles] - Pre-configured comparison strategies
115
117
  * link:features/diff-formatting/colors-and-symbols[Color-Coded Diffs] - Understanding diff output
116
118
  * link:features/environment-configuration/size-limits[Size Limits] - Preventing hangs on large files
@@ -135,5 +137,7 @@ See link:contributing[Contributing to Documentation]
135
137
  * Size limits for large files
136
138
  * Enhanced character visualization
137
139
  * Environment variable configuration
140
+ * Configuration profiles with element-level whitespace classification
141
+ * Three-way whitespace sensitivity: strict, normalize, insensitive
138
142
 
139
143
  See link:https://github.com/lutaml/canon/blob/main/CHANGELOG.md[Full Changelog]
@@ -324,7 +324,7 @@ html2 = '<code>indented</code><p>text</p>'
324
324
  Canon::Comparison.equivalent?(html1, html2,
325
325
  format: :html,
326
326
  match: {
327
- whitespace_insensitive_elements: [:code],
327
+ strip_whitespace_elements: [:code],
328
328
  }
329
329
  )
330
330
  # => true
@@ -423,9 +423,10 @@ def classify(diff_node)
423
423
  end
424
424
 
425
425
  # SECOND: Handle content-level formatting for text_content with :normalize
426
+ # Skipped for :preserve elements (<pre>, <code>, etc.) where whitespace is always normative
426
427
  if diff_node.dimension == :text_content &&
427
428
  profile.send(:behavior_for, :text_content) == :normalize &&
428
- !inside_whitespace_sensitive_element?(diff_node) &&
429
+ !inside_preserve_element?(diff_node) &&
429
430
  formatting_only_diff?(diff_node)
430
431
  diff_node.formatting = true
431
432
  diff_node.normative = false
@@ -0,0 +1,288 @@
1
+ ---
2
+ title: Configuration Profiles
3
+ parent: Features
4
+ nav_order: 7
5
+ ---
6
+ = Configuration profiles
7
+ :toc:
8
+ :toclevels: 3
9
+
10
+ == Purpose
11
+
12
+ Configuration profiles bundle many settings into a single named preset,
13
+ eliminating repetitive configuration blocks across multiple gems.
14
+ Instead of 60+ lines of per-format settings, use one line:
15
+
16
+ [source,ruby]
17
+ ----
18
+ Canon::Config.instance.profile = :metanorma
19
+ ----
20
+
21
+ Profiles are defined in YAML files and support inheritance, so a debug
22
+ variant can extend a base profile with only the differences.
23
+
24
+ == Built-in profiles
25
+
26
+ [cols="1,3"]
27
+ |===
28
+ | Profile | Description
29
+
30
+ | `:metanorma`
31
+ | Standard Metanorma spec configuration. Sets preprocessing to `:format`,
32
+ match profile to `:spec_friendly`, and diff algorithm to `:dom`; also configures the canonical
33
+ display format, normalized pretty-print display preprocessing,
34
+ and XML-specific whitespace element lists.
35
+
36
+ | `:metanorma_debug`
37
+ | Extends `:metanorma` with debug output enabled
38
+ (`show_prettyprint_received: true`).
39
+ |===
40
+
41
+ List all available profiles programmatically:
42
+
43
+ [source,ruby]
44
+ ----
45
+ Canon::Config::ProfileLoader.available_profiles
46
+ # => [:metanorma, :metanorma_debug]
47
+ ----
48
+
49
+ == Element-level whitespace classification
50
+
51
+ The metanorma profile's key feature is its **element-level whitespace classification**.
52
+ This controls how whitespace differences within specific elements are treated:
53
+
54
+ **Three-way classification:**
55
+
56
+ * **Preserve** (`:preserve`) — Every whitespace character is significant. Use for elements
57
+ where exact whitespace matters (like `<pre>`, `<code>`).
58
+
59
+ * **Collapse** (`:collapse`) — Presence matters but whitespace form doesn't.
60
+ `" hello "` equals `"hello"`. Differences are formatting-only (informative).
61
+ Use for elements like `<p>`, `<li>`, `<td>` in prose documents.
62
+
63
+ * **Strip** (`:strip`) — Whitespace is structural noise, dropped entirely.
64
+ The default for XML elements not in any list.
65
+
66
+ **Metanorma profile element lists:**
67
+
68
+ [source,ruby]
69
+ ----
70
+ # In metanorma profile
71
+ Canon::Config.instance.profile = :metanorma
72
+ Canon::Config.instance.xml.match.collapse_whitespace_elements
73
+ # => ["p", "title", "name", "td", "th", "dt", "dd", "li", ...]
74
+
75
+ Canon::Config.instance.xml.match.preserve_whitespace_elements
76
+ # => ["body", "passthrough"]
77
+ ----
78
+
79
+ **How it works with `text_content: :normalize`:**
80
+
81
+ [source,ruby]
82
+ ----
83
+ # With metanorma profile: <p> is in collapse_whitespace_elements
84
+ Canon::Config.instance.profile = :metanorma
85
+
86
+ # These are EQUIVALENT (whitespace in <p> is formatting-only)
87
+ Canon::Comparison.equivalent?('<p> hello </p>', '<p>hello</p>')
88
+ # => true
89
+
90
+ # But <body> is in preserve_whitespace_elements — every character matters
91
+ # These are NOT EQUIVALENT (whitespace in <body> is normative)
92
+ Canon::Comparison.equivalent?('<body> hello </body>', '<body>hello</body>')
93
+ # => false
94
+ ----
95
+
96
+ **Why this matters:**
97
+
98
+ In Metanorma/DocBook documents, elements like `<p>`, `<li>`, `<td>` contain prose
99
+ where whitespace formatting (extra spaces, line breaks) is irrelevant. By contrast, `<body>`
100
+ and `<passthrough>` can contain code or other content where exact whitespace matters.
101
+
102
+ Without element-level classification, you'd have to choose between
103
+ `text_content: :normalize` (ignores ALL whitespace, too permissive) and
104
+ `text_content: :strict` (requires exact match everywhere, too strict).
105
+
106
+ Element-level classification gives you fine-grained control.
107
+
108
+ == Usage
109
+
110
+ === Programmatic (Ruby API)
111
+
112
+ Use a **Symbol** for built-in profiles and a **String** for file paths:
113
+
114
+ [source,ruby]
115
+ ----
116
+ # Built-in profile (Symbol)
117
+ Canon::Config.instance.profile = :metanorma
118
+
119
+ # Local YAML file (String)
120
+ Canon::Config.instance.profile = "/path/to/my_profile.yml"
121
+ Canon::Config.instance.profile = "~/my_canon_profile.yml"
122
+ Canon::Config.instance.profile = "config/canon_profile.yml"
123
+
124
+ # Or in a configure block
125
+ Canon::Config.configure do |cfg|
126
+ cfg.profile = :metanorma
127
+ # Override individual settings after profile if needed
128
+ cfg.xml.diff.verbose_diff = true
129
+ end
130
+
131
+ # Clear the profile (revert to defaults + programmatic values)
132
+ Canon::Config.instance.profile = nil
133
+ ----
134
+
135
+ IMPORTANT: The type of the value determines how it is resolved:
136
+ **Symbols** are looked up as built-in profile names;
137
+ **Strings** are treated as file paths (with `~` expansion and relative
138
+ path resolution against the working directory).
139
+
140
+ Local YAML files can inherit from built-in profiles (see <<inheritance>>).
141
+
142
+ === Environment variable
143
+
144
+ Set `CANON_CONFIG_PROFILE` to apply a profile automatically on
145
+ initialization:
146
+
147
+ [source,bash]
148
+ ----
149
+ # Built-in profile
150
+ CANON_CONFIG_PROFILE=metanorma bundle exec rspec
151
+
152
+ # File path
153
+ CANON_CONFIG_PROFILE=~/my_profile.yml bundle exec rspec
154
+ ----
155
+
156
+ NOTE: `CANON_CONFIG_PROFILE` is distinct from `CANON_PROFILE`, which
157
+ controls the match profile (comparison behavior). The config profile
158
+ controls all settings at once.
159
+
160
+ == Priority chain
161
+
162
+ With profiles, the resolution chain becomes four layers:
163
+
164
+ [source]
165
+ ----
166
+ +------------------------------------+
167
+ | 1. Environment Variables | <- Highest Priority
168
+ | (CANON_XML_DIFF_ALGORITHM) |
169
+ +------------------------------------+
170
+ | overrides
171
+ +------------------------------------+
172
+ | 2. Programmatic Configuration |
173
+ | (config.xml.diff.algorithm=) |
174
+ +------------------------------------+
175
+ | overrides
176
+ +------------------------------------+
177
+ | 3. Profile Values |
178
+ | (from YAML profile file) |
179
+ +------------------------------------+
180
+ | overrides
181
+ +------------------------------------+
182
+ | 4. Default Values | <- Lowest Priority
183
+ | (defined in Canon::Config) |
184
+ +------------------------------------+
185
+ ----
186
+
187
+ This means:
188
+
189
+ * ENV variables always win (useful for CI overrides)
190
+ * Programmatic setter calls override profile values
191
+ * Profile values override built-in defaults
192
+ * Clearing the profile (`cfg.profile = nil`) removes only layer 3
193
+
194
+ [[inheritance]]
195
+ == Profile inheritance
196
+
197
+ A profile can inherit from another using the `inherits` key:
198
+
199
+ [source,yaml]
200
+ ----
201
+ name: my_debug
202
+ inherits: metanorma
203
+
204
+ shared:
205
+ diff:
206
+ verbose_diff: true
207
+ show_prettyprint_received: true
208
+ ----
209
+
210
+ Inheritance rules:
211
+
212
+ * Parent values are loaded first, then child values are deep-merged on top
213
+ * Hashes are merged recursively (child keys override parent keys)
214
+ * Arrays are replaced entirely (not concatenated)
215
+ * Single-parent inheritance only
216
+ * Cycle detection prevents infinite loops
217
+ * Local files can inherit from built-in profiles by name
218
+
219
+ == Creating custom profiles
220
+
221
+ === YAML file format
222
+
223
+ [source,yaml]
224
+ ----
225
+ ---
226
+ name: my_profile # <1>
227
+ description: My custom config # <2>
228
+ inherits: metanorma # <3>
229
+
230
+ shared: # <4>
231
+ preprocessing: format
232
+ match:
233
+ profile: spec_friendly
234
+ diff:
235
+ algorithm: dom
236
+ context_lines: 5
237
+ verbose_diff: false
238
+
239
+ formats: # <5>
240
+ xml:
241
+ match:
242
+ collapse_whitespace_elements:
243
+ - p
244
+ - title
245
+ - td
246
+ preserve_whitespace_elements:
247
+ - body
248
+ - passthrough
249
+ html:
250
+ diff:
251
+ show_raw_inputs: true
252
+ ----
253
+ <1> Profile name (metadata)
254
+ <2> Description (metadata)
255
+ <3> Optional: inherit from another profile (name or path)
256
+ <4> `shared` settings apply to all formats (xml, html, json, yaml, string)
257
+ <5> `formats.<name>` settings override `shared` for that specific format
258
+
259
+ === Attribute mapping
260
+
261
+ Profile YAML keys map directly to Canon configuration accessors. `shared` keys apply to every format; the table below uses `xml` as the example:
262
+
263
+ [cols="2,2"]
264
+ |===
265
+ | YAML path | Ruby equivalent
266
+
267
+ | `shared.preprocessing`
268
+ | `cfg.xml.preprocessing = :format`
269
+
270
+ | `shared.match.profile`
271
+ | `cfg.xml.match.profile = :spec_friendly`
272
+
273
+ | `shared.diff.algorithm`
274
+ | `cfg.xml.diff.algorithm = :dom`
275
+
276
+ | `formats.xml.diff.context_lines`
277
+ | `cfg.xml.diff.context_lines = 5`
278
+ |===
279
+
280
+ All `DiffConfig` and `MatchConfig` attributes documented in
281
+ link:../reference/options-across-interfaces.adoc[Options Across Interfaces]
282
+ are supported.
283
+
284
+ == See also
285
+
286
+ * link:environment-configuration/override-system.adoc[Override System] -- ENV variable priority
287
+ * link:match-options/index.adoc[Match Options] -- match profile presets (`:strict`, `:spec_friendly`, etc.)
288
+ * link:../guides/choosing-configuration.adoc[Choosing Configuration] -- decision guide