canon 0.1.22 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +174 -25
  3. data/docs/INDEX.adoc +4 -0
  4. data/docs/advanced/diff-classification.adoc +3 -2
  5. data/docs/features/configuration-profiles.adoc +288 -0
  6. data/docs/features/diff-formatting/character-visualization.adoc +153 -454
  7. data/docs/features/diff-formatting/display-filtering.adoc +44 -0
  8. data/docs/features/diff-formatting/display-preprocessing.adoc +656 -0
  9. data/docs/features/diff-formatting/index.adoc +47 -0
  10. data/docs/features/diff-formatting/pretty-diff-mode.adoc +154 -0
  11. data/docs/features/environment-configuration/override-system.adoc +10 -3
  12. data/docs/features/index.adoc +9 -0
  13. data/docs/features/match-options/index.adoc +32 -42
  14. data/docs/features/match-options/pretty-printed-fixtures.adoc +270 -0
  15. data/docs/guides/choosing-configuration.adoc +22 -0
  16. data/docs/reference/environment-variables.adoc +121 -1
  17. data/docs/reference/options-across-interfaces.adoc +182 -2
  18. data/lib/canon/cli.rb +20 -0
  19. data/lib/canon/commands/diff_command.rb +7 -2
  20. data/lib/canon/commands/format_command.rb +1 -1
  21. data/lib/canon/comparison/html_comparator.rb +20 -15
  22. data/lib/canon/comparison/html_compare_profile.rb +4 -4
  23. data/lib/canon/comparison/markup_comparator.rb +12 -3
  24. data/lib/canon/comparison/match_options/base_resolver.rb +29 -7
  25. data/lib/canon/comparison/match_options/json_resolver.rb +9 -0
  26. data/lib/canon/comparison/match_options/xml_resolver.rb +16 -2
  27. data/lib/canon/comparison/match_options/yaml_resolver.rb +10 -0
  28. data/lib/canon/comparison/match_options.rb +4 -1
  29. data/lib/canon/comparison/whitespace_sensitivity.rb +189 -137
  30. data/lib/canon/comparison/xml_comparator/child_comparison.rb +21 -4
  31. data/lib/canon/comparison/xml_comparator.rb +14 -12
  32. data/lib/canon/comparison/xml_node_comparison.rb +51 -6
  33. data/lib/canon/comparison.rb +52 -9
  34. data/lib/canon/config/env_schema.rb +32 -4
  35. data/lib/canon/config/override_resolver.rb +16 -3
  36. data/lib/canon/config/profile_loader.rb +135 -0
  37. data/lib/canon/config/profiles/metanorma.yml +74 -0
  38. data/lib/canon/config/profiles/metanorma_debug.yml +8 -0
  39. data/lib/canon/config/type_converter.rb +8 -0
  40. data/lib/canon/config.rb +469 -5
  41. data/lib/canon/diff/diff_classifier.rb +41 -11
  42. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +48 -17
  43. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +58 -0
  44. data/lib/canon/diff_formatter/diff_detail_formatter.rb +22 -7
  45. data/lib/canon/diff_formatter/theme.rb +24 -17
  46. data/lib/canon/diff_formatter.rb +493 -36
  47. data/lib/canon/pretty_printer/xml_normalized.rb +395 -0
  48. data/lib/canon/rspec_matchers.rb +36 -0
  49. data/lib/canon/tree_diff/matchers/hash_matcher.rb +26 -11
  50. data/lib/canon/version.rb +1 -1
  51. data/lib/canon/xml/nodes/namespace_node.rb +4 -0
  52. data/lib/canon/xml/nodes/processing_instruction_node.rb +4 -0
  53. data/lib/canon/xml/nodes/root_node.rb +4 -0
  54. data/lib/canon/xml/nodes/text_node.rb +4 -0
  55. data/lib/tasks/performance_helpers.rb +2 -2
  56. metadata +24 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ba5fabd7a7d0057f365952f8aecc01381ef88f2f1dd28679e9e8947945b54104
4
- data.tar.gz: 3e4119f7fc69a7c2534957c7654b6a567a868208179262fc921ea1266b5a1a6c
3
+ metadata.gz: 10234a14be49f993f58acac6d82bb6e8ca409d309bcd8446296f32f60ad6380f
4
+ data.tar.gz: 80e29ddf981d17f0fcd5812921f6ec1fbdf056b3ae673dc4d3d94c9979cddc21
5
5
  SHA512:
6
- metadata.gz: 44a74c7bb2010a2aa22855258affa6889003585edba717538bb579c63f6694757b21b74bc929b07bd0f87660f0bb3bd02f158b0a91be18017e938100f1fee491
7
- data.tar.gz: be61d26774ae22a5547b2ba0ec1d578b11d37be0899add26d7f14ac63c197637568b74a9d0cad76535e5ab6a82d297af7b917f225f972361f09549964917576b
6
+ metadata.gz: bc63b5a8dae56a06781d8588809e8356eea618fdcdb13b933ea4c8a3c4f221201c05070a3ce100945c9f81ef6f122b73ff6ac723441aa2b3cbcc5fd31e8fee63
7
+ data.tar.gz: fbcb9ceb782f7847e6f2c6afba275f7d68da410d253b6f34f263b5c91803a3fbaff031de9b2dda5b8787fc37d0c82e1cea97d3ae1489d507eaad969168a107a1
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-03-31 12:44:31 UTC using RuboCop version 1.86.0.
3
+ # on 2026-04-12 07:40:40 UTC using RuboCop version 1.86.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -11,27 +11,147 @@ Gemspec/RequiredRubyVersion:
11
11
  Exclude:
12
12
  - 'canon.gemspec'
13
13
 
14
- # Offense count: 1
14
+ # Offense count: 49
15
15
  # This cop supports safe autocorrection (--autocorrect).
16
- # Configuration parameters: EmptyLineBetweenMethodDefs, EmptyLineBetweenClassDefs, EmptyLineBetweenModuleDefs, DefLikeMacros, AllowAdjacentOneLineDefs, NumberOfEmptyLines.
17
- Layout/EmptyLineBetweenDefs:
16
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
17
+ # SupportedStyles: with_first_argument, with_fixed_indentation
18
+ Layout/ArgumentAlignment:
18
19
  Exclude:
19
- - 'lib/canon/diff_formatter/by_line/xml_formatter.rb'
20
+ - 'lib/canon/comparison/whitespace_sensitivity.rb'
21
+ - 'lib/canon/comparison/xml_comparator.rb'
22
+ - 'lib/canon/comparison/xml_node_comparison.rb'
23
+ - 'lib/canon/config.rb'
24
+ - 'lib/canon/diff/diff_classifier.rb'
25
+ - 'lib/canon/diff_formatter.rb'
26
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
27
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
28
+ - 'spec/canon/config/profile_loader_spec.rb'
29
+ - 'spec/canon/config/profile_spec.rb'
30
+ - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
31
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
32
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
33
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
20
34
 
21
- # Offense count: 1
35
+ # Offense count: 3
22
36
  # This cop supports safe autocorrection (--autocorrect).
23
- Layout/EmptyLines:
37
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
38
+ # SupportedStyles: with_first_element, with_fixed_indentation
39
+ Layout/ArrayAlignment:
24
40
  Exclude:
25
- - 'lib/canon/diff_formatter/by_line/xml_formatter.rb'
41
+ - 'lib/canon/comparison/match_options/base_resolver.rb'
42
+ - 'lib/canon/comparison/match_options/xml_resolver.rb'
43
+ - 'spec/canon/config/profile_spec.rb'
26
44
 
27
- # Offense count: 1094
45
+ # Offense count: 16
46
+ # This cop supports safe autocorrection (--autocorrect).
47
+ # Configuration parameters: EnforcedStyleAlignWith.
48
+ # SupportedStylesAlignWith: either, start_of_block, start_of_line
49
+ Layout/BlockAlignment:
50
+ Exclude:
51
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
52
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
53
+ - 'spec/canon/config/profile_loader_spec.rb'
54
+ - 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
55
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
56
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
57
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
58
+
59
+ # Offense count: 16
60
+ # This cop supports safe autocorrection (--autocorrect).
61
+ Layout/BlockEndNewline:
62
+ Exclude:
63
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
64
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
65
+ - 'spec/canon/config/profile_loader_spec.rb'
66
+ - 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
67
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
68
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
69
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
70
+
71
+ # Offense count: 5
72
+ # This cop supports safe autocorrection (--autocorrect).
73
+ Layout/ClosingParenthesisIndentation:
74
+ Exclude:
75
+ - 'lib/canon/config/profile_loader.rb'
76
+ - 'lib/canon/diff/diff_classifier.rb'
77
+ - 'spec/canon/config/profile_loader_spec.rb'
78
+
79
+ # Offense count: 2
80
+ # This cop supports safe autocorrection (--autocorrect).
81
+ Layout/ElseAlignment:
82
+ Exclude:
83
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
84
+
85
+ # Offense count: 2
86
+ # This cop supports safe autocorrection (--autocorrect).
87
+ # Configuration parameters: EnforcedStyleAlignWith.
88
+ # SupportedStylesAlignWith: keyword, variable, start_of_line
89
+ Layout/EndAlignment:
90
+ Exclude:
91
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
92
+
93
+ # Offense count: 5
94
+ # This cop supports safe autocorrection (--autocorrect).
95
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
96
+ # SupportedStyles: consistent, consistent_relative_to_receiver, special_for_inner_method_call, special_for_inner_method_call_in_parentheses
97
+ Layout/FirstArgumentIndentation:
98
+ Exclude:
99
+ - 'lib/canon/config/profile_loader.rb'
100
+ - 'lib/canon/diff/diff_classifier.rb'
101
+ - 'spec/canon/config/profile_loader_spec.rb'
102
+
103
+ # Offense count: 30
104
+ # This cop supports safe autocorrection (--autocorrect).
105
+ # Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
106
+ # SupportedHashRocketStyles: key, separator, table
107
+ # SupportedColonStyles: key, separator, table
108
+ # SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
109
+ Layout/HashAlignment:
110
+ Exclude:
111
+ - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
112
+ - 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
113
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
114
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
115
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
116
+
117
+ # Offense count: 36
118
+ # This cop supports safe autocorrection (--autocorrect).
119
+ # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
120
+ # SupportedStylesAlignWith: start_of_line, relative_to_receiver
121
+ Layout/IndentationWidth:
122
+ Exclude:
123
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
124
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
125
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
126
+ - 'spec/canon/config/profile_loader_spec.rb'
127
+ - 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
128
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
129
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
130
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
131
+
132
+ # Offense count: 1375
28
133
  # This cop supports safe autocorrection (--autocorrect).
29
134
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
30
135
  # URISchemes: http, https
31
136
  Layout/LineLength:
32
137
  Enabled: false
33
138
 
34
- # Offense count: 55
139
+ # Offense count: 2
140
+ # This cop supports safe autocorrection (--autocorrect).
141
+ # Configuration parameters: EnforcedStyle.
142
+ # SupportedStyles: symmetrical, new_line, same_line
143
+ Layout/MultilineMethodCallBraceLayout:
144
+ Exclude:
145
+ - 'lib/canon/config/profile_loader.rb'
146
+ - 'lib/canon/diff/diff_classifier.rb'
147
+
148
+ # Offense count: 57
149
+ # This cop supports safe autocorrection (--autocorrect).
150
+ # Configuration parameters: AllowInHeredoc.
151
+ Layout/TrailingWhitespace:
152
+ Enabled: false
153
+
154
+ # Offense count: 56
35
155
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
36
156
  Lint/DuplicateBranch:
37
157
  Enabled: false
@@ -76,7 +196,7 @@ Lint/UselessConstantScoping:
76
196
  Exclude:
77
197
  - 'lib/canon/diff_formatter/theme.rb'
78
198
 
79
- # Offense count: 297
199
+ # Offense count: 309
80
200
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
81
201
  Metrics/AbcSize:
82
202
  Enabled: false
@@ -87,27 +207,27 @@ Metrics/AbcSize:
87
207
  Metrics/BlockLength:
88
208
  Max: 92
89
209
 
90
- # Offense count: 1
210
+ # Offense count: 3
91
211
  # Configuration parameters: CountBlocks, CountModifierForms.
92
212
  Metrics/BlockNesting:
93
213
  Max: 4
94
214
 
95
- # Offense count: 261
215
+ # Offense count: 272
96
216
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
97
217
  Metrics/CyclomaticComplexity:
98
218
  Enabled: false
99
219
 
100
- # Offense count: 485
220
+ # Offense count: 498
101
221
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
102
222
  Metrics/MethodLength:
103
223
  Max: 146
104
224
 
105
- # Offense count: 56
225
+ # Offense count: 58
106
226
  # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
107
227
  Metrics/ParameterLists:
108
228
  Max: 10
109
229
 
110
- # Offense count: 212
230
+ # Offense count: 219
111
231
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
112
232
  Metrics/PerceivedComplexity:
113
233
  Enabled: false
@@ -140,13 +260,13 @@ Performance/CollectionLiteralInLoop:
140
260
  - 'lib/canon/xml/xml_base_handler.rb'
141
261
  - 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
142
262
 
143
- # Offense count: 71
263
+ # Offense count: 82
144
264
  # Configuration parameters: Prefixes, AllowedPatterns.
145
265
  # Prefixes: when, with, without
146
266
  RSpec/ContextWording:
147
267
  Enabled: false
148
268
 
149
- # Offense count: 33
269
+ # Offense count: 37
150
270
  # Configuration parameters: IgnoredMetadata.
151
271
  RSpec/DescribeClass:
152
272
  Enabled: false
@@ -157,7 +277,7 @@ RSpec/DescribeMethod:
157
277
  - 'spec/canon/comparison/multiple_differences_spec.rb'
158
278
  - 'spec/canon/diff_formatter/character_map_customization_spec.rb'
159
279
 
160
- # Offense count: 736
280
+ # Offense count: 804
161
281
  # Configuration parameters: CountAsOne.
162
282
  RSpec/ExampleLength:
163
283
  Max: 44
@@ -171,7 +291,7 @@ RSpec/ExpectActual:
171
291
  - 'spec/canon/rspec_matchers_spec.rb'
172
292
  - 'spec/canon/string_matcher_spec.rb'
173
293
 
174
- # Offense count: 175
294
+ # Offense count: 177
175
295
  # Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
176
296
  RSpec/IndexedLet:
177
297
  Exclude:
@@ -209,14 +329,14 @@ RSpec/MultipleDescribes:
209
329
  Exclude:
210
330
  - 'spec/canon/comparison/match_options_spec.rb'
211
331
 
212
- # Offense count: 590
332
+ # Offense count: 654
213
333
  RSpec/MultipleExpectations:
214
334
  Max: 15
215
335
 
216
- # Offense count: 71
336
+ # Offense count: 93
217
337
  # Configuration parameters: AllowSubject.
218
338
  RSpec/MultipleMemoizedHelpers:
219
- Max: 13
339
+ Max: 16
220
340
 
221
341
  # Offense count: 17
222
342
  # Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
@@ -227,7 +347,7 @@ RSpec/NamedSubject:
227
347
  - 'spec/canon/pretty_printer/json_spec.rb'
228
348
  - 'spec/canon/pretty_printer/xml_spec.rb'
229
349
 
230
- # Offense count: 40
350
+ # Offense count: 50
231
351
  # Configuration parameters: AllowedGroups.
232
352
  RSpec/NestedGroups:
233
353
  Max: 4
@@ -261,7 +381,7 @@ RSpec/SpecFilePathFormat:
261
381
  - 'spec/canon/yaml/formatter_spec.rb'
262
382
  - 'spec/xml_c14n_spec.rb'
263
383
 
264
- # Offense count: 128
384
+ # Offense count: 131
265
385
  # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
266
386
  RSpec/VerifiedDoubles:
267
387
  Exclude:
@@ -273,6 +393,23 @@ RSpec/VerifiedDoubles:
273
393
  - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
274
394
  - 'spec/canon/tree_diff/operation_converter_spec.rb'
275
395
 
396
+ # Offense count: 25
397
+ # This cop supports safe autocorrection (--autocorrect).
398
+ # Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
399
+ # SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
400
+ # ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
401
+ # FunctionalMethods: let, let!, subject, watch
402
+ # AllowedMethods: lambda, proc, it
403
+ Style/BlockDelimiters:
404
+ Exclude:
405
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
406
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
407
+ - 'spec/canon/config/profile_loader_spec.rb'
408
+ - 'spec/canon/diff_formatter/display_preprocessing_spec.rb'
409
+ - 'spec/canon/diff_formatter/pretty_diff_spec.rb'
410
+ - 'spec/canon/diff_formatter/show_diffs_filtering_spec.rb'
411
+ - 'spec/canon/pretty_printer/xml_normalized_spec.rb'
412
+
276
413
  # Offense count: 1
277
414
  # This cop supports safe autocorrection (--autocorrect).
278
415
  # Configuration parameters: EnforcedStyle, AllowComments.
@@ -295,6 +432,18 @@ Style/IdenticalConditionalBranches:
295
432
  - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
296
433
  - 'lib/canon/diff_formatter/legend.rb'
297
434
 
435
+ # Offense count: 3
436
+ # This cop supports safe autocorrection (--autocorrect).
437
+ Style/MultilineIfModifier:
438
+ Exclude:
439
+ - 'lib/canon/pretty_printer/xml_normalized.rb'
440
+
441
+ # Offense count: 2
442
+ # This cop supports safe autocorrection (--autocorrect).
443
+ Style/MultilineTernaryOperator:
444
+ Exclude:
445
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
446
+
298
447
  # Offense count: 1
299
448
  # Configuration parameters: AllowedMethods.
300
449
  # AllowedMethods: respond_to_missing?
data/docs/INDEX.adoc CHANGED
@@ -98,6 +98,7 @@ Choose your interface:
98
98
  === Level 4: Master Features (1 hour)
99
99
 
100
100
  . link:features/match-options/[Match options and profiles]
101
+ . link:features/configuration-profiles/[Configuration profiles] - YAML-based presets with element-level whitespace control
101
102
  . link:features/canonicalization/[Format-specific canonicalization]
102
103
  . link:features/diff-formatting/[Customizing diff output]
103
104
 
@@ -111,6 +112,7 @@ Choose your interface:
111
112
 
112
113
  * link:reference/options-across-interfaces[Options Across Interfaces] - Same options in CLI, Ruby API, and RSpec
113
114
  * link:reference/environment-variables[Environment Variables] - Configure Canon via ENV variables
115
+ * link:features/configuration-profiles/[Configuration Profiles] - YAML presets with element-level whitespace control
114
116
  * link:features/match-options/profiles[Match Profiles] - Pre-configured comparison strategies
115
117
  * link:features/diff-formatting/colors-and-symbols[Color-Coded Diffs] - Understanding diff output
116
118
  * link:features/environment-configuration/size-limits[Size Limits] - Preventing hangs on large files
@@ -135,5 +137,7 @@ See link:contributing[Contributing to Documentation]
135
137
  * Size limits for large files
136
138
  * Enhanced character visualization
137
139
  * Environment variable configuration
140
+ * Configuration profiles with element-level whitespace classification
141
+ * Three-way whitespace sensitivity: strict, normalize, insensitive
138
142
 
139
143
  See link:https://github.com/lutaml/canon/blob/main/CHANGELOG.md[Full Changelog]
data/docs/advanced/diff-classification.adoc CHANGED
@@ -324,7 +324,7 @@ html2 = '<code>indented</code><p>text</p>'
324
324
  Canon::Comparison.equivalent?(html1, html2,
325
325
  format: :html,
326
326
  match: {
327
- whitespace_insensitive_elements: [:code],
327
+ strip_whitespace_elements: [:code],
328
328
  }
329
329
  )
330
330
  # => true
@@ -423,9 +423,10 @@ def classify(diff_node)
423
423
  end
424
424
 
425
425
  # SECOND: Handle content-level formatting for text_content with :normalize
426
+ # Skipped for :preserve elements (<pre>, <code>, etc.) where whitespace is always normative
426
427
  if diff_node.dimension == :text_content &&
427
428
  profile.send(:behavior_for, :text_content) == :normalize &&
428
- !inside_whitespace_sensitive_element?(diff_node) &&
429
+ !inside_preserve_element?(diff_node) &&
429
430
  formatting_only_diff?(diff_node)
430
431
  diff_node.formatting = true
431
432
  diff_node.normative = false
data/docs/features/configuration-profiles.adoc ADDED
@@ -0,0 +1,288 @@
1
+ ---
2
+ title: Configuration Profiles
3
+ parent: Features
4
+ nav_order: 7
5
+ ---
6
+ = Configuration profiles
7
+ :toc:
8
+ :toclevels: 3
9
+
10
+ == Purpose
11
+
12
+ Configuration profiles bundle many settings into a single named preset,
13
+ eliminating repetitive configuration blocks across multiple gems.
14
+ Instead of 60+ lines of per-format settings, use one line:
15
+
16
+ [source,ruby]
17
+ ----
18
+ Canon::Config.instance.profile = :metanorma
19
+ ----
20
+
21
+ Profiles are defined in YAML files and support inheritance, so a debug
22
+ variant can extend a base profile with only the differences.
23
+
24
+ == Built-in profiles
25
+
26
+ [cols="1,3"]
27
+ |===
28
+ | Profile | Description
29
+
30
+ | `:metanorma`
31
+ | Standard Metanorma spec configuration. Sets preprocessing to `:format`,
32
+ the match profile to `:spec_friendly`, and the diff algorithm to `:dom`; also
33
+ configures the canonical display format, normalized pretty-print display
34
+ preprocessing, and XML-specific whitespace element lists.
35
+
36
+ | `:metanorma_debug`
37
+ | Extends `:metanorma` with debug output enabled
38
+ (`show_prettyprint_received: true`).
39
+ |===
40
+
41
+ List all available profiles programmatically:
42
+
43
+ [source,ruby]
44
+ ----
45
+ Canon::Config::ProfileLoader.available_profiles
46
+ # => [:metanorma, :metanorma_debug]
47
+ ----
48
+
49
+ == Element-level whitespace classification
50
+
51
+ The metanorma profile's key feature is its **element-level whitespace classification**.
52
+ This controls how whitespace differences within specific elements are treated:
53
+
54
+ **Three-way classification:**
55
+
56
+ * **Preserve** (`:preserve`) — Every whitespace character is significant. Use for elements
57
+ where exact whitespace matters (like `<pre>`, `<code>`).
58
+
59
+ * **Collapse** (`:collapse`) — Presence matters but whitespace form doesn't.
60
+ `" hello "` equals `"hello"`. Differences are formatting-only (informative).
61
+ Use for elements like `<p>`, `<li>`, `<td>` in prose documents.
62
+
63
+ * **Strip** (`:strip`) — Whitespace is structural noise, dropped entirely.
64
+ The default for XML elements not in any list.
65
+
66
+ **Metanorma profile element lists:**
67
+
68
+ [source,ruby]
69
+ ----
70
+ # In metanorma profile
71
+ Canon::Config.instance.profile = :metanorma
72
+ Canon::Config.instance.xml.match.collapse_whitespace_elements
73
+ # => ["p", "title", "name", "td", "th", "dt", "dd", "li", ...]
74
+
75
+ Canon::Config.instance.xml.match.preserve_whitespace_elements
76
+ # => ["body", "passthrough"]
77
+ ----
78
+
79
+ **How it works with `text_content: :normalize`:**
80
+
81
+ [source,ruby]
82
+ ----
83
+ # With metanorma profile: <p> is in collapse_whitespace_elements
84
+ Canon::Config.instance.profile = :metanorma
85
+
86
+ # These are EQUIVALENT (whitespace in <p> is formatting-only)
87
+ Canon::Comparison.equivalent?('<p> hello </p>', '<p>hello</p>')
88
+ # => true
89
+
90
+ # But <body> is in preserve_whitespace_elements — every character matters
91
+ # These are NOT EQUIVALENT (whitespace in <body> is normative)
92
+ Canon::Comparison.equivalent?('<body> hello </body>', '<body>hello</body>')
93
+ # => false
94
+ ----
95
+
96
+ **Why this matters:**
97
+
98
+ In Metanorma/DocBook documents, elements like `<p>`, `<li>`, `<td>` contain prose
99
+ where whitespace formatting (extra spaces, line breaks) is irrelevant. By contrast,
100
+ `<body>` and `<passthrough>` contain code or other content whose exact whitespace matters.
101
+
102
+ Without element-level classification, you'd have to choose:
103
+ - `text_content: :normalize` — ignores ALL whitespace, too permissive
104
+ - `text_content: :strict` — requires exact match everywhere, too strict
105
+
106
+ Element-level classification gives you fine-grained control.
107
+
108
+ == Usage
109
+
110
+ === Programmatic (Ruby API)
111
+
112
+ Use a **Symbol** for built-in profiles and a **String** for file paths:
113
+
114
+ [source,ruby]
115
+ ----
116
+ # Built-in profile (Symbol)
117
+ Canon::Config.instance.profile = :metanorma
118
+
119
+ # Local YAML file (String)
120
+ Canon::Config.instance.profile = "/path/to/my_profile.yml"
121
+ Canon::Config.instance.profile = "~/my_canon_profile.yml"
122
+ Canon::Config.instance.profile = "config/canon_profile.yml"
123
+
124
+ # Or in a configure block
125
+ Canon::Config.configure do |cfg|
126
+ cfg.profile = :metanorma
127
+ # Override individual settings after profile if needed
128
+ cfg.xml.diff.verbose_diff = true
129
+ end
130
+
131
+ # Clear the profile (revert to defaults + programmatic values)
132
+ Canon::Config.instance.profile = nil
133
+ ----
134
+
135
+ IMPORTANT: The type of the value determines how it is resolved:
136
+ **Symbols** are looked up as built-in profile names;
137
+ **Strings** are treated as file paths (with `~` expansion and relative
138
+ path resolution against the working directory).
139
+
140
+ Local YAML files can inherit from built-in profiles (see <<inheritance>>).
141
+
142
+ === Environment variable
143
+
144
+ Set `CANON_CONFIG_PROFILE` to apply a profile automatically on
145
+ initialization:
146
+
147
+ [source,bash]
148
+ ----
149
+ # Built-in profile
150
+ CANON_CONFIG_PROFILE=metanorma bundle exec rspec
151
+
152
+ # File path
153
+ CANON_CONFIG_PROFILE=~/my_profile.yml bundle exec rspec
154
+ ----
155
+
156
+ NOTE: `CANON_CONFIG_PROFILE` is distinct from `CANON_PROFILE`, which
157
+ controls the match profile (comparison behavior). The config profile
158
+ controls all settings at once.
159
+
160
+ == Priority chain
161
+
162
+ With profiles, the resolution chain has four layers:
163
+
164
+ [source]
165
+ ----
166
+ +------------------------------------+
167
+ | 1. Environment Variables | <- Highest Priority
168
+ | (CANON_XML_DIFF_ALGORITHM) |
169
+ +------------------------------------+
170
+ | overrides
171
+ +------------------------------------+
172
+ | 2. Programmatic Configuration |
173
+ | (config.xml.diff.algorithm=) |
174
+ +------------------------------------+
175
+ | overrides
176
+ +------------------------------------+
177
+ | 3. Profile Values |
178
+ | (from YAML profile file) |
179
+ +------------------------------------+
180
+ | overrides
181
+ +------------------------------------+
182
+ | 4. Default Values | <- Lowest Priority
183
+ | (defined in Canon::Config) |
184
+ +------------------------------------+
185
+ ----
186
+
187
+ This means:
188
+
189
+ * ENV variables always win (useful for CI overrides)
190
+ * Programmatic setter calls override profile values
191
+ * Profile values override built-in defaults
192
+ * Clearing the profile (`cfg.profile = nil`) removes only layer 3
193
+
194
+ [[inheritance]]
195
+ == Profile inheritance
196
+
197
+ A profile can inherit from another using the `inherits` key:
198
+
199
+ [source,yaml]
200
+ ----
201
+ name: my_debug
202
+ inherits: metanorma
203
+
204
+ shared:
205
+ diff:
206
+ verbose_diff: true
207
+ show_prettyprint_received: true
208
+ ----
209
+
210
+ Inheritance rules:
211
+
212
+ * Parent values are loaded first, then child values are deep-merged on top
213
+ * Hashes are merged recursively (child keys override parent keys)
214
+ * Arrays are replaced entirely (not concatenated)
215
+ * Single-parent inheritance only
216
+ * Cycle detection prevents infinite loops
217
+ * Local files can inherit from built-in profiles by name
218
+
219
+ == Creating custom profiles
220
+
221
+ === YAML file format
222
+
223
+ [source,yaml]
224
+ ----
225
+ ---
226
+ name: my_profile # <1>
227
+ description: My custom config # <2>
228
+ inherits: metanorma # <3>
229
+
230
+ shared: # <4>
231
+ preprocessing: format
232
+ match:
233
+ profile: spec_friendly
234
+ diff:
235
+ algorithm: dom
236
+ context_lines: 5
237
+ verbose_diff: false
238
+
239
+ formats: # <5>
240
+ xml:
241
+ match:
242
+ collapse_whitespace_elements:
243
+ - p
244
+ - title
245
+ - td
246
+ preserve_whitespace_elements:
247
+ - body
248
+ - passthrough
249
+ html:
250
+ diff:
251
+ show_raw_inputs: true
252
+ ----
253
+ <1> Profile name (metadata)
254
+ <2> Description (metadata)
255
+ <3> Optional: inherit from another profile (name or path)
256
+ <4> `shared` settings apply to all formats (xml, html, json, yaml, string)
257
+ <5> `formats.<name>` settings override `shared` for that specific format
258
+
259
+ === Attribute mapping
260
+
261
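+ Profile YAML keys map directly to Canon configuration accessors (the `shared` rows below show the XML accessor as an example; `shared` values apply to every format):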
262
+
263
+ [cols="2,2"]
264
+ |===
265
+ | YAML path | Ruby equivalent
266
+
267
+ | `shared.preprocessing`
268
+ | `cfg.xml.preprocessing = :format`
269
+
270
+ | `shared.match.profile`
271
+ | `cfg.xml.match.profile = :spec_friendly`
272
+
273
+ | `shared.diff.algorithm`
274
+ | `cfg.xml.diff.algorithm = :dom`
275
+
276
+ | `formats.xml.diff.context_lines`
277
+ | `cfg.xml.diff.context_lines = 5`
278
+ |===
279
+
280
+ All `DiffConfig` and `MatchConfig` attributes documented in
281
+ link:../reference/options-across-interfaces.adoc[Options Across Interfaces]
282
+ are supported.
283
+
284
+ == See also
285
+
286
+ * link:environment-configuration/override-system.adoc[Override System] -- ENV variable priority
287
+ * link:match-options/index.adoc[Match Options] -- match profile presets (`:strict`, `:spec_friendly`, etc.)
288
+ * link:../guides/choosing-configuration.adoc[Choosing Configuration] -- decision guide