canon 0.1.23 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +155 -30
  3. data/docs/INDEX.adoc +4 -0
  4. data/docs/advanced/diff-classification.adoc +3 -2
  5. data/docs/features/configuration-profiles.adoc +288 -0
  6. data/docs/features/diff-formatting/character-visualization.adoc +153 -454
  7. data/docs/features/diff-formatting/display-filtering.adoc +44 -0
  8. data/docs/features/diff-formatting/display-preprocessing.adoc +656 -0
  9. data/docs/features/diff-formatting/index.adoc +47 -0
  10. data/docs/features/diff-formatting/pretty-diff-mode.adoc +154 -0
  11. data/docs/features/environment-configuration/override-system.adoc +10 -3
  12. data/docs/features/index.adoc +9 -0
  13. data/docs/features/match-options/index.adoc +32 -42
  14. data/docs/features/match-options/pretty-printed-fixtures.adoc +270 -0
  15. data/docs/guides/choosing-configuration.adoc +22 -0
  16. data/docs/reference/environment-variables.adoc +121 -1
  17. data/docs/reference/options-across-interfaces.adoc +182 -2
  18. data/lib/canon/cli.rb +20 -0
  19. data/lib/canon/commands/diff_command.rb +7 -2
  20. data/lib/canon/commands/format_command.rb +1 -1
  21. data/lib/canon/comparison/html_comparator.rb +20 -15
  22. data/lib/canon/comparison/html_compare_profile.rb +4 -4
  23. data/lib/canon/comparison/markup_comparator.rb +12 -3
  24. data/lib/canon/comparison/match_options/base_resolver.rb +29 -7
  25. data/lib/canon/comparison/match_options/json_resolver.rb +9 -0
  26. data/lib/canon/comparison/match_options/xml_resolver.rb +16 -2
  27. data/lib/canon/comparison/match_options/yaml_resolver.rb +10 -0
  28. data/lib/canon/comparison/match_options.rb +4 -1
  29. data/lib/canon/comparison/whitespace_sensitivity.rb +189 -137
  30. data/lib/canon/comparison/xml_comparator/child_comparison.rb +21 -4
  31. data/lib/canon/comparison/xml_comparator.rb +14 -12
  32. data/lib/canon/comparison/xml_node_comparison.rb +51 -6
  33. data/lib/canon/comparison.rb +52 -9
  34. data/lib/canon/config/env_schema.rb +32 -4
  35. data/lib/canon/config/override_resolver.rb +16 -3
  36. data/lib/canon/config/profile_loader.rb +135 -0
  37. data/lib/canon/config/profiles/metanorma.yml +74 -0
  38. data/lib/canon/config/profiles/metanorma_debug.yml +8 -0
  39. data/lib/canon/config/type_converter.rb +8 -0
  40. data/lib/canon/config.rb +469 -5
  41. data/lib/canon/diff/diff_classifier.rb +41 -11
  42. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +48 -17
  43. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +58 -0
  44. data/lib/canon/diff_formatter/diff_detail_formatter.rb +22 -7
  45. data/lib/canon/diff_formatter.rb +493 -36
  46. data/lib/canon/pretty_printer/xml_normalized.rb +395 -0
  47. data/lib/canon/rspec_matchers.rb +36 -0
  48. data/lib/canon/version.rb +1 -1
  49. data/lib/canon/xml/nodes/namespace_node.rb +4 -0
  50. data/lib/canon/xml/nodes/processing_instruction_node.rb +4 -0
  51. data/lib/canon/xml/nodes/root_node.rb +4 -0
  52. data/lib/canon/xml/nodes/text_node.rb +4 -0
  53. data/lib/tasks/performance_helpers.rb +2 -2
  54. metadata +24 -2
data/lib/canon/config.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require_relative "config/env_provider"
4
4
  require_relative "config/override_resolver"
5
+ require_relative "config/profile_loader"
5
6
  require_relative "color_detector"
6
7
 
7
8
  module Canon
@@ -44,6 +45,36 @@ module Canon
44
45
  @json = FormatConfig.new(:json)
45
46
  @yaml = FormatConfig.new(:yaml)
46
47
  @string = FormatConfig.new(:string)
48
+ @profile = nil
49
+
50
+ env_profile = ENV.fetch("CANON_CONFIG_PROFILE", nil)
51
+ if env_profile
52
+ # Convert to symbol if it matches a built-in profile name
53
+ self.profile = if ProfileLoader.available_profiles.include?(env_profile.to_sym)
54
+ env_profile.to_sym
55
+ else
56
+ env_profile
57
+ end
58
+ end
59
+ end
60
+
61
+ # Returns the current profile name or path.
62
+ def profile
63
+ @profile
64
+ end
65
+
66
+ # Apply a configuration profile by name (Symbol for built-in) or
67
+ # file path (String). Set to +nil+ to clear the profile layer.
68
+ def profile=(name_or_path)
69
+ clear_profile_values!
70
+
71
+ if name_or_path.nil?
72
+ @profile = nil
73
+ return
74
+ end
75
+
76
+ @profile = name_or_path.is_a?(Symbol) ? name_or_path : name_or_path.to_s
77
+ apply_profile(@profile)
47
78
  end
48
79
 
49
80
  def reset!
@@ -52,6 +83,7 @@ module Canon
52
83
  @json.reset!
53
84
  @yaml.reset!
54
85
  @string.reset!
86
+ @profile = nil
55
87
  end
56
88
 
57
89
  # Backward compatibility methods for top-level diff configuration
@@ -93,19 +125,44 @@ module Canon
93
125
  # Each format (XML, HTML, JSON, YAML) has its own instance
94
126
  class FormatConfig
95
127
  attr_reader :format, :match, :diff
96
- attr_accessor :preprocessing
97
128
 
98
129
  def initialize(format)
99
130
  @format = format
100
131
  @match = MatchConfig.new(format)
101
132
  @diff = DiffConfig.new(format)
102
133
  @preprocessing = nil
134
+ @profile_preprocessing = nil
135
+ end
136
+
137
+ def preprocessing
138
+ @preprocessing || @profile_preprocessing
139
+ end
140
+
141
+ def preprocessing=(value)
142
+ @preprocessing = value
103
143
  end
104
144
 
105
145
  def reset!
106
146
  @match.reset!
107
147
  @diff.reset!
108
148
  @preprocessing = nil
149
+ @profile_preprocessing = nil
150
+ end
151
+
152
+ def apply_profile_data(data)
153
+ if data.key?("preprocessing")
154
+ val = data["preprocessing"]
155
+ @profile_preprocessing = val.is_a?(String) ? val.to_sym : val
156
+ end
157
+
158
+ @match.apply_profile_data(data["match"]) if data.key?("match")
159
+ @diff.apply_profile_data(data["diff"]) if data.key?("diff")
160
+ end
161
+
162
+ def clear_profile!
163
+ @profile_preprocessing = nil
164
+ @match.clear_profile!
165
+ @diff.clear_profile!
109
166
  end
110
167
  end
111
168
 
@@ -137,6 +194,34 @@ module Canon
137
194
  @resolver.set_programmatic(:profile, value)
138
195
  end
139
196
 
197
+ # Return all profile-sourced values from the resolver, excluding
198
+ # the :profile key itself (which is accessed via #profile).
199
+ # These are the YAML-profile settings (e.g., preserve_whitespace_elements)
200
+ # that are stored in the resolver's profile layer but not exposed
201
+ # through the built-in MATCH_PROFILES system.
202
+ #
203
+ # @return [Hash] Profile option key-values (excluding :profile)
204
+ def profile_options
205
+ @resolver.profile.except(:profile)
206
+ end
207
+
208
+ # Element names where whitespace is PRESERVED exactly (no manipulation).
209
+ # All whitespace characters are significant in these elements.
210
+ def preserve_whitespace_elements
211
+ @resolver.resolve(:preserve_whitespace_elements) || []
212
+ end
213
+
214
+ # Element names where whitespace is COLLAPSED (HTML-style behavior).
215
+ # Multiple whitespace chars collapse to single space; boundaries preserved.
216
+ def collapse_whitespace_elements
217
+ @resolver.resolve(:collapse_whitespace_elements) || []
218
+ end
219
+
220
+ # Element names where whitespace-only text nodes are STRIPPED.
221
+ def strip_whitespace_elements
222
+ @resolver.resolve(:strip_whitespace_elements) || []
223
+ end
224
+
140
225
  # Build match options from profile and options
141
226
  def to_h
142
227
  result = {}
@@ -145,6 +230,20 @@ module Canon
145
230
  result
146
231
  end
147
232
 
233
+ def apply_profile_data(data)
234
+ return unless data
235
+
236
+ data.each do |key, value|
237
+ sym_key = key.to_sym
238
+ converted = value.is_a?(String) ? value.to_sym : value
239
+ @resolver.set_profile(sym_key, converted)
240
+ end
241
+ end
242
+
243
+ def clear_profile!
244
+ @resolver.clear_profile!
245
+ end
246
+
148
247
  private
149
248
 
150
249
  def build_resolver(format)
@@ -162,15 +261,60 @@ module Canon
162
261
  end
163
262
  end
164
263
 
264
+ # Pretty-printer sub-configuration for display canonicalization.
265
+ # Controls how documents are formatted by +Canon::PrettyPrinter::Xml+
266
+ # when +display_preprocessing: :pretty_print+ is active.
267
+ # The two attributes (+indent+ and +indent_type+) are backed by the
268
+ # parent +DiffConfig+'s resolver so that ENV overrides (e.g.
269
+ # +CANON_XML_DIFF_PRETTY_PRINTER_INDENT+) work automatically.
270
+ class PrettyPrinterConfig
271
+ def initialize(resolver)
272
+ @resolver = resolver
273
+ end
274
+
275
+ def indent
276
+ @resolver.resolve(:pretty_printer_indent)
277
+ end
278
+
279
+ def indent=(value)
280
+ @resolver.set_programmatic(:pretty_printer_indent, value)
281
+ end
282
+
283
+ def indent_type
284
+ @resolver.resolve(:pretty_printer_indent_type)
285
+ end
286
+
287
+ def indent_type=(value)
288
+ @resolver.set_programmatic(:pretty_printer_indent_type, value)
289
+ end
290
+ end
291
+
165
292
  # Diff configuration for output formatting
166
293
  class DiffConfig
294
+ attr_reader :pretty_printer
295
+
167
296
  def initialize(format = nil)
168
297
  @format = format
169
298
  @resolver = build_resolver(format)
299
+ @pretty_printer = PrettyPrinterConfig.new(@resolver)
170
300
  end
171
301
 
172
302
  def reset!
173
303
  @resolver = build_resolver(@format)
304
+ @pretty_printer = PrettyPrinterConfig.new(@resolver)
305
+ end
306
+
307
+ def apply_profile_data(data)
308
+ return unless data
309
+
310
+ data.each do |key, value|
311
+ sym_key = key.to_sym
312
+ @resolver.set_profile(sym_key, coerce_profile_value(sym_key, value))
313
+ end
314
+ end
315
+
316
+ def clear_profile!
317
+ @resolver.clear_profile!
174
318
  end
175
319
 
176
320
  # Accessors with ENV override support
@@ -230,6 +374,35 @@ module Canon
230
374
  @resolver.set_programmatic(:show_raw_inputs, value)
231
375
  end
232
376
 
377
+ # Show only the EXPECTED (fixture) block in the raw-inputs section.
378
+ # Has no effect unless +show_raw_inputs+ or +verbose_diff+ is also set.
379
+ # Use +show_raw_expected: false+ together with +show_raw_received: true+
380
+ # (or +show_raw_inputs: true+) to suppress the fixture display while
381
+ # keeping the received output.
382
+ #
383
+ # ENV variable: +CANON_<FORMAT>_DIFF_SHOW_RAW_EXPECTED+
384
+ def show_raw_expected
385
+ @resolver.resolve(:show_raw_expected)
386
+ end
387
+
388
+ def show_raw_expected=(value)
389
+ @resolver.set_programmatic(:show_raw_expected, value)
390
+ end
391
+
392
+ # Show only the RECEIVED (actual) block in the raw-inputs section.
393
+ # Combined with +show_raw_expected: false+ (or leaving it at the default
394
+ # +false+) this suppresses the fixture while still displaying the output
395
+ # that was generated.
396
+ #
397
+ # ENV variable: +CANON_<FORMAT>_DIFF_SHOW_RAW_RECEIVED+
398
+ def show_raw_received
399
+ @resolver.resolve(:show_raw_received)
400
+ end
401
+
402
+ def show_raw_received=(value)
403
+ @resolver.set_programmatic(:show_raw_received, value)
404
+ end
405
+
233
406
  def show_preprocessed_inputs
234
407
  @resolver.resolve(:show_preprocessed_inputs)
235
408
  end
@@ -238,6 +411,77 @@ module Canon
238
411
  @resolver.set_programmatic(:show_preprocessed_inputs, value)
239
412
  end
240
413
 
414
+ # Show only the EXPECTED (fixture) block in the preprocessed-inputs
415
+ # section. Has no effect unless +show_preprocessed_inputs+ or
416
+ # +verbose_diff+ is also set. Use +show_preprocessed_expected: true+
417
+ # together with +show_preprocessed_received: false+ to display only the
418
+ # preprocessed fixture while suppressing the preprocessed received output.
419
+ #
420
+ # ENV variable: +CANON_<FORMAT>_DIFF_SHOW_PREPROCESSED_EXPECTED+
421
+ def show_preprocessed_expected
422
+ @resolver.resolve(:show_preprocessed_expected)
423
+ end
424
+
425
+ def show_preprocessed_expected=(value)
426
+ @resolver.set_programmatic(:show_preprocessed_expected, value)
427
+ end
428
+
429
+ # Show only the RECEIVED (actual) block in the preprocessed-inputs
430
+ # section. Combined with +show_preprocessed_expected: false+ (or leaving
431
+ # it at the default +false+) this suppresses the fixture preprocessing
432
+ # display while still showing what the received document looked like after
433
+ # preprocessing.
434
+ #
435
+ # ENV variable: +CANON_<FORMAT>_DIFF_SHOW_PREPROCESSED_RECEIVED+
436
+ def show_preprocessed_received
437
+ @resolver.resolve(:show_preprocessed_received)
438
+ end
439
+
440
+ def show_preprocessed_received=(value)
441
+ @resolver.set_programmatic(:show_preprocessed_received, value)
442
+ end
443
+
444
+ # Show both EXPECTED and RECEIVED blocks in a fixture-ready pretty-printed
445
+ # section. The output uses the same pretty-printer as
446
+ # +display_preprocessing: :pretty_print+ (one tag per line, indentation)
447
+ # but with *no* character visualization — whitespace appears as plain ASCII
448
+ # so the output can be copy-pasted directly into RSpec fixture heredocs.
449
+ #
450
+ # ENV variable: +CANON_<FORMAT>_DIFF_SHOW_PRETTYPRINT_INPUTS+
451
+ def show_prettyprint_inputs
452
+ @resolver.resolve(:show_prettyprint_inputs)
453
+ end
454
+
455
+ def show_prettyprint_inputs=(value)
456
+ @resolver.set_programmatic(:show_prettyprint_inputs, value)
457
+ end
458
+
459
+ # Show only the EXPECTED (fixture) block in the pretty-print section.
460
+ # Useful when the fixture is what needs updating and the received side is
461
+ # not needed for copy-pasting.
462
+ #
463
+ # ENV variable: +CANON_<FORMAT>_DIFF_SHOW_PRETTYPRINT_EXPECTED+
464
+ def show_prettyprint_expected
465
+ @resolver.resolve(:show_prettyprint_expected)
466
+ end
467
+
468
+ def show_prettyprint_expected=(value)
469
+ @resolver.set_programmatic(:show_prettyprint_expected, value)
470
+ end
471
+
472
+ # Show only the RECEIVED (actual) block in the pretty-print section.
473
+ # Use this to get a copy-pasteable pretty-printed form of the generated
474
+ # output (the most common fixture-update workflow).
475
+ #
476
+ # ENV variable: +CANON_<FORMAT>_DIFF_SHOW_PRETTYPRINT_RECEIVED+
477
+ def show_prettyprint_received
478
+ @resolver.resolve(:show_prettyprint_received)
479
+ end
480
+
481
+ def show_prettyprint_received=(value)
482
+ @resolver.set_programmatic(:show_prettyprint_received, value)
483
+ end
484
+
241
485
  def show_line_numbered_inputs
242
486
  @resolver.resolve(:show_line_numbered_inputs)
243
487
  end
@@ -254,6 +498,147 @@ module Canon
254
498
  @resolver.set_programmatic(:display_format, value)
255
499
  end
256
500
 
501
+ # Controls how documents are normalized *for display* before the line
502
+ # diff. This is independent of +FormatConfig#preprocessing+, which
503
+ # controls normalization for *comparison* (equivalence detection).
504
+ #
505
+ # Values:
506
+ # :none - use documents as-is (default, existing behaviour)
507
+ # :pretty_print - run through Canon::PrettyPrinter::Xml before diffing
508
+ # :c14n - run through XML C14N normalization before diffing
509
+ def display_preprocessing
510
+ @resolver.resolve(:display_preprocessing)
511
+ end
512
+
513
+ def display_preprocessing=(value)
514
+ @resolver.set_programmatic(:display_preprocessing, value)
515
+ end
516
+
517
+ # Element names where whitespace is PRESERVED exactly (no manipulation).
518
+ # All whitespace characters are significant in these elements.
519
+ # ENV variable: +CANON_<FORMAT>_DIFF_PRESERVE_WHITESPACE_ELEMENTS+
520
+ def preserve_whitespace_elements
521
+ @resolver.resolve(:preserve_whitespace_elements) || []
522
+ end
523
+
524
+ def preserve_whitespace_elements=(value)
525
+ @resolver.set_programmatic(:preserve_whitespace_elements,
526
+ Array(value).map(&:to_s))
527
+ end
528
+
529
+ # Element names where whitespace is COLLAPSED (HTML-style behavior).
530
+ # Multiple whitespace chars collapse to single space; boundaries preserved.
531
+ # ENV variable: +CANON_<FORMAT>_DIFF_COLLAPSE_WHITESPACE_ELEMENTS+
532
+ def collapse_whitespace_elements
533
+ @resolver.resolve(:collapse_whitespace_elements) || []
534
+ end
535
+
536
+ def collapse_whitespace_elements=(value)
537
+ @resolver.set_programmatic(:collapse_whitespace_elements,
538
+ Array(value).map(&:to_s))
539
+ end
540
+
541
+ # Element names where whitespace-only text nodes are STRIPPED.
542
+ # ENV variable: +CANON_<FORMAT>_DIFF_STRIP_WHITESPACE_ELEMENTS+
543
+ def strip_whitespace_elements
544
+ @resolver.resolve(:strip_whitespace_elements) || []
545
+ end
546
+
547
+ def strip_whitespace_elements=(value)
548
+ @resolver.set_programmatic(:strip_whitespace_elements,
549
+ Array(value).map(&:to_s))
550
+ end
551
+
552
+ # When true, whitespace-only text nodes starting with "\n" in :collapse
553
+ # elements of the **expected** (fixture) document are treated as structural
554
+ # indentation and dropped from both comparison and display. Use this when
555
+ # fixture files are indented but received XML is compact.
556
+ # ENV variable: +CANON_<FORMAT>_DIFF_PRETTY_PRINTED_EXPECTED+
557
+ def pretty_printed_expected
558
+ @resolver.resolve(:pretty_printed_expected)
559
+ end
560
+
561
+ def pretty_printed_expected=(value)
562
+ @resolver.set_programmatic(:pretty_printed_expected, value)
563
+ end
564
+
565
+ # When true, whitespace-only text nodes starting with "\n" in :normalize
566
+ # elements of the **received** document are treated as structural
567
+ # indentation and dropped from both comparison and display. Use this when
568
+ # received XML may be pretty-printed but the fixture is compact.
569
+ # ENV variable: +CANON_<FORMAT>_DIFF_PRETTY_PRINTED_RECEIVED+
570
+ def pretty_printed_received
571
+ @resolver.resolve(:pretty_printed_received)
572
+ end
573
+
574
+ def pretty_printed_received=(value)
575
+ @resolver.set_programmatic(:pretty_printed_received, value)
576
+ end
577
+
578
+ # When true, attributes on each element are sorted by namespace URI
579
+ # then local name in the pretty-printed display, eliminating spurious
580
+ # diff noise from differing attribute order.
581
+ # ENV variable: +CANON_<FORMAT>_DIFF_PRETTY_PRINTER_SORT_ATTRIBUTES+
582
+ def pretty_printer_sort_attributes
583
+ @resolver.resolve(:pretty_printer_sort_attributes)
584
+ end
585
+
586
+ def pretty_printer_sort_attributes=(value)
587
+ @resolver.set_programmatic(:pretty_printer_sort_attributes, value)
588
+ end
589
+
590
+ # Render element nodes in the Semantic Diff Report as compact inline XML
591
+ # (e.g. +<strong>Annex</strong>+) instead of the verbose node_info
592
+ # description string (e.g. "name: strong namespace_uri: …").
593
+ #
594
+ # Default: +false+ (keep existing verbose format for backwards compatibility)
595
+ # ENV variable: +CANON_<FORMAT>_DIFF_COMPACT_SEMANTIC_REPORT+
596
+ def compact_semantic_report
597
+ @resolver.resolve(:compact_semantic_report)
598
+ end
599
+
600
+ def compact_semantic_report=(value)
601
+ @resolver.set_programmatic(:compact_semantic_report, value)
602
+ end
603
+
604
+ # Show the full serialized node content (including children) in
605
+ # element_structure diffs instead of just the tag name.
606
+ #
607
+ # Default: +false+ (show only the tag name, e.g. +<biblio-tag>+)
608
+ # ENV variable: +CANON_<FORMAT>_DIFF_EXPAND_DIFFERENCE+
609
+ def expand_difference
610
+ @resolver.resolve(:expand_difference)
611
+ end
612
+
613
+ def expand_difference=(value)
614
+ @resolver.set_programmatic(:expand_difference, value)
615
+ end
616
+
617
+ # Controls whether invisible characters (spaces, tabs, non-breaking
618
+ # spaces, etc.) are replaced with visible Unicode symbols in diff output.
619
+ #
620
+ # Values:
621
+ # true - apply the full default visualization map (default)
622
+ # false - disable visualization; output plain text
623
+ # :content_only - reserved for future use; currently behaves as +true+.
624
+ # Future intent: apply visualization only to DOM text
625
+ # node content, not to structural indentation whitespace.
626
+ # (TODO: implement DOM-level pre-serialization pass)
627
+ def character_visualization
628
+ val = @resolver.resolve(:character_visualization)
629
+ # Coerce symbol booleans that may arrive via ENV (env_schema uses :symbol type
630
+ # so "true"/"false" env strings become :true/:false symbols)
631
+ case val
632
+ when true, :true then true # rubocop:disable Lint/BooleanSymbol
633
+ when false, :false then false # rubocop:disable Lint/BooleanSymbol
634
+ else val # true/false from programmatic, or :content_only
635
+ end
636
+ end
637
+
638
+ def character_visualization=(value)
639
+ @resolver.set_programmatic(:character_visualization, value)
640
+ end
641
+
257
642
  def algorithm
258
643
  @resolver.resolve(:algorithm)
259
644
  end
@@ -309,9 +694,27 @@ module Canon
309
694
  verbose_diff: verbose_diff,
310
695
  diff_algorithm: algorithm,
311
696
  show_raw_inputs: show_raw_inputs,
697
+ show_raw_expected: show_raw_expected,
698
+ show_raw_received: show_raw_received,
312
699
  show_preprocessed_inputs: show_preprocessed_inputs,
700
+ show_preprocessed_expected: show_preprocessed_expected,
701
+ show_preprocessed_received: show_preprocessed_received,
702
+ show_prettyprint_inputs: show_prettyprint_inputs,
703
+ show_prettyprint_expected: show_prettyprint_expected,
704
+ show_prettyprint_received: show_prettyprint_received,
313
705
  show_line_numbered_inputs: show_line_numbered_inputs,
706
+ character_visualization: character_visualization,
314
707
  display_format: display_format,
708
+ display_preprocessing: display_preprocessing,
709
+ pretty_printer_indent: pretty_printer.indent,
710
+ pretty_printer_indent_type: pretty_printer.indent_type,
711
+ preserve_whitespace_elements: preserve_whitespace_elements,
712
+ collapse_whitespace_elements: collapse_whitespace_elements,
713
+ strip_whitespace_elements: strip_whitespace_elements,
714
+ pretty_printed_expected: pretty_printed_expected,
715
+ pretty_printed_received: pretty_printed_received,
716
+ compact_semantic_report: compact_semantic_report,
717
+ expand_difference: expand_difference,
315
718
  max_file_size: max_file_size,
316
719
  max_node_count: max_node_count,
317
720
  max_diff_lines: max_diff_lines,
@@ -331,12 +734,31 @@ module Canon
331
734
  verbose_diff: false,
332
735
  algorithm: :dom,
333
736
  show_raw_inputs: false,
737
+ show_raw_expected: false,
738
+ show_raw_received: false,
334
739
  show_preprocessed_inputs: false,
740
+ show_preprocessed_expected: false,
741
+ show_preprocessed_received: false,
742
+ show_prettyprint_inputs: false,
743
+ show_prettyprint_expected: false,
744
+ show_prettyprint_received: false,
335
745
  show_line_numbered_inputs: false,
336
- display_format: :raw, # :raw = no formatting, :canonical = HTML-aware formatting
337
- max_file_size: 5_242_880, # 5MB in bytes
338
- max_node_count: 10_000, # Maximum nodes in tree
339
- max_diff_lines: 10_000, # Maximum diff output lines
746
+ character_visualization: true, # true, false, :content_only
747
+ display_format: :raw, # :raw = no formatting, :canonical = HTML-aware formatting
748
+ display_preprocessing: :none, # :none, :pretty_print, :c14n
749
+ pretty_printer_indent: 2,
750
+ pretty_printer_indent_type: :space, # :space or :tab
751
+ preserve_whitespace_elements: [],
752
+ collapse_whitespace_elements: [],
753
+ strip_whitespace_elements: [],
754
+ pretty_printed_expected: false,
755
+ pretty_printed_received: false,
756
+ pretty_printer_sort_attributes: false,
757
+ compact_semantic_report: false,
758
+ expand_difference: false,
759
+ max_file_size: 5_242_880, # 5MB in bytes
760
+ max_node_count: 10_000, # Maximum nodes in tree
761
+ max_diff_lines: 10_000, # Maximum diff output lines
340
762
  theme: :dark, # Default theme
341
763
  }
342
764
 
@@ -348,6 +770,48 @@ module Canon
348
770
  env: env,
349
771
  )
350
772
  end
773
+
774
+ # Coerce a YAML value to the appropriate Ruby type based on EnvSchema.
775
+ # YAML natively handles booleans, integers, and arrays, but symbols
776
+ # arrive as strings and need conversion.
777
+ def coerce_profile_value(key, value)
778
+ return value if value.is_a?(Array) # string_array already correct from YAML
779
+
780
+ type = EnvSchema.type_for(key)
781
+ case type
782
+ when :symbol
783
+ value.is_a?(String) ? value.to_sym : value
784
+ when :boolean
785
+ # YAML booleans are already true/false
786
+ value
787
+ when :integer
788
+ value.is_a?(String) ? Integer(value) : value
789
+ else
790
+ value
791
+ end
792
+ end
793
+ end
794
+
795
+ private
796
+
797
+ def apply_profile(name_or_path)
798
+ data = ProfileLoader.load(name_or_path)
799
+ shared = data["shared"] || {}
800
+ formats = data["formats"] || {}
801
+
802
+ format_configs.each do |fmt_key, fmt_cfg|
803
+ fmt_data = ProfileLoader.send(:deep_merge, shared,
804
+ formats[fmt_key.to_s] || {})
805
+ fmt_cfg.apply_profile_data(fmt_data)
806
+ end
807
+ end
808
+
809
+ def clear_profile_values!
810
+ format_configs.each_value(&:clear_profile!)
811
+ end
812
+
813
+ def format_configs
814
+ { xml: @xml, html: @html, json: @json, yaml: @yaml, string: @string }
351
815
  end
352
816
  end
353
817
  end
data/lib/canon/diff/diff_classifier.rb CHANGED
@@ -38,7 +38,12 @@ module Canon
38
38
  # FIRST: Check for XML serialization-level formatting differences
39
39
  # These are ALWAYS non-normative (formatting-only) regardless of match options
40
40
  # Examples: self-closing tags (<tag/>) vs explicit closing tags (<tag></tag>)
41
- if XmlSerializationFormatter.serialization_formatting?(diff_node)
41
+ #
42
+ # EXCEPTION: If the text node is inside a whitespace-sensitive element
43
+ # (:preserve or :collapse), don't dismiss as serialization formatting
44
+ # because whitespace presence is meaningful in those elements.
45
+ if !inside_whitespace_sensitive_element?(diff_node) &&
46
+ XmlSerializationFormatter.serialization_formatting?(diff_node)
42
47
  diff_node.formatting = true
43
48
  diff_node.normative = false
44
49
  return diff_node
@@ -49,9 +54,11 @@ module Canon
49
54
  # it should be marked as non-normative (informative)
50
55
  # This ensures that verbose and non-verbose modes give consistent results
51
56
  #
52
- # EXCEPTION: If the text node is inside a whitespace-sensitive element
57
+ # EXCEPTION: If the text node is inside a PRESERVE whitespace element
53
58
  # (like <pre>, <code>, <textarea> in HTML), don't apply formatting detection
54
- # because whitespace should be preserved in these elements
59
+ # because whitespace should be preserved exactly in these elements.
60
+ # Note: COLLAPSE elements like <p> DO get formatting detection because
61
+ # their whitespace IS normalized (differences are formatting-only).
55
62
  #
56
63
  # This check must come BEFORE normative_dimension? is called,
57
64
  # because normative_dimension? returns true for text_content: :normalize
@@ -59,7 +66,7 @@ module Canon
59
66
  # detection from being applied.
60
67
  if diff_node.dimension == :text_content &&
61
68
  profile.send(:behavior_for, :text_content) == :normalize &&
62
- !inside_whitespace_sensitive_element?(diff_node) &&
69
+ !inside_preserve_element?(diff_node) &&
63
70
  formatting_only_diff?(diff_node)
64
71
  diff_node.formatting = true
65
72
  diff_node.normative = false
@@ -143,19 +150,42 @@ module Canon
143
150
  end
144
151
 
145
152
  # Check if the text node is inside a whitespace-sensitive element
153
+ # (preserve/collapse classification or xml:space='preserve').
154
+ # In these elements, whitespace presence is meaningful and should
155
+ # not be dismissed as serialization formatting.
146
156
  # @param diff_node [DiffNode] The diff node to check
147
- # @return [Boolean] true if inside a whitespace-sensitive element
157
+ # @return [Boolean] true if whitespace is preserved for this element
148
158
  def inside_whitespace_sensitive_element?(diff_node)
149
- # Get the text node (not the parent element)
150
159
  node = diff_node.node1 || diff_node.node2
151
160
  return false unless node
152
161
 
153
- # WhitespaceSensitivity.element_sensitive? expects a text node
154
- # and checks its parent element
155
- # We need to pass the full options structure with :match_opts key
156
- opts = { match_opts: @match_options.options }
162
+ return false unless node.respond_to?(:parent)
163
+
164
+ parent = node.parent
165
+ return false unless parent
166
+
167
+ match_opts = @match_options.options
168
+ Canon::Comparison::WhitespaceSensitivity.whitespace_preserved?(parent,
169
+ match_opts)
170
+ end
171
+
172
+ # Check if the text node is inside a PRESERVE whitespace element
173
+ # Only returns true for elements where whitespace is preserved exactly (:preserve),
174
+ # not for elements where whitespace is normalized (:collapse).
175
+ # @param diff_node [DiffNode] The diff node to check
176
+ # @return [Boolean] true if inside a preserve whitespace element
177
+ def inside_preserve_element?(diff_node)
178
+ node = diff_node.node1 || diff_node.node2
179
+ return false unless node
180
+
181
+ match_opts = @match_options.options
182
+ parent = node.parent
183
+ return false unless parent
157
184
 
158
- Canon::Comparison::WhitespaceSensitivity.element_sensitive?(node, opts)
185
+ classification = Canon::Comparison::WhitespaceSensitivity.classify_element(
186
+ parent, match_opts
187
+ )
188
+ classification == :preserve
159
189
  end
160
190
 
161
191
  # Extract text content from a node for formatting comparison