canon 0.1.22 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +174 -25
- data/docs/INDEX.adoc +4 -0
- data/docs/advanced/diff-classification.adoc +3 -2
- data/docs/features/configuration-profiles.adoc +288 -0
- data/docs/features/diff-formatting/character-visualization.adoc +153 -454
- data/docs/features/diff-formatting/display-filtering.adoc +44 -0
- data/docs/features/diff-formatting/display-preprocessing.adoc +656 -0
- data/docs/features/diff-formatting/index.adoc +47 -0
- data/docs/features/diff-formatting/pretty-diff-mode.adoc +154 -0
- data/docs/features/environment-configuration/override-system.adoc +10 -3
- data/docs/features/index.adoc +9 -0
- data/docs/features/match-options/index.adoc +32 -42
- data/docs/features/match-options/pretty-printed-fixtures.adoc +270 -0
- data/docs/guides/choosing-configuration.adoc +22 -0
- data/docs/reference/environment-variables.adoc +121 -1
- data/docs/reference/options-across-interfaces.adoc +182 -2
- data/lib/canon/cli.rb +20 -0
- data/lib/canon/commands/diff_command.rb +7 -2
- data/lib/canon/commands/format_command.rb +1 -1
- data/lib/canon/comparison/html_comparator.rb +20 -15
- data/lib/canon/comparison/html_compare_profile.rb +4 -4
- data/lib/canon/comparison/markup_comparator.rb +12 -3
- data/lib/canon/comparison/match_options/base_resolver.rb +29 -7
- data/lib/canon/comparison/match_options/json_resolver.rb +9 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +16 -2
- data/lib/canon/comparison/match_options/yaml_resolver.rb +10 -0
- data/lib/canon/comparison/match_options.rb +4 -1
- data/lib/canon/comparison/whitespace_sensitivity.rb +189 -137
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +21 -4
- data/lib/canon/comparison/xml_comparator.rb +14 -12
- data/lib/canon/comparison/xml_node_comparison.rb +51 -6
- data/lib/canon/comparison.rb +52 -9
- data/lib/canon/config/env_schema.rb +32 -4
- data/lib/canon/config/override_resolver.rb +16 -3
- data/lib/canon/config/profile_loader.rb +135 -0
- data/lib/canon/config/profiles/metanorma.yml +74 -0
- data/lib/canon/config/profiles/metanorma_debug.yml +8 -0
- data/lib/canon/config/type_converter.rb +8 -0
- data/lib/canon/config.rb +469 -5
- data/lib/canon/diff/diff_classifier.rb +41 -11
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +48 -17
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +58 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +22 -7
- data/lib/canon/diff_formatter/theme.rb +24 -17
- data/lib/canon/diff_formatter.rb +493 -36
- data/lib/canon/pretty_printer/xml_normalized.rb +395 -0
- data/lib/canon/rspec_matchers.rb +36 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +26 -11
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/nodes/namespace_node.rb +4 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +4 -0
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +4 -0
- data/lib/tasks/performance_helpers.rb +2 -2
- metadata +24 -2
data/lib/canon/diff_formatter.rb
CHANGED
|
@@ -167,8 +167,27 @@ module Canon
|
|
|
167
167
|
diff_grouping_lines: nil, visualization_map: nil,
|
|
168
168
|
character_map_file: nil, character_definitions: nil,
|
|
169
169
|
show_diffs: :all, verbose_diff: false,
|
|
170
|
-
show_raw_inputs: false,
|
|
170
|
+
show_raw_inputs: false, show_raw_expected: false,
|
|
171
|
+
show_raw_received: false,
|
|
172
|
+
show_preprocessed_inputs: false,
|
|
173
|
+
show_preprocessed_expected: false,
|
|
174
|
+
show_preprocessed_received: false,
|
|
175
|
+
show_prettyprint_inputs: false,
|
|
176
|
+
show_prettyprint_expected: false,
|
|
177
|
+
show_prettyprint_received: false,
|
|
171
178
|
show_line_numbered_inputs: false,
|
|
179
|
+
character_visualization: true,
|
|
180
|
+
display_preprocessing: :none,
|
|
181
|
+
pretty_printer_indent: 2,
|
|
182
|
+
pretty_printer_indent_type: :space,
|
|
183
|
+
preserve_whitespace_elements: [],
|
|
184
|
+
collapse_whitespace_elements: [],
|
|
185
|
+
strip_whitespace_elements: [],
|
|
186
|
+
pretty_printed_expected: false,
|
|
187
|
+
pretty_printed_received: false,
|
|
188
|
+
pretty_printer_sort_attributes: false,
|
|
189
|
+
compact_semantic_report: false,
|
|
190
|
+
expand_difference: false,
|
|
172
191
|
diff_mode: :separate, legacy_terminal: false)
|
|
173
192
|
# rubocop:enable Metrics/ParameterLists
|
|
174
193
|
@use_color = use_color
|
|
@@ -178,11 +197,31 @@ module Canon
|
|
|
178
197
|
@show_diffs = show_diffs
|
|
179
198
|
@verbose_diff = verbose_diff
|
|
180
199
|
@show_raw_inputs = show_raw_inputs
|
|
200
|
+
@show_raw_expected = show_raw_expected
|
|
201
|
+
@show_raw_received = show_raw_received
|
|
181
202
|
@show_preprocessed_inputs = show_preprocessed_inputs
|
|
203
|
+
@show_preprocessed_expected = show_preprocessed_expected
|
|
204
|
+
@show_preprocessed_received = show_preprocessed_received
|
|
205
|
+
@show_prettyprint_inputs = show_prettyprint_inputs
|
|
206
|
+
@show_prettyprint_expected = show_prettyprint_expected
|
|
207
|
+
@show_prettyprint_received = show_prettyprint_received
|
|
182
208
|
@show_line_numbered_inputs = show_line_numbered_inputs
|
|
209
|
+
@character_visualization = character_visualization
|
|
210
|
+
@display_preprocessing = display_preprocessing
|
|
211
|
+
@pretty_printer_indent = pretty_printer_indent
|
|
212
|
+
@pretty_printer_indent_type = pretty_printer_indent_type
|
|
213
|
+
@preserve_whitespace_elements = Array(preserve_whitespace_elements).map(&:to_s)
|
|
214
|
+
@collapse_whitespace_elements = Array(collapse_whitespace_elements).map(&:to_s)
|
|
215
|
+
@strip_whitespace_elements = Array(strip_whitespace_elements).map(&:to_s)
|
|
216
|
+
@pretty_printed_expected = pretty_printed_expected
|
|
217
|
+
@pretty_printed_received = pretty_printed_received
|
|
218
|
+
@pretty_printer_sort_attributes = pretty_printer_sort_attributes
|
|
219
|
+
@compact_semantic_report = compact_semantic_report
|
|
220
|
+
@expand_difference = expand_difference
|
|
183
221
|
@diff_mode = legacy_terminal ? :separate : diff_mode
|
|
184
222
|
@legacy_terminal = legacy_terminal
|
|
185
223
|
@visualization_map = build_visualization_map(
|
|
224
|
+
character_visualization: character_visualization,
|
|
186
225
|
visualization_map: visualization_map,
|
|
187
226
|
character_map_file: character_map_file,
|
|
188
227
|
character_definitions: character_definitions,
|
|
@@ -269,6 +308,12 @@ module Canon
|
|
|
269
308
|
differences: differences)
|
|
270
309
|
end
|
|
271
310
|
|
|
311
|
+
# In pretty_diff mode, always use text-LCS diff (bypasses DiffNodeMapper).
|
|
312
|
+
# pretty_diff_format handles nil doc1/doc2 itself (emits header only).
|
|
313
|
+
if @mode == :pretty_diff
|
|
314
|
+
return pretty_diff_format(doc1, doc2, format: format)
|
|
315
|
+
end
|
|
316
|
+
|
|
272
317
|
no_diffs = if differences.respond_to?(:equivalent?)
|
|
273
318
|
differences.equivalent?
|
|
274
319
|
else
|
|
@@ -280,6 +325,8 @@ module Canon
|
|
|
280
325
|
when :by_line
|
|
281
326
|
by_line_diff(doc1, doc2, format: format, html_version: html_version,
|
|
282
327
|
differences: differences)
|
|
328
|
+
when :pretty_diff
|
|
329
|
+
pretty_diff_format(doc1, doc2, format: format)
|
|
283
330
|
else
|
|
284
331
|
by_object_diff(differences, format)
|
|
285
332
|
end
|
|
@@ -333,30 +380,65 @@ module Canon
|
|
|
333
380
|
output << DiffDetailFormatter.format_report(
|
|
334
381
|
comparison_result.differences,
|
|
335
382
|
use_color: @use_color,
|
|
383
|
+
show_diffs: @show_diffs,
|
|
384
|
+
compact_semantic_report: @compact_semantic_report,
|
|
385
|
+
expand_difference: @expand_difference,
|
|
336
386
|
)
|
|
337
387
|
end
|
|
338
388
|
|
|
339
|
-
# verbose_diff
|
|
340
|
-
|
|
341
|
-
|
|
389
|
+
# verbose_diff / show_raw_inputs shows both sides as a convenience shorthand.
|
|
390
|
+
# show_raw_expected / show_raw_received give per-side control.
|
|
391
|
+
combined_raw = @verbose_diff || @show_raw_inputs
|
|
392
|
+
show_raw_exp = combined_raw || @show_raw_expected
|
|
393
|
+
show_raw_rec = combined_raw || @show_raw_received
|
|
394
|
+
verbose = show_raw_exp || show_raw_rec
|
|
395
|
+
# verbose_diff / show_preprocessed_inputs shows both sides as a shorthand.
|
|
396
|
+
# show_preprocessed_expected / show_preprocessed_received give per-side control.
|
|
397
|
+
combined_prep = @verbose_diff || @show_preprocessed_inputs
|
|
398
|
+
show_prep_exp = combined_prep || @show_preprocessed_expected
|
|
399
|
+
show_prep_rec = combined_prep || @show_preprocessed_received
|
|
400
|
+
show_prep = show_prep_exp || show_prep_rec
|
|
342
401
|
show_line = @verbose_diff || @show_line_numbered_inputs
|
|
343
402
|
|
|
344
|
-
# 3. Raw/Original Input Display (when show_raw_inputs
|
|
403
|
+
# 3. Raw/Original Input Display (when show_raw_inputs/show_raw_expected/show_raw_received enabled)
|
|
345
404
|
if verbose && comparison_result.is_a?(Canon::Comparison::ComparisonResult)
|
|
346
405
|
original1, original2 = comparison_result.original_strings
|
|
347
406
|
if original1 && original2
|
|
348
|
-
output << format_raw_inputs(original1, original2
|
|
407
|
+
output << format_raw_inputs(original1, original2,
|
|
408
|
+
show_expected: show_raw_exp,
|
|
409
|
+
show_received: show_raw_rec)
|
|
349
410
|
end
|
|
350
411
|
end
|
|
351
412
|
|
|
352
|
-
# 4. Preprocessed Input Display (when show_preprocessed_inputs
|
|
413
|
+
# 4. Preprocessed Input Display (when show_preprocessed_inputs/expected/received enabled)
|
|
353
414
|
if show_prep && comparison_result.is_a?(Canon::Comparison::ComparisonResult)
|
|
354
415
|
preprocessed1, preprocessed2 = comparison_result.preprocessed_strings
|
|
355
416
|
if preprocessed1 && preprocessed2
|
|
356
417
|
preprocessing_info = comparison_result.match_options&.dig(:match,
|
|
357
418
|
:preprocessing)
|
|
358
419
|
output << format_preprocessed_inputs(preprocessed1, preprocessed2,
|
|
359
|
-
preprocessing_info
|
|
420
|
+
preprocessing_info,
|
|
421
|
+
show_expected: show_prep_exp,
|
|
422
|
+
show_received: show_prep_rec)
|
|
423
|
+
end
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
# 4.5. Pretty-printed Input Display (when show_prettyprint_inputs/expected/received enabled)
|
|
427
|
+
# Pretty-prints the ORIGINAL strings (not preprocessed) through PrettyPrinter::Xml/Html
|
|
428
|
+
# with NO character visualization — output is plain ASCII suitable for copy-pasting
|
|
429
|
+
# into RSpec fixture heredocs. verbose_diff does NOT enable these options.
|
|
430
|
+
show_pp_inp = @show_prettyprint_inputs
|
|
431
|
+
show_pp_exp = show_pp_inp || @show_prettyprint_expected
|
|
432
|
+
show_pp_rec = show_pp_inp || @show_prettyprint_received
|
|
433
|
+
show_pp = show_pp_exp || show_pp_rec
|
|
434
|
+
|
|
435
|
+
if show_pp && comparison_result.is_a?(Canon::Comparison::ComparisonResult)
|
|
436
|
+
orig1, orig2 = comparison_result.original_strings
|
|
437
|
+
if orig1 && orig2
|
|
438
|
+
pp1, pp2 = prettyprint_for_display(orig1, orig2, format)
|
|
439
|
+
output << format_prettyprint_inputs(pp1, pp2,
|
|
440
|
+
show_expected: show_pp_exp,
|
|
441
|
+
show_received: show_pp_rec)
|
|
360
442
|
end
|
|
361
443
|
end
|
|
362
444
|
|
|
@@ -497,41 +579,65 @@ module Canon
|
|
|
497
579
|
end
|
|
498
580
|
|
|
499
581
|
# Format raw/original inputs for display (user-friendly copyable format)
|
|
500
|
-
# Shows the raw file contents before any preprocessing
|
|
582
|
+
# Shows the raw file contents before any preprocessing.
|
|
583
|
+
#
|
|
584
|
+
# Use +show_expected:+ and +show_received:+ to control which side is
|
|
585
|
+
# rendered. Both default to +true+ so existing callers are unaffected.
|
|
586
|
+
# Pass +show_expected: false+ to suppress the fixture/expected block while
|
|
587
|
+
# still showing the received output (useful when the fixture is very long
|
|
588
|
+
# and the user only wants to see what the generator produced).
|
|
501
589
|
#
|
|
502
|
-
# @param raw1 [String] First raw input string
|
|
503
|
-
# @param raw2 [String] Second raw input string
|
|
590
|
+
# @param raw1 [String] First raw input string (expected / fixture)
|
|
591
|
+
# @param raw2 [String] Second raw input string (received / actual)
|
|
592
|
+
# @param show_expected [Boolean] Render the EXPECTED block
|
|
593
|
+
# @param show_received [Boolean] Render the RECEIVED block
|
|
504
594
|
# @return [String] Formatted display of raw inputs
|
|
505
|
-
def format_raw_inputs(raw1, raw2)
|
|
595
|
+
def format_raw_inputs(raw1, raw2, show_expected: true, show_received: true)
  # Nothing to render when an input is missing or both sides are hidden.
  return "" if [raw1, raw2].any?(&:nil?)
  return "" if !show_expected && !show_received

  rule = "-" * 70
  sections = ["", colorize("=== ORIGINAL INPUTS (Raw) ===", :cyan, :bold), ""]

  # Each visible side contributes: label, rule, verbatim content, blank line.
  if show_expected
    sections.push(colorize("EXPECTED:", :yellow, :bold), rule, raw1, "")
  end
  if show_received
    sections.push(colorize("RECEIVED:", :yellow, :bold), rule, raw2, "")
  end

  # Trailing blank entry keeps a newline after the last section once joined.
  sections.push("")
  sections.join("\n")
end
|
|
524
621
|
|
|
525
622
|
# Format preprocessed inputs for display (what was actually compared)
|
|
526
623
|
# Shows the content after preprocessing (c14n, normalize, format, etc.)
|
|
527
624
|
#
|
|
528
|
-
#
|
|
529
|
-
#
|
|
625
|
+
# Use +show_expected:+ and +show_received:+ to control which side is rendered.
|
|
626
|
+
# Both default to +true+ so existing callers are unaffected.
|
|
627
|
+
# Pass +show_expected: false+ to suppress the fixture/expected block while
|
|
628
|
+
# still showing the preprocessed received output.
|
|
629
|
+
#
|
|
630
|
+
# @param preprocessed1 [String] First preprocessed string (expected / fixture)
|
|
631
|
+
# @param preprocessed2 [String] Second preprocessed string (received / actual)
|
|
530
632
|
# @param preprocessing_info [Symbol, nil] Preprocessing mode (:c14n, :normalize, :format, etc.)
|
|
633
|
+
# @param show_expected [Boolean] Render the EXPECTED block
|
|
634
|
+
# @param show_received [Boolean] Render the RECEIVED block
|
|
531
635
|
# @return [String] Formatted display of preprocessed inputs
|
|
532
636
|
def format_preprocessed_inputs(preprocessed1, preprocessed2,
|
|
533
|
-
preprocessing_info = nil
|
|
637
|
+
preprocessing_info = nil,
|
|
638
|
+
show_expected: true, show_received: true)
|
|
534
639
|
return "" if preprocessed1.nil? || preprocessed2.nil?
|
|
640
|
+
return "" unless show_expected || show_received
|
|
535
641
|
|
|
536
642
|
output = []
|
|
537
643
|
output << ""
|
|
@@ -542,16 +648,22 @@ preprocessing_info = nil)
|
|
|
542
648
|
output << "Preprocessing: #{preprocessing_info}"
|
|
543
649
|
end
|
|
544
650
|
output << ""
|
|
545
|
-
output << colorize("EXPECTED:", :yellow, :bold)
|
|
546
|
-
output << ("-" * 70)
|
|
547
|
-
output << preprocessed1
|
|
548
|
-
output << ""
|
|
549
|
-
output << colorize("RECEIVED:", :yellow, :bold)
|
|
550
|
-
output << ("-" * 70)
|
|
551
|
-
output << preprocessed2
|
|
552
|
-
output << ""
|
|
553
|
-
output << ""
|
|
554
651
|
|
|
652
|
+
if show_expected
|
|
653
|
+
output << colorize("EXPECTED:", :yellow, :bold)
|
|
654
|
+
output << ("-" * 70)
|
|
655
|
+
output << preprocessed1
|
|
656
|
+
output << ""
|
|
657
|
+
end
|
|
658
|
+
|
|
659
|
+
if show_received
|
|
660
|
+
output << colorize("RECEIVED:", :yellow, :bold)
|
|
661
|
+
output << ("-" * 70)
|
|
662
|
+
output << preprocessed2
|
|
663
|
+
output << ""
|
|
664
|
+
end
|
|
665
|
+
|
|
666
|
+
output << ""
|
|
555
667
|
output.join("\n")
|
|
556
668
|
end
|
|
557
669
|
|
|
@@ -561,12 +673,23 @@ preprocessing_info = nil)
|
|
|
561
673
|
# @param character_map_file [String, nil] Path to custom YAML file
|
|
562
674
|
# @param character_definitions [Array<Hash>, nil] Individual character definitions
|
|
563
675
|
# @return [Hash] Final visualization map
|
|
564
|
-
def build_visualization_map(
|
|
676
|
+
def build_visualization_map(character_visualization: true,
|
|
677
|
+
visualization_map: nil,
|
|
678
|
+
character_map_file: nil,
|
|
565
679
|
character_definitions: nil)
|
|
566
680
|
# Priority order:
|
|
681
|
+
# 0. character_visualization: false → return empty map (no substitution)
|
|
567
682
|
# 1. If visualization_map is provided, use it as complete replacement
|
|
568
683
|
# 2. Otherwise, start with defaults and apply customizations
|
|
569
684
|
|
|
685
|
+
# false disables all visualization
|
|
686
|
+
return {} if character_visualization == false
|
|
687
|
+
|
|
688
|
+
# :content_only currently behaves as true (full map)
|
|
689
|
+
# TODO: apply visualization at DOM text-node level pre-serialization,
|
|
690
|
+
# keeping structural indentation whitespace plain.
|
|
691
|
+
# See docs/features/diff-formatting/character-visualization.adoc
|
|
692
|
+
|
|
570
693
|
return visualization_map if visualization_map
|
|
571
694
|
|
|
572
695
|
# Start with defaults
|
|
@@ -644,6 +767,8 @@ differences: [])
|
|
|
644
767
|
|
|
645
768
|
return output.join("\n") if doc1.nil? || doc2.nil?
|
|
646
769
|
|
|
770
|
+
# Apply display preprocessing (format both sides identically before diff)
|
|
771
|
+
doc1, doc2 = apply_display_preprocessing(doc1, doc2, format)
|
|
647
772
|
# Extract differences array and equivalent status from ComparisonResult if needed
|
|
648
773
|
diffs_array = if differences.is_a?(Canon::Comparison::ComparisonResult)
|
|
649
774
|
@comparison_equivalent = differences.equivalent?
|
|
@@ -672,8 +797,340 @@ differences: [])
|
|
|
672
797
|
output.join("\n")
|
|
673
798
|
end
|
|
674
799
|
|
|
675
|
-
#
|
|
676
|
-
#
|
|
800
|
+
# Generate a text-LCS diff against preprocessed lines (pretty_diff mode).
|
|
801
|
+
#
|
|
802
|
+
# This mode bypasses DiffNodeMapper entirely: it applies display_preprocessing
|
|
803
|
+
# to both sides, then runs Diff::LCS.sdiff on the resulting plain-text lines.
|
|
804
|
+
# It is a reliable short-term workaround for #85 (normative changes invisible
|
|
805
|
+
# in :by_line mode when DiffNodeMapper's DOM-address correlation is off).
|
|
806
|
+
#
|
|
807
|
+
# Limitations:
|
|
808
|
+
# - show_diffs :normative / :informative filter is ignored (no DiffNodes)
|
|
809
|
+
# - No inline character highlighting (whole-line granularity only)
|
|
810
|
+
#
|
|
811
|
+
# @param doc1 [String] First document
|
|
812
|
+
# @param doc2 [String] Second document
|
|
813
|
+
# @param format [Symbol] Document format
|
|
814
|
+
# @return [String] Formatted diff output
|
|
815
|
+
def pretty_diff_format(doc1, doc2, format:)
  # Lazy-load: diff-lcs is only needed when pretty_diff mode is actually used.
  require "diff/lcs"

  output = [colorize("Pretty diff (#{format.to_s.upcase} mode):", :cyan, :bold)]

  # Header only when either side is missing.
  return output.join("\n") if doc1.nil? || doc2.nil?

  # Same display transforms as by_line_diff.
  expected, received = apply_display_preprocessing(doc1, doc2, format)

  # apply_display_preprocessing hands the inputs back untouched when no
  # preprocessing mode matches, so coerce with to_s before splitting into
  # lines (mirrors the doc.to_s coercion done by the safe_* helpers).
  expected_lines = expected.to_s.lines.map(&:chomp)
  received_lines = received.to_s.lines.map(&:chomp)

  output << render_pretty_diff(::Diff::LCS.sdiff(expected_lines, received_lines))
  output.join("\n")
end
|
|
837
|
+
|
|
838
|
+
# Render sdiff hunks with context windowing and colorization.
|
|
839
|
+
#
|
|
840
|
+
# Uses the same context_lines setting as by_line_diff. Changed hunks
|
|
841
|
+
# (action != "=") are expanded by context_lines in each direction; nearby
|
|
842
|
+
# windows are merged; a separator is emitted between non-adjacent blocks.
|
|
843
|
+
#
|
|
844
|
+
# @param hunks [Array<Diff::LCS::ContextChange>] Output of Diff::LCS.sdiff
|
|
845
|
+
# @return [String] Rendered diff lines joined with "\n"
|
|
846
|
+
def render_pretty_diff(hunks)
  # Indexes of every hunk that is not an unchanged ("=") line.
  changed = (0...hunks.length).select { |idx| hunks[idx].action != "=" }
  return colorize(" (no differences)", :green) if changed.empty?

  radius = [@context_lines || 3, 0].max
  last_index = hunks.length - 1

  # Grow each change into a [first, last] window and fold it into the
  # previous window whenever the two overlap or touch.
  blocks = changed.each_with_object([]) do |pos, acc|
    first = [pos - radius, 0].max
    last = [pos + radius, last_index].min
    if acc.empty? || first > acc.last[1] + 1
      acc << [first, last]
    else
      acc.last[1] = [acc.last[1], last].max
    end
  end

  # Unchanged lines get an explicit ANSI reset only when color is enabled.
  plain_prefix = @use_color ? "\e[0m " : " "

  rendered = []
  blocks.each_with_index do |(first, last), block_no|
    # A separator precedes every block except a first block starting at index 0.
    rendered << colorize("--- ---", :cyan) if block_no.positive? || first.positive?

    hunks[first..last].each do |hunk|
      case hunk.action
      when "="
        rendered << "#{plain_prefix}#{hunk.old_element}"
      when "-"
        rendered << colorize("- #{hunk.old_element}", :red)
      when "+"
        rendered << colorize("+ #{hunk.new_element}", :green)
      when "!"
        # A replacement is shown as a removal followed by an addition.
        rendered << colorize("- #{hunk.old_element}", :red)
        rendered << colorize("+ #{hunk.new_element}", :green)
      end
    end
  end

  rendered.join("\n")
end
|
|
898
|
+
|
|
899
|
+
# Apply display preprocessing to both documents before the line diff.
|
|
900
|
+
#
|
|
901
|
+
# This normalizes both sides through the same formatter so that structural
|
|
902
|
+
# formatting differences (indentation, line breaks) do not confuse the LCS
|
|
903
|
+
# algorithm. Equivalence detection is never affected.
|
|
904
|
+
#
|
|
905
|
+
# NOTE: Character visualization (e.g. U+00A0 → ░) is applied by the
|
|
906
|
+
# line-diff formatters to the output lines *after* this step. Because the
|
|
907
|
+
# pretty-printer introduces only ASCII U+0020 spaces and U+000A newlines
|
|
908
|
+
# for structural indentation, and neither of those is in Canon's default
|
|
909
|
+
# visualization map, pretty-printer whitespace is never misvisualized.
|
|
910
|
+
#
|
|
911
|
+
# Future constraint: if the visualization map is extended to cover common
|
|
912
|
+
# ASCII whitespace, this method must move visualization to a DOM-level pass
|
|
913
|
+
# (walk text nodes before serialization) to keep structural and content
|
|
914
|
+
# whitespace separate. See docs/features/diff-formatting/display-preprocessing.adoc.
|
|
915
|
+
#
|
|
916
|
+
# @param doc1 [String] First document
|
|
917
|
+
# @param doc2 [String] Second document
|
|
918
|
+
# @param format [Symbol] Document format (:xml, :html, :html4, :html5, ...)
|
|
919
|
+
# @return [Array<String, String>] Preprocessed [doc1, doc2]
|
|
920
|
+
def apply_display_preprocessing(doc1, doc2, format)
  # Map each supported display_preprocessing mode to the helper that implements it.
  handler = {
    pretty_print: :apply_pretty_print,
    normalize_pretty_print: :apply_normalize_pretty_print,
    c14n: :apply_c14n,
  }[@display_preprocessing]

  # Any other setting leaves both documents untouched.
  return [doc1, doc2] unless handler

  send(handler, doc1, doc2, format)
end
|
|
932
|
+
|
|
933
|
+
# Apply mixed-content-aware normalization + visualization to both documents.
|
|
934
|
+
#
|
|
935
|
+
# Uses PrettyPrinter::XmlNormalized, which breaks every XML element onto
|
|
936
|
+
# its own line while preserving and visualizing boundary content whitespace.
|
|
937
|
+
# See PrettyPrinter::XmlNormalized for the full rationale.
|
|
938
|
+
#
|
|
939
|
+
# Whitespace classification is driven by three element-name lists:
|
|
940
|
+
# - preserve_whitespace_elements → every char significant (e.g. pre, code)
|
|
941
|
+
# - collapse_whitespace_elements → presence matters, form collapses (e.g. p, li)
|
|
942
|
+
# - strip_whitespace_elements → all whitespace dropped (explicit blacklist)
|
|
943
|
+
#
|
|
944
|
+
# For XML the lists default to empty (all insensitive); for HTML built-in
|
|
945
|
+
# defaults cover the common cases. Callers supply format-specific lists via
|
|
946
|
+
# Canon::Config or DiffFormatter constructor keyword arguments.
|
|
947
|
+
def apply_normalize_pretty_print(doc1, doc2, format)
  # Only markup formats are handled; everything else passes through untouched.
  supported = %i[xml html html4 html5]
  return [doc1, doc2] unless supported.include?(format)

  indent_type_name = @pretty_printer_indent_type.to_s

  # An empty map is replaced by the default visualization map.
  # NOTE(review): build_visualization_map returns {} when
  # character_visualization is false, so this fallback appears to re-enable
  # visualization in that case — confirm this is intended.
  glyphs = @visualization_map
  glyphs = DiffFormatter::DEFAULT_VISUALIZATION_MAP if glyphs.empty?

  # TODO: implement HtmlNormalized for HTML formats; XmlNormalized is used
  # for every supported format for now.
  require "canon/pretty_printer/xml_normalized"

  common = {
    indent: @pretty_printer_indent,
    indent_type: indent_type_name,
    visualization_map: glyphs,
    preserve_whitespace_elements: @preserve_whitespace_elements,
    collapse_whitespace_elements: @collapse_whitespace_elements,
    strip_whitespace_elements: @strip_whitespace_elements,
    sort_attributes: @pretty_printer_sort_attributes,
  }

  # One printer per side: only the pretty_printed flag differs between them.
  expected_printer, received_printer =
    [@pretty_printed_expected, @pretty_printed_received].map do |flag|
      Canon::PrettyPrinter::XmlNormalized.new(**common, pretty_printed: flag)
    end

  [safe_format(expected_printer, doc1), safe_format(received_printer, doc2)]
end
|
|
982
|
+
|
|
983
|
+
# Pretty-print both documents using a format-appropriate pretty printer.
|
|
984
|
+
#
|
|
985
|
+
# * HTML formats (:html, :html4, :html5) use +Canon::PrettyPrinter::Html+
|
|
986
|
+
# which is Nokogiri::HTML5-aware and correctly handles void elements,
|
|
987
|
+
# optional end tags, and HTML5 serialization rules.
|
|
988
|
+
# * XML uses +Canon::PrettyPrinter::Xml+.
|
|
989
|
+
# * Other formats fall through unchanged.
|
|
990
|
+
def apply_pretty_print(doc1, doc2, format)
  html_formats = %i[html html4 html5]

  # Formats other than XML/HTML are returned exactly as given.
  return [doc1, doc2] unless [:xml, *html_formats].include?(format)

  options = {
    indent: @pretty_printer_indent,
    indent_type: @pretty_printer_indent_type.to_s,
  }

  # Load the printer lazily; HTML formats share one printer class.
  if html_formats.include?(format)
    require "canon/pretty_printer/html"
    printer_class = Canon::PrettyPrinter::Html
  else
    require "canon/pretty_printer/xml"
    printer_class = Canon::PrettyPrinter::Xml
  end

  # The same printer instance formats both sides.
  printer = printer_class.new(**options)
  [doc1, doc2].map { |doc| safe_format(printer, doc) }
end
|
|
1011
|
+
|
|
1012
|
+
# Normalize both documents for display using canonical serialization.
|
|
1013
|
+
#
|
|
1014
|
+
# * HTML formats use Nokogiri's HTML5 serializer as a consistent canonical
|
|
1015
|
+
# form (attribute order, void elements, etc. are standardized).
|
|
1016
|
+
# * XML uses the XML C14N algorithm (alphabetical attributes, namespace
|
|
1017
|
+
# normalization, etc.).
|
|
1018
|
+
# * Other formats fall through unchanged.
|
|
1019
|
+
#
|
|
1020
|
+
# @param doc1 [String] First document
|
|
1021
|
+
# @param doc2 [String] Second document
|
|
1022
|
+
# @param format [Symbol] Document format (:xml, :html, :html4, :html5, ...)
|
|
1023
|
+
# @return [Array<String, String>] Canonicalized [doc1, doc2]
|
|
1024
|
+
def apply_c14n(doc1, doc2, format = :xml)
  docs = [doc1, doc2]

  # HTML formats go through the HTML normalizer instead of XML C14N.
  if %i[html html4 html5].include?(format)
    return docs.map { |doc| safe_html_normalize(doc) }
  end

  # Every other format is canonicalized with the XML C14N helper.
  require "canon/xml/c14n"
  docs.map { |doc| safe_c14n(doc) }
end
|
|
1032
|
+
|
|
1033
|
+
# Pretty-print document strings for the fixture-ready display section.
|
|
1034
|
+
#
|
|
1035
|
+
# Runs independently of the +display_preprocessing+ setting — it is a
|
|
1036
|
+
# standalone display feature, not part of the diff pipeline.
|
|
1037
|
+
#
|
|
1038
|
+
# The output contains NO character visualization so it can be copy-pasted
|
|
1039
|
+
# directly into RSpec heredoc fixtures.
|
|
1040
|
+
#
|
|
1041
|
+
# @param doc1 [String] First document (expected / fixture)
|
|
1042
|
+
# @param doc2 [String] Second document (received / actual)
|
|
1043
|
+
# @param format [Symbol] Document format (:xml, :html, :html4, :html5, ...)
|
|
1044
|
+
# @return [Array<String, String>] Pretty-printed [doc1, doc2]
|
|
1045
|
+
def prettyprint_for_display(doc1, doc2, format)
  indent_type_name = @pretty_printer_indent_type.to_s

  # Pick (and lazily load) the printer class matching the document format.
  printer_class =
    case format
    when :html, :html4, :html5
      require "canon/pretty_printer/html"
      Canon::PrettyPrinter::Html
    when :xml
      require "canon/pretty_printer/xml"
      Canon::PrettyPrinter::Xml
    end

  # Formats without a pretty printer are returned unchanged.
  return [doc1, doc2] unless printer_class

  printer = printer_class.new(indent: @pretty_printer_indent,
                              indent_type: indent_type_name)
  [doc1, doc2].map { |doc| safe_format(printer, doc) }
end
|
|
1066
|
+
|
|
1067
|
+
# Format fixture-ready pretty-printed inputs for display.
|
|
1068
|
+
#
|
|
1069
|
+
# Unlike +format_preprocessed_inputs+, this section outputs plain ASCII
|
|
1070
|
+
# with NO character visualization — the content is intended for
|
|
1071
|
+
# copy-pasting into RSpec heredoc fixtures.
|
|
1072
|
+
#
|
|
1073
|
+
# @param pp1 [String] First pretty-printed string (expected / fixture)
|
|
1074
|
+
# @param pp2 [String] Second pretty-printed string (received / actual)
|
|
1075
|
+
# @param show_expected [Boolean] Render the EXPECTED block
|
|
1076
|
+
# @param show_received [Boolean] Render the RECEIVED block
|
|
1077
|
+
# @return [String] Formatted display of pretty-printed inputs
|
|
1078
|
+
def format_prettyprint_inputs(pp1, pp2, show_expected: true,
                              show_received: true)
  # Nothing to render when an input is missing or both sides are hidden.
  return "" if [pp1, pp2].any?(&:nil?)
  return "" if !show_expected && !show_received

  rule = "-" * 70
  title = colorize("=== PRETTY-PRINTED INPUTS (Fixture-ready) ===",
                   :cyan, :bold)
  sections = ["", title, ""]

  # Content is emitted verbatim so it can be pasted straight into a fixture.
  if show_expected
    sections.push(colorize("EXPECTED:", :yellow, :bold), rule, pp1, "")
  end
  if show_received
    sections.push(colorize("RECEIVED:", :yellow, :bold), rule, pp2, "")
  end

  sections.push("")
  sections.join("\n")
end
|
|
1106
|
+
|
|
1107
|
+
# Format a document through the pretty-printer, falling back to the
|
|
1108
|
+
# original string on any parse error.
|
|
1109
|
+
def safe_format(printer, doc)
  begin
    formatted = printer.format(doc.to_s)
  rescue StandardError
    # Best-effort: a failing printer must not break the diff output, so the
    # stringified input is shown instead.
    formatted = doc.to_s
  end
  formatted
end
|
|
1114
|
+
|
|
1115
|
+
# Canonicalize a document via C14N, falling back on error.
|
|
1116
|
+
def safe_c14n(doc)
  begin
    canonical = Canon::Xml::C14n.canonicalize(doc.to_s, with_comments: true)
  rescue StandardError
    # Best-effort: on any canonicalization failure show the stringified input.
    canonical = doc.to_s
  end
  canonical
end
|
|
1121
|
+
|
|
1122
|
+
# Serialize HTML through Nokogiri's HTML5 serializer for a canonical form.
|
|
1123
|
+
# Normalizes attribute order, void elements, and optional end tags consistently.
|
|
1124
|
+
# Falls back to the original string on any parse error.
|
|
1125
|
+
def safe_html_normalize(doc)
  begin
    # Lazy-load Nokogiri; a LoadError is not a StandardError and still propagates.
    require "nokogiri"
    normalized = Nokogiri::HTML5(doc.to_s).to_html(encoding: "UTF-8")
  rescue StandardError
    # Best-effort: fall back to the stringified input on failure.
    normalized = doc.to_s
  end
  normalized
end
|
|
1131
|
+
|
|
1132
|
+
# Colorize text if color is enabled.
|
|
1133
|
+
# RSpec-aware: resets any existing ANSI codes before applying new colors.
|
|
677
1134
|
def colorize(text, *colors)
|
|
678
1135
|
return text unless @use_color
|
|
679
1136
|
|