canon 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +31 -149
  3. data/README.adoc +9 -0
  4. data/docs/advanced/semantic-diff-report.adoc +31 -0
  5. data/docs/features/configuration-profiles.adoc +4 -2
  6. data/docs/features/match-options/html-policies.adoc +2 -0
  7. data/docs/features/match-options/index.adoc +40 -0
  8. data/docs/guides/choosing-configuration.adoc +12 -1
  9. data/docs/reference/cli-options.adoc +3 -0
  10. data/docs/reference/options-across-interfaces.adoc +7 -1
  11. data/docs/understanding/formats/html.adoc +9 -2
  12. data/lib/canon/cli.rb +4 -0
  13. data/lib/canon/commands/diff_command.rb +1 -0
  14. data/lib/canon/comparison/comparison_result.rb +79 -0
  15. data/lib/canon/comparison/html_comparator.rb +92 -11
  16. data/lib/canon/comparison/markup_comparator.rb +19 -0
  17. data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
  18. data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
  19. data/lib/canon/comparison/match_options.rb +23 -2
  20. data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
  21. data/lib/canon/comparison/xml_comparator/child_comparison.rb +6 -0
  22. data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
  23. data/lib/canon/comparison/xml_comparator.rb +80 -4
  24. data/lib/canon/comparison/xml_node_comparison.rb +29 -3
  25. data/lib/canon/comparison.rb +84 -22
  26. data/lib/canon/config/env_schema.rb +2 -1
  27. data/lib/canon/config/profiles/metanorma.yml +3 -0
  28. data/lib/canon/config.rb +51 -5
  29. data/lib/canon/diff/diff_classifier.rb +18 -2
  30. data/lib/canon/diff/diff_line_builder.rb +9 -8
  31. data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
  32. data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
  33. data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
  34. data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
  35. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +65 -17
  36. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +17 -0
  37. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
  38. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
  39. data/lib/canon/diff_formatter.rb +57 -173
  40. data/lib/canon/html/data_model.rb +10 -4
  41. data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
  42. data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
  43. data/lib/canon/version.rb +1 -1
  44. data/lib/canon/xml/c14n.rb +59 -5
  45. data/lib/canon/xml/element_matcher.rb +3 -0
  46. data/lib/canon/xml/node.rb +8 -1
  47. data/lib/canon/xml/nodes/comment_node.rb +4 -0
  48. data/lib/canon/xml/nodes/element_node.rb +4 -0
  49. data/lib/canon/xml/nodes/text_node.rb +4 -0
  50. data/lib/canon/xml/sax_builder.rb +11 -2
  51. data/lib/canon/xml/xpath_engine.rb +238 -0
  52. metadata +6 -2
@@ -13,8 +13,10 @@ module Canon
13
13
  # @param use_color [Boolean] Whether to use colors
14
14
  # @param compact [Boolean] Whether to serialize element nodes as compact XML
15
15
  # @return [String] Formatted dimension details
16
+ # rubocop:disable Lint/UnusedMethodArgument
16
17
  def self.format_dimension_details(diff, use_color, compact: false,
17
18
  expand_difference: false)
19
+ # rubocop:enable Lint/UnusedMethodArgument
18
20
  dimension = extract_dimension(diff)
19
21
 
20
22
  case dimension
@@ -23,8 +25,7 @@ expand_difference: false)
23
25
  when :namespace_declarations
24
26
  format_namespace_declarations_details(diff, use_color)
25
27
  when :element_structure
26
- format_element_structure_details(diff, use_color,
27
- expand_difference: expand_difference)
28
+ format_element_structure_details(diff, use_color)
28
29
  when :attribute_presence
29
30
  format_attribute_presence_details(diff, use_color)
30
31
  when :attribute_values
@@ -163,37 +164,70 @@ expand_difference: false)
163
164
 
164
165
  # Format element structure differences
165
166
  #
167
+ # Produces compact XML for both sides so the user can see attributes
168
+ # and text content, not just the tag name. Handles nil nodes that
169
+ # arise from insertions/deletions.
170
+ #
166
171
  # @param diff [DiffNode, Hash] Difference node
167
172
  # @param use_color [Boolean] Whether to use colors
168
173
  # @return [Array] Tuple of [detail1, detail2, changes]
169
- def self.format_element_structure_details(diff, use_color,
170
- expand_difference: false)
174
+ def self.format_element_structure_details(diff, use_color)
171
175
  require_relative "color_helper"
172
176
  require_relative "node_utils"
173
177
 
174
178
  node1 = extract_node1(diff)
175
179
  node2 = extract_node2(diff)
176
180
 
177
- name1 = NodeUtils.get_element_name_for_display(node1)
178
- name2 = NodeUtils.get_element_name_for_display(node2)
181
+ has1 = !node1.nil?
182
+ has2 = !node2.nil?
183
+
184
+ if has1 && has2
185
+ # Both elements present — show compact XML for both
186
+ compact1 = NodeUtils.serialize_node_compact(node1)
187
+ compact2 = NodeUtils.serialize_node_compact(node2)
188
+ detail1 = ColorHelper.colorize(compact1, :red, use_color)
189
+ detail2 = ColorHelper.colorize(compact2, :green, use_color)
190
+
191
+ name1 = NodeUtils.get_element_name_for_display(node1)
192
+ name2 = NodeUtils.get_element_name_for_display(node2)
179
193
 
180
- if expand_difference
181
- display1 = NodeUtils.serialize_node_compact(node1)
182
- display2 = NodeUtils.serialize_node_compact(node2)
183
- detail1 = ColorHelper.colorize(display1, :red, use_color)
184
- detail2 = ColorHelper.colorize(display2, :green, use_color)
194
+ changes = if name1 == name2
195
+ "Element <#{name1}> structure changed (children differ)"
196
+ else
197
+ build_structure_change_text(compact1, compact2,
198
+ use_color)
199
+ end
200
+ elsif has1
201
+ # Element removed
202
+ compact1 = NodeUtils.serialize_node_compact(node1)
203
+ detail1 = ColorHelper.colorize(compact1, :red, use_color)
204
+ detail2 = ColorHelper.colorize("(not present)", :green, use_color)
205
+ changes = "Element removed: #{ColorHelper.colorize(compact1, :red,
206
+ use_color)}"
185
207
  else
186
- detail1 = "<#{ColorHelper.colorize(name1, :red, use_color)}>"
187
- detail2 = "<#{ColorHelper.colorize(name2, :green, use_color)}>"
208
+ # Element added
209
+ compact2 = NodeUtils.serialize_node_compact(node2)
210
+ detail1 = ColorHelper.colorize("(not present)", :red, use_color)
211
+ detail2 = ColorHelper.colorize(compact2, :green, use_color)
212
+ changes = "Element added: #{ColorHelper.colorize(compact2, :green,
213
+ use_color)}"
188
214
  end
189
215
 
190
- changes = "Element differs: #{ColorHelper.colorize(name1, :red,
191
- use_color)} → " \
192
- "#{ColorHelper.colorize(name2, :green, use_color)}"
193
-
194
216
  [detail1, detail2, changes]
195
217
  end
196
218
 
219
# Compose the one-line summary used when an element's structure differs.
#
# Each side is passed through ColorHelper.colorize (expected side in
# red, actual side in green) and the two results are joined with an
# arrow.
#
# @param display1 [String] Serialized expected element
# @param display2 [String] Serialized actual element
# @param use_color [Boolean] Whether to use colors
# @return [String] Change description
def self.build_structure_change_text(display1, display2, use_color)
  expected_side = ColorHelper.colorize(display1, :red, use_color)
  actual_side = ColorHelper.colorize(display2, :green, use_color)

  "Element structure changed: #{expected_side} → #{actual_side}"
end
230
+
197
231
  # Format attribute presence differences
198
232
  #
199
233
  # @param diff [DiffNode, Hash] Difference node
@@ -351,6 +385,20 @@ expand_difference: false)
351
385
  detail2 = ColorHelper.colorize(
352
386
  TextUtils.visualize_whitespace(text2), :green, use_color
353
387
  )
388
+ elsif TextUtils.ambiguous_text_pair?(text1, text2) &&
389
+ (NodeUtils.parent_of(node1) || NodeUtils.parent_of(node2))
390
+ # Both sides render to empty/whitespace-only strings, which are
391
+ # indistinguishable after JSON quoting. Fall back to each side's
392
+ # parent element serialized compactly, with whitespace visualized
393
+ # so the reader can see the structural contrast.
394
+ ctx1 = NodeUtils.serialize_node_compact(NodeUtils.parent_of(node1))
395
+ ctx2 = NodeUtils.serialize_node_compact(NodeUtils.parent_of(node2))
396
+ detail1 = ColorHelper.colorize(
397
+ TextUtils.visualize_whitespace(ctx1), :red, use_color
398
+ )
399
+ detail2 = ColorHelper.colorize(
400
+ TextUtils.visualize_whitespace(ctx2), :green, use_color
401
+ )
354
402
  elsif compact && (node1.is_a?(Canon::Xml::Nodes::ElementNode) ||
355
403
  node2.is_a?(Canon::Xml::Nodes::ElementNode))
356
404
  # In compact mode with element nodes, display as raw XML without
@@ -318,6 +318,23 @@ module Canon
318
318
  end
319
319
  end
320
320
 
321
# Look up a node's parent without caring which node API it implements.
#
# The node is probed for +parent+ first and +parent_node+ second; the
# first accessor it responds to is called. A nil/false node, or a node
# responding to neither accessor, yields nil.
#
# @param node [Object] Node to query
# @return [Object, nil] Parent node or nil
def self.parent_of(node)
  return nil unless node

  accessor = %i[parent parent_node].find { |name| node.respond_to?(name) }
  accessor && node.public_send(accessor)
end
337
+
321
338
  # Check if node is inside a preserve-whitespace element
322
339
  #
323
340
  # @param node [Object] Node to check
@@ -83,6 +83,35 @@ module Canon
83
83
  end.join
84
84
  end
85
85
 
86
# Decide whether two text values give the reader nothing to tell them
# apart when shown side by side.
#
# The pair is considered ambiguous when either:
# * both values are blank — nil, empty, or made up solely of characters
#   matched by +\s+; or
# * the two values are equal (the reported difference then lies in
#   something the extracted text does not show).
#
# Callers are expected to fall back to rendering parent-element
# context when this returns true.
#
# @param text1 [String, nil]
# @param text2 [String, nil]
# @return [Boolean]
def self.ambiguous_text_pair?(text1, text2)
  both_blank = [text1, text2].all? do |text|
    text.nil? || text.empty? || text.match?(/\A\s+\z/)
  end

  both_blank || text1 == text2
end
114
+
86
115
  # Check if text contains non-ASCII or non-printable characters
87
116
  #
88
117
  # @param text [String] Text to check
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "paint"
4
+ require "diff/lcs"
5
+
6
+ module Canon
7
+ class DiffFormatter
8
# Handles the pretty_diff rendering pipeline for text-LCS diffs.
#
# Bypasses DiffNodeMapper entirely — runs Diff::LCS.sdiff on plain-text
# lines and renders with context windowing and colorization.
class PrettyDiffFormatter
  # Number of context lines used when +context_lines+ is nil.
  DEFAULT_CONTEXT_LINES = 3

  # Marker line emitted wherever unchanged lines were skipped.
  SEPARATOR = "--- ---"

  # @param use_color [Boolean] Whether to wrap output in ANSI colors
  # @param context_lines [Integer, nil] Unchanged lines to show on each
  #   side of a change; nil falls back to DEFAULT_CONTEXT_LINES and
  #   negative values are treated as 0
  def initialize(use_color:, context_lines:)
    @use_color = use_color
    @context_lines = context_lines
  end

  # Format a text-LCS diff between two documents.
  #
  # When either document is nil only the header line is returned.
  #
  # @param doc1 [String, nil] First document (already preprocessed)
  # @param doc2 [String, nil] Second document (already preprocessed)
  # @param format [Symbol] Document format for display name
  # @return [String] Formatted diff output
  def format(doc1, doc2, format:)
    header = colorize("Pretty diff (#{format.to_s.upcase} mode):",
                      :cyan, :bold)
    return header if doc1.nil? || doc2.nil?

    hunks = ::Diff::LCS.sdiff(doc1.lines.map(&:chomp),
                              doc2.lines.map(&:chomp))

    [header, render_pretty_diff(hunks)].join("\n")
  end

  private

  # Render sdiff hunks with context windowing and colorization.
  #
  # Changed hunks (action != "=") are expanded by the context size in
  # each direction and nearby windows are merged. A separator is emitted
  # before every block that does not start at the first hunk, i.e.
  # wherever leading or intermediate lines were skipped.
  #
  # @param hunks [Array<Diff::LCS::ContextChange>] Output of Diff::LCS.sdiff
  # @return [String] Rendered diff lines joined with "\n"
  def render_pretty_diff(hunks)
    changed = hunks.each_index.reject { |i| hunks[i].action == "=" }

    return colorize(" (no differences)", :green) if changed.empty?

    lines = []
    merged_windows(changed, hunks.length).each_with_index do |(lo, hi), block_idx|
      # Both "later block" and "first block not at the top" mean lines
      # were skipped just before this block.
      lines << colorize(SEPARATOR, :cyan) if block_idx.positive? || lo.positive?

      (lo..hi).each { |i| lines.concat(render_hunk(hunks[i])) }
    end

    lines.join("\n")
  end

  # Expand each changed position by the context size, clamp to the hunk
  # range, and merge windows that overlap or touch.
  #
  # @param changed [Array<Integer>] Ascending indexes of changed hunks
  # @param total [Integer] Total number of hunks
  # @return [Array<Array(Integer, Integer)>] Inclusive [lo, hi] windows
  def merged_windows(changed, total)
    ctx = [@context_lines || DEFAULT_CONTEXT_LINES, 0].max

    changed.each_with_object([]) do |pos, merged|
      lo = [pos - ctx, 0].max
      hi = [pos + ctx, total - 1].min

      if merged.empty? || lo > merged.last[1] + 1
        merged << [lo, hi]
      else
        merged.last[1] = [merged.last[1], hi].max
      end
    end
  end

  # Render one sdiff hunk as zero, one or two output lines.
  #
  # @param hunk [#action, #old_element, #new_element] A single sdiff entry
  # @return [Array<String>] Rendered lines; empty for an unrecognized action
  def render_hunk(hunk)
    case hunk.action
    when "="
      [@use_color ? "\e[0m #{hunk.old_element}" : " #{hunk.old_element}"]
    when "-"
      [colorize("- #{hunk.old_element}", :red)]
    when "+"
      [colorize("+ #{hunk.new_element}", :green)]
    when "!"
      # A replacement is shown as a removal followed by an addition.
      [
        colorize("- #{hunk.old_element}", :red),
        colorize("+ #{hunk.new_element}", :green),
      ]
    else
      []
    end
  end

  # Wrap text in the given Paint styles, prefixed with an ANSI reset;
  # returns the text untouched when color is disabled.
  #
  # @param text [String] Text to colorize
  # @param colors [Array<Symbol>] Paint color/style names
  # @return [String]
  def colorize(text, *colors)
    return text unless @use_color

    "\e[0m#{Paint[text, *colors]}"
  end
end
108
+ end
109
+ end
@@ -7,6 +7,9 @@ require_relative "diff/diff_block"
7
7
  require_relative "diff/diff_context"
8
8
  require_relative "diff/diff_report"
9
9
  require_relative "diff_formatter/debug_output"
10
+ require_relative "diff_formatter/by_line_formatter"
11
+ require_relative "diff_formatter/by_object_formatter"
12
+ require_relative "diff_formatter/pretty_diff_formatter"
10
13
 
11
14
  module Canon
12
15
  # Formatter for displaying semantic differences with color support
@@ -301,17 +304,26 @@ module Canon
301
304
  # @param html_version [Symbol, nil] HTML version (:html4 or :html5)
302
305
  # @return [String] Formatted output
303
306
  def format(differences, format, doc1: nil, doc2: nil, html_version: nil)
304
- # In by-line mode, always use by-line diff
307
+ # In by-line mode with both docs present, always use by-line diff
305
308
  if @mode == :by_line && doc1 && doc2
306
- return by_line_diff(doc1, doc2, format: format,
307
- html_version: html_version,
308
- differences: differences)
309
+ doc1, doc2 = apply_display_preprocessing(doc1, doc2, format)
310
+ # rubocop:disable Layout/HashAlignment
311
+ return by_line_formatter.format(doc1, doc2, format: format,
312
+ html_version: html_version,
313
+ differences: differences)
314
+ # rubocop:enable Layout/HashAlignment
309
315
  end
310
316
 
311
317
  # In pretty_diff mode, always use text-LCS diff (bypasses DiffNodeMapper).
312
- # pretty_diff_format handles nil doc1/doc2 itself (emits header only).
313
318
  if @mode == :pretty_diff
314
- return pretty_diff_format(doc1, doc2, format: format)
319
+ d1, d2 = if doc1 && doc2
320
+ apply_display_preprocessing(doc1, doc2,
321
+ format)
322
+ else
323
+ [doc1,
324
+ doc2]
325
+ end
326
+ return pretty_diff_formatter.format(d1, d2, format: format)
315
327
  end
316
328
 
317
329
  no_diffs = if differences.respond_to?(:equivalent?)
@@ -323,12 +335,26 @@ module Canon
323
335
 
324
336
  case @mode
325
337
  when :by_line
326
- by_line_diff(doc1, doc2, format: format, html_version: html_version,
327
- differences: differences)
338
+ if doc1 && doc2
339
+ doc1, doc2 = apply_display_preprocessing(doc1, doc2,
340
+ format)
341
+ end
342
+ # rubocop:disable Layout/HashAlignment
343
+ by_line_formatter.format(doc1, doc2, format: format,
344
+ html_version: html_version,
345
+ differences: differences)
346
+ # rubocop:enable Layout/HashAlignment
328
347
  when :pretty_diff
329
- pretty_diff_format(doc1, doc2, format: format)
348
+ d1, d2 = if doc1 && doc2
349
+ apply_display_preprocessing(doc1, doc2,
350
+ format)
351
+ else
352
+ [doc1,
353
+ doc2]
354
+ end
355
+ pretty_diff_formatter.format(d1, d2, format: format)
330
356
  else
331
- by_object_diff(differences, format)
357
+ by_object_formatter.format(differences, format)
332
358
  end
333
359
  end
334
360
 
@@ -685,10 +711,8 @@ module Canon
685
711
  # false disables all visualization
686
712
  return {} if character_visualization == false
687
713
 
688
- # :content_only currently behaves as true (full map)
689
- # TODO: apply visualization at DOM text-node level pre-serialization,
690
- # keeping structural indentation whitespace plain.
691
- # See docs/features/diff-formatting/character-visualization.adoc
714
+ # :content_only builds the full map; the by_line formatter applies
715
+ # it only to content portions, leaving structural indentation plain.
692
716
 
693
717
  return visualization_map if visualization_map
694
718
 
@@ -723,177 +747,37 @@ module Canon
723
747
  colorize("#{emoji}#{message}\n", :green, :bold)
724
748
  end
725
749
 
726
- # Generate by-object diff with tree visualization
727
- # Delegates to format-specific by-object formatters
728
- def by_object_diff(differences, format)
729
- output = []
730
- output << colorize("Visual Diff:", :cyan, :bold)
731
-
732
- # Extract differences array from ComparisonResult if needed
733
- diffs_array = if differences.is_a?(Canon::Comparison::ComparisonResult)
734
- differences.differences
735
- else
736
- differences
737
- end
738
-
739
- # Delegate to format-specific formatter
740
- formatter = ByObject::BaseFormatter.for_format(
741
- format,
750
+ # Factory methods for mode-specific formatters
751
+
752
+ # @return [ByLineFormatter]
753
+ def by_line_formatter
754
+ @by_line_formatter ||= ByLineFormatter.new(
742
755
  use_color: @use_color,
743
756
  visualization_map: @visualization_map,
757
+ context_lines: @context_lines,
758
+ diff_grouping_lines: @diff_grouping_lines,
744
759
  show_diffs: @show_diffs,
760
+ character_visualization: @character_visualization,
761
+ legacy_terminal: @legacy_terminal,
762
+ diff_mode: @diff_mode,
745
763
  )
746
-
747
- output << formatter.format(diffs_array, format)
748
-
749
- output.join("\n")
750
764
  end
751
765
 
752
- # Generate by-line diff
753
- # Delegates to format-specific by-line formatters
754
- def by_line_diff(doc1, doc2, format: :xml, html_version: nil,
755
- differences: [])
756
- # For HTML format, use html_version if provided, otherwise default to :html4
757
- if format == :html && html_version
758
- format = html_version # Use :html4 or :html5
759
- end
760
-
761
- # Format display name for header
762
- format_name = format.to_s.upcase
763
-
764
- output = []
765
- output << colorize("Line-by-line diff (#{format_name} mode):", :cyan,
766
- :bold)
767
-
768
- return output.join("\n") if doc1.nil? || doc2.nil?
769
-
770
- # Apply display preprocessing (format both sides identically before diff)
771
- doc1, doc2 = apply_display_preprocessing(doc1, doc2, format)
772
- # Extract differences array and equivalent status from ComparisonResult if needed
773
- diffs_array = if differences.is_a?(Canon::Comparison::ComparisonResult)
774
- @comparison_equivalent = differences.equivalent?
775
- differences.differences
776
- else
777
- @comparison_equivalent = nil
778
- differences
779
- end
780
-
781
- # Delegate to format-specific formatter
782
- formatter = ByLine::BaseFormatter.for_format(
783
- format,
766
+ # @return [ByObjectFormatter]
767
+ def by_object_formatter
768
+ @by_object_formatter ||= ByObjectFormatter.new(
784
769
  use_color: @use_color,
785
- context_lines: @context_lines,
786
- diff_grouping_lines: @diff_grouping_lines,
787
770
  visualization_map: @visualization_map,
788
771
  show_diffs: @show_diffs,
789
- differences: diffs_array,
790
- diff_mode: @legacy_terminal ? :separate : @diff_mode,
791
- legacy_terminal: @legacy_terminal,
792
- equivalent: @comparison_equivalent,
793
772
  )
794
-
795
- output << formatter.format(doc1, doc2)
796
-
797
- output.join("\n")
798
- end
799
-
800
- # Generate a text-LCS diff against preprocessed lines (pretty_diff mode).
801
- #
802
- # This mode bypasses DiffNodeMapper entirely: it applies display_preprocessing
803
- # to both sides, then runs Diff::LCS.sdiff on the resulting plain-text lines.
804
- # It is a reliable short-term workaround for #85 (normative changes invisible
805
- # in :by_line mode when DiffNodeMapper's DOM-address correlation is off).
806
- #
807
- # Limitations:
808
- # - show_diffs :normative / :informative filter is ignored (no DiffNodes)
809
- # - No inline character highlighting (whole-line granularity only)
810
- #
811
- # @param doc1 [String] First document
812
- # @param doc2 [String] Second document
813
- # @param format [Symbol] Document format
814
- # @return [String] Formatted diff output
815
- def pretty_diff_format(doc1, doc2, format:)
816
- require "diff/lcs"
817
-
818
- resolved_format = format
819
-
820
- format_name = resolved_format.to_s.upcase
821
- output = []
822
- output << colorize("Pretty diff (#{format_name} mode):", :cyan, :bold)
823
-
824
- return output.join("\n") if doc1.nil? || doc2.nil?
825
-
826
- # Apply display preprocessing — same transforms as by_line_diff
827
- d1, d2 = apply_display_preprocessing(doc1, doc2, resolved_format)
828
-
829
- lines1 = d1.lines.map(&:chomp)
830
- lines2 = d2.lines.map(&:chomp)
831
-
832
- hunks = ::Diff::LCS.sdiff(lines1, lines2)
833
-
834
- output << render_pretty_diff(hunks)
835
- output.join("\n")
836
773
  end
837
774
 
838
- # Render sdiff hunks with context windowing and colorization.
839
- #
840
- # Uses the same context_lines setting as by_line_diff. Changed hunks
841
- # (action != "=") are expanded by context_lines in each direction; nearby
842
- # windows are merged; a separator is emitted between non-adjacent blocks.
843
- #
844
- # @param hunks [Array<Diff::LCS::ContextChange>] Output of Diff::LCS.sdiff
845
- # @return [String] Rendered diff lines joined with "\n"
846
- def render_pretty_diff(hunks)
847
- # Identify positions of changed hunks
848
- changed = hunks.each_index.reject { |i| hunks[i].action == "=" }
849
-
850
- return colorize(" (no differences)", :green) if changed.empty?
851
-
852
- ctx = [@context_lines || 3, 0].max
853
-
854
- # Build expanded windows, then merge overlapping/adjacent ones
855
- windows = changed.map do |pos|
856
- [
857
- [pos - ctx, 0].max,
858
- [pos + ctx, hunks.length - 1].min,
859
- ]
860
- end
861
-
862
- merged = []
863
- windows.each do |lo, hi|
864
- if merged.empty? || lo > merged.last[1] + 1
865
- merged << [lo, hi]
866
- else
867
- merged.last[1] = [merged.last[1], hi].max
868
- end
869
- end
870
-
871
- lines = []
872
- merged.each_with_index do |(lo, hi), block_idx|
873
- # Separator between non-adjacent blocks
874
- if block_idx.positive?
875
- lines << colorize("--- ---", :cyan)
876
- elsif lo.positive?
877
- lines << colorize("--- ---", :cyan)
878
- end
879
-
880
- (lo..hi).each do |i|
881
- hunk = hunks[i]
882
- case hunk.action
883
- when "="
884
- lines << (@use_color ? "\e[0m #{hunk.old_element}" : " #{hunk.old_element}")
885
- when "-"
886
- lines << colorize("- #{hunk.old_element}", :red)
887
- when "+"
888
- lines << colorize("+ #{hunk.new_element}", :green)
889
- when "!"
890
- lines << colorize("- #{hunk.old_element}", :red)
891
- lines << colorize("+ #{hunk.new_element}", :green)
892
- end
893
- end
894
- end
895
-
896
- lines.join("\n")
775
+ # @return [PrettyDiffFormatter]
776
+ def pretty_diff_formatter
777
+ @pretty_diff_formatter ||= PrettyDiffFormatter.new(
778
+ use_color: @use_color,
779
+ context_lines: @context_lines,
780
+ )
897
781
  end
898
782
 
899
783
  # Apply display preprocessing to both documents before the line diff.
@@ -208,19 +208,25 @@ module Canon
208
208
 
209
209
  # Build text node from Nokogiri text node
210
210
  # HTML-specific: handles whitespace-sensitive elements (pre, code, textarea, script, style)
211
+ # and preserves whitespace between inline element siblings.
211
212
  def self.build_text_node(nokogiri_text)
212
213
  # Skip text nodes that are only whitespace between elements
213
214
  # EXCEPT in whitespace-sensitive elements (pre, code, textarea, script, style)
214
- # where whitespace is semantically significant
215
+ # and when whitespace is between inline element siblings (semantically significant)
215
216
  content = nokogiri_text.content
216
217
 
217
- if content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element)
218
+ # NBSP (U+00A0) is never insignificant whitespace
219
+ if content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element) && !content.include?("\u00A0")
218
220
  # Check if parent is whitespace-sensitive
219
221
  parent_name = nokogiri_text.parent.name.downcase
220
222
  whitespace_sensitive_tags = %w[pre code textarea script style]
221
223
 
222
- # Skip whitespace-only text UNLESS in whitespace-sensitive element
223
- return nil unless whitespace_sensitive_tags.include?(parent_name)
224
+ # Check if whitespace is between inline siblings
225
+ require_relative "../comparison/whitespace_sensitivity"
226
+ unless whitespace_sensitive_tags.include?(parent_name) ||
227
+ Canon::Comparison::WhitespaceSensitivity.inline_whitespace_significant?(nokogiri_text)
228
+ return nil
229
+ end
224
230
  end
225
231
 
226
232
  # Nokogiri already handles CDATA conversion and entity resolution