canon 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +31 -149
  3. data/README.adoc +9 -0
  4. data/docs/advanced/semantic-diff-report.adoc +96 -0
  5. data/docs/features/configuration-profiles.adoc +4 -2
  6. data/docs/features/diff-formatting/index.adoc +3 -0
  7. data/docs/features/diff-formatting/whitespace-adjacency.adoc +140 -0
  8. data/docs/features/match-options/html-policies.adoc +2 -0
  9. data/docs/features/match-options/index.adoc +40 -0
  10. data/docs/guides/choosing-configuration.adoc +12 -1
  11. data/docs/reference/cli-options.adoc +3 -0
  12. data/docs/reference/environment-variables.adoc +3 -1
  13. data/docs/reference/options-across-interfaces.adoc +7 -1
  14. data/docs/understanding/formats/html.adoc +9 -2
  15. data/lib/canon/cli.rb +4 -0
  16. data/lib/canon/commands/diff_command.rb +1 -0
  17. data/lib/canon/comparison/comparison_result.rb +95 -2
  18. data/lib/canon/comparison/html_comparator.rb +96 -11
  19. data/lib/canon/comparison/markup_comparator.rb +68 -71
  20. data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
  21. data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
  22. data/lib/canon/comparison/match_options.rb +23 -2
  23. data/lib/canon/comparison/node_inspector.rb +103 -0
  24. data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
  25. data/lib/canon/comparison/xml_comparator/child_comparison.rb +133 -55
  26. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +24 -23
  27. data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
  28. data/lib/canon/comparison/xml_comparator.rb +174 -7
  29. data/lib/canon/comparison/xml_node_comparison.rb +48 -66
  30. data/lib/canon/comparison.rb +143 -22
  31. data/lib/canon/config/env_schema.rb +2 -1
  32. data/lib/canon/config/profiles/metanorma.yml +3 -0
  33. data/lib/canon/config.rb +51 -5
  34. data/lib/canon/diff/diff_classifier.rb +55 -41
  35. data/lib/canon/diff/diff_line_builder.rb +9 -8
  36. data/lib/canon/diff/xml_serialization_formatter.rb +27 -42
  37. data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
  38. data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
  39. data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
  40. data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
  41. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +184 -26
  42. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +92 -4
  43. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
  44. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
  45. data/lib/canon/diff_formatter.rb +128 -175
  46. data/lib/canon/html/data_model.rb +10 -4
  47. data/lib/canon/pretty_printer/html.rb +76 -14
  48. data/lib/canon/pretty_printer/html_void_elements.rb +20 -0
  49. data/lib/canon/pretty_printer/xml_normalized.rb +10 -3
  50. data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
  51. data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
  52. data/lib/canon/version.rb +1 -1
  53. data/lib/canon/xml/c14n.rb +59 -5
  54. data/lib/canon/xml/data_model.rb +13 -1
  55. data/lib/canon/xml/element_matcher.rb +3 -0
  56. data/lib/canon/xml/node.rb +23 -1
  57. data/lib/canon/xml/nodes/comment_node.rb +4 -0
  58. data/lib/canon/xml/nodes/element_node.rb +4 -0
  59. data/lib/canon/xml/nodes/text_node.rb +4 -0
  60. data/lib/canon/xml/sax_builder.rb +29 -2
  61. data/lib/canon/xml/xpath_engine.rb +238 -0
  62. metadata +9 -2
data/lib/canon/config.rb CHANGED
@@ -285,6 +285,7 @@ module Canon
285
285
  end
286
286
 
287
287
  def indent_type=(value)
288
+ DiffConfig.validate_config_value!(:pretty_printer_indent_type, value)
288
289
  @resolver.set_programmatic(:pretty_printer_indent_type, value)
289
290
  end
290
291
  end
@@ -293,6 +294,20 @@ module Canon
293
294
  class DiffConfig
294
295
  attr_reader :pretty_printer
295
296
 
297
+ # Valid values for enum-like configuration options
298
+ VALID_ENUM_VALUES = {
299
+ mode: %i[by_line by_object pretty_diff],
300
+ show_diffs: %i[all normative informative],
301
+ algorithm: %i[dom semantic],
302
+ parser: %i[sax dom],
303
+ display_preprocessing: %i[none pretty_print normalize_pretty_print
304
+ c14n],
305
+ display_format: %i[raw canonical],
306
+ pretty_printer_indent_type: %i[space tab],
307
+ character_visualization: [true, false, :content_only],
308
+ theme: %i[light dark retro claude cyberpunk],
309
+ }.freeze
310
+
296
311
  def initialize(format = nil)
297
312
  @format = format
298
313
  @resolver = build_resolver(format)
@@ -309,7 +324,9 @@ module Canon
309
324
 
310
325
  data.each do |key, value|
311
326
  sym_key = key.to_sym
312
- @resolver.set_profile(sym_key, coerce_profile_value(sym_key, value))
327
+ coerced = coerce_profile_value(sym_key, value)
328
+ self.class.validate_config_value!(sym_key, coerced)
329
+ @resolver.set_profile(sym_key, coerced)
313
330
  end
314
331
  end
315
332
 
@@ -317,12 +334,25 @@ module Canon
317
334
  @resolver.clear_profile!
318
335
  end
319
336
 
337
+ # Validate a config value against its allowed enum values
338
+ def self.validate_config_value!(key, value)
339
+ valid = VALID_ENUM_VALUES[key]
340
+ return unless valid
341
+
342
+ return if valid.include?(value)
343
+
344
+ raise ArgumentError,
345
+ "Invalid value #{value.inspect} for #{key}. " \
346
+ "Valid values: #{valid.map(&:inspect).join(', ')}"
347
+ end
348
+
320
349
  # Accessors with ENV override support
321
350
  def mode
322
351
  @resolver.resolve(:mode)
323
352
  end
324
353
 
325
354
  def mode=(value)
355
+ self.class.validate_config_value!(:mode, value)
326
356
  @resolver.set_programmatic(:mode, value)
327
357
  end
328
358
 
@@ -355,6 +385,7 @@ module Canon
355
385
  end
356
386
 
357
387
  def show_diffs=(value)
388
+ self.class.validate_config_value!(:show_diffs, value)
358
389
  @resolver.set_programmatic(:show_diffs, value)
359
390
  end
360
391
 
@@ -495,6 +526,7 @@ module Canon
495
526
  end
496
527
 
497
528
  def display_format=(value)
529
+ self.class.validate_config_value!(:display_format, value)
498
530
  @resolver.set_programmatic(:display_format, value)
499
531
  end
500
532
 
@@ -511,6 +543,7 @@ module Canon
511
543
  end
512
544
 
513
545
  def display_preprocessing=(value)
546
+ self.class.validate_config_value!(:display_preprocessing, value)
514
547
  @resolver.set_programmatic(:display_preprocessing, value)
515
548
  end
516
549
 
@@ -620,10 +653,8 @@ module Canon
620
653
  # Values:
621
654
  # true - apply the full default visualization map (default)
622
655
  # false - disable visualization; output plain text
623
- # :content_only - reserved for future use; currently behaves as +true+.
624
- # Future intent: apply visualization only to DOM text
625
- # node content, not to structural indentation whitespace.
626
- # (TODO: implement DOM-level pre-serialization pass)
656
+ # :content_only - apply visualization only to text content, not
657
+ # to structural indentation whitespace.
627
658
  def character_visualization
628
659
  val = @resolver.resolve(:character_visualization)
629
660
  # Coerce symbol booleans that may arrive via ENV (env_schema uses :symbol type
@@ -636,6 +667,7 @@ module Canon
636
667
  end
637
668
 
638
669
  def character_visualization=(value)
670
+ self.class.validate_config_value!(:character_visualization, value)
639
671
  @resolver.set_programmatic(:character_visualization, value)
640
672
  end
641
673
 
@@ -644,15 +676,27 @@ module Canon
644
676
  end
645
677
 
646
678
  def algorithm=(value)
679
+ self.class.validate_config_value!(:algorithm, value)
647
680
  @resolver.set_programmatic(:algorithm, value)
648
681
  end
649
682
 
683
+ # XML parser backend (:sax or :dom, default :sax)
684
+ def parser
685
+ @resolver.resolve(:parser)
686
+ end
687
+
688
+ def parser=(value)
689
+ self.class.validate_config_value!(:parser, value)
690
+ @resolver.set_programmatic(:parser, value)
691
+ end
692
+
650
693
  # Theme name (:light, :dark, :retro, :claude, :cyberpunk)
651
694
  def theme
652
695
  @resolver.resolve(:theme)
653
696
  end
654
697
 
655
698
  def theme=(value)
699
+ self.class.validate_config_value!(:theme, value)
656
700
  @resolver.set_programmatic(:theme, value)
657
701
  end
658
702
 
@@ -693,6 +737,7 @@ module Canon
693
737
  show_diffs: show_diffs,
694
738
  verbose_diff: verbose_diff,
695
739
  diff_algorithm: algorithm,
740
+ parser: parser,
696
741
  show_raw_inputs: show_raw_inputs,
697
742
  show_raw_expected: show_raw_expected,
698
743
  show_raw_received: show_raw_received,
@@ -733,6 +778,7 @@ module Canon
733
778
  show_diffs: :all,
734
779
  verbose_diff: false,
735
780
  algorithm: :dom,
781
+ parser: :sax,
736
782
  show_raw_inputs: false,
737
783
  show_raw_expected: false,
738
784
  show_raw_received: false,
@@ -73,6 +73,16 @@ module Canon
73
73
  return diff_node
74
74
  end
75
75
 
76
+ # :whitespace_adjacency is a report-only re-label of an
77
+ # asymmetric whitespace mismatch emitted by ChildComparison's
78
+ # two-cursor walk. Equivalence behaviour is unchanged — the
79
+ # underlying mismatch is normative regardless of match options.
80
+ if diff_node.dimension == :whitespace_adjacency
81
+ diff_node.formatting = false
82
+ diff_node.normative = true
83
+ return diff_node
84
+ end
85
+
76
86
  # THIRD: Determine if this dimension is normative based on CompareProfile
77
87
  # This respects the policy settings (strict/normalize/ignore)
78
88
  is_normative = profile.normative_dimension?(diff_node.dimension)
@@ -150,8 +160,9 @@ module Canon
150
160
  end
151
161
 
152
162
  # Check if the text node is inside a whitespace-sensitive element
153
- # (preserve/collapse classification or xml:space='preserve').
154
- # In these elements, whitespace presence is meaningful and should
163
+ # (preserve/collapse classification, xml:space='preserve', or
164
+ # between inline element siblings in HTML).
165
+ # In these contexts, whitespace presence is meaningful and should
155
166
  # not be dismissed as serialization formatting.
156
167
  # @param diff_node [DiffNode] The diff node to check
157
168
  # @return [Boolean] true if whitespace is preserved for this element
@@ -159,6 +170,21 @@ module Canon
159
170
  node = diff_node.node1 || diff_node.node2
160
171
  return false unless node
161
172
 
173
+ # HTML: whitespace between inline element siblings is significant
174
+ if Canon::Comparison::WhitespaceSensitivity.inline_whitespace_significant?(node)
175
+ return true
176
+ end
177
+
178
+ # HTML: non-breaking space (U+00A0) is never insignificant
179
+ text = if node.respond_to?(:content)
180
+ node.content
181
+ elsif node.respond_to?(:value)
182
+ node.value
183
+ end
184
+ if text && Canon::Comparison::WhitespaceSensitivity.contains_nbsp?(text)
185
+ return true
186
+ end
187
+
162
188
  return false unless node.respond_to?(:parent)
163
189
 
164
190
  parent = node.parent
@@ -194,28 +220,21 @@ module Canon
194
220
  def extract_text_content(node)
195
221
  return nil if node.nil?
196
222
 
197
- # For TextNode with value attribute (Canon::Xml::Nodes::TextNode)
198
- return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
199
-
200
- # For XML/HTML nodes with text_content method
201
- return node.text_content if node.respond_to?(:text_content)
202
-
203
- # For nodes with text method
204
- return node.text if node.respond_to?(:text)
205
-
206
- # For nodes with content method
207
- return node.content if node.respond_to?(:content)
208
-
209
- # For nodes with value method (other types)
210
- return node.value if node.respond_to?(:value)
211
-
212
- # For simple text nodes or strings
213
- return node.to_s if node.is_a?(String)
214
-
215
- # For other node types, try to_s
216
- node.to_s
223
+ case node
224
+ when Canon::Xml::Nodes::TextNode
225
+ node.value
226
+ when Canon::Xml::Node
227
+ node.text_content
228
+ when Nokogiri::XML::Node
229
+ node.content.to_s
230
+ when Moxml::Node
231
+ node.content.to_s
232
+ when String
233
+ node
234
+ else
235
+ node.to_s
236
+ end
217
237
  rescue StandardError
218
- # If extraction fails, return nil (not formatting-only)
219
238
  nil
220
239
  end
221
240
 
@@ -225,25 +244,20 @@ module Canon
225
244
  def text_node?(node)
226
245
  return false if node.nil?
227
246
 
228
- # Canon::Xml::Nodes::TextNode
229
- return true if node.is_a?(Canon::Xml::Nodes::TextNode)
230
-
231
- # Nokogiri text nodes (node_type returns integer constant like 3)
232
- return true if node.respond_to?(:node_type) &&
233
- node.node_type.is_a?(Integer) &&
247
+ case node
248
+ when Canon::Xml::Nodes::TextNode
249
+ true
250
+ when Canon::Xml::Node
251
+ node.node_type == :text
252
+ when Nokogiri::XML::Node
234
253
  node.node_type == Nokogiri::XML::Node::TEXT_NODE
235
-
236
- # Moxml text nodes (node_type returns symbol)
237
- return true if node.respond_to?(:node_type) && node.node_type == :text
238
-
239
- # String
240
- return true if node.is_a?(String)
241
-
242
- # Test doubles or objects with text node-like interface
243
- # Check if it has a value method (contains text content)
244
- return true if node.respond_to?(:value)
245
-
246
- false
254
+ when Moxml::Node
255
+ node.text?
256
+ when String
257
+ true
258
+ else
259
+ false
260
+ end
247
261
  end
248
262
  end
249
263
  end
@@ -858,12 +858,14 @@ new_line_ranges)
858
858
  # The DiffNode's explicit formatting? flag takes precedence:
859
859
  # - If formatting? == true: return true (explicitly formatting-only)
860
860
  #
861
- # If node exists and is normative (formatting? is nil but norm is true):
862
- # - Check line-level formatting via FormattingDetector for whitespace-only changes
863
- # - But NOT via comment_only_line? heuristic because comment content is different
861
+ # If node exists and is normative:
862
+ # - Return false — normative DiffNodes are NEVER formatting-only.
863
+ # Even if the serialized content looks whitespace-equivalent,
864
+ # the comparison classified it as a normative change and it MUST
865
+ # be visible in by_line output (especially with show_diffs: :normative).
864
866
  #
865
867
  # If node exists and is informative (norm=false):
866
- # - Return false (informative diffs are always shown as informative)
868
+ # - No early return: falls through to the line-level formatting check
867
869
  #
868
870
  # If NO node exists (diff_node is nil):
869
871
  # - Use heuristics: comment-only lines and FormattingDetector
@@ -877,11 +879,10 @@ new_line_ranges)
877
879
  return true if diff_node&.formatting?
878
880
 
879
881
  if diff_node
880
- # Node exists - use node classification
881
- return false unless diff_node.normative?
882
+ # Normative nodes are never formatting-only
883
+ return false if diff_node.normative?
882
884
 
883
- # For normative nodes, check line-level formatting
884
- # (but NOT comment_only_line? which would misclassify comment content changes)
885
+ # Informative nodes: check line-level formatting
885
886
  elsif comment_only_line?(line1) || comment_only_line?(line2)
886
887
  # No DiffNode: use heuristics
887
888
  return true
@@ -91,28 +91,20 @@ module Canon
91
91
  def self.text_node?(node)
92
92
  return false if node.nil?
93
93
 
94
- # Canon::Xml::Nodes::TextNode
95
- return true if node.is_a?(Canon::Xml::Nodes::TextNode)
96
-
97
- # Moxml::Text (check before generic node_type check)
98
- return true if node.is_a?(Moxml::Text)
99
-
100
- # Nokogiri text nodes (node_type returns integer constant like 3)
101
- return true if node.respond_to?(:node_type) &&
102
- node.node_type.is_a?(Integer) &&
94
+ case node
95
+ when Canon::Xml::Nodes::TextNode
96
+ true
97
+ when Canon::Xml::Node
98
+ node.node_type == :text
99
+ when Nokogiri::XML::Node
103
100
  node.node_type == Nokogiri::XML::Node::TEXT_NODE
104
-
105
- # Moxml text nodes (node_type returns symbol) - for when using Moxml adapters
106
- return true if node.respond_to?(:node_type) && node.node_type == :text
107
-
108
- # String
109
- return true if node.is_a?(String)
110
-
111
- # Test doubles or objects with text node-like interface
112
- # Check if it has a value method (contains text content)
113
- return true if node.respond_to?(:value)
114
-
115
- false
101
+ when Moxml::Node
102
+ node.text?
103
+ when String
104
+ true
105
+ else
106
+ false
107
+ end
116
108
  end
117
109
 
118
110
  # Extract text content from a node
@@ -121,28 +113,21 @@ module Canon
121
113
  def self.extract_text_content(node)
122
114
  return nil if node.nil?
123
115
 
124
- # For TextNode with value attribute (Canon::Xml::Nodes::TextNode)
125
- return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
126
-
127
- # For XML/HTML nodes with text_content method
128
- return node.text_content if node.respond_to?(:text_content)
129
-
130
- # For nodes with content method (try before text, as Moxml::Text.text returns "")
131
- return node.content if node.respond_to?(:content)
132
-
133
- # For nodes with text method
134
- return node.text if node.respond_to?(:text)
135
-
136
- # For nodes with value method (other types)
137
- return node.value if node.respond_to?(:value)
138
-
139
- # For simple text nodes or strings
140
- return node.to_s if node.is_a?(String)
141
-
142
- # For other node types, try to_s
143
- node.to_s
116
+ case node
117
+ when Canon::Xml::Nodes::TextNode
118
+ node.value
119
+ when Canon::Xml::Node
120
+ node.text_content
121
+ when Nokogiri::XML::Node
122
+ node.content.to_s
123
+ when Moxml::Node
124
+ node.content.to_s
125
+ when String
126
+ node
127
+ else
128
+ node.to_s
129
+ end
144
130
  rescue StandardError
145
- # If extraction fails, return nil (not a serialization difference)
146
131
  nil
147
132
  end
148
133
 
@@ -45,11 +45,13 @@ module Canon
45
45
  end
46
46
  end
47
47
 
48
+ # rubocop:disable Metrics/ParameterLists
48
49
  def initialize(use_color: true, context_lines: 3,
49
50
  diff_grouping_lines: nil, visualization_map: nil,
50
51
  show_diffs: :all, differences: [],
51
52
  diff_mode: :separate, legacy_terminal: false,
52
- equivalent: nil, theme: nil)
53
+ equivalent: nil, theme: nil,
54
+ character_visualization: true)
53
55
  @use_color = use_color
54
56
  @context_lines = context_lines
55
57
  @diff_grouping_lines = diff_grouping_lines
@@ -61,7 +63,9 @@ module Canon
61
63
  @legacy_terminal = legacy_terminal
62
64
  @equivalent = equivalent
63
65
  @theme = theme
66
+ @character_visualization = character_visualization
64
67
  end
68
+ # rubocop:enable Metrics/ParameterLists
65
69
 
66
70
  # Get the resolved theme hash
67
71
  # @return [Hash] Theme hash
@@ -644,15 +648,23 @@ module Canon
644
648
 
645
649
  # Apply character visualization
646
650
  #
651
+ # When +character_visualization+ is +:content_only+, leading
652
+ # structural whitespace (indentation) is left plain while content
653
+ # whitespace is visualized.
654
+ #
647
655
  # @param token [String] The token to apply visualization to
648
656
  # @param color [Symbol, nil] Optional color to apply
649
657
  # @return [String] Visualized and optionally colored token
650
658
  def apply_visualization(token, color = nil)
651
659
  return "" if token.nil?
652
660
 
653
- visual = token.to_s.chars.map do |char|
654
- @visualization_map.fetch(char, char)
655
- end.join
661
+ visual = if @character_visualization == :content_only
662
+ visualize_content_only(token.to_s)
663
+ else
664
+ token.to_s.chars.map do |char|
665
+ @visualization_map.fetch(char, char)
666
+ end.join
667
+ end
656
668
 
657
669
  if color && @use_color
658
670
  require "rainbow"
@@ -678,6 +690,29 @@ module Canon
678
690
  end
679
691
  end
680
692
 
693
+ # Visualize only content portion, leaving structural indentation plain.
694
+ #
695
+ # Splits the token into leading whitespace (structural indentation)
696
+ # and the rest (content). Only the content portion gets character
697
+ # visualization.
698
+ #
699
+ # @param token [String] The full line token
700
+ # @return [String] Token with content-only visualization
701
+ def visualize_content_only(token)
702
+ # Leading whitespace is structural indentation — keep it plain
703
+ indent_end = token.index(/[^\s]/) || token.length
704
+ indent = token[0...indent_end]
705
+ content = token[indent_end..]
706
+
707
+ if content.nil? || content.empty?
708
+ indent
709
+ else
710
+ indent + content.chars.map { |char|
711
+ @visualization_map.fetch(char, char)
712
+ }.join
713
+ end
714
+ end
715
+
681
716
  # Get max diff lines limit
682
717
  #
683
718
  # @return [Integer, nil] Max diff output lines
@@ -12,19 +12,22 @@ module Canon
12
12
  class HtmlFormatter < BaseFormatter
13
13
  attr_reader :html_version
14
14
 
15
+ # rubocop:disable Metrics/ParameterLists
15
16
  def initialize(use_color: true, context_lines: 3,
16
17
  diff_grouping_lines: nil, visualization_map: nil,
17
18
  html_version: :html4, show_diffs: :all, differences: [],
18
19
  diff_mode: :separate, legacy_terminal: false,
19
- equivalent: nil)
20
+ equivalent: nil, character_visualization: true)
20
21
  super(use_color: use_color, context_lines: context_lines,
21
22
  diff_grouping_lines: diff_grouping_lines,
22
23
  visualization_map: visualization_map,
23
24
  show_diffs: show_diffs, differences: differences,
24
25
  diff_mode: diff_mode, legacy_terminal: legacy_terminal,
25
- equivalent: equivalent)
26
+ equivalent: equivalent,
27
+ character_visualization: character_visualization)
26
28
  @html_version = html_version
27
29
  end
30
+ # rubocop:enable Metrics/ParameterLists
28
31
 
29
32
  # Format DOM-guided HTML diff
30
33
  #
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "paint"
4
+
5
+ module Canon
6
+ class DiffFormatter
7
+ # Handles the by_line rendering pipeline for line-by-line diffs.
8
+ #
9
+ # Receives preprocessed document strings from the DiffFormatter facade
10
+ # and delegates to format-specific ByLine formatters (XML, HTML, JSON, YAML).
11
+ class ByLineFormatter
12
+ # rubocop:disable Metrics/ParameterLists
13
+ def initialize(use_color:, visualization_map:, context_lines:,
14
+ diff_grouping_lines:, show_diffs:, character_visualization:,
15
+ legacy_terminal:, diff_mode:)
16
+ @use_color = use_color
17
+ @visualization_map = visualization_map
18
+ @context_lines = context_lines
19
+ @diff_grouping_lines = diff_grouping_lines
20
+ @show_diffs = show_diffs
21
+ @character_visualization = character_visualization
22
+ @legacy_terminal = legacy_terminal
23
+ @diff_mode = diff_mode
24
+ end
25
+ # rubocop:enable Metrics/ParameterLists
26
+
27
+ # Format a line-by-line diff between two documents.
28
+ #
29
+ # @param doc1 [String] First document (already preprocessed)
30
+ # @param doc2 [String] Second document (already preprocessed)
31
+ # @param format [Symbol] Document format (:xml, :html, :json, :yaml, etc.)
32
+ # @param html_version [Symbol, nil] HTML version override (:html4, :html5)
33
+ # @param differences [Array, ComparisonResult] Differences from comparison
34
+ # @return [String] Formatted diff output
35
+ def format(doc1, doc2, format:, html_version: nil, differences: [])
36
+ resolved_format = format == :html && html_version ? html_version : format
37
+ format_name = resolved_format.to_s.upcase
38
+
39
+ output = []
40
+ output << colorize("Line-by-line diff (#{format_name} mode):", :cyan,
41
+ :bold)
42
+
43
+ return output.join("\n") if doc1.nil? || doc2.nil?
44
+
45
+ diffs_array = extract_differences(differences)
46
+
47
+ formatter = ByLine::BaseFormatter.for_format(
48
+ resolved_format,
49
+ use_color: @use_color,
50
+ context_lines: @context_lines,
51
+ diff_grouping_lines: @diff_grouping_lines,
52
+ visualization_map: @visualization_map,
53
+ show_diffs: @show_diffs,
54
+ differences: diffs_array,
55
+ diff_mode: @legacy_terminal ? :separate : @diff_mode,
56
+ legacy_terminal: @legacy_terminal,
57
+ equivalent: @comparison_equivalent,
58
+ character_visualization: @character_visualization,
59
+ )
60
+
61
+ output << formatter.format(doc1, doc2)
62
+ output.join("\n")
63
+ end
64
+
65
+ private
66
+
67
+ def extract_differences(differences)
68
+ if differences.is_a?(Canon::Comparison::ComparisonResult)
69
+ @comparison_equivalent = differences.equivalent?
70
+ differences.differences
71
+ else
72
+ @comparison_equivalent = nil
73
+ differences
74
+ end
75
+ end
76
+
77
+ def colorize(text, *colors)
78
+ return text unless @use_color
79
+
80
+ "\e[0m#{Paint[text, *colors]}"
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "paint"
4
+
5
+ module Canon
6
+ class DiffFormatter
7
+ # Handles the by_object rendering pipeline for tree-based semantic diffs.
8
+ #
9
+ # Delegates to format-specific ByObject formatters (XML, JSON, YAML)
10
+ # which produce visual tree output with box-drawing characters.
11
+ class ByObjectFormatter
12
+ def initialize(use_color:, visualization_map:, show_diffs:)
13
+ @use_color = use_color
14
+ @visualization_map = visualization_map
15
+ @show_diffs = show_diffs
16
+ end
17
+
18
+ # Format a tree-based object diff.
19
+ #
20
+ # @param differences [Array, ComparisonResult] Differences from comparison
21
+ # @param format [Symbol] Document format (:xml, :json, :yaml)
22
+ # @return [String] Formatted diff output
23
+ def format(differences, format)
24
+ output = []
25
+ output << colorize("Visual Diff:", :cyan, :bold)
26
+
27
+ diffs_array = if differences.is_a?(Canon::Comparison::ComparisonResult)
28
+ differences.differences
29
+ else
30
+ differences
31
+ end
32
+
33
+ formatter = ByObject::BaseFormatter.for_format(
34
+ format,
35
+ use_color: @use_color,
36
+ visualization_map: @visualization_map,
37
+ show_diffs: @show_diffs,
38
+ )
39
+
40
+ output << formatter.format(diffs_array, format)
41
+ output.join("\n")
42
+ end
43
+
44
+ private
45
+
46
+ def colorize(text, *colors)
47
+ return text unless @use_color
48
+
49
+ "\e[0m#{Paint[text, *colors]}"
50
+ end
51
+ end
52
+ end
53
+ end