canon 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +31 -149
  3. data/README.adoc +9 -0
  4. data/docs/advanced/semantic-diff-report.adoc +96 -0
  5. data/docs/features/configuration-profiles.adoc +4 -2
  6. data/docs/features/diff-formatting/index.adoc +3 -0
  7. data/docs/features/diff-formatting/whitespace-adjacency.adoc +140 -0
  8. data/docs/features/match-options/html-policies.adoc +2 -0
  9. data/docs/features/match-options/index.adoc +40 -0
  10. data/docs/guides/choosing-configuration.adoc +12 -1
  11. data/docs/reference/cli-options.adoc +3 -0
  12. data/docs/reference/environment-variables.adoc +3 -1
  13. data/docs/reference/options-across-interfaces.adoc +7 -1
  14. data/docs/understanding/formats/html.adoc +9 -2
  15. data/lib/canon/cli.rb +4 -0
  16. data/lib/canon/commands/diff_command.rb +1 -0
  17. data/lib/canon/comparison/comparison_result.rb +95 -2
  18. data/lib/canon/comparison/html_comparator.rb +96 -11
  19. data/lib/canon/comparison/markup_comparator.rb +68 -71
  20. data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
  21. data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
  22. data/lib/canon/comparison/match_options.rb +23 -2
  23. data/lib/canon/comparison/node_inspector.rb +103 -0
  24. data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
  25. data/lib/canon/comparison/xml_comparator/child_comparison.rb +133 -55
  26. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +24 -23
  27. data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
  28. data/lib/canon/comparison/xml_comparator.rb +174 -7
  29. data/lib/canon/comparison/xml_node_comparison.rb +48 -66
  30. data/lib/canon/comparison.rb +143 -22
  31. data/lib/canon/config/env_schema.rb +2 -1
  32. data/lib/canon/config/profiles/metanorma.yml +3 -0
  33. data/lib/canon/config.rb +51 -5
  34. data/lib/canon/diff/diff_classifier.rb +55 -41
  35. data/lib/canon/diff/diff_line_builder.rb +9 -8
  36. data/lib/canon/diff/xml_serialization_formatter.rb +27 -42
  37. data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
  38. data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
  39. data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
  40. data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
  41. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +184 -26
  42. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +92 -4
  43. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
  44. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
  45. data/lib/canon/diff_formatter.rb +128 -175
  46. data/lib/canon/html/data_model.rb +10 -4
  47. data/lib/canon/pretty_printer/html.rb +76 -14
  48. data/lib/canon/pretty_printer/html_void_elements.rb +20 -0
  49. data/lib/canon/pretty_printer/xml_normalized.rb +10 -3
  50. data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
  51. data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
  52. data/lib/canon/version.rb +1 -1
  53. data/lib/canon/xml/c14n.rb +59 -5
  54. data/lib/canon/xml/data_model.rb +13 -1
  55. data/lib/canon/xml/element_matcher.rb +3 -0
  56. data/lib/canon/xml/node.rb +23 -1
  57. data/lib/canon/xml/nodes/comment_node.rb +4 -0
  58. data/lib/canon/xml/nodes/element_node.rb +4 -0
  59. data/lib/canon/xml/nodes/text_node.rb +4 -0
  60. data/lib/canon/xml/sax_builder.rb +29 -2
  61. data/lib/canon/xml/xpath_engine.rb +238 -0
  62. metadata +9 -2
@@ -13,8 +13,10 @@ module Canon
13
13
  # @param use_color [Boolean] Whether to use colors
14
14
  # @param compact [Boolean] Whether to serialize element nodes as compact XML
15
15
  # @return [String] Formatted dimension details
16
+ # rubocop:disable Lint/UnusedMethodArgument
16
17
  def self.format_dimension_details(diff, use_color, compact: false,
17
18
  expand_difference: false)
19
+ # rubocop:enable Lint/UnusedMethodArgument
18
20
  dimension = extract_dimension(diff)
19
21
 
20
22
  case dimension
@@ -23,8 +25,7 @@ expand_difference: false)
23
25
  when :namespace_declarations
24
26
  format_namespace_declarations_details(diff, use_color)
25
27
  when :element_structure
26
- format_element_structure_details(diff, use_color,
27
- expand_difference: expand_difference)
28
+ format_element_structure_details(diff, use_color)
28
29
  when :attribute_presence
29
30
  format_attribute_presence_details(diff, use_color)
30
31
  when :attribute_values
@@ -33,6 +34,8 @@ expand_difference: false)
33
34
  format_attribute_order_details(diff, use_color)
34
35
  when :text_content
35
36
  format_text_content_details(diff, use_color, compact: compact)
37
+ when :whitespace_adjacency
38
+ format_whitespace_adjacency_details(diff, use_color)
36
39
  when :structural_whitespace
37
40
  format_structural_whitespace_details(diff, use_color)
38
41
  when :comments
@@ -163,37 +166,70 @@ expand_difference: false)
163
166
 
164
167
  # Format element structure differences
165
168
  #
169
+ # Produces compact XML for both sides so the user can see attributes
170
+ # and text content, not just the tag name. Handles nil nodes that
171
+ # arise from insertions/deletions.
172
+ #
166
173
  # @param diff [DiffNode, Hash] Difference node
167
174
  # @param use_color [Boolean] Whether to use colors
168
175
  # @return [Array] Tuple of [detail1, detail2, changes]
169
- def self.format_element_structure_details(diff, use_color,
170
- expand_difference: false)
176
+ def self.format_element_structure_details(diff, use_color)
171
177
  require_relative "color_helper"
172
178
  require_relative "node_utils"
173
179
 
174
180
  node1 = extract_node1(diff)
175
181
  node2 = extract_node2(diff)
176
182
 
177
- name1 = NodeUtils.get_element_name_for_display(node1)
178
- name2 = NodeUtils.get_element_name_for_display(node2)
183
+ has1 = !node1.nil?
184
+ has2 = !node2.nil?
185
+
186
+ if has1 && has2
187
+ # Both elements present — show compact XML for both
188
+ compact1 = NodeUtils.serialize_node_compact(node1)
189
+ compact2 = NodeUtils.serialize_node_compact(node2)
190
+ detail1 = ColorHelper.colorize(compact1, :red, use_color)
191
+ detail2 = ColorHelper.colorize(compact2, :green, use_color)
192
+
193
+ name1 = NodeUtils.get_element_name_for_display(node1)
194
+ name2 = NodeUtils.get_element_name_for_display(node2)
179
195
 
180
- if expand_difference
181
- display1 = NodeUtils.serialize_node_compact(node1)
182
- display2 = NodeUtils.serialize_node_compact(node2)
183
- detail1 = ColorHelper.colorize(display1, :red, use_color)
184
- detail2 = ColorHelper.colorize(display2, :green, use_color)
196
+ changes = if name1 == name2
197
+ "Element <#{name1}> structure changed (children differ)"
198
+ else
199
+ build_structure_change_text(compact1, compact2,
200
+ use_color)
201
+ end
202
+ elsif has1
203
+ # Element removed
204
+ compact1 = NodeUtils.serialize_node_compact(node1)
205
+ detail1 = ColorHelper.colorize(compact1, :red, use_color)
206
+ detail2 = ColorHelper.colorize("(not present)", :green, use_color)
207
+ changes = "Element removed: #{ColorHelper.colorize(compact1, :red,
208
+ use_color)}"
185
209
  else
186
- detail1 = "<#{ColorHelper.colorize(name1, :red, use_color)}>"
187
- detail2 = "<#{ColorHelper.colorize(name2, :green, use_color)}>"
210
+ # Element added
211
+ compact2 = NodeUtils.serialize_node_compact(node2)
212
+ detail1 = ColorHelper.colorize("(not present)", :red, use_color)
213
+ detail2 = ColorHelper.colorize(compact2, :green, use_color)
214
+ changes = "Element added: #{ColorHelper.colorize(compact2, :green,
215
+ use_color)}"
188
216
  end
189
217
 
190
- changes = "Element differs: #{ColorHelper.colorize(name1, :red,
191
- use_color)} → " \
192
- "#{ColorHelper.colorize(name2, :green, use_color)}"
193
-
194
218
  [detail1, detail2, changes]
195
219
  end
196
220
 
221
+ # Build human-readable change text for element structure diffs
222
+ #
223
+ # @param display1 [String] Serialized expected element
224
+ # @param display2 [String] Serialized actual element
225
+ # @param use_color [Boolean] Whether to use colors
226
+ # @return [String] Change description
227
+ def self.build_structure_change_text(display1, display2, use_color)
228
+ "Element structure changed: " \
229
+ "#{ColorHelper.colorize(display1, :red, use_color)} → " \
230
+ "#{ColorHelper.colorize(display2, :green, use_color)}"
231
+ end
232
+
197
233
  # Format attribute presence differences
198
234
  #
199
235
  # @param diff [DiffNode, Hash] Difference node
@@ -332,18 +368,21 @@ expand_difference: false)
332
368
  node1 = extract_node1(diff)
333
369
  node2 = extract_node2(diff)
334
370
 
371
+ # Symmetric one-sided rendering for missing/extra text nodes.
372
+ # When exactly one side is nil, render "(not present)" on that
373
+ # side and the present side's raw text content (whitespace-
374
+ # visualised, with a brief parent open-tag hint for context).
375
+ # Mirrors format_element_structure_details above. Without this
376
+ # short-circuit, the ambiguous-pair fallback further down would
377
+ # serialize the present side's *parent subtree* in full,
378
+ # producing a misleading diff payload. See lutaml/canon#125.
379
+ if node1.nil? ^ node2.nil?
380
+ return format_text_content_one_sided(node1, node2, use_color)
381
+ end
382
+
335
383
  text1 = NodeUtils.node_to_display(node1, compact: compact)
336
384
  text2 = NodeUtils.node_to_display(node2, compact: compact)
337
385
 
338
- # Handle cases where one node is missing (e.g. text added or removed)
339
- if node1.nil? || node2.nil?
340
- if node1.nil?
341
- text2 = NodeUtils.node_to_display(node2, compact: compact)
342
- else
343
- text1 = NodeUtils.node_to_display(node1, compact: compact)
344
- end
345
- end
346
-
347
386
  if NodeUtils.inside_preserve_element?(node1) || NodeUtils.inside_preserve_element?(node2)
348
387
  detail1 = ColorHelper.colorize(
349
388
  TextUtils.visualize_whitespace(text1), :red, use_color
@@ -351,6 +390,20 @@ expand_difference: false)
351
390
  detail2 = ColorHelper.colorize(
352
391
  TextUtils.visualize_whitespace(text2), :green, use_color
353
392
  )
393
+ elsif TextUtils.ambiguous_text_pair?(text1, text2) &&
394
+ (NodeUtils.parent_of(node1) || NodeUtils.parent_of(node2))
395
+ # Both sides render to empty/whitespace-only (or identical) strings, which are
396
+ # indistinguishable after JSON quoting. Fall back to each side's
397
+ # parent element serialized compactly, with whitespace visualized
398
+ # so the reader can see the structural contrast.
399
+ ctx1 = NodeUtils.serialize_node_compact(NodeUtils.parent_of(node1))
400
+ ctx2 = NodeUtils.serialize_node_compact(NodeUtils.parent_of(node2))
401
+ detail1 = ColorHelper.colorize(
402
+ TextUtils.visualize_whitespace(ctx1), :red, use_color
403
+ )
404
+ detail2 = ColorHelper.colorize(
405
+ TextUtils.visualize_whitespace(ctx2), :green, use_color
406
+ )
354
407
  elsif compact && (node1.is_a?(Canon::Xml::Nodes::ElementNode) ||
355
408
  node2.is_a?(Canon::Xml::Nodes::ElementNode))
356
409
  # In compact mode with element nodes, display as raw XML without
@@ -384,8 +437,113 @@ expand_difference: false)
384
437
  [detail1, detail2, changes]
385
438
  end
386
439
 
440
+ # Whether a node is an element (Canon or Nokogiri), used to
441
+ # detect element-shaped diffs that have been misclassified as
442
+ # :text_content and route them to element-structure rendering.
443
+ # See lutaml/canon#125 follow-up.
444
+ def self.present_is_element?(node)
445
+ return false unless node
446
+
447
+ case node
448
+ when Canon::Xml::Node
449
+ node.node_type == :element
450
+ when Nokogiri::XML::Node
451
+ node.element?
452
+ else
453
+ false
454
+ end
455
+ end
456
+
457
+ # Render a one-sided text-content diff (one node nil, the other a
458
+ # text node). Mirrors the +has1+/+has2+ branches of
459
+ # +format_element_structure_details+: "(not present)" on the nil
460
+ # side, the present side's raw text content (whitespace-visualised
461
+ # and quoted) plus a brief parent open-tag hint for context.
462
+ #
463
+ # @param node1 [Object, nil] First node (nil if removed)
464
+ # @param node2 [Object, nil] Second node (nil if added)
465
+ # @param use_color [Boolean] Whether to apply ANSI colours
466
+ # @return [Array<String>] Tuple of [detail1, detail2, changes]
467
+ def self.format_text_content_one_sided(node1, node2, use_color)
468
+ require_relative "color_helper"
469
+ require_relative "node_utils"
470
+ require_relative "text_utils"
471
+
472
+ present = node1 || node2
473
+
474
+ # Defensive: if a one-sided text-content diff carries an
475
+ # *element* on the present side (e.g. because an upstream
476
+ # comparator misclassified an element orphan as
477
+ # :text_content), delegate to the element-structure
478
+ # formatter rather than rendering the element as +text ""+.
479
+ # The construction-side fix in lutaml/canon#125 follow-up
480
+ # removes the immediate failure mode, but other paths could
481
+ # still misclassify and the formatter must produce a
482
+ # best-effort element representation, never +text ""+.
483
+ if present_is_element?(present)
484
+ return format_element_structure_details(
485
+ { node1: node1, node2: node2 }, use_color
486
+ )
487
+ end
488
+
489
+ removed = node2.nil?
490
+
491
+ raw = NodeUtils.raw_text_value(present)
492
+ visible = TextUtils.visualize_whitespace(raw)
493
+ parent = NodeUtils.parent_of(present)
494
+ context = parent ? " in #{NodeUtils.serialize_open_tag(parent)}" : ""
495
+ present_str = "text \"#{visible}\"#{context}"
496
+
497
+ if removed
498
+ detail1 = ColorHelper.colorize(present_str, :red, use_color)
499
+ detail2 = ColorHelper.colorize("(not present)", :green, use_color)
500
+ changes = "Text removed: #{detail1}"
501
+ else
502
+ detail1 = ColorHelper.colorize("(not present)", :red, use_color)
503
+ detail2 = ColorHelper.colorize(present_str, :green, use_color)
504
+ changes = "Text added: #{detail2}"
505
+ end
506
+
507
+ [detail1, detail2, changes]
508
+ end
509
+
387
510
  # Format structural whitespace differences
388
511
  #
512
+ # Format a :whitespace_adjacency diff (#137).
513
+ #
514
+ # @param diff [DiffNode, Hash] Difference node
515
+ # @param use_color [Boolean] Whether to use colors
516
+ # @return [Array] Tuple of [detail1, detail2, changes]
517
+ def self.format_whitespace_adjacency_details(diff, use_color)
518
+ require_relative "color_helper"
519
+ require_relative "node_utils"
520
+ require_relative "text_utils"
521
+
522
+ node1 = extract_node1(diff)
523
+ node2 = extract_node2(diff)
524
+
525
+ text1 = NodeUtils.get_node_text(node1).to_s
526
+ text2 = NodeUtils.get_node_text(node2).to_s
527
+
528
+ detail1 = ColorHelper.colorize(
529
+ "\"#{TextUtils.visualize_whitespace(text1)}\"",
530
+ :red, use_color
531
+ )
532
+ detail2 = ColorHelper.colorize(
533
+ "\"#{TextUtils.visualize_whitespace(text2)}\"",
534
+ :green, use_color
535
+ )
536
+
537
+ reason = if diff.is_a?(Canon::Diff::DiffNode)
538
+ diff.reason
539
+ else
540
+ diff.is_a?(Hash) ? diff[:reason] : nil
541
+ end
542
+ changes = reason.to_s
543
+
544
+ [detail1, detail2, changes]
545
+ end
546
+
389
547
  # @param diff [DiffNode, Hash] Difference node
390
548
  # @param use_color [Boolean] Whether to use colors
391
549
  # @return [Array] Tuple of [detail1, detail2, changes]
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "nokogiri"
3
4
  require_relative "../../xml/namespace_helper"
4
5
 
5
6
  module Canon
@@ -260,12 +261,15 @@ module Canon
260
261
  end
261
262
  end
262
263
 
263
- # Serialize a Canon Xml node tree as compact XML for display.
264
+ # Serialize a node tree as compact XML for display.
264
265
  #
265
266
  # Produces a human-readable inline XML string without namespace
266
267
  # declarations and without indentation — suitable for use in Semantic
267
- # Diff Report entries. Only handles Canon::Xml::Nodes types; for any
268
- # other node (Nokogiri, etc.) falls back to +get_node_text+.
268
+ # Diff Report entries. Handles both +Canon::Xml::Nodes+ types and
269
+ # Nokogiri XML/HTML nodes (the html DOM comparison path uses
270
+ # Nokogiri nodes, so element-structure diffs originating there must
271
+ # be rendered structurally too — see issue #120). For any other
272
+ # node type, falls back to +get_node_text+.
269
273
  #
270
274
  # @param node [Object] Node to serialize
271
275
  # @return [String] Compact XML string
@@ -294,12 +298,79 @@ module Canon
294
298
  when Canon::Xml::Nodes::CommentNode
295
299
  text = node.respond_to?(:value) ? node.value.to_s : ""
296
300
  "<!--#{CGI.escapeHTML(text)}-->"
301
+ when Nokogiri::XML::Text, Nokogiri::XML::CDATA
302
+ CGI.escapeHTML(node.content.to_s)
303
+ when Nokogiri::XML::Comment
304
+ "<!--#{CGI.escapeHTML(node.content.to_s)}-->"
305
+ when Nokogiri::XML::Element
306
+ tag = node.name.to_s
307
+ attrs = node.attribute_nodes.map do |a|
308
+ " #{a.name}=\"#{CGI.escapeHTML(a.value.to_s)}\""
309
+ end.join
310
+ children_xml = node.children.map do |c|
311
+ serialize_node_compact(c)
312
+ end.join
313
+ if children_xml.empty?
314
+ "<#{tag}#{attrs}/>"
315
+ else
316
+ "<#{tag}#{attrs}>#{children_xml}</#{tag}>"
317
+ end
297
318
  else
298
- # Nokogiri nodes or other unknown types — fall back to text extraction
319
+ # Unknown node types — fall back to text extraction
299
320
  get_node_text(node)
300
321
  end
301
322
  end
302
323
 
324
+ # Serialize a node's open tag only — name + attributes, no children,
325
+ # no closing tag. Used by +format_text_content_one_sided+ to render
326
+ # a brief parent-element context hint (e.g. +<div id="A">+) for a
327
+ # one-sided text diff, instead of the full ancestor subtree that
328
+ # +serialize_node_compact+ would produce. See lutaml/canon#125.
329
+ #
330
+ # @param node [Object] Element node to serialize
331
+ # @return [String] Open-tag string, or "" for non-elements / nil
332
+ def self.serialize_open_tag(node)
333
+ require "cgi"
334
+ return "" unless node
335
+
336
+ case node
337
+ when Canon::Xml::Nodes::ElementNode
338
+ tag = node.name.to_s
339
+ attrs = node.attribute_nodes.map do |attr|
340
+ " #{attr.name}=\"#{CGI.escapeHTML(attr.value.to_s)}\""
341
+ end.join
342
+ "<#{tag}#{attrs}>"
343
+ when Nokogiri::XML::Element
344
+ tag = node.name.to_s
345
+ attrs = node.attribute_nodes.map do |a|
346
+ " #{a.name}=\"#{CGI.escapeHTML(a.value.to_s)}\""
347
+ end.join
348
+ "<#{tag}#{attrs}>"
349
+ else
350
+ ""
351
+ end
352
+ end
353
+
354
+ # Return the raw text content of a text node without stripping
355
+ # whitespace. +get_node_text+ strips ASCII whitespace, which
356
+ # destroys whitespace-only payloads that callers (e.g. one-sided
357
+ # text-content diff rendering) need to display verbatim.
358
+ #
359
+ # @param node [Object] Text node
360
+ # @return [String] Raw text content, or "" if not a text-bearing node
361
+ def self.raw_text_value(node)
362
+ return "" unless node
363
+
364
+ case node
365
+ when Canon::Xml::Node
366
+ node.value.to_s
367
+ when Nokogiri::XML::Node
368
+ node.content.to_s
369
+ else
370
+ ""
371
+ end
372
+ end
373
+
303
374
  # Return the best display string for a node.
304
375
  #
305
376
  # When +compact: true+ and the node is a Canon ElementNode, returns a
@@ -318,6 +389,23 @@ module Canon
318
389
  end
319
390
  end
320
391
 
392
+ # Return the parent of a node, or nil, regardless of the node API.
393
+ #
394
+ # Canon::Xml nodes expose +parent+; some Nokogiri-shaped nodes expose
395
+ # +parent_node+. This helper abstracts over both.
396
+ #
397
+ # @param node [Object] Node to query
398
+ # @return [Object, nil] Parent node or nil
399
+ def self.parent_of(node)
400
+ return nil unless node
401
+
402
+ if node.respond_to?(:parent)
403
+ node.parent
404
+ elsif node.respond_to?(:parent_node)
405
+ node.parent_node
406
+ end
407
+ end
408
+
321
409
  # Check if node is inside a preserve-whitespace element
322
410
  #
323
411
  # @param node [Object] Node to check
@@ -83,6 +83,35 @@ module Canon
83
83
  end.join
84
84
  end
85
85
 
86
+ # Whether two text values would be visually indistinguishable when
87
+ # rendered through the standard JSON-quoting path.
88
+ #
89
+ # Covers three cases that render as near-identical short strings,
90
+ # e.g. +""+ vs +" "+, or +":"+ vs +":"+:
91
+ # * both sides empty
92
+ # * both sides whitespace-only (possibly with different whitespace
93
+ # that JSON.generate preserves verbatim but a reader cannot tell
94
+ # apart from plain spaces)
95
+ # * both sides equal (the comparator reported a diff based on
96
+ # something the text-only extraction does not surface — e.g. a
97
+ # sibling text node that exists on one side and not the other)
98
+ #
99
+ # Callers should fall back to rendering parent-element context
100
+ # instead.
101
+ #
102
+ # @param text1 [String, nil]
103
+ # @param text2 [String, nil]
104
+ # @return [Boolean]
105
+ def self.ambiguous_text_pair?(text1, text2)
106
+ blank_or_whitespace = ->(t) {
107
+ t.nil? || t.empty? || t.match?(/\A\s+\z/)
108
+ }
109
+ return true if blank_or_whitespace.call(text1) &&
110
+ blank_or_whitespace.call(text2)
111
+
112
+ text1 == text2
113
+ end
114
+
86
115
  # Check if text contains non-ASCII or non-printable characters
87
116
  #
88
117
  # @param text [String] Text to check
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "paint"
4
+ require "diff/lcs"
5
+
6
+ module Canon
7
+ class DiffFormatter
8
+ # Handles the pretty_diff rendering pipeline for text-LCS diffs.
9
+ #
10
+ # Bypasses DiffNodeMapper entirely — runs Diff::LCS.sdiff on plain-text
11
+ # lines and renders with context windowing and colorization.
12
+ class PrettyDiffFormatter
13
+ def initialize(use_color:, context_lines:)
14
+ @use_color = use_color
15
+ @context_lines = context_lines
16
+ end
17
+
18
+ # Format a text-LCS diff between two documents.
19
+ #
20
+ # @param doc1 [String, nil] First document (already preprocessed)
21
+ # @param doc2 [String, nil] Second document (already preprocessed)
22
+ # @param format [Symbol] Document format for display name
23
+ # @return [String] Formatted diff output (only the header line when either document is nil)
24
+ def format(doc1, doc2, format:)
25
+ format_name = format.to_s.upcase
26
+
27
+ output = []
28
+ output << colorize("Pretty diff (#{format_name} mode):", :cyan, :bold)
29
+
30
+ return output.join("\n") if doc1.nil? || doc2.nil?
31
+
32
+ lines1 = doc1.lines.map(&:chomp)
33
+ lines2 = doc2.lines.map(&:chomp)
34
+
35
+ hunks = ::Diff::LCS.sdiff(lines1, lines2)
36
+
37
+ output << render_pretty_diff(hunks)
38
+ output.join("\n")
39
+ end
40
+
41
+ private
42
+
43
+ # Render sdiff hunks with context windowing and colorization.
44
+ #
45
+ # Uses the context_lines setting (default 3, clamped to >= 0). Changed hunks
46
+ # (action != "=") are expanded by context_lines in each direction;
47
+ # nearby windows are merged; a separator is emitted between
48
+ # non-adjacent blocks, and before the first block when it does not start at hunk 0.
49
+ #
50
+ # @param hunks [Array<Diff::LCS::ContextChange>] Output of Diff::LCS.sdiff
51
+ # @return [String] Rendered diff lines joined with "\n"
52
+ def render_pretty_diff(hunks)
53
+ changed = hunks.each_index.reject { |i| hunks[i].action == "=" }
54
+
55
+ return colorize(" (no differences)", :green) if changed.empty?
56
+
57
+ ctx = [@context_lines || 3, 0].max
58
+
59
+ windows = changed.map do |pos|
60
+ [
61
+ [pos - ctx, 0].max,
62
+ [pos + ctx, hunks.length - 1].min,
63
+ ]
64
+ end
65
+
66
+ merged = []
67
+ windows.each do |lo, hi|
68
+ if merged.empty? || lo > merged.last[1] + 1
69
+ merged << [lo, hi]
70
+ else
71
+ merged.last[1] = [merged.last[1], hi].max
72
+ end
73
+ end
74
+
75
+ lines = []
76
+ merged.each_with_index do |(lo, hi), block_idx|
77
+ if block_idx.positive?
78
+ lines << colorize("--- ---", :cyan)
79
+ elsif lo.positive?
80
+ lines << colorize("--- ---", :cyan)
81
+ end
82
+
83
+ (lo..hi).each do |i|
84
+ hunk = hunks[i]
85
+ case hunk.action
86
+ when "="
87
+ lines << (@use_color ? "\e[0m #{hunk.old_element}" : " #{hunk.old_element}")
88
+ when "-"
89
+ lines << colorize("- #{hunk.old_element}", :red)
90
+ when "+"
91
+ lines << colorize("+ #{hunk.new_element}", :green)
92
+ when "!"
93
+ lines << colorize("- #{hunk.old_element}", :red)
94
+ lines << colorize("+ #{hunk.new_element}", :green)
95
+ end
96
+ end
97
+ end
98
+
99
+ lines.join("\n")
100
+ end
101
+
102
+ def colorize(text, *colors)
103
+ return text unless @use_color
104
+
105
+ "\e[0m#{Paint[text, *colors]}"
106
+ end
107
+ end
108
+ end
109
+ end