canon 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +31 -149
- data/README.adoc +9 -0
- data/docs/advanced/semantic-diff-report.adoc +31 -0
- data/docs/features/configuration-profiles.adoc +4 -2
- data/docs/features/match-options/html-policies.adoc +2 -0
- data/docs/features/match-options/index.adoc +40 -0
- data/docs/guides/choosing-configuration.adoc +12 -1
- data/docs/reference/cli-options.adoc +3 -0
- data/docs/reference/options-across-interfaces.adoc +7 -1
- data/docs/understanding/formats/html.adoc +9 -2
- data/lib/canon/cli.rb +4 -0
- data/lib/canon/commands/diff_command.rb +1 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +92 -11
- data/lib/canon/comparison/markup_comparator.rb +19 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
- data/lib/canon/comparison/match_options.rb +23 -2
- data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +6 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
- data/lib/canon/comparison/xml_comparator.rb +80 -4
- data/lib/canon/comparison/xml_node_comparison.rb +29 -3
- data/lib/canon/comparison.rb +84 -22
- data/lib/canon/config/env_schema.rb +2 -1
- data/lib/canon/config/profiles/metanorma.yml +3 -0
- data/lib/canon/config.rb +51 -5
- data/lib/canon/diff/diff_classifier.rb +18 -2
- data/lib/canon/diff/diff_line_builder.rb +9 -8
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
- data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
- data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +65 -17
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +17 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
- data/lib/canon/diff_formatter.rb +57 -173
- data/lib/canon/html/data_model.rb +10 -4
- data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
- data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +59 -5
- data/lib/canon/xml/element_matcher.rb +3 -0
- data/lib/canon/xml/node.rb +8 -1
- data/lib/canon/xml/nodes/comment_node.rb +4 -0
- data/lib/canon/xml/nodes/element_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +4 -0
- data/lib/canon/xml/sax_builder.rb +11 -2
- data/lib/canon/xml/xpath_engine.rb +238 -0
- metadata +6 -2
|
@@ -13,8 +13,10 @@ module Canon
|
|
|
13
13
|
# @param use_color [Boolean] Whether to use colors
|
|
14
14
|
# @param compact [Boolean] Whether to serialize element nodes as compact XML
|
|
15
15
|
# @return [String] Formatted dimension details
|
|
16
|
+
# rubocop:disable Lint/UnusedMethodArgument
|
|
16
17
|
def self.format_dimension_details(diff, use_color, compact: false,
|
|
17
18
|
expand_difference: false)
|
|
19
|
+
# rubocop:enable Lint/UnusedMethodArgument
|
|
18
20
|
dimension = extract_dimension(diff)
|
|
19
21
|
|
|
20
22
|
case dimension
|
|
@@ -23,8 +25,7 @@ expand_difference: false)
|
|
|
23
25
|
when :namespace_declarations
|
|
24
26
|
format_namespace_declarations_details(diff, use_color)
|
|
25
27
|
when :element_structure
|
|
26
|
-
format_element_structure_details(diff, use_color
|
|
27
|
-
expand_difference: expand_difference)
|
|
28
|
+
format_element_structure_details(diff, use_color)
|
|
28
29
|
when :attribute_presence
|
|
29
30
|
format_attribute_presence_details(diff, use_color)
|
|
30
31
|
when :attribute_values
|
|
@@ -163,37 +164,70 @@ expand_difference: false)
|
|
|
163
164
|
|
|
164
165
|
# Format element structure differences
|
|
165
166
|
#
|
|
167
|
+
# Produces compact XML for both sides so the user can see attributes
|
|
168
|
+
# and text content, not just the tag name. Handles nil nodes that
|
|
169
|
+
# arise from insertions/deletions.
|
|
170
|
+
#
|
|
166
171
|
# @param diff [DiffNode, Hash] Difference node
|
|
167
172
|
# @param use_color [Boolean] Whether to use colors
|
|
168
173
|
# @return [Array] Tuple of [detail1, detail2, changes]
|
|
169
|
-
def self.format_element_structure_details(diff, use_color
|
|
170
|
-
expand_difference: false)
|
|
174
|
+
def self.format_element_structure_details(diff, use_color)
  # Helpers are loaded lazily, at call time.
  require_relative "color_helper"
  require_relative "node_utils"

  node1 = extract_node1(diff)
  node2 = extract_node2(diff)

  if node1.nil?
    # Nothing on the expected side: report the actual element as added.
    xml2 = NodeUtils.serialize_node_compact(node2)
    detail1 = ColorHelper.colorize("(not present)", :red, use_color)
    detail2 = ColorHelper.colorize(xml2, :green, use_color)
    added = ColorHelper.colorize(xml2, :green, use_color)
    changes = "Element added: #{added}"
  elsif node2.nil?
    # Nothing on the actual side: report the expected element as removed.
    xml1 = NodeUtils.serialize_node_compact(node1)
    detail1 = ColorHelper.colorize(xml1, :red, use_color)
    detail2 = ColorHelper.colorize("(not present)", :green, use_color)
    removed = ColorHelper.colorize(xml1, :red, use_color)
    changes = "Element removed: #{removed}"
  else
    # Both sides exist: show each one as compact XML.
    xml1 = NodeUtils.serialize_node_compact(node1)
    xml2 = NodeUtils.serialize_node_compact(node2)
    detail1 = ColorHelper.colorize(xml1, :red, use_color)
    detail2 = ColorHelper.colorize(xml2, :green, use_color)

    name1 = NodeUtils.get_element_name_for_display(node1)
    name2 = NodeUtils.get_element_name_for_display(node2)

    # Same display name on both sides: only the name is reported.
    # Different names: both serialized elements are reported.
    if name1 == name2
      changes = "Element <#{name1}> structure changed (children differ)"
    else
      changes = build_structure_change_text(xml1, xml2, use_color)
    end
  end

  [detail1, detail2, changes]
end
|
|
196
218
|
|
|
219
|
+
# Compose the one-line "changes" summary for an element structure diff.
#
# The expected side is colorized red and the actual side green, joined
# by an arrow.
#
# @param display1 [String] Serialized expected element
# @param display2 [String] Serialized actual element
# @param use_color [Boolean] Whether to use colors
# @return [String] Change description
def self.build_structure_change_text(display1, display2, use_color)
  expected = ColorHelper.colorize(display1, :red, use_color)
  actual = ColorHelper.colorize(display2, :green, use_color)

  "Element structure changed: #{expected} → #{actual}"
end
|
|
230
|
+
|
|
197
231
|
# Format attribute presence differences
|
|
198
232
|
#
|
|
199
233
|
# @param diff [DiffNode, Hash] Difference node
|
|
@@ -351,6 +385,20 @@ expand_difference: false)
|
|
|
351
385
|
detail2 = ColorHelper.colorize(
|
|
352
386
|
TextUtils.visualize_whitespace(text2), :green, use_color
|
|
353
387
|
)
|
|
388
|
+
elsif TextUtils.ambiguous_text_pair?(text1, text2) &&
|
|
389
|
+
(NodeUtils.parent_of(node1) || NodeUtils.parent_of(node2))
|
|
390
|
+
# Both sides render to empty/whitespace-only strings, which are
|
|
391
|
+
# indistinguishable after JSON quoting. Fall back to each side's
|
|
392
|
+
# parent element serialized compactly, with whitespace visualized
|
|
393
|
+
# so the reader can see the structural contrast.
|
|
394
|
+
ctx1 = NodeUtils.serialize_node_compact(NodeUtils.parent_of(node1))
|
|
395
|
+
ctx2 = NodeUtils.serialize_node_compact(NodeUtils.parent_of(node2))
|
|
396
|
+
detail1 = ColorHelper.colorize(
|
|
397
|
+
TextUtils.visualize_whitespace(ctx1), :red, use_color
|
|
398
|
+
)
|
|
399
|
+
detail2 = ColorHelper.colorize(
|
|
400
|
+
TextUtils.visualize_whitespace(ctx2), :green, use_color
|
|
401
|
+
)
|
|
354
402
|
elsif compact && (node1.is_a?(Canon::Xml::Nodes::ElementNode) ||
|
|
355
403
|
node2.is_a?(Canon::Xml::Nodes::ElementNode))
|
|
356
404
|
# In compact mode with element nodes, display as raw XML without
|
|
@@ -318,6 +318,23 @@ module Canon
|
|
|
318
318
|
end
|
|
319
319
|
end
|
|
320
320
|
|
|
321
|
+
# Look up a node's parent without caring which node API it implements.
#
# The +parent+ accessor is tried first; +parent_node+ is used only when
# the node does not respond to +parent+. A nil/false node, or a node
# that responds to neither accessor, yields nil.
#
# @param node [Object] Node to query
# @return [Object, nil] Parent node or nil
def self.parent_of(node)
  return unless node

  accessor = %i[parent parent_node].find { |name| node.respond_to?(name) }
  node.public_send(accessor) if accessor
end
|
|
337
|
+
|
|
321
338
|
# Check if node is inside a preserve-whitespace element
|
|
322
339
|
#
|
|
323
340
|
# @param node [Object] Node to check
|
|
@@ -83,6 +83,35 @@ module Canon
|
|
|
83
83
|
end.join
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
# Decide whether showing the two raw text values side by side would
# tell the reader nothing.
#
# The pair is ambiguous when either:
#   * both values are blank, meaning nil, empty, or made up solely of
#     characters matched by +\s+; or
#   * the two values are equal, so the reported difference must come
#     from something the text extraction does not surface.
#
# Callers are expected to fall back to rendering parent-element
# context when this returns true.
#
# @param text1 [String, nil]
# @param text2 [String, nil]
# @return [Boolean]
def self.ambiguous_text_pair?(text1, text2)
  both_blank = [text1, text2].all? do |text|
    text.nil? || text.empty? || text.match?(/\A\s+\z/)
  end

  both_blank || text1 == text2
end
|
|
114
|
+
|
|
86
115
|
# Check if text contains non-ASCII or non-printable characters
|
|
87
116
|
#
|
|
88
117
|
# @param text [String] Text to check
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "paint"
|
|
4
|
+
require "diff/lcs"
|
|
5
|
+
|
|
6
|
+
module Canon
  class DiffFormatter
    # Renders the pretty_diff mode: a plain text-LCS diff of two documents.
    #
    # Bypasses DiffNodeMapper entirely. Both documents are split into
    # lines, compared with Diff::LCS.sdiff, and rendered at whole-line
    # granularity with context windowing and optional colorization.
    class PrettyDiffFormatter
      # @param use_color [Boolean] Whether to emit ANSI colors
      # @param context_lines [Integer, nil] Unchanged lines shown around
      #   each change; nil falls back to 3, negative values are treated as 0
      def initialize(use_color:, context_lines:)
        @use_color = use_color
        @context_lines = context_lines
      end

      # Format a text-LCS diff between two documents.
      #
      # When either document is nil only the header line is returned.
      #
      # @param doc1 [String, nil] First document (already preprocessed)
      # @param doc2 [String, nil] Second document (already preprocessed)
      # @param format [Symbol] Document format, used for the header only
      # @return [String] Formatted diff output
      def format(doc1, doc2, format:)
        header = colorize("Pretty diff (#{format.to_s.upcase} mode):",
                          :cyan, :bold)
        return header if doc1.nil? || doc2.nil?

        old_lines, new_lines = [doc1, doc2].map do |doc|
          doc.lines.map(&:chomp)
        end

        body = render_pretty_diff(::Diff::LCS.sdiff(old_lines, new_lines))
        [header, body].join("\n")
      end

      private

      # Render sdiff hunks with context windowing and colorization.
      #
      # A separator line precedes every block except a first block that
      # starts at the very first hunk.
      #
      # @param hunks [Array<Diff::LCS::ContextChange>] Output of Diff::LCS.sdiff
      # @return [String] Rendered diff lines joined with "\n"
      def render_pretty_diff(hunks)
        changed = hunks.each_index.reject { |i| hunks[i].action == "=" }
        return colorize(" (no differences)", :green) if changed.empty?

        lines = []
        context_blocks(changed, hunks.length).each_with_index do |(lo, hi), block_idx|
          # One condition replaces the former if/elsif pair, whose two
          # branches emitted the identical separator.
          if block_idx.positive? || lo.positive?
            lines << colorize("--- ---", :cyan)
          end

          hunks[lo..hi].each { |hunk| lines.concat(render_hunk(hunk)) }
        end

        lines.join("\n")
      end

      # Expand each changed position by the context radius and merge
      # windows that overlap or touch.
      #
      # @param changed [Array<Integer>] Ascending indexes of changed hunks
      # @param hunk_count [Integer] Total number of hunks
      # @return [Array<Array(Integer, Integer)>] Inclusive [lo, hi] blocks
      def context_blocks(changed, hunk_count)
        radius = [@context_lines || 3, 0].max
        last_index = hunk_count - 1

        changed.each_with_object([]) do |pos, blocks|
          lo = [pos - radius, 0].max
          hi = [pos + radius, last_index].min

          if blocks.empty? || lo > blocks.last[1] + 1
            blocks << [lo, hi]
          else
            blocks.last[1] = [blocks.last[1], hi].max
          end
        end
      end

      # Render one sdiff hunk as zero, one or two output lines.
      #
      # @param hunk [Diff::LCS::ContextChange] Hunk to render
      # @return [Array<String>] Output lines; empty for an unknown action
      def render_hunk(hunk)
        case hunk.action
        when "="
          [@use_color ? "\e[0m #{hunk.old_element}" : " #{hunk.old_element}"]
        when "-"
          [colorize("- #{hunk.old_element}", :red)]
        when "+"
          [colorize("+ #{hunk.new_element}", :green)]
        when "!"
          # A replacement is shown as a removal followed by an addition.
          [
            colorize("- #{hunk.old_element}", :red),
            colorize("+ #{hunk.new_element}", :green),
          ]
        else
          []
        end
      end

      # Wrap text in the given Paint styles, prefixed with an ANSI reset.
      # Returns the text untouched when color is disabled.
      #
      # @param text [String] Text to colorize
      # @param colors [Array<Symbol>] Paint color/style names
      # @return [String]
      def colorize(text, *colors)
        return text unless @use_color

        "\e[0m#{Paint[text, *colors]}"
      end
    end
  end
end
|
data/lib/canon/diff_formatter.rb
CHANGED
|
@@ -7,6 +7,9 @@ require_relative "diff/diff_block"
|
|
|
7
7
|
require_relative "diff/diff_context"
|
|
8
8
|
require_relative "diff/diff_report"
|
|
9
9
|
require_relative "diff_formatter/debug_output"
|
|
10
|
+
require_relative "diff_formatter/by_line_formatter"
|
|
11
|
+
require_relative "diff_formatter/by_object_formatter"
|
|
12
|
+
require_relative "diff_formatter/pretty_diff_formatter"
|
|
10
13
|
|
|
11
14
|
module Canon
|
|
12
15
|
# Formatter for displaying semantic differences with color support
|
|
@@ -301,17 +304,26 @@ module Canon
|
|
|
301
304
|
# @param html_version [Symbol, nil] HTML version (:html4 or :html5)
|
|
302
305
|
# @return [String] Formatted output
|
|
303
306
|
def format(differences, format, doc1: nil, doc2: nil, html_version: nil)
|
|
304
|
-
# In by-line mode, always use by-line diff
|
|
307
|
+
# In by-line mode with both docs present, always use by-line diff
|
|
305
308
|
if @mode == :by_line && doc1 && doc2
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
+
doc1, doc2 = apply_display_preprocessing(doc1, doc2, format)
|
|
310
|
+
# rubocop:disable Layout/HashAlignment
|
|
311
|
+
return by_line_formatter.format(doc1, doc2, format: format,
|
|
312
|
+
html_version: html_version,
|
|
313
|
+
differences: differences)
|
|
314
|
+
# rubocop:enable Layout/HashAlignment
|
|
309
315
|
end
|
|
310
316
|
|
|
311
317
|
# In pretty_diff mode, always use text-LCS diff (bypasses DiffNodeMapper).
|
|
312
|
-
# pretty_diff_format handles nil doc1/doc2 itself (emits header only).
|
|
313
318
|
if @mode == :pretty_diff
|
|
314
|
-
|
|
319
|
+
d1, d2 = if doc1 && doc2
|
|
320
|
+
apply_display_preprocessing(doc1, doc2,
|
|
321
|
+
format)
|
|
322
|
+
else
|
|
323
|
+
[doc1,
|
|
324
|
+
doc2]
|
|
325
|
+
end
|
|
326
|
+
return pretty_diff_formatter.format(d1, d2, format: format)
|
|
315
327
|
end
|
|
316
328
|
|
|
317
329
|
no_diffs = if differences.respond_to?(:equivalent?)
|
|
@@ -323,12 +335,26 @@ module Canon
|
|
|
323
335
|
|
|
324
336
|
case @mode
|
|
325
337
|
when :by_line
|
|
326
|
-
|
|
327
|
-
|
|
338
|
+
if doc1 && doc2
|
|
339
|
+
doc1, doc2 = apply_display_preprocessing(doc1, doc2,
|
|
340
|
+
format)
|
|
341
|
+
end
|
|
342
|
+
# rubocop:disable Layout/HashAlignment
|
|
343
|
+
by_line_formatter.format(doc1, doc2, format: format,
|
|
344
|
+
html_version: html_version,
|
|
345
|
+
differences: differences)
|
|
346
|
+
# rubocop:enable Layout/HashAlignment
|
|
328
347
|
when :pretty_diff
|
|
329
|
-
|
|
348
|
+
d1, d2 = if doc1 && doc2
|
|
349
|
+
apply_display_preprocessing(doc1, doc2,
|
|
350
|
+
format)
|
|
351
|
+
else
|
|
352
|
+
[doc1,
|
|
353
|
+
doc2]
|
|
354
|
+
end
|
|
355
|
+
pretty_diff_formatter.format(d1, d2, format: format)
|
|
330
356
|
else
|
|
331
|
-
|
|
357
|
+
by_object_formatter.format(differences, format)
|
|
332
358
|
end
|
|
333
359
|
end
|
|
334
360
|
|
|
@@ -685,10 +711,8 @@ module Canon
|
|
|
685
711
|
# false disables all visualization
|
|
686
712
|
return {} if character_visualization == false
|
|
687
713
|
|
|
688
|
-
# :content_only
|
|
689
|
-
#
|
|
690
|
-
# keeping structural indentation whitespace plain.
|
|
691
|
-
# See docs/features/diff-formatting/character-visualization.adoc
|
|
714
|
+
# :content_only builds the full map; the by_line formatter applies
|
|
715
|
+
# it only to content portions, leaving structural indentation plain.
|
|
692
716
|
|
|
693
717
|
return visualization_map if visualization_map
|
|
694
718
|
|
|
@@ -723,177 +747,37 @@ module Canon
|
|
|
723
747
|
colorize("#{emoji}#{message}\n", :green, :bold)
|
|
724
748
|
end
|
|
725
749
|
|
|
726
|
-
#
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
# Extract differences array from ComparisonResult if needed
|
|
733
|
-
diffs_array = if differences.is_a?(Canon::Comparison::ComparisonResult)
|
|
734
|
-
differences.differences
|
|
735
|
-
else
|
|
736
|
-
differences
|
|
737
|
-
end
|
|
738
|
-
|
|
739
|
-
# Delegate to format-specific formatter
|
|
740
|
-
formatter = ByObject::BaseFormatter.for_format(
|
|
741
|
-
format,
|
|
750
|
+
# Factory methods for mode-specific formatters
|
|
751
|
+
|
|
752
|
+
# @return [ByLineFormatter]
|
|
753
|
+
def by_line_formatter
|
|
754
|
+
@by_line_formatter ||= ByLineFormatter.new(
|
|
742
755
|
use_color: @use_color,
|
|
743
756
|
visualization_map: @visualization_map,
|
|
757
|
+
context_lines: @context_lines,
|
|
758
|
+
diff_grouping_lines: @diff_grouping_lines,
|
|
744
759
|
show_diffs: @show_diffs,
|
|
760
|
+
character_visualization: @character_visualization,
|
|
761
|
+
legacy_terminal: @legacy_terminal,
|
|
762
|
+
diff_mode: @diff_mode,
|
|
745
763
|
)
|
|
746
|
-
|
|
747
|
-
output << formatter.format(diffs_array, format)
|
|
748
|
-
|
|
749
|
-
output.join("\n")
|
|
750
764
|
end
|
|
751
765
|
|
|
752
|
-
#
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
differences: [])
|
|
756
|
-
# For HTML format, use html_version if provided, otherwise default to :html4
|
|
757
|
-
if format == :html && html_version
|
|
758
|
-
format = html_version # Use :html4 or :html5
|
|
759
|
-
end
|
|
760
|
-
|
|
761
|
-
# Format display name for header
|
|
762
|
-
format_name = format.to_s.upcase
|
|
763
|
-
|
|
764
|
-
output = []
|
|
765
|
-
output << colorize("Line-by-line diff (#{format_name} mode):", :cyan,
|
|
766
|
-
:bold)
|
|
767
|
-
|
|
768
|
-
return output.join("\n") if doc1.nil? || doc2.nil?
|
|
769
|
-
|
|
770
|
-
# Apply display preprocessing (format both sides identically before diff)
|
|
771
|
-
doc1, doc2 = apply_display_preprocessing(doc1, doc2, format)
|
|
772
|
-
# Extract differences array and equivalent status from ComparisonResult if needed
|
|
773
|
-
diffs_array = if differences.is_a?(Canon::Comparison::ComparisonResult)
|
|
774
|
-
@comparison_equivalent = differences.equivalent?
|
|
775
|
-
differences.differences
|
|
776
|
-
else
|
|
777
|
-
@comparison_equivalent = nil
|
|
778
|
-
differences
|
|
779
|
-
end
|
|
780
|
-
|
|
781
|
-
# Delegate to format-specific formatter
|
|
782
|
-
formatter = ByLine::BaseFormatter.for_format(
|
|
783
|
-
format,
|
|
766
|
+
# @return [ByObjectFormatter]
|
|
767
|
+
def by_object_formatter
|
|
768
|
+
@by_object_formatter ||= ByObjectFormatter.new(
|
|
784
769
|
use_color: @use_color,
|
|
785
|
-
context_lines: @context_lines,
|
|
786
|
-
diff_grouping_lines: @diff_grouping_lines,
|
|
787
770
|
visualization_map: @visualization_map,
|
|
788
771
|
show_diffs: @show_diffs,
|
|
789
|
-
differences: diffs_array,
|
|
790
|
-
diff_mode: @legacy_terminal ? :separate : @diff_mode,
|
|
791
|
-
legacy_terminal: @legacy_terminal,
|
|
792
|
-
equivalent: @comparison_equivalent,
|
|
793
772
|
)
|
|
794
|
-
|
|
795
|
-
output << formatter.format(doc1, doc2)
|
|
796
|
-
|
|
797
|
-
output.join("\n")
|
|
798
|
-
end
|
|
799
|
-
|
|
800
|
-
# Generate a text-LCS diff against preprocessed lines (pretty_diff mode).
|
|
801
|
-
#
|
|
802
|
-
# This mode bypasses DiffNodeMapper entirely: it applies display_preprocessing
|
|
803
|
-
# to both sides, then runs Diff::LCS.sdiff on the resulting plain-text lines.
|
|
804
|
-
# It is a reliable short-term workaround for #85 (normative changes invisible
|
|
805
|
-
# in :by_line mode when DiffNodeMapper's DOM-address correlation is off).
|
|
806
|
-
#
|
|
807
|
-
# Limitations:
|
|
808
|
-
# - show_diffs :normative / :informative filter is ignored (no DiffNodes)
|
|
809
|
-
# - No inline character highlighting (whole-line granularity only)
|
|
810
|
-
#
|
|
811
|
-
# @param doc1 [String] First document
|
|
812
|
-
# @param doc2 [String] Second document
|
|
813
|
-
# @param format [Symbol] Document format
|
|
814
|
-
# @return [String] Formatted diff output
|
|
815
|
-
def pretty_diff_format(doc1, doc2, format:)
|
|
816
|
-
require "diff/lcs"
|
|
817
|
-
|
|
818
|
-
resolved_format = format
|
|
819
|
-
|
|
820
|
-
format_name = resolved_format.to_s.upcase
|
|
821
|
-
output = []
|
|
822
|
-
output << colorize("Pretty diff (#{format_name} mode):", :cyan, :bold)
|
|
823
|
-
|
|
824
|
-
return output.join("\n") if doc1.nil? || doc2.nil?
|
|
825
|
-
|
|
826
|
-
# Apply display preprocessing — same transforms as by_line_diff
|
|
827
|
-
d1, d2 = apply_display_preprocessing(doc1, doc2, resolved_format)
|
|
828
|
-
|
|
829
|
-
lines1 = d1.lines.map(&:chomp)
|
|
830
|
-
lines2 = d2.lines.map(&:chomp)
|
|
831
|
-
|
|
832
|
-
hunks = ::Diff::LCS.sdiff(lines1, lines2)
|
|
833
|
-
|
|
834
|
-
output << render_pretty_diff(hunks)
|
|
835
|
-
output.join("\n")
|
|
836
773
|
end
|
|
837
774
|
|
|
838
|
-
#
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
# @param hunks [Array<Diff::LCS::ContextChange>] Output of Diff::LCS.sdiff
|
|
845
|
-
# @return [String] Rendered diff lines joined with "\n"
|
|
846
|
-
def render_pretty_diff(hunks)
|
|
847
|
-
# Identify positions of changed hunks
|
|
848
|
-
changed = hunks.each_index.reject { |i| hunks[i].action == "=" }
|
|
849
|
-
|
|
850
|
-
return colorize(" (no differences)", :green) if changed.empty?
|
|
851
|
-
|
|
852
|
-
ctx = [@context_lines || 3, 0].max
|
|
853
|
-
|
|
854
|
-
# Build expanded windows, then merge overlapping/adjacent ones
|
|
855
|
-
windows = changed.map do |pos|
|
|
856
|
-
[
|
|
857
|
-
[pos - ctx, 0].max,
|
|
858
|
-
[pos + ctx, hunks.length - 1].min,
|
|
859
|
-
]
|
|
860
|
-
end
|
|
861
|
-
|
|
862
|
-
merged = []
|
|
863
|
-
windows.each do |lo, hi|
|
|
864
|
-
if merged.empty? || lo > merged.last[1] + 1
|
|
865
|
-
merged << [lo, hi]
|
|
866
|
-
else
|
|
867
|
-
merged.last[1] = [merged.last[1], hi].max
|
|
868
|
-
end
|
|
869
|
-
end
|
|
870
|
-
|
|
871
|
-
lines = []
|
|
872
|
-
merged.each_with_index do |(lo, hi), block_idx|
|
|
873
|
-
# Separator between non-adjacent blocks
|
|
874
|
-
if block_idx.positive?
|
|
875
|
-
lines << colorize("--- ---", :cyan)
|
|
876
|
-
elsif lo.positive?
|
|
877
|
-
lines << colorize("--- ---", :cyan)
|
|
878
|
-
end
|
|
879
|
-
|
|
880
|
-
(lo..hi).each do |i|
|
|
881
|
-
hunk = hunks[i]
|
|
882
|
-
case hunk.action
|
|
883
|
-
when "="
|
|
884
|
-
lines << (@use_color ? "\e[0m #{hunk.old_element}" : " #{hunk.old_element}")
|
|
885
|
-
when "-"
|
|
886
|
-
lines << colorize("- #{hunk.old_element}", :red)
|
|
887
|
-
when "+"
|
|
888
|
-
lines << colorize("+ #{hunk.new_element}", :green)
|
|
889
|
-
when "!"
|
|
890
|
-
lines << colorize("- #{hunk.old_element}", :red)
|
|
891
|
-
lines << colorize("+ #{hunk.new_element}", :green)
|
|
892
|
-
end
|
|
893
|
-
end
|
|
894
|
-
end
|
|
895
|
-
|
|
896
|
-
lines.join("\n")
|
|
775
|
+
# Memoized formatter for :pretty_diff mode, built on first use from
# this formatter's color and context-line settings.
#
# @return [PrettyDiffFormatter]
def pretty_diff_formatter
  return @pretty_diff_formatter if @pretty_diff_formatter

  @pretty_diff_formatter = PrettyDiffFormatter.new(
    use_color: @use_color,
    context_lines: @context_lines,
  )
end
|
|
898
782
|
|
|
899
783
|
# Apply display preprocessing to both documents before the line diff.
|
|
@@ -208,19 +208,25 @@ module Canon
|
|
|
208
208
|
|
|
209
209
|
# Build text node from Nokogiri text node
|
|
210
210
|
# HTML-specific: handles whitespace-sensitive elements (pre, code, textarea, script, style)
|
|
211
|
+
# and preserves whitespace between inline element siblings.
|
|
211
212
|
def self.build_text_node(nokogiri_text)
|
|
212
213
|
# Skip text nodes that are only whitespace between elements
|
|
213
214
|
# EXCEPT in whitespace-sensitive elements (pre, code, textarea, script, style)
|
|
214
|
-
#
|
|
215
|
+
# and when whitespace is between inline element siblings (semantically significant)
|
|
215
216
|
content = nokogiri_text.content
|
|
216
217
|
|
|
217
|
-
|
|
218
|
+
# NBSP (U+00A0) is never insignificant whitespace
|
|
219
|
+
if content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element) && !content.include?("\u00A0")
|
|
218
220
|
# Check if parent is whitespace-sensitive
|
|
219
221
|
parent_name = nokogiri_text.parent.name.downcase
|
|
220
222
|
whitespace_sensitive_tags = %w[pre code textarea script style]
|
|
221
223
|
|
|
222
|
-
#
|
|
223
|
-
|
|
224
|
+
# Check if whitespace is between inline siblings
|
|
225
|
+
require_relative "../comparison/whitespace_sensitivity"
|
|
226
|
+
unless whitespace_sensitive_tags.include?(parent_name) ||
|
|
227
|
+
Canon::Comparison::WhitespaceSensitivity.inline_whitespace_significant?(nokogiri_text)
|
|
228
|
+
return nil
|
|
229
|
+
end
|
|
224
230
|
end
|
|
225
231
|
|
|
226
232
|
# Nokogiri already handles CDATA conversion and entity resolution
|