canon 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +50 -26
- data/README.adoc +8 -3
- data/docs/advanced/diff-pipeline.adoc +36 -9
- data/docs/features/diff-formatting/colors-and-symbols.adoc +82 -0
- data/docs/features/diff-formatting/index.adoc +12 -0
- data/docs/features/diff-formatting/themes.adoc +353 -0
- data/docs/features/environment-configuration/index.adoc +23 -0
- data/docs/internals/diff-char-range-pipeline.adoc +249 -0
- data/docs/internals/diffnode-enrichment.adoc +1 -0
- data/docs/internals/index.adoc +52 -4
- data/docs/reference/environment-variables.adoc +6 -0
- data/docs/understanding/architecture.adoc +5 -0
- data/examples/show_themes.rb +217 -0
- data/lib/canon/comparison/comparison_result.rb +9 -4
- data/lib/canon/config/env_schema.rb +3 -1
- data/lib/canon/config.rb +11 -0
- data/lib/canon/diff/diff_block.rb +7 -0
- data/lib/canon/diff/diff_block_builder.rb +2 -2
- data/lib/canon/diff/diff_char_range.rb +140 -0
- data/lib/canon/diff/diff_line.rb +42 -4
- data/lib/canon/diff/diff_line_builder.rb +907 -0
- data/lib/canon/diff/diff_node.rb +5 -1
- data/lib/canon/diff/diff_node_enricher.rb +1418 -0
- data/lib/canon/diff/diff_node_mapper.rb +54 -0
- data/lib/canon/diff/source_locator.rb +105 -0
- data/lib/canon/diff/text_decomposer.rb +103 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +264 -24
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +35 -20
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +36 -19
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +33 -19
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +583 -98
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +36 -19
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +62 -13
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +59 -24
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +74 -34
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +4 -5
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +1 -1
- data/lib/canon/diff_formatter/legend.rb +4 -2
- data/lib/canon/diff_formatter/theme.rb +864 -0
- data/lib/canon/diff_formatter.rb +11 -6
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +16 -1
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +10 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +5 -1
- data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- metadata +11 -2
|
@@ -0,0 +1,907 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "diff_line"
|
|
4
|
+
require_relative "formatting_detector"
|
|
5
|
+
|
|
6
|
+
module Canon
|
|
7
|
+
module Diff
|
|
8
|
+
# Assembles DiffLines from enriched DiffNodes.
|
|
9
|
+
#
|
|
10
|
+
# This is Phase 2 of the two-phase diff pipeline. It runs after
|
|
11
|
+
# DiffNodeEnricher and before DiffBlockBuilder. It does NO computation
|
|
12
|
+
# on the change content — it simply reads pre-computed DiffCharRanges
|
|
13
|
+
# from DiffNodes and assembles them into DiffLines.
|
|
14
|
+
#
|
|
15
|
+
# The DiffLineBuilder handles:
|
|
16
|
+
# - Mapping DiffCharRanges to the correct DiffLines
|
|
17
|
+
# - Filling in unchanged context lines between changes
|
|
18
|
+
# - Detecting reflow (lines that moved between documents)
|
|
19
|
+
# - Computing line correspondence without LCS
|
|
20
|
+
class DiffLineBuilder
|
|
21
|
+
# Build DiffLines from enriched DiffNodes.
|
|
22
|
+
#
|
|
23
|
+
# @param diff_nodes [Array<DiffNode>] Enriched DiffNodes with char_ranges
|
|
24
|
+
# @param text1 [String] The first document (preprocessed)
|
|
25
|
+
# @param text2 [String] The second document (preprocessed)
|
|
26
|
+
# @return [Array<DiffLine>] The assembled diff lines
|
|
27
|
+
def self.build(diff_nodes, text1, text2)
  # Convenience entry point: returns an empty list when there are no nodes
  # or either document is missing, otherwise delegates to an instance.
  nothing_to_build = diff_nodes.nil? || diff_nodes.empty? ||
                     text1.nil? || text2.nil?
  return [] if nothing_to_build

  new(diff_nodes, text1, text2).build
end
|
|
33
|
+
|
|
34
|
+
def initialize(diff_nodes, text1, text2)
  @diff_nodes = diff_nodes
  @text1 = text1
  @text2 = text2

  # Both documents are split on "\n" once, up front.
  @lines1, @lines2 = [text1, text2].map { |text| text.split("\n") }

  # Per-document lookup from line content to the indices holding that
  # content; the gap-handling methods use these for content lookups.
  @line_to_indices1, @line_to_indices2 =
    [@lines1, @lines2].map { |lines| build_line_index(lines) }
end
|
|
45
|
+
|
|
46
|
+
# Maximum number of reflow lines before switching to summary mode.
|
|
47
|
+
# When at least this many lines are unmatched in a reflow gap, a summary
|
|
48
|
+
# line is emitted instead of listing each individual line.
|
|
49
|
+
REFLOW_SUMMARY_THRESHOLD = 2
|
|
50
|
+
|
|
51
|
+
def build
  # Walks the DiffNodes in document order with one cursor per text, emitting
  # the unchanged gap before each node, then the node's own lines, and
  # finally whatever remains after the last node.
  with_ranges = @diff_nodes.reject do |dn|
    !dn.char_ranges || dn.char_ranges.empty?
  end
  ordered = with_ranges.sort_by { |dn| sort_key(dn) }

  lines = []
  pos1 = 0 # next unconsumed line index in text1
  pos2 = 0 # next unconsumed line index in text2

  ordered.each do |node|
    before = node.line_range_before
    after = node.line_range_after

    # A side without a line range is taken to start at its current cursor.
    start1 = before ? before[0] : pos1
    start2 = after ? after[0] : pos2

    # Skip nodes that start behind a cursor; a side with no range can
    # never trigger the skip.
    next if before && pos1 > start1
    next if after && pos2 > start2

    emit_unchanged(lines, pos1, start1, pos2, start2)
    handle_reflow(lines, pos1, start1, pos2, start2, node)
    emit_changed(lines, node)

    # Advance both cursors past the node. A side with no range of its own
    # advances by the size of the gap that was just emitted on the other
    # side. Both new values are computed from the old cursors before
    # either is overwritten.
    next1 =
      if before
        before[1] + 1
      elsif after
        pos1 + (start2 - pos2)
      else
        start1 + 1
      end
    next2 =
      if after
        after[1] + 1
      elsif before
        pos2 + (start1 - pos1)
      else
        start2 + 1
      end
    pos1 = next1
    pos2 = next2
  end

  # Trailing lines after the last node.
  emit_unchanged(lines, pos1, @lines1.length, pos2, @lines2.length)

  lines
end
|
|
118
|
+
|
|
119
|
+
private
|
|
120
|
+
|
|
121
|
+
# Sort key for ordering DiffNodes by position in the document.
|
|
122
|
+
def sort_key(diff_node)
  # Prefer the text1 range and fall back to the text2 range; a node with
  # neither range sorts last.
  first_range = diff_node.line_range_before
  first_range ||= diff_node.line_range_after
  return Float::INFINITY unless first_range

  first_range[0]
end
|
|
126
|
+
|
|
127
|
+
# Emit unchanged DiffLines between two cursor positions.
|
|
128
|
+
#
|
|
129
|
+
# @param result [Array<DiffLine>] output array
|
|
130
|
+
# @param from1 [Integer] start line in text1
|
|
131
|
+
# @param to1 [Integer] end line (exclusive) in text1
|
|
132
|
+
# @param from2 [Integer] start line in text2
|
|
133
|
+
# @param to2 [Integer] end line (exclusive) in text2
|
|
134
|
+
def emit_unchanged(result, from1, to1, from2, to2)
  # Emits the lines lying between two change positions. Equal-sized gaps are
  # paired line by line; unequal gaps are handed to the gap/reflow helpers.
  span1 = to1 - from1
  span2 = to2 - from2

  if span1 >= 0 && span1 == span2
    span1.times do |offset|
      idx1 = from1 + offset
      idx2 = from2 + offset
      in_text1 = idx1 < @lines1.length
      # Skip positions that lie past the end of both documents.
      next unless in_text1 || idx2 < @lines2.length

      result << DiffLine.new(
        line_number: idx1,
        new_position: idx2,
        # Content comes from text1 when available, otherwise from text2.
        content: in_text1 ? @lines1[idx1] : @lines2[idx2],
        type: :unchanged,
      )
    end
  elsif span1.positive? && span2.positive?
    # Both sides have a gap but of different sizes: gap handling when some
    # line is missing from the other text, reflow handling otherwise.
    if slice_middle_orphaned?(@lines1[from1...to1], @lines2[from2...to2])
      emit_gap_lines(result, from1, to1, from2, to2, span1, span2)
    else
      emit_unchanged_with_reflow(result, from1, to1, from2, to2)
    end
  elsif span1.positive? || span2.positive?
    # Only one side has a gap.
    emit_gap_lines(result, from1, to1, from2, to2, span1, span2)
  end
end
|
|
177
|
+
|
|
178
|
+
# Check if the middle content (after removing common prefix/suffix) is truly
|
|
179
|
+
# orphaned - meaning it exists in only one text, not both.
|
|
180
|
+
# Returns true if content exists in only one text (not reflow).
|
|
181
|
+
def slice_middle_orphaned?(slice1, slice2)
  # True when at least one line of either slice has no exact-content match
  # anywhere in the other document. The slices are checked as given; no
  # prefix or suffix is trimmed here.
  return false if slice1.empty? || slice2.empty?

  missing_from_text2 = slice1.any? { |line| !@line_to_indices2.key?(line) }
  missing_from_text1 = slice2.any? { |line| !@line_to_indices1.key?(line) }

  missing_from_text2 || missing_from_text1
end
|
|
196
|
+
|
|
197
|
+
# Handle gap lines when one text has more lines than the other.
|
|
198
|
+
# Determines whether lines are orphaned (exist in both texts at different
|
|
199
|
+
# positions) or reflow (formatting-only).
|
|
200
|
+
#
|
|
201
|
+
# IMPORTANT: We never emit DiffLines without diff_nodes for gap content.
|
|
202
|
+
# If content exists in one text but not the other, the comparison should
|
|
203
|
+
# have reported it as a diff_node. We only emit :unchanged for orphaned
|
|
204
|
+
# content when we can find it in the other text at a different position.
|
|
205
|
+
def emit_gap_lines(result, from1, to1, from2, to2, count1, count2)
  # Emits lines for a gap whose two sides differ in size.
  #
  # Gap lines are only ever emitted as :unchanged, and only when their exact
  # content is present somewhere in the other text (matched to the
  # occurrence nearest the other side's gap start). Lines with no match are
  # not emitted individually; larger gaps with unmatched lines are handed to
  # emit_reflow_summary.
  #
  # count1 / count2 are the caller-computed gap sizes (to - from). Either may
  # be zero, and count1 may be negative in the text2-only branch below.
  if count1.positive?
    if count1 >= REFLOW_SUMMARY_THRESHOLD
      all_exist_in_text2 = (0...count1).all? do |i|
        line_idx = from1 + i
        line_idx < @lines1.length &&
          @line_to_indices2.key?(@lines1[line_idx])
      end
      if all_exist_in_text2
        # Every text1 gap line also occurs in text2: emit them as
        # repositioned-but-unchanged lines (text1 side drives the loop).
        emit_orphaned_unchanged(result, from1, to1, from2, to2, true)
        # If text2's gap is larger, its surplus lines are emitted as :added.
        emit_extra_added_lines(result, from1, to1, from2, count1, count2)
      else
        emit_reflow_summary(result, from1, to1, from2, to2)
      end
    else
      # Small text1 gap: look at each line on its own.
      count1.times do |i|
        line_idx = from1 + i
        next if line_idx >= @lines1.length

        content = @lines1[line_idx]
        next unless @line_to_indices2.key?(content)

        new_pos = @line_to_indices2[content].min_by do |idx|
          (idx - from2).abs
        end
        result << DiffLine.new(
          line_number: line_idx,
          new_position: new_pos,
          content: content,
          type: :unchanged,
        )
      end
    end
  elsif count2.positive?
    if count1.zero?
      # text1 has no gap at all. text2 gap lines whose content exists in
      # text1 are emitted here, numbered by their text1 position with the
      # text2 index as the secondary position.
      count2.times do |i|
        line_idx = from2 + i
        next if line_idx >= @lines2.length

        content = @lines2[line_idx]
        next unless @line_to_indices1.key?(content)

        text1_pos = @line_to_indices1[content].min_by do |idx|
          (idx - from1).abs
        end
        result << DiffLine.new(
          line_number: text1_pos,
          new_position: line_idx,
          content: content,
          type: :unchanged,
        )
      end
    elsif count2 >= REFLOW_SUMMARY_THRESHOLD
      # Only reachable with a negative count1 (text1 cursor already past
      # its target).
      all_exist_in_text1 = (0...count2).all? do |i|
        line_idx = from2 + i
        line_idx < @lines2.length &&
          @line_to_indices1.key?(@lines2[line_idx])
      end
      if all_exist_in_text1
        # The text2 gap lines all occur in text1: iterate the text2 side
        # (flag false) so the lines read are the ones just validated.
        emit_orphaned_unchanged(result, from1, to1, from2, to2, false)
      else
        emit_reflow_summary(result, from1, to1, from2, to2)
      end
    else
      # Small text2 gap: look at each line on its own. Here line_number is
      # the text2 index and new_position the matched text1 index.
      count2.times do |i|
        line_idx = from2 + i
        next if line_idx >= @lines2.length

        content = @lines2[line_idx]
        next unless @line_to_indices1.key?(content)

        new_pos = @line_to_indices1[content].min_by do |idx|
          (idx - from1).abs
        end
        result << DiffLine.new(
          line_number: line_idx,
          new_position: new_pos,
          content: content,
          type: :unchanged,
        )
      end
    end
  end
end
|
|
310
|
+
|
|
311
|
+
# Emit extra lines from text2 as :added when text2 has more lines than text1
|
|
312
|
+
# in a gap where all of text1's content exists in text2 (reflow case).
|
|
313
|
+
def emit_extra_added_lines(result, from1, to1, from2, count1, count2)
  # Emits the surplus text2 gap lines as formatting-only :added lines when
  # text2's gap (count2 lines from from2) is larger than text1's gap
  # (count1 lines, from1...to1). Lines whose content also appears in the
  # text1 gap are skipped, and at most (count2 - count1) lines are emitted.
  return unless count2 > count1

  remaining = count2 - count1

  # Array#[] with a range returns nil when the start is past the end of the
  # array; treat that as an empty gap instead of failing.
  text1_seen = (@lines1[from1...to1] || []).each_with_object({}) do |line, seen|
    seen[line] = true
  end
  gap_lines2 = @lines2[from2...(from2 + count2)] || []

  gap_lines2.each_with_index do |content, offset|
    next if text1_seen.key?(content)
    break if remaining.zero?

    remaining -= 1

    # The line's index is known from its offset in the gap. Looking it up
    # by content could return a different line with identical text.
    line_idx = from2 + offset
    result << DiffLine.new(
      line_number: line_idx,
      new_position: line_idx,
      content: content,
      type: :added,
      formatting: true,
    )
  end
end
|
|
337
|
+
|
|
338
|
+
# Emit unchanged lines when text1 and text2 have different line counts
|
|
339
|
+
# in the unchanged region. Uses prefix/suffix matching at the structural level
|
|
340
|
+
# to find which lines correspond, treating unmatched middle lines as reflow.
|
|
341
|
+
#
|
|
342
|
+
# This method handles unchanged regions between DiffNodes. Within those
|
|
343
|
+
# regions, we use prefix/suffix matching to find structural correspondence.
|
|
344
|
+
# The unmatched lines are marked as formatting-only (reflow).
|
|
345
|
+
# When many lines are unmatched, a summary is emitted instead.
|
|
346
|
+
def emit_unchanged_with_reflow(result, from1, to1, from2, to2)
  # Pairs up the leading and trailing lines of the two slices that agree
  # under strip_for_compare, emits those as :unchanged, and reports the
  # leftover middle either line by line or through emit_reflow_summary.
  old_slice = @lines1[from1...to1]
  new_slice = @lines2[from2...to2]
  return if old_slice.empty? && new_slice.empty?

  # Identical slices need no prefix/suffix analysis.
  if old_slice == new_slice
    emit_unchanged_range(result, from1, from2, old_slice.length)
    return
  end

  # Length of the matching run at the start of both slices.
  head_limit = [old_slice.length, new_slice.length].min
  head_pairs = old_slice.first(head_limit).zip(new_slice.first(head_limit))
  matching_head = head_pairs.take_while do |old_line, new_line|
    strip_for_compare(old_line) == strip_for_compare(new_line)
  end
  prefix_len = matching_head.length

  # Length of the matching run at the end, never overlapping the prefix.
  tail_limit = [old_slice.length - prefix_len,
                new_slice.length - prefix_len].min
  tail_pairs = old_slice.last(tail_limit).reverse
                        .zip(new_slice.last(tail_limit).reverse)
  matching_tail = tail_pairs.take_while do |old_line, new_line|
    strip_for_compare(old_line) == strip_for_compare(new_line)
  end
  suffix_len = matching_tail.length

  # Matching prefix: content is taken from text1.
  old_slice.first(prefix_len).each_with_index do |line, offset|
    result << DiffLine.new(
      line_number: from1 + offset,
      new_position: from2 + offset,
      content: line,
      type: :unchanged,
    )
  end

  # Unmatched middle of each side.
  middle1 = (from1 + prefix_len)...(to1 - suffix_len)
  middle2 = (from2 + prefix_len)...(to2 - suffix_len)
  unmatched = (middle1.end - middle1.begin) + (middle2.end - middle2.begin)

  if unmatched >= REFLOW_SUMMARY_THRESHOLD
    emit_reflow_summary(result, middle1.begin, middle1.end,
                        middle2.begin, middle2.end)
  else
    # Few enough to list: text1 leftovers as :removed, text2 leftovers as
    # :added, both flagged formatting-only.
    middle1.each do |line_idx|
      next if line_idx >= @lines1.length

      result << DiffLine.new(
        line_number: line_idx,
        content: @lines1[line_idx],
        type: :removed,
        formatting: true,
      )
    end

    middle2.each do |line_idx|
      next if line_idx >= @lines2.length

      result << DiffLine.new(
        line_number: line_idx,
        new_position: line_idx,
        content: @lines2[line_idx],
        type: :added,
        formatting: true,
      )
    end
  end

  # Matching suffix: content from text1 when in range, else from text2.
  suffix_len.times do |offset|
    idx1 = to1 - suffix_len + offset
    idx2 = to2 - suffix_len + offset
    in_text1 = idx1 < @lines1.length
    next unless in_text1 || idx2 < @lines2.length

    result << DiffLine.new(
      line_number: idx1,
      new_position: idx2,
      content: in_text1 ? @lines1[idx1] : @lines2[idx2],
      type: :unchanged,
    )
  end
end
|
|
445
|
+
|
|
446
|
+
# Helper to emit a range of unchanged lines
|
|
447
|
+
def emit_unchanged_range(result, from1, from2, count)
  # Emits `count` consecutive :unchanged lines, pairing text1 index
  # from1 + k with text2 index from2 + k.
  count.times do |offset|
    idx1 = from1 + offset
    idx2 = from2 + offset
    in_text1 = idx1 < @lines1.length
    # Nothing to emit once both indices are past the end of their text.
    next unless in_text1 || idx2 < @lines2.length

    result << DiffLine.new(
      line_number: idx1,
      new_position: idx2,
      # text1 supplies the content when it has the line, otherwise text2.
      content: in_text1 ? @lines1[idx1] : @lines2[idx2],
      type: :unchanged,
    )
  end
end
|
|
467
|
+
|
|
468
|
+
# Emit a summary line for large reflow gaps instead of listing each line.
|
|
469
|
+
# This prevents output explosion when documents have different formatting
|
|
470
|
+
# that causes many lines to be unmatched in prefix/suffix matching.
|
|
471
|
+
#
|
|
472
|
+
# IMPORTANT: We only emit representative removed/added lines if they
|
|
473
|
+
# actually exist in the other text. Lines that are truly orphaned
|
|
474
|
+
# (don't exist in the other text) are NOT emitted as individual lines
|
|
475
|
+
# since that would be "inventing" diffs without diff_nodes.
|
|
476
|
+
def emit_reflow_summary(result, mid_start1, mid_end1, mid_start2,
                        mid_end2)
  # Condenses an unmatched gap: shows at most the first line of each side
  # (only when that line's content also occurs in the other text), then one
  # :reflow_summary line counting whatever was not shown.
  removed_total = mid_end1 - mid_start1
  added_total = mid_end2 - mid_start2

  # First line of each side, when that side is non-empty and in range.
  if removed_total.positive? && mid_start1 < @lines1.length
    removed_sample = @lines1[mid_start1]
  end
  if added_total.positive? && mid_start2 < @lines2.length
    added_sample = @lines2[mid_start2]
  end

  removed_shown = removed_sample && @line_to_indices2.key?(removed_sample)
  added_shown = added_sample && @line_to_indices1.key?(added_sample)

  if removed_shown
    result << DiffLine.new(
      line_number: mid_start1,
      content: removed_sample,
      type: :removed,
      formatting: true,
    )
  end

  if added_shown
    result << DiffLine.new(
      line_number: mid_start2,
      new_position: mid_start2,
      content: added_sample,
      type: :added,
      formatting: true,
    )
  end

  # Lines not represented above; a shown sample accounts for one line.
  removed_hidden = removed_shown ? [removed_total - 1, 0].max : removed_total
  added_hidden = added_shown ? [added_total - 1, 0].max : added_total

  notes = []
  notes << "#{removed_hidden} more removed" if removed_hidden.positive?
  notes << "#{added_hidden} more added" if added_hidden.positive?
  return if notes.empty?

  result << DiffLine.new(
    line_number: mid_start1,
    new_position: mid_start2,
    content: "... #{notes.join(', ')} (formatting only) ...",
    type: :reflow_summary,
    formatting: true,
  )
end
|
|
527
|
+
|
|
528
|
+
# Emit orphaned lines that exist in both texts but at different positions.
|
|
529
|
+
# This handles the case where structural changes cause content to be
|
|
530
|
+
# repositioned rather than added/removed.
|
|
531
|
+
#
|
|
532
|
+
# @param result [Array<DiffLine>] output array
|
|
533
|
+
# @param from1 [Integer] start line in text1
|
|
534
|
+
# @param to1 [Integer] end line (exclusive) in text1
|
|
535
|
+
# @param from2 [Integer] start line in text2
|
|
536
|
+
# @param to2 [Integer] end line (exclusive) in text2
|
|
537
|
+
# @param text1_orphaned [Boolean] true if text1 has the orphaned lines
|
|
538
|
+
def emit_orphaned_unchanged(result, from1, to1, from2, to2,
                            text1_orphaned)
  # Emits :unchanged lines for gap content that also occurs in the other
  # text. The flag picks which side is walked: text1 lines from1...to1
  # (matched against text2) or text2 lines from2...to2 (matched against
  # text1). line_number is the index on the walked side and new_position is
  # the matching index on the other side nearest that side's gap start.
  source_lines, lookup, first, last, anchor =
    if text1_orphaned
      [@lines1, @line_to_indices2, from1, to1, from2]
    else
      [@lines2, @line_to_indices1, from2, to2, from1]
    end

  (last - first).times do |offset|
    line_idx = first + offset
    next if line_idx >= source_lines.length

    content = source_lines[line_idx]
    next unless content
    # Lines with no counterpart in the other text are not emitted.
    next unless lookup.key?(content)

    nearest = lookup[content].min_by { |idx| (idx - anchor).abs }
    result << DiffLine.new(
      line_number: line_idx,
      new_position: nearest,
      content: content,
      type: :unchanged,
    )
  end
end
|
|
584
|
+
|
|
585
|
+
# Detect reflow: lines that exist in text1 but whose content is absorbed
|
|
586
|
+
# into an adjacent changed line in text2 (or vice versa).
|
|
587
|
+
def handle_reflow(result, cursor1, node_start1, _cursor2, _node_start2,
                  diff_node)
  # When the text1 lines between the cursor and the node start reappear,
  # stripped and concatenated, inside the node's first changed text2 line,
  # flags the already-emitted :removed lines for that range as
  # formatting-only. Nothing is removed from `result`.
  return unless node_start1 - cursor1 > 0

  gap = cursor1...node_start1
  squashed = @lines1[gap].map(&:strip).join

  target_line = find_changed_line_in_text2(diff_node)
  return unless target_line
  return unless target_line.include?(squashed.strip)

  gap.each do |line_idx|
    # First not-yet-flagged :removed line carrying this text1 line number.
    candidate = result.find do |dl|
      dl.line_number == line_idx && dl.removed? && !dl.formatting?
    end
    candidate.formatting = true if candidate
  end
end
|
|
616
|
+
|
|
617
|
+
# Find the content of the changed line in text2 for a DiffNode.
|
|
618
|
+
def find_changed_line_in_text2(diff_node)
  # Returns the text2 line holding the node's earliest new-side char range,
  # or nil when the node has no new-side ranges.
  all_ranges = diff_node.char_ranges
  return nil if all_ranges.nil?

  candidates = all_ranges.select(&:new_side?)
  return nil unless candidates.any?

  earliest = candidates.min_by(&:line_number)
  earliest ? @lines2[earliest.line_number] : nil
end
|
|
627
|
+
|
|
628
|
+
# Emit DiffLines for a single DiffNode's char_ranges.
|
|
629
|
+
def emit_changed(result, diff_node)
  # Buckets the node's char ranges per side and per line number, then
  # dispatches to the changed / removed / added emitter depending on which
  # sides are present.
  ranges = diff_node.char_ranges
  return if !ranges || ranges.empty?

  old_side, new_side = ranges.partition(&:old_side?)
  old_by_line = old_side.group_by(&:line_number)
  new_by_line = new_side.group_by(&:line_number)

  if old_by_line.empty?
    # Only text2 ranges: an addition.
    emit_added_lines(result, diff_node, new_by_line) unless new_by_line.empty?
  elsif new_by_line.empty?
    # Only text1 ranges: a removal.
    emit_removed_lines(result, diff_node, old_by_line)
  else
    # Ranges on both sides: a change.
    emit_changed_lines(result, diff_node, old_by_line, new_by_line)
  end
end
|
|
662
|
+
|
|
663
|
+
# Emit DiffLines for a DiffNode that has char ranges in both texts.
#
# When exactly one old line and one new line are involved, a single
# :changed DiffLine is appended, followed by formatting-only continuation
# lines (:added first, then :removed) for any lines that
# line_range_after / line_range_before cover beyond the char-range line.
# In every other case each old line becomes a :removed DiffLine and each
# new line an :added DiffLine, all tied to the same DiffNode.
#
# @param result [Object] collector that receives DiffLines via <<
# @param diff_node [DiffNode] node the lines belong to
# @param old_line_ranges [Hash] text1 line index => char ranges
# @param new_line_ranges [Hash] text2 line index => char ranges
def emit_changed_lines(result, diff_node, old_line_ranges,
                       new_line_ranges)
  old_lines = old_line_ranges.keys.sort
  new_lines = new_line_ranges.keys.sort

  if old_lines.length != 1 || new_lines.length != 1
    # More than one line on some side: report old lines as removals and
    # new lines as additions.
    old_lines.each do |old_idx|
      old_text = @lines1[old_idx]
      result << DiffLine.new(
        line_number: old_idx,
        content: old_text,
        type: :removed,
        diff_node: diff_node,
        formatting: formatting?(diff_node, old_text, ""),
        char_ranges: sort_ranges(old_line_ranges[old_idx]),
      )
    end

    new_lines.each do |new_idx|
      new_text = @lines2[new_idx]
      result << DiffLine.new(
        line_number: new_idx, # required; mirrors new_position for added lines
        new_position: new_idx,
        content: new_text,
        type: :added,
        diff_node: diff_node,
        formatting: formatting?(diff_node, "", new_text),
        new_char_ranges: sort_ranges(new_line_ranges[new_idx]),
      )
    end
  else
    old_idx = old_lines.first
    new_idx = new_lines.first
    old_text = @lines1[old_idx]
    new_text = @lines2[new_idx]

    # Formatting detection prefers the node's serialized content over the
    # whole line: the full line carries the surrounding XML tags, which
    # would keep FormattingDetector from seeing a whitespace-only change.
    detect_before = diff_node&.serialized_before || old_text
    detect_after = diff_node&.serialized_after || new_text

    result << DiffLine.new(
      line_number: old_idx,
      new_position: new_idx,
      content: old_text,
      new_content: new_text,
      type: :changed,
      diff_node: diff_node,
      formatting: formatting?(diff_node, detect_before, detect_after),
      char_ranges: sort_ranges(old_line_ranges[old_idx]),
      new_char_ranges: sort_ranges(new_line_ranges[new_idx]),
    )

    # The node's line ranges may reach past the single line that carries
    # char ranges (multi-line text nodes); cover the remainder here.
    before_span = diff_node.line_range_before
    after_span = diff_node.line_range_after

    if after_span && new_idx < after_span[1]
      ((new_idx + 1)..after_span[1]).each do |extra_idx|
        next unless extra_idx < @lines2.length

        result << DiffLine.new(
          line_number: extra_idx,
          new_position: extra_idx,
          content: @lines2[extra_idx],
          type: :added,
          formatting: true, # continuation lines are formatting-only
        )
      end
    end

    if before_span && old_idx < before_span[1]
      ((old_idx + 1)..before_span[1]).each do |extra_idx|
        next unless extra_idx < @lines1.length

        result << DiffLine.new(
          line_number: extra_idx,
          content: @lines1[extra_idx],
          type: :removed,
          formatting: true, # continuation lines are formatting-only
        )
      end
    end
  end
end
|
|
762
|
+
|
|
763
|
+
# Emit :removed DiffLines for a DiffNode that only has text1 char ranges.
#
# One DiffLine (carrying the node and its sorted char ranges) is appended
# per line in old_line_ranges, in ascending line order. If the node's
# line_range_before ends after the last such line, the remaining lines up
# to that end (bounded by @lines1) are appended as formatting-only
# :removed lines without a node.
#
# @param result [Object] collector that receives DiffLines via <<
# @param diff_node [DiffNode] node the lines belong to
# @param old_line_ranges [Hash] text1 line index => char ranges
def emit_removed_lines(result, diff_node, old_line_ranges)
  old_lines = old_line_ranges.keys.sort

  old_lines.each do |old_idx|
    old_text = @lines1[old_idx]
    result << DiffLine.new(
      line_number: old_idx,
      content: old_text,
      type: :removed,
      diff_node: diff_node,
      formatting: formatting?(diff_node, old_text, ""),
      char_ranges: sort_ranges(old_line_ranges[old_idx]),
    )
  end

  # Multi-line elements only get char ranges on their first line; the
  # rest of the span is covered here.
  before_span = diff_node.line_range_before
  return unless before_span && old_lines.any? && old_lines.last < before_span[1]

  ((old_lines.last + 1)..before_span[1]).each do |extra_idx|
    next unless extra_idx < @lines1.length

    result << DiffLine.new(
      line_number: extra_idx,
      content: @lines1[extra_idx],
      type: :removed,
      formatting: true, # continuation lines are formatting-only
    )
  end
end
|
|
797
|
+
|
|
798
|
+
# Emit :added DiffLines for a DiffNode that only has text2 char ranges.
#
# One DiffLine (carrying the node and its sorted char ranges) is appended
# per line in new_line_ranges, in ascending line order. If the node's
# line_range_after ends after the last such line, the remaining lines up
# to that end (bounded by @lines2) are appended as formatting-only
# :added lines without a node.
#
# @param result [Object] collector that receives DiffLines via <<
# @param diff_node [DiffNode] node the lines belong to
# @param new_line_ranges [Hash] text2 line index => char ranges
def emit_added_lines(result, diff_node, new_line_ranges)
  new_lines = new_line_ranges.keys.sort

  new_lines.each do |new_idx|
    new_text = @lines2[new_idx]
    result << DiffLine.new(
      line_number: new_idx, # required; mirrors new_position for added lines
      new_position: new_idx,
      content: new_text,
      type: :added,
      diff_node: diff_node,
      formatting: formatting?(diff_node, "", new_text),
      new_char_ranges: sort_ranges(new_line_ranges[new_idx]),
    )
  end

  # Multi-line elements only get char ranges on their first line; the
  # rest of the span is covered here.
  after_span = diff_node.line_range_after
  return unless after_span && new_lines.any? && new_lines.last < after_span[1]

  ((new_lines.last + 1)..after_span[1]).each do |extra_idx|
    next unless extra_idx < @lines2.length

    result << DiffLine.new(
      line_number: extra_idx,
      new_position: extra_idx,
      content: @lines2[extra_idx],
      type: :added,
      formatting: true, # continuation lines are formatting-only
    )
  end
end
|
|
834
|
+
|
|
835
|
+
# Build a reverse lookup from line content to every position it occurs at.
#
# The returned Hash has a default block, so reading a key that is not
# present stores and returns a fresh empty Array for it.
#
# @param lines [Array<String>] lines to index
# @return [Hash{String => Array<Integer>}] content => ascending indices
def build_line_index(lines)
  lines.each_with_index.with_object(Hash.new { |h, k| h[k] = [] }) do |(line, idx), index|
    index[line] << idx
  end
end
|
|
845
|
+
|
|
846
|
+
# Order char ranges by start_col so they render consistently.
#
# @param ranges [Array, nil] char ranges responding to start_col
# @return [Array] a new array sorted by start_col; empty when ranges is nil
def sort_ranges(ranges)
  return [] unless ranges

  ranges.sort_by(&:start_col)
end
|
|
850
|
+
|
|
851
|
+
# Normalize a line for comparison by removing leading and trailing
# whitespace, so lines differing only in surrounding whitespace compare
# equal.
#
# A nil line is passed through as nil instead of raising, matching how
# comment_only_line? tolerates a missing line.
#
# @param line [String, nil] line content
# @return [String, nil] stripped copy of the line, or nil when line is nil
def strip_for_compare(line)
  line&.strip
end
|
|
855
|
+
|
|
856
|
+
# Decide whether a DiffLine should be flagged as formatting-only.
#
# Decision order:
# 1. A DiffNode whose formatting? is truthy always yields true.
# 2. Without a DiffNode, a comment-only line on either side yields true.
# 3. A DiffNode that is not normative? yields false.
# 4. Everything else (normative node, or no node and no comment-only
#    line) is decided by FormattingDetector.formatting_only?.
#
# The comment-only heuristic is intentionally skipped when a node exists,
# because it would misclassify changes to comment content.
#
# @param diff_node [DiffNode, nil] the associated DiffNode
# @param line1 [String, nil] old line content
# @param line2 [String, nil] new line content
# @return [Boolean] true if formatting-only
def formatting?(diff_node, line1, line2)
  return true if diff_node&.formatting?

  if diff_node.nil?
    # No DiffNode: fall back to heuristics
    return true if comment_only_line?(line1) || comment_only_line?(line2)
  elsif !diff_node.normative?
    # Node exists but is informative: never formatting-only
    return false
  end

  FormattingDetector.formatting_only?(line1, line2)
end
|
|
892
|
+
|
|
893
|
+
# Check if a line consists entirely of XML comments (possibly surrounded
# or separated by whitespace).
#
# Used as a heuristic: comment-only lines with no DiffNode are likely
# filtered/ignored comments, not normative differences.
#
# Every comment on the line must be complete. A line that merely starts
# with "<!--" and ends with "-->" but has other markup between two
# comments is rejected, as is the overlapping token "<!-->".
#
# @param line [String, nil] line content
# @return [Boolean] true if comment-only
def comment_only_line?(line)
  return false if line.nil?

  # One or more "<!-- ... -->" groups; the negative lookahead stops a
  # comment body at its first "-->" so nothing else can hide inside.
  line.strip.match?(/\A(?:<!--(?:(?!-->).)*-->\s*)+\z/m)
end
|
|
905
|
+
end
|
|
906
|
+
end
|
|
907
|
+
end
|