@createiq/htmldiff 1.1.0 → 1.2.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,701 @@
1
+ import { lcsAlign, textSimilarity } from './Alignment'
2
+ import { injectClass, parseOpeningTagAt } from './HtmlScanner'
3
+ import {
4
+ type CellRange,
5
+ exceedsSizeLimit,
6
+ findTopLevelTables,
7
+ makePlaceholderPrefix,
8
+ PLACEHOLDER_SUFFIX,
9
+ type RowRange,
10
+ rowKey,
11
+ sameDimensions,
12
+ spliceString,
13
+ type TableRange,
14
+ } from './TableDiff'
15
+ import { type Author, authorAttribution } from './ThreeWayDiff'
16
+ import Utils from './Utils'
17
+
18
+ /**
19
+ * Three-way table preprocessing. Same shape as the existing two-way
20
+ * `preprocessTables` but takes V1/V2/V3 and a cell-level three-way diff
21
+ * callback. All three inputs share a single placeholder nonce so V2's
22
+ * tokenisation is identical when the word-level 3-way merger sees it
23
+ * from both pair-wise analyses.
24
+ *
25
+ * Each paired table triple with identical dimensions is merged cell by
26
+ * cell in place (`diffTablePositional`). When row or cell counts differ
27
+ * between any pair, `diffTableStructural` performs a row-level V2-spine
28
+ * merge that mirrors the word-level approach. When the inputs disagree
29
+ * on table count or order (CP added or Me removed a whole table), tables
30
+ * are paired by content-LCS in `preprocessMisalignedByContent`.
31
+ */
32
+
33
/**
 * Output of the three-way table preprocessing pass: the three inputs
 * with their top-level tables swapped for placeholder tokens, plus the
 * HTML recorded for each token.
 */
export interface ThreeWayPreprocessResult {
  /** V1 with every top-level table replaced by a placeholder token. */
  modifiedV1: string
  /** V2 with every top-level table replaced by a placeholder token. */
  modifiedV2: string
  /** V3 with every top-level table replaced by a placeholder token. */
  modifiedV3: string
  /** Placeholder token → HTML registered for that token. */
  placeholderToDiff: Map<string, string>
}
39
+
40
/**
 * Callback that merges the content of one table cell across the three
 * versions and returns the HTML to place inside the V2 cell.
 */
export type ThreeWayDiffCellFn = (
  v1Cell: string,
  v2Cell: string,
  v3Cell: string
) => string
41
+
42
/**
 * Replaces every top-level table in V1/V2/V3 with a placeholder token
 * and records, per token, the HTML that represents the merged table.
 *
 * @param v1 First version of the HTML.
 * @param v2 Second version of the HTML.
 * @param v3 Third version of the HTML.
 * @param cellDiff Callback that merges one cell across the three versions.
 * @returns `null` when none of the inputs contains a table, or when any
 *   table exceeds the size limit (callers then fall back to the plain
 *   word-level diff); otherwise the rewritten inputs plus the
 *   placeholder map.
 */
export function preprocessTablesThreeWay(
  v1: string,
  v2: string,
  v3: string,
  cellDiff: ThreeWayDiffCellFn
): ThreeWayPreprocessResult | null {
  const tables1 = findTopLevelTables(v1)
  const tables2 = findTopLevelTables(v2)
  const tables3 = findTopLevelTables(v3)

  // Nothing to preprocess when no input contains a table.
  const tableCount = tables1.length + tables2.length + tables3.length
  if (tableCount === 0) return null

  // Size cap: a single oversized table in any input disables table handling.
  const oversized = [...tables1, ...tables2, ...tables3].some(table => exceedsSizeLimit(table))
  if (oversized) return null

  // One prefix shared by all three inputs so the same table gets the
  // same token text wherever it appears.
  const prefix = makePlaceholderPrefix(v1, v2, v3)

  // Positional pairing is only trusted when the counts match and each
  // positional triple is similar enough (this rejects e.g. swapped
  // tables). Everything else goes through content-based pairing.
  const pairByPosition = positionallyAligned(v1, v2, v3, tables1, tables2, tables3)
  return pairByPosition
    ? preprocessAlignedByPosition(v1, v2, v3, tables1, tables2, tables3, cellDiff, prefix)
    : preprocessMisalignedByContent(v1, v2, v3, tables1, tables2, tables3, cellDiff, prefix)
}
80
+
81
/**
 * Fast path: the i-th table of V1, V2 and V3 are treated as the same
 * logical table. Each triple is merged with `diffTableThreeWay` and all
 * three occurrences are replaced by one shared placeholder token.
 *
 * The caller is expected to have checked that the three table lists
 * have equal length.
 */
function preprocessAlignedByPosition(
  v1: string,
  v2: string,
  v3: string,
  t1s: TableRange[],
  t2s: TableRange[],
  t3s: TableRange[],
  cellDiff: ThreeWayDiffCellFn,
  placeholderPrefix: string
): ThreeWayPreprocessResult {
  // Merge every positional triple first, in document order.
  const mergedTables = t1s.map((t1, i) => diffTableThreeWay(v1, v2, v3, t1, t2s[i], t3s[i], cellDiff))

  const placeholderToDiff = new Map<string, string>()
  let modifiedV1 = v1
  let modifiedV2 = v2
  let modifiedV3 = v3

  // Substitute from the last table backwards: replacing a later range
  // never shifts the offsets of an earlier one.
  for (let i = mergedTables.length - 1; i >= 0; i--) {
    const token = `${placeholderPrefix}${i}${PLACEHOLDER_SUFFIX}`
    placeholderToDiff.set(token, mergedTables[i])
    modifiedV1 = spliceString(modifiedV1, t1s[i].tableStart, t1s[i].tableEnd, token)
    modifiedV2 = spliceString(modifiedV2, t2s[i].tableStart, t2s[i].tableEnd, token)
    modifiedV3 = spliceString(modifiedV3, t3s[i].tableStart, t3s[i].tableEnd, token)
  }

  return { modifiedV1, modifiedV2, modifiedV3, placeholderToDiff }
}
119
+
120
/**
 * Multi-table mismatch handler. Tables are paired across V1↔V2 and
 * V2↔V3 by running `lcsAlign` over their whitespace-normalised HTML
 * (`tableKey`). Every logical table then gets exactly one placeholder
 * token, which is spliced into each input that contains the table.
 *
 * Placeholder content, by where the table exists:
 *  - V1 + V2 + V3: the three-way merged table (`diffTableThreeWay`)
 *  - V2 + V3 (CP-inserted, Me-kept): V2's table wrapped as ins/cp
 *  - V1 + V2 (Me-deleted): V2's table wrapped as del/me
 *  - V2 only (CP-inserted, Me-removed): V2's table wrapped as del/me
 *    with the `cp` rejects marker
 *  - V1 only (CP-deleted): V1's table wrapped as del/cp
 *  - V3 only (Me-inserted): V3's table wrapped as ins/me
 *
 * For tables that are not present in all three inputs the attribution
 * wrapper is baked into the placeholder content here. Per the original
 * implementation notes, the word-level merger emits tag tokens verbatim
 * and cannot wrap them, so it is only relied on to position the
 * placeholder.
 *
 * Placeholder ids are allocated pass by pass (all triples, then V2+V3,
 * then V1+V2, then V2-only, V1-only, V3-only); the passes must stay in
 * this order to keep token numbering stable.
 */
function preprocessMisalignedByContent(
  v1: string,
  v2: string,
  v3: string,
  t1s: TableRange[],
  t2s: TableRange[],
  t3s: TableRange[],
  cellDiff: ThreeWayDiffCellFn,
  placeholderPrefix: string
): ThreeWayPreprocessResult {
  const k1 = t1s.map(t => tableKey(v1, t))
  const k2 = t2s.map(t => tableKey(v2, t))
  const k3 = t3s.map(t => tableKey(v3, t))

  const align12 = lcsAlign(k1, k2)
  const align23 = lcsAlign(k2, k3)

  // V2-centric lookups: index of the paired table in V1 / V3, or -1.
  const v2ToV1 = new Array<number>(t2s.length).fill(-1)
  for (const a of align12) {
    if (a.oldIdx !== null && a.newIdx !== null) v2ToV1[a.newIdx] = a.oldIdx
  }
  const v2ToV3 = new Array<number>(t2s.length).fill(-1)
  for (const a of align23) {
    if (a.oldIdx !== null && a.newIdx !== null) v2ToV3[a.oldIdx] = a.newIdx
  }

  // One slot per table per input; filled with the token that replaces it.
  const placeholderToDiff = new Map<string, string>()
  const placeholders = {
    v1: new Array<string | null>(t1s.length).fill(null),
    v2: new Array<string | null>(t2s.length).fill(null),
    v3: new Array<string | null>(t3s.length).fill(null),
  }

  let nextId = 0
  // Allocates the next token and records the HTML it stands for.
  const register = (content: string): string => {
    const placeholder = `${placeholderPrefix}${nextId++}${PLACEHOLDER_SUFFIX}`
    placeholderToDiff.set(placeholder, content)
    return placeholder
  }

  const rawTable = (html: string, table: TableRange): string => html.slice(table.tableStart, table.tableEnd)

  // Wraps a whole table in an attributed <ins>/<del>.
  const wrapWhole = (tag: 'ins' | 'del', author: Author, tableHtml: string, rejects?: Author): string =>
    Utils.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author, rejects))

  // 1. Tables present in all three inputs — full three-way diff.
  for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
    const v1Idx = v2ToV1[v2Idx]
    const v3Idx = v2ToV3[v2Idx]
    if (v1Idx === -1 || v3Idx === -1) continue
    const placeholder = register(diffTableThreeWay(v1, v2, v3, t1s[v1Idx], t2s[v2Idx], t3s[v3Idx], cellDiff))
    placeholders.v1[v1Idx] = placeholder
    placeholders.v2[v2Idx] = placeholder
    placeholders.v3[v3Idx] = placeholder
  }

  // 2. V2 tables paired only with V3 (CP-inserted, Me-kept).
  for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
    if (placeholders.v2[v2Idx] !== null) continue
    const v3Idx = v2ToV3[v2Idx]
    if (v3Idx === -1) continue
    const placeholder = register(wrapWhole('ins', 'cp', rawTable(v2, t2s[v2Idx])))
    placeholders.v2[v2Idx] = placeholder
    placeholders.v3[v3Idx] = placeholder
  }

  // 3. V2 tables paired only with V1 (kept by CP, deleted by Me).
  for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
    if (placeholders.v2[v2Idx] !== null) continue
    const v1Idx = v2ToV1[v2Idx]
    if (v1Idx === -1) continue
    const placeholder = register(wrapWhole('del', 'me', rawTable(v2, t2s[v2Idx])))
    placeholders.v1[v1Idx] = placeholder
    placeholders.v2[v2Idx] = placeholder
  }

  // 4. V2 tables paired with neither (CP-inserted AND Me-deleted = reject).
  for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
    if (placeholders.v2[v2Idx] !== null) continue
    placeholders.v2[v2Idx] = register(wrapWhole('del', 'me', rawTable(v2, t2s[v2Idx]), 'cp'))
  }

  // 5. V1 tables with no V2 counterpart (CP-deleted).
  for (let v1Idx = 0; v1Idx < t1s.length; v1Idx++) {
    if (placeholders.v1[v1Idx] !== null) continue
    placeholders.v1[v1Idx] = register(wrapWhole('del', 'cp', rawTable(v1, t1s[v1Idx])))
  }

  // 6. V3 tables with no V2 counterpart (Me-inserted).
  for (let v3Idx = 0; v3Idx < t3s.length; v3Idx++) {
    if (placeholders.v3[v3Idx] !== null) continue
    placeholders.v3[v3Idx] = register(wrapWhole('ins', 'me', rawTable(v3, t3s[v3Idx])))
  }

  // Replaces each table range with its token, last table first so the
  // offsets of earlier tables stay valid.
  const substitute = (html: string, tables: TableRange[], slots: Array<string | null>): string => {
    let result = html
    for (let i = tables.length - 1; i >= 0; i--) {
      const p = slots[i]
      if (p === null) continue
      result = spliceString(result, tables[i].tableStart, tables[i].tableEnd, p)
    }
    return result
  }

  return {
    modifiedV1: substitute(v1, t1s, placeholders.v1),
    modifiedV2: substitute(v2, t2s, placeholders.v2),
    modifiedV3: substitute(v3, t3s, placeholders.v3),
    placeholderToDiff,
  }
}
276
+
277
/**
 * Minimum `textSimilarity` between two same-position tables for them to
 * be treated as the same logical table. Pairs scoring below it are
 * assumed to be different tables (for instance after a swap) and are
 * routed to content-based pairing instead. The bar is deliberately
 * loose: it is meant to separate "edited" from "unrelated".
 */
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = 0.5

/**
 * Decides whether the tables of V1/V2/V3 may be paired 1:1 by position.
 *
 * @returns `true` only when all three lists have the same length and,
 *   at every index, neither the V1↔V2 nor the V2↔V3 similarity of the
 *   normalised table HTML falls below
 *   `POSITIONAL_PAIR_SIMILARITY_THRESHOLD`.
 */
function positionallyAligned(
  v1: string,
  v2: string,
  v3: string,
  t1s: TableRange[],
  t2s: TableRange[],
  t3s: TableRange[]
): boolean {
  const count = t2s.length
  if (t1s.length !== count || t3s.length !== count) return false

  return t2s.every((table2, i) => {
    const key1 = tableKey(v1, t1s[i])
    const key2 = tableKey(v2, table2)
    const key3 = tableKey(v3, t3s[i])
    if (textSimilarity(key1, key2) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
    if (textSimilarity(key2, key3) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
    return true
  })
}
312
+
313
/**
 * Comparison key for a table: its full HTML with every whitespace run
 * collapsed to a single space and the ends trimmed. Two tables share a
 * key only when their HTML is identical apart from whitespace.
 */
function tableKey(html: string, table: TableRange): string {
  const rawTable = html.slice(table.tableStart, table.tableEnd)
  const collapsed = rawTable.replace(/\s+/g, ' ')
  return collapsed.trim()
}
319
+
320
/**
 * Merges one table triple. When `sameDimensions` holds for both V1↔V2
 * and V2↔V3 the cells are merged by position; otherwise the row-level
 * structural merge is used.
 */
function diffTableThreeWay(
  v1: string,
  v2: string,
  v3: string,
  t1: TableRange,
  t2: TableRange,
  t3: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const shapesMatch = sameDimensions(t1, t2) && sameDimensions(t2, t3)
  const merge = shapesMatch ? diffTablePositional : diffTableStructural
  return merge(v1, v2, v3, t1, t2, t3, cellDiff)
}
334
+
335
/**
 * Positional three-way merge for tables of identical shape.
 *
 * V2's markup is copied through unchanged (table/row/cell tags,
 * attributes, whitespace between cells); only the content range of each
 * cell is replaced by `cellDiff` applied to the cells at the same
 * row/column in V1, V2 and V3. Callers must ensure the three tables
 * have matching row and cell counts.
 */
function diffTablePositional(
  v1: string,
  v2: string,
  v3: string,
  t1: TableRange,
  t2: TableRange,
  t3: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const pieces: string[] = []
  // Offset in V2 up to which markup has already been copied.
  let copiedUpTo = t2.tableStart

  t2.rows.forEach((row2, rowIdx) => {
    const row1 = t1.rows[rowIdx]
    const row3 = t3.rows[rowIdx]
    row2.cells.forEach((cell2, cellIdx) => {
      const cell1 = row1.cells[cellIdx]
      const cell3 = row3.cells[cellIdx]
      // V2 scaffolding between the previous cell content and this one.
      pieces.push(v2.slice(copiedUpTo, cell2.contentStart))
      pieces.push(
        cellDiff(
          v1.slice(cell1.contentStart, cell1.contentEnd),
          v2.slice(cell2.contentStart, cell2.contentEnd),
          v3.slice(cell3.contentStart, cell3.contentEnd)
        )
      )
      copiedUpTo = cell2.contentEnd
    })
  })

  // Remainder of V2's table after the last cell content.
  pieces.push(v2.slice(copiedUpTo, t2.tableEnd))
  return pieces.join('')
}
371
+
372
/**
 * Three-way table merge for triples whose row or cell counts differ.
 *
 * Approach:
 *  1. Align rows pair-wise with `lcsAlign` over `rowKey`s (V1↔V2 and
 *     V2↔V3).
 *  2. For every V2 row derive an origin from the first alignment
 *     (preserved from a V1 row, or CP-inserted) and a fate from the
 *     second (preserved into a V3 row, or Me-deleted).
 *  3. Walk V2's rows in order. Before each row, and once after the last,
 *     emit the off-spine rows attached to that boundary: V1 rows CP
 *     deleted (as del/cp) followed by V3 rows Me inserted (as ins/me).
 *  4. Emit each V2 row via `emitV2Row`, which combines origin and fate.
 *
 * Origin and fate are taken from their own alignment independently; no
 * attempt is made to reconcile the two alignments when they disagree.
 * The output is V2's table prefix (up to the first row), the emitted
 * rows, and V2's table suffix (after the last row).
 */
function diffTableStructural(
  v1: string,
  v2: string,
  v3: string,
  t1: TableRange,
  t2: TableRange,
  t3: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  type RowOrigin = { kind: 'preserved'; v1Idx: number } | { kind: 'cp-inserted' }
  type RowFate = { kind: 'preserved'; v3Idx: number } | { kind: 'me-deleted' }

  const keys1 = t1.rows.map(row => rowKey(v1, row))
  const keys2 = t2.rows.map(row => rowKey(v2, row))
  const keys3 = t3.rows.map(row => rowKey(v3, row))

  const alignV1V2 = lcsAlign(keys1, keys2)
  const alignV2V3 = lcsAlign(keys2, keys3)

  // Default every V2 row to "CP-inserted", then upgrade rows the
  // V1↔V2 alignment paired with a V1 row.
  const origins = t2.rows.map((): RowOrigin => ({ kind: 'cp-inserted' }))
  for (const step of alignV1V2) {
    if (step.newIdx !== null && step.oldIdx !== null) {
      origins[step.newIdx] = { kind: 'preserved', v1Idx: step.oldIdx }
    }
  }

  // Default every V2 row to "Me-deleted", then upgrade rows the
  // V2↔V3 alignment paired with a V3 row.
  const fates = t2.rows.map((): RowFate => ({ kind: 'me-deleted' }))
  for (const step of alignV2V3) {
    if (step.oldIdx !== null && step.newIdx !== null) {
      fates[step.oldIdx] = { kind: 'preserved', v3Idx: step.newIdx }
    }
  }

  // Rows that are not on the V2 spine, grouped by the V2 row index
  // they are emitted in front of (t2.rows.length = after the last row).
  const cpDeletedAt = collectCpDelRowsAtBoundary(alignV1V2, t2.rows.length)
  const meInsertedAt = collectMeInsRowsAtBoundary(alignV2V3, t2.rows.length)

  // The table is rebuilt row by row because rows may be added to or
  // removed from V2's sequence.
  const parts: string[] = [tableHeaderSlice(v2, t2)]

  const emitOffSpineRows = (boundary: number): void => {
    for (const v1RowIdx of cpDeletedAt.get(boundary) ?? []) {
      parts.push(emitFullRowAttributed(v1, t1.rows[v1RowIdx], 'del', 'cp'))
    }
    for (const v3RowIdx of meInsertedAt.get(boundary) ?? []) {
      parts.push(emitFullRowAttributed(v3, t3.rows[v3RowIdx], 'ins', 'me'))
    }
  }

  t2.rows.forEach((v2Row, r) => {
    emitOffSpineRows(r)
    parts.push(emitV2Row(v1, v2, v3, v2Row, t1, t3, origins[r], fates[r], cellDiff))
  })
  emitOffSpineRows(t2.rows.length)

  parts.push(tableFooterSlice(v2, t2))
  return parts.join('')
}
469
+
470
/**
 * Emits one V2 row according to where it came from (`origin`) and what
 * happened to it afterwards (`fate`):
 *  - CP-inserted + Me-deleted → V2 row as del/me with the `cp` rejects marker
 *  - CP-inserted + preserved  → V2 row as ins/cp
 *  - preserved  + Me-deleted  → V2 row as del/me
 *  - preserved on both sides  → per-cell three-way merge when the three
 *    rows have the same number of cells; otherwise the V2 row as del/me
 *    followed by the V3 row as ins/me. That fallback does not attribute
 *    CP's changes inside the row — a known limitation.
 */
function emitV2Row(
  v1: string,
  v2: string,
  v3: string,
  v2Row: RowRange,
  t1: TableRange,
  t3: TableRange,
  origin: { kind: 'preserved'; v1Idx: number } | { kind: 'cp-inserted' },
  fate: { kind: 'preserved'; v3Idx: number } | { kind: 'me-deleted' },
  cellDiff: ThreeWayDiffCellFn
): string {
  if (origin.kind === 'cp-inserted') {
    return fate.kind === 'me-deleted'
      ? emitFullRowAttributed(v2, v2Row, 'del', 'me', 'cp')
      : emitFullRowAttributed(v2, v2Row, 'ins', 'cp')
  }
  if (fate.kind === 'me-deleted') {
    return emitFullRowAttributed(v2, v2Row, 'del', 'me')
  }

  // Both unions are narrowed to 'preserved' from here on, so the row
  // indices are available.
  const v1Row = t1.rows[origin.v1Idx]
  const v3Row = t3.rows[fate.v3Idx]
  const cellCount = v2Row.cells.length
  const sameWidth = v1Row.cells.length === cellCount && cellCount === v3Row.cells.length
  if (sameWidth) {
    return diffRowPositional(v1, v2, v3, v1Row, v2Row, v3Row, cellDiff)
  }

  // Cell counts differ within a preserved row: replace V2's row with
  // V3's, both attributed to Me.
  const removed = emitFullRowAttributed(v2, v2Row, 'del', 'me')
  const added = emitFullRowAttributed(v3, v3Row, 'ins', 'me')
  return removed + added
}
513
+
514
/**
 * Row-scale counterpart of the positional table merge: V2's row markup
 * is copied through unchanged and each cell's content range is replaced
 * by `cellDiff` over the same-index cells of the V1, V2 and V3 rows.
 * Callers must ensure the three rows have the same number of cells.
 */
function diffRowPositional(
  v1: string,
  v2: string,
  v3: string,
  v1Row: RowRange,
  v2Row: RowRange,
  v3Row: RowRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const pieces: string[] = []
  // Offset in V2 up to which row markup has already been copied.
  let copiedUpTo = v2Row.rowStart

  v2Row.cells.forEach((cell2, idx) => {
    const cell1 = v1Row.cells[idx]
    const cell3 = v3Row.cells[idx]
    pieces.push(v2.slice(copiedUpTo, cell2.contentStart))
    pieces.push(
      cellDiff(
        v1.slice(cell1.contentStart, cell1.contentEnd),
        v2.slice(cell2.contentStart, cell2.contentEnd),
        v3.slice(cell3.contentStart, cell3.contentEnd)
      )
    )
    copiedUpTo = cell2.contentEnd
  })

  pieces.push(v2.slice(copiedUpTo, v2Row.rowEnd))
  return pieces.join('')
}
544
+
545
/**
 * Groups the V1 rows that CP deleted (alignment entries with `oldIdx`
 * set and `newIdx` null) by the V2 boundary at which they should be
 * emitted.
 *
 * The boundary of a deleted row is the index of the next alignment
 * entry that carries a V2 row, or `v2RowCount` when no V2 row follows.
 *
 * @param align V1↔V2 row alignment (`oldIdx` = V1 row, `newIdx` = V2 row).
 * @param v2RowCount Number of rows in V2; used as the trailing boundary.
 * @returns Map from boundary index to V1 row indices in document order.
 */
function collectCpDelRowsAtBoundary(align: ReturnType<typeof lcsAlign>, v2RowCount: number): Map<number, number[]> {
  const out = new Map<number, number[]>()
  let nextV2Boundary = v2RowCount
  // Deleted V1 rows seen since the last V2 row. The alignment is walked
  // backwards, so this list is in reverse document order.
  const pending: number[] = []

  // Moves the pending rows to the boundary currently in scope. A single
  // helper keeps the in-loop and trailing flush identical and relies
  // only on `Array.prototype.reverse`, which every runtime provides
  // (`toReversed` is ES2023).
  const flush = (): void => {
    if (pending.length === 0) return
    pending.reverse()
    out.set(nextV2Boundary, [...pending, ...(out.get(nextV2Boundary) ?? [])])
    pending.length = 0
  }

  for (let i = align.length - 1; i >= 0; i--) {
    const a = align[i]
    if (a.newIdx !== null) {
      // A V2 row: everything pending sits between it and the boundary
      // that was in scope, so flush there before moving the boundary.
      flush()
      nextV2Boundary = a.newIdx
    } else if (a.oldIdx !== null) {
      // Unpaired V1 row — CP deleted it.
      pending.push(a.oldIdx)
    }
  }
  // Rows deleted before the first V2 row.
  flush()
  return out
}
579
+
580
/**
 * Groups the V3 rows that Me inserted (alignment entries with `newIdx`
 * set and `oldIdx` null) by the V2 boundary at which they should be
 * emitted. Mirror image of the CP-deletion grouping.
 *
 * The boundary of an inserted row is the index of the next alignment
 * entry that carries a V2 row, or `v2RowCount` when no V2 row follows.
 *
 * @param align V2↔V3 row alignment (`oldIdx` = V2 row, `newIdx` = V3 row).
 * @param v2RowCount Number of rows in V2; used as the trailing boundary.
 * @returns Map from boundary index to V3 row indices in document order.
 */
function collectMeInsRowsAtBoundary(align: ReturnType<typeof lcsAlign>, v2RowCount: number): Map<number, number[]> {
  const out = new Map<number, number[]>()
  let nextV2Boundary = v2RowCount
  // Inserted V3 rows seen since the last V2 row. The alignment is
  // walked backwards, so this list is in reverse document order.
  const pending: number[] = []

  // Moves the pending rows to the boundary currently in scope. A single
  // helper keeps the in-loop and trailing flush identical and relies
  // only on `Array.prototype.reverse`, which every runtime provides
  // (`toReversed` is ES2023).
  const flush = (): void => {
    if (pending.length === 0) return
    pending.reverse()
    out.set(nextV2Boundary, [...pending, ...(out.get(nextV2Boundary) ?? [])])
    pending.length = 0
  }

  for (let i = align.length - 1; i >= 0; i--) {
    const a = align[i]
    if (a.oldIdx !== null) {
      // A V2 row: flush what follows it, then make it the new boundary.
      flush()
      nextV2Boundary = a.oldIdx
    } else if (a.newIdx !== null) {
      // Unpaired V3 row — Me inserted it.
      pending.push(a.newIdx)
    }
  }
  // Rows inserted before the first V2 row.
  flush()
  return out
}
608
+
609
/**
 * Returns the part of the table that precedes its first row: from
 * `tableStart` up to the first row's `rowStart`. For a table without
 * rows, returns everything except the last `'</table>'.length`
 * characters of the table range.
 */
function tableHeaderSlice(html: string, table: TableRange): string {
  const headerEnd = table.rows.length > 0 ? table.rows[0].rowStart : table.tableEnd - '</table>'.length
  return html.slice(table.tableStart, headerEnd)
}
616
+
617
/**
 * Returns the part of the table that follows its last row: from the
 * last row's `rowEnd` up to `tableEnd`. For a table without rows the
 * literal `</table>` is returned.
 */
function tableFooterSlice(html: string, table: TableRange): string {
  const rowCount = table.rows.length
  return rowCount === 0 ? '</table>' : html.slice(table.rows[rowCount - 1].rowEnd, table.tableEnd)
}
623
+
624
/**
 * Emits a row that is attributed as a whole to one author, either as an
 * insertion or as a deletion.
 *
 * The row's opening `<tr>` tag gets the attribution injected via
 * `injectAuthorAttribution`, every cell is rewritten by
 * `emitFullCellAttributed`, and all markup between cells is copied from
 * the source unchanged.
 *
 * @param html Source document the row's offsets refer to.
 * @param row Row to emit.
 * @param kind `'ins'` or `'del'`.
 * @param author Author the change is attributed to.
 * @param rejectsAuthor Set when the row is a deletion of another
 *   author's insertion (Me rejecting a CP-inserted row).
 * @returns The attributed row HTML. If the opening tag at `rowStart`
 *   cannot be parsed, the row's source HTML is returned unchanged so
 *   the row is never dropped from the output (same fallback as
 *   `emitFullCellAttributed` uses for cells).
 */
function emitFullRowAttributed(
  html: string,
  row: RowRange,
  kind: 'ins' | 'del',
  author: Author,
  rejectsAuthor?: Author
): string {
  const trOpening = parseOpeningTagAt(html, row.rowStart)
  // Unparseable <tr>: keep the row verbatim rather than emitting nothing.
  if (!trOpening) return html.slice(row.rowStart, row.rowEnd)
  const trWithAttrs = injectAuthorAttribution(html.slice(row.rowStart, trOpening.end), kind, author, rejectsAuthor)

  const out: string[] = [trWithAttrs]
  // Offset up to which the row's source has been consumed.
  let cursor = trOpening.end
  for (const cell of row.cells) {
    // Markup/whitespace between the previous cell and this one.
    out.push(html.slice(cursor, cell.cellStart))
    out.push(emitFullCellAttributed(html, cell, kind, author, rejectsAuthor))
    cursor = cell.cellEnd
  }
  // Whatever follows the last cell, including the closing tag.
  out.push(html.slice(cursor, row.rowEnd))
  return out.join('')
}
652
+
653
/**
 * Emits one cell that is attributed as a whole to an author.
 *
 * The cell's opening tag gets the attribution injected and its content
 * is wrapped in an attributed `<ins>`/`<del>` via `Utils.wrapText`.
 * Cells whose content is empty or whitespace-only keep their content
 * as-is (only the opening tag is marked). If the opening tag cannot be
 * parsed, the cell's source HTML is returned unchanged.
 */
function emitFullCellAttributed(
  html: string,
  cell: CellRange,
  kind: 'ins' | 'del',
  author: Author,
  rejectsAuthor?: Author
): string {
  const tdOpening = parseOpeningTagAt(html, cell.cellStart)
  if (!tdOpening) return html.slice(cell.cellStart, cell.cellEnd)

  const openingTag = injectAuthorAttribution(html.slice(cell.cellStart, tdOpening.end), kind, author, rejectsAuthor)
  const content = html.slice(cell.contentStart, cell.contentEnd)
  const closingTag = html.slice(cell.contentEnd, cell.cellEnd)

  // Nothing visible to wrap: leave blank content untouched.
  if (content.trim().length === 0) {
    return openingTag + content + closingTag
  }

  const wrapped = Utils.wrapText(content, kind, `diff${kind}`, authorAttribution(author, rejectsAuthor))
  return openingTag + wrapped + closingTag
}
674
+
675
/**
 * Adds author attribution to an opening tag that already exists in the
 * source (such as a `<tr>` or `<td>`): the class list gains
 * `diff<kind>` plus the extra classes from `authorAttribution`, and the
 * attribution's data attributes are appended as `data-*` attributes.
 * The attribution comes from the same `authorAttribution` call used
 * when wrapping text, so both paths share one source of metadata.
 */
function injectAuthorAttribution(
  openingTag: string,
  kind: 'ins' | 'del',
  author: Author,
  rejectsAuthor?: Author
): string {
  const { extraClasses, dataAttrs } = authorAttribution(author, rejectsAuthor)
  const classed = injectClass(openingTag, `diff${kind} ${extraClasses}`)
  return injectDataAttrs(classed, dataAttrs ?? {})
}
691
+
692
/**
 * Appends `data-<key>='<value>'` attributes to an opening tag, placing
 * them immediately before the tag's closing `>` (or `/>` for a
 * self-closing tag). Returns the tag unchanged when there are no
 * attributes to add. Values are inserted as given, without escaping.
 */
function injectDataAttrs(openingTag: string, dataAttrs: Readonly<Record<string, string>>): string {
  const entries = Object.entries(dataAttrs)
  if (entries.length === 0) return openingTag

  const rendered = entries.map(([key, value]) => ` data-${key}='${value}'`).join('')
  // `<tr>`/`<td>` are not normally self-closing; `/>` is handled anyway.
  const terminator = openingTag.endsWith('/>') ? '/>' : '>'
  const head = openingTag.slice(0, -terminator.length)
  return `${head}${rendered}${terminator}`
}