@createiq/htmldiff 1.1.0 → 1.2.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,625 @@
1
+ import { lcsAlign, textSimilarity } from './Alignment'
2
+ import { injectClass, parseOpeningTagAt } from './HtmlScanner'
3
+ import {
4
+ type CellRange,
5
+ exceedsSizeLimit,
6
+ findTopLevelTables,
7
+ makePlaceholderPrefix,
8
+ PLACEHOLDER_SUFFIX,
9
+ type RowRange,
10
+ rowKey,
11
+ sameDimensions,
12
+ spliceString,
13
+ type TableRange,
14
+ } from './TableDiff'
15
+ import { type Author, authorAttribution } from './ThreeWayDiff'
16
+ import Utils from './Utils'
17
+
18
+ /**
19
+ * Three-way table preprocessing for the genesis-spine merge.
20
+ *
21
+ * Inputs: `genesis` (common ancestor), `cpLatest` (counterparty's
22
+ * accumulated position), `meCurrent` (Me's accumulated position). All
23
+ * three share a single placeholder nonce so genesis tokenises
24
+ * identically across both pair-wise word-level analyses.
25
+ *
26
+ * Three paths:
27
+ * 1. **Positional** — all three have the same table count AND each
28
+ * positional triple's tableKey is similar enough that 1:1 pairing
29
+ * by position is sound. Recurses cellDiff per cell, structural
30
+ * layout from genesis.
31
+ * 2. **Row-structural** — paired triples whose row/cell counts differ.
32
+ * Per-table row-level LCS against genesis; recurse on preserved
33
+ * rows, emit author-attributed full rows for the rest.
34
+ * 3. **Multi-table by content** — table counts diverge across inputs.
35
+ * Pair tables to genesis via content-LCS, then assign placeholders
36
+ * such that each placeholder appears in exactly the inputs that
37
+ * contain the underlying table. The word-level merger walks the
38
+ * genesis spine and attributes unpaired tables naturally
39
+ * (cp-only/me-only/both-agree).
40
+ */
41
+
42
/**
 * Output of the three-way table preprocessing step: each input with its
 * top-level tables swapped for placeholder tokens, plus the rendered
 * content that every placeholder stands for.
 */
export interface ThreeWayPreprocessResult {
  /** `genesis` with its table ranges replaced by placeholders. */
  modifiedGenesis: string
  /** `cpLatest` with its table ranges replaced by placeholders. */
  modifiedCp: string
  /** `meCurrent` with its table ranges replaced by placeholders. */
  modifiedMe: string
  /** Placeholder token → HTML to substitute for that placeholder. */
  placeholderToDiff: Map<string, string>
}
48
+
49
/**
 * Callback that merges one cell triple. It receives the inner content of
 * the genesis, cp and me cell (in that order) and returns the HTML that is
 * emitted in place of the genesis cell content.
 */
export type ThreeWayDiffCellFn = (
  genesisCell: string,
  cpCell: string,
  meCell: string
) => string
50
+
51
/**
 * Entry point for three-way table preprocessing.
 *
 * Locates the top-level tables of all three inputs and replaces them with
 * placeholders, choosing the positional strategy when the inputs line up
 * table-for-table and the content-pairing strategy otherwise.
 *
 * @param genesis   Common-ancestor HTML.
 * @param cpLatest  Counterparty HTML.
 * @param meCurrent Me HTML.
 * @param cellDiff  Callback used to merge individual cell triples.
 * @returns The placeholder-substituted inputs and placeholder map, or
 *   `null` when no input contains a table or when any table exceeds the
 *   size limit.
 */
export function preprocessTablesThreeWay(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  cellDiff: ThreeWayDiffCellFn
): ThreeWayPreprocessResult | null {
  const tablesG = findTopLevelTables(genesis)
  const tablesC = findTopLevelTables(cpLatest)
  const tablesM = findTopLevelTables(meCurrent)

  // Nothing to do without tables; bail out as well if any single table is too large.
  const everyTable = [...tablesG, ...tablesC, ...tablesM]
  if (everyTable.length === 0) return null
  if (everyTable.some(table => exceedsSizeLimit(table))) return null

  // One prefix derived from all three inputs, shared by every placeholder.
  const prefix = makePlaceholderPrefix(genesis, cpLatest, meCurrent)

  const canPairByPosition = positionallyAligned(genesis, cpLatest, meCurrent, tablesG, tablesC, tablesM)
  if (!canPairByPosition) {
    return preprocessByContent(genesis, cpLatest, meCurrent, tablesG, tablesC, tablesM, cellDiff, prefix)
  }
  return preprocessAlignedByPosition(genesis, cpLatest, meCurrent, tablesG, tablesC, tablesM, cellDiff, prefix)
}
84
+
85
/**
 * Positional strategy: table `i` of genesis, cp and me form one triple.
 *
 * Every triple is diffed first (front to back), then the table ranges are
 * replaced by placeholder `i` from the last table to the first so earlier
 * offsets stay valid while splicing.
 *
 * Callers must ensure the three table lists have the same length.
 */
function preprocessAlignedByPosition(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  gTables: TableRange[],
  cTables: TableRange[],
  mTables: TableRange[],
  cellDiff: ThreeWayDiffCellFn,
  placeholderPrefix: string
): ThreeWayPreprocessResult {
  // Render all table diffs up front, in document order.
  const rendered = gTables.map((tG, i) =>
    diffTableThreeWay(genesis, cpLatest, meCurrent, tG, cTables[i], mTables[i], cellDiff)
  )

  const placeholderToDiff = new Map<string, string>()
  let modifiedGenesis = genesis
  let modifiedCp = cpLatest
  let modifiedMe = meCurrent

  // Splice back to front; the map is therefore populated last-table-first.
  let i = rendered.length
  while (i-- > 0) {
    const token = `${placeholderPrefix}${i}${PLACEHOLDER_SUFFIX}`
    const tG = gTables[i]
    const tC = cTables[i]
    const tM = mTables[i]
    placeholderToDiff.set(token, rendered[i])
    modifiedGenesis = spliceString(modifiedGenesis, tG.tableStart, tG.tableEnd, token)
    modifiedCp = spliceString(modifiedCp, tC.tableStart, tC.tableEnd, token)
    modifiedMe = spliceString(modifiedMe, tM.tableStart, tM.tableEnd, token)
  }

  return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
}
117
+
118
/**
 * Content-pairing strategy for inputs that cannot be paired by position.
 *
 * cp and me tables are each aligned to the genesis tables with `lcsAlign`
 * over whitespace-normalised table keys. Placeholders are then handed out
 * in seven passes so that every placeholder occurs only in the inputs that
 * contain the underlying table:
 *
 *   1. genesis + cp + me      → recursive three-way table diff
 *   2. genesis + cp, not me   → genesis table wrapped as a `del` by me
 *   3. genesis + me, not cp   → genesis table wrapped as a `del` by cp
 *   4. genesis only           → empty content (both sides dropped it)
 *   5. cp + me, identical key → cp's table emitted unwrapped
 *   6. cp only                → cp table wrapped as an `ins` by cp
 *   7. me only                → me table wrapped as an `ins` by me
 *
 * Placeholder ids increase in the order the passes assign them.
 */
function preprocessByContent(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  gTables: TableRange[],
  cTables: TableRange[],
  mTables: TableRange[],
  cellDiff: ThreeWayDiffCellFn,
  placeholderPrefix: string
): ThreeWayPreprocessResult {
  const keysG = gTables.map(t => tableKey(genesis, t))
  const keysC = cTables.map(t => tableKey(cpLatest, t))
  const keysM = mTables.map(t => tableKey(meCurrent, t))

  // Index maps in both directions between genesis and one other input; -1 means "no partner".
  const pairToGenesis = (otherKeys: string[]): { fromGenesis: number[]; toGenesis: number[] } => {
    const fromGenesis = new Array<number>(keysG.length).fill(-1)
    const toGenesis = new Array<number>(otherKeys.length).fill(-1)
    for (const step of lcsAlign(keysG, otherKeys)) {
      if (step.oldIdx === null || step.newIdx === null) continue
      fromGenesis[step.oldIdx] = step.newIdx
      toGenesis[step.newIdx] = step.oldIdx
    }
    return { fromGenesis, toGenesis }
  }
  const cpPairs = pairToGenesis(keysC)
  const mePairs = pairToGenesis(keysM)

  // Placeholder assigned to each table of each input (null = not assigned yet).
  const slotG = new Array<string | null>(gTables.length).fill(null)
  const slotC = new Array<string | null>(cTables.length).fill(null)
  const slotM = new Array<string | null>(mTables.length).fill(null)

  const placeholderToDiff = new Map<string, string>()
  let issued = 0
  // Mint the next placeholder and record the content it stands for.
  const assign = (content: string): string => {
    const token = `${placeholderPrefix}${issued++}${PLACEHOLDER_SUFFIX}`
    placeholderToDiff.set(token, content)
    return token
  }

  const tableHtml = (source: string, t: TableRange): string => source.slice(t.tableStart, t.tableEnd)

  // Author attribution is baked into the placeholder content for tables
  // that exist on one side only; the word-level merger emits tag tokens
  // verbatim and cannot wrap them itself.
  const attributed = (tag: 'ins' | 'del', author: Author, html: string): string =>
    Utils.wrapText(html, tag, `diff${tag}`, authorAttribution(author))

  // Pass 1 — present in all three inputs.
  for (const [g, tG] of gTables.entries()) {
    const c = cpPairs.fromGenesis[g]
    const m = mePairs.fromGenesis[g]
    if (c === -1 || m === -1) continue
    const token = assign(diffTableThreeWay(genesis, cpLatest, meCurrent, tG, cTables[c], mTables[m], cellDiff))
    slotG[g] = token
    slotC[c] = token
    slotM[m] = token
  }

  // Pass 2 — genesis + cp, missing from me.
  for (const [g, tG] of gTables.entries()) {
    const c = cpPairs.fromGenesis[g]
    if (slotG[g] !== null || c === -1) continue
    const token = assign(attributed('del', 'me', tableHtml(genesis, tG)))
    slotG[g] = token
    slotC[c] = token
  }

  // Pass 3 — genesis + me, missing from cp.
  for (const [g, tG] of gTables.entries()) {
    const m = mePairs.fromGenesis[g]
    if (slotG[g] !== null || m === -1) continue
    const token = assign(attributed('del', 'cp', tableHtml(genesis, tG)))
    slotG[g] = token
    slotM[m] = token
  }

  // Pass 4 — genesis only. The placeholder lives in genesis alone and maps
  // to empty content, so nothing is rendered for it.
  for (let g = 0; g < gTables.length; g++) {
    if (slotG[g] === null) slotG[g] = assign('')
  }

  // Pass 5 — cp and me both added a table with the same key: emit it plain.
  for (const [c, tC] of cTables.entries()) {
    if (slotC[c] !== null) continue
    const wanted = keysC[c]
    const m = keysM.findIndex(
      (key, idx) => slotM[idx] === null && mePairs.toGenesis[idx] === -1 && key === wanted
    )
    if (m === -1) continue
    const token = assign(tableHtml(cpLatest, tC))
    slotC[c] = token
    slotM[m] = token
  }

  // Pass 6 — tables only cp has.
  for (const [c, tC] of cTables.entries()) {
    if (slotC[c] === null) slotC[c] = assign(attributed('ins', 'cp', tableHtml(cpLatest, tC)))
  }

  // Pass 7 — tables only me has.
  for (const [m, tM] of mTables.entries()) {
    if (slotM[m] === null) slotM[m] = assign(attributed('ins', 'me', tableHtml(meCurrent, tM)))
  }

  // Replace table ranges with their placeholders, last table first so that
  // the offsets of earlier tables remain valid.
  const substitute = (source: string, tables: TableRange[], slots: Array<string | null>): string => {
    let result = source
    for (let i = tables.length - 1; i >= 0; i--) {
      const token = slots[i]
      if (token === null) continue
      result = spliceString(result, tables[i].tableStart, tables[i].tableEnd, token)
    }
    return result
  }

  const modifiedGenesis = substitute(genesis, gTables, slotG)
  const modifiedCp = substitute(cpLatest, cTables, slotC)
  const modifiedMe = substitute(meCurrent, mTables, slotM)

  return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
}
305
+
306
/** Minimum `textSimilarity` a cp/me table key must reach against the genesis key at the same position. */
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = 0.5

/**
 * Decides whether the three inputs can be paired table-by-table by position.
 *
 * True only when all three inputs have the same number of tables and, at
 * every position, neither the cp key nor the me key scores below
 * `POSITIONAL_PAIR_SIMILARITY_THRESHOLD` against the genesis key.
 */
function positionallyAligned(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  gTables: TableRange[],
  cTables: TableRange[],
  mTables: TableRange[]
): boolean {
  const sameCount = gTables.length === cTables.length && cTables.length === mTables.length
  if (!sameCount) return false

  const tooDifferent = (a: string, b: string): boolean =>
    textSimilarity(a, b) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD

  return gTables.every((tG, i) => {
    const keyG = tableKey(genesis, tG)
    const keyC = tableKey(cpLatest, cTables[i])
    const keyM = tableKey(meCurrent, mTables[i])
    return !tooDifferent(keyG, keyC) && !tooDifferent(keyG, keyM)
  })
}
326
+
327
/**
 * Comparison key for a table: its source markup with every whitespace run
 * collapsed to a single space and the ends trimmed.
 */
function tableKey(html: string, table: TableRange): string {
  const { tableStart, tableEnd } = table
  const markup = html.slice(tableStart, tableEnd)
  const collapsed = markup.replace(/\s+/g, ' ')
  return collapsed.trim()
}
330
+
331
+ // ────────────────────────────────────────────────────────────────────────────
332
+ // Per-table diff: positional cells or row-level structural change.
333
+
334
/**
 * Diffs one table triple. Uses the positional cell walk when genesis, cp
 * and me all share the same dimensions, and the row-level structural merge
 * otherwise.
 */
function diffTableThreeWay(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  tG: TableRange,
  tC: TableRange,
  tM: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const identicalShape = sameDimensions(tG, tC) && sameDimensions(tC, tM)
  const strategy = identicalShape ? diffTablePositional : diffTableStructural
  return strategy(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
}
348
+
349
/**
 * Cell-by-cell merge for a table triple with identical dimensions.
 *
 * Everything outside cell content (table, row and cell tags, whitespace)
 * is copied verbatim from genesis; each cell's content is replaced by the
 * result of `cellDiff` on the genesis/cp/me content at the same row and
 * column. The caller is expected to have verified that `tC` and `tM`
 * match `tG`'s shape.
 */
function diffTablePositional(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  tG: TableRange,
  tC: TableRange,
  tM: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const pieces: string[] = []
  // Offset in genesis up to which scaffolding has already been copied.
  let copiedUpTo = tG.tableStart

  tG.rows.forEach((rowG, r) => {
    const rowC = tC.rows[r]
    const rowM = tM.rows[r]
    rowG.cells.forEach((cellG, col) => {
      const cellC = rowC.cells[col]
      const cellM = rowM.cells[col]
      const scaffolding = genesis.slice(copiedUpTo, cellG.contentStart)
      const merged = cellDiff(
        genesis.slice(cellG.contentStart, cellG.contentEnd),
        cpLatest.slice(cellC.contentStart, cellC.contentEnd),
        meCurrent.slice(cellM.contentStart, cellM.contentEnd)
      )
      pieces.push(scaffolding, merged)
      copiedUpTo = cellG.contentEnd
    })
  })

  // Remainder of the table after the last cell's content.
  pieces.push(genesis.slice(copiedUpTo, tG.tableEnd))
  return pieces.join('')
}
386
+
387
/**
 * Row-level merge for a table triple whose row or cell counts differ.
 *
 * cp rows and me rows are each aligned to the genesis rows with `lcsAlign`
 * over row keys. The output is assembled along the genesis rows:
 *
 * - kept by both sides   → `emitPreservedRow` (cell-level merge)
 * - dropped by cp only   → me's row, attributed as a `del` by cp
 * - dropped by me only   → cp's row, attributed as a `del` by me
 * - dropped by both      → nothing emitted
 *
 * Rows with no genesis partner are emitted at the genesis boundary they
 * were inserted at: a cp row whose key equals an inserted me row at the
 * same boundary is emitted unmarked (once); all other inserted rows are
 * attributed as an `ins` by their author, cp rows before me rows.
 *
 * The table's leading and trailing markup comes from genesis.
 */
function diffTableStructural(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  tG: TableRange,
  tC: TableRange,
  tM: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const keysG = tG.rows.map(row => rowKey(genesis, row))
  const keysC = tC.rows.map(row => rowKey(cpLatest, row))
  const keysM = tM.rows.map(row => rowKey(meCurrent, row))

  const cpAlignment = lcsAlign(keysG, keysC)
  const meAlignment = lcsAlign(keysG, keysM)

  // genesis row index → partner row index on the other side (-1 when that side dropped the row).
  const partnerLookup = (alignment: ReturnType<typeof lcsAlign>): number[] => {
    const lookup = new Array<number>(tG.rows.length).fill(-1)
    for (const step of alignment) {
      if (step.oldIdx !== null && step.newIdx !== null) lookup[step.oldIdx] = step.newIdx
    }
    return lookup
  }
  const cpRowFor = partnerLookup(cpAlignment)
  const meRowFor = partnerLookup(meAlignment)

  // Rows without a genesis partner, grouped by the genesis row index they precede.
  const cpInserted = collectInsertedRowsAtBoundary(cpAlignment, tG.rows.length)
  const meInserted = collectInsertedRowsAtBoundary(meAlignment, tG.rows.length)

  const parts: string[] = [tableHeaderSlice(genesis, tG)]

  // Emit every row inserted immediately before genesis row `boundary`.
  const flushInsertions = (boundary: number): void => {
    const fromCp = cpInserted.get(boundary) ?? []
    const unclaimedMe = new Set(meInserted.get(boundary) ?? [])

    for (const c of fromCp) {
      const rowC = tC.rows[c]
      // First still-unclaimed me row carrying the same key as this cp row.
      const twin = [...unclaimedMe].find(m => keysM[m] === keysC[c])
      if (twin === undefined) {
        parts.push(emitFullRowAttributed(cpLatest, rowC, 'ins', 'cp'))
        continue
      }
      // Both sides inserted the same row — emit cp's copy unmarked.
      unclaimedMe.delete(twin)
      parts.push(cpLatest.slice(rowC.rowStart, rowC.rowEnd))
    }

    for (const m of unclaimedMe) {
      parts.push(emitFullRowAttributed(meCurrent, tM.rows[m], 'ins', 'me'))
    }
  }

  tG.rows.forEach((rowG, g) => {
    flushInsertions(g)

    const c = cpRowFor[g]
    const m = meRowFor[g]
    const keptByCp = c !== -1
    const keptByMe = m !== -1

    if (keptByCp && keptByMe) {
      parts.push(emitPreservedRow(genesis, cpLatest, meCurrent, rowG, tC.rows[c], tM.rows[m], cellDiff))
    } else if (keptByMe) {
      // cp dropped the row: show me's version, marked as cp's deletion.
      parts.push(emitFullRowAttributed(meCurrent, tM.rows[m], 'del', 'cp'))
    } else if (keptByCp) {
      // me dropped the row: show cp's version, marked as me's deletion.
      parts.push(emitFullRowAttributed(cpLatest, tC.rows[c], 'del', 'me'))
    }
    // Dropped by both sides: emit nothing.
  })

  flushInsertions(tG.rows.length)
  parts.push(tableFooterSlice(genesis, tG))
  return parts.join('')
}
492
+
493
/**
 * Emits a row that survives in genesis, cp and me.
 *
 * With equal cell counts on all three sides the row markup is copied from
 * genesis and each cell's content is merged positionally via `cellDiff`.
 * With differing cell counts there is no cell-level merge: the genesis row
 * is emitted as a `del` by me followed by me's row as an `ins` by me, so
 * cp's changes inside that row are not shown.
 */
function emitPreservedRow(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  rG: RowRange,
  rC: RowRange,
  rM: RowRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const width = rG.cells.length
  if (rC.cells.length !== width || rM.cells.length !== width) {
    // Cell-count mismatch: whole-row replace attributed to me.
    const removed = emitFullRowAttributed(genesis, rG, 'del', 'me')
    const added = emitFullRowAttributed(meCurrent, rM, 'ins', 'me')
    return removed + added
  }

  const pieces: string[] = []
  let copiedUpTo = rG.rowStart
  rG.cells.forEach((cellG, col) => {
    const cellC = rC.cells[col]
    const cellM = rM.cells[col]
    pieces.push(genesis.slice(copiedUpTo, cellG.contentStart))
    pieces.push(
      cellDiff(
        genesis.slice(cellG.contentStart, cellG.contentEnd),
        cpLatest.slice(cellC.contentStart, cellC.contentEnd),
        meCurrent.slice(cellM.contentStart, cellM.contentEnd)
      )
    )
    copiedUpTo = cellG.contentEnd
  })
  pieces.push(genesis.slice(copiedUpTo, rG.rowEnd))
  return pieces.join('')
}
528
+
529
/**
 * Groups the new-side rows that have no genesis partner by the genesis
 * boundary they were inserted at.
 *
 * Boundary `b` means "immediately before genesis row `b`"; boundary
 * `genesisRowCount` means "after the last genesis row". Row indices within
 * one boundary are returned in ascending alignment order.
 *
 * Implementation note: this deliberately avoids `Array.prototype.toReversed`
 * (ES2023) so the function also runs on runtimes that predate it.
 *
 * @param align           Alignment steps as produced by `lcsAlign`; only
 *                        `oldIdx` and `newIdx` are read.
 * @param genesisRowCount Number of rows in the genesis table.
 * @returns Map from boundary index to the inserted new-side row indices.
 */
function collectInsertedRowsAtBoundary(
  align: ReturnType<typeof lcsAlign>,
  genesisRowCount: number
): Map<number, number[]> {
  const out = new Map<number, number[]>()
  // Genesis index of the closest genesis row *after* the current position.
  let nextGenesisBoundary = genesisRowCount
  // Unpartnered new-side rows seen since the last genesis row, back to front.
  const pending: number[] = []

  const flushPending = (): void => {
    if (pending.length === 0) return
    // `pending` was gathered walking backwards; flip it in place to restore
    // document order, then put it ahead of anything already at this boundary.
    pending.reverse()
    const existing = out.get(nextGenesisBoundary) ?? []
    out.set(nextGenesisBoundary, [...pending, ...existing])
    pending.length = 0
  }

  // Walk in reverse so nextGenesisBoundary always names the genesis row the
  // pending insertions sit in front of.
  for (let i = align.length - 1; i >= 0; i--) {
    const step = align[i]
    if (step.oldIdx !== null) {
      flushPending()
      nextGenesisBoundary = step.oldIdx
    } else if (step.newIdx !== null) {
      pending.push(step.newIdx)
    }
  }
  // Insertions ahead of the first genesis row (or all rows, if genesis had none).
  flushPending()
  return out
}
565
+
566
/**
 * Markup from the start of the table up to its first row. For a table
 * without rows, returns the table minus its final `'</table>'.length`
 * characters.
 */
function tableHeaderSlice(html: string, table: TableRange): string {
  const [firstRow] = table.rows
  const headerEnd = firstRow ? firstRow.rowStart : table.tableEnd - '</table>'.length
  return html.slice(table.tableStart, headerEnd)
}
571
+
572
/**
 * Markup from the end of the table's last row up to the end of the table.
 * For a table without rows, returns the literal `</table>`.
 */
function tableFooterSlice(html: string, table: TableRange): string {
  const { rows, tableEnd } = table
  const lastRow = rows[rows.length - 1]
  return lastRow ? html.slice(lastRow.rowEnd, tableEnd) : '</table>'
}
577
+
578
/**
 * Renders a whole row as inserted or deleted by a single author.
 *
 * The row's opening tag receives the author's class and data attributes,
 * every cell is rendered through `emitFullCellAttributed`, and the markup
 * between cells is copied unchanged. If the row's opening tag cannot be
 * parsed, the row is returned verbatim.
 */
function emitFullRowAttributed(html: string, row: RowRange, kind: 'ins' | 'del', author: Author): string {
  const opening = parseOpeningTagAt(html, row.rowStart)
  if (!opening) return html.slice(row.rowStart, row.rowEnd)

  let rendered = injectAuthorAttribution(html.slice(row.rowStart, opening.end), kind, author)
  let copiedUpTo = opening.end
  for (const cell of row.cells) {
    const between = html.slice(copiedUpTo, cell.cellStart)
    rendered += between + emitFullCellAttributed(html, cell, kind, author)
    copiedUpTo = cell.cellEnd
  }
  // Trailing markup after the last cell, including the row's closing tag.
  return rendered + html.slice(copiedUpTo, row.rowEnd)
}
599
+
600
/**
 * Renders one cell as inserted or deleted by a single author.
 *
 * The cell's opening tag receives the author's class and data attributes.
 * Non-blank content is wrapped in an `<ins>`/`<del>` carrying the same
 * attribution; whitespace-only content is left unwrapped. If the opening
 * tag cannot be parsed, the cell is returned verbatim.
 */
function emitFullCellAttributed(html: string, cell: CellRange, kind: 'ins' | 'del', author: Author): string {
  const opening = parseOpeningTagAt(html, cell.cellStart)
  if (!opening) return html.slice(cell.cellStart, cell.cellEnd)

  const parts: string[] = [injectAuthorAttribution(html.slice(cell.cellStart, opening.end), kind, author)]

  const body = html.slice(cell.contentStart, cell.contentEnd)
  const isBlank = body.trim().length === 0
  parts.push(isBlank ? body : Utils.wrapText(body, kind, `diff${kind}`, authorAttribution(author)))

  // Everything after the content up to the end of the cell (its closing tag).
  parts.push(html.slice(cell.contentEnd, cell.cellEnd))
  return parts.join('')
}
612
+
613
/**
 * Adds an author's diff class and data attributes to an opening tag.
 *
 * The injected class list is `diff<kind>` followed by the author's extra
 * classes. When the author metadata carries no extra classes only
 * `diff<kind>` is injected, so a missing value never produces a literal
 * `undefined` class or a trailing space (the same guard
 * `composeTagAttributes` applies).
 *
 * @param openingTag Source text of the opening tag, e.g. `<tr>`.
 * @param kind       Whether the element is shown as inserted or deleted.
 * @param author     Author the change is attributed to.
 * @returns The opening tag with class and `data-*` attributes injected.
 */
function injectAuthorAttribution(openingTag: string, kind: 'ins' | 'del', author: Author): string {
  const meta = authorAttribution(author)
  const classes = meta.extraClasses ? `diff${kind} ${meta.extraClasses}` : `diff${kind}`
  const tagWithClass = injectClass(openingTag, classes)
  return injectDataAttrs(tagWithClass, meta.dataAttrs ?? {})
}
618
+
619
/**
 * Appends `data-*` attributes to an opening tag, just before its closing
 * `>` (or `/>` for a self-closing tag). Keys are given without the `data-`
 * prefix; values are inserted as-is inside single quotes. An empty map
 * returns the tag unchanged.
 */
function injectDataAttrs(openingTag: string, dataAttrs: Readonly<Record<string, string>>): string {
  let serialized = ''
  for (const name of Object.keys(dataAttrs)) {
    serialized += ` data-${name}='${dataAttrs[name]}'`
  }
  if (serialized === '') return openingTag

  const closer = openingTag.endsWith('/>') ? '/>' : '>'
  const head = openingTag.slice(0, -closer.length)
  return head + serialized + closer
}
package/src/Utils.ts CHANGED
@@ -32,8 +32,39 @@ export function stripTagAttributes(word: string): string {
32
32
  return word
33
33
  }
34
34
 
35
- export function wrapText(text: string, tagName: string, cssClass: string): string {
36
- return `<${tagName} class='${cssClass}'>${text}</${tagName}>`
35
/**
 * Extra attributes for a wrapped tag. `executeThreeWay` uses this to mark
 * diff segments with their author (CP vs Me) through additional classes
 * and `data-*` attributes. The two-way path supplies no metadata and keeps
 * its historical output.
 */
export interface WrapMetadata {
  /** Space-separated class names placed after `cssClass`. */
  extraClasses?: string
  /** `data-*` attributes; keys are the attribute names *without* the `data-` prefix. */
  dataAttrs?: Readonly<Record<string, string>>
}
47
+
48
/**
 * Wraps `text` in `<tagName>` carrying `cssClass`. When `metadata` is
 * given, the attribute list is built by `composeTagAttributes`; without it
 * the tag carries only `class='<cssClass>'`.
 */
export function wrapText(text: string, tagName: string, cssClass: string, metadata?: WrapMetadata): string {
  const attributes = metadata ? composeTagAttributes(cssClass, metadata) : ` class='${cssClass}'`
  return `<${tagName}${attributes}>${text}</${tagName}>`
}
52
+
53
/**
 * Serialises the attribute part of an opening tag: a `class` attribute made
 * of `cssClass` plus any extra classes, followed by one `data-<key>`
 * attribute per entry in `metadata.dataAttrs`. Exported so code that builds
 * opening-tag fragments by hand (e.g. the formatting-tag special case in
 * `HtmlDiff.insertTag`) produces the same attributes as `wrapText`.
 *
 * The result starts with a space and uses single-quoted values.
 */
export function composeTagAttributes(cssClass: string, metadata: WrapMetadata): string {
  const { extraClasses, dataAttrs } = metadata
  const classList = extraClasses ? `${cssClass} ${extraClasses}` : cssClass
  const dataPart = dataAttrs
    ? Object.keys(dataAttrs)
        .map(key => ` data-${key}='${dataAttrs[key]}'`)
        .join('')
    : ''
  return ` class='${classList}'${dataPart}`
}
38
69
 
39
70
  export function isStartOfTag(val: string): boolean {
@@ -85,6 +116,7 @@ export default {
85
116
  isTag,
86
117
  stripTagAttributes,
87
118
  wrapText,
119
+ composeTagAttributes,
88
120
  isStartOfTag,
89
121
  isEndOfTag,
90
122
  isStartOfEntity,