@createiq/htmldiff 1.2.0-beta.0 → 1.2.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { lcsAlign, textSimilarity } from './Alignment'
1
+ import { type Alignment, lcsAlign, pairSimilarUnmatched, textSimilarity } from './Alignment'
2
2
  import { injectClass, parseOpeningTagAt } from './HtmlScanner'
3
3
  import {
4
4
  type CellRange,
@@ -8,6 +8,7 @@ import {
8
8
  PLACEHOLDER_SUFFIX,
9
9
  type RowRange,
10
10
  rowKey,
11
+ rowText,
11
12
  sameDimensions,
12
13
  spliceString,
13
14
  type TableRange,
@@ -16,675 +17,735 @@ import { type Author, authorAttribution } from './ThreeWayDiff'
16
17
  import Utils from './Utils'
17
18
 
18
19
  /**
19
- * Three-way table preprocessing. Same shape as the existing two-way
20
- * `preprocessTables` but takes V1/V2/V3 and a cell-level three-way diff
21
- * callback. All three inputs share a single placeholder nonce so V2's
22
- * tokenisation is identical when the word-level 3-way merger sees it
23
- * from both pair-wise analyses.
20
+ * Three-way table preprocessing for the genesis-spine merge.
24
21
  *
25
- * This commit handles only the same-dimensions positional case across
26
- * all three table triples. The structural-change case (rows/cells
27
- * differ between any pair) throws; the next commit replaces that with
28
- * a row-level V2-spine merge that mirrors the word-level approach.
29
- * Multi-table count divergence (CP added or Me removed a whole table)
30
- * is handled in commit 6 (D3).
22
+ * Inputs: `genesis` (common ancestor), `cpLatest` (counterparty's
23
+ * accumulated position), `meCurrent` (Me's accumulated position). All
24
+ * three share a single placeholder nonce so genesis tokenises
25
+ * identically across both pair-wise word-level analyses.
26
+ *
27
+ * Three paths:
28
+ * 1. **Positional** — all three have the same table count AND each
29
+ * positional triple's tableKey is similar enough that 1:1 pairing
30
+ * by position is sound. Recurses cellDiff per cell, structural
31
+ * layout from genesis.
32
+ * 2. **Row-structural** — paired triples whose row/cell counts differ.
33
+ * Per-table row-level LCS against genesis; recurse on preserved
34
+ * rows, emit author-attributed full rows for the rest.
35
+ * 3. **Multi-table by content** — table counts diverge across inputs.
36
+ * Pair tables to genesis via content-LCS, then assign placeholders
37
+ * such that each placeholder appears in exactly the inputs that
38
+ * contain the underlying table. The word-level merger walks the
39
+ * genesis spine and attributes unpaired tables naturally
40
+ * (cp-only/me-only/both-agree).
31
41
  */
32
42
 
33
43
  export interface ThreeWayPreprocessResult {
34
- modifiedV1: string
35
- modifiedV2: string
36
- modifiedV3: string
44
+ modifiedGenesis: string
45
+ modifiedCp: string
46
+ modifiedMe: string
37
47
  placeholderToDiff: Map<string, string>
38
48
  }
39
49
 
40
- export type ThreeWayDiffCellFn = (v1Cell: string, v2Cell: string, v3Cell: string) => string
50
+ export type ThreeWayDiffCellFn = (genesisCell: string, cpCell: string, meCell: string) => string
41
51
 
42
52
  export function preprocessTablesThreeWay(
43
- v1: string,
44
- v2: string,
45
- v3: string,
53
+ genesis: string,
54
+ cpLatest: string,
55
+ meCurrent: string,
46
56
  cellDiff: ThreeWayDiffCellFn
47
57
  ): ThreeWayPreprocessResult | null {
48
- const t1s = findTopLevelTables(v1)
49
- const t2s = findTopLevelTables(v2)
50
- const t3s = findTopLevelTables(v3)
51
-
52
- // No tables in any input — caller can skip preprocessing entirely.
53
- if (t1s.length === 0 && t2s.length === 0 && t3s.length === 0) return null
54
-
55
- // Size cap: bail to word-level diff for pathologically large tables.
56
- for (const t of t1s) if (exceedsSizeLimit(t)) return null
57
- for (const t of t2s) if (exceedsSizeLimit(t)) return null
58
- for (const t of t3s) if (exceedsSizeLimit(t)) return null
59
-
60
- const placeholderPrefix = makePlaceholderPrefix(v1, v2, v3)
61
-
62
- // Fast path: counts match AND each positional triple looks similar
63
- // enough that 1:1 positional pairing is sound. The similarity gate
64
- // catches the swap case — V1=[A,B], V2=[B,A] has matching counts but
65
- // positionally pairing would mis-attribute. Without the gate, a swap
66
- // would silently land in the per-cell diff machinery comparing
67
- // unrelated tables.
68
- if (positionallyAligned(v1, v2, v3, t1s, t2s, t3s)) {
69
- return preprocessAlignedByPosition(v1, v2, v3, t1s, t2s, t3s, cellDiff, placeholderPrefix)
58
+ const gTables = findTopLevelTables(genesis)
59
+ const cTables = findTopLevelTables(cpLatest)
60
+ const mTables = findTopLevelTables(meCurrent)
61
+
62
+ if (gTables.length === 0 && cTables.length === 0 && mTables.length === 0) return null
63
+
64
+ for (const t of gTables) if (exceedsSizeLimit(t)) return null
65
+ for (const t of cTables) if (exceedsSizeLimit(t)) return null
66
+ for (const t of mTables) if (exceedsSizeLimit(t)) return null
67
+
68
+ const placeholderPrefix = makePlaceholderPrefix(genesis, cpLatest, meCurrent)
69
+
70
+ if (positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables)) {
71
+ return preprocessAlignedByPosition(
72
+ genesis,
73
+ cpLatest,
74
+ meCurrent,
75
+ gTables,
76
+ cTables,
77
+ mTables,
78
+ cellDiff,
79
+ placeholderPrefix
80
+ )
70
81
  }
71
82
 
72
- // Multi-table mismatch (D3). CP added/removed/moved a table, Me added/
73
- // removed/moved a table, etc. Use content-LCS to pair tables across
74
- // each adjacent pair, then assign placeholders so the word-level 3-way
75
- // merger naturally attributes unpaired tables — the placeholder token
76
- // appears only in the inputs where the table exists, and the merger
77
- // sees that as an insertion/deletion.
78
- return preprocessMisalignedByContent(v1, v2, v3, t1s, t2s, t3s, cellDiff, placeholderPrefix)
83
+ return preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix)
79
84
  }
80
85
 
81
86
  function preprocessAlignedByPosition(
82
- v1: string,
83
- v2: string,
84
- v3: string,
85
- t1s: TableRange[],
86
- t2s: TableRange[],
87
- t3s: TableRange[],
87
+ genesis: string,
88
+ cpLatest: string,
89
+ meCurrent: string,
90
+ gTables: TableRange[],
91
+ cTables: TableRange[],
92
+ mTables: TableRange[],
88
93
  cellDiff: ThreeWayDiffCellFn,
89
94
  placeholderPrefix: string
90
95
  ): ThreeWayPreprocessResult {
91
- const pairs: Array<{
92
- t1: TableRange
93
- t2: TableRange
94
- t3: TableRange
95
- diffed: string
96
- }> = []
97
- for (let i = 0; i < t1s.length; i++) {
96
+ const pairs: Array<{ g: TableRange; c: TableRange; m: TableRange; diffed: string }> = []
97
+ for (let i = 0; i < gTables.length; i++) {
98
98
  pairs.push({
99
- t1: t1s[i],
100
- t2: t2s[i],
101
- t3: t3s[i],
102
- diffed: diffTableThreeWay(v1, v2, v3, t1s[i], t2s[i], t3s[i], cellDiff),
99
+ g: gTables[i],
100
+ c: cTables[i],
101
+ m: mTables[i],
102
+ diffed: diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[i], cTables[i], mTables[i], cellDiff),
103
103
  })
104
104
  }
105
- let modifiedV1 = v1
106
- let modifiedV2 = v2
107
- let modifiedV3 = v3
105
+ let modifiedGenesis = genesis
106
+ let modifiedCp = cpLatest
107
+ let modifiedMe = meCurrent
108
108
  const placeholderToDiff = new Map<string, string>()
109
- // Splice end → start so earlier offsets stay valid.
110
109
  for (let i = pairs.length - 1; i >= 0; i--) {
111
110
  const placeholder = `${placeholderPrefix}${i}${PLACEHOLDER_SUFFIX}`
112
111
  placeholderToDiff.set(placeholder, pairs[i].diffed)
113
- modifiedV1 = spliceString(modifiedV1, pairs[i].t1.tableStart, pairs[i].t1.tableEnd, placeholder)
114
- modifiedV2 = spliceString(modifiedV2, pairs[i].t2.tableStart, pairs[i].t2.tableEnd, placeholder)
115
- modifiedV3 = spliceString(modifiedV3, pairs[i].t3.tableStart, pairs[i].t3.tableEnd, placeholder)
112
+ modifiedGenesis = spliceString(modifiedGenesis, pairs[i].g.tableStart, pairs[i].g.tableEnd, placeholder)
113
+ modifiedCp = spliceString(modifiedCp, pairs[i].c.tableStart, pairs[i].c.tableEnd, placeholder)
114
+ modifiedMe = spliceString(modifiedMe, pairs[i].m.tableStart, pairs[i].m.tableEnd, placeholder)
116
115
  }
117
- return { modifiedV1, modifiedV2, modifiedV3, placeholderToDiff }
116
+ return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
118
117
  }
119
118
 
120
119
  /**
121
- * Multi-table mismatch handler. Tables are paired across V1↔V2 and
122
- * V2↔V3 via content-LCS, then substituted as placeholders such that
123
- * each placeholder appears in exactly the inputs where its underlying
124
- * table exists. The word-level merger sees:
125
- * - paired-everywhere placeholders → equal in both diffs → unwrapped
126
- * - V2-only (CP-inserted + Me-rejected) → inserted by CP, deleted by
127
- * Me → reject wrapper around the table
128
- * - V2+V3 (CP-inserted, Me-kept) → ins-cp wrapper
129
- * - V1+V2 (Me-deleted) → del-me wrapper
130
- * - V1-only (CP-deleted before V2) → del-cp wrapper
131
- * - V3-only (Me-inserted) → ins-me wrapper
120
+ * Multi-table handler. Tables are paired against `genesis` (the spine)
121
+ * via content-LCS on each of cp and me. Placeholders are assigned so
122
+ * each appears only in the inputs that actually contain the underlying
123
+ * table. The word-level merger then attributes them naturally:
132
124
  *
133
- * Each placeholder's content is the diffed table for paired triples,
134
- * or the raw table HTML for unpaired tables (the word-level wrapper
135
- * provides the attribution).
125
+ * - paired in genesis+cp+me → equal in both diffs → emit recursive 3-way diff
126
+ * - in cp+me, not in genesis → both-agree insertion → emit plain
127
+ * - in cp only → cp insertion → ins-cp wrapper (Me didn't take it)
128
+ * - in me only → me insertion → ins-me wrapper
129
+ * - in genesis+cp, not me → me deletion → del-me wrapper
130
+ * - in genesis+me, not cp → cp deletion → del-cp wrapper
131
+ * - in genesis only → both deleted, settled → silent (placeholder content empty)
136
132
  */
137
- function preprocessMisalignedByContent(
138
- v1: string,
139
- v2: string,
140
- v3: string,
141
- t1s: TableRange[],
142
- t2s: TableRange[],
143
- t3s: TableRange[],
133
+ function preprocessByContent(
134
+ genesis: string,
135
+ cpLatest: string,
136
+ meCurrent: string,
137
+ gTables: TableRange[],
138
+ cTables: TableRange[],
139
+ mTables: TableRange[],
144
140
  cellDiff: ThreeWayDiffCellFn,
145
141
  placeholderPrefix: string
146
142
  ): ThreeWayPreprocessResult {
147
- const k1 = t1s.map(t => tableKey(v1, t))
148
- const k2 = t2s.map(t => tableKey(v2, t))
149
- const k3 = t3s.map(t => tableKey(v3, t))
150
-
151
- const align12 = lcsAlign(k1, k2)
152
- const align23 = lcsAlign(k2, k3)
153
-
154
- // Maps from table-index → counterpart in the other input (or -1).
155
- const v1ToV2 = new Array<number>(t1s.length).fill(-1)
156
- const v2ToV1 = new Array<number>(t2s.length).fill(-1)
157
- for (const a of align12) {
143
+ const gKeys = gTables.map(t => tableKey(genesis, t))
144
+ const cKeys = cTables.map(t => tableKey(cpLatest, t))
145
+ const mKeys = mTables.map(t => tableKey(meCurrent, t))
146
+
147
+ // Exact tableKey LCS, then fuzzy-pair unmatched runs by content
148
+ // similarity. Without this, a table whose cells were edited (but
149
+ // not its overall shape) fails the exact tableKey match and the
150
+ // table-level aligner pulls it apart into a whole-table del + a
151
+ // whole-table ins. Same fuzzy pass `TableDiff` uses for the 2-way
152
+ // path — `pairSimilarTablesThreeWay` is defined below.
153
+ const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables)
154
+ const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables)
155
+
156
+ // Maps: genesisIdx → matching cpIdx (-1 if none); cpIdx → matching genesisIdx; etc.
157
+ const gToCp = new Array<number>(gTables.length).fill(-1)
158
+ const cpToG = new Array<number>(cTables.length).fill(-1)
159
+ for (const a of alignCp) {
158
160
  if (a.oldIdx !== null && a.newIdx !== null) {
159
- v1ToV2[a.oldIdx] = a.newIdx
160
- v2ToV1[a.newIdx] = a.oldIdx
161
+ gToCp[a.oldIdx] = a.newIdx
162
+ cpToG[a.newIdx] = a.oldIdx
161
163
  }
162
164
  }
163
- const v2ToV3 = new Array<number>(t2s.length).fill(-1)
164
- const v3ToV2 = new Array<number>(t3s.length).fill(-1)
165
- for (const a of align23) {
165
+ const gToMe = new Array<number>(gTables.length).fill(-1)
166
+ const meToG = new Array<number>(mTables.length).fill(-1)
167
+ for (const a of alignMe) {
166
168
  if (a.oldIdx !== null && a.newIdx !== null) {
167
- v2ToV3[a.oldIdx] = a.newIdx
168
- v3ToV2[a.newIdx] = a.oldIdx
169
+ gToMe[a.oldIdx] = a.newIdx
170
+ meToG[a.newIdx] = a.oldIdx
169
171
  }
170
172
  }
171
173
 
172
- // Allocate placeholders. Each logical-table-position (paired triple,
173
- // paired pair, or singleton) gets one shared placeholder used in
174
- // every input that contains it.
175
174
  let nextId = 0
176
175
  const placeholderToDiff = new Map<string, string>()
177
176
  const placeholders = {
178
- v1: new Array<string | null>(t1s.length).fill(null),
179
- v2: new Array<string | null>(t2s.length).fill(null),
180
- v3: new Array<string | null>(t3s.length).fill(null),
177
+ g: new Array<string | null>(gTables.length).fill(null),
178
+ c: new Array<string | null>(cTables.length).fill(null),
179
+ m: new Array<string | null>(mTables.length).fill(null),
181
180
  }
182
-
183
181
  const allocate = (): string => `${placeholderPrefix}${nextId++}${PLACEHOLDER_SUFFIX}`
184
182
 
185
- // 1. Triples paired through V2 (preserved in both V1↔V2 AND V2↔V3) — full 3-way diff.
186
- for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
187
- const v1Idx = v2ToV1[v2Idx]
188
- const v3Idx = v2ToV3[v2Idx]
189
- if (v1Idx === -1 || v3Idx === -1) continue
190
- const placeholder = allocate()
191
- placeholderToDiff.set(placeholder, diffTableThreeWay(v1, v2, v3, t1s[v1Idx], t2s[v2Idx], t3s[v3Idx], cellDiff))
192
- placeholders.v1[v1Idx] = placeholder
193
- placeholders.v2[v2Idx] = placeholder
194
- placeholders.v3[v3Idx] = placeholder
195
- }
196
-
197
- // For unpaired placeholders the word-level merger can't wrap a tag
198
- // token (insertTag emits tags verbatim), so we bake the author
199
- // attribution directly into the placeholder content. The merger then
200
- // only has to position the placeholder via word-level alignment;
201
- // the attribution wrapping is already in the substituted HTML.
202
- const wrapWhole = (tag: 'ins' | 'del', author: Author, tableHtml: string, rejects?: Author): string =>
203
- Utils.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author, rejects))
204
-
205
- // 2. V2 tables paired only with V3 (CP-inserted into V2, Me-kept).
206
- for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
207
- if (placeholders.v2[v2Idx] !== null) continue
208
- const v3Idx = v2ToV3[v2Idx]
209
- if (v3Idx === -1) continue
183
+ // For unpaired-in-one-side placeholders, bake author attribution
184
+ // into the placeholder content — the word-level merger emits tag
185
+ // tokens (HTML comments) verbatim, so it can't wrap them itself.
186
+ const wrapWhole = (tag: 'ins' | 'del', author: Author, tableHtml: string): string =>
187
+ Utils.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author))
188
+
189
+ // 1. Triples paired in all three (genesis + cp + me) → recursive 3-way diff.
190
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
191
+ const cIdx = gToCp[gIdx]
192
+ const mIdx = gToMe[gIdx]
193
+ if (cIdx === -1 || mIdx === -1) continue
210
194
  const placeholder = allocate()
211
- placeholderToDiff.set(placeholder, wrapWhole('ins', 'cp', v2.slice(t2s[v2Idx].tableStart, t2s[v2Idx].tableEnd)))
212
- placeholders.v2[v2Idx] = placeholder
213
- placeholders.v3[v3Idx] = placeholder
195
+ placeholderToDiff.set(
196
+ placeholder,
197
+ diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[gIdx], cTables[cIdx], mTables[mIdx], cellDiff)
198
+ )
199
+ placeholders.g[gIdx] = placeholder
200
+ placeholders.c[cIdx] = placeholder
201
+ placeholders.m[mIdx] = placeholder
214
202
  }
215
203
 
216
- // 3. V2 tables paired only with V1 (preserved from V1, Me-deleted in V3).
217
- for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
218
- if (placeholders.v2[v2Idx] !== null) continue
219
- const v1Idx = v2ToV1[v2Idx]
220
- if (v1Idx === -1) continue
204
+ // 2. Genesis + CP only (not in Me) → me deletion.
205
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
206
+ if (placeholders.g[gIdx] !== null) continue
207
+ const cIdx = gToCp[gIdx]
208
+ if (cIdx === -1) continue
221
209
  const placeholder = allocate()
222
- placeholderToDiff.set(placeholder, wrapWhole('del', 'me', v2.slice(t2s[v2Idx].tableStart, t2s[v2Idx].tableEnd)))
223
- placeholders.v1[v1Idx] = placeholder
224
- placeholders.v2[v2Idx] = placeholder
210
+ placeholderToDiff.set(
211
+ placeholder,
212
+ wrapWhole('del', 'me', genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd))
213
+ )
214
+ placeholders.g[gIdx] = placeholder
215
+ placeholders.c[cIdx] = placeholder
225
216
  }
226
217
 
227
- // 4. V2 tables paired with neither (CP-inserted AND Me-deleted = reject).
228
- for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
229
- if (placeholders.v2[v2Idx] !== null) continue
218
+ // 3. Genesis + Me only (not in CP) → cp deletion.
219
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
220
+ if (placeholders.g[gIdx] !== null) continue
221
+ const mIdx = gToMe[gIdx]
222
+ if (mIdx === -1) continue
230
223
  const placeholder = allocate()
231
224
  placeholderToDiff.set(
232
225
  placeholder,
233
- wrapWhole('del', 'me', v2.slice(t2s[v2Idx].tableStart, t2s[v2Idx].tableEnd), 'cp')
226
+ wrapWhole('del', 'cp', genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd))
234
227
  )
235
- placeholders.v2[v2Idx] = placeholder
228
+ placeholders.g[gIdx] = placeholder
229
+ placeholders.m[mIdx] = placeholder
236
230
  }
237
231
 
238
- // 5. V1 tables unpaired with V2 (CP-deleted before V2).
239
- for (let v1Idx = 0; v1Idx < t1s.length; v1Idx++) {
240
- if (placeholders.v1[v1Idx] !== null) continue
232
+ // 4. Genesis only (not in CP, not in Me) → both deleted, settled, silent.
233
+ // Placeholder ONLY in genesis; cp and me lack it. The word-level merger
234
+ // sees it as "deleted by both" via the genesis-spine fate maps and
235
+ // silences it via the settled-deletion rule (empty placeholder content).
236
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
237
+ if (placeholders.g[gIdx] !== null) continue
238
+ const placeholder = allocate()
239
+ placeholderToDiff.set(placeholder, '')
240
+ placeholders.g[gIdx] = placeholder
241
+ }
242
+
243
+ // 5. CP + Me both inserted (no genesis) — agreement check. If their
244
+ // table content is textually identical, emit plain (settled). Otherwise
245
+ // each side gets its own placeholder (cp-only / me-only treatment).
246
+ for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
247
+ if (placeholders.c[cIdx] !== null) continue
248
+ // CP table not paired to genesis. Is there an unpaired Me table with
249
+ // matching content?
250
+ const cText = cKeys[cIdx]
251
+ let mIdx = -1
252
+ for (let candidate = 0; candidate < mTables.length; candidate++) {
253
+ if (placeholders.m[candidate] !== null) continue
254
+ if (meToG[candidate] !== -1) continue
255
+ if (mKeys[candidate] === cText) {
256
+ mIdx = candidate
257
+ break
258
+ }
259
+ }
260
+ if (mIdx === -1) continue
261
+ // Both inserted the same table content → settled insertion.
241
262
  const placeholder = allocate()
242
- placeholderToDiff.set(placeholder, wrapWhole('del', 'cp', v1.slice(t1s[v1Idx].tableStart, t1s[v1Idx].tableEnd)))
243
- placeholders.v1[v1Idx] = placeholder
263
+ placeholderToDiff.set(placeholder, cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd))
264
+ placeholders.c[cIdx] = placeholder
265
+ placeholders.m[mIdx] = placeholder
266
+ }
267
+
268
+ // 6. Remaining CP-only tables (inserted by CP, Me didn't take).
269
+ for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
270
+ if (placeholders.c[cIdx] !== null) continue
271
+ const placeholder = allocate()
272
+ placeholderToDiff.set(
273
+ placeholder,
274
+ wrapWhole('ins', 'cp', cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd))
275
+ )
276
+ placeholders.c[cIdx] = placeholder
244
277
  }
245
278
 
246
- // 6. V3 tables unpaired with V2 (Me-inserted into V3).
247
- for (let v3Idx = 0; v3Idx < t3s.length; v3Idx++) {
248
- if (placeholders.v3[v3Idx] !== null) continue
279
+ // 7. Remaining Me-only tables (Me inserted, CP didn't).
280
+ for (let mIdx = 0; mIdx < mTables.length; mIdx++) {
281
+ if (placeholders.m[mIdx] !== null) continue
249
282
  const placeholder = allocate()
250
- placeholderToDiff.set(placeholder, wrapWhole('ins', 'me', v3.slice(t3s[v3Idx].tableStart, t3s[v3Idx].tableEnd)))
251
- placeholders.v3[v3Idx] = placeholder
283
+ placeholderToDiff.set(
284
+ placeholder,
285
+ wrapWhole('ins', 'me', meCurrent.slice(mTables[mIdx].tableStart, mTables[mIdx].tableEnd))
286
+ )
287
+ placeholders.m[mIdx] = placeholder
252
288
  }
253
289
 
254
- // Splice placeholders into each input. End → start per input.
255
- let modifiedV1 = v1
256
- for (let i = t1s.length - 1; i >= 0; i--) {
257
- const p = placeholders.v1[i]
290
+ // Splice end → start per input.
291
+ let modifiedGenesis = genesis
292
+ for (let i = gTables.length - 1; i >= 0; i--) {
293
+ const p = placeholders.g[i]
258
294
  if (p === null) continue
259
- modifiedV1 = spliceString(modifiedV1, t1s[i].tableStart, t1s[i].tableEnd, p)
295
+ modifiedGenesis = spliceString(modifiedGenesis, gTables[i].tableStart, gTables[i].tableEnd, p)
260
296
  }
261
- let modifiedV2 = v2
262
- for (let i = t2s.length - 1; i >= 0; i--) {
263
- const p = placeholders.v2[i]
297
+ let modifiedCp = cpLatest
298
+ for (let i = cTables.length - 1; i >= 0; i--) {
299
+ const p = placeholders.c[i]
264
300
  if (p === null) continue
265
- modifiedV2 = spliceString(modifiedV2, t2s[i].tableStart, t2s[i].tableEnd, p)
301
+ modifiedCp = spliceString(modifiedCp, cTables[i].tableStart, cTables[i].tableEnd, p)
266
302
  }
267
- let modifiedV3 = v3
268
- for (let i = t3s.length - 1; i >= 0; i--) {
269
- const p = placeholders.v3[i]
303
+ let modifiedMe = meCurrent
304
+ for (let i = mTables.length - 1; i >= 0; i--) {
305
+ const p = placeholders.m[i]
270
306
  if (p === null) continue
271
- modifiedV3 = spliceString(modifiedV3, t3s[i].tableStart, t3s[i].tableEnd, p)
307
+ modifiedMe = spliceString(modifiedMe, mTables[i].tableStart, mTables[i].tableEnd, p)
272
308
  }
273
309
 
274
- return { modifiedV1, modifiedV2, modifiedV3, placeholderToDiff }
310
+ return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
275
311
  }
276
312
 
277
- /**
278
- * Threshold at which positional pairing is considered sound. Below this
279
- * similarity, two positionally-aligned tables are probably different
280
- * tables (e.g. CP swapped them around) and content-LCS pairing should
281
- * be used instead. 0.5 is a deliberately loose bar — paired-but-content-
282
- * edited tables (the common case) sit well above it; genuinely different
283
- * tables sit well below.
284
- */
285
- const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = 0.5
313
+ // Positional pairing is the strict-default for three-way table merge:
314
+ // when all three inputs have the same number of tables in the same
315
+ // order, we pair them by index and let `diffTableThreeWay` handle
316
+ // per-table cell/row level differences. The similarity guard below
317
+ // only kicks in to *reject* positional alignment when a pair is
318
+ // SO dissimilar that it's near-certainly a table reorder/rename
319
+ // where content-LCS pairing would be materially better. The
320
+ // threshold is intentionally low — the 2-way path has no such guard
321
+ // and pairs purely by index (its `diffTable` falls back through
322
+ // same-dimension → equal-row-count → row-LCS → whole-table on its
323
+ // own), so the three-way path was stricter than its sibling and
324
+ // silently dropped to whole-table del+ins for legitimate edits
325
+ // like "rename one column and tweak its values". Aligning the
326
+ // threshold here keeps the two-way and three-way paths in step.
327
+ const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = 0.15
286
328
 
287
- /**
288
- * Returns true when V1/V2/V3 tables can be 1:1 paired by position. The
289
- * three lists must have equal length AND each positional triple must
290
- * have content similar enough that positional pairing reflects the
291
- * authors' likely intent. The slow content-LCS path handles cases that
292
- * fail this gate (table reordering, additions, deletions).
293
- */
294
329
  function positionallyAligned(
295
- v1: string,
296
- v2: string,
297
- v3: string,
298
- t1s: TableRange[],
299
- t2s: TableRange[],
300
- t3s: TableRange[]
330
+ genesis: string,
331
+ cpLatest: string,
332
+ meCurrent: string,
333
+ gTables: TableRange[],
334
+ cTables: TableRange[],
335
+ mTables: TableRange[]
301
336
  ): boolean {
302
- if (t1s.length !== t2s.length || t2s.length !== t3s.length) return false
303
- for (let i = 0; i < t1s.length; i++) {
304
- const k1 = tableKey(v1, t1s[i])
305
- const k2 = tableKey(v2, t2s[i])
306
- const k3 = tableKey(v3, t3s[i])
307
- if (textSimilarity(k1, k2) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
308
- if (textSimilarity(k2, k3) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
337
+ if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false
338
+ for (let i = 0; i < gTables.length; i++) {
339
+ const kG = tableKey(genesis, gTables[i])
340
+ const kC = tableKey(cpLatest, cTables[i])
341
+ const kM = tableKey(meCurrent, mTables[i])
342
+ if (textSimilarity(kG, kC) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
343
+ if (textSimilarity(kG, kM) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
309
344
  }
310
345
  return true
311
346
  }
312
347
 
313
348
  function tableKey(html: string, table: TableRange): string {
314
- // Whitespace-normalised full table HTML — tables with byte-identical
315
- // content (modulo whitespace) pair; any structural or content
316
- // difference falls through to unpaired (table-level ins/del).
317
349
  return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, ' ').trim()
318
350
  }
319
351
 
352
+ /**
353
+ * Character-level similarity above which the three-way aligner treats
354
+ * two rows / tables as "the same logical entry, edited" rather than
355
+ * an unrelated delete + insert. Matched to TableDiff's
356
+ * `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
357
+ * agree on which pairings are reachable; if a row's content overlap
358
+ * is enough to fool the 2-way diff into pairing, it should also be
359
+ * enough for 3-way.
360
+ */
361
+ const THREE_WAY_FUZZY_THRESHOLD = 0.5
362
+
363
+ /**
364
+ * Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
365
+ * applies after its exact-LCS, but against one side of the genesis
366
+ * spine (either cp or me). The genesis tables/rows are always the
367
+ * "old" side; `newTable` is the cp or me table being aligned. Returns
368
+ * the enriched alignment with additional paired entries.
369
+ *
370
+ * Cell-count guard: only fuzzy-pair when both rows have the same cell
371
+ * count. Without this guard an asymmetric restructure — e.g. CP and
372
+ * Me both added a different column — leads to ONE side fuzzy-pairing
373
+ * its row with genesis (content overlap above threshold) while the
374
+ * other side falls below threshold. That mismatch routes through
375
+ * `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
376
+ * branch, which emits CP's row as a Me-attributed deletion. In
377
+ * cp-only mode `stripMeAttributedMarkers` then removes the row
378
+ * entirely and CP's edit vanishes from the view — exactly the
379
+ * content-loss case we're meant to prevent. Restricting fuzzy
380
+ * pairing to same-shape rows preserves the common case (single cell
381
+ * edit, identical row shape) while pushing structural mismatches
382
+ * back to the boundary-insertion path that emits both sides
383
+ * explicitly.
384
+ */
385
+ function pairSimilarRowsThreeWay(
386
+ alignment: Alignment[],
387
+ genesis: string,
388
+ newHtml: string,
389
+ oldTable: TableRange,
390
+ newTable: TableRange
391
+ ): Alignment[] {
392
+ const oldTexts = oldTable.rows.map(r => rowText(genesis, r))
393
+ const newTexts = newTable.rows.map(r => rowText(newHtml, r))
394
+ return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
395
+ // Returning 0 sits below any positive threshold so
396
+ // `pairSimilarUnmatched` won't pair these rows; the guard remains
397
+ // defensive should the threshold ever be lowered to 0.
398
+ if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0
399
+ return textSimilarity(oldTexts[oldIdx], newTexts[newIdx])
400
+ })
401
+ }
402
+
403
+ /**
404
+ * Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
405
+ * full table HTML keys, fuzzy-pair unmatched table runs by their
406
+ * row-text-concatenated content. Without this, a table whose body
407
+ * was edited (but not its outer shape) fails the exact-key match
408
+ * and the preprocessing emits whole-table del + whole-table ins
409
+ * instead of recursing into per-cell three-way diffs.
410
+ */
411
+ function pairSimilarTablesThreeWay(
412
+ alignment: Alignment[],
413
+ oldHtml: string,
414
+ newHtml: string,
415
+ oldTables: TableRange[],
416
+ newTables: TableRange[]
417
+ ): Alignment[] {
418
+ const oldTexts = oldTables.map(t => t.rows.map(r => rowText(oldHtml, r)).join(' '))
419
+ const newTexts = newTables.map(t => t.rows.map(r => rowText(newHtml, r)).join(' '))
420
+ return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) =>
421
+ textSimilarity(oldTexts[oldIdx], newTexts[newIdx])
422
+ )
423
+ }
424
+
425
+ // ────────────────────────────────────────────────────────────────────────────
426
+ // Per-table diff: positional cells or row-level structural change.
427
+
320
428
  function diffTableThreeWay(
321
- v1: string,
322
- v2: string,
323
- v3: string,
324
- t1: TableRange,
325
- t2: TableRange,
326
- t3: TableRange,
429
+ genesis: string,
430
+ cpLatest: string,
431
+ meCurrent: string,
432
+ tG: TableRange,
433
+ tC: TableRange,
434
+ tM: TableRange,
327
435
  cellDiff: ThreeWayDiffCellFn
328
436
  ): string {
329
- if (sameDimensions(t1, t2) && sameDimensions(t2, t3)) {
330
- return diffTablePositional(v1, v2, v3, t1, t2, t3, cellDiff)
437
+ if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) {
438
+ return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
331
439
  }
332
- return diffTableStructural(v1, v2, v3, t1, t2, t3, cellDiff)
440
+ return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
333
441
  }
334
442
 
335
443
  function diffTablePositional(
336
- v1: string,
337
- v2: string,
338
- v3: string,
339
- t1: TableRange,
340
- t2: TableRange,
341
- t3: TableRange,
444
+ genesis: string,
445
+ cpLatest: string,
446
+ meCurrent: string,
447
+ tG: TableRange,
448
+ tC: TableRange,
449
+ tM: TableRange,
342
450
  cellDiff: ThreeWayDiffCellFn
343
451
  ): string {
344
- // Walk V2 verbatim — its scaffolding (`<table>`, `<tr>`, attributes,
345
- // inter-cell whitespace) is the spine. Substitute each cell content
346
- // range with the 3-way merge.
452
+ // Walk genesis's table scaffolding verbatim — it's the common
453
+ // ancestor. Cells are merged 3-way via cellDiff. Choosing genesis as
454
+ // the spine keeps the table structure stable across both pair-wise
455
+ // diffs that the word-level merger will see.
347
456
  const out: string[] = []
348
- let cursor = t2.tableStart
349
- for (let r = 0; r < t2.rows.length; r++) {
350
- const r1 = t1.rows[r]
351
- const r2 = t2.rows[r]
352
- const r3 = t3.rows[r]
353
- for (let c = 0; c < r2.cells.length; c++) {
354
- const c1 = r1.cells[c]
355
- const c2 = r2.cells[c]
356
- const c3 = r3.cells[c]
357
- out.push(v2.slice(cursor, c2.contentStart))
457
+ let cursor = tG.tableStart
458
+ for (let r = 0; r < tG.rows.length; r++) {
459
+ const rG = tG.rows[r]
460
+ const rC = tC.rows[r]
461
+ const rM = tM.rows[r]
462
+ for (let c = 0; c < rG.cells.length; c++) {
463
+ const cG = rG.cells[c]
464
+ const cC = rC.cells[c]
465
+ const cM = rM.cells[c]
466
+ out.push(genesis.slice(cursor, cG.contentStart))
358
467
  out.push(
359
468
  cellDiff(
360
- v1.slice(c1.contentStart, c1.contentEnd),
361
- v2.slice(c2.contentStart, c2.contentEnd),
362
- v3.slice(c3.contentStart, c3.contentEnd)
469
+ genesis.slice(cG.contentStart, cG.contentEnd),
470
+ cpLatest.slice(cC.contentStart, cC.contentEnd),
471
+ meCurrent.slice(cM.contentStart, cM.contentEnd)
363
472
  )
364
473
  )
365
- cursor = c2.contentEnd
474
+ cursor = cG.contentEnd
366
475
  }
367
476
  }
368
- out.push(v2.slice(cursor, t2.tableEnd))
477
+ out.push(genesis.slice(cursor, tG.tableEnd))
369
478
  return out.join('')
370
479
  }
371
480
 
372
481
  /**
373
- * Structural-change three-way table diff: rows or cells differ in count
374
- * across V1/V2/V3. Strategy:
375
- * 1. Run row-LCS for each pair (V1↔V2, V2↔V3) over rowKeys
376
- * 2. Build per-V2-row origin (from align1) and fate (from align2)
377
- * 3. Walk V2's row order, interleaving:
378
- * - CP-deleted V1 rows (in align1 but not preserved into V2)
379
- * - Me-inserted V3 rows (in align2 but not from V2)
380
- * 4. For each V2 row, combine origin+fate to decide:
381
- * - equal: recurse cellDiff if cell counts match, else fall back
382
- * - ins-cp: emit V2 row as fully-CP-inserted
383
- * - del-me: emit V2 row as fully-Me-deleted
384
- * - reject: emit V2 row as Me-rejects-CP
482
+ * Row-level genesis-spine merge for tables with diverging row/cell
483
+ * counts.
385
484
  *
386
- * Tie-break to Me on LCS disagreement (D2): each LCS is authoritative
387
- * for its own pair-wise view; we don't attempt to reconcile cases where
388
- * align1's idea of V2's V1 origin contradicts what align2 implies via
389
- * V3 history. In practice these cases manifest as the row being
390
- * attributed independently per pair, which is the conservative correct
391
- * thing to do.
485
+ * 1. Align cp rows to genesis rows (alignCp), me rows to genesis rows
486
+ * (alignMe), each via row-LCS over rowKeys.
487
+ * 2. Per genesis row: cpFate (kept / deleted), meFate (kept / deleted).
488
+ * Both kept → recurse cell diff (with structural-change cell handling
489
+ * falling back to me-attribution Replace per the documented
490
+ * limitation). One kept, other deleted → emit author-attributed full
491
+ * row. Both deleted → silent.
492
+ * 3. Off-spine rows: cp-only inserted rows + me-only inserted rows.
493
+ * Check for content agreement at the same boundary; agreed
494
+ * insertions emit plain.
392
495
  */
393
496
  function diffTableStructural(
394
- v1: string,
395
- v2: string,
396
- v3: string,
397
- t1: TableRange,
398
- t2: TableRange,
399
- t3: TableRange,
497
+ genesis: string,
498
+ cpLatest: string,
499
+ meCurrent: string,
500
+ tG: TableRange,
501
+ tC: TableRange,
502
+ tM: TableRange,
400
503
  cellDiff: ThreeWayDiffCellFn
401
504
  ): string {
402
- const v1Keys = t1.rows.map(r => rowKey(v1, r))
403
- const v2Keys = t2.rows.map(r => rowKey(v2, r))
404
- const v3Keys = t3.rows.map(r => rowKey(v3, r))
405
-
406
- const align1 = lcsAlign(v1Keys, v2Keys)
407
- const align2 = lcsAlign(v2Keys, v3Keys)
408
-
409
- // Per-V2-row attribution lookups.
410
- // Origin: 'preserved' (with V1 row index) or 'cp-inserted'.
411
- // Fate: 'preserved' (with V3 row index) or 'me-deleted'.
412
- const v2Origin = new Array<{ kind: 'preserved'; v1Idx: number } | { kind: 'cp-inserted' }>(t2.rows.length)
413
- for (let i = 0; i < v2Origin.length; i++) v2Origin[i] = { kind: 'cp-inserted' }
414
- for (const a of align1) {
415
- if (a.newIdx !== null && a.oldIdx !== null) {
416
- v2Origin[a.newIdx] = { kind: 'preserved', v1Idx: a.oldIdx }
417
- }
418
- }
505
+ const gKeys = tG.rows.map(r => rowKey(genesis, r))
506
+ const cKeys = tC.rows.map(r => rowKey(cpLatest, r))
507
+ const mKeys = tM.rows.map(r => rowKey(meCurrent, r))
508
+
509
+ // Exact LCS first, then fuzzy-pair remaining unmatched runs. Without
510
+ // the fuzzy pass, a row where CP edited just a single cell's text
511
+ // produces no key match — the row aligner emits the genesis row as
512
+ // CP-deleted AND CP's reshaped row as inserted, when a cell-level
513
+ // diff against the paired row would render the edit far more
514
+ // legibly. The 2-way path (`TableDiff.pairSimilarUnmatchedRows`)
515
+ // has done this since inception; bringing the three-way path in
516
+ // step removes the asymmetry where the cp-only / all-changes view
517
+ // looks markedly worse than plain 2-way for ordinary cell edits.
518
+ const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC)
519
+ const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM)
520
+
521
+ // genesisIdx → matching cpIdx (-1 if cp deleted this row)
522
+ const gToCp = new Array<number>(tG.rows.length).fill(-1)
523
+ for (const a of alignCp) {
524
+ if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx
525
+ }
526
+ const gToMe = new Array<number>(tG.rows.length).fill(-1)
527
+ for (const a of alignMe) {
528
+ if (a.oldIdx !== null && a.newIdx !== null) gToMe[a.oldIdx] = a.newIdx
529
+ }
530
+
531
+ // Off-spine row collections: cp rows with no genesis counterpart, me rows with no genesis counterpart.
532
+ // Keyed by "the genesis row index they should appear before" so emission interleaves correctly.
533
+ const cpInsAt = collectInsertedRowsAtBoundary(alignCp, tG.rows.length)
534
+ const meInsAt = collectInsertedRowsAtBoundary(alignMe, tG.rows.length)
419
535
 
420
- const v2Fate = new Array<{ kind: 'preserved'; v3Idx: number } | { kind: 'me-deleted' }>(t2.rows.length)
421
- for (let i = 0; i < v2Fate.length; i++) v2Fate[i] = { kind: 'me-deleted' }
422
- for (const a of align2) {
423
- if (a.oldIdx !== null && a.newIdx !== null) {
424
- v2Fate[a.oldIdx] = { kind: 'preserved', v3Idx: a.newIdx }
425
- }
426
- }
427
-
428
- // Off-spine surfaces.
429
- // CP-deleted V1 rows: in align1 with newIdx == null. They land at the
430
- // V2 boundary that follows them. The boundary index is the next
431
- // preserved V2 row, or v2.rows.length if no following preserved row.
432
- const cpDelRowsAt = collectCpDelRowsAtBoundary(align1, t2.rows.length)
433
- // Me-inserted V3 rows: in align2 with oldIdx == null. They land at the
434
- // V2 boundary they sit before — i.e. the next preserved V2 row.
435
- const meInsRowsAt = collectMeInsRowsAtBoundary(align2, t2.rows.length)
436
-
437
- // Emit. We reconstruct the table from scratch since rows may be added
438
- // or deleted from V2's order; preserve the V2 header (everything up
439
- // to the first <tr>) and the V2 footer (after the last </tr>).
440
536
  const out: string[] = []
441
- out.push(tableHeaderSlice(v2, t2))
442
-
443
- const emitBoundary = (i: number) => {
444
- const cpDel = cpDelRowsAt.get(i)
445
- if (cpDel) {
446
- for (const v1RowIdx of cpDel) {
447
- out.push(emitFullRowAttributed(v1, t1.rows[v1RowIdx], 'del', 'cp'))
537
+ out.push(tableHeaderSlice(genesis, tG))
538
+
539
+ const emitBoundaryInsertions = (b: number) => {
540
+ const cIdxs = cpInsAt.get(b) ?? []
541
+ const mIdxs = meInsAt.get(b) ?? []
542
+ if (cIdxs.length === 0 && mIdxs.length === 0) return
543
+ // Detect settled insertions (cp and me both inserted the same row content).
544
+ // Pair by content key, in order of appearance.
545
+ const remainingMe = new Set(mIdxs)
546
+ for (const cIdx of cIdxs) {
547
+ const cText = cKeys[cIdx]
548
+ let agreedMeIdx: number | undefined
549
+ for (const mIdx of remainingMe) {
550
+ if (mKeys[mIdx] === cText) {
551
+ agreedMeIdx = mIdx
552
+ break
553
+ }
448
554
  }
449
- }
450
- const meIns = meInsRowsAt.get(i)
451
- if (meIns) {
452
- for (const v3RowIdx of meIns) {
453
- out.push(emitFullRowAttributed(v3, t3.rows[v3RowIdx], 'ins', 'me'))
555
+ if (agreedMeIdx !== undefined) {
556
+ remainingMe.delete(agreedMeIdx)
557
+ // Settled insertion — emit cp's row verbatim, unmarked.
558
+ out.push(cpLatest.slice(tC.rows[cIdx].rowStart, tC.rows[cIdx].rowEnd))
559
+ } else {
560
+ out.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], 'ins', 'cp'))
454
561
  }
455
562
  }
563
+ for (const mIdx of remainingMe) {
564
+ out.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], 'ins', 'me'))
565
+ }
456
566
  }
457
567
 
458
- for (let r = 0; r < t2.rows.length; r++) {
459
- emitBoundary(r)
460
- const v2Row = t2.rows[r]
461
- const origin = v2Origin[r]
462
- const fate = v2Fate[r]
463
- out.push(emitV2Row(v1, v2, v3, v2Row, t1, t3, origin, fate, cellDiff))
464
- }
465
- emitBoundary(t2.rows.length)
466
- out.push(tableFooterSlice(v2, t2))
467
- return out.join('')
468
- }
469
-
470
- function emitV2Row(
471
- v1: string,
472
- v2: string,
473
- v3: string,
474
- v2Row: RowRange,
475
- t1: TableRange,
476
- t3: TableRange,
477
- origin: { kind: 'preserved'; v1Idx: number } | { kind: 'cp-inserted' },
478
- fate: { kind: 'preserved'; v3Idx: number } | { kind: 'me-deleted' },
479
- cellDiff: ThreeWayDiffCellFn
480
- ): string {
481
- if (origin.kind === 'cp-inserted' && fate.kind === 'me-deleted') {
482
- // CP added the row, Me removed it: reject. Show as Me-deletion of
483
- // CP's insertion via the rejects markup.
484
- return emitFullRowAttributed(v2, v2Row, 'del', 'me', 'cp')
485
- }
486
- if (origin.kind === 'cp-inserted') {
487
- // CP added the row, Me kept it. Attribute as CP-inserted but emit
488
- // V2's content (which equals V3's content since Me kept it).
489
- return emitFullRowAttributed(v2, v2Row, 'ins', 'cp')
490
- }
491
- if (fate.kind === 'me-deleted') {
492
- // Me removed an original V1 row. Emit as Me-deletion of V2's content.
493
- return emitFullRowAttributed(v2, v2Row, 'del', 'me')
494
- }
495
- // Preserved on both sides — recurse into cells. The discriminated-union
496
- // narrowing makes the indices safe to access directly.
497
- const v1Row = t1.rows[origin.v1Idx]
498
- const v3Row = t3.rows[fate.v3Idx]
499
- if (v1Row.cells.length === v2Row.cells.length && v2Row.cells.length === v3Row.cells.length) {
500
- // Same cell counts → positional cell diff via cellDiff.
501
- return diffRowPositional(v1, v2, v3, v1Row, v2Row, v3Row, cellDiff)
568
+ for (let g = 0; g < tG.rows.length; g++) {
569
+ emitBoundaryInsertions(g)
570
+
571
+ const cIdx = gToCp[g]
572
+ const mIdx = gToMe[g]
573
+ const cpDel = cIdx === -1
574
+ const meDel = mIdx === -1
575
+
576
+ if (!cpDel && !meDel) {
577
+ // Both kept — recurse cell-level diff against this row triple.
578
+ out.push(emitPreservedRow(genesis, cpLatest, meCurrent, tG.rows[g], tC.rows[cIdx], tM.rows[mIdx], cellDiff))
579
+ } else if (cpDel && meDel) {
580
+ // Both deleted — silent (settled).
581
+ } else if (cpDel) {
582
+ // CP dropped, Me kept → emit Me's row attributed as cp-deletion. The
583
+ // content shown is what Me has; the styling tells the reader CP
584
+ // wanted it gone.
585
+ out.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], 'del', 'cp'))
586
+ } else {
587
+ // Me dropped, CP kept → emit CP's row attributed as me-deletion.
588
+ out.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], 'del', 'me'))
589
+ }
502
590
  }
503
- // Cell-count mismatch within a preserved row. Cell-level structural
504
- // change is deferred; fall back to Me-attribution Replace (V2 row
505
- // removed, V3 row inserted). This is lossy for CP's contribution
506
- // within the row but functional. Real-world legal docs rarely change
507
- // column count mid-row; this is a known limitation.
508
- const out: string[] = []
509
- out.push(emitFullRowAttributed(v2, v2Row, 'del', 'me'))
510
- out.push(emitFullRowAttributed(v3, v3Row, 'ins', 'me'))
591
+ emitBoundaryInsertions(tG.rows.length)
592
+ out.push(tableFooterSlice(genesis, tG))
511
593
  return out.join('')
512
594
  }
513
595
 
514
- function diffRowPositional(
515
- v1: string,
516
- v2: string,
517
- v3: string,
518
- v1Row: RowRange,
519
- v2Row: RowRange,
520
- v3Row: RowRange,
596
+ function emitPreservedRow(
597
+ genesis: string,
598
+ cpLatest: string,
599
+ meCurrent: string,
600
+ rG: RowRange,
601
+ rC: RowRange,
602
+ rM: RowRange,
521
603
  cellDiff: ThreeWayDiffCellFn
522
604
  ): string {
523
- // Walk V2's row verbatim, substituting each cell content with the
524
- // 3-way merge. Mirrors `diffTablePositional` at the row scale.
525
- const out: string[] = []
526
- let cursor = v2Row.rowStart
527
- for (let c = 0; c < v2Row.cells.length; c++) {
528
- const c1 = v1Row.cells[c]
529
- const c2 = v2Row.cells[c]
530
- const c3 = v3Row.cells[c]
531
- out.push(v2.slice(cursor, c2.contentStart))
532
- out.push(
533
- cellDiff(
534
- v1.slice(c1.contentStart, c1.contentEnd),
535
- v2.slice(c2.contentStart, c2.contentEnd),
536
- v3.slice(c3.contentStart, c3.contentEnd)
605
+ if (rG.cells.length === rC.cells.length && rC.cells.length === rM.cells.length) {
606
+ // Same cell counts → positional cell diff.
607
+ const out: string[] = []
608
+ let cursor = rG.rowStart
609
+ for (let c = 0; c < rG.cells.length; c++) {
610
+ const cG = rG.cells[c]
611
+ const cC = rC.cells[c]
612
+ const cM = rM.cells[c]
613
+ out.push(genesis.slice(cursor, cG.contentStart))
614
+ out.push(
615
+ cellDiff(
616
+ genesis.slice(cG.contentStart, cG.contentEnd),
617
+ cpLatest.slice(cC.contentStart, cC.contentEnd),
618
+ meCurrent.slice(cM.contentStart, cM.contentEnd)
619
+ )
537
620
  )
538
- )
539
- cursor = c2.contentEnd
540
- }
541
- out.push(v2.slice(cursor, v2Row.rowEnd))
542
- return out.join('')
543
- }
544
-
545
- function collectCpDelRowsAtBoundary(align: ReturnType<typeof lcsAlign>, v2RowCount: number): Map<number, number[]> {
546
- // For each unpaired V1 row (oldIdx set, newIdx null), determine its
547
- // V2 boundary index: the position just before the next preserved V2
548
- // row, or v2RowCount if there's no following preserved row.
549
- const out = new Map<number, number[]>()
550
- let nextV2Boundary = v2RowCount
551
- // Walk the alignment in reverse so we can compute nextV2Boundary
552
- // running backwards, then assign each unpaired V1 row to the boundary
553
- // currently in scope.
554
- const pending: number[] = []
555
- for (let i = align.length - 1; i >= 0; i--) {
556
- const a = align[i]
557
- if (a.newIdx !== null) {
558
- // Flush pending unpaired V1 rows to this V2 boundary.
559
- if (pending.length > 0) {
560
- const existing = out.get(nextV2Boundary) ?? []
561
- // pending was filled backwards — reverse so document order is preserved.
562
- existing.unshift(...pending.toReversed())
563
- out.set(nextV2Boundary, existing)
564
- pending.length = 0
565
- }
566
- nextV2Boundary = a.newIdx
567
- } else if (a.oldIdx !== null) {
568
- // Unpaired V1 row — CP deleted it.
569
- pending.push(a.oldIdx)
621
+ cursor = cG.contentEnd
570
622
  }
571
- }
572
- if (pending.length > 0) {
573
- const existing = out.get(nextV2Boundary) ?? []
574
- existing.unshift(...pending.reverse())
575
- out.set(nextV2Boundary, existing)
576
- }
577
- return out
623
+ out.push(genesis.slice(cursor, rG.rowEnd))
624
+ return out.join('')
625
+ }
626
+ // Cell-count mismatch within a preserved row — cell-level structural
627
+ // alignment is non-trivial (which Me cell maps to which CP cell when
628
+ // the counts diverge?). The previous fallback emitted only
629
+ // genesis-as-del + me-as-ins, which silently destroyed CP's row
630
+ // content whenever CP changed the cell count — a content-loss bug
631
+ // (a row where CP added a column would disappear from the rendered
632
+ // diff entirely). Emit each side's row as a distinct attributed
633
+ // block so neither party's restructure can vanish:
634
+ // - if both restructured (different shapes on both sides) the
635
+ // genesis row is settled-deleted (silent) and we emit cp + me
636
+ // rows side by side, each attributed to its author;
637
+ // - if only one restructured, the genesis row is del-attributed to
638
+ // the restructuring author so the reader sees what was there
639
+ // before, then the new shape ins-attributed to the same author.
640
+ //
641
+ // Content edits inside a side that DID keep the genesis cell count
642
+ // are not surfaced here (no positional path is available across
643
+ // mismatched shapes); the underlying data is still present in the
644
+ // source document but the visual diff doesn't decompose it. That is
645
+ // a degradation of detail, not content loss — symmetric for cp/me.
646
+ const cpRestructured = rC.cells.length !== rG.cells.length
647
+ const meRestructured = rM.cells.length !== rG.cells.length
648
+ const blocks: string[] = []
649
+ if (cpRestructured && meRestructured) {
650
+ // Both sides restructured; genesis shape retained by neither.
651
+ blocks.push(emitFullRowAttributed(cpLatest, rC, 'ins', 'cp'))
652
+ blocks.push(emitFullRowAttributed(meCurrent, rM, 'ins', 'me'))
653
+ } else if (cpRestructured) {
654
+ blocks.push(emitFullRowAttributed(genesis, rG, 'del', 'cp'))
655
+ blocks.push(emitFullRowAttributed(cpLatest, rC, 'ins', 'cp'))
656
+ } else {
657
+ blocks.push(emitFullRowAttributed(genesis, rG, 'del', 'me'))
658
+ blocks.push(emitFullRowAttributed(meCurrent, rM, 'ins', 'me'))
659
+ }
660
+ return blocks.join('')
578
661
  }
579
662
 
580
- function collectMeInsRowsAtBoundary(align: ReturnType<typeof lcsAlign>, v2RowCount: number): Map<number, number[]> {
581
- // For each unpaired V3 row (newIdx set, oldIdx null), determine its
582
- // V2 boundary: the position of the next preserved V2 row, or
583
- // v2RowCount if at the tail. Mirror of CP-del logic.
663
+ /**
664
+ * Returns a map "genesis-row-boundary → list of new-side row indices
665
+ * inserted at that boundary". Mirrors the word-level boundary collection
666
+ * but at the row scale.
667
+ */
668
+ function collectInsertedRowsAtBoundary(
669
+ align: ReturnType<typeof lcsAlign>,
670
+ genesisRowCount: number
671
+ ): Map<number, number[]> {
584
672
  const out = new Map<number, number[]>()
585
- let nextV2Boundary = v2RowCount
673
+ let nextGenesisBoundary = genesisRowCount
586
674
  const pending: number[] = []
675
+ // Walk in reverse so nextGenesisBoundary tracks the next preserved row
676
+ // we'll encounter; flush pending unpaired new rows at the appropriate
677
+ // genesis boundary.
587
678
  for (let i = align.length - 1; i >= 0; i--) {
588
679
  const a = align[i]
589
680
  if (a.oldIdx !== null) {
590
681
  if (pending.length > 0) {
591
- const existing = out.get(nextV2Boundary) ?? []
682
+ const existing = out.get(nextGenesisBoundary) ?? []
592
683
  existing.unshift(...pending.toReversed())
593
- out.set(nextV2Boundary, existing)
684
+ out.set(nextGenesisBoundary, existing)
594
685
  pending.length = 0
595
686
  }
596
- nextV2Boundary = a.oldIdx
687
+ nextGenesisBoundary = a.oldIdx
597
688
  } else if (a.newIdx !== null) {
598
689
  pending.push(a.newIdx)
599
690
  }
600
691
  }
601
692
  if (pending.length > 0) {
602
- const existing = out.get(nextV2Boundary) ?? []
603
- existing.unshift(...pending.reverse())
604
- out.set(nextV2Boundary, existing)
693
+ const existing = out.get(nextGenesisBoundary) ?? []
694
+ existing.unshift(...pending.toReversed())
695
+ out.set(nextGenesisBoundary, existing)
605
696
  }
606
697
  return out
607
698
  }
608
699
 
609
700
  function tableHeaderSlice(html: string, table: TableRange): string {
610
- // Slice from <table> to start of first <tr>. If table is empty, take
611
- // everything up to </table>.
612
701
  const firstRow = table.rows[0]
613
702
  if (!firstRow) return html.slice(table.tableStart, table.tableEnd - '</table>'.length)
614
703
  return html.slice(table.tableStart, firstRow.rowStart)
615
704
  }
616
705
 
617
706
  function tableFooterSlice(html: string, table: TableRange): string {
618
- // Slice from end of last <tr> to </table>.
619
707
  const lastRow = table.rows[table.rows.length - 1]
620
708
  if (!lastRow) return '</table>'
621
709
  return html.slice(lastRow.rowEnd, table.tableEnd)
622
710
  }
623
711
 
624
712
  /**
625
- * Emit a row that's fully attributed to one author, in an ins or del
626
- * role. `rejectsAuthor` is set when the row is a Me-deletion of a
627
- * CP-inserted row. Wraps `<tr>` in `class='diffins cp'` etc. and each
628
- * `<td>` content in the corresponding `<ins>`/`<del>` wrapper with the
629
- * author classes/attrs.
713
+ * Emit a row fully attributed to one author. Wraps `<tr>` and each
714
+ * `<td>` with the author's diffins/diffdel class and `data-author`
715
+ * attribute; wraps cell content with an inner `<ins>`/`<del>` matching
716
+ * the word-level emission shape.
630
717
  */
631
- function emitFullRowAttributed(
632
- html: string,
633
- row: RowRange,
634
- kind: 'ins' | 'del',
635
- author: Author,
636
- rejectsAuthor?: Author
637
- ): string {
718
+ function emitFullRowAttributed(html: string, row: RowRange, kind: 'ins' | 'del', author: Author): string {
638
719
  const trOpening = parseOpeningTagAt(html, row.rowStart)
639
- if (!trOpening) return html.slice(html.length, html.length)
640
- const trWithAttrs = injectAuthorAttribution(html.slice(row.rowStart, trOpening.end), kind, author, rejectsAuthor)
720
+ if (!trOpening) return html.slice(row.rowStart, row.rowEnd)
721
+ const trWithAttrs = injectAuthorAttribution(html.slice(row.rowStart, trOpening.end), kind, author)
641
722
 
642
723
  const out: string[] = [trWithAttrs]
643
724
  let cursor = trOpening.end
644
725
  for (const cell of row.cells) {
645
726
  out.push(html.slice(cursor, cell.cellStart))
646
- out.push(emitFullCellAttributed(html, cell, kind, author, rejectsAuthor))
727
+ out.push(emitFullCellAttributed(html, cell, kind, author))
647
728
  cursor = cell.cellEnd
648
729
  }
649
730
  out.push(html.slice(cursor, row.rowEnd))
650
731
  return out.join('')
651
732
  }
652
733
 
653
- function emitFullCellAttributed(
654
- html: string,
655
- cell: CellRange,
656
- kind: 'ins' | 'del',
657
- author: Author,
658
- rejectsAuthor?: Author
659
- ): string {
734
+ function emitFullCellAttributed(html: string, cell: CellRange, kind: 'ins' | 'del', author: Author): string {
660
735
  const tdOpening = parseOpeningTagAt(html, cell.cellStart)
661
736
  if (!tdOpening) return html.slice(cell.cellStart, cell.cellEnd)
662
- const tdWithAttrs = injectAuthorAttribution(html.slice(cell.cellStart, tdOpening.end), kind, author, rejectsAuthor)
663
- // Wrap the content in an ins/del with the author classes — same
664
- // shape as the word-level emission. Empty cells get the class on the
665
- // <td> but no inner wrapper.
737
+ const tdWithAttrs = injectAuthorAttribution(html.slice(cell.cellStart, tdOpening.end), kind, author)
666
738
  const innerContent = html.slice(cell.contentStart, cell.contentEnd)
667
739
  const innerWrapped =
668
740
  innerContent.trim().length === 0
669
741
  ? innerContent
670
- : Utils.wrapText(innerContent, kind, `diff${kind}`, authorAttribution(author, rejectsAuthor))
742
+ : Utils.wrapText(innerContent, kind, `diff${kind}`, authorAttribution(author))
671
743
  const closing = html.slice(cell.contentEnd, cell.cellEnd)
672
744
  return tdWithAttrs + innerWrapped + closing
673
745
  }
674
746
 
675
- /**
676
- * Inject author classes + data-attrs into an existing opening tag (e.g.
677
- * an `<tr>` or `<td>` already in the source HTML). Uses the same
678
- * attribution shape as `authorAttribution` + `Utils.wrapText` so the
679
- * inject-into-existing and wrap-around-text paths agree.
680
- */
681
- function injectAuthorAttribution(
682
- openingTag: string,
683
- kind: 'ins' | 'del',
684
- author: Author,
685
- rejectsAuthor?: Author
686
- ): string {
687
- const meta = authorAttribution(author, rejectsAuthor)
747
+ function injectAuthorAttribution(openingTag: string, kind: 'ins' | 'del', author: Author): string {
748
+ const meta = authorAttribution(author)
688
749
  const tagWithClass = injectClass(openingTag, `diff${kind} ${meta.extraClasses}`)
689
750
  return injectDataAttrs(tagWithClass, meta.dataAttrs ?? {})
690
751
  }
@@ -693,9 +754,6 @@ function injectDataAttrs(openingTag: string, dataAttrs: Readonly<Record<string,
693
754
  const keys = Object.keys(dataAttrs)
694
755
  if (keys.length === 0) return openingTag
695
756
  const attrs = keys.map(k => ` data-${k}='${dataAttrs[k]}'`).join('')
696
- // Insert the data-* attributes just before the closing '>' of the
697
- // opening tag. `<tr>` and `<td>` are never self-closing in real HTML,
698
- // but handle `/>` defensively for symmetry with other HTML emitters.
699
757
  if (openingTag.endsWith('/>')) return `${openingTag.slice(0, -2)}${attrs}/>`
700
758
  return `${openingTag.slice(0, -1)}${attrs}>`
701
759
  }