@createiq/htmldiff 1.2.0-beta.0 → 1.2.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,675 +16,602 @@ import { type Author, authorAttribution } from './ThreeWayDiff'
16
16
  import Utils from './Utils'
17
17
 
18
18
  /**
19
- * Three-way table preprocessing. Same shape as the existing two-way
20
- * `preprocessTables` but takes V1/V2/V3 and a cell-level three-way diff
21
- * callback. All three inputs share a single placeholder nonce so V2's
22
- * tokenisation is identical when the word-level 3-way merger sees it
23
- * from both pair-wise analyses.
19
+ * Three-way table preprocessing for the genesis-spine merge.
24
20
  *
25
- * This commit handles only the same-dimensions positional case across
26
- * all three table triples. The structural-change case (rows/cells
27
- * differ between any pair) throws; the next commit replaces that with
28
- * a row-level V2-spine merge that mirrors the word-level approach.
29
- * Multi-table count divergence (CP added or Me removed a whole table)
30
- * is handled in commit 6 (D3).
21
+ * Inputs: `genesis` (common ancestor), `cpLatest` (counterparty's
22
+ * accumulated position), `meCurrent` (Me's accumulated position). All
23
+ * three share a single placeholder nonce so genesis tokenises
24
+ * identically across both pair-wise word-level analyses.
25
+ *
26
+ * Three paths:
27
+ * 1. **Positional** — all three have the same table count AND each
28
+ * positional triple's tableKey is similar enough that 1:1 pairing
29
+ * by position is sound. Recurses cellDiff per cell, structural
30
+ * layout from genesis.
31
+ * 2. **Row-structural** — paired triples whose row/cell counts differ.
32
+ * Per-table row-level LCS against genesis; recurse on preserved
33
+ * rows, emit author-attributed full rows for the rest.
34
+ * 3. **Multi-table by content** — table counts diverge across inputs.
35
+ * Pair tables to genesis via content-LCS, then assign placeholders
36
+ * such that each placeholder appears in exactly the inputs that
37
+ * contain the underlying table. The word-level merger walks the
38
+ * genesis spine and attributes unpaired tables naturally
39
+ * (cp-only/me-only/both-agree).
31
40
  */
32
41
 
33
42
  export interface ThreeWayPreprocessResult {
34
- modifiedV1: string
35
- modifiedV2: string
36
- modifiedV3: string
43
+ modifiedGenesis: string
44
+ modifiedCp: string
45
+ modifiedMe: string
37
46
  placeholderToDiff: Map<string, string>
38
47
  }
39
48
 
40
- export type ThreeWayDiffCellFn = (v1Cell: string, v2Cell: string, v3Cell: string) => string
49
+ export type ThreeWayDiffCellFn = (genesisCell: string, cpCell: string, meCell: string) => string
41
50
 
42
51
  export function preprocessTablesThreeWay(
43
- v1: string,
44
- v2: string,
45
- v3: string,
52
+ genesis: string,
53
+ cpLatest: string,
54
+ meCurrent: string,
46
55
  cellDiff: ThreeWayDiffCellFn
47
56
  ): ThreeWayPreprocessResult | null {
48
- const t1s = findTopLevelTables(v1)
49
- const t2s = findTopLevelTables(v2)
50
- const t3s = findTopLevelTables(v3)
51
-
52
- // No tables in any input — caller can skip preprocessing entirely.
53
- if (t1s.length === 0 && t2s.length === 0 && t3s.length === 0) return null
54
-
55
- // Size cap: bail to word-level diff for pathologically large tables.
56
- for (const t of t1s) if (exceedsSizeLimit(t)) return null
57
- for (const t of t2s) if (exceedsSizeLimit(t)) return null
58
- for (const t of t3s) if (exceedsSizeLimit(t)) return null
59
-
60
- const placeholderPrefix = makePlaceholderPrefix(v1, v2, v3)
61
-
62
- // Fast path: counts match AND each positional triple looks similar
63
- // enough that 1:1 positional pairing is sound. The similarity gate
64
- // catches the swap case — V1=[A,B], V2=[B,A] has matching counts but
65
- // positionally pairing would mis-attribute. Without the gate, a swap
66
- // would silently land in the per-cell diff machinery comparing
67
- // unrelated tables.
68
- if (positionallyAligned(v1, v2, v3, t1s, t2s, t3s)) {
69
- return preprocessAlignedByPosition(v1, v2, v3, t1s, t2s, t3s, cellDiff, placeholderPrefix)
57
+ const gTables = findTopLevelTables(genesis)
58
+ const cTables = findTopLevelTables(cpLatest)
59
+ const mTables = findTopLevelTables(meCurrent)
60
+
61
+ if (gTables.length === 0 && cTables.length === 0 && mTables.length === 0) return null
62
+
63
+ for (const t of gTables) if (exceedsSizeLimit(t)) return null
64
+ for (const t of cTables) if (exceedsSizeLimit(t)) return null
65
+ for (const t of mTables) if (exceedsSizeLimit(t)) return null
66
+
67
+ const placeholderPrefix = makePlaceholderPrefix(genesis, cpLatest, meCurrent)
68
+
69
+ if (positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables)) {
70
+ return preprocessAlignedByPosition(
71
+ genesis,
72
+ cpLatest,
73
+ meCurrent,
74
+ gTables,
75
+ cTables,
76
+ mTables,
77
+ cellDiff,
78
+ placeholderPrefix
79
+ )
70
80
  }
71
81
 
72
- // Multi-table mismatch (D3). CP added/removed/moved a table, Me added/
73
- // removed/moved a table, etc. Use content-LCS to pair tables across
74
- // each adjacent pair, then assign placeholders so the word-level 3-way
75
- // merger naturally attributes unpaired tables — the placeholder token
76
- // appears only in the inputs where the table exists, and the merger
77
- // sees that as an insertion/deletion.
78
- return preprocessMisalignedByContent(v1, v2, v3, t1s, t2s, t3s, cellDiff, placeholderPrefix)
82
+ return preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix)
79
83
  }
80
84
 
81
85
  function preprocessAlignedByPosition(
82
- v1: string,
83
- v2: string,
84
- v3: string,
85
- t1s: TableRange[],
86
- t2s: TableRange[],
87
- t3s: TableRange[],
86
+ genesis: string,
87
+ cpLatest: string,
88
+ meCurrent: string,
89
+ gTables: TableRange[],
90
+ cTables: TableRange[],
91
+ mTables: TableRange[],
88
92
  cellDiff: ThreeWayDiffCellFn,
89
93
  placeholderPrefix: string
90
94
  ): ThreeWayPreprocessResult {
91
- const pairs: Array<{
92
- t1: TableRange
93
- t2: TableRange
94
- t3: TableRange
95
- diffed: string
96
- }> = []
97
- for (let i = 0; i < t1s.length; i++) {
95
+ const pairs: Array<{ g: TableRange; c: TableRange; m: TableRange; diffed: string }> = []
96
+ for (let i = 0; i < gTables.length; i++) {
98
97
  pairs.push({
99
- t1: t1s[i],
100
- t2: t2s[i],
101
- t3: t3s[i],
102
- diffed: diffTableThreeWay(v1, v2, v3, t1s[i], t2s[i], t3s[i], cellDiff),
98
+ g: gTables[i],
99
+ c: cTables[i],
100
+ m: mTables[i],
101
+ diffed: diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[i], cTables[i], mTables[i], cellDiff),
103
102
  })
104
103
  }
105
- let modifiedV1 = v1
106
- let modifiedV2 = v2
107
- let modifiedV3 = v3
104
+ let modifiedGenesis = genesis
105
+ let modifiedCp = cpLatest
106
+ let modifiedMe = meCurrent
108
107
  const placeholderToDiff = new Map<string, string>()
109
- // Splice end → start so earlier offsets stay valid.
110
108
  for (let i = pairs.length - 1; i >= 0; i--) {
111
109
  const placeholder = `${placeholderPrefix}${i}${PLACEHOLDER_SUFFIX}`
112
110
  placeholderToDiff.set(placeholder, pairs[i].diffed)
113
- modifiedV1 = spliceString(modifiedV1, pairs[i].t1.tableStart, pairs[i].t1.tableEnd, placeholder)
114
- modifiedV2 = spliceString(modifiedV2, pairs[i].t2.tableStart, pairs[i].t2.tableEnd, placeholder)
115
- modifiedV3 = spliceString(modifiedV3, pairs[i].t3.tableStart, pairs[i].t3.tableEnd, placeholder)
111
+ modifiedGenesis = spliceString(modifiedGenesis, pairs[i].g.tableStart, pairs[i].g.tableEnd, placeholder)
112
+ modifiedCp = spliceString(modifiedCp, pairs[i].c.tableStart, pairs[i].c.tableEnd, placeholder)
113
+ modifiedMe = spliceString(modifiedMe, pairs[i].m.tableStart, pairs[i].m.tableEnd, placeholder)
116
114
  }
117
- return { modifiedV1, modifiedV2, modifiedV3, placeholderToDiff }
115
+ return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
118
116
  }
119
117
 
120
118
  /**
121
- * Multi-table mismatch handler. Tables are paired across V1↔V2 and
122
- * V2↔V3 via content-LCS, then substituted as placeholders such that
123
- * each placeholder appears in exactly the inputs where its underlying
124
- * table exists. The word-level merger sees:
125
- * - paired-everywhere placeholders → equal in both diffs → unwrapped
126
- * - V2-only (CP-inserted + Me-rejected) → inserted by CP, deleted by
127
- * Me → reject wrapper around the table
128
- * - V2+V3 (CP-inserted, Me-kept) → ins-cp wrapper
129
- * - V1+V2 (Me-deleted) → del-me wrapper
130
- * - V1-only (CP-deleted before V2) → del-cp wrapper
131
- * - V3-only (Me-inserted) → ins-me wrapper
119
+ * Multi-table handler. Tables are paired against `genesis` (the spine)
120
+ * via content-LCS on each of cp and me. Placeholders are assigned so
121
+ * each appears only in the inputs that actually contain the underlying
122
+ * table. The word-level merger then attributes them naturally:
132
123
  *
133
- * Each placeholder's content is the diffed table for paired triples,
134
- * or the raw table HTML for unpaired tables (the word-level wrapper
135
- * provides the attribution).
124
+ * - paired in genesis+cp+me → equal in both diffs → emit recursive 3-way diff
125
+ * - in cp+me, not in genesis → both-agree insertion → emit plain
126
+ * - in cp only → cp insertion → ins-cp wrapper (Me didn't take it)
127
+ * - in me only → me insertion → ins-me wrapper
128
+ * - in genesis+cp, not me → me deletion → del-me wrapper
129
+ * - in genesis+me, not cp → cp deletion → del-cp wrapper
130
+ * - in genesis only → both deleted, settled → silent (placeholder content empty)
136
131
  */
137
- function preprocessMisalignedByContent(
138
- v1: string,
139
- v2: string,
140
- v3: string,
141
- t1s: TableRange[],
142
- t2s: TableRange[],
143
- t3s: TableRange[],
132
+ function preprocessByContent(
133
+ genesis: string,
134
+ cpLatest: string,
135
+ meCurrent: string,
136
+ gTables: TableRange[],
137
+ cTables: TableRange[],
138
+ mTables: TableRange[],
144
139
  cellDiff: ThreeWayDiffCellFn,
145
140
  placeholderPrefix: string
146
141
  ): ThreeWayPreprocessResult {
147
- const k1 = t1s.map(t => tableKey(v1, t))
148
- const k2 = t2s.map(t => tableKey(v2, t))
149
- const k3 = t3s.map(t => tableKey(v3, t))
142
+ const gKeys = gTables.map(t => tableKey(genesis, t))
143
+ const cKeys = cTables.map(t => tableKey(cpLatest, t))
144
+ const mKeys = mTables.map(t => tableKey(meCurrent, t))
150
145
 
151
- const align12 = lcsAlign(k1, k2)
152
- const align23 = lcsAlign(k2, k3)
146
+ const alignCp = lcsAlign(gKeys, cKeys)
147
+ const alignMe = lcsAlign(gKeys, mKeys)
153
148
 
154
- // Maps from table-index → counterpart in the other input (or -1).
155
- const v1ToV2 = new Array<number>(t1s.length).fill(-1)
156
- const v2ToV1 = new Array<number>(t2s.length).fill(-1)
157
- for (const a of align12) {
149
+ // Maps: genesisIdx → matching cpIdx (-1 if none); cpIdx → matching genesisIdx; etc.
150
+ const gToCp = new Array<number>(gTables.length).fill(-1)
151
+ const cpToG = new Array<number>(cTables.length).fill(-1)
152
+ for (const a of alignCp) {
158
153
  if (a.oldIdx !== null && a.newIdx !== null) {
159
- v1ToV2[a.oldIdx] = a.newIdx
160
- v2ToV1[a.newIdx] = a.oldIdx
154
+ gToCp[a.oldIdx] = a.newIdx
155
+ cpToG[a.newIdx] = a.oldIdx
161
156
  }
162
157
  }
163
- const v2ToV3 = new Array<number>(t2s.length).fill(-1)
164
- const v3ToV2 = new Array<number>(t3s.length).fill(-1)
165
- for (const a of align23) {
158
+ const gToMe = new Array<number>(gTables.length).fill(-1)
159
+ const meToG = new Array<number>(mTables.length).fill(-1)
160
+ for (const a of alignMe) {
166
161
  if (a.oldIdx !== null && a.newIdx !== null) {
167
- v2ToV3[a.oldIdx] = a.newIdx
168
- v3ToV2[a.newIdx] = a.oldIdx
162
+ gToMe[a.oldIdx] = a.newIdx
163
+ meToG[a.newIdx] = a.oldIdx
169
164
  }
170
165
  }
171
166
 
172
- // Allocate placeholders. Each logical-table-position (paired triple,
173
- // paired pair, or singleton) gets one shared placeholder used in
174
- // every input that contains it.
175
167
  let nextId = 0
176
168
  const placeholderToDiff = new Map<string, string>()
177
169
  const placeholders = {
178
- v1: new Array<string | null>(t1s.length).fill(null),
179
- v2: new Array<string | null>(t2s.length).fill(null),
180
- v3: new Array<string | null>(t3s.length).fill(null),
170
+ g: new Array<string | null>(gTables.length).fill(null),
171
+ c: new Array<string | null>(cTables.length).fill(null),
172
+ m: new Array<string | null>(mTables.length).fill(null),
181
173
  }
182
-
183
174
  const allocate = (): string => `${placeholderPrefix}${nextId++}${PLACEHOLDER_SUFFIX}`
184
175
 
185
- // 1. Triples paired through V2 (preserved in both V1↔V2 AND V2↔V3) — full 3-way diff.
186
- for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
187
- const v1Idx = v2ToV1[v2Idx]
188
- const v3Idx = v2ToV3[v2Idx]
189
- if (v1Idx === -1 || v3Idx === -1) continue
190
- const placeholder = allocate()
191
- placeholderToDiff.set(placeholder, diffTableThreeWay(v1, v2, v3, t1s[v1Idx], t2s[v2Idx], t3s[v3Idx], cellDiff))
192
- placeholders.v1[v1Idx] = placeholder
193
- placeholders.v2[v2Idx] = placeholder
194
- placeholders.v3[v3Idx] = placeholder
195
- }
196
-
197
- // For unpaired placeholders the word-level merger can't wrap a tag
198
- // token (insertTag emits tags verbatim), so we bake the author
199
- // attribution directly into the placeholder content. The merger then
200
- // only has to position the placeholder via word-level alignment;
201
- // the attribution wrapping is already in the substituted HTML.
202
- const wrapWhole = (tag: 'ins' | 'del', author: Author, tableHtml: string, rejects?: Author): string =>
203
- Utils.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author, rejects))
204
-
205
- // 2. V2 tables paired only with V3 (CP-inserted into V2, Me-kept).
206
- for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
207
- if (placeholders.v2[v2Idx] !== null) continue
208
- const v3Idx = v2ToV3[v2Idx]
209
- if (v3Idx === -1) continue
176
+ // For unpaired-in-one-side placeholders, bake author attribution
177
+ // into the placeholder content — the word-level merger emits tag
178
+ // tokens (HTML comments) verbatim, so it can't wrap them itself.
179
+ const wrapWhole = (tag: 'ins' | 'del', author: Author, tableHtml: string): string =>
180
+ Utils.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author))
181
+
182
+ // 1. Triples paired in all three (genesis + cp + me) → recursive 3-way diff.
183
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
184
+ const cIdx = gToCp[gIdx]
185
+ const mIdx = gToMe[gIdx]
186
+ if (cIdx === -1 || mIdx === -1) continue
210
187
  const placeholder = allocate()
211
- placeholderToDiff.set(placeholder, wrapWhole('ins', 'cp', v2.slice(t2s[v2Idx].tableStart, t2s[v2Idx].tableEnd)))
212
- placeholders.v2[v2Idx] = placeholder
213
- placeholders.v3[v3Idx] = placeholder
188
+ placeholderToDiff.set(
189
+ placeholder,
190
+ diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[gIdx], cTables[cIdx], mTables[mIdx], cellDiff)
191
+ )
192
+ placeholders.g[gIdx] = placeholder
193
+ placeholders.c[cIdx] = placeholder
194
+ placeholders.m[mIdx] = placeholder
214
195
  }
215
196
 
216
- // 3. V2 tables paired only with V1 (preserved from V1, Me-deleted in V3).
217
- for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
218
- if (placeholders.v2[v2Idx] !== null) continue
219
- const v1Idx = v2ToV1[v2Idx]
220
- if (v1Idx === -1) continue
197
+ // 2. Genesis + CP only (not in Me) → me deletion.
198
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
199
+ if (placeholders.g[gIdx] !== null) continue
200
+ const cIdx = gToCp[gIdx]
201
+ if (cIdx === -1) continue
221
202
  const placeholder = allocate()
222
- placeholderToDiff.set(placeholder, wrapWhole('del', 'me', v2.slice(t2s[v2Idx].tableStart, t2s[v2Idx].tableEnd)))
223
- placeholders.v1[v1Idx] = placeholder
224
- placeholders.v2[v2Idx] = placeholder
203
+ placeholderToDiff.set(
204
+ placeholder,
205
+ wrapWhole('del', 'me', genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd))
206
+ )
207
+ placeholders.g[gIdx] = placeholder
208
+ placeholders.c[cIdx] = placeholder
225
209
  }
226
210
 
227
- // 4. V2 tables paired with neither (CP-inserted AND Me-deleted = reject).
228
- for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
229
- if (placeholders.v2[v2Idx] !== null) continue
211
+ // 3. Genesis + Me only (not in CP) → cp deletion.
212
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
213
+ if (placeholders.g[gIdx] !== null) continue
214
+ const mIdx = gToMe[gIdx]
215
+ if (mIdx === -1) continue
230
216
  const placeholder = allocate()
231
217
  placeholderToDiff.set(
232
218
  placeholder,
233
- wrapWhole('del', 'me', v2.slice(t2s[v2Idx].tableStart, t2s[v2Idx].tableEnd), 'cp')
219
+ wrapWhole('del', 'cp', genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd))
234
220
  )
235
- placeholders.v2[v2Idx] = placeholder
221
+ placeholders.g[gIdx] = placeholder
222
+ placeholders.m[mIdx] = placeholder
236
223
  }
237
224
 
238
- // 5. V1 tables unpaired with V2 (CP-deleted before V2).
239
- for (let v1Idx = 0; v1Idx < t1s.length; v1Idx++) {
240
- if (placeholders.v1[v1Idx] !== null) continue
225
+ // 4. Genesis only (not in CP, not in Me) → both deleted, settled, silent.
226
+ // Placeholder ONLY in genesis; cp and me lack it. The word-level merger
227
+ // sees it as "deleted by both" via the genesis-spine fate maps and
228
+ // silences it via the settled-deletion rule (empty placeholder content).
229
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
230
+ if (placeholders.g[gIdx] !== null) continue
231
+ const placeholder = allocate()
232
+ placeholderToDiff.set(placeholder, '')
233
+ placeholders.g[gIdx] = placeholder
234
+ }
235
+
236
+ // 5. CP + Me both inserted (no genesis) — agreement check. If their
237
+ // table content is textually identical, emit plain (settled). Otherwise
238
+ // each side gets its own placeholder (cp-only / me-only treatment).
239
+ for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
240
+ if (placeholders.c[cIdx] !== null) continue
241
+ // CP table not paired to genesis. Is there an unpaired Me table with
242
+ // matching content?
243
+ const cText = cKeys[cIdx]
244
+ let mIdx = -1
245
+ for (let candidate = 0; candidate < mTables.length; candidate++) {
246
+ if (placeholders.m[candidate] !== null) continue
247
+ if (meToG[candidate] !== -1) continue
248
+ if (mKeys[candidate] === cText) {
249
+ mIdx = candidate
250
+ break
251
+ }
252
+ }
253
+ if (mIdx === -1) continue
254
+ // Both inserted the same table content → settled insertion.
241
255
  const placeholder = allocate()
242
- placeholderToDiff.set(placeholder, wrapWhole('del', 'cp', v1.slice(t1s[v1Idx].tableStart, t1s[v1Idx].tableEnd)))
243
- placeholders.v1[v1Idx] = placeholder
256
+ placeholderToDiff.set(placeholder, cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd))
257
+ placeholders.c[cIdx] = placeholder
258
+ placeholders.m[mIdx] = placeholder
244
259
  }
245
260
 
246
- // 6. V3 tables unpaired with V2 (Me-inserted into V3).
247
- for (let v3Idx = 0; v3Idx < t3s.length; v3Idx++) {
248
- if (placeholders.v3[v3Idx] !== null) continue
261
+ // 6. Remaining CP-only tables (inserted by CP, Me didn't take).
262
+ for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
263
+ if (placeholders.c[cIdx] !== null) continue
249
264
  const placeholder = allocate()
250
- placeholderToDiff.set(placeholder, wrapWhole('ins', 'me', v3.slice(t3s[v3Idx].tableStart, t3s[v3Idx].tableEnd)))
251
- placeholders.v3[v3Idx] = placeholder
265
+ placeholderToDiff.set(
266
+ placeholder,
267
+ wrapWhole('ins', 'cp', cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd))
268
+ )
269
+ placeholders.c[cIdx] = placeholder
252
270
  }
253
271
 
254
- // Splice placeholders into each input. End → start per input.
255
- let modifiedV1 = v1
256
- for (let i = t1s.length - 1; i >= 0; i--) {
257
- const p = placeholders.v1[i]
272
+ // 7. Remaining Me-only tables (Me inserted, CP didn't).
273
+ for (let mIdx = 0; mIdx < mTables.length; mIdx++) {
274
+ if (placeholders.m[mIdx] !== null) continue
275
+ const placeholder = allocate()
276
+ placeholderToDiff.set(
277
+ placeholder,
278
+ wrapWhole('ins', 'me', meCurrent.slice(mTables[mIdx].tableStart, mTables[mIdx].tableEnd))
279
+ )
280
+ placeholders.m[mIdx] = placeholder
281
+ }
282
+
283
+ // Splice end → start per input.
284
+ let modifiedGenesis = genesis
285
+ for (let i = gTables.length - 1; i >= 0; i--) {
286
+ const p = placeholders.g[i]
258
287
  if (p === null) continue
259
- modifiedV1 = spliceString(modifiedV1, t1s[i].tableStart, t1s[i].tableEnd, p)
288
+ modifiedGenesis = spliceString(modifiedGenesis, gTables[i].tableStart, gTables[i].tableEnd, p)
260
289
  }
261
- let modifiedV2 = v2
262
- for (let i = t2s.length - 1; i >= 0; i--) {
263
- const p = placeholders.v2[i]
290
+ let modifiedCp = cpLatest
291
+ for (let i = cTables.length - 1; i >= 0; i--) {
292
+ const p = placeholders.c[i]
264
293
  if (p === null) continue
265
- modifiedV2 = spliceString(modifiedV2, t2s[i].tableStart, t2s[i].tableEnd, p)
294
+ modifiedCp = spliceString(modifiedCp, cTables[i].tableStart, cTables[i].tableEnd, p)
266
295
  }
267
- let modifiedV3 = v3
268
- for (let i = t3s.length - 1; i >= 0; i--) {
269
- const p = placeholders.v3[i]
296
+ let modifiedMe = meCurrent
297
+ for (let i = mTables.length - 1; i >= 0; i--) {
298
+ const p = placeholders.m[i]
270
299
  if (p === null) continue
271
- modifiedV3 = spliceString(modifiedV3, t3s[i].tableStart, t3s[i].tableEnd, p)
300
+ modifiedMe = spliceString(modifiedMe, mTables[i].tableStart, mTables[i].tableEnd, p)
272
301
  }
273
302
 
274
- return { modifiedV1, modifiedV2, modifiedV3, placeholderToDiff }
303
+ return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
275
304
  }
276
305
 
277
- /**
278
- * Threshold at which positional pairing is considered sound. Below this
279
- * similarity, two positionally-aligned tables are probably different
280
- * tables (e.g. CP swapped them around) and content-LCS pairing should
281
- * be used instead. 0.5 is a deliberately loose bar — paired-but-content-
282
- * edited tables (the common case) sit well above it; genuinely different
283
- * tables sit well below.
284
- */
285
306
  const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = 0.5
286
307
 
287
- /**
288
- * Returns true when V1/V2/V3 tables can be 1:1 paired by position. The
289
- * three lists must have equal length AND each positional triple must
290
- * have content similar enough that positional pairing reflects the
291
- * authors' likely intent. The slow content-LCS path handles cases that
292
- * fail this gate (table reordering, additions, deletions).
293
- */
294
308
  function positionallyAligned(
295
- v1: string,
296
- v2: string,
297
- v3: string,
298
- t1s: TableRange[],
299
- t2s: TableRange[],
300
- t3s: TableRange[]
309
+ genesis: string,
310
+ cpLatest: string,
311
+ meCurrent: string,
312
+ gTables: TableRange[],
313
+ cTables: TableRange[],
314
+ mTables: TableRange[]
301
315
  ): boolean {
302
- if (t1s.length !== t2s.length || t2s.length !== t3s.length) return false
303
- for (let i = 0; i < t1s.length; i++) {
304
- const k1 = tableKey(v1, t1s[i])
305
- const k2 = tableKey(v2, t2s[i])
306
- const k3 = tableKey(v3, t3s[i])
307
- if (textSimilarity(k1, k2) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
308
- if (textSimilarity(k2, k3) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
316
+ if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false
317
+ for (let i = 0; i < gTables.length; i++) {
318
+ const kG = tableKey(genesis, gTables[i])
319
+ const kC = tableKey(cpLatest, cTables[i])
320
+ const kM = tableKey(meCurrent, mTables[i])
321
+ if (textSimilarity(kG, kC) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
322
+ if (textSimilarity(kG, kM) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
309
323
  }
310
324
  return true
311
325
  }
312
326
 
313
327
  function tableKey(html: string, table: TableRange): string {
314
- // Whitespace-normalised full table HTML — tables with byte-identical
315
- // content (modulo whitespace) pair; any structural or content
316
- // difference falls through to unpaired (table-level ins/del).
317
328
  return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, ' ').trim()
318
329
  }
319
330
 
331
+ // ────────────────────────────────────────────────────────────────────────────
332
+ // Per-table diff: positional cells or row-level structural change.
333
+
320
334
  function diffTableThreeWay(
321
- v1: string,
322
- v2: string,
323
- v3: string,
324
- t1: TableRange,
325
- t2: TableRange,
326
- t3: TableRange,
335
+ genesis: string,
336
+ cpLatest: string,
337
+ meCurrent: string,
338
+ tG: TableRange,
339
+ tC: TableRange,
340
+ tM: TableRange,
327
341
  cellDiff: ThreeWayDiffCellFn
328
342
  ): string {
329
- if (sameDimensions(t1, t2) && sameDimensions(t2, t3)) {
330
- return diffTablePositional(v1, v2, v3, t1, t2, t3, cellDiff)
343
+ if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) {
344
+ return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
331
345
  }
332
- return diffTableStructural(v1, v2, v3, t1, t2, t3, cellDiff)
346
+ return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
333
347
  }
334
348
 
335
349
  function diffTablePositional(
336
- v1: string,
337
- v2: string,
338
- v3: string,
339
- t1: TableRange,
340
- t2: TableRange,
341
- t3: TableRange,
350
+ genesis: string,
351
+ cpLatest: string,
352
+ meCurrent: string,
353
+ tG: TableRange,
354
+ tC: TableRange,
355
+ tM: TableRange,
342
356
  cellDiff: ThreeWayDiffCellFn
343
357
  ): string {
344
- // Walk V2 verbatim — its scaffolding (`<table>`, `<tr>`, attributes,
345
- // inter-cell whitespace) is the spine. Substitute each cell content
346
- // range with the 3-way merge.
358
+ // Walk genesis's table scaffolding verbatim — it's the common
359
+ // ancestor. Cells are merged 3-way via cellDiff. Choosing genesis as
360
+ // the spine keeps the table structure stable across both pair-wise
361
+ // diffs that the word-level merger will see.
347
362
  const out: string[] = []
348
- let cursor = t2.tableStart
349
- for (let r = 0; r < t2.rows.length; r++) {
350
- const r1 = t1.rows[r]
351
- const r2 = t2.rows[r]
352
- const r3 = t3.rows[r]
353
- for (let c = 0; c < r2.cells.length; c++) {
354
- const c1 = r1.cells[c]
355
- const c2 = r2.cells[c]
356
- const c3 = r3.cells[c]
357
- out.push(v2.slice(cursor, c2.contentStart))
363
+ let cursor = tG.tableStart
364
+ for (let r = 0; r < tG.rows.length; r++) {
365
+ const rG = tG.rows[r]
366
+ const rC = tC.rows[r]
367
+ const rM = tM.rows[r]
368
+ for (let c = 0; c < rG.cells.length; c++) {
369
+ const cG = rG.cells[c]
370
+ const cC = rC.cells[c]
371
+ const cM = rM.cells[c]
372
+ out.push(genesis.slice(cursor, cG.contentStart))
358
373
  out.push(
359
374
  cellDiff(
360
- v1.slice(c1.contentStart, c1.contentEnd),
361
- v2.slice(c2.contentStart, c2.contentEnd),
362
- v3.slice(c3.contentStart, c3.contentEnd)
375
+ genesis.slice(cG.contentStart, cG.contentEnd),
376
+ cpLatest.slice(cC.contentStart, cC.contentEnd),
377
+ meCurrent.slice(cM.contentStart, cM.contentEnd)
363
378
  )
364
379
  )
365
- cursor = c2.contentEnd
380
+ cursor = cG.contentEnd
366
381
  }
367
382
  }
368
- out.push(v2.slice(cursor, t2.tableEnd))
383
+ out.push(genesis.slice(cursor, tG.tableEnd))
369
384
  return out.join('')
370
385
  }
371
386
 
372
387
  /**
373
- * Structural-change three-way table diff: rows or cells differ in count
374
- * across V1/V2/V3. Strategy:
375
- * 1. Run row-LCS for each pair (V1↔V2, V2↔V3) over rowKeys
376
- * 2. Build per-V2-row origin (from align1) and fate (from align2)
377
- * 3. Walk V2's row order, interleaving:
378
- * - CP-deleted V1 rows (in align1 but not preserved into V2)
379
- * - Me-inserted V3 rows (in align2 but not from V2)
380
- * 4. For each V2 row, combine origin+fate to decide:
381
- * - equal: recurse cellDiff if cell counts match, else fall back
382
- * - ins-cp: emit V2 row as fully-CP-inserted
383
- * - del-me: emit V2 row as fully-Me-deleted
384
- * - reject: emit V2 row as Me-rejects-CP
388
+ * Row-level genesis-spine merge for tables with diverging row/cell
389
+ * counts.
385
390
  *
386
- * Tie-break to Me on LCS disagreement (D2): each LCS is authoritative
387
- * for its own pair-wise view; we don't attempt to reconcile cases where
388
- * align1's idea of V2's V1 origin contradicts what align2 implies via
389
- * V3 history. In practice these cases manifest as the row being
390
- * attributed independently per pair, which is the conservative correct
391
- * thing to do.
391
+ * 1. Align cp rows to genesis rows (alignCp), me rows to genesis rows
392
+ * (alignMe), each via row-LCS over rowKeys.
393
+ * 2. Per genesis row: cpFate (kept / deleted), meFate (kept / deleted).
394
+ * Both kept → recurse cell diff (with structural-change cell handling
395
+ * falling back to me-attribution Replace per the documented
396
+ * limitation). One kept, other deleted → emit author-attributed full
397
+ * row. Both deleted → silent.
398
+ * 3. Off-spine rows: cp-only inserted rows + me-only inserted rows.
399
+ * Check for content agreement at the same boundary; agreed
400
+ * insertions emit plain.
392
401
  */
393
402
  function diffTableStructural(
394
- v1: string,
395
- v2: string,
396
- v3: string,
397
- t1: TableRange,
398
- t2: TableRange,
399
- t3: TableRange,
403
+ genesis: string,
404
+ cpLatest: string,
405
+ meCurrent: string,
406
+ tG: TableRange,
407
+ tC: TableRange,
408
+ tM: TableRange,
400
409
  cellDiff: ThreeWayDiffCellFn
401
410
  ): string {
402
- const v1Keys = t1.rows.map(r => rowKey(v1, r))
403
- const v2Keys = t2.rows.map(r => rowKey(v2, r))
404
- const v3Keys = t3.rows.map(r => rowKey(v3, r))
405
-
406
- const align1 = lcsAlign(v1Keys, v2Keys)
407
- const align2 = lcsAlign(v2Keys, v3Keys)
408
-
409
- // Per-V2-row attribution lookups.
410
- // Origin: 'preserved' (with V1 row index) or 'cp-inserted'.
411
- // Fate: 'preserved' (with V3 row index) or 'me-deleted'.
412
- const v2Origin = new Array<{ kind: 'preserved'; v1Idx: number } | { kind: 'cp-inserted' }>(t2.rows.length)
413
- for (let i = 0; i < v2Origin.length; i++) v2Origin[i] = { kind: 'cp-inserted' }
414
- for (const a of align1) {
415
- if (a.newIdx !== null && a.oldIdx !== null) {
416
- v2Origin[a.newIdx] = { kind: 'preserved', v1Idx: a.oldIdx }
417
- }
418
- }
411
+ const gKeys = tG.rows.map(r => rowKey(genesis, r))
412
+ const cKeys = tC.rows.map(r => rowKey(cpLatest, r))
413
+ const mKeys = tM.rows.map(r => rowKey(meCurrent, r))
419
414
 
420
- const v2Fate = new Array<{ kind: 'preserved'; v3Idx: number } | { kind: 'me-deleted' }>(t2.rows.length)
421
- for (let i = 0; i < v2Fate.length; i++) v2Fate[i] = { kind: 'me-deleted' }
422
- for (const a of align2) {
423
- if (a.oldIdx !== null && a.newIdx !== null) {
424
- v2Fate[a.oldIdx] = { kind: 'preserved', v3Idx: a.newIdx }
425
- }
415
+ const alignCp = lcsAlign(gKeys, cKeys)
416
+ const alignMe = lcsAlign(gKeys, mKeys)
417
+
418
+ // genesisIdx → matching cpIdx (-1 if cp deleted this row)
419
+ const gToCp = new Array<number>(tG.rows.length).fill(-1)
420
+ for (const a of alignCp) {
421
+ if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx
422
+ }
423
+ const gToMe = new Array<number>(tG.rows.length).fill(-1)
424
+ for (const a of alignMe) {
425
+ if (a.oldIdx !== null && a.newIdx !== null) gToMe[a.oldIdx] = a.newIdx
426
426
  }
427
427
 
428
- // Off-spine surfaces.
429
- // CP-deleted V1 rows: in align1 with newIdx == null. They land at the
430
- // V2 boundary that follows them. The boundary index is the next
431
- // preserved V2 row, or v2.rows.length if no following preserved row.
432
- const cpDelRowsAt = collectCpDelRowsAtBoundary(align1, t2.rows.length)
433
- // Me-inserted V3 rows: in align2 with oldIdx == null. They land at the
434
- // V2 boundary they sit before — i.e. the next preserved V2 row.
435
- const meInsRowsAt = collectMeInsRowsAtBoundary(align2, t2.rows.length)
436
-
437
- // Emit. We reconstruct the table from scratch since rows may be added
438
- // or deleted from V2's order; preserve the V2 header (everything up
439
- // to the first <tr>) and the V2 footer (after the last </tr>).
440
- const out: string[] = []
441
- out.push(tableHeaderSlice(v2, t2))
428
+ // Off-spine row collections: cp rows with no genesis counterpart, me rows with no genesis counterpart.
429
+ // Keyed by "the genesis row index they should appear before" so emission interleaves correctly.
430
+ const cpInsAt = collectInsertedRowsAtBoundary(alignCp, tG.rows.length)
431
+ const meInsAt = collectInsertedRowsAtBoundary(alignMe, tG.rows.length)
442
432
 
443
- const emitBoundary = (i: number) => {
444
- const cpDel = cpDelRowsAt.get(i)
445
- if (cpDel) {
446
- for (const v1RowIdx of cpDel) {
447
- out.push(emitFullRowAttributed(v1, t1.rows[v1RowIdx], 'del', 'cp'))
433
+ const out: string[] = []
434
+ out.push(tableHeaderSlice(genesis, tG))
435
+
436
+ const emitBoundaryInsertions = (b: number) => {
437
+ const cIdxs = cpInsAt.get(b) ?? []
438
+ const mIdxs = meInsAt.get(b) ?? []
439
+ if (cIdxs.length === 0 && mIdxs.length === 0) return
440
+ // Detect settled insertions (cp and me both inserted the same row content).
441
+ // Pair by content key, in order of appearance.
442
+ const remainingMe = new Set(mIdxs)
443
+ for (const cIdx of cIdxs) {
444
+ const cText = cKeys[cIdx]
445
+ let agreedMeIdx: number | undefined
446
+ for (const mIdx of remainingMe) {
447
+ if (mKeys[mIdx] === cText) {
448
+ agreedMeIdx = mIdx
449
+ break
450
+ }
448
451
  }
449
- }
450
- const meIns = meInsRowsAt.get(i)
451
- if (meIns) {
452
- for (const v3RowIdx of meIns) {
453
- out.push(emitFullRowAttributed(v3, t3.rows[v3RowIdx], 'ins', 'me'))
452
+ if (agreedMeIdx !== undefined) {
453
+ remainingMe.delete(agreedMeIdx)
454
+ // Settled insertion — emit cp's row verbatim, unmarked.
455
+ out.push(cpLatest.slice(tC.rows[cIdx].rowStart, tC.rows[cIdx].rowEnd))
456
+ } else {
457
+ out.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], 'ins', 'cp'))
454
458
  }
455
459
  }
460
+ for (const mIdx of remainingMe) {
461
+ out.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], 'ins', 'me'))
462
+ }
456
463
  }
457
464
 
458
- for (let r = 0; r < t2.rows.length; r++) {
459
- emitBoundary(r)
460
- const v2Row = t2.rows[r]
461
- const origin = v2Origin[r]
462
- const fate = v2Fate[r]
463
- out.push(emitV2Row(v1, v2, v3, v2Row, t1, t3, origin, fate, cellDiff))
464
- }
465
- emitBoundary(t2.rows.length)
466
- out.push(tableFooterSlice(v2, t2))
467
- return out.join('')
468
- }
469
-
470
- function emitV2Row(
471
- v1: string,
472
- v2: string,
473
- v3: string,
474
- v2Row: RowRange,
475
- t1: TableRange,
476
- t3: TableRange,
477
- origin: { kind: 'preserved'; v1Idx: number } | { kind: 'cp-inserted' },
478
- fate: { kind: 'preserved'; v3Idx: number } | { kind: 'me-deleted' },
479
- cellDiff: ThreeWayDiffCellFn
480
- ): string {
481
- if (origin.kind === 'cp-inserted' && fate.kind === 'me-deleted') {
482
- // CP added the row, Me removed it: reject. Show as Me-deletion of
483
- // CP's insertion via the rejects markup.
484
- return emitFullRowAttributed(v2, v2Row, 'del', 'me', 'cp')
485
- }
486
- if (origin.kind === 'cp-inserted') {
487
- // CP added the row, Me kept it. Attribute as CP-inserted but emit
488
- // V2's content (which equals V3's content since Me kept it).
489
- return emitFullRowAttributed(v2, v2Row, 'ins', 'cp')
490
- }
491
- if (fate.kind === 'me-deleted') {
492
- // Me removed an original V1 row. Emit as Me-deletion of V2's content.
493
- return emitFullRowAttributed(v2, v2Row, 'del', 'me')
494
- }
495
- // Preserved on both sides — recurse into cells. The discriminated-union
496
- // narrowing makes the indices safe to access directly.
497
- const v1Row = t1.rows[origin.v1Idx]
498
- const v3Row = t3.rows[fate.v3Idx]
499
- if (v1Row.cells.length === v2Row.cells.length && v2Row.cells.length === v3Row.cells.length) {
500
- // Same cell counts → positional cell diff via cellDiff.
501
- return diffRowPositional(v1, v2, v3, v1Row, v2Row, v3Row, cellDiff)
465
+ for (let g = 0; g < tG.rows.length; g++) {
466
+ emitBoundaryInsertions(g)
467
+
468
+ const cIdx = gToCp[g]
469
+ const mIdx = gToMe[g]
470
+ const cpDel = cIdx === -1
471
+ const meDel = mIdx === -1
472
+
473
+ if (!cpDel && !meDel) {
474
+ // Both kept — recurse cell-level diff against this row triple.
475
+ out.push(emitPreservedRow(genesis, cpLatest, meCurrent, tG.rows[g], tC.rows[cIdx], tM.rows[mIdx], cellDiff))
476
+ } else if (cpDel && meDel) {
477
+ // Both deleted — silent (settled).
478
+ } else if (cpDel) {
479
+ // CP dropped, Me kept → emit Me's row attributed as cp-deletion. The
480
+ // content shown is what Me has; the styling tells the reader CP
481
+ // wanted it gone.
482
+ out.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], 'del', 'cp'))
483
+ } else {
484
+ // Me dropped, CP kept → emit CP's row attributed as me-deletion.
485
+ out.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], 'del', 'me'))
486
+ }
502
487
  }
503
- // Cell-count mismatch within a preserved row. Cell-level structural
504
- // change is deferred; fall back to Me-attribution Replace (V2 row
505
- // removed, V3 row inserted). This is lossy for CP's contribution
506
- // within the row but functional. Real-world legal docs rarely change
507
- // column count mid-row; this is a known limitation.
508
- const out: string[] = []
509
- out.push(emitFullRowAttributed(v2, v2Row, 'del', 'me'))
510
- out.push(emitFullRowAttributed(v3, v3Row, 'ins', 'me'))
488
+ emitBoundaryInsertions(tG.rows.length)
489
+ out.push(tableFooterSlice(genesis, tG))
511
490
  return out.join('')
512
491
  }
513
492
 
514
- function diffRowPositional(
515
- v1: string,
516
- v2: string,
517
- v3: string,
518
- v1Row: RowRange,
519
- v2Row: RowRange,
520
- v3Row: RowRange,
493
+ function emitPreservedRow(
494
+ genesis: string,
495
+ cpLatest: string,
496
+ meCurrent: string,
497
+ rG: RowRange,
498
+ rC: RowRange,
499
+ rM: RowRange,
521
500
  cellDiff: ThreeWayDiffCellFn
522
501
  ): string {
523
- // Walk V2's row verbatim, substituting each cell content with the
524
- // 3-way merge. Mirrors `diffTablePositional` at the row scale.
525
- const out: string[] = []
526
- let cursor = v2Row.rowStart
527
- for (let c = 0; c < v2Row.cells.length; c++) {
528
- const c1 = v1Row.cells[c]
529
- const c2 = v2Row.cells[c]
530
- const c3 = v3Row.cells[c]
531
- out.push(v2.slice(cursor, c2.contentStart))
532
- out.push(
533
- cellDiff(
534
- v1.slice(c1.contentStart, c1.contentEnd),
535
- v2.slice(c2.contentStart, c2.contentEnd),
536
- v3.slice(c3.contentStart, c3.contentEnd)
502
+ if (rG.cells.length === rC.cells.length && rC.cells.length === rM.cells.length) {
503
+ // Same cell counts → positional cell diff.
504
+ const out: string[] = []
505
+ let cursor = rG.rowStart
506
+ for (let c = 0; c < rG.cells.length; c++) {
507
+ const cG = rG.cells[c]
508
+ const cC = rC.cells[c]
509
+ const cM = rM.cells[c]
510
+ out.push(genesis.slice(cursor, cG.contentStart))
511
+ out.push(
512
+ cellDiff(
513
+ genesis.slice(cG.contentStart, cG.contentEnd),
514
+ cpLatest.slice(cC.contentStart, cC.contentEnd),
515
+ meCurrent.slice(cM.contentStart, cM.contentEnd)
516
+ )
537
517
  )
538
- )
539
- cursor = c2.contentEnd
540
- }
541
- out.push(v2.slice(cursor, v2Row.rowEnd))
542
- return out.join('')
543
- }
544
-
545
- function collectCpDelRowsAtBoundary(align: ReturnType<typeof lcsAlign>, v2RowCount: number): Map<number, number[]> {
546
- // For each unpaired V1 row (oldIdx set, newIdx null), determine its
547
- // V2 boundary index: the position just before the next preserved V2
548
- // row, or v2RowCount if there's no following preserved row.
549
- const out = new Map<number, number[]>()
550
- let nextV2Boundary = v2RowCount
551
- // Walk the alignment in reverse so we can compute nextV2Boundary
552
- // running backwards, then assign each unpaired V1 row to the boundary
553
- // currently in scope.
554
- const pending: number[] = []
555
- for (let i = align.length - 1; i >= 0; i--) {
556
- const a = align[i]
557
- if (a.newIdx !== null) {
558
- // Flush pending unpaired V1 rows to this V2 boundary.
559
- if (pending.length > 0) {
560
- const existing = out.get(nextV2Boundary) ?? []
561
- // pending was filled backwards — reverse so document order is preserved.
562
- existing.unshift(...pending.toReversed())
563
- out.set(nextV2Boundary, existing)
564
- pending.length = 0
565
- }
566
- nextV2Boundary = a.newIdx
567
- } else if (a.oldIdx !== null) {
568
- // Unpaired V1 row — CP deleted it.
569
- pending.push(a.oldIdx)
518
+ cursor = cG.contentEnd
570
519
  }
520
+ out.push(genesis.slice(cursor, rG.rowEnd))
521
+ return out.join('')
571
522
  }
572
- if (pending.length > 0) {
573
- const existing = out.get(nextV2Boundary) ?? []
574
- existing.unshift(...pending.reverse())
575
- out.set(nextV2Boundary, existing)
576
- }
577
- return out
523
+ // Cell-count mismatch within a preserved row — cell-level structural
524
+ // change deferred. Fall back to me-attributed Replace (genesis row
525
+ // removed, me row inserted). Lossy for CP within that row.
526
+ return emitFullRowAttributed(genesis, rG, 'del', 'me') + emitFullRowAttributed(meCurrent, rM, 'ins', 'me')
578
527
  }
579
528
 
580
- function collectMeInsRowsAtBoundary(align: ReturnType<typeof lcsAlign>, v2RowCount: number): Map<number, number[]> {
581
- // For each unpaired V3 row (newIdx set, oldIdx null), determine its
582
- // V2 boundary: the position of the next preserved V2 row, or
583
- // v2RowCount if at the tail. Mirror of CP-del logic.
529
+ /**
530
+ * Returns map "genesis-row-boundary → list of new-side row indices
531
+ * inserted at that boundary". Mirrors the word-level boundary collection
532
+ * but at the row scale.
533
+ */
534
+ function collectInsertedRowsAtBoundary(
535
+ align: ReturnType<typeof lcsAlign>,
536
+ genesisRowCount: number
537
+ ): Map<number, number[]> {
584
538
  const out = new Map<number, number[]>()
585
- let nextV2Boundary = v2RowCount
539
+ let nextGenesisBoundary = genesisRowCount
586
540
  const pending: number[] = []
541
+ // Walk in reverse so nextGenesisBoundary tracks the next preserved row
542
+ // we'll encounter; flush pending unpaired new rows at the appropriate
543
+ // genesis boundary.
587
544
  for (let i = align.length - 1; i >= 0; i--) {
588
545
  const a = align[i]
589
546
  if (a.oldIdx !== null) {
590
547
  if (pending.length > 0) {
591
- const existing = out.get(nextV2Boundary) ?? []
548
+ const existing = out.get(nextGenesisBoundary) ?? []
592
549
  existing.unshift(...pending.toReversed())
593
- out.set(nextV2Boundary, existing)
550
+ out.set(nextGenesisBoundary, existing)
594
551
  pending.length = 0
595
552
  }
596
- nextV2Boundary = a.oldIdx
553
+ nextGenesisBoundary = a.oldIdx
597
554
  } else if (a.newIdx !== null) {
598
555
  pending.push(a.newIdx)
599
556
  }
600
557
  }
601
558
  if (pending.length > 0) {
602
- const existing = out.get(nextV2Boundary) ?? []
603
- existing.unshift(...pending.reverse())
604
- out.set(nextV2Boundary, existing)
559
+ const existing = out.get(nextGenesisBoundary) ?? []
560
+ existing.unshift(...pending.toReversed())
561
+ out.set(nextGenesisBoundary, existing)
605
562
  }
606
563
  return out
607
564
  }
608
565
 
609
566
  function tableHeaderSlice(html: string, table: TableRange): string {
610
- // Slice from <table> to start of first <tr>. If table is empty, take
611
- // everything up to </table>.
612
567
  const firstRow = table.rows[0]
613
568
  if (!firstRow) return html.slice(table.tableStart, table.tableEnd - '</table>'.length)
614
569
  return html.slice(table.tableStart, firstRow.rowStart)
615
570
  }
616
571
 
617
572
  function tableFooterSlice(html: string, table: TableRange): string {
618
- // Slice from end of last <tr> to </table>.
619
573
  const lastRow = table.rows[table.rows.length - 1]
620
574
  if (!lastRow) return '</table>'
621
575
  return html.slice(lastRow.rowEnd, table.tableEnd)
622
576
  }
623
577
 
624
578
  /**
625
- * Emit a row that's fully attributed to one author, in an ins or del
626
- * role. `rejectsAuthor` is set when the row is a Me-deletion of a
627
- * CP-inserted row. Wraps `<tr>` in `class='diffins cp'` etc. and each
628
- * `<td>` content in the corresponding `<ins>`/`<del>` wrapper with the
629
- * author classes/attrs.
579
+ * Emit a row fully attributed to one author. Wraps `<tr>` and each
580
+ * `<td>` with the author's diffins/diffdel class and `data-author`
581
+ * attribute; wraps cell content with an inner `<ins>`/`<del>` matching
582
+ * the word-level emission shape.
630
583
  */
631
- function emitFullRowAttributed(
632
- html: string,
633
- row: RowRange,
634
- kind: 'ins' | 'del',
635
- author: Author,
636
- rejectsAuthor?: Author
637
- ): string {
584
+ function emitFullRowAttributed(html: string, row: RowRange, kind: 'ins' | 'del', author: Author): string {
638
585
  const trOpening = parseOpeningTagAt(html, row.rowStart)
639
- if (!trOpening) return html.slice(html.length, html.length)
640
- const trWithAttrs = injectAuthorAttribution(html.slice(row.rowStart, trOpening.end), kind, author, rejectsAuthor)
586
+ if (!trOpening) return html.slice(row.rowStart, row.rowEnd)
587
+ const trWithAttrs = injectAuthorAttribution(html.slice(row.rowStart, trOpening.end), kind, author)
641
588
 
642
589
  const out: string[] = [trWithAttrs]
643
590
  let cursor = trOpening.end
644
591
  for (const cell of row.cells) {
645
592
  out.push(html.slice(cursor, cell.cellStart))
646
- out.push(emitFullCellAttributed(html, cell, kind, author, rejectsAuthor))
593
+ out.push(emitFullCellAttributed(html, cell, kind, author))
647
594
  cursor = cell.cellEnd
648
595
  }
649
596
  out.push(html.slice(cursor, row.rowEnd))
650
597
  return out.join('')
651
598
  }
652
599
 
653
- function emitFullCellAttributed(
654
- html: string,
655
- cell: CellRange,
656
- kind: 'ins' | 'del',
657
- author: Author,
658
- rejectsAuthor?: Author
659
- ): string {
600
+ function emitFullCellAttributed(html: string, cell: CellRange, kind: 'ins' | 'del', author: Author): string {
660
601
  const tdOpening = parseOpeningTagAt(html, cell.cellStart)
661
602
  if (!tdOpening) return html.slice(cell.cellStart, cell.cellEnd)
662
- const tdWithAttrs = injectAuthorAttribution(html.slice(cell.cellStart, tdOpening.end), kind, author, rejectsAuthor)
663
- // Wrap the content in an ins/del with the author classes — same
664
- // shape as the word-level emission. Empty cells get the class on the
665
- // <td> but no inner wrapper.
603
+ const tdWithAttrs = injectAuthorAttribution(html.slice(cell.cellStart, tdOpening.end), kind, author)
666
604
  const innerContent = html.slice(cell.contentStart, cell.contentEnd)
667
605
  const innerWrapped =
668
606
  innerContent.trim().length === 0
669
607
  ? innerContent
670
- : Utils.wrapText(innerContent, kind, `diff${kind}`, authorAttribution(author, rejectsAuthor))
608
+ : Utils.wrapText(innerContent, kind, `diff${kind}`, authorAttribution(author))
671
609
  const closing = html.slice(cell.contentEnd, cell.cellEnd)
672
610
  return tdWithAttrs + innerWrapped + closing
673
611
  }
674
612
 
675
- /**
676
- * Inject author classes + data-attrs into an existing opening tag (e.g.
677
- * an `<tr>` or `<td>` already in the source HTML). Uses the same
678
- * attribution shape as `authorAttribution` + `Utils.wrapText` so the
679
- * inject-into-existing and wrap-around-text paths agree.
680
- */
681
- function injectAuthorAttribution(
682
- openingTag: string,
683
- kind: 'ins' | 'del',
684
- author: Author,
685
- rejectsAuthor?: Author
686
- ): string {
687
- const meta = authorAttribution(author, rejectsAuthor)
613
+ function injectAuthorAttribution(openingTag: string, kind: 'ins' | 'del', author: Author): string {
614
+ const meta = authorAttribution(author)
688
615
  const tagWithClass = injectClass(openingTag, `diff${kind} ${meta.extraClasses}`)
689
616
  return injectDataAttrs(tagWithClass, meta.dataAttrs ?? {})
690
617
  }
@@ -693,9 +620,6 @@ function injectDataAttrs(openingTag: string, dataAttrs: Readonly<Record<string,
693
620
  const keys = Object.keys(dataAttrs)
694
621
  if (keys.length === 0) return openingTag
695
622
  const attrs = keys.map(k => ` data-${k}='${dataAttrs[k]}'`).join('')
696
- // Insert the data-* attributes just before the closing '>' of the
697
- // opening tag. `<tr>` and `<td>` are never self-closing in real HTML,
698
- // but handle `/>` defensively for symmetry with other HTML emitters.
699
623
  if (openingTag.endsWith('/>')) return `${openingTag.slice(0, -2)}${attrs}/>`
700
624
  return `${openingTag.slice(0, -1)}${attrs}>`
701
625
  }