@createiq/htmldiff 1.1.0-beta.0 → 1.2.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -0
- package/dist/HtmlDiff.cjs +1259 -498
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +141 -7
- package/dist/HtmlDiff.d.mts +140 -7
- package/dist/HtmlDiff.mjs +1259 -498
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +7 -7
- package/src/Alignment.ts +349 -0
- package/src/HtmlDiff.ts +323 -33
- package/src/HtmlScanner.ts +200 -0
- package/src/TableDiff.ts +99 -550
- package/src/ThreeWayDiff.ts +223 -0
- package/src/ThreeWayTable.ts +701 -0
- package/src/Utils.ts +34 -2
- package/test/HtmlDiff.analyze.spec.ts +152 -0
- package/test/HtmlDiff.tables.matrix.spec.ts +8 -3
- package/test/HtmlDiff.tables.spec.ts +368 -19
- package/test/HtmlDiff.threeWay.spec.ts +175 -0
- package/test/HtmlDiff.threeWay.tables.spec.ts +407 -0
- package/test/TableDiff.bench.ts +39 -0
- package/test/Utils.spec.ts +48 -0
|
@@ -0,0 +1,701 @@
|
|
|
1
|
+
import { lcsAlign, textSimilarity } from './Alignment'
|
|
2
|
+
import { injectClass, parseOpeningTagAt } from './HtmlScanner'
|
|
3
|
+
import {
|
|
4
|
+
type CellRange,
|
|
5
|
+
exceedsSizeLimit,
|
|
6
|
+
findTopLevelTables,
|
|
7
|
+
makePlaceholderPrefix,
|
|
8
|
+
PLACEHOLDER_SUFFIX,
|
|
9
|
+
type RowRange,
|
|
10
|
+
rowKey,
|
|
11
|
+
sameDimensions,
|
|
12
|
+
spliceString,
|
|
13
|
+
type TableRange,
|
|
14
|
+
} from './TableDiff'
|
|
15
|
+
import { type Author, authorAttribution } from './ThreeWayDiff'
|
|
16
|
+
import Utils from './Utils'
|
|
17
|
+
|
|
18
|
+
/**
 * Three-way table preprocessing. Same shape as the existing two-way
 * `preprocessTables` but takes V1/V2/V3 and a cell-level three-way diff
 * callback. All three inputs share a single placeholder nonce so V2's
 * tokenisation is identical when the word-level 3-way merger sees it
 * from both pair-wise analyses.
 *
 * Tables that can be paired 1:1 by position are diffed cell-by-cell when
 * all three share the same dimensions (`diffTablePositional`) and via a
 * row-level merge that walks V2's row order otherwise
 * (`diffTableStructural`). When the table counts differ, or a positional
 * triple is not similar enough, whole tables are paired by content LCS
 * instead (`preprocessMisalignedByContent`, D3).
 */

export interface ThreeWayPreprocessResult {
  /** V1 with its top-level tables replaced by placeholder tokens. */
  modifiedV1: string
  /** V2 with its top-level tables replaced by placeholder tokens. */
  modifiedV2: string
  /** V3 with its top-level tables replaced by placeholder tokens. */
  modifiedV3: string
  /** Placeholder token → HTML to substitute for it (a diffed or an attributed table). */
  placeholderToDiff: Map<string, string>
}
|
|
39
|
+
|
|
40
|
+
/**
 * Cell-level three-way diff callback. Receives the content of the
 * corresponding V1, V2 and V3 cells and returns the HTML that is emitted
 * in place of the V2 cell content.
 */
export type ThreeWayDiffCellFn = (v1Cell: string, v2Cell: string, v3Cell: string) => string
|
|
41
|
+
|
|
42
|
+
/**
 * Replace the top-level tables of V1/V2/V3 with placeholder tokens and
 * compute the three-way diffed HTML each token stands for.
 *
 * @param v1 - first input HTML
 * @param v2 - second input HTML (its table scaffolding is the spine of the output)
 * @param v3 - third input HTML
 * @param cellDiff - callback producing the merged HTML for one cell triple
 * @returns `null` when no input contains a table, or when any table
 *   exceeds the size limit (the caller is expected to fall back to the
 *   word-level diff); otherwise the placeholder-substituted inputs and
 *   the placeholder → HTML map.
 */
export function preprocessTablesThreeWay(
  v1: string,
  v2: string,
  v3: string,
  cellDiff: ThreeWayDiffCellFn
): ThreeWayPreprocessResult | null {
  const t1s = findTopLevelTables(v1)
  const t2s = findTopLevelTables(v2)
  const t3s = findTopLevelTables(v3)
  const tableLists = [t1s, t2s, t3s]

  // Nothing to preprocess when there is not a single table anywhere.
  if (tableLists.every(list => list.length === 0)) return null

  // Size cap: pathologically large tables are left to the word-level diff.
  if (tableLists.some(list => list.some(table => exceedsSizeLimit(table)))) return null

  const placeholderPrefix = makePlaceholderPrefix(v1, v2, v3)

  // Fast path: equal counts AND every positional triple is similar
  // enough for 1:1 pairing. The similarity gate catches swaps such as
  // V1=[A,B], V2=[B,A], which have matching counts but would otherwise
  // be cell-diffed against an unrelated table.
  //
  // Slow path (D3): a table was added, removed or moved. Tables are
  // paired by content LCS and placeholders are assigned so that a token
  // only appears in the inputs that actually contain the table.
  return positionallyAligned(v1, v2, v3, t1s, t2s, t3s)
    ? preprocessAlignedByPosition(v1, v2, v3, t1s, t2s, t3s, cellDiff, placeholderPrefix)
    : preprocessMisalignedByContent(v1, v2, v3, t1s, t2s, t3s, cellDiff, placeholderPrefix)
}
|
|
80
|
+
|
|
81
|
+
/**
 * Positional path: the i-th tables of V1, V2 and V3 form one triple.
 * Each triple is diffed, then all three tables are replaced by the same
 * placeholder token (prefix + index + suffix).
 *
 * The caller guarantees the three lists have equal length.
 */
function preprocessAlignedByPosition(
  v1: string,
  v2: string,
  v3: string,
  t1s: TableRange[],
  t2s: TableRange[],
  t3s: TableRange[],
  cellDiff: ThreeWayDiffCellFn,
  placeholderPrefix: string
): ThreeWayPreprocessResult {
  // Diff first, in document order, while all offsets still refer to the
  // untouched inputs.
  const diffedTables = t1s.map((t1, idx) => diffTableThreeWay(v1, v2, v3, t1, t2s[idx], t3s[idx], cellDiff))

  const placeholderToDiff = new Map<string, string>()
  let modifiedV1 = v1
  let modifiedV2 = v2
  let modifiedV3 = v3

  // Substitute back-to-front so the offsets of earlier tables stay valid.
  for (let idx = diffedTables.length - 1; idx >= 0; idx--) {
    const token = `${placeholderPrefix}${idx}${PLACEHOLDER_SUFFIX}`
    placeholderToDiff.set(token, diffedTables[idx])

    const first = t1s[idx]
    const second = t2s[idx]
    const third = t3s[idx]
    modifiedV1 = spliceString(modifiedV1, first.tableStart, first.tableEnd, token)
    modifiedV2 = spliceString(modifiedV2, second.tableStart, second.tableEnd, token)
    modifiedV3 = spliceString(modifiedV3, third.tableStart, third.tableEnd, token)
  }

  return { modifiedV1, modifiedV2, modifiedV3, placeholderToDiff }
}
|
|
119
|
+
|
|
120
|
+
/**
 * Multi-table mismatch handler. Whole tables are paired V1↔V2 and V2↔V3
 * by running `lcsAlign` over their `tableKey`s, and every table is then
 * replaced by a placeholder token that is shared by exactly the inputs
 * in which that logical table exists.
 *
 * Placeholder ids are handed out in the order below, which also fixes
 * what each placeholder expands to:
 *   1. V1+V2+V3 (paired through V2)        → three-way diffed table
 *   2. V2+V3 only (CP-inserted, Me-kept)   → V2 table wrapped as ins / cp
 *   3. V1+V2 only (Me-deleted)             → V2 table wrapped as del / me
 *   4. V2 only (CP-inserted, Me-removed)   → V2 table wrapped as del / me rejecting cp
 *   5. V1 only (CP-deleted before V2)      → V1 table wrapped as del / cp
 *   6. V3 only (Me-inserted)               → V3 table wrapped as ins / me
 *
 * For cases 2–6 the author attribution is baked into the placeholder
 * content with `Utils.wrapText`: the word-level merger cannot wrap a tag
 * token (insertTag emits tags verbatim), so it only positions the
 * placeholder and the wrapping is already in the substituted HTML.
 */
function preprocessMisalignedByContent(
  v1: string,
  v2: string,
  v3: string,
  t1s: TableRange[],
  t2s: TableRange[],
  t3s: TableRange[],
  cellDiff: ThreeWayDiffCellFn,
  placeholderPrefix: string
): ThreeWayPreprocessResult {
  const keys1 = t1s.map(table => tableKey(v1, table))
  const keys2 = t2s.map(table => tableKey(v2, table))
  const keys3 = t3s.map(table => tableKey(v3, table))

  // V2-centric lookups: index of the paired V1 / V3 table, or -1 when
  // the V2 table has no counterpart on that side.
  const v1PartnerOfV2 = new Array<number>(t2s.length).fill(-1)
  for (const { oldIdx, newIdx } of lcsAlign(keys1, keys2)) {
    if (oldIdx === null || newIdx === null) continue
    v1PartnerOfV2[newIdx] = oldIdx
  }
  const v3PartnerOfV2 = new Array<number>(t2s.length).fill(-1)
  for (const { oldIdx, newIdx } of lcsAlign(keys2, keys3)) {
    if (oldIdx === null || newIdx === null) continue
    v3PartnerOfV2[oldIdx] = newIdx
  }

  // Token chosen for each table of each input; null = not assigned yet.
  const tokens1 = new Array<string | null>(t1s.length).fill(null)
  const tokens2 = new Array<string | null>(t2s.length).fill(null)
  const tokens3 = new Array<string | null>(t3s.length).fill(null)

  const placeholderToDiff = new Map<string, string>()
  let issued = 0
  // Allocate the next placeholder token and record what it expands to.
  const register = (content: string): string => {
    const token = `${placeholderPrefix}${issued}${PLACEHOLDER_SUFFIX}`
    issued += 1
    placeholderToDiff.set(token, content)
    return token
  }

  const rawTable = (html: string, table: TableRange): string => html.slice(table.tableStart, table.tableEnd)
  const attributed = (tag: 'ins' | 'del', author: Author, tableHtml: string, rejects?: Author): string =>
    Utils.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author, rejects))

  // The four V2 passes stay separate on purpose: merging them would
  // change the order in which placeholder ids are issued.

  // 1. Triples paired through V2 — full three-way diff.
  t2s.forEach((t2, v2Idx) => {
    const v1Idx = v1PartnerOfV2[v2Idx]
    const v3Idx = v3PartnerOfV2[v2Idx]
    if (v1Idx === -1 || v3Idx === -1) return
    const token = register(diffTableThreeWay(v1, v2, v3, t1s[v1Idx], t2, t3s[v3Idx], cellDiff))
    tokens1[v1Idx] = token
    tokens2[v2Idx] = token
    tokens3[v3Idx] = token
  })

  // 2. V2 tables paired only with V3 (CP-inserted into V2, Me-kept).
  t2s.forEach((t2, v2Idx) => {
    if (tokens2[v2Idx] !== null) return
    const v3Idx = v3PartnerOfV2[v2Idx]
    if (v3Idx === -1) return
    const token = register(attributed('ins', 'cp', rawTable(v2, t2)))
    tokens2[v2Idx] = token
    tokens3[v3Idx] = token
  })

  // 3. V2 tables paired only with V1 (preserved from V1, Me-deleted in V3).
  t2s.forEach((t2, v2Idx) => {
    if (tokens2[v2Idx] !== null) return
    const v1Idx = v1PartnerOfV2[v2Idx]
    if (v1Idx === -1) return
    const token = register(attributed('del', 'me', rawTable(v2, t2)))
    tokens1[v1Idx] = token
    tokens2[v2Idx] = token
  })

  // 4. V2 tables paired with neither (CP-inserted AND Me-deleted = reject).
  t2s.forEach((t2, v2Idx) => {
    if (tokens2[v2Idx] !== null) return
    tokens2[v2Idx] = register(attributed('del', 'me', rawTable(v2, t2), 'cp'))
  })

  // 5. V1 tables still without a token (CP-deleted before V2).
  t1s.forEach((t1, v1Idx) => {
    if (tokens1[v1Idx] !== null) return
    tokens1[v1Idx] = register(attributed('del', 'cp', rawTable(v1, t1)))
  })

  // 6. V3 tables still without a token (Me-inserted into V3).
  t3s.forEach((t3, v3Idx) => {
    if (tokens3[v3Idx] !== null) return
    tokens3[v3Idx] = register(attributed('ins', 'me', rawTable(v3, t3)))
  })

  // Swap tables for tokens, back-to-front so earlier offsets stay valid.
  const substitute = (html: string, tables: TableRange[], tokens: Array<string | null>): string => {
    let result = html
    for (let i = tables.length - 1; i >= 0; i--) {
      const token = tokens[i]
      if (token === null) continue
      result = spliceString(result, tables[i].tableStart, tables[i].tableEnd, token)
    }
    return result
  }

  const modifiedV1 = substitute(v1, t1s, tokens1)
  const modifiedV2 = substitute(v2, t2s, tokens2)
  const modifiedV3 = substitute(v3, t3s, tokens3)

  return { modifiedV1, modifiedV2, modifiedV3, placeholderToDiff }
}
|
|
276
|
+
|
|
277
|
+
/**
|
|
278
|
+
* Threshold at which positional pairing is considered sound. Below this
|
|
279
|
+
* similarity, two positionally-aligned tables are probably different
|
|
280
|
+
* tables (e.g. CP swapped them around) and content-LCS pairing should
|
|
281
|
+
* be used instead. 0.5 is a deliberately loose bar — paired-but-content-
|
|
282
|
+
* edited tables (the common case) sit well above it; genuinely different
|
|
283
|
+
* tables sit well below.
|
|
284
|
+
*/
|
|
285
|
+
// Compared against `textSimilarity` of two table keys in `positionallyAligned`:
// a score strictly below this value rejects positional pairing.
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = 0.5
|
|
286
|
+
|
|
287
|
+
/**
 * Decide whether the tables of V1/V2/V3 may be paired 1:1 by position.
 *
 * Requires the three lists to have the same length and, for every
 * index, both the V1↔V2 and the V2↔V3 table keys to score at least
 * `POSITIONAL_PAIR_SIMILARITY_THRESHOLD` in `textSimilarity`. Anything
 * that fails this gate (reordered, added or removed tables) is left to
 * the content-LCS path.
 *
 * @returns true when positional pairing is considered sound
 */
function positionallyAligned(
  v1: string,
  v2: string,
  v3: string,
  t1s: TableRange[],
  t2s: TableRange[],
  t3s: TableRange[]
): boolean {
  const sameCount = t1s.length === t2s.length && t2s.length === t3s.length
  if (!sameCount) return false

  return t1s.every((t1, idx) => {
    const key1 = tableKey(v1, t1)
    const key2 = tableKey(v2, t2s[idx])
    const key3 = tableKey(v3, t3s[idx])
    // Deliberately phrased as "below threshold → reject" for both pairs.
    if (textSimilarity(key1, key2) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
    return !(textSimilarity(key2, key3) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD)
  })
}
|
|
312
|
+
|
|
313
|
+
/**
 * Comparison key for a whole table: the table's HTML with every run of
 * whitespace collapsed to a single space and the ends trimmed, so two
 * tables that differ only in whitespace get the same key.
 *
 * @param html - document the table range refers to
 * @param table - range whose `tableStart`/`tableEnd` delimit the table
 */
function tableKey(html: string, table: TableRange): string {
  const tableHtml = html.slice(table.tableStart, table.tableEnd)
  const chunks = tableHtml.split(/\s+/).filter(chunk => chunk.length > 0)
  return chunks.join(' ')
}
|
|
319
|
+
|
|
320
|
+
/**
 * Three-way diff of one table triple. Dispatches to the positional
 * cell-by-cell diff when V1↔V2 and V2↔V3 both have the same dimensions,
 * and to the structural (row-aligned) diff otherwise.
 *
 * @returns the diffed table HTML
 */
function diffTableThreeWay(
  v1: string,
  v2: string,
  v3: string,
  t1: TableRange,
  t2: TableRange,
  t3: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const shapesAgree = sameDimensions(t1, t2) && sameDimensions(t2, t3)
  const strategy = shapesAgree ? diffTablePositional : diffTableStructural
  return strategy(v1, v2, v3, t1, t2, t3, cellDiff)
}
|
|
334
|
+
|
|
335
|
+
/**
 * Same-dimensions table diff. V2 is copied verbatim — its `<table>`,
 * `<tr>`, attributes and inter-cell whitespace form the spine — except
 * that every cell's content range is replaced by `cellDiff` applied to
 * the V1/V2/V3 content at the same row and column.
 *
 * Rows and cells of V1 and V3 are looked up by V2's indices, so the
 * caller must have checked that all three tables have the same shape.
 */
function diffTablePositional(
  v1: string,
  v2: string,
  v3: string,
  t1: TableRange,
  t2: TableRange,
  t3: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const pieces: string[] = []
  // Offset in V2 up to which everything has already been emitted.
  let emittedUpTo = t2.tableStart

  t2.rows.forEach((row2, rowIdx) => {
    const row1 = t1.rows[rowIdx]
    const row3 = t3.rows[rowIdx]
    row2.cells.forEach((cell2, colIdx) => {
      const cell1 = row1.cells[colIdx]
      const cell3 = row3.cells[colIdx]
      const scaffolding = v2.slice(emittedUpTo, cell2.contentStart)
      const merged = cellDiff(
        v1.slice(cell1.contentStart, cell1.contentEnd),
        v2.slice(cell2.contentStart, cell2.contentEnd),
        v3.slice(cell3.contentStart, cell3.contentEnd)
      )
      pieces.push(scaffolding, merged)
      emittedUpTo = cell2.contentEnd
    })
  })

  // Whatever follows the last cell: closing tags up to the end of the table.
  pieces.push(v2.slice(emittedUpTo, t2.tableEnd))
  return pieces.join('')
}
|
|
371
|
+
|
|
372
|
+
/**
 * Structural three-way table diff, used when row or cell counts differ
 * somewhere across V1/V2/V3.
 *
 * Steps:
 *   1. Align rows pair-wise with `lcsAlign` over `rowKey`s
 *      (V1↔V2 and V2↔V3).
 *   2. For every V2 row derive its origin from the V1↔V2 alignment
 *      (preserved from a V1 row, or CP-inserted) and its fate from the
 *      V2↔V3 alignment (preserved into a V3 row, or Me-deleted).
 *   3. Rebuild the table around V2's header and footer, walking V2's
 *      rows in order. Before each V2 row, and once more after the last
 *      one, emit the off-spine rows assigned to that boundary:
 *      CP-deleted V1 rows first, then Me-inserted V3 rows.
 *   4. Each V2 row itself is rendered by `emitV2Row` from its
 *      origin/fate combination.
 *
 * The two alignments are used independently of each other; no attempt
 * is made to reconcile them when they imply contradictory histories for
 * a row (D2).
 */
function diffTableStructural(
  v1: string,
  v2: string,
  v3: string,
  t1: TableRange,
  t2: TableRange,
  t3: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  type RowOrigin = { kind: 'preserved'; v1Idx: number } | { kind: 'cp-inserted' }
  type RowFate = { kind: 'preserved'; v3Idx: number } | { kind: 'me-deleted' }

  const v2RowCount = t2.rows.length

  const keys1 = t1.rows.map(row => rowKey(v1, row))
  const keys2 = t2.rows.map(row => rowKey(v2, row))
  const keys3 = t3.rows.map(row => rowKey(v3, row))
  const align1 = lcsAlign(keys1, keys2)
  const align2 = lcsAlign(keys2, keys3)

  // Origin per V2 row: CP-inserted unless the V1↔V2 alignment pairs it.
  const origins: RowOrigin[] = Array.from({ length: v2RowCount }, (): RowOrigin => ({ kind: 'cp-inserted' }))
  for (const { oldIdx, newIdx } of align1) {
    if (newIdx === null || oldIdx === null) continue
    origins[newIdx] = { kind: 'preserved', v1Idx: oldIdx }
  }

  // Fate per V2 row: Me-deleted unless the V2↔V3 alignment pairs it.
  const fates: RowFate[] = Array.from({ length: v2RowCount }, (): RowFate => ({ kind: 'me-deleted' }))
  for (const { oldIdx, newIdx } of align2) {
    if (oldIdx === null || newIdx === null) continue
    fates[oldIdx] = { kind: 'preserved', v3Idx: newIdx }
  }

  // Off-spine rows, keyed by the V2 boundary (row index, or v2RowCount
  // for the tail) in front of which they are emitted.
  const cpDeletedAt = collectCpDelRowsAtBoundary(align1, v2RowCount)
  const meInsertedAt = collectMeInsRowsAtBoundary(align2, v2RowCount)

  // The table is rebuilt from scratch because rows get added to and
  // removed from V2's order; V2 contributes everything before its first
  // row and after its last row.
  const pieces: string[] = [tableHeaderSlice(v2, t2)]

  const emitOffSpineRows = (boundary: number): void => {
    for (const v1RowIdx of cpDeletedAt.get(boundary) ?? []) {
      pieces.push(emitFullRowAttributed(v1, t1.rows[v1RowIdx], 'del', 'cp'))
    }
    for (const v3RowIdx of meInsertedAt.get(boundary) ?? []) {
      pieces.push(emitFullRowAttributed(v3, t3.rows[v3RowIdx], 'ins', 'me'))
    }
  }

  t2.rows.forEach((v2Row, rowIdx) => {
    emitOffSpineRows(rowIdx)
    pieces.push(emitV2Row(v1, v2, v3, v2Row, t1, t3, origins[rowIdx], fates[rowIdx], cellDiff))
  })
  emitOffSpineRows(v2RowCount)

  pieces.push(tableFooterSlice(v2, t2))
  return pieces.join('')
}
|
|
469
|
+
|
|
470
|
+
/**
 * Render one V2 row according to where it came from (`origin`) and what
 * happened to it afterwards (`fate`):
 *
 *   origin        fate         output
 *   cp-inserted   me-deleted   V2 row as del / me rejecting cp
 *   cp-inserted   preserved    V2 row as ins / cp
 *   preserved     me-deleted   V2 row as del / me
 *   preserved     preserved    cell-by-cell three-way diff when the
 *                              three rows have equal cell counts,
 *                              otherwise V2 row as del / me followed by
 *                              the V3 row as ins / me
 *
 * @param v2Row - the V2 row being rendered
 * @param t1 - V1 table, indexed by `origin.v1Idx`
 * @param t3 - V3 table, indexed by `fate.v3Idx`
 */
function emitV2Row(
  v1: string,
  v2: string,
  v3: string,
  v2Row: RowRange,
  t1: TableRange,
  t3: TableRange,
  origin: { kind: 'preserved'; v1Idx: number } | { kind: 'cp-inserted' },
  fate: { kind: 'preserved'; v3Idx: number } | { kind: 'me-deleted' },
  cellDiff: ThreeWayDiffCellFn
): string {
  if (origin.kind === 'cp-inserted') {
    // CP added the row. If Me then removed it, show a Me-deletion that
    // rejects CP's insertion; if Me kept it, it is a plain CP insertion
    // rendered from V2's content.
    return fate.kind === 'me-deleted'
      ? emitFullRowAttributed(v2, v2Row, 'del', 'me', 'cp')
      : emitFullRowAttributed(v2, v2Row, 'ins', 'cp')
  }

  if (fate.kind === 'me-deleted') {
    // The row existed in V1 and V2; Me removed it.
    return emitFullRowAttributed(v2, v2Row, 'del', 'me')
  }

  // Preserved on both sides: both unions are narrowed to the variants
  // that carry an index.
  const v1Row = t1.rows[origin.v1Idx]
  const v3Row = t3.rows[fate.v3Idx]
  const cellCountsAgree = v1Row.cells.length === v2Row.cells.length && v2Row.cells.length === v3Row.cells.length

  if (!cellCountsAgree) {
    // Known limitation: a cell-count change inside a preserved row is
    // not diffed structurally. The row is shown as a Me-attributed
    // replace (V2 row deleted, V3 row inserted), which loses CP's
    // contribution within the row.
    const removed = emitFullRowAttributed(v2, v2Row, 'del', 'me')
    const added = emitFullRowAttributed(v3, v3Row, 'ins', 'me')
    return removed + added
  }

  return diffRowPositional(v1, v2, v3, v1Row, v2Row, v3Row, cellDiff)
}
|
|
513
|
+
|
|
514
|
+
/**
 * Row-scale counterpart of `diffTablePositional`: copies the V2 row
 * verbatim, replacing each cell's content range with `cellDiff` applied
 * to the V1/V2/V3 cell contents at the same column.
 *
 * V1 and V3 cells are looked up by V2's column index, so the caller must
 * have checked that the three rows have the same number of cells.
 */
function diffRowPositional(
  v1: string,
  v2: string,
  v3: string,
  v1Row: RowRange,
  v2Row: RowRange,
  v3Row: RowRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  let rendered = ''
  // Offset in V2 up to which the row has already been emitted.
  let emittedUpTo = v2Row.rowStart

  v2Row.cells.forEach((cell2, colIdx) => {
    const cell1 = v1Row.cells[colIdx]
    const cell3 = v3Row.cells[colIdx]
    rendered += v2.slice(emittedUpTo, cell2.contentStart)
    rendered += cellDiff(
      v1.slice(cell1.contentStart, cell1.contentEnd),
      v2.slice(cell2.contentStart, cell2.contentEnd),
      v3.slice(cell3.contentStart, cell3.contentEnd)
    )
    emittedUpTo = cell2.contentEnd
  })

  // Tail of the row: closing tag of the last cell up to the row end.
  return rendered + v2.slice(emittedUpTo, v2Row.rowEnd)
}
|
|
544
|
+
|
|
545
|
+
/**
 * Group the V1 rows that have no V2 counterpart (alignment entries with
 * `oldIdx` set and `newIdx` null) by the V2 boundary they belong to.
 *
 * The boundary of such a row is the `newIdx` of the next alignment entry
 * that carries a V2 row, or `v2RowCount` when no such entry follows.
 * Rows sharing a boundary are listed in alignment (document) order.
 *
 * @param align - V1↔V2 row alignment
 * @param v2RowCount - number of V2 rows; used as the tail boundary
 * @returns boundary index → V1 row indices
 */
function collectCpDelRowsAtBoundary(align: ReturnType<typeof lcsAlign>, v2RowCount: number): Map<number, number[]> {
  const rowsByBoundary = new Map<number, number[]>()

  // File a finished run of V1 rows under its boundary (no-op when empty).
  const assign = (boundary: number, v1Rows: number[]): void => {
    if (v1Rows.length === 0) return
    const bucket = rowsByBoundary.get(boundary) ?? []
    bucket.unshift(...v1Rows)
    rowsByBoundary.set(boundary, bucket)
  }

  // Walk backwards so the boundary that follows each unpaired row is
  // already known when the row is reached.
  let boundary = v2RowCount
  let run: number[] = []
  for (let pos = align.length - 1; pos >= 0; pos--) {
    const { oldIdx, newIdx } = align[pos]
    if (newIdx !== null) {
      // A V2 row: everything collected so far sits in front of the
      // previously seen boundary; this row becomes the new boundary.
      assign(boundary, run)
      run = []
      boundary = newIdx
    } else if (oldIdx !== null) {
      // Unpaired V1 row (CP deleted it). Prepending keeps document
      // order even though the walk is reversed.
      run.unshift(oldIdx)
    }
  }
  assign(boundary, run)

  return rowsByBoundary
}
|
|
579
|
+
|
|
580
|
+
/**
 * Group the V3 rows that have no V2 counterpart (alignment entries with
 * `newIdx` set and `oldIdx` null) by the V2 boundary they sit in front
 * of. Mirror image of `collectCpDelRowsAtBoundary`.
 *
 * The boundary of such a row is the `oldIdx` of the next alignment entry
 * that carries a V2 row, or `v2RowCount` when no such entry follows.
 * Rows sharing a boundary are listed in alignment (document) order.
 *
 * @param align - V2↔V3 row alignment
 * @param v2RowCount - number of V2 rows; used as the tail boundary
 * @returns boundary index → V3 row indices
 */
function collectMeInsRowsAtBoundary(align: ReturnType<typeof lcsAlign>, v2RowCount: number): Map<number, number[]> {
  const rowsByBoundary = new Map<number, number[]>()
  let boundary = v2RowCount
  // V3 rows seen since the last V2 row, kept in document order.
  let run: number[] = []

  const fileRun = (): void => {
    if (run.length === 0) return
    const bucket = rowsByBoundary.get(boundary) ?? []
    bucket.unshift(...run)
    rowsByBoundary.set(boundary, bucket)
    run = []
  }

  // Reverse walk: the boundary following each inserted row is known by
  // the time the row is visited.
  let pos = align.length
  while (pos-- > 0) {
    const entry = align[pos]
    if (entry.oldIdx !== null) {
      // A V2 row closes the current run and becomes the next boundary.
      fileRun()
      boundary = entry.oldIdx
    } else if (entry.newIdx !== null) {
      // Me-inserted V3 row; prepend to undo the reversed visiting order.
      run.unshift(entry.newIdx)
    }
  }
  fileRun()

  return rowsByBoundary
}
|
|
608
|
+
|
|
609
|
+
/**
 * Everything from the start of the table up to the start of its first
 * row. For a table without rows the slice ends `'</table>'.length`
 * characters before `tableEnd` instead, i.e. it assumes the table ends
 * with a closing tag of exactly that length.
 */
function tableHeaderSlice(html: string, table: TableRange): string {
  const [firstRow] = table.rows
  const headerEnd = firstRow ? firstRow.rowStart : table.tableEnd - '</table>'.length
  return html.slice(table.tableStart, headerEnd)
}
|
|
616
|
+
|
|
617
|
+
/**
 * Everything from the end of the table's last row up to the end of the
 * table. A table without rows yields the literal `'</table>'`.
 */
function tableFooterSlice(html: string, table: TableRange): string {
  const rowCount = table.rows.length
  const lastRow = rowCount > 0 ? table.rows[rowCount - 1] : undefined
  return lastRow ? html.slice(lastRow.rowEnd, table.tableEnd) : '</table>'
}
|
|
623
|
+
|
|
624
|
+
/**
 * Emit a row that is attributed in full to one author, as an insertion
 * or a deletion.
 *
 * The row's opening `<tr>` tag gets the attribution injected via
 * `injectAuthorAttribution`, every cell is rendered through
 * `emitFullCellAttributed`, and whatever lies between and after the
 * cells is copied verbatim from `html`.
 *
 * @param html - document the row range refers to
 * @param row - the row to emit
 * @param kind - `'ins'` or `'del'`
 * @param author - author the change is attributed to
 * @param rejectsAuthor - set when the row is a Me-deletion of a
 *   CP-inserted row
 * @returns the attributed row HTML; if no opening tag can be parsed at
 *   `row.rowStart`, the row is returned unchanged
 */
function emitFullRowAttributed(
  html: string,
  row: RowRange,
  kind: 'ins' | 'del',
  author: Author,
  rejectsAuthor?: Author
): string {
  const trOpening = parseOpeningTagAt(html, row.rowStart)
  // Without a parseable opening tag there is nothing to inject into.
  // Emit the row verbatim rather than dropping it from the output —
  // the same fallback `emitFullCellAttributed` uses for cells.
  if (!trOpening) return html.slice(row.rowStart, row.rowEnd)
  const trWithAttrs = injectAuthorAttribution(html.slice(row.rowStart, trOpening.end), kind, author, rejectsAuthor)

  const out: string[] = [trWithAttrs]
  // Offset up to which the row has already been emitted.
  let cursor = trOpening.end
  for (const cell of row.cells) {
    // Inter-cell text (whitespace etc.) is preserved as-is.
    out.push(html.slice(cursor, cell.cellStart))
    out.push(emitFullCellAttributed(html, cell, kind, author, rejectsAuthor))
    cursor = cell.cellEnd
  }
  // Remainder of the row after the last cell, including `</tr>`.
  out.push(html.slice(cursor, row.rowEnd))
  return out.join('')
}
|
|
652
|
+
|
|
653
|
+
/**
 * Emit a cell that is attributed in full to one author.
 *
 * The opening cell tag gets the attribution injected via
 * `injectAuthorAttribution`. Non-blank content is additionally wrapped
 * with `Utils.wrapText` using the same attribution; content that is
 * empty or whitespace-only is copied through without an inner wrapper.
 * The text between the content end and the cell end is appended
 * unchanged.
 *
 * @returns the attributed cell HTML; if no opening tag can be parsed at
 *   `cell.cellStart`, the cell is returned unchanged
 */
function emitFullCellAttributed(
  html: string,
  cell: CellRange,
  kind: 'ins' | 'del',
  author: Author,
  rejectsAuthor?: Author
): string {
  const opening = parseOpeningTagAt(html, cell.cellStart)
  if (!opening) return html.slice(cell.cellStart, cell.cellEnd)

  const attributedTag = injectAuthorAttribution(html.slice(cell.cellStart, opening.end), kind, author, rejectsAuthor)

  const content = html.slice(cell.contentStart, cell.contentEnd)
  const isBlank = content.trim() === ''
  let body = content
  if (!isBlank) {
    body = Utils.wrapText(content, kind, `diff${kind}`, authorAttribution(author, rejectsAuthor))
  }

  const tail = html.slice(cell.contentEnd, cell.cellEnd)
  return `${attributedTag}${body}${tail}`
}
|
|
674
|
+
|
|
675
|
+
/**
 * Add author attribution to an opening tag that already exists in the
 * source HTML (e.g. a `<tr>` or `<td>`).
 *
 * The attribution comes from `authorAttribution(author, rejectsAuthor)`:
 * its `extraClasses` are injected, prefixed with `diff<kind>`, through
 * `injectClass`, and its `dataAttrs` (if any) are appended through
 * `injectDataAttrs`. This keeps the inject-into-existing-tag path in
 * step with the `Utils.wrapText` path, which is fed the same
 * attribution.
 *
 * @param openingTag - the complete opening tag text
 * @returns the opening tag with classes and data attributes added
 */
function injectAuthorAttribution(
  openingTag: string,
  kind: 'ins' | 'del',
  author: Author,
  rejectsAuthor?: Author
): string {
  const attribution = authorAttribution(author, rejectsAuthor)
  const classNames = `diff${kind} ${attribution.extraClasses}`
  const classedTag = injectClass(openingTag, classNames)
  const dataAttrs = attribution.dataAttrs ?? {}
  return injectDataAttrs(classedTag, dataAttrs)
}
|
|
691
|
+
|
|
692
|
+
/**
 * Append `data-<key>='<value>'` attributes to an opening tag, right
 * before its closing `>` (or before `/>` for a self-closing tag).
 *
 * Values are inserted as-is, without escaping. With no attributes to add
 * the tag is returned unchanged.
 *
 * @param openingTag - the complete opening tag text
 * @param dataAttrs - attribute name (without the `data-` prefix) → value
 */
function injectDataAttrs(openingTag: string, dataAttrs: Readonly<Record<string, string>>): string {
  const names = Object.keys(dataAttrs)
  if (names.length === 0) return openingTag

  let rendered = ''
  for (const name of names) {
    rendered += ` data-${name}='${dataAttrs[name]}'`
  }

  // `<tr>` / `<td>` are not self-closing in real HTML; `/>` is handled
  // defensively all the same.
  const terminator = openingTag.endsWith('/>') ? '/>' : '>'
  const head = openingTag.slice(0, -terminator.length)
  return `${head}${rendered}${terminator}`
}
|