@createiq/htmldiff 1.1.0 → 1.2.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +15 -0
- package/README.md +67 -0
- package/dist/HtmlDiff.cjs +1192 -456
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +160 -7
- package/dist/HtmlDiff.d.mts +159 -7
- package/dist/HtmlDiff.mjs +1192 -456
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/Alignment.ts +349 -0
- package/src/HtmlDiff.ts +343 -33
- package/src/HtmlScanner.ts +200 -0
- package/src/TableDiff.ts +67 -522
- package/src/ThreeWayDiff.ts +269 -0
- package/src/ThreeWayTable.ts +625 -0
- package/src/Utils.ts +34 -2
- package/test/HtmlDiff.analyze.spec.ts +152 -0
- package/test/HtmlDiff.tables.spec.ts +43 -19
- package/test/HtmlDiff.threeWay.spec.ts +173 -0
- package/test/HtmlDiff.threeWay.tables.spec.ts +301 -0
- package/test/TableDiff.bench.ts +39 -0
- package/test/Utils.spec.ts +48 -0
|
@@ -0,0 +1,625 @@
|
|
|
1
|
+
import { lcsAlign, textSimilarity } from './Alignment'
|
|
2
|
+
import { injectClass, parseOpeningTagAt } from './HtmlScanner'
|
|
3
|
+
import {
|
|
4
|
+
type CellRange,
|
|
5
|
+
exceedsSizeLimit,
|
|
6
|
+
findTopLevelTables,
|
|
7
|
+
makePlaceholderPrefix,
|
|
8
|
+
PLACEHOLDER_SUFFIX,
|
|
9
|
+
type RowRange,
|
|
10
|
+
rowKey,
|
|
11
|
+
sameDimensions,
|
|
12
|
+
spliceString,
|
|
13
|
+
type TableRange,
|
|
14
|
+
} from './TableDiff'
|
|
15
|
+
import { type Author, authorAttribution } from './ThreeWayDiff'
|
|
16
|
+
import Utils from './Utils'
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Three-way table preprocessing for the genesis-spine merge.
|
|
20
|
+
*
|
|
21
|
+
* Inputs: `genesis` (common ancestor), `cpLatest` (counterparty's
|
|
22
|
+
* accumulated position), `meCurrent` (Me's accumulated position). All
|
|
23
|
+
* three share a single placeholder nonce so genesis tokenises
|
|
24
|
+
* identically across both pair-wise word-level analyses.
|
|
25
|
+
*
|
|
26
|
+
* Three paths:
|
|
27
|
+
* 1. **Positional** — all three have the same table count AND each
|
|
28
|
+
* positional triple's tableKey is similar enough that 1:1 pairing
|
|
29
|
+
* by position is sound. Recurses cellDiff per cell, structural
|
|
30
|
+
* layout from genesis.
|
|
31
|
+
* 2. **Row-structural** — paired triples whose row/cell counts differ.
|
|
32
|
+
* Per-table row-level LCS against genesis; recurse on preserved
|
|
33
|
+
* rows, emit author-attributed full rows for the rest.
|
|
34
|
+
* 3. **Multi-table by content** — table counts diverge across inputs.
|
|
35
|
+
* Pair tables to genesis via content-LCS, then assign placeholders
|
|
36
|
+
* such that each placeholder appears in exactly the inputs that
|
|
37
|
+
* contain the underlying table. The word-level merger walks the
|
|
38
|
+
* genesis spine and attributes unpaired tables naturally
|
|
39
|
+
* (cp-only/me-only/both-agree).
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
/**
 * Outcome of three-way table preprocessing: the three inputs with their
 * top-level tables swapped for placeholder tokens, plus the HTML each
 * token stands for.
 */
export interface ThreeWayPreprocessResult {
  /** `genesis` with each table range replaced by its placeholder. */
  modifiedGenesis: string
  /** `cpLatest` with each table range replaced by its placeholder. */
  modifiedCp: string
  /** `meCurrent` with each table range replaced by its placeholder. */
  modifiedMe: string
  /** Placeholder token → HTML to emit in its place (may be the empty string). */
  placeholderToDiff: Map<string, string>
}
|
|
48
|
+
|
|
49
|
+
/**
 * Callback that merges the inner content of one cell three ways. It
 * receives the genesis, cp and me cell contents (in that order) and
 * returns the HTML to place inside the emitted cell.
 */
export type ThreeWayDiffCellFn = (genesisCell: string, cpCell: string, meCell: string) => string
|
|
50
|
+
|
|
51
|
+
/**
 * Entry point for three-way table preprocessing.
 *
 * Locates the top-level tables of all three inputs and replaces them
 * with placeholders, choosing between positional pairing and
 * content-based pairing.
 *
 * @param genesis - Common-ancestor HTML.
 * @param cpLatest - Counterparty HTML.
 * @param meCurrent - Me HTML.
 * @param cellDiff - Three-way merge callback applied to individual cells.
 * @returns The placeholder-substituted inputs and placeholder map, or
 *   `null` when none of the inputs contains a table or when any table
 *   exceeds the size limit.
 */
export function preprocessTablesThreeWay(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  cellDiff: ThreeWayDiffCellFn
): ThreeWayPreprocessResult | null {
  const gTables = findTopLevelTables(genesis)
  const cTables = findTopLevelTables(cpLatest)
  const mTables = findTopLevelTables(meCurrent)

  // Nothing to preprocess when no input contains a table.
  if (gTables.length + cTables.length + mTables.length === 0) return null

  // A single oversized table (checked genesis → cp → me) aborts preprocessing.
  if ([...gTables, ...cTables, ...mTables].some(t => exceedsSizeLimit(t))) return null

  const placeholderPrefix = makePlaceholderPrefix(genesis, cpLatest, meCurrent)

  // Both strategies share one signature, so pick one and call it once.
  const strategy = positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables)
    ? preprocessAlignedByPosition
    : preprocessByContent

  return strategy(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix)
}
|
|
84
|
+
|
|
85
|
+
/**
 * Positional handler: the i-th table of genesis, cp and me are treated
 * as one triple. Each triple is diffed via `diffTableThreeWay`, and the
 * same placeholder (numbered by table index) replaces that table in all
 * three inputs.
 *
 * Callers are expected to pass equally long table lists; the loop is
 * driven by `gTables` and indexes the other two by the same position.
 */
function preprocessAlignedByPosition(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  gTables: TableRange[],
  cTables: TableRange[],
  mTables: TableRange[],
  cellDiff: ThreeWayDiffCellFn,
  placeholderPrefix: string
): ThreeWayPreprocessResult {
  // Diff every triple up front, in document order.
  const diffs = gTables.map((tG, i) =>
    diffTableThreeWay(genesis, cpLatest, meCurrent, tG, cTables[i], mTables[i], cellDiff)
  )

  const placeholderToDiff = new Map<string, string>()
  let modifiedGenesis = genesis
  let modifiedCp = cpLatest
  let modifiedMe = meCurrent

  // Splice from the last table backwards so earlier offsets stay valid.
  for (let i = diffs.length - 1; i >= 0; i--) {
    const placeholder = `${placeholderPrefix}${i}${PLACEHOLDER_SUFFIX}`
    placeholderToDiff.set(placeholder, diffs[i])

    const tG = gTables[i]
    const tC = cTables[i]
    const tM = mTables[i]
    modifiedGenesis = spliceString(modifiedGenesis, tG.tableStart, tG.tableEnd, placeholder)
    modifiedCp = spliceString(modifiedCp, tC.tableStart, tC.tableEnd, placeholder)
    modifiedMe = spliceString(modifiedMe, tM.tableStart, tM.tableEnd, placeholder)
  }

  return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
}
|
|
117
|
+
|
|
118
|
+
/**
 * Multi-table handler. Tables of cp and me are each paired against the
 * `genesis` tables with `lcsAlign` over their `tableKey`s. Placeholders
 * are then handed out so that each one appears only in the inputs that
 * actually contain the underlying table:
 *
 *   - genesis + cp + me        → recursive three-way table diff
 *   - genesis + cp, not me     → whole table wrapped as a `del` by me
 *   - genesis + me, not cp     → whole table wrapped as a `del` by cp
 *   - genesis only             → empty content (both removed it)
 *   - cp + me, identical keys  → cp's table emitted as-is
 *   - cp only                  → whole table wrapped as an `ins` by cp
 *   - me only                  → whole table wrapped as an `ins` by me
 *
 * The passes run in exactly this order because placeholder ids are
 * allocated sequentially; reordering would renumber the placeholders.
 */
function preprocessByContent(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  gTables: TableRange[],
  cTables: TableRange[],
  mTables: TableRange[],
  cellDiff: ThreeWayDiffCellFn,
  placeholderPrefix: string
): ThreeWayPreprocessResult {
  const tableHtml = (html: string, t: TableRange): string => html.slice(t.tableStart, t.tableEnd)

  const gKeys = gTables.map(t => tableKey(genesis, t))
  const cKeys = cTables.map(t => tableKey(cpLatest, t))
  const mKeys = mTables.map(t => tableKey(meCurrent, t))

  // Builds index maps in both directions for one side; -1 means "no partner".
  const pairWithGenesis = (otherKeys: string[]): { fromGenesis: number[]; toGenesis: number[] } => {
    const fromGenesis = new Array<number>(gTables.length).fill(-1)
    const toGenesis = new Array<number>(otherKeys.length).fill(-1)
    for (const { oldIdx, newIdx } of lcsAlign(gKeys, otherKeys)) {
      if (oldIdx === null || newIdx === null) continue
      fromGenesis[oldIdx] = newIdx
      toGenesis[newIdx] = oldIdx
    }
    return { fromGenesis, toGenesis }
  }
  const { fromGenesis: gToCp } = pairWithGenesis(cKeys)
  const { fromGenesis: gToMe, toGenesis: meToG } = pairWithGenesis(mKeys)

  // Placeholder assigned to each table of each input (null = not yet assigned).
  const slots = {
    g: new Array<string | null>(gTables.length).fill(null),
    c: new Array<string | null>(cTables.length).fill(null),
    m: new Array<string | null>(mTables.length).fill(null),
  }

  const placeholderToDiff = new Map<string, string>()
  let nextId = 0
  // Allocates the next sequential placeholder and records what it expands to.
  const register = (content: string): string => {
    const placeholder = `${placeholderPrefix}${nextId++}${PLACEHOLDER_SUFFIX}`
    placeholderToDiff.set(placeholder, content)
    return placeholder
  }

  // Author attribution is baked into the placeholder content itself for
  // tables that exist on only some sides.
  const attributed = (tag: 'ins' | 'del', author: Author, html: string): string =>
    Utils.wrapText(html, tag, `diff${tag}`, authorAttribution(author))

  // Pass 1 — present in genesis, cp and me: recursive three-way diff.
  gTables.forEach((tG, gIdx) => {
    const cIdx = gToCp[gIdx]
    const mIdx = gToMe[gIdx]
    if (cIdx === -1 || mIdx === -1) return
    const placeholder = register(
      diffTableThreeWay(genesis, cpLatest, meCurrent, tG, cTables[cIdx], mTables[mIdx], cellDiff)
    )
    slots.g[gIdx] = placeholder
    slots.c[cIdx] = placeholder
    slots.m[mIdx] = placeholder
  })

  // Pass 2 — genesis + cp only: me deleted the table.
  gTables.forEach((tG, gIdx) => {
    if (slots.g[gIdx] !== null) return
    const cIdx = gToCp[gIdx]
    if (cIdx === -1) return
    const placeholder = register(attributed('del', 'me', tableHtml(genesis, tG)))
    slots.g[gIdx] = placeholder
    slots.c[cIdx] = placeholder
  })

  // Pass 3 — genesis + me only: cp deleted the table.
  gTables.forEach((tG, gIdx) => {
    if (slots.g[gIdx] !== null) return
    const mIdx = gToMe[gIdx]
    if (mIdx === -1) return
    const placeholder = register(attributed('del', 'cp', tableHtml(genesis, tG)))
    slots.g[gIdx] = placeholder
    slots.m[mIdx] = placeholder
  })

  // Pass 4 — genesis only: both sides dropped it, so the placeholder
  // lives in genesis alone and expands to nothing.
  gTables.forEach((_tG, gIdx) => {
    if (slots.g[gIdx] !== null) return
    slots.g[gIdx] = register('')
  })

  // Pass 5 — cp and me both added a table with the same key: pair each
  // unassigned cp table with the first unassigned, genesis-unpaired me
  // table whose key is identical, and emit cp's table unmarked.
  cTables.forEach((tC, cIdx) => {
    if (slots.c[cIdx] !== null) return
    const mIdx = mKeys.findIndex(
      (key, candidate) => slots.m[candidate] === null && meToG[candidate] === -1 && key === cKeys[cIdx]
    )
    if (mIdx === -1) return
    const placeholder = register(tableHtml(cpLatest, tC))
    slots.c[cIdx] = placeholder
    slots.m[mIdx] = placeholder
  })

  // Pass 6 — whatever cp tables remain were inserted by cp alone.
  cTables.forEach((tC, cIdx) => {
    if (slots.c[cIdx] !== null) return
    slots.c[cIdx] = register(attributed('ins', 'cp', tableHtml(cpLatest, tC)))
  })

  // Pass 7 — whatever me tables remain were inserted by me alone.
  mTables.forEach((tM, mIdx) => {
    if (slots.m[mIdx] !== null) return
    slots.m[mIdx] = register(attributed('ins', 'me', tableHtml(meCurrent, tM)))
  })

  // Replace tables with their placeholders, last table first so that
  // earlier offsets remain valid while splicing.
  const substitute = (html: string, tables: TableRange[], assigned: Array<string | null>): string => {
    let result = html
    for (let i = tables.length - 1; i >= 0; i--) {
      const placeholder = assigned[i]
      if (placeholder === null) continue
      result = spliceString(result, tables[i].tableStart, tables[i].tableEnd, placeholder)
    }
    return result
  }

  return {
    modifiedGenesis: substitute(genesis, gTables, slots.g),
    modifiedCp: substitute(cpLatest, cTables, slots.c),
    modifiedMe: substitute(meCurrent, mTables, slots.m),
    placeholderToDiff,
  }
}
|
|
305
|
+
|
|
306
|
+
/**
 * Minimum `textSimilarity` score a genesis table must reach against the
 * cp table and the me table at the same position for positional pairing
 * to be accepted.
 */
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = 0.5
|
|
307
|
+
|
|
308
|
+
/**
 * Decides whether tables can be paired 1:1 by position.
 *
 * @returns `true` only when all three inputs hold the same number of
 *   tables and, at every position, the genesis table's key scores no
 *   lower than `POSITIONAL_PAIR_SIMILARITY_THRESHOLD` against both the
 *   cp and the me table's key.
 */
function positionallyAligned(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  gTables: TableRange[],
  cTables: TableRange[],
  mTables: TableRange[]
): boolean {
  const sameCount = gTables.length === cTables.length && cTables.length === mTables.length
  if (!sameCount) return false

  // Expressed as "is below the threshold" (rather than ">=") on purpose,
  // so the comparison behaves the same for every possible score value.
  const tooDifferent = (a: string, b: string): boolean =>
    textSimilarity(a, b) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD

  return gTables.every((tG, i) => {
    const genesisKey = tableKey(genesis, tG)
    const cpKey = tableKey(cpLatest, cTables[i])
    const meKey = tableKey(meCurrent, mTables[i])
    return !tooDifferent(genesisKey, cpKey) && !tooDifferent(genesisKey, meKey)
  })
}
|
|
326
|
+
|
|
327
|
+
/**
 * Comparison key for a table: its full HTML with every run of
 * whitespace collapsed to a single space and the ends trimmed.
 */
function tableKey(html: string, table: TableRange): string {
  const rawTable = html.slice(table.tableStart, table.tableEnd)
  const collapsed = rawTable.replace(/\s+/g, ' ')
  return collapsed.trim()
}
|
|
330
|
+
|
|
331
|
+
// ────────────────────────────────────────────────────────────────────────────
|
|
332
|
+
// Per-table diff: positional cells or row-level structural change.
|
|
333
|
+
|
|
334
|
+
/**
 * Diffs one table triple. When `sameDimensions` holds for genesis/cp
 * and for cp/me the cells are merged positionally; otherwise the
 * row-structural merge is used.
 */
function diffTableThreeWay(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  tG: TableRange,
  tC: TableRange,
  tM: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const gridsMatch = sameDimensions(tG, tC) && sameDimensions(tC, tM)
  const strategy = gridsMatch ? diffTablePositional : diffTableStructural
  return strategy(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
}
|
|
348
|
+
|
|
349
|
+
/**
 * Cell-by-cell merge for three tables of matching shape.
 *
 * Everything outside cell contents (table/row/cell tags and whatever
 * sits between them) is copied verbatim from genesis; each cell's
 * content is replaced by `cellDiff(genesisCell, cpCell, meCell)` for the
 * cell at the same row/column in all three tables. The caller
 * (`diffTableThreeWay`) checks `sameDimensions` before choosing this path.
 */
function diffTablePositional(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  tG: TableRange,
  tC: TableRange,
  tM: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  let merged = ''
  // Position in `genesis` up to which scaffolding has already been copied.
  let copiedUpTo = tG.tableStart

  tG.rows.forEach((rowG, r) => {
    const rowC = tC.rows[r]
    const rowM = tM.rows[r]
    rowG.cells.forEach((cellG, c) => {
      const cellC = rowC.cells[c]
      const cellM = rowM.cells[c]
      // Genesis markup leading up to this cell's content…
      merged += genesis.slice(copiedUpTo, cellG.contentStart)
      // …followed by the three-way merged content.
      merged += cellDiff(
        genesis.slice(cellG.contentStart, cellG.contentEnd),
        cpLatest.slice(cellC.contentStart, cellC.contentEnd),
        meCurrent.slice(cellM.contentStart, cellM.contentEnd)
      )
      copiedUpTo = cellG.contentEnd
    })
  })

  // Remaining genesis markup after the final cell.
  return merged + genesis.slice(copiedUpTo, tG.tableEnd)
}
|
|
386
|
+
|
|
387
|
+
/**
 * Row-level genesis-spine merge for tables whose dimensions differ.
 *
 *   1. cp rows and me rows are each aligned to the genesis rows with
 *      `lcsAlign` over `rowKey`s.
 *   2. For every genesis row: kept on both sides → merged via
 *      `emitPreservedRow`; kept on one side only → that side's row is
 *      emitted as a `del` attributed to the author who dropped it;
 *      dropped on both sides → nothing is emitted.
 *   3. Rows with no genesis counterpart are emitted at the genesis
 *      boundary they precede. Within one boundary, a cp row and a me row
 *      with identical keys are emitted once, unmarked (cp's markup);
 *      other cp rows are `ins` by cp, and leftover me rows follow as
 *      `ins` by me.
 *
 * The table's opening and closing markup is taken from genesis.
 */
function diffTableStructural(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  tG: TableRange,
  tC: TableRange,
  tM: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const spineLength = tG.rows.length

  const keysG = tG.rows.map(r => rowKey(genesis, r))
  const keysC = tC.rows.map(r => rowKey(cpLatest, r))
  const keysM = tM.rows.map(r => rowKey(meCurrent, r))

  const cpAlignment = lcsAlign(keysG, keysC)
  const meAlignment = lcsAlign(keysG, keysM)

  // genesis row index → index of the matching row on the other side (-1 = dropped).
  const survivingRows = (alignment: ReturnType<typeof lcsAlign>): number[] => {
    const partner = new Array<number>(spineLength).fill(-1)
    for (const { oldIdx, newIdx } of alignment) {
      if (oldIdx !== null && newIdx !== null) partner[oldIdx] = newIdx
    }
    return partner
  }
  const cpRowFor = survivingRows(cpAlignment)
  const meRowFor = survivingRows(meAlignment)

  // Off-spine rows, grouped by the genesis row index they appear before.
  const cpInserted = collectInsertedRowsAtBoundary(cpAlignment, spineLength)
  const meInserted = collectInsertedRowsAtBoundary(meAlignment, spineLength)

  const pieces: string[] = [tableHeaderSlice(genesis, tG)]

  // Emits every cp/me insertion that belongs at the given boundary.
  const flushInsertionsAt = (boundary: number): void => {
    const cpRows = cpInserted.get(boundary) ?? []
    const unmatchedMe = new Set(meInserted.get(boundary) ?? [])

    for (const cIdx of cpRows) {
      const cpRow = tC.rows[cIdx]
      // First still-unmatched me row carrying exactly the same key, if any.
      const agreedMeIdx = [...unmatchedMe].find(mIdx => keysM[mIdx] === keysC[cIdx])
      if (agreedMeIdx === undefined) {
        pieces.push(emitFullRowAttributed(cpLatest, cpRow, 'ins', 'cp'))
        continue
      }
      // Both sides inserted the same row: emit cp's markup unmarked.
      unmatchedMe.delete(agreedMeIdx)
      pieces.push(cpLatest.slice(cpRow.rowStart, cpRow.rowEnd))
    }

    for (const mIdx of unmatchedMe) {
      pieces.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], 'ins', 'me'))
    }
  }

  tG.rows.forEach((rowG, g) => {
    flushInsertionsAt(g)

    const cIdx = cpRowFor[g]
    const mIdx = meRowFor[g]
    const cpKept = cIdx !== -1
    const meKept = mIdx !== -1

    if (cpKept && meKept) {
      // Survives on both sides: merge the row triple cell by cell.
      pieces.push(emitPreservedRow(genesis, cpLatest, meCurrent, rowG, tC.rows[cIdx], tM.rows[mIdx], cellDiff))
    } else if (meKept) {
      // cp dropped it: show me's version of the row, marked as a cp deletion.
      pieces.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], 'del', 'cp'))
    } else if (cpKept) {
      // me dropped it: show cp's version of the row, marked as a me deletion.
      pieces.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], 'del', 'me'))
    }
    // Dropped by both: emit nothing.
  })

  // Insertions after the last genesis row.
  flushInsertionsAt(spineLength)
  pieces.push(tableFooterSlice(genesis, tG))
  return pieces.join('')
}
|
|
492
|
+
|
|
493
|
+
/**
 * Emits a genesis row that survives in both cp and me.
 *
 * With equal cell counts on all three sides, the row markup is copied
 * from genesis and each cell's content is replaced by the `cellDiff`
 * result for that position. With differing cell counts there is no
 * cell-level alignment: the genesis row is emitted as a `del` by me
 * followed by me's row as an `ins` by me, so cp's changes within that
 * row are not represented.
 */
function emitPreservedRow(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  rG: RowRange,
  rC: RowRange,
  rM: RowRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const sameWidth = rG.cells.length === rC.cells.length && rC.cells.length === rM.cells.length
  if (!sameWidth) {
    // Fallback: whole-row replace attributed to me.
    return emitFullRowAttributed(genesis, rG, 'del', 'me') + emitFullRowAttributed(meCurrent, rM, 'ins', 'me')
  }

  let merged = ''
  // Position in `genesis` up to which row markup has already been copied.
  let copiedUpTo = rG.rowStart
  rG.cells.forEach((cellG, c) => {
    const cellC = rC.cells[c]
    const cellM = rM.cells[c]
    merged += genesis.slice(copiedUpTo, cellG.contentStart)
    merged += cellDiff(
      genesis.slice(cellG.contentStart, cellG.contentEnd),
      cpLatest.slice(cellC.contentStart, cellC.contentEnd),
      meCurrent.slice(cellM.contentStart, cellM.contentEnd)
    )
    copiedUpTo = cellG.contentEnd
  })
  return merged + genesis.slice(copiedUpTo, rG.rowEnd)
}
|
|
528
|
+
|
|
529
|
+
/**
 * Groups inserted rows by the genesis boundary they belong to.
 *
 * An alignment entry with `oldIdx === null` and a non-null `newIdx` is
 * an inserted row. Its boundary is the `oldIdx` of the next entry (in
 * alignment order) that has a non-null `oldIdx`, or `genesisRowCount`
 * when no such entry follows.
 *
 * @param align - Alignment entries in document order.
 * @param genesisRowCount - Boundary used for insertions after the last
 *   genesis row.
 * @returns Map from boundary index to the inserted rows' `newIdx`
 *   values, listed in alignment order.
 */
function collectInsertedRowsAtBoundary(
  align: ReadonlyArray<{ readonly oldIdx: number | null; readonly newIdx: number | null }>,
  genesisRowCount: number
): Map<number, number[]> {
  const out = new Map<number, number[]>()
  let nextGenesisBoundary = genesisRowCount
  // Filled during the reverse walk, so it holds rows last-to-first.
  const pending: number[] = []

  // Moves the pending rows to the current boundary, restoring document order.
  const flushPending = (): void => {
    if (pending.length === 0) return
    const existing = out.get(nextGenesisBoundary) ?? []
    // `pending` is a scratch buffer that is cleared right below, so an
    // in-place reverse() is safe and avoids the ES2023-only toReversed().
    existing.unshift(...pending.reverse())
    out.set(nextGenesisBoundary, existing)
    pending.length = 0
  }

  // Walk backwards so `nextGenesisBoundary` always names the genesis row
  // that follows the insertions currently being collected.
  for (let i = align.length - 1; i >= 0; i--) {
    const entry = align[i]
    if (entry.oldIdx !== null) {
      flushPending()
      nextGenesisBoundary = entry.oldIdx
    } else if (entry.newIdx !== null) {
      pending.push(entry.newIdx)
    }
  }
  // Insertions that precede the first genesis row.
  flushPending()
  return out
}
|
|
565
|
+
|
|
566
|
+
/**
 * Returns the table markup that precedes its first row. For a table
 * without rows, returns everything from the table start up to the last
 * `'</table>'.length` characters of the table range.
 */
function tableHeaderSlice(html: string, table: TableRange): string {
  const [firstRow] = table.rows
  const headerEnd = firstRow ? firstRow.rowStart : table.tableEnd - '</table>'.length
  return html.slice(table.tableStart, headerEnd)
}
|
|
571
|
+
|
|
572
|
+
/**
 * Returns the table markup that follows its last row, through the end
 * of the table range. For a table without rows, returns the literal
 * `</table>`.
 */
function tableFooterSlice(html: string, table: TableRange): string {
  const finalRow = table.rows[table.rows.length - 1]
  return finalRow ? html.slice(finalRow.rowEnd, table.tableEnd) : '</table>'
}
|
|
577
|
+
|
|
578
|
+
/**
 * Emits a whole row attributed to a single author.
 *
 * The row's opening tag is passed through `injectAuthorAttribution`,
 * every cell is rewritten by `emitFullCellAttributed`, and the markup
 * between cells and after the last cell is copied verbatim. If no
 * opening tag can be parsed at `row.rowStart`, the row is returned
 * unchanged.
 */
function emitFullRowAttributed(html: string, row: RowRange, kind: 'ins' | 'del', author: Author): string {
  const trOpening = parseOpeningTagAt(html, row.rowStart)
  if (!trOpening) return html.slice(row.rowStart, row.rowEnd)

  const openingTag = injectAuthorAttribution(html.slice(row.rowStart, trOpening.end), kind, author)

  // Position in `html` up to which row markup has already been emitted.
  let emittedUpTo = trOpening.end
  const cells = row.cells.map(cell => {
    const gapBeforeCell = html.slice(emittedUpTo, cell.cellStart)
    emittedUpTo = cell.cellEnd
    return gapBeforeCell + emitFullCellAttributed(html, cell, kind, author)
  })

  return openingTag + cells.join('') + html.slice(emittedUpTo, row.rowEnd)
}
|
|
599
|
+
|
|
600
|
+
/**
 * Emits a single cell attributed to an author: the opening tag goes
 * through `injectAuthorAttribution`, and non-blank content is wrapped
 * in an `<ins>`/`<del>` carrying the author's attribution. Content that
 * is empty or whitespace-only is left unwrapped. If no opening tag can
 * be parsed at `cell.cellStart`, the cell is returned unchanged.
 */
function emitFullCellAttributed(html: string, cell: CellRange, kind: 'ins' | 'del', author: Author): string {
  const tdOpening = parseOpeningTagAt(html, cell.cellStart)
  if (!tdOpening) return html.slice(cell.cellStart, cell.cellEnd)

  const openingTag = injectAuthorAttribution(html.slice(cell.cellStart, tdOpening.end), kind, author)
  const content = html.slice(cell.contentStart, cell.contentEnd)
  const closingPart = html.slice(cell.contentEnd, cell.cellEnd)

  if (content.trim() === '') return openingTag + content + closingPart

  const wrapped = Utils.wrapText(content, kind, `diff${kind}`, authorAttribution(author))
  return openingTag + wrapped + closingPart
}
|
|
612
|
+
|
|
613
|
+
/**
 * Adds the diff class and the author's attribution to an opening tag.
 *
 * The class list is `diff<kind>` followed by the author's extra classes
 * when there are any; the author's data attributes are then appended
 * via `injectDataAttrs`.
 *
 * @param openingTag - Complete opening tag text, e.g. `<tr>`.
 * @param kind - Whether the element is shown as an insertion or a deletion.
 * @param author - Author the change is attributed to.
 */
function injectAuthorAttribution(openingTag: string, kind: 'ins' | 'del', author: Author): string {
  const meta = authorAttribution(author)
  // Only append the extra classes when present; interpolating a missing
  // value would otherwise emit the literal class name "undefined".
  const classes = meta.extraClasses ? `diff${kind} ${meta.extraClasses}` : `diff${kind}`
  const tagWithClass = injectClass(openingTag, classes)
  return injectDataAttrs(tagWithClass, meta.dataAttrs ?? {})
}
|
|
618
|
+
|
|
619
|
+
/**
 * Appends `data-*` attributes to an opening tag, just before its
 * closing `>` (or `/>` for a self-closing tag).
 *
 * @param openingTag - Complete opening tag text ending in `>` or `/>`.
 * @param dataAttrs - Attribute map keyed by name without the `data-` prefix.
 * @returns The tag with one single-quoted `data-<key>='<value>'` per
 *   entry, or the tag unchanged when the map is empty.
 */
function injectDataAttrs(openingTag: string, dataAttrs: Readonly<Record<string, string>>): string {
  const entries = Object.entries(dataAttrs)
  if (entries.length === 0) return openingTag

  // Values are emitted inside single quotes, so a raw `'` would end the
  // attribute early. Only that character is escaped; everything else is
  // passed through so values without quotes render exactly as before.
  const attrs = entries.map(([key, value]) => ` data-${key}='${String(value).replace(/'/g, '&#39;')}'`).join('')

  const selfClosing = openingTag.endsWith('/>')
  const tagBody = openingTag.slice(0, selfClosing ? -2 : -1)
  return `${tagBody}${attrs}${selfClosing ? '/>' : '>'}`
}
|
package/src/Utils.ts
CHANGED
|
@@ -32,8 +32,39 @@ export function stripTagAttributes(word: string): string {
|
|
|
32
32
|
return word
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
/**
 * Optional extras for a wrapped tag: additional CSS classes and
 * `data-*` attributes. `wrapText` called without metadata produces a
 * tag carrying only the base class.
 */
export interface WrapMetadata {
  /** Space-separated class names placed after the base `cssClass`. */
  extraClasses?: string
  /** `data-*` attributes, keyed by attribute name *without* the `data-` prefix. */
  dataAttrs?: Readonly<Record<string, string>>
}
|
|
47
|
+
|
|
48
|
+
/**
 * Wraps `text` in `<tagName>` with a single-quoted `class` attribute.
 * When `metadata` is supplied, the attribute list is built by
 * `composeTagAttributes`; otherwise only `class='<cssClass>'` is emitted.
 */
export function wrapText(text: string, tagName: string, cssClass: string, metadata?: WrapMetadata): string {
  const attributes = metadata ? composeTagAttributes(cssClass, metadata) : ` class='${cssClass}'`
  return `<${tagName}${attributes}>${text}</${tagName}>`
}
|
|
52
|
+
|
|
53
|
+
/**
 * Builds the attribute portion of an opening tag (with its leading
 * space) from a base class plus optional metadata, so that code
 * assembling opening tags by hand can produce the same attributes as
 * `wrapText`.
 *
 * @param cssClass - Base class; always listed first.
 * @param metadata - Extra classes and `data-*` attributes to append.
 * @returns ` class='…'` followed by one ` data-<key>='<value>'` per
 *   data attribute.
 */
export function composeTagAttributes(cssClass: string, metadata: WrapMetadata): string {
  const classes = metadata.extraClasses ? `${cssClass} ${metadata.extraClasses}` : cssClass

  // Values sit inside single quotes, so a raw `'` would end the
  // attribute early. Only that character is escaped; values without
  // quotes render exactly as before.
  const dataAttrs = Object.entries(metadata.dataAttrs ?? {})
    .map(([key, value]) => ` data-${key}='${String(value).replace(/'/g, '&#39;')}'`)
    .join('')

  return ` class='${classes}'${dataAttrs}`
}
|
|
38
69
|
|
|
39
70
|
export function isStartOfTag(val: string): boolean {
|
|
@@ -85,6 +116,7 @@ export default {
|
|
|
85
116
|
isTag,
|
|
86
117
|
stripTagAttributes,
|
|
87
118
|
wrapText,
|
|
119
|
+
composeTagAttributes,
|
|
88
120
|
isStartOfTag,
|
|
89
121
|
isEndOfTag,
|
|
90
122
|
isStartOfEntity,
|