@createiq/htmldiff 1.2.0-beta.0 → 1.2.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -19
- package/dist/HtmlDiff.cjs +609 -438
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +89 -16
- package/dist/HtmlDiff.d.mts +89 -16
- package/dist/HtmlDiff.mjs +604 -438
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +218 -74
- package/src/ThreeWayDiff.ts +220 -127
- package/src/ThreeWayTable.ts +549 -491
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +316 -92
- package/test/HtmlDiff.threeWay.tables.spec.ts +200 -196
- package/test/Utils.spec.ts +3 -3
package/src/ThreeWayTable.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { lcsAlign, textSimilarity } from './Alignment'
|
|
1
|
+
import { type Alignment, lcsAlign, pairSimilarUnmatched, textSimilarity } from './Alignment'
|
|
2
2
|
import { injectClass, parseOpeningTagAt } from './HtmlScanner'
|
|
3
3
|
import {
|
|
4
4
|
type CellRange,
|
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
PLACEHOLDER_SUFFIX,
|
|
9
9
|
type RowRange,
|
|
10
10
|
rowKey,
|
|
11
|
+
rowText,
|
|
11
12
|
sameDimensions,
|
|
12
13
|
spliceString,
|
|
13
14
|
type TableRange,
|
|
@@ -16,675 +17,735 @@ import { type Author, authorAttribution } from './ThreeWayDiff'
|
|
|
16
17
|
import Utils from './Utils'
|
|
17
18
|
|
|
18
19
|
/**
|
|
19
|
-
* Three-way table preprocessing
|
|
20
|
-
* `preprocessTables` but takes V1/V2/V3 and a cell-level three-way diff
|
|
21
|
-
* callback. All three inputs share a single placeholder nonce so V2's
|
|
22
|
-
* tokenisation is identical when the word-level 3-way merger sees it
|
|
23
|
-
* from both pair-wise analyses.
|
|
20
|
+
* Three-way table preprocessing for the genesis-spine merge.
|
|
24
21
|
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
*
|
|
30
|
-
*
|
|
22
|
+
* Inputs: `genesis` (common ancestor), `cpLatest` (counterparty's
|
|
23
|
+
* accumulated position), `meCurrent` (Me's accumulated position). All
|
|
24
|
+
* three share a single placeholder nonce so genesis tokenises
|
|
25
|
+
* identically across both pair-wise word-level analyses.
|
|
26
|
+
*
|
|
27
|
+
* Three paths:
|
|
28
|
+
* 1. **Positional** — all three have the same table count AND each
|
|
29
|
+
* positional triple's tableKey is similar enough that 1:1 pairing
|
|
30
|
+
* by position is sound. Recurses cellDiff per cell, structural
|
|
31
|
+
* layout from genesis.
|
|
32
|
+
* 2. **Row-structural** — paired triples whose row/cell counts differ.
|
|
33
|
+
* Per-table row-level LCS against genesis; recurse on preserved
|
|
34
|
+
* rows, emit author-attributed full rows for the rest.
|
|
35
|
+
* 3. **Multi-table by content** — table counts diverge across inputs.
|
|
36
|
+
* Pair tables to genesis via content-LCS, then assign placeholders
|
|
37
|
+
* such that each placeholder appears in exactly the inputs that
|
|
38
|
+
* contain the underlying table. The word-level merger walks the
|
|
39
|
+
* genesis spine and attributes unpaired tables naturally
|
|
40
|
+
* (cp-only/me-only/both-agree).
|
|
31
41
|
*/
|
|
32
42
|
|
|
33
43
|
export interface ThreeWayPreprocessResult {
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
44
|
+
modifiedGenesis: string
|
|
45
|
+
modifiedCp: string
|
|
46
|
+
modifiedMe: string
|
|
37
47
|
placeholderToDiff: Map<string, string>
|
|
38
48
|
}
|
|
39
49
|
|
|
40
|
-
export type ThreeWayDiffCellFn = (
|
|
50
|
+
export type ThreeWayDiffCellFn = (genesisCell: string, cpCell: string, meCell: string) => string
|
|
41
51
|
|
|
42
52
|
export function preprocessTablesThreeWay(
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
53
|
+
genesis: string,
|
|
54
|
+
cpLatest: string,
|
|
55
|
+
meCurrent: string,
|
|
46
56
|
cellDiff: ThreeWayDiffCellFn
|
|
47
57
|
): ThreeWayPreprocessResult | null {
|
|
48
|
-
const
|
|
49
|
-
const
|
|
50
|
-
const
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
for (const t of
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
58
|
+
const gTables = findTopLevelTables(genesis)
|
|
59
|
+
const cTables = findTopLevelTables(cpLatest)
|
|
60
|
+
const mTables = findTopLevelTables(meCurrent)
|
|
61
|
+
|
|
62
|
+
if (gTables.length === 0 && cTables.length === 0 && mTables.length === 0) return null
|
|
63
|
+
|
|
64
|
+
for (const t of gTables) if (exceedsSizeLimit(t)) return null
|
|
65
|
+
for (const t of cTables) if (exceedsSizeLimit(t)) return null
|
|
66
|
+
for (const t of mTables) if (exceedsSizeLimit(t)) return null
|
|
67
|
+
|
|
68
|
+
const placeholderPrefix = makePlaceholderPrefix(genesis, cpLatest, meCurrent)
|
|
69
|
+
|
|
70
|
+
if (positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables)) {
|
|
71
|
+
return preprocessAlignedByPosition(
|
|
72
|
+
genesis,
|
|
73
|
+
cpLatest,
|
|
74
|
+
meCurrent,
|
|
75
|
+
gTables,
|
|
76
|
+
cTables,
|
|
77
|
+
mTables,
|
|
78
|
+
cellDiff,
|
|
79
|
+
placeholderPrefix
|
|
80
|
+
)
|
|
70
81
|
}
|
|
71
82
|
|
|
72
|
-
|
|
73
|
-
// removed/moved a table, etc. Use content-LCS to pair tables across
|
|
74
|
-
// each adjacent pair, then assign placeholders so the word-level 3-way
|
|
75
|
-
// merger naturally attributes unpaired tables — the placeholder token
|
|
76
|
-
// appears only in the inputs where the table exists, and the merger
|
|
77
|
-
// sees that as an insertion/deletion.
|
|
78
|
-
return preprocessMisalignedByContent(v1, v2, v3, t1s, t2s, t3s, cellDiff, placeholderPrefix)
|
|
83
|
+
return preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix)
|
|
79
84
|
}
|
|
80
85
|
|
|
81
86
|
function preprocessAlignedByPosition(
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
87
|
+
genesis: string,
|
|
88
|
+
cpLatest: string,
|
|
89
|
+
meCurrent: string,
|
|
90
|
+
gTables: TableRange[],
|
|
91
|
+
cTables: TableRange[],
|
|
92
|
+
mTables: TableRange[],
|
|
88
93
|
cellDiff: ThreeWayDiffCellFn,
|
|
89
94
|
placeholderPrefix: string
|
|
90
95
|
): ThreeWayPreprocessResult {
|
|
91
|
-
const pairs: Array<{
|
|
92
|
-
|
|
93
|
-
t2: TableRange
|
|
94
|
-
t3: TableRange
|
|
95
|
-
diffed: string
|
|
96
|
-
}> = []
|
|
97
|
-
for (let i = 0; i < t1s.length; i++) {
|
|
96
|
+
const pairs: Array<{ g: TableRange; c: TableRange; m: TableRange; diffed: string }> = []
|
|
97
|
+
for (let i = 0; i < gTables.length; i++) {
|
|
98
98
|
pairs.push({
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
diffed: diffTableThreeWay(
|
|
99
|
+
g: gTables[i],
|
|
100
|
+
c: cTables[i],
|
|
101
|
+
m: mTables[i],
|
|
102
|
+
diffed: diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[i], cTables[i], mTables[i], cellDiff),
|
|
103
103
|
})
|
|
104
104
|
}
|
|
105
|
-
let
|
|
106
|
-
let
|
|
107
|
-
let
|
|
105
|
+
let modifiedGenesis = genesis
|
|
106
|
+
let modifiedCp = cpLatest
|
|
107
|
+
let modifiedMe = meCurrent
|
|
108
108
|
const placeholderToDiff = new Map<string, string>()
|
|
109
|
-
// Splice end → start so earlier offsets stay valid.
|
|
110
109
|
for (let i = pairs.length - 1; i >= 0; i--) {
|
|
111
110
|
const placeholder = `${placeholderPrefix}${i}${PLACEHOLDER_SUFFIX}`
|
|
112
111
|
placeholderToDiff.set(placeholder, pairs[i].diffed)
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
112
|
+
modifiedGenesis = spliceString(modifiedGenesis, pairs[i].g.tableStart, pairs[i].g.tableEnd, placeholder)
|
|
113
|
+
modifiedCp = spliceString(modifiedCp, pairs[i].c.tableStart, pairs[i].c.tableEnd, placeholder)
|
|
114
|
+
modifiedMe = spliceString(modifiedMe, pairs[i].m.tableStart, pairs[i].m.tableEnd, placeholder)
|
|
116
115
|
}
|
|
117
|
-
return {
|
|
116
|
+
return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
|
|
118
117
|
}
|
|
119
118
|
|
|
120
119
|
/**
|
|
121
|
-
* Multi-table
|
|
122
|
-
*
|
|
123
|
-
* each
|
|
124
|
-
* table
|
|
125
|
-
* - paired-everywhere placeholders → equal in both diffs → unwrapped
|
|
126
|
-
* - V2-only (CP-inserted + Me-rejected) → inserted by CP, deleted by
|
|
127
|
-
* Me → reject wrapper around the table
|
|
128
|
-
* - V2+V3 (CP-inserted, Me-kept) → ins-cp wrapper
|
|
129
|
-
* - V1+V2 (Me-deleted) → del-me wrapper
|
|
130
|
-
* - V1-only (CP-deleted before V2) → del-cp wrapper
|
|
131
|
-
* - V3-only (Me-inserted) → ins-me wrapper
|
|
120
|
+
* Multi-table handler. Tables are paired against `genesis` (the spine)
|
|
121
|
+
* via content-LCS on each of cp and me. Placeholders are assigned so
|
|
122
|
+
* each appears only in the inputs that actually contain the underlying
|
|
123
|
+
* table. The word-level merger then attributes them naturally:
|
|
132
124
|
*
|
|
133
|
-
*
|
|
134
|
-
*
|
|
135
|
-
*
|
|
125
|
+
* - paired in genesis+cp+me → equal in both diffs → emit recursive 3-way diff
|
|
126
|
+
* - in cp+me, not in genesis → both-agree insertion → emit plain
|
|
127
|
+
* - in cp only → cp insertion → ins-cp wrapper (Me didn't take it)
|
|
128
|
+
* - in me only → me insertion → ins-me wrapper
|
|
129
|
+
* - in genesis+cp, not me → me deletion → del-me wrapper
|
|
130
|
+
* - in genesis+me, not cp → cp deletion → del-cp wrapper
|
|
131
|
+
* - in genesis only → both deleted, settled → silent (placeholder content empty)
|
|
136
132
|
*/
|
|
137
|
-
function
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
133
|
+
function preprocessByContent(
|
|
134
|
+
genesis: string,
|
|
135
|
+
cpLatest: string,
|
|
136
|
+
meCurrent: string,
|
|
137
|
+
gTables: TableRange[],
|
|
138
|
+
cTables: TableRange[],
|
|
139
|
+
mTables: TableRange[],
|
|
144
140
|
cellDiff: ThreeWayDiffCellFn,
|
|
145
141
|
placeholderPrefix: string
|
|
146
142
|
): ThreeWayPreprocessResult {
|
|
147
|
-
const
|
|
148
|
-
const
|
|
149
|
-
const
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
//
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
143
|
+
const gKeys = gTables.map(t => tableKey(genesis, t))
|
|
144
|
+
const cKeys = cTables.map(t => tableKey(cpLatest, t))
|
|
145
|
+
const mKeys = mTables.map(t => tableKey(meCurrent, t))
|
|
146
|
+
|
|
147
|
+
// Exact tableKey LCS, then fuzzy-pair unmatched runs by content
|
|
148
|
+
// similarity. Without this, a table whose cells were edited (but
|
|
149
|
+
// not its overall shape) fails the exact tableKey match and the
|
|
150
|
+
// table-level aligner pulls it apart into a whole-table del + a
|
|
151
|
+
// whole-table ins. Same fuzzy pass `TableDiff` uses for the 2-way
|
|
152
|
+
// path — `pairSimilarTablesThreeWay` is defined below.
|
|
153
|
+
const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables)
|
|
154
|
+
const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables)
|
|
155
|
+
|
|
156
|
+
// Maps: genesisIdx → matching cpIdx (-1 if none); cpIdx → matching genesisIdx; etc.
|
|
157
|
+
const gToCp = new Array<number>(gTables.length).fill(-1)
|
|
158
|
+
const cpToG = new Array<number>(cTables.length).fill(-1)
|
|
159
|
+
for (const a of alignCp) {
|
|
158
160
|
if (a.oldIdx !== null && a.newIdx !== null) {
|
|
159
|
-
|
|
160
|
-
|
|
161
|
+
gToCp[a.oldIdx] = a.newIdx
|
|
162
|
+
cpToG[a.newIdx] = a.oldIdx
|
|
161
163
|
}
|
|
162
164
|
}
|
|
163
|
-
const
|
|
164
|
-
const
|
|
165
|
-
for (const a of
|
|
165
|
+
const gToMe = new Array<number>(gTables.length).fill(-1)
|
|
166
|
+
const meToG = new Array<number>(mTables.length).fill(-1)
|
|
167
|
+
for (const a of alignMe) {
|
|
166
168
|
if (a.oldIdx !== null && a.newIdx !== null) {
|
|
167
|
-
|
|
168
|
-
|
|
169
|
+
gToMe[a.oldIdx] = a.newIdx
|
|
170
|
+
meToG[a.newIdx] = a.oldIdx
|
|
169
171
|
}
|
|
170
172
|
}
|
|
171
173
|
|
|
172
|
-
// Allocate placeholders. Each logical-table-position (paired triple,
|
|
173
|
-
// paired pair, or singleton) gets one shared placeholder used in
|
|
174
|
-
// every input that contains it.
|
|
175
174
|
let nextId = 0
|
|
176
175
|
const placeholderToDiff = new Map<string, string>()
|
|
177
176
|
const placeholders = {
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
177
|
+
g: new Array<string | null>(gTables.length).fill(null),
|
|
178
|
+
c: new Array<string | null>(cTables.length).fill(null),
|
|
179
|
+
m: new Array<string | null>(mTables.length).fill(null),
|
|
181
180
|
}
|
|
182
|
-
|
|
183
181
|
const allocate = (): string => `${placeholderPrefix}${nextId++}${PLACEHOLDER_SUFFIX}`
|
|
184
182
|
|
|
185
|
-
//
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
// For unpaired placeholders the word-level merger can't wrap a tag
|
|
198
|
-
// token (insertTag emits tags verbatim), so we bake the author
|
|
199
|
-
// attribution directly into the placeholder content. The merger then
|
|
200
|
-
// only has to position the placeholder via word-level alignment;
|
|
201
|
-
// the attribution wrapping is already in the substituted HTML.
|
|
202
|
-
const wrapWhole = (tag: 'ins' | 'del', author: Author, tableHtml: string, rejects?: Author): string =>
|
|
203
|
-
Utils.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author, rejects))
|
|
204
|
-
|
|
205
|
-
// 2. V2 tables paired only with V3 (CP-inserted into V2, Me-kept).
|
|
206
|
-
for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
|
|
207
|
-
if (placeholders.v2[v2Idx] !== null) continue
|
|
208
|
-
const v3Idx = v2ToV3[v2Idx]
|
|
209
|
-
if (v3Idx === -1) continue
|
|
183
|
+
// For unpaired-in-one-side placeholders, bake author attribution
|
|
184
|
+
// into the placeholder content — the word-level merger emits tag
|
|
185
|
+
// tokens (HTML comments) verbatim, so it can't wrap them itself.
|
|
186
|
+
const wrapWhole = (tag: 'ins' | 'del', author: Author, tableHtml: string): string =>
|
|
187
|
+
Utils.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author))
|
|
188
|
+
|
|
189
|
+
// 1. Triples paired in all three (genesis + cp + me) → recursive 3-way diff.
|
|
190
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
191
|
+
const cIdx = gToCp[gIdx]
|
|
192
|
+
const mIdx = gToMe[gIdx]
|
|
193
|
+
if (cIdx === -1 || mIdx === -1) continue
|
|
210
194
|
const placeholder = allocate()
|
|
211
|
-
placeholderToDiff.set(
|
|
212
|
-
|
|
213
|
-
|
|
195
|
+
placeholderToDiff.set(
|
|
196
|
+
placeholder,
|
|
197
|
+
diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[gIdx], cTables[cIdx], mTables[mIdx], cellDiff)
|
|
198
|
+
)
|
|
199
|
+
placeholders.g[gIdx] = placeholder
|
|
200
|
+
placeholders.c[cIdx] = placeholder
|
|
201
|
+
placeholders.m[mIdx] = placeholder
|
|
214
202
|
}
|
|
215
203
|
|
|
216
|
-
//
|
|
217
|
-
for (let
|
|
218
|
-
if (placeholders.
|
|
219
|
-
const
|
|
220
|
-
if (
|
|
204
|
+
// 2. Genesis + CP only (not in Me) → me deletion.
|
|
205
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
206
|
+
if (placeholders.g[gIdx] !== null) continue
|
|
207
|
+
const cIdx = gToCp[gIdx]
|
|
208
|
+
if (cIdx === -1) continue
|
|
221
209
|
const placeholder = allocate()
|
|
222
|
-
placeholderToDiff.set(
|
|
223
|
-
|
|
224
|
-
|
|
210
|
+
placeholderToDiff.set(
|
|
211
|
+
placeholder,
|
|
212
|
+
wrapWhole('del', 'me', genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd))
|
|
213
|
+
)
|
|
214
|
+
placeholders.g[gIdx] = placeholder
|
|
215
|
+
placeholders.c[cIdx] = placeholder
|
|
225
216
|
}
|
|
226
217
|
|
|
227
|
-
//
|
|
228
|
-
for (let
|
|
229
|
-
if (placeholders.
|
|
218
|
+
// 3. Genesis + Me only (not in CP) → cp deletion.
|
|
219
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
220
|
+
if (placeholders.g[gIdx] !== null) continue
|
|
221
|
+
const mIdx = gToMe[gIdx]
|
|
222
|
+
if (mIdx === -1) continue
|
|
230
223
|
const placeholder = allocate()
|
|
231
224
|
placeholderToDiff.set(
|
|
232
225
|
placeholder,
|
|
233
|
-
wrapWhole('del', '
|
|
226
|
+
wrapWhole('del', 'cp', genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd))
|
|
234
227
|
)
|
|
235
|
-
placeholders.
|
|
228
|
+
placeholders.g[gIdx] = placeholder
|
|
229
|
+
placeholders.m[mIdx] = placeholder
|
|
236
230
|
}
|
|
237
231
|
|
|
238
|
-
//
|
|
239
|
-
|
|
240
|
-
|
|
232
|
+
// 4. Genesis only (not in CP, not in Me) → both deleted, settled, silent.
|
|
233
|
+
// Placeholder ONLY in genesis; cp and me lack it. The word-level merger
|
|
234
|
+
// sees it as "deleted by both" via the genesis-spine fate maps and
|
|
235
|
+
// silences it via the settled-deletion rule (empty placeholder content).
|
|
236
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
237
|
+
if (placeholders.g[gIdx] !== null) continue
|
|
238
|
+
const placeholder = allocate()
|
|
239
|
+
placeholderToDiff.set(placeholder, '')
|
|
240
|
+
placeholders.g[gIdx] = placeholder
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
// 5. CP + Me both inserted (no genesis) — agreement check. If their
|
|
244
|
+
// table content is textually identical, emit plain (settled). Otherwise
|
|
245
|
+
// each side gets its own placeholder (cp-only / me-only treatment).
|
|
246
|
+
for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
|
|
247
|
+
if (placeholders.c[cIdx] !== null) continue
|
|
248
|
+
// CP table not paired to genesis. Is there an unpaired Me table with
|
|
249
|
+
// matching content?
|
|
250
|
+
const cText = cKeys[cIdx]
|
|
251
|
+
let mIdx = -1
|
|
252
|
+
for (let candidate = 0; candidate < mTables.length; candidate++) {
|
|
253
|
+
if (placeholders.m[candidate] !== null) continue
|
|
254
|
+
if (meToG[candidate] !== -1) continue
|
|
255
|
+
if (mKeys[candidate] === cText) {
|
|
256
|
+
mIdx = candidate
|
|
257
|
+
break
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
if (mIdx === -1) continue
|
|
261
|
+
// Both inserted the same table content → settled insertion.
|
|
241
262
|
const placeholder = allocate()
|
|
242
|
-
placeholderToDiff.set(placeholder,
|
|
243
|
-
placeholders.
|
|
263
|
+
placeholderToDiff.set(placeholder, cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd))
|
|
264
|
+
placeholders.c[cIdx] = placeholder
|
|
265
|
+
placeholders.m[mIdx] = placeholder
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
// 6. Remaining CP-only tables (inserted by CP, Me didn't take).
|
|
269
|
+
for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
|
|
270
|
+
if (placeholders.c[cIdx] !== null) continue
|
|
271
|
+
const placeholder = allocate()
|
|
272
|
+
placeholderToDiff.set(
|
|
273
|
+
placeholder,
|
|
274
|
+
wrapWhole('ins', 'cp', cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd))
|
|
275
|
+
)
|
|
276
|
+
placeholders.c[cIdx] = placeholder
|
|
244
277
|
}
|
|
245
278
|
|
|
246
|
-
//
|
|
247
|
-
for (let
|
|
248
|
-
if (placeholders.
|
|
279
|
+
// 7. Remaining Me-only tables (Me inserted, CP didn't).
|
|
280
|
+
for (let mIdx = 0; mIdx < mTables.length; mIdx++) {
|
|
281
|
+
if (placeholders.m[mIdx] !== null) continue
|
|
249
282
|
const placeholder = allocate()
|
|
250
|
-
placeholderToDiff.set(
|
|
251
|
-
|
|
283
|
+
placeholderToDiff.set(
|
|
284
|
+
placeholder,
|
|
285
|
+
wrapWhole('ins', 'me', meCurrent.slice(mTables[mIdx].tableStart, mTables[mIdx].tableEnd))
|
|
286
|
+
)
|
|
287
|
+
placeholders.m[mIdx] = placeholder
|
|
252
288
|
}
|
|
253
289
|
|
|
254
|
-
// Splice
|
|
255
|
-
let
|
|
256
|
-
for (let i =
|
|
257
|
-
const p = placeholders.
|
|
290
|
+
// Splice end → start per input.
|
|
291
|
+
let modifiedGenesis = genesis
|
|
292
|
+
for (let i = gTables.length - 1; i >= 0; i--) {
|
|
293
|
+
const p = placeholders.g[i]
|
|
258
294
|
if (p === null) continue
|
|
259
|
-
|
|
295
|
+
modifiedGenesis = spliceString(modifiedGenesis, gTables[i].tableStart, gTables[i].tableEnd, p)
|
|
260
296
|
}
|
|
261
|
-
let
|
|
262
|
-
for (let i =
|
|
263
|
-
const p = placeholders.
|
|
297
|
+
let modifiedCp = cpLatest
|
|
298
|
+
for (let i = cTables.length - 1; i >= 0; i--) {
|
|
299
|
+
const p = placeholders.c[i]
|
|
264
300
|
if (p === null) continue
|
|
265
|
-
|
|
301
|
+
modifiedCp = spliceString(modifiedCp, cTables[i].tableStart, cTables[i].tableEnd, p)
|
|
266
302
|
}
|
|
267
|
-
let
|
|
268
|
-
for (let i =
|
|
269
|
-
const p = placeholders.
|
|
303
|
+
let modifiedMe = meCurrent
|
|
304
|
+
for (let i = mTables.length - 1; i >= 0; i--) {
|
|
305
|
+
const p = placeholders.m[i]
|
|
270
306
|
if (p === null) continue
|
|
271
|
-
|
|
307
|
+
modifiedMe = spliceString(modifiedMe, mTables[i].tableStart, mTables[i].tableEnd, p)
|
|
272
308
|
}
|
|
273
309
|
|
|
274
|
-
return {
|
|
310
|
+
return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
|
|
275
311
|
}
|
|
276
312
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
313
|
+
// Positional pairing is the strict-default for three-way table merge:
|
|
314
|
+
// when all three inputs have the same number of tables in the same
|
|
315
|
+
// order, we pair them by index and let `diffTableThreeWay` handle
|
|
316
|
+
// per-table cell/row level differences. The similarity guard below
|
|
317
|
+
// only kicks in to *reject* positional alignment when a pair is
|
|
318
|
+
// SO dissimilar that it's near-certainly a table reorder/rename
|
|
319
|
+
// where content-LCS pairing would be materially better. The
|
|
320
|
+
// threshold is intentionally low — the 2-way path has no such guard
|
|
321
|
+
// and pairs purely by index (its `diffTable` falls back through
|
|
322
|
+
// same-dimension → equal-row-count → row-LCS → whole-table on its
|
|
323
|
+
// own), so the three-way path was stricter than its sibling and
|
|
324
|
+
// silently dropped to whole-table del+ins for legitimate edits
|
|
325
|
+
// like "rename one column and tweak its values". Aligning the
|
|
326
|
+
// threshold here keeps the two-way and three-way paths in step.
|
|
327
|
+
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = 0.15
|
|
286
328
|
|
|
287
|
-
/**
|
|
288
|
-
* Returns true when V1/V2/V3 tables can be 1:1 paired by position. The
|
|
289
|
-
* three lists must have equal length AND each positional triple must
|
|
290
|
-
* have content similar enough that positional pairing reflects the
|
|
291
|
-
* authors' likely intent. The slow content-LCS path handles cases that
|
|
292
|
-
* fail this gate (table reordering, additions, deletions).
|
|
293
|
-
*/
|
|
294
329
|
function positionallyAligned(
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
330
|
+
genesis: string,
|
|
331
|
+
cpLatest: string,
|
|
332
|
+
meCurrent: string,
|
|
333
|
+
gTables: TableRange[],
|
|
334
|
+
cTables: TableRange[],
|
|
335
|
+
mTables: TableRange[]
|
|
301
336
|
): boolean {
|
|
302
|
-
if (
|
|
303
|
-
for (let i = 0; i <
|
|
304
|
-
const
|
|
305
|
-
const
|
|
306
|
-
const
|
|
307
|
-
if (textSimilarity(
|
|
308
|
-
if (textSimilarity(
|
|
337
|
+
if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false
|
|
338
|
+
for (let i = 0; i < gTables.length; i++) {
|
|
339
|
+
const kG = tableKey(genesis, gTables[i])
|
|
340
|
+
const kC = tableKey(cpLatest, cTables[i])
|
|
341
|
+
const kM = tableKey(meCurrent, mTables[i])
|
|
342
|
+
if (textSimilarity(kG, kC) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
|
|
343
|
+
if (textSimilarity(kG, kM) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
|
|
309
344
|
}
|
|
310
345
|
return true
|
|
311
346
|
}
|
|
312
347
|
|
|
313
348
|
function tableKey(html: string, table: TableRange): string {
|
|
314
|
-
// Whitespace-normalised full table HTML — tables with byte-identical
|
|
315
|
-
// content (modulo whitespace) pair; any structural or content
|
|
316
|
-
// difference falls through to unpaired (table-level ins/del).
|
|
317
349
|
return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, ' ').trim()
|
|
318
350
|
}
|
|
319
351
|
|
|
352
|
+
/**
|
|
353
|
+
* Character-level similarity above which the three-way aligner treats
|
|
354
|
+
* two rows / tables as "the same logical entry, edited" rather than
|
|
355
|
+
* an unrelated delete + insert. Matched to TableDiff's
|
|
356
|
+
* `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
|
|
357
|
+
* agree on which pairings are reachable; if a row's content overlap
|
|
358
|
+
* is enough to fool the 2-way diff into pairing, it should also be
|
|
359
|
+
* enough for 3-way.
|
|
360
|
+
*/
|
|
361
|
+
const THREE_WAY_FUZZY_THRESHOLD = 0.5
|
|
362
|
+
|
|
363
|
+
/**
|
|
364
|
+
* Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
|
|
365
|
+
* applies after its exact-LCS, but against one side of the genesis
|
|
366
|
+
* spine (either cp or me). The genesis tables/rows are always the
|
|
367
|
+
* "old" side; `newTable` is the cp or me table being aligned. Returns
|
|
368
|
+
* the enriched alignment with additional paired entries.
|
|
369
|
+
*
|
|
370
|
+
* Cell-count guard: only fuzzy-pair when both rows have the same cell
|
|
371
|
+
* count. Without this guard an asymmetric restructure — e.g. CP and
|
|
372
|
+
* Me both added a different column — leads to ONE side fuzzy-pairing
|
|
373
|
+
* its row with genesis (content overlap above threshold) while the
|
|
374
|
+
* other side falls below threshold. That mismatch routes through
|
|
375
|
+
* `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
|
|
376
|
+
* branch, which emits CP's row as a Me-attributed deletion. In
|
|
377
|
+
* cp-only mode `stripMeAttributedMarkers` then removes the row
|
|
378
|
+
* entirely and CP's edit vanishes from the view — exactly the
|
|
379
|
+
* content-loss case we're meant to prevent. Restricting fuzzy
|
|
380
|
+
* pairing to same-shape rows preserves the common case (single cell
|
|
381
|
+
* edit, identical row shape) while pushing structural mismatches
|
|
382
|
+
* back to the boundary-insertion path that emits both sides
|
|
383
|
+
* explicitly.
|
|
384
|
+
*/
|
|
385
|
+
function pairSimilarRowsThreeWay(
|
|
386
|
+
alignment: Alignment[],
|
|
387
|
+
genesis: string,
|
|
388
|
+
newHtml: string,
|
|
389
|
+
oldTable: TableRange,
|
|
390
|
+
newTable: TableRange
|
|
391
|
+
): Alignment[] {
|
|
392
|
+
const oldTexts = oldTable.rows.map(r => rowText(genesis, r))
|
|
393
|
+
const newTexts = newTable.rows.map(r => rowText(newHtml, r))
|
|
394
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
|
|
395
|
+
// Returning 0 sits below any positive threshold so
|
|
396
|
+
// `pairSimilarUnmatched` won't pair these rows; the guard remains
|
|
397
|
+
// defensive should the threshold ever be lowered to 0.
|
|
398
|
+
if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0
|
|
399
|
+
return textSimilarity(oldTexts[oldIdx], newTexts[newIdx])
|
|
400
|
+
})
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
/**
|
|
404
|
+
* Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
|
|
405
|
+
* full table HTML keys, fuzzy-pair unmatched table runs by their
|
|
406
|
+
* row-text-concatenated content. Without this, a table whose body
|
|
407
|
+
* was edited (but not its outer shape) fails the exact-key match
|
|
408
|
+
* and the preprocessing emits whole-table del + whole-table ins
|
|
409
|
+
* instead of recursing into per-cell three-way diffs.
|
|
410
|
+
*/
|
|
411
|
+
function pairSimilarTablesThreeWay(
|
|
412
|
+
alignment: Alignment[],
|
|
413
|
+
oldHtml: string,
|
|
414
|
+
newHtml: string,
|
|
415
|
+
oldTables: TableRange[],
|
|
416
|
+
newTables: TableRange[]
|
|
417
|
+
): Alignment[] {
|
|
418
|
+
const oldTexts = oldTables.map(t => t.rows.map(r => rowText(oldHtml, r)).join(' '))
|
|
419
|
+
const newTexts = newTables.map(t => t.rows.map(r => rowText(newHtml, r)).join(' '))
|
|
420
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) =>
|
|
421
|
+
textSimilarity(oldTexts[oldIdx], newTexts[newIdx])
|
|
422
|
+
)
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
// ────────────────────────────────────────────────────────────────────────────
|
|
426
|
+
// Per-table diff: positional cells or row-level structural change.
|
|
427
|
+
|
|
320
428
|
function diffTableThreeWay(
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
429
|
+
genesis: string,
|
|
430
|
+
cpLatest: string,
|
|
431
|
+
meCurrent: string,
|
|
432
|
+
tG: TableRange,
|
|
433
|
+
tC: TableRange,
|
|
434
|
+
tM: TableRange,
|
|
327
435
|
cellDiff: ThreeWayDiffCellFn
|
|
328
436
|
): string {
|
|
329
|
-
if (sameDimensions(
|
|
330
|
-
return diffTablePositional(
|
|
437
|
+
if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) {
|
|
438
|
+
return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
|
|
331
439
|
}
|
|
332
|
-
return diffTableStructural(
|
|
440
|
+
return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
|
|
333
441
|
}
|
|
334
442
|
|
|
335
443
|
function diffTablePositional(
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
444
|
+
genesis: string,
|
|
445
|
+
cpLatest: string,
|
|
446
|
+
meCurrent: string,
|
|
447
|
+
tG: TableRange,
|
|
448
|
+
tC: TableRange,
|
|
449
|
+
tM: TableRange,
|
|
342
450
|
cellDiff: ThreeWayDiffCellFn
|
|
343
451
|
): string {
|
|
344
|
-
// Walk
|
|
345
|
-
//
|
|
346
|
-
//
|
|
452
|
+
// Walk genesis's table scaffolding verbatim — it's the common
|
|
453
|
+
// ancestor. Cells are merged 3-way via cellDiff. Choosing genesis as
|
|
454
|
+
// the spine keeps the table structure stable across both pair-wise
|
|
455
|
+
// diffs that the word-level merger will see.
|
|
347
456
|
const out: string[] = []
|
|
348
|
-
let cursor =
|
|
349
|
-
for (let r = 0; r <
|
|
350
|
-
const
|
|
351
|
-
const
|
|
352
|
-
const
|
|
353
|
-
for (let c = 0; c <
|
|
354
|
-
const
|
|
355
|
-
const
|
|
356
|
-
const
|
|
357
|
-
out.push(
|
|
457
|
+
let cursor = tG.tableStart
|
|
458
|
+
for (let r = 0; r < tG.rows.length; r++) {
|
|
459
|
+
const rG = tG.rows[r]
|
|
460
|
+
const rC = tC.rows[r]
|
|
461
|
+
const rM = tM.rows[r]
|
|
462
|
+
for (let c = 0; c < rG.cells.length; c++) {
|
|
463
|
+
const cG = rG.cells[c]
|
|
464
|
+
const cC = rC.cells[c]
|
|
465
|
+
const cM = rM.cells[c]
|
|
466
|
+
out.push(genesis.slice(cursor, cG.contentStart))
|
|
358
467
|
out.push(
|
|
359
468
|
cellDiff(
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
469
|
+
genesis.slice(cG.contentStart, cG.contentEnd),
|
|
470
|
+
cpLatest.slice(cC.contentStart, cC.contentEnd),
|
|
471
|
+
meCurrent.slice(cM.contentStart, cM.contentEnd)
|
|
363
472
|
)
|
|
364
473
|
)
|
|
365
|
-
cursor =
|
|
474
|
+
cursor = cG.contentEnd
|
|
366
475
|
}
|
|
367
476
|
}
|
|
368
|
-
out.push(
|
|
477
|
+
out.push(genesis.slice(cursor, tG.tableEnd))
|
|
369
478
|
return out.join('')
|
|
370
479
|
}
|
|
371
480
|
|
|
372
481
|
/**
 * Row-level genesis-spine merge used when the three table versions do not
 * share the same row/cell layout.
 *
 * Procedure:
 *  1. Compute a row key for every row of genesis, cp and me, then align
 *     cp→genesis and me→genesis (exact LCS over the keys followed by the
 *     fuzzy pairing pass in `pairSimilarRowsThreeWay`).
 *  2. Walk the genesis rows in order. For each genesis row:
 *       - kept by both sides  → delegate to `emitPreservedRow`;
 *       - kept by exactly one → emit the surviving side's row, marked as a
 *         deletion attributed to the side that dropped it;
 *       - dropped by both     → emit nothing.
 *  3. Before each genesis row (and once after the last one) emit the rows
 *     that cp and/or me inserted at that boundary. An insertion whose row
 *     key is identical on both sides at the same boundary is emitted once,
 *     unmarked; all others are emitted as author-attributed insertions.
 *
 * The table's leading markup (up to the first row) and trailing markup
 * (after the last row) are taken from genesis.
 */
function diffTableStructural(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  tG: TableRange,
  tC: TableRange,
  tM: TableRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const gKeys = tG.rows.map(row => rowKey(genesis, row))
  const cKeys = tC.rows.map(row => rowKey(cpLatest, row))
  const mKeys = tM.rows.map(row => rowKey(meCurrent, row))

  // Exact key LCS alone cannot pair a row in which a single cell's text was
  // edited: the keys differ, so the genesis row would surface as deleted and
  // the edited row as inserted. The fuzzy pass pairs such leftovers so the
  // edit can be rendered as a cell-level diff instead, matching what the
  // two-way path (`TableDiff.pairSimilarUnmatchedRows`) already does.
  const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC)
  const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM)

  // Builds "genesis row index → paired row index on one side"; -1 means that
  // side has no row paired with the genesis row (i.e. it deleted the row).
  const genesisToSide = (align: ReturnType<typeof lcsAlign>): number[] => {
    const paired = new Array<number>(tG.rows.length).fill(-1)
    for (const { oldIdx, newIdx } of align) {
      if (oldIdx !== null && newIdx !== null) paired[oldIdx] = newIdx
    }
    return paired
  }
  const gToCp = genesisToSide(alignCp)
  const gToMe = genesisToSide(alignMe)

  // Rows with no genesis counterpart, grouped by the genesis row index they
  // must be emitted in front of (tG.rows.length = after the last row).
  const cpInsAt = collectInsertedRowsAtBoundary(alignCp, tG.rows.length)
  const meInsAt = collectInsertedRowsAtBoundary(alignMe, tG.rows.length)

  const pieces: string[] = [tableHeaderSlice(genesis, tG)]

  const flushInsertionsAt = (boundary: number): void => {
    const cpRows = cpInsAt.get(boundary) ?? []
    const unclaimedMe = new Set(meInsAt.get(boundary) ?? [])

    for (const cIdx of cpRows) {
      const cpRow = tC.rows[cIdx]
      // First still-unclaimed me insertion with the same row key, if any.
      const twin = [...unclaimedMe].find(mIdx => mKeys[mIdx] === cKeys[cIdx])
      if (twin === undefined) {
        pieces.push(emitFullRowAttributed(cpLatest, cpRow, 'ins', 'cp'))
        continue
      }
      // Both sides inserted the same row here: settled, emit cp's copy unmarked.
      unclaimedMe.delete(twin)
      pieces.push(cpLatest.slice(cpRow.rowStart, cpRow.rowEnd))
    }

    for (const mIdx of unclaimedMe) {
      pieces.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], 'ins', 'me'))
    }
  }

  tG.rows.forEach((gRow, g) => {
    flushInsertionsAt(g)

    const cIdx = gToCp[g]
    const mIdx = gToMe[g]
    const cpKept = cIdx !== -1
    const meKept = mIdx !== -1

    if (cpKept && meKept) {
      // Present on both sides: cell-level treatment of the row triple.
      pieces.push(emitPreservedRow(genesis, cpLatest, meCurrent, gRow, tC.rows[cIdx], tM.rows[mIdx], cellDiff))
      return
    }
    if (cpKept) {
      // Me dropped the row, CP kept it: show CP's row as a me-deletion.
      pieces.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], 'del', 'me'))
    } else if (meKept) {
      // CP dropped the row, Me kept it: show Me's row as a cp-deletion. The
      // content is what Me has; the styling conveys that CP removed it.
      pieces.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], 'del', 'cp'))
    }
    // Dropped by both sides: settled, nothing to emit.
  })

  flushInsertionsAt(tG.rows.length)
  pieces.push(tableFooterSlice(genesis, tG))
  return pieces.join('')
}
|
|
513
595
|
|
|
514
|
-
function
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
596
|
+
/**
 * Renders a genesis row that both cp and me still have.
 *
 * When all three versions of the row have the same number of cells, the
 * row is rebuilt from the genesis markup with each cell's content replaced
 * by `cellDiff(genesisContent, cpContent, meContent)`, cells being matched
 * by position.
 *
 * When the cell counts differ no positional pairing exists, so whole rows
 * are emitted instead, each attributed to its author:
 *   - both sides changed the count → cp's row as cp-insertion followed by
 *     me's row as me-insertion (the genesis row is not emitted);
 *   - only one side changed it     → the genesis row as a deletion by that
 *     side, followed by that side's row as its insertion.
 * In the second case, content edits made by the side that kept the genesis
 * cell count are not decomposed in the output.
 *
 * @param genesis   full genesis HTML that `rG` offsets index into
 * @param cpLatest  full cp HTML that `rC` offsets index into
 * @param meCurrent full me HTML that `rM` offsets index into
 * @param rG        genesis row range
 * @param rC        cp row range paired with `rG`
 * @param rM        me row range paired with `rG`
 * @param cellDiff  three-way diff callback applied to cell contents
 * @returns the merged row markup
 */
function emitPreservedRow(
  genesis: string,
  cpLatest: string,
  meCurrent: string,
  rG: RowRange,
  rC: RowRange,
  rM: RowRange,
  cellDiff: ThreeWayDiffCellFn
): string {
  const cpRestructured = rC.cells.length !== rG.cells.length
  const meRestructured = rM.cells.length !== rG.cells.length

  if (cpRestructured || meRestructured) {
    // First block: cp's new row when both sides reshaped, otherwise the
    // genesis row shown as deleted by whichever single side reshaped it.
    const lead =
      cpRestructured && meRestructured
        ? emitFullRowAttributed(cpLatest, rC, 'ins', 'cp')
        : emitFullRowAttributed(genesis, rG, 'del', cpRestructured ? 'cp' : 'me')
    // Second block: me's new row whenever me reshaped, else cp's new row.
    const trail = meRestructured
      ? emitFullRowAttributed(meCurrent, rM, 'ins', 'me')
      : emitFullRowAttributed(cpLatest, rC, 'ins', 'cp')
    return lead + trail
  }

  // Equal cell counts: keep genesis markup between cells, diff cell contents.
  const pieces: string[] = []
  let consumed = rG.rowStart
  rG.cells.forEach((gCell, i) => {
    const cCell = rC.cells[i]
    const mCell = rM.cells[i]
    pieces.push(
      genesis.slice(consumed, gCell.contentStart),
      cellDiff(
        genesis.slice(gCell.contentStart, gCell.contentEnd),
        cpLatest.slice(cCell.contentStart, cCell.contentEnd),
        meCurrent.slice(mCell.contentStart, mCell.contentEnd)
      )
    )
    consumed = gCell.contentEnd
  })
  pieces.push(genesis.slice(consumed, rG.rowEnd))
  return pieces.join('')
}
|
|
579
662
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
663
|
+
/**
 * Groups new-side rows that have no genesis counterpart by the genesis row
 * boundary at which they have to be emitted.
 *
 * An entry with `oldIdx === null` and a non-null `newIdx` is an inserted
 * row. Its boundary is the `oldIdx` of the nearest later entry (in array
 * order) that carries a genesis index, or `genesisRowCount` when no such
 * entry follows. Entries where both indices are null are ignored.
 *
 * The parameter is typed structurally, so any alignment whose entries
 * expose `oldIdx` / `newIdx` as `number | null` is accepted.
 *
 * @param align           alignment entries; walked back to front
 * @param genesisRowCount number of genesis rows, used as the boundary for
 *                        insertions that follow the last genesis row
 * @returns map from boundary index to the inserted rows' new-side indices,
 *          listed in the order they appear in `align`
 */
function collectInsertedRowsAtBoundary(
  align: ReadonlyArray<{ readonly oldIdx: number | null; readonly newIdx: number | null }>,
  genesisRowCount: number
): Map<number, number[]> {
  const out = new Map<number, number[]>()
  let nextGenesisBoundary = genesisRowCount
  let pending: number[] = []

  // Moves the collected insertions to the boundary currently in scope.
  // `pending` is filled while walking backwards, so it is reversed to
  // restore array order. Plain `reverse()` on the scratch array is used
  // instead of ES2023 `toReversed()` so the helper also runs on runtimes
  // that predate it, and `concat` avoids spreading a large array into
  // call arguments.
  const flushPending = (): void => {
    if (pending.length === 0) return
    pending.reverse()
    const existing = out.get(nextGenesisBoundary)
    out.set(nextGenesisBoundary, existing ? pending.concat(existing) : pending)
    pending = []
  }

  // Reverse walk: `nextGenesisBoundary` always holds the genesis index of
  // the closest entry already visited, i.e. the one that follows the
  // pending insertions in array order.
  for (let i = align.length - 1; i >= 0; i--) {
    const entry = align[i]
    if (entry.oldIdx !== null) {
      flushPending()
      nextGenesisBoundary = entry.oldIdx
    } else if (entry.newIdx !== null) {
      pending.push(entry.newIdx)
    }
  }
  // Insertions that precede the first genesis-bearing entry.
  flushPending()
  return out
}
|
|
608
699
|
|
|
609
700
|
/**
 * Returns the table markup that precedes its first row: everything from
 * `table.tableStart` up to the first row's `rowStart`. For a table with no
 * rows, returns everything from `table.tableStart` up to
 * `'</table>'.length` characters before `table.tableEnd`.
 */
function tableHeaderSlice(html: string, table: TableRange): string {
  const [leadingRow] = table.rows
  const headerEnd = leadingRow ? leadingRow.rowStart : table.tableEnd - '</table>'.length
  return html.slice(table.tableStart, headerEnd)
}
|
|
616
705
|
|
|
617
706
|
/**
 * Returns the table markup that follows its last row: everything from the
 * last row's `rowEnd` up to `table.tableEnd`. For a table with no rows the
 * literal `'</table>'` is returned.
 */
function tableFooterSlice(html: string, table: TableRange): string {
  const { rows, tableEnd } = table
  const trailingRow = rows.length > 0 ? rows[rows.length - 1] : undefined
  return trailingRow ? html.slice(trailingRow.rowEnd, tableEnd) : '</table>'
}
|
|
623
711
|
|
|
624
712
|
/**
 * Emits one row with the whole row attributed to a single author.
 *
 * The row's opening tag is passed through `injectAuthorAttribution`, every
 * cell is rendered by `emitFullCellAttributed`, and the markup between
 * cells (and after the last cell, up to `row.rowEnd`) is copied verbatim.
 * If no opening tag can be parsed at `row.rowStart`, the row is returned
 * unchanged.
 *
 * @param html   document the row offsets index into
 * @param row    row to emit
 * @param kind   whether the row is rendered as an insertion or a deletion
 * @param author author the change is attributed to
 */
function emitFullRowAttributed(html: string, row: RowRange, kind: 'ins' | 'del', author: Author): string {
  const openTag = parseOpeningTagAt(html, row.rowStart)
  if (!openTag) return html.slice(row.rowStart, row.rowEnd)

  let markup = injectAuthorAttribution(html.slice(row.rowStart, openTag.end), kind, author)
  let resumeAt = openTag.end
  for (const cell of row.cells) {
    // Verbatim gap before the cell, then the attributed cell itself.
    markup += html.slice(resumeAt, cell.cellStart) + emitFullCellAttributed(html, cell, kind, author)
    resumeAt = cell.cellEnd
  }
  return markup + html.slice(resumeAt, row.rowEnd)
}
|
|
652
733
|
|
|
653
|
-
function emitFullCellAttributed(
|
|
654
|
-
html: string,
|
|
655
|
-
cell: CellRange,
|
|
656
|
-
kind: 'ins' | 'del',
|
|
657
|
-
author: Author,
|
|
658
|
-
rejectsAuthor?: Author
|
|
659
|
-
): string {
|
|
734
|
+
/**
 * Emits one cell attributed to a single author.
 *
 * The cell's opening tag is passed through `injectAuthorAttribution`. The
 * cell content is wrapped via `Utils.wrapText` using `kind` as the tag and
 * `diff<kind>` as the class, unless the content is empty or whitespace
 * only, in which case it is copied as-is. The markup from the end of the
 * content to `cell.cellEnd` is copied verbatim. If no opening tag can be
 * parsed at `cell.cellStart`, the cell is returned unchanged.
 */
function emitFullCellAttributed(html: string, cell: CellRange, kind: 'ins' | 'del', author: Author): string {
  const openTag = parseOpeningTagAt(html, cell.cellStart)
  if (!openTag) return html.slice(cell.cellStart, cell.cellEnd)

  const attributedTag = injectAuthorAttribution(html.slice(cell.cellStart, openTag.end), kind, author)

  let body = html.slice(cell.contentStart, cell.contentEnd)
  if (body.trim().length !== 0) {
    // Non-blank content gets an inner ins/del wrapper carrying the author metadata.
    body = Utils.wrapText(body, kind, `diff${kind}`, authorAttribution(author))
  }

  const tail = html.slice(cell.contentEnd, cell.cellEnd)
  return `${attributedTag}${body}${tail}`
}
|
|
674
746
|
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
* an `<tr>` or `<td>` already in the source HTML). Uses the same
|
|
678
|
-
* attribution shape as `authorAttribution` + `Utils.wrapText` so the
|
|
679
|
-
* inject-into-existing and wrap-around-text paths agree.
|
|
680
|
-
*/
|
|
681
|
-
function injectAuthorAttribution(
|
|
682
|
-
openingTag: string,
|
|
683
|
-
kind: 'ins' | 'del',
|
|
684
|
-
author: Author,
|
|
685
|
-
rejectsAuthor?: Author
|
|
686
|
-
): string {
|
|
687
|
-
const meta = authorAttribution(author, rejectsAuthor)
|
|
747
|
+
/**
 * Adds author attribution to an existing opening tag: appends the classes
 * `diff<kind>` plus the author's extra classes via `injectClass`, then adds
 * the author's data attributes (none when the attribution has no
 * `dataAttrs`) via `injectDataAttrs`.
 */
function injectAuthorAttribution(openingTag: string, kind: 'ins' | 'del', author: Author): string {
  const { extraClasses, dataAttrs } = authorAttribution(author)
  return injectDataAttrs(injectClass(openingTag, `diff${kind} ${extraClasses}`), dataAttrs ?? {})
}
|
|
@@ -693,9 +754,6 @@ function injectDataAttrs(openingTag: string, dataAttrs: Readonly<Record<string,
|
|
|
693
754
|
const keys = Object.keys(dataAttrs)
|
|
694
755
|
if (keys.length === 0) return openingTag
|
|
695
756
|
const attrs = keys.map(k => ` data-${k}='${dataAttrs[k]}'`).join('')
|
|
696
|
-
// Insert the data-* attributes just before the closing '>' of the
|
|
697
|
-
// opening tag. `<tr>` and `<td>` are never self-closing in real HTML,
|
|
698
|
-
// but handle `/>` defensively for symmetry with other HTML emitters.
|
|
699
757
|
if (openingTag.endsWith('/>')) return `${openingTag.slice(0, -2)}${attrs}/>`
|
|
700
758
|
return `${openingTag.slice(0, -1)}${attrs}>`
|
|
701
759
|
}
|