@createiq/htmldiff 1.2.0-beta.0 → 1.2.0-beta.2
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +46 -19
- package/dist/HtmlDiff.cjs +418 -420
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +30 -1
- package/dist/HtmlDiff.d.mts +30 -1
- package/dist/HtmlDiff.mjs +418 -420
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +106 -50
- package/src/ThreeWayDiff.ts +173 -127
- package/src/ThreeWayTable.ts +408 -484
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +117 -108
- package/test/HtmlDiff.threeWay.tables.spec.ts +88 -194
package/src/ThreeWayTable.ts
CHANGED
|
@@ -16,675 +16,602 @@ import { type Author, authorAttribution } from './ThreeWayDiff'
|
|
|
16
16
|
import Utils from './Utils'
|
|
17
17
|
|
|
18
18
|
/**
|
|
19
|
-
* Three-way table preprocessing
|
|
20
|
-
* `preprocessTables` but takes V1/V2/V3 and a cell-level three-way diff
|
|
21
|
-
* callback. All three inputs share a single placeholder nonce so V2's
|
|
22
|
-
* tokenisation is identical when the word-level 3-way merger sees it
|
|
23
|
-
* from both pair-wise analyses.
|
|
19
|
+
* Three-way table preprocessing for the genesis-spine merge.
|
|
24
20
|
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
*
|
|
30
|
-
*
|
|
21
|
+
* Inputs: `genesis` (common ancestor), `cpLatest` (counterparty's
|
|
22
|
+
* accumulated position), `meCurrent` (Me's accumulated position). All
|
|
23
|
+
* three share a single placeholder nonce so genesis tokenises
|
|
24
|
+
* identically across both pair-wise word-level analyses.
|
|
25
|
+
*
|
|
26
|
+
* Three paths:
|
|
27
|
+
* 1. **Positional** — all three have the same table count AND each
|
|
28
|
+
* positional triple's tableKey is similar enough that 1:1 pairing
|
|
29
|
+
* by position is sound. Recurses cellDiff per cell, structural
|
|
30
|
+
* layout from genesis.
|
|
31
|
+
* 2. **Row-structural** — paired triples whose row/cell counts differ.
|
|
32
|
+
* Per-table row-level LCS against genesis; recurse on preserved
|
|
33
|
+
* rows, emit author-attributed full rows for the rest.
|
|
34
|
+
* 3. **Multi-table by content** — table counts diverge across inputs.
|
|
35
|
+
* Pair tables to genesis via content-LCS, then assign placeholders
|
|
36
|
+
* such that each placeholder appears in exactly the inputs that
|
|
37
|
+
* contain the underlying table. The word-level merger walks the
|
|
38
|
+
* genesis spine and attributes unpaired tables naturally
|
|
39
|
+
* (cp-only/me-only/both-agree).
|
|
31
40
|
*/
|
|
32
41
|
|
|
33
42
|
export interface ThreeWayPreprocessResult {
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
43
|
+
modifiedGenesis: string
|
|
44
|
+
modifiedCp: string
|
|
45
|
+
modifiedMe: string
|
|
37
46
|
placeholderToDiff: Map<string, string>
|
|
38
47
|
}
|
|
39
48
|
|
|
40
|
-
export type ThreeWayDiffCellFn = (
|
|
49
|
+
export type ThreeWayDiffCellFn = (genesisCell: string, cpCell: string, meCell: string) => string
|
|
41
50
|
|
|
42
51
|
export function preprocessTablesThreeWay(
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
52
|
+
genesis: string,
|
|
53
|
+
cpLatest: string,
|
|
54
|
+
meCurrent: string,
|
|
46
55
|
cellDiff: ThreeWayDiffCellFn
|
|
47
56
|
): ThreeWayPreprocessResult | null {
|
|
48
|
-
const
|
|
49
|
-
const
|
|
50
|
-
const
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
for (const t of
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
57
|
+
const gTables = findTopLevelTables(genesis)
|
|
58
|
+
const cTables = findTopLevelTables(cpLatest)
|
|
59
|
+
const mTables = findTopLevelTables(meCurrent)
|
|
60
|
+
|
|
61
|
+
if (gTables.length === 0 && cTables.length === 0 && mTables.length === 0) return null
|
|
62
|
+
|
|
63
|
+
for (const t of gTables) if (exceedsSizeLimit(t)) return null
|
|
64
|
+
for (const t of cTables) if (exceedsSizeLimit(t)) return null
|
|
65
|
+
for (const t of mTables) if (exceedsSizeLimit(t)) return null
|
|
66
|
+
|
|
67
|
+
const placeholderPrefix = makePlaceholderPrefix(genesis, cpLatest, meCurrent)
|
|
68
|
+
|
|
69
|
+
if (positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables)) {
|
|
70
|
+
return preprocessAlignedByPosition(
|
|
71
|
+
genesis,
|
|
72
|
+
cpLatest,
|
|
73
|
+
meCurrent,
|
|
74
|
+
gTables,
|
|
75
|
+
cTables,
|
|
76
|
+
mTables,
|
|
77
|
+
cellDiff,
|
|
78
|
+
placeholderPrefix
|
|
79
|
+
)
|
|
70
80
|
}
|
|
71
81
|
|
|
72
|
-
|
|
73
|
-
// removed/moved a table, etc. Use content-LCS to pair tables across
|
|
74
|
-
// each adjacent pair, then assign placeholders so the word-level 3-way
|
|
75
|
-
// merger naturally attributes unpaired tables — the placeholder token
|
|
76
|
-
// appears only in the inputs where the table exists, and the merger
|
|
77
|
-
// sees that as an insertion/deletion.
|
|
78
|
-
return preprocessMisalignedByContent(v1, v2, v3, t1s, t2s, t3s, cellDiff, placeholderPrefix)
|
|
82
|
+
return preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix)
|
|
79
83
|
}
|
|
80
84
|
|
|
81
85
|
function preprocessAlignedByPosition(
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
86
|
+
genesis: string,
|
|
87
|
+
cpLatest: string,
|
|
88
|
+
meCurrent: string,
|
|
89
|
+
gTables: TableRange[],
|
|
90
|
+
cTables: TableRange[],
|
|
91
|
+
mTables: TableRange[],
|
|
88
92
|
cellDiff: ThreeWayDiffCellFn,
|
|
89
93
|
placeholderPrefix: string
|
|
90
94
|
): ThreeWayPreprocessResult {
|
|
91
|
-
const pairs: Array<{
|
|
92
|
-
|
|
93
|
-
t2: TableRange
|
|
94
|
-
t3: TableRange
|
|
95
|
-
diffed: string
|
|
96
|
-
}> = []
|
|
97
|
-
for (let i = 0; i < t1s.length; i++) {
|
|
95
|
+
const pairs: Array<{ g: TableRange; c: TableRange; m: TableRange; diffed: string }> = []
|
|
96
|
+
for (let i = 0; i < gTables.length; i++) {
|
|
98
97
|
pairs.push({
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
diffed: diffTableThreeWay(
|
|
98
|
+
g: gTables[i],
|
|
99
|
+
c: cTables[i],
|
|
100
|
+
m: mTables[i],
|
|
101
|
+
diffed: diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[i], cTables[i], mTables[i], cellDiff),
|
|
103
102
|
})
|
|
104
103
|
}
|
|
105
|
-
let
|
|
106
|
-
let
|
|
107
|
-
let
|
|
104
|
+
let modifiedGenesis = genesis
|
|
105
|
+
let modifiedCp = cpLatest
|
|
106
|
+
let modifiedMe = meCurrent
|
|
108
107
|
const placeholderToDiff = new Map<string, string>()
|
|
109
|
-
// Splice end → start so earlier offsets stay valid.
|
|
110
108
|
for (let i = pairs.length - 1; i >= 0; i--) {
|
|
111
109
|
const placeholder = `${placeholderPrefix}${i}${PLACEHOLDER_SUFFIX}`
|
|
112
110
|
placeholderToDiff.set(placeholder, pairs[i].diffed)
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
111
|
+
modifiedGenesis = spliceString(modifiedGenesis, pairs[i].g.tableStart, pairs[i].g.tableEnd, placeholder)
|
|
112
|
+
modifiedCp = spliceString(modifiedCp, pairs[i].c.tableStart, pairs[i].c.tableEnd, placeholder)
|
|
113
|
+
modifiedMe = spliceString(modifiedMe, pairs[i].m.tableStart, pairs[i].m.tableEnd, placeholder)
|
|
116
114
|
}
|
|
117
|
-
return {
|
|
115
|
+
return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
|
|
118
116
|
}
|
|
119
117
|
|
|
120
118
|
/**
|
|
121
|
-
* Multi-table
|
|
122
|
-
*
|
|
123
|
-
* each
|
|
124
|
-
* table
|
|
125
|
-
* - paired-everywhere placeholders → equal in both diffs → unwrapped
|
|
126
|
-
* - V2-only (CP-inserted + Me-rejected) → inserted by CP, deleted by
|
|
127
|
-
* Me → reject wrapper around the table
|
|
128
|
-
* - V2+V3 (CP-inserted, Me-kept) → ins-cp wrapper
|
|
129
|
-
* - V1+V2 (Me-deleted) → del-me wrapper
|
|
130
|
-
* - V1-only (CP-deleted before V2) → del-cp wrapper
|
|
131
|
-
* - V3-only (Me-inserted) → ins-me wrapper
|
|
119
|
+
* Multi-table handler. Tables are paired against `genesis` (the spine)
|
|
120
|
+
* via content-LCS on each of cp and me. Placeholders are assigned so
|
|
121
|
+
* each appears only in the inputs that actually contain the underlying
|
|
122
|
+
* table. The word-level merger then attributes them naturally:
|
|
132
123
|
*
|
|
133
|
-
*
|
|
134
|
-
*
|
|
135
|
-
*
|
|
124
|
+
* - paired in genesis+cp+me → equal in both diffs → emit recursive 3-way diff
|
|
125
|
+
* - in cp+me, not in genesis → both-agree insertion → emit plain
|
|
126
|
+
* - in cp only → cp insertion → ins-cp wrapper (Me didn't take it)
|
|
127
|
+
* - in me only → me insertion → ins-me wrapper
|
|
128
|
+
* - in genesis+cp, not me → me deletion → del-me wrapper
|
|
129
|
+
* - in genesis+me, not cp → cp deletion → del-cp wrapper
|
|
130
|
+
* - in genesis only → both deleted, settled → silent (placeholder content empty)
|
|
136
131
|
*/
|
|
137
|
-
function
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
132
|
+
function preprocessByContent(
|
|
133
|
+
genesis: string,
|
|
134
|
+
cpLatest: string,
|
|
135
|
+
meCurrent: string,
|
|
136
|
+
gTables: TableRange[],
|
|
137
|
+
cTables: TableRange[],
|
|
138
|
+
mTables: TableRange[],
|
|
144
139
|
cellDiff: ThreeWayDiffCellFn,
|
|
145
140
|
placeholderPrefix: string
|
|
146
141
|
): ThreeWayPreprocessResult {
|
|
147
|
-
const
|
|
148
|
-
const
|
|
149
|
-
const
|
|
142
|
+
const gKeys = gTables.map(t => tableKey(genesis, t))
|
|
143
|
+
const cKeys = cTables.map(t => tableKey(cpLatest, t))
|
|
144
|
+
const mKeys = mTables.map(t => tableKey(meCurrent, t))
|
|
150
145
|
|
|
151
|
-
const
|
|
152
|
-
const
|
|
146
|
+
const alignCp = lcsAlign(gKeys, cKeys)
|
|
147
|
+
const alignMe = lcsAlign(gKeys, mKeys)
|
|
153
148
|
|
|
154
|
-
// Maps
|
|
155
|
-
const
|
|
156
|
-
const
|
|
157
|
-
for (const a of
|
|
149
|
+
// Maps: genesisIdx → matching cpIdx (-1 if none); cpIdx → matching genesisIdx; etc.
|
|
150
|
+
const gToCp = new Array<number>(gTables.length).fill(-1)
|
|
151
|
+
const cpToG = new Array<number>(cTables.length).fill(-1)
|
|
152
|
+
for (const a of alignCp) {
|
|
158
153
|
if (a.oldIdx !== null && a.newIdx !== null) {
|
|
159
|
-
|
|
160
|
-
|
|
154
|
+
gToCp[a.oldIdx] = a.newIdx
|
|
155
|
+
cpToG[a.newIdx] = a.oldIdx
|
|
161
156
|
}
|
|
162
157
|
}
|
|
163
|
-
const
|
|
164
|
-
const
|
|
165
|
-
for (const a of
|
|
158
|
+
const gToMe = new Array<number>(gTables.length).fill(-1)
|
|
159
|
+
const meToG = new Array<number>(mTables.length).fill(-1)
|
|
160
|
+
for (const a of alignMe) {
|
|
166
161
|
if (a.oldIdx !== null && a.newIdx !== null) {
|
|
167
|
-
|
|
168
|
-
|
|
162
|
+
gToMe[a.oldIdx] = a.newIdx
|
|
163
|
+
meToG[a.newIdx] = a.oldIdx
|
|
169
164
|
}
|
|
170
165
|
}
|
|
171
166
|
|
|
172
|
-
// Allocate placeholders. Each logical-table-position (paired triple,
|
|
173
|
-
// paired pair, or singleton) gets one shared placeholder used in
|
|
174
|
-
// every input that contains it.
|
|
175
167
|
let nextId = 0
|
|
176
168
|
const placeholderToDiff = new Map<string, string>()
|
|
177
169
|
const placeholders = {
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
170
|
+
g: new Array<string | null>(gTables.length).fill(null),
|
|
171
|
+
c: new Array<string | null>(cTables.length).fill(null),
|
|
172
|
+
m: new Array<string | null>(mTables.length).fill(null),
|
|
181
173
|
}
|
|
182
|
-
|
|
183
174
|
const allocate = (): string => `${placeholderPrefix}${nextId++}${PLACEHOLDER_SUFFIX}`
|
|
184
175
|
|
|
185
|
-
//
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
// For unpaired placeholders the word-level merger can't wrap a tag
|
|
198
|
-
// token (insertTag emits tags verbatim), so we bake the author
|
|
199
|
-
// attribution directly into the placeholder content. The merger then
|
|
200
|
-
// only has to position the placeholder via word-level alignment;
|
|
201
|
-
// the attribution wrapping is already in the substituted HTML.
|
|
202
|
-
const wrapWhole = (tag: 'ins' | 'del', author: Author, tableHtml: string, rejects?: Author): string =>
|
|
203
|
-
Utils.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author, rejects))
|
|
204
|
-
|
|
205
|
-
// 2. V2 tables paired only with V3 (CP-inserted into V2, Me-kept).
|
|
206
|
-
for (let v2Idx = 0; v2Idx < t2s.length; v2Idx++) {
|
|
207
|
-
if (placeholders.v2[v2Idx] !== null) continue
|
|
208
|
-
const v3Idx = v2ToV3[v2Idx]
|
|
209
|
-
if (v3Idx === -1) continue
|
|
176
|
+
// For unpaired-in-one-side placeholders, bake author attribution
|
|
177
|
+
// into the placeholder content — the word-level merger emits tag
|
|
178
|
+
// tokens (HTML comments) verbatim, so it can't wrap them itself.
|
|
179
|
+
const wrapWhole = (tag: 'ins' | 'del', author: Author, tableHtml: string): string =>
|
|
180
|
+
Utils.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author))
|
|
181
|
+
|
|
182
|
+
// 1. Triples paired in all three (genesis + cp + me) → recursive 3-way diff.
|
|
183
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
184
|
+
const cIdx = gToCp[gIdx]
|
|
185
|
+
const mIdx = gToMe[gIdx]
|
|
186
|
+
if (cIdx === -1 || mIdx === -1) continue
|
|
210
187
|
const placeholder = allocate()
|
|
211
|
-
placeholderToDiff.set(
|
|
212
|
-
|
|
213
|
-
|
|
188
|
+
placeholderToDiff.set(
|
|
189
|
+
placeholder,
|
|
190
|
+
diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[gIdx], cTables[cIdx], mTables[mIdx], cellDiff)
|
|
191
|
+
)
|
|
192
|
+
placeholders.g[gIdx] = placeholder
|
|
193
|
+
placeholders.c[cIdx] = placeholder
|
|
194
|
+
placeholders.m[mIdx] = placeholder
|
|
214
195
|
}
|
|
215
196
|
|
|
216
|
-
//
|
|
217
|
-
for (let
|
|
218
|
-
if (placeholders.
|
|
219
|
-
const
|
|
220
|
-
if (
|
|
197
|
+
// 2. Genesis + CP only (not in Me) → me deletion.
|
|
198
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
199
|
+
if (placeholders.g[gIdx] !== null) continue
|
|
200
|
+
const cIdx = gToCp[gIdx]
|
|
201
|
+
if (cIdx === -1) continue
|
|
221
202
|
const placeholder = allocate()
|
|
222
|
-
placeholderToDiff.set(
|
|
223
|
-
|
|
224
|
-
|
|
203
|
+
placeholderToDiff.set(
|
|
204
|
+
placeholder,
|
|
205
|
+
wrapWhole('del', 'me', genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd))
|
|
206
|
+
)
|
|
207
|
+
placeholders.g[gIdx] = placeholder
|
|
208
|
+
placeholders.c[cIdx] = placeholder
|
|
225
209
|
}
|
|
226
210
|
|
|
227
|
-
//
|
|
228
|
-
for (let
|
|
229
|
-
if (placeholders.
|
|
211
|
+
// 3. Genesis + Me only (not in CP) → cp deletion.
|
|
212
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
213
|
+
if (placeholders.g[gIdx] !== null) continue
|
|
214
|
+
const mIdx = gToMe[gIdx]
|
|
215
|
+
if (mIdx === -1) continue
|
|
230
216
|
const placeholder = allocate()
|
|
231
217
|
placeholderToDiff.set(
|
|
232
218
|
placeholder,
|
|
233
|
-
wrapWhole('del', '
|
|
219
|
+
wrapWhole('del', 'cp', genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd))
|
|
234
220
|
)
|
|
235
|
-
placeholders.
|
|
221
|
+
placeholders.g[gIdx] = placeholder
|
|
222
|
+
placeholders.m[mIdx] = placeholder
|
|
236
223
|
}
|
|
237
224
|
|
|
238
|
-
//
|
|
239
|
-
|
|
240
|
-
|
|
225
|
+
// 4. Genesis only (not in CP, not in Me) → both deleted, settled, silent.
|
|
226
|
+
// Placeholder ONLY in genesis; cp and me lack it. The word-level merger
|
|
227
|
+
// sees it as "deleted by both" via the genesis-spine fate maps and
|
|
228
|
+
// silences it via the settled-deletion rule (empty placeholder content).
|
|
229
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
230
|
+
if (placeholders.g[gIdx] !== null) continue
|
|
231
|
+
const placeholder = allocate()
|
|
232
|
+
placeholderToDiff.set(placeholder, '')
|
|
233
|
+
placeholders.g[gIdx] = placeholder
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// 5. CP + Me both inserted (no genesis) — agreement check. If their
|
|
237
|
+
// table content is textually identical, emit plain (settled). Otherwise
|
|
238
|
+
// each side gets its own placeholder (cp-only / me-only treatment).
|
|
239
|
+
for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
|
|
240
|
+
if (placeholders.c[cIdx] !== null) continue
|
|
241
|
+
// CP table not paired to genesis. Is there an unpaired Me table with
|
|
242
|
+
// matching content?
|
|
243
|
+
const cText = cKeys[cIdx]
|
|
244
|
+
let mIdx = -1
|
|
245
|
+
for (let candidate = 0; candidate < mTables.length; candidate++) {
|
|
246
|
+
if (placeholders.m[candidate] !== null) continue
|
|
247
|
+
if (meToG[candidate] !== -1) continue
|
|
248
|
+
if (mKeys[candidate] === cText) {
|
|
249
|
+
mIdx = candidate
|
|
250
|
+
break
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
if (mIdx === -1) continue
|
|
254
|
+
// Both inserted the same table content → settled insertion.
|
|
241
255
|
const placeholder = allocate()
|
|
242
|
-
placeholderToDiff.set(placeholder,
|
|
243
|
-
placeholders.
|
|
256
|
+
placeholderToDiff.set(placeholder, cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd))
|
|
257
|
+
placeholders.c[cIdx] = placeholder
|
|
258
|
+
placeholders.m[mIdx] = placeholder
|
|
244
259
|
}
|
|
245
260
|
|
|
246
|
-
// 6.
|
|
247
|
-
for (let
|
|
248
|
-
if (placeholders.
|
|
261
|
+
// 6. Remaining CP-only tables (inserted by CP, Me didn't take).
|
|
262
|
+
for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
|
|
263
|
+
if (placeholders.c[cIdx] !== null) continue
|
|
249
264
|
const placeholder = allocate()
|
|
250
|
-
placeholderToDiff.set(
|
|
251
|
-
|
|
265
|
+
placeholderToDiff.set(
|
|
266
|
+
placeholder,
|
|
267
|
+
wrapWhole('ins', 'cp', cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd))
|
|
268
|
+
)
|
|
269
|
+
placeholders.c[cIdx] = placeholder
|
|
252
270
|
}
|
|
253
271
|
|
|
254
|
-
//
|
|
255
|
-
let
|
|
256
|
-
|
|
257
|
-
const
|
|
272
|
+
// 7. Remaining Me-only tables (Me inserted, CP didn't).
|
|
273
|
+
for (let mIdx = 0; mIdx < mTables.length; mIdx++) {
|
|
274
|
+
if (placeholders.m[mIdx] !== null) continue
|
|
275
|
+
const placeholder = allocate()
|
|
276
|
+
placeholderToDiff.set(
|
|
277
|
+
placeholder,
|
|
278
|
+
wrapWhole('ins', 'me', meCurrent.slice(mTables[mIdx].tableStart, mTables[mIdx].tableEnd))
|
|
279
|
+
)
|
|
280
|
+
placeholders.m[mIdx] = placeholder
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
// Splice end → start per input.
|
|
284
|
+
let modifiedGenesis = genesis
|
|
285
|
+
for (let i = gTables.length - 1; i >= 0; i--) {
|
|
286
|
+
const p = placeholders.g[i]
|
|
258
287
|
if (p === null) continue
|
|
259
|
-
|
|
288
|
+
modifiedGenesis = spliceString(modifiedGenesis, gTables[i].tableStart, gTables[i].tableEnd, p)
|
|
260
289
|
}
|
|
261
|
-
let
|
|
262
|
-
for (let i =
|
|
263
|
-
const p = placeholders.
|
|
290
|
+
let modifiedCp = cpLatest
|
|
291
|
+
for (let i = cTables.length - 1; i >= 0; i--) {
|
|
292
|
+
const p = placeholders.c[i]
|
|
264
293
|
if (p === null) continue
|
|
265
|
-
|
|
294
|
+
modifiedCp = spliceString(modifiedCp, cTables[i].tableStart, cTables[i].tableEnd, p)
|
|
266
295
|
}
|
|
267
|
-
let
|
|
268
|
-
for (let i =
|
|
269
|
-
const p = placeholders.
|
|
296
|
+
let modifiedMe = meCurrent
|
|
297
|
+
for (let i = mTables.length - 1; i >= 0; i--) {
|
|
298
|
+
const p = placeholders.m[i]
|
|
270
299
|
if (p === null) continue
|
|
271
|
-
|
|
300
|
+
modifiedMe = spliceString(modifiedMe, mTables[i].tableStart, mTables[i].tableEnd, p)
|
|
272
301
|
}
|
|
273
302
|
|
|
274
|
-
return {
|
|
303
|
+
return { modifiedGenesis, modifiedCp, modifiedMe, placeholderToDiff }
|
|
275
304
|
}
|
|
276
305
|
|
|
277
|
-
/**
|
|
278
|
-
* Threshold at which positional pairing is considered sound. Below this
|
|
279
|
-
* similarity, two positionally-aligned tables are probably different
|
|
280
|
-
* tables (e.g. CP swapped them around) and content-LCS pairing should
|
|
281
|
-
* be used instead. 0.5 is a deliberately loose bar — paired-but-content-
|
|
282
|
-
* edited tables (the common case) sit well above it; genuinely different
|
|
283
|
-
* tables sit well below.
|
|
284
|
-
*/
|
|
285
306
|
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = 0.5
|
|
286
307
|
|
|
287
|
-
/**
|
|
288
|
-
* Returns true when V1/V2/V3 tables can be 1:1 paired by position. The
|
|
289
|
-
* three lists must have equal length AND each positional triple must
|
|
290
|
-
* have content similar enough that positional pairing reflects the
|
|
291
|
-
* authors' likely intent. The slow content-LCS path handles cases that
|
|
292
|
-
* fail this gate (table reordering, additions, deletions).
|
|
293
|
-
*/
|
|
294
308
|
function positionallyAligned(
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
309
|
+
genesis: string,
|
|
310
|
+
cpLatest: string,
|
|
311
|
+
meCurrent: string,
|
|
312
|
+
gTables: TableRange[],
|
|
313
|
+
cTables: TableRange[],
|
|
314
|
+
mTables: TableRange[]
|
|
301
315
|
): boolean {
|
|
302
|
-
if (
|
|
303
|
-
for (let i = 0; i <
|
|
304
|
-
const
|
|
305
|
-
const
|
|
306
|
-
const
|
|
307
|
-
if (textSimilarity(
|
|
308
|
-
if (textSimilarity(
|
|
316
|
+
if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false
|
|
317
|
+
for (let i = 0; i < gTables.length; i++) {
|
|
318
|
+
const kG = tableKey(genesis, gTables[i])
|
|
319
|
+
const kC = tableKey(cpLatest, cTables[i])
|
|
320
|
+
const kM = tableKey(meCurrent, mTables[i])
|
|
321
|
+
if (textSimilarity(kG, kC) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
|
|
322
|
+
if (textSimilarity(kG, kM) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false
|
|
309
323
|
}
|
|
310
324
|
return true
|
|
311
325
|
}
|
|
312
326
|
|
|
313
327
|
function tableKey(html: string, table: TableRange): string {
|
|
314
|
-
// Whitespace-normalised full table HTML — tables with byte-identical
|
|
315
|
-
// content (modulo whitespace) pair; any structural or content
|
|
316
|
-
// difference falls through to unpaired (table-level ins/del).
|
|
317
328
|
return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, ' ').trim()
|
|
318
329
|
}
|
|
319
330
|
|
|
331
|
+
// ────────────────────────────────────────────────────────────────────────────
|
|
332
|
+
// Per-table diff: positional cells or row-level structural change.
|
|
333
|
+
|
|
320
334
|
function diffTableThreeWay(
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
335
|
+
genesis: string,
|
|
336
|
+
cpLatest: string,
|
|
337
|
+
meCurrent: string,
|
|
338
|
+
tG: TableRange,
|
|
339
|
+
tC: TableRange,
|
|
340
|
+
tM: TableRange,
|
|
327
341
|
cellDiff: ThreeWayDiffCellFn
|
|
328
342
|
): string {
|
|
329
|
-
if (sameDimensions(
|
|
330
|
-
return diffTablePositional(
|
|
343
|
+
if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) {
|
|
344
|
+
return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
|
|
331
345
|
}
|
|
332
|
-
return diffTableStructural(
|
|
346
|
+
return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
|
|
333
347
|
}
|
|
334
348
|
|
|
335
349
|
function diffTablePositional(
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
350
|
+
genesis: string,
|
|
351
|
+
cpLatest: string,
|
|
352
|
+
meCurrent: string,
|
|
353
|
+
tG: TableRange,
|
|
354
|
+
tC: TableRange,
|
|
355
|
+
tM: TableRange,
|
|
342
356
|
cellDiff: ThreeWayDiffCellFn
|
|
343
357
|
): string {
|
|
344
|
-
// Walk
|
|
345
|
-
//
|
|
346
|
-
//
|
|
358
|
+
// Walk genesis's table scaffolding verbatim — it's the common
|
|
359
|
+
// ancestor. Cells are merged 3-way via cellDiff. Choosing genesis as
|
|
360
|
+
// the spine keeps the table structure stable across both pair-wise
|
|
361
|
+
// diffs that the word-level merger will see.
|
|
347
362
|
const out: string[] = []
|
|
348
|
-
let cursor =
|
|
349
|
-
for (let r = 0; r <
|
|
350
|
-
const
|
|
351
|
-
const
|
|
352
|
-
const
|
|
353
|
-
for (let c = 0; c <
|
|
354
|
-
const
|
|
355
|
-
const
|
|
356
|
-
const
|
|
357
|
-
out.push(
|
|
363
|
+
let cursor = tG.tableStart
|
|
364
|
+
for (let r = 0; r < tG.rows.length; r++) {
|
|
365
|
+
const rG = tG.rows[r]
|
|
366
|
+
const rC = tC.rows[r]
|
|
367
|
+
const rM = tM.rows[r]
|
|
368
|
+
for (let c = 0; c < rG.cells.length; c++) {
|
|
369
|
+
const cG = rG.cells[c]
|
|
370
|
+
const cC = rC.cells[c]
|
|
371
|
+
const cM = rM.cells[c]
|
|
372
|
+
out.push(genesis.slice(cursor, cG.contentStart))
|
|
358
373
|
out.push(
|
|
359
374
|
cellDiff(
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
375
|
+
genesis.slice(cG.contentStart, cG.contentEnd),
|
|
376
|
+
cpLatest.slice(cC.contentStart, cC.contentEnd),
|
|
377
|
+
meCurrent.slice(cM.contentStart, cM.contentEnd)
|
|
363
378
|
)
|
|
364
379
|
)
|
|
365
|
-
cursor =
|
|
380
|
+
cursor = cG.contentEnd
|
|
366
381
|
}
|
|
367
382
|
}
|
|
368
|
-
out.push(
|
|
383
|
+
out.push(genesis.slice(cursor, tG.tableEnd))
|
|
369
384
|
return out.join('')
|
|
370
385
|
}
|
|
371
386
|
|
|
372
387
|
/**
|
|
373
|
-
*
|
|
374
|
-
*
|
|
375
|
-
* 1. Run row-LCS for each pair (V1↔V2, V2↔V3) over rowKeys
|
|
376
|
-
* 2. Build per-V2-row origin (from align1) and fate (from align2)
|
|
377
|
-
* 3. Walk V2's row order, interleaving:
|
|
378
|
-
* - CP-deleted V1 rows (in align1 but not preserved into V2)
|
|
379
|
-
* - Me-inserted V3 rows (in align2 but not from V2)
|
|
380
|
-
* 4. For each V2 row, combine origin+fate to decide:
|
|
381
|
-
* - equal: recurse cellDiff if cell counts match, else fall back
|
|
382
|
-
* - ins-cp: emit V2 row as fully-CP-inserted
|
|
383
|
-
* - del-me: emit V2 row as fully-Me-deleted
|
|
384
|
-
* - reject: emit V2 row as Me-rejects-CP
|
|
388
|
+
* Row-level genesis-spine merge for tables with diverging row/cell
|
|
389
|
+
* counts.
|
|
385
390
|
*
|
|
386
|
-
*
|
|
387
|
-
*
|
|
388
|
-
*
|
|
389
|
-
*
|
|
390
|
-
*
|
|
391
|
-
*
|
|
391
|
+
* 1. Align cp rows to genesis rows (alignCp), me rows to genesis rows
|
|
392
|
+
* (alignMe), each via row-LCS over rowKeys.
|
|
393
|
+
* 2. Per genesis row: cpFate (kept / deleted), meFate (kept / deleted).
|
|
394
|
+
* Both kept → recurse cell diff (with structural-change cell handling
|
|
395
|
+
* falling back to me-attribution Replace per the documented
|
|
396
|
+
* limitation). One kept, other deleted → emit author-attributed full
|
|
397
|
+
* row. Both deleted → silent.
|
|
398
|
+
* 3. Off-spine rows: cp-only inserted rows + me-only inserted rows.
|
|
399
|
+
* Check for content agreement at the same boundary; agreed
|
|
400
|
+
* insertions emit plain.
|
|
392
401
|
*/
|
|
393
402
|
function diffTableStructural(
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
403
|
+
genesis: string,
|
|
404
|
+
cpLatest: string,
|
|
405
|
+
meCurrent: string,
|
|
406
|
+
tG: TableRange,
|
|
407
|
+
tC: TableRange,
|
|
408
|
+
tM: TableRange,
|
|
400
409
|
cellDiff: ThreeWayDiffCellFn
|
|
401
410
|
): string {
|
|
402
|
-
const
|
|
403
|
-
const
|
|
404
|
-
const
|
|
405
|
-
|
|
406
|
-
const align1 = lcsAlign(v1Keys, v2Keys)
|
|
407
|
-
const align2 = lcsAlign(v2Keys, v3Keys)
|
|
408
|
-
|
|
409
|
-
// Per-V2-row attribution lookups.
|
|
410
|
-
// Origin: 'preserved' (with V1 row index) or 'cp-inserted'.
|
|
411
|
-
// Fate: 'preserved' (with V3 row index) or 'me-deleted'.
|
|
412
|
-
const v2Origin = new Array<{ kind: 'preserved'; v1Idx: number } | { kind: 'cp-inserted' }>(t2.rows.length)
|
|
413
|
-
for (let i = 0; i < v2Origin.length; i++) v2Origin[i] = { kind: 'cp-inserted' }
|
|
414
|
-
for (const a of align1) {
|
|
415
|
-
if (a.newIdx !== null && a.oldIdx !== null) {
|
|
416
|
-
v2Origin[a.newIdx] = { kind: 'preserved', v1Idx: a.oldIdx }
|
|
417
|
-
}
|
|
418
|
-
}
|
|
411
|
+
const gKeys = tG.rows.map(r => rowKey(genesis, r))
|
|
412
|
+
const cKeys = tC.rows.map(r => rowKey(cpLatest, r))
|
|
413
|
+
const mKeys = tM.rows.map(r => rowKey(meCurrent, r))
|
|
419
414
|
|
|
420
|
-
const
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
415
|
+
const alignCp = lcsAlign(gKeys, cKeys)
|
|
416
|
+
const alignMe = lcsAlign(gKeys, mKeys)
|
|
417
|
+
|
|
418
|
+
// genesisIdx → matching cpIdx (-1 if cp deleted this row)
|
|
419
|
+
const gToCp = new Array<number>(tG.rows.length).fill(-1)
|
|
420
|
+
for (const a of alignCp) {
|
|
421
|
+
if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx
|
|
422
|
+
}
|
|
423
|
+
const gToMe = new Array<number>(tG.rows.length).fill(-1)
|
|
424
|
+
for (const a of alignMe) {
|
|
425
|
+
if (a.oldIdx !== null && a.newIdx !== null) gToMe[a.oldIdx] = a.newIdx
|
|
426
426
|
}
|
|
427
427
|
|
|
428
|
-
// Off-spine
|
|
429
|
-
//
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
const cpDelRowsAt = collectCpDelRowsAtBoundary(align1, t2.rows.length)
|
|
433
|
-
// Me-inserted V3 rows: in align2 with oldIdx == null. They land at the
|
|
434
|
-
// V2 boundary they sit before — i.e. the next preserved V2 row.
|
|
435
|
-
const meInsRowsAt = collectMeInsRowsAtBoundary(align2, t2.rows.length)
|
|
436
|
-
|
|
437
|
-
// Emit. We reconstruct the table from scratch since rows may be added
|
|
438
|
-
// or deleted from V2's order; preserve the V2 header (everything up
|
|
439
|
-
// to the first <tr>) and the V2 footer (after the last </tr>).
|
|
440
|
-
const out: string[] = []
|
|
441
|
-
out.push(tableHeaderSlice(v2, t2))
|
|
428
|
+
// Off-spine row collections: cp rows with no genesis counterpart, me rows with no genesis counterpart.
|
|
429
|
+
// Keyed by "the genesis row index they should appear before" so emission interleaves correctly.
|
|
430
|
+
const cpInsAt = collectInsertedRowsAtBoundary(alignCp, tG.rows.length)
|
|
431
|
+
const meInsAt = collectInsertedRowsAtBoundary(alignMe, tG.rows.length)
|
|
442
432
|
|
|
443
|
-
const
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
433
|
+
const out: string[] = []
|
|
434
|
+
out.push(tableHeaderSlice(genesis, tG))
|
|
435
|
+
|
|
436
|
+
const emitBoundaryInsertions = (b: number) => {
|
|
437
|
+
const cIdxs = cpInsAt.get(b) ?? []
|
|
438
|
+
const mIdxs = meInsAt.get(b) ?? []
|
|
439
|
+
if (cIdxs.length === 0 && mIdxs.length === 0) return
|
|
440
|
+
// Detect settled insertions (cp and me both inserted the same row content).
|
|
441
|
+
// Pair by content key, in order of appearance.
|
|
442
|
+
const remainingMe = new Set(mIdxs)
|
|
443
|
+
for (const cIdx of cIdxs) {
|
|
444
|
+
const cText = cKeys[cIdx]
|
|
445
|
+
let agreedMeIdx: number | undefined
|
|
446
|
+
for (const mIdx of remainingMe) {
|
|
447
|
+
if (mKeys[mIdx] === cText) {
|
|
448
|
+
agreedMeIdx = mIdx
|
|
449
|
+
break
|
|
450
|
+
}
|
|
448
451
|
}
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
452
|
+
if (agreedMeIdx !== undefined) {
|
|
453
|
+
remainingMe.delete(agreedMeIdx)
|
|
454
|
+
// Settled insertion — emit cp's row verbatim, unmarked.
|
|
455
|
+
out.push(cpLatest.slice(tC.rows[cIdx].rowStart, tC.rows[cIdx].rowEnd))
|
|
456
|
+
} else {
|
|
457
|
+
out.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], 'ins', 'cp'))
|
|
454
458
|
}
|
|
455
459
|
}
|
|
460
|
+
for (const mIdx of remainingMe) {
|
|
461
|
+
out.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], 'ins', 'me'))
|
|
462
|
+
}
|
|
456
463
|
}
|
|
457
464
|
|
|
458
|
-
for (let
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
const
|
|
462
|
-
const
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
): string {
|
|
481
|
-
if (origin.kind === 'cp-inserted' && fate.kind === 'me-deleted') {
|
|
482
|
-
// CP added the row, Me removed it: reject. Show as Me-deletion of
|
|
483
|
-
// CP's insertion via the rejects markup.
|
|
484
|
-
return emitFullRowAttributed(v2, v2Row, 'del', 'me', 'cp')
|
|
485
|
-
}
|
|
486
|
-
if (origin.kind === 'cp-inserted') {
|
|
487
|
-
// CP added the row, Me kept it. Attribute as CP-inserted but emit
|
|
488
|
-
// V2's content (which equals V3's content since Me kept it).
|
|
489
|
-
return emitFullRowAttributed(v2, v2Row, 'ins', 'cp')
|
|
490
|
-
}
|
|
491
|
-
if (fate.kind === 'me-deleted') {
|
|
492
|
-
// Me removed an original V1 row. Emit as Me-deletion of V2's content.
|
|
493
|
-
return emitFullRowAttributed(v2, v2Row, 'del', 'me')
|
|
494
|
-
}
|
|
495
|
-
// Preserved on both sides — recurse into cells. The discriminated-union
|
|
496
|
-
// narrowing makes the indices safe to access directly.
|
|
497
|
-
const v1Row = t1.rows[origin.v1Idx]
|
|
498
|
-
const v3Row = t3.rows[fate.v3Idx]
|
|
499
|
-
if (v1Row.cells.length === v2Row.cells.length && v2Row.cells.length === v3Row.cells.length) {
|
|
500
|
-
// Same cell counts → positional cell diff via cellDiff.
|
|
501
|
-
return diffRowPositional(v1, v2, v3, v1Row, v2Row, v3Row, cellDiff)
|
|
465
|
+
for (let g = 0; g < tG.rows.length; g++) {
|
|
466
|
+
emitBoundaryInsertions(g)
|
|
467
|
+
|
|
468
|
+
const cIdx = gToCp[g]
|
|
469
|
+
const mIdx = gToMe[g]
|
|
470
|
+
const cpDel = cIdx === -1
|
|
471
|
+
const meDel = mIdx === -1
|
|
472
|
+
|
|
473
|
+
if (!cpDel && !meDel) {
|
|
474
|
+
// Both kept — recurse cell-level diff against this row triple.
|
|
475
|
+
out.push(emitPreservedRow(genesis, cpLatest, meCurrent, tG.rows[g], tC.rows[cIdx], tM.rows[mIdx], cellDiff))
|
|
476
|
+
} else if (cpDel && meDel) {
|
|
477
|
+
// Both deleted — silent (settled).
|
|
478
|
+
} else if (cpDel) {
|
|
479
|
+
// CP dropped, Me kept → emit Me's row attributed as cp-deletion. The
|
|
480
|
+
// content shown is what Me has; the styling tells the reader CP
|
|
481
|
+
// wanted it gone.
|
|
482
|
+
out.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], 'del', 'cp'))
|
|
483
|
+
} else {
|
|
484
|
+
// Me dropped, CP kept → emit CP's row attributed as me-deletion.
|
|
485
|
+
out.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], 'del', 'me'))
|
|
486
|
+
}
|
|
502
487
|
}
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
// removed, V3 row inserted). This is lossy for CP's contribution
|
|
506
|
-
// within the row but functional. Real-world legal docs rarely change
|
|
507
|
-
// column count mid-row; this is a known limitation.
|
|
508
|
-
const out: string[] = []
|
|
509
|
-
out.push(emitFullRowAttributed(v2, v2Row, 'del', 'me'))
|
|
510
|
-
out.push(emitFullRowAttributed(v3, v3Row, 'ins', 'me'))
|
|
488
|
+
emitBoundaryInsertions(tG.rows.length)
|
|
489
|
+
out.push(tableFooterSlice(genesis, tG))
|
|
511
490
|
return out.join('')
|
|
512
491
|
}
|
|
513
492
|
|
|
514
|
-
function
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
493
|
+
function emitPreservedRow(
|
|
494
|
+
genesis: string,
|
|
495
|
+
cpLatest: string,
|
|
496
|
+
meCurrent: string,
|
|
497
|
+
rG: RowRange,
|
|
498
|
+
rC: RowRange,
|
|
499
|
+
rM: RowRange,
|
|
521
500
|
cellDiff: ThreeWayDiffCellFn
|
|
522
501
|
): string {
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
502
|
+
if (rG.cells.length === rC.cells.length && rC.cells.length === rM.cells.length) {
|
|
503
|
+
// Same cell counts — positional cell diff.
|
|
504
|
+
const out: string[] = []
|
|
505
|
+
let cursor = rG.rowStart
|
|
506
|
+
for (let c = 0; c < rG.cells.length; c++) {
|
|
507
|
+
const cG = rG.cells[c]
|
|
508
|
+
const cC = rC.cells[c]
|
|
509
|
+
const cM = rM.cells[c]
|
|
510
|
+
out.push(genesis.slice(cursor, cG.contentStart))
|
|
511
|
+
out.push(
|
|
512
|
+
cellDiff(
|
|
513
|
+
genesis.slice(cG.contentStart, cG.contentEnd),
|
|
514
|
+
cpLatest.slice(cC.contentStart, cC.contentEnd),
|
|
515
|
+
meCurrent.slice(cM.contentStart, cM.contentEnd)
|
|
516
|
+
)
|
|
537
517
|
)
|
|
538
|
-
|
|
539
|
-
cursor = c2.contentEnd
|
|
540
|
-
}
|
|
541
|
-
out.push(v2.slice(cursor, v2Row.rowEnd))
|
|
542
|
-
return out.join('')
|
|
543
|
-
}
|
|
544
|
-
|
|
545
|
-
function collectCpDelRowsAtBoundary(align: ReturnType<typeof lcsAlign>, v2RowCount: number): Map<number, number[]> {
|
|
546
|
-
// For each unpaired V1 row (oldIdx set, newIdx null), determine its
|
|
547
|
-
// V2 boundary index: the position just before the next preserved V2
|
|
548
|
-
// row, or v2RowCount if there's no following preserved row.
|
|
549
|
-
const out = new Map<number, number[]>()
|
|
550
|
-
let nextV2Boundary = v2RowCount
|
|
551
|
-
// Walk the alignment in reverse so we can compute nextV2Boundary
|
|
552
|
-
// running backwards, then assign each unpaired V1 row to the boundary
|
|
553
|
-
// currently in scope.
|
|
554
|
-
const pending: number[] = []
|
|
555
|
-
for (let i = align.length - 1; i >= 0; i--) {
|
|
556
|
-
const a = align[i]
|
|
557
|
-
if (a.newIdx !== null) {
|
|
558
|
-
// Flush pending unpaired V1 rows to this V2 boundary.
|
|
559
|
-
if (pending.length > 0) {
|
|
560
|
-
const existing = out.get(nextV2Boundary) ?? []
|
|
561
|
-
// pending was filled backwards — reverse so document order is preserved.
|
|
562
|
-
existing.unshift(...pending.toReversed())
|
|
563
|
-
out.set(nextV2Boundary, existing)
|
|
564
|
-
pending.length = 0
|
|
565
|
-
}
|
|
566
|
-
nextV2Boundary = a.newIdx
|
|
567
|
-
} else if (a.oldIdx !== null) {
|
|
568
|
-
// Unpaired V1 row — CP deleted it.
|
|
569
|
-
pending.push(a.oldIdx)
|
|
518
|
+
cursor = cG.contentEnd
|
|
570
519
|
}
|
|
520
|
+
out.push(genesis.slice(cursor, rG.rowEnd))
|
|
521
|
+
return out.join('')
|
|
571
522
|
}
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
}
|
|
577
|
-
return out
|
|
523
|
+
// Cell-count mismatch within a preserved row — cell-level structural
|
|
524
|
+
// change deferred. Fall back to me-attributed Replace (genesis row
|
|
525
|
+
// removed, me row inserted). Lossy for CP within that row.
|
|
526
|
+
return emitFullRowAttributed(genesis, rG, 'del', 'me') + emitFullRowAttributed(meCurrent, rM, 'ins', 'me')
|
|
578
527
|
}
|
|
579
528
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
529
|
+
/**
|
|
530
|
+
* Returns map "genesis-row-boundary → list of new-side row indices
|
|
531
|
+
* inserted at that boundary". Mirrors the word-level boundary collection
|
|
532
|
+
* but at the row scale.
|
|
533
|
+
*/
|
|
534
|
+
function collectInsertedRowsAtBoundary(
|
|
535
|
+
align: ReturnType<typeof lcsAlign>,
|
|
536
|
+
genesisRowCount: number
|
|
537
|
+
): Map<number, number[]> {
|
|
584
538
|
const out = new Map<number, number[]>()
|
|
585
|
-
let
|
|
539
|
+
let nextGenesisBoundary = genesisRowCount
|
|
586
540
|
const pending: number[] = []
|
|
541
|
+
// Walk in reverse so nextGenesisBoundary tracks the next preserved row
|
|
542
|
+
// we'll encounter; flush pending unpaired new rows at the appropriate
|
|
543
|
+
// genesis boundary.
|
|
587
544
|
for (let i = align.length - 1; i >= 0; i--) {
|
|
588
545
|
const a = align[i]
|
|
589
546
|
if (a.oldIdx !== null) {
|
|
590
547
|
if (pending.length > 0) {
|
|
591
|
-
const existing = out.get(
|
|
548
|
+
const existing = out.get(nextGenesisBoundary) ?? []
|
|
592
549
|
existing.unshift(...pending.toReversed())
|
|
593
|
-
out.set(
|
|
550
|
+
out.set(nextGenesisBoundary, existing)
|
|
594
551
|
pending.length = 0
|
|
595
552
|
}
|
|
596
|
-
|
|
553
|
+
nextGenesisBoundary = a.oldIdx
|
|
597
554
|
} else if (a.newIdx !== null) {
|
|
598
555
|
pending.push(a.newIdx)
|
|
599
556
|
}
|
|
600
557
|
}
|
|
601
558
|
if (pending.length > 0) {
|
|
602
|
-
const existing = out.get(
|
|
603
|
-
existing.unshift(...pending.
|
|
604
|
-
out.set(
|
|
559
|
+
const existing = out.get(nextGenesisBoundary) ?? []
|
|
560
|
+
existing.unshift(...pending.toReversed())
|
|
561
|
+
out.set(nextGenesisBoundary, existing)
|
|
605
562
|
}
|
|
606
563
|
return out
|
|
607
564
|
}
|
|
608
565
|
|
|
609
566
|
function tableHeaderSlice(html: string, table: TableRange): string {
|
|
610
|
-
// Slice from <table> to start of first <tr>. If table is empty, take
|
|
611
|
-
// everything up to </table>.
|
|
612
567
|
const firstRow = table.rows[0]
|
|
613
568
|
if (!firstRow) return html.slice(table.tableStart, table.tableEnd - '</table>'.length)
|
|
614
569
|
return html.slice(table.tableStart, firstRow.rowStart)
|
|
615
570
|
}
|
|
616
571
|
|
|
617
572
|
function tableFooterSlice(html: string, table: TableRange): string {
|
|
618
|
-
// Slice from end of last <tr> to </table>.
|
|
619
573
|
const lastRow = table.rows[table.rows.length - 1]
|
|
620
574
|
if (!lastRow) return '</table>'
|
|
621
575
|
return html.slice(lastRow.rowEnd, table.tableEnd)
|
|
622
576
|
}
|
|
623
577
|
|
|
624
578
|
/**
|
|
625
|
-
* Emit a row
|
|
626
|
-
*
|
|
627
|
-
*
|
|
628
|
-
*
|
|
629
|
-
* author classes/attrs.
|
|
579
|
+
* Emit a row fully attributed to one author. Wraps `<tr>` and each
|
|
580
|
+
* `<td>` with the author's diffins/diffdel class and `data-author`
|
|
581
|
+
* attribute; wraps cell content with an inner `<ins>`/`<del>` matching
|
|
582
|
+
* the word-level emission shape.
|
|
630
583
|
*/
|
|
631
|
-
function emitFullRowAttributed(
|
|
632
|
-
html: string,
|
|
633
|
-
row: RowRange,
|
|
634
|
-
kind: 'ins' | 'del',
|
|
635
|
-
author: Author,
|
|
636
|
-
rejectsAuthor?: Author
|
|
637
|
-
): string {
|
|
584
|
+
function emitFullRowAttributed(html: string, row: RowRange, kind: 'ins' | 'del', author: Author): string {
|
|
638
585
|
const trOpening = parseOpeningTagAt(html, row.rowStart)
|
|
639
|
-
if (!trOpening) return html.slice(
|
|
640
|
-
const trWithAttrs = injectAuthorAttribution(html.slice(row.rowStart, trOpening.end), kind, author
|
|
586
|
+
if (!trOpening) return html.slice(row.rowStart, row.rowEnd)
|
|
587
|
+
const trWithAttrs = injectAuthorAttribution(html.slice(row.rowStart, trOpening.end), kind, author)
|
|
641
588
|
|
|
642
589
|
const out: string[] = [trWithAttrs]
|
|
643
590
|
let cursor = trOpening.end
|
|
644
591
|
for (const cell of row.cells) {
|
|
645
592
|
out.push(html.slice(cursor, cell.cellStart))
|
|
646
|
-
out.push(emitFullCellAttributed(html, cell, kind, author
|
|
593
|
+
out.push(emitFullCellAttributed(html, cell, kind, author))
|
|
647
594
|
cursor = cell.cellEnd
|
|
648
595
|
}
|
|
649
596
|
out.push(html.slice(cursor, row.rowEnd))
|
|
650
597
|
return out.join('')
|
|
651
598
|
}
|
|
652
599
|
|
|
653
|
-
function emitFullCellAttributed(
|
|
654
|
-
html: string,
|
|
655
|
-
cell: CellRange,
|
|
656
|
-
kind: 'ins' | 'del',
|
|
657
|
-
author: Author,
|
|
658
|
-
rejectsAuthor?: Author
|
|
659
|
-
): string {
|
|
600
|
+
function emitFullCellAttributed(html: string, cell: CellRange, kind: 'ins' | 'del', author: Author): string {
|
|
660
601
|
const tdOpening = parseOpeningTagAt(html, cell.cellStart)
|
|
661
602
|
if (!tdOpening) return html.slice(cell.cellStart, cell.cellEnd)
|
|
662
|
-
const tdWithAttrs = injectAuthorAttribution(html.slice(cell.cellStart, tdOpening.end), kind, author
|
|
663
|
-
// Wrap the content in an ins/del with the author classes — same
|
|
664
|
-
// shape as the word-level emission. Empty cells get the class on the
|
|
665
|
-
// <td> but no inner wrapper.
|
|
603
|
+
const tdWithAttrs = injectAuthorAttribution(html.slice(cell.cellStart, tdOpening.end), kind, author)
|
|
666
604
|
const innerContent = html.slice(cell.contentStart, cell.contentEnd)
|
|
667
605
|
const innerWrapped =
|
|
668
606
|
innerContent.trim().length === 0
|
|
669
607
|
? innerContent
|
|
670
|
-
: Utils.wrapText(innerContent, kind, `diff${kind}`, authorAttribution(author
|
|
608
|
+
: Utils.wrapText(innerContent, kind, `diff${kind}`, authorAttribution(author))
|
|
671
609
|
const closing = html.slice(cell.contentEnd, cell.cellEnd)
|
|
672
610
|
return tdWithAttrs + innerWrapped + closing
|
|
673
611
|
}
|
|
674
612
|
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
* an `<tr>` or `<td>` already in the source HTML). Uses the same
|
|
678
|
-
* attribution shape as `authorAttribution` + `Utils.wrapText` so the
|
|
679
|
-
* inject-into-existing and wrap-around-text paths agree.
|
|
680
|
-
*/
|
|
681
|
-
function injectAuthorAttribution(
|
|
682
|
-
openingTag: string,
|
|
683
|
-
kind: 'ins' | 'del',
|
|
684
|
-
author: Author,
|
|
685
|
-
rejectsAuthor?: Author
|
|
686
|
-
): string {
|
|
687
|
-
const meta = authorAttribution(author, rejectsAuthor)
|
|
613
|
+
function injectAuthorAttribution(openingTag: string, kind: 'ins' | 'del', author: Author): string {
|
|
614
|
+
const meta = authorAttribution(author)
|
|
688
615
|
const tagWithClass = injectClass(openingTag, `diff${kind} ${meta.extraClasses}`)
|
|
689
616
|
return injectDataAttrs(tagWithClass, meta.dataAttrs ?? {})
|
|
690
617
|
}
|
|
@@ -693,9 +620,6 @@ function injectDataAttrs(openingTag: string, dataAttrs: Readonly<Record<string,
|
|
|
693
620
|
const keys = Object.keys(dataAttrs)
|
|
694
621
|
if (keys.length === 0) return openingTag
|
|
695
622
|
const attrs = keys.map(k => ` data-${k}='${dataAttrs[k]}'`).join('')
|
|
696
|
-
// Insert the data-* attributes just before the closing '>' of the
|
|
697
|
-
// opening tag. `<tr>` and `<td>` are never self-closing in real HTML,
|
|
698
|
-
// but handle `/>` defensively for symmetry with other HTML emitters.
|
|
699
623
|
if (openingTag.endsWith('/>')) return `${openingTag.slice(0, -2)}${attrs}/>`
|
|
700
624
|
return `${openingTag.slice(0, -1)}${attrs}>`
|
|
701
625
|
}
|