@createiq/htmldiff 1.2.0-beta.0 → 1.2.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -19
- package/dist/HtmlDiff.cjs +609 -438
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +89 -16
- package/dist/HtmlDiff.d.mts +89 -16
- package/dist/HtmlDiff.mjs +604 -438
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +218 -74
- package/src/ThreeWayDiff.ts +220 -127
- package/src/ThreeWayTable.ts +549 -491
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +316 -92
- package/test/HtmlDiff.threeWay.tables.spec.ts +200 -196
- package/test/Utils.spec.ts +3 -3
package/dist/HtmlDiff.cjs
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
Object.defineProperties(exports, {
|
|
2
|
+
__esModule: { value: true },
|
|
3
|
+
[Symbol.toStringTag]: { value: "Module" }
|
|
4
|
+
});
|
|
1
5
|
//#region src/Match.ts
|
|
2
6
|
var Match = class {
|
|
3
7
|
_startInOld;
|
|
@@ -1252,119 +1256,155 @@ function findTopLevelCells(html, start, end) {
|
|
|
1252
1256
|
}
|
|
1253
1257
|
//#endregion
|
|
1254
1258
|
//#region src/ThreeWayDiff.ts
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1259
|
+
/**
|
|
1260
|
+
* Builds the attributed segment stream for a three-way diff.
|
|
1261
|
+
*
|
|
1262
|
+
* @param dCp analysis of diff(genesis → cp-latest)
|
|
1263
|
+
* @param dMe analysis of diff(genesis → me-current)
|
|
1264
|
+
*
|
|
1265
|
+
* Both analyses must share the same `oldDiffWords` (the genesis tokens)
|
|
1266
|
+
* — the caller guarantees this by passing the same genesis input and
|
|
1267
|
+
* the same `useProjections` decision to both `HtmlDiff.analyze` calls.
|
|
1268
|
+
*/
|
|
1269
|
+
function buildSegments(dCp, dMe) {
|
|
1270
|
+
const genesisLen = dCp.oldDiffWords.length;
|
|
1271
|
+
const cpFate = buildFateFromGenesis(dCp.operations, genesisLen);
|
|
1272
|
+
const meFate = buildFateFromGenesis(dMe.operations, genesisLen);
|
|
1273
|
+
const cpInsAt = collectInsertionsKeyedByEnd(dCp);
|
|
1274
|
+
const meInsAt = collectInsertionsKeyedByEnd(dMe);
|
|
1275
|
+
const diffToOriginal = dCp.oldContentToOriginal ?? Array.from({ length: genesisLen }, (_, i) => i);
|
|
1276
|
+
const genesisOriginalLen = dCp.oldOriginalWords.length;
|
|
1263
1277
|
const segments = [];
|
|
1264
1278
|
let originalCursor = 0;
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
author: "cp"
|
|
1270
|
-
}, cpDel);
|
|
1271
|
-
const attr = combine(fromV1[i], toV3[i]);
|
|
1279
|
+
emitBoundary(0, cpInsAt, meInsAt, dCp.newDiffWords, dMe.newDiffWords, segments);
|
|
1280
|
+
for (let i = 0; i < genesisLen; i++) {
|
|
1281
|
+
const cpDel = cpFate[i] === "deleted";
|
|
1282
|
+
const meDel = meFate[i] === "deleted";
|
|
1272
1283
|
const origIdx = diffToOriginal[i];
|
|
1273
|
-
const slice =
|
|
1284
|
+
const slice = dCp.oldOriginalWords.slice(originalCursor, origIdx + 1);
|
|
1274
1285
|
originalCursor = origIdx + 1;
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
kind: "
|
|
1286
|
+
if (!cpDel && !meDel) appendSegment(segments, { kind: "equal" }, slice);
|
|
1287
|
+
else if (cpDel && meDel) {
|
|
1288
|
+
if (slice.length > 1) appendSegment(segments, { kind: "equal" }, slice.slice(0, slice.length - 1));
|
|
1289
|
+
} else if (cpDel) appendSegment(segments, {
|
|
1290
|
+
kind: "del",
|
|
1291
|
+
author: "cp"
|
|
1292
|
+
}, slice);
|
|
1293
|
+
else appendSegment(segments, {
|
|
1294
|
+
kind: "del",
|
|
1284
1295
|
author: "me"
|
|
1285
|
-
},
|
|
1296
|
+
}, slice);
|
|
1297
|
+
emitBoundary(i + 1, cpInsAt, meInsAt, dCp.newDiffWords, dMe.newDiffWords, segments);
|
|
1286
1298
|
}
|
|
1287
|
-
|
|
1288
|
-
if (tailCpDel?.length) appendSegment(segments, {
|
|
1289
|
-
kind: "del",
|
|
1290
|
-
author: "cp"
|
|
1291
|
-
}, tailCpDel);
|
|
1292
|
-
const tailMeIns = meInsertionsAt.get(v2DiffLen);
|
|
1293
|
-
if (tailMeIns?.length) appendSegment(segments, {
|
|
1294
|
-
kind: "ins",
|
|
1295
|
-
author: "me"
|
|
1296
|
-
}, tailMeIns);
|
|
1297
|
-
if (originalCursor < v2OriginalLen) appendSegment(segments, { kind: "equal" }, d1.newOriginalWords.slice(originalCursor));
|
|
1299
|
+
if (originalCursor < genesisOriginalLen) appendSegment(segments, { kind: "equal" }, dCp.oldOriginalWords.slice(originalCursor));
|
|
1298
1300
|
return segments;
|
|
1299
1301
|
}
|
|
1300
|
-
function buildOriginMap(ops, v2Len) {
|
|
1301
|
-
const out = new Array(v2Len).fill("preserved-from-v1");
|
|
1302
|
-
for (const op of ops) {
|
|
1303
|
-
const origin = op.action === 2 ? "inserted-by-cp" : op.action === 4 ? "replaced-into-by-cp" : null;
|
|
1304
|
-
if (origin === null) continue;
|
|
1305
|
-
for (let i = op.startInNew; i < op.endInNew; i++) if (i >= 0 && i < v2Len) out[i] = origin;
|
|
1306
|
-
}
|
|
1307
|
-
return out;
|
|
1308
|
-
}
|
|
1309
|
-
function buildFateMap(ops, v2Len) {
|
|
1310
|
-
const out = new Array(v2Len).fill("preserved-to-v3");
|
|
1311
|
-
for (const op of ops) {
|
|
1312
|
-
const fate = op.action === 1 ? "deleted-by-me" : op.action === 4 ? "replaced-out-by-me" : null;
|
|
1313
|
-
if (fate === null) continue;
|
|
1314
|
-
for (let i = op.startInOld; i < op.endInOld; i++) if (i >= 0 && i < v2Len) out[i] = fate;
|
|
1315
|
-
}
|
|
1316
|
-
return out;
|
|
1317
|
-
}
|
|
1318
|
-
function isDeletion(attr) {
|
|
1319
|
-
return attr.kind === "del" || attr.kind === "reject";
|
|
1320
|
-
}
|
|
1321
|
-
function combine(origin, fate) {
|
|
1322
|
-
const cpInserted = origin === "inserted-by-cp" || origin === "replaced-into-by-cp";
|
|
1323
|
-
const meDeleted = fate === "deleted-by-me" || fate === "replaced-out-by-me";
|
|
1324
|
-
if (!cpInserted && !meDeleted) return { kind: "equal" };
|
|
1325
|
-
if (cpInserted && !meDeleted) return {
|
|
1326
|
-
kind: "ins",
|
|
1327
|
-
author: "cp"
|
|
1328
|
-
};
|
|
1329
|
-
if (!cpInserted && meDeleted) return {
|
|
1330
|
-
kind: "del",
|
|
1331
|
-
author: "me"
|
|
1332
|
-
};
|
|
1333
|
-
return {
|
|
1334
|
-
kind: "reject",
|
|
1335
|
-
by: "me",
|
|
1336
|
-
rejected: "cp"
|
|
1337
|
-
};
|
|
1338
|
-
}
|
|
1339
1302
|
/**
|
|
1340
|
-
*
|
|
1341
|
-
*
|
|
1342
|
-
*
|
|
1303
|
+
* Per genesis-diff-index, what did this side do to that token? Both
|
|
1304
|
+
* Delete and Replace ops remove the token from the side's output, so
|
|
1305
|
+
* both contribute `'deleted'`. Equal ops contribute `'kept'`. Insert
|
|
1306
|
+
* ops have an empty old range, so they don't touch the genesis fate
|
|
1307
|
+
* map.
|
|
1343
1308
|
*/
|
|
1344
|
-
function
|
|
1345
|
-
const out =
|
|
1346
|
-
for (const op of
|
|
1309
|
+
function buildFateFromGenesis(ops, genesisLen) {
|
|
1310
|
+
const out = new Array(genesisLen).fill("kept");
|
|
1311
|
+
for (const op of ops) {
|
|
1347
1312
|
if (op.action !== 1 && op.action !== 4) continue;
|
|
1348
|
-
|
|
1349
|
-
if (words.length === 0) continue;
|
|
1350
|
-
const existing = out.get(op.startInNew) ?? [];
|
|
1351
|
-
existing.push(...words);
|
|
1352
|
-
out.set(op.startInNew, existing);
|
|
1313
|
+
for (let i = op.startInOld; i < op.endInOld; i++) if (i >= 0 && i < genesisLen) out[i] = "deleted";
|
|
1353
1314
|
}
|
|
1354
1315
|
return out;
|
|
1355
1316
|
}
|
|
1356
|
-
|
|
1317
|
+
/**
|
|
1318
|
+
* Per genesis boundary `b`, collect tokens this side inserted at that
|
|
1319
|
+
* boundary. Keyed by `endInOld` so a Replace at genesis[k..k+1] has its
|
|
1320
|
+
* insertion at boundary k+1 (after the deleted token) rather than k
|
|
1321
|
+
* (before) — that produces the del-then-ins visual order.
|
|
1322
|
+
*
|
|
1323
|
+
* For pure Insert ops the old range is empty (endInOld == startInOld),
|
|
1324
|
+
* so the key is the same as the semantic between-tokens position.
|
|
1325
|
+
*/
|
|
1326
|
+
function collectInsertionsKeyedByEnd(d) {
|
|
1357
1327
|
const out = /* @__PURE__ */ new Map();
|
|
1358
1328
|
for (const op of d.operations) {
|
|
1359
1329
|
if (op.action !== 2 && op.action !== 4) continue;
|
|
1360
1330
|
const words = d.newDiffWords.slice(op.startInNew, op.endInNew);
|
|
1361
1331
|
if (words.length === 0) continue;
|
|
1362
|
-
const
|
|
1332
|
+
const key = op.endInOld;
|
|
1333
|
+
const existing = out.get(key) ?? [];
|
|
1363
1334
|
existing.push(...words);
|
|
1364
|
-
out.set(
|
|
1335
|
+
out.set(key, existing);
|
|
1365
1336
|
}
|
|
1366
1337
|
return out;
|
|
1367
1338
|
}
|
|
1339
|
+
/**
|
|
1340
|
+
* Emit any insertions at boundary `b`.
|
|
1341
|
+
*
|
|
1342
|
+
* Reading model: a legal reviewer wants to see CP's INTENT relative
|
|
1343
|
+
* to Me's current content. Me's content is the base; CP's deltas are
|
|
1344
|
+
* what they need to act on. Under that framing:
|
|
1345
|
+
* - tokens both authors inserted at the same boundary → settled
|
|
1346
|
+
* - tokens CP inserted that Me doesn't have → ins-cp (CP wants
|
|
1347
|
+
* this added)
|
|
1348
|
+
* - tokens Me inserted that CP doesn't have → del-cp (CP wants
|
|
1349
|
+
* this removed from Me's content)
|
|
1350
|
+
*
|
|
1351
|
+
* The third case is the load-bearing attribution flip. The
|
|
1352
|
+
* genesis-spine view technically labels me-only-at-boundary tokens
|
|
1353
|
+
* as "ins-me" (Me added them; CP didn't), but that's confusing to
|
|
1354
|
+
* a reviewer: they see "Me added X" alongside "CP added Y" and have
|
|
1355
|
+
* to mentally derive "CP wants X gone, replaced with Y". Surfacing
|
|
1356
|
+
* me-only tokens as `del-cp` shows CP's intent directly:
|
|
1357
|
+
* - "CP accepted Me's text minus `things`": settled bulk + del-cp
|
|
1358
|
+
* `things` (no parallel redundant insertions)
|
|
1359
|
+
* - "CP wants `cruel` where Me wrote `brave`": ins-cp `cruel` +
|
|
1360
|
+
* del-cp `brave` (the substitution intent reads directly)
|
|
1361
|
+
* - "CP added extra words": cp-extras stay as ins-cp (same as
|
|
1362
|
+
* before; the cp-only direction was always intent-correct)
|
|
1363
|
+
*
|
|
1364
|
+
* Pure single-side insertions (Me added text CP doesn't engage
|
|
1365
|
+
* with at all, or vice versa) keep their genesis-spine attribution
|
|
1366
|
+
* — these aren't refinement cases, just Me's own content additions.
|
|
1367
|
+
*/
|
|
1368
|
+
function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments) {
|
|
1369
|
+
const cpIns = cpInsAt.get(b);
|
|
1370
|
+
const meIns = meInsAt.get(b);
|
|
1371
|
+
const hasCp = !!cpIns && cpIns.length > 0;
|
|
1372
|
+
const hasMe = !!meIns && meIns.length > 0;
|
|
1373
|
+
if (!hasCp && !hasMe) return;
|
|
1374
|
+
if (!hasCp) {
|
|
1375
|
+
appendSegment(segments, {
|
|
1376
|
+
kind: "ins",
|
|
1377
|
+
author: "me"
|
|
1378
|
+
}, meIns);
|
|
1379
|
+
return;
|
|
1380
|
+
}
|
|
1381
|
+
if (!hasMe) {
|
|
1382
|
+
appendSegment(segments, {
|
|
1383
|
+
kind: "ins",
|
|
1384
|
+
author: "cp"
|
|
1385
|
+
}, cpIns);
|
|
1386
|
+
return;
|
|
1387
|
+
}
|
|
1388
|
+
if (tokenArraysEqual(cpIns, meIns)) {
|
|
1389
|
+
appendSegment(segments, { kind: "equal" }, cpIns);
|
|
1390
|
+
return;
|
|
1391
|
+
}
|
|
1392
|
+
const alignment = lcsAlign(cpIns, meIns);
|
|
1393
|
+
for (const a of alignment) if (a.oldIdx !== null && a.newIdx !== null) appendSegment(segments, { kind: "equal" }, [cpIns[a.oldIdx]]);
|
|
1394
|
+
else if (a.oldIdx !== null) appendSegment(segments, {
|
|
1395
|
+
kind: "ins",
|
|
1396
|
+
author: "cp"
|
|
1397
|
+
}, [cpIns[a.oldIdx]]);
|
|
1398
|
+
else if (a.newIdx !== null) appendSegment(segments, {
|
|
1399
|
+
kind: "del",
|
|
1400
|
+
author: "cp"
|
|
1401
|
+
}, [meIns[a.newIdx]]);
|
|
1402
|
+
}
|
|
1403
|
+
function tokenArraysEqual(a, b) {
|
|
1404
|
+
if (a.length !== b.length) return false;
|
|
1405
|
+
for (let i = 0; i < a.length; i++) if (a[i] !== b[i]) return false;
|
|
1406
|
+
return true;
|
|
1407
|
+
}
|
|
1368
1408
|
function appendSegment(segments, attr, words) {
|
|
1369
1409
|
if (words.length === 0) return;
|
|
1370
1410
|
const last = segments[segments.length - 1];
|
|
@@ -1381,7 +1421,6 @@ function sameAttribution(a, b) {
|
|
|
1381
1421
|
if (a.kind === "equal" && b.kind === "equal") return true;
|
|
1382
1422
|
if (a.kind === "ins" && b.kind === "ins") return a.author === b.author;
|
|
1383
1423
|
if (a.kind === "del" && b.kind === "del") return a.author === b.author;
|
|
1384
|
-
if (a.kind === "reject" && b.kind === "reject") return true;
|
|
1385
1424
|
return false;
|
|
1386
1425
|
}
|
|
1387
1426
|
/**
|
|
@@ -1391,375 +1430,404 @@ function sameAttribution(a, b) {
|
|
|
1391
1430
|
* pre-wrap) stay consistent. A change here propagates to every author
|
|
1392
1431
|
* marker in the output.
|
|
1393
1432
|
*/
|
|
1394
|
-
function authorAttribution(author
|
|
1395
|
-
const dataAttrs = { author };
|
|
1396
|
-
if (rejects !== void 0) dataAttrs.rejects = rejects;
|
|
1433
|
+
function authorAttribution(author) {
|
|
1397
1434
|
return {
|
|
1398
|
-
extraClasses:
|
|
1399
|
-
dataAttrs
|
|
1435
|
+
extraClasses: author,
|
|
1436
|
+
dataAttrs: { author }
|
|
1400
1437
|
};
|
|
1401
1438
|
}
|
|
1402
1439
|
/**
|
|
1403
1440
|
* Resolve a segment's attribution into the wrapper-tag, base CSS class,
|
|
1404
1441
|
* and `WrapMetadata` consumed by `Utils.wrapText` / `insertTag`. The
|
|
1405
1442
|
* caller is `HtmlDiff.executeThreeWay`'s emission loop.
|
|
1443
|
+
*
|
|
1444
|
+
* `equal` segments don't go through this — they're emitted unmarked.
|
|
1406
1445
|
*/
|
|
1407
1446
|
function segmentEmissionShape(attr) {
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
};
|
|
1414
|
-
case "del": return {
|
|
1415
|
-
tag: "del",
|
|
1416
|
-
baseClass: "diffdel",
|
|
1417
|
-
metadata: authorAttribution(attr.author)
|
|
1418
|
-
};
|
|
1419
|
-
case "reject": return {
|
|
1420
|
-
tag: "del",
|
|
1421
|
-
baseClass: "diffdel",
|
|
1422
|
-
metadata: authorAttribution(attr.by, attr.rejected)
|
|
1423
|
-
};
|
|
1424
|
-
}
|
|
1447
|
+
return {
|
|
1448
|
+
tag: attr.kind,
|
|
1449
|
+
baseClass: attr.kind === "ins" ? "diffins" : "diffdel",
|
|
1450
|
+
metadata: authorAttribution(attr.author)
|
|
1451
|
+
};
|
|
1425
1452
|
}
|
|
1426
1453
|
//#endregion
|
|
1427
1454
|
//#region src/ThreeWayTable.ts
|
|
1428
|
-
function preprocessTablesThreeWay(
|
|
1429
|
-
const
|
|
1430
|
-
const
|
|
1431
|
-
const
|
|
1432
|
-
if (
|
|
1433
|
-
for (const t of
|
|
1434
|
-
for (const t of
|
|
1435
|
-
for (const t of
|
|
1436
|
-
const placeholderPrefix = makePlaceholderPrefix(
|
|
1437
|
-
if (positionallyAligned(
|
|
1438
|
-
return
|
|
1439
|
-
}
|
|
1440
|
-
function preprocessAlignedByPosition(
|
|
1455
|
+
function preprocessTablesThreeWay(genesis, cpLatest, meCurrent, cellDiff) {
|
|
1456
|
+
const gTables = findTopLevelTables(genesis);
|
|
1457
|
+
const cTables = findTopLevelTables(cpLatest);
|
|
1458
|
+
const mTables = findTopLevelTables(meCurrent);
|
|
1459
|
+
if (gTables.length === 0 && cTables.length === 0 && mTables.length === 0) return null;
|
|
1460
|
+
for (const t of gTables) if (exceedsSizeLimit(t)) return null;
|
|
1461
|
+
for (const t of cTables) if (exceedsSizeLimit(t)) return null;
|
|
1462
|
+
for (const t of mTables) if (exceedsSizeLimit(t)) return null;
|
|
1463
|
+
const placeholderPrefix = makePlaceholderPrefix(genesis, cpLatest, meCurrent);
|
|
1464
|
+
if (positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables)) return preprocessAlignedByPosition(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix);
|
|
1465
|
+
return preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix);
|
|
1466
|
+
}
|
|
1467
|
+
function preprocessAlignedByPosition(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix) {
|
|
1441
1468
|
const pairs = [];
|
|
1442
|
-
for (let i = 0; i <
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
diffed: diffTableThreeWay(
|
|
1469
|
+
for (let i = 0; i < gTables.length; i++) pairs.push({
|
|
1470
|
+
g: gTables[i],
|
|
1471
|
+
c: cTables[i],
|
|
1472
|
+
m: mTables[i],
|
|
1473
|
+
diffed: diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[i], cTables[i], mTables[i], cellDiff)
|
|
1447
1474
|
});
|
|
1448
|
-
let
|
|
1449
|
-
let
|
|
1450
|
-
let
|
|
1475
|
+
let modifiedGenesis = genesis;
|
|
1476
|
+
let modifiedCp = cpLatest;
|
|
1477
|
+
let modifiedMe = meCurrent;
|
|
1451
1478
|
const placeholderToDiff = /* @__PURE__ */ new Map();
|
|
1452
1479
|
for (let i = pairs.length - 1; i >= 0; i--) {
|
|
1453
1480
|
const placeholder = `${placeholderPrefix}${i}-->`;
|
|
1454
1481
|
placeholderToDiff.set(placeholder, pairs[i].diffed);
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1482
|
+
modifiedGenesis = spliceString(modifiedGenesis, pairs[i].g.tableStart, pairs[i].g.tableEnd, placeholder);
|
|
1483
|
+
modifiedCp = spliceString(modifiedCp, pairs[i].c.tableStart, pairs[i].c.tableEnd, placeholder);
|
|
1484
|
+
modifiedMe = spliceString(modifiedMe, pairs[i].m.tableStart, pairs[i].m.tableEnd, placeholder);
|
|
1458
1485
|
}
|
|
1459
1486
|
return {
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1487
|
+
modifiedGenesis,
|
|
1488
|
+
modifiedCp,
|
|
1489
|
+
modifiedMe,
|
|
1463
1490
|
placeholderToDiff
|
|
1464
1491
|
};
|
|
1465
1492
|
}
|
|
1466
1493
|
/**
|
|
1467
|
-
* Multi-table
|
|
1468
|
-
*
|
|
1469
|
-
* each
|
|
1470
|
-
* table
|
|
1471
|
-
* - paired-everywhere placeholders → equal in both diffs → unwrapped
|
|
1472
|
-
* - V2-only (CP-inserted + Me-rejected) → inserted by CP, deleted by
|
|
1473
|
-
* Me → reject wrapper around the table
|
|
1474
|
-
* - V2+V3 (CP-inserted, Me-kept) → ins-cp wrapper
|
|
1475
|
-
* - V1+V2 (Me-deleted) → del-me wrapper
|
|
1476
|
-
* - V1-only (CP-deleted before V2) → del-cp wrapper
|
|
1477
|
-
* - V3-only (Me-inserted) → ins-me wrapper
|
|
1494
|
+
* Multi-table handler. Tables are paired against `genesis` (the spine)
|
|
1495
|
+
* via content-LCS on each of cp and me. Placeholders are assigned so
|
|
1496
|
+
* each appears only in the inputs that actually contain the underlying
|
|
1497
|
+
* table. The word-level merger then attributes them naturally:
|
|
1478
1498
|
*
|
|
1479
|
-
*
|
|
1480
|
-
*
|
|
1481
|
-
*
|
|
1499
|
+
* - paired in genesis+cp+me → equal in both diffs → emit recursive 3-way diff
|
|
1500
|
+
* - in cp+me, not in genesis → both-agree insertion → emit plain
|
|
1501
|
+
* - in cp only → cp insertion → ins-cp wrapper (Me didn't take it)
|
|
1502
|
+
* - in me only → me insertion → ins-me wrapper
|
|
1503
|
+
* - in genesis+cp, not me → me deletion → del-me wrapper
|
|
1504
|
+
* - in genesis+me, not cp → cp deletion → del-cp wrapper
|
|
1505
|
+
* - in genesis only → both deleted, settled → silent (placeholder content empty)
|
|
1482
1506
|
*/
|
|
1483
|
-
function
|
|
1484
|
-
const
|
|
1485
|
-
const
|
|
1486
|
-
const
|
|
1487
|
-
const
|
|
1488
|
-
const
|
|
1489
|
-
const
|
|
1490
|
-
const
|
|
1491
|
-
for (const a of
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
}
|
|
1495
|
-
const
|
|
1496
|
-
const
|
|
1497
|
-
for (const a of
|
|
1498
|
-
|
|
1499
|
-
|
|
1507
|
+
function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix) {
|
|
1508
|
+
const gKeys = gTables.map((t) => tableKey(genesis, t));
|
|
1509
|
+
const cKeys = cTables.map((t) => tableKey(cpLatest, t));
|
|
1510
|
+
const mKeys = mTables.map((t) => tableKey(meCurrent, t));
|
|
1511
|
+
const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables);
|
|
1512
|
+
const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables);
|
|
1513
|
+
const gToCp = new Array(gTables.length).fill(-1);
|
|
1514
|
+
const cpToG = new Array(cTables.length).fill(-1);
|
|
1515
|
+
for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
|
|
1516
|
+
gToCp[a.oldIdx] = a.newIdx;
|
|
1517
|
+
cpToG[a.newIdx] = a.oldIdx;
|
|
1518
|
+
}
|
|
1519
|
+
const gToMe = new Array(gTables.length).fill(-1);
|
|
1520
|
+
const meToG = new Array(mTables.length).fill(-1);
|
|
1521
|
+
for (const a of alignMe) if (a.oldIdx !== null && a.newIdx !== null) {
|
|
1522
|
+
gToMe[a.oldIdx] = a.newIdx;
|
|
1523
|
+
meToG[a.newIdx] = a.oldIdx;
|
|
1500
1524
|
}
|
|
1501
1525
|
let nextId = 0;
|
|
1502
1526
|
const placeholderToDiff = /* @__PURE__ */ new Map();
|
|
1503
1527
|
const placeholders = {
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1528
|
+
g: new Array(gTables.length).fill(null),
|
|
1529
|
+
c: new Array(cTables.length).fill(null),
|
|
1530
|
+
m: new Array(mTables.length).fill(null)
|
|
1507
1531
|
};
|
|
1508
1532
|
const allocate = () => `${placeholderPrefix}${nextId++}-->`;
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
const
|
|
1512
|
-
|
|
1533
|
+
const wrapWhole = (tag, author, tableHtml) => Utils_default.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author));
|
|
1534
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
1535
|
+
const cIdx = gToCp[gIdx];
|
|
1536
|
+
const mIdx = gToMe[gIdx];
|
|
1537
|
+
if (cIdx === -1 || mIdx === -1) continue;
|
|
1513
1538
|
const placeholder = allocate();
|
|
1514
|
-
placeholderToDiff.set(placeholder, diffTableThreeWay(
|
|
1515
|
-
placeholders.
|
|
1516
|
-
placeholders.
|
|
1517
|
-
placeholders.
|
|
1518
|
-
}
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
if (v3Idx === -1) continue;
|
|
1539
|
+
placeholderToDiff.set(placeholder, diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[gIdx], cTables[cIdx], mTables[mIdx], cellDiff));
|
|
1540
|
+
placeholders.g[gIdx] = placeholder;
|
|
1541
|
+
placeholders.c[cIdx] = placeholder;
|
|
1542
|
+
placeholders.m[mIdx] = placeholder;
|
|
1543
|
+
}
|
|
1544
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
1545
|
+
if (placeholders.g[gIdx] !== null) continue;
|
|
1546
|
+
const cIdx = gToCp[gIdx];
|
|
1547
|
+
if (cIdx === -1) continue;
|
|
1524
1548
|
const placeholder = allocate();
|
|
1525
|
-
placeholderToDiff.set(placeholder, wrapWhole("
|
|
1526
|
-
placeholders.
|
|
1527
|
-
placeholders.
|
|
1528
|
-
}
|
|
1529
|
-
for (let
|
|
1530
|
-
if (placeholders.
|
|
1531
|
-
const
|
|
1532
|
-
if (
|
|
1549
|
+
placeholderToDiff.set(placeholder, wrapWhole("del", "me", genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd)));
|
|
1550
|
+
placeholders.g[gIdx] = placeholder;
|
|
1551
|
+
placeholders.c[cIdx] = placeholder;
|
|
1552
|
+
}
|
|
1553
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
1554
|
+
if (placeholders.g[gIdx] !== null) continue;
|
|
1555
|
+
const mIdx = gToMe[gIdx];
|
|
1556
|
+
if (mIdx === -1) continue;
|
|
1533
1557
|
const placeholder = allocate();
|
|
1534
|
-
placeholderToDiff.set(placeholder, wrapWhole("del", "
|
|
1535
|
-
placeholders.
|
|
1536
|
-
placeholders.
|
|
1558
|
+
placeholderToDiff.set(placeholder, wrapWhole("del", "cp", genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd)));
|
|
1559
|
+
placeholders.g[gIdx] = placeholder;
|
|
1560
|
+
placeholders.m[mIdx] = placeholder;
|
|
1537
1561
|
}
|
|
1538
|
-
for (let
|
|
1539
|
-
if (placeholders.
|
|
1562
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
1563
|
+
if (placeholders.g[gIdx] !== null) continue;
|
|
1564
|
+
const placeholder = allocate();
|
|
1565
|
+
placeholderToDiff.set(placeholder, "");
|
|
1566
|
+
placeholders.g[gIdx] = placeholder;
|
|
1567
|
+
}
|
|
1568
|
+
for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
|
|
1569
|
+
if (placeholders.c[cIdx] !== null) continue;
|
|
1570
|
+
const cText = cKeys[cIdx];
|
|
1571
|
+
let mIdx = -1;
|
|
1572
|
+
for (let candidate = 0; candidate < mTables.length; candidate++) {
|
|
1573
|
+
if (placeholders.m[candidate] !== null) continue;
|
|
1574
|
+
if (meToG[candidate] !== -1) continue;
|
|
1575
|
+
if (mKeys[candidate] === cText) {
|
|
1576
|
+
mIdx = candidate;
|
|
1577
|
+
break;
|
|
1578
|
+
}
|
|
1579
|
+
}
|
|
1580
|
+
if (mIdx === -1) continue;
|
|
1540
1581
|
const placeholder = allocate();
|
|
1541
|
-
placeholderToDiff.set(placeholder,
|
|
1542
|
-
placeholders.
|
|
1582
|
+
placeholderToDiff.set(placeholder, cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd));
|
|
1583
|
+
placeholders.c[cIdx] = placeholder;
|
|
1584
|
+
placeholders.m[mIdx] = placeholder;
|
|
1543
1585
|
}
|
|
1544
|
-
for (let
|
|
1545
|
-
if (placeholders.
|
|
1586
|
+
for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
|
|
1587
|
+
if (placeholders.c[cIdx] !== null) continue;
|
|
1546
1588
|
const placeholder = allocate();
|
|
1547
|
-
placeholderToDiff.set(placeholder, wrapWhole("
|
|
1548
|
-
placeholders.
|
|
1589
|
+
placeholderToDiff.set(placeholder, wrapWhole("ins", "cp", cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd)));
|
|
1590
|
+
placeholders.c[cIdx] = placeholder;
|
|
1549
1591
|
}
|
|
1550
|
-
for (let
|
|
1551
|
-
if (placeholders.
|
|
1592
|
+
for (let mIdx = 0; mIdx < mTables.length; mIdx++) {
|
|
1593
|
+
if (placeholders.m[mIdx] !== null) continue;
|
|
1552
1594
|
const placeholder = allocate();
|
|
1553
|
-
placeholderToDiff.set(placeholder, wrapWhole("ins", "me",
|
|
1554
|
-
placeholders.
|
|
1595
|
+
placeholderToDiff.set(placeholder, wrapWhole("ins", "me", meCurrent.slice(mTables[mIdx].tableStart, mTables[mIdx].tableEnd)));
|
|
1596
|
+
placeholders.m[mIdx] = placeholder;
|
|
1555
1597
|
}
|
|
1556
|
-
let
|
|
1557
|
-
for (let i =
|
|
1558
|
-
const p = placeholders.
|
|
1598
|
+
let modifiedGenesis = genesis;
|
|
1599
|
+
for (let i = gTables.length - 1; i >= 0; i--) {
|
|
1600
|
+
const p = placeholders.g[i];
|
|
1559
1601
|
if (p === null) continue;
|
|
1560
|
-
|
|
1602
|
+
modifiedGenesis = spliceString(modifiedGenesis, gTables[i].tableStart, gTables[i].tableEnd, p);
|
|
1561
1603
|
}
|
|
1562
|
-
let
|
|
1563
|
-
for (let i =
|
|
1564
|
-
const p = placeholders.
|
|
1604
|
+
let modifiedCp = cpLatest;
|
|
1605
|
+
for (let i = cTables.length - 1; i >= 0; i--) {
|
|
1606
|
+
const p = placeholders.c[i];
|
|
1565
1607
|
if (p === null) continue;
|
|
1566
|
-
|
|
1608
|
+
modifiedCp = spliceString(modifiedCp, cTables[i].tableStart, cTables[i].tableEnd, p);
|
|
1567
1609
|
}
|
|
1568
|
-
let
|
|
1569
|
-
for (let i =
|
|
1570
|
-
const p = placeholders.
|
|
1610
|
+
let modifiedMe = meCurrent;
|
|
1611
|
+
for (let i = mTables.length - 1; i >= 0; i--) {
|
|
1612
|
+
const p = placeholders.m[i];
|
|
1571
1613
|
if (p === null) continue;
|
|
1572
|
-
|
|
1614
|
+
modifiedMe = spliceString(modifiedMe, mTables[i].tableStart, mTables[i].tableEnd, p);
|
|
1573
1615
|
}
|
|
1574
1616
|
return {
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1617
|
+
modifiedGenesis,
|
|
1618
|
+
modifiedCp,
|
|
1619
|
+
modifiedMe,
|
|
1578
1620
|
placeholderToDiff
|
|
1579
1621
|
};
|
|
1580
1622
|
}
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
/**
|
|
1591
|
-
* Returns true when V1/V2/V3 tables can be 1:1 paired by position. The
|
|
1592
|
-
* three lists must have equal length AND each positional triple must
|
|
1593
|
-
* have content similar enough that positional pairing reflects the
|
|
1594
|
-
* authors' likely intent. The slow content-LCS path handles cases that
|
|
1595
|
-
* fail this gate (table reordering, additions, deletions).
|
|
1596
|
-
*/
|
|
1597
|
-
function positionallyAligned(v1, v2, v3, t1s, t2s, t3s) {
|
|
1598
|
-
if (t1s.length !== t2s.length || t2s.length !== t3s.length) return false;
|
|
1599
|
-
for (let i = 0; i < t1s.length; i++) {
|
|
1600
|
-
const k1 = tableKey(v1, t1s[i]);
|
|
1601
|
-
const k2 = tableKey(v2, t2s[i]);
|
|
1602
|
-
const k3 = tableKey(v3, t3s[i]);
|
|
1603
|
-
if (textSimilarity(k1, k2) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false;
|
|
1604
|
-
if (textSimilarity(k2, k3) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false;
|
|
1623
|
+
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .15;
|
|
1624
|
+
function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables) {
|
|
1625
|
+
if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false;
|
|
1626
|
+
for (let i = 0; i < gTables.length; i++) {
|
|
1627
|
+
const kG = tableKey(genesis, gTables[i]);
|
|
1628
|
+
const kC = tableKey(cpLatest, cTables[i]);
|
|
1629
|
+
const kM = tableKey(meCurrent, mTables[i]);
|
|
1630
|
+
if (textSimilarity(kG, kC) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false;
|
|
1631
|
+
if (textSimilarity(kG, kM) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false;
|
|
1605
1632
|
}
|
|
1606
1633
|
return true;
|
|
1607
1634
|
}
|
|
1608
1635
|
function tableKey(html, table) {
|
|
1609
1636
|
return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
|
|
1610
1637
|
}
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1638
|
+
/**
|
|
1639
|
+
* Character-level similarity above which the three-way aligner treats
|
|
1640
|
+
* two rows / tables as "the same logical entry, edited" rather than
|
|
1641
|
+
* an unrelated delete + insert. Matched to TableDiff's
|
|
1642
|
+
* `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
|
|
1643
|
+
* agree on which pairings are reachable; if a row's content overlap
|
|
1644
|
+
* is enough to fool the 2-way diff into pairing, it should also be
|
|
1645
|
+
* enough for 3-way.
|
|
1646
|
+
*/
|
|
1647
|
+
const THREE_WAY_FUZZY_THRESHOLD = .5;
|
|
1648
|
+
/**
|
|
1649
|
+
* Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
|
|
1650
|
+
* applies after its exact-LCS, but against one side of the genesis
|
|
1651
|
+
* spine (either cp or me). The genesis tables/rows are always the
|
|
1652
|
+
* "old" side; `newTable` is the cp or me table being aligned. Returns
|
|
1653
|
+
* the enriched alignment with additional paired entries.
|
|
1654
|
+
*
|
|
1655
|
+
* Cell-count guard: only fuzzy-pair when both rows have the same cell
|
|
1656
|
+
* count. Without this guard an asymmetric restructure — e.g. CP and
|
|
1657
|
+
* Me both added a different column — leads to ONE side fuzzy-pairing
|
|
1658
|
+
* its row with genesis (content overlap above threshold) while the
|
|
1659
|
+
* other side falls below threshold. That mismatch routes through
|
|
1660
|
+
* `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
|
|
1661
|
+
* branch, which emits CP's row as a Me-attributed deletion. In
|
|
1662
|
+
* cp-only mode `stripMeAttributedMarkers` then removes the row
|
|
1663
|
+
* entirely and CP's edit vanishes from the view — exactly the
|
|
1664
|
+
* content-loss case we're meant to prevent. Restricting fuzzy
|
|
1665
|
+
* pairing to same-shape rows preserves the common case (single cell
|
|
1666
|
+
* edit, identical row shape) while pushing structural mismatches
|
|
1667
|
+
* back to the boundary-insertion path that emits both sides
|
|
1668
|
+
* explicitly.
|
|
1669
|
+
*/
|
|
1670
|
+
function pairSimilarRowsThreeWay(alignment, genesis, newHtml, oldTable, newTable) {
|
|
1671
|
+
const oldTexts = oldTable.rows.map((r) => rowText(genesis, r));
|
|
1672
|
+
const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
|
|
1673
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
|
|
1674
|
+
if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0;
|
|
1675
|
+
return textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
|
|
1676
|
+
});
|
|
1677
|
+
}
|
|
1678
|
+
/**
|
|
1679
|
+
* Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
|
|
1680
|
+
* full table HTML keys, fuzzy-pair unmatched table runs by their
|
|
1681
|
+
* row-text-concatenated content. Without this, a table whose body
|
|
1682
|
+
* was edited (but not its outer shape) fails the exact-key match
|
|
1683
|
+
* and the preprocessing emits whole-table del + whole-table ins
|
|
1684
|
+
* instead of recursing into per-cell three-way diffs.
|
|
1685
|
+
*/
|
|
1686
|
+
function pairSimilarTablesThreeWay(alignment, oldHtml, newHtml, oldTables, newTables) {
	// One string per table: the text of each of its rows, joined by a single space.
	const flattenTables = (html, tables) => tables.map((table) => {
		const perRow = [];
		for (const row of table.rows) perRow.push(rowText(html, row));
		return perRow.join(" ");
	});
	const oldTableTexts = flattenTables(oldHtml, oldTables);
	const newTableTexts = flattenTables(newHtml, newTables);
	const scorePair = (oldIdx, newIdx) => textSimilarity(oldTableTexts[oldIdx], newTableTexts[newIdx]);
	return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, scorePair);
}
|
|
1691
|
+
/**
 * Three-way diff of one table. When genesis, CP and Me tables all pass
 * `sameDimensions` pairwise, cells are diffed position by position;
 * otherwise the row-level structural merge is used.
 */
function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
	const allSameShape = sameDimensions(tG, tC) && sameDimensions(tC, tM);
	const strategy = allSameShape ? diffTablePositional : diffTableStructural;
	return strategy(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
}
|
|
1615
|
-
function diffTablePositional(
|
|
1695
|
+
/**
 * Cell-by-cell three-way diff for tables whose rows and cells line up
 * one-to-one. All markup outside cell content is copied from genesis;
 * each cell's content is replaced by `cellDiff(genesisCell, cpCell, meCell)`.
 */
function diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
	const pieces = [];
	// Offset into `genesis` up to which markup has already been emitted.
	let emittedUpTo = tG.tableStart;
	for (const [rowIdx, genesisRow] of tG.rows.entries()) {
		const cpRow = tC.rows[rowIdx];
		const meRow = tM.rows[rowIdx];
		for (const [cellIdx, genesisCell] of genesisRow.cells.entries()) {
			const cpCell = cpRow.cells[cellIdx];
			const meCell = meRow.cells[cellIdx];
			// Genesis markup between the previous cell's content and this one's.
			pieces.push(genesis.slice(emittedUpTo, genesisCell.contentStart));
			const genesisContent = genesis.slice(genesisCell.contentStart, genesisCell.contentEnd);
			const cpContent = cpLatest.slice(cpCell.contentStart, cpCell.contentEnd);
			const meContent = meCurrent.slice(meCell.contentStart, meCell.contentEnd);
			pieces.push(cellDiff(genesisContent, cpContent, meContent));
			emittedUpTo = genesisCell.contentEnd;
		}
	}
	// Whatever genesis markup follows the last cell, through the end of the table.
	pieces.push(genesis.slice(emittedUpTo, tG.tableEnd));
	return pieces.join("");
}
|
|
1634
1714
|
/**
|
|
1635
|
-
*
|
|
1636
|
-
*
|
|
1637
|
-
* 1. Run row-LCS for each pair (V1↔V2, V2↔V3) over rowKeys
|
|
1638
|
-
* 2. Build per-V2-row origin (from align1) and fate (from align2)
|
|
1639
|
-
* 3. Walk V2's row order, interleaving:
|
|
1640
|
-
* - CP-deleted V1 rows (in align1 but not preserved into V2)
|
|
1641
|
-
* - Me-inserted V3 rows (in align2 but not from V2)
|
|
1642
|
-
* 4. For each V2 row, combine origin+fate to decide:
|
|
1643
|
-
* - equal: recurse cellDiff if cell counts match, else fall back
|
|
1644
|
-
* - ins-cp: emit V2 row as fully-CP-inserted
|
|
1645
|
-
* - del-me: emit V2 row as fully-Me-deleted
|
|
1646
|
-
* - reject: emit V2 row as Me-rejects-CP
|
|
1715
|
+
* Row-level genesis-spine merge for tables with diverging row/cell
|
|
1716
|
+
* counts.
|
|
1647
1717
|
*
|
|
1648
|
-
*
|
|
1649
|
-
*
|
|
1650
|
-
*
|
|
1651
|
-
*
|
|
1652
|
-
*
|
|
1653
|
-
*
|
|
1718
|
+
* 1. Align cp rows to genesis rows (alignCp), me rows to genesis rows
|
|
1719
|
+
* (alignMe), each via row-LCS over rowKeys.
|
|
1720
|
+
* 2. Per genesis row: cpFate (kept / deleted), meFate (kept / deleted).
|
|
1721
|
+
* Both kept → recurse cell diff (with structural-change cell handling
|
|
1722
|
+
* falling back to me-attribution Replace per the documented
|
|
1723
|
+
* limitation). One kept, other deleted → emit author-attributed full
|
|
1724
|
+
* row. Both deleted → silent.
|
|
1725
|
+
* 3. Off-spine rows: cp-only inserted rows + me-only inserted rows.
|
|
1726
|
+
* Check for content agreement at the same boundary; agreed
|
|
1727
|
+
* insertions emit plain.
|
|
1654
1728
|
*/
|
|
1655
|
-
function diffTableStructural(
|
|
1656
|
-
const
|
|
1657
|
-
const
|
|
1658
|
-
const
|
|
1659
|
-
const
|
|
1660
|
-
const
|
|
1661
|
-
const
|
|
1662
|
-
for (
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
const v2Fate = new Array(t2.rows.length);
|
|
1668
|
-
for (let i = 0; i < v2Fate.length; i++) v2Fate[i] = { kind: "me-deleted" };
|
|
1669
|
-
for (const a of align2) if (a.oldIdx !== null && a.newIdx !== null) v2Fate[a.oldIdx] = {
|
|
1670
|
-
kind: "preserved",
|
|
1671
|
-
v3Idx: a.newIdx
|
|
1672
|
-
};
|
|
1673
|
-
const cpDelRowsAt = collectCpDelRowsAtBoundary(align1, t2.rows.length);
|
|
1674
|
-
const meInsRowsAt = collectMeInsRowsAtBoundary(align2, t2.rows.length);
|
|
1729
|
+
function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
	const genesisRowCount = tG.rows.length;
	const gKeys = tG.rows.map((row) => rowKey(genesis, row));
	const cKeys = tC.rows.map((row) => rowKey(cpLatest, row));
	const mKeys = tM.rows.map((row) => rowKey(meCurrent, row));
	// Exact-key LCS alignment against genesis, then fuzzy pairing of the leftovers.
	const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC);
	const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM);
	// genesis row index → matched row index on the other side (-1 when unmatched).
	const spineLookup = (alignment) => {
		const lookup = Array.from({ length: genesisRowCount }, () => -1);
		for (const { oldIdx, newIdx } of alignment) {
			if (oldIdx === null || newIdx === null) continue;
			lookup[oldIdx] = newIdx;
		}
		return lookup;
	};
	const gToCp = spineLookup(alignCp);
	const gToMe = spineLookup(alignMe);
	const cpInsAt = collectInsertedRowsAtBoundary(alignCp, genesisRowCount);
	const meInsAt = collectInsertedRowsAtBoundary(alignMe, genesisRowCount);
	const pieces = [];
	pieces.push(tableHeaderSlice(genesis, tG));
	// Emits the rows each side inserted at genesis boundary `boundary`.
	const emitInsertionsAt = (boundary) => {
		const cpInserted = cpInsAt.get(boundary) ?? [];
		const meInserted = meInsAt.get(boundary) ?? [];
		if (cpInserted.length + meInserted.length === 0) return;
		const unclaimedMe = new Set(meInserted);
		for (const cIdx of cpInserted) {
			const cpRow = tC.rows[cIdx];
			// An Me insertion with an identical row key means both sides agree.
			const agreedMeIdx = [...unclaimedMe].find((mIdx) => mKeys[mIdx] === cKeys[cIdx]);
			if (agreedMeIdx === undefined) {
				pieces.push(emitFullRowAttributed(cpLatest, cpRow, "ins", "cp"));
				continue;
			}
			// Agreed insertion: emit the row once, without attribution markers.
			unclaimedMe.delete(agreedMeIdx);
			pieces.push(cpLatest.slice(cpRow.rowStart, cpRow.rowEnd));
		}
		for (const mIdx of unclaimedMe) {
			pieces.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], "ins", "me"));
		}
	};
	for (let g = 0; g < genesisRowCount; g++) {
		emitInsertionsAt(g);
		const cIdx = gToCp[g];
		const mIdx = gToMe[g];
		const cpKept = cIdx !== -1;
		const meKept = mIdx !== -1;
		if (cpKept && meKept) {
			pieces.push(emitPreservedRow(genesis, cpLatest, meCurrent, tG.rows[g], tC.rows[cIdx], tM.rows[mIdx], cellDiff));
		} else if (cpKept) {
			// Only CP still has the row: show CP's row as deleted by Me.
			pieces.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], "del", "me"));
		} else if (meKept) {
			// Only Me still has the row: show Me's row as deleted by CP.
			pieces.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], "del", "cp"));
		}
		// Neither side kept the row: emit nothing.
	}
	emitInsertionsAt(genesisRowCount);
	pieces.push(tableFooterSlice(genesis, tG));
	return pieces.join("");
}
|
|
1720
|
-
function
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1776
|
+
/**
 * Emits a genesis row that both CP and Me still have. When all three
 * versions have the same cell count the cells are diffed positionally
 * on top of genesis markup; otherwise whole rows are emitted with
 * author attribution.
 */
function emitPreservedRow(genesis, cpLatest, meCurrent, rG, rC, rM, cellDiff) {
	const genesisCellCount = rG.cells.length;
	const cpRestructured = rC.cells.length !== genesisCellCount;
	const meRestructured = rM.cells.length !== genesisCellCount;
	if (cpRestructured && meRestructured) {
		// Both sides changed the row shape: emit each side's row as its own insertion.
		return [emitFullRowAttributed(cpLatest, rC, "ins", "cp"), emitFullRowAttributed(meCurrent, rM, "ins", "me")].join("");
	}
	if (cpRestructured) {
		return [emitFullRowAttributed(genesis, rG, "del", "cp"), emitFullRowAttributed(cpLatest, rC, "ins", "cp")].join("");
	}
	if (meRestructured) {
		return [emitFullRowAttributed(genesis, rG, "del", "me"), emitFullRowAttributed(meCurrent, rM, "ins", "me")].join("");
	}
	// Same shape on all three sides: walk the cells in lockstep.
	const pieces = [];
	let emittedUpTo = rG.rowStart;
	rG.cells.forEach((genesisCell, cellIdx) => {
		const cpCell = rC.cells[cellIdx];
		const meCell = rM.cells[cellIdx];
		pieces.push(genesis.slice(emittedUpTo, genesisCell.contentStart));
		const genesisContent = genesis.slice(genesisCell.contentStart, genesisCell.contentEnd);
		const cpContent = cpLatest.slice(cpCell.contentStart, cpCell.contentEnd);
		const meContent = meCurrent.slice(meCell.contentStart, meCell.contentEnd);
		pieces.push(cellDiff(genesisContent, cpContent, meContent));
		emittedUpTo = genesisCell.contentEnd;
	});
	pieces.push(genesis.slice(emittedUpTo, rG.rowEnd));
	return pieces.join("");
}
|
|
1743
|
-
|
|
1806
|
+
/**
|
|
1807
|
+
* Returns map "genesis-row-boundary → list of new-side row indices
|
|
1808
|
+
* inserted at that boundary". Mirrors the word-level boundary collection
|
|
1809
|
+
* but at the row scale.
|
|
1810
|
+
*/
|
|
1811
|
+
/**
 * Groups inserted new-side rows by the genesis row boundary they sit at.
 *
 * Uses `Array.prototype.reverse` on the scratch buffer instead of the
 * ES2023-only `toReversed`, so the function also runs on runtimes that
 * predate ES2023; the result is unchanged.
 *
 * @param align alignment entries `{ oldIdx, newIdx }`; an entry whose
 *   `oldIdx` is `null` and `newIdx` is not is treated as an insertion.
 * @param genesisRowCount number of genesis rows; also the boundary used
 *   for insertions that follow the last genesis-side entry.
 * @returns Map from boundary (the `oldIdx` of the next genesis-side
 *   entry, or `genesisRowCount`) to the inserted `newIdx` values in
 *   alignment order.
 */
function collectInsertedRowsAtBoundary(align, genesisRowCount) {
	const out = /* @__PURE__ */ new Map();
	let nextGenesisBoundary = genesisRowCount;
	// Insertions seen since the last genesis-side entry, in back-to-front order.
	const pending = [];
	const flushPending = () => {
		if (pending.length === 0) return;
		const existing = out.get(nextGenesisBoundary) ?? [];
		// `pending` is a private scratch buffer that is cleared right after,
		// so reversing it in place is safe and restores alignment order.
		out.set(nextGenesisBoundary, [...pending.reverse(), ...existing]);
		pending.length = 0;
	};
	// Walk backwards so each insertion run is attributed to the genesis
	// entry that follows it.
	for (let i = align.length - 1; i >= 0; i--) {
		const { oldIdx, newIdx } = align[i];
		if (oldIdx !== null) {
			flushPending();
			nextGenesisBoundary = oldIdx;
		} else if (newIdx !== null) pending.push(newIdx);
	}
	// Insertions that precede the first genesis-side entry.
	flushPending();
	return out;
}
|
|
@@ -1774,42 +1842,35 @@ function tableFooterSlice(html, table) {
|
|
|
1774
1842
|
return html.slice(lastRow.rowEnd, table.tableEnd);
|
|
1775
1843
|
}
|
|
1776
1844
|
/**
|
|
1777
|
-
* Emit a row
|
|
1778
|
-
*
|
|
1779
|
-
*
|
|
1780
|
-
*
|
|
1781
|
-
* author classes/attrs.
|
|
1845
|
+
* Emit a row fully attributed to one author. Wraps `<tr>` and each
|
|
1846
|
+
* `<td>` with the author's diffins/diffdel class and `data-author`
|
|
1847
|
+
* attribute; wraps cell content with an inner `<ins>`/`<del>` matching
|
|
1848
|
+
* the word-level emission shape.
|
|
1782
1849
|
*/
|
|
1783
|
-
function emitFullRowAttributed(html, row, kind, author
|
|
1850
|
+
function emitFullRowAttributed(html, row, kind, author) {
	const { rowStart, rowEnd } = row;
	const trOpening = parseOpeningTagAt(html, rowStart);
	// No parsable opening tag at the row start: return the row untouched.
	if (!trOpening) return html.slice(rowStart, rowEnd);
	const attributedTr = injectAuthorAttribution(html.slice(rowStart, trOpening.end), kind, author);
	const pieces = [attributedTr];
	let emittedUpTo = trOpening.end;
	for (const cell of row.cells) {
		// Markup between cells is copied verbatim; each cell gets attributed.
		pieces.push(html.slice(emittedUpTo, cell.cellStart));
		pieces.push(emitFullCellAttributed(html, cell, kind, author));
		emittedUpTo = cell.cellEnd;
	}
	pieces.push(html.slice(emittedUpTo, rowEnd));
	return pieces.join("");
}
|
|
1796
|
-
function emitFullCellAttributed(html, cell, kind, author
|
|
1863
|
+
/**
 * Emits one cell fully attributed to `author`: the cell's opening tag
 * receives the attribution, and non-blank content is wrapped via
 * `Utils.wrapText` with the `diff${kind}` class.
 */
function emitFullCellAttributed(html, cell, kind, author) {
	const { cellStart, cellEnd, contentStart, contentEnd } = cell;
	const tdOpening = parseOpeningTagAt(html, cellStart);
	// No parsable opening tag at the cell start: return the cell untouched.
	if (!tdOpening) return html.slice(cellStart, cellEnd);
	const attributedTd = injectAuthorAttribution(html.slice(cellStart, tdOpening.end), kind, author);
	let body = html.slice(contentStart, contentEnd);
	// Whitespace-only content is left unwrapped.
	if (body.trim().length !== 0) {
		body = Utils_default.wrapText(body, kind, `diff${kind}`, authorAttribution(author));
	}
	const closingMarkup = html.slice(contentEnd, cellEnd);
	return attributedTd + body + closingMarkup;
}
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
* an `<tr>` or `<td>` already in the source HTML). Uses the same
|
|
1808
|
-
* attribution shape as `authorAttribution` + `Utils.wrapText` so the
|
|
1809
|
-
* inject-into-existing and wrap-around-text paths agree.
|
|
1810
|
-
*/
|
|
1811
|
-
function injectAuthorAttribution(openingTag, kind, author, rejectsAuthor) {
|
|
1812
|
-
const meta = authorAttribution(author, rejectsAuthor);
|
|
1872
|
+
/**
 * Adds the `diff${kind}` class, the author's extra classes and the
 * author's data attributes to an existing opening tag.
 */
function injectAuthorAttribution(openingTag, kind, author) {
	const { extraClasses, dataAttrs } = authorAttribution(author);
	const withClasses = injectClass(openingTag, `diff${kind} ${extraClasses}`);
	return injectDataAttrs(withClasses, dataAttrs ?? {});
}
|
|
1815
1876
|
function injectDataAttrs(openingTag, dataAttrs) {
|
|
@@ -2014,6 +2075,37 @@ var BlockFinder = class {
|
|
|
2014
2075
|
};
|
|
2015
2076
|
//#endregion
|
|
2016
2077
|
//#region src/HtmlDiff.ts
|
|
2078
|
+
/**
|
|
2079
|
+
* Opinionated options that align htmldiff's output with Microsoft Word's
|
|
2080
|
+
* track-changes rendering for legal-document rewrites.
|
|
2081
|
+
*
|
|
2082
|
+
* The library's bare default (`orphanMatchThreshold = 0`) keeps every
|
|
2083
|
+
* LCS match, however small — which fragments long sentence rewrites
|
|
2084
|
+
* into many tiny ins/del pairs around stray word matches ("of", "the",
|
|
2085
|
+
* "shall"). Word collapses those into a single coarse del+ins, which is
|
|
2086
|
+
* dramatically more readable for legal text.
|
|
2087
|
+
*
|
|
2088
|
+
* 0.25 was tuned empirically against a customer Word reference (US
|
|
2089
|
+
* Commercial One CP, May 2026):
|
|
2090
|
+
* - short edits (typo / one-word insert): output identical to
|
|
2091
|
+
* threshold=0 — inter-match distances are tiny so every match
|
|
2092
|
+
* trivially clears the bar;
|
|
2093
|
+
* - long rewrites (the "Specified Indebtedness" rewrite in the
|
|
2094
|
+
* reference): previously produced 6 dels + 5 ins fragmented around
|
|
2095
|
+
* stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
|
|
2096
|
+
* Word's 1+1 and a major readability win;
|
|
2097
|
+
* - higher values (0.3+) collapsed short edits containing inline
|
|
2098
|
+
* formatting changes into a single block — too aggressive.
|
|
2099
|
+
*
|
|
2100
|
+
* Consumers rendering legal documents should spread this into their
|
|
2101
|
+
* options:
|
|
2102
|
+
* `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
|
|
2103
|
+
* `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
|
|
2104
|
+
*
|
|
2105
|
+
* Other consumers (machine-readable diff, exact-token alignment) can
|
|
2106
|
+
* keep the bare default.
|
|
2107
|
+
*/
|
|
2108
|
+
const WORD_ALIGNED_OPTIONS = {
	// Threshold value documented in the comment above this constant.
	orphanMatchThreshold: 0.25
};
|
|
2017
2109
|
var HtmlDiff = class HtmlDiff {
|
|
2018
2110
|
/**
|
|
2019
2111
|
* This value defines balance between speed and memory utilization. The higher it is the faster it works and more memory consumes.
|
|
@@ -2089,6 +2181,16 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2089
2181
|
newText;
|
|
2090
2182
|
oldText;
|
|
2091
2183
|
tablePreprocessDepth = 0;
|
|
2184
|
+
/**
|
|
2185
|
+
* Tracks currently-open formatting-tag wraps. Each entry pairs the
|
|
2186
|
+
* opening tag (so a later closing tag can find its match) with the
|
|
2187
|
+
* styling info needed to RE-OPEN the wrap if an overlapping
|
|
2188
|
+
* formatting-tag close forces it to split. Without the styling info,
|
|
2189
|
+
* an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
|
|
2190
|
+
* unclosable wrap (the closing tag for the outer wrap arrives while
|
|
2191
|
+
* an inner wrap is still on the stack); see `insertTag`'s closing
|
|
2192
|
+
* handler for the split logic.
|
|
2193
|
+
*/
|
|
2092
2194
|
specialTagDiffStack = [];
|
|
2093
2195
|
newWords = [];
|
|
2094
2196
|
oldWords = [];
|
|
@@ -2156,8 +2258,23 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2156
2258
|
this.oldText = oldText;
|
|
2157
2259
|
this.newText = newText;
|
|
2158
2260
|
}
|
|
2159
|
-
|
|
2160
|
-
|
|
2261
|
+
/**
|
|
2262
|
+
* Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
|
|
2263
|
+
* `executeThreeWay`, with two intentional exceptions documented
|
|
2264
|
+
* inline below. Consumers wanting Word-aligned output should spread
|
|
2265
|
+
* `WORD_ALIGNED_OPTIONS` into the third argument.
|
|
2266
|
+
*
|
|
2267
|
+
* Note: unlike `analyze`, `execute` runs `build()` which performs
|
|
2268
|
+
* full table preprocessing — `tablePreprocessDepth` stays at 0 so
|
|
2269
|
+
* the recursive cell diff can happen. Callers can't override that.
|
|
2270
|
+
*/
|
|
2271
|
+
static execute(oldText, newText, options = {}) {
|
|
2272
|
+
const inner = new HtmlDiff(oldText, newText);
|
|
2273
|
+
if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
|
|
2274
|
+
if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
|
|
2275
|
+
if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
|
|
2276
|
+
if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
|
|
2277
|
+
return inner.build();
|
|
2161
2278
|
}
|
|
2162
2279
|
/**
|
|
2163
2280
|
* Analyse a two-way diff and return its raw building blocks: the word
|
|
@@ -2226,40 +2343,43 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2226
2343
|
return HtmlDiff.shouldUseContentProjections(oldWords, newWords, oldProj, newProj);
|
|
2227
2344
|
}
|
|
2228
2345
|
/**
|
|
2229
|
-
* Three-way HTML diff
|
|
2230
|
-
*
|
|
2231
|
-
*
|
|
2232
|
-
*
|
|
2233
|
-
*
|
|
2234
|
-
*
|
|
2235
|
-
* dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
|
|
2346
|
+
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
2347
|
+
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
2348
|
+
* from Me's accumulated changes (genesis → meCurrent). Use this for
|
|
2349
|
+
* blackline UX where the negotiation has gone through multiple turns
|
|
2350
|
+
* and the reader wants to see "who proposed what" across the whole
|
|
2351
|
+
* history, not just the most recent round.
|
|
2236
2352
|
*
|
|
2237
|
-
*
|
|
2238
|
-
*
|
|
2239
|
-
*
|
|
2240
|
-
*
|
|
2241
|
-
*
|
|
2242
|
-
*
|
|
2353
|
+
* When both parties happen to have made the same change (e.g. CP
|
|
2354
|
+
* proposed a wording change in turn N, Me adopted it in turn N+1),
|
|
2355
|
+
* the change reads as "settled" and is emitted unmarked — only
|
|
2356
|
+
* disagreements and pending proposals carry author attribution.
|
|
2357
|
+
*
|
|
2358
|
+
* @param genesis the shared common ancestor (per-user — the FE
|
|
2359
|
+
* picks between V1.0 and /preview/initialAnswers
|
|
2360
|
+
* based on `prefillReceiverAnswers`)
|
|
2361
|
+
* @param cpLatest the counterparty's current published version
|
|
2362
|
+
* @param meCurrent Me's current draft (the document on screen)
|
|
2243
2363
|
*/
|
|
2244
|
-
static executeThreeWay(
|
|
2245
|
-
return HtmlDiff.executeThreeWayWithDepth(
|
|
2246
|
-
}
|
|
2247
|
-
static executeThreeWayWithDepth(
|
|
2248
|
-
const tablePreprocess = depth < HtmlDiff.MaxThreeWayDepth ? preprocessTablesThreeWay(
|
|
2249
|
-
const
|
|
2250
|
-
const
|
|
2251
|
-
const
|
|
2364
|
+
static executeThreeWay(genesis, cpLatest, meCurrent, options = {}) {
|
|
2365
|
+
return HtmlDiff.executeThreeWayWithDepth(genesis, cpLatest, meCurrent, options, 0);
|
|
2366
|
+
}
|
|
2367
|
+
static executeThreeWayWithDepth(genesis, cpLatest, meCurrent, options, depth) {
|
|
2368
|
+
const tablePreprocess = depth < HtmlDiff.MaxThreeWayDepth ? preprocessTablesThreeWay(genesis, cpLatest, meCurrent, (g, c, m) => HtmlDiff.executeThreeWayWithDepth(g, c, m, options, depth + 1)) : null;
|
|
2369
|
+
const inGenesis = tablePreprocess?.modifiedGenesis ?? genesis;
|
|
2370
|
+
const inCp = tablePreprocess?.modifiedCp ?? cpLatest;
|
|
2371
|
+
const inMe = tablePreprocess?.modifiedMe ?? meCurrent;
|
|
2252
2372
|
const analyzeOpts = {
|
|
2253
|
-
useProjections: options.useProjections ?? (HtmlDiff.evaluateProjectionApplicability(
|
|
2373
|
+
useProjections: options.useProjections ?? (HtmlDiff.evaluateProjectionApplicability(inGenesis, inCp) && HtmlDiff.evaluateProjectionApplicability(inGenesis, inMe)),
|
|
2254
2374
|
blockExpressions: options.blockExpressions,
|
|
2255
2375
|
repeatingWordsAccuracy: options.repeatingWordsAccuracy,
|
|
2256
2376
|
orphanMatchThreshold: options.orphanMatchThreshold,
|
|
2257
2377
|
ignoreWhitespaceDifferences: options.ignoreWhitespaceDifferences
|
|
2258
2378
|
};
|
|
2259
|
-
const
|
|
2260
|
-
const
|
|
2261
|
-
if (
|
|
2262
|
-
const segments = buildSegments(
|
|
2379
|
+
const dCp = HtmlDiff.analyze(inGenesis, inCp, analyzeOpts);
|
|
2380
|
+
const dMe = HtmlDiff.analyze(inGenesis, inMe, analyzeOpts);
|
|
2381
|
+
if (dCp.oldDiffWords.length !== dMe.oldDiffWords.length) throw new Error(`HtmlDiff.executeThreeWay: genesis tokenisation diverged across pair-wise analyses (${dCp.oldDiffWords.length} vs ${dMe.oldDiffWords.length}). This indicates the symmetric-projection coordination has a bug.`);
|
|
2382
|
+
const segments = buildSegments(dCp, dMe);
|
|
2263
2383
|
const merged = HtmlDiff.emitSegments(segments);
|
|
2264
2384
|
return tablePreprocess ? restoreTablePlaceholders(merged, tablePreprocess.placeholderToDiff) : merged;
|
|
2265
2385
|
}
|
|
@@ -2269,6 +2389,25 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2269
2389
|
* buffer. Reusing the instance keeps the formatting-tag stack
|
|
2270
2390
|
* (`specialTagDiffStack`) coherent across segments — a `<strong>`
|
|
2271
2391
|
* opened in one segment and closed in another stays balanced.
|
|
2392
|
+
*
|
|
2393
|
+
* Edge case: an ins/del segment can open a formatting wrap whose
|
|
2394
|
+
* matching closer ends up in an equal segment (`<strong>` deleted
|
|
2395
|
+
* by CP but `</strong>` kept by both — buildSegments emits the open
|
|
2396
|
+
* as del-cp and the close as equal). Equal segments bypass
|
|
2397
|
+
* `insertTag` and push raw, so the stack entry for the open is
|
|
2398
|
+
* never popped. Rather than throw — which forces the caller's UI
|
|
2399
|
+
* into an error boundary — close every leftover wrap with `</ins>`
|
|
2400
|
+
* at the end of emission.
|
|
2401
|
+
*
|
|
2402
|
+
* Caveat: the `</ins>` close is honest for the mod-wrap that the
|
|
2403
|
+
* opener pushed (every formatting opener emits an inner `<ins…>`
|
|
2404
|
+
* postInject regardless of whether the outer segment is ins or
|
|
2405
|
+
* del). For del-segment formatting openers the outer `<del>` may
|
|
2406
|
+
* itself be left open by the same emission imbalance; this fixup
|
|
2407
|
+
* doesn't address that. Downstream browsers/DOMParser normalise
|
|
2408
|
+
* mildly-malformed HTML by closing dangling tags, so the rendered
|
|
2409
|
+
* output is usually acceptable — but the warning IS the signal
|
|
2410
|
+
* that the input had a real imbalance worth investigating.
|
|
2272
2411
|
*/
|
|
2273
2412
|
static emitSegments(segments) {
|
|
2274
2413
|
const emitter = new HtmlDiff("", "");
|
|
@@ -2280,7 +2419,13 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2280
2419
|
const { tag, baseClass, metadata } = segmentEmissionShape(seg.attr);
|
|
2281
2420
|
emitter.insertTag(tag, baseClass, [...seg.words], metadata);
|
|
2282
2421
|
}
|
|
2283
|
-
if (emitter.specialTagDiffStack.length > 0)
|
|
2422
|
+
if (emitter.specialTagDiffStack.length > 0) {
|
|
2423
|
+
console.warn(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting wrap(s) on the stack. Closing defensively. This usually means a formatting tag opens in a del/ins segment and its matching closer is in an equal segment.`);
|
|
2424
|
+
while (emitter.specialTagDiffStack.length > 0) {
|
|
2425
|
+
emitter.content.push("</ins>");
|
|
2426
|
+
emitter.specialTagDiffStack.pop();
|
|
2427
|
+
}
|
|
2428
|
+
}
|
|
2284
2429
|
return emitter.content.join("");
|
|
2285
2430
|
}
|
|
2286
2431
|
/**
|
|
@@ -2536,38 +2681,52 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2536
2681
|
if (words.length === 0) break;
|
|
2537
2682
|
const indexOfFirstNonTag = words.findIndex((x) => !Utils_default.isTag(x));
|
|
2538
2683
|
const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1;
|
|
2539
|
-
let
|
|
2540
|
-
let
|
|
2684
|
+
let preInject = "";
|
|
2685
|
+
let postInject = "";
|
|
2541
2686
|
if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
|
|
2542
2687
|
const tagNames = /* @__PURE__ */ new Set();
|
|
2543
2688
|
for (const word of words) if (Utils_default.isTag(word)) tagNames.add(Utils_default.getTagName(word));
|
|
2544
2689
|
const styledTagNames = Array.from(tagNames).join(" ");
|
|
2545
|
-
|
|
2546
|
-
|
|
2690
|
+
const styledCssClass = `mod ${styledTagNames}`;
|
|
2691
|
+
this.specialTagDiffStack.push({
|
|
2692
|
+
tag: words[0],
|
|
2693
|
+
styledTagNames,
|
|
2694
|
+
cssClass: styledCssClass,
|
|
2695
|
+
metadata
|
|
2696
|
+
});
|
|
2697
|
+
postInject = `<ins${Utils_default.composeTagAttributes(styledCssClass, metadata ?? {})}>`;
|
|
2547
2698
|
if (tag === HtmlDiff.DelTag) {
|
|
2548
2699
|
words.shift();
|
|
2549
2700
|
while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) words.shift();
|
|
2550
2701
|
}
|
|
2551
2702
|
} else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
|
|
2552
|
-
const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop();
|
|
2553
2703
|
let tagIndexToCompare = indexLastTagInFirstTagBlock;
|
|
2554
2704
|
if (tag === HtmlDiff.DelTag && indexOfFirstNonTag === -1) {
|
|
2555
2705
|
if (words.slice(0, indexLastTagInFirstTagBlock + 1).some((w) => !HtmlDiff.SpecialCaseClosingTagsSet.has(w.toLowerCase()))) tagIndexToCompare = 0;
|
|
2556
2706
|
}
|
|
2557
|
-
const
|
|
2558
|
-
|
|
2559
|
-
|
|
2560
|
-
|
|
2561
|
-
|
|
2707
|
+
const closingTagName = Utils_default.getTagName(words[tagIndexToCompare]);
|
|
2708
|
+
let matchIdx = -1;
|
|
2709
|
+
for (let i = this.specialTagDiffStack.length - 1; i >= 0; i--) if (Utils_default.getTagName(this.specialTagDiffStack[i].tag) === closingTagName) {
|
|
2710
|
+
matchIdx = i;
|
|
2711
|
+
break;
|
|
2712
|
+
}
|
|
2713
|
+
if (matchIdx >= 0) {
|
|
2714
|
+
const aboveEntries = this.specialTagDiffStack.splice(matchIdx + 1);
|
|
2715
|
+
this.specialTagDiffStack.pop();
|
|
2716
|
+
preInject = "</ins>".repeat(aboveEntries.length + 1);
|
|
2717
|
+
for (const entry of aboveEntries) {
|
|
2718
|
+
postInject += `<ins${Utils_default.composeTagAttributes(entry.cssClass, entry.metadata ?? {})}>`;
|
|
2719
|
+
this.specialTagDiffStack.push(entry);
|
|
2720
|
+
}
|
|
2721
|
+
}
|
|
2562
2722
|
if (tag === HtmlDiff.DelTag) {
|
|
2563
2723
|
words.shift();
|
|
2564
2724
|
while (words.length > 0 && HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) words.shift();
|
|
2565
2725
|
}
|
|
2566
2726
|
}
|
|
2567
|
-
if (words.length === 0 &&
|
|
2727
|
+
if (words.length === 0 && preInject.length === 0 && postInject.length === 0) break;
|
|
2568
2728
|
const isTagForExtraction = tag === HtmlDiff.DelTag ? (x) => Utils_default.isTag(x) && !HtmlDiff.SpecialCaseOpeningTagRegex.test(x) && !HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase()) : Utils_default.isTag;
|
|
2569
|
-
|
|
2570
|
-
else this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join("") + specialCaseTagInjection);
|
|
2729
|
+
this.content.push(preInject + this.extractConsecutiveWords(words, isTagForExtraction).join("") + postInject);
|
|
2571
2730
|
if (words.length === 0) continue;
|
|
2572
2731
|
this.insertTag(tag, cssClass, words, metadata);
|
|
2573
2732
|
break;
|
|
@@ -2632,6 +2791,17 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2632
2791
|
curr = next;
|
|
2633
2792
|
continue;
|
|
2634
2793
|
}
|
|
2794
|
+
let allTags = true;
|
|
2795
|
+
for (let i = curr.startInNew; i < curr.endInNew; i++) if (!Utils_default.isTag(wordsForDiffNew[i])) {
|
|
2796
|
+
allTags = false;
|
|
2797
|
+
break;
|
|
2798
|
+
}
|
|
2799
|
+
if (allTags) {
|
|
2800
|
+
yield curr;
|
|
2801
|
+
prev = curr;
|
|
2802
|
+
curr = next;
|
|
2803
|
+
continue;
|
|
2804
|
+
}
|
|
2635
2805
|
let oldDistanceInChars = 0;
|
|
2636
2806
|
for (let i = prev.endInOld; i < next.startInOld; i++) oldDistanceInChars += wordsForDiffOld[i].length;
|
|
2637
2807
|
let newDistanceInChars = 0;
|
|
@@ -2674,6 +2844,7 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2674
2844
|
}
|
|
2675
2845
|
};
|
|
2676
2846
|
//#endregion
|
|
2677
|
-
|
|
2847
|
+
exports.WORD_ALIGNED_OPTIONS = WORD_ALIGNED_OPTIONS;
|
|
2848
|
+
exports.default = HtmlDiff;
|
|
2678
2849
|
|
|
2679
2850
|
//# sourceMappingURL=HtmlDiff.cjs.map
|