@createiq/htmldiff 1.2.0-beta.0 → 1.2.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -19
- package/dist/HtmlDiff.cjs +609 -438
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +89 -16
- package/dist/HtmlDiff.d.mts +89 -16
- package/dist/HtmlDiff.mjs +604 -438
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +218 -74
- package/src/ThreeWayDiff.ts +220 -127
- package/src/ThreeWayTable.ts +549 -491
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +316 -92
- package/test/HtmlDiff.threeWay.tables.spec.ts +200 -196
- package/test/Utils.spec.ts +3 -3
package/dist/HtmlDiff.mjs
CHANGED
|
@@ -1252,119 +1252,155 @@ function findTopLevelCells(html, start, end) {
|
|
|
1252
1252
|
}
|
|
1253
1253
|
//#endregion
|
|
1254
1254
|
//#region src/ThreeWayDiff.ts
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1255
|
+
/**
|
|
1256
|
+
* Builds the attributed segment stream for a three-way diff.
|
|
1257
|
+
*
|
|
1258
|
+
* @param dCp analysis of diff(genesis → cp-latest)
|
|
1259
|
+
* @param dMe analysis of diff(genesis → me-current)
|
|
1260
|
+
*
|
|
1261
|
+
* Both analyses must share the same `oldDiffWords` (the genesis tokens)
|
|
1262
|
+
* — the caller guarantees this by passing the same genesis input and
|
|
1263
|
+
* the same `useProjections` decision to both `HtmlDiff.analyze` calls.
|
|
1264
|
+
*/
|
|
1265
|
+
function buildSegments(dCp, dMe) {
|
|
1266
|
+
const genesisLen = dCp.oldDiffWords.length;
|
|
1267
|
+
const cpFate = buildFateFromGenesis(dCp.operations, genesisLen);
|
|
1268
|
+
const meFate = buildFateFromGenesis(dMe.operations, genesisLen);
|
|
1269
|
+
const cpInsAt = collectInsertionsKeyedByEnd(dCp);
|
|
1270
|
+
const meInsAt = collectInsertionsKeyedByEnd(dMe);
|
|
1271
|
+
const diffToOriginal = dCp.oldContentToOriginal ?? Array.from({ length: genesisLen }, (_, i) => i);
|
|
1272
|
+
const genesisOriginalLen = dCp.oldOriginalWords.length;
|
|
1263
1273
|
const segments = [];
|
|
1264
1274
|
let originalCursor = 0;
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
author: "cp"
|
|
1270
|
-
}, cpDel);
|
|
1271
|
-
const attr = combine(fromV1[i], toV3[i]);
|
|
1275
|
+
emitBoundary(0, cpInsAt, meInsAt, dCp.newDiffWords, dMe.newDiffWords, segments);
|
|
1276
|
+
for (let i = 0; i < genesisLen; i++) {
|
|
1277
|
+
const cpDel = cpFate[i] === "deleted";
|
|
1278
|
+
const meDel = meFate[i] === "deleted";
|
|
1272
1279
|
const origIdx = diffToOriginal[i];
|
|
1273
|
-
const slice =
|
|
1280
|
+
const slice = dCp.oldOriginalWords.slice(originalCursor, origIdx + 1);
|
|
1274
1281
|
originalCursor = origIdx + 1;
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
kind: "
|
|
1282
|
+
if (!cpDel && !meDel) appendSegment(segments, { kind: "equal" }, slice);
|
|
1283
|
+
else if (cpDel && meDel) {
|
|
1284
|
+
if (slice.length > 1) appendSegment(segments, { kind: "equal" }, slice.slice(0, slice.length - 1));
|
|
1285
|
+
} else if (cpDel) appendSegment(segments, {
|
|
1286
|
+
kind: "del",
|
|
1287
|
+
author: "cp"
|
|
1288
|
+
}, slice);
|
|
1289
|
+
else appendSegment(segments, {
|
|
1290
|
+
kind: "del",
|
|
1284
1291
|
author: "me"
|
|
1285
|
-
},
|
|
1292
|
+
}, slice);
|
|
1293
|
+
emitBoundary(i + 1, cpInsAt, meInsAt, dCp.newDiffWords, dMe.newDiffWords, segments);
|
|
1286
1294
|
}
|
|
1287
|
-
|
|
1288
|
-
if (tailCpDel?.length) appendSegment(segments, {
|
|
1289
|
-
kind: "del",
|
|
1290
|
-
author: "cp"
|
|
1291
|
-
}, tailCpDel);
|
|
1292
|
-
const tailMeIns = meInsertionsAt.get(v2DiffLen);
|
|
1293
|
-
if (tailMeIns?.length) appendSegment(segments, {
|
|
1294
|
-
kind: "ins",
|
|
1295
|
-
author: "me"
|
|
1296
|
-
}, tailMeIns);
|
|
1297
|
-
if (originalCursor < v2OriginalLen) appendSegment(segments, { kind: "equal" }, d1.newOriginalWords.slice(originalCursor));
|
|
1295
|
+
if (originalCursor < genesisOriginalLen) appendSegment(segments, { kind: "equal" }, dCp.oldOriginalWords.slice(originalCursor));
|
|
1298
1296
|
return segments;
|
|
1299
1297
|
}
|
|
1300
|
-
function buildOriginMap(ops, v2Len) {
|
|
1301
|
-
const out = new Array(v2Len).fill("preserved-from-v1");
|
|
1302
|
-
for (const op of ops) {
|
|
1303
|
-
const origin = op.action === 2 ? "inserted-by-cp" : op.action === 4 ? "replaced-into-by-cp" : null;
|
|
1304
|
-
if (origin === null) continue;
|
|
1305
|
-
for (let i = op.startInNew; i < op.endInNew; i++) if (i >= 0 && i < v2Len) out[i] = origin;
|
|
1306
|
-
}
|
|
1307
|
-
return out;
|
|
1308
|
-
}
|
|
1309
|
-
function buildFateMap(ops, v2Len) {
|
|
1310
|
-
const out = new Array(v2Len).fill("preserved-to-v3");
|
|
1311
|
-
for (const op of ops) {
|
|
1312
|
-
const fate = op.action === 1 ? "deleted-by-me" : op.action === 4 ? "replaced-out-by-me" : null;
|
|
1313
|
-
if (fate === null) continue;
|
|
1314
|
-
for (let i = op.startInOld; i < op.endInOld; i++) if (i >= 0 && i < v2Len) out[i] = fate;
|
|
1315
|
-
}
|
|
1316
|
-
return out;
|
|
1317
|
-
}
|
|
1318
|
-
function isDeletion(attr) {
|
|
1319
|
-
return attr.kind === "del" || attr.kind === "reject";
|
|
1320
|
-
}
|
|
1321
|
-
function combine(origin, fate) {
|
|
1322
|
-
const cpInserted = origin === "inserted-by-cp" || origin === "replaced-into-by-cp";
|
|
1323
|
-
const meDeleted = fate === "deleted-by-me" || fate === "replaced-out-by-me";
|
|
1324
|
-
if (!cpInserted && !meDeleted) return { kind: "equal" };
|
|
1325
|
-
if (cpInserted && !meDeleted) return {
|
|
1326
|
-
kind: "ins",
|
|
1327
|
-
author: "cp"
|
|
1328
|
-
};
|
|
1329
|
-
if (!cpInserted && meDeleted) return {
|
|
1330
|
-
kind: "del",
|
|
1331
|
-
author: "me"
|
|
1332
|
-
};
|
|
1333
|
-
return {
|
|
1334
|
-
kind: "reject",
|
|
1335
|
-
by: "me",
|
|
1336
|
-
rejected: "cp"
|
|
1337
|
-
};
|
|
1338
|
-
}
|
|
1339
1298
|
/**
|
|
1340
|
-
*
|
|
1341
|
-
*
|
|
1342
|
-
*
|
|
1299
|
+
* Per genesis-diff-index, what did this side do to that token? Both
|
|
1300
|
+
* Delete and Replace ops remove the token from the side's output, so
|
|
1301
|
+
* both contribute `'deleted'`. Equal ops contribute `'kept'`. Insert
|
|
1302
|
+
* ops have an empty old range, so they don't touch the genesis fate
|
|
1303
|
+
* map.
|
|
1343
1304
|
*/
|
|
1344
|
-
function
|
|
1345
|
-
const out =
|
|
1346
|
-
for (const op of
|
|
1305
|
+
function buildFateFromGenesis(ops, genesisLen) {
|
|
1306
|
+
const out = new Array(genesisLen).fill("kept");
|
|
1307
|
+
for (const op of ops) {
|
|
1347
1308
|
if (op.action !== 1 && op.action !== 4) continue;
|
|
1348
|
-
|
|
1349
|
-
if (words.length === 0) continue;
|
|
1350
|
-
const existing = out.get(op.startInNew) ?? [];
|
|
1351
|
-
existing.push(...words);
|
|
1352
|
-
out.set(op.startInNew, existing);
|
|
1309
|
+
for (let i = op.startInOld; i < op.endInOld; i++) if (i >= 0 && i < genesisLen) out[i] = "deleted";
|
|
1353
1310
|
}
|
|
1354
1311
|
return out;
|
|
1355
1312
|
}
|
|
1356
|
-
|
|
1313
|
+
/**
|
|
1314
|
+
* Per genesis boundary `b`, collect tokens this side inserted at that
|
|
1315
|
+
* boundary. Keyed by `endInOld` so a Replace at genesis[k..k+1] has its
|
|
1316
|
+
* insertion at boundary k+1 (after the deleted token) rather than k
|
|
1317
|
+
* (before) — that produces the del-then-ins visual order.
|
|
1318
|
+
*
|
|
1319
|
+
* For pure Insert ops the old range is empty (endInOld == startInOld),
|
|
1320
|
+
* so the key is the same as the semantic between-tokens position.
|
|
1321
|
+
*/
|
|
1322
|
+
function collectInsertionsKeyedByEnd(d) {
|
|
1357
1323
|
const out = /* @__PURE__ */ new Map();
|
|
1358
1324
|
for (const op of d.operations) {
|
|
1359
1325
|
if (op.action !== 2 && op.action !== 4) continue;
|
|
1360
1326
|
const words = d.newDiffWords.slice(op.startInNew, op.endInNew);
|
|
1361
1327
|
if (words.length === 0) continue;
|
|
1362
|
-
const
|
|
1328
|
+
const key = op.endInOld;
|
|
1329
|
+
const existing = out.get(key) ?? [];
|
|
1363
1330
|
existing.push(...words);
|
|
1364
|
-
out.set(
|
|
1331
|
+
out.set(key, existing);
|
|
1365
1332
|
}
|
|
1366
1333
|
return out;
|
|
1367
1334
|
}
|
|
1335
|
+
/**
|
|
1336
|
+
* Emit any insertions at boundary `b`.
|
|
1337
|
+
*
|
|
1338
|
+
* Reading model: a legal reviewer wants to see CP's INTENT relative
|
|
1339
|
+
* to Me's current content. Me's content is the base; CP's deltas are
|
|
1340
|
+
* what they need to act on. Under that framing:
|
|
1341
|
+
* - tokens both authors inserted at the same boundary → settled
|
|
1342
|
+
* - tokens CP inserted that Me doesn't have → ins-cp (CP wants
|
|
1343
|
+
* this added)
|
|
1344
|
+
* - tokens Me inserted that CP doesn't have → del-cp (CP wants
|
|
1345
|
+
* this removed from Me's content)
|
|
1346
|
+
*
|
|
1347
|
+
* The third case is the load-bearing attribution flip. The
|
|
1348
|
+
* genesis-spine view technically labels me-only-at-boundary tokens
|
|
1349
|
+
* as "ins-me" (Me added them; CP didn't), but that's confusing to
|
|
1350
|
+
* a reviewer: they see "Me added X" alongside "CP added Y" and have
|
|
1351
|
+
* to mentally derive "CP wants X gone, replaced with Y". Surfacing
|
|
1352
|
+
* me-only tokens as `del-cp` shows CP's intent directly:
|
|
1353
|
+
* - "CP accepted Me's text minus `things`": settled bulk + del-cp
|
|
1354
|
+
* `things` (no parallel redundant insertions)
|
|
1355
|
+
* - "CP wants `cruel` where Me wrote `brave`": ins-cp `cruel` +
|
|
1356
|
+
* del-cp `brave` (the substitution intent reads directly)
|
|
1357
|
+
* - "CP added extra words": cp-extras stay as ins-cp (same as
|
|
1358
|
+
* before; the cp-only direction was always intent-correct)
|
|
1359
|
+
*
|
|
1360
|
+
* Pure single-side insertions (Me added text CP doesn't engage
|
|
1361
|
+
* with at all, or vice versa) keep their genesis-spine attribution
|
|
1362
|
+
* — these aren't refinement cases, just Me's own content additions.
|
|
1363
|
+
*/
|
|
1364
|
+
function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments) {
|
|
1365
|
+
const cpIns = cpInsAt.get(b);
|
|
1366
|
+
const meIns = meInsAt.get(b);
|
|
1367
|
+
const hasCp = !!cpIns && cpIns.length > 0;
|
|
1368
|
+
const hasMe = !!meIns && meIns.length > 0;
|
|
1369
|
+
if (!hasCp && !hasMe) return;
|
|
1370
|
+
if (!hasCp) {
|
|
1371
|
+
appendSegment(segments, {
|
|
1372
|
+
kind: "ins",
|
|
1373
|
+
author: "me"
|
|
1374
|
+
}, meIns);
|
|
1375
|
+
return;
|
|
1376
|
+
}
|
|
1377
|
+
if (!hasMe) {
|
|
1378
|
+
appendSegment(segments, {
|
|
1379
|
+
kind: "ins",
|
|
1380
|
+
author: "cp"
|
|
1381
|
+
}, cpIns);
|
|
1382
|
+
return;
|
|
1383
|
+
}
|
|
1384
|
+
if (tokenArraysEqual(cpIns, meIns)) {
|
|
1385
|
+
appendSegment(segments, { kind: "equal" }, cpIns);
|
|
1386
|
+
return;
|
|
1387
|
+
}
|
|
1388
|
+
const alignment = lcsAlign(cpIns, meIns);
|
|
1389
|
+
for (const a of alignment) if (a.oldIdx !== null && a.newIdx !== null) appendSegment(segments, { kind: "equal" }, [cpIns[a.oldIdx]]);
|
|
1390
|
+
else if (a.oldIdx !== null) appendSegment(segments, {
|
|
1391
|
+
kind: "ins",
|
|
1392
|
+
author: "cp"
|
|
1393
|
+
}, [cpIns[a.oldIdx]]);
|
|
1394
|
+
else if (a.newIdx !== null) appendSegment(segments, {
|
|
1395
|
+
kind: "del",
|
|
1396
|
+
author: "cp"
|
|
1397
|
+
}, [meIns[a.newIdx]]);
|
|
1398
|
+
}
|
|
1399
|
+
function tokenArraysEqual(a, b) {
|
|
1400
|
+
if (a.length !== b.length) return false;
|
|
1401
|
+
for (let i = 0; i < a.length; i++) if (a[i] !== b[i]) return false;
|
|
1402
|
+
return true;
|
|
1403
|
+
}
|
|
1368
1404
|
function appendSegment(segments, attr, words) {
|
|
1369
1405
|
if (words.length === 0) return;
|
|
1370
1406
|
const last = segments[segments.length - 1];
|
|
@@ -1381,7 +1417,6 @@ function sameAttribution(a, b) {
|
|
|
1381
1417
|
if (a.kind === "equal" && b.kind === "equal") return true;
|
|
1382
1418
|
if (a.kind === "ins" && b.kind === "ins") return a.author === b.author;
|
|
1383
1419
|
if (a.kind === "del" && b.kind === "del") return a.author === b.author;
|
|
1384
|
-
if (a.kind === "reject" && b.kind === "reject") return true;
|
|
1385
1420
|
return false;
|
|
1386
1421
|
}
|
|
1387
1422
|
/**
|
|
@@ -1391,375 +1426,404 @@ function sameAttribution(a, b) {
|
|
|
1391
1426
|
* pre-wrap) stay consistent. A change here propagates to every author
|
|
1392
1427
|
* marker in the output.
|
|
1393
1428
|
*/
|
|
1394
|
-
function authorAttribution(author
|
|
1395
|
-
const dataAttrs = { author };
|
|
1396
|
-
if (rejects !== void 0) dataAttrs.rejects = rejects;
|
|
1429
|
+
function authorAttribution(author) {
|
|
1397
1430
|
return {
|
|
1398
|
-
extraClasses:
|
|
1399
|
-
dataAttrs
|
|
1431
|
+
extraClasses: author,
|
|
1432
|
+
dataAttrs: { author }
|
|
1400
1433
|
};
|
|
1401
1434
|
}
|
|
1402
1435
|
/**
|
|
1403
1436
|
* Resolve a segment's attribution into the wrapper-tag, base CSS class,
|
|
1404
1437
|
* and `WrapMetadata` consumed by `Utils.wrapText` / `insertTag`. The
|
|
1405
1438
|
* caller is `HtmlDiff.executeThreeWay`'s emission loop.
|
|
1439
|
+
*
|
|
1440
|
+
* `equal` segments don't go through this — they're emitted unmarked.
|
|
1406
1441
|
*/
|
|
1407
1442
|
function segmentEmissionShape(attr) {
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
};
|
|
1414
|
-
case "del": return {
|
|
1415
|
-
tag: "del",
|
|
1416
|
-
baseClass: "diffdel",
|
|
1417
|
-
metadata: authorAttribution(attr.author)
|
|
1418
|
-
};
|
|
1419
|
-
case "reject": return {
|
|
1420
|
-
tag: "del",
|
|
1421
|
-
baseClass: "diffdel",
|
|
1422
|
-
metadata: authorAttribution(attr.by, attr.rejected)
|
|
1423
|
-
};
|
|
1424
|
-
}
|
|
1443
|
+
return {
|
|
1444
|
+
tag: attr.kind,
|
|
1445
|
+
baseClass: attr.kind === "ins" ? "diffins" : "diffdel",
|
|
1446
|
+
metadata: authorAttribution(attr.author)
|
|
1447
|
+
};
|
|
1425
1448
|
}
|
|
1426
1449
|
//#endregion
|
|
1427
1450
|
//#region src/ThreeWayTable.ts
|
|
1428
|
-
function preprocessTablesThreeWay(
|
|
1429
|
-
const
|
|
1430
|
-
const
|
|
1431
|
-
const
|
|
1432
|
-
if (
|
|
1433
|
-
for (const t of
|
|
1434
|
-
for (const t of
|
|
1435
|
-
for (const t of
|
|
1436
|
-
const placeholderPrefix = makePlaceholderPrefix(
|
|
1437
|
-
if (positionallyAligned(
|
|
1438
|
-
return
|
|
1439
|
-
}
|
|
1440
|
-
function preprocessAlignedByPosition(
|
|
1451
|
+
function preprocessTablesThreeWay(genesis, cpLatest, meCurrent, cellDiff) {
|
|
1452
|
+
const gTables = findTopLevelTables(genesis);
|
|
1453
|
+
const cTables = findTopLevelTables(cpLatest);
|
|
1454
|
+
const mTables = findTopLevelTables(meCurrent);
|
|
1455
|
+
if (gTables.length === 0 && cTables.length === 0 && mTables.length === 0) return null;
|
|
1456
|
+
for (const t of gTables) if (exceedsSizeLimit(t)) return null;
|
|
1457
|
+
for (const t of cTables) if (exceedsSizeLimit(t)) return null;
|
|
1458
|
+
for (const t of mTables) if (exceedsSizeLimit(t)) return null;
|
|
1459
|
+
const placeholderPrefix = makePlaceholderPrefix(genesis, cpLatest, meCurrent);
|
|
1460
|
+
if (positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables)) return preprocessAlignedByPosition(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix);
|
|
1461
|
+
return preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix);
|
|
1462
|
+
}
|
|
1463
|
+
function preprocessAlignedByPosition(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix) {
|
|
1441
1464
|
const pairs = [];
|
|
1442
|
-
for (let i = 0; i <
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
diffed: diffTableThreeWay(
|
|
1465
|
+
for (let i = 0; i < gTables.length; i++) pairs.push({
|
|
1466
|
+
g: gTables[i],
|
|
1467
|
+
c: cTables[i],
|
|
1468
|
+
m: mTables[i],
|
|
1469
|
+
diffed: diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[i], cTables[i], mTables[i], cellDiff)
|
|
1447
1470
|
});
|
|
1448
|
-
let
|
|
1449
|
-
let
|
|
1450
|
-
let
|
|
1471
|
+
let modifiedGenesis = genesis;
|
|
1472
|
+
let modifiedCp = cpLatest;
|
|
1473
|
+
let modifiedMe = meCurrent;
|
|
1451
1474
|
const placeholderToDiff = /* @__PURE__ */ new Map();
|
|
1452
1475
|
for (let i = pairs.length - 1; i >= 0; i--) {
|
|
1453
1476
|
const placeholder = `${placeholderPrefix}${i}-->`;
|
|
1454
1477
|
placeholderToDiff.set(placeholder, pairs[i].diffed);
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1478
|
+
modifiedGenesis = spliceString(modifiedGenesis, pairs[i].g.tableStart, pairs[i].g.tableEnd, placeholder);
|
|
1479
|
+
modifiedCp = spliceString(modifiedCp, pairs[i].c.tableStart, pairs[i].c.tableEnd, placeholder);
|
|
1480
|
+
modifiedMe = spliceString(modifiedMe, pairs[i].m.tableStart, pairs[i].m.tableEnd, placeholder);
|
|
1458
1481
|
}
|
|
1459
1482
|
return {
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1483
|
+
modifiedGenesis,
|
|
1484
|
+
modifiedCp,
|
|
1485
|
+
modifiedMe,
|
|
1463
1486
|
placeholderToDiff
|
|
1464
1487
|
};
|
|
1465
1488
|
}
|
|
1466
1489
|
/**
|
|
1467
|
-
* Multi-table
|
|
1468
|
-
*
|
|
1469
|
-
* each
|
|
1470
|
-
* table
|
|
1471
|
-
* - paired-everywhere placeholders → equal in both diffs → unwrapped
|
|
1472
|
-
* - V2-only (CP-inserted + Me-rejected) → inserted by CP, deleted by
|
|
1473
|
-
* Me → reject wrapper around the table
|
|
1474
|
-
* - V2+V3 (CP-inserted, Me-kept) → ins-cp wrapper
|
|
1475
|
-
* - V1+V2 (Me-deleted) → del-me wrapper
|
|
1476
|
-
* - V1-only (CP-deleted before V2) → del-cp wrapper
|
|
1477
|
-
* - V3-only (Me-inserted) → ins-me wrapper
|
|
1490
|
+
* Multi-table handler. Tables are paired against `genesis` (the spine)
|
|
1491
|
+
* via content-LCS on each of cp and me. Placeholders are assigned so
|
|
1492
|
+
* each appears only in the inputs that actually contain the underlying
|
|
1493
|
+
* table. The word-level merger then attributes them naturally:
|
|
1478
1494
|
*
|
|
1479
|
-
*
|
|
1480
|
-
*
|
|
1481
|
-
*
|
|
1495
|
+
* - paired in genesis+cp+me → equal in both diffs → emit recursive 3-way diff
|
|
1496
|
+
* - in cp+me, not in genesis → both-agree insertion → emit plain
|
|
1497
|
+
* - in cp only → cp insertion → ins-cp wrapper (Me didn't take it)
|
|
1498
|
+
* - in me only → me insertion → ins-me wrapper
|
|
1499
|
+
* - in genesis+cp, not me → me deletion → del-me wrapper
|
|
1500
|
+
* - in genesis+me, not cp → cp deletion → del-cp wrapper
|
|
1501
|
+
* - in genesis only → both deleted, settled → silent (placeholder content empty)
|
|
1482
1502
|
*/
|
|
1483
|
-
function
|
|
1484
|
-
const
|
|
1485
|
-
const
|
|
1486
|
-
const
|
|
1487
|
-
const
|
|
1488
|
-
const
|
|
1489
|
-
const
|
|
1490
|
-
const
|
|
1491
|
-
for (const a of
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
}
|
|
1495
|
-
const
|
|
1496
|
-
const
|
|
1497
|
-
for (const a of
|
|
1498
|
-
|
|
1499
|
-
|
|
1503
|
+
function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix) {
|
|
1504
|
+
const gKeys = gTables.map((t) => tableKey(genesis, t));
|
|
1505
|
+
const cKeys = cTables.map((t) => tableKey(cpLatest, t));
|
|
1506
|
+
const mKeys = mTables.map((t) => tableKey(meCurrent, t));
|
|
1507
|
+
const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables);
|
|
1508
|
+
const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables);
|
|
1509
|
+
const gToCp = new Array(gTables.length).fill(-1);
|
|
1510
|
+
const cpToG = new Array(cTables.length).fill(-1);
|
|
1511
|
+
for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
|
|
1512
|
+
gToCp[a.oldIdx] = a.newIdx;
|
|
1513
|
+
cpToG[a.newIdx] = a.oldIdx;
|
|
1514
|
+
}
|
|
1515
|
+
const gToMe = new Array(gTables.length).fill(-1);
|
|
1516
|
+
const meToG = new Array(mTables.length).fill(-1);
|
|
1517
|
+
for (const a of alignMe) if (a.oldIdx !== null && a.newIdx !== null) {
|
|
1518
|
+
gToMe[a.oldIdx] = a.newIdx;
|
|
1519
|
+
meToG[a.newIdx] = a.oldIdx;
|
|
1500
1520
|
}
|
|
1501
1521
|
let nextId = 0;
|
|
1502
1522
|
const placeholderToDiff = /* @__PURE__ */ new Map();
|
|
1503
1523
|
const placeholders = {
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1524
|
+
g: new Array(gTables.length).fill(null),
|
|
1525
|
+
c: new Array(cTables.length).fill(null),
|
|
1526
|
+
m: new Array(mTables.length).fill(null)
|
|
1507
1527
|
};
|
|
1508
1528
|
const allocate = () => `${placeholderPrefix}${nextId++}-->`;
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
const
|
|
1512
|
-
|
|
1529
|
+
const wrapWhole = (tag, author, tableHtml) => Utils_default.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author));
|
|
1530
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
1531
|
+
const cIdx = gToCp[gIdx];
|
|
1532
|
+
const mIdx = gToMe[gIdx];
|
|
1533
|
+
if (cIdx === -1 || mIdx === -1) continue;
|
|
1513
1534
|
const placeholder = allocate();
|
|
1514
|
-
placeholderToDiff.set(placeholder, diffTableThreeWay(
|
|
1515
|
-
placeholders.
|
|
1516
|
-
placeholders.
|
|
1517
|
-
placeholders.
|
|
1518
|
-
}
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
if (v3Idx === -1) continue;
|
|
1535
|
+
placeholderToDiff.set(placeholder, diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[gIdx], cTables[cIdx], mTables[mIdx], cellDiff));
|
|
1536
|
+
placeholders.g[gIdx] = placeholder;
|
|
1537
|
+
placeholders.c[cIdx] = placeholder;
|
|
1538
|
+
placeholders.m[mIdx] = placeholder;
|
|
1539
|
+
}
|
|
1540
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
1541
|
+
if (placeholders.g[gIdx] !== null) continue;
|
|
1542
|
+
const cIdx = gToCp[gIdx];
|
|
1543
|
+
if (cIdx === -1) continue;
|
|
1524
1544
|
const placeholder = allocate();
|
|
1525
|
-
placeholderToDiff.set(placeholder, wrapWhole("
|
|
1526
|
-
placeholders.
|
|
1527
|
-
placeholders.
|
|
1528
|
-
}
|
|
1529
|
-
for (let
|
|
1530
|
-
if (placeholders.
|
|
1531
|
-
const
|
|
1532
|
-
if (
|
|
1545
|
+
placeholderToDiff.set(placeholder, wrapWhole("del", "me", genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd)));
|
|
1546
|
+
placeholders.g[gIdx] = placeholder;
|
|
1547
|
+
placeholders.c[cIdx] = placeholder;
|
|
1548
|
+
}
|
|
1549
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
1550
|
+
if (placeholders.g[gIdx] !== null) continue;
|
|
1551
|
+
const mIdx = gToMe[gIdx];
|
|
1552
|
+
if (mIdx === -1) continue;
|
|
1533
1553
|
const placeholder = allocate();
|
|
1534
|
-
placeholderToDiff.set(placeholder, wrapWhole("del", "
|
|
1535
|
-
placeholders.
|
|
1536
|
-
placeholders.
|
|
1554
|
+
placeholderToDiff.set(placeholder, wrapWhole("del", "cp", genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd)));
|
|
1555
|
+
placeholders.g[gIdx] = placeholder;
|
|
1556
|
+
placeholders.m[mIdx] = placeholder;
|
|
1537
1557
|
}
|
|
1538
|
-
for (let
|
|
1539
|
-
if (placeholders.
|
|
1558
|
+
for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
|
|
1559
|
+
if (placeholders.g[gIdx] !== null) continue;
|
|
1560
|
+
const placeholder = allocate();
|
|
1561
|
+
placeholderToDiff.set(placeholder, "");
|
|
1562
|
+
placeholders.g[gIdx] = placeholder;
|
|
1563
|
+
}
|
|
1564
|
+
for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
|
|
1565
|
+
if (placeholders.c[cIdx] !== null) continue;
|
|
1566
|
+
const cText = cKeys[cIdx];
|
|
1567
|
+
let mIdx = -1;
|
|
1568
|
+
for (let candidate = 0; candidate < mTables.length; candidate++) {
|
|
1569
|
+
if (placeholders.m[candidate] !== null) continue;
|
|
1570
|
+
if (meToG[candidate] !== -1) continue;
|
|
1571
|
+
if (mKeys[candidate] === cText) {
|
|
1572
|
+
mIdx = candidate;
|
|
1573
|
+
break;
|
|
1574
|
+
}
|
|
1575
|
+
}
|
|
1576
|
+
if (mIdx === -1) continue;
|
|
1540
1577
|
const placeholder = allocate();
|
|
1541
|
-
placeholderToDiff.set(placeholder,
|
|
1542
|
-
placeholders.
|
|
1578
|
+
placeholderToDiff.set(placeholder, cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd));
|
|
1579
|
+
placeholders.c[cIdx] = placeholder;
|
|
1580
|
+
placeholders.m[mIdx] = placeholder;
|
|
1543
1581
|
}
|
|
1544
|
-
for (let
|
|
1545
|
-
if (placeholders.
|
|
1582
|
+
for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
|
|
1583
|
+
if (placeholders.c[cIdx] !== null) continue;
|
|
1546
1584
|
const placeholder = allocate();
|
|
1547
|
-
placeholderToDiff.set(placeholder, wrapWhole("
|
|
1548
|
-
placeholders.
|
|
1585
|
+
placeholderToDiff.set(placeholder, wrapWhole("ins", "cp", cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd)));
|
|
1586
|
+
placeholders.c[cIdx] = placeholder;
|
|
1549
1587
|
}
|
|
1550
|
-
for (let
|
|
1551
|
-
if (placeholders.
|
|
1588
|
+
for (let mIdx = 0; mIdx < mTables.length; mIdx++) {
|
|
1589
|
+
if (placeholders.m[mIdx] !== null) continue;
|
|
1552
1590
|
const placeholder = allocate();
|
|
1553
|
-
placeholderToDiff.set(placeholder, wrapWhole("ins", "me",
|
|
1554
|
-
placeholders.
|
|
1591
|
+
placeholderToDiff.set(placeholder, wrapWhole("ins", "me", meCurrent.slice(mTables[mIdx].tableStart, mTables[mIdx].tableEnd)));
|
|
1592
|
+
placeholders.m[mIdx] = placeholder;
|
|
1555
1593
|
}
|
|
1556
|
-
let
|
|
1557
|
-
for (let i =
|
|
1558
|
-
const p = placeholders.
|
|
1594
|
+
let modifiedGenesis = genesis;
|
|
1595
|
+
for (let i = gTables.length - 1; i >= 0; i--) {
|
|
1596
|
+
const p = placeholders.g[i];
|
|
1559
1597
|
if (p === null) continue;
|
|
1560
|
-
|
|
1598
|
+
modifiedGenesis = spliceString(modifiedGenesis, gTables[i].tableStart, gTables[i].tableEnd, p);
|
|
1561
1599
|
}
|
|
1562
|
-
let
|
|
1563
|
-
for (let i =
|
|
1564
|
-
const p = placeholders.
|
|
1600
|
+
let modifiedCp = cpLatest;
|
|
1601
|
+
for (let i = cTables.length - 1; i >= 0; i--) {
|
|
1602
|
+
const p = placeholders.c[i];
|
|
1565
1603
|
if (p === null) continue;
|
|
1566
|
-
|
|
1604
|
+
modifiedCp = spliceString(modifiedCp, cTables[i].tableStart, cTables[i].tableEnd, p);
|
|
1567
1605
|
}
|
|
1568
|
-
let
|
|
1569
|
-
for (let i =
|
|
1570
|
-
const p = placeholders.
|
|
1606
|
+
let modifiedMe = meCurrent;
|
|
1607
|
+
for (let i = mTables.length - 1; i >= 0; i--) {
|
|
1608
|
+
const p = placeholders.m[i];
|
|
1571
1609
|
if (p === null) continue;
|
|
1572
|
-
|
|
1610
|
+
modifiedMe = spliceString(modifiedMe, mTables[i].tableStart, mTables[i].tableEnd, p);
|
|
1573
1611
|
}
|
|
1574
1612
|
return {
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1613
|
+
modifiedGenesis,
|
|
1614
|
+
modifiedCp,
|
|
1615
|
+
modifiedMe,
|
|
1578
1616
|
placeholderToDiff
|
|
1579
1617
|
};
|
|
1580
1618
|
}
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
/**
|
|
1591
|
-
* Returns true when V1/V2/V3 tables can be 1:1 paired by position. The
|
|
1592
|
-
* three lists must have equal length AND each positional triple must
|
|
1593
|
-
* have content similar enough that positional pairing reflects the
|
|
1594
|
-
* authors' likely intent. The slow content-LCS path handles cases that
|
|
1595
|
-
* fail this gate (table reordering, additions, deletions).
|
|
1596
|
-
*/
|
|
1597
|
-
function positionallyAligned(v1, v2, v3, t1s, t2s, t3s) {
|
|
1598
|
-
if (t1s.length !== t2s.length || t2s.length !== t3s.length) return false;
|
|
1599
|
-
for (let i = 0; i < t1s.length; i++) {
|
|
1600
|
-
const k1 = tableKey(v1, t1s[i]);
|
|
1601
|
-
const k2 = tableKey(v2, t2s[i]);
|
|
1602
|
-
const k3 = tableKey(v3, t3s[i]);
|
|
1603
|
-
if (textSimilarity(k1, k2) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false;
|
|
1604
|
-
if (textSimilarity(k2, k3) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false;
|
|
1619
|
+
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .15;
|
|
1620
|
+
function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables) {
|
|
1621
|
+
if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false;
|
|
1622
|
+
for (let i = 0; i < gTables.length; i++) {
|
|
1623
|
+
const kG = tableKey(genesis, gTables[i]);
|
|
1624
|
+
const kC = tableKey(cpLatest, cTables[i]);
|
|
1625
|
+
const kM = tableKey(meCurrent, mTables[i]);
|
|
1626
|
+
if (textSimilarity(kG, kC) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false;
|
|
1627
|
+
if (textSimilarity(kG, kM) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false;
|
|
1605
1628
|
}
|
|
1606
1629
|
return true;
|
|
1607
1630
|
}
|
|
1608
1631
|
function tableKey(html, table) {
|
|
1609
1632
|
return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
|
|
1610
1633
|
}
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1634
|
+
/**
|
|
1635
|
+
* Character-level similarity above which the three-way aligner treats
|
|
1636
|
+
* two rows / tables as "the same logical entry, edited" rather than
|
|
1637
|
+
* an unrelated delete + insert. Matched to TableDiff's
|
|
1638
|
+
* `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
|
|
1639
|
+
* agree on which pairings are reachable; if a row's content overlap
|
|
1640
|
+
* is enough to fool the 2-way diff into pairing, it should also be
|
|
1641
|
+
* enough for 3-way.
|
|
1642
|
+
*/
|
|
1643
|
+
const THREE_WAY_FUZZY_THRESHOLD = .5;
|
|
1644
|
+
/**
|
|
1645
|
+
* Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
|
|
1646
|
+
* applies after its exact-LCS, but against one side of the genesis
|
|
1647
|
+
* spine (either cp or me). The genesis tables/rows are always the
|
|
1648
|
+
* "old" side; `newTable` is the cp or me table being aligned. Returns
|
|
1649
|
+
* the enriched alignment with additional paired entries.
|
|
1650
|
+
*
|
|
1651
|
+
* Cell-count guard: only fuzzy-pair when both rows have the same cell
|
|
1652
|
+
* count. Without this guard an asymmetric restructure — e.g. CP and
|
|
1653
|
+
* Me both added a different column — leads to ONE side fuzzy-pairing
|
|
1654
|
+
* its row with genesis (content overlap above threshold) while the
|
|
1655
|
+
* other side falls below threshold. That mismatch routes through
|
|
1656
|
+
* `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
|
|
1657
|
+
* branch, which emits CP's row as a Me-attributed deletion. In
|
|
1658
|
+
* cp-only mode `stripMeAttributedMarkers` then removes the row
|
|
1659
|
+
* entirely and CP's edit vanishes from the view — exactly the
|
|
1660
|
+
* content-loss case we're meant to prevent. Restricting fuzzy
|
|
1661
|
+
* pairing to same-shape rows preserves the common case (single cell
|
|
1662
|
+
* edit, identical row shape) while pushing structural mismatches
|
|
1663
|
+
* back to the boundary-insertion path that emits both sides
|
|
1664
|
+
* explicitly.
|
|
1665
|
+
*/
|
|
1666
|
+
function pairSimilarRowsThreeWay(alignment, genesis, newHtml, oldTable, newTable) {
|
|
1667
|
+
const oldTexts = oldTable.rows.map((r) => rowText(genesis, r));
|
|
1668
|
+
const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
|
|
1669
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
|
|
1670
|
+
if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0;
|
|
1671
|
+
return textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
|
|
1672
|
+
});
|
|
1673
|
+
}
|
|
1674
|
+
/**
 * Table-level counterpart of `pairSimilarRowsThreeWay`. Takes the
 * alignment produced by `lcsAlign(gKeys, otherKeys)` over whole-table
 * keys and hands the still-unmatched tables to `pairSimilarUnmatched`,
 * scored by the similarity of their text (every row's text joined with
 * a single space).
 *
 * The point of the fuzzy pass: a table whose body was edited no longer
 * matches its genesis key exactly, and without a pairing the caller
 * would emit whole-table del + whole-table ins instead of recursing
 * into per-cell three-way diffs.
 *
 * @param alignment  alignment entries from the exact-key pass
 * @param oldHtml    HTML string that `oldTables` ranges index into
 * @param newHtml    HTML string that `newTables` ranges index into
 * @param oldTables  parsed tables of the old (genesis) side
 * @param newTables  parsed tables of the new side
 * @returns whatever `pairSimilarUnmatched` returns for this alignment
 */
function pairSimilarTablesThreeWay(alignment, oldHtml, newHtml, oldTables, newTables) {
	const tableText = (html, table) => table.rows.map((row) => rowText(html, row)).join(" ");
	const oldTexts = oldTables.map((table) => tableText(oldHtml, table));
	const newTexts = newTables.map((table) => tableText(newHtml, table));
	const scoreTablePair = (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
	return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, scoreTablePair);
}
|
|
1687
|
+
/**
 * Three-way diff of one table. Picks the merge strategy from the table
 * shapes: when genesis, CP and Me all have the same dimensions the
 * cells are diffed position by position; otherwise the row-level
 * structural merge is used.
 */
function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
	const allSameShape = sameDimensions(tG, tC) && sameDimensions(tC, tM);
	const mergeStrategy = allSameShape ? diffTablePositional : diffTableStructural;
	return mergeStrategy(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
}
|
|
1615
|
-
function diffTablePositional(
|
|
1691
|
+
/**
 * Cell-by-cell three-way diff for tables that share the same shape.
 *
 * Genesis supplies all markup outside cell content (table/row/cell tags
 * and anything between them); only each cell's content is replaced by
 * `cellDiff(genesisContent, cpContent, meContent)` for the cell at the
 * same row/column position in all three versions.
 *
 * @param genesis    HTML that `tG` ranges index into
 * @param cpLatest   HTML that `tC` ranges index into
 * @param meCurrent  HTML that `tM` ranges index into
 * @param tG, tC, tM parsed tables (`tableStart`, `tableEnd`, `rows[].cells[]`
 *                   with `contentStart` / `contentEnd` offsets)
 * @param cellDiff   callback producing the merged content of one cell
 * @returns the merged table HTML
 */
function diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
	const pieces = [];
	// Offset in `genesis` up to which markup has already been copied out.
	let copiedUpTo = tG.tableStart;
	for (const [r, genesisRow] of tG.rows.entries()) {
		const cpRow = tC.rows[r];
		const meRow = tM.rows[r];
		for (const [c, genesisCell] of genesisRow.cells.entries()) {
			const cpCell = cpRow.cells[c];
			const meCell = meRow.cells[c];
			const genesisContent = genesis.slice(genesisCell.contentStart, genesisCell.contentEnd);
			const cpContent = cpLatest.slice(cpCell.contentStart, cpCell.contentEnd);
			const meContent = meCurrent.slice(meCell.contentStart, meCell.contentEnd);
			pieces.push(genesis.slice(copiedUpTo, genesisCell.contentStart));
			pieces.push(cellDiff(genesisContent, cpContent, meContent));
			copiedUpTo = genesisCell.contentEnd;
		}
	}
	pieces.push(genesis.slice(copiedUpTo, tG.tableEnd));
	return pieces.join("");
}
|
|
1634
1710
|
/**
 * Row-level merge of three table versions whose row and/or cell counts
 * differ, walked in genesis row order (the "genesis spine").
 *
 * 1. CP's rows and Me's rows are each aligned to the genesis rows:
 *    `lcsAlign` over row keys first, then `pairSimilarRowsThreeWay`
 *    over that alignment.
 * 2. For every genesis row:
 *    - paired on both sides → `emitPreservedRow`;
 *    - unpaired on CP's side only → Me's row, emitted as a
 *      CP-attributed deletion;
 *    - unpaired on Me's side only → CP's row, emitted as a
 *      Me-attributed deletion;
 *    - unpaired on both sides → nothing is emitted.
 * 3. Rows that exist only on CP's or only on Me's side are emitted at
 *    the genesis boundary where they were inserted (before genesis row
 *    `b`, or after the last row). A CP row and a Me row at the same
 *    boundary with identical row keys are treated as an agreed
 *    insertion: CP's markup is emitted once, unmarked. Everything else
 *    is emitted as an author-attributed insertion, CP's rows first.
 *
 * The markup before the first row and after the last row comes from
 * genesis (`tableHeaderSlice` / `tableFooterSlice`).
 */
function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
	const genesisRowCount = tG.rows.length;
	const gKeys = tG.rows.map((row) => rowKey(genesis, row));
	const cKeys = tC.rows.map((row) => rowKey(cpLatest, row));
	const mKeys = tM.rows.map((row) => rowKey(meCurrent, row));
	const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC);
	const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM);
	// genesis row index → paired row index on the other side; -1 when unpaired.
	const mapFromGenesis = (align) => {
		const mapping = new Array(genesisRowCount).fill(-1);
		for (const { oldIdx, newIdx } of align) {
			if (oldIdx !== null && newIdx !== null) mapping[oldIdx] = newIdx;
		}
		return mapping;
	};
	const gToCp = mapFromGenesis(alignCp);
	const gToMe = mapFromGenesis(alignMe);
	const cpInsAt = collectInsertedRowsAtBoundary(alignCp, genesisRowCount);
	const meInsAt = collectInsertedRowsAtBoundary(alignMe, genesisRowCount);
	const pieces = [tableHeaderSlice(genesis, tG)];
	// Emits the off-spine rows both sides inserted at genesis boundary `boundary`.
	const emitInsertedRowsAt = (boundary) => {
		const cpRowIdxs = cpInsAt.get(boundary) ?? [];
		// Me rows not yet claimed by an identical CP row at this boundary.
		const unclaimedMe = new Set(meInsAt.get(boundary) ?? []);
		for (const cIdx of cpRowIdxs) {
			const cpRow = tC.rows[cIdx];
			const agreedMeIdx = [...unclaimedMe].find((mIdx) => mKeys[mIdx] === cKeys[cIdx]);
			if (agreedMeIdx === undefined) {
				pieces.push(emitFullRowAttributed(cpLatest, cpRow, "ins", "cp"));
				continue;
			}
			// Both sides inserted the same row: emit it once, without attribution.
			unclaimedMe.delete(agreedMeIdx);
			pieces.push(cpLatest.slice(cpRow.rowStart, cpRow.rowEnd));
		}
		for (const mIdx of unclaimedMe) {
			pieces.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], "ins", "me"));
		}
	};
	for (let g = 0; g < genesisRowCount; g++) {
		emitInsertedRowsAt(g);
		const cIdx = gToCp[g];
		const mIdx = gToMe[g];
		const keptByCp = cIdx !== -1;
		const keptByMe = mIdx !== -1;
		if (keptByCp && keptByMe) {
			pieces.push(emitPreservedRow(genesis, cpLatest, meCurrent, tG.rows[g], tC.rows[cIdx], tM.rows[mIdx], cellDiff));
		} else if (keptByMe) {
			// CP dropped the row: show Me's copy as deleted by CP.
			pieces.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], "del", "cp"));
		} else if (keptByCp) {
			// Me dropped the row: show CP's copy as deleted by Me.
			pieces.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], "del", "me"));
		}
		// Dropped by both sides: emit nothing.
	}
	emitInsertedRowsAt(genesisRowCount);
	pieces.push(tableFooterSlice(genesis, tG));
	return pieces.join("");
}
|
|
1720
|
-
function
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1772
|
+
/**
 * Emits a genesis row that is still present on both CP's and Me's side.
 *
 * - Same cell count in all three versions: genesis supplies the row
 *   markup and each cell's content is replaced by
 *   `cellDiff(genesisContent, cpContent, meContent)`.
 * - Both sides changed the cell count: CP's row then Me's row, each as
 *   a full-row attributed insertion.
 * - Only CP changed the cell count: the genesis row as a CP deletion
 *   followed by CP's row as a CP insertion.
 * - Only Me changed the cell count: the genesis row as a Me deletion
 *   followed by Me's row as a Me insertion.
 *
 * @param rG, rC, rM parsed rows (`rowStart`, `rowEnd`, `cells[]`) of
 *                   genesis, CP and Me respectively
 * @param cellDiff   callback producing the merged content of one cell
 * @returns the merged row HTML
 */
function emitPreservedRow(genesis, cpLatest, meCurrent, rG, rC, rM, cellDiff) {
	const genesisCellCount = rG.cells.length;
	const cpRestructured = rC.cells.length !== genesisCellCount;
	const meRestructured = rM.cells.length !== genesisCellCount;
	if (cpRestructured && meRestructured) {
		return [
			emitFullRowAttributed(cpLatest, rC, "ins", "cp"),
			emitFullRowAttributed(meCurrent, rM, "ins", "me")
		].join("");
	}
	if (cpRestructured) {
		return [
			emitFullRowAttributed(genesis, rG, "del", "cp"),
			emitFullRowAttributed(cpLatest, rC, "ins", "cp")
		].join("");
	}
	if (meRestructured) {
		return [
			emitFullRowAttributed(genesis, rG, "del", "me"),
			emitFullRowAttributed(meCurrent, rM, "ins", "me")
		].join("");
	}
	// All three rows have the same shape: diff cell by cell on genesis markup.
	const pieces = [];
	let copiedUpTo = rG.rowStart;
	rG.cells.forEach((genesisCell, c) => {
		const cpCell = rC.cells[c];
		const meCell = rM.cells[c];
		pieces.push(genesis.slice(copiedUpTo, genesisCell.contentStart));
		pieces.push(cellDiff(
			genesis.slice(genesisCell.contentStart, genesisCell.contentEnd),
			cpLatest.slice(cpCell.contentStart, cpCell.contentEnd),
			meCurrent.slice(meCell.contentStart, meCell.contentEnd)
		));
		copiedUpTo = genesisCell.contentEnd;
	});
	pieces.push(genesis.slice(copiedUpTo, rG.rowEnd));
	return pieces.join("");
}
|
|
1743
|
-
|
|
1802
|
+
/**
 * Groups the rows that exist only on the new side of a row alignment by
 * the genesis-row boundary they were inserted at. Mirrors the word-level
 * boundary collection, but at the row scale.
 *
 * Boundary `b` means "immediately before genesis row `b`"; boundary
 * `genesisRowCount` means "after the last genesis row".
 *
 * @param align            alignment entries in document order; each has
 *                         `oldIdx` (genesis row index, or null) and
 *                         `newIdx` (new-side row index, or null)
 * @param genesisRowCount  number of rows in the genesis table
 * @returns Map from boundary to the new-side row indices inserted
 *          there, listed in alignment order
 */
function collectInsertedRowsAtBoundary(align, genesisRowCount) {
	const out = new Map();
	// The alignment is walked backwards, so the boundary for a run of
	// insertions is the nearest genesis row already seen to its right.
	let nextGenesisBoundary = genesisRowCount;
	// New-side indices of the current insertion run, in reverse order.
	const pending = [];
	// Prepends the pending run (restored to document order) to whatever is
	// already recorded at the current boundary. The list is built with an
	// array-literal spread rather than `unshift(...run)`, so a very long
	// run cannot hit the engine's call-argument limit.
	const flushPending = () => {
		if (pending.length === 0) return;
		pending.reverse();
		out.set(nextGenesisBoundary, [...pending, ...(out.get(nextGenesisBoundary) ?? [])]);
		pending.length = 0;
	};
	for (let i = align.length - 1; i >= 0; i--) {
		const { oldIdx, newIdx } = align[i];
		if (oldIdx !== null) {
			// A genesis row ends the run to its right and becomes the
			// boundary for whatever was inserted to its left.
			flushPending();
			nextGenesisBoundary = oldIdx;
		} else if (newIdx !== null) pending.push(newIdx);
	}
	// Rows inserted before the first genesis row seen.
	flushPending();
	return out;
}
|
|
@@ -1774,42 +1838,35 @@ function tableFooterSlice(html, table) {
|
|
|
1774
1838
|
return html.slice(lastRow.rowEnd, table.tableEnd);
|
|
1775
1839
|
}
|
|
1776
1840
|
/**
 * Emits one table row as wholly inserted or wholly deleted by a single
 * author.
 *
 * The row's opening tag is passed through `injectAuthorAttribution`,
 * every cell through `emitFullCellAttributed`, and all markup between
 * and after the cells is copied from `html` unchanged. If no opening
 * tag can be parsed at `row.rowStart`, the row is returned verbatim.
 *
 * @param html    HTML string that the `row` offsets index into
 * @param row     parsed row (`rowStart`, `rowEnd`, `cells[]`)
 * @param kind    "ins" or "del"
 * @param author  author key ("cp" or "me" at the call sites in this file)
 * @returns the attributed row HTML
 */
function emitFullRowAttributed(html, row, kind, author) {
	const { rowStart, rowEnd, cells } = row;
	const trOpening = parseOpeningTagAt(html, rowStart);
	if (!trOpening) return html.slice(rowStart, rowEnd);
	const pieces = [injectAuthorAttribution(html.slice(rowStart, trOpening.end), kind, author)];
	// Offset in `html` up to which the row has already been emitted.
	let copiedUpTo = trOpening.end;
	for (let i = 0; i < cells.length; i++) {
		const cell = cells[i];
		const markupBeforeCell = html.slice(copiedUpTo, cell.cellStart);
		pieces.push(markupBeforeCell, emitFullCellAttributed(html, cell, kind, author));
		copiedUpTo = cell.cellEnd;
	}
	pieces.push(html.slice(copiedUpTo, rowEnd));
	return pieces.join("");
}
|
|
1796
|
-
function emitFullCellAttributed(html, cell, kind, author
|
|
1859
|
+
/**
 * Emits one table cell as wholly inserted or deleted by `author`.
 *
 * The cell's opening tag gets the author attribution injected. Content
 * that is not whitespace-only is wrapped via `Utils.wrapText` using the
 * `kind` tag, the `diff<kind>` class and the author's attribution;
 * empty or whitespace-only content is left as is. The closing markup is
 * copied unchanged. If no opening tag can be parsed at
 * `cell.cellStart`, the cell is returned verbatim.
 */
function emitFullCellAttributed(html, cell, kind, author) {
	const { cellStart, cellEnd, contentStart, contentEnd } = cell;
	const tdOpening = parseOpeningTagAt(html, cellStart);
	if (!tdOpening) return html.slice(cellStart, cellEnd);
	const attributedOpenTag = injectAuthorAttribution(html.slice(cellStart, tdOpening.end), kind, author);
	let body = html.slice(contentStart, contentEnd);
	if (body.trim().length !== 0) {
		body = Utils_default.wrapText(body, kind, `diff${kind}`, authorAttribution(author));
	}
	const closingMarkup = html.slice(contentEnd, cellEnd);
	return attributedOpenTag + body + closingMarkup;
}
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
* an `<tr>` or `<td>` already in the source HTML). Uses the same
|
|
1808
|
-
* attribution shape as `authorAttribution` + `Utils.wrapText` so the
|
|
1809
|
-
* inject-into-existing and wrap-around-text paths agree.
|
|
1810
|
-
*/
|
|
1811
|
-
function injectAuthorAttribution(openingTag, kind, author, rejectsAuthor) {
|
|
1812
|
-
const meta = authorAttribution(author, rejectsAuthor);
|
|
1868
|
+
/**
 * Adds author attribution to an opening tag that already exists in the
 * source HTML (as opposed to wrapping text in a new tag): first the
 * `diff<kind>` class plus the author's extra classes, then the author's
 * data attributes (none when the attribution carries no `dataAttrs`).
 */
function injectAuthorAttribution(openingTag, kind, author) {
	const { extraClasses, dataAttrs } = authorAttribution(author);
	const withClasses = injectClass(openingTag, `diff${kind} ${extraClasses}`);
	return injectDataAttrs(withClasses, dataAttrs ?? {});
}
|
|
1815
1872
|
function injectDataAttrs(openingTag, dataAttrs) {
|
|
@@ -2014,6 +2071,37 @@ var BlockFinder = class {
|
|
|
2014
2071
|
};
|
|
2015
2072
|
//#endregion
|
|
2016
2073
|
//#region src/HtmlDiff.ts
|
|
2074
|
+
/**
|
|
2075
|
+
* Opinionated options that align htmldiff's output with Microsoft Word's
|
|
2076
|
+
* track-changes rendering for legal-document rewrites.
|
|
2077
|
+
*
|
|
2078
|
+
* The library's bare default (`orphanMatchThreshold = 0`) keeps every
|
|
2079
|
+
* LCS match, however small — which fragments long sentence rewrites
|
|
2080
|
+
* into many tiny ins/del pairs around stray word matches ("of", "the",
|
|
2081
|
+
* "shall"). Word collapses those into a single coarse del+ins, which is
|
|
2082
|
+
* dramatically more readable for legal text.
|
|
2083
|
+
*
|
|
2084
|
+
* 0.25 was tuned empirically against a customer Word reference (US
|
|
2085
|
+
* Commercial One CP, May 2026):
|
|
2086
|
+
* - short edits (typo / one-word insert): output identical to
|
|
2087
|
+
* threshold=0 — inter-match distances are tiny so every match
|
|
2088
|
+
* trivially clears the bar;
|
|
2089
|
+
* - long rewrites (the "Specified Indebtedness" rewrite in the
|
|
2090
|
+
* reference): previously produced 6 dels + 5 ins fragmented around
|
|
2091
|
+
* stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
|
|
2092
|
+
* Word's 1+1 and a major readability win;
|
|
2093
|
+
* - higher values (0.3+) collapsed short edits containing inline
|
|
2094
|
+
* formatting changes into a single block — too aggressive.
|
|
2095
|
+
*
|
|
2096
|
+
* Consumers rendering legal documents should spread this into their
|
|
2097
|
+
* options:
|
|
2098
|
+
* `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
|
|
2099
|
+
* `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
|
|
2100
|
+
*
|
|
2101
|
+
* Other consumers (machine-readable diff, exact-token alignment) can
|
|
2102
|
+
* keep the bare default.
|
|
2103
|
+
*/
|
|
2104
|
+
// Frozen because this preset is a shared, exported object: consumers are
// expected to spread it into their own options bag, and a stray mutation
// by one consumer must not change the preset for everyone else.
const WORD_ALIGNED_OPTIONS = Object.freeze({ orphanMatchThreshold: 0.25 });
|
|
2017
2105
|
var HtmlDiff = class HtmlDiff {
|
|
2018
2106
|
/**
|
|
2019
2107
|
* This value defines balance between speed and memory utilization. The higher it is the faster it works and more memory consumes.
|
|
@@ -2089,6 +2177,16 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2089
2177
|
newText;
|
|
2090
2178
|
oldText;
|
|
2091
2179
|
tablePreprocessDepth = 0;
|
|
2180
|
+
/**
|
|
2181
|
+
* Tracks currently-open formatting-tag wraps. Each entry pairs the
|
|
2182
|
+
* opening tag (so a later closing tag can find its match) with the
|
|
2183
|
+
* styling info needed to RE-OPEN the wrap if an overlapping
|
|
2184
|
+
* formatting-tag close forces it to split. Without the styling info,
|
|
2185
|
+
* an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
|
|
2186
|
+
* unclosable wrap (the closing tag for the outer wrap arrives while
|
|
2187
|
+
* an inner wrap is still on the stack); see `insertTag`'s closing
|
|
2188
|
+
* handler for the split logic.
|
|
2189
|
+
*/
|
|
2092
2190
|
specialTagDiffStack = [];
|
|
2093
2191
|
newWords = [];
|
|
2094
2192
|
oldWords = [];
|
|
@@ -2156,8 +2254,23 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2156
2254
|
this.oldText = oldText;
|
|
2157
2255
|
this.newText = newText;
|
|
2158
2256
|
}
|
|
2159
|
-
|
|
2160
|
-
|
|
2257
|
+
/**
|
|
2258
|
+
* Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
|
|
2259
|
+
* `executeThreeWay`, with two intentional exceptions documented
|
|
2260
|
+
* inline below. Consumers wanting Word-aligned output should spread
|
|
2261
|
+
* `WORD_ALIGNED_OPTIONS` into the third argument.
|
|
2262
|
+
*
|
|
2263
|
+
* Note: unlike `analyze`, `execute` runs `build()` which performs
|
|
2264
|
+
* full table preprocessing — `tablePreprocessDepth` stays at 0 so
|
|
2265
|
+
* the recursive cell diff can happen. Callers can't override that.
|
|
2266
|
+
*/
|
|
2267
|
+
static execute(oldText, newText, options = {}) {
|
|
2268
|
+
const inner = new HtmlDiff(oldText, newText);
|
|
2269
|
+
if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
|
|
2270
|
+
if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
|
|
2271
|
+
if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
|
|
2272
|
+
if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
|
|
2273
|
+
return inner.build();
|
|
2161
2274
|
}
|
|
2162
2275
|
/**
|
|
2163
2276
|
* Analyse a two-way diff and return its raw building blocks: the word
|
|
@@ -2226,40 +2339,43 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2226
2339
|
return HtmlDiff.shouldUseContentProjections(oldWords, newWords, oldProj, newProj);
|
|
2227
2340
|
}
|
|
2228
2341
|
/**
|
|
2229
|
-
* Three-way HTML diff
|
|
2230
|
-
*
|
|
2231
|
-
*
|
|
2232
|
-
*
|
|
2233
|
-
*
|
|
2234
|
-
*
|
|
2235
|
-
* dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
|
|
2342
|
+
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
2343
|
+
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
2344
|
+
* from Me's accumulated changes (genesis → meCurrent). Use this for
|
|
2345
|
+
* blackline UX where the negotiation has gone through multiple turns
|
|
2346
|
+
* and the reader wants to see "who proposed what" across the whole
|
|
2347
|
+
* history, not just the most recent round.
|
|
2236
2348
|
*
|
|
2237
|
-
*
|
|
2238
|
-
*
|
|
2239
|
-
*
|
|
2240
|
-
*
|
|
2241
|
-
*
|
|
2242
|
-
*
|
|
2349
|
+
* When both parties happen to have made the same change (e.g. CP
|
|
2350
|
+
* proposed a wording change in turn N, Me adopted it in turn N+1),
|
|
2351
|
+
* the change reads as "settled" and is emitted unmarked — only
|
|
2352
|
+
* disagreements and pending proposals carry author attribution.
|
|
2353
|
+
*
|
|
2354
|
+
* @param genesis the shared common ancestor (per-user — the FE
|
|
2355
|
+
* picks between V1.0 and /preview/initialAnswers
|
|
2356
|
+
* based on `prefillReceiverAnswers`)
|
|
2357
|
+
* @param cpLatest the counterparty's current published version
|
|
2358
|
+
* @param meCurrent Me's current draft (the document on screen)
|
|
2243
2359
|
*/
|
|
2244
|
-
static executeThreeWay(
|
|
2245
|
-
return HtmlDiff.executeThreeWayWithDepth(
|
|
2246
|
-
}
|
|
2247
|
-
static executeThreeWayWithDepth(
|
|
2248
|
-
const tablePreprocess = depth < HtmlDiff.MaxThreeWayDepth ? preprocessTablesThreeWay(
|
|
2249
|
-
const
|
|
2250
|
-
const
|
|
2251
|
-
const
|
|
2360
|
+
static executeThreeWay(genesis, cpLatest, meCurrent, options = {}) {
|
|
2361
|
+
return HtmlDiff.executeThreeWayWithDepth(genesis, cpLatest, meCurrent, options, 0);
|
|
2362
|
+
}
|
|
2363
|
+
static executeThreeWayWithDepth(genesis, cpLatest, meCurrent, options, depth) {
|
|
2364
|
+
const tablePreprocess = depth < HtmlDiff.MaxThreeWayDepth ? preprocessTablesThreeWay(genesis, cpLatest, meCurrent, (g, c, m) => HtmlDiff.executeThreeWayWithDepth(g, c, m, options, depth + 1)) : null;
|
|
2365
|
+
const inGenesis = tablePreprocess?.modifiedGenesis ?? genesis;
|
|
2366
|
+
const inCp = tablePreprocess?.modifiedCp ?? cpLatest;
|
|
2367
|
+
const inMe = tablePreprocess?.modifiedMe ?? meCurrent;
|
|
2252
2368
|
const analyzeOpts = {
|
|
2253
|
-
useProjections: options.useProjections ?? (HtmlDiff.evaluateProjectionApplicability(
|
|
2369
|
+
useProjections: options.useProjections ?? (HtmlDiff.evaluateProjectionApplicability(inGenesis, inCp) && HtmlDiff.evaluateProjectionApplicability(inGenesis, inMe)),
|
|
2254
2370
|
blockExpressions: options.blockExpressions,
|
|
2255
2371
|
repeatingWordsAccuracy: options.repeatingWordsAccuracy,
|
|
2256
2372
|
orphanMatchThreshold: options.orphanMatchThreshold,
|
|
2257
2373
|
ignoreWhitespaceDifferences: options.ignoreWhitespaceDifferences
|
|
2258
2374
|
};
|
|
2259
|
-
const
|
|
2260
|
-
const
|
|
2261
|
-
if (
|
|
2262
|
-
const segments = buildSegments(
|
|
2375
|
+
const dCp = HtmlDiff.analyze(inGenesis, inCp, analyzeOpts);
|
|
2376
|
+
const dMe = HtmlDiff.analyze(inGenesis, inMe, analyzeOpts);
|
|
2377
|
+
if (dCp.oldDiffWords.length !== dMe.oldDiffWords.length) throw new Error(`HtmlDiff.executeThreeWay: genesis tokenisation diverged across pair-wise analyses (${dCp.oldDiffWords.length} vs ${dMe.oldDiffWords.length}). This indicates the symmetric-projection coordination has a bug.`);
|
|
2378
|
+
const segments = buildSegments(dCp, dMe);
|
|
2263
2379
|
const merged = HtmlDiff.emitSegments(segments);
|
|
2264
2380
|
return tablePreprocess ? restoreTablePlaceholders(merged, tablePreprocess.placeholderToDiff) : merged;
|
|
2265
2381
|
}
|
|
@@ -2269,6 +2385,25 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2269
2385
|
* buffer. Reusing the instance keeps the formatting-tag stack
|
|
2270
2386
|
* (`specialTagDiffStack`) coherent across segments — a `<strong>`
|
|
2271
2387
|
* opened in one segment and closed in another stays balanced.
|
|
2388
|
+
*
|
|
2389
|
+
* Edge case: an ins/del segment can open a formatting wrap whose
|
|
2390
|
+
* matching closer ends up in an equal segment (`<strong>` deleted
|
|
2391
|
+
* by CP but `</strong>` kept by both — buildSegments emits the open
|
|
2392
|
+
* as del-cp and the close as equal). Equal segments bypass
|
|
2393
|
+
* `insertTag` and push raw, so the stack entry for the open is
|
|
2394
|
+
* never popped. Rather than throw — which forces the caller's UI
|
|
2395
|
+
* into an error boundary — close every leftover wrap with `</ins>`
|
|
2396
|
+
* at the end of emission.
|
|
2397
|
+
*
|
|
2398
|
+
* Caveat: the `</ins>` close is honest for the mod-wrap that the
|
|
2399
|
+
* opener pushed (every formatting opener emits an inner `<ins…>`
|
|
2400
|
+
* postInject regardless of whether the outer segment is ins or
|
|
2401
|
+
* del). For del-segment formatting openers the outer `<del>` may
|
|
2402
|
+
* itself be left open by the same emission imbalance; this fixup
|
|
2403
|
+
* doesn't address that. Downstream browsers/DOMParser normalise
|
|
2404
|
+
* mildly-malformed HTML by closing dangling tags, so the rendered
|
|
2405
|
+
* output is usually acceptable — but the warning IS the signal
|
|
2406
|
+
* that the input had a real imbalance worth investigating.
|
|
2272
2407
|
*/
|
|
2273
2408
|
static emitSegments(segments) {
|
|
2274
2409
|
const emitter = new HtmlDiff("", "");
|
|
@@ -2280,7 +2415,13 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2280
2415
|
const { tag, baseClass, metadata } = segmentEmissionShape(seg.attr);
|
|
2281
2416
|
emitter.insertTag(tag, baseClass, [...seg.words], metadata);
|
|
2282
2417
|
}
|
|
2283
|
-
if (emitter.specialTagDiffStack.length > 0)
|
|
2418
|
+
if (emitter.specialTagDiffStack.length > 0) {
|
|
2419
|
+
console.warn(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting wrap(s) on the stack. Closing defensively. This usually means a formatting tag opens in a del/ins segment and its matching closer is in an equal segment.`);
|
|
2420
|
+
while (emitter.specialTagDiffStack.length > 0) {
|
|
2421
|
+
emitter.content.push("</ins>");
|
|
2422
|
+
emitter.specialTagDiffStack.pop();
|
|
2423
|
+
}
|
|
2424
|
+
}
|
|
2284
2425
|
return emitter.content.join("");
|
|
2285
2426
|
}
|
|
2286
2427
|
/**
|
|
@@ -2536,38 +2677,52 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2536
2677
|
if (words.length === 0) break;
|
|
2537
2678
|
const indexOfFirstNonTag = words.findIndex((x) => !Utils_default.isTag(x));
|
|
2538
2679
|
const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1;
|
|
2539
|
-
let
|
|
2540
|
-
let
|
|
2680
|
+
let preInject = "";
|
|
2681
|
+
let postInject = "";
|
|
2541
2682
|
if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
|
|
2542
2683
|
const tagNames = /* @__PURE__ */ new Set();
|
|
2543
2684
|
for (const word of words) if (Utils_default.isTag(word)) tagNames.add(Utils_default.getTagName(word));
|
|
2544
2685
|
const styledTagNames = Array.from(tagNames).join(" ");
|
|
2545
|
-
|
|
2546
|
-
|
|
2686
|
+
const styledCssClass = `mod ${styledTagNames}`;
|
|
2687
|
+
this.specialTagDiffStack.push({
|
|
2688
|
+
tag: words[0],
|
|
2689
|
+
styledTagNames,
|
|
2690
|
+
cssClass: styledCssClass,
|
|
2691
|
+
metadata
|
|
2692
|
+
});
|
|
2693
|
+
postInject = `<ins${Utils_default.composeTagAttributes(styledCssClass, metadata ?? {})}>`;
|
|
2547
2694
|
if (tag === HtmlDiff.DelTag) {
|
|
2548
2695
|
words.shift();
|
|
2549
2696
|
while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) words.shift();
|
|
2550
2697
|
}
|
|
2551
2698
|
} else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
|
|
2552
|
-
const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop();
|
|
2553
2699
|
let tagIndexToCompare = indexLastTagInFirstTagBlock;
|
|
2554
2700
|
if (tag === HtmlDiff.DelTag && indexOfFirstNonTag === -1) {
|
|
2555
2701
|
if (words.slice(0, indexLastTagInFirstTagBlock + 1).some((w) => !HtmlDiff.SpecialCaseClosingTagsSet.has(w.toLowerCase()))) tagIndexToCompare = 0;
|
|
2556
2702
|
}
|
|
2557
|
-
const
|
|
2558
|
-
|
|
2559
|
-
|
|
2560
|
-
|
|
2561
|
-
|
|
2703
|
+
const closingTagName = Utils_default.getTagName(words[tagIndexToCompare]);
|
|
2704
|
+
let matchIdx = -1;
|
|
2705
|
+
for (let i = this.specialTagDiffStack.length - 1; i >= 0; i--) if (Utils_default.getTagName(this.specialTagDiffStack[i].tag) === closingTagName) {
|
|
2706
|
+
matchIdx = i;
|
|
2707
|
+
break;
|
|
2708
|
+
}
|
|
2709
|
+
if (matchIdx >= 0) {
|
|
2710
|
+
const aboveEntries = this.specialTagDiffStack.splice(matchIdx + 1);
|
|
2711
|
+
this.specialTagDiffStack.pop();
|
|
2712
|
+
preInject = "</ins>".repeat(aboveEntries.length + 1);
|
|
2713
|
+
for (const entry of aboveEntries) {
|
|
2714
|
+
postInject += `<ins${Utils_default.composeTagAttributes(entry.cssClass, entry.metadata ?? {})}>`;
|
|
2715
|
+
this.specialTagDiffStack.push(entry);
|
|
2716
|
+
}
|
|
2717
|
+
}
|
|
2562
2718
|
if (tag === HtmlDiff.DelTag) {
|
|
2563
2719
|
words.shift();
|
|
2564
2720
|
while (words.length > 0 && HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) words.shift();
|
|
2565
2721
|
}
|
|
2566
2722
|
}
|
|
2567
|
-
if (words.length === 0 &&
|
|
2723
|
+
if (words.length === 0 && preInject.length === 0 && postInject.length === 0) break;
|
|
2568
2724
|
const isTagForExtraction = tag === HtmlDiff.DelTag ? (x) => Utils_default.isTag(x) && !HtmlDiff.SpecialCaseOpeningTagRegex.test(x) && !HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase()) : Utils_default.isTag;
|
|
2569
|
-
|
|
2570
|
-
else this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join("") + specialCaseTagInjection);
|
|
2725
|
+
this.content.push(preInject + this.extractConsecutiveWords(words, isTagForExtraction).join("") + postInject);
|
|
2571
2726
|
if (words.length === 0) continue;
|
|
2572
2727
|
this.insertTag(tag, cssClass, words, metadata);
|
|
2573
2728
|
break;
|
|
@@ -2632,6 +2787,17 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2632
2787
|
curr = next;
|
|
2633
2788
|
continue;
|
|
2634
2789
|
}
|
|
2790
|
+
let allTags = true;
|
|
2791
|
+
for (let i = curr.startInNew; i < curr.endInNew; i++) if (!Utils_default.isTag(wordsForDiffNew[i])) {
|
|
2792
|
+
allTags = false;
|
|
2793
|
+
break;
|
|
2794
|
+
}
|
|
2795
|
+
if (allTags) {
|
|
2796
|
+
yield curr;
|
|
2797
|
+
prev = curr;
|
|
2798
|
+
curr = next;
|
|
2799
|
+
continue;
|
|
2800
|
+
}
|
|
2635
2801
|
let oldDistanceInChars = 0;
|
|
2636
2802
|
for (let i = prev.endInOld; i < next.startInOld; i++) oldDistanceInChars += wordsForDiffOld[i].length;
|
|
2637
2803
|
let newDistanceInChars = 0;
|
|
@@ -2674,6 +2840,6 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2674
2840
|
}
|
|
2675
2841
|
};
|
|
2676
2842
|
//#endregion
|
|
2677
|
-
export { HtmlDiff as default };
|
|
2843
|
+
export { WORD_ALIGNED_OPTIONS, HtmlDiff as default };
|
|
2678
2844
|
|
|
2679
2845
|
//# sourceMappingURL=HtmlDiff.mjs.map
|