@createiq/htmldiff 1.2.0-beta.1 → 1.2.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +249 -52
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +72 -18
- package/dist/HtmlDiff.d.mts +72 -18
- package/dist/HtmlDiff.mjs +244 -52
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +172 -48
- package/src/ThreeWayDiff.ts +58 -11
- package/src/ThreeWayTable.ts +143 -9
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +232 -6
- package/test/HtmlDiff.threeWay.tables.spec.ts +111 -1
- package/test/Utils.spec.ts +3 -3
package/dist/HtmlDiff.cjs
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
Object.defineProperties(exports, {
|
|
2
|
+
__esModule: { value: true },
|
|
3
|
+
[Symbol.toStringTag]: { value: "Module" }
|
|
4
|
+
});
|
|
1
5
|
//#region src/Match.ts
|
|
2
6
|
var Match = class {
|
|
3
7
|
_startInOld;
|
|
@@ -1333,13 +1337,33 @@ function collectInsertionsKeyedByEnd(d) {
|
|
|
1333
1337
|
return out;
|
|
1334
1338
|
}
|
|
1335
1339
|
/**
|
|
1336
|
-
* Emit any insertions at boundary `b`.
|
|
1337
|
-
* the same boundary AND the inserted token sequences are textually
|
|
1338
|
-
* identical, the insertion is treated as agreed and emitted unmarked.
|
|
1339
|
-
* Otherwise each side's insertion is emitted with author attribution.
|
|
1340
|
+
* Emit any insertions at boundary `b`.
|
|
1340
1341
|
*
|
|
1341
|
-
*
|
|
1342
|
-
*
|
|
1342
|
+
* Reading model: a legal reviewer wants to see CP's INTENT relative
|
|
1343
|
+
* to Me's current content. Me's content is the base; CP's deltas are
|
|
1344
|
+
* what they need to act on. Under that framing:
|
|
1345
|
+
* - tokens both authors inserted at the same boundary → settled
|
|
1346
|
+
* - tokens CP inserted that Me doesn't have → ins-cp (CP wants
|
|
1347
|
+
* this added)
|
|
1348
|
+
* - tokens Me inserted that CP doesn't have → del-cp (CP wants
|
|
1349
|
+
* this removed from Me's content)
|
|
1350
|
+
*
|
|
1351
|
+
* The third case is the load-bearing attribution flip. The
|
|
1352
|
+
* genesis-spine view technically labels me-only-at-boundary tokens
|
|
1353
|
+
* as "ins-me" (Me added them; CP didn't), but that's confusing to
|
|
1354
|
+
* a reviewer: they see "Me added X" alongside "CP added Y" and have
|
|
1355
|
+
* to mentally derive "CP wants X gone, replaced with Y". Surfacing
|
|
1356
|
+
* me-only tokens as `del-cp` shows CP's intent directly:
|
|
1357
|
+
* - "CP accepted Me's text minus `things`": settled bulk + del-cp
|
|
1358
|
+
* `things` (no parallel redundant insertions)
|
|
1359
|
+
* - "CP wants `cruel` where Me wrote `brave`": ins-cp `cruel` +
|
|
1360
|
+
* del-cp `brave` (the substitution intent reads directly)
|
|
1361
|
+
* - "CP added extra words": cp-extras stay as ins-cp (same as
|
|
1362
|
+
* before; the cp-only direction was always intent-correct)
|
|
1363
|
+
*
|
|
1364
|
+
* Pure single-side insertions (Me added text CP doesn't engage
|
|
1365
|
+
* with at all, or vice versa) keep their genesis-spine attribution
|
|
1366
|
+
* — these aren't refinement cases, just Me's own content additions.
|
|
1343
1367
|
*/
|
|
1344
1368
|
function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments) {
|
|
1345
1369
|
const cpIns = cpInsAt.get(b);
|
|
@@ -1347,18 +1371,34 @@ function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments)
|
|
|
1347
1371
|
const hasCp = !!cpIns && cpIns.length > 0;
|
|
1348
1372
|
const hasMe = !!meIns && meIns.length > 0;
|
|
1349
1373
|
if (!hasCp && !hasMe) return;
|
|
1350
|
-
if (hasCp
|
|
1374
|
+
if (!hasCp) {
|
|
1375
|
+
appendSegment(segments, {
|
|
1376
|
+
kind: "ins",
|
|
1377
|
+
author: "me"
|
|
1378
|
+
}, meIns);
|
|
1379
|
+
return;
|
|
1380
|
+
}
|
|
1381
|
+
if (!hasMe) {
|
|
1382
|
+
appendSegment(segments, {
|
|
1383
|
+
kind: "ins",
|
|
1384
|
+
author: "cp"
|
|
1385
|
+
}, cpIns);
|
|
1386
|
+
return;
|
|
1387
|
+
}
|
|
1388
|
+
if (tokenArraysEqual(cpIns, meIns)) {
|
|
1351
1389
|
appendSegment(segments, { kind: "equal" }, cpIns);
|
|
1352
1390
|
return;
|
|
1353
1391
|
}
|
|
1354
|
-
|
|
1392
|
+
const alignment = lcsAlign(cpIns, meIns);
|
|
1393
|
+
for (const a of alignment) if (a.oldIdx !== null && a.newIdx !== null) appendSegment(segments, { kind: "equal" }, [cpIns[a.oldIdx]]);
|
|
1394
|
+
else if (a.oldIdx !== null) appendSegment(segments, {
|
|
1355
1395
|
kind: "ins",
|
|
1356
1396
|
author: "cp"
|
|
1357
|
-
}, cpIns);
|
|
1358
|
-
if (
|
|
1359
|
-
kind: "
|
|
1360
|
-
author: "
|
|
1361
|
-
}, meIns);
|
|
1397
|
+
}, [cpIns[a.oldIdx]]);
|
|
1398
|
+
else if (a.newIdx !== null) appendSegment(segments, {
|
|
1399
|
+
kind: "del",
|
|
1400
|
+
author: "cp"
|
|
1401
|
+
}, [meIns[a.newIdx]]);
|
|
1362
1402
|
}
|
|
1363
1403
|
function tokenArraysEqual(a, b) {
|
|
1364
1404
|
if (a.length !== b.length) return false;
|
|
@@ -1468,8 +1508,8 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
|
|
|
1468
1508
|
const gKeys = gTables.map((t) => tableKey(genesis, t));
|
|
1469
1509
|
const cKeys = cTables.map((t) => tableKey(cpLatest, t));
|
|
1470
1510
|
const mKeys = mTables.map((t) => tableKey(meCurrent, t));
|
|
1471
|
-
const alignCp = lcsAlign(gKeys, cKeys);
|
|
1472
|
-
const alignMe = lcsAlign(gKeys, mKeys);
|
|
1511
|
+
const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables);
|
|
1512
|
+
const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables);
|
|
1473
1513
|
const gToCp = new Array(gTables.length).fill(-1);
|
|
1474
1514
|
const cpToG = new Array(cTables.length).fill(-1);
|
|
1475
1515
|
for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
|
|
@@ -1580,7 +1620,7 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
|
|
|
1580
1620
|
placeholderToDiff
|
|
1581
1621
|
};
|
|
1582
1622
|
}
|
|
1583
|
-
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .
|
|
1623
|
+
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .15;
|
|
1584
1624
|
function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables) {
|
|
1585
1625
|
if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false;
|
|
1586
1626
|
for (let i = 0; i < gTables.length; i++) {
|
|
@@ -1595,6 +1635,59 @@ function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTa
|
|
|
1595
1635
|
function tableKey(html, table) {
|
|
1596
1636
|
return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
|
|
1597
1637
|
}
|
|
1638
|
+
/**
|
|
1639
|
+
* Character-level similarity above which the three-way aligner treats
|
|
1640
|
+
* two rows / tables as "the same logical entry, edited" rather than
|
|
1641
|
+
* an unrelated delete + insert. Matched to TableDiff's
|
|
1642
|
+
* `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
|
|
1643
|
+
* agree on which pairings are reachable; if a row's content overlap
|
|
1644
|
+
* is enough to fool the 2-way diff into pairing, it should also be
|
|
1645
|
+
* enough for 3-way.
|
|
1646
|
+
*/
|
|
1647
|
+
const THREE_WAY_FUZZY_THRESHOLD = .5;
|
|
1648
|
+
/**
|
|
1649
|
+
* Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
|
|
1650
|
+
* applies after its exact-LCS, but against one side of the genesis
|
|
1651
|
+
* spine (either cp or me). The genesis tables/rows are always the
|
|
1652
|
+
* "old" side; `newTable` is the cp or me table being aligned. Returns
|
|
1653
|
+
* the enriched alignment with additional paired entries.
|
|
1654
|
+
*
|
|
1655
|
+
* Cell-count guard: only fuzzy-pair when both rows have the same cell
|
|
1656
|
+
* count. Without this guard an asymmetric restructure — e.g. CP and
|
|
1657
|
+
* Me both added a different column — leads to ONE side fuzzy-pairing
|
|
1658
|
+
* its row with genesis (content overlap above threshold) while the
|
|
1659
|
+
* other side falls below threshold. That mismatch routes through
|
|
1660
|
+
* `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
|
|
1661
|
+
* branch, which emits CP's row as a Me-attributed deletion. In
|
|
1662
|
+
* cp-only mode `stripMeAttributedMarkers` then removes the row
|
|
1663
|
+
* entirely and CP's edit vanishes from the view — exactly the
|
|
1664
|
+
* content-loss case we're meant to prevent. Restricting fuzzy
|
|
1665
|
+
* pairing to same-shape rows preserves the common case (single cell
|
|
1666
|
+
* edit, identical row shape) while pushing structural mismatches
|
|
1667
|
+
* back to the boundary-insertion path that emits both sides
|
|
1668
|
+
* explicitly.
|
|
1669
|
+
*/
|
|
1670
|
+
function pairSimilarRowsThreeWay(alignment, genesis, newHtml, oldTable, newTable) {
|
|
1671
|
+
const oldTexts = oldTable.rows.map((r) => rowText(genesis, r));
|
|
1672
|
+
const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
|
|
1673
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
|
|
1674
|
+
if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0;
|
|
1675
|
+
return textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
|
|
1676
|
+
});
|
|
1677
|
+
}
|
|
1678
|
+
/**
|
|
1679
|
+
* Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
|
|
1680
|
+
* full table HTML keys, fuzzy-pair unmatched table runs by their
|
|
1681
|
+
* row-text-concatenated content. Without this, a table whose body
|
|
1682
|
+
* was edited (but not its outer shape) fails the exact-key match
|
|
1683
|
+
* and the preprocessing emits whole-table del + whole-table ins
|
|
1684
|
+
* instead of recursing into per-cell three-way diffs.
|
|
1685
|
+
*/
|
|
1686
|
+
function pairSimilarTablesThreeWay(alignment, oldHtml, newHtml, oldTables, newTables) {
|
|
1687
|
+
const oldTexts = oldTables.map((t) => t.rows.map((r) => rowText(oldHtml, r)).join(" "));
|
|
1688
|
+
const newTexts = newTables.map((t) => t.rows.map((r) => rowText(newHtml, r)).join(" "));
|
|
1689
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
|
|
1690
|
+
}
|
|
1598
1691
|
function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
|
|
1599
1692
|
if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
|
|
1600
1693
|
return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
|
|
@@ -1637,8 +1730,8 @@ function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
|
|
|
1637
1730
|
const gKeys = tG.rows.map((r) => rowKey(genesis, r));
|
|
1638
1731
|
const cKeys = tC.rows.map((r) => rowKey(cpLatest, r));
|
|
1639
1732
|
const mKeys = tM.rows.map((r) => rowKey(meCurrent, r));
|
|
1640
|
-
const alignCp = lcsAlign(gKeys, cKeys);
|
|
1641
|
-
const alignMe = lcsAlign(gKeys, mKeys);
|
|
1733
|
+
const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC);
|
|
1734
|
+
const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM);
|
|
1642
1735
|
const gToCp = new Array(tG.rows.length).fill(-1);
|
|
1643
1736
|
for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx;
|
|
1644
1737
|
const gToMe = new Array(tG.rows.length).fill(-1);
|
|
@@ -1695,7 +1788,20 @@ function emitPreservedRow(genesis, cpLatest, meCurrent, rG, rC, rM, cellDiff) {
|
|
|
1695
1788
|
out.push(genesis.slice(cursor, rG.rowEnd));
|
|
1696
1789
|
return out.join("");
|
|
1697
1790
|
}
|
|
1698
|
-
|
|
1791
|
+
const cpRestructured = rC.cells.length !== rG.cells.length;
|
|
1792
|
+
const meRestructured = rM.cells.length !== rG.cells.length;
|
|
1793
|
+
const blocks = [];
|
|
1794
|
+
if (cpRestructured && meRestructured) {
|
|
1795
|
+
blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
|
|
1796
|
+
blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
|
|
1797
|
+
} else if (cpRestructured) {
|
|
1798
|
+
blocks.push(emitFullRowAttributed(genesis, rG, "del", "cp"));
|
|
1799
|
+
blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
|
|
1800
|
+
} else {
|
|
1801
|
+
blocks.push(emitFullRowAttributed(genesis, rG, "del", "me"));
|
|
1802
|
+
blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
|
|
1803
|
+
}
|
|
1804
|
+
return blocks.join("");
|
|
1699
1805
|
}
|
|
1700
1806
|
/**
|
|
1701
1807
|
* Returns map "genesis-row-boundary → list of new-side row indices
|
|
@@ -1969,6 +2075,37 @@ var BlockFinder = class {
|
|
|
1969
2075
|
};
|
|
1970
2076
|
//#endregion
|
|
1971
2077
|
//#region src/HtmlDiff.ts
|
|
2078
|
+
/**
|
|
2079
|
+
* Opinionated options that align htmldiff's output with Microsoft Word's
|
|
2080
|
+
* track-changes rendering for legal-document rewrites.
|
|
2081
|
+
*
|
|
2082
|
+
* The library's bare default (`orphanMatchThreshold = 0`) keeps every
|
|
2083
|
+
* LCS match, however small — which fragments long sentence rewrites
|
|
2084
|
+
* into many tiny ins/del pairs around stray word matches ("of", "the",
|
|
2085
|
+
* "shall"). Word collapses those into a single coarse del+ins, which is
|
|
2086
|
+
* dramatically more readable for legal text.
|
|
2087
|
+
*
|
|
2088
|
+
* 0.25 was tuned empirically against a customer Word reference (US
|
|
2089
|
+
* Commercial One CP, May 2026):
|
|
2090
|
+
* - short edits (typo / one-word insert): output identical to
|
|
2091
|
+
* threshold=0 — inter-match distances are tiny so every match
|
|
2092
|
+
* trivially clears the bar;
|
|
2093
|
+
* - long rewrites (the "Specified Indebtedness" rewrite in the
|
|
2094
|
+
* reference): previously produced 6 dels + 5 ins fragmented around
|
|
2095
|
+
* stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
|
|
2096
|
+
* Word's 1+1 and a major readability win;
|
|
2097
|
+
* - higher values (0.3+) collapsed short edits containing inline
|
|
2098
|
+
* formatting changes into a single block — too aggressive.
|
|
2099
|
+
*
|
|
2100
|
+
* Consumers rendering legal documents should spread this into their
|
|
2101
|
+
* options:
|
|
2102
|
+
* `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
|
|
2103
|
+
* `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
|
|
2104
|
+
*
|
|
2105
|
+
* Other consumers (machine-readable diff, exact-token alignment) can
|
|
2106
|
+
* keep the bare default.
|
|
2107
|
+
*/
|
|
2108
|
+
const WORD_ALIGNED_OPTIONS = { orphanMatchThreshold: .25 };
|
|
1972
2109
|
var HtmlDiff = class HtmlDiff {
|
|
1973
2110
|
/**
|
|
1974
2111
|
* This value defines balance between speed and memory utilization. The higher it is the faster it works and more memory consumes.
|
|
@@ -2044,6 +2181,16 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2044
2181
|
newText;
|
|
2045
2182
|
oldText;
|
|
2046
2183
|
tablePreprocessDepth = 0;
|
|
2184
|
+
/**
|
|
2185
|
+
* Tracks currently-open formatting-tag wraps. Each entry pairs the
|
|
2186
|
+
* opening tag (so a later closing tag can find its match) with the
|
|
2187
|
+
* styling info needed to RE-OPEN the wrap if an overlapping
|
|
2188
|
+
* formatting-tag close forces it to split. Without the styling info,
|
|
2189
|
+
* an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
|
|
2190
|
+
* unclosable wrap (the closing tag for the outer wrap arrives while
|
|
2191
|
+
* an inner wrap is still on the stack); see `insertTag`'s closing
|
|
2192
|
+
* handler for the split logic.
|
|
2193
|
+
*/
|
|
2047
2194
|
specialTagDiffStack = [];
|
|
2048
2195
|
newWords = [];
|
|
2049
2196
|
oldWords = [];
|
|
@@ -2111,8 +2258,23 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2111
2258
|
this.oldText = oldText;
|
|
2112
2259
|
this.newText = newText;
|
|
2113
2260
|
}
|
|
2114
|
-
|
|
2115
|
-
|
|
2261
|
+
/**
|
|
2262
|
+
* Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
|
|
2263
|
+
* `executeThreeWay`, with two intentional exceptions documented
|
|
2264
|
+
* inline below. Consumers wanting Word-aligned output should spread
|
|
2265
|
+
* `WORD_ALIGNED_OPTIONS` into the third argument.
|
|
2266
|
+
*
|
|
2267
|
+
* Note: unlike `analyze`, `execute` runs `build()` which performs
|
|
2268
|
+
* full table preprocessing — `tablePreprocessDepth` stays at 0 so
|
|
2269
|
+
* the recursive cell diff can happen. Callers can't override that.
|
|
2270
|
+
*/
|
|
2271
|
+
static execute(oldText, newText, options = {}) {
|
|
2272
|
+
const inner = new HtmlDiff(oldText, newText);
|
|
2273
|
+
if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
|
|
2274
|
+
if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
|
|
2275
|
+
if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
|
|
2276
|
+
if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
|
|
2277
|
+
return inner.build();
|
|
2116
2278
|
}
|
|
2117
2279
|
/**
|
|
2118
2280
|
* Analyse a two-way diff and return its raw building blocks: the word
|
|
@@ -2181,22 +2343,6 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2181
2343
|
return HtmlDiff.shouldUseContentProjections(oldWords, newWords, oldProj, newProj);
|
|
2182
2344
|
}
|
|
2183
2345
|
/**
|
|
2184
|
-
* Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
|
|
2185
|
-
* version CP sent back), and V3 (Me's current draft), produces a
|
|
2186
|
-
* single attributed HTML output where CP's and Me's changes are
|
|
2187
|
-
* distinguished by `data-author` ('cp' or 'me') and matching
|
|
2188
|
-
* `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
|
|
2189
|
-
* CP's proposal" case (Me deleted text CP had inserted) gets a
|
|
2190
|
-
* dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
|
|
2191
|
-
*
|
|
2192
|
-
* Coordinates the symmetric-projection decision (D1) across both
|
|
2193
|
-
* internal `analyze` calls so V2 tokenises identically on each side
|
|
2194
|
-
* of the spine. When `useProjections` is left undefined, the decision
|
|
2195
|
-
* is the conjunction of both pair-wise heuristics — project iff both
|
|
2196
|
-
* pairs would project on their own. Pass an explicit boolean to
|
|
2197
|
-
* override.
|
|
2198
|
-
*/
|
|
2199
|
-
/**
|
|
2200
2346
|
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
2201
2347
|
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
2202
2348
|
* from Me's accumulated changes (genesis → meCurrent). Use this for
|
|
@@ -2243,6 +2389,25 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2243
2389
|
* buffer. Reusing the instance keeps the formatting-tag stack
|
|
2244
2390
|
* (`specialTagDiffStack`) coherent across segments — a `<strong>`
|
|
2245
2391
|
* opened in one segment and closed in another stays balanced.
|
|
2392
|
+
*
|
|
2393
|
+
* Edge case: an ins/del segment can open a formatting wrap whose
|
|
2394
|
+
* matching closer ends up in an equal segment (`<strong>` deleted
|
|
2395
|
+
* by CP but `</strong>` kept by both — buildSegments emits the open
|
|
2396
|
+
* as del-cp and the close as equal). Equal segments bypass
|
|
2397
|
+
* `insertTag` and push raw, so the stack entry for the open is
|
|
2398
|
+
* never popped. Rather than throw — which forces the caller's UI
|
|
2399
|
+
* into an error boundary — close every leftover wrap with `</ins>`
|
|
2400
|
+
* at the end of emission.
|
|
2401
|
+
*
|
|
2402
|
+
* Caveat: the `</ins>` close is honest for the mod-wrap that the
|
|
2403
|
+
* opener pushed (every formatting opener emits an inner `<ins…>`
|
|
2404
|
+
* postInject regardless of whether the outer segment is ins or
|
|
2405
|
+
* del). For del-segment formatting openers the outer `<del>` may
|
|
2406
|
+
* itself be left open by the same emission imbalance; this fixup
|
|
2407
|
+
* doesn't address that. Downstream browsers/DOMParser normalise
|
|
2408
|
+
* mildly-malformed HTML by closing dangling tags, so the rendered
|
|
2409
|
+
* output is usually acceptable — but the warning IS the signal
|
|
2410
|
+
* that the input had a real imbalance worth investigating.
|
|
2246
2411
|
*/
|
|
2247
2412
|
static emitSegments(segments) {
|
|
2248
2413
|
const emitter = new HtmlDiff("", "");
|
|
@@ -2254,7 +2419,13 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2254
2419
|
const { tag, baseClass, metadata } = segmentEmissionShape(seg.attr);
|
|
2255
2420
|
emitter.insertTag(tag, baseClass, [...seg.words], metadata);
|
|
2256
2421
|
}
|
|
2257
|
-
if (emitter.specialTagDiffStack.length > 0)
|
|
2422
|
+
if (emitter.specialTagDiffStack.length > 0) {
|
|
2423
|
+
console.warn(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting wrap(s) on the stack. Closing defensively. This usually means a formatting tag opens in a del/ins segment and its matching closer is in an equal segment.`);
|
|
2424
|
+
while (emitter.specialTagDiffStack.length > 0) {
|
|
2425
|
+
emitter.content.push("</ins>");
|
|
2426
|
+
emitter.specialTagDiffStack.pop();
|
|
2427
|
+
}
|
|
2428
|
+
}
|
|
2258
2429
|
return emitter.content.join("");
|
|
2259
2430
|
}
|
|
2260
2431
|
/**
|
|
@@ -2510,38 +2681,52 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2510
2681
|
if (words.length === 0) break;
|
|
2511
2682
|
const indexOfFirstNonTag = words.findIndex((x) => !Utils_default.isTag(x));
|
|
2512
2683
|
const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1;
|
|
2513
|
-
let
|
|
2514
|
-
let
|
|
2684
|
+
let preInject = "";
|
|
2685
|
+
let postInject = "";
|
|
2515
2686
|
if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
|
|
2516
2687
|
const tagNames = /* @__PURE__ */ new Set();
|
|
2517
2688
|
for (const word of words) if (Utils_default.isTag(word)) tagNames.add(Utils_default.getTagName(word));
|
|
2518
2689
|
const styledTagNames = Array.from(tagNames).join(" ");
|
|
2519
|
-
|
|
2520
|
-
|
|
2690
|
+
const styledCssClass = `mod ${styledTagNames}`;
|
|
2691
|
+
this.specialTagDiffStack.push({
|
|
2692
|
+
tag: words[0],
|
|
2693
|
+
styledTagNames,
|
|
2694
|
+
cssClass: styledCssClass,
|
|
2695
|
+
metadata
|
|
2696
|
+
});
|
|
2697
|
+
postInject = `<ins${Utils_default.composeTagAttributes(styledCssClass, metadata ?? {})}>`;
|
|
2521
2698
|
if (tag === HtmlDiff.DelTag) {
|
|
2522
2699
|
words.shift();
|
|
2523
2700
|
while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) words.shift();
|
|
2524
2701
|
}
|
|
2525
2702
|
} else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
|
|
2526
|
-
const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop();
|
|
2527
2703
|
let tagIndexToCompare = indexLastTagInFirstTagBlock;
|
|
2528
2704
|
if (tag === HtmlDiff.DelTag && indexOfFirstNonTag === -1) {
|
|
2529
2705
|
if (words.slice(0, indexLastTagInFirstTagBlock + 1).some((w) => !HtmlDiff.SpecialCaseClosingTagsSet.has(w.toLowerCase()))) tagIndexToCompare = 0;
|
|
2530
2706
|
}
|
|
2531
|
-
const
|
|
2532
|
-
|
|
2533
|
-
|
|
2534
|
-
|
|
2535
|
-
|
|
2707
|
+
const closingTagName = Utils_default.getTagName(words[tagIndexToCompare]);
|
|
2708
|
+
let matchIdx = -1;
|
|
2709
|
+
for (let i = this.specialTagDiffStack.length - 1; i >= 0; i--) if (Utils_default.getTagName(this.specialTagDiffStack[i].tag) === closingTagName) {
|
|
2710
|
+
matchIdx = i;
|
|
2711
|
+
break;
|
|
2712
|
+
}
|
|
2713
|
+
if (matchIdx >= 0) {
|
|
2714
|
+
const aboveEntries = this.specialTagDiffStack.splice(matchIdx + 1);
|
|
2715
|
+
this.specialTagDiffStack.pop();
|
|
2716
|
+
preInject = "</ins>".repeat(aboveEntries.length + 1);
|
|
2717
|
+
for (const entry of aboveEntries) {
|
|
2718
|
+
postInject += `<ins${Utils_default.composeTagAttributes(entry.cssClass, entry.metadata ?? {})}>`;
|
|
2719
|
+
this.specialTagDiffStack.push(entry);
|
|
2720
|
+
}
|
|
2721
|
+
}
|
|
2536
2722
|
if (tag === HtmlDiff.DelTag) {
|
|
2537
2723
|
words.shift();
|
|
2538
2724
|
while (words.length > 0 && HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) words.shift();
|
|
2539
2725
|
}
|
|
2540
2726
|
}
|
|
2541
|
-
if (words.length === 0 &&
|
|
2727
|
+
if (words.length === 0 && preInject.length === 0 && postInject.length === 0) break;
|
|
2542
2728
|
const isTagForExtraction = tag === HtmlDiff.DelTag ? (x) => Utils_default.isTag(x) && !HtmlDiff.SpecialCaseOpeningTagRegex.test(x) && !HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase()) : Utils_default.isTag;
|
|
2543
|
-
|
|
2544
|
-
else this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join("") + specialCaseTagInjection);
|
|
2729
|
+
this.content.push(preInject + this.extractConsecutiveWords(words, isTagForExtraction).join("") + postInject);
|
|
2545
2730
|
if (words.length === 0) continue;
|
|
2546
2731
|
this.insertTag(tag, cssClass, words, metadata);
|
|
2547
2732
|
break;
|
|
@@ -2606,6 +2791,17 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2606
2791
|
curr = next;
|
|
2607
2792
|
continue;
|
|
2608
2793
|
}
|
|
2794
|
+
let allTags = true;
|
|
2795
|
+
for (let i = curr.startInNew; i < curr.endInNew; i++) if (!Utils_default.isTag(wordsForDiffNew[i])) {
|
|
2796
|
+
allTags = false;
|
|
2797
|
+
break;
|
|
2798
|
+
}
|
|
2799
|
+
if (allTags) {
|
|
2800
|
+
yield curr;
|
|
2801
|
+
prev = curr;
|
|
2802
|
+
curr = next;
|
|
2803
|
+
continue;
|
|
2804
|
+
}
|
|
2609
2805
|
let oldDistanceInChars = 0;
|
|
2610
2806
|
for (let i = prev.endInOld; i < next.startInOld; i++) oldDistanceInChars += wordsForDiffOld[i].length;
|
|
2611
2807
|
let newDistanceInChars = 0;
|
|
@@ -2648,6 +2844,7 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2648
2844
|
}
|
|
2649
2845
|
};
|
|
2650
2846
|
//#endregion
|
|
2651
|
-
|
|
2847
|
+
exports.WORD_ALIGNED_OPTIONS = WORD_ALIGNED_OPTIONS;
|
|
2848
|
+
exports.default = HtmlDiff;
|
|
2652
2849
|
|
|
2653
2850
|
//# sourceMappingURL=HtmlDiff.cjs.map
|