@createiq/htmldiff 1.2.0-beta.1 → 1.2.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +249 -52
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +72 -18
- package/dist/HtmlDiff.d.mts +72 -18
- package/dist/HtmlDiff.mjs +244 -52
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +172 -48
- package/src/ThreeWayDiff.ts +58 -11
- package/src/ThreeWayTable.ts +143 -9
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +232 -6
- package/test/HtmlDiff.threeWay.tables.spec.ts +111 -1
- package/test/Utils.spec.ts +3 -3
package/dist/HtmlDiff.mjs
CHANGED
|
@@ -1333,13 +1333,33 @@ function collectInsertionsKeyedByEnd(d) {
|
|
|
1333
1333
|
return out;
|
|
1334
1334
|
}
|
|
1335
1335
|
/**
|
|
1336
|
-
* Emit any insertions at boundary `b`. When both sides inserted at
|
|
1337
|
-
* the same boundary AND the inserted token sequences are textually
|
|
1338
|
-
* identical, the insertion is treated as agreed and emitted unmarked.
|
|
1339
|
-
* Otherwise each side's insertion is emitted with author attribution.
|
|
1336
|
+
* Emit any insertions at boundary `b`.
|
|
1340
1337
|
*
|
|
1341
|
-
*
|
|
1342
|
-
*
|
|
1338
|
+
* Reading model: a legal reviewer wants to see CP's INTENT relative
|
|
1339
|
+
* to Me's current content. Me's content is the base; CP's deltas are
|
|
1340
|
+
* what they need to act on. Under that framing:
|
|
1341
|
+
* - tokens both authors inserted at the same boundary → settled
|
|
1342
|
+
* - tokens CP inserted that Me doesn't have → ins-cp (CP wants
|
|
1343
|
+
* this added)
|
|
1344
|
+
* - tokens Me inserted that CP doesn't have → del-cp (CP wants
|
|
1345
|
+
* this removed from Me's content)
|
|
1346
|
+
*
|
|
1347
|
+
* The third case is the load-bearing attribution flip. The
|
|
1348
|
+
* genesis-spine view technically labels me-only-at-boundary tokens
|
|
1349
|
+
* as "ins-me" (Me added them; CP didn't), but that's confusing to
|
|
1350
|
+
* a reviewer: they see "Me added X" alongside "CP added Y" and have
|
|
1351
|
+
* to mentally derive "CP wants X gone, replaced with Y". Surfacing
|
|
1352
|
+
* me-only tokens as `del-cp` shows CP's intent directly:
|
|
1353
|
+
* - "CP accepted Me's text minus `things`": settled bulk + del-cp
|
|
1354
|
+
* `things` (no parallel redundant insertions)
|
|
1355
|
+
* - "CP wants `cruel` where Me wrote `brave`": ins-cp `cruel` +
|
|
1356
|
+
* del-cp `brave` (the substitution intent reads directly)
|
|
1357
|
+
* - "CP added extra words": cp-extras stay as ins-cp (same as
|
|
1358
|
+
* before; the cp-only direction was always intent-correct)
|
|
1359
|
+
*
|
|
1360
|
+
* Pure single-side insertions (Me added text CP doesn't engage
|
|
1361
|
+
* with at all, or vice versa) keep their genesis-spine attribution
|
|
1362
|
+
* — these aren't refinement cases, just Me's own content additions.
|
|
1343
1363
|
*/
|
|
1344
1364
|
function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments) {
|
|
1345
1365
|
const cpIns = cpInsAt.get(b);
|
|
@@ -1347,18 +1367,34 @@ function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments)
|
|
|
1347
1367
|
const hasCp = !!cpIns && cpIns.length > 0;
|
|
1348
1368
|
const hasMe = !!meIns && meIns.length > 0;
|
|
1349
1369
|
if (!hasCp && !hasMe) return;
|
|
1350
|
-
if (hasCp
|
|
1370
|
+
if (!hasCp) {
|
|
1371
|
+
appendSegment(segments, {
|
|
1372
|
+
kind: "ins",
|
|
1373
|
+
author: "me"
|
|
1374
|
+
}, meIns);
|
|
1375
|
+
return;
|
|
1376
|
+
}
|
|
1377
|
+
if (!hasMe) {
|
|
1378
|
+
appendSegment(segments, {
|
|
1379
|
+
kind: "ins",
|
|
1380
|
+
author: "cp"
|
|
1381
|
+
}, cpIns);
|
|
1382
|
+
return;
|
|
1383
|
+
}
|
|
1384
|
+
if (tokenArraysEqual(cpIns, meIns)) {
|
|
1351
1385
|
appendSegment(segments, { kind: "equal" }, cpIns);
|
|
1352
1386
|
return;
|
|
1353
1387
|
}
|
|
1354
|
-
|
|
1388
|
+
const alignment = lcsAlign(cpIns, meIns);
|
|
1389
|
+
for (const a of alignment) if (a.oldIdx !== null && a.newIdx !== null) appendSegment(segments, { kind: "equal" }, [cpIns[a.oldIdx]]);
|
|
1390
|
+
else if (a.oldIdx !== null) appendSegment(segments, {
|
|
1355
1391
|
kind: "ins",
|
|
1356
1392
|
author: "cp"
|
|
1357
|
-
}, cpIns);
|
|
1358
|
-
if (
|
|
1359
|
-
kind: "
|
|
1360
|
-
author: "
|
|
1361
|
-
}, meIns);
|
|
1393
|
+
}, [cpIns[a.oldIdx]]);
|
|
1394
|
+
else if (a.newIdx !== null) appendSegment(segments, {
|
|
1395
|
+
kind: "del",
|
|
1396
|
+
author: "cp"
|
|
1397
|
+
}, [meIns[a.newIdx]]);
|
|
1362
1398
|
}
|
|
1363
1399
|
function tokenArraysEqual(a, b) {
|
|
1364
1400
|
if (a.length !== b.length) return false;
|
|
@@ -1468,8 +1504,8 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
|
|
|
1468
1504
|
const gKeys = gTables.map((t) => tableKey(genesis, t));
|
|
1469
1505
|
const cKeys = cTables.map((t) => tableKey(cpLatest, t));
|
|
1470
1506
|
const mKeys = mTables.map((t) => tableKey(meCurrent, t));
|
|
1471
|
-
const alignCp = lcsAlign(gKeys, cKeys);
|
|
1472
|
-
const alignMe = lcsAlign(gKeys, mKeys);
|
|
1507
|
+
const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables);
|
|
1508
|
+
const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables);
|
|
1473
1509
|
const gToCp = new Array(gTables.length).fill(-1);
|
|
1474
1510
|
const cpToG = new Array(cTables.length).fill(-1);
|
|
1475
1511
|
for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
|
|
@@ -1580,7 +1616,7 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
|
|
|
1580
1616
|
placeholderToDiff
|
|
1581
1617
|
};
|
|
1582
1618
|
}
|
|
1583
|
-
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .
|
|
1619
|
+
const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .15;
|
|
1584
1620
|
function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables) {
|
|
1585
1621
|
if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false;
|
|
1586
1622
|
for (let i = 0; i < gTables.length; i++) {
|
|
@@ -1595,6 +1631,59 @@ function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTa
|
|
|
1595
1631
|
function tableKey(html, table) {
|
|
1596
1632
|
return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
|
|
1597
1633
|
}
|
|
1634
|
+
/**
|
|
1635
|
+
* Character-level similarity above which the three-way aligner treats
|
|
1636
|
+
* two rows / tables as "the same logical entry, edited" rather than
|
|
1637
|
+
* an unrelated delete + insert. Matched to TableDiff's
|
|
1638
|
+
* `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
|
|
1639
|
+
* agree on which pairings are reachable; if a row's content overlap
|
|
1640
|
+
* is enough to fool the 2-way diff into pairing, it should also be
|
|
1641
|
+
* enough for 3-way.
|
|
1642
|
+
*/
|
|
1643
|
+
const THREE_WAY_FUZZY_THRESHOLD = .5;
|
|
1644
|
+
/**
|
|
1645
|
+
* Run the same fuzzy-pairing pass that `TableDiff.pairSimilarUnmatchedRows`
|
|
1646
|
+
* applies after its exact-LCS, but against one side of the genesis
|
|
1647
|
+
* spine (either cp or me). The genesis tables/rows are always the
|
|
1648
|
+
* "old" side; `newTable` is the cp or me table being aligned. Returns
|
|
1649
|
+
* the enriched alignment with additional paired entries.
|
|
1650
|
+
*
|
|
1651
|
+
* Cell-count guard: only fuzzy-pair when both rows have the same cell
|
|
1652
|
+
* count. Without this guard an asymmetric restructure — e.g. CP and
|
|
1653
|
+
* Me both added a different column — leads to ONE side fuzzy-pairing
|
|
1654
|
+
* its row with genesis (content overlap above threshold) while the
|
|
1655
|
+
* other side falls below threshold. That mismatch routes through
|
|
1656
|
+
* `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
|
|
1657
|
+
* branch, which emits CP's row as a Me-attributed deletion. In
|
|
1658
|
+
* cp-only mode `stripMeAttributedMarkers` then removes the row
|
|
1659
|
+
* entirely and CP's edit vanishes from the view — exactly the
|
|
1660
|
+
* content-loss case we're meant to prevent. Restricting fuzzy
|
|
1661
|
+
* pairing to same-shape rows preserves the common case (single cell
|
|
1662
|
+
* edit, identical row shape) while pushing structural mismatches
|
|
1663
|
+
* back to the boundary-insertion path that emits both sides
|
|
1664
|
+
* explicitly.
|
|
1665
|
+
*/
|
|
1666
|
+
function pairSimilarRowsThreeWay(alignment, genesis, newHtml, oldTable, newTable) {
|
|
1667
|
+
const oldTexts = oldTable.rows.map((r) => rowText(genesis, r));
|
|
1668
|
+
const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
|
|
1669
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
|
|
1670
|
+
if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0;
|
|
1671
|
+
return textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
|
|
1672
|
+
});
|
|
1673
|
+
}
|
|
1674
|
+
/**
|
|
1675
|
+
* Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
|
|
1676
|
+
* full table HTML keys, fuzzy-pair unmatched table runs by their
|
|
1677
|
+
* row-text-concatenated content. Without this, a table whose body
|
|
1678
|
+
* was edited (but not its outer shape) fails the exact-key match
|
|
1679
|
+
* and the preprocessing emits whole-table del + whole-table ins
|
|
1680
|
+
* instead of recursing into per-cell three-way diffs.
|
|
1681
|
+
*/
|
|
1682
|
+
function pairSimilarTablesThreeWay(alignment, oldHtml, newHtml, oldTables, newTables) {
|
|
1683
|
+
const oldTexts = oldTables.map((t) => t.rows.map((r) => rowText(oldHtml, r)).join(" "));
|
|
1684
|
+
const newTexts = newTables.map((t) => t.rows.map((r) => rowText(newHtml, r)).join(" "));
|
|
1685
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
|
|
1686
|
+
}
|
|
1598
1687
|
function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
|
|
1599
1688
|
if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
|
|
1600
1689
|
return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
|
|
@@ -1637,8 +1726,8 @@ function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
|
|
|
1637
1726
|
const gKeys = tG.rows.map((r) => rowKey(genesis, r));
|
|
1638
1727
|
const cKeys = tC.rows.map((r) => rowKey(cpLatest, r));
|
|
1639
1728
|
const mKeys = tM.rows.map((r) => rowKey(meCurrent, r));
|
|
1640
|
-
const alignCp = lcsAlign(gKeys, cKeys);
|
|
1641
|
-
const alignMe = lcsAlign(gKeys, mKeys);
|
|
1729
|
+
const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC);
|
|
1730
|
+
const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM);
|
|
1642
1731
|
const gToCp = new Array(tG.rows.length).fill(-1);
|
|
1643
1732
|
for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx;
|
|
1644
1733
|
const gToMe = new Array(tG.rows.length).fill(-1);
|
|
@@ -1695,7 +1784,20 @@ function emitPreservedRow(genesis, cpLatest, meCurrent, rG, rC, rM, cellDiff) {
|
|
|
1695
1784
|
out.push(genesis.slice(cursor, rG.rowEnd));
|
|
1696
1785
|
return out.join("");
|
|
1697
1786
|
}
|
|
1698
|
-
|
|
1787
|
+
const cpRestructured = rC.cells.length !== rG.cells.length;
|
|
1788
|
+
const meRestructured = rM.cells.length !== rG.cells.length;
|
|
1789
|
+
const blocks = [];
|
|
1790
|
+
if (cpRestructured && meRestructured) {
|
|
1791
|
+
blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
|
|
1792
|
+
blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
|
|
1793
|
+
} else if (cpRestructured) {
|
|
1794
|
+
blocks.push(emitFullRowAttributed(genesis, rG, "del", "cp"));
|
|
1795
|
+
blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
|
|
1796
|
+
} else {
|
|
1797
|
+
blocks.push(emitFullRowAttributed(genesis, rG, "del", "me"));
|
|
1798
|
+
blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
|
|
1799
|
+
}
|
|
1800
|
+
return blocks.join("");
|
|
1699
1801
|
}
|
|
1700
1802
|
/**
|
|
1701
1803
|
* Returns map "genesis-row-boundary → list of new-side row indices
|
|
@@ -1969,6 +2071,37 @@ var BlockFinder = class {
|
|
|
1969
2071
|
};
|
|
1970
2072
|
//#endregion
|
|
1971
2073
|
//#region src/HtmlDiff.ts
|
|
2074
|
+
/**
|
|
2075
|
+
* Opinionated options that align htmldiff's output with Microsoft Word's
|
|
2076
|
+
* track-changes rendering for legal-document rewrites.
|
|
2077
|
+
*
|
|
2078
|
+
* The library's bare default (`orphanMatchThreshold = 0`) keeps every
|
|
2079
|
+
* LCS match, however small — which fragments long sentence rewrites
|
|
2080
|
+
* into many tiny ins/del pairs around stray word matches ("of", "the",
|
|
2081
|
+
* "shall"). Word collapses those into a single coarse del+ins, which is
|
|
2082
|
+
* dramatically more readable for legal text.
|
|
2083
|
+
*
|
|
2084
|
+
* 0.25 was tuned empirically against a customer Word reference (US
|
|
2085
|
+
* Commercial One CP, May 2026):
|
|
2086
|
+
* - short edits (typo / one-word insert): output identical to
|
|
2087
|
+
* threshold=0 — inter-match distances are tiny so every match
|
|
2088
|
+
* trivially clears the bar;
|
|
2089
|
+
* - long rewrites (the "Specified Indebtedness" rewrite in the
|
|
2090
|
+
* reference): previously produced 6 dels + 5 ins fragmented around
|
|
2091
|
+
* stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
|
|
2092
|
+
* Word's 1+1 and a major readability win;
|
|
2093
|
+
* - higher values (0.3+) collapsed short edits containing inline
|
|
2094
|
+
* formatting changes into a single block — too aggressive.
|
|
2095
|
+
*
|
|
2096
|
+
* Consumers rendering legal documents should spread this into their
|
|
2097
|
+
* options:
|
|
2098
|
+
* `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
|
|
2099
|
+
* `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
|
|
2100
|
+
*
|
|
2101
|
+
* Other consumers (machine-readable diff, exact-token alignment) can
|
|
2102
|
+
* keep the bare default.
|
|
2103
|
+
*/
|
|
2104
|
+
const WORD_ALIGNED_OPTIONS = { orphanMatchThreshold: .25 };
|
|
1972
2105
|
var HtmlDiff = class HtmlDiff {
|
|
1973
2106
|
/**
|
|
1974
2107
|
* This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
|
|
@@ -2044,6 +2177,16 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2044
2177
|
newText;
|
|
2045
2178
|
oldText;
|
|
2046
2179
|
tablePreprocessDepth = 0;
|
|
2180
|
+
/**
|
|
2181
|
+
* Tracks currently-open formatting-tag wraps. Each entry pairs the
|
|
2182
|
+
* opening tag (so a later closing tag can find its match) with the
|
|
2183
|
+
* styling info needed to RE-OPEN the wrap if an overlapping
|
|
2184
|
+
* formatting-tag close forces it to split. Without the styling info,
|
|
2185
|
+
* an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
|
|
2186
|
+
* unclosable wrap (the closing tag for the outer wrap arrives while
|
|
2187
|
+
* an inner wrap is still on the stack); see `insertTag`'s closing
|
|
2188
|
+
* handler for the split logic.
|
|
2189
|
+
*/
|
|
2047
2190
|
specialTagDiffStack = [];
|
|
2048
2191
|
newWords = [];
|
|
2049
2192
|
oldWords = [];
|
|
@@ -2111,8 +2254,23 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2111
2254
|
this.oldText = oldText;
|
|
2112
2255
|
this.newText = newText;
|
|
2113
2256
|
}
|
|
2114
|
-
|
|
2115
|
-
|
|
2257
|
+
/**
|
|
2258
|
+
* Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
|
|
2259
|
+
* `executeThreeWay`, with two intentional exceptions documented
|
|
2260
|
+
* inline below. Consumers wanting Word-aligned output should spread
|
|
2261
|
+
* `WORD_ALIGNED_OPTIONS` into the third argument.
|
|
2262
|
+
*
|
|
2263
|
+
* Note: unlike `analyze`, `execute` runs `build()` which performs
|
|
2264
|
+
* full table preprocessing — `tablePreprocessDepth` stays at 0 so
|
|
2265
|
+
* the recursive cell diff can happen. Callers can't override that.
|
|
2266
|
+
*/
|
|
2267
|
+
static execute(oldText, newText, options = {}) {
|
|
2268
|
+
const inner = new HtmlDiff(oldText, newText);
|
|
2269
|
+
if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
|
|
2270
|
+
if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
|
|
2271
|
+
if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
|
|
2272
|
+
if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
|
|
2273
|
+
return inner.build();
|
|
2116
2274
|
}
|
|
2117
2275
|
/**
|
|
2118
2276
|
* Analyse a two-way diff and return its raw building blocks: the word
|
|
@@ -2181,22 +2339,6 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2181
2339
|
return HtmlDiff.shouldUseContentProjections(oldWords, newWords, oldProj, newProj);
|
|
2182
2340
|
}
|
|
2183
2341
|
/**
|
|
2184
|
-
* Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
|
|
2185
|
-
* version CP sent back), and V3 (Me's current draft), produces a
|
|
2186
|
-
* single attributed HTML output where CP's and Me's changes are
|
|
2187
|
-
* distinguished by `data-author` ('cp' or 'me') and matching
|
|
2188
|
-
* `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
|
|
2189
|
-
* CP's proposal" case (Me deleted text CP had inserted) gets a
|
|
2190
|
-
* dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
|
|
2191
|
-
*
|
|
2192
|
-
* Coordinates the symmetric-projection decision (D1) across both
|
|
2193
|
-
* internal `analyze` calls so V2 tokenises identically on each side
|
|
2194
|
-
* of the spine. When `useProjections` is left undefined, the decision
|
|
2195
|
-
* is the conjunction of both pair-wise heuristics — project iff both
|
|
2196
|
-
* pairs would project on their own. Pass an explicit boolean to
|
|
2197
|
-
* override.
|
|
2198
|
-
*/
|
|
2199
|
-
/**
|
|
2200
2342
|
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
2201
2343
|
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
2202
2344
|
* from Me's accumulated changes (genesis → meCurrent). Use this for
|
|
@@ -2243,6 +2385,25 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2243
2385
|
* buffer. Reusing the instance keeps the formatting-tag stack
|
|
2244
2386
|
* (`specialTagDiffStack`) coherent across segments — a `<strong>`
|
|
2245
2387
|
* opened in one segment and closed in another stays balanced.
|
|
2388
|
+
*
|
|
2389
|
+
* Edge case: an ins/del segment can open a formatting wrap whose
|
|
2390
|
+
* matching closer ends up in an equal segment (`<strong>` deleted
|
|
2391
|
+
* by CP but `</strong>` kept by both — buildSegments emits the open
|
|
2392
|
+
* as del-cp and the close as equal). Equal segments bypass
|
|
2393
|
+
* `insertTag` and push raw, so the stack entry for the open is
|
|
2394
|
+
* never popped. Rather than throw — which forces the caller's UI
|
|
2395
|
+
* into an error boundary — close every leftover wrap with `</ins>`
|
|
2396
|
+
* at the end of emission.
|
|
2397
|
+
*
|
|
2398
|
+
* Caveat: the `</ins>` close is honest for the mod-wrap that the
|
|
2399
|
+
* opener pushed (every formatting opener emits an inner `<ins…>`
|
|
2400
|
+
* postInject regardless of whether the outer segment is ins or
|
|
2401
|
+
* del). For del-segment formatting openers the outer `<del>` may
|
|
2402
|
+
* itself be left open by the same emission imbalance; this fixup
|
|
2403
|
+
* doesn't address that. Downstream browsers/DOMParser normalise
|
|
2404
|
+
* mildly-malformed HTML by closing dangling tags, so the rendered
|
|
2405
|
+
* output is usually acceptable — but the warning IS the signal
|
|
2406
|
+
* that the input had a real imbalance worth investigating.
|
|
2246
2407
|
*/
|
|
2247
2408
|
static emitSegments(segments) {
|
|
2248
2409
|
const emitter = new HtmlDiff("", "");
|
|
@@ -2254,7 +2415,13 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2254
2415
|
const { tag, baseClass, metadata } = segmentEmissionShape(seg.attr);
|
|
2255
2416
|
emitter.insertTag(tag, baseClass, [...seg.words], metadata);
|
|
2256
2417
|
}
|
|
2257
|
-
if (emitter.specialTagDiffStack.length > 0)
|
|
2418
|
+
if (emitter.specialTagDiffStack.length > 0) {
|
|
2419
|
+
console.warn(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting wrap(s) on the stack. Closing defensively. This usually means a formatting tag opens in a del/ins segment and its matching closer is in an equal segment.`);
|
|
2420
|
+
while (emitter.specialTagDiffStack.length > 0) {
|
|
2421
|
+
emitter.content.push("</ins>");
|
|
2422
|
+
emitter.specialTagDiffStack.pop();
|
|
2423
|
+
}
|
|
2424
|
+
}
|
|
2258
2425
|
return emitter.content.join("");
|
|
2259
2426
|
}
|
|
2260
2427
|
/**
|
|
@@ -2510,38 +2677,52 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2510
2677
|
if (words.length === 0) break;
|
|
2511
2678
|
const indexOfFirstNonTag = words.findIndex((x) => !Utils_default.isTag(x));
|
|
2512
2679
|
const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1;
|
|
2513
|
-
let
|
|
2514
|
-
let
|
|
2680
|
+
let preInject = "";
|
|
2681
|
+
let postInject = "";
|
|
2515
2682
|
if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
|
|
2516
2683
|
const tagNames = /* @__PURE__ */ new Set();
|
|
2517
2684
|
for (const word of words) if (Utils_default.isTag(word)) tagNames.add(Utils_default.getTagName(word));
|
|
2518
2685
|
const styledTagNames = Array.from(tagNames).join(" ");
|
|
2519
|
-
|
|
2520
|
-
|
|
2686
|
+
const styledCssClass = `mod ${styledTagNames}`;
|
|
2687
|
+
this.specialTagDiffStack.push({
|
|
2688
|
+
tag: words[0],
|
|
2689
|
+
styledTagNames,
|
|
2690
|
+
cssClass: styledCssClass,
|
|
2691
|
+
metadata
|
|
2692
|
+
});
|
|
2693
|
+
postInject = `<ins${Utils_default.composeTagAttributes(styledCssClass, metadata ?? {})}>`;
|
|
2521
2694
|
if (tag === HtmlDiff.DelTag) {
|
|
2522
2695
|
words.shift();
|
|
2523
2696
|
while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) words.shift();
|
|
2524
2697
|
}
|
|
2525
2698
|
} else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
|
|
2526
|
-
const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop();
|
|
2527
2699
|
let tagIndexToCompare = indexLastTagInFirstTagBlock;
|
|
2528
2700
|
if (tag === HtmlDiff.DelTag && indexOfFirstNonTag === -1) {
|
|
2529
2701
|
if (words.slice(0, indexLastTagInFirstTagBlock + 1).some((w) => !HtmlDiff.SpecialCaseClosingTagsSet.has(w.toLowerCase()))) tagIndexToCompare = 0;
|
|
2530
2702
|
}
|
|
2531
|
-
const
|
|
2532
|
-
|
|
2533
|
-
|
|
2534
|
-
|
|
2535
|
-
|
|
2703
|
+
const closingTagName = Utils_default.getTagName(words[tagIndexToCompare]);
|
|
2704
|
+
let matchIdx = -1;
|
|
2705
|
+
for (let i = this.specialTagDiffStack.length - 1; i >= 0; i--) if (Utils_default.getTagName(this.specialTagDiffStack[i].tag) === closingTagName) {
|
|
2706
|
+
matchIdx = i;
|
|
2707
|
+
break;
|
|
2708
|
+
}
|
|
2709
|
+
if (matchIdx >= 0) {
|
|
2710
|
+
const aboveEntries = this.specialTagDiffStack.splice(matchIdx + 1);
|
|
2711
|
+
this.specialTagDiffStack.pop();
|
|
2712
|
+
preInject = "</ins>".repeat(aboveEntries.length + 1);
|
|
2713
|
+
for (const entry of aboveEntries) {
|
|
2714
|
+
postInject += `<ins${Utils_default.composeTagAttributes(entry.cssClass, entry.metadata ?? {})}>`;
|
|
2715
|
+
this.specialTagDiffStack.push(entry);
|
|
2716
|
+
}
|
|
2717
|
+
}
|
|
2536
2718
|
if (tag === HtmlDiff.DelTag) {
|
|
2537
2719
|
words.shift();
|
|
2538
2720
|
while (words.length > 0 && HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) words.shift();
|
|
2539
2721
|
}
|
|
2540
2722
|
}
|
|
2541
|
-
if (words.length === 0 &&
|
|
2723
|
+
if (words.length === 0 && preInject.length === 0 && postInject.length === 0) break;
|
|
2542
2724
|
const isTagForExtraction = tag === HtmlDiff.DelTag ? (x) => Utils_default.isTag(x) && !HtmlDiff.SpecialCaseOpeningTagRegex.test(x) && !HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase()) : Utils_default.isTag;
|
|
2543
|
-
|
|
2544
|
-
else this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join("") + specialCaseTagInjection);
|
|
2725
|
+
this.content.push(preInject + this.extractConsecutiveWords(words, isTagForExtraction).join("") + postInject);
|
|
2545
2726
|
if (words.length === 0) continue;
|
|
2546
2727
|
this.insertTag(tag, cssClass, words, metadata);
|
|
2547
2728
|
break;
|
|
@@ -2606,6 +2787,17 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2606
2787
|
curr = next;
|
|
2607
2788
|
continue;
|
|
2608
2789
|
}
|
|
2790
|
+
let allTags = true;
|
|
2791
|
+
for (let i = curr.startInNew; i < curr.endInNew; i++) if (!Utils_default.isTag(wordsForDiffNew[i])) {
|
|
2792
|
+
allTags = false;
|
|
2793
|
+
break;
|
|
2794
|
+
}
|
|
2795
|
+
if (allTags) {
|
|
2796
|
+
yield curr;
|
|
2797
|
+
prev = curr;
|
|
2798
|
+
curr = next;
|
|
2799
|
+
continue;
|
|
2800
|
+
}
|
|
2609
2801
|
let oldDistanceInChars = 0;
|
|
2610
2802
|
for (let i = prev.endInOld; i < next.startInOld; i++) oldDistanceInChars += wordsForDiffOld[i].length;
|
|
2611
2803
|
let newDistanceInChars = 0;
|
|
@@ -2648,6 +2840,6 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2648
2840
|
}
|
|
2649
2841
|
};
|
|
2650
2842
|
//#endregion
|
|
2651
|
-
export { HtmlDiff as default };
|
|
2843
|
+
export { WORD_ALIGNED_OPTIONS, HtmlDiff as default };
|
|
2652
2844
|
|
|
2653
2845
|
//# sourceMappingURL=HtmlDiff.mjs.map
|