@createiq/htmldiff 1.2.0-beta.4 → 1.2.0-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +64 -6
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +1 -1
- package/dist/HtmlDiff.d.mts +1 -1
- package/dist/HtmlDiff.mjs +64 -6
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +14 -2
- package/src/ThreeWayTable.ts +91 -5
- package/test/HtmlDiff.threeWay.tables.spec.ts +28 -0
package/dist/HtmlDiff.cjs
CHANGED
|
@@ -1468,8 +1468,8 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
|
|
|
1468
1468
|
const gKeys = gTables.map((t) => tableKey(genesis, t));
|
|
1469
1469
|
const cKeys = cTables.map((t) => tableKey(cpLatest, t));
|
|
1470
1470
|
const mKeys = mTables.map((t) => tableKey(meCurrent, t));
|
|
1471
|
-
const alignCp = lcsAlign(gKeys, cKeys);
|
|
1472
|
-
const alignMe = lcsAlign(gKeys, mKeys);
|
|
1471
|
+
const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables);
|
|
1472
|
+
const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables);
|
|
1473
1473
|
const gToCp = new Array(gTables.length).fill(-1);
|
|
1474
1474
|
const cpToG = new Array(cTables.length).fill(-1);
|
|
1475
1475
|
for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
|
|
@@ -1595,6 +1595,59 @@ function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTa
|
|
|
1595
1595
|
function tableKey(html, table) {
|
|
1596
1596
|
return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
|
|
1597
1597
|
}
|
|
1598
|
+
/**
|
|
1599
|
+
* Character-level similarity above which the three-way aligner treats
|
|
1600
|
+
* two rows / tables as "the same logical entry, edited" rather than
|
|
1601
|
+
* an unrelated delete + insert. Matched to TableDiff's
|
|
1602
|
+
* `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
|
|
1603
|
+
* agree on which pairings are reachable; if a row's content overlap
|
|
1604
|
+
* is enough to fool the 2-way diff into pairing, it should also be
|
|
1605
|
+
* enough for 3-way.
|
|
1606
|
+
*/
|
|
1607
|
+
const THREE_WAY_FUZZY_THRESHOLD = .5;
|
|
1608
|
+
/**
|
|
1609
|
+
* Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
|
|
1610
|
+
* applies after its exact-LCS, but against one side of the genesis
|
|
1611
|
+
* spine (either cp or me). The genesis tables/rows are always the
|
|
1612
|
+
* "old" side; `newTable` is the cp or me table being aligned. Returns
|
|
1613
|
+
* the enriched alignment with additional paired entries.
|
|
1614
|
+
*
|
|
1615
|
+
* Cell-count guard: only fuzzy-pair when both rows have the same cell
|
|
1616
|
+
* count. Without this guard an asymmetric restructure — e.g. CP and
|
|
1617
|
+
* Me both added a different column — leads to ONE side fuzzy-pairing
|
|
1618
|
+
* its row with genesis (content overlap above threshold) while the
|
|
1619
|
+
* other side falls below threshold. That mismatch routes through
|
|
1620
|
+
* `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
|
|
1621
|
+
* branch, which emits CP's row as a Me-attributed deletion. In
|
|
1622
|
+
* cp-only mode `stripMeAttributedMarkers` then removes the row
|
|
1623
|
+
* entirely and CP's edit vanishes from the view — exactly the
|
|
1624
|
+
* content-loss case we're meant to prevent. Restricting fuzzy
|
|
1625
|
+
* pairing to same-shape rows preserves the common case (single cell
|
|
1626
|
+
* edit, identical row shape) while pushing structural mismatches
|
|
1627
|
+
* back to the boundary-insertion path that emits both sides
|
|
1628
|
+
* explicitly.
|
|
1629
|
+
*/
|
|
1630
|
+
function pairSimilarRowsThreeWay(alignment, genesis, newHtml, oldTable, newTable) {
|
|
1631
|
+
const oldTexts = oldTable.rows.map((r) => rowText(genesis, r));
|
|
1632
|
+
const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
|
|
1633
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
|
|
1634
|
+
if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0;
|
|
1635
|
+
return textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
|
|
1636
|
+
});
|
|
1637
|
+
}
|
|
1638
|
+
/**
|
|
1639
|
+
* Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
|
|
1640
|
+
* full table HTML keys, fuzzy-pair unmatched table runs by their
|
|
1641
|
+
* row-text-concatenated content. Without this, a table whose body
|
|
1642
|
+
* was edited (but not its outer shape) fails the exact-key match
|
|
1643
|
+
* and the preprocessing emits whole-table del + whole-table ins
|
|
1644
|
+
* instead of recursing into per-cell three-way diffs.
|
|
1645
|
+
*/
|
|
1646
|
+
function pairSimilarTablesThreeWay(alignment, oldHtml, newHtml, oldTables, newTables) {
|
|
1647
|
+
const oldTexts = oldTables.map((t) => t.rows.map((r) => rowText(oldHtml, r)).join(" "));
|
|
1648
|
+
const newTexts = newTables.map((t) => t.rows.map((r) => rowText(newHtml, r)).join(" "));
|
|
1649
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
|
|
1650
|
+
}
|
|
1598
1651
|
function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
|
|
1599
1652
|
if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
|
|
1600
1653
|
return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
|
|
@@ -1637,8 +1690,8 @@ function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
|
|
|
1637
1690
|
const gKeys = tG.rows.map((r) => rowKey(genesis, r));
|
|
1638
1691
|
const cKeys = tC.rows.map((r) => rowKey(cpLatest, r));
|
|
1639
1692
|
const mKeys = tM.rows.map((r) => rowKey(meCurrent, r));
|
|
1640
|
-
const alignCp = lcsAlign(gKeys, cKeys);
|
|
1641
|
-
const alignMe = lcsAlign(gKeys, mKeys);
|
|
1693
|
+
const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC);
|
|
1694
|
+
const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM);
|
|
1642
1695
|
const gToCp = new Array(tG.rows.length).fill(-1);
|
|
1643
1696
|
for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx;
|
|
1644
1697
|
const gToMe = new Array(tG.rows.length).fill(-1);
|
|
@@ -2134,8 +2187,13 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2134
2187
|
this.oldText = oldText;
|
|
2135
2188
|
this.newText = newText;
|
|
2136
2189
|
}
|
|
2137
|
-
static execute(oldText, newText) {
|
|
2138
|
-
|
|
2190
|
+
static execute(oldText, newText, options = {}) {
|
|
2191
|
+
const inner = new HtmlDiff(oldText, newText);
|
|
2192
|
+
if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
|
|
2193
|
+
if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
|
|
2194
|
+
if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
|
|
2195
|
+
if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
|
|
2196
|
+
return inner.build();
|
|
2139
2197
|
}
|
|
2140
2198
|
/**
|
|
2141
2199
|
* Analyse a two-way diff and return its raw building blocks: the word
|