@createiq/htmldiff 1.2.0-beta.4 → 1.2.0-beta.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/HtmlDiff.cjs CHANGED
@@ -1468,8 +1468,8 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
1468
1468
  const gKeys = gTables.map((t) => tableKey(genesis, t));
1469
1469
  const cKeys = cTables.map((t) => tableKey(cpLatest, t));
1470
1470
  const mKeys = mTables.map((t) => tableKey(meCurrent, t));
1471
- const alignCp = lcsAlign(gKeys, cKeys);
1472
- const alignMe = lcsAlign(gKeys, mKeys);
1471
+ const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables);
1472
+ const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables);
1473
1473
  const gToCp = new Array(gTables.length).fill(-1);
1474
1474
  const cpToG = new Array(cTables.length).fill(-1);
1475
1475
  for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
@@ -1595,6 +1595,59 @@ function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTa
1595
1595
  function tableKey(html, table) {
1596
1596
  return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
1597
1597
  }
1598
+ /**
1599
+ * Character-level similarity above which the three-way aligner treats
1600
+ * two rows / tables as "the same logical entry, edited" rather than
1601
+ * an unrelated delete + insert. Matched to TableDiff's
1602
+ * `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
1603
+ * agree on which pairings are reachable; if a row's content overlap
1604
+ * is enough to fool the 2-way diff into pairing, it should also be
1605
+ * enough for 3-way.
1606
+ */
1607
+ const THREE_WAY_FUZZY_THRESHOLD = .5;
1608
+ /**
1609
+ * Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
1610
+ * applies after its exact-LCS, but against one side of the genesis
1611
+ * spine (either cp or me). The genesis tables/rows are always the
1612
+ * "old" side; `newTable` is the cp or me table being aligned. Returns
1613
+ * the enriched alignment with additional paired entries.
1614
+ *
1615
+ * Cell-count guard: only fuzzy-pair when both rows have the same cell
1616
+ * count. Without this guard an asymmetric restructure — e.g. CP and
1617
+ * Me both added a different column — leads to ONE side fuzzy-pairing
1618
+ * its row with genesis (content overlap above threshold) while the
1619
+ * other side falls below threshold. That mismatch routes through
1620
+ * `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
1621
+ * branch, which emits CP's row as a Me-attributed deletion. In
1622
+ * cp-only mode `stripMeAttributedMarkers` then removes the row
1623
+ * entirely and CP's edit vanishes from the view — exactly the
1624
+ * content-loss case we're meant to prevent. Restricting fuzzy
1625
+ * pairing to same-shape rows preserves the common case (single cell
1626
+ * edit, identical row shape) while pushing structural mismatches
1627
+ * back to the boundary-insertion path that emits both sides
1628
+ * explicitly.
1629
+ */
1630
+ function pairSimilarRowsThreeWay(alignment, genesis, newHtml, oldTable, newTable) {
1631
+ const oldTexts = oldTable.rows.map((r) => rowText(genesis, r));
1632
+ const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
1633
+ return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
1634
+ if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0;
1635
+ return textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
1636
+ });
1637
+ }
1638
+ /**
1639
+ * Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
1640
+ * full table HTML keys, fuzzy-pair unmatched table runs by their
1641
+ * row-text-concatenated content. Without this, a table whose body
1642
+ * was edited (but not its outer shape) fails the exact-key match
1643
+ * and the preprocessing emits whole-table del + whole-table ins
1644
+ * instead of recursing into per-cell three-way diffs.
1645
+ */
1646
+ function pairSimilarTablesThreeWay(alignment, oldHtml, newHtml, oldTables, newTables) {
1647
+ const oldTexts = oldTables.map((t) => t.rows.map((r) => rowText(oldHtml, r)).join(" "));
1648
+ const newTexts = newTables.map((t) => t.rows.map((r) => rowText(newHtml, r)).join(" "));
1649
+ return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
1650
+ }
1598
1651
  function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
1599
1652
  if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
1600
1653
  return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
@@ -1637,8 +1690,8 @@ function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
1637
1690
  const gKeys = tG.rows.map((r) => rowKey(genesis, r));
1638
1691
  const cKeys = tC.rows.map((r) => rowKey(cpLatest, r));
1639
1692
  const mKeys = tM.rows.map((r) => rowKey(meCurrent, r));
1640
- const alignCp = lcsAlign(gKeys, cKeys);
1641
- const alignMe = lcsAlign(gKeys, mKeys);
1693
+ const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC);
1694
+ const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM);
1642
1695
  const gToCp = new Array(tG.rows.length).fill(-1);
1643
1696
  for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx;
1644
1697
  const gToMe = new Array(tG.rows.length).fill(-1);
@@ -2134,8 +2187,13 @@ var HtmlDiff = class HtmlDiff {
2134
2187
  this.oldText = oldText;
2135
2188
  this.newText = newText;
2136
2189
  }
2137
- static execute(oldText, newText) {
2138
- return new HtmlDiff(oldText, newText).build();
2190
+ static execute(oldText, newText, options = {}) {
2191
+ const inner = new HtmlDiff(oldText, newText);
2192
+ if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
2193
+ if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
2194
+ if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
2195
+ if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
2196
+ return inner.build();
2139
2197
  }
2140
2198
  /**
2141
2199
  * Analyse a two-way diff and return its raw building blocks: the word