@createiq/htmldiff 1.2.0-beta.3 → 1.2.0-beta.5

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -202,22 +202,6 @@ declare class HtmlDiff {
202
202
  * why symmetry matters.
203
203
  */
204
204
  static evaluateProjectionApplicability(oldText: string, newText: string): boolean;
205
- /**
206
- * Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
207
- * version CP sent back), and V3 (Me's current draft), produces a
208
- * single attributed HTML output where CP's and Me's changes are
209
- * distinguished by `data-author` ('cp' or 'me') and matching
210
- * `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
211
- * CP's proposal" case (Me deleted text CP had inserted) gets a
212
- * dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
213
- *
214
- * Coordinates the symmetric-projection decision (D1) across both
215
- * internal `analyze` calls so V2 tokenises identically on each side
216
- * of the spine. When `useProjections` is left undefined, the decision
217
- * is the conjunction of both pair-wise heuristics — project iff both
218
- * pairs would project on their own. Pass an explicit boolean to
219
- * override.
220
- */
221
205
  /**
222
206
  * Three-way HTML diff against a shared genesis. Produces attributed
223
207
  * HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
@@ -245,6 +229,17 @@ declare class HtmlDiff {
245
229
  * buffer. Reusing the instance keeps the formatting-tag stack
246
230
  * (`specialTagDiffStack`) coherent across segments — a `<strong>`
247
231
  * opened in one segment and closed in another stays balanced.
232
+ *
233
+ * Edge case: an ins/del segment can open a formatting wrap whose
234
+ * matching closer ends up in an equal segment (`<strong>` deleted
235
+ * by CP but `</strong>` kept by both — buildSegments emits the open
236
+ * as del-cp and the close as equal). Equal segments bypass
237
+ * `insertTag` and push raw, so the stack entry for the open is
238
+ * never popped. Rather than throw — which forces the caller's UI
239
+ * into an error boundary — close every leftover wrap with `</ins>`
240
+ * at the end of emission. The resulting HTML has an extra
241
+ * `</ins>` next to the formatting closer; DOMParser-normalisation
242
+ * downstream produces sensible nesting.
248
243
  */
249
244
  private static emitSegments;
250
245
  /**
@@ -202,22 +202,6 @@ declare class HtmlDiff {
202
202
  * why symmetry matters.
203
203
  */
204
204
  static evaluateProjectionApplicability(oldText: string, newText: string): boolean;
205
- /**
206
- * Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
207
- * version CP sent back), and V3 (Me's current draft), produces a
208
- * single attributed HTML output where CP's and Me's changes are
209
- * distinguished by `data-author` ('cp' or 'me') and matching
210
- * `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
211
- * CP's proposal" case (Me deleted text CP had inserted) gets a
212
- * dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
213
- *
214
- * Coordinates the symmetric-projection decision (D1) across both
215
- * internal `analyze` calls so V2 tokenises identically on each side
216
- * of the spine. When `useProjections` is left undefined, the decision
217
- * is the conjunction of both pair-wise heuristics — project iff both
218
- * pairs would project on their own. Pass an explicit boolean to
219
- * override.
220
- */
221
205
  /**
222
206
  * Three-way HTML diff against a shared genesis. Produces attributed
223
207
  * HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
@@ -245,6 +229,17 @@ declare class HtmlDiff {
245
229
  * buffer. Reusing the instance keeps the formatting-tag stack
246
230
  * (`specialTagDiffStack`) coherent across segments — a `<strong>`
247
231
  * opened in one segment and closed in another stays balanced.
232
+ *
233
+ * Edge case: an ins/del segment can open a formatting wrap whose
234
+ * matching closer ends up in an equal segment (`<strong>` deleted
235
+ * by CP but `</strong>` kept by both — buildSegments emits the open
236
+ * as del-cp and the close as equal). Equal segments bypass
237
+ * `insertTag` and push raw, so the stack entry for the open is
238
+ * never popped. Rather than throw — which forces the caller's UI
239
+ * into an error boundary — close every leftover wrap with `</ins>`
240
+ * at the end of emission. The resulting HTML has an extra
241
+ * `</ins>` next to the formatting closer; DOMParser-normalisation
242
+ * downstream produces sensible nesting.
248
243
  */
249
244
  private static emitSegments;
250
245
  /**
package/dist/HtmlDiff.mjs CHANGED
@@ -1468,8 +1468,8 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
1468
1468
  const gKeys = gTables.map((t) => tableKey(genesis, t));
1469
1469
  const cKeys = cTables.map((t) => tableKey(cpLatest, t));
1470
1470
  const mKeys = mTables.map((t) => tableKey(meCurrent, t));
1471
- const alignCp = lcsAlign(gKeys, cKeys);
1472
- const alignMe = lcsAlign(gKeys, mKeys);
1471
+ const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables);
1472
+ const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables);
1473
1473
  const gToCp = new Array(gTables.length).fill(-1);
1474
1474
  const cpToG = new Array(cTables.length).fill(-1);
1475
1475
  for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
@@ -1595,6 +1595,59 @@ function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTa
1595
1595
  function tableKey(html, table) {
1596
1596
  return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
1597
1597
  }
1598
+ /**
1599
+ * Character-level similarity above which the three-way aligner treats
1600
+ * two rows / tables as "the same logical entry, edited" rather than
1601
+ * an unrelated delete + insert. Matched to TableDiff's
1602
+ * `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
1603
+ * agree on which pairings are reachable; if a row's content overlap
1604
+ * is enough to fool the 2-way diff into pairing, it should also be
1605
+ * enough for 3-way.
1606
+ */
1607
+ const THREE_WAY_FUZZY_THRESHOLD = .5;
1608
+ /**
1609
+ * Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
1610
+ * applies after its exact-LCS, but against one side of the genesis
1611
+ * spine (either cp or me). The genesis tables/rows are always the
1612
+ * "old" side; `newTable` is the cp or me table being aligned. Returns
1613
+ * the enriched alignment with additional paired entries.
1614
+ *
1615
+ * Cell-count guard: only fuzzy-pair when both rows have the same cell
1616
+ * count. Without this guard an asymmetric restructure — e.g. CP and
1617
+ * Me both added a different column — leads to ONE side fuzzy-pairing
1618
+ * its row with genesis (content overlap above threshold) while the
1619
+ * other side falls below threshold. That mismatch routes through
1620
+ * `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
1621
+ * branch, which emits CP's row as a Me-attributed deletion. In
1622
+ * cp-only mode `stripMeAttributedMarkers` then removes the row
1623
+ * entirely and CP's edit vanishes from the view — exactly the
1624
+ * content-loss case we're meant to prevent. Restricting fuzzy
1625
+ * pairing to same-shape rows preserves the common case (single cell
1626
+ * edit, identical row shape) while pushing structural mismatches
1627
+ * back to the boundary-insertion path that emits both sides
1628
+ * explicitly.
1629
+ */
1630
+ function pairSimilarRowsThreeWay(alignment, genesis, newHtml, oldTable, newTable) {
1631
+ const oldTexts = oldTable.rows.map((r) => rowText(genesis, r));
1632
+ const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
1633
+ return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
1634
+ if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0;
1635
+ return textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
1636
+ });
1637
+ }
1638
+ /**
1639
+ * Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
1640
+ * full table HTML keys, fuzzy-pair unmatched table runs by their
1641
+ * row-text-concatenated content. Without this, a table whose body
1642
+ * was edited (but not its outer shape) fails the exact-key match
1643
+ * and the preprocessing emits whole-table del + whole-table ins
1644
+ * instead of recursing into per-cell three-way diffs.
1645
+ */
1646
+ function pairSimilarTablesThreeWay(alignment, oldHtml, newHtml, oldTables, newTables) {
1647
+ const oldTexts = oldTables.map((t) => t.rows.map((r) => rowText(oldHtml, r)).join(" "));
1648
+ const newTexts = newTables.map((t) => t.rows.map((r) => rowText(newHtml, r)).join(" "));
1649
+ return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
1650
+ }
1598
1651
  function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
1599
1652
  if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
1600
1653
  return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
@@ -1637,8 +1690,8 @@ function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
1637
1690
  const gKeys = tG.rows.map((r) => rowKey(genesis, r));
1638
1691
  const cKeys = tC.rows.map((r) => rowKey(cpLatest, r));
1639
1692
  const mKeys = tM.rows.map((r) => rowKey(meCurrent, r));
1640
- const alignCp = lcsAlign(gKeys, cKeys);
1641
- const alignMe = lcsAlign(gKeys, mKeys);
1693
+ const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC);
1694
+ const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM);
1642
1695
  const gToCp = new Array(tG.rows.length).fill(-1);
1643
1696
  for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx;
1644
1697
  const gToMe = new Array(tG.rows.length).fill(-1);
@@ -1695,7 +1748,20 @@ function emitPreservedRow(genesis, cpLatest, meCurrent, rG, rC, rM, cellDiff) {
1695
1748
  out.push(genesis.slice(cursor, rG.rowEnd));
1696
1749
  return out.join("");
1697
1750
  }
1698
- return emitFullRowAttributed(genesis, rG, "del", "me") + emitFullRowAttributed(meCurrent, rM, "ins", "me");
1751
+ const cpRestructured = rC.cells.length !== rG.cells.length;
1752
+ const meRestructured = rM.cells.length !== rG.cells.length;
1753
+ const blocks = [];
1754
+ if (cpRestructured && meRestructured) {
1755
+ blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
1756
+ blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
1757
+ } else if (cpRestructured) {
1758
+ blocks.push(emitFullRowAttributed(genesis, rG, "del", "cp"));
1759
+ blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
1760
+ } else {
1761
+ blocks.push(emitFullRowAttributed(genesis, rG, "del", "me"));
1762
+ blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
1763
+ }
1764
+ return blocks.join("");
1699
1765
  }
1700
1766
  /**
1701
1767
  * Returns map "genesis-row-boundary → list of new-side row indices
@@ -2191,22 +2257,6 @@ var HtmlDiff = class HtmlDiff {
2191
2257
  return HtmlDiff.shouldUseContentProjections(oldWords, newWords, oldProj, newProj);
2192
2258
  }
2193
2259
  /**
2194
- * Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
2195
- * version CP sent back), and V3 (Me's current draft), produces a
2196
- * single attributed HTML output where CP's and Me's changes are
2197
- * distinguished by `data-author` ('cp' or 'me') and matching
2198
- * `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
2199
- * CP's proposal" case (Me deleted text CP had inserted) gets a
2200
- * dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
2201
- *
2202
- * Coordinates the symmetric-projection decision (D1) across both
2203
- * internal `analyze` calls so V2 tokenises identically on each side
2204
- * of the spine. When `useProjections` is left undefined, the decision
2205
- * is the conjunction of both pair-wise heuristics — project iff both
2206
- * pairs would project on their own. Pass an explicit boolean to
2207
- * override.
2208
- */
2209
- /**
2210
2260
  * Three-way HTML diff against a shared genesis. Produces attributed
2211
2261
  * HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
2212
2262
  * from Me's accumulated changes (genesis → meCurrent). Use this for
@@ -2253,6 +2303,17 @@ var HtmlDiff = class HtmlDiff {
2253
2303
  * buffer. Reusing the instance keeps the formatting-tag stack
2254
2304
  * (`specialTagDiffStack`) coherent across segments — a `<strong>`
2255
2305
  * opened in one segment and closed in another stays balanced.
2306
+ *
2307
+ * Edge case: an ins/del segment can open a formatting wrap whose
2308
+ * matching closer ends up in an equal segment (`<strong>` deleted
2309
+ * by CP but `</strong>` kept by both — buildSegments emits the open
2310
+ * as del-cp and the close as equal). Equal segments bypass
2311
+ * `insertTag` and push raw, so the stack entry for the open is
2312
+ * never popped. Rather than throw — which forces the caller's UI
2313
+ * into an error boundary — close every leftover wrap with `</ins>`
2314
+ * at the end of emission. The resulting HTML has an extra
2315
+ * `</ins>` next to the formatting closer; DOMParser-normalisation
2316
+ * downstream produces sensible nesting.
2256
2317
  */
2257
2318
  static emitSegments(segments) {
2258
2319
  const emitter = new HtmlDiff("", "");
@@ -2264,7 +2325,13 @@ var HtmlDiff = class HtmlDiff {
2264
2325
  const { tag, baseClass, metadata } = segmentEmissionShape(seg.attr);
2265
2326
  emitter.insertTag(tag, baseClass, [...seg.words], metadata);
2266
2327
  }
2267
- if (emitter.specialTagDiffStack.length > 0) throw new Error(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting tag(s) on the stack — input may have unbalanced <strong>/<em>/etc. or there is a bug in segment emission.`);
2328
+ if (emitter.specialTagDiffStack.length > 0) {
2329
+ console.warn(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting wrap(s) on the stack. Closing defensively. This usually means a formatting tag opens in a del/ins segment and its matching closer is in an equal segment.`);
2330
+ while (emitter.specialTagDiffStack.length > 0) {
2331
+ emitter.content.push("</ins>");
2332
+ emitter.specialTagDiffStack.pop();
2333
+ }
2334
+ }
2268
2335
  return emitter.content.join("");
2269
2336
  }
2270
2337
  /**