@createiq/htmldiff 1.2.0-beta.3 → 1.2.0-beta.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +89 -22
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +11 -16
- package/dist/HtmlDiff.d.mts +11 -16
- package/dist/HtmlDiff.mjs +89 -22
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +23 -25
- package/src/ThreeWayTable.ts +125 -8
- package/test/HtmlDiff.threeWay.spec.ts +47 -2
- package/test/HtmlDiff.threeWay.tables.spec.ts +76 -0
- package/test/Utils.spec.ts +3 -3
package/dist/HtmlDiff.d.cts
CHANGED
|
@@ -202,22 +202,6 @@ declare class HtmlDiff {
|
|
|
202
202
|
* why symmetry matters.
|
|
203
203
|
*/
|
|
204
204
|
static evaluateProjectionApplicability(oldText: string, newText: string): boolean;
|
|
205
|
-
/**
|
|
206
|
-
* Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
|
|
207
|
-
* version CP sent back), and V3 (Me's current draft), produces a
|
|
208
|
-
* single attributed HTML output where CP's and Me's changes are
|
|
209
|
-
* distinguished by `data-author` ('cp' or 'me') and matching
|
|
210
|
-
* `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
|
|
211
|
-
* CP's proposal" case (Me deleted text CP had inserted) gets a
|
|
212
|
-
* dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
|
|
213
|
-
*
|
|
214
|
-
* Coordinates the symmetric-projection decision (D1) across both
|
|
215
|
-
* internal `analyze` calls so V2 tokenises identically on each side
|
|
216
|
-
* of the spine. When `useProjections` is left undefined, the decision
|
|
217
|
-
* is the conjunction of both pair-wise heuristics — project iff both
|
|
218
|
-
* pairs would project on their own. Pass an explicit boolean to
|
|
219
|
-
* override.
|
|
220
|
-
*/
|
|
221
205
|
/**
|
|
222
206
|
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
223
207
|
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
@@ -245,6 +229,17 @@ declare class HtmlDiff {
|
|
|
245
229
|
* buffer. Reusing the instance keeps the formatting-tag stack
|
|
246
230
|
* (`specialTagDiffStack`) coherent across segments — a `<strong>`
|
|
247
231
|
* opened in one segment and closed in another stays balanced.
|
|
232
|
+
*
|
|
233
|
+
* Edge case: an ins/del segment can open a formatting wrap whose
|
|
234
|
+
* matching closer ends up in an equal segment (`<strong>` deleted
|
|
235
|
+
* by CP but `</strong>` kept by both — buildSegments emits the open
|
|
236
|
+
* as del-cp and the close as equal). Equal segments bypass
|
|
237
|
+
* `insertTag` and push raw, so the stack entry for the open is
|
|
238
|
+
* never popped. Rather than throw — which forces the caller's UI
|
|
239
|
+
* into an error boundary — close every leftover wrap with `</ins>`
|
|
240
|
+
* at the end of emission. The resulting HTML has an extra
|
|
241
|
+
* `</ins>` next to the formatting closer; DOMParser-normalisation
|
|
242
|
+
* downstream produces sensible nesting.
|
|
248
243
|
*/
|
|
249
244
|
private static emitSegments;
|
|
250
245
|
/**
|
package/dist/HtmlDiff.d.mts
CHANGED
|
@@ -202,22 +202,6 @@ declare class HtmlDiff {
|
|
|
202
202
|
* why symmetry matters.
|
|
203
203
|
*/
|
|
204
204
|
static evaluateProjectionApplicability(oldText: string, newText: string): boolean;
|
|
205
|
-
/**
|
|
206
|
-
* Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
|
|
207
|
-
* version CP sent back), and V3 (Me's current draft), produces a
|
|
208
|
-
* single attributed HTML output where CP's and Me's changes are
|
|
209
|
-
* distinguished by `data-author` ('cp' or 'me') and matching
|
|
210
|
-
* `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
|
|
211
|
-
* CP's proposal" case (Me deleted text CP had inserted) gets a
|
|
212
|
-
* dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
|
|
213
|
-
*
|
|
214
|
-
* Coordinates the symmetric-projection decision (D1) across both
|
|
215
|
-
* internal `analyze` calls so V2 tokenises identically on each side
|
|
216
|
-
* of the spine. When `useProjections` is left undefined, the decision
|
|
217
|
-
* is the conjunction of both pair-wise heuristics — project iff both
|
|
218
|
-
* pairs would project on their own. Pass an explicit boolean to
|
|
219
|
-
* override.
|
|
220
|
-
*/
|
|
221
205
|
/**
|
|
222
206
|
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
223
207
|
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
@@ -245,6 +229,17 @@ declare class HtmlDiff {
|
|
|
245
229
|
* buffer. Reusing the instance keeps the formatting-tag stack
|
|
246
230
|
* (`specialTagDiffStack`) coherent across segments — a `<strong>`
|
|
247
231
|
* opened in one segment and closed in another stays balanced.
|
|
232
|
+
*
|
|
233
|
+
* Edge case: an ins/del segment can open a formatting wrap whose
|
|
234
|
+
* matching closer ends up in an equal segment (`<strong>` deleted
|
|
235
|
+
* by CP but `</strong>` kept by both — buildSegments emits the open
|
|
236
|
+
* as del-cp and the close as equal). Equal segments bypass
|
|
237
|
+
* `insertTag` and push raw, so the stack entry for the open is
|
|
238
|
+
* never popped. Rather than throw — which forces the caller's UI
|
|
239
|
+
* into an error boundary — close every leftover wrap with `</ins>`
|
|
240
|
+
* at the end of emission. The resulting HTML has an extra
|
|
241
|
+
* `</ins>` next to the formatting closer; DOMParser-normalisation
|
|
242
|
+
* downstream produces sensible nesting.
|
|
248
243
|
*/
|
|
249
244
|
private static emitSegments;
|
|
250
245
|
/**
|
package/dist/HtmlDiff.mjs
CHANGED
|
@@ -1468,8 +1468,8 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
|
|
|
1468
1468
|
const gKeys = gTables.map((t) => tableKey(genesis, t));
|
|
1469
1469
|
const cKeys = cTables.map((t) => tableKey(cpLatest, t));
|
|
1470
1470
|
const mKeys = mTables.map((t) => tableKey(meCurrent, t));
|
|
1471
|
-
const alignCp = lcsAlign(gKeys, cKeys);
|
|
1472
|
-
const alignMe = lcsAlign(gKeys, mKeys);
|
|
1471
|
+
const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables);
|
|
1472
|
+
const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables);
|
|
1473
1473
|
const gToCp = new Array(gTables.length).fill(-1);
|
|
1474
1474
|
const cpToG = new Array(cTables.length).fill(-1);
|
|
1475
1475
|
for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
|
|
@@ -1595,6 +1595,59 @@ function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTa
|
|
|
1595
1595
|
function tableKey(html, table) {
|
|
1596
1596
|
return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
|
|
1597
1597
|
}
|
|
1598
|
+
/**
|
|
1599
|
+
* Character-level similarity above which the three-way aligner treats
|
|
1600
|
+
* two rows / tables as "the same logical entry, edited" rather than
|
|
1601
|
+
* an unrelated delete + insert. Matched to TableDiff's
|
|
1602
|
+
* `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
|
|
1603
|
+
* agree on which pairings are reachable; if a row's content overlap
|
|
1604
|
+
* is enough to fool the 2-way diff into pairing, it should also be
|
|
1605
|
+
* enough for 3-way.
|
|
1606
|
+
*/
|
|
1607
|
+
const THREE_WAY_FUZZY_THRESHOLD = .5;
|
|
1608
|
+
/**
|
|
1609
|
+
* Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
|
|
1610
|
+
* applies after its exact-LCS, but against one side of the genesis
|
|
1611
|
+
* spine (either cp or me). The genesis tables/rows are always the
|
|
1612
|
+
* "old" side; `newTable` is the cp or me table being aligned. Returns
|
|
1613
|
+
* the enriched alignment with additional paired entries.
|
|
1614
|
+
*
|
|
1615
|
+
* Cell-count guard: only fuzzy-pair when both rows have the same cell
|
|
1616
|
+
* count. Without this guard an asymmetric restructure — e.g. CP and
|
|
1617
|
+
* Me both added a different column — leads to ONE side fuzzy-pairing
|
|
1618
|
+
* its row with genesis (content overlap above threshold) while the
|
|
1619
|
+
* other side falls below threshold. That mismatch routes through
|
|
1620
|
+
* `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
|
|
1621
|
+
* branch, which emits CP's row as a Me-attributed deletion. In
|
|
1622
|
+
* cp-only mode `stripMeAttributedMarkers` then removes the row
|
|
1623
|
+
* entirely and CP's edit vanishes from the view — exactly the
|
|
1624
|
+
* content-loss case we're meant to prevent. Restricting fuzzy
|
|
1625
|
+
* pairing to same-shape rows preserves the common case (single cell
|
|
1626
|
+
* edit, identical row shape) while pushing structural mismatches
|
|
1627
|
+
* back to the boundary-insertion path that emits both sides
|
|
1628
|
+
* explicitly.
|
|
1629
|
+
*/
|
|
1630
|
+
function pairSimilarRowsThreeWay(alignment, genesis, newHtml, oldTable, newTable) {
|
|
1631
|
+
const oldTexts = oldTable.rows.map((r) => rowText(genesis, r));
|
|
1632
|
+
const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
|
|
1633
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
|
|
1634
|
+
if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0;
|
|
1635
|
+
return textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
|
|
1636
|
+
});
|
|
1637
|
+
}
|
|
1638
|
+
/**
|
|
1639
|
+
* Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
|
|
1640
|
+
* full table HTML keys, fuzzy-pair unmatched table runs by their
|
|
1641
|
+
* row-text-concatenated content. Without this, a table whose body
|
|
1642
|
+
* was edited (but not its outer shape) fails the exact-key match
|
|
1643
|
+
* and the preprocessing emits whole-table del + whole-table ins
|
|
1644
|
+
* instead of recursing into per-cell three-way diffs.
|
|
1645
|
+
*/
|
|
1646
|
+
function pairSimilarTablesThreeWay(alignment, oldHtml, newHtml, oldTables, newTables) {
|
|
1647
|
+
const oldTexts = oldTables.map((t) => t.rows.map((r) => rowText(oldHtml, r)).join(" "));
|
|
1648
|
+
const newTexts = newTables.map((t) => t.rows.map((r) => rowText(newHtml, r)).join(" "));
|
|
1649
|
+
return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
|
|
1650
|
+
}
|
|
1598
1651
|
function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
|
|
1599
1652
|
if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
|
|
1600
1653
|
return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
|
|
@@ -1637,8 +1690,8 @@ function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
|
|
|
1637
1690
|
const gKeys = tG.rows.map((r) => rowKey(genesis, r));
|
|
1638
1691
|
const cKeys = tC.rows.map((r) => rowKey(cpLatest, r));
|
|
1639
1692
|
const mKeys = tM.rows.map((r) => rowKey(meCurrent, r));
|
|
1640
|
-
const alignCp = lcsAlign(gKeys, cKeys);
|
|
1641
|
-
const alignMe = lcsAlign(gKeys, mKeys);
|
|
1693
|
+
const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC);
|
|
1694
|
+
const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM);
|
|
1642
1695
|
const gToCp = new Array(tG.rows.length).fill(-1);
|
|
1643
1696
|
for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx;
|
|
1644
1697
|
const gToMe = new Array(tG.rows.length).fill(-1);
|
|
@@ -1695,7 +1748,20 @@ function emitPreservedRow(genesis, cpLatest, meCurrent, rG, rC, rM, cellDiff) {
|
|
|
1695
1748
|
out.push(genesis.slice(cursor, rG.rowEnd));
|
|
1696
1749
|
return out.join("");
|
|
1697
1750
|
}
|
|
1698
|
-
|
|
1751
|
+
const cpRestructured = rC.cells.length !== rG.cells.length;
|
|
1752
|
+
const meRestructured = rM.cells.length !== rG.cells.length;
|
|
1753
|
+
const blocks = [];
|
|
1754
|
+
if (cpRestructured && meRestructured) {
|
|
1755
|
+
blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
|
|
1756
|
+
blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
|
|
1757
|
+
} else if (cpRestructured) {
|
|
1758
|
+
blocks.push(emitFullRowAttributed(genesis, rG, "del", "cp"));
|
|
1759
|
+
blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
|
|
1760
|
+
} else {
|
|
1761
|
+
blocks.push(emitFullRowAttributed(genesis, rG, "del", "me"));
|
|
1762
|
+
blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
|
|
1763
|
+
}
|
|
1764
|
+
return blocks.join("");
|
|
1699
1765
|
}
|
|
1700
1766
|
/**
|
|
1701
1767
|
* Returns map "genesis-row-boundary → list of new-side row indices
|
|
@@ -2191,22 +2257,6 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2191
2257
|
return HtmlDiff.shouldUseContentProjections(oldWords, newWords, oldProj, newProj);
|
|
2192
2258
|
}
|
|
2193
2259
|
/**
|
|
2194
|
-
* Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
|
|
2195
|
-
* version CP sent back), and V3 (Me's current draft), produces a
|
|
2196
|
-
* single attributed HTML output where CP's and Me's changes are
|
|
2197
|
-
* distinguished by `data-author` ('cp' or 'me') and matching
|
|
2198
|
-
* `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
|
|
2199
|
-
* CP's proposal" case (Me deleted text CP had inserted) gets a
|
|
2200
|
-
* dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
|
|
2201
|
-
*
|
|
2202
|
-
* Coordinates the symmetric-projection decision (D1) across both
|
|
2203
|
-
* internal `analyze` calls so V2 tokenises identically on each side
|
|
2204
|
-
* of the spine. When `useProjections` is left undefined, the decision
|
|
2205
|
-
* is the conjunction of both pair-wise heuristics — project iff both
|
|
2206
|
-
* pairs would project on their own. Pass an explicit boolean to
|
|
2207
|
-
* override.
|
|
2208
|
-
*/
|
|
2209
|
-
/**
|
|
2210
2260
|
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
2211
2261
|
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
2212
2262
|
* from Me's accumulated changes (genesis → meCurrent). Use this for
|
|
@@ -2253,6 +2303,17 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2253
2303
|
* buffer. Reusing the instance keeps the formatting-tag stack
|
|
2254
2304
|
* (`specialTagDiffStack`) coherent across segments — a `<strong>`
|
|
2255
2305
|
* opened in one segment and closed in another stays balanced.
|
|
2306
|
+
*
|
|
2307
|
+
* Edge case: an ins/del segment can open a formatting wrap whose
|
|
2308
|
+
* matching closer ends up in an equal segment (`<strong>` deleted
|
|
2309
|
+
* by CP but `</strong>` kept by both — buildSegments emits the open
|
|
2310
|
+
* as del-cp and the close as equal). Equal segments bypass
|
|
2311
|
+
* `insertTag` and push raw, so the stack entry for the open is
|
|
2312
|
+
* never popped. Rather than throw — which forces the caller's UI
|
|
2313
|
+
* into an error boundary — close every leftover wrap with `</ins>`
|
|
2314
|
+
* at the end of emission. The resulting HTML has an extra
|
|
2315
|
+
* `</ins>` next to the formatting closer; DOMParser-normalisation
|
|
2316
|
+
* downstream produces sensible nesting.
|
|
2256
2317
|
*/
|
|
2257
2318
|
static emitSegments(segments) {
|
|
2258
2319
|
const emitter = new HtmlDiff("", "");
|
|
@@ -2264,7 +2325,13 @@ var HtmlDiff = class HtmlDiff {
|
|
|
2264
2325
|
const { tag, baseClass, metadata } = segmentEmissionShape(seg.attr);
|
|
2265
2326
|
emitter.insertTag(tag, baseClass, [...seg.words], metadata);
|
|
2266
2327
|
}
|
|
2267
|
-
if (emitter.specialTagDiffStack.length > 0)
|
|
2328
|
+
if (emitter.specialTagDiffStack.length > 0) {
|
|
2329
|
+
console.warn(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting wrap(s) on the stack. Closing defensively. This usually means a formatting tag opens in a del/ins segment and its matching closer is in an equal segment.`);
|
|
2330
|
+
while (emitter.specialTagDiffStack.length > 0) {
|
|
2331
|
+
emitter.content.push("</ins>");
|
|
2332
|
+
emitter.specialTagDiffStack.pop();
|
|
2333
|
+
}
|
|
2334
|
+
}
|
|
2268
2335
|
return emitter.content.join("");
|
|
2269
2336
|
}
|
|
2270
2337
|
/**
|