@createiq/htmldiff 1.2.0-beta.1 → 1.2.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/HtmlDiff.cjs CHANGED
@@ -1,3 +1,7 @@
1
+ Object.defineProperties(exports, {
2
+ __esModule: { value: true },
3
+ [Symbol.toStringTag]: { value: "Module" }
4
+ });
1
5
  //#region src/Match.ts
2
6
  var Match = class {
3
7
  _startInOld;
@@ -1333,13 +1337,33 @@ function collectInsertionsKeyedByEnd(d) {
1333
1337
  return out;
1334
1338
  }
1335
1339
  /**
1336
- * Emit any insertions at boundary `b`. When both authors inserted at
1337
- * the same boundary AND the inserted token sequences are textually
1338
- * identical, the insertion is treated as agreed and emitted unmarked.
1339
- * Otherwise each side's insertion is emitted with author attribution.
1340
+ * Emit any insertions at boundary `b`.
1340
1341
  *
1341
- * The CP-then-Me ordering for disagreement is arbitrary but consistent;
1342
- * callers don't depend on it.
1342
+ * Reading model: a legal reviewer wants to see CP's INTENT relative
1343
+ * to Me's current content. Me's content is the base; CP's deltas are
1344
+ * what they need to act on. Under that framing:
1345
+ * - tokens both authors inserted at the same boundary → settled
1346
+ * - tokens CP inserted that Me doesn't have → ins-cp (CP wants
1347
+ * this added)
1348
+ * - tokens Me inserted that CP doesn't have → del-cp (CP wants
1349
+ * this removed from Me's content)
1350
+ *
1351
+ * The third case is the load-bearing attribution flip. The
1352
+ * genesis-spine view technically labels me-only-at-boundary tokens
1353
+ * as "ins-me" (Me added them; CP didn't), but that's confusing to
1354
+ * a reviewer: they see "Me added X" alongside "CP added Y" and have
1355
+ * to mentally derive "CP wants X gone, replaced with Y". Surfacing
1356
+ * me-only tokens as `del-cp` shows CP's intent directly:
1357
+ * - "CP accepted Me's text minus `things`": settled bulk + del-cp
1358
+ * `things` (no parallel redundant insertions)
1359
+ * - "CP wants `cruel` where Me wrote `brave`": ins-cp `cruel` +
1360
+ * del-cp `brave` (the substitution intent reads directly)
1361
+ * - "CP added extra words": cp-extras stay as ins-cp (same as
1362
+ * before; the cp-only direction was always intent-correct)
1363
+ *
1364
+ * Pure single-side insertions (Me added text CP doesn't engage
1365
+ * with at all, or vice versa) keep their genesis-spine attribution
1366
+ * — these aren't refinement cases, just that author's own content additions.
1343
1367
  */
1344
1368
  function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments) {
1345
1369
  const cpIns = cpInsAt.get(b);
@@ -1347,18 +1371,34 @@ function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments)
1347
1371
  const hasCp = !!cpIns && cpIns.length > 0;
1348
1372
  const hasMe = !!meIns && meIns.length > 0;
1349
1373
  if (!hasCp && !hasMe) return;
1350
- if (hasCp && hasMe && tokenArraysEqual(cpIns, meIns)) {
1374
+ if (!hasCp) {
1375
+ appendSegment(segments, {
1376
+ kind: "ins",
1377
+ author: "me"
1378
+ }, meIns);
1379
+ return;
1380
+ }
1381
+ if (!hasMe) {
1382
+ appendSegment(segments, {
1383
+ kind: "ins",
1384
+ author: "cp"
1385
+ }, cpIns);
1386
+ return;
1387
+ }
1388
+ if (tokenArraysEqual(cpIns, meIns)) {
1351
1389
  appendSegment(segments, { kind: "equal" }, cpIns);
1352
1390
  return;
1353
1391
  }
1354
- if (hasCp) appendSegment(segments, {
1392
+ const alignment = lcsAlign(cpIns, meIns);
1393
+ for (const a of alignment) if (a.oldIdx !== null && a.newIdx !== null) appendSegment(segments, { kind: "equal" }, [cpIns[a.oldIdx]]);
1394
+ else if (a.oldIdx !== null) appendSegment(segments, {
1355
1395
  kind: "ins",
1356
1396
  author: "cp"
1357
- }, cpIns);
1358
- if (hasMe) appendSegment(segments, {
1359
- kind: "ins",
1360
- author: "me"
1361
- }, meIns);
1397
+ }, [cpIns[a.oldIdx]]);
1398
+ else if (a.newIdx !== null) appendSegment(segments, {
1399
+ kind: "del",
1400
+ author: "cp"
1401
+ }, [meIns[a.newIdx]]);
1362
1402
  }
1363
1403
  function tokenArraysEqual(a, b) {
1364
1404
  if (a.length !== b.length) return false;
@@ -1468,8 +1508,8 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
1468
1508
  const gKeys = gTables.map((t) => tableKey(genesis, t));
1469
1509
  const cKeys = cTables.map((t) => tableKey(cpLatest, t));
1470
1510
  const mKeys = mTables.map((t) => tableKey(meCurrent, t));
1471
- const alignCp = lcsAlign(gKeys, cKeys);
1472
- const alignMe = lcsAlign(gKeys, mKeys);
1511
+ const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables);
1512
+ const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables);
1473
1513
  const gToCp = new Array(gTables.length).fill(-1);
1474
1514
  const cpToG = new Array(cTables.length).fill(-1);
1475
1515
  for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
@@ -1580,7 +1620,7 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
1580
1620
  placeholderToDiff
1581
1621
  };
1582
1622
  }
1583
- const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .5;
1623
+ const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .15;
1584
1624
  function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables) {
1585
1625
  if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false;
1586
1626
  for (let i = 0; i < gTables.length; i++) {
@@ -1595,6 +1635,59 @@ function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTa
1595
1635
  function tableKey(html, table) {
1596
1636
  return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
1597
1637
  }
1638
+ /**
1639
+ * Character-level similarity above which the three-way aligner treats
1640
+ * two rows / tables as "the same logical entry, edited" rather than
1641
+ * an unrelated delete + insert. Matched to TableDiff's
1642
+ * `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
1643
+ * agree on which pairings are reachable; if a row's content overlap
1644
+ * is enough to fool the 2-way diff into pairing, it should also be
1645
+ * enough for 3-way.
1646
+ */
1647
+ const THREE_WAY_FUZZY_THRESHOLD = .5;
1648
+ /**
1649
+ * Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
1650
+ * applies after its exact-LCS, but against one side of the genesis
1651
+ * spine (either cp or me). The genesis tables/rows are always the
1652
+ * "old" side; `newTable` is the cp or me table being aligned. Returns
1653
+ * the enriched alignment with additional paired entries.
1654
+ *
1655
+ * Cell-count guard: only fuzzy-pair when both rows have the same cell
1656
+ * count. Without this guard an asymmetric restructure — e.g. CP and
1657
+ * Me both added a different column — leads to ONE side fuzzy-pairing
1658
+ * its row with genesis (content overlap above threshold) while the
1659
+ * other side falls below threshold. That mismatch routes through
1660
+ * `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
1661
+ * branch, which emits CP's row as a Me-attributed deletion. In
1662
+ * cp-only mode `stripMeAttributedMarkers` then removes the row
1663
+ * entirely and CP's edit vanishes from the view — exactly the
1664
+ * content-loss case we're meant to prevent. Restricting fuzzy
1665
+ * pairing to same-shape rows preserves the common case (single cell
1666
+ * edit, identical row shape) while pushing structural mismatches
1667
+ * back to the boundary-insertion path that emits both sides
1668
+ * explicitly.
1669
+ */
1670
+ function pairSimilarRowsThreeWay(alignment, genesis, newHtml, oldTable, newTable) {
1671
+ const oldTexts = oldTable.rows.map((r) => rowText(genesis, r));
1672
+ const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
1673
+ return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
1674
+ if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0;
1675
+ return textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
1676
+ });
1677
+ }
1678
+ /**
1679
+ * Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
1680
+ * full table HTML keys, fuzzy-pair unmatched table runs by their
1681
+ * row-text-concatenated content. Without this, a table whose body
1682
+ * was edited (but not its outer shape) fails the exact-key match
1683
+ * and the preprocessing emits whole-table del + whole-table ins
1684
+ * instead of recursing into per-cell three-way diffs.
1685
+ */
1686
+ function pairSimilarTablesThreeWay(alignment, oldHtml, newHtml, oldTables, newTables) {
1687
+ const oldTexts = oldTables.map((t) => t.rows.map((r) => rowText(oldHtml, r)).join(" "));
1688
+ const newTexts = newTables.map((t) => t.rows.map((r) => rowText(newHtml, r)).join(" "));
1689
+ return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
1690
+ }
1598
1691
  function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
1599
1692
  if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
1600
1693
  return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
@@ -1637,8 +1730,8 @@ function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
1637
1730
  const gKeys = tG.rows.map((r) => rowKey(genesis, r));
1638
1731
  const cKeys = tC.rows.map((r) => rowKey(cpLatest, r));
1639
1732
  const mKeys = tM.rows.map((r) => rowKey(meCurrent, r));
1640
- const alignCp = lcsAlign(gKeys, cKeys);
1641
- const alignMe = lcsAlign(gKeys, mKeys);
1733
+ const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC);
1734
+ const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM);
1642
1735
  const gToCp = new Array(tG.rows.length).fill(-1);
1643
1736
  for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx;
1644
1737
  const gToMe = new Array(tG.rows.length).fill(-1);
@@ -1695,7 +1788,20 @@ function emitPreservedRow(genesis, cpLatest, meCurrent, rG, rC, rM, cellDiff) {
1695
1788
  out.push(genesis.slice(cursor, rG.rowEnd));
1696
1789
  return out.join("");
1697
1790
  }
1698
- return emitFullRowAttributed(genesis, rG, "del", "me") + emitFullRowAttributed(meCurrent, rM, "ins", "me");
1791
+ const cpRestructured = rC.cells.length !== rG.cells.length;
1792
+ const meRestructured = rM.cells.length !== rG.cells.length;
1793
+ const blocks = [];
1794
+ if (cpRestructured && meRestructured) {
1795
+ blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
1796
+ blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
1797
+ } else if (cpRestructured) {
1798
+ blocks.push(emitFullRowAttributed(genesis, rG, "del", "cp"));
1799
+ blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
1800
+ } else {
1801
+ blocks.push(emitFullRowAttributed(genesis, rG, "del", "me"));
1802
+ blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
1803
+ }
1804
+ return blocks.join("");
1699
1805
  }
1700
1806
  /**
1701
1807
  * Returns map "genesis-row-boundary → list of new-side row indices
@@ -1969,6 +2075,37 @@ var BlockFinder = class {
1969
2075
  };
1970
2076
  //#endregion
1971
2077
  //#region src/HtmlDiff.ts
2078
+ /**
2079
+ * Opinionated options that align htmldiff's output with Microsoft Word's
2080
+ * track-changes rendering for legal-document rewrites.
2081
+ *
2082
+ * The library's bare default (`orphanMatchThreshold = 0`) keeps every
2083
+ * LCS match, however small — which fragments long sentence rewrites
2084
+ * into many tiny ins/del pairs around stray word matches ("of", "the",
2085
+ * "shall"). Word collapses those into a single coarse del+ins, which is
2086
+ * dramatically more readable for legal text.
2087
+ *
2088
+ * 0.25 was tuned empirically against a customer Word reference (US
2089
+ * Commercial One CP, May 2026):
2090
+ * - short edits (typo / one-word insert): output identical to
2091
+ * threshold=0 — inter-match distances are tiny so every match
2092
+ * trivially clears the bar;
2093
+ * - long rewrites (the "Specified Indebtedness" rewrite in the
2094
+ * reference): previously produced 6 dels + 5 ins fragmented around
2095
+ * stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
2096
+ * Word's 1+1 and a major readability win;
2097
+ * - higher values (0.3+) collapsed short edits containing inline
2098
+ * formatting changes into a single block — too aggressive.
2099
+ *
2100
+ * Consumers rendering legal documents should spread this into their
2101
+ * options:
2102
+ * `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
2103
+ * `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
2104
+ *
2105
+ * Other consumers (machine-readable diff, exact-token alignment) can
2106
+ * keep the bare default.
2107
+ */
2108
+ const WORD_ALIGNED_OPTIONS = { orphanMatchThreshold: .25 };
1972
2109
  var HtmlDiff = class HtmlDiff {
1973
2110
  /**
1974
2111
  * This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
@@ -2044,6 +2181,16 @@ var HtmlDiff = class HtmlDiff {
2044
2181
  newText;
2045
2182
  oldText;
2046
2183
  tablePreprocessDepth = 0;
2184
+ /**
2185
+ * Tracks currently-open formatting-tag wraps. Each entry pairs the
2186
+ * opening tag (so a later closing tag can find its match) with the
2187
+ * styling info needed to RE-OPEN the wrap if an overlapping
2188
+ * formatting-tag close forces it to split. Without the styling info,
2189
+ * an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
2190
+ * unclosable wrap (the closing tag for the outer wrap arrives while
2191
+ * an inner wrap is still on the stack); see `insertTag`'s closing
2192
+ * handler for the split logic.
2193
+ */
2047
2194
  specialTagDiffStack = [];
2048
2195
  newWords = [];
2049
2196
  oldWords = [];
@@ -2111,8 +2258,23 @@ var HtmlDiff = class HtmlDiff {
2111
2258
  this.oldText = oldText;
2112
2259
  this.newText = newText;
2113
2260
  }
2114
- static execute(oldText, newText) {
2115
- return new HtmlDiff(oldText, newText).build();
2261
+ /**
2262
+ * Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
2263
+ * `executeThreeWay`, with two intentional exceptions documented
2264
+ * inline below. Consumers wanting Word-aligned output should spread
2265
+ * `WORD_ALIGNED_OPTIONS` into the third argument.
2266
+ *
2267
+ * Note: unlike `analyze`, `execute` runs `build()` which performs
2268
+ * full table preprocessing — `tablePreprocessDepth` stays at 0 so
2269
+ * the recursive cell diff can happen. Callers can't override that.
2270
+ */
2271
+ static execute(oldText, newText, options = {}) {
2272
+ const inner = new HtmlDiff(oldText, newText);
2273
+ if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
2274
+ if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
2275
+ if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
2276
+ if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
2277
+ return inner.build();
2116
2278
  }
2117
2279
  /**
2118
2280
  * Analyse a two-way diff and return its raw building blocks: the word
@@ -2181,22 +2343,6 @@ var HtmlDiff = class HtmlDiff {
2181
2343
  return HtmlDiff.shouldUseContentProjections(oldWords, newWords, oldProj, newProj);
2182
2344
  }
2183
2345
  /**
2184
- * Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
2185
- * version CP sent back), and V3 (Me's current draft), produces a
2186
- * single attributed HTML output where CP's and Me's changes are
2187
- * distinguished by `data-author` ('cp' or 'me') and matching
2188
- * `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
2189
- * CP's proposal" case (Me deleted text CP had inserted) gets a
2190
- * dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
2191
- *
2192
- * Coordinates the symmetric-projection decision (D1) across both
2193
- * internal `analyze` calls so V2 tokenises identically on each side
2194
- * of the spine. When `useProjections` is left undefined, the decision
2195
- * is the conjunction of both pair-wise heuristics — project iff both
2196
- * pairs would project on their own. Pass an explicit boolean to
2197
- * override.
2198
- */
2199
- /**
2200
2346
  * Three-way HTML diff against a shared genesis. Produces attributed
2201
2347
  * HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
2202
2348
  * from Me's accumulated changes (genesis → meCurrent). Use this for
@@ -2243,6 +2389,25 @@ var HtmlDiff = class HtmlDiff {
2243
2389
  * buffer. Reusing the instance keeps the formatting-tag stack
2244
2390
  * (`specialTagDiffStack`) coherent across segments — a `<strong>`
2245
2391
  * opened in one segment and closed in another stays balanced.
2392
+ *
2393
+ * Edge case: an ins/del segment can open a formatting wrap whose
2394
+ * matching closer ends up in an equal segment (`<strong>` deleted
2395
+ * by CP but `</strong>` kept by both — buildSegments emits the open
2396
+ * as del-cp and the close as equal). Equal segments bypass
2397
+ * `insertTag` and push raw, so the stack entry for the open is
2398
+ * never popped. Rather than throw — which forces the caller's UI
2399
+ * into an error boundary — close every leftover wrap with `</ins>`
2400
+ * at the end of emission.
2401
+ *
2402
+ * Caveat: the `</ins>` close is honest for the mod-wrap that the
2403
+ * opener pushed (every formatting opener emits an inner `<ins…>`
2404
+ * postInject regardless of whether the outer segment is ins or
2405
+ * del). For del-segment formatting openers the outer `<del>` may
2406
+ * itself be left open by the same emission imbalance; this fixup
2407
+ * doesn't address that. Downstream browsers/DOMParser normalise
2408
+ * mildly-malformed HTML by closing dangling tags, so the rendered
2409
+ * output is usually acceptable — but the warning IS the signal
2410
+ * that the input had a real imbalance worth investigating.
2246
2411
  */
2247
2412
  static emitSegments(segments) {
2248
2413
  const emitter = new HtmlDiff("", "");
@@ -2254,7 +2419,13 @@ var HtmlDiff = class HtmlDiff {
2254
2419
  const { tag, baseClass, metadata } = segmentEmissionShape(seg.attr);
2255
2420
  emitter.insertTag(tag, baseClass, [...seg.words], metadata);
2256
2421
  }
2257
- if (emitter.specialTagDiffStack.length > 0) throw new Error(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting tag(s) on the stack — input may have unbalanced <strong>/<em>/etc. or there is a bug in segment emission.`);
2422
+ if (emitter.specialTagDiffStack.length > 0) {
2423
+ console.warn(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting wrap(s) on the stack. Closing defensively. This usually means a formatting tag opens in a del/ins segment and its matching closer is in an equal segment.`);
2424
+ while (emitter.specialTagDiffStack.length > 0) {
2425
+ emitter.content.push("</ins>");
2426
+ emitter.specialTagDiffStack.pop();
2427
+ }
2428
+ }
2258
2429
  return emitter.content.join("");
2259
2430
  }
2260
2431
  /**
@@ -2510,38 +2681,52 @@ var HtmlDiff = class HtmlDiff {
2510
2681
  if (words.length === 0) break;
2511
2682
  const indexOfFirstNonTag = words.findIndex((x) => !Utils_default.isTag(x));
2512
2683
  const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1;
2513
- let specialCaseTagInjection = "";
2514
- let specialCaseTagInjectionIsBefore = false;
2684
+ let preInject = "";
2685
+ let postInject = "";
2515
2686
  if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
2516
2687
  const tagNames = /* @__PURE__ */ new Set();
2517
2688
  for (const word of words) if (Utils_default.isTag(word)) tagNames.add(Utils_default.getTagName(word));
2518
2689
  const styledTagNames = Array.from(tagNames).join(" ");
2519
- this.specialTagDiffStack.push(words[0]);
2520
- specialCaseTagInjection = `<ins${Utils_default.composeTagAttributes(`mod ${styledTagNames}`, metadata ?? {})}>`;
2690
+ const styledCssClass = `mod ${styledTagNames}`;
2691
+ this.specialTagDiffStack.push({
2692
+ tag: words[0],
2693
+ styledTagNames,
2694
+ cssClass: styledCssClass,
2695
+ metadata
2696
+ });
2697
+ postInject = `<ins${Utils_default.composeTagAttributes(styledCssClass, metadata ?? {})}>`;
2521
2698
  if (tag === HtmlDiff.DelTag) {
2522
2699
  words.shift();
2523
2700
  while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) words.shift();
2524
2701
  }
2525
2702
  } else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
2526
- const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop();
2527
2703
  let tagIndexToCompare = indexLastTagInFirstTagBlock;
2528
2704
  if (tag === HtmlDiff.DelTag && indexOfFirstNonTag === -1) {
2529
2705
  if (words.slice(0, indexLastTagInFirstTagBlock + 1).some((w) => !HtmlDiff.SpecialCaseClosingTagsSet.has(w.toLowerCase()))) tagIndexToCompare = 0;
2530
2706
  }
2531
- const openingAndClosingTagsMatch = !!openingTag && Utils_default.getTagName(openingTag) === Utils_default.getTagName(words[tagIndexToCompare]);
2532
- if (openingTag && openingAndClosingTagsMatch) {
2533
- specialCaseTagInjection = "</ins>";
2534
- specialCaseTagInjectionIsBefore = true;
2535
- } else if (openingTag) this.specialTagDiffStack.push(openingTag);
2707
+ const closingTagName = Utils_default.getTagName(words[tagIndexToCompare]);
2708
+ let matchIdx = -1;
2709
+ for (let i = this.specialTagDiffStack.length - 1; i >= 0; i--) if (Utils_default.getTagName(this.specialTagDiffStack[i].tag) === closingTagName) {
2710
+ matchIdx = i;
2711
+ break;
2712
+ }
2713
+ if (matchIdx >= 0) {
2714
+ const aboveEntries = this.specialTagDiffStack.splice(matchIdx + 1);
2715
+ this.specialTagDiffStack.pop();
2716
+ preInject = "</ins>".repeat(aboveEntries.length + 1);
2717
+ for (const entry of aboveEntries) {
2718
+ postInject += `<ins${Utils_default.composeTagAttributes(entry.cssClass, entry.metadata ?? {})}>`;
2719
+ this.specialTagDiffStack.push(entry);
2720
+ }
2721
+ }
2536
2722
  if (tag === HtmlDiff.DelTag) {
2537
2723
  words.shift();
2538
2724
  while (words.length > 0 && HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) words.shift();
2539
2725
  }
2540
2726
  }
2541
- if (words.length === 0 && specialCaseTagInjection.length === 0) break;
2727
+ if (words.length === 0 && preInject.length === 0 && postInject.length === 0) break;
2542
2728
  const isTagForExtraction = tag === HtmlDiff.DelTag ? (x) => Utils_default.isTag(x) && !HtmlDiff.SpecialCaseOpeningTagRegex.test(x) && !HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase()) : Utils_default.isTag;
2543
- if (specialCaseTagInjectionIsBefore) this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, isTagForExtraction).join(""));
2544
- else this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join("") + specialCaseTagInjection);
2729
+ this.content.push(preInject + this.extractConsecutiveWords(words, isTagForExtraction).join("") + postInject);
2545
2730
  if (words.length === 0) continue;
2546
2731
  this.insertTag(tag, cssClass, words, metadata);
2547
2732
  break;
@@ -2606,6 +2791,17 @@ var HtmlDiff = class HtmlDiff {
2606
2791
  curr = next;
2607
2792
  continue;
2608
2793
  }
2794
+ let allTags = true;
2795
+ for (let i = curr.startInNew; i < curr.endInNew; i++) if (!Utils_default.isTag(wordsForDiffNew[i])) {
2796
+ allTags = false;
2797
+ break;
2798
+ }
2799
+ if (allTags) {
2800
+ yield curr;
2801
+ prev = curr;
2802
+ curr = next;
2803
+ continue;
2804
+ }
2609
2805
  let oldDistanceInChars = 0;
2610
2806
  for (let i = prev.endInOld; i < next.startInOld; i++) oldDistanceInChars += wordsForDiffOld[i].length;
2611
2807
  let newDistanceInChars = 0;
@@ -2648,6 +2844,7 @@ var HtmlDiff = class HtmlDiff {
2648
2844
  }
2649
2845
  };
2650
2846
  //#endregion
2651
- module.exports = HtmlDiff;
2847
+ exports.WORD_ALIGNED_OPTIONS = WORD_ALIGNED_OPTIONS;
2848
+ exports.default = HtmlDiff;
2652
2849
 
2653
2850
  //# sourceMappingURL=HtmlDiff.cjs.map