@createiq/htmldiff 1.2.0-beta.1 → 1.2.0-beta.10

This diff shows the content changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/HtmlDiff.mjs CHANGED
@@ -1333,13 +1333,33 @@ function collectInsertionsKeyedByEnd(d) {
1333
1333
  return out;
1334
1334
  }
1335
1335
  /**
1336
- * Emit any insertions at boundary `b`. When both authors inserted at
1337
- * the same boundary AND the inserted token sequences are textually
1338
- * identical, the insertion is treated as agreed and emitted unmarked.
1339
- * Otherwise each side's insertion is emitted with author attribution.
1336
+ * Emit any insertions at boundary `b`.
1340
1337
  *
1341
- * The CP-then-Me ordering for disagreement is arbitrary but consistent;
1342
- * callers don't depend on it.
1338
+ * Reading model: a legal reviewer wants to see CP's INTENT relative
1339
+ * to Me's current content. Me's content is the base; CP's deltas are
1340
+ * what they need to act on. Under that framing:
1341
+ * - tokens both authors inserted at the same boundary → settled
1342
+ * - tokens CP inserted that Me doesn't have → ins-cp (CP wants
1343
+ * this added)
1344
+ * - tokens Me inserted that CP doesn't have → del-cp (CP wants
1345
+ * this removed from Me's content)
1346
+ *
1347
+ * The third case is the load-bearing attribution flip. The
1348
+ * genesis-spine view technically labels me-only-at-boundary tokens
1349
+ * as "ins-me" (Me added them; CP didn't), but that's confusing to
1350
+ * a reviewer: they see "Me added X" alongside "CP added Y" and have
1351
+ * to mentally derive "CP wants X gone, replaced with Y". Surfacing
1352
+ * me-only tokens as `del-cp` shows CP's intent directly:
1353
+ * - "CP accepted Me's text minus `things`": settled bulk + del-cp
1354
+ * `things` (no parallel redundant insertions)
1355
+ * - "CP wants `cruel` where Me wrote `brave`": ins-cp `cruel` +
1356
+ * del-cp `brave` (the substitution intent reads directly)
1357
+ * - "CP added extra words": cp-extras stay as ins-cp (same as
1358
+ * before; the cp-only direction was always intent-correct)
1359
+ *
1360
+ * Pure single-side insertions (Me added text CP doesn't engage
1361
+ * with at all, or vice versa) keep their genesis-spine attribution
1362
+ * — these aren't refinement cases, just Me's own content additions.
1343
1363
  */
1344
1364
  function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments) {
1345
1365
  const cpIns = cpInsAt.get(b);
@@ -1347,18 +1367,34 @@ function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments)
1347
1367
  const hasCp = !!cpIns && cpIns.length > 0;
1348
1368
  const hasMe = !!meIns && meIns.length > 0;
1349
1369
  if (!hasCp && !hasMe) return;
1350
- if (hasCp && hasMe && tokenArraysEqual(cpIns, meIns)) {
1370
+ if (!hasCp) {
1371
+ appendSegment(segments, {
1372
+ kind: "ins",
1373
+ author: "me"
1374
+ }, meIns);
1375
+ return;
1376
+ }
1377
+ if (!hasMe) {
1378
+ appendSegment(segments, {
1379
+ kind: "ins",
1380
+ author: "cp"
1381
+ }, cpIns);
1382
+ return;
1383
+ }
1384
+ if (tokenArraysEqual(cpIns, meIns)) {
1351
1385
  appendSegment(segments, { kind: "equal" }, cpIns);
1352
1386
  return;
1353
1387
  }
1354
- if (hasCp) appendSegment(segments, {
1388
+ const alignment = lcsAlign(cpIns, meIns);
1389
+ for (const a of alignment) if (a.oldIdx !== null && a.newIdx !== null) appendSegment(segments, { kind: "equal" }, [cpIns[a.oldIdx]]);
1390
+ else if (a.oldIdx !== null) appendSegment(segments, {
1355
1391
  kind: "ins",
1356
1392
  author: "cp"
1357
- }, cpIns);
1358
- if (hasMe) appendSegment(segments, {
1359
- kind: "ins",
1360
- author: "me"
1361
- }, meIns);
1393
+ }, [cpIns[a.oldIdx]]);
1394
+ else if (a.newIdx !== null) appendSegment(segments, {
1395
+ kind: "del",
1396
+ author: "cp"
1397
+ }, [meIns[a.newIdx]]);
1362
1398
  }
1363
1399
  function tokenArraysEqual(a, b) {
1364
1400
  if (a.length !== b.length) return false;
@@ -1468,8 +1504,8 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
1468
1504
  const gKeys = gTables.map((t) => tableKey(genesis, t));
1469
1505
  const cKeys = cTables.map((t) => tableKey(cpLatest, t));
1470
1506
  const mKeys = mTables.map((t) => tableKey(meCurrent, t));
1471
- const alignCp = lcsAlign(gKeys, cKeys);
1472
- const alignMe = lcsAlign(gKeys, mKeys);
1507
+ const alignCp = pairSimilarTablesThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, gTables, cTables);
1508
+ const alignMe = pairSimilarTablesThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, gTables, mTables);
1473
1509
  const gToCp = new Array(gTables.length).fill(-1);
1474
1510
  const cpToG = new Array(cTables.length).fill(-1);
1475
1511
  for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
@@ -1580,7 +1616,7 @@ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTa
1580
1616
  placeholderToDiff
1581
1617
  };
1582
1618
  }
1583
- const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .5;
1619
+ const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .15;
1584
1620
  function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables) {
1585
1621
  if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false;
1586
1622
  for (let i = 0; i < gTables.length; i++) {
@@ -1595,6 +1631,59 @@ function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTa
1595
1631
  function tableKey(html, table) {
1596
1632
  return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
1597
1633
  }
1634
+ /**
1635
+ * Character-level similarity above which the three-way aligner treats
1636
+ * two rows / tables as "the same logical entry, edited" rather than
1637
+ * an unrelated delete + insert. Matched to TableDiff's
1638
+ * `ROW_FUZZY_THRESHOLD` / `CELL_FUZZY_THRESHOLD` so 2-way and 3-way
1639
+ * agree on which pairings are reachable; if a row's content overlap
1640
+ * is enough to fool the 2-way diff into pairing, it should also be
1641
+ * enough for 3-way.
1642
+ */
1643
+ const THREE_WAY_FUZZY_THRESHOLD = .5;
1644
+ /**
1645
+ * Run the same fuzzy-pairing pass `TableDiff.pairSimilarUnmatchedRows`
1646
+ * applies after its exact-LCS, but against one side of the genesis
1647
+ * spine (either cp or me). The genesis tables/rows are always the
1648
+ * "old" side; `newTable` is the cp or me table being aligned. Returns
1649
+ * the enriched alignment with additional paired entries.
1650
+ *
1651
+ * Cell-count guard: only fuzzy-pair when both rows have the same cell
1652
+ * count. Without this guard an asymmetric restructure — e.g. CP and
1653
+ * Me both added a different column — leads to ONE side fuzzy-pairing
1654
+ * its row with genesis (content overlap above threshold) while the
1655
+ * other side falls below threshold. That mismatch routes through
1656
+ * `diffTableStructural`'s "Me dropped, CP kept" (or the mirror)
1657
+ * branch, which emits CP's row as a Me-attributed deletion. In
1658
+ * cp-only mode `stripMeAttributedMarkers` then removes the row
1659
+ * entirely and CP's edit vanishes from the view — exactly the
1660
+ * content-loss case we're meant to prevent. Restricting fuzzy
1661
+ * pairing to same-shape rows preserves the common case (single cell
1662
+ * edit, identical row shape) while pushing structural mismatches
1663
+ * back to the boundary-insertion path that emits both sides
1664
+ * explicitly.
1665
+ */
1666
+ function pairSimilarRowsThreeWay(alignment, genesis, newHtml, oldTable, newTable) {
1667
+ const oldTexts = oldTable.rows.map((r) => rowText(genesis, r));
1668
+ const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
1669
+ return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => {
1670
+ if (oldTable.rows[oldIdx].cells.length !== newTable.rows[newIdx].cells.length) return 0;
1671
+ return textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
1672
+ });
1673
+ }
1674
+ /**
1675
+ * Table-level counterpart: after `lcsAlign(gKeys, otherKeys)` over
1676
+ * full table HTML keys, fuzzy-pair unmatched table runs by their
1677
+ * row-text-concatenated content. Without this, a table whose body
1678
+ * was edited (but not its outer shape) fails the exact-key match
1679
+ * and the preprocessing emits whole-table del + whole-table ins
1680
+ * instead of recursing into per-cell three-way diffs.
1681
+ */
1682
+ function pairSimilarTablesThreeWay(alignment, oldHtml, newHtml, oldTables, newTables) {
1683
+ const oldTexts = oldTables.map((t) => t.rows.map((r) => rowText(oldHtml, r)).join(" "));
1684
+ const newTexts = newTables.map((t) => t.rows.map((r) => rowText(newHtml, r)).join(" "));
1685
+ return pairSimilarUnmatched(alignment, THREE_WAY_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
1686
+ }
1598
1687
  function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
1599
1688
  if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
1600
1689
  return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
@@ -1637,8 +1726,8 @@ function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff)
1637
1726
  const gKeys = tG.rows.map((r) => rowKey(genesis, r));
1638
1727
  const cKeys = tC.rows.map((r) => rowKey(cpLatest, r));
1639
1728
  const mKeys = tM.rows.map((r) => rowKey(meCurrent, r));
1640
- const alignCp = lcsAlign(gKeys, cKeys);
1641
- const alignMe = lcsAlign(gKeys, mKeys);
1729
+ const alignCp = pairSimilarRowsThreeWay(lcsAlign(gKeys, cKeys), genesis, cpLatest, tG, tC);
1730
+ const alignMe = pairSimilarRowsThreeWay(lcsAlign(gKeys, mKeys), genesis, meCurrent, tG, tM);
1642
1731
  const gToCp = new Array(tG.rows.length).fill(-1);
1643
1732
  for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx;
1644
1733
  const gToMe = new Array(tG.rows.length).fill(-1);
@@ -1695,7 +1784,20 @@ function emitPreservedRow(genesis, cpLatest, meCurrent, rG, rC, rM, cellDiff) {
1695
1784
  out.push(genesis.slice(cursor, rG.rowEnd));
1696
1785
  return out.join("");
1697
1786
  }
1698
- return emitFullRowAttributed(genesis, rG, "del", "me") + emitFullRowAttributed(meCurrent, rM, "ins", "me");
1787
+ const cpRestructured = rC.cells.length !== rG.cells.length;
1788
+ const meRestructured = rM.cells.length !== rG.cells.length;
1789
+ const blocks = [];
1790
+ if (cpRestructured && meRestructured) {
1791
+ blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
1792
+ blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
1793
+ } else if (cpRestructured) {
1794
+ blocks.push(emitFullRowAttributed(genesis, rG, "del", "cp"));
1795
+ blocks.push(emitFullRowAttributed(cpLatest, rC, "ins", "cp"));
1796
+ } else {
1797
+ blocks.push(emitFullRowAttributed(genesis, rG, "del", "me"));
1798
+ blocks.push(emitFullRowAttributed(meCurrent, rM, "ins", "me"));
1799
+ }
1800
+ return blocks.join("");
1699
1801
  }
1700
1802
  /**
1701
1803
  * Returns map "genesis-row-boundary → list of new-side row indices
@@ -1969,6 +2071,37 @@ var BlockFinder = class {
1969
2071
  };
1970
2072
  //#endregion
1971
2073
  //#region src/HtmlDiff.ts
2074
+ /**
2075
+ * Opinionated options that align htmldiff's output with Microsoft Word's
2076
+ * track-changes rendering for legal-document rewrites.
2077
+ *
2078
+ * The library's bare default (`orphanMatchThreshold = 0`) keeps every
2079
+ * LCS match, however small — which fragments long sentence rewrites
2080
+ * into many tiny ins/del pairs around stray word matches ("of", "the",
2081
+ * "shall"). Word collapses those into a single coarse del+ins, which is
2082
+ * dramatically more readable for legal text.
2083
+ *
2084
+ * 0.25 was tuned empirically against a customer Word reference (US
2085
+ * Commercial One CP, May 2026):
2086
+ * - short edits (typo / one-word insert): output identical to
2087
+ * threshold=0 — inter-match distances are tiny so every match
2088
+ * trivially clears the bar;
2089
+ * - long rewrites (the "Specified Indebtedness" rewrite in the
2090
+ * reference): previously produced 6 dels + 5 ins fragmented around
2091
+ * stray matches; at 0.25 it condenses to 3 dels + 2 ins — close to
2092
+ * Word's 1+1 and a major readability win;
2093
+ * - higher values (0.3+) collapsed short edits containing inline
2094
+ * formatting changes into a single block — too aggressive.
2095
+ *
2096
+ * Consumers rendering legal documents should spread this into their
2097
+ * options:
2098
+ * `HtmlDiff.execute(old, new, { ...WORD_ALIGNED_OPTIONS })`
2099
+ * `HtmlDiff.executeThreeWay(g, c, m, { ...WORD_ALIGNED_OPTIONS })`
2100
+ *
2101
+ * Other consumers (machine-readable diff, exact-token alignment) can
2102
+ * keep the bare default.
2103
+ */
2104
+ const WORD_ALIGNED_OPTIONS = { orphanMatchThreshold: .25 };
1972
2105
  var HtmlDiff = class HtmlDiff {
1973
2106
  /**
1974
2107
  * This value defines the balance between speed and memory utilization. The higher it is, the faster it works and the more memory it consumes.
@@ -2044,6 +2177,16 @@ var HtmlDiff = class HtmlDiff {
2044
2177
  newText;
2045
2178
  oldText;
2046
2179
  tablePreprocessDepth = 0;
2180
+ /**
2181
+ * Tracks currently-open formatting-tag wraps. Each entry pairs the
2182
+ * opening tag (so a later closing tag can find its match) with the
2183
+ * styling info needed to RE-OPEN the wrap if an overlapping
2184
+ * formatting-tag close forces it to split. Without the styling info,
2185
+ * an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
2186
+ * unclosable wrap (the closing tag for the outer wrap arrives while
2187
+ * an inner wrap is still on the stack); see `insertTag`'s closing
2188
+ * handler for the split logic.
2189
+ */
2047
2190
  specialTagDiffStack = [];
2048
2191
  newWords = [];
2049
2192
  oldWords = [];
@@ -2111,8 +2254,23 @@ var HtmlDiff = class HtmlDiff {
2111
2254
  this.oldText = oldText;
2112
2255
  this.newText = newText;
2113
2256
  }
2114
- static execute(oldText, newText) {
2115
- return new HtmlDiff(oldText, newText).build();
2257
+ /**
2258
+ * Two-way diff entry point. Accepts the same `AnalyzeOptions` bag as
2259
+ * `executeThreeWay`, with two intentional exceptions documented
2260
+ * inline below. Consumers wanting Word-aligned output should spread
2261
+ * `WORD_ALIGNED_OPTIONS` into the third argument.
2262
+ *
2263
+ * Note: unlike `analyze`, `execute` runs `build()` which performs
2264
+ * full table preprocessing — `tablePreprocessDepth` stays at 0 so
2265
+ * the recursive cell diff can happen. Callers can't override that.
2266
+ */
2267
+ static execute(oldText, newText, options = {}) {
2268
+ const inner = new HtmlDiff(oldText, newText);
2269
+ if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
2270
+ if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
2271
+ if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
2272
+ if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
2273
+ return inner.build();
2116
2274
  }
2117
2275
  /**
2118
2276
  * Analyse a two-way diff and return its raw building blocks: the word
@@ -2181,22 +2339,6 @@ var HtmlDiff = class HtmlDiff {
2181
2339
  return HtmlDiff.shouldUseContentProjections(oldWords, newWords, oldProj, newProj);
2182
2340
  }
2183
2341
  /**
2184
- * Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
2185
- * version CP sent back), and V3 (Me's current draft), produces a
2186
- * single attributed HTML output where CP's and Me's changes are
2187
- * distinguished by `data-author` ('cp' or 'me') and matching
2188
- * `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
2189
- * CP's proposal" case (Me deleted text CP had inserted) gets a
2190
- * dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
2191
- *
2192
- * Coordinates the symmetric-projection decision (D1) across both
2193
- * internal `analyze` calls so V2 tokenises identically on each side
2194
- * of the spine. When `useProjections` is left undefined, the decision
2195
- * is the conjunction of both pair-wise heuristics — project iff both
2196
- * pairs would project on their own. Pass an explicit boolean to
2197
- * override.
2198
- */
2199
- /**
2200
2342
  * Three-way HTML diff against a shared genesis. Produces attributed
2201
2343
  * HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
2202
2344
  * from Me's accumulated changes (genesis → meCurrent). Use this for
@@ -2243,6 +2385,25 @@ var HtmlDiff = class HtmlDiff {
2243
2385
  * buffer. Reusing the instance keeps the formatting-tag stack
2244
2386
  * (`specialTagDiffStack`) coherent across segments — a `<strong>`
2245
2387
  * opened in one segment and closed in another stays balanced.
2388
+ *
2389
+ * Edge case: an ins/del segment can open a formatting wrap whose
2390
+ * matching closer ends up in an equal segment (`<strong>` deleted
2391
+ * by CP but `</strong>` kept by both — buildSegments emits the open
2392
+ * as del-cp and the close as equal). Equal segments bypass
2393
+ * `insertTag` and push raw, so the stack entry for the open is
2394
+ * never popped. Rather than throw — which forces the caller's UI
2395
+ * into an error boundary — close every leftover wrap with `</ins>`
2396
+ * at the end of emission.
2397
+ *
2398
+ * Caveat: the `</ins>` close is honest for the mod-wrap that the
2399
+ * opener pushed (every formatting opener emits an inner `<ins…>`
2400
+ * postInject regardless of whether the outer segment is ins or
2401
+ * del). For del-segment formatting openers the outer `<del>` may
2402
+ * itself be left open by the same emission imbalance; this fixup
2403
+ * doesn't address that. Downstream browsers/DOMParser normalise
2404
+ * mildly-malformed HTML by closing dangling tags, so the rendered
2405
+ * output is usually acceptable — but the warning IS the signal
2406
+ * that the input had a real imbalance worth investigating.
2246
2407
  */
2247
2408
  static emitSegments(segments) {
2248
2409
  const emitter = new HtmlDiff("", "");
@@ -2254,7 +2415,13 @@ var HtmlDiff = class HtmlDiff {
2254
2415
  const { tag, baseClass, metadata } = segmentEmissionShape(seg.attr);
2255
2416
  emitter.insertTag(tag, baseClass, [...seg.words], metadata);
2256
2417
  }
2257
- if (emitter.specialTagDiffStack.length > 0) throw new Error(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting tag(s) on the stack — input may have unbalanced <strong>/<em>/etc. or there is a bug in segment emission.`);
2418
+ if (emitter.specialTagDiffStack.length > 0) {
2419
+ console.warn(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting wrap(s) on the stack. Closing defensively. This usually means a formatting tag opens in a del/ins segment and its matching closer is in an equal segment.`);
2420
+ while (emitter.specialTagDiffStack.length > 0) {
2421
+ emitter.content.push("</ins>");
2422
+ emitter.specialTagDiffStack.pop();
2423
+ }
2424
+ }
2258
2425
  return emitter.content.join("");
2259
2426
  }
2260
2427
  /**
@@ -2510,38 +2677,52 @@ var HtmlDiff = class HtmlDiff {
2510
2677
  if (words.length === 0) break;
2511
2678
  const indexOfFirstNonTag = words.findIndex((x) => !Utils_default.isTag(x));
2512
2679
  const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1;
2513
- let specialCaseTagInjection = "";
2514
- let specialCaseTagInjectionIsBefore = false;
2680
+ let preInject = "";
2681
+ let postInject = "";
2515
2682
  if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
2516
2683
  const tagNames = /* @__PURE__ */ new Set();
2517
2684
  for (const word of words) if (Utils_default.isTag(word)) tagNames.add(Utils_default.getTagName(word));
2518
2685
  const styledTagNames = Array.from(tagNames).join(" ");
2519
- this.specialTagDiffStack.push(words[0]);
2520
- specialCaseTagInjection = `<ins${Utils_default.composeTagAttributes(`mod ${styledTagNames}`, metadata ?? {})}>`;
2686
+ const styledCssClass = `mod ${styledTagNames}`;
2687
+ this.specialTagDiffStack.push({
2688
+ tag: words[0],
2689
+ styledTagNames,
2690
+ cssClass: styledCssClass,
2691
+ metadata
2692
+ });
2693
+ postInject = `<ins${Utils_default.composeTagAttributes(styledCssClass, metadata ?? {})}>`;
2521
2694
  if (tag === HtmlDiff.DelTag) {
2522
2695
  words.shift();
2523
2696
  while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) words.shift();
2524
2697
  }
2525
2698
  } else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
2526
- const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop();
2527
2699
  let tagIndexToCompare = indexLastTagInFirstTagBlock;
2528
2700
  if (tag === HtmlDiff.DelTag && indexOfFirstNonTag === -1) {
2529
2701
  if (words.slice(0, indexLastTagInFirstTagBlock + 1).some((w) => !HtmlDiff.SpecialCaseClosingTagsSet.has(w.toLowerCase()))) tagIndexToCompare = 0;
2530
2702
  }
2531
- const openingAndClosingTagsMatch = !!openingTag && Utils_default.getTagName(openingTag) === Utils_default.getTagName(words[tagIndexToCompare]);
2532
- if (openingTag && openingAndClosingTagsMatch) {
2533
- specialCaseTagInjection = "</ins>";
2534
- specialCaseTagInjectionIsBefore = true;
2535
- } else if (openingTag) this.specialTagDiffStack.push(openingTag);
2703
+ const closingTagName = Utils_default.getTagName(words[tagIndexToCompare]);
2704
+ let matchIdx = -1;
2705
+ for (let i = this.specialTagDiffStack.length - 1; i >= 0; i--) if (Utils_default.getTagName(this.specialTagDiffStack[i].tag) === closingTagName) {
2706
+ matchIdx = i;
2707
+ break;
2708
+ }
2709
+ if (matchIdx >= 0) {
2710
+ const aboveEntries = this.specialTagDiffStack.splice(matchIdx + 1);
2711
+ this.specialTagDiffStack.pop();
2712
+ preInject = "</ins>".repeat(aboveEntries.length + 1);
2713
+ for (const entry of aboveEntries) {
2714
+ postInject += `<ins${Utils_default.composeTagAttributes(entry.cssClass, entry.metadata ?? {})}>`;
2715
+ this.specialTagDiffStack.push(entry);
2716
+ }
2717
+ }
2536
2718
  if (tag === HtmlDiff.DelTag) {
2537
2719
  words.shift();
2538
2720
  while (words.length > 0 && HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) words.shift();
2539
2721
  }
2540
2722
  }
2541
- if (words.length === 0 && specialCaseTagInjection.length === 0) break;
2723
+ if (words.length === 0 && preInject.length === 0 && postInject.length === 0) break;
2542
2724
  const isTagForExtraction = tag === HtmlDiff.DelTag ? (x) => Utils_default.isTag(x) && !HtmlDiff.SpecialCaseOpeningTagRegex.test(x) && !HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase()) : Utils_default.isTag;
2543
- if (specialCaseTagInjectionIsBefore) this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, isTagForExtraction).join(""));
2544
- else this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join("") + specialCaseTagInjection);
2725
+ this.content.push(preInject + this.extractConsecutiveWords(words, isTagForExtraction).join("") + postInject);
2545
2726
  if (words.length === 0) continue;
2546
2727
  this.insertTag(tag, cssClass, words, metadata);
2547
2728
  break;
@@ -2606,6 +2787,17 @@ var HtmlDiff = class HtmlDiff {
2606
2787
  curr = next;
2607
2788
  continue;
2608
2789
  }
2790
+ let allTags = true;
2791
+ for (let i = curr.startInNew; i < curr.endInNew; i++) if (!Utils_default.isTag(wordsForDiffNew[i])) {
2792
+ allTags = false;
2793
+ break;
2794
+ }
2795
+ if (allTags) {
2796
+ yield curr;
2797
+ prev = curr;
2798
+ curr = next;
2799
+ continue;
2800
+ }
2609
2801
  let oldDistanceInChars = 0;
2610
2802
  for (let i = prev.endInOld; i < next.startInOld; i++) oldDistanceInChars += wordsForDiffOld[i].length;
2611
2803
  let newDistanceInChars = 0;
@@ -2648,6 +2840,6 @@ var HtmlDiff = class HtmlDiff {
2648
2840
  }
2649
2841
  };
2650
2842
  //#endregion
2651
- export { HtmlDiff as default };
2843
+ export { WORD_ALIGNED_OPTIONS, HtmlDiff as default };
2652
2844
 
2653
2845
  //# sourceMappingURL=HtmlDiff.mjs.map