npm - @createiq/htmldiff - Versions diffs - 1.0.5-beta.1 → 1.0.5-beta.3 - Mend

@createiq/htmldiff 1.0.5-beta.1 → 1.0.5-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/HtmlDiff.cjs +171 -11
package/dist/HtmlDiff.cjs.map +1 -1
package/dist/HtmlDiff.mjs +171 -11
package/dist/HtmlDiff.mjs.map +1 -1
package/package.json +1 -1
package/src/TableDiff.ts +258 -26
package/test/HtmlDiff.tables.matrix.spec.ts +327 -0
package/test/HtmlDiff.tables.spec.ts +39 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@createiq/htmldiff",
-  "version": "1.0.5-beta.1",
+  "version": "1.0.5-beta.3",
   "description": "TypeScript port of htmldiff.net",
   "type": "module",
   "author": "Mathew Mannion <mathew.mannion@linklaters.com>",

package/src/TableDiff.ts CHANGED Viewed

@@ -469,13 +469,165 @@ function diffPreservedRow(
   }
   // Cell counts differ. Try to interpret it as a horizontal merge/split via
   // colspan first — preserving the new structure with `class='mod colspan'`
-  // on each affected cell. Falls back to the cell-LCS path if the cells
-  // don't align cleanly on logical column positions.
+  // on each affected cell.
   const colspanAligned = diffColspanChangedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
   if (colspanAligned !== null) return colspanAligned
+  // For a column add/delete (cell count differs by 1 to MAX_COLUMN_DELTA),
+  // detect the positions via positional similarity scan and align the
+  // remaining cells positionally. This handles the case where a column
+  // was added AND a different cell got an unrelated content edit — the
+  // edited cell still aligns by position rather than getting orphaned by
+  // the cell-LCS exact-match.
+  const delta = newRow.cells.length - oldRow.cells.length
+  // For column add/delete (cell counts differ), find the best insertion or
+  // deletion positions via positional similarity scan and align the rest
+  // positionally. This handles content-edit alongside column-add by keeping
+  // the edited cell in its column rather than orphaning it via cell-LCS.
+  // Guardrail: the search is C(max(oldCount, newCount), k), so both k and
+  // the row width are capped to avoid explosion on very wide tables (worst
+  // case C(40, 6) ≈ 3.8M combinations per row). The caps are generous for
+  // real legal schedules; anything above falls through to cell-LCS.
+  const absDelta = Math.abs(delta)
+  if (
+    absDelta > 0 &&
+    absDelta <= MAX_COLUMN_DELTA &&
+    Math.max(oldRow.cells.length, newRow.cells.length) <= MAX_COLUMN_SEARCH_WIDTH
+  ) {
+    if (delta > 0) return diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, delta, diffCell)
+    return diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, -delta, diffCell)
+  }
   return diffStructurallyAlignedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
 }
+const MAX_COLUMN_DELTA = 6
+const MAX_COLUMN_SEARCH_WIDTH = 40
+/**
+ * For a row where new has K more cells than old, find the K column
+ * positions in new where cells were inserted by scanning all C(newCount,
+ * K) combinations and picking the one that maximises positional content
+ * similarity with the remaining cells. The inserted cells are emitted
+ * with diff markers; the rest are aligned positionally with content
+ * diff for matched pairs.
+ */
+function diffMultiColumnAddRow(
+  oldHtml: string,
+  newHtml: string,
+  oldRow: RowRange,
+  newRow: RowRange,
+  k: number,
+  diffCell: DiffCellFn
+): string {
+  const insertedPositions = findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml)
+  const inserted = new Set(insertedPositions)
+  const out: string[] = [rowHeaderSlice(newHtml, newRow)]
+  let oldIdx = 0
+  for (let c = 0; c < newRow.cells.length; c++) {
+    if (inserted.has(c)) {
+      out.push(emitFullCell(newHtml, newRow.cells[c], 'ins', diffCell))
+    } else {
+      out.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[oldIdx], newRow.cells[c], diffCell))
+      oldIdx++
+    }
+  }
+  out.push('</tr>')
+  return out.join('')
+}
+function diffMultiColumnDeleteRow(
+  oldHtml: string,
+  newHtml: string,
+  oldRow: RowRange,
+  newRow: RowRange,
+  k: number,
+  diffCell: DiffCellFn
+): string {
+  const deletedPositions = findBestColumnDeletePositions(oldRow, newRow, k, oldHtml, newHtml)
+  const deleted = new Set(deletedPositions)
+  const out: string[] = [rowHeaderSlice(newHtml, newRow)]
+  let newIdx = 0
+  for (let oldIdx = 0; oldIdx < oldRow.cells.length; oldIdx++) {
+    if (deleted.has(oldIdx)) {
+      out.push(emitFullCell(oldHtml, oldRow.cells[oldIdx], 'del', diffCell))
+      continue
+    }
+    out.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[oldIdx], newRow.cells[newIdx], diffCell))
+    newIdx++
+  }
+  out.push('</tr>')
+  return out.join('')
+}
+function findBestColumnInsertPositions(
+  oldRow: RowRange,
+  newRow: RowRange,
+  k: number,
+  oldHtml: string,
+  newHtml: string
+): number[] {
+  let bestPositions: number[] = []
+  let bestScore = -1
+  for (const combo of combinationsOfRange(newRow.cells.length, k)) {
+    const inserted = new Set(combo)
+    let score = 0
+    let oldIdx = 0
+    for (let newIdx = 0; newIdx < newRow.cells.length; newIdx++) {
+      if (inserted.has(newIdx)) continue
+      score += cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml)
+      oldIdx++
+    }
+    if (score > bestScore) {
+      bestScore = score
+      bestPositions = combo
+    }
+  }
+  return bestPositions
+}
+function findBestColumnDeletePositions(
+  oldRow: RowRange,
+  newRow: RowRange,
+  k: number,
+  oldHtml: string,
+  newHtml: string
+): number[] {
+  let bestPositions: number[] = []
+  let bestScore = -1
+  for (const combo of combinationsOfRange(oldRow.cells.length, k)) {
+    const deleted = new Set(combo)
+    let score = 0
+    let newIdx = 0
+    for (let oldIdx = 0; oldIdx < oldRow.cells.length; oldIdx++) {
+      if (deleted.has(oldIdx)) continue
+      score += cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml)
+      newIdx++
+    }
+    if (score > bestScore) {
+      bestScore = score
+      bestPositions = combo
+    }
+  }
+  return bestPositions
+}
+/**
+ * Yields every sorted-ascending combination of `k` distinct integers
+ * from [0, n), in lexicographic order; yields nothing when `k` is 0 or
+ * `k > n`. Iterative, and each yield is a fresh O(k) copy of the indices.
+ */
+function* combinationsOfRange(n: number, k: number): IterableIterator<number[]> {
+  if (k === 0 || k > n) return
+  const indices = Array.from({ length: k }, (_, i) => i)
+  while (true) {
+    yield indices.slice()
+    let i = k - 1
+    while (i >= 0 && indices[i] === n - k + i) i--
+    if (i < 0) return
+    indices[i]++
+    for (let j = i + 1; j < k; j++) indices[j] = indices[j - 1] + 1
+  }
+}
 /**
  * Try to align cells by logical column position (sum of colspans). When
  * one side has a colspan'd cell that absorbs multiple cells on the other
@@ -619,7 +771,12 @@ function diffStructurallyAlignedRow(
 ): string {
   const oldKeys = oldRow.cells.map(cell => cellKey(oldHtml, cell))
   const newKeys = newRow.cells.map(cell => cellKey(newHtml, cell))
-  const alignment = lcsAlign(oldKeys, newKeys)
+  const exactAlignment = lcsAlign(oldKeys, newKeys)
+  // After exact LCS, fuzzy-pair adjacent unmatched old/new cells whose
+  // content is similar enough — so a content-edit cell alongside a
+  // column-add in the same row produces a content diff for the edited
+  // cell rather than a phantom delete + insert + extra cell.
+  const alignment = pairSimilarUnmatchedCells(exactAlignment, oldRow, newRow, oldHtml, newHtml)
   const out: string[] = []
   // Use new's <tr> if it exists; otherwise old's.
@@ -765,9 +922,17 @@ interface Alignment {
   newIdx: number | null
 }
-/** Jaccard similarity threshold above which we treat two rows as "the same row, edited". */
+/** Similarity threshold (max of char prefix+suffix and token Jaccard) above which we treat two rows as "the same row, edited". */
 const ROW_FUZZY_THRESHOLD = 0.5
+/**
+ * Threshold for "this cell is a content-edit of that cell." Tuned the same
+ * as ROW_FUZZY_THRESHOLD; cells in legal docs that share most of their
+ * content typically ARE the same logical cell with a body edit, so 0.5
+ * works for both granularities in practice.
+ */
+const CELL_FUZZY_THRESHOLD = 0.5
 /**
  * After exact LCS, scan the alignment for runs of "old deleted, then new
  * inserted" (or vice versa) and pair entries whose content is similar
@@ -783,14 +948,42 @@ function pairSimilarUnmatchedRows(
   oldHtml: string,
   newHtml: string
 ): Alignment[] {
-  // Identify pairings inside each unmatched-only run, then build the
-  // output by walking the alignment and substituting paired entries at
-  // the *ins position* (not the del position). This keeps the result
-  // alignment monotonic in newIdx — critical because the cursor-based
-  // emission downstream walks new's html in order. Emitting at the del
-  // position would be fine when del<ins in the alignment array (the
-  // typical case), but can violate monotonicity when there are mixed
-  // unpaired entries in between (column-add + row-add together, etc.).
+  return pairSimilarUnmatched(alignment, ROW_FUZZY_THRESHOLD, (oldIdx, newIdx) =>
+    rowSimilarity(oldTable.rows[oldIdx], newTable.rows[newIdx], oldHtml, newHtml)
+  )
+}
+function pairSimilarUnmatchedCells(
+  alignment: Alignment[],
+  oldRow: RowRange,
+  newRow: RowRange,
+  oldHtml: string,
+  newHtml: string
+): Alignment[] {
+  return pairSimilarUnmatched(alignment, CELL_FUZZY_THRESHOLD, (oldIdx, newIdx) =>
+    cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml)
+  )
+}
+/**
+ * Identify pairings inside each unmatched-only run, then build the output
+ * alignment by walking the original and substituting paired entries at
+ * the *ins position* (not the del position). This keeps the result
+ * monotonic in newIdx — critical because the cursor-based emission
+ * downstream walks new's html in order. Emitting at the del position
+ * would be fine when del<ins in the alignment array (the typical case),
+ * but can violate monotonicity when there are mixed unpaired entries in
+ * between (column-add + row-add together, content-edit + column-add,
+ * etc.).
+ *
+ * Generic over what's being paired — works for both rows (by full row
+ * content similarity) and cells (by per-cell content similarity).
+ */
+function pairSimilarUnmatched(
+  alignment: Alignment[],
+  threshold: number,
+  similarity: (oldIdx: number, newIdx: number) => number
+): Alignment[] {
   const pairs = new Map<number, number>() // del-alignment-idx → ins-alignment-idx
   let i = 0
   while (i < alignment.length) {
@@ -812,15 +1005,10 @@ function pairSimilarUnmatchedRows(
     const usedIns = new Set<number>()
     for (const di of delIndices) {
       let bestIi = -1
-      let bestSim = ROW_FUZZY_THRESHOLD
+      let bestSim = threshold
       for (const ii of insIndices) {
         if (usedIns.has(ii)) continue
-        const sim = rowSimilarity(
-          oldTable.rows[alignment[di].oldIdx as number],
-          newTable.rows[alignment[ii].newIdx as number],
-          oldHtml,
-          newHtml
-        )
+        const sim = similarity(alignment[di].oldIdx as number, alignment[ii].newIdx as number)
         if (sim > bestSim) {
           bestSim = sim
           bestIi = ii
@@ -851,18 +1039,41 @@ function pairSimilarUnmatchedRows(
 }
 /**
- * Character-level similarity using shared prefix + suffix as a fraction
- * of the longer string. Catches "single edit somewhere in a long row"
- * (which token-Jaccard misses on short rows) while still correctly
- * rejecting rows with no positional overlap. HTML tags are stripped to
- * keep the comparison content-focused.
+ * Combined similarity metric used for both row-level and cell-level
+ * fuzzy pairing. Returns the MAX of two complementary metrics:
+ *
+ *   1. **Character prefix+suffix similarity** — fraction of the longer
+ *      string covered by shared prefix + shared suffix. Catches small
+ *      edits in the middle of a string (one word changed in a row).
+ *      Misses cases where the bulk of common content is in the middle
+ *      and the ends differ.
+ *
+ *   2. **Token Jaccard similarity** — intersection-over-union of the
+ *      whitespace-split tokens. Catches "most of the content is the
+ *      same but bookended by different bits" — e.g. a row whose only
+ *      edit is a column added at the start and another at the end,
+ *      where the ~50 chars in the middle that DO match would be
+ *      invisible to prefix+suffix.
+ *
+ * Either metric exceeding the threshold means pair. Neither alone is
+ * sufficient for the full range of legal-doc edits we see in
+ * production tables.
  */
 function rowSimilarity(oldRow: RowRange, newRow: RowRange, oldHtml: string, newHtml: string): number {
-  const a = rowText(oldHtml, oldRow)
-  const b = rowText(newHtml, newRow)
+  return textSimilarity(rowText(oldHtml, oldRow), rowText(newHtml, newRow))
+}
+function cellSimilarity(oldCell: CellRange, newCell: CellRange, oldHtml: string, newHtml: string): number {
+  return textSimilarity(cellText(oldHtml, oldCell), cellText(newHtml, newCell))
+}
+function textSimilarity(a: string, b: string): number {
   if (a === b) return 1
   if (a.length === 0 || b.length === 0) return 0
+  return Math.max(charPrefixSuffixSimilarity(a, b), tokenJaccardSimilarity(a, b))
+}
+function charPrefixSuffixSimilarity(a: string, b: string): number {
   let prefix = 0
   const minLen = Math.min(a.length, b.length)
   while (prefix < minLen && a[prefix] === b[prefix]) prefix++
@@ -879,6 +1090,18 @@ function rowSimilarity(oldRow: RowRange, newRow: RowRange, oldHtml: string, newH
   return (prefix + suffix) / Math.max(a.length, b.length)
 }
+function tokenJaccardSimilarity(a: string, b: string): number {
+  const tokensA = new Set(a.split(/\s+/).filter(Boolean))
+  const tokensB = new Set(b.split(/\s+/).filter(Boolean))
+  if (tokensA.size === 0 && tokensB.size === 0) return 1
+  let intersection = 0
+  for (const t of tokensA) {
+    if (tokensB.has(t)) intersection++
+  }
+  const union = tokensA.size + tokensB.size - intersection
+  return union === 0 ? 0 : intersection / union
+}
 function rowText(html: string, row: RowRange): string {
   const parts: string[] = []
   for (const cell of row.cells) {
@@ -887,6 +1110,15 @@ function rowText(html: string, row: RowRange): string {
   return parts.join(' ').replace(/\s+/g, ' ').trim().toLowerCase()
 }
+function cellText(html: string, cell: CellRange): string {
+  return html
+    .slice(cell.contentStart, cell.contentEnd)
+    .replace(/<[^>]+>/g, ' ')
+    .replace(/\s+/g, ' ')
+    .trim()
+    .toLowerCase()
+}
 /**
  * Standard LCS alignment: walks both sequences and emits a list of pairs
  * where `(oldIdx, newIdx)` are both set for matching positions, and one

package/test/HtmlDiff.tables.matrix.spec.ts ADDED Viewed

@@ -0,0 +1,327 @@
+import { describe, expect, it } from 'vitest'
+import HtmlDiff from '../src/HtmlDiff'
+/**
+ * Exhaustive matrix of common table operations and their pairwise
+ * combinations. Each case runs HtmlDiff.execute and asserts structural
+ * invariants on the output (see assertStructurallyValid):
+ *
+ *   • Every `<tr>`, `<td>` and `<th>` opens and closes
+ *   • No row has more cells than the widest row of the old or new
+ *     table (raw cell-tag count; colspan is not taken into account)
+ *   • All `<ins>`/`<del>` tags balance
+ *   • The output is non-empty when the inputs differ
+ *
+ * Not yet asserted: coherent class markers (every cell of a
+ * `<tr class='diffins'>` row ins-marked or empty, likewise for deleted
+ * rows) and that no `<td>` content from new is silently dropped.
+ *
+ * The matrix is built combinatorially — single operations × single
+ * operations — so a regression in any pairwise combination surfaces
+ * here even if no test was added for that exact pair.
+ */
+describe('HtmlDiff — table operations matrix', () => {
+  describe('single operations on a 3×3 base', () => {
+    for (const op of allSingleOperations()) {
+      it(`${op.name} produces structurally valid output`, () => {
+        const base = baseTable3x3()
+        const newHtml = op.apply(base)
+        const oldHtml = renderTable(base)
+        const result = HtmlDiff.execute(oldHtml, newHtml)
+        assertStructurallyValid(result, oldHtml, newHtml, op.name)
+      })
+    }
+  })
+  describe('pairwise combinations on a 3×3 base', () => {
+    const ops = allSingleOperations()
+    for (const opA of ops) {
+      for (const opB of ops) {
+        if (opA === opB) continue
+        // Some combinations may not compose cleanly; canCompose is the
+        // hook for skipping them (it currently accepts every pair). Same-op
+        // pairs are already excluded by the identity check above.
+        if (!canCompose(opA, opB)) continue
+        it(`${opA.name} + ${opB.name} produces structurally valid output`, () => {
+          const base = baseTable3x3()
+          const intermediate = parseTable(opA.apply(base))
+          const newHtml = opB.apply(intermediate)
+          const oldHtml = renderTable(base)
+          const result = HtmlDiff.execute(oldHtml, newHtml)
+          assertStructurallyValid(result, oldHtml, newHtml, `${opA.name} + ${opB.name}`)
+        })
+      }
+    }
+  })
+  describe('user-reported regression scenarios', () => {
+    it('column added + empty row inserted in middle (regression for issue with blank row)', () => {
+      const oldHtml =
+        '<table>' +
+        '<tr><th>Party</th><th>Form</th><th>Date</th></tr>' +
+        '<tr><td>Party A</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto).</td><td>(i) Upon execution.</td></tr>' +
+        '<tr><td>Party B</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>' +
+        '</table>'
+      const newHtml =
+        '<table>' +
+        '<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>' +
+        "<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto). Here's some extra content</td><td>(i) Upon execution.</td></tr>" +
+        '<tr><td></td><td></td><td></td><td></td></tr>' +
+        '<tr><td>Party B</td><td>A</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>' +
+        '</table>'
+      const result = HtmlDiff.execute(oldHtml, newHtml)
+      assertStructurallyValid(result, oldHtml, newHtml, 'column-add + empty row insert')
+      // Specific assertions on this case so we can see exactly what went
+      // wrong if the structural-invariant check passes but the output
+      // still drifts.
+      const rowCount = countMatches(result, /<tr[\s>]/g)
+      expect(rowCount).toBe(4) // header + Party A + empty + Party B
+      expect(result).toContain(
+        "<tr class='diffins'><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td></tr>"
+      )
+    })
+  })
+})
+// ──────────────────────── operations ────────────────────────
+interface Op {
+  name: string
+  apply: (table: TableData) => string
+}
+function allSingleOperations(): Op[] {
+  return [
+    { name: 'no-op', apply: t => renderTable(t) },
+    { name: 'edit-cell', apply: t => renderTable(mutate(t, m => m.editCell(1, 1, 'EDITED'))) },
+    { name: 'add-row-start', apply: t => renderTable(mutate(t, m => m.addRowAt(0, ['NEW1', 'NEW2', 'NEW3']))) },
+    { name: 'add-row-middle', apply: t => renderTable(mutate(t, m => m.addRowAt(2, ['NEW1', 'NEW2', 'NEW3']))) },
+    {
+      name: 'add-row-end',
+      apply: t => renderTable(mutate(t, m => m.addRowAt(t.rows.length, ['NEW1', 'NEW2', 'NEW3']))),
+    },
+    { name: 'add-empty-row-middle', apply: t => renderTable(mutate(t, m => m.addRowAt(2, ['', '', '']))) },
+    {
+      name: 'add-multiple-rows',
+      apply: t =>
+        renderTable(
+          mutate(t, m => {
+            m.addRowAt(t.rows.length, ['X1', 'X2', 'X3'])
+            m.addRowAt(t.rows.length + 1, ['Y1', 'Y2', 'Y3'])
+          })
+        ),
+    },
+    { name: 'delete-row-start', apply: t => renderTable(mutate(t, m => m.deleteRow(1))) }, // skip header
+    { name: 'delete-row-middle', apply: t => renderTable(mutate(t, m => m.deleteRow(2))) },
+    { name: 'delete-row-end', apply: t => renderTable(mutate(t, m => m.deleteRow(t.rows.length - 1))) },
+    {
+      name: 'delete-multiple-rows',
+      apply: t =>
+        renderTable(
+          mutate(t, m => {
+            m.deleteRow(t.rows.length - 1)
+            m.deleteRow(1)
+          })
+        ),
+    },
+    {
+      name: 'add-column-start',
+      apply: t => renderTable(mutate(t, m => m.addColumnAt(0, ['NewHeader', 'newA', 'newB', 'newC']))),
+    },
+    {
+      name: 'add-column-middle',
+      apply: t => renderTable(mutate(t, m => m.addColumnAt(1, ['NewHeader', 'newA', 'newB', 'newC']))),
+    },
+    {
+      name: 'add-column-end',
+      apply: t => renderTable(mutate(t, m => m.addColumnAt(t.rows[0].length, ['NewHeader', 'newA', 'newB', 'newC']))),
+    },
+    {
+      name: 'add-multiple-columns',
+      apply: t =>
+        renderTable(
+          mutate(t, m => {
+            m.addColumnAt(t.rows[0].length, ['H1', 'a1', 'b1', 'c1'])
+            m.addColumnAt(t.rows[0].length + 1, ['H2', 'a2', 'b2', 'c2'])
+          })
+        ),
+    },
+    { name: 'delete-column-start', apply: t => renderTable(mutate(t, m => m.deleteColumn(0))) },
+    { name: 'delete-column-middle', apply: t => renderTable(mutate(t, m => m.deleteColumn(1))) },
+    { name: 'delete-column-end', apply: t => renderTable(mutate(t, m => m.deleteColumn(t.rows[0].length - 1))) },
+    { name: 'shift-content-right', apply: t => renderTable(mutate(t, m => m.shiftContentRight(1))) },
+  ]
+}
+/**
+ * Hook for excluding operation pairs that don't compose cleanly because
+ * the second operation's row/column index assumes the original table
+ * dimensions. Currently a placeholder: it accepts every pair, so nothing
+ * is skipped and both arguments are ignored.
+ */
+function canCompose(_a: Op, _b: Op): boolean {
+  // For now, allow all combinations and rely on the mutate helpers:
+  // inserts clamp the index; edits/deletes are no-ops when out of bounds.
+  return true
+}
+// ──────────────────────── table model ────────────────────────
+interface TableData {
+  rows: string[][]
+}
+function baseTable3x3(): TableData {
+  return {
+    rows: [
+      ['Header1', 'Header2', 'Header3'],
+      ['A1', 'A2', 'A3'],
+      ['B1', 'B2', 'B3'],
+      ['C1', 'C2', 'C3'],
+    ],
+  }
+}
+function renderTable(t: TableData): string {
+  const out: string[] = ['<table>']
+  for (let r = 0; r < t.rows.length; r++) {
+    out.push('<tr>')
+    const tag = r === 0 ? 'th' : 'td'
+    for (const cell of t.rows[r]) {
+      out.push(`<${tag}>${cell}</${tag}>`)
+    }
+    out.push('</tr>')
+  }
+  out.push('</table>')
+  return out.join('')
+}
+function parseTable(html: string): TableData {
+  // Tiny parser sufficient for our generated tables. NOT a general
+  // HTML parser; only used inside this matrix.
+  const rows: string[][] = []
+  const rowMatches = html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs)
+  for (const rowMatch of rowMatches) {
+    const cells: string[] = []
+    const cellMatches = rowMatch[1].matchAll(/<t[dh][^>]*>(.*?)<\/t[dh]>/gs)
+    for (const cellMatch of cellMatches) cells.push(cellMatch[1])
+    rows.push(cells)
+  }
+  return { rows }
+}
+interface Mutator {
+  editCell(row: number, col: number, content: string): void
+  addRowAt(at: number, content: string[]): void
+  deleteRow(at: number): void
+  addColumnAt(at: number, columnContent: string[]): void
+  deleteColumn(at: number): void
+  shiftContentRight(rowIdx: number): void
+}
+function mutate(t: TableData, fn: (m: Mutator) => void): TableData {
+  const cloned: TableData = { rows: t.rows.map(row => [...row]) }
+  const m: Mutator = {
+    editCell(row, col, content) {
+      if (cloned.rows[row]?.[col] !== undefined) cloned.rows[row][col] = content
+    },
+    addRowAt(at, content) {
+      const idx = Math.max(0, Math.min(at, cloned.rows.length))
+      cloned.rows.splice(idx, 0, content)
+    },
+    deleteRow(at) {
+      if (at >= 0 && at < cloned.rows.length) cloned.rows.splice(at, 1)
+    },
+    addColumnAt(at, columnContent) {
+      for (let r = 0; r < cloned.rows.length; r++) {
+        const idx = Math.max(0, Math.min(at, cloned.rows[r].length))
+        cloned.rows[r].splice(idx, 0, columnContent[r] ?? '')
+      }
+    },
+    deleteColumn(at) {
+      for (const row of cloned.rows) {
+        if (at >= 0 && at < row.length) row.splice(at, 1)
+      }
+    },
+    shiftContentRight(rowIdx) {
+      const row = cloned.rows[rowIdx]
+      if (!row) return
+      // Shift each cell's content one position to the right; first
+      // cell becomes empty, last cell's content drops off.
+      for (let c = row.length - 1; c > 0; c--) row[c] = row[c - 1]
+      row[0] = ''
+    },
+  }
+  fn(m)
+  return cloned
+}
+// ──────────────────────── invariant checks ────────────────────────
+/**
+ * Asserts the diff output is structurally valid:
+ *   • All `<tr>`/`<td>`/`<th>` open/close tags balance
+ *   • All `<ins>`/`<del>` tags balance
+ *   • Every row in the output has cell count ≤ the widest row found in
+ *     either input table (colspan not considered) — no phantom cells
+ *   • Output isn't empty when inputs aren't equal
+ */
+function assertStructurallyValid(output: string, oldHtml: string, newHtml: string, label: string) {
+  const ctx = `[${label}]`
+  // Tag balance
+  const openTr = countMatches(output, /<tr[\s>]/g)
+  const closeTr = countMatches(output, /<\/tr>/g)
+  expect(openTr, `${ctx} <tr> tag balance`).toBe(closeTr)
+  const openTd = countMatches(output, /<td[\s>]/g)
+  const closeTd = countMatches(output, /<\/td>/g)
+  expect(openTd, `${ctx} <td> tag balance`).toBe(closeTd)
+  const openTh = countMatches(output, /<th[\s>]/g)
+  const closeTh = countMatches(output, /<\/th>/g)
+  expect(openTh, `${ctx} <th> tag balance`).toBe(closeTh)
+  // ins/del balance — each opening tag has a matching closing tag.
+  const openIns = countMatches(output, /<ins[\s>]/g)
+  const closeIns = countMatches(output, /<\/ins>/g)
+  expect(openIns, `${ctx} <ins> tag balance`).toBe(closeIns)
+  const openDel = countMatches(output, /<del[\s>]/g)
+  const closeDel = countMatches(output, /<\/del>/g)
+  expect(openDel, `${ctx} <del> tag balance`).toBe(closeDel)
+  // Per-row cell count ≤ widest row of the old or new table.
+  const oldMaxCells = maxRowCellCount(oldHtml)
+  const newMaxCells = maxRowCellCount(newHtml)
+  const limit = Math.max(oldMaxCells, newMaxCells)
+  // Walk output rows
+  const rowMatches = output.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs)
+  for (const rowMatch of rowMatches) {
+    const cellsInRow = countMatches(rowMatch[1], /<t[dh][\s>]/g)
+    expect(cellsInRow, `${ctx} row has too many cells (${cellsInRow} > ${limit})`).toBeLessThanOrEqual(limit)
+  }
+  // Output is non-empty when inputs aren't equal.
+  if (oldHtml !== newHtml) {
+    expect(output.length, `${ctx} output is empty`).toBeGreaterThan(0)
+  }
+}
+function maxRowCellCount(html: string): number {
+  let max = 0
+  const rowMatches = html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs)
+  for (const rowMatch of rowMatches) {
+    const count = countMatches(rowMatch[1], /<t[dh][\s>]/g)
+    if (count > max) max = count
+  }
+  return max
+}
+function countMatches(s: string, re: RegExp): number {
+  const matches = s.match(re)
+  return matches ? matches.length : 0
+}