@createiq/htmldiff 1.0.5-beta.2 → 1.0.5-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@createiq/htmldiff",
3
- "version": "1.0.5-beta.2",
3
+ "version": "1.0.5-beta.3",
4
4
  "description": "TypeScript port of htmldiff.net",
5
5
  "type": "module",
6
6
  "author": "Mathew Mannion <mathew.mannion@linklaters.com>",
package/src/TableDiff.ts CHANGED
@@ -469,13 +469,165 @@ function diffPreservedRow(
469
469
  }
470
470
  // Cell counts differ. Try to interpret it as a horizontal merge/split via
471
471
  // colspan first — preserving the new structure with `class='mod colspan'`
472
- // on each affected cell. Falls back to the cell-LCS path if the cells
473
- // don't align cleanly on logical column positions.
472
+ // on each affected cell.
474
473
  const colspanAligned = diffColspanChangedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
475
474
  if (colspanAligned !== null) return colspanAligned
475
+ // For a column add/delete (cell counts differ by at most MAX_COLUMN_DELTA),
476
+ // detect the position via positional similarity scan and align the
477
+ // remaining cells positionally. This handles the case where a column
478
+ // was added AND a different cell got an unrelated content edit — the
479
+ // edited cell still aligns by position rather than getting orphaned by
480
+ // the cell-LCS exact-match.
481
+ const delta = newRow.cells.length - oldRow.cells.length
482
+ // For column add/delete (cell counts differ), find the best insertion
483
+ // or deletion positions via positional similarity scan and align the
484
+ // remaining cells positionally. This handles content-edit alongside
485
+ // column-add by keeping the edited cell in its column position rather
486
+ // than orphaning it via the cell-LCS exact match.
487
+ // Guardrail: combinatorial search is C(newCount, k); we cap to avoid
488
+ // explosion on very wide tables. The cap is generous for real legal
489
+ // schedules; anything above falls through to cell-LCS.
490
+ const absDelta = Math.abs(delta)
491
+ if (
492
+ absDelta > 0 &&
493
+ absDelta <= MAX_COLUMN_DELTA &&
494
+ Math.max(oldRow.cells.length, newRow.cells.length) <= MAX_COLUMN_SEARCH_WIDTH
495
+ ) {
496
+ if (delta > 0) return diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, delta, diffCell)
497
+ return diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, -delta, diffCell)
498
+ }
476
499
  return diffStructurallyAlignedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
477
500
  }
478
501
 
502
/**
 * Largest difference in cell count between the old and the new row for
 * which the positional column add/delete search is attempted.
 */
const MAX_COLUMN_DELTA = 6

/**
 * Widest row (in cells, on either side) for which the positional search is
 * attempted. At both caps the search enumerates at most
 * C(40, 6) = 3,838,380 combinations.
 */
const MAX_COLUMN_SEARCH_WIDTH = 40
504
+
505
/**
 * Renders a row in which the new side has `k` more cells than the old side.
 *
 * The `k` new-row positions treated as inserted columns come from
 * `findBestColumnInsertPositions`. Those cells are emitted through
 * `emitFullCell` with the `'ins'` marker; every other new cell is paired, in
 * order, with the next unused old cell and emitted through `emitDiffedCell`.
 *
 * @param oldHtml - Source HTML the old row's ranges refer to.
 * @param newHtml - Source HTML the new row's ranges refer to.
 * @param oldRow - Row from the old table.
 * @param newRow - Row from the new table; supplies the row header.
 * @param k - Number of inserted cells (new cell count minus old cell count).
 * @param diffCell - Cell differ forwarded to the emit helpers.
 * @returns The row header slice, the emitted cells and a closing `</tr>`.
 */
function diffMultiColumnAddRow(
  oldHtml: string,
  newHtml: string,
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  diffCell: DiffCellFn
): string {
  const insertedColumns = new Set(findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml))
  const pieces: string[] = [rowHeaderSlice(newHtml, newRow)]

  // `nextOld` only advances when a new cell is paired with an old one.
  let nextOld = 0
  let column = 0
  while (column < newRow.cells.length) {
    const newCell = newRow.cells[column]
    if (insertedColumns.has(column)) {
      pieces.push(emitFullCell(newHtml, newCell, 'ins', diffCell))
    } else {
      pieces.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[nextOld], newCell, diffCell))
      nextOld += 1
    }
    column += 1
  }

  pieces.push('</tr>')
  return pieces.join('')
}
536
+
537
/**
 * Renders a row in which the old side has `k` more cells than the new side.
 *
 * The `k` old-row positions treated as deleted columns come from
 * `findBestColumnDeletePositions`. Those cells are emitted through
 * `emitFullCell` with the `'del'` marker; every other old cell is paired, in
 * order, with the next unused new cell and emitted through `emitDiffedCell`.
 *
 * @param oldHtml - Source HTML the old row's ranges refer to.
 * @param newHtml - Source HTML the new row's ranges refer to.
 * @param oldRow - Row from the old table.
 * @param newRow - Row from the new table; supplies the row header.
 * @param k - Number of deleted cells (old cell count minus new cell count).
 * @param diffCell - Cell differ forwarded to the emit helpers.
 * @returns The row header slice, the emitted cells and a closing `</tr>`.
 */
function diffMultiColumnDeleteRow(
  oldHtml: string,
  newHtml: string,
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  diffCell: DiffCellFn
): string {
  const deletedColumns = new Set(findBestColumnDeletePositions(oldRow, newRow, k, oldHtml, newHtml))
  const pieces: string[] = [rowHeaderSlice(newHtml, newRow)]

  // `nextNew` only advances when an old cell is paired with a new one.
  let nextNew = 0
  let column = 0
  while (column < oldRow.cells.length) {
    const oldCell = oldRow.cells[column]
    if (deletedColumns.has(column)) {
      pieces.push(emitFullCell(oldHtml, oldCell, 'del', diffCell))
    } else {
      pieces.push(emitDiffedCell(oldHtml, newHtml, oldCell, newRow.cells[nextNew], diffCell))
      nextNew += 1
    }
    column += 1
  }

  pieces.push('</tr>')
  return pieces.join('')
}
560
+
561
/**
 * Picks the `k` positions in `newRow` that are best explained as inserted
 * cells.
 *
 * Every combination from `combinationsOfRange(newRow.cells.length, k)` is
 * scored by summing `cellSimilarity` over the remaining new cells paired, in
 * order, with the old cells. The first combination with the strictly highest
 * score wins.
 *
 * The same (old, new) cell pair occurs in many combinations, so each pair's
 * similarity is computed once and cached. This leaves scores and tie-breaking
 * unchanged, since the cached values are added in the same order.
 * NOTE(review): this assumes `cellSimilarity` is a pure function of its
 * arguments — confirm.
 *
 * @param oldRow - Row from the old table.
 * @param newRow - Row from the new table (the wider side).
 * @param k - Number of inserted cells to locate.
 * @param oldHtml - Source HTML for `oldRow`.
 * @param newHtml - Source HTML for `newRow`.
 * @returns The chosen new-row indices, or `[]` when no combination exists.
 */
function findBestColumnInsertPositions(
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  oldHtml: string,
  newHtml: string
): number[] {
  const newCount = newRow.cells.length

  // Pair cache keyed by `oldIdx * newCount + newIdx`; newIdx < newCount, so
  // keys are unique.
  const similarityCache = new Map<number, number>()
  const pairSimilarity = (oldIdx: number, newIdx: number): number => {
    const key = oldIdx * newCount + newIdx
    let value = similarityCache.get(key)
    if (value === undefined) {
      value = cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml)
      similarityCache.set(key, value)
    }
    return value
  }

  let bestPositions: number[] = []
  let bestScore = -1
  for (const combo of combinationsOfRange(newCount, k)) {
    const inserted = new Set(combo)
    let score = 0
    let oldIdx = 0
    for (let newIdx = 0; newIdx < newCount; newIdx++) {
      if (inserted.has(newIdx)) continue
      score += pairSimilarity(oldIdx, newIdx)
      oldIdx++
    }
    // Strict comparison keeps the earliest combination on ties.
    if (score > bestScore) {
      bestScore = score
      bestPositions = combo
    }
  }
  return bestPositions
}
586
+
587
/**
 * Picks the `k` positions in `oldRow` that are best explained as deleted
 * cells.
 *
 * Every combination from `combinationsOfRange(oldRow.cells.length, k)` is
 * scored by summing `cellSimilarity` over the remaining old cells paired, in
 * order, with the new cells. The first combination with the strictly highest
 * score wins.
 *
 * The same (old, new) cell pair occurs in many combinations, so each pair's
 * similarity is computed once and cached. This leaves scores and tie-breaking
 * unchanged, since the cached values are added in the same order.
 * NOTE(review): this assumes `cellSimilarity` is a pure function of its
 * arguments — confirm.
 *
 * @param oldRow - Row from the old table (the wider side).
 * @param newRow - Row from the new table.
 * @param k - Number of deleted cells to locate.
 * @param oldHtml - Source HTML for `oldRow`.
 * @param newHtml - Source HTML for `newRow`.
 * @returns The chosen old-row indices, or `[]` when no combination exists.
 */
function findBestColumnDeletePositions(
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  oldHtml: string,
  newHtml: string
): number[] {
  const oldCount = oldRow.cells.length

  // Pair cache keyed by `oldIdx * oldCount + newIdx`; newIdx never exceeds
  // oldIdx, which is below oldCount, so keys are unique.
  const similarityCache = new Map<number, number>()
  const pairSimilarity = (oldIdx: number, newIdx: number): number => {
    const key = oldIdx * oldCount + newIdx
    let value = similarityCache.get(key)
    if (value === undefined) {
      value = cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml)
      similarityCache.set(key, value)
    }
    return value
  }

  let bestPositions: number[] = []
  let bestScore = -1
  for (const combo of combinationsOfRange(oldCount, k)) {
    const deleted = new Set(combo)
    let score = 0
    let newIdx = 0
    for (let oldIdx = 0; oldIdx < oldCount; oldIdx++) {
      if (deleted.has(oldIdx)) continue
      score += pairSimilarity(oldIdx, newIdx)
      newIdx++
    }
    // Strict comparison keeps the earliest combination on ties.
    if (score > bestScore) {
      bestScore = score
      bestPositions = combo
    }
  }
  return bestPositions
}
612
+
613
/**
 * Yields every combination of `k` distinct integers from `[0, n)`, each as a
 * fresh ascending array, in lexicographic order.
 *
 * Nothing is yielded when `k` is not a positive integer, when `n` is not an
 * integer, or when `k > n`. Without the integer guards a negative `k` would
 * yield one spurious empty combination, and a NaN or fractional argument
 * would make the loop below never terminate.
 *
 * The working state is a single array of `k` indices advanced in place; each
 * yielded value is a copy, so callers may keep or mutate it.
 *
 * @param n - Exclusive upper bound of the range to choose from.
 * @param k - Size of each combination.
 */
function* combinationsOfRange(n: number, k: number): IterableIterator<number[]> {
  if (!Number.isInteger(n) || !Number.isInteger(k) || k <= 0 || k > n) return
  const indices = Array.from({ length: k }, (_, i) => i)
  while (true) {
    yield indices.slice()
    // Find the rightmost index that has not yet reached its maximum value.
    let i = k - 1
    while (i >= 0 && indices[i] === n - k + i) i--
    if (i < 0) return
    // Bump it and reset everything to its right to the smallest valid run.
    indices[i]++
    for (let j = i + 1; j < k; j++) indices[j] = indices[j - 1] + 1
  }
}
630
+
479
631
  /**
480
632
  * Try to align cells by logical column position (sum of colspans). When
481
633
  * one side has a colspan'd cell that absorbs multiple cells on the other
@@ -887,18 +1039,41 @@ function pairSimilarUnmatched(
887
1039
  }
888
1040
 
889
1041
  /**
890
- * Character-level similarity using shared prefix + suffix as a fraction
891
- * of the longer string. Catches "single edit somewhere in a long row"
892
- * (which token-Jaccard misses on short rows) while still correctly
893
- * rejecting rows with no positional overlap. HTML tags are stripped to
894
- * keep the comparison content-focused.
1042
+ * Combined similarity metric used for both row-level and cell-level
1043
+ * fuzzy pairing. Returns the MAX of two complementary metrics:
1044
+ *
1045
+ * 1. **Character prefix+suffix similarity** — fraction of the longer
1046
+ * string covered by shared prefix + shared suffix. Catches small
1047
+ * edits in the middle of a string (one word changed in a row).
1048
+ * Misses cases where the bulk of common content is in the middle
1049
+ * and the ends differ.
1050
+ *
1051
+ * 2. **Token Jaccard similarity** — intersection-over-union of the
1052
+ * whitespace-split tokens. Catches "most of the content is the
1053
+ * same but bookended by different bits" — e.g. a row whose only
1054
+ * edit is a column added at the start and another at the end,
1055
+ * where the ~50 chars in the middle that DO match would be
1056
+ * invisible to prefix+suffix.
1057
+ *
1058
+ * Either metric exceeding the threshold means pair. Neither alone is
1059
+ * sufficient for the full range of legal-doc edits we see in
1060
+ * production tables.
895
1061
  */
896
1062
  function rowSimilarity(oldRow: RowRange, newRow: RowRange, oldHtml: string, newHtml: string): number {
897
- const a = rowText(oldHtml, oldRow)
898
- const b = rowText(newHtml, newRow)
1063
+ return textSimilarity(rowText(oldHtml, oldRow), rowText(newHtml, newRow))
1064
+ }
1065
+
1066
/**
 * Similarity between one old cell and one new cell: `textSimilarity` applied
 * to the `cellText` of each cell.
 *
 * @param oldCell - Cell range within `oldHtml`.
 * @param newCell - Cell range within `newHtml`.
 * @param oldHtml - Source HTML for `oldCell`.
 * @param newHtml - Source HTML for `newCell`.
 * @returns The score produced by `textSimilarity`.
 */
function cellSimilarity(oldCell: CellRange, newCell: CellRange, oldHtml: string, newHtml: string): number {
  const before = cellText(oldHtml, oldCell)
  const after = cellText(newHtml, newCell)
  return textSimilarity(before, after)
}
1069
+
1070
/**
 * Similarity of two already-extracted text strings.
 *
 * Identical strings score 1. If exactly one side is empty the score is 0.
 * Otherwise the result is the larger of `charPrefixSuffixSimilarity` and
 * `tokenJaccardSimilarity`.
 */
function textSimilarity(a: string, b: string): number {
  if (a === b) return 1
  if (!a.length || !b.length) return 0
  const positional = charPrefixSuffixSimilarity(a, b)
  const lexical = tokenJaccardSimilarity(a, b)
  return Math.max(positional, lexical)
}
901
1075
 
1076
+ function charPrefixSuffixSimilarity(a: string, b: string): number {
902
1077
  let prefix = 0
903
1078
  const minLen = Math.min(a.length, b.length)
904
1079
  while (prefix < minLen && a[prefix] === b[prefix]) prefix++
@@ -915,6 +1090,18 @@ function rowSimilarity(oldRow: RowRange, newRow: RowRange, oldHtml: string, newH
915
1090
  return (prefix + suffix) / Math.max(a.length, b.length)
916
1091
  }
917
1092
 
1093
/**
 * Jaccard similarity (intersection over union) of the distinct
 * whitespace-separated tokens of `a` and `b`.
 *
 * Tokens are compared exactly; repeated tokens count once. Two inputs with
 * no tokens at all (empty or whitespace-only) score 1.
 *
 * @returns A value in `[0, 1]`.
 */
function tokenJaccardSimilarity(a: string, b: string): number {
  const tokensA = new Set(a.split(/\s+/).filter(Boolean))
  const tokensB = new Set(b.split(/\s+/).filter(Boolean))
  if (tokensA.size === 0 && tokensB.size === 0) return 1

  let intersection = 0
  for (const token of tokensA) {
    if (tokensB.has(token)) intersection++
  }
  // At least one set is non-empty here, so the union is never zero and no
  // divide-by-zero guard is needed.
  return intersection / (tokensA.size + tokensB.size - intersection)
}
1104
+
918
1105
  function rowText(html: string, row: RowRange): string {
919
1106
  const parts: string[] = []
920
1107
  for (const cell of row.cells) {
@@ -923,34 +1110,6 @@ function rowText(html: string, row: RowRange): string {
923
1110
  return parts.join(' ').replace(/\s+/g, ' ').trim().toLowerCase()
924
1111
  }
925
1112
 
926
- /**
927
- * Character-level prefix+suffix similarity for a single cell's text
928
- * content. Same metric as rowSimilarity, scoped to one cell so we can
929
- * fuzzy-pair unmatched cells (e.g. a cell with a content edit alongside
930
- * a column add in the same row).
931
- */
932
- function cellSimilarity(oldCell: CellRange, newCell: CellRange, oldHtml: string, newHtml: string): number {
933
- const a = cellText(oldHtml, oldCell)
934
- const b = cellText(newHtml, newCell)
935
- if (a === b) return 1
936
- if (a.length === 0 || b.length === 0) return 0
937
-
938
- let prefix = 0
939
- const minLen = Math.min(a.length, b.length)
940
- while (prefix < minLen && a[prefix] === b[prefix]) prefix++
941
-
942
- let suffix = 0
943
- while (
944
- suffix < a.length - prefix &&
945
- suffix < b.length - prefix &&
946
- a[a.length - 1 - suffix] === b[b.length - 1 - suffix]
947
- ) {
948
- suffix++
949
- }
950
-
951
- return (prefix + suffix) / Math.max(a.length, b.length)
952
- }
953
-
954
1113
  function cellText(html: string, cell: CellRange): string {
955
1114
  return html
956
1115
  .slice(cell.contentStart, cell.contentEnd)
@@ -0,0 +1,327 @@
1
+ import { describe, expect, it } from 'vitest'
2
+
3
+ import HtmlDiff from '../src/HtmlDiff'
4
+
5
+ /**
6
+ * Exhaustive matrix of common table operations and their pairwise
7
+ * combinations. Each case runs HtmlDiff.execute and asserts structural
8
+ * invariants on the output:
9
+ *
10
+ * • Every `<tr>` opens and closes
11
+ * • No row has more `<td>`/`<th>` tags than the widest row of either
12
+ * input table (colspan is not taken into account)
13
+ * • All `<ins>`/`<del>` tags balance
14
+ * • (Not asserted yet) Class markers are coherent (a `<tr class='diffins'>` row's cells
15
+ * all have ins-marked content or are empty; a deleted row's cells
16
+ * all have del-marked content or are empty)
17
+ * • (Not asserted yet) The cell content of every `<td>` from new appears somewhere in
18
+ * the output (we don't silently drop cells)
19
+ *
20
+ * The matrix is built combinatorially — single operations × single
21
+ * operations — so a regression in any pairwise combination surfaces
22
+ * here even if no test was added for that exact pair.
23
+ */
24
describe('HtmlDiff — table operations matrix', () => {
  describe('single operations on a 3×3 base', () => {
    // One test per operation, each applied to a fresh base table.
    allSingleOperations().forEach(op => {
      it(`${op.name} produces structurally valid output`, () => {
        const base = baseTable3x3()
        const newHtml = op.apply(base)
        const oldHtml = renderTable(base)
        const result = HtmlDiff.execute(oldHtml, newHtml)
        assertStructurallyValid(result, oldHtml, newHtml, op.name)
      })
    })
  })

  describe('pairwise combinations on a 3×3 base', () => {
    const ops = allSingleOperations()
    for (const first of ops) {
      // An operation is never paired with itself; `canCompose` may veto
      // further pairs.
      const followUps = ops.filter(second => second !== first && canCompose(first, second))
      for (const second of followUps) {
        it(`${first.name} + ${second.name} produces structurally valid output`, () => {
          const base = baseTable3x3()
          // Apply the first op, re-parse its rendered HTML, then apply the
          // second op to that intermediate table.
          const intermediate = parseTable(first.apply(base))
          const newHtml = second.apply(intermediate)
          const oldHtml = renderTable(base)
          const result = HtmlDiff.execute(oldHtml, newHtml)
          assertStructurallyValid(result, oldHtml, newHtml, `${first.name} + ${second.name}`)
        })
      }
    }
  })

  describe('user-reported regression scenarios', () => {
    it('column added + empty row inserted in middle (regression for issue with blank row)', () => {
      const oldHtml = [
        '<table>',
        '<tr><th>Party</th><th>Form</th><th>Date</th></tr>',
        '<tr><td>Party A</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto).</td><td>(i) Upon execution.</td></tr>',
        '<tr><td>Party B</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>',
        '</table>',
      ].join('')
      const newHtml = [
        '<table>',
        '<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>',
        "<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto). Here's some extra content</td><td>(i) Upon execution.</td></tr>",
        '<tr><td></td><td></td><td></td><td></td></tr>',
        '<tr><td>Party B</td><td>A</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>',
        '</table>',
      ].join('')

      const result = HtmlDiff.execute(oldHtml, newHtml)
      assertStructurallyValid(result, oldHtml, newHtml, 'column-add + empty row insert')

      // Targeted assertions, so a drift that still satisfies the generic
      // invariants is pinpointed.
      const rowCount = countMatches(result, /<tr[\s>]/g)
      expect(rowCount).toBe(4) // header + Party A + empty + Party B
      const insertedEmptyRow =
        "<tr class='diffins'><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td></tr>"
      expect(result).toContain(insertedEmptyRow)
    })
  })
})
87
+
88
+ // ──────────────────────── operations ────────────────────────
89
+
90
/** A named table edit used to build the operations matrix. */
interface Op {
  /** Label interpolated into the generated test titles. */
  name: string
  /** Applies the edit to `table` and returns the resulting table as HTML. */
  apply: (table: TableData) => string
}
94
+
95
/**
 * Builds the list of single table edits exercised by the matrix.
 *
 * Apart from `no-op`, every operation clones the table through `mutate`,
 * applies its change and renders the result with `renderTable`. Indices that
 * depend on the table size are read from the table passed to `apply`, not
 * from the clone being mutated.
 */
function allSingleOperations(): Op[] {
  // Wraps a mutation into an Op: clone-and-mutate, then render.
  const viaMutation = (name: string, change: (m: Mutator, t: TableData) => void): Op => ({
    name,
    apply: t => renderTable(mutate(t, m => change(m, t))),
  })
  // Factories so that every application gets its own fresh array.
  const newRow = (): string[] => ['NEW1', 'NEW2', 'NEW3']
  const newColumn = (): string[] => ['NewHeader', 'newA', 'newB', 'newC']

  return [
    { name: 'no-op', apply: t => renderTable(t) },
    viaMutation('edit-cell', m => m.editCell(1, 1, 'EDITED')),
    viaMutation('add-row-start', m => m.addRowAt(0, newRow())),
    viaMutation('add-row-middle', m => m.addRowAt(2, newRow())),
    viaMutation('add-row-end', (m, t) => m.addRowAt(t.rows.length, newRow())),
    viaMutation('add-empty-row-middle', m => m.addRowAt(2, ['', '', ''])),
    viaMutation('add-multiple-rows', (m, t) => {
      m.addRowAt(t.rows.length, ['X1', 'X2', 'X3'])
      m.addRowAt(t.rows.length + 1, ['Y1', 'Y2', 'Y3'])
    }),
    viaMutation('delete-row-start', m => m.deleteRow(1)), // row 0 is the header
    viaMutation('delete-row-middle', m => m.deleteRow(2)),
    viaMutation('delete-row-end', (m, t) => m.deleteRow(t.rows.length - 1)),
    viaMutation('delete-multiple-rows', (m, t) => {
      m.deleteRow(t.rows.length - 1)
      m.deleteRow(1)
    }),
    viaMutation('add-column-start', m => m.addColumnAt(0, newColumn())),
    viaMutation('add-column-middle', m => m.addColumnAt(1, newColumn())),
    viaMutation('add-column-end', (m, t) => m.addColumnAt(t.rows[0].length, newColumn())),
    viaMutation('add-multiple-columns', (m, t) => {
      m.addColumnAt(t.rows[0].length, ['H1', 'a1', 'b1', 'c1'])
      m.addColumnAt(t.rows[0].length + 1, ['H2', 'a2', 'b2', 'c2'])
    }),
    viaMutation('delete-column-start', m => m.deleteColumn(0)),
    viaMutation('delete-column-middle', m => m.deleteColumn(1)),
    viaMutation('delete-column-end', (m, t) => m.deleteColumn(t.rows[0].length - 1)),
    viaMutation('shift-content-right', m => m.shiftContentRight(1)),
  ]
}
157
+
158
/**
 * Decides whether operation `_b` may be applied on top of `_a` in the
 * pairwise matrix.
 *
 * Every pair is currently allowed. The second operation's row/column indices
 * assume the original table dimensions and can be out of range after the
 * first operation, but no filtering is done here: the mutator helpers either
 * clamp such indices or ignore the call. The parameters are unused
 * placeholders kept so a real compatibility rule can be added without
 * touching the call site.
 *
 * @returns Always `true`.
 */
function canCompose(_a: Op, _b: Op): boolean {
  return true
}
169
+
170
+ // ──────────────────────── table model ────────────────────────
171
+
172
/** Minimal in-memory table model used by the matrix helpers. */
interface TableData {
  /** Cell contents, row by row; `renderTable` renders row 0 with `<th>`. */
  rows: string[][]
}
175
+
176
/**
 * Creates a fresh base table: a header row (`Header1`..`Header3`) followed by
 * three body rows labelled `A1`..`A3`, `B1`..`B3` and `C1`..`C3`.
 */
function baseTable3x3(): TableData {
  const columns = [1, 2, 3]
  const header = columns.map(n => `Header${n}`)
  const body = ['A', 'B', 'C'].map(letter => columns.map(n => `${letter}${n}`))
  return { rows: [header, ...body] }
}
186
+
187
/**
 * Serialises a table to HTML with no whitespace between tags.
 *
 * Row 0 is rendered with `<th>` cells and every other row with `<td>` cells.
 * Cell contents are inserted verbatim, with no escaping.
 */
function renderTable(t: TableData): string {
  const renderedRows = t.rows.map((cells, rowIndex) => {
    const tag = rowIndex === 0 ? 'th' : 'td'
    const renderedCells = cells.map(cell => `<${tag}>${cell}</${tag}>`)
    return `<tr>${renderedCells.join('')}</tr>`
  })
  return `<table>${renderedRows.join('')}</table>`
}
200
+
201
/**
 * Regex-based inverse of `renderTable` for the tables generated in this file.
 * It is not a general HTML parser.
 *
 * Each `<tr …>…</tr>` becomes a row, and each `<td>`/`<th>` element inside it
 * becomes a cell holding the raw inner HTML.
 */
function parseTable(html: string): TableData {
  const rows = Array.from(html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs), rowMatch =>
    Array.from(rowMatch[1].matchAll(/<t[dh][^>]*>(.*?)<\/t[dh]>/gs), cellMatch => cellMatch[1])
  )
  return { rows }
}
214
+
215
/** Edit operations that `mutate` exposes on a cloned table. */
interface Mutator {
  /** Replaces the content of an existing cell; ignored if the cell does not exist. */
  editCell(row: number, col: number, content: string): void
  /** Inserts `content` as a new row at `at`, clamped to the valid range. */
  addRowAt(at: number, content: string[]): void
  /** Removes the row at `at`; ignored if out of range. */
  deleteRow(at: number): void
  /** Inserts one cell per row at `at` (clamped per row); missing entries become `''`. */
  addColumnAt(at: number, columnContent: string[]): void
  /** Removes the cell at `at` from every row that has one. */
  deleteColumn(at: number): void
  /** Shifts the contents of row `rowIdx` one cell to the right; ignored if the row does not exist. */
  shiftContentRight(rowIdx: number): void
}
223
+
224
/**
 * Applies `fn` to a copy of `t` and returns the copy; `t` itself is never
 * modified.
 *
 * Each row array is copied before `fn` runs. Out-of-range indices are clamped
 * (insertions) or ignored (edits and deletions), so no operation throws on a
 * table whose size differs from what the caller assumed.
 *
 * @param t - Table to copy.
 * @param fn - Callback receiving the mutator bound to the copy.
 * @returns The mutated copy.
 */
function mutate(t: TableData, fn: (m: Mutator) => void): TableData {
  const rows = t.rows.map(cells => cells.slice())
  const clamp = (value: number, upper: number): number => Math.max(0, Math.min(value, upper))
  const inRange = (index: number, length: number): boolean => index >= 0 && index < length

  const mutator: Mutator = {
    editCell(row, col, content) {
      const target = rows[row]
      if (target?.[col] === undefined) return
      target[col] = content
    },
    addRowAt(at, content) {
      rows.splice(clamp(at, rows.length), 0, content)
    },
    deleteRow(at) {
      if (inRange(at, rows.length)) rows.splice(at, 1)
    },
    addColumnAt(at, columnContent) {
      rows.forEach((cells, r) => {
        cells.splice(clamp(at, cells.length), 0, columnContent[r] ?? '')
      })
    },
    deleteColumn(at) {
      rows.forEach(cells => {
        if (inRange(at, cells.length)) cells.splice(at, 1)
      })
    },
    shiftContentRight(rowIdx) {
      const cells = rows[rowIdx]
      if (!cells) return
      // Move every cell one slot to the right: the last cell's content
      // drops off and the first cell becomes empty.
      cells.copyWithin(1, 0)
      cells[0] = ''
    },
  }

  fn(mutator)
  return { rows }
}
260
+
261
+ // ──────────────────────── invariant checks ────────────────────────
262
+
263
/**
 * Asserts the structural invariants every diff output must satisfy:
 *   • opening and closing `<tr>`, `<td>`, `<th>`, `<ins>` and `<del>` tags
 *     occur equally often
 *   • no output row contains more `<td>`/`<th>` tags than the widest row
 *     found in either input (a table-wide limit, not a per-row one)
 *   • the output is non-empty whenever the inputs differ
 *
 * @param output - Diff result to validate.
 * @param oldHtml - Original input HTML.
 * @param newHtml - Changed input HTML.
 * @param label - Scenario name, prefixed to every assertion message.
 */
function assertStructurallyValid(output: string, oldHtml: string, newHtml: string, label: string) {
  const ctx = `[${label}]`

  // Tag balance, checked in a fixed order: table structure first, then the
  // diff markers.
  for (const tag of ['tr', 'td', 'th', 'ins', 'del']) {
    const opened = countMatches(output, new RegExp(`<${tag}[\\s>]`, 'g'))
    const closed = countMatches(output, new RegExp(`</${tag}>`, 'g'))
    expect(opened, `${ctx} <${tag}> tag balance`).toBe(closed)
  }

  // No phantom cells: each output row stays within the widest input row.
  const limit = Math.max(maxRowCellCount(oldHtml), maxRowCellCount(newHtml))
  for (const [, rowBody] of output.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs)) {
    const cellsInRow = countMatches(rowBody, /<t[dh][\s>]/g)
    expect(cellsInRow, `${ctx} row has too many cells (${cellsInRow} > ${limit})`).toBeLessThanOrEqual(limit)
  }

  // Differing inputs must produce some output.
  if (oldHtml === newHtml) return
  expect(output.length, `${ctx} output is empty`).toBeGreaterThan(0)
}
313
+
314
/**
 * Returns the largest number of `<td>`/`<th>` opening tags found in any
 * single `<tr>…</tr>` of `html`, or 0 when there are no rows.
 */
function maxRowCellCount(html: string): number {
  let widest = 0
  for (const [, rowBody] of html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs)) {
    widest = Math.max(widest, countMatches(rowBody, /<t[dh][\s>]/g))
  }
  return widest
}
323
+
324
/**
 * Counts the non-overlapping matches of `re` in `s`.
 *
 * A pattern without the `g` flag is re-created with it. Without that,
 * `String.prototype.match` returns a single match plus its capture groups,
 * so the array length would not be a match count.
 *
 * @param s - Text to search.
 * @param re - Pattern to count; may or may not be global.
 * @returns The number of matches, 0 when there are none.
 */
function countMatches(s: string, re: RegExp): number {
  const globalRe = re.global ? re : new RegExp(re.source, `${re.flags}g`)
  return s.match(globalRe)?.length ?? 0
}