@createiq/htmldiff 1.0.5-beta.1 → 1.0.5-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@createiq/htmldiff",
3
- "version": "1.0.5-beta.1",
3
+ "version": "1.0.5-beta.3",
4
4
  "description": "TypeScript port of htmldiff.net",
5
5
  "type": "module",
6
6
  "author": "Mathew Mannion <mathew.mannion@linklaters.com>",
package/src/TableDiff.ts CHANGED
@@ -469,13 +469,165 @@ function diffPreservedRow(
469
469
  }
470
470
  // Cell counts differ. Try to interpret it as a horizontal merge/split via
471
471
  // colspan first — preserving the new structure with `class='mod colspan'`
472
- // on each affected cell. Falls back to the cell-LCS path if the cells
473
- // don't align cleanly on logical column positions.
472
+ // on each affected cell.
474
473
  const colspanAligned = diffColspanChangedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
475
474
  if (colspanAligned !== null) return colspanAligned
475
+ // Colspan alignment did not apply, so treat the row as a column
476
+ // add/delete. `delta` is the signed cell-count difference: positive
477
+ // when new has more cells (columns added), negative when it has fewer
478
+ // (columns deleted). Rows whose delta or width exceeds the guardrails
479
+ // below fall through to the structurally aligned (cell-LCS) path at the
480
+ // end of this function.
481
+ const delta = newRow.cells.length - oldRow.cells.length
482
+ // For column add/delete (cell counts differ), find the best insertion
483
+ // or deletion positions via positional similarity scan and align the
484
+ // remaining cells positionally. This handles content-edit alongside
485
+ // column-add by keeping the edited cell in its column position rather
486
+ // than orphaning it via the cell-LCS exact match.
487
+ // Guardrail: combinatorial search is C(newCount, k); we cap to avoid
488
+ // explosion on very wide tables. The cap is generous for real legal
489
+ // schedules; anything above falls through to cell-LCS.
490
+ const absDelta = Math.abs(delta)
491
+ if (
492
+ absDelta > 0 &&
493
+ absDelta <= MAX_COLUMN_DELTA &&
494
+ Math.max(oldRow.cells.length, newRow.cells.length) <= MAX_COLUMN_SEARCH_WIDTH
495
+ ) {
496
+ if (delta > 0) return diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, delta, diffCell)
497
+ return diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, -delta, diffCell)
498
+ }
476
499
  return diffStructurallyAlignedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
477
500
  }
478
501
 
502
// Largest absolute difference in cell count between the old and new row for
// which the combinatorial column add/delete search is attempted.
+ const MAX_COLUMN_DELTA = 6
503
// Widest row (cell count on either side) for which that search is attempted;
// wider rows fall through to the structurally aligned (cell-LCS) path.
+ const MAX_COLUMN_SEARCH_WIDTH = 40
504
+
505
/**
 * Emits the diffed `<tr>` for a row where new has `k` more cells than old.
 *
 * The inserted column positions are chosen by
 * `findBestColumnInsertPositions`. New cells at those positions are emitted
 * through `emitFullCell` with the `'ins'` marker; every other new cell is
 * paired, in order, with the next unconsumed old cell and emitted through
 * `emitDiffedCell`. The row header is sliced from new and the row is closed
 * with `</tr>`.
 */
function diffMultiColumnAddRow(
  oldHtml: string,
  newHtml: string,
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  diffCell: DiffCellFn
): string {
  const insertedAt = new Set(findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml))
  const pieces: string[] = [rowHeaderSlice(newHtml, newRow)]
  let nextOld = 0
  newRow.cells.forEach((newCell, col) => {
    if (insertedAt.has(col)) {
      pieces.push(emitFullCell(newHtml, newCell, 'ins', diffCell))
      return
    }
    // Not an inserted column: align positionally with the next old cell.
    pieces.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[nextOld], newCell, diffCell))
    nextOld += 1
  })
  pieces.push('</tr>')
  return pieces.join('')
}
536
+
537
/**
 * Emits the diffed `<tr>` for a row where old has `k` more cells than new.
 *
 * The deleted column positions are chosen by
 * `findBestColumnDeletePositions`. Old cells at those positions are emitted
 * through `emitFullCell` with the `'del'` marker; every other old cell is
 * paired, in order, with the next unconsumed new cell and emitted through
 * `emitDiffedCell`. The row header is sliced from new and the row is closed
 * with `</tr>`.
 */
function diffMultiColumnDeleteRow(
  oldHtml: string,
  newHtml: string,
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  diffCell: DiffCellFn
): string {
  const deletedAt = new Set(findBestColumnDeletePositions(oldRow, newRow, k, oldHtml, newHtml))
  const pieces: string[] = [rowHeaderSlice(newHtml, newRow)]
  let nextNew = 0
  oldRow.cells.forEach((oldCell, col) => {
    if (deletedAt.has(col)) {
      pieces.push(emitFullCell(oldHtml, oldCell, 'del', diffCell))
      return
    }
    // Surviving column: align positionally with the next new cell.
    pieces.push(emitDiffedCell(oldHtml, newHtml, oldCell, newRow.cells[nextNew], diffCell))
    nextNew += 1
  })
  pieces.push('</tr>')
  return pieces.join('')
}
560
+
561
/**
 * Picks the `k` positions in `newRow` that are most plausibly inserted
 * columns.
 *
 * Every ascending combination of `k` new-cell indices is tried. For each,
 * the remaining new cells are paired in order with the old cells and the
 * pair similarities are summed; the first combination with the highest sum
 * wins.
 *
 * Pair similarities are memoised. The same (old, new) pair recurs across a
 * very large number of combinations, while the number of distinct pairs is
 * small, so each cell pair is only scored once.
 *
 * @returns the chosen new-cell indices in ascending order, or an empty array
 *   when `combinationsOfRange` yields nothing.
 */
function findBestColumnInsertPositions(
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  oldHtml: string,
  newHtml: string
): number[] {
  const newCount = newRow.cells.length
  const scoreCache = new Map<number, number>()
  const pairScore = (oldIdx: number, newIdx: number): number => {
    // newIdx < newCount, so this key is unique per (oldIdx, newIdx) pair.
    const key = oldIdx * newCount + newIdx
    let cached = scoreCache.get(key)
    if (cached === undefined) {
      cached = cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml)
      scoreCache.set(key, cached)
    }
    return cached
  }

  let bestPositions: number[] = []
  let bestScore = -1
  for (const combo of combinationsOfRange(newCount, k)) {
    let score = 0
    let oldIdx = 0
    // `combo` is sorted ascending, so a single cursor replaces a per-combo Set.
    let cursor = 0
    for (let newIdx = 0; newIdx < newCount; newIdx++) {
      if (cursor < combo.length && combo[cursor] === newIdx) {
        cursor++
        continue
      }
      score += pairScore(oldIdx, newIdx)
      oldIdx++
    }
    // Strict comparison keeps the first (lexicographically smallest) best combo.
    if (score > bestScore) {
      bestScore = score
      bestPositions = combo
    }
  }
  return bestPositions
}
586
+
587
/**
 * Picks the `k` positions in `oldRow` that are most plausibly deleted
 * columns.
 *
 * Every ascending combination of `k` old-cell indices is tried. For each,
 * the remaining old cells are paired in order with the new cells and the
 * pair similarities are summed; the first combination with the highest sum
 * wins.
 *
 * Pair similarities are memoised. The same (old, new) pair recurs across a
 * very large number of combinations, while the number of distinct pairs is
 * small, so each cell pair is only scored once.
 *
 * @returns the chosen old-cell indices in ascending order, or an empty array
 *   when `combinationsOfRange` yields nothing.
 */
function findBestColumnDeletePositions(
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  oldHtml: string,
  newHtml: string
): number[] {
  const oldCount = oldRow.cells.length
  const newCount = newRow.cells.length
  const scoreCache = new Map<number, number>()
  const pairScore = (oldIdx: number, newIdx: number): number => {
    // newIdx < newCount for every pair scored, so this key is unique per pair.
    const key = oldIdx * newCount + newIdx
    let cached = scoreCache.get(key)
    if (cached === undefined) {
      cached = cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml)
      scoreCache.set(key, cached)
    }
    return cached
  }

  let bestPositions: number[] = []
  let bestScore = -1
  for (const combo of combinationsOfRange(oldCount, k)) {
    let score = 0
    let newIdx = 0
    // `combo` is sorted ascending, so a single cursor replaces a per-combo Set.
    let cursor = 0
    for (let oldIdx = 0; oldIdx < oldCount; oldIdx++) {
      if (cursor < combo.length && combo[cursor] === oldIdx) {
        cursor++
        continue
      }
      score += pairScore(oldIdx, newIdx)
      newIdx++
    }
    // Strict comparison keeps the first (lexicographically smallest) best combo.
    if (score > bestScore) {
      bestScore = score
      bestPositions = combo
    }
  }
  return bestPositions
}
612
+
613
/**
 * Yields every combination of `k` distinct integers from [0, n), each as a
 * fresh ascending array, in lexicographic order.
 *
 * Yields nothing when `k <= 0` or `k > n`. Callers in this file only use
 * `k >= 1`; a negative `k` previously produced a single bogus empty
 * combination while `k === 0` produced none, so both are now treated alike.
 *
 * Iterative: the generator keeps one working array of `k` indices and
 * yields a copy each step, so callers may retain the yielded arrays.
 */
function* combinationsOfRange(n: number, k: number): IterableIterator<number[]> {
  if (k <= 0 || k > n) return
  const indices = Array.from({ length: k }, (_, i) => i)
  while (true) {
    yield indices.slice()
    // Find the rightmost index that has not reached its maximum value.
    let i = k - 1
    while (i >= 0 && indices[i] === n - k + i) i--
    if (i < 0) return
    // Advance it and reset everything to its right to the smallest valid run.
    indices[i]++
    for (let j = i + 1; j < k; j++) indices[j] = indices[j - 1] + 1
  }
}
630
+
479
631
  /**
480
632
  * Try to align cells by logical column position (sum of colspans). When
481
633
  * one side has a colspan'd cell that absorbs multiple cells on the other
@@ -619,7 +771,12 @@ function diffStructurallyAlignedRow(
619
771
  ): string {
620
772
  const oldKeys = oldRow.cells.map(cell => cellKey(oldHtml, cell))
621
773
  const newKeys = newRow.cells.map(cell => cellKey(newHtml, cell))
622
- const alignment = lcsAlign(oldKeys, newKeys)
774
+ const exactAlignment = lcsAlign(oldKeys, newKeys)
775
+ // After exact LCS, fuzzy-pair adjacent unmatched old/new cells whose
776
+ // content is similar enough — so a content-edit cell alongside a
777
+ // column-add in the same row produces a content diff for the edited
778
+ // cell rather than a phantom delete + insert + extra cell.
779
+ const alignment = pairSimilarUnmatchedCells(exactAlignment, oldRow, newRow, oldHtml, newHtml)
623
780
 
624
781
  const out: string[] = []
625
782
  // Use new's <tr> if it exists; otherwise old's.
@@ -765,9 +922,17 @@ interface Alignment {
765
922
  newIdx: number | null
766
923
  }
767
924
 
768
- /** Jaccard similarity threshold above which we treat two rows as "the same row, edited". */
925
+ /** Similarity threshold (best of character prefix+suffix and token Jaccard) above which we treat two rows as "the same row, edited". */
769
926
  const ROW_FUZZY_THRESHOLD = 0.5
770
927
 
928
+ /**
929
+ * Threshold for "this cell is a content-edit of that cell." Tuned the same
930
+ * as ROW_FUZZY_THRESHOLD; cells in legal docs that share most of their
931
+ * content typically ARE the same logical cell with a body edit, so 0.5
932
+ * works for both granularities in practice.
933
+ */
934
+ const CELL_FUZZY_THRESHOLD = 0.5
935
+
771
936
  /**
772
937
  * After exact LCS, scan the alignment for runs of "old deleted, then new
773
938
  * inserted" (or vice versa) and pair entries whose content is similar
@@ -783,14 +948,42 @@ function pairSimilarUnmatchedRows(
783
948
  oldHtml: string,
784
949
  newHtml: string
785
950
  ): Alignment[] {
786
- // Identify pairings inside each unmatched-only run, then build the
787
- // output by walking the alignment and substituting paired entries at
788
- // the *ins position* (not the del position). This keeps the result
789
- // alignment monotonic in newIdx — critical because the cursor-based
790
- // emission downstream walks new's html in order. Emitting at the del
791
- // position would be fine when del<ins in the alignment array (the
792
- // typical case), but can violate monotonicity when there are mixed
793
- // unpaired entries in between (column-add + row-add together, etc.).
951
+ return pairSimilarUnmatched(alignment, ROW_FUZZY_THRESHOLD, (oldIdx, newIdx) =>
952
+ rowSimilarity(oldTable.rows[oldIdx], newTable.rows[newIdx], oldHtml, newHtml)
953
+ )
954
+ }
955
+
956
/**
 * Cell-level fuzzy pairing: delegates to `pairSimilarUnmatched` with
 * `CELL_FUZZY_THRESHOLD`, scoring a candidate pair by the `cellSimilarity`
 * of the old-row and new-row cells at the given indices.
 */
function pairSimilarUnmatchedCells(
  alignment: Alignment[],
  oldRow: RowRange,
  newRow: RowRange,
  oldHtml: string,
  newHtml: string
): Alignment[] {
  const similarityOfCells = (oldIdx: number, newIdx: number): number => {
    const oldCell = oldRow.cells[oldIdx]
    const newCell = newRow.cells[newIdx]
    return cellSimilarity(oldCell, newCell, oldHtml, newHtml)
  }
  return pairSimilarUnmatched(alignment, CELL_FUZZY_THRESHOLD, similarityOfCells)
}
967
+
968
+ /**
969
+ * Identify pairings inside each unmatched-only run, then build the output
970
+ * alignment by walking the original and substituting paired entries at
971
+ * the *ins position* (not the del position). This keeps the result
972
+ * monotonic in newIdx — critical because the cursor-based emission
973
+ * downstream walks new's html in order. Emitting at the del position
974
+ * would be fine when del<ins in the alignment array (the typical case),
975
+ * but can violate monotonicity when there are mixed unpaired entries in
976
+ * between (column-add + row-add together, content-edit + column-add,
977
+ * etc.).
978
+ *
979
+ * Generic over what's being paired — works for both rows (by full row
980
+ * content similarity) and cells (by per-cell content similarity).
981
+ */
982
+ function pairSimilarUnmatched(
983
+ alignment: Alignment[],
984
+ threshold: number,
985
+ similarity: (oldIdx: number, newIdx: number) => number
986
+ ): Alignment[] {
794
987
  const pairs = new Map<number, number>() // del-alignment-idx → ins-alignment-idx
795
988
  let i = 0
796
989
  while (i < alignment.length) {
@@ -812,15 +1005,10 @@ function pairSimilarUnmatchedRows(
812
1005
  const usedIns = new Set<number>()
813
1006
  for (const di of delIndices) {
814
1007
  let bestIi = -1
815
- let bestSim = ROW_FUZZY_THRESHOLD
1008
+ let bestSim = threshold
816
1009
  for (const ii of insIndices) {
817
1010
  if (usedIns.has(ii)) continue
818
- const sim = rowSimilarity(
819
- oldTable.rows[alignment[di].oldIdx as number],
820
- newTable.rows[alignment[ii].newIdx as number],
821
- oldHtml,
822
- newHtml
823
- )
1011
+ const sim = similarity(alignment[di].oldIdx as number, alignment[ii].newIdx as number)
824
1012
  if (sim > bestSim) {
825
1013
  bestSim = sim
826
1014
  bestIi = ii
@@ -851,18 +1039,41 @@ function pairSimilarUnmatchedRows(
851
1039
  }
852
1040
 
853
1041
  /**
854
- * Character-level similarity using shared prefix + suffix as a fraction
855
- * of the longer string. Catches "single edit somewhere in a long row"
856
- * (which token-Jaccard misses on short rows) while still correctly
857
- * rejecting rows with no positional overlap. HTML tags are stripped to
858
- * keep the comparison content-focused.
1042
+ * Combined similarity metric used for both row-level and cell-level
1043
+ * fuzzy pairing. Returns the MAX of two complementary metrics:
1044
+ *
1045
+ * 1. **Character prefix+suffix similarity** — fraction of the longer
1046
+ * string covered by shared prefix + shared suffix. Catches small
1047
+ * edits in the middle of a string (one word changed in a row).
1048
+ * Misses cases where the bulk of common content is in the middle
1049
+ * and the ends differ.
1050
+ *
1051
+ * 2. **Token Jaccard similarity** — intersection-over-union of the
1052
+ * whitespace-split tokens. Catches "most of the content is the
1053
+ * same but bookended by different bits" — e.g. a row whose only
1054
+ * edit is a column added at the start and another at the end,
1055
+ * where the ~50 chars in the middle that DO match would be
1056
+ * invisible to prefix+suffix.
1057
+ *
1058
+ * Either metric exceeding the threshold means pair. Neither alone is
1059
+ * sufficient for the full range of legal-doc edits we see in
1060
+ * production tables.
859
1061
  */
860
1062
/** Similarity of two rows: `textSimilarity` applied to their `rowText`. */
function rowSimilarity(oldRow: RowRange, newRow: RowRange, oldHtml: string, newHtml: string): number {
  const oldText = rowText(oldHtml, oldRow)
  const newText = rowText(newHtml, newRow)
  return textSimilarity(oldText, newText)
}
1065
+
1066
/** Similarity of two cells: `textSimilarity` applied to their `cellText`. */
function cellSimilarity(oldCell: CellRange, newCell: CellRange, oldHtml: string, newHtml: string): number {
  const oldText = cellText(oldHtml, oldCell)
  const newText = cellText(newHtml, newCell)
  return textSimilarity(oldText, newText)
}
1069
+
1070
/**
 * Similarity of two strings.
 *
 * Identical strings score 1 (including two empty strings). If exactly one
 * side is empty the score is 0. Otherwise the score is the larger of
 * `charPrefixSuffixSimilarity` and `tokenJaccardSimilarity`.
 */
function textSimilarity(a: string, b: string): number {
  if (a === b) {
    return 1
  }
  const bothNonEmpty = a.length > 0 && b.length > 0
  if (!bothNonEmpty) {
    return 0
  }
  const byCharacters = charPrefixSuffixSimilarity(a, b)
  const byTokens = tokenJaccardSimilarity(a, b)
  return Math.max(byCharacters, byTokens)
}
865
1075
 
1076
+ function charPrefixSuffixSimilarity(a: string, b: string): number {
866
1077
  let prefix = 0
867
1078
  const minLen = Math.min(a.length, b.length)
868
1079
  while (prefix < minLen && a[prefix] === b[prefix]) prefix++
@@ -879,6 +1090,18 @@ function rowSimilarity(oldRow: RowRange, newRow: RowRange, oldHtml: string, newH
879
1090
  return (prefix + suffix) / Math.max(a.length, b.length)
880
1091
  }
881
1092
 
1093
/**
 * Jaccard similarity (intersection over union) of the distinct
 * whitespace-separated tokens of `a` and `b`.
 *
 * Returns 1 when neither string contains any token; otherwise a value in
 * [0, 1].
 */
function tokenJaccardSimilarity(a: string, b: string): number {
  const tokenize = (text: string): Set<string> => new Set(text.split(/\s+/).filter(token => token.length > 0))
  const left = tokenize(a)
  const right = tokenize(b)
  if (left.size + right.size === 0) return 1

  const shared = [...left].filter(token => right.has(token)).length
  // At least one set is non-empty here, so the union is never zero.
  return shared / (left.size + right.size - shared)
}
1104
+
882
1105
  function rowText(html: string, row: RowRange): string {
883
1106
  const parts: string[] = []
884
1107
  for (const cell of row.cells) {
@@ -887,6 +1110,15 @@ function rowText(html: string, row: RowRange): string {
887
1110
  return parts.join(' ').replace(/\s+/g, ' ').trim().toLowerCase()
888
1111
  }
889
1112
 
1113
/**
 * Normalised text of a cell: the slice of `html` between the cell's
 * `contentStart` and `contentEnd`, with each tag replaced by a space,
 * whitespace runs collapsed to a single space, then trimmed and lower-cased.
 */
function cellText(html: string, cell: CellRange): string {
  const inner = html.slice(cell.contentStart, cell.contentEnd)
  const withoutTags = inner.replace(/<[^>]+>/g, ' ')
  const collapsed = withoutTags.replace(/\s+/g, ' ').trim()
  return collapsed.toLowerCase()
}
1121
+
890
1122
  /**
891
1123
  * Standard LCS alignment: walks both sequences and emits a list of pairs
892
1124
  * where `(oldIdx, newIdx)` are both set for matching positions, and one
@@ -0,0 +1,327 @@
1
+ import { describe, expect, it } from 'vitest'
2
+
3
+ import HtmlDiff from '../src/HtmlDiff'
4
+
5
+ /**
6
+ * Exhaustive matrix of common table operations and their pairwise
7
+ * combinations. Each case runs HtmlDiff.execute and asserts structural
8
+ * invariants on the output:
9
+ *
10
+ * • Every `<tr>` opens and closes
11
+ * • No output row has more cells than the widest row in either input
12
+ * (raw `<td>`/`<th>` count; colspan is not taken into account)
13
+ * • All `<ins>`/`<del>` tags balance
14
+ *
15
+ * Not asserted yet (TODO): class-marker coherence (a `<tr class='diffins'>`
16
+ * row's cells all ins-marked or empty, likewise del for a deleted row),
17
+ * and that the content of every `<td>` from new appears somewhere in the
18
+ * output (i.e. that no cell is silently dropped).
19
+ *
20
+ * The matrix is built combinatorially — single operations × single
21
+ * operations — so a regression in any pairwise combination surfaces
22
+ * here even if no test was added for that exact pair.
23
+ */
24
describe('HtmlDiff — table operations matrix', () => {
  describe('single operations on a 3×3 base', () => {
    allSingleOperations().forEach(op => {
      it(`${op.name} produces structurally valid output`, () => {
        const base = baseTable3x3()
        const newHtml = op.apply(base)
        const oldHtml = renderTable(base)
        const result = HtmlDiff.execute(oldHtml, newHtml)
        assertStructurallyValid(result, oldHtml, newHtml, op.name)
      })
    })
  })

  describe('pairwise combinations on a 3×3 base', () => {
    const ops = allSingleOperations()
    for (const first of ops) {
      // An op is never paired with itself; `canCompose` may veto further pairs.
      const partners = ops.filter(second => second !== first && canCompose(first, second))
      for (const second of partners) {
        const label = `${first.name} + ${second.name}`
        it(`${label} produces structurally valid output`, () => {
          const base = baseTable3x3()
          // Apply the first op, re-parse its HTML, then apply the second op on top.
          const intermediate = parseTable(first.apply(base))
          const newHtml = second.apply(intermediate)
          const oldHtml = renderTable(base)
          const result = HtmlDiff.execute(oldHtml, newHtml)
          assertStructurallyValid(result, oldHtml, newHtml, label)
        })
      }
    }
  })

  describe('user-reported regression scenarios', () => {
    it('column added + empty row inserted in middle (regression for issue with blank row)', () => {
      const oldHtml = [
        '<table>',
        '<tr><th>Party</th><th>Form</th><th>Date</th></tr>',
        '<tr><td>Party A</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto).</td><td>(i) Upon execution.</td></tr>',
        '<tr><td>Party B</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>',
        '</table>',
      ].join('')
      const newHtml = [
        '<table>',
        '<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>',
        "<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto). Here's some extra content</td><td>(i) Upon execution.</td></tr>",
        '<tr><td></td><td></td><td></td><td></td></tr>',
        '<tr><td>Party B</td><td>A</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>',
        '</table>',
      ].join('')

      const result = HtmlDiff.execute(oldHtml, newHtml)
      assertStructurallyValid(result, oldHtml, newHtml, 'column-add + empty row insert')

      // Pin the exact shape too, so drift is visible even when the generic
      // structural invariants still hold.
      const rowCount = countMatches(result, /<tr[\s>]/g)
      expect(rowCount).toBe(4) // header + Party A + empty + Party B
      const insertedEmptyRow =
        "<tr class='diffins'><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td></tr>"
      expect(result).toContain(insertedEmptyRow)
    })
  })
})
87
+
88
+ // ──────────────────────── operations ────────────────────────
89
+
90
// A named table transformation used to build the test matrix.
+ interface Op {
91
// Label interpolated into the generated test titles.
+ name: string
92
// Applies the operation to `table` and returns the resulting table as HTML.
+ apply: (table: TableData) => string
93
+ }
94
+
95
/**
 * Returns the catalogue of single table operations exercised by the matrix,
 * in a fixed order (the order determines test registration order).
 *
 * Apart from `no-op`, each operation clones the input via `mutate`, edits
 * the clone and renders it with `renderTable`. Edits that need the table's
 * dimensions read them from the un-mutated input table.
 */
function allSingleOperations(): Op[] {
  // Wraps an edit into an Op; `edit` receives the mutator and the original input table.
  const op = (name: string, edit: (m: Mutator, t: TableData) => void): Op => ({
    name,
    apply: t => renderTable(mutate(t, m => edit(m, t))),
  })

  return [
    { name: 'no-op', apply: t => renderTable(t) },
    op('edit-cell', m => m.editCell(1, 1, 'EDITED')),
    op('add-row-start', m => m.addRowAt(0, ['NEW1', 'NEW2', 'NEW3'])),
    op('add-row-middle', m => m.addRowAt(2, ['NEW1', 'NEW2', 'NEW3'])),
    op('add-row-end', (m, t) => m.addRowAt(t.rows.length, ['NEW1', 'NEW2', 'NEW3'])),
    op('add-empty-row-middle', m => m.addRowAt(2, ['', '', ''])),
    op('add-multiple-rows', (m, t) => {
      m.addRowAt(t.rows.length, ['X1', 'X2', 'X3'])
      m.addRowAt(t.rows.length + 1, ['Y1', 'Y2', 'Y3'])
    }),
    op('delete-row-start', m => m.deleteRow(1)), // skip header
    op('delete-row-middle', m => m.deleteRow(2)),
    op('delete-row-end', (m, t) => m.deleteRow(t.rows.length - 1)),
    op('delete-multiple-rows', (m, t) => {
      m.deleteRow(t.rows.length - 1)
      m.deleteRow(1)
    }),
    op('add-column-start', m => m.addColumnAt(0, ['NewHeader', 'newA', 'newB', 'newC'])),
    op('add-column-middle', m => m.addColumnAt(1, ['NewHeader', 'newA', 'newB', 'newC'])),
    op('add-column-end', (m, t) => m.addColumnAt(t.rows[0].length, ['NewHeader', 'newA', 'newB', 'newC'])),
    op('add-multiple-columns', (m, t) => {
      m.addColumnAt(t.rows[0].length, ['H1', 'a1', 'b1', 'c1'])
      m.addColumnAt(t.rows[0].length + 1, ['H2', 'a2', 'b2', 'c2'])
    }),
    op('delete-column-start', m => m.deleteColumn(0)),
    op('delete-column-middle', m => m.deleteColumn(1)),
    op('delete-column-end', (m, t) => m.deleteColumn(t.rows[0].length - 1)),
    op('shift-content-right', m => m.shiftContentRight(1)),
  ]
}
157
+
158
/**
 * Decides whether operation `_b` may be applied on top of `_a` in the
 * pairwise matrix.
 *
 * Currently every pair is allowed: the function always returns `true` and
 * ignores both arguments. Indices that fall out of bounds after the first
 * operation are instead tolerated by the `mutate` helpers, which clamp or
 * ignore them. The hook is kept so specific pairs can be excluded later.
 */
function canCompose(_a: Op, _b: Op): boolean {
  const everyPairAllowed = true
  return everyPairAllowed
}
169
+
170
+ // ──────────────────────── table model ────────────────────────
171
+
172
// Minimal in-memory table model used by the matrix tests.
+ interface TableData {
173
// One entry per table row, each holding that row's cell contents in column
// order. `renderTable` emits row 0 as `<th>` cells and the rest as `<td>`.
+ rows: string[][]
174
+ }
175
+
176
/**
 * Builds a fresh base table: a header row (`Header1`..`Header3`) followed by
 * three body rows `A1..A3`, `B1..B3`, `C1..C3`.
 */
function baseTable3x3(): TableData {
  const header = ['Header1', 'Header2', 'Header3']
  const body = ['A', 'B', 'C'].map(letter => [1, 2, 3].map(col => `${letter}${col}`))
  return { rows: [header, ...body] }
}
186
+
187
/**
 * Serialises a table model to HTML with no whitespace between tags. Cells
 * of the first row are wrapped in `<th>`, cells of every other row in
 * `<td>`; cell content is inserted verbatim.
 */
function renderTable(t: TableData): string {
  const rowsHtml = Array.from(t.rows, (cells, rowIdx) => {
    const tag = rowIdx === 0 ? 'th' : 'td'
    const cellsHtml = Array.from(cells, cell => `<${tag}>${cell}</${tag}>`).join('')
    return `<tr>${cellsHtml}</tr>`
  })
  return `<table>${rowsHtml.join('')}</table>`
}
200
+
201
/**
 * Extracts rows and cell contents from table HTML using two non-greedy
 * regexes (rows first, then `<td>`/`<th>` cells inside each row).
 *
 * Only intended for the simple tables this test file generates; it is not a
 * general HTML parser.
 */
function parseTable(html: string): TableData {
  const rowPattern = /<tr[^>]*>(.*?)<\/tr>/gs
  const rows = Array.from(html.matchAll(rowPattern), rowMatch => {
    const cellPattern = /<t[dh][^>]*>(.*?)<\/t[dh]>/gs
    return Array.from(rowMatch[1].matchAll(cellPattern), cellMatch => cellMatch[1])
  })
  return { rows }
}
214
+
215
// Editing operations that `mutate` exposes on its working copy of a table.
+ interface Mutator {
216
// Replaces the content of an existing cell; no-op if the cell does not exist.
+ editCell(row: number, col: number, content: string): void
217
// Inserts a row at `at`, clamped to [0, row count].
+ addRowAt(at: number, content: string[]): void
218
// Removes the row at `at`; no-op if `at` is out of range.
+ deleteRow(at: number): void
219
// Inserts one cell per row at `at` (clamped per row); rows without an entry
// in `columnContent` get an empty string.
+ addColumnAt(at: number, columnContent: string[]): void
220
// Removes the cell at `at` from every row that has one.
+ deleteColumn(at: number): void
221
// Shifts a row's contents one cell to the right: the first cell becomes
// empty and the last cell's content is dropped.
+ shiftContentRight(rowIdx: number): void
222
+ }
223
+
224
/**
 * Runs `fn` against a `Mutator` bound to a copy of `t` (each row array is
 * copied) and returns that copy. The input table is never modified.
 *
 * Out-of-range indices are tolerated: insert positions are clamped, and
 * edits or deletions that target a missing row or cell are ignored.
 */
function mutate(t: TableData, fn: (m: Mutator) => void): TableData {
  const rows = t.rows.map(row => Array.from(row))
  const clampIndex = (at: number, length: number): number => Math.max(0, Math.min(at, length))
  const isValidIndex = (at: number, length: number): boolean => at >= 0 && at < length

  const mutator: Mutator = {
    editCell(row, col, content) {
      const target = rows[row]
      if (target?.[col] === undefined) return
      target[col] = content
    },
    addRowAt(at, content) {
      rows.splice(clampIndex(at, rows.length), 0, content)
    },
    deleteRow(at) {
      if (!isValidIndex(at, rows.length)) return
      rows.splice(at, 1)
    },
    addColumnAt(at, columnContent) {
      rows.forEach((row, r) => {
        row.splice(clampIndex(at, row.length), 0, columnContent[r] ?? '')
      })
    },
    deleteColumn(at) {
      rows.forEach(row => {
        if (isValidIndex(at, row.length)) row.splice(at, 1)
      })
    },
    shiftContentRight(rowIdx) {
      const row = rows[rowIdx]
      if (!row) return
      // New contents: an empty first cell followed by everything except the
      // old last cell. An empty row therefore becomes a single empty cell.
      row.splice(0, row.length, '', ...row.slice(0, -1))
    },
  }

  fn(mutator)
  return { rows }
}
260
+
261
+ // ──────────────────────── invariant checks ────────────────────────
262
+
263
/**
 * Asserts that the diff output is structurally valid:
 *  • opening and closing counts match for `<tr>`, `<td>`, `<th>`, `<ins>`
 *    and `<del>` (checked in that order)
 *  • no output row contains more `<td>`/`<th>` cells than the widest row
 *    found in either input (a table-wide limit, not a per-row one)
 *  • the output is non-empty whenever the two inputs differ
 *
 * `label` is prefixed to every failure message.
 */
function assertStructurallyValid(output: string, oldHtml: string, newHtml: string, label: string) {
  const ctx = `[${label}]`

  // Tag balance: every opening tag needs a matching closing tag.
  const balancedTags: ReadonlyArray<readonly [string, RegExp, RegExp]> = [
    ['tr', /<tr[\s>]/g, /<\/tr>/g],
    ['td', /<td[\s>]/g, /<\/td>/g],
    ['th', /<th[\s>]/g, /<\/th>/g],
    ['ins', /<ins[\s>]/g, /<\/ins>/g],
    ['del', /<del[\s>]/g, /<\/del>/g],
  ]
  for (const [tag, openPattern, closePattern] of balancedTags) {
    const opened = countMatches(output, openPattern)
    const closed = countMatches(output, closePattern)
    expect(opened, `${ctx} <${tag}> tag balance`).toBe(closed)
  }

  // No phantom cells: cap each output row at the widest input row.
  const limit = Math.max(maxRowCellCount(oldHtml), maxRowCellCount(newHtml))
  for (const rowMatch of output.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs)) {
    const cellsInRow = countMatches(rowMatch[1], /<t[dh][\s>]/g)
    expect(cellsInRow, `${ctx} row has too many cells (${cellsInRow} > ${limit})`).toBeLessThanOrEqual(limit)
  }

  // Differing inputs must produce some output.
  if (oldHtml === newHtml) return
  expect(output.length, `${ctx} output is empty`).toBeGreaterThan(0)
}
313
+
314
/**
 * Returns the largest number of `<td>`/`<th>` opening tags found in any
 * single `<tr>…</tr>` of `html`, or 0 when there are no rows.
 */
function maxRowCellCount(html: string): number {
  const cellCounts = Array.from(html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs), rowMatch =>
    countMatches(rowMatch[1], /<t[dh][\s>]/g)
  )
  return cellCounts.reduce((widest, count) => (count > widest ? count : widest), 0)
}
323
+
324
/**
 * Counts the non-overlapping matches of `re` in `s`.
 *
 * The pattern is always applied globally. With a non-global regex,
 * `String.prototype.match` returns the first match plus its capture groups,
 * so its length is not a match count; a global copy is used in that case.
 *
 * @returns the number of matches, or 0 when there are none.
 */
function countMatches(s: string, re: RegExp): number {
  const globalRe = re.global ? re : new RegExp(re.source, `${re.flags}g`)
  const matches = s.match(globalRe)
  return matches ? matches.length : 0
}