@createiq/htmldiff 1.0.5-beta.2 → 1.0.5-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +137 -25
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.mjs +137 -25
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/TableDiff.ts +196 -37
- package/test/HtmlDiff.tables.matrix.spec.ts +327 -0
package/package.json
CHANGED
package/src/TableDiff.ts
CHANGED
|
@@ -469,13 +469,165 @@ function diffPreservedRow(
|
|
|
469
469
|
}
|
|
470
470
|
// Cell counts differ. Try to interpret it as a horizontal merge/split via
|
|
471
471
|
// colspan first — preserving the new structure with `class='mod colspan'`
|
|
472
|
-
// on each affected cell.
|
|
473
|
-
// don't align cleanly on logical column positions.
|
|
472
|
+
// on each affected cell.
|
|
474
473
|
const colspanAligned = diffColspanChangedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
|
|
475
474
|
if (colspanAligned !== null) return colspanAligned
|
|
475
|
+
// For a single-column add/delete (cell count differs by exactly 1),
|
|
476
|
+
// detect the position via positional similarity scan and align the
|
|
477
|
+
// remaining cells positionally. This handles the case where a column
|
|
478
|
+
// was added AND a different cell got an unrelated content edit — the
|
|
479
|
+
// edited cell still aligns by position rather than getting orphaned by
|
|
480
|
+
// the cell-LCS exact-match.
|
|
481
|
+
const delta = newRow.cells.length - oldRow.cells.length
|
|
482
|
+
// For column add/delete (cell counts differ), find the best insertion
|
|
483
|
+
// or deletion positions via positional similarity scan and align the
|
|
484
|
+
// remaining cells positionally. This handles content-edit alongside
|
|
485
|
+
// column-add by keeping the edited cell in its column position rather
|
|
486
|
+
// than orphaning it via the cell-LCS exact match.
|
|
487
|
+
// Guardrail: combinatorial search is C(newCount, k); we cap to avoid
|
|
488
|
+
// explosion on very wide tables. The cap is generous for real legal
|
|
489
|
+
// schedules; anything above falls through to cell-LCS.
|
|
490
|
+
const absDelta = Math.abs(delta)
|
|
491
|
+
if (
|
|
492
|
+
absDelta > 0 &&
|
|
493
|
+
absDelta <= MAX_COLUMN_DELTA &&
|
|
494
|
+
Math.max(oldRow.cells.length, newRow.cells.length) <= MAX_COLUMN_SEARCH_WIDTH
|
|
495
|
+
) {
|
|
496
|
+
if (delta > 0) return diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, delta, diffCell)
|
|
497
|
+
return diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, -delta, diffCell)
|
|
498
|
+
}
|
|
476
499
|
return diffStructurallyAlignedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
|
|
477
500
|
}
|
|
478
501
|
|
|
502
|
+
/**
 * Widest row (in cells, taking the larger of the old and new row) for
 * which the positional column search is attempted. Wider rows skip the
 * search and are handled by `diffStructurallyAlignedRow` instead.
 */
const MAX_COLUMN_SEARCH_WIDTH = 40

/**
 * Largest difference in cell count between the old and new row for which
 * the positional column search is attempted. Together with
 * `MAX_COLUMN_SEARCH_WIDTH` this bounds the number of combinations the
 * search enumerates (at most C(40, 6) = 3,838,380).
 */
const MAX_COLUMN_DELTA = 6
|
|
504
|
+
|
|
505
|
+
/**
 * Renders a row in which the new version has `k` more cells than the old
 * one.
 *
 * The `k` inserted column positions are chosen by
 * `findBestColumnInsertPositions`. Cells at those positions are emitted
 * whole with the `'ins'` marker; every other new cell is paired with the
 * next unconsumed old cell, left to right, and emitted as a content diff.
 *
 * @param oldHtml - Source HTML that `oldRow` indexes into.
 * @param newHtml - Source HTML that `newRow` indexes into.
 * @param oldRow - Row from the old table.
 * @param newRow - Row from the new table; expected to have `k` more cells.
 * @param k - Number of inserted cells.
 * @param diffCell - Cell-level diff callback forwarded to the emit helpers.
 * @returns The row markup, starting with the new row's header slice and
 *   ending with `</tr>`.
 */
function diffMultiColumnAddRow(
  oldHtml: string,
  newHtml: string,
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  diffCell: DiffCellFn
): string {
  const addedColumns = new Set(findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml))
  const pieces: string[] = [rowHeaderSlice(newHtml, newRow)]

  let oldCursor = 0
  let newCursor = 0
  while (newCursor < newRow.cells.length) {
    const newCell = newRow.cells[newCursor]
    if (addedColumns.has(newCursor)) {
      pieces.push(emitFullCell(newHtml, newCell, 'ins', diffCell))
    } else {
      // Not an inserted column: pair with the next old cell in order.
      pieces.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[oldCursor], newCell, diffCell))
      oldCursor += 1
    }
    newCursor += 1
  }

  pieces.push('</tr>')
  return pieces.join('')
}
|
|
536
|
+
|
|
537
|
+
/**
 * Renders a row in which the old version has `k` more cells than the new
 * one.
 *
 * The `k` deleted column positions are chosen by
 * `findBestColumnDeletePositions`. Old cells at those positions are
 * emitted whole with the `'del'` marker; every other old cell is paired
 * with the next unconsumed new cell, left to right, and emitted as a
 * content diff.
 *
 * @param oldHtml - Source HTML that `oldRow` indexes into.
 * @param newHtml - Source HTML that `newRow` indexes into.
 * @param oldRow - Row from the old table; expected to have `k` more cells.
 * @param newRow - Row from the new table.
 * @param k - Number of deleted cells.
 * @param diffCell - Cell-level diff callback forwarded to the emit helpers.
 * @returns The row markup, starting with the new row's header slice and
 *   ending with `</tr>`.
 */
function diffMultiColumnDeleteRow(
  oldHtml: string,
  newHtml: string,
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  diffCell: DiffCellFn
): string {
  const removedColumns = new Set(findBestColumnDeletePositions(oldRow, newRow, k, oldHtml, newHtml))
  const pieces: string[] = [rowHeaderSlice(newHtml, newRow)]

  // Number of surviving old cells emitted so far == index of the next new cell.
  let survivors = 0
  for (let col = 0; col < oldRow.cells.length; col += 1) {
    const oldCell = oldRow.cells[col]
    if (!removedColumns.has(col)) {
      pieces.push(emitDiffedCell(oldHtml, newHtml, oldCell, newRow.cells[survivors], diffCell))
      survivors += 1
    } else {
      pieces.push(emitFullCell(oldHtml, oldCell, 'del', diffCell))
    }
  }

  pieces.push('</tr>')
  return pieces.join('')
}
|
|
560
|
+
|
|
561
|
+
/**
 * Chooses which `k` cells of `newRow` are best explained as inserted
 * columns.
 *
 * Every ascending combination of `k` new-cell indices is scored by
 * pairing the remaining new cells with the old cells in order and summing
 * `cellSimilarity` over the pairs. The first combination (in the order
 * produced by `combinationsOfRange`) with the strictly highest score wins.
 *
 * Each (old cell, new cell) similarity is computed at most once and then
 * reused across combinations; the same pairs recur in a large share of
 * the up-to-C(n, k) combinations, so recomputing them dominated the cost.
 * This relies on `cellSimilarity` being a pure function of its arguments
 * (NOTE(review): it is in the visible code — confirm if it changes).
 *
 * @param oldRow - Row from the old table.
 * @param newRow - Row from the new table; expected to have `k` more cells
 *   than `oldRow`.
 * @param k - Number of inserted cells to locate.
 * @param oldHtml - Source HTML that `oldRow` indexes into.
 * @param newHtml - Source HTML that `newRow` indexes into.
 * @returns Ascending indices into `newRow.cells`, or `[]` when
 *   `combinationsOfRange` yields no combination.
 */
function findBestColumnInsertPositions(
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  oldHtml: string,
  newHtml: string
): number[] {
  const newCount = newRow.cells.length

  // Lazy cache keyed by (oldIdx, newIdx). Keys are unique because
  // newIdx < newCount whenever a lookup happens.
  const memo: Array<number | undefined> = []
  const similarityAt = (oldIdx: number, newIdx: number): number => {
    const key = oldIdx * newCount + newIdx
    let value = memo[key]
    if (value === undefined) {
      value = cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml)
      memo[key] = value
    }
    return value
  }

  let bestPositions: number[] = []
  let bestScore = -1
  for (const combo of combinationsOfRange(newCount, k)) {
    let score = 0
    let oldIdx = 0
    // `combo` is sorted ascending, so a single cursor replaces a Set lookup.
    let nextInserted = 0
    for (let newIdx = 0; newIdx < newCount; newIdx++) {
      if (nextInserted < combo.length && combo[nextInserted] === newIdx) {
        nextInserted++
        continue
      }
      score += similarityAt(oldIdx, newIdx)
      oldIdx++
    }
    // Strict comparison keeps the earliest combination on ties.
    if (score > bestScore) {
      bestScore = score
      bestPositions = combo
    }
  }
  return bestPositions
}
|
|
586
|
+
|
|
587
|
+
/**
 * Chooses which `k` cells of `oldRow` are best explained as deleted
 * columns.
 *
 * Every ascending combination of `k` old-cell indices is scored by
 * pairing the remaining old cells with the new cells in order and summing
 * `cellSimilarity` over the pairs. The first combination (in the order
 * produced by `combinationsOfRange`) with the strictly highest score wins.
 *
 * Each (old cell, new cell) similarity is computed at most once and then
 * reused across combinations; the same pairs recur in a large share of
 * the up-to-C(n, k) combinations, so recomputing them dominated the cost.
 * This relies on `cellSimilarity` being a pure function of its arguments
 * (NOTE(review): it is in the visible code — confirm if it changes).
 *
 * @param oldRow - Row from the old table; expected to have `k` more cells
 *   than `newRow`.
 * @param newRow - Row from the new table.
 * @param k - Number of deleted cells to locate.
 * @param oldHtml - Source HTML that `oldRow` indexes into.
 * @param newHtml - Source HTML that `newRow` indexes into.
 * @returns Ascending indices into `oldRow.cells`, or `[]` when
 *   `combinationsOfRange` yields no combination.
 */
function findBestColumnDeletePositions(
  oldRow: RowRange,
  newRow: RowRange,
  k: number,
  oldHtml: string,
  newHtml: string
): number[] {
  const oldCount = oldRow.cells.length

  // Lazy cache keyed by (newIdx, oldIdx). Keys are unique because
  // oldIdx < oldCount whenever a lookup happens.
  const memo: Array<number | undefined> = []
  const similarityAt = (oldIdx: number, newIdx: number): number => {
    const key = newIdx * oldCount + oldIdx
    let value = memo[key]
    if (value === undefined) {
      value = cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml)
      memo[key] = value
    }
    return value
  }

  let bestPositions: number[] = []
  let bestScore = -1
  for (const combo of combinationsOfRange(oldCount, k)) {
    let score = 0
    let newIdx = 0
    // `combo` is sorted ascending, so a single cursor replaces a Set lookup.
    let nextDeleted = 0
    for (let oldIdx = 0; oldIdx < oldCount; oldIdx++) {
      if (nextDeleted < combo.length && combo[nextDeleted] === oldIdx) {
        nextDeleted++
        continue
      }
      score += similarityAt(oldIdx, newIdx)
      newIdx++
    }
    // Strict comparison keeps the earliest combination on ties.
    if (score > bestScore) {
      bestScore = score
      bestPositions = combo
    }
  }
  return bestPositions
}
|
|
612
|
+
|
|
613
|
+
/**
 * Yields every combination of `k` distinct integers drawn from `[0, n)`,
 * each as a fresh ascending array, in lexicographic order.
 *
 * The implementation is iterative: it keeps one working array of `k`
 * indices and advances it in place, so memory stays at O(k) apart from
 * the copies handed to the consumer.
 *
 * Yields nothing when `k` is zero, negative, or greater than `n`, or when
 * either argument is not an integer. The integer guard matters: with a
 * fractional argument the "rightmost index that can still advance" test
 * below never matches its limit, and the generator would never finish.
 *
 * @param n - Exclusive upper bound of the index range.
 * @param k - Size of each combination.
 */
function* combinationsOfRange(n: number, k: number): IterableIterator<number[]> {
  if (!Number.isInteger(n) || !Number.isInteger(k)) return
  if (k <= 0 || k > n) return

  const indices = Array.from({ length: k }, (_, i) => i)
  while (true) {
    // Hand out a copy so consumers may keep or mutate what they receive.
    yield indices.slice()

    // Find the rightmost index that has not reached its maximum value.
    let i = k - 1
    while (i >= 0 && indices[i] === n - k + i) i--
    if (i < 0) return

    // Advance it and reset everything to its right to the smallest
    // ascending continuation.
    indices[i]++
    for (let j = i + 1; j < k; j++) indices[j] = indices[j - 1] + 1
  }
}
|
|
630
|
+
|
|
479
631
|
/**
|
|
480
632
|
* Try to align cells by logical column position (sum of colspans). When
|
|
481
633
|
* one side has a colspan'd cell that absorbs multiple cells on the other
|
|
@@ -887,18 +1039,41 @@ function pairSimilarUnmatched(
|
|
|
887
1039
|
}
|
|
888
1040
|
|
|
889
1041
|
/**
 * Similarity between an old and a new table row, used for fuzzy pairing.
 *
 * Extracts each row's text with `rowText` and scores the two strings with
 * `textSimilarity`, the combined metric shared by row-level and
 * cell-level pairing. That metric is the MAX of two complementary
 * measures:
 *
 *   1. **Character prefix+suffix similarity** — fraction of the longer
 *      string covered by shared prefix + shared suffix. Catches small
 *      edits in the middle of a string (one word changed in a row).
 *      Misses cases where the bulk of common content is in the middle
 *      and the ends differ.
 *
 *   2. **Token Jaccard similarity** — intersection-over-union of the
 *      whitespace-split tokens. Catches "most of the content is the
 *      same but bookended by different bits" — e.g. a row whose only
 *      edit is a column added at the start and another at the end,
 *      where the matching text in the middle would be invisible to
 *      prefix+suffix.
 *
 * Taking the maximum means a pair qualifies as soon as either measure is
 * high enough for the caller; neither alone covers the full range of
 * table edits this code is meant to handle.
 *
 * @param oldRow - Row from the old table.
 * @param newRow - Row from the new table.
 * @param oldHtml - Source HTML that `oldRow` indexes into.
 * @param newHtml - Source HTML that `newRow` indexes into.
 * @returns The score produced by `textSimilarity` for the two row texts.
 */
function rowSimilarity(oldRow: RowRange, newRow: RowRange, oldHtml: string, newHtml: string): number {
  const before = rowText(oldHtml, oldRow)
  const after = rowText(newHtml, newRow)
  return textSimilarity(before, after)
}
|
|
1065
|
+
|
|
1066
|
+
/**
 * Similarity between an old and a new table cell: the `textSimilarity`
 * score of the two cells' `cellText`.
 *
 * @param oldCell - Cell from the old table.
 * @param newCell - Cell from the new table.
 * @param oldHtml - Source HTML that `oldCell` indexes into.
 * @param newHtml - Source HTML that `newCell` indexes into.
 * @returns The score produced by `textSimilarity` for the two cell texts.
 */
function cellSimilarity(oldCell: CellRange, newCell: CellRange, oldHtml: string, newHtml: string): number {
  const before = cellText(oldHtml, oldCell)
  const after = cellText(newHtml, newCell)
  return textSimilarity(before, after)
}
|
|
1069
|
+
|
|
1070
|
+
/**
 * Combined similarity of two strings.
 *
 * Identical strings score 1 and a pairing where either side is empty
 * scores 0. Otherwise the result is the larger of
 * `charPrefixSuffixSimilarity` and `tokenJaccardSimilarity`.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The similarity score.
 */
function textSimilarity(a: string, b: string): number {
  if (a === b) return 1

  const eitherEmpty = a.length === 0 || b.length === 0
  if (eitherEmpty) return 0

  const byCharacters = charPrefixSuffixSimilarity(a, b)
  const byTokens = tokenJaccardSimilarity(a, b)
  return Math.max(byCharacters, byTokens)
}
|
|
901
1075
|
|
|
1076
|
+
function charPrefixSuffixSimilarity(a: string, b: string): number {
|
|
902
1077
|
let prefix = 0
|
|
903
1078
|
const minLen = Math.min(a.length, b.length)
|
|
904
1079
|
while (prefix < minLen && a[prefix] === b[prefix]) prefix++
|
|
@@ -915,6 +1090,18 @@ function rowSimilarity(oldRow: RowRange, newRow: RowRange, oldHtml: string, newH
|
|
|
915
1090
|
return (prefix + suffix) / Math.max(a.length, b.length)
|
|
916
1091
|
}
|
|
917
1092
|
|
|
1093
|
+
/**
 * Jaccard similarity (intersection over union) of the distinct
 * whitespace-separated tokens of two strings.
 *
 * Two strings that both contain no tokens score 1. Duplicate tokens
 * within a string count once.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns A value in [0, 1].
 */
function tokenJaccardSimilarity(a: string, b: string): number {
  const tokenize = (text: string): Set<string> => new Set(text.split(/\s+/).filter(Boolean))
  const left = tokenize(a)
  const right = tokenize(b)

  if (left.size + right.size === 0) return 1

  const shared = [...left].filter(token => right.has(token)).length
  // At least one set is non-empty here, so the union is never zero.
  return shared / (left.size + right.size - shared)
}
|
|
1104
|
+
|
|
918
1105
|
function rowText(html: string, row: RowRange): string {
|
|
919
1106
|
const parts: string[] = []
|
|
920
1107
|
for (const cell of row.cells) {
|
|
@@ -923,34 +1110,6 @@ function rowText(html: string, row: RowRange): string {
|
|
|
923
1110
|
return parts.join(' ').replace(/\s+/g, ' ').trim().toLowerCase()
|
|
924
1111
|
}
|
|
925
1112
|
|
|
926
|
-
/**
|
|
927
|
-
* Character-level prefix+suffix similarity for a single cell's text
|
|
928
|
-
* content. Same metric as rowSimilarity, scoped to one cell so we can
|
|
929
|
-
* fuzzy-pair unmatched cells (e.g. a cell with a content edit alongside
|
|
930
|
-
* a column add in the same row).
|
|
931
|
-
*/
|
|
932
|
-
function cellSimilarity(oldCell: CellRange, newCell: CellRange, oldHtml: string, newHtml: string): number {
|
|
933
|
-
const a = cellText(oldHtml, oldCell)
|
|
934
|
-
const b = cellText(newHtml, newCell)
|
|
935
|
-
if (a === b) return 1
|
|
936
|
-
if (a.length === 0 || b.length === 0) return 0
|
|
937
|
-
|
|
938
|
-
let prefix = 0
|
|
939
|
-
const minLen = Math.min(a.length, b.length)
|
|
940
|
-
while (prefix < minLen && a[prefix] === b[prefix]) prefix++
|
|
941
|
-
|
|
942
|
-
let suffix = 0
|
|
943
|
-
while (
|
|
944
|
-
suffix < a.length - prefix &&
|
|
945
|
-
suffix < b.length - prefix &&
|
|
946
|
-
a[a.length - 1 - suffix] === b[b.length - 1 - suffix]
|
|
947
|
-
) {
|
|
948
|
-
suffix++
|
|
949
|
-
}
|
|
950
|
-
|
|
951
|
-
return (prefix + suffix) / Math.max(a.length, b.length)
|
|
952
|
-
}
|
|
953
|
-
|
|
954
1113
|
function cellText(html: string, cell: CellRange): string {
|
|
955
1114
|
return html
|
|
956
1115
|
.slice(cell.contentStart, cell.contentEnd)
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest'
|
|
2
|
+
|
|
3
|
+
import HtmlDiff from '../src/HtmlDiff'
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Exhaustive matrix of common table operations and their pairwise
|
|
7
|
+
* combinations. Each case runs HtmlDiff.execute and asserts structural
|
|
8
|
+
* invariants on the output:
|
|
9
|
+
*
|
|
10
|
+
* • Every `<tr>` opens and closes
|
|
11
|
+
* • No row has more cells than max(old-row, new-row) cell count
|
|
12
|
+
* (accounting for colspan)
|
|
13
|
+
* • All `<ins>`/`<del>` tags balance
|
|
14
|
+
* • Class markers are coherent (a `<tr class='diffins'>` row's cells
|
|
15
|
+
* all have ins-marked content or are empty; a deleted row's cells
|
|
16
|
+
* all have del-marked content or are empty)
|
|
17
|
+
* • The cell content of every `<td>` from new appears somewhere in
|
|
18
|
+
* the output (we don't silently drop cells)
|
|
19
|
+
*
|
|
20
|
+
* The matrix is built combinatorially — single operations × single
|
|
21
|
+
* operations — so a regression in any pairwise combination surfaces
|
|
22
|
+
* here even if no test was added for that exact pair.
|
|
23
|
+
*/
|
|
24
|
+
describe('HtmlDiff — table operations matrix', () => {
  // Each single operation applied to the base table, diffed against the base.
  describe('single operations on a 3×3 base', () => {
    allSingleOperations().forEach(op => {
      it(`${op.name} produces structurally valid output`, () => {
        const base = baseTable3x3()
        const oldHtml = renderTable(base)
        const newHtml = op.apply(base)
        assertStructurallyValid(HtmlDiff.execute(oldHtml, newHtml), oldHtml, newHtml, op.name)
      })
    })
  })

  // Every ordered pair of distinct operations: the second is applied to the
  // re-parsed output of the first, and the result is diffed against the base.
  describe('pairwise combinations on a 3×3 base', () => {
    const ops = allSingleOperations()
    for (const first of ops) {
      for (const second of ops) {
        if (first === second || !canCompose(first, second)) continue
        const label = `${first.name} + ${second.name}`
        it(`${label} produces structurally valid output`, () => {
          const base = baseTable3x3()
          const oldHtml = renderTable(base)
          const afterFirst = parseTable(first.apply(base))
          const newHtml = second.apply(afterFirst)
          assertStructurallyValid(HtmlDiff.execute(oldHtml, newHtml), oldHtml, newHtml, label)
        })
      }
    }
  })

  describe('user-reported regression scenarios', () => {
    it('column added + empty row inserted in middle (regression for issue with blank row)', () => {
      const oldHtml = [
        '<table>',
        '<tr><th>Party</th><th>Form</th><th>Date</th></tr>',
        '<tr><td>Party A</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto).</td><td>(i) Upon execution.</td></tr>',
        '<tr><td>Party B</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>',
        '</table>',
      ].join('')
      const newHtml = [
        '<table>',
        '<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>',
        "<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto). Here's some extra content</td><td>(i) Upon execution.</td></tr>",
        '<tr><td></td><td></td><td></td><td></td></tr>',
        '<tr><td>Party B</td><td>A</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>',
        '</table>',
      ].join('')

      const result = HtmlDiff.execute(oldHtml, newHtml)
      assertStructurallyValid(result, oldHtml, newHtml, 'column-add + empty row insert')

      // Targeted checks on top of the generic invariants, so a drift in this
      // exact scenario is reported precisely.
      // Expected rows: header + Party A + empty + Party B.
      expect(countMatches(result, /<tr[\s>]/g)).toBe(4)
      const insertedEmptyRow =
        "<tr class='diffins'><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td></tr>"
      expect(result).toContain(insertedEmptyRow)
    })
  })
})
|
|
87
|
+
|
|
88
|
+
// ──────────────────────── operations ────────────────────────
|
|
89
|
+
|
|
90
|
+
/** A named table edit used as one axis of the operations matrix. */
interface Op {
  /** Label that appears in the generated test titles. */
  name: string
  /** Applies the edit to `table` and returns the resulting table as HTML. */
  apply: (table: TableData) => string
}
|
|
94
|
+
|
|
95
|
+
/**
 * Builds the list of single table operations exercised by the matrix.
 *
 * Every operation except `no-op` runs its edit through `mutate` (which
 * works on a copy) and renders the result with `renderTable`. Indices
 * derived from `t` refer to the table passed to `apply`, not to the copy
 * being edited.
 */
function allSingleOperations(): Op[] {
  // Wraps an edit callback into an Op that mutates a copy and renders it.
  const edited = (name: string, edit: (m: Mutator, t: TableData) => void): Op => ({
    name,
    apply: t => renderTable(mutate(t, m => edit(m, t))),
  })
  // Fresh arrays per call so no inserted row/column is shared between tables.
  const freshRow = (): string[] => ['NEW1', 'NEW2', 'NEW3']
  const freshColumn = (): string[] => ['NewHeader', 'newA', 'newB', 'newC']

  return [
    { name: 'no-op', apply: t => renderTable(t) },
    edited('edit-cell', m => m.editCell(1, 1, 'EDITED')),

    edited('add-row-start', m => m.addRowAt(0, freshRow())),
    edited('add-row-middle', m => m.addRowAt(2, freshRow())),
    edited('add-row-end', (m, t) => m.addRowAt(t.rows.length, freshRow())),
    edited('add-empty-row-middle', m => m.addRowAt(2, ['', '', ''])),
    edited('add-multiple-rows', (m, t) => {
      m.addRowAt(t.rows.length, ['X1', 'X2', 'X3'])
      m.addRowAt(t.rows.length + 1, ['Y1', 'Y2', 'Y3'])
    }),

    // Row 0 is the header, so the "start" deletion targets row 1.
    edited('delete-row-start', m => m.deleteRow(1)),
    edited('delete-row-middle', m => m.deleteRow(2)),
    edited('delete-row-end', (m, t) => m.deleteRow(t.rows.length - 1)),
    edited('delete-multiple-rows', (m, t) => {
      m.deleteRow(t.rows.length - 1)
      m.deleteRow(1)
    }),

    edited('add-column-start', m => m.addColumnAt(0, freshColumn())),
    edited('add-column-middle', m => m.addColumnAt(1, freshColumn())),
    edited('add-column-end', (m, t) => m.addColumnAt(t.rows[0].length, freshColumn())),
    edited('add-multiple-columns', (m, t) => {
      m.addColumnAt(t.rows[0].length, ['H1', 'a1', 'b1', 'c1'])
      m.addColumnAt(t.rows[0].length + 1, ['H2', 'a2', 'b2', 'c2'])
    }),

    edited('delete-column-start', m => m.deleteColumn(0)),
    edited('delete-column-middle', m => m.deleteColumn(1)),
    edited('delete-column-end', (m, t) => m.deleteColumn(t.rows[0].length - 1)),

    edited('shift-content-right', m => m.shiftContentRight(1)),
  ]
}
|
|
157
|
+
|
|
158
|
+
/**
 * Reports whether operation `_b` may be applied after operation `_a` in
 * the pairwise matrix.
 *
 * Some pairs do not compose cleanly because the second operation's
 * row/column index assumes the original table dimensions. This hook is
 * where such pairs would be filtered out; at present it accepts every
 * pair and neither argument is inspected.
 */
function canCompose(_a: Op, _b: Op): boolean {
  // Nothing is excluded for now: the mutate helpers clamp insert positions
  // and ignore out-of-range deletes, so an out-of-bounds index degrades
  // gracefully instead of failing.
  return true
}
|
|
169
|
+
|
|
170
|
+
// ──────────────────────── table model ────────────────────────
|
|
171
|
+
|
|
172
|
+
/** Minimal in-memory table model used by the matrix helpers. */
interface TableData {
  /** Cell contents, one inner array per row; `renderTable` treats row 0 as the header. */
  rows: string[][]
}
|
|
175
|
+
|
|
176
|
+
/**
 * Creates the base fixture: a header row (`Header1`..`Header3`) followed
 * by three body rows labelled `A`, `B` and `C`, each with cells numbered
 * 1 to 3. A new object is built on every call.
 */
function baseTable3x3(): TableData {
  const columns = [1, 2, 3]
  const header = columns.map(col => `Header${col}`)
  const body = ['A', 'B', 'C'].map(rowLabel => columns.map(col => `${rowLabel}${col}`))
  return { rows: [header, ...body] }
}
|
|
186
|
+
|
|
187
|
+
/**
 * Serialises a table model to HTML with no whitespace between tags.
 * Cells of row 0 are rendered as `<th>`, all other cells as `<td>`. Cell
 * content is inserted verbatim (no escaping).
 */
function renderTable(t: TableData): string {
  const renderedRows = t.rows.map((cells, rowIndex) => {
    const tag = rowIndex === 0 ? 'th' : 'td'
    const renderedCells = cells.map(cell => `<${tag}>${cell}</${tag}>`)
    return `<tr>${renderedCells.join('')}</tr>`
  })
  return `<table>${renderedRows.join('')}</table>`
}
|
|
200
|
+
|
|
201
|
+
/**
 * Reads a table model back out of HTML produced by this spec's own
 * renderer. This is a regex-based extractor, NOT a general HTML parser:
 * it collects the inner text of each `<td>`/`<th>` inside each `<tr>`.
 */
function parseTable(html: string): TableData {
  const rows = Array.from(html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs), rowMatch =>
    Array.from(rowMatch[1].matchAll(/<t[dh][^>]*>(.*?)<\/t[dh]>/gs), cellMatch => cellMatch[1])
  )
  return { rows }
}
|
|
214
|
+
|
|
215
|
+
/** Editing operations that `mutate` exposes to its callback. */
interface Mutator {
  /** Replaces the content of the cell at (`row`, `col`). */
  editCell(row: number, col: number, content: string): void
  /** Inserts `content` as a new row at index `at`. */
  addRowAt(at: number, content: string[]): void
  /** Removes the row at index `at`. */
  deleteRow(at: number): void
  /** Inserts a column at index `at`; `columnContent[r]` supplies the cell for row `r`. */
  addColumnAt(at: number, columnContent: string[]): void
  /** Removes the column at index `at` from every row. */
  deleteColumn(at: number): void
  /** Shifts the cell contents of row `rowIdx` one position to the right. */
  shiftContentRight(rowIdx: number): void
}
|
|
223
|
+
|
|
224
|
+
/**
 * Applies edits to a copy of `t` and returns the copy; `t` and its rows
 * are left untouched.
 *
 * Out-of-range arguments never throw: insert positions are clamped into
 * the valid range, while edits and deletes that point outside the table
 * are ignored.
 *
 * @param t - Table to copy.
 * @param fn - Callback that performs the edits through the given mutator.
 * @returns The edited copy.
 */
function mutate(t: TableData, fn: (m: Mutator) => void): TableData {
  const copy: TableData = { rows: t.rows.map(cells => Array.from(cells)) }
  const grid = copy.rows

  const clamp = (at: number, upper: number): number => Math.max(0, Math.min(at, upper))
  const inRange = (at: number, length: number): boolean => at >= 0 && at < length

  const editor: Mutator = {
    editCell(row, col, content) {
      const target = grid[row]
      // Only overwrite cells that already exist.
      if (target?.[col] === undefined) return
      target[col] = content
    },
    addRowAt(at, content) {
      grid.splice(clamp(at, grid.length), 0, content)
    },
    deleteRow(at) {
      if (inRange(at, grid.length)) grid.splice(at, 1)
    },
    addColumnAt(at, columnContent) {
      grid.forEach((cells, r) => {
        // Rows without a supplied value get an empty cell.
        cells.splice(clamp(at, cells.length), 0, columnContent[r] ?? '')
      })
    },
    deleteColumn(at) {
      grid.forEach(cells => {
        if (inRange(at, cells.length)) cells.splice(at, 1)
      })
    },
    shiftContentRight(rowIdx) {
      const cells = grid[rowIdx]
      if (!cells) return
      // Drop the last cell's content and put an empty cell in front, which
      // moves every remaining value one position to the right.
      cells.pop()
      cells.unshift('')
    },
  }

  fn(editor)
  return copy
}
|
|
260
|
+
|
|
261
|
+
// ──────────────────────── invariant checks ────────────────────────
|
|
262
|
+
|
|
263
|
+
/**
 * Asserts that a diff output is structurally valid:
 *   • opening and closing `<tr>`, `<td>`, `<th>`, `<ins>` and `<del>`
 *     tags occur in equal numbers
 *   • no output row has more cells than the widest row found in either
 *     input table — no phantom cells
 *   • the output is non-empty whenever the inputs differ
 *
 * @param output - Diff result to check.
 * @param oldHtml - Old input passed to the diff.
 * @param newHtml - New input passed to the diff.
 * @param label - Scenario name, prefixed to every failure message.
 */
function assertStructurallyValid(output: string, oldHtml: string, newHtml: string, label: string) {
  const ctx = `[${label}]`

  // Tag balance: each opening tag count must equal its closing tag count.
  const balancedTags: Array<[string, RegExp, RegExp]> = [
    ['tr', /<tr[\s>]/g, /<\/tr>/g],
    ['td', /<td[\s>]/g, /<\/td>/g],
    ['th', /<th[\s>]/g, /<\/th>/g],
    ['ins', /<ins[\s>]/g, /<\/ins>/g],
    ['del', /<del[\s>]/g, /<\/del>/g],
  ]
  for (const [tag, opening, closing] of balancedTags) {
    const opened = countMatches(output, opening)
    const closed = countMatches(output, closing)
    expect(opened, `${ctx} <${tag}> tag balance`).toBe(closed)
  }

  // Cell count per output row, bounded by the widest input row.
  const limit = Math.max(maxRowCellCount(oldHtml), maxRowCellCount(newHtml))
  for (const [, rowBody] of output.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs)) {
    const cellsInRow = countMatches(rowBody, /<t[dh][\s>]/g)
    expect(cellsInRow, `${ctx} row has too many cells (${cellsInRow} > ${limit})`).toBeLessThanOrEqual(limit)
  }

  // Differing inputs must produce some output.
  if (oldHtml === newHtml) return
  expect(output.length, `${ctx} output is empty`).toBeGreaterThan(0)
}
|
|
313
|
+
|
|
314
|
+
/**
 * Returns the largest number of `<td>`/`<th>` opening tags found inside
 * any single `<tr>…</tr>` of `html`, or 0 when there are no rows.
 */
function maxRowCellCount(html: string): number {
  let widest = 0
  for (const [, rowBody] of html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs)) {
    const cellTags = rowBody.match(/<t[dh][\s>]/g)
    const cellCount = cellTags ? cellTags.length : 0
    widest = Math.max(widest, cellCount)
  }
  return widest
}
|
|
323
|
+
|
|
324
|
+
/**
 * Returns the length of the array produced by `s.match(re)`, or 0 when
 * there is no match. With a global regex this is the number of matches.
 */
function countMatches(s: string, re: RegExp): number {
  return s.match(re)?.length ?? 0
}
|