@createiq/htmldiff 1.1.0-beta.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +32 -33
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.mjs +32 -33
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +7 -7
- package/src/TableDiff.ts +50 -46
- package/test/HtmlDiff.tables.matrix.spec.ts +8 -3
- package/test/HtmlDiff.tables.spec.ts +334 -9
- package/.claude/settings.local.json +0 -15
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@createiq/htmldiff",
|
|
3
|
-
"version": "1.1.0-beta.0",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "TypeScript port of htmldiff.net",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"author": "Mathew Mannion <mathew.mannion@linklaters.com>",
|
|
@@ -23,17 +23,17 @@
|
|
|
23
23
|
"node": ">=20.0.0"
|
|
24
24
|
},
|
|
25
25
|
"devDependencies": {
|
|
26
|
-
"@biomejs/biome": "2.4.
|
|
26
|
+
"@biomejs/biome": "2.4.15",
|
|
27
27
|
"@cyclonedx/cyclonedx-npm": "4.2.1",
|
|
28
28
|
"@tsconfig/recommended": "1.0.13",
|
|
29
|
-
"@types/node": "24.12.
|
|
30
|
-
"@vitest/coverage-v8": "4.1.
|
|
31
|
-
"@vitest/ui": "4.1.
|
|
29
|
+
"@types/node": "24.12.4",
|
|
30
|
+
"@vitest/coverage-v8": "4.1.6",
|
|
31
|
+
"@vitest/ui": "4.1.6",
|
|
32
32
|
"husky": "9.1.7",
|
|
33
|
-
"lint-staged": "17.0.
|
|
33
|
+
"lint-staged": "17.0.4",
|
|
34
34
|
"tsdown": "0.22.0",
|
|
35
35
|
"typescript": "6.0.3",
|
|
36
|
-
"vitest": "4.1.
|
|
36
|
+
"vitest": "4.1.6"
|
|
37
37
|
},
|
|
38
38
|
"scripts": {
|
|
39
39
|
"prepare": "husky",
|
package/src/TableDiff.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { wrapText } from './Utils'
|
|
2
|
+
|
|
1
3
|
/**
|
|
2
4
|
* Table-aware preprocessing for HtmlDiff.
|
|
3
5
|
*
|
|
@@ -73,6 +75,13 @@ const PLACEHOLDER_SUFFIX = '-->'
|
|
|
73
75
|
const MAX_TABLE_ROWS = 1500
|
|
74
76
|
const MAX_TABLE_CELLS_PER_ROW = 200
|
|
75
77
|
|
|
78
|
+
// Caps for the per-row combinatorial column-position search in
|
|
79
|
+
// findBestColumnInsertPositions / findBestColumnDeletePositions. Worst
|
|
80
|
+
// case is C(MAX_COLUMN_SEARCH_WIDTH, MAX_COLUMN_DELTA) ≈ 3.8M combos at
|
|
81
|
+
// the caps below; wider or more-skewed rows fall through to cell-LCS.
|
|
82
|
+
const MAX_COLUMN_DELTA = 6
|
|
83
|
+
const MAX_COLUMN_SEARCH_WIDTH = 40
|
|
84
|
+
|
|
76
85
|
function makePlaceholderPrefix(oldHtml: string, newHtml: string): string {
|
|
77
86
|
// 4 random bytes → 8 hex chars → 16^8 ≈ 4.3 billion combinations. We
|
|
78
87
|
// also retry if the generated nonce happens to occur in either input.
|
|
@@ -395,7 +404,7 @@ function diffStructurallyAlignedTable(
|
|
|
395
404
|
// has no rows at all, fall back to a from-scratch reconstruction so
|
|
396
405
|
// we still emit deleted rows.
|
|
397
406
|
if (newTable.rows.length === 0) {
|
|
398
|
-
return rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment, diffCell)
|
|
407
|
+
return rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment)
|
|
399
408
|
}
|
|
400
409
|
|
|
401
410
|
const out: string[] = []
|
|
@@ -408,11 +417,11 @@ function diffStructurallyAlignedTable(
|
|
|
408
417
|
if (align.oldIdx !== null) {
|
|
409
418
|
out.push(diffPreservedRow(oldHtml, newHtml, oldTable.rows[align.oldIdx], newRow, diffCell))
|
|
410
419
|
} else {
|
|
411
|
-
out.push(emitFullRow(newHtml, newRow, 'ins'
|
|
420
|
+
out.push(emitFullRow(newHtml, newRow, 'ins'))
|
|
412
421
|
}
|
|
413
422
|
cursor = newRow.rowEnd
|
|
414
423
|
} else if (align.oldIdx !== null) {
|
|
415
|
-
out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], 'del'
|
|
424
|
+
out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], 'del'))
|
|
416
425
|
}
|
|
417
426
|
}
|
|
418
427
|
out.push(newHtml.slice(cursor, newTable.tableEnd))
|
|
@@ -497,8 +506,7 @@ function rebuildStructurallyAlignedTable(
|
|
|
497
506
|
newHtml: string,
|
|
498
507
|
oldTable: TableRange,
|
|
499
508
|
newTable: TableRange,
|
|
500
|
-
alignment: Alignment[],
|
|
501
|
-
diffCell: DiffCellFn
|
|
509
|
+
alignment: Alignment[]
|
|
502
510
|
): string {
|
|
503
511
|
// Used when new has no rows but old does — we lose the per-row
|
|
504
512
|
// wrappers from new (there are none), so reconstruct from old's frame.
|
|
@@ -506,9 +514,9 @@ function rebuildStructurallyAlignedTable(
|
|
|
506
514
|
out.push(headerSlice(newHtml, newTable, oldHtml, oldTable))
|
|
507
515
|
for (const align of alignment) {
|
|
508
516
|
if (align.oldIdx !== null) {
|
|
509
|
-
out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], 'del'
|
|
517
|
+
out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], 'del'))
|
|
510
518
|
} else if (align.newIdx !== null) {
|
|
511
|
-
out.push(emitFullRow(newHtml, newTable.rows[align.newIdx], 'ins'
|
|
519
|
+
out.push(emitFullRow(newHtml, newTable.rows[align.newIdx], 'ins'))
|
|
512
520
|
}
|
|
513
521
|
}
|
|
514
522
|
out.push('</table>')
|
|
@@ -548,21 +556,15 @@ function diffPreservedRow(
|
|
|
548
556
|
// on each affected cell.
|
|
549
557
|
const colspanAligned = diffColspanChangedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
|
|
550
558
|
if (colspanAligned !== null) return colspanAligned
|
|
551
|
-
// For a single-column add/delete (cell count differs by exactly 1),
|
|
552
|
-
// detect the position via positional similarity scan and align the
|
|
553
|
-
// remaining cells positionally. This handles the case where a column
|
|
554
|
-
// was added AND a different cell got an unrelated content edit — the
|
|
555
|
-
// edited cell still aligns by position rather than getting orphaned by
|
|
556
|
-
// the cell-LCS exact-match.
|
|
557
|
-
const delta = newRow.cells.length - oldRow.cells.length
|
|
558
559
|
// For column add/delete (cell counts differ), find the best insertion
|
|
559
560
|
// or deletion positions via positional similarity scan and align the
|
|
560
561
|
// remaining cells positionally. This handles content-edit alongside
|
|
561
562
|
// column-add by keeping the edited cell in its column position rather
|
|
562
563
|
// than orphaning it via the cell-LCS exact match.
|
|
563
564
|
// Guardrail: combinatorial search is C(newCount, k); we cap to avoid
|
|
564
|
-
// explosion on very wide tables.
|
|
565
|
-
//
|
|
565
|
+
// explosion on very wide tables. Worst case at the caps is C(40, 6) ≈
|
|
566
|
+
// 3.8M combos; above that we fall through to cell-LCS.
|
|
567
|
+
const delta = newRow.cells.length - oldRow.cells.length
|
|
566
568
|
const absDelta = Math.abs(delta)
|
|
567
569
|
if (
|
|
568
570
|
absDelta > 0 &&
|
|
@@ -575,9 +577,6 @@ function diffPreservedRow(
|
|
|
575
577
|
return diffStructurallyAlignedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
|
|
576
578
|
}
|
|
577
579
|
|
|
578
|
-
const MAX_COLUMN_DELTA = 6
|
|
579
|
-
const MAX_COLUMN_SEARCH_WIDTH = 40
|
|
580
|
-
|
|
581
580
|
/**
|
|
582
581
|
* For a row where new has K more cells than old, find the K column
|
|
583
582
|
* positions in new where cells were inserted by scanning all C(newCount,
|
|
@@ -600,7 +599,7 @@ function diffMultiColumnAddRow(
|
|
|
600
599
|
let oldIdx = 0
|
|
601
600
|
for (let c = 0; c < newRow.cells.length; c++) {
|
|
602
601
|
if (inserted.has(c)) {
|
|
603
|
-
out.push(emitFullCell(newHtml, newRow.cells[c], 'ins'
|
|
602
|
+
out.push(emitFullCell(newHtml, newRow.cells[c], 'ins'))
|
|
604
603
|
} else {
|
|
605
604
|
out.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[oldIdx], newRow.cells[c], diffCell))
|
|
606
605
|
oldIdx++
|
|
@@ -624,7 +623,7 @@ function diffMultiColumnDeleteRow(
|
|
|
624
623
|
let newIdx = 0
|
|
625
624
|
for (let oldIdx = 0; oldIdx < oldRow.cells.length; oldIdx++) {
|
|
626
625
|
if (deleted.has(oldIdx)) {
|
|
627
|
-
out.push(emitFullCell(oldHtml, oldRow.cells[oldIdx], 'del'
|
|
626
|
+
out.push(emitFullCell(oldHtml, oldRow.cells[oldIdx], 'del'))
|
|
628
627
|
continue
|
|
629
628
|
}
|
|
630
629
|
out.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[oldIdx], newRow.cells[newIdx], diffCell))
|
|
@@ -641,6 +640,11 @@ function findBestColumnInsertPositions(
|
|
|
641
640
|
oldHtml: string,
|
|
642
641
|
newHtml: string
|
|
643
642
|
): number[] {
|
|
643
|
+
// Pre-compute cell texts once instead of letting textSimilarity
|
|
644
|
+
// recompute them inside every combo iteration — C(N, K) combos times
|
|
645
|
+
// ~N text extractions each is a lot of wasted string work.
|
|
646
|
+
const oldTexts = oldRow.cells.map(c => cellText(oldHtml, c))
|
|
647
|
+
const newTexts = newRow.cells.map(c => cellText(newHtml, c))
|
|
644
648
|
let bestPositions: number[] = []
|
|
645
649
|
let bestScore = -1
|
|
646
650
|
for (const combo of combinationsOfRange(newRow.cells.length, k)) {
|
|
@@ -649,7 +653,7 @@ function findBestColumnInsertPositions(
|
|
|
649
653
|
let oldIdx = 0
|
|
650
654
|
for (let newIdx = 0; newIdx < newRow.cells.length; newIdx++) {
|
|
651
655
|
if (inserted.has(newIdx)) continue
|
|
652
|
-
score +=
|
|
656
|
+
score += textSimilarity(oldTexts[oldIdx], newTexts[newIdx])
|
|
653
657
|
oldIdx++
|
|
654
658
|
}
|
|
655
659
|
if (score > bestScore) {
|
|
@@ -667,6 +671,8 @@ function findBestColumnDeletePositions(
|
|
|
667
671
|
oldHtml: string,
|
|
668
672
|
newHtml: string
|
|
669
673
|
): number[] {
|
|
674
|
+
const oldTexts = oldRow.cells.map(c => cellText(oldHtml, c))
|
|
675
|
+
const newTexts = newRow.cells.map(c => cellText(newHtml, c))
|
|
670
676
|
let bestPositions: number[] = []
|
|
671
677
|
let bestScore = -1
|
|
672
678
|
for (const combo of combinationsOfRange(oldRow.cells.length, k)) {
|
|
@@ -675,7 +681,7 @@ function findBestColumnDeletePositions(
|
|
|
675
681
|
let newIdx = 0
|
|
676
682
|
for (let oldIdx = 0; oldIdx < oldRow.cells.length; oldIdx++) {
|
|
677
683
|
if (deleted.has(oldIdx)) continue
|
|
678
|
-
score +=
|
|
684
|
+
score += textSimilarity(oldTexts[oldIdx], newTexts[newIdx])
|
|
679
685
|
newIdx++
|
|
680
686
|
}
|
|
681
687
|
if (score > bestScore) {
|
|
@@ -864,9 +870,9 @@ function diffStructurallyAlignedRow(
|
|
|
864
870
|
const newCell = newRow.cells[align.newIdx]
|
|
865
871
|
out.push(emitDiffedCell(oldHtml, newHtml, oldCell, newCell, diffCell))
|
|
866
872
|
} else if (align.newIdx !== null) {
|
|
867
|
-
out.push(emitFullCell(newHtml, newRow.cells[align.newIdx], 'ins'
|
|
873
|
+
out.push(emitFullCell(newHtml, newRow.cells[align.newIdx], 'ins'))
|
|
868
874
|
} else if (align.oldIdx !== null) {
|
|
869
|
-
out.push(emitFullCell(oldHtml, oldRow.cells[align.oldIdx], 'del'
|
|
875
|
+
out.push(emitFullCell(oldHtml, oldRow.cells[align.oldIdx], 'del'))
|
|
870
876
|
}
|
|
871
877
|
}
|
|
872
878
|
|
|
@@ -888,7 +894,7 @@ function cellKey(html: string, cell: CellRange): string {
|
|
|
888
894
|
* each `<td>`, with an `<ins>`/`<del>` wrapper around any cell content
|
|
889
895
|
* (empty cells get the class but no wrapper).
|
|
890
896
|
*/
|
|
891
|
-
function emitFullRow(html: string, row: RowRange, kind: 'ins' | 'del', diffCell: DiffCellFn): string {
|
|
897
|
+
function emitFullRow(html: string, row: RowRange, kind: 'ins' | 'del'): string {
|
|
892
898
|
const cls = kind === 'ins' ? 'diffins' : 'diffdel'
|
|
893
899
|
const trOpening = parseOpeningTagAt(html, row.rowStart)
|
|
894
900
|
if (!trOpening) return html.slice(row.rowStart, row.rowEnd)
|
|
@@ -898,7 +904,7 @@ function emitFullRow(html: string, row: RowRange, kind: 'ins' | 'del', diffCell:
|
|
|
898
904
|
let cursor = trOpening.end
|
|
899
905
|
for (const cell of row.cells) {
|
|
900
906
|
out.push(html.slice(cursor, cell.cellStart))
|
|
901
|
-
out.push(emitFullCell(html, cell, kind
|
|
907
|
+
out.push(emitFullCell(html, cell, kind))
|
|
902
908
|
cursor = cell.cellEnd
|
|
903
909
|
}
|
|
904
910
|
out.push(html.slice(cursor, row.rowEnd))
|
|
@@ -913,7 +919,7 @@ function emitFullRow(html: string, row: RowRange, kind: 'ins' | 'del', diffCell:
|
|
|
913
919
|
* the full recursive diff would produce for newly-inserted formatting.
|
|
914
920
|
* Empty cells get the class on the `<td>` but no inner wrapping.
|
|
915
921
|
*/
|
|
916
|
-
function emitFullCell(html: string, cell: CellRange, kind: 'ins' | 'del'
|
|
922
|
+
function emitFullCell(html: string, cell: CellRange, kind: 'ins' | 'del'): string {
|
|
917
923
|
const cls = kind === 'ins' ? 'diffins' : 'diffdel'
|
|
918
924
|
const tdOpening = parseOpeningTagAt(html, cell.cellStart)
|
|
919
925
|
if (!tdOpening) return html.slice(cell.cellStart, cell.cellEnd)
|
|
@@ -954,7 +960,7 @@ function wrapInlineTextRuns(content: string, kind: 'ins' | 'del'): string {
|
|
|
954
960
|
while (j < content.length && content[j] !== '<') j++
|
|
955
961
|
const text = content.slice(i, j)
|
|
956
962
|
if (text.trim().length > 0) {
|
|
957
|
-
out.push(
|
|
963
|
+
out.push(wrapText(text, tag, cls))
|
|
958
964
|
} else {
|
|
959
965
|
out.push(text)
|
|
960
966
|
}
|
|
@@ -1024,8 +1030,13 @@ function pairSimilarUnmatchedRows(
|
|
|
1024
1030
|
oldHtml: string,
|
|
1025
1031
|
newHtml: string
|
|
1026
1032
|
): Alignment[] {
|
|
1033
|
+
// Pre-compute row texts once; the similarity callback is invoked
|
|
1034
|
+
// O(D × I) times per unmatched run (every del × every ins), and
|
|
1035
|
+
// rowText walks every cell.
|
|
1036
|
+
const oldTexts = oldTable.rows.map(r => rowText(oldHtml, r))
|
|
1037
|
+
const newTexts = newTable.rows.map(r => rowText(newHtml, r))
|
|
1027
1038
|
return pairSimilarUnmatched(alignment, ROW_FUZZY_THRESHOLD, (oldIdx, newIdx) =>
|
|
1028
|
-
|
|
1039
|
+
textSimilarity(oldTexts[oldIdx], newTexts[newIdx])
|
|
1029
1040
|
)
|
|
1030
1041
|
}
|
|
1031
1042
|
|
|
@@ -1036,8 +1047,10 @@ function pairSimilarUnmatchedCells(
|
|
|
1036
1047
|
oldHtml: string,
|
|
1037
1048
|
newHtml: string
|
|
1038
1049
|
): Alignment[] {
|
|
1050
|
+
const oldTexts = oldRow.cells.map(c => cellText(oldHtml, c))
|
|
1051
|
+
const newTexts = newRow.cells.map(c => cellText(newHtml, c))
|
|
1039
1052
|
return pairSimilarUnmatched(alignment, CELL_FUZZY_THRESHOLD, (oldIdx, newIdx) =>
|
|
1040
|
-
|
|
1053
|
+
textSimilarity(oldTexts[oldIdx], newTexts[newIdx])
|
|
1041
1054
|
)
|
|
1042
1055
|
}
|
|
1043
1056
|
|
|
@@ -1135,14 +1148,6 @@ function pairSimilarUnmatched(
|
|
|
1135
1148
|
* sufficient for the full range of legal-doc edits we see in
|
|
1136
1149
|
* production tables.
|
|
1137
1150
|
*/
|
|
1138
|
-
function rowSimilarity(oldRow: RowRange, newRow: RowRange, oldHtml: string, newHtml: string): number {
|
|
1139
|
-
return textSimilarity(rowText(oldHtml, oldRow), rowText(newHtml, newRow))
|
|
1140
|
-
}
|
|
1141
|
-
|
|
1142
|
-
function cellSimilarity(oldCell: CellRange, newCell: CellRange, oldHtml: string, newHtml: string): number {
|
|
1143
|
-
return textSimilarity(cellText(oldHtml, oldCell), cellText(newHtml, newCell))
|
|
1144
|
-
}
|
|
1145
|
-
|
|
1146
1151
|
function textSimilarity(a: string, b: string): number {
|
|
1147
1152
|
if (a === b) return 1
|
|
1148
1153
|
if (a.length === 0 || b.length === 0) return 0
|
|
@@ -1215,29 +1220,28 @@ function lcsAlign(oldKeys: string[], newKeys: string[]): Alignment[] {
|
|
|
1215
1220
|
}
|
|
1216
1221
|
}
|
|
1217
1222
|
|
|
1223
|
+
// Backtrack and push; reverse at the end. `unshift` is O(n) per call
|
|
1224
|
+
// so the naive version was O(n²); push+reverse is O(n) total.
|
|
1218
1225
|
const result: Alignment[] = []
|
|
1219
1226
|
let i = m
|
|
1220
1227
|
let j = n
|
|
1221
1228
|
while (i > 0 || j > 0) {
|
|
1222
1229
|
if (i > 0 && j > 0 && oldKeys[i - 1] === newKeys[j - 1]) {
|
|
1223
|
-
result.
|
|
1230
|
+
result.push({ oldIdx: i - 1, newIdx: j - 1 })
|
|
1224
1231
|
i--
|
|
1225
1232
|
j--
|
|
1226
1233
|
} else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) {
|
|
1227
|
-
result.
|
|
1234
|
+
result.push({ oldIdx: null, newIdx: j - 1 })
|
|
1228
1235
|
j--
|
|
1229
1236
|
} else {
|
|
1230
|
-
result.
|
|
1237
|
+
result.push({ oldIdx: i - 1, newIdx: null })
|
|
1231
1238
|
i--
|
|
1232
1239
|
}
|
|
1233
1240
|
}
|
|
1241
|
+
result.reverse()
|
|
1234
1242
|
return result
|
|
1235
1243
|
}
|
|
1236
1244
|
|
|
1237
|
-
/**
|
|
1238
|
-
* Returns the opening tag string with the given class injected. Existing
|
|
1239
|
-
* `class` attributes are preserved and the new class appended.
|
|
1240
|
-
*/
|
|
1241
1245
|
/**
|
|
1242
1246
|
* Returns the opening tag with the given class injected. Locates the real
|
|
1243
1247
|
* `class` attribute via attribute-aware walking (NOT a flat regex — that
|
|
@@ -108,9 +108,14 @@ describe('HtmlDiff — table operations matrix', () => {
|
|
|
108
108
|
// still drifts.
|
|
109
109
|
const rowCount = countMatches(result, /<tr[\s>]/g)
|
|
110
110
|
expect(rowCount).toBe(4) // header + Party A + empty + Party B
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
)
|
|
111
|
+
// The inserted empty row must be emitted with diffins on the <tr>
|
|
112
|
+
// and 4 empty diffins-marked cells. Asserted via regex (quote-
|
|
113
|
+
// agnostic, whitespace-tolerant) so an incidental change in
|
|
114
|
+
// attribute-quote style isn't flagged as a regression.
|
|
115
|
+
const emptyInsertedRow = result.match(/<tr class=['"]diffins['"]>(.*?)<\/tr>/)
|
|
116
|
+
expect(emptyInsertedRow).not.toBeNull()
|
|
117
|
+
const emptyCellCount = countMatches(emptyInsertedRow?.[1] ?? '', /<td class=['"]diffins['"]><\/td>/g)
|
|
118
|
+
expect(emptyCellCount).toBe(4)
|
|
114
119
|
})
|
|
115
120
|
})
|
|
116
121
|
})
|