@createiq/htmldiff 1.1.0-beta.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +32 -33
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.mjs +32 -33
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +7 -7
- package/src/TableDiff.ts +50 -46
- package/test/HtmlDiff.tables.matrix.spec.ts +8 -3
- package/test/HtmlDiff.tables.spec.ts +334 -9
- package/.claude/settings.local.json +0 -15
package/dist/HtmlDiff.cjs
CHANGED
|
@@ -216,6 +216,8 @@ const PLACEHOLDER_SUFFIX = "-->";
|
|
|
216
216
|
*/
|
|
217
217
|
const MAX_TABLE_ROWS = 1500;
|
|
218
218
|
const MAX_TABLE_CELLS_PER_ROW = 200;
|
|
219
|
+
const MAX_COLUMN_DELTA = 6;
|
|
220
|
+
const MAX_COLUMN_SEARCH_WIDTH = 40;
|
|
219
221
|
function makePlaceholderPrefix(oldHtml, newHtml) {
|
|
220
222
|
for (let attempt = 0; attempt < 8; attempt++) {
|
|
221
223
|
const prefix = `${PLACEHOLDER_PREFIX_BASE}${Math.floor(Math.random() * 4294967295).toString(16).padStart(8, "0")}_`;
|
|
@@ -412,7 +414,7 @@ function diffPositionalTable(oldHtml, newHtml, oldTable, newTable, diffCell) {
|
|
|
412
414
|
*/
|
|
413
415
|
function diffStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, diffCell) {
|
|
414
416
|
const alignment = orderAlignmentForEmission(pairSimilarUnmatchedRows(lcsAlign(oldTable.rows.map((row) => rowKey(oldHtml, row)), newTable.rows.map((row) => rowKey(newHtml, row))), oldTable, newTable, oldHtml, newHtml));
|
|
415
|
-
if (newTable.rows.length === 0) return rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment
|
|
417
|
+
if (newTable.rows.length === 0) return rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment);
|
|
416
418
|
const out = [];
|
|
417
419
|
out.push(newHtml.slice(newTable.tableStart, newTable.rows[0].rowStart));
|
|
418
420
|
let cursor = newTable.rows[0].rowStart;
|
|
@@ -420,9 +422,9 @@ function diffStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, diff
|
|
|
420
422
|
const newRow = newTable.rows[align.newIdx];
|
|
421
423
|
out.push(newHtml.slice(cursor, newRow.rowStart));
|
|
422
424
|
if (align.oldIdx !== null) out.push(diffPreservedRow(oldHtml, newHtml, oldTable.rows[align.oldIdx], newRow, diffCell));
|
|
423
|
-
else out.push(emitFullRow(newHtml, newRow, "ins"
|
|
425
|
+
else out.push(emitFullRow(newHtml, newRow, "ins"));
|
|
424
426
|
cursor = newRow.rowEnd;
|
|
425
|
-
} else if (align.oldIdx !== null) out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], "del"
|
|
427
|
+
} else if (align.oldIdx !== null) out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], "del"));
|
|
426
428
|
out.push(newHtml.slice(cursor, newTable.tableEnd));
|
|
427
429
|
return out.join("");
|
|
428
430
|
}
|
|
@@ -492,11 +494,11 @@ function orderAlignmentForEmission(alignment) {
|
|
|
492
494
|
});
|
|
493
495
|
return decorated.map((d) => d.entry);
|
|
494
496
|
}
|
|
495
|
-
function rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment
|
|
497
|
+
function rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment) {
|
|
496
498
|
const out = [];
|
|
497
499
|
out.push(headerSlice(newHtml, newTable, oldHtml, oldTable));
|
|
498
|
-
for (const align of alignment) if (align.oldIdx !== null) out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], "del"
|
|
499
|
-
else if (align.newIdx !== null) out.push(emitFullRow(newHtml, newTable.rows[align.newIdx], "ins"
|
|
500
|
+
for (const align of alignment) if (align.oldIdx !== null) out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], "del"));
|
|
501
|
+
else if (align.newIdx !== null) out.push(emitFullRow(newHtml, newTable.rows[align.newIdx], "ins"));
|
|
500
502
|
out.push("</table>");
|
|
501
503
|
return out.join("");
|
|
502
504
|
}
|
|
@@ -521,8 +523,6 @@ function diffPreservedRow(oldHtml, newHtml, oldRow, newRow, diffCell) {
|
|
|
521
523
|
}
|
|
522
524
|
return diffStructurallyAlignedRow(oldHtml, newHtml, oldRow, newRow, diffCell);
|
|
523
525
|
}
|
|
524
|
-
const MAX_COLUMN_DELTA = 6;
|
|
525
|
-
const MAX_COLUMN_SEARCH_WIDTH = 40;
|
|
526
526
|
/**
|
|
527
527
|
* For a row where new has K more cells than old, find the K column
|
|
528
528
|
* positions in new where cells were inserted by scanning all C(newCount,
|
|
@@ -536,7 +536,7 @@ function diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, k, diffCell) {
|
|
|
536
536
|
const inserted = new Set(insertedPositions);
|
|
537
537
|
const out = [rowHeaderSlice(newHtml, newRow)];
|
|
538
538
|
let oldIdx = 0;
|
|
539
|
-
for (let c = 0; c < newRow.cells.length; c++) if (inserted.has(c)) out.push(emitFullCell(newHtml, newRow.cells[c], "ins"
|
|
539
|
+
for (let c = 0; c < newRow.cells.length; c++) if (inserted.has(c)) out.push(emitFullCell(newHtml, newRow.cells[c], "ins"));
|
|
540
540
|
else {
|
|
541
541
|
out.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[oldIdx], newRow.cells[c], diffCell));
|
|
542
542
|
oldIdx++;
|
|
@@ -551,7 +551,7 @@ function diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, k, diffCell)
|
|
|
551
551
|
let newIdx = 0;
|
|
552
552
|
for (let oldIdx = 0; oldIdx < oldRow.cells.length; oldIdx++) {
|
|
553
553
|
if (deleted.has(oldIdx)) {
|
|
554
|
-
out.push(emitFullCell(oldHtml, oldRow.cells[oldIdx], "del"
|
|
554
|
+
out.push(emitFullCell(oldHtml, oldRow.cells[oldIdx], "del"));
|
|
555
555
|
continue;
|
|
556
556
|
}
|
|
557
557
|
out.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[oldIdx], newRow.cells[newIdx], diffCell));
|
|
@@ -561,6 +561,8 @@ function diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, k, diffCell)
|
|
|
561
561
|
return out.join("");
|
|
562
562
|
}
|
|
563
563
|
function findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml) {
|
|
564
|
+
const oldTexts = oldRow.cells.map((c) => cellText(oldHtml, c));
|
|
565
|
+
const newTexts = newRow.cells.map((c) => cellText(newHtml, c));
|
|
564
566
|
let bestPositions = [];
|
|
565
567
|
let bestScore = -1;
|
|
566
568
|
for (const combo of combinationsOfRange(newRow.cells.length, k)) {
|
|
@@ -569,7 +571,7 @@ function findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml) {
|
|
|
569
571
|
let oldIdx = 0;
|
|
570
572
|
for (let newIdx = 0; newIdx < newRow.cells.length; newIdx++) {
|
|
571
573
|
if (inserted.has(newIdx)) continue;
|
|
572
|
-
score +=
|
|
574
|
+
score += textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
|
|
573
575
|
oldIdx++;
|
|
574
576
|
}
|
|
575
577
|
if (score > bestScore) {
|
|
@@ -580,6 +582,8 @@ function findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml) {
|
|
|
580
582
|
return bestPositions;
|
|
581
583
|
}
|
|
582
584
|
function findBestColumnDeletePositions(oldRow, newRow, k, oldHtml, newHtml) {
|
|
585
|
+
const oldTexts = oldRow.cells.map((c) => cellText(oldHtml, c));
|
|
586
|
+
const newTexts = newRow.cells.map((c) => cellText(newHtml, c));
|
|
583
587
|
let bestPositions = [];
|
|
584
588
|
let bestScore = -1;
|
|
585
589
|
for (const combo of combinationsOfRange(oldRow.cells.length, k)) {
|
|
@@ -588,7 +592,7 @@ function findBestColumnDeletePositions(oldRow, newRow, k, oldHtml, newHtml) {
|
|
|
588
592
|
let newIdx = 0;
|
|
589
593
|
for (let oldIdx = 0; oldIdx < oldRow.cells.length; oldIdx++) {
|
|
590
594
|
if (deleted.has(oldIdx)) continue;
|
|
591
|
-
score +=
|
|
595
|
+
score += textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
|
|
592
596
|
newIdx++;
|
|
593
597
|
}
|
|
594
598
|
if (score > bestScore) {
|
|
@@ -717,8 +721,8 @@ function diffStructurallyAlignedRow(oldHtml, newHtml, oldRow, newRow, diffCell)
|
|
|
717
721
|
const oldCell = oldRow.cells[align.oldIdx];
|
|
718
722
|
const newCell = newRow.cells[align.newIdx];
|
|
719
723
|
out.push(emitDiffedCell(oldHtml, newHtml, oldCell, newCell, diffCell));
|
|
720
|
-
} else if (align.newIdx !== null) out.push(emitFullCell(newHtml, newRow.cells[align.newIdx], "ins"
|
|
721
|
-
else if (align.oldIdx !== null) out.push(emitFullCell(oldHtml, oldRow.cells[align.oldIdx], "del"
|
|
724
|
+
} else if (align.newIdx !== null) out.push(emitFullCell(newHtml, newRow.cells[align.newIdx], "ins"));
|
|
725
|
+
else if (align.oldIdx !== null) out.push(emitFullCell(oldHtml, oldRow.cells[align.oldIdx], "del"));
|
|
722
726
|
out.push("</tr>");
|
|
723
727
|
return out.join("");
|
|
724
728
|
}
|
|
@@ -731,7 +735,7 @@ function cellKey(html, cell) {
|
|
|
731
735
|
* each `<td>`, with an `<ins>`/`<del>` wrapper around any cell content
|
|
732
736
|
* (empty cells get the class but no wrapper).
|
|
733
737
|
*/
|
|
734
|
-
function emitFullRow(html, row, kind, diffCell) {
|
|
738
|
+
function emitFullRow(html, row, kind) {
|
|
735
739
|
const cls = kind === "ins" ? "diffins" : "diffdel";
|
|
736
740
|
const trOpening = parseOpeningTagAt(html, row.rowStart);
|
|
737
741
|
if (!trOpening) return html.slice(row.rowStart, row.rowEnd);
|
|
@@ -739,7 +743,7 @@ function emitFullRow(html, row, kind, diffCell) {
|
|
|
739
743
|
let cursor = trOpening.end;
|
|
740
744
|
for (const cell of row.cells) {
|
|
741
745
|
out.push(html.slice(cursor, cell.cellStart));
|
|
742
|
-
out.push(emitFullCell(html, cell, kind
|
|
746
|
+
out.push(emitFullCell(html, cell, kind));
|
|
743
747
|
cursor = cell.cellEnd;
|
|
744
748
|
}
|
|
745
749
|
out.push(html.slice(cursor, row.rowEnd));
|
|
@@ -753,7 +757,7 @@ function emitFullRow(html, row, kind, diffCell) {
|
|
|
753
757
|
* the full recursive diff would produce for newly-inserted formatting.
|
|
754
758
|
* Empty cells get the class on the `<td>` but no inner wrapping.
|
|
755
759
|
*/
|
|
756
|
-
function emitFullCell(html, cell, kind
|
|
760
|
+
function emitFullCell(html, cell, kind) {
|
|
757
761
|
const cls = kind === "ins" ? "diffins" : "diffdel";
|
|
758
762
|
const tdOpening = parseOpeningTagAt(html, cell.cellStart);
|
|
759
763
|
if (!tdOpening) return html.slice(cell.cellStart, cell.cellEnd);
|
|
@@ -789,7 +793,7 @@ function wrapInlineTextRuns(content, kind) {
|
|
|
789
793
|
let j = i;
|
|
790
794
|
while (j < content.length && content[j] !== "<") j++;
|
|
791
795
|
const text = content.slice(i, j);
|
|
792
|
-
if (text.trim().length > 0) out.push(
|
|
796
|
+
if (text.trim().length > 0) out.push(wrapText(text, tag, cls));
|
|
793
797
|
else out.push(text);
|
|
794
798
|
i = j;
|
|
795
799
|
}
|
|
@@ -827,10 +831,14 @@ const CELL_FUZZY_THRESHOLD = .5;
|
|
|
827
831
|
* expect from a typical track-changes view.
|
|
828
832
|
*/
|
|
829
833
|
function pairSimilarUnmatchedRows(alignment, oldTable, newTable, oldHtml, newHtml) {
|
|
830
|
-
|
|
834
|
+
const oldTexts = oldTable.rows.map((r) => rowText(oldHtml, r));
|
|
835
|
+
const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
|
|
836
|
+
return pairSimilarUnmatched(alignment, ROW_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
|
|
831
837
|
}
|
|
832
838
|
function pairSimilarUnmatchedCells(alignment, oldRow, newRow, oldHtml, newHtml) {
|
|
833
|
-
|
|
839
|
+
const oldTexts = oldRow.cells.map((c) => cellText(oldHtml, c));
|
|
840
|
+
const newTexts = newRow.cells.map((c) => cellText(newHtml, c));
|
|
841
|
+
return pairSimilarUnmatched(alignment, CELL_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
|
|
834
842
|
}
|
|
835
843
|
/**
|
|
836
844
|
* Identify pairings inside each unmatched-only run, then build the output
|
|
@@ -916,12 +924,6 @@ function pairSimilarUnmatched(alignment, threshold, similarity) {
|
|
|
916
924
|
* sufficient for the full range of legal-doc edits we see in
|
|
917
925
|
* production tables.
|
|
918
926
|
*/
|
|
919
|
-
function rowSimilarity(oldRow, newRow, oldHtml, newHtml) {
|
|
920
|
-
return textSimilarity(rowText(oldHtml, oldRow), rowText(newHtml, newRow));
|
|
921
|
-
}
|
|
922
|
-
function cellSimilarity(oldCell, newCell, oldHtml, newHtml) {
|
|
923
|
-
return textSimilarity(cellText(oldHtml, oldCell), cellText(newHtml, newCell));
|
|
924
|
-
}
|
|
925
927
|
function textSimilarity(a, b) {
|
|
926
928
|
if (a === b) return 1;
|
|
927
929
|
if (a.length === 0 || b.length === 0) return 0;
|
|
@@ -968,32 +970,29 @@ function lcsAlign(oldKeys, newKeys) {
|
|
|
968
970
|
let i = m;
|
|
969
971
|
let j = n;
|
|
970
972
|
while (i > 0 || j > 0) if (i > 0 && j > 0 && oldKeys[i - 1] === newKeys[j - 1]) {
|
|
971
|
-
result.
|
|
973
|
+
result.push({
|
|
972
974
|
oldIdx: i - 1,
|
|
973
975
|
newIdx: j - 1
|
|
974
976
|
});
|
|
975
977
|
i--;
|
|
976
978
|
j--;
|
|
977
979
|
} else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) {
|
|
978
|
-
result.
|
|
980
|
+
result.push({
|
|
979
981
|
oldIdx: null,
|
|
980
982
|
newIdx: j - 1
|
|
981
983
|
});
|
|
982
984
|
j--;
|
|
983
985
|
} else {
|
|
984
|
-
result.
|
|
986
|
+
result.push({
|
|
985
987
|
oldIdx: i - 1,
|
|
986
988
|
newIdx: null
|
|
987
989
|
});
|
|
988
990
|
i--;
|
|
989
991
|
}
|
|
992
|
+
result.reverse();
|
|
990
993
|
return result;
|
|
991
994
|
}
|
|
992
995
|
/**
|
|
993
|
-
* Returns the opening tag string with the given class injected. Existing
|
|
994
|
-
* `class` attributes are preserved and the new class appended.
|
|
995
|
-
*/
|
|
996
|
-
/**
|
|
997
996
|
* Returns the opening tag with the given class injected. Locates the real
|
|
998
997
|
* `class` attribute via attribute-aware walking (NOT a flat regex — that
|
|
999
998
|
* would mis-match inside a foreign attribute value like
|