@createiq/htmldiff 1.0.5-beta.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/HtmlDiff.cjs CHANGED
@@ -202,1008 +202,6 @@ var Operation = class {
202
202
  }
203
203
  };
204
204
  //#endregion
205
- //#region src/TableDiff.ts
206
- const PLACEHOLDER_PREFIX_BASE = "<!--HTMLDIFF_TABLE_";
207
- const PLACEHOLDER_SUFFIX = "-->";
208
- /**
209
- * Hard cap on table dimensions handled by the structural-aware path.
210
- * The row-LCS is O(rows²), the per-row cell-LCS is O(cells²), and each
211
- * comparison string-equals row content (potentially many KB). Without a
212
- * cap, a several-thousand-row table can pin a CPU for seconds. Tables
213
- * larger than this fall through to the word-level diff, which scales
214
- * linearly. Tuned to comfortably cover real-world ISDA schedules
215
- * (which routinely have 1000+ rows).
216
- */
217
- const MAX_TABLE_ROWS = 1500;
218
- const MAX_TABLE_CELLS_PER_ROW = 200;
219
- function makePlaceholderPrefix(oldHtml, newHtml) {
220
- for (let attempt = 0; attempt < 8; attempt++) {
221
- const prefix = `${PLACEHOLDER_PREFIX_BASE}${Math.floor(Math.random() * 4294967295).toString(16).padStart(8, "0")}_`;
222
- if (!oldHtml.includes(prefix) && !newHtml.includes(prefix)) return prefix;
223
- }
224
- return `${PLACEHOLDER_PREFIX_BASE}fallback_${Date.now()}_`;
225
- }
226
- /**
227
- * Diffs every paired-by-position table in the inputs and replaces each
228
- * source table with a placeholder, returning the modified inputs plus the
229
- * placeholder→diff mapping. Returns null when there are no tables to
230
- * preprocess or the table counts don't line up.
231
- */
232
- function preprocessTables(oldHtml, newHtml, diffCell) {
233
- const oldTables = findTopLevelTables(oldHtml);
234
- const newTables = findTopLevelTables(newHtml);
235
- if (oldTables.length === 0 && newTables.length === 0) return null;
236
- if (oldTables.length !== newTables.length) return null;
237
- for (let i = 0; i < oldTables.length; i++) if (exceedsSizeLimit(oldTables[i]) || exceedsSizeLimit(newTables[i])) return null;
238
- const pairs = [];
239
- for (let i = 0; i < oldTables.length; i++) pairs.push({
240
- oldTable: oldTables[i],
241
- newTable: newTables[i],
242
- diffed: diffTable(oldHtml, newHtml, oldTables[i], newTables[i], diffCell)
243
- });
244
- let modifiedOld = oldHtml;
245
- let modifiedNew = newHtml;
246
- const placeholderPrefix = makePlaceholderPrefix(oldHtml, newHtml);
247
- const placeholderToDiff = /* @__PURE__ */ new Map();
248
- for (let i = pairs.length - 1; i >= 0; i--) {
249
- const placeholder = `${placeholderPrefix}${i}${PLACEHOLDER_SUFFIX}`;
250
- placeholderToDiff.set(placeholder, pairs[i].diffed);
251
- modifiedOld = spliceString(modifiedOld, pairs[i].oldTable.tableStart, pairs[i].oldTable.tableEnd, placeholder);
252
- modifiedNew = spliceString(modifiedNew, pairs[i].newTable.tableStart, pairs[i].newTable.tableEnd, placeholder);
253
- }
254
- return {
255
- modifiedOld,
256
- modifiedNew,
257
- placeholderToDiff
258
- };
259
- }
260
- function restoreTablePlaceholders(diffOutput, placeholderToDiff) {
261
- let result = diffOutput;
262
- for (const [placeholder, html] of placeholderToDiff) result = result.split(placeholder).join(html);
263
- return result;
264
- }
265
- function spliceString(s, start, end, replacement) {
266
- return s.slice(0, start) + replacement + s.slice(end);
267
- }
268
- function exceedsSizeLimit(table) {
269
- if (table.rows.length > MAX_TABLE_ROWS) return true;
270
- for (const row of table.rows) if (row.cells.length > MAX_TABLE_CELLS_PER_ROW) return true;
271
- return false;
272
- }
273
- function diffTable(oldHtml, newHtml, oldTable, newTable, diffCell) {
274
- if (sameDimensions(oldTable, newTable)) return diffPositionalTable(oldHtml, newHtml, oldTable, newTable, diffCell);
275
- if (oldTable.rows.length === newTable.rows.length) return diffSameRowCountTable(oldHtml, newHtml, oldTable, newTable, diffCell);
276
- return diffStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, diffCell);
277
- }
278
- function diffSameRowCountTable(oldHtml, newHtml, oldTable, newTable, diffCell) {
279
- const out = [];
280
- let cursor = newTable.tableStart;
281
- let r = 0;
282
- while (r < newTable.rows.length) {
283
- const merge = detectVerticalMerge(oldHtml, newHtml, oldTable, newTable, r);
284
- if (merge) {
285
- out.push(newHtml.slice(cursor, newTable.rows[r].rowStart));
286
- out.push(merge.diff);
287
- cursor = newTable.rows[r + merge.span - 1].rowEnd;
288
- r += merge.span;
289
- continue;
290
- }
291
- const split = detectVerticalSplit(oldHtml, newHtml, oldTable, newTable, r);
292
- if (split) {
293
- out.push(newHtml.slice(cursor, newTable.rows[r].rowStart));
294
- out.push(split.diff);
295
- cursor = newTable.rows[r + split.span - 1].rowEnd;
296
- r += split.span;
297
- continue;
298
- }
299
- const newRow = newTable.rows[r];
300
- out.push(newHtml.slice(cursor, newRow.rowStart));
301
- out.push(diffPreservedRow(oldHtml, newHtml, oldTable.rows[r], newRow, diffCell));
302
- cursor = newRow.rowEnd;
303
- r++;
304
- }
305
- out.push(newHtml.slice(cursor, newTable.tableEnd));
306
- return out.join("");
307
- }
308
- /**
309
- * Detects a vertical merge starting at row `r`: new row R has a single
310
- * cell with rowspan=K (and any colspan ≥ 1), with rows R+1..R+K-1 empty
311
- * in new. Old rows R..R+K-1 must have a logical column width equal to
312
- * the new cell's colspan and contain no rowspan'd cells of their own.
313
- * This handles both single-column merges (old rows are 1-cell, new cell
314
- * rowspan=K) and rectangular merges (e.g. 2×2 merge into a single
315
- * colspan=2 rowspan=2 cell). Output: emit the merged cell with
316
- * `class='mod rowspan'` and the empty trailing rows unchanged.
317
- */
318
- function detectVerticalMerge(oldHtml, newHtml, oldTable, newTable, r) {
319
- const newRow = newTable.rows[r];
320
- if (newRow.cells.length !== 1) return null;
321
- const cell = newRow.cells[0];
322
- const span = getRowspan(newHtml, cell);
323
- if (span <= 1) return null;
324
- if (r + span > newTable.rows.length) return null;
325
- const colspan = getColspan(newHtml, cell);
326
- for (let k = 1; k < span; k++) if (newTable.rows[r + k].cells.length !== 0) return null;
327
- for (let k = 0; k < span; k++) {
328
- const oldRow = oldTable.rows[r + k];
329
- if (!oldRow) return null;
330
- if (sumColspans(oldHtml, oldRow.cells) !== colspan) return null;
331
- for (const c of oldRow.cells) if (getRowspan(oldHtml, c) !== 1) return null;
332
- }
333
- const out = [];
334
- out.push(rowHeaderSlice(newHtml, newRow));
335
- out.push(emitSpanChangedCell(newHtml, cell, "rowspan"));
336
- out.push("</tr>");
337
- for (let k = 1; k < span; k++) out.push(emitEmptyRow(newHtml, newTable.rows[r + k]));
338
- return {
339
- diff: out.join(""),
340
- span
341
- };
342
- }
343
- /**
344
- * Detects a vertical split starting at row `r`: old row R has a single
345
- * cell with rowspan=K, old rows R+1..R+K-1 are empty. New rows R..R+K-1
346
- * each have a single cell. Output: emit each new row with the new cell
347
- * tagged `class='mod rowspan'`.
348
- */
349
- function detectVerticalSplit(oldHtml, newHtml, oldTable, newTable, r) {
350
- const oldRow = oldTable.rows[r];
351
- if (oldRow.cells.length !== 1) return null;
352
- const oldCell = oldRow.cells[0];
353
- const span = getRowspan(oldHtml, oldCell);
354
- if (span <= 1) return null;
355
- if (r + span > oldTable.rows.length) return null;
356
- const colspan = getColspan(oldHtml, oldCell);
357
- for (let k = 1; k < span; k++) if (oldTable.rows[r + k].cells.length !== 0) return null;
358
- for (let k = 0; k < span; k++) {
359
- const newRow = newTable.rows[r + k];
360
- if (!newRow) return null;
361
- if (sumColspans(newHtml, newRow.cells) !== colspan) return null;
362
- for (const c of newRow.cells) if (getRowspan(newHtml, c) !== 1) return null;
363
- }
364
- const out = [];
365
- for (let k = 0; k < span; k++) {
366
- const newRow = newTable.rows[r + k];
367
- out.push(rowHeaderSlice(newHtml, newRow));
368
- for (const c of newRow.cells) out.push(emitSpanChangedCell(newHtml, c, "rowspan"));
369
- out.push("</tr>");
370
- }
371
- return {
372
- diff: out.join(""),
373
- span
374
- };
375
- }
376
- function emitEmptyRow(html, row) {
377
- return html.slice(row.rowStart, row.rowEnd);
378
- }
379
- function sameDimensions(a, b) {
380
- if (a.rows.length !== b.rows.length) return false;
381
- for (let i = 0; i < a.rows.length; i++) if (a.rows[i].cells.length !== b.rows[i].cells.length) return false;
382
- return true;
383
- }
384
- /**
385
- * Same-dimension path: walk the new table verbatim and substitute each
386
- * cell content range with the cell-level diff. The surrounding
387
- * `<thead>`/`<tbody>`/whitespace passes through untouched.
388
- */
389
- function diffPositionalTable(oldHtml, newHtml, oldTable, newTable, diffCell) {
390
- const out = [];
391
- let cursor = newTable.tableStart;
392
- for (let r = 0; r < newTable.rows.length; r++) {
393
- const oldRow = oldTable.rows[r];
394
- const newRow = newTable.rows[r];
395
- for (let c = 0; c < newRow.cells.length; c++) {
396
- const oldCell = oldRow.cells[c];
397
- const newCell = newRow.cells[c];
398
- out.push(newHtml.slice(cursor, newCell.contentStart));
399
- out.push(diffCell(oldHtml.slice(oldCell.contentStart, oldCell.contentEnd), newHtml.slice(newCell.contentStart, newCell.contentEnd)));
400
- cursor = newCell.contentEnd;
401
- }
402
- }
403
- out.push(newHtml.slice(cursor, newTable.tableEnd));
404
- return out.join("");
405
- }
406
- /**
407
- * Mismatched-dimensions path: row-level LCS to identify added/deleted rows,
408
- * then per preserved row a cell-level LCS to identify added/deleted cells.
409
- * Reconstructs the table from scratch — there's no "single new structure"
410
- * to walk verbatim, since we're stitching together kept rows from both
411
- * sides.
412
- */
413
- function diffStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, diffCell) {
414
- const alignment = orderAlignmentForEmission(pairSimilarUnmatchedRows(lcsAlign(oldTable.rows.map((row) => rowKey(oldHtml, row)), newTable.rows.map((row) => rowKey(newHtml, row))), oldTable, newTable, oldHtml, newHtml));
415
- if (newTable.rows.length === 0) return rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment, diffCell);
416
- const out = [];
417
- out.push(newHtml.slice(newTable.tableStart, newTable.rows[0].rowStart));
418
- let cursor = newTable.rows[0].rowStart;
419
- for (const align of alignment) if (align.newIdx !== null) {
420
- const newRow = newTable.rows[align.newIdx];
421
- out.push(newHtml.slice(cursor, newRow.rowStart));
422
- if (align.oldIdx !== null) out.push(diffPreservedRow(oldHtml, newHtml, oldTable.rows[align.oldIdx], newRow, diffCell));
423
- else out.push(emitFullRow(newHtml, newRow, "ins", diffCell));
424
- cursor = newRow.rowEnd;
425
- } else if (align.oldIdx !== null) out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], "del", diffCell));
426
- out.push(newHtml.slice(cursor, newTable.tableEnd));
427
- return out.join("");
428
- }
429
- /**
430
- * Reorders the alignment so emission produces rows in the visually-
431
- * correct order. Each entry is assigned a fractional "position" in
432
- * new's flow:
433
- *
434
- * • Preserved/paired (oldIdx, newIdx): position = newIdx.
435
- * • Pure insert (null, newIdx): position = newIdx.
436
- * • Pure delete (oldIdx, null): position = newIdx-of-preserved-just-
437
- * before-this-oldIdx + 0.5. Dels at the same gap sort by oldIdx so
438
- * they appear in old's row order. The +0.5 places dels BEFORE any
439
- * insert at the same gap (insert at newIdx N1+1 has position N1+1
440
- * which is > N1+0.5), giving the natural "delete first, insert
441
- * second" reading order at a replaced position.
442
- *
443
- * This handles the full range:
444
- * • Run of unpaired dels at the start (no preserved predecessor):
445
- * position -0.5, sorted by oldIdx.
446
- * • Dels in the middle: positioned right after their preceding
447
- * preserved row.
448
- * • Dels at the end (no preserved successor): positioned after the
449
- * last preserved row.
450
- *
451
- * Without this reordering, a run of unpaired deletes at low alignment
452
- * indices got emitted at cursor = first-new-row position — putting
453
- * all deletes before any preserved row in the output, regardless of
454
- * where they came from in old.
455
- */
456
- function orderAlignmentForEmission(alignment) {
457
- const preserved = [];
458
- for (const a of alignment) if (a.oldIdx !== null && a.newIdx !== null) preserved.push({
459
- oldIdx: a.oldIdx,
460
- newIdx: a.newIdx
461
- });
462
- preserved.sort((a, b) => a.oldIdx - b.oldIdx);
463
- function newIdxOfPreservedBefore(oldIdx) {
464
- let result = -1;
465
- for (const p of preserved) {
466
- if (p.oldIdx >= oldIdx) break;
467
- result = p.newIdx;
468
- }
469
- return result;
470
- }
471
- const decorated = alignment.map((a, i) => {
472
- let primary;
473
- let secondary;
474
- if (a.newIdx !== null) {
475
- primary = a.newIdx;
476
- secondary = a.oldIdx === null ? 1 : 0;
477
- } else {
478
- primary = newIdxOfPreservedBefore(a.oldIdx) + .5;
479
- secondary = a.oldIdx;
480
- }
481
- return {
482
- entry: a,
483
- primary,
484
- secondary,
485
- originalIdx: i
486
- };
487
- });
488
- decorated.sort((a, b) => {
489
- if (a.primary !== b.primary) return a.primary - b.primary;
490
- if (a.secondary !== b.secondary) return a.secondary - b.secondary;
491
- return a.originalIdx - b.originalIdx;
492
- });
493
- return decorated.map((d) => d.entry);
494
- }
495
- function rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment, diffCell) {
496
- const out = [];
497
- out.push(headerSlice(newHtml, newTable, oldHtml, oldTable));
498
- for (const align of alignment) if (align.oldIdx !== null) out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], "del", diffCell));
499
- else if (align.newIdx !== null) out.push(emitFullRow(newHtml, newTable.rows[align.newIdx], "ins", diffCell));
500
- out.push("</table>");
501
- return out.join("");
502
- }
503
- function headerSlice(newHtml, newTable, oldHtml, oldTable) {
504
- const newFirstRow = newTable.rows[0]?.rowStart ?? newTable.tableEnd - 8;
505
- if (newFirstRow > newTable.tableStart) return newHtml.slice(newTable.tableStart, newFirstRow);
506
- const oldFirstRow = oldTable.rows[0]?.rowStart ?? oldTable.tableEnd - 8;
507
- return oldHtml.slice(oldTable.tableStart, oldFirstRow);
508
- }
509
- function rowKey(html, row) {
510
- return html.slice(row.rowStart, row.rowEnd).replace(/\s+/g, " ").trim();
511
- }
512
- function diffPreservedRow(oldHtml, newHtml, oldRow, newRow, diffCell) {
513
- if (oldRow.cells.length === newRow.cells.length) return diffPositionalRow(oldHtml, newHtml, oldRow, newRow, diffCell);
514
- const colspanAligned = diffColspanChangedRow(oldHtml, newHtml, oldRow, newRow, diffCell);
515
- if (colspanAligned !== null) return colspanAligned;
516
- const delta = newRow.cells.length - oldRow.cells.length;
517
- const absDelta = Math.abs(delta);
518
- if (absDelta > 0 && absDelta <= MAX_COLUMN_DELTA && Math.max(oldRow.cells.length, newRow.cells.length) <= MAX_COLUMN_SEARCH_WIDTH) {
519
- if (delta > 0) return diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, delta, diffCell);
520
- return diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, -delta, diffCell);
521
- }
522
- return diffStructurallyAlignedRow(oldHtml, newHtml, oldRow, newRow, diffCell);
523
- }
524
- const MAX_COLUMN_DELTA = 6;
525
- const MAX_COLUMN_SEARCH_WIDTH = 40;
526
- /**
527
- * For a row where new has K more cells than old, find the K column
528
- * positions in new where cells were inserted by scanning all C(newCount,
529
- * K) combinations and picking the one that maximises positional content
530
- * similarity with the remaining cells. The inserted cells are emitted
531
- * with diff markers; the rest are aligned positionally with content
532
- * diff for matched pairs.
533
- */
534
- function diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, k, diffCell) {
535
- const insertedPositions = findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml);
536
- const inserted = new Set(insertedPositions);
537
- const out = [rowHeaderSlice(newHtml, newRow)];
538
- let oldIdx = 0;
539
- for (let c = 0; c < newRow.cells.length; c++) if (inserted.has(c)) out.push(emitFullCell(newHtml, newRow.cells[c], "ins", diffCell));
540
- else {
541
- out.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[oldIdx], newRow.cells[c], diffCell));
542
- oldIdx++;
543
- }
544
- out.push("</tr>");
545
- return out.join("");
546
- }
547
- function diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, k, diffCell) {
548
- const deletedPositions = findBestColumnDeletePositions(oldRow, newRow, k, oldHtml, newHtml);
549
- const deleted = new Set(deletedPositions);
550
- const out = [rowHeaderSlice(newHtml, newRow)];
551
- let newIdx = 0;
552
- for (let oldIdx = 0; oldIdx < oldRow.cells.length; oldIdx++) {
553
- if (deleted.has(oldIdx)) {
554
- out.push(emitFullCell(oldHtml, oldRow.cells[oldIdx], "del", diffCell));
555
- continue;
556
- }
557
- out.push(emitDiffedCell(oldHtml, newHtml, oldRow.cells[oldIdx], newRow.cells[newIdx], diffCell));
558
- newIdx++;
559
- }
560
- out.push("</tr>");
561
- return out.join("");
562
- }
563
- function findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml) {
564
- let bestPositions = [];
565
- let bestScore = -1;
566
- for (const combo of combinationsOfRange(newRow.cells.length, k)) {
567
- const inserted = new Set(combo);
568
- let score = 0;
569
- let oldIdx = 0;
570
- for (let newIdx = 0; newIdx < newRow.cells.length; newIdx++) {
571
- if (inserted.has(newIdx)) continue;
572
- score += cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml);
573
- oldIdx++;
574
- }
575
- if (score > bestScore) {
576
- bestScore = score;
577
- bestPositions = combo;
578
- }
579
- }
580
- return bestPositions;
581
- }
582
- function findBestColumnDeletePositions(oldRow, newRow, k, oldHtml, newHtml) {
583
- let bestPositions = [];
584
- let bestScore = -1;
585
- for (const combo of combinationsOfRange(oldRow.cells.length, k)) {
586
- const deleted = new Set(combo);
587
- let score = 0;
588
- let newIdx = 0;
589
- for (let oldIdx = 0; oldIdx < oldRow.cells.length; oldIdx++) {
590
- if (deleted.has(oldIdx)) continue;
591
- score += cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml);
592
- newIdx++;
593
- }
594
- if (score > bestScore) {
595
- bestScore = score;
596
- bestPositions = combo;
597
- }
598
- }
599
- return bestPositions;
600
- }
601
- /**
602
- * Yields all sorted-ascending combinations of `k` distinct integers
603
- * from [0, n). Iterative implementation avoids recursion overhead and
604
- * keeps memory at O(k).
605
- */
606
- function* combinationsOfRange(n, k) {
607
- if (k === 0 || k > n) return;
608
- const indices = Array.from({ length: k }, (_, i) => i);
609
- while (true) {
610
- yield indices.slice();
611
- let i = k - 1;
612
- while (i >= 0 && indices[i] === n - k + i) i--;
613
- if (i < 0) return;
614
- indices[i]++;
615
- for (let j = i + 1; j < k; j++) indices[j] = indices[j - 1] + 1;
616
- }
617
- }
618
- /**
619
- * Try to align cells by logical column position (sum of colspans). When
620
- * one side has a colspan'd cell that absorbs multiple cells on the other
621
- * side, emit the new structure with `class='mod colspan'` on the
622
- * merged/split cells. Returns null if the rows don't align cleanly —
623
- * caller falls back to a generic cell-LCS.
624
- */
625
- function diffColspanChangedRow(oldHtml, newHtml, oldRow, newRow, diffCell) {
626
- if (sumColspans(oldHtml, oldRow.cells) !== sumColspans(newHtml, newRow.cells)) return null;
627
- const out = [];
628
- out.push(rowHeaderSlice(newHtml, newRow));
629
- let oi = 0;
630
- let ni = 0;
631
- while (oi < oldRow.cells.length && ni < newRow.cells.length) {
632
- const oCell = oldRow.cells[oi];
633
- const nCell = newRow.cells[ni];
634
- const oSpan = getColspan(oldHtml, oCell);
635
- const nSpan = getColspan(newHtml, nCell);
636
- if (oSpan === nSpan) {
637
- out.push(emitDiffedCell(oldHtml, newHtml, oCell, nCell, diffCell));
638
- oi++;
639
- ni++;
640
- } else if (nSpan > oSpan) {
641
- let totalOldSpan = 0;
642
- let oj = oi;
643
- while (oj < oldRow.cells.length && totalOldSpan < nSpan) {
644
- totalOldSpan += getColspan(oldHtml, oldRow.cells[oj]);
645
- oj++;
646
- }
647
- if (totalOldSpan !== nSpan) return null;
648
- out.push(emitSpanChangedCell(newHtml, nCell, "colspan"));
649
- oi = oj;
650
- ni++;
651
- } else {
652
- let totalNewSpan = 0;
653
- let nj = ni;
654
- while (nj < newRow.cells.length && totalNewSpan < oSpan) {
655
- totalNewSpan += getColspan(newHtml, newRow.cells[nj]);
656
- nj++;
657
- }
658
- if (totalNewSpan !== oSpan) return null;
659
- for (let k = ni; k < nj; k++) out.push(emitSpanChangedCell(newHtml, newRow.cells[k], "colspan"));
660
- oi++;
661
- ni = nj;
662
- }
663
- }
664
- if (oi !== oldRow.cells.length || ni !== newRow.cells.length) return null;
665
- out.push("</tr>");
666
- return out.join("");
667
- }
668
- function sumColspans(html, cells) {
669
- let total = 0;
670
- for (const cell of cells) total += getColspan(html, cell);
671
- return total;
672
- }
673
- function getColspan(html, cell) {
674
- return parseSpanAttribute(html.slice(cell.cellStart, cell.contentStart), "colspan");
675
- }
676
- function getRowspan(html, cell) {
677
- return parseSpanAttribute(html.slice(cell.cellStart, cell.contentStart), "rowspan");
678
- }
679
- function parseSpanAttribute(openingTag, name) {
680
- const m = (name === "colspan" ? /\bcolspan\s*=\s*["']?(\d+)["']?/i : /\browspan\s*=\s*["']?(\d+)["']?/i).exec(openingTag);
681
- if (!m) return 1;
682
- const value = Number.parseInt(m[1], 10);
683
- return Number.isFinite(value) && value > 0 ? value : 1;
684
- }
685
- /**
686
- * Emits a cell that's the merged/split product of a structural change,
687
- * tagged with `class='mod colspan'` or `class='mod rowspan'`. Content is
688
- * carried through unmodified — Word doesn't track these changes, and
689
- * inserting del/ins around content that didn't really change would be
690
- * misleading.
691
- */
692
- function emitSpanChangedCell(html, cell, kind) {
693
- const tdOpening = parseOpeningTagAt(html, cell.cellStart);
694
- if (!tdOpening) return html.slice(cell.cellStart, cell.cellEnd);
695
- return injectClass(html.slice(cell.cellStart, tdOpening.end), `mod ${kind}`) + html.slice(cell.contentStart, cell.cellEnd);
696
- }
697
- function diffPositionalRow(oldHtml, newHtml, oldRow, newRow, diffCell) {
698
- const out = [];
699
- const trHeader = rowHeaderSlice(newHtml, newRow);
700
- out.push(trHeader);
701
- let cursor = newRow.cells[0]?.cellStart ?? newRow.rowEnd;
702
- for (let c = 0; c < newRow.cells.length; c++) {
703
- const oldCell = oldRow.cells[c];
704
- const newCell = newRow.cells[c];
705
- out.push(newHtml.slice(cursor, newCell.contentStart));
706
- out.push(diffCell(oldHtml.slice(oldCell.contentStart, oldCell.contentEnd), newHtml.slice(newCell.contentStart, newCell.contentEnd)));
707
- cursor = newCell.contentEnd;
708
- }
709
- out.push(newHtml.slice(cursor, newRow.rowEnd));
710
- return out.join("");
711
- }
712
- function diffStructurallyAlignedRow(oldHtml, newHtml, oldRow, newRow, diffCell) {
713
- const alignment = pairSimilarUnmatchedCells(lcsAlign(oldRow.cells.map((cell) => cellKey(oldHtml, cell)), newRow.cells.map((cell) => cellKey(newHtml, cell))), oldRow, newRow, oldHtml, newHtml);
714
- const out = [];
715
- out.push(rowHeaderSlice(newHtml, newRow));
716
- for (const align of alignment) if (align.oldIdx !== null && align.newIdx !== null) {
717
- const oldCell = oldRow.cells[align.oldIdx];
718
- const newCell = newRow.cells[align.newIdx];
719
- out.push(emitDiffedCell(oldHtml, newHtml, oldCell, newCell, diffCell));
720
- } else if (align.newIdx !== null) out.push(emitFullCell(newHtml, newRow.cells[align.newIdx], "ins", diffCell));
721
- else if (align.oldIdx !== null) out.push(emitFullCell(oldHtml, oldRow.cells[align.oldIdx], "del", diffCell));
722
- out.push("</tr>");
723
- return out.join("");
724
- }
725
- function cellKey(html, cell) {
726
- return html.slice(cell.contentStart, cell.contentEnd).replace(/\s+/g, " ").trim();
727
- }
728
- /**
729
- * Emits a row with all cells either inserted (kind='ins') or deleted
730
- * (kind='del'). Adds `class='diffins'`/`'diffdel'` to the `<tr>` and to
731
- * each `<td>`, with an `<ins>`/`<del>` wrapper around any cell content
732
- * (empty cells get the class but no wrapper).
733
- */
734
- function emitFullRow(html, row, kind, diffCell) {
735
- const cls = kind === "ins" ? "diffins" : "diffdel";
736
- const trOpening = parseOpeningTagAt(html, row.rowStart);
737
- if (!trOpening) return html.slice(row.rowStart, row.rowEnd);
738
- const out = [injectClass(html.slice(row.rowStart, trOpening.end), cls)];
739
- let cursor = trOpening.end;
740
- for (const cell of row.cells) {
741
- out.push(html.slice(cursor, cell.cellStart));
742
- out.push(emitFullCell(html, cell, kind, diffCell));
743
- cursor = cell.cellEnd;
744
- }
745
- out.push(html.slice(cursor, row.rowEnd));
746
- return out.join("");
747
- }
748
- /**
749
- * Emits a fully-inserted or fully-deleted cell. Inner text runs are wrapped
750
- * with `<ins>`/`<del>` while formatting tags pass through unchanged, so
751
- * `<strong>B</strong>` renders as `<strong><ins>B</ins></strong>` —
752
- * matching htmldiff's general convention without the doubled-`<ins>` that
753
- * the full recursive diff would produce for newly-inserted formatting.
754
- * Empty cells get the class on the `<td>` but no inner wrapping.
755
- */
756
- function emitFullCell(html, cell, kind, _diffCell) {
757
- const cls = kind === "ins" ? "diffins" : "diffdel";
758
- const tdOpening = parseOpeningTagAt(html, cell.cellStart);
759
- if (!tdOpening) return html.slice(cell.cellStart, cell.cellEnd);
760
- const tdOpenTag = injectClass(html.slice(cell.cellStart, tdOpening.end), cls);
761
- const content = html.slice(cell.contentStart, cell.contentEnd);
762
- const wrapped = content.trim().length === 0 ? content : wrapInlineTextRuns(content, kind);
763
- const closing = html.slice(cell.contentEnd, cell.cellEnd);
764
- return tdOpenTag + wrapped + closing;
765
- }
766
- /**
767
- * Wraps every non-whitespace text run in the given content with an
768
- * `<ins>`/`<del>` tag, leaving HTML tags untouched. This produces output
769
- * like `<strong><ins>X</ins></strong>` for fully-inserted formatted
770
- * content — the same shape the rest of htmldiff emits for content
771
- * insertions inside existing formatting.
772
- */
773
- function wrapInlineTextRuns(content, kind) {
774
- const tag = kind === "ins" ? "ins" : "del";
775
- const cls = kind === "ins" ? "diffins" : "diffdel";
776
- const out = [];
777
- let i = 0;
778
- while (i < content.length) {
779
- if (content[i] === "<") {
780
- const tagEnd = parseOpeningTagAt(content, i);
781
- if (!tagEnd) {
782
- out.push(content.slice(i));
783
- break;
784
- }
785
- out.push(content.slice(i, tagEnd.end));
786
- i = tagEnd.end;
787
- continue;
788
- }
789
- let j = i;
790
- while (j < content.length && content[j] !== "<") j++;
791
- const text = content.slice(i, j);
792
- if (text.trim().length > 0) out.push(`<${tag} class='${cls}'>${text}</${tag}>`);
793
- else out.push(text);
794
- i = j;
795
- }
796
- return out.join("");
797
- }
798
- function emitDiffedCell(oldHtml, newHtml, oldCell, newCell, diffCell) {
799
- const tdOpening = parseOpeningTagAt(newHtml, newCell.cellStart);
800
- if (!tdOpening) return newHtml.slice(newCell.cellStart, newCell.cellEnd);
801
- const tdOpenTag = newHtml.slice(newCell.cellStart, tdOpening.end);
802
- const content = diffCell(oldHtml.slice(oldCell.contentStart, oldCell.contentEnd), newHtml.slice(newCell.contentStart, newCell.contentEnd));
803
- const closing = newHtml.slice(newCell.contentEnd, newCell.cellEnd);
804
- return tdOpenTag + content + closing;
805
- }
806
- function rowHeaderSlice(html, row) {
807
- const opening = parseOpeningTagAt(html, row.rowStart);
808
- if (!opening) return "";
809
- if (row.cells.length === 0) return html.slice(row.rowStart, opening.end);
810
- return html.slice(row.rowStart, row.cells[0].cellStart);
811
- }
812
- /** Character-level similarity threshold above which we treat two rows as "the same row, edited". */
813
- const ROW_FUZZY_THRESHOLD = .5;
814
- /**
815
- * Threshold for "this cell is a content-edit of that cell." Tuned the same
816
- * as ROW_FUZZY_THRESHOLD; cells in legal docs that share most of their
817
- * content typically ARE the same logical cell with a body edit, so 0.5
818
- * works for both granularities in practice.
819
- */
820
- const CELL_FUZZY_THRESHOLD = .5;
821
- /**
822
- * After exact LCS, scan the alignment for runs of "old deleted, then new
823
- * inserted" (or vice versa) and pair entries whose content is similar
824
- * enough to be treated as an edit rather than a delete+insert. This keeps
825
- * row-level edits (a typo fix, a single word change) from being shown as
826
- * an entire row vanishing and a new one appearing — matching what users
827
- * expect from a typical track-changes view.
828
- */
829
- function pairSimilarUnmatchedRows(alignment, oldTable, newTable, oldHtml, newHtml) {
830
- return pairSimilarUnmatched(alignment, ROW_FUZZY_THRESHOLD, (oldIdx, newIdx) => rowSimilarity(oldTable.rows[oldIdx], newTable.rows[newIdx], oldHtml, newHtml));
831
- }
832
- function pairSimilarUnmatchedCells(alignment, oldRow, newRow, oldHtml, newHtml) {
833
- return pairSimilarUnmatched(alignment, CELL_FUZZY_THRESHOLD, (oldIdx, newIdx) => cellSimilarity(oldRow.cells[oldIdx], newRow.cells[newIdx], oldHtml, newHtml));
834
- }
835
- /**
836
- * Identify pairings inside each unmatched-only run, then build the output
837
- * alignment by walking the original and substituting paired entries at
838
- * the *ins position* (not the del position). This keeps the result
839
- * monotonic in newIdx — critical because the cursor-based emission
840
- * downstream walks new's html in order. Emitting at the del position
841
- * would be fine when del<ins in the alignment array (the typical case),
842
- * but can violate monotonicity when there are mixed unpaired entries in
843
- * between (column-add + row-add together, content-edit + column-add,
844
- * etc.).
845
- *
846
- * Generic over what's being paired — works for both rows (by full row
847
- * content similarity) and cells (by per-cell content similarity).
848
- */
849
- function pairSimilarUnmatched(alignment, threshold, similarity) {
850
- const pairs = /* @__PURE__ */ new Map();
851
- let i = 0;
852
- while (i < alignment.length) {
853
- if (alignment[i].oldIdx !== null && alignment[i].newIdx !== null) {
854
- i++;
855
- continue;
856
- }
857
- const runStart = i;
858
- while (i < alignment.length && alignment[i].oldIdx === null !== (alignment[i].newIdx === null)) i++;
859
- const runEnd = i;
860
- const delIndices = [];
861
- const insIndices = [];
862
- for (let k = runStart; k < runEnd; k++) if (alignment[k].oldIdx !== null) delIndices.push(k);
863
- else insIndices.push(k);
864
- const usedIns = /* @__PURE__ */ new Set();
865
- for (const di of delIndices) {
866
- let bestIi = -1;
867
- let bestSim = threshold;
868
- for (const ii of insIndices) {
869
- if (usedIns.has(ii)) continue;
870
- const sim = similarity(alignment[di].oldIdx, alignment[ii].newIdx);
871
- if (sim > bestSim) {
872
- bestSim = sim;
873
- bestIi = ii;
874
- }
875
- }
876
- if (bestIi >= 0) {
877
- pairs.set(di, bestIi);
878
- usedIns.add(bestIi);
879
- }
880
- }
881
- }
882
- const insToDel = /* @__PURE__ */ new Map();
883
- for (const [delAi, insAi] of pairs) insToDel.set(insAi, delAi);
884
- const pairedDels = new Set(pairs.keys());
885
- const result = [];
886
- for (let k = 0; k < alignment.length; k++) {
887
- if (pairedDels.has(k)) continue;
888
- if (insToDel.has(k)) {
889
- const delAi = insToDel.get(k);
890
- result.push({
891
- oldIdx: alignment[delAi].oldIdx,
892
- newIdx: alignment[k].newIdx
893
- });
894
- } else result.push(alignment[k]);
895
- }
896
- return result;
897
- }
898
- /**
899
- * Combined similarity metric used for both row-level and cell-level
900
- * fuzzy pairing. Returns the MAX of two complementary metrics:
901
- *
902
- * 1. **Character prefix+suffix similarity** — fraction of the longer
903
- * string covered by shared prefix + shared suffix. Catches small
904
- * edits in the middle of a string (one word changed in a row).
905
- * Misses cases where the bulk of common content is in the middle
906
- * and the ends differ.
907
- *
908
- * 2. **Token Jaccard similarity** — intersection-over-union of the
909
- * whitespace-split tokens. Catches "most of the content is the
910
- * same but bookended by different bits" — e.g. a row whose only
911
- * edit is a column added at the start and another at the end,
912
- * where the ~50 chars in the middle that DO match would be
913
- * invisible to prefix+suffix.
914
- *
915
- * Either metric exceeding the threshold means pair. Neither alone is
916
- * sufficient for the full range of legal-doc edits we see in
917
- * production tables.
918
- */
919
- function rowSimilarity(oldRow, newRow, oldHtml, newHtml) {
920
- return textSimilarity(rowText(oldHtml, oldRow), rowText(newHtml, newRow));
921
- }
922
- function cellSimilarity(oldCell, newCell, oldHtml, newHtml) {
923
- return textSimilarity(cellText(oldHtml, oldCell), cellText(newHtml, newCell));
924
- }
925
- function textSimilarity(a, b) {
926
- if (a === b) return 1;
927
- if (a.length === 0 || b.length === 0) return 0;
928
- return Math.max(charPrefixSuffixSimilarity(a, b), tokenJaccardSimilarity(a, b));
929
- }
930
- function charPrefixSuffixSimilarity(a, b) {
931
- let prefix = 0;
932
- const minLen = Math.min(a.length, b.length);
933
- while (prefix < minLen && a[prefix] === b[prefix]) prefix++;
934
- let suffix = 0;
935
- while (suffix < a.length - prefix && suffix < b.length - prefix && a[a.length - 1 - suffix] === b[b.length - 1 - suffix]) suffix++;
936
- return (prefix + suffix) / Math.max(a.length, b.length);
937
- }
938
- function tokenJaccardSimilarity(a, b) {
939
- const tokensA = new Set(a.split(/\s+/).filter(Boolean));
940
- const tokensB = new Set(b.split(/\s+/).filter(Boolean));
941
- if (tokensA.size === 0 && tokensB.size === 0) return 1;
942
- let intersection = 0;
943
- for (const t of tokensA) if (tokensB.has(t)) intersection++;
944
- const union = tokensA.size + tokensB.size - intersection;
945
- return union === 0 ? 0 : intersection / union;
946
- }
947
- function rowText(html, row) {
948
- const parts = [];
949
- for (const cell of row.cells) parts.push(html.slice(cell.contentStart, cell.contentEnd).replace(/<[^>]+>/g, " "));
950
- return parts.join(" ").replace(/\s+/g, " ").trim().toLowerCase();
951
- }
952
- function cellText(html, cell) {
953
- return html.slice(cell.contentStart, cell.contentEnd).replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
954
- }
955
- /**
956
- * Standard LCS alignment: walks both sequences and emits a list of pairs
957
- * where `(oldIdx, newIdx)` are both set for matching positions, and one
958
- * side is null for an unmatched entry on the other side. Equality uses
959
- * strict ===.
960
- */
961
- function lcsAlign(oldKeys, newKeys) {
962
- const m = oldKeys.length;
963
- const n = newKeys.length;
964
- const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
965
- for (let i = 1; i <= m; i++) for (let j = 1; j <= n; j++) if (oldKeys[i - 1] === newKeys[j - 1]) dp[i][j] = dp[i - 1][j - 1] + 1;
966
- else dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
967
- const result = [];
968
- let i = m;
969
- let j = n;
970
- while (i > 0 || j > 0) if (i > 0 && j > 0 && oldKeys[i - 1] === newKeys[j - 1]) {
971
- result.unshift({
972
- oldIdx: i - 1,
973
- newIdx: j - 1
974
- });
975
- i--;
976
- j--;
977
- } else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) {
978
- result.unshift({
979
- oldIdx: null,
980
- newIdx: j - 1
981
- });
982
- j--;
983
- } else {
984
- result.unshift({
985
- oldIdx: i - 1,
986
- newIdx: null
987
- });
988
- i--;
989
- }
990
- return result;
991
- }
992
- /**
993
- * Returns the opening tag string with the given class injected. Existing
994
- * `class` attributes are preserved and the new class appended.
995
- */
996
- /**
997
- * Returns the opening tag with the given class injected. Locates the real
998
- * `class` attribute via attribute-aware walking (NOT a flat regex — that
999
- * would mis-match inside a foreign attribute value like
1000
- * `title="see class='x'"`). When the class already partially overlaps with
1001
- * `cls` — e.g. existing `class="mod"` and we're injecting `mod colspan` —
1002
- * only the missing tokens get appended, so we never end up with
1003
- * `class="mod mod colspan"`.
1004
- */
1005
- function injectClass(openingTag, cls) {
1006
- const clsTokens = cls.split(/\s+/).filter(Boolean);
1007
- if (clsTokens.length === 0) return openingTag;
1008
- const classAttr = findClassAttribute(openingTag);
1009
- if (classAttr) {
1010
- const existingTokens = classAttr.value.split(/\s+/).filter(Boolean);
1011
- const missing = clsTokens.filter((t) => !existingTokens.includes(t));
1012
- if (missing.length === 0) return openingTag;
1013
- const updatedValue = existingTokens.length === 0 ? missing.join(" ") : `${existingTokens.join(" ")} ${missing.join(" ")}`;
1014
- return openingTag.slice(0, classAttr.valueStart) + updatedValue + openingTag.slice(classAttr.valueEnd);
1015
- }
1016
- const insertAt = openingTag.endsWith("/>") ? openingTag.length - 2 : openingTag.length - 1;
1017
- return `${openingTag.slice(0, insertAt).replace(/\s*$/, "")} class='${cls}'${openingTag.slice(insertAt)}`;
1018
- }
1019
- /**
1020
- * Walks the opening tag's attributes (respecting quoted values) to find
1021
- * the actual `class` attribute. Returns the value range (start/end of the
1022
- * value content, *excluding* the surrounding quotes) and the value, or
1023
- * null if no `class` attribute is present.
1024
- */
1025
- function findClassAttribute(openingTag) {
1026
- let i = 1;
1027
- while (i < openingTag.length && /[A-Za-z0-9_:-]/.test(openingTag[i])) i++;
1028
- while (i < openingTag.length) {
1029
- while (i < openingTag.length && /\s/.test(openingTag[i])) i++;
1030
- if (i >= openingTag.length) break;
1031
- if (openingTag[i] === ">" || openingTag[i] === "/") break;
1032
- const nameStart = i;
1033
- while (i < openingTag.length && !/[\s=>/]/.test(openingTag[i])) i++;
1034
- const name = openingTag.slice(nameStart, i);
1035
- while (i < openingTag.length && /\s/.test(openingTag[i])) i++;
1036
- if (openingTag[i] !== "=") continue;
1037
- i++;
1038
- while (i < openingTag.length && /\s/.test(openingTag[i])) i++;
1039
- let valueStart;
1040
- let valueEnd;
1041
- if (openingTag[i] === "\"" || openingTag[i] === "'") {
1042
- const quote = openingTag[i];
1043
- i++;
1044
- valueStart = i;
1045
- while (i < openingTag.length && openingTag[i] !== quote) i++;
1046
- valueEnd = i;
1047
- if (i < openingTag.length) i++;
1048
- } else {
1049
- valueStart = i;
1050
- while (i < openingTag.length && !/[\s>/]/.test(openingTag[i])) i++;
1051
- valueEnd = i;
1052
- }
1053
- if (name.toLowerCase() === "class") return {
1054
- valueStart,
1055
- valueEnd,
1056
- value: openingTag.slice(valueStart, valueEnd)
1057
- };
1058
- }
1059
- return null;
1060
- }
1061
- /**
1062
- * Walks html and returns ranges for every top-level `<table>...</table>`
1063
- * block. Nested tables aren't extracted as separate top-level entries —
1064
- * they're captured inside the parent's content range and handled when the
1065
- * cell-level diff recurses through them.
1066
- */
1067
- function findTopLevelTables(html) {
1068
- const tables = [];
1069
- let i = 0;
1070
- while (i < html.length) if (matchesTagAt(html, i, "table")) {
1071
- const opening = parseOpeningTagAt(html, i);
1072
- if (!opening) {
1073
- i++;
1074
- continue;
1075
- }
1076
- const tableContentStart = opening.end;
1077
- const tableEnd = findMatchingClosingTag(html, tableContentStart, "table");
1078
- if (tableEnd === -1) {
1079
- i = opening.end;
1080
- continue;
1081
- }
1082
- const rows = findTopLevelRows(html, tableContentStart, tableEnd - 8);
1083
- tables.push({
1084
- tableStart: i,
1085
- tableEnd,
1086
- rows
1087
- });
1088
- i = tableEnd;
1089
- } else i++;
1090
- return tables;
1091
- }
1092
- function findTopLevelRows(html, start, end) {
1093
- const rows = [];
1094
- let i = start;
1095
- while (i < end) if (matchesTagAt(html, i, "tr")) {
1096
- const opening = parseOpeningTagAt(html, i);
1097
- if (!opening) {
1098
- i++;
1099
- continue;
1100
- }
1101
- const rowContentStart = opening.end;
1102
- const rowEnd = findMatchingClosingTag(html, rowContentStart, "tr", end);
1103
- if (rowEnd === -1) {
1104
- i = opening.end;
1105
- continue;
1106
- }
1107
- const cells = findTopLevelCells(html, rowContentStart, rowEnd - 5);
1108
- rows.push({
1109
- rowStart: i,
1110
- rowEnd,
1111
- cells
1112
- });
1113
- i = rowEnd;
1114
- } else if (matchesClosingTagAt(html, i, "table")) break;
1115
- else i++;
1116
- return rows;
1117
- }
1118
- function findTopLevelCells(html, start, end) {
1119
- const cells = [];
1120
- let i = start;
1121
- while (i < end) if (matchesTagAt(html, i, "td") || matchesTagAt(html, i, "th")) {
1122
- const tagName = matchesTagAt(html, i, "td") ? "td" : "th";
1123
- const opening = parseOpeningTagAt(html, i);
1124
- if (!opening) {
1125
- i++;
1126
- continue;
1127
- }
1128
- const contentStart = opening.end;
1129
- const cellEnd = findMatchingClosingTag(html, contentStart, tagName, end);
1130
- if (cellEnd === -1) {
1131
- i = opening.end;
1132
- continue;
1133
- }
1134
- const contentEnd = cellEnd - `</${tagName}>`.length;
1135
- cells.push({
1136
- cellStart: i,
1137
- cellEnd,
1138
- contentStart,
1139
- contentEnd
1140
- });
1141
- i = cellEnd;
1142
- } else if (matchesClosingTagAt(html, i, "tr")) break;
1143
- else i++;
1144
- return cells;
1145
- }
1146
- function matchesTagAt(html, i, tagName) {
1147
- if (html[i] !== "<") return false;
1148
- if (html.slice(i + 1, i + 1 + tagName.length).toLowerCase() !== tagName) return false;
1149
- const after = html[i + 1 + tagName.length];
1150
- return after === ">" || after === " " || after === " " || after === "\n" || after === "\r" || after === "/";
1151
- }
1152
- function matchesClosingTagAt(html, i, tagName) {
1153
- if (html[i] !== "<" || html[i + 1] !== "/") return false;
1154
- if (html.slice(i + 2, i + 2 + tagName.length).toLowerCase() !== tagName) return false;
1155
- const after = html[i + 2 + tagName.length];
1156
- return after === ">" || after === " " || after === " " || after === "\n" || after === "\r";
1157
- }
1158
- function parseOpeningTagAt(html, i) {
1159
- if (html.startsWith("<!--", i)) {
1160
- const close = html.indexOf("-->", i + 4);
1161
- return close === -1 ? null : { end: close + 3 };
1162
- }
1163
- if (html.startsWith("<![CDATA[", i)) {
1164
- const close = html.indexOf("]]>", i + 9);
1165
- return close === -1 ? null : { end: close + 3 };
1166
- }
1167
- if (html.startsWith("<?", i)) {
1168
- const close = html.indexOf("?>", i + 2);
1169
- return close === -1 ? null : { end: close + 2 };
1170
- }
1171
- let j = i + 1;
1172
- let quote = null;
1173
- while (j < html.length) {
1174
- const ch = html[j];
1175
- if (quote) {
1176
- if (ch === quote) quote = null;
1177
- } else if (ch === "\"" || ch === "'") quote = ch;
1178
- else if (ch === ">") return { end: j + 1 };
1179
- j++;
1180
- }
1181
- return null;
1182
- }
1183
- /**
1184
- * Returns the index just past the matching `</tagName>`, accounting for
1185
- * nested tags of the same name. Returns -1 if no match before `limit`.
1186
- */
1187
- function findMatchingClosingTag(html, from, tagName, limit = html.length) {
1188
- let depth = 1;
1189
- let i = from;
1190
- while (i < limit) if (matchesTagAt(html, i, tagName)) {
1191
- const opening = parseOpeningTagAt(html, i);
1192
- if (!opening) {
1193
- i++;
1194
- continue;
1195
- }
1196
- if (!html.slice(i, opening.end).endsWith("/>")) depth++;
1197
- i = opening.end;
1198
- } else if (matchesClosingTagAt(html, i, tagName)) {
1199
- depth--;
1200
- const closingEnd = parseOpeningTagAt(html, i)?.end ?? i + `</${tagName}>`.length;
1201
- if (depth === 0) return closingEnd;
1202
- i = closingEnd;
1203
- } else i++;
1204
- return -1;
1205
- }
1206
- //#endregion
1207
205
  //#region src/WordSplitter.ts
1208
206
  var WordSplitter = class WordSplitter {
1209
207
  text;
@@ -1449,20 +447,9 @@ var HtmlDiff = class HtmlDiff {
1449
447
  "s",
1450
448
  "span"
1451
449
  ]);
1452
- /**
1453
- * Hard cap on nested `HtmlDiff.execute` calls (table preprocessing
1454
- * recurses through `diffCell` for cell content). Each level allocates
1455
- * fresh DP matrices and word arrays; without a guard a maliciously
1456
- * nested table-in-cell-in-table-in-cell input could blow stack and
1457
- * memory. Set high enough to comfortably handle real legal documents
1458
- * (tables nested 2-3 deep at most), low enough to short-circuit
1459
- * pathological input.
1460
- */
1461
- static MaxTablePreprocessDepth = 8;
1462
450
  content = [];
1463
451
  newText;
1464
452
  oldText;
1465
- tablePreprocessDepth;
1466
453
  specialTagDiffStack = [];
1467
454
  newWords = [];
1468
455
  oldWords = [];
@@ -1525,17 +512,13 @@ var HtmlDiff = class HtmlDiff {
1525
512
  * Initializes a new instance of the class.
1526
513
  * @param oldText The old text.
1527
514
  * @param newText The new text.
1528
- * @param tablePreprocessDepth Internal: nested-call depth for table
1529
- * preprocessing. Callers should leave at default (0); the recursive
1530
- * `diffCell` callback in TableDiff bumps it.
1531
515
  */
1532
- constructor(oldText, newText, tablePreprocessDepth = 0) {
516
+ constructor(oldText, newText) {
1533
517
  this.oldText = oldText;
1534
518
  this.newText = newText;
1535
- this.tablePreprocessDepth = tablePreprocessDepth;
1536
519
  }
1537
- static execute(oldText, newText, tablePreprocessDepth = 0) {
1538
- return new HtmlDiff(oldText, newText, tablePreprocessDepth).build();
520
+ static execute(oldText, newText) {
521
+ return new HtmlDiff(oldText, newText).build();
1539
522
  }
1540
523
  /**
1541
524
  * Builds the HTML diff output
@@ -1543,22 +526,6 @@ var HtmlDiff = class HtmlDiff {
1543
526
  */
1544
527
  build() {
1545
528
  if (this.oldText === this.newText) return this.newText;
1546
- const blockExpressions = this.blockExpressions;
1547
- const repeatingWordsAccuracy = this.repeatingWordsAccuracy;
1548
- const orphanMatchThreshold = this.orphanMatchThreshold;
1549
- const ignoreWhitespaceDifferences = this.ignoreWhitespaceDifferences;
1550
- const tablePreprocess = this.tablePreprocessDepth >= HtmlDiff.MaxTablePreprocessDepth ? null : preprocessTables(this.oldText, this.newText, (oldCell, newCell) => {
1551
- const inner = new HtmlDiff(oldCell, newCell, this.tablePreprocessDepth + 1);
1552
- for (const expr of blockExpressions) inner.addBlockExpression(expr);
1553
- inner.repeatingWordsAccuracy = repeatingWordsAccuracy;
1554
- inner.orphanMatchThreshold = orphanMatchThreshold;
1555
- inner.ignoreWhitespaceDifferences = ignoreWhitespaceDifferences;
1556
- return inner.build();
1557
- });
1558
- if (tablePreprocess) {
1559
- this.oldText = tablePreprocess.modifiedOld;
1560
- this.newText = tablePreprocess.modifiedNew;
1561
- }
1562
529
  this.splitInputsToWords();
1563
530
  this.buildContentProjections();
1564
531
  const wordsForDiffOld = this.oldContentWords ?? this.oldWords;
@@ -1566,8 +533,7 @@ var HtmlDiff = class HtmlDiff {
1566
533
  this.matchGranularity = Math.min(HtmlDiff.MatchGranularityMaximum, Math.min(wordsForDiffOld.length, wordsForDiffNew.length));
1567
534
  const operations = this.operations();
1568
535
  for (const op of operations) this.performOperation(op);
1569
- const result = this.content.join("");
1570
- return tablePreprocess ? restoreTablePlaceholders(result, tablePreprocess.placeholderToDiff) : result;
536
+ return this.content.join("");
1571
537
  }
1572
538
  /**
1573
539
  * Uses {@link expression} to group text together so that any change detected within the group is treated as a single block
@@ -1794,7 +760,7 @@ var HtmlDiff = class HtmlDiff {
1794
760
  if (words.slice(0, indexLastTagInFirstTagBlock + 1).some((w) => !HtmlDiff.SpecialCaseClosingTagsSet.has(w.toLowerCase()))) tagIndexToCompare = 0;
1795
761
  }
1796
762
  const openingAndClosingTagsMatch = !!openingTag && Utils_default.getTagName(openingTag) === Utils_default.getTagName(words[tagIndexToCompare]);
1797
- if (openingTag && openingAndClosingTagsMatch) {
763
+ if (!!openingTag && openingAndClosingTagsMatch) {
1798
764
  specialCaseTagInjection = "</ins>";
1799
765
  specialCaseTagInjectionIsBefore = true;
1800
766
  } else if (openingTag) this.specialTagDiffStack.push(openingTag);