@createiq/htmldiff 1.1.0 → 1.2.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/HtmlDiff.mjs CHANGED
@@ -48,8 +48,20 @@ function stripTagAttributes(word) {
48
48
  if (match) return `${match[0]}${word.endsWith("/>") ? "/>" : ">"}`;
49
49
  return word;
50
50
  }
51
- function wrapText(text, tagName, cssClass) {
52
- return `<${tagName} class='${cssClass}'>${text}</${tagName}>`;
51
/**
 * Wrap `text` in a `<tagName>` element that carries `cssClass`.
 *
 * When `metadata` is truthy the attribute portion is delegated to
 * `composeTagAttributes`; otherwise only the single-quoted class attribute
 * is emitted.
 *
 * @param {string} text - Inner content, inserted verbatim.
 * @param {string} tagName - Element name for both the opening and closing tag.
 * @param {string} cssClass - Base class for the element.
 * @param {object} [metadata] - Optional extra classes / data attributes.
 * @returns {string} The wrapped markup.
 */
function wrapText(text, tagName, cssClass, metadata) {
  const attributes = metadata ? composeTagAttributes(cssClass, metadata) : ` class='${cssClass}'`;
  return `<${tagName}${attributes}>${text}</${tagName}>`;
}
55
/**
 * Build the attribute portion of an opening tag from a base class plus
 * optional metadata. Exposed so emission paths that build opening-tag
 * fragments by hand (e.g. the formatting-tag special-case in
 * `HtmlDiff.insertTag`) can stay consistent with `wrapText`.
 *
 * Every value is written inside a single-quoted attribute, so any `'` in
 * the class list or in a data value is emitted as `&#39;`; otherwise the
 * value would terminate the attribute early. Other characters are passed
 * through untouched so callers that pre-escape `&`/`<` are not
 * double-escaped.
 *
 * @param {string} cssClass - Base class name(s).
 * @param {{extraClasses?: string, dataAttrs?: Object<string, unknown>}} [metadata]
 *   Optional decoration; when omitted only the base class is emitted.
 * @returns {string} A string starting with a space, e.g. ` class='a b' data-k='v'`.
 */
function composeTagAttributes(cssClass, metadata) {
  const escapeQuote = (value) => String(value).replace(/'/g, "&#39;");
  const classes = metadata?.extraClasses ? `${cssClass} ${metadata.extraClasses}` : cssClass;
  let out = ` class='${escapeQuote(classes)}'`;
  // A missing or falsy `dataAttrs` simply contributes no data-* attributes.
  for (const [key, value] of Object.entries(metadata?.dataAttrs || {})) {
    out += ` data-${key}='${escapeQuote(value)}'`;
  }
  return out;
}
54
66
  function isStartOfTag(val) {
55
67
  return val === "<";
@@ -83,6 +95,7 @@ var Utils_default = {
83
95
  isTag,
84
96
  stripTagAttributes,
85
97
  wrapText,
98
+ composeTagAttributes,
86
99
  isStartOfTag,
87
100
  isEndOfTag,
88
101
  isStartOfEntity,
@@ -200,11 +213,425 @@ var Operation = class {
200
213
  this.startInNew = startInNew;
201
214
  this.endInNew = endInNew;
202
215
  }
203
- };
216
+ };
217
+ //#endregion
218
+ //#region src/Alignment.ts
219
/**
 * Longest-common-subsequence alignment of two key sequences.
 *
 * Produces one entry per aligned position: `{ oldIdx, newIdx }` with both
 * set where the keys are strictly equal (`===`), and `null` on one side
 * for an entry that only exists on the other side. Entries are returned
 * in forward order.
 *
 * @param {Array} oldKeys - Keys of the old sequence.
 * @param {Array} newKeys - Keys of the new sequence.
 * @returns {Array<{oldIdx: number|null, newIdx: number|null}>}
 */
function lcsAlign(oldKeys, newKeys) {
  const oldCount = oldKeys.length;
  const newCount = newKeys.length;

  // table[r][c] = LCS length of the first r old keys and first c new keys.
  const table = [];
  for (let r = 0; r <= oldCount; r++) table.push(new Array(newCount + 1).fill(0));
  for (let r = 1; r <= oldCount; r++) {
    for (let c = 1; c <= newCount; c++) {
      table[r][c] = oldKeys[r - 1] === newKeys[c - 1]
        ? table[r - 1][c - 1] + 1
        : Math.max(table[r - 1][c], table[r][c - 1]);
    }
  }

  // Walk back from the bottom-right corner, collecting entries in reverse.
  const collected = [];
  let r = oldCount;
  let c = newCount;
  while (r > 0 || c > 0) {
    if (r > 0 && c > 0 && oldKeys[r - 1] === newKeys[c - 1]) {
      collected.push({ oldIdx: r - 1, newIdx: c - 1 });
      r--;
      c--;
      continue;
    }
    // On a tie the new-side entry is taken first while walking backwards.
    const takeNewSide = c > 0 && (r === 0 || table[r][c - 1] >= table[r - 1][c]);
    if (takeNewSide) {
      collected.push({ oldIdx: null, newIdx: c - 1 });
      c--;
    } else {
      collected.push({ oldIdx: r - 1, newIdx: null });
      r--;
    }
  }
  return collected.reverse();
}
257
/**
 * Choose which positions of the longer sequence to skip so that the
 * remaining longer items, paired positionally with the shorter items,
 * maximise the summed pairwise similarity.
 *
 * Equivalent in result to scoring every combination of skip positions,
 * but computed with an O(M × N) dynamic programme:
 *
 *   best(s, l) = best score using s shorter and l longer items (l >= s)
 *   best(0, l) = 0
 *   best(s, l) = max(best(s-1, l-1) + similarity(s-1, l-1),   // pair
 *                    best(s, l-1))                            // skip longer[l-1]
 *
 * Ties favour pairing over skipping. Because the backtrack runs from the
 * end, that pushes skips towards EARLIER positions. The backtrack repeats
 * the exact pair-vs-skip comparison used during the fill so the tie
 * direction is the same in both phases.
 *
 * The caller must ensure `longerTexts.length >= shorterTexts.length`.
 *
 * @param {Array} shorterTexts - The shorter sequence (only its length is read here).
 * @param {Array} longerTexts - The longer sequence (only its length is read here).
 * @param {(shorterIdx: number, longerIdx: number) => number} similarity
 * @returns {number[]} Ascending indices into the longer sequence to skip.
 */
function findOptimalAlignmentSkips(shorterTexts, longerTexts, similarity) {
  const shortCount = shorterTexts.length;
  const longCount = longerTexts.length;

  const best = [];
  for (let row = 0; row <= shortCount; row++) best.push(new Array(longCount + 1).fill(0));
  for (let row = 1; row <= shortCount; row++) {
    for (let col = row; col <= longCount; col++) {
      const pairScore = best[row - 1][col - 1] + similarity(row - 1, col - 1);
      // On the diagonal every longer item is needed, so skipping is impossible.
      const skipScore = col > row ? best[row][col - 1] : Number.NEGATIVE_INFINITY;
      best[row][col] = pairScore >= skipScore ? pairScore : skipScore;
    }
  }

  const skippedDescending = [];
  let s = shortCount;
  let l = longCount;
  while (l > 0) {
    // With no shorter items left everything is skipped; on the diagonal
    // everything is paired; otherwise re-run the fill's comparison.
    const pairWins =
      s > 0 &&
      (l === s || best[s - 1][l - 1] + similarity(s - 1, l - 1) >= best[s][l - 1]);
    if (pairWins) s--;
    else skippedDescending.push(l - 1);
    l--;
  }
  return skippedDescending.reverse();
}
318
/**
 * Within every run of unmatched entries (exactly one of `oldIdx`/`newIdx`
 * is null), pair deletes with sufficiently similar inserts and return a
 * new alignment in which each pair is a single `{ oldIdx, newIdx }` entry.
 *
 * The merged entry is emitted at the *insert's* position and the delete's
 * slot is dropped, which keeps `newIdx` monotonically non-decreasing for
 * emitters that walk the new sequence in order.
 *
 * Assignment is greedy: deletes are visited in document order and each
 * takes the still-unused insert with the highest similarity strictly
 * above `threshold`. A delete that finds no such insert stays a delete.
 *
 * Entries that are fully matched — or malformed with both indices null —
 * are passed through untouched. Without that guard a both-null entry
 * would never be consumed and the scan would loop forever.
 *
 * @param {Array<{oldIdx: number|null, newIdx: number|null}>} alignment
 * @param {number} threshold - Similarity must be strictly greater than this to pair.
 * @param {(oldIdx: number, newIdx: number) => number} similarity
 * @returns {Array<{oldIdx: number|null, newIdx: number|null}>}
 */
function pairSimilarUnmatched(alignment, threshold, similarity) {
  const isUnmatched = (entry) => (entry.oldIdx === null) !== (entry.newIdx === null);

  // Maps alignment index of a paired delete -> alignment index of its insert.
  const pairs = new Map();
  let i = 0;
  while (i < alignment.length) {
    if (!isUnmatched(alignment[i])) {
      i++;
      continue;
    }
    const delIndices = [];
    const insIndices = [];
    for (; i < alignment.length && isUnmatched(alignment[i]); i++) {
      if (alignment[i].oldIdx !== null) delIndices.push(i);
      else insIndices.push(i);
    }
    const usedIns = new Set();
    for (const di of delIndices) {
      let bestIi = -1;
      let bestSim = threshold;
      for (const ii of insIndices) {
        if (usedIns.has(ii)) continue;
        const sim = similarity(alignment[di].oldIdx, alignment[ii].newIdx);
        if (sim > bestSim) {
          bestSim = sim;
          bestIi = ii;
        }
      }
      if (bestIi >= 0) {
        pairs.set(di, bestIi);
        usedIns.add(bestIi);
      }
    }
  }

  const insToDel = new Map();
  for (const [delAi, insAi] of pairs) insToDel.set(insAi, delAi);

  const result = [];
  alignment.forEach((entry, k) => {
    if (pairs.has(k)) return; // paired delete: re-emitted at its insert's slot
    const delAi = insToDel.get(k);
    result.push(
      delAi === undefined
        ? entry
        : { oldIdx: alignment[delAi].oldIdx, newIdx: entry.newIdx }
    );
  });
  return result;
}
382
/**
 * Sort alignment entries into the order a cursor-based emitter walking
 * the new sequence should output them. Each entry gets a sort key:
 *
 *   • Entry with a `newIdx` (preserved/paired or pure insert):
 *     primary = newIdx; a preserved entry sorts before a pure insert at
 *     the same primary.
 *   • Pure delete (`newIdx === null`): primary = (newIdx of the closest
 *     preserved entry with a smaller oldIdx, or -1 if none) + 0.5, and
 *     deletes sharing a gap are ordered by oldIdx. The half step puts a
 *     delete after its preceding preserved entry and before an insert at
 *     the next newIdx ("delete first, insert second").
 *
 * Consequently deletes with no preserved predecessor land at -0.5 (the
 * very start), and deletes after the last preserved entry land just
 * after it. Remaining ties keep the input order.
 *
 * NB: 0.5 is the only fractional offset in use. If another entry kind
 * ever needs a fractional slot, replace this scheme with a discrete
 * composite key rather than adding a second magic offset.
 *
 * @param {Array<{oldIdx: number|null, newIdx: number|null}>} alignment
 * @returns {Array<{oldIdx: number|null, newIdx: number|null}>} The same
 *   entry objects, reordered.
 */
function orderAlignmentForEmission(alignment) {
  const anchors = alignment
    .filter((entry) => entry.oldIdx !== null && entry.newIdx !== null)
    .map(({ oldIdx, newIdx }) => ({ oldIdx, newIdx }))
    .sort((left, right) => left.oldIdx - right.oldIdx);

  // newIdx of the last anchor whose oldIdx is strictly below `oldIdx`, else -1.
  const anchorNewIdxBefore = (oldIdx) => {
    let found = -1;
    for (const anchor of anchors) {
      if (anchor.oldIdx >= oldIdx) break;
      found = anchor.newIdx;
    }
    return found;
  };

  const keyed = alignment.map((entry, originalIdx) => {
    if (entry.newIdx === null) {
      return {
        entry,
        primary: anchorNewIdxBefore(entry.oldIdx) + 0.5,
        secondary: entry.oldIdx,
        originalIdx,
      };
    }
    return {
      entry,
      primary: entry.newIdx,
      secondary: entry.oldIdx === null ? 1 : 0,
      originalIdx,
    };
  });

  keyed.sort((left, right) => {
    for (const field of ["primary", "secondary"]) {
      if (left[field] !== right[field]) return left[field] - right[field];
    }
    return left.originalIdx - right.originalIdx;
  });

  return keyed.map((item) => item.entry);
}
453
/**
 * Similarity score used for fuzzy pairing: the larger of two
 * complementary measures.
 *
 *   1. Character prefix+suffix similarity — how much of the longer string
 *      is covered by the shared prefix plus shared suffix. Good at small
 *      edits in the middle of a string; blind when the shared content
 *      sits in the middle and the ends differ.
 *   2. Token Jaccard similarity — intersection-over-union of the
 *      whitespace-separated tokens. Good at "same body, different
 *      bookends", which measure 1 cannot see.
 *
 * Identical inputs score 1 and an empty input against a non-empty one
 * scores 0, both without consulting either measure.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
function textSimilarity(a, b) {
  if (a === b) return 1;
  const eitherEmpty = a.length === 0 || b.length === 0;
  if (eitherEmpty) return 0;
  const byCharacters = charPrefixSuffixSimilarity(a, b);
  const byTokens = tokenJaccardSimilarity(a, b);
  return Math.max(byCharacters, byTokens);
}
478
/**
 * Fraction of the longer string covered by the prefix and suffix the two
 * strings share. The suffix scan never overlaps the prefix, so the result
 * is at most 1.
 *
 * Two empty strings are identical and score 1; without this guard the
 * division below would be 0/0 and yield NaN.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} A value in [0, 1].
 */
function charPrefixSuffixSimilarity(a, b) {
  const longest = Math.max(a.length, b.length);
  if (longest === 0) return 1;
  const shortest = Math.min(a.length, b.length);
  let prefix = 0;
  while (prefix < shortest && a[prefix] === b[prefix]) prefix++;
  // Only the part of the shorter string not already claimed by the prefix
  // is available to the suffix.
  const suffixBudget = shortest - prefix;
  let suffix = 0;
  while (suffix < suffixBudget && a[a.length - 1 - suffix] === b[b.length - 1 - suffix]) suffix++;
  return (prefix + suffix) / longest;
}
486
/**
 * Jaccard similarity (intersection over union) of the distinct
 * whitespace-separated tokens of two strings.
 *
 * Two token-less inputs score 1. After that early return at least one
 * set is non-empty, so the union is always positive and no separate
 * zero-union check is needed.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} A value in [0, 1].
 */
function tokenJaccardSimilarity(a, b) {
  const tokenize = (text) => new Set(text.split(/\s+/).filter(Boolean));
  const tokensA = tokenize(a);
  const tokensB = tokenize(b);
  if (tokensA.size === 0 && tokensB.size === 0) return 1;
  let intersection = 0;
  for (const token of tokensA) if (tokensB.has(token)) intersection++;
  return intersection / (tokensA.size + tokensB.size - intersection);
}
495
+ //#endregion
496
+ //#region src/HtmlScanner.ts
497
/**
 * Find where the markup construct starting at `html[i]` ends.
 *
 * Comments (`<!-- … -->`), CDATA sections (`<![CDATA[ … ]]>`) and
 * processing instructions (`<? … ?>`) are closed by their own terminator,
 * so an inner `>` (as in `<!-- a > b -->`) does not cut them short.
 * Anything else is scanned as a tag: the first `>` outside a single- or
 * double-quoted attribute value ends it.
 *
 * @param {string} html
 * @param {number} i - Index of the opening `<`.
 * @returns {{end: number}|null} `end` is the index just past the closing
 *   delimiter; null when the construct is unterminated.
 */
function parseOpeningTagAt(html, i) {
  const delimited = [
    ["<!--", "-->"],
    ["<![CDATA[", "]]>"],
    ["<?", "?>"],
  ];
  for (const [opener, closer] of delimited) {
    if (!html.startsWith(opener, i)) continue;
    const closeAt = html.indexOf(closer, i + opener.length);
    return closeAt === -1 ? null : { end: closeAt + closer.length };
  }

  let openQuote = null;
  for (let pos = i + 1; pos < html.length; pos++) {
    const ch = html[pos];
    if (openQuote) {
      if (ch === openQuote) openQuote = null;
      continue;
    }
    if (ch === "\"" || ch === "'") openQuote = ch;
    else if (ch === ">") return { end: pos + 1 };
  }
  return null;
}
532
/**
 * Report whether an opening `<tagName …>` starts at `html[i]`.
 *
 * The name comparison lower-cases the input, so `tagName` is expected in
 * lower case. The character after the name must end the tag name: `>`,
 * `/`, or whitespace (space, tab, line feed, carriage return, form feed).
 * This stops `td` from matching `<tdx>`.
 *
 * @param {string} html
 * @param {number} i - Index to test.
 * @param {string} tagName - Lower-case tag name.
 * @returns {boolean}
 */
function matchesTagAt(html, i, tagName) {
  if (html[i] !== "<") return false;
  const nameEnd = i + 1 + tagName.length;
  if (html.slice(i + 1, nameEnd).toLowerCase() !== tagName) return false;
  const after = html[nameEnd];
  // `after` is undefined when the input ends right after the name.
  return after !== undefined && ">/ \t\n\r\f".includes(after);
}
538
/**
 * Report whether a closing `</tagName>` starts at `html[i]`.
 *
 * The name comparison lower-cases the input, so `tagName` is expected in
 * lower case. The character after the name must be `>` or whitespace
 * (space, tab, line feed, carriage return, form feed), so `</tdx>` does
 * not match `td`.
 *
 * @param {string} html
 * @param {number} i - Index to test.
 * @param {string} tagName - Lower-case tag name.
 * @returns {boolean}
 */
function matchesClosingTagAt(html, i, tagName) {
  if (html[i] !== "<" || html[i + 1] !== "/") return false;
  const nameEnd = i + 2 + tagName.length;
  if (html.slice(i + 2, nameEnd).toLowerCase() !== tagName) return false;
  const after = html[nameEnd];
  // `after` is undefined when the input ends right after the name.
  return after !== undefined && "> \t\n\r\f".includes(after);
}
544
/**
 * Locate the `</tagName>` that closes an element whose opening tag has
 * already been consumed, counting nested elements of the same name.
 *
 * Scanning starts at `from` with a nesting depth of 1. Each non-self-
 * closing opening tag of the same name raises the depth, each closing tag
 * lowers it, and the scan succeeds when the depth reaches zero.
 *
 * @param {string} html
 * @param {number} from - Index at which to start scanning.
 * @param {string} tagName - Tag name, as expected by `matchesTagAt`.
 * @param {number} [limit=html.length] - Scanning stops once the position reaches this index.
 * @returns {number} Index just past the matching closing tag, or -1 if
 *   none is found before `limit`.
 */
function findMatchingClosingTag(html, from, tagName, limit = html.length) {
  let openCount = 1;
  let pos = from;
  while (pos < limit) {
    if (matchesTagAt(html, pos, tagName)) {
      const parsed = parseOpeningTagAt(html, pos);
      if (!parsed) {
        // Unterminated tag: step over the `<` and keep scanning.
        pos++;
        continue;
      }
      const selfClosing = html.slice(pos, parsed.end).endsWith("/>");
      if (!selfClosing) openCount++;
      pos = parsed.end;
      continue;
    }
    if (matchesClosingTagAt(html, pos, tagName)) {
      openCount--;
      // If the closing tag cannot be parsed, assume the canonical `</name>` length.
      const afterClose = parseOpeningTagAt(html, pos)?.end ?? pos + `</${tagName}>`.length;
      if (openCount === 0) return afterClose;
      pos = afterClose;
      continue;
    }
    pos++;
  }
  return -1;
}
567
/**
 * Return `openingTag` with the class token(s) in `cls` added.
 *
 * The real `class` attribute is located by walking the attributes (see
 * `findClassAttribute`) rather than by a flat regex, so text such as
 * `title="see class='x'"` is never mistaken for it. Tokens that are
 * already present are not repeated: injecting `mod colspan` into
 * `class="mod"` yields `class="mod colspan"`.
 *
 * If the existing value was written without quotes (`class=mod`), the
 * updated value is wrapped in quotes. An unquoted value cannot contain a
 * space, so `class=mod colspan` would otherwise parse as two attributes.
 *
 * When no `class` attribute exists, ` class='<cls>'` is inserted before
 * the closing `>` (or `/>`).
 *
 * @param {string} openingTag - A complete opening tag, e.g. `<td colspan="2">`.
 * @param {string} cls - One or more whitespace-separated class tokens.
 * @returns {string} The updated opening tag.
 */
function injectClass(openingTag, cls) {
  const clsTokens = cls.split(/\s+/).filter(Boolean);
  if (clsTokens.length === 0) return openingTag;

  const classAttr = findClassAttribute(openingTag);
  if (classAttr) {
    const existingTokens = classAttr.value.split(/\s+/).filter(Boolean);
    const missing = clsTokens.filter((token) => !existingTokens.includes(token));
    if (missing.length === 0) return openingTag;
    const updatedValue = [...existingTokens, ...missing].join(" ");
    // For a quoted value the character just before it is the opening quote.
    const opener = openingTag[classAttr.valueStart - 1];
    const isQuoted = opener === "\"" || opener === "'";
    let replacement = updatedValue;
    if (!isQuoted) {
      // Pick a quote character that does not occur in the value itself.
      const quote = updatedValue.includes("'") ? "\"" : "'";
      replacement = `${quote}${updatedValue}${quote}`;
    }
    return openingTag.slice(0, classAttr.valueStart) + replacement + openingTag.slice(classAttr.valueEnd);
  }

  const insertAt = openingTag.endsWith("/>") ? openingTag.length - 2 : openingTag.length - 1;
  return `${openingTag.slice(0, insertAt).replace(/\s*$/, "")} class='${cls}'${openingTag.slice(insertAt)}`;
}
/**
 * Walk the attributes of an opening tag (respecting quoted values) and
 * locate the `class` attribute, matched case-insensitively.
 *
 * An unquoted value runs to the next whitespace or `>`, so a `/` inside
 * it (e.g. `href=/a/b`) belongs to the value. The one exception is the
 * `/` of a trailing `/>`, which is treated as the self-closing marker. A
 * stray `/` between attributes is skipped. Attributes without a value are
 * stepped over.
 *
 * @param {string} openingTag
 * @returns {{valueStart: number, valueEnd: number, value: string}|null}
 *   Start/end offsets of the value content (surrounding quotes excluded)
 *   plus the value itself, or null when there is no `class` attribute
 *   with a value.
 */
function findClassAttribute(openingTag) {
  const len = openingTag.length;
  let i = 1;
  // Step over the tag name.
  while (i < len && /[A-Za-z0-9_:-]/.test(openingTag[i])) i++;
  while (i < len) {
    while (i < len && /\s/.test(openingTag[i])) i++;
    if (i >= len) break;
    if (openingTag[i] === ">") break;
    if (openingTag[i] === "/") {
      // Self-closing slash or a stray solidus: not part of any attribute.
      i++;
      continue;
    }
    const nameStart = i;
    while (i < len && !/[\s=>/]/.test(openingTag[i])) i++;
    const name = openingTag.slice(nameStart, i);
    while (i < len && /\s/.test(openingTag[i])) i++;
    if (openingTag[i] !== "=") continue; // valueless attribute
    i++;
    while (i < len && /\s/.test(openingTag[i])) i++;
    let valueStart;
    let valueEnd;
    if (openingTag[i] === "\"" || openingTag[i] === "'") {
      const quote = openingTag[i];
      i++;
      valueStart = i;
      while (i < len && openingTag[i] !== quote) i++;
      valueEnd = i;
      if (i < len) i++;
    } else {
      valueStart = i;
      while (i < len && !/[\s>]/.test(openingTag[i])) i++;
      // `<br class=x/>`: leave the slash of the final `/>` out of the value.
      if (i === len - 1 && openingTag[i] === ">" && i > valueStart && openingTag[i - 1] === "/") i--;
      valueEnd = i;
    }
    if (name.toLowerCase() === "class") {
      return {
        valueStart,
        valueEnd,
        value: openingTag.slice(valueStart, valueEnd)
      };
    }
  }
  return null;
}
204
632
  //#endregion
205
633
  //#region src/TableDiff.ts
206
634
  const PLACEHOLDER_PREFIX_BASE = "<!--HTMLDIFF_TABLE_";
207
- const PLACEHOLDER_SUFFIX = "-->";
208
635
  /**
209
636
  * Hard cap on table dimensions handled by the structural-aware path.
210
637
  * The row-LCS is O(rows²), the per-row cell-LCS is O(cells²), and each
@@ -217,11 +644,16 @@ const PLACEHOLDER_SUFFIX = "-->";
217
644
  const MAX_TABLE_ROWS = 1500;
218
645
  const MAX_TABLE_CELLS_PER_ROW = 200;
219
646
  const MAX_COLUMN_DELTA = 6;
220
- const MAX_COLUMN_SEARCH_WIDTH = 40;
221
- function makePlaceholderPrefix(oldHtml, newHtml) {
647
+ const MAX_COLUMN_SEARCH_WIDTH = 200;
648
/**
 * Produce a placeholder prefix (base + random 8-hex-digit nonce + `_`)
 * that does not occur in any of the given inputs. Variadic so a caller
 * with N inputs (e.g. a three-way diff over V1/V2/V3) can check all of
 * them at once.
 *
 * Up to eight random candidates are tried. If every one collides, a
 * timestamp-based fallback prefix is returned without a collision check.
 *
 * @param {...string} inputs - Strings the prefix must not appear in.
 * @returns {string}
 */
function makePlaceholderPrefix(...inputs) {
  let attemptsLeft = 8;
  while (attemptsLeft-- > 0) {
    const nonce = Math.floor(Math.random() * 4294967295).toString(16).padStart(8, "0");
    const candidate = `${PLACEHOLDER_PREFIX_BASE}${nonce}_`;
    const collides = inputs.some((input) => input.includes(candidate));
    if (!collides) return candidate;
  }
  return `${PLACEHOLDER_PREFIX_BASE}fallback_${Date.now()}_`;
}
@@ -248,7 +680,7 @@ function preprocessTables(oldHtml, newHtml, diffCell) {
248
680
  const placeholderPrefix = makePlaceholderPrefix(oldHtml, newHtml);
249
681
  const placeholderToDiff = /* @__PURE__ */ new Map();
250
682
  for (let i = pairs.length - 1; i >= 0; i--) {
251
- const placeholder = `${placeholderPrefix}${i}${PLACEHOLDER_SUFFIX}`;
683
+ const placeholder = `${placeholderPrefix}${i}-->`;
252
684
  placeholderToDiff.set(placeholder, pairs[i].diffed);
253
685
  modifiedOld = spliceString(modifiedOld, pairs[i].oldTable.tableStart, pairs[i].oldTable.tableEnd, placeholder);
254
686
  modifiedNew = spliceString(modifiedNew, pairs[i].newTable.tableStart, pairs[i].newTable.tableEnd, placeholder);
@@ -428,72 +860,6 @@ function diffStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, diff
428
860
  out.push(newHtml.slice(cursor, newTable.tableEnd));
429
861
  return out.join("");
430
862
  }
431
- /**
432
- * Reorders the alignment so emission produces rows in the visually-
433
- * correct order. Each entry is assigned a fractional "position" in
434
- * new's flow:
435
- *
436
- * • Preserved/paired (oldIdx, newIdx): position = newIdx.
437
- * • Pure insert (null, newIdx): position = newIdx.
438
- * • Pure delete (oldIdx, null): position = newIdx-of-preserved-just-
439
- * before-this-oldIdx + 0.5. Dels at the same gap sort by oldIdx so
440
- * they appear in old's row order. The +0.5 places dels BEFORE any
441
- * insert at the same gap (insert at newIdx N1+1 has position N1+1
442
- * which is > N1+0.5), giving the natural "delete first, insert
443
- * second" reading order at a replaced position.
444
- *
445
- * This handles the full range:
446
- * • Run of unpaired dels at the start (no preserved predecessor):
447
- * position -0.5, sorted by oldIdx.
448
- * • Dels in the middle: positioned right after their preceding
449
- * preserved row.
450
- * • Dels at the end (no preserved successor): positioned after the
451
- * last preserved row.
452
- *
453
- * Without this reordering, a run of unpaired deletes at low alignment
454
- * indices got emitted at cursor = first-new-row position — putting
455
- * all deletes before any preserved row in the output, regardless of
456
- * where they came from in old.
457
- */
458
- function orderAlignmentForEmission(alignment) {
459
- const preserved = [];
460
- for (const a of alignment) if (a.oldIdx !== null && a.newIdx !== null) preserved.push({
461
- oldIdx: a.oldIdx,
462
- newIdx: a.newIdx
463
- });
464
- preserved.sort((a, b) => a.oldIdx - b.oldIdx);
465
- function newIdxOfPreservedBefore(oldIdx) {
466
- let result = -1;
467
- for (const p of preserved) {
468
- if (p.oldIdx >= oldIdx) break;
469
- result = p.newIdx;
470
- }
471
- return result;
472
- }
473
- const decorated = alignment.map((a, i) => {
474
- let primary;
475
- let secondary;
476
- if (a.newIdx !== null) {
477
- primary = a.newIdx;
478
- secondary = a.oldIdx === null ? 1 : 0;
479
- } else {
480
- primary = newIdxOfPreservedBefore(a.oldIdx) + .5;
481
- secondary = a.oldIdx;
482
- }
483
- return {
484
- entry: a,
485
- primary,
486
- secondary,
487
- originalIdx: i
488
- };
489
- });
490
- decorated.sort((a, b) => {
491
- if (a.primary !== b.primary) return a.primary - b.primary;
492
- if (a.secondary !== b.secondary) return a.secondary - b.secondary;
493
- return a.originalIdx - b.originalIdx;
494
- });
495
- return decorated.map((d) => d.entry);
496
- }
497
863
  function rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment) {
498
864
  const out = [];
499
865
  out.push(headerSlice(newHtml, newTable, oldHtml, oldTable));
@@ -518,21 +884,21 @@ function diffPreservedRow(oldHtml, newHtml, oldRow, newRow, diffCell) {
518
884
  const delta = newRow.cells.length - oldRow.cells.length;
519
885
  const absDelta = Math.abs(delta);
520
886
  if (absDelta > 0 && absDelta <= MAX_COLUMN_DELTA && Math.max(oldRow.cells.length, newRow.cells.length) <= MAX_COLUMN_SEARCH_WIDTH) {
521
- if (delta > 0) return diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, delta, diffCell);
522
- return diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, -delta, diffCell);
887
+ if (delta > 0) return diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, diffCell);
888
+ return diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, diffCell);
523
889
  }
524
890
  return diffStructurallyAlignedRow(oldHtml, newHtml, oldRow, newRow, diffCell);
525
891
  }
526
892
  /**
527
- * For a row where new has K more cells than old, find the K column
528
- * positions in new where cells were inserted by scanning all C(newCount,
529
- * K) combinations and picking the one that maximises positional content
530
- * similarity with the remaining cells. The inserted cells are emitted
531
- * with diff markers; the rest are aligned positionally with content
532
- * diff for matched pairs.
893
+ * For a row where new has more cells than old, find the column positions
894
+ * in new where cells were inserted by running a monotonic-alignment DP
895
+ * over the cell texts: pick the skip positions that maximise the sum-of-
896
+ * similarities of the unskipped new cells aligned positionally against
897
+ * the old cells. The inserted cells are emitted with diff markers; the
898
+ * rest are aligned positionally with content diff for matched pairs.
533
899
  */
534
- function diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, k, diffCell) {
535
- const insertedPositions = findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml);
900
+ function diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, diffCell) {
901
+ const insertedPositions = findBestColumnInsertPositions(oldRow, newRow, oldHtml, newHtml);
536
902
  const inserted = new Set(insertedPositions);
537
903
  const out = [rowHeaderSlice(newHtml, newRow)];
538
904
  let oldIdx = 0;
@@ -544,8 +910,8 @@ function diffMultiColumnAddRow(oldHtml, newHtml, oldRow, newRow, k, diffCell) {
544
910
  out.push("</tr>");
545
911
  return out.join("");
546
912
  }
547
- function diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, k, diffCell) {
548
- const deletedPositions = findBestColumnDeletePositions(oldRow, newRow, k, oldHtml, newHtml);
913
+ function diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, diffCell) {
914
+ const deletedPositions = findBestColumnDeletePositions(oldRow, newRow, oldHtml, newHtml);
549
915
  const deleted = new Set(deletedPositions);
550
916
  const out = [rowHeaderSlice(newHtml, newRow)];
551
917
  let newIdx = 0;
@@ -560,64 +926,15 @@ function diffMultiColumnDeleteRow(oldHtml, newHtml, oldRow, newRow, k, diffCell)
560
926
  out.push("</tr>");
561
927
  return out.join("");
562
928
  }
563
- function findBestColumnInsertPositions(oldRow, newRow, k, oldHtml, newHtml) {
929
/**
 * For a row that gained cells, return the indices in `newRow.cells` that
 * are best treated as inserted. Cell texts are extracted with `cellText`
 * and the choice is delegated to `findOptimalAlignmentSkips`, with the
 * old row as the shorter sequence and `textSimilarity` as the score.
 *
 * @returns {number[]} Indices into `newRow.cells`.
 */
function findBestColumnInsertPositions(oldRow, newRow, oldHtml, newHtml) {
  const oldTexts = oldRow.cells.map((cell) => cellText(oldHtml, cell));
  const newTexts = newRow.cells.map((cell) => cellText(newHtml, cell));
  const scorePair = (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
  return findOptimalAlignmentSkips(oldTexts, newTexts, scorePair);
}
584
- function findBestColumnDeletePositions(oldRow, newRow, k, oldHtml, newHtml) {
934
/**
 * For a row that lost cells, return the indices in `oldRow.cells` that
 * are best treated as deleted. Mirrors the insert case with the roles
 * swapped: the new row is the shorter sequence passed to
 * `findOptimalAlignmentSkips`, so the scoring callback receives
 * `(newIdx, oldIdx)`.
 *
 * @returns {number[]} Indices into `oldRow.cells`.
 */
function findBestColumnDeletePositions(oldRow, newRow, oldHtml, newHtml) {
  const oldTexts = oldRow.cells.map((cell) => cellText(oldHtml, cell));
  const newTexts = newRow.cells.map((cell) => cellText(newHtml, cell));
  const scorePair = (newIdx, oldIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]);
  return findOptimalAlignmentSkips(newTexts, oldTexts, scorePair);
}
622
939
  /**
623
940
  * Try to align cells by logical column position (sum of colspans). When
@@ -809,253 +1126,44 @@ function emitDiffedCell(oldHtml, newHtml, oldCell, newCell, diffCell) {
809
1126
  }
810
1127
  function rowHeaderSlice(html, row) {
811
1128
  const opening = parseOpeningTagAt(html, row.rowStart);
812
- if (!opening) return "";
813
- if (row.cells.length === 0) return html.slice(row.rowStart, opening.end);
814
- return html.slice(row.rowStart, row.cells[0].cellStart);
815
- }
816
- /** Character-level similarity threshold above which we treat two rows as "the same row, edited". */
817
- const ROW_FUZZY_THRESHOLD = .5;
818
- /**
819
- * Threshold for "this cell is a content-edit of that cell." Tuned the same
820
- * as ROW_FUZZY_THRESHOLD; cells in legal docs that share most of their
821
- * content typically ARE the same logical cell with a body edit, so 0.5
822
- * works for both granularities in practice.
823
- */
824
- const CELL_FUZZY_THRESHOLD = .5;
825
- /**
826
- * After exact LCS, scan the alignment for runs of "old deleted, then new
827
- * inserted" (or vice versa) and pair entries whose content is similar
828
- * enough to be treated as an edit rather than a delete+insert. This keeps
829
- * row-level edits (a typo fix, a single word change) from being shown as
830
- * an entire row vanishing and a new one appearing — matching what users
831
- * expect from a typical track-changes view.
832
- */
833
- function pairSimilarUnmatchedRows(alignment, oldTable, newTable, oldHtml, newHtml) {
834
- const oldTexts = oldTable.rows.map((r) => rowText(oldHtml, r));
835
- const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
836
- return pairSimilarUnmatched(alignment, ROW_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
837
- }
838
- function pairSimilarUnmatchedCells(alignment, oldRow, newRow, oldHtml, newHtml) {
839
- const oldTexts = oldRow.cells.map((c) => cellText(oldHtml, c));
840
- const newTexts = newRow.cells.map((c) => cellText(newHtml, c));
841
- return pairSimilarUnmatched(alignment, CELL_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
842
- }
843
- /**
844
- * Identify pairings inside each unmatched-only run, then build the output
845
- * alignment by walking the original and substituting paired entries at
846
- * the *ins position* (not the del position). This keeps the result
847
- * monotonic in newIdx — critical because the cursor-based emission
848
- * downstream walks new's html in order. Emitting at the del position
849
- * would be fine when del<ins in the alignment array (the typical case),
850
- * but can violate monotonicity when there are mixed unpaired entries in
851
- * between (column-add + row-add together, content-edit + column-add,
852
- * etc.).
853
- *
854
- * Generic over what's being paired — works for both rows (by full row
855
- * content similarity) and cells (by per-cell content similarity).
856
- */
857
- function pairSimilarUnmatched(alignment, threshold, similarity) {
858
- const pairs = /* @__PURE__ */ new Map();
859
- let i = 0;
860
- while (i < alignment.length) {
861
- if (alignment[i].oldIdx !== null && alignment[i].newIdx !== null) {
862
- i++;
863
- continue;
864
- }
865
- const runStart = i;
866
- while (i < alignment.length && alignment[i].oldIdx === null !== (alignment[i].newIdx === null)) i++;
867
- const runEnd = i;
868
- const delIndices = [];
869
- const insIndices = [];
870
- for (let k = runStart; k < runEnd; k++) if (alignment[k].oldIdx !== null) delIndices.push(k);
871
- else insIndices.push(k);
872
- const usedIns = /* @__PURE__ */ new Set();
873
- for (const di of delIndices) {
874
- let bestIi = -1;
875
- let bestSim = threshold;
876
- for (const ii of insIndices) {
877
- if (usedIns.has(ii)) continue;
878
- const sim = similarity(alignment[di].oldIdx, alignment[ii].newIdx);
879
- if (sim > bestSim) {
880
- bestSim = sim;
881
- bestIi = ii;
882
- }
883
- }
884
- if (bestIi >= 0) {
885
- pairs.set(di, bestIi);
886
- usedIns.add(bestIi);
887
- }
888
- }
889
- }
890
- const insToDel = /* @__PURE__ */ new Map();
891
- for (const [delAi, insAi] of pairs) insToDel.set(insAi, delAi);
892
- const pairedDels = new Set(pairs.keys());
893
- const result = [];
894
- for (let k = 0; k < alignment.length; k++) {
895
- if (pairedDels.has(k)) continue;
896
- if (insToDel.has(k)) {
897
- const delAi = insToDel.get(k);
898
- result.push({
899
- oldIdx: alignment[delAi].oldIdx,
900
- newIdx: alignment[k].newIdx
901
- });
902
- } else result.push(alignment[k]);
903
- }
904
- return result;
905
- }
906
- /**
907
- * Combined similarity metric used for both row-level and cell-level
908
- * fuzzy pairing. Returns the MAX of two complementary metrics:
909
- *
910
- * 1. **Character prefix+suffix similarity** — fraction of the longer
911
- * string covered by shared prefix + shared suffix. Catches small
912
- * edits in the middle of a string (one word changed in a row).
913
- * Misses cases where the bulk of common content is in the middle
914
- * and the ends differ.
915
- *
916
- * 2. **Token Jaccard similarity** — intersection-over-union of the
917
- * whitespace-split tokens. Catches "most of the content is the
918
- * same but bookended by different bits" — e.g. a row whose only
919
- * edit is a column added at the start and another at the end,
920
- * where the ~50 chars in the middle that DO match would be
921
- * invisible to prefix+suffix.
922
- *
923
- * Either metric exceeding the threshold means pair. Neither alone is
924
- * sufficient for the full range of legal-doc edits we see in
925
- * production tables.
926
- */
927
- function textSimilarity(a, b) {
928
- if (a === b) return 1;
929
- if (a.length === 0 || b.length === 0) return 0;
930
- return Math.max(charPrefixSuffixSimilarity(a, b), tokenJaccardSimilarity(a, b));
931
- }
932
- function charPrefixSuffixSimilarity(a, b) {
933
- let prefix = 0;
934
- const minLen = Math.min(a.length, b.length);
935
- while (prefix < minLen && a[prefix] === b[prefix]) prefix++;
936
- let suffix = 0;
937
- while (suffix < a.length - prefix && suffix < b.length - prefix && a[a.length - 1 - suffix] === b[b.length - 1 - suffix]) suffix++;
938
- return (prefix + suffix) / Math.max(a.length, b.length);
939
- }
940
- function tokenJaccardSimilarity(a, b) {
941
- const tokensA = new Set(a.split(/\s+/).filter(Boolean));
942
- const tokensB = new Set(b.split(/\s+/).filter(Boolean));
943
- if (tokensA.size === 0 && tokensB.size === 0) return 1;
944
- let intersection = 0;
945
- for (const t of tokensA) if (tokensB.has(t)) intersection++;
946
- const union = tokensA.size + tokensB.size - intersection;
947
- return union === 0 ? 0 : intersection / union;
948
- }
949
- function rowText(html, row) {
950
- const parts = [];
951
- for (const cell of row.cells) parts.push(html.slice(cell.contentStart, cell.contentEnd).replace(/<[^>]+>/g, " "));
952
- return parts.join(" ").replace(/\s+/g, " ").trim().toLowerCase();
953
- }
954
- function cellText(html, cell) {
955
- return html.slice(cell.contentStart, cell.contentEnd).replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
956
- }
957
- /**
958
- * Standard LCS alignment: walks both sequences and emits a list of pairs
959
- * where `(oldIdx, newIdx)` are both set for matching positions, and one
960
- * side is null for an unmatched entry on the other side. Equality uses
961
- * strict ===.
962
- */
963
- function lcsAlign(oldKeys, newKeys) {
964
- const m = oldKeys.length;
965
- const n = newKeys.length;
966
- const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
967
- for (let i = 1; i <= m; i++) for (let j = 1; j <= n; j++) if (oldKeys[i - 1] === newKeys[j - 1]) dp[i][j] = dp[i - 1][j - 1] + 1;
968
- else dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
969
- const result = [];
970
- let i = m;
971
- let j = n;
972
- while (i > 0 || j > 0) if (i > 0 && j > 0 && oldKeys[i - 1] === newKeys[j - 1]) {
973
- result.push({
974
- oldIdx: i - 1,
975
- newIdx: j - 1
976
- });
977
- i--;
978
- j--;
979
- } else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) {
980
- result.push({
981
- oldIdx: null,
982
- newIdx: j - 1
983
- });
984
- j--;
985
- } else {
986
- result.push({
987
- oldIdx: i - 1,
988
- newIdx: null
989
- });
990
- i--;
991
- }
992
- result.reverse();
993
- return result;
994
- }
995
- /**
996
- * Returns the opening tag with the given class injected. Locates the real
997
- * `class` attribute via attribute-aware walking (NOT a flat regex — that
998
- * would mis-match inside a foreign attribute value like
999
- * `title="see class='x'"`). When the class already partially overlaps with
1000
- * `cls` — e.g. existing `class="mod"` and we're injecting `mod colspan` —
1001
- * only the missing tokens get appended, so we never end up with
1002
- * `class="mod mod colspan"`.
1003
- */
1004
- function injectClass(openingTag, cls) {
1005
- const clsTokens = cls.split(/\s+/).filter(Boolean);
1006
- if (clsTokens.length === 0) return openingTag;
1007
- const classAttr = findClassAttribute(openingTag);
1008
- if (classAttr) {
1009
- const existingTokens = classAttr.value.split(/\s+/).filter(Boolean);
1010
- const missing = clsTokens.filter((t) => !existingTokens.includes(t));
1011
- if (missing.length === 0) return openingTag;
1012
- const updatedValue = existingTokens.length === 0 ? missing.join(" ") : `${existingTokens.join(" ")} ${missing.join(" ")}`;
1013
- return openingTag.slice(0, classAttr.valueStart) + updatedValue + openingTag.slice(classAttr.valueEnd);
1014
- }
1015
- const insertAt = openingTag.endsWith("/>") ? openingTag.length - 2 : openingTag.length - 1;
1016
- return `${openingTag.slice(0, insertAt).replace(/\s*$/, "")} class='${cls}'${openingTag.slice(insertAt)}`;
1129
+ if (!opening) return "";
1130
+ if (row.cells.length === 0) return html.slice(row.rowStart, opening.end);
1131
+ return html.slice(row.rowStart, row.cells[0].cellStart);
1017
1132
  }
1133
+ /** Character-level similarity threshold above which we treat two rows as "the same row, edited". */
1134
+ const ROW_FUZZY_THRESHOLD = .5;
1018
1135
  /**
1019
- * Walks the opening tag's attributes (respecting quoted values) to find
1020
- * the actual `class` attribute. Returns the value range (start/end of the
1021
- * value content, *excluding* the surrounding quotes) and the value, or
1022
- * null if no `class` attribute is present.
1136
+ * Threshold for "this cell is a content-edit of that cell." Tuned the same
1137
+ * as ROW_FUZZY_THRESHOLD; cells in legal docs that share most of their
1138
+ * content typically ARE the same logical cell with a body edit, so 0.5
1139
+ * works for both granularities in practice.
1023
1140
  */
1024
- function findClassAttribute(openingTag) {
1025
- let i = 1;
1026
- while (i < openingTag.length && /[A-Za-z0-9_:-]/.test(openingTag[i])) i++;
1027
- while (i < openingTag.length) {
1028
- while (i < openingTag.length && /\s/.test(openingTag[i])) i++;
1029
- if (i >= openingTag.length) break;
1030
- if (openingTag[i] === ">" || openingTag[i] === "/") break;
1031
- const nameStart = i;
1032
- while (i < openingTag.length && !/[\s=>/]/.test(openingTag[i])) i++;
1033
- const name = openingTag.slice(nameStart, i);
1034
- while (i < openingTag.length && /\s/.test(openingTag[i])) i++;
1035
- if (openingTag[i] !== "=") continue;
1036
- i++;
1037
- while (i < openingTag.length && /\s/.test(openingTag[i])) i++;
1038
- let valueStart;
1039
- let valueEnd;
1040
- if (openingTag[i] === "\"" || openingTag[i] === "'") {
1041
- const quote = openingTag[i];
1042
- i++;
1043
- valueStart = i;
1044
- while (i < openingTag.length && openingTag[i] !== quote) i++;
1045
- valueEnd = i;
1046
- if (i < openingTag.length) i++;
1047
- } else {
1048
- valueStart = i;
1049
- while (i < openingTag.length && !/[\s>/]/.test(openingTag[i])) i++;
1050
- valueEnd = i;
1051
- }
1052
- if (name.toLowerCase() === "class") return {
1053
- valueStart,
1054
- valueEnd,
1055
- value: openingTag.slice(valueStart, valueEnd)
1056
- };
1057
- }
1058
- return null;
1141
+ const CELL_FUZZY_THRESHOLD = .5;
1142
+ /**
1143
+ * After exact LCS, scan the alignment for runs of "old deleted, then new
1144
+ * inserted" (or vice versa) and pair entries whose content is similar
1145
+ * enough to be treated as an edit rather than a delete+insert. This keeps
1146
+ * row-level edits (a typo fix, a single word change) from being shown as
1147
+ * an entire row vanishing and a new one appearing — matching what users
1148
+ * expect from a typical track-changes view.
1149
+ */
1150
+ function pairSimilarUnmatchedRows(alignment, oldTable, newTable, oldHtml, newHtml) {
1151
+ const oldTexts = oldTable.rows.map((r) => rowText(oldHtml, r));
1152
+ const newTexts = newTable.rows.map((r) => rowText(newHtml, r));
1153
+ return pairSimilarUnmatched(alignment, ROW_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
1154
+ }
1155
+ function pairSimilarUnmatchedCells(alignment, oldRow, newRow, oldHtml, newHtml) {
1156
+ const oldTexts = oldRow.cells.map((c) => cellText(oldHtml, c));
1157
+ const newTexts = newRow.cells.map((c) => cellText(newHtml, c));
1158
+ return pairSimilarUnmatched(alignment, CELL_FUZZY_THRESHOLD, (oldIdx, newIdx) => textSimilarity(oldTexts[oldIdx], newTexts[newIdx]));
1159
+ }
1160
+ function rowText(html, row) {
1161
+ const parts = [];
1162
+ for (const cell of row.cells) parts.push(html.slice(cell.contentStart, cell.contentEnd).replace(/<[^>]+>/g, " "));
1163
+ return parts.join(" ").replace(/\s+/g, " ").trim().toLowerCase();
1164
+ }
1165
+ function cellText(html, cell) {
1166
+ return html.slice(cell.contentStart, cell.contentEnd).replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
1059
1167
  }
1060
1168
  /**
1061
1169
  * Walks html and returns ranges for every top-level `<table>...</table>`
@@ -1142,65 +1250,529 @@ function findTopLevelCells(html, start, end) {
1142
1250
  else i++;
1143
1251
  return cells;
1144
1252
  }
1145
- function matchesTagAt(html, i, tagName) {
1146
- if (html[i] !== "<") return false;
1147
- if (html.slice(i + 1, i + 1 + tagName.length).toLowerCase() !== tagName) return false;
1148
- const after = html[i + 1 + tagName.length];
1149
- return after === ">" || after === " " || after === " " || after === "\n" || after === "\r" || after === "/";
1253
+ //#endregion
1254
+ //#region src/ThreeWayDiff.ts
1255
+ /**
1256
+ * Builds the attributed segment stream for a three-way diff.
1257
+ *
1258
+ * @param dCp analysis of diff(genesis → cp-latest)
1259
+ * @param dMe analysis of diff(genesis → me-current)
1260
+ *
1261
+ * Both analyses must share the same `oldDiffWords` (the genesis tokens)
1262
+ * — the caller guarantees this by passing the same genesis input and
1263
+ * the same `useProjections` decision to both `HtmlDiff.analyze` calls.
1264
+ */
1265
+ function buildSegments(dCp, dMe) {
1266
+ const genesisLen = dCp.oldDiffWords.length;
1267
+ const cpFate = buildFateFromGenesis(dCp.operations, genesisLen);
1268
+ const meFate = buildFateFromGenesis(dMe.operations, genesisLen);
1269
+ const cpInsAt = collectInsertionsKeyedByEnd(dCp);
1270
+ const meInsAt = collectInsertionsKeyedByEnd(dMe);
1271
+ const diffToOriginal = dCp.oldContentToOriginal ?? Array.from({ length: genesisLen }, (_, i) => i);
1272
+ const genesisOriginalLen = dCp.oldOriginalWords.length;
1273
+ const segments = [];
1274
+ let originalCursor = 0;
1275
+ emitBoundary(0, cpInsAt, meInsAt, dCp.newDiffWords, dMe.newDiffWords, segments);
1276
+ for (let i = 0; i < genesisLen; i++) {
1277
+ const cpDel = cpFate[i] === "deleted";
1278
+ const meDel = meFate[i] === "deleted";
1279
+ const origIdx = diffToOriginal[i];
1280
+ const slice = dCp.oldOriginalWords.slice(originalCursor, origIdx + 1);
1281
+ originalCursor = origIdx + 1;
1282
+ if (!cpDel && !meDel) appendSegment(segments, { kind: "equal" }, slice);
1283
+ else if (cpDel && meDel) {
1284
+ if (slice.length > 1) appendSegment(segments, { kind: "equal" }, slice.slice(0, slice.length - 1));
1285
+ } else if (cpDel) appendSegment(segments, {
1286
+ kind: "del",
1287
+ author: "cp"
1288
+ }, slice);
1289
+ else appendSegment(segments, {
1290
+ kind: "del",
1291
+ author: "me"
1292
+ }, slice);
1293
+ emitBoundary(i + 1, cpInsAt, meInsAt, dCp.newDiffWords, dMe.newDiffWords, segments);
1294
+ }
1295
+ if (originalCursor < genesisOriginalLen) appendSegment(segments, { kind: "equal" }, dCp.oldOriginalWords.slice(originalCursor));
1296
+ return segments;
1150
1297
  }
1151
- function matchesClosingTagAt(html, i, tagName) {
1152
- if (html[i] !== "<" || html[i + 1] !== "/") return false;
1153
- if (html.slice(i + 2, i + 2 + tagName.length).toLowerCase() !== tagName) return false;
1154
- const after = html[i + 2 + tagName.length];
1155
- return after === ">" || after === " " || after === " " || after === "\n" || after === "\r";
1298
+ /**
1299
+ * Per genesis-diff-index, what did this side do to that token? Both
1300
+ * Delete and Replace ops remove the token from the side's output, so
1301
+ * both contribute `'deleted'`. Equal ops contribute `'kept'`. Insert
1302
+ * ops have an empty old range, so they don't touch the genesis fate
1303
+ * map.
1304
+ */
1305
+ function buildFateFromGenesis(ops, genesisLen) {
1306
+ const out = new Array(genesisLen).fill("kept");
1307
+ for (const op of ops) {
1308
+ if (op.action !== 1 && op.action !== 4) continue;
1309
+ for (let i = op.startInOld; i < op.endInOld; i++) if (i >= 0 && i < genesisLen) out[i] = "deleted";
1310
+ }
1311
+ return out;
1156
1312
  }
1157
- function parseOpeningTagAt(html, i) {
1158
- if (html.startsWith("<!--", i)) {
1159
- const close = html.indexOf("-->", i + 4);
1160
- return close === -1 ? null : { end: close + 3 };
1313
+ /**
1314
+ * Per genesis boundary `b`, collect tokens this side inserted at that
1315
+ * boundary. Keyed by `endInOld` so a Replace at genesis[k..k+1] has its
1316
+ * insertion at boundary k+1 (after the deleted token) rather than k
1317
+ * (before) — that produces the del-then-ins visual order.
1318
+ *
1319
+ * For pure Insert ops the old range is empty (endInOld == startInOld),
1320
+ * so the key is the same as the semantic between-tokens position.
1321
+ */
1322
+ function collectInsertionsKeyedByEnd(d) {
1323
+ const out = /* @__PURE__ */ new Map();
1324
+ for (const op of d.operations) {
1325
+ if (op.action !== 2 && op.action !== 4) continue;
1326
+ const words = d.newDiffWords.slice(op.startInNew, op.endInNew);
1327
+ if (words.length === 0) continue;
1328
+ const key = op.endInOld;
1329
+ const existing = out.get(key) ?? [];
1330
+ existing.push(...words);
1331
+ out.set(key, existing);
1332
+ }
1333
+ return out;
1334
+ }
1335
+ /**
1336
+ * Emit any insertions at boundary `b`. When both authors inserted at
1337
+ * the same boundary AND the inserted token sequences are textually
1338
+ * identical, the insertion is treated as agreed and emitted unmarked.
1339
+ * Otherwise each side's insertion is emitted with author attribution.
1340
+ *
1341
+ * The CP-then-Me ordering for disagreement is arbitrary but consistent;
1342
+ * callers don't depend on it.
1343
+ */
1344
+ function emitBoundary(b, cpInsAt, meInsAt, _cpDiffWords, _meDiffWords, segments) {
1345
+ const cpIns = cpInsAt.get(b);
1346
+ const meIns = meInsAt.get(b);
1347
+ const hasCp = !!cpIns && cpIns.length > 0;
1348
+ const hasMe = !!meIns && meIns.length > 0;
1349
+ if (!hasCp && !hasMe) return;
1350
+ if (hasCp && hasMe && tokenArraysEqual(cpIns, meIns)) {
1351
+ appendSegment(segments, { kind: "equal" }, cpIns);
1352
+ return;
1353
+ }
1354
+ if (hasCp) appendSegment(segments, {
1355
+ kind: "ins",
1356
+ author: "cp"
1357
+ }, cpIns);
1358
+ if (hasMe) appendSegment(segments, {
1359
+ kind: "ins",
1360
+ author: "me"
1361
+ }, meIns);
1362
+ }
1363
+ function tokenArraysEqual(a, b) {
1364
+ if (a.length !== b.length) return false;
1365
+ for (let i = 0; i < a.length; i++) if (a[i] !== b[i]) return false;
1366
+ return true;
1367
+ }
1368
+ function appendSegment(segments, attr, words) {
1369
+ if (words.length === 0) return;
1370
+ const last = segments[segments.length - 1];
1371
+ if (last && sameAttribution(last.attr, attr)) {
1372
+ last.words.push(...words);
1373
+ return;
1161
1374
  }
1162
- if (html.startsWith("<![CDATA[", i)) {
1163
- const close = html.indexOf("]]>", i + 9);
1164
- return close === -1 ? null : { end: close + 3 };
1375
+ segments.push({
1376
+ attr,
1377
+ words: [...words]
1378
+ });
1379
+ }
1380
+ function sameAttribution(a, b) {
1381
+ if (a.kind === "equal" && b.kind === "equal") return true;
1382
+ if (a.kind === "ins" && b.kind === "ins") return a.author === b.author;
1383
+ if (a.kind === "del" && b.kind === "del") return a.author === b.author;
1384
+ return false;
1385
+ }
1386
+ /**
1387
+ * Build the `WrapMetadata` for an attribution. Single source of truth
1388
+ * for author-class / data-attr shape so the three emission paths
1389
+ * (word-level, table-level full-row/cell, multi-table whole-table
1390
+ * pre-wrap) stay consistent. A change here propagates to every author
1391
+ * marker in the output.
1392
+ */
1393
+ function authorAttribution(author) {
1394
+ return {
1395
+ extraClasses: author,
1396
+ dataAttrs: { author }
1397
+ };
1398
+ }
1399
+ /**
1400
+ * Resolve a segment's attribution into the wrapper-tag, base CSS class,
1401
+ * and `WrapMetadata` consumed by `Utils.wrapText` / `insertTag`. The
1402
+ * caller is `HtmlDiff.executeThreeWay`'s emission loop.
1403
+ *
1404
+ * `equal` segments don't go through this — they're emitted unmarked.
1405
+ */
1406
+ function segmentEmissionShape(attr) {
1407
+ return {
1408
+ tag: attr.kind,
1409
+ baseClass: attr.kind === "ins" ? "diffins" : "diffdel",
1410
+ metadata: authorAttribution(attr.author)
1411
+ };
1412
+ }
1413
+ //#endregion
1414
+ //#region src/ThreeWayTable.ts
1415
+ function preprocessTablesThreeWay(genesis, cpLatest, meCurrent, cellDiff) {
1416
+ const gTables = findTopLevelTables(genesis);
1417
+ const cTables = findTopLevelTables(cpLatest);
1418
+ const mTables = findTopLevelTables(meCurrent);
1419
+ if (gTables.length === 0 && cTables.length === 0 && mTables.length === 0) return null;
1420
+ for (const t of gTables) if (exceedsSizeLimit(t)) return null;
1421
+ for (const t of cTables) if (exceedsSizeLimit(t)) return null;
1422
+ for (const t of mTables) if (exceedsSizeLimit(t)) return null;
1423
+ const placeholderPrefix = makePlaceholderPrefix(genesis, cpLatest, meCurrent);
1424
+ if (positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables)) return preprocessAlignedByPosition(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix);
1425
+ return preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix);
1426
+ }
1427
+ function preprocessAlignedByPosition(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix) {
1428
+ const pairs = [];
1429
+ for (let i = 0; i < gTables.length; i++) pairs.push({
1430
+ g: gTables[i],
1431
+ c: cTables[i],
1432
+ m: mTables[i],
1433
+ diffed: diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[i], cTables[i], mTables[i], cellDiff)
1434
+ });
1435
+ let modifiedGenesis = genesis;
1436
+ let modifiedCp = cpLatest;
1437
+ let modifiedMe = meCurrent;
1438
+ const placeholderToDiff = /* @__PURE__ */ new Map();
1439
+ for (let i = pairs.length - 1; i >= 0; i--) {
1440
+ const placeholder = `${placeholderPrefix}${i}-->`;
1441
+ placeholderToDiff.set(placeholder, pairs[i].diffed);
1442
+ modifiedGenesis = spliceString(modifiedGenesis, pairs[i].g.tableStart, pairs[i].g.tableEnd, placeholder);
1443
+ modifiedCp = spliceString(modifiedCp, pairs[i].c.tableStart, pairs[i].c.tableEnd, placeholder);
1444
+ modifiedMe = spliceString(modifiedMe, pairs[i].m.tableStart, pairs[i].m.tableEnd, placeholder);
1165
1445
  }
1166
- if (html.startsWith("<?", i)) {
1167
- const close = html.indexOf("?>", i + 2);
1168
- return close === -1 ? null : { end: close + 2 };
1446
+ return {
1447
+ modifiedGenesis,
1448
+ modifiedCp,
1449
+ modifiedMe,
1450
+ placeholderToDiff
1451
+ };
1452
+ }
1453
+ /**
1454
+ * Multi-table handler. Tables are paired against `genesis` (the spine)
1455
+ * via content-LCS on each of cp and me. Placeholders are assigned so
1456
+ * each appears only in the inputs that actually contain the underlying
1457
+ * table. The word-level merger then attributes them naturally:
1458
+ *
1459
+ * - paired in genesis+cp+me → equal in both diffs → emit recursive 3-way diff
1460
+ * - in cp+me, not in genesis → both-agree insertion → emit plain
1461
+ * - in cp only → cp insertion → ins-cp wrapper (Me didn't take it)
1462
+ * - in me only → me insertion → ins-me wrapper
1463
+ * - in genesis+cp, not me → me deletion → del-me wrapper
1464
+ * - in genesis+me, not cp → cp deletion → del-cp wrapper
1465
+ * - in genesis only → both deleted, settled → silent (placeholder content empty)
1466
+ */
1467
+ function preprocessByContent(genesis, cpLatest, meCurrent, gTables, cTables, mTables, cellDiff, placeholderPrefix) {
1468
+ const gKeys = gTables.map((t) => tableKey(genesis, t));
1469
+ const cKeys = cTables.map((t) => tableKey(cpLatest, t));
1470
+ const mKeys = mTables.map((t) => tableKey(meCurrent, t));
1471
+ const alignCp = lcsAlign(gKeys, cKeys);
1472
+ const alignMe = lcsAlign(gKeys, mKeys);
1473
+ const gToCp = new Array(gTables.length).fill(-1);
1474
+ const cpToG = new Array(cTables.length).fill(-1);
1475
+ for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) {
1476
+ gToCp[a.oldIdx] = a.newIdx;
1477
+ cpToG[a.newIdx] = a.oldIdx;
1478
+ }
1479
+ const gToMe = new Array(gTables.length).fill(-1);
1480
+ const meToG = new Array(mTables.length).fill(-1);
1481
+ for (const a of alignMe) if (a.oldIdx !== null && a.newIdx !== null) {
1482
+ gToMe[a.oldIdx] = a.newIdx;
1483
+ meToG[a.newIdx] = a.oldIdx;
1484
+ }
1485
+ let nextId = 0;
1486
+ const placeholderToDiff = /* @__PURE__ */ new Map();
1487
+ const placeholders = {
1488
+ g: new Array(gTables.length).fill(null),
1489
+ c: new Array(cTables.length).fill(null),
1490
+ m: new Array(mTables.length).fill(null)
1491
+ };
1492
+ const allocate = () => `${placeholderPrefix}${nextId++}-->`;
1493
+ const wrapWhole = (tag, author, tableHtml) => Utils_default.wrapText(tableHtml, tag, `diff${tag}`, authorAttribution(author));
1494
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
1495
+ const cIdx = gToCp[gIdx];
1496
+ const mIdx = gToMe[gIdx];
1497
+ if (cIdx === -1 || mIdx === -1) continue;
1498
+ const placeholder = allocate();
1499
+ placeholderToDiff.set(placeholder, diffTableThreeWay(genesis, cpLatest, meCurrent, gTables[gIdx], cTables[cIdx], mTables[mIdx], cellDiff));
1500
+ placeholders.g[gIdx] = placeholder;
1501
+ placeholders.c[cIdx] = placeholder;
1502
+ placeholders.m[mIdx] = placeholder;
1503
+ }
1504
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
1505
+ if (placeholders.g[gIdx] !== null) continue;
1506
+ const cIdx = gToCp[gIdx];
1507
+ if (cIdx === -1) continue;
1508
+ const placeholder = allocate();
1509
+ placeholderToDiff.set(placeholder, wrapWhole("del", "me", genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd)));
1510
+ placeholders.g[gIdx] = placeholder;
1511
+ placeholders.c[cIdx] = placeholder;
1512
+ }
1513
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
1514
+ if (placeholders.g[gIdx] !== null) continue;
1515
+ const mIdx = gToMe[gIdx];
1516
+ if (mIdx === -1) continue;
1517
+ const placeholder = allocate();
1518
+ placeholderToDiff.set(placeholder, wrapWhole("del", "cp", genesis.slice(gTables[gIdx].tableStart, gTables[gIdx].tableEnd)));
1519
+ placeholders.g[gIdx] = placeholder;
1520
+ placeholders.m[mIdx] = placeholder;
1521
+ }
1522
+ for (let gIdx = 0; gIdx < gTables.length; gIdx++) {
1523
+ if (placeholders.g[gIdx] !== null) continue;
1524
+ const placeholder = allocate();
1525
+ placeholderToDiff.set(placeholder, "");
1526
+ placeholders.g[gIdx] = placeholder;
1527
+ }
1528
+ for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
1529
+ if (placeholders.c[cIdx] !== null) continue;
1530
+ const cText = cKeys[cIdx];
1531
+ let mIdx = -1;
1532
+ for (let candidate = 0; candidate < mTables.length; candidate++) {
1533
+ if (placeholders.m[candidate] !== null) continue;
1534
+ if (meToG[candidate] !== -1) continue;
1535
+ if (mKeys[candidate] === cText) {
1536
+ mIdx = candidate;
1537
+ break;
1538
+ }
1539
+ }
1540
+ if (mIdx === -1) continue;
1541
+ const placeholder = allocate();
1542
+ placeholderToDiff.set(placeholder, cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd));
1543
+ placeholders.c[cIdx] = placeholder;
1544
+ placeholders.m[mIdx] = placeholder;
1545
+ }
1546
+ for (let cIdx = 0; cIdx < cTables.length; cIdx++) {
1547
+ if (placeholders.c[cIdx] !== null) continue;
1548
+ const placeholder = allocate();
1549
+ placeholderToDiff.set(placeholder, wrapWhole("ins", "cp", cpLatest.slice(cTables[cIdx].tableStart, cTables[cIdx].tableEnd)));
1550
+ placeholders.c[cIdx] = placeholder;
1551
+ }
1552
+ for (let mIdx = 0; mIdx < mTables.length; mIdx++) {
1553
+ if (placeholders.m[mIdx] !== null) continue;
1554
+ const placeholder = allocate();
1555
+ placeholderToDiff.set(placeholder, wrapWhole("ins", "me", meCurrent.slice(mTables[mIdx].tableStart, mTables[mIdx].tableEnd)));
1556
+ placeholders.m[mIdx] = placeholder;
1557
+ }
1558
+ let modifiedGenesis = genesis;
1559
+ for (let i = gTables.length - 1; i >= 0; i--) {
1560
+ const p = placeholders.g[i];
1561
+ if (p === null) continue;
1562
+ modifiedGenesis = spliceString(modifiedGenesis, gTables[i].tableStart, gTables[i].tableEnd, p);
1563
+ }
1564
+ let modifiedCp = cpLatest;
1565
+ for (let i = cTables.length - 1; i >= 0; i--) {
1566
+ const p = placeholders.c[i];
1567
+ if (p === null) continue;
1568
+ modifiedCp = spliceString(modifiedCp, cTables[i].tableStart, cTables[i].tableEnd, p);
1569
+ }
1570
+ let modifiedMe = meCurrent;
1571
+ for (let i = mTables.length - 1; i >= 0; i--) {
1572
+ const p = placeholders.m[i];
1573
+ if (p === null) continue;
1574
+ modifiedMe = spliceString(modifiedMe, mTables[i].tableStart, mTables[i].tableEnd, p);
1169
1575
  }
1170
- let j = i + 1;
1171
- let quote = null;
1172
- while (j < html.length) {
1173
- const ch = html[j];
1174
- if (quote) {
1175
- if (ch === quote) quote = null;
1176
- } else if (ch === "\"" || ch === "'") quote = ch;
1177
- else if (ch === ">") return { end: j + 1 };
1178
- j++;
1576
+ return {
1577
+ modifiedGenesis,
1578
+ modifiedCp,
1579
+ modifiedMe,
1580
+ placeholderToDiff
1581
+ };
1582
+ }
1583
+ const POSITIONAL_PAIR_SIMILARITY_THRESHOLD = .5;
1584
+ function positionallyAligned(genesis, cpLatest, meCurrent, gTables, cTables, mTables) {
1585
+ if (gTables.length !== cTables.length || cTables.length !== mTables.length) return false;
1586
+ for (let i = 0; i < gTables.length; i++) {
1587
+ const kG = tableKey(genesis, gTables[i]);
1588
+ const kC = tableKey(cpLatest, cTables[i]);
1589
+ const kM = tableKey(meCurrent, mTables[i]);
1590
+ if (textSimilarity(kG, kC) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false;
1591
+ if (textSimilarity(kG, kM) < POSITIONAL_PAIR_SIMILARITY_THRESHOLD) return false;
1179
1592
  }
1180
- return null;
1593
+ return true;
1594
+ }
1595
+ function tableKey(html, table) {
1596
+ return html.slice(table.tableStart, table.tableEnd).replace(/\s+/g, " ").trim();
1597
+ }
1598
+ function diffTableThreeWay(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
1599
+ if (sameDimensions(tG, tC) && sameDimensions(tC, tM)) return diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
1600
+ return diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff);
1601
+ }
1602
/**
 * Cell-by-cell three-way diff for tables whose row/cell layout matches
 * on all three sides. Genesis supplies every byte of markup outside the
 * cell contents; each cell's content is replaced by the result of
 * `cellDiff` applied to the same-position cell of genesis, cp and me.
 *
 * Callers are expected to have checked that the three tables have the
 * same shape: rows and cells of `tC` / `tM` are indexed by genesis
 * position without bounds checks.
 *
 * @param genesis   genesis document text
 * @param cpLatest  counterparty document text
 * @param meCurrent Me's document text
 * @param tG        parsed genesis table (`tableStart`, `tableEnd`, `rows[].cells[]`)
 * @param tC        parsed cp table
 * @param tM        parsed me table
 * @param cellDiff  callback `(g, c, m) => string` diffing one cell's content
 * @returns the table markup with every cell content replaced by its diff
 */
function diffTablePositional(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
  const pieces = [];
  // Offset in `genesis` up to which markup has already been emitted.
  let consumed = tG.tableStart;
  tG.rows.forEach((genesisRow, rowIdx) => {
    const cpRow = tC.rows[rowIdx];
    const meRow = tM.rows[rowIdx];
    genesisRow.cells.forEach((genesisCell, cellIdx) => {
      const cpCell = cpRow.cells[cellIdx];
      const meCell = meRow.cells[cellIdx];
      // Structural markup between the previous cell content and this one.
      pieces.push(genesis.slice(consumed, genesisCell.contentStart));
      const genesisContent = genesis.slice(genesisCell.contentStart, genesisCell.contentEnd);
      const cpContent = cpLatest.slice(cpCell.contentStart, cpCell.contentEnd);
      const meContent = meCurrent.slice(meCell.contentStart, meCell.contentEnd);
      pieces.push(cellDiff(genesisContent, cpContent, meContent));
      consumed = genesisCell.contentEnd;
    });
  });
  // Everything after the last cell content, up to the end of the table.
  pieces.push(genesis.slice(consumed, tG.tableEnd));
  return pieces.join("");
}
1182
1621
  /**
1183
- * Returns the index just past the matching `</tagName>`, accounting for
1184
- * nested tags of the same name. Returns -1 if no match before `limit`.
1622
+ * Row-level genesis-spine merge for tables with diverging row/cell
1623
+ * counts.
1624
+ *
1625
+ * 1. Align cp rows to genesis rows (alignCp), me rows to genesis rows
1626
+ * (alignMe), each via row-LCS over rowKeys.
1627
+ * 2. Per genesis row: cpFate (kept / deleted), meFate (kept / deleted).
1628
+ * Both kept → recurse cell diff (with structural-change cell handling
1629
+ * falling back to me-attribution Replace per the documented
1630
+ * limitation). One kept, other deleted → emit author-attributed full
1631
+ * row. Both deleted → silent.
1632
+ * 3. Off-spine rows: cp-only inserted rows + me-only inserted rows.
1633
+ * Check for content agreement at the same boundary; agreed
1634
+ * insertions emit plain.
1185
1635
  */
1186
- function findMatchingClosingTag(html, from, tagName, limit = html.length) {
1187
- let depth = 1;
1188
- let i = from;
1189
- while (i < limit) if (matchesTagAt(html, i, tagName)) {
1190
- const opening = parseOpeningTagAt(html, i);
1191
- if (!opening) {
1192
- i++;
1193
- continue;
1636
+ function diffTableStructural(genesis, cpLatest, meCurrent, tG, tC, tM, cellDiff) {
1637
+ const gKeys = tG.rows.map((r) => rowKey(genesis, r));
1638
+ const cKeys = tC.rows.map((r) => rowKey(cpLatest, r));
1639
+ const mKeys = tM.rows.map((r) => rowKey(meCurrent, r));
1640
+ const alignCp = lcsAlign(gKeys, cKeys);
1641
+ const alignMe = lcsAlign(gKeys, mKeys);
1642
+ const gToCp = new Array(tG.rows.length).fill(-1);
1643
+ for (const a of alignCp) if (a.oldIdx !== null && a.newIdx !== null) gToCp[a.oldIdx] = a.newIdx;
1644
+ const gToMe = new Array(tG.rows.length).fill(-1);
1645
+ for (const a of alignMe) if (a.oldIdx !== null && a.newIdx !== null) gToMe[a.oldIdx] = a.newIdx;
1646
+ const cpInsAt = collectInsertedRowsAtBoundary(alignCp, tG.rows.length);
1647
+ const meInsAt = collectInsertedRowsAtBoundary(alignMe, tG.rows.length);
1648
+ const out = [];
1649
+ out.push(tableHeaderSlice(genesis, tG));
1650
+ const emitBoundaryInsertions = (b) => {
1651
+ const cIdxs = cpInsAt.get(b) ?? [];
1652
+ const mIdxs = meInsAt.get(b) ?? [];
1653
+ if (cIdxs.length === 0 && mIdxs.length === 0) return;
1654
+ const remainingMe = new Set(mIdxs);
1655
+ for (const cIdx of cIdxs) {
1656
+ const cText = cKeys[cIdx];
1657
+ let agreedMeIdx;
1658
+ for (const mIdx of remainingMe) if (mKeys[mIdx] === cText) {
1659
+ agreedMeIdx = mIdx;
1660
+ break;
1661
+ }
1662
+ if (agreedMeIdx !== void 0) {
1663
+ remainingMe.delete(agreedMeIdx);
1664
+ out.push(cpLatest.slice(tC.rows[cIdx].rowStart, tC.rows[cIdx].rowEnd));
1665
+ } else out.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], "ins", "cp"));
1194
1666
  }
1195
- if (!html.slice(i, opening.end).endsWith("/>")) depth++;
1196
- i = opening.end;
1197
- } else if (matchesClosingTagAt(html, i, tagName)) {
1198
- depth--;
1199
- const closingEnd = parseOpeningTagAt(html, i)?.end ?? i + `</${tagName}>`.length;
1200
- if (depth === 0) return closingEnd;
1201
- i = closingEnd;
1202
- } else i++;
1203
- return -1;
1667
+ for (const mIdx of remainingMe) out.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], "ins", "me"));
1668
+ };
1669
+ for (let g = 0; g < tG.rows.length; g++) {
1670
+ emitBoundaryInsertions(g);
1671
+ const cIdx = gToCp[g];
1672
+ const mIdx = gToMe[g];
1673
+ const cpDel = cIdx === -1;
1674
+ const meDel = mIdx === -1;
1675
+ if (!cpDel && !meDel) out.push(emitPreservedRow(genesis, cpLatest, meCurrent, tG.rows[g], tC.rows[cIdx], tM.rows[mIdx], cellDiff));
1676
+ else if (cpDel && meDel) {} else if (cpDel) out.push(emitFullRowAttributed(meCurrent, tM.rows[mIdx], "del", "cp"));
1677
+ else out.push(emitFullRowAttributed(cpLatest, tC.rows[cIdx], "del", "me"));
1678
+ }
1679
+ emitBoundaryInsertions(tG.rows.length);
1680
+ out.push(tableFooterSlice(genesis, tG));
1681
+ return out.join("");
1682
+ }
1683
/**
 * Emit a row that survives on all three sides.
 *
 * If the three row versions have the same number of cells, genesis
 * supplies the row markup and each cell's content is replaced by
 * `cellDiff(genesisContent, cpContent, meContent)`.
 *
 * If the cell counts diverge, the row is emitted as a whole-row replace
 * attributed to "me": the genesis row marked deleted, followed by Me's
 * row marked inserted. The cp row is not consulted in that fallback.
 *
 * @param genesis   genesis document text
 * @param cpLatest  counterparty document text
 * @param meCurrent Me's document text
 * @param rG        parsed genesis row (`rowStart`, `rowEnd`, `cells[]`)
 * @param rC        parsed cp row
 * @param rM        parsed me row
 * @param cellDiff  callback `(g, c, m) => string` diffing one cell's content
 * @returns merged row markup
 */
function emitPreservedRow(genesis, cpLatest, meCurrent, rG, rC, rM, cellDiff) {
  const sameCellCount = rG.cells.length === rC.cells.length && rC.cells.length === rM.cells.length;
  if (!sameCellCount) {
    const deletedGenesisRow = emitFullRowAttributed(genesis, rG, "del", "me");
    const insertedMeRow = emitFullRowAttributed(meCurrent, rM, "ins", "me");
    return deletedGenesisRow + insertedMeRow;
  }

  const pieces = [];
  // Offset in `genesis` up to which row markup has already been emitted.
  let consumed = rG.rowStart;
  rG.cells.forEach((genesisCell, idx) => {
    const cpCell = rC.cells[idx];
    const meCell = rM.cells[idx];
    pieces.push(genesis.slice(consumed, genesisCell.contentStart));
    const genesisContent = genesis.slice(genesisCell.contentStart, genesisCell.contentEnd);
    const cpContent = cpLatest.slice(cpCell.contentStart, cpCell.contentEnd);
    const meContent = meCurrent.slice(meCell.contentStart, meCell.contentEnd);
    pieces.push(cellDiff(genesisContent, cpContent, meContent));
    consumed = genesisCell.contentEnd;
  });
  pieces.push(genesis.slice(consumed, rG.rowEnd));
  return pieces.join("");
}
1700
/**
 * Group the inserted rows of an alignment by the genesis row boundary
 * they sit in front of.
 *
 * The alignment is scanned from its end. Entries with a null `oldIdx`
 * and a non-null `newIdx` are insertions; they are attached to the
 * `oldIdx` of the next genesis-backed entry that follows them, or to
 * `genesisRowCount` when no genesis-backed entry follows. Within a
 * boundary the new-side indices keep alignment order.
 *
 * @param align           alignment entries shaped `{ oldIdx, newIdx }`, either may be null
 * @param genesisRowCount number of genesis rows; used as the trailing boundary
 * @returns Map from boundary index to the list of inserted new-side row indices
 */
function collectInsertedRowsAtBoundary(align, genesisRowCount) {
  const byBoundary = new Map();
  // Insertions seen since the last genesis-backed entry, in reverse alignment order.
  const queued = [];
  let boundary = genesisRowCount;

  const flushQueued = () => {
    if (queued.length === 0) return;
    const bucket = byBoundary.get(boundary) ?? [];
    // Restore alignment order before prepending to the bucket.
    bucket.unshift(...queued.reverse());
    byBoundary.set(boundary, bucket);
    queued.length = 0;
  };

  let idx = align.length;
  while (idx-- > 0) {
    const { oldIdx, newIdx } = align[idx];
    if (oldIdx !== null) {
      flushQueued();
      boundary = oldIdx;
    } else if (newIdx !== null) {
      queued.push(newIdx);
    }
  }
  // Insertions that precede the first genesis-backed entry.
  flushQueued();
  return byBoundary;
}
1728
/**
 * Return the table markup that precedes its first row.
 *
 * For a table without rows, everything except the final eight characters
 * of the table span is returned; eight is the length of a literal
 * `</table>` closing tag, which the span is assumed to end with.
 *
 * @param html  document the table offsets index into
 * @param table parsed table (`tableStart`, `tableEnd`, `rows[]`)
 * @returns leading table markup up to the first row
 */
function tableHeaderSlice(html, table) {
  const [firstRow] = table.rows;
  const headerEnd = firstRow ? firstRow.rowStart : table.tableEnd - "</table>".length;
  return html.slice(table.tableStart, headerEnd);
}
1733
/**
 * Return the table markup that follows its last row, up to the end of
 * the table span. A table without rows yields a literal `</table>`.
 *
 * @param html  document the table offsets index into
 * @param table parsed table (`tableEnd`, `rows[]`)
 * @returns trailing table markup after the last row
 */
function tableFooterSlice(html, table) {
  const finalRow = table.rows.at(-1);
  return finalRow ? html.slice(finalRow.rowEnd, table.tableEnd) : "</table>";
}
1738
/**
 * Emit a whole row attributed to a single author.
 *
 * The row's opening tag is passed through `injectAuthorAttribution`,
 * each cell is re-emitted via `emitFullCellAttributed`, and all markup
 * between and around the cells is copied through verbatim. If no
 * opening tag can be parsed at `row.rowStart`, the row is returned
 * unchanged.
 *
 * @param html   document the row offsets index into
 * @param row    parsed row (`rowStart`, `rowEnd`, `cells[]` with `cellStart` / `cellEnd`)
 * @param kind   "ins" or "del"
 * @param author author key forwarded to the attribution helpers
 * @returns the attributed row markup
 */
function emitFullRowAttributed(html, row, kind, author) {
  const { rowStart, rowEnd, cells } = row;
  const openingTr = parseOpeningTagAt(html, rowStart);
  if (!openingTr) return html.slice(rowStart, rowEnd);

  const pieces = [injectAuthorAttribution(html.slice(rowStart, openingTr.end), kind, author)];
  // Offset in `html` up to which the row has already been emitted.
  let consumed = openingTr.end;
  for (const cell of cells) {
    pieces.push(html.slice(consumed, cell.cellStart), emitFullCellAttributed(html, cell, kind, author));
    consumed = cell.cellEnd;
  }
  pieces.push(html.slice(consumed, rowEnd));
  return pieces.join("");
}
1757
/**
 * Emit one cell attributed to a single author.
 *
 * The cell's opening tag is passed through `injectAuthorAttribution`.
 * Non-blank cell content is wrapped with `Utils.wrapText` using `kind`
 * as the tag name, `diff<kind>` as the class and the author's
 * attribution metadata; content that is empty or whitespace-only is
 * copied through unwrapped. The closing markup is copied verbatim. If
 * no opening tag can be parsed at `cell.cellStart`, the cell is
 * returned unchanged.
 *
 * @param html   document the cell offsets index into
 * @param cell   parsed cell (`cellStart`, `contentStart`, `contentEnd`, `cellEnd`)
 * @param kind   "ins" or "del"
 * @param author author key forwarded to the attribution helpers
 * @returns the attributed cell markup
 */
function emitFullCellAttributed(html, cell, kind, author) {
  const { cellStart, contentStart, contentEnd, cellEnd } = cell;
  const openingTd = parseOpeningTagAt(html, cellStart);
  if (!openingTd) return html.slice(cellStart, cellEnd);

  const attributedOpening = injectAuthorAttribution(html.slice(cellStart, openingTd.end), kind, author);
  const body = html.slice(contentStart, contentEnd);
  let wrappedBody = body;
  if (body.trim().length !== 0) {
    wrappedBody = Utils_default.wrapText(body, kind, `diff${kind}`, authorAttribution(author));
  }
  const closingMarkup = html.slice(contentEnd, cellEnd);
  return attributedOpening + wrappedBody + closingMarkup;
}
1766
/**
 * Add author attribution to an opening tag: a `diff<kind>` class, plus
 * the author's extra classes and `data-*` attributes when present.
 *
 * Both `extraClasses` and `dataAttrs` are treated as optional on the
 * attribution metadata. Without the guard on `extraClasses`, metadata
 * lacking it would inject the literal class name "undefined".
 *
 * @param openingTag the opening tag markup, e.g. `<tr>`
 * @param kind       "ins" or "del"; becomes the `diff<kind>` class
 * @param author     author key passed to `authorAttribution`
 * @returns the opening tag with class and data attributes injected
 */
function injectAuthorAttribution(openingTag, kind, author) {
  const meta = authorAttribution(author);
  const baseClass = `diff${kind}`;
  const classes = meta.extraClasses ? `${baseClass} ${meta.extraClasses}` : baseClass;
  return injectDataAttrs(injectClass(openingTag, classes), meta.dataAttrs ?? {});
}
1770
/**
 * Append `data-<key>='<value>'` attributes to an opening tag, keeping a
 * self-closing `/>` terminator intact.
 *
 * Values are emitted inside single quotes, so any `'` in a value is
 * written as `&#39;` to stop it terminating the attribute early.
 * A tag that does not end in `>` is returned untouched instead of
 * having its last character overwritten.
 *
 * @param openingTag opening tag markup ending in `>` or `/>`
 * @param dataAttrs  map of attribute name (without the `data-` prefix) to value
 * @returns the opening tag with the data attributes appended
 */
function injectDataAttrs(openingTag, dataAttrs) {
  const keys = Object.keys(dataAttrs);
  if (keys.length === 0) return openingTag;
  // Not a well-formed opening tag; there is no safe place to splice attributes.
  if (!openingTag.endsWith(">")) return openingTag;

  const attrs = keys
    .map((key) => ` data-${key}='${String(dataAttrs[key]).replaceAll("'", "&#39;")}'`)
    .join("");
  const terminator = openingTag.endsWith("/>") ? "/>" : ">";
  return `${openingTag.slice(0, -terminator.length)}${attrs}${terminator}`;
}
1205
1777
  //#endregion
1206
1778
  //#region src/WordSplitter.ts
@@ -1458,10 +2030,20 @@ var HtmlDiff = class HtmlDiff {
1458
2030
  * pathological input.
1459
2031
  */
1460
2032
  static MaxTablePreprocessDepth = 8;
2033
+ /**
2034
+ * Mirror cap for the three-way path. The 2-way `MaxTablePreprocessDepth`
2035
+ * guards the recursion inside `executeWithContext`; the 3-way path has
2036
+ * its own recursion (`executeThreeWay` → `preprocessTablesThreeWay` →
2037
+ * `cellDiff` → `executeThreeWay`) which needs its own guard. Once the
2038
+ * cap is reached, `executeThreeWay` skips table preprocessing and
2039
+ * falls back to the word-level merge — same bail-out semantics as the
2040
+ * 2-way path.
2041
+ */
2042
+ static MaxThreeWayDepth = 8;
1461
2043
  content = [];
1462
2044
  newText;
1463
2045
  oldText;
1464
- tablePreprocessDepth;
2046
+ tablePreprocessDepth = 0;
1465
2047
  specialTagDiffStack = [];
1466
2048
  newWords = [];
1467
2049
  oldWords = [];
@@ -1524,17 +2106,172 @@ var HtmlDiff = class HtmlDiff {
1524
2106
  * Initializes a new instance of the class.
1525
2107
  * @param oldText The old text.
1526
2108
  * @param newText The new text.
1527
- * @param tablePreprocessDepth Internal: nested-call depth for table
1528
- * preprocessing. Callers should leave at default (0); the recursive
1529
- * `diffCell` callback in TableDiff bumps it.
1530
2109
  */
1531
- constructor(oldText, newText, tablePreprocessDepth = 0) {
2110
+ constructor(oldText, newText) {
1532
2111
  this.oldText = oldText;
1533
2112
  this.newText = newText;
1534
- this.tablePreprocessDepth = tablePreprocessDepth;
1535
2113
  }
1536
- static execute(oldText, newText, tablePreprocessDepth = 0) {
1537
- return new HtmlDiff(oldText, newText, tablePreprocessDepth).build();
2114
+ static execute(oldText, newText) {
2115
+ return new HtmlDiff(oldText, newText).build();
2116
+ }
2117
+ /**
2118
+ * Analyse a two-way diff and return its raw building blocks: the word
2119
+ * arrays the diff ran against, the operations produced, the original
2120
+ * (pre-projection) word arrays, and the mappings from diff-index back
2121
+ * to original-word index when structural projection is active.
2122
+ * Consumed by `executeThreeWay` so it can compose two diffs by walking
2123
+ * their Operation streams.
2124
+ *
2125
+ * The caller is expected to coordinate `useProjections` symmetrically
2126
+ * across composed analyses — if V1↔V2 projects but V2↔V3 doesn't,
2127
+ * V2's "new" array in the first analysis won't equal V2's "old" array
2128
+ * in the second. `evaluateProjectionApplicability` exposes the same
2129
+ * heuristic `build()` uses internally, so the orchestrator can compute
2130
+ * a single decision and pass it into every `analyze` call.
2131
+ *
2132
+ * Table preprocessing is skipped here. Placeholders mutate the input
2133
+ * in ways that don't compose across two independent analyses; the
2134
+ * 3-way orchestrator handles tables explicitly before calling analyze.
2135
+ */
2136
+ static analyze(oldText, newText, options = {}) {
2137
+ const inner = new HtmlDiff(oldText, newText);
2138
+ inner.tablePreprocessDepth = HtmlDiff.MaxTablePreprocessDepth;
2139
+ if (options.blockExpressions) for (const expr of options.blockExpressions) inner.addBlockExpression(expr);
2140
+ if (options.repeatingWordsAccuracy !== void 0) inner.repeatingWordsAccuracy = options.repeatingWordsAccuracy;
2141
+ if (options.orphanMatchThreshold !== void 0) inner.orphanMatchThreshold = options.orphanMatchThreshold;
2142
+ if (options.ignoreWhitespaceDifferences !== void 0) inner.ignoreWhitespaceDifferences = options.ignoreWhitespaceDifferences;
2143
+ inner.splitInputsToWords();
2144
+ if (options.useProjections === void 0) inner.buildContentProjections();
2145
+ else if (options.useProjections) {
2146
+ const oldProj = HtmlDiff.createContentProjection(inner.oldWords);
2147
+ const newProj = HtmlDiff.createContentProjection(inner.newWords);
2148
+ if (oldProj.contentWords.length > 0 && newProj.contentWords.length > 0) {
2149
+ inner.oldContentWords = oldProj.contentWords;
2150
+ inner.oldContentToOriginal = oldProj.contentToOriginal;
2151
+ inner.newContentWords = newProj.contentWords;
2152
+ inner.newContentToOriginal = newProj.contentToOriginal;
2153
+ }
2154
+ }
2155
+ const wordsForDiffOld = inner.oldContentWords ?? inner.oldWords;
2156
+ const wordsForDiffNew = inner.newContentWords ?? inner.newWords;
2157
+ inner.matchGranularity = Math.min(HtmlDiff.MatchGranularityMaximum, Math.min(wordsForDiffOld.length, wordsForDiffNew.length));
2158
+ return {
2159
+ oldDiffWords: wordsForDiffOld,
2160
+ newDiffWords: wordsForDiffNew,
2161
+ operations: inner.operations(),
2162
+ oldOriginalWords: inner.oldWords,
2163
+ newOriginalWords: inner.newWords,
2164
+ oldContentToOriginal: inner.oldContentToOriginal,
2165
+ newContentToOriginal: inner.newContentToOriginal
2166
+ };
2167
+ }
2168
+ /**
2169
+ * Whether content-projection (structural-tag normalisation) would
2170
+ * apply to this pair of inputs under `build()`'s default heuristic.
2171
+ * Exposed so composers of multiple analyses can compute a symmetric
2172
+ * decision before calling `analyze` — see `analyze`'s docstring for
2173
+ * why symmetry matters.
2174
+ */
2175
+ static evaluateProjectionApplicability(oldText, newText) {
2176
+ const oldWords = WordSplitter.convertHtmlToListOfWords(oldText, []);
2177
+ const newWords = WordSplitter.convertHtmlToListOfWords(newText, []);
2178
+ if (!HtmlDiff.hasStructuralDifferences(oldWords, newWords)) return false;
2179
+ const oldProj = HtmlDiff.createContentProjection(oldWords);
2180
+ const newProj = HtmlDiff.createContentProjection(newWords);
2181
+ return HtmlDiff.shouldUseContentProjections(oldWords, newWords, oldProj, newProj);
2182
+ }
2183
/**
 * Three-way HTML diff against a shared genesis. Produces attributed
 * HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
 * from Me's accumulated changes (genesis → meCurrent). Use this for
 * blackline UX where the negotiation has gone through multiple turns
 * and the reader wants to see "who proposed what" across the whole
 * history, not just the most recent round.
 *
 * Attribution: CP's and Me's changes are distinguished by `data-author`
 * ('cp' or 'me') and matching classes such as `class='diffins cp'` /
 * `class='diffdel me'`.
 * NOTE(review): an earlier revision of this comment, written in terms
 * of V1/V2/V3 rather than a shared genesis, also described a dedicated
 * "Me rejected CP's proposal" marker (`data-rejects='cp'` plus
 * `class='... rejects-cp'`). Confirm whether that marker is still
 * emitted by the genesis-based merge.
 *
 * When both parties happen to have made the same change (e.g. CP
 * proposed a wording change in turn N, Me adopted it in turn N+1),
 * the change reads as "settled" and is emitted unmarked — only
 * disagreements and pending proposals carry author attribution.
 *
 * Projection: the symmetric-projection decision is coordinated across
 * both internal `analyze` calls so genesis tokenises identically in
 * each. When `options.useProjections` is left undefined, the decision
 * is the conjunction of both pair-wise heuristics — project iff both
 * pairs would project on their own. Pass an explicit boolean to
 * override.
 *
 * @param genesis   the shared common ancestor (per-user — the FE
 *                  picks between V1.0 and /preview/initialAnswers
 *                  based on `prefillReceiverAnswers`)
 * @param cpLatest  the counterparty's current published version
 * @param meCurrent Me's current draft (the document on screen)
 * @param options   optional diff settings, forwarded unchanged to the
 *                  depth-tracking implementation
 * @returns the merged, attributed HTML
 */
static executeThreeWay(genesis, cpLatest, meCurrent, options = {}) {
  // Public entry point: start the table-recursion depth counter at zero.
  return HtmlDiff.executeThreeWayWithDepth(genesis, cpLatest, meCurrent, options, 0);
}
2221
+ static executeThreeWayWithDepth(genesis, cpLatest, meCurrent, options, depth) {
2222
+ const tablePreprocess = depth < HtmlDiff.MaxThreeWayDepth ? preprocessTablesThreeWay(genesis, cpLatest, meCurrent, (g, c, m) => HtmlDiff.executeThreeWayWithDepth(g, c, m, options, depth + 1)) : null;
2223
+ const inGenesis = tablePreprocess?.modifiedGenesis ?? genesis;
2224
+ const inCp = tablePreprocess?.modifiedCp ?? cpLatest;
2225
+ const inMe = tablePreprocess?.modifiedMe ?? meCurrent;
2226
+ const analyzeOpts = {
2227
+ useProjections: options.useProjections ?? (HtmlDiff.evaluateProjectionApplicability(inGenesis, inCp) && HtmlDiff.evaluateProjectionApplicability(inGenesis, inMe)),
2228
+ blockExpressions: options.blockExpressions,
2229
+ repeatingWordsAccuracy: options.repeatingWordsAccuracy,
2230
+ orphanMatchThreshold: options.orphanMatchThreshold,
2231
+ ignoreWhitespaceDifferences: options.ignoreWhitespaceDifferences
2232
+ };
2233
+ const dCp = HtmlDiff.analyze(inGenesis, inCp, analyzeOpts);
2234
+ const dMe = HtmlDiff.analyze(inGenesis, inMe, analyzeOpts);
2235
+ if (dCp.oldDiffWords.length !== dMe.oldDiffWords.length) throw new Error(`HtmlDiff.executeThreeWay: genesis tokenisation diverged across pair-wise analyses (${dCp.oldDiffWords.length} vs ${dMe.oldDiffWords.length}). This indicates the symmetric-projection coordination has a bug.`);
2236
+ const segments = buildSegments(dCp, dMe);
2237
+ const merged = HtmlDiff.emitSegments(segments);
2238
+ return tablePreprocess ? restoreTablePlaceholders(merged, tablePreprocess.placeholderToDiff) : merged;
2239
+ }
2240
+ /**
2241
+ * Drives a fresh `HtmlDiff` instance through `insertTag` for ins/del
2242
+ * segments and pushes equal segments straight to its `content`
2243
+ * buffer. Reusing the instance keeps the formatting-tag stack
2244
+ * (`specialTagDiffStack`) coherent across segments — a `<strong>`
2245
+ * opened in one segment and closed in another stays balanced.
2246
+ */
2247
+ static emitSegments(segments) {
2248
+ const emitter = new HtmlDiff("", "");
2249
+ for (const seg of segments) {
2250
+ if (seg.attr.kind === "equal") {
2251
+ emitter.content.push(seg.words.join(""));
2252
+ continue;
2253
+ }
2254
+ const { tag, baseClass, metadata } = segmentEmissionShape(seg.attr);
2255
+ emitter.insertTag(tag, baseClass, [...seg.words], metadata);
2256
+ }
2257
+ if (emitter.specialTagDiffStack.length > 0) throw new Error(`HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} unclosed formatting tag(s) on the stack — input may have unbalanced <strong>/<em>/etc. or there is a bug in segment emission.`);
2258
+ return emitter.content.join("");
2259
+ }
2260
+ /**
2261
+ * Internal entry point used by the table-cell recursion. Constructs an
2262
+ * inner `HtmlDiff`, applies the caller's settings, and bumps the
2263
+ * recursion depth — keeping the public constructor signature clean
2264
+ * while still threading the configuration that's required for cell-
2265
+ * level output to match the top-level call's behaviour.
2266
+ */
2267
+ static executeWithContext(oldText, newText, ctx) {
2268
+ const inner = new HtmlDiff(oldText, newText);
2269
+ inner.tablePreprocessDepth = ctx.depth;
2270
+ for (const expr of ctx.blockExpressions) inner.addBlockExpression(expr);
2271
+ inner.repeatingWordsAccuracy = ctx.repeatingWordsAccuracy;
2272
+ inner.orphanMatchThreshold = ctx.orphanMatchThreshold;
2273
+ inner.ignoreWhitespaceDifferences = ctx.ignoreWhitespaceDifferences;
2274
+ return inner.build();
1538
2275
  }
1539
2276
  /**
1540
2277
  * Builds the HTML diff output
@@ -1542,18 +2279,17 @@ var HtmlDiff = class HtmlDiff {
1542
2279
  */
1543
2280
  build() {
1544
2281
  if (this.oldText === this.newText) return this.newText;
1545
- const blockExpressions = this.blockExpressions;
1546
- const repeatingWordsAccuracy = this.repeatingWordsAccuracy;
1547
- const orphanMatchThreshold = this.orphanMatchThreshold;
1548
- const ignoreWhitespaceDifferences = this.ignoreWhitespaceDifferences;
1549
- const tablePreprocess = this.tablePreprocessDepth >= HtmlDiff.MaxTablePreprocessDepth ? null : preprocessTables(this.oldText, this.newText, (oldCell, newCell) => {
1550
- const inner = new HtmlDiff(oldCell, newCell, this.tablePreprocessDepth + 1);
1551
- for (const expr of blockExpressions) inner.addBlockExpression(expr);
1552
- inner.repeatingWordsAccuracy = repeatingWordsAccuracy;
1553
- inner.orphanMatchThreshold = orphanMatchThreshold;
1554
- inner.ignoreWhitespaceDifferences = ignoreWhitespaceDifferences;
1555
- return inner.build();
1556
- });
2282
+ let tablePreprocess = null;
2283
+ if (this.tablePreprocessDepth < HtmlDiff.MaxTablePreprocessDepth) {
2284
+ const ctx = {
2285
+ depth: this.tablePreprocessDepth + 1,
2286
+ blockExpressions: this.blockExpressions,
2287
+ repeatingWordsAccuracy: this.repeatingWordsAccuracy,
2288
+ orphanMatchThreshold: this.orphanMatchThreshold,
2289
+ ignoreWhitespaceDifferences: this.ignoreWhitespaceDifferences
2290
+ };
2291
+ tablePreprocess = preprocessTables(this.oldText, this.newText, (oldCell, newCell) => HtmlDiff.executeWithContext(oldCell, newCell, ctx));
2292
+ }
1557
2293
  if (tablePreprocess) {
1558
2294
  this.oldText = tablePreprocess.modifiedOld;
1559
2295
  this.newText = tablePreprocess.modifiedNew;
@@ -1763,12 +2499,12 @@ var HtmlDiff = class HtmlDiff {
1763
2499
  * @param words
1764
2500
  * @private
1765
2501
  */
1766
- insertTag(tag, cssClass, words) {
2502
+ insertTag(tag, cssClass, words, metadata) {
1767
2503
  while (true) {
1768
2504
  if (words.length === 0) break;
1769
2505
  const allWordsUntilFirstTag = this.extractConsecutiveWords(words, (x) => !Utils_default.isTag(x));
1770
2506
  if (allWordsUntilFirstTag.length > 0) {
1771
- const text = Utils_default.wrapText(allWordsUntilFirstTag.join(""), tag, cssClass);
2507
+ const text = Utils_default.wrapText(allWordsUntilFirstTag.join(""), tag, cssClass, metadata);
1772
2508
  this.content.push(text);
1773
2509
  }
1774
2510
  if (words.length === 0) break;
@@ -1781,7 +2517,7 @@ var HtmlDiff = class HtmlDiff {
1781
2517
  for (const word of words) if (Utils_default.isTag(word)) tagNames.add(Utils_default.getTagName(word));
1782
2518
  const styledTagNames = Array.from(tagNames).join(" ");
1783
2519
  this.specialTagDiffStack.push(words[0]);
1784
- specialCaseTagInjection = `<ins class='mod ${styledTagNames}'>`;
2520
+ specialCaseTagInjection = `<ins${Utils_default.composeTagAttributes(`mod ${styledTagNames}`, metadata ?? {})}>`;
1785
2521
  if (tag === HtmlDiff.DelTag) {
1786
2522
  words.shift();
1787
2523
  while (words.length > 0 && HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) words.shift();
@@ -1807,7 +2543,7 @@ var HtmlDiff = class HtmlDiff {
1807
2543
  if (specialCaseTagInjectionIsBefore) this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, isTagForExtraction).join(""));
1808
2544
  else this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join("") + specialCaseTagInjection);
1809
2545
  if (words.length === 0) continue;
1810
- this.insertTag(tag, cssClass, words);
2546
+ this.insertTag(tag, cssClass, words, metadata);
1811
2547
  break;
1812
2548
  }
1813
2549
  }