@createiq/htmldiff 1.1.0-beta.0 → 1.2.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/Utils.ts CHANGED
@@ -32,8 +32,39 @@ export function stripTagAttributes(word: string): string {
32
32
  return word
33
33
  }
34
34
 
35
- export function wrapText(text: string, tagName: string, cssClass: string): string {
36
- return `<${tagName} class='${cssClass}'>${text}</${tagName}>`
35
/**
 * Optional metadata attached to a wrapped tag. Used by `executeThreeWay`
 * to colour diff segments with their author (CP vs Me) via extra classes
 * and `data-*` attributes; the two-way path passes nothing and gets the
 * unchanged historical output.
 */
export interface WrapMetadata {
  /** Space-separated classes appended after `cssClass`. */
  extraClasses?: string
  /** `data-*` attribute map, keyed by the attribute name *without* the `data-` prefix. */
  dataAttrs?: Readonly<Record<string, string>>
}

/**
 * Wrap `text` in a `<tagName>` element carrying `cssClass`.
 *
 * @param text - Inner content, emitted verbatim (no escaping is applied).
 * @param tagName - Element name used for both the opening and closing tag.
 * @param cssClass - Base class written into the single-quoted `class` attribute.
 * @param metadata - Optional extra classes / `data-*` attributes. When omitted,
 *   the output is exactly `<tag class='cssClass'>text</tag>`.
 * @returns The wrapped HTML fragment.
 */
export function wrapText(text: string, tagName: string, cssClass: string, metadata?: WrapMetadata): string {
  // No metadata: keep the historical two-way output byte-for-byte.
  if (!metadata) return `<${tagName} class='${cssClass}'>${text}</${tagName}>`
  return `<${tagName}${composeTagAttributes(cssClass, metadata)}>${text}</${tagName}>`
}

/**
 * Build the attribute portion of an opening tag from a base class plus
 * optional metadata. Exposed so emission paths that build opening-tag
 * fragments by hand (e.g. the formatting-tag special-case in
 * `HtmlDiff.insertTag`) can stay consistent with `wrapText`.
 *
 * @param cssClass - Base class; always emitted first in the `class` attribute.
 * @param metadata - Extra classes (appended after `cssClass`, space-separated)
 *   and `data-*` attributes (emitted in the map's own key order).
 * @returns A string starting with a leading space, e.g.
 *   ` class='diffins cp' data-author='me'`, ready to splice after the tag name.
 */
export function composeTagAttributes(cssClass: string, metadata: WrapMetadata): string {
  const classes = metadata.extraClasses ? `${cssClass} ${metadata.extraClasses}` : cssClass
  let out = ` class='${classes}'`
  for (const [key, value] of Object.entries(metadata.dataAttrs ?? {})) {
    // Attribute values are single-quoted, so a raw `'` inside a value would
    // terminate the attribute early and corrupt the tag. Encode it as a
    // character reference; every other character is passed through as-is.
    out += ` data-${key}='${value.replace(/'/g, '&#39;')}'`
  }
  return out
}
38
69
 
39
70
  export function isStartOfTag(val: string): boolean {
@@ -85,6 +116,7 @@ export default {
85
116
  isTag,
86
117
  stripTagAttributes,
87
118
  wrapText,
119
+ composeTagAttributes,
88
120
  isStartOfTag,
89
121
  isEndOfTag,
90
122
  isStartOfEntity,
@@ -0,0 +1,152 @@
1
+ import { describe, expect, it } from 'vitest'
2
+
3
+ import Action from '../src/Action'
4
+ import HtmlDiff from '../src/HtmlDiff'
5
+
6
+ describe('HtmlDiff.analyze', () => {
7
+ describe('return shape', () => {
8
+ it('returns operations indexed into oldDiffWords / newDiffWords', () => {
9
+ const result = HtmlDiff.analyze('a b c', 'a x c')
10
+ expect(result.oldDiffWords).toBeInstanceOf(Array)
11
+ expect(result.newDiffWords).toBeInstanceOf(Array)
12
+ expect(result.operations).toBeInstanceOf(Array)
13
+ // Every op's endInOld must be ≤ oldDiffWords.length, etc.
14
+ for (const op of result.operations) {
15
+ expect(op.endInOld).toBeLessThanOrEqual(result.oldDiffWords.length)
16
+ expect(op.endInNew).toBeLessThanOrEqual(result.newDiffWords.length)
17
+ }
18
+ })
19
+
20
+ it('returns original word arrays alongside the diff arrays', () => {
21
+ const result = HtmlDiff.analyze('hello world', 'hello there world')
22
+ expect(result.oldOriginalWords).toBeInstanceOf(Array)
23
+ expect(result.newOriginalWords).toBeInstanceOf(Array)
24
+ })
25
+
26
+ it('returns null contentToOriginal maps when projections are inactive', () => {
27
+ // Plain text with no structural tags → projection inactive.
28
+ const result = HtmlDiff.analyze('a b c', 'a x c')
29
+ expect(result.oldContentToOriginal).toBeNull()
30
+ expect(result.newContentToOriginal).toBeNull()
31
+ })
32
+
33
+ it('returns non-null contentToOriginal maps when projections are active', () => {
34
+ // Different wrapper tags → projection kicks in.
35
+ const result = HtmlDiff.analyze('<p>hello world</p>', '<div>hello world</div>')
36
+ expect(result.oldContentToOriginal).not.toBeNull()
37
+ expect(result.newContentToOriginal).not.toBeNull()
38
+ })
39
+ })
40
+
41
+ describe('useProjections option', () => {
42
+ it('honours useProjections=false even when the heuristic would project', () => {
43
+ const result = HtmlDiff.analyze('<p>hello world</p>', '<div>hello world</div>', { useProjections: false })
44
+ expect(result.oldContentToOriginal).toBeNull()
45
+ expect(result.newContentToOriginal).toBeNull()
46
+ // Structural tags appear as diff tokens — observable consequence.
47
+ expect(result.oldDiffWords).toContain('<p>')
48
+ expect(result.newDiffWords).toContain('<div>')
49
+ })
50
+
51
+ it('honours useProjections=true even when the heuristic would skip', () => {
52
+ // Same structural tags on both sides → heuristic skips projection.
53
+ // Forcing it should still project (strip the <p> tags from diff space).
54
+ const result = HtmlDiff.analyze('<p>a b c</p>', '<p>a x c</p>', { useProjections: true })
55
+ expect(result.oldContentToOriginal).not.toBeNull()
56
+ expect(result.newContentToOriginal).not.toBeNull()
57
+ // Structural tags removed from diff arrays.
58
+ expect(result.oldDiffWords).not.toContain('<p>')
59
+ })
60
+
61
+ it('keeps projections off when useProjections=true but one side has no content', () => {
62
+ const result = HtmlDiff.analyze('<p></p>', '<p>added</p>', { useProjections: true })
63
+ // Empty-content side disables the forced projection.
64
+ expect(result.oldContentToOriginal).toBeNull()
65
+ expect(result.newContentToOriginal).toBeNull()
66
+ })
67
+ })
68
+
69
+ describe('symmetric V2 tokenisation', () => {
70
+ it('produces an identical V2 diff array across two calls when useProjections matches', () => {
71
+ const v1 = '<p>Hello world.</p>'
72
+ const v2 = '<p>Hello cruel world.</p>'
73
+ const v3 = '<p>Hello cruel world today.</p>'
74
+ const useProjections = false // Force off — both calls agree.
75
+ const d1 = HtmlDiff.analyze(v1, v2, { useProjections })
76
+ const d2 = HtmlDiff.analyze(v2, v3, { useProjections })
77
+ expect(d1.newDiffWords).toEqual(d2.oldDiffWords)
78
+ })
79
+
80
+ it('the V2 arrays diverge when one call projects and the other does not (motivates D1)', () => {
81
+ // Asymmetric structural patterns: V1 has <p>, V3 has <div>; V2 has <p>.
82
+ // V1↔V2 heuristic: no structural diff → no projection.
83
+ // V2↔V3 heuristic: structural diff → project.
84
+ // Result: d1.newDiffWords (raw V2) ≠ d2.oldDiffWords (projected V2).
85
+ const v1 = '<p>Hello world.</p>'
86
+ const v2 = '<p>Hello cruel world.</p>'
87
+ const v3 = '<div>Hello cruel world today.</div>'
88
+ const d1 = HtmlDiff.analyze(v1, v2)
89
+ const d2 = HtmlDiff.analyze(v2, v3)
90
+ // This is the bug that D1's symmetric-decision design exists to prevent.
91
+ expect(d1.newDiffWords).not.toEqual(d2.oldDiffWords)
92
+ })
93
+ })
94
+
95
+ describe('options pass-through', () => {
96
it('respects ignoreWhitespaceDifferences', () => {
  // The old side separates the words with TWO spaces, the new side with
  // ONE. With the flag on, the matcher should consider two-space and
  // single-space as equivalent; without it, they replace.
  const withoutFlag = HtmlDiff.analyze('a  b', 'a b')
  const withFlag = HtmlDiff.analyze('a  b', 'a b', { ignoreWhitespaceDifferences: true })
  // Counts the Replace operations reported for one analysis result.
  const replaceCount = (r: typeof withFlag) => r.operations.filter(op => op.action === Action.Replace).length
  // Flag off: whitespace difference shows up as a Replace.
  expect(replaceCount(withoutFlag)).toBeGreaterThan(0)
  // Flag on: no Replace, only Equals.
  expect(replaceCount(withFlag)).toBe(0)
})
107
+
108
+ it('respects blockExpressions', () => {
109
+ // Without the block expression, "01/01/2026" is split into multiple
110
+ // tokens; with it, the whole date is one token (per WordSplitter's
111
+ // BlockFinder contract — uses the `g` flag).
112
+ const dateExpr = /\d{2}\/\d{2}\/\d{4}/g
113
+ const without = HtmlDiff.analyze('on 01/01/2026 here', 'on 02/02/2027 here')
114
+ const withExpr = HtmlDiff.analyze('on 01/01/2026 here', 'on 02/02/2027 here', { blockExpressions: [dateExpr] })
115
+ expect(withExpr.oldDiffWords.length).toBeLessThan(without.oldDiffWords.length)
116
+ })
117
+ })
118
+ })
119
+
120
+ describe('HtmlDiff.evaluateProjectionApplicability', () => {
121
+ it('returns false when structures match', () => {
122
+ expect(HtmlDiff.evaluateProjectionApplicability('<p>a</p>', '<p>b</p>')).toBe(false)
123
+ })
124
+
125
+ it('returns true when structures differ (wrapper rename)', () => {
126
+ expect(HtmlDiff.evaluateProjectionApplicability('<p>a b c</p>', '<div>a b c</div>')).toBe(true)
127
+ })
128
+
129
+ it('returns false when one side has no structural tags at all', () => {
130
+ // Plain text vs wrapped HTML: shouldUseContentProjections bails.
131
+ expect(HtmlDiff.evaluateProjectionApplicability('plain text', '<p>plain text</p>')).toBe(false)
132
+ })
133
+
134
+ it('returns false when projection would empty one side', () => {
135
+ expect(HtmlDiff.evaluateProjectionApplicability('<p></p>', '<div>content</div>')).toBe(false)
136
+ })
137
+
138
+ it('lets a composer compute a symmetric decision across three inputs', () => {
139
+ const v1 = '<p>Hello world.</p>'
140
+ const v2 = '<p>Hello cruel world.</p>'
141
+ const v3 = '<div>Hello cruel world today.</div>'
142
+ const proj12 = HtmlDiff.evaluateProjectionApplicability(v1, v2)
143
+ const proj23 = HtmlDiff.evaluateProjectionApplicability(v2, v3)
144
+ // The symmetric decision is the conjunction — project iff both pairs would.
145
+ const symmetric = proj12 && proj23
146
+ expect(symmetric).toBe(false) // V1↔V2 has no structural diff.
147
+ // Both calls then use useProjections=false and V2 tokenises identically.
148
+ const d1 = HtmlDiff.analyze(v1, v2, { useProjections: symmetric })
149
+ const d2 = HtmlDiff.analyze(v2, v3, { useProjections: symmetric })
150
+ expect(d1.newDiffWords).toEqual(d2.oldDiffWords)
151
+ })
152
+ })
@@ -108,9 +108,14 @@ describe('HtmlDiff — table operations matrix', () => {
108
108
  // still drifts.
109
109
  const rowCount = countMatches(result, /<tr[\s>]/g)
110
110
  expect(rowCount).toBe(4) // header + Party A + empty + Party B
111
- expect(result).toContain(
112
- "<tr class='diffins'><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td></tr>"
113
- )
111
+ // The inserted empty row must be emitted with diffins on the <tr>
112
+ // and 4 empty diffins-marked cells. Asserted via regex (quote-
113
+ // agnostic, whitespace-tolerant) so an incidental change in
114
+ // attribute-quote style isn't flagged as a regression.
115
+ const emptyInsertedRow = result.match(/<tr class=['"]diffins['"]>(.*?)<\/tr>/)
116
+ expect(emptyInsertedRow).not.toBeNull()
117
+ const emptyCellCount = countMatches(emptyInsertedRow?.[1] ?? '', /<td class=['"]diffins['"]><\/td>/g)
118
+ expect(emptyCellCount).toBe(4)
114
119
  })
115
120
  })
116
121
  })
@@ -859,7 +859,15 @@ describe('HtmlDiff — tables', () => {
859
859
  '<table><tr><td>A</td><td>X</td></tr><tr data-behaviour="data"><td>B</td><td>EXTRA</td><td>C</td></tr></table>'
860
860
 
861
861
  const result = HtmlDiff.execute(oldHtml, newHtml)
862
- expect(result).toContain('<tr data-behaviour="data">')
862
+ // The <tr> attribute must survive AND the row's cells must be
863
+ // intact: B preserved, EXTRA inserted, C preserved. A bare
864
+ // `toContain('<tr data-behaviour="data">')` would pass even if
865
+ // the cells were dropped or duplicated downstream.
866
+ expect(result).toContain(
867
+ '<tr data-behaviour="data"><td>B</td>' +
868
+ "<td class='diffins'><ins class='diffins'>EXTRA</ins></td>" +
869
+ '<td>C</td></tr>'
870
+ )
863
871
  })
864
872
 
865
873
  it('preserves data-* attribute on a <tr> when the row passes through fuzzy-matching with content edit', () => {
@@ -879,7 +887,12 @@ describe('HtmlDiff — tables', () => {
879
887
  '</table>'
880
888
 
881
889
  const result = HtmlDiff.execute(oldHtml, newHtml)
882
- expect(result).toContain('<tr data-behaviour="data">')
890
+ // Same intent as above: attribute must survive AND the row's
891
+ // content-edit must be present on the same <tr>.
892
+ expect(result).toContain(
893
+ '<tr data-behaviour="data"><td>The quick brown fox jumps over' +
894
+ "<ins class='diffins'>&nbsp;the lazy dog</ins>.</td></tr>"
895
+ )
883
896
  })
884
897
 
885
898
  it('preserves <table> attributes verbatim from new (no diff marker on attribute changes)', () => {
@@ -1573,12 +1586,13 @@ describe('HtmlDiff — tables', () => {
1573
1586
  })
1574
1587
  })
1575
1588
 
1576
- // Coverage gaps surfaced by the v8 report: the cell-LCS fallback path
1577
- // (diffStructurallyAlignedRow + cellKey + pairSimilarUnmatchedCells) is
1578
- // only entered when the per-row column delta exceeds MAX_COLUMN_DELTA
1579
- // (6) or the row's logical width exceeds MAX_COLUMN_SEARCH_WIDTH (40).
1580
- // None of the existing tests trigger that. These tests exercise the
1581
- // fallback and pin its behaviour.
1589
+ // The cell-LCS fallback path (diffStructurallyAlignedRow + cellKey +
1590
+ // pairSimilarUnmatchedCells) is now entered only when the per-row
1591
+ // column delta exceeds MAX_COLUMN_DELTA (6) — the semantic "this is a
1592
+ // row rewrite, not a column add" guard. The row-width guard
1593
+ // (MAX_COLUMN_SEARCH_WIDTH) is now defensive only since the DP is
1594
+ // O(M × N). These tests pin the fallback's behaviour for the
1595
+ // delta > 6 path.
1582
1596
  describe('cell-LCS fallback for very-wide column changes', () => {
1583
1597
  it('handles 8 columns inserted alongside existing cells (delta > MAX_COLUMN_DELTA)', () => {
1584
1598
  // Old: 3 cells. New: 11 cells (8 columns added). Exact-LCS finds
@@ -1689,22 +1703,25 @@ describe('HtmlDiff — tables', () => {
1689
1703
  const newHtml =
1690
1704
  '<table><tr>' + '<td>FirstB</td>' + '<td colspan="2">Merged AB</td>' + '<td>LastB</td>' + '</tr></table>'
1691
1705
 
1692
- const result = HtmlDiff.execute(oldHtml, newHtml)
1693
1706
  // First and last cells should diff content cell-by-cell (matching
1694
1707
  // colspans = 1 on both sides); middle two old cells merge into
1695
- // one colspan=2 cell tagged 'mod colspan'.
1696
- expect(result).toContain("<del class='diffmod'>FirstA</del>")
1697
- expect(result).toContain("<ins class='diffmod'>FirstB</ins>")
1698
- expect(result).toContain('colspan="2" class=\'mod colspan\'')
1699
- expect(result).toContain("<del class='diffmod'>LastA</del>")
1700
- expect(result).toContain("<ins class='diffmod'>LastB</ins>")
1708
+ // one colspan=2 cell tagged 'mod colspan'. Asserted as an exact
1709
+ // string so that a positional swap of first/last is caught — a
1710
+ // swap-blind set of `toContain` assertions would not flag it.
1711
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1712
+ '<table><tr>' +
1713
+ "<td><del class='diffmod'>FirstA</del><ins class='diffmod'>FirstB</ins></td>" +
1714
+ `<td colspan="2" class='mod colspan'>Merged AB</td>` +
1715
+ "<td><del class='diffmod'>LastA</del><ins class='diffmod'>LastB</ins></td>" +
1716
+ '</tr></table>'
1717
+ )
1701
1718
  })
1702
1719
 
1703
- it('handles row wider than MAX_COLUMN_SEARCH_WIDTH (40 cells) fallback to cell-LCS', () => {
1720
+ it('handles a 50-cell row with a single column inserted at start', () => {
1704
1721
  // 50-cell row in old, 51-cell row in new (1 column added at
1705
- // start). MAX_COLUMN_SEARCH_WIDTH guard prevents the
1706
- // combinatorial search; fallback to cell-LCS which finds 50
1707
- // exact matches and the 1 new cell as an insertion.
1722
+ // start). Now stays on the DP path (MAX_COLUMN_SEARCH_WIDTH=200);
1723
+ // produces the same output as the prior cell-LCS fallback would
1724
+ // have: 1 inserted cell, 50 preserved.
1708
1725
  const oldCells = Array.from({ length: 50 }, (_, i) => `<td>c${i}</td>`).join('')
1709
1726
  const newCells = `<td>NEW</td>${oldCells}`
1710
1727
  const oldHtml = `<table><tr>${oldCells}</tr></table>`
@@ -1741,4 +1758,336 @@ describe('HtmlDiff — tables', () => {
1741
1758
  )
1742
1759
  })
1743
1760
  })
1761
+
1762
+ // The column-position search can encounter score ties when inserted
1763
+ // cells have content that is similar both to each other and to
1764
+ // existing cells (e.g. boilerplate "N/A" in a legal schedule). The
1765
+ // algorithm's tie-breaking resolves to skipping LATER positions in
1766
+ // the longer side — the lex-first-combo behaviour of the original
1767
+ // combinatorial path, now matched by "prefer pair on ties" in the DP
1768
+ // backtrack. These tests pin both the structural shape AND the exact
1769
+ // positions the diffins markers land on, so a silent shift of the
1770
+ // tie-breaking rule would fail loudly.
1771
+ describe('column-position search — score-tied inputs', () => {
1772
+ it('handles delta=2 with content-similar inserts (N/A boilerplate)', () => {
1773
+ const oldHtml = '<table><tr><td>N/A</td><td>Term</td><td>Amount</td><td>N/A</td></tr></table>'
1774
+ const newHtml =
1775
+ '<table><tr><td>N/A</td><td>N/A</td><td>Term</td><td>N/A</td><td>Amount</td><td>N/A</td></tr></table>'
1776
+
1777
+ // Exact match locks in tie-breaking: the diffins markers MUST land
1778
+ // on the earliest positions that produce the optimal score (here:
1779
+ // positions 0 and 3).
1780
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1781
+ '<table><tr>' +
1782
+ "<td class='diffins'><ins class='diffins'>N/A</ins></td>" +
1783
+ '<td>N/A</td>' +
1784
+ '<td>Term</td>' +
1785
+ "<td class='diffins'><ins class='diffins'>N/A</ins></td>" +
1786
+ '<td>Amount</td>' +
1787
+ '<td>N/A</td>' +
1788
+ '</tr></table>'
1789
+ )
1790
+ })
1791
+
1792
+ it('still passes the loose structural checks for the same inputs', () => {
1793
+ // Kept alongside the exact-match assertion above as a structural
1794
+ // safety net: if the exact form ever shifts (e.g. quote style),
1795
+ // these structural invariants still apply.
1796
+ const oldHtml = '<table><tr><td>N/A</td><td>Term</td><td>Amount</td><td>N/A</td></tr></table>'
1797
+ const newHtml =
1798
+ '<table><tr><td>N/A</td><td>N/A</td><td>Term</td><td>N/A</td><td>Amount</td><td>N/A</td></tr></table>'
1799
+
1800
+ const result = HtmlDiff.execute(oldHtml, newHtml)
1801
+ // Both inserted N/A cells must be marked diffins.
1802
+ const insMarkers = (result.match(/<td class='diffins'>/g) || []).length
1803
+ expect(insMarkers).toBe(2)
1804
+ // Total td count must be 6 (no phantoms).
1805
+ const tdCount = (result.match(/<td[\s>]/g) || []).length
1806
+ expect(tdCount).toBe(6)
1807
+ // Term and Amount must appear as unmarked preserved cells exactly
1808
+ // once each — they're not similar to N/A, so the algorithm has no
1809
+ // ambiguity around them.
1810
+ expect(result).toContain('<td>Term</td>')
1811
+ expect(result).toContain('<td>Amount</td>')
1812
+ })
1813
+
1814
+ it('handles delta=6 (the MAX_COLUMN_DELTA cap) without misalignment', () => {
1815
+ const oldHtml = '<table><tr><td>A</td><td>B</td><td>C</td><td>D</td></tr></table>'
1816
+ const newHtml =
1817
+ '<table><tr>' +
1818
+ '<td>A</td><td>X1</td><td>X2</td><td>B</td><td>X3</td>' +
1819
+ '<td>X4</td><td>C</td><td>X5</td><td>X6</td><td>D</td>' +
1820
+ '</tr></table>'
1821
+
1822
+ const result = HtmlDiff.execute(oldHtml, newHtml)
1823
+ // Exactly 6 cells inserted, 4 preserved.
1824
+ const insMarkers = (result.match(/<td class='diffins'>/g) || []).length
1825
+ expect(insMarkers).toBe(6)
1826
+ const tdCount = (result.match(/<td[\s>]/g) || []).length
1827
+ expect(tdCount).toBe(10)
1828
+ // All four original cells survive in order without diff markers.
1829
+ expect(result).toContain('<td>A</td>')
1830
+ expect(result).toContain('<td>B</td>')
1831
+ expect(result).toContain('<td>C</td>')
1832
+ expect(result).toContain('<td>D</td>')
1833
+ })
1834
+ })
1835
+
1836
+ // orderAlignmentForEmission's `preserved` list is empty when no rows
1837
+ // survive across the diff. The "delete every row" and "insert every
1838
+ // row" cases are corner cases where the float-positioning logic must
1839
+ // degenerate cleanly: every del's primary becomes `-0.5` (predecessor
1840
+ // index -1), every ins's primary is its own newIdx. Both should emit
1841
+ // in oldIdx / newIdx order respectively, with no preserved rows
1842
+ // sandwiched between them.
1843
+ describe('orderAlignmentForEmission — empty preserved list', () => {
1844
+ it('emits every row as diffdel when new is empty (no preserved rows)', () => {
1845
+ const oldHtml =
1846
+ '<table>' +
1847
+ '<tr><td>r1</td></tr>' +
1848
+ '<tr><td>r2</td></tr>' +
1849
+ '<tr><td>r3</td></tr>' +
1850
+ '<tr><td>r4</td></tr>' +
1851
+ '<tr><td>r5</td></tr>' +
1852
+ '</table>'
1853
+ const newHtml = '<table></table>'
1854
+
1855
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1856
+ '<table>' +
1857
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r1</del></td></tr>" +
1858
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r2</del></td></tr>" +
1859
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r3</del></td></tr>" +
1860
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r4</del></td></tr>" +
1861
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r5</del></td></tr>" +
1862
+ '</table>'
1863
+ )
1864
+ })
1865
+
1866
+ it('emits every row as diffins when old is empty (no preserved rows)', () => {
1867
+ const oldHtml = '<table></table>'
1868
+ const newHtml = '<table>' + '<tr><td>r1</td></tr>' + '<tr><td>r2</td></tr>' + '<tr><td>r3</td></tr>' + '</table>'
1869
+
1870
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1871
+ '<table>' +
1872
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>r1</ins></td></tr>" +
1873
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>r2</ins></td></tr>" +
1874
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>r3</ins></td></tr>" +
1875
+ '</table>'
1876
+ )
1877
+ })
1878
+
1879
+ it('emits all-deletes in order when only the header is preserved', () => {
1880
+ // The header is the only preserved row, so 'preserved' has 1 entry
1881
+ // and every del's newIdxOfPreservedBefore returns -1 — exercising
1882
+ // the boundary between empty-preserved and a single anchoring row.
1883
+ const oldHtml =
1884
+ '<table>' +
1885
+ '<tr><td>Header</td></tr>' +
1886
+ '<tr><td>row a</td></tr>' +
1887
+ '<tr><td>row b</td></tr>' +
1888
+ '<tr><td>row c</td></tr>' +
1889
+ '</table>'
1890
+ const newHtml = '<table><tr><td>Header</td></tr></table>'
1891
+
1892
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1893
+ '<table>' +
1894
+ '<tr><td>Header</td></tr>' +
1895
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row a</del></td></tr>" +
1896
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row b</del></td></tr>" +
1897
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row c</del></td></tr>" +
1898
+ '</table>'
1899
+ )
1900
+ })
1901
+ })
1902
+
1903
+ // Fuzzy row-pairing threshold (0.5) calibrations on ISDA-flavoured
1904
+ // content. The interesting cases are at the edges: rows that share a
1905
+ // little (an enumeration prefix only) — must NOT pair (otherwise an
1906
+ // unrelated rewrite shows up as a single-row content edit); rows that
1907
+ // share a lot of boilerplate text but differ in the meaningful body —
1908
+ // must pair (otherwise the user sees del+ins instead of an edit).
1909
+ describe('fuzzy row pairing — enumerated clauses and shared boilerplate', () => {
1910
+ it('does NOT pair rows that share only an enumeration prefix (different bodies)', () => {
1911
+ // Old has 2 rows, new has 3 rows. The "1." and "2." prefixes are
1912
+ // the only commonality — bodies are completely unrelated.
1913
+ // textSimilarity falls below 0.5 (prefix is 3 chars in 60+; jaccard
1914
+ // is also tiny), so fuzzy pairing must NOT fire — each pair should
1915
+ // emit as a clean del + ins, not a noisy intra-row diff.
1916
+ const oldHtml =
1917
+ '<table>' +
1918
+ '<tr><td>1. Party A shall pay the gross amount on each Payment Date.</td></tr>' +
1919
+ '<tr><td>2. Party B shall deliver collateral on each Calculation Date.</td></tr>' +
1920
+ '</table>'
1921
+ const newHtml =
1922
+ '<table>' +
1923
+ '<tr><td>1. Section intentionally left blank.</td></tr>' +
1924
+ '<tr><td>2. Different boilerplate entirely.</td></tr>' +
1925
+ '<tr><td>3. Brand new clause added here.</td></tr>' +
1926
+ '</table>'
1927
+
1928
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1929
+ '<table>' +
1930
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>1. Party A shall pay the gross amount on each Payment Date.</del></td></tr>" +
1931
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>2. Party B shall deliver collateral on each Calculation Date.</del></td></tr>" +
1932
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>1. Section intentionally left blank.</ins></td></tr>" +
1933
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>2. Different boilerplate entirely.</ins></td></tr>" +
1934
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>3. Brand new clause added here.</ins></td></tr>" +
1935
+ '</table>'
1936
+ )
1937
+ })
1938
+
1939
+ it('DOES pair rows that share a long boilerplate footer (Jaccard kicks in)', () => {
1940
+ // The 9-char body differs, but the 46-char trailing footer is
1941
+ // identical. Prefix-suffix similarity is low, but token Jaccard is
1942
+ // very high because shared footer tokens dominate the token set.
1943
+ // textSimilarity = Math.max(prefix_suffix, jaccard) → must pair.
1944
+ const footer = ' subject to the terms of the Master Agreement.'
1945
+ const oldHtml = `<table><tr><td>Anchor row</td></tr><tr><td>Alpha now${footer}</td></tr></table>`
1946
+ const newHtml =
1947
+ `<table><tr><td>Anchor row</td></tr><tr><td>Bravo new${footer}</td></tr>` +
1948
+ '<tr><td>Extra row appended</td></tr></table>'
1949
+
1950
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1951
+ '<table>' +
1952
+ '<tr><td>Anchor row</td></tr>' +
1953
+ "<tr><td><del class='diffmod'>Alpha</del><ins class='diffmod'>Bravo</ins> " +
1954
+ "<del class='diffmod'>now</del><ins class='diffmod'>new</ins>" +
1955
+ ' subject to the terms of the Master Agreement.</td></tr>' +
1956
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>Extra row appended</ins></td></tr>" +
1957
+ '</table>'
1958
+ )
1959
+ })
1960
+ })
1961
+
1962
+ // orderAlignmentForEmission must keep unpaired dels in their correct
1963
+ // positions even when the run contains a mix of preserved rows, fuzzy-
1964
+ // paired rows, and unpaired dels. A regression here would make trailing
1965
+ // dels appear before the row they came after — the same family of bug
1966
+ // as the "deleted rows out of order" report that motivated the
1967
+ // function in the first place.
1968
+ describe('orderAlignmentForEmission — mixed paired and unpaired rows', () => {
1969
+ it('emits trailing unpaired dels after a fuzzy-paired content edit', () => {
1970
+ // Old: [Aaaaa, Bbbbb, Cccc, Dddd]. New: [Aaaaa, Bbbbb+NEW].
1971
+ // After LCS: A is preserved. After pairSimilarUnmatchedRows: B↔B'
1972
+ // via fuzzy. C and D are unpaired dels. The output order must be:
1973
+ // preserved(A) → paired(B,B') → del(C) → del(D).
1974
+ const oldHtml =
1975
+ '<table>' +
1976
+ '<tr><td>Aaaaa shared content here</td></tr>' +
1977
+ '<tr><td>Bbbbb shared content here</td></tr>' +
1978
+ '<tr><td>Cccc deleted row</td></tr>' +
1979
+ '<tr><td>Dddd deleted row</td></tr>' +
1980
+ '</table>'
1981
+ const newHtml =
1982
+ '<table>' +
1983
+ '<tr><td>Aaaaa shared content here</td></tr>' +
1984
+ '<tr><td>Bbbbb shared content here NEW</td></tr>' +
1985
+ '</table>'
1986
+
1987
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1988
+ '<table>' +
1989
+ '<tr><td>Aaaaa shared content here</td></tr>' +
1990
+ "<tr><td>Bbbbb shared content here<ins class='diffins'>&nbsp;NEW</ins></td></tr>" +
1991
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>Cccc deleted row</del></td></tr>" +
1992
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>Dddd deleted row</del></td></tr>" +
1993
+ '</table>'
1994
+ )
1995
+ })
1996
+
1997
+ it('emits dels at the end of the table in old-document order', () => {
1998
+ // Specifically protects against the regression that motivated
1999
+ // orderAlignmentForEmission: deleting the last two rows
2000
+ // simultaneously must emit them in the order they appeared in old
2001
+ // (second-last, then last), not reversed or jumbled.
2002
+ const oldHtml =
2003
+ '<table>' +
2004
+ '<tr><td>kept1</td></tr>' +
2005
+ '<tr><td>kept2</td></tr>' +
2006
+ '<tr><td>second-last</td></tr>' +
2007
+ '<tr><td>last</td></tr>' +
2008
+ '</table>'
2009
+ const newHtml = '<table><tr><td>kept1</td></tr><tr><td>kept2</td></tr></table>'
2010
+
2011
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
2012
+ '<table>' +
2013
+ '<tr><td>kept1</td></tr>' +
2014
+ '<tr><td>kept2</td></tr>' +
2015
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>second-last</del></td></tr>" +
2016
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>last</del></td></tr>" +
2017
+ '</table>'
2018
+ )
2019
+ })
2020
+ })
2021
+
2022
+ // pairSimilarUnmatched is intentionally greedy (not Hungarian-optimal).
2023
+ // When two unmatched-old entries are both above-threshold matches for
2024
+ // the same unmatched-new entry, the first del wins the pairing only
2025
+ // when iterating in document order means "first" matches the better
2026
+ // candidate. The output must still be structurally valid — no phantom
2027
+ // cells, no dropped content — regardless of which del wins.
2028
+ describe('pairSimilarUnmatchedCells — competing dels for the same ins', () => {
2029
+ it('keeps both candidate cells intact when two old cells could pair with one new cell', () => {
2030
+ // Two old cells with identical content compete for one similar
2031
+ // new cell. The greedy assignment picks one to pair as a content
2032
+ // edit; the other emits as a full diffdel. Both must appear; no
2033
+ // cell may silently vanish.
2034
+ const oldHtml =
2035
+ '<table><tr>' +
2036
+ '<td>Preserved</td>' +
2037
+ '<td>Old content alpha to be edited</td>' +
2038
+ '<td>Old content alpha to be edited</td>' +
2039
+ '</tr></table>'
2040
+ const newHtml = '<table><tr>' + '<td>Preserved</td>' + '<td>Old content alpha CHANGED</td>' + '</tr></table>'
2041
+
2042
+ const result = HtmlDiff.execute(oldHtml, newHtml)
2043
+ // The losing del must emit as a full diffdel cell.
2044
+ expect(result).toContain("<td class='diffdel'><del class='diffdel'>Old content alpha to be edited</del></td>")
2045
+ // The winning pair must emit as a partial content edit.
2046
+ expect(result).toContain("<del class='diffmod'>to be edited</del>")
2047
+ expect(result).toContain("<ins class='diffmod'>CHANGED</ins>")
2048
+ // Structural: 3 tds total in the output (1 preserved, 1 full-del,
2049
+ // 1 paired-edit). No phantoms.
2050
+ const tdCount = (result.match(/<td[\s>]/g) || []).length
2051
+ expect(tdCount).toBe(3)
2052
+ })
2053
+ })
2054
+
2055
+ // Row-LCS on a non-trivial 7-row table where only every other row
2056
+ // matches. Existing named tests max out around 4 rows; the matrix
2057
+ // never produces a row-count drop this large with this much
2058
+ // interleaving. This exercises the LCS DP itself, not just the
2059
+ // diff emission.
2060
+ describe('row-LCS on larger tables', () => {
2061
+ it('finds 4 preserved rows interleaved with 3 dropped rows in a 7-row old table', () => {
2062
+ const oldHtml =
2063
+ '<table>' +
2064
+ '<tr><td>row1 preserved</td></tr>' +
2065
+ '<tr><td>row2 old body</td></tr>' +
2066
+ '<tr><td>row3 preserved</td></tr>' +
2067
+ '<tr><td>row4 old body</td></tr>' +
2068
+ '<tr><td>row5 preserved</td></tr>' +
2069
+ '<tr><td>row6 old body</td></tr>' +
2070
+ '<tr><td>row7 preserved</td></tr>' +
2071
+ '</table>'
2072
+ const newHtml =
2073
+ '<table>' +
2074
+ '<tr><td>row1 preserved</td></tr>' +
2075
+ '<tr><td>row3 preserved</td></tr>' +
2076
+ '<tr><td>row5 preserved</td></tr>' +
2077
+ '<tr><td>row7 preserved</td></tr>' +
2078
+ '</table>'
2079
+
2080
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
2081
+ '<table>' +
2082
+ '<tr><td>row1 preserved</td></tr>' +
2083
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row2 old body</del></td></tr>" +
2084
+ '<tr><td>row3 preserved</td></tr>' +
2085
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row4 old body</del></td></tr>" +
2086
+ '<tr><td>row5 preserved</td></tr>' +
2087
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row6 old body</del></td></tr>" +
2088
+ '<tr><td>row7 preserved</td></tr>' +
2089
+ '</table>'
2090
+ )
2091
+ })
2092
+ })
1744
2093
  })