@createiq/htmldiff 1.1.0-beta.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -859,7 +859,15 @@ describe('HtmlDiff — tables', () => {
859
859
  '<table><tr><td>A</td><td>X</td></tr><tr data-behaviour="data"><td>B</td><td>EXTRA</td><td>C</td></tr></table>'
860
860
 
861
861
  const result = HtmlDiff.execute(oldHtml, newHtml)
862
- expect(result).toContain('<tr data-behaviour="data">')
862
+ // The <tr> attribute must survive AND the row's cells must be
863
+ // intact: B preserved, EXTRA inserted, C preserved. A bare
864
+ // `toContain('<tr data-behaviour="data">')` would pass even if
865
+ // the cells were dropped or duplicated downstream.
866
+ expect(result).toContain(
867
+ '<tr data-behaviour="data"><td>B</td>' +
868
+ "<td class='diffins'><ins class='diffins'>EXTRA</ins></td>" +
869
+ '<td>C</td></tr>'
870
+ )
863
871
  })
864
872
 
865
873
  it('preserves data-* attribute on a <tr> when the row passes through fuzzy-matching with content edit', () => {
@@ -879,7 +887,12 @@ describe('HtmlDiff — tables', () => {
879
887
  '</table>'
880
888
 
881
889
  const result = HtmlDiff.execute(oldHtml, newHtml)
882
- expect(result).toContain('<tr data-behaviour="data">')
890
+ // Same intent as above: attribute must survive AND the row's
891
+ // content-edit must be present on the same <tr>.
892
+ expect(result).toContain(
893
+ '<tr data-behaviour="data"><td>The quick brown fox jumps over' +
894
+ "<ins class='diffins'>&nbsp;the lazy dog</ins>.</td></tr>"
895
+ )
883
896
  })
884
897
 
885
898
  it('preserves <table> attributes verbatim from new (no diff marker on attribute changes)', () => {
@@ -1689,15 +1702,18 @@ describe('HtmlDiff — tables', () => {
1689
1702
  const newHtml =
1690
1703
  '<table><tr>' + '<td>FirstB</td>' + '<td colspan="2">Merged AB</td>' + '<td>LastB</td>' + '</tr></table>'
1691
1704
 
1692
- const result = HtmlDiff.execute(oldHtml, newHtml)
1693
1705
  // First and last cells should diff content cell-by-cell (matching
1694
1706
  // colspans = 1 on both sides); middle two old cells merge into
1695
- // one colspan=2 cell tagged 'mod colspan'.
1696
- expect(result).toContain("<del class='diffmod'>FirstA</del>")
1697
- expect(result).toContain("<ins class='diffmod'>FirstB</ins>")
1698
- expect(result).toContain('colspan="2" class=\'mod colspan\'')
1699
- expect(result).toContain("<del class='diffmod'>LastA</del>")
1700
- expect(result).toContain("<ins class='diffmod'>LastB</ins>")
1707
+ // one colspan=2 cell tagged 'mod colspan'. Asserted as an exact
1708
+ // string so that a positional swap of first/last is caught — a
1709
+ // swap-blind set of `toContain` assertions would not flag it.
1710
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1711
+ '<table><tr>' +
1712
+ "<td><del class='diffmod'>FirstA</del><ins class='diffmod'>FirstB</ins></td>" +
1713
+ `<td colspan="2" class='mod colspan'>Merged AB</td>` +
1714
+ "<td><del class='diffmod'>LastA</del><ins class='diffmod'>LastB</ins></td>" +
1715
+ '</tr></table>'
1716
+ )
1701
1717
  })
1702
1718
 
1703
1719
  it('handles row wider than MAX_COLUMN_SEARCH_WIDTH (40 cells) — fallback to cell-LCS', () => {
@@ -1741,4 +1757,313 @@ describe('HtmlDiff — tables', () => {
1741
1757
  )
1742
1758
  })
1743
1759
  })
1760
+
1761
+ // The combinatorial position search can encounter score ties when
1762
+ // inserted cells have content that is similar both to each other and to
1763
+ // existing cells (e.g. boilerplate "N/A" in a legal schedule). The
1764
+ // algorithm resolves ties by combination-iteration order, so the choice
1765
+ // of which specific column gets the diffins marker is deterministic
1766
+ // but not necessarily the "intuitive" one. These tests lock in the
1767
+ // observed behaviour and guard against silent regressions in the
1768
+ // structural shape: all original cells must survive unmarked, and the
1769
+ // inserted-marker count must equal the column delta.
1770
+ describe('combinatorial column search — score-tied inputs', () => {
1771
+ it('handles delta=2 with content-similar inserts (N/A boilerplate)', () => {
1772
+ const oldHtml = '<table><tr><td>N/A</td><td>Term</td><td>Amount</td><td>N/A</td></tr></table>'
1773
+ const newHtml =
1774
+ '<table><tr><td>N/A</td><td>N/A</td><td>Term</td><td>N/A</td><td>Amount</td><td>N/A</td></tr></table>'
1775
+
1776
+ const result = HtmlDiff.execute(oldHtml, newHtml)
1777
+ // Both inserted N/A cells must be marked diffins.
1778
+ const insMarkers = (result.match(/<td class='diffins'>/g) || []).length
1779
+ expect(insMarkers).toBe(2)
1780
+ // Total td count must be 6 (no phantoms).
1781
+ const tdCount = (result.match(/<td[\s>]/g) || []).length
1782
+ expect(tdCount).toBe(6)
1783
+ // Term and Amount must appear as unmarked preserved cells — they're
1784
+ // not similar to N/A, so the algorithm has no ambiguity around them.
1785
+ // (toContain checks presence only, not an exactly-once count.)
1786
+ expect(result).toContain('<td>Term</td>')
1787
+ expect(result).toContain('<td>Amount</td>')
1788
+ })
1789
+
1790
+ it('handles delta=6 (the MAX_COLUMN_DELTA cap) without misalignment', () => {
1791
+ const oldHtml = '<table><tr><td>A</td><td>B</td><td>C</td><td>D</td></tr></table>'
1792
+ const newHtml =
1793
+ '<table><tr>' +
1794
+ '<td>A</td><td>X1</td><td>X2</td><td>B</td><td>X3</td>' +
1795
+ '<td>X4</td><td>C</td><td>X5</td><td>X6</td><td>D</td>' +
1796
+ '</tr></table>'
1797
+
1798
+ const result = HtmlDiff.execute(oldHtml, newHtml)
1799
+ // Exactly 6 cells inserted, 4 preserved.
1800
+ const insMarkers = (result.match(/<td class='diffins'>/g) || []).length
1801
+ expect(insMarkers).toBe(6)
1802
+ const tdCount = (result.match(/<td[\s>]/g) || []).length
1803
+ expect(tdCount).toBe(10)
1804
+ // All four original cells survive without diff markers (toContain does not check their order).
1805
+ expect(result).toContain('<td>A</td>')
1806
+ expect(result).toContain('<td>B</td>')
1807
+ expect(result).toContain('<td>C</td>')
1808
+ expect(result).toContain('<td>D</td>')
1809
+ })
1810
+ })
1811
+
1812
+ // orderAlignmentForEmission's `preserved` list is empty when no rows
1813
+ // survive across the diff. The "delete every row" and "insert every
1814
+ // row" cases are corner cases where the float-positioning logic must
1815
+ // degenerate cleanly: every del's primary becomes `-0.5` (predecessor
1816
+ // index -1), every ins's primary is its own newIdx. Both should emit
1817
+ // in oldIdx / newIdx order respectively, with no preserved rows
1818
+ // sandwiched between them.
1819
+ describe('orderAlignmentForEmission — empty preserved list', () => {
1820
+ it('emits every row as diffdel when new is empty (no preserved rows)', () => {
1821
+ const oldHtml =
1822
+ '<table>' +
1823
+ '<tr><td>r1</td></tr>' +
1824
+ '<tr><td>r2</td></tr>' +
1825
+ '<tr><td>r3</td></tr>' +
1826
+ '<tr><td>r4</td></tr>' +
1827
+ '<tr><td>r5</td></tr>' +
1828
+ '</table>'
1829
+ const newHtml = '<table></table>'
1830
+
1831
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1832
+ '<table>' +
1833
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r1</del></td></tr>" +
1834
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r2</del></td></tr>" +
1835
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r3</del></td></tr>" +
1836
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r4</del></td></tr>" +
1837
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r5</del></td></tr>" +
1838
+ '</table>'
1839
+ )
1840
+ })
1841
+
1842
+ it('emits every row as diffins when old is empty (no preserved rows)', () => {
1843
+ const oldHtml = '<table></table>'
1844
+ const newHtml = '<table>' + '<tr><td>r1</td></tr>' + '<tr><td>r2</td></tr>' + '<tr><td>r3</td></tr>' + '</table>'
1845
+
1846
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1847
+ '<table>' +
1848
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>r1</ins></td></tr>" +
1849
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>r2</ins></td></tr>" +
1850
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>r3</ins></td></tr>" +
1851
+ '</table>'
1852
+ )
1853
+ })
1854
+
1855
+ it('emits all-deletes in order when only the header is preserved', () => {
1856
+ // The header is the only preserved row, so 'preserved' has 1 entry
1857
+ // and every del presumably anchors to it (predecessor newIdx 0, not
1858
+ // the -1 of the empty-preserved case) — the single-anchor boundary.
1859
+ const oldHtml =
1860
+ '<table>' +
1861
+ '<tr><td>Header</td></tr>' +
1862
+ '<tr><td>row a</td></tr>' +
1863
+ '<tr><td>row b</td></tr>' +
1864
+ '<tr><td>row c</td></tr>' +
1865
+ '</table>'
1866
+ const newHtml = '<table><tr><td>Header</td></tr></table>'
1867
+
1868
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1869
+ '<table>' +
1870
+ '<tr><td>Header</td></tr>' +
1871
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row a</del></td></tr>" +
1872
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row b</del></td></tr>" +
1873
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row c</del></td></tr>" +
1874
+ '</table>'
1875
+ )
1876
+ })
1877
+ })
1878
+
1879
+ // Fuzzy row-pairing threshold (0.5) calibrations on ISDA-flavoured
1880
+ // content. The interesting cases are at the edges: rows that share a
1881
+ // little (an enumeration prefix only) — must NOT pair (otherwise an
1882
+ // unrelated rewrite shows up as a single-row content edit); rows that
1883
+ // share a lot of boilerplate text but differ in the meaningful body —
1884
+ // must pair (otherwise the user sees del+ins instead of an edit).
1885
+ describe('fuzzy row pairing — enumerated clauses and shared boilerplate', () => {
1886
+ it('does NOT pair rows that share only an enumeration prefix (different bodies)', () => {
1887
+ // Old has 2 rows, new has 3 rows. The "1." and "2." prefixes are
1888
+ // the only commonality — bodies are completely unrelated.
1889
+ // textSimilarity falls below 0.5 (prefix is 3 chars in 60+; jaccard
1890
+ // is also tiny), so fuzzy pairing must NOT fire — each pair should
1891
+ // emit as a clean del + ins, not a noisy intra-row diff.
1892
+ const oldHtml =
1893
+ '<table>' +
1894
+ '<tr><td>1. Party A shall pay the gross amount on each Payment Date.</td></tr>' +
1895
+ '<tr><td>2. Party B shall deliver collateral on each Calculation Date.</td></tr>' +
1896
+ '</table>'
1897
+ const newHtml =
1898
+ '<table>' +
1899
+ '<tr><td>1. Section intentionally left blank.</td></tr>' +
1900
+ '<tr><td>2. Different boilerplate entirely.</td></tr>' +
1901
+ '<tr><td>3. Brand new clause added here.</td></tr>' +
1902
+ '</table>'
1903
+
1904
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1905
+ '<table>' +
1906
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>1. Party A shall pay the gross amount on each Payment Date.</del></td></tr>" +
1907
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>2. Party B shall deliver collateral on each Calculation Date.</del></td></tr>" +
1908
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>1. Section intentionally left blank.</ins></td></tr>" +
1909
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>2. Different boilerplate entirely.</ins></td></tr>" +
1910
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>3. Brand new clause added here.</ins></td></tr>" +
1911
+ '</table>'
1912
+ )
1913
+ })
1914
+
1915
+ it('DOES pair rows that share a long boilerplate footer (Jaccard kicks in)', () => {
1916
+ // The 9-char body differs, but the 46-char trailing footer is
1917
+ // identical. Prefix-suffix similarity is low, but token Jaccard is
1918
+ // very high because shared footer tokens dominate the token set.
1919
+ // textSimilarity = Math.max(prefix_suffix, jaccard) → must pair.
1920
+ const footer = ' subject to the terms of the Master Agreement.'
1921
+ const oldHtml = `<table><tr><td>Anchor row</td></tr><tr><td>Alpha now${footer}</td></tr></table>`
1922
+ const newHtml =
1923
+ `<table><tr><td>Anchor row</td></tr><tr><td>Bravo new${footer}</td></tr>` +
1924
+ '<tr><td>Extra row appended</td></tr></table>'
1925
+
1926
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1927
+ '<table>' +
1928
+ '<tr><td>Anchor row</td></tr>' +
1929
+ "<tr><td><del class='diffmod'>Alpha</del><ins class='diffmod'>Bravo</ins> " +
1930
+ "<del class='diffmod'>now</del><ins class='diffmod'>new</ins>" +
1931
+ ' subject to the terms of the Master Agreement.</td></tr>' +
1932
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>Extra row appended</ins></td></tr>" +
1933
+ '</table>'
1934
+ )
1935
+ })
1936
+ })
1937
+
1938
+ // orderAlignmentForEmission must keep unpaired dels in their correct
1939
+ // positions even when the run contains a mix of preserved rows, fuzzy-
1940
+ // paired rows, and unpaired dels. A regression here would make trailing
1941
+ // dels appear before the row they came after — the same family of bug
1942
+ // as the "deleted rows out of order" report that motivated the
1943
+ // function in the first place.
1944
+ describe('orderAlignmentForEmission — mixed paired and unpaired rows', () => {
1945
+ it('emits trailing unpaired dels after a fuzzy-paired content edit', () => {
1946
+ // Old: [Aaaaa, Bbbbb, Cccc, Dddd]. New: [Aaaaa, Bbbbb+NEW].
1947
+ // After LCS: A is preserved. After pairSimilarUnmatchedRows: B↔B'
1948
+ // via fuzzy. C and D are unpaired dels. The output order must be:
1949
+ // preserved(A) → paired(B,B') → del(C) → del(D).
1950
+ const oldHtml =
1951
+ '<table>' +
1952
+ '<tr><td>Aaaaa shared content here</td></tr>' +
1953
+ '<tr><td>Bbbbb shared content here</td></tr>' +
1954
+ '<tr><td>Cccc deleted row</td></tr>' +
1955
+ '<tr><td>Dddd deleted row</td></tr>' +
1956
+ '</table>'
1957
+ const newHtml =
1958
+ '<table>' +
1959
+ '<tr><td>Aaaaa shared content here</td></tr>' +
1960
+ '<tr><td>Bbbbb shared content here NEW</td></tr>' +
1961
+ '</table>'
1962
+
1963
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1964
+ '<table>' +
1965
+ '<tr><td>Aaaaa shared content here</td></tr>' +
1966
+ "<tr><td>Bbbbb shared content here<ins class='diffins'>&nbsp;NEW</ins></td></tr>" +
1967
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>Cccc deleted row</del></td></tr>" +
1968
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>Dddd deleted row</del></td></tr>" +
1969
+ '</table>'
1970
+ )
1971
+ })
1972
+
1973
+ it('emits dels at the end of the table in old-document order', () => {
1974
+ // Specifically protects against the regression that motivated
1975
+ // orderAlignmentForEmission: deleting the last two rows
1976
+ // simultaneously must emit them in the order they appeared in old
1977
+ // (second-last, then last), not reversed or jumbled.
1978
+ const oldHtml =
1979
+ '<table>' +
1980
+ '<tr><td>kept1</td></tr>' +
1981
+ '<tr><td>kept2</td></tr>' +
1982
+ '<tr><td>second-last</td></tr>' +
1983
+ '<tr><td>last</td></tr>' +
1984
+ '</table>'
1985
+ const newHtml = '<table><tr><td>kept1</td></tr><tr><td>kept2</td></tr></table>'
1986
+
1987
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1988
+ '<table>' +
1989
+ '<tr><td>kept1</td></tr>' +
1990
+ '<tr><td>kept2</td></tr>' +
1991
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>second-last</del></td></tr>" +
1992
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>last</del></td></tr>" +
1993
+ '</table>'
1994
+ )
1995
+ })
1996
+ })
1997
+
1998
+ // pairSimilarUnmatched is intentionally greedy (not Hungarian-optimal).
1999
+ // When two unmatched-old entries are both above-threshold matches for
2000
+ // the same unmatched-new entry, the del visited first in document
2001
+ // order claims it, so the pairing is optimal only when that first del
2002
+ // is also the better candidate. Either way the output must stay
2003
+ // structurally valid — no phantom cells, no dropped content.
2004
+ describe('pairSimilarUnmatchedCells — competing dels for the same ins', () => {
2005
+ it('keeps both candidate cells intact when two old cells could pair with one new cell', () => {
2006
+ // Two old cells with identical content compete for one similar
2007
+ // new cell. The greedy assignment picks one to pair as a content
2008
+ // edit; the other emits as a full diffdel. Both must appear; no
2009
+ // cell may silently vanish.
2010
+ const oldHtml =
2011
+ '<table><tr>' +
2012
+ '<td>Preserved</td>' +
2013
+ '<td>Old content alpha to be edited</td>' +
2014
+ '<td>Old content alpha to be edited</td>' +
2015
+ '</tr></table>'
2016
+ const newHtml = '<table><tr>' + '<td>Preserved</td>' + '<td>Old content alpha CHANGED</td>' + '</tr></table>'
2017
+
2018
+ const result = HtmlDiff.execute(oldHtml, newHtml)
2019
+ // The losing del must emit as a full diffdel cell.
2020
+ expect(result).toContain("<td class='diffdel'><del class='diffdel'>Old content alpha to be edited</del></td>")
2021
+ // The winning pair must emit as a partial content edit.
2022
+ expect(result).toContain("<del class='diffmod'>to be edited</del>")
2023
+ expect(result).toContain("<ins class='diffmod'>CHANGED</ins>")
2024
+ // Structural: 3 tds total in the output (1 preserved, 1 full-del,
2025
+ // 1 paired-edit). No phantoms.
2026
+ const tdCount = (result.match(/<td[\s>]/g) || []).length
2027
+ expect(tdCount).toBe(3)
2028
+ })
2029
+ })
2030
+
2031
+ // Row-LCS on a non-trivial 7-row table where only every other row
2032
+ // matches. Existing named tests max out around 4 rows; the matrix
2033
+ // never produces a row-count drop this large with this much
2034
+ // interleaving. This exercises the LCS DP itself, not just the
2035
+ // diff emission.
2036
+ describe('row-LCS on larger tables', () => {
2037
+ it('finds 4 preserved rows interleaved with 3 dropped rows in a 7-row old table', () => {
2038
+ const oldHtml =
2039
+ '<table>' +
2040
+ '<tr><td>row1 preserved</td></tr>' +
2041
+ '<tr><td>row2 old body</td></tr>' +
2042
+ '<tr><td>row3 preserved</td></tr>' +
2043
+ '<tr><td>row4 old body</td></tr>' +
2044
+ '<tr><td>row5 preserved</td></tr>' +
2045
+ '<tr><td>row6 old body</td></tr>' +
2046
+ '<tr><td>row7 preserved</td></tr>' +
2047
+ '</table>'
2048
+ const newHtml =
2049
+ '<table>' +
2050
+ '<tr><td>row1 preserved</td></tr>' +
2051
+ '<tr><td>row3 preserved</td></tr>' +
2052
+ '<tr><td>row5 preserved</td></tr>' +
2053
+ '<tr><td>row7 preserved</td></tr>' +
2054
+ '</table>'
2055
+
2056
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
2057
+ '<table>' +
2058
+ '<tr><td>row1 preserved</td></tr>' +
2059
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row2 old body</del></td></tr>" +
2060
+ '<tr><td>row3 preserved</td></tr>' +
2061
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row4 old body</del></td></tr>" +
2062
+ '<tr><td>row5 preserved</td></tr>' +
2063
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row6 old body</del></td></tr>" +
2064
+ '<tr><td>row7 preserved</td></tr>' +
2065
+ '</table>'
2066
+ )
2067
+ })
2068
+ })
1744
2069
  })
@@ -1,15 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(diff -u test/input1.html test/input2.html)",
5
- "Bash(npm test -- test/Bug.spec.tsx)",
6
- "Bash(timeout 30s npm run test:ci -- test/Bug.spec.tsx)",
7
- "Bash(npm run build)",
8
- "Bash(timeout 10s npm run test:ci -- test/Bug.spec.tsx)",
9
- "Bash(npm run lint)",
10
- "Bash(npm run test:ci)",
11
- "Bash(npm run bench:ci)"
12
- ],
13
- "deny": []
14
- }
15
- }