@createiq/htmldiff 1.0.5-beta.2 → 1.0.5-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,367 @@
1
+ import { describe, expect, it } from 'vitest'
2
+
3
+ import HtmlDiff from '../src/HtmlDiff'
4
+
5
/**
 * Matrix of common table operations and their pairwise combinations.
 * Each case runs HtmlDiff.execute and hands the output to
 * assertStructurallyValid, which checks:
 *
 *   • `<tr>`, `<td>` and `<th>` open/close tag counts match
 *   • `<ins>`/`<del>` open/close tag counts match
 *   • No output row has more cells than the widest row of old or new
 *     (raw `<td>`/`<th>` count — colspan is NOT taken into account)
 *   • The output is non-empty when old and new differ
 *
 * Class-marker coherence (e.g. every cell of a `<tr class='diffins'>`
 * row being ins-marked) and "no cell content from new is dropped" are
 * NOT verified by the matrix. Only the dedicated regression scenarios
 * at the bottom assert on specific markup or ordering.
 *
 * The matrix is built combinatorially — single operations × single
 * operations — so a regression in any pairwise combination surfaces
 * here even if no test was added for that exact pair.
 */
describe('HtmlDiff — table operations matrix', () => {
  describe('single operations on a 3×3 base', () => {
    for (const op of allSingleOperations()) {
      it(`${op.name} produces structurally valid output`, () => {
        const base = baseTable3x3()
        const newHtml = op.apply(base)
        const oldHtml = renderTable(base)
        const result = HtmlDiff.execute(oldHtml, newHtml)
        assertStructurallyValid(result, oldHtml, newHtml, op.name)
      })
    }
  })

  describe('pairwise combinations on a 3×3 base', () => {
    const ops = allSingleOperations()
    for (const opA of ops) {
      for (const opB of ops) {
        // The same operation applied twice is not a pairwise combination.
        if (opA === opB) continue
        // canCompose is the single place to exclude pairs that don't
        // compose cleanly (e.g. two operations that mutate overlapping
        // ranges).
        if (!canCompose(opA, opB)) continue
        it(`${opA.name} + ${opB.name} produces structurally valid output`, () => {
          const base = baseTable3x3()
          // opA yields HTML, so parse it back into a table before opB runs.
          const intermediate = parseTable(opA.apply(base))
          const newHtml = opB.apply(intermediate)
          const oldHtml = renderTable(base)
          const result = HtmlDiff.execute(oldHtml, newHtml)
          assertStructurallyValid(result, oldHtml, newHtml, `${opA.name} + ${opB.name}`)
        })
      }
    }
  })

  describe('user-reported regression scenarios', () => {
    it('multiple deleted rows including the last appear in old-order, AFTER preserved header (regression for "deleted rows out of order")', () => {
      // The user's exact scenario: column added + multiple rows deleted
      // including the last row, with content edits that prevent fuzzy
      // pairing of some rows. Before the fix, all deletes ended up at
      // the START of the output (above the header) because they were
      // emitted at cursor=row-0-start, regardless of where they came
      // from in old.
      const oldHtml =
        '<table>' +
        '<tr><th>Party</th><th>Form</th><th>Date</th></tr>' +
        '<tr><td>Party A</td><td>IRS Forms W-8BEN-E</td><td>(i) Upon execution.</td></tr>' +
        '<tr><td>Party B</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>' +
        '<tr><td>Party A and Party B</td><td>Any document required.</td><td>Promptly upon reasonable demand.</td></tr>' +
        '</table>'
      const newHtml =
        '<table>' +
        '<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>' +
        "<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E. Here's some extra content</td><td>(i) Upon execution.</td></tr>" +
        '<tr><td>Party B</td><td>A</td><td>IRS Form W</td><td>Yeah OK</td></tr>' +
        '</table>'

      const result = HtmlDiff.execute(oldHtml, newHtml)
      assertStructurallyValid(result, oldHtml, newHtml, 'multiple deleted rows out of order')
      // The header (preserved with column added) MUST come first: the
      // first deleted row has to appear after it in the output.
      const headerIdx = result.indexOf('<tr><th>Party</th>')
      const firstDelIdx = result.indexOf("<tr class='diffdel'>")
      expect(headerIdx).toBeGreaterThanOrEqual(0)
      expect(firstDelIdx).toBeGreaterThan(headerIdx)
    })

    it('column added + empty row inserted in middle (regression for issue with blank row)', () => {
      const oldHtml =
        '<table>' +
        '<tr><th>Party</th><th>Form</th><th>Date</th></tr>' +
        '<tr><td>Party A</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto).</td><td>(i) Upon execution.</td></tr>' +
        '<tr><td>Party B</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>' +
        '</table>'
      const newHtml =
        '<table>' +
        '<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>' +
        "<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto). Here's some extra content</td><td>(i) Upon execution.</td></tr>" +
        '<tr><td></td><td></td><td></td><td></td></tr>' +
        '<tr><td>Party B</td><td>A</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>' +
        '</table>'

      const result = HtmlDiff.execute(oldHtml, newHtml)
      assertStructurallyValid(result, oldHtml, newHtml, 'column-add + empty row insert')
      // Specific assertions on this case so we can see exactly what went
      // wrong if the structural-invariant check passes but the output
      // still drifts.
      const rowCount = countMatches(result, /<tr[\s>]/g)
      expect(rowCount).toBe(4) // header + Party A + empty + Party B
      expect(result).toContain(
        "<tr class='diffins'><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td><td class='diffins'></td></tr>"
      )
    })
  })
})
117
+
118
+ // ──────────────────────── operations ────────────────────────
119
+
120
/** A named table edit; the test matrix is built from a list of these. */
interface Op {
  /** Takes a table and returns the HTML of the edited table. */
  apply: (table: TableData) => string
  /** Label interpolated into the generated test titles. */
  name: string
}
124
+
125
/**
 * Returns the catalogue of single table edits used by the matrix.
 *
 * Every `apply` receives a table and returns the rendered HTML of the
 * edited table. Inside an edit callback `t` is the table that was
 * PASSED IN: `mutate` works on a clone, so `t.rows.length` and
 * `t.rows[0].length` keep their pre-edit values for the whole callback.
 * Multi-step edits must therefore compute later indices relative to
 * those original dimensions.
 *
 * @returns the operations, in a fixed order
 */
function allSingleOperations(): Op[] {
  // Wraps an edit callback into an Op: clone-and-edit via `mutate`, then render.
  const op = (name: string, edit: (m: Mutator, t: TableData) => void): Op => ({
    name,
    apply: t => renderTable(mutate(t, m => edit(m, t))),
  })

  return [
    { name: 'no-op', apply: t => renderTable(t) },
    op('edit-cell', m => m.editCell(1, 1, 'EDITED')),
    op('add-row-start', m => m.addRowAt(0, ['NEW1', 'NEW2', 'NEW3'])),
    op('add-row-middle', m => m.addRowAt(2, ['NEW1', 'NEW2', 'NEW3'])),
    op('add-row-end', (m, t) => m.addRowAt(t.rows.length, ['NEW1', 'NEW2', 'NEW3'])),
    op('add-empty-row-middle', m => m.addRowAt(2, ['', '', ''])),
    op('add-multiple-rows', (m, t) => {
      m.addRowAt(t.rows.length, ['X1', 'X2', 'X3'])
      m.addRowAt(t.rows.length + 1, ['Y1', 'Y2', 'Y3'])
    }),
    op('delete-row-start', m => m.deleteRow(1)), // skip header
    op('delete-row-middle', m => m.deleteRow(2)),
    op('delete-row-end', (m, t) => m.deleteRow(t.rows.length - 1)),
    op('delete-multiple-rows', (m, t) => {
      // Last row first, so index 1 still refers to the first data row.
      m.deleteRow(t.rows.length - 1)
      m.deleteRow(1)
    }),
    op('delete-multiple-rows-from-end', (m, t) => {
      m.deleteRow(t.rows.length - 1)
      // `t` is the un-mutated input, so after the first delete the new
      // last row sits at length - 2. Re-using length - 1 here would be
      // out of range and silently ignored, deleting only one row.
      m.deleteRow(t.rows.length - 2)
    }),
    op('add-column-start', m => m.addColumnAt(0, ['NewHeader', 'newA', 'newB', 'newC'])),
    op('add-column-middle', m => m.addColumnAt(1, ['NewHeader', 'newA', 'newB', 'newC'])),
    op('add-column-end', (m, t) => m.addColumnAt(t.rows[0].length, ['NewHeader', 'newA', 'newB', 'newC'])),
    op('add-multiple-columns', (m, t) => {
      m.addColumnAt(t.rows[0].length, ['H1', 'a1', 'b1', 'c1'])
      m.addColumnAt(t.rows[0].length + 1, ['H2', 'a2', 'b2', 'c2'])
    }),
    op('delete-column-start', m => m.deleteColumn(0)),
    op('delete-column-middle', m => m.deleteColumn(1)),
    op('delete-column-end', (m, t) => m.deleteColumn(t.rows[0].length - 1)),
    op('shift-content-right', m => m.shiftContentRight(1)),
  ]
}
197
+
198
/**
 * Hook for excluding operation pairs from the pairwise matrix.
 *
 * Currently every pair is accepted. The second operation's row/column
 * index may have been chosen for the original table dimensions, but the
 * mutate helpers clamp insert positions and ignore out-of-range
 * deletes/edits, so such a pair degrades to a smaller change (or a
 * no-op) rather than throwing. Add explicit exclusions here if a pair
 * turns out to produce a meaningless table.
 *
 * @param _a - first operation of the pair (currently unused)
 * @param _b - second operation of the pair (currently unused)
 * @returns always `true`
 */
function canCompose(_a: Op, _b: Op): boolean {
  return true
}
209
+
210
+ // ──────────────────────── table model ────────────────────────
211
+
212
/** Minimal in-memory table model used by the matrix helpers. */
interface TableData {
  /** One entry per table row; each entry holds that row's cell contents. */
  rows: Array<Array<string>>
}
215
+
216
/**
 * Builds the base fixture: one header row followed by the data rows
 * A, B and C, three cells each. Every call returns a fresh object.
 */
function baseTable3x3(): TableData {
  const header = ['Header1', 'Header2', 'Header3']
  const dataRows = [
    ['A1', 'A2', 'A3'],
    ['B1', 'B2', 'B3'],
    ['C1', 'C2', 'C3'],
  ]
  return { rows: [header, ...dataRows] }
}
226
+
227
/**
 * Serialises a table to HTML with no whitespace between tags.
 * Cells of row 0 are emitted as `<th>`, all other rows use `<td>`.
 * Cell content is inserted verbatim (no escaping).
 *
 * @param t - table to render
 * @returns `<table>…</table>` markup
 */
function renderTable(t: TableData): string {
  const rowsHtml = t.rows.map((cells, rowIdx) => {
    const tag = rowIdx === 0 ? 'th' : 'td'
    const cellsHtml = cells.map(cell => `<${tag}>${cell}</${tag}>`).join('')
    return `<tr>${cellsHtml}</tr>`
  })
  return `<table>${rowsHtml.join('')}</table>`
}
240
+
241
/**
 * Regex-based reader for the tables this file generates. It is NOT a
 * general HTML parser and is only meant for use inside this matrix.
 *
 * @param html - markup containing `<tr>` rows with `<td>`/`<th>` cells
 * @returns the inner content of every cell, grouped by row
 */
function parseTable(html: string): TableData {
  const rows = Array.from(html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs), ([, rowInner]) =>
    Array.from(rowInner.matchAll(/<t[dh][^>]*>(.*?)<\/t[dh]>/gs), ([, cellInner]) => cellInner)
  )
  return { rows }
}
254
+
255
/**
 * Editing operations handed to the callback of `mutate`. The notes on
 * each member describe the implementation that `mutate` provides.
 */
interface Mutator {
  /** Replaces the content of cell (row, col); ignored when that cell does not exist. */
  editCell: (row: number, col: number, content: string) => void
  /** Inserts `content` as a new row at index `at`, clamped to [0, row count]. */
  addRowAt: (at: number, content: string[]) => void
  /** Removes the row at index `at`; out-of-range indices are ignored. */
  deleteRow: (at: number) => void
  /**
   * Inserts one cell into every row at index `at` (clamped per row).
   * Row r receives `columnContent[r]`, or '' when that entry is missing.
   */
  addColumnAt: (at: number, columnContent: string[]) => void
  /** Removes the cell at index `at` from every row that has one. */
  deleteColumn: (at: number) => void
  /**
   * Moves each cell's content in row `rowIdx` one position to the right:
   * the first cell becomes '' and the last cell's content is dropped.
   */
  shiftContentRight: (rowIdx: number) => void
}
263
+
264
/**
 * Applies edits to a copy of `t` and returns the copy; `t` itself and
 * its row arrays are left untouched.
 *
 * Insert positions are clamped into range, while edits and deletes at
 * positions that do not exist are ignored, so callers can pass indices
 * computed for a differently sized table without getting an exception.
 *
 * @param t - source table (not modified)
 * @param fn - callback that performs the edits through the Mutator
 * @returns the edited copy
 */
function mutate(t: TableData, fn: (m: Mutator) => void): TableData {
  const copy: TableData = { rows: t.rows.map(cells => cells.slice()) }
  const grid = copy.rows

  // Clamp an insert position to [0, upper].
  const clamp = (value: number, upper: number) => Math.min(Math.max(value, 0), upper)
  // True when idx addresses an existing element of a list of `length` items.
  const inRange = (idx: number, length: number) => idx >= 0 && idx < length

  const mutator: Mutator = {
    editCell: (row, col, content) => {
      const target = grid[row]
      if (target === undefined || target[col] === undefined) return
      target[col] = content
    },
    addRowAt: (at, content) => {
      grid.splice(clamp(at, grid.length), 0, content)
    },
    deleteRow: at => {
      if (inRange(at, grid.length)) grid.splice(at, 1)
    },
    addColumnAt: (at, columnContent) => {
      grid.forEach((cells, r) => {
        cells.splice(clamp(at, cells.length), 0, columnContent[r] ?? '')
      })
    },
    deleteColumn: at => {
      grid.forEach(cells => {
        if (inRange(at, cells.length)) cells.splice(at, 1)
      })
    },
    shiftContentRight: rowIdx => {
      const cells = grid[rowIdx]
      if (!cells) return
      // Drop the last cell's content and put an empty cell in front:
      // every remaining value ends up one position to the right.
      cells.pop()
      cells.unshift('')
    },
  }

  fn(mutator)
  return copy
}
300
+
301
+ // ──────────────────────── invariant checks ────────────────────────
302
+
303
/**
 * Asserts that a diff result is structurally sound:
 *   • opening and closing tag counts match for `<tr>`, `<td>`, `<th>`,
 *     `<ins>` and `<del>`
 *   • no output row contains more `<td>`/`<th>` cells than the widest
 *     row found in either input — no phantom cells
 *   • the output is not empty when the two inputs differ
 *
 * @param output - diff result under test
 * @param oldHtml - original table markup
 * @param newHtml - edited table markup
 * @param label - scenario name, prefixed to every failure message
 */
function assertStructurallyValid(output: string, oldHtml: string, newHtml: string, label: string) {
  const ctx = `[${label}]`

  // Tag balance: one [tag, opening pattern, closing pattern] entry per checked tag.
  const balanceChecks: Array<[string, RegExp, RegExp]> = [
    ['tr', /<tr[\s>]/g, /<\/tr>/g],
    ['td', /<td[\s>]/g, /<\/td>/g],
    ['th', /<th[\s>]/g, /<\/th>/g],
    ['ins', /<ins[\s>]/g, /<\/ins>/g],
    ['del', /<del[\s>]/g, /<\/del>/g],
  ]
  for (const [tag, openRe, closeRe] of balanceChecks) {
    const opened = countMatches(output, openRe)
    const closed = countMatches(output, closeRe)
    expect(opened, `${ctx} <${tag}> tag balance`).toBe(closed)
  }

  // Per-row cell count must not exceed the widest input row.
  const widestInput = Math.max(maxRowCellCount(oldHtml), maxRowCellCount(newHtml))
  for (const [, rowInner] of output.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs)) {
    const cellCount = countMatches(rowInner, /<t[dh][\s>]/g)
    expect(cellCount, `${ctx} row has too many cells (${cellCount} > ${widestInput})`).toBeLessThanOrEqual(
      widestInput
    )
  }

  // Identical inputs carry no non-empty requirement.
  if (oldHtml === newHtml) return
  expect(output.length, `${ctx} output is empty`).toBeGreaterThan(0)
}
353
+
354
/**
 * Returns the largest number of `<td>`/`<th>` opening tags found in any
 * single `<tr>…</tr>` of `html`, or 0 when there are no rows.
 */
function maxRowCellCount(html: string): number {
  const perRow = Array.from(html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs), ([, rowInner]) =>
    countMatches(rowInner, /<t[dh][\s>]/g)
  )
  return Math.max(0, ...perRow)
}
363
+
364
/**
 * Counts the matches of `re` in `s` via `String.prototype.match`.
 * Pass a global (`g`) pattern to count every occurrence.
 *
 * @returns the length of the match array, or 0 when nothing matches
 */
function countMatches(s: string, re: RegExp): number {
  return s.match(re)?.length ?? 0
}
@@ -802,6 +802,86 @@ describe('HtmlDiff — tables', () => {
802
802
  )
803
803
  })
804
804
 
805
it('preserves data-* attributes on a <tr> in an inserted row alongside the injected class', () => {
  // `data-behaviour` (and similar `data-*` attributes) on `<tr>` drive
  // table semantics in the frontend, so the diff has to carry them over
  // verbatim while still adding `class='diffins'` for the structural
  // change. Whether a downstream sanitiser such as DOMPurify lets
  // `data-*` through is a separate concern — htmldiff must not lose them.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    '<tr data-behaviour="data" class=\'diffins\'>',
    "<td class='diffins'><ins class='diffins'>B</ins></td>",
    '</tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
823
+
824
it('preserves data-* attributes on a <tr> in a deleted row alongside the injected class', () => {
  // Mirror image of the inserted-row case: the attributed row exists
  // only in the old markup and must come back with `class='diffdel'`
  // appended after its original attributes.
  const before = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td></tr></table>'
  const after = '<table><tr><td>A</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    '<tr data-behaviour="data" class=\'diffdel\'>',
    "<td class='diffdel'><del class='diffdel'>B</del></td>",
    '</tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
837
+
838
it('preserves multiple data-* attributes on a <tr> in an inserted row', () => {
  // Several attributes (including a non-data `id`) must keep their
  // original order, with the injected class appended last.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr data-behaviour="data" id="row2" data-x="y"><td>B</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    '<tr data-behaviour="data" id="row2" data-x="y" class=\'diffins\'>',
    "<td class='diffins'><ins class='diffins'>B</ins></td>",
    '</tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
851
+
852
it('preserves data-* attribute on a <tr> when the row passes through column-add path (preserved row)', () => {
  // The attributed row is present on both sides but gains a cell
  // (column added). Per the original author's note this is expected to
  // go through diffPreservedRow and the multi-column detection path,
  // with the row's opening tag carried over via rowHeaderSlice — so the
  // `<tr …>` tag should appear in the output exactly as written.
  const before = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td><td>C</td></tr></table>'
  const after =
    '<table><tr><td>A</td><td>X</td></tr><tr data-behaviour="data"><td>B</td><td>EXTRA</td><td>C</td></tr></table>'

  const diffed = HtmlDiff.execute(before, after)

  expect(diffed).toContain('<tr data-behaviour="data">')
})
864
+
865
it('preserves data-* attribute on a <tr> when the row passes through fuzzy-matching with content edit', () => {
  // The attributed row's text is edited, so an exact row match misses
  // it. Per the original author's note it is expected to be fuzzy-paired
  // (diffStructurallyAlignedTable → fuzzy pair → diffPreservedRow); the
  // attribute should still flow through to the output unchanged.
  const before = [
    '<table>',
    '<tr><td>A</td></tr>',
    '<tr data-behaviour="data"><td>The quick brown fox jumps over.</td></tr>',
    '</table>',
  ].join('')
  const after = [
    '<table>',
    '<tr><td>A</td></tr>',
    '<tr data-behaviour="data"><td>The quick brown fox jumps over the lazy dog.</td></tr>',
    '<tr><td>NEW</td></tr>',
    '</table>',
  ].join('')

  const diffed = HtmlDiff.execute(before, after)

  expect(diffed).toContain('<tr data-behaviour="data">')
})
884
+
805
885
  it('preserves <table> attributes verbatim from new (no diff marker on attribute changes)', () => {
806
886
  const oldHtml = '<table border="1"><tr><td>A</td></tr></table>'
807
887
  const newHtml = '<table border="2" style="width:100%"><tr><td>A</td></tr></table>'