@createiq/htmldiff 1.0.5 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,7 +51,7 @@ describe('HtmlDiff', () => {
51
51
  [
52
52
  '<table><tr><td>col1</td><td>col2</td></tr><tr><td>Data 1</td><td>Data 2</td></tr></table>',
53
53
  '<table><tr><td>col1</td><td>col2</td></tr></table>',
54
- "<table><tr><td>col1</td><td>col2</td></tr><tr><td><del class='diffdel'>Data 1</del></td><td><del class='diffdel'>Data 2</del></td></tr></table>",
54
+ "<table><tr><td>col1</td><td>col2</td></tr><tr class='diffdel'><td class='diffdel'><del class='diffdel'>Data 1</del></td><td class='diffdel'><del class='diffdel'>Data 2</del></td></tr></table>",
55
55
  ],
56
56
  [
57
57
  'text',
@@ -0,0 +1,372 @@
1
+ import { describe, expect, it } from 'vitest'
2
+
3
+ import HtmlDiff from '../src/HtmlDiff'
4
+
5
/**
 * Matrix of common table edits and their pairwise combinations. Each case
 * runs `HtmlDiff.execute` on the rendered old/new tables and hands the
 * result to `assertStructurallyValid`, which checks that:
 *
 *   • `<tr>`, `<td>` and `<th>` open/close tag counts match
 *   • `<ins>` and `<del>` open/close tag counts match
 *   • no output row has more `<td>`/`<th>` tags than the widest row of
 *     the old or new table (a plain tag count; `colspan` is not considered)
 *   • the output is non-empty when the inputs differ
 *
 * Coherence of `diffins`/`diffdel` class markers and preservation of cell
 * content are NOT part of that shared check. Where a scenario depends on
 * them it asserts them explicitly, as the regression scenarios at the end
 * of this suite do.
 *
 * The matrix is built combinatorially — single operations × single
 * operations — so a regression in any pairwise combination surfaces
 * here even if no test was added for that exact pair.
 */
describe('HtmlDiff — table operations matrix', () => {
  describe('single operations on a 3×3 base', () => {
    for (const op of allSingleOperations()) {
      it(`${op.name} produces structurally valid output`, () => {
        const base = baseTable3x3()
        const newHtml = op.apply(base)
        const oldHtml = renderTable(base)
        const result = HtmlDiff.execute(oldHtml, newHtml)
        assertStructurallyValid(result, oldHtml, newHtml, op.name)
      })
    }
  })

  describe('pairwise combinations on a 3×3 base', () => {
    const ops = allSingleOperations()
    for (const opA of ops) {
      for (const opB of ops) {
        // An operation is never paired with itself.
        if (opA === opB) continue
        // `canCompose` is the hook for excluding ordered pairs that do not
        // compose cleanly (the second op's indices are computed against
        // the table produced by the first).
        if (!canCompose(opA, opB)) continue
        it(`${opA.name} + ${opB.name} produces structurally valid output`, () => {
          const base = baseTable3x3()
          // opB needs a table model, so opA's HTML is parsed back first.
          const intermediate = parseTable(opA.apply(base))
          const newHtml = opB.apply(intermediate)
          const oldHtml = renderTable(base)
          const result = HtmlDiff.execute(oldHtml, newHtml)
          assertStructurallyValid(result, oldHtml, newHtml, `${opA.name} + ${opB.name}`)
        })
      }
    }
  })

  describe('user-reported regression scenarios', () => {
    it('multiple deleted rows including the last appear in old-order, AFTER preserved header (regression for "deleted rows out of order")', () => {
      // The user's exact scenario: column added + multiple rows deleted
      // including the last row, with content edits that prevent fuzzy
      // pairing of some rows. Before the fix, all deletes ended up at
      // the START of the output (above the header) because they were
      // emitted at cursor=row-0-start, regardless of where they came
      // from in old.
      const oldHtml =
        '<table>' +
        '<tr><th>Party</th><th>Form</th><th>Date</th></tr>' +
        '<tr><td>Party A</td><td>IRS Forms W-8BEN-E</td><td>(i) Upon execution.</td></tr>' +
        '<tr><td>Party B</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>' +
        '<tr><td>Party A and Party B</td><td>Any document required.</td><td>Promptly upon reasonable demand.</td></tr>' +
        '</table>'
      const newHtml =
        '<table>' +
        '<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>' +
        "<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E. Here's some extra content</td><td>(i) Upon execution.</td></tr>" +
        '<tr><td>Party B</td><td>A</td><td>IRS Form W</td><td>Yeah OK</td></tr>' +
        '</table>'

      const result = HtmlDiff.execute(oldHtml, newHtml)
      assertStructurallyValid(result, oldHtml, newHtml, 'multiple deleted rows out of order')
      // The header (preserved with column added) MUST come first.
      const headerIdx = result.indexOf('<tr><th>Party</th>')
      // Located with the same quote-agnostic, whitespace-tolerant pattern
      // style as the inserted-row check in the next scenario, so a change
      // in attribute quoting cannot turn into a false "-1 > headerIdx" failure.
      const firstDelIdx = result.search(/<tr\s+class\s*=\s*['"]diffdel['"]\s*>/)
      expect(headerIdx).toBeGreaterThanOrEqual(0)
      expect(firstDelIdx).toBeGreaterThan(headerIdx)
    })

    it('column added + empty row inserted in middle (regression for issue with blank row)', () => {
      const oldHtml =
        '<table>' +
        '<tr><th>Party</th><th>Form</th><th>Date</th></tr>' +
        '<tr><td>Party A</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto).</td><td>(i) Upon execution.</td></tr>' +
        '<tr><td>Party B</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>' +
        '</table>'
      const newHtml =
        '<table>' +
        '<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>' +
        "<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto). Here's some extra content</td><td>(i) Upon execution.</td></tr>" +
        '<tr><td></td><td></td><td></td><td></td></tr>' +
        '<tr><td>Party B</td><td>A</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>' +
        '</table>'

      const result = HtmlDiff.execute(oldHtml, newHtml)
      assertStructurallyValid(result, oldHtml, newHtml, 'column-add + empty row insert')
      // Specific assertions on this case so we can see exactly what went
      // wrong if the structural-invariant check passes but the output
      // still drifts.
      const rowCount = countMatches(result, /<tr[\s>]/g)
      expect(rowCount).toBe(4) // header + Party A + empty + Party B
      // The inserted empty row must be emitted with diffins on the <tr>
      // and 4 empty diffins-marked cells. Asserted via regex that accepts
      // either quote style and optional whitespace inside the tag markup,
      // so an incidental change in attribute formatting isn't flagged as
      // a regression. The cells themselves must still be truly empty.
      const emptyInsertedRow = result.match(/<tr\s+class\s*=\s*['"]diffins['"]\s*>(.*?)<\/tr>/)
      expect(emptyInsertedRow).not.toBeNull()
      const emptyCellCount = countMatches(
        emptyInsertedRow?.[1] ?? '',
        /<td\s+class\s*=\s*['"]diffins['"]\s*><\/td>/g
      )
      expect(emptyCellCount).toBe(4)
    })
  })
})
122
+
123
+ // ──────────────────────── operations ────────────────────────
124
+
125
/** One named table edit; the unit from which the operations matrix is built. */
interface Op {
  /** Label interpolated into the generated test titles and failure messages. */
  name: string
  /** Applies the edit to `table` and returns the resulting table as an HTML string. */
  apply(table: TableData): string
}
129
+
130
/**
 * Builds the list of single table edits that drive both the
 * single-operation tests and the pairwise matrix. The order of the
 * returned array determines the order of the generated tests.
 *
 * Every edit script receives the untouched input table `t` alongside the
 * mutator. `mutate` works on a clone, so `t.rows.length` and
 * `t.rows[0].length` keep their original values while a script runs;
 * scripts with several steps must therefore offset their indices by hand.
 *
 * @returns A fresh array of operations on every call.
 */
function allSingleOperations(): Op[] {
  // Wraps an edit script into an Op: clone-and-edit via `mutate`, then render.
  const op = (name: string, edit: (m: Mutator, t: TableData) => void): Op => ({
    name,
    apply: t => renderTable(mutate(t, m => edit(m, t))),
  })
  // Fresh arrays per use: `addRowAt` stores the array it is given, so a
  // shared constant would be aliased into the produced tables.
  const newRow = (): string[] => ['NEW1', 'NEW2', 'NEW3']
  const newColumn = (): string[] => ['NewHeader', 'newA', 'newB', 'newC']

  return [
    { name: 'no-op', apply: t => renderTable(t) },
    op('edit-cell', m => m.editCell(1, 1, 'EDITED')),
    op('add-row-start', m => m.addRowAt(0, newRow())),
    op('add-row-middle', m => m.addRowAt(2, newRow())),
    op('add-row-end', (m, t) => m.addRowAt(t.rows.length, newRow())),
    op('add-empty-row-middle', m => m.addRowAt(2, ['', '', ''])),
    op('add-multiple-rows', (m, t) => {
      m.addRowAt(t.rows.length, ['X1', 'X2', 'X3'])
      // +1 because the clone already grew by one row.
      m.addRowAt(t.rows.length + 1, ['Y1', 'Y2', 'Y3'])
    }),
    op('delete-row-start', m => m.deleteRow(1)), // skip header
    op('delete-row-middle', m => m.deleteRow(2)),
    op('delete-row-end', (m, t) => m.deleteRow(t.rows.length - 1)),
    op('delete-multiple-rows', (m, t) => {
      // Last row first, so index 1 still refers to the first body row.
      m.deleteRow(t.rows.length - 1)
      m.deleteRow(1)
    }),
    op('delete-multiple-rows-from-end', (m, t) => {
      m.deleteRow(t.rows.length - 1)
      // The clone is now one row shorter. Repeating `t.rows.length - 1`
      // would be out of range and silently ignored by `deleteRow`, so the
      // operation would delete only one row. Step down to the new last row.
      m.deleteRow(t.rows.length - 2)
    }),
    op('add-column-start', m => m.addColumnAt(0, newColumn())),
    op('add-column-middle', m => m.addColumnAt(1, newColumn())),
    op('add-column-end', (m, t) => m.addColumnAt(t.rows[0].length, newColumn())),
    op('add-multiple-columns', (m, t) => {
      m.addColumnAt(t.rows[0].length, ['H1', 'a1', 'b1', 'c1'])
      // +1 because every row of the clone already grew by one cell.
      m.addColumnAt(t.rows[0].length + 1, ['H2', 'a2', 'b2', 'c2'])
    }),
    op('delete-column-start', m => m.deleteColumn(0)),
    op('delete-column-middle', m => m.deleteColumn(1)),
    op('delete-column-end', (m, t) => m.deleteColumn(t.rows[0].length - 1)),
    op('shift-content-right', m => m.shiftContentRight(1)),
  ]
}
202
+
203
/**
 * Decides whether the ordered pair (`_a` applied first, then `_b`) takes
 * part in the pairwise matrix.
 *
 * Currently every pair is accepted: both arguments are ignored and the
 * result is always `true`. The second operation's row/column indices may
 * no longer fit the table produced by the first one, but such pairs are
 * not filtered here. Instead the matrix relies on the `mutate` helpers
 * tolerating out-of-range indices: `addRowAt`/`addColumnAt` clamp the
 * index, while `editCell`/`deleteRow`/`deleteColumn` do nothing.
 *
 * @param _a Operation applied first (unused).
 * @param _b Operation applied second (unused).
 * @returns Always `true`.
 */
function canCompose(_a: Op, _b: Op): boolean {
  // Kept as a function so exclusions can be added later without touching
  // the matrix loop.
  return true
}
214
+
215
+ // ──────────────────────── table model ────────────────────────
216
+
217
/**
 * Minimal grid model of a table: one array of cell contents per row.
 * `renderTable` emits the first row as `<th>` cells and all others as `<td>`.
 */
interface TableData {
  rows: Array<string[]>
}
220
+
221
/**
 * Creates the fixture every matrix case starts from: a header row
 * (`Header1`..`Header3`) followed by three body rows `A`, `B` and `C`,
 * each with cells numbered 1 to 3.
 *
 * @returns A newly built table on every call, so callers may modify it.
 */
function baseTable3x3(): TableData {
  const columns = [1, 2, 3]
  const headerRow = columns.map(n => `Header${n}`)
  const bodyRows = ['A', 'B', 'C'].map(letter => columns.map(n => `${letter}${n}`))
  return { rows: [headerRow, ...bodyRows] }
}
231
+
232
/**
 * Serialises a table model to a compact HTML string with no whitespace
 * between tags. Cells of the first row are wrapped in `<th>`, cells of
 * every other row in `<td>`. Cell content is inserted verbatim (no
 * escaping).
 *
 * @param t Table to render.
 * @returns `<table>…</table>` markup containing one `<tr>` per row.
 */
function renderTable(t: TableData): string {
  const renderedRows = t.rows.map((cells, rowIndex) => {
    const tag = rowIndex === 0 ? 'th' : 'td'
    const renderedCells = cells.map(cell => `<${tag}>${cell}</${tag}>`)
    return `<tr>${renderedCells.join('')}</tr>`
  })
  return `<table>${renderedRows.join('')}</table>`
}
245
+
246
/**
 * Reads a table model back out of HTML. This is a tiny regex-based reader
 * that is only meant for the tables this matrix generates itself; it is
 * NOT a general HTML parser.
 *
 * Each `<tr …>…</tr>` becomes a row and each `<td …>…</td>` /
 * `<th …>…</th>` inside it becomes a cell holding the raw inner markup.
 * Attributes on those tags are ignored.
 *
 * @param html Markup to scan.
 * @returns The rows found, in document order (empty when none match).
 */
function parseTable(html: string): TableData {
  const rows = Array.from(html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs), ([, rowInner]) =>
    Array.from(rowInner.matchAll(/<t[dh][^>]*>(.*?)<\/t[dh]>/gs), ([, cellInner]) => cellInner)
  )
  return { rows }
}
259
+
260
/**
 * Editing handle passed to the callback of `mutate`. The notes on
 * out-of-range handling describe the implementation in `mutate`.
 */
interface Mutator {
  /** Replaces the content of one cell; ignored when the cell does not exist. */
  editCell: (row: number, col: number, content: string) => void
  /** Inserts `content` as a new row before index `at` (clamped to the valid range). */
  addRowAt: (at: number, content: string[]) => void
  /** Removes the row at index `at`; ignored when out of range. */
  deleteRow: (at: number) => void
  /** Inserts one cell per row before column `at` (clamped); `columnContent[r]` feeds row `r`. */
  addColumnAt: (at: number, columnContent: string[]) => void
  /** Removes column `at` from every row that has it. */
  deleteColumn: (at: number) => void
  /** Moves the content of row `rowIdx` one cell to the right; the last value is dropped. */
  shiftContentRight: (rowIdx: number) => void
}
268
+
269
/**
 * Runs an edit script against a copy of `t` and returns the edited copy.
 * `t` itself and its row arrays are left untouched.
 *
 * All helpers tolerate bad indices instead of throwing: insertions clamp
 * the index into the valid range, while edits and deletions with an
 * out-of-range index do nothing.
 *
 * @param t  Table to start from.
 * @param fn Edit script; receives the mutator bound to the copy.
 * @returns The copy after `fn` has run.
 */
function mutate(t: TableData, fn: (m: Mutator) => void): TableData {
  const copy: TableData = { rows: t.rows.map(cells => Array.from(cells)) }
  const clampIndex = (index: number, upper: number): number => Math.max(0, Math.min(index, upper))

  fn({
    editCell(row, col, content) {
      const cells = copy.rows[row]
      // Only overwrite a cell that already exists.
      if (cells === undefined || cells[col] === undefined) return
      cells[col] = content
    },
    addRowAt(at, content) {
      copy.rows.splice(clampIndex(at, copy.rows.length), 0, content)
    },
    deleteRow(at) {
      const inRange = at >= 0 && at < copy.rows.length
      if (!inRange) return
      copy.rows.splice(at, 1)
    },
    addColumnAt(at, columnContent) {
      copy.rows.forEach((cells, r) => {
        // Rows beyond the supplied content receive an empty cell.
        cells.splice(clampIndex(at, cells.length), 0, columnContent[r] ?? '')
      })
    },
    deleteColumn(at) {
      copy.rows.forEach(cells => {
        const inRange = at >= 0 && at < cells.length
        if (inRange) cells.splice(at, 1)
      })
    },
    shiftContentRight(rowIdx) {
      const cells = copy.rows[rowIdx]
      if (!cells) return
      // Dropping the last value and prepending an empty one moves every
      // remaining value one position to the right. An empty row ends up
      // with a single empty cell.
      cells.pop()
      cells.unshift('')
    },
  })

  return copy
}
305
+
306
+ // ──────────────────────── invariant checks ────────────────────────
307
+
308
/**
 * Fails the current test unless the diff output passes a set of cheap,
 * regex-based structural checks:
 *
 *   • for each of `<tr>`, `<td>`, `<th>`, `<ins>` and `<del>`, the number
 *     of opening tags equals the number of closing tags (counts only;
 *     nesting order is not verified)
 *   • no output row contains more `<td>`/`<th>` tags than the widest row
 *     found in either input — no phantom cells
 *   • the output is not empty when the two inputs differ
 *
 * @param output  Diff result to inspect.
 * @param oldHtml Old input; contributes to the cell-count limit.
 * @param newHtml New input; contributes to the cell-count limit.
 * @param label   Scenario name, prefixed to every failure message.
 */
function assertStructurallyValid(output: string, oldHtml: string, newHtml: string, label: string) {
  const ctx = `[${label}]`

  // Open/close balance, one tag name at a time. An opening tag is the
  // name followed by whitespace or `>`, so `<tr>` and `<tr class=…>` both
  // count while longer tag names do not.
  for (const tag of ['tr', 'td', 'th', 'ins', 'del']) {
    const opened = countMatches(output, new RegExp(`<${tag}[\\s>]`, 'g'))
    const closed = countMatches(output, new RegExp(`</${tag}>`, 'g'))
    expect(opened, `${ctx} <${tag}> tag balance`).toBe(closed)
  }

  // Widest row across both inputs is the upper bound for every output row.
  const limit = Math.max(maxRowCellCount(oldHtml), maxRowCellCount(newHtml))
  for (const [, rowInner] of output.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs)) {
    const cellsInRow = countMatches(rowInner, /<t[dh][\s>]/g)
    expect(cellsInRow, `${ctx} row has too many cells (${cellsInRow} > ${limit})`).toBeLessThanOrEqual(limit)
  }

  // Identical inputs are allowed to produce any output, including none.
  if (oldHtml === newHtml) return
  expect(output.length, `${ctx} output is empty`).toBeGreaterThan(0)
}
358
+
359
/**
 * Finds the widest row in a piece of table markup.
 *
 * Rows are located with a regex (`<tr …>…</tr>`), and a row's width is the
 * number of `<td`/`<th` opening tags inside it. Attributes such as
 * `colspan` are not interpreted.
 *
 * @param html Markup to scan.
 * @returns The largest per-row cell count, or 0 when no row is found.
 */
function maxRowCellCount(html: string): number {
  const rowWidths = Array.from(
    html.matchAll(/<tr[^>]*>(.*?)<\/tr>/gs),
    ([, rowInner]) => (rowInner.match(/<t[dh][\s>]/g) ?? []).length
  )
  return rowWidths.reduce((widest, width) => (width > widest ? width : widest), 0)
}
368
+
369
/**
 * Returns the length of the array produced by `s.match(re)`, or 0 when
 * there is no match. With a global (`g`) regex this is the number of
 * occurrences of the pattern in `s`.
 *
 * @param s  Text to search.
 * @param re Pattern to apply; callers in this file always pass a global regex.
 */
function countMatches(s: string, re: RegExp): number {
  return s.match(re)?.length ?? 0
}