@createiq/htmldiff 1.0.5 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2069 @@
1
+ import { describe, expect, it } from 'vitest'
2
+
3
+ import HtmlDiff from '../src/HtmlDiff'
4
+
5
+ // Tests defining the expected blackline behaviour for HTML tables.
6
+ // The reference behaviour is Microsoft Word's track-changes representation
7
+ // of the same edit. Where Word's output is unclear, the test is marked as
8
+ // `it.todo` until we can capture it from a real document.
9
+ describe('HtmlDiff — tables', () => {
10
// Edits confined to the text of individual cells: the row/column layout is
// the same on both sides, so only inline <del>/<ins> markers are expected.
describe('cell content edits', () => {
  it('marks an inline text change inside a single cell', () => {
    const before = '<table><tr><td>Foo</td><td>Bar</td></tr></table>'
    const after = '<table><tr><td>Foo</td><td>Baz</td></tr></table>'
    const expected =
      "<table><tr><td>Foo</td><td><del class='diffmod'>Bar</del><ins class='diffmod'>Baz</ins></td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks inline text changes across multiple cells in the same row', () => {
    const before = '<table><tr><td>Apple</td><td>Banana</td><td>Cherry</td></tr></table>'
    const after = '<table><tr><td>Apricot</td><td>Banana</td><td>Coconut</td></tr></table>'
    const expected = [
      '<table><tr>',
      "<td><del class='diffmod'>Apple</del><ins class='diffmod'>Apricot</ins></td>",
      '<td>Banana</td>',
      "<td><del class='diffmod'>Cherry</del><ins class='diffmod'>Coconut</ins></td>",
      '</tr></table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks inline text changes across multiple cells on different rows', () => {
    const before = '<table><tr><td>Apple</td><td>Banana</td></tr><tr><td>Cherry</td><td>Date</td></tr></table>'
    const after = '<table><tr><td>Apricot</td><td>Banana</td></tr><tr><td>Cherry</td><td>Durian</td></tr></table>'
    const expected = [
      '<table>',
      "<tr><td><del class='diffmod'>Apple</del><ins class='diffmod'>Apricot</ins></td><td>Banana</td></tr>",
      "<tr><td>Cherry</td><td><del class='diffmod'>Date</del><ins class='diffmod'>Durian</ins></td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('appends inserted text inside a cell when content is added', () => {
    const before = '<table><tr><td>Foo</td><td>Bar</td></tr></table>'
    const after = '<table><tr><td>Foo</td><td>Bar baz</td></tr></table>'
    const expected = "<table><tr><td>Foo</td><td>Bar<ins class='diffins'>&nbsp;baz</ins></td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('strikes through removed text inside a cell when content is deleted', () => {
    const before = '<table><tr><td>Foo</td><td>Bar baz</td></tr></table>'
    const after = '<table><tr><td>Foo</td><td>Bar</td></tr></table>'
    const expected = "<table><tr><td>Foo</td><td>Bar<del class='diffdel'>&nbsp;baz</del></td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks formatting changes inside a cell', () => {
    const before = '<table><tr><td>plain text</td></tr></table>'
    const after = '<table><tr><td><strong>plain text</strong></td></tr></table>'
    const expected = "<table><tr><td><strong><ins class='mod strong'>plain text</ins></strong></td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
72
+
73
describe('content moved between cells', () => {
  // Word has no notion of a cross-cell move: every cell is reported as its
  // own edit. A cell whose old content is replaced renders as
  // <del>old</del><ins>new</ins>; a cell that only loses content renders as
  // <del>old</del>. No "moved" marker exists. The cell-aware preprocessing
  // in TableDiff aligns cells by position when the row × cell dimensions
  // match, which is what lets these per-cell edits come out cleanly.
  it('marks each cell with independent del/ins when content shifts right by one cell', () => {
    const before = '<table><tr><td>A</td><td>B</td><td>C</td></tr></table>'
    const after = '<table><tr><td></td><td>A</td><td>B</td></tr></table>'
    const expected = [
      '<table><tr>',
      "<td><del class='diffdel'>A</del></td>",
      "<td><del class='diffmod'>B</del><ins class='diffmod'>A</ins></td>",
      "<td><del class='diffmod'>C</del><ins class='diffmod'>B</ins></td>",
      '</tr></table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks each cell with independent del/ins when content shifts left by one cell', () => {
    const before = '<table><tr><td>A</td><td>B</td><td>C</td></tr></table>'
    const after = '<table><tr><td>B</td><td>C</td><td></td></tr></table>'
    const expected = [
      '<table><tr>',
      "<td><del class='diffmod'>A</del><ins class='diffmod'>B</ins></td>",
      "<td><del class='diffmod'>B</del><ins class='diffmod'>C</ins></td>",
      "<td><del class='diffdel'>C</del></td>",
      '</tr></table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks each cell with independent del/ins when content is swapped between two cells', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr></table>'
    const after = '<table><tr><td>B</td><td>A</td></tr></table>'
    const expected = [
      '<table><tr>',
      "<td><del class='diffmod'>A</del><ins class='diffmod'>B</ins></td>",
      "<td><del class='diffmod'>B</del><ins class='diffmod'>A</ins></td>",
      '</tr></table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks each cell with independent del/ins when content is moved across rows', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const after = '<table><tr><td>C</td><td>D</td></tr><tr><td>A</td><td>B</td></tr></table>'
    const expected = [
      '<table>',
      '<tr>',
      "<td><del class='diffmod'>A</del><ins class='diffmod'>C</ins></td>",
      "<td><del class='diffmod'>B</del><ins class='diffmod'>D</ins></td>",
      '</tr>',
      '<tr>',
      "<td><del class='diffmod'>C</del><ins class='diffmod'>A</ins></td>",
      "<td><del class='diffmod'>D</del><ins class='diffmod'>B</ins></td>",
      '</tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
136
+
137
// An inserted row carries `class='diffins'` on its <tr> and on each of its
// <td> cells, and every cell's content is wrapped in <ins>. Having the class
// at all three levels (tr/td/ins) lets a stylesheet hook whichever one it
// prefers — Word's tinted row plus underlined text maps onto it naturally.
describe('add rows', () => {
  it('marks a row added at the end of a table', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr></table>'
    const after = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const expected = [
      '<table>',
      '<tr><td>A</td><td>B</td></tr>',
      "<tr class='diffins'>",
      "<td class='diffins'><ins class='diffins'>C</ins></td>",
      "<td class='diffins'><ins class='diffins'>D</ins></td>",
      '</tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a row added at the start of a table', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr></table>'
    const after = '<table><tr><td>X</td><td>Y</td></tr><tr><td>A</td><td>B</td></tr></table>'
    const expected = [
      '<table>',
      "<tr class='diffins'>",
      "<td class='diffins'><ins class='diffins'>X</ins></td>",
      "<td class='diffins'><ins class='diffins'>Y</ins></td>",
      '</tr>',
      '<tr><td>A</td><td>B</td></tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a row added in the middle of a table', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const after =
      '<table><tr><td>A</td><td>B</td></tr><tr><td>X</td><td>Y</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const expected = [
      '<table>',
      '<tr><td>A</td><td>B</td></tr>',
      "<tr class='diffins'>",
      "<td class='diffins'><ins class='diffins'>X</ins></td>",
      "<td class='diffins'><ins class='diffins'>Y</ins></td>",
      '</tr>',
      '<tr><td>C</td><td>D</td></tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks multiple consecutive rows added at the end', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr></table>'
    const after =
      '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr><tr><td>E</td><td>F</td></tr></table>'
    const expected = [
      '<table>',
      '<tr><td>A</td><td>B</td></tr>',
      "<tr class='diffins'>",
      "<td class='diffins'><ins class='diffins'>C</ins></td>",
      "<td class='diffins'><ins class='diffins'>D</ins></td>",
      '</tr>',
      "<tr class='diffins'>",
      "<td class='diffins'><ins class='diffins'>E</ins></td>",
      "<td class='diffins'><ins class='diffins'>F</ins></td>",
      '</tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks non-consecutive rows added in different parts of the table', () => {
    const before = '<table><tr><td>A</td></tr><tr><td>C</td></tr></table>'
    const after = '<table><tr><td>X</td></tr><tr><td>A</td></tr><tr><td>C</td></tr><tr><td>Y</td></tr></table>'
    const expected = [
      '<table>',
      "<tr class='diffins'><td class='diffins'><ins class='diffins'>X</ins></td></tr>",
      '<tr><td>A</td></tr>',
      '<tr><td>C</td></tr>',
      "<tr class='diffins'><td class='diffins'><ins class='diffins'>Y</ins></td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a row containing formatted content as added', () => {
    const before = '<table><tr><td>A</td></tr></table>'
    const after = '<table><tr><td>A</td></tr><tr><td><strong>B</strong></td></tr></table>'
    const expected = [
      '<table>',
      '<tr><td>A</td></tr>',
      "<tr class='diffins'><td class='diffins'><strong><ins class='diffins'>B</ins></strong></td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  // Fuzzy row matching: when a row receives a small content edit while an
  // unrelated row is added elsewhere, the edited row should be paired with
  // its old counterpart and diffed cell-by-cell, not emitted as a whole-row
  // delete followed by a reinsert. The pairing relies on the character-level
  // prefix+suffix similarity exceeding ROW_FUZZY_THRESHOLD.
  it('matches a row with a minor edit alongside a newly added row', () => {
    const before = '<table><tr><td>Same row</td></tr><tr><td>Edited row</td></tr></table>'
    const after = '<table><tr><td>Same row</td></tr><tr><td>Edited rowX</td></tr><tr><td>New row</td></tr></table>'
    const expected = [
      '<table>',
      '<tr><td>Same row</td></tr>',
      "<tr><td>Edited <del class='diffmod'>row</del><ins class='diffmod'>rowX</ins></td></tr>",
      "<tr class='diffins'><td class='diffins'><ins class='diffins'>New row</ins></td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('still treats wholly-different rows as deleted+inserted when fuzzy similarity is too low', () => {
    // The row counts differ, which sends this through the row-LCS path.
    // The unmatched old row "Hello world" and new row "Goodbye there" have
    // no common prefix or suffix, so fuzzy matching refuses to pair them;
    // they stay as a separate delete + insert, and the unrelated "New row"
    // is inserted as well.
    const before = '<table><tr><td>Same</td></tr><tr><td>Hello world</td></tr></table>'
    const after = '<table><tr><td>Same</td></tr><tr><td>Goodbye there</td></tr><tr><td>New row</td></tr></table>'
    const expected = [
      '<table>',
      '<tr><td>Same</td></tr>',
      "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>Hello world</del></td></tr>",
      "<tr class='diffins'><td class='diffins'><ins class='diffins'>Goodbye there</ins></td></tr>",
      "<tr class='diffins'><td class='diffins'><ins class='diffins'>New row</ins></td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
272
+
273
// For an inserted column, each new <td> carries `class='diffins'`. The <tr>
// is left untagged because it still holds preserved cells.
describe('add columns', () => {
  it('marks a column added at the end of a table', () => {
    const before = '<table><tr><td>A</td></tr><tr><td>C</td></tr></table>'
    const after = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const expected = [
      '<table>',
      "<tr><td>A</td><td class='diffins'><ins class='diffins'>B</ins></td></tr>",
      "<tr><td>C</td><td class='diffins'><ins class='diffins'>D</ins></td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a column added at the start of a table', () => {
    const before = '<table><tr><td>A</td></tr><tr><td>C</td></tr></table>'
    const after = '<table><tr><td>X</td><td>A</td></tr><tr><td>Y</td><td>C</td></tr></table>'
    const expected = [
      '<table>',
      "<tr><td class='diffins'><ins class='diffins'>X</ins></td><td>A</td></tr>",
      "<tr><td class='diffins'><ins class='diffins'>Y</ins></td><td>C</td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a column added in the middle of a table', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const after = '<table><tr><td>A</td><td>X</td><td>B</td></tr><tr><td>C</td><td>Y</td><td>D</td></tr></table>'
    const expected = [
      '<table>',
      "<tr><td>A</td><td class='diffins'><ins class='diffins'>X</ins></td><td>B</td></tr>",
      "<tr><td>C</td><td class='diffins'><ins class='diffins'>Y</ins></td><td>D</td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks multiple consecutive columns added at the end', () => {
    const before = '<table><tr><td>A</td></tr><tr><td>C</td></tr></table>'
    const after =
      '<table><tr><td>A</td><td>B1</td><td>B2</td></tr><tr><td>C</td><td>D1</td><td>D2</td></tr></table>'
    const expected = [
      '<table>',
      '<tr>',
      '<td>A</td>',
      "<td class='diffins'><ins class='diffins'>B1</ins></td>",
      "<td class='diffins'><ins class='diffins'>B2</ins></td>",
      '</tr>',
      '<tr>',
      '<td>C</td>',
      "<td class='diffins'><ins class='diffins'>D1</ins></td>",
      "<td class='diffins'><ins class='diffins'>D2</ins></td>",
      '</tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
333
+
334
// Deleted rows are the mirror image of inserted ones: `class='diffdel'` on
// the <tr> and on each <td>, with every cell's content wrapped in <del>.
describe('delete rows', () => {
  it('marks a row deleted from the end of a table', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const after = '<table><tr><td>A</td><td>B</td></tr></table>'
    const expected = [
      '<table>',
      '<tr><td>A</td><td>B</td></tr>',
      "<tr class='diffdel'>",
      "<td class='diffdel'><del class='diffdel'>C</del></td>",
      "<td class='diffdel'><del class='diffdel'>D</del></td>",
      '</tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a row deleted from the start of a table', () => {
    const before = '<table><tr><td>X</td><td>Y</td></tr><tr><td>A</td><td>B</td></tr></table>'
    const after = '<table><tr><td>A</td><td>B</td></tr></table>'
    const expected = [
      '<table>',
      "<tr class='diffdel'>",
      "<td class='diffdel'><del class='diffdel'>X</del></td>",
      "<td class='diffdel'><del class='diffdel'>Y</del></td>",
      '</tr>',
      '<tr><td>A</td><td>B</td></tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a row deleted from the middle of a table', () => {
    const before =
      '<table><tr><td>A</td><td>B</td></tr><tr><td>X</td><td>Y</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const after = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const expected = [
      '<table>',
      '<tr><td>A</td><td>B</td></tr>',
      "<tr class='diffdel'>",
      "<td class='diffdel'><del class='diffdel'>X</del></td>",
      "<td class='diffdel'><del class='diffdel'>Y</del></td>",
      '</tr>',
      '<tr><td>C</td><td>D</td></tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks multiple consecutive rows deleted', () => {
    const before = '<table><tr><td>A</td></tr><tr><td>X</td></tr><tr><td>Y</td></tr><tr><td>B</td></tr></table>'
    const after = '<table><tr><td>A</td></tr><tr><td>B</td></tr></table>'
    const expected = [
      '<table>',
      '<tr><td>A</td></tr>',
      "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>X</del></td></tr>",
      "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>Y</del></td></tr>",
      '<tr><td>B</td></tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks every row deleted when the table is emptied of body rows', () => {
    const before = '<table><tr><td>A</td></tr><tr><td>B</td></tr></table>'
    const after = '<table></table>'
    const expected = [
      '<table>',
      "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>A</del></td></tr>",
      "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>B</del></td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
410
+
411
// Column deletion is not tracked by Word (it warns "this action won't be
// marked as a change"), so the expectations below are our own design: every
// deleted <td> carries `class='diffdel'` and has its content wrapped in
// <del>, matching the per-cell behaviour used for deleted rows.
describe('delete columns', () => {
  it('marks a column deleted from the end of a table', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const after = '<table><tr><td>A</td></tr><tr><td>C</td></tr></table>'
    const expected = [
      '<table>',
      "<tr><td>A</td><td class='diffdel'><del class='diffdel'>B</del></td></tr>",
      "<tr><td>C</td><td class='diffdel'><del class='diffdel'>D</del></td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a column deleted from the start of a table', () => {
    const before = '<table><tr><td>X</td><td>A</td></tr><tr><td>Y</td><td>C</td></tr></table>'
    const after = '<table><tr><td>A</td></tr><tr><td>C</td></tr></table>'
    const expected = [
      '<table>',
      "<tr><td class='diffdel'><del class='diffdel'>X</del></td><td>A</td></tr>",
      "<tr><td class='diffdel'><del class='diffdel'>Y</del></td><td>C</td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a column deleted from the middle of a table', () => {
    const before = '<table><tr><td>A</td><td>X</td><td>B</td></tr><tr><td>C</td><td>Y</td><td>D</td></tr></table>'
    const after = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const expected = [
      '<table>',
      "<tr><td>A</td><td class='diffdel'><del class='diffdel'>X</del></td><td>B</td></tr>",
      "<tr><td>C</td><td class='diffdel'><del class='diffdel'>Y</del></td><td>D</td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks multiple consecutive columns deleted from the end', () => {
    const before =
      '<table><tr><td>A</td><td>B1</td><td>B2</td></tr><tr><td>C</td><td>D1</td><td>D2</td></tr></table>'
    const after = '<table><tr><td>A</td></tr><tr><td>C</td></tr></table>'
    const expected = [
      '<table>',
      '<tr>',
      '<td>A</td>',
      "<td class='diffdel'><del class='diffdel'>B1</del></td>",
      "<td class='diffdel'><del class='diffdel'>B2</del></td>",
      '</tr>',
      '<tr>',
      '<td>C</td>',
      "<td class='diffdel'><del class='diffdel'>D1</del></td>",
      "<td class='diffdel'><del class='diffdel'>D2</del></td>",
      '</tr>',
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
473
+
474
// Word leaves cell merges and splits untracked (it warns "this action won't
// be marked as a change"), so the design here is our own. Convention: emit
// the new structure unchanged, and tag the merged/split cells with
// `class='mod colspan'` or `class='mod rowspan'`. Stylesheets can then show
// the structural change without the misleading visual noise of synthetic
// del/ins around content that did not actually change. The `mod` prefix
// follows htmldiff's existing convention for "modified" markers (e.g.
// `mod strong` for bold-formatting changes).
describe('merge cells horizontally (colspan)', () => {
  it('marks two adjacent cells merged into one colspan=2 cell', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr></table>'
    const after = '<table><tr><td colspan="2">A B</td></tr></table>'
    const expected = `<table><tr><td colspan="2" class='mod colspan'>A B</td></tr></table>`

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks three adjacent cells merged into one colspan=3 cell', () => {
    const before = '<table><tr><td>A</td><td>B</td><td>C</td></tr></table>'
    const after = '<table><tr><td colspan="3">A B C</td></tr></table>'
    const expected = `<table><tr><td colspan="3" class='mod colspan'>A B C</td></tr></table>`

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
501
+
502
// Stacked single-cell rows collapsing into one rowspan cell: the merged
// cell is expected to be tagged `mod rowspan`, with the now-empty rows kept.
describe('merge cells vertically (rowspan)', () => {
  it('marks two stacked cells merged into one rowspan=2 cell', () => {
    const before = '<table><tr><td>A</td></tr><tr><td>B</td></tr></table>'
    const after = '<table><tr><td rowspan="2">A B</td></tr><tr></tr></table>'
    const expected = `<table><tr><td rowspan="2" class='mod rowspan'>A B</td></tr><tr></tr></table>`

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks three stacked cells merged into one rowspan=3 cell', () => {
    const before = '<table><tr><td>A</td></tr><tr><td>B</td></tr><tr><td>C</td></tr></table>'
    const after = '<table><tr><td rowspan="3">A B C</td></tr><tr></tr><tr></tr></table>'
    const expected = `<table><tr><td rowspan="3" class='mod rowspan'>A B C</td></tr><tr></tr><tr></tr></table>`

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
521
+
522
// A colspan cell broken back into individual cells: each resulting cell is
// expected to be tagged `mod colspan`.
describe('split cells horizontally', () => {
  it('marks a colspan=2 cell split into two cells', () => {
    const before = '<table><tr><td colspan="2">A B</td></tr></table>'
    const after = '<table><tr><td>A</td><td>B</td></tr></table>'
    const expected = "<table><tr><td class='mod colspan'>A</td><td class='mod colspan'>B</td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a colspan=3 cell split into three cells', () => {
    const before = '<table><tr><td colspan="3">A B C</td></tr></table>'
    const after = '<table><tr><td>A</td><td>B</td><td>C</td></tr></table>'
    const expected =
      "<table><tr><td class='mod colspan'>A</td><td class='mod colspan'>B</td><td class='mod colspan'>C</td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
541
+
542
// A rowspan cell broken back into one cell per row: each resulting cell is
// expected to be tagged `mod rowspan`.
describe('split cells vertically', () => {
  it('marks a rowspan=2 cell split into two cells across two rows', () => {
    const before = '<table><tr><td rowspan="2">A B</td></tr><tr></tr></table>'
    const after = '<table><tr><td>A</td></tr><tr><td>B</td></tr></table>'
    const expected = "<table><tr><td class='mod rowspan'>A</td></tr><tr><td class='mod rowspan'>B</td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks a rowspan=3 cell split into three cells across three rows', () => {
    const before = '<table><tr><td rowspan="3">A B C</td></tr><tr></tr><tr></tr></table>'
    const after = '<table><tr><td>A</td></tr><tr><td>B</td></tr><tr><td>C</td></tr></table>'
    const expected =
      "<table><tr><td class='mod rowspan'>A</td></tr><tr><td class='mod rowspan'>B</td></tr><tr><td class='mod rowspan'>C</td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
561
+
562
// Rectangular merge (colspan and rowspan together): a cell carrying both
// attributes absorbs a 2D region. The vertical-merge detector also covers
// absorbed rows that are not single-cell — any old cell layout whose
// logical column width adds up to the new cell's colspan, and whose
// rowspans are all 1, counts as a valid match.
describe('combined colspan + rowspan merges', () => {
  it('marks a 2x2 region merged into one colspan=2 rowspan=2 cell', () => {
    const before = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const after = '<table><tr><td colspan="2" rowspan="2">Merged</td></tr><tr></tr></table>'
    const expected = `<table><tr><td colspan="2" rowspan="2" class='mod rowspan'>Merged</td></tr><tr></tr></table>`

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('marks the inverse split (colspan=2 rowspan=2 cell split into 2x2)', () => {
    const before = '<table><tr><td colspan="2" rowspan="2">Merged</td></tr><tr></tr></table>'
    const after = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
    const expected = [
      '<table>',
      "<tr><td class='mod rowspan'>A</td><td class='mod rowspan'>B</td></tr>",
      "<tr><td class='mod rowspan'>C</td><td class='mod rowspan'>D</td></tr>",
      '</table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
589
+
590
// Structural cases flagged by the test reviewer. They live on the boundary
// between `preprocessTables` and the word-level diff, or check that the new
// emission paths keep attributes and wrappers intact.
describe('table identity and pairing', () => {
  it('returns input verbatim when both sides are identical (early-exit)', () => {
    const unchanged = '<table><tr><td>Same</td></tr></table>'

    const actual = HtmlDiff.execute(unchanged, unchanged)

    expect(actual).toEqual(unchanged)
  })

  it('only diffs the changed table when there are multiple unchanged tables alongside it', () => {
    const before = [
      '<table><tr><td>A</td></tr></table>',
      '<table><tr><td>B</td></tr></table>',
      '<table><tr><td>C</td></tr></table>',
    ].join('')
    const after = [
      '<table><tr><td>A</td></tr></table>',
      '<table><tr><td>X</td></tr></table>',
      '<table><tr><td>C</td></tr></table>',
    ].join('')
    const expected = [
      '<table><tr><td>A</td></tr></table>',
      "<table><tr><td><del class='diffmod'>B</del><ins class='diffmod'>X</ins></td></tr></table>",
      '<table><tr><td>C</td></tr></table>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('diffs a table embedded in surrounding prose without disturbing the prose', () => {
    const before = '<p>Intro</p><table><tr><td>A</td></tr></table><p>Outro</p>'
    const after = '<p>Intro</p><table><tr><td>B</td></tr></table><p>Outro</p>'
    const expected = [
      '<p>Intro</p>',
      "<table><tr><td><del class='diffmod'>A</del><ins class='diffmod'>B</ins></td></tr></table>",
      '<p>Outro</p>',
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('still diffs cells when the user input contains a comment that LOOKS like our placeholder', () => {
    // Collisions are astronomically unlikely thanks to the per-call random
    // nonce. The hard-coded `aaaa1234_0` below is harmless: the real
    // placeholder will carry a different random suffix.
    const before = '<table><tr><td>A</td></tr></table><!--HTMLDIFF_TABLE_aaaa1234_0-->'
    const after = '<table><tr><td>B</td></tr></table><!--HTMLDIFF_TABLE_aaaa1234_0-->'
    const expected =
      "<table><tr><td><del class='diffmod'>A</del><ins class='diffmod'>B</ins></td></tr></table><!--HTMLDIFF_TABLE_aaaa1234_0-->"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
640
+
641
describe('table count mismatches', () => {
  // If the number of tables differs between old and new, preprocessTables
  // returns null and the word-level diff takes over. Cell content receives
  // ins/del markers, while the surrounding `<table>`/`<tr>` structural tags
  // pass through bare. These tests document the current behaviour.
  it('handles a table newly injected after surrounding prose (table count old=0, new=1)', () => {
    const before = '<p>Intro</p>'
    const after = '<p>Intro</p><table><tr><td>Clause</td><td>Term</td></tr></table>'
    const expected =
      "<p>Intro</p><table><tr><td><ins class='diffins'>Clause</ins></td><td><ins class='diffins'>Term</ins></td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('handles a table deleted from surrounding prose (table count old=1, new=0)', () => {
    const before = '<p>Hello</p><table><tr><td>A</td></tr></table>'
    const after = '<p>Hello</p>'
    const expected = "<p>Hello</p><table><tr><td><del class='diffdel'>A</del></td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })

  it('handles old=1 vs new=2 tables', () => {
    const before = '<table><tr><td>A</td></tr></table>'
    const after = '<table><tr><td>A</td></tr></table><table><tr><td>B</td></tr></table>'
    const expected =
      "<table><tr><td>A</td></tr></table><table><tr><td><ins class='diffins'>B</ins></td></tr></table>"

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
673
+
674
describe('table reordering (positional pairing)', () => {
  // Swapping two tables makes preprocessTables pair them by position
  // (table[0] in old with table[0] in new). The result is a flat word-level
  // diff over both tables, so each looks completely modified although they
  // were only reordered. This is a known limitation (Word does not track
  // move/reorder either); it is pinned here so that a future improvement
  // shows up as a test change.
  it('pairs swapped tables positionally and renders both as fully modified', () => {
    const before = '<table><tr><td>Table1</td></tr></table><table><tr><td>Table2</td></tr></table>'
    const after = '<table><tr><td>Table2</td></tr></table><table><tr><td>Table1</td></tr></table>'
    const expected = [
      "<table><tr><td><del class='diffmod'>Table1</del><ins class='diffmod'>Table2</ins></td></tr></table>",
      "<table><tr><td><del class='diffmod'>Table2</del><ins class='diffmod'>Table1</ins></td></tr></table>",
    ].join('')

    const actual = HtmlDiff.execute(before, after)

    expect(actual).toEqual(expected)
  })
})
691
+
692
+ describe('thead/tbody/tfoot wrappers', () => {
693
it('preserves <thead>/<tbody> when adding a row inside <tbody>', () => {
  // A second body row is appended; the section wrappers stay in place and
  // only the new row carries diffins markers.
  const before = '<table><thead><tr><th>H</th></tr></thead><tbody><tr><td>A</td></tr></tbody></table>'
  const after =
    '<table><thead><tr><th>H</th></tr></thead><tbody><tr><td>A</td></tr><tr><td>B</td></tr></tbody></table>'
  const expected = [
    '<table><thead><tr><th>H</th></tr></thead><tbody>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'><ins class='diffins'>B</ins></td></tr>",
    '</tbody></table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
705
+
706
it('preserves <thead>/<tbody> when deleting a row from <tbody>', () => {
  // The second body row is removed; the section wrappers stay in place and
  // the removed row is re-emitted with diffdel markers.
  const before =
    '<table><thead><tr><th>H</th></tr></thead><tbody><tr><td>A</td></tr><tr><td>B</td></tr></tbody></table>'
  const after = '<table><thead><tr><th>H</th></tr></thead><tbody><tr><td>A</td></tr></tbody></table>'
  const expected = [
    '<table><thead><tr><th>H</th></tr></thead><tbody>',
    '<tr><td>A</td></tr>',
    "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>B</del></td></tr>",
    '</tbody></table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
718
+ })
719
+
720
+ describe('<th> cells', () => {
721
it('marks an added column with <th> header cells', () => {
  // A third column is added to both the header row and the data row.
  const before = '<table><tr><th>H1</th><th>H2</th></tr><tr><td>A</td><td>B</td></tr></table>'
  const after = '<table><tr><th>H1</th><th>H2</th><th>H3</th></tr><tr><td>A</td><td>B</td><td>C</td></tr></table>'
  const expected = [
    '<table>',
    "<tr><th>H1</th><th>H2</th><th class='diffins'><ins class='diffins'>H3</ins></th></tr>",
    "<tr><td>A</td><td>B</td><td class='diffins'><ins class='diffins'>C</ins></td></tr>",
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
732
+
733
it('marks a deleted row whose cells are <th>', () => {
  // The header row disappears; it is re-emitted with diffdel on the row and
  // on each <th>.
  const before = '<table><tr><th>H1</th><th>H2</th></tr><tr><td>A</td><td>B</td></tr></table>'
  const after = '<table><tr><td>A</td><td>B</td></tr></table>'
  const expected = [
    '<table>',
    "<tr class='diffdel'>",
    "<th class='diffdel'><del class='diffdel'>H1</del></th>",
    "<th class='diffdel'><del class='diffdel'>H2</del></th>",
    '</tr>',
    '<tr><td>A</td><td>B</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
747
+ })
748
+
749
+ // The user explicitly called these out: when we inject diffins/diffdel
750
+ // class markers, we must preserve any existing attributes (especially
751
+ // `class`) and the `class` injection must merge into the existing
752
+ // attribute rather than overwrite it.
753
+ describe('attribute preservation', () => {
754
it("preserves existing class attribute on <tr> when adding 'diffins'", () => {
  // The inserted row already has a class; diffins is merged into that same
  // attribute value.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr class="section-header"><td>B</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    `<tr class="section-header diffins"><td class='diffins'><ins class='diffins'>B</ins></td></tr>`,
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
765
+
766
it("preserves multi-class attribute on <td> when adding 'diffdel'", () => {
  // The deleted row's cell carries two classes; diffdel is appended after
  // them inside the existing attribute.
  const before = '<table><tr><td>A</td></tr><tr><td class="highlight important">B</td></tr></table>'
  const after = '<table><tr><td>A</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffdel'>",
    `<td class="highlight important diffdel"><del class='diffdel'>B</del></td>`,
    '</tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
779
+
780
it('preserves single-quoted class on <td> in inserted column', () => {
  // The new column's cell uses a single-quoted class attribute; the quoting
  // style is kept and diffins is merged into the value.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = "<table><tr><td>A</td><td class='added-col'>B</td></tr></table>"
  const expected = [
    '<table>',
    "<tr><td>A</td><td class='added-col diffins'><ins class='diffins'>B</ins></td></tr>",
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
788
+
789
it('preserves arbitrary attributes (id, style, data-*) on a <td> in an inserted row', () => {
  // The cell has no class attribute, so a new class='diffins' is added after
  // the existing id / data-key / style attributes, which stay untouched.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr><td id="row2" data-key="b" style="color:red">B</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'>",
    `<td id="row2" data-key="b" style="color:red" class='diffins'>`,
    "<ins class='diffins'>B</ins>",
    '</td>',
    '</tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
804
+
805
it('preserves data-* attributes on a <tr> in an inserted row alongside the injected class', () => {
  // `data-behaviour` (and similar `data-*` attributes) on `<tr>` drive table
  // semantics in the frontend, so the diff has to emit them unchanged while
  // also adding `class='diffins'` for the structural change. Whether a
  // downstream DOMPurify pass keeps `data-*` is out of scope here; this
  // only pins that htmldiff itself does not drop them.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    `<tr data-behaviour="data" class='diffins'>`,
    "<td class='diffins'><ins class='diffins'>B</ins></td>",
    '</tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
823
+
824
it('preserves data-* attributes on a <tr> in a deleted row alongside the injected class', () => {
  // Deletion counterpart: the removed row keeps data-behaviour and gains
  // class='diffdel'.
  const before = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td></tr></table>'
  const after = '<table><tr><td>A</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    `<tr data-behaviour="data" class='diffdel'>`,
    "<td class='diffdel'><del class='diffdel'>B</del></td>",
    '</tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
837
+
838
it('preserves multiple data-* attributes on a <tr> in an inserted row', () => {
  // Several attributes on the inserted row: all are kept in their original
  // order, with the injected class placed after them.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr data-behaviour="data" id="row2" data-x="y"><td>B</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    `<tr data-behaviour="data" id="row2" data-x="y" class='diffins'>`,
    "<td class='diffins'><ins class='diffins'>B</ins></td>",
    '</tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
851
+
852
it('preserves data-* attribute on a <tr> when the row passes through column-add path (preserved row)', () => {
  // The attributed row is present on both sides but gains a cell (column
  // added), so it is handled as a preserved row with differing cell counts
  // rather than as an inserted or deleted row. Its attribute is expected to
  // be carried over unchanged.
  const before = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td><td>C</td></tr></table>'
  const after =
    '<table><tr><td>A</td><td>X</td></tr><tr data-behaviour="data"><td>B</td><td>EXTRA</td><td>C</td></tr></table>'

  const diffed = HtmlDiff.execute(before, after)

  // Match the whole row rather than just the opening <tr>: checking only
  // the tag would still pass if the cells were lost or duplicated. B and C
  // must be preserved with EXTRA inserted between them.
  const expectedRow = [
    '<tr data-behaviour="data"><td>B</td>',
    "<td class='diffins'><ins class='diffins'>EXTRA</ins></td>",
    '<td>C</td></tr>',
  ].join('')
  expect(diffed).toContain(expectedRow)
})
872
+
873
it('preserves data-* attribute on a <tr> when the row passes through fuzzy-matching with content edit', () => {
  // The attributed row's text is extended and a new row is appended, so the
  // row can no longer be matched exactly and has to be paired by similarity.
  // The attribute is expected to survive that path as well.
  const before = [
    '<table>',
    '<tr><td>A</td></tr>',
    '<tr data-behaviour="data"><td>The quick brown fox jumps over.</td></tr>',
    '</table>',
  ].join('')
  const after = [
    '<table>',
    '<tr><td>A</td></tr>',
    '<tr data-behaviour="data"><td>The quick brown fox jumps over the lazy dog.</td></tr>',
    '<tr><td>NEW</td></tr>',
    '</table>',
  ].join('')

  const diffed = HtmlDiff.execute(before, after)

  // The attribute and the inline content edit must appear on the same <tr>.
  const expectedRow = [
    '<tr data-behaviour="data"><td>The quick brown fox jumps over',
    "<ins class='diffins'>&nbsp;the lazy dog</ins>.</td></tr>",
  ].join('')
  expect(diffed).toContain(expectedRow)
})
897
+
898
it('preserves <table> attributes verbatim from new (no diff marker on attribute changes)', () => {
  // Only the <table> attributes differ; the cell content is identical. The
  // output takes the new side's attributes and emits no diff markers. This
  // is deliberate (Word does not track table attribute changes either) and
  // is kept here as a regression anchor.
  const before = '<table border="1"><tr><td>A</td></tr></table>'
  const after = '<table border="2" style="width:100%"><tr><td>A</td></tr></table>'
  const expected = '<table border="2" style="width:100%"><tr><td>A</td></tr></table>'

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
909
+
910
it('preserves <colgroup>/<col> within a same-dimension table', () => {
  // One cell's text changes; the <colgroup> block passes through unchanged.
  const before = '<table><colgroup><col span="2"/></colgroup><tr><td>Foo</td><td>Bar</td></tr></table>'
  const after = '<table><colgroup><col span="2"/></colgroup><tr><td>Foo</td><td>Baz</td></tr></table>'
  const expected = [
    '<table><colgroup><col span="2"/></colgroup>',
    "<tr><td>Foo</td><td><del class='diffmod'>Bar</del><ins class='diffmod'>Baz</ins></td></tr>",
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
920
+
921
it('does not duplicate a class when the same class would be injected twice', () => {
  // Edge case: the inserted cell already carries `diffins` (from an earlier
  // run or hand-written input). The class attribute must end up with a
  // single `diffins` token, not two.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr><td class="diffins">B</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    `<tr class='diffins'><td class="diffins"><ins class='diffins'>B</ins></td></tr>`,
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
935
+ })
936
+
937
+ describe('nested tables', () => {
938
it('diffs a nested cell change without disturbing the outer table', () => {
  // Only the text of the inner table's cell changes; the outer table's
  // markup is emitted without any markers.
  const before = '<table><tr><td><table><tr><td>inner A</td></tr></table></td></tr></table>'
  const after = '<table><tr><td><table><tr><td>inner B</td></tr></table></td></tr></table>'
  const expected = [
    '<table><tr><td>',
    "<table><tr><td>inner <del class='diffmod'>A</del><ins class='diffmod'>B</ins></td></tr></table>",
    '</td></tr></table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
948
+ })
949
+
950
+ describe('whitespace handling', () => {
951
it('treats whitespace-only differences in row HTML as equal (no spurious diff)', () => {
  // Old side is pretty-printed, new side is compact; content is the same,
  // so the output is the compact table with no markers.
  const before = '<table>\n <tr>\n <td>A</td>\n </tr>\n</table>'
  const after = '<table><tr><td>A</td></tr></table>'
  const expected = '<table><tr><td>A</td></tr></table>'

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
957
+ })
958
+
959
+ describe('row deletion with empty cells', () => {
960
it('marks empty cells in a deleted row with diffdel even though they have no content', () => {
  // The deleted row's middle cell is empty: it still gets class='diffdel'
  // but no <del> element, since there is no text to wrap.
  const before = '<table><tr><td>A</td><td></td><td>C</td></tr><tr><td>D</td><td></td><td>F</td></tr></table>'
  const after = '<table><tr><td>A</td><td></td><td>C</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td><td></td><td>C</td></tr>',
    "<tr class='diffdel'>",
    "<td class='diffdel'><del class='diffdel'>D</del></td>",
    "<td class='diffdel'></td>",
    "<td class='diffdel'><del class='diffdel'>F</del></td>",
    '</tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
975
+ })
976
+
977
+ // Block-level cell content is the most common shape in legal-doc tables —
978
+ // cells routinely contain `<p>`-wrapped paragraphs, `<ol>`/`<ul>` lists,
979
+ // and `<div>`-wrapped sections. The wrapInlineTextRuns helper walks
980
+ // through tags transparently and only wraps non-whitespace text runs, so
981
+ // `<ins>`/`<del>` ends up *inside* every block-level container — keeping
982
+ // the output as valid HTML (the spec disallows `<ins>` directly wrapping
983
+ // a `<p>`, but `<p><ins>...</ins></p>` is fine).
984
+ describe('block-level cell content', () => {
985
it('wraps each paragraph independently in an inserted row with multi-paragraph cells', () => {
  // The inserted cell holds two <p> elements; each gets its own <ins>
  // inside the paragraph rather than one <ins> around both.
  const before = '<table><tr><td>Header</td></tr></table>'
  const after =
    '<table><tr><td>Header</td></tr><tr><td><p>Paragraph one</p><p>Paragraph two</p></td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>Header</td></tr>',
    "<tr class='diffins'>",
    "<td class='diffins'>",
    "<p><ins class='diffins'>Paragraph one</ins></p>",
    "<p><ins class='diffins'>Paragraph two</ins></p>",
    '</td>',
    '</tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1002
+
1003
it('wraps each paragraph independently in a deleted row with multi-paragraph cells', () => {
  // Deletion counterpart: each <p> of the removed cell gets its own <del>.
  const before = '<table><tr><td><p>Paragraph one</p><p>Paragraph two</p></td></tr><tr><td>Keep</td></tr></table>'
  const after = '<table><tr><td>Keep</td></tr></table>'
  const expected = [
    '<table>',
    "<tr class='diffdel'>",
    "<td class='diffdel'>",
    "<p><del class='diffdel'>Paragraph one</del></p>",
    "<p><del class='diffdel'>Paragraph two</del></p>",
    '</td>',
    '</tr>',
    '<tr><td>Keep</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1019
+
1020
it('wraps text inside paragraphs containing inline formatting in an inserted row', () => {
  // Typical legal-document paragraph with bold and italic runs mid-sentence.
  // Every text run is wrapped separately, inside its formatting element.
  const before = '<table><tr><td>A</td></tr></table>'
  const after =
    '<table><tr><td>A</td></tr><tr><td><p>The <strong>Cross-Default</strong> provisions of <em>Section 5(a)</em> apply.</p></td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>",
    '<p>',
    "<ins class='diffins'>The </ins>",
    "<strong><ins class='diffins'>Cross-Default</ins></strong>",
    "<ins class='diffins'> provisions of </ins>",
    "<em><ins class='diffins'>Section 5(a)</ins></em>",
    "<ins class='diffins'> apply.</ins>",
    '</p>',
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1041
+
1042
it('wraps each list item independently in an inserted row with an <ol>', () => {
  // Ordered list with <p>-wrapped items: the <ins> sits inside each <p>,
  // and the list's own attribute is untouched.
  const before = '<table><tr><td>A</td></tr></table>'
  const after =
    '<table><tr><td>A</td></tr><tr><td><ol data-type="a"><li><p>First</p></li><li><p>Second</p></li></ol></td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>",
    '<ol data-type="a">',
    "<li><p><ins class='diffins'>First</ins></p></li>",
    "<li><p><ins class='diffins'>Second</ins></p></li>",
    '</ol>',
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1059
+
1060
it('wraps each list item independently in a deleted row with a <ul>', () => {
  // Deleted row containing a bullet list: each <li> gets its own <del>.
  const before = '<table><tr><td><ul><li>Item 1</li><li>Item 2</li></ul></td></tr><tr><td>Keep</td></tr></table>'
  const after = '<table><tr><td>Keep</td></tr></table>'
  const expected = [
    '<table>',
    "<tr class='diffdel'><td class='diffdel'>",
    '<ul>',
    "<li><del class='diffdel'>Item 1</del></li>",
    "<li><del class='diffdel'>Item 2</del></li>",
    '</ul>',
    '</td></tr>',
    '<tr><td>Keep</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1076
+
1077
it('wraps text inside a div-wrapped paragraph in an inserted row', () => {
  // <div class="justify"><p>…</p></div>: the <ins> lands inside the <p>,
  // and the div keeps its own class unchanged.
  const before = '<table><tr><td>A</td></tr></table>'
  const after =
    '<table><tr><td>A</td></tr><tr><td><div class="justify"><p>Inside a div</p></div></td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>",
    `<div class="justify"><p><ins class='diffins'>Inside a div</ins></p></div>`,
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1091
+
1092
it('preserves an empty <p> as-is in an inserted row (no spurious <ins> on whitespace)', () => {
  // The second paragraph is empty and must come out as a bare <p></p>.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr><td><p>Real content</p><p></p></td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>",
    "<p><ins class='diffins'>Real content</ins></p>",
    '<p></p>',
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1106
+
1107
it('handles a paragraph with a hyperlink in an inserted row (anchor passes through, text wraps)', () => {
  // The <a> tag and its href are emitted unchanged; the link text and the
  // text on either side are each wrapped in their own <ins>.
  const before = '<table><tr><td>A</td></tr></table>'
  const after =
    '<table><tr><td>A</td></tr><tr><td><p>See <a href="http://example.com">the docs</a> for details.</p></td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>",
    '<p>',
    "<ins class='diffins'>See </ins>",
    '<a href="http://example.com">',
    "<ins class='diffins'>the docs</ins>",
    '</a>',
    "<ins class='diffins'> for details.</ins>",
    '</p>',
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1127
+
1128
it('handles a heading inside a cell in an inserted row', () => {
  // <h3> is treated like any other block container: the <ins> goes inside.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr><td><h3>Section heading</h3><p>Body text</p></td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>",
    "<h3><ins class='diffins'>Section heading</ins></h3>",
    "<p><ins class='diffins'>Body text</ins></p>",
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1142
+
1143
it('preserves a self-closing <br/> inside a paragraph in an inserted row', () => {
  // The <br/> splits the paragraph into two text runs, each wrapped
  // separately with the <br/> left between them.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr><td><p>line one<br/>line two</p></td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>",
    "<p><ins class='diffins'>line one</ins><br/><ins class='diffins'>line two</ins></p>",
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1156
+ })
1157
+
1158
+ // Adversarial / hardening cases surfaced by the second-pass review.
1159
+ // These pin behaviour that was previously broken or untested for inputs
1160
+ // containing comments, CDATA, mixed-case tags, foreign attribute values
1161
+ // that look like class= patterns, etc.
1162
+ describe('hostile / adversarial inputs', () => {
1163
it('handles a processing instruction (<?xml?>) in cell content', () => {
  // Pins the `<?...?>` handling so a later refactor cannot silently break
  // it: the instruction is emitted unchanged and only the text after it is
  // wrapped.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr><td><?xml version="1.0"?>text</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>",
    '<?xml version="1.0"?>',
    "<ins class='diffins'>text</ins>",
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1179
+
1180
it('handles an unquoted class attribute value when injecting diffins', () => {
  // HTML5 permits unquoted simple attribute values, and most generators
  // never emit them, so this branch is otherwise unexercised.
  const oldHtml = '<table><tr><td>A</td></tr></table>'
  const newHtml = '<table><tr><td>A</td></tr><tr><td class=existing>B</td></tr></table>'

  const result = HtmlDiff.execute(oldHtml, newHtml)

  // The existing unquoted class must be preserved as written.
  expect(result).toContain('class=existing')

  // The injected class must land on the <td> itself. A bare
  // `toContain('diffins')` proves nothing here, because the inserted <tr>
  // and the <ins> wrapper always carry `diffins`. Scope the check to the
  // opening tag that holds `class=existing`; the exact writeback form is
  // left open, only the presence of the marker inside that tag is pinned.
  const cellTag = result.match(/<td\b[^>]*class=existing[^>]*>/)?.[0] ?? ''
  expect(cellTag).toContain('diffins')
})
1194
+
1195
it('passes content through verbatim when a cell contains a lone `<` (malformed)', () => {
  // The inserted cell's text contains a stray `<` that does not open a real
  // tag. The output for this input is not expected to be pretty, only
  // predictable and crash-free, so the checks below are deliberately loose.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr><td>fee < cost</td></tr></table>'

  const diffed = HtmlDiff.execute(before, after)

  // The row is still reported as inserted.
  expect(diffed).toContain("<tr class='diffins'>")
  // The text on both sides of the stray `<` is still present.
  for (const fragment of ['fee', 'cost']) {
    expect(diffed).toContain(fragment)
  }
})
1210
+
1211
it('handles a malformed table tag missing its closing > (no crash, falls back)', () => {
  // `<table<tr>` is not a valid opening tag, so no table should be
  // recognised and the input is expected to go through the word-level diff.
  const oldHtml = '<p>before</p><table<tr><td>A</td></tr></table><p>after</p>'
  const newHtml = '<p>before</p><table<tr><td>B</td></tr></table><p>after</p>'

  const result = HtmlDiff.execute(oldHtml, newHtml)

  // A bare non-empty check would also pass if the diff echoed one input
  // back unchanged. Pin what the test is meant to show instead: the
  // surrounding prose survives, both the old and the new cell value are
  // present, and at least one del/ins marker is emitted. The same
  // A-and-B check is used by the unclosed-table test in this file.
  expect(result).toContain('before')
  expect(result).toContain('after')
  expect(result).toContain('A')
  expect(result).toContain('B')
  expect(result).toMatch(/<(?:del|ins)\b/)
})
1222
+
1223
it('passes an HTML comment with embedded > through cell content unmolested', () => {
  // Word-exported HTML often has comments containing `>` (conditional
  // comments, for example). Regression guard: the comment used to be cut at
  // the inner `>` with half of it wrapped in <ins>. It must now be emitted
  // whole, followed by the wrapped text.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr><td><!-- note: >5% threshold -->text</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>",
    "<!-- note: >5% threshold --><ins class='diffins'>text</ins>",
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1239
+
1240
it('handles a CDATA section inside cell content', () => {
  // The CDATA block (which itself contains `>`) is emitted unchanged; only
  // the text after it is wrapped.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr><td><![CDATA[ x > y ]]>text</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>",
    "<![CDATA[ x > y ]]><ins class='diffins'>text</ins>",
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1253
+
1254
it('finds the real class attribute even when a foreign attribute contains `class=`-like text', () => {
  // Regression guard: class injection used to rely on a flat regex that
  // could match inside any quoted attribute value, so a
  // `<td title="see class='x'">` had its `title` mangled and never got the
  // diff class. The title must come through intact, with a separate class
  // attribute added after it.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = `<table><tr><td>A</td></tr><tr><td title="see class='important' note">B</td></tr></table>`
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'>",
    `<td title="see class='important' note" class='diffins'>`,
    "<ins class='diffins'>B</ins>",
    '</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1272
+
1273
it('does not duplicate "mod" when the cell already has a partial overlap with the multi-word class', () => {
  // The cell already has class "mod" and the injected class is
  // "mod colspan": the merged value must be "mod colspan", never
  // "mod mod colspan".
  const before = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
  const after = '<table><tr><td colspan="2" class="mod">AB</td></tr><tr><td>C</td><td>D</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td colspan="2" class="mod colspan">AB</td></tr>',
    '<tr><td>C</td><td>D</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1286
+
1287
it('handles mixed-case tag names', () => {
  // Tag casing from the input is kept as-is in the output.
  const before = '<TABLE><TR><Td>A</Td><Td>B</Td></TR></TABLE>'
  const after = '<TABLE><TR><Td>A</Td><Td>C</Td></TR></TABLE>'
  const expected =
    "<TABLE><TR><Td>A</Td><Td><del class='diffmod'>B</del><ins class='diffmod'>C</ins></Td></TR></TABLE>"

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1295
+
1296
it('handles whitespace inside opening tags', () => {
  // Spaces around `=` and before `>` in the <td> tag are kept unchanged.
  const before = '<table><tr><td class = "highlight" >A</td></tr></table>'
  const after = '<table><tr><td class = "highlight" >B</td></tr></table>'
  const expected = `<table><tr><td class = "highlight" ><del class='diffmod'>A</del><ins class='diffmod'>B</ins></td></tr></table>`

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1304
+
1305
it('handles attribute values containing > inside quotes', () => {
  // The `>` inside the quoted data-cond value must not end the tag early.
  const before = '<table><tr><td data-cond="x > 0">Old</td></tr></table>'
  const after = '<table><tr><td data-cond="x > 0">New</td></tr></table>'
  const expected = `<table><tr><td data-cond="x > 0"><del class='diffmod'>Old</del><ins class='diffmod'>New</ins></td></tr></table>`

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1313
+
1314
it('falls back to the word-level diff when a table exceeds the row-count safety cap', () => {
  // Both tables have at least 1600 rows, above the MAX_TABLE_ROWS=1500 cap,
  // so the structure-aware table pass is expected to bail and leave the
  // input to the word-level diff.
  //
  // The new side also gains one trailing row. With only an in-cell text
  // edit, neither negative assertion below could ever fail, so the test
  // could not tell the fallback from the structure-aware path. An added
  // row can: the structure-aware path marks it with `<tr class='diffins'>`
  // (see the row-insert tests in this file), while the word-level diff is
  // expected to wrap only the text, as in the table-count-mismatch tests.
  const buildBigTable = (rowCount: number, cellContent: (r: number) => string) => {
    const rows = Array.from({ length: rowCount }, (_, r) => `<tr><td>${cellContent(r)}</td></tr>`)
    return `<table>${rows.join('')}</table>`
  }
  const oldHtml = buildBigTable(1600, r => `Row ${r}`)
  const newHtml = buildBigTable(1601, r => (r === 100 ? `Row ${r} edited` : `Row ${r}`))

  const result = HtmlDiff.execute(oldHtml, newHtml)

  // The change is still reported, so the diff ran to completion.
  expect(result).toContain('edited')
  // No structural markers, because the cap kicked in.
  expect(result).not.toContain("<tr class='diffins'>")
  expect(result).not.toContain("class='mod colspan'")
})
1335
+
1336
it('does not infinite-loop on an unclosed table tag', () => {
  // Malformed input: `<table>` with no `</table>`. The table cannot be
  // matched to a closing tag, so it is expected to be skipped and handled
  // by the word-level diff. Completing at all is the main point; the
  // checks only confirm both cell values made it into the output.
  const before = '<table><tr><td>A</td></tr>'
  const after = '<table><tr><td>B</td></tr>'

  const diffed = HtmlDiff.execute(before, after)

  for (const cellText of ['A', 'B']) {
    expect(diffed).toContain(cellText)
  }
})
1347
+
1348
it('handles cell content that is whitespace-only without emitting a spurious <ins>', () => {
  // The inserted cell holds only whitespace: it gets the diffins class but
  // its content is emitted unchanged, with no <ins> wrapper.
  const before = '<table><tr><td>A</td></tr></table>'
  const after = '<table><tr><td>A</td></tr><tr><td>\n \t</td></tr></table>'
  const expected = [
    '<table>',
    '<tr><td>A</td></tr>',
    "<tr class='diffins'><td class='diffins'>\n \t</td></tr>",
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1356
+ })
1357
+
1358
+ describe('combined edits', () => {
1359
it('handles a column added together with a new row inserted in the middle', () => {
  // Row count and cell count both change: a third column is added to every
  // row and a whole new row is inserted between the two existing ones.
  const before = '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>'
  const after =
    '<table><tr><td>A</td><td>B</td><td>E</td></tr><tr><td>New</td><td>Row</td><td>F</td></tr><tr><td>C</td><td>D</td><td>G</td></tr></table>'
  const expected = [
    '<table>',
    "<tr><td>A</td><td>B</td><td class='diffins'><ins class='diffins'>E</ins></td></tr>",
    "<tr class='diffins'>",
    "<td class='diffins'><ins class='diffins'>New</ins></td>",
    "<td class='diffins'><ins class='diffins'>Row</ins></td>",
    "<td class='diffins'><ins class='diffins'>F</ins></td>",
    '</tr>',
    "<tr><td>C</td><td>D</td><td class='diffins'><ins class='diffins'>G</ins></td></tr>",
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1379
+
1380
it('places a new row inserted at the start above an edited row (fuzzy matching)', () => {
  // Order-preservation regression: the paired (old, new) edited row used to
  // be emitted at the deleted row's alignment position, which could put it
  // before the unpaired inserted row. It must now be emitted at the
  // inserted row's position so the output follows new-side row order.
  const before = '<table><tr><td>Edited row</td></tr><tr><td>Same row</td></tr></table>'
  const after = '<table><tr><td>New row</td></tr><tr><td>Edited rowX</td></tr><tr><td>Same row</td></tr></table>'
  const expected = [
    '<table>',
    "<tr class='diffins'><td class='diffins'><ins class='diffins'>New row</ins></td></tr>",
    "<tr><td>Edited <del class='diffmod'>row</del><ins class='diffmod'>rowX</ins></td></tr>",
    '<tr><td>Same row</td></tr>',
    '</table>',
  ].join('')

  expect(HtmlDiff.execute(before, after)).toEqual(expected)
})
1397
+
1398
it('leaves an unchanged colspan section row untouched alongside a normal-row content edit', () => {
  // Real-world legal-document shape: a header row, a colspan section
  // header, and content rows below. Only one cell of the content row is
  // edited ("Old A" -> "New A"); the colspan row is identical on both sides
  // and must be emitted without markers. The title was reworded because it
  // used to claim an edit to the colspan row, which this fixture never makes.
  const oldHtml =
    '<table>' +
    '<tr><th>Label</th><th>A</th><th>B</th></tr>' +
    '<tr><td colspan="3">Section header</td></tr>' +
    '<tr><td>Row 1</td><td>Old A</td><td>Same B</td></tr>' +
    '</table>'
  const newHtml =
    '<table>' +
    '<tr><th>Label</th><th>A</th><th>B</th></tr>' +
    '<tr><td colspan="3">Section header</td></tr>' +
    '<tr><td>Row 1</td><td>New A</td><td>Same B</td></tr>' +
    '</table>'

  expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
    '<table>' +
      '<tr><th>Label</th><th>A</th><th>B</th></tr>' +
      '<tr><td colspan="3">Section header</td></tr>' +
      "<tr><td>Row 1</td><td><del class='diffmod'>Old</del><ins class='diffmod'>New</ins> A</td><td>Same B</td></tr>" +
      '</table>'
  )
})
1423
+
1424
+ it('handles column-add alongside content edit in the SAME row (cell-level fuzzy matching)', () => {
1425
+ // Real-world scenario: a column was inserted at position 1 AND
1426
+ // one of the existing cells got new content appended. Without
1427
+ // cell-level fuzzy matching, the cell-LCS exact-match misses the
1428
+ // "IRS Forms…" pairing, producing a 5-cell row (phantom delete +
1429
+ // two inserts) instead of 4 cells with one inline content edit.
1430
+ const oldHtml =
1431
+ '<table>' +
1432
+ '<tr><th>Party</th><th>Form</th><th>Date</th></tr>' +
1433
+ '<tr><td>Party A</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto).</td><td>Upon execution.</td></tr>' +
1434
+ '<tr><td>Party B</td><td>IRS Form W-9, as applicable.</td><td>Upon execution.</td></tr>' +
1435
+ '</table>'
1436
+ const newHtml =
1437
+ '<table>' +
1438
+ '<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>' +
1439
+ "<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto). Here's some extra content</td><td>Upon execution.</td></tr>" +
1440
+ '<tr><td>Party B</td><td>A</td><td>IRS Form W-9, as applicable.</td><td>Upon execution.</td></tr>' +
1441
+ '</table>'
1442
+
1443
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1444
+ '<table>' +
1445
+ // Header row: extra column inserted at position 1
1446
+ '<tr><th>Party</th>' +
1447
+ "<th class='diffins'><ins class='diffins'>Extra column</ins></th>" +
1448
+ '<th>Form</th><th>Date</th></tr>' +
1449
+ // Party A row: extra column cell + content edit on the IRS Forms cell
1450
+ '<tr><td>Party A</td>' +
1451
+ "<td class='diffins'><ins class='diffins'>Yes</ins></td>" +
1452
+ "<td>IRS Forms W-8BEN-E and W-8ECI (or any successors thereto).<ins class='diffins'>&nbsp;Here's some extra content</ins></td>" +
1453
+ '<td>Upon execution.</td></tr>' +
1454
+ // Party B row: extra column cell, IRS Form W-9 cell unchanged
1455
+ '<tr><td>Party B</td>' +
1456
+ "<td class='diffins'><ins class='diffins'>A</ins></td>" +
1457
+ '<td>IRS Form W-9, as applicable.</td>' +
1458
+ '<td>Upon execution.</td></tr>' +
1459
+ '</table>'
1460
+ )
1461
+ })
1462
+
1463
+ it('handles a rowspan cell sharing a row with normal cells (column-add adjacency)', () => {
1464
+ // The rowspan'd cell occupies row 0 col 0 and row 1's col 0 slot
1465
+ // (absorbed). Old: row 0 = [A (rowspan=2), B], row 1 = [C], with C
1466
+ // sitting in logical col 1. New: same rowspan structure, every
1467
+ // cell's text gains " prime", and row 1 gets a second cell D (col 0
1468
+ // of row 1 is still absorbed, col 1 is C prime, col 2 is new D).
1469
+ // detectVerticalMerge bails (multi-cell row), so this falls
1470
+ // through to per-row diff with cell-level LCS.
1471
+ const oldHtml = '<table>' + '<tr><td rowspan="2">A</td><td>B</td></tr>' + '<tr><td>C</td></tr>' + '</table>'
1472
+ const newHtml =
1473
+ '<table>' +
1474
+ '<tr><td rowspan="2">A prime</td><td>B prime</td></tr>' +
1475
+ '<tr><td>C prime</td><td>D</td></tr>' +
1476
+ '</table>'
1477
+
1478
+ // The exact emission shape is messy because the algorithm doesn't
1479
+ // model column-position-with-rowspan, so only loose substring
1480
+ // presence is asserted here (not the full output string).
1481
+ const result = HtmlDiff.execute(oldHtml, newHtml)
1482
+ expect(result).toContain('rowspan="2"')
1483
+ expect(result).toContain('A')
1484
+ expect(result).toContain('prime')
1485
+ expect(result).toContain('D')
1486
+ })
1487
+ })
1488
+
1489
+ describe('block-level html5 wrappers', () => {
1490
+ it('preserves <tfoot> and diffs cell content inside it', () => {
1491
+ const oldHtml = '<table><tbody><tr><td>A</td></tr></tbody><tfoot><tr><td>Total: 1</td></tr></tfoot></table>'
1492
+ const newHtml = '<table><tbody><tr><td>A</td></tr></tbody><tfoot><tr><td>Total: 2</td></tr></tfoot></table>'
1493
+
1494
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1495
+ '<table><tbody><tr><td>A</td></tr></tbody><tfoot>' +
1496
+ "<tr><td>Total: <del class='diffmod'>1</del><ins class='diffmod'>2</ins></td></tr>" +
1497
+ '</tfoot></table>'
1498
+ )
1499
+ })
1500
+
1501
+ it('handles multiple <tbody> blocks within one table', () => {
1502
+ const oldHtml =
1503
+ '<table><tbody><tr><th>H1</th></tr></tbody><tbody></tbody><tbody><tr><td>A</td></tr></tbody></table>'
1504
+ const newHtml =
1505
+ '<table><tbody><tr><th>H1</th></tr></tbody><tbody></tbody><tbody><tr><td>A</td></tr><tr><td>B</td></tr></tbody></table>'
1506
+
1507
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1508
+ '<table><tbody><tr><th>H1</th></tr></tbody><tbody></tbody><tbody>' +
1509
+ '<tr><td>A</td></tr>' +
1510
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>B</ins></td></tr>" +
1511
+ '</tbody></table>'
1512
+ )
1513
+ })
1514
+ })
1515
+
1516
+ describe('inline elements in inserted/deleted rows', () => {
1517
+ it('passes <sup> through and wraps inner text in <ins>', () => {
1518
+ const oldHtml = '<table><tr><td>A</td></tr></table>'
1519
+ const newHtml = '<table><tr><td>A</td></tr><tr><td>ISDA SIMM<sup>TM</sup></td></tr></table>'
1520
+
1521
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1522
+ '<table>' +
1523
+ '<tr><td>A</td></tr>' +
1524
+ "<tr class='diffins'><td class='diffins'>" +
1525
+ "<ins class='diffins'>ISDA SIMM</ins>" +
1526
+ "<sup><ins class='diffins'>TM</ins></sup>" +
1527
+ '</td></tr>' +
1528
+ '</table>'
1529
+ )
1530
+ })
1531
+
1532
+ it('wraps text inside a nested <table> inside a deleted row', () => {
1533
+ const oldHtml =
1534
+ '<table>' + '<tr><td><table><tr><td>Inner A</td></tr></table></td></tr>' + '<tr><td>Keep</td></tr>' + '</table>'
1535
+ const newHtml = '<table><tr><td>Keep</td></tr></table>'
1536
+
1537
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1538
+ '<table>' +
1539
+ "<tr class='diffdel'><td class='diffdel'>" +
1540
+ '<table><tr><td>' +
1541
+ "<del class='diffdel'>Inner A</del>" +
1542
+ '</td></tr></table>' +
1543
+ '</td></tr>' +
1544
+ '<tr><td>Keep</td></tr>' +
1545
+ '</table>'
1546
+ )
1547
+ })
1548
+ })
1549
+
1550
+ describe('fuzzy threshold boundary', () => {
1551
+ it('does NOT pair rows when similarity is exactly at the threshold (strict >)', () => {
1552
+ // rowText = "abcdef" (6 chars) vs "abcxyz" (6 chars).
1553
+ // Prefix = 3 ("abc"), suffix = 0 → similarity = 3/6 = 0.5 exactly.
1554
+ // Threshold is `> 0.5` (strict), so this pair should be rejected.
1555
+ const oldHtml = '<table><tr><td>Same</td></tr><tr><td>abcdef</td></tr></table>'
1556
+ const newHtml = '<table><tr><td>Same</td></tr><tr><td>abcxyz</td></tr><tr><td>Brand new</td></tr></table>'
1557
+
1558
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1559
+ '<table>' +
1560
+ '<tr><td>Same</td></tr>' +
1561
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>abcdef</del></td></tr>" +
1562
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>abcxyz</ins></td></tr>" +
1563
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>Brand new</ins></td></tr>" +
1564
+ '</table>'
1565
+ )
1566
+ })
1567
+
1568
+ it('DOES pair rows when similarity is just above the threshold', () => {
1569
+ // rowText "abcdefg" (7) vs "abcdxxx" (7). Prefix = 4, suffix = 0
1570
+ // → similarity = 4/7 ≈ 0.571, above 0.5.
1571
+ const oldHtml = '<table><tr><td>Same</td></tr><tr><td>abcdefg</td></tr></table>'
1572
+ const newHtml = '<table><tr><td>Same</td></tr><tr><td>abcdxxx</td></tr><tr><td>Brand new</td></tr></table>'
1573
+
1574
+ // The cell-level diff is word-based, so "abcdefg" and "abcdxxx"
1575
+ // (no whitespace inside) are seen as one word each — the resulting
1576
+ // cell-level output is a whole-word replacement. The row-level
1577
+ // fuzzy match is what put them on the same row instead of
1578
+ // emitting two unrelated full-row entries; that's the win here.
1579
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1580
+ '<table>' +
1581
+ '<tr><td>Same</td></tr>' +
1582
+ "<tr><td><del class='diffmod'>abcdefg</del><ins class='diffmod'>abcdxxx</ins></td></tr>" +
1583
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>Brand new</ins></td></tr>" +
1584
+ '</table>'
1585
+ )
1586
+ })
1587
+ })
1588
+
1589
+ // Coverage gaps surfaced by the v8 report: the cell-LCS fallback path
1590
+ // (diffStructurallyAlignedRow + cellKey + pairSimilarUnmatchedCells) is
1591
+ // only entered when the per-row column delta exceeds MAX_COLUMN_DELTA
1592
+ // (6) or the row's logical width exceeds MAX_COLUMN_SEARCH_WIDTH (40).
1593
+ // None of the existing tests trigger that. These tests exercise the
1594
+ // fallback and pin its behaviour.
1595
+ describe('cell-LCS fallback for very-wide column changes', () => {
1596
+ it('handles 8 columns inserted alongside existing cells (delta > MAX_COLUMN_DELTA)', () => {
1597
+ // Old: 3 cells. New: 11 cells (8 columns added). Exact-LCS finds
1598
+ // A, B, C as matches; the 8 unmatched new cells are inserted.
1599
+ const oldHtml = '<table><tr><td>A</td><td>B</td><td>C</td></tr></table>'
1600
+ const newHtml =
1601
+ '<table><tr><td>A</td>' +
1602
+ '<td>X1</td><td>X2</td><td>X3</td><td>X4</td>' +
1603
+ '<td>X5</td><td>X6</td><td>X7</td><td>X8</td>' +
1604
+ '<td>B</td><td>C</td></tr></table>'
1605
+
1606
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1607
+ '<table><tr>' +
1608
+ '<td>A</td>' +
1609
+ "<td class='diffins'><ins class='diffins'>X1</ins></td>" +
1610
+ "<td class='diffins'><ins class='diffins'>X2</ins></td>" +
1611
+ "<td class='diffins'><ins class='diffins'>X3</ins></td>" +
1612
+ "<td class='diffins'><ins class='diffins'>X4</ins></td>" +
1613
+ "<td class='diffins'><ins class='diffins'>X5</ins></td>" +
1614
+ "<td class='diffins'><ins class='diffins'>X6</ins></td>" +
1615
+ "<td class='diffins'><ins class='diffins'>X7</ins></td>" +
1616
+ "<td class='diffins'><ins class='diffins'>X8</ins></td>" +
1617
+ '<td>B</td>' +
1618
+ '<td>C</td>' +
1619
+ '</tr></table>'
1620
+ )
1621
+ })
1622
+
1623
+ it('handles 8 columns inserted alongside a content edit (cell fuzzy match in fallback)', () => {
1624
+ // The fallback path's pairSimilarUnmatchedCells should pair the
1625
+ // edited cell (OldText → NewText) by content similarity so it
1626
+ // emits as one content-edit cell, not as delete + insert.
1627
+ const oldHtml = '<table><tr><td>A</td><td>OldText</td><td>C</td></tr></table>'
1628
+ const newHtml =
1629
+ '<table><tr><td>A</td>' +
1630
+ '<td>X1</td><td>X2</td><td>X3</td><td>X4</td>' +
1631
+ '<td>X5</td><td>X6</td><td>X7</td>' +
1632
+ '<td>NewText</td>' +
1633
+ '<td>C</td></tr></table>'
1634
+
1635
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1636
+ '<table><tr>' +
1637
+ '<td>A</td>' +
1638
+ "<td class='diffins'><ins class='diffins'>X1</ins></td>" +
1639
+ "<td class='diffins'><ins class='diffins'>X2</ins></td>" +
1640
+ "<td class='diffins'><ins class='diffins'>X3</ins></td>" +
1641
+ "<td class='diffins'><ins class='diffins'>X4</ins></td>" +
1642
+ "<td class='diffins'><ins class='diffins'>X5</ins></td>" +
1643
+ "<td class='diffins'><ins class='diffins'>X6</ins></td>" +
1644
+ "<td class='diffins'><ins class='diffins'>X7</ins></td>" +
1645
+ "<td><del class='diffmod'>OldText</del><ins class='diffmod'>NewText</ins></td>" +
1646
+ '<td>C</td>' +
1647
+ '</tr></table>'
1648
+ )
1649
+ })
1650
+
1651
+ it('handles many columns deleted (delta < -MAX_COLUMN_DELTA)', () => {
1652
+ const oldHtml =
1653
+ '<table><tr><td>A</td>' +
1654
+ '<td>X1</td><td>X2</td><td>X3</td><td>X4</td>' +
1655
+ '<td>X5</td><td>X6</td><td>X7</td><td>X8</td>' +
1656
+ '<td>B</td><td>C</td></tr></table>'
1657
+ const newHtml = '<table><tr><td>A</td><td>B</td><td>C</td></tr></table>'
1658
+
1659
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1660
+ '<table><tr>' +
1661
+ '<td>A</td>' +
1662
+ "<td class='diffdel'><del class='diffdel'>X1</del></td>" +
1663
+ "<td class='diffdel'><del class='diffdel'>X2</del></td>" +
1664
+ "<td class='diffdel'><del class='diffdel'>X3</del></td>" +
1665
+ "<td class='diffdel'><del class='diffdel'>X4</del></td>" +
1666
+ "<td class='diffdel'><del class='diffdel'>X5</del></td>" +
1667
+ "<td class='diffdel'><del class='diffdel'>X6</del></td>" +
1668
+ "<td class='diffdel'><del class='diffdel'>X7</del></td>" +
1669
+ "<td class='diffdel'><del class='diffdel'>X8</del></td>" +
1670
+ '<td>B</td>' +
1671
+ '<td>C</td>' +
1672
+ '</tr></table>'
1673
+ )
1674
+ })
1675
+
1676
+ it('preserves whitespace between inline elements in a fully-inserted cell (no spurious <ins>)', () => {
1677
+ // wrapInlineTextRuns walks content; when it encounters
1678
+ // whitespace-only text between two inline elements (e.g. the
1679
+ // space between `<strong>` and `<em>`), it passes the whitespace
1680
+ // through unwrapped — the body of the `else` branch on the
1681
+ // text-run path.
1682
+ const oldHtml = '<table><tr><td>A</td></tr></table>'
1683
+ const newHtml = '<table><tr><td>A</td></tr><tr><td><strong>a</strong> <em>b</em></td></tr></table>'
1684
+
1685
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1686
+ '<table>' +
1687
+ '<tr><td>A</td></tr>' +
1688
+ "<tr class='diffins'><td class='diffins'>" +
1689
+ "<strong><ins class='diffins'>a</ins></strong> <em><ins class='diffins'>b</ins></em>" +
1690
+ '</td></tr>' +
1691
+ '</table>'
1692
+ )
1693
+ })
1694
+
1695
+ it('handles a colspan-changed row where some cells have matching colspans', () => {
1696
+ // diffColspanChangedRow walks cells; when oSpan === nSpan for a
1697
+ // pair, it emits a content diff for that cell pair. This branch
1698
+ // wasn't exercised — needs a row with BOTH a colspan change AND
1699
+ // matching-colspan cells in the same row.
1700
+ const oldHtml =
1701
+ '<table><tr>' + '<td>FirstA</td>' + '<td>MidA</td><td>MidB</td>' + '<td>LastA</td>' + '</tr></table>'
1702
+ const newHtml =
1703
+ '<table><tr>' + '<td>FirstB</td>' + '<td colspan="2">Merged AB</td>' + '<td>LastB</td>' + '</tr></table>'
1704
+
1705
+ // First and last cells should diff content cell-by-cell (matching
1706
+ // colspans = 1 on both sides); middle two old cells merge into
1707
+ // one colspan=2 cell tagged 'mod colspan'. Asserted as an exact
1708
+ // string so that a positional swap of first/last is caught — a
1709
+ // swap-blind set of `toContain` assertions would not flag it.
1710
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1711
+ '<table><tr>' +
1712
+ "<td><del class='diffmod'>FirstA</del><ins class='diffmod'>FirstB</ins></td>" +
1713
+ `<td colspan="2" class='mod colspan'>Merged AB</td>` +
1714
+ "<td><del class='diffmod'>LastA</del><ins class='diffmod'>LastB</ins></td>" +
1715
+ '</tr></table>'
1716
+ )
1717
+ })
1718
+
1719
+ it('handles row wider than MAX_COLUMN_SEARCH_WIDTH (40 cells) — fallback to cell-LCS', () => {
1720
+ // 50-cell row in old, 51-cell row in new (1 column added at
1721
+ // start). MAX_COLUMN_SEARCH_WIDTH guard prevents the
1722
+ // combinatorial search; fallback to cell-LCS which finds 50
1723
+ // exact matches and the 1 new cell as an insertion.
1724
+ const oldCells = Array.from({ length: 50 }, (_, i) => `<td>c${i}</td>`).join('')
1725
+ const newCells = `<td>NEW</td>${oldCells}`
1726
+ const oldHtml = `<table><tr>${oldCells}</tr></table>`
1727
+ const newHtml = `<table><tr>${newCells}</tr></table>`
1728
+
1729
+ const result = HtmlDiff.execute(oldHtml, newHtml)
1730
+ // We should see exactly one inserted cell and 50 preserved cells.
1731
+ expect(result).toContain("<td class='diffins'><ins class='diffins'>NEW</ins></td>")
1732
+ // Sanity: total td count is 51 (no phantoms).
1733
+ const tdCount = (result.match(/<td[\s>]/g) || []).length
1734
+ expect(tdCount).toBe(51)
1735
+ })
1736
+ })
1737
+
1738
+ describe('attribute edge cases', () => {
1739
+ it('does not introduce a leading space when the existing class attribute is empty', () => {
1740
+ const oldHtml = '<table><tr><td>A</td></tr></table>'
1741
+ const newHtml = '<table><tr><td>A</td></tr><tr><td class="">B</td></tr></table>'
1742
+
1743
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1744
+ '<table>' +
1745
+ '<tr><td>A</td></tr>' +
1746
+ "<tr class='diffins'><td class=\"diffins\"><ins class='diffins'>B</ins></td></tr>" +
1747
+ '</table>'
1748
+ )
1749
+ })
1750
+
1751
+ it('parses unquoted span attribute values (e.g. colspan=2)', () => {
1752
+ const oldHtml = '<table><tr><td colspan=2>AB</td></tr></table>'
1753
+ const newHtml = '<table><tr><td>A</td><td>B</td></tr></table>'
1754
+
1755
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1756
+ "<table><tr><td class='mod colspan'>A</td><td class='mod colspan'>B</td></tr></table>"
1757
+ )
1758
+ })
1759
+ })
1760
+
1761
+ // The combinatorial position search can encounter score ties when
1762
+ // inserted cells have content that is similar both to each other and to
1763
+ // existing cells (e.g. boilerplate "N/A" in a legal schedule). The
1764
+ // algorithm resolves ties by combination-iteration order, so the choice
1765
+ // of which specific column gets the diffins marker is deterministic
1766
+ // but not necessarily the "intuitive" one. These tests lock in the
1767
+ // observed behaviour and guard against silent regressions in the
1768
+ // structural shape: all original cells must survive unmarked, and the
1769
+ // inserted-marker count must equal the column delta.
1770
+ describe('combinatorial column search — score-tied inputs', () => {
1771
+ it('handles delta=2 with content-similar inserts (N/A boilerplate)', () => {
1772
+ const oldHtml = '<table><tr><td>N/A</td><td>Term</td><td>Amount</td><td>N/A</td></tr></table>'
1773
+ const newHtml =
1774
+ '<table><tr><td>N/A</td><td>N/A</td><td>Term</td><td>N/A</td><td>Amount</td><td>N/A</td></tr></table>'
1775
+
1776
+ const result = HtmlDiff.execute(oldHtml, newHtml)
1777
+ // Both inserted N/A cells must be marked diffins.
1778
+ const insMarkers = (result.match(/<td class='diffins'>/g) || []).length
1779
+ expect(insMarkers).toBe(2)
1780
+ // Total td count must be 6 (no phantoms).
1781
+ const tdCount = (result.match(/<td[\s>]/g) || []).length
1782
+ expect(tdCount).toBe(6)
1783
+ // Term and Amount must appear as unmarked preserved cells — they're
1784
+ // not similar to N/A, so the algorithm has no ambiguity around them.
1785
+ // (toContain checks presence only; the td-count check guards duplicates.)
1786
+ expect(result).toContain('<td>Term</td>')
1787
+ expect(result).toContain('<td>Amount</td>')
1788
+ })
1789
+
1790
+ it('handles delta=6 (the MAX_COLUMN_DELTA cap) without misalignment', () => {
1791
+ const oldHtml = '<table><tr><td>A</td><td>B</td><td>C</td><td>D</td></tr></table>'
1792
+ const newHtml =
1793
+ '<table><tr>' +
1794
+ '<td>A</td><td>X1</td><td>X2</td><td>B</td><td>X3</td>' +
1795
+ '<td>X4</td><td>C</td><td>X5</td><td>X6</td><td>D</td>' +
1796
+ '</tr></table>'
1797
+
1798
+ const result = HtmlDiff.execute(oldHtml, newHtml)
1799
+ // Exactly 6 cells inserted, 4 preserved.
1800
+ const insMarkers = (result.match(/<td class='diffins'>/g) || []).length
1801
+ expect(insMarkers).toBe(6)
1802
+ const tdCount = (result.match(/<td[\s>]/g) || []).length
1803
+ expect(tdCount).toBe(10)
1804
+ // All four original cells survive without diff markers (presence only; relative order is not asserted).
1805
+ expect(result).toContain('<td>A</td>')
1806
+ expect(result).toContain('<td>B</td>')
1807
+ expect(result).toContain('<td>C</td>')
1808
+ expect(result).toContain('<td>D</td>')
1809
+ })
1810
+ })
1811
+
1812
+ // orderAlignmentForEmission's `preserved` list is empty when no rows
1813
+ // survive across the diff. The "delete every row" and "insert every
1814
+ // row" cases are corner cases where the float-positioning logic must
1815
+ // degenerate cleanly: every del's primary becomes `-0.5` (predecessor
1816
+ // index -1), every ins's primary is its own newIdx. Both should emit
1817
+ // in oldIdx / newIdx order respectively, with no preserved rows
1818
+ // sandwiched between them.
1819
+ describe('orderAlignmentForEmission — empty preserved list', () => {
1820
+ it('emits every row as diffdel when new is empty (no preserved rows)', () => {
1821
+ const oldHtml =
1822
+ '<table>' +
1823
+ '<tr><td>r1</td></tr>' +
1824
+ '<tr><td>r2</td></tr>' +
1825
+ '<tr><td>r3</td></tr>' +
1826
+ '<tr><td>r4</td></tr>' +
1827
+ '<tr><td>r5</td></tr>' +
1828
+ '</table>'
1829
+ const newHtml = '<table></table>'
1830
+
1831
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1832
+ '<table>' +
1833
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r1</del></td></tr>" +
1834
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r2</del></td></tr>" +
1835
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r3</del></td></tr>" +
1836
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r4</del></td></tr>" +
1837
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>r5</del></td></tr>" +
1838
+ '</table>'
1839
+ )
1840
+ })
1841
+
1842
+ it('emits every row as diffins when old is empty (no preserved rows)', () => {
1843
+ const oldHtml = '<table></table>'
1844
+ const newHtml = '<table>' + '<tr><td>r1</td></tr>' + '<tr><td>r2</td></tr>' + '<tr><td>r3</td></tr>' + '</table>'
1845
+
1846
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1847
+ '<table>' +
1848
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>r1</ins></td></tr>" +
1849
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>r2</ins></td></tr>" +
1850
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>r3</ins></td></tr>" +
1851
+ '</table>'
1852
+ )
1853
+ })
1854
+
1855
+ it('emits all-deletes in order when only the header is preserved', () => {
1856
+ // The header is the only preserved row, so 'preserved' has 1 entry
1857
+ // and every del follows it — the boundary between empty-preserved and a single anchoring row.
1858
+ // NOTE(review): expected order implies each del's predecessor index is the header's, not -1 — confirm.
1859
+ const oldHtml =
1860
+ '<table>' +
1861
+ '<tr><td>Header</td></tr>' +
1862
+ '<tr><td>row a</td></tr>' +
1863
+ '<tr><td>row b</td></tr>' +
1864
+ '<tr><td>row c</td></tr>' +
1865
+ '</table>'
1866
+ const newHtml = '<table><tr><td>Header</td></tr></table>'
1867
+
1868
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1869
+ '<table>' +
1870
+ '<tr><td>Header</td></tr>' +
1871
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row a</del></td></tr>" +
1872
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row b</del></td></tr>" +
1873
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row c</del></td></tr>" +
1874
+ '</table>'
1875
+ )
1876
+ })
1877
+ })
1878
+
1879
+ // Fuzzy row-pairing threshold (0.5) calibrations on ISDA-flavoured
1880
+ // content. The interesting cases are at the edges: rows that share a
1881
+ // little (an enumeration prefix only) — must NOT pair (otherwise an
1882
+ // unrelated rewrite shows up as a single-row content edit); rows that
1883
+ // share a lot of boilerplate text but differ in the meaningful body —
1884
+ // must pair (otherwise the user sees del+ins instead of an edit).
1885
+ describe('fuzzy row pairing — enumerated clauses and shared boilerplate', () => {
1886
+ it('does NOT pair rows that share only an enumeration prefix (different bodies)', () => {
1887
+ // Old has 2 rows, new has 3 rows. The "1." and "2." prefixes are
1888
+ // the only commonality — bodies are completely unrelated.
1889
+ // textSimilarity falls below 0.5 (prefix is 3 chars in 60+; jaccard
1890
+ // is also tiny), so fuzzy pairing must NOT fire — each pair should
1891
+ // emit as a clean del + ins, not a noisy intra-row diff.
1892
+ const oldHtml =
1893
+ '<table>' +
1894
+ '<tr><td>1. Party A shall pay the gross amount on each Payment Date.</td></tr>' +
1895
+ '<tr><td>2. Party B shall deliver collateral on each Calculation Date.</td></tr>' +
1896
+ '</table>'
1897
+ const newHtml =
1898
+ '<table>' +
1899
+ '<tr><td>1. Section intentionally left blank.</td></tr>' +
1900
+ '<tr><td>2. Different boilerplate entirely.</td></tr>' +
1901
+ '<tr><td>3. Brand new clause added here.</td></tr>' +
1902
+ '</table>'
1903
+
1904
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1905
+ '<table>' +
1906
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>1. Party A shall pay the gross amount on each Payment Date.</del></td></tr>" +
1907
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>2. Party B shall deliver collateral on each Calculation Date.</del></td></tr>" +
1908
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>1. Section intentionally left blank.</ins></td></tr>" +
1909
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>2. Different boilerplate entirely.</ins></td></tr>" +
1910
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>3. Brand new clause added here.</ins></td></tr>" +
1911
+ '</table>'
1912
+ )
1913
+ })
1914
+
1915
+ it('DOES pair rows that share a long boilerplate footer (Jaccard kicks in)', () => {
1916
+ // The 9-char body differs, but the 46-char trailing footer is
1917
+ // identical, so shared footer tokens dominate the token set and
1918
+ // token Jaccard is very high. textSimilarity = Math.max(prefix_suffix, jaccard) → must pair.
1919
+ // NOTE(review): by the prefix+suffix formula in the threshold tests the shared suffix alone is 47 of 55 chars, so this may pair without Jaccard — confirm.
1920
+ const footer = ' subject to the terms of the Master Agreement.'
1921
+ const oldHtml = `<table><tr><td>Anchor row</td></tr><tr><td>Alpha now${footer}</td></tr></table>`
1922
+ const newHtml =
1923
+ `<table><tr><td>Anchor row</td></tr><tr><td>Bravo new${footer}</td></tr>` +
1924
+ '<tr><td>Extra row appended</td></tr></table>'
1925
+
1926
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1927
+ '<table>' +
1928
+ '<tr><td>Anchor row</td></tr>' +
1929
+ "<tr><td><del class='diffmod'>Alpha</del><ins class='diffmod'>Bravo</ins> " +
1930
+ "<del class='diffmod'>now</del><ins class='diffmod'>new</ins>" +
1931
+ ' subject to the terms of the Master Agreement.</td></tr>' +
1932
+ "<tr class='diffins'><td class='diffins'><ins class='diffins'>Extra row appended</ins></td></tr>" +
1933
+ '</table>'
1934
+ )
1935
+ })
1936
+ })
1937
+
1938
+ // orderAlignmentForEmission must keep unpaired dels in their correct
1939
+ // positions even when the run contains a mix of preserved rows, fuzzy-
1940
+ // paired rows, and unpaired dels. A regression here would make trailing
1941
+ // dels appear before the row they came after — the same family of bug
1942
+ // as the "deleted rows out of order" report that motivated the
1943
+ // function in the first place.
1944
+ describe('orderAlignmentForEmission — mixed paired and unpaired rows', () => {
1945
+ it('emits trailing unpaired dels after a fuzzy-paired content edit', () => {
1946
+ // Old: [Aaaaa, Bbbbb, Cccc, Dddd]. New: [Aaaaa, Bbbbb+NEW].
1947
+ // After LCS: A is preserved. After pairSimilarUnmatchedRows: B↔B'
1948
+ // via fuzzy. C and D are unpaired dels. The output order must be:
1949
+ // preserved(A) → paired(B,B') → del(C) → del(D).
1950
+ const oldHtml =
1951
+ '<table>' +
1952
+ '<tr><td>Aaaaa shared content here</td></tr>' +
1953
+ '<tr><td>Bbbbb shared content here</td></tr>' +
1954
+ '<tr><td>Cccc deleted row</td></tr>' +
1955
+ '<tr><td>Dddd deleted row</td></tr>' +
1956
+ '</table>'
1957
+ const newHtml =
1958
+ '<table>' +
1959
+ '<tr><td>Aaaaa shared content here</td></tr>' +
1960
+ '<tr><td>Bbbbb shared content here NEW</td></tr>' +
1961
+ '</table>'
1962
+
1963
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1964
+ '<table>' +
1965
+ '<tr><td>Aaaaa shared content here</td></tr>' +
1966
+ "<tr><td>Bbbbb shared content here<ins class='diffins'>&nbsp;NEW</ins></td></tr>" +
1967
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>Cccc deleted row</del></td></tr>" +
1968
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>Dddd deleted row</del></td></tr>" +
1969
+ '</table>'
1970
+ )
1971
+ })
1972
+
1973
+ it('emits dels at the end of the table in old-document order', () => {
1974
+ // Specifically protects against the regression that motivated
1975
+ // orderAlignmentForEmission: deleting the last two rows
1976
+ // simultaneously must emit them in the order they appeared in old
1977
+ // (second-last, then last), not reversed or jumbled.
1978
+ const oldHtml =
1979
+ '<table>' +
1980
+ '<tr><td>kept1</td></tr>' +
1981
+ '<tr><td>kept2</td></tr>' +
1982
+ '<tr><td>second-last</td></tr>' +
1983
+ '<tr><td>last</td></tr>' +
1984
+ '</table>'
1985
+ const newHtml = '<table><tr><td>kept1</td></tr><tr><td>kept2</td></tr></table>'
1986
+
1987
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
1988
+ '<table>' +
1989
+ '<tr><td>kept1</td></tr>' +
1990
+ '<tr><td>kept2</td></tr>' +
1991
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>second-last</del></td></tr>" +
1992
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>last</del></td></tr>" +
1993
+ '</table>'
1994
+ )
1995
+ })
1996
+ })
1997
+
1998
+ // pairSimilarUnmatched is intentionally greedy (not Hungarian-optimal).
1999
+ // When two unmatched-old entries are both above-threshold matches for
2000
+ // the same unmatched-new entry, the first del wins the pairing only
2001
+ // when iterating in document order means "first" matches the better
2002
+ // candidate. The output must still be structurally valid — no phantom
2003
+ // cells, no dropped content — regardless of which del wins.
2004
+ describe('pairSimilarUnmatchedCells — competing dels for the same ins', () => {
2005
+ it('keeps both candidate cells intact when two old cells could pair with one new cell', () => {
2006
+ // Two old cells with identical content compete for one similar
2007
+ // new cell. The greedy assignment picks one to pair as a content
2008
+ // edit; the other emits as a full diffdel. Both must appear; no
2009
+ // cell may silently vanish.
2010
+ const oldHtml =
2011
+ '<table><tr>' +
2012
+ '<td>Preserved</td>' +
2013
+ '<td>Old content alpha to be edited</td>' +
2014
+ '<td>Old content alpha to be edited</td>' +
2015
+ '</tr></table>'
2016
+ const newHtml = '<table><tr>' + '<td>Preserved</td>' + '<td>Old content alpha CHANGED</td>' + '</tr></table>'
2017
+
2018
+ const result = HtmlDiff.execute(oldHtml, newHtml)
2019
+ // The losing del must emit as a full diffdel cell.
2020
+ expect(result).toContain("<td class='diffdel'><del class='diffdel'>Old content alpha to be edited</del></td>")
2021
+ // The winning pair must emit as a partial content edit.
2022
+ expect(result).toContain("<del class='diffmod'>to be edited</del>")
2023
+ expect(result).toContain("<ins class='diffmod'>CHANGED</ins>")
2024
+ // Structural: 3 tds total in the output (1 preserved, 1 full-del,
2025
+ // 1 paired-edit). No phantoms.
2026
+ const tdCount = (result.match(/<td[\s>]/g) || []).length
2027
+ expect(tdCount).toBe(3)
2028
+ })
2029
+ })
2030
+
2031
+ // Row-LCS on a non-trivial 7-row table where only every other row
2032
+ // matches. Existing named tests max out around 4 rows; the matrix
2033
+ // never produces a row-count drop this large with this much
2034
+ // interleaving. This exercises the LCS DP itself, not just the
2035
+ // diff emission.
2036
+ describe('row-LCS on larger tables', () => {
2037
+ it('finds 4 preserved rows interleaved with 3 dropped rows in a 7-row old table', () => {
2038
+ const oldHtml =
2039
+ '<table>' +
2040
+ '<tr><td>row1 preserved</td></tr>' +
2041
+ '<tr><td>row2 old body</td></tr>' +
2042
+ '<tr><td>row3 preserved</td></tr>' +
2043
+ '<tr><td>row4 old body</td></tr>' +
2044
+ '<tr><td>row5 preserved</td></tr>' +
2045
+ '<tr><td>row6 old body</td></tr>' +
2046
+ '<tr><td>row7 preserved</td></tr>' +
2047
+ '</table>'
2048
+ const newHtml =
2049
+ '<table>' +
2050
+ '<tr><td>row1 preserved</td></tr>' +
2051
+ '<tr><td>row3 preserved</td></tr>' +
2052
+ '<tr><td>row5 preserved</td></tr>' +
2053
+ '<tr><td>row7 preserved</td></tr>' +
2054
+ '</table>'
2055
+
2056
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
2057
+ '<table>' +
2058
+ '<tr><td>row1 preserved</td></tr>' +
2059
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row2 old body</del></td></tr>" +
2060
+ '<tr><td>row3 preserved</td></tr>' +
2061
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row4 old body</del></td></tr>" +
2062
+ '<tr><td>row5 preserved</td></tr>' +
2063
+ "<tr class='diffdel'><td class='diffdel'><del class='diffdel'>row6 old body</del></td></tr>" +
2064
+ '<tr><td>row7 preserved</td></tr>' +
2065
+ '</table>'
2066
+ )
2067
+ })
2068
+ })
2069
+ })