@createiq/htmldiff 1.0.5-beta.3 → 1.1.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +67 -1
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.mjs +67 -1
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/TableDiff.ts +84 -8
- package/test/HtmlDiff.tables.matrix.spec.ts +40 -0
- package/test/HtmlDiff.tables.spec.ts +286 -0
package/package.json
CHANGED
package/src/TableDiff.ts
CHANGED
|
@@ -379,20 +379,25 @@ function diffStructurallyAlignedTable(
|
|
|
379
379
|
const oldKeys = oldTable.rows.map(row => rowKey(oldHtml, row))
|
|
380
380
|
const newKeys = newTable.rows.map(row => rowKey(newHtml, row))
|
|
381
381
|
const exactAlignment = lcsAlign(oldKeys, newKeys)
|
|
382
|
-
const
|
|
382
|
+
const paired = pairSimilarUnmatchedRows(exactAlignment, oldTable, newTable, oldHtml, newHtml)
|
|
383
|
+
// Reorder so unpaired deleted rows appear at their *natural old-side
|
|
384
|
+
// position* — immediately after the preserved/paired row that came
|
|
385
|
+
// before them in old. Without this, runs of unpaired dels at low
|
|
386
|
+
// alignment indices end up emitted before any preserved row (the
|
|
387
|
+
// "deleted rows out of order" bug).
|
|
388
|
+
const alignment = orderAlignmentForEmission(paired)
|
|
383
389
|
|
|
384
390
|
// Walk new's tableStart→tableEnd, substituting rows with their diffed
|
|
385
391
|
// form so `<thead>`/`<tbody>` wrappers and inter-row whitespace are
|
|
386
392
|
// preserved verbatim. Deleted rows (no position in new) are injected
|
|
387
|
-
// inline at
|
|
388
|
-
//
|
|
393
|
+
// inline at the cursor's current position, which now corresponds to
|
|
394
|
+
// their natural old-side slot thanks to the reordering above. If new
|
|
395
|
+
// has no rows at all, fall back to a from-scratch reconstruction so
|
|
396
|
+
// we still emit deleted rows.
|
|
389
397
|
if (newTable.rows.length === 0) {
|
|
390
398
|
return rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment, diffCell)
|
|
391
399
|
}
|
|
392
400
|
|
|
393
|
-
// Emit the table header (`<table>` + any `<thead>`/`<tbody>` opening
|
|
394
|
-
// text up to the first row) up-front so a leading run of deleted-only
|
|
395
|
-
// alignments doesn't slip in before the table opens.
|
|
396
401
|
const out: string[] = []
|
|
397
402
|
out.push(newHtml.slice(newTable.tableStart, newTable.rows[0].rowStart))
|
|
398
403
|
let cursor = newTable.rows[0].rowStart
|
|
@@ -407,8 +412,6 @@ function diffStructurallyAlignedTable(
|
|
|
407
412
|
}
|
|
408
413
|
cursor = newRow.rowEnd
|
|
409
414
|
} else if (align.oldIdx !== null) {
|
|
410
|
-
// Deleted row: inject inline at the current cursor (between the
|
|
411
|
-
// previously emitted row and the next one in new).
|
|
412
415
|
out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], 'del', diffCell))
|
|
413
416
|
}
|
|
414
417
|
}
|
|
@@ -416,6 +419,79 @@ function diffStructurallyAlignedTable(
|
|
|
416
419
|
return out.join('')
|
|
417
420
|
}
|
|
418
421
|
|
|
422
|
+
/**
|
|
423
|
+
* Reorders the alignment so emission produces rows in the visually-
|
|
424
|
+
* correct order. Each entry is assigned a fractional "position" in
|
|
425
|
+
* new's flow:
|
|
426
|
+
*
|
|
427
|
+
* • Preserved/paired (oldIdx, newIdx): position = newIdx.
|
|
428
|
+
* • Pure insert (null, newIdx): position = newIdx.
|
|
429
|
+
* • Pure delete (oldIdx, null): position = newIdx-of-preserved-just-
|
|
430
|
+
* before-this-oldIdx + 0.5. Dels at the same gap sort by oldIdx so
|
|
431
|
+
* they appear in old's row order. The +0.5 places dels BEFORE any
|
|
432
|
+
* insert at the same gap (insert at newIdx N1+1 has position N1+1
|
|
433
|
+
* which is > N1+0.5), giving the natural "delete first, insert
|
|
434
|
+
* second" reading order at a replaced position.
|
|
435
|
+
*
|
|
436
|
+
* This handles the full range:
|
|
437
|
+
* • Run of unpaired dels at the start (no preserved predecessor):
|
|
438
|
+
* position -0.5, sorted by oldIdx.
|
|
439
|
+
* • Dels in the middle: positioned right after their preceding
|
|
440
|
+
* preserved row.
|
|
441
|
+
* • Dels at the end (no preserved successor): positioned after the
|
|
442
|
+
* last preserved row.
|
|
443
|
+
*
|
|
444
|
+
* Without this reordering, a run of unpaired deletes at low alignment
|
|
445
|
+
* indices got emitted at cursor = first-new-row position — putting
|
|
446
|
+
* all deletes before any preserved row in the output, regardless of
|
|
447
|
+
* where they came from in old.
|
|
448
|
+
*/
|
|
449
|
+
function orderAlignmentForEmission(alignment: Alignment[]): Alignment[] {
|
|
450
|
+
const preserved: Array<{ oldIdx: number; newIdx: number }> = []
|
|
451
|
+
for (const a of alignment) {
|
|
452
|
+
if (a.oldIdx !== null && a.newIdx !== null) {
|
|
453
|
+
preserved.push({ oldIdx: a.oldIdx, newIdx: a.newIdx })
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
preserved.sort((a, b) => a.oldIdx - b.oldIdx)
|
|
457
|
+
|
|
458
|
+
// For a deleted row with oldIdx K, return the newIdx of the preserved
|
|
459
|
+
// entry with the largest oldIdx less than K, or -1 if none.
|
|
460
|
+
function newIdxOfPreservedBefore(oldIdx: number): number {
|
|
461
|
+
let result = -1
|
|
462
|
+
for (const p of preserved) {
|
|
463
|
+
if (p.oldIdx >= oldIdx) break
|
|
464
|
+
result = p.newIdx
|
|
465
|
+
}
|
|
466
|
+
return result
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
// Decorate each alignment with a fractional position. We use
|
|
470
|
+
// (primary, secondary) tuples so dels at the same gap sort by oldIdx
|
|
471
|
+
// (in old's row order) and inserts at the same newIdx stay stable.
|
|
472
|
+
const decorated = alignment.map((a, i) => {
|
|
473
|
+
let primary: number
|
|
474
|
+
let secondary: number
|
|
475
|
+
if (a.newIdx !== null) {
|
|
476
|
+
primary = a.newIdx
|
|
477
|
+
secondary = a.oldIdx === null ? 1 : 0 // preserved before pure-insert at same newIdx (rare)
|
|
478
|
+
} else {
|
|
479
|
+
// Pure delete
|
|
480
|
+
primary = newIdxOfPreservedBefore(a.oldIdx as number) + 0.5
|
|
481
|
+
secondary = a.oldIdx as number
|
|
482
|
+
}
|
|
483
|
+
return { entry: a, primary, secondary, originalIdx: i }
|
|
484
|
+
})
|
|
485
|
+
|
|
486
|
+
decorated.sort((a, b) => {
|
|
487
|
+
if (a.primary !== b.primary) return a.primary - b.primary
|
|
488
|
+
if (a.secondary !== b.secondary) return a.secondary - b.secondary
|
|
489
|
+
return a.originalIdx - b.originalIdx // stable
|
|
490
|
+
})
|
|
491
|
+
|
|
492
|
+
return decorated.map(d => d.entry)
|
|
493
|
+
}
|
|
494
|
+
|
|
419
495
|
function rebuildStructurallyAlignedTable(
|
|
420
496
|
oldHtml: string,
|
|
421
497
|
newHtml: string,
|
|
@@ -56,6 +56,36 @@ describe('HtmlDiff — table operations matrix', () => {
|
|
|
56
56
|
})
|
|
57
57
|
|
|
58
58
|
describe('user-reported regression scenarios', () => {
|
|
59
|
+
it('multiple deleted rows including the last appear in old-order, AFTER preserved header (regression for "deleted rows out of order")', () => {
|
|
60
|
+
// The user's exact scenario: column added + multiple rows deleted
|
|
61
|
+
// including the last row, with content edits that prevent fuzzy
|
|
62
|
+
// pairing of some rows. Before the fix, all deletes ended up at
|
|
63
|
+
// the START of the output (above the header) because they were
|
|
64
|
+
// emitted at cursor=row-0-start, regardless of where they came
|
|
65
|
+
// from in old.
|
|
66
|
+
const oldHtml =
|
|
67
|
+
'<table>' +
|
|
68
|
+
'<tr><th>Party</th><th>Form</th><th>Date</th></tr>' +
|
|
69
|
+
'<tr><td>Party A</td><td>IRS Forms W-8BEN-E</td><td>(i) Upon execution.</td></tr>' +
|
|
70
|
+
'<tr><td>Party B</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>' +
|
|
71
|
+
'<tr><td>Party A and Party B</td><td>Any document required.</td><td>Promptly upon reasonable demand.</td></tr>' +
|
|
72
|
+
'</table>'
|
|
73
|
+
const newHtml =
|
|
74
|
+
'<table>' +
|
|
75
|
+
'<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>' +
|
|
76
|
+
"<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E. Here's some extra content</td><td>(i) Upon execution.</td></tr>" +
|
|
77
|
+
'<tr><td>Party B</td><td>A</td><td>IRS Form W</td><td>Yeah OK</td></tr>' +
|
|
78
|
+
'</table>'
|
|
79
|
+
|
|
80
|
+
const result = HtmlDiff.execute(oldHtml, newHtml)
|
|
81
|
+
assertStructurallyValid(result, oldHtml, newHtml, 'multiple deleted rows out of order')
|
|
82
|
+
// The header (preserved with column added) MUST come first.
|
|
83
|
+
const headerIdx = result.indexOf('<tr><th>Party</th>')
|
|
84
|
+
const firstDelIdx = result.indexOf("<tr class='diffdel'>")
|
|
85
|
+
expect(headerIdx).toBeGreaterThanOrEqual(0)
|
|
86
|
+
expect(firstDelIdx).toBeGreaterThan(headerIdx)
|
|
87
|
+
})
|
|
88
|
+
|
|
59
89
|
it('column added + empty row inserted in middle (regression for issue with blank row)', () => {
|
|
60
90
|
const oldHtml =
|
|
61
91
|
'<table>' +
|
|
@@ -126,6 +156,16 @@ function allSingleOperations(): Op[] {
|
|
|
126
156
|
})
|
|
127
157
|
),
|
|
128
158
|
},
|
|
159
|
+
{
|
|
160
|
+
name: 'delete-multiple-rows-from-end',
|
|
161
|
+
apply: t =>
|
|
162
|
+
renderTable(
|
|
163
|
+
mutate(t, m => {
|
|
164
|
+
m.deleteRow(t.rows.length - 1)
|
|
165
|
+
m.deleteRow(t.rows.length - 1)
|
|
166
|
+
})
|
|
167
|
+
),
|
|
168
|
+
},
|
|
129
169
|
{
|
|
130
170
|
name: 'add-column-start',
|
|
131
171
|
apply: t => renderTable(mutate(t, m => m.addColumnAt(0, ['NewHeader', 'newA', 'newB', 'newC']))),
|
|
@@ -802,6 +802,86 @@ describe('HtmlDiff — tables', () => {
|
|
|
802
802
|
)
|
|
803
803
|
})
|
|
804
804
|
|
|
805
|
+
it('preserves data-* attributes on a <tr> in an inserted row alongside the injected class', () => {
|
|
806
|
+
// The frontend uses `data-behaviour` (and similar `data-*` attrs)
|
|
807
|
+
// on `<tr>` to drive table semantics. The diff must keep these
|
|
808
|
+
// attributes verbatim AND add `class='diffins'` for the structural
|
|
809
|
+
// change. (Whether downstream DOMPurify allows `data-*` is a
|
|
810
|
+
// separate concern — htmldiff's job is to not lose them.)
|
|
811
|
+
const oldHtml = '<table><tr><td>A</td></tr></table>'
|
|
812
|
+
const newHtml = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td></tr></table>'
|
|
813
|
+
|
|
814
|
+
expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
|
|
815
|
+
'<table>' +
|
|
816
|
+
'<tr><td>A</td></tr>' +
|
|
817
|
+
'<tr data-behaviour="data" class=\'diffins\'>' +
|
|
818
|
+
"<td class='diffins'><ins class='diffins'>B</ins></td>" +
|
|
819
|
+
'</tr>' +
|
|
820
|
+
'</table>'
|
|
821
|
+
)
|
|
822
|
+
})
|
|
823
|
+
|
|
824
|
+
it('preserves data-* attributes on a <tr> in a deleted row alongside the injected class', () => {
|
|
825
|
+
const oldHtml = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td></tr></table>'
|
|
826
|
+
const newHtml = '<table><tr><td>A</td></tr></table>'
|
|
827
|
+
|
|
828
|
+
expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
|
|
829
|
+
'<table>' +
|
|
830
|
+
'<tr><td>A</td></tr>' +
|
|
831
|
+
'<tr data-behaviour="data" class=\'diffdel\'>' +
|
|
832
|
+
"<td class='diffdel'><del class='diffdel'>B</del></td>" +
|
|
833
|
+
'</tr>' +
|
|
834
|
+
'</table>'
|
|
835
|
+
)
|
|
836
|
+
})
|
|
837
|
+
|
|
838
|
+
it('preserves multiple data-* attributes on a <tr> in an inserted row', () => {
|
|
839
|
+
const oldHtml = '<table><tr><td>A</td></tr></table>'
|
|
840
|
+
const newHtml = '<table><tr><td>A</td></tr><tr data-behaviour="data" id="row2" data-x="y"><td>B</td></tr></table>'
|
|
841
|
+
|
|
842
|
+
expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
|
|
843
|
+
'<table>' +
|
|
844
|
+
'<tr><td>A</td></tr>' +
|
|
845
|
+
'<tr data-behaviour="data" id="row2" data-x="y" class=\'diffins\'>' +
|
|
846
|
+
"<td class='diffins'><ins class='diffins'>B</ins></td>" +
|
|
847
|
+
'</tr>' +
|
|
848
|
+
'</table>'
|
|
849
|
+
)
|
|
850
|
+
})
|
|
851
|
+
|
|
852
|
+
it('preserves data-* attribute on a <tr> when the row passes through column-add path (preserved row)', () => {
|
|
853
|
+
// The row exists in both old and new but cell counts differ
|
|
854
|
+
// (column added) — goes through diffPreservedRow → multi-column
|
|
855
|
+
// detection path. Attribute should still be preserved verbatim
|
|
856
|
+
// via rowHeaderSlice.
|
|
857
|
+
const oldHtml = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td><td>C</td></tr></table>'
|
|
858
|
+
const newHtml =
|
|
859
|
+
'<table><tr><td>A</td><td>X</td></tr><tr data-behaviour="data"><td>B</td><td>EXTRA</td><td>C</td></tr></table>'
|
|
860
|
+
|
|
861
|
+
const result = HtmlDiff.execute(oldHtml, newHtml)
|
|
862
|
+
expect(result).toContain('<tr data-behaviour="data">')
|
|
863
|
+
})
|
|
864
|
+
|
|
865
|
+
it('preserves data-* attribute on a <tr> when the row passes through fuzzy-matching with content edit', () => {
|
|
866
|
+
// The row gets fuzzy-matched after exact LCS misses the content
|
|
867
|
+
// edit. Goes through diffStructurallyAlignedTable → fuzzy pair →
|
|
868
|
+
// diffPreservedRow. Attribute should still flow through.
|
|
869
|
+
const oldHtml =
|
|
870
|
+
'<table>' +
|
|
871
|
+
'<tr><td>A</td></tr>' +
|
|
872
|
+
'<tr data-behaviour="data"><td>The quick brown fox jumps over.</td></tr>' +
|
|
873
|
+
'</table>'
|
|
874
|
+
const newHtml =
|
|
875
|
+
'<table>' +
|
|
876
|
+
'<tr><td>A</td></tr>' +
|
|
877
|
+
'<tr data-behaviour="data"><td>The quick brown fox jumps over the lazy dog.</td></tr>' +
|
|
878
|
+
'<tr><td>NEW</td></tr>' +
|
|
879
|
+
'</table>'
|
|
880
|
+
|
|
881
|
+
const result = HtmlDiff.execute(oldHtml, newHtml)
|
|
882
|
+
expect(result).toContain('<tr data-behaviour="data">')
|
|
883
|
+
})
|
|
884
|
+
|
|
805
885
|
it('preserves <table> attributes verbatim from new (no diff marker on attribute changes)', () => {
|
|
806
886
|
const oldHtml = '<table border="1"><tr><td>A</td></tr></table>'
|
|
807
887
|
const newHtml = '<table border="2" style="width:100%"><tr><td>A</td></tr></table>'
|
|
@@ -1067,6 +1147,66 @@ describe('HtmlDiff — tables', () => {
|
|
|
1067
1147
|
// containing comments, CDATA, mixed-case tags, foreign attribute values
|
|
1068
1148
|
// that look like class= patterns, etc.
|
|
1069
1149
|
describe('hostile / adversarial inputs', () => {
|
|
1150
|
+
it('handles a processing instruction (<?xml?>) in cell content', () => {
|
|
1151
|
+
// parseOpeningTagAt has explicit handling for `<?...?>`. Pin
|
|
1152
|
+
// that path so a future refactor can't break it.
|
|
1153
|
+
const oldHtml = '<table><tr><td>A</td></tr></table>'
|
|
1154
|
+
const newHtml = '<table><tr><td>A</td></tr><tr><td><?xml version="1.0"?>text</td></tr></table>'
|
|
1155
|
+
|
|
1156
|
+
expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
|
|
1157
|
+
'<table>' +
|
|
1158
|
+
'<tr><td>A</td></tr>' +
|
|
1159
|
+
"<tr class='diffins'><td class='diffins'>" +
|
|
1160
|
+
'<?xml version="1.0"?>' +
|
|
1161
|
+
"<ins class='diffins'>text</ins>" +
|
|
1162
|
+
'</td></tr>' +
|
|
1163
|
+
'</table>'
|
|
1164
|
+
)
|
|
1165
|
+
})
|
|
1166
|
+
|
|
1167
|
+
it('handles an unquoted class attribute value when injecting diffins', () => {
|
|
1168
|
+
// findClassAttribute's unquoted-value branch wasn't exercised —
|
|
1169
|
+
// most generators emit quoted values. HTML5 permits unquoted
|
|
1170
|
+
// simple values, so support them.
|
|
1171
|
+
const oldHtml = '<table><tr><td>A</td></tr></table>'
|
|
1172
|
+
const newHtml = '<table><tr><td>A</td></tr><tr><td class=existing>B</td></tr></table>'
|
|
1173
|
+
|
|
1174
|
+
const result = HtmlDiff.execute(oldHtml, newHtml)
|
|
1175
|
+
// The existing unquoted class is preserved; the injected class
|
|
1176
|
+
// appends. Exact form depends on injectClass's writeback (it
|
|
1177
|
+
// rewrites the attribute value at its parsed range).
|
|
1178
|
+
expect(result).toContain('class=existing')
|
|
1179
|
+
expect(result).toContain('diffins')
|
|
1180
|
+
})
|
|
1181
|
+
|
|
1182
|
+
it('passes content through verbatim when a cell contains a lone `<` (malformed)', () => {
|
|
1183
|
+
// wrapInlineTextRuns sees `<` and calls parseOpeningTagAt, which
|
|
1184
|
+
// returns null for a lone `<` with no closing `>`. The function
|
|
1185
|
+
// then bails by pushing the rest verbatim. The output isn't
|
|
1186
|
+
// pretty but it's predictable and doesn't crash.
|
|
1187
|
+
const oldHtml = '<table><tr><td>A</td></tr></table>'
|
|
1188
|
+
const newHtml = '<table><tr><td>A</td></tr><tr><td>fee < cost</td></tr></table>'
|
|
1189
|
+
|
|
1190
|
+
const result = HtmlDiff.execute(oldHtml, newHtml)
|
|
1191
|
+
// The inserted row should still be marked.
|
|
1192
|
+
expect(result).toContain("<tr class='diffins'>")
|
|
1193
|
+
// The text on either side of the lone `<` should survive in the output.
|
|
1194
|
+
expect(result).toContain('fee')
|
|
1195
|
+
expect(result).toContain('cost')
|
|
1196
|
+
})
|
|
1197
|
+
|
|
1198
|
+
it('handles a malformed table tag missing its closing > (no crash, falls back)', () => {
|
|
1199
|
+
// findTopLevelTables → parseOpeningTagAt returns null → scanner
|
|
1200
|
+
// increments i and continues. preprocessTables ends up with no
|
|
1201
|
+
// valid tables and falls through to the word-level diff.
|
|
1202
|
+
const oldHtml = '<p>before</p><table<tr><td>A</td></tr></table><p>after</p>'
|
|
1203
|
+
const newHtml = '<p>before</p><table<tr><td>B</td></tr></table><p>after</p>'
|
|
1204
|
+
|
|
1205
|
+
const result = HtmlDiff.execute(oldHtml, newHtml)
|
|
1206
|
+
// Should not crash; the only behaviour pinned here is non-empty output.
|
|
1207
|
+
expect(result.length).toBeGreaterThan(0)
|
|
1208
|
+
})
|
|
1209
|
+
|
|
1070
1210
|
it('passes an HTML comment with embedded > through cell content unmolested', () => {
|
|
1071
1211
|
// Word-exported HTML routinely has comments with `>` inside (e.g.
|
|
1072
1212
|
// conditional comments). Before the parser fix, the scanner cut
|
|
@@ -1433,6 +1573,152 @@ describe('HtmlDiff — tables', () => {
|
|
|
1433
1573
|
})
|
|
1434
1574
|
})
|
|
1435
1575
|
|
|
1576
|
+
// Coverage gaps surfaced by the v8 report: the cell-LCS fallback path
|
|
1577
|
+
// (diffStructurallyAlignedRow + cellKey + pairSimilarUnmatchedCells) is
|
|
1578
|
+
// only entered when the per-row column delta exceeds MAX_COLUMN_DELTA
|
|
1579
|
+
// (6) or the row's logical width exceeds MAX_COLUMN_SEARCH_WIDTH (40).
|
|
1580
|
+
// None of the existing tests trigger that. These tests exercise the
|
|
1581
|
+
// fallback and pin its behaviour.
|
|
1582
|
+
describe('cell-LCS fallback for very-wide column changes', () => {
|
|
1583
|
+
it('handles 8 columns inserted alongside existing cells (delta > MAX_COLUMN_DELTA)', () => {
|
|
1584
|
+
// Old: 3 cells. New: 11 cells (8 columns added). Exact-LCS finds
|
|
1585
|
+
// A, B, C as matches; the 8 unmatched new cells are inserted.
|
|
1586
|
+
const oldHtml = '<table><tr><td>A</td><td>B</td><td>C</td></tr></table>'
|
|
1587
|
+
const newHtml =
|
|
1588
|
+
'<table><tr><td>A</td>' +
|
|
1589
|
+
'<td>X1</td><td>X2</td><td>X3</td><td>X4</td>' +
|
|
1590
|
+
'<td>X5</td><td>X6</td><td>X7</td><td>X8</td>' +
|
|
1591
|
+
'<td>B</td><td>C</td></tr></table>'
|
|
1592
|
+
|
|
1593
|
+
expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
|
|
1594
|
+
'<table><tr>' +
|
|
1595
|
+
'<td>A</td>' +
|
|
1596
|
+
"<td class='diffins'><ins class='diffins'>X1</ins></td>" +
|
|
1597
|
+
"<td class='diffins'><ins class='diffins'>X2</ins></td>" +
|
|
1598
|
+
"<td class='diffins'><ins class='diffins'>X3</ins></td>" +
|
|
1599
|
+
"<td class='diffins'><ins class='diffins'>X4</ins></td>" +
|
|
1600
|
+
"<td class='diffins'><ins class='diffins'>X5</ins></td>" +
|
|
1601
|
+
"<td class='diffins'><ins class='diffins'>X6</ins></td>" +
|
|
1602
|
+
"<td class='diffins'><ins class='diffins'>X7</ins></td>" +
|
|
1603
|
+
"<td class='diffins'><ins class='diffins'>X8</ins></td>" +
|
|
1604
|
+
'<td>B</td>' +
|
|
1605
|
+
'<td>C</td>' +
|
|
1606
|
+
'</tr></table>'
|
|
1607
|
+
)
|
|
1608
|
+
})
|
|
1609
|
+
|
|
1610
|
+
it('handles 8 columns inserted alongside a content edit (cell fuzzy match in fallback)', () => {
|
|
1611
|
+
// The fallback path's pairSimilarUnmatchedCells should pair the
|
|
1612
|
+
// edited cell (OldText → NewText) by content similarity so it
|
|
1613
|
+
// emits as one content-edit cell, not as delete + insert.
|
|
1614
|
+
const oldHtml = '<table><tr><td>A</td><td>OldText</td><td>C</td></tr></table>'
|
|
1615
|
+
const newHtml =
|
|
1616
|
+
'<table><tr><td>A</td>' +
|
|
1617
|
+
'<td>X1</td><td>X2</td><td>X3</td><td>X4</td>' +
|
|
1618
|
+
'<td>X5</td><td>X6</td><td>X7</td>' +
|
|
1619
|
+
'<td>NewText</td>' +
|
|
1620
|
+
'<td>C</td></tr></table>'
|
|
1621
|
+
|
|
1622
|
+
expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
|
|
1623
|
+
'<table><tr>' +
|
|
1624
|
+
'<td>A</td>' +
|
|
1625
|
+
"<td class='diffins'><ins class='diffins'>X1</ins></td>" +
|
|
1626
|
+
"<td class='diffins'><ins class='diffins'>X2</ins></td>" +
|
|
1627
|
+
"<td class='diffins'><ins class='diffins'>X3</ins></td>" +
|
|
1628
|
+
"<td class='diffins'><ins class='diffins'>X4</ins></td>" +
|
|
1629
|
+
"<td class='diffins'><ins class='diffins'>X5</ins></td>" +
|
|
1630
|
+
"<td class='diffins'><ins class='diffins'>X6</ins></td>" +
|
|
1631
|
+
"<td class='diffins'><ins class='diffins'>X7</ins></td>" +
|
|
1632
|
+
"<td><del class='diffmod'>OldText</del><ins class='diffmod'>NewText</ins></td>" +
|
|
1633
|
+
'<td>C</td>' +
|
|
1634
|
+
'</tr></table>'
|
|
1635
|
+
)
|
|
1636
|
+
})
|
|
1637
|
+
|
|
1638
|
+
it('handles many columns deleted (delta < -MAX_COLUMN_DELTA)', () => {
|
|
1639
|
+
const oldHtml =
|
|
1640
|
+
'<table><tr><td>A</td>' +
|
|
1641
|
+
'<td>X1</td><td>X2</td><td>X3</td><td>X4</td>' +
|
|
1642
|
+
'<td>X5</td><td>X6</td><td>X7</td><td>X8</td>' +
|
|
1643
|
+
'<td>B</td><td>C</td></tr></table>'
|
|
1644
|
+
const newHtml = '<table><tr><td>A</td><td>B</td><td>C</td></tr></table>'
|
|
1645
|
+
|
|
1646
|
+
expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
|
|
1647
|
+
'<table><tr>' +
|
|
1648
|
+
'<td>A</td>' +
|
|
1649
|
+
"<td class='diffdel'><del class='diffdel'>X1</del></td>" +
|
|
1650
|
+
"<td class='diffdel'><del class='diffdel'>X2</del></td>" +
|
|
1651
|
+
"<td class='diffdel'><del class='diffdel'>X3</del></td>" +
|
|
1652
|
+
"<td class='diffdel'><del class='diffdel'>X4</del></td>" +
|
|
1653
|
+
"<td class='diffdel'><del class='diffdel'>X5</del></td>" +
|
|
1654
|
+
"<td class='diffdel'><del class='diffdel'>X6</del></td>" +
|
|
1655
|
+
"<td class='diffdel'><del class='diffdel'>X7</del></td>" +
|
|
1656
|
+
"<td class='diffdel'><del class='diffdel'>X8</del></td>" +
|
|
1657
|
+
'<td>B</td>' +
|
|
1658
|
+
'<td>C</td>' +
|
|
1659
|
+
'</tr></table>'
|
|
1660
|
+
)
|
|
1661
|
+
})
|
|
1662
|
+
|
|
1663
|
+
it('preserves whitespace between inline elements in a fully-inserted cell (no spurious <ins>)', () => {
|
|
1664
|
+
// wrapInlineTextRuns walks content; when it encounters
|
|
1665
|
+
// whitespace-only text between two inline elements (e.g. the
|
|
1666
|
+
// space between `<strong>` and `<em>`), it passes the whitespace
|
|
1667
|
+
// through unwrapped — the body of the `else` branch on the
|
|
1668
|
+
// text-run path.
|
|
1669
|
+
const oldHtml = '<table><tr><td>A</td></tr></table>'
|
|
1670
|
+
const newHtml = '<table><tr><td>A</td></tr><tr><td><strong>a</strong> <em>b</em></td></tr></table>'
|
|
1671
|
+
|
|
1672
|
+
expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
|
|
1673
|
+
'<table>' +
|
|
1674
|
+
'<tr><td>A</td></tr>' +
|
|
1675
|
+
"<tr class='diffins'><td class='diffins'>" +
|
|
1676
|
+
"<strong><ins class='diffins'>a</ins></strong> <em><ins class='diffins'>b</ins></em>" +
|
|
1677
|
+
'</td></tr>' +
|
|
1678
|
+
'</table>'
|
|
1679
|
+
)
|
|
1680
|
+
})
|
|
1681
|
+
|
|
1682
|
+
it('handles a colspan-changed row where some cells have matching colspans', () => {
|
|
1683
|
+
// diffColspanChangedRow walks cells; when oSpan === nSpan for a
|
|
1684
|
+
// pair, it emits a content diff for that cell pair. This branch
|
|
1685
|
+
// wasn't exercised — needs a row with BOTH a colspan change AND
|
|
1686
|
+
// matching-colspan cells in the same row.
|
|
1687
|
+
const oldHtml =
|
|
1688
|
+
'<table><tr>' + '<td>FirstA</td>' + '<td>MidA</td><td>MidB</td>' + '<td>LastA</td>' + '</tr></table>'
|
|
1689
|
+
const newHtml =
|
|
1690
|
+
'<table><tr>' + '<td>FirstB</td>' + '<td colspan="2">Merged AB</td>' + '<td>LastB</td>' + '</tr></table>'
|
|
1691
|
+
|
|
1692
|
+
const result = HtmlDiff.execute(oldHtml, newHtml)
|
|
1693
|
+
// First and last cells should diff content cell-by-cell (matching
|
|
1694
|
+
// colspans = 1 on both sides); middle two old cells merge into
|
|
1695
|
+
// one colspan=2 cell tagged 'mod colspan'.
|
|
1696
|
+
expect(result).toContain("<del class='diffmod'>FirstA</del>")
|
|
1697
|
+
expect(result).toContain("<ins class='diffmod'>FirstB</ins>")
|
|
1698
|
+
expect(result).toContain('colspan="2" class=\'mod colspan\'')
|
|
1699
|
+
expect(result).toContain("<del class='diffmod'>LastA</del>")
|
|
1700
|
+
expect(result).toContain("<ins class='diffmod'>LastB</ins>")
|
|
1701
|
+
})
|
|
1702
|
+
|
|
1703
|
+
it('handles row wider than MAX_COLUMN_SEARCH_WIDTH (40 cells) — fallback to cell-LCS', () => {
|
|
1704
|
+
// 50-cell row in old, 51-cell row in new (1 column added at
|
|
1705
|
+
// start). MAX_COLUMN_SEARCH_WIDTH guard prevents the
|
|
1706
|
+
// combinatorial search; fallback to cell-LCS which finds 50
|
|
1707
|
+
// exact matches and the 1 new cell as an insertion.
|
|
1708
|
+
const oldCells = Array.from({ length: 50 }, (_, i) => `<td>c${i}</td>`).join('')
|
|
1709
|
+
const newCells = `<td>NEW</td>${oldCells}`
|
|
1710
|
+
const oldHtml = `<table><tr>${oldCells}</tr></table>`
|
|
1711
|
+
const newHtml = `<table><tr>${newCells}</tr></table>`
|
|
1712
|
+
|
|
1713
|
+
const result = HtmlDiff.execute(oldHtml, newHtml)
|
|
1714
|
+
// We should see exactly one inserted cell and 50 preserved cells.
|
|
1715
|
+
expect(result).toContain("<td class='diffins'><ins class='diffins'>NEW</ins></td>")
|
|
1716
|
+
// Sanity: total td count is 51 (no phantoms).
|
|
1717
|
+
const tdCount = (result.match(/<td[\s>]/g) || []).length
|
|
1718
|
+
expect(tdCount).toBe(51)
|
|
1719
|
+
})
|
|
1720
|
+
})
|
|
1721
|
+
|
|
1436
1722
|
describe('attribute edge cases', () => {
|
|
1437
1723
|
it('does not introduce a leading space when the existing class attribute is empty', () => {
|
|
1438
1724
|
const oldHtml = '<table><tr><td>A</td></tr></table>'
|