@createiq/htmldiff 1.0.5-beta.3 → 1.0.5-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@createiq/htmldiff",
3
- "version": "1.0.5-beta.3",
3
+ "version": "1.0.5-beta.4",
4
4
  "description": "TypeScript port of htmldiff.net",
5
5
  "type": "module",
6
6
  "author": "Mathew Mannion <mathew.mannion@linklaters.com>",
package/src/TableDiff.ts CHANGED
@@ -379,20 +379,25 @@ function diffStructurallyAlignedTable(
379
379
  const oldKeys = oldTable.rows.map(row => rowKey(oldHtml, row))
380
380
  const newKeys = newTable.rows.map(row => rowKey(newHtml, row))
381
381
  const exactAlignment = lcsAlign(oldKeys, newKeys)
382
- const alignment = pairSimilarUnmatchedRows(exactAlignment, oldTable, newTable, oldHtml, newHtml)
382
+ const paired = pairSimilarUnmatchedRows(exactAlignment, oldTable, newTable, oldHtml, newHtml)
383
+ // Reorder so unpaired deleted rows appear at their *natural old-side
384
+ // position* — immediately after the preserved/paired row that came
385
+ // before them in old. Without this, runs of unpaired dels at low
386
+ // alignment indices end up emitted before any preserved row (the
387
+ // "deleted rows out of order" bug).
388
+ const alignment = orderAlignmentForEmission(paired)
383
389
 
384
390
  // Walk new's tableStart→tableEnd, substituting rows with their diffed
385
391
  // form so `<thead>`/`<tbody>` wrappers and inter-row whitespace are
386
392
  // preserved verbatim. Deleted rows (no position in new) are injected
387
- // inline at their alignment position. If new has no rows at all, fall
388
- // back to a from-scratch reconstruction so we still emit deleted rows.
393
+ // inline at the cursor's current position, which now corresponds to
394
+ // their natural old-side slot thanks to the reordering above. If new
395
+ // has no rows at all, fall back to a from-scratch reconstruction so
396
+ // we still emit deleted rows.
389
397
  if (newTable.rows.length === 0) {
390
398
  return rebuildStructurallyAlignedTable(oldHtml, newHtml, oldTable, newTable, alignment, diffCell)
391
399
  }
392
400
 
393
- // Emit the table header (`<table>` + any `<thead>`/`<tbody>` opening
394
- // text up to the first row) up-front so a leading run of deleted-only
395
- // alignments doesn't slip in before the table opens.
396
401
  const out: string[] = []
397
402
  out.push(newHtml.slice(newTable.tableStart, newTable.rows[0].rowStart))
398
403
  let cursor = newTable.rows[0].rowStart
@@ -407,8 +412,6 @@ function diffStructurallyAlignedTable(
407
412
  }
408
413
  cursor = newRow.rowEnd
409
414
  } else if (align.oldIdx !== null) {
410
- // Deleted row: inject inline at the current cursor (between the
411
- // previously emitted row and the next one in new).
412
415
  out.push(emitFullRow(oldHtml, oldTable.rows[align.oldIdx], 'del', diffCell))
413
416
  }
414
417
  }
@@ -416,6 +419,79 @@ function diffStructurallyAlignedTable(
416
419
  return out.join('')
417
420
  }
418
421
 
422
/**
 * Reorders the alignment so that walking it front-to-back emits rows in
 * the visually correct order. Every entry is given a sort key describing
 * where it belongs in new's row flow:
 *
 *   - Preserved/paired (oldIdx, newIdx): position = newIdx.
 *   - Pure insert (null, newIdx): position = newIdx.
 *   - Pure delete (oldIdx, null): position = A + 0.5, where A is the
 *     newIdx of the preserved entry with the largest oldIdx below this
 *     one, or -1 when no preserved row precedes it in old.
 *
 * The +0.5 places a delete after its preceding preserved row but before
 * whatever occupies the next new-side slot (an insert at newIdx A+1 has
 * position A+1 > A+0.5), giving the natural "delete first, insert
 * second" reading order at a replaced position. Deletes that share a gap
 * are ordered by oldIdx, i.e. in old's row order.
 *
 * Resulting placement:
 *   - Deletes with no preserved predecessor get position -0.5 and sort
 *     ahead of every new-side row.
 *   - Deletes in the middle land right after their preceding preserved
 *     row.
 *   - Deletes after the last preserved row land right after that row.
 *
 * The input array is left untouched; the returned array is a new array
 * holding the same entry objects.
 *
 * @param alignment Row alignment entries, in any order.
 * @returns The same entries sorted into emission order.
 */
function orderAlignmentForEmission(alignment: Alignment[]): Alignment[] {
  // Preserved/paired rows, sorted by old-side index. These are the
  // anchors that pure deletes are positioned against.
  const anchors: Array<{ oldIdx: number; newIdx: number }> = []
  for (const a of alignment) {
    if (a.oldIdx !== null && a.newIdx !== null) {
      anchors.push({ oldIdx: a.oldIdx, newIdx: a.newIdx })
    }
  }
  anchors.sort((a, b) => a.oldIdx - b.oldIdx)

  // For a deleted row with old index `oldIdx`, return the newIdx of the
  // anchor with the largest oldIdx strictly below it, or -1 if none.
  // Binary search over the sorted anchors keeps the whole reordering at
  // O(n log n) instead of rescanning the anchor list for every delete.
  function newIdxOfPreservedBefore(oldIdx: number): number {
    let lo = 0
    let hi = anchors.length
    while (lo < hi) {
      const mid = (lo + hi) >>> 1
      if (anchors[mid].oldIdx < oldIdx) {
        lo = mid + 1
      } else {
        hi = mid
      }
    }
    // `lo` is now the count of anchors with oldIdx < `oldIdx`.
    return lo === 0 ? -1 : anchors[lo - 1].newIdx
  }

  // Decorate each entry with a (primary, secondary) key plus its original
  // index, so ties are broken deterministically.
  const decorated = alignment.map((entry, originalIdx) => {
    if (entry.newIdx !== null) {
      // Preserved sorts before a pure insert at the same newIdx (rare).
      return { entry, primary: entry.newIdx, secondary: entry.oldIdx === null ? 1 : 0, originalIdx }
    }
    // Pure delete. oldIdx is non-null for any entry that carries a row;
    // the fallback only exists so the key stays a plain number.
    const oldIdx = entry.oldIdx === null ? 0 : entry.oldIdx
    return { entry, primary: newIdxOfPreservedBefore(oldIdx) + 0.5, secondary: oldIdx, originalIdx }
  })

  decorated.sort((a, b) => {
    if (a.primary !== b.primary) return a.primary - b.primary
    if (a.secondary !== b.secondary) return a.secondary - b.secondary
    return a.originalIdx - b.originalIdx // keep input order on full ties
  })

  return decorated.map(d => d.entry)
}
494
+
419
495
  function rebuildStructurallyAlignedTable(
420
496
  oldHtml: string,
421
497
  newHtml: string,
@@ -56,6 +56,36 @@ describe('HtmlDiff — table operations matrix', () => {
56
56
  })
57
57
 
58
58
  describe('user-reported regression scenarios', () => {
59
+ it('multiple deleted rows including the last appear in old-order, AFTER preserved header (regression for "deleted rows out of order")', () => {
60
+ // The user's exact scenario: column added + multiple rows deleted
61
+ // including the last row, with content edits that prevent fuzzy
62
+ // pairing of some rows. Before the fix, all deletes ended up at
63
+ // the START of the output (above the header) because they were
64
+ // emitted at cursor=row-0-start, regardless of where they came
65
+ // from in old.
66
+ const oldHtml =
67
+ '<table>' +
68
+ '<tr><th>Party</th><th>Form</th><th>Date</th></tr>' +
69
+ '<tr><td>Party A</td><td>IRS Forms W-8BEN-E</td><td>(i) Upon execution.</td></tr>' +
70
+ '<tr><td>Party B</td><td>IRS Form W-9, as applicable (or any successor thereto).</td><td>(i) Upon execution.</td></tr>' +
71
+ '<tr><td>Party A and Party B</td><td>Any document required.</td><td>Promptly upon reasonable demand.</td></tr>' +
72
+ '</table>'
73
+ const newHtml =
74
+ '<table>' +
75
+ '<tr><th>Party</th><th>Extra column</th><th>Form</th><th>Date</th></tr>' +
76
+ "<tr><td>Party A</td><td>Yes</td><td>IRS Forms W-8BEN-E. Here's some extra content</td><td>(i) Upon execution.</td></tr>" +
77
+ '<tr><td>Party B</td><td>A</td><td>IRS Form W</td><td>Yeah OK</td></tr>' +
78
+ '</table>'
79
+
80
+ const result = HtmlDiff.execute(oldHtml, newHtml)
81
+ assertStructurallyValid(result, oldHtml, newHtml, 'multiple deleted rows out of order')
82
+ // The header (preserved with column added) MUST come first.
83
+ const headerIdx = result.indexOf('<tr><th>Party</th>')
84
+ const firstDelIdx = result.indexOf("<tr class='diffdel'>")
85
+ expect(headerIdx).toBeGreaterThanOrEqual(0)
86
+ expect(firstDelIdx).toBeGreaterThan(headerIdx)
87
+ })
88
+
59
89
  it('column added + empty row inserted in middle (regression for issue with blank row)', () => {
60
90
  const oldHtml =
61
91
  '<table>' +
@@ -126,6 +156,16 @@ function allSingleOperations(): Op[] {
126
156
  })
127
157
  ),
128
158
  },
159
+ {
160
+ name: 'delete-multiple-rows-from-end',
161
+ apply: t =>
162
+ renderTable(
163
+ mutate(t, m => {
164
+ m.deleteRow(t.rows.length - 1)
165
+ m.deleteRow(t.rows.length - 1) // NOTE(review): index is again derived from `t`, not `m` — if mutate() applies edits to a copy of `t`, this repeats the same (now out-of-range) index; confirm two rows are really removed
166
+ })
167
+ ),
168
+ },
129
169
  {
130
170
  name: 'add-column-start',
131
171
  apply: t => renderTable(mutate(t, m => m.addColumnAt(0, ['NewHeader', 'newA', 'newB', 'newC']))),
@@ -802,6 +802,86 @@ describe('HtmlDiff — tables', () => {
802
802
  )
803
803
  })
804
804
 
805
+ it('preserves data-* attributes on a <tr> in an inserted row alongside the injected class', () => {
806
+ // The frontend uses `data-behaviour` (and similar `data-*` attrs)
807
+ // on `<tr>` to drive table semantics. The diff must keep these
808
+ // attributes verbatim AND add `class='diffins'` for the structural
809
+ // change. (Whether downstream DOMPurify allows `data-*` is a
810
+ // separate concern — htmldiff's job is to not lose them.)
811
+ const oldHtml = '<table><tr><td>A</td></tr></table>'
812
+ const newHtml = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td></tr></table>'
813
+
814
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
815
+ '<table>' +
816
+ '<tr><td>A</td></tr>' +
817
+ '<tr data-behaviour="data" class=\'diffins\'>' +
818
+ "<td class='diffins'><ins class='diffins'>B</ins></td>" +
819
+ '</tr>' +
820
+ '</table>'
821
+ )
822
+ })
823
+
824
+ it('preserves data-* attributes on a <tr> in a deleted row alongside the injected class', () => {
825
+ const oldHtml = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td></tr></table>'
826
+ const newHtml = '<table><tr><td>A</td></tr></table>'
827
+
828
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
829
+ '<table>' +
830
+ '<tr><td>A</td></tr>' +
831
+ '<tr data-behaviour="data" class=\'diffdel\'>' +
832
+ "<td class='diffdel'><del class='diffdel'>B</del></td>" +
833
+ '</tr>' +
834
+ '</table>'
835
+ )
836
+ })
837
+
838
+ it('preserves multiple data-* attributes on a <tr> in an inserted row', () => {
839
+ const oldHtml = '<table><tr><td>A</td></tr></table>'
840
+ const newHtml = '<table><tr><td>A</td></tr><tr data-behaviour="data" id="row2" data-x="y"><td>B</td></tr></table>'
841
+
842
+ expect(HtmlDiff.execute(oldHtml, newHtml)).toEqual(
843
+ '<table>' +
844
+ '<tr><td>A</td></tr>' +
845
+ '<tr data-behaviour="data" id="row2" data-x="y" class=\'diffins\'>' +
846
+ "<td class='diffins'><ins class='diffins'>B</ins></td>" +
847
+ '</tr>' +
848
+ '</table>'
849
+ )
850
+ })
851
+
852
+ it('preserves data-* attribute on a <tr> when the row passes through column-add path (preserved row)', () => {
853
+ // The row exists in both old and new but cell counts differ
854
+ // (column added) — goes through diffPreservedRow → multi-column
855
+ // detection path. Attribute should still be preserved verbatim
856
+ // via rowHeaderSlice.
857
+ const oldHtml = '<table><tr><td>A</td></tr><tr data-behaviour="data"><td>B</td><td>C</td></tr></table>'
858
+ const newHtml =
859
+ '<table><tr><td>A</td><td>X</td></tr><tr data-behaviour="data"><td>B</td><td>EXTRA</td><td>C</td></tr></table>'
860
+
861
+ const result = HtmlDiff.execute(oldHtml, newHtml)
862
+ expect(result).toContain('<tr data-behaviour="data">')
863
+ })
864
+
865
+ it('preserves data-* attribute on a <tr> when the row passes through fuzzy-matching with content edit', () => {
866
+ // The row gets fuzzy-matched after exact LCS misses the content
867
+ // edit. Goes through diffStructurallyAlignedTable → fuzzy pair →
868
+ // diffPreservedRow. Attribute should still flow through.
869
+ const oldHtml =
870
+ '<table>' +
871
+ '<tr><td>A</td></tr>' +
872
+ '<tr data-behaviour="data"><td>The quick brown fox jumps over.</td></tr>' +
873
+ '</table>'
874
+ const newHtml =
875
+ '<table>' +
876
+ '<tr><td>A</td></tr>' +
877
+ '<tr data-behaviour="data"><td>The quick brown fox jumps over the lazy dog.</td></tr>' +
878
+ '<tr><td>NEW</td></tr>' +
879
+ '</table>'
880
+
881
+ const result = HtmlDiff.execute(oldHtml, newHtml)
882
+ expect(result).toContain('<tr data-behaviour="data">')
883
+ })
884
+
805
885
  it('preserves <table> attributes verbatim from new (no diff marker on attribute changes)', () => {
806
886
  const oldHtml = '<table border="1"><tr><td>A</td></tr></table>'
807
887
  const newHtml = '<table border="2" style="width:100%"><tr><td>A</td></tr></table>'