@createiq/htmldiff 1.2.0-beta.1 → 1.2.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- import { describe, expect, it } from 'vitest'
1
+ import { describe, expect, it, vi } from 'vitest'
2
2
 
3
- import HtmlDiff from '../src/HtmlDiff'
3
+ import HtmlDiff, { WORD_ALIGNED_OPTIONS } from '../src/HtmlDiff'
4
4
 
5
5
  /**
6
6
  * Three-way diff tests under the genesis-spine model.
@@ -43,9 +43,13 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
43
43
 
44
44
  it('CP and Me each change the same word differently', () => {
45
45
  // Genesis: "Hello world". CP made "Hello cruel world", Me made "Hello brave world".
46
- // Disagreement show both authors' insertions.
46
+ // Both inserted at the same boundary. Under the intent-reading
47
+ // model, the reviewer sees CP's proposal relative to Me's
48
+ // current content: "CP wants `cruel` where Me has `brave`" —
49
+ // ins-cp `cruel` + del-cp `brave`. Reads as a substitution
50
+ // intent, which is what a legal reviewer needs to act on.
47
51
  expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello brave world')).toBe(
48
- "Hello <ins class='diffins cp' data-author='cp'>cruel </ins><ins class='diffins me' data-author='me'>brave </ins>world"
52
+ "Hello <ins class='diffins cp' data-author='cp'>cruel</ins><del class='diffdel cp' data-author='cp'>brave</del> world"
49
53
  )
50
54
  })
51
55
 
@@ -71,6 +75,77 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
71
75
  )
72
76
  })
73
77
 
78
+ it("CP accepted Me's addition with a word removed — del-cp on the removed word", () => {
79
+ // Real flow on the live preview:
80
+ // - Me appends "And I add more things here"
81
+ // - CP "accepts" Me's addition but deletes the word "things"
82
+ // Intent reading: CP wants "things" gone. del-cp on the me-only
83
+ // word; the rest of the shared addition is settled.
84
+ expect(
85
+ HtmlDiff.executeThreeWay('baseline.', 'baseline. And I add more here', 'baseline. And I add more things here')
86
+ ).toBe("baseline. And I add more<del class='diffdel cp' data-author='cp'>&nbsp;things</del> here")
87
+ })
88
+
89
+ it("CP rewrote one of Me's added words — del-cp on the replaced word + ins-cp on the replacement", () => {
90
+ // Non-subset variant: CP didn't just delete, they substituted.
91
+ // Me: "And I add more things here"
92
+ // CP: "And I add more anything here"
93
+ // The single word differs — the intent reading is "CP wants
94
+ // things replaced with anything".
95
+ expect(
96
+ HtmlDiff.executeThreeWay(
97
+ 'baseline.',
98
+ 'baseline. And I add more anything here',
99
+ 'baseline. And I add more things here'
100
+ )
101
+ ).toBe(
102
+ "baseline. And I add more <ins class='diffins cp' data-author='cp'>anything</ins><del class='diffdel cp' data-author='cp'>things</del> here"
103
+ )
104
+ })
105
+
106
+ it("CP extended Me's addition with extra words — ins-cp on the additions, no del", () => {
107
+ // CP added beyond me — cp-extras stay as ins-cp, no del-cp
108
+ // since CP didn't remove anything.
109
+ // Me: "And I add more things here"
110
+ // CP: "And I add more things and other stuff here"
111
+ expect(
112
+ HtmlDiff.executeThreeWay(
113
+ 'baseline.',
114
+ 'baseline. And I add more things and other stuff here',
115
+ 'baseline. And I add more things here'
116
+ )
117
+ ).toBe("baseline. And I add more things<ins class='diffins cp' data-author='cp'>&nbsp;and other stuff</ins> here")
118
+ })
119
+
120
+ it("Me added text that CP didn't engage with — stays as ins-me, NOT del-cp", () => {
121
+ // The critical inverse: Me appends a new paragraph that CP
122
+ // doesn't have anything for at that boundary. This is a
123
+ // genuine Me-side insertion — NOT a "CP removed" event. The
124
+ // emitBoundary's single-side branch (!hasCp) preserves
125
+ // ins-me attribution. Without this carve-out, every Me
126
+ // insertion would be mis-attributed as "CP wants this gone",
127
+ // even when CP never engaged with the content.
128
+ expect(HtmlDiff.executeThreeWay('baseline.', 'baseline.', 'baseline. New paragraph Me added.')).toBe(
129
+ "baseline.<ins class='diffins me' data-author='me'>&nbsp;New paragraph Me added.</ins>"
130
+ )
131
+ })
132
+
133
+ it('CP and Me each added different content at different boundaries — each side keeps their own attribution', () => {
134
+ // Me added at the end of one sentence; CP added at the end of
135
+ // another. Different genesis boundaries — each goes through
136
+ // the !hasCp / !hasMe single-side branch. Neither side's
137
+ // addition surfaces as the other's deletion.
138
+ expect(
139
+ HtmlDiff.executeThreeWay(
140
+ 'First sentence. Second sentence.',
141
+ 'First sentence with cp-addition. Second sentence.',
142
+ 'First sentence. Second sentence with me-addition.'
143
+ )
144
+ ).toBe(
145
+ "First sentence<ins class='diffins cp' data-author='cp'>&nbsp;with cp-addition</ins>. Second sentence<ins class='diffins me' data-author='me'>&nbsp;with me-addition</ins>."
146
+ )
147
+ })
148
+
74
149
  it('Stable across no-change rounds — V5 produces same output as V3 when V5==V3', () => {
75
150
  // The user's V3/V5 invariant: when neither party changes their position
76
151
  // in a subsequent turn, the diff should look identical to the previous
@@ -120,11 +195,19 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
120
195
  })
121
196
 
122
197
  it('cp matches genesis (only Me changed)', () => {
123
- expect(HtmlDiff.executeThreeWay('Hello world', 'Hello world', 'Hello brave world')).toContain("data-author='me'")
198
+ // Negative assertion is load-bearing: without `not.toContain`
199
+ // a cp↔me swap inside the genesis-spine merge would still
200
+ // emit `data-author='cp'` somewhere in the output and the
201
+ // positive assertion would silently pass.
202
+ const out = HtmlDiff.executeThreeWay('Hello world', 'Hello world', 'Hello brave world')
203
+ expect(out).toContain("data-author='me'")
204
+ expect(out).not.toContain("data-author='cp'")
124
205
  })
125
206
 
126
207
  it('me matches genesis (only CP changed)', () => {
127
- expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello world')).toContain("data-author='cp'")
208
+ const out = HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello world')
209
+ expect(out).toContain("data-author='cp'")
210
+ expect(out).not.toContain("data-author='me'")
128
211
  })
129
212
  })
130
213
 
@@ -144,6 +227,24 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
144
227
  expect(out).toMatch(/<p>First paragraph.*data-author='cp'.*<\/p>/)
145
228
  expect(out).toMatch(/<p>Second paragraph.*data-author='me'.*<\/p>/)
146
229
  })
230
+
231
+ it('overlapping formatting wraps from each author do not unbalance the emission stack', () => {
232
+ // Genesis: plain "three". CP wrapped it in <strong>, Me in <u>. The
233
+ // mod-strong (cp) and mod-u (me) wraps cross: <strong> opens before
234
+ // <u>, but </strong> arrives before </u>. The emitter must split
235
+ // the inner wrap so the output stays well-formed instead of
236
+ // throwing an unbalanced-stack error.
237
+ //
238
+ // Under the intent-reading model, Me's `<u>` wrap is a
239
+ // formatting choice CP didn't make — surfaces as a CP-attributed
240
+ // mod wrap (since CP's view doesn't include the underline). The
241
+ // load-bearing assertion here is that the emission stays
242
+ // balanced — the exact mod-author labelling reflects the
243
+ // asymmetric reading and matches the rest of the suite.
244
+ expect(HtmlDiff.executeThreeWay('three', '<strong>three</strong>', '<u>three</u>')).toBe(
245
+ "<strong><ins class='mod strong cp' data-author='cp'><ins class='mod u cp' data-author='cp'>three</ins></ins></strong><ins class='mod u cp' data-author='cp'></ins>"
246
+ )
247
+ })
147
248
  })
148
249
 
149
250
  describe('options pass-through', () => {
@@ -152,10 +253,135 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
152
253
  const without = HtmlDiff.executeThreeWay('a b', 'a b', 'a b')
153
254
  const withFlag = HtmlDiff.executeThreeWay('a b', 'a b', 'a b', { ignoreWhitespaceDifferences: true })
154
255
  expect(without).toContain("data-author='me'")
256
+ // CP matches genesis — any cp attribution would be a mis-merge.
257
+ expect(without).not.toContain("data-author='cp'")
155
258
  expect(withFlag).not.toContain('data-author=')
156
259
  })
157
260
  })
158
261
 
262
+ describe('stack-balance defence', () => {
263
+ // The emission walks segments built by `buildSegments`: ins/del
264
+ // segments go through `insertTag` (which manages the formatting-
265
+ // tag stack), but equal segments push raw words straight to the
266
+ // content buffer. When a formatting opener is in a del segment
267
+ // and its matching closer falls in an equal segment, the stack
268
+ // entry never gets popped — the emitter used to throw "emission
269
+ // left 1 unclosed formatting tag(s) on the stack" and crash the
270
+ // caller. Now it closes the leftover wraps defensively with
271
+ // `</ins>` so the output stays renderable.
272
+
273
+ it('CP inserted a <strong> opener whose closer is matched as equal — does not throw', () => {
274
+ // Genesis has an orphan closer (`X</strong>`); CP wrapped X in
275
+ // a fresh `<strong>`. The opener is ins-cp (no genesis match)
276
+ // but the closer is shared by all three and emits as equal.
277
+ // The mod-`<ins>` opened on the strong push needs to be closed
278
+ // somehow; the defensive path emits a trailing `</ins>`.
279
+ expect(() => HtmlDiff.executeThreeWay('X</strong>', '<strong>X</strong>', 'X</strong>')).not.toThrow()
280
+ })
281
+
282
+ it('CP deleted only the <strong> opener — does not throw', () => {
283
+ // Symmetric: genesis had `<strong>X</strong>`, CP dropped the
284
+ // opener but kept the closer. The opener-delete pushes onto
285
+ // the stack and the closer arrives via an equal segment.
286
+ expect(() => HtmlDiff.executeThreeWay('<strong>X</strong>', 'X</strong>', '<strong>X</strong>')).not.toThrow()
287
+ })
288
+
289
+ it('emits the defensive </ins> close and logs a warning when the stack is unbalanced', async () => {
290
+ const warn = vi.spyOn(console, 'warn').mockImplementation(() => {})
291
+ try {
292
+ const out = HtmlDiff.executeThreeWay('X</strong>', '<strong>X</strong>', 'X</strong>')
293
+ // The content survives.
294
+ expect(out).toContain('X')
295
+ // The defensive close path actually ran — output contains
296
+ // at least one `</ins>` that wasn't paired by `insertTag`
297
+ // (the only way the defensive branch can add one).
298
+ expect(out).toMatch(/<\/ins>/)
299
+ // And the warning was emitted. Without this assertion the path
300
+ // could silently stop firing in a future refactor and the
301
+ // test would still pass on the (incidentally-present) content.
302
+ expect(warn).toHaveBeenCalledWith(expect.stringContaining('unclosed formatting wrap'))
303
+ } finally {
304
+ warn.mockRestore()
305
+ }
306
+ })
307
+ })
308
+
309
+ describe('WORD_ALIGNED_OPTIONS — opinionated consumer defaults', () => {
310
+ // The library default (`orphanMatchThreshold = 0`) keeps every LCS
311
+ // match, however small — which fragments long sentence rewrites
312
+ // into many tiny ins/del pairs around stray word matches. Word's
313
+ // track-changes collapses those into a single coarse del+ins,
314
+ // which is markedly more readable for legal text. The exported
315
+ // `WORD_ALIGNED_OPTIONS` lets consumers opt into that without
316
+ // re-tuning the magic number themselves.
317
+ const longGenesis =
318
+ '"Specified Indebtedness" will have the meaning specified in Section 14 and shall include, with respect to Party B, any obligation (whether present or future, contingent or otherwise) for the payment or repayment of money.'
319
+ const longCp =
320
+ '"Specified Indebtedness" will have the meaning specified in Section 14 of the Agreement except that such term shall not include obligations.'
321
+
322
+ it('exports a 0.25 orphan threshold tuned for Word-aligned output', () => {
323
+ expect(WORD_ALIGNED_OPTIONS).toEqual({ orphanMatchThreshold: 0.25 })
324
+ })
325
+
326
+ it('plumbs through HtmlDiff.execute and reduces fragmentation versus the bare default', () => {
327
+ const bare = HtmlDiff.execute(longGenesis, longCp)
328
+ const aligned = HtmlDiff.execute(longGenesis, longCp, WORD_ALIGNED_OPTIONS)
329
+ const count = (s: string, re: RegExp) => (s.match(re) ?? []).length
330
+ // The bare default keeps every tiny match — Word-aligned produces
331
+ // strictly fewer ins/del wrappers for the same input.
332
+ expect(count(aligned, /<ins/g)).toBeLessThan(count(bare, /<ins/g))
333
+ expect(count(aligned, /<del/g)).toBeLessThan(count(bare, /<del/g))
334
+ })
335
+
336
+ it('plumbs through HtmlDiff.executeThreeWay too', () => {
337
+ const bare = HtmlDiff.executeThreeWay(longGenesis, longCp, longGenesis)
338
+ const aligned = HtmlDiff.executeThreeWay(longGenesis, longCp, longGenesis, WORD_ALIGNED_OPTIONS)
339
+ const count = (s: string, re: RegExp) => (s.match(re) ?? []).length
340
+ expect(count(aligned, /<ins/g)).toBeLessThan(count(bare, /<ins/g))
341
+ expect(count(aligned, /<del/g)).toBeLessThan(count(bare, /<del/g))
342
+ })
343
+ })
344
+
345
+ describe('orphan-match guard for structural tags', () => {
346
+ // Real regression from the live preview (Additional Condition
347
+ // Precedent in the 2002 ISDA Schedule): when CP deletes a section
348
+ // whose answer renders as an empty formatting shell —
349
+ // <p data-html="x"><em><strong></strong></em></p>
350
+ // — the `</strong>` and `</em>` matches sit between two content
351
+ // deletions ("Heading. " before, body after). At
352
+ // WORD_ALIGNED_OPTIONS.orphanMatchThreshold=0.25 those structural
353
+ // matches were rejected as orphans, swallowed into the deletion
354
+ // span, and the browser auto-closed the openers AT THE END of
355
+ // the deletion — visually rendering the entire deletion as
356
+ // bold-italic. The orphan filter now exempts tag-only matches
357
+ // so structural boundaries always survive.
358
+
359
+ it('CP deletes section with em+strong heading + plain body — closers stay between heading and body', () => {
360
+ const genesis =
361
+ '<p data-html="x"><em><strong>Additional Condition Precedent. </strong></em>For the purposes of Section 2(a)(iii).</p>'
362
+ const cp = '<p data-html="x"><em><strong></strong></em></p>'
363
+ const me = genesis
364
+
365
+ const out = HtmlDiff.executeThreeWay(genesis, cp, me, WORD_ALIGNED_OPTIONS)
366
+
367
+ // </strong> appears BEFORE the body deletion — meaning the
368
+ // body sits outside the bold-italic wrap, not inside it.
369
+ const closeStrongIdx = out.indexOf('</strong>')
370
+ const bodyDelIdx = out.indexOf('For the purposes')
371
+ expect(closeStrongIdx).toBeGreaterThan(0)
372
+ expect(bodyDelIdx).toBeGreaterThan(closeStrongIdx)
373
+ // No `<strong>…<del>body` substring exists — confirm by exact
374
+ // shape too. Heading wraps in strong+em, body is a plain del.
375
+ expect(out).toBe(
376
+ '<p data-html="x"><em><strong>' +
377
+ "<del class='diffdel cp' data-author='cp'>Additional Condition Precedent. </del>" +
378
+ '</strong></em>' +
379
+ "<del class='diffdel cp' data-author='cp'>For the purposes of Section 2(a)(iii).</del>" +
380
+ '</p>'
381
+ )
382
+ })
383
+ })
384
+
159
385
  describe('first-turn fallback', () => {
160
386
  it('cp == genesis means CP made no changes — Me-only attribution', () => {
161
387
  // Common case: this is the first turn where the counterparty hasn't
@@ -44,6 +44,14 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
44
44
  })
45
45
 
46
46
  it('Disagreement — different changes at the same place', () => {
47
+ // Genesis cell "five"; CP extended to "five and a half"; Me
48
+ // replaced with "seven". Intent reading from a reviewer's
49
+ // perspective:
50
+ // - del-me "five": Me already removed the genesis word from
51
+ // their cell (genesis tracking).
52
+ // - ins-cp " and a half": CP wants this appended.
53
+ // - del-cp "seven": CP wants Me's "seven" removed (Me has it,
54
+ // CP doesn't).
47
55
  expect(
48
56
  HtmlDiff.executeThreeWay(
49
57
  '<table><tr><td>five</td></tr></table>',
@@ -51,7 +59,7 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
51
59
  '<table><tr><td>seven</td></tr></table>'
52
60
  )
53
61
  ).toBe(
54
- "<table><tr><td><del class='diffdel me' data-author='me'>five</del><ins class='diffins cp' data-author='cp'>&nbsp;and a half</ins><ins class='diffins me' data-author='me'>seven</ins></td></tr></table>"
62
+ "<table><tr><td><del class='diffdel me' data-author='me'>five</del><ins class='diffins cp' data-author='cp'>&nbsp;and a half</ins><del class='diffdel cp' data-author='cp'>seven</del></td></tr></table>"
55
63
  )
56
64
  })
57
65
 
@@ -259,6 +267,75 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
259
267
  const html = `<table>${rows}</table>`
260
268
  expect(HtmlDiff.executeThreeWay(html, html, html)).toBe(html)
261
269
  })
270
+
271
+ it('cell-count mismatch: CP added a column — CP row content is visible (not silently dropped)', () => {
272
+ // Regression: the previous fallback in emitPreservedRow emitted
273
+ // only `del me` + `ins me` for any cell-count mismatch, which
274
+ // silently destroyed CP's row content whenever CP changed the
275
+ // cell count. A reader in cp-only mode would see no trace of
276
+ // CP's added column — a content-loss bug that violates the
277
+ // "CP's changes always visible" invariant.
278
+ const out = HtmlDiff.executeThreeWay(
279
+ '<table><tr><td>a</td><td>b</td></tr></table>',
280
+ '<table><tr><td>a</td><td>X</td><td>b</td></tr></table>',
281
+ '<table><tr><td>a</td><td>b</td></tr></table>'
282
+ )
283
+ expect(out).toBe(
284
+ "<table><tr class='diffdel cp' data-author='cp'><td class='diffdel cp' data-author='cp'><del class='diffdel cp' data-author='cp'>a</del></td><td class='diffdel cp' data-author='cp'><del class='diffdel cp' data-author='cp'>b</del></td></tr><tr class='diffins cp' data-author='cp'><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>a</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>X</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>b</ins></td></tr></table>"
285
+ )
286
+ })
287
+
288
+ it('cell-count mismatch: Me removed a column — symmetric to the CP case', () => {
289
+ const out = HtmlDiff.executeThreeWay(
290
+ '<table><tr><td>a</td><td>b</td></tr></table>',
291
+ '<table><tr><td>a</td><td>b</td></tr></table>',
292
+ '<table><tr><td>a</td></tr></table>'
293
+ )
294
+ expect(out).toBe(
295
+ "<table><tr class='diffdel me' data-author='me'><td class='diffdel me' data-author='me'><del class='diffdel me' data-author='me'>a</del></td><td class='diffdel me' data-author='me'><del class='diffdel me' data-author='me'>b</del></td></tr><tr class='diffins me' data-author='me'><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>a</ins></td></tr></table>"
296
+ )
297
+ })
298
+
299
+ it('CP edited one cell in a row (same shape) — fuzzy-pairs and emits a cell-level diff, not whole-row del+ins', () => {
300
+ // Regression: the 3-way row aligner only did exact lcsAlign over
301
+ // rowKey, so a row where CP edited a single cell's text produced
302
+ // no key match and the algorithm split the row into a whole-row
303
+ // delete + whole-row insert. The 2-way path has always run a
304
+ // fuzzy-pairing pass after lcsAlign; bringing the 3-way path in
305
+ // step removes the asymmetry where cp-only / all-changes views
306
+ // looked materially worse than 2-way for ordinary cell edits.
307
+ //
308
+ // Same-shape genesis/cp/me; CP edited the middle cell's text.
309
+ // Me === genesis. Expect a paired row with cell-level cp-ins
310
+ // markup, NOT two distinct whole-row entries.
311
+ const out = HtmlDiff.executeThreeWay(
312
+ '<table><tr><td>Party A</td><td>old details</td><td>kept</td></tr></table>',
313
+ '<table><tr><td>Party A</td><td>new details</td><td>kept</td></tr></table>',
314
+ '<table><tr><td>Party A</td><td>old details</td><td>kept</td></tr></table>'
315
+ )
316
+ // CP's edit lives inside the row, not as a parallel whole-row
317
+ // delete-then-insert. Whole-row markers would carry `class='diffdel ...'`
318
+ // or `class='diffins ...'` on the `<tr>` itself.
319
+ expect(out).not.toMatch(/<tr [^>]*class=['"]diffdel/)
320
+ expect(out).not.toMatch(/<tr [^>]*class=['"]diffins/)
321
+ expect(out).toContain('Party A')
322
+ expect(out).toContain("data-author='cp'")
323
+ // Me === genesis so any me attribution would indicate a swap.
324
+ expect(out).not.toContain("data-author='me'")
325
+ })
326
+
327
+ it('cell-count mismatch: both sides restructured differently — both ins rows attributed', () => {
328
+ // Genesis 2 cells, CP 3 cells, Me 4 cells. Neither side keeps
329
+ // the genesis shape, so both restructures must be visible.
330
+ const out = HtmlDiff.executeThreeWay(
331
+ '<table><tr><td>a</td><td>b</td></tr></table>',
332
+ '<table><tr><td>a</td><td>X</td><td>b</td></tr></table>',
333
+ '<table><tr><td>a</td><td>b</td><td>Y</td><td>Z</td></tr></table>'
334
+ )
335
+ expect(out).toBe(
336
+ "<table><tr class='diffins cp' data-author='cp'><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>a</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>X</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>b</ins></td></tr><tr class='diffins me' data-author='me'><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>a</ins></td><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>b</ins></td><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>Y</ins></td><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>Z</ins></td></tr></table>"
337
+ )
338
+ })
262
339
  })
263
340
 
264
341
  describe('nested tables', () => {
@@ -270,6 +347,9 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
270
347
  )
271
348
  expect(out).toMatch(/<del[^>]*data-author='cp'[^>]*>inner<\/del>/)
272
349
  expect(out).toMatch(/<ins[^>]*data-author='cp'[^>]*>INNER<\/ins>/)
350
+ // me == genesis here, so any me attribution would indicate a
351
+ // cp↔me swap inside the table-cell merge.
352
+ expect(out).not.toContain("data-author='me'")
273
353
  expect(out.startsWith('<table><tr><td><table>')).toBe(true)
274
354
  expect(out.endsWith('</table></td></tr></table>')).toBe(true)
275
355
  })
@@ -298,4 +378,34 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
298
378
  expect(HtmlDiff.executeThreeWay('<p>a</p>', '<p>a</p>', '<p>a</p>')).toBe('<p>a</p>')
299
379
  })
300
380
  })
381
+
382
+ describe('positional pairing under moderate dissimilarity', () => {
383
+ it('column rename + value rewrite still routes through cell-level diff (not whole-table del+ins)', () => {
384
+ // Real-world regression: cp renamed a column ("Form/Document/Certificate"
385
+ // → "Extra column") and replaced the values in that column with short
386
+ // tokens. Word-level Jaccard between the genesis table and cp's edited
387
+ // table drops to ~0.38 — under the 0.5 threshold the three-way path
388
+ // used to take, which kicked the diff into multi-table content-LCS
389
+ // and produced whole-table del+ins (the CP bubble showed the
390
+ // entire old table struck through and the entire new table inserted).
391
+ // 2-way had no such guard and produced a cell-level diff for the same
392
+ // inputs; lowering the 3-way threshold brings the two paths in step.
393
+ const genesis =
394
+ '<table><tr><td>A</td><td>Form/Document/Certificate</td><td>Date</td></tr><tr><td>Party A</td><td>IRS W-8</td><td>On execution</td></tr></table>'
395
+ const cp =
396
+ '<table><tr><td>A</td><td>Extra column</td><td>Date</td></tr><tr><td>Party A</td><td>Yes</td><td>On execution</td></tr></table>'
397
+ const me = genesis
398
+ const out = HtmlDiff.executeThreeWay(genesis, cp, me)
399
+ // Expect cell-level cp attribution INSIDE the table cells, NOT a
400
+ // whole-table del+ins wrapping the entire <table>.
401
+ expect(out).not.toMatch(/<del[^>]*><table/)
402
+ expect(out).toMatch(/data-author='cp'/)
403
+ // me === genesis, so any me-attribution markers would mean the
404
+ // diff swapped CP's edits onto Me. Negative assertion locks the
405
+ // attribution direction.
406
+ expect(out).not.toContain("data-author='me'")
407
+ expect(out).toContain('Extra column')
408
+ expect(out).toContain('Form/Document/Certificate')
409
+ })
410
+ })
301
411
  })
@@ -138,10 +138,10 @@ describe('Utils', () => {
138
138
  it('combines extraClasses and dataAttrs in one call', () => {
139
139
  expect(
140
140
  Utils.wrapText('hello', 'del', 'diffdel', {
141
- extraClasses: 'me rejects-cp',
142
- dataAttrs: { author: 'me', rejects: 'cp' },
141
+ extraClasses: 'me',
142
+ dataAttrs: { author: 'me', source: 'edit' },
143
143
  })
144
- ).toBe("<del class='diffdel me rejects-cp' data-author='me' data-rejects='cp'>hello</del>")
144
+ ).toBe("<del class='diffdel me' data-author='me' data-source='edit'>hello</del>")
145
145
  })
146
146
 
147
147
  it('skips the metadata path entirely when neither extraClasses nor dataAttrs is set', () => {