@createiq/htmldiff 1.2.0-beta.1 → 1.2.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +249 -52
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +72 -18
- package/dist/HtmlDiff.d.mts +72 -18
- package/dist/HtmlDiff.mjs +244 -52
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +172 -48
- package/src/ThreeWayDiff.ts +58 -11
- package/src/ThreeWayTable.ts +143 -9
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +232 -6
- package/test/HtmlDiff.threeWay.tables.spec.ts +111 -1
- package/test/Utils.spec.ts +3 -3
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { describe, expect, it } from 'vitest'
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest'
|
|
2
2
|
|
|
3
|
-
import HtmlDiff from '../src/HtmlDiff'
|
|
3
|
+
import HtmlDiff, { WORD_ALIGNED_OPTIONS } from '../src/HtmlDiff'
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Three-way diff tests under the genesis-spine model.
|
|
@@ -43,9 +43,13 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
43
43
|
|
|
44
44
|
it('CP and Me each change the same word differently', () => {
|
|
45
45
|
// Genesis: "Hello world". CP made "Hello cruel world", Me made "Hello brave world".
|
|
46
|
-
//
|
|
46
|
+
// Both inserted at the same boundary. Under the intent-reading
|
|
47
|
+
// model, the reviewer sees CP's proposal relative to Me's
|
|
48
|
+
// current content: "CP wants `cruel` where Me has `brave`" —
|
|
49
|
+
// ins-cp `cruel` + del-cp `brave`. Reads as a substitution
|
|
50
|
+
// intent, which is what a legal reviewer needs to act on.
|
|
47
51
|
expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello brave world')).toBe(
|
|
48
|
-
"Hello <ins class='diffins cp' data-author='cp'>cruel
|
|
52
|
+
"Hello <ins class='diffins cp' data-author='cp'>cruel</ins><del class='diffdel cp' data-author='cp'>brave</del> world"
|
|
49
53
|
)
|
|
50
54
|
})
|
|
51
55
|
|
|
@@ -71,6 +75,77 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
71
75
|
)
|
|
72
76
|
})
|
|
73
77
|
|
|
78
|
+
it("CP accepted Me's addition with a word removed — del-cp on the removed word", () => {
|
|
79
|
+
// Real flow on the live preview:
|
|
80
|
+
// - Me appends "And I add more things here"
|
|
81
|
+
// - CP "accepts" Me's addition but deletes the word "things"
|
|
82
|
+
// Intent reading: CP wants "things" gone. del-cp on the me-only
|
|
83
|
+
// word; the rest of the shared addition is settled.
|
|
84
|
+
expect(
|
|
85
|
+
HtmlDiff.executeThreeWay('baseline.', 'baseline. And I add more here', 'baseline. And I add more things here')
|
|
86
|
+
).toBe("baseline. And I add more<del class='diffdel cp' data-author='cp'> things</del> here")
|
|
87
|
+
})
|
|
88
|
+
|
|
89
|
+
it("CP rewrote one of Me's added words — del-cp on the replaced word + ins-cp on the replacement", () => {
|
|
90
|
+
// Non-subset variant: CP didn't just delete, they substituted.
|
|
91
|
+
// Me: "And I add more things here"
|
|
92
|
+
// CP: "And I add more anything here"
|
|
93
|
+
// The single word differs — the intent reading is "CP wants
|
|
94
|
+
// things replaced with anything".
|
|
95
|
+
expect(
|
|
96
|
+
HtmlDiff.executeThreeWay(
|
|
97
|
+
'baseline.',
|
|
98
|
+
'baseline. And I add more anything here',
|
|
99
|
+
'baseline. And I add more things here'
|
|
100
|
+
)
|
|
101
|
+
).toBe(
|
|
102
|
+
"baseline. And I add more <ins class='diffins cp' data-author='cp'>anything</ins><del class='diffdel cp' data-author='cp'>things</del> here"
|
|
103
|
+
)
|
|
104
|
+
})
|
|
105
|
+
|
|
106
|
+
it("CP extended Me's addition with extra words — ins-cp on the additions, no del", () => {
|
|
107
|
+
// CP added beyond me — cp-extras stay as ins-cp, no del-cp
|
|
108
|
+
// since CP didn't remove anything.
|
|
109
|
+
// Me: "And I add more things here"
|
|
110
|
+
// CP: "And I add more things and other stuff here"
|
|
111
|
+
expect(
|
|
112
|
+
HtmlDiff.executeThreeWay(
|
|
113
|
+
'baseline.',
|
|
114
|
+
'baseline. And I add more things and other stuff here',
|
|
115
|
+
'baseline. And I add more things here'
|
|
116
|
+
)
|
|
117
|
+
).toBe("baseline. And I add more things<ins class='diffins cp' data-author='cp'> and other stuff</ins> here")
|
|
118
|
+
})
|
|
119
|
+
|
|
120
|
+
it("Me added text that CP didn't engage with — stays as ins-me, NOT del-cp", () => {
|
|
121
|
+
// The critical inverse: Me appends a new paragraph that CP
|
|
122
|
+
// doesn't have anything for at that boundary. This is a
|
|
123
|
+
// genuine Me-side insertion — NOT a "CP removed" event. The
|
|
124
|
+
// emitBoundary's single-side branch (!hasCp) preserves
|
|
125
|
+
// ins-me attribution. Without this carve-out, every Me
|
|
126
|
+
// insertion would be mis-attributed as "CP wants this gone",
|
|
127
|
+
// even when CP never engaged with the content.
|
|
128
|
+
expect(HtmlDiff.executeThreeWay('baseline.', 'baseline.', 'baseline. New paragraph Me added.')).toBe(
|
|
129
|
+
"baseline.<ins class='diffins me' data-author='me'> New paragraph Me added.</ins>"
|
|
130
|
+
)
|
|
131
|
+
})
|
|
132
|
+
|
|
133
|
+
it('CP and Me each added different content at different boundaries — each side keeps their own attribution', () => {
|
|
134
|
+
// Me added at the end of one sentence; CP added at the end of
|
|
135
|
+
// another. Different genesis boundaries — each goes through
|
|
136
|
+
// the !hasCp / !hasMe single-side branch. Neither side's
|
|
137
|
+
// addition surfaces as the other's deletion.
|
|
138
|
+
expect(
|
|
139
|
+
HtmlDiff.executeThreeWay(
|
|
140
|
+
'First sentence. Second sentence.',
|
|
141
|
+
'First sentence with cp-addition. Second sentence.',
|
|
142
|
+
'First sentence. Second sentence with me-addition.'
|
|
143
|
+
)
|
|
144
|
+
).toBe(
|
|
145
|
+
"First sentence<ins class='diffins cp' data-author='cp'> with cp-addition</ins>. Second sentence<ins class='diffins me' data-author='me'> with me-addition</ins>."
|
|
146
|
+
)
|
|
147
|
+
})
|
|
148
|
+
|
|
74
149
|
it('Stable across no-change rounds — V5 produces same output as V3 when V5==V3', () => {
|
|
75
150
|
// The user's V3/V5 invariant: when neither party changes their position
|
|
76
151
|
// in a subsequent turn, the diff should look identical to the previous
|
|
@@ -120,11 +195,19 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
120
195
|
})
|
|
121
196
|
|
|
122
197
|
it('cp matches genesis (only Me changed)', () => {
|
|
123
|
-
|
|
198
|
+
// Negative assertion is load-bearing: without `not.toContain`
|
|
199
|
+
// a cp↔me swap inside the genesis-spine merge would still
|
|
200
|
+
// emit `data-author='cp'` somewhere in the output and the
|
|
201
|
+
// positive assertion would silently pass.
|
|
202
|
+
const out = HtmlDiff.executeThreeWay('Hello world', 'Hello world', 'Hello brave world')
|
|
203
|
+
expect(out).toContain("data-author='me'")
|
|
204
|
+
expect(out).not.toContain("data-author='cp'")
|
|
124
205
|
})
|
|
125
206
|
|
|
126
207
|
it('me matches genesis (only CP changed)', () => {
|
|
127
|
-
|
|
208
|
+
const out = HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello world')
|
|
209
|
+
expect(out).toContain("data-author='cp'")
|
|
210
|
+
expect(out).not.toContain("data-author='me'")
|
|
128
211
|
})
|
|
129
212
|
})
|
|
130
213
|
|
|
@@ -144,6 +227,24 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
144
227
|
expect(out).toMatch(/<p>First paragraph.*data-author='cp'.*<\/p>/)
|
|
145
228
|
expect(out).toMatch(/<p>Second paragraph.*data-author='me'.*<\/p>/)
|
|
146
229
|
})
|
|
230
|
+
|
|
231
|
+
it('overlapping formatting wraps from each author do not unbalance the emission stack', () => {
|
|
232
|
+
// Genesis: plain "three". CP wrapped it in <strong>, Me in <u>. The
|
|
233
|
+
// mod-strong (cp) and mod-u (me) wraps cross: <strong> opens before
|
|
234
|
+
// <u>, but </strong> arrives before </u>. The emitter must split
|
|
235
|
+
// the inner wrap so the output stays well-formed instead of
|
|
236
|
+
// throwing an unbalanced-stack error.
|
|
237
|
+
//
|
|
238
|
+
// Under the intent-reading model, Me's `<u>` wrap is a
|
|
239
|
+
// formatting choice CP didn't make — surfaces as a CP-attributed
|
|
240
|
+
// mod wrap (since CP's view doesn't include the underline). The
|
|
241
|
+
// load-bearing assertion here is that the emission stays
|
|
242
|
+
// balanced — the exact mod-author labelling reflects the
|
|
243
|
+
// asymmetric reading and matches the rest of the suite.
|
|
244
|
+
expect(HtmlDiff.executeThreeWay('three', '<strong>three</strong>', '<u>three</u>')).toBe(
|
|
245
|
+
"<strong><ins class='mod strong cp' data-author='cp'><ins class='mod u cp' data-author='cp'>three</ins></ins></strong><ins class='mod u cp' data-author='cp'></ins>"
|
|
246
|
+
)
|
|
247
|
+
})
|
|
147
248
|
})
|
|
148
249
|
|
|
149
250
|
describe('options pass-through', () => {
|
|
@@ -152,10 +253,135 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
152
253
|
const without = HtmlDiff.executeThreeWay('a b', 'a b', 'a b')
|
|
153
254
|
const withFlag = HtmlDiff.executeThreeWay('a b', 'a b', 'a b', { ignoreWhitespaceDifferences: true })
|
|
154
255
|
expect(without).toContain("data-author='me'")
|
|
256
|
+
// CP matches genesis — any cp attribution would be a mis-merge.
|
|
257
|
+
expect(without).not.toContain("data-author='cp'")
|
|
155
258
|
expect(withFlag).not.toContain('data-author=')
|
|
156
259
|
})
|
|
157
260
|
})
|
|
158
261
|
|
|
262
|
+
describe('stack-balance defence', () => {
|
|
263
|
+
// The emission walks segments built by `buildSegments`: ins/del
|
|
264
|
+
// segments go through `insertTag` (which manages the formatting-
|
|
265
|
+
// tag stack), but equal segments push raw words straight to the
|
|
266
|
+
// content buffer. When a formatting opener is in a del segment
|
|
267
|
+
// and its matching closer falls in an equal segment, the stack
|
|
268
|
+
// entry never gets popped — the emitter used to throw "emission
|
|
269
|
+
// left 1 unclosed formatting tag(s) on the stack" and crash the
|
|
270
|
+
// caller. Now it closes the leftover wraps defensively with
|
|
271
|
+
// `</ins>` so the output stays renderable.
|
|
272
|
+
|
|
273
|
+
it('CP inserted a <strong> opener whose closer is matched as equal — does not throw', () => {
|
|
274
|
+
// Genesis has an orphan closer (`X</strong>`); CP wrapped X in
|
|
275
|
+
// a fresh `<strong>`. The opener is ins-cp (no genesis match)
|
|
276
|
+
// but the closer is shared by all three and emits as equal.
|
|
277
|
+
// The mod-`<ins>` opened on the strong push needs to be closed
|
|
278
|
+
// somehow; the defensive path emits a trailing `</ins>`.
|
|
279
|
+
expect(() => HtmlDiff.executeThreeWay('X</strong>', '<strong>X</strong>', 'X</strong>')).not.toThrow()
|
|
280
|
+
})
|
|
281
|
+
|
|
282
|
+
it('CP deleted only the <strong> opener — does not throw', () => {
|
|
283
|
+
// Symmetric: genesis had `<strong>X</strong>`, CP dropped the
|
|
284
|
+
// opener but kept the closer. The opener-delete pushes onto
|
|
285
|
+
// the stack and the closer arrives via an equal segment.
|
|
286
|
+
expect(() => HtmlDiff.executeThreeWay('<strong>X</strong>', 'X</strong>', '<strong>X</strong>')).not.toThrow()
|
|
287
|
+
})
|
|
288
|
+
|
|
289
|
+
it('emits the defensive </ins> close and logs a warning when the stack is unbalanced', async () => {
|
|
290
|
+
const warn = vi.spyOn(console, 'warn').mockImplementation(() => {})
|
|
291
|
+
try {
|
|
292
|
+
const out = HtmlDiff.executeThreeWay('X</strong>', '<strong>X</strong>', 'X</strong>')
|
|
293
|
+
// The content survives.
|
|
294
|
+
expect(out).toContain('X')
|
|
295
|
+
// The defensive close path actually ran — output contains
|
|
296
|
+
// at least one `</ins>` that wasn't paired by `insertTag`
|
|
297
|
+
// (the only way the defensive branch can add one).
|
|
298
|
+
expect(out).toMatch(/<\/ins>/)
|
|
299
|
+
// And the warn was emitted. Without this assertion the path
|
|
300
|
+
// could silently stop firing in a future refactor and the
|
|
301
|
+
// test would still pass on the (incidentally-present) content.
|
|
302
|
+
expect(warn).toHaveBeenCalledWith(expect.stringContaining('unclosed formatting wrap'))
|
|
303
|
+
} finally {
|
|
304
|
+
warn.mockRestore()
|
|
305
|
+
}
|
|
306
|
+
})
|
|
307
|
+
})
|
|
308
|
+
|
|
309
|
+
describe('WORD_ALIGNED_OPTIONS — opinionated consumer defaults', () => {
|
|
310
|
+
// The library default (`orphanMatchThreshold = 0`) keeps every LCS
|
|
311
|
+
// match, however small — which fragments long sentence rewrites
|
|
312
|
+
// into many tiny ins/del pairs around stray word matches. Word's
|
|
313
|
+
// track-changes collapses those into a single coarse del+ins,
|
|
314
|
+
// which is markedly more readable for legal text. The exported
|
|
315
|
+
// `WORD_ALIGNED_OPTIONS` lets consumers opt into that without
|
|
316
|
+
// re-tuning the magic number themselves.
|
|
317
|
+
const longGenesis =
|
|
318
|
+
'"Specified Indebtedness" will have the meaning specified in Section 14 and shall include, with respect to Party B, any obligation (whether present or future, contingent or otherwise) for the payment or repayment of money.'
|
|
319
|
+
const longCp =
|
|
320
|
+
'"Specified Indebtedness" will have the meaning specified in Section 14 of the Agreement except that such term shall not include obligations.'
|
|
321
|
+
|
|
322
|
+
it('exports a 0.25 orphan threshold tuned for Word-aligned output', () => {
|
|
323
|
+
expect(WORD_ALIGNED_OPTIONS).toEqual({ orphanMatchThreshold: 0.25 })
|
|
324
|
+
})
|
|
325
|
+
|
|
326
|
+
it('plumbs through HtmlDiff.execute and reduces fragmentation versus the bare default', () => {
|
|
327
|
+
const bare = HtmlDiff.execute(longGenesis, longCp)
|
|
328
|
+
const aligned = HtmlDiff.execute(longGenesis, longCp, WORD_ALIGNED_OPTIONS)
|
|
329
|
+
const count = (s: string, re: RegExp) => (s.match(re) ?? []).length
|
|
330
|
+
// The bare default keeps every tiny match — Word-aligned produces
|
|
331
|
+
// strictly fewer ins/del wrappers for the same input.
|
|
332
|
+
expect(count(aligned, /<ins/g)).toBeLessThan(count(bare, /<ins/g))
|
|
333
|
+
expect(count(aligned, /<del/g)).toBeLessThan(count(bare, /<del/g))
|
|
334
|
+
})
|
|
335
|
+
|
|
336
|
+
it('plumbs through HtmlDiff.executeThreeWay too', () => {
|
|
337
|
+
const bare = HtmlDiff.executeThreeWay(longGenesis, longCp, longGenesis)
|
|
338
|
+
const aligned = HtmlDiff.executeThreeWay(longGenesis, longCp, longGenesis, WORD_ALIGNED_OPTIONS)
|
|
339
|
+
const count = (s: string, re: RegExp) => (s.match(re) ?? []).length
|
|
340
|
+
expect(count(aligned, /<ins/g)).toBeLessThan(count(bare, /<ins/g))
|
|
341
|
+
expect(count(aligned, /<del/g)).toBeLessThan(count(bare, /<del/g))
|
|
342
|
+
})
|
|
343
|
+
})
|
|
344
|
+
|
|
345
|
+
describe('orphan-match guard for structural tags', () => {
|
|
346
|
+
// Real regression from the live preview (Additional Condition
|
|
347
|
+
// Precedent in the 2002 ISDA Schedule): when CP deletes a section
|
|
348
|
+
// whose answer renders as an empty formatting shell —
|
|
349
|
+
// <p data-html="x"><em><strong></strong></em></p>
|
|
350
|
+
// — the `</strong>` and `</em>` matches sit between two content
|
|
351
|
+
// deletions ("Heading. " before, body after). At
|
|
352
|
+
// WORD_ALIGNED_OPTIONS.orphanMatchThreshold=0.25 those structural
|
|
353
|
+
// matches were rejected as orphans, swallowed into the deletion
|
|
354
|
+
// span, and the browser auto-closed the openers AT THE END of
|
|
355
|
+
// the deletion — visually rendering the entire deletion as
|
|
356
|
+
// bold-italic. The orphan filter now exempts tag-only matches
|
|
357
|
+
// so structural boundaries always survive.
|
|
358
|
+
|
|
359
|
+
it('CP deletes section with em+strong heading + plain body — closers stay between heading and body', () => {
|
|
360
|
+
const genesis =
|
|
361
|
+
'<p data-html="x"><em><strong>Additional Condition Precedent. </strong></em>For the purposes of Section 2(a)(iii).</p>'
|
|
362
|
+
const cp = '<p data-html="x"><em><strong></strong></em></p>'
|
|
363
|
+
const me = genesis
|
|
364
|
+
|
|
365
|
+
const out = HtmlDiff.executeThreeWay(genesis, cp, me, WORD_ALIGNED_OPTIONS)
|
|
366
|
+
|
|
367
|
+
// </strong> appears BEFORE the body deletion — meaning the
|
|
368
|
+
// body sits outside the bold-italic wrap, not inside it.
|
|
369
|
+
const closeStrongIdx = out.indexOf('</strong>')
|
|
370
|
+
const bodyDelIdx = out.indexOf('For the purposes')
|
|
371
|
+
expect(closeStrongIdx).toBeGreaterThan(0)
|
|
372
|
+
expect(bodyDelIdx).toBeGreaterThan(closeStrongIdx)
|
|
373
|
+
// No `<strong>…<del>body` substring exists — confirm by exact
|
|
374
|
+
// shape too. Heading wraps in strong+em, body is a plain del.
|
|
375
|
+
expect(out).toBe(
|
|
376
|
+
'<p data-html="x"><em><strong>' +
|
|
377
|
+
"<del class='diffdel cp' data-author='cp'>Additional Condition Precedent. </del>" +
|
|
378
|
+
'</strong></em>' +
|
|
379
|
+
"<del class='diffdel cp' data-author='cp'>For the purposes of Section 2(a)(iii).</del>" +
|
|
380
|
+
'</p>'
|
|
381
|
+
)
|
|
382
|
+
})
|
|
383
|
+
})
|
|
384
|
+
|
|
159
385
|
describe('first-turn fallback', () => {
|
|
160
386
|
it('cp == genesis means CP made no changes — Me-only attribution', () => {
|
|
161
387
|
// Common case: this is the first turn where the counterparty hasn't
|
|
@@ -44,6 +44,14 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
|
|
|
44
44
|
})
|
|
45
45
|
|
|
46
46
|
it('Disagreement — different changes at the same place', () => {
|
|
47
|
+
// Genesis cell "five"; CP extended to "five and a half"; Me
|
|
48
|
+
// replaced with "seven". Intent reading from a reviewer's
|
|
49
|
+
// perspective:
|
|
50
|
+
// - del-me "five": Me already removed the genesis word from
|
|
51
|
+
// their cell (genesis tracking).
|
|
52
|
+
// - ins-cp " and a half": CP wants this appended.
|
|
53
|
+
// - del-cp "seven": CP wants Me's "seven" removed (Me has it,
|
|
54
|
+
// CP doesn't).
|
|
47
55
|
expect(
|
|
48
56
|
HtmlDiff.executeThreeWay(
|
|
49
57
|
'<table><tr><td>five</td></tr></table>',
|
|
@@ -51,7 +59,7 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
|
|
|
51
59
|
'<table><tr><td>seven</td></tr></table>'
|
|
52
60
|
)
|
|
53
61
|
).toBe(
|
|
54
|
-
"<table><tr><td><del class='diffdel me' data-author='me'>five</del><ins class='diffins cp' data-author='cp'> and a half</ins><
|
|
62
|
+
"<table><tr><td><del class='diffdel me' data-author='me'>five</del><ins class='diffins cp' data-author='cp'> and a half</ins><del class='diffdel cp' data-author='cp'>seven</del></td></tr></table>"
|
|
55
63
|
)
|
|
56
64
|
})
|
|
57
65
|
|
|
@@ -259,6 +267,75 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
|
|
|
259
267
|
const html = `<table>${rows}</table>`
|
|
260
268
|
expect(HtmlDiff.executeThreeWay(html, html, html)).toBe(html)
|
|
261
269
|
})
|
|
270
|
+
|
|
271
|
+
it('cell-count mismatch: CP added a column — CP row content is visible (not silently dropped)', () => {
|
|
272
|
+
// Regression: the previous fallback in emitPreservedRow emitted
|
|
273
|
+
// only `del me` + `ins me` for any cell-count mismatch, which
|
|
274
|
+
// silently destroyed CP's row content whenever CP changed the
|
|
275
|
+
// cell count. A reader in cp-only mode would see no trace of
|
|
276
|
+
// CP's added column — a content-loss bug that violates the
|
|
277
|
+
// "CP's changes always visible" invariant.
|
|
278
|
+
const out = HtmlDiff.executeThreeWay(
|
|
279
|
+
'<table><tr><td>a</td><td>b</td></tr></table>',
|
|
280
|
+
'<table><tr><td>a</td><td>X</td><td>b</td></tr></table>',
|
|
281
|
+
'<table><tr><td>a</td><td>b</td></tr></table>'
|
|
282
|
+
)
|
|
283
|
+
expect(out).toBe(
|
|
284
|
+
"<table><tr class='diffdel cp' data-author='cp'><td class='diffdel cp' data-author='cp'><del class='diffdel cp' data-author='cp'>a</del></td><td class='diffdel cp' data-author='cp'><del class='diffdel cp' data-author='cp'>b</del></td></tr><tr class='diffins cp' data-author='cp'><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>a</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>X</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>b</ins></td></tr></table>"
|
|
285
|
+
)
|
|
286
|
+
})
|
|
287
|
+
|
|
288
|
+
it('cell-count mismatch: Me removed a column — symmetric to the CP case', () => {
|
|
289
|
+
const out = HtmlDiff.executeThreeWay(
|
|
290
|
+
'<table><tr><td>a</td><td>b</td></tr></table>',
|
|
291
|
+
'<table><tr><td>a</td><td>b</td></tr></table>',
|
|
292
|
+
'<table><tr><td>a</td></tr></table>'
|
|
293
|
+
)
|
|
294
|
+
expect(out).toBe(
|
|
295
|
+
"<table><tr class='diffdel me' data-author='me'><td class='diffdel me' data-author='me'><del class='diffdel me' data-author='me'>a</del></td><td class='diffdel me' data-author='me'><del class='diffdel me' data-author='me'>b</del></td></tr><tr class='diffins me' data-author='me'><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>a</ins></td></tr></table>"
|
|
296
|
+
)
|
|
297
|
+
})
|
|
298
|
+
|
|
299
|
+
it('CP edited one cell in a row (same shape) — fuzzy-pairs and emits a cell-level diff, not whole-row del+ins', () => {
|
|
300
|
+
// Regression: the 3-way row aligner only did exact lcsAlign over
|
|
301
|
+
// rowKey, so a row where CP edited a single cell's text produced
|
|
302
|
+
// no key match and the algorithm split the row into a whole-row
|
|
303
|
+
// delete + whole-row insert. The 2-way path has always run a
|
|
304
|
+
// fuzzy-pairing pass after lcsAlign; bringing the 3-way path in
|
|
305
|
+
// step removes the asymmetry where cp-only / all-changes views
|
|
306
|
+
// looked materially worse than 2-way for ordinary cell edits.
|
|
307
|
+
//
|
|
308
|
+
// Same-shape genesis/cp/me; CP edited the middle cell's text.
|
|
309
|
+
// Me === genesis. Expect a paired row with cell-level cp-ins
|
|
310
|
+
// markup, NOT two distinct whole-row entries.
|
|
311
|
+
const out = HtmlDiff.executeThreeWay(
|
|
312
|
+
'<table><tr><td>Party A</td><td>old details</td><td>kept</td></tr></table>',
|
|
313
|
+
'<table><tr><td>Party A</td><td>new details</td><td>kept</td></tr></table>',
|
|
314
|
+
'<table><tr><td>Party A</td><td>old details</td><td>kept</td></tr></table>'
|
|
315
|
+
)
|
|
316
|
+
// CP's edit lives inside the row, not as a parallel whole-row
|
|
317
|
+
// delete-then-insert. Whole-row markers would carry `class='diffdel ...'`
|
|
318
|
+
// or `class='diffins ...'` on the `<tr>` itself.
|
|
319
|
+
expect(out).not.toMatch(/<tr [^>]*class=['"]diffdel/)
|
|
320
|
+
expect(out).not.toMatch(/<tr [^>]*class=['"]diffins/)
|
|
321
|
+
expect(out).toContain('Party A')
|
|
322
|
+
expect(out).toContain("data-author='cp'")
|
|
323
|
+
// Me === genesis so any me attribution would indicate a swap.
|
|
324
|
+
expect(out).not.toContain("data-author='me'")
|
|
325
|
+
})
|
|
326
|
+
|
|
327
|
+
it('cell-count mismatch: both sides restructured differently — both ins rows attributed', () => {
|
|
328
|
+
// Genesis 2 cells, CP 3 cells, Me 4 cells. Neither side keeps
|
|
329
|
+
// the genesis shape, so both restructures must be visible.
|
|
330
|
+
const out = HtmlDiff.executeThreeWay(
|
|
331
|
+
'<table><tr><td>a</td><td>b</td></tr></table>',
|
|
332
|
+
'<table><tr><td>a</td><td>X</td><td>b</td></tr></table>',
|
|
333
|
+
'<table><tr><td>a</td><td>b</td><td>Y</td><td>Z</td></tr></table>'
|
|
334
|
+
)
|
|
335
|
+
expect(out).toBe(
|
|
336
|
+
"<table><tr class='diffins cp' data-author='cp'><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>a</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>X</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>b</ins></td></tr><tr class='diffins me' data-author='me'><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>a</ins></td><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>b</ins></td><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>Y</ins></td><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>Z</ins></td></tr></table>"
|
|
337
|
+
)
|
|
338
|
+
})
|
|
262
339
|
})
|
|
263
340
|
|
|
264
341
|
describe('nested tables', () => {
|
|
@@ -270,6 +347,9 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
|
|
|
270
347
|
)
|
|
271
348
|
expect(out).toMatch(/<del[^>]*data-author='cp'[^>]*>inner<\/del>/)
|
|
272
349
|
expect(out).toMatch(/<ins[^>]*data-author='cp'[^>]*>INNER<\/ins>/)
|
|
350
|
+
// me == genesis here, so any me attribution would indicate a
|
|
351
|
+
// cp↔me swap inside the table-cell merge.
|
|
352
|
+
expect(out).not.toContain("data-author='me'")
|
|
273
353
|
expect(out.startsWith('<table><tr><td><table>')).toBe(true)
|
|
274
354
|
expect(out.endsWith('</table></td></tr></table>')).toBe(true)
|
|
275
355
|
})
|
|
@@ -298,4 +378,34 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
|
|
|
298
378
|
expect(HtmlDiff.executeThreeWay('<p>a</p>', '<p>a</p>', '<p>a</p>')).toBe('<p>a</p>')
|
|
299
379
|
})
|
|
300
380
|
})
|
|
381
|
+
|
|
382
|
+
describe('positional pairing under moderate dissimilarity', () => {
|
|
383
|
+
it('column rename + value rewrite still routes through cell-level diff (not whole-table del+ins)', () => {
|
|
384
|
+
// Real-world regression: cp renamed a column ("Form/Document/Certificate"
|
|
385
|
+
// → "Extra column") and replaced the values in that column with short
|
|
386
|
+
// tokens. Word-level Jaccard between the genesis table and cp's edited
|
|
387
|
+
// table drops to ~0.38 — under the 0.5 threshold the three-way path
|
|
388
|
+
// used to take, which kicked the diff into multi-table content-LCS
|
|
389
|
+
// and produced whole-table del+ins (the CP bubble showed the
|
|
390
|
+
// entire old table struck through and the entire new table inserted).
|
|
391
|
+
// 2-way had no such guard and produced a cell-level diff for the same
|
|
392
|
+
// inputs; lowering the 3-way threshold brings the two paths in step.
|
|
393
|
+
const genesis =
|
|
394
|
+
'<table><tr><td>A</td><td>Form/Document/Certificate</td><td>Date</td></tr><tr><td>Party A</td><td>IRS W-8</td><td>On execution</td></tr></table>'
|
|
395
|
+
const cp =
|
|
396
|
+
'<table><tr><td>A</td><td>Extra column</td><td>Date</td></tr><tr><td>Party A</td><td>Yes</td><td>On execution</td></tr></table>'
|
|
397
|
+
const me = genesis
|
|
398
|
+
const out = HtmlDiff.executeThreeWay(genesis, cp, me)
|
|
399
|
+
// Expect cell-level cp attribution INSIDE the table cells, NOT a
|
|
400
|
+
// whole-table del+ins wrapping the entire <table>.
|
|
401
|
+
expect(out).not.toMatch(/<del[^>]*><table/)
|
|
402
|
+
expect(out).toMatch(/data-author='cp'/)
|
|
403
|
+
// me === genesis, so any me-attribution markers would mean the
|
|
404
|
+
// diff swapped CP's edits onto Me. Negative assertion locks the
|
|
405
|
+
// attribution direction.
|
|
406
|
+
expect(out).not.toContain("data-author='me'")
|
|
407
|
+
expect(out).toContain('Extra column')
|
|
408
|
+
expect(out).toContain('Form/Document/Certificate')
|
|
409
|
+
})
|
|
410
|
+
})
|
|
301
411
|
})
|
package/test/Utils.spec.ts
CHANGED
|
@@ -138,10 +138,10 @@ describe('Utils', () => {
|
|
|
138
138
|
it('combines extraClasses and dataAttrs in one call', () => {
|
|
139
139
|
expect(
|
|
140
140
|
Utils.wrapText('hello', 'del', 'diffdel', {
|
|
141
|
-
extraClasses: 'me
|
|
142
|
-
dataAttrs: { author: 'me',
|
|
141
|
+
extraClasses: 'me',
|
|
142
|
+
dataAttrs: { author: 'me', source: 'edit' },
|
|
143
143
|
})
|
|
144
|
-
).toBe("<del class='diffdel me
|
|
144
|
+
).toBe("<del class='diffdel me' data-author='me' data-source='edit'>hello</del>")
|
|
145
145
|
})
|
|
146
146
|
|
|
147
147
|
it('skips the metadata path entirely when neither extraClasses nor dataAttrs is set', () => {
|