@createiq/htmldiff 1.2.0-beta.0 → 1.2.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,6 +48,21 @@ describe('HtmlDiff', () => {
48
48
  'Some formatted text',
49
49
  "Some <ins class='mod strong i'>formatted</ins> text",
50
50
  ],
51
+ // Overlapping formatting wraps — old wraps a word in <strong>, new wraps the same
52
+ // word in <u>. The wraps cross (mod-strong opens before mod-u, but the </strong>
53
+ // closing arrives before </u>), so emission must split the inner wrap to keep
54
+ // HTML well-formed. Regression: previously left mod-strong unclosed and the
55
+ // 3-way path threw on the unbalanced stack.
56
+ [
57
+ '<strong>three</strong>',
58
+ '<u>three</u>',
59
+ "<ins class='mod strong'><u><ins class='mod u'>three</ins></ins><ins class='mod u'></ins></u>",
60
+ ],
61
+ [
62
+ 'a <strong>three</strong> b',
63
+ 'a <u>three</u> b',
64
+ "a <ins class='mod strong'><u><ins class='mod u'>three</ins></ins><ins class='mod u'></ins></u> b",
65
+ ],
51
66
  [
52
67
  '<table><tr><td>col1</td><td>col2</td></tr><tr><td>Data 1</td><td>Data 2</td></tr></table>',
53
68
  '<table><tr><td>col1</td><td>col2</td></tr></table>',
@@ -1,89 +1,191 @@
1
- import { describe, expect, it } from 'vitest'
1
+ import { describe, expect, it, vi } from 'vitest'
2
2
 
3
- import HtmlDiff from '../src/HtmlDiff'
3
+ import HtmlDiff, { WORD_ALIGNED_OPTIONS } from '../src/HtmlDiff'
4
4
 
5
- describe('HtmlDiff.executeThreeWay', () => {
5
+ /**
6
+ * Three-way diff tests under the genesis-spine model.
7
+ *
8
+ * `executeThreeWay(genesis, cpLatest, meCurrent)` compares both cp and
9
+ * me against the shared common ancestor (genesis). Each side's
10
+ * accumulated changes are attributed independently:
11
+ *
12
+ * - Both authors made the same change → emit plain (settled)
13
+ * - One author changed, the other kept the genesis content → emit
14
+ * that author's change with attribution; the kept content shows
15
+ * "pending" via the del/ins wrapping
16
+ * - Both made different changes at the same place → each shown with
17
+ * its author's attribution
18
+ */
19
+ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
6
20
  describe('attribution matrix', () => {
7
- it('CP-only changes Me made no edits (V3 == V2)', () => {
8
- expect(HtmlDiff.executeThreeWay('Hello world.', 'Hello cruel world.', 'Hello cruel world.')).toBe(
9
- "Hello<ins class='diffins cp' data-author='cp'>&nbsp;cruel</ins> world."
21
+ it('settledboth authors made the same change', () => {
22
+ // Genesis: "Hello world". Both cp and me changed it to "Hello cruel world".
23
+ // The change is settled — emit plain.
24
+ expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello cruel world')).toBe(
25
+ 'Hello cruel world'
10
26
  )
11
27
  })
12
28
 
13
- it('Me-only changes CP made no edits (V2 == V1)', () => {
14
- expect(HtmlDiff.executeThreeWay('Hello world.', 'Hello world.', 'Hello world today.')).toBe(
15
- "Hello world<ins class='diffins me' data-author='me'>&nbsp;today</ins>."
29
+ it('CP changes a word, Me kept the genesis word', () => {
30
+ // Genesis: "Hello world". CP changed to "Hello cruel world". Me kept "Hello world".
31
+ // From Me's view: CP's insertion is pending. Render with cp attribution; Me's
32
+ // text ("world") is preserved verbatim.
33
+ expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello world')).toBe(
34
+ "Hello <ins class='diffins cp' data-author='cp'>cruel </ins>world"
16
35
  )
17
36
  })
18
37
 
19
- it('Me keeps CP insertion (both authors visible)', () => {
20
- expect(HtmlDiff.executeThreeWay('Hello world.', 'Hello cruel world.', 'Hello cruel world today.')).toBe(
21
- "Hello<ins class='diffins cp' data-author='cp'>&nbsp;cruel</ins> world<ins class='diffins me' data-author='me'>&nbsp;today</ins>."
38
+ it('Me changes a word, CP kept genesis', () => {
39
+ expect(HtmlDiff.executeThreeWay('Hello world', 'Hello world', 'Hello cruel world')).toBe(
40
+ "Hello <ins class='diffins me' data-author='me'>cruel </ins>world"
22
41
  )
23
42
  })
24
43
 
25
- it('Me rejects CP insertion (data-rejects markup)', () => {
26
- expect(HtmlDiff.executeThreeWay('Hello world.', 'Hello cruel world.', 'Hello world.')).toBe(
27
- "Hello<del class='diffdel me rejects-cp' data-author='me' data-rejects='cp'>&nbsp;cruel</del> world."
44
+ it('CP and Me each change the same word differently', () => {
45
+ // Genesis: "Hello world". CP made "Hello cruel world", Me made "Hello brave world".
46
+ // Both inserted at the same boundary. Under the intent-reading
47
+ // model, the reviewer sees CP's proposal relative to Me's
48
+ // current content: "CP wants `cruel` where Me has `brave`" —
49
+ // ins-cp `cruel` + del-cp `brave`. Reads as a substitution
50
+ // intent, which is what a legal reviewer needs to act on.
51
+ expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello brave world')).toBe(
52
+ "Hello <ins class='diffins cp' data-author='cp'>cruel</ins><del class='diffdel cp' data-author='cp'>brave</del> world"
28
53
  )
29
54
  })
30
55
 
31
- it('CP-deletion of V1 text surfaces as a CP-attributed <del>', () => {
56
+ it('CP deletes a word, Me kept it', () => {
57
+ // Genesis: "Some really fine print". CP removed "really". Me kept it.
58
+ // Render genesis token "really" with del-cp markup — Me's text still has it.
59
+ expect(HtmlDiff.executeThreeWay('Some really fine print', 'Some fine print', 'Some really fine print')).toBe(
60
+ "Some<del class='diffdel cp' data-author='cp'>&nbsp;really</del> fine print"
61
+ )
62
+ })
63
+
64
+ it('Me deletes a word, CP kept it', () => {
65
+ expect(HtmlDiff.executeThreeWay('Some really fine print', 'Some really fine print', 'Some fine print')).toBe(
66
+ "Some<del class='diffdel me' data-author='me'>&nbsp;really</del> fine print"
67
+ )
68
+ })
69
+
70
+ it('Both authors delete the same content → settled, silenced', () => {
71
+ // Genesis: "Some really fine print". Both removed "really".
72
+ // Settled — neither the deletion nor any del markup appears.
73
+ expect(HtmlDiff.executeThreeWay('Some really fine print', 'Some fine print', 'Some fine print')).toBe(
74
+ 'Some fine print'
75
+ )
76
+ })
77
+
78
+ it("CP accepted Me's addition with a word removed — del-cp on the removed word", () => {
79
+ // Real flow on the live preview:
80
+ // - Me appends "And I add more things here"
81
+ // - CP "accepts" Me's addition but deletes the word "things"
82
+ // Intent reading: CP wants "things" gone. del-cp on the me-only
83
+ // word; the rest of the shared addition is settled.
32
84
  expect(
33
- HtmlDiff.executeThreeWay('Some really fine print here.', 'Some fine print here.', 'Some fine print here.')
34
- ).toBe("Some<del class='diffdel cp' data-author='cp'>&nbsp;really</del> fine print here.")
85
+ HtmlDiff.executeThreeWay('baseline.', 'baseline. And I add more here', 'baseline. And I add more things here')
86
+ ).toBe("baseline. And I add more<del class='diffdel cp' data-author='cp'>&nbsp;things</del> here")
35
87
  })
36
88
 
37
- it('Me-deletion of original V1 text (CP did nothing, Me deleted)', () => {
89
+ it("CP rewrote one of Me's added words — del-cp on the replaced word + ins-cp on the replacement", () => {
90
+ // Non-subset variant: CP didn't just delete, they substituted.
91
+ // Me: "And I add more things here"
92
+ // CP: "And I add more anything here"
93
+ // The single word differs — the intent reading is "CP wants
94
+ // things replaced with anything".
38
95
  expect(
39
96
  HtmlDiff.executeThreeWay(
40
- 'Some really fine print here.',
41
- 'Some really fine print here.',
42
- 'Some fine print here.'
97
+ 'baseline.',
98
+ 'baseline. And I add more anything here',
99
+ 'baseline. And I add more things here'
43
100
  )
44
- ).toBe("Some<del class='diffdel me' data-author='me'>&nbsp;really</del> fine print here.")
101
+ ).toBe(
102
+ "baseline. And I add more <ins class='diffins cp' data-author='cp'>anything</ins><del class='diffdel cp' data-author='cp'>things</del> here"
103
+ )
45
104
  })
46
105
 
47
- it('CP and Me Replace ops in different places (del-then-ins for both)', () => {
48
- // CP did will→shall; Me did thirty→sixty business. Both Replaces
49
- // must emit del-then-ins in source order, matching the 2-way
50
- // convention so the diff reads naturally.
106
+ it("CP extended Me's addition with extra words ins-cp on the additions, no del", () => {
107
+ // CP added beyond me — cp-extras stay as ins-cp, no del-cp
108
+ // since CP didn't remove anything.
109
+ // Me: "And I add more things here"
110
+ // CP: "And I add more things and other stuff here"
51
111
  expect(
52
112
  HtmlDiff.executeThreeWay(
53
- 'The party will pay the fee within thirty days.',
54
- 'The party shall pay the fee within thirty days.',
55
- 'The party shall pay the fee within sixty business days.'
113
+ 'baseline.',
114
+ 'baseline. And I add more things and other stuff here',
115
+ 'baseline. And I add more things here'
56
116
  )
57
- ).toBe(
58
- "The party <del class='diffdel cp' data-author='cp'>will</del><ins class='diffins cp' data-author='cp'>shall</ins> pay the fee within <del class='diffdel me' data-author='me'>thirty</del><ins class='diffins me' data-author='me'>sixty business</ins> days."
59
- )
117
+ ).toBe("baseline. And I add more things<ins class='diffins cp' data-author='cp'>&nbsp;and other stuff</ins> here")
60
118
  })
61
- })
62
119
 
63
- describe('Replace-collision and tail-end edge cases', () => {
64
- it('Replace collision CP and Me each replace the same V2 token', () => {
65
- // V1: "foo" → V2: "bar" (CP Replace). V2: "bar" → V3: "baz" (Me Replace).
66
- // V2's "bar" is replaced-into-by-cp AND replaced-out-by-me reject.
67
- // Off-spine: V1's "foo" (cpDel) emitted before; V3's "baz" (meIns)
68
- // emitted after the reject (mirrors the del-then-ins ordering).
69
- expect(HtmlDiff.executeThreeWay('foo', 'bar', 'baz')).toBe(
70
- "<del class='diffdel cp' data-author='cp'>foo</del><del class='diffdel me rejects-cp' data-author='me' data-rejects='cp'>bar</del><ins class='diffins me' data-author='me'>baz</ins>"
120
+ it("Me added text that CP didn't engage with — stays as ins-me, NOT del-cp", () => {
121
+ // The critical inverse: Me appends a new paragraph that CP
122
+ // doesn't have anything for at that boundary. This is a
123
+ // genuine Me-side insertion — NOT a "CP removed" event. The
124
+ // emitBoundary's single-side branch (!hasCp) preserves
125
+ // ins-me attribution. Without this carve-out, every Me
126
+ // insertion would be mis-attributed as "CP wants this gone",
127
+ // even when CP never engaged with the content.
128
+ expect(HtmlDiff.executeThreeWay('baseline.', 'baseline.', 'baseline. New paragraph Me added.')).toBe(
129
+ "baseline.<ins class='diffins me' data-author='me'>&nbsp;New paragraph Me added.</ins>"
71
130
  )
72
131
  })
73
132
 
74
- it('tail-end interleavings CP deletion + Me insertion both at the end of V2', () => {
75
- // V1 = "Hello world" (CP deletes " world" → V2 = "Hello").
76
- // V2 = "Hello" (Me adds " cruel" → V3 = "Hello cruel").
77
- // Both off-spine ops land at the tail boundary; output order is
78
- // cpDel then meIns (no V2 token to anchor around).
79
- expect(HtmlDiff.executeThreeWay('Hello world', 'Hello', 'Hello cruel')).toBe(
80
- "Hello<del class='diffdel cp' data-author='cp'>&nbsp;world</del><ins class='diffins me' data-author='me'>&nbsp;cruel</ins>"
133
+ it('CP and Me each added different content at different boundaries each side keeps their own attribution', () => {
134
+ // Me added at the end of one sentence; CP added at the end of
135
+ // another. Different genesis boundaries — each goes through
136
+ // the !hasCp / !hasMe single-side branch. Neither side's
137
+ // addition surfaces as the other's deletion.
138
+ expect(
139
+ HtmlDiff.executeThreeWay(
140
+ 'First sentence. Second sentence.',
141
+ 'First sentence with cp-addition. Second sentence.',
142
+ 'First sentence. Second sentence with me-addition.'
143
+ )
144
+ ).toBe(
145
+ "First sentence<ins class='diffins cp' data-author='cp'>&nbsp;with cp-addition</ins>. Second sentence<ins class='diffins me' data-author='me'>&nbsp;with me-addition</ins>."
81
146
  )
82
147
  })
148
+
149
+ it('Stable across no-change rounds — V5 produces same output as V3 when V5==V3', () => {
150
+ // The user's V3/V5 invariant: when neither party changes their position
151
+ // in a subsequent turn, the diff should look identical to the previous
152
+ // turn's diff. With the genesis spine, this falls out automatically.
153
+ const genesis = 'The quick brown fox jumps over the lazy dog'
154
+ const cp = 'The fast brown fox leaps'
155
+ const me = 'The quick brown antelope leaps over the lazy pig'
156
+ const v3Output = HtmlDiff.executeThreeWay(genesis, cp, me)
157
+ const v5Output = HtmlDiff.executeThreeWay(genesis, cp, me)
158
+ expect(v5Output).toBe(v3Output)
159
+ // Sanity check the V3 output contains all four author-attributed changes
160
+ // from the user's expected output (quick→fast cp, fox→antelope me, etc.)
161
+ expect(v3Output).toMatch(/<del class='diffdel cp' data-author='cp'>quick<\/del>/)
162
+ expect(v3Output).toMatch(/<ins class='diffins cp' data-author='cp'>fast<\/ins>/)
163
+ expect(v3Output).toMatch(/<del class='diffdel me' data-author='me'>fox<\/del>/)
164
+ expect(v3Output).toMatch(/<ins class='diffins me' data-author='me'>antelope<\/ins>/)
165
+ })
166
+
167
+ it('Inverted view — switching cp and me args produces inverted attribution', () => {
168
+ // From the user's V4 example: same genesis, but from Party B's view
169
+ // cp and me swap. The output should have all attributions inverted.
170
+ const genesis = 'The quick brown fox jumps over the lazy dog'
171
+ const partyACurrent = 'The quick brown antelope leaps over the lazy pig'
172
+ const partyBCurrent = 'The fast brown fox leaps'
173
+
174
+ const aView = HtmlDiff.executeThreeWay(genesis, partyBCurrent, partyACurrent) // A is me, B is cp
175
+ const bView = HtmlDiff.executeThreeWay(genesis, partyACurrent, partyBCurrent) // B is me, A is cp
176
+
177
+ // A's view: B made fast/leaps changes (CP-attributed), A made antelope/pig (Me).
178
+ expect(aView).toMatch(/<ins class='diffins cp' data-author='cp'>fast<\/ins>/)
179
+ expect(aView).toMatch(/<ins class='diffins me' data-author='me'>antelope<\/ins>/)
180
+
181
+ // B's view: A made antelope/pig (now CP), B made fast/leaps (now Me).
182
+ expect(bView).toMatch(/<ins class='diffins me' data-author='me'>fast<\/ins>/)
183
+ expect(bView).toMatch(/<ins class='diffins cp' data-author='cp'>antelope<\/ins>/)
184
+ })
83
185
  })
84
186
 
85
187
  describe('identity inputs', () => {
86
- it('V1 == V2 == V3 returns the input verbatim', () => {
188
+ it('all three identical input verbatim', () => {
87
189
  const text = '<p>Nothing changed at all.</p>'
88
190
  expect(HtmlDiff.executeThreeWay(text, text, text)).toBe(text)
89
191
  })
@@ -92,77 +194,199 @@ describe('HtmlDiff.executeThreeWay', () => {
92
194
  expect(HtmlDiff.executeThreeWay('', '', '')).toBe('')
93
195
  })
94
196
 
95
- it('V1 == V2 collapses to a single-author diff (CP did nothing)', () => {
96
- const out = HtmlDiff.executeThreeWay('Hello world.', 'Hello world.', 'Hello brave world.')
197
+ it('cp matches genesis (only Me changed)', () => {
198
+ // Negative assertion is load-bearing: without `not.toContain`
199
+ // a cp↔me swap inside the genesis-spine merge would still
200
+ // emit `data-author='cp'` somewhere in the output and the
201
+ // positive assertion would silently pass.
202
+ const out = HtmlDiff.executeThreeWay('Hello world', 'Hello world', 'Hello brave world')
97
203
  expect(out).toContain("data-author='me'")
98
204
  expect(out).not.toContain("data-author='cp'")
99
205
  })
100
206
 
101
- it('V2 == V3 collapses to a single-author diff (Me did nothing)', () => {
102
- const out = HtmlDiff.executeThreeWay('Hello world.', 'Hello cruel world.', 'Hello cruel world.')
207
+ it('me matches genesis (only CP changed)', () => {
208
+ const out = HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello world')
103
209
  expect(out).toContain("data-author='cp'")
104
210
  expect(out).not.toContain("data-author='me'")
105
211
  })
106
212
  })
107
213
 
108
214
  describe('HTML structure handling', () => {
109
- it('preserves wrapping <p> tags around an attributed run', () => {
110
- expect(
111
- HtmlDiff.executeThreeWay('<p>Hello world.</p>', '<p>Hello cruel world.</p>', '<p>Hello cruel world.</p>')
112
- ).toBe("<p>Hello<ins class='diffins cp' data-author='cp'>&nbsp;cruel</ins> world.</p>")
215
+ it('preserves wrapping <p> tags', () => {
216
+ expect(HtmlDiff.executeThreeWay('<p>Hello world.</p>', '<p>Hello cruel world.</p>', '<p>Hello world.</p>')).toBe(
217
+ "<p>Hello<ins class='diffins cp' data-author='cp'>&nbsp;cruel</ins> world.</p>"
218
+ )
113
219
  })
114
220
 
115
- it('attributes formatting-tag edits via the special-case path', () => {
116
- // V1 plain; CP wraps "fee" in <strong>; Me leaves it. The
117
- // formatting-tag special case inside insertTag now carries the
118
- // author class through to the `mod` ins wrapper.
119
- const out = HtmlDiff.executeThreeWay(
120
- '<p>The fee is due.</p>',
121
- '<p>The <strong>fee</strong> is due.</p>',
122
- '<p>The <strong>fee</strong> is due.</p>'
123
- )
124
- expect(out).toContain("data-author='cp'")
125
- expect(out).toMatch(/<ins class='mod[^']*cp'/)
221
+ it('multi-paragraph with edits in different paragraphs by each author', () => {
222
+ const genesis = '<p>First paragraph.</p><p>Second paragraph.</p>'
223
+ const cp = '<p>First paragraph edited by CP.</p><p>Second paragraph.</p>'
224
+ const me = '<p>First paragraph.</p><p>Second paragraph edited by Me.</p>'
225
+ const out = HtmlDiff.executeThreeWay(genesis, cp, me)
226
+ // CP's edit appears in the first paragraph, Me's in the second.
227
+ expect(out).toMatch(/<p>First paragraph.*data-author='cp'.*<\/p>/)
228
+ expect(out).toMatch(/<p>Second paragraph.*data-author='me'.*<\/p>/)
126
229
  })
127
230
 
128
- it('multi-paragraph: each author scoped to their own paragraph', () => {
129
- expect(
130
- HtmlDiff.executeThreeWay(
131
- '<p>First paragraph.</p><p>Second paragraph.</p>',
132
- '<p>First paragraph edited by CP.</p><p>Second paragraph.</p>',
133
- '<p>First paragraph edited by CP.</p><p>Second paragraph also edited by Me.</p>'
134
- )
135
- ).toBe(
136
- "<p>First paragraph<ins class='diffins cp' data-author='cp'>&nbsp;edited by CP</ins>.</p><p>Second paragraph<ins class='diffins me' data-author='me'>&nbsp;also edited by Me</ins>.</p>"
231
+ it('overlapping formatting wraps from each author do not unbalance the emission stack', () => {
232
+ // Genesis: plain "three". CP wrapped it in <strong>, Me in <u>. The
233
+ // mod-strong (cp) and mod-u (me) wraps cross: <strong> opens before
234
+ // <u>, but </strong> arrives before </u>. The emitter must split
235
+ // the inner wrap so the output stays well-formed instead of
236
+ // throwing an unbalanced-stack error.
237
+ //
238
+ // Under the intent-reading model, Me's `<u>` wrap is a
239
+ // formatting choice CP didn't make — it surfaces as a CP-attributed
240
+ // mod wrap (since CP's view doesn't include the underline). The
241
+ // load-bearing assertion here is that the emission stays
242
+ // balanced — the exact mod-author labelling reflects the
243
+ // asymmetric reading and matches the rest of the suite.
244
+ expect(HtmlDiff.executeThreeWay('three', '<strong>three</strong>', '<u>three</u>')).toBe(
245
+ "<strong><ins class='mod strong cp' data-author='cp'><ins class='mod u cp' data-author='cp'>three</ins></ins></strong><ins class='mod u cp' data-author='cp'></ins>"
137
246
  )
138
247
  })
139
248
  })
140
249
 
141
250
  describe('options pass-through', () => {
142
251
  it('honours ignoreWhitespaceDifferences', () => {
252
+ // Genesis: "a  b" (double space). CP keeps it, Me uses "a b" (single space).
143
253
  const without = HtmlDiff.executeThreeWay('a b', 'a b', 'a b')
144
254
  const withFlag = HtmlDiff.executeThreeWay('a b', 'a b', 'a b', { ignoreWhitespaceDifferences: true })
145
- // Without the flag, the whitespace difference triggers a Me-attributed Replace.
146
255
  expect(without).toContain("data-author='me'")
147
- // With the flag, no diff at all.
256
+ // CP matches genesis — any cp attribution would be a mis-merge.
257
+ expect(without).not.toContain("data-author='cp'")
148
258
  expect(withFlag).not.toContain('data-author=')
149
259
  })
260
+ })
261
+
262
+ describe('stack-balance defence', () => {
263
+ // The emission walks segments built by `buildSegments`: ins/del
264
+ // segments go through `insertTag` (which manages the formatting-
265
+ // tag stack), but equal segments push raw words straight to the
266
+ // content buffer. When a formatting opener is in a del segment
267
+ // and its matching closer falls in an equal segment, the stack
268
+ // entry never gets popped — the emitter used to throw "emission
269
+ // left 1 unclosed formatting tag(s) on the stack" and crash the
270
+ // caller. Now it closes the leftover wraps defensively with
271
+ // `</ins>` so the output stays renderable.
272
+
273
+ it('CP inserted a <strong> opener whose closer is matched as equal — does not throw', () => {
274
+ // Genesis has an orphan closer (`X</strong>`); CP wrapped X in
275
+ // a fresh `<strong>`. The opener is ins-cp (no genesis match)
276
+ // but the closer is shared by all three and emits as equal.
277
+ // The mod-`<ins>` opened on the strong push needs to be closed
278
+ // somehow; the defensive path emits a trailing `</ins>`.
279
+ expect(() => HtmlDiff.executeThreeWay('X</strong>', '<strong>X</strong>', 'X</strong>')).not.toThrow()
280
+ })
150
281
 
151
- it('useProjections=true forces structural normalisation even when heuristic would skip', () => {
152
- // V1==V2 has no structural diff (heuristic would skip projection),
153
- // V2↔V3 has no structural diff either, so the symmetric default is
154
- // also skip. Forcing useProjections=true here is a no-op functionally
155
- // but exercises the forced-on code path.
156
- const out = HtmlDiff.executeThreeWay('<p>a b c</p>', '<p>a b c</p>', '<p>a x c</p>', { useProjections: true })
157
- expect(out).toContain("data-author='me'")
282
+ it('CP deleted only the <strong> opener does not throw', () => {
283
+ // Symmetric: genesis had `<strong>X</strong>`, CP dropped the
284
+ // opener but kept the closer. The opener-delete pushes onto
285
+ // the stack and the closer arrives via an equal segment.
286
+ expect(() => HtmlDiff.executeThreeWay('<strong>X</strong>', 'X</strong>', '<strong>X</strong>')).not.toThrow()
287
+ })
288
+
289
+ it('emits the defensive </ins> close and logs a warning when the stack is unbalanced', async () => {
290
+ const warn = vi.spyOn(console, 'warn').mockImplementation(() => {})
291
+ try {
292
+ const out = HtmlDiff.executeThreeWay('X</strong>', '<strong>X</strong>', 'X</strong>')
293
+ // The content survives.
294
+ expect(out).toContain('X')
295
+ // The defensive close path actually ran — output contains
296
+ // at least one `</ins>` that wasn't paired by `insertTag`
297
+ // (the only way the defensive branch can add one).
298
+ expect(out).toMatch(/<\/ins>/)
299
+ // And the warn was emitted. Without this assertion the path
300
+ // could silently stop firing in a future refactor and the
301
+ // test would still pass on the (incidentally-present) content.
302
+ expect(warn).toHaveBeenCalledWith(expect.stringContaining('unclosed formatting wrap'))
303
+ } finally {
304
+ warn.mockRestore()
305
+ }
306
+ })
307
+ })
308
+
309
+ describe('WORD_ALIGNED_OPTIONS — opinionated consumer defaults', () => {
310
+ // The library default (`orphanMatchThreshold = 0`) keeps every LCS
311
+ // match, however small — which fragments long sentence rewrites
312
+ // into many tiny ins/del pairs around stray word matches. Word's
313
+ // track-changes collapses those into a single coarse del+ins,
314
+ // which is markedly more readable for legal text. The exported
315
+ // `WORD_ALIGNED_OPTIONS` lets consumers opt into that without
316
+ // re-tuning the magic number themselves.
317
+ const longGenesis =
318
+ '"Specified Indebtedness" will have the meaning specified in Section 14 and shall include, with respect to Party B, any obligation (whether present or future, contingent or otherwise) for the payment or repayment of money.'
319
+ const longCp =
320
+ '"Specified Indebtedness" will have the meaning specified in Section 14 of the Agreement except that such term shall not include obligations.'
321
+
322
+ it('exports a 0.25 orphan threshold tuned for Word-aligned output', () => {
323
+ expect(WORD_ALIGNED_OPTIONS).toEqual({ orphanMatchThreshold: 0.25 })
324
+ })
325
+
326
+ it('plumbs through HtmlDiff.execute and reduces fragmentation versus the bare default', () => {
327
+ const bare = HtmlDiff.execute(longGenesis, longCp)
328
+ const aligned = HtmlDiff.execute(longGenesis, longCp, WORD_ALIGNED_OPTIONS)
329
+ const count = (s: string, re: RegExp) => (s.match(re) ?? []).length
330
+ // The bare default keeps every tiny match — Word-aligned produces
331
+ // strictly fewer ins/del wrappers for the same input.
332
+ expect(count(aligned, /<ins/g)).toBeLessThan(count(bare, /<ins/g))
333
+ expect(count(aligned, /<del/g)).toBeLessThan(count(bare, /<del/g))
334
+ })
335
+
336
+ it('plumbs through HtmlDiff.executeThreeWay too', () => {
337
+ const bare = HtmlDiff.executeThreeWay(longGenesis, longCp, longGenesis)
338
+ const aligned = HtmlDiff.executeThreeWay(longGenesis, longCp, longGenesis, WORD_ALIGNED_OPTIONS)
339
+ const count = (s: string, re: RegExp) => (s.match(re) ?? []).length
340
+ expect(count(aligned, /<ins/g)).toBeLessThan(count(bare, /<ins/g))
341
+ expect(count(aligned, /<del/g)).toBeLessThan(count(bare, /<del/g))
342
+ })
343
+ })
344
+
345
+ describe('orphan-match guard for structural tags', () => {
346
+ // Real regression from the live preview (Additional Condition
347
+ // Precedent in the 2002 ISDA Schedule): when CP deletes a section
348
+ // whose answer renders as an empty formatting shell —
349
+ // <p data-html="x"><em><strong></strong></em></p>
350
+ // — the `</strong>` and `</em>` matches sit between two content
351
+ // deletions ("Heading. " before, body after). At
352
+ // WORD_ALIGNED_OPTIONS.orphanMatchThreshold=0.25 those structural
353
+ // matches were rejected as orphans, swallowed into the deletion
354
+ // span, and the browser auto-closed the openers AT THE END of
355
+ // the deletion — visually rendering the entire deletion as
356
+ // bold-italic. The orphan filter now exempts tag-only matches
357
+ // so structural boundaries always survive.
358
+
359
+ it('CP deletes section with em+strong heading + plain body — closers stay between heading and body', () => {
360
+ const genesis =
361
+ '<p data-html="x"><em><strong>Additional Condition Precedent. </strong></em>For the purposes of Section 2(a)(iii).</p>'
362
+ const cp = '<p data-html="x"><em><strong></strong></em></p>'
363
+ const me = genesis
364
+
365
+ const out = HtmlDiff.executeThreeWay(genesis, cp, me, WORD_ALIGNED_OPTIONS)
366
+
367
+ // </strong> appears BEFORE the body deletion — meaning the
368
+ // body sits outside the bold-italic wrap, not inside it.
369
+ const closeStrongIdx = out.indexOf('</strong>')
370
+ const bodyDelIdx = out.indexOf('For the purposes')
371
+ expect(closeStrongIdx).toBeGreaterThan(0)
372
+ expect(bodyDelIdx).toBeGreaterThan(closeStrongIdx)
373
+ // No `<strong>…<del>body` substring exists — confirm by exact
374
+ // shape too. Heading wraps in strong+em, body is a plain del.
375
+ expect(out).toBe(
376
+ '<p data-html="x"><em><strong>' +
377
+ "<del class='diffdel cp' data-author='cp'>Additional Condition Precedent. </del>" +
378
+ '</strong></em>' +
379
+ "<del class='diffdel cp' data-author='cp'>For the purposes of Section 2(a)(iii).</del>" +
380
+ '</p>'
381
+ )
158
382
  })
159
383
  })
160
384
 
161
- describe('first-turn fallback (real-world scenario)', () => {
162
- it('falls back cleanly when only Party A has edited (V2 == V1)', () => {
163
- // The case the user described: when Party B is drafting V2 against
164
- // V1, the 3-way view from Party A's perspective shows only Me's
165
- // changes — no spurious CP authorship.
385
+ describe('first-turn fallback', () => {
386
+ it('cp == genesis means CP made no changes Me-only attribution', () => {
387
+ // Common case: this is the first turn where the counterparty hasn't
388
+ // responded yet, so the cp version equals the genesis. Only Me's
389
+ // changes appear.
166
390
  const out = HtmlDiff.executeThreeWay(
167
391
  '<p>Draft contract.</p>',
168
392
  '<p>Draft contract.</p>',