@createiq/htmldiff 1.2.0-beta.3 → 1.2.0-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@createiq/htmldiff",
3
- "version": "1.2.0-beta.3",
3
+ "version": "1.2.0-beta.4",
4
4
  "description": "TypeScript port of htmldiff.net",
5
5
  "type": "module",
6
6
  "author": "Mathew Mannion <mathew.mannion@linklaters.com>",
package/src/HtmlDiff.ts CHANGED
@@ -335,22 +335,6 @@ export default class HtmlDiff {
335
335
  return HtmlDiff.shouldUseContentProjections(oldWords, newWords, oldProj, newProj)
336
336
  }
337
337
 
338
- /**
339
- * Three-way HTML diff. Given V1 (the version Me last sent), V2 (the
340
- * version CP sent back), and V3 (Me's current draft), produces a
341
- * single attributed HTML output where CP's and Me's changes are
342
- * distinguished by `data-author` ('cp' or 'me') and matching
343
- * `class='diffins cp'` / `class='diffdel me'` etc. The "Me rejected
344
- * CP's proposal" case (Me deleted text CP had inserted) gets a
345
- * dedicated marker: `data-rejects='cp'` plus `class='... rejects-cp'`.
346
- *
347
- * Coordinates the symmetric-projection decision (D1) across both
348
- * internal `analyze` calls so V2 tokenises identically on each side
349
- * of the spine. When `useProjections` is left undefined, the decision
350
- * is the conjunction of both pair-wise heuristics — project iff both
351
- * pairs would project on their own. Pass an explicit boolean to
352
- * override.
353
- */
354
338
  /**
355
339
  * Three-way HTML diff against a shared genesis. Produces attributed
356
340
  * HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
@@ -439,6 +423,17 @@ export default class HtmlDiff {
439
423
  * buffer. Reusing the instance keeps the formatting-tag stack
440
424
  * (`specialTagDiffStack`) coherent across segments — a `<strong>`
441
425
  * opened in one segment and closed in another stays balanced.
426
+ *
427
+ * Edge case: an ins/del segment can open a formatting wrap whose
428
+ * matching closer ends up in an equal segment (`<strong>` deleted
429
+ * by CP but `</strong>` kept by both — buildSegments emits the open
430
+ * as del-cp and the close as equal). Equal segments bypass
431
+ * `insertTag` and push raw, so the stack entry for the open is
432
+ * never popped. Rather than throw — which forces the caller's UI
433
+ * into an error boundary — close every leftover wrap with `</ins>`
434
+ * at the end of emission. The resulting HTML has an extra
435
+ * `</ins>` next to the formatting closer; DOMParser-normalisation
436
+ * downstream produces sensible nesting.
442
437
  */
443
438
  private static emitSegments(segments: Segment[]): string {
444
439
  const emitter = new HtmlDiff('', '')
@@ -451,18 +446,21 @@ export default class HtmlDiff {
451
446
  // insertTag mutates its `words` array; pass a copy.
452
447
  emitter.insertTag(tag, baseClass, [...seg.words], metadata)
453
448
  }
454
- // Stack-balance invariant: every special-case opening tag pushed onto
455
- // `specialTagDiffStack` during emission must have been matched by a
456
- // closing tag. An unbalanced stack means the input had unbalanced
457
- // formatting tags AND a Replace at an inconvenient position — the
458
- // output would be silently malformed (half-closed `<ins>`). Fail
459
- // loudly so the caller can investigate rather than ship broken HTML.
460
449
  if (emitter.specialTagDiffStack.length > 0) {
461
- throw new Error(
450
+ // Log once per call so we can spot bad inputs in dev tools, but don't
451
+ // throw — the caller's only fallback was to crash the React
452
+ // tree, which is worse than emitting slightly-imperfect HTML.
453
+ // eslint-disable-next-line no-console
454
+ console.warn(
462
455
  `HtmlDiff.executeThreeWay: emission left ${emitter.specialTagDiffStack.length} ` +
463
- 'unclosed formatting tag(s) on the stack input may have unbalanced ' +
464
- '<strong>/<em>/etc. or there is a bug in segment emission.'
456
+ 'unclosed formatting wrap(s) on the stack. Closing defensively. ' +
457
+ 'This usually means a formatting tag opens in a del/ins segment ' +
458
+ 'and its matching closer is in an equal segment.'
465
459
  )
460
+ while (emitter.specialTagDiffStack.length > 0) {
461
+ emitter.content.push('</ins>')
462
+ emitter.specialTagDiffStack.pop()
463
+ }
466
464
  }
467
465
  return emitter.content.join('')
468
466
  }
@@ -535,9 +535,40 @@ function emitPreservedRow(
535
535
  return out.join('')
536
536
  }
537
537
  // Cell-count mismatch within a preserved row — cell-level structural
538
- // change deferred. Fall back to me-attributed Replace (genesis row
539
- // removed, me row inserted). Lossy for CP within that row.
540
- return emitFullRowAttributed(genesis, rG, 'del', 'me') + emitFullRowAttributed(meCurrent, rM, 'ins', 'me')
538
+ // alignment is non-trivial (which Me cell maps to which CP cell when
539
+ // the counts diverge?). The previous fallback emitted only
540
+ // genesis-as-del + me-as-ins, which silently destroyed CP's row
541
+ // content whenever CP changed the cell count — a content-loss bug
542
+ // (a row where CP added a column would disappear from the rendered
543
+ // diff entirely). Emit each side's row as a distinct attributed
544
+ // block so neither party's restructure can vanish:
545
+ // - if both restructured (each cell count differs from genesis) the
546
+ // genesis row is settled-deleted (silent) and we emit cp + me
547
+ // rows consecutively, cp first, each attributed to its author;
548
+ // - if only one restructured, the genesis row is del-attributed to
549
+ // the restructuring author so the reader sees what was there
550
+ // before, then the new shape ins-attributed to the same author.
551
+ //
552
+ // Content edits inside a side that DID keep the genesis cell count
553
+ // are not surfaced here (no positional path is available across
554
+ // mismatched shapes); the underlying data is still present in the
555
+ // source document but the visual diff doesn't decompose it. That is
556
+ // a degradation of detail, not content loss — symmetric for cp/me.
557
+ const cpRestructured = rC.cells.length !== rG.cells.length
558
+ const meRestructured = rM.cells.length !== rG.cells.length
559
+ const blocks: string[] = []
560
+ if (cpRestructured && meRestructured) {
561
+ // Both sides restructured; genesis shape retained by neither.
562
+ blocks.push(emitFullRowAttributed(cpLatest, rC, 'ins', 'cp'))
563
+ blocks.push(emitFullRowAttributed(meCurrent, rM, 'ins', 'me'))
564
+ } else if (cpRestructured) {
565
+ blocks.push(emitFullRowAttributed(genesis, rG, 'del', 'cp'))
566
+ blocks.push(emitFullRowAttributed(cpLatest, rC, 'ins', 'cp'))
567
+ } else {
568
+ blocks.push(emitFullRowAttributed(genesis, rG, 'del', 'me'))
569
+ blocks.push(emitFullRowAttributed(meCurrent, rM, 'ins', 'me'))
570
+ }
571
+ return blocks.join('')
541
572
  }
542
573
 
543
574
  /**
@@ -120,11 +120,19 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
120
120
  })
121
121
 
122
122
  it('cp matches genesis (only Me changed)', () => {
123
- expect(HtmlDiff.executeThreeWay('Hello world', 'Hello world', 'Hello brave world')).toContain("data-author='me'")
123
+ // Negative assertion is load-bearing: without `not.toContain`
124
+ // a partial cp↔me mix-up in the genesis-spine merge could emit
125
+ // a stray `data-author='cp'` next to the correct 'me' marker
126
+ // and the positive assertion alone would silently pass.
127
+ const out = HtmlDiff.executeThreeWay('Hello world', 'Hello world', 'Hello brave world')
128
+ expect(out).toContain("data-author='me'")
129
+ expect(out).not.toContain("data-author='cp'")
124
130
  })
125
131
 
126
132
  it('me matches genesis (only CP changed)', () => {
127
- expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello world')).toContain("data-author='cp'")
133
+ const out = HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello world')
134
+ expect(out).toContain("data-author='cp'")
135
+ expect(out).not.toContain("data-author='me'")
128
136
  })
129
137
  })
130
138
 
@@ -163,10 +171,47 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
163
171
  const without = HtmlDiff.executeThreeWay('a b', 'a b', 'a b')
164
172
  const withFlag = HtmlDiff.executeThreeWay('a b', 'a b', 'a b', { ignoreWhitespaceDifferences: true })
165
173
  expect(without).toContain("data-author='me'")
174
+ // CP matches genesis — any cp attribution would be a mis-merge.
175
+ expect(without).not.toContain("data-author='cp'")
166
176
  expect(withFlag).not.toContain('data-author=')
167
177
  })
168
178
  })
169
179
 
180
+ describe('stack-balance defence', () => {
181
+ // The emission walks segments built by `buildSegments`: ins/del
182
+ // segments go through `insertTag` (which manages the formatting-
183
+ // tag stack), but equal segments push raw words straight to the
184
+ // content buffer. When a formatting opener is in a del segment
185
+ // and its matching closer falls in an equal segment, the stack
186
+ // entry never gets popped — the emitter used to throw "emission
187
+ // left 1 unclosed formatting tag(s) on the stack" and crash the
188
+ // caller. Now it closes the leftover wraps defensively with
189
+ // `</ins>` so the output stays renderable.
190
+
191
+ it('CP inserted a <strong> opener whose closer is matched as equal — does not throw', () => {
192
+ // Genesis has an orphan closer (`X</strong>`); CP wrapped X in
193
+ // a fresh `<strong>`. The opener is ins-cp (no genesis match)
194
+ // but the closer is shared by all three and emits as equal.
195
+ // The mod-`<ins>` opened on the strong push needs to be closed
196
+ // somehow; the defensive path emits a trailing `</ins>`.
197
+ expect(() => HtmlDiff.executeThreeWay('X</strong>', '<strong>X</strong>', 'X</strong>')).not.toThrow()
198
+ })
199
+
200
+ it('CP deleted only the <strong> opener — does not throw', () => {
201
+ // Symmetric: genesis had `<strong>X</strong>`, CP dropped the
202
+ // opener but kept the closer. The opener-delete pushes onto
203
+ // the stack and the closer arrives via an equal segment.
204
+ expect(() => HtmlDiff.executeThreeWay('<strong>X</strong>', 'X</strong>', '<strong>X</strong>')).not.toThrow()
205
+ })
206
+
207
+ it('produces non-empty output even when the stack is left unbalanced at end', () => {
208
+ const out = HtmlDiff.executeThreeWay('X</strong>', '<strong>X</strong>', 'X</strong>')
209
+ // The content is still there, the formatting wraps just close
210
+ // defensively. Sanity-check the visible content survives.
211
+ expect(out).toContain('X')
212
+ })
213
+ })
214
+
170
215
  describe('first-turn fallback', () => {
171
216
  it('cp == genesis means CP made no changes — Me-only attribution', () => {
172
217
  // Common case: this is the first turn where the counterparty hasn't
@@ -259,6 +259,47 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
259
259
  const html = `<table>${rows}</table>`
260
260
  expect(HtmlDiff.executeThreeWay(html, html, html)).toBe(html)
261
261
  })
262
+
263
+ it('cell-count mismatch: CP added a column — CP row content is visible (not silently dropped)', () => {
264
+ // Regression: the previous fallback in emitPreservedRow emitted
265
+ // only `del me` + `ins me` for any cell-count mismatch, which
266
+ // silently destroyed CP's row content whenever CP changed the
267
+ // cell count. A reader in cp-only mode would see no trace of
268
+ // CP's added column — a content-loss bug that violates the
269
+ // "CP's changes always visible" invariant.
270
+ const out = HtmlDiff.executeThreeWay(
271
+ '<table><tr><td>a</td><td>b</td></tr></table>',
272
+ '<table><tr><td>a</td><td>X</td><td>b</td></tr></table>',
273
+ '<table><tr><td>a</td><td>b</td></tr></table>'
274
+ )
275
+ expect(out).toBe(
276
+ "<table><tr class='diffdel cp' data-author='cp'><td class='diffdel cp' data-author='cp'><del class='diffdel cp' data-author='cp'>a</del></td><td class='diffdel cp' data-author='cp'><del class='diffdel cp' data-author='cp'>b</del></td></tr><tr class='diffins cp' data-author='cp'><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>a</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>X</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>b</ins></td></tr></table>"
277
+ )
278
+ })
279
+
280
+ it('cell-count mismatch: Me removed a column — symmetric to the CP case', () => {
281
+ const out = HtmlDiff.executeThreeWay(
282
+ '<table><tr><td>a</td><td>b</td></tr></table>',
283
+ '<table><tr><td>a</td><td>b</td></tr></table>',
284
+ '<table><tr><td>a</td></tr></table>'
285
+ )
286
+ expect(out).toBe(
287
+ "<table><tr class='diffdel me' data-author='me'><td class='diffdel me' data-author='me'><del class='diffdel me' data-author='me'>a</del></td><td class='diffdel me' data-author='me'><del class='diffdel me' data-author='me'>b</del></td></tr><tr class='diffins me' data-author='me'><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>a</ins></td></tr></table>"
288
+ )
289
+ })
290
+
291
+ it('cell-count mismatch: both sides restructured differently — both ins rows attributed', () => {
292
+ // Genesis 2 cells, CP 3 cells, Me 4 cells. Neither side keeps
293
+ // the genesis shape, so both restructures must be visible.
294
+ const out = HtmlDiff.executeThreeWay(
295
+ '<table><tr><td>a</td><td>b</td></tr></table>',
296
+ '<table><tr><td>a</td><td>X</td><td>b</td></tr></table>',
297
+ '<table><tr><td>a</td><td>b</td><td>Y</td><td>Z</td></tr></table>'
298
+ )
299
+ expect(out).toBe(
300
+ "<table><tr class='diffins cp' data-author='cp'><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>a</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>X</ins></td><td class='diffins cp' data-author='cp'><ins class='diffins cp' data-author='cp'>b</ins></td></tr><tr class='diffins me' data-author='me'><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>a</ins></td><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>b</ins></td><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>Y</ins></td><td class='diffins me' data-author='me'><ins class='diffins me' data-author='me'>Z</ins></td></tr></table>"
301
+ )
302
+ })
262
303
  })
263
304
 
264
305
  describe('nested tables', () => {
@@ -270,6 +311,9 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
270
311
  )
271
312
  expect(out).toMatch(/<del[^>]*data-author='cp'[^>]*>inner<\/del>/)
272
313
  expect(out).toMatch(/<ins[^>]*data-author='cp'[^>]*>INNER<\/ins>/)
314
+ // me == genesis here, so any me attribution would indicate a
315
+ // cp↔me swap inside the table-cell merge.
316
+ expect(out).not.toContain("data-author='me'")
273
317
  expect(out.startsWith('<table><tr><td><table>')).toBe(true)
274
318
  expect(out.endsWith('</table></td></tr></table>')).toBe(true)
275
319
  })
@@ -320,6 +364,10 @@ describe('HtmlDiff.executeThreeWay (tables, genesis-spine)', () => {
320
364
  // whole-table del+ins wrapping the entire <table>.
321
365
  expect(out).not.toMatch(/<del[^>]*><table/)
322
366
  expect(out).toMatch(/data-author='cp'/)
367
+ // me === genesis, so any me-attribution markers would mean the
368
+ // diff swapped CP's edits onto Me. Negative assertion locks the
369
+ // attribution direction.
370
+ expect(out).not.toContain("data-author='me'")
323
371
  expect(out).toContain('Extra column')
324
372
  expect(out).toContain('Form/Document/Certificate')
325
373
  })
@@ -138,10 +138,10 @@ describe('Utils', () => {
138
138
  it('combines extraClasses and dataAttrs in one call', () => {
139
139
  expect(
140
140
  Utils.wrapText('hello', 'del', 'diffdel', {
141
- extraClasses: 'me rejects-cp',
142
- dataAttrs: { author: 'me', rejects: 'cp' },
141
+ extraClasses: 'me',
142
+ dataAttrs: { author: 'me', source: 'edit' },
143
143
  })
144
- ).toBe("<del class='diffdel me rejects-cp' data-author='me' data-rejects='cp'>hello</del>")
144
+ ).toBe("<del class='diffdel me' data-author='me' data-source='edit'>hello</del>")
145
145
  })
146
146
 
147
147
  it('skips the metadata path entirely when neither extraClasses nor dataAttrs is set', () => {