@createiq/htmldiff 1.2.0-beta.8 → 1.2.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@createiq/htmldiff",
3
- "version": "1.2.0-beta.8",
3
+ "version": "1.2.0-beta.9",
4
4
  "description": "TypeScript port of htmldiff.net",
5
5
  "type": "module",
6
6
  "author": "Mathew Mannion <mathew.mannion@linklaters.com>",
@@ -1,4 +1,5 @@
1
1
  import Action from './Action'
2
+ import { lcsAlign } from './Alignment'
2
3
  import type { AnalyzeResult } from './HtmlDiff'
3
4
  import type Operation from './Operation'
4
5
  import type { WrapMetadata } from './Utils'
@@ -183,13 +184,31 @@ function collectInsertionsKeyedByEnd(d: AnalyzeResult): Map<number, string[]> {
183
184
  }
184
185
 
185
186
  /**
186
- * Emit any insertions at boundary `b`. When both authors inserted at
187
- * the same boundary AND the inserted token sequences are textually
188
- * identical, the insertion is treated as agreed and emitted unmarked.
189
- * Otherwise each side's insertion is emitted with author attribution.
187
+ * Emit any insertions at boundary `b`. Three cases:
190
188
  *
191
- * The CP-then-Me ordering for disagreement is arbitrary but consistent;
192
- * callers don't depend on it.
189
+ * 1. One side inserted, the other didn't → emit that side's tokens
190
+ * with author attribution.
191
+ * 2. Both sides inserted the EXACT same sequence → settled, emit
192
+ * unmarked.
193
+ * 3. Both sides inserted overlapping but different sequences (the
194
+ * common case: one author accepted the other's insertion and
195
+ * edited it, so e.g. cp's "X Y Z" overlaps me's "X Y a Z" with
196
+ * "a" being a one-author-only addition). Run an LCS sub-diff
197
+ * between the two insertion sequences and emit:
198
+ * - tokens in BOTH → settled (equal segment)
199
+ * - tokens only in cp → ins-cp
200
+ * - tokens only in me → ins-me
201
+ * The order of emission preserves the natural reading flow of
202
+ * the merged insertion — common tokens read where they appear,
203
+ * with author-only deltas inserted in their LCS-determined
204
+ * positions.
205
+ *
206
+ * Without this sub-alignment, real-world flows like "Me added 'add
207
+ * more things here', CP accepted minus 'things'" would render as two
208
+ * full redundant insertions (`<ins cp>add more here</ins><ins me>add
209
+ * more things here</ins>`) rather than the obvious single shared
210
+ * insertion with a me-only "things" word — confusing to read and a
211
+ * regression vs Word's track-changes UX.
193
212
  */
194
213
  function emitBoundary(
195
214
  b: number,
@@ -205,14 +224,37 @@ function emitBoundary(
205
224
  const hasMe = !!meIns && meIns.length > 0
206
225
  if (!hasCp && !hasMe) return
207
226
 
208
- if (hasCp && hasMe && tokenArraysEqual(cpIns, meIns)) {
209
- // Both authors inserted the same content — settled. Emit unmarked.
210
- appendSegment(segments, { kind: 'equal' }, cpIns)
227
+ // Only-one-side: emit verbatim with attribution.
228
+ if (!hasCp) {
229
+ appendSegment(segments, { kind: 'ins', author: 'me' }, meIns!)
230
+ return
231
+ }
232
+ if (!hasMe) {
233
+ appendSegment(segments, { kind: 'ins', author: 'cp' }, cpIns!)
234
+ return
235
+ }
236
+
237
+ // Both sides inserted at this boundary. Identical sequences are
238
+ // settled; differing sequences get LCS-aligned and split into
239
+ // shared (settled) + author-only sub-segments.
240
+ if (tokenArraysEqual(cpIns!, meIns!)) {
241
+ appendSegment(segments, { kind: 'equal' }, cpIns!)
211
242
  return
212
243
  }
213
244
 
214
- if (hasCp) appendSegment(segments, { kind: 'ins', author: 'cp' }, cpIns)
215
- if (hasMe) appendSegment(segments, { kind: 'ins', author: 'me' }, meIns)
245
+ const alignment = lcsAlign(cpIns! as string[], meIns! as string[])
246
+ for (const a of alignment) {
247
+ if (a.oldIdx !== null && a.newIdx !== null) {
248
+ // Token appears in both insertions → settled.
249
+ appendSegment(segments, { kind: 'equal' }, [cpIns![a.oldIdx]])
250
+ } else if (a.oldIdx !== null) {
251
+ // Token in cp's insertion only.
252
+ appendSegment(segments, { kind: 'ins', author: 'cp' }, [cpIns![a.oldIdx]])
253
+ } else if (a.newIdx !== null) {
254
+ // Token in me's insertion only.
255
+ appendSegment(segments, { kind: 'ins', author: 'me' }, [meIns![a.newIdx]])
256
+ }
257
+ }
216
258
  }
217
259
 
218
260
  function tokenArraysEqual(a: readonly string[], b: readonly string[]): boolean {
@@ -43,9 +43,13 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
43
43
 
44
44
  it('CP and Me each change the same word differently', () => {
45
45
  // Genesis: "Hello world". CP made "Hello cruel world", Me made "Hello brave world".
46
- // Disagreement — show both authors' insertions.
46
+ // Disagreement — show both authors' insertions. The trailing
47
+ // space between the inserted word and "world" is shared by
48
+ // both insertion sequences, so the boundary-LCS recognises it
49
+ // as settled and emits the inserts as just the word, with the
50
+ // separating space outside the attribution wrappers.
47
51
  expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello brave world')).toBe(
48
- "Hello <ins class='diffins cp' data-author='cp'>cruel </ins><ins class='diffins me' data-author='me'>brave </ins>world"
52
+ "Hello <ins class='diffins cp' data-author='cp'>cruel</ins><ins class='diffins me' data-author='me'>brave</ins> world"
49
53
  )
50
54
  })
51
55
 
@@ -71,6 +75,25 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
71
75
  )
72
76
  })
73
77
 
78
+ it("CP accepted Me's addition with a word removed — emits the common bulk as settled + one me-only word", () => {
79
+ // Real flow on the live preview:
80
+ // - Me appends "And I add more things here" to a paragraph
81
+ // - CP "accepts" Me's addition but deletes the word "things"
82
+ // → CP's version of the appendix is "And I add more here"
83
+ // Genesis has neither addition. Both diffs (against genesis)
84
+ // are pure inserts with substantial overlap. Without the
85
+ // boundary LCS, the two insertions render as two full
86
+ // redundant spans:
87
+ // <ins cp>And I add more here</ins><ins me>And I add more things here</ins>
88
+ // — visually confusing because the reader sees "And I add
89
+ // more" twice. The intent is clearly that CP refined Me's
90
+ // addition by removing one word, so the diff should surface
91
+ // the shared bulk as settled with a me-only "things".
92
+ expect(
93
+ HtmlDiff.executeThreeWay('baseline.', 'baseline. And I add more here', 'baseline. And I add more things here')
94
+ ).toBe("baseline. And I add more<ins class='diffins me' data-author='me'>&nbsp;things</ins> here")
95
+ })
96
+
74
97
  it('Stable across no-change rounds — V5 produces same output as V3 when V5==V3', () => {
75
98
  // The user's V3/V5 invariant: when neither party changes their position
76
99
  // in a subsequent turn, the diff should look identical to the previous