@createiq/htmldiff 1.2.0-beta.8 → 1.2.0-beta.9
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +45 -11
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.mjs +45 -11
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/ThreeWayDiff.ts +53 -11
- package/test/HtmlDiff.threeWay.spec.ts +25 -2
package/package.json
CHANGED
package/src/ThreeWayDiff.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import Action from './Action'
|
|
2
|
+
import { lcsAlign } from './Alignment'
|
|
2
3
|
import type { AnalyzeResult } from './HtmlDiff'
|
|
3
4
|
import type Operation from './Operation'
|
|
4
5
|
import type { WrapMetadata } from './Utils'
|
|
@@ -183,13 +184,31 @@ function collectInsertionsKeyedByEnd(d: AnalyzeResult): Map<number, string[]> {
|
|
|
183
184
|
}
|
|
184
185
|
|
|
185
186
|
/**
|
|
186
|
-
* Emit any insertions at boundary `b`. If both sides inserted at
|
|
187
|
-
* the same boundary AND the inserted token sequences are textually
|
|
188
|
-
* identical, the insertion is treated as agreed and emitted unmarked.
|
|
189
|
-
* Otherwise each side's insertion is emitted with author attribution.
|
|
187
|
+
* Emit any insertions at boundary `b`. Three cases:
|
|
190
188
|
*
|
|
191
|
-
*
|
|
192
|
-
*
|
|
189
|
+
* 1. One side inserted, the other didn't → emit that side's tokens
|
|
190
|
+
* with author attribution.
|
|
191
|
+
* 2. Both sides inserted the EXACT same sequence → settled, emit
|
|
192
|
+
* unmarked.
|
|
193
|
+
* 3. Both sides inserted overlapping but different sequences (the
|
|
194
|
+
* common case: one author accepted the other's insertion and
|
|
195
|
+
* edited it, so e.g. cp's "X Y Z" overlaps me's "X Y a Z" with
|
|
196
|
+
* "a" being a one-author-only addition). Run an LCS sub-diff
|
|
197
|
+
* between the two insertion sequences and emit:
|
|
198
|
+
* - tokens in BOTH → settled (equal segment)
|
|
199
|
+
* - tokens only in cp → ins-cp
|
|
200
|
+
* - tokens only in me → ins-me
|
|
201
|
+
* The order of emission preserves the natural reading flow of
|
|
202
|
+
* the merged insertion — common tokens read where they appear,
|
|
203
|
+
* with author-only deltas inserted in their LCS-determined
|
|
204
|
+
* positions.
|
|
205
|
+
*
|
|
206
|
+
* Without this sub-alignment, real-world flows like "Me added 'add
|
|
207
|
+
* more things here', CP accepted minus 'things'" would render as two
|
|
208
|
+
* full redundant insertions (`<ins cp>add more here</ins><ins me>add
|
|
209
|
+
* more things here</ins>`) rather than the obvious single shared
|
|
210
|
+
* insertion with a me-only "things" word — confusing to read and a
|
|
211
|
+
* regression vs Word's track-changes UX.
|
|
193
212
|
*/
|
|
194
213
|
function emitBoundary(
|
|
195
214
|
b: number,
|
|
@@ -205,14 +224,37 @@ function emitBoundary(
|
|
|
205
224
|
const hasMe = !!meIns && meIns.length > 0
|
|
206
225
|
if (!hasCp && !hasMe) return
|
|
207
226
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
appendSegment(segments, { kind: '
|
|
227
|
+
// Only-one-side: emit verbatim with attribution.
|
|
228
|
+
if (!hasCp) {
|
|
229
|
+
appendSegment(segments, { kind: 'ins', author: 'me' }, meIns!)
|
|
230
|
+
return
|
|
231
|
+
}
|
|
232
|
+
if (!hasMe) {
|
|
233
|
+
appendSegment(segments, { kind: 'ins', author: 'cp' }, cpIns!)
|
|
234
|
+
return
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
// Both sides inserted at this boundary. Identical sequences are
|
|
238
|
+
// settled; differing sequences get LCS-aligned and split into
|
|
239
|
+
// shared (settled) + author-only sub-segments.
|
|
240
|
+
if (tokenArraysEqual(cpIns!, meIns!)) {
|
|
241
|
+
appendSegment(segments, { kind: 'equal' }, cpIns!)
|
|
211
242
|
return
|
|
212
243
|
}
|
|
213
244
|
|
|
214
|
-
|
|
215
|
-
|
|
245
|
+
const alignment = lcsAlign(cpIns! as string[], meIns! as string[])
|
|
246
|
+
for (const a of alignment) {
|
|
247
|
+
if (a.oldIdx !== null && a.newIdx !== null) {
|
|
248
|
+
// Token appears in both insertions → settled.
|
|
249
|
+
appendSegment(segments, { kind: 'equal' }, [cpIns![a.oldIdx]])
|
|
250
|
+
} else if (a.oldIdx !== null) {
|
|
251
|
+
// Token in cp's insertion only.
|
|
252
|
+
appendSegment(segments, { kind: 'ins', author: 'cp' }, [cpIns![a.oldIdx]])
|
|
253
|
+
} else if (a.newIdx !== null) {
|
|
254
|
+
// Token in me's insertion only.
|
|
255
|
+
appendSegment(segments, { kind: 'ins', author: 'me' }, [meIns![a.newIdx]])
|
|
256
|
+
}
|
|
257
|
+
}
|
|
216
258
|
}
|
|
217
259
|
|
|
218
260
|
function tokenArraysEqual(a: readonly string[], b: readonly string[]): boolean {
|
|
@@ -43,9 +43,13 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
43
43
|
|
|
44
44
|
it('CP and Me each change the same word differently', () => {
|
|
45
45
|
// Genesis: "Hello world". CP made "Hello cruel world", Me made "Hello brave world".
|
|
46
|
-
// Disagreement — show both authors' insertions.
|
|
46
|
+
// Disagreement — show both authors' insertions. The trailing
|
|
47
|
+
// space between the inserted word and "world" is shared by
|
|
48
|
+
// both insertion sequences, so the boundary-LCS recognises it
|
|
49
|
+
// as settled and emits the inserts as just the word, with the
|
|
50
|
+
// separating space outside the attribution wrappers.
|
|
47
51
|
expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello brave world')).toBe(
|
|
48
|
-
"Hello <ins class='diffins cp' data-author='cp'>cruel
|
|
52
|
+
"Hello <ins class='diffins cp' data-author='cp'>cruel</ins><ins class='diffins me' data-author='me'>brave</ins> world"
|
|
49
53
|
)
|
|
50
54
|
})
|
|
51
55
|
|
|
@@ -71,6 +75,25 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
71
75
|
)
|
|
72
76
|
})
|
|
73
77
|
|
|
78
|
+
it("CP accepted Me's addition with a word removed — emits the common bulk as settled + one me-only word", () => {
|
|
79
|
+
// Real flow on the live preview:
|
|
80
|
+
// - Me appends "And I add more things here" to a paragraph
|
|
81
|
+
// - CP "accepts" Me's addition but deletes the word "things"
|
|
82
|
+
// → CP's version of the appendix is "And I add more here"
|
|
83
|
+
// Genesis has neither addition. Both diffs (against genesis)
|
|
84
|
+
// are pure inserts with substantial overlap. Without the
|
|
85
|
+
// boundary LCS, the two insertions render as two full
|
|
86
|
+
// redundant spans:
|
|
87
|
+
// <ins cp>And I add more here</ins><ins me>And I add more things here</ins>
|
|
88
|
+
// — visually confusing because the reader sees "And I add
|
|
89
|
+
// more" twice. The intent is clearly that CP refined Me's
|
|
90
|
+
// addition by removing one word, so the diff should surface
|
|
91
|
+
// the shared bulk as settled with a me-only "things".
|
|
92
|
+
expect(
|
|
93
|
+
HtmlDiff.executeThreeWay('baseline.', 'baseline. And I add more here', 'baseline. And I add more things here')
|
|
94
|
+
).toBe("baseline. And I add more<ins class='diffins me' data-author='me'> things</ins> here")
|
|
95
|
+
})
|
|
96
|
+
|
|
74
97
|
it('Stable across no-change rounds — V5 produces same output as V3 when V5==V3', () => {
|
|
75
98
|
// The user's V3/V5 invariant: when neither party changes their position
|
|
76
99
|
// in a subsequent turn, the diff should look identical to the previous
|