@createiq/htmldiff 1.2.0-beta.7 → 1.2.0-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/HtmlDiff.cjs +56 -11
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.mjs +56 -11
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +25 -0
- package/src/ThreeWayDiff.ts +53 -11
- package/test/HtmlDiff.threeWay.spec.ts +65 -2
package/package.json
CHANGED
package/src/HtmlDiff.ts
CHANGED
|
@@ -1119,6 +1119,31 @@ export default class HtmlDiff {
|
|
|
1119
1119
|
continue
|
|
1120
1120
|
}
|
|
1121
1121
|
|
|
1122
|
+
// Never orphan-reject a match whose tokens are ALL HTML tags.
|
|
1123
|
+
// Tag tokens are structural; rejecting `</strong>` / `</em>` as
|
|
1124
|
+
// an orphan match between two content deletions merges the tag
|
|
1125
|
+
// into the deletion, leaving the matching opener unclosed —
|
|
1126
|
+
// browsers then auto-close the opener at the END of the
|
|
1127
|
+
// deletion, producing visually-wrong output (e.g. the body of
|
|
1128
|
+
// a section deletion rendered as bold-italic because the
|
|
1129
|
+
// closing `</strong></em>` ended up after the body deletion
|
|
1130
|
+
// rather than after the heading). The orphan threshold is
|
|
1131
|
+
// designed for stray word matches between heavily-edited spans,
|
|
1132
|
+
// not for formatting boundaries.
|
|
1133
|
+
let allTags = true
|
|
1134
|
+
for (let i = curr.startInNew; i < curr.endInNew; i++) {
|
|
1135
|
+
if (!Utils.isTag(wordsForDiffNew[i])) {
|
|
1136
|
+
allTags = false
|
|
1137
|
+
break
|
|
1138
|
+
}
|
|
1139
|
+
}
|
|
1140
|
+
if (allTags) {
|
|
1141
|
+
yield curr
|
|
1142
|
+
prev = curr
|
|
1143
|
+
curr = next
|
|
1144
|
+
continue
|
|
1145
|
+
}
|
|
1146
|
+
|
|
1122
1147
|
let oldDistanceInChars = 0
|
|
1123
1148
|
for (let i = prev.endInOld; i < next.startInOld; i++) {
|
|
1124
1149
|
oldDistanceInChars += wordsForDiffOld[i].length
|
package/src/ThreeWayDiff.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import Action from './Action'
|
|
2
|
+
import { lcsAlign } from './Alignment'
|
|
2
3
|
import type { AnalyzeResult } from './HtmlDiff'
|
|
3
4
|
import type Operation from './Operation'
|
|
4
5
|
import type { WrapMetadata } from './Utils'
|
|
@@ -183,13 +184,31 @@ function collectInsertionsKeyedByEnd(d: AnalyzeResult): Map<number, string[]> {
|
|
|
183
184
|
}
|
|
184
185
|
|
|
185
186
|
/**
|
|
186
|
-
* Emit any insertions at boundary `b`. If both sides inserted at
|
|
187
|
-
* the same boundary AND the inserted token sequences are textually
|
|
188
|
-
* identical, the insertion is treated as agreed and emitted unmarked.
|
|
189
|
-
* Otherwise each side's insertion is emitted with author attribution.
|
|
187
|
+
* Emit any insertions at boundary `b`. Three cases:
|
|
190
188
|
*
|
|
191
|
-
*
|
|
192
|
-
*
|
|
189
|
+
* 1. One side inserted, the other didn't → emit that side's tokens
|
|
190
|
+
* with author attribution.
|
|
191
|
+
* 2. Both sides inserted the EXACT same sequence → settled, emit
|
|
192
|
+
* unmarked.
|
|
193
|
+
* 3. Both sides inserted overlapping but different sequences (the
|
|
194
|
+
* common case: one author accepted the other's insertion and
|
|
195
|
+
* edited it, so e.g. cp's "X Y Z" overlaps me's "X Y a Z" with
|
|
196
|
+
* "a" being a one-author-only addition). Run an LCS sub-diff
|
|
197
|
+
* between the two insertion sequences and emit:
|
|
198
|
+
* - tokens in BOTH → settled (equal segment)
|
|
199
|
+
* - tokens only in cp → ins-cp
|
|
200
|
+
* - tokens only in me → ins-me
|
|
201
|
+
* The order of emission preserves the natural reading flow of
|
|
202
|
+
* the merged insertion — common tokens read where they appear,
|
|
203
|
+
* with author-only deltas inserted in their LCS-determined
|
|
204
|
+
* positions.
|
|
205
|
+
*
|
|
206
|
+
* Without this sub-alignment, real-world flows like "Me added 'add
|
|
207
|
+
* more things here', CP accepted minus 'things'" would render as two
|
|
208
|
+
* full redundant insertions (`<ins cp>add more here</ins><ins me>add
|
|
209
|
+
* more things here</ins>`) rather than the obvious single shared
|
|
210
|
+
* insertion with a me-only "things" word — confusing to read and a
|
|
211
|
+
* regression vs Word's track-changes UX.
|
|
193
212
|
*/
|
|
194
213
|
function emitBoundary(
|
|
195
214
|
b: number,
|
|
@@ -205,14 +224,37 @@ function emitBoundary(
|
|
|
205
224
|
const hasMe = !!meIns && meIns.length > 0
|
|
206
225
|
if (!hasCp && !hasMe) return
|
|
207
226
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
appendSegment(segments, { kind: '
|
|
227
|
+
// Only-one-side: emit verbatim with attribution.
|
|
228
|
+
if (!hasCp) {
|
|
229
|
+
appendSegment(segments, { kind: 'ins', author: 'me' }, meIns!)
|
|
230
|
+
return
|
|
231
|
+
}
|
|
232
|
+
if (!hasMe) {
|
|
233
|
+
appendSegment(segments, { kind: 'ins', author: 'cp' }, cpIns!)
|
|
234
|
+
return
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
// Both sides inserted at this boundary. Identical sequences are
|
|
238
|
+
// settled; differing sequences get LCS-aligned and split into
|
|
239
|
+
// shared (settled) + author-only sub-segments.
|
|
240
|
+
if (tokenArraysEqual(cpIns!, meIns!)) {
|
|
241
|
+
appendSegment(segments, { kind: 'equal' }, cpIns!)
|
|
211
242
|
return
|
|
212
243
|
}
|
|
213
244
|
|
|
214
|
-
|
|
215
|
-
|
|
245
|
+
const alignment = lcsAlign(cpIns! as string[], meIns! as string[])
|
|
246
|
+
for (const a of alignment) {
|
|
247
|
+
if (a.oldIdx !== null && a.newIdx !== null) {
|
|
248
|
+
// Token appears in both insertions → settled.
|
|
249
|
+
appendSegment(segments, { kind: 'equal' }, [cpIns![a.oldIdx]])
|
|
250
|
+
} else if (a.oldIdx !== null) {
|
|
251
|
+
// Token in cp's insertion only.
|
|
252
|
+
appendSegment(segments, { kind: 'ins', author: 'cp' }, [cpIns![a.oldIdx]])
|
|
253
|
+
} else if (a.newIdx !== null) {
|
|
254
|
+
// Token in me's insertion only.
|
|
255
|
+
appendSegment(segments, { kind: 'ins', author: 'me' }, [meIns![a.newIdx]])
|
|
256
|
+
}
|
|
257
|
+
}
|
|
216
258
|
}
|
|
217
259
|
|
|
218
260
|
function tokenArraysEqual(a: readonly string[], b: readonly string[]): boolean {
|
|
@@ -43,9 +43,13 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
43
43
|
|
|
44
44
|
it('CP and Me each change the same word differently', () => {
|
|
45
45
|
// Genesis: "Hello world". CP made "Hello cruel world", Me made "Hello brave world".
|
|
46
|
-
// Disagreement — show both authors' insertions.
|
|
46
|
+
// Disagreement — show both authors' insertions. The trailing
|
|
47
|
+
// space between the inserted word and "world" is shared by
|
|
48
|
+
// both insertion sequences, so the boundary-LCS recognises it
|
|
49
|
+
// as settled and emits the inserts as just the word, with the
|
|
50
|
+
// separating space outside the attribution wrappers.
|
|
47
51
|
expect(HtmlDiff.executeThreeWay('Hello world', 'Hello cruel world', 'Hello brave world')).toBe(
|
|
48
|
-
"Hello <ins class='diffins cp' data-author='cp'>cruel
|
|
52
|
+
"Hello <ins class='diffins cp' data-author='cp'>cruel</ins><ins class='diffins me' data-author='me'>brave</ins> world"
|
|
49
53
|
)
|
|
50
54
|
})
|
|
51
55
|
|
|
@@ -71,6 +75,25 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
71
75
|
)
|
|
72
76
|
})
|
|
73
77
|
|
|
78
|
+
it("CP accepted Me's addition with a word removed — emits the common bulk as settled + one me-only word", () => {
|
|
79
|
+
// Real flow on the live preview:
|
|
80
|
+
// - Me appends "And I add more things here" to a paragraph
|
|
81
|
+
// - CP "accepts" Me's addition but deletes the word "things"
|
|
82
|
+
// → CP's version of the appendix is "And I add more here"
|
|
83
|
+
// Genesis has neither addition. Both diffs (against genesis)
|
|
84
|
+
// are pure inserts with substantial overlap. Without the
|
|
85
|
+
// boundary LCS, the two insertions render as two full
|
|
86
|
+
// redundant spans:
|
|
87
|
+
// <ins cp>And I add more here</ins><ins me>And I add more things here</ins>
|
|
88
|
+
// — visually confusing because the reader sees "And I add
|
|
89
|
+
// more" twice. The intent is clearly that CP refined Me's
|
|
90
|
+
// addition by removing one word, so the diff should surface
|
|
91
|
+
// the shared bulk as settled with a me-only "things".
|
|
92
|
+
expect(
|
|
93
|
+
HtmlDiff.executeThreeWay('baseline.', 'baseline. And I add more here', 'baseline. And I add more things here')
|
|
94
|
+
).toBe("baseline. And I add more<ins class='diffins me' data-author='me'> things</ins> here")
|
|
95
|
+
})
|
|
96
|
+
|
|
74
97
|
it('Stable across no-change rounds — V5 produces same output as V3 when V5==V3', () => {
|
|
75
98
|
// The user's V3/V5 invariant: when neither party changes their position
|
|
76
99
|
// in a subsequent turn, the diff should look identical to the previous
|
|
@@ -260,6 +283,46 @@ describe('HtmlDiff.executeThreeWay (genesis-spine)', () => {
|
|
|
260
283
|
})
|
|
261
284
|
})
|
|
262
285
|
|
|
286
|
+
describe('orphan-match guard for structural tags', () => {
|
|
287
|
+
// Real regression from the live preview (Additional Condition
|
|
288
|
+
// Precedent in the 2002 ISDA Schedule): when CP deletes a section
|
|
289
|
+
// whose answer renders as an empty formatting shell —
|
|
290
|
+
// <p data-html="x"><em><strong></strong></em></p>
|
|
291
|
+
// — the `</strong>` and `</em>` matches sit between two content
|
|
292
|
+
// deletions ("Heading. " before, body after). At
|
|
293
|
+
// WORD_ALIGNED_OPTIONS.orphanMatchThreshold=0.25 those structural
|
|
294
|
+
// matches were rejected as orphans, swallowed into the deletion
|
|
295
|
+
// span, and the browser auto-closed the openers AT THE END of
|
|
296
|
+
// the deletion — visually rendering the entire deletion as
|
|
297
|
+
// bold-italic. The orphan filter now exempts tag-only matches
|
|
298
|
+
// so structural boundaries always survive.
|
|
299
|
+
|
|
300
|
+
it('CP deletes section with em+strong heading + plain body — closers stay between heading and body', () => {
|
|
301
|
+
const genesis =
|
|
302
|
+
'<p data-html="x"><em><strong>Additional Condition Precedent. </strong></em>For the purposes of Section 2(a)(iii).</p>'
|
|
303
|
+
const cp = '<p data-html="x"><em><strong></strong></em></p>'
|
|
304
|
+
const me = genesis
|
|
305
|
+
|
|
306
|
+
const out = HtmlDiff.executeThreeWay(genesis, cp, me, WORD_ALIGNED_OPTIONS)
|
|
307
|
+
|
|
308
|
+
// </strong> appears BEFORE the body deletion — meaning the
|
|
309
|
+
// body sits outside the bold-italic wrap, not inside it.
|
|
310
|
+
const closeStrongIdx = out.indexOf('</strong>')
|
|
311
|
+
const bodyDelIdx = out.indexOf('For the purposes')
|
|
312
|
+
expect(closeStrongIdx).toBeGreaterThan(0)
|
|
313
|
+
expect(bodyDelIdx).toBeGreaterThan(closeStrongIdx)
|
|
314
|
+
// No `<strong>…<del>body` substring exists — confirm by exact
|
|
315
|
+
// shape too. Heading wraps in strong+em, body is a plain del.
|
|
316
|
+
expect(out).toBe(
|
|
317
|
+
'<p data-html="x"><em><strong>' +
|
|
318
|
+
"<del class='diffdel cp' data-author='cp'>Additional Condition Precedent. </del>" +
|
|
319
|
+
'</strong></em>' +
|
|
320
|
+
"<del class='diffdel cp' data-author='cp'>For the purposes of Section 2(a)(iii).</del>" +
|
|
321
|
+
'</p>'
|
|
322
|
+
)
|
|
323
|
+
})
|
|
324
|
+
})
|
|
325
|
+
|
|
263
326
|
describe('first-turn fallback', () => {
|
|
264
327
|
it('cp == genesis means CP made no changes — Me-only attribution', () => {
|
|
265
328
|
// Common case: this is the first turn where the counterparty hasn't
|