@createiq/htmldiff 1.2.0-beta.0 → 1.2.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -19
- package/dist/HtmlDiff.cjs +418 -420
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +30 -1
- package/dist/HtmlDiff.d.mts +30 -1
- package/dist/HtmlDiff.mjs +418 -420
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/HtmlDiff.ts +106 -50
- package/src/ThreeWayDiff.ts +173 -127
- package/src/ThreeWayTable.ts +408 -484
- package/test/HtmlDiff.spec.ts +15 -0
- package/test/HtmlDiff.threeWay.spec.ts +117 -108
- package/test/HtmlDiff.threeWay.tables.spec.ts +88 -194
package/package.json
CHANGED
package/src/HtmlDiff.ts
CHANGED
|
@@ -162,7 +162,22 @@ export default class HtmlDiff {
|
|
|
162
162
|
// constructor overload that would re-leak the parameter we just hid.
|
|
163
163
|
private tablePreprocessDepth = 0
|
|
164
164
|
|
|
165
|
-
|
|
165
|
+
/**
|
|
166
|
+
* Tracks currently-open formatting-tag wraps. Each entry pairs the
|
|
167
|
+
* opening tag (so a later closing tag can find its match) with the
|
|
168
|
+
* styling info needed to RE-OPEN the wrap if an overlapping
|
|
169
|
+
* formatting-tag close forces it to split. Without the styling info,
|
|
170
|
+
* an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
|
|
171
|
+
* unclosable wrap (the closing tag for the outer wrap arrives while
|
|
172
|
+
* an inner wrap is still on the stack); see `insertTag`'s closing
|
|
173
|
+
* handler for the split logic.
|
|
174
|
+
*/
|
|
175
|
+
private specialTagDiffStack: Array<{
|
|
176
|
+
tag: string
|
|
177
|
+
styledTagNames: string
|
|
178
|
+
cssClass: string
|
|
179
|
+
metadata: WrapMetadata | undefined
|
|
180
|
+
}> = []
|
|
166
181
|
private newWords: string[] = []
|
|
167
182
|
private oldWords: string[] = []
|
|
168
183
|
/**
|
|
@@ -336,41 +351,61 @@ export default class HtmlDiff {
|
|
|
336
351
|
* pairs would project on their own. Pass an explicit boolean to
|
|
337
352
|
* override.
|
|
338
353
|
*/
|
|
339
|
-
|
|
340
|
-
|
|
354
|
+
/**
|
|
355
|
+
* Three-way HTML diff against a shared genesis. Produces attributed
|
|
356
|
+
* HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
|
|
357
|
+
* from Me's accumulated changes (genesis → meCurrent). Use this for
|
|
358
|
+
* blackline UX where the negotiation has gone through multiple turns
|
|
359
|
+
* and the reader wants to see "who proposed what" across the whole
|
|
360
|
+
* history, not just the most recent round.
|
|
361
|
+
*
|
|
362
|
+
* When both parties happen to have made the same change (e.g. CP
|
|
363
|
+
* proposed a wording change in turn N, Me adopted it in turn N+1),
|
|
364
|
+
* the change reads as "settled" and is emitted unmarked — only
|
|
365
|
+
* disagreements and pending proposals carry author attribution.
|
|
366
|
+
*
|
|
367
|
+
* @param genesis the shared common ancestor (per-user — the FE
|
|
368
|
+
* picks between V1.0 and /preview/initialAnswers
|
|
369
|
+
* based on `prefillReceiverAnswers`)
|
|
370
|
+
* @param cpLatest the counterparty's current published version
|
|
371
|
+
* @param meCurrent Me's current draft (the document on screen)
|
|
372
|
+
*/
|
|
373
|
+
static executeThreeWay(genesis: string, cpLatest: string, meCurrent: string, options: ThreeWayOptions = {}): string {
|
|
374
|
+
return HtmlDiff.executeThreeWayWithDepth(genesis, cpLatest, meCurrent, options, 0)
|
|
341
375
|
}
|
|
342
376
|
|
|
343
377
|
private static executeThreeWayWithDepth(
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
378
|
+
genesis: string,
|
|
379
|
+
cpLatest: string,
|
|
380
|
+
meCurrent: string,
|
|
347
381
|
options: ThreeWayOptions,
|
|
348
382
|
depth: number
|
|
349
383
|
): string {
|
|
350
|
-
// Table preprocessing first — replaces each
|
|
384
|
+
// Table preprocessing first — replaces each genesis/cp/me table with a
|
|
351
385
|
// shared-nonce placeholder, then the word-level merge runs over the
|
|
352
386
|
// table-free inputs. Cells are diffed recursively via executeThreeWay
|
|
353
|
-
// so the cell content is itself three-way attributed.
|
|
354
|
-
// happens at the end.
|
|
387
|
+
// so the cell content is itself three-way attributed.
|
|
355
388
|
//
|
|
356
|
-
// Depth-cap the recursion
|
|
357
|
-
//
|
|
358
|
-
// Beyond the cap, skip table preprocessing entirely and let the
|
|
359
|
-
// word-level merge handle the raw HTML — same bail-out semantics as
|
|
360
|
-
// the 2-way `MaxTablePreprocessDepth` cap.
|
|
389
|
+
// Depth-cap the recursion so adversarially-nested input can't blow
|
|
390
|
+
// stack/memory.
|
|
361
391
|
const tablePreprocess =
|
|
362
392
|
depth < HtmlDiff.MaxThreeWayDepth
|
|
363
|
-
? preprocessTablesThreeWay(
|
|
364
|
-
HtmlDiff.executeThreeWayWithDepth(
|
|
393
|
+
? preprocessTablesThreeWay(genesis, cpLatest, meCurrent, (g, c, m) =>
|
|
394
|
+
HtmlDiff.executeThreeWayWithDepth(g, c, m, options, depth + 1)
|
|
365
395
|
)
|
|
366
396
|
: null
|
|
367
|
-
const
|
|
368
|
-
const
|
|
369
|
-
const
|
|
370
|
-
|
|
397
|
+
const inGenesis = tablePreprocess?.modifiedGenesis ?? genesis
|
|
398
|
+
const inCp = tablePreprocess?.modifiedCp ?? cpLatest
|
|
399
|
+
const inMe = tablePreprocess?.modifiedMe ?? meCurrent
|
|
400
|
+
|
|
401
|
+
// Symmetric projection across both analyses. The genesis-spine
|
|
402
|
+
// algorithm requires `genesis` to tokenise identically on each
|
|
403
|
+
// pair-wise analysis (both have genesis as the OLD side), so the
|
|
404
|
+
// useProjections decision must agree across both calls.
|
|
371
405
|
const useProjections =
|
|
372
406
|
options.useProjections ??
|
|
373
|
-
(HtmlDiff.evaluateProjectionApplicability(
|
|
407
|
+
(HtmlDiff.evaluateProjectionApplicability(inGenesis, inCp) &&
|
|
408
|
+
HtmlDiff.evaluateProjectionApplicability(inGenesis, inMe))
|
|
374
409
|
|
|
375
410
|
const analyzeOpts: AnalyzeOptions = {
|
|
376
411
|
useProjections,
|
|
@@ -379,21 +414,21 @@ export default class HtmlDiff {
|
|
|
379
414
|
orphanMatchThreshold: options.orphanMatchThreshold,
|
|
380
415
|
ignoreWhitespaceDifferences: options.ignoreWhitespaceDifferences,
|
|
381
416
|
}
|
|
382
|
-
const
|
|
383
|
-
const
|
|
417
|
+
const dCp = HtmlDiff.analyze(inGenesis, inCp, analyzeOpts)
|
|
418
|
+
const dMe = HtmlDiff.analyze(inGenesis, inMe, analyzeOpts)
|
|
384
419
|
|
|
385
|
-
// Spine sanity check
|
|
386
|
-
//
|
|
387
|
-
// loudly rather than silently
|
|
388
|
-
if (
|
|
420
|
+
// Spine sanity check — both analyses must share an identical genesis
|
|
421
|
+
// tokenisation. Symmetric useProjections guarantees this; if it ever
|
|
422
|
+
// diverges, fail loudly rather than silently misattribute.
|
|
423
|
+
if (dCp.oldDiffWords.length !== dMe.oldDiffWords.length) {
|
|
389
424
|
throw new Error(
|
|
390
|
-
'HtmlDiff.executeThreeWay:
|
|
391
|
-
`(${
|
|
425
|
+
'HtmlDiff.executeThreeWay: genesis tokenisation diverged across pair-wise analyses ' +
|
|
426
|
+
`(${dCp.oldDiffWords.length} vs ${dMe.oldDiffWords.length}). ` +
|
|
392
427
|
'This indicates the symmetric-projection coordination has a bug.'
|
|
393
428
|
)
|
|
394
429
|
}
|
|
395
430
|
|
|
396
|
-
const segments = buildSegments(
|
|
431
|
+
const segments = buildSegments(dCp, dMe)
|
|
397
432
|
const merged = HtmlDiff.emitSegments(segments)
|
|
398
433
|
return tablePreprocess ? restoreTablePlaceholders(merged, tablePreprocess.placeholderToDiff) : merged
|
|
399
434
|
}
|
|
@@ -807,8 +842,13 @@ export default class HtmlDiff {
|
|
|
807
842
|
// if there are nonTags, the index of the last tag is the index before the first nonTag.
|
|
808
843
|
const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1
|
|
809
844
|
|
|
810
|
-
|
|
811
|
-
|
|
845
|
+
// Pre-injection sits BEFORE the extracted tag-block content (used
|
|
846
|
+
// by closing tags so `</ins></strong>` reads left-to-right).
|
|
847
|
+
// Post-injection sits AFTER (used by opening tags so the rendered
|
|
848
|
+
// order is `<strong><ins ...>` and by the overlap-split case so
|
|
849
|
+
// the re-opened `<ins>`s sit AFTER the actual closing tag).
|
|
850
|
+
let preInject = ''
|
|
851
|
+
let postInject = ''
|
|
812
852
|
|
|
813
853
|
// handle opening tag
|
|
814
854
|
if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
|
|
@@ -820,10 +860,11 @@ export default class HtmlDiff {
|
|
|
820
860
|
}
|
|
821
861
|
const styledTagNames = Array.from(tagNames).join(' ')
|
|
822
862
|
|
|
823
|
-
this.specialTagDiffStack.push(words[0])
|
|
824
863
|
// Carry the caller's metadata into the formatting-tag wrapper so
|
|
825
864
|
// a 3-way author tag survives a `<strong>`/`<em>` content edit.
|
|
826
|
-
|
|
865
|
+
const styledCssClass = `mod ${styledTagNames}`
|
|
866
|
+
this.specialTagDiffStack.push({ tag: words[0], styledTagNames, cssClass: styledCssClass, metadata })
|
|
867
|
+
postInject = `<ins${Utils.composeTagAttributes(styledCssClass, metadata ?? {})}>`
|
|
827
868
|
if (tag === HtmlDiff.DelTag) {
|
|
828
869
|
words.shift()
|
|
829
870
|
|
|
@@ -835,7 +876,6 @@ export default class HtmlDiff {
|
|
|
835
876
|
}
|
|
836
877
|
// handle closing tag
|
|
837
878
|
else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
|
|
838
|
-
const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop()
|
|
839
879
|
// For delete operations: when the tag block contains a mix of formatting and
|
|
840
880
|
// non-formatting closing tags (e.g. </strong></div>), compare against the first
|
|
841
881
|
// closing tag (the formatting one) rather than the last tag in the block.
|
|
@@ -850,19 +890,39 @@ export default class HtmlDiff {
|
|
|
850
890
|
tagIndexToCompare = 0
|
|
851
891
|
}
|
|
852
892
|
}
|
|
853
|
-
const openingAndClosingTagsMatch =
|
|
854
|
-
!!openingTag && Utils.getTagName(openingTag) === Utils.getTagName(words[tagIndexToCompare])
|
|
855
893
|
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
894
|
+
// Search the stack for a matching opener (LIFO). When the match
|
|
895
|
+
// is the top entry, this is the normal balanced case and we
|
|
896
|
+
// emit a single `</ins>` before the closing tag. When the match
|
|
897
|
+
// is below an unmatched opener — i.e. another formatting wrap
|
|
898
|
+
// opened after it but hasn't been closed yet — the wraps
|
|
899
|
+
// overlap in source order, which has no valid LIFO HTML
|
|
900
|
+
// expression. Resolve by SPLITTING the wraps: close everything
|
|
901
|
+
// above the match (their `<ins>`s and the match's `<ins>`), then
|
|
902
|
+
// re-open the above wraps with fresh `<ins>` tags AFTER the
|
|
903
|
+
// closing tag emits. The above wraps continue to apply until
|
|
904
|
+
// their own closing tag arrives.
|
|
905
|
+
const closingTagName = Utils.getTagName(words[tagIndexToCompare])
|
|
906
|
+
let matchIdx = -1
|
|
907
|
+
for (let i = this.specialTagDiffStack.length - 1; i >= 0; i--) {
|
|
908
|
+
if (Utils.getTagName(this.specialTagDiffStack[i].tag) === closingTagName) {
|
|
909
|
+
matchIdx = i
|
|
910
|
+
break
|
|
911
|
+
}
|
|
859
912
|
}
|
|
860
913
|
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
914
|
+
if (matchIdx >= 0) {
|
|
915
|
+
const aboveEntries = this.specialTagDiffStack.splice(matchIdx + 1)
|
|
916
|
+
this.specialTagDiffStack.pop() // pop the matched entry
|
|
917
|
+
// One `</ins>` per above entry, then one for the match itself.
|
|
918
|
+
preInject = '</ins>'.repeat(aboveEntries.length + 1)
|
|
919
|
+
for (const entry of aboveEntries) {
|
|
920
|
+
postInject += `<ins${Utils.composeTagAttributes(entry.cssClass, entry.metadata ?? {})}>`
|
|
921
|
+
this.specialTagDiffStack.push(entry) // their wrap continues via the new <ins>
|
|
922
|
+
}
|
|
865
923
|
}
|
|
924
|
+
// No match in stack — orphan closing tag, drop the `<ins>` work
|
|
925
|
+
// and just let the tag itself flow through extractConsecutiveWords.
|
|
866
926
|
|
|
867
927
|
if (tag === HtmlDiff.DelTag) {
|
|
868
928
|
words.shift()
|
|
@@ -873,7 +933,7 @@ export default class HtmlDiff {
|
|
|
873
933
|
}
|
|
874
934
|
}
|
|
875
935
|
|
|
876
|
-
if (words.length === 0 &&
|
|
936
|
+
if (words.length === 0 && preInject.length === 0 && postInject.length === 0) {
|
|
877
937
|
break
|
|
878
938
|
}
|
|
879
939
|
|
|
@@ -889,11 +949,7 @@ export default class HtmlDiff {
|
|
|
889
949
|
!HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase())
|
|
890
950
|
: Utils.isTag
|
|
891
951
|
|
|
892
|
-
|
|
893
|
-
this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, isTagForExtraction).join(''))
|
|
894
|
-
} else {
|
|
895
|
-
this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join('') + specialCaseTagInjection)
|
|
896
|
-
}
|
|
952
|
+
this.content.push(preInject + this.extractConsecutiveWords(words, isTagForExtraction).join('') + postInject)
|
|
897
953
|
|
|
898
954
|
if (words.length === 0) continue
|
|
899
955
|
|
package/src/ThreeWayDiff.ts
CHANGED
|
@@ -4,92 +4,134 @@ import type Operation from './Operation'
|
|
|
4
4
|
import type { WrapMetadata } from './Utils'
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
* Composes diff(
|
|
8
|
-
*
|
|
9
|
-
*
|
|
7
|
+
* Composes diff(genesis → cp-latest) (CP's accumulated changes from the
|
|
8
|
+
* common ancestor) and diff(genesis → me-current) (Me's accumulated
|
|
9
|
+
* changes from the common ancestor) into a single attributed segment
|
|
10
|
+
* stream. The output is consumed by `HtmlDiff.executeThreeWay` for
|
|
11
|
+
* emission.
|
|
10
12
|
*
|
|
11
|
-
*
|
|
12
|
-
* identically (`HtmlDiff.executeThreeWay` enforces
|
|
13
|
-
* symmetric-projection decision), so
|
|
14
|
-
* the two streams
|
|
15
|
-
*
|
|
16
|
-
*
|
|
13
|
+
* Genesis is the structural spine. Both pair-wise analyses must
|
|
14
|
+
* tokenise genesis identically (`HtmlDiff.executeThreeWay` enforces
|
|
15
|
+
* this via the symmetric-projection decision), so genesis-diff indices
|
|
16
|
+
* are stable across the two streams.
|
|
17
|
+
*
|
|
18
|
+
* Per genesis token: classify by what each side did to it
|
|
19
|
+
* (kept / deleted) and emit accordingly. Per genesis boundary: collect
|
|
20
|
+
* each side's insertions and check for agreement — when both sides
|
|
21
|
+
* inserted identical content, the insertion is treated as "settled"
|
|
22
|
+
* and emitted unmarked (the reader sees the agreed-on text without
|
|
23
|
+
* authorship markup, matching Word-style track-changes conventions
|
|
24
|
+
* where both authors agreeing is silent).
|
|
25
|
+
*
|
|
26
|
+
* The emission order at a boundary mirrors the 2-way del-then-ins
|
|
27
|
+
* convention: a Replace (genesis token deleted + a paired insertion)
|
|
28
|
+
* reads as `<del>old</del><ins>new</ins>`. Pure insertions are
|
|
29
|
+
* positioned at their natural boundary.
|
|
17
30
|
*/
|
|
18
31
|
|
|
19
32
|
export type Author = 'cp' | 'me'
|
|
20
33
|
|
|
21
34
|
/**
|
|
22
|
-
* Attribution assigned to each output segment.
|
|
23
|
-
*
|
|
24
|
-
*
|
|
35
|
+
* Attribution assigned to each output segment.
|
|
36
|
+
*
|
|
37
|
+
* `equal` covers three cases: tokens both authors kept (rendered as the
|
|
38
|
+
* genesis word), insertion spans both authors made identically (rendered
|
|
39
|
+
* plain), and structural tags around both-deleted tokens (rendered to
|
|
40
|
+
* keep layout intact while the content token itself is dropped).
|
|
41
|
+
* Equal segments carry no markup.
|
|
25
42
|
*/
|
|
26
|
-
export type Attribution =
|
|
27
|
-
| { kind: 'equal' }
|
|
28
|
-
| { kind: 'ins'; author: Author }
|
|
29
|
-
| { kind: 'del'; author: Author }
|
|
30
|
-
// Me deleting tokens that CP inserted = rejecting CP's proposal.
|
|
31
|
-
| { kind: 'reject'; by: 'me'; rejected: 'cp' }
|
|
43
|
+
export type Attribution = { kind: 'equal' } | { kind: 'ins'; author: Author } | { kind: 'del'; author: Author }
|
|
32
44
|
|
|
33
45
|
export interface Segment {
|
|
34
46
|
attr: Attribution
|
|
35
|
-
/** Tokens to emit. For Equal segments these are original
|
|
47
|
+
/** Tokens to emit. For Equal segments these are original genesis words
|
|
36
48
|
* (including structural tags); for ins/del they are diff-space tokens. */
|
|
37
49
|
words: string[]
|
|
38
50
|
}
|
|
39
51
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
52
|
+
/**
|
|
53
|
+
* Builds the attributed segment stream for a three-way diff.
|
|
54
|
+
*
|
|
55
|
+
* @param dCp analysis of diff(genesis → cp-latest)
|
|
56
|
+
* @param dMe analysis of diff(genesis → me-current)
|
|
57
|
+
*
|
|
58
|
+
* Both analyses must share the same `oldDiffWords` (the genesis tokens)
|
|
59
|
+
* — the caller guarantees this by passing the same genesis input and
|
|
60
|
+
* the same `useProjections` decision to both `HtmlDiff.analyze` calls.
|
|
61
|
+
*/
|
|
62
|
+
export function buildSegments(dCp: AnalyzeResult, dMe: AnalyzeResult): Segment[] {
|
|
63
|
+
const genesisLen = dCp.oldDiffWords.length
|
|
64
|
+
|
|
65
|
+
// Per genesis token: did each author keep it or delete it?
|
|
66
|
+
const cpFate = buildFateFromGenesis(dCp.operations, genesisLen)
|
|
67
|
+
const meFate = buildFateFromGenesis(dMe.operations, genesisLen)
|
|
68
|
+
|
|
69
|
+
// Per boundary: tokens each author inserted at that boundary. Keyed by
|
|
70
|
+
// `endInOld` so a Replace's insertion sits AFTER the deleted genesis
|
|
71
|
+
// token (visual del-then-ins). Pure Insert ops have endInOld ==
|
|
72
|
+
// startInOld so they land at their natural between-tokens boundary.
|
|
73
|
+
const cpInsAt = collectInsertionsKeyedByEnd(dCp)
|
|
74
|
+
const meInsAt = collectInsertionsKeyedByEnd(dMe)
|
|
46
75
|
|
|
47
|
-
// Inverse map
|
|
48
|
-
|
|
49
|
-
|
|
76
|
+
// Inverse map genesis-diff-index → genesis-original-index. Identity when
|
|
77
|
+
// no projection. Used to slice the original genesis words for Equal
|
|
78
|
+
// segments so structural tags pass through verbatim.
|
|
79
|
+
const diffToOriginal: readonly number[] = dCp.oldContentToOriginal ?? Array.from({ length: genesisLen }, (_, i) => i)
|
|
80
|
+
const genesisOriginalLen = dCp.oldOriginalWords.length
|
|
50
81
|
|
|
51
82
|
const segments: Segment[] = []
|
|
52
83
|
let originalCursor = 0
|
|
53
84
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
// they conceptually "preceded" V2[i] in V1's stream.
|
|
57
|
-
const cpDel = cpDeletionsAt.get(i)
|
|
58
|
-
if (cpDel?.length) appendSegment(segments, { kind: 'del', author: 'cp' }, cpDel)
|
|
85
|
+
// Boundary 0 — pure insertions BEFORE genesis[0].
|
|
86
|
+
emitBoundary(0, cpInsAt, meInsAt, dCp.newDiffWords, dMe.newDiffWords, segments)
|
|
59
87
|
|
|
60
|
-
|
|
88
|
+
for (let i = 0; i < genesisLen; i++) {
|
|
89
|
+
const cpDel = cpFate[i] === 'deleted'
|
|
90
|
+
const meDel = meFate[i] === 'deleted'
|
|
91
|
+
|
|
92
|
+
// Pick up structural tags from cursor through to this genesis token's
|
|
93
|
+
// original index. Same cursor-based slicing as the 2-way path so a
|
|
94
|
+
// `<p>` opening tag preceding a content token gets attributed with
|
|
95
|
+
// that token's segment.
|
|
61
96
|
const origIdx = diffToOriginal[i]
|
|
62
|
-
const slice =
|
|
97
|
+
const slice = dCp.oldOriginalWords.slice(originalCursor, origIdx + 1)
|
|
63
98
|
originalCursor = origIdx + 1
|
|
64
99
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
100
|
+
if (!cpDel && !meDel) {
|
|
101
|
+
// Kept by both — equal. Emit the original-word slice (includes
|
|
102
|
+
// any leading structural tags).
|
|
103
|
+
appendSegment(segments, { kind: 'equal' }, slice)
|
|
104
|
+
} else if (cpDel && meDel) {
|
|
105
|
+
// Both deleted — settled. Filter at emission time; pass the
|
|
106
|
+
// structural-tag-bearing slice through as equal so layout
|
|
107
|
+
// survives. The content token itself is the LAST element of the
|
|
108
|
+
// slice (since slice ends at origIdx+1); drop only that.
|
|
109
|
+
// If slice has multiple elements (leading structural tags), they
|
|
110
|
+
// belong to the surrounding flow and should remain.
|
|
111
|
+
if (slice.length > 1) {
|
|
112
|
+
appendSegment(segments, { kind: 'equal' }, slice.slice(0, slice.length - 1))
|
|
113
|
+
}
|
|
114
|
+
// The content token itself is silenced.
|
|
115
|
+
} else if (cpDel) {
|
|
116
|
+
// CP deleted, Me kept → render as <del cp>. Me's keeping means the
|
|
117
|
+
// token is still in V_me; the markup tells the reader "CP wanted
|
|
118
|
+
// this gone, you've kept it."
|
|
119
|
+
appendSegment(segments, { kind: 'del', author: 'cp' }, slice)
|
|
120
|
+
} else {
|
|
121
|
+
// Me deleted, CP kept → render as <del me>.
|
|
122
|
+
appendSegment(segments, { kind: 'del', author: 'me' }, slice)
|
|
78
123
|
}
|
|
124
|
+
|
|
125
|
+
// Boundary i+1 — pure insertions between genesis[i] and genesis[i+1],
|
|
126
|
+
// AND replace-insertions paired with genesis[i] (which we just
|
|
127
|
+
// emitted as a deletion).
|
|
128
|
+
emitBoundary(i + 1, cpInsAt, meInsAt, dCp.newDiffWords, dMe.newDiffWords, segments)
|
|
79
129
|
}
|
|
80
|
-
|
|
81
|
-
//
|
|
82
|
-
//
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
const tailMeIns = meInsertionsAt.get(v2DiffLen)
|
|
86
|
-
if (tailMeIns?.length) appendSegment(segments, { kind: 'ins', author: 'me' }, tailMeIns)
|
|
87
|
-
|
|
88
|
-
// Trailing V2-original tokens (structural closing tags after the last
|
|
89
|
-
// content word). Emit as equal — there's no following segment to claim
|
|
90
|
-
// them, and attributing them to either author would be arbitrary.
|
|
91
|
-
if (originalCursor < v2OriginalLen) {
|
|
92
|
-
appendSegment(segments, { kind: 'equal' }, d1.newOriginalWords.slice(originalCursor))
|
|
130
|
+
|
|
131
|
+
// Trailing original tokens (structural closing tags after the last
|
|
132
|
+
// content word).
|
|
133
|
+
if (originalCursor < genesisOriginalLen) {
|
|
134
|
+
appendSegment(segments, { kind: 'equal' }, dCp.oldOriginalWords.slice(originalCursor))
|
|
93
135
|
}
|
|
94
136
|
|
|
95
137
|
return segments
|
|
@@ -97,80 +139,89 @@ export function buildSegments(d1: AnalyzeResult, d2: AnalyzeResult): Segment[] {
|
|
|
97
139
|
|
|
98
140
|
// ────────────────────────────────────────────────────────────────────────────
|
|
99
141
|
|
|
100
|
-
type
|
|
101
|
-
type V2Fate = 'preserved-to-v3' | 'deleted-by-me' | 'replaced-out-by-me'
|
|
102
|
-
|
|
103
|
-
function buildOriginMap(ops: readonly Operation[], v2Len: number): V2Origin[] {
|
|
104
|
-
const out: V2Origin[] = new Array(v2Len).fill('preserved-from-v1')
|
|
105
|
-
for (const op of ops) {
|
|
106
|
-
const origin =
|
|
107
|
-
op.action === Action.Insert ? 'inserted-by-cp' : op.action === Action.Replace ? 'replaced-into-by-cp' : null
|
|
108
|
-
if (origin === null) continue
|
|
109
|
-
for (let i = op.startInNew; i < op.endInNew; i++) {
|
|
110
|
-
if (i >= 0 && i < v2Len) out[i] = origin
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
return out
|
|
114
|
-
}
|
|
142
|
+
type GenesisFate = 'kept' | 'deleted'
|
|
115
143
|
|
|
116
|
-
|
|
117
|
-
|
|
144
|
+
/**
|
|
145
|
+
* Per genesis-diff-index, what did this side do to that token? Both
|
|
146
|
+
* Delete and Replace ops remove the token from the side's output, so
|
|
147
|
+
* both contribute `'deleted'`. Equal ops contribute `'kept'`. Insert
|
|
148
|
+
* ops have an empty old range, so they don't touch the genesis fate
|
|
149
|
+
* map.
|
|
150
|
+
*/
|
|
151
|
+
function buildFateFromGenesis(ops: readonly Operation[], genesisLen: number): GenesisFate[] {
|
|
152
|
+
const out: GenesisFate[] = new Array(genesisLen).fill('kept')
|
|
118
153
|
for (const op of ops) {
|
|
119
|
-
|
|
120
|
-
op.action === Action.Delete ? 'deleted-by-me' : op.action === Action.Replace ? 'replaced-out-by-me' : null
|
|
121
|
-
if (fate === null) continue
|
|
154
|
+
if (op.action !== Action.Delete && op.action !== Action.Replace) continue
|
|
122
155
|
for (let i = op.startInOld; i < op.endInOld; i++) {
|
|
123
|
-
if (i >= 0 && i <
|
|
156
|
+
if (i >= 0 && i < genesisLen) out[i] = 'deleted'
|
|
124
157
|
}
|
|
125
158
|
}
|
|
126
159
|
return out
|
|
127
160
|
}
|
|
128
161
|
|
|
129
|
-
function isDeletion(attr: Attribution): boolean {
|
|
130
|
-
return attr.kind === 'del' || attr.kind === 'reject'
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
function combine(origin: V2Origin, fate: V2Fate): Attribution {
|
|
134
|
-
const cpInserted = origin === 'inserted-by-cp' || origin === 'replaced-into-by-cp'
|
|
135
|
-
const meDeleted = fate === 'deleted-by-me' || fate === 'replaced-out-by-me'
|
|
136
|
-
if (!cpInserted && !meDeleted) return { kind: 'equal' }
|
|
137
|
-
if (cpInserted && !meDeleted) return { kind: 'ins', author: 'cp' }
|
|
138
|
-
if (!cpInserted && meDeleted) return { kind: 'del', author: 'me' }
|
|
139
|
-
return { kind: 'reject', by: 'me', rejected: 'cp' }
|
|
140
|
-
}
|
|
141
|
-
|
|
142
162
|
/**
|
|
143
|
-
*
|
|
144
|
-
*
|
|
145
|
-
*
|
|
163
|
+
* Per genesis boundary `b`, collect tokens this side inserted at that
|
|
164
|
+
* boundary. Keyed by `endInOld` so a Replace at genesis[k..k+1] has its
|
|
165
|
+
* insertion at boundary k+1 (after the deleted token) rather than k
|
|
166
|
+
* (before) — that produces the del-then-ins visual order.
|
|
167
|
+
*
|
|
168
|
+
* For pure Insert ops the old range is empty (endInOld == startInOld),
|
|
169
|
+
* so the key is the same as the semantic between-tokens position.
|
|
146
170
|
*/
|
|
147
|
-
function
|
|
171
|
+
function collectInsertionsKeyedByEnd(d: AnalyzeResult): Map<number, string[]> {
|
|
148
172
|
const out = new Map<number, string[]>()
|
|
149
173
|
for (const op of d.operations) {
|
|
150
|
-
if (op.action !== Action.
|
|
151
|
-
const words = d.
|
|
174
|
+
if (op.action !== Action.Insert && op.action !== Action.Replace) continue
|
|
175
|
+
const words = d.newDiffWords.slice(op.startInNew, op.endInNew)
|
|
152
176
|
if (words.length === 0) continue
|
|
153
|
-
const
|
|
177
|
+
const key = op.endInOld
|
|
178
|
+
const existing = out.get(key) ?? []
|
|
154
179
|
existing.push(...words)
|
|
155
|
-
out.set(
|
|
180
|
+
out.set(key, existing)
|
|
156
181
|
}
|
|
157
182
|
return out
|
|
158
183
|
}
|
|
159
184
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
185
|
+
/**
|
|
186
|
+
* Emit any insertions at boundary `b`. When both authors inserted at
|
|
187
|
+
* the same boundary AND the inserted token sequences are textually
|
|
188
|
+
* identical, the insertion is treated as agreed and emitted unmarked.
|
|
189
|
+
* Otherwise each side's insertion is emitted with author attribution.
|
|
190
|
+
*
|
|
191
|
+
* The CP-then-Me ordering for disagreement is arbitrary but consistent;
|
|
192
|
+
* callers don't depend on it.
|
|
193
|
+
*/
|
|
194
|
+
function emitBoundary(
|
|
195
|
+
b: number,
|
|
196
|
+
cpInsAt: Map<number, string[]>,
|
|
197
|
+
meInsAt: Map<number, string[]>,
|
|
198
|
+
_cpDiffWords: readonly string[],
|
|
199
|
+
_meDiffWords: readonly string[],
|
|
200
|
+
segments: Segment[]
|
|
201
|
+
) {
|
|
202
|
+
const cpIns = cpInsAt.get(b)
|
|
203
|
+
const meIns = meInsAt.get(b)
|
|
204
|
+
const hasCp = !!cpIns && cpIns.length > 0
|
|
205
|
+
const hasMe = !!meIns && meIns.length > 0
|
|
206
|
+
if (!hasCp && !hasMe) return
|
|
207
|
+
|
|
208
|
+
if (hasCp && hasMe && tokenArraysEqual(cpIns, meIns)) {
|
|
209
|
+
// Both authors inserted the same content — settled. Emit unmarked.
|
|
210
|
+
appendSegment(segments, { kind: 'equal' }, cpIns)
|
|
211
|
+
return
|
|
169
212
|
}
|
|
170
|
-
|
|
213
|
+
|
|
214
|
+
if (hasCp) appendSegment(segments, { kind: 'ins', author: 'cp' }, cpIns)
|
|
215
|
+
if (hasMe) appendSegment(segments, { kind: 'ins', author: 'me' }, meIns)
|
|
171
216
|
}
|
|
172
217
|
|
|
173
|
-
function
|
|
218
|
+
function tokenArraysEqual(a: readonly string[], b: readonly string[]): boolean {
|
|
219
|
+
if (a.length !== b.length) return false
|
|
220
|
+
for (let i = 0; i < a.length; i++) if (a[i] !== b[i]) return false
|
|
221
|
+
return true
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
function appendSegment(segments: Segment[], attr: Attribution, words: readonly string[]) {
|
|
174
225
|
if (words.length === 0) return
|
|
175
226
|
const last = segments[segments.length - 1]
|
|
176
227
|
if (last && sameAttribution(last.attr, attr)) {
|
|
@@ -184,7 +235,6 @@ function sameAttribution(a: Attribution, b: Attribution): boolean {
|
|
|
184
235
|
if (a.kind === 'equal' && b.kind === 'equal') return true
|
|
185
236
|
if (a.kind === 'ins' && b.kind === 'ins') return a.author === b.author
|
|
186
237
|
if (a.kind === 'del' && b.kind === 'del') return a.author === b.author
|
|
187
|
-
if (a.kind === 'reject' && b.kind === 'reject') return true
|
|
188
238
|
return false
|
|
189
239
|
}
|
|
190
240
|
|
|
@@ -195,29 +245,25 @@ function sameAttribution(a: Attribution, b: Attribution): boolean {
|
|
|
195
245
|
* pre-wrap) stay consistent. A change here propagates to every author
|
|
196
246
|
* marker in the output.
|
|
197
247
|
*/
|
|
198
|
-
export function authorAttribution(author: Author
|
|
199
|
-
|
|
200
|
-
if (rejects !== undefined) dataAttrs.rejects = rejects
|
|
201
|
-
const extraClasses = rejects !== undefined ? `${author} rejects-${rejects}` : author
|
|
202
|
-
return { extraClasses, dataAttrs }
|
|
248
|
+
export function authorAttribution(author: Author): WrapMetadata {
|
|
249
|
+
return { extraClasses: author, dataAttrs: { author } }
|
|
203
250
|
}
|
|
204
251
|
|
|
205
252
|
/**
|
|
206
253
|
* Resolve a segment's attribution into the wrapper-tag, base CSS class,
|
|
207
254
|
* and `WrapMetadata` consumed by `Utils.wrapText` / `insertTag`. The
|
|
208
255
|
* caller is `HtmlDiff.executeThreeWay`'s emission loop.
|
|
256
|
+
*
|
|
257
|
+
* `equal` segments don't go through this — they're emitted unmarked.
|
|
209
258
|
*/
|
|
210
259
|
export function segmentEmissionShape(attr: Exclude<Attribution, { kind: 'equal' }>): {
|
|
211
260
|
tag: 'ins' | 'del'
|
|
212
261
|
baseClass: 'diffins' | 'diffdel'
|
|
213
262
|
metadata: WrapMetadata
|
|
214
263
|
} {
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
return { tag: 'del', baseClass: 'diffdel', metadata: authorAttribution(attr.author) }
|
|
220
|
-
case 'reject':
|
|
221
|
-
return { tag: 'del', baseClass: 'diffdel', metadata: authorAttribution(attr.by, attr.rejected) }
|
|
264
|
+
return {
|
|
265
|
+
tag: attr.kind,
|
|
266
|
+
baseClass: attr.kind === 'ins' ? 'diffins' : 'diffdel',
|
|
267
|
+
metadata: authorAttribution(attr.author),
|
|
222
268
|
}
|
|
223
269
|
}
|