@createiq/htmldiff 1.2.0-beta.0 → 1.2.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@createiq/htmldiff",
3
- "version": "1.2.0-beta.0",
3
+ "version": "1.2.0-beta.2",
4
4
  "description": "TypeScript port of htmldiff.net",
5
5
  "type": "module",
6
6
  "author": "Mathew Mannion <mathew.mannion@linklaters.com>",
package/src/HtmlDiff.ts CHANGED
@@ -162,7 +162,22 @@ export default class HtmlDiff {
162
162
  // constructor overload that would re-leak the parameter we just hid.
163
163
  private tablePreprocessDepth = 0
164
164
 
165
- private specialTagDiffStack: string[] = []
165
+ /**
166
+ * Tracks currently-open formatting-tag wraps. Each entry pairs the
167
+ * opening tag (so a later closing tag can find its match) with the
168
+ * styling info needed to RE-OPEN the wrap if an overlapping
169
+ * formatting-tag close forces it to split. Without the styling info,
170
+ * an overlap like `<strong>X</strong>` ↔ `<u>X</u>` produces an
171
+ * unclosable wrap (the closing tag for the outer wrap arrives while
172
+ * an inner wrap is still on the stack); see `insertTag`'s closing
173
+ * handler for the split logic.
174
+ */
175
+ private specialTagDiffStack: Array<{
176
+ tag: string
177
+ styledTagNames: string
178
+ cssClass: string
179
+ metadata: WrapMetadata | undefined
180
+ }> = []
166
181
  private newWords: string[] = []
167
182
  private oldWords: string[] = []
168
183
  /**
@@ -336,41 +351,61 @@ export default class HtmlDiff {
336
351
  * pairs would project on their own. Pass an explicit boolean to
337
352
  * override.
338
353
  */
339
- static executeThreeWay(v1: string, v2: string, v3: string, options: ThreeWayOptions = {}): string {
340
- return HtmlDiff.executeThreeWayWithDepth(v1, v2, v3, options, 0)
354
+ /**
355
+ * Three-way HTML diff against a shared genesis. Produces attributed
356
+ * HTML that distinguishes CP's accumulated changes (genesis → cpLatest)
357
+ * from Me's accumulated changes (genesis → meCurrent). Use this for
358
+ * blackline UX where the negotiation has gone through multiple turns
359
+ * and the reader wants to see "who proposed what" across the whole
360
+ * history, not just the most recent round.
361
+ *
362
+ * When both parties happen to have made the same change (e.g. CP
363
+ * proposed a wording change in turn N, Me adopted it in turn N+1),
364
+ * the change reads as "settled" and is emitted unmarked — only
365
+ * disagreements and pending proposals carry author attribution.
366
+ *
367
+ * @param genesis the shared common ancestor (per-user — the FE
368
+ * picks between V1.0 and /preview/initialAnswers
369
+ * based on `prefillReceiverAnswers`)
370
+ * @param cpLatest the counterparty's current published version
371
+ * @param meCurrent Me's current draft (the document on screen)
372
+ */
373
+ static executeThreeWay(genesis: string, cpLatest: string, meCurrent: string, options: ThreeWayOptions = {}): string {
374
+ return HtmlDiff.executeThreeWayWithDepth(genesis, cpLatest, meCurrent, options, 0)
341
375
  }
342
376
 
343
377
  private static executeThreeWayWithDepth(
344
- v1: string,
345
- v2: string,
346
- v3: string,
378
+ genesis: string,
379
+ cpLatest: string,
380
+ meCurrent: string,
347
381
  options: ThreeWayOptions,
348
382
  depth: number
349
383
  ): string {
350
- // Table preprocessing first — replaces each V1/V2/V3 table with a
384
+ // Table preprocessing first — replaces each genesis/cp/me table with a
351
385
  // shared-nonce placeholder, then the word-level merge runs over the
352
386
  // table-free inputs. Cells are diffed recursively via executeThreeWay
353
- // so the cell content is itself three-way attributed. Restoration
354
- // happens at the end.
387
+ // so the cell content is itself three-way attributed.
355
388
  //
356
- // Depth-cap the recursion. Each level recurses cellDiff → executeThreeWay,
357
- // which would otherwise run unbounded on adversarially-nested input.
358
- // Beyond the cap, skip table preprocessing entirely and let the
359
- // word-level merge handle the raw HTML — same bail-out semantics as
360
- // the 2-way `MaxTablePreprocessDepth` cap.
389
+ // Depth-cap the recursion so adversarially-nested input can't blow
390
+ // stack/memory.
361
391
  const tablePreprocess =
362
392
  depth < HtmlDiff.MaxThreeWayDepth
363
- ? preprocessTablesThreeWay(v1, v2, v3, (c1, c2, c3) =>
364
- HtmlDiff.executeThreeWayWithDepth(c1, c2, c3, options, depth + 1)
393
+ ? preprocessTablesThreeWay(genesis, cpLatest, meCurrent, (g, c, m) =>
394
+ HtmlDiff.executeThreeWayWithDepth(g, c, m, options, depth + 1)
365
395
  )
366
396
  : null
367
- const inV1 = tablePreprocess?.modifiedV1 ?? v1
368
- const inV2 = tablePreprocess?.modifiedV2 ?? v2
369
- const inV3 = tablePreprocess?.modifiedV3 ?? v3
370
-
397
+ const inGenesis = tablePreprocess?.modifiedGenesis ?? genesis
398
+ const inCp = tablePreprocess?.modifiedCp ?? cpLatest
399
+ const inMe = tablePreprocess?.modifiedMe ?? meCurrent
400
+
401
+ // Symmetric projection across both analyses. The genesis-spine
402
+ // algorithm requires `genesis` to tokenise identically on each
403
+ // pair-wise analysis (both have genesis as the OLD side), so the
404
+ // useProjections decision must agree across both calls.
371
405
  const useProjections =
372
406
  options.useProjections ??
373
- (HtmlDiff.evaluateProjectionApplicability(inV1, inV2) && HtmlDiff.evaluateProjectionApplicability(inV2, inV3))
407
+ (HtmlDiff.evaluateProjectionApplicability(inGenesis, inCp) &&
408
+ HtmlDiff.evaluateProjectionApplicability(inGenesis, inMe))
374
409
 
375
410
  const analyzeOpts: AnalyzeOptions = {
376
411
  useProjections,
@@ -379,21 +414,21 @@ export default class HtmlDiff {
379
414
  orphanMatchThreshold: options.orphanMatchThreshold,
380
415
  ignoreWhitespaceDifferences: options.ignoreWhitespaceDifferences,
381
416
  }
382
- const d1 = HtmlDiff.analyze(inV1, inV2, analyzeOpts)
383
- const d2 = HtmlDiff.analyze(inV2, inV3, analyzeOpts)
417
+ const dCp = HtmlDiff.analyze(inGenesis, inCp, analyzeOpts)
418
+ const dMe = HtmlDiff.analyze(inGenesis, inMe, analyzeOpts)
384
419
 
385
- // Spine sanity check. Symmetric `useProjections` should guarantee
386
- // alignment, but if a bug ever lets these diverge we want to fail
387
- // loudly rather than silently produce a misattributed output.
388
- if (d1.newDiffWords.length !== d2.oldDiffWords.length) {
420
+ // Spine sanity check — both analyses must share an identical genesis
421
+ // tokenisation. Symmetric useProjections guarantees this; if it ever
422
+ // diverges, fail loudly rather than silently misattribute.
423
+ if (dCp.oldDiffWords.length !== dMe.oldDiffWords.length) {
389
424
  throw new Error(
390
- 'HtmlDiff.executeThreeWay: V2 tokenisation diverged across pair-wise analyses ' +
391
- `(${d1.newDiffWords.length} vs ${d2.oldDiffWords.length}). ` +
425
+ 'HtmlDiff.executeThreeWay: genesis tokenisation diverged across pair-wise analyses ' +
426
+ `(${dCp.oldDiffWords.length} vs ${dMe.oldDiffWords.length}). ` +
392
427
  'This indicates the symmetric-projection coordination has a bug.'
393
428
  )
394
429
  }
395
430
 
396
- const segments = buildSegments(d1, d2)
431
+ const segments = buildSegments(dCp, dMe)
397
432
  const merged = HtmlDiff.emitSegments(segments)
398
433
  return tablePreprocess ? restoreTablePlaceholders(merged, tablePreprocess.placeholderToDiff) : merged
399
434
  }
@@ -807,8 +842,13 @@ export default class HtmlDiff {
807
842
  // if there are nonTags, the index of the last tag is the index before the first nonTag.
808
843
  const indexLastTagInFirstTagBlock = indexOfFirstNonTag === -1 ? words.length - 1 : indexOfFirstNonTag - 1
809
844
 
810
- let specialCaseTagInjection = ''
811
- let specialCaseTagInjectionIsBefore = false
845
+ // Pre-injection sits BEFORE the extracted tag-block content (used
846
+ // by closing tags so `</ins></strong>` reads left-to-right).
847
+ // Post-injection sits AFTER (used by opening tags so the rendered
848
+ // order is `<strong><ins ...>` and by the overlap-split case so
849
+ // the re-opened `<ins>`s sit AFTER the actual closing tag).
850
+ let preInject = ''
851
+ let postInject = ''
812
852
 
813
853
  // handle opening tag
814
854
  if (HtmlDiff.SpecialCaseOpeningTagRegex.test(words[0])) {
@@ -820,10 +860,11 @@ export default class HtmlDiff {
820
860
  }
821
861
  const styledTagNames = Array.from(tagNames).join(' ')
822
862
 
823
- this.specialTagDiffStack.push(words[0])
824
863
  // Carry the caller's metadata into the formatting-tag wrapper so
825
864
  // a 3-way author tag survives a `<strong>`/`<em>` content edit.
826
- specialCaseTagInjection = `<ins${Utils.composeTagAttributes(`mod ${styledTagNames}`, metadata ?? {})}>`
865
+ const styledCssClass = `mod ${styledTagNames}`
866
+ this.specialTagDiffStack.push({ tag: words[0], styledTagNames, cssClass: styledCssClass, metadata })
867
+ postInject = `<ins${Utils.composeTagAttributes(styledCssClass, metadata ?? {})}>`
827
868
  if (tag === HtmlDiff.DelTag) {
828
869
  words.shift()
829
870
 
@@ -835,7 +876,6 @@ export default class HtmlDiff {
835
876
  }
836
877
  // handle closing tag
837
878
  else if (HtmlDiff.SpecialCaseClosingTagsSet.has(words[0].toLowerCase())) {
838
- const openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop()
839
879
  // For delete operations: when the tag block contains a mix of formatting and
840
880
  // non-formatting closing tags (e.g. </strong></div>), compare against the first
841
881
  // closing tag (the formatting one) rather than the last tag in the block.
@@ -850,19 +890,39 @@ export default class HtmlDiff {
850
890
  tagIndexToCompare = 0
851
891
  }
852
892
  }
853
- const openingAndClosingTagsMatch =
854
- !!openingTag && Utils.getTagName(openingTag) === Utils.getTagName(words[tagIndexToCompare])
855
893
 
856
- if (openingTag && openingAndClosingTagsMatch) {
857
- specialCaseTagInjection = '</ins>'
858
- specialCaseTagInjectionIsBefore = true
894
+ // Search the stack for a matching opener (LIFO). When the match
895
+ // is the top entry, this is the normal balanced case and we
896
+ // emit a single `</ins>` before the closing tag. When the match
897
+ // is below an unmatched opener — i.e. another formatting wrap
898
+ // opened after it but hasn't been closed yet — the wraps
899
+ // overlap in source order, which has no valid LIFO HTML
900
+ // expression. Resolve by SPLITTING the wraps: close everything
901
+ // above the match (their `<ins>`s and the match's `<ins>`), then
902
+ // re-open the above wraps with fresh `<ins>` tags AFTER the
903
+ // closing tag emits. The above wraps continue to apply until
904
+ // their own closing tag arrives.
905
+ const closingTagName = Utils.getTagName(words[tagIndexToCompare])
906
+ let matchIdx = -1
907
+ for (let i = this.specialTagDiffStack.length - 1; i >= 0; i--) {
908
+ if (Utils.getTagName(this.specialTagDiffStack[i].tag) === closingTagName) {
909
+ matchIdx = i
910
+ break
911
+ }
859
912
  }
860
913
 
861
- // if the tag has a corresponding opening tag, but they don't match,
862
- // we need to push the opening tag back onto the stack
863
- else if (openingTag) {
864
- this.specialTagDiffStack.push(openingTag)
914
+ if (matchIdx >= 0) {
915
+ const aboveEntries = this.specialTagDiffStack.splice(matchIdx + 1)
916
+ this.specialTagDiffStack.pop() // pop the matched entry
917
+ // One `</ins>` per above entry, then one for the match itself.
918
+ preInject = '</ins>'.repeat(aboveEntries.length + 1)
919
+ for (const entry of aboveEntries) {
920
+ postInject += `<ins${Utils.composeTagAttributes(entry.cssClass, entry.metadata ?? {})}>`
921
+ this.specialTagDiffStack.push(entry) // their wrap continues via the new <ins>
922
+ }
865
923
  }
924
+ // No match in stack — orphan closing tag, drop the `<ins>` work
925
+ // and just let the tag itself flow through extractConsecutiveWords.
866
926
 
867
927
  if (tag === HtmlDiff.DelTag) {
868
928
  words.shift()
@@ -873,7 +933,7 @@ export default class HtmlDiff {
873
933
  }
874
934
  }
875
935
 
876
- if (words.length === 0 && specialCaseTagInjection.length === 0) {
936
+ if (words.length === 0 && preInject.length === 0 && postInject.length === 0) {
877
937
  break
878
938
  }
879
939
 
@@ -889,11 +949,7 @@ export default class HtmlDiff {
889
949
  !HtmlDiff.SpecialCaseClosingTagsSet.has(x.toLowerCase())
890
950
  : Utils.isTag
891
951
 
892
- if (specialCaseTagInjectionIsBefore) {
893
- this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, isTagForExtraction).join(''))
894
- } else {
895
- this.content.push(this.extractConsecutiveWords(words, isTagForExtraction).join('') + specialCaseTagInjection)
896
- }
952
+ this.content.push(preInject + this.extractConsecutiveWords(words, isTagForExtraction).join('') + postInject)
897
953
 
898
954
  if (words.length === 0) continue
899
955
 
@@ -4,92 +4,134 @@ import type Operation from './Operation'
4
4
  import type { WrapMetadata } from './Utils'
5
5
 
6
6
  /**
7
- * Composes diff(V1, V2) (CP's changes) and diff(V2, V3) (Me's changes)
8
- * into a single attributed segment stream. The output is consumed by
9
- * `HtmlDiff.executeThreeWay` for emission.
7
+ * Composes diff(genesis → cp-latest) (CP's accumulated changes from the
8
+ * common ancestor) and diff(genesis → me-current) (Me's accumulated
9
+ * changes from the common ancestor) into a single attributed segment
10
+ * stream. The output is consumed by `HtmlDiff.executeThreeWay` for
11
+ * emission.
10
12
  *
11
- * V2 is the structural spine. Both pair-wise analyses must tokenise V2
12
- * identically (`HtmlDiff.executeThreeWay` enforces this via the
13
- * symmetric-projection decision), so V2-diff indices are stable across
14
- * the two streams and we can fold them into a single per-V2-token
15
- * attribution view, interleaved with off-spine CP-deletions (V1-side)
16
- * and Me-insertions (V3-side).
13
+ * Genesis is the structural spine. Both pair-wise analyses must
14
+ * tokenise genesis identically (`HtmlDiff.executeThreeWay` enforces
15
+ * this via the symmetric-projection decision), so genesis-diff indices
16
+ * are stable across the two streams.
17
+ *
18
+ * Per genesis token: classify by what each side did to it
19
+ * (kept / deleted) and emit accordingly. Per genesis boundary: collect
20
+ * each side's insertions and check for agreement — when both sides
21
+ * inserted identical content, the insertion is treated as "settled"
22
+ * and emitted unmarked (the reader sees the agreed-on text without
23
+ * authorship markup, matching Word-style track-changes conventions
24
+ * where both authors agreeing is silent).
25
+ *
26
+ * The emission order at a boundary mirrors the 2-way del-then-ins
27
+ * convention: a Replace (genesis token deleted + a paired insertion)
28
+ * reads as `<del>old</del><ins>new</ins>`. Pure insertions are
29
+ * positioned at their natural boundary.
17
30
  */
18
31
 
19
32
  export type Author = 'cp' | 'me'
20
33
 
21
34
  /**
22
- * Attribution assigned to each output segment. `reject` is its own kind
23
- * (rather than a flavour of `del`) so exhaustive switching is safe — no
24
- * property-presence narrowing required at use sites.
35
+ * Attribution assigned to each output segment.
36
+ *
37
+ * `equal` covers three cases: tokens both authors kept (rendered as the
38
+ * genesis word), insertion spans both authors made identically (rendered
39
+ * plain), and structural tags around both-deleted tokens (rendered to
40
+ * keep layout intact while the content token itself is dropped).
41
+ * Equal segments carry no markup.
25
42
  */
26
- export type Attribution =
27
- | { kind: 'equal' }
28
- | { kind: 'ins'; author: Author }
29
- | { kind: 'del'; author: Author }
30
- // Me deleting tokens that CP inserted = rejecting CP's proposal.
31
- | { kind: 'reject'; by: 'me'; rejected: 'cp' }
43
+ export type Attribution = { kind: 'equal' } | { kind: 'ins'; author: Author } | { kind: 'del'; author: Author }
32
44
 
33
45
  export interface Segment {
34
46
  attr: Attribution
35
- /** Tokens to emit. For Equal segments these are original V2 words
47
+ /** Tokens to emit. For Equal segments these are original genesis words
36
48
  * (including structural tags); for ins/del they are diff-space tokens. */
37
49
  words: string[]
38
50
  }
39
51
 
40
- export function buildSegments(d1: AnalyzeResult, d2: AnalyzeResult): Segment[] {
41
- const v2DiffLen = d1.newDiffWords.length
42
- const fromV1 = buildOriginMap(d1.operations, v2DiffLen)
43
- const toV3 = buildFateMap(d2.operations, v2DiffLen)
44
- const cpDeletionsAt = collectDeletionsAtBoundary(d1)
45
- const meInsertionsAt = collectInsertionsAtBoundary(d2)
52
+ /**
53
+ * Builds the attributed segment stream for a three-way diff.
54
+ *
55
+ * @param dCp analysis of diff(genesis → cp-latest)
56
+ * @param dMe analysis of diff(genesis → me-current)
57
+ *
58
+ * Both analyses must share the same `oldDiffWords` (the genesis tokens)
59
+ * — the caller guarantees this by passing the same genesis input and
60
+ * the same `useProjections` decision to both `HtmlDiff.analyze` calls.
61
+ */
62
+ export function buildSegments(dCp: AnalyzeResult, dMe: AnalyzeResult): Segment[] {
63
+ const genesisLen = dCp.oldDiffWords.length
64
+
65
+ // Per genesis token: did each author keep it or delete it?
66
+ const cpFate = buildFateFromGenesis(dCp.operations, genesisLen)
67
+ const meFate = buildFateFromGenesis(dMe.operations, genesisLen)
68
+
69
+ // Per boundary: tokens each author inserted at that boundary. Keyed by
70
+ // `endInOld` so a Replace's insertion sits AFTER the deleted genesis
71
+ // token (visual del-then-ins). Pure Insert ops have endInOld ==
72
+ // startInOld so they land at their natural between-tokens boundary.
73
+ const cpInsAt = collectInsertionsKeyedByEnd(dCp)
74
+ const meInsAt = collectInsertionsKeyedByEnd(dMe)
46
75
 
47
- // Inverse map V2-diff-index → V2-original-index. Identity when no projection.
48
- const diffToOriginal: readonly number[] = d1.newContentToOriginal ?? Array.from({ length: v2DiffLen }, (_, i) => i)
49
- const v2OriginalLen = d1.newOriginalWords.length
76
+ // Inverse map genesis-diff-index → genesis-original-index. Identity when
77
+ // no projection. Used to slice the original genesis words for Equal
78
+ // segments so structural tags pass through verbatim.
79
+ const diffToOriginal: readonly number[] = dCp.oldContentToOriginal ?? Array.from({ length: genesisLen }, (_, i) => i)
80
+ const genesisOriginalLen = dCp.oldOriginalWords.length
50
81
 
51
82
  const segments: Segment[] = []
52
83
  let originalCursor = 0
53
84
 
54
- for (let i = 0; i < v2DiffLen; i++) {
55
- // CP-deletions from V1 land BEFORE the V2 token at this boundary —
56
- // they conceptually "preceded" V2[i] in V1's stream.
57
- const cpDel = cpDeletionsAt.get(i)
58
- if (cpDel?.length) appendSegment(segments, { kind: 'del', author: 'cp' }, cpDel)
85
+ // Boundary 0 — pure insertions BEFORE genesis[0].
86
+ emitBoundary(0, cpInsAt, meInsAt, dCp.newDiffWords, dMe.newDiffWords, segments)
59
87
 
60
- const attr = combine(fromV1[i], toV3[i])
88
+ for (let i = 0; i < genesisLen; i++) {
89
+ const cpDel = cpFate[i] === 'deleted'
90
+ const meDel = meFate[i] === 'deleted'
91
+
92
+ // Pick up structural tags from cursor through to this genesis token's
93
+ // original index. Same cursor-based slicing as the 2-way path so a
94
+ // `<p>` opening tag preceding a content token gets attributed with
95
+ // that token's segment.
61
96
  const origIdx = diffToOriginal[i]
62
- const slice = d1.newOriginalWords.slice(originalCursor, origIdx + 1)
97
+ const slice = dCp.oldOriginalWords.slice(originalCursor, origIdx + 1)
63
98
  originalCursor = origIdx + 1
64
99
 
65
- // Me-insertions at this boundary go BEFORE V2[i] for pure
66
- // insertions, but AFTER V2[i] when V2[i] is itself a Me-deletion
67
- // (i.e. a Me Replace). This mirrors the 2-way del-then-ins
68
- // convention so a Replace reads as `<del>X</del><ins>Y</ins>`.
69
- const meIns = meInsertionsAt.get(i)
70
- const meInsAfterV2 = meIns?.length && isDeletion(attr)
71
-
72
- if (meIns?.length && !meInsAfterV2) {
73
- appendSegment(segments, { kind: 'ins', author: 'me' }, meIns)
74
- }
75
- appendSegment(segments, attr, slice)
76
- if (meInsAfterV2) {
77
- appendSegment(segments, { kind: 'ins', author: 'me' }, meIns)
100
+ if (!cpDel && !meDel) {
101
+ // Kept by both → equal. Emit the original-word slice (includes
102
+ // any leading structural tags).
103
+ appendSegment(segments, { kind: 'equal' }, slice)
104
+ } else if (cpDel && meDel) {
105
+ // Both deleted → settled. Filter at emission time; pass the
106
+ // structural-tag-bearing slice through as equal so layout
107
+ // survives. The content token itself is the LAST element of the
108
+ // slice (since slice ends at origIdx+1); drop only that.
109
+ // If slice has multiple elements (leading structural tags), they
110
+ // belong to the surrounding flow and should remain.
111
+ if (slice.length > 1) {
112
+ appendSegment(segments, { kind: 'equal' }, slice.slice(0, slice.length - 1))
113
+ }
114
+ // The content token itself is silenced.
115
+ } else if (cpDel) {
116
+ // CP deleted, Me kept → render as <del cp>. Me's keeping means the
117
+ // token is still in V_me; the markup tells the reader "CP wanted
118
+ // this gone, you've kept it."
119
+ appendSegment(segments, { kind: 'del', author: 'cp' }, slice)
120
+ } else {
121
+ // Me deleted, CP kept → render as <del me>.
122
+ appendSegment(segments, { kind: 'del', author: 'me' }, slice)
78
123
  }
124
+
125
+ // Boundary i+1 — pure insertions between genesis[i] and genesis[i+1],
126
+ // AND replace-insertions paired with genesis[i] (which we just
127
+ // emitted as a deletion).
128
+ emitBoundary(i + 1, cpInsAt, meInsAt, dCp.newDiffWords, dMe.newDiffWords, segments)
79
129
  }
80
- // Tail-end interleavings (CP-del / Me-ins at boundary v2DiffLen — i.e.
81
- // after every V2 token). Ordering doesn't matter since there's no
82
- // V2 token to anchor around.
83
- const tailCpDel = cpDeletionsAt.get(v2DiffLen)
84
- if (tailCpDel?.length) appendSegment(segments, { kind: 'del', author: 'cp' }, tailCpDel)
85
- const tailMeIns = meInsertionsAt.get(v2DiffLen)
86
- if (tailMeIns?.length) appendSegment(segments, { kind: 'ins', author: 'me' }, tailMeIns)
87
-
88
- // Trailing V2-original tokens (structural closing tags after the last
89
- // content word). Emit as equal — there's no following segment to claim
90
- // them, and attributing them to either author would be arbitrary.
91
- if (originalCursor < v2OriginalLen) {
92
- appendSegment(segments, { kind: 'equal' }, d1.newOriginalWords.slice(originalCursor))
130
+
131
+ // Trailing original tokens (structural closing tags after the last
132
+ // content word).
133
+ if (originalCursor < genesisOriginalLen) {
134
+ appendSegment(segments, { kind: 'equal' }, dCp.oldOriginalWords.slice(originalCursor))
93
135
  }
94
136
 
95
137
  return segments
@@ -97,80 +139,89 @@ export function buildSegments(d1: AnalyzeResult, d2: AnalyzeResult): Segment[] {
97
139
 
98
140
  // ────────────────────────────────────────────────────────────────────────────
99
141
 
100
- type V2Origin = 'preserved-from-v1' | 'inserted-by-cp' | 'replaced-into-by-cp'
101
- type V2Fate = 'preserved-to-v3' | 'deleted-by-me' | 'replaced-out-by-me'
102
-
103
- function buildOriginMap(ops: readonly Operation[], v2Len: number): V2Origin[] {
104
- const out: V2Origin[] = new Array(v2Len).fill('preserved-from-v1')
105
- for (const op of ops) {
106
- const origin =
107
- op.action === Action.Insert ? 'inserted-by-cp' : op.action === Action.Replace ? 'replaced-into-by-cp' : null
108
- if (origin === null) continue
109
- for (let i = op.startInNew; i < op.endInNew; i++) {
110
- if (i >= 0 && i < v2Len) out[i] = origin
111
- }
112
- }
113
- return out
114
- }
142
+ type GenesisFate = 'kept' | 'deleted'
115
143
 
116
- function buildFateMap(ops: readonly Operation[], v2Len: number): V2Fate[] {
117
- const out: V2Fate[] = new Array(v2Len).fill('preserved-to-v3')
144
+ /**
145
+ * Per genesis-diff-index, what did this side do to that token? Both
146
+ * Delete and Replace ops remove the token from the side's output, so
147
+ * both contribute `'deleted'`. Equal ops contribute `'kept'`. Insert
148
+ * ops have an empty old range, so they don't touch the genesis fate
149
+ * map.
150
+ */
151
+ function buildFateFromGenesis(ops: readonly Operation[], genesisLen: number): GenesisFate[] {
152
+ const out: GenesisFate[] = new Array(genesisLen).fill('kept')
118
153
  for (const op of ops) {
119
- const fate =
120
- op.action === Action.Delete ? 'deleted-by-me' : op.action === Action.Replace ? 'replaced-out-by-me' : null
121
- if (fate === null) continue
154
+ if (op.action !== Action.Delete && op.action !== Action.Replace) continue
122
155
  for (let i = op.startInOld; i < op.endInOld; i++) {
123
- if (i >= 0 && i < v2Len) out[i] = fate
156
+ if (i >= 0 && i < genesisLen) out[i] = 'deleted'
124
157
  }
125
158
  }
126
159
  return out
127
160
  }
128
161
 
129
- function isDeletion(attr: Attribution): boolean {
130
- return attr.kind === 'del' || attr.kind === 'reject'
131
- }
132
-
133
- function combine(origin: V2Origin, fate: V2Fate): Attribution {
134
- const cpInserted = origin === 'inserted-by-cp' || origin === 'replaced-into-by-cp'
135
- const meDeleted = fate === 'deleted-by-me' || fate === 'replaced-out-by-me'
136
- if (!cpInserted && !meDeleted) return { kind: 'equal' }
137
- if (cpInserted && !meDeleted) return { kind: 'ins', author: 'cp' }
138
- if (!cpInserted && meDeleted) return { kind: 'del', author: 'me' }
139
- return { kind: 'reject', by: 'me', rejected: 'cp' }
140
- }
141
-
142
162
  /**
143
- * Map V2-diff-boundary → CP-deleted V1 tokens at that boundary. Includes
144
- * both pure Delete ops and the V1-side of Replace ops (semantically a
145
- * Delete+Insert; the Insert half is picked up by the V2-token walk).
163
+ * Per genesis boundary `b`, collect tokens this side inserted at that
164
+ * boundary. Keyed by `endInOld` so a Replace at genesis[k..k+1] has its
165
+ * insertion at boundary k+1 (after the deleted token) rather than k
166
+ * (before) — that produces the del-then-ins visual order.
167
+ *
168
+ * For pure Insert ops the old range is empty (endInOld == startInOld),
169
+ * so the key is the same as the semantic between-tokens position.
146
170
  */
147
- function collectDeletionsAtBoundary(d: AnalyzeResult): Map<number, string[]> {
171
+ function collectInsertionsKeyedByEnd(d: AnalyzeResult): Map<number, string[]> {
148
172
  const out = new Map<number, string[]>()
149
173
  for (const op of d.operations) {
150
- if (op.action !== Action.Delete && op.action !== Action.Replace) continue
151
- const words = d.oldDiffWords.slice(op.startInOld, op.endInOld)
174
+ if (op.action !== Action.Insert && op.action !== Action.Replace) continue
175
+ const words = d.newDiffWords.slice(op.startInNew, op.endInNew)
152
176
  if (words.length === 0) continue
153
- const existing = out.get(op.startInNew) ?? []
177
+ const key = op.endInOld
178
+ const existing = out.get(key) ?? []
154
179
  existing.push(...words)
155
- out.set(op.startInNew, existing)
180
+ out.set(key, existing)
156
181
  }
157
182
  return out
158
183
  }
159
184
 
160
- function collectInsertionsAtBoundary(d: AnalyzeResult): Map<number, string[]> {
161
- const out = new Map<number, string[]>()
162
- for (const op of d.operations) {
163
- if (op.action !== Action.Insert && op.action !== Action.Replace) continue
164
- const words = d.newDiffWords.slice(op.startInNew, op.endInNew)
165
- if (words.length === 0) continue
166
- const existing = out.get(op.startInOld) ?? []
167
- existing.push(...words)
168
- out.set(op.startInOld, existing)
185
+ /**
186
+ * Emit any insertions at boundary `b`. When both authors inserted at
187
+ * the same boundary AND the inserted token sequences are textually
188
+ * identical, the insertion is treated as agreed and emitted unmarked.
189
+ * Otherwise each side's insertion is emitted with author attribution.
190
+ *
191
+ * The CP-then-Me ordering for disagreement is arbitrary but consistent;
192
+ * callers don't depend on it.
193
+ */
194
+ function emitBoundary(
195
+ b: number,
196
+ cpInsAt: Map<number, string[]>,
197
+ meInsAt: Map<number, string[]>,
198
+ _cpDiffWords: readonly string[],
199
+ _meDiffWords: readonly string[],
200
+ segments: Segment[]
201
+ ) {
202
+ const cpIns = cpInsAt.get(b)
203
+ const meIns = meInsAt.get(b)
204
+ const hasCp = !!cpIns && cpIns.length > 0
205
+ const hasMe = !!meIns && meIns.length > 0
206
+ if (!hasCp && !hasMe) return
207
+
208
+ if (hasCp && hasMe && tokenArraysEqual(cpIns, meIns)) {
209
+ // Both authors inserted the same content — settled. Emit unmarked.
210
+ appendSegment(segments, { kind: 'equal' }, cpIns)
211
+ return
169
212
  }
170
- return out
213
+
214
+ if (hasCp) appendSegment(segments, { kind: 'ins', author: 'cp' }, cpIns)
215
+ if (hasMe) appendSegment(segments, { kind: 'ins', author: 'me' }, meIns)
171
216
  }
172
217
 
173
- function appendSegment(segments: Segment[], attr: Attribution, words: string[]) {
218
+ function tokenArraysEqual(a: readonly string[], b: readonly string[]): boolean {
219
+ if (a.length !== b.length) return false
220
+ for (let i = 0; i < a.length; i++) if (a[i] !== b[i]) return false
221
+ return true
222
+ }
223
+
224
+ function appendSegment(segments: Segment[], attr: Attribution, words: readonly string[]) {
174
225
  if (words.length === 0) return
175
226
  const last = segments[segments.length - 1]
176
227
  if (last && sameAttribution(last.attr, attr)) {
@@ -184,7 +235,6 @@ function sameAttribution(a: Attribution, b: Attribution): boolean {
184
235
  if (a.kind === 'equal' && b.kind === 'equal') return true
185
236
  if (a.kind === 'ins' && b.kind === 'ins') return a.author === b.author
186
237
  if (a.kind === 'del' && b.kind === 'del') return a.author === b.author
187
- if (a.kind === 'reject' && b.kind === 'reject') return true
188
238
  return false
189
239
  }
190
240
 
@@ -195,29 +245,25 @@ function sameAttribution(a: Attribution, b: Attribution): boolean {
195
245
  * pre-wrap) stay consistent. A change here propagates to every author
196
246
  * marker in the output.
197
247
  */
198
- export function authorAttribution(author: Author, rejects?: Author): WrapMetadata {
199
- const dataAttrs: Record<string, string> = { author }
200
- if (rejects !== undefined) dataAttrs.rejects = rejects
201
- const extraClasses = rejects !== undefined ? `${author} rejects-${rejects}` : author
202
- return { extraClasses, dataAttrs }
248
+ export function authorAttribution(author: Author): WrapMetadata {
249
+ return { extraClasses: author, dataAttrs: { author } }
203
250
  }
204
251
 
205
252
  /**
206
253
  * Resolve a segment's attribution into the wrapper-tag, base CSS class,
207
254
  * and `WrapMetadata` consumed by `Utils.wrapText` / `insertTag`. The
208
255
  * caller is `HtmlDiff.executeThreeWay`'s emission loop.
256
+ *
257
+ * `equal` segments don't go through this — they're emitted unmarked.
209
258
  */
210
259
  export function segmentEmissionShape(attr: Exclude<Attribution, { kind: 'equal' }>): {
211
260
  tag: 'ins' | 'del'
212
261
  baseClass: 'diffins' | 'diffdel'
213
262
  metadata: WrapMetadata
214
263
  } {
215
- switch (attr.kind) {
216
- case 'ins':
217
- return { tag: 'ins', baseClass: 'diffins', metadata: authorAttribution(attr.author) }
218
- case 'del':
219
- return { tag: 'del', baseClass: 'diffdel', metadata: authorAttribution(attr.author) }
220
- case 'reject':
221
- return { tag: 'del', baseClass: 'diffdel', metadata: authorAttribution(attr.by, attr.rejected) }
264
+ return {
265
+ tag: attr.kind,
266
+ baseClass: attr.kind === 'ins' ? 'diffins' : 'diffdel',
267
+ metadata: authorAttribution(attr.author),
222
268
  }
223
269
  }