@createiq/htmldiff 1.1.0 → 1.2.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,223 @@
1
+ import Action from './Action'
2
+ import type { AnalyzeResult } from './HtmlDiff'
3
+ import type Operation from './Operation'
4
+ import type { WrapMetadata } from './Utils'
5
+
6
+ /**
7
+ * Composes diff(V1, V2) (CP's changes) and diff(V2, V3) (Me's changes)
8
+ * into a single attributed segment stream. The output is consumed by
9
+ * `HtmlDiff.executeThreeWay` for emission.
10
+ *
11
+ * V2 is the structural spine. Both pair-wise analyses must tokenise V2
12
+ * identically (`HtmlDiff.executeThreeWay` enforces this via the
13
+ * symmetric-projection decision), so V2-diff indices are stable across
14
+ * the two streams and we can fold them into a single per-V2-token
15
+ * attribution view, interleaved with off-spine CP-deletions (V1-side)
16
+ * and Me-insertions (V3-side).
17
+ */
18
+
/**
 * The two attributable parties: `cp` authored the V1 → V2 changes and
 * `me` authored the V2 → V3 changes.
 */
export type Author =
  | 'cp'
  | 'me'
20
+
/**
 * How one output segment is attributed. Every variant carries a literal
 * `kind` tag, and a rejection has its own `reject` kind instead of being
 * modelled as a `del` with extra fields, so a `switch` over `kind` can be
 * exhaustive without probing for property presence.
 */
export type Attribution =
  // Token is unchanged across all three versions.
  | { kind: 'equal' }
  // Token was added by `author`.
  | { kind: 'ins'; author: Author }
  // Token was removed by `author`.
  | { kind: 'del'; author: Author }
  // Me removed tokens that CP had inserted, i.e. Me rejected CP's proposal.
  | { kind: 'reject'; by: 'me'; rejected: 'cp' }
32
+
/** A run of consecutive output tokens that share one attribution. */
export interface Segment {
  /** Who (if anyone) changed these tokens, and how. */
  attr: Attribution
  /**
   * Tokens to emit. Equal segments hold original V2 words (structural
   * tags included); ins/del segments hold diff-space tokens.
   */
  words: string[]
}
39
+
/**
 * Fold the CP diff (`d1`, V1 → V2) and the Me diff (`d2`, V2 → V3) into a
 * single ordered list of attributed segments, walking V2's diff tokens as
 * the spine.
 *
 * For each V2 diff token `i` the output receives, in this order:
 *  1. CP-deleted V1 tokens anchored at boundary `i`;
 *  2. Me-inserted V3 tokens anchored at boundary `i`, unless V2[i] is itself
 *     removed by Me;
 *  3. the V2 original words up to and including V2[i], attributed by
 *     combining the token's origin (CP side) with its fate (Me side);
 *  4. any held-back Me insertion, once the last V2 token of the Me Replace
 *     it belongs to has been emitted.
 *
 * Step 4 keeps a multi-token Me Replace reading as
 * `<del>X0 X1</del><ins>Y</ins>` (the 2-way del-then-ins convention) rather
 * than splitting the deletion around the insertion.
 *
 * @param d1 Analysis of V1 → V2; its `new*` fields describe V2.
 * @param d2 Analysis of V2 → V3; the old-side indices of its operations are
 *   read as V2 diff indices, so both analyses must tokenise V2 identically.
 * @returns Segments in emission order. Adjacent pieces with the same
 *   attribution are merged by `appendSegment`.
 */
export function buildSegments(d1: AnalyzeResult, d2: AnalyzeResult): Segment[] {
  const v2DiffLen = d1.newDiffWords.length
  const fromV1 = buildOriginMap(d1.operations, v2DiffLen)
  const toV3 = buildFateMap(d2.operations, v2DiffLen)
  const cpDeletionsAt = collectDeletionsAtBoundary(d1)
  const meInsertionsAt = collectInsertionsAtBoundary(d2)

  // Inverse map V2-diff-index → V2-original-index. Identity when no projection.
  const diffToOriginal: readonly number[] = d1.newContentToOriginal ?? Array.from({ length: v2DiffLen }, (_, i) => i)
  const v2OriginalLen = d1.newOriginalWords.length

  const segments: Segment[] = []
  let originalCursor = 0
  // Me-inserted tokens waiting for the end of the Me-deleted run they replace.
  let heldMeIns: string[] = []

  for (let i = 0; i < v2DiffLen; i++) {
    // CP-deletions from V1 land BEFORE the V2 token at this boundary —
    // they conceptually "preceded" V2[i] in V1's stream.
    const cpDel = cpDeletionsAt.get(i)
    if (cpDel?.length) appendSegment(segments, { kind: 'del', author: 'cp' }, cpDel)

    const attr = combine(fromV1[i], toV3[i])
    // Take every original word since the previous token, so words that have
    // no diff-space counterpart travel with the token that follows them.
    const origIdx = diffToOriginal[i]
    const slice = d1.newOriginalWords.slice(originalCursor, origIdx + 1)
    originalCursor = origIdx + 1

    // Me-insertions at this boundary go BEFORE V2[i] for pure insertions,
    // but are held back when V2[i] is itself removed by Me (a Me Replace).
    const meIns = meInsertionsAt.get(i)
    if (meIns?.length) {
      if (isDeletion(attr)) heldMeIns.push(...meIns)
      else appendSegment(segments, { kind: 'ins', author: 'me' }, meIns)
    }

    appendSegment(segments, attr, slice)

    if (heldMeIns.length > 0) {
      // The Replace continues only while the next token is also replaced-out
      // and does not start an operation of its own. Anything else (end of
      // V2, a preserved token, a plain Me deletion) releases the insertion
      // immediately after V2[i].
      const next = i + 1
      const replaceContinues =
        next < v2DiffLen && toV3[next] === 'replaced-out-by-me' && !meInsertionsAt.has(next)
      if (!replaceContinues) {
        appendSegment(segments, { kind: 'ins', author: 'me' }, heldMeIns)
        heldMeIns = []
      }
    }
  }

  // Tail-end interleavings (CP-del / Me-ins at boundary v2DiffLen — i.e.
  // after every V2 token). Ordering doesn't matter since there's no
  // V2 token to anchor around.
  const tailCpDel = cpDeletionsAt.get(v2DiffLen)
  if (tailCpDel?.length) appendSegment(segments, { kind: 'del', author: 'cp' }, tailCpDel)
  const tailMeIns = meInsertionsAt.get(v2DiffLen)
  if (tailMeIns?.length) appendSegment(segments, { kind: 'ins', author: 'me' }, tailMeIns)

  // Trailing V2-original tokens (structural closing tags after the last
  // content word). Emit as equal — there's no following segment to claim
  // them, and attributing them to either author would be arbitrary.
  if (originalCursor < v2OriginalLen) {
    appendSegment(segments, { kind: 'equal' }, d1.newOriginalWords.slice(originalCursor))
  }

  return segments
}
97
+
98
+ // ────────────────────────────────────────────────────────────────────────────
99
+
/** Where a V2 diff token came from, as seen by the V1 → V2 (CP) diff. */
type V2Origin =
  | 'preserved-from-v1'
  | 'inserted-by-cp'
  | 'replaced-into-by-cp'

/** What happened to a V2 diff token, as seen by the V2 → V3 (Me) diff. */
type V2Fate =
  | 'preserved-to-v3'
  | 'deleted-by-me'
  | 'replaced-out-by-me'
102
+
/**
 * Classify every V2 diff token by its origin in the CP diff.
 *
 * Tokens start as `'preserved-from-v1'`; those inside the new-side range of
 * an Insert or Replace operation are re-marked. Other operations are ignored.
 *
 * @param ops Operations of the V1 → V2 analysis.
 * @param v2Len Number of V2 diff tokens; also the length of the result.
 * @returns One origin per V2 diff index.
 */
function buildOriginMap(ops: readonly Operation[], v2Len: number): V2Origin[] {
  const origins: V2Origin[] = new Array<V2Origin>(v2Len).fill('preserved-from-v1')

  for (const { action, startInNew, endInNew } of ops) {
    let mark: V2Origin
    if (action === Action.Insert) {
      mark = 'inserted-by-cp'
    } else if (action === Action.Replace) {
      mark = 'replaced-into-by-cp'
    } else {
      continue
    }

    // Clamp the range so indices outside [0, v2Len) are never written.
    const first = Math.max(startInNew, 0)
    const stop = Math.min(endInNew, v2Len)
    for (let k = first; k < stop; k++) {
      origins[k] = mark
    }
  }

  return origins
}
115
+
/**
 * Classify every V2 diff token by its fate in the Me diff.
 *
 * Tokens start as `'preserved-to-v3'`; those inside the old-side range of a
 * Delete or Replace operation are re-marked. Other operations are ignored.
 *
 * @param ops Operations of the V2 → V3 analysis.
 * @param v2Len Number of V2 diff tokens; also the length of the result.
 * @returns One fate per V2 diff index.
 */
function buildFateMap(ops: readonly Operation[], v2Len: number): V2Fate[] {
  const fates: V2Fate[] = new Array<V2Fate>(v2Len).fill('preserved-to-v3')

  for (const op of ops) {
    let mark: V2Fate
    switch (op.action) {
      case Action.Delete:
        mark = 'deleted-by-me'
        break
      case Action.Replace:
        mark = 'replaced-out-by-me'
        break
      default:
        continue
    }

    // Clamp the range so indices outside [0, v2Len) are never written.
    const first = Math.max(op.startInOld, 0)
    const stop = Math.min(op.endInOld, v2Len)
    for (let k = first; k < stop; k++) {
      fates[k] = mark
    }
  }

  return fates
}
128
+
/**
 * Whether an attribution removes its tokens from the output, i.e. it is a
 * plain `del` or a `reject`.
 */
function isDeletion(attr: Attribution): boolean {
  switch (attr.kind) {
    case 'del':
    case 'reject':
      return true
    default:
      return false
  }
}
132
+
/**
 * Merge a V2 token's origin (CP side) and fate (Me side) into one attribution:
 *
 * | CP added it | Me removed it | result          |
 * | ----------- | ------------- | --------------- |
 * | no          | no            | `equal`         |
 * | yes         | no            | `ins` by cp     |
 * | no          | yes           | `del` by me     |
 * | yes         | yes           | `reject` (me rejects cp) |
 */
function combine(origin: V2Origin, fate: V2Fate): Attribution {
  const addedByCp = origin === 'inserted-by-cp' || origin === 'replaced-into-by-cp'
  const removedByMe = fate === 'deleted-by-me' || fate === 'replaced-out-by-me'

  if (removedByMe) {
    return addedByCp ? { kind: 'reject', by: 'me', rejected: 'cp' } : { kind: 'del', author: 'me' }
  }
  return addedByCp ? { kind: 'ins', author: 'cp' } : { kind: 'equal' }
}
141
+
/**
 * Group the old-side tokens removed by `d` by the new-side boundary
 * (`startInNew`) at which each removal happens.
 *
 * Both Delete operations and the old side of Replace operations count as
 * removals; a Replace is treated as Delete + Insert, and its Insert half is
 * picked up separately by the walk over V2 tokens. Operations whose old-side
 * slice is empty contribute nothing, so every stored list is non-empty.
 *
 * @returns Map from new-side boundary index to the removed tokens, in
 *   operation order.
 */
function collectDeletionsAtBoundary(d: AnalyzeResult): Map<number, string[]> {
  const byBoundary = new Map<number, string[]>()

  for (const op of d.operations) {
    const removesOldTokens = op.action === Action.Delete || op.action === Action.Replace
    if (!removesOldTokens) continue

    const removed = d.oldDiffWords.slice(op.startInOld, op.endInOld)
    if (removed.length === 0) continue

    const bucket = byBoundary.get(op.startInNew)
    if (bucket === undefined) {
      byBoundary.set(op.startInNew, [...removed])
    } else {
      bucket.push(...removed)
    }
  }

  return byBoundary
}
159
+
/**
 * Group the new-side tokens added by `d` by the old-side boundary
 * (`startInOld`) at which each addition happens.
 *
 * Both Insert operations and the new side of Replace operations count as
 * additions. Operations whose new-side slice is empty contribute nothing,
 * so every stored list is non-empty.
 *
 * @returns Map from old-side boundary index to the added tokens, in
 *   operation order.
 */
function collectInsertionsAtBoundary(d: AnalyzeResult): Map<number, string[]> {
  const byBoundary = new Map<number, string[]>()

  for (const op of d.operations) {
    const addsNewTokens = op.action === Action.Insert || op.action === Action.Replace
    if (!addsNewTokens) continue

    const added = d.newDiffWords.slice(op.startInNew, op.endInNew)
    if (added.length === 0) continue

    const bucket = byBoundary.get(op.startInOld)
    if (bucket === undefined) {
      byBoundary.set(op.startInOld, [...added])
    } else {
      bucket.push(...added)
    }
  }

  return byBoundary
}
172
+
/**
 * Add `words` to the end of `segments`, extending the last segment in place
 * when it already has the same attribution and starting a new segment
 * otherwise. Empty `words` is a no-op. A new segment stores a copy of
 * `words`, so the caller's array is never aliased.
 */
function appendSegment(segments: Segment[], attr: Attribution, words: string[]): void {
  if (!words.length) return

  const tail = segments[segments.length - 1]
  const extendsTail = Boolean(tail) && sameAttribution(tail.attr, attr)

  if (extendsTail) {
    tail.words.push(...words)
  } else {
    segments.push({ attr, words: words.slice() })
  }
}
182
+
/**
 * Whether two attributions are interchangeable for merging: same `kind`,
 * and for `ins` / `del` also the same author. `equal` and `reject` carry
 * nothing further to compare.
 */
function sameAttribution(a: Attribution, b: Attribution): boolean {
  const bothAuthored = (a.kind === 'ins' && b.kind === 'ins') || (a.kind === 'del' && b.kind === 'del')
  if (bothAuthored) {
    return a.author === b.author
  }
  return (a.kind === 'equal' && b.kind === 'equal') || (a.kind === 'reject' && b.kind === 'reject')
}
190
+
/**
 * Build the `WrapMetadata` that marks output as belonging to `author`.
 *
 * This is meant to be the one place that decides the author CSS class and
 * data-attribute shape, so that every emission path (word-level,
 * table-level row/cell, whole-table pre-wrap) produces identical markers.
 *
 * @param author Party the wrapped content is attributed to.
 * @param rejects When given, the party whose proposal `author` rejected;
 *   adds a `rejects` data attribute and a `rejects-<party>` class.
 * @returns `extraClasses` (space-separated class string) and `dataAttrs`.
 */
export function authorAttribution(author: Author, rejects?: Author): WrapMetadata {
  if (rejects === undefined) {
    return { extraClasses: author, dataAttrs: { author } }
  }
  return {
    extraClasses: `${author} rejects-${rejects}`,
    dataAttrs: { author, rejects },
  }
}
204
+
/**
 * Translate a non-equal attribution into what the emitter needs: the
 * wrapper tag, its base CSS class, and the author `WrapMetadata`.
 *
 * `ins` maps to an `<ins>` / `diffins` wrapper. `del` and `reject` both map
 * to `<del>` / `diffdel`; a `reject` additionally records whose proposal was
 * rejected in its metadata.
 *
 * @param attr Any attribution except `equal`, which needs no wrapper.
 */
export function segmentEmissionShape(attr: Exclude<Attribution, { kind: 'equal' }>): {
  tag: 'ins' | 'del'
  baseClass: 'diffins' | 'diffdel'
  metadata: WrapMetadata
} {
  switch (attr.kind) {
    case 'reject': {
      const metadata = authorAttribution(attr.by, attr.rejected)
      return { tag: 'del', baseClass: 'diffdel', metadata }
    }
    case 'del': {
      const metadata = authorAttribution(attr.author)
      return { tag: 'del', baseClass: 'diffdel', metadata }
    }
    case 'ins': {
      const metadata = authorAttribution(attr.author)
      return { tag: 'ins', baseClass: 'diffins', metadata }
    }
  }
}