@createiq/htmldiff 1.1.0 → 1.2.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +15 -0
- package/README.md +40 -0
- package/dist/HtmlDiff.cjs +1255 -493
- package/dist/HtmlDiff.cjs.map +1 -1
- package/dist/HtmlDiff.d.cts +141 -7
- package/dist/HtmlDiff.d.mts +140 -7
- package/dist/HtmlDiff.mjs +1255 -493
- package/dist/HtmlDiff.mjs.map +1 -1
- package/package.json +1 -1
- package/src/Alignment.ts +349 -0
- package/src/HtmlDiff.ts +323 -33
- package/src/HtmlScanner.ts +200 -0
- package/src/TableDiff.ts +67 -522
- package/src/ThreeWayDiff.ts +223 -0
- package/src/ThreeWayTable.ts +701 -0
- package/src/Utils.ts +34 -2
- package/test/HtmlDiff.analyze.spec.ts +152 -0
- package/test/HtmlDiff.tables.spec.ts +43 -19
- package/test/HtmlDiff.threeWay.spec.ts +175 -0
- package/test/HtmlDiff.threeWay.tables.spec.ts +407 -0
- package/test/TableDiff.bench.ts +39 -0
- package/test/Utils.spec.ts +48 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import Action from './Action'
|
|
2
|
+
import type { AnalyzeResult } from './HtmlDiff'
|
|
3
|
+
import type Operation from './Operation'
|
|
4
|
+
import type { WrapMetadata } from './Utils'
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Composes diff(V1, V2) (CP's changes) and diff(V2, V3) (Me's changes)
|
|
8
|
+
* into a single attributed segment stream. The output is consumed by
|
|
9
|
+
* `HtmlDiff.executeThreeWay` for emission.
|
|
10
|
+
*
|
|
11
|
+
* V2 is the structural spine. Both pair-wise analyses must tokenise V2
|
|
12
|
+
* identically (`HtmlDiff.executeThreeWay` enforces this via the
|
|
13
|
+
* symmetric-projection decision), so V2-diff indices are stable across
|
|
14
|
+
* the two streams and we can fold them into a single per-V2-token
|
|
15
|
+
* attribution view, interleaved with off-spine CP-deletions (V1-side)
|
|
16
|
+
* and Me-insertions (V3-side).
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
/** Who made a change: `cp` authored the V1 → V2 edits, `me` authored the V2 → V3 edits. */
export type Author = 'cp' | 'me'
|
|
20
|
+
|
|
21
|
+
/**
 * What an output segment represents and who is responsible for it.
 *
 * Modelled as a discriminated union on `kind`. A rejection gets its own
 * `reject` variant instead of being a decorated `del`, so a `switch` on
 * `kind` is exhaustive without probing for optional properties.
 */
export type Attribution =
  | { kind: 'equal' }
  | { kind: 'ins'; author: Author }
  | { kind: 'del'; author: Author }
  // Tokens CP inserted that Me then deleted, i.e. Me rejecting CP's proposal.
  | { kind: 'reject'; by: 'me'; rejected: 'cp' }
|
|
32
|
+
|
|
33
|
+
/** A run of tokens that share one attribution. */
export interface Segment {
  /** Attribution applied to every token in `words`. */
  attr: Attribution
  /**
   * Tokens to emit. Equal segments carry original V2 words (structural
   * tags included); ins/del segments carry diff-space tokens.
   */
  words: string[]
}
|
|
39
|
+
|
|
40
|
+
/**
 * Fold the two pair-wise analyses into one ordered, attributed segment list.
 *
 * The walk follows V2's diff tokens. For each token it emits, in order:
 * CP-deleted V1 tokens anchored at that boundary, Me-inserted V3 tokens
 * anchored there (unless deferred, see below), then the V2 token itself
 * with the attribution produced by `combine`. Adjacent pieces with the
 * same attribution are merged by `appendSegment`.
 *
 * @param d1 Analysis of V1 → V2 (CP's changes); its "new" side is V2.
 * @param d2 Analysis of V2 → V3 (Me's changes); its "old" side is V2.
 * @returns Segments in emission order.
 */
export function buildSegments(d1: AnalyzeResult, d2: AnalyzeResult): Segment[] {
  const v2DiffLen = d1.newDiffWords.length
  const fromV1 = buildOriginMap(d1.operations, v2DiffLen)
  const toV3 = buildFateMap(d2.operations, v2DiffLen)
  const cpDeletionsAt = collectDeletionsAtBoundary(d1)
  const meInsertionsAt = collectInsertionsAtBoundary(d2)

  // Me-side Replace ops: first replaced V2 index → exclusive end of the
  // replaced range. Lets the walk hold a replacement's inserted tokens
  // back until every token it replaces has been emitted.
  const meReplaceEndAt = new Map<number, number>()
  for (const op of d2.operations) {
    if (op.action === Action.Replace && op.endInOld > op.startInOld) {
      meReplaceEndAt.set(op.startInOld, op.endInOld)
    }
  }

  // Inverse map V2-diff-index → V2-original-index. Identity when no projection.
  const diffToOriginal: readonly number[] = d1.newContentToOriginal ?? Array.from({ length: v2DiffLen }, (_, i) => i)
  const v2OriginalLen = d1.newOriginalWords.length

  const segments: Segment[] = []
  let originalCursor = 0
  // Me-insertion waiting for the end of the deleted run it replaces.
  let pendingMeIns: string[] | undefined
  let pendingUntil = 0

  for (let i = 0; i < v2DiffLen; i++) {
    // CP-deletions from V1 land BEFORE the V2 token at this boundary:
    // they preceded V2[i] in V1's stream.
    const cpDel = cpDeletionsAt.get(i)
    if (cpDel?.length) appendSegment(segments, { kind: 'del', author: 'cp' }, cpDel)

    const attr = combine(fromV1[i], toV3[i])
    const origIdx = diffToOriginal[i]
    // Everything in V2-original up to and including this content token,
    // so structural tags skipped by the projection ride along with it.
    const slice = d1.newOriginalWords.slice(originalCursor, origIdx + 1)
    originalCursor = origIdx + 1

    // Pure Me-insertions go BEFORE V2[i]. When V2[i] is itself deleted by
    // Me (a Me Replace) the insertion is held back until the whole
    // replaced range has been emitted, so the result reads
    // `<del>X1 X2</del><ins>Y</ins>` rather than splitting the deletion
    // after its first token.
    const meIns = meInsertionsAt.get(i)
    if (meIns?.length) {
      if (pendingMeIns !== undefined) {
        // Ops are not expected to overlap; flush rather than lose tokens if they do.
        appendSegment(segments, { kind: 'ins', author: 'me' }, pendingMeIns)
        pendingMeIns = undefined
      }
      if (isDeletion(attr)) {
        pendingMeIns = meIns
        pendingUntil = meReplaceEndAt.get(i) ?? i + 1
      } else {
        appendSegment(segments, { kind: 'ins', author: 'me' }, meIns)
      }
    }

    appendSegment(segments, attr, slice)

    if (pendingMeIns !== undefined && i + 1 >= pendingUntil) {
      appendSegment(segments, { kind: 'ins', author: 'me' }, pendingMeIns)
      pendingMeIns = undefined
    }
  }

  // A replaced range that claims to run past the last V2 token must still
  // surface its insertion.
  if (pendingMeIns !== undefined) {
    appendSegment(segments, { kind: 'ins', author: 'me' }, pendingMeIns)
  }

  // Tail-end interleavings (CP-del / Me-ins at boundary v2DiffLen, i.e.
  // after every V2 token). There is no V2 token left to anchor around.
  const tailCpDel = cpDeletionsAt.get(v2DiffLen)
  if (tailCpDel?.length) appendSegment(segments, { kind: 'del', author: 'cp' }, tailCpDel)
  const tailMeIns = meInsertionsAt.get(v2DiffLen)
  if (tailMeIns?.length) appendSegment(segments, { kind: 'ins', author: 'me' }, tailMeIns)

  // Trailing V2-original tokens (structural closing tags after the last
  // content word). Emitted as equal: no following segment claims them,
  // and attributing them to either author would be arbitrary.
  if (originalCursor < v2OriginalLen) {
    appendSegment(segments, { kind: 'equal' }, d1.newOriginalWords.slice(originalCursor))
  }

  return segments
}
|
|
97
|
+
|
|
98
|
+
// ────────────────────────────────────────────────────────────────────────────
|
|
99
|
+
|
|
100
|
+
/** How a V2 diff-token arrived from V1: untouched, inserted by CP, or the new side of a CP Replace. */
type V2Origin = 'preserved-from-v1' | 'inserted-by-cp' | 'replaced-into-by-cp'
|
|
101
|
+
/** What happens to a V2 diff-token on the way to V3: kept, deleted by Me, or the old side of a Me Replace. */
type V2Fate = 'preserved-to-v3' | 'deleted-by-me' | 'replaced-out-by-me'
|
|
102
|
+
|
|
103
|
+
/**
 * Label each of the `v2Len` V2 diff-tokens with its origin relative to V1.
 *
 * Every token starts as `preserved-from-v1`; the "new" range of each
 * Insert or Replace op then overwrites the tokens it covers. Indices
 * outside `[0, v2Len)` are ignored.
 */
function buildOriginMap(ops: readonly Operation[], v2Len: number): V2Origin[] {
  const origins: V2Origin[] = new Array(v2Len).fill('preserved-from-v1')
  for (const { action, startInNew, endInNew } of ops) {
    let origin: V2Origin
    if (action === Action.Insert) origin = 'inserted-by-cp'
    else if (action === Action.Replace) origin = 'replaced-into-by-cp'
    else continue

    // Clamp once instead of bounds-checking every index.
    const from = Math.max(startInNew, 0)
    const to = Math.min(endInNew, v2Len)
    for (let idx = from; idx < to; idx++) origins[idx] = origin
  }
  return origins
}
|
|
115
|
+
|
|
116
|
+
/**
 * Label each of the `v2Len` V2 diff-tokens with its fate on the way to V3.
 *
 * Every token starts as `preserved-to-v3`; the "old" range of each
 * Delete or Replace op then overwrites the tokens it covers. Indices
 * outside `[0, v2Len)` are ignored.
 */
function buildFateMap(ops: readonly Operation[], v2Len: number): V2Fate[] {
  const fates: V2Fate[] = new Array(v2Len).fill('preserved-to-v3')
  for (const { action, startInOld, endInOld } of ops) {
    let fate: V2Fate
    if (action === Action.Delete) fate = 'deleted-by-me'
    else if (action === Action.Replace) fate = 'replaced-out-by-me'
    else continue

    // Clamp once instead of bounds-checking every index.
    const from = Math.max(startInOld, 0)
    const to = Math.min(endInOld, v2Len)
    for (let idx = from; idx < to; idx++) fates[idx] = fate
  }
  return fates
}
|
|
128
|
+
|
|
129
|
+
/** True when the attribution removes tokens: a plain `del` or a `reject`. */
function isDeletion(attr: Attribution): boolean {
  switch (attr.kind) {
    case 'del':
    case 'reject':
      return true
    default:
      return false
  }
}
|
|
132
|
+
|
|
133
|
+
/**
 * Merge a V2 token's origin (CP side) and fate (Me side) into one attribution.
 *
 * - neither side touched it → `equal`
 * - CP added it, Me kept it → `ins` by CP
 * - CP left it, Me removed it → `del` by Me
 * - CP added it, Me removed it → `reject`
 */
function combine(origin: V2Origin, fate: V2Fate): Attribution {
  const addedByCp = origin === 'inserted-by-cp' || origin === 'replaced-into-by-cp'
  const removedByMe = fate === 'deleted-by-me' || fate === 'replaced-out-by-me'

  if (removedByMe) {
    return addedByCp ? { kind: 'reject', by: 'me', rejected: 'cp' } : { kind: 'del', author: 'me' }
  }
  return addedByCp ? { kind: 'ins', author: 'cp' } : { kind: 'equal' }
}
|
|
141
|
+
|
|
142
|
+
/**
 * Map V2-diff-boundary → CP-deleted V1 tokens at that boundary.
 *
 * Covers pure Delete ops and the V1 side of Replace ops (a Replace is a
 * Delete plus an Insert; the Insert half is picked up by the V2-token
 * walk). The boundary is the op's `startInNew`; ops sharing a boundary
 * are concatenated in operation order. Ops whose old range yields no
 * tokens are skipped.
 *
 * @param d Analysis whose "old" side holds the deleted tokens.
 * @returns Boundary index → deleted tokens; never contains empty arrays.
 */
function collectDeletionsAtBoundary(d: AnalyzeResult): Map<number, string[]> {
  const out = new Map<number, string[]>()
  for (const op of d.operations) {
    if (op.action !== Action.Delete && op.action !== Action.Replace) continue
    const words = d.oldDiffWords.slice(op.startInOld, op.endInOld)
    if (words.length === 0) continue

    const bucket = out.get(op.startInNew)
    if (bucket === undefined) {
      // `slice` returned a fresh array, so it can be stored without copying.
      out.set(op.startInNew, words)
    } else {
      // Append one by one: `push(...words)` passes every token as a call
      // argument and throws RangeError once an op is large enough.
      for (const word of words) bucket.push(word)
    }
  }
  return out
}
|
|
159
|
+
|
|
160
|
+
/**
 * Map V2-diff-boundary → tokens inserted at that boundary.
 *
 * Covers pure Insert ops and the new side of Replace ops. The boundary
 * is the op's `startInOld`; ops sharing a boundary are concatenated in
 * operation order. Ops whose new range yields no tokens are skipped.
 *
 * @param d Analysis whose "new" side holds the inserted tokens.
 * @returns Boundary index → inserted tokens; never contains empty arrays.
 */
function collectInsertionsAtBoundary(d: AnalyzeResult): Map<number, string[]> {
  const out = new Map<number, string[]>()
  for (const op of d.operations) {
    if (op.action !== Action.Insert && op.action !== Action.Replace) continue
    const words = d.newDiffWords.slice(op.startInNew, op.endInNew)
    if (words.length === 0) continue

    const bucket = out.get(op.startInOld)
    if (bucket === undefined) {
      // `slice` returned a fresh array, so it can be stored without copying.
      out.set(op.startInOld, words)
    } else {
      // Append one by one: `push(...words)` passes every token as a call
      // argument and throws RangeError once an op is large enough.
      for (const word of words) bucket.push(word)
    }
  }
  return out
}
|
|
172
|
+
|
|
173
|
+
/**
 * Append `words` to the segment list under attribution `attr`.
 *
 * Does nothing for an empty `words`. When the last segment already has
 * the same attribution (per `sameAttribution`) the words are merged into
 * it; otherwise a new segment is pushed holding a copy of `words`, so
 * later merges never mutate the caller's array.
 */
function appendSegment(segments: Segment[], attr: Attribution, words: string[]) {
  if (words.length === 0) return
  const last = segments[segments.length - 1]
  if (last && sameAttribution(last.attr, attr)) {
    // Element-wise append instead of `push(...words)`: spreading passes
    // every token as a call argument and throws RangeError for very
    // large runs. `forEach` visits only the elements present when it
    // starts, so this stays finite even if `words` aliases `last.words`.
    const target = last.words
    words.forEach((word) => {
      target.push(word)
    })
    return
  }
  segments.push({ attr, words: [...words] })
}
|
|
182
|
+
|
|
183
|
+
/**
 * Whether two attributions are interchangeable for merging: same `kind`
 * and, for `ins`/`del`, the same author.
 */
function sameAttribution(a: Attribution, b: Attribution): boolean {
  switch (a.kind) {
    case 'equal':
      return b.kind === 'equal'
    case 'ins':
      return b.kind === 'ins' && a.author === b.author
    case 'del':
      return b.kind === 'del' && a.author === b.author
    case 'reject':
      return b.kind === 'reject'
    default:
      return false
  }
}
|
|
190
|
+
|
|
191
|
+
/**
 * Produce the `WrapMetadata` that marks output as belonging to `author`.
 *
 * This is the one place that decides the author CSS class and data
 * attribute shape, shared by the word-level, table-level (full row /
 * cell) and multi-table whole-table pre-wrap emission paths so they
 * cannot drift apart.
 *
 * @param author Author recorded as the extra class and `author` data attribute.
 * @param rejects When given, also adds a `rejects-<author>` class and a
 *   `rejects` data attribute naming the author being rejected.
 */
export function authorAttribution(author: Author, rejects?: Author): WrapMetadata {
  if (rejects === undefined) {
    return { extraClasses: author, dataAttrs: { author } }
  }
  return {
    extraClasses: `${author} rejects-${rejects}`,
    dataAttrs: { author, rejects },
  }
}
|
|
204
|
+
|
|
205
|
+
/**
 * Translate a non-equal attribution into what the emitter needs: the
 * wrapper tag, its base CSS class, and the author `WrapMetadata` passed
 * on to `Utils.wrapText` / `insertTag`. Called from the emission loop of
 * `HtmlDiff.executeThreeWay`.
 *
 * `ins` maps to an `<ins>`/`diffins` wrapper; `del` and `reject` both map
 * to `<del>`/`diffdel`, with `reject` additionally recording whose change
 * was rejected.
 */
export function segmentEmissionShape(attr: Exclude<Attribution, { kind: 'equal' }>): {
  tag: 'ins' | 'del'
  baseClass: 'diffins' | 'diffdel'
  metadata: WrapMetadata
} {
  switch (attr.kind) {
    case 'ins':
    case 'del': {
      const metadata = authorAttribution(attr.author)
      return attr.kind === 'ins'
        ? { tag: 'ins', baseClass: 'diffins', metadata }
        : { tag: 'del', baseClass: 'diffdel', metadata }
    }
    case 'reject': {
      const { by, rejected } = attr
      return { tag: 'del', baseClass: 'diffdel', metadata: authorAttribution(by, rejected) }
    }
  }
}
|