@omnitype-code/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/blame.js +242 -0
- package/dist/core/ApiClient.js +234 -0
- package/dist/core/FileProvenance.js +483 -0
- package/dist/core/GitNotes.js +120 -0
- package/dist/core/Heartbeat.js +81 -0
- package/dist/core/ModelDetector.js +243 -0
- package/dist/core/ProvenanceResolver.js +424 -0
- package/dist/core/UI.js +97 -0
- package/dist/daemon.js +194 -0
- package/dist/hooks.js +220 -0
- package/dist/index.js +536 -0
- package/package.json +30 -0
- package/src/blame.ts +240 -0
- package/src/core/ApiClient.ts +197 -0
- package/src/core/FileProvenance.ts +538 -0
- package/src/core/GitNotes.ts +141 -0
- package/src/core/Heartbeat.ts +53 -0
- package/src/core/ModelDetector.ts +216 -0
- package/src/core/ProvenanceResolver.ts +433 -0
- package/src/core/UI.ts +105 -0
- package/src/daemon.ts +171 -0
- package/src/hooks.ts +195 -0
- package/src/index.ts +537 -0
- package/tsconfig.json +15 -0
|
@@ -0,0 +1,538 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FileProvenance — character-level origin tracking for a single file.
|
|
3
|
+
*
|
|
4
|
+
* Every character belongs to one of four origins:
|
|
5
|
+
* 'ai' — inserted by an AI tool
|
|
6
|
+
* 'user' — typed manually
|
|
7
|
+
* 'paste' — pasted from clipboard
|
|
8
|
+
* 'existing' — present before tracking began (unknown origin)
|
|
9
|
+
*
|
|
10
|
+
* Internally the file is an ordered array of contiguous Spans covering
|
|
11
|
+
* [start, end) character offsets. Adjacent spans with the same origin
|
|
12
|
+
* are coalesced automatically.
|
|
13
|
+
*
|
|
14
|
+
* For external disk writes we accept a LineHashBaseline (hashes + char lengths)
|
|
15
|
+
* instead of raw text — no source code is ever stored or reconstructed.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
/** Where a character came from. */
export type Origin = 'ai' | 'user' | 'paste' | 'existing';

/** A contiguous half-open run `[start, end)` of characters sharing one origin. */
interface Span {
  /** First character offset covered by the span (inclusive). */
  start: number;
  /** Character offset one past the last covered character (exclusive). */
  end: number;
  origin: Origin;
  /** Index into the file-local model dictionary; undefined = unknown. */
  modelId?: number;
  /** Unix-ms when this origin was last set; 0 = unknown/existing. */
  timestamp: number;
}

/** Per-origin character totals for a file plus the cross-origin edit counters. */
export interface Composition {
  ai: number;
  user: number;
  paste: number;
  existing: number;
  total: number;
  /** Chars added by 'user' that replaced 'ai' content. */
  aiEditedByManual: number;
  /** Chars added by 'ai' that replaced 'user' content. */
  manualEditedByAi: number;
}

/** One entry per line in the baseline snapshot. No source text — only hashes. */
export interface LineHashBaseline {
  /** FNV-1a 32-bit hash of the line content (without trailing \n). */
  hash: number;
  /** Character length of the line including the trailing \n (except last line). */
  charLen: number;
}

/** Compact origin encoding: 0=existing, 1=user, 2=ai, 3=paste */
const ORIGIN_ENCODE: Record<Origin, number> = {
  existing: 0,
  user: 1,
  ai: 2,
  paste: 3,
};

/** Inverse of ORIGIN_ENCODE: the array index is the stored code. */
const ORIGIN_DECODE: Origin[] = ['existing', 'user', 'ai', 'paste'];

/** Serialized provenance for a single file — stored across sessions. No source text. */
export interface StoredProvenance {
  /** Per-line content hashes. */
  hashes: number[];
  /** Per-line origin codes (see ORIGIN_ENCODE). */
  origins: number[];
  /** Per-line character lengths. */
  charLens: number[];
  /** Unix-ms when each line's origin was last set; 0 = unknown. Optional for backwards compat. */
  timestamps?: number[];
  aiEditedByManual: number;
  manualEditedByAi: number;
  /** String-interned model dictionary. e.g. ['claude-3-5-sonnet', 'gpt-4o'] */
  modelDictionary?: string[];
  /** Per-line index into modelDictionary. -1 or undefined = unknown model. */
  modelIds?: number[];
}

/** Summary of what a single replace/diff operation added and removed. */
export interface ProvenanceDelta {
  /** Number of characters inserted. */
  added: number;
  /** Number of characters removed, broken down by the origin they had. */
  deleted: Record<Origin, number>;
  aiEditedByManual: number;
  manualEditedByAi: number;
}
|
|
70
|
+
|
|
71
|
+
export class FileProvenance {
  /**
   * Per-side line limit for the O(m×n) LCS table used by applyLineDiff.
   * At 3000×3000 the table holds ~9M cells (~36 MB); beyond that the diff
   * falls back to "everything deleted, everything added".
   */
  private static readonly MAX_LCS_SIDE = 3000;

  /** Origin iteration order; also the tie-break order for a line's dominant origin. */
  private static readonly ORIGINS: readonly Origin[] = ['ai', 'user', 'paste', 'existing'];

  /**
   * Ordered spans. Invariant maintained by every mutator: sorted by offset,
   * contiguous, non-overlapping, no empty spans.
   */
  private spans: Span[];

  private aiEditedByManual = 0;
  private manualEditedByAi = 0;

  /** File-local model dictionary for string interning. */
  private _modelDict: string[] = [];

  /**
   * @param existingLength Number of characters already in the file when
   *   tracking starts; they are recorded as a single 'existing' span.
   */
  constructor(existingLength: number) {
    this.spans = existingLength > 0
      ? [{ start: 0, end: existingLength, origin: 'existing', timestamp: 0 }]
      : [];
  }

  /**
   * Intern a model name and return its numeric ID.
   * Returns undefined for falsy model names and for the literal 'unknown'.
   */
  internModel(model: string | undefined): number | undefined {
    if (!model || model === 'unknown') return undefined;
    const known = this._modelDict.indexOf(model);
    if (known !== -1) return known;
    this._modelDict.push(model);
    return this._modelDict.length - 1;
  }

  /** Resolve a modelId back to its string name; undefined when out of range. */
  resolveModel(modelId: number | undefined): string | undefined {
    if (modelId === undefined || modelId < 0 || modelId >= this._modelDict.length) return undefined;
    return this._modelDict[modelId];
  }

  /**
   * Get the full model dictionary (for serialization).
   * Note: this is the internal array itself, not a copy.
   */
  getModelDictionary(): string[] {
    return this._modelDict;
  }

  /**
   * Adjust the length of the line at `offset` without changing origin attribution.
   * Used for "silent" updates like indentation/formatting where the line identity
   * (hash) remains the same but the number of whitespace characters changed.
   *
   * Growth extends the span that actually covers `offset` (a span that merely
   * ends at `offset` belongs to the previous line and is left alone).
   * Shrinkage removes the surplus characters starting at `offset`, spilling
   * into following spans if the first one is too short, so spans never end up
   * with negative length. The edit counters are not touched.
   */
  adjustLength(offset: number, oldLen: number, newLen: number): void {
    if (oldLen === newLen) return;

    if (newLen < oldLen) {
      // delete() keeps the span list contiguous; its per-origin tally is not needed here.
      this.delete(offset, oldLen - newLen);
      return;
    }

    const idx = this._spanCovering(offset);
    if (idx === -1) return;

    const delta = newLen - oldLen;
    this.spans[idx].end += delta;
    this._shiftFrom(idx + 1, delta);
    this._coalesce();
  }

  // ── Core operations ──────────────────────────────────────────────────────────

  /**
   * Insert `length` characters at `offset` with the given origin.
   * timestamp defaults to Date.now() for tracked origins, 0 for 'existing'.
   * An offset past the end of the tracked text appends after the last span.
   */
  insert(offset: number, length: number, origin: Origin, timestamp?: number, modelId?: number): void {
    if (length <= 0) return;
    const ts = timestamp ?? (origin === 'existing' ? 0 : Date.now());

    const idx = this._spanAt(offset);

    if (idx === -1) {
      const last = this.spans[this.spans.length - 1];
      const start = last ? last.end : 0;
      this.spans.push({ start, end: start + length, origin, modelId, timestamp: ts });
      this._coalesce();
      return;
    }

    const span = this.spans[idx];
    const fresh: Span = { start: offset, end: offset + length, origin, modelId, timestamp: ts };

    if (span.start === offset) {
      // New text goes immediately before the located span.
      this.spans.splice(idx, 0, fresh);
      this._shiftFrom(idx + 1, length);
    } else if (span.end === offset) {
      // New text goes immediately after the located span.
      this.spans.splice(idx + 1, 0, fresh);
      this._shiftFrom(idx + 2, length);
    } else {
      // Split span — right half inherits the original span's timestamp and modelId
      const right: Span = {
        start: offset + length,
        end: span.end + length,
        origin: span.origin,
        modelId: span.modelId,
        timestamp: span.timestamp,
      };
      span.end = offset;
      this.spans.splice(idx + 1, 0, fresh, right);
      this._shiftFrom(idx + 3, length);
    }

    this._coalesce();
  }

  /**
   * Delete `length` characters starting at `offset`.
   * Returns the composition of the deleted segment.
   *
   * Every surviving span is repositioned: spans before the cut are untouched,
   * spans after it move left by the cut width, and a span that loses its head
   * is re-anchored at the cut position so the span list stays contiguous.
   */
  delete(offset: number, length: number): Record<Origin, number> {
    const deleted: Record<Origin, number> = { ai: 0, user: 0, paste: 0, existing: 0 };
    if (length <= 0) return deleted;

    const from = Math.max(offset, 0);
    const to = offset + length;
    if (to <= from) return deleted;
    const cutWidth = to - from;

    const kept: Span[] = [];
    for (const s of this.spans) {
      if (s.end <= from) {
        kept.push(s); // entirely before the cut
        continue;
      }
      if (s.start >= to) {
        s.start -= cutWidth; // entirely after the cut
        s.end -= cutWidth;
        kept.push(s);
        continue;
      }

      const overlap = Math.min(s.end, to) - Math.max(s.start, from);
      deleted[s.origin] += overlap;

      const remaining = (s.end - s.start) - overlap;
      if (remaining > 0) {
        // If the span started inside the cut, its survivors now begin at `from`.
        const newStart = Math.min(s.start, from);
        s.start = newStart;
        s.end = newStart + remaining;
        kept.push(s);
      }
    }

    this.spans = kept;
    this._coalesce();
    return deleted;
  }

  /**
   * Replace [offset, offset+deleteLen) with insertLen chars of origin.
   * Returns a delta indicating what was added and what was deleted.
   * A 'user' replacement of 'ai' text (or vice versa) is counted as an edit,
   * capped at the number of inserted characters.
   */
  replace(offset: number, deleteLen: number, insertLen: number, origin: Origin, modelId?: number): ProvenanceDelta {
    const res: ProvenanceDelta = {
      added: insertLen,
      deleted: { ai: 0, user: 0, paste: 0, existing: 0 },
      aiEditedByManual: 0,
      manualEditedByAi: 0,
    };

    if (deleteLen > 0) {
      Object.assign(res.deleted, this.delete(offset, deleteLen));

      // Attribution: if a user edit replaces AI work, track that "edit"
      if (origin === 'user' && res.deleted.ai > 0) {
        const amt = Math.min(res.deleted.ai, insertLen);
        this.aiEditedByManual += amt;
        res.aiEditedByManual += amt;
      }
      if (origin === 'ai' && res.deleted.user > 0) {
        const amt = Math.min(res.deleted.user, insertLen);
        this.manualEditedByAi += amt;
        res.manualEditedByAi += amt;
      }
    }
    if (insertLen > 0) this.insert(offset, insertLen, origin, undefined, modelId);
    return res;
  }

  /**
   * Apply a diff between an old baseline and a new baseline, attributing
   * all added/changed lines to `origin`.
   *
   * Lines are aligned by LCS on their hashes. Each run of non-matching lines
   * between two matches is treated as one hunk: its old lines are deleted
   * first, then its new lines are inserted, and the cross-origin edit counters
   * are updated from what the hunk deleted versus what it added (same rule as
   * replace()). When either side exceeds MAX_LCS_SIDE the whole file is
   * replaced wholesale and no edit counters are updated, since the alignment
   * is unknown.
   */
  applyLineDiff(oldBaseline: LineHashBaseline[], newBaseline: LineHashBaseline[], origin: Origin, modelId?: number): ProvenanceDelta {
    const res: ProvenanceDelta = {
      added: 0,
      deleted: { ai: 0, user: 0, paste: 0, existing: 0 },
      aiEditedByManual: 0,
      manualEditedByAi: 0,
    };
    const m = oldBaseline.length;
    const n = newBaseline.length;
    const t = this._lcsTable(oldBaseline, newBaseline);

    if (t === null) {
      // Oversized input: no alignment available, so drop all old lines and add all new ones.
      const oldChars = oldBaseline.reduce((sum, l) => sum + l.charLen, 0);
      const newChars = newBaseline.reduce((sum, l) => sum + l.charLen, 0);
      const removed = this.delete(0, oldChars);
      for (const o of FileProvenance.ORIGINS) res.deleted[o] += removed[o];
      this.insert(0, newChars, origin, undefined, modelId);
      res.added = newChars;
      return res;
    }

    // 1. Backtrack to find the optimal edit path (from end to start)
    type Op = { type: 'match' | 'add' | 'delete'; oi: number; ni: number };
    const ops: Op[] = [];
    let i = m, j = n;
    while (i > 0 || j > 0) {
      if (i > 0 && j > 0 && oldBaseline[i - 1].hash === newBaseline[j - 1].hash && t[i][j] === t[i - 1][j - 1] + 1) {
        ops.push({ type: 'match', oi: i - 1, ni: j - 1 });
        i--; j--;
      } else if (i > 0 && (j === 0 || t[i - 1][j] >= t[i][j - 1])) {
        ops.push({ type: 'delete', oi: i - 1, ni: -1 });
        i--;
      } else {
        ops.push({ type: 'add', oi: -1, ni: j - 1 });
        j--;
      }
    }
    ops.reverse();

    // 2. Apply operations forward to maintain correct character offsets
    let offset = 0;
    let k = 0;
    while (k < ops.length) {
      const op = ops[k];
      if (op.type === 'match') {
        const olen = oldBaseline[op.oi].charLen;
        const nlen = newBaseline[op.ni].charLen;
        if (olen !== nlen) this.adjustLength(offset, olen, nlen);
        offset += nlen;
        k++;
        continue;
      }

      // Hunk: consume every op up to the next match. Old lines of the hunk sit
      // back-to-back at `offset` because nothing has been inserted there yet.
      const hunkDeleted: Record<Origin, number> = { ai: 0, user: 0, paste: 0, existing: 0 };
      let hunkAdded = 0;
      for (; k < ops.length && ops[k].type !== 'match'; k++) {
        const cur = ops[k];
        if (cur.type === 'delete') {
          const del = this.delete(offset, oldBaseline[cur.oi].charLen);
          for (const o of FileProvenance.ORIGINS) {
            hunkDeleted[o] += del[o];
            res.deleted[o] += del[o];
          }
        } else {
          hunkAdded += newBaseline[cur.ni].charLen;
        }
      }

      if (hunkAdded > 0) {
        this.insert(offset, hunkAdded, origin, undefined, modelId);
        res.added += hunkAdded;

        // Replacement attribution, capped at the number of added characters.
        if (origin === 'user' && hunkDeleted.ai > 0) {
          const amt = Math.min(hunkDeleted.ai, hunkAdded);
          this.aiEditedByManual += amt;
          res.aiEditedByManual += amt;
        } else if (origin === 'ai' && hunkDeleted.user > 0) {
          const amt = Math.min(hunkDeleted.user, hunkAdded);
          this.manualEditedByAi += amt;
          res.manualEditedByAi += amt;
        }
        offset += hunkAdded;
      }
    }
    return res;
  }

  /** Character totals per origin, plus the running cross-origin edit counters. */
  getComposition(): Composition {
    const c: Composition = {
      ai: 0, user: 0, paste: 0, existing: 0, total: 0,
      aiEditedByManual: this.aiEditedByManual,
      manualEditedByAi: this.manualEditedByAi,
    };
    for (const s of this.spans) {
      const len = s.end - s.start;
      c[s.origin] += len;
      c.total += len;
    }
    return c;
  }

  // ── Persistence ──────────────────────────────────────────────────────────────

  /**
   * Returns the dominant origin for each line.
   * A line's origin is whichever origin covers the most characters in that line.
   */
  getLineOrigins(baseline: LineHashBaseline[]): Origin[] {
    return this._getLineData(baseline).map(d => d.origin);
  }

  /** Get per-line model names (resolved from dictionary). */
  getLineModels(baseline: LineHashBaseline[]): Array<string | undefined> {
    return this._getLineData(baseline).map(d => this.resolveModel(d.modelId));
  }

  /**
   * Returns per-model char counts for AI-attributed content in this file.
   * Counts whole 'ai' spans with a resolvable model; `baseline` is accepted
   * for signature compatibility but is not consulted.
   */
  getModelBreakdown(baseline: LineHashBaseline[]): Record<string, number> {
    const result: Record<string, number> = {};
    for (const span of this.spans) {
      if (span.origin !== 'ai') continue;
      const model = this.resolveModel(span.modelId);
      if (!model || model === 'unknown') continue;
      result[model] = (result[model] ?? 0) + (span.end - span.start);
    }
    return result;
  }

  /**
   * For each baseline line, compute the dominant origin, the newest timestamp
   * among that origin's spans on the line, and the known model covering the
   * most characters on the line. Lines with no coverage report 'existing'.
   * Walks spans and lines together, relying on the spans being sorted.
   */
  private _getLineData(baseline: LineHashBaseline[]): Array<{ origin: Origin; timestamp: number; modelId?: number }> {
    const result: Array<{ origin: Origin; timestamp: number; modelId?: number }> = [];
    let offset = 0;
    let first = 0; // index of the first span that can still overlap the current line
    for (const line of baseline) {
      const lineEnd = offset + line.charLen;
      const counts: Record<Origin, number> = { ai: 0, user: 0, paste: 0, existing: 0 };
      const timestamps: Record<Origin, number> = { ai: 0, user: 0, paste: 0, existing: 0 };
      const modelCounts = new Map<number | undefined, number>();

      while (first < this.spans.length && this.spans[first].end <= offset) first++;

      for (let s = first; s < this.spans.length && this.spans[s].start < lineEnd; s++) {
        const span = this.spans[s];
        const covered = Math.min(span.end, lineEnd) - Math.max(span.start, offset);
        counts[span.origin] += covered;
        if (span.timestamp > timestamps[span.origin]) timestamps[span.origin] = span.timestamp;
        modelCounts.set(span.modelId, (modelCounts.get(span.modelId) ?? 0) + covered);
      }

      let dominant: Origin = 'existing';
      let max = 0;
      for (const k of FileProvenance.ORIGINS) {
        if (counts[k] > max) { max = counts[k]; dominant = k; }
      }

      // Pick the model with the most coverage on this line
      let dominantModel: number | undefined;
      let maxModelCov = 0;
      for (const [mid, cov] of modelCounts) {
        if (mid !== undefined && cov > maxModelCov) { maxModelCov = cov; dominantModel = mid; }
      }

      result.push({ origin: dominant, timestamp: timestamps[dominant], modelId: dominantModel });
      offset = lineEnd;
    }
    return result;
  }

  /**
   * Serialize to a compact, storable format (one entry per baseline line).
   * Model fields are only emitted when at least one model has been interned.
   */
  toStored(baseline: LineHashBaseline[]): StoredProvenance {
    const lineData = this._getLineData(baseline);
    const stored: StoredProvenance = {
      hashes: baseline.map(l => l.hash),
      origins: lineData.map(d => ORIGIN_ENCODE[d.origin]),
      charLens: baseline.map(l => l.charLen),
      timestamps: lineData.map(d => d.timestamp),
      aiEditedByManual: this.aiEditedByManual,
      manualEditedByAi: this.manualEditedByAi,
    };
    if (this._modelDict.length > 0) {
      stored.modelDictionary = [...this._modelDict];
      stored.modelIds = lineData.map(d => d.modelId ?? -1);
    }
    return stored;
  }

  /**
   * Reconstruct FileProvenance from a previous session's stored data,
   * reconciling against the current file baseline via LCS on hashes.
   *
   * Lines that match (same hash) keep their stored origin, timestamp and model.
   * New or changed lines (no hash match) are marked 'existing'.
   */
  static fromStored(stored: StoredProvenance, currentBaseline: LineHashBaseline[]): FileProvenance {
    const m = stored.hashes.length;
    const n = currentBaseline.length;

    // LCS table on line hashes
    const t = FileProvenance._lcsOnHashes(stored.hashes, currentBaseline.map(l => l.hash));

    // Single backtrack assigns origin, timestamp and model to each matched current line.
    const lineOrigins: Origin[] = new Array<Origin>(n).fill('existing');
    const lineTimestamps: number[] = new Array<number>(n).fill(0);
    const lineModelIds: Array<number | undefined> = new Array<number | undefined>(n).fill(undefined);
    let i = m, j = n;
    while (i > 0 && j > 0) {
      if (stored.hashes[i - 1] === currentBaseline[j - 1].hash && t[i][j] === t[i - 1][j - 1] + 1) {
        lineOrigins[j - 1] = ORIGIN_DECODE[stored.origins[i - 1]] ?? 'existing';
        lineTimestamps[j - 1] = stored.timestamps?.[i - 1] ?? 0;
        const mid = stored.modelIds?.[i - 1];
        lineModelIds[j - 1] = (mid !== undefined && mid >= 0) ? mid : undefined;
        i--; j--;
      } else if (t[i - 1][j] >= t[i][j - 1]) {
        i--;
      } else {
        j--;
      }
    }

    // Restore model dictionary and counters
    const fp = new FileProvenance(0);
    if (stored.modelDictionary) {
      fp._modelDict = [...stored.modelDictionary];
    }
    fp.aiEditedByManual = stored.aiEditedByManual || 0;
    fp.manualEditedByAi = stored.manualEditedByAi || 0;

    // Rebuild spans line by line; insert() coalesces equal neighbours.
    let offset = 0;
    for (let k = 0; k < n; k++) {
      const len = currentBaseline[k].charLen;
      if (len > 0) {
        fp.insert(offset, len, lineOrigins[k], lineTimestamps[k], lineModelIds[k]);
        offset += len;
      }
    }
    return fp;
  }

  // ── Private helpers ──────────────────────────────────────────────────────────

  /**
   * Binary search for a span with `start <= offset <= end`, or -1 if none.
   * Both bounds are inclusive, so at a boundary between two spans either one
   * may be returned; insert() handles both outcomes.
   */
  private _spanAt(offset: number): number {
    let lo = 0, hi = this.spans.length - 1;
    while (lo <= hi) {
      const mid = (lo + hi) >>> 1;
      const s = this.spans[mid];
      if (offset < s.start) { hi = mid - 1; }
      else if (offset > s.end) { lo = mid + 1; }
      else { return mid; }
    }
    return -1;
  }

  /**
   * Like _spanAt, but at a boundary prefers the span that starts at `offset`
   * over the one that ends there. Falls back to the ending span when no span
   * starts at `offset` (i.e. `offset` is the end of the tracked text).
   */
  private _spanCovering(offset: number): number {
    const idx = this._spanAt(offset);
    if (idx === -1) return -1;
    const next = this.spans[idx + 1];
    if (this.spans[idx].end === offset && next !== undefined && next.start === offset) {
      return idx + 1;
    }
    return idx;
  }

  /** Move every span from index `idx` onward by `delta` characters. */
  private _shiftFrom(idx: number, delta: number): void {
    for (let i = idx; i < this.spans.length; i++) {
      this.spans[i].start += delta;
      this.spans[i].end += delta;
    }
  }

  /**
   * Drop empty spans and merge touching neighbours that share origin and
   * modelId; the merged span keeps the newer timestamp.
   */
  private _coalesce(): void {
    this.spans = this.spans.filter(s => s.end > s.start);
    let i = 0;
    while (i < this.spans.length - 1) {
      const a = this.spans[i], b = this.spans[i + 1];
      if (a.origin === b.origin && a.modelId === b.modelId && a.end === b.start) {
        a.end = b.end;
        a.timestamp = Math.max(a.timestamp, b.timestamp);
        this.spans.splice(i + 1, 1);
      } else {
        i++;
      }
    }
  }

  /**
   * O(m×n) LCS table over line hashes, or null when either side exceeds
   * MAX_LCS_SIDE lines. Returning null (rather than a zero-filled table)
   * avoids allocating the very table the guard exists to prevent; the caller
   * then treats every line as deleted+added, which is conservative (no lines
   * get wrongly matched) but loses attribution for unchanged lines.
   */
  private _lcsTable(a: LineHashBaseline[], b: LineHashBaseline[]): number[][] | null {
    if (a.length > FileProvenance.MAX_LCS_SIDE || b.length > FileProvenance.MAX_LCS_SIDE) {
      return null;
    }
    return FileProvenance._lcsOnHashes(a.map(l => l.hash), b.map(l => l.hash));
  }

  /** Classic LCS length table: t[i][j] = LCS length of a[0..i) and b[0..j). */
  private static _lcsOnHashes(a: number[], b: number[]): number[][] {
    const m = a.length, n = b.length;
    const t: number[][] = Array.from({ length: m + 1 }, () => new Array<number>(n + 1).fill(0));
    for (let i = 1; i <= m; i++) {
      for (let j = 1; j <= n; j++) {
        t[i][j] = a[i - 1] === b[j - 1]
          ? t[i - 1][j - 1] + 1
          : Math.max(t[i - 1][j], t[i][j - 1]);
      }
    }
    return t;
  }
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GitNotes — read/write omnitype attribution data as Git Notes.
|
|
3
|
+
*
|
|
4
|
+
* Notes live at refs/notes/ai. Each note on a commit is a JSON blob:
|
|
5
|
+
*
|
|
6
|
+
* {
|
|
7
|
+
* "v": 1,
|
|
8
|
+
* "tool": "omnitype",
|
|
9
|
+
* "ts": <unix-ms>,
|
|
10
|
+
* "files": {
|
|
11
|
+
* "src/foo.ts": [
|
|
12
|
+
* { "start": 1, "end": 45, "origin": "ai", "model": "claude-sonnet-4-5" },
|
|
13
|
+
* { "start": 46, "end": 60, "origin": "user" }
|
|
14
|
+
* ]
|
|
15
|
+
* }
|
|
16
|
+
* }
|
|
17
|
+
*
|
|
18
|
+
* Line numbers are 1-indexed, inclusive — matching git blame output.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { execFileSync, execFile } from 'child_process';
|
|
22
|
+
import { promisify } from 'util';
|
|
23
|
+
|
|
24
|
+
/** Git ref under which omnitype attribution notes are stored. */
const NOTES_REF = 'refs/notes/ai';

/** Promise-returning wrapper around child_process.execFile. */
const _execFile = promisify(execFile);
|
|
26
|
+
|
|
27
|
+
/** A run of consecutive lines that share one attribution. */
export interface NoteRange {
  /** First line of the run, 1-indexed. */
  start: number;
  /** Last line of the run, inclusive. */
  end: number;
  origin: 'ai' | 'user' | 'paste' | 'existing';
  model?: string;
  tool?: string;
}

/** Attribution ranges keyed by repository-relative file path. */
export interface NoteFile {
  [relPath: string]: NoteRange[];
}

/** The JSON payload stored as a Git Note on a commit. */
export interface CommitNote {
  /** Payload format version. */
  v: 1;
  tool: 'omnitype';
  /** Unix-ms timestamp. */
  ts: number;
  files: NoteFile;
}
|
|
45
|
+
|
|
46
|
+
/** Wrap per-file ranges in a version-1 omnitype note stamped with the current time. */
export function buildNote(files: NoteFile): CommitNote {
  const note: CommitNote = {
    v: 1,
    tool: 'omnitype',
    ts: Date.now(),
    files,
  };
  return note;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Write (or overwrite) a Git Note for the given commit in the repo at repoPath.
 *
 * The JSON payload is streamed to `git notes add -F -` over stdin instead of
 * being passed with `-m`, so a large note cannot exceed the operating
 * system's command-line length limit.
 *
 * @param repoPath  Working directory handed to `git -C`.
 * @param commitish Commit the note is attached to.
 * @param note      Payload; serialized with JSON.stringify.
 * @throws Rejects if the git process cannot be spawned or exits non-zero.
 */
export async function writeNote(repoPath: string, commitish: string, note: CommitNote): Promise<void> {
  const json = JSON.stringify(note);
  const pending = _execFile('git', ['-C', repoPath, 'notes', '--ref', NOTES_REF, 'add', '-f', '-F', '-', commitish]);
  const stdin = pending.child.stdin;
  if (stdin) {
    // If git exits before reading everything, the pipe raises EPIPE; the real
    // failure is reported through the rejected promise below, so ignore it here.
    stdin.on('error', () => { /* surfaced via `pending` */ });
    stdin.end(json);
  }
  await pending;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Read the omnitype Git Note for a commit.
 *
 * Returns null when git fails (for example no note exists), when the note is
 * not valid JSON, or when it is not a version-1 omnitype payload.
 */
export function readNote(repoPath: string, commitish: string): CommitNote | null {
  let candidate: any;
  try {
    const args = ['-C', repoPath, 'notes', '--ref', NOTES_REF, 'show', commitish];
    const raw = execFileSync('git', args, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] });
    candidate = JSON.parse(raw.trim());
  } catch {
    return null;
  }

  const isOmnitypeV1 = candidate?.v === 1 && candidate?.tool === 'omnitype';
  return isOmnitypeV1 ? (candidate as CommitNote) : null;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Merge two notes into a new one. For a file present in both, the overlay's
 * ranges replace the base's entirely; the newer of the two timestamps wins.
 * All other fields are copied from `base`.
 */
export function mergeNotes(base: CommitNote, overlay: CommitNote): CommitNote {
  const files: NoteFile = Object.assign({}, base.files, overlay.files);
  const ts = Math.max(base.ts, overlay.ts);
  return Object.assign({}, base, { ts, files });
}
|
|
77
|
+
|
|
78
|
+
/**
 * Build a NoteFile from a map of per-file stored provenance (the shape
 * produced by FileProvenance.toStored, optionally with tool fields).
 *
 * Consecutive lines that share origin, model and tool are collapsed into one
 * 1-indexed inclusive range. Unknown origin codes decode to 'existing'.
 * Files with no lines are omitted from the result.
 */
export function provenanceToNoteFile(
  files: Record<string, { origins: number[]; modelIds?: number[]; modelDictionary?: string[]; toolIds?: number[]; toolDictionary?: string[] }>,
): NoteFile {
  const DECODE: Record<number, NoteRange['origin']> = { 0: 'existing', 1: 'user', 2: 'ai', 3: 'paste' };

  // Look up the interned name for line `at`; a missing array or a negative id means "unknown".
  const nameAt = (ids: number[] | undefined, dict: string[] | undefined, at: number): string | undefined => {
    if (!ids || !dict) return undefined;
    const id = ids[at];
    return id >= 0 ? dict[id] : undefined;
  };

  const result: NoteFile = {};

  for (const relPath of Object.keys(files)) {
    const sp = files[relPath];
    const ranges: NoteRange[] = [];

    // The open run is the last element of `ranges`; its raw model/tool keys
    // are tracked separately because falsy names are not written to the range.
    let open: NoteRange | undefined;
    let openModel: string | undefined;
    let openTool: string | undefined;

    for (let idx = 0; idx < sp.origins.length; idx++) {
      const lineNo = idx + 1;
      const origin = DECODE[sp.origins[idx]] ?? 'existing';
      const model = nameAt(sp.modelIds, sp.modelDictionary, idx);
      const tool = nameAt(sp.toolIds, sp.toolDictionary, idx);

      if (open !== undefined && open.origin === origin && openModel === model && openTool === tool) {
        open.end = lineNo;
        continue;
      }

      open = { start: lineNo, end: lineNo, origin };
      if (model) open.model = model;
      if (tool) open.tool = tool;
      openModel = model;
      openTool = tool;
      ranges.push(open);
    }

    if (ranges.length > 0) result[relPath] = ranges;
  }

  return result;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Fetch git notes from remote so teammates' attributions are visible locally.
 * Best-effort: any failure of the fetch is ignored.
 */
export async function fetchNotes(repoPath: string, remote = 'origin'): Promise<void> {
  try {
    const refspec = `${NOTES_REF}:${NOTES_REF}`;
    const args = ['-C', repoPath, 'fetch', remote, refspec];
    await _execFile('git', args);
  } catch {
    /* remote may not have notes yet */
  }
}
|
|
137
|
+
|
|
138
|
+
/** Push the notes ref to `remote`. Rejects if the git command fails. */
export async function pushNotes(repoPath: string, remote = 'origin'): Promise<void> {
  const args = ['-C', repoPath, 'push', remote, NOTES_REF];
  await _execFile('git', args);
}
|