@tracemarketplace/shared 0.0.6 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunker.d.ts.map +1 -1
- package/dist/chunker.js +14 -2
- package/dist/chunker.js.map +1 -1
- package/dist/extractor-claude-code.test.d.ts +2 -0
- package/dist/extractor-claude-code.test.d.ts.map +1 -0
- package/dist/extractor-claude-code.test.js +290 -0
- package/dist/extractor-claude-code.test.js.map +1 -0
- package/dist/extractor-codex.test.d.ts +2 -0
- package/dist/extractor-codex.test.d.ts.map +1 -0
- package/dist/extractor-codex.test.js +212 -0
- package/dist/extractor-codex.test.js.map +1 -0
- package/dist/extractor-cursor.test.d.ts +2 -0
- package/dist/extractor-cursor.test.d.ts.map +1 -0
- package/dist/extractor-cursor.test.js +120 -0
- package/dist/extractor-cursor.test.js.map +1 -0
- package/dist/extractors/claude-code.d.ts.map +1 -1
- package/dist/extractors/claude-code.js +172 -73
- package/dist/extractors/claude-code.js.map +1 -1
- package/dist/extractors/codex.d.ts.map +1 -1
- package/dist/extractors/codex.js +63 -35
- package/dist/extractors/codex.js.map +1 -1
- package/dist/extractors/common.d.ts +14 -0
- package/dist/extractors/common.d.ts.map +1 -0
- package/dist/extractors/common.js +100 -0
- package/dist/extractors/common.js.map +1 -0
- package/dist/extractors/cursor.d.ts.map +1 -1
- package/dist/extractors/cursor.js +205 -45
- package/dist/extractors/cursor.js.map +1 -1
- package/dist/hash.d.ts.map +1 -1
- package/dist/hash.js +35 -2
- package/dist/hash.js.map +1 -1
- package/dist/hash.test.js +29 -2
- package/dist/hash.test.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/redact.d.ts +12 -0
- package/dist/redact.d.ts.map +1 -1
- package/dist/redact.js +120 -38
- package/dist/redact.js.map +1 -1
- package/dist/redact.test.d.ts +2 -0
- package/dist/redact.test.d.ts.map +1 -0
- package/dist/redact.test.js +96 -0
- package/dist/redact.test.js.map +1 -0
- package/dist/turn-actors.d.ts +3 -0
- package/dist/turn-actors.d.ts.map +1 -0
- package/dist/turn-actors.js +57 -0
- package/dist/turn-actors.js.map +1 -0
- package/dist/turn-actors.test.d.ts +2 -0
- package/dist/turn-actors.test.d.ts.map +1 -0
- package/dist/turn-actors.test.js +65 -0
- package/dist/turn-actors.test.js.map +1 -0
- package/dist/types.d.ts +5 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/utils.d.ts +1 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +4 -0
- package/dist/utils.js.map +1 -1
- package/dist/validators.d.ts +24 -0
- package/dist/validators.d.ts.map +1 -1
- package/dist/validators.js +3 -0
- package/dist/validators.js.map +1 -1
- package/package.json +5 -1
- package/src/chunker.ts +17 -2
- package/src/extractor-claude-code.test.ts +326 -0
- package/src/extractor-codex.test.ts +225 -0
- package/src/extractor-cursor.test.ts +141 -0
- package/src/extractors/claude-code.ts +180 -69
- package/src/extractors/codex.ts +69 -38
- package/src/extractors/common.ts +139 -0
- package/src/extractors/cursor.ts +294 -52
- package/src/hash.test.ts +31 -2
- package/src/hash.ts +38 -3
- package/src/index.ts +1 -0
- package/src/redact.test.ts +100 -0
- package/src/redact.ts +175 -58
- package/src/turn-actors.test.ts +71 -0
- package/src/turn-actors.ts +71 -0
- package/src/types.ts +6 -0
- package/src/utils.ts +3 -1
- package/src/validators.ts +3 -0
package/src/extractors/cursor.ts
CHANGED
|
@@ -1,6 +1,250 @@
|
|
|
1
1
|
import { randomUUID } from "crypto";
|
|
2
2
|
import { hashString, computeContentHash } from "../hash.js";
|
|
3
|
-
import type { NormalizedTrace, Turn, TokenUsage } from "../types.js";
|
|
3
|
+
import type { ContentBlock, NormalizedTrace, Turn, TokenUsage } from "../types.js";
|
|
4
|
+
import {
|
|
5
|
+
collectTraceMetrics,
|
|
6
|
+
createPassiveEnvState,
|
|
7
|
+
extractTextFragments,
|
|
8
|
+
normalizeTimestamp,
|
|
9
|
+
pushUniqueTextBlock,
|
|
10
|
+
} from "./common.js";
|
|
11
|
+
|
|
12
|
+
/**
 * One entry of `composerData.fullConversationHeadersOnly`: a lightweight
 * pointer to a conversation bubble whose payload is stored separately.
 */
interface CursorHeader {
  /** Preferred bubble identifier. */
  bubbleId?: string;
  /** Identifier used when `bubbleId` is absent. */
  id?: string;
  /** Numeric bubble kind; `1` is treated as a user bubble, any other value as assistant. */
  type?: number;
  /** Creation time as stored by Cursor; passed through `normalizeTimestamp` before use. */
  createdAt?: string | number | null;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Parsed JSON value of a `cursorDiskKV` row describing one conversation bubble.
 * Every field is optional because the stored shape varies between rows.
 */
interface CursorBlob {
  /** Bubble kind; numeric values are compared against `1` (user) when resolving the role. */
  type?: number | string;
  /** Explicit role label, matched case-insensitively against "user" / "assistant". */
  role?: string;

  // Candidate message bodies; the first non-blank one (in this order) is used.
  text?: string;
  content?: string;
  message?: string;
  markdown?: string;

  /** Reasoning payload of unknown shape; flattened with `extractTextFragments`. */
  thinking?: unknown;

  /** Token counters, preferred source. */
  usage?: {
    promptTokens?: number;
    completionTokens?: number;
  };
  /** Token counters, consulted when `usage` lacks a value. */
  tokenCount?: {
    inputTokens?: number;
    outputTokens?: number;
  };

  /** Creation time as stored by Cursor; passed through `normalizeTimestamp` before use. */
  createdAt?: string | number | null;

  /** Model identifier; `model` takes precedence over `modelName`. */
  model?: string;
  modelName?: string;

  // File and folder references, all harvested into the trace's open-file list.
  context?: {
    openFiles?: unknown[];
  };
  openFiles?: unknown[];
  relevantFiles?: unknown[];
  recentlyViewedFiles?: unknown[];
  workspaceUris?: unknown[];
  attachedFolders?: unknown[];
  attachedFoldersNew?: unknown[];
}
|
|
48
|
+
|
|
49
|
+
/**
 * Minimal structural view of the better-sqlite3 handle used by this module:
 * single-parameter point lookups that yield a `value` column, plus `close()`.
 */
interface CursorDb {
  /** Compiles `sql` into a statement that can be executed with one string parameter. */
  prepare(sql: string): {
    /** Returns the first matching row, or `undefined` when nothing matches. */
    get(param: string): { value: string } | undefined;
  };
  /** Releases the underlying database handle. */
  close(): void;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Narrows an arbitrary value to a finite number.
 *
 * @returns `value` itself when it is a number other than `NaN`/`±Infinity`, otherwise `null`.
 */
function toNumber(value: unknown): number | null {
  if (typeof value !== "number") {
    return null;
  }
  return Number.isFinite(value) ? value : null;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Reports whether a parsed blob carries anything worth extracting.
 *
 * The check is plain truthiness over the role, message, thinking, usage,
 * model and file-reference fields: an empty string does not count, while an
 * empty array or object does.
 */
function hasCursorConversationPayload(blob: CursorBlob): boolean {
  const signals: unknown[] = [
    blob.role,
    blob.text,
    blob.content,
    blob.message,
    blob.markdown,
    blob.thinking,
    blob.usage,
    blob.tokenCount,
    blob.model,
    blob.modelName,
    blob.context?.openFiles,
    blob.openFiles,
    blob.relevantFiles,
    blob.recentlyViewedFiles,
    blob.workspaceUris,
    blob.attachedFolders,
    blob.attachedFoldersNew,
  ];

  return signals.some((signal) => Boolean(signal));
}
|
|
81
|
+
|
|
82
|
+
/**
 * Loads and parses the stored payload for one conversation bubble.
 *
 * Two `cursorDiskKV` keys are probed in order:
 * `bubbleId:<sessionId>:<bubbleId>` and then `agentKv:blob:<bubbleId>`.
 * The first row that parses as a JSON object and carries conversation
 * payload wins. A candidate that is missing, unparseable, not an object, or
 * payload-free is skipped so the remaining key still gets a chance.
 *
 * @param db - Open database handle exposing the `cursorDiskKV` table.
 * @param sessionId - Composer session the bubble belongs to.
 * @param bubbleId - Identifier of the bubble to load.
 * @returns The parsed blob, or `null` when no candidate key yields a usable payload.
 */
function readCursorBlob(
  db: CursorDb,
  sessionId: string,
  bubbleId: string,
): CursorBlob | null {
  // Prepared once: the same statement serves every candidate key.
  const lookup = db.prepare("SELECT value FROM cursorDiskKV WHERE key = ?");
  const candidateKeys = [
    `bubbleId:${sessionId}:${bubbleId}`,
    `agentKv:blob:${bubbleId}`,
  ];

  for (const key of candidateKeys) {
    const row = lookup.get(key);
    if (!row) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(row.value);
    } catch {
      // A corrupt row under one key must not hide a valid row under the other.
      continue;
    }

    // JSON `null` and scalars cannot hold conversation fields.
    if (!parsed || typeof parsed !== "object") continue;

    const blob = parsed as CursorBlob;
    if (hasCursorConversationPayload(blob)) return blob;
  }

  return null;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Decides whether a bubble was authored by the user or the assistant.
 *
 * Resolution order:
 * 1. `blob.role`, when it is the string "user" or "assistant" (case-insensitive).
 * 2. `blob.type`, when it is the string "user" or "assistant" (case-insensitive).
 * 3. The numeric `blob.type`, else `header.type`: `1` means user, anything
 *    else (including a missing type) means assistant.
 *
 * Non-string `role` values coming out of parsed JSON are ignored, not dereferenced.
 */
function cursorRole(header: CursorHeader, blob: CursorBlob): "user" | "assistant" {
  const role = typeof blob.role === "string" ? blob.role.toLowerCase() : undefined;
  if (role === "user" || role === "assistant") return role;

  // `CursorBlob.type` may be a string label instead of a numeric kind.
  if (typeof blob.type === "string") {
    const label = blob.type.toLowerCase();
    if (label === "user" || label === "assistant") return label;
  }

  const type = typeof blob.type === "number" ? blob.type : header.type;
  return type === 1 ? "user" : "assistant";
}
|
|
116
|
+
|
|
117
|
+
/**
 * Builds a `TokenUsage` record from whichever counters the blob exposes.
 *
 * `usage.promptTokens` / `usage.completionTokens` are preferred, with
 * `tokenCount.inputTokens` / `tokenCount.outputTokens` as fallback. When only
 * one side is available the other is reported as `0`; the cache and reasoning
 * counters are always `null`.
 *
 * @returns `null` when neither an input nor an output count is present.
 */
function extractCursorTokenUsage(blob: CursorBlob): TokenUsage | null {
  const { usage, tokenCount } = blob;

  const prompt = toNumber(usage?.promptTokens) ?? toNumber(tokenCount?.inputTokens);
  const completion = toNumber(usage?.completionTokens) ?? toNumber(tokenCount?.outputTokens);

  if (prompt !== null || completion !== null) {
    return {
      input_tokens: prompt ?? 0,
      output_tokens: completion ?? 0,
      cache_read_input_tokens: null,
      cache_creation_input_tokens: null,
      reasoning_tokens: null,
    };
  }

  return null;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Converts a `file://` URI into its percent-decoded path component.
 *
 * Any value that does not start with `file://` is returned untouched, as is
 * a `file://` value that cannot be parsed or decoded.
 */
function normalizeCursorPath(value: string): string {
  const isFileUri = value.startsWith("file://");

  if (isFileUri) {
    try {
      const { pathname } = new URL(value);
      return decodeURIComponent(pathname);
    } catch {
      // Malformed URI or escape sequence: fall through to the raw value.
    }
  }

  return value;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Recursively harvests path strings from a loosely-typed value.
 *
 * - A non-empty string yields itself, run through `normalizeCursorPath`.
 * - An array yields the concatenation of its items' paths.
 * - An object yields the paths found under its `path`, `uri`, `absolutePath`,
 *   `relativeWorkspacePath`, `relativePath` and `filePath` properties, in
 *   that order.
 * - Anything else yields nothing.
 */
function extractCursorPaths(value: unknown): string[] {
  if (typeof value === "string") {
    return value.length > 0 ? [normalizeCursorPath(value)] : [];
  }

  if (Array.isArray(value)) {
    return value.flatMap((entry) => extractCursorPaths(entry));
  }

  if (typeof value !== "object" || value === null) {
    return [];
  }

  const record = value as Record<string, unknown>;
  const pathKeys = [
    "path",
    "uri",
    "absolutePath",
    "relativeWorkspacePath",
    "relativePath",
    "filePath",
  ];

  return pathKeys.flatMap((key) =>
    key in record ? extractCursorPaths(record[key]) : [],
  );
}
|
|
179
|
+
|
|
180
|
+
/**
 * Gathers every file or folder path referenced by a blob.
 *
 * All file-reference fields are scanned with `extractCursorPaths`; empty
 * results are dropped and duplicates removed, keeping first-seen order.
 */
function collectCursorOpenFiles(blob: CursorBlob): string[] {
  const sources: unknown[] = [
    blob.context?.openFiles,
    blob.openFiles,
    blob.relevantFiles,
    blob.recentlyViewedFiles,
    blob.workspaceUris,
    blob.attachedFolders,
    blob.attachedFoldersNew,
  ];

  const unique = new Set<string>();
  for (const source of sources) {
    for (const path of extractCursorPaths(source)) {
      if (path) unique.add(path);
    }
  }

  return Array.from(unique);
}
|
|
193
|
+
|
|
194
|
+
/**
 * Turns a blob into normalized content blocks: thinking first, then text.
 *
 * The thinking text is every fragment of `blob.thinking` joined by blank
 * lines. The message text is the first of `text`, `content`, `message`,
 * `markdown` that is a non-blank string. Both go through
 * `pushUniqueTextBlock`, which decides whether a block is actually appended
 * (presumably skipping empty or duplicate text — defined in ./common.js, confirm there).
 */
function extractCursorBlocks(blob: CursorBlob): ContentBlock[] {
  const blocks: ContentBlock[] = [];

  const thinkingFragments = extractTextFragments(blob.thinking);
  pushUniqueTextBlock(blocks, "thinking", thinkingFragments.join("\n\n"));

  let messageText: string | null = null;
  for (const candidate of [blob.text, blob.content, blob.message, blob.markdown]) {
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      messageText = candidate;
      break;
    }
  }
  pushUniqueTextBlock(blocks, "text", messageText);

  return blocks;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Adds two usage records together.
 *
 * A missing side contributes `0` to the input/output sums. The cache and
 * reasoning counters of the result are always `null`, whatever the operands hold.
 *
 * @returns `null` only when both operands are absent.
 */
function mergeUsage(
  left: TokenUsage | null,
  right: TokenUsage | null,
): TokenUsage | null {
  if (!(left || right)) return null;

  const inputTotal = (left?.input_tokens ?? 0) + (right?.input_tokens ?? 0);
  const outputTotal = (left?.output_tokens ?? 0) + (right?.output_tokens ?? 0);

  return {
    input_tokens: inputTotal,
    output_tokens: outputTotal,
    cache_read_input_tokens: null,
    cache_creation_input_tokens: null,
    reasoning_tokens: null,
  };
}
|
|
225
|
+
|
|
226
|
+
/**
 * Appends a turn to the list, folding it into the previous turn when both
 * share the same role.
 *
 * When folding, the previous turn keeps its own fields (the spread copies
 * them) and gains the new content blocks, the summed usage, a model if it had
 * none, and a `bubbleIds` list naming every bubble merged so far. The list
 * slot is replaced with a fresh object; the old turn object is not mutated.
 */
function appendCursorTurn(turns: Turn[], nextTurn: Turn): void {
  const lastIndex = turns.length - 1;
  const previousTurn = turns[lastIndex];

  if (previousTurn && previousTurn.role === nextTurn.role) {
    const priorMetadata = previousTurn.source_metadata;
    // The first merge has only the single `bubbleId`; later merges extend the list.
    const seenBubbleIds = Array.isArray(priorMetadata.bubbleIds)
      ? priorMetadata.bubbleIds
      : [priorMetadata.bubbleId].filter(Boolean);

    turns[lastIndex] = {
      ...previousTurn,
      content: [...previousTurn.content, ...nextTurn.content],
      usage: mergeUsage(previousTurn.usage, nextTurn.usage),
      model: previousTurn.model ?? nextTurn.model,
      source_metadata: {
        ...priorMetadata,
        bubbleIds: [...seenBubbleIds, nextTurn.source_metadata.bubbleId].filter(Boolean),
      },
    };
    return;
  }

  turns.push(nextTurn);
}
|
|
4
248
|
|
|
5
249
|
export async function extractCursor(
|
|
6
250
|
dbPath: string,
|
|
@@ -8,7 +252,7 @@ export async function extractCursor(
|
|
|
8
252
|
submittedBy = "unknown"
|
|
9
253
|
): Promise<NormalizedTrace> {
|
|
10
254
|
const Database = (await import("better-sqlite3")).default;
|
|
11
|
-
const db = new Database(dbPath, { readonly: true });
|
|
255
|
+
const db: CursorDb = new Database(dbPath, { readonly: true });
|
|
12
256
|
|
|
13
257
|
try {
|
|
14
258
|
const composerRow = db
|
|
@@ -17,63 +261,66 @@ export async function extractCursor(
|
|
|
17
261
|
if (!composerRow) throw new Error(`Session ${sessionId} not found in cursor DB`);
|
|
18
262
|
|
|
19
263
|
const composerData = JSON.parse(composerRow.value);
|
|
20
|
-
const headers
|
|
264
|
+
const headers = (composerData.fullConversationHeadersOnly ?? []) as CursorHeader[];
|
|
21
265
|
|
|
22
266
|
const turns: Turn[] = [];
|
|
23
267
|
const openFiles: string[] = [];
|
|
268
|
+
let totalInputTokens = 0;
|
|
269
|
+
let totalOutputTokens = 0;
|
|
24
270
|
|
|
25
271
|
for (const header of headers) {
|
|
26
272
|
const bubbleId = header.bubbleId ?? header.id;
|
|
27
273
|
if (!bubbleId) continue;
|
|
28
274
|
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
.get(`agentKv:blob:${bubbleId}`) as { value: string } | undefined;
|
|
32
|
-
if (!blobRow) continue;
|
|
33
|
-
|
|
34
|
-
const blob = JSON.parse(blobRow.value);
|
|
35
|
-
const role: "user" | "assistant" =
|
|
36
|
-
blob.type === "user" || blob.role === "user" ? "user" : "assistant";
|
|
37
|
-
const text: string = blob.text ?? blob.content ?? blob.message ?? "";
|
|
275
|
+
const blob = readCursorBlob(db, sessionId, bubbleId);
|
|
276
|
+
if (!blob) continue;
|
|
38
277
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
278
|
+
const role = cursorRole(header, blob);
|
|
279
|
+
const tokenUsage = extractCursorTokenUsage(blob);
|
|
280
|
+
if (tokenUsage) {
|
|
281
|
+
totalInputTokens += tokenUsage.input_tokens;
|
|
282
|
+
totalOutputTokens += tokenUsage.output_tokens;
|
|
43
283
|
}
|
|
44
284
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
reasoning_tokens: null,
|
|
52
|
-
}
|
|
53
|
-
: null;
|
|
54
|
-
|
|
55
|
-
turns.push({
|
|
285
|
+
openFiles.push(...collectCursorOpenFiles(blob));
|
|
286
|
+
|
|
287
|
+
const content = extractCursorBlocks(blob);
|
|
288
|
+
if (content.length === 0) continue;
|
|
289
|
+
|
|
290
|
+
appendCursorTurn(turns, {
|
|
56
291
|
turn_id: bubbleId,
|
|
57
292
|
parent_turn_id: null,
|
|
58
293
|
role,
|
|
59
|
-
timestamp: blob.createdAt ?? header.createdAt
|
|
60
|
-
content
|
|
61
|
-
model: blob.model ?? null,
|
|
294
|
+
timestamp: normalizeTimestamp(blob.createdAt ?? header.createdAt),
|
|
295
|
+
content,
|
|
296
|
+
model: blob.model ?? blob.modelName ?? null,
|
|
62
297
|
usage: tokenUsage,
|
|
63
|
-
source_metadata: { bubbleId, type: blob.type },
|
|
298
|
+
source_metadata: { bubbleId, type: blob.type ?? header.type ?? null },
|
|
64
299
|
});
|
|
65
300
|
}
|
|
66
301
|
|
|
67
|
-
|
|
302
|
+
if (turns.length === 0) {
|
|
303
|
+
throw new Error(`Session ${sessionId} contained no readable conversation bubbles`);
|
|
304
|
+
}
|
|
68
305
|
|
|
69
|
-
const
|
|
70
|
-
const
|
|
306
|
+
const metrics = collectTraceMetrics(turns);
|
|
307
|
+
const startedAt =
|
|
308
|
+
turns[0]?.timestamp ??
|
|
309
|
+
normalizeTimestamp(composerData.createdAt) ??
|
|
310
|
+
new Date().toISOString();
|
|
311
|
+
const endedAt =
|
|
312
|
+
turns[turns.length - 1]?.timestamp ??
|
|
313
|
+
normalizeTimestamp(composerData.createdAt) ??
|
|
314
|
+
new Date().toISOString();
|
|
71
315
|
|
|
72
316
|
const partialTrace: Omit<NormalizedTrace, "trace_id"> = {
|
|
73
317
|
schema_version: "1.0",
|
|
74
318
|
source_tool: "cursor",
|
|
75
319
|
source_session_id: sessionId,
|
|
76
|
-
source_version:
|
|
320
|
+
source_version:
|
|
321
|
+
typeof composerData._v === "number" || typeof composerData._v === "string"
|
|
322
|
+
? String(composerData._v)
|
|
323
|
+
: null,
|
|
77
324
|
submitted_by: submittedBy,
|
|
78
325
|
submitted_at: new Date().toISOString(),
|
|
79
326
|
extracted_at: new Date().toISOString(),
|
|
@@ -84,24 +331,18 @@ export async function extractCursor(
|
|
|
84
331
|
ended_at: endedAt,
|
|
85
332
|
turns,
|
|
86
333
|
turn_count: turns.length,
|
|
87
|
-
tool_call_count:
|
|
88
|
-
has_tool_calls:
|
|
89
|
-
has_thinking_blocks:
|
|
90
|
-
has_file_changes:
|
|
91
|
-
has_shell_commands:
|
|
92
|
-
total_input_tokens: null,
|
|
93
|
-
total_output_tokens: null,
|
|
334
|
+
tool_call_count: metrics.toolCallCount,
|
|
335
|
+
has_tool_calls: metrics.toolCallCount > 0,
|
|
336
|
+
has_thinking_blocks: metrics.hasThinkingBlocks,
|
|
337
|
+
has_file_changes: metrics.hasFileChanges,
|
|
338
|
+
has_shell_commands: metrics.hasShellCommands,
|
|
339
|
+
total_input_tokens: totalInputTokens || null,
|
|
340
|
+
total_output_tokens: totalOutputTokens || null,
|
|
94
341
|
total_cache_read_tokens: null,
|
|
95
342
|
content_fidelity: "chat_only",
|
|
96
|
-
env_state: {
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
inferred_changed_files: null,
|
|
100
|
-
inferred_error_files: null,
|
|
101
|
-
shell_exit_codes: null,
|
|
102
|
-
open_files_in_editor: openFiles.length > 0 ? openFiles : null,
|
|
103
|
-
extraction_method: "passive",
|
|
104
|
-
},
|
|
343
|
+
env_state: createPassiveEnvState({
|
|
344
|
+
open_files_in_editor: openFiles.length > 0 ? Array.from(new Set(openFiles)) : null,
|
|
345
|
+
}),
|
|
105
346
|
score: null,
|
|
106
347
|
raw_r2_key: "",
|
|
107
348
|
normalized_r2_key: "",
|
|
@@ -112,7 +353,8 @@ export async function extractCursor(
|
|
|
112
353
|
|
|
113
354
|
return { ...partialTrace, trace_id: traceId };
|
|
114
355
|
} catch (err) {
|
|
115
|
-
db.close();
|
|
116
356
|
throw err;
|
|
357
|
+
} finally {
|
|
358
|
+
db.close();
|
|
117
359
|
}
|
|
118
360
|
}
|
package/src/hash.test.ts
CHANGED
|
@@ -42,10 +42,10 @@ describe("computeContentHash", () => {
|
|
|
42
42
|
expect(computeContentHash(trace)).toBe(computeContentHash(trace));
|
|
43
43
|
});
|
|
44
44
|
|
|
45
|
-
it("returns
|
|
45
|
+
it("returns the same hash for different session_ids when content is identical", () => {
|
|
46
46
|
const a = makeTrace({ source_session_id: "session-aaa" });
|
|
47
47
|
const b = makeTrace({ source_session_id: "session-bbb" });
|
|
48
|
-
expect(computeContentHash(a)).
|
|
48
|
+
expect(computeContentHash(a)).toBe(computeContentHash(b));
|
|
49
49
|
});
|
|
50
50
|
|
|
51
51
|
it("returns different hashes for different turns", () => {
|
|
@@ -65,6 +65,35 @@ describe("computeContentHash", () => {
|
|
|
65
65
|
expect(computeContentHash(a)).not.toBe(computeContentHash(b));
|
|
66
66
|
});
|
|
67
67
|
|
|
68
|
+
it("ignores volatile turn metadata like timestamps and turn ids", () => {
|
|
69
|
+
const a = makeTrace({
|
|
70
|
+
turns: [{
|
|
71
|
+
turn_id: "turn-a",
|
|
72
|
+
parent_turn_id: null,
|
|
73
|
+
role: "assistant",
|
|
74
|
+
timestamp: "2024-01-01T00:00:00Z",
|
|
75
|
+
content: [{ type: "text", text: "hello" }],
|
|
76
|
+
model: "gpt-test",
|
|
77
|
+
usage: { input_tokens: 1, output_tokens: 2, cache_read_input_tokens: null, cache_creation_input_tokens: null, reasoning_tokens: null },
|
|
78
|
+
source_metadata: { foo: "bar" },
|
|
79
|
+
}],
|
|
80
|
+
});
|
|
81
|
+
const b = makeTrace({
|
|
82
|
+
turns: [{
|
|
83
|
+
turn_id: "turn-b",
|
|
84
|
+
parent_turn_id: "parent-b",
|
|
85
|
+
role: "assistant",
|
|
86
|
+
timestamp: "2024-01-05T00:00:00Z",
|
|
87
|
+
content: [{ type: "text", text: "hello" }],
|
|
88
|
+
model: "gpt-test",
|
|
89
|
+
usage: { input_tokens: 999, output_tokens: 999, cache_read_input_tokens: 1, cache_creation_input_tokens: 1, reasoning_tokens: 1 },
|
|
90
|
+
source_metadata: { baz: "qux" },
|
|
91
|
+
}],
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
expect(computeContentHash(a)).toBe(computeContentHash(b));
|
|
95
|
+
});
|
|
96
|
+
|
|
68
97
|
it("output is a 64-char hex string", () => {
|
|
69
98
|
const hash = computeContentHash(makeTrace());
|
|
70
99
|
expect(hash).toMatch(/^[0-9a-f]{64}$/);
|
package/src/hash.ts
CHANGED
|
@@ -1,11 +1,46 @@
|
|
|
1
1
|
import { createHash } from "crypto";
|
|
2
|
-
import type { NormalizedTrace } from "./types.js";
|
|
2
|
+
import type { ContentBlock, NormalizedTrace, Turn } from "./types.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Projects a content block onto the fields that take part in content hashing.
 *
 * Per block type, only these fields are kept, in this property order:
 * - text / thinking: `type`, `text`
 * - tool_use: `type`, `tool_name`, `tool_input`
 * - tool_result: `type`, `is_error`, `result_content`, `exit_code`
 * - image: `type`, `media_type`, `data_r2_key`
 *
 * Property order matters because the result is serialized with `JSON.stringify`.
 */
function canonicalizeBlock(block: ContentBlock) {
  switch (block.type) {
    case "text":
    case "thinking": {
      const { type, text } = block;
      return { type, text };
    }
    case "tool_use": {
      const { type, tool_name, tool_input } = block;
      return { type, tool_name, tool_input };
    }
    case "tool_result": {
      const { type, is_error, result_content, exit_code } = block;
      return { type, is_error, result_content, exit_code };
    }
    case "image": {
      const { type, media_type, data_r2_key } = block;
      return { type, media_type, data_r2_key };
    }
  }
}
|
|
30
|
+
|
|
31
|
+
/**
 * Reduces a turn to what the content hash covers: its role, its model and
 * the canonical form of each content block. Ids, timestamps, usage and
 * source metadata are left out.
 */
function canonicalizeTurn(turn: Turn) {
  const { role, model, content } = turn;
  const canonicalContent = content.map((block) => canonicalizeBlock(block));

  // Property order is kept stable: the result is serialized with JSON.stringify.
  return { role, model, content: canonicalContent };
}
|
|
3
38
|
|
|
4
39
|
/**
 * Computes the SHA-256 content hash of a trace.
 *
 * Only `source_tool`, `content_fidelity` and the canonical form of each turn
 * are hashed, so traces that differ solely in session id, turn ids,
 * timestamps, usage or source metadata produce the same digest.
 *
 * @returns The digest as a 64-character lowercase hex string.
 */
export function computeContentHash(trace: NormalizedTrace): string {
  const { source_tool, content_fidelity, turns } = trace;

  const payload = {
    source_tool,
    content_fidelity,
    turns: turns.map((turn) => canonicalizeTurn(turn)),
  };

  const hasher = createHash("sha256");
  hasher.update(JSON.stringify(payload));
  return hasher.digest("hex");
}
|
package/src/index.ts
CHANGED
|
@@ -5,6 +5,7 @@ export * from "./utils.js";
|
|
|
5
5
|
export * from "./validators.js";
|
|
6
6
|
export * from "./redact.js";
|
|
7
7
|
export * from "./chunker.js";
|
|
8
|
+
export * from "./turn-actors.js";
|
|
8
9
|
export { extractClaudeCode } from "./extractors/claude-code.js";
|
|
9
10
|
export { extractCodex } from "./extractors/codex.js";
|
|
10
11
|
export { extractCursor } from "./extractors/cursor.js";
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import type { NormalizedTrace } from "./types.js";
|
|
3
|
+
import { redactTraceWithStats } from "./redact.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Builds the fixture trace for the redaction tests: one assistant turn whose
 * tool call and tool result contain an OpenAI-style key, a bearer token and
 * paths under `/Users/tester`, plus an env state referencing the same path.
 */
function makeTrace(): NormalizedTrace {
  const secretPath = "/Users/tester/project/.env";
  const epoch = "2024-01-01T00:00:00.000Z";

  const assistantTurn: NormalizedTrace["turns"][number] = {
    turn_id: "t1",
    parent_turn_id: null,
    role: "assistant",
    timestamp: epoch,
    model: "test",
    usage: null,
    source_metadata: {},
    content: [
      {
        type: "tool_use",
        tool_call_id: "c1",
        tool_name: "Bash",
        tool_input: {
          command: "echo sk-proj-secret-secret-secret-secret",
          nested: {
            path: secretPath,
          },
        },
      },
      {
        type: "tool_result",
        tool_call_id: "c1",
        is_error: false,
        result_content: "Bearer abc.def.ghi",
        exit_code: 0,
      },
    ],
  };

  const envState: NormalizedTrace["env_state"] = {
    git_branch: "main",
    inferred_file_tree: [secretPath],
    inferred_changed_files: null,
    inferred_error_files: null,
    shell_exit_codes: [0],
    open_files_in_editor: null,
    extraction_method: "passive",
  };

  return {
    trace_id: "trace-1",
    schema_version: "1.0",
    source_tool: "claude_code",
    source_session_id: "session-1",
    source_version: null,
    submitted_by: "user@example.com",
    submitted_at: epoch,
    extracted_at: epoch,
    git_branch: "main",
    cwd_hash: null,
    working_language: null,
    started_at: epoch,
    ended_at: "2024-01-01T00:00:01.000Z",
    turns: [assistantTurn],
    turn_count: 1,
    tool_call_count: 1,
    has_tool_calls: true,
    has_thinking_blocks: false,
    has_file_changes: false,
    has_shell_commands: true,
    total_input_tokens: null,
    total_output_tokens: null,
    total_cache_read_tokens: null,
    content_fidelity: "full",
    env_state: envState,
    score: null,
    raw_r2_key: "",
    normalized_r2_key: "",
  };
}
|
|
75
|
+
|
|
76
|
+
// Covers redaction of nested tool inputs, tool results and env-state paths,
// together with the statistics reported alongside the redacted trace.
describe("redactTraceWithStats", () => {
  it("redacts nested strings and reports redaction stats", () => {
    const { trace, stats } = redactTraceWithStats(makeTrace(), { homeDir: "/Users/tester" });

    const firstTurn = trace.turns[0];
    const toolUse = firstTurn?.content[0];
    const toolResult = firstTurn?.content[1];

    expect(trace.submitted_by).toBe("[redacted]");

    // Tool call: the key is masked and the home directory collapsed to "~".
    expect(toolUse?.type).toBe("tool_use");
    if (!toolUse || toolUse.type !== "tool_use") {
      throw new Error("expected a tool_use block");
    }
    expect(toolUse.tool_input.command).toContain("[OPENAI_KEY]");
    const nestedInput = toolUse.tool_input.nested as { path: string };
    expect(nestedInput.path).toBe("~/project/.env");

    // Tool result: the bearer token is masked.
    expect(toolResult?.type).toBe("tool_result");
    if (!toolResult || toolResult.type !== "tool_result") {
      throw new Error("expected a tool_result block");
    }
    expect(toolResult.result_content).toBe("Bearer [BEARER_TOKEN]");

    // Environment state and aggregate statistics.
    expect(trace.env_state?.inferred_file_tree?.[0]).toBe("~/project/.env");
    expect(stats.changed).toBe(true);
    expect(stats.piiMatches).toBe(0);
    expect(stats.totalMatches).toBeGreaterThanOrEqual(3);
  });
});
|