@tracemarketplace/shared 0.0.6 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/dist/chunker.d.ts.map +1 -1
  2. package/dist/chunker.js +14 -2
  3. package/dist/chunker.js.map +1 -1
  4. package/dist/extractor-claude-code.test.d.ts +2 -0
  5. package/dist/extractor-claude-code.test.d.ts.map +1 -0
  6. package/dist/extractor-claude-code.test.js +290 -0
  7. package/dist/extractor-claude-code.test.js.map +1 -0
  8. package/dist/extractor-codex.test.d.ts +2 -0
  9. package/dist/extractor-codex.test.d.ts.map +1 -0
  10. package/dist/extractor-codex.test.js +212 -0
  11. package/dist/extractor-codex.test.js.map +1 -0
  12. package/dist/extractor-cursor.test.d.ts +2 -0
  13. package/dist/extractor-cursor.test.d.ts.map +1 -0
  14. package/dist/extractor-cursor.test.js +120 -0
  15. package/dist/extractor-cursor.test.js.map +1 -0
  16. package/dist/extractors/claude-code.d.ts.map +1 -1
  17. package/dist/extractors/claude-code.js +172 -73
  18. package/dist/extractors/claude-code.js.map +1 -1
  19. package/dist/extractors/codex.d.ts.map +1 -1
  20. package/dist/extractors/codex.js +63 -35
  21. package/dist/extractors/codex.js.map +1 -1
  22. package/dist/extractors/common.d.ts +14 -0
  23. package/dist/extractors/common.d.ts.map +1 -0
  24. package/dist/extractors/common.js +100 -0
  25. package/dist/extractors/common.js.map +1 -0
  26. package/dist/extractors/cursor.d.ts.map +1 -1
  27. package/dist/extractors/cursor.js +205 -45
  28. package/dist/extractors/cursor.js.map +1 -1
  29. package/dist/hash.d.ts.map +1 -1
  30. package/dist/hash.js +35 -2
  31. package/dist/hash.js.map +1 -1
  32. package/dist/hash.test.js +29 -2
  33. package/dist/hash.test.js.map +1 -1
  34. package/dist/index.d.ts +1 -0
  35. package/dist/index.d.ts.map +1 -1
  36. package/dist/index.js +1 -0
  37. package/dist/index.js.map +1 -1
  38. package/dist/redact.d.ts +12 -0
  39. package/dist/redact.d.ts.map +1 -1
  40. package/dist/redact.js +120 -38
  41. package/dist/redact.js.map +1 -1
  42. package/dist/redact.test.d.ts +2 -0
  43. package/dist/redact.test.d.ts.map +1 -0
  44. package/dist/redact.test.js +96 -0
  45. package/dist/redact.test.js.map +1 -0
  46. package/dist/turn-actors.d.ts +3 -0
  47. package/dist/turn-actors.d.ts.map +1 -0
  48. package/dist/turn-actors.js +57 -0
  49. package/dist/turn-actors.js.map +1 -0
  50. package/dist/turn-actors.test.d.ts +2 -0
  51. package/dist/turn-actors.test.d.ts.map +1 -0
  52. package/dist/turn-actors.test.js +65 -0
  53. package/dist/turn-actors.test.js.map +1 -0
  54. package/dist/types.d.ts +5 -0
  55. package/dist/types.d.ts.map +1 -1
  56. package/dist/utils.d.ts +1 -1
  57. package/dist/utils.d.ts.map +1 -1
  58. package/dist/utils.js +4 -0
  59. package/dist/utils.js.map +1 -1
  60. package/dist/validators.d.ts +24 -0
  61. package/dist/validators.d.ts.map +1 -1
  62. package/dist/validators.js +3 -0
  63. package/dist/validators.js.map +1 -1
  64. package/package.json +5 -1
  65. package/src/chunker.ts +17 -2
  66. package/src/extractor-claude-code.test.ts +326 -0
  67. package/src/extractor-codex.test.ts +225 -0
  68. package/src/extractor-cursor.test.ts +141 -0
  69. package/src/extractors/claude-code.ts +180 -69
  70. package/src/extractors/codex.ts +69 -38
  71. package/src/extractors/common.ts +139 -0
  72. package/src/extractors/cursor.ts +294 -52
  73. package/src/hash.test.ts +31 -2
  74. package/src/hash.ts +38 -3
  75. package/src/index.ts +1 -0
  76. package/src/redact.test.ts +100 -0
  77. package/src/redact.ts +175 -58
  78. package/src/turn-actors.test.ts +71 -0
  79. package/src/turn-actors.ts +71 -0
  80. package/src/types.ts +6 -0
  81. package/src/utils.ts +3 -1
  82. package/src/validators.ts +3 -0
package/src/extractors/cursor.ts CHANGED
@@ -1,6 +1,250 @@
1
1
  import { randomUUID } from "crypto";
2
2
  import { hashString, computeContentHash } from "../hash.js";
3
- import type { NormalizedTrace, Turn, TokenUsage } from "../types.js";
3
+ import type { ContentBlock, NormalizedTrace, Turn, TokenUsage } from "../types.js";
4
+ import {
5
+ collectTraceMetrics,
6
+ createPassiveEnvState,
7
+ extractTextFragments,
8
+ normalizeTimestamp,
9
+ pushUniqueTextBlock,
10
+ } from "./common.js";
11
+
12
/**
 * One entry of the composer's `fullConversationHeadersOnly` list.
 *
 * Every field is optional because the list comes from parsed JSON and is
 * only cast, not validated.
 */
type CursorHeader = {
  /** Preferred bubble identifier; `extractCursor` falls back to `id`. */
  bubbleId?: string;
  /** Alternative identifier used when `bubbleId` is missing. */
  id?: string;
  /** Numeric bubble kind; `cursorRole` treats `1` as a user bubble. */
  type?: number;
  /** Creation time, used when the blob itself carries no `createdAt`. */
  createdAt?: string | number | null;
};
18
+
19
/**
 * Loosely-typed view of one bubble payload stored in `cursorDiskKV`.
 *
 * The value is produced by `JSON.parse` plus a cast, so every field is
 * optional and none of them is validated at runtime.
 */
type CursorBlob = {
  // --- Who authored the bubble -------------------------------------------
  /** Explicit role; `cursorRole` checks it before looking at `type`. */
  role?: string;
  /** Bubble kind; only the numeric form is interpreted by `cursorRole`. */
  type?: number | string;

  // --- Message body: the first non-blank candidate is used ---------------
  text?: string;
  content?: string;
  message?: string;
  markdown?: string;
  /** Reasoning payload; its shape is unknown and is handed to `extractTextFragments`. */
  thinking?: unknown;

  // --- Token accounting: `usage` is preferred over `tokenCount` ----------
  usage?: {
    promptTokens?: number;
    completionTokens?: number;
  };
  tokenCount?: {
    inputTokens?: number;
    outputTokens?: number;
  };

  // --- Provenance --------------------------------------------------------
  createdAt?: string | number | null;
  model?: string;
  modelName?: string;

  // --- File and workspace references, mined for editor paths -------------
  context?: {
    openFiles?: unknown[];
  };
  openFiles?: unknown[];
  relevantFiles?: unknown[];
  recentlyViewedFiles?: unknown[];
  workspaceUris?: unknown[];
  attachedFolders?: unknown[];
  attachedFoldersNew?: unknown[];
};
48
+
49
/**
 * The minimal slice of the SQLite handle that this extractor relies on:
 * preparing a single-parameter lookup and closing the connection.
 */
type CursorDb = {
  /** Prepares `sql`; the returned statement fetches at most one row. */
  prepare(sql: string): {
    /** Runs the statement with `param` bound; `undefined` means no row matched. */
    get(param: string): { value: string } | undefined;
  };
  /** Releases the underlying database handle. */
  close(): void;
};
55
+
56
/**
 * Returns `value` when it is a finite number, otherwise `null`.
 *
 * Strings are not coerced; `NaN` and the infinities are rejected.
 */
function toNumber(value: unknown): number | null {
  if (typeof value !== "number") {
    return null;
  }
  return Number.isFinite(value) ? value : null;
}
59
+
60
/**
 * Reports whether `blob` carries at least one truthy field the extractor
 * knows how to use (role, message text, thinking, token usage, model, or a
 * file/workspace list).
 *
 * The check is plain truthiness: empty strings count as absent, while an
 * empty array or object counts as present.
 */
function hasCursorConversationPayload(blob: CursorBlob): boolean {
  const candidates: unknown[] = [
    blob.role,
    blob.text,
    blob.content,
    blob.message,
    blob.markdown,
    blob.thinking,
    blob.usage,
    blob.tokenCount,
    blob.model,
    blob.modelName,
    blob.context?.openFiles,
    blob.openFiles,
    blob.relevantFiles,
    blob.recentlyViewedFiles,
    blob.workspaceUris,
    blob.attachedFolders,
    blob.attachedFoldersNew,
  ];

  return candidates.some((candidate) => Boolean(candidate));
}
81
+
82
/**
 * Loads the stored payload for one conversation bubble.
 *
 * Two `cursorDiskKV` keys are tried in order:
 * `bubbleId:<sessionId>:<bubbleId>` first, then `agentKv:blob:<bubbleId>`.
 * The first row that parses to a JSON object and passes
 * `hasCursorConversationPayload` wins.
 *
 * A row that is missing, unparseable, not a JSON object, or has no usable
 * payload is skipped so the remaining key still gets a chance. Previously a
 * parse failure on the first key aborted the lookup entirely.
 *
 * @param db - Open database handle exposing `prepare(...).get(...)`.
 * @param sessionId - Composer/session identifier used in the first key.
 * @param bubbleId - Bubble identifier taken from the conversation header.
 * @returns The parsed blob, or `null` when no key yields a usable payload.
 */
function readCursorBlob(
  db: CursorDb,
  sessionId: string,
  bubbleId: string,
): CursorBlob | null {
  const candidateKeys = [
    `bubbleId:${sessionId}:${bubbleId}`,
    `agentKv:blob:${bubbleId}`,
  ];

  // The SQL text is identical for every key, so prepare it once.
  const lookup = db.prepare("SELECT value FROM cursorDiskKV WHERE key = ?");

  for (const key of candidateKeys) {
    const row = lookup.get(key) as { value: string } | undefined;
    if (!row) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(row.value);
    } catch {
      // A corrupt value under one key must not hide a good value under the next.
      continue;
    }

    // JSON such as `null`, `42`, `"text"` or `[]` is valid but is not a bubble payload.
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) continue;

    const blob = parsed as CursorBlob;
    if (hasCursorConversationPayload(blob)) return blob;
  }

  return null;
}
108
+
109
/**
 * Decides whether a bubble was written by the user or the assistant.
 *
 * Resolution order:
 * 1. `blob.role`, case-insensitively, when it is `"user"` or `"assistant"`.
 * 2. `blob.type` when it is one of those same strings. `CursorBlob.type`
 *    may be a string, and such values used to be ignored.
 * 3. The numeric `blob.type`, or else `header.type`, where `1` means user.
 *
 * Anything unrecognised resolves to `"assistant"`.
 *
 * @param header - Conversation header entry for the bubble.
 * @param blob - Parsed bubble payload.
 * @returns The normalized role.
 */
function cursorRole(header: CursorHeader, blob: CursorBlob): "user" | "assistant" {
  // The blob comes from unvalidated JSON, so guard before calling string methods.
  const role = typeof blob.role === "string" ? blob.role.toLowerCase() : undefined;
  if (role === "user" || role === "assistant") return role;

  if (typeof blob.type === "string") {
    const namedType = blob.type.toLowerCase();
    if (namedType === "user" || namedType === "assistant") return namedType;
  }

  const type = typeof blob.type === "number" ? blob.type : header.type;
  return type === 1 ? "user" : "assistant";
}
116
+
117
/**
 * Builds a `TokenUsage` record from whichever counters the blob exposes.
 *
 * `usage.promptTokens` / `usage.completionTokens` take precedence; the
 * matching `tokenCount` field is used only when the `usage` one is not a
 * finite number. Cache and reasoning counters are always `null`.
 *
 * @returns `null` when neither an input nor an output count is available;
 *          otherwise a record in which a missing side is reported as `0`.
 */
function extractCursorTokenUsage(blob: CursorBlob): TokenUsage | null {
  const finiteOrNull = (candidate: unknown): number | null =>
    typeof candidate === "number" && Number.isFinite(candidate) ? candidate : null;

  const { usage, tokenCount } = blob;

  const promptSide =
    finiteOrNull(usage?.promptTokens) ?? finiteOrNull(tokenCount?.inputTokens);
  const completionSide =
    finiteOrNull(usage?.completionTokens) ?? finiteOrNull(tokenCount?.outputTokens);

  const nothingReported = promptSide === null && completionSide === null;
  if (nothingReported) {
    return null;
  }

  return {
    input_tokens: promptSide ?? 0,
    output_tokens: completionSide ?? 0,
    cache_read_input_tokens: null,
    cache_creation_input_tokens: null,
    reasoning_tokens: null,
  };
}
138
+
139
/**
 * Converts a `file://` URI into its percent-decoded path component.
 *
 * Values that do not start with `file://` are returned untouched, as are
 * values whose URL parsing or percent-decoding throws.
 */
function normalizeCursorPath(value: string): string {
  if (value.startsWith("file://")) {
    try {
      const { pathname } = new URL(value);
      return decodeURIComponent(pathname);
    } catch {
      // Malformed URI or escape sequence: keep the raw string.
      return value;
    }
  }

  return value;
}
148
+
149
/**
 * Recursively collects path strings from an arbitrarily shaped value.
 *
 * - A non-empty string becomes one path (run through `normalizeCursorPath`).
 * - An array is flattened element by element.
 * - An object contributes whatever is stored under the keys `path`, `uri`,
 *   `absolutePath`, `relativeWorkspacePath`, `relativePath` and `filePath`,
 *   in that order; other keys are ignored.
 * - Everything else yields no paths.
 */
function extractCursorPaths(value: unknown): string[] {
  if (typeof value === "string") {
    return value === "" ? [] : [normalizeCursorPath(value)];
  }

  if (Array.isArray(value)) {
    const fromItems: string[] = [];
    for (const item of value) {
      fromItems.push(...extractCursorPaths(item));
    }
    return fromItems;
  }

  if (typeof value !== "object" || value === null) {
    return [];
  }

  const record = value as Record<string, unknown>;
  const pathKeys = [
    "path",
    "uri",
    "absolutePath",
    "relativeWorkspacePath",
    "relativePath",
    "filePath",
  ];

  return pathKeys
    .filter((key) => key in record)
    .flatMap((key) => extractCursorPaths(record[key]));
}
179
+
180
/**
 * Gathers every file or folder path referenced by the blob's context
 * fields and returns them de-duplicated, in first-seen order.
 */
function collectCursorOpenFiles(blob: CursorBlob): string[] {
  const sources: unknown[] = [
    blob.context?.openFiles,
    blob.openFiles,
    blob.relevantFiles,
    blob.recentlyViewedFiles,
    blob.workspaceUris,
    blob.attachedFolders,
    blob.attachedFoldersNew,
  ];

  const seen = new Set<string>();
  for (const source of sources) {
    for (const path of extractCursorPaths(source)) {
      // Empty strings carry no information; skip them.
      if (path) seen.add(path);
    }
  }

  return Array.from(seen);
}
193
+
194
/**
 * Converts a blob into content blocks: an optional "thinking" block
 * followed by an optional "text" block.
 *
 * The text comes from the first of `text`, `content`, `message`,
 * `markdown` that is a string with non-whitespace characters. Whether a
 * block is actually appended is decided by `pushUniqueTextBlock`.
 */
function extractCursorBlocks(blob: CursorBlob): ContentBlock[] {
  const blocks: ContentBlock[] = [];

  const thinkingFragments = extractTextFragments(blob.thinking);
  pushUniqueTextBlock(blocks, "thinking", thinkingFragments.join("\n\n"));

  let messageText: string | null = null;
  for (const candidate of [blob.text, blob.content, blob.message, blob.markdown]) {
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      messageText = candidate;
      break;
    }
  }
  pushUniqueTextBlock(blocks, "text", messageText);

  return blocks;
}
210
+
211
/**
 * Combines the token usage of two bubbles that are merged into one turn.
 *
 * Input and output counts are summed, with a missing side counting as 0.
 * The nullable counters (cache read, cache creation, reasoning) are summed
 * when at least one side reports a value and stay `null` otherwise.
 * Previously they were always reset to `null`, discarding any value present.
 *
 * @returns `null` only when both inputs are absent.
 */
function mergeUsage(
  left: TokenUsage | null,
  right: TokenUsage | null,
): TokenUsage | null {
  if (!left && !right) return null;

  // Keeps "unknown" (null) distinct from "known to be zero".
  const sumNullable = (
    a: number | null | undefined,
    b: number | null | undefined,
  ): number | null => (a == null && b == null ? null : (a ?? 0) + (b ?? 0));

  return {
    input_tokens: (left?.input_tokens ?? 0) + (right?.input_tokens ?? 0),
    output_tokens: (left?.output_tokens ?? 0) + (right?.output_tokens ?? 0),
    cache_read_input_tokens: sumNullable(
      left?.cache_read_input_tokens,
      right?.cache_read_input_tokens,
    ),
    cache_creation_input_tokens: sumNullable(
      left?.cache_creation_input_tokens,
      right?.cache_creation_input_tokens,
    ),
    reasoning_tokens: sumNullable(left?.reasoning_tokens, right?.reasoning_tokens),
  };
}
225
+
226
/**
 * Adds `nextTurn` to `turns`, folding it into the last turn when both have
 * the same role.
 *
 * When folding, the last turn keeps its own fields (including `turn_id` and
 * `timestamp`), gains the new content blocks and summed usage, and takes
 * the new turn's model only if it had none. The bubble ids of every folded
 * bubble are tracked under `source_metadata.bubbleIds`.
 */
function appendCursorTurn(turns: Turn[], nextTurn: Turn): void {
  const lastIndex = turns.length - 1;
  const previousTurn = turns[lastIndex];

  if (previousTurn && previousTurn.role === nextTurn.role) {
    const previousMeta = previousTurn.source_metadata;
    // A turn that has never been merged only has the singular `bubbleId`.
    const knownBubbleIds = Array.isArray(previousMeta.bubbleIds)
      ? previousMeta.bubbleIds
      : [previousMeta.bubbleId].filter(Boolean);

    turns[lastIndex] = {
      ...previousTurn,
      content: [...previousTurn.content, ...nextTurn.content],
      usage: mergeUsage(previousTurn.usage, nextTurn.usage),
      model: previousTurn.model ?? nextTurn.model,
      source_metadata: {
        ...previousMeta,
        bubbleIds: [...knownBubbleIds, nextTurn.source_metadata.bubbleId].filter(Boolean),
      },
    };
    return;
  }

  turns.push(nextTurn);
}
4
248
 
5
249
  export async function extractCursor(
6
250
  dbPath: string,
@@ -8,7 +252,7 @@ export async function extractCursor(
8
252
  submittedBy = "unknown"
9
253
  ): Promise<NormalizedTrace> {
10
254
  const Database = (await import("better-sqlite3")).default;
11
- const db = new Database(dbPath, { readonly: true });
255
+ const db: CursorDb = new Database(dbPath, { readonly: true });
12
256
 
13
257
  try {
14
258
  const composerRow = db
@@ -17,63 +261,66 @@ export async function extractCursor(
17
261
  if (!composerRow) throw new Error(`Session ${sessionId} not found in cursor DB`);
18
262
 
19
263
  const composerData = JSON.parse(composerRow.value);
20
- const headers: any[] = composerData.fullConversationHeadersOnly ?? [];
264
+ const headers = (composerData.fullConversationHeadersOnly ?? []) as CursorHeader[];
21
265
 
22
266
  const turns: Turn[] = [];
23
267
  const openFiles: string[] = [];
268
+ let totalInputTokens = 0;
269
+ let totalOutputTokens = 0;
24
270
 
25
271
  for (const header of headers) {
26
272
  const bubbleId = header.bubbleId ?? header.id;
27
273
  if (!bubbleId) continue;
28
274
 
29
- const blobRow = db
30
- .prepare("SELECT value FROM cursorDiskKV WHERE key = ?")
31
- .get(`agentKv:blob:${bubbleId}`) as { value: string } | undefined;
32
- if (!blobRow) continue;
33
-
34
- const blob = JSON.parse(blobRow.value);
35
- const role: "user" | "assistant" =
36
- blob.type === "user" || blob.role === "user" ? "user" : "assistant";
37
- const text: string = blob.text ?? blob.content ?? blob.message ?? "";
275
+ const blob = readCursorBlob(db, sessionId, bubbleId);
276
+ if (!blob) continue;
38
277
 
39
- if (blob.context?.openFiles) {
40
- openFiles.push(
41
- ...blob.context.openFiles.map((f: any) => (typeof f === "string" ? f : f.path ?? ""))
42
- );
278
+ const role = cursorRole(header, blob);
279
+ const tokenUsage = extractCursorTokenUsage(blob);
280
+ if (tokenUsage) {
281
+ totalInputTokens += tokenUsage.input_tokens;
282
+ totalOutputTokens += tokenUsage.output_tokens;
43
283
  }
44
284
 
45
- const tokenUsage: TokenUsage | null = blob.usage
46
- ? {
47
- input_tokens: blob.usage.promptTokens ?? 0,
48
- output_tokens: blob.usage.completionTokens ?? 0,
49
- cache_read_input_tokens: null,
50
- cache_creation_input_tokens: null,
51
- reasoning_tokens: null,
52
- }
53
- : null;
54
-
55
- turns.push({
285
+ openFiles.push(...collectCursorOpenFiles(blob));
286
+
287
+ const content = extractCursorBlocks(blob);
288
+ if (content.length === 0) continue;
289
+
290
+ appendCursorTurn(turns, {
56
291
  turn_id: bubbleId,
57
292
  parent_turn_id: null,
58
293
  role,
59
- timestamp: blob.createdAt ?? header.createdAt ?? null,
60
- content: [{ type: "text", text }],
61
- model: blob.model ?? null,
294
+ timestamp: normalizeTimestamp(blob.createdAt ?? header.createdAt),
295
+ content,
296
+ model: blob.model ?? blob.modelName ?? null,
62
297
  usage: tokenUsage,
63
- source_metadata: { bubbleId, type: blob.type },
298
+ source_metadata: { bubbleId, type: blob.type ?? header.type ?? null },
64
299
  });
65
300
  }
66
301
 
67
- db.close();
302
+ if (turns.length === 0) {
303
+ throw new Error(`Session ${sessionId} contained no readable conversation bubbles`);
304
+ }
68
305
 
69
- const startedAt = turns[0]?.timestamp ?? new Date().toISOString();
70
- const endedAt = turns[turns.length - 1]?.timestamp ?? new Date().toISOString();
306
+ const metrics = collectTraceMetrics(turns);
307
+ const startedAt =
308
+ turns[0]?.timestamp ??
309
+ normalizeTimestamp(composerData.createdAt) ??
310
+ new Date().toISOString();
311
+ const endedAt =
312
+ turns[turns.length - 1]?.timestamp ??
313
+ normalizeTimestamp(composerData.createdAt) ??
314
+ new Date().toISOString();
71
315
 
72
316
  const partialTrace: Omit<NormalizedTrace, "trace_id"> = {
73
317
  schema_version: "1.0",
74
318
  source_tool: "cursor",
75
319
  source_session_id: sessionId,
76
- source_version: null,
320
+ source_version:
321
+ typeof composerData._v === "number" || typeof composerData._v === "string"
322
+ ? String(composerData._v)
323
+ : null,
77
324
  submitted_by: submittedBy,
78
325
  submitted_at: new Date().toISOString(),
79
326
  extracted_at: new Date().toISOString(),
@@ -84,24 +331,18 @@ export async function extractCursor(
84
331
  ended_at: endedAt,
85
332
  turns,
86
333
  turn_count: turns.length,
87
- tool_call_count: 0,
88
- has_tool_calls: false,
89
- has_thinking_blocks: false,
90
- has_file_changes: false,
91
- has_shell_commands: false,
92
- total_input_tokens: null,
93
- total_output_tokens: null,
334
+ tool_call_count: metrics.toolCallCount,
335
+ has_tool_calls: metrics.toolCallCount > 0,
336
+ has_thinking_blocks: metrics.hasThinkingBlocks,
337
+ has_file_changes: metrics.hasFileChanges,
338
+ has_shell_commands: metrics.hasShellCommands,
339
+ total_input_tokens: totalInputTokens || null,
340
+ total_output_tokens: totalOutputTokens || null,
94
341
  total_cache_read_tokens: null,
95
342
  content_fidelity: "chat_only",
96
- env_state: {
97
- git_branch: null,
98
- inferred_file_tree: null,
99
- inferred_changed_files: null,
100
- inferred_error_files: null,
101
- shell_exit_codes: null,
102
- open_files_in_editor: openFiles.length > 0 ? openFiles : null,
103
- extraction_method: "passive",
104
- },
343
+ env_state: createPassiveEnvState({
344
+ open_files_in_editor: openFiles.length > 0 ? Array.from(new Set(openFiles)) : null,
345
+ }),
105
346
  score: null,
106
347
  raw_r2_key: "",
107
348
  normalized_r2_key: "",
@@ -112,7 +353,8 @@ export async function extractCursor(
112
353
 
113
354
  return { ...partialTrace, trace_id: traceId };
114
355
  } catch (err) {
115
- db.close();
116
356
  throw err;
357
+ } finally {
358
+ db.close();
117
359
  }
118
360
  }
package/src/hash.test.ts CHANGED
@@ -42,10 +42,10 @@ describe("computeContentHash", () => {
42
42
  expect(computeContentHash(trace)).toBe(computeContentHash(trace));
43
43
  });
44
44
 
45
- it("returns different hashes for different session_ids", () => {
45
+ it("returns the same hash for different session_ids when content is identical", () => {
46
46
  const a = makeTrace({ source_session_id: "session-aaa" });
47
47
  const b = makeTrace({ source_session_id: "session-bbb" });
48
- expect(computeContentHash(a)).not.toBe(computeContentHash(b));
48
+ expect(computeContentHash(a)).toBe(computeContentHash(b));
49
49
  });
50
50
 
51
51
  it("returns different hashes for different turns", () => {
@@ -65,6 +65,35 @@ describe("computeContentHash", () => {
65
65
  expect(computeContentHash(a)).not.toBe(computeContentHash(b));
66
66
  });
67
67
 
68
+ it("ignores volatile turn metadata like timestamps and turn ids", () => {
69
+ const a = makeTrace({
70
+ turns: [{
71
+ turn_id: "turn-a",
72
+ parent_turn_id: null,
73
+ role: "assistant",
74
+ timestamp: "2024-01-01T00:00:00Z",
75
+ content: [{ type: "text", text: "hello" }],
76
+ model: "gpt-test",
77
+ usage: { input_tokens: 1, output_tokens: 2, cache_read_input_tokens: null, cache_creation_input_tokens: null, reasoning_tokens: null },
78
+ source_metadata: { foo: "bar" },
79
+ }],
80
+ });
81
+ const b = makeTrace({
82
+ turns: [{
83
+ turn_id: "turn-b",
84
+ parent_turn_id: "parent-b",
85
+ role: "assistant",
86
+ timestamp: "2024-01-05T00:00:00Z",
87
+ content: [{ type: "text", text: "hello" }],
88
+ model: "gpt-test",
89
+ usage: { input_tokens: 999, output_tokens: 999, cache_read_input_tokens: 1, cache_creation_input_tokens: 1, reasoning_tokens: 1 },
90
+ source_metadata: { baz: "qux" },
91
+ }],
92
+ });
93
+
94
+ expect(computeContentHash(a)).toBe(computeContentHash(b));
95
+ });
96
+
68
97
  it("output is a 64-char hex string", () => {
69
98
  const hash = computeContentHash(makeTrace());
70
99
  expect(hash).toMatch(/^[0-9a-f]{64}$/);
package/src/hash.ts CHANGED
@@ -1,11 +1,46 @@
1
1
  import { createHash } from "crypto";
2
- import type { NormalizedTrace } from "./types.js";
2
+ import type { ContentBlock, NormalizedTrace, Turn } from "./types.js";
3
+
4
/**
 * Reduces a content block to the fields that take part in the content hash.
 *
 * Identifiers such as `tool_call_id` are left out. The property order of
 * each returned object is significant because the result is later
 * serialized with `JSON.stringify`.
 */
function canonicalizeBlock(block: ContentBlock) {
  switch (block.type) {
    case "text":
    case "thinking": {
      const { type, text } = block;
      return { type, text };
    }
    case "tool_use": {
      const { type, tool_name, tool_input } = block;
      return { type, tool_name, tool_input };
    }
    case "tool_result": {
      const { type, is_error, result_content, exit_code } = block;
      return { type, is_error, result_content, exit_code };
    }
    case "image": {
      const { type, media_type, data_r2_key } = block;
      return { type, media_type, data_r2_key };
    }
  }
}
30
+
31
/**
 * Keeps only the hash-relevant parts of a turn: role, model and the
 * canonical form of each content block. Ids, timestamps, usage and source
 * metadata are omitted. Property order is significant for serialization.
 */
function canonicalizeTurn(turn: Turn) {
  const { role, model, content } = turn;
  const canonicalContent = content.map((block) => canonicalizeBlock(block));
  return { role, model, content: canonicalContent };
}
3
38
 
4
39
  export function computeContentHash(trace: NormalizedTrace): string {
5
40
  const content = JSON.stringify({
6
41
  source_tool: trace.source_tool,
7
- source_session_id: trace.source_session_id,
8
- turns: trace.turns,
42
+ content_fidelity: trace.content_fidelity,
43
+ turns: trace.turns.map(canonicalizeTurn),
9
44
  });
10
45
  return createHash("sha256").update(content).digest("hex");
11
46
  }
package/src/index.ts CHANGED
@@ -5,6 +5,7 @@ export * from "./utils.js";
5
5
  export * from "./validators.js";
6
6
  export * from "./redact.js";
7
7
  export * from "./chunker.js";
8
+ export * from "./turn-actors.js";
8
9
  export { extractClaudeCode } from "./extractors/claude-code.js";
9
10
  export { extractCodex } from "./extractors/codex.js";
10
11
  export { extractCursor } from "./extractors/cursor.js";
@@ -0,0 +1,100 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import type { NormalizedTrace } from "./types.js";
3
+ import { redactTraceWithStats } from "./redact.js";
4
+
5
/**
 * Builds the fixture trace used by the redaction tests: one assistant turn
 * with a `Bash` tool call and its result, seeded with an API-key-like
 * string, a bearer token and paths under `/Users/tester`.
 */
function makeTrace(): NormalizedTrace {
  const sessionStart = "2024-01-01T00:00:00.000Z";
  const envFilePath = "/Users/tester/project/.env";

  const assistantTurn: NormalizedTrace["turns"][number] = {
    turn_id: "t1",
    parent_turn_id: null,
    role: "assistant",
    timestamp: sessionStart,
    model: "test",
    usage: null,
    source_metadata: {},
    content: [
      {
        type: "tool_use",
        tool_call_id: "c1",
        tool_name: "Bash",
        tool_input: {
          command: "echo sk-proj-secret-secret-secret-secret",
          nested: { path: envFilePath },
        },
      },
      {
        type: "tool_result",
        tool_call_id: "c1",
        is_error: false,
        result_content: "Bearer abc.def.ghi",
        exit_code: 0,
      },
    ],
  };

  const envState: NormalizedTrace["env_state"] = {
    git_branch: "main",
    inferred_file_tree: [envFilePath],
    inferred_changed_files: null,
    inferred_error_files: null,
    shell_exit_codes: [0],
    open_files_in_editor: null,
    extraction_method: "passive",
  };

  return {
    trace_id: "trace-1",
    schema_version: "1.0",
    source_tool: "claude_code",
    source_session_id: "session-1",
    source_version: null,
    submitted_by: "user@example.com",
    submitted_at: sessionStart,
    extracted_at: sessionStart,
    git_branch: "main",
    cwd_hash: null,
    working_language: null,
    started_at: sessionStart,
    ended_at: "2024-01-01T00:00:01.000Z",
    turns: [assistantTurn],
    turn_count: 1,
    tool_call_count: 1,
    has_tool_calls: true,
    has_thinking_blocks: false,
    has_file_changes: false,
    has_shell_commands: true,
    total_input_tokens: null,
    total_output_tokens: null,
    total_cache_read_tokens: null,
    content_fidelity: "full",
    env_state: envState,
    score: null,
    raw_r2_key: "",
    normalized_r2_key: "",
  };
}
75
+
76
// Covers redaction of nested strings (secrets, bearer tokens, home-directory
// paths) and the statistics reported alongside the redacted trace.
describe("redactTraceWithStats", () => {
  it("redacts nested strings and reports redaction stats", () => {
    const { trace, stats } = redactTraceWithStats(makeTrace(), { homeDir: "/Users/tester" });

    const firstTurn = trace.turns[0];
    const toolUse = firstTurn?.content[0];
    const toolResult = firstTurn?.content[1];

    expect(trace.submitted_by).toBe("[redacted]");

    // Narrow the union so the tool_use-specific fields are accessible.
    expect(toolUse?.type).toBe("tool_use");
    if (!toolUse || toolUse.type !== "tool_use") {
      throw new Error("expected a tool_use block");
    }
    expect(toolUse.tool_input.command).toContain("[OPENAI_KEY]");
    const nestedInput = toolUse.tool_input.nested as { path: string };
    expect(nestedInput.path).toBe("~/project/.env");

    expect(toolResult?.type).toBe("tool_result");
    if (!toolResult || toolResult.type !== "tool_result") {
      throw new Error("expected a tool_result block");
    }
    expect(toolResult.result_content).toBe("Bearer [BEARER_TOKEN]");

    expect(trace.env_state?.inferred_file_tree?.[0]).toBe("~/project/.env");
    expect(stats.changed).toBe(true);
    expect(stats.piiMatches).toBe(0);
    expect(stats.totalMatches).toBeGreaterThanOrEqual(3);
  });
});