pi-memory-stone 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,374 @@
1
+ /**
2
+ * Deterministic JSONL session parser.
3
+ * Extracts structured records from session entries without LLM.
4
+ *
5
+ * Parsed from event data passed to agent_end, or from raw JSONL for backfill.
6
+ */
7
+
8
+ import type { RecordKind, RecordScope, FileAction } from "../db/schema.js";
9
+ import { redactSecrets, shouldIgnoreFile } from "../privacy/index.js";
10
+
11
+ // ─── Types for parsed session data ──────────────────────────────────
12
+
13
/**
 * One conversational turn: a user prompt together with the assistant
 * output, tool calls and errors collected for it.
 */
export interface ParsedTurn {
  /** ID of the user-message entry that opened the turn. */
  userEntryId: string;
  /** Text of the user prompt. */
  userPrompt: string;
  /** IDs of the assistant-message entries that belong to the turn. */
  assistantEntryIds: string[];
  /** Assistant response text, concatenated across messages. */
  assistantText: string;
  /** Tool calls made during the turn. */
  toolCalls: ParsedToolCall[];
  /** Errors encountered during the turn. */
  errors: ParsedError[];
}
27
+
28
/** A single tool invocation and, once known, its result. */
export interface ParsedToolCall {
  /** ID of the session entry the call was recorded from. */
  entryId: string;
  /** Name of the invoked tool. */
  toolName: string;
  /** Arguments passed to the tool. */
  args: Record<string, unknown>;
  /** Identifier linking a call to its result, when one is available. */
  toolCallId?: string;
  /** Result text (truncated). */
  resultText: string;
  /** Whether the tool result was flagged as an error. */
  isError: boolean;
}
37
+
38
/** An error reported by a tool during a turn. */
export interface ParsedError {
  /** ID of the session entry that carried the error. */
  entryId: string;
  /** Name of the tool that failed. */
  toolName: string;
  /** Error message text. */
  message: string;
}
43
+
44
/** A file touched by a tool call or shell command. */
export interface ParsedFileActivity {
  /** ID of the session entry the activity was detected in. */
  entryId: string;
  /** Path of the file as it appeared in the tool arguments or command. */
  path: string;
  /** Kind of access that was detected. */
  action: FileAction;
}
49
+
50
/** A record produced from parsed session data. */
export interface RecordPayload {
  /** Record category. */
  kind: RecordKind;
  /** Scope the record applies to. */
  scope: RecordScope;
  /** Text body of the record. */
  text: string;
  /** Optional tags string. */
  tags?: string;
  /** ID of the first session entry covered by the record. */
  entryIdStart?: string;
  /** ID of the last session entry covered by the record. */
  entryIdEnd?: string;
  /** File activity associated with the record, if any. */
  fileActivities?: ParsedFileActivity[];
}
59
+
60
+ // ─── Entry helpers ──────────────────────────────────────────────────
61
+
62
/**
 * One entry of a session log as consumed by `parseEntries`.
 * Only `type`, `id`, `message` and `summary` are read by the parser in
 * this file; the remaining fields are carried along untouched.
 */
interface SessionEntry {
  /** Entry discriminator, e.g. "message", "compaction", "branch_summary", "custom_message". */
  type: string;
  /** Identifier of the entry. */
  id: string;
  parentId?: string | null;
  timestamp?: string;
  /** Message payload; present on "message" entries. */
  message?: Record<string, unknown>;
  customType?: string;
  /** Summary text; read from compaction and branch-summary entries. */
  summary?: string;
  data?: unknown;
}
72
+
73
/**
 * Loose view of the `message` payload of a session entry.
 * Every field is optional because the payload is not validated before
 * it is cast to this shape.
 */
interface AgentMessage {
  /** Sender role; the parser branches on "user", "assistant", "toolResult" and "bashExecution". */
  role?: string;
  /** Either a plain string or an array of typed content parts. */
  content?: unknown;
  provider?: string;
  model?: string;
  usage?: unknown;
  stopReason?: string;
  /** On tool results: identifier of the tool call being answered. */
  toolCallId?: string;
  /** On tool results: name of the tool that produced the result. */
  toolName?: string;
  details?: unknown;
  /** On tool results: whether the result is an error. */
  isError?: boolean;
  display?: boolean;
}
86
+
87
+ // ─── Content extraction ─────────────────────────────────────────────
88
+
89
/**
 * Pulls the plain text out of a message `content` value.
 *
 * @param content - A string, an array of content parts, or anything else.
 * @returns The string itself; for arrays, the `text` of every part whose
 *   `type` is "text", joined with newlines; otherwise an empty string.
 */
function extractText(content: unknown): string {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }

  const pieces: string[] = [];
  (content as Array<Record<string, unknown>>).forEach((part) => {
    if (part.type !== "text") return;
    const value = part.text;
    if (typeof value === "string") {
      pieces.push(value);
    }
  });
  return pieces.join("\n");
}
97
+
98
/**
 * Collects the tool-call parts of a message `content` array.
 *
 * @param content - Message content; anything that is not an array yields no calls.
 * @returns One `{ id, name, arguments }` object per part whose `type` is
 *   "toolCall" and whose `name` is a string. Missing `arguments` become `{}`.
 *   `id` is passed through as-is and is not validated.
 */
function extractToolCalls(content: unknown): Array<{ id: string; name: string; arguments: Record<string, unknown> }> {
  const calls: Array<{ id: string; name: string; arguments: Record<string, unknown> }> = [];
  if (!Array.isArray(content)) {
    return calls;
  }

  (content as Array<Record<string, unknown>>).forEach((part) => {
    if (part.type !== "toolCall") return;
    const name = part.name;
    if (typeof name !== "string") return;
    calls.push({
      id: part.id as string,
      name,
      arguments: (part.arguments as Record<string, unknown>) ?? {},
    });
  });
  return calls;
}
108
+
109
/**
 * Pulls the "thinking" text out of a message `content` array.
 *
 * @param content - Message content; anything that is not an array yields "".
 * @returns The `thinking` string of every part whose `type` is "thinking",
 *   joined with newlines.
 */
function extractThinking(content: unknown): string {
  if (!Array.isArray(content)) {
    return "";
  }

  const thoughts: string[] = [];
  (content as Array<Record<string, unknown>>).forEach((part) => {
    if (part.type !== "thinking") return;
    const value = part.thinking;
    if (typeof value === "string") {
      thoughts.push(value);
    }
  });
  return thoughts.join("\n");
}
116
+
117
+ // ─── File activity detection ────────────────────────────────────────
118
+
119
/**
 * Derives file activity from a single tool call.
 *
 * Only the "read", "write" and "edit" tools produce an activity, and only
 * when `args.path` is a non-empty string that `shouldIgnoreFile` does not
 * reject. "bash" calls (and any other tool) produce nothing here.
 *
 * @param toolName - Name of the invoked tool.
 * @param args - Tool arguments; only `path` is inspected.
 * @param entryId - ID of the session entry the call came from.
 * @returns Zero or one activity.
 */
function detectFileActivity(
  toolName: string,
  args: Record<string, unknown>,
  entryId: string,
): ParsedFileActivity[] {
  const path = typeof args.path === "string" ? args.path : undefined;

  // Every recording branch needs a path, so an absent/empty one ends here.
  if (!path) {
    return [];
  }

  // Skip sensitive files
  if (shouldIgnoreFile(path)) {
    return [];
  }

  let action: FileAction | undefined;
  if (toolName === "read") {
    action = "read";
  } else if (toolName === "write") {
    action = "write";
  } else if (toolName === "edit") {
    action = "edit";
  }

  return action ? [{ entryId, path, action }] : [];
}
153
+
154
/**
 * Matches file-like tokens inside a shell command.
 *
 * - The token must start at the beginning of the command or after whitespace.
 * - Capture group 1 is the path: word characters, `.`, `-` and `/`, ending
 *   in one of the listed extensions.
 * - The trailing lookahead requires the extension to be complete. Without
 *   it the first alternative that fits wins, so `package.json` would be
 *   captured as `package.js` and `App.tsx` as `App.ts`, and `notes.rst` or
 *   `a.ts.map` would be reported as `.rs` / `.ts` files.
 */
const BASH_FILE_PATH_PATTERN =
  /(?:^|\s)([\w.\-/]+\.(?:ts|tsx|js|jsx|py|rs|go|java|rb|php|css|html|json|yaml|yml|toml|md|sql|sh|bash|zsh))(?![\w-]|[./]\w)/g;

/**
 * Scans a shell command for file paths with a known source/config extension.
 *
 * Each distinct path is reported once, in order of first appearance, with
 * action "bash". Paths rejected by `shouldIgnoreFile` are skipped.
 *
 * @param command - The shell command text.
 * @param entryId - ID of the session entry the command came from.
 * @returns The detected file activities (possibly empty).
 */
function detectBashFileActivity(
  command: string,
  entryId: string,
): ParsedFileActivity[] {
  const activities: ParsedFileActivity[] = [];
  const seen = new Set<string>();

  // matchAll works on a private copy of the regex, so the shared constant
  // never carries lastIndex state between calls.
  for (const match of command.matchAll(BASH_FILE_PATH_PATTERN)) {
    const filePath = match[1];
    if (seen.has(filePath) || shouldIgnoreFile(filePath)) continue;
    seen.add(filePath);
    activities.push({ entryId, path: filePath, action: "bash" });
  }

  return activities;
}
174
+
175
+ // ─── Turn parsing ───────────────────────────────────────────────────
176
+
177
/**
 * Groups raw session entries into turns and collects file activity and
 * compaction summaries along the way.
 *
 * - A "user" message opens a new turn (closing the previous one); its text
 *   is passed through `redactSecrets`.
 * - "assistant", "toolResult" and "bashExecution" messages are attached to
 *   the open turn and are dropped when no turn is open yet.
 * - "compaction" and "branch_summary" entries with a summary are returned
 *   in `compactions`; all other non-message entries are skipped.
 *
 * @param entries - Session entries in log order.
 * @returns The parsed turns, the detected file activities and the
 *   compaction/branch summaries.
 */
export function parseEntries(entries: SessionEntry[]): {
  turns: ParsedTurn[];
  fileActivities: ParsedFileActivity[];
  compactions: Array<{ entryId: string; summary: string }>;
} {
  const turns: ParsedTurn[] = [];
  const fileActivities: ParsedFileActivity[] = [];
  const compactions: Array<{ entryId: string; summary: string }> = [];

  let currentTurn: ParsedTurn | null = null;

  for (const entry of entries) {
    // Compaction and branch-summary entries are recorded the same way.
    if ((entry.type === "compaction" || entry.type === "branch_summary") && entry.summary) {
      compactions.push({ entryId: entry.id, summary: entry.summary });
      continue;
    }

    // Everything that is not a message with a payload is skipped. This
    // includes custom_message entries (extension messages), which take
    // part in context but are not user messages.
    if (entry.type !== "message" || !entry.message) continue;

    const msg = entry.message as AgentMessage;

    // User message: closes the previous turn and starts a new one.
    if (msg.role === "user") {
      if (currentTurn) {
        turns.push(currentTurn);
      }
      currentTurn = {
        userEntryId: entry.id,
        userPrompt: redactSecrets(extractText(msg.content)),
        assistantEntryIds: [],
        assistantText: "",
        toolCalls: [],
        errors: [],
      };
      continue;
    }

    // All remaining roles attach to an open turn; without one they are dropped.
    if (!currentTurn) continue;
    const turn: ParsedTurn = currentTurn;

    switch (msg.role) {
      case "assistant": {
        turn.assistantEntryIds.push(entry.id);
        const text = extractText(msg.content);
        const thinking = extractThinking(msg.content);

        for (const call of extractToolCalls(msg.content)) {
          turn.toolCalls.push({
            entryId: entry.id,
            toolCallId: call.id,
            toolName: call.name,
            args: call.arguments,
            resultText: "",
            isError: false,
          });
          fileActivities.push(...detectFileActivity(call.name, call.arguments, entry.id));
        }

        // Thinking is only used when the message has no visible text.
        const spoken = text || thinking;
        if (spoken) {
          turn.assistantText += (turn.assistantText ? "\n" : "") + spoken;
        }
        break;
      }

      case "toolResult": {
        const redactedResult = redactSecrets(extractText(msg.content));
        const toolCallId = typeof msg.toolCallId === "string" ? msg.toolCallId : undefined;

        // Pair the result with the call recorded from the assistant message.
        const existingCall =
          toolCallId !== undefined
            ? turn.toolCalls.find((call) => call.toolCallId === toolCallId)
            : undefined;

        // Prefer the name on the result; fall back to the matched call's
        // name so errors are not labelled "unknown" when the tool is known.
        const toolName = msg.toolName || existingCall?.toolName || "unknown";

        const toolCall: ParsedToolCall = existingCall ?? {
          entryId: entry.id,
          toolCallId,
          toolName,
          args: {},
          resultText: "",
          isError: false,
        };
        toolCall.resultText = redactedResult.slice(0, 500); // Truncate for storage
        toolCall.isError = msg.isError === true;
        if (!existingCall) turn.toolCalls.push(toolCall);

        if (toolCall.isError) {
          turn.errors.push({
            entryId: entry.id,
            toolName,
            message: redactedResult.slice(0, 200),
          });
        }
        break;
      }

      case "bashExecution": {
        // The payload is unvalidated, so only scan real, non-empty strings.
        const command = (msg as Record<string, unknown>).command;
        if (typeof command === "string" && command) {
          fileActivities.push(...detectBashFileActivity(command, entry.id));
        }
        break;
      }

      default:
        // Other roles carry nothing the parser records.
        break;
    }
  }

  // Save last turn
  if (currentTurn) {
    turns.push(currentTurn);
  }

  return { turns, fileActivities, compactions };
}
314
+
315
+ // ─── Record generation ──────────────────────────────────────────────
316
+
317
/**
 * Turns parsed turns into storable records.
 *
 * Each turn yields one "turn_summary" record (user prompt, assistant text,
 * distinct tool names, errors) followed by one "error_resolution" record
 * per error. All records use scope "project".
 *
 * Prompt and assistant text are redacted BEFORE they are shortened. Cutting
 * first can slice a secret below the minimum length a redaction pattern
 * needs, which would leave the secret's prefix in the stored summary.
 *
 * @param turns - Turns produced by `parseEntries`.
 * @param projectId - Not used by this function.
 * @param sessionId - Not used by this function.
 * @param sessionFile - Not used by this function.
 * @returns The generated records, in turn order.
 */
export function turnsToRecords(
  turns: ParsedTurn[],
  projectId: string | null,
  sessionId: string,
  sessionFile: string,
): RecordPayload[] {
  // Shortens to at most `limit` characters, ending in "..." when cut.
  const truncate = (value: string, limit: number): string =>
    value.length > limit ? value.slice(0, limit - 3) + "..." : value;

  const records: RecordPayload[] = [];

  for (const turn of turns) {
    const summaryParts: string[] = [];

    // User prompt
    summaryParts.push(`User: ${truncate(redactSecrets(turn.userPrompt), 500)}`);

    // Assistant response
    if (turn.assistantText) {
      summaryParts.push(`Assistant: ${truncate(redactSecrets(turn.assistantText), 800)}`);
    }

    // Tool calls (each tool name listed once)
    if (turn.toolCalls.length > 0) {
      const toolNames = [...new Set(turn.toolCalls.map((tc) => tc.toolName))];
      summaryParts.push(`Tools used: ${toolNames.join(", ")}`);
    }

    // Errors
    if (turn.errors.length > 0) {
      summaryParts.push(`Errors: ${turn.errors.map((e) => `${e.toolName}: ${e.message}`).join("; ")}`);
    }

    // Final pass also covers tool names and error messages.
    const text = redactSecrets(summaryParts.join("\n"));
    if (!text.trim()) continue;

    // NOTE(review): the end of the range is the last tool call's entry, or
    // the user entry when there are none; assistantEntryIds are not
    // consulted. Confirm this is the intended range.
    const lastToolCall = turn.toolCalls.length > 0 ? turn.toolCalls[turn.toolCalls.length - 1] : undefined;

    records.push({
      kind: "turn_summary",
      scope: "project",
      text,
      entryIdStart: turn.userEntryId,
      entryIdEnd: lastToolCall ? lastToolCall.entryId : turn.userEntryId,
    });

    // Error records
    for (const err of turn.errors) {
      records.push({
        kind: "error_resolution",
        scope: "project",
        text: `Tool: ${err.toolName}\nError: ${err.message}`,
        entryIdStart: err.entryId,
        entryIdEnd: err.entryId,
      });
    }
  }

  return records;
}
@@ -0,0 +1,167 @@
1
+ /**
2
+ * Privacy & redaction module.
3
+ * Safe-by-default: redact secrets before DB storage.
4
+ * Ignores sensitive files/tool outputs.
5
+ */
6
+
7
+ // ─── Secret patterns ────────────────────────────────────────────────
8
+
9
/** Replacement accepted by `String.prototype.replace`: fixed text or a replacer callback. */
type SecretReplacement = string | ((substring: string, ...args: unknown[]) => string);

/**
 * Secret detectors, applied in array order. Each entry pairs a global
 * regex with the text (or callback) that replaces every match.
 */
const SECRET_PATTERNS: { name: string; regex: RegExp; replacement: SecretReplacement }[] = [
  // API keys (common formats)
  {
    name: "openai-key",
    regex: /sk-(?:proj-)?[A-Za-z0-9_-]{20,}/g,
    replacement: "[REDACTED:openai-key]",
  },
  {
    name: "github-token",
    regex: /gh[pousr]_[A-Za-z0-9_]{20,}/g,
    replacement: "[REDACTED:github-token]",
  },
  {
    name: "aws-key",
    regex: /AKIA[0-9A-Z]{16}/g,
    replacement: "[REDACTED:aws-key]",
  },
  {
    // Lookbehind keeps the "SecretAccessKey=" label and replaces only the value.
    name: "aws-secret",
    regex: /(?<=SecretAccessKey[=:]\s*)[A-Za-z0-9/+]{40,}/g,
    replacement: "[REDACTED:aws-secret]",
  },
  {
    name: "jwt",
    regex: /eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}/g,
    replacement: "[REDACTED:jwt]",
  },
  {
    name: "generic-api-key",
    regex: /(?:api[_-]?key|apikey|api[_-]?secret|secret[_-]?key)[=:]\s*['"]?[A-Za-z0-9_\-.]{16,}['"]?/gi,
    replacement: "[REDACTED:api-key]",
  },
  {
    name: "secret-assignment",
    regex: /\b(?:secret|secret[_-]?key)\b\s*[=:]\s*(?:['"][^'"]+['"]|[^\s'"`]+)/gi,
    replacement: "[REDACTED:secret]",
  },
  {
    name: "private-key",
    regex: /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/g,
    replacement: "[REDACTED:private-key]",
  },
  {
    name: "password-assignment",
    regex: /\b(?:password|passwd|pwd)\b\s*[=:]\s*(?:['"][^'"]+['"]|[^\s'"`]+)/gi,
    replacement: "[REDACTED:password]",
  },
  {
    name: "token-assignment",
    regex: /\b(?:token|auth[_-]?token|access[_-]?token|refresh[_-]?token)\b\s*[=:]\s*(?:['"][^'"]+['"]|[A-Za-z0-9_\-.]{16,})/gi,
    replacement: "[REDACTED:token]",
  },
  {
    name: "token-bearer",
    regex: /(?:Bearer|token)\s+[A-Za-z0-9_\-.]{20,}/gi,
    replacement: "[REDACTED:token]",
  },
  {
    name: "connection-string",
    regex: /(?:mongodb|postgres|mysql|redis|sqlite):\/\/[^\s"'`]+/gi,
    replacement: (match: string) => {
      // Keep the protocol and host but redact the credentials.
      const schemeEnd = match.indexOf("://") + 3;
      const rest = match.slice(schemeEnd);

      // The authority (userinfo@host:port) ends at the first "/", "?" or "#".
      const authorityEnd = rest.search(/[/?#]/);
      const authority = authorityEnd === -1 ? rest : rest.slice(0, authorityEnd);

      // Split at the LAST "@" of the authority. Splitting at the first one
      // leaks the tail of a password that contains an unencoded "@".
      const lastAt = authority.lastIndexOf("@");
      if (lastAt > 0) {
        return match.slice(0, schemeEnd) + "[REDACTED]" + rest.slice(lastAt);
      }

      // No "@" in the authority: fall back to redacting up to the first "@"
      // anywhere in the URL (e.g. a password containing "/").
      return match.replace(/\/\/[^@]+@/, "//[REDACTED]@");
    },
  },
];
78
+
79
/**
 * Applies one secret pattern to `text`.
 *
 * @param text - Text to scan.
 * @param pattern - Regex plus its replacement (fixed string or callback).
 * @returns `text` with every match of `pattern.regex` replaced.
 */
function replaceSecretPattern(text: string, pattern: { regex: RegExp; replacement: SecretReplacement }): string {
  const { regex, replacement } = pattern;
  // Both arms make the same call; the split exists only so each one
  // resolves to a single `replace` overload (string vs. callback).
  return typeof replacement === "string"
    ? text.replace(regex, replacement)
    : text.replace(regex, replacement);
}
85
+
86
+ // ─── Sensitive path patterns ────────────────────────────────────────
87
+
88
/** Path patterns that mark a file as sensitive. A path matching any of them is flagged. */
const DEFAULT_SENSITIVE_PATHS: readonly RegExp[] = [
  // Environment files: ".env", ".env.<anything>", ".envrc"
  /\.env(\..*)?$/,
  /\.envrc$/,

  // Anything with "credentials" in the path (case-insensitive)
  /credentials/i,

  // Key and certificate files
  /\.pem$/,
  /\.key$/,
  /\.crt$/,
  /id_rsa/,
  /id_ed25519/,

  // Key-store and cloud-config directories
  /\.ssh\//,
  /\.gnupg\//,
  /\.aws\/(?:config|credentials)/,

  // "secret/" or "secrets/" directories (case-insensitive)
  /secrets?\//i,

  /\.git-credentials/,
];
103
+
104
/**
 * Directory names that cause a path to be ignored when they appear as a
 * whole path segment.
 */
const DEFAULT_IGNORE_DIRS: readonly string[] = [
  // Dependencies and VCS metadata
  "node_modules",
  ".git",

  // Build output and caches
  "dist",
  "build",
  ".next",
  ".cache",
  "coverage",

  // Python
  "__pycache__",
  ".venv",
  "venv",

  // Infrastructure tooling
  ".terraform",
  ".serverless",
];
118
+
119
+ // ─── Redaction ──────────────────────────────────────────────────────
120
+
121
/**
 * Runs every entry of `SECRET_PATTERNS` over `text`, in array order, each
 * pattern working on the output of the previous one.
 *
 * @param text - Text that may contain secrets.
 * @returns The text with all pattern matches replaced.
 */
export function redactSecrets(text: string): string {
  return SECRET_PATTERNS.reduce(
    (redacted, pattern) => replaceSecretPattern(redacted, pattern),
    text,
  );
}
128
+
129
/**
 * Decides whether a path should be treated as sensitive/ignored.
 *
 * A path is flagged when any of its segments is listed in
 * `DEFAULT_IGNORE_DIRS`, or when it matches one of
 * `DEFAULT_SENSITIVE_PATHS` or a caller-supplied extra pattern.
 *
 * Backslashes are treated as separators too, so Windows-style paths such
 * as `C:\repo\.ssh\config` are checked the same way as their `/` form.
 * Patterns are tried against both the path as given and the normalized
 * form, so extra patterns written for either style keep matching.
 *
 * @param path - File path to classify.
 * @param extraPatterns - Additional patterns checked after the defaults.
 * @returns `true` when the path is sensitive or inside an ignored directory.
 */
export function isSensitivePath(path: string, extraPatterns: RegExp[] = []): boolean {
  const normalized = path.replace(/\\/g, "/");

  // Check directory ignore patterns
  if (normalized.split("/").some((segment) => DEFAULT_IGNORE_DIRS.includes(segment))) {
    return true;
  }

  const candidates = normalized === path ? [path] : [path, normalized];

  // Check path patterns
  return [...DEFAULT_SENSITIVE_PATHS, ...extraPatterns].some((pattern) =>
    candidates.some((candidate) => {
      // A /g or /y regex resumes from lastIndex; reset it so repeated
      // calls with the same pattern object give the same answer.
      pattern.lastIndex = 0;
      return pattern.test(candidate);
    }),
  );
}
145
+
146
/**
 * Flags tool output that relates to a sensitive path.
 *
 * Tool output is never flagged on its own; only a non-empty `path` that
 * `isSensitivePath` accepts makes this return `true`.
 *
 * @param toolName - Not consulted by this function.
 * @param path - Optional path associated with the tool output.
 * @returns `true` when `path` is given and sensitive, otherwise `false`.
 */
export function isSensitiveToolOutput(toolName: string, path?: string): boolean {
  return path ? isSensitivePath(path) : false;
}
151
+
152
/**
 * Prepares file content for storage.
 *
 * @param content - The file's content.
 * @param path - The file's path.
 * @returns A placeholder naming the path when `isSensitivePath(path)` is
 *   true; otherwise the content after `redactSecrets`.
 */
export function redactSensitiveFileContent(content: string, path: string): string {
  return isSensitivePath(path)
    ? `[REDACTED: sensitive file at ${path}]`
    : redactSecrets(content);
}
158
+
159
/**
 * Tells whether a file should be left out of recorded activity.
 * Delegates to `isSensitivePath` with the default patterns only.
 *
 * @param path - File path to check.
 * @returns `true` when the path is sensitive or inside an ignored directory.
 */
export function shouldIgnoreFile(path: string): boolean {
  const ignored = isSensitivePath(path);
  return ignored;
}
162
+
163
/**
 * Tells whether a tool result should be ignored because the call targeted
 * a sensitive path.
 *
 * @param toolName - Not consulted by this function.
 * @param args - Tool arguments; only a string `path` is inspected.
 * @returns `true` when `args.path` is a non-empty string accepted by
 *   `isSensitivePath`, otherwise `false`.
 */
export function shouldIgnoreToolResult(toolName: string, args: Record<string, unknown>): boolean {
  const candidate = args.path;
  if (typeof candidate !== "string" || candidate === "") {
    return false;
  }
  return isSensitivePath(candidate);
}