assistme 0.3.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ import type {
3
3
  HookCallback,
4
4
  PreToolUseHookInput,
5
5
  PostToolUseHookInput,
6
+ PostToolUseFailureHookInput,
6
7
  } from "@anthropic-ai/claude-agent-sdk";
7
8
  import { emitEvent } from "../db/supabase.js";
8
9
  import { log } from "../utils/logger.js";
@@ -13,6 +14,16 @@ import {
13
14
  MAX_SKILL_RECORD_RESULT_LENGTH,
14
15
  } from "../utils/constants.js";
15
16
 
/**
 * One failed tool invocation, kept so the agent can reflect on what went
 * wrong and switch strategy.
 */
export interface ToolFailureRecord {
  /** Name of the tool whose call failed. */
  toolName: string;
  /** Arguments the tool was called with. */
  input: Record<string, unknown>;
  /** Error text reported for the failed call. */
  error: string;
  /** Numeric timestamp taken when the failure was recorded. */
  timestamp: number;
}
26
+
16
27
  /**
17
28
  * Strip MCP server prefix from tool names for web UI compatibility.
18
29
  * e.g. "mcp__assistme-browser__browser_navigate" → "browser_navigate"
@@ -23,14 +34,17 @@ export function stripMcpPrefix(toolName: string): string {
23
34
  return match ? match[1] : toolName;
24
35
  }
25
36
 
37
+
26
38
  /**
27
39
  * Create PreToolUse and PostToolUse hooks that emit events to Supabase.
28
40
  * These hooks let the web UI display tool activity in real-time.
41
+ * Also tracks tool failures for agentic reflection and strategy switching.
29
42
  */
30
43
  export function createEventHooks(
31
44
  taskId: string,
32
- toolCallRecords: ToolCallRecord[]
33
- ): Partial<Record<"PreToolUse" | "PostToolUse", HookCallbackMatcher[]>> {
45
+ toolCallRecords: ToolCallRecord[],
46
+ toolFailures: ToolFailureRecord[] = []
47
+ ): Partial<Record<"PreToolUse" | "PostToolUse" | "PostToolUseFailure", HookCallbackMatcher[]>> {
34
48
  const preToolUseHook: HookCallback = async (input) => {
35
49
  if (input.hook_event_name !== "PreToolUse") return { continue: true };
36
50
 
@@ -84,8 +98,35 @@ export function createEventHooks(
84
98
  return {};
85
99
  };
86
100
 
// Hook for failed tool calls: appends a record to `toolFailures`, emits a
// "tool_failure" event for the task, and logs a warning.
const postToolUseFailureHook: HookCallback = async (input) => {
  // Events of any other kind are passed through untouched.
  if (input.hook_event_name !== "PostToolUseFailure") {
    return {};
  }

  const failure = input as PostToolUseFailureHookInput;
  const toolName = stripMcpPrefix(failure.tool_name);
  // Error text is capped at 500 characters for both the record and the event.
  const truncatedError = failure.error.slice(0, 500);

  toolFailures.push({
    toolName,
    input: (failure.tool_input as Record<string, unknown>) || {},
    error: truncatedError,
    timestamp: Date.now(),
  });

  // Count failures for this tool, including the one just recorded.
  let failuresForTool = 0;
  for (const record of toolFailures) {
    if (record.toolName === toolName) {
      failuresForTool += 1;
    }
  }

  await emitEvent(taskId, "tool_failure", {
    name: toolName,
    error: truncatedError,
    failure_count: failuresForTool,
  });

  log.warn(`Tool failure tracked: ${toolName} (total: ${toolFailures.length})`);

  return {};
};
126
+
87
127
  return {
88
128
  PreToolUse: [{ hooks: [preToolUseHook] }],
89
129
  PostToolUse: [{ hooks: [postToolUseHook] }],
130
+ PostToolUseFailure: [{ hooks: [postToolUseFailureHook] }],
90
131
  };
91
132
  }
@@ -1,5 +1,10 @@
1
1
  import { callMcpHandler } from "../db/api-client.js";
2
2
  import { log } from "../utils/logger.js";
3
+ import {
4
+ MEMORY_DEDUP_SIMILARITY_THRESHOLD,
5
+ MEMORY_COMPRESSION_THRESHOLD,
6
+ MEMORY_COMPRESSION_TARGET,
7
+ } from "../utils/constants.js";
3
8
 
4
9
  export type MemoryCategory =
5
10
  | "general"
@@ -153,4 +158,123 @@ export class MemoryManager {
153
158
  });
154
159
  return result.count;
155
160
  }
161
+
162
+ // ── Compression & Deduplication ──────────────────────────────────
163
+
/**
 * Prune stored memories once their number reaches MEMORY_COMPRESSION_THRESHOLD.
 *
 * Works on the (at most 200) memories returned by `list` and runs three passes:
 *   1. remove memories whose `expires_at` lies in the past;
 *   2. remove near-duplicates reported by `findDuplicates`;
 *   3. if more than MEMORY_COMPRESSION_TARGET memories are still left, remove
 *      the lowest-ranked surplus (importance, then access count, then age),
 *      skipping "instruction" memories with importance >= 8.
 *
 * Best-effort: any error is logged as a warning and never rethrown.
 *
 * @returns The number of memories actually removed. This is 0 when the
 *   threshold was not reached, and the partial count when an error interrupts
 *   the run after some removals already succeeded.
 */
async compressIfNeeded(): Promise<number> {
  // Lives outside the try block so the catch branch can still report
  // removals that completed before a failure.
  let removed = 0;

  try {
    const all = await this.list(undefined, 200);
    if (all.length < MEMORY_COMPRESSION_THRESHOLD) {
      return 0;
    }

    log.info(`Memory compression triggered: ${all.length} memories (threshold: ${MEMORY_COMPRESSION_THRESHOLD})`);

    // Step 1: Remove expired memories. Everything that is not removed here
    // (including entries whose expiry cannot be parsed) still exists in the
    // store, so it stays in `remaining` and counts toward the target below.
    const now = Date.now();
    const remaining: typeof all = [];
    for (const m of all) {
      if (m.expires_at && new Date(m.expires_at).getTime() < now) {
        await this.remove(m.id);
        removed++;
      } else {
        remaining.push(m);
      }
    }

    // Step 2: Deduplicate similar memories (keep the higher-importance one)
    const duplicateIds = this.findDuplicates(remaining);
    for (const id of duplicateIds) {
      await this.remove(id);
      removed++;
    }

    // Step 3: If still over target, remove lowest-importance, least-accessed memories
    const afterDedup = remaining.filter((m) => !duplicateIds.has(m.id));
    if (afterDedup.length > MEMORY_COMPRESSION_TARGET) {
      const toRemove = afterDedup
        .sort((a, b) => {
          // Sort by importance ASC, then access_count ASC, then created_at ASC
          if (a.importance !== b.importance) return a.importance - b.importance;
          if (a.access_count !== b.access_count) return a.access_count - b.access_count;
          return new Date(a.created_at).getTime() - new Date(b.created_at).getTime();
        })
        .slice(0, afterDedup.length - MEMORY_COMPRESSION_TARGET);

      for (const m of toRemove) {
        // Never remove high-importance instructions
        if (m.category === "instruction" && m.importance >= 8) continue;
        await this.remove(m.id);
        removed++;
      }
    }

    if (removed > 0) {
      log.info(`Memory compression complete: removed ${removed} memories`);
    }
    return removed;
  } catch (err) {
    log.warn(`Memory compression error: ${err instanceof Error ? err.message : err}`);
    // Report what was really deleted before the failure instead of claiming 0.
    return removed;
  }
}
227
+
/**
 * Find near-duplicate memories based on word-level content similarity.
 *
 * Two memories are duplicates when they share a category and
 * `computeWordOverlap` of their contents is at least
 * MEMORY_DEDUP_SIMILARITY_THRESHOLD. Of each duplicate pair the one with the
 * higher importance is kept; on equal importance the one with the later
 * `created_at` is kept.
 *
 * @param memories Candidate memories; the array is not modified.
 * @returns IDs of the memories that should be removed.
 */
private findDuplicates(memories: Memory[]): Set<string> {
  const toRemove = new Set<string>();

  for (let i = 0; i < memories.length; i++) {
    const current = memories[i];
    if (toRemove.has(current.id)) continue;

    for (let j = i + 1; j < memories.length; j++) {
      const candidate = memories[j];
      if (toRemove.has(candidate.id)) continue;
      if (current.category !== candidate.category) continue;

      const similarity = computeWordOverlap(current.content, candidate.content);
      if (similarity < MEMORY_DEDUP_SIMILARITY_THRESHOLD) continue;

      // Keep the one with higher importance, or if equal, the newer one
      const currentWins =
        current.importance > candidate.importance ||
        (current.importance === candidate.importance &&
          new Date(current.created_at).getTime() > new Date(candidate.created_at).getTime());

      if (currentWins) {
        toRemove.add(candidate.id);
      } else {
        toRemove.add(current.id);
        // `current` is being discarded, so it must not go on to eliminate
        // later memories: they may not resemble the entry that is kept.
        break;
      }
    }
  }

  return toRemove;
}
260
+ }
261
+
/**
 * Compute word-level Jaccard similarity between two strings.
 *
 * Each string is lower-cased and split on whitespace into a set of distinct
 * words; the result is |A ∩ B| / |A ∪ B|. Punctuation is not stripped, so
 * "mode." and "mode" count as different words.
 *
 * @param a First text.
 * @param b Second text.
 * @returns A value between 0 (no shared words) and 1 (same word set). Two
 *   texts without any words yield 1; exactly one empty text yields 0.
 */
function computeWordOverlap(a: string, b: string): number {
  const wordsA = new Set(a.toLowerCase().split(/\s+/).filter(Boolean));
  const wordsB = new Set(b.toLowerCase().split(/\s+/).filter(Boolean));

  if (wordsA.size === 0 && wordsB.size === 0) return 1;
  if (wordsA.size === 0 || wordsB.size === 0) return 0;

  // Walk the smaller set and probe the larger one; the count is the same
  // either way, but this does fewer lookups.
  const smaller = wordsA.size <= wordsB.size ? wordsA : wordsB;
  const larger = smaller === wordsA ? wordsB : wordsA;

  let intersection = 0;
  for (const w of smaller) {
    if (larger.has(w)) intersection++;
  }

  // Both sets are non-empty here, so the union holds at least one word and
  // the division is always defined.
  return intersection / (wordsA.size + wordsB.size - intersection);
}
@@ -28,13 +28,14 @@ import {
28
28
  createAgentToolsServer,
29
29
  BROWSER_TOOL_NAMES,
30
30
  } from "./mcp-servers.js";
31
- import { createEventHooks } from "./event-hooks.js";
31
+ import { createEventHooks, type ToolFailureRecord } from "./event-hooks.js";
32
32
  import { BASE_SYSTEM_PROMPT } from "./system-prompt.js";
33
33
  import {
34
34
  MAX_RESPONSE_CONTENT_LENGTH,
35
35
  MAX_HISTORY_ENTRIES,
36
36
  MAX_HISTORY_RESPONSE_LENGTH,
37
37
  MAX_COMPLETE_TASK_RETRIES,
38
+ MAX_BUDGET_USD,
38
39
  } from "../utils/constants.js";
39
40
  import { errorMessage } from "../utils/errors.js";
40
41
 
@@ -87,12 +88,11 @@ class TaskTimeout {
87
88
  }
88
89
  }
89
90
 
90
- // Constants are now imported from utils/constants.ts
91
-
92
91
  export class TaskProcessor {
93
92
  private memoryManager: MemoryManager | null = null;
94
93
  private skillManager: SkillManager;
95
94
  private sessionId: string | null = null;
95
+ private userId: string | null = null;
96
96
  /** In-memory conversation history, keyed by conversation_id */
97
97
  private historyCache: Map<string, HistoryEntry[]> = new Map();
98
98
 
@@ -100,7 +100,13 @@ export class TaskProcessor {
100
100
  this.skillManager = new SkillManager();
101
101
  }
102
102
 
103
+ /** @deprecated Use setUserId() instead */
103
104
  init(userId: string): void {
105
+ this.setUserId(userId);
106
+ }
107
+
108
+ setUserId(userId: string): void {
109
+ this.userId = userId;
104
110
  this.memoryManager = new MemoryManager();
105
111
  this.skillManager.setUserId(userId);
106
112
  // Load DB skills asynchronously (non-blocking)
@@ -139,6 +145,7 @@ export class TaskProcessor {
139
145
 
140
146
  let finalResponse = "";
141
147
  const toolCallRecords: ToolCallRecord[] = [];
148
+ const toolFailures: ToolFailureRecord[] = [];
142
149
  let tokenUsage: Record<string, number> | undefined;
143
150
  let agentSessionId: string | undefined;
144
151
 
@@ -162,7 +169,6 @@ export class TaskProcessor {
162
169
  }
163
170
 
164
171
  // Inject lightweight skill descriptions (full content loaded on-demand via skill_invoke)
165
- // Pass task prompt so relevant skills are prioritized to the top
166
172
  const skillPrompt = this.skillManager.buildSkillDescriptions(task.prompt);
167
173
  if (skillPrompt) {
168
174
  systemPrompt += skillPrompt;
@@ -176,7 +182,6 @@ export class TaskProcessor {
176
182
  log.debug("DB conversation history unavailable, using in-memory cache");
177
183
  }
178
184
 
179
- // Fall back to in-memory cache if DB returned nothing
180
185
  if (history.length === 0) {
181
186
  history = this.historyCache.get(task.conversation_id) || [];
182
187
  }
@@ -210,21 +215,13 @@ export class TaskProcessor {
210
215
  onUserWaitEnd: () => taskTimeout.resume(),
211
216
  });
212
217
 
213
- // Create event hooks for Supabase event emission
214
- const eventHooks = createEventHooks(task.id, toolCallRecords);
218
+ // Create event hooks with failure tracking
219
+ const eventHooks = createEventHooks(task.id, toolCallRecords, toolFailures);
215
220
 
216
221
  // Build allowed tools list
217
222
  const allowedTools = [
218
- // SDK built-in tools
219
- "Read",
220
- "Write",
221
- "Edit",
222
- "Bash",
223
- "Glob",
224
- "Grep",
225
- // Browser MCP tools
223
+ "Read", "Write", "Edit", "Bash", "Glob", "Grep",
226
224
  ...BROWSER_TOOL_NAMES.map((n) => `mcp__assistme-browser__${n}`),
227
- // Agent MCP tools (memory, skills)
228
225
  "mcp__assistme-agent__memory_store",
229
226
  "mcp__assistme-agent__skill_create",
230
227
  "mcp__assistme-agent__skill_improve",
@@ -235,31 +232,20 @@ export class TaskProcessor {
235
232
  "mcp__assistme-agent__skill_browse",
236
233
  "mcp__assistme-agent__skill_add",
237
234
  "mcp__assistme-agent__skill_publish",
238
- // User interaction
239
235
  "mcp__assistme-agent__ask_user",
240
- // Job automation tools
241
236
  "mcp__assistme-agent__job_run",
242
237
  "mcp__assistme-agent__job_schedule",
243
238
  "mcp__assistme-agent__job_status",
244
- // Credential tools (local storage)
245
239
  "mcp__assistme-agent__credential_get",
246
240
  "mcp__assistme-agent__credential_set",
247
241
  "mcp__assistme-agent__credential_list",
248
242
  "mcp__assistme-agent__credential_remove",
249
243
  ];
250
244
 
251
- // Build async generator for prompt (required for MCP tools)
252
- async function* promptMessages() {
253
- yield {
254
- type: "user" as const,
255
- message: {
256
- role: "user" as const,
257
- content: task.prompt,
258
- },
259
- parent_tool_use_id: null,
260
- session_id: "",
261
- };
262
- }
245
+ const mcpServers = {
246
+ "assistme-browser": browserServer,
247
+ "assistme-agent": agentToolsServer,
248
+ };
263
249
 
264
250
  const options: Options = {
265
251
  model: config.model,
@@ -269,40 +255,30 @@ export class TaskProcessor {
269
255
  allowedTools,
270
256
  permissionMode: "bypassPermissions",
271
257
  allowDangerouslySkipPermissions: true,
272
- mcpServers: {
273
- "assistme-browser": browserServer,
274
- "assistme-agent": agentToolsServer,
275
- },
258
+ mcpServers,
276
259
  hooks: eventHooks,
277
260
  persistSession: true,
278
261
  abortController,
262
+ thinking: { type: "adaptive" },
263
+ effort: "high",
264
+ maxBudgetUsd: MAX_BUDGET_USD,
279
265
  };
280
266
 
267
+ // ── Execute: single SDK query handles planning, execution, and self-verification
281
268
  try {
282
- for await (const message of query({
283
- prompt: promptMessages(),
284
- options,
285
- })) {
286
- // Timeout is handled by TaskTimeout + AbortController
287
-
269
+ for await (const message of query({ prompt: task.prompt, options })) {
288
270
  switch (message.type) {
289
271
  case "assistant": {
290
- // Extract text and thinking from content blocks
291
272
  const assistantMsg = message as SDKAssistantMessage;
292
273
  for (const block of assistantMsg.message.content) {
293
274
  if (block.type === "text") {
294
275
  finalResponse += block.text;
295
276
  log.agent(block.text);
296
- await emitEvent(task.id, "text_delta", {
297
- text: block.text,
298
- });
277
+ await emitEvent(task.id, "text_delta", { text: block.text });
299
278
  } else if (block.type === "thinking" && "thinking" in block) {
300
279
  const thinkingBlock = block as { type: "thinking"; thinking: string };
301
- const thinkingText = thinkingBlock.thinking;
302
- log.debug(`Thinking: ${thinkingText.slice(0, 100)}...`);
303
- await emitEvent(task.id, "thinking", {
304
- text: thinkingText,
305
- });
280
+ log.debug(`Thinking: ${thinkingBlock.thinking.slice(0, 100)}...`);
281
+ await emitEvent(task.id, "thinking", { text: thinkingBlock.thinking });
306
282
  }
307
283
  }
308
284
  break;
@@ -310,7 +286,6 @@ export class TaskProcessor {
310
286
 
311
287
  case "result": {
312
288
  const resultMsg = message as SDKResultMessage;
313
- // Extract token usage
314
289
  tokenUsage = {
315
290
  input_tokens: resultMsg.usage.input_tokens,
316
291
  output_tokens: resultMsg.usage.output_tokens,
@@ -318,18 +293,17 @@ export class TaskProcessor {
318
293
 
319
294
  if (resultMsg.subtype === "success") {
320
295
  const successMsg = resultMsg as SDKResultSuccess;
321
- // Use result text as final response if we didn't collect
322
- // text from assistant messages (fallback)
323
296
  if (!finalResponse && successMsg.result) {
324
297
  finalResponse = successMsg.result;
325
298
  }
299
+ agentSessionId = successMsg.session_id;
326
300
  log.info(
327
301
  `Task cost: $${successMsg.total_cost_usd.toFixed(4)}, turns: ${successMsg.num_turns}`
328
302
  );
329
303
  } else {
330
- const errorMsg = resultMsg as SDKResultError;
331
- log.warn(`SDK result: ${errorMsg.subtype}`);
332
- for (const err of errorMsg.errors) {
304
+ const errMsg = resultMsg as SDKResultError;
305
+ log.warn(`SDK result: ${errMsg.subtype}`);
306
+ for (const err of errMsg.errors) {
333
307
  await emitEvent(task.id, "error", { message: err });
334
308
  }
335
309
  }
@@ -337,7 +311,6 @@ export class TaskProcessor {
337
311
  }
338
312
 
339
313
  default:
340
- // Capture session ID from init message for post-task session resume
341
314
  if (message.type === "system" && "subtype" in message) {
342
315
  const sysMsg = message as { type: string; subtype?: string; session_id?: string };
343
316
  if (sysMsg.subtype === "init" && sysMsg.session_id) {
@@ -370,27 +343,32 @@ export class TaskProcessor {
370
343
  // Save to in-memory conversation history cache
371
344
  const convHistory = this.historyCache.get(task.conversation_id) || [];
372
345
  convHistory.push({ prompt: task.prompt, response: finalResponse });
373
- // Keep only the most recent entries
374
346
  if (convHistory.length > MAX_HISTORY_ENTRIES * 2) {
375
347
  convHistory.splice(0, convHistory.length - MAX_HISTORY_ENTRIES * 2);
376
348
  }
377
349
  this.historyCache.set(task.conversation_id, convHistory);
378
350
 
379
- // Post-task: resume the same session to evaluate skill creation (fire-and-forget)
351
+ // Post-task: trigger memory compression (non-blocking)
352
+ if (this.memoryManager) {
353
+ this.memoryManager.compressIfNeeded().catch((err) =>
354
+ log.debug(`Memory compression skipped: ${err}`)
355
+ );
356
+ }
357
+
358
+ // Post-task: resume the same session to evaluate skill creation
380
359
  if (agentSessionId) {
381
360
  this.evaluateSkillPostTask(agentSessionId, config.model).catch((err) =>
382
361
  log.debug(`Post-task skill evaluation skipped: ${err}`)
383
362
  );
384
363
  }
385
364
  } catch (err) {
386
- const errorMsg = errorMessage(err);
387
- log.error(`Task failed: ${errorMsg}`);
365
+ const errMsg = errorMessage(err);
366
+ log.error(`Task failed: ${errMsg}`);
388
367
 
389
- await failTask(task.id, errorMsg);
390
- await emitEvent(task.id, "error", { message: errorMsg });
368
+ await failTask(task.id, errMsg);
369
+ await emitEvent(task.id, "error", { message: errMsg });
391
370
  await emitEvent(task.id, "status_change", { status: "failed" });
392
371
  } finally {
393
- // Clear correlation ID
394
372
  setCorrelationId(null);
395
373
 
396
374
  // Disconnect browser after task (keep user's browser running)