npm - assistme - Versions diffs - 0.3.6 → 0.4.0 - Mend

assistme 0.3.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/{chunk-4YWS463E.js → chunk-4SBIN27G.js} +10 -0
package/dist/index.js +296 -93
package/dist/{job-runner-JT3JWZBV.js → job-runner-CJ7HM4GZ.js} +1 -1
package/package.json +1 -1
package/src/agent/event-hooks.ts +43 -2
package/src/agent/memory.ts +124 -0
package/src/agent/processor.ts +42 -64
package/src/agent/skill-evaluator.ts +173 -61
package/src/agent/system-prompt.ts +9 -0
package/src/db/types.ts +3 -1
package/src/utils/constants.ts +21 -0
package/src/utils/schemas.ts +33 -0
package/{src → tests}/agent/event-hooks.test.ts +121 -33
package/{src → tests}/agent/mcp-servers.test.ts +43 -29
package/{src → tests}/agent/memory.test.ts +71 -3
package/{src → tests}/agent/processor.test.ts +59 -55
package/{src → tests}/agent/scheduler.test.ts +1 -1
package/{src → tests}/agent/session.test.ts +20 -10
package/{src → tests}/agent/skills.test.ts +51 -29
package/{src → tests}/credentials/credential-store.test.ts +23 -8
package/{src → tests}/credentials/encryption.test.ts +1 -1
package/{src → tests}/db/supabase.test.ts +4 -4
package/{src → tests}/tools/filesystem.test.ts +6 -15
package/{src → tests}/tools/shell.test.ts +1 -1
package/{src → tests}/utils/config.test.ts +2 -1
package/{src → tests}/utils/rate-limiter.test.ts +1 -1
package/{src → tests}/utils/retry.test.ts +6 -12
package/tsconfig.json +1 -1
package/vitest.config.ts +1 -1

package/src/agent/event-hooks.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import type {
   HookCallback,
   PreToolUseHookInput,
   PostToolUseHookInput,
+  PostToolUseFailureHookInput,
 } from "@anthropic-ai/claude-agent-sdk";
 import { emitEvent } from "../db/supabase.js";
 import { log } from "../utils/logger.js";
@@ -13,6 +14,16 @@ import {
   MAX_SKILL_RECORD_RESULT_LENGTH,
 } from "../utils/constants.js";
+/**
+ * Record of a tool call that failed, used for reflection and strategy switching.
+ *
+ * Entries are appended by the PostToolUseFailure hook built in createEventHooks.
+ */
+export interface ToolFailureRecord {
+  /** Tool name; the failure hook stores it with the MCP server prefix stripped. */
+  toolName: string;
+  /** Input the tool was called with; the failure hook falls back to `{}` when it is falsy. */
+  input: Record<string, unknown>;
+  /** Error text; the failure hook truncates it to 500 characters. */
+  error: string;
+  /** Time the failure was recorded, taken from `Date.now()`. */
+  timestamp: number;
+}
 /**
  * Strip MCP server prefix from tool names for web UI compatibility.
  * e.g. "mcp__assistme-browser__browser_navigate" → "browser_navigate"
@@ -23,14 +34,17 @@ export function stripMcpPrefix(toolName: string): string {
   return match ? match[1] : toolName;
 }
 /**
  * Create PreToolUse and PostToolUse hooks that emit events to Supabase.
  * These hooks let the web UI display tool activity in real-time.
+ * Also tracks tool failures for agentic reflection and strategy switching.
  */
 export function createEventHooks(
   taskId: string,
-  toolCallRecords: ToolCallRecord[]
-): Partial<Record<"PreToolUse" | "PostToolUse", HookCallbackMatcher[]>> {
+  toolCallRecords: ToolCallRecord[],
+  toolFailures: ToolFailureRecord[] = []
+): Partial<Record<"PreToolUse" | "PostToolUse" | "PostToolUseFailure", HookCallbackMatcher[]>> {
   const preToolUseHook: HookCallback = async (input) => {
     if (input.hook_event_name !== "PreToolUse") return { continue: true };
@@ -84,8 +98,35 @@ export function createEventHooks(
     return {};
   };
+  /**
+   * PostToolUseFailure hook: appends the failure to `toolFailures` and emits
+   * a "tool_failure" event for the task. Inputs for any other hook event are
+   * ignored. Always resolves to an empty hook result.
+   */
+  const postToolUseFailureHook: HookCallback = async (input) => {
+    if (input.hook_event_name !== "PostToolUseFailure") {
+      return {};
+    }
+    const { tool_name, tool_input, error } = input as PostToolUseFailureHookInput;
+    const shortName = stripMcpPrefix(tool_name);
+    // The same 500-character cap applies to the stored record and the event.
+    const truncatedError = error.slice(0, 500);
+    toolFailures.push({
+      toolName: shortName,
+      input: (tool_input as Record<string, unknown>) || {},
+      error: truncatedError,
+      timestamp: Date.now(),
+    });
+    // Counted after the push, so the failure just recorded is included.
+    const sameToolFailures = toolFailures.filter((f) => f.toolName === shortName).length;
+    await emitEvent(taskId, "tool_failure", {
+      name: shortName,
+      error: truncatedError,
+      failure_count: sameToolFailures,
+    });
+    log.warn(`Tool failure tracked: ${shortName} (total: ${toolFailures.length})`);
+    return {};
+  };
   return {
     PreToolUse: [{ hooks: [preToolUseHook] }],
     PostToolUse: [{ hooks: [postToolUseHook] }],
+    PostToolUseFailure: [{ hooks: [postToolUseFailureHook] }],
   };
 }

package/src/agent/memory.ts CHANGED Viewed

@@ -1,5 +1,10 @@
 import { callMcpHandler } from "../db/api-client.js";
 import { log } from "../utils/logger.js";
+import {
+  MEMORY_DEDUP_SIMILARITY_THRESHOLD,
+  MEMORY_COMPRESSION_THRESHOLD,
+  MEMORY_COMPRESSION_TARGET,
+} from "../utils/constants.js";
 export type MemoryCategory =
   | "general"
@@ -153,4 +158,123 @@ export class MemoryManager {
     });
     return result.count;
   }
+  // ── Compression & Deduplication ──────────────────────────────────
+  /**
+   * Check if memory count exceeds threshold and compress if needed.
+   * Called automatically after task completion.
+   *
+   * Works on at most 200 listed memories and runs three passes:
+   *   1. remove memories whose expires_at is in the past,
+   *   2. remove near-duplicates selected by findDuplicates(),
+   *   3. if more than MEMORY_COMPRESSION_TARGET remain, remove the surplus
+   *      starting from the lowest importance, then lowest access_count, then
+   *      oldest, skipping "instruction" memories with importance >= 8.
+   *
+   * Errors are logged as warnings and never thrown.
+   *
+   * @returns Number of memories removed. If an error interrupts the run, the
+   *          removals completed before the error are still counted.
+   */
+  async compressIfNeeded(): Promise<number> {
+    // Declared outside the try block so the catch handler can report
+    // removals that already happened instead of a misleading 0.
+    let removed = 0;
+    try {
+      const all = await this.list(undefined, 200);
+      if (all.length < MEMORY_COMPRESSION_THRESHOLD) {
+        return 0;
+      }
+      log.info(`Memory compression triggered: ${all.length} memories (threshold: ${MEMORY_COMPRESSION_THRESHOLD})`);
+      // Step 1: Remove expired memories
+      const now = Date.now();
+      for (const m of all) {
+        if (m.expires_at && new Date(m.expires_at).getTime() < now) {
+          await this.remove(m.id);
+          removed++;
+        }
+      }
+      // Step 2: Deduplicate similar memories (keep the higher-importance one)
+      const remaining = all.filter(
+        (m) => !m.expires_at || new Date(m.expires_at).getTime() >= now
+      );
+      const duplicateIds = this.findDuplicates(remaining);
+      for (const id of duplicateIds) {
+        await this.remove(id);
+        removed++;
+      }
+      // Step 3: If still over target, remove lowest-importance, least-accessed memories
+      const afterDedup = remaining.filter((m) => !duplicateIds.has(m.id));
+      if (afterDedup.length > MEMORY_COMPRESSION_TARGET) {
+        // afterDedup is a fresh array from filter(), so sorting it in place
+        // does not disturb `all` or `remaining`.
+        const toRemove = afterDedup
+          .sort((a, b) => {
+            // Sort by importance ASC, then access_count ASC, then created_at ASC
+            if (a.importance !== b.importance) return a.importance - b.importance;
+            if (a.access_count !== b.access_count) return a.access_count - b.access_count;
+            return new Date(a.created_at).getTime() - new Date(b.created_at).getTime();
+          })
+          .slice(0, afterDedup.length - MEMORY_COMPRESSION_TARGET);
+        for (const m of toRemove) {
+          // Never remove high-importance instructions. Skipped entries are not
+          // replaced by other candidates, so the final count may stay above
+          // the target.
+          if (m.category === "instruction" && m.importance >= 8) continue;
+          await this.remove(m.id);
+          removed++;
+        }
+      }
+      if (removed > 0) {
+        log.info(`Memory compression complete: removed ${removed} memories`);
+      }
+      return removed;
+    } catch (err) {
+      log.warn(`Memory compression error: ${err instanceof Error ? err.message : err}`);
+      // Report whatever was removed before the failure.
+      return removed;
+    }
+  }
+  /**
+   * Find duplicate memories based on content similarity.
+   * Returns the IDs of memories that should be removed (keeps the higher-importance duplicate).
+   *
+   * Only memories in the same category are compared. Two memories count as
+   * duplicates when their word overlap is at least
+   * MEMORY_DEDUP_SIMILARITY_THRESHOLD. On equal importance the newer
+   * memory (by created_at) is kept.
+   *
+   * @param memories - Candidate memories; the array is not modified.
+   * @returns IDs of the memories to delete.
+   */
+  private findDuplicates(memories: Memory[]): Set<string> {
+    const toRemove = new Set<string>();
+    for (let i = 0; i < memories.length; i++) {
+      const current = memories[i];
+      if (toRemove.has(current.id)) continue;
+      for (let j = i + 1; j < memories.length; j++) {
+        const candidate = memories[j];
+        if (toRemove.has(candidate.id)) continue;
+        if (current.category !== candidate.category) continue;
+        const similarity = computeWordOverlap(current.content, candidate.content);
+        if (similarity >= MEMORY_DEDUP_SIMILARITY_THRESHOLD) {
+          // Keep the one with higher importance, or if equal, the newer one
+          const keepCurrent =
+            current.importance > candidate.importance ||
+            (current.importance === candidate.importance &&
+              new Date(current.created_at).getTime() > new Date(candidate.created_at).getTime());
+          if (keepCurrent) {
+            toRemove.add(candidate.id);
+          } else {
+            toRemove.add(current.id);
+            // `current` is now scheduled for removal, so it must not be used
+            // to eliminate later entries: similarity is not transitive, and a
+            // later memory may resemble only `current`, not the one kept.
+            break;
+          }
+        }
+      }
+    }
+    return toRemove;
+  }
+}
+/**
+ * Word-level Jaccard similarity of two strings.
+ * Words are lower-cased, whitespace-separated tokens; repeated words count once.
+ * Returns a value from 0 (no shared words) to 1 (same word set).
+ * Two strings that both contain no words score 1.
+ */
+function computeWordOverlap(a: string, b: string): number {
+  const tokenize = (text: string): Set<string> =>
+    new Set(text.toLowerCase().split(/\s+/).filter(Boolean));
+  const left = tokenize(a);
+  const right = tokenize(b);
+  if (left.size === 0) return right.size === 0 ? 1 : 0;
+  if (right.size === 0) return 0;
+  const shared = [...left].filter((word) => right.has(word)).length;
+  // Both sets are non-empty here, so the union size is at least 1.
+  return shared / (left.size + right.size - shared);
 }

package/src/agent/processor.ts CHANGED Viewed

@@ -28,13 +28,14 @@ import {
   createAgentToolsServer,
   BROWSER_TOOL_NAMES,
 } from "./mcp-servers.js";
-import { createEventHooks } from "./event-hooks.js";
+import { createEventHooks, type ToolFailureRecord } from "./event-hooks.js";
 import { BASE_SYSTEM_PROMPT } from "./system-prompt.js";
 import {
   MAX_RESPONSE_CONTENT_LENGTH,
   MAX_HISTORY_ENTRIES,
   MAX_HISTORY_RESPONSE_LENGTH,
   MAX_COMPLETE_TASK_RETRIES,
+  MAX_BUDGET_USD,
 } from "../utils/constants.js";
 import { errorMessage } from "../utils/errors.js";
@@ -87,12 +88,11 @@ class TaskTimeout {
   }
 }
-// Constants are now imported from utils/constants.ts
 export class TaskProcessor {
   private memoryManager: MemoryManager | null = null;
   private skillManager: SkillManager;
   private sessionId: string | null = null;
+  private userId: string | null = null;
   /** In-memory conversation history, keyed by conversation_id */
   private historyCache: Map<string, HistoryEntry[]> = new Map();
@@ -100,7 +100,13 @@ export class TaskProcessor {
     this.skillManager = new SkillManager();
   }
+  /**
+   * Backward-compatible alias that forwards to setUserId().
+   * @param userId - Passed through to setUserId() unchanged.
+   * @deprecated Use setUserId() instead
+   */
   init(userId: string): void {
+    this.setUserId(userId);
+  }
+  setUserId(userId: string): void {
+    this.userId = userId;
     this.memoryManager = new MemoryManager();
     this.skillManager.setUserId(userId);
     // Load DB skills asynchronously (non-blocking)
@@ -139,6 +145,7 @@ export class TaskProcessor {
     let finalResponse = "";
     const toolCallRecords: ToolCallRecord[] = [];
+    const toolFailures: ToolFailureRecord[] = [];
     let tokenUsage: Record<string, number> | undefined;
     let agentSessionId: string | undefined;
@@ -162,7 +169,6 @@ export class TaskProcessor {
       }
       // Inject lightweight skill descriptions (full content loaded on-demand via skill_invoke)
-      // Pass task prompt so relevant skills are prioritized to the top
       const skillPrompt = this.skillManager.buildSkillDescriptions(task.prompt);
       if (skillPrompt) {
         systemPrompt += skillPrompt;
@@ -176,7 +182,6 @@ export class TaskProcessor {
         log.debug("DB conversation history unavailable, using in-memory cache");
       }
-      // Fall back to in-memory cache if DB returned nothing
       if (history.length === 0) {
         history = this.historyCache.get(task.conversation_id) || [];
       }
@@ -210,21 +215,13 @@ export class TaskProcessor {
         onUserWaitEnd: () => taskTimeout.resume(),
       });
-      // Create event hooks for Supabase event emission
-      const eventHooks = createEventHooks(task.id, toolCallRecords);
+      // Create event hooks with failure tracking
+      const eventHooks = createEventHooks(task.id, toolCallRecords, toolFailures);
       // Build allowed tools list
       const allowedTools = [
-        // SDK built-in tools
-        "Read",
-        "Write",
-        "Edit",
-        "Bash",
-        "Glob",
-        "Grep",
-        // Browser MCP tools
+        "Read", "Write", "Edit", "Bash", "Glob", "Grep",
         ...BROWSER_TOOL_NAMES.map((n) => `mcp__assistme-browser__${n}`),
-        // Agent MCP tools (memory, skills)
         "mcp__assistme-agent__memory_store",
         "mcp__assistme-agent__skill_create",
         "mcp__assistme-agent__skill_improve",
@@ -235,31 +232,20 @@ export class TaskProcessor {
         "mcp__assistme-agent__skill_browse",
         "mcp__assistme-agent__skill_add",
         "mcp__assistme-agent__skill_publish",
-        // User interaction
         "mcp__assistme-agent__ask_user",
-        // Job automation tools
         "mcp__assistme-agent__job_run",
         "mcp__assistme-agent__job_schedule",
         "mcp__assistme-agent__job_status",
-        // Credential tools (local storage)
         "mcp__assistme-agent__credential_get",
         "mcp__assistme-agent__credential_set",
         "mcp__assistme-agent__credential_list",
         "mcp__assistme-agent__credential_remove",
       ];
-      // Build async generator for prompt (required for MCP tools)
-      async function* promptMessages() {
-        yield {
-          type: "user" as const,
-          message: {
-            role: "user" as const,
-            content: task.prompt,
-          },
-          parent_tool_use_id: null,
-          session_id: "",
-        };
-      }
+      const mcpServers = {
+        "assistme-browser": browserServer,
+        "assistme-agent": agentToolsServer,
+      };
       const options: Options = {
         model: config.model,
@@ -269,40 +255,30 @@ export class TaskProcessor {
         allowedTools,
         permissionMode: "bypassPermissions",
         allowDangerouslySkipPermissions: true,
-        mcpServers: {
-          "assistme-browser": browserServer,
-          "assistme-agent": agentToolsServer,
-        },
+        mcpServers,
         hooks: eventHooks,
         persistSession: true,
         abortController,
+        thinking: { type: "adaptive" },
+        effort: "high",
+        maxBudgetUsd: MAX_BUDGET_USD,
       };
+      // ── Execute: single SDK query handles planning, execution, and self-verification
       try {
-        for await (const message of query({
-          prompt: promptMessages(),
-          options,
-        })) {
-          // Timeout is handled by TaskTimeout + AbortController
+        for await (const message of query({ prompt: task.prompt, options })) {
           switch (message.type) {
             case "assistant": {
-              // Extract text and thinking from content blocks
               const assistantMsg = message as SDKAssistantMessage;
               for (const block of assistantMsg.message.content) {
                 if (block.type === "text") {
                   finalResponse += block.text;
                   log.agent(block.text);
-                  await emitEvent(task.id, "text_delta", {
-                    text: block.text,
-                  });
+                  await emitEvent(task.id, "text_delta", { text: block.text });
                 } else if (block.type === "thinking" && "thinking" in block) {
                   const thinkingBlock = block as { type: "thinking"; thinking: string };
-                  const thinkingText = thinkingBlock.thinking;
-                  log.debug(`Thinking: ${thinkingText.slice(0, 100)}...`);
-                  await emitEvent(task.id, "thinking", {
-                    text: thinkingText,
-                  });
+                  log.debug(`Thinking: ${thinkingBlock.thinking.slice(0, 100)}...`);
+                  await emitEvent(task.id, "thinking", { text: thinkingBlock.thinking });
                 }
               }
               break;
@@ -310,7 +286,6 @@ export class TaskProcessor {
             case "result": {
               const resultMsg = message as SDKResultMessage;
-              // Extract token usage
               tokenUsage = {
                 input_tokens: resultMsg.usage.input_tokens,
                 output_tokens: resultMsg.usage.output_tokens,
@@ -318,18 +293,17 @@ export class TaskProcessor {
               if (resultMsg.subtype === "success") {
                 const successMsg = resultMsg as SDKResultSuccess;
-                // Use result text as final response if we didn't collect
-                // text from assistant messages (fallback)
                 if (!finalResponse && successMsg.result) {
                   finalResponse = successMsg.result;
                 }
+                agentSessionId = successMsg.session_id;
                 log.info(
                   `Task cost: $${successMsg.total_cost_usd.toFixed(4)}, turns: ${successMsg.num_turns}`
                 );
               } else {
-                const errorMsg = resultMsg as SDKResultError;
-                log.warn(`SDK result: ${errorMsg.subtype}`);
-                for (const err of errorMsg.errors) {
+                const errMsg = resultMsg as SDKResultError;
+                log.warn(`SDK result: ${errMsg.subtype}`);
+                for (const err of errMsg.errors) {
                   await emitEvent(task.id, "error", { message: err });
                 }
               }
@@ -337,7 +311,6 @@ export class TaskProcessor {
             }
             default:
-              // Capture session ID from init message for post-task session resume
               if (message.type === "system" && "subtype" in message) {
                 const sysMsg = message as { type: string; subtype?: string; session_id?: string };
                 if (sysMsg.subtype === "init" && sysMsg.session_id) {
@@ -370,27 +343,32 @@ export class TaskProcessor {
       // Save to in-memory conversation history cache
       const convHistory = this.historyCache.get(task.conversation_id) || [];
       convHistory.push({ prompt: task.prompt, response: finalResponse });
-      // Keep only the most recent entries
       if (convHistory.length > MAX_HISTORY_ENTRIES * 2) {
         convHistory.splice(0, convHistory.length - MAX_HISTORY_ENTRIES * 2);
       }
       this.historyCache.set(task.conversation_id, convHistory);
-      // Post-task: resume the same session to evaluate skill creation (fire-and-forget)
+      // Post-task: trigger memory compression (non-blocking)
+      if (this.memoryManager) {
+        this.memoryManager.compressIfNeeded().catch((err) =>
+          log.debug(`Memory compression skipped: ${err}`)
+        );
+      }
+      // Post-task: resume the same session to evaluate skill creation
       if (agentSessionId) {
         this.evaluateSkillPostTask(agentSessionId, config.model).catch((err) =>
           log.debug(`Post-task skill evaluation skipped: ${err}`)
         );
       }
     } catch (err) {
-      const errorMsg = errorMessage(err);
-      log.error(`Task failed: ${errorMsg}`);
+      const errMsg = errorMessage(err);
+      log.error(`Task failed: ${errMsg}`);
-      await failTask(task.id, errorMsg);
-      await emitEvent(task.id, "error", { message: errorMsg });
+      await failTask(task.id, errMsg);
+      await emitEvent(task.id, "error", { message: errMsg });
       await emitEvent(task.id, "status_change", { status: "failed" });
     } finally {
-      // Clear correlation ID
       setCorrelationId(null);
       // Disconnect browser after task (keep user's browser running)