assistme 0.2.8 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,45 +1,9 @@
1
1
  import { describe, it, expect, vi, beforeEach } from "vitest";
2
2
 
3
- // Build a fluent mock that mimics the Supabase query builder chain
4
- function createSupabaseMock() {
5
- let finalResult: Record<string, unknown> = { data: [], error: null };
6
-
7
- const chain: Record<string, unknown> = {};
8
- const methods = [
9
- "select", "insert", "update", "delete", "eq", "neq", "not",
10
- "or", "in", "order", "limit", "single", "from",
11
- ];
12
- for (const method of methods) {
13
- chain[method] = vi.fn().mockReturnValue(chain);
14
- }
15
-
16
- // Make the chain thenable (for await)
17
- chain.then = (resolve: (value: unknown) => void) => resolve(finalResult);
18
- chain.single = vi.fn().mockImplementation(() => {
19
- const singleChain = { ...chain, then: (resolve: (value: unknown) => void) => resolve(finalResult) };
20
- return singleChain;
21
- });
3
+ const mockCallMcpHandler = vi.fn();
22
4
 
23
- const mockSupabase = {
24
- from: vi.fn().mockReturnValue(chain),
25
- _chain: chain,
26
- _setResult(result: Record<string, unknown>) {
27
- finalResult = result;
28
- chain.then = (resolve: (value: unknown) => void) => resolve(result);
29
- chain.single = vi.fn().mockImplementation(() => ({
30
- ...chain,
31
- then: (resolve: (value: unknown) => void) => resolve(result),
32
- }));
33
- },
34
- };
35
-
36
- return mockSupabase;
37
- }
38
-
39
- let mockSb: ReturnType<typeof createSupabaseMock>;
40
-
41
- vi.mock("../db/supabase.js", () => ({
42
- getSupabase: () => mockSb,
5
+ vi.mock("../db/api-client.js", () => ({
6
+ callMcpHandler: (...args: unknown[]) => mockCallMcpHandler(...args),
43
7
  }));
44
8
 
45
9
  vi.mock("../utils/logger.js", () => ({
@@ -58,12 +22,11 @@ describe("MemoryManager", () => {
58
22
 
59
23
  beforeEach(() => {
60
24
  vi.clearAllMocks();
61
- mockSb = createSupabaseMock();
62
25
  manager = new MemoryManager("user-123");
63
26
  });
64
27
 
65
28
  describe("remember()", () => {
66
- it("inserts a memory with default values", async () => {
29
+ it("stores a memory with default values", async () => {
67
30
  const mockData = {
68
31
  id: "mem-1",
69
32
  user_id: "user-123",
@@ -77,16 +40,23 @@ describe("MemoryManager", () => {
77
40
  last_accessed_at: null,
78
41
  created_at: "2026-01-01",
79
42
  };
80
- mockSb._setResult({ data: mockData, error: null });
43
+ mockCallMcpHandler.mockResolvedValueOnce(mockData);
81
44
 
82
45
  const result = await manager.remember("User prefers dark mode");
83
46
 
84
- expect(mockSb.from).toHaveBeenCalledWith("agent_memories");
47
+ expect(mockCallMcpHandler).toHaveBeenCalledWith("memory.store", {
48
+ category: "general",
49
+ content: "User prefers dark mode",
50
+ importance: 5,
51
+ tags: [],
52
+ source_message_id: null,
53
+ expires_at: null,
54
+ });
85
55
  expect(result.content).toBe("User prefers dark mode");
86
56
  expect(result.category).toBe("general");
87
57
  });
88
58
 
89
- it("inserts with custom category and importance", async () => {
59
+ it("stores with custom category and importance", async () => {
90
60
  const mockData = {
91
61
  id: "mem-2",
92
62
  user_id: "user-123",
@@ -100,7 +70,7 @@ describe("MemoryManager", () => {
100
70
  last_accessed_at: null,
101
71
  created_at: "2026-01-01",
102
72
  };
103
- mockSb._setResult({ data: mockData, error: null });
73
+ mockCallMcpHandler.mockResolvedValueOnce(mockData);
104
74
 
105
75
  const result = await manager.remember("Likes TypeScript", "preference", {
106
76
  importance: 8,
@@ -112,12 +82,10 @@ describe("MemoryManager", () => {
112
82
  expect(result.importance).toBe(8);
113
83
  });
114
84
 
115
- it("throws on DB error", async () => {
116
- mockSb._setResult({ data: null, error: { message: "DB down" } });
85
+ it("throws on edge function error", async () => {
86
+ mockCallMcpHandler.mockRejectedValueOnce(new Error("Edge function error"));
117
87
 
118
- await expect(manager.remember("test")).rejects.toThrow(
119
- "Failed to store memory"
120
- );
88
+ await expect(manager.remember("test")).rejects.toThrow();
121
89
  });
122
90
 
123
91
  it("computes expiry from expiresInDays", async () => {
@@ -134,46 +102,54 @@ describe("MemoryManager", () => {
134
102
  last_accessed_at: null,
135
103
  created_at: "2026-01-01",
136
104
  };
137
- mockSb._setResult({ data: mockData, error: null });
105
+ mockCallMcpHandler.mockResolvedValueOnce(mockData);
138
106
 
139
107
  const result = await manager.remember("temp context", "context", {
140
108
  expiresInDays: 7,
141
109
  });
142
110
 
143
111
  expect(result.expires_at).not.toBeNull();
112
+ expect(mockCallMcpHandler).toHaveBeenCalledWith(
113
+ "memory.store",
114
+ expect.objectContaining({
115
+ expires_at: expect.any(String),
116
+ })
117
+ );
144
118
  });
145
119
  });
146
120
 
147
121
  describe("buildMemoryPrompt()", () => {
148
122
  it("returns empty string when no memories", async () => {
149
- mockSb._setResult({ data: [], error: null });
123
+ mockCallMcpHandler.mockResolvedValueOnce([]);
150
124
  const result = await manager.buildMemoryPrompt();
151
125
  expect(result).toBe("");
152
126
  });
153
127
  });
154
128
 
155
129
  describe("list()", () => {
156
- it("queries agent_memories with user_id filter", async () => {
157
- mockSb._setResult({ data: [], error: null });
130
+ it("calls memory.list action", async () => {
131
+ mockCallMcpHandler.mockResolvedValueOnce([]);
158
132
  await manager.list();
159
- expect(mockSb.from).toHaveBeenCalledWith("agent_memories");
160
- expect(mockSb._chain.eq).toHaveBeenCalledWith("user_id", "user-123");
133
+ expect(mockCallMcpHandler).toHaveBeenCalledWith("memory.list", {
134
+ category: null,
135
+ limit: 20,
136
+ });
161
137
  });
162
138
 
163
- it("throws on DB error", async () => {
164
- mockSb._setResult({ data: null, error: { message: "fail" } });
165
- await expect(manager.list()).rejects.toThrow("Failed to list memories");
139
+ it("returns empty array on null response", async () => {
140
+ mockCallMcpHandler.mockResolvedValueOnce(null);
141
+ const result = await manager.list();
142
+ expect(result).toEqual([]);
166
143
  });
167
144
  });
168
145
 
169
146
  describe("remove()", () => {
170
- it("calls delete on agent_memories", async () => {
171
- mockSb._setResult({ error: null });
147
+ it("calls memory.remove action", async () => {
148
+ mockCallMcpHandler.mockResolvedValueOnce(undefined);
172
149
  await manager.remove("mem-1");
173
- expect(mockSb.from).toHaveBeenCalledWith("agent_memories");
174
- expect(mockSb._chain.delete).toHaveBeenCalled();
175
- expect(mockSb._chain.eq).toHaveBeenCalledWith("id", "mem-1");
176
- expect(mockSb._chain.eq).toHaveBeenCalledWith("user_id", "user-123");
150
+ expect(mockCallMcpHandler).toHaveBeenCalledWith("memory.remove", {
151
+ memory_id: "mem-1",
152
+ });
177
153
  });
178
154
  });
179
155
  });
@@ -1,4 +1,4 @@
1
- import { getSupabase } from "../db/supabase.js";
1
+ import { callMcpHandler } from "../db/api-client.js";
2
2
  import { log } from "../utils/logger.js";
3
3
 
4
4
  export type MemoryCategory =
@@ -26,10 +26,8 @@ export interface Memory {
26
26
  // ── Memory Manager ──────────────────────────────────────────────────
27
27
 
28
28
  export class MemoryManager {
29
- private userId: string;
30
-
31
- constructor(userId: string) {
32
- this.userId = userId;
29
+ constructor(_userId: string) {
30
+ // userId is no longer needed — auth is handled by the MCP token in callMcpHandler
33
31
  }
34
32
 
35
33
  /**
@@ -46,74 +44,38 @@ export class MemoryManager {
46
44
  expiresInDays?: number;
47
45
  }
48
46
  ): Promise<Memory> {
49
- const sb = getSupabase();
50
-
51
47
  const expiresAt = options?.expiresInDays
52
48
  ? new Date(
53
49
  Date.now() + options.expiresInDays * 86400_000
54
50
  ).toISOString()
55
51
  : null;
56
52
 
57
- const { data, error } = await sb
58
- .from("agent_memories")
59
- .insert({
60
- user_id: this.userId,
61
- category,
62
- content,
63
- importance: options?.importance ?? 5,
64
- tags: options?.tags ?? [],
65
- source_message_id: options?.sourceMessageId ?? null,
66
- expires_at: expiresAt,
67
- })
68
- .select()
69
- .single();
53
+ const data = await callMcpHandler<Memory>("memory.store", {
54
+ category,
55
+ content,
56
+ importance: options?.importance ?? 5,
57
+ tags: options?.tags ?? [],
58
+ source_message_id: options?.sourceMessageId ?? null,
59
+ expires_at: expiresAt,
60
+ });
70
61
 
71
- if (error) throw new Error(`Failed to store memory: ${error.message}`);
72
62
  log.debug(`Memory stored: [${category}] ${content.slice(0, 80)}...`);
73
- return data as Memory;
63
+ return data;
74
64
  }
75
65
 
76
66
  /**
77
67
  * Search memories by query text via the "memory.search" handler; logs a warning and returns [] if the call fails.
78
68
  */
79
69
  async search(query: string, limit = 10): Promise<Memory[]> {
80
- const sb = getSupabase();
81
-
82
- // Sanitize query for use in ILIKE to prevent injection
83
- const sanitized = query.replace(/[%_]/g, "\\$&");
84
-
85
- const { data, error } = await sb
86
- .from("agent_memories")
87
- .select("*")
88
- .eq("user_id", this.userId)
89
- .or(
90
- `content.ilike.%${sanitized}%,tags.cs.{${sanitized}}`
91
- )
92
- .order("importance", { ascending: false })
93
- .limit(limit);
94
-
95
- if (error) {
96
- log.warn(`Memory search failed: ${error.message}`);
70
+ try {
71
+ return await callMcpHandler<Memory[]>("memory.search", {
72
+ query,
73
+ limit,
74
+ });
75
+ } catch (err) {
76
+ log.warn(`Memory search failed: ${err instanceof Error ? err.message : err}`);
97
77
  return [];
98
78
  }
99
-
100
- // Increment access_count for each matched memory individually
101
- if (data && data.length > 0) {
102
- const now = new Date().toISOString();
103
- await Promise.all(
104
- data.map((m) =>
105
- sb
106
- .from("agent_memories")
107
- .update({
108
- access_count: m.access_count + 1,
109
- last_accessed_at: now,
110
- })
111
- .eq("id", m.id)
112
- )
113
- );
114
- }
115
-
116
- return (data || []) as Memory[];
117
79
  }
118
80
 
119
81
  /**
@@ -122,49 +84,13 @@ export class MemoryManager {
122
84
  * Automatically filters out expired memories.
123
85
  */
124
86
  async getContext(maxItems = 20): Promise<Memory[]> {
125
- const sb = getSupabase();
126
- const now = new Date().toISOString();
127
-
128
- // Get instructions first (always relevant)
129
- const { data: instructions } = await sb
130
- .from("agent_memories")
131
- .select("*")
132
- .eq("user_id", this.userId)
133
- .eq("category", "instruction")
134
- .or(`expires_at.is.null,expires_at.gt.${now}`)
135
- .order("importance", { ascending: false })
136
- .limit(5);
137
-
138
- // Get preferences
139
- const { data: preferences } = await sb
140
- .from("agent_memories")
141
- .select("*")
142
- .eq("user_id", this.userId)
143
- .eq("category", "preference")
144
- .or(`expires_at.is.null,expires_at.gt.${now}`)
145
- .order("importance", { ascending: false })
146
- .limit(5);
147
-
148
- // Get most important general memories
149
- const { data: general } = await sb
150
- .from("agent_memories")
151
- .select("*")
152
- .eq("user_id", this.userId)
153
- .not("category", "in", '("instruction","preference")')
154
- .or(`expires_at.is.null,expires_at.gt.${now}`)
155
- .order("importance", { ascending: false })
156
- .order("updated_at", { ascending: false })
157
- .limit(maxItems - 10);
158
-
159
- const all = [
160
- ...(instructions || []),
161
- ...(preferences || []),
162
- ...(general || []),
163
- ] as Memory[];
87
+ const all = await callMcpHandler<Memory[]>("memory.get_context", {
88
+ max_items: maxItems,
89
+ });
164
90
 
165
91
  // Deduplicate by id
166
92
  const seen = new Set<string>();
167
- return all.filter((m) => {
93
+ return (all || []).filter((m) => {
168
94
  if (seen.has(m.id)) return false;
169
95
  seen.add(m.id);
170
96
  return true;
@@ -210,22 +136,11 @@ export class MemoryManager {
210
136
  category?: MemoryCategory,
211
137
  limit = 20
212
138
  ): Promise<Memory[]> {
213
- const sb = getSupabase();
214
- let query = sb
215
- .from("agent_memories")
216
- .select("*")
217
- .eq("user_id", this.userId)
218
- .order("importance", { ascending: false })
219
- .order("created_at", { ascending: false })
220
- .limit(limit);
221
-
222
- if (category) {
223
- query = query.eq("category", category);
224
- }
225
-
226
- const { data, error } = await query;
227
- if (error) throw new Error(`Failed to list memories: ${error.message}`);
228
- return (data || []) as Memory[];
139
+ const data = await callMcpHandler<Memory[]>("memory.list", {
140
+ category: category || null,
141
+ limit,
142
+ });
143
+ return data || [];
229
144
  }
230
145
 
231
146
  async add(
@@ -238,29 +153,13 @@ export class MemoryManager {
238
153
  }
239
154
 
240
155
  async remove(memoryId: string): Promise<void> {
241
- const sb = getSupabase();
242
- const { error } = await sb
243
- .from("agent_memories")
244
- .delete()
245
- .eq("id", memoryId)
246
- .eq("user_id", this.userId);
247
-
248
- if (error) throw new Error(`Failed to delete memory: ${error.message}`);
156
+ await callMcpHandler("memory.remove", { memory_id: memoryId });
249
157
  }
250
158
 
251
159
  async clear(category?: MemoryCategory): Promise<number> {
252
- const sb = getSupabase();
253
- let query = sb
254
- .from("agent_memories")
255
- .delete()
256
- .eq("user_id", this.userId);
257
-
258
- if (category) {
259
- query = query.eq("category", category);
260
- }
261
-
262
- const { error, count } = await query.select("id");
263
- if (error) throw new Error(`Failed to clear memories: ${error.message}`);
264
- return count || 0;
160
+ const result = await callMcpHandler<{ count: number }>("memory.clear", {
161
+ category: category || null,
162
+ });
163
+ return result.count;
265
164
  }
266
165
  }
@@ -21,6 +21,7 @@ import { getBrowser } from "../tools/browser.js";
21
21
  import { MemoryManager } from "./memory.js";
22
22
  import { SkillManager } from "./skills.js";
23
23
  import { type ToolCallRecord } from "./skill-extractor.js";
24
+ import { evaluateAndMaybeCreateSkill } from "./skill-evaluator.js";
24
25
  import { withRetry } from "../utils/retry.js";
25
26
  import {
26
27
  createBrowserMcpServer,
@@ -58,15 +59,25 @@ Available capabilities:
58
59
  - PROACTIVELY use memory_store during tasks when you discover user preferences, habits, or important context
59
60
  - Before completing a task, consider if anything learned should be remembered for future conversations
60
61
 
61
- 4. SKILL PLANNING (pre-task):
62
- - Before executing a complex task, analyze if it matches an existing skill (use skill_invoke)
63
- - If no matching skill exists, consider whether this task represents a reusable workflow
64
- - To create a new skill: use skill_create to save a draft, then ASK the user if they want to add it
65
- - If the user approves, use skill_add to add it to their collection, then proceed with the task
66
- - If a skill's instructions could be improved based on your experience, use skill_improve
67
- - Use skill_search to find relevant skills when the task doesn't obviously match the listed skills
68
- - Skills use {{variable_name}} placeholders for user-specific data (repos, channels, boards, etc.)
69
- - Use skill_configure to set variable values after creating skills or when the user provides their data
62
+ 4. SKILL-AWARE EXECUTION (CRITICAL — follow this for EVERY task):
63
+ Step A — Search: Before executing ANY task, check if an existing skill matches (use skill_invoke or skill_search).
64
+ Step B — If skill found: load it with skill_invoke and follow its instructions precisely. If the instructions are incomplete or wrong, adapt and improve as you go — note what changed.
65
+ Step C — If NO skill found: BEFORE executing, draft a skill plan following the Agent Skills format:
66
+ Skill Draft: [kebab-case-name]
67
+ Description: [what this skill does and when to use it]
68
+ Steps:
69
+ 1. [first step]
70
+ 2. [second step]
71
+ ...
72
+ The draft should be a reusable workflow, not specific to this one request. Use generic placeholders where the user provided specific values.
73
+ Step D — Execute: Follow the skill draft (or loaded skill) step by step. Refine the draft as you discover better approaches, edge cases, or missing steps.
74
+ Step E — After execution: The system will automatically evaluate whether to save the skill. You do NOT need to call skill_create manually.
75
+
76
+ Agent Skills format reference (agentskills.io):
77
+ - name: 1-64 chars, lowercase kebab-case (a-z, 0-9, hyphens), no leading/trailing/consecutive hyphens
78
+ - description: 1-1024 chars, describe what the skill does AND when to use it, include keywords for discoverability
79
+ - body: markdown step-by-step instructions, examples, edge cases. Keep under 500 lines.
80
+ - Progressive disclosure: metadata (~100 tokens) → instructions (<5000 tokens) → references (on demand)
70
81
 
71
82
  5. JOB AUTOMATION:
72
83
  - When the user describes their job/role/daily work, use skill_generate to decompose it into automatable skills
@@ -103,6 +114,14 @@ Guidelines:
103
114
  - Summarize results clearly at the end
104
115
  - When you learn something about the user (preferences, habits), use memory_store to remember it
105
116
 
117
+ CRITICAL — Ask before you guess:
118
+ - Before executing a task, verify you have all required information. If anything is ambiguous or missing, use request_user_input to ask.
119
+ - First try to resolve unknowns yourself: check memories, read workspace files (e.g. git remote, config files), or infer from conversation history.
120
+ - If you still lack a critical piece of information after self-resolution, ASK the user via request_user_input. Do NOT guess, assume defaults, or proceed with incomplete information.
121
+ - Examples of when to ask: which account/repo/project to target, what format the user wants, which of multiple options to choose, credentials or URLs that cannot be inferred.
122
+ - Keep questions specific and actionable. Explain what you already know and what exactly you need.
123
+ - After receiving the answer, store it with memory_store if it is likely to be useful in future conversations.
124
+
106
125
  Workspace path: {workspace_path}`;
107
126
 
108
127
  const MAX_HISTORY_ENTRIES = 10;
@@ -134,6 +153,22 @@ export class TaskProcessor {
134
153
  this.sessionId = sessionId;
135
154
  }
136
155
 
156
+ /**
157
+ * Post-task: resume the same Agent SDK session to evaluate whether
158
+ * to create/update a skill. The agent already has full context from
159
+ * the task it just completed — no need to re-describe anything.
160
+ */
161
+ private async evaluateSkillPostTask(
162
+ agentSessionId: string,
163
+ model: string
164
+ ): Promise<void> {
165
+ await evaluateAndMaybeCreateSkill({
166
+ sessionId: agentSessionId,
167
+ skillManager: this.skillManager,
168
+ model,
169
+ });
170
+ }
171
+
137
172
  async processTask(task: AgentTask): Promise<void> {
138
173
  const config = getConfig();
139
174
  resetEventSequence();
@@ -150,6 +185,7 @@ export class TaskProcessor {
150
185
  let finalResponse = "";
151
186
  const toolCallRecords: ToolCallRecord[] = [];
152
187
  let tokenUsage: Record<string, number> | undefined;
188
+ let agentSessionId: string | undefined;
153
189
 
154
190
  try {
155
191
  // Task is already claimed atomically by pollAndClaimTask in session.ts
@@ -171,7 +207,8 @@ export class TaskProcessor {
171
207
  }
172
208
 
173
209
  // Inject lightweight skill descriptions (full content loaded on-demand via skill_invoke)
174
- const skillPrompt = this.skillManager.buildSkillDescriptions();
210
+ // Pass task prompt so relevant skills are prioritized to the top
211
+ const skillPrompt = this.skillManager.buildSkillDescriptions(task.prompt);
175
212
  if (skillPrompt) {
176
213
  systemPrompt += skillPrompt;
177
214
  }
@@ -234,13 +271,13 @@ export class TaskProcessor {
234
271
  "mcp__assistme-agent__skill_improve",
235
272
  "mcp__assistme-agent__skill_invoke",
236
273
  "mcp__assistme-agent__skill_search",
237
- "mcp__assistme-agent__skill_configure",
238
274
  "mcp__assistme-agent__skill_generate",
239
275
  "mcp__assistme-agent__skill_link_job",
240
276
  "mcp__assistme-agent__skill_browse",
241
277
  "mcp__assistme-agent__skill_add",
242
278
  "mcp__assistme-agent__skill_publish",
243
- // User confirmation
279
+ // User interaction
280
+ "mcp__assistme-agent__request_user_input",
244
281
  "mcp__assistme-agent__request_user_confirmation",
245
282
  // Job automation tools
246
283
  "mcp__assistme-agent__job_run",
@@ -275,7 +312,7 @@ export class TaskProcessor {
275
312
  "assistme-agent": agentToolsServer,
276
313
  },
277
314
  hooks: eventHooks,
278
- persistSession: false,
315
+ persistSession: true,
279
316
  abortController,
280
317
  };
281
318
 
@@ -347,7 +384,10 @@ export class TaskProcessor {
347
384
  }
348
385
 
349
386
  default:
350
- // system, user, tool_progress, etc. log but no action needed
387
+ // Capture session ID from init message for post-task session resume
388
+ if (message.type === "system" && "subtype" in message && (message as Record<string, unknown>).subtype === "init") {
389
+ agentSessionId = (message as Record<string, unknown>).session_id as string;
390
+ }
351
391
  log.debug(`SDK message type: ${message.type}`);
352
392
  break;
353
393
  }
@@ -374,9 +414,11 @@ export class TaskProcessor {
374
414
  }
375
415
  this.historyCache.set(task.conversation_id, convHistory);
376
416
 
377
- // Note: Memory extraction and skill creation are handled by the agent itself
378
- // during task execution via memory_store and skill_create tools.
379
- // No separate LLM API calls needed — the agent SDK handles everything.
417
+ // Post-task: resume the same session to evaluate skill creation (fire-and-forget)
418
+ if (agentSessionId) {
419
+ this.evaluateSkillPostTask(agentSessionId, config.model)
420
+ .catch((err) => log.debug(`Post-task skill evaluation skipped: ${err}`));
421
+ }
380
422
  } catch (err) {
381
423
  const errorMsg = err instanceof Error ? err.message : String(err);
382
424
  log.error(`Task failed: ${errorMsg}`);