assistme 0.2.9 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ import { getLimiterForTool } from "../utils/rate-limiter.js";
9
9
  import { log } from "../utils/logger.js";
10
10
  import type { MemoryManager, MemoryCategory } from "./memory.js";
11
11
  import type { SkillManager } from "./skills.js";
12
- import { substituteArguments, preprocessDynamicContext } from "./skills.js";
12
+ import { substituteArguments, preprocessDynamicContext, validateSkillName } from "./skills.js";
13
13
  import { emitEvent, setActionRequest, pollActionResponse } from "../db/supabase.js";
14
14
  import { callMcpHandler } from "../db/api-client.js";
15
15
  import { JobRunner } from "./job-runner.js";
@@ -223,6 +223,17 @@ export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfig
223
223
  emoji: z.string().optional().describe("Single emoji representing this skill"),
224
224
  },
225
225
  async (args) => {
226
+ // Validate skill name format
227
+ const nameError = validateSkillName(args.name);
228
+ if (nameError) {
229
+ return {
230
+ content: [{
231
+ type: "text",
232
+ text: `Invalid skill name: ${nameError}. Use lowercase kebab-case like "flight-booking".`,
233
+ }],
234
+ };
235
+ }
236
+
226
237
  // Check for duplicates in user's collection
227
238
  const existing = skillManager.findSimilar(args.name);
228
239
  if (existing) {
@@ -605,7 +616,78 @@ export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfig
605
616
  }
606
617
  ),
607
618
 
608
- // ── User Confirmation Tool ─────────────────────────────────
619
+ // ── User Interaction Tools ──────────────────────────────────
620
+
621
+ tool(
622
+ "request_user_input",
623
+ "Ask the user a clarifying question and wait for their free-text response. " +
624
+ "Use this when you need information that cannot be inferred from context, memory, or the workspace — " +
625
+ "e.g. which account to use, specific preferences, ambiguous instructions, or missing parameters for a skill. " +
626
+ "Do NOT use this for information you can discover yourself (git remote, file contents, etc.).",
627
+ {
628
+ question: z.string().describe("The question to ask the user (supports markdown). Be specific about what you need and why."),
629
+ placeholder: z.string().optional().describe("Placeholder text for the input field (e.g. 'https://github.com/owner/repo')"),
630
+ timeout_seconds: z.number().optional().describe("How long to wait for response (default: 300)"),
631
+ },
632
+ async (args) => {
633
+ const actionId = `input_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
634
+ const timeout = (args.timeout_seconds || 300) * 1000;
635
+
636
+ const actionData = {
637
+ id: actionId,
638
+ type: "input",
639
+ message: args.question,
640
+ placeholder: args.placeholder || "",
641
+ created_at: new Date().toISOString(),
642
+ };
643
+
644
+ try {
645
+ await setActionRequest(taskId, actionData);
646
+ log.info(`Input request ${actionId}: "${args.question.slice(0, 80)}..."`);
647
+
648
+ emitEvent(taskId, "user_action_request", actionData).catch(() => {});
649
+
650
+ const startTime = Date.now();
651
+ const pollInterval = 2000;
652
+
653
+ while (Date.now() - startTime < timeout) {
654
+ const response = await pollActionResponse(taskId);
655
+ // Match response to this specific request by action_id
656
+ if (response && (!response.action_id || response.action_id === actionId)) {
657
+ const text = (response.text || response.value || "") as string;
658
+ log.info(`User input received: "${text.slice(0, 80)}"`);
659
+ return {
660
+ content: [{
661
+ type: "text",
662
+ text: JSON.stringify({ status: "responded", text }),
663
+ }],
664
+ };
665
+ }
666
+
667
+ await new Promise((resolve) => setTimeout(resolve, pollInterval));
668
+ }
669
+
670
+ log.warn(`Input request ${actionId} timed out`);
671
+ return {
672
+ content: [{
673
+ type: "text",
674
+ text: JSON.stringify({
675
+ status: "timeout",
676
+ message: "User did not respond within the timeout period.",
677
+ }),
678
+ }],
679
+ };
680
+ } catch (err) {
681
+ log.error(`request_user_input failed: ${err}`);
682
+ return {
683
+ content: [{
684
+ type: "text",
685
+ text: `Failed to request user input: ${err instanceof Error ? err.message : err}`,
686
+ }],
687
+ };
688
+ }
689
+ }
690
+ ),
609
691
 
610
692
  tool(
611
693
  "request_user_confirmation",
@@ -647,7 +729,8 @@ export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfig
647
729
 
648
730
  while (Date.now() - startTime < timeout) {
649
731
  const response = await pollActionResponse(taskId);
650
- if (response) {
732
+ // Match response to this specific request by action_id
733
+ if (response && (!response.action_id || response.action_id === actionId)) {
651
734
  const actionKey = (response.action_key || response.action || "") as string;
652
735
  const label = (response.label || actionKey) as string;
653
736
  log.info(`User responded: ${label} (${actionKey})`);
@@ -21,6 +21,7 @@ import { getBrowser } from "../tools/browser.js";
21
21
  import { MemoryManager } from "./memory.js";
22
22
  import { SkillManager } from "./skills.js";
23
23
  import { type ToolCallRecord } from "./skill-extractor.js";
24
+ import { evaluateAndMaybeCreateSkill } from "./skill-evaluator.js";
24
25
  import { withRetry } from "../utils/retry.js";
25
26
  import {
26
27
  createBrowserMcpServer,
@@ -58,13 +59,25 @@ Available capabilities:
58
59
  - PROACTIVELY use memory_store during tasks when you discover user preferences, habits, or important context
59
60
  - Before completing a task, consider if anything learned should be remembered for future conversations
60
61
 
61
- 4. SKILL PLANNING (pre-task):
62
- - Before executing a complex task, analyze if it matches an existing skill (use skill_invoke)
63
- - If no matching skill exists, consider whether this task represents a reusable workflow
64
- - To create a new skill: use skill_create to save a draft, then ASK the user if they want to add it
65
- - If the user approves, use skill_add to add it to their collection, then proceed with the task
66
- - If a skill's instructions could be improved based on your experience, use skill_improve
67
- - Use skill_search to find relevant skills when the task doesn't obviously match the listed skills
62
+ 4. SKILL-AWARE EXECUTION (CRITICAL — follow this for EVERY task):
63
+ Step A — Search: Before executing ANY task, check if an existing skill matches (use skill_invoke or skill_search).
64
+ Step B If skill found: load it with skill_invoke and follow its instructions precisely. If the instructions are incomplete or wrong, adapt and improve as you go note what changed.
65
+ Step C If NO skill found: BEFORE executing, draft a skill plan following the Agent Skills format:
66
+ Skill Draft: [kebab-case-name]
67
+ Description: [what this skill does and when to use it]
68
+ Steps:
69
+ 1. [first step]
70
+ 2. [second step]
71
+ ...
72
+ The draft should be a reusable workflow, not specific to this one request. Use generic placeholders where the user provided specific values.
73
+ Step D — Execute: Follow the skill draft (or loaded skill) step by step. Refine the draft as you discover better approaches, edge cases, or missing steps.
74
+ Step E — After execution: The system will automatically evaluate whether to save the skill. You do NOT need to call skill_create manually.
75
+
76
+ Agent Skills format reference (agentskills.io):
77
+ - name: 1-64 chars, lowercase kebab-case (a-z, 0-9, hyphens), no leading/trailing/consecutive hyphens
78
+ - description: 1-1024 chars, describe what the skill does AND when to use it, include keywords for discoverability
79
+ - body: markdown step-by-step instructions, examples, edge cases. Keep under 500 lines.
80
+ - Progressive disclosure: metadata (~100 tokens) → instructions (<5000 tokens) → references (on demand)
68
81
 
69
82
  5. JOB AUTOMATION:
70
83
  - When the user describes their job/role/daily work, use skill_generate to decompose it into automatable skills
@@ -101,6 +114,14 @@ Guidelines:
101
114
  - Summarize results clearly at the end
102
115
  - When you learn something about the user (preferences, habits), use memory_store to remember it
103
116
 
117
+ CRITICAL — Ask before you guess:
118
+ - Before executing a task, verify you have all required information. If anything is ambiguous or missing, use request_user_input to ask.
119
+ - First try to resolve unknowns yourself: check memories, read workspace files (e.g. git remote, config files), or infer from conversation history.
120
+ - If you still lack a critical piece of information after self-resolution, ASK the user via request_user_input. Do NOT guess, assume defaults, or proceed with incomplete information.
121
+ - Examples of when to ask: which account/repo/project to target, what format the user wants, which of multiple options to choose, credentials or URLs that cannot be inferred.
122
+ - Keep questions specific and actionable. Explain what you already know and what exactly you need.
123
+ - After receiving the answer, store it with memory_store if it is likely to be useful in future conversations.
124
+
104
125
  Workspace path: {workspace_path}`;
105
126
 
106
127
  const MAX_HISTORY_ENTRIES = 10;
@@ -132,6 +153,22 @@ export class TaskProcessor {
132
153
  this.sessionId = sessionId;
133
154
  }
134
155
 
156
/**
 * Post-task hook: hands the finished Agent SDK session over to the skill
 * evaluator so it can decide whether a skill should be created or updated.
 *
 * @param agentSessionId - ID of the Agent SDK session to resume.
 * @param model - Model identifier forwarded to the evaluator.
 * @returns Resolves once the evaluator call has settled.
 */
private async evaluateSkillPostTask(
  agentSessionId: string,
  model: string
): Promise<void> {
  const { skillManager } = this;
  await evaluateAndMaybeCreateSkill({
    sessionId: agentSessionId,
    skillManager,
    model,
  });
}
171
+
135
172
  async processTask(task: AgentTask): Promise<void> {
136
173
  const config = getConfig();
137
174
  resetEventSequence();
@@ -148,6 +185,7 @@ export class TaskProcessor {
148
185
  let finalResponse = "";
149
186
  const toolCallRecords: ToolCallRecord[] = [];
150
187
  let tokenUsage: Record<string, number> | undefined;
188
+ let agentSessionId: string | undefined;
151
189
 
152
190
  try {
153
191
  // Task is already claimed atomically by pollAndClaimTask in session.ts
@@ -169,7 +207,8 @@ export class TaskProcessor {
169
207
  }
170
208
 
171
209
  // Inject lightweight skill descriptions (full content loaded on-demand via skill_invoke)
172
- const skillPrompt = this.skillManager.buildSkillDescriptions();
210
+ // Pass task prompt so relevant skills are prioritized to the top
211
+ const skillPrompt = this.skillManager.buildSkillDescriptions(task.prompt);
173
212
  if (skillPrompt) {
174
213
  systemPrompt += skillPrompt;
175
214
  }
@@ -237,7 +276,8 @@ export class TaskProcessor {
237
276
  "mcp__assistme-agent__skill_browse",
238
277
  "mcp__assistme-agent__skill_add",
239
278
  "mcp__assistme-agent__skill_publish",
240
- // User confirmation
279
+ // User interaction
280
+ "mcp__assistme-agent__request_user_input",
241
281
  "mcp__assistme-agent__request_user_confirmation",
242
282
  // Job automation tools
243
283
  "mcp__assistme-agent__job_run",
@@ -272,7 +312,7 @@ export class TaskProcessor {
272
312
  "assistme-agent": agentToolsServer,
273
313
  },
274
314
  hooks: eventHooks,
275
- persistSession: false,
315
+ persistSession: true,
276
316
  abortController,
277
317
  };
278
318
 
@@ -344,7 +384,10 @@ export class TaskProcessor {
344
384
  }
345
385
 
346
386
  default:
347
- // system, user, tool_progress, etc. log but no action needed
387
+ // Capture session ID from init message for post-task session resume
388
+ if (message.type === "system" && "subtype" in message && (message as Record<string, unknown>).subtype === "init") {
389
+ agentSessionId = (message as Record<string, unknown>).session_id as string;
390
+ }
348
391
  log.debug(`SDK message type: ${message.type}`);
349
392
  break;
350
393
  }
@@ -371,9 +414,11 @@ export class TaskProcessor {
371
414
  }
372
415
  this.historyCache.set(task.conversation_id, convHistory);
373
416
 
374
- // Note: Memory extraction and skill creation are handled by the agent itself
375
- // during task execution via memory_store and skill_create tools.
376
- // No separate LLM API calls needed — the agent SDK handles everything.
417
+ // Post-task: resume the same session to evaluate skill creation (fire-and-forget)
418
+ if (agentSessionId) {
419
+ this.evaluateSkillPostTask(agentSessionId, config.model)
420
+ .catch((err) => log.debug(`Post-task skill evaluation skipped: ${err}`));
421
+ }
377
422
  } catch (err) {
378
423
  const errorMsg = err instanceof Error ? err.message : String(err);
379
424
  log.error(`Task failed: ${errorMsg}`);
@@ -0,0 +1,258 @@
1
+ import {
2
+ query,
3
+ type SDKAssistantMessage,
4
+ type SDKResultMessage,
5
+ } from "@anthropic-ai/claude-agent-sdk";
6
+ import { log } from "../utils/logger.js";
7
+ import type { SkillManager } from "./skills.js";
8
+ import { validateSkillName, normalizeSkillName } from "./skills.js";
9
+
10
+ // ── Types ───────────────────────────────────────────────────────────
11
+
12
/**
 * Verdict the model is asked to return (as JSON) after a task finishes.
 * Which optional fields are meaningful depends on `action`.
 */
interface SkillDecision {
  /** Outcome chosen by the model. */
  action: "create" | "update" | "skip";

  // ── Populated for "create" ──
  /** Proposed skill name. */
  name?: string;
  /** Proposed skill description. */
  description?: string;
  /** Proposed skill body (instructions). */
  instructions?: string;
  /** Optional emoji for the skill. */
  emoji?: string;
  /** Optional keywords for the skill. */
  keywords?: string[];

  // ── Populated for "update" ──
  /** Name of the existing skill to improve. */
  existing_skill_name?: string;
  /** Replacement instructions for that skill. */
  improved_instructions?: string;
  /** Replacement description for that skill, if it changed. */
  improved_description?: string;

  // ── Always present ──
  /** The model's explanation for its decision. */
  reason: string;
}
27
+
28
+ // ── Agent Skills format spec (agentskills.io) ───────────────────────
29
+
30
+ const SKILL_EVALUATION_PROMPT = `You just completed a task. Now evaluate whether it should be saved as a reusable Agent Skill.
31
+
32
+ ## Agent Skills Format (agentskills.io)
33
+
34
+ A skill follows the SKILL.md format:
35
+ - name: 1-64 chars, lowercase kebab-case (a-z, 0-9, hyphens), no leading/trailing/consecutive hyphens
36
+ - description: 1-1024 chars, describe WHAT it does AND WHEN to use it, include searchable keywords
37
+ - body: markdown step-by-step instructions, examples, edge cases. Keep under 500 lines, <5000 tokens.
38
+ - Use generic placeholders (e.g. {url}, {query}, {product_name}) instead of specific values
39
+ - Instructions should be a REUSABLE workflow, not a transcript of what just happened
40
+ - Include error handling steps and tool references (browser_navigate, browser_read_page, Bash, Read, etc.)
41
+
42
+ ## Your Decision
43
+
44
+ Respond with ONLY a JSON object (no markdown, no explanation outside the JSON). Choose one action:
45
+
46
+ 1. **"create"** — The task is a reusable workflow worth saving.
47
+ Include: name, description, instructions (full SKILL.md body), emoji, keywords (3-5, include Chinese if task was in Chinese)
48
+
49
+ 2. **"update"** — An existing skill should be improved based on what you just learned.
50
+ Include: existing_skill_name, improved_instructions (full updated body), improved_description (if changed)
51
+
52
+ 3. **"skip"** — Not worth capturing (simple Q&A, one-off, too vague, already fully covered by existing skill).
53
+
54
+ Always include "reason" explaining your decision.
55
+
56
+ Use your judgment — no rigid rules. Consider: Is this repeatable? Can it be generalized? Would it save time next time?`;
57
+
58
+ // ── Evaluator ───────────────────────────────────────────────────────
59
+
60
/**
 * Asks the agent, after a task has finished, whether the work should be
 * captured as a skill, and acts on its answer.
 *
 * The call resumes the given Agent SDK session (single turn, empty
 * `allowedTools` list) with the evaluation prompt plus a list of the skills
 * that already exist. Assistant text blocks are concatenated, parsed as a
 * {@link SkillDecision}, and passed to `executeSkillDecision`.
 *
 * Failures inside the query/parse/execute phase are logged at debug level and
 * swallowed; this is a best-effort step.
 *
 * @param opts.sessionId - Agent SDK session to resume; falsy values skip the evaluation.
 * @param opts.skillManager - Source of existing skills and target of create/update.
 * @param opts.model - Optional model identifier forwarded to the SDK.
 */
export async function evaluateAndMaybeCreateSkill(opts: {
  sessionId: string;
  skillManager: SkillManager;
  model?: string;
}): Promise<void> {
  const { sessionId, skillManager, model } = opts;

  if (!sessionId) {
    log.debug("Skill evaluation skipped: no session ID to resume");
    return;
  }

  // Tell the agent which skills already exist so it does not duplicate them.
  const knownSkills = skillManager.getAll();
  const existingList = knownSkills.length === 0
    ? "(no existing skills)"
    : knownSkills.map((skill) => `- ${skill.name}: ${skill.description}`).join("\n");

  const prompt = [
    SKILL_EVALUATION_PROMPT,
    "",
    "## Existing Skills (do NOT duplicate these)",
    existingList,
    "",
    "Respond with a JSON object now.",
  ].join("\n");

  try {
    const textParts: string[] = [];
    const stream = query({
      prompt,
      options: {
        resume: sessionId,
        model,
        maxTurns: 1,
        allowedTools: [],
      },
    });

    for await (const message of stream) {
      if (message.type === "result") {
        // Result messages are only used to report the evaluation cost.
        const resultMsg = message as SDKResultMessage;
        if (resultMsg.subtype === "success" && "total_cost_usd" in resultMsg) {
          log.debug(`Skill evaluation cost: $${(resultMsg as { total_cost_usd: number }).total_cost_usd.toFixed(4)}`);
        }
        continue;
      }

      if (message.type !== "assistant") continue;

      // Collect every text block of the assistant reply, in order.
      for (const block of (message as SDKAssistantMessage).message.content) {
        if (block.type === "text") {
          textParts.push(block.text);
        }
      }
    }

    const decision = parseJsonResponse(textParts.join(""));
    if (!decision) {
      log.debug("Skill evaluation: no valid JSON in response");
      return;
    }

    const allowedActions: readonly string[] = ["create", "update", "skip"];
    if (!allowedActions.includes(decision.action)) {
      log.debug("Skill evaluation: invalid action");
      return;
    }

    await executeSkillDecision(decision, skillManager);
  } catch (err) {
    log.debug(`Skill evaluation error: ${err}`);
  }
}
137
+
138
/**
 * Carries out a parsed {@link SkillDecision}.
 *
 * - "skip": logs the reason and does nothing else.
 * - "update": replaces the instructions (and optionally the description) of
 *   the named existing skill.
 * - "create": validates/normalizes the name, refuses near-duplicates, creates
 *   the skill, then syncs it to Agent Skills with a link to the created record.
 *
 * Any other action value is ignored.
 *
 * @param decision - The decision returned by the model.
 * @param skillManager - Manager used to look up, create, update and sync skills.
 */
async function executeSkillDecision(
  decision: SkillDecision,
  skillManager: SkillManager
): Promise<void> {
  const { action, reason } = decision;

  if (action === "skip") {
    log.debug(`Skill evaluation: skip — ${reason}`);
    return;
  }

  if (action === "update") {
    const targetName = decision.existing_skill_name;
    const newBody = decision.improved_instructions;
    if (!targetName || !newBody) {
      log.debug("Skill update skipped: missing skill name or instructions");
      return;
    }

    // NOTE(review): the result is used without `await`; confirm that
    // SkillManager.update is synchronous.
    const updated = skillManager.update(
      targetName,
      newBody,
      decision.improved_description
    );

    if (!updated) {
      log.debug(`Skill update failed: "${targetName}" not found`);
      return;
    }
    log.info(`Auto-improved skill "${targetName}": ${reason}`);
    return;
  }

  if (action !== "create") return;

  const proposedName = decision.name;
  const instructions = decision.instructions;
  if (!proposedName || !instructions) {
    log.debug("Skill create skipped: missing name or instructions");
    return;
  }

  // validateSkillName returns a truthy error for an invalid name; in that case
  // try to normalize it and re-validate before giving up.
  let skillName = proposedName;
  if (validateSkillName(skillName)) {
    skillName = normalizeSkillName(skillName);
    if (!skillName || validateSkillName(skillName)) {
      log.debug(`Skill create skipped: name "${proposedName}" cannot be normalized`);
      return;
    }
    log.debug(`Normalized skill name: "${proposedName}" → "${skillName}"`);
  }

  // Refuse to create a skill that resembles one already in the collection.
  const similar = skillManager.findSimilar(skillName);
  if (similar) {
    log.debug(`Skill create skipped: similar skill "${similar.name}" exists`);
    return;
  }

  const description = decision.description || "";
  const created = await skillManager.create(
    skillName,
    description,
    instructions,
    {
      source: "auto_extracted",
      emoji: decision.emoji,
      keywords: decision.keywords,
    }
  );
  if (!created) return;

  // Sync the new skill to Agent Skills, passing the created record's id as
  // sourceSkillId so the two are linked.
  await skillManager.syncToAgentSkills(
    skillName,
    description,
    instructions,
    "1.0.0",
    {
      source: "auto_extracted",
      emoji: decision.emoji,
      keywords: decision.keywords,
      sourceSkillId: created.id,
    }
  );
  log.info(`Auto-created skill "${skillName}": ${reason}`);
}
226
+
227
/**
 * Returns the index of the `}` that closes the `{` at `start`, or -1 if the
 * braces never balance. Braces inside double-quoted JSON strings (including
 * strings containing escaped quotes) are ignored.
 */
function findBalancedBraceEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      if (escaped) {
        escaped = false; // character after a backslash is always literal
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Attempt to parse a SkillDecision from the model's response text.
 *
 * Strategy:
 * 1. Fast path — the whole (trimmed) response is a JSON object.
 * 2. Fallback — walk every `{` in the text, take the balanced `{…}` block
 *    starting there (string-aware, so braces inside JSON string values such as
 *    `"go to {url}"` or `"a } b"` do not confuse the scan), and return the
 *    first block that parses to an object with a non-empty string `action`.
 *    Blocks that are not valid JSON (e.g. a `{placeholder}` in surrounding
 *    prose, or markdown fences around the JSON) are skipped rather than
 *    aborting the search.
 *
 * @param text - Raw text emitted by the model.
 * @returns The parsed decision, or `null` when no JSON object with an
 *          `action` field can be found. Whether `action` is one of the
 *          allowed values is left to the caller.
 */
function parseJsonResponse(text: string): SkillDecision | null {
  const trimmed = text.trim();

  // Accept only plain objects that carry a usable `action`; everything else
  // (null, arrays, primitives, action-less objects) is not a decision.
  const tryParse = (candidate: string): SkillDecision | null => {
    let value: unknown;
    try {
      value = JSON.parse(candidate);
    } catch {
      return null;
    }
    if (typeof value !== "object" || value === null || Array.isArray(value)) {
      return null;
    }
    const action = (value as { action?: unknown }).action;
    return typeof action === "string" && action.length > 0
      ? (value as SkillDecision)
      : null;
  };

  // Fast path: entire response is JSON
  const whole = tryParse(trimmed);
  if (whole) return whole;

  // Fallback: try each balanced `{…}` block in order of appearance
  for (
    let start = trimmed.indexOf("{");
    start !== -1;
    start = trimmed.indexOf("{", start + 1)
  ) {
    const end = findBalancedBraceEnd(trimmed, start);
    if (end === -1) continue;

    const decision = tryParse(trimmed.slice(start, end + 1));
    if (decision) return decision;
  }

  return null;
}