assistme 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +357 -61
- package/package.json +1 -1
- package/src/agent/mcp-servers.ts +42 -27
- package/src/agent/processor.ts +61 -16
- package/src/agent/skill-evaluator.ts +258 -0
- package/src/agent/skills.ts +110 -14
package/package.json
CHANGED
package/src/agent/mcp-servers.ts
CHANGED
|
@@ -9,7 +9,7 @@ import { getLimiterForTool } from "../utils/rate-limiter.js";
|
|
|
9
9
|
import { log } from "../utils/logger.js";
|
|
10
10
|
import type { MemoryManager, MemoryCategory } from "./memory.js";
|
|
11
11
|
import type { SkillManager } from "./skills.js";
|
|
12
|
-
import { substituteArguments, preprocessDynamicContext } from "./skills.js";
|
|
12
|
+
import { substituteArguments, preprocessDynamicContext, validateSkillName } from "./skills.js";
|
|
13
13
|
import { emitEvent, setActionRequest, pollActionResponse } from "../db/supabase.js";
|
|
14
14
|
import { callMcpHandler } from "../db/api-client.js";
|
|
15
15
|
import { JobRunner } from "./job-runner.js";
|
|
@@ -223,6 +223,17 @@ export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfig
|
|
|
223
223
|
emoji: z.string().optional().describe("Single emoji representing this skill"),
|
|
224
224
|
},
|
|
225
225
|
async (args) => {
|
|
226
|
+
// Validate skill name format
|
|
227
|
+
const nameError = validateSkillName(args.name);
|
|
228
|
+
if (nameError) {
|
|
229
|
+
return {
|
|
230
|
+
content: [{
|
|
231
|
+
type: "text",
|
|
232
|
+
text: `Invalid skill name: ${nameError}. Use lowercase kebab-case like "flight-booking".`,
|
|
233
|
+
}],
|
|
234
|
+
};
|
|
235
|
+
}
|
|
236
|
+
|
|
226
237
|
// Check for duplicates in user's collection
|
|
227
238
|
const existing = skillManager.findSimilar(args.name);
|
|
228
239
|
if (existing) {
|
|
@@ -442,12 +453,12 @@ export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfig
|
|
|
442
453
|
}
|
|
443
454
|
|
|
444
455
|
response += `**Your task:** Analyze this job description and decompose it into 4-10 automatable skills.\n\n`;
|
|
445
|
-
response += `**IMPORTANT — You MUST use
|
|
456
|
+
response += `**IMPORTANT — You MUST use ask_user before creating skills:**\n`;
|
|
446
457
|
response += `1. Analyze the job and draft a list of proposed skills (name, emoji, one-line description for each).\n`;
|
|
447
|
-
response += `2. Call \`
|
|
458
|
+
response += `2. Call \`ask_user\` with the formatted skill list as "question" and these options:\n`;
|
|
448
459
|
response += ` - options: [{label: "Approve All", action_key: "approve_all", description: "Create all proposed skills"}, {label: "Cancel", action_key: "cancel", description: "Do not create any skills"}]\n`;
|
|
449
460
|
response += `3. WAIT for the response. If action_key is "approve_all", create all skills using \`skill_create\`. If "cancel", stop.\n`;
|
|
450
|
-
response += `4. Do NOT ask for confirmation in text. Do NOT create skills without calling
|
|
461
|
+
response += `4. Do NOT ask for confirmation in text. Do NOT create skills without calling ask_user first.\n\n`;
|
|
451
462
|
response += `For each skill, call \`skill_create\` with:\n`;
|
|
452
463
|
response += `- name: kebab-case name (e.g. "slack-message-check")\n`;
|
|
453
464
|
response += `- description: one-line description\n`;
|
|
@@ -605,59 +616,63 @@ export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfig
|
|
|
605
616
|
}
|
|
606
617
|
),
|
|
607
618
|
|
|
608
|
-
// ── User
|
|
619
|
+
// ── User Interaction Tool ───────────────────────────────────
|
|
609
620
|
|
|
610
621
|
tool(
|
|
611
|
-
"
|
|
612
|
-
"
|
|
613
|
-
"
|
|
614
|
-
"
|
|
622
|
+
"ask_user",
|
|
623
|
+
"Ask the user a question via the web UI and wait for their response. " +
|
|
624
|
+
"Shows a message with optional predefined option buttons PLUS a free-text input field — " +
|
|
625
|
+
"the user can either click a suggested option or type a custom answer. " +
|
|
626
|
+
"ALWAYS provide options when you can suggest likely answers. " +
|
|
627
|
+
"Do NOT use this for information you can discover yourself (git remote, file contents, etc.).",
|
|
615
628
|
{
|
|
616
|
-
|
|
629
|
+
question: z.string().describe("The question to ask (supports markdown). Be specific about what you need and why."),
|
|
617
630
|
options: z.array(z.object({
|
|
618
631
|
label: z.string().describe("Button label shown to user"),
|
|
619
632
|
action_key: z.string().describe("Machine-readable key returned when selected"),
|
|
620
633
|
description: z.string().optional().describe("Tooltip/description for this option"),
|
|
621
|
-
})).describe("
|
|
634
|
+
})).optional().describe("Suggested options shown as buttons. The user can always type a custom answer instead."),
|
|
635
|
+
placeholder: z.string().optional().describe("Placeholder text for the free-text input field"),
|
|
622
636
|
timeout_seconds: z.number().optional().describe("How long to wait for response (default: 300)"),
|
|
623
637
|
},
|
|
624
638
|
async (args) => {
|
|
625
|
-
const actionId = `
|
|
639
|
+
const actionId = `ask_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
|
|
626
640
|
const timeout = (args.timeout_seconds || 300) * 1000;
|
|
627
641
|
|
|
628
642
|
const actionData = {
|
|
629
643
|
id: actionId,
|
|
630
|
-
type: "
|
|
631
|
-
message: args.
|
|
632
|
-
options: args.options,
|
|
644
|
+
type: "ask_user",
|
|
645
|
+
message: args.question,
|
|
646
|
+
options: args.options || [],
|
|
647
|
+
placeholder: args.placeholder || "",
|
|
633
648
|
created_at: new Date().toISOString(),
|
|
634
649
|
};
|
|
635
650
|
|
|
636
651
|
try {
|
|
637
|
-
// Store action request in message metadata via RPC — UI reads this
|
|
638
652
|
await setActionRequest(taskId, actionData);
|
|
639
|
-
log.info(`
|
|
653
|
+
log.info(`Ask user ${actionId}: "${args.question.slice(0, 80)}..."`);
|
|
640
654
|
|
|
641
|
-
// Also emit event for real-time notification (best-effort)
|
|
642
655
|
emitEvent(taskId, "user_action_request", actionData).catch(() => {});
|
|
643
656
|
|
|
644
|
-
// Poll for response
|
|
645
657
|
const startTime = Date.now();
|
|
646
658
|
const pollInterval = 2000;
|
|
647
659
|
|
|
648
660
|
while (Date.now() - startTime < timeout) {
|
|
649
661
|
const response = await pollActionResponse(taskId);
|
|
650
|
-
if (response) {
|
|
651
|
-
|
|
652
|
-
const
|
|
653
|
-
|
|
662
|
+
if (response && (!response.action_id || response.action_id === actionId)) {
|
|
663
|
+
// Response can be either an option click or free-text input
|
|
664
|
+
const actionKey = (response.action_key || "") as string;
|
|
665
|
+
const text = (response.text || "") as string;
|
|
666
|
+
const label = (response.label || actionKey || text) as string;
|
|
667
|
+
log.info(`User responded: "${label}"`);
|
|
654
668
|
return {
|
|
655
669
|
content: [{
|
|
656
670
|
type: "text",
|
|
657
671
|
text: JSON.stringify({
|
|
658
672
|
status: "responded",
|
|
659
|
-
action_key: actionKey,
|
|
673
|
+
action_key: actionKey || "custom_input",
|
|
660
674
|
label,
|
|
675
|
+
text: text || label,
|
|
661
676
|
}),
|
|
662
677
|
}],
|
|
663
678
|
};
|
|
@@ -666,7 +681,7 @@ export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfig
|
|
|
666
681
|
await new Promise((resolve) => setTimeout(resolve, pollInterval));
|
|
667
682
|
}
|
|
668
683
|
|
|
669
|
-
log.warn(`
|
|
684
|
+
log.warn(`Ask user ${actionId} timed out after ${args.timeout_seconds || 300}s`);
|
|
670
685
|
return {
|
|
671
686
|
content: [{
|
|
672
687
|
type: "text",
|
|
@@ -677,11 +692,11 @@ export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfig
|
|
|
677
692
|
}],
|
|
678
693
|
};
|
|
679
694
|
} catch (err) {
|
|
680
|
-
log.error(`
|
|
695
|
+
log.error(`ask_user failed: ${err}`);
|
|
681
696
|
return {
|
|
682
697
|
content: [{
|
|
683
698
|
type: "text",
|
|
684
|
-
text: `Failed to
|
|
699
|
+
text: `Failed to ask user: ${err instanceof Error ? err.message : err}`,
|
|
685
700
|
}],
|
|
686
701
|
};
|
|
687
702
|
}
|
package/src/agent/processor.ts
CHANGED
|
@@ -21,6 +21,7 @@ import { getBrowser } from "../tools/browser.js";
|
|
|
21
21
|
import { MemoryManager } from "./memory.js";
|
|
22
22
|
import { SkillManager } from "./skills.js";
|
|
23
23
|
import { type ToolCallRecord } from "./skill-extractor.js";
|
|
24
|
+
import { evaluateAndMaybeCreateSkill } from "./skill-evaluator.js";
|
|
24
25
|
import { withRetry } from "../utils/retry.js";
|
|
25
26
|
import {
|
|
26
27
|
createBrowserMcpServer,
|
|
@@ -58,17 +59,29 @@ Available capabilities:
|
|
|
58
59
|
- PROACTIVELY use memory_store during tasks when you discover user preferences, habits, or important context
|
|
59
60
|
- Before completing a task, consider if anything learned should be remembered for future conversations
|
|
60
61
|
|
|
61
|
-
4. SKILL
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
62
|
+
4. SKILL-AWARE EXECUTION (CRITICAL — follow this for EVERY task):
|
|
63
|
+
Step A — Search: Before executing ANY task, check if an existing skill matches (use skill_invoke or skill_search).
|
|
64
|
+
Step B — If skill found: load it with skill_invoke and follow its instructions precisely. If the instructions are incomplete or wrong, adapt and improve as you go — note what changed.
|
|
65
|
+
Step C — If NO skill found: BEFORE executing, draft a skill plan following the Agent Skills format:
|
|
66
|
+
Skill Draft: [kebab-case-name]
|
|
67
|
+
Description: [what this skill does and when to use it]
|
|
68
|
+
Steps:
|
|
69
|
+
1. [first step]
|
|
70
|
+
2. [second step]
|
|
71
|
+
...
|
|
72
|
+
The draft should be a reusable workflow, not specific to this one request. Use generic placeholders where the user provided specific values.
|
|
73
|
+
Step D — Execute: Follow the skill draft (or loaded skill) step by step. Refine the draft as you discover better approaches, edge cases, or missing steps.
|
|
74
|
+
Step E — After execution: The system will automatically evaluate whether to save the skill. You do NOT need to call skill_create manually.
|
|
75
|
+
|
|
76
|
+
Agent Skills format reference (agentskills.io):
|
|
77
|
+
- name: 1-64 chars, lowercase kebab-case (a-z, 0-9, hyphens), no leading/trailing/consecutive hyphens
|
|
78
|
+
- description: 1-1024 chars, describe what the skill does AND when to use it, include keywords for discoverability
|
|
79
|
+
- body: markdown step-by-step instructions, examples, edge cases. Keep under 500 lines.
|
|
80
|
+
- Progressive disclosure: metadata (~100 tokens) → instructions (<5000 tokens) → references (on demand)
|
|
68
81
|
|
|
69
82
|
5. JOB AUTOMATION:
|
|
70
83
|
- When the user describes their job/role/daily work, use skill_generate to decompose it into automatable skills
|
|
71
|
-
- ALWAYS use
|
|
84
|
+
- ALWAYS use ask_user to get user approval before creating skills — never create skills without approval
|
|
72
85
|
- Use job_run to start a job — it gives you the job's goal and available skills as capabilities
|
|
73
86
|
- When running a job, be AGENTIC: decide dynamically what to do based on what you discover
|
|
74
87
|
- Do NOT follow a fixed sequence — if checking Slack reveals a task that needs GitHub, go do GitHub immediately
|
|
@@ -101,6 +114,15 @@ Guidelines:
|
|
|
101
114
|
- Summarize results clearly at the end
|
|
102
115
|
- When you learn something about the user (preferences, habits), use memory_store to remember it
|
|
103
116
|
|
|
117
|
+
CRITICAL — Ask before you guess:
|
|
118
|
+
- Before executing a task, verify you have all required information. If anything is ambiguous or missing, use ask_user to ask.
|
|
119
|
+
- First try to resolve unknowns yourself: check memories, read workspace files (e.g. git remote, config files), or infer from conversation history.
|
|
120
|
+
- If you still lack a critical piece of information after self-resolution, ASK the user via ask_user. Do NOT guess, assume defaults, or proceed with incomplete information.
|
|
121
|
+
- When asking, provide suggested options as buttons whenever possible — the user can always type a custom answer instead.
|
|
122
|
+
- Examples of when to ask: which account/repo/project to target, what format the user wants, which of multiple options to choose, credentials or URLs that cannot be inferred.
|
|
123
|
+
- Keep questions specific and actionable. Explain what you already know and what exactly you need.
|
|
124
|
+
- After receiving the answer, store it with memory_store if it is likely to be useful in future conversations.
|
|
125
|
+
|
|
104
126
|
Workspace path: {workspace_path}`;
|
|
105
127
|
|
|
106
128
|
const MAX_HISTORY_ENTRIES = 10;
|
|
@@ -132,6 +154,22 @@ export class TaskProcessor {
|
|
|
132
154
|
this.sessionId = sessionId;
|
|
133
155
|
}
|
|
134
156
|
|
|
157
|
+
/**
 * Post-task hook: forwards the finished task's Agent SDK session to the
 * skill evaluator.
 *
 * Passes the session ID, this processor's skill manager and the model name
 * to `evaluateAndMaybeCreateSkill` and resolves once that call settles.
 *
 * @param agentSessionId - Agent SDK session ID, forwarded as `sessionId`.
 * @param model - Model name, forwarded unchanged.
 */
private async evaluateSkillPostTask(agentSessionId: string, model: string): Promise<void> {
  const { skillManager } = this;
  await evaluateAndMaybeCreateSkill({ sessionId: agentSessionId, skillManager, model });
}
|
|
172
|
+
|
|
135
173
|
async processTask(task: AgentTask): Promise<void> {
|
|
136
174
|
const config = getConfig();
|
|
137
175
|
resetEventSequence();
|
|
@@ -148,6 +186,7 @@ export class TaskProcessor {
|
|
|
148
186
|
let finalResponse = "";
|
|
149
187
|
const toolCallRecords: ToolCallRecord[] = [];
|
|
150
188
|
let tokenUsage: Record<string, number> | undefined;
|
|
189
|
+
let agentSessionId: string | undefined;
|
|
151
190
|
|
|
152
191
|
try {
|
|
153
192
|
// Task is already claimed atomically by pollAndClaimTask in session.ts
|
|
@@ -169,7 +208,8 @@ export class TaskProcessor {
|
|
|
169
208
|
}
|
|
170
209
|
|
|
171
210
|
// Inject lightweight skill descriptions (full content loaded on-demand via skill_invoke)
|
|
172
|
-
|
|
211
|
+
// Pass task prompt so relevant skills are prioritized to the top
|
|
212
|
+
const skillPrompt = this.skillManager.buildSkillDescriptions(task.prompt);
|
|
173
213
|
if (skillPrompt) {
|
|
174
214
|
systemPrompt += skillPrompt;
|
|
175
215
|
}
|
|
@@ -237,8 +277,8 @@ export class TaskProcessor {
|
|
|
237
277
|
"mcp__assistme-agent__skill_browse",
|
|
238
278
|
"mcp__assistme-agent__skill_add",
|
|
239
279
|
"mcp__assistme-agent__skill_publish",
|
|
240
|
-
// User
|
|
241
|
-
"mcp__assistme-
|
|
280
|
+
// User interaction
|
|
281
|
+
"mcp__assistme-agent__ask_user",
|
|
242
282
|
// Job automation tools
|
|
243
283
|
"mcp__assistme-agent__job_run",
|
|
244
284
|
"mcp__assistme-agent__job_schedule",
|
|
@@ -272,7 +312,7 @@ export class TaskProcessor {
|
|
|
272
312
|
"assistme-agent": agentToolsServer,
|
|
273
313
|
},
|
|
274
314
|
hooks: eventHooks,
|
|
275
|
-
persistSession:
|
|
315
|
+
persistSession: true,
|
|
276
316
|
abortController,
|
|
277
317
|
};
|
|
278
318
|
|
|
@@ -344,7 +384,10 @@ export class TaskProcessor {
|
|
|
344
384
|
}
|
|
345
385
|
|
|
346
386
|
default:
|
|
347
|
-
//
|
|
387
|
+
// Capture session ID from init message for post-task session resume
|
|
388
|
+
if (message.type === "system" && "subtype" in message && (message as Record<string, unknown>).subtype === "init") {
|
|
389
|
+
agentSessionId = (message as Record<string, unknown>).session_id as string;
|
|
390
|
+
}
|
|
348
391
|
log.debug(`SDK message type: ${message.type}`);
|
|
349
392
|
break;
|
|
350
393
|
}
|
|
@@ -371,9 +414,11 @@ export class TaskProcessor {
|
|
|
371
414
|
}
|
|
372
415
|
this.historyCache.set(task.conversation_id, convHistory);
|
|
373
416
|
|
|
374
|
-
//
|
|
375
|
-
|
|
376
|
-
|
|
417
|
+
// Post-task: resume the same session to evaluate skill creation (fire-and-forget)
|
|
418
|
+
if (agentSessionId) {
|
|
419
|
+
this.evaluateSkillPostTask(agentSessionId, config.model)
|
|
420
|
+
.catch((err) => log.debug(`Post-task skill evaluation skipped: ${err}`));
|
|
421
|
+
}
|
|
377
422
|
} catch (err) {
|
|
378
423
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
379
424
|
log.error(`Task failed: ${errorMsg}`);
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import {
|
|
2
|
+
query,
|
|
3
|
+
type SDKAssistantMessage,
|
|
4
|
+
type SDKResultMessage,
|
|
5
|
+
} from "@anthropic-ai/claude-agent-sdk";
|
|
6
|
+
import { log } from "../utils/logger.js";
|
|
7
|
+
import type { SkillManager } from "./skills.js";
|
|
8
|
+
import { validateSkillName, normalizeSkillName } from "./skills.js";
|
|
9
|
+
|
|
10
|
+
// ── Types ───────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
/** The three outcomes the evaluator may choose between. */
type SkillDecisionAction = "create" | "update" | "skip";

/**
 * Decision object parsed from the model's post-task evaluation reply.
 *
 * Only `action` and `reason` are required; the remaining fields are read
 * depending on which action was chosen.
 */
interface SkillDecision {
  /** Selected outcome; determines which optional fields below are consulted. */
  action: SkillDecisionAction;

  /** `create`: proposed skill name. */
  name?: string;
  /** `create`: skill description. */
  description?: string;
  /** `create`: skill instructions body. */
  instructions?: string;
  /** `create`: emoji for the skill. */
  emoji?: string;
  /** `create`: keywords for the skill. */
  keywords?: string[];

  /** `update`: name of the skill to update. */
  existing_skill_name?: string;
  /** `update`: replacement instructions body. */
  improved_instructions?: string;
  /** `update`: replacement description, if it changed. */
  improved_description?: string;

  /** Justification for the decision; interpolated into the create/update/skip log messages. */
  reason: string;
}
|
|
27
|
+
|
|
28
|
+
// ── Agent Skills format spec (agentskills.io) ───────────────────────
|
|
29
|
+
|
|
30
|
+
/**
 * Fixed instruction text for the post-task skill evaluation.
 *
 * `evaluateAndMaybeCreateSkill` places this at the start of the prompt it
 * sends, followed by the list of existing skills. It describes the skill
 * format and asks for a single JSON object whose `action` is "create",
 * "update" or "skip", always accompanied by a "reason".
 *
 * NOTE(review): this text is sent verbatim at runtime. The view this was
 * reviewed from does not preserve leading whitespace, so any indentation of
 * the "Include: ..." continuation lines should be confirmed against the
 * published file before relying on this copy.
 */
const SKILL_EVALUATION_PROMPT = `You just completed a task. Now evaluate whether it should be saved as a reusable Agent Skill.

## Agent Skills Format (agentskills.io)

A skill follows the SKILL.md format:
- name: 1-64 chars, lowercase kebab-case (a-z, 0-9, hyphens), no leading/trailing/consecutive hyphens
- description: 1-1024 chars, describe WHAT it does AND WHEN to use it, include searchable keywords
- body: markdown step-by-step instructions, examples, edge cases. Keep under 500 lines, <5000 tokens.
- Use generic placeholders (e.g. {url}, {query}, {product_name}) instead of specific values
- Instructions should be a REUSABLE workflow, not a transcript of what just happened
- Include error handling steps and tool references (browser_navigate, browser_read_page, Bash, Read, etc.)

## Your Decision

Respond with ONLY a JSON object (no markdown, no explanation outside the JSON). Choose one action:

1. **"create"** — The task is a reusable workflow worth saving.
Include: name, description, instructions (full SKILL.md body), emoji, keywords (3-5, include Chinese if task was in Chinese)

2. **"update"** — An existing skill should be improved based on what you just learned.
Include: existing_skill_name, improved_instructions (full updated body), improved_description (if changed)

3. **"skip"** — Not worth capturing (simple Q&A, one-off, too vague, already fully covered by existing skill).

Always include "reason" explaining your decision.

Use your judgment — no rigid rules. Consider: Is this repeatable? Can it be generalized? Would it save time next time?`;
|
|
57
|
+
|
|
58
|
+
// ── Evaluator ───────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
/**
 * Asks the session that just ran a task whether the work should become a skill.
 *
 * Issues one follow-up `query` with `resume: sessionId`, `maxTurns: 1` and an
 * empty `allowedTools` list. The prompt is `SKILL_EVALUATION_PROMPT` plus a
 * bullet list of every skill returned by `skillManager.getAll()`. Text blocks
 * from assistant messages are concatenated, parsed with `parseJsonResponse`,
 * and a decision with a recognised action is passed to `executeSkillDecision`.
 *
 * Anything thrown while querying, parsing or executing is caught and logged
 * at debug level, so those failures never reject the returned promise.
 *
 * @param opts.sessionId - Session to resume; a falsy value skips evaluation.
 * @param opts.skillManager - Source of existing skills and target of the decision.
 * @param opts.model - Optional model name passed through to `query`.
 */
export async function evaluateAndMaybeCreateSkill(opts: {
  sessionId: string;
  skillManager: SkillManager;
  model?: string;
}): Promise<void> {
  if (!opts.sessionId) {
    log.debug("Skill evaluation skipped: no session ID to resume");
    return;
  }

  // Summarise what already exists so the model can avoid duplicates.
  const known = opts.skillManager.getAll();
  let existingList = "(no existing skills)";
  if (known.length > 0) {
    existingList = known.map((skill) => `- ${skill.name}: ${skill.description}`).join("\n");
  }

  const prompt = `${SKILL_EVALUATION_PROMPT}

## Existing Skills (do NOT duplicate these)
${existingList}

Respond with a JSON object now.`;

  try {
    const stream = query({
      prompt,
      options: {
        resume: opts.sessionId,
        model: opts.model,
        maxTurns: 1,
        allowedTools: [],
      },
    });

    let responseText = "";
    for await (const message of stream) {
      switch (message.type) {
        case "assistant": {
          const { content } = (message as SDKAssistantMessage).message;
          for (const block of content) {
            if (block.type === "text") {
              responseText += block.text;
            }
          }
          break;
        }
        case "result": {
          const outcome = message as SDKResultMessage;
          if (outcome.subtype === "success" && "total_cost_usd" in outcome) {
            log.debug(`Skill evaluation cost: $${(outcome as { total_cost_usd: number }).total_cost_usd.toFixed(4)}`);
          }
          break;
        }
        default:
          break;
      }
    }

    const decision = parseJsonResponse(responseText);
    if (!decision) {
      log.debug("Skill evaluation: no valid JSON in response");
      return;
    }

    const recognised = new Set<string>(["create", "update", "skip"]);
    if (!recognised.has(decision.action)) {
      log.debug("Skill evaluation: invalid action");
      return;
    }

    await executeSkillDecision(decision, opts.skillManager);
  } catch (err) {
    log.debug(`Skill evaluation error: ${err}`);
  }
}
|
|
137
|
+
|
|
138
|
+
/** Returns `value` when it is a string with non-whitespace content, otherwise `undefined`. */
function nonBlankString(value: unknown): string | undefined {
  return typeof value === "string" && value.trim().length > 0 ? value : undefined;
}

/** Returns `value` when it is a string (possibly empty), otherwise `undefined`. */
function optionalString(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/** Returns the string members of `value` when it is an array, otherwise `undefined`. */
function stringList(value: unknown): string[] | undefined {
  return Array.isArray(value)
    ? value.filter((item): item is string => typeof item === "string")
    : undefined;
}

/**
 * Applies a parsed skill decision to the skill manager.
 *
 * The decision comes from unvalidated model output, so every field is
 * type-checked at runtime before use: a field of the wrong type is treated
 * as missing rather than forwarded to the validators or the skill manager.
 *
 * - `create`: requires a non-blank `name` and `instructions`. A name that
 *   fails `validateSkillName` is passed through `normalizeSkillName` and
 *   re-validated; creation is skipped if it is still invalid or if
 *   `findSimilar` reports an existing skill. After a successful `create`,
 *   `syncToAgentSkills` is called with the new skill's id as `sourceSkillId`.
 *   A failure of that sync is logged as a warning and does not hide the fact
 *   that the skill was created.
 * - `update`: requires a non-blank `existing_skill_name` and
 *   `improved_instructions`; forwards them (plus an optional description) to
 *   `skillManager.update` and logs whether it reported success.
 * - `skip`: only logs the reason.
 *
 * @param decision - Decision parsed from the model's reply.
 * @param skillManager - Manager used to look up, create, sync and update skills.
 */
async function executeSkillDecision(
  decision: SkillDecision,
  skillManager: SkillManager
): Promise<void> {
  switch (decision.action) {
    case "create": {
      const requestedName = nonBlankString(decision.name);
      const instructions = nonBlankString(decision.instructions);
      if (!requestedName || !instructions) {
        log.debug("Skill create skipped: missing name or instructions");
        return;
      }

      // Normalize name to valid kebab-case (model may return invalid format)
      let skillName = requestedName;
      if (validateSkillName(skillName)) {
        skillName = normalizeSkillName(skillName);
        if (!skillName || validateSkillName(skillName)) {
          log.debug(`Skill create skipped: name "${requestedName}" cannot be normalized`);
          return;
        }
        log.debug(`Normalized skill name: "${requestedName}" → "${skillName}"`);
      }

      // Check for duplicates
      const existing = skillManager.findSimilar(skillName);
      if (existing) {
        log.debug(`Skill create skipped: similar skill "${existing.name}" exists`);
        return;
      }

      const description = optionalString(decision.description) ?? "";
      const emoji = optionalString(decision.emoji);
      const keywords = stringList(decision.keywords);

      const result = await skillManager.create(
        skillName,
        description,
        instructions,
        {
          source: "auto_extracted",
          emoji,
          keywords,
        }
      );

      if (result) {
        // create() does not set the sourceSkillId linkage, so sync explicitly.
        // The skill already exists at this point; a sync failure must not
        // suppress the creation log below.
        try {
          await skillManager.syncToAgentSkills(
            skillName,
            description,
            instructions,
            "1.0.0",
            {
              source: "auto_extracted",
              emoji,
              keywords,
              sourceSkillId: result.id,
            }
          );
        } catch (err) {
          log.warn(
            `Skill "${skillName}" was created but syncToAgentSkills failed: ${err instanceof Error ? err.message : String(err)}`
          );
        }
        log.info(`Auto-created skill "${skillName}": ${decision.reason}`);
      }
      break;
    }

    case "update": {
      const targetName = nonBlankString(decision.existing_skill_name);
      const improvedInstructions = nonBlankString(decision.improved_instructions);
      if (!targetName || !improvedInstructions) {
        log.debug("Skill update skipped: missing skill name or instructions");
        return;
      }

      const updated = skillManager.update(
        targetName,
        improvedInstructions,
        optionalString(decision.improved_description)
      );

      if (updated) {
        log.info(`Auto-improved skill "${targetName}": ${decision.reason}`);
      } else {
        log.debug(`Skill update failed: "${targetName}" not found`);
      }
      break;
    }

    case "skip":
      log.debug(`Skill evaluation: skip — ${decision.reason}`);
      break;
  }
}
|
|
226
|
+
|
|
227
|
+
/**
 * Parses `candidate` as JSON and returns it only if it is an object carrying
 * a string `action` field; returns null on a parse error or any other shape.
 */
function tryParseDecision(candidate: string): SkillDecision | null {
  try {
    const parsed: unknown = JSON.parse(candidate);
    if (
      typeof parsed === "object" &&
      parsed !== null &&
      typeof (parsed as { action?: unknown }).action === "string"
    ) {
      return parsed as SkillDecision;
    }
  } catch {
    /* not valid JSON */
  }
  return null;
}

/**
 * Returns the index of the `}` that closes the `{` at `open`, or -1 if the
 * braces never balance. Braces inside double-quoted JSON strings (including
 * after backslash escapes) are ignored.
 */
function findMatchingBrace(src: string, open: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = open; i < src.length; i++) {
    const ch = src[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === "{") depth++;
    else if (ch === "}" && --depth === 0) return i;
  }
  return -1;
}

/**
 * Attempts to parse a SkillDecision from the model's response text.
 *
 * Tries the whole (trimmed) text first, for replies that are pure JSON. If
 * that fails, scans for `{…}` blocks: for each `{` in order, it finds the
 * matching `}` while skipping braces that sit inside JSON strings, and
 * returns the first block that parses to an object with a string `action`.
 * This tolerates code fences or prose around the JSON, stray braces in the
 * surrounding text (e.g. `{url}`), and braces inside string values such as
 * skill instructions.
 *
 * @param text - Raw response text from the model.
 * @returns The parsed decision, or null when no such object is found.
 */
function parseJsonResponse(text: string): SkillDecision | null {
  const trimmed = text.trim();
  if (trimmed.length === 0) return null;

  // Fast path: entire response is JSON
  const whole = tryParseDecision(trimmed);
  if (whole) return whole;

  // Fallback: try each `{` as a potential start of the decision object
  for (
    let start = trimmed.indexOf("{");
    start !== -1;
    start = trimmed.indexOf("{", start + 1)
  ) {
    const end = findMatchingBrace(trimmed, start);
    if (end === -1) continue;
    const decision = tryParseDecision(trimmed.slice(start, end + 1));
    if (decision) return decision;
  }
  return null;
}
|