assistme 0.2.8 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-TTEGHE2E.js +47 -0
- package/dist/chunk-UWE5WVQI.js +289 -0
- package/dist/config-PUIS2TQL.js +12 -0
- package/dist/index.js +818 -704
- package/dist/job-runner-N4XAAWLJ.js +7 -0
- package/package.json +1 -2
- package/src/agent/job-runner.ts +33 -71
- package/src/agent/mcp-servers.ts +111 -151
- package/src/agent/memory.test.ts +41 -65
- package/src/agent/memory.ts +33 -134
- package/src/agent/processor.ts +59 -17
- package/src/agent/scheduler.ts +47 -93
- package/src/agent/session.test.ts +8 -12
- package/src/agent/session.ts +10 -53
- package/src/agent/skill-evaluator.ts +258 -0
- package/src/agent/skills.ts +191 -494
- package/src/commands/job.ts +6 -6
- package/src/commands/status.ts +3 -10
- package/src/db/api-client.ts +68 -0
- package/src/db/supabase.test.ts +71 -184
- package/src/db/supabase.ts +140 -243
- package/dist/chunk-XY3LGAOY.js +0 -580
- package/dist/job-runner-XTGLMPZ3.js +0 -6
package/src/agent/scheduler.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { callMcpHandler } from "../db/api-client.js";
|
|
2
2
|
import { log } from "../utils/logger.js";
|
|
3
3
|
|
|
4
4
|
const SCHEDULER_INTERVAL = 30_000; // Check every 30 seconds
|
|
@@ -35,10 +35,8 @@ export function getNextRunTime(cronExpr: string, timezone: string, fromDate?: Da
|
|
|
35
35
|
|
|
36
36
|
const [minExpr, hourExpr, domExpr, monExpr, dowExpr] = parts;
|
|
37
37
|
|
|
38
|
-
// Simple cron parser — handles: *, */N, N, N-M, N,M
|
|
39
38
|
function parseField(expr: string, min: number, max: number): number[] {
|
|
40
39
|
const values: number[] = [];
|
|
41
|
-
|
|
42
40
|
for (const part of expr.split(",")) {
|
|
43
41
|
if (part === "*") {
|
|
44
42
|
for (let i = min; i <= max; i++) values.push(i);
|
|
@@ -52,7 +50,6 @@ export function getNextRunTime(cronExpr: string, timezone: string, fromDate?: Da
|
|
|
52
50
|
values.push(parseInt(part));
|
|
53
51
|
}
|
|
54
52
|
}
|
|
55
|
-
|
|
56
53
|
return values.sort((a, b) => a - b);
|
|
57
54
|
}
|
|
58
55
|
|
|
@@ -60,17 +57,14 @@ export function getNextRunTime(cronExpr: string, timezone: string, fromDate?: Da
|
|
|
60
57
|
const hours = parseField(hourExpr, 0, 23);
|
|
61
58
|
const daysOfMonth = parseField(domExpr, 1, 31);
|
|
62
59
|
const months = parseField(monExpr, 1, 12);
|
|
63
|
-
const daysOfWeek = parseField(dowExpr, 0, 6);
|
|
60
|
+
const daysOfWeek = parseField(dowExpr, 0, 6);
|
|
64
61
|
|
|
65
62
|
const useUTC = timezone === "UTC";
|
|
66
63
|
|
|
67
|
-
|
|
68
|
-
const candidate = new Date(now.getTime() + 60_000); // Start from next minute
|
|
64
|
+
const candidate = new Date(now.getTime() + 60_000);
|
|
69
65
|
candidate.setSeconds(0, 0);
|
|
70
66
|
|
|
71
|
-
// Search up to 366 days ahead
|
|
72
67
|
for (let i = 0; i < 527040; i++) {
|
|
73
|
-
// 366 * 24 * 60
|
|
74
68
|
const m = useUTC ? candidate.getUTCMinutes() : candidate.getMinutes();
|
|
75
69
|
const h = useUTC ? candidate.getUTCHours() : candidate.getHours();
|
|
76
70
|
const dom = useUTC ? candidate.getUTCDate() : candidate.getDate();
|
|
@@ -87,10 +81,9 @@ export function getNextRunTime(cronExpr: string, timezone: string, fromDate?: Da
|
|
|
87
81
|
return candidate;
|
|
88
82
|
}
|
|
89
83
|
|
|
90
|
-
candidate.setTime(candidate.getTime() + 60_000);
|
|
84
|
+
candidate.setTime(candidate.getTime() + 60_000);
|
|
91
85
|
}
|
|
92
86
|
|
|
93
|
-
// Fallback: 24 hours from now
|
|
94
87
|
return new Date(now.getTime() + 86400_000);
|
|
95
88
|
}
|
|
96
89
|
|
|
@@ -103,10 +96,8 @@ export class Scheduler {
|
|
|
103
96
|
this.onScheduledTask = onScheduledTask;
|
|
104
97
|
this.running = true;
|
|
105
98
|
|
|
106
|
-
// Initialize next_run_at for tasks that don't have it yet
|
|
107
99
|
await this.initializeNextRuns();
|
|
108
100
|
|
|
109
|
-
// Check for due tasks periodically
|
|
110
101
|
this.timer = setInterval(() => this.checkDueTasks(), SCHEDULER_INTERVAL);
|
|
111
102
|
log.info("Scheduler started (checking every 30s)");
|
|
112
103
|
}
|
|
@@ -121,22 +112,15 @@ export class Scheduler {
|
|
|
121
112
|
|
|
122
113
|
private async initializeNextRuns(): Promise<void> {
|
|
123
114
|
try {
|
|
124
|
-
const
|
|
125
|
-
const sb = getSupabase();
|
|
126
|
-
const { data } = await sb
|
|
127
|
-
.from("agent_scheduled_tasks")
|
|
128
|
-
.select("*")
|
|
129
|
-
.eq("user_id", userId)
|
|
130
|
-
.eq("enabled", true)
|
|
131
|
-
.is("next_run_at", null);
|
|
115
|
+
const data = await callMcpHandler<ScheduledTask[]>("schedule.get_uninitialized");
|
|
132
116
|
|
|
133
117
|
if (data) {
|
|
134
118
|
for (const task of data) {
|
|
135
119
|
const nextRun = getNextRunTime(task.cron_expression, task.timezone);
|
|
136
|
-
await
|
|
137
|
-
.
|
|
138
|
-
|
|
139
|
-
|
|
120
|
+
await callMcpHandler("schedule.update", {
|
|
121
|
+
task_id: task.id,
|
|
122
|
+
next_run_at: nextRun.toISOString(),
|
|
123
|
+
});
|
|
140
124
|
}
|
|
141
125
|
}
|
|
142
126
|
} catch (err) {
|
|
@@ -148,44 +132,37 @@ export class Scheduler {
|
|
|
148
132
|
if (!this.running || !this.onScheduledTask) return;
|
|
149
133
|
|
|
150
134
|
try {
|
|
151
|
-
const
|
|
152
|
-
const sb = getSupabase();
|
|
153
|
-
|
|
154
|
-
const { data: dueTasks } = await sb
|
|
155
|
-
.from("agent_scheduled_tasks")
|
|
156
|
-
.select("*")
|
|
157
|
-
.eq("user_id", userId)
|
|
158
|
-
.eq("enabled", true)
|
|
159
|
-
.lte("next_run_at", new Date().toISOString())
|
|
160
|
-
.order("next_run_at", { ascending: true })
|
|
161
|
-
.limit(1);
|
|
135
|
+
const dueTasks = await callMcpHandler<ScheduledTask[]>("schedule.check_due");
|
|
162
136
|
|
|
163
137
|
if (!dueTasks || dueTasks.length === 0) return;
|
|
164
138
|
|
|
165
|
-
const task = dueTasks[0]
|
|
139
|
+
const task = dueTasks[0];
|
|
166
140
|
log.info(`Scheduled task due: "${task.name}"`);
|
|
167
141
|
|
|
168
|
-
// Calculate next run before executing
|
|
169
142
|
const nextRun = getNextRunTime(task.cron_expression, task.timezone);
|
|
170
143
|
|
|
171
144
|
// Update: set running, advance next_run_at
|
|
172
|
-
await
|
|
173
|
-
.
|
|
174
|
-
.
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
})
|
|
179
|
-
.eq("id", task.id);
|
|
145
|
+
await callMcpHandler("schedule.update", {
|
|
146
|
+
task_id: task.id,
|
|
147
|
+
last_run_at: new Date().toISOString(),
|
|
148
|
+
next_run_at: nextRun.toISOString(),
|
|
149
|
+
run_count: task.run_count + 1,
|
|
150
|
+
});
|
|
180
151
|
|
|
181
152
|
// Execute
|
|
182
153
|
try {
|
|
183
154
|
await this.onScheduledTask(task);
|
|
184
155
|
|
|
185
|
-
await
|
|
156
|
+
await callMcpHandler("schedule.update", {
|
|
157
|
+
task_id: task.id,
|
|
158
|
+
last_error: null,
|
|
159
|
+
});
|
|
186
160
|
} catch (err) {
|
|
187
161
|
const errMsg = err instanceof Error ? err.message : String(err);
|
|
188
|
-
await
|
|
162
|
+
await callMcpHandler("schedule.update", {
|
|
163
|
+
task_id: task.id,
|
|
164
|
+
last_error: errMsg,
|
|
165
|
+
});
|
|
189
166
|
log.error(`Scheduled task "${task.name}" failed: ${errMsg}`);
|
|
190
167
|
}
|
|
191
168
|
} catch (err) {
|
|
@@ -197,68 +174,45 @@ export class Scheduler {
|
|
|
197
174
|
// ── CRUD helpers for CLI commands ──────────────────────────────────
|
|
198
175
|
|
|
199
176
|
export async function createScheduledTask(
|
|
200
|
-
|
|
177
|
+
_userId: string,
|
|
201
178
|
name: string,
|
|
202
179
|
prompt: string,
|
|
203
180
|
cronExpression: string,
|
|
204
181
|
timezone = "UTC"
|
|
205
182
|
): Promise<ScheduledTask> {
|
|
206
|
-
const sb = getSupabase();
|
|
207
183
|
const nextRun = getNextRunTime(cronExpression, timezone);
|
|
208
184
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
timezone,
|
|
217
|
-
next_run_at: nextRun.toISOString(),
|
|
218
|
-
})
|
|
219
|
-
.select()
|
|
220
|
-
.single();
|
|
221
|
-
|
|
222
|
-
if (error) throw new Error(`Failed to create schedule: ${error.message}`);
|
|
223
|
-
return data as ScheduledTask;
|
|
185
|
+
return callMcpHandler<ScheduledTask>("schedule.create", {
|
|
186
|
+
name,
|
|
187
|
+
prompt,
|
|
188
|
+
cron_expression: cronExpression,
|
|
189
|
+
timezone,
|
|
190
|
+
next_run_at: nextRun.toISOString(),
|
|
191
|
+
});
|
|
224
192
|
}
|
|
225
193
|
|
|
226
|
-
export async function listScheduledTasks(
|
|
227
|
-
|
|
228
|
-
const { data, error } = await sb
|
|
229
|
-
.from("agent_scheduled_tasks")
|
|
230
|
-
.select("*")
|
|
231
|
-
.eq("user_id", userId)
|
|
232
|
-
.order("created_at", { ascending: false });
|
|
233
|
-
|
|
234
|
-
if (error) throw new Error(`Failed to list schedules: ${error.message}`);
|
|
235
|
-
return (data || []) as ScheduledTask[];
|
|
194
|
+
export async function listScheduledTasks(_userId: string): Promise<ScheduledTask[]> {
|
|
195
|
+
return callMcpHandler<ScheduledTask[]>("schedule.list");
|
|
236
196
|
}
|
|
237
197
|
|
|
238
198
|
export async function toggleScheduledTask(taskId: string, enabled: boolean): Promise<void> {
|
|
239
|
-
const
|
|
240
|
-
|
|
199
|
+
const params: Record<string, unknown> = { task_id: taskId, enabled };
|
|
200
|
+
|
|
241
201
|
if (enabled) {
|
|
242
|
-
//
|
|
243
|
-
const {
|
|
244
|
-
.
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
update.next_run_at = nextRun.toISOString();
|
|
202
|
+
// Need to recalculate next run when re-enabling
|
|
203
|
+
const taskData = await callMcpHandler<{ cron_expression: string; timezone: string }>(
|
|
204
|
+
"schedule.get_task",
|
|
205
|
+
{ task_id: taskId },
|
|
206
|
+
);
|
|
207
|
+
if (taskData) {
|
|
208
|
+
const nextRun = getNextRunTime(taskData.cron_expression, taskData.timezone);
|
|
209
|
+
params.next_run_at = nextRun.toISOString();
|
|
251
210
|
}
|
|
252
211
|
}
|
|
253
212
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
if (error) throw new Error(`Failed to toggle schedule: ${error.message}`);
|
|
213
|
+
await callMcpHandler("schedule.toggle", params);
|
|
257
214
|
}
|
|
258
215
|
|
|
259
216
|
export async function deleteScheduledTask(taskId: string): Promise<void> {
|
|
260
|
-
|
|
261
|
-
const { error } = await sb.from("agent_scheduled_tasks").delete().eq("id", taskId);
|
|
262
|
-
|
|
263
|
-
if (error) throw new Error(`Failed to delete schedule: ${error.message}`);
|
|
217
|
+
await callMcpHandler("schedule.delete", { task_id: taskId });
|
|
264
218
|
}
|
|
package/src/agent/session.test.ts
CHANGED
|
@@ -15,17 +15,6 @@ const mockSession = {
|
|
|
15
15
|
metadata: {},
|
|
16
16
|
};
|
|
17
17
|
|
|
18
|
-
// Build a fluent Supabase chain that does nothing
|
|
19
|
-
const chain: Record<string, unknown> = {};
|
|
20
|
-
const methods = [
|
|
21
|
-
"select", "insert", "update", "delete", "eq", "neq", "not",
|
|
22
|
-
"or", "in", "order", "limit", "single", "from", "lt",
|
|
23
|
-
];
|
|
24
|
-
for (const method of methods) {
|
|
25
|
-
chain[method] = vi.fn().mockReturnValue(chain);
|
|
26
|
-
}
|
|
27
|
-
chain.then = (resolve: (value: unknown) => void) => resolve({ data: [], error: null });
|
|
28
|
-
|
|
29
18
|
const mockCreateSession = vi.fn().mockResolvedValue(mockSession);
|
|
30
19
|
const mockUpdateHeartbeat = vi.fn().mockResolvedValue(undefined);
|
|
31
20
|
const mockEndSession = vi.fn().mockResolvedValue(undefined);
|
|
@@ -35,6 +24,8 @@ const mockClaimTask = vi.fn().mockResolvedValue(true);
|
|
|
35
24
|
const mockCreateTask = vi.fn().mockResolvedValue({ id: "task-001", prompt: "test" });
|
|
36
25
|
const mockGetOrCreateCliConversation = vi.fn().mockResolvedValue("conv-001");
|
|
37
26
|
|
|
27
|
+
const mockCleanupStaleSessions = vi.fn().mockResolvedValue(0);
|
|
28
|
+
|
|
38
29
|
vi.mock("../db/supabase.js", () => ({
|
|
39
30
|
createSession: (...args: unknown[]) => mockCreateSession(...args),
|
|
40
31
|
updateHeartbeat: (...args: unknown[]) => mockUpdateHeartbeat(...args),
|
|
@@ -44,7 +35,12 @@ vi.mock("../db/supabase.js", () => ({
|
|
|
44
35
|
claimTask: (...args: unknown[]) => mockClaimTask(...args),
|
|
45
36
|
createTask: (...args: unknown[]) => mockCreateTask(...args),
|
|
46
37
|
getOrCreateCliConversation: (...args: unknown[]) => mockGetOrCreateCliConversation(...args),
|
|
47
|
-
|
|
38
|
+
cleanupStaleSessions: (...args: unknown[]) => mockCleanupStaleSessions(...args),
|
|
39
|
+
pollAndClaimJobRun: vi.fn().mockResolvedValue(null),
|
|
40
|
+
}));
|
|
41
|
+
|
|
42
|
+
vi.mock("../db/api-client.js", () => ({
|
|
43
|
+
callMcpHandler: vi.fn().mockResolvedValue(null),
|
|
48
44
|
}));
|
|
49
45
|
|
|
50
46
|
vi.mock("../utils/config.js", () => ({
|
package/src/agent/session.ts
CHANGED
|
@@ -8,11 +8,12 @@ import {
|
|
|
8
8
|
claimTask,
|
|
9
9
|
createTask,
|
|
10
10
|
getOrCreateCliConversation,
|
|
11
|
-
|
|
11
|
+
cleanupStaleSessions,
|
|
12
12
|
AgentSession,
|
|
13
13
|
ConversationMessage,
|
|
14
14
|
PendingJobRun,
|
|
15
15
|
} from "../db/supabase.js";
|
|
16
|
+
import { callMcpHandler } from "../db/api-client.js";
|
|
16
17
|
import { getConfig } from "../utils/config.js";
|
|
17
18
|
import { log } from "../utils/logger.js";
|
|
18
19
|
import { Scheduler, ScheduledTask } from "./scheduler.js";
|
|
@@ -21,7 +22,6 @@ import { JobRunner } from "./job-runner.js";
|
|
|
21
22
|
const DEFAULT_HEARTBEAT_INTERVAL = 30_000; // 30 seconds
|
|
22
23
|
const DEFAULT_POLL_INTERVAL = 2_000; // 2 seconds
|
|
23
24
|
const MAX_POLL_INTERVAL = 30_000; // Max backoff: 30 seconds
|
|
24
|
-
const STALE_SESSION_THRESHOLD = 120_000; // 2 minutes without heartbeat = stale
|
|
25
25
|
|
|
26
26
|
export class SessionManager {
|
|
27
27
|
private session: AgentSession | null = null;
|
|
@@ -93,9 +93,6 @@ export class SessionManager {
|
|
|
93
93
|
return this.session;
|
|
94
94
|
}
|
|
95
95
|
|
|
96
|
-
/**
|
|
97
|
-
* Schedule the next poll with exponential backoff on failures.
|
|
98
|
-
*/
|
|
99
96
|
private schedulePoll(): void {
|
|
100
97
|
if (!this.running) return;
|
|
101
98
|
|
|
@@ -126,10 +123,6 @@ export class SessionManager {
|
|
|
126
123
|
}
|
|
127
124
|
}
|
|
128
125
|
|
|
129
|
-
/**
|
|
130
|
-
* Execute a pending job run triggered from the web UI.
|
|
131
|
-
* Loads the job, builds the agentic prompt, and processes it as a chat task.
|
|
132
|
-
*/
|
|
133
126
|
private async executeJobRun(jobRun: PendingJobRun): Promise<void> {
|
|
134
127
|
if (!this.session || !this.userId || !this.conversationId || !this.onTask)
|
|
135
128
|
return;
|
|
@@ -157,11 +150,10 @@ export class SessionManager {
|
|
|
157
150
|
|
|
158
151
|
// Link session to run record (non-critical)
|
|
159
152
|
try {
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
.
|
|
163
|
-
|
|
164
|
-
.eq("id", jobRun.id);
|
|
153
|
+
await callMcpHandler("job.link_run_session", {
|
|
154
|
+
run_id: jobRun.id,
|
|
155
|
+
session_id: this.session.id,
|
|
156
|
+
});
|
|
165
157
|
} catch (linkErr) {
|
|
166
158
|
log.debug(`Failed to link session to job run: ${linkErr}`);
|
|
167
159
|
}
|
|
@@ -253,50 +245,19 @@ export class SessionManager {
|
|
|
253
245
|
this.schedulePoll();
|
|
254
246
|
}
|
|
255
247
|
|
|
256
|
-
/**
|
|
257
|
-
* Mark sessions as offline if they haven't sent a heartbeat recently.
|
|
258
|
-
* Runs once on startup to clean up orphaned sessions from crashed processes.
|
|
259
|
-
*/
|
|
260
248
|
private async cleanupStaleSessions(): Promise<void> {
|
|
261
|
-
if (!this.userId) return;
|
|
249
|
+
if (!this.userId || !this.session) return;
|
|
262
250
|
|
|
263
251
|
try {
|
|
264
|
-
const
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
).toISOString();
|
|
268
|
-
|
|
269
|
-
const { data: stale } = await sb
|
|
270
|
-
.from("agent_sessions")
|
|
271
|
-
.select("id")
|
|
272
|
-
.eq("user_id", this.userId)
|
|
273
|
-
.in("status", ["online", "busy"])
|
|
274
|
-
.lt("last_heartbeat_at", threshold)
|
|
275
|
-
.neq("id", this.session?.id || "");
|
|
276
|
-
|
|
277
|
-
if (stale && stale.length > 0) {
|
|
278
|
-
for (const s of stale) {
|
|
279
|
-
await sb
|
|
280
|
-
.from("agent_sessions")
|
|
281
|
-
.update({
|
|
282
|
-
status: "offline",
|
|
283
|
-
ended_at: new Date().toISOString(),
|
|
284
|
-
metadata: { ended_reason: "stale_session_cleanup" },
|
|
285
|
-
})
|
|
286
|
-
.eq("id", s.id);
|
|
287
|
-
}
|
|
288
|
-
log.info(`Cleaned up ${stale.length} stale session(s)`);
|
|
252
|
+
const cleaned = await cleanupStaleSessions(this.session.id);
|
|
253
|
+
if (cleaned > 0) {
|
|
254
|
+
log.info(`Cleaned up ${cleaned} stale session(s)`);
|
|
289
255
|
}
|
|
290
256
|
} catch (err) {
|
|
291
257
|
log.debug(`Stale session cleanup error: ${err}`);
|
|
292
258
|
}
|
|
293
259
|
}
|
|
294
260
|
|
|
295
|
-
/**
|
|
296
|
-
* Submit a task from the interactive prompt or scheduled job.
|
|
297
|
-
* Sets processing=true BEFORE creating the task so the poll loop
|
|
298
|
-
* never races to pick it up.
|
|
299
|
-
*/
|
|
300
261
|
async submitTask(prompt: string): Promise<void> {
|
|
301
262
|
if (!this.session || !this.userId || !this.conversationId || !this.onTask) {
|
|
302
263
|
throw new Error("Session not started");
|
|
@@ -328,9 +289,6 @@ export class SessionManager {
|
|
|
328
289
|
}
|
|
329
290
|
}
|
|
330
291
|
|
|
331
|
-
/**
|
|
332
|
-
* Stop the session with a safety timeout to prevent hanging on shutdown.
|
|
333
|
-
*/
|
|
334
292
|
async stop(timeoutMs = 5_000): Promise<void> {
|
|
335
293
|
this.running = false;
|
|
336
294
|
this.scheduler.stop();
|
|
@@ -347,7 +305,6 @@ export class SessionManager {
|
|
|
347
305
|
|
|
348
306
|
if (this.session) {
|
|
349
307
|
try {
|
|
350
|
-
// Wrap DB call with a deadline to avoid hanging on shutdown
|
|
351
308
|
await Promise.race([
|
|
352
309
|
endSession(this.session.id),
|
|
353
310
|
new Promise<never>((_, reject) =>
|
|
package/src/agent/skill-evaluator.ts
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import {
|
|
2
|
+
query,
|
|
3
|
+
type SDKAssistantMessage,
|
|
4
|
+
type SDKResultMessage,
|
|
5
|
+
} from "@anthropic-ai/claude-agent-sdk";
|
|
6
|
+
import { log } from "../utils/logger.js";
|
|
7
|
+
import type { SkillManager } from "./skills.js";
|
|
8
|
+
import { validateSkillName, normalizeSkillName } from "./skills.js";
|
|
9
|
+
|
|
10
|
+
// ── Types ───────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
/**
 * Decision object the model is asked to return after a task completes.
 * Which optional fields are meaningful depends on `action`.
 */
interface SkillDecision {
  /** Outcome chosen by the model. */
  action: "create" | "update" | "skip";

  /** "create": name of the new skill. */
  name?: string;
  /** "create": description of the new skill. */
  description?: string;
  /** "create": full instruction body of the new skill. */
  instructions?: string;
  /** "create": emoji attached to the new skill. */
  emoji?: string;
  /** "create": search keywords for the new skill. */
  keywords?: string[];

  /** "update": name of the skill to improve. */
  existing_skill_name?: string;
  /** "update": full replacement instruction body. */
  improved_instructions?: string;
  /** "update": replacement description, when it changed. */
  improved_description?: string;

  /** Explanation of the decision; expected for every action. */
  reason: string;
}
|
|
27
|
+
|
|
28
|
+
// ── Agent Skills format spec (agentskills.io) ───────────────────────
|
|
29
|
+
|
|
30
|
+
const SKILL_EVALUATION_PROMPT = `You just completed a task. Now evaluate whether it should be saved as a reusable Agent Skill.
|
|
31
|
+
|
|
32
|
+
## Agent Skills Format (agentskills.io)
|
|
33
|
+
|
|
34
|
+
A skill follows the SKILL.md format:
|
|
35
|
+
- name: 1-64 chars, lowercase kebab-case (a-z, 0-9, hyphens), no leading/trailing/consecutive hyphens
|
|
36
|
+
- description: 1-1024 chars, describe WHAT it does AND WHEN to use it, include searchable keywords
|
|
37
|
+
- body: markdown step-by-step instructions, examples, edge cases. Keep under 500 lines, <5000 tokens.
|
|
38
|
+
- Use generic placeholders (e.g. {url}, {query}, {product_name}) instead of specific values
|
|
39
|
+
- Instructions should be a REUSABLE workflow, not a transcript of what just happened
|
|
40
|
+
- Include error handling steps and tool references (browser_navigate, browser_read_page, Bash, Read, etc.)
|
|
41
|
+
|
|
42
|
+
## Your Decision
|
|
43
|
+
|
|
44
|
+
Respond with ONLY a JSON object (no markdown, no explanation outside the JSON). Choose one action:
|
|
45
|
+
|
|
46
|
+
1. **"create"** — The task is a reusable workflow worth saving.
|
|
47
|
+
Include: name, description, instructions (full SKILL.md body), emoji, keywords (3-5, include Chinese if task was in Chinese)
|
|
48
|
+
|
|
49
|
+
2. **"update"** — An existing skill should be improved based on what you just learned.
|
|
50
|
+
Include: existing_skill_name, improved_instructions (full updated body), improved_description (if changed)
|
|
51
|
+
|
|
52
|
+
3. **"skip"** — Not worth capturing (simple Q&A, one-off, too vague, already fully covered by existing skill).
|
|
53
|
+
|
|
54
|
+
Always include "reason" explaining your decision.
|
|
55
|
+
|
|
56
|
+
Use your judgment — no rigid rules. Consider: Is this repeatable? Can it be generalized? Would it save time next time?`;
|
|
57
|
+
|
|
58
|
+
// ── Evaluator ───────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
/**
 * Ask the agent, right after a task, whether that task should become a new
 * skill, improve an existing one, or be ignored.
 *
 * The evaluation prompt is sent through the Agent SDK with `resume` set to the
 * given session ID, a single turn, and an empty allowed-tools list. The text
 * blocks of the assistant reply are concatenated, parsed into a
 * `SkillDecision`, and passed to `executeSkillDecision`.
 *
 * Errors raised while querying, parsing, or applying the decision are logged
 * at debug level and not rethrown.
 *
 * @param opts.sessionId - Session to resume; evaluation is skipped when empty.
 * @param opts.skillManager - Source of the existing-skill list and target of the decision.
 * @param opts.model - Optional model forwarded to the SDK query.
 */
export async function evaluateAndMaybeCreateSkill(opts: {
  sessionId: string;
  skillManager: SkillManager;
  model?: string;
}): Promise<void> {
  const { sessionId, skillManager, model } = opts;

  if (!sessionId) {
    log.debug("Skill evaluation skipped: no session ID to resume");
    return;
  }

  // Tell the model which skills already exist so it does not propose duplicates.
  const known = skillManager.getAll();
  let existingList = "(no existing skills)";
  if (known.length > 0) {
    existingList = known.map((skill) => `- ${skill.name}: ${skill.description}`).join("\n");
  }

  const prompt = `${SKILL_EVALUATION_PROMPT}

## Existing Skills (do NOT duplicate these)
${existingList}

Respond with a JSON object now.`;

  try {
    let reply = "";

    const stream = query({
      prompt,
      options: {
        resume: sessionId,
        model,
        maxTurns: 1,
        allowedTools: [],
      },
    });

    for await (const message of stream) {
      switch (message.type) {
        case "assistant": {
          // Only text blocks contribute to the decision payload.
          const assistant = message as SDKAssistantMessage;
          for (const block of assistant.message.content) {
            if (block.type === "text") {
              reply += block.text;
            }
          }
          break;
        }
        case "result": {
          const outcome = message as SDKResultMessage;
          if (outcome.subtype === "success" && "total_cost_usd" in outcome) {
            const cost = (outcome as { total_cost_usd: number }).total_cost_usd;
            log.debug(`Skill evaluation cost: $${cost.toFixed(4)}`);
          }
          break;
        }
        default:
          break;
      }
    }

    const decision = parseJsonResponse(reply);
    if (!decision) {
      log.debug("Skill evaluation: no valid JSON in response");
      return;
    }

    const knownActions = ["create", "update", "skip"];
    if (!knownActions.includes(decision.action)) {
      log.debug("Skill evaluation: invalid action");
      return;
    }

    await executeSkillDecision(decision, skillManager);
  } catch (err) {
    log.debug(`Skill evaluation error: ${err}`);
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Apply a parsed decision through the skill manager.
 *
 * - "create": requires `name` and `instructions`. The name is normalized when
 *   it fails validation, the skill is skipped if a similar one exists, and a
 *   successful create is followed by `syncToAgentSkills` carrying the new
 *   skill's id as `sourceSkillId`.
 * - "update": requires `existing_skill_name` and `improved_instructions`.
 * - "skip": only logs the reason.
 *
 * Missing fields or rejected names are logged at debug level and the function
 * returns without touching the skill manager.
 *
 * @param decision - Decision returned by the model; `action` is expected to be validated by the caller.
 * @param skillManager - Manager used to look up, create, update and sync skills.
 */
async function executeSkillDecision(
  decision: SkillDecision,
  skillManager: SkillManager
): Promise<void> {
  switch (decision.action) {
    case "create": {
      if (!decision.name || !decision.instructions) {
        log.debug("Skill create skipped: missing name or instructions");
        return;
      }

      // Normalize name to valid kebab-case (model may return invalid format).
      // NOTE(review): validateSkillName is treated as truthy-when-invalid here
      // (presumably it returns an error description) — confirm in skills.ts.
      let skillName = decision.name;
      if (validateSkillName(skillName)) {
        skillName = normalizeSkillName(skillName);
        if (!skillName || validateSkillName(skillName)) {
          log.debug(`Skill create skipped: name "${decision.name}" cannot be normalized`);
          return;
        }
        log.debug(`Normalized skill name: "${decision.name}" → "${skillName}"`);
      }

      // Check for duplicates
      const existing = skillManager.findSimilar(skillName);
      if (existing) {
        log.debug(`Skill create skipped: similar skill "${existing.name}" exists`);
        return;
      }

      const result = await skillManager.create(
        skillName,
        decision.description || "",
        decision.instructions,
        {
          source: "auto_extracted",
          emoji: decision.emoji,
          keywords: decision.keywords,
        }
      );

      if (result) {
        // syncToAgentSkills is handled by skill_create flow; only sync here
        // with sourceSkillId linkage since create() doesn't set it.
        await skillManager.syncToAgentSkills(
          skillName,
          decision.description || "",
          decision.instructions,
          "1.0.0",
          {
            source: "auto_extracted",
            emoji: decision.emoji,
            keywords: decision.keywords,
            sourceSkillId: result.id,
          }
        );
        log.info(`Auto-created skill "${skillName}": ${decision.reason}`);
      }
      break;
    }

    case "update": {
      if (!decision.existing_skill_name || !decision.improved_instructions) {
        log.debug("Skill update skipped: missing skill name or instructions");
        return;
      }

      // Await the result like the other manager calls in this function: if
      // update() returns a Promise, an un-awaited value would always be truthy
      // and a rejection would escape the caller's try/catch. Awaiting a plain
      // (synchronous) return value is harmless.
      const updated = await skillManager.update(
        decision.existing_skill_name,
        decision.improved_instructions,
        decision.improved_description
      );

      if (updated) {
        log.info(`Auto-improved skill "${decision.existing_skill_name}": ${decision.reason}`);
      } else {
        log.debug(`Skill update failed: "${decision.existing_skill_name}" not found`);
      }
      break;
    }

    case "skip":
      log.debug(`Skill evaluation: skip — ${decision.reason}`);
      break;
  }
}
|
|
226
|
+
|
|
227
|
+
/**
 * Attempt to parse a SkillDecision from the model's response text.
 *
 * Tries the full trimmed text first (model returned pure JSON). Otherwise it
 * walks every `{` in the text, extracts the balanced `{…}` block starting
 * there, and returns the first block that parses as a JSON object with a
 * truthy `action` field.
 *
 * @param text - Raw assistant output; may contain prose or code fences around the JSON.
 * @returns The parsed decision, or `null` when no candidate qualifies.
 */
function parseJsonResponse(text: string): SkillDecision | null {
  const trimmed = text.trim();

  // Parse one candidate; anything that is not an object with a truthy
  // `action` is rejected so callers never receive a decision without one.
  const toDecision = (candidate: string): SkillDecision | null => {
    try {
      const parsed: unknown = JSON.parse(candidate);
      if (typeof parsed !== "object" || parsed === null) return null;
      return (parsed as { action?: unknown }).action ? (parsed as SkillDecision) : null;
    } catch {
      return null;
    }
  };

  // Fast path: entire response is JSON
  const whole = toDecision(trimmed);
  if (whole) return whole;

  // Fallback: try each opening brace in turn, so an earlier non-JSON snippet
  // such as "{url}" in the surrounding prose does not hide the real object.
  for (
    let start = trimmed.indexOf("{");
    start !== -1;
    start = trimmed.indexOf("{", start + 1)
  ) {
    const end = findBalancedBraceEnd(trimmed, start);
    if (end === -1) continue;
    const decision = toDecision(trimmed.slice(start, end + 1));
    if (decision) return decision;
  }

  return null;
}

/**
 * Find the index of the `}` that closes the `{` at `start`.
 *
 * Braces inside double-quoted strings are ignored, and backslash escapes
 * inside strings are honored, so string values containing `{` or `}` do not
 * disturb the depth count.
 *
 * @returns Index of the matching closing brace, or -1 if the text ends first.
 */
function findBalancedBraceEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) return i;
    }
  }

  return -1;
}
|