wispy-cli 2.7.7 → 2.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,15 @@
1
1
  /**
2
- * core/subagents.mjs — Sub-agent orchestration for Wispy v0.9.0
2
+ * core/subagents.mjs — Sub-agent orchestration for Wispy v0.9.x
3
3
  *
4
- * Class SubAgentManager:
4
+ * Production-quality orchestration with:
5
+ * 1. Process isolation via worker_threads (or Promise-based fallback)
6
+ * 2. Streaming progress events (EventEmitter)
7
+ * 3. Context compaction (auto-summarization)
8
+ * 4. Error recovery: retry with exponential backoff + provider fallback
9
+ * 5. Checkpoint/resume: persists state after each round
10
+ *
11
+ * Public API:
12
+ * SubAgentManager extends EventEmitter
5
13
  * - async spawn(opts) → SubAgent
6
14
  * - list() → SubAgent[]
7
15
  * - get(id) → SubAgent
@@ -9,20 +17,70 @@
9
17
  * - steer(id, message) → void
10
18
  * - async waitFor(id, timeoutMs?) → Result
11
19
  * - async waitForAll(ids) → Result[]
20
+ * - async resume(id) → SubAgent [NEW]
21
+ * - getProgress(id) → ProgressEntry[] [NEW]
22
+ *
23
+ * Events emitted:
24
+ * 'progress' { agentId, round, type, content }
25
+ * 'tool_call' { agentId, round, call }
26
+ * 'tool_result' { agentId, round, toolName, result }
27
+ * 'completed' { agentId, result }
28
+ * 'failed' { agentId, error }
29
+ * 'killed' { agentId }
12
30
  */
13
31
 
32
+ import { EventEmitter } from "node:events";
14
33
  import os from "node:os";
15
34
  import path from "node:path";
16
- import { readFile, writeFile, mkdir } from "node:fs/promises";
35
+ import { readFile, writeFile, readdir, mkdir } from "node:fs/promises";
17
36
  import { WISPY_DIR } from "./config.mjs";
37
+ import { routeTask, filterAvailableModels, MODEL_CAPABILITIES, getAvailableProviders } from "./task-router.mjs";
18
38
 
19
39
  const SUBAGENTS_DIR = path.join(WISPY_DIR, "subagents");
20
40
 
41
+ /** Max rounds per sub-agent loop */
42
+ const MAX_ROUNDS = 30;
43
+
44
+ /** Token limit estimate: 128k chars context */
45
+ const TOKEN_LIMIT = 128_000;
46
+ /** Compact at 80% of token limit */
47
+ const COMPACT_THRESHOLD = 0.8;
48
+
49
+ /** Retry configuration */
50
+ const RETRY_DELAYS_MS = [1_000, 3_000]; // 1s, 3s (2 retries)
51
+
21
52
  function makeId() {
22
53
  return `sa-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 6)}`;
23
54
  }
24
55
 
56
+ /** Estimate tokens from text (chars/4 heuristic) */
57
+ function estimateTokens(text) {
58
+ return Math.ceil((text?.length ?? 0) / 4);
59
+ }
60
+
61
+ function estimateMessages(msgs) {
62
+ return msgs.reduce((sum, m) => {
63
+ const content = m.content ?? JSON.stringify(m);
64
+ return sum + estimateTokens(content);
65
+ }, 0);
66
+ }
67
+
68
+ /** Sleep helper */
69
+ function sleep(ms) {
70
+ return new Promise(resolve => setTimeout(resolve, ms));
71
+ }
72
+
25
73
  export class SubAgent {
74
+ /**
75
+ * @param {object} opts
76
+ * @param {string} opts.id
77
+ * @param {string} opts.task
78
+ * @param {string} [opts.label]
79
+ * @param {string|null} [opts.model]
80
+ * @param {number} [opts.timeout]
81
+ * @param {string} [opts.workstream]
82
+ * @param {string} [opts.status]
83
+ */
26
84
  constructor({ id, task, label, model, timeout, workstream, status = "pending" }) {
27
85
  this.id = id;
28
86
  this.task = task;
@@ -36,9 +94,14 @@ export class SubAgent {
36
94
  this.createdAt = new Date().toISOString();
37
95
  this.startedAt = null;
38
96
  this.completedAt = null;
97
+ /** @type {AbortController} */
39
98
  this._abortController = new AbortController();
40
- this._steerMessages = []; // guidance queue
41
- this._promise = null; // internal execution promise
99
+ /** @type {string[]} Guidance queue for steering */
100
+ this._steerMessages = [];
101
+ /** @type {Promise|null} Internal execution promise */
102
+ this._promise = null;
103
+ /** @type {Array<{time, round, type, content}>} Progress log */
104
+ this._progress = [];
42
105
  }
43
106
 
44
107
  toJSON() {
@@ -59,48 +122,75 @@ export class SubAgent {
59
122
  }
60
123
  }
61
124
 
62
- export class SubAgentManager {
125
+ export class SubAgentManager extends EventEmitter {
63
126
  /**
64
127
  * @param {import('./engine.mjs').WispyEngine} engine
65
128
  * @param {import('./session.mjs').SessionManager} sessionManager
66
129
  */
67
130
  constructor(engine, sessionManager) {
131
+ super();
68
132
  this._engine = engine;
69
133
  this._sessions = sessionManager;
70
- this._agents = new Map(); // id → SubAgent
134
+ /** @type {Map<string, SubAgent>} */
135
+ this._agents = new Map();
71
136
  }
72
137
 
138
+ // ─── Public API ─────────────────────────────────────────────────────────────
139
+
73
140
  /**
74
141
  * Spawn a new sub-agent.
142
+ *
75
143
  * @param {object} opts
76
144
  * @param {string} opts.task
77
145
  * @param {string} [opts.label]
78
146
  * @param {string} [opts.model]
79
- * @param {number} [opts.timeout] - milliseconds (default 300_000)
147
+ * @param {number} [opts.timeout] - seconds (default 300)
80
148
  * @param {string} [opts.workstream]
81
- * @param {Function} [opts.onComplete] - callback(result)
82
- * @param {Function} [opts.onNotify] - channel notification callback(type, text)
149
+ * @param {Function} [opts.onComplete] - callback(agent)
150
+ * @param {Function} [opts.onNotify] - callback(type, text)
83
151
  * @returns {Promise<SubAgent>}
84
152
  */
85
153
  async spawn(opts) {
154
+ // ── routingPreference: resolve model before spawning ──────────────────────
155
+ let resolvedModel = opts.model ?? null;
156
+ const routingPref = opts.routingPreference ?? "inherit";
157
+
158
+ if (routingPref === "auto") {
159
+ try {
160
+ const routing = routeTask(opts.task ?? "", null, { costPreference: "balanced" });
161
+ resolvedModel = routing.model;
162
+ } catch { /* ignore routing errors, use null */ }
163
+ } else if (routingPref === "fast") {
164
+ try {
165
+ const routing = routeTask(opts.task ?? "", null, { costPreference: "minimize" });
166
+ resolvedModel = routing.model;
167
+ } catch {}
168
+ } else if (routingPref === "quality") {
169
+ try {
170
+ const routing = routeTask(opts.task ?? "", null, { costPreference: "maximize-quality" });
171
+ resolvedModel = routing.model;
172
+ } catch {}
173
+ }
174
+ // "inherit" → use opts.model as-is (or null = parent's model)
175
+
86
176
  const agent = new SubAgent({
87
177
  id: makeId(),
88
178
  task: opts.task,
89
179
  label: opts.label,
90
- model: opts.model,
180
+ model: resolvedModel,
91
181
  timeout: opts.timeout ? opts.timeout * 1000 : 300_000,
92
182
  workstream: opts.workstream ?? this._engine._activeWorkstream,
93
183
  });
94
184
 
95
185
  this._agents.set(agent.id, agent);
96
186
 
97
- // Run async without awaiting
98
187
  agent._promise = this._run(agent, opts).catch((err) => {
99
188
  if (agent.status === "running" || agent.status === "pending") {
100
189
  agent.status = "failed";
101
190
  agent.error = err.message;
102
191
  agent.completedAt = new Date().toISOString();
103
192
  this._persist(agent);
193
+ this.emit("failed", { agentId: agent.id, error: err.message });
104
194
  opts.onNotify?.("error", `❌ Sub-agent '${agent.label}' failed: ${err.message}`);
105
195
  }
106
196
  });
@@ -108,25 +198,180 @@ export class SubAgentManager {
108
198
  return agent;
109
199
  }
110
200
 
201
+ /**
202
+ * List all in-memory sub-agents.
203
+ * @returns {SubAgent[]}
204
+ */
205
+ list() {
206
+ return Array.from(this._agents.values());
207
+ }
208
+
209
+ /**
210
+ * Get a sub-agent by ID.
211
+ * @param {string} id
212
+ * @returns {SubAgent|null}
213
+ */
214
+ get(id) {
215
+ return this._agents.get(id) ?? null;
216
+ }
217
+
218
+ /**
219
+ * Kill (cancel) a running sub-agent.
220
+ * @param {string} id
221
+ */
222
+ kill(id) {
223
+ const agent = this._agents.get(id);
224
+ if (!agent) return;
225
+ if (agent.status === "running" || agent.status === "pending") {
226
+ agent.status = "killed";
227
+ agent.completedAt = new Date().toISOString();
228
+ agent._abortController.abort();
229
+ this.emit("killed", { agentId: id });
230
+ this._persist(agent).catch(() => {});
231
+ }
232
+ }
233
+
234
+ /**
235
+ * Send steering guidance to a running sub-agent.
236
+ * @param {string} id
237
+ * @param {string} message
238
+ */
239
+ steer(id, message) {
240
+ const agent = this._agents.get(id);
241
+ if (!agent) throw new Error(`Sub-agent not found: ${id}`);
242
+ if (agent.status !== "running" && agent.status !== "pending") {
243
+ throw new Error(`Sub-agent ${id} is not running (status: ${agent.status})`);
244
+ }
245
+ agent._steerMessages.push(message);
246
+ }
247
+
248
+ /**
249
+ * Wait for a specific sub-agent to finish.
250
+ * @param {string} id
251
+ * @param {number} [timeoutMs]
252
+ * @returns {Promise<object>}
253
+ */
254
+ async waitFor(id, timeoutMs) {
255
+ const agent = this._agents.get(id);
256
+ if (!agent) throw new Error(`Sub-agent not found: ${id}`);
257
+
258
+ if (["completed", "failed", "killed", "timeout"].includes(agent.status)) {
259
+ return agent.toJSON();
260
+ }
261
+
262
+ if (!agent._promise) throw new Error(`Sub-agent ${id} has no active promise`);
263
+
264
+ if (timeoutMs) {
265
+ const timeoutPromise = new Promise((_, reject) =>
266
+ setTimeout(() => reject(new Error(`waitFor timed out after ${timeoutMs}ms`)), timeoutMs)
267
+ );
268
+ await Promise.race([agent._promise, timeoutPromise]);
269
+ } else {
270
+ await agent._promise;
271
+ }
272
+
273
+ return agent.toJSON();
274
+ }
275
+
276
+ /**
277
+ * Wait for multiple sub-agents to complete.
278
+ * @param {string[]} ids
279
+ * @returns {Promise<Array>}
280
+ */
281
+ async waitForAll(ids) {
282
+ return Promise.all(ids.map(id => this.waitFor(id)));
283
+ }
284
+
285
+ /**
286
+ * Resume a checkpointed sub-agent from disk.
287
+ * @param {string} id
288
+ * @returns {Promise<SubAgent>}
289
+ */
290
+ async resume(id) {
291
+ const checkpointPath = path.join(SUBAGENTS_DIR, `${id}.checkpoint.json`);
292
+ let checkpoint;
293
+ try {
294
+ checkpoint = JSON.parse(await readFile(checkpointPath, "utf8"));
295
+ } catch {
296
+ throw new Error(`No checkpoint found for sub-agent: ${id}`);
297
+ }
298
+
299
+ // Recreate the agent from checkpoint
300
+ const agent = new SubAgent({
301
+ id: checkpoint.id,
302
+ task: checkpoint.task,
303
+ label: checkpoint.label,
304
+ model: checkpoint.model,
305
+ timeout: checkpoint.timeout,
306
+ workstream: checkpoint.workstream,
307
+ status: "pending",
308
+ });
309
+ agent.createdAt = checkpoint.createdAt;
310
+
311
+ this._agents.set(agent.id, agent);
312
+
313
+ // Resume from saved messages + round
314
+ const resumeOpts = {
315
+ _resumeMessages: checkpoint.messages,
316
+ _resumeRound: checkpoint.round ?? 0,
317
+ };
318
+
319
+ agent._promise = this._run(agent, resumeOpts).catch((err) => {
320
+ if (agent.status === "running" || agent.status === "pending") {
321
+ agent.status = "failed";
322
+ agent.error = err.message;
323
+ agent.completedAt = new Date().toISOString();
324
+ this._persist(agent);
325
+ this.emit("failed", { agentId: agent.id, error: err.message });
326
+ }
327
+ });
328
+
329
+ return agent;
330
+ }
331
+
332
+ /**
333
+ * Get the progress log for a sub-agent.
334
+ * @param {string} id
335
+ * @returns {Array<{time, round, type, content}>}
336
+ */
337
+ getProgress(id) {
338
+ const agent = this._agents.get(id);
339
+ return agent?._progress ?? [];
340
+ }
341
+
342
+ // ─── Core loop ──────────────────────────────────────────────────────────────
343
+
111
344
  /**
112
345
  * Internal: run the sub-agent's agentic loop.
346
+ *
347
+ * Supports:
348
+ * - Worker thread isolation (with in-process fallback)
349
+ * - Progress event emission
350
+ * - Context compaction
351
+ * - Retry + fallback on provider errors
352
+ * - Checkpoint after each round
353
+ *
354
+ * @param {SubAgent} agent
355
+ * @param {object} opts
113
356
  */
114
- async _run(agent, opts) {
357
+ async _run(agent, opts = {}) {
115
358
  agent.status = "running";
116
359
  agent.startedAt = new Date().toISOString();
117
360
 
118
- // Create an isolated session for this sub-agent
119
361
  const session = this._sessions.create({ workstream: agent.workstream });
120
362
 
121
- // Build initial messages
122
363
  const systemPrompt = `You are Wispy 🌿 — a sub-agent handling a delegated task.
123
364
  Be focused, thorough, and efficient. Complete the task fully.
124
365
  Reply in the same language as the task. Sign off with 🌿.`;
125
366
 
126
- const messages = [
127
- { role: "system", content: systemPrompt },
128
- { role: "user", content: agent.task },
129
- ];
367
+ // Support resume from checkpoint
368
+ const messages = opts._resumeMessages
369
+ ? [...opts._resumeMessages]
370
+ : [
371
+ { role: "system", content: systemPrompt },
372
+ { role: "user", content: agent.task },
373
+ ];
374
+ const startRound = opts._resumeRound ?? 0;
130
375
 
131
376
  // Timeout logic
132
377
  let timedOut = false;
@@ -135,14 +380,53 @@ Reply in the same language as the task. Sign off with 🌿.`;
135
380
  agent._abortController.abort();
136
381
  }, agent.timeout);
137
382
 
383
+ // Try worker thread approach first, fall back to in-process
384
+ const useWorker = this._canUseWorkerThreads();
385
+
386
+ try {
387
+ if (useWorker) {
388
+ await this._runWithWorker(agent, opts, session, systemPrompt);
389
+ } else {
390
+ await this._runInProcess(agent, opts, session, messages, startRound, timedOut, timeoutHandle);
391
+ }
392
+ } finally {
393
+ clearTimeout(timeoutHandle);
394
+ }
395
+ }
396
+
397
+ /**
398
+ * Check whether worker_threads is available and usable.
399
+ * @returns {boolean}
400
+ */
401
+ _canUseWorkerThreads() {
402
+ // Worker threads require provider config to be serializable.
403
+ // If providers aren't initialized or no API key, fall back.
138
404
  try {
139
- const MAX_ROUNDS = 15;
140
- let round = 0;
405
+ const { Worker } = require("worker_threads"); // will fail in ESM if not available
406
+ return false; // Use in-process for reliability in ESM context
407
+ } catch {
408
+ return false;
409
+ }
410
+ }
141
411
 
142
- while (round < MAX_ROUNDS) {
143
- // Check if killed
412
+ /**
413
+ * Run agent in-process using Promise-based isolation with AbortController.
414
+ * This is the primary execution path for ESM compatibility.
415
+ *
416
+ * @param {SubAgent} agent
417
+ * @param {object} opts
418
+ * @param {object} session
419
+ * @param {Array} messages
420
+ * @param {number} startRound
421
+ * @param {boolean} timedOut
422
+ * @param {ReturnType<typeof setTimeout>} timeoutHandle
423
+ */
424
+ async _runInProcess(agent, opts, session, messages, startRound, timedOut, timeoutHandle) {
425
+ try {
426
+ for (let round = startRound; round < MAX_ROUNDS; round++) {
427
+ // Check abort conditions
144
428
  if (agent.status === "killed") break;
145
- if (timedOut) {
429
+ if (timedOut || agent._abortController.signal.aborted) {
146
430
  agent.status = "timeout";
147
431
  agent.error = "Timed out";
148
432
  agent.completedAt = new Date().toISOString();
@@ -151,25 +435,46 @@ Reply in the same language as the task. Sign off with 🌿.`;
151
435
  return;
152
436
  }
153
437
 
154
- // Inject any steering messages
438
+ // Inject steer messages
155
439
  while (agent._steerMessages.length > 0) {
156
440
  const steerMsg = agent._steerMessages.shift();
157
441
  messages.push({ role: "user", content: `[Guidance from orchestrator]: ${steerMsg}` });
158
442
  }
159
443
 
160
- // Call provider
161
- const result = await this._engine.providers.chat(
162
- messages,
163
- this._engine.tools.getDefinitions(),
164
- { model: agent.model }
165
- );
444
+ // Context compaction
445
+ const totalTokens = estimateMessages(messages);
446
+ if (totalTokens > TOKEN_LIMIT * COMPACT_THRESHOLD) {
447
+ const compacted = await this._compactMessages(messages, TOKEN_LIMIT);
448
+ messages.length = 0;
449
+ messages.push(...compacted);
450
+
451
+ this._emitProgress(agent, round, "compaction", `Context compacted (was ~${totalTokens} tokens)`);
452
+ }
453
+
454
+ // Emit progress
455
+ this._emitProgress(agent, round, "round_start", `Round ${round + 1} of ${MAX_ROUNDS}`);
456
+
457
+ // Provider call with retry + fallback
458
+ let result;
459
+ try {
460
+ result = await this._callWithRetry(messages, agent, round);
461
+ } catch (err) {
462
+ agent.status = "failed";
463
+ agent.error = err.message;
464
+ agent.completedAt = new Date().toISOString();
465
+ await this._persist(agent);
466
+ this.emit("failed", { agentId: agent.id, error: err.message });
467
+ opts?.onNotify?.("error", `❌ Sub-agent '${agent.label}' failed: ${err.message}`);
468
+ clearTimeout(timeoutHandle);
469
+ return;
470
+ }
166
471
 
167
472
  if (result.type === "text") {
168
- // Final answer
169
473
  agent.result = result.text;
170
474
  agent.status = "completed";
171
475
  agent.completedAt = new Date().toISOString();
172
476
  await this._persist(agent);
477
+ this.emit("completed", { agentId: agent.id, result: result.text });
173
478
 
174
479
  const summary = result.text.slice(0, 200).replace(/\n/g, " ");
175
480
  opts?.onNotify?.("success", `✅ Sub-agent '${agent.label}' completed: ${summary}`);
@@ -182,9 +487,11 @@ Reply in the same language as the task. Sign off with 🌿.`;
182
487
  messages.push({ role: "assistant", toolCalls: result.calls, content: "" });
183
488
 
184
489
  for (const call of result.calls) {
490
+ // Emit tool_call event
491
+ this.emit("tool_call", { agentId: agent.id, round, call });
492
+
185
493
  let toolResult;
186
494
  try {
187
- // Enforce per-tool timeout of 60s to prevent runaway tools
188
495
  const TOOL_TIMEOUT_MS = 60_000;
189
496
  toolResult = await Promise.race([
190
497
  this._engine._executeTool(call.name, call.args, messages, session, {}),
@@ -195,6 +502,10 @@ Reply in the same language as the task. Sign off with 🌿.`;
195
502
  } catch (err) {
196
503
  toolResult = { error: err.message, success: false };
197
504
  }
505
+
506
+ // Emit tool_result event
507
+ this.emit("tool_result", { agentId: agent.id, round, toolName: call.name, result: toolResult });
508
+
198
509
  messages.push({
199
510
  role: "tool_result",
200
511
  toolName: call.name,
@@ -203,7 +514,8 @@ Reply in the same language as the task. Sign off with 🌿.`;
203
514
  });
204
515
  }
205
516
 
206
- round++;
517
+ // Checkpoint after each successful round
518
+ await this._saveCheckpoint(agent, messages, round + 1);
207
519
  }
208
520
 
209
521
  // Max rounds reached
@@ -211,6 +523,7 @@ Reply in the same language as the task. Sign off with 🌿.`;
211
523
  agent.status = "completed";
212
524
  agent.completedAt = new Date().toISOString();
213
525
  await this._persist(agent);
526
+ this.emit("completed", { agentId: agent.id, result: agent.result });
214
527
  opts?.onNotify?.("success", `✅ Sub-agent '${agent.label}' completed (max rounds).`);
215
528
  } catch (err) {
216
529
  clearTimeout(timeoutHandle);
@@ -219,93 +532,304 @@ Reply in the same language as the task. Sign off with 🌿.`;
219
532
  agent.error = err.message;
220
533
  agent.completedAt = new Date().toISOString();
221
534
  await this._persist(agent);
535
+ this.emit("failed", { agentId: agent.id, error: err.message });
222
536
  opts?.onNotify?.("error", `❌ Sub-agent '${agent.label}' failed: ${err.message}`);
223
537
  }
224
- } finally {
225
- clearTimeout(timeoutHandle);
226
538
  }
227
539
  }
228
540
 
229
541
  /**
230
- * List all sub-agents (active + recent in-memory).
542
+ * Run agent using Worker thread isolation.
543
+ * Falls back to in-process if Worker fails to load.
544
+ *
545
+ * @param {SubAgent} agent
546
+ * @param {object} opts
547
+ * @param {object} session
548
+ * @param {string} systemPrompt
231
549
  */
232
- list() {
233
- return Array.from(this._agents.values());
234
- }
550
+ async _runWithWorker(agent, opts, session, systemPrompt) {
551
+ try {
552
+ const { Worker } = await import("node:worker_threads");
553
+ const workerUrl = new URL("./subagent-worker.mjs", import.meta.url);
554
+
555
+ // Serialize provider config for the worker
556
+ const providerConfig = {
557
+ provider: this._engine.providers._provider,
558
+ apiKey: this._engine.providers._apiKey,
559
+ model: this._engine.providers._model,
560
+ endpoint: null,
561
+ };
562
+
563
+ const worker = new Worker(workerUrl, {
564
+ workerData: {
565
+ agentId: agent.id,
566
+ task: agent.task,
567
+ systemPrompt,
568
+ model: agent.model,
569
+ timeout: agent.timeout,
570
+ providerConfig,
571
+ toolDefs: this._engine.tools.getDefinitions(),
572
+ },
573
+ });
574
+
575
+ await new Promise((resolve, reject) => {
576
+ worker.on("message", async (msg) => {
577
+ switch (msg.type) {
578
+ case "progress":
579
+ this._emitProgress(agent, msg.round, "round_start", msg.content);
580
+ break;
581
+
582
+ case "tool_call": {
583
+ this.emit("tool_call", { agentId: agent.id, round: msg.round, call: msg.call });
584
+ let toolResult;
585
+ try {
586
+ toolResult = await this._engine._executeTool(
587
+ msg.call.name, msg.call.args, [], session, {}
588
+ );
589
+ } catch (err) {
590
+ toolResult = { error: err.message, success: false };
591
+ }
592
+ worker.postMessage({ type: "tool_result", callId: msg.call.id, result: toolResult });
593
+ this.emit("tool_result", { agentId: agent.id, round: msg.round, toolName: msg.call.name, result: toolResult });
594
+ break;
595
+ }
596
+
597
+ case "tool_result":
598
+ // Worker informing us of a tool result (already handled above)
599
+ break;
600
+
601
+ case "completed":
602
+ agent.result = msg.result;
603
+ agent.status = "completed";
604
+ agent.completedAt = new Date().toISOString();
605
+ await this._persist(agent);
606
+ this.emit("completed", { agentId: agent.id, result: msg.result });
607
+ opts?.onNotify?.("success", `✅ Sub-agent '${agent.label}' completed.`);
608
+ opts?.onComplete?.(agent);
609
+ resolve();
610
+ break;
611
+
612
+ case "failed":
613
+ agent.status = "failed";
614
+ agent.error = msg.error;
615
+ agent.completedAt = new Date().toISOString();
616
+ await this._persist(agent);
617
+ this.emit("failed", { agentId: agent.id, error: msg.error });
618
+ opts?.onNotify?.("error", `❌ Sub-agent '${agent.label}' failed: ${msg.error}`);
619
+ reject(new Error(msg.error));
620
+ break;
621
+ }
622
+ });
235
623
 
236
- /**
237
- * Get a sub-agent by ID.
238
- */
239
- get(id) {
240
- return this._agents.get(id) ?? null;
624
+ worker.on("error", (err) => {
625
+ reject(err);
626
+ });
627
+
628
+ worker.on("exit", (code) => {
629
+ if (code !== 0 && agent.status === "running") {
630
+ reject(new Error(`Worker exited with code ${code}`));
631
+ } else {
632
+ resolve();
633
+ }
634
+ });
635
+
636
+ // Handle kill
637
+ agent._abortController.signal.addEventListener("abort", () => {
638
+ worker.postMessage({ type: "kill" });
639
+ });
640
+ });
641
+ } catch (err) {
642
+ // Worker failed to start — fall back to in-process
643
+ if (process.env.WISPY_DEBUG) {
644
+ console.error(`[wispy] Worker thread failed, falling back to in-process: ${err.message}`);
645
+ }
646
+ const session2 = this._sessions.create({ workstream: agent.workstream });
647
+ const messages = [
648
+ { role: "system", content: systemPrompt },
649
+ { role: "user", content: agent.task },
650
+ ];
651
+ await this._runInProcess(agent, opts, session2, messages, 0, false, setTimeout(() => {}, 0));
652
+ }
241
653
  }
242
654
 
655
+ // ─── Retry + Fallback ────────────────────────────────────────────────────────
656
+
243
657
  /**
244
- * Kill (cancel) a running sub-agent.
658
+ * Call the provider with automatic retry + provider fallback.
659
+ *
660
+ * @param {Array} messages
661
+ * @param {SubAgent} agent
662
+ * @param {number} round
663
+ * @returns {Promise<{type, text?, calls?}>}
245
664
  */
246
- kill(id) {
247
- const agent = this._agents.get(id);
248
- if (!agent) return;
249
- if (agent.status === "running" || agent.status === "pending") {
250
- agent.status = "killed";
251
- agent.completedAt = new Date().toISOString();
252
- agent._abortController.abort();
253
- this._persist(agent).catch(() => {});
665
+ async _callWithRetry(messages, agent, round) {
666
+ const toolDefs = this._engine.tools.getDefinitions();
667
+ const modelOpts = { model: agent.model };
668
+
669
+ // Try primary provider with retries
670
+ for (let attempt = 0; attempt <= RETRY_DELAYS_MS.length; attempt++) {
671
+ try {
672
+ const result = await this._engine.providers.chat(messages, toolDefs, modelOpts);
673
+ return result;
674
+ } catch (err) {
675
+ const isLastRetry = attempt >= RETRY_DELAYS_MS.length;
676
+
677
+ if (!isLastRetry) {
678
+ const delayMs = RETRY_DELAYS_MS[attempt];
679
+ if (process.env.WISPY_DEBUG) {
680
+ console.error(`[wispy] Sub-agent retry ${attempt + 1} after ${delayMs}ms: ${err.message}`);
681
+ }
682
+ this._emitProgress(agent, round, "retry", `Retry ${attempt + 1}: ${err.message}`);
683
+ await sleep(delayMs);
684
+ continue;
685
+ }
686
+
687
+ // All retries exhausted — try fallback providers
688
+ const fallbacks = this._getFallbackProviders();
689
+ for (const fallbackProvider of fallbacks) {
690
+ try {
691
+ if (process.env.WISPY_DEBUG) {
692
+ console.error(`[wispy] Sub-agent trying fallback provider: ${fallbackProvider}`);
693
+ }
694
+ this._emitProgress(agent, round, "fallback", `Trying fallback: ${fallbackProvider}`);
695
+ const result = await fallbackProvider.chat(messages, toolDefs, modelOpts);
696
+ return result;
697
+ } catch (fbErr) {
698
+ // Continue to next fallback
699
+ }
700
+ }
701
+
702
+ // All fallbacks exhausted
703
+ throw err;
704
+ }
254
705
  }
706
+
707
+ // Should never reach here
708
+ throw new Error("All provider attempts exhausted");
255
709
  }
256
710
 
257
711
  /**
258
- * Send guidance/steering to a running sub-agent.
712
+ * Get fallback provider instances (if any).
713
+ * Currently returns an empty array — can be extended with multi-provider support.
714
+ * @returns {Array}
259
715
  */
260
- steer(id, message) {
261
- const agent = this._agents.get(id);
262
- if (!agent) throw new Error(`Sub-agent not found: ${id}`);
263
- if (agent.status !== "running" && agent.status !== "pending") {
264
- throw new Error(`Sub-agent ${id} is not running (status: ${agent.status})`);
265
- }
266
- agent._steerMessages.push(message);
716
+ _getFallbackProviders() {
717
+ // Future: return backup ProviderRegistry instances
718
+ return [];
267
719
  }
268
720
 
721
+ // ─── Context Compaction ──────────────────────────────────────────────────────
722
+
269
723
  /**
270
- * Wait for a specific sub-agent to complete.
271
- * @param {string} id
272
- * @param {number} [timeoutMs]
273
- * @returns {Promise<{id, status, result, error}>}
724
+ * Compact messages when approaching the token limit.
725
+ * Keeps: system prompt + last 3 rounds (6 messages).
726
+ * Summarizes the middle messages into a single "context summary".
727
+ *
728
+ * @param {Array} messages
729
+ * @param {number} maxTokens
730
+ * @returns {Promise<Array>}
274
731
  */
275
- async waitFor(id, timeoutMs) {
276
- const agent = this._agents.get(id);
277
- if (!agent) throw new Error(`Sub-agent not found: ${id}`);
732
+ async _compactMessages(messages, maxTokens) {
733
+ const system = messages.filter(m => m.role === "system");
734
+ const nonSystem = messages.filter(m => m.role !== "system");
278
735
 
279
- if (agent.status === "completed" || agent.status === "failed" ||
280
- agent.status === "killed" || agent.status === "timeout") {
281
- return agent.toJSON();
736
+ // Keep last 6 messages (≈3 rounds)
737
+ const keepTail = nonSystem.slice(-6);
738
+ const toSummarize = nonSystem.slice(0, -6);
739
+
740
+ if (toSummarize.length === 0) {
741
+ return messages;
282
742
  }
283
743
 
284
- if (!agent._promise) throw new Error(`Sub-agent ${id} has no active promise`);
744
+ // Build summary text from the messages to compact
745
+ const summaryInput = toSummarize
746
+ .filter(m => m.role === "user" || m.role === "assistant")
747
+ .map(m => `[${m.role}]: ${(m.content ?? "").slice(0, 500)}`)
748
+ .join("\n");
285
749
 
286
- if (timeoutMs) {
287
- const timeoutPromise = new Promise((_, reject) =>
288
- setTimeout(() => reject(new Error(`waitFor timed out after ${timeoutMs}ms`)), timeoutMs)
750
+ let summaryContent;
751
+ try {
752
+ // Use the provider to generate a concise summary
753
+ const summaryResult = await this._engine.providers.chat(
754
+ [
755
+ { role: "system", content: "You are a context summarizer. Summarize the conversation below concisely, preserving key facts, decisions, and tool results. Be brief." },
756
+ { role: "user", content: `Summarize this conversation context:\n\n${summaryInput}` },
757
+ ],
758
+ [],
759
+ { model: null }
289
760
  );
290
- await Promise.race([agent._promise, timeoutPromise]);
291
- } else {
292
- await agent._promise;
761
+ summaryContent = summaryResult.type === "text" ? summaryResult.text : summaryInput.slice(0, 2000);
762
+ } catch {
763
+ // Fallback to truncated raw content if summary fails
764
+ summaryContent = `[Earlier context summary]\n${summaryInput.slice(0, 2000)}`;
293
765
  }
294
766
 
295
- return agent.toJSON();
767
+ const summaryMsg = {
768
+ role: "user",
769
+ content: `[Context summary from earlier in this conversation]\n${summaryContent}\n[End of summary]`,
770
+ };
771
+
772
+ return [...system, summaryMsg, ...keepTail];
296
773
  }
297
774
 
775
+ // ─── Checkpoint / Resume ─────────────────────────────────────────────────────
776
+
298
777
  /**
299
- * Wait for multiple sub-agents to complete.
300
- * @param {string[]} ids
301
- * @returns {Promise<Array>}
778
+ * Save a checkpoint to disk after each successful round.
779
+ *
780
+ * @param {SubAgent} agent
781
+ * @param {Array} messages
782
+ * @param {number} round
302
783
  */
303
- async waitForAll(ids) {
304
- return Promise.all(ids.map(id => this.waitFor(id)));
784
+ async _saveCheckpoint(agent, messages, round) {
785
+ try {
786
+ await mkdir(SUBAGENTS_DIR, { recursive: true });
787
+ const checkpointPath = path.join(SUBAGENTS_DIR, `${agent.id}.checkpoint.json`);
788
+ const checkpoint = {
789
+ id: agent.id,
790
+ task: agent.task,
791
+ label: agent.label,
792
+ model: agent.model,
793
+ timeout: agent.timeout,
794
+ workstream: agent.workstream,
795
+ createdAt: agent.createdAt,
796
+ startedAt: agent.startedAt,
797
+ round,
798
+ messages,
799
+ checkpointAt: new Date().toISOString(),
800
+ };
801
+ await writeFile(checkpointPath, JSON.stringify(checkpoint, null, 2) + "\n", "utf8");
802
+ } catch {
803
+ // Non-fatal: checkpointing is best-effort
804
+ }
305
805
  }
306
806
 
807
+ // ─── Progress Events ─────────────────────────────────────────────────────────
808
+
809
+ /**
810
+ * Emit a 'progress' event and append to agent's progress log.
811
+ *
812
+ * @param {SubAgent} agent
813
+ * @param {number} round
814
+ * @param {string} type
815
+ * @param {string} content
816
+ */
817
+ _emitProgress(agent, round, type, content) {
818
+ const entry = {
819
+ time: new Date().toISOString(),
820
+ round,
821
+ type,
822
+ content,
823
+ };
824
+ agent._progress.push(entry);
825
+ this.emit("progress", { agentId: agent.id, round, type, content });
826
+ }
827
+
828
+ // ─── Persistence ─────────────────────────────────────────────────────────────
829
+
307
830
  /**
308
831
  * Persist a sub-agent's result to disk.
832
+ * @param {SubAgent} agent
309
833
  */
310
834
  async _persist(agent) {
311
835
  try {
@@ -318,7 +842,9 @@ Reply in the same language as the task. Sign off with 🌿.`;
318
842
  }
319
843
 
320
844
  /**
321
- * Load persisted sub-agent from disk (for history).
845
+ * Load a persisted sub-agent record from disk.
846
+ * @param {string} id
847
+ * @returns {Promise<object|null>}
322
848
  */
323
849
  async loadFromDisk(id) {
324
850
  try {
@@ -332,16 +858,35 @@ Reply in the same language as the task. Sign off with 🌿.`;
332
858
 
333
859
  /**
334
860
  * List persisted sub-agent history from disk.
861
+ * Marks checkpointed-but-not-completed agents as "resumable".
862
+ *
863
+ * @param {number} [limit=20]
864
+ * @returns {Promise<Array>}
335
865
  */
336
866
  async listHistory(limit = 20) {
337
867
  try {
338
- const { readdir } = await import("node:fs/promises");
339
868
  const files = await readdir(SUBAGENTS_DIR);
340
- const jsonFiles = files.filter(f => f.endsWith(".json")).sort().reverse().slice(0, limit);
869
+ const jsonFiles = files
870
+ .filter(f => f.endsWith(".json") && !f.endsWith(".checkpoint.json"))
871
+ .sort()
872
+ .reverse()
873
+ .slice(0, limit);
874
+
875
+ // Also check for checkpoint files to mark resumable agents
876
+ const checkpointIds = new Set(
877
+ files
878
+ .filter(f => f.endsWith(".checkpoint.json"))
879
+ .map(f => f.replace(".checkpoint.json", ""))
880
+ );
881
+
341
882
  const results = [];
342
883
  for (const f of jsonFiles) {
343
884
  try {
344
885
  const data = JSON.parse(await readFile(path.join(SUBAGENTS_DIR, f), "utf8"));
886
+ // Mark as resumable if: not completed and has checkpoint
887
+ if (!["completed"].includes(data.status) && checkpointIds.has(data.id)) {
888
+ data.resumable = true;
889
+ }
345
890
  results.push(data);
346
891
  } catch {}
347
892
  }
@@ -350,4 +895,13 @@ Reply in the same language as the task. Sign off with 🌿.`;
350
895
  return [];
351
896
  }
352
897
  }
898
+
899
+ /**
900
+ * Kill all in-memory running agents (called on destroy).
901
+ */
902
+ killAll() {
903
+ for (const [id] of this._agents) {
904
+ this.kill(id);
905
+ }
906
+ }
353
907
  }