@xynogen/pix-subagent 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,526 @@
1
+ /**
2
+ * agent-manager.ts — Tracks agents, background execution, resume support.
3
+ *
4
+ * Background agents are subject to a configurable concurrency limit (default: 4).
5
+ * Excess agents are queued and auto-started as running agents complete.
6
+ * Foreground agents bypass the queue (they block the parent anyway).
7
+ */
8
+
9
+ import { randomUUID } from "node:crypto";
10
+ import { statSync } from "node:fs";
11
+ import { isAbsolute } from "node:path";
12
+ import type { Model } from "@earendil-works/pi-ai";
13
+ import type {
14
+ AgentSession,
15
+ ExtensionAPI,
16
+ ExtensionContext,
17
+ } from "@earendil-works/pi-coding-agent";
18
+ import { resumeAgent, runAgent, type ToolActivity } from "./agent-runner.ts";
19
+ import type {
20
+ AgentInvocation,
21
+ AgentRecord,
22
+ SubagentType,
23
+ ThinkingLevel,
24
+ } from "./types.ts";
25
+ import { addUsage } from "./usage.ts";
26
+
27
/**
 * Details passed along with a compaction notification.
 * NOTE(review): the values are produced by the agent runner; `tokensBefore`
 * is presumably the context size before compaction — confirm against
 * agent-runner.ts.
 */
export type CompactionInfo = {
  reason: "manual" | "threshold" | "overflow";
  tokensBefore: number;
};

/** Listener invoked by the manager with the record of an agent it just started. */
export type OnAgentStart = (record: AgentRecord) => void;

/**
 * Listener invoked with the record of a background agent once its run has
 * settled, or when a queued agent fails while being started.
 */
export type OnAgentComplete = (record: AgentRecord) => void;

/** Listener invoked each time an agent's session reports a compaction. */
export type OnAgentCompact = (record: AgentRecord, info: CompactionInfo) => void;

/** Concurrency limit for background agents when the caller supplies none. */
const DEFAULT_MAX_CONCURRENT = 4;
40
+
41
/**
 * Validate a caller-supplied SpawnOptions.cwd.
 *
 * `undefined`/`null` mean "unset" (the parent cwd is used). Anything else must
 * be an absolute path to an existing directory. The point of this function is
 * to raise curated errors instead of TypeErrors from path/fs internals, since
 * RPC callers send arbitrary JSON (null, numbers, objects, file paths).
 *
 * @param cwd - The raw value received from the caller.
 * @throws Error when `cwd` is not a string or not an absolute path, when the
 *   path cannot be stat'ed, or when it is not a directory.
 */
function assertValidSpawnCwd(
  cwd: unknown,
): asserts cwd is string | undefined | null {
  if (cwd == null) return;

  if (typeof cwd !== "string" || !isAbsolute(cwd)) {
    // Objects and arrays would stringify to "[object Object]" / "1,2", which
    // tells the caller nothing about what they sent. Render them as JSON;
    // strings and other primitives keep their plain String() form.
    let shown: string;
    if (typeof cwd === "object") {
      try {
        shown = JSON.stringify(cwd) ?? String(cwd);
      } catch {
        // Circular structures, BigInt members, throwing toJSON, ...
        shown = String(cwd);
      }
    } else {
      shown = String(cwd);
    }
    throw new Error(`SpawnOptions.cwd must be an absolute path: "${shown}"`);
  }

  let isDirectory = false;
  try {
    isDirectory = statSync(cwd).isDirectory();
  } catch {
    // Any stat failure is reported as "does not exist" (deliberate best effort).
    throw new Error(`SpawnOptions.cwd does not exist: "${cwd}"`);
  }
  if (!isDirectory) {
    throw new Error(`SpawnOptions.cwd is not a directory: "${cwd}"`);
  }
}
66
+
67
/**
 * The full argument list of a `spawn()` call, bundled into one object so a
 * queued agent can be started later with exactly what the caller passed.
 */
interface SpawnArgs {
  /** Extension API handle, forwarded to the agent runner. */
  pi: ExtensionAPI;
  /** Extension context of the spawning session; its `cwd` is read at start. */
  ctx: ExtensionContext;
  /** Which kind of subagent to run. */
  type: SubagentType;
  /** Prompt handed to the agent runner. */
  prompt: string;
  /** Per-spawn settings and callbacks. */
  options: SpawnOptions;
}
74
+
75
/** Per-spawn settings accepted by `AgentManager.spawn()` / `spawnAndWait()`. */
interface SpawnOptions {
  // --- Identity / display -------------------------------------------------

  /** Human-readable label stored on the agent record. */
  description: string;
  /** Resolved invocation snapshot captured for UI display. */
  invocation?: AgentInvocation;

  // --- Scheduling ---------------------------------------------------------

  /** Background agents count toward the concurrency limit and may be queued. */
  isBackground?: boolean;
  /**
   * Skip the maxConcurrent queue check for this spawn — start immediately even
   * if the configured concurrency limit would otherwise queue it. Used by the
   * scheduler so a fired job can't be deferred past its trigger window.
   */
  bypassQueue?: boolean;
  /** Parent abort signal — when aborted, the subagent is also stopped. */
  signal?: AbortSignal;

  // --- Run configuration (forwarded to the agent runner) -------------------

  model?: Model<any>;
  maxTurns?: number;
  isolated?: boolean;
  inheritContext?: boolean;
  thinkingLevel?: ThinkingLevel;
  /** Working directory for the agent (absolute path). Default: parent session cwd. */
  cwd?: string;
  /** Caller-supplied tool-name subset — intersected (never widens). Omit → type default. */
  allowedToolNames?: string[];

  // --- Progress callbacks -------------------------------------------------

  /** Called when the agent session is created (for accessing session stats). */
  onSessionCreated?: (session: AgentSession) => void;
  /** Called on tool start/end with activity info (for streaming progress to UI). */
  onToolActivity?: (activity: ToolActivity) => void;
  /** Called on streaming text deltas from the assistant response. */
  onTextDelta?: (delta: string, fullText: string) => void;
  /** Called at the end of each agentic turn with the cumulative count. */
  onTurnEnd?: (turnCount: number) => void;
  /** Called once per assistant message_end with that message's usage delta. */
  onAssistantUsage?: (usage: {
    input: number;
    output: number;
    cacheWrite: number;
  }) => void;
  /** Called when the session successfully compacts. */
  onCompaction?: (info: CompactionInfo) => void;
}
114
+
115
/**
 * Tracks every subagent spawned by the extension and owns its lifecycle:
 * start, queueing, completion bookkeeping, resume, abort and cleanup.
 *
 * Background agents are limited to `maxConcurrent` simultaneous runs; excess
 * spawns are queued and started as running ones settle. Foreground agents
 * never queue.
 */
export class AgentManager {
  /** Every known agent keyed by id, whatever its lifecycle state. */
  private agents = new Map<string, AgentRecord>();
  /** Handle of the periodic cleanup() sweep; cleared in dispose(). */
  private cleanupInterval: ReturnType<typeof setInterval>;
  private onComplete?: OnAgentComplete;
  private onStart?: OnAgentStart;
  private onCompact?: OnAgentCompact;
  /** Upper bound on simultaneously running background agents (always >= 1). */
  private maxConcurrent: number;
  /** Queue of background agents waiting to start. */
  private queue: { id: string; args: SpawnArgs }[] = [];
  /** Number of currently running background agents. */
  private runningBackground = 0;

  /**
   * @param onComplete - Notified when a background agent settles, or when a
   *   queued agent fails while being started.
   * @param maxConcurrent - Background concurrency limit. Values below 1 are
   *   raised to 1 (as in setMaxConcurrent); NaN falls back to the default.
   * @param onStart - Notified right after an agent is marked running.
   * @param onCompact - Notified whenever an agent's session compacts.
   */
  constructor(
    onComplete?: OnAgentComplete,
    maxConcurrent = DEFAULT_MAX_CONCURRENT,
    onStart?: OnAgentStart,
    onCompact?: OnAgentCompact,
  ) {
    this.onComplete = onComplete;
    this.onStart = onStart;
    this.onCompact = onCompact;
    // A limit of 0 (or less) would queue every background agent forever, and
    // NaN makes both the queue check and the drain check false.
    this.maxConcurrent = Number.isNaN(maxConcurrent)
      ? DEFAULT_MAX_CONCURRENT
      : Math.max(1, maxConcurrent);
    // Sweep once a minute; cleanup() removes records that finished more than
    // 10 minutes ago and disposes their sessions.
    this.cleanupInterval = setInterval(() => this.cleanup(), 60_000);
    // Don't let the sweep timer keep the process alive.
    this.cleanupInterval.unref();
  }

  /**
   * Update the max concurrent background agents limit.
   * Values below 1 are raised to 1; NaN is ignored (the current limit stays).
   */
  setMaxConcurrent(n: number) {
    if (!Number.isNaN(n)) {
      this.maxConcurrent = Math.max(1, n);
    }
    // Start queued agents if the new limit allows
    this.drainQueue();
  }

  /** Current background concurrency limit. */
  getMaxConcurrent(): number {
    return this.maxConcurrent;
  }

  /**
   * Spawn an agent and return its ID immediately (for background use).
   * If the concurrency limit is reached, the agent is queued.
   *
   * @returns The id of the new agent record.
   * @throws Error when `options.cwd` is invalid, or when starting the agent
   *   fails synchronously (the record is removed again in that case).
   */
  spawn(
    pi: ExtensionAPI,
    ctx: ExtensionContext,
    type: SubagentType,
    prompt: string,
    options: SpawnOptions,
  ): string {
    // Validate before the queue branch — a queued spawn should fail at the
    // call, not minutes later at drain. Throw (not warn): programmatic callers
    // can fix and retry; the RPC layer converts throws into error envelopes.
    assertValidSpawnCwd(options.cwd);

    const id = randomUUID().slice(0, 17);
    const abortController = new AbortController();
    const record: AgentRecord = {
      id,
      type,
      description: options.description,
      status: options.isBackground ? "queued" : "running",
      toolUses: 0,
      startedAt: Date.now(),
      abortController,
      lifetimeUsage: { input: 0, output: 0, cacheWrite: 0 },
      compactionCount: 0,
      invocation: options.invocation,
    };
    this.agents.set(id, record);

    const args: SpawnArgs = { pi, ctx, type, prompt, options };

    const mustQueue =
      options.isBackground &&
      !options.bypassQueue &&
      this.runningBackground >= this.maxConcurrent;
    if (mustQueue) {
      // Queue it — will be started when a running agent completes
      this.queue.push({ id, args });
      return id;
    }

    // startAgent can throw — clean up the record so callers don't see an
    // orphan in `listAgents()`.
    try {
      this.startAgent(id, record, args);
    } catch (err) {
      this.agents.delete(id);
      throw err;
    }
    return id;
  }

  /** Invoke the completion listener; its errors never disturb bookkeeping. */
  private notifyComplete(record: AgentRecord): void {
    try {
      this.onComplete?.(record);
    } catch {
      /* ignore completion side-effect errors */
    }
  }

  /**
   * Actually start an agent (called immediately or from queue drain).
   *
   * If anything throws synchronously, the background slot taken here and the
   * parent-signal listener are rolled back before the error propagates.
   */
  private startAgent(
    id: string,
    record: AgentRecord,
    { pi, ctx, type, prompt, options }: SpawnArgs,
  ) {
    // Re-validate a caller-supplied cwd: queued spawns can start minutes after
    // spawn()'s check, and the directory may be gone by then (TOCTOU). Same
    // curated errors; drainQueue parks a throw on the record as an error.
    assertValidSpawnCwd(options.cwd);
    const customCwd = options.cwd ?? undefined;
    const isBackground = Boolean(options.isBackground);
    const controller = (record.abortController ??= new AbortController());

    record.status = "running";
    record.startedAt = Date.now();

    // The slot is released exactly once, whichever path gets there first.
    let holdsSlot = false;
    const releaseSlot = () => {
      if (!holdsSlot) return;
      holdsSlot = false;
      this.runningBackground--;
    };
    if (isBackground) {
      this.runningBackground++;
      holdsSlot = true;
    }

    let detachParentSignal: (() => void) | undefined;
    const detach = () => {
      detachParentSignal?.();
      detachParentSignal = undefined;
    };

    // Shared tail of both settle paths. Nothing in here may throw: a throw on
    // the success path must not be mistaken for an agent failure, and a throw
    // on the failure path must not skip the queue drain.
    const finish = () => {
      record.completedAt ??= Date.now();
      detach();
      if (!isBackground) return;
      releaseSlot();
      this.notifyComplete(record);
      this.drainQueue();
    };

    try {
      this.onStart?.(record);

      // Wire parent abort signal to stop the subagent when the parent is interrupted
      const parentSignal = options.signal;
      if (parentSignal) {
        const onParentAbort = () => this.abort(id);
        parentSignal.addEventListener("abort", onParentAbort, { once: true });
        detachParentSignal = () =>
          parentSignal.removeEventListener("abort", onParentAbort);
      }

      record.promise = runAgent(ctx, type, prompt, {
        pi,
        agentId: id,
        model: options.model,
        maxTurns: options.maxTurns,
        isolated: options.isolated,
        inheritContext: options.inheritContext,
        thinkingLevel: options.thinkingLevel,
        // Worktree wins for the working dir (the agent must run in the copy —
        // which, with a custom cwd, was created from that target). Config stays
        // with the parent project when a caller-supplied cwd is in play; it must
        // stay undefined otherwise so plain worktree runs keep resolving config
        // (incl. relative extension paths and memory) inside the worktree copy.
        cwd: customCwd,
        configCwd: customCwd !== undefined ? ctx.cwd : undefined,
        allowedToolNames: options.allowedToolNames,
        signal: controller.signal,
        onToolActivity: (activity) => {
          if (activity.type === "end") record.toolUses++;
          options.onToolActivity?.(activity);
        },
        onTurnEnd: options.onTurnEnd,
        onTextDelta: options.onTextDelta,
        onAssistantUsage: (usage) => {
          addUsage(record.lifetimeUsage, usage);
          options.onAssistantUsage?.(usage);
        },
        onCompaction: (info) => {
          record.compactionCount++;
          this.onCompact?.(record, info);
          options.onCompaction?.(info);
        },
        onSessionCreated: (session) => {
          record.session = session;
          // Flush any steers that arrived before the session was ready
          if (record.pendingSteers?.length) {
            for (const msg of record.pendingSteers) {
              session.steer(msg).catch(() => {});
            }
            record.pendingSteers = undefined;
          }
          options.onSessionCreated?.(session);
        },
      }).then(
        ({ responseText, session, aborted, steered }) => {
          // Don't overwrite status if externally stopped via abort()
          if (record.status !== "stopped") {
            record.status = aborted
              ? "aborted"
              : steered
                ? "steered"
                : "completed";
          }
          record.result = responseText;
          record.session = session;
          finish();
          return responseText;
        },
        (err: unknown) => {
          // Don't overwrite status if externally stopped via abort()
          if (record.status !== "stopped") {
            record.status = "error";
          }
          record.error = err instanceof Error ? err.message : String(err);
          finish();
          return "";
        },
      );
    } catch (err) {
      // Synchronous failure: give the slot back and drop the listener so the
      // concurrency counter does not drift upwards.
      detach();
      releaseSlot();
      throw err;
    }
  }

  /** Start queued agents up to the concurrency limit. */
  private drainQueue() {
    while (
      this.queue.length > 0 &&
      this.runningBackground < this.maxConcurrent
    ) {
      const next = this.queue.shift();
      if (!next) break;
      const record = this.agents.get(next.id);
      if (record?.status !== "queued") continue;

      // The parent-abort listener is only attached at start, so an abort that
      // happened while the agent waited would otherwise go unnoticed. Treat it
      // like abort() on a queued agent.
      if (next.args.options.signal?.aborted) {
        record.status = "stopped";
        record.completedAt = Date.now();
        continue;
      }

      try {
        this.startAgent(next.id, record, next.args);
      } catch (err) {
        // Late failure — surface on the record so the user/agent can see it,
        // then keep draining.
        record.status = "error";
        record.error = err instanceof Error ? err.message : String(err);
        record.completedAt = Date.now();
        this.notifyComplete(record);
      }
    }
  }

  /**
   * Spawn an agent and wait for completion (foreground use).
   * Foreground agents bypass the concurrency queue.
   *
   * @returns The agent's record once its run has settled.
   */
  async spawnAndWait(
    pi: ExtensionAPI,
    ctx: ExtensionContext,
    type: SubagentType,
    prompt: string,
    options: Omit<SpawnOptions, "isBackground">,
  ): Promise<AgentRecord> {
    const id = this.spawn(pi, ctx, type, prompt, {
      ...options,
      isBackground: false,
    });
    // spawn() either throws or leaves the started record in the map.
    const record = this.agents.get(id)!;
    await record.promise;
    return record;
  }

  /**
   * Resume an existing agent session with a new prompt.
   *
   * The record gets a fresh abort controller and a fresh `promise`, so
   * abort()/abortAll() reach the resumed run and waitForAll() awaits it.
   *
   * @param signal - Optional caller signal; aborting it aborts the resumed run.
   * @returns The updated record, or `undefined` when the id is unknown or the
   *   record has no session.
   */
  async resume(
    id: string,
    prompt: string,
    signal?: AbortSignal,
  ): Promise<AgentRecord | undefined> {
    const record = this.agents.get(id);
    if (!record?.session) return undefined;
    const session = record.session;

    const controller = new AbortController();
    record.abortController = controller;
    record.status = "running";
    record.startedAt = Date.now();
    record.completedAt = undefined;
    record.result = undefined;
    record.error = undefined;

    // Forward the caller's signal into the record's own controller.
    let detachCallerSignal: (() => void) | undefined;
    if (signal) {
      if (signal.aborted) {
        controller.abort();
      } else {
        const onCallerAbort = () => controller.abort();
        signal.addEventListener("abort", onCallerAbort, { once: true });
        detachCallerSignal = () =>
          signal.removeEventListener("abort", onCallerAbort);
      }
    }

    const run = (async (): Promise<string> => {
      try {
        const responseText = await resumeAgent(session, prompt, {
          onToolActivity: (activity) => {
            if (activity.type === "end") record.toolUses++;
          },
          onAssistantUsage: (usage) => {
            addUsage(record.lifetimeUsage, usage);
          },
          onCompaction: (info) => {
            record.compactionCount++;
            this.onCompact?.(record, info);
          },
          signal: controller.signal,
        });
        // Don't overwrite status if externally stopped via abort()
        if (record.status !== "stopped") record.status = "completed";
        record.result = responseText;
        record.completedAt ??= Date.now();
        return responseText;
      } catch (err) {
        if (record.status !== "stopped") record.status = "error";
        record.error = err instanceof Error ? err.message : String(err);
        record.completedAt ??= Date.now();
        return "";
      } finally {
        detachCallerSignal?.();
      }
    })();

    // Replace the settled promise of the previous run: waitForAll() awaits
    // `promise` of every running record and would otherwise spin on it.
    record.promise = run;
    await run;
    return record;
  }

  /** Look up a record by agent id. */
  getRecord(id: string): AgentRecord | undefined {
    return this.agents.get(id);
  }

  /** All records, most recently started first (returns a new array). */
  listAgents(): AgentRecord[] {
    return [...this.agents.values()].sort((a, b) => b.startedAt - a.startedAt);
  }

  /**
   * Stop a queued or running agent.
   *
   * @returns `true` when the agent was queued or running and is now marked
   *   "stopped"; `false` for unknown ids and agents in any other state.
   */
  abort(id: string): boolean {
    const record = this.agents.get(id);
    if (!record) return false;

    // Remove from queue if queued
    if (record.status === "queued") {
      this.queue = this.queue.filter((q) => q.id !== id);
      record.status = "stopped";
      record.completedAt = Date.now();
      return true;
    }

    if (record.status !== "running") return false;
    record.abortController?.abort();
    record.status = "stopped";
    record.completedAt = Date.now();
    return true;
  }

  /** Dispose a record's session and remove it from the map. */
  private removeRecord(id: string, record: AgentRecord): void {
    record.session?.dispose?.();
    record.session = undefined;
    this.agents.delete(id);
  }

  /** Drop finished records whose completion is more than 10 minutes old. */
  private cleanup() {
    const cutoff = Date.now() - 10 * 60_000;
    for (const [id, record] of this.agents) {
      if (record.status === "running" || record.status === "queued") continue;
      if ((record.completedAt ?? 0) >= cutoff) continue;
      this.removeRecord(id, record);
    }
  }

  /**
   * Remove all completed/stopped/errored records immediately.
   * Called on session start/switch so tasks from a prior session don't persist.
   */
  clearCompleted(): void {
    for (const [id, record] of this.agents) {
      if (record.status === "running" || record.status === "queued") continue;
      this.removeRecord(id, record);
    }
  }

  /** Whether any agents are still running or queued. */
  hasRunning(): boolean {
    return [...this.agents.values()].some(
      (r) => r.status === "running" || r.status === "queued",
    );
  }

  /**
   * Abort all running and queued agents immediately.
   *
   * @returns How many agents were marked "stopped".
   */
  abortAll(): number {
    let count = 0;
    // Clear queued agents first
    for (const queued of this.queue) {
      const record = this.agents.get(queued.id);
      if (record) {
        record.status = "stopped";
        record.completedAt = Date.now();
        count++;
      }
    }
    this.queue = [];
    // Abort running agents
    for (const record of this.agents.values()) {
      if (record.status === "running") {
        record.abortController?.abort();
        record.status = "stopped";
        record.completedAt = Date.now();
        count++;
      }
    }
    return count;
  }

  /** Wait for all running and queued agents to complete (including queued ones). */
  async waitForAll(): Promise<void> {
    // Loop because drainQueue respects the concurrency limit — as running
    // agents finish they start queued ones, which need awaiting too.
    while (true) {
      this.drainQueue();
      const pending = [...this.agents.values()]
        .filter((r) => r.status === "running" || r.status === "queued")
        .map((r) => r.promise)
        .filter(Boolean);
      if (pending.length === 0) break;
      await Promise.allSettled(pending);
    }
  }

  /** Stop the cleanup timer, drop the queue, dispose every session and forget all records. */
  dispose() {
    clearInterval(this.cleanupInterval);
    // Clear queue
    this.queue = [];
    for (const record of this.agents.values()) {
      // Same optional call as removeRecord().
      record.session?.dispose?.();
    }
    this.agents.clear();
  }
}