pi-crew 0.9.8 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/extension/register.ts +94 -21
- package/src/extension/registration/subagent-helpers.ts +1 -0
- package/src/extension/registration/subagent-tools.ts +9 -0
- package/src/runtime/batch-barrier.ts +145 -0
- package/src/runtime/child-pi.ts +15 -2
- package/src/runtime/crash-classification.ts +208 -0
- package/src/runtime/custom-tools/irc-tool.ts +47 -7
- package/src/runtime/live-agent-manager.ts +185 -0
- package/src/runtime/process-lifecycle.ts +481 -0
- package/src/runtime/subagent-manager.ts +6 -0
- package/src/runtime/task-output-context.ts +52 -1
- package/src/runtime/tool-output-pruner.ts +334 -0
- package/src/state/types.ts +5 -0
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Owned-process lifecycle abstraction (P0 item #3).
|
|
3
|
+
*
|
|
4
|
+
* Distilled and adapted from gajae-code's `runtime/process-lifecycle.ts`.
|
|
5
|
+
*
|
|
6
|
+
* Two complementary primitives:
|
|
7
|
+
*
|
|
8
|
+
* F1(a) {@link spawnOwnedProcess} / {@link OwnedProcess} — wraps a
|
|
9
|
+
* `child_process.spawn` child with explicit ownership: escalating
|
|
10
|
+
* (SIGTERM → grace → SIGKILL) teardown, idempotent `dispose()`, bounded
|
|
11
|
+
* `awaitExit()`, abort-signal wiring, and postmortem registration so
|
|
12
|
+
* an owned child can never outlive its owner.
|
|
13
|
+
*
|
|
14
|
+
* F1(b) {@link registerResourceOwner} — a generic postmortem registry for
|
|
15
|
+
* NON-process resources (timers, sockets, Workers, VM contexts) with
|
|
16
|
+
* `disposeAllOwners()` / `disposeOwner(name)`.
|
|
17
|
+
*
|
|
18
|
+
* ╔═══════════════════════════════════════════════════════════════════════════╗
|
|
19
|
+
* ║ IMPORTANT — INCREMENTAL, NOT FULL MIGRATION ║
|
|
20
|
+
* ║ pi-crew ALREADY has sophisticated kill logic in child-pi.ts ║
|
|
21
|
+
* ║ (killProcessTree, escalating SIGTERM→grace→SIGKILL, hard-kill timer, ║
|
|
22
|
+
* ║ post-exit stdio guard) and async-runner.ts does detached/setsid spawns. ║
|
|
23
|
+
* ║ Those paths are NOT rewritten here. This module provides a clean ║
|
|
24
|
+
* ║ ownership primitive for NEW code paths that need guaranteed teardown ║
|
|
25
|
+
* ║ without re-implementing the escalation dance each time. ║
|
|
26
|
+
* ╚═══════════════════════════════════════════════════════════════════════════╝
|
|
27
|
+
*
|
|
28
|
+
* Cross-platform: on Windows there is no SIGTERM; teardown uses
|
|
29
|
+
* `taskkill /T /PID` first and escalates to `taskkill /F /T /PID` (force-kill the whole tree) if the child is still alive after the grace period.
|
|
30
|
+
* See `.crew/knowledge.md` gotchas: BSD/Windows signal handling differs.
|
|
31
|
+
*/
|
|
32
|
+
import { spawn, type ChildProcess, type SpawnOptions } from "node:child_process";
|
|
33
|
+
import { logInternalError } from "../utils/internal-error.ts";
|
|
34
|
+
|
|
35
|
+
// ── tunables ──────────────────────────────────────────────────────────────────

/** Default grace period (ms) granted after SIGTERM before escalating to SIGKILL. */
const DEFAULT_GRACEFUL_MS = 2000;

/**
 * Upper bound (ms) on how long dispose() keeps waiting once SIGKILL has been
 * sent, so a wedged/unkillable child can never block shutdown forever.
 */
const SIGKILL_REAP_CAP_MS = 2000;

/**
 * Drain window (ms) granted to the process group after the root child exits on
 * its own, before the owner is deregistered. Clean servers drain immediately.
 */
const ROOT_EXIT_DRAIN_MS = 250;

/** `true` on every platform except Windows (`win32`). */
const isPosix = process.platform !== "win32";
|
|
46
|
+
|
|
47
|
+
/** Resolve after `ms` milliseconds; negative values are clamped to 0. */
function delay(ms: number): Promise<void> {
	const waitMs = Math.max(0, ms);
	return new Promise<void>((done) => {
		setTimeout(done, waitMs);
	});
}
|
|
51
|
+
|
|
52
|
+
/**
 * Repeatedly evaluate `predicate` until it returns `true` or `timeoutMs` has
 * elapsed, sleeping at most `intervalMs` between probes.
 *
 * @param predicate  Synchronous condition to probe.
 * @param timeoutMs  Total time budget in ms (negative values count as 0).
 * @param intervalMs Maximum pause between probes in ms (default 20).
 * @returns The predicate's final value: `true` as soon as it holds, otherwise
 *          the result of one last probe after the deadline.
 */
async function pollUntil(predicate: () => boolean, timeoutMs: number, intervalMs = 20): Promise<boolean> {
	if (predicate()) return true;
	const stopAt = Date.now() + Math.max(0, timeoutMs);
	for (;;) {
		const remainingMs = stopAt - Date.now();
		if (remainingMs <= 0) break;
		// Never sleep past the deadline.
		await delay(Math.min(intervalMs, remainingMs));
		if (predicate()) return true;
	}
	return predicate();
}
|
|
62
|
+
|
|
63
|
+
/**
 * Probe whether the POSIX process group `pgid` still has any member
 * (zombies count as alive).
 *
 * Sends signal 0 to `-pgid`, which performs the existence/permission check
 * without delivering a signal.
 */
function groupAlive(pgid: number): boolean {
	try {
		process.kill(-pgid, 0);
	} catch (probeError) {
		// EPERM => the group exists but we may not signal it; still alive.
		const { code } = probeError as NodeJS.ErrnoException;
		return code === "EPERM";
	}
	return true;
}
|
|
73
|
+
|
|
74
|
+
// ── F1(a) OwnedProcess ────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
/** Caller-supplied settings accepted by {@link spawnOwnedProcess}. */
export interface SpawnOwnedOptions {
	/** Working directory for the child. */
	cwd?: string;
	/** Environment handed to the child. */
	env?: Record<string, string | undefined>;
	/** How the child's stdin is wired. `"ignore"` when omitted. */
	stdin?: "pipe" | "ignore";
	/** Aborting this signal disposes the owned process tree (escalating kill). */
	signal?: AbortSignal;
	/** Milliseconds to wait after SIGTERM before SIGKILL on dispose (default 2000). */
	gracefulMs?: number;
	/**
	 * Make the child the leader of its own process group so that the whole
	 * descendant tree can be signalled on dispose. `true` by default on POSIX.
	 * Ignored on Windows, where teardown goes through `taskkill` instead.
	 */
	processGroup?: boolean;
	/** Human-readable label that appears in diagnostics. */
	name?: string;
	/** Additional SpawnOptions merged into the spawn call (e.g. windowsHide). */
	extraOptions?: SpawnOptions;
}
|
|
97
|
+
|
|
98
|
+
/** Outcome reported by a bounded {@link OwnedProcess.awaitExit} call. */
export interface AwaitExitResult {
	/** `false` when the timeout elapsed first; `true` when the process has exited. */
	exited: boolean;
	/** The exit code when one is known, otherwise `null`. */
	code: number | null;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Shape of the listener accepted by {@link OwnedProcess.onExit}: it receives
 * the root child's exit code and terminating signal (either may be `null`).
 */
export type OwnedExitCallback = (code: number | null, signal: NodeJS.Signals | null) => void;
|
|
108
|
+
|
|
109
|
+
/**
 * A spawned child process owned by the runtime with guaranteed teardown.
 *
 * Implemented as a class so callers retain a strong handle and so `dispose()`
 * can be idempotent (concurrent/repeated calls return the same in-flight
 * promise). Never throws from `dispose()` / `awaitExit()`.
 *
 * A child that never spawned (an `error` event while `child.pid` is
 * `undefined`) is treated as already exited with `code === null`, so waiters
 * and teardown cannot hang on a process that does not exist.
 */
export class OwnedProcess {
	/** The underlying `child_process` handle. */
	readonly child: ChildProcess;
	/** Pid of the root child as reported at construction time. */
	readonly pid: number | undefined;
	/** Process-group id (POSIX detached only); `undefined` on Windows / opt-out. */
	readonly pgid: number | undefined;
	private readonly gracefulMs: number;
	private readonly name: string | undefined;
	private disposed = false;
	private disposePromise: Promise<void> | undefined;
	private deregistered = false;
	/** Terminal once teardown/reconciliation has confirmed the group is gone. */
	private terminated = false;
	/** Cached exit details; `undefined` until the root child has exited. */
	private exitInfo: { code: number | null; signal: NodeJS.Signals | null } | undefined;
	private exitPromise: Promise<{ code: number | null; signal: NodeJS.Signals | null }>;
	/** Listeners registered before exit; drained exactly once when it happens. */
	private exitCallbacks = new Set<OwnedExitCallback>();
	private onAbort: (() => void) | undefined;
	private readonly abortSignal: AbortSignal | undefined;
	private deregisterFn: () => void = () => {};

	/**
	 * @param child        Already-spawned child to take ownership of.
	 * @param opts         Ownership options (grace period, abort signal, group mode, label).
	 * @param registerSelf Adds this owner to the live-owner registry and returns
	 *                     the function that removes it again.
	 */
	constructor(child: ChildProcess, opts: SpawnOwnedOptions, registerSelf: (owner: OwnedProcess) => () => void) {
		this.child = child;
		this.pid = child.pid;
		this.gracefulMs = opts.gracefulMs ?? DEFAULT_GRACEFUL_MS;
		this.name = opts.name;
		this.abortSignal = opts.signal;

		const useGroup = (opts.processGroup ?? true) && isPosix;
		// On POSIX with `detached`, the child is its own process-group leader,
		// so the group id equals its pid.
		this.pgid = useGroup ? child.pid : undefined;

		this.exitPromise = new Promise((resolve) => {
			// Idempotent: whichever of `exit` / spawn-failure arrives first wins.
			const settle = (code: number | null, signal: NodeJS.Signals | null): void => {
				if (this.exitInfo !== undefined) return;
				const info = { code, signal };
				this.exitInfo = info;
				resolve(info);
				// Drain and clear so each listener fires exactly once.
				const pending = [...this.exitCallbacks];
				this.exitCallbacks.clear();
				for (const cb of pending) this.runExitCallback(cb, info, "owned-process.onExit-callback");
			};
			child.once("exit", settle);
			child.on("error", (err) => {
				logInternalError("owned-process.child-error", err, this.diagContext());
				// No pid means the spawn itself failed, so no `exit` event will
				// follow; settle here or waiters would hang forever.
				if (child.pid === undefined) settle(null, null);
			});
		});

		// Register for postmortem cleanup and wire abort.
		this.deregisterFn = registerSelf(this);

		// When the root exits on its own (not via dispose), reconcile ownership
		// by the *group*: after a short drain window, deregister if the group is
		// empty, otherwise reap the owned group (no child outlives its owner).
		void this.exitPromise
			.then(() => {
				if (this.disposed) return; // dispose() owns deregistration
				if (this.pgid === undefined) {
					this.deregisterInternal();
					return;
				}
				void (async () => {
					const drained = await pollUntil(() => !groupAlive(this.pgid!), ROOT_EXIT_DRAIN_MS);
					if (this.disposed) return;
					if (drained) {
						this.deregisterInternal();
						return;
					}
					// Root exited but the owned group still has descendants: reap them.
					await this.dispose();
				})();
			})
			.catch(() => undefined);

		if (this.abortSignal) {
			if (this.abortSignal.aborted) {
				void this.dispose();
			} else {
				this.onAbort = () => void this.dispose();
				this.abortSignal.addEventListener("abort", this.onAbort, { once: true });
			}
		}
	}

	/** `true` once `dispose()` has started. */
	get isDisposed(): boolean {
		return this.disposed;
	}

	/** Diagnostic context string passed to `logInternalError`, if a name was given. */
	private diagContext(): string | undefined {
		return this.name ? `name=${this.name}` : undefined;
	}

	/** Invoke one exit listener, logging (never propagating) anything it throws. */
	private runExitCallback(
		callback: OwnedExitCallback,
		info: { code: number | null; signal: NodeJS.Signals | null },
		tag: string,
	): void {
		try {
			callback(info.code, info.signal);
		} catch (err) {
			logInternalError(tag, err, this.diagContext());
		}
	}

	/**
	 * Register a callback invoked exactly once when the root child exits.
	 *
	 * Listeners registered before the exit run synchronously from the exit
	 * handler. If the child has already exited, the callback runs once on the
	 * next microtask with the cached exit info.
	 *
	 * @returns An unsubscribe function; calling it before the callback has run
	 *          prevents the invocation.
	 */
	onExit(callback: OwnedExitCallback): () => void {
		if (this.exitInfo === undefined) {
			this.exitCallbacks.add(callback);
			return () => {
				this.exitCallbacks.delete(callback);
			};
		}
		let cancelled = false;
		void this.exitPromise.then((info) => {
			if (!cancelled) this.runExitCallback(callback, info, "owned-process.onExit-immediate");
		});
		return () => {
			cancelled = true;
		};
	}

	/**
	 * Wait for the root child to exit, optionally bounded by `timeoutMs`.
	 * With no timeout it resolves only when the child exits. Never rejects.
	 *
	 * @returns `exited: true` with the exit code once the child has exited (or
	 *          failed to spawn), or `exited: false` when the timeout fired first.
	 */
	async awaitExit(opts?: { timeoutMs?: number }): Promise<AwaitExitResult> {
		const exitResult = this.exitPromise.then((info) => ({ exited: true as const, code: info.code }));
		if (opts?.timeoutMs === undefined) return exitResult;
		let timer: ReturnType<typeof setTimeout> | undefined;
		const timeout = new Promise<AwaitExitResult>((resolve) => {
			timer = setTimeout(
				() => resolve({ exited: false, code: this.child.exitCode }),
				Math.max(0, opts.timeoutMs!),
			);
		});
		try {
			return await Promise.race([exitResult, timeout]);
		} finally {
			// Always clear the timer so a fast exit does not keep the loop alive.
			if (timer) clearTimeout(timer);
		}
	}

	/** Signal the process tree with `signal` (group-aware on POSIX). */
	private signalTree(signal: NodeJS.Signals): void {
		if (this.child.pid === undefined) return;
		try {
			if (this.pgid !== undefined) process.kill(-this.pgid, signal);
			else this.child.kill(signal);
		} catch {
			/* group/process already gone */
		}
	}

	/** Mark the owner terminal, drop it from the registry and detach the abort listener. */
	private deregisterInternal(): void {
		if (this.deregistered) return;
		this.deregistered = true;
		this.terminated = true;
		this.deregisterFn();
		if (this.onAbort && this.abortSignal) {
			this.abortSignal.removeEventListener("abort", this.onAbort);
			this.onAbort = undefined;
		}
	}

	/**
	 * Idempotently terminate the owned process *group*: SIGTERM the group, wait
	 * `gracefulMs`, then SIGKILL, polling liveness throughout. On Windows,
	 * teardown goes through `taskkill` (see `disposeWindows`). Removes the abort
	 * listener and deregisters from the live-owner set only after teardown has
	 * completed. Repeated/concurrent calls return the same in-flight promise.
	 * Never throws.
	 */
	dispose(): Promise<void> {
		// Already terminal: never re-probe a recycled pgid.
		if (this.terminated) {
			this.disposed = true;
			if (!this.disposePromise) this.disposePromise = Promise.resolve();
			return this.disposePromise;
		}
		if (this.disposePromise) return this.disposePromise;
		this.disposed = true;
		if (this.onAbort && this.abortSignal) {
			this.abortSignal.removeEventListener("abort", this.onAbort);
			this.onAbort = undefined;
		}
		this.disposePromise = (async () => {
			try {
				if (!isPosix) {
					await this.disposeWindows();
					return;
				}
				if (this.pgid !== undefined) {
					// Group ownership: reap until the whole group is gone, even if
					// the root has already exited (it may have backgrounded children).
					if (!groupAlive(this.pgid)) return;
					this.signalTree("SIGTERM");
					if (await pollUntil(() => !groupAlive(this.pgid!), this.gracefulMs)) return;
					this.signalTree("SIGKILL");
					if (!(await pollUntil(() => !groupAlive(this.pgid!), SIGKILL_REAP_CAP_MS))) {
						console.warn(`[pi-crew] owned process group still alive after SIGKILL (name=${this.name ?? "?"}, pgid=${this.pgid})`);
					}
					return;
				}
				// Single-process fallback (processGroup:false). `exitCode` stays
				// null for signal-killed children, so consult the cached exit too.
				if (this.exitInfo !== undefined || this.child.exitCode !== null) return;
				this.signalTree("SIGTERM");
				if ((await this.awaitExit({ timeoutMs: this.gracefulMs })).exited) return;
				this.signalTree("SIGKILL");
				await this.awaitExit({ timeoutMs: SIGKILL_REAP_CAP_MS });
			} catch (err) {
				logInternalError("owned-process.dispose", err, this.diagContext());
			} finally {
				// Deregister only after teardown completes so a postmortem firing
				// mid-grace still sees the owner.
				this.deregisterInternal();
			}
		})();
		return this.disposePromise;
	}

	/**
	 * Windows teardown: there is no SIGTERM, so first ask `taskkill /T /PID` to
	 * close the tree, wait up to `gracefulMs`, then escalate to
	 * `taskkill /F /T /PID` and wait up to the SIGKILL reap cap.
	 */
	private async disposeWindows(): Promise<void> {
		const pid = this.child.pid;
		if (pid === undefined) return;
		// Never hand an already-exited (possibly recycled) pid to taskkill.
		if (this.exitInfo !== undefined || this.child.exitCode !== null) return;
		const tryTaskkill = (force: boolean): Promise<void> =>
			new Promise((resolve) => {
				const args = ["/T", "/PID", String(pid), ...(force ? ["/F"] : [])];
				const tk = spawn("taskkill", args, { stdio: "ignore", windowsHide: true });
				// Best effort: a missing/failed taskkill must not block teardown.
				tk.on("error", () => resolve());
				tk.on("exit", () => resolve());
			});
		await tryTaskkill(false);
		if ((await this.awaitExit({ timeoutMs: this.gracefulMs })).exited) return;
		await tryTaskkill(true);
		await this.awaitExit({ timeoutMs: SIGKILL_REAP_CAP_MS });
	}
}
|
|
354
|
+
|
|
355
|
+
// ── live-owner set + postmortem ───────────────────────────────────────────────
|
|
356
|
+
|
|
357
|
+
/** Every owned process that has been registered and not yet deregistered. */
const liveOwners = new Set<OwnedProcess>();
let ownedPostmortemRegistered = false;

/**
 * Install, at most once, the process-level hooks that stop owned children from
 * outliving this process:
 *
 * - `beforeExit` (event loop drained): run the normal escalating `dispose()`
 *   on every live owner.
 * - `exit` (synchronous, final): nothing asynchronous can run any more, so
 *   SIGKILL whatever is still registered. This is the only hook that fires on
 *   an explicit `process.exit()`, where `beforeExit` is not emitted.
 */
function ensureOwnedPostmortem(): void {
	if (ownedPostmortemRegistered) return;
	ownedPostmortemRegistered = true;
	process.once("beforeExit", () => {
		void Promise.all([...liveOwners].map((owner) => owner.dispose().catch(() => undefined)));
	});
	process.once("exit", () => {
		for (const owner of liveOwners) {
			try {
				// Group kill when the child leads its own group; otherwise only
				// the root can be reached synchronously (best effort).
				if (owner.pgid !== undefined) process.kill(-owner.pgid, "SIGKILL");
				else owner.child.kill("SIGKILL");
			} catch {
				/* already gone */
			}
		}
	});
}
|
|
372
|
+
|
|
373
|
+
/**
 * Spawn a child process owned by the runtime. The returned {@link OwnedProcess}
 * is registered for postmortem cleanup and tears down its whole process group
 * on dispose/abort.
 *
 * NOTE: this is for NEW ownership-scoped spawns. Do NOT use it to replace
 * child-pi.ts's runChildPi (which has its own battle-tested escalation logic)
 * or async-runner.ts's intentionally-detached background spawns.
 *
 * @param command Executable to run.
 * @param args    Arguments passed to the executable.
 * @param opts    Ownership and spawn options. `opts.extraOptions` is merged over
 *                the defaults, except `detached`, which is always derived from
 *                `processGroup` and the platform.
 * @returns The owning handle for the spawned child.
 */
export function spawnOwnedProcess(command: string, args: readonly string[] = [], opts: SpawnOwnedOptions = {}): OwnedProcess {
	ensureOwnedPostmortem();
	const useGroup = (opts.processGroup ?? true) && isPosix;
	const spawnOpts: SpawnOptions = {
		cwd: opts.cwd,
		env: opts.env as NodeJS.ProcessEnv | undefined,
		stdio: [opts.stdin ?? "ignore", "pipe", "pipe"],
		windowsHide: true,
		...opts.extraOptions,
		// Applied last on purpose: OwnedProcess derives its pgid from the same
		// `processGroup`/platform rule, so `detached` must not be overridable.
		// Otherwise group teardown would signal a group that does not exist.
		detached: useGroup,
	};
	const child = spawn(command, [...args], spawnOpts);
	return new OwnedProcess(child, opts, (self) => {
		liveOwners.add(self);
		return () => {
			liveOwners.delete(self);
		};
	});
}
|
|
402
|
+
|
|
403
|
+
/**
 * Report how many owned processes are still registered as live.
 * Intended for leak assertions and tests.
 */
export function liveOwnedProcessCount(): number {
	const { size } = liveOwners;
	return size;
}
|
|
407
|
+
|
|
408
|
+
/**
 * Tear down every owned process that is currently live and wait for all of
 * the teardowns to settle. Individual failures are swallowed. Useful for
 * owner-scoped teardown and for tests.
 */
export async function disposeAllOwnedProcesses(): Promise<void> {
	// Snapshot first: owners remove themselves from the set while disposing.
	const snapshot = Array.from(liveOwners);
	const teardowns: Promise<void>[] = [];
	for (const owner of snapshot) {
		teardowns.push(owner.dispose().catch(() => undefined));
	}
	await Promise.all(teardowns);
}
|
|
412
|
+
|
|
413
|
+
// ── F1(b) generic resource owners ─────────────────────────────────────────────
|
|
414
|
+
|
|
415
|
+
/** Cleanup routine for a registered resource; may be synchronous or asynchronous. */
type ResourceDisposer = () => void | Promise<void>;

/** Active disposers keyed by owner name. */
const resourceOwners = new Map<string, ResourceDisposer>();
let resourcePostmortemRegistered = false;

/**
 * Install, at most once, a one-shot `beforeExit` hook that disposes every
 * registered resource owner. Failures from that sweep are ignored.
 */
function ensureResourcePostmortem(): void {
	if (!resourcePostmortemRegistered) {
		resourcePostmortemRegistered = true;
		const sweepOnBeforeExit = (): void => {
			void disposeAllOwners().catch(() => undefined);
		};
		process.once("beforeExit", sweepOnBeforeExit);
	}
}
|
|
427
|
+
|
|
428
|
+
/**
 * Record `disposer` under `name` so the resource is cleaned up at postmortem.
 *
 * Registration is keyed by `name`: registering the same name again replaces
 * the earlier disposer (last one wins).
 *
 * @param name     Key identifying the resource owner.
 * @param disposer Cleanup routine to run for this owner.
 * @returns An unregister function. It acts once, and removes the entry only if
 *          `disposer` is still the active registration for `name`.
 */
export function registerResourceOwner(name: string, disposer: ResourceDisposer): () => void {
	resourceOwners.set(name, disposer);
	ensureResourcePostmortem();
	let active = true;
	return function unregister(): void {
		if (!active) return;
		active = false;
		const current = resourceOwners.get(name);
		// A newer registration under the same name must survive this call.
		if (current === disposer) resourceOwners.delete(name);
	};
}
|
|
447
|
+
|
|
448
|
+
/**
 * Report how many resource owners are currently registered.
 * Intended for leak assertions and tests.
 */
export function resourceOwnerCount(): number {
	const { size } = resourceOwners;
	return size;
}
|
|
452
|
+
|
|
453
|
+
/**
 * Empty the resource-owner registry and run each disposer that was in it,
 * one after another in registration order.
 *
 * A failing disposer does not stop the sweep: every disposer is attempted.
 *
 * @throws AggregateError carrying every collected failure when at least one
 *         disposer threw or rejected, so callers can tell "all closed" apart
 *         from "a resource may still be alive".
 */
export async function disposeAllOwners(): Promise<void> {
	// Snapshot and clear up front so the registry is empty while disposers run.
	const snapshot = [...resourceOwners.entries()];
	resourceOwners.clear();
	const failures: unknown[] = [];
	for (const [, dispose] of snapshot) {
		try {
			await dispose();
		} catch (failure) {
			failures.push(failure);
		}
	}
	if (failures.length === 0) return;
	throw new AggregateError(failures, `${failures.length} resource disposer(s) failed during teardown`);
}
|
|
473
|
+
|
|
474
|
+
/**
 * Run and unregister the disposer stored under `name`.
 *
 * The entry is removed before the disposer runs; an error thrown by the
 * disposer propagates to the caller.
 *
 * @returns `false` when nothing was registered under `name`, otherwise `true`
 *          once the disposer has completed.
 */
export async function disposeOwner(name: string): Promise<boolean> {
	const dispose = resourceOwners.get(name);
	if (dispose === undefined) return false;
	resourceOwners.delete(name);
	await dispose();
	return true;
}
|
|
@@ -27,6 +27,9 @@ export interface SubagentSpawnOptions {
|
|
|
27
27
|
skill?: string | string[] | false;
|
|
28
28
|
maxTurns?: number;
|
|
29
29
|
ownerSessionGeneration?: number;
|
|
30
|
+
/** Optional batch grouping id (Rule 1). Agents sharing a batchId coalesce
|
|
31
|
+
* completion notifications into one. undefined => individual (default). */
|
|
32
|
+
batchId?: string;
|
|
30
33
|
}
|
|
31
34
|
|
|
32
35
|
export interface SubagentRecord {
|
|
@@ -45,6 +48,8 @@ export interface SubagentRecord {
|
|
|
45
48
|
skill?: string | string[] | false;
|
|
46
49
|
background: boolean;
|
|
47
50
|
ownerSessionGeneration?: number;
|
|
51
|
+
/** Batch grouping id (Rule 1). undefined => individual notification. */
|
|
52
|
+
batchId?: string;
|
|
48
53
|
stuckNotified?: boolean;
|
|
49
54
|
blockedAt?: number;
|
|
50
55
|
promise?: Promise<void>;
|
|
@@ -255,6 +260,7 @@ export class SubagentManager {
|
|
|
255
260
|
skill: options.skill,
|
|
256
261
|
background: options.background,
|
|
257
262
|
ownerSessionGeneration: options.ownerSessionGeneration,
|
|
263
|
+
batchId: options.batchId,
|
|
258
264
|
};
|
|
259
265
|
this.records.set(record.id, record);
|
|
260
266
|
this.cwdByRecord.set(record.id, options.cwd);
|
|
@@ -4,6 +4,7 @@ import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../stat
|
|
|
4
4
|
import { writeArtifact } from "../state/artifact-store.ts";
|
|
5
5
|
import { resolveRealContainedPath } from "../utils/safe-paths.ts";
|
|
6
6
|
import type { WorkflowStep } from "../workflows/workflow-config.ts";
|
|
7
|
+
import { pruneToolOutputs, type ToolResultEntry, type FileEditEvent, DEFAULT_PRUNE_CONFIG } from "./tool-output-pruner.ts";
|
|
7
8
|
|
|
8
9
|
export interface DependencyContextEntry {
|
|
9
10
|
taskId: string;
|
|
@@ -111,6 +112,51 @@ function aggregateUsage(task: TeamTaskState): DependencyContextEntry["usage"] {
|
|
|
111
112
|
return { inputTokens, outputTokens, durationMs };
|
|
112
113
|
}
|
|
113
114
|
|
|
115
|
+
/**
 * Run the shared reads through {@link pruneToolOutputs} before they are
 * injected into a downstream worker's prompt.
 *
 * Each read becomes a generic {@link ToolResultEntry} (toolName="read",
 * target = the read's path), ordered oldest → newest by its position in
 * `reads`. When the pruner reports that it pruned something, the returned
 * entries' `content` replaces the content of the read at the same position;
 * `name` and `path` are kept.
 *
 * NOTE(review): `fileEdits` is collected from the dependencies' produced
 * artifacts but is not passed to `pruneToolOutputs`, so dependency edits
 * presumably do not influence pruning yet — confirm against the pruner's
 * signature. `sharedRoot` is also resolved against the current working
 * directory rather than a manifest path — verify that this is intended.
 *
 * @param reads        Shared reads in injection order.
 * @param dependencies Dependency entries whose `artifactsProduced` are scanned.
 * @returns `reads` itself when it is empty or nothing was pruned, otherwise a
 *          new array carrying the pruned contents.
 */
function pruneSharedReads(
	reads: Array<{ name: string; path: string; content: string }>,
	dependencies: DependencyContextEntry[],
): Array<{ name: string; path: string; content: string }> {
	if (reads.length === 0) return reads;

	// One tool-result entry per shared read; earlier position == older.
	const entries: ToolResultEntry[] = [];
	for (const [position, read] of reads.entries()) {
		entries.push({
			id: `shared-read-${position}`,
			toolName: "read",
			target: read.path,
			content: read.content,
		});
	}

	// File-edit events derived from the artifacts each dependency produced.
	// Edits are indexed after all reads, in dependency order.
	const sharedRoot = path.resolve("shared");
	const fileEdits: FileEditEvent[] = [];
	for (const [depIndex, dep] of dependencies.entries()) {
		for (const artifact of dep.artifactsProduced ?? []) {
			if (typeof artifact !== "string") continue;
			fileEdits.push({ target: path.resolve(sharedRoot, artifact), index: reads.length + depIndex });
		}
	}

	const outcome = pruneToolOutputs(entries, DEFAULT_PRUNE_CONFIG);
	if (outcome.prunedCount === 0) return reads;

	// Map pruned entries back onto the shared-read shape by position.
	return outcome.results.map((entry, position) => {
		const original = reads[position]!;
		return { ...original, content: entry.content };
	});
}
|
|
159
|
+
|
|
114
160
|
export function collectDependencyOutputContext(manifest: TeamRunManifest, tasks: TeamTaskState[], task: TeamTaskState, step: WorkflowStep): DependencyOutputContext {
|
|
115
161
|
const byStep = new Map(tasks.map((item) => [item.stepId, item]).filter((entry): entry is [string, TeamTaskState] => Boolean(entry[0])));
|
|
116
162
|
const byId = new Map(tasks.map((item) => [item.id, item]));
|
|
@@ -127,10 +173,15 @@ export function collectDependencyOutputContext(manifest: TeamRunManifest, tasks:
|
|
|
127
173
|
usage: aggregateUsage(item),
|
|
128
174
|
};
|
|
129
175
|
});
|
|
130
|
-
const
|
|
176
|
+
const rawSharedReads = (step.reads === false ? [] : step.reads ?? []).map((name) => {
|
|
131
177
|
const filePath = sharedPath(manifest, name);
|
|
132
178
|
return { name, path: filePath, content: readIfSmall(filePath, path.resolve(manifest.artifactsRoot, "shared")) ?? "" };
|
|
133
179
|
}).filter((item) => item.content.trim().length > 0);
|
|
180
|
+
// Apply staleness-aware pruning to shared reads: drops superseded reads
|
|
181
|
+
// (same file re-read with different selectors) and replaces stale large
|
|
182
|
+
// outputs with compact digest notices before injecting into the worker
|
|
183
|
+
// prompt. OPT-IN: default config protects recent results.
|
|
184
|
+
const sharedReads = pruneSharedReads(rawSharedReads, dependencies);
|
|
134
185
|
return { dependencies, sharedReads };
|
|
135
186
|
}
|
|
136
187
|
|