pi-crew 0.9.7 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +95 -0
- package/README.md +9 -2
- package/package.json +3 -2
- package/src/config/defaults.ts +8 -4
- package/src/extension/register.ts +94 -21
- package/src/extension/registration/subagent-helpers.ts +1 -0
- package/src/extension/registration/subagent-tools.ts +9 -0
- package/src/runtime/batch-barrier.ts +145 -0
- package/src/runtime/capability-inventory.ts +20 -1
- package/src/runtime/child-pi.ts +23 -3
- package/src/runtime/crash-classification.ts +208 -0
- package/src/runtime/custom-tools/irc-tool.ts +47 -7
- package/src/runtime/live-agent-manager.ts +185 -0
- package/src/runtime/process-lifecycle.ts +481 -0
- package/src/runtime/subagent-manager.ts +6 -0
- package/src/runtime/task-output-context.ts +77 -10
- package/src/runtime/tool-output-pruner.ts +334 -0
- package/src/skills/discover-skills.ts +61 -8
- package/src/skills/validate.ts +267 -0
- package/src/state/types.ts +5 -0
- package/src/ui/keybinding-map.ts +128 -41
- package/src/ui/run-event-bus.ts +83 -0
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Owned-process lifecycle abstraction (P0 item #3).
|
|
3
|
+
*
|
|
4
|
+
* Distilled and adapted from gajae-code's `runtime/process-lifecycle.ts`.
|
|
5
|
+
*
|
|
6
|
+
* Two complementary primitives:
|
|
7
|
+
*
|
|
8
|
+
* F1(a) {@link spawnOwnedProcess} / {@link OwnedProcess} — wraps a
|
|
9
|
+
* `child_process.spawn` child with explicit ownership: escalating
|
|
10
|
+
* (SIGTERM → grace → SIGKILL) teardown, idempotent `dispose()`, bounded
|
|
11
|
+
* `awaitExit()`, abort-signal wiring, and postmortem registration so
|
|
12
|
+
* an owned child can never outlive its owner.
|
|
13
|
+
*
|
|
14
|
+
* F1(b) {@link registerResourceOwner} — a generic postmortem registry for
|
|
15
|
+
* NON-process resources (timers, sockets, Workers, VM contexts) with
|
|
16
|
+
* `disposeAllOwners()` / `disposeOwner(name)`.
|
|
17
|
+
*
|
|
18
|
+
* ╔═══════════════════════════════════════════════════════════════════════════╗
|
|
19
|
+
* ║ IMPORTANT — INCREMENTAL, NOT FULL MIGRATION ║
|
|
20
|
+
* ║ pi-crew ALREADY has sophisticated kill logic in child-pi.ts ║
|
|
21
|
+
* ║ (killProcessTree, escalating SIGTERM→grace→SIGKILL, hard-kill timer, ║
|
|
22
|
+
* ║ post-exit stdio guard) and async-runner.ts does detached/setsid spawns. ║
|
|
23
|
+
* ║ Those paths are NOT rewritten here. This module provides a clean ║
|
|
24
|
+
* ║ ownership primitive for NEW code paths that need guaranteed teardown ║
|
|
25
|
+
* ║ without re-implementing the escalation dance each time. ║
|
|
26
|
+
* ╚═══════════════════════════════════════════════════════════════════════════╝
|
|
27
|
+
*
|
|
28
|
+
* Cross-platform: on Windows there is no SIGTERM; teardown uses
|
|
29
|
+
`taskkill /T /PID` first, then escalates to `taskkill /F /T /PID` (force-kill the whole tree).
|
|
30
|
+
* See `.crew/knowledge.md` gotchas: BSD/Windows signal handling differs.
|
|
31
|
+
*/
|
|
32
|
+
import { spawn, type ChildProcess, type SpawnOptions } from "node:child_process";
|
|
33
|
+
import { logInternalError } from "../utils/internal-error.ts";
|
|
34
|
+
|
|
35
|
+
// ── tunables ──────────────────────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
const DEFAULT_GRACEFUL_MS = 2_000;
|
|
38
|
+
/** Hard cap on how long dispose() waits after SIGKILL before giving up, so a
|
|
39
|
+
* wedged/unkillable child can never block shutdown forever. */
|
|
40
|
+
const SIGKILL_REAP_CAP_MS = 2_000;
|
|
41
|
+
/** After the root child exits on its own, how long to wait for the process
|
|
42
|
+
* group to drain before deregistering. Clean servers drain immediately. */
|
|
43
|
+
const ROOT_EXIT_DRAIN_MS = 250;
|
|
44
|
+
|
|
45
|
+
const isPosix = process.platform !== "win32";
|
|
46
|
+
|
|
47
|
+
/** Resolve after `ms` milliseconds; negative durations are clamped to 0. */
const delay = (ms: number): Promise<void> => {
  const waitMs = Math.max(0, ms);
  return new Promise<void>((done) => {
    setTimeout(done, waitMs);
  });
};
|
|
51
|
+
|
|
52
|
+
/**
 * Repeatedly evaluate `predicate` until it returns true or `timeoutMs` has
 * elapsed, sleeping at most `intervalMs` between checks.
 *
 * @param predicate  Synchronous condition to test.
 * @param timeoutMs  Upper bound on the total wait; negative values act as 0.
 * @param intervalMs Maximum sleep between checks (default 20 ms).
 * @returns The last value observed from `predicate`.
 */
async function pollUntil(predicate: () => boolean, timeoutMs: number, intervalMs = 20): Promise<boolean> {
  // Fast path: no timer is scheduled when the condition already holds.
  if (predicate()) return true;
  const deadline = Date.now() + Math.max(0, timeoutMs);
  for (let now = Date.now(); now < deadline; now = Date.now()) {
    // Never sleep past the deadline.
    const remaining = Math.max(0, deadline - Date.now());
    await delay(Math.min(intervalMs, remaining));
    if (predicate()) return true;
  }
  // One final check once the deadline has passed.
  return predicate();
}
|
|
62
|
+
|
|
63
|
+
/**
 * Probe whether the POSIX process group `pgid` still has any member by sending
 * signal 0 to the negated id (zombies count as alive).
 *
 * @returns `true` when the probe succeeds or fails with `EPERM` (the group
 *          exists but cannot be signalled); `false` for any other failure.
 */
function groupAlive(pgid: number): boolean {
  let alive = true;
  try {
    process.kill(-pgid, 0);
  } catch (probeError) {
    // Only a permission failure still implies the group exists.
    alive = (probeError as NodeJS.ErrnoException).code === "EPERM";
  }
  return alive;
}
|
|
73
|
+
|
|
74
|
+
// ── F1(a) OwnedProcess ────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
/** Options for {@link spawnOwnedProcess}. */
export interface SpawnOwnedOptions {
  /** Working directory passed through to `spawn`. */
  cwd?: string;
  /** Environment passed through to `spawn`. */
  env?: Record<string, string | undefined>;
  /** stdin mode passed through to the child. Defaults to `"ignore"`. */
  stdin?: "pipe" | "ignore";
  /** When aborted, the owned process tree is disposed (escalating kill). */
  signal?: AbortSignal;
  /** Grace period (ms) between SIGTERM and SIGKILL on dispose. Default 2000. */
  gracefulMs?: number;
  /**
   * Spawn the child as its own process-group leader so the whole descendant
   * tree can be signalled on dispose. Defaults to `true` on POSIX. Has no
   * effect on Windows, where no process group is used and teardown goes
   * through `taskkill /T` on the child's pid instead.
   * When `false` on POSIX, dispose signals only the root child.
   */
  processGroup?: boolean;
  /** Label used in diagnostics. */
  name?: string;
  /** Extra SpawnOptions merged in (e.g. windowsHide). */
  extraOptions?: SpawnOptions;
}
|
|
97
|
+
|
|
98
|
+
/** Result of a bounded {@link OwnedProcess.awaitExit}. */
export interface AwaitExitResult {
  /** `true` when the process has exited; `false` when the timeout fired first. */
  exited: boolean;
  /** Exit code if known, else `null` (e.g. still running, or ended by a signal). */
  code: number | null;
}

/**
 * Exit callback signature for {@link OwnedProcess.onExit}; receives the
 * `code` and `signal` arguments of the child's `exit` event.
 */
export type OwnedExitCallback = (code: number | null, signal: NodeJS.Signals | null) => void;
|
|
108
|
+
|
|
109
|
+
/**
 * A spawned child process owned by the runtime with guaranteed teardown.
 *
 * Implemented as a class so callers retain a strong handle and so `dispose()`
 * can be idempotent (concurrent/repeated calls return the same in-flight
 * promise). Never throws from `dispose()` / `awaitExit()`.
 *
 * Teardown strategy:
 *  - POSIX with a process group: SIGTERM the group, wait `gracefulMs`, then
 *    SIGKILL, polling group liveness throughout.
 *  - POSIX without a group (`processGroup: false`): same escalation against the
 *    root child only.
 *  - Windows: `taskkill /T /PID`, wait `gracefulMs`, then `taskkill /T /PID /F`.
 */
export class OwnedProcess {
  /** The underlying Node child-process handle. */
  readonly child: ChildProcess;
  /** `child.pid` as captured at construction time. */
  readonly pid: number | undefined;
  /** Process-group id (POSIX detached only); `undefined` on Windows / opt-out. */
  readonly pgid: number | undefined;
  private readonly gracefulMs: number;
  private readonly name: string | undefined;
  private disposed = false;
  private disposePromise: Promise<void> | undefined;
  private deregistered = false;
  /**
   * Set when the owner is deregistered (teardown or exit reconciliation has
   * finished). Once set, `dispose()` never probes the pgid again because the
   * id may have been recycled.
   */
  private terminated = false;
  private exitPromise: Promise<{ code: number | null; signal: NodeJS.Signals | null }>;
  /** Exit details cached by the `exit` handler so late `onExit()` calls can fire synchronously. */
  private exitInfo: { code: number | null; signal: NodeJS.Signals | null } | undefined;
  private exitCallbacks = new Set<OwnedExitCallback>();
  private onAbort: (() => void) | undefined;
  private readonly abortSignal: AbortSignal | undefined;
  private deregisterFn: () => void = () => {};

  /**
   * @param child        Already-spawned child to take ownership of.
   * @param opts         Spawn options; `gracefulMs`, `name`, `signal` and
   *                     `processGroup` are read here.
   * @param registerSelf Adds this owner to the live-owner registry and returns
   *                     the matching deregistration function.
   */
  constructor(child: ChildProcess, opts: SpawnOwnedOptions, registerSelf: (owner: OwnedProcess) => () => void) {
    this.child = child;
    this.pid = child.pid;
    this.gracefulMs = opts.gracefulMs ?? DEFAULT_GRACEFUL_MS;
    this.name = opts.name;
    this.abortSignal = opts.signal;

    const useGroup = (opts.processGroup ?? true) && isPosix;
    // On POSIX with `detached`, the child is its own process-group leader,
    // so the group id equals its pid.
    this.pgid = useGroup ? child.pid : undefined;

    // NOTE(review): no 'error' listener is attached here. Presumably a failed
    // spawn emits 'error' without 'exit', in which case an unbounded
    // awaitExit() would never resolve — confirm and handle at the call site.
    this.exitPromise = new Promise((resolve) => {
      child.once("exit", (code, signal) => {
        const info = { code, signal };
        this.exitInfo = info;
        resolve(info);
        // Iterate a snapshot; `delete` doubles as the "still subscribed?" check
        // so a callback unsubscribed by an earlier callback is skipped, and
        // every callback fires at most once.
        for (const cb of [...this.exitCallbacks]) {
          if (!this.exitCallbacks.delete(cb)) continue;
          try {
            cb(code, signal);
          } catch (err) {
            logInternalError("owned-process.onExit-callback", err, this.name ? `name=${this.name}` : undefined);
          }
        }
      });
    });

    // Register for postmortem cleanup and wire abort.
    this.deregisterFn = registerSelf(this);

    // When the root exits on its own (not via dispose), reconcile ownership
    // by the *group*: after a short drain window, deregister if the group is
    // empty, otherwise reap the owned group (no child outlives its owner).
    void this.exitPromise
      .then(() => {
        if (this.disposed) return; // dispose() owns deregistration
        if (this.pgid === undefined) {
          this.deregisterInternal();
          return;
        }
        void (async () => {
          const drained = await pollUntil(() => !groupAlive(this.pgid!), ROOT_EXIT_DRAIN_MS);
          if (this.disposed) return;
          if (drained) {
            this.deregisterInternal();
            return;
          }
          // Root exited but the owned group still has descendants: reap them.
          await this.dispose();
        })();
      })
      .catch(() => undefined);

    if (this.abortSignal) {
      if (this.abortSignal.aborted) {
        void this.dispose();
      } else {
        this.onAbort = () => void this.dispose();
        this.abortSignal.addEventListener("abort", this.onAbort, { once: true });
      }
    }
  }

  /** `true` once `dispose()` has started. */
  get isDisposed(): boolean {
    return this.disposed;
  }

  /**
   * Register a callback invoked exactly once when the root child exits.
   * If the child has already exited, the callback is invoked synchronously
   * with the cached exit info. Errors thrown by the callback are logged, not
   * propagated.
   *
   * @returns An unsubscribe function (a no-op once the callback has fired).
   */
  onExit(callback: OwnedExitCallback): () => void {
    const info = this.exitInfo;
    if (info !== undefined) {
      try {
        callback(info.code, info.signal);
      } catch (err) {
        logInternalError("owned-process.onExit-immediate", err, this.name ? `name=${this.name}` : undefined);
      }
      return () => {};
    }
    this.exitCallbacks.add(callback);
    return () => {
      this.exitCallbacks.delete(callback);
    };
  }

  /**
   * Wait for the root child to exit, optionally bounded by `timeoutMs`.
   * With no timeout it resolves only when the child exits. Never rejects.
   *
   * @returns `{ exited: true, code }` on exit, or `{ exited: false, code }`
   *          (with the child's current `exitCode`) when the timeout fires first.
   */
  async awaitExit(opts?: { timeoutMs?: number }): Promise<AwaitExitResult> {
    const exitResult = this.exitPromise.then((info) => ({ exited: true as const, code: info.code }));
    const timeoutMs = opts?.timeoutMs;
    if (timeoutMs === undefined) return exitResult;
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<AwaitExitResult>((resolve) => {
      timer = setTimeout(
        () => resolve({ exited: false, code: this.child.exitCode }),
        Math.max(0, timeoutMs),
      );
    });
    try {
      return await Promise.race([exitResult, timeout]);
    } finally {
      // Do not leave a pending timer behind when the exit wins the race.
      if (timer) clearTimeout(timer);
    }
  }

  /** Signal the process tree with `signal` (group-aware on POSIX). Never throws. */
  private signalTree(signal: NodeJS.Signals): void {
    if (this.child.pid === undefined) return;
    try {
      if (this.pgid !== undefined) {
        // A negative pid addresses the whole process group.
        process.kill(-this.pgid, signal);
      } else {
        this.child.kill(signal);
      }
    } catch {
      /* group / process already gone */
    }
  }

  /** Idempotently leave the live-owner registry and detach the abort listener. */
  private deregisterInternal(): void {
    if (this.deregistered) return;
    this.deregistered = true;
    this.terminated = true;
    this.deregisterFn();
    if (this.onAbort && this.abortSignal) {
      this.abortSignal.removeEventListener("abort", this.onAbort);
      this.onAbort = undefined;
    }
  }

  /**
   * Idempotently terminate the owned process *group*: SIGTERM the group, wait
   * `gracefulMs`, then SIGKILL, polling liveness throughout. On Windows,
   * teardown goes through `taskkill` (see {@link disposeWindows}). Removes the
   * abort listener and deregisters from the live-owner set only after teardown
   * has completed. Repeated/concurrent calls return the same in-flight promise.
   * Never throws.
   */
  dispose(): Promise<void> {
    // Already terminal: never re-probe a recycled pgid.
    if (this.terminated) {
      this.disposed = true;
      if (!this.disposePromise) this.disposePromise = Promise.resolve();
      return this.disposePromise;
    }
    if (this.disposePromise) return this.disposePromise;
    this.disposed = true;
    if (this.onAbort && this.abortSignal) {
      this.abortSignal.removeEventListener("abort", this.onAbort);
      this.onAbort = undefined;
    }
    this.disposePromise = (async () => {
      try {
        if (!isPosix) {
          await this.disposeWindows();
          return;
        }
        if (this.pgid !== undefined) {
          // Group ownership: reap until the whole group is gone, even if
          // the root has already exited (it may have backgrounded children).
          if (!groupAlive(this.pgid)) return;
          this.signalTree("SIGTERM");
          if (await pollUntil(() => !groupAlive(this.pgid!), this.gracefulMs)) return;
          this.signalTree("SIGKILL");
          if (!(await pollUntil(() => !groupAlive(this.pgid!), SIGKILL_REAP_CAP_MS))) {
            console.warn(`[pi-crew] owned process group still alive after SIGKILL (name=${this.name ?? "?"}, pgid=${this.pgid})`);
          }
          return;
        }
        // Single-process fallback (processGroup:false).
        if (this.child.exitCode !== null) return;
        this.signalTree("SIGTERM");
        if ((await this.awaitExit({ timeoutMs: this.gracefulMs })).exited) return;
        this.signalTree("SIGKILL");
        await this.awaitExit({ timeoutMs: SIGKILL_REAP_CAP_MS });
      } catch (err) {
        logInternalError("owned-process.dispose", err, this.name ? `name=${this.name}` : undefined);
      } finally {
        // Deregister only after teardown completes so a postmortem firing
        // mid-grace still sees the owner.
        this.deregisterInternal();
      }
    })();
    return this.disposePromise;
  }

  /**
   * Windows teardown: there is no SIGTERM, so first run `taskkill /T /PID`
   * (no `/F`), wait up to `gracefulMs` for the root to exit, then escalate to
   * `taskkill /T /PID <pid> /F` and wait up to `SIGKILL_REAP_CAP_MS`.
   * Failures to launch `taskkill` are ignored.
   */
  private async disposeWindows(): Promise<void> {
    const pid = this.child.pid;
    if (pid === undefined) return;
    if (this.child.exitCode !== null) return;
    const tryTaskkill = (force: boolean): Promise<void> =>
      new Promise((resolve) => {
        const args = ["/T", "/PID", String(pid), ...(force ? ["/F"] : [])];
        const tk = spawn("taskkill", args, { stdio: "ignore", windowsHide: true });
        tk.on("error", () => resolve());
        tk.on("exit", () => resolve());
      });
    await tryTaskkill(false);
    if ((await this.awaitExit({ timeoutMs: this.gracefulMs })).exited) return;
    await tryTaskkill(true);
    await this.awaitExit({ timeoutMs: SIGKILL_REAP_CAP_MS });
  }
}
|
|
354
|
+
|
|
355
|
+
// ── live-owner set + postmortem ───────────────────────────────────────────────
|
|
356
|
+
|
|
357
|
+
/** Owned processes that have been spawned and not yet deregistered. */
const liveOwners = new Set<OwnedProcess>();
let ownedPostmortemRegistered = false;

/**
 * Install the process-level postmortem hooks for owned processes. Idempotent:
 * only the first call registers listeners.
 *
 * Two hooks are wired:
 *  - `beforeExit` (event loop drained): starts the normal asynchronous,
 *    escalating `dispose()` on every live owner.
 *  - `exit` (synchronous, final): `beforeExit` is not emitted for explicit
 *    `process.exit()` or fatal exits, and no async work can run during `exit`,
 *    so this hook makes a best-effort synchronous SIGKILL of whatever is still
 *    registered (the process group when one is owned, else the root child).
 */
function ensureOwnedPostmortem(): void {
  if (ownedPostmortemRegistered) return;
  ownedPostmortemRegistered = true;
  const drain = async (): Promise<void> => {
    await Promise.all([...liveOwners].map((owner) => owner.dispose().catch(() => undefined)));
  };
  process.once("beforeExit", () => {
    void drain().catch(() => undefined);
  });
  process.once("exit", () => {
    for (const owner of liveOwners) {
      try {
        if (owner.pgid !== undefined) {
          // Negative pid addresses the whole owned process group.
          process.kill(-owner.pgid, "SIGKILL");
        } else if (owner.child.exitCode === null && owner.child.signalCode === null) {
          owner.child.kill("SIGKILL");
        }
      } catch {
        /* already gone or not signalable — nothing more can be done at exit */
      }
    }
  });
}
|
|
372
|
+
|
|
373
|
+
/**
 * Spawn a child process owned by the runtime. The returned {@link OwnedProcess}
 * is registered for postmortem cleanup and tears down its whole process group
 * on dispose/abort.
 *
 * NOTE: this is for NEW ownership-scoped spawns. Do NOT use it to replace
 * child-pi.ts's runChildPi (which has its own battle-tested escalation logic)
 * or async-runner.ts's intentionally-detached background spawns.
 *
 * @param command Executable to run.
 * @param args    Arguments passed to the executable.
 * @param opts    Ownership and spawn options. `extraOptions` is merged over the
 *                defaults built here, except that `detached` stays `true`
 *                whenever group ownership is in effect.
 * @returns The owning handle, already added to the live-owner registry.
 */
export function spawnOwnedProcess(command: string, args: readonly string[] = [], opts: SpawnOwnedOptions = {}): OwnedProcess {
  ensureOwnedPostmortem();
  const useGroup = (opts.processGroup ?? true) && isPosix;
  const spawnOpts: SpawnOptions = {
    cwd: opts.cwd,
    env: opts.env as NodeJS.ProcessEnv | undefined,
    stdio: [opts.stdin ?? "ignore", "pipe", "pipe"],
    detached: useGroup,
    windowsHide: true,
    ...opts.extraOptions,
    // OwnedProcess assumes pgid === pid whenever group ownership is active.
    // Re-assert `detached` after the merge so `extraOptions` cannot silently
    // break that assumption and leave group teardown signalling nothing.
    ...(useGroup ? { detached: true } : {}),
  };
  const child = spawn(command, [...args], spawnOpts);
  return new OwnedProcess(child, opts, (self) => {
    liveOwners.add(self);
    return () => {
      liveOwners.delete(self);
    };
  });
}
|
|
402
|
+
|
|
403
|
+
/**
 * Report how many owned processes are currently in the live-owner registry.
 * Intended for leak assertions and tests.
 */
export function liveOwnedProcessCount(): number {
  const { size } = liveOwners;
  return size;
}
|
|
407
|
+
|
|
408
|
+
/**
 * Dispose every owned process that is live at call time and wait for all of
 * the teardowns to settle. Individual dispose rejections are swallowed.
 * Used for owner-scoped teardown and in tests.
 */
export async function disposeAllOwnedProcesses(): Promise<void> {
  // Snapshot first: owners remove themselves from the registry as they finish.
  const snapshot = Array.from(liveOwners);
  const pending: Array<Promise<void>> = [];
  for (const owner of snapshot) {
    pending.push(owner.dispose().catch(() => undefined));
  }
  await Promise.all(pending);
}
|
|
412
|
+
|
|
413
|
+
// ── F1(b) generic resource owners ─────────────────────────────────────────────
|
|
414
|
+
|
|
415
|
+
/** Cleanup function for a registered non-process resource; may be sync or async. */
type ResourceDisposer = () => void | Promise<void>;

/** Registered resource disposers, keyed by owner name. */
const resourceOwners = new Map<string, ResourceDisposer>();
let resourcePostmortemRegistered = false;

/**
 * Install, at most once, a `beforeExit` hook that runs `disposeAllOwners()`.
 * Failures surfaced by that call are deliberately ignored here.
 */
function ensureResourcePostmortem(): void {
  if (!resourcePostmortemRegistered) {
    resourcePostmortemRegistered = true;
    process.once("beforeExit", () => {
      void disposeAllOwners().catch(() => undefined);
    });
  }
}
|
|
427
|
+
|
|
428
|
+
/**
 * Register a non-process resource for postmortem/fatal-exit cleanup.
 *
 * Registration is keyed by `name`: registering the same name again replaces
 * the earlier disposer (last wins).
 *
 * @param name     Registry key for the resource.
 * @param disposer Cleanup function to run on teardown.
 * @returns An unregister function. It acts only once, and removes the entry
 *          only if `disposer` is still the active registration for `name`.
 */
export function registerResourceOwner(name: string, disposer: ResourceDisposer): () => void {
  resourceOwners.set(name, disposer);
  ensureResourcePostmortem();
  let active = true;
  return function unregister(): void {
    if (!active) return;
    active = false;
    // A newer registration under the same name must not be removed.
    if (resourceOwners.get(name) !== disposer) return;
    resourceOwners.delete(name);
  };
}
|
|
447
|
+
|
|
448
|
+
/**
 * Report how many resource owners are currently registered.
 * Intended for leak assertions and tests.
 */
export function resourceOwnerCount(): number {
  const { size } = resourceOwners;
  return size;
}
|
|
452
|
+
|
|
453
|
+
/**
 * Run every registered resource disposer and empty the registry.
 *
 * The registry is cleared up front, then disposers run one at a time in
 * registration order. A throwing or rejecting disposer does not stop the
 * rest; all failures are collected.
 *
 * @throws AggregateError when at least one disposer failed, so callers can
 *         tell "all closed" from "a resource may still be alive".
 */
export async function disposeAllOwners(): Promise<void> {
  const queue = Array.from(resourceOwners.values());
  resourceOwners.clear();
  const failures: unknown[] = [];
  for (const runDisposer of queue) {
    try {
      await runDisposer();
    } catch (failure) {
      failures.push(failure);
    }
  }
  if (failures.length === 0) return;
  throw new AggregateError(failures, `${failures.length} resource disposer(s) failed during teardown`);
}
|
|
473
|
+
|
|
474
|
+
/**
 * Dispose a single named resource owner.
 *
 * The entry is removed from the registry before its disposer runs; a failure
 * in the disposer propagates to the caller.
 *
 * @returns `true` if `name` was registered (and its disposer completed),
 *          `false` if nothing was registered under that name.
 */
export async function disposeOwner(name: string): Promise<boolean> {
  const target = resourceOwners.get(name);
  if (target) {
    resourceOwners.delete(name);
    await target();
    return true;
  }
  return false;
}
|
|
@@ -27,6 +27,9 @@ export interface SubagentSpawnOptions {
|
|
|
27
27
|
skill?: string | string[] | false;
|
|
28
28
|
maxTurns?: number;
|
|
29
29
|
ownerSessionGeneration?: number;
|
|
30
|
+
/** Optional batch grouping id (Rule 1). Agents sharing a batchId coalesce
|
|
31
|
+
* completion notifications into one. undefined => individual (default). */
|
|
32
|
+
batchId?: string;
|
|
30
33
|
}
|
|
31
34
|
|
|
32
35
|
export interface SubagentRecord {
|
|
@@ -45,6 +48,8 @@ export interface SubagentRecord {
|
|
|
45
48
|
skill?: string | string[] | false;
|
|
46
49
|
background: boolean;
|
|
47
50
|
ownerSessionGeneration?: number;
|
|
51
|
+
/** Batch grouping id (Rule 1). undefined => individual notification. */
|
|
52
|
+
batchId?: string;
|
|
48
53
|
stuckNotified?: boolean;
|
|
49
54
|
blockedAt?: number;
|
|
50
55
|
promise?: Promise<void>;
|
|
@@ -255,6 +260,7 @@ export class SubagentManager {
|
|
|
255
260
|
skill: options.skill,
|
|
256
261
|
background: options.background,
|
|
257
262
|
ownerSessionGeneration: options.ownerSessionGeneration,
|
|
263
|
+
batchId: options.batchId,
|
|
258
264
|
};
|
|
259
265
|
this.records.set(record.id, record);
|
|
260
266
|
this.cwdByRecord.set(record.id, options.cwd);
|
|
@@ -4,6 +4,7 @@ import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../stat
|
|
|
4
4
|
import { writeArtifact } from "../state/artifact-store.ts";
|
|
5
5
|
import { resolveRealContainedPath } from "../utils/safe-paths.ts";
|
|
6
6
|
import type { WorkflowStep } from "../workflows/workflow-config.ts";
|
|
7
|
+
import { pruneToolOutputs, type ToolResultEntry, type FileEditEvent, DEFAULT_PRUNE_CONFIG } from "./tool-output-pruner.ts";
|
|
7
8
|
|
|
8
9
|
export interface DependencyContextEntry {
|
|
9
10
|
taskId: string;
|
|
@@ -30,20 +31,36 @@ function containedExists(filePath: string, baseDir?: string): boolean {
|
|
|
30
31
|
}
|
|
31
32
|
}
|
|
32
33
|
|
|
33
|
-
|
|
34
|
+
/**
|
|
35
|
+
* L4 output-handling: single consistent threshold for all artifact reads.
|
|
36
|
+
* Sized from real data (27 result artifacts: max 9226 bytes; 100% < 16KB).
|
|
37
|
+
* 32KB gives 2x headroom over the largest observed real output while still
|
|
38
|
+
* bounding memory. Larger than the old inconsistent per-call-site values
|
|
39
|
+
* (24K/40K/80K) which truncated the same artifact differently depending on
|
|
40
|
+
* which code path read it.
|
|
41
|
+
*/
|
|
42
|
+
const MAX_RESULT_INLINE_BYTES = 32_000;
|
|
43
|
+
|
|
44
|
+
function readIfSmall(filePath: string, baseDir?: string): string | undefined {
|
|
45
|
+
const maxBytes = MAX_RESULT_INLINE_BYTES;
|
|
34
46
|
try {
|
|
35
47
|
const safePath = baseDir ? resolveRealContainedPath(baseDir, filePath) : filePath;
|
|
36
48
|
const stat = fs.statSync(safePath);
|
|
37
49
|
if (stat.size > maxBytes) {
|
|
38
|
-
//
|
|
39
|
-
|
|
50
|
+
// L4: head + tail instead of head-only. Keeps closing markdown
|
|
51
|
+
// structure (code fences, headings) instead of leaving them truncated.
|
|
52
|
+
const head = Math.floor(maxBytes * 0.75);
|
|
53
|
+
const tail = maxBytes - head;
|
|
54
|
+
const headBuf = Buffer.alloc(head);
|
|
55
|
+
const tailBuf = Buffer.alloc(tail);
|
|
40
56
|
const fd = fs.openSync(safePath, "r");
|
|
41
57
|
try {
|
|
42
|
-
fs.readSync(fd,
|
|
58
|
+
fs.readSync(fd, headBuf, 0, head, 0);
|
|
59
|
+
fs.readSync(fd, tailBuf, 0, tail, stat.size - tail);
|
|
43
60
|
} finally {
|
|
44
61
|
fs.closeSync(fd);
|
|
45
62
|
}
|
|
46
|
-
return `${
|
|
63
|
+
return `${headBuf.toString("utf-8")}\n\n...[pi-crew truncated ${stat.size - maxBytes} bytes, head+tail preserved]...\n${tailBuf.toString("utf-8")}`;
|
|
47
64
|
}
|
|
48
65
|
return fs.readFileSync(safePath, "utf-8");
|
|
49
66
|
} catch {
|
|
@@ -95,11 +112,56 @@ function aggregateUsage(task: TeamTaskState): DependencyContextEntry["usage"] {
|
|
|
95
112
|
return { inputTokens, outputTokens, durationMs };
|
|
96
113
|
}
|
|
97
114
|
|
|
115
|
+
/**
 * Run shared reads through {@link pruneToolOutputs} before they are injected
 * into a downstream worker's prompt.
 *
 * Each read becomes a {@link ToolResultEntry} with `toolName: "read"`, the
 * read's path as `target`, and an id derived from its array position. The
 * entries are pruned with {@link DEFAULT_PRUNE_CONFIG}; if nothing was pruned
 * the original `reads` array is returned as-is, otherwise each read is
 * returned with its `content` replaced by the pruned entry's content.
 *
 * NOTE(review): the `fileEdits` list built below is never passed to
 * `pruneToolOutputs`, so dependency-produced artifacts currently have no
 * effect on the result. Presumably it was meant to be supplied as an extra
 * argument/option — confirm against tool-output-pruner.ts.
 *
 * @param reads        Shared reads, treated as ordered oldest → newest.
 * @param dependencies Dependency entries whose `artifactsProduced` is scanned.
 * @returns The reads, with pruned content where the pruner replaced it.
 */
function pruneSharedReads(
  reads: Array<{ name: string; path: string; content: string }>,
  dependencies: DependencyContextEntry[],
): Array<{ name: string; path: string; content: string }> {
  if (reads.length === 0) return reads;
  // Convert shared reads to tool result entries (ordered oldest → newest
  // by position in the reads array — earlier entries are "older").
  const entries: ToolResultEntry[] = reads.map((read, index) => ({
    id: `shared-read-${index}`,
    toolName: "read",
    target: read.path,
    content: read.content,
  }));
  // Collect file edit events from dependency artifacts produced to shared/.
  // Intent: a dependency that wrote a shared file after an earlier read
  // invalidates that read (the content is stale relative to the latest version).
  // NOTE(review): `path.resolve("shared")` resolves against the process cwd,
  // while read targets come from the caller's `path` field — verify that the
  // two can ever match.
  const sharedRoot = path.resolve("shared");
  const fileEdits: FileEditEvent[] = [];
  for (let depIndex = 0; depIndex < dependencies.length; depIndex++) {
    const dep = dependencies[depIndex]!;
    const produced = dep.artifactsProduced ?? [];
    for (const artifact of produced) {
      // Only string entries are treated as paths; anything else is skipped.
      if (typeof artifact !== "string") continue;
      // Edits are indexed after every read (reads.length + depIndex), so they
      // sort as newer than all reads.
      fileEdits.push({ target: path.resolve(sharedRoot, artifact), index: reads.length + depIndex });
    }
  }
  // NOTE(review): `fileEdits` is not passed here — see the function header.
  const pruned = pruneToolOutputs(entries, DEFAULT_PRUNE_CONFIG);
  if (pruned.prunedCount === 0) return reads;
  // Map pruned entries back to the shared-read shape.
  // Assumes `pruned.results` keeps the same length and order as `entries` — TODO confirm.
  return pruned.results.map((entry, index) => ({ ...reads[index]!, content: entry.content }));
}
|
|
159
|
+
|
|
98
160
|
export function collectDependencyOutputContext(manifest: TeamRunManifest, tasks: TeamTaskState[], task: TeamTaskState, step: WorkflowStep): DependencyOutputContext {
|
|
99
161
|
const byStep = new Map(tasks.map((item) => [item.stepId, item]).filter((entry): entry is [string, TeamTaskState] => Boolean(entry[0])));
|
|
100
162
|
const byId = new Map(tasks.map((item) => [item.id, item]));
|
|
101
163
|
const dependencies = task.dependsOn.map((dep) => byStep.get(dep) ?? byId.get(dep)).filter((item): item is TeamTaskState => Boolean(item)).map((item) => {
|
|
102
|
-
const resultText = item.resultArtifact ? readIfSmall(item.resultArtifact.path,
|
|
164
|
+
const resultText = item.resultArtifact ? readIfSmall(item.resultArtifact.path, manifest.artifactsRoot) : undefined;
|
|
103
165
|
return {
|
|
104
166
|
taskId: item.id,
|
|
105
167
|
role: item.role,
|
|
@@ -111,10 +173,15 @@ export function collectDependencyOutputContext(manifest: TeamRunManifest, tasks:
|
|
|
111
173
|
usage: aggregateUsage(item),
|
|
112
174
|
};
|
|
113
175
|
});
|
|
114
|
-
const
|
|
176
|
+
const rawSharedReads = (step.reads === false ? [] : step.reads ?? []).map((name) => {
|
|
115
177
|
const filePath = sharedPath(manifest, name);
|
|
116
|
-
return { name, path: filePath, content: readIfSmall(filePath,
|
|
178
|
+
return { name, path: filePath, content: readIfSmall(filePath, path.resolve(manifest.artifactsRoot, "shared")) ?? "" };
|
|
117
179
|
}).filter((item) => item.content.trim().length > 0);
|
|
180
|
+
// Apply staleness-aware pruning to shared reads: drops superseded reads
|
|
181
|
+
// (same file re-read with different selectors) and replaces stale large
|
|
182
|
+
// outputs with compact digest notices before injecting into the worker
|
|
183
|
+
// prompt. OPT-IN: default config protects recent results.
|
|
184
|
+
const sharedReads = pruneSharedReads(rawSharedReads, dependencies);
|
|
118
185
|
return { dependencies, sharedReads };
|
|
119
186
|
}
|
|
120
187
|
|
|
@@ -139,7 +206,7 @@ export function renderDependencyOutputContext(context: DependencyOutputContext):
|
|
|
139
206
|
export function writeTaskSharedOutput(manifest: TeamRunManifest, step: WorkflowStep, task: TeamTaskState): ArtifactDescriptor | undefined {
|
|
140
207
|
if (step.output === false) return undefined;
|
|
141
208
|
const name = safeSharedName(step.output || `${task.id}.md`);
|
|
142
|
-
const source = task.resultArtifact ? readIfSmall(task.resultArtifact.path,
|
|
209
|
+
const source = task.resultArtifact ? readIfSmall(task.resultArtifact.path, manifest.artifactsRoot) : undefined;
|
|
143
210
|
if (!source) return undefined;
|
|
144
211
|
return writeArtifact(manifest.artifactsRoot, {
|
|
145
212
|
kind: "metadata",
|
|
@@ -160,7 +227,7 @@ export function writeTaskInputsArtifact(manifest: TeamRunManifest, task: TeamTas
|
|
|
160
227
|
|
|
161
228
|
export function aggregateTaskOutputs(tasks: TeamTaskState[], manifest?: TeamRunManifest): string {
|
|
162
229
|
return tasks.map((task, index) => {
|
|
163
|
-
const body = task.resultArtifact ? readIfSmall(task.resultArtifact.path,
|
|
230
|
+
const body = task.resultArtifact ? readIfSmall(task.resultArtifact.path, manifest?.artifactsRoot) : undefined;
|
|
164
231
|
const hasBody = Boolean(body?.trim());
|
|
165
232
|
const expectedMissing = task.resultArtifact && !containedExists(task.resultArtifact.path, manifest?.artifactsRoot);
|
|
166
233
|
const status = task.status === "skipped"
|