pi-crew 0.5.2 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +67 -0
- package/docs/bugs/cross-session-notification-leakage.md +82 -0
- package/docs/coding-agent-optimization.md +268 -0
- package/docs/deep-review-report.md +384 -0
- package/docs/distillation/cybersecurity-patterns.md +294 -0
- package/docs/migration-v0.4-v0.5.md +191 -0
- package/docs/optimization-plan.md +642 -0
- package/docs/pi-mono-opportunities.md +969 -0
- package/docs/pi-mono-review.md +291 -0
- package/docs/skills/REFERENCE.md +144 -0
- package/package.json +7 -6
- package/skills/artifact-analysis-loop/SKILL.md +302 -0
- package/skills/async-worker-recovery/SKILL.md +19 -1
- package/skills/child-pi-spawning/SKILL.md +19 -6
- package/skills/context-artifact-hygiene/SKILL.md +19 -2
- package/skills/delegation-patterns/SKILL.md +68 -3
- package/skills/detection-pipeline-design/SKILL.md +285 -0
- package/skills/event-log-tracing/SKILL.md +20 -6
- package/skills/git-master/SKILL.md +20 -6
- package/skills/hunting-investigation-loop/SKILL.md +401 -0
- package/skills/incident-playbook-construction/SKILL.md +383 -0
- package/skills/live-agent-lifecycle/SKILL.md +20 -6
- package/skills/mailbox-interactive/SKILL.md +19 -6
- package/skills/model-routing-context/SKILL.md +19 -1
- package/skills/multi-perspective-review/SKILL.md +19 -4
- package/skills/observability-reliability/SKILL.md +19 -2
- package/skills/orchestration/SKILL.md +20 -2
- package/skills/ownership-session-security/SKILL.md +20 -2
- package/skills/pi-extension-lifecycle/SKILL.md +20 -2
- package/skills/post-mortem/SKILL.md +7 -2
- package/skills/read-only-explorer/SKILL.md +20 -6
- package/skills/requirements-to-task-packet/SKILL.md +23 -3
- package/skills/resource-discovery-config/SKILL.md +20 -2
- package/skills/runtime-state-reader/SKILL.md +20 -2
- package/skills/safe-bash/SKILL.md +21 -6
- package/skills/scrutinize/SKILL.md +20 -2
- package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
- package/skills/security-review/SKILL.md +560 -0
- package/skills/state-mutation-locking/SKILL.md +22 -2
- package/skills/systematic-debugging/SKILL.md +8 -6
- package/skills/threat-hypothesis-framework/SKILL.md +175 -0
- package/skills/ui-render-performance/SKILL.md +20 -2
- package/skills/verification-before-done/SKILL.md +17 -2
- package/skills/widget-rendering/SKILL.md +21 -6
- package/skills/workspace-isolation/SKILL.md +20 -6
- package/skills/worktree-isolation/SKILL.md +20 -6
- package/src/agents/agent-config.ts +40 -1
- package/src/config/config.ts +22 -5
- package/src/config/role-tools.ts +82 -0
- package/src/config/types.ts +4 -0
- package/src/extension/crew-cleanup.ts +114 -0
- package/src/extension/register.ts +15 -3
- package/src/extension/team-tool/run.ts +7 -7
- package/src/observability/event-bus.ts +60 -0
- package/src/runtime/background-runner.ts +8 -2
- package/src/runtime/child-pi.ts +122 -34
- package/src/runtime/crew-agent-runtime.ts +1 -0
- package/src/runtime/foreground-control.ts +87 -17
- package/src/runtime/pi-args.ts +11 -1
- package/src/runtime/pi-json-output.ts +31 -0
- package/src/runtime/progress-tracker.ts +124 -0
- package/src/runtime/skill-effectiveness.ts +473 -0
- package/src/runtime/skill-instructions.ts +37 -3
- package/src/runtime/task-runner.ts +91 -17
- package/src/runtime/team-runner.ts +11 -11
- package/src/runtime/tool-progress.ts +10 -3
- package/src/runtime/verification-gates.ts +367 -0
- package/src/schema/team-tool-schema.ts +7 -0
- package/src/state/decision-ledger.ts +92 -43
- package/src/state/event-log.ts +136 -10
- package/src/state/hook-instinct-bridge.ts +5 -5
- package/src/state/state-store.ts +3 -1
- package/src/state/types.ts +4 -0
- package/src/types/new-api-types.ts +34 -0
- package/src/ui/agent-management-overlay.ts +5 -1
- package/src/ui/crew-widget.ts +29 -15
- package/src/ui/powerbar-publisher.ts +100 -7
- package/src/ui/tool-render.ts +15 -15
- package/src/utils/session-utils.ts +52 -0
- package/src/worktree/worktree-manager.ts +32 -13
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
// NOTE: globalProgressTracker import kept for documentation but not directly used
|
|
3
|
+
// since we don't have agent IDs to untrack. Actual progress clearing should be
|
|
4
|
+
// handled by the progress tracker itself on shutdown.
|
|
5
|
+
// import { globalProgressTracker } from "../runtime/progress-tracker.ts";
|
|
6
|
+
|
|
7
|
+
/**
 * Shutdown cleanup support for pi-crew.
 *
 * This module keeps a registry of spawned child process IDs so that they can
 * be signalled when the session shuts down or the host process receives
 * SIGTERM/SIGHUP.
 */

/** Bookkeeping record stored for every tracked child process. */
interface ChildProcessInfo {
	/** Operating-system process ID of the child. */
	pid: number;
	/** Run identifier supplied by the caller at registration time. */
	runId: string;
	/** Agent identifier supplied by the caller at registration time. */
	agentId: string;
	/** `Date.now()` value captured when the process was registered. */
	startedAt: number;
}

/** In-memory map of tracked child processes, keyed by PID. */
class ChildProcessRegistry {
	private processes = new Map<number, ChildProcessInfo>();

	/**
	 * Starts tracking a child process. Registering an already-known PID
	 * overwrites its previous record.
	 *
	 * PIDs that are not positive integers are ignored: every registered PID is
	 * later handed to `process.kill`, where `0` and negative values address
	 * whole process groups (including the current process) rather than a
	 * single child.
	 */
	register(pid: number, runId: string, agentId: string): void {
		if (!Number.isInteger(pid) || pid <= 0) {
			return;
		}
		this.processes.set(pid, { pid, runId, agentId, startedAt: Date.now() });
	}

	/** Stops tracking `pid`; unknown PIDs are ignored. */
	unregister(pid: number): void {
		this.processes.delete(pid);
	}

	/** Returns a snapshot array of every tracked PID. */
	getAllPids(): number[] {
		return Array.from(this.processes.keys());
	}

	/** Returns the record for `pid`, or `undefined` when it is not tracked. */
	getInfo(pid: number): ChildProcessInfo | undefined {
		return this.processes.get(pid);
	}

	/** Forgets every tracked process without signalling any of them. */
	clear(): void {
		this.processes.clear();
	}
}

/** Module-wide registry instance shared by the cleanup functions in this file. */
export const childProcessRegistry = new ChildProcessRegistry();
|
|
44
|
+
|
|
45
|
+
/**
 * Registers cleanup handlers for graceful shutdown.
 *
 * - `session_shutdown` (pi event): signals every tracked child process, then
 *   runs temp-directory cleanup. Failures are logged and never rethrown.
 * - `SIGTERM` / `SIGHUP`: signals every tracked child process, then lets the
 *   host process terminate the way it would have without this listener.
 *
 * @param pi Extension API used to subscribe to the `session_shutdown` event.
 */
export function registerCleanupHandler(pi: ExtensionAPI): void {
	pi.on("session_shutdown", async () => {
		console.log("[pi-crew] Session shutdown - cleaning up resources");

		try {
			// Kill all child-pi processes
			await cleanupChildProcesses();

			// Cleanup temp directories
			await cleanupTempDirectories();

			console.log("[pi-crew] Cleanup complete");
		} catch (error) {
			console.error("[pi-crew] Cleanup error:", error);
		}
	});

	// Conventional "128 + signal number" exit codes, used only when the
	// signal cannot be re-raised on this platform.
	const fallbackExitCodes = { SIGTERM: 143, SIGHUP: 129 } as const;

	const handleSignal = async (signal: "SIGTERM" | "SIGHUP"): Promise<void> => {
		console.log(`[pi-crew] Received ${signal} - starting cleanup`);
		try {
			await cleanupChildProcesses();
		} catch (error) {
			// This handler is invoked fire-and-forget; never let it reject.
			console.error("[pi-crew] Cleanup error:", error);
		}

		// Installing a listener for a signal removes Node's default action
		// (terminating the process). If another listener is still installed,
		// leave the exit decision to it. Otherwise re-raise the signal so the
		// process terminates instead of silently surviving SIGTERM/SIGHUP.
		if (process.listenerCount(signal) > 0) {
			return;
		}
		try {
			process.kill(process.pid, signal);
		} catch {
			process.exit(fallbackExitCodes[signal]);
		}
	};

	// `once` removes this listener before it runs, so the re-raised signal
	// above is not delivered back to it.
	for (const signal of ["SIGTERM", "SIGHUP"] as const) {
		process.once(signal, () => {
			void handleSignal(signal);
		});
	}
}
|
|
72
|
+
|
|
73
|
+
/**
 * Sends SIGTERM to every PID currently held by `childProcessRegistry` and
 * drops each one from the registry afterwards, whether or not the signal
 * could be delivered.
 *
 * Errors with code `ESRCH` or `ENOENT` are ignored as "target already gone";
 * any other failure is logged and the loop moves on to the next PID.
 */
async function cleanupChildProcesses(): Promise<void> {
	childProcessRegistry.getAllPids().forEach((childPid) => {
		try {
			process.kill(childPid, "SIGTERM");
			console.log(`[pi-crew] Sent SIGTERM to child process ${childPid}`);
		} catch (failure: unknown) {
			const killError = failure as NodeJS.ErrnoException;
			const alreadyGone = killError.code === "ESRCH" || killError.code === "ENOENT";
			if (!alreadyGone) {
				console.error(`[pi-crew] Error killing process ${childPid}:`, killError.message);
			}
		}
		childProcessRegistry.unregister(childPid);
	});

	// Progress-tracker entries are not cleared here: this module has no
	// agent IDs to untrack, so the tracker is expected to reset itself on
	// shutdown.
}
|
|
94
|
+
|
|
95
|
+
/**
 * Placeholder for temp-directory cleanup.
 *
 * No directories are removed here; the function only logs that the work is
 * deferred. Real cleanup is expected to live in the run-graph or in whichever
 * code creates the temporary workspaces.
 */
async function cleanupTempDirectories(): Promise<void> {
	const deferredNotice = "[pi-crew] Temp directory cleanup deferred to run-graph";
	try {
		console.log(deferredNotice);
	} catch (failure) {
		console.error("[pi-crew] Temp cleanup error:", failure);
	}
}
|
|
106
|
+
|
|
107
|
+
/**
 * Adds a spawned child process to the shared cleanup registry so that it is
 * signalled during shutdown. Exported for the child-process spawning code.
 *
 * @param pid Process ID of the spawned child.
 * @param runId Run the child belongs to.
 * @param agentId Agent the child is executing.
 */
export function registerChildProcess(pid: number, runId: string, agentId: string): void {
	childProcessRegistry.register(pid, runId, agentId);
}
|
|
111
|
+
|
|
112
|
+
/**
 * Removes a child process from the shared cleanup registry, typically once
 * it has exited. PIDs that are not tracked are ignored.
 *
 * @param pid Process ID to stop tracking.
 */
export function unregisterChildProcess(pid: number): void {
	childProcessRegistry.unregister(pid);
}
|
|
@@ -17,6 +17,7 @@ import {
|
|
|
17
17
|
stopAsyncRunNotifier,
|
|
18
18
|
} from "./async-notifier.ts";
|
|
19
19
|
import { registerAutonomousPolicy } from "./autonomous-policy.ts";
|
|
20
|
+
import { registerCleanupHandler } from "./crew-cleanup.ts";
|
|
20
21
|
import { notifyActiveRuns } from "./session-summary.ts";
|
|
21
22
|
|
|
22
23
|
let _cachedLiveRunSidebar: typeof LiveRunSidebarType | undefined;
|
|
@@ -703,7 +704,7 @@ export function registerPiTeams(pi: ExtensionAPI): void {
|
|
|
703
704
|
widgetState,
|
|
704
705
|
loadConfig(currentCtx.cwd).config.ui,
|
|
705
706
|
);
|
|
706
|
-
clearPiCrewPowerbar(pi.events
|
|
707
|
+
clearPiCrewPowerbar(pi.events);
|
|
707
708
|
};
|
|
708
709
|
const openLiveSidebar = (ctx: ExtensionContext, runId: string): void => {
|
|
709
710
|
const uiConfig = loadConfig(ctx.cwd).config.ui;
|
|
@@ -1084,7 +1085,7 @@ export function registerPiTeams(pi: ExtensionAPI): void {
|
|
|
1084
1085
|
widgetState,
|
|
1085
1086
|
currentCtx ? loadConfig(currentCtx.cwd).config.ui : undefined,
|
|
1086
1087
|
);
|
|
1087
|
-
clearPiCrewPowerbar(pi.events
|
|
1088
|
+
clearPiCrewPowerbar(pi.events);
|
|
1088
1089
|
disposePowerbarCoalescer();
|
|
1089
1090
|
heartbeatWatcher?.dispose();
|
|
1090
1091
|
if (autoRepairTimer) {
|
|
@@ -1496,8 +1497,17 @@ export function registerPiTeams(pi: ExtensionAPI): void {
|
|
|
1496
1497
|
manifests,
|
|
1497
1498
|
);
|
|
1498
1499
|
// Health notifications: only warn about genuinely running runs
|
|
1500
|
+
// Filter to only current session's runs to prevent cross-session notification leakage
|
|
1501
|
+
const currentSessionGen = sessionGeneration;
|
|
1502
|
+
const currentSessionId = currentCtx ? (currentCtx as unknown as Record<string, unknown>).sessionId as string | undefined : undefined;
|
|
1503
|
+
const sessionManifests = manifests.filter(
|
|
1504
|
+
(run) =>
|
|
1505
|
+
!run.ownerSessionId ||
|
|
1506
|
+
run.ownerSessionId === currentSessionId ||
|
|
1507
|
+
(run as unknown as Record<string, unknown>).ownerSessionGeneration === currentSessionGen,
|
|
1508
|
+
);
|
|
1499
1509
|
const now = Date.now();
|
|
1500
|
-
for (const run of
|
|
1510
|
+
for (const run of sessionManifests) {
|
|
1501
1511
|
if (run.status !== "running") continue;
|
|
1502
1512
|
try {
|
|
1503
1513
|
const snapshot = snapshotCache.get(run.runId);
|
|
@@ -1792,6 +1802,8 @@ export function registerPiTeams(pi: ExtensionAPI): void {
|
|
|
1792
1802
|
});
|
|
1793
1803
|
time("register.tools");
|
|
1794
1804
|
|
|
1805
|
+
registerCleanupHandler(pi);
|
|
1806
|
+
|
|
1795
1807
|
registerTeamCommands(pi, {
|
|
1796
1808
|
startForegroundRun,
|
|
1797
1809
|
abortForegroundRun,
|
|
@@ -22,7 +22,7 @@ async function executeTeamRun(...args: Parameters<typeof ExecuteTeamRunFn>): Pro
|
|
|
22
22
|
return _cachedExecuteTeamRun(...args);
|
|
23
23
|
}
|
|
24
24
|
import { spawnBackgroundTeamRun } from "../../subagents/async-entry.ts";
|
|
25
|
-
import { appendEvent, readEvents } from "../../state/event-log.ts";
|
|
25
|
+
import { appendEvent, appendEventAsync, readEvents } from "../../state/event-log.ts";
|
|
26
26
|
import { resolveCrewRuntime, runtimeResolutionState } from "../../runtime/runtime-resolver.ts";
|
|
27
27
|
import { normalizeSkillOverride } from "../../runtime/skill-instructions.ts";
|
|
28
28
|
import { expandParallelResearchWorkflow } from "../../runtime/parallel-research.ts";
|
|
@@ -67,7 +67,7 @@ function scheduleBackgroundEarlyExitGuard(cwd: string, runId: string, pid: numbe
|
|
|
67
67
|
const tail = tailFile(logPath);
|
|
68
68
|
const message = `Background runner exited within 3s; see background.log${tail ? `\n${tail}` : ""}`;
|
|
69
69
|
const failed = updateRunStatus(loaded.manifest, "failed", "Background runner exited within 3s; see background.log");
|
|
70
|
-
|
|
70
|
+
void appendEventAsync(failed.eventsPath, { type: "async.failed", runId: failed.runId, message, data: { pid, detail: liveness.detail } });
|
|
71
71
|
}, 3000);
|
|
72
72
|
timer.unref();
|
|
73
73
|
}
|
|
@@ -140,7 +140,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
|
|
|
140
140
|
const runtimeResolution = runtimeResolutionState(runtime);
|
|
141
141
|
const executionManifest = { ...updatedManifest, runtimeResolution, runConfig: executedConfig, updatedAt: new Date().toISOString() };
|
|
142
142
|
atomicWriteJson(paths.manifestPath, executionManifest);
|
|
143
|
-
|
|
143
|
+
appendEventAsync(executionManifest.eventsPath, { type: "runtime.resolved", runId: executionManifest.runId, message: `Runtime resolved: ${runtime.kind} safety=${runtime.safety}`, data: { runtimeResolution } }).catch(() => {});
|
|
144
144
|
const runAsync = params.async ?? executedConfig.asyncByDefault ?? false;
|
|
145
145
|
let effectiveRuntime = runtime;
|
|
146
146
|
if (runAsync && runtime.kind === "live-session") {
|
|
@@ -150,13 +150,13 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
|
|
|
150
150
|
const effectiveManifest = effectiveRuntime !== runtime ? { ...executionManifest, runtimeResolution: effectiveRuntimeResolution, updatedAt: new Date().toISOString() } : executionManifest;
|
|
151
151
|
if (effectiveRuntime !== runtime) {
|
|
152
152
|
atomicWriteJson(paths.manifestPath, effectiveManifest);
|
|
153
|
-
|
|
153
|
+
appendEventAsync(effectiveManifest.eventsPath, { type: "runtime.resolved", runId: effectiveManifest.runId, message: `Runtime overridden: child-process (async fallback from live-session)`, data: { runtimeResolution: effectiveRuntimeResolution } }).catch(() => {});
|
|
154
154
|
}
|
|
155
155
|
if (runAsync) {
|
|
156
156
|
if (effectiveRuntime.safety === "blocked") {
|
|
157
157
|
const runningManifest = updateRunStatus(effectiveManifest, "running", "Checking worker runtime availability.");
|
|
158
158
|
const blocked = updateRunStatus(runningManifest, "blocked", effectiveRuntime.reason ?? "Child worker execution is disabled; refusing to create no-op scaffold subagents.");
|
|
159
|
-
|
|
159
|
+
void appendEventAsync(blocked.eventsPath, { type: "run.blocked", runId: blocked.runId, message: blocked.summary, data: { runtime: effectiveRuntime, runtimeResolution: effectiveRuntimeResolution, async: true, diagnostics: { requestedMode: effectiveRuntime.requestedMode, workersDisabled: executedConfig.executeWorkers === false, envCrew: process.env.PI_CREW_EXECUTE_WORKERS, envTeams: process.env.PI_TEAMS_EXECUTE_WORKERS } } });
|
|
160
160
|
unregisterActiveRun(blocked.runId);
|
|
161
161
|
return result([
|
|
162
162
|
`Blocked pi-crew run ${blocked.runId}: real subagent workers are disabled.`,
|
|
@@ -169,7 +169,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
|
|
|
169
169
|
const spawned = await spawnBackgroundTeamRun(effectiveManifest);
|
|
170
170
|
const asyncManifest = { ...effectiveManifest, async: { pid: spawned.pid, logPath: spawned.logPath, spawnedAt: new Date().toISOString() } };
|
|
171
171
|
atomicWriteJson(paths.manifestPath, asyncManifest);
|
|
172
|
-
|
|
172
|
+
void appendEventAsync(effectiveManifest.eventsPath, { type: "async.spawned", runId: effectiveManifest.runId, data: { pid: spawned.pid, logPath: spawned.logPath } });
|
|
173
173
|
scheduleBackgroundEarlyExitGuard(ctx.cwd, effectiveManifest.runId, spawned.pid, spawned.logPath);
|
|
174
174
|
// Wait for the async run to complete and return actual results.
|
|
175
175
|
try {
|
|
@@ -280,7 +280,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
|
|
|
280
280
|
if (runtime.safety === "blocked") {
|
|
281
281
|
const runningManifest = updateRunStatus(executionManifest, "running", "Checking worker runtime availability.");
|
|
282
282
|
const blocked = updateRunStatus(runningManifest, "blocked", runtime.reason ?? "Child worker execution is disabled; refusing to create no-op scaffold subagents.");
|
|
283
|
-
|
|
283
|
+
void appendEventAsync(blocked.eventsPath, { type: "run.blocked", runId: blocked.runId, message: blocked.summary, data: { runtime, runtimeResolution, diagnostics: { requestedMode: runtime.requestedMode, workersDisabled: executedConfig.executeWorkers === false, envCrew: process.env.PI_CREW_EXECUTE_WORKERS, envTeams: process.env.PI_TEAMS_EXECUTE_WORKERS } } });
|
|
284
284
|
unregisterActiveRun(blocked.runId);
|
|
285
285
|
return result([
|
|
286
286
|
`Blocked pi-crew run ${blocked.runId}: real subagent workers are disabled.`,
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import type { AgentProgress } from "../runtime/progress-tracker.ts";
|
|
2
|
+
|
|
3
|
+
/** Names of the events that can be published on the crew event bus. */
export type CrewEventType =
	| "agent:progress"
	| "agent:complete"
	| "agent:error"
	| "run:start"
	| "run:complete";

/** Message delivered to every listener subscribed to `type`. */
export interface CrewEvent {
	/** Event name; selects which listeners are invoked. */
	type: CrewEventType;
	/** Run the event belongs to. */
	runId: string;
	/** Agent the event concerns, when there is one. */
	agentId?: string;
	/** Optional event data: a progress record or a plain string. */
	payload?: AgentProgress | string;
	/** Time of the event as a number (unit is chosen by the emitter). */
	timestamp: number;
}

type CrewEventListener = (event: CrewEvent) => void;

/**
 * Minimal synchronous publish/subscribe hub keyed by event type.
 *
 * A failing listener never prevents the remaining listeners from running:
 * synchronous throws and rejected promises returned by listeners are both
 * logged with `console.error`.
 */
class EventBus {
	private listeners = new Map<CrewEventType, Set<CrewEventListener>>();
	private static instance?: EventBus;

	/** Returns the shared bus, creating it on first use. */
	static getInstance(): EventBus {
		if (!EventBus.instance) {
			EventBus.instance = new EventBus();
		}
		return EventBus.instance;
	}

	/**
	 * Invokes every listener currently subscribed to `event.type`.
	 *
	 * The listener set is copied before dispatch, so listeners added while
	 * this call is running are first invoked on the next emit. Without the
	 * copy, a listener that subscribes another listener would extend the
	 * iteration it is part of and could keep it running indefinitely.
	 */
	emit(event: CrewEvent): void {
		const subscribed = this.listeners.get(event.type);
		if (!subscribed) {
			return;
		}
		for (const listener of [...subscribed]) {
			try {
				const outcome: unknown = listener(event);
				// An async listener is assignable to a void-returning type; its
				// rejection would bypass the surrounding try/catch and surface
				// as an unhandled rejection.
				if (outcome instanceof Promise) {
					outcome.catch((e: unknown) => {
						console.error("[EventBus] Listener error:", e);
					});
				}
			} catch (e) {
				console.error("[EventBus] Listener error:", e);
			}
		}
	}

	/**
	 * Subscribes `listener` to events of `type`.
	 *
	 * @returns A function that removes this subscription when called.
	 */
	on(type: CrewEventType, listener: CrewEventListener): () => void {
		let subscribed = this.listeners.get(type);
		if (!subscribed) {
			subscribed = new Set();
			this.listeners.set(type, subscribed);
		}
		subscribed.add(listener);

		return () => {
			this.off(type, listener);
		};
	}

	/** Removes `listener` from `type`; does nothing if it is not subscribed. */
	off(type: CrewEventType, listener: CrewEventListener): void {
		const subscribed = this.listeners.get(type);
		if (!subscribed) {
			return;
		}
		subscribed.delete(listener);
		// Drop the now-empty set so the map only holds types with listeners.
		if (subscribed.size === 0) {
			this.listeners.delete(type);
		}
	}
}

/** Shared event bus instance. */
export const crewEventBus = EventBus.getInstance();
|
|
@@ -127,6 +127,8 @@ function setupUnhandledRejectionGuard(state: { cwd?: string; runId?: string; eve
|
|
|
127
127
|
}
|
|
128
128
|
|
|
129
129
|
async function main(): Promise<void> {
|
|
130
|
+
// FIX: Store logFd so it can be closed on exit to prevent file descriptor leak
|
|
131
|
+
let logFd: number | undefined;
|
|
130
132
|
// Redirect console to background.log since stdio is "ignore" in detached mode.
|
|
131
133
|
// Must be BEFORE any console.log/console.error calls.
|
|
132
134
|
const _cwd = argValue("--cwd");
|
|
@@ -134,13 +136,17 @@ async function main(): Promise<void> {
|
|
|
134
136
|
if (_cwd && _runId) {
|
|
135
137
|
try {
|
|
136
138
|
const logPath = path.join(_cwd, ".crew/state/runs", _runId, "background.log");
|
|
137
|
-
|
|
139
|
+
logFd = fs.openSync(logPath, "a");
|
|
138
140
|
const origWrite = (prefix: string) => (data: any, ...args: any[]) => {
|
|
139
141
|
const msg = [data, ...args].map(String).join(" ") + "\n";
|
|
140
|
-
fs.writeSync(logFd
|
|
142
|
+
fs.writeSync(logFd!, msg);
|
|
141
143
|
};
|
|
142
144
|
console.log = origWrite("OUT");
|
|
143
145
|
console.error = origWrite("ERR");
|
|
146
|
+
// FIX: Close logFd on process exit to prevent file descriptor leak
|
|
147
|
+
process.on("exit", () => {
|
|
148
|
+
try { if (logFd !== undefined) fs.closeSync(logFd); } catch { /* ignore */ }
|
|
149
|
+
});
|
|
144
150
|
} catch { /* best-effort */ }
|
|
145
151
|
}
|
|
146
152
|
|
package/src/runtime/child-pi.ts
CHANGED
|
@@ -10,6 +10,7 @@ import { logInternalError } from "../utils/internal-error.ts";
|
|
|
10
10
|
import { attachPostExitStdioGuard, trySignalChild } from "./post-exit-stdio-guard.ts";
|
|
11
11
|
import { redactJsonLine, SECRET_KEY_PATTERN } from "../utils/redaction.ts";
|
|
12
12
|
import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
|
|
13
|
+
import { registerChildProcess, unregisterChildProcess } from "../extension/crew-cleanup.ts";
|
|
13
14
|
|
|
14
15
|
const POST_EXIT_STDIO_GUARD_MS = DEFAULT_CHILD_PI.postExitStdioGuardMs;
|
|
15
16
|
const FINAL_DRAIN_MS = DEFAULT_CHILD_PI.finalDrainMs;
|
|
@@ -117,6 +118,8 @@ export interface ChildPiLifecycleEvent {
|
|
|
117
118
|
error?: string;
|
|
118
119
|
/** Stderr captured at timeout moment (for response_timeout events). */
|
|
119
120
|
stderr?: string;
|
|
121
|
+
/** Last N chars of stderr for error context (exit/error events). */
|
|
122
|
+
stderrExcerpt?: string;
|
|
120
123
|
/** Timestamp (ISO). */
|
|
121
124
|
ts: string;
|
|
122
125
|
}
|
|
@@ -146,6 +149,16 @@ export interface ChildPiRunInput {
|
|
|
146
149
|
parentContext?: string;
|
|
147
150
|
/** When true, prepend parentContext to the task prompt. */
|
|
148
151
|
inheritContext?: boolean;
|
|
152
|
+
/** Pass to pi to mark certain commands as context-excluded. Default: false */
|
|
153
|
+
excludeContextBash?: boolean;
|
|
154
|
+
/** pi session ID for session naming (aligns with pi-crew run ID) */
|
|
155
|
+
sessionId?: string;
|
|
156
|
+
/** Run ID for cleanup tracking */
|
|
157
|
+
runId?: string;
|
|
158
|
+
/** Agent ID for cleanup tracking */
|
|
159
|
+
agentId?: string;
|
|
160
|
+
/** Role for tool restrictions (from role-tools.ts) */
|
|
161
|
+
role?: string;
|
|
149
162
|
}
|
|
150
163
|
|
|
151
164
|
export interface ChildPiRunResult {
|
|
@@ -168,18 +181,24 @@ export function buildChildPiSpawnOptions(cwd: string, env: NodeJS.ProcessEnv): S
|
|
|
168
181
|
// Bug #12 fix: essential env vars (PATH, HOME, etc.) are always preserved so child can find npm/node.
|
|
169
182
|
const filteredEnv = sanitizeEnvSecrets(env, {
|
|
170
183
|
allowList: [
|
|
171
|
-
// Model provider API keys (
|
|
172
|
-
"
|
|
173
|
-
"
|
|
174
|
-
"
|
|
175
|
-
"
|
|
176
|
-
"
|
|
177
|
-
"
|
|
178
|
-
"
|
|
179
|
-
"
|
|
180
|
-
"
|
|
181
|
-
"
|
|
182
|
-
"
|
|
184
|
+
// Model provider API keys (explicit list — do NOT use wildcards)
|
|
185
|
+
"MINIMAX_API_KEY",
|
|
186
|
+
"MINIMAX_GROUP_ID",
|
|
187
|
+
"OPENAI_API_KEY",
|
|
188
|
+
"OPENAI_ORG_ID",
|
|
189
|
+
"ANTHROPIC_API_KEY",
|
|
190
|
+
"GOOGLE_API_KEY",
|
|
191
|
+
"GOOGLE_GENERATIVE_LANGUAGE_API_KEY",
|
|
192
|
+
"AZURE_OPENAI_API_KEY",
|
|
193
|
+
"AZURE_OPENAI_ENDPOINT",
|
|
194
|
+
"AWS_ACCESS_KEY_ID",
|
|
195
|
+
"AWS_SECRET_ACCESS_KEY",
|
|
196
|
+
"AWS_REGION",
|
|
197
|
+
"ZEU_API_KEY",
|
|
198
|
+
"ZERODEV_API_KEY",
|
|
199
|
+
// SECURITY FIX: Removed dangerous wildcards "*_API_KEY", "*_TOKEN", "*_SECRET"
|
|
200
|
+
// These patterns would leak ALL secrets matching the pattern to child processes.
|
|
201
|
+
// Only add specific, intended provider keys above.
|
|
183
202
|
// Essential non-secret vars for child process to function
|
|
184
203
|
"PATH",
|
|
185
204
|
"HOME",
|
|
@@ -368,23 +387,31 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
368
387
|
if (depth.blocked) return { exitCode: 1, stdout: "", stderr: `pi-crew depth guard blocked child worker: depth ${depth.depth} >= max ${depth.maxDepth}` };
|
|
369
388
|
const mock = process.env.PI_TEAMS_MOCK_CHILD_PI;
|
|
370
389
|
if (mock) {
|
|
390
|
+
// SECURITY: Log mock mode activation prominently for audit trail
|
|
391
|
+
console.warn(`[⚠️ PI_CREW_MOCK_MODE] Mock mode active: ${mock} — NOT running real agents!`);
|
|
392
|
+
// SECURITY FIX: Require PI_CREW_ALLOW_MOCK alongside PI_TEAMS_MOCK_CHILD_PI
|
|
393
|
+
const allowMock = process.env.PI_CREW_ALLOW_MOCK === "1" || process.env.PI_CREW_ALLOW_MOCK === "true";
|
|
394
|
+
if (!allowMock) {
|
|
395
|
+
console.error(`[🚨 PI_CREW_MOCK_MODE] SECURITY: PI_TEAMS_MOCK_CHILD_PI is set but PI_CREW_ALLOW_MOCK is not "1". Ignoring mock request for safety.`);
|
|
396
|
+
return { exitCode: 1, stdout: "", stderr: "Mock mode requires PI_CREW_ALLOW_MOCK=1 alongside PI_TEAMS_MOCK_CHILD_PI" };
|
|
397
|
+
}
|
|
371
398
|
if (mock === "success") {
|
|
372
|
-
const stdout = `
|
|
399
|
+
const stdout = `[MOCK] Success for ${input.agent.name}\n`;
|
|
373
400
|
observeStdoutChunk(input, stdout);
|
|
374
401
|
return { exitCode: 0, stdout, stderr: "" };
|
|
375
402
|
}
|
|
376
403
|
if (mock === "json-success" || mock === "adaptive-plan") {
|
|
377
404
|
const text = mock === "adaptive-plan" && effectiveTask.includes("ADAPTIVE_PLAN_JSON_START")
|
|
378
|
-
? `Adaptive
|
|
379
|
-
: `
|
|
405
|
+
? `[MOCK] Adaptive plan\nADAPTIVE_PLAN_JSON_START\n${JSON.stringify({ phases: [{ name: "research", tasks: [{ role: "explorer", task: "Explore adaptive target" }, { role: "analyst", task: "Analyze adaptive target" }, { role: "planner", task: "Plan adaptive target" }] }, { name: "build", tasks: [{ role: "executor", task: "Implement adaptive target" }] }, { name: "check", tasks: [{ role: "reviewer", task: "Review adaptive target" }, { role: "test-engineer", task: "Test adaptive target" }, { role: "writer", task: "Summarize adaptive target" }] }] })}\nADAPTIVE_PLAN_JSON_END`
|
|
406
|
+
: `[MOCK] JSON success for ${input.agent.name}`;
|
|
380
407
|
const stdout = `${JSON.stringify({ type: "message", message: { role: "assistant", content: [{ type: "text", text }] } })}\n${JSON.stringify({ type: "message_end", usage: { input: 10, output: 5, cost: 0.001, turns: 1 } })}\n`;
|
|
381
408
|
observeStdoutChunk(input, stdout);
|
|
382
409
|
return { exitCode: 0, stdout, stderr: "" };
|
|
383
410
|
}
|
|
384
|
-
if (mock === "retryable-failure") return { exitCode: 1, stdout: "", stderr: "rate limit: mock failure" };
|
|
385
|
-
return { exitCode: 1, stdout: "", stderr: `
|
|
411
|
+
if (mock === "retryable-failure") return { exitCode: 1, stdout: "", stderr: "[MOCK] rate limit: mock failure" };
|
|
412
|
+
return { exitCode: 1, stdout: "", stderr: `[MOCK] failure: ${mock}` };
|
|
386
413
|
}
|
|
387
|
-
const built = buildPiWorkerArgs({ task: effectiveTask, agent: input.agent, model: input.model, sessionEnabled: true, maxDepth: input.maxDepth, skillPaths: input.skillPaths });
|
|
414
|
+
const built = buildPiWorkerArgs({ task: effectiveTask, agent: input.agent, model: input.model, sessionEnabled: true, maxDepth: input.maxDepth, skillPaths: input.skillPaths, role: input.role });
|
|
388
415
|
const spawnSpec = getPiSpawnCommand(built.args);
|
|
389
416
|
try {
|
|
390
417
|
return await new Promise<ChildPiRunResult>((resolve) => {
|
|
@@ -393,6 +420,10 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
393
420
|
activeChildProcesses.set(child.pid, child);
|
|
394
421
|
input.onSpawn?.(child.pid);
|
|
395
422
|
input.onLifecycleEvent?.({ type: "spawned", pid: child.pid, ts: new Date().toISOString() });
|
|
423
|
+
// Register with cleanup handler for graceful shutdown
|
|
424
|
+
if (input.runId && input.agentId) {
|
|
425
|
+
registerChildProcess(child.pid, input.runId, input.agentId);
|
|
426
|
+
}
|
|
396
427
|
} else {
|
|
397
428
|
input.onLifecycleEvent?.({ type: "spawn_error", error: "spawn returned no pid", ts: new Date().toISOString() });
|
|
398
429
|
}
|
|
@@ -414,6 +445,36 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
414
445
|
let hardKilled = false;
|
|
415
446
|
const cleanupErrors: string[] = [];
|
|
416
447
|
let turnCount = 0;
|
|
448
|
+
// Track in-flight operations for proper rejection on unexpected exit
|
|
449
|
+
interface PendingOperation {
|
|
450
|
+
id: string;
|
|
451
|
+
type: "prompt" | "steer" | "json_event";
|
|
452
|
+
startedAt: number;
|
|
453
|
+
}
|
|
454
|
+
const pendingOperations = new Map<string, PendingOperation>();
|
|
455
|
+
let operationIdCounter = 0;
|
|
456
|
+
|
|
457
|
+
const startOperation = (type: PendingOperation["type"]): string => {
|
|
458
|
+
const id = `op-${++operationIdCounter}`;
|
|
459
|
+
pendingOperations.set(id, { id, type, startedAt: Date.now() });
|
|
460
|
+
return id;
|
|
461
|
+
};
|
|
462
|
+
|
|
463
|
+
const completeOperation = (id: string): void => {
|
|
464
|
+
pendingOperations.delete(id);
|
|
465
|
+
};
|
|
466
|
+
|
|
467
|
+
const rejectPendingOperations = (error: Error): void => {
|
|
468
|
+
pendingOperations.forEach((op, id) => {
|
|
469
|
+
logInternalError(
|
|
470
|
+
"child-pi.pending-operation-rejected",
|
|
471
|
+
error,
|
|
472
|
+
`opId=${id} type=${op.type} elapsed=${Date.now() - op.startedAt}ms`,
|
|
473
|
+
);
|
|
474
|
+
});
|
|
475
|
+
pendingOperations.clear();
|
|
476
|
+
};
|
|
477
|
+
|
|
417
478
|
let softLimitReached = false;
|
|
418
479
|
const maxTurns = input.maxTurns;
|
|
419
480
|
const graceTurns = input.graceTurns;
|
|
@@ -450,20 +511,27 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
450
511
|
},
|
|
451
512
|
onJsonEvent: (event) => {
|
|
452
513
|
restartNoResponseTimer();
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
if (
|
|
457
|
-
|
|
458
|
-
if (
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
514
|
+
const eventOpId = startOperation("json_event");
|
|
515
|
+
try {
|
|
516
|
+
// Turn-count-based steering: soft limit steer + hard abort after graceTurns
|
|
517
|
+
if (event && typeof event === "object" && !Array.isArray(event)) {
|
|
518
|
+
const obj = event as Record<string, unknown>;
|
|
519
|
+
if (obj.type === "turn_end") {
|
|
520
|
+
turnCount += 1;
|
|
521
|
+
if (maxTurns !== undefined && !softLimitReached && turnCount >= maxTurns) {
|
|
522
|
+
softLimitReached = true;
|
|
523
|
+
// Inject steer via stdin to tell child to wrap up
|
|
524
|
+
child.stdin?.write(JSON.stringify({ type: "steer", message: "You have reached your turn limit. Wrap up immediately — provide your final answer now." }) + "\n");
|
|
525
|
+
} else if (maxTurns !== undefined && softLimitReached && turnCount >= maxTurns + (graceTurns ?? 5)) {
|
|
526
|
+
// Hard abort — terminate after grace turns
|
|
527
|
+
try { child.kill(process.platform === "win32" ? undefined : "SIGTERM"); } catch { /* best-effort */ }
|
|
528
|
+
}
|
|
465
529
|
}
|
|
466
530
|
}
|
|
531
|
+
completeOperation(eventOpId);
|
|
532
|
+
} catch (err) {
|
|
533
|
+
completeOperation(eventOpId);
|
|
534
|
+
throw err;
|
|
467
535
|
}
|
|
468
536
|
input.onJsonEvent?.(event);
|
|
469
537
|
if (!isFinalAssistantEvent(event) || childExited || settled || finalDrainTimer) return;
|
|
@@ -587,20 +655,38 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
587
655
|
stderr = appendBoundedTail(stderr, chunk.toString("utf-8"));
|
|
588
656
|
});
|
|
589
657
|
child.on("error", (error) => {
|
|
658
|
+
// Reject pending operations with process error context
|
|
659
|
+
const processError = new Error(
|
|
660
|
+
`Child Pi process error: ${error.message}. Stderr: ${stderr.slice(-500) || "(none)"}`,
|
|
661
|
+
);
|
|
662
|
+
rejectPendingOperations(processError);
|
|
590
663
|
try {
|
|
591
|
-
input.onLifecycleEvent?.({ type: "spawn_error", pid: child.pid, error:
|
|
664
|
+
input.onLifecycleEvent?.({ type: "spawn_error", pid: child.pid, error: processError.message, ts: new Date().toISOString(), stderrExcerpt: stderr.slice(-500) || undefined });
|
|
592
665
|
} catch (err) {
|
|
593
666
|
logInternalError("child-pi.on-lifecycle-event", err, `event=error, pid=${child.pid}`);
|
|
594
667
|
}
|
|
595
|
-
settle({ exitCode: null, stdout, stderr, error:
|
|
668
|
+
settle({ exitCode: null, stdout, stderr, error: processError.message });
|
|
596
669
|
});
|
|
597
|
-
child.on("exit", (code) => {
|
|
670
|
+
child.on("exit", (code, signal) => {
|
|
598
671
|
if (child.pid) {
|
|
599
672
|
activeChildProcesses.delete(child.pid);
|
|
600
673
|
clearHardKillTimer(child.pid);
|
|
674
|
+
// Unregister from cleanup handler
|
|
675
|
+
unregisterChildProcess(child.pid);
|
|
676
|
+
}
|
|
677
|
+
// Build comprehensive exit error for unexpected exits
|
|
678
|
+
const isUnexpectedExit = !childExited && !settled && !responseTimeoutHit && !abortRequested;
|
|
679
|
+
const exitError = isUnexpectedExit
|
|
680
|
+
? new Error(
|
|
681
|
+
`Child Pi process exited unexpectedly (code=${code ?? "null"} signal=${signal ?? "null"}). `
|
|
682
|
+
+ `Stderr: ${stderr.slice(-1000) || "(none)"}`,
|
|
683
|
+
)
|
|
684
|
+
: null;
|
|
685
|
+
if (exitError) {
|
|
686
|
+
rejectPendingOperations(exitError);
|
|
601
687
|
}
|
|
602
688
|
try {
|
|
603
|
-
input.onLifecycleEvent?.({ type: "exit", pid: child.pid, exitCode: code, ts: new Date().toISOString() });
|
|
689
|
+
input.onLifecycleEvent?.({ type: "exit", pid: child.pid, exitCode: code, ts: new Date().toISOString(), error: exitError?.message, stderrExcerpt: isUnexpectedExit ? stderr.slice(-1000) || undefined : undefined });
|
|
604
690
|
} catch (err) {
|
|
605
691
|
logInternalError("child-pi.on-lifecycle-event", err, `event=exit, pid=${child.pid}`);
|
|
606
692
|
}
|
|
@@ -618,6 +704,8 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
618
704
|
if (child.pid) {
|
|
619
705
|
activeChildProcesses.delete(child.pid);
|
|
620
706
|
clearHardKillTimer(child.pid);
|
|
707
|
+
// Unregister from cleanup handler
|
|
708
|
+
unregisterChildProcess(child.pid);
|
|
621
709
|
}
|
|
622
710
|
try {
|
|
623
711
|
input.onLifecycleEvent?.({ type: "close", pid: child.pid, exitCode, ts: new Date().toISOString() });
|