pi-crew 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/package.json +1 -1
- package/src/agents/discover-agents.ts +2 -1
- package/src/config/config.ts +760 -229
- package/src/config/types.ts +34 -5
- package/src/extension/help.ts +1 -0
- package/src/extension/management.ts +2 -1
- package/src/extension/register.ts +1176 -255
- package/src/extension/registration/commands.ts +15 -2
- package/src/extension/registration/team-tool.ts +1 -1
- package/src/extension/session-summary.ts +11 -1
- package/src/extension/team-tool/api.ts +4 -1
- package/src/extension/team-tool/cache-control.ts +23 -0
- package/src/extension/team-tool/cancel.ts +27 -16
- package/src/extension/team-tool/context.ts +2 -0
- package/src/extension/team-tool/handle-settings.ts +2 -0
- package/src/extension/team-tool/health-monitor.ts +563 -0
- package/src/extension/team-tool/inspect.ts +10 -3
- package/src/extension/team-tool/lifecycle-actions.ts +12 -5
- package/src/extension/team-tool/respond.ts +6 -3
- package/src/extension/team-tool/status.ts +4 -1
- package/src/extension/team-tool-types.ts +2 -0
- package/src/extension/team-tool.ts +901 -177
- package/src/runtime/adaptive-plan.ts +1 -1
- package/src/runtime/child-pi.ts +15 -2
- package/src/runtime/crash-recovery.ts +30 -0
- package/src/runtime/foreground-watchdog.ts +129 -0
- package/src/runtime/manifest-cache.ts +4 -2
- package/src/runtime/pi-args.ts +3 -2
- package/src/runtime/run-tracker.ts +11 -0
- package/src/runtime/runtime-policy.ts +15 -2
- package/src/runtime/skill-instructions.ts +11 -0
- package/src/runtime/stale-reconciler.ts +322 -18
- package/src/runtime/task-runner.ts +8 -1
- package/src/schema/config-schema.ts +1 -0
- package/src/schema/team-tool-schema.ts +204 -76
- package/src/state/atomic-write.ts +2 -2
- package/src/state/locks.ts +19 -0
- package/src/state/mailbox.ts +22 -5
- package/src/state/state-store.ts +13 -3
- package/src/teams/discover-teams.ts +2 -1
- package/src/ui/run-event-bus.ts +2 -1
- package/src/ui/settings-overlay.ts +2 -0
- package/src/workflows/discover-workflows.ts +5 -1
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
|
+
import * as os from "node:os";
|
|
2
3
|
import * as path from "node:path";
|
|
3
4
|
import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
|
4
|
-
import { checkProcessLiveness } from "./process-status.ts";
|
|
5
5
|
import { recordFromTask, upsertCrewAgent } from "./crew-agent-records.ts";
|
|
6
|
+
import { checkProcessLiveness } from "./process-status.ts";
|
|
7
|
+
|
|
8
|
+
/** Age threshold for orphaned temp directory cleanup: 1 hour. */
|
|
9
|
+
const ORPHAN_TEMP_DIR_AGE_THRESHOLD_MS = 60 * 60 * 1000;
|
|
6
10
|
|
|
7
11
|
/**
|
|
8
12
|
* Result of reconciling a single stale run.
|
|
@@ -10,7 +14,12 @@ import { recordFromTask, upsertCrewAgent } from "./crew-agent-records.ts";
|
|
|
10
14
|
export interface ReconcileResult {
|
|
11
15
|
runId: string;
|
|
12
16
|
/** What was found and what action was taken */
|
|
13
|
-
verdict:
|
|
17
|
+
verdict:
|
|
18
|
+
| "healthy"
|
|
19
|
+
| "result_exists"
|
|
20
|
+
| "pid_dead"
|
|
21
|
+
| "pid_alive_stale"
|
|
22
|
+
| "no_status";
|
|
14
23
|
/** Whether repair was applied */
|
|
15
24
|
repaired: boolean;
|
|
16
25
|
/** Human-readable detail */
|
|
@@ -21,6 +30,8 @@ export interface ReconcileResult {
|
|
|
21
30
|
|
|
22
31
|
const STALE_ALIVE_PID_MS = 24 * 60 * 60 * 1000; // 24 hours
|
|
23
32
|
const ACTIVE_EVIDENCE_TTL_MS = 5 * 60 * 1000;
|
|
33
|
+
/** For no-PID runs, repair when ALL running tasks have heartbeat stale beyond this threshold. */
|
|
34
|
+
const NO_PID_HEARTBEAT_STALE_MS = 5 * 60 * 1000; // 5 minutes — same as heartbeat-gradient deadMs
|
|
24
35
|
|
|
25
36
|
/**
|
|
26
37
|
* Phase 1: Check if a result file already exists for the run.
|
|
@@ -31,14 +42,28 @@ function checkResultFile(
|
|
|
31
42
|
tasks: TeamTaskState[],
|
|
32
43
|
): { found: boolean; repaired: boolean } {
|
|
33
44
|
// Check if all tasks already have terminal status (result was written but manifest wasn't updated)
|
|
34
|
-
const allTerminal =
|
|
35
|
-
|
|
36
|
-
|
|
45
|
+
const allTerminal =
|
|
46
|
+
tasks.length > 0 &&
|
|
47
|
+
tasks.every(
|
|
48
|
+
(t) =>
|
|
49
|
+
t.status === "completed" ||
|
|
50
|
+
t.status === "failed" ||
|
|
51
|
+
t.status === "cancelled" ||
|
|
52
|
+
t.status === "skipped" ||
|
|
53
|
+
t.status === "needs_attention",
|
|
54
|
+
);
|
|
37
55
|
if (allTerminal) {
|
|
38
56
|
// Sync agent records even when tasks are already terminal
|
|
39
57
|
// (e.g., a previous reconcile fixed tasks but crashed before updating agents)
|
|
40
58
|
for (const task of tasks) {
|
|
41
|
-
try {
|
|
59
|
+
try {
|
|
60
|
+
upsertCrewAgent(
|
|
61
|
+
manifest,
|
|
62
|
+
recordFromTask(manifest, task, "scaffold"),
|
|
63
|
+
);
|
|
64
|
+
} catch {
|
|
65
|
+
/* non-critical */
|
|
66
|
+
}
|
|
42
67
|
}
|
|
43
68
|
return { found: true, repaired: false };
|
|
44
69
|
}
|
|
@@ -52,7 +77,10 @@ function checkResultFile(
|
|
|
52
77
|
* written, treat the PID as alive even if process.kill returns false
|
|
53
78
|
* (handles SIGKILL race where PID hasn't been recycled yet).
|
|
54
79
|
*/
|
|
55
|
-
function checkPidLiveness(
|
|
80
|
+
function checkPidLiveness(
|
|
81
|
+
pid: number | undefined,
|
|
82
|
+
stateRoot?: string,
|
|
83
|
+
): {
|
|
56
84
|
alive: boolean;
|
|
57
85
|
detail: string;
|
|
58
86
|
} {
|
|
@@ -67,13 +95,18 @@ function checkPidLiveness(pid: number | undefined, stateRoot?: string): {
|
|
|
67
95
|
const heartbeatPath = path.join(stateRoot, "heartbeat.json");
|
|
68
96
|
try {
|
|
69
97
|
if (fs.existsSync(heartbeatPath)) {
|
|
70
|
-
const hb = JSON.parse(
|
|
98
|
+
const hb = JSON.parse(
|
|
99
|
+
fs.readFileSync(heartbeatPath, "utf-8"),
|
|
100
|
+
) as { pid?: number; at?: number };
|
|
71
101
|
if (hb?.pid === pid && hb?.at) {
|
|
72
102
|
const ageMs = Date.now() - hb.at;
|
|
73
103
|
// Heartbeat written < 5 min ago → process was alive recently.
|
|
74
104
|
// Don't repair yet; let the next reconciliation cycle catch it.
|
|
75
105
|
if (ageMs < 5 * 60_000) {
|
|
76
|
-
return {
|
|
106
|
+
return {
|
|
107
|
+
alive: true,
|
|
108
|
+
detail: `process dead but heartbeat ${Math.round(ageMs / 1000)}s old`,
|
|
109
|
+
};
|
|
77
110
|
}
|
|
78
111
|
}
|
|
79
112
|
}
|
|
@@ -101,18 +134,76 @@ function evaluateStaleness(
|
|
|
101
134
|
return { stale: false, reason: "updated_at_invalid" };
|
|
102
135
|
}
|
|
103
136
|
if (now - updatedAt > STALE_ALIVE_PID_MS) {
|
|
104
|
-
return {
|
|
137
|
+
return {
|
|
138
|
+
stale: true,
|
|
139
|
+
reason: `alive_but_stale_${Math.round((now - updatedAt) / 3600_000)}h`,
|
|
140
|
+
};
|
|
105
141
|
}
|
|
106
142
|
return { stale: false, reason: "alive_and_recent" };
|
|
107
143
|
}
|
|
108
144
|
|
|
109
145
|
/**
 * Reports whether at least one in-flight task shows a sign of life within
 * ACTIVE_EVIDENCE_TTL_MS of `now`.
 *
 * Only tasks whose status is "running" or "waiting" are inspected. Such a task
 * counts as evidence when either:
 *  - its heartbeat is not flagged `alive === false` and `lastSeenAt` parses to
 *    a time no more than the TTL before `now`, or
 *  - its agent progress `lastActivityAt` parses to a time no more than the TTL
 *    before `now`.
 *
 * @param tasks Task states of the run being examined.
 * @param now   Reference time in epoch milliseconds.
 * @returns `true` as soon as one task qualifies, otherwise `false`.
 */
function hasRecentActiveEvidence(tasks: TeamTaskState[], now: number): boolean {
	// Missing/falsy stamps become NaN so they fail the finiteness test below.
	const toEpochMs = (stamp: string | number | Date | undefined): number =>
		stamp ? new Date(stamp).getTime() : Number.NaN;
	const withinTtl = (epochMs: number): boolean =>
		Number.isFinite(epochMs) && now - epochMs <= ACTIVE_EVIDENCE_TTL_MS;

	for (const task of tasks) {
		const inFlight = task.status === "running" || task.status === "waiting";
		if (!inFlight) continue;

		const heartbeatFresh =
			task.heartbeat?.alive !== false &&
			withinTtl(toEpochMs(task.heartbeat?.lastSeenAt));
		if (heartbeatFresh) return true;

		if (withinTtl(toEpochMs(task.agentProgress?.lastActivityAt))) return true;
	}
	return false;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Staleness check used for runs that have no recorded PID.
 *
 * Returns `true` only when there is at least one in-flight task (status
 * "running" or "waiting") and every in-flight task is stale. A task is stale
 * when it has at least one parseable timestamp — heartbeat `lastSeenAt` or
 * agent-progress `lastActivityAt` — and none of the timestamps it has lies
 * within NO_PID_HEARTBEAT_STALE_MS of `now`.
 *
 * A task with neither timestamp is treated as not stale, so a single such
 * task makes the whole check return `false`.
 *
 * @param tasks Task states of the run being examined.
 * @param now   Reference time in epoch milliseconds.
 */
function allRunningTasksHeartbeatStale(
	tasks: TeamTaskState[],
	now: number,
): boolean {
	// Missing/falsy stamps become NaN and are filtered out as "no signal".
	const toEpochMs = (stamp: string | number | Date | undefined): number =>
		stamp ? new Date(stamp).getTime() : Number.NaN;

	let inFlightCount = 0;
	for (const task of tasks) {
		if (task.status !== "running" && task.status !== "waiting") continue;
		inFlightCount += 1;

		const signals = [
			toEpochMs(task.heartbeat?.lastSeenAt),
			toEpochMs(task.agentProgress?.lastActivityAt),
		].filter((at) => Number.isFinite(at));

		// Nothing reported at all: staleness cannot be determined, so not stale.
		if (signals.length === 0) return false;
		// Any signal inside the window keeps the task (and thus the run) alive.
		if (signals.some((at) => now - at <= NO_PID_HEARTBEAT_STALE_MS)) return false;
	}
	// With zero in-flight tasks there is nothing to call stale.
	return inFlightCount > 0;
}
|
|
118
209
|
|
|
@@ -126,7 +217,11 @@ function repairStaleRun(
|
|
|
126
217
|
): TeamTaskState[] {
|
|
127
218
|
const now = new Date().toISOString();
|
|
128
219
|
const repairedTasks = tasks.map((task) => {
|
|
129
|
-
if (
|
|
220
|
+
if (
|
|
221
|
+
task.status === "running" ||
|
|
222
|
+
task.status === "queued" ||
|
|
223
|
+
task.status === "waiting"
|
|
224
|
+
) {
|
|
130
225
|
return {
|
|
131
226
|
...task,
|
|
132
227
|
status: "cancelled" as const,
|
|
@@ -138,7 +233,14 @@ function repairStaleRun(
|
|
|
138
233
|
});
|
|
139
234
|
// Update agent records so widget sees cancelled status immediately
|
|
140
235
|
for (const task of repairedTasks) {
|
|
141
|
-
try {
|
|
236
|
+
try {
|
|
237
|
+
upsertCrewAgent(
|
|
238
|
+
manifest,
|
|
239
|
+
recordFromTask(manifest, task, "scaffold"),
|
|
240
|
+
);
|
|
241
|
+
} catch {
|
|
242
|
+
/* non-critical */
|
|
243
|
+
}
|
|
142
244
|
}
|
|
143
245
|
return repairedTasks;
|
|
144
246
|
}
|
|
@@ -183,8 +285,31 @@ export function reconcileStaleRun(
|
|
|
183
285
|
detail: "No PID recorded, but recent task heartbeat/progress exists; not repairing",
|
|
184
286
|
};
|
|
185
287
|
}
|
|
288
|
+
// No PID and no recent activity. If ALL running tasks have stale heartbeats
|
|
289
|
+
// (beyond NO_PID_HEARTBEAT_STALE_MS = 5min), repair immediately — the worker
|
|
290
|
+
// process is dead but we have no PID to check. This handles /tmp/ live-session
|
|
291
|
+
// workspaces where agents exit without calling submit_result.
|
|
292
|
+
if (allRunningTasksHeartbeatStale(tasks, now)) {
|
|
293
|
+
const repaired = repairStaleRun(
|
|
294
|
+
manifest,
|
|
295
|
+
tasks,
|
|
296
|
+
"no_pid_heartbeat_stale",
|
|
297
|
+
);
|
|
298
|
+
return {
|
|
299
|
+
runId,
|
|
300
|
+
verdict: "no_status",
|
|
301
|
+
repaired: true,
|
|
302
|
+
detail: `No PID; all running task heartbeats stale >${Math.round(NO_PID_HEARTBEAT_STALE_MS / 60_000)}min; repaired ${repaired.filter((t) => t.status === "cancelled").length} tasks`,
|
|
303
|
+
repairedTasks: repaired,
|
|
304
|
+
};
|
|
305
|
+
}
|
|
306
|
+
// Fall through: no recent activity but not all tasks stale enough yet.
|
|
307
|
+
// Check the longer STALE_ALIVE_PID_MS threshold for very old runs.
|
|
186
308
|
const updatedAt = new Date(manifest.updatedAt).getTime();
|
|
187
|
-
if (
|
|
309
|
+
if (
|
|
310
|
+
Number.isFinite(updatedAt) &&
|
|
311
|
+
now - updatedAt > STALE_ALIVE_PID_MS
|
|
312
|
+
) {
|
|
188
313
|
const repaired = repairStaleRun(manifest, tasks, "no_pid_stale");
|
|
189
314
|
return {
|
|
190
315
|
runId,
|
|
@@ -223,3 +348,182 @@ export function reconcileStaleRun(
|
|
|
223
348
|
repairedTasks: repaired,
|
|
224
349
|
};
|
|
225
350
|
}
|
|
351
|
+
|
|
352
|
+
/**
 * Counters returned by `reconcileOrphanedTempWorkspaces`.
 */
export interface OrphanReconcileResult {
	/** How many runs were repaired, i.e. had their manifest rewritten as cancelled. */
	repaired: number;
	/** How many `pi-crew-*` workspace directories under the temp dir were deleted. */
	cleanedDirs: number;
}
|
|
361
|
+
|
|
362
|
+
/**
 * Scan the OS temp directory for `pi-crew-*` workspaces, reconcile any run in
 * them whose manifest still says "running", and optionally delete workspaces
 * that are old and have no running manifest left.
 *
 * For each run that `reconcileStaleRun` reports as repaired, the repaired
 * tasks are written to `tasks.json`, the manifest is rewritten with status
 * "cancelled", and the agent records are refreshed.
 *
 * Cleanup is on unless `cleanupOrphanedTempDirs` is explicitly `false`. A
 * workspace is removed recursively when no manifest in it has status
 * "running" and the workspace directory's mtime is more than
 * ORPHAN_TEMP_DIR_AGE_THRESHOLD_MS before `now`.
 *
 * Every filesystem failure is swallowed on purpose: this is best-effort
 * housekeeping and must never throw into the caller.
 *
 * @param now     Reference time in epoch milliseconds (defaults to the current time).
 * @param options `cleanupOrphanedTempDirs: false` disables directory removal.
 * @returns Number of runs repaired and number of directories removed.
 */
export function reconcileOrphanedTempWorkspaces(
	now = Date.now(),
	options?: { cleanupOrphanedTempDirs?: boolean },
): OrphanReconcileResult {
	const tmpDir = getSafeTempDir();
	if (!tmpDir) return { repaired: 0, cleanedDirs: 0 };

	const cleanupEnabled = options?.cleanupOrphanedTempDirs !== false;
	let repaired = 0;
	let cleanedDirs = 0;
	try {
		for (const entry of fs.readdirSync(tmpDir, { withFileTypes: true })) {
			if (!entry.isDirectory() || !entry.name.startsWith("pi-crew-")) continue;
			const workspaceDir = path.join(tmpDir, entry.name);
			// The temp dir is shared between users; a name prefix alone is not
			// enough to justify rewriting or deleting what is inside.
			if (!isOwnedByCurrentUser(workspaceDir)) continue;
			const stateRunsDir = path.join(workspaceDir, ".crew", "state", "runs");
			if (!fs.existsSync(stateRunsDir)) continue;

			const outcome = reconcileRunsInWorkspace(stateRunsDir, now);
			repaired += outcome.repaired;

			if (!cleanupEnabled) continue;
			// Re-scan on disk as well: a run left "running" for any reason
			// (not repaired, or skipped because tasks.json is missing) must
			// keep the workspace alive.
			const hasRunning = outcome.sawActive || hasRunningManifest(stateRunsDir);
			if (!hasRunning && removeWorkspaceIfExpired(workspaceDir, now)) {
				cleanedDirs++;
			}
		}
	} catch {
		/* skip if tmpdir unreadable */
	}
	return { repaired, cleanedDirs };
}

/** True when `dir` belongs to the current user; always true where uids are unavailable. */
function isOwnedByCurrentUser(dir: string): boolean {
	if (typeof process.getuid !== "function") return true;
	try {
		// lstat so that a symlink planted in the temp dir is judged by its own owner.
		return fs.lstatSync(dir).uid === process.getuid();
	} catch {
		return false;
	}
}

/** Reconcile every "running" run under `stateRunsDir`; reports repairs and whether any run is still considered active. */
function reconcileRunsInWorkspace(
	stateRunsDir: string,
	now: number,
): { repaired: number; sawActive: boolean } {
	let repaired = 0;
	let sawActive = false;
	try {
		for (const runDir of fs.readdirSync(stateRunsDir)) {
			const manifestPath = path.join(stateRunsDir, runDir, "manifest.json");
			const tasksPath = path.join(stateRunsDir, runDir, "tasks.json");
			if (!fs.existsSync(manifestPath) || !fs.existsSync(tasksPath)) continue;
			try {
				const manifest: TeamRunManifest = JSON.parse(
					fs.readFileSync(manifestPath, "utf-8"),
				);
				if (manifest.status !== "running") continue;
				const tasks: TeamTaskState[] = JSON.parse(
					fs.readFileSync(tasksPath, "utf-8"),
				);
				const result = reconcileStaleRun(manifest, tasks, now);
				if (result.repaired && result.repairedTasks) {
					persistCancelledRun(
						manifest,
						manifestPath,
						tasksPath,
						result.repairedTasks,
						result.detail,
						now,
					);
					repaired++;
				}
				// Not repaired and judged healthy / undecided: the run may still be live.
				if (
					result.verdict === "healthy" ||
					(result.verdict === "no_status" && !result.repaired)
				) {
					sawActive = true;
				}
			} catch {
				/* skip corrupt manifests */
			}
		}
	} catch {
		/* skip unreadable dirs */
	}
	return { repaired, sawActive };
}

/** Write the repaired tasks and a "cancelled" manifest to disk, then refresh agent records. */
function persistCancelledRun(
	manifest: TeamRunManifest,
	manifestPath: string,
	tasksPath: string,
	repairedTasks: TeamTaskState[],
	detail: ReconcileResult["detail"],
	now: number,
): void {
	fs.writeFileSync(tasksPath, JSON.stringify(repairedTasks, null, 2));
	const updated = {
		...manifest,
		status: "cancelled" as const,
		updatedAt: new Date(now).toISOString(),
		summary: `Stale run reconciled: ${detail}`,
	};
	fs.writeFileSync(manifestPath, JSON.stringify(updated, null, 2));
	for (const task of repairedTasks) {
		try {
			upsertCrewAgent(updated, recordFromTask(updated, task, "scaffold"));
		} catch {
			/* non-critical */
		}
	}
}

/** True when any readable manifest under `stateRunsDir` still has status "running". */
function hasRunningManifest(stateRunsDir: string): boolean {
	if (!fs.existsSync(stateRunsDir)) return false;
	try {
		for (const runDir of fs.readdirSync(stateRunsDir)) {
			const manifestPath = path.join(stateRunsDir, runDir, "manifest.json");
			if (!fs.existsSync(manifestPath)) continue;
			try {
				const manifest: TeamRunManifest = JSON.parse(
					fs.readFileSync(manifestPath, "utf-8"),
				);
				if (manifest.status === "running") return true;
			} catch {
				/* skip corrupt */
			}
		}
	} catch {
		/* skip unreadable */
	}
	return false;
}

/** Recursively delete `workspaceDir` when its mtime is older than the orphan threshold; returns whether it was removed. */
function removeWorkspaceIfExpired(workspaceDir: string, now: number): boolean {
	try {
		const dirAge = now - fs.statSync(workspaceDir).mtimeMs;
		if (dirAge <= ORPHAN_TEMP_DIR_AGE_THRESHOLD_MS) return false;
		fs.rmSync(workspaceDir, { recursive: true, force: true });
		return true;
	} catch {
		/* skip if stat or rm fails */
		return false;
	}
}
|
|
522
|
+
|
|
523
|
+
/**
 * Resolve the OS temp directory.
 *
 * @returns The path reported by `os.tmpdir()` when it exists on disk, or
 *          `undefined` when it does not exist or the lookup throws.
 */
function getSafeTempDir(): string | undefined {
	try {
		const candidate = os.tmpdir();
		if (!fs.existsSync(candidate)) return undefined;
		return candidate;
	} catch {
		return undefined;
	}
}
|
|
@@ -416,6 +416,8 @@ export async function runTeamTask(
|
|
|
416
416
|
skillPaths,
|
|
417
417
|
maxTurns: input.runtimeConfig?.maxTurns,
|
|
418
418
|
graceTurns: input.runtimeConfig?.graceTurns,
|
|
419
|
+
inheritContext: input.runtimeConfig?.inheritContext,
|
|
420
|
+
parentContext: input.parentContext,
|
|
419
421
|
onSpawn: (pid) => {
|
|
420
422
|
try {
|
|
421
423
|
({ task, tasks } = checkpointTask(
|
|
@@ -827,8 +829,13 @@ export async function runTeamTask(
|
|
|
827
829
|
// _yieldResult: preserved for future use — yield completion contract not yet wired to task.result
|
|
828
830
|
let _yieldResult: YieldResult | undefined;
|
|
829
831
|
let noYield = false;
|
|
832
|
+
// Child-process workers do not have a submit_result tool — the yield contract
|
|
833
|
+
// only applies to live-session workers where submit_result is injected by the
|
|
834
|
+
// runtime. Skipping yield detection for child-process prevents every child
|
|
835
|
+
// worker from incorrectly being marked needs_attention.
|
|
830
836
|
const yieldEnabled =
|
|
831
|
-
|
|
837
|
+
runtimeKind !== "child-process" &&
|
|
838
|
+
(input.runtimeConfig?.yield?.enabled ?? DEFAULT_YIELD_CONFIG.enabled);
|
|
832
839
|
if (yieldEnabled && collectedJsonEvents.length > 0) {
|
|
833
840
|
if (hasYieldInOutput(collectedJsonEvents)) {
|
|
834
841
|
const yieldEvent = collectedJsonEvents.find((e) =>
|
|
@@ -113,6 +113,7 @@ export const PiTeamsReliabilityConfigSchema = Type.Object({
|
|
|
113
113
|
}, { additionalProperties: false })),
|
|
114
114
|
autoRecover: Type.Optional(Type.Boolean()),
|
|
115
115
|
deadletterThreshold: Type.Optional(Type.Integer({ minimum: 1 })),
|
|
116
|
+
cleanupOrphanedTempDirs: Type.Optional(Type.Boolean()),
|
|
116
117
|
}, { additionalProperties: false });
|
|
117
118
|
|
|
118
119
|
export const PiTeamsOtlpConfigSchema = Type.Object({
|