pi-crew 0.1.44 → 0.1.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103):
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +5 -5
  3. package/agents/analyst.md +11 -11
  4. package/agents/critic.md +11 -11
  5. package/agents/executor.md +11 -11
  6. package/agents/explorer.md +11 -11
  7. package/agents/planner.md +11 -11
  8. package/agents/reviewer.md +11 -11
  9. package/agents/security-reviewer.md +11 -11
  10. package/agents/test-engineer.md +11 -11
  11. package/agents/verifier.md +11 -11
  12. package/agents/writer.md +11 -11
  13. package/docs/next-upgrade-roadmap.md +733 -0
  14. package/docs/research-awesome-agent-skills-distillation.md +100 -0
  15. package/docs/research-oh-my-pi-distillation.md +322 -0
  16. package/docs/source-runtime-refactor-map.md +24 -0
  17. package/docs/usage.md +3 -3
  18. package/install.mjs +52 -8
  19. package/package.json +1 -1
  20. package/schema.json +2 -1
  21. package/skills/async-worker-recovery/SKILL.md +42 -0
  22. package/skills/context-artifact-hygiene/SKILL.md +52 -0
  23. package/skills/delegation-patterns/SKILL.md +54 -0
  24. package/skills/mailbox-interactive/SKILL.md +40 -0
  25. package/skills/model-routing-context/SKILL.md +39 -0
  26. package/skills/multi-perspective-review/SKILL.md +58 -0
  27. package/skills/observability-reliability/SKILL.md +41 -0
  28. package/skills/ownership-session-security/SKILL.md +41 -0
  29. package/skills/pi-extension-lifecycle/SKILL.md +39 -0
  30. package/skills/requirements-to-task-packet/SKILL.md +63 -0
  31. package/skills/resource-discovery-config/SKILL.md +41 -0
  32. package/skills/runtime-state-reader/SKILL.md +44 -0
  33. package/skills/secure-agent-orchestration-review/SKILL.md +45 -0
  34. package/skills/state-mutation-locking/SKILL.md +42 -0
  35. package/skills/systematic-debugging/SKILL.md +67 -0
  36. package/skills/ui-render-performance/SKILL.md +39 -0
  37. package/skills/verification-before-done/SKILL.md +57 -0
  38. package/skills/worktree-isolation/SKILL.md +39 -0
  39. package/src/agents/discover-agents.ts +12 -11
  40. package/src/config/config.ts +48 -24
  41. package/src/config/defaults.ts +14 -0
  42. package/src/extension/project-init.ts +62 -2
  43. package/src/extension/register.ts +19 -10
  44. package/src/extension/registration/commands.ts +49 -26
  45. package/src/extension/registration/subagent-helpers.ts +8 -0
  46. package/src/extension/registration/subagent-tools.ts +2 -1
  47. package/src/extension/registration/team-tool.ts +28 -8
  48. package/src/extension/run-index.ts +13 -5
  49. package/src/extension/run-maintenance.ts +22 -3
  50. package/src/extension/team-tool/api.ts +25 -8
  51. package/src/extension/team-tool/cancel.ts +134 -102
  52. package/src/extension/team-tool/context.ts +6 -0
  53. package/src/extension/team-tool/lifecycle-actions.ts +17 -5
  54. package/src/extension/team-tool/respond.ts +103 -66
  55. package/src/extension/team-tool/run.ts +53 -10
  56. package/src/extension/team-tool/status.ts +12 -1
  57. package/src/extension/team-tool-types.ts +2 -0
  58. package/src/extension/team-tool.ts +32 -11
  59. package/src/observability/event-to-metric.ts +8 -1
  60. package/src/runtime/background-runner.ts +10 -4
  61. package/src/runtime/cancellation.ts +51 -0
  62. package/src/runtime/child-pi.ts +17 -4
  63. package/src/runtime/crash-recovery.ts +1 -0
  64. package/src/runtime/crew-agent-records.ts +41 -1
  65. package/src/runtime/deadletter.ts +1 -0
  66. package/src/runtime/delivery-coordinator.ts +174 -142
  67. package/src/runtime/effectiveness.ts +76 -0
  68. package/src/runtime/live-agent-control.ts +2 -1
  69. package/src/runtime/live-agent-manager.ts +20 -2
  70. package/src/runtime/live-control-realtime.ts +1 -1
  71. package/src/runtime/live-session-runtime.ts +5 -1
  72. package/src/runtime/manifest-cache.ts +17 -2
  73. package/src/runtime/model-fallback.ts +6 -4
  74. package/src/runtime/overflow-recovery.ts +175 -156
  75. package/src/runtime/pi-args.ts +18 -3
  76. package/src/runtime/process-status.ts +5 -1
  77. package/src/runtime/retry-executor.ts +26 -9
  78. package/src/runtime/runtime-resolver.ts +22 -6
  79. package/src/runtime/skill-instructions.ts +222 -0
  80. package/src/runtime/stale-reconciler.ts +189 -179
  81. package/src/runtime/subagent-manager.ts +3 -0
  82. package/src/runtime/task-runner/capabilities.ts +78 -0
  83. package/src/runtime/task-runner/live-executor.ts +4 -0
  84. package/src/runtime/task-runner/prompt-builder.ts +3 -1
  85. package/src/runtime/task-runner/prompt-pipeline.ts +64 -0
  86. package/src/runtime/task-runner.ts +44 -5
  87. package/src/runtime/team-runner.ts +91 -19
  88. package/src/schema/config-schema.ts +1 -0
  89. package/src/schema/team-tool-schema.ts +3 -3
  90. package/src/state/active-run-registry.ts +165 -0
  91. package/src/state/contracts.ts +1 -1
  92. package/src/state/mailbox.ts +44 -4
  93. package/src/state/state-store.ts +51 -1
  94. package/src/state/types.ts +46 -2
  95. package/src/teams/team-config.ts +1 -0
  96. package/src/ui/crew-widget.ts +9 -4
  97. package/src/ui/dashboard-panes/mailbox-pane.ts +2 -1
  98. package/src/ui/dashboard-panes/progress-pane.ts +2 -0
  99. package/src/ui/powerbar-publisher.ts +1 -1
  100. package/src/ui/run-snapshot-cache.ts +66 -39
  101. package/src/ui/snapshot-types.ts +7 -0
  102. package/src/utils/paths.ts +4 -2
  103. package/src/workflows/workflow-config.ts +1 -0
@@ -9,6 +9,7 @@ import { readCrewAgents } from "../../runtime/crew-agent-records.ts";
9
9
  import { checkProcessLiveness, isActiveRunStatus } from "../../runtime/process-status.ts";
10
10
  import { formatTaskGraphLines, waitingReason } from "../../runtime/task-display.ts";
11
11
  import { verifyTaskCompletion, formatOutputPreview } from "../../runtime/completion-guard.ts";
12
+ import { evaluateRunEffectiveness } from "../../runtime/effectiveness.ts";
12
13
  import type { PiTeamsToolResult } from "../tool-result.ts";
13
14
  import { result, type TeamContext } from "./context.ts";
14
15
 
@@ -51,6 +52,10 @@ export function handleStatus(params: TeamToolParamsValue, ctx: TeamContext): PiT
51
52
  groupJoinLines.push(`- ${String(message.data?.partial) === "true" ? "partial" : "completed"} request=${requestId} message=${message.id} ack=${timedOut ? "timeout" : ack}`);
52
53
  }
53
54
  const totalUsage = aggregateUsage(tasks);
55
+ const completedTasks = tasks.filter((task) => task.status === "completed");
56
+ const effectiveness = evaluateRunEffectiveness({ manifest, tasks, executeWorkers: manifest.runtimeResolution?.kind !== "scaffold", runtimeConfig: loadConfig(ctx.cwd).config.runtime });
57
+ const noObservedWorkTasks = effectiveness.noObservedWorkTaskIds.map((id) => tasks.find((task) => task.id === id)).filter((task): task is typeof tasks[number] => task !== undefined);
58
+ const attentionTasks = effectiveness.needsAttentionTaskIds.map((id) => tasks.find((task) => task.id === id)).filter((task): task is typeof tasks[number] => task !== undefined);
54
59
  const activeAgents = crewAgents.filter((agent) => agent.status === "running");
55
60
  const completedAgents = crewAgents.filter((agent) => agent.status !== "running");
56
61
  const waitingTasks = tasks.filter((task) => task.status === "queued" || task.status === "waiting");
@@ -61,6 +66,7 @@ export function handleStatus(params: TeamToolParamsValue, ctx: TeamContext): PiT
61
66
  `Workflow: ${manifest.workflow ?? "(none)"}`,
62
67
  `Status: ${manifest.status}`,
63
68
  `Workspace mode: ${manifest.workspaceMode}`,
69
+ ...(manifest.runtimeResolution ? [`Runtime: ${manifest.runtimeResolution.kind}`, `Runtime safety: ${manifest.runtimeResolution.safety}`, `Runtime requested: ${manifest.runtimeResolution.requestedMode}${manifest.runtimeResolution.reason ? ` (${manifest.runtimeResolution.reason})` : ""}`] : []),
64
70
  `Goal: ${manifest.goal}`,
65
71
  `Created: ${manifest.createdAt}`,
66
72
  `Updated: ${manifest.updatedAt}`,
@@ -72,7 +78,12 @@ export function handleStatus(params: TeamToolParamsValue, ctx: TeamContext): PiT
72
78
  "Tasks:",
73
79
  ...(tasks.length ? tasks.map((task) => `- ${task.id} [${task.status}] ${task.role} -> ${task.agent}${task.taskPacket ? ` scope=${task.taskPacket.scope}` : ""}${task.verification ? ` green=${task.verification.observedGreenLevel}/${task.verification.requiredGreenLevel}` : ""}${task.modelAttempts?.length ? ` attempts=${task.modelAttempts.length}` : ""}${task.modelRouting ? ` modelRouting=${task.modelRouting.requested ? `${task.modelRouting.requested}->` : ""}${task.modelRouting.resolved}${task.modelRouting.usedAttempt ? ` attempt=${task.modelRouting.usedAttempt + 1}` : ""}` : ""}${task.agentProgress?.activityState ? ` activityState=${task.agentProgress.activityState}` : ""}${attentionByTask.get(task.id)?.data?.reason ? ` attention=${String(attentionByTask.get(task.id)?.data?.reason)}` : ""}${task.jsonEvents !== undefined ? ` jsonEvents=${task.jsonEvents}` : ""}${task.usage ? ` usage=${JSON.stringify(task.usage)}` : ""}${task.resultArtifact ? ` result=${task.resultArtifact.path}` : ""}${task.transcriptArtifact ? ` transcript=${task.transcriptArtifact.path}` : ""}${task.worktree ? ` worktree=${task.worktree.path}` : ""}${task.error ? ` error=${task.error}` : ""}`) : ["- (none)"]),
74
80
  `Task counts: ${[...counts.entries()].map(([status, count]) => `${status}=${count}`).join(", ") || "none"}`,
75
- "Completion verification:",
81
+ "Effectiveness:",
82
+ `- observable=${effectiveness.observable}/${Math.max(1, effectiveness.completed)} completed tasks`,
83
+ `- workerExecution=${effectiveness.workerExecution} guard=${effectiveness.guardMode} severity=${effectiveness.severity}`,
84
+ `- noObservedWork=${effectiveness.noObservedWorkTaskIds.length ? effectiveness.noObservedWorkTaskIds.join(",") : "none"}`,
85
+ `- needsAttention=${effectiveness.needsAttentionTaskIds.length ? effectiveness.needsAttentionTaskIds.join(",") : "none"}`,
86
+ "Completion verification",
76
87
  ...(tasks.filter((t) => t.status === "completed").length ? tasks.filter((t) => t.status === "completed").map((t) => {
77
88
  const guard = verifyTaskCompletion(t, manifest);
78
89
  return `- ${t.id} green=${guard.greenLevel}/3${guard.warnings.length ? ` warnings=[${guard.warnings.join(", ")}]` : ""}`;
@@ -6,5 +6,7 @@ export interface TeamToolDetails {
6
6
  abortedIds?: string[];
7
7
  missingIds?: string[];
8
8
  foreignIds?: string[];
9
+ intent?: string;
9
10
  resumedIds?: string[];
11
+ mailboxIds?: string[];
10
12
  }
@@ -29,14 +29,14 @@ import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../stat
29
29
  import { executeTeamRun } from "../runtime/team-runner.ts";
30
30
  import { checkProcessLiveness, isActiveRunStatus } from "../runtime/process-status.ts";
31
31
  import { saveCrewAgents, readCrewAgents, recordFromTask } from "../runtime/crew-agent-records.ts";
32
- import { resolveCrewRuntime } from "../runtime/runtime-resolver.ts";
32
+ import { resolveCrewRuntime, runtimeResolutionState } from "../runtime/runtime-resolver.ts";
33
33
  import { applyAttentionState, formatActivityAge, resolveCrewControlConfig } from "../runtime/agent-control.ts";
34
34
  import { writeForegroundInterruptRequest } from "../runtime/foreground-control.ts";
35
35
  import { formatTaskGraphLines, waitingReason } from "../runtime/task-display.ts";
36
36
  import { directTeamAndWorkflowFromRun } from "../runtime/direct-run.ts";
37
37
  import { parsePiJsonOutput } from "../runtime/pi-json-output.ts";
38
38
  import { buildParentContext, configRecord, formatScoped, result, type TeamContext } from "./team-tool/context.ts";
39
- import { autonomousPatchFromConfig, configPatchFromConfig, formatAutonomyStatus } from "./team-tool/config-patch.ts";
39
+ import { autonomousPatchFromConfig, configPatchFromConfig, effectiveRunConfig, formatAutonomyStatus } from "./team-tool/config-patch.ts";
40
40
  import { handleApi } from "./team-tool/api.ts";
41
41
  import { handleRun } from "./team-tool/run.ts";
42
42
  import { handleDoctor } from "./team-tool/doctor.ts";
@@ -47,6 +47,7 @@ import { handleCancel } from "./team-tool/cancel.ts";
47
47
  import { handleRespond } from "./team-tool/respond.ts";
48
48
  import { handlePlan } from "./team-tool/plan.ts";
49
49
  import { logInternalError } from "../utils/internal-error.ts";
50
+ import { normalizeSkillOverride } from "../runtime/skill-instructions.ts";
50
51
 
51
52
  export type { TeamToolDetails } from "./team-tool-types.ts";
52
53
  export type { TeamContext } from "./team-tool/context.ts";
@@ -176,18 +177,37 @@ export async function handleResume(params: TeamToolParamsValue, ctx: TeamContext
176
177
  const workflow = direct?.workflow ?? allWorkflows(discoverWorkflows(ctx.cwd)).find((candidate) => candidate.name === loaded.manifest.workflow);
177
178
  if (!workflow) return result(`Workflow '${loaded.manifest.workflow}' not found.`, { action: "resume", status: "error" }, true);
178
179
  return await withRunLock(loaded.manifest, async () => {
180
+ const loadedConfig = loadConfig(ctx.cwd);
179
181
  const recovered = recoverCheckpointedTasks(loaded.manifest, loaded.tasks);
180
182
  const resumeManifest = recovered.manifest;
183
+ const executedConfig = effectiveRunConfig(loadedConfig.config, params.config);
184
+ const runtime = await resolveCrewRuntime(executedConfig);
185
+ const runtimeResolution = runtimeResolutionState(runtime);
186
+ const runtimeManifest = { ...resumeManifest, runtimeResolution, updatedAt: new Date().toISOString() };
187
+ saveRunManifest(runtimeManifest);
188
+ appendEvent(runtimeManifest.eventsPath, { type: "runtime.resolved", runId: runtimeManifest.runId, message: `Runtime resolved for resume: ${runtime.kind} safety=${runtime.safety}`, data: { runtimeResolution, action: "resume" } });
189
+ if (runtime.safety === "blocked") {
190
+ const runningManifest = updateRunStatus(runtimeManifest, "running", "Checking worker runtime availability before resume.");
191
+ const blocked = updateRunStatus(runningManifest, "blocked", runtime.reason ?? "Child worker execution is disabled; refusing to resume with no-op scaffold subagents.");
192
+ appendEvent(blocked.eventsPath, { type: "run.blocked", runId: blocked.runId, message: blocked.summary, data: { runtime, action: "resume" } });
193
+ return result([
194
+ `Blocked resume for pi-crew run ${blocked.runId}: real subagent workers are disabled.`,
195
+ `Runtime: ${runtime.kind} (requested ${runtime.requestedMode})`,
196
+ runtime.reason ?? "Child worker execution is disabled.",
197
+ "",
198
+ "To resume effective subagents, remove executeWorkers=false / PI_CREW_EXECUTE_WORKERS=0 / PI_TEAMS_EXECUTE_WORKERS=0 or set runtime.mode=child-process.",
199
+ "Use runtime.mode=scaffold only for explicit dry-run prompt/artifact generation.",
200
+ ].join("\n"), { action: "resume", status: "error", runId: blocked.runId, artifactsRoot: blocked.artifactsRoot }, true);
201
+ }
181
202
  const resetTasks = recovered.tasks.map((task) => task.status === "failed" || task.status === "cancelled" || task.status === "skipped" || task.status === "running" ? { ...task, status: "queued" as const, error: undefined, startedAt: undefined, finishedAt: undefined, claim: undefined } : task);
182
- saveRunTasks(resumeManifest, resetTasks);
183
- const replay = replayPendingMailboxMessages(resumeManifest);
184
- appendEvent(resumeManifest.eventsPath, { type: "run.resume_requested", runId: resumeManifest.runId, data: { replayedMailboxMessages: replay.messages.length, recoveredCheckpointTasks: recovered.recovered } });
185
- if (recovered.recovered.length) appendEvent(resumeManifest.eventsPath, { type: "task.checkpoint_recovered", runId: resumeManifest.runId, message: `Recovered ${recovered.recovered.length} task(s) from artifact-written checkpoints.`, data: { taskIds: recovered.recovered } });
186
- if (replay.messages.length) appendEvent(resumeManifest.eventsPath, { type: "mailbox.replayed", runId: resumeManifest.runId, message: `Replayed ${replay.messages.length} pending inbox message(s).`, data: { messageIds: replay.messages.map((message) => message.id), taskIds: replay.messages.map((message) => message.taskId).filter(Boolean) } });
187
- const loadedConfig = loadConfig(ctx.cwd);
188
- const runtime = await resolveCrewRuntime(loadedConfig.config);
203
+ saveRunTasks(runtimeManifest, resetTasks);
204
+ const replay = replayPendingMailboxMessages(runtimeManifest);
205
+ appendEvent(runtimeManifest.eventsPath, { type: "run.resume_requested", runId: runtimeManifest.runId, data: { replayedMailboxMessages: replay.messages.length, recoveredCheckpointTasks: recovered.recovered } });
206
+ if (recovered.recovered.length) appendEvent(runtimeManifest.eventsPath, { type: "task.checkpoint_recovered", runId: runtimeManifest.runId, message: `Recovered ${recovered.recovered.length} task(s) from artifact-written checkpoints.`, data: { taskIds: recovered.recovered } });
207
+ if (replay.messages.length) appendEvent(runtimeManifest.eventsPath, { type: "mailbox.replayed", runId: runtimeManifest.runId, message: `Replayed ${replay.messages.length} pending inbox message(s).`, data: { messageIds: replay.messages.map((message) => message.id), taskIds: replay.messages.map((message) => message.taskId).filter(Boolean) } });
189
208
  const executeWorkers = runtime.kind !== "scaffold";
190
- const executed = await executeTeamRun({ manifest: resumeManifest, tasks: resetTasks, team, workflow, agents, executeWorkers, limits: loadedConfig.config.limits, runtime, runtimeConfig: loadedConfig.config.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal: ctx.signal, reliability: loadedConfig.config.reliability, metricRegistry: ctx.metricRegistry });
209
+ const resumeSkillOverride = normalizeSkillOverride(params.skill) ?? runtimeManifest.skillOverride;
210
+ const executed = await executeTeamRun({ manifest: runtimeManifest, tasks: resetTasks, team, workflow, agents, executeWorkers, limits: executedConfig.limits, runtime, runtimeConfig: executedConfig.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, skillOverride: resumeSkillOverride, signal: ctx.signal, reliability: executedConfig.reliability, metricRegistry: ctx.metricRegistry });
191
211
  return result([`Resumed run ${executed.manifest.runId}.`, `Status: ${executed.manifest.status}`, `Tasks: ${executed.tasks.length}`, `Artifacts: ${executed.manifest.artifactsRoot}`].join("\n"), { action: "resume", status: executed.manifest.status === "failed" ? "error" : "ok", runId: executed.manifest.runId, artifactsRoot: executed.manifest.artifactsRoot }, executed.manifest.status === "failed");
192
212
  });
193
213
  }
@@ -199,7 +219,7 @@ export async function handleTeamTool(params: TeamToolParamsValue, ctx: TeamConte
199
219
  case "get": return handleGet(params, ctx);
200
220
  case "init": {
201
221
  const cfg = configRecord(params.config);
202
- const initialized = initializeProject(ctx.cwd, { copyBuiltins: cfg.copyBuiltins === true, overwrite: cfg.overwrite === true });
222
+ const initialized = initializeProject(ctx.cwd, { copyBuiltins: cfg.copyBuiltins === true, overwrite: cfg.overwrite === true, configScope: cfg.configScope === "project" || cfg.scope === "project" ? "project" : cfg.configScope === "none" || cfg.scope === "none" ? "none" : "global" });
203
223
  return result([
204
224
  "Initialized pi-crew project layout.",
205
225
  "Directories:",
@@ -207,6 +227,7 @@ export async function handleTeamTool(params: TeamToolParamsValue, ctx: TeamConte
207
227
  "Copied builtin files:",
208
228
  ...(initialized.copiedFiles.length ? initialized.copiedFiles.map((file) => `- ${file}`) : ["- (none)"]),
209
229
  ...(initialized.skippedFiles.length ? ["Skipped existing files:", ...initialized.skippedFiles.map((file) => `- ${file}`)] : []),
230
+ `Config: ${initialized.configPath || "(none)"} (${initialized.configScope}${initialized.configCreated ? "; created" : initialized.configSkipped ? "; already existed" : "; unchanged"})`,
210
231
  `Gitignore: ${initialized.gitignorePath} (${initialized.gitignoreUpdated ? "updated" : "already configured"})`,
211
232
  ].join("\n"), { action: "init", status: "ok" });
212
233
  }
@@ -13,6 +13,13 @@ function numberValue(value: unknown, fallback = 0): number {
13
13
  return typeof value === "number" && Number.isFinite(value) ? value : fallback;
14
14
  }
15
15
 
16
+ const CANCELLATION_REASON_LABELS = new Set(["caller_cancelled", "leader_interrupted", "provider_timeout", "worker_timeout", "tool_timeout", "shutdown", "unknown"]);
17
+
18
+ function cancellationReasonLabel(value: unknown): string {
19
+ const raw = stringValue(value, "unknown");
20
+ return CANCELLATION_REASON_LABELS.has(raw) ? raw : "unknown";
21
+ }
22
+
16
23
  export interface EventToMetricSubscription {
17
24
  dispose(): void;
18
25
  }
@@ -36,7 +43,7 @@ export function wireEventToMetrics(events: ExtensionAPI["events"] | undefined, r
36
43
  const handlers: Array<[string, (data: unknown) => void]> = [
37
44
  ["crew.run.completed", (data) => { const item = recordValue(data); runCount.inc({ status: "completed" }); runDuration.observe({ team: stringValue(item.team, "unknown") }, numberValue(item.durationMs)); }],
38
45
  ["crew.run.failed", () => runCount.inc({ status: "failed" })],
39
- ["crew.run.cancelled", () => runCount.inc({ status: "cancelled" })],
46
+ ["crew.run.cancelled", (data) => { const item = recordValue(data); runCount.inc({ status: "cancelled", reason: cancellationReasonLabel(item.reason) }); }],
40
47
  ["crew.task.completed", (data) => { const item = recordValue(data); taskCount.inc({ status: "completed" }); taskDuration.observe({ role: stringValue(item.role, "unknown") }, numberValue(item.durationMs)); tokenUsage.observe({ role: stringValue(item.role, "unknown") }, numberValue(item.tokens)); }],
41
48
  ["crew.task.failed", () => taskCount.inc({ status: "failed" })],
42
49
  ["crew.task.retry_attempt", (data) => { const item = recordValue(data); taskCount.inc({ status: "retry" }); retryAttemptCount.inc({ runId: stringValue(item.runId, "unknown"), taskId: stringValue(item.taskId, "unknown") }); }],
@@ -1,11 +1,11 @@
1
1
  import { allAgents, discoverAgents } from "../agents/discover-agents.ts";
2
2
  import { allTeams, discoverTeams } from "../teams/discover-teams.ts";
3
3
  import { appendEvent } from "../state/event-log.ts";
4
- import { loadRunManifestById, updateRunStatus } from "../state/state-store.ts";
4
+ import { loadRunManifestById, saveRunManifest, updateRunStatus } from "../state/state-store.ts";
5
5
  import { allWorkflows, discoverWorkflows } from "../workflows/discover-workflows.ts";
6
6
  import { loadConfig } from "../config/config.ts";
7
7
  import { executeTeamRun } from "./team-runner.ts";
8
- import { resolveCrewRuntime } from "./runtime-resolver.ts";
8
+ import { resolveCrewRuntime, runtimeResolutionState } from "./runtime-resolver.ts";
9
9
  import { directTeamAndWorkflowFromRun } from "./direct-run.ts";
10
10
  import { expandParallelResearchWorkflow } from "./parallel-research.ts";
11
11
  import { writeAsyncStartMarker } from "./async-marker.ts";
@@ -36,9 +36,15 @@ async function main(): Promise<void> {
36
36
  if (!baseWorkflow) throw new Error(`Workflow '${manifest.workflow ?? ""}' not found.`);
37
37
  const workflow = expandParallelResearchWorkflow(baseWorkflow, cwd);
38
38
  const loadedConfig = loadConfig(cwd);
39
- const runtime = await resolveCrewRuntime(loadedConfig.config);
39
+ const runConfig = manifest.runConfig && typeof manifest.runConfig === "object" && !Array.isArray(manifest.runConfig) ? manifest.runConfig as typeof loadedConfig.config : loadedConfig.config;
40
+ const runtime = manifest.runtimeResolution ? { kind: manifest.runtimeResolution.kind, requestedMode: manifest.runtimeResolution.requestedMode, available: manifest.runtimeResolution.available, fallback: manifest.runtimeResolution.fallback, steer: manifest.runtimeResolution.kind === "live-session", resume: manifest.runtimeResolution.kind === "live-session", liveToolActivity: manifest.runtimeResolution.kind === "live-session", transcript: manifest.runtimeResolution.kind !== "scaffold", reason: manifest.runtimeResolution.reason, safety: manifest.runtimeResolution.safety } : await resolveCrewRuntime(runConfig);
41
+ const runtimeResolution = manifest.runtimeResolution ?? runtimeResolutionState(runtime);
42
+ manifest = { ...manifest, runtimeResolution, runConfig, updatedAt: new Date().toISOString() };
43
+ saveRunManifest(manifest);
44
+ appendEvent(manifest.eventsPath, { type: "runtime.resolved", runId: manifest.runId, message: `Runtime resolved: ${runtime.kind} safety=${runtime.safety}`, data: { runtimeResolution, async: true } });
45
+ if (runtime.safety === "blocked") throw new Error(runtime.reason ?? "Child worker execution is disabled; refusing to create no-op scaffold subagents.");
40
46
  const executeWorkers = runtime.kind !== "scaffold";
41
- const result = await executeTeamRun({ manifest, tasks, team, workflow, agents, executeWorkers, limits: loadedConfig.config.limits, runtime, runtimeConfig: loadedConfig.config.runtime });
47
+ const result = await executeTeamRun({ manifest, tasks, team, workflow, agents, executeWorkers, limits: runConfig.limits, runtime, runtimeConfig: runConfig.runtime, skillOverride: manifest.skillOverride, reliability: runConfig.reliability });
42
48
  manifest = result.manifest;
43
49
  tasks = result.tasks;
44
50
  appendEvent(manifest.eventsPath, { type: "async.completed", runId: manifest.runId, data: { status: manifest.status, tasks: tasks.length } });
@@ -0,0 +1,51 @@
1
+ export type CancellationReasonCode = "caller_cancelled" | "leader_interrupted" | "provider_timeout" | "worker_timeout" | "tool_timeout" | "shutdown" | "unknown";
2
+
3
+ export interface CancellationReason {
4
+ code: CancellationReasonCode;
5
+ message: string;
6
+ cause?: unknown;
7
+ }
8
+
9
+ const KNOWN_CODES: ReadonlySet<string> = new Set(["caller_cancelled", "leader_interrupted", "provider_timeout", "worker_timeout", "tool_timeout", "shutdown", "unknown"]);
10
+
11
+ export class CrewCancellationError extends Error {
12
+ readonly reason: CancellationReason;
13
+
14
+ constructor(reason: CancellationReason) {
15
+ super(reason.message);
16
+ this.name = "CrewCancellationError";
17
+ this.reason = reason;
18
+ }
19
+ }
20
+
21
+ function reasonFromString(value: string): CancellationReason {
22
+ const trimmed = value.trim();
23
+ if (KNOWN_CODES.has(trimmed)) return { code: trimmed as CancellationReasonCode, message: `Cancelled: ${trimmed}` };
24
+ return { code: "caller_cancelled", message: trimmed || "Cancelled by caller." };
25
+ }
26
+
27
+ export function cancellationReasonFromUnknown(value: unknown): CancellationReason {
28
+ if (value instanceof CrewCancellationError) return value.reason;
29
+ if (value instanceof Error) return { code: "caller_cancelled", message: value.message || "Cancelled by caller.", cause: value };
30
+ if (typeof value === "string") return reasonFromString(value);
31
+ if (value && typeof value === "object" && !Array.isArray(value)) {
32
+ const record = value as { code?: unknown; reason?: unknown; message?: unknown; cause?: unknown };
33
+ const rawCode = typeof record.code === "string" ? record.code : typeof record.reason === "string" ? record.reason : undefined;
34
+ const code = rawCode && KNOWN_CODES.has(rawCode) ? rawCode as CancellationReasonCode : "caller_cancelled";
35
+ const message = typeof record.message === "string" && record.message.trim() ? record.message.trim() : `Cancelled: ${code}`;
36
+ return { code, message, cause: record.cause ?? value };
37
+ }
38
+ return { code: "caller_cancelled", message: "Cancelled by caller." };
39
+ }
40
+
41
+ export function cancellationReasonFromSignal(signal: AbortSignal | undefined): CancellationReason {
42
+ return cancellationReasonFromUnknown(signal?.reason);
43
+ }
44
+
45
+ export function cancellationErrorFromSignal(signal: AbortSignal | undefined): CrewCancellationError {
46
+ return new CrewCancellationError(cancellationReasonFromSignal(signal));
47
+ }
48
+
49
+ export function throwIfCancelled(signal: AbortSignal | undefined): void {
50
+ if (signal?.aborted) throw cancellationErrorFromSignal(signal);
51
+ }
@@ -2,6 +2,7 @@ import { spawn, type ChildProcess, type SpawnOptions } from "node:child_process"
2
2
  import * as fs from "node:fs";
3
3
  import * as path from "node:path";
4
4
  import type { AgentConfig } from "../agents/agent-config.ts";
5
+ import type { WorkerExitStatus } from "../state/types.ts";
5
6
  import { buildPiWorkerArgs, checkCrewDepth, cleanupTempDir } from "./pi-args.ts";
6
7
  import { getPiSpawnCommand } from "./pi-spawn.ts";
7
8
  import { DEFAULT_CHILD_PI } from "../config/defaults.ts";
@@ -88,6 +89,7 @@ export interface ChildPiRunInput {
88
89
  task: string;
89
90
  agent: AgentConfig;
90
91
  model?: string;
92
+ skillPaths?: string[];
91
93
  signal?: AbortSignal;
92
94
  transcriptPath?: string;
93
95
  onStdoutLine?: (line: string) => void;
@@ -104,6 +106,7 @@ export interface ChildPiRunResult {
104
106
  stdout: string;
105
107
  stderr: string;
106
108
  error?: string;
109
+ exitStatus?: WorkerExitStatus;
107
110
  }
108
111
 
109
112
  export function buildChildPiSpawnOptions(cwd: string, env: NodeJS.ProcessEnv): SpawnOptions {
@@ -283,7 +286,7 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
283
286
  if (mock === "retryable-failure") return { exitCode: 1, stdout: "", stderr: "rate limit: mock failure" };
284
287
  return { exitCode: 1, stdout: "", stderr: `mock failure: ${mock}` };
285
288
  }
286
- const built = buildPiWorkerArgs({ task: input.task, agent: input.agent, model: input.model, sessionEnabled: false, maxDepth: input.maxDepth });
289
+ const built = buildPiWorkerArgs({ task: input.task, agent: input.agent, model: input.model, sessionEnabled: false, maxDepth: input.maxDepth, skillPaths: input.skillPaths });
287
290
  const spawnSpec = getPiSpawnCommand(built.args);
288
291
  try {
289
292
  return await new Promise<ChildPiRunResult>((resolve) => {
@@ -306,6 +309,9 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
306
309
  const responseTimeoutMs = Number.isFinite(responseTimeoutEnv) && responseTimeoutEnv >= 0 ? responseTimeoutEnv : input.responseTimeoutMs ?? RESPONSE_TIMEOUT_MS;
307
310
  let responseTimeoutHit = false;
308
311
  let forcedFinalDrain = false;
312
+ let abortRequested = input.signal?.aborted === true;
313
+ let hardKilled = false;
314
+ const cleanupErrors: string[] = [];
309
315
  const restartNoResponseTimer = (): void => {
310
316
  if (responseTimeoutMs <= 0) return;
311
317
  if (noResponseTimer) clearTimeout(noResponseTimer);
@@ -347,6 +353,7 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
347
353
  hardKillTimer = setTimeout(() => {
348
354
  if (settled || childExited) return;
349
355
  try {
356
+ hardKilled = true;
350
357
  child.kill(process.platform === "win32" ? undefined : "SIGKILL");
351
358
  } catch (error) {
352
359
  logInternalError("child-pi.final-drain-kill", error, `pid=${child.pid}`);
@@ -382,11 +389,16 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
382
389
  clearChildPiTimeouts();
383
390
  lineObserver.flush();
384
391
  input.signal?.removeEventListener("abort", abort);
385
- cleanupTempDir(built.tempDir);
386
- resolve(result);
392
+ try {
393
+ cleanupTempDir(built.tempDir);
394
+ } catch (error) {
395
+ cleanupErrors.push(error instanceof Error ? error.message : String(error));
396
+ }
397
+ resolve({ ...result, exitStatus: result.exitStatus ?? { exitCode: result.exitCode, cancelled: abortRequested, timedOut: responseTimeoutHit, killed: hardKilled, cleanupErrors, finalDrainMs } });
387
398
  };
388
399
 
389
400
  const abort = (): void => {
401
+ abortRequested = true;
390
402
  killProcessTree(child.pid, child);
391
403
  if (process.platform !== "win32") {
392
404
  trySignalChild(child, "SIGTERM");
@@ -431,11 +443,12 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
431
443
  clearHardKillTimer(child.pid);
432
444
  }
433
445
  const timeoutError = responseTimeoutHit && !stderr.trim() ? { error: `Child Pi produced no new output for ${responseTimeoutMs}ms; process was terminated as unresponsive.` } : undefined;
446
+ const finalExitCode = forcedFinalDrain && !timeoutError ? 0 : exitCode;
434
447
  // A final assistant event is the child Pi contract for "the worker produced its answer".
435
448
  // Some Pi processes can linger during post-final cleanup/stdio shutdown; finalDrain terminates
436
449
  // that lingering process so the parent can continue, but it must not turn a completed
437
450
  // subagent answer into a failed task. Real pre-final response timeouts still report errors.
438
- settle({ exitCode: forcedFinalDrain && !timeoutError ? 0 : exitCode, stdout, stderr, ...(timeoutError ? { error: timeoutError.error } : {}) });
451
+ settle({ exitCode: finalExitCode, stdout, stderr, ...(timeoutError ? { error: timeoutError.error } : {}), exitStatus: { exitCode: finalExitCode, cancelled: abortRequested, timedOut: responseTimeoutHit, killed: hardKilled, cleanupErrors, finalDrainMs } });
439
452
  });
440
453
  });
441
454
  } finally {
@@ -75,6 +75,7 @@ export function reconcileAllStaleRuns(cwd: string, manifestCache: ManifestCache,
75
75
  if (!fresh || fresh.manifest.status !== "running") return;
76
76
  const result = reconcileStaleRun(fresh.manifest, fresh.tasks, now);
77
77
  if (result.repaired) {
78
+ if (result.repairedTasks) saveRunTasks(fresh.manifest, result.repairedTasks);
78
79
  updateRunStatus(fresh.manifest, "failed", `Stale run reconciled: ${result.detail}`);
79
80
  appendEvent(fresh.manifest.eventsPath, { type: "crew.run.reconciled_stale", runId: manifest.runId, message: result.detail, data: { verdict: result.verdict } });
80
81
  }
@@ -61,6 +61,23 @@ export function agentOutputPath(manifest: TeamRunManifest, taskId: string): stri
61
61
  }
62
62
 
63
63
  const AGENT_READER_TTL_MS = 200;
64
+ const ASYNC_AGENT_READER_CACHE_MAX_ENTRIES = 128;
65
+
66
+ const asyncAgentReaderCache = new Map<string, { expiresAt: number; records: CrewAgentRecord[]; inFlight?: Promise<CrewAgentRecord[]> }>();
67
+
68
+ function setAsyncAgentReaderCache(filePath: string, entry: { expiresAt: number; records: CrewAgentRecord[]; inFlight?: Promise<CrewAgentRecord[]> }): void {
69
+ const now = Date.now();
70
+ for (const [key, cached] of asyncAgentReaderCache) {
71
+ if (cached.expiresAt <= now && !cached.inFlight) asyncAgentReaderCache.delete(key);
72
+ }
73
+ if (asyncAgentReaderCache.has(filePath)) asyncAgentReaderCache.delete(filePath);
74
+ asyncAgentReaderCache.set(filePath, entry);
75
+ while (asyncAgentReaderCache.size > ASYNC_AGENT_READER_CACHE_MAX_ENTRIES) {
76
+ const oldest = asyncAgentReaderCache.keys().next().value;
77
+ if (!oldest) break;
78
+ asyncAgentReaderCache.delete(oldest);
79
+ }
80
+ }
64
81
 
65
82
  export function readCrewAgents(manifest: TeamRunManifest): CrewAgentRecord[] {
66
83
  try {
@@ -70,9 +87,32 @@ export function readCrewAgents(manifest: TeamRunManifest): CrewAgentRecord[] {
70
87
  }
71
88
  }
72
89
 
90
+ export async function readCrewAgentsAsync(manifest: TeamRunManifest): Promise<CrewAgentRecord[]> {
91
+ const filePath = agentsPath(manifest);
92
+ const now = Date.now();
93
+ const cached = asyncAgentReaderCache.get(filePath);
94
+ if (cached && cached.expiresAt > now) return cached.records;
95
+ if (cached?.inFlight) return cached.inFlight;
96
+ const inFlight = (async (): Promise<CrewAgentRecord[]> => {
97
+ try {
98
+ const parsed = JSON.parse(await fs.promises.readFile(filePath, "utf-8")) as unknown;
99
+ const records = Array.isArray(parsed) ? redactSecrets(parsed) as CrewAgentRecord[] : [];
100
+ setAsyncAgentReaderCache(filePath, { expiresAt: Date.now() + AGENT_READER_TTL_MS, records });
101
+ return records;
102
+ } catch {
103
+ setAsyncAgentReaderCache(filePath, { expiresAt: Date.now() + AGENT_READER_TTL_MS, records: [] });
104
+ return [];
105
+ }
106
+ })();
107
+ setAsyncAgentReaderCache(filePath, { expiresAt: now + AGENT_READER_TTL_MS, records: cached?.records ?? [], inFlight });
108
+ return inFlight;
109
+ }
110
+
73
111
  export function saveCrewAgents(manifest: TeamRunManifest, records: CrewAgentRecord[]): void {
74
112
  fs.mkdirSync(manifest.stateRoot, { recursive: true });
75
- atomicWriteJson(agentsPath(manifest), redactSecrets(records));
113
+ const filePath = agentsPath(manifest);
114
+ atomicWriteJson(filePath, redactSecrets(records));
115
+ asyncAgentReaderCache.delete(filePath);
76
116
  for (const record of records) writeCrewAgentStatus(manifest, record);
77
117
  }
78
118
 
@@ -12,6 +12,7 @@ export interface DeadletterEntry {
12
12
  reason: DeadletterReason;
13
13
  attempts: number;
14
14
  lastError?: string;
15
+ attemptId?: string;
15
16
  timestamp: string;
16
17
  }
17
18