pi-crew 0.1.44 → 0.1.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +5 -5
- package/agents/analyst.md +11 -11
- package/agents/critic.md +11 -11
- package/agents/executor.md +11 -11
- package/agents/explorer.md +11 -11
- package/agents/planner.md +11 -11
- package/agents/reviewer.md +11 -11
- package/agents/security-reviewer.md +11 -11
- package/agents/test-engineer.md +11 -11
- package/agents/verifier.md +11 -11
- package/agents/writer.md +11 -11
- package/docs/next-upgrade-roadmap.md +733 -0
- package/docs/research-awesome-agent-skills-distillation.md +100 -0
- package/docs/research-oh-my-pi-distillation.md +322 -0
- package/docs/source-runtime-refactor-map.md +24 -0
- package/docs/usage.md +3 -3
- package/install.mjs +52 -8
- package/package.json +1 -1
- package/schema.json +2 -1
- package/skills/async-worker-recovery/SKILL.md +42 -0
- package/skills/context-artifact-hygiene/SKILL.md +52 -0
- package/skills/delegation-patterns/SKILL.md +54 -0
- package/skills/mailbox-interactive/SKILL.md +40 -0
- package/skills/model-routing-context/SKILL.md +39 -0
- package/skills/multi-perspective-review/SKILL.md +58 -0
- package/skills/observability-reliability/SKILL.md +41 -0
- package/skills/ownership-session-security/SKILL.md +41 -0
- package/skills/pi-extension-lifecycle/SKILL.md +39 -0
- package/skills/requirements-to-task-packet/SKILL.md +63 -0
- package/skills/resource-discovery-config/SKILL.md +41 -0
- package/skills/runtime-state-reader/SKILL.md +44 -0
- package/skills/secure-agent-orchestration-review/SKILL.md +45 -0
- package/skills/state-mutation-locking/SKILL.md +42 -0
- package/skills/systematic-debugging/SKILL.md +67 -0
- package/skills/ui-render-performance/SKILL.md +39 -0
- package/skills/verification-before-done/SKILL.md +57 -0
- package/skills/worktree-isolation/SKILL.md +39 -0
- package/src/agents/discover-agents.ts +12 -11
- package/src/config/config.ts +48 -24
- package/src/config/defaults.ts +14 -0
- package/src/extension/project-init.ts +62 -2
- package/src/extension/register.ts +19 -10
- package/src/extension/registration/commands.ts +49 -26
- package/src/extension/registration/subagent-helpers.ts +8 -0
- package/src/extension/registration/subagent-tools.ts +2 -1
- package/src/extension/registration/team-tool.ts +28 -8
- package/src/extension/run-index.ts +13 -5
- package/src/extension/run-maintenance.ts +22 -3
- package/src/extension/team-tool/api.ts +25 -8
- package/src/extension/team-tool/cancel.ts +134 -102
- package/src/extension/team-tool/context.ts +6 -0
- package/src/extension/team-tool/lifecycle-actions.ts +17 -5
- package/src/extension/team-tool/respond.ts +103 -66
- package/src/extension/team-tool/run.ts +53 -10
- package/src/extension/team-tool/status.ts +12 -1
- package/src/extension/team-tool-types.ts +2 -0
- package/src/extension/team-tool.ts +32 -11
- package/src/observability/event-to-metric.ts +8 -1
- package/src/runtime/background-runner.ts +10 -4
- package/src/runtime/cancellation.ts +51 -0
- package/src/runtime/child-pi.ts +17 -4
- package/src/runtime/crash-recovery.ts +1 -0
- package/src/runtime/crew-agent-records.ts +41 -1
- package/src/runtime/deadletter.ts +1 -0
- package/src/runtime/delivery-coordinator.ts +174 -142
- package/src/runtime/effectiveness.ts +76 -0
- package/src/runtime/live-agent-control.ts +2 -1
- package/src/runtime/live-agent-manager.ts +20 -2
- package/src/runtime/live-control-realtime.ts +1 -1
- package/src/runtime/live-session-runtime.ts +5 -1
- package/src/runtime/manifest-cache.ts +17 -2
- package/src/runtime/model-fallback.ts +6 -4
- package/src/runtime/overflow-recovery.ts +175 -156
- package/src/runtime/pi-args.ts +18 -3
- package/src/runtime/process-status.ts +5 -1
- package/src/runtime/retry-executor.ts +26 -9
- package/src/runtime/runtime-resolver.ts +22 -6
- package/src/runtime/skill-instructions.ts +222 -0
- package/src/runtime/stale-reconciler.ts +189 -179
- package/src/runtime/subagent-manager.ts +3 -0
- package/src/runtime/task-runner/capabilities.ts +78 -0
- package/src/runtime/task-runner/live-executor.ts +4 -0
- package/src/runtime/task-runner/prompt-builder.ts +3 -1
- package/src/runtime/task-runner/prompt-pipeline.ts +64 -0
- package/src/runtime/task-runner.ts +44 -5
- package/src/runtime/team-runner.ts +91 -19
- package/src/schema/config-schema.ts +1 -0
- package/src/schema/team-tool-schema.ts +3 -3
- package/src/state/active-run-registry.ts +165 -0
- package/src/state/contracts.ts +1 -1
- package/src/state/mailbox.ts +44 -4
- package/src/state/state-store.ts +51 -1
- package/src/state/types.ts +46 -2
- package/src/teams/team-config.ts +1 -0
- package/src/ui/crew-widget.ts +9 -4
- package/src/ui/dashboard-panes/mailbox-pane.ts +2 -1
- package/src/ui/dashboard-panes/progress-pane.ts +2 -0
- package/src/ui/powerbar-publisher.ts +1 -1
- package/src/ui/run-snapshot-cache.ts +66 -39
- package/src/ui/snapshot-types.ts +7 -0
- package/src/utils/paths.ts +4 -2
- package/src/workflows/workflow-config.ts +1 -0
|
@@ -9,6 +9,7 @@ import { readCrewAgents } from "../../runtime/crew-agent-records.ts";
|
|
|
9
9
|
import { checkProcessLiveness, isActiveRunStatus } from "../../runtime/process-status.ts";
|
|
10
10
|
import { formatTaskGraphLines, waitingReason } from "../../runtime/task-display.ts";
|
|
11
11
|
import { verifyTaskCompletion, formatOutputPreview } from "../../runtime/completion-guard.ts";
|
|
12
|
+
import { evaluateRunEffectiveness } from "../../runtime/effectiveness.ts";
|
|
12
13
|
import type { PiTeamsToolResult } from "../tool-result.ts";
|
|
13
14
|
import { result, type TeamContext } from "./context.ts";
|
|
14
15
|
|
|
@@ -51,6 +52,10 @@ export function handleStatus(params: TeamToolParamsValue, ctx: TeamContext): PiT
|
|
|
51
52
|
groupJoinLines.push(`- ${String(message.data?.partial) === "true" ? "partial" : "completed"} request=${requestId} message=${message.id} ack=${timedOut ? "timeout" : ack}`);
|
|
52
53
|
}
|
|
53
54
|
const totalUsage = aggregateUsage(tasks);
|
|
55
|
+
const completedTasks = tasks.filter((task) => task.status === "completed");
|
|
56
|
+
const effectiveness = evaluateRunEffectiveness({ manifest, tasks, executeWorkers: manifest.runtimeResolution?.kind !== "scaffold", runtimeConfig: loadConfig(ctx.cwd).config.runtime });
|
|
57
|
+
const noObservedWorkTasks = effectiveness.noObservedWorkTaskIds.map((id) => tasks.find((task) => task.id === id)).filter((task): task is typeof tasks[number] => task !== undefined);
|
|
58
|
+
const attentionTasks = effectiveness.needsAttentionTaskIds.map((id) => tasks.find((task) => task.id === id)).filter((task): task is typeof tasks[number] => task !== undefined);
|
|
54
59
|
const activeAgents = crewAgents.filter((agent) => agent.status === "running");
|
|
55
60
|
const completedAgents = crewAgents.filter((agent) => agent.status !== "running");
|
|
56
61
|
const waitingTasks = tasks.filter((task) => task.status === "queued" || task.status === "waiting");
|
|
@@ -61,6 +66,7 @@ export function handleStatus(params: TeamToolParamsValue, ctx: TeamContext): PiT
|
|
|
61
66
|
`Workflow: ${manifest.workflow ?? "(none)"}`,
|
|
62
67
|
`Status: ${manifest.status}`,
|
|
63
68
|
`Workspace mode: ${manifest.workspaceMode}`,
|
|
69
|
+
...(manifest.runtimeResolution ? [`Runtime: ${manifest.runtimeResolution.kind}`, `Runtime safety: ${manifest.runtimeResolution.safety}`, `Runtime requested: ${manifest.runtimeResolution.requestedMode}${manifest.runtimeResolution.reason ? ` (${manifest.runtimeResolution.reason})` : ""}`] : []),
|
|
64
70
|
`Goal: ${manifest.goal}`,
|
|
65
71
|
`Created: ${manifest.createdAt}`,
|
|
66
72
|
`Updated: ${manifest.updatedAt}`,
|
|
@@ -72,7 +78,12 @@ export function handleStatus(params: TeamToolParamsValue, ctx: TeamContext): PiT
|
|
|
72
78
|
"Tasks:",
|
|
73
79
|
...(tasks.length ? tasks.map((task) => `- ${task.id} [${task.status}] ${task.role} -> ${task.agent}${task.taskPacket ? ` scope=${task.taskPacket.scope}` : ""}${task.verification ? ` green=${task.verification.observedGreenLevel}/${task.verification.requiredGreenLevel}` : ""}${task.modelAttempts?.length ? ` attempts=${task.modelAttempts.length}` : ""}${task.modelRouting ? ` modelRouting=${task.modelRouting.requested ? `${task.modelRouting.requested}->` : ""}${task.modelRouting.resolved}${task.modelRouting.usedAttempt ? ` attempt=${task.modelRouting.usedAttempt + 1}` : ""}` : ""}${task.agentProgress?.activityState ? ` activityState=${task.agentProgress.activityState}` : ""}${attentionByTask.get(task.id)?.data?.reason ? ` attention=${String(attentionByTask.get(task.id)?.data?.reason)}` : ""}${task.jsonEvents !== undefined ? ` jsonEvents=${task.jsonEvents}` : ""}${task.usage ? ` usage=${JSON.stringify(task.usage)}` : ""}${task.resultArtifact ? ` result=${task.resultArtifact.path}` : ""}${task.transcriptArtifact ? ` transcript=${task.transcriptArtifact.path}` : ""}${task.worktree ? ` worktree=${task.worktree.path}` : ""}${task.error ? ` error=${task.error}` : ""}`) : ["- (none)"]),
|
|
74
80
|
`Task counts: ${[...counts.entries()].map(([status, count]) => `${status}=${count}`).join(", ") || "none"}`,
|
|
75
|
-
"
|
|
81
|
+
"Effectiveness:",
|
|
82
|
+
`- observable=${effectiveness.observable}/${Math.max(1, effectiveness.completed)} completed tasks`,
|
|
83
|
+
`- workerExecution=${effectiveness.workerExecution} guard=${effectiveness.guardMode} severity=${effectiveness.severity}`,
|
|
84
|
+
`- noObservedWork=${effectiveness.noObservedWorkTaskIds.length ? effectiveness.noObservedWorkTaskIds.join(",") : "none"}`,
|
|
85
|
+
`- needsAttention=${effectiveness.needsAttentionTaskIds.length ? effectiveness.needsAttentionTaskIds.join(",") : "none"}`,
|
|
86
|
+
"Completion verification",
|
|
76
87
|
...(tasks.filter((t) => t.status === "completed").length ? tasks.filter((t) => t.status === "completed").map((t) => {
|
|
77
88
|
const guard = verifyTaskCompletion(t, manifest);
|
|
78
89
|
return `- ${t.id} green=${guard.greenLevel}/3${guard.warnings.length ? ` warnings=[${guard.warnings.join(", ")}]` : ""}`;
|
|
@@ -29,14 +29,14 @@ import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../stat
|
|
|
29
29
|
import { executeTeamRun } from "../runtime/team-runner.ts";
|
|
30
30
|
import { checkProcessLiveness, isActiveRunStatus } from "../runtime/process-status.ts";
|
|
31
31
|
import { saveCrewAgents, readCrewAgents, recordFromTask } from "../runtime/crew-agent-records.ts";
|
|
32
|
-
import { resolveCrewRuntime } from "../runtime/runtime-resolver.ts";
|
|
32
|
+
import { resolveCrewRuntime, runtimeResolutionState } from "../runtime/runtime-resolver.ts";
|
|
33
33
|
import { applyAttentionState, formatActivityAge, resolveCrewControlConfig } from "../runtime/agent-control.ts";
|
|
34
34
|
import { writeForegroundInterruptRequest } from "../runtime/foreground-control.ts";
|
|
35
35
|
import { formatTaskGraphLines, waitingReason } from "../runtime/task-display.ts";
|
|
36
36
|
import { directTeamAndWorkflowFromRun } from "../runtime/direct-run.ts";
|
|
37
37
|
import { parsePiJsonOutput } from "../runtime/pi-json-output.ts";
|
|
38
38
|
import { buildParentContext, configRecord, formatScoped, result, type TeamContext } from "./team-tool/context.ts";
|
|
39
|
-
import { autonomousPatchFromConfig, configPatchFromConfig, formatAutonomyStatus } from "./team-tool/config-patch.ts";
|
|
39
|
+
import { autonomousPatchFromConfig, configPatchFromConfig, effectiveRunConfig, formatAutonomyStatus } from "./team-tool/config-patch.ts";
|
|
40
40
|
import { handleApi } from "./team-tool/api.ts";
|
|
41
41
|
import { handleRun } from "./team-tool/run.ts";
|
|
42
42
|
import { handleDoctor } from "./team-tool/doctor.ts";
|
|
@@ -47,6 +47,7 @@ import { handleCancel } from "./team-tool/cancel.ts";
|
|
|
47
47
|
import { handleRespond } from "./team-tool/respond.ts";
|
|
48
48
|
import { handlePlan } from "./team-tool/plan.ts";
|
|
49
49
|
import { logInternalError } from "../utils/internal-error.ts";
|
|
50
|
+
import { normalizeSkillOverride } from "../runtime/skill-instructions.ts";
|
|
50
51
|
|
|
51
52
|
export type { TeamToolDetails } from "./team-tool-types.ts";
|
|
52
53
|
export type { TeamContext } from "./team-tool/context.ts";
|
|
@@ -176,18 +177,37 @@ export async function handleResume(params: TeamToolParamsValue, ctx: TeamContext
|
|
|
176
177
|
const workflow = direct?.workflow ?? allWorkflows(discoverWorkflows(ctx.cwd)).find((candidate) => candidate.name === loaded.manifest.workflow);
|
|
177
178
|
if (!workflow) return result(`Workflow '${loaded.manifest.workflow}' not found.`, { action: "resume", status: "error" }, true);
|
|
178
179
|
return await withRunLock(loaded.manifest, async () => {
|
|
180
|
+
const loadedConfig = loadConfig(ctx.cwd);
|
|
179
181
|
const recovered = recoverCheckpointedTasks(loaded.manifest, loaded.tasks);
|
|
180
182
|
const resumeManifest = recovered.manifest;
|
|
183
|
+
const executedConfig = effectiveRunConfig(loadedConfig.config, params.config);
|
|
184
|
+
const runtime = await resolveCrewRuntime(executedConfig);
|
|
185
|
+
const runtimeResolution = runtimeResolutionState(runtime);
|
|
186
|
+
const runtimeManifest = { ...resumeManifest, runtimeResolution, updatedAt: new Date().toISOString() };
|
|
187
|
+
saveRunManifest(runtimeManifest);
|
|
188
|
+
appendEvent(runtimeManifest.eventsPath, { type: "runtime.resolved", runId: runtimeManifest.runId, message: `Runtime resolved for resume: ${runtime.kind} safety=${runtime.safety}`, data: { runtimeResolution, action: "resume" } });
|
|
189
|
+
if (runtime.safety === "blocked") {
|
|
190
|
+
const runningManifest = updateRunStatus(runtimeManifest, "running", "Checking worker runtime availability before resume.");
|
|
191
|
+
const blocked = updateRunStatus(runningManifest, "blocked", runtime.reason ?? "Child worker execution is disabled; refusing to resume with no-op scaffold subagents.");
|
|
192
|
+
appendEvent(blocked.eventsPath, { type: "run.blocked", runId: blocked.runId, message: blocked.summary, data: { runtime, action: "resume" } });
|
|
193
|
+
return result([
|
|
194
|
+
`Blocked resume for pi-crew run ${blocked.runId}: real subagent workers are disabled.`,
|
|
195
|
+
`Runtime: ${runtime.kind} (requested ${runtime.requestedMode})`,
|
|
196
|
+
runtime.reason ?? "Child worker execution is disabled.",
|
|
197
|
+
"",
|
|
198
|
+
"To resume effective subagents, remove executeWorkers=false / PI_CREW_EXECUTE_WORKERS=0 / PI_TEAMS_EXECUTE_WORKERS=0 or set runtime.mode=child-process.",
|
|
199
|
+
"Use runtime.mode=scaffold only for explicit dry-run prompt/artifact generation.",
|
|
200
|
+
].join("\n"), { action: "resume", status: "error", runId: blocked.runId, artifactsRoot: blocked.artifactsRoot }, true);
|
|
201
|
+
}
|
|
181
202
|
const resetTasks = recovered.tasks.map((task) => task.status === "failed" || task.status === "cancelled" || task.status === "skipped" || task.status === "running" ? { ...task, status: "queued" as const, error: undefined, startedAt: undefined, finishedAt: undefined, claim: undefined } : task);
|
|
182
|
-
saveRunTasks(
|
|
183
|
-
const replay = replayPendingMailboxMessages(
|
|
184
|
-
appendEvent(
|
|
185
|
-
if (recovered.recovered.length) appendEvent(
|
|
186
|
-
if (replay.messages.length) appendEvent(
|
|
187
|
-
const loadedConfig = loadConfig(ctx.cwd);
|
|
188
|
-
const runtime = await resolveCrewRuntime(loadedConfig.config);
|
|
203
|
+
saveRunTasks(runtimeManifest, resetTasks);
|
|
204
|
+
const replay = replayPendingMailboxMessages(runtimeManifest);
|
|
205
|
+
appendEvent(runtimeManifest.eventsPath, { type: "run.resume_requested", runId: runtimeManifest.runId, data: { replayedMailboxMessages: replay.messages.length, recoveredCheckpointTasks: recovered.recovered } });
|
|
206
|
+
if (recovered.recovered.length) appendEvent(runtimeManifest.eventsPath, { type: "task.checkpoint_recovered", runId: runtimeManifest.runId, message: `Recovered ${recovered.recovered.length} task(s) from artifact-written checkpoints.`, data: { taskIds: recovered.recovered } });
|
|
207
|
+
if (replay.messages.length) appendEvent(runtimeManifest.eventsPath, { type: "mailbox.replayed", runId: runtimeManifest.runId, message: `Replayed ${replay.messages.length} pending inbox message(s).`, data: { messageIds: replay.messages.map((message) => message.id), taskIds: replay.messages.map((message) => message.taskId).filter(Boolean) } });
|
|
189
208
|
const executeWorkers = runtime.kind !== "scaffold";
|
|
190
|
-
const
|
|
209
|
+
const resumeSkillOverride = normalizeSkillOverride(params.skill) ?? runtimeManifest.skillOverride;
|
|
210
|
+
const executed = await executeTeamRun({ manifest: runtimeManifest, tasks: resetTasks, team, workflow, agents, executeWorkers, limits: executedConfig.limits, runtime, runtimeConfig: executedConfig.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, skillOverride: resumeSkillOverride, signal: ctx.signal, reliability: executedConfig.reliability, metricRegistry: ctx.metricRegistry });
|
|
191
211
|
return result([`Resumed run ${executed.manifest.runId}.`, `Status: ${executed.manifest.status}`, `Tasks: ${executed.tasks.length}`, `Artifacts: ${executed.manifest.artifactsRoot}`].join("\n"), { action: "resume", status: executed.manifest.status === "failed" ? "error" : "ok", runId: executed.manifest.runId, artifactsRoot: executed.manifest.artifactsRoot }, executed.manifest.status === "failed");
|
|
192
212
|
});
|
|
193
213
|
}
|
|
@@ -199,7 +219,7 @@ export async function handleTeamTool(params: TeamToolParamsValue, ctx: TeamConte
|
|
|
199
219
|
case "get": return handleGet(params, ctx);
|
|
200
220
|
case "init": {
|
|
201
221
|
const cfg = configRecord(params.config);
|
|
202
|
-
const initialized = initializeProject(ctx.cwd, { copyBuiltins: cfg.copyBuiltins === true, overwrite: cfg.overwrite === true });
|
|
222
|
+
const initialized = initializeProject(ctx.cwd, { copyBuiltins: cfg.copyBuiltins === true, overwrite: cfg.overwrite === true, configScope: cfg.configScope === "project" || cfg.scope === "project" ? "project" : cfg.configScope === "none" || cfg.scope === "none" ? "none" : "global" });
|
|
203
223
|
return result([
|
|
204
224
|
"Initialized pi-crew project layout.",
|
|
205
225
|
"Directories:",
|
|
@@ -207,6 +227,7 @@ export async function handleTeamTool(params: TeamToolParamsValue, ctx: TeamConte
|
|
|
207
227
|
"Copied builtin files:",
|
|
208
228
|
...(initialized.copiedFiles.length ? initialized.copiedFiles.map((file) => `- ${file}`) : ["- (none)"]),
|
|
209
229
|
...(initialized.skippedFiles.length ? ["Skipped existing files:", ...initialized.skippedFiles.map((file) => `- ${file}`)] : []),
|
|
230
|
+
`Config: ${initialized.configPath || "(none)"} (${initialized.configScope}${initialized.configCreated ? "; created" : initialized.configSkipped ? "; already existed" : "; unchanged"})`,
|
|
210
231
|
`Gitignore: ${initialized.gitignorePath} (${initialized.gitignoreUpdated ? "updated" : "already configured"})`,
|
|
211
232
|
].join("\n"), { action: "init", status: "ok" });
|
|
212
233
|
}
|
|
@@ -13,6 +13,13 @@ function numberValue(value: unknown, fallback = 0): number {
|
|
|
13
13
|
return typeof value === "number" && Number.isFinite(value) ? value : fallback;
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
+
const CANCELLATION_REASON_LABELS = new Set(["caller_cancelled", "leader_interrupted", "provider_timeout", "worker_timeout", "tool_timeout", "shutdown", "unknown"]);
|
|
17
|
+
|
|
18
|
+
/**
 * Maps an arbitrary event payload value to a cancellation reason label.
 *
 * The value is first coerced with `stringValue`, using "unknown" as the
 * fallback. Anything that is not a member of CANCELLATION_REASON_LABELS is
 * reported as "unknown", so the returned label always comes from that set.
 */
function cancellationReasonLabel(value: unknown): string {
	const candidate = stringValue(value, "unknown");
	if (CANCELLATION_REASON_LABELS.has(candidate)) return candidate;
	return "unknown";
}
|
|
22
|
+
|
|
16
23
|
export interface EventToMetricSubscription {
|
|
17
24
|
dispose(): void;
|
|
18
25
|
}
|
|
@@ -36,7 +43,7 @@ export function wireEventToMetrics(events: ExtensionAPI["events"] | undefined, r
|
|
|
36
43
|
const handlers: Array<[string, (data: unknown) => void]> = [
|
|
37
44
|
["crew.run.completed", (data) => { const item = recordValue(data); runCount.inc({ status: "completed" }); runDuration.observe({ team: stringValue(item.team, "unknown") }, numberValue(item.durationMs)); }],
|
|
38
45
|
["crew.run.failed", () => runCount.inc({ status: "failed" })],
|
|
39
|
-
["crew.run.cancelled", () => runCount.inc({ status: "cancelled" })],
|
|
46
|
+
["crew.run.cancelled", (data) => { const item = recordValue(data); runCount.inc({ status: "cancelled", reason: cancellationReasonLabel(item.reason) }); }],
|
|
40
47
|
["crew.task.completed", (data) => { const item = recordValue(data); taskCount.inc({ status: "completed" }); taskDuration.observe({ role: stringValue(item.role, "unknown") }, numberValue(item.durationMs)); tokenUsage.observe({ role: stringValue(item.role, "unknown") }, numberValue(item.tokens)); }],
|
|
41
48
|
["crew.task.failed", () => taskCount.inc({ status: "failed" })],
|
|
42
49
|
["crew.task.retry_attempt", (data) => { const item = recordValue(data); taskCount.inc({ status: "retry" }); retryAttemptCount.inc({ runId: stringValue(item.runId, "unknown"), taskId: stringValue(item.taskId, "unknown") }); }],
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { allAgents, discoverAgents } from "../agents/discover-agents.ts";
|
|
2
2
|
import { allTeams, discoverTeams } from "../teams/discover-teams.ts";
|
|
3
3
|
import { appendEvent } from "../state/event-log.ts";
|
|
4
|
-
import { loadRunManifestById, updateRunStatus } from "../state/state-store.ts";
|
|
4
|
+
import { loadRunManifestById, saveRunManifest, updateRunStatus } from "../state/state-store.ts";
|
|
5
5
|
import { allWorkflows, discoverWorkflows } from "../workflows/discover-workflows.ts";
|
|
6
6
|
import { loadConfig } from "../config/config.ts";
|
|
7
7
|
import { executeTeamRun } from "./team-runner.ts";
|
|
8
|
-
import { resolveCrewRuntime } from "./runtime-resolver.ts";
|
|
8
|
+
import { resolveCrewRuntime, runtimeResolutionState } from "./runtime-resolver.ts";
|
|
9
9
|
import { directTeamAndWorkflowFromRun } from "./direct-run.ts";
|
|
10
10
|
import { expandParallelResearchWorkflow } from "./parallel-research.ts";
|
|
11
11
|
import { writeAsyncStartMarker } from "./async-marker.ts";
|
|
@@ -36,9 +36,15 @@ async function main(): Promise<void> {
|
|
|
36
36
|
if (!baseWorkflow) throw new Error(`Workflow '${manifest.workflow ?? ""}' not found.`);
|
|
37
37
|
const workflow = expandParallelResearchWorkflow(baseWorkflow, cwd);
|
|
38
38
|
const loadedConfig = loadConfig(cwd);
|
|
39
|
-
const
|
|
39
|
+
const runConfig = manifest.runConfig && typeof manifest.runConfig === "object" && !Array.isArray(manifest.runConfig) ? manifest.runConfig as typeof loadedConfig.config : loadedConfig.config;
|
|
40
|
+
const runtime = manifest.runtimeResolution ? { kind: manifest.runtimeResolution.kind, requestedMode: manifest.runtimeResolution.requestedMode, available: manifest.runtimeResolution.available, fallback: manifest.runtimeResolution.fallback, steer: manifest.runtimeResolution.kind === "live-session", resume: manifest.runtimeResolution.kind === "live-session", liveToolActivity: manifest.runtimeResolution.kind === "live-session", transcript: manifest.runtimeResolution.kind !== "scaffold", reason: manifest.runtimeResolution.reason, safety: manifest.runtimeResolution.safety } : await resolveCrewRuntime(runConfig);
|
|
41
|
+
const runtimeResolution = manifest.runtimeResolution ?? runtimeResolutionState(runtime);
|
|
42
|
+
manifest = { ...manifest, runtimeResolution, runConfig, updatedAt: new Date().toISOString() };
|
|
43
|
+
saveRunManifest(manifest);
|
|
44
|
+
appendEvent(manifest.eventsPath, { type: "runtime.resolved", runId: manifest.runId, message: `Runtime resolved: ${runtime.kind} safety=${runtime.safety}`, data: { runtimeResolution, async: true } });
|
|
45
|
+
if (runtime.safety === "blocked") throw new Error(runtime.reason ?? "Child worker execution is disabled; refusing to create no-op scaffold subagents.");
|
|
40
46
|
const executeWorkers = runtime.kind !== "scaffold";
|
|
41
|
-
const result = await executeTeamRun({ manifest, tasks, team, workflow, agents, executeWorkers, limits:
|
|
47
|
+
const result = await executeTeamRun({ manifest, tasks, team, workflow, agents, executeWorkers, limits: runConfig.limits, runtime, runtimeConfig: runConfig.runtime, skillOverride: manifest.skillOverride, reliability: runConfig.reliability });
|
|
42
48
|
manifest = result.manifest;
|
|
43
49
|
tasks = result.tasks;
|
|
44
50
|
appendEvent(manifest.eventsPath, { type: "async.completed", runId: manifest.runId, data: { status: manifest.status, tasks: tasks.length } });
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
 * Canonical list of machine-readable cancellation codes.
 * Both the `CancellationReasonCode` union and the `KNOWN_CODES` lookup set are
 * derived from this single list so the two cannot drift apart.
 */
const CANCELLATION_REASON_CODES = ["caller_cancelled", "leader_interrupted", "provider_timeout", "worker_timeout", "tool_timeout", "shutdown", "unknown"] as const;

/** One of the recognised cancellation codes. */
export type CancellationReasonCode = typeof CANCELLATION_REASON_CODES[number];

/** Structured description of why something was cancelled. */
export interface CancellationReason {
	/** Machine-readable classification. */
	code: CancellationReasonCode;
	/** Human-readable explanation. */
	message: string;
	/** The original value the reason was derived from, when there is one. */
	cause?: unknown;
}

const KNOWN_CODES: ReadonlySet<string> = new Set<string>(CANCELLATION_REASON_CODES);

/** Type guard: true when `value` is a string that is one of the known cancellation codes. */
function isKnownCode(value: unknown): value is CancellationReasonCode {
	return typeof value === "string" && KNOWN_CODES.has(value);
}

/** Error that carries a structured {@link CancellationReason}; its message is `reason.message`. */
export class CrewCancellationError extends Error {
	readonly reason: CancellationReason;

	constructor(reason: CancellationReason) {
		super(reason.message);
		this.name = "CrewCancellationError";
		this.reason = reason;
	}
}

/**
 * Interprets a plain string as a cancellation reason.
 * A (trimmed) known code becomes that code with a generated message; any other
 * text is kept as the message of a `caller_cancelled` reason.
 */
function reasonFromString(value: string): CancellationReason {
	const trimmed = value.trim();
	if (isKnownCode(trimmed)) return { code: trimmed, message: `Cancelled: ${trimmed}` };
	return { code: "caller_cancelled", message: trimmed || "Cancelled by caller." };
}

/**
 * Normalises an arbitrary value (typically an abort reason) into a
 * {@link CancellationReason}.
 *
 * - `CrewCancellationError` → its own `reason` object.
 * - other `Error` → `caller_cancelled`, using the error message, with the error as `cause`.
 * - string → see `reasonFromString`.
 * - plain object → the first of `code` / `reason` that is a known code (after
 *   trimming); `message` if it is a non-blank string; `cause` if present,
 *   otherwise the object itself.
 * - anything else → a generic `caller_cancelled` reason.
 *
 * @param value The value to interpret.
 * @returns A reason whose `code` is always one of the known codes.
 */
export function cancellationReasonFromUnknown(value: unknown): CancellationReason {
	if (value instanceof CrewCancellationError) return value.reason;
	if (value instanceof Error) return { code: "caller_cancelled", message: value.message || "Cancelled by caller.", cause: value };
	if (typeof value === "string") return reasonFromString(value);
	if (value && typeof value === "object" && !Array.isArray(value)) {
		const record = value as { code?: unknown; reason?: unknown; message?: unknown; cause?: unknown };
		// Consider `code` first, then `reason`. An unrecognised `code` (for example a
		// foreign error code) must not hide a valid `reason` field.
		const candidates = [record.code, record.reason].map((entry) => (typeof entry === "string" ? entry.trim() : undefined));
		const code = candidates.find(isKnownCode) ?? "caller_cancelled";
		const message = typeof record.message === "string" && record.message.trim() ? record.message.trim() : `Cancelled: ${code}`;
		return { code, message, cause: record.cause ?? value };
	}
	return { code: "caller_cancelled", message: "Cancelled by caller." };
}

/** Derives a cancellation reason from `signal.reason`; an undefined signal yields the generic reason. */
export function cancellationReasonFromSignal(signal: AbortSignal | undefined): CancellationReason {
	return cancellationReasonFromUnknown(signal?.reason);
}

/** Builds (but does not throw) a `CrewCancellationError` describing the signal's reason. */
export function cancellationErrorFromSignal(signal: AbortSignal | undefined): CrewCancellationError {
	return new CrewCancellationError(cancellationReasonFromSignal(signal));
}

/**
 * Throws a `CrewCancellationError` when the signal has been aborted.
 * Does nothing for an undefined or non-aborted signal.
 *
 * @throws CrewCancellationError when `signal.aborted` is true.
 */
export function throwIfCancelled(signal: AbortSignal | undefined): void {
	if (signal?.aborted) throw cancellationErrorFromSignal(signal);
}
|
package/src/runtime/child-pi.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { spawn, type ChildProcess, type SpawnOptions } from "node:child_process"
|
|
|
2
2
|
import * as fs from "node:fs";
|
|
3
3
|
import * as path from "node:path";
|
|
4
4
|
import type { AgentConfig } from "../agents/agent-config.ts";
|
|
5
|
+
import type { WorkerExitStatus } from "../state/types.ts";
|
|
5
6
|
import { buildPiWorkerArgs, checkCrewDepth, cleanupTempDir } from "./pi-args.ts";
|
|
6
7
|
import { getPiSpawnCommand } from "./pi-spawn.ts";
|
|
7
8
|
import { DEFAULT_CHILD_PI } from "../config/defaults.ts";
|
|
@@ -88,6 +89,7 @@ export interface ChildPiRunInput {
|
|
|
88
89
|
task: string;
|
|
89
90
|
agent: AgentConfig;
|
|
90
91
|
model?: string;
|
|
92
|
+
skillPaths?: string[];
|
|
91
93
|
signal?: AbortSignal;
|
|
92
94
|
transcriptPath?: string;
|
|
93
95
|
onStdoutLine?: (line: string) => void;
|
|
@@ -104,6 +106,7 @@ export interface ChildPiRunResult {
|
|
|
104
106
|
stdout: string;
|
|
105
107
|
stderr: string;
|
|
106
108
|
error?: string;
|
|
109
|
+
exitStatus?: WorkerExitStatus;
|
|
107
110
|
}
|
|
108
111
|
|
|
109
112
|
export function buildChildPiSpawnOptions(cwd: string, env: NodeJS.ProcessEnv): SpawnOptions {
|
|
@@ -283,7 +286,7 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
283
286
|
if (mock === "retryable-failure") return { exitCode: 1, stdout: "", stderr: "rate limit: mock failure" };
|
|
284
287
|
return { exitCode: 1, stdout: "", stderr: `mock failure: ${mock}` };
|
|
285
288
|
}
|
|
286
|
-
const built = buildPiWorkerArgs({ task: input.task, agent: input.agent, model: input.model, sessionEnabled: false, maxDepth: input.maxDepth });
|
|
289
|
+
const built = buildPiWorkerArgs({ task: input.task, agent: input.agent, model: input.model, sessionEnabled: false, maxDepth: input.maxDepth, skillPaths: input.skillPaths });
|
|
287
290
|
const spawnSpec = getPiSpawnCommand(built.args);
|
|
288
291
|
try {
|
|
289
292
|
return await new Promise<ChildPiRunResult>((resolve) => {
|
|
@@ -306,6 +309,9 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
306
309
|
const responseTimeoutMs = Number.isFinite(responseTimeoutEnv) && responseTimeoutEnv >= 0 ? responseTimeoutEnv : input.responseTimeoutMs ?? RESPONSE_TIMEOUT_MS;
|
|
307
310
|
let responseTimeoutHit = false;
|
|
308
311
|
let forcedFinalDrain = false;
|
|
312
|
+
let abortRequested = input.signal?.aborted === true;
|
|
313
|
+
let hardKilled = false;
|
|
314
|
+
const cleanupErrors: string[] = [];
|
|
309
315
|
const restartNoResponseTimer = (): void => {
|
|
310
316
|
if (responseTimeoutMs <= 0) return;
|
|
311
317
|
if (noResponseTimer) clearTimeout(noResponseTimer);
|
|
@@ -347,6 +353,7 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
347
353
|
hardKillTimer = setTimeout(() => {
|
|
348
354
|
if (settled || childExited) return;
|
|
349
355
|
try {
|
|
356
|
+
hardKilled = true;
|
|
350
357
|
child.kill(process.platform === "win32" ? undefined : "SIGKILL");
|
|
351
358
|
} catch (error) {
|
|
352
359
|
logInternalError("child-pi.final-drain-kill", error, `pid=${child.pid}`);
|
|
@@ -382,11 +389,16 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
382
389
|
clearChildPiTimeouts();
|
|
383
390
|
lineObserver.flush();
|
|
384
391
|
input.signal?.removeEventListener("abort", abort);
|
|
385
|
-
|
|
386
|
-
|
|
392
|
+
try {
|
|
393
|
+
cleanupTempDir(built.tempDir);
|
|
394
|
+
} catch (error) {
|
|
395
|
+
cleanupErrors.push(error instanceof Error ? error.message : String(error));
|
|
396
|
+
}
|
|
397
|
+
resolve({ ...result, exitStatus: result.exitStatus ?? { exitCode: result.exitCode, cancelled: abortRequested, timedOut: responseTimeoutHit, killed: hardKilled, cleanupErrors, finalDrainMs } });
|
|
387
398
|
};
|
|
388
399
|
|
|
389
400
|
const abort = (): void => {
|
|
401
|
+
abortRequested = true;
|
|
390
402
|
killProcessTree(child.pid, child);
|
|
391
403
|
if (process.platform !== "win32") {
|
|
392
404
|
trySignalChild(child, "SIGTERM");
|
|
@@ -431,11 +443,12 @@ export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResu
|
|
|
431
443
|
clearHardKillTimer(child.pid);
|
|
432
444
|
}
|
|
433
445
|
const timeoutError = responseTimeoutHit && !stderr.trim() ? { error: `Child Pi produced no new output for ${responseTimeoutMs}ms; process was terminated as unresponsive.` } : undefined;
|
|
446
|
+
const finalExitCode = forcedFinalDrain && !timeoutError ? 0 : exitCode;
|
|
434
447
|
// A final assistant event is the child Pi contract for "the worker produced its answer".
|
|
435
448
|
// Some Pi processes can linger during post-final cleanup/stdio shutdown; finalDrain terminates
|
|
436
449
|
// that lingering process so the parent can continue, but it must not turn a completed
|
|
437
450
|
// subagent answer into a failed task. Real pre-final response timeouts still report errors.
|
|
438
|
-
settle({ exitCode:
|
|
451
|
+
settle({ exitCode: finalExitCode, stdout, stderr, ...(timeoutError ? { error: timeoutError.error } : {}), exitStatus: { exitCode: finalExitCode, cancelled: abortRequested, timedOut: responseTimeoutHit, killed: hardKilled, cleanupErrors, finalDrainMs } });
|
|
439
452
|
});
|
|
440
453
|
});
|
|
441
454
|
} finally {
|
|
@@ -75,6 +75,7 @@ export function reconcileAllStaleRuns(cwd: string, manifestCache: ManifestCache,
|
|
|
75
75
|
if (!fresh || fresh.manifest.status !== "running") return;
|
|
76
76
|
const result = reconcileStaleRun(fresh.manifest, fresh.tasks, now);
|
|
77
77
|
if (result.repaired) {
|
|
78
|
+
if (result.repairedTasks) saveRunTasks(fresh.manifest, result.repairedTasks);
|
|
78
79
|
updateRunStatus(fresh.manifest, "failed", `Stale run reconciled: ${result.detail}`);
|
|
79
80
|
appendEvent(fresh.manifest.eventsPath, { type: "crew.run.reconciled_stale", runId: manifest.runId, message: result.detail, data: { verdict: result.verdict } });
|
|
80
81
|
}
|
|
@@ -61,6 +61,23 @@ export function agentOutputPath(manifest: TeamRunManifest, taskId: string): stri
|
|
|
61
61
|
}
|
|
62
62
|
|
|
63
63
|
/** Time-to-live, in milliseconds, applied to cached agent reader results. */
const AGENT_READER_TTL_MS = 200;

/** Upper bound on the number of file paths tracked by the async agent reader cache. */
const ASYNC_AGENT_READER_CACHE_MAX_ENTRIES = 128;

/** One cache slot: the last known records, their expiry timestamp, and an optional pending read. */
type AsyncAgentReaderCacheEntry = { expiresAt: number; records: CrewAgentRecord[]; inFlight?: Promise<CrewAgentRecord[]> };

/** Async agent reader cache keyed by file path. Map insertion order doubles as the eviction order. */
const asyncAgentReaderCache = new Map<string, AsyncAgentReaderCacheEntry>();

/**
 * Stores `entry` under `filePath` while keeping the cache bounded.
 *
 * Steps, in order:
 * 1. Drop every entry that has expired and has no read in flight.
 * 2. Re-insert `filePath` so it becomes the most recently written key.
 * 3. Evict the oldest keys until the cache holds at most
 *    `ASYNC_AGENT_READER_CACHE_MAX_ENTRIES` entries.
 *
 * @param filePath Cache key for the entry.
 * @param entry Records, expiry timestamp and optional pending read to store.
 */
function setAsyncAgentReaderCache(filePath: string, entry: AsyncAgentReaderCacheEntry): void {
	const now = Date.now();
	for (const [key, cached] of asyncAgentReaderCache) {
		// Entries with a pending read are kept even when expired so callers can still join that read.
		if (cached.expiresAt <= now && !cached.inFlight) asyncAgentReaderCache.delete(key);
	}
	// Delete-then-set moves an existing key to the end of the Map's insertion order.
	asyncAgentReaderCache.delete(filePath);
	asyncAgentReaderCache.set(filePath, entry);
	while (asyncAgentReaderCache.size > ASYNC_AGENT_READER_CACHE_MAX_ENTRIES) {
		const oldest = asyncAgentReaderCache.keys().next().value;
		// Compare against undefined explicitly: an empty-string key is falsy but is still a real entry.
		if (oldest === undefined) break;
		asyncAgentReaderCache.delete(oldest);
	}
}
|
|
64
81
|
|
|
65
82
|
export function readCrewAgents(manifest: TeamRunManifest): CrewAgentRecord[] {
|
|
66
83
|
try {
|
|
@@ -70,9 +87,32 @@ export function readCrewAgents(manifest: TeamRunManifest): CrewAgentRecord[] {
|
|
|
70
87
|
}
|
|
71
88
|
}
|
|
72
89
|
|
|
90
|
+
/**
 * Asynchronously reads the crew agent records for `manifest`, using a short-lived cache.
 *
 * Resolution order:
 * 1. If a read for the same file is already pending, join it.
 * 2. Otherwise, if a cached result has not expired, return it.
 * 3. Otherwise start a new read, publish it as the pending read, and return it.
 *
 * The file content is parsed as JSON; a top-level array is passed through `redactSecrets`.
 * Any read or parse failure, or a non-array payload, yields an empty list, and that result
 * is cached for `AGENT_READER_TTL_MS` like any other.
 *
 * @param manifest Run manifest whose agents file should be read.
 * @returns The agent records, or an empty array when they cannot be loaded.
 */
export async function readCrewAgentsAsync(manifest: TeamRunManifest): Promise<CrewAgentRecord[]> {
	const filePath = agentsPath(manifest);
	const now = Date.now();
	const cached = asyncAgentReaderCache.get(filePath);
	// The pending read must be checked before the TTL fast path: while a read is in flight the
	// cache entry only holds placeholder records (previous or empty) with a fresh expiry, so
	// checking the TTL first would hand concurrent callers that placeholder instead of real data.
	if (cached?.inFlight) return cached.inFlight;
	if (cached && cached.expiresAt > now) return cached.records;
	const inFlight = (async (): Promise<CrewAgentRecord[]> => {
		try {
			const parsed = JSON.parse(await fs.promises.readFile(filePath, "utf-8")) as unknown;
			const records = Array.isArray(parsed) ? redactSecrets(parsed) as CrewAgentRecord[] : [];
			// Storing the settled result replaces the placeholder entry and clears `inFlight`.
			setAsyncAgentReaderCache(filePath, { expiresAt: Date.now() + AGENT_READER_TTL_MS, records });
			return records;
		} catch {
			// Best-effort read: a missing or malformed file is reported as "no agents".
			setAsyncAgentReaderCache(filePath, { expiresAt: Date.now() + AGENT_READER_TTL_MS, records: [] });
			return [];
		}
	})();
	// Publish the pending read so overlapping callers share one file read.
	setAsyncAgentReaderCache(filePath, { expiresAt: now + AGENT_READER_TTL_MS, records: cached?.records ?? [], inFlight });
	return inFlight;
}
|
|
110
|
+
|
|
73
111
|
/**
 * Persists the crew agent records for a run.
 *
 * Ensures the state root directory exists, writes the records (after `redactSecrets`) to the
 * agents file via `atomicWriteJson`, drops any async reader cache entry for that file, and then
 * writes a status for every record through `writeCrewAgentStatus`.
 *
 * @param manifest Run manifest providing the state root and agents file location.
 * @param records Agent records to persist.
 */
export function saveCrewAgents(manifest: TeamRunManifest, records: CrewAgentRecord[]): void {
	fs.mkdirSync(manifest.stateRoot, { recursive: true });

	const agentsFile = agentsPath(manifest);
	const redacted = redactSecrets(records);
	atomicWriteJson(agentsFile, redacted);

	// Invalidate the async reader cache so the next async read picks up the file just written.
	asyncAgentReaderCache.delete(agentsFile);

	records.forEach((record) => {
		writeCrewAgentStatus(manifest, record);
	});
}
|
|
78
118
|
|