pi-crew 0.1.43 → 0.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/docs/research-phase10-distillation.md +199 -0
  2. package/docs/research-phase11-distillation.md +201 -0
  3. package/package.json +1 -1
  4. package/src/agents/discover-agents.ts +1 -0
  5. package/src/config/config.ts +19 -0
  6. package/src/extension/register.ts +127 -8
  7. package/src/extension/registration/team-tool.ts +2 -1
  8. package/src/extension/run-index.ts +19 -0
  9. package/src/extension/team-tool/api.ts +1 -1
  10. package/src/extension/team-tool/cancel.ts +103 -31
  11. package/src/extension/team-tool/context.ts +1 -0
  12. package/src/extension/team-tool/respond.ts +67 -0
  13. package/src/extension/team-tool/run.ts +2 -2
  14. package/src/extension/team-tool/status.ts +7 -1
  15. package/src/extension/team-tool-types.ts +4 -0
  16. package/src/extension/team-tool.ts +2 -0
  17. package/src/observability/event-to-metric.ts +6 -0
  18. package/src/runtime/completion-guard.ts +190 -103
  19. package/src/runtime/crash-recovery.ts +30 -0
  20. package/src/runtime/crew-agent-runtime.ts +2 -1
  21. package/src/runtime/delivery-coordinator.ts +143 -0
  22. package/src/runtime/model-fallback.ts +5 -2
  23. package/src/runtime/overflow-recovery.ts +157 -0
  24. package/src/runtime/process-status.ts +1 -1
  25. package/src/runtime/session-resources.ts +25 -0
  26. package/src/runtime/session-snapshot.ts +59 -0
  27. package/src/runtime/stale-reconciler.ts +179 -0
  28. package/src/runtime/supervisor-contact.ts +59 -0
  29. package/src/runtime/task-runner.ts +14 -0
  30. package/src/runtime/team-runner.ts +6 -4
  31. package/src/schema/config-schema.ts +1 -0
  32. package/src/schema/team-tool-schema.ts +6 -1
  33. package/src/state/contracts.ts +6 -2
  34. package/src/ui/crew-widget.ts +5 -4
  35. package/src/ui/powerbar-publisher.ts +3 -3
  36. package/src/ui/run-snapshot-cache.ts +275 -1
  37. package/src/ui/status-colors.ts +4 -0
  38. package/src/utils/atomic-write.ts +33 -0
@@ -1,4 +1,6 @@
1
1
  import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
2
+ import * as fs from "node:fs";
3
+ import * as path from "node:path";
2
4
  import { loadConfig } from "../config/config.ts";
3
5
  import { registerAutonomousPolicy } from "./autonomous-policy.ts";
4
6
  import { startAsyncRunNotifier, stopAsyncRunNotifier, type AsyncNotifierState } from "./async-notifier.ts";
@@ -8,6 +10,7 @@ import { registerPiCrewRpc, type PiCrewRpcHandle } from "./cross-extension-rpc.t
8
10
  import { stopCrewWidget, updateCrewWidget, type CrewWidgetState } from "../ui/crew-widget.ts";
9
11
  import { clearPiCrewPowerbar, registerPiCrewPowerbarSegments, updatePiCrewPowerbar } from "../ui/powerbar-publisher.ts";
10
12
  import { loadRunManifestById, updateRunStatus } from "../state/state-store.ts";
13
+ import type { TeamRunManifest } from "../state/types.ts";
11
14
  import { terminateActiveChildPiProcesses } from "../subagents/spawn.ts";
12
15
  import { SubagentManager } from "../subagents/manager.ts";
13
16
  import { __test__subagentSpawnParams, sendAgentWakeUp, sendFollowUp } from "./registration/subagent-helpers.ts";
@@ -34,6 +37,9 @@ import { OTLPExporter } from "../observability/exporters/otlp-exporter.ts";
34
37
  import { HeartbeatWatcher } from "../runtime/heartbeat-watcher.ts";
35
38
  import { appendDeadletter } from "../runtime/deadletter.ts";
36
39
  import { detectInterruptedRuns } from "../runtime/crash-recovery.ts";
40
+ import { DeliveryCoordinator } from "../runtime/delivery-coordinator.ts";
41
+ import { OverflowRecoveryTracker } from "../runtime/overflow-recovery.ts";
42
+ import { tryRegisterSessionCleanup } from "../runtime/session-resources.ts";
37
43
  import { initI18n } from "../i18n.ts";
38
44
 
39
45
  export { __test__subagentSpawnParams };
@@ -83,6 +89,8 @@ export function registerPiTeams(pi: ExtensionAPI): void {
83
89
  let metricSink: MetricSink | undefined;
84
90
  let heartbeatWatcher: HeartbeatWatcher | undefined;
85
91
  let otlpExporter: OTLPExporter | undefined;
92
+ let deliveryCoordinator: DeliveryCoordinator | undefined;
93
+ let overflowTracker: OverflowRecoveryTracker | undefined;
86
94
  const configureNotifications = (ctx: ExtensionContext): void => {
87
95
  notificationRouter?.dispose();
88
96
  notificationSink?.dispose();
@@ -148,6 +156,28 @@ export function registerPiTeams(pi: ExtensionAPI): void {
148
156
  }
149
157
  };
150
158
  const autoRecoveryLast = new Map<string, number>();
159
/**
 * (Re)creates the delivery coordinator and the overflow-recovery tracker.
 *
 * Any existing instances are disposed and cleared first, so the function can
 * be called repeatedly. The coordinator is wired to the extension event bus and
 * to the follow-up / wake-up senders; the tracker reports phase transitions as
 * a metric plus a `crew.task.overflow` event, and reports timeouts through
 * `notifyOperator`.
 */
const configureDeliveryCoordinator = (): void => {
	// Tear down the previous pair before building replacements.
	deliveryCoordinator?.dispose();
	deliveryCoordinator = undefined;
	overflowTracker?.dispose();
	overflowTracker = undefined;

	deliveryCoordinator = new DeliveryCoordinator({
		emit: (event, data) => {
			pi.events?.emit?.(event, data);
		},
		sendFollowUp: (title, body) => {
			// Drop empty parts so a missing title or body leaves no blank line.
			const lines = [title, body].filter((line): line is string => Boolean(line));
			sendFollowUp(pi, lines.join("\n"));
		},
		sendWakeUp: (message) => {
			sendAgentWakeUp(pi, message);
		},
	});

	overflowTracker = new OverflowRecoveryTracker({
		onPhaseChange: (state, previousPhase) => {
			// The metric is only recorded when a registry is currently configured.
			metricRegistry
				?.counter("crew.task.overflow_recovery_total", "Overflow recovery phase transitions")
				.inc({ phase: state.phase, previous_phase: previousPhase });
			pi.events?.emit?.("crew.task.overflow", {
				runId: state.runId,
				taskId: state.taskId,
				phase: state.phase,
				previousPhase,
			});
		},
		onTimeout: (state) => {
			notifyOperator({
				id: `overflow_timeout_${state.taskId}`,
				severity: "warning",
				source: "overflow-recovery",
				runId: state.runId,
				title: `Task ${state.taskId} overflow recovery timed out`,
				body: `Phase: ${state.phase}, compaction_count: ${state.compactionCount}, retry_count: ${state.retryCount}. The task may be stuck.`,
			});
		},
	});
};
151
181
  const notifyOperator = (notification: NotificationDescriptor): void => {
152
182
  try {
153
183
  notificationRouter?.enqueue(notification);
@@ -207,6 +237,7 @@ export function registerPiTeams(pi: ExtensionAPI): void {
207
237
  const foregroundControllers = new Set<AbortController>();
208
238
  let liveSidebarRunId: string | undefined;
209
239
  let renderScheduler: RenderScheduler | undefined;
240
+ let preloadTimer: ReturnType<typeof setTimeout> | undefined;
210
241
  const stopSessionBoundSubagents = (): void => {
211
242
  for (const controller of foregroundControllers) controller.abort();
212
243
  foregroundControllers.clear();
@@ -314,6 +345,7 @@ export function registerPiTeams(pi: ExtensionAPI): void {
314
345
  const cleanupRuntime = (): void => {
315
346
  if (cleanedUp) return;
316
347
  cleanedUp = true;
348
+ if (preloadTimer) { clearTimeout(preloadTimer); preloadTimer = undefined; }
317
349
  stopSessionBoundSubagents();
318
350
  stopAsyncRunNotifier(notifierState);
319
351
  stopCrewWidget(currentCtx, widgetState, currentCtx ? loadConfig(currentCtx.cwd).config.ui : undefined);
@@ -328,6 +360,10 @@ export function registerPiTeams(pi: ExtensionAPI): void {
328
360
  eventMetricSub = undefined;
329
361
  otlpExporter = undefined;
330
362
  metricRegistry = undefined;
363
+ deliveryCoordinator?.dispose();
364
+ overflowTracker?.dispose();
365
+ deliveryCoordinator = undefined;
366
+ overflowTracker = undefined;
331
367
  manifestCache.dispose();
332
368
  runSnapshotCache.dispose?.();
333
369
  renderScheduler?.dispose();
@@ -360,16 +396,68 @@ export function registerPiTeams(pi: ExtensionAPI): void {
360
396
  autoRecoveryLast.clear();
361
397
  configureNotifications(ctx);
362
398
  configureObservability(ctx);
399
+ configureDeliveryCoordinator();
400
+ const sessionId = (ctx as unknown as Record<string, unknown>).sessionId;
401
+ if (typeof sessionId === "string" && sessionId) deliveryCoordinator?.activate(sessionId);
402
+ tryRegisterSessionCleanup(pi, () => { terminateActiveChildPiProcesses(); cleanupRuntime(); });
363
403
  registerPiCrewPowerbarSegments(pi.events, loadedConfig.config.ui);
364
404
  startAsyncRunNotifier(ctx, notifierState, loadedConfig.config.notifierIntervalMs ?? DEFAULT_UI.notifierIntervalMs, { generation: ownerGeneration, isCurrent: (generation) => generation === sessionGeneration && currentCtx === ctx && !cleanedUp });
365
405
  const cache = getManifestCache(ctx.cwd);
366
406
  updateCrewWidget(ctx, widgetState, loadedConfig.config.ui, cache, getRunSnapshotCache(ctx.cwd));
367
407
  updatePiCrewPowerbar(pi.events, ctx.cwd, loadedConfig.config.ui, cache, getRunSnapshotCache(ctx.cwd), ctx, widgetState.notificationCount ?? 0);
368
408
  renderScheduler?.dispose();
409
+ // Phase 12: Async preloading — renderTick reads only a pre-computed frame
410
+ // from memory (zero fs I/O). Background preload refreshes the frame async.
411
+ let preloading = false;
412
+
413
+ let lastPreloadedConfig: ReturnType<typeof loadConfig> | undefined;
414
+ let lastPreloadedManifests: TeamRunManifest[] = [];
415
+ let lastFrameManifestCache: ReturnType<typeof createManifestCache> | undefined;
416
+ let lastFrameSnapshotCache: ReturnType<typeof createRunSnapshotCache> | undefined;
417
+
418
/**
 * Refreshes the pre-computed frame that `renderTick` reads.
 *
 * Stores the loaded config, both caches and the 20 most recent manifests in
 * the enclosing `last*` variables, then awaits a preload of the stale
 * snapshots for those runs.
 *
 * @returns `false` when there is no current context, otherwise `true` once the
 *   snapshot preload has settled.
 */
const buildFrame = async (): Promise<boolean> => {
	if (!currentCtx) return false;

	lastPreloadedConfig = loadConfig(currentCtx.cwd);

	const frameManifestCache = getManifestCache(currentCtx.cwd);
	lastFrameManifestCache = frameManifestCache;

	const frameSnapshotCache = getRunSnapshotCache(currentCtx.cwd);
	lastFrameSnapshotCache = frameSnapshotCache;

	// Publish the manifest list before awaiting so it is available immediately.
	lastPreloadedManifests = frameManifestCache.list(20);

	await frameSnapshotCache.preloadAllStale(lastPreloadedManifests.map((run) => run.runId));
	return true;
};
429
+
430
/**
 * Starts one fire-and-forget `buildFrame` pass unless one is already in flight
 * or there is no current context. A successful pass schedules a render;
 * failures are logged via `logInternalError` and never propagate to the caller.
 */
const backgroundPreload = (): void => {
	if (preloading || !currentCtx) return;
	preloading = true;

	void (async (): Promise<void> => {
		try {
			const built = await buildFrame();
			// Release the in-flight flag before scheduling, as a render may trigger more work.
			preloading = false;
			if (built) renderScheduler?.schedule();
		} catch (error: unknown) {
			preloading = false;
			logInternalError("register.backgroundPreload", error);
		}
	})();
};
443
+
444
/**
 * Starts (or restarts) the self-rescheduling preload loop.
 *
 * Any previously armed timer is cleared. Each timer is `unref()`-ed, and the
 * first preload runs after one full `intervalMs`, not immediately.
 *
 * @param intervalMs Delay between preload passes, in milliseconds.
 */
const startPreloadLoop = (intervalMs: number): void => {
	if (preloadTimer) clearTimeout(preloadTimer);

	// Arms a single timeout whose callback runs a preload and then re-arms itself.
	const arm = (): void => {
		preloadTimer = setTimeout(() => {
			backgroundPreload();
			arm();
		}, intervalMs);
		preloadTimer.unref();
	};

	arm();
};
454
+
369
455
  const renderTick = (): void => {
370
456
  if (!currentCtx) return;
371
- const config = loadConfig(currentCtx.cwd).config.ui;
372
- const activeCache = getManifestCache(currentCtx.cwd);
457
+ const config = lastPreloadedConfig?.config.ui;
458
+ const activeCache = lastFrameManifestCache ?? getManifestCache(currentCtx.cwd);
459
+ const snapshotCache = lastFrameSnapshotCache ?? getRunSnapshotCache(currentCtx.cwd);
460
+ const manifests = lastPreloadedManifests.length > 0 ? lastPreloadedManifests : activeCache.list(20);
373
461
  if (liveSidebarRunId) {
374
462
  const placement = config?.widgetPlacement ?? "aboveEditor";
375
463
  if (widgetState.lastVisibility !== "hidden" || widgetState.lastPlacement !== placement) {
@@ -382,13 +470,18 @@ export function registerPiTeams(pi: ExtensionAPI): void {
382
470
  }
383
471
  requestRender(currentCtx);
384
472
  } else {
385
- updateCrewWidget(currentCtx, widgetState, config, activeCache, getRunSnapshotCache(currentCtx.cwd));
473
+ updateCrewWidget(currentCtx, widgetState, config, activeCache, snapshotCache, manifests);
386
474
  }
387
- updatePiCrewPowerbar(pi.events, currentCtx.cwd, config, activeCache, getRunSnapshotCache(currentCtx.cwd), currentCtx, widgetState.notificationCount ?? 0);
475
+ updatePiCrewPowerbar(pi.events, currentCtx.cwd, config, activeCache, snapshotCache, currentCtx, widgetState.notificationCount ?? 0, manifests);
476
+ // Health notifications: only warn about genuinely running runs
388
477
  const now = Date.now();
389
- for (const run of activeCache.list(20)) {
478
+ for (const run of manifests) {
479
+ if (run.status !== "running") continue;
390
480
  try {
391
- const snapshot = getRunSnapshotCache(currentCtx.cwd).refreshIfStale(run.runId);
481
+ const snapshot = snapshotCache.get(run.runId);
482
+ if (!snapshot) continue;
483
+ // Skip if snapshot shows run already completed/failed (stale cache)
484
+ if (snapshot.manifest.status !== "running") continue;
392
485
  const summary = summarizeHeartbeats(snapshot, { now });
393
486
  const maybeNotifyHealth = (kind: string, count: number, title: string, body: string): void => {
394
487
  if (count <= 0) return;
@@ -405,18 +498,40 @@ export function registerPiTeams(pi: ExtensionAPI): void {
405
498
  }
406
499
  }
407
500
  };
501
+
502
+ const fallbackMs = loadedConfig.config.ui?.dashboardLiveRefreshMs ?? 250;
408
503
  renderScheduler = new RenderScheduler(pi.events, renderTick, {
409
- fallbackMs: loadedConfig.config.ui?.dashboardLiveRefreshMs ?? 250,
504
+ fallbackMs,
410
505
  onInvalidate: () => getRunSnapshotCache(ctx.cwd).invalidate(),
411
506
  });
507
+ // Start async preload loop — refreshes snapshot cache in background
508
+ startPreloadLoop(fallbackMs);
412
509
  });
413
510
  pi.on("session_before_switch", () => {
414
511
  sessionGeneration++;
512
+ // Phase 11b: Capture state before session switch
513
+ const pendingCount = deliveryCoordinator?.getPendingCount() ?? 0;
514
+ if (pendingCount > 0) {
515
+ logInternalError("register.session-before-switch", `Switching session with ${pendingCount} pending deliveries`);
516
+ }
517
+ deliveryCoordinator?.deactivate();
415
518
  stopAsyncRunNotifier(notifierState);
416
519
  stopSessionBoundSubagents();
417
520
  });
418
521
  pi.on("session_shutdown", () => cleanupRuntime());
419
522
 
523
// Phase 11a: Dynamic resource discovery — advertise pi-crew skill directories.
// The bundled directory (two levels above this module) is listed first; the
// `skills` directory under the process working directory is added only when it
// is a different path. Directories that do not exist are left out.
try {
	pi.on("resources_discover", () => {
		const workingSkills = path.resolve(process.cwd(), "skills");
		const bundledSkills = path.resolve(__dirname, "..", "..", "skills");
		const candidates = workingSkills !== bundledSkills ? [bundledSkills, workingSkills] : [bundledSkills];
		const paths: string[] = candidates.filter((dir) => fs.existsSync(dir));
		return paths.length > 0 ? { skillPaths: paths } : {};
	});
} catch { /* older Pi without resources_discover */ }
534
+
420
535
  registerCompactionGuard(pi, { foregroundControllers });
421
536
 
422
537
  // Phase 1.4: Permission gate for destructive team actions.
@@ -435,7 +550,11 @@ export function registerPiTeams(pi: ExtensionAPI): void {
435
550
  };
436
551
  });
437
552
 
438
- registerTeamTool(pi, { foregroundControllers, startForegroundRun, openLiveSidebar, getManifestCache, getRunSnapshotCache, getMetricRegistry: () => metricRegistry, widgetState });
553
+ registerTeamTool(pi, { foregroundControllers, startForegroundRun, openLiveSidebar, getManifestCache, getRunSnapshotCache, getMetricRegistry: () => metricRegistry, widgetState, onJsonEvent: (taskId, runId, event) => {
554
+ const record = event as Record<string, unknown>;
555
+ const eventType = typeof record.type === "string" ? record.type : undefined;
556
+ if (eventType) overflowTracker?.feedEvent(taskId, runId, eventType);
557
+ } });
439
558
  registerSubagentTools(pi, subagentManager, { ownerSessionGeneration: captureSessionGeneration });
440
559
  time("register.tools");
441
560
 
@@ -17,6 +17,7 @@ export interface RegisterTeamToolDeps {
17
17
  getRunSnapshotCache?: (cwd: string) => ReturnType<typeof createRunSnapshotCache>;
18
18
  getMetricRegistry?: () => MetricRegistry | undefined;
19
19
  widgetState: CrewWidgetState;
20
+ onJsonEvent?: (taskId: string, runId: string, event: unknown) => void;
20
21
  }
21
22
 
22
23
  export function registerTeamTool(pi: ExtensionAPI, deps: RegisterTeamToolDeps): void {
@@ -38,7 +39,7 @@ export function registerTeamTool(pi: ExtensionAPI, deps: RegisterTeamToolDeps):
38
39
  const runLabel = resolved.team ?? resolved.agent ?? "direct";
39
40
  pi.setSessionName(`pi-crew: ${runLabel}/${resolved.workflow ?? "default"} — ${resolved.goal.slice(0, 60)}`);
40
41
  }
41
- const output = await handleTeamTool(resolved, { ...ctx, signal: controller.signal, metricRegistry: deps.getMetricRegistry?.(), startForegroundRun: (runner, runId) => deps.startForegroundRun(ctx, runner, runId), onRunStarted: (runId) => deps.openLiveSidebar(ctx, runId) });
42
+ const output = await handleTeamTool(resolved, { ...ctx, signal: controller.signal, metricRegistry: deps.getMetricRegistry?.(), startForegroundRun: (runner, runId) => deps.startForegroundRun(ctx, runner, runId), onRunStarted: (runId) => deps.openLiveSidebar(ctx, runId), onJsonEvent: deps.onJsonEvent });
42
43
  if (resolved.action === "run") {
43
44
  pi.appendEntry("crew:run-started", {
44
45
  runId: output.details?.runId,
@@ -55,3 +55,22 @@ export function listRecentRuns(cwd: string, max = 20): TeamRunManifest[] {
55
55
  const roots = scopedRunRoots(cwd);
56
56
  return mergeRuns(roots.map((root) => collectRuns(root, max)), max);
57
57
  }
58
+
59
/**
 * List runs restricted to one storage scope.
 *
 * @param cwd Working directory used to locate the project crew root.
 * @param scope
 *   - `"project"`: runs under the project crew root only; an empty list when
 *     `findRepoRoot(cwd)` finds no repository root.
 *   - `"user"`: runs under the user crew root only.
 *   - `"all"` (default, also used for any unrecognised value): both scopes merged.
 * @param max Optional cap on the number of runs. For `"all"`, omitting it
 *   delegates to `listRuns`, otherwise to `listRecentRuns`.
 * @returns The matching run manifests.
 */
export function listRunsByScope(cwd: string, scope: "project" | "user" | "all" = "all", max?: number): TeamRunManifest[] {
	const projectRoot = findRepoRoot(cwd);

	if (scope === "project") {
		if (!projectRoot) return [];
		return collectRuns(projectCrewRoot(cwd), max);
	}

	if (scope === "user") {
		return collectRuns(userCrewRoot(), max);
	}

	// "all" and anything else fall back to the merged listing.
	if (max === undefined) return listRuns(cwd);
	return listRecentRuns(cwd, max);
}
@@ -110,7 +110,7 @@ export async function handleApi(params: TeamToolParamsValue, ctx: TeamContext):
110
110
  const approval = current.manifest.planApproval;
111
111
  if (!approval?.required || approval.status !== "pending") return result("Run has no pending plan approval request.", { action: "api", status: "error", runId: loaded.manifest.runId }, true);
112
112
  const now = new Date().toISOString();
113
- const tasks = current.tasks.map((task) => task.status === "queued" || task.status === "running" ? { ...task, status: "cancelled" as const, finishedAt: now, error: "Plan approval was cancelled." } : task);
113
+ const tasks = current.tasks.map((task) => task.status === "queued" || task.status === "running" || task.status === "waiting" ? { ...task, status: "cancelled" as const, finishedAt: now, error: "Plan approval was cancelled." } : task);
114
114
  let manifest: typeof current.manifest = { ...current.manifest, updatedAt: now, planApproval: { ...approval, status: "cancelled" as const, cancelledAt: now, updatedAt: now } };
115
115
  saveRunManifest(manifest);
116
116
  saveRunTasks(manifest, tasks);
@@ -1,31 +1,103 @@
1
- import type { TeamToolParamsValue } from "../../schema/team-tool-schema.ts";
2
- import { withRunLockSync } from "../../state/locks.ts";
3
- import { loadRunManifestById, saveRunTasks, updateRunStatus } from "../../state/state-store.ts";
4
- import { saveCrewAgents, recordFromTask } from "../../runtime/crew-agent-records.ts";
5
- import { writeForegroundInterruptRequest } from "../../runtime/foreground-control.ts";
6
- import { logInternalError } from "../../utils/internal-error.ts";
7
- import type { PiTeamsToolResult } from "../tool-result.ts";
8
- import { result, type TeamContext } from "./context.ts";
9
-
10
- export function handleCancel(params: TeamToolParamsValue, ctx: TeamContext): PiTeamsToolResult {
11
- if (!params.runId) return result("Cancel requires runId.", { action: "cancel", status: "error" }, true);
12
- const loaded = loadRunManifestById(ctx.cwd, params.runId);
13
- if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "cancel", status: "error" }, true);
14
- return withRunLockSync(loaded.manifest, () => {
15
- if (loaded.manifest.status === "completed" && !params.force) return result(`Run ${loaded.manifest.runId} is already completed; nothing to cancel. Use force: true to mark it cancelled anyway.`, { action: "cancel", status: "ok", runId: loaded.manifest.runId, artifactsRoot: loaded.manifest.artifactsRoot });
16
- const tasks = loaded.tasks.map((task) => task.status === "queued" || task.status === "running" ? { ...task, status: "cancelled" as const, finishedAt: new Date().toISOString(), error: "Run cancelled by user request." } : task);
17
- saveRunTasks(loaded.manifest, tasks);
18
- try {
19
- saveCrewAgents(loaded.manifest, tasks.map((task) => recordFromTask(loaded.manifest, task, "child-process")));
20
- } catch (error) {
21
- logInternalError("team-tool.handleCancel.crewAgents", error, `runId=${loaded.manifest.runId}`);
22
- }
23
- try {
24
- writeForegroundInterruptRequest(loaded.manifest, "Run cancelled by user request.");
25
- } catch (error) {
26
- logInternalError("team-tool.handleCancel.interruptRequest", error, `runId=${loaded.manifest.runId}`);
27
- }
28
- const updated = updateRunStatus(loaded.manifest, "cancelled", "Run cancelled by user request. Already-finished worker processes are not retroactively changed.");
29
- return result(`Cancelled run ${updated.runId}.`, { action: "cancel", status: "ok", runId: updated.runId, artifactsRoot: updated.artifactsRoot });
30
- });
31
- }
1
+ import type { TeamToolParamsValue } from "../../schema/team-tool-schema.ts";
2
+ import { withRunLockSync } from "../../state/locks.ts";
3
+ import { loadRunManifestById, saveRunTasks, updateRunStatus } from "../../state/state-store.ts";
4
+ import { saveCrewAgents, recordFromTask } from "../../runtime/crew-agent-records.ts";
5
+ import { writeForegroundInterruptRequest } from "../../runtime/foreground-control.ts";
6
+ import { logInternalError } from "../../utils/internal-error.ts";
7
+ import type { PiTeamsToolResult } from "../tool-result.ts";
8
+ import { result, type TeamContext } from "./context.ts";
9
+
10
/** Outcome of classifying a run's task IDs for cancellation (see `abortOwned`). */
export interface AbortOwnedResult {
	/** IDs of tasks that are still cancellable. */
	abortedIds: string[];
	/** Requested IDs that were not found in the run. */
	missingIds: string[];
	/** IDs of cancellable tasks owned by a different session. */
	foreignIds: string[];
}
15
+
16
/**
 * Classify task IDs of a run for cancellation. This function only classifies;
 * it does not write any state.
 *
 * - Tasks with status "queued", "running" or "waiting" → `abortedIds`
 * - Requested IDs that are not part of the run (or all requested IDs when the
 *   run itself cannot be loaded) → `missingIds`
 * - Tasks already in any other state (e.g. completed/failed/cancelled) → not
 *   included in any list
 * - `foreignIds` is a placeholder for tasks owned by a different session. It is
 *   currently always empty: every cancellable task is treated as owned by the
 *   requesting session.
 *
 * @param runId ID of the run whose tasks are classified.
 * @param taskIds Task IDs to classify; when omitted, every task in the run is
 *   considered. Duplicate IDs are reported once. The array is not retained.
 * @param ctx Tool context; only `cwd` is read, to locate the run.
 * @returns The classification. The arrays are freshly allocated.
 */
export function abortOwned(
	runId: string,
	taskIds: string[] | undefined,
	ctx: TeamContext,
): AbortOwnedResult {
	// De-duplicate up front so no ID is reported twice, and so the caller's
	// array is never handed back by reference.
	const requestedIds = taskIds ? [...new Set(taskIds)] : undefined;

	const loaded = loadRunManifestById(ctx.cwd, runId);
	if (!loaded) return { abortedIds: [], missingIds: requestedIds ?? [], foreignIds: [] };

	const classified: AbortOwnedResult = { abortedIds: [], missingIds: [], foreignIds: [] };
	const taskMap = new Map(loaded.tasks.map((t) => [t.id, t] as const));
	const targetIds = requestedIds ?? loaded.tasks.map((t) => t.id);

	for (const id of targetIds) {
		const task = taskMap.get(id);
		if (!task) {
			classified.missingIds.push(id);
			continue;
		}
		// Anything that is not queued/running/waiting is skipped silently.
		if (task.status !== "queued" && task.status !== "running" && task.status !== "waiting") continue;
		// All tasks in a run are owned by the session that created the run.
		// Foreign detection is a placeholder for when tasks can be owned by
		// different sessions (e.g., shared runs with session-scoped tasks).
		classified.abortedIds.push(id);
	}

	return classified;
}
57
+
58
/**
 * Handle the `cancel` action: mark every queued/running/waiting task of a run
 * as cancelled, request a foreground interrupt, and set the run status to
 * "cancelled".
 *
 * The run is loaded once to obtain the manifest for locking and then re-read
 * inside the lock, so the tasks that get saved are based on the state current
 * at lock time, not on a snapshot taken before the lock was acquired.
 *
 * @param params Tool parameters; `runId` is required. `force` allows
 *   cancelling a run that is already completed or cancelled.
 * @param ctx Tool context; `cwd` is used to locate the run.
 * @returns An error result when `runId` is missing or the run is not found;
 *   otherwise an ok result that also carries the `abortedIds` / `missingIds` /
 *   `foreignIds` classification.
 */
export function handleCancel(params: TeamToolParamsValue, ctx: TeamContext): PiTeamsToolResult {
	const runId = params.runId;
	if (!runId) return result("Cancel requires runId.", { action: "cancel", status: "error" }, true);
	const loaded = loadRunManifestById(ctx.cwd, runId);
	if (!loaded) return result(`Run '${runId}' not found.`, { action: "cancel", status: "error" }, true);
	return withRunLockSync(loaded.manifest, () => {
		// Re-read under the lock; fall back to the pre-lock snapshot if the reload fails.
		const current = loadRunManifestById(ctx.cwd, runId) ?? loaded;

		if ((current.manifest.status === "completed" || current.manifest.status === "cancelled") && !params.force) return result(`Run ${current.manifest.runId} is already ${current.manifest.status}; nothing to cancel. Use force: true to mark it cancelled anyway.`, { action: "cancel", status: "ok", runId: current.manifest.runId, artifactsRoot: current.manifest.artifactsRoot });

		// Classify tasks for foreign-aware cancellation
		const abortResult = abortOwned(current.manifest.runId, undefined, ctx);
		const cancellableIds = new Set(abortResult.abortedIds);

		// One timestamp for the whole operation so all cancelled tasks agree.
		const now = new Date().toISOString();
		const tasks = current.tasks.map((task) => {
			if (cancellableIds.has(task.id) && (task.status === "queued" || task.status === "running" || task.status === "waiting")) {
				return { ...task, status: "cancelled" as const, finishedAt: now, error: "Run cancelled by user request." };
			}
			return task;
		});
		saveRunTasks(current.manifest, tasks);
		// Agent records and the interrupt request are best-effort: a failure is
		// logged but does not stop the run from being marked cancelled.
		try {
			saveCrewAgents(current.manifest, tasks.map((task) => recordFromTask(current.manifest, task, "child-process")));
		} catch (error) {
			logInternalError("team-tool.handleCancel.crewAgents", error, `runId=${current.manifest.runId}`);
		}
		try {
			writeForegroundInterruptRequest(current.manifest, "Run cancelled by user request.");
		} catch (error) {
			logInternalError("team-tool.handleCancel.interruptRequest", error, `runId=${current.manifest.runId}`);
		}
		const updated = updateRunStatus(current.manifest, "cancelled", "Run cancelled by user request. Already-finished worker processes are not retroactively changed.");

		// Build descriptive message including foreign/missing info
		const parts = [`Cancelled run ${updated.runId}.`];
		if (abortResult.foreignIds.length > 0) parts.push(` ${abortResult.foreignIds.length} task(s) belong to another session and were not cancelled: ${abortResult.foreignIds.join(", ")}.`);
		if (abortResult.missingIds.length > 0) parts.push(` ${abortResult.missingIds.length} task ID(s) not found: ${abortResult.missingIds.join(", ")}.`);

		return result(parts.join(""), {
			action: "cancel",
			status: "ok",
			runId: updated.runId,
			artifactsRoot: updated.artifactsRoot,
			abortedIds: abortResult.abortedIds,
			missingIds: abortResult.missingIds,
			foreignIds: abortResult.foreignIds,
		});
	});
}
@@ -11,6 +11,7 @@ export type TeamContext = Pick<ExtensionContext, "cwd"> & Partial<Pick<Extension
11
11
  signal?: AbortSignal;
12
12
  startForegroundRun?: (runner: (signal?: AbortSignal) => Promise<void>, runId?: string) => void;
13
13
  onRunStarted?: (runId: string) => void;
14
+ onJsonEvent?: (taskId: string, runId: string, event: unknown) => void;
14
15
  };
15
16
 
16
17
  export function result(text: string, details: TeamToolDetails, isError = false): PiTeamsToolResult {
@@ -0,0 +1,67 @@
1
+ import type { TeamToolParamsValue } from "../../schema/team-tool-schema.ts";
2
+ import { withRunLockSync } from "../../state/locks.ts";
3
+ import { loadRunManifestById, saveRunTasks } from "../../state/state-store.ts";
4
+ import { saveCrewAgents, recordFromTask } from "../../runtime/crew-agent-records.ts";
5
+ import { logInternalError } from "../../utils/internal-error.ts";
6
+ import type { PiTeamsToolResult } from "../tool-result.ts";
7
+ import { result, type TeamContext } from "./context.ts";
8
+
9
/**
 * Handle the `respond` action: send a message to a waiting (interactive) task.
 *
 * Only tasks whose status is "waiting" are affected. Each one is set back to
 * "running" and the message is stored in its `adaptive.task` field, with
 * `adaptive.phase` set to "resumed". An empty message keeps the existing
 * `adaptive.task`.
 *
 * The run is re-read inside the run lock so the saved task list is based on
 * the state current at lock time.
 *
 * @param params Tool parameters. `runId` is required, plus at least one of
 *   `taskId` and `message`. With `taskId`, only that task is considered;
 *   without it, every waiting task in the run is resumed.
 * @param ctx Tool context; `cwd` is used to locate the run.
 * @returns An error result when parameters are missing, the run is not found,
 *   or no matching task is in the waiting state; otherwise an ok result listing
 *   the resumed task IDs.
 */
export function handleRespond(params: TeamToolParamsValue, ctx: TeamContext): PiTeamsToolResult {
	const runId = params.runId;
	if (!runId) return result("Respond requires runId.", { action: "respond", status: "error" }, true);
	if (!params.message && !params.taskId) return result("Respond requires taskId and/or message.", { action: "respond", status: "error" }, true);

	const loaded = loadRunManifestById(ctx.cwd, runId);
	if (!loaded) return result(`Run '${runId}' not found.`, { action: "respond", status: "error" }, true);

	return withRunLockSync(loaded.manifest, () => {
		// Re-read under the lock; fall back to the pre-lock snapshot if the reload fails.
		const current = loadRunManifestById(ctx.cwd, runId) ?? loaded;
		const taskId = params.taskId;
		const message = params.message ?? "";

		// A task is a target only if it is waiting AND (when a taskId was
		// given) it is that task. The status check applies in both cases so a
		// task that is not waiting is never reported as resumed.
		const isTarget = (task: (typeof current.tasks)[number]): boolean =>
			task.status === "waiting" && (!taskId || task.id === taskId);

		const resumedIds = current.tasks.filter(isTarget).map((t) => t.id);

		if (resumedIds.length === 0) {
			return result(
				taskId ? `Task '${taskId}' not found or not in waiting state.` : `No waiting tasks in run ${current.manifest.runId}.`,
				{ action: "respond", status: "error" },
				true,
			);
		}

		// Transition waiting tasks back to running
		const updatedTasks = current.tasks.map((task) => {
			if (!isTarget(task)) return task;
			return {
				...task,
				status: "running" as const,
				// Store the response in the task's adaptive field
				adaptive: {
					...task.adaptive,
					phase: "resumed",
					task: message || task.adaptive?.task || "",
				},
			};
		});

		saveRunTasks(current.manifest, updatedTasks);
		// Agent records are best-effort: a failure is logged, not surfaced.
		try {
			saveCrewAgents(current.manifest, updatedTasks.map((task) => recordFromTask(current.manifest, task, "child-process")));
		} catch (error) {
			logInternalError("team-tool.handleRespond.crewAgents", error, `runId=${current.manifest.runId}`);
		}

		return result(
			`Resumed ${resumedIds.length} task(s): ${resumedIds.join(", ")}. Message: ${message || "(no message)"}`,
			{ action: "respond", status: "ok", runId: current.manifest.runId, resumedIds },
		);
	});
}
@@ -136,7 +136,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
136
136
  if (executeWorkers && ctx.startForegroundRun) {
137
137
  ctx.onRunStarted?.(updatedManifest.runId);
138
138
  ctx.startForegroundRun(async (signal) => {
139
- await executeTeamRun({ manifest: updatedManifest, tasks, team, workflow, agents, executeWorkers, limits: executedConfig.limits, runtime, runtimeConfig: executedConfig.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal, reliability: executedConfig.reliability, metricRegistry: ctx.metricRegistry });
139
+ await executeTeamRun({ manifest: updatedManifest, tasks, team, workflow, agents, executeWorkers, limits: executedConfig.limits, runtime, runtimeConfig: executedConfig.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal, reliability: executedConfig.reliability, metricRegistry: ctx.metricRegistry, onJsonEvent: ctx.onJsonEvent });
140
140
  }, updatedManifest.runId);
141
141
  const text = [
142
142
  `Started foreground pi-crew run ${updatedManifest.runId}.`,
@@ -152,7 +152,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
152
152
  ].join("\n");
153
153
  return result(text, { action: "run", status: "ok", runId: updatedManifest.runId, artifactsRoot: updatedManifest.artifactsRoot });
154
154
  }
155
- const executed = await executeTeamRun({ manifest: updatedManifest, tasks, team, workflow, agents, executeWorkers, limits: executedConfig.limits, runtime, runtimeConfig: executedConfig.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal: ctx.signal, reliability: executedConfig.reliability, metricRegistry: ctx.metricRegistry });
155
+ const executed = await executeTeamRun({ manifest: updatedManifest, tasks, team, workflow, agents, executeWorkers, limits: executedConfig.limits, runtime, runtimeConfig: executedConfig.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal: ctx.signal, reliability: executedConfig.reliability, metricRegistry: ctx.metricRegistry, onJsonEvent: ctx.onJsonEvent });
156
156
  const text = [
157
157
  `Created pi-crew run ${executed.manifest.runId}.`,
158
158
  `Team: ${team.name}`,
@@ -8,6 +8,7 @@ import { applyAttentionState, formatActivityAge, resolveCrewControlConfig } from
8
8
  import { readCrewAgents } from "../../runtime/crew-agent-records.ts";
9
9
  import { checkProcessLiveness, isActiveRunStatus } from "../../runtime/process-status.ts";
10
10
  import { formatTaskGraphLines, waitingReason } from "../../runtime/task-display.ts";
11
+ import { verifyTaskCompletion, formatOutputPreview } from "../../runtime/completion-guard.ts";
11
12
  import type { PiTeamsToolResult } from "../tool-result.ts";
12
13
  import { result, type TeamContext } from "./context.ts";
13
14
 
@@ -52,7 +53,7 @@ export function handleStatus(params: TeamToolParamsValue, ctx: TeamContext): PiT
52
53
  const totalUsage = aggregateUsage(tasks);
53
54
  const activeAgents = crewAgents.filter((agent) => agent.status === "running");
54
55
  const completedAgents = crewAgents.filter((agent) => agent.status !== "running");
55
- const waitingTasks = tasks.filter((task) => task.status === "queued");
56
+ const waitingTasks = tasks.filter((task) => task.status === "queued" || task.status === "waiting");
56
57
  const agentLine = (agent: typeof crewAgents[number]): string => `- ${agent.id} [${agent.status}] ${agent.role} -> ${agent.agent} runtime=${agent.runtime}${agent.model ? ` model=${agent.model}` : ""}${agent.usage ? ` usage=${formatUsage(agent.usage)}` : ""}${agent.progress?.activityState ? ` activityState=${agent.progress.activityState}` : ""}${formatActivityAge(agent) ? ` activity=${formatActivityAge(agent)}` : ""}${agent.progress?.currentTool ? ` tool=${agent.progress.currentTool}` : ""}${agent.toolUses ? ` tools=${agent.toolUses}` : ""}${!agent.usage && agent.progress?.tokens ? ` tokens=${agent.progress.tokens}` : ""}${agent.progress?.turns ? ` turns=${agent.progress.turns}` : ""}${agent.jsonEvents !== undefined ? ` jsonEvents=${agent.jsonEvents}` : ""}${agent.outputPath ? ` output=${agent.outputPath}` : ""}${agent.transcriptPath ? ` transcript=${agent.transcriptPath}` : ""}${agent.statusPath ? ` status=${agent.statusPath}` : ""}${agent.error ? ` error=${agent.error}` : ""}`;
57
58
  const lines = [
58
59
  `Run: ${manifest.runId}`,
@@ -71,6 +72,11 @@ export function handleStatus(params: TeamToolParamsValue, ctx: TeamContext): PiT
71
72
  "Tasks:",
72
73
  ...(tasks.length ? tasks.map((task) => `- ${task.id} [${task.status}] ${task.role} -> ${task.agent}${task.taskPacket ? ` scope=${task.taskPacket.scope}` : ""}${task.verification ? ` green=${task.verification.observedGreenLevel}/${task.verification.requiredGreenLevel}` : ""}${task.modelAttempts?.length ? ` attempts=${task.modelAttempts.length}` : ""}${task.modelRouting ? ` modelRouting=${task.modelRouting.requested ? `${task.modelRouting.requested}->` : ""}${task.modelRouting.resolved}${task.modelRouting.usedAttempt ? ` attempt=${task.modelRouting.usedAttempt + 1}` : ""}` : ""}${task.agentProgress?.activityState ? ` activityState=${task.agentProgress.activityState}` : ""}${attentionByTask.get(task.id)?.data?.reason ? ` attention=${String(attentionByTask.get(task.id)?.data?.reason)}` : ""}${task.jsonEvents !== undefined ? ` jsonEvents=${task.jsonEvents}` : ""}${task.usage ? ` usage=${JSON.stringify(task.usage)}` : ""}${task.resultArtifact ? ` result=${task.resultArtifact.path}` : ""}${task.transcriptArtifact ? ` transcript=${task.transcriptArtifact.path}` : ""}${task.worktree ? ` worktree=${task.worktree.path}` : ""}${task.error ? ` error=${task.error}` : ""}`) : ["- (none)"]),
73
74
  `Task counts: ${[...counts.entries()].map(([status, count]) => `${status}=${count}`).join(", ") || "none"}`,
75
+ "Completion verification:",
76
+ ...(tasks.filter((t) => t.status === "completed").length ? tasks.filter((t) => t.status === "completed").map((t) => {
77
+ const guard = verifyTaskCompletion(t, manifest);
78
+ return `- ${t.id} green=${guard.greenLevel}/3${guard.warnings.length ? ` warnings=[${guard.warnings.join(", ")}]` : ""}`;
79
+ }) : ["- (no completed tasks)"]),
74
80
  "Active agents:",
75
81
  ...(activeAgents.length ? activeAgents.map(agentLine) : ["- (none)"]),
76
82
  "Waiting tasks:",
@@ -3,4 +3,8 @@ export interface TeamToolDetails {
3
3
  status: "ok" | "error" | "planned";
4
4
  runId?: string;
5
5
  artifactsRoot?: string;
6
+ abortedIds?: string[];
7
+ missingIds?: string[];
8
+ foreignIds?: string[];
9
+ resumedIds?: string[];
6
10
  }
@@ -44,6 +44,7 @@ import { handleStatus } from "./team-tool/status.ts";
44
44
  import { handleArtifacts, handleEvents, handleSummary } from "./team-tool/inspect.ts";
45
45
  import { handleCleanup, handleExport, handleForget, handleImport, handleImports, handlePrune, handleWorktrees } from "./team-tool/lifecycle-actions.ts";
46
46
  import { handleCancel } from "./team-tool/cancel.ts";
47
+ import { handleRespond } from "./team-tool/respond.ts";
47
48
  import { handlePlan } from "./team-tool/plan.ts";
48
49
  import { logInternalError } from "../utils/internal-error.ts";
49
50
 
@@ -278,6 +279,7 @@ export async function handleTeamTool(params: TeamToolParamsValue, ctx: TeamConte
278
279
  case "run": return handleRun(params, ctx);
279
280
  case "status": return handleStatus(params, ctx);
280
281
  case "cancel": return handleCancel(params, ctx);
282
+ case "respond": return handleRespond(params, ctx);
281
283
  case "plan": return handlePlan(params, ctx);
282
284
  case "resume": return handleResume(params, ctx);
283
285
  case "create": return handleCreate(params, ctx);
@@ -24,6 +24,9 @@ export function wireEventToMetrics(events: ExtensionAPI["events"] | undefined, r
24
24
  const mailboxCount = registry.counter("crew.mailbox.count", "Total mailbox messages by direction");
25
25
  const retryAttemptCount = registry.counter("crew.task.retry_attempt_total", "Retry attempts by run and task");
26
26
  const deadletterCount = registry.counter("crew.task.deadletter_total", "Deadletter triggers by reason");
27
+ const overflowCount = registry.counter("crew.task.overflow_phase_total", "Overflow recovery phase transitions");
28
+ const waitingCount = registry.counter("crew.task.waiting_total", "Tasks entering waiting state");
29
+ const supervisorContactCount = registry.counter("crew.task.supervisor_contact_total", "Supervisor contact requests by reason");
27
30
  registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds");
28
31
  const runDuration = registry.histogram("crew.run.duration_ms", "Run end-to-end duration, milliseconds");
29
32
  const taskDuration = registry.histogram("crew.task.duration_ms", "Task duration, milliseconds");
@@ -38,6 +41,9 @@ export function wireEventToMetrics(events: ExtensionAPI["events"] | undefined, r
38
41
  ["crew.task.failed", () => taskCount.inc({ status: "failed" })],
39
42
  ["crew.task.retry_attempt", (data) => { const item = recordValue(data); taskCount.inc({ status: "retry" }); retryAttemptCount.inc({ runId: stringValue(item.runId, "unknown"), taskId: stringValue(item.taskId, "unknown") }); }],
40
43
  ["crew.task.deadletter", (data) => { const item = recordValue(data); deadletterCount.inc({ reason: stringValue(item.reason, "unknown") }); }],
44
+ ["crew.task.overflow", (data) => { const item = recordValue(data); overflowCount.inc({ phase: stringValue(item.phase, "unknown"), previous_phase: stringValue(item.previousPhase, "none") }); }],
45
+ ["task.waiting", (data) => { const item = recordValue(data); waitingCount.inc({ taskId: stringValue(item.taskId, "unknown"), runId: stringValue(item.runId, "unknown") }); }],
46
+ ["supervisor.contact", (data) => { const item = recordValue(data); supervisorContactCount.inc({ reason: stringValue(item.reason, "unknown"), taskId: stringValue(item.taskId, "unknown") }); }],
41
47
  ["crew.subagent.completed", (data) => { const item = recordValue(data); subagentCount.inc({ status: stringValue(item.status, "completed") }); }],
42
48
  ["crew.subagent.failed", () => subagentCount.inc({ status: "failed" })],
43
49
  ["crew.mailbox.message", (data) => { const item = recordValue(data); mailboxCount.inc({ direction: stringValue(item.direction, "unknown") }); }],