npm - @desplega.ai/agent-swarm - Versions diffs - 1.53.1 → 1.54.1 - Mend

@desplega.ai/agent-swarm 1.53.1 → 1.54.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/openapi.json +22 -1
package/package.json +1 -1
package/src/be/db.ts +34 -0
package/src/be/migrations/024_add_was_paused.sql +1 -0
package/src/commands/runner.ts +46 -1
package/src/heartbeat/heartbeat.ts +107 -11
package/src/http/agents.ts +3 -0
package/src/http/heartbeat.ts +43 -0
package/src/http/index.ts +4 -2
package/src/http/poll.ts +3 -0
package/src/http/tasks.ts +27 -4
package/src/providers/pi-mono-adapter.ts +25 -0
package/src/scheduler/scheduler.ts +1 -0
package/src/tests/events-db.test.ts +0 -1
package/src/tests/events-http.test.ts +10 -4
package/src/tests/heartbeat.test.ts +148 -6
package/src/tests/workflow-hitl-routing.test.ts +545 -0
package/src/tools/store-progress.ts +8 -2
package/src/types.ts +3 -0
package/src/workflows/engine.ts +59 -18
package/src/workflows/recovery.ts +4 -4
package/src/workflows/resume.ts +21 -15

package/openapi.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "openapi": "3.1.0",
   "info": {
     "title": "Agent Swarm API",
-    "version": "1.53.0",
+    "version": "1.53.1",
     "description": "Multi-agent orchestration API for Claude Code, Codex, and Gemini CLI. Enables task distribution, agent communication, and service discovery.\n\nMCP tools are documented separately in [MCP.md](./MCP.md)."
   },
   "servers": [
@@ -2424,6 +2424,27 @@
         }
       }
     },
+    "/api/heartbeat/sweep": {
+      "post": {
+        "summary": "Trigger an immediate heartbeat sweep",
+        "tags": [
+          "Heartbeat"
+        ],
+        "security": [
+          {
+            "bearerAuth": []
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Sweep completed successfully"
+          },
+          "401": {
+            "description": "Unauthorized"
+          }
+        }
+      }
+    },
     "/api/memory/index": {
       "post": {
         "summary": "Ingest content into memory system (async embedding)",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@desplega.ai/agent-swarm",
-  "version": "1.53.1",
+  "version": "1.54.1",
   "description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
   "license": "MIT",
   "author": "desplega.sh <contact@desplega.sh>",

package/src/be/db.ts CHANGED Viewed

@@ -728,6 +728,7 @@ type AgentTaskRow = {
   peakContextPercent: number | null;
   totalContextTokensUsed: number | null;
   contextWindowSize: number | null;
+  was_paused: number;
 };
 function rowToAgentTask(row: AgentTaskRow): AgentTask {
@@ -781,6 +782,7 @@ function rowToAgentTask(row: AgentTaskRow): AgentTask {
     failureReason: row.failureReason ?? undefined,
     output: row.output ?? undefined,
     progress: row.progress ?? undefined,
+    wasPaused: !!row.was_paused,
   };
 }
@@ -1509,6 +1511,7 @@ export function pauseTask(id: string): AgentTask | null {
     .prepare<AgentTaskRow, [string]>(
       `UPDATE agent_tasks
        SET status = 'paused',
+           was_paused = 1,
            lastUpdatedAt = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
        WHERE id = ? AND status = 'in_progress'
        RETURNING *`,
@@ -1543,6 +1546,7 @@ export function resumeTask(taskId: string): AgentTask | null {
     .prepare<AgentTaskRow, [string]>(
       `UPDATE agent_tasks
        SET status = 'in_progress',
+           was_paused = 1,
            lastUpdatedAt = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
        WHERE id = ? AND status = 'paused'
        RETURNING *`,
@@ -5599,6 +5603,18 @@ export function updateActiveSessionProviderSessionId(
   return result.changes > 0;
 }
+/**
+ * Fetch the active session recorded for a given task, if there is one.
+ * The heartbeat uses this to cross-reference stalled tasks with worker sessions.
+ *
+ * @param taskId - Task whose `active_sessions` row should be looked up.
+ * @returns The first matching session row, or `null` when the query yields no row.
+ */
+export function getActiveSessionForTask(taskId: string): ActiveSession | null {
+  const lookup = getDb().prepare<ActiveSession, [string]>(
+    "SELECT * FROM active_sessions WHERE taskId = ? LIMIT 1",
+  );
+  const session = lookup.get(taskId);
+  return session ?? null;
+}
 /**
  * Reassociate session logs from a runner session to a real task ID.
  * Used when a pool task is claimed — logs were stored under a random UUID,
@@ -6222,6 +6238,24 @@ export function getStepByIdempotencyKey(key: string): WorkflowRunStep | null {
   return row ? rowToWorkflowRunStep(row) : null;
 }
+/**
+ * Count the `workflow_run_steps` rows recorded for one node within one workflow run.
+ *
+ * @param runId - Workflow run to look in.
+ * @param nodeId - Node whose steps are counted.
+ * @returns The number of matching rows, or 0 when the query yields no result row.
+ */
+export function getStepCountForNode(runId: string, nodeId: string): number {
+  const countQuery = getDb().prepare<{ cnt: number }, [string, string]>(
+    "SELECT COUNT(*) as cnt FROM workflow_run_steps WHERE runId = ? AND nodeId = ?",
+  );
+  const counted = countQuery.get(runId, nodeId);
+  if (counted == null) {
+    return 0;
+  }
+  return counted.cnt ?? 0;
+}
+/**
+ * Return the step with the greatest `startedAt` for one node within one workflow run.
+ *
+ * @param runId - Workflow run to look in.
+ * @param nodeId - Node whose latest step is wanted.
+ * @returns The mapped step, or `null` when the node has no step rows in this run.
+ */
+export function getLatestStepForNode(runId: string, nodeId: string): WorkflowRunStep | null {
+  const latestQuery = getDb().prepare<WorkflowRunStepRow, [string, string]>(
+    "SELECT * FROM workflow_run_steps WHERE runId = ? AND nodeId = ? ORDER BY startedAt DESC LIMIT 1",
+  );
+  const latestRow = latestQuery.get(runId, nodeId);
+  if (!latestRow) {
+    return null;
+  }
+  return rowToWorkflowRunStep(latestRow);
+}
 // --- Workflow Version History ---
 type WorkflowVersionRow = {

package/src/be/migrations/024_add_was_paused.sql ADDED Viewed

@@ -0,0 +1 @@
+ALTER TABLE agent_tasks ADD COLUMN was_paused INTEGER NOT NULL DEFAULT 0;

package/src/commands/runner.ts CHANGED Viewed

@@ -305,6 +305,12 @@ export function humanizeToolName(name: string): string {
 export function toolCallToProgress(toolName: string, args: unknown): string | null {
   if (SKIP_PROGRESS_TOOLS.has(toolName)) return null;
+  // Normalize: pi-mono uses lowercase ("read"), Claude uses PascalCase ("Read")
+  const normalized =
+    toolName.startsWith("mcp__") || toolName.includes("_")
+      ? toolName
+      : toolName.charAt(0).toUpperCase() + toolName.slice(1);
   const a = args as Record<string, unknown>;
   const shortPath = (p: unknown) => {
     if (typeof p !== "string") return "";
@@ -313,7 +319,7 @@ export function toolCallToProgress(toolName: string, args: unknown): string | nu
     return parts.length > 2 ? parts.slice(-2).join("/") : p;
   };
-  switch (toolName) {
+  switch (normalized) {
     case "Read":
       return `📖 Reading ${shortPath(a.file_path)}`;
     case "Edit":
@@ -1067,6 +1073,25 @@ async function cleanupActiveSessions(config: ApiConfig): Promise<void> {
   }
 }
+/**
+ * Trigger a heartbeat sweep via the API (lead startup self-check).
+ *
+ * Sends a body-less POST to `/api/heartbeat/sweep`, identifying the caller with `X-Agent-ID`
+ * and, when an API key is configured, a bearer `Authorization` header.
+ *
+ * @param config - API connection settings; `apiUrl`, `agentId` and `apiKey` are read.
+ * @returns `true` when the server answered with a 2xx status; `false` for any other status
+ *   or when the request itself failed. Request failures are logged as warnings, not thrown.
+ */
+async function triggerHeartbeatSweep(config: ApiConfig): Promise<boolean> {
+  try {
+    const headers: Record<string, string> = {
+      "Content-Type": "application/json",
+      "X-Agent-ID": config.agentId,
+    };
+    if (config.apiKey) headers.Authorization = `Bearer ${config.apiKey}`;
+    const resp = await fetch(`${config.apiUrl}/api/heartbeat/sweep`, {
+      method: "POST",
+      headers,
+    });
+    return resp.ok;
+  } catch (err) {
+    // A thrown value is not guaranteed to be an Error. Narrow instead of casting so the log
+    // never prints "undefined" and a null/undefined rejection cannot throw from this handler.
+    const reason = err instanceof Error ? err.message : String(err);
+    console.warn(`[runner] Failed to trigger heartbeat sweep: ${reason}`);
+    return false;
+  }
+}
 /** Trigger types returned by the poll API */
 interface Trigger {
   type:
@@ -1991,6 +2016,9 @@ async function checkCompletedProcesses(
           failureReason,
         },
         validator: (data) => data.exitCode === 0,
+        // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
+        filter: ({}, ctx) => ctx.deps.length > 0,
+        conditions: [{ timeout_ms: 3_600_000 }], // 1 hour: process runtime
       });
       // Commit channel activity cursors after successful processing
@@ -2703,6 +2731,17 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
     }
     // ========== END: Resume paused tasks ==========
+    // ========== Lead startup self-check ==========
+    if (isLead) {
+      console.log(`[${role}] Running startup heartbeat sweep...`);
+      const swept = await triggerHeartbeatSweep(apiConfig);
+      if (swept) {
+        console.log(`[${role}] Startup heartbeat sweep completed`);
+      } else {
+        console.warn(`[${role}] Startup heartbeat sweep failed (non-fatal)`);
+      }
+    }
     // Track last finished task check for leads (to avoid re-processing)
     while (true) {
       // Ping server on each iteration to keep status updated
@@ -2790,6 +2829,9 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
                 triggerType: trigger.type,
                 role,
               },
+              // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
+              filter: ({}, ctx) => ctx.deps.length > 0,
+              conditions: [{ timeout_ms: 60_000 }], // 1 min: immediate after poll
             });
           }
@@ -3020,6 +3062,9 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
               role,
               model: taskModel,
             },
+            // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
+            filter: ({}, ctx) => ctx.deps.length > 0,
+            conditions: [{ timeout_ms: 60_000 }], // 1 min: process startup
           });
           // Attach trigger metadata for logging

package/src/heartbeat/heartbeat.ts CHANGED Viewed

@@ -2,6 +2,9 @@ import {
   claimTask,
   cleanupStaleSessions,
   createTaskExtended,
+  deleteActiveSession,
+  failTask,
+  getActiveSessionForTask,
   getActiveTaskCount,
   getAllAgents,
   getDb,
@@ -29,15 +32,25 @@ import "./templates";
 /** Default heartbeat interval: 90 seconds */
 const DEFAULT_INTERVAL_MS = Number(process.env.HEARTBEAT_INTERVAL_MS) || 90_000;
-/** Stall threshold: tasks in_progress with no update for this many minutes */
+/** Stall threshold: tasks with fresh worker heartbeat but no task update for this many minutes */
 const STALL_THRESHOLD_MINUTES = Number(process.env.HEARTBEAT_STALL_THRESHOLD_MIN) || 30;
+/** Stall threshold: tasks with no active session (worker clearly dead) */
+const STALL_THRESHOLD_NO_SESSION_MIN = Number(process.env.HEARTBEAT_STALL_NO_SESSION_MIN) || 5;
+/** Stall threshold: tasks with stale worker heartbeat */
+const STALL_THRESHOLD_STALE_HEARTBEAT_MIN = Number(process.env.HEARTBEAT_STALL_STALE_HB_MIN) || 15;
 /** Stale resource cleanup threshold (minutes) */
 const STALE_CLEANUP_THRESHOLD_MINUTES = Number(process.env.HEARTBEAT_STALE_CLEANUP_MIN) || 30;
 /** Max pool tasks to auto-assign per sweep */
 const MAX_AUTO_ASSIGN_PER_SWEEP = Number(process.env.HEARTBEAT_MAX_AUTO_ASSIGN) || 5;
+/** Escalation cooldown: minimum time between escalations for the same task set (ms) */
+const ESCALATION_COOLDOWN_MS =
+  Number(process.env.HEARTBEAT_ESCALATION_COOLDOWN_MS) || 15 * 60 * 1000;
 const HEARTBEAT_ESCALATION_MARKER = "[heartbeat-escalation]";
 // ============================================================================
@@ -46,6 +59,7 @@ const HEARTBEAT_ESCALATION_MARKER = "[heartbeat-escalation]";
 export interface HeartbeatFindings {
   stalledTasks: AgentTask[];
+  autoFailedTasks: Array<{ taskId: string; agentId: string; reason: string }>;
   workerHealthFixes: Array<{ agentId: string; oldStatus: string; newStatus: string }>;
   autoAssigned: Array<{ taskId: string; agentId: string }>;
   staleCleanup: {
@@ -66,6 +80,9 @@ export interface HeartbeatFindings {
 let heartbeatInterval: ReturnType<typeof setInterval> | null = null;
 let isSweeping = false;
+/** Tracks last escalation time per escalation key to prevent spam */
+const lastEscalationTime: Map<string, number> = new Map();
 // ============================================================================
 // Tier 1: Preflight Gate
 // ============================================================================
@@ -106,6 +123,7 @@ export function preflightGate(): boolean {
 export async function codeLevelTriage(): Promise<HeartbeatFindings> {
   const findings: HeartbeatFindings = {
     stalledTasks: [],
+    autoFailedTasks: [],
     workerHealthFixes: [],
     autoAssigned: [],
     staleCleanup: {
@@ -118,8 +136,8 @@ export async function codeLevelTriage(): Promise<HeartbeatFindings> {
     escalationNeeded: false,
   };
-  // 1. Detect stalled tasks
-  detectStalledTasks(findings);
+  // 1. Detect and remediate stalled tasks (tiered: auto-fail dead workers, escalate ambiguous)
+  detectAndRemediateStalledTasks(findings);
   // 2. Check and fix worker health
   checkWorkerHealth(findings);
@@ -137,11 +155,72 @@ export async function codeLevelTriage(): Promise<HeartbeatFindings> {
 }
 /**
- * Detect in_progress tasks that haven't been updated in a while.
+ * Tiered stall detection and auto-remediation.
+ *
+ * Cross-checks stalled tasks with active_sessions to determine severity:
+ * - No active session → worker is dead → auto-fail (5 min threshold)
+ * - Stale session heartbeat → worker likely crashed → auto-fail (15 min threshold)
+ * - Fresh session heartbeat → worker alive but task stale → escalate to lead (30 min threshold)
  */
-function detectStalledTasks(findings: HeartbeatFindings): void {
-  const stalled = getStalledInProgressTasks(STALL_THRESHOLD_MINUTES);
-  findings.stalledTasks = stalled;
+function detectAndRemediateStalledTasks(findings: HeartbeatFindings): void {
+  // Query once with the no-session threshold; the longer thresholds are applied per case below via taskAgeMs. NOTE(review): this assumes STALL_THRESHOLD_NO_SESSION_MIN is the smallest of the three (true for the 5/15/30 defaults, but each is env-overridable) — confirm
+  const candidates = getStalledInProgressTasks(STALL_THRESHOLD_NO_SESSION_MIN);
+  for (const task of candidates) {
+    if (!task.agentId) continue; // Unassigned tasks can't be stalled
+    const session = getActiveSessionForTask(task.id);
+    const taskAgeMs = Date.now() - new Date(task.lastUpdatedAt).getTime(); // ms since the task row was last updated
+    if (!session) {
+      // Case A: No active session — worker is dead, so the task is failed without escalation
+      if (taskAgeMs >= STALL_THRESHOLD_NO_SESSION_MIN * 60 * 1000) {
+        const reason =
+          "Auto-failed by heartbeat: worker session not found (no active session for task)";
+        const failed = failTask(task.id, reason);
+        if (failed) { // record and clean up only when failTask returned a truthy result
+          findings.autoFailedTasks.push({ taskId: task.id, agentId: task.agentId, reason });
+          console.log(`[Heartbeat] Auto-failed task ${task.id.slice(0, 8)} — no active session`);
+          // Fix agent status if no other active tasks remain for this agent
+          const remaining = getActiveTaskCount(task.agentId);
+          if (remaining === 0) {
+            updateAgentStatus(task.agentId, "idle");
+          }
+        }
+      }
+    } else {
+      const sessionHeartbeatAgeMs = Date.now() - new Date(session.lastHeartbeatAt).getTime(); // ms since the session's last recorded heartbeat
+      const isStaleHeartbeat =
+        sessionHeartbeatAgeMs >= STALL_THRESHOLD_STALE_HEARTBEAT_MIN * 60 * 1000;
+      if (isStaleHeartbeat) {
+        // Case B: Session exists but heartbeat is stale — worker likely crashed; both the heartbeat and the task update must be older than the stale-heartbeat threshold
+        if (taskAgeMs >= STALL_THRESHOLD_STALE_HEARTBEAT_MIN * 60 * 1000) {
+          const reason =
+            "Auto-failed by heartbeat: worker session heartbeat is stale (likely crashed)";
+          const failed = failTask(task.id, reason);
+          if (failed) { // record and clean up only when failTask returned a truthy result
+            findings.autoFailedTasks.push({ taskId: task.id, agentId: task.agentId, reason });
+            deleteActiveSession(task.id); // presumably removes the stale active_sessions row for this task — confirm against db.ts
+            console.log(
+              `[Heartbeat] Auto-failed task ${task.id.slice(0, 8)} — stale session heartbeat`,
+            );
+            const remaining = getActiveTaskCount(task.agentId);
+            if (remaining === 0) {
+              updateAgentStatus(task.agentId, "idle");
+            }
+          }
+        }
+      } else {
+        // Case C: Session exists and heartbeat is fresh — ambiguous, so the task is only reported, never auto-failed
+        if (taskAgeMs >= STALL_THRESHOLD_MINUTES * 60 * 1000) {
+          findings.stalledTasks.push(task); // evaluateEscalation flags escalation when this list is non-empty
+        }
+      }
+    }
+  }
 }
 /**
@@ -232,15 +311,13 @@ async function cleanupStaleResources(findings: HeartbeatFindings): Promise<void>
 /**
  * Evaluate whether findings require escalation to a Claude session (lead agent).
- * Only escalate for truly ambiguous situations that need human-level reasoning.
+ * Only escalate for ambiguous stalls (worker alive but task not updating).
  */
 function evaluateEscalation(findings: HeartbeatFindings): void {
-  // Stalled tasks are ambiguous — the task might be actively worked on
-  // but the worker just hasn't called store-progress recently
   if (findings.stalledTasks.length > 0) {
     findings.escalationNeeded = true;
     const taskIds = findings.stalledTasks.map((t) => t.id.slice(0, 8)).join(", ");
-    findings.escalationReason = `${findings.stalledTasks.length} task(s) stalled (no update for ${STALL_THRESHOLD_MINUTES}+ min): ${taskIds}`;
+    findings.escalationReason = `${findings.stalledTasks.length} task(s) stalled with active worker (no task update for ${STALL_THRESHOLD_MINUTES}+ min): ${taskIds}`;
   }
 }
@@ -255,6 +332,13 @@ function escalateToLead(findings: HeartbeatFindings): void {
   }
   const escalationKey = buildEscalationKey(findings);
+  // Cooldown check — prevent repeated escalations for the same task set
+  const lastTime = lastEscalationTime.get(escalationKey);
+  if (lastTime && Date.now() - lastTime < ESCALATION_COOLDOWN_MS) {
+    return;
+  }
   if (hasActiveEscalationTask(lead.id, escalationKey)) {
     return;
   }
@@ -294,6 +378,7 @@ function escalateToLead(findings: HeartbeatFindings): void {
     priority: 70,
   });
+  lastEscalationTime.set(escalationKey, Date.now());
   console.log(`[Heartbeat] Created triage task for lead ${lead.name}`);
 }
@@ -337,6 +422,7 @@ export async function runHeartbeatSweep(): Promise<void> {
     if (!preflightGate()) {
       const cleanupOnlyFindings: HeartbeatFindings = {
         stalledTasks: [],
+        autoFailedTasks: [],
         workerHealthFixes: [],
         autoAssigned: [],
         staleCleanup: {
@@ -374,6 +460,9 @@ export async function runHeartbeatSweep(): Promise<void> {
 function logFindings(findings: HeartbeatFindings): void {
   const parts: string[] = [];
+  if (findings.autoFailedTasks.length > 0) {
+    parts.push(`auto_failed=${findings.autoFailedTasks.length}`);
+  }
   if (findings.stalledTasks.length > 0) {
     parts.push(`stalled=${findings.stalledTasks.length}`);
   }
@@ -432,3 +521,10 @@ export function stopHeartbeat(): void {
     console.log("[Heartbeat] Stopped");
   }
 }
+/**
+ * Reset escalation cooldown state. Exported for testing only.
+ *
+ * Empties the module-level `lastEscalationTime` map, so the cooldown check in
+ * `escalateToLead` no longer finds a previous escalation timestamp for any key.
+ */
+export function resetEscalationCooldowns(): void {
+  lastEscalationTime.clear();
+}

package/src/http/agents.ts CHANGED Viewed

@@ -205,6 +205,9 @@ export async function handleAgentRegister(
           // Validates that registered happened before reconnected
           return ctx.deps.length > 0;
         },
+        // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
+        filter: ({}, ctx) => ctx.deps.length > 0,
+        conditions: [{ timeout_ms: 86_400_000 }], // 1 day: agents may be offline for extended periods
       });
     }

package/src/http/heartbeat.ts ADDED Viewed

@@ -0,0 +1,43 @@
+import type { IncomingMessage, ServerResponse } from "node:http";
+import { runHeartbeatSweep } from "../heartbeat/heartbeat";
+import { route } from "./route-def";
+import { json } from "./utils";
+// ─── Route Definitions ───────────────────────────────────────────────────────
+const triggerSweep = route({
+  method: "post",
+  path: "/api/heartbeat/sweep",
+  pattern: ["api", "heartbeat", "sweep"], // presumably the path segments that triggerSweep.match() compares against — confirm in route-def
+  summary: "Trigger an immediate heartbeat sweep",
+  tags: ["Heartbeat"],
+  responses: {
+    200: { description: "Sweep completed successfully" },
+    401: { description: "Unauthorized" },
+  },
+  auth: { apiKey: true }, // NOTE(review): presumably the source of the bearerAuth requirement listed for this path in openapi.json — confirm in route-def
+});
+// ─── Handler ─────────────────────────────────────────────────────────────────
+/**
+ * HTTP handler for the heartbeat routes.
+ *
+ * Serves `POST /api/heartbeat/sweep`: awaits `runHeartbeatSweep()` and replies with a JSON
+ * success payload, or with a 500 JSON payload carrying the error message if the sweep throws.
+ *
+ * @param req - Incoming request; its method selects the route and it is passed to the route parser.
+ * @param res - Response the JSON result is written to.
+ * @param pathSegments - Request path split into segments, matched against the route.
+ * @returns `true` when the request matched this route (even if parsing rejected it),
+ *   `false` when the method/path is not this route.
+ */
+export async function handleHeartbeat(
+  req: IncomingMessage,
+  res: ServerResponse,
+  pathSegments: string[],
+): Promise<boolean> {
+  // Not the sweep route: report "unhandled" to the caller.
+  if (!triggerSweep.match(req.method, pathSegments)) {
+    return false;
+  }
+  // The route takes no query parameters, so an empty set is supplied to the parser.
+  const request = await triggerSweep.parse(req, res, pathSegments, new URLSearchParams());
+  if (!request) {
+    // presumably parse() has already written the rejection response — confirm in route-def
+    return true;
+  }
+  try {
+    await runHeartbeatSweep();
+    json(res, { success: true, message: "Heartbeat sweep completed" });
+  } catch (failure) {
+    let detail: string;
+    if (failure instanceof Error) {
+      detail = failure.message;
+    } else {
+      detail = "Unknown error during heartbeat sweep";
+    }
+    json(res, { success: false, error: detail }, 500);
+  }
+  return true;
+}

package/src/http/index.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import {
   type Server,
   type ServerResponse,
 } from "node:http";
-import { assert, initialize } from "@desplega.ai/business-use";
+import { ensure, initialize } from "@desplega.ai/business-use";
 import type { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
 import { getEnabledCapabilities, hasCapability } from "@/server";
 import { initAgentMail } from "../agentmail";
@@ -25,6 +25,7 @@ import { handleDbQuery } from "./db-query";
 import { handleEcosystem } from "./ecosystem";
 import { handleEpics } from "./epics";
 import { handleEvents } from "./events";
+import { handleHeartbeat } from "./heartbeat";
 import { handleMcp } from "./mcp";
 import { handleMcpServers } from "./mcp-servers";
 import { handleMemory } from "./memory";
@@ -120,6 +121,7 @@ const httpServer = createHttpServer(async (req, res) => {
     () => handleSkills(req, res, pathSegments, queryParams, myAgentId),
     () => handleMcpServers(req, res, pathSegments, queryParams),
     () => handleMemory(req, res, pathSegments, myAgentId),
+    () => handleHeartbeat(req, res, pathSegments),
     () => handleEvents(req, res, pathSegments, queryParams, myAgentId),
     () => handleMcp(req, res, transports),
   ];
@@ -186,7 +188,7 @@ httpServer
   .listen(port, async () => {
     console.log(`MCP HTTP server running on http://localhost:${port}/mcp`);
-    assert({
+    ensure({
       id: "listen",
       flow: "api",
       runId: globalState.__runId!,

package/src/http/poll.ts CHANGED Viewed

@@ -141,6 +141,9 @@ export async function handlePoll(
                 previousStatus: pendingTask.status,
               },
               validator: (data) => data.previousStatus === "pending",
+              // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
+              filter: ({}, ctx) => ctx.deps.length > 0,
+              conditions: [{ timeout_ms: 300_000 }], // 5 min: polling interval + queue wait
             });
             return {

package/src/http/tasks.ts CHANGED Viewed

@@ -344,20 +344,32 @@ export async function handleTasks(
           reason,
         },
         validator: (data) => data.previousStatus === "pending",
+        // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
+        filter: ({}, ctx) => ctx.deps.length > 0,
+        conditions: [{ timeout_ms: 86_400_000 }], // 1 day: task may sit pending for a long time
       });
     } else {
       ensure({
         id: "cancelled_in_progress",
         flow: "task",
         runId: parsed.params.id,
-        depIds: ["started"],
+        depIds:
+          task.status === "paused"
+            ? ["started", "paused"]
+            : task.wasPaused
+              ? ["started", "resumed"]
+              : ["started"],
         data: {
           taskId: parsed.params.id,
           agentId: task.agentId,
           previousStatus: task.status,
           reason,
         },
-        validator: (data) => data.previousStatus === "in_progress",
+        validator: (data) =>
+          data.previousStatus === "in_progress" || data.previousStatus === "paused",
+        // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
+        filter: ({}, ctx) => ctx.deps.length > 0,
+        conditions: [{ timeout_ms: 3_600_000 }], // 1 hour: task running time
       });
     }
@@ -423,6 +435,8 @@ export async function handleTasks(
         return { task, alreadyFinished: true };
       }
+      const wasPaused = task.wasPaused;
       let updatedTask: typeof task;
       if (parsed.body.status === "completed") {
         const result = completeTask(
@@ -448,7 +462,7 @@ export async function handleTasks(
         updateAgentStatusFromCapacity(task.agentId);
       }
-      return { task: updatedTask };
+      return { task: updatedTask, wasPaused };
     })();
     if ("error" in result && result.error) {
@@ -462,7 +476,7 @@ export async function handleTasks(
         id: finishEventId,
         flow: "task",
         runId: parsed.params.id,
-        depIds: ["started"],
+        depIds: result.wasPaused ? ["started", "resumed"] : ["started"],
         data: {
           taskId: parsed.params.id,
           agentId: myAgentId,
@@ -472,6 +486,9 @@ export async function handleTasks(
             : { failureReason: parsed.body.failureReason }),
         },
         validator: (data) => data.previousStatus === "in_progress",
+        // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
+        filter: ({}, ctx) => ctx.deps.length > 0,
+        conditions: [{ timeout_ms: 3_600_000 }], // 1 hour: task running time
       });
     }
@@ -530,6 +547,9 @@ export async function handleTasks(
         previousStatus: task.status,
       },
       validator: (data) => data.previousStatus === "in_progress",
+      // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
+      filter: ({}, ctx) => ctx.deps.length > 0,
+      conditions: [{ timeout_ms: 3_600_000 }], // 1 hour
     });
     json(res, { success: true, task: pausedTask });
@@ -585,6 +605,9 @@ export async function handleTasks(
         previousStatus: task.status,
       },
       validator: (data) => data.previousStatus === "paused",
+      // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
+      filter: ({}, ctx) => ctx.deps.length > 0,
+      conditions: [{ timeout_ms: 86_400_000 }], // 1 day: tasks may stay paused for extended periods
     });
     json(res, { success: true, task: resumedTask });

package/src/providers/pi-mono-adapter.ts CHANGED Viewed

@@ -207,6 +207,17 @@ class PiMonoSession implements ProviderSession {
             this.lastEmittedMessage = text;
           }
         }
+        // Emit context_usage for dashboard tracking
+        const usage = this.agentSession.getContextUsage();
+        if (usage && usage.tokens != null) {
+          this.emit({
+            type: "context_usage",
+            contextUsedTokens: usage.tokens,
+            contextTotalTokens: usage.contextWindow,
+            contextPercent: usage.percent ?? 0,
+            outputTokens: 0,
+          });
+        }
         break;
       }
       case "tool_execution_start": {
@@ -224,6 +235,13 @@ class PiMonoSession implements ProviderSession {
             },
           }),
         });
+        // Emit normalized tool_start for runner auto-progress
+        this.emit({
+          type: "tool_start",
+          toolCallId: event.toolCallId,
+          toolName: event.toolName,
+          args: event.args,
+        });
         break;
       }
       case "tool_execution_end":
@@ -244,6 +262,13 @@ class PiMonoSession implements ProviderSession {
             },
           }),
         });
+        // Emit normalized tool_end
+        this.emit({
+          type: "tool_end",
+          toolCallId: event.toolCallId,
+          toolName: event.toolName,
+          result: event.result,
+        });
         break;
       case "auto_retry_start":
         this.emit({

package/src/scheduler/scheduler.ts CHANGED Viewed

@@ -276,6 +276,7 @@ export function startScheduler(
       const start = ctx.deps.find((d) => d.id === "listen");
       return !!start && start.data?.capabilities?.includes("scheduling");
     },
+    conditions: [{ timeout_ms: 10_000 }], // 10s: scheduler starts immediately after listen
   });
 }