@desplega.ai/agent-swarm 1.53.1 → 1.54.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/openapi.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "openapi": "3.1.0",
3
3
  "info": {
4
4
  "title": "Agent Swarm API",
5
- "version": "1.53.0",
5
+ "version": "1.53.1",
6
6
  "description": "Multi-agent orchestration API for Claude Code, Codex, and Gemini CLI. Enables task distribution, agent communication, and service discovery.\n\nMCP tools are documented separately in [MCP.md](./MCP.md)."
7
7
  },
8
8
  "servers": [
@@ -2424,6 +2424,27 @@
2424
2424
  }
2425
2425
  }
2426
2426
  },
2427
+ "/api/heartbeat/sweep": {
2428
+ "post": {
2429
+ "summary": "Trigger an immediate heartbeat sweep",
2430
+ "tags": [
2431
+ "Heartbeat"
2432
+ ],
2433
+ "security": [
2434
+ {
2435
+ "bearerAuth": []
2436
+ }
2437
+ ],
2438
+ "responses": {
2439
+ "200": {
2440
+ "description": "Sweep completed successfully"
2441
+ },
2442
+ "401": {
2443
+ "description": "Unauthorized"
2444
+ }
2445
+ }
2446
+ }
2447
+ },
2427
2448
  "/api/memory/index": {
2428
2449
  "post": {
2429
2450
  "summary": "Ingest content into memory system (async embedding)",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@desplega.ai/agent-swarm",
3
- "version": "1.53.1",
3
+ "version": "1.54.1",
4
4
  "description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
5
5
  "license": "MIT",
6
6
  "author": "desplega.sh <contact@desplega.sh>",
package/src/be/db.ts CHANGED
@@ -728,6 +728,7 @@ type AgentTaskRow = {
728
728
  peakContextPercent: number | null;
729
729
  totalContextTokensUsed: number | null;
730
730
  contextWindowSize: number | null;
731
+ was_paused: number;
731
732
  };
732
733
 
733
734
  function rowToAgentTask(row: AgentTaskRow): AgentTask {
@@ -781,6 +782,7 @@ function rowToAgentTask(row: AgentTaskRow): AgentTask {
781
782
  failureReason: row.failureReason ?? undefined,
782
783
  output: row.output ?? undefined,
783
784
  progress: row.progress ?? undefined,
785
+ wasPaused: !!row.was_paused,
784
786
  };
785
787
  }
786
788
 
@@ -1509,6 +1511,7 @@ export function pauseTask(id: string): AgentTask | null {
1509
1511
  .prepare<AgentTaskRow, [string]>(
1510
1512
  `UPDATE agent_tasks
1511
1513
  SET status = 'paused',
1514
+ was_paused = 1,
1512
1515
  lastUpdatedAt = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
1513
1516
  WHERE id = ? AND status = 'in_progress'
1514
1517
  RETURNING *`,
@@ -1543,6 +1546,7 @@ export function resumeTask(taskId: string): AgentTask | null {
1543
1546
  .prepare<AgentTaskRow, [string]>(
1544
1547
  `UPDATE agent_tasks
1545
1548
  SET status = 'in_progress',
1549
+ was_paused = 1,
1546
1550
  lastUpdatedAt = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
1547
1551
  WHERE id = ? AND status = 'paused'
1548
1552
  RETURNING *`,
@@ -5599,6 +5603,18 @@ export function updateActiveSessionProviderSessionId(
5599
5603
  return result.changes > 0;
5600
5604
  }
5601
5605
 
5606
/**
 * Get the active session for a specific task.
 * Used by the heartbeat to cross-reference stalled tasks with worker sessions.
 *
 * If more than one session row references the task, the row with the most
 * recent `lastHeartbeatAt` is returned, so the caller's staleness check is
 * made against the freshest heartbeat rather than an arbitrary row.
 *
 * @param taskId - ID of the task whose session should be looked up.
 * @returns The matching session row, or `null` when no row references the task.
 */
export function getActiveSessionForTask(taskId: string): ActiveSession | null {
  const session = getDb()
    .prepare<ActiveSession, [string]>(
      "SELECT * FROM active_sessions WHERE taskId = ? ORDER BY lastHeartbeatAt DESC LIMIT 1",
    )
    .get(taskId);
  return session ?? null;
}
5617
+
5602
5618
  /**
5603
5619
  * Reassociate session logs from a runner session to a real task ID.
5604
5620
  * Used when a pool task is claimed — logs were stored under a random UUID,
@@ -6222,6 +6238,24 @@ export function getStepByIdempotencyKey(key: string): WorkflowRunStep | null {
6222
6238
  return row ? rowToWorkflowRunStep(row) : null;
6223
6239
  }
6224
6240
 
6241
/**
 * Count the `workflow_run_steps` rows recorded for one node of a workflow run.
 *
 * @param runId - Workflow run to inspect.
 * @param nodeId - Node within that run.
 * @returns The number of matching rows; 0 when the query yields no row.
 */
export function getStepCountForNode(runId: string, nodeId: string): number {
  const countQuery = getDb().prepare<{ cnt: number }, [string, string]>(
    "SELECT COUNT(*) as cnt FROM workflow_run_steps WHERE runId = ? AND nodeId = ?",
  );
  const result = countQuery.get(runId, nodeId);
  if (result == null) {
    return 0;
  }
  return result.cnt ?? 0;
}
6249
+
6250
/**
 * Fetch the most recently started step recorded for one node of a workflow run.
 *
 * @param runId - Workflow run to inspect.
 * @param nodeId - Node within that run.
 * @returns The step with the greatest `startedAt`, or `null` when the node has no steps.
 */
export function getLatestStepForNode(runId: string, nodeId: string): WorkflowRunStep | null {
  const latestStepQuery = getDb().prepare<WorkflowRunStepRow, [string, string]>(
    "SELECT * FROM workflow_run_steps WHERE runId = ? AND nodeId = ? ORDER BY startedAt DESC LIMIT 1",
  );
  const latestRow = latestStepQuery.get(runId, nodeId);
  if (!latestRow) {
    return null;
  }
  return rowToWorkflowRunStep(latestRow);
}
6258
+
6225
6259
  // --- Workflow Version History ---
6226
6260
 
6227
6261
  type WorkflowVersionRow = {
@@ -0,0 +1 @@
1
+ ALTER TABLE agent_tasks ADD COLUMN was_paused INTEGER NOT NULL DEFAULT 0;
@@ -305,6 +305,12 @@ export function humanizeToolName(name: string): string {
305
305
  export function toolCallToProgress(toolName: string, args: unknown): string | null {
306
306
  if (SKIP_PROGRESS_TOOLS.has(toolName)) return null;
307
307
 
308
+ // Normalize: pi-mono uses lowercase ("read"), Claude uses PascalCase ("Read")
309
+ const normalized =
310
+ toolName.startsWith("mcp__") || toolName.includes("_")
311
+ ? toolName
312
+ : toolName.charAt(0).toUpperCase() + toolName.slice(1);
313
+
308
314
  const a = args as Record<string, unknown>;
309
315
  const shortPath = (p: unknown) => {
310
316
  if (typeof p !== "string") return "";
@@ -313,7 +319,7 @@ export function toolCallToProgress(toolName: string, args: unknown): string | nu
313
319
  return parts.length > 2 ? parts.slice(-2).join("/") : p;
314
320
  };
315
321
 
316
- switch (toolName) {
322
+ switch (normalized) {
317
323
  case "Read":
318
324
  return `📖 Reading ${shortPath(a.file_path)}`;
319
325
  case "Edit":
@@ -1067,6 +1073,25 @@ async function cleanupActiveSessions(config: ApiConfig): Promise<void> {
1067
1073
  }
1068
1074
  }
1069
1075
 
1076
/**
 * Trigger a heartbeat sweep via the API (lead startup self-check).
 *
 * Sends a POST to `/api/heartbeat/sweep` with the agent ID header and, when an
 * API key is configured, a Bearer token. This is best-effort: every failure is
 * logged as a warning and reported through the return value, never thrown.
 *
 * @param config - API connection settings (`apiUrl`, `agentId`, optional `apiKey`).
 * @returns `true` when the server answered with a successful status, `false`
 *   on a non-OK response or when the request itself failed.
 */
async function triggerHeartbeatSweep(config: ApiConfig): Promise<boolean> {
  try {
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
      "X-Agent-ID": config.agentId,
    };
    if (config.apiKey) headers.Authorization = `Bearer ${config.apiKey}`;

    const resp = await fetch(`${config.apiUrl}/api/heartbeat/sweep`, {
      method: "POST",
      headers,
    });
    if (!resp.ok) {
      // Surface the status so a 401/500 is distinguishable from a network error.
      console.warn(`[runner] Heartbeat sweep request returned HTTP ${resp.status}`);
    }
    return resp.ok;
  } catch (err) {
    // Anything can be thrown; only read `.message` from real Error instances.
    const message = err instanceof Error ? err.message : String(err);
    console.warn(`[runner] Failed to trigger heartbeat sweep: ${message}`);
    return false;
  }
}
1094
+
1070
1095
  /** Trigger types returned by the poll API */
1071
1096
  interface Trigger {
1072
1097
  type:
@@ -1991,6 +2016,9 @@ async function checkCompletedProcesses(
1991
2016
  failureReason,
1992
2017
  },
1993
2018
  validator: (data) => data.exitCode === 0,
2019
+ // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
2020
+ filter: ({}, ctx) => ctx.deps.length > 0,
2021
+ conditions: [{ timeout_ms: 3_600_000 }], // 1 hour: process runtime
1994
2022
  });
1995
2023
 
1996
2024
  // Commit channel activity cursors after successful processing
@@ -2703,6 +2731,17 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
2703
2731
  }
2704
2732
  // ========== END: Resume paused tasks ==========
2705
2733
 
2734
+ // ========== Lead startup self-check ==========
2735
+ if (isLead) {
2736
+ console.log(`[${role}] Running startup heartbeat sweep...`);
2737
+ const swept = await triggerHeartbeatSweep(apiConfig);
2738
+ if (swept) {
2739
+ console.log(`[${role}] Startup heartbeat sweep completed`);
2740
+ } else {
2741
+ console.warn(`[${role}] Startup heartbeat sweep failed (non-fatal)`);
2742
+ }
2743
+ }
2744
+
2706
2745
  // Track last finished task check for leads (to avoid re-processing)
2707
2746
  while (true) {
2708
2747
  // Ping server on each iteration to keep status updated
@@ -2790,6 +2829,9 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
2790
2829
  triggerType: trigger.type,
2791
2830
  role,
2792
2831
  },
2832
+ // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
2833
+ filter: ({}, ctx) => ctx.deps.length > 0,
2834
+ conditions: [{ timeout_ms: 60_000 }], // 1 min: immediate after poll
2793
2835
  });
2794
2836
  }
2795
2837
 
@@ -3020,6 +3062,9 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
3020
3062
  role,
3021
3063
  model: taskModel,
3022
3064
  },
3065
+ // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
3066
+ filter: ({}, ctx) => ctx.deps.length > 0,
3067
+ conditions: [{ timeout_ms: 60_000 }], // 1 min: process startup
3023
3068
  });
3024
3069
 
3025
3070
  // Attach trigger metadata for logging
@@ -2,6 +2,9 @@ import {
2
2
  claimTask,
3
3
  cleanupStaleSessions,
4
4
  createTaskExtended,
5
+ deleteActiveSession,
6
+ failTask,
7
+ getActiveSessionForTask,
5
8
  getActiveTaskCount,
6
9
  getAllAgents,
7
10
  getDb,
@@ -29,15 +32,25 @@ import "./templates";
29
32
  /** Default heartbeat interval: 90 seconds */
30
33
  const DEFAULT_INTERVAL_MS = Number(process.env.HEARTBEAT_INTERVAL_MS) || 90_000;
31
34
 
32
- /** Stall threshold: tasks in_progress with no update for this many minutes */
35
+ /** Stall threshold: tasks with fresh worker heartbeat but no task update for this many minutes */
33
36
  const STALL_THRESHOLD_MINUTES = Number(process.env.HEARTBEAT_STALL_THRESHOLD_MIN) || 30;
34
37
 
38
+ /** Stall threshold: tasks with no active session (worker clearly dead) */
39
+ const STALL_THRESHOLD_NO_SESSION_MIN = Number(process.env.HEARTBEAT_STALL_NO_SESSION_MIN) || 5;
40
+
41
+ /** Stall threshold: tasks with stale worker heartbeat */
42
+ const STALL_THRESHOLD_STALE_HEARTBEAT_MIN = Number(process.env.HEARTBEAT_STALL_STALE_HB_MIN) || 15;
43
+
35
44
  /** Stale resource cleanup threshold (minutes) */
36
45
  const STALE_CLEANUP_THRESHOLD_MINUTES = Number(process.env.HEARTBEAT_STALE_CLEANUP_MIN) || 30;
37
46
 
38
47
  /** Max pool tasks to auto-assign per sweep */
39
48
  const MAX_AUTO_ASSIGN_PER_SWEEP = Number(process.env.HEARTBEAT_MAX_AUTO_ASSIGN) || 5;
40
49
 
50
+ /** Escalation cooldown: minimum time between escalations for the same task set (ms) */
51
+ const ESCALATION_COOLDOWN_MS =
52
+ Number(process.env.HEARTBEAT_ESCALATION_COOLDOWN_MS) || 15 * 60 * 1000;
53
+
41
54
  const HEARTBEAT_ESCALATION_MARKER = "[heartbeat-escalation]";
42
55
 
43
56
  // ============================================================================
@@ -46,6 +59,7 @@ const HEARTBEAT_ESCALATION_MARKER = "[heartbeat-escalation]";
46
59
 
47
60
  export interface HeartbeatFindings {
48
61
  stalledTasks: AgentTask[];
62
+ autoFailedTasks: Array<{ taskId: string; agentId: string; reason: string }>;
49
63
  workerHealthFixes: Array<{ agentId: string; oldStatus: string; newStatus: string }>;
50
64
  autoAssigned: Array<{ taskId: string; agentId: string }>;
51
65
  staleCleanup: {
@@ -66,6 +80,9 @@ export interface HeartbeatFindings {
66
80
  let heartbeatInterval: ReturnType<typeof setInterval> | null = null;
67
81
  let isSweeping = false;
68
82
 
83
+ /** Tracks last escalation time per escalation key to prevent spam */
84
+ const lastEscalationTime: Map<string, number> = new Map();
85
+
69
86
  // ============================================================================
70
87
  // Tier 1: Preflight Gate
71
88
  // ============================================================================
@@ -106,6 +123,7 @@ export function preflightGate(): boolean {
106
123
  export async function codeLevelTriage(): Promise<HeartbeatFindings> {
107
124
  const findings: HeartbeatFindings = {
108
125
  stalledTasks: [],
126
+ autoFailedTasks: [],
109
127
  workerHealthFixes: [],
110
128
  autoAssigned: [],
111
129
  staleCleanup: {
@@ -118,8 +136,8 @@ export async function codeLevelTriage(): Promise<HeartbeatFindings> {
118
136
  escalationNeeded: false,
119
137
  };
120
138
 
121
- // 1. Detect stalled tasks
122
- detectStalledTasks(findings);
139
+ // 1. Detect and remediate stalled tasks (tiered: auto-fail dead workers, escalate ambiguous)
140
+ detectAndRemediateStalledTasks(findings);
123
141
 
124
142
  // 2. Check and fix worker health
125
143
  checkWorkerHealth(findings);
@@ -137,11 +155,72 @@ export async function codeLevelTriage(): Promise<HeartbeatFindings> {
137
155
  }
138
156
 
139
157
  /**
140
- * Detect in_progress tasks that haven't been updated in a while.
158
+ * Tiered stall detection and auto-remediation.
159
+ *
160
+ * Cross-checks stalled tasks with active_sessions to determine severity:
161
+ * - No active session → worker is dead → auto-fail (5 min threshold)
162
+ * - Stale session heartbeat → worker likely crashed → auto-fail (15 min threshold)
163
+ * - Fresh session heartbeat → worker alive but task stale → escalate to lead (30 min threshold)
141
164
  */
142
- function detectStalledTasks(findings: HeartbeatFindings): void {
143
- const stalled = getStalledInProgressTasks(STALL_THRESHOLD_MINUTES);
144
- findings.stalledTasks = stalled;
165
+ function detectAndRemediateStalledTasks(findings: HeartbeatFindings): void {
166
+ // Use the shortest threshold to catch all potentially stalled tasks
167
+ const candidates = getStalledInProgressTasks(STALL_THRESHOLD_NO_SESSION_MIN);
168
+
169
+ for (const task of candidates) {
170
+ if (!task.agentId) continue; // Unassigned tasks can't be stalled
171
+
172
+ const session = getActiveSessionForTask(task.id);
173
+ const taskAgeMs = Date.now() - new Date(task.lastUpdatedAt).getTime();
174
+
175
+ if (!session) {
176
+ // Case A: No active session — worker is dead
177
+ if (taskAgeMs >= STALL_THRESHOLD_NO_SESSION_MIN * 60 * 1000) {
178
+ const reason =
179
+ "Auto-failed by heartbeat: worker session not found (no active session for task)";
180
+ const failed = failTask(task.id, reason);
181
+ if (failed) {
182
+ findings.autoFailedTasks.push({ taskId: task.id, agentId: task.agentId, reason });
183
+ console.log(`[Heartbeat] Auto-failed task ${task.id.slice(0, 8)} — no active session`);
184
+
185
+ // Fix agent status if no other active tasks
186
+ const remaining = getActiveTaskCount(task.agentId);
187
+ if (remaining === 0) {
188
+ updateAgentStatus(task.agentId, "idle");
189
+ }
190
+ }
191
+ }
192
+ } else {
193
+ const sessionHeartbeatAgeMs = Date.now() - new Date(session.lastHeartbeatAt).getTime();
194
+ const isStaleHeartbeat =
195
+ sessionHeartbeatAgeMs >= STALL_THRESHOLD_STALE_HEARTBEAT_MIN * 60 * 1000;
196
+
197
+ if (isStaleHeartbeat) {
198
+ // Case B: Session exists but heartbeat is stale — worker likely crashed
199
+ if (taskAgeMs >= STALL_THRESHOLD_STALE_HEARTBEAT_MIN * 60 * 1000) {
200
+ const reason =
201
+ "Auto-failed by heartbeat: worker session heartbeat is stale (likely crashed)";
202
+ const failed = failTask(task.id, reason);
203
+ if (failed) {
204
+ findings.autoFailedTasks.push({ taskId: task.id, agentId: task.agentId, reason });
205
+ deleteActiveSession(task.id);
206
+ console.log(
207
+ `[Heartbeat] Auto-failed task ${task.id.slice(0, 8)} — stale session heartbeat`,
208
+ );
209
+
210
+ const remaining = getActiveTaskCount(task.agentId);
211
+ if (remaining === 0) {
212
+ updateAgentStatus(task.agentId, "idle");
213
+ }
214
+ }
215
+ }
216
+ } else {
217
+ // Case C: Session exists and heartbeat is fresh — ambiguous
218
+ if (taskAgeMs >= STALL_THRESHOLD_MINUTES * 60 * 1000) {
219
+ findings.stalledTasks.push(task);
220
+ }
221
+ }
222
+ }
223
+ }
145
224
  }
146
225
 
147
226
  /**
@@ -232,15 +311,13 @@ async function cleanupStaleResources(findings: HeartbeatFindings): Promise<void>
232
311
 
233
312
  /**
234
313
  * Evaluate whether findings require escalation to a Claude session (lead agent).
235
- * Only escalate for truly ambiguous situations that need human-level reasoning.
314
+ * Only escalate for ambiguous stalls (worker alive but task not updating).
236
315
  */
237
316
  function evaluateEscalation(findings: HeartbeatFindings): void {
238
- // Stalled tasks are ambiguous — the task might be actively worked on
239
- // but the worker just hasn't called store-progress recently
240
317
  if (findings.stalledTasks.length > 0) {
241
318
  findings.escalationNeeded = true;
242
319
  const taskIds = findings.stalledTasks.map((t) => t.id.slice(0, 8)).join(", ");
243
- findings.escalationReason = `${findings.stalledTasks.length} task(s) stalled (no update for ${STALL_THRESHOLD_MINUTES}+ min): ${taskIds}`;
320
+ findings.escalationReason = `${findings.stalledTasks.length} task(s) stalled with active worker (no task update for ${STALL_THRESHOLD_MINUTES}+ min): ${taskIds}`;
244
321
  }
245
322
  }
246
323
 
@@ -255,6 +332,13 @@ function escalateToLead(findings: HeartbeatFindings): void {
255
332
  }
256
333
 
257
334
  const escalationKey = buildEscalationKey(findings);
335
+
336
+ // Cooldown check — prevent repeated escalations for the same task set
337
+ const lastTime = lastEscalationTime.get(escalationKey);
338
+ if (lastTime && Date.now() - lastTime < ESCALATION_COOLDOWN_MS) {
339
+ return;
340
+ }
341
+
258
342
  if (hasActiveEscalationTask(lead.id, escalationKey)) {
259
343
  return;
260
344
  }
@@ -294,6 +378,7 @@ function escalateToLead(findings: HeartbeatFindings): void {
294
378
  priority: 70,
295
379
  });
296
380
 
381
+ lastEscalationTime.set(escalationKey, Date.now());
297
382
  console.log(`[Heartbeat] Created triage task for lead ${lead.name}`);
298
383
  }
299
384
 
@@ -337,6 +422,7 @@ export async function runHeartbeatSweep(): Promise<void> {
337
422
  if (!preflightGate()) {
338
423
  const cleanupOnlyFindings: HeartbeatFindings = {
339
424
  stalledTasks: [],
425
+ autoFailedTasks: [],
340
426
  workerHealthFixes: [],
341
427
  autoAssigned: [],
342
428
  staleCleanup: {
@@ -374,6 +460,9 @@ export async function runHeartbeatSweep(): Promise<void> {
374
460
  function logFindings(findings: HeartbeatFindings): void {
375
461
  const parts: string[] = [];
376
462
 
463
+ if (findings.autoFailedTasks.length > 0) {
464
+ parts.push(`auto_failed=${findings.autoFailedTasks.length}`);
465
+ }
377
466
  if (findings.stalledTasks.length > 0) {
378
467
  parts.push(`stalled=${findings.stalledTasks.length}`);
379
468
  }
@@ -432,3 +521,10 @@ export function stopHeartbeat(): void {
432
521
  console.log("[Heartbeat] Stopped");
433
522
  }
434
523
  }
524
+
525
/**
 * Clear every recorded escalation timestamp so that the next escalation for
 * any key is no longer suppressed by the cooldown.
 * Exported for testing only.
 */
export function resetEscalationCooldowns(): void {
  lastEscalationTime.clear();
}
@@ -205,6 +205,9 @@ export async function handleAgentRegister(
205
205
  // Validates that registered happened before reconnected
206
206
  return ctx.deps.length > 0;
207
207
  },
208
+ // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
209
+ filter: ({}, ctx) => ctx.deps.length > 0,
210
+ conditions: [{ timeout_ms: 86_400_000 }], // 1 day: agents may be offline for extended periods
208
211
  });
209
212
  }
210
213
 
@@ -0,0 +1,43 @@
1
+ import type { IncomingMessage, ServerResponse } from "node:http";
2
+ import { runHeartbeatSweep } from "../heartbeat/heartbeat";
3
+ import { route } from "./route-def";
4
+ import { json } from "./utils";
5
+
6
+ // ─── Route Definitions ───────────────────────────────────────────────────────
7
+
8
/**
 * Route definition for `POST /api/heartbeat/sweep`.
 * Requires API-key auth and documents the 200 and 401 responses.
 */
const triggerSweep = route({
  method: "post",
  path: "/api/heartbeat/sweep",
  pattern: ["api", "heartbeat", "sweep"],
  summary: "Trigger an immediate heartbeat sweep",
  tags: ["Heartbeat"],
  responses: {
    200: { description: "Sweep completed successfully" },
    401: { description: "Unauthorized" },
  },
  auth: { apiKey: true },
});
20
+
21
+ // ─── Handler ─────────────────────────────────────────────────────────────────
22
+
23
/**
 * HTTP handler for the heartbeat routes.
 *
 * When the request matches `POST /api/heartbeat/sweep`, runs a heartbeat sweep
 * and answers with `{ success: true, ... }`, or with a 500 and the error
 * message if the sweep throws.
 *
 * @param req - Incoming HTTP request.
 * @param res - Response to write to.
 * @param pathSegments - URL path split into segments, used for route matching.
 * @returns `true` when this handler claimed the request, `false` when the
 *   route did not match and another handler should try.
 */
export async function handleHeartbeat(
  req: IncomingMessage,
  res: ServerResponse,
  pathSegments: string[],
): Promise<boolean> {
  if (!triggerSweep.match(req.method, pathSegments)) {
    return false;
  }

  const parsed = await triggerSweep.parse(req, res, pathSegments, new URLSearchParams());
  if (parsed) {
    try {
      await runHeartbeatSweep();
      json(res, { success: true, message: "Heartbeat sweep completed" });
    } catch (err) {
      const message = err instanceof Error ? err.message : "Unknown error during heartbeat sweep";
      json(res, { success: false, error: message }, 500);
    }
  }
  // NOTE(review): when parsing fails, `parse` has presumably already written
  // the response — confirm against route-def. The request is claimed either way.
  return true;
}
package/src/http/index.ts CHANGED
@@ -4,7 +4,7 @@ import {
4
4
  type Server,
5
5
  type ServerResponse,
6
6
  } from "node:http";
7
- import { assert, initialize } from "@desplega.ai/business-use";
7
+ import { ensure, initialize } from "@desplega.ai/business-use";
8
8
  import type { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
9
9
  import { getEnabledCapabilities, hasCapability } from "@/server";
10
10
  import { initAgentMail } from "../agentmail";
@@ -25,6 +25,7 @@ import { handleDbQuery } from "./db-query";
25
25
  import { handleEcosystem } from "./ecosystem";
26
26
  import { handleEpics } from "./epics";
27
27
  import { handleEvents } from "./events";
28
+ import { handleHeartbeat } from "./heartbeat";
28
29
  import { handleMcp } from "./mcp";
29
30
  import { handleMcpServers } from "./mcp-servers";
30
31
  import { handleMemory } from "./memory";
@@ -120,6 +121,7 @@ const httpServer = createHttpServer(async (req, res) => {
120
121
  () => handleSkills(req, res, pathSegments, queryParams, myAgentId),
121
122
  () => handleMcpServers(req, res, pathSegments, queryParams),
122
123
  () => handleMemory(req, res, pathSegments, myAgentId),
124
+ () => handleHeartbeat(req, res, pathSegments),
123
125
  () => handleEvents(req, res, pathSegments, queryParams, myAgentId),
124
126
  () => handleMcp(req, res, transports),
125
127
  ];
@@ -186,7 +188,7 @@ httpServer
186
188
  .listen(port, async () => {
187
189
  console.log(`MCP HTTP server running on http://localhost:${port}/mcp`);
188
190
 
189
- assert({
191
+ ensure({
190
192
  id: "listen",
191
193
  flow: "api",
192
194
  runId: globalState.__runId!,
package/src/http/poll.ts CHANGED
@@ -141,6 +141,9 @@ export async function handlePoll(
141
141
  previousStatus: pendingTask.status,
142
142
  },
143
143
  validator: (data) => data.previousStatus === "pending",
144
+ // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
145
+ filter: ({}, ctx) => ctx.deps.length > 0,
146
+ conditions: [{ timeout_ms: 300_000 }], // 5 min: polling interval + queue wait
144
147
  });
145
148
 
146
149
  return {
package/src/http/tasks.ts CHANGED
@@ -344,20 +344,32 @@ export async function handleTasks(
344
344
  reason,
345
345
  },
346
346
  validator: (data) => data.previousStatus === "pending",
347
+ // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
348
+ filter: ({}, ctx) => ctx.deps.length > 0,
349
+ conditions: [{ timeout_ms: 86_400_000 }], // 1 day: task may sit pending for a long time
347
350
  });
348
351
  } else {
349
352
  ensure({
350
353
  id: "cancelled_in_progress",
351
354
  flow: "task",
352
355
  runId: parsed.params.id,
353
- depIds: ["started"],
356
+ depIds:
357
+ task.status === "paused"
358
+ ? ["started", "paused"]
359
+ : task.wasPaused
360
+ ? ["started", "resumed"]
361
+ : ["started"],
354
362
  data: {
355
363
  taskId: parsed.params.id,
356
364
  agentId: task.agentId,
357
365
  previousStatus: task.status,
358
366
  reason,
359
367
  },
360
- validator: (data) => data.previousStatus === "in_progress",
368
+ validator: (data) =>
369
+ data.previousStatus === "in_progress" || data.previousStatus === "paused",
370
+ // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
371
+ filter: ({}, ctx) => ctx.deps.length > 0,
372
+ conditions: [{ timeout_ms: 3_600_000 }], // 1 hour: task running time
361
373
  });
362
374
  }
363
375
 
@@ -423,6 +435,8 @@ export async function handleTasks(
423
435
  return { task, alreadyFinished: true };
424
436
  }
425
437
 
438
+ const wasPaused = task.wasPaused;
439
+
426
440
  let updatedTask: typeof task;
427
441
  if (parsed.body.status === "completed") {
428
442
  const result = completeTask(
@@ -448,7 +462,7 @@ export async function handleTasks(
448
462
  updateAgentStatusFromCapacity(task.agentId);
449
463
  }
450
464
 
451
- return { task: updatedTask };
465
+ return { task: updatedTask, wasPaused };
452
466
  })();
453
467
 
454
468
  if ("error" in result && result.error) {
@@ -462,7 +476,7 @@ export async function handleTasks(
462
476
  id: finishEventId,
463
477
  flow: "task",
464
478
  runId: parsed.params.id,
465
- depIds: ["started"],
479
+ depIds: result.wasPaused ? ["started", "resumed"] : ["started"],
466
480
  data: {
467
481
  taskId: parsed.params.id,
468
482
  agentId: myAgentId,
@@ -472,6 +486,9 @@ export async function handleTasks(
472
486
  : { failureReason: parsed.body.failureReason }),
473
487
  },
474
488
  validator: (data) => data.previousStatus === "in_progress",
489
+ // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
490
+ filter: ({}, ctx) => ctx.deps.length > 0,
491
+ conditions: [{ timeout_ms: 3_600_000 }], // 1 hour: task running time
475
492
  });
476
493
  }
477
494
 
@@ -530,6 +547,9 @@ export async function handleTasks(
530
547
  previousStatus: task.status,
531
548
  },
532
549
  validator: (data) => data.previousStatus === "in_progress",
550
+ // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
551
+ filter: ({}, ctx) => ctx.deps.length > 0,
552
+ conditions: [{ timeout_ms: 3_600_000 }], // 1 hour
533
553
  });
534
554
 
535
555
  json(res, { success: true, task: pausedTask });
@@ -585,6 +605,9 @@ export async function handleTasks(
585
605
  previousStatus: task.status,
586
606
  },
587
607
  validator: (data) => data.previousStatus === "paused",
608
+ // biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
609
+ filter: ({}, ctx) => ctx.deps.length > 0,
610
+ conditions: [{ timeout_ms: 86_400_000 }], // 1 day: tasks may stay paused for extended periods
588
611
  });
589
612
 
590
613
  json(res, { success: true, task: resumedTask });
@@ -207,6 +207,17 @@ class PiMonoSession implements ProviderSession {
207
207
  this.lastEmittedMessage = text;
208
208
  }
209
209
  }
210
+ // Emit context_usage for dashboard tracking
211
+ const usage = this.agentSession.getContextUsage();
212
+ if (usage && usage.tokens != null) {
213
+ this.emit({
214
+ type: "context_usage",
215
+ contextUsedTokens: usage.tokens,
216
+ contextTotalTokens: usage.contextWindow,
217
+ contextPercent: usage.percent ?? 0,
218
+ outputTokens: 0,
219
+ });
220
+ }
210
221
  break;
211
222
  }
212
223
  case "tool_execution_start": {
@@ -224,6 +235,13 @@ class PiMonoSession implements ProviderSession {
224
235
  },
225
236
  }),
226
237
  });
238
+ // Emit normalized tool_start for runner auto-progress
239
+ this.emit({
240
+ type: "tool_start",
241
+ toolCallId: event.toolCallId,
242
+ toolName: event.toolName,
243
+ args: event.args,
244
+ });
227
245
  break;
228
246
  }
229
247
  case "tool_execution_end":
@@ -244,6 +262,13 @@ class PiMonoSession implements ProviderSession {
244
262
  },
245
263
  }),
246
264
  });
265
+ // Emit normalized tool_end
266
+ this.emit({
267
+ type: "tool_end",
268
+ toolCallId: event.toolCallId,
269
+ toolName: event.toolName,
270
+ result: event.result,
271
+ });
247
272
  break;
248
273
  case "auto_retry_start":
249
274
  this.emit({
@@ -276,6 +276,7 @@ export function startScheduler(
276
276
  const start = ctx.deps.find((d) => d.id === "listen");
277
277
  return !!start && start.data?.capabilities?.includes("scheduling");
278
278
  },
279
+ conditions: [{ timeout_ms: 10_000 }], // 10s: scheduler starts immediately after listen
279
280
  });
280
281
  }
281
282