@desplega.ai/agent-swarm 1.99.1 → 1.100.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/openapi.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "openapi": "3.1.0",
3
3
  "info": {
4
4
  "title": "Agent Swarm API",
5
- "version": "1.99.1",
5
+ "version": "1.100.0",
6
6
  "description": "Multi-agent orchestration API for Claude Code, Codex, and Gemini CLI. Enables task distribution, agent communication, and service discovery.\n\nMCP tools are documented separately in [MCP.md](./MCP.md)."
7
7
  },
8
8
  "servers": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@desplega.ai/agent-swarm",
3
- "version": "1.99.1",
3
+ "version": "1.100.0",
4
4
  "description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
5
5
  "license": "MIT",
6
6
  "author": "desplega.sh <contact@desplega.sh>",
@@ -111,14 +111,14 @@
111
111
  "@aws-sdk/client-bedrock": "3.1048.0",
112
112
  "@desplega.ai/business-use": "^0.4.2",
113
113
  "@desplega.ai/localtunnel": "^2.2.0",
114
- "@earendil-works/pi-agent-core": "^0.79.1",
115
- "@earendil-works/pi-ai": "^0.79.1",
116
- "@earendil-works/pi-coding-agent": "^0.79.1",
114
+ "@earendil-works/pi-agent-core": "^0.79.6",
115
+ "@earendil-works/pi-ai": "^0.79.6",
116
+ "@earendil-works/pi-coding-agent": "^0.79.6",
117
117
  "@inkjs/ui": "^2.0.0",
118
118
  "@linear/sdk": "^77.0.0",
119
119
  "@modelcontextprotocol/sdk": "^1.25.1",
120
- "@openai/codex-sdk": "^0.139.0",
121
- "@opencode-ai/sdk": "^1.17.4",
120
+ "@openai/codex-sdk": "^0.140.0",
121
+ "@opencode-ai/sdk": "^1.17.7",
122
122
  "@openfort/openfort-node": "^0.9.1",
123
123
  "@opentelemetry/api": "^1.9.1",
124
124
  "@opentelemetry/exporter-trace-otlp-http": "^0.218.0",
package/src/be/db.ts CHANGED
@@ -4,6 +4,7 @@ import pkg from "../../package.json";
4
4
  import { addEyesReactionOnTaskStart } from "../github/task-reactions";
5
5
  import { type ModelTier, parseModelTier } from "../model-tiers";
6
6
  import { configureDbResolver } from "../prompts/resolver";
7
+ import { telemetry } from "../telemetry";
7
8
  import type {
8
9
  ActiveSession,
9
10
  Agent,
@@ -114,6 +115,19 @@ import { isReservedConfigKey, reservedKeyError } from "./swarm-config-guard";
114
115
  let db: Database | null = null;
115
116
  let sqliteVecAvailable = false;
116
117
 
118
+ type TaskTelemetryProps = Parameters<typeof telemetry.taskEvent>[1];
119
+
120
/**
 * Defers a task lifecycle telemetry event to a microtask instead of emitting
 * it inline with the DB write.
 *
 * When the microtask runs, the task is re-read via getTaskById(props.taskId).
 * If `verify` is supplied and returns false for that row (or for null), the
 * event is dropped; otherwise telemetry.taskEvent(event, props) is called.
 *
 * @param event  Event name forwarded to telemetry.taskEvent.
 * @param props  Payload forwarded unchanged; props.taskId is the row re-read.
 * @param verify Optional predicate over the freshly read task (null when the
 *               row is not found). Omit it to emit unconditionally.
 *
 * NOTE(review): presumably the deferral lets a synchronous enclosing
 * transaction commit or roll back before the check runs - confirm this still
 * holds for any async transaction wrappers.
 * NOTE(review): nothing here catches an exception thrown by getTaskById,
 * verify, or telemetry.taskEvent inside the microtask - confirm that is
 * acceptable (e.g. if the DB is closed before the microtask runs).
 */
+ function emitTaskLifecycleTelemetryAfterCommit(
121
+ event: string,
122
+ props: TaskTelemetryProps,
123
+ verify?: (task: AgentTask | null) => boolean,
124
+ ): void {
125
+ queueMicrotask(() => {
126
// Re-read the task at emission time and skip the event if it no longer matches.
+ if (verify && !verify(getTaskById(props.taskId))) return;
127
+ telemetry.taskEvent(event, props);
128
+ });
129
+ }
130
+
117
131
  export function isSqliteVecAvailable(): boolean {
118
132
  return sqliteVecAvailable;
119
133
  }
@@ -2105,6 +2119,16 @@ export function completeTask(id: string, output?: string): AgentTask | null {
2105
2119
  }
2106
2120
 
2107
2121
  if (row && oldTask) {
2122
+ emitTaskLifecycleTelemetryAfterCommit(
2123
+ "completed",
2124
+ {
2125
+ taskId: id,
2126
+ agentId: row.agentId ?? undefined,
2127
+ durationMs: row.createdAt ? Date.now() - new Date(row.createdAt).getTime() : undefined,
2128
+ },
2129
+ (task) => task?.status === "completed",
2130
+ );
2131
+
2108
2132
  try {
2109
2133
  createLogEntry({
2110
2134
  eventType: "task_status_change",
@@ -2145,6 +2169,16 @@ export function failTask(id: string, reason: string): AgentTask | null {
2145
2169
  const scrubbedReason = scrubSecrets(reason);
2146
2170
  const row = taskQueries.setFailure().get(scrubbedReason, finishedAt, id);
2147
2171
  if (row && oldTask) {
2172
+ emitTaskLifecycleTelemetryAfterCommit(
2173
+ "failed",
2174
+ {
2175
+ taskId: id,
2176
+ agentId: row.agentId ?? undefined,
2177
+ durationMs: row.createdAt ? Date.now() - new Date(row.createdAt).getTime() : undefined,
2178
+ },
2179
+ (task) => task?.status === "failed",
2180
+ );
2181
+
2148
2182
  try {
2149
2183
  createLogEntry({
2150
2184
  eventType: "task_status_change",
@@ -2192,6 +2226,20 @@ export function cancelTask(id: string, reason?: string): AgentTask | null {
2192
2226
  const row = taskQueries.setCancelled().get(cancelReason, finishedAt, id);
2193
2227
 
2194
2228
  if (row && oldTask) {
2229
+ emitTaskLifecycleTelemetryAfterCommit(
2230
+ "cancelled",
2231
+ {
2232
+ taskId: id,
2233
+ source: oldTask.source,
2234
+ agentId: oldTask.agentId ?? undefined,
2235
+ previousStatus: oldTask.status,
2236
+ durationMs: oldTask.createdAt
2237
+ ? Date.now() - new Date(oldTask.createdAt).getTime()
2238
+ : undefined,
2239
+ },
2240
+ (task) => task?.status === "cancelled",
2241
+ );
2242
+
2195
2243
  try {
2196
2244
  createLogEntry({
2197
2245
  eventType: "task_status_change",
@@ -3157,6 +3205,18 @@ export function createTaskExtended(task: string, options?: CreateTaskOptions): A
3157
3205
  });
3158
3206
  } catch {}
3159
3207
 
3208
+ emitTaskLifecycleTelemetryAfterCommit(
3209
+ "created",
3210
+ {
3211
+ taskId: row.id,
3212
+ source: row.source,
3213
+ tags: options?.tags ?? [],
3214
+ hasParent: !!row.parentTaskId,
3215
+ priority: row.priority,
3216
+ },
3217
+ (task) => task !== null,
3218
+ );
3219
+
3160
3220
  try {
3161
3221
  import("../workflows/event-bus").then(({ workflowEventBus }) => {
3162
3222
  workflowEventBus.emit("task.created", {
package/src/http/index.ts CHANGED
@@ -382,7 +382,7 @@ async function shutdown() {
382
382
  // Stop OAuth keepalive
383
383
  if (process.env.OAUTH_KEEPALIVE_DISABLE !== "true") {
384
384
  const { stopOAuthKeepalive } = await import("../oauth/keepalive");
385
- stopOAuthKeepalive();
385
+ await stopOAuthKeepalive();
386
386
  }
387
387
 
388
388
  // Stop MCP OAuth pending-session garbage collector
package/src/http/tasks.ts CHANGED
@@ -26,7 +26,6 @@ import {
26
26
  import { ModelTierSchema, splitLegacyModelAlias } from "../model-tiers";
27
27
  import { createTaskWithSiblingAwareness } from "../tasks/sibling-awareness";
28
28
  import { createResumeFollowUp, createWorkerTaskFollowUp } from "../tasks/worker-follow-up";
29
- import { telemetry } from "../telemetry";
30
29
  import {
31
30
  type AgentTaskSource,
32
31
  AgentTaskSourceSchema,
@@ -420,14 +419,6 @@ export async function handleTasks(
420
419
  },
421
420
  });
422
421
 
423
- telemetry.taskEvent("created", {
424
- taskId: task.id,
425
- source: task.source,
426
- tags: parsed.body.tags ?? [],
427
- hasParent: !!task.parentTaskId,
428
- priority: task.priority,
429
- });
430
-
431
422
  json(res, task, 201);
432
423
  } catch (error) {
433
424
  console.error("[HTTP] Failed to create task:", error);
@@ -536,14 +527,6 @@ export async function handleTasks(
536
527
  });
537
528
  }
538
529
 
539
- telemetry.taskEvent("cancelled", {
540
- taskId: parsed.params.id,
541
- source: task.source,
542
- agentId: task.agentId ?? undefined,
543
- previousStatus: task.status,
544
- durationMs: task.createdAt ? Date.now() - new Date(task.createdAt).getTime() : undefined,
545
- });
546
-
547
530
  if (task.agentId) {
548
531
  updateAgentStatusFromCapacity(task.agentId);
549
532
  }
@@ -645,15 +628,6 @@ export async function handleTasks(
645
628
  if (result.task && !("alreadyFinished" in result && result.alreadyFinished)) {
646
629
  const finishEventId = parsed.body.status === "completed" ? "completed" : "failed";
647
630
 
648
- const durationMs = result.task.createdAt
649
- ? Date.now() - new Date(result.task.createdAt).getTime()
650
- : undefined;
651
-
652
- telemetry.taskEvent(finishEventId, {
653
- taskId: parsed.params.id,
654
- agentId: myAgentId,
655
- durationMs,
656
- });
657
631
  ensure({
658
632
  id: finishEventId,
659
633
  flow: "task",
@@ -10,7 +10,6 @@ const SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1000;
10
10
  * first refresh round-trip (Atlassian returns the authoritative expiry).
11
11
  */
12
12
  const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
13
- const SLACK_ALERTS_CHANNEL = process.env.SLACK_ALERTS_CHANNEL || "C08JCRURPBV";
14
13
 
15
14
  const WEBHOOK_EVENTS = [
16
15
  "jira:issue_updated",
@@ -40,6 +39,12 @@ function getRegisteredWebhookUrl(): string {
40
39
  // ─── Slack alert (best-effort) ───────────────────────────────────────────────
41
40
 
42
41
  async function notifySlack(text: string): Promise<void> {
42
+ const channel = process.env.SLACK_ALERTS_CHANNEL;
43
+ if (!channel) {
44
+ console.warn("[Jira webhook keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert");
45
+ return;
46
+ }
47
+
43
48
  try {
44
49
  const { getSlackApp } = await import("../slack/app");
45
50
  const app = getSlackApp();
@@ -48,13 +53,21 @@ async function notifySlack(text: string): Promise<void> {
48
53
  return;
49
54
  }
50
55
  await app.client.chat.postMessage({
51
- channel: SLACK_ALERTS_CHANNEL,
56
+ channel,
52
57
  text,
53
58
  });
54
- console.log("[Jira webhook keepalive] Slack notification sent");
59
+ console.log(`[Jira webhook keepalive] Slack notification sent to ${channel}`);
55
60
  } catch (slackErr) {
61
+ const code =
62
+ typeof slackErr === "object" && slackErr !== null && "code" in slackErr
63
+ ? ` code=${String(slackErr.code)}`
64
+ : "";
65
+ const data =
66
+ typeof slackErr === "object" && slackErr !== null && "data" in slackErr
67
+ ? ` data=${JSON.stringify(slackErr.data)}`
68
+ : "";
56
69
  console.error(
57
- "[Jira webhook keepalive] Failed to send Slack notification:",
70
+ `[Jira webhook keepalive] Failed to send Slack notification to ${channel}${code}${data}:`,
58
71
  slackErr instanceof Error ? slackErr.message : slackErr,
59
72
  );
60
73
  }
@@ -362,3 +375,9 @@ export function stopJiraWebhookKeepalive(): void {
362
375
  console.log("[Jira webhook keepalive] Stopped");
363
376
  }
364
377
  }
378
+
379
+ // ─── Test helpers (exported for unit tests only) ─────────────────────────────
380
+
381
+ export const _test = {
382
+ notifySlack,
383
+ };
@@ -1,42 +1,46 @@
1
1
  import { ensureTokenOrThrow } from "./ensure-token";
2
2
 
3
- // Tick every 50 minutes with a 65-minute "expiring soon" buffer.
4
- //
5
- // Atlassian (and Linear) issue 1h access tokens. With this cadence the DB row
6
- // is always rotated before its current access token expires, so anything that
7
- // reads oauth_tokens.accessToken directly without going through jiraFetch /
8
- // linear-outbound (e.g. agents using the read-only db-query MCP) sees a
9
- // not-yet-expired token. The 65-min buffer is wider than the access-token
10
- // lifetime, so isTokenExpiringSoon always returns true and every tick rotates.
11
- //
12
- // Touching the row this often also serves the original "keep the refresh
13
- // token alive" goal — Atlassian expires inactive refresh tokens after 90 days,
14
- // and Linear's behavior is similar; refreshing every 50 min trivially keeps
15
- // both providers active.
16
- const KEEPALIVE_INTERVAL_MS = 50 * 60 * 1000;
17
- const KEEPALIVE_BUFFER_MS = 65 * 60 * 1000;
18
- const SLACK_ALERTS_CHANNEL = process.env.SLACK_ALERTS_CHANNEL || "C08JCRURPBV";
3
+ // Keep refresh tokens warm without constantly rotating strict-rotation
4
+ // providers. Reactive callers still refresh access tokens before API use.
5
+ const KEEPALIVE_INTERVAL_MS = 12 * 60 * 60 * 1000;
6
+ const KEEPALIVE_BUFFER_MS = 10 * 60 * 1000;
7
+ const STARTUP_KEEPALIVE_DELAY_MS = 10_000;
19
8
 
20
9
  const KEEPALIVE_PROVIDERS = ["linear", "jira"] as const;
21
10
 
22
11
  let keepaliveInterval: ReturnType<typeof setInterval> | null = null;
12
+ let startupKeepaliveTimeout: ReturnType<typeof setTimeout> | null = null;
13
+ let inflightKeepalive: Promise<void> | null = null;
14
+
15
/**
 * Single-flight wrapper around runKeepalive.
 *
 * If a keepalive run is already in flight, logs that this trigger was skipped
 * and returns the existing promise, so callers awaiting it still wait for the
 * ongoing run. Otherwise it starts runKeepalive(trigger), stores the promise
 * in `inflightKeepalive`, and clears that slot once the run settles.
 *
 * @param trigger What caused the run; used in the log line and passed on to
 *                runKeepalive.
 * @returns The promise of the run in flight (new or pre-existing).
 */
+ function scheduleKeepaliveRun(trigger: "startup" | "interval" | "manual"): Promise<void> {
16
+ if (inflightKeepalive) {
17
+ console.log(`[OAuth Keepalive] ${trigger} tick skipped; previous run still in flight`);
18
+ return inflightKeepalive;
19
+ }
20
+
21
// .finally() runs on both success and failure, so the slot is always released.
+ inflightKeepalive = runKeepalive(trigger).finally(() => {
22
+ inflightKeepalive = null;
23
+ });
24
+ return inflightKeepalive;
25
+ }
23
26
 
24
27
  /**
25
28
  * Proactively refresh OAuth tokens on a schedule.
26
29
  *
27
30
  * Two purposes, both served by the same tick:
28
31
  *
29
- * 1. Access-token freshness in the DB. Anything that reads
30
- * `oauth_tokens.accessToken` directly (db-query MCP, future MCP servers,
31
- * `tracker-status`) needs a not-yet-expired value. The 50-min cadence
32
- * keeps the row ahead of the 1h access-token lifetime.
33
- * 2. Refresh-token liveness. Atlassian rotates refresh tokens and expires
32
+ * 1. Refresh-token liveness. Atlassian rotates refresh tokens and expires
34
33
  * them after ~90 days of inactivity, so silent gaps in usage would kill
35
- * the integration. Refreshing on every tick keeps the refresh token
36
- * active and surfaces a dead one as a Slack alert instead of a runtime
37
- * 401 in the middle of an agent task.
34
+ * the integration. The 12h cadence keeps the refresh token active without
35
+ * rotating it dozens of times per day.
36
+ * 2. Loud failure on boot and during scheduled checks. A dead token surfaces
37
+ * as structured logs plus a Slack alert instead of silently retrying.
38
+ *
39
+ * Access-token freshness is handled reactively by ensureToken callers before
40
+ * Jira/Linear API use.
38
41
  */
39
- async function runKeepalive(): Promise<void> {
42
+ async function runKeepalive(trigger: "startup" | "interval" | "manual" = "manual"): Promise<void> {
43
+ console.log(`[OAuth Keepalive] Running ${trigger} token refresh check`);
40
44
  for (const provider of KEEPALIVE_PROVIDERS) {
41
45
  console.log(`[OAuth Keepalive] Running scheduled token refresh for ${provider}...`);
42
46
  try {
@@ -53,6 +57,12 @@ async function runKeepalive(): Promise<void> {
53
57
  }
54
58
 
55
59
  async function notifySlack(text: string): Promise<void> {
60
+ const channel = process.env.SLACK_ALERTS_CHANNEL;
61
+ if (!channel) {
62
+ console.warn("[OAuth Keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert");
63
+ return;
64
+ }
65
+
56
66
  try {
57
67
  const { getSlackApp } = await import("../slack/app");
58
68
  const app = getSlackApp();
@@ -61,13 +71,21 @@ async function notifySlack(text: string): Promise<void> {
61
71
  return;
62
72
  }
63
73
  await app.client.chat.postMessage({
64
- channel: SLACK_ALERTS_CHANNEL,
74
+ channel,
65
75
  text,
66
76
  });
67
- console.log("[OAuth Keepalive] Slack notification sent");
77
+ console.log(`[OAuth Keepalive] Slack notification sent to ${channel}`);
68
78
  } catch (slackErr) {
79
+ const code =
80
+ typeof slackErr === "object" && slackErr !== null && "code" in slackErr
81
+ ? ` code=${String(slackErr.code)}`
82
+ : "";
83
+ const data =
84
+ typeof slackErr === "object" && slackErr !== null && "data" in slackErr
85
+ ? ` data=${JSON.stringify(slackErr.data)}`
86
+ : "";
69
87
  console.error(
70
- "[OAuth Keepalive] Failed to send Slack notification:",
88
+ `[OAuth Keepalive] Failed to send Slack notification to ${channel}${code}${data}:`,
71
89
  slackErr instanceof Error ? slackErr.message : slackErr,
72
90
  );
73
91
  }
@@ -87,21 +105,45 @@ export function startOAuthKeepalive(): void {
87
105
  `[OAuth Keepalive] Starting (interval ${Math.round(KEEPALIVE_INTERVAL_MS / 60_000)}min, buffer ${Math.round(KEEPALIVE_BUFFER_MS / 60_000)}min)`,
88
106
  );
89
107
 
90
- // Run once after a short delay (let server finish startup)
91
- setTimeout(() => runKeepalive(), 10_000);
108
+ // Run once after a short delay (let server finish startup).
109
+ startupKeepaliveTimeout = setTimeout(() => {
110
+ startupKeepaliveTimeout = null;
111
+ scheduleKeepaliveRun("startup");
112
+ }, STARTUP_KEEPALIVE_DELAY_MS);
92
113
 
93
114
  keepaliveInterval = setInterval(() => {
94
- runKeepalive();
115
+ scheduleKeepaliveRun("interval");
95
116
  }, KEEPALIVE_INTERVAL_MS);
96
117
  }
97
118
 
98
119
  /**
99
- * Stop the OAuth keepalive timer.
120
+ * Stop the OAuth keepalive timer and wait for any in-flight refresh to persist.
100
121
  */
101
- export function stopOAuthKeepalive(): void {
122
+ export async function stopOAuthKeepalive(): Promise<void> {
123
+ if (startupKeepaliveTimeout) {
124
+ clearTimeout(startupKeepaliveTimeout);
125
+ startupKeepaliveTimeout = null;
126
+ }
127
+
102
128
  if (keepaliveInterval) {
103
129
  clearInterval(keepaliveInterval);
104
130
  keepaliveInterval = null;
105
131
  console.log("[OAuth Keepalive] Stopped");
106
132
  }
133
+
134
+ if (inflightKeepalive) {
135
+ console.log("[OAuth Keepalive] Waiting for in-flight token refresh before shutdown");
136
+ await inflightKeepalive;
137
+ }
107
138
  }
139
+
140
+ // ─── Test helpers (exported for unit tests only) ─────────────────────────────
141
+
142
+ export const _test = {
143
+ KEEPALIVE_INTERVAL_MS,
144
+ KEEPALIVE_BUFFER_MS,
145
+ STARTUP_KEEPALIVE_DELAY_MS,
146
+ notifySlack,
147
+ runKeepalive: scheduleKeepaliveRun,
148
+ getInflightKeepalive: () => inflightKeepalive,
149
+ };
@@ -1,10 +1,19 @@
1
- import { afterAll, beforeAll, beforeEach, describe, expect, mock, test } from "bun:test";
1
+ import { afterAll, beforeAll, beforeEach, describe, expect, mock, spyOn, test } from "bun:test";
2
2
  import { unlink } from "node:fs/promises";
3
3
  import { closeDb, getDb, initDb } from "../be/db";
4
4
  import { upsertOAuthApp } from "../be/db-queries/oauth";
5
5
  import { getJiraMetadata, updateJiraMetadata } from "../jira/metadata";
6
6
 
7
7
  const TEST_DB_PATH = "./test-jira-webhook-lifecycle.sqlite";
8
+ const originalSlackAlertsChannel = process.env.SLACK_ALERTS_CHANNEL;
9
+
10
/**
 * Puts process.env.SLACK_ALERTS_CHANNEL back to the value captured in
 * `originalSlackAlertsChannel` when this test module was loaded.
 *
 * The variable is deleted rather than assigned when it was originally unset,
 * because values assigned to process.env are coerced to strings.
 */
+ function restoreSlackAlertsChannel(): void {
11
+ if (originalSlackAlertsChannel === undefined) {
12
+ delete process.env.SLACK_ALERTS_CHANNEL;
13
+ return;
14
+ }
15
+ process.env.SLACK_ALERTS_CHANNEL = originalSlackAlertsChannel;
16
+ }
8
17
 
9
18
  // Mock the Jira fetch client. Each test installs its own per-call response.
10
19
  const jiraFetchMock = mock(
@@ -38,22 +47,40 @@ beforeAll(() => {
38
47
  afterAll(async () => {
39
48
  delete process.env.JIRA_WEBHOOK_TOKEN;
40
49
  delete process.env.MCP_BASE_URL;
50
+ restoreSlackAlertsChannel();
41
51
  closeDb();
42
52
  await unlink(TEST_DB_PATH).catch(() => {});
43
53
  await unlink(`${TEST_DB_PATH}-wal`).catch(() => {});
44
54
  await unlink(`${TEST_DB_PATH}-shm`).catch(() => {});
45
55
  });
46
56
 
47
- const { refreshJiraWebhooks, registerJiraWebhook } = await import("../jira/webhook-lifecycle");
57
+ const { _test, refreshJiraWebhooks, registerJiraWebhook } = await import(
58
+ "../jira/webhook-lifecycle"
59
+ );
48
60
 
49
61
  beforeEach(() => {
50
62
  jiraFetchMock.mockClear();
63
+ restoreSlackAlertsChannel();
51
64
  // Reset the webhookIds list each test (and clear metadata writebacks).
52
65
  getDb()
53
66
  .query("UPDATE oauth_apps SET metadata = ? WHERE provider = 'jira'")
54
67
  .run(JSON.stringify({ cloudId: "cloud-1", siteUrl: "https://example.atlassian.net" }));
55
68
  });
56
69
 
70
+ describe("Jira webhook Slack alerts", () => {
71
+ test("skips Slack notification when alerts channel env is unset", async () => {
72
+ delete process.env.SLACK_ALERTS_CHANNEL;
73
+ const warn = spyOn(console, "warn").mockImplementation(() => {});
74
+
75
+ await expect(_test.notifySlack("test alert")).resolves.toBeUndefined();
76
+
77
+ expect(warn).toHaveBeenCalledWith(
78
+ "[Jira webhook keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert",
79
+ );
80
+ warn.mockRestore();
81
+ });
82
+ });
83
+
57
84
  describe("registerJiraWebhook", () => {
58
85
  test("posts the right body shape and persists webhookId into metadata", async () => {
59
86
  jiraFetchMock.mockImplementationOnce(
@@ -68,6 +68,13 @@ function fakeReqRes(rawBody: string, headers: Record<string, string>) {
68
68
  return { req, res, captured };
69
69
  }
70
70
 
71
/**
 * Polls `predicate` until it returns true, checking at most 20 times with a
 * 5 ms timer between checks.
 *
 * Resolves as soon as the predicate holds. It also resolves (without
 * throwing) when all 20 checks fail, so callers must assert the expected
 * state themselves after awaiting this helper.
 *
 * @param predicate Synchronous condition to poll.
 */
+ async function waitFor(predicate: () => boolean): Promise<void> {
72
+ for (let i = 0; i < 20; i++) {
73
+ if (predicate()) return;
74
+ await new Promise((resolve) => setTimeout(resolve, 5));
75
+ }
76
+ }
77
+
71
78
  const KAPSO_PATH = ["api", "integrations", "kapso", "webhook"];
72
79
 
73
80
  beforeAll(() => {
@@ -226,9 +233,8 @@ describe("handleWebhooks — Kapso HMAC gate", () => {
226
233
  return new Response(JSON.stringify({ success: true }), { status: 200 });
227
234
  }) as typeof fetch;
228
235
 
229
- const rawBody = JSON.stringify(
230
- makePayload({ phoneNumberId: "pn-http", messageId: "wamid.HTTP_OK" }),
231
- );
236
+ const messageId = `wamid.HTTP_OK_${crypto.randomUUID()}`;
237
+ const rawBody = JSON.stringify(makePayload({ phoneNumberId: "pn-http", messageId }));
232
238
  const { req, res, captured } = fakeReqRes(rawBody, {
233
239
  "x-webhook-signature": sign(HMAC_SECRET, rawBody),
234
240
  });
@@ -236,22 +242,37 @@ describe("handleWebhooks — Kapso HMAC gate", () => {
236
242
  expect(handled).toBe(true);
237
243
  expect(captured.status).toBe(200);
238
244
  expect(JSON.parse(captured.body)).toMatchObject({ received: true, routing: "task" });
239
- expect(calls).toHaveLength(2);
245
+ await waitFor(
246
+ () =>
247
+ calls.some((call) => call.body.message_id === messageId) &&
248
+ calls.some(
249
+ (call) =>
250
+ (call.body.reaction as { message_id?: string } | undefined)?.message_id === messageId,
251
+ ),
252
+ );
253
+ const messageCalls = calls.filter(
254
+ (call) =>
255
+ call.body.message_id === messageId ||
256
+ (call.body.reaction as { message_id?: string } | undefined)?.message_id === messageId,
257
+ );
258
+ expect(messageCalls).toHaveLength(2);
240
259
  expect(
241
- calls.every((call) => call.url === "https://kapso.test/meta/whatsapp/v24.0/pn-http/messages"),
260
+ messageCalls.every(
261
+ (call) => call.url === "https://kapso.test/meta/whatsapp/v24.0/pn-http/messages",
262
+ ),
242
263
  ).toBe(true);
243
- expect(calls.map((call) => call.body)).toContainEqual({
264
+ expect(messageCalls.map((call) => call.body)).toContainEqual({
244
265
  messaging_product: "whatsapp",
245
266
  status: "read",
246
- message_id: "wamid.HTTP_OK",
267
+ message_id: messageId,
247
268
  typing_indicator: { type: "text" },
248
269
  });
249
- expect(calls.map((call) => call.body)).toContainEqual({
270
+ expect(messageCalls.map((call) => call.body)).toContainEqual({
250
271
  messaging_product: "whatsapp",
251
272
  recipient_type: "individual",
252
273
  to: "34679077777",
253
274
  type: "reaction",
254
- reaction: { message_id: "wamid.HTTP_OK", emoji: "👀" },
275
+ reaction: { message_id: messageId, emoji: "👀" },
255
276
  });
256
277
  });
257
278
 
@@ -0,0 +1,135 @@
1
+ import { afterAll, beforeAll, beforeEach, describe, expect, mock, spyOn, test } from "bun:test";
2
+ import { unlink } from "node:fs/promises";
3
+ import { closeDb, initDb } from "../be/db";
4
+ import {
5
+ deleteOAuthTokens,
6
+ getOAuthTokens,
7
+ storeOAuthTokens,
8
+ upsertOAuthApp,
9
+ } from "../be/db-queries/oauth";
10
+ import { _test, stopOAuthKeepalive } from "../oauth/keepalive";
11
+
12
+ const TEST_DB_PATH = "./test-oauth-keepalive.sqlite";
13
+
14
+ const originalSlackAlertsChannel = process.env.SLACK_ALERTS_CHANNEL;
15
+ function restoreSlackAlertsChannel(): void {
16
+ if (originalSlackAlertsChannel === undefined) {
17
+ delete process.env.SLACK_ALERTS_CHANNEL;
18
+ return;
19
+ }
20
+ process.env.SLACK_ALERTS_CHANNEL = originalSlackAlertsChannel;
21
+ }
22
+
23
+ const testApp = {
24
+ clientId: "test-client-id",
25
+ clientSecret: "test-client-secret",
26
+ authorizeUrl: "https://example.com/oauth/authorize",
27
+ tokenUrl: "https://example.com/oauth/token",
28
+ redirectUri: "http://localhost:3013/callback",
29
+ scopes: "read,write",
30
+ };
31
+
32
+ const originalFetch = globalThis.fetch;
33
+
34
+ beforeAll(() => {
35
+ initDb(TEST_DB_PATH);
36
+ upsertOAuthApp("linear", testApp);
37
+ upsertOAuthApp("jira", {
38
+ ...testApp,
39
+ tokenUrl: "https://example.com/jira/oauth/token",
40
+ });
41
+ });
42
+
43
+ beforeEach(async () => {
44
+ await stopOAuthKeepalive();
45
+ deleteOAuthTokens("linear");
46
+ deleteOAuthTokens("jira");
47
+ globalThis.fetch = originalFetch;
48
+ restoreSlackAlertsChannel();
49
+ mock.restore();
50
+ });
51
+
52
+ afterAll(async () => {
53
+ await stopOAuthKeepalive();
54
+ globalThis.fetch = originalFetch;
55
+ restoreSlackAlertsChannel();
56
+ closeDb();
57
+ await unlink(TEST_DB_PATH).catch(() => {});
58
+ await unlink(`${TEST_DB_PATH}-wal`).catch(() => {});
59
+ await unlink(`${TEST_DB_PATH}-shm`).catch(() => {});
60
+ });
61
+
62
+ describe("OAuth keepalive", () => {
63
+ test("uses a 12h cadence with a 10m refresh buffer", () => {
64
+ expect(_test.KEEPALIVE_INTERVAL_MS).toBe(12 * 60 * 60 * 1000);
65
+ expect(_test.KEEPALIVE_BUFFER_MS).toBe(10 * 60 * 1000);
66
+ });
67
+
68
+ test("skips Slack notification when alerts channel env is unset", async () => {
69
+ delete process.env.SLACK_ALERTS_CHANNEL;
70
+ const warn = spyOn(console, "warn").mockImplementation(() => {});
71
+
72
+ await expect(_test.notifySlack("test alert")).resolves.toBeUndefined();
73
+
74
+ expect(warn).toHaveBeenCalledWith(
75
+ "[OAuth Keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert",
76
+ );
77
+ });
78
+
79
+ test("stopOAuthKeepalive waits for in-flight Jira refresh persistence", async () => {
80
+ storeOAuthTokens("linear", {
81
+ accessToken: "linear-access",
82
+ refreshToken: "linear-refresh",
83
+ expiresAt: new Date(Date.now() + 60 * 60 * 1000).toISOString(),
84
+ });
85
+ storeOAuthTokens("jira", {
86
+ accessToken: "old-jira-access",
87
+ refreshToken: "old-jira-refresh",
88
+ expiresAt: new Date(Date.now() + 60 * 1000).toISOString(),
89
+ });
90
+
91
+ let releaseTokenResponse!: () => void;
92
+ const tokenResponseReady = new Promise<void>((resolve) => {
93
+ releaseTokenResponse = resolve;
94
+ });
95
+ let fetchStarted!: () => void;
96
+ const fetchStartedPromise = new Promise<void>((resolve) => {
97
+ fetchStarted = resolve;
98
+ });
99
+
100
+ globalThis.fetch = mock(async () => {
101
+ fetchStarted();
102
+ await tokenResponseReady;
103
+ return new Response(
104
+ JSON.stringify({
105
+ access_token: "new-jira-access",
106
+ token_type: "Bearer",
107
+ expires_in: 3600,
108
+ refresh_token: "new-jira-refresh",
109
+ }),
110
+ { status: 200, headers: { "Content-Type": "application/json" } },
111
+ );
112
+ });
113
+
114
+ const keepaliveRun = _test.runKeepalive("manual");
115
+ await fetchStartedPromise;
116
+
117
+ let stopResolved = false;
118
+ const stopPromise = stopOAuthKeepalive().then(() => {
119
+ stopResolved = true;
120
+ });
121
+
122
+ await Promise.resolve();
123
+ expect(stopResolved).toBe(false);
124
+ expect(getOAuthTokens("jira")?.refreshToken).toBe("old-jira-refresh");
125
+
126
+ releaseTokenResponse();
127
+ await stopPromise;
128
+ await keepaliveRun;
129
+
130
+ expect(stopResolved).toBe(true);
131
+ const tokens = getOAuthTokens("jira");
132
+ expect(tokens?.accessToken).toBe("new-jira-access");
133
+ expect(tokens?.refreshToken).toBe("new-jira-refresh");
134
+ });
135
+ });
@@ -0,0 +1,153 @@
1
+ import { afterEach, beforeEach, describe, expect, spyOn, test } from "bun:test";
2
+ import { unlink } from "node:fs/promises";
3
+ import {
4
+ cancelTask,
5
+ closeDb,
6
+ completeTask,
7
+ createAgent,
8
+ createTaskExtended,
9
+ failTask,
10
+ getDb,
11
+ initDb,
12
+ } from "../be/db";
13
+ import { telemetry } from "../telemetry";
14
+
15
+ const TEST_DB_PATH = "./test-task-lifecycle-telemetry.sqlite";
16
+ const WORKER_ID = "bbbb0000-0000-4000-8000-000000000002";
17
+
18
/**
 * Yields once to the microtask queue by awaiting an already-resolved promise,
 * so microtasks queued before this call get a chance to run before the caller
 * continues.
 *
 * NOTE(review): this is a single yield; work that queues further microtasks
 * from inside a microtask may need more than one flush - confirm per test.
 */
+ async function flushMicrotasks(): Promise<void> {
19
+ await Promise.resolve();
20
+ }
21
+
22
/**
 * Deletes the test database file at TEST_DB_PATH together with its "-wal" and
 * "-shm" companion files, one after another.
 *
 * NOTE(review): the catch below swallows every unlink error, not only a
 * missing file - a permissions or busy-file failure would also pass silently.
 */
+ async function removeTestDb(): Promise<void> {
23
+ for (const suffix of ["", "-wal", "-shm"]) {
24
+ try {
25
+ await unlink(TEST_DB_PATH + suffix);
26
+ } catch {
27
+ // File does not exist.
28
+ }
29
+ }
30
+ }
31
+
32
+ describe("task lifecycle telemetry", () => {
33
+ let taskEventSpy: ReturnType<typeof spyOn>;
34
+ let calls: Array<{ event: string; props: Parameters<typeof telemetry.taskEvent>[1] }>;
35
+
36
+ beforeEach(async () => {
37
+ closeDb();
38
+ await removeTestDb();
39
+ initDb(TEST_DB_PATH);
40
+ createAgent({ id: WORKER_ID, name: "Telemetry Worker", isLead: false, status: "idle" });
41
+
42
+ calls = [];
43
+ taskEventSpy = spyOn(telemetry, "taskEvent").mockImplementation((event, props) => {
44
+ calls.push({ event, props });
45
+ });
46
+ });
47
+
48
+ afterEach(async () => {
49
+ taskEventSpy.mockRestore();
50
+ closeDb();
51
+ await removeTestDb();
52
+ });
53
+
54
+ test("emits task.created from createTaskExtended after the task is committed", async () => {
55
+ const task = createTaskExtended("create telemetry", {
56
+ agentId: WORKER_ID,
57
+ source: "mcp",
58
+ tags: ["telemetry"],
59
+ priority: 60,
60
+ });
61
+
62
+ expect(calls).toHaveLength(0);
63
+
64
+ await flushMicrotasks();
65
+
66
+ expect(calls).toEqual([
67
+ {
68
+ event: "created",
69
+ props: {
70
+ taskId: task.id,
71
+ source: "mcp",
72
+ tags: ["telemetry"],
73
+ hasParent: false,
74
+ priority: 60,
75
+ },
76
+ },
77
+ ]);
78
+ });
79
+
80
+ test("does not emit task.created when an enclosing transaction rolls back", async () => {
81
+ const txn = getDb().transaction(() => {
82
+ createTaskExtended("rolled back telemetry", {
83
+ agentId: WORKER_ID,
84
+ source: "mcp",
85
+ });
86
+ throw new Error("rollback");
87
+ });
88
+
89
+ expect(() => txn()).toThrow("rollback");
90
+
91
+ await flushMicrotasks();
92
+
93
+ expect(calls).toHaveLength(0);
94
+ });
95
+
96
+ test("emits terminal lifecycle events from universal status helpers", async () => {
97
+ const completedTask = createTaskExtended("complete telemetry", {
98
+ agentId: WORKER_ID,
99
+ source: "mcp",
100
+ });
101
+ await flushMicrotasks();
102
+ calls = [];
103
+
104
+ completeTask(completedTask.id, "done");
105
+ await flushMicrotasks();
106
+
107
+ expect(calls).toHaveLength(1);
108
+ expect(calls[0]).toMatchObject({
109
+ event: "completed",
110
+ props: { taskId: completedTask.id, agentId: WORKER_ID },
111
+ });
112
+ expect(typeof calls[0]?.props.durationMs).toBe("number");
113
+
114
+ const failedTask = createTaskExtended("fail telemetry", {
115
+ agentId: WORKER_ID,
116
+ source: "mcp",
117
+ });
118
+ await flushMicrotasks();
119
+ calls = [];
120
+
121
+ failTask(failedTask.id, "nope");
122
+ await flushMicrotasks();
123
+
124
+ expect(calls).toHaveLength(1);
125
+ expect(calls[0]).toMatchObject({
126
+ event: "failed",
127
+ props: { taskId: failedTask.id, agentId: WORKER_ID },
128
+ });
129
+ expect(typeof calls[0]?.props.durationMs).toBe("number");
130
+
131
+ const cancelledTask = createTaskExtended("cancel telemetry", {
132
+ agentId: WORKER_ID,
133
+ source: "api",
134
+ });
135
+ await flushMicrotasks();
136
+ calls = [];
137
+
138
+ cancelTask(cancelledTask.id, "not needed");
139
+ await flushMicrotasks();
140
+
141
+ expect(calls).toHaveLength(1);
142
+ expect(calls[0]).toMatchObject({
143
+ event: "cancelled",
144
+ props: {
145
+ taskId: cancelledTask.id,
146
+ source: "api",
147
+ agentId: WORKER_ID,
148
+ previousStatus: "pending",
149
+ },
150
+ });
151
+ expect(typeof calls[0]?.props.durationMs).toBe("number");
152
+ });
153
+ });
@@ -21,7 +21,13 @@ import {
21
21
  type ExecutorResult,
22
22
  } from "../workflows/executors/base";
23
23
  import { ExecutorRegistry } from "../workflows/executors/registry";
24
- import { SwarmScriptExecutor } from "../workflows/executors/swarm-script";
24
+ import {
25
+ SWARM_SCRIPT_DEFAULT_TIMEOUT_MS,
26
+ SWARM_SCRIPT_MAX_TIMEOUT_MS,
27
+ SWARM_SCRIPT_MIN_TIMEOUT_MS,
28
+ SwarmScriptConfigSchema,
29
+ SwarmScriptExecutor,
30
+ } from "../workflows/executors/swarm-script";
25
31
  import { interpolate } from "../workflows/template";
26
32
 
27
33
  const TEST_DB_PATH = "./test-workflow-swarm-script.sqlite";
@@ -141,6 +147,38 @@ beforeEach(() => {
141
147
  });
142
148
 
143
149
  describe("SwarmScriptExecutor", () => {
150
test("config schema validates timeoutMs bounds and applies the runtime default", () => {
  // Pins the schema-level contract for `timeoutMs`: an omitted value falls back
  // to the default, both bounds are inclusive, and one step outside is rejected.
  const scriptName = "quick";

  // No timeoutMs supplied -> the parsed config carries the default.
  const defaulted = SwarmScriptConfigSchema.parse({ scriptName });
  expect(defaulted.timeoutMs).toBe(SWARM_SCRIPT_DEFAULT_TIMEOUT_MS);

  // One millisecond under the minimum is rejected.
  const belowMin = SwarmScriptConfigSchema.safeParse({
    scriptName,
    timeoutMs: SWARM_SCRIPT_MIN_TIMEOUT_MS - 1,
  });
  expect(belowMin.success).toBe(false);

  // One millisecond over the maximum is rejected.
  const aboveMax = SwarmScriptConfigSchema.safeParse({
    scriptName,
    timeoutMs: SWARM_SCRIPT_MAX_TIMEOUT_MS + 1,
  });
  expect(aboveMax.success).toBe(false);

  // Exactly the minimum is accepted and passed through unchanged.
  const atMin = SwarmScriptConfigSchema.parse({
    scriptName,
    timeoutMs: SWARM_SCRIPT_MIN_TIMEOUT_MS,
  });
  expect(atMin.timeoutMs).toBe(SWARM_SCRIPT_MIN_TIMEOUT_MS);

  // Exactly the maximum is accepted and passed through unchanged.
  const atMax = SwarmScriptConfigSchema.parse({
    scriptName,
    timeoutMs: SWARM_SCRIPT_MAX_TIMEOUT_MS,
  });
  expect(atMax.timeoutMs).toBe(SWARM_SCRIPT_MAX_TIMEOUT_MS);
});
181
+
144
182
  test("A workflow with one swarm-script node resolves by name + runs + returns result", async () => {
145
183
  await saveScript(
146
184
  "add-one",
@@ -250,6 +288,49 @@ describe("SwarmScriptExecutor", () => {
250
288
  expect(success.status).toBe("success");
251
289
  });
252
290
 
291
test("timeoutMs not set — script completes with the default 30s window", async () => {
  // A script that resolves immediately must succeed when the node config leaves
  // `timeoutMs` out entirely.
  await saveScript("quick", `export default async () => ({ done: true });`);
  const executor = new SwarmScriptExecutor(deps);
  const wf = makeWorkflow({ nodes: [] });

  // Fresh ids per run so this test does not share run/step identity with others.
  const runId = crypto.randomUUID();
  const stepId = crypto.randomUUID();

  const { status, output } = await executor.run({
    config: { scriptName: "quick" },
    context: {},
    meta: { runId, stepId, nodeId: "script", workflowId: wf.id, dryRun: false },
  });

  expect(status).toBe("success");
  expect(output?.result).toEqual({ done: true });
});
310
+
311
test("timeoutMs set — a long-running script is killed before it finishes", async () => {
  // The script sleeps for 3s, well past the configured timeout, so a failed run
  // can only come from the executor cutting the script short.
  await saveScript(
    "sleeper",
    `export default async () => { await new Promise(r => setTimeout(r, 3000)); return { done: true }; };`,
  );
  const executor = new SwarmScriptExecutor(deps);
  const wf = makeWorkflow({ nodes: [] });

  // Use the smallest timeout the config schema accepts. A value below
  // SWARM_SCRIPT_MIN_TIMEOUT_MS is rejected by SwarmScriptConfigSchema; if run()
  // validates its config (NOTE(review): confirm in BaseExecutor), such a run fails
  // on validation and the assertions below pass without the script ever starting.
  const config = { scriptName: "sleeper", timeoutMs: SWARM_SCRIPT_MIN_TIMEOUT_MS };
  // Guard: the failure asserted below must not be a config-validation failure.
  expect(SwarmScriptConfigSchema.safeParse(config).success).toBe(true);

  const result = await executor.run({
    config,
    context: {},
    meta: {
      runId: crypto.randomUUID(),
      stepId: crypto.randomUUID(),
      nodeId: "script",
      workflowId: wf.id,
      dryRun: false,
    },
  });

  expect(result.status).toBe("failed");
  expect(result.output?.exitCode).not.toBe(0);
});
333
+
253
334
  test("Failure in the script surfaces as a workflow-node failure", async () => {
254
335
  await saveScript("throws", `export default async () => { throw new Error("boom"); };`);
255
336
  const executor = new SwarmScriptExecutor(deps);
@@ -1,15 +1,26 @@
1
1
  import { z } from "zod";
2
2
  import { getScript, getScriptVersion } from "../../be/scripts/db";
3
+ import { DEFAULT_SCRIPT_RESOURCES } from "../../scripts-runtime/executors/types";
3
4
  import { runScript } from "../../scripts-runtime/loader";
4
5
  import type { ExecutorMeta } from "../../types";
5
6
  import { BaseExecutor, type ExecutorResult } from "./base";
6
7
 
8
/**
 * Timeout (ms) used when a swarm-script node config omits `timeoutMs`.
 * Mirrors the script runtime's default `wallClockMs` resource setting.
 */
export const SWARM_SCRIPT_DEFAULT_TIMEOUT_MS = DEFAULT_SCRIPT_RESOURCES.wallClockMs;

/** Smallest `timeoutMs` (ms) accepted by `SwarmScriptConfigSchema`. */
export const SWARM_SCRIPT_MIN_TIMEOUT_MS = 1000;

/**
 * Largest `timeoutMs` (ms) accepted by `SwarmScriptConfigSchema`: the runtime's
 * default `cpuTimeSec` scaled by 1,000 (seconds to milliseconds, going by the
 * field name).
 */
export const SWARM_SCRIPT_MAX_TIMEOUT_MS = 1000 * DEFAULT_SCRIPT_RESOURCES.cpuTimeSec;
11
+
7
12
  export const SwarmScriptConfigSchema = z.object({
8
13
  scriptName: z.string().min(1),
9
14
  scope: z.enum(["global", "agent"]).optional(),
10
15
  pinHash: z.string().min(1).optional(),
11
16
  args: z.record(z.string(), z.unknown()).default({}),
12
17
  fsMode: z.enum(["none", "workspace-rw"]).default("none"),
18
+ timeoutMs: z
19
+ .number()
20
+ .int()
21
+ .min(SWARM_SCRIPT_MIN_TIMEOUT_MS)
22
+ .max(SWARM_SCRIPT_MAX_TIMEOUT_MS)
23
+ .default(SWARM_SCRIPT_DEFAULT_TIMEOUT_MS),
13
24
  });
14
25
 
15
26
  export const SwarmScriptOutputSchema = z.object({
@@ -61,6 +72,7 @@ export class SwarmScriptExecutor extends BaseExecutor<
61
72
  args: config.args,
62
73
  fsMode: "none",
63
74
  agentId: agentId ?? "workflow",
75
+ timeoutMs: config.timeoutMs,
64
76
  });
65
77
 
66
78
  const workflowOutput = {