@desplega.ai/agent-swarm 1.99.1 → 1.100.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +1 -1
- package/package.json +6 -6
- package/src/be/db.ts +60 -0
- package/src/commands/runner.ts +2 -2
- package/src/http/index.ts +1 -1
- package/src/http/tasks.ts +0 -26
- package/src/jira/webhook-lifecycle.ts +23 -4
- package/src/oauth/keepalive.ts +75 -33
- package/src/providers/codex-adapter.ts +11 -3
- package/src/providers/swarm-events-shared.ts +4 -5
- package/src/providers/types.ts +1 -1
- package/src/tests/codex-swarm-events.test.ts +24 -0
- package/src/tests/jira-webhook-lifecycle.test.ts +29 -2
- package/src/tests/kapso-inbound.test.ts +30 -9
- package/src/tests/oauth-keepalive.test.ts +135 -0
- package/src/tests/task-lifecycle-telemetry.test.ts +153 -0
- package/src/tests/workflow-swarm-script.test.ts +82 -1
- package/src/workflows/executors/swarm-script.ts +12 -0
package/openapi.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"openapi": "3.1.0",
|
|
3
3
|
"info": {
|
|
4
4
|
"title": "Agent Swarm API",
|
|
5
|
-
"version": "1.99.1",
|
|
5
|
+
"version": "1.100.1",
|
|
6
6
|
"description": "Multi-agent orchestration API for Claude Code, Codex, and Gemini CLI. Enables task distribution, agent communication, and service discovery.\n\nMCP tools are documented separately in [MCP.md](./MCP.md)."
|
|
7
7
|
},
|
|
8
8
|
"servers": [
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@desplega.ai/agent-swarm",
|
|
3
|
-
"version": "1.99.1",
|
|
3
|
+
"version": "1.100.1",
|
|
4
4
|
"description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "desplega.sh <contact@desplega.sh>",
|
|
@@ -111,14 +111,14 @@
|
|
|
111
111
|
"@aws-sdk/client-bedrock": "3.1048.0",
|
|
112
112
|
"@desplega.ai/business-use": "^0.4.2",
|
|
113
113
|
"@desplega.ai/localtunnel": "^2.2.0",
|
|
114
|
-
"@earendil-works/pi-agent-core": "^0.79.
|
|
115
|
-
"@earendil-works/pi-ai": "^0.79.
|
|
116
|
-
"@earendil-works/pi-coding-agent": "^0.79.
|
|
114
|
+
"@earendil-works/pi-agent-core": "^0.79.6",
|
|
115
|
+
"@earendil-works/pi-ai": "^0.79.6",
|
|
116
|
+
"@earendil-works/pi-coding-agent": "^0.79.6",
|
|
117
117
|
"@inkjs/ui": "^2.0.0",
|
|
118
118
|
"@linear/sdk": "^77.0.0",
|
|
119
119
|
"@modelcontextprotocol/sdk": "^1.25.1",
|
|
120
|
-
"@openai/codex-sdk": "^0.
|
|
121
|
-
"@opencode-ai/sdk": "^1.17.
|
|
120
|
+
"@openai/codex-sdk": "^0.140.0",
|
|
121
|
+
"@opencode-ai/sdk": "^1.17.7",
|
|
122
122
|
"@openfort/openfort-node": "^0.9.1",
|
|
123
123
|
"@opentelemetry/api": "^1.9.1",
|
|
124
124
|
"@opentelemetry/exporter-trace-otlp-http": "^0.218.0",
|
package/src/be/db.ts
CHANGED
|
@@ -4,6 +4,7 @@ import pkg from "../../package.json";
|
|
|
4
4
|
import { addEyesReactionOnTaskStart } from "../github/task-reactions";
|
|
5
5
|
import { type ModelTier, parseModelTier } from "../model-tiers";
|
|
6
6
|
import { configureDbResolver } from "../prompts/resolver";
|
|
7
|
+
import { telemetry } from "../telemetry";
|
|
7
8
|
import type {
|
|
8
9
|
ActiveSession,
|
|
9
10
|
Agent,
|
|
@@ -114,6 +115,19 @@ import { isReservedConfigKey, reservedKeyError } from "./swarm-config-guard";
|
|
|
114
115
|
let db: Database | null = null;
|
|
115
116
|
let sqliteVecAvailable = false;
|
|
116
117
|
|
|
118
|
+
type TaskTelemetryProps = Parameters<typeof telemetry.taskEvent>[1];
|
|
119
|
+
|
|
120
|
+
function emitTaskLifecycleTelemetryAfterCommit(
|
|
121
|
+
event: string,
|
|
122
|
+
props: TaskTelemetryProps,
|
|
123
|
+
verify?: (task: AgentTask | null) => boolean,
|
|
124
|
+
): void {
|
|
125
|
+
queueMicrotask(() => {
|
|
126
|
+
if (verify && !verify(getTaskById(props.taskId))) return;
|
|
127
|
+
telemetry.taskEvent(event, props);
|
|
128
|
+
});
|
|
129
|
+
}
|
|
130
|
+
|
|
117
131
|
export function isSqliteVecAvailable(): boolean {
|
|
118
132
|
return sqliteVecAvailable;
|
|
119
133
|
}
|
|
@@ -2105,6 +2119,16 @@ export function completeTask(id: string, output?: string): AgentTask | null {
|
|
|
2105
2119
|
}
|
|
2106
2120
|
|
|
2107
2121
|
if (row && oldTask) {
|
|
2122
|
+
emitTaskLifecycleTelemetryAfterCommit(
|
|
2123
|
+
"completed",
|
|
2124
|
+
{
|
|
2125
|
+
taskId: id,
|
|
2126
|
+
agentId: row.agentId ?? undefined,
|
|
2127
|
+
durationMs: row.createdAt ? Date.now() - new Date(row.createdAt).getTime() : undefined,
|
|
2128
|
+
},
|
|
2129
|
+
(task) => task?.status === "completed",
|
|
2130
|
+
);
|
|
2131
|
+
|
|
2108
2132
|
try {
|
|
2109
2133
|
createLogEntry({
|
|
2110
2134
|
eventType: "task_status_change",
|
|
@@ -2145,6 +2169,16 @@ export function failTask(id: string, reason: string): AgentTask | null {
|
|
|
2145
2169
|
const scrubbedReason = scrubSecrets(reason);
|
|
2146
2170
|
const row = taskQueries.setFailure().get(scrubbedReason, finishedAt, id);
|
|
2147
2171
|
if (row && oldTask) {
|
|
2172
|
+
emitTaskLifecycleTelemetryAfterCommit(
|
|
2173
|
+
"failed",
|
|
2174
|
+
{
|
|
2175
|
+
taskId: id,
|
|
2176
|
+
agentId: row.agentId ?? undefined,
|
|
2177
|
+
durationMs: row.createdAt ? Date.now() - new Date(row.createdAt).getTime() : undefined,
|
|
2178
|
+
},
|
|
2179
|
+
(task) => task?.status === "failed",
|
|
2180
|
+
);
|
|
2181
|
+
|
|
2148
2182
|
try {
|
|
2149
2183
|
createLogEntry({
|
|
2150
2184
|
eventType: "task_status_change",
|
|
@@ -2192,6 +2226,20 @@ export function cancelTask(id: string, reason?: string): AgentTask | null {
|
|
|
2192
2226
|
const row = taskQueries.setCancelled().get(cancelReason, finishedAt, id);
|
|
2193
2227
|
|
|
2194
2228
|
if (row && oldTask) {
|
|
2229
|
+
emitTaskLifecycleTelemetryAfterCommit(
|
|
2230
|
+
"cancelled",
|
|
2231
|
+
{
|
|
2232
|
+
taskId: id,
|
|
2233
|
+
source: oldTask.source,
|
|
2234
|
+
agentId: oldTask.agentId ?? undefined,
|
|
2235
|
+
previousStatus: oldTask.status,
|
|
2236
|
+
durationMs: oldTask.createdAt
|
|
2237
|
+
? Date.now() - new Date(oldTask.createdAt).getTime()
|
|
2238
|
+
: undefined,
|
|
2239
|
+
},
|
|
2240
|
+
(task) => task?.status === "cancelled",
|
|
2241
|
+
);
|
|
2242
|
+
|
|
2195
2243
|
try {
|
|
2196
2244
|
createLogEntry({
|
|
2197
2245
|
eventType: "task_status_change",
|
|
@@ -3157,6 +3205,18 @@ export function createTaskExtended(task: string, options?: CreateTaskOptions): A
|
|
|
3157
3205
|
});
|
|
3158
3206
|
} catch {}
|
|
3159
3207
|
|
|
3208
|
+
emitTaskLifecycleTelemetryAfterCommit(
|
|
3209
|
+
"created",
|
|
3210
|
+
{
|
|
3211
|
+
taskId: row.id,
|
|
3212
|
+
source: row.source,
|
|
3213
|
+
tags: options?.tags ?? [],
|
|
3214
|
+
hasParent: !!row.parentTaskId,
|
|
3215
|
+
priority: row.priority,
|
|
3216
|
+
},
|
|
3217
|
+
(task) => task !== null,
|
|
3218
|
+
);
|
|
3219
|
+
|
|
3160
3220
|
try {
|
|
3161
3221
|
import("../workflows/event-bus").then(({ workflowEventBus }) => {
|
|
3162
3222
|
workflowEventBus.emit("task.created", {
|
package/src/commands/runner.ts
CHANGED
|
@@ -1451,7 +1451,7 @@ function setupShutdownHandlers(
|
|
|
1451
1451
|
);
|
|
1452
1452
|
for (const [taskId, task] of state.activeTasks) {
|
|
1453
1453
|
console.log(`[${role}] Superseding task ${taskId.slice(0, 8)}`);
|
|
1454
|
-
task.session.abort().catch(() => {});
|
|
1454
|
+
task.session.abort("graceful_shutdown").catch(() => {});
|
|
1455
1455
|
if (apiConfig) {
|
|
1456
1456
|
const supersede = await supersedeTaskViaAPI(
|
|
1457
1457
|
apiConfig,
|
|
@@ -4706,7 +4706,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
4706
4706
|
console.log(
|
|
4707
4707
|
`[${role}] Task ${taskId.slice(0, 8)} was cancelled — sending SIGTERM to subprocess`,
|
|
4708
4708
|
);
|
|
4709
|
-
task.session.abort().catch(() => {});
|
|
4709
|
+
task.session.abort("cancelled").catch(() => {});
|
|
4710
4710
|
cancelledSignaled.add(taskId);
|
|
4711
4711
|
}
|
|
4712
4712
|
}
|
package/src/http/index.ts
CHANGED
|
@@ -382,7 +382,7 @@ async function shutdown() {
|
|
|
382
382
|
// Stop OAuth keepalive
|
|
383
383
|
if (process.env.OAUTH_KEEPALIVE_DISABLE !== "true") {
|
|
384
384
|
const { stopOAuthKeepalive } = await import("../oauth/keepalive");
|
|
385
|
-
stopOAuthKeepalive();
|
|
385
|
+
await stopOAuthKeepalive();
|
|
386
386
|
}
|
|
387
387
|
|
|
388
388
|
// Stop MCP OAuth pending-session garbage collector
|
package/src/http/tasks.ts
CHANGED
|
@@ -26,7 +26,6 @@ import {
|
|
|
26
26
|
import { ModelTierSchema, splitLegacyModelAlias } from "../model-tiers";
|
|
27
27
|
import { createTaskWithSiblingAwareness } from "../tasks/sibling-awareness";
|
|
28
28
|
import { createResumeFollowUp, createWorkerTaskFollowUp } from "../tasks/worker-follow-up";
|
|
29
|
-
import { telemetry } from "../telemetry";
|
|
30
29
|
import {
|
|
31
30
|
type AgentTaskSource,
|
|
32
31
|
AgentTaskSourceSchema,
|
|
@@ -420,14 +419,6 @@ export async function handleTasks(
|
|
|
420
419
|
},
|
|
421
420
|
});
|
|
422
421
|
|
|
423
|
-
telemetry.taskEvent("created", {
|
|
424
|
-
taskId: task.id,
|
|
425
|
-
source: task.source,
|
|
426
|
-
tags: parsed.body.tags ?? [],
|
|
427
|
-
hasParent: !!task.parentTaskId,
|
|
428
|
-
priority: task.priority,
|
|
429
|
-
});
|
|
430
|
-
|
|
431
422
|
json(res, task, 201);
|
|
432
423
|
} catch (error) {
|
|
433
424
|
console.error("[HTTP] Failed to create task:", error);
|
|
@@ -536,14 +527,6 @@ export async function handleTasks(
|
|
|
536
527
|
});
|
|
537
528
|
}
|
|
538
529
|
|
|
539
|
-
telemetry.taskEvent("cancelled", {
|
|
540
|
-
taskId: parsed.params.id,
|
|
541
|
-
source: task.source,
|
|
542
|
-
agentId: task.agentId ?? undefined,
|
|
543
|
-
previousStatus: task.status,
|
|
544
|
-
durationMs: task.createdAt ? Date.now() - new Date(task.createdAt).getTime() : undefined,
|
|
545
|
-
});
|
|
546
|
-
|
|
547
530
|
if (task.agentId) {
|
|
548
531
|
updateAgentStatusFromCapacity(task.agentId);
|
|
549
532
|
}
|
|
@@ -645,15 +628,6 @@ export async function handleTasks(
|
|
|
645
628
|
if (result.task && !("alreadyFinished" in result && result.alreadyFinished)) {
|
|
646
629
|
const finishEventId = parsed.body.status === "completed" ? "completed" : "failed";
|
|
647
630
|
|
|
648
|
-
const durationMs = result.task.createdAt
|
|
649
|
-
? Date.now() - new Date(result.task.createdAt).getTime()
|
|
650
|
-
: undefined;
|
|
651
|
-
|
|
652
|
-
telemetry.taskEvent(finishEventId, {
|
|
653
|
-
taskId: parsed.params.id,
|
|
654
|
-
agentId: myAgentId,
|
|
655
|
-
durationMs,
|
|
656
|
-
});
|
|
657
631
|
ensure({
|
|
658
632
|
id: finishEventId,
|
|
659
633
|
flow: "task",
|
|
@@ -10,7 +10,6 @@ const SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1000;
|
|
|
10
10
|
* first refresh round-trip (Atlassian returns the authoritative expiry).
|
|
11
11
|
*/
|
|
12
12
|
const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
|
|
13
|
-
const SLACK_ALERTS_CHANNEL = process.env.SLACK_ALERTS_CHANNEL || "C08JCRURPBV";
|
|
14
13
|
|
|
15
14
|
const WEBHOOK_EVENTS = [
|
|
16
15
|
"jira:issue_updated",
|
|
@@ -40,6 +39,12 @@ function getRegisteredWebhookUrl(): string {
|
|
|
40
39
|
// ─── Slack alert (best-effort) ───────────────────────────────────────────────
|
|
41
40
|
|
|
42
41
|
async function notifySlack(text: string): Promise<void> {
|
|
42
|
+
const channel = process.env.SLACK_ALERTS_CHANNEL;
|
|
43
|
+
if (!channel) {
|
|
44
|
+
console.warn("[Jira webhook keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert");
|
|
45
|
+
return;
|
|
46
|
+
}
|
|
47
|
+
|
|
43
48
|
try {
|
|
44
49
|
const { getSlackApp } = await import("../slack/app");
|
|
45
50
|
const app = getSlackApp();
|
|
@@ -48,13 +53,21 @@ async function notifySlack(text: string): Promise<void> {
|
|
|
48
53
|
return;
|
|
49
54
|
}
|
|
50
55
|
await app.client.chat.postMessage({
|
|
51
|
-
channel
|
|
56
|
+
channel,
|
|
52
57
|
text,
|
|
53
58
|
});
|
|
54
|
-
console.log(
|
|
59
|
+
console.log(`[Jira webhook keepalive] Slack notification sent to ${channel}`);
|
|
55
60
|
} catch (slackErr) {
|
|
61
|
+
const code =
|
|
62
|
+
typeof slackErr === "object" && slackErr !== null && "code" in slackErr
|
|
63
|
+
? ` code=${String(slackErr.code)}`
|
|
64
|
+
: "";
|
|
65
|
+
const data =
|
|
66
|
+
typeof slackErr === "object" && slackErr !== null && "data" in slackErr
|
|
67
|
+
? ` data=${JSON.stringify(slackErr.data)}`
|
|
68
|
+
: "";
|
|
56
69
|
console.error(
|
|
57
|
-
|
|
70
|
+
`[Jira webhook keepalive] Failed to send Slack notification to ${channel}${code}${data}:`,
|
|
58
71
|
slackErr instanceof Error ? slackErr.message : slackErr,
|
|
59
72
|
);
|
|
60
73
|
}
|
|
@@ -362,3 +375,9 @@ export function stopJiraWebhookKeepalive(): void {
|
|
|
362
375
|
console.log("[Jira webhook keepalive] Stopped");
|
|
363
376
|
}
|
|
364
377
|
}
|
|
378
|
+
|
|
379
|
+
// ─── Test helpers (exported for unit tests only) ─────────────────────────────
|
|
380
|
+
|
|
381
|
+
export const _test = {
|
|
382
|
+
notifySlack,
|
|
383
|
+
};
|
package/src/oauth/keepalive.ts
CHANGED
|
@@ -1,42 +1,46 @@
|
|
|
1
1
|
import { ensureTokenOrThrow } from "./ensure-token";
|
|
2
2
|
|
|
3
|
-
//
|
|
4
|
-
//
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
// linear-outbound (e.g. agents using the read-only db-query MCP) sees a
|
|
9
|
-
// not-yet-expired token. The 65-min buffer is wider than the access-token
|
|
10
|
-
// lifetime, so isTokenExpiringSoon always returns true and every tick rotates.
|
|
11
|
-
//
|
|
12
|
-
// Touching the row this often also serves the original "keep the refresh
|
|
13
|
-
// token alive" goal — Atlassian expires inactive refresh tokens after 90 days,
|
|
14
|
-
// and Linear's behavior is similar; refreshing every 50 min trivially keeps
|
|
15
|
-
// both providers active.
|
|
16
|
-
const KEEPALIVE_INTERVAL_MS = 50 * 60 * 1000;
|
|
17
|
-
const KEEPALIVE_BUFFER_MS = 65 * 60 * 1000;
|
|
18
|
-
const SLACK_ALERTS_CHANNEL = process.env.SLACK_ALERTS_CHANNEL || "C08JCRURPBV";
|
|
3
|
+
// Keep refresh tokens warm without constantly rotating strict-rotation
|
|
4
|
+
// providers. Reactive callers still refresh access tokens before API use.
|
|
5
|
+
const KEEPALIVE_INTERVAL_MS = 12 * 60 * 60 * 1000;
|
|
6
|
+
const KEEPALIVE_BUFFER_MS = 10 * 60 * 1000;
|
|
7
|
+
const STARTUP_KEEPALIVE_DELAY_MS = 10_000;
|
|
19
8
|
|
|
20
9
|
const KEEPALIVE_PROVIDERS = ["linear", "jira"] as const;
|
|
21
10
|
|
|
22
11
|
let keepaliveInterval: ReturnType<typeof setInterval> | null = null;
|
|
12
|
+
let startupKeepaliveTimeout: ReturnType<typeof setTimeout> | null = null;
|
|
13
|
+
let inflightKeepalive: Promise<void> | null = null;
|
|
14
|
+
|
|
15
|
+
function scheduleKeepaliveRun(trigger: "startup" | "interval" | "manual"): Promise<void> {
|
|
16
|
+
if (inflightKeepalive) {
|
|
17
|
+
console.log(`[OAuth Keepalive] ${trigger} tick skipped; previous run still in flight`);
|
|
18
|
+
return inflightKeepalive;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
inflightKeepalive = runKeepalive(trigger).finally(() => {
|
|
22
|
+
inflightKeepalive = null;
|
|
23
|
+
});
|
|
24
|
+
return inflightKeepalive;
|
|
25
|
+
}
|
|
23
26
|
|
|
24
27
|
/**
|
|
25
28
|
* Proactively refresh OAuth tokens on a schedule.
|
|
26
29
|
*
|
|
27
30
|
* Two purposes, both served by the same tick:
|
|
28
31
|
*
|
|
29
|
-
* 1.
|
|
30
|
-
* `oauth_tokens.accessToken` directly (db-query MCP, future MCP servers,
|
|
31
|
-
* `tracker-status`) needs a not-yet-expired value. The 50-min cadence
|
|
32
|
-
* keeps the row ahead of the 1h access-token lifetime.
|
|
33
|
-
* 2. Refresh-token liveness. Atlassian rotates refresh tokens and expires
|
|
32
|
+
* 1. Refresh-token liveness. Atlassian rotates refresh tokens and expires
|
|
34
33
|
* them after ~90 days of inactivity, so silent gaps in usage would kill
|
|
35
|
-
* the integration.
|
|
36
|
-
*
|
|
37
|
-
*
|
|
34
|
+
* the integration. The 12h cadence keeps the refresh token active without
|
|
35
|
+
* rotating it dozens of times per day.
|
|
36
|
+
* 2. Loud failure on boot and during scheduled checks. A dead token surfaces
|
|
37
|
+
* as structured logs plus a Slack alert instead of silently retrying.
|
|
38
|
+
*
|
|
39
|
+
* Access-token freshness is handled reactively by ensureToken callers before
|
|
40
|
+
* Jira/Linear API use.
|
|
38
41
|
*/
|
|
39
|
-
async function runKeepalive(): Promise<void> {
|
|
42
|
+
async function runKeepalive(trigger: "startup" | "interval" | "manual" = "manual"): Promise<void> {
|
|
43
|
+
console.log(`[OAuth Keepalive] Running ${trigger} token refresh check`);
|
|
40
44
|
for (const provider of KEEPALIVE_PROVIDERS) {
|
|
41
45
|
console.log(`[OAuth Keepalive] Running scheduled token refresh for ${provider}...`);
|
|
42
46
|
try {
|
|
@@ -53,6 +57,12 @@ async function runKeepalive(): Promise<void> {
|
|
|
53
57
|
}
|
|
54
58
|
|
|
55
59
|
async function notifySlack(text: string): Promise<void> {
|
|
60
|
+
const channel = process.env.SLACK_ALERTS_CHANNEL;
|
|
61
|
+
if (!channel) {
|
|
62
|
+
console.warn("[OAuth Keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert");
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
|
|
56
66
|
try {
|
|
57
67
|
const { getSlackApp } = await import("../slack/app");
|
|
58
68
|
const app = getSlackApp();
|
|
@@ -61,13 +71,21 @@ async function notifySlack(text: string): Promise<void> {
|
|
|
61
71
|
return;
|
|
62
72
|
}
|
|
63
73
|
await app.client.chat.postMessage({
|
|
64
|
-
channel
|
|
74
|
+
channel,
|
|
65
75
|
text,
|
|
66
76
|
});
|
|
67
|
-
console.log(
|
|
77
|
+
console.log(`[OAuth Keepalive] Slack notification sent to ${channel}`);
|
|
68
78
|
} catch (slackErr) {
|
|
79
|
+
const code =
|
|
80
|
+
typeof slackErr === "object" && slackErr !== null && "code" in slackErr
|
|
81
|
+
? ` code=${String(slackErr.code)}`
|
|
82
|
+
: "";
|
|
83
|
+
const data =
|
|
84
|
+
typeof slackErr === "object" && slackErr !== null && "data" in slackErr
|
|
85
|
+
? ` data=${JSON.stringify(slackErr.data)}`
|
|
86
|
+
: "";
|
|
69
87
|
console.error(
|
|
70
|
-
|
|
88
|
+
`[OAuth Keepalive] Failed to send Slack notification to ${channel}${code}${data}:`,
|
|
71
89
|
slackErr instanceof Error ? slackErr.message : slackErr,
|
|
72
90
|
);
|
|
73
91
|
}
|
|
@@ -87,21 +105,45 @@ export function startOAuthKeepalive(): void {
|
|
|
87
105
|
`[OAuth Keepalive] Starting (interval ${Math.round(KEEPALIVE_INTERVAL_MS / 60_000)}min, buffer ${Math.round(KEEPALIVE_BUFFER_MS / 60_000)}min)`,
|
|
88
106
|
);
|
|
89
107
|
|
|
90
|
-
// Run once after a short delay (let server finish startup)
|
|
91
|
-
setTimeout(() =>
|
|
108
|
+
// Run once after a short delay (let server finish startup).
|
|
109
|
+
startupKeepaliveTimeout = setTimeout(() => {
|
|
110
|
+
startupKeepaliveTimeout = null;
|
|
111
|
+
scheduleKeepaliveRun("startup");
|
|
112
|
+
}, STARTUP_KEEPALIVE_DELAY_MS);
|
|
92
113
|
|
|
93
114
|
keepaliveInterval = setInterval(() => {
|
|
94
|
-
|
|
115
|
+
scheduleKeepaliveRun("interval");
|
|
95
116
|
}, KEEPALIVE_INTERVAL_MS);
|
|
96
117
|
}
|
|
97
118
|
|
|
98
119
|
/**
|
|
99
|
-
* Stop the OAuth keepalive timer.
|
|
120
|
+
* Stop the OAuth keepalive timer and wait for any in-flight refresh to persist.
|
|
100
121
|
*/
|
|
101
|
-
export function stopOAuthKeepalive(): void {
|
|
122
|
+
export async function stopOAuthKeepalive(): Promise<void> {
|
|
123
|
+
if (startupKeepaliveTimeout) {
|
|
124
|
+
clearTimeout(startupKeepaliveTimeout);
|
|
125
|
+
startupKeepaliveTimeout = null;
|
|
126
|
+
}
|
|
127
|
+
|
|
102
128
|
if (keepaliveInterval) {
|
|
103
129
|
clearInterval(keepaliveInterval);
|
|
104
130
|
keepaliveInterval = null;
|
|
105
131
|
console.log("[OAuth Keepalive] Stopped");
|
|
106
132
|
}
|
|
133
|
+
|
|
134
|
+
if (inflightKeepalive) {
|
|
135
|
+
console.log("[OAuth Keepalive] Waiting for in-flight token refresh before shutdown");
|
|
136
|
+
await inflightKeepalive;
|
|
137
|
+
}
|
|
107
138
|
}
|
|
139
|
+
|
|
140
|
+
// ─── Test helpers (exported for unit tests only) ─────────────────────────────
|
|
141
|
+
|
|
142
|
+
export const _test = {
|
|
143
|
+
KEEPALIVE_INTERVAL_MS,
|
|
144
|
+
KEEPALIVE_BUFFER_MS,
|
|
145
|
+
STARTUP_KEEPALIVE_DELAY_MS,
|
|
146
|
+
notifySlack,
|
|
147
|
+
runKeepalive: scheduleKeepaliveRun,
|
|
148
|
+
getInflightKeepalive: () => inflightKeepalive,
|
|
149
|
+
};
|
|
@@ -519,9 +519,9 @@ export class CodexSession implements ProviderSession {
|
|
|
519
519
|
return this.completionPromise;
|
|
520
520
|
}
|
|
521
521
|
|
|
522
|
-
async abort(): Promise<void> {
|
|
522
|
+
async abort(reason?: string): Promise<void> {
|
|
523
523
|
this.aborted = true;
|
|
524
|
-
this.abortController?.abort();
|
|
524
|
+
this.abortController?.abort(reason ?? "cancelled");
|
|
525
525
|
}
|
|
526
526
|
|
|
527
527
|
private emit(event: ProviderEvent): void {
|
|
@@ -992,6 +992,14 @@ export class CodexSession implements ProviderSession {
|
|
|
992
992
|
} catch (err) {
|
|
993
993
|
// AbortError from the SDK propagates here when signal.abort() fires.
|
|
994
994
|
if (this.aborted || (err instanceof Error && err.name === "AbortError")) {
|
|
995
|
+
// Prefer the abort reason from the signal (set by the caller of
|
|
996
|
+
// abort()) — this distinguishes tool-loop aborts from cancel-poll
|
|
997
|
+
// and graceful-shutdown aborts that all used to produce a bare
|
|
998
|
+
// "cancelled" failureReason.
|
|
999
|
+
const abortReason =
|
|
1000
|
+
typeof this.abortController?.signal.reason === "string"
|
|
1001
|
+
? this.abortController.signal.reason
|
|
1002
|
+
: "cancelled";
|
|
995
1003
|
const cost = this.buildCostData(this.lastUsage, true);
|
|
996
1004
|
this.emit({ type: "result", cost, isError: true, errorCategory: "cancelled" });
|
|
997
1005
|
this.settle({
|
|
@@ -999,7 +1007,7 @@ export class CodexSession implements ProviderSession {
|
|
|
999
1007
|
sessionId: this._sessionId,
|
|
1000
1008
|
cost,
|
|
1001
1009
|
isError: true,
|
|
1002
|
-
failureReason:
|
|
1010
|
+
failureReason: abortReason,
|
|
1003
1011
|
});
|
|
1004
1012
|
return;
|
|
1005
1013
|
}
|
|
@@ -126,7 +126,7 @@ export function createSwarmEventHandler(
|
|
|
126
126
|
console.log(
|
|
127
127
|
`[swarm-events] aborting task ${taskId}: cancelled via /cancelled-tasks poll`,
|
|
128
128
|
);
|
|
129
|
-
opts.abortRef.current?.abort();
|
|
129
|
+
opts.abortRef.current?.abort("cancelled");
|
|
130
130
|
if (opts.onCancel) {
|
|
131
131
|
try {
|
|
132
132
|
await opts.onCancel();
|
|
@@ -152,10 +152,9 @@ export function createSwarmEventHandler(
|
|
|
152
152
|
// was indistinguishable from a /cancelled-tasks abort or a runner
|
|
153
153
|
// SIGTERM. `result.reason` already carries the diagnostic detail
|
|
154
154
|
// ("Tool X called 15 times…", "ping-pong between A and B…").
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
);
|
|
158
|
-
opts.abortRef.current?.abort();
|
|
155
|
+
const loopReason = `tool-loop: ${result.reason ?? "unknown reason"}`;
|
|
156
|
+
console.log(`[swarm-events] aborting task ${taskId}: ${loopReason}`);
|
|
157
|
+
opts.abortRef.current?.abort(loopReason);
|
|
159
158
|
}
|
|
160
159
|
})
|
|
161
160
|
.catch(() => {});
|
package/src/providers/types.ts
CHANGED
|
@@ -118,7 +118,7 @@ export interface ProviderSession {
|
|
|
118
118
|
readonly sessionId: string | undefined;
|
|
119
119
|
onEvent(listener: (event: ProviderEvent) => void): void;
|
|
120
120
|
waitForCompletion(): Promise<ProviderResult>;
|
|
121
|
-
abort(): Promise<void>;
|
|
121
|
+
abort(reason?: string): Promise<void>;
|
|
122
122
|
}
|
|
123
123
|
|
|
124
124
|
/** Result returned when a provider session completes. */
|
|
@@ -97,6 +97,30 @@ describe("createCodexSwarmEventHandler", () => {
|
|
|
97
97
|
expect(controller.signal.aborted).toBe(true);
|
|
98
98
|
});
|
|
99
99
|
|
|
100
|
+
test("sets abort signal reason to 'cancelled' on cancel-poll abort", async () => {
|
|
101
|
+
installFetchStub((url) => {
|
|
102
|
+
if (url.includes("/cancelled-tasks")) {
|
|
103
|
+
return new Response(
|
|
104
|
+
JSON.stringify({ cancelled: [{ id: "task-1", failureReason: "user request" }] }),
|
|
105
|
+
{ status: 200 },
|
|
106
|
+
);
|
|
107
|
+
}
|
|
108
|
+
return new Response("{}", { status: 200 });
|
|
109
|
+
});
|
|
110
|
+
const controller = new AbortController();
|
|
111
|
+
const opts = buildOpts({ abortRef: { current: controller } });
|
|
112
|
+
const handler = createCodexSwarmEventHandler(opts);
|
|
113
|
+
handler({
|
|
114
|
+
type: "tool_start",
|
|
115
|
+
toolCallId: "call-1",
|
|
116
|
+
toolName: "bash",
|
|
117
|
+
args: { command: "sleep 9999" },
|
|
118
|
+
});
|
|
119
|
+
await new Promise((resolve) => setTimeout(resolve, 30));
|
|
120
|
+
expect(controller.signal.aborted).toBe(true);
|
|
121
|
+
expect(controller.signal.reason).toBe("cancelled");
|
|
122
|
+
});
|
|
123
|
+
|
|
100
124
|
test("logs the abort reason when /cancelled-tasks reports the task", async () => {
|
|
101
125
|
installFetchStub((url) => {
|
|
102
126
|
if (url.includes("/cancelled-tasks")) {
|
|
@@ -1,10 +1,19 @@
|
|
|
1
|
-
import { afterAll, beforeAll, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
1
|
+
import { afterAll, beforeAll, beforeEach, describe, expect, mock, spyOn, test } from "bun:test";
|
|
2
2
|
import { unlink } from "node:fs/promises";
|
|
3
3
|
import { closeDb, getDb, initDb } from "../be/db";
|
|
4
4
|
import { upsertOAuthApp } from "../be/db-queries/oauth";
|
|
5
5
|
import { getJiraMetadata, updateJiraMetadata } from "../jira/metadata";
|
|
6
6
|
|
|
7
7
|
const TEST_DB_PATH = "./test-jira-webhook-lifecycle.sqlite";
|
|
8
|
+
const originalSlackAlertsChannel = process.env.SLACK_ALERTS_CHANNEL;
|
|
9
|
+
|
|
10
|
+
function restoreSlackAlertsChannel(): void {
|
|
11
|
+
if (originalSlackAlertsChannel === undefined) {
|
|
12
|
+
delete process.env.SLACK_ALERTS_CHANNEL;
|
|
13
|
+
return;
|
|
14
|
+
}
|
|
15
|
+
process.env.SLACK_ALERTS_CHANNEL = originalSlackAlertsChannel;
|
|
16
|
+
}
|
|
8
17
|
|
|
9
18
|
// Mock the Jira fetch client. Each test installs its own per-call response.
|
|
10
19
|
const jiraFetchMock = mock(
|
|
@@ -38,22 +47,40 @@ beforeAll(() => {
|
|
|
38
47
|
afterAll(async () => {
|
|
39
48
|
delete process.env.JIRA_WEBHOOK_TOKEN;
|
|
40
49
|
delete process.env.MCP_BASE_URL;
|
|
50
|
+
restoreSlackAlertsChannel();
|
|
41
51
|
closeDb();
|
|
42
52
|
await unlink(TEST_DB_PATH).catch(() => {});
|
|
43
53
|
await unlink(`${TEST_DB_PATH}-wal`).catch(() => {});
|
|
44
54
|
await unlink(`${TEST_DB_PATH}-shm`).catch(() => {});
|
|
45
55
|
});
|
|
46
56
|
|
|
47
|
-
const { refreshJiraWebhooks, registerJiraWebhook } = await import(
|
|
57
|
+
const { _test, refreshJiraWebhooks, registerJiraWebhook } = await import(
|
|
58
|
+
"../jira/webhook-lifecycle"
|
|
59
|
+
);
|
|
48
60
|
|
|
49
61
|
beforeEach(() => {
|
|
50
62
|
jiraFetchMock.mockClear();
|
|
63
|
+
restoreSlackAlertsChannel();
|
|
51
64
|
// Reset the webhookIds list each test (and clear metadata writebacks).
|
|
52
65
|
getDb()
|
|
53
66
|
.query("UPDATE oauth_apps SET metadata = ? WHERE provider = 'jira'")
|
|
54
67
|
.run(JSON.stringify({ cloudId: "cloud-1", siteUrl: "https://example.atlassian.net" }));
|
|
55
68
|
});
|
|
56
69
|
|
|
70
|
+
describe("Jira webhook Slack alerts", () => {
|
|
71
|
+
test("skips Slack notification when alerts channel env is unset", async () => {
|
|
72
|
+
delete process.env.SLACK_ALERTS_CHANNEL;
|
|
73
|
+
const warn = spyOn(console, "warn").mockImplementation(() => {});
|
|
74
|
+
|
|
75
|
+
await expect(_test.notifySlack("test alert")).resolves.toBeUndefined();
|
|
76
|
+
|
|
77
|
+
expect(warn).toHaveBeenCalledWith(
|
|
78
|
+
"[Jira webhook keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert",
|
|
79
|
+
);
|
|
80
|
+
warn.mockRestore();
|
|
81
|
+
});
|
|
82
|
+
});
|
|
83
|
+
|
|
57
84
|
describe("registerJiraWebhook", () => {
|
|
58
85
|
test("posts the right body shape and persists webhookId into metadata", async () => {
|
|
59
86
|
jiraFetchMock.mockImplementationOnce(
|
|
@@ -68,6 +68,13 @@ function fakeReqRes(rawBody: string, headers: Record<string, string>) {
|
|
|
68
68
|
return { req, res, captured };
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
+
async function waitFor(predicate: () => boolean): Promise<void> {
|
|
72
|
+
for (let i = 0; i < 20; i++) {
|
|
73
|
+
if (predicate()) return;
|
|
74
|
+
await new Promise((resolve) => setTimeout(resolve, 5));
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
71
78
|
const KAPSO_PATH = ["api", "integrations", "kapso", "webhook"];
|
|
72
79
|
|
|
73
80
|
beforeAll(() => {
|
|
@@ -226,9 +233,8 @@ describe("handleWebhooks — Kapso HMAC gate", () => {
|
|
|
226
233
|
return new Response(JSON.stringify({ success: true }), { status: 200 });
|
|
227
234
|
}) as typeof fetch;
|
|
228
235
|
|
|
229
|
-
const
|
|
230
|
-
|
|
231
|
-
);
|
|
236
|
+
const messageId = `wamid.HTTP_OK_${crypto.randomUUID()}`;
|
|
237
|
+
const rawBody = JSON.stringify(makePayload({ phoneNumberId: "pn-http", messageId }));
|
|
232
238
|
const { req, res, captured } = fakeReqRes(rawBody, {
|
|
233
239
|
"x-webhook-signature": sign(HMAC_SECRET, rawBody),
|
|
234
240
|
});
|
|
@@ -236,22 +242,37 @@ describe("handleWebhooks — Kapso HMAC gate", () => {
|
|
|
236
242
|
expect(handled).toBe(true);
|
|
237
243
|
expect(captured.status).toBe(200);
|
|
238
244
|
expect(JSON.parse(captured.body)).toMatchObject({ received: true, routing: "task" });
|
|
239
|
-
|
|
245
|
+
await waitFor(
|
|
246
|
+
() =>
|
|
247
|
+
calls.some((call) => call.body.message_id === messageId) &&
|
|
248
|
+
calls.some(
|
|
249
|
+
(call) =>
|
|
250
|
+
(call.body.reaction as { message_id?: string } | undefined)?.message_id === messageId,
|
|
251
|
+
),
|
|
252
|
+
);
|
|
253
|
+
const messageCalls = calls.filter(
|
|
254
|
+
(call) =>
|
|
255
|
+
call.body.message_id === messageId ||
|
|
256
|
+
(call.body.reaction as { message_id?: string } | undefined)?.message_id === messageId,
|
|
257
|
+
);
|
|
258
|
+
expect(messageCalls).toHaveLength(2);
|
|
240
259
|
expect(
|
|
241
|
-
|
|
260
|
+
messageCalls.every(
|
|
261
|
+
(call) => call.url === "https://kapso.test/meta/whatsapp/v24.0/pn-http/messages",
|
|
262
|
+
),
|
|
242
263
|
).toBe(true);
|
|
243
|
-
expect(
|
|
264
|
+
expect(messageCalls.map((call) => call.body)).toContainEqual({
|
|
244
265
|
messaging_product: "whatsapp",
|
|
245
266
|
status: "read",
|
|
246
|
-
message_id:
|
|
267
|
+
message_id: messageId,
|
|
247
268
|
typing_indicator: { type: "text" },
|
|
248
269
|
});
|
|
249
|
-
expect(
|
|
270
|
+
expect(messageCalls.map((call) => call.body)).toContainEqual({
|
|
250
271
|
messaging_product: "whatsapp",
|
|
251
272
|
recipient_type: "individual",
|
|
252
273
|
to: "34679077777",
|
|
253
274
|
type: "reaction",
|
|
254
|
-
reaction: { message_id:
|
|
275
|
+
reaction: { message_id: messageId, emoji: "👀" },
|
|
255
276
|
});
|
|
256
277
|
});
|
|
257
278
|
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { afterAll, beforeAll, beforeEach, describe, expect, mock, spyOn, test } from "bun:test";
|
|
2
|
+
import { unlink } from "node:fs/promises";
|
|
3
|
+
import { closeDb, initDb } from "../be/db";
|
|
4
|
+
import {
|
|
5
|
+
deleteOAuthTokens,
|
|
6
|
+
getOAuthTokens,
|
|
7
|
+
storeOAuthTokens,
|
|
8
|
+
upsertOAuthApp,
|
|
9
|
+
} from "../be/db-queries/oauth";
|
|
10
|
+
import { _test, stopOAuthKeepalive } from "../oauth/keepalive";
|
|
11
|
+
|
|
12
|
+
const TEST_DB_PATH = "./test-oauth-keepalive.sqlite";
|
|
13
|
+
|
|
14
|
+
const originalSlackAlertsChannel = process.env.SLACK_ALERTS_CHANNEL;
|
|
15
|
+
function restoreSlackAlertsChannel(): void {
|
|
16
|
+
if (originalSlackAlertsChannel === undefined) {
|
|
17
|
+
delete process.env.SLACK_ALERTS_CHANNEL;
|
|
18
|
+
return;
|
|
19
|
+
}
|
|
20
|
+
process.env.SLACK_ALERTS_CHANNEL = originalSlackAlertsChannel;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
const testApp = {
|
|
24
|
+
clientId: "test-client-id",
|
|
25
|
+
clientSecret: "test-client-secret",
|
|
26
|
+
authorizeUrl: "https://example.com/oauth/authorize",
|
|
27
|
+
tokenUrl: "https://example.com/oauth/token",
|
|
28
|
+
redirectUri: "http://localhost:3013/callback",
|
|
29
|
+
scopes: "read,write",
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
const originalFetch = globalThis.fetch;
|
|
33
|
+
|
|
34
|
+
beforeAll(() => {
|
|
35
|
+
initDb(TEST_DB_PATH);
|
|
36
|
+
upsertOAuthApp("linear", testApp);
|
|
37
|
+
upsertOAuthApp("jira", {
|
|
38
|
+
...testApp,
|
|
39
|
+
tokenUrl: "https://example.com/jira/oauth/token",
|
|
40
|
+
});
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
beforeEach(async () => {
|
|
44
|
+
await stopOAuthKeepalive();
|
|
45
|
+
deleteOAuthTokens("linear");
|
|
46
|
+
deleteOAuthTokens("jira");
|
|
47
|
+
globalThis.fetch = originalFetch;
|
|
48
|
+
restoreSlackAlertsChannel();
|
|
49
|
+
mock.restore();
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
afterAll(async () => {
|
|
53
|
+
await stopOAuthKeepalive();
|
|
54
|
+
globalThis.fetch = originalFetch;
|
|
55
|
+
restoreSlackAlertsChannel();
|
|
56
|
+
closeDb();
|
|
57
|
+
await unlink(TEST_DB_PATH).catch(() => {});
|
|
58
|
+
await unlink(`${TEST_DB_PATH}-wal`).catch(() => {});
|
|
59
|
+
await unlink(`${TEST_DB_PATH}-shm`).catch(() => {});
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
describe("OAuth keepalive", () => {
|
|
63
|
+
test("uses a 12h cadence with a 10m refresh buffer", () => {
|
|
64
|
+
expect(_test.KEEPALIVE_INTERVAL_MS).toBe(12 * 60 * 60 * 1000);
|
|
65
|
+
expect(_test.KEEPALIVE_BUFFER_MS).toBe(10 * 60 * 1000);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
test("skips Slack notification when alerts channel env is unset", async () => {
|
|
69
|
+
delete process.env.SLACK_ALERTS_CHANNEL;
|
|
70
|
+
const warn = spyOn(console, "warn").mockImplementation(() => {});
|
|
71
|
+
|
|
72
|
+
await expect(_test.notifySlack("test alert")).resolves.toBeUndefined();
|
|
73
|
+
|
|
74
|
+
expect(warn).toHaveBeenCalledWith(
|
|
75
|
+
"[OAuth Keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert",
|
|
76
|
+
);
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
test("stopOAuthKeepalive waits for in-flight Jira refresh persistence", async () => {
|
|
80
|
+
storeOAuthTokens("linear", {
|
|
81
|
+
accessToken: "linear-access",
|
|
82
|
+
refreshToken: "linear-refresh",
|
|
83
|
+
expiresAt: new Date(Date.now() + 60 * 60 * 1000).toISOString(),
|
|
84
|
+
});
|
|
85
|
+
storeOAuthTokens("jira", {
|
|
86
|
+
accessToken: "old-jira-access",
|
|
87
|
+
refreshToken: "old-jira-refresh",
|
|
88
|
+
expiresAt: new Date(Date.now() + 60 * 1000).toISOString(),
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
let releaseTokenResponse!: () => void;
|
|
92
|
+
const tokenResponseReady = new Promise<void>((resolve) => {
|
|
93
|
+
releaseTokenResponse = resolve;
|
|
94
|
+
});
|
|
95
|
+
let fetchStarted!: () => void;
|
|
96
|
+
const fetchStartedPromise = new Promise<void>((resolve) => {
|
|
97
|
+
fetchStarted = resolve;
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
globalThis.fetch = mock(async () => {
|
|
101
|
+
fetchStarted();
|
|
102
|
+
await tokenResponseReady;
|
|
103
|
+
return new Response(
|
|
104
|
+
JSON.stringify({
|
|
105
|
+
access_token: "new-jira-access",
|
|
106
|
+
token_type: "Bearer",
|
|
107
|
+
expires_in: 3600,
|
|
108
|
+
refresh_token: "new-jira-refresh",
|
|
109
|
+
}),
|
|
110
|
+
{ status: 200, headers: { "Content-Type": "application/json" } },
|
|
111
|
+
);
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
const keepaliveRun = _test.runKeepalive("manual");
|
|
115
|
+
await fetchStartedPromise;
|
|
116
|
+
|
|
117
|
+
let stopResolved = false;
|
|
118
|
+
const stopPromise = stopOAuthKeepalive().then(() => {
|
|
119
|
+
stopResolved = true;
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
await Promise.resolve();
|
|
123
|
+
expect(stopResolved).toBe(false);
|
|
124
|
+
expect(getOAuthTokens("jira")?.refreshToken).toBe("old-jira-refresh");
|
|
125
|
+
|
|
126
|
+
releaseTokenResponse();
|
|
127
|
+
await stopPromise;
|
|
128
|
+
await keepaliveRun;
|
|
129
|
+
|
|
130
|
+
expect(stopResolved).toBe(true);
|
|
131
|
+
const tokens = getOAuthTokens("jira");
|
|
132
|
+
expect(tokens?.accessToken).toBe("new-jira-access");
|
|
133
|
+
expect(tokens?.refreshToken).toBe("new-jira-refresh");
|
|
134
|
+
});
|
|
135
|
+
});
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, spyOn, test } from "bun:test";
|
|
2
|
+
import { unlink } from "node:fs/promises";
|
|
3
|
+
import {
|
|
4
|
+
cancelTask,
|
|
5
|
+
closeDb,
|
|
6
|
+
completeTask,
|
|
7
|
+
createAgent,
|
|
8
|
+
createTaskExtended,
|
|
9
|
+
failTask,
|
|
10
|
+
getDb,
|
|
11
|
+
initDb,
|
|
12
|
+
} from "../be/db";
|
|
13
|
+
import { telemetry } from "../telemetry";
|
|
14
|
+
|
|
15
|
+
const TEST_DB_PATH = "./test-task-lifecycle-telemetry.sqlite";
|
|
16
|
+
const WORKER_ID = "bbbb0000-0000-4000-8000-000000000002";
|
|
17
|
+
|
|
18
|
+
async function flushMicrotasks(): Promise<void> {
|
|
19
|
+
await Promise.resolve();
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
async function removeTestDb(): Promise<void> {
|
|
23
|
+
for (const suffix of ["", "-wal", "-shm"]) {
|
|
24
|
+
try {
|
|
25
|
+
await unlink(TEST_DB_PATH + suffix);
|
|
26
|
+
} catch {
|
|
27
|
+
// File does not exist.
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
describe("task lifecycle telemetry", () => {
|
|
33
|
+
let taskEventSpy: ReturnType<typeof spyOn>;
|
|
34
|
+
let calls: Array<{ event: string; props: Parameters<typeof telemetry.taskEvent>[1] }>;
|
|
35
|
+
|
|
36
|
+
beforeEach(async () => {
|
|
37
|
+
closeDb();
|
|
38
|
+
await removeTestDb();
|
|
39
|
+
initDb(TEST_DB_PATH);
|
|
40
|
+
createAgent({ id: WORKER_ID, name: "Telemetry Worker", isLead: false, status: "idle" });
|
|
41
|
+
|
|
42
|
+
calls = [];
|
|
43
|
+
taskEventSpy = spyOn(telemetry, "taskEvent").mockImplementation((event, props) => {
|
|
44
|
+
calls.push({ event, props });
|
|
45
|
+
});
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
afterEach(async () => {
|
|
49
|
+
taskEventSpy.mockRestore();
|
|
50
|
+
closeDb();
|
|
51
|
+
await removeTestDb();
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test("emits task.created from createTaskExtended after the task is committed", async () => {
|
|
55
|
+
const task = createTaskExtended("create telemetry", {
|
|
56
|
+
agentId: WORKER_ID,
|
|
57
|
+
source: "mcp",
|
|
58
|
+
tags: ["telemetry"],
|
|
59
|
+
priority: 60,
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
expect(calls).toHaveLength(0);
|
|
63
|
+
|
|
64
|
+
await flushMicrotasks();
|
|
65
|
+
|
|
66
|
+
expect(calls).toEqual([
|
|
67
|
+
{
|
|
68
|
+
event: "created",
|
|
69
|
+
props: {
|
|
70
|
+
taskId: task.id,
|
|
71
|
+
source: "mcp",
|
|
72
|
+
tags: ["telemetry"],
|
|
73
|
+
hasParent: false,
|
|
74
|
+
priority: 60,
|
|
75
|
+
},
|
|
76
|
+
},
|
|
77
|
+
]);
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
test("does not emit task.created when an enclosing transaction rolls back", async () => {
|
|
81
|
+
const txn = getDb().transaction(() => {
|
|
82
|
+
createTaskExtended("rolled back telemetry", {
|
|
83
|
+
agentId: WORKER_ID,
|
|
84
|
+
source: "mcp",
|
|
85
|
+
});
|
|
86
|
+
throw new Error("rollback");
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
expect(() => txn()).toThrow("rollback");
|
|
90
|
+
|
|
91
|
+
await flushMicrotasks();
|
|
92
|
+
|
|
93
|
+
expect(calls).toHaveLength(0);
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
test("emits terminal lifecycle events from universal status helpers", async () => {
|
|
97
|
+
const completedTask = createTaskExtended("complete telemetry", {
|
|
98
|
+
agentId: WORKER_ID,
|
|
99
|
+
source: "mcp",
|
|
100
|
+
});
|
|
101
|
+
await flushMicrotasks();
|
|
102
|
+
calls = [];
|
|
103
|
+
|
|
104
|
+
completeTask(completedTask.id, "done");
|
|
105
|
+
await flushMicrotasks();
|
|
106
|
+
|
|
107
|
+
expect(calls).toHaveLength(1);
|
|
108
|
+
expect(calls[0]).toMatchObject({
|
|
109
|
+
event: "completed",
|
|
110
|
+
props: { taskId: completedTask.id, agentId: WORKER_ID },
|
|
111
|
+
});
|
|
112
|
+
expect(typeof calls[0]?.props.durationMs).toBe("number");
|
|
113
|
+
|
|
114
|
+
const failedTask = createTaskExtended("fail telemetry", {
|
|
115
|
+
agentId: WORKER_ID,
|
|
116
|
+
source: "mcp",
|
|
117
|
+
});
|
|
118
|
+
await flushMicrotasks();
|
|
119
|
+
calls = [];
|
|
120
|
+
|
|
121
|
+
failTask(failedTask.id, "nope");
|
|
122
|
+
await flushMicrotasks();
|
|
123
|
+
|
|
124
|
+
expect(calls).toHaveLength(1);
|
|
125
|
+
expect(calls[0]).toMatchObject({
|
|
126
|
+
event: "failed",
|
|
127
|
+
props: { taskId: failedTask.id, agentId: WORKER_ID },
|
|
128
|
+
});
|
|
129
|
+
expect(typeof calls[0]?.props.durationMs).toBe("number");
|
|
130
|
+
|
|
131
|
+
const cancelledTask = createTaskExtended("cancel telemetry", {
|
|
132
|
+
agentId: WORKER_ID,
|
|
133
|
+
source: "api",
|
|
134
|
+
});
|
|
135
|
+
await flushMicrotasks();
|
|
136
|
+
calls = [];
|
|
137
|
+
|
|
138
|
+
cancelTask(cancelledTask.id, "not needed");
|
|
139
|
+
await flushMicrotasks();
|
|
140
|
+
|
|
141
|
+
expect(calls).toHaveLength(1);
|
|
142
|
+
expect(calls[0]).toMatchObject({
|
|
143
|
+
event: "cancelled",
|
|
144
|
+
props: {
|
|
145
|
+
taskId: cancelledTask.id,
|
|
146
|
+
source: "api",
|
|
147
|
+
agentId: WORKER_ID,
|
|
148
|
+
previousStatus: "pending",
|
|
149
|
+
},
|
|
150
|
+
});
|
|
151
|
+
expect(typeof calls[0]?.props.durationMs).toBe("number");
|
|
152
|
+
});
|
|
153
|
+
});
|
|
@@ -21,7 +21,13 @@ import {
|
|
|
21
21
|
type ExecutorResult,
|
|
22
22
|
} from "../workflows/executors/base";
|
|
23
23
|
import { ExecutorRegistry } from "../workflows/executors/registry";
|
|
24
|
-
import {
|
|
24
|
+
import {
|
|
25
|
+
SWARM_SCRIPT_DEFAULT_TIMEOUT_MS,
|
|
26
|
+
SWARM_SCRIPT_MAX_TIMEOUT_MS,
|
|
27
|
+
SWARM_SCRIPT_MIN_TIMEOUT_MS,
|
|
28
|
+
SwarmScriptConfigSchema,
|
|
29
|
+
SwarmScriptExecutor,
|
|
30
|
+
} from "../workflows/executors/swarm-script";
|
|
25
31
|
import { interpolate } from "../workflows/template";
|
|
26
32
|
|
|
27
33
|
const TEST_DB_PATH = "./test-workflow-swarm-script.sqlite";
|
|
@@ -141,6 +147,38 @@ beforeEach(() => {
|
|
|
141
147
|
});
|
|
142
148
|
|
|
143
149
|
describe("SwarmScriptExecutor", () => {
|
|
150
|
+
test("config schema validates timeoutMs bounds and applies the runtime default", () => {
|
|
151
|
+
expect(SwarmScriptConfigSchema.parse({ scriptName: "quick" }).timeoutMs).toBe(
|
|
152
|
+
SWARM_SCRIPT_DEFAULT_TIMEOUT_MS,
|
|
153
|
+
);
|
|
154
|
+
|
|
155
|
+
expect(
|
|
156
|
+
SwarmScriptConfigSchema.safeParse({
|
|
157
|
+
scriptName: "quick",
|
|
158
|
+
timeoutMs: SWARM_SCRIPT_MIN_TIMEOUT_MS - 1,
|
|
159
|
+
}).success,
|
|
160
|
+
).toBe(false);
|
|
161
|
+
expect(
|
|
162
|
+
SwarmScriptConfigSchema.safeParse({
|
|
163
|
+
scriptName: "quick",
|
|
164
|
+
timeoutMs: SWARM_SCRIPT_MAX_TIMEOUT_MS + 1,
|
|
165
|
+
}).success,
|
|
166
|
+
).toBe(false);
|
|
167
|
+
|
|
168
|
+
expect(
|
|
169
|
+
SwarmScriptConfigSchema.parse({
|
|
170
|
+
scriptName: "quick",
|
|
171
|
+
timeoutMs: SWARM_SCRIPT_MIN_TIMEOUT_MS,
|
|
172
|
+
}).timeoutMs,
|
|
173
|
+
).toBe(SWARM_SCRIPT_MIN_TIMEOUT_MS);
|
|
174
|
+
expect(
|
|
175
|
+
SwarmScriptConfigSchema.parse({
|
|
176
|
+
scriptName: "quick",
|
|
177
|
+
timeoutMs: SWARM_SCRIPT_MAX_TIMEOUT_MS,
|
|
178
|
+
}).timeoutMs,
|
|
179
|
+
).toBe(SWARM_SCRIPT_MAX_TIMEOUT_MS);
|
|
180
|
+
});
|
|
181
|
+
|
|
144
182
|
test("A workflow with one swarm-script node resolves by name + runs + returns result", async () => {
|
|
145
183
|
await saveScript(
|
|
146
184
|
"add-one",
|
|
@@ -250,6 +288,49 @@ describe("SwarmScriptExecutor", () => {
|
|
|
250
288
|
expect(success.status).toBe("success");
|
|
251
289
|
});
|
|
252
290
|
|
|
291
|
+
test("timeoutMs not set — script completes with the default 30s window", async () => {
|
|
292
|
+
await saveScript("quick", `export default async () => ({ done: true });`);
|
|
293
|
+
const executor = new SwarmScriptExecutor(deps);
|
|
294
|
+
const wf = makeWorkflow({ nodes: [] });
|
|
295
|
+
const result = await executor.run({
|
|
296
|
+
config: { scriptName: "quick" },
|
|
297
|
+
context: {},
|
|
298
|
+
meta: {
|
|
299
|
+
runId: crypto.randomUUID(),
|
|
300
|
+
stepId: crypto.randomUUID(),
|
|
301
|
+
nodeId: "script",
|
|
302
|
+
workflowId: wf.id,
|
|
303
|
+
dryRun: false,
|
|
304
|
+
},
|
|
305
|
+
});
|
|
306
|
+
|
|
307
|
+
expect(result.status).toBe("success");
|
|
308
|
+
expect(result.output?.result).toEqual({ done: true });
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
test("timeoutMs set — a long-running script is killed before it finishes", async () => {
|
|
312
|
+
await saveScript(
|
|
313
|
+
"sleeper",
|
|
314
|
+
`export default async () => { await new Promise(r => setTimeout(r, 3000)); return { done: true }; };`,
|
|
315
|
+
);
|
|
316
|
+
const executor = new SwarmScriptExecutor(deps);
|
|
317
|
+
const wf = makeWorkflow({ nodes: [] });
|
|
318
|
+
const result = await executor.run({
|
|
319
|
+
config: { scriptName: "sleeper", timeoutMs: 300 },
|
|
320
|
+
context: {},
|
|
321
|
+
meta: {
|
|
322
|
+
runId: crypto.randomUUID(),
|
|
323
|
+
stepId: crypto.randomUUID(),
|
|
324
|
+
nodeId: "script",
|
|
325
|
+
workflowId: wf.id,
|
|
326
|
+
dryRun: false,
|
|
327
|
+
},
|
|
328
|
+
});
|
|
329
|
+
|
|
330
|
+
expect(result.status).toBe("failed");
|
|
331
|
+
expect(result.output?.exitCode).not.toBe(0);
|
|
332
|
+
});
|
|
333
|
+
|
|
253
334
|
test("Failure in the script surfaces as a workflow-node failure", async () => {
|
|
254
335
|
await saveScript("throws", `export default async () => { throw new Error("boom"); };`);
|
|
255
336
|
const executor = new SwarmScriptExecutor(deps);
|
|
@@ -1,15 +1,26 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
import { getScript, getScriptVersion } from "../../be/scripts/db";
|
|
3
|
+
import { DEFAULT_SCRIPT_RESOURCES } from "../../scripts-runtime/executors/types";
|
|
3
4
|
import { runScript } from "../../scripts-runtime/loader";
|
|
4
5
|
import type { ExecutorMeta } from "../../types";
|
|
5
6
|
import { BaseExecutor, type ExecutorResult } from "./base";
|
|
6
7
|
|
|
8
|
+
export const SWARM_SCRIPT_DEFAULT_TIMEOUT_MS = DEFAULT_SCRIPT_RESOURCES.wallClockMs;
|
|
9
|
+
export const SWARM_SCRIPT_MIN_TIMEOUT_MS = 1_000;
|
|
10
|
+
export const SWARM_SCRIPT_MAX_TIMEOUT_MS = DEFAULT_SCRIPT_RESOURCES.cpuTimeSec * 1_000;
|
|
11
|
+
|
|
7
12
|
export const SwarmScriptConfigSchema = z.object({
|
|
8
13
|
scriptName: z.string().min(1),
|
|
9
14
|
scope: z.enum(["global", "agent"]).optional(),
|
|
10
15
|
pinHash: z.string().min(1).optional(),
|
|
11
16
|
args: z.record(z.string(), z.unknown()).default({}),
|
|
12
17
|
fsMode: z.enum(["none", "workspace-rw"]).default("none"),
|
|
18
|
+
timeoutMs: z
|
|
19
|
+
.number()
|
|
20
|
+
.int()
|
|
21
|
+
.min(SWARM_SCRIPT_MIN_TIMEOUT_MS)
|
|
22
|
+
.max(SWARM_SCRIPT_MAX_TIMEOUT_MS)
|
|
23
|
+
.default(SWARM_SCRIPT_DEFAULT_TIMEOUT_MS),
|
|
13
24
|
});
|
|
14
25
|
|
|
15
26
|
export const SwarmScriptOutputSchema = z.object({
|
|
@@ -61,6 +72,7 @@ export class SwarmScriptExecutor extends BaseExecutor<
|
|
|
61
72
|
args: config.args,
|
|
62
73
|
fsMode: "none",
|
|
63
74
|
agentId: agentId ?? "workflow",
|
|
75
|
+
timeoutMs: config.timeoutMs,
|
|
64
76
|
});
|
|
65
77
|
|
|
66
78
|
const workflowOutput = {
|