@desplega.ai/agent-swarm 1.99.0 → 1.100.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +1 -1
- package/package.json +6 -6
- package/src/be/boot-scrub-logs.ts +79 -20
- package/src/be/db.ts +60 -0
- package/src/http/index.ts +1 -1
- package/src/http/tasks.ts +0 -26
- package/src/jira/webhook-lifecycle.ts +23 -4
- package/src/oauth/keepalive.ts +75 -33
- package/src/tests/jira-webhook-lifecycle.test.ts +29 -2
- package/src/tests/kapso-inbound.test.ts +30 -9
- package/src/tests/oauth-keepalive.test.ts +135 -0
- package/src/tests/task-lifecycle-telemetry.test.ts +153 -0
- package/src/tests/workflow-swarm-script.test.ts +82 -1
- package/src/workflows/executors/swarm-script.ts +12 -0
package/openapi.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"openapi": "3.1.0",
|
|
3
3
|
"info": {
|
|
4
4
|
"title": "Agent Swarm API",
|
|
5
|
-
"version": "1.99.0",
|
|
5
|
+
"version": "1.100.0",
|
|
6
6
|
"description": "Multi-agent orchestration API for Claude Code, Codex, and Gemini CLI. Enables task distribution, agent communication, and service discovery.\n\nMCP tools are documented separately in [MCP.md](./MCP.md)."
|
|
7
7
|
},
|
|
8
8
|
"servers": [
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@desplega.ai/agent-swarm",
|
|
3
|
-
"version": "1.99.0",
|
|
3
|
+
"version": "1.100.0",
|
|
4
4
|
"description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "desplega.sh <contact@desplega.sh>",
|
|
@@ -111,14 +111,14 @@
|
|
|
111
111
|
"@aws-sdk/client-bedrock": "3.1048.0",
|
|
112
112
|
"@desplega.ai/business-use": "^0.4.2",
|
|
113
113
|
"@desplega.ai/localtunnel": "^2.2.0",
|
|
114
|
-
"@earendil-works/pi-agent-core": "^0.79.
|
|
115
|
-
"@earendil-works/pi-ai": "^0.79.
|
|
116
|
-
"@earendil-works/pi-coding-agent": "^0.79.
|
|
114
|
+
"@earendil-works/pi-agent-core": "^0.79.6",
|
|
115
|
+
"@earendil-works/pi-ai": "^0.79.6",
|
|
116
|
+
"@earendil-works/pi-coding-agent": "^0.79.6",
|
|
117
117
|
"@inkjs/ui": "^2.0.0",
|
|
118
118
|
"@linear/sdk": "^77.0.0",
|
|
119
119
|
"@modelcontextprotocol/sdk": "^1.25.1",
|
|
120
|
-
"@openai/codex-sdk": "^0.
|
|
121
|
-
"@opencode-ai/sdk": "^1.17.
|
|
120
|
+
"@openai/codex-sdk": "^0.140.0",
|
|
121
|
+
"@opencode-ai/sdk": "^1.17.7",
|
|
122
122
|
"@openfort/openfort-node": "^0.9.1",
|
|
123
123
|
"@opentelemetry/api": "^1.9.1",
|
|
124
124
|
"@opentelemetry/exporter-trace-otlp-http": "^0.218.0",
|
|
package/src/be/boot-scrub-logs.ts
CHANGED
|
@@ -5,13 +5,24 @@
|
|
|
5
5
|
*
|
|
6
6
|
* Idempotent: already-scrubbed rows are no-ops (scrubSecrets is idempotent).
|
|
7
7
|
* Uses seed_state to avoid re-scanning on subsequent boots.
|
|
8
|
+
*
|
|
9
|
+
* Restart-safe: progress is persisted as a cursor in seed_state after each
|
|
10
|
+
* batch, so a restart (e.g. K8s probe SIGKILL) resumes from the last
|
|
11
|
+
* committed batch instead of re-scanning from zero.
|
|
12
|
+
*
|
|
13
|
+
* Non-blocking: yields to the event loop between batches so /health and
|
|
14
|
+
* startup/liveness probes stay responsive.
|
|
8
15
|
*/
|
|
9
16
|
|
|
10
17
|
import { scrubSecrets } from "../utils/secret-scrubber";
|
|
11
18
|
import { getDb } from "./db";
|
|
12
19
|
|
|
13
20
|
const SCRUB_KEY = "boot-scrub-logs-v2";
|
|
14
|
-
const
|
|
21
|
+
const CURSOR_KEY = "boot-scrub-logs-v2-cursor";
|
|
22
|
+
const BATCH_SIZE = 200;
|
|
23
|
+
|
|
24
|
+
/** Yield to the event loop so probes can respond. */
|
|
25
|
+
const yieldTick = () => new Promise<void>((r) => setTimeout(r, 5));
|
|
15
26
|
|
|
16
27
|
export async function runBootScrubLogs(): Promise<void> {
|
|
17
28
|
const db = getDb();
|
|
@@ -24,46 +35,94 @@ export async function runBootScrubLogs(): Promise<void> {
|
|
|
24
35
|
|
|
25
36
|
if (done) return;
|
|
26
37
|
|
|
27
|
-
//
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
WHERE content LIKE '%lin!_oauth!_%' ESCAPE '!'
|
|
35
|
-
OR content LIKE '%lin!_api!_%' ESCAPE '!'
|
|
36
|
-
OR content LIKE '%npm!_%' ESCAPE '!'
|
|
37
|
-
OR content LIKE '%ATATT%'`,
|
|
38
|
-
)
|
|
39
|
-
.all();
|
|
38
|
+
// Resume from last cursor if a previous run was interrupted
|
|
39
|
+
const savedCursor =
|
|
40
|
+
db
|
|
41
|
+
.prepare<{ seededHash: string }, [string, string]>(
|
|
42
|
+
"SELECT seededHash FROM seed_state WHERE kind = ? AND key = ?",
|
|
43
|
+
)
|
|
44
|
+
.get("maintenance", CURSOR_KEY)?.seededHash ?? "";
|
|
40
45
|
|
|
41
|
-
|
|
46
|
+
const lastProcessedId = savedCursor || "";
|
|
47
|
+
|
|
48
|
+
// Count total work remaining (for logging only)
|
|
49
|
+
const totalRemaining =
|
|
50
|
+
db
|
|
51
|
+
.prepare<{ count: number }, [string]>(
|
|
52
|
+
`SELECT COUNT(*) as count FROM session_logs
|
|
53
|
+
WHERE id > ?
|
|
54
|
+
AND (content LIKE '%lin!_oauth!_%' ESCAPE '!'
|
|
55
|
+
OR content LIKE '%lin!_api!_%' ESCAPE '!'
|
|
56
|
+
OR content LIKE '%npm!_%' ESCAPE '!'
|
|
57
|
+
OR content LIKE '%ATATT%')`,
|
|
58
|
+
)
|
|
59
|
+
.get(lastProcessedId)?.count ?? 0;
|
|
60
|
+
|
|
61
|
+
if (totalRemaining === 0) {
|
|
42
62
|
markDone(db);
|
|
43
63
|
return;
|
|
44
64
|
}
|
|
45
65
|
|
|
46
|
-
console.log(
|
|
66
|
+
console.log(
|
|
67
|
+
`[boot-scrub-logs] starting: ${totalRemaining} candidate rows remaining` +
|
|
68
|
+
(lastProcessedId ? ` (resuming from cursor ${lastProcessedId.slice(0, 8)}…)` : ""),
|
|
69
|
+
);
|
|
47
70
|
|
|
71
|
+
const selectBatch = db.prepare<{ id: string; content: string }, [string]>(
|
|
72
|
+
`SELECT id, content FROM session_logs
|
|
73
|
+
WHERE id > ?
|
|
74
|
+
AND (content LIKE '%lin!_oauth!_%' ESCAPE '!'
|
|
75
|
+
OR content LIKE '%lin!_api!_%' ESCAPE '!'
|
|
76
|
+
OR content LIKE '%npm!_%' ESCAPE '!'
|
|
77
|
+
OR content LIKE '%ATATT%')
|
|
78
|
+
ORDER BY id ASC
|
|
79
|
+
LIMIT ${BATCH_SIZE}`,
|
|
80
|
+
);
|
|
48
81
|
const update = db.prepare("UPDATE session_logs SET content = ? WHERE id = ?");
|
|
82
|
+
const saveCursor = db.prepare(
|
|
83
|
+
`INSERT INTO seed_state (kind, key, seededHash, seededAt)
|
|
84
|
+
VALUES ('maintenance', '${CURSOR_KEY}', ?, datetime('now'))
|
|
85
|
+
ON CONFLICT (kind, key) DO UPDATE SET seededHash = ?, seededAt = datetime('now')`,
|
|
86
|
+
);
|
|
87
|
+
|
|
49
88
|
let scrubbed = 0;
|
|
89
|
+
let scanned = 0;
|
|
90
|
+
let cursor = lastProcessedId;
|
|
91
|
+
|
|
92
|
+
// Paginated cursor loop — each iteration fetches the next BATCH_SIZE rows
|
|
93
|
+
// ordered by id, processes them in a transaction, saves the cursor, and
|
|
94
|
+
// yields to the event loop.
|
|
95
|
+
for (;;) {
|
|
96
|
+
const rows = selectBatch.all(cursor);
|
|
97
|
+
if (rows.length === 0) break;
|
|
98
|
+
|
|
99
|
+
const batchLastId = rows[rows.length - 1]!.id;
|
|
50
100
|
|
|
51
|
-
for (let i = 0; i < rows.length; i += BATCH_SIZE) {
|
|
52
|
-
const batch = rows.slice(i, i + BATCH_SIZE);
|
|
53
101
|
const tx = db.transaction(() => {
|
|
54
|
-
for (const row of batch) {
|
|
102
|
+
for (const row of rows) {
|
|
55
103
|
const cleaned = scrubSecrets(row.content);
|
|
56
104
|
if (cleaned !== row.content) {
|
|
57
105
|
update.run(cleaned, row.id);
|
|
58
106
|
scrubbed++;
|
|
59
107
|
}
|
|
60
108
|
}
|
|
109
|
+
// Persist cursor inside the same transaction so it's atomic with the scrub
|
|
110
|
+
saveCursor.run(batchLastId, batchLastId);
|
|
61
111
|
});
|
|
62
112
|
tx();
|
|
113
|
+
|
|
114
|
+
scanned += rows.length;
|
|
115
|
+
cursor = batchLastId;
|
|
116
|
+
|
|
117
|
+
// Yield to the event loop between batches
|
|
118
|
+
await yieldTick();
|
|
63
119
|
}
|
|
64
120
|
|
|
65
121
|
markDone(db);
|
|
66
|
-
|
|
122
|
+
// Clean up the cursor key now that we're fully done
|
|
123
|
+
db.run("DELETE FROM seed_state WHERE kind = 'maintenance' AND key = ?", [CURSOR_KEY]);
|
|
124
|
+
|
|
125
|
+
console.log(`[boot-scrub-logs] complete: scanned=${scanned} scrubbed=${scrubbed}`);
|
|
67
126
|
}
|
|
68
127
|
|
|
69
128
|
function markDone(db: ReturnType<typeof getDb>) {
|
package/src/be/db.ts
CHANGED
|
@@ -4,6 +4,7 @@ import pkg from "../../package.json";
|
|
|
4
4
|
import { addEyesReactionOnTaskStart } from "../github/task-reactions";
|
|
5
5
|
import { type ModelTier, parseModelTier } from "../model-tiers";
|
|
6
6
|
import { configureDbResolver } from "../prompts/resolver";
|
|
7
|
+
import { telemetry } from "../telemetry";
|
|
7
8
|
import type {
|
|
8
9
|
ActiveSession,
|
|
9
10
|
Agent,
|
|
@@ -114,6 +115,19 @@ import { isReservedConfigKey, reservedKeyError } from "./swarm-config-guard";
|
|
|
114
115
|
let db: Database | null = null;
|
|
115
116
|
let sqliteVecAvailable = false;
|
|
116
117
|
|
|
118
|
+
type TaskTelemetryProps = Parameters<typeof telemetry.taskEvent>[1];
|
|
119
|
+
|
|
120
|
+
function emitTaskLifecycleTelemetryAfterCommit(
|
|
121
|
+
event: string,
|
|
122
|
+
props: TaskTelemetryProps,
|
|
123
|
+
verify?: (task: AgentTask | null) => boolean,
|
|
124
|
+
): void {
|
|
125
|
+
queueMicrotask(() => {
|
|
126
|
+
if (verify && !verify(getTaskById(props.taskId))) return;
|
|
127
|
+
telemetry.taskEvent(event, props);
|
|
128
|
+
});
|
|
129
|
+
}
|
|
130
|
+
|
|
117
131
|
export function isSqliteVecAvailable(): boolean {
|
|
118
132
|
return sqliteVecAvailable;
|
|
119
133
|
}
|
|
@@ -2105,6 +2119,16 @@ export function completeTask(id: string, output?: string): AgentTask | null {
|
|
|
2105
2119
|
}
|
|
2106
2120
|
|
|
2107
2121
|
if (row && oldTask) {
|
|
2122
|
+
emitTaskLifecycleTelemetryAfterCommit(
|
|
2123
|
+
"completed",
|
|
2124
|
+
{
|
|
2125
|
+
taskId: id,
|
|
2126
|
+
agentId: row.agentId ?? undefined,
|
|
2127
|
+
durationMs: row.createdAt ? Date.now() - new Date(row.createdAt).getTime() : undefined,
|
|
2128
|
+
},
|
|
2129
|
+
(task) => task?.status === "completed",
|
|
2130
|
+
);
|
|
2131
|
+
|
|
2108
2132
|
try {
|
|
2109
2133
|
createLogEntry({
|
|
2110
2134
|
eventType: "task_status_change",
|
|
@@ -2145,6 +2169,16 @@ export function failTask(id: string, reason: string): AgentTask | null {
|
|
|
2145
2169
|
const scrubbedReason = scrubSecrets(reason);
|
|
2146
2170
|
const row = taskQueries.setFailure().get(scrubbedReason, finishedAt, id);
|
|
2147
2171
|
if (row && oldTask) {
|
|
2172
|
+
emitTaskLifecycleTelemetryAfterCommit(
|
|
2173
|
+
"failed",
|
|
2174
|
+
{
|
|
2175
|
+
taskId: id,
|
|
2176
|
+
agentId: row.agentId ?? undefined,
|
|
2177
|
+
durationMs: row.createdAt ? Date.now() - new Date(row.createdAt).getTime() : undefined,
|
|
2178
|
+
},
|
|
2179
|
+
(task) => task?.status === "failed",
|
|
2180
|
+
);
|
|
2181
|
+
|
|
2148
2182
|
try {
|
|
2149
2183
|
createLogEntry({
|
|
2150
2184
|
eventType: "task_status_change",
|
|
@@ -2192,6 +2226,20 @@ export function cancelTask(id: string, reason?: string): AgentTask | null {
|
|
|
2192
2226
|
const row = taskQueries.setCancelled().get(cancelReason, finishedAt, id);
|
|
2193
2227
|
|
|
2194
2228
|
if (row && oldTask) {
|
|
2229
|
+
emitTaskLifecycleTelemetryAfterCommit(
|
|
2230
|
+
"cancelled",
|
|
2231
|
+
{
|
|
2232
|
+
taskId: id,
|
|
2233
|
+
source: oldTask.source,
|
|
2234
|
+
agentId: oldTask.agentId ?? undefined,
|
|
2235
|
+
previousStatus: oldTask.status,
|
|
2236
|
+
durationMs: oldTask.createdAt
|
|
2237
|
+
? Date.now() - new Date(oldTask.createdAt).getTime()
|
|
2238
|
+
: undefined,
|
|
2239
|
+
},
|
|
2240
|
+
(task) => task?.status === "cancelled",
|
|
2241
|
+
);
|
|
2242
|
+
|
|
2195
2243
|
try {
|
|
2196
2244
|
createLogEntry({
|
|
2197
2245
|
eventType: "task_status_change",
|
|
@@ -3157,6 +3205,18 @@ export function createTaskExtended(task: string, options?: CreateTaskOptions): A
|
|
|
3157
3205
|
});
|
|
3158
3206
|
} catch {}
|
|
3159
3207
|
|
|
3208
|
+
emitTaskLifecycleTelemetryAfterCommit(
|
|
3209
|
+
"created",
|
|
3210
|
+
{
|
|
3211
|
+
taskId: row.id,
|
|
3212
|
+
source: row.source,
|
|
3213
|
+
tags: options?.tags ?? [],
|
|
3214
|
+
hasParent: !!row.parentTaskId,
|
|
3215
|
+
priority: row.priority,
|
|
3216
|
+
},
|
|
3217
|
+
(task) => task !== null,
|
|
3218
|
+
);
|
|
3219
|
+
|
|
3160
3220
|
try {
|
|
3161
3221
|
import("../workflows/event-bus").then(({ workflowEventBus }) => {
|
|
3162
3222
|
workflowEventBus.emit("task.created", {
|
package/src/http/index.ts
CHANGED
|
@@ -382,7 +382,7 @@ async function shutdown() {
|
|
|
382
382
|
// Stop OAuth keepalive
|
|
383
383
|
if (process.env.OAUTH_KEEPALIVE_DISABLE !== "true") {
|
|
384
384
|
const { stopOAuthKeepalive } = await import("../oauth/keepalive");
|
|
385
|
-
stopOAuthKeepalive();
|
|
385
|
+
await stopOAuthKeepalive();
|
|
386
386
|
}
|
|
387
387
|
|
|
388
388
|
// Stop MCP OAuth pending-session garbage collector
|
package/src/http/tasks.ts
CHANGED
|
@@ -26,7 +26,6 @@ import {
|
|
|
26
26
|
import { ModelTierSchema, splitLegacyModelAlias } from "../model-tiers";
|
|
27
27
|
import { createTaskWithSiblingAwareness } from "../tasks/sibling-awareness";
|
|
28
28
|
import { createResumeFollowUp, createWorkerTaskFollowUp } from "../tasks/worker-follow-up";
|
|
29
|
-
import { telemetry } from "../telemetry";
|
|
30
29
|
import {
|
|
31
30
|
type AgentTaskSource,
|
|
32
31
|
AgentTaskSourceSchema,
|
|
@@ -420,14 +419,6 @@ export async function handleTasks(
|
|
|
420
419
|
},
|
|
421
420
|
});
|
|
422
421
|
|
|
423
|
-
telemetry.taskEvent("created", {
|
|
424
|
-
taskId: task.id,
|
|
425
|
-
source: task.source,
|
|
426
|
-
tags: parsed.body.tags ?? [],
|
|
427
|
-
hasParent: !!task.parentTaskId,
|
|
428
|
-
priority: task.priority,
|
|
429
|
-
});
|
|
430
|
-
|
|
431
422
|
json(res, task, 201);
|
|
432
423
|
} catch (error) {
|
|
433
424
|
console.error("[HTTP] Failed to create task:", error);
|
|
@@ -536,14 +527,6 @@ export async function handleTasks(
|
|
|
536
527
|
});
|
|
537
528
|
}
|
|
538
529
|
|
|
539
|
-
telemetry.taskEvent("cancelled", {
|
|
540
|
-
taskId: parsed.params.id,
|
|
541
|
-
source: task.source,
|
|
542
|
-
agentId: task.agentId ?? undefined,
|
|
543
|
-
previousStatus: task.status,
|
|
544
|
-
durationMs: task.createdAt ? Date.now() - new Date(task.createdAt).getTime() : undefined,
|
|
545
|
-
});
|
|
546
|
-
|
|
547
530
|
if (task.agentId) {
|
|
548
531
|
updateAgentStatusFromCapacity(task.agentId);
|
|
549
532
|
}
|
|
@@ -645,15 +628,6 @@ export async function handleTasks(
|
|
|
645
628
|
if (result.task && !("alreadyFinished" in result && result.alreadyFinished)) {
|
|
646
629
|
const finishEventId = parsed.body.status === "completed" ? "completed" : "failed";
|
|
647
630
|
|
|
648
|
-
const durationMs = result.task.createdAt
|
|
649
|
-
? Date.now() - new Date(result.task.createdAt).getTime()
|
|
650
|
-
: undefined;
|
|
651
|
-
|
|
652
|
-
telemetry.taskEvent(finishEventId, {
|
|
653
|
-
taskId: parsed.params.id,
|
|
654
|
-
agentId: myAgentId,
|
|
655
|
-
durationMs,
|
|
656
|
-
});
|
|
657
631
|
ensure({
|
|
658
632
|
id: finishEventId,
|
|
659
633
|
flow: "task",
|
|
package/src/jira/webhook-lifecycle.ts
CHANGED
|
@@ -10,7 +10,6 @@ const SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1000;
|
|
|
10
10
|
* first refresh round-trip (Atlassian returns the authoritative expiry).
|
|
11
11
|
*/
|
|
12
12
|
const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
|
|
13
|
-
const SLACK_ALERTS_CHANNEL = process.env.SLACK_ALERTS_CHANNEL || "C08JCRURPBV";
|
|
14
13
|
|
|
15
14
|
const WEBHOOK_EVENTS = [
|
|
16
15
|
"jira:issue_updated",
|
|
@@ -40,6 +39,12 @@ function getRegisteredWebhookUrl(): string {
|
|
|
40
39
|
// ─── Slack alert (best-effort) ───────────────────────────────────────────────
|
|
41
40
|
|
|
42
41
|
async function notifySlack(text: string): Promise<void> {
|
|
42
|
+
const channel = process.env.SLACK_ALERTS_CHANNEL;
|
|
43
|
+
if (!channel) {
|
|
44
|
+
console.warn("[Jira webhook keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert");
|
|
45
|
+
return;
|
|
46
|
+
}
|
|
47
|
+
|
|
43
48
|
try {
|
|
44
49
|
const { getSlackApp } = await import("../slack/app");
|
|
45
50
|
const app = getSlackApp();
|
|
@@ -48,13 +53,21 @@ async function notifySlack(text: string): Promise<void> {
|
|
|
48
53
|
return;
|
|
49
54
|
}
|
|
50
55
|
await app.client.chat.postMessage({
|
|
51
|
-
channel: SLACK_ALERTS_CHANNEL,
|
|
56
|
+
channel,
|
|
52
57
|
text,
|
|
53
58
|
});
|
|
54
|
-
console.log(
|
|
59
|
+
console.log(`[Jira webhook keepalive] Slack notification sent to ${channel}`);
|
|
55
60
|
} catch (slackErr) {
|
|
61
|
+
const code =
|
|
62
|
+
typeof slackErr === "object" && slackErr !== null && "code" in slackErr
|
|
63
|
+
? ` code=${String(slackErr.code)}`
|
|
64
|
+
: "";
|
|
65
|
+
const data =
|
|
66
|
+
typeof slackErr === "object" && slackErr !== null && "data" in slackErr
|
|
67
|
+
? ` data=${JSON.stringify(slackErr.data)}`
|
|
68
|
+
: "";
|
|
56
69
|
console.error(
|
|
57
|
-
|
|
70
|
+
`[Jira webhook keepalive] Failed to send Slack notification to ${channel}${code}${data}:`,
|
|
58
71
|
slackErr instanceof Error ? slackErr.message : slackErr,
|
|
59
72
|
);
|
|
60
73
|
}
|
|
@@ -362,3 +375,9 @@ export function stopJiraWebhookKeepalive(): void {
|
|
|
362
375
|
console.log("[Jira webhook keepalive] Stopped");
|
|
363
376
|
}
|
|
364
377
|
}
|
|
378
|
+
|
|
379
|
+
// ─── Test helpers (exported for unit tests only) ─────────────────────────────
|
|
380
|
+
|
|
381
|
+
export const _test = {
|
|
382
|
+
notifySlack,
|
|
383
|
+
};
|
package/src/oauth/keepalive.ts
CHANGED
|
@@ -1,42 +1,46 @@
|
|
|
1
1
|
import { ensureTokenOrThrow } from "./ensure-token";
|
|
2
2
|
|
|
3
|
-
//
|
|
4
|
-
//
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
// linear-outbound (e.g. agents using the read-only db-query MCP) sees a
|
|
9
|
-
// not-yet-expired token. The 65-min buffer is wider than the access-token
|
|
10
|
-
// lifetime, so isTokenExpiringSoon always returns true and every tick rotates.
|
|
11
|
-
//
|
|
12
|
-
// Touching the row this often also serves the original "keep the refresh
|
|
13
|
-
// token alive" goal — Atlassian expires inactive refresh tokens after 90 days,
|
|
14
|
-
// and Linear's behavior is similar; refreshing every 50 min trivially keeps
|
|
15
|
-
// both providers active.
|
|
16
|
-
const KEEPALIVE_INTERVAL_MS = 50 * 60 * 1000;
|
|
17
|
-
const KEEPALIVE_BUFFER_MS = 65 * 60 * 1000;
|
|
18
|
-
const SLACK_ALERTS_CHANNEL = process.env.SLACK_ALERTS_CHANNEL || "C08JCRURPBV";
|
|
3
|
+
// Keep refresh tokens warm without constantly rotating strict-rotation
|
|
4
|
+
// providers. Reactive callers still refresh access tokens before API use.
|
|
5
|
+
const KEEPALIVE_INTERVAL_MS = 12 * 60 * 60 * 1000;
|
|
6
|
+
const KEEPALIVE_BUFFER_MS = 10 * 60 * 1000;
|
|
7
|
+
const STARTUP_KEEPALIVE_DELAY_MS = 10_000;
|
|
19
8
|
|
|
20
9
|
const KEEPALIVE_PROVIDERS = ["linear", "jira"] as const;
|
|
21
10
|
|
|
22
11
|
let keepaliveInterval: ReturnType<typeof setInterval> | null = null;
|
|
12
|
+
let startupKeepaliveTimeout: ReturnType<typeof setTimeout> | null = null;
|
|
13
|
+
let inflightKeepalive: Promise<void> | null = null;
|
|
14
|
+
|
|
15
|
+
function scheduleKeepaliveRun(trigger: "startup" | "interval" | "manual"): Promise<void> {
|
|
16
|
+
if (inflightKeepalive) {
|
|
17
|
+
console.log(`[OAuth Keepalive] ${trigger} tick skipped; previous run still in flight`);
|
|
18
|
+
return inflightKeepalive;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
inflightKeepalive = runKeepalive(trigger).finally(() => {
|
|
22
|
+
inflightKeepalive = null;
|
|
23
|
+
});
|
|
24
|
+
return inflightKeepalive;
|
|
25
|
+
}
|
|
23
26
|
|
|
24
27
|
/**
|
|
25
28
|
* Proactively refresh OAuth tokens on a schedule.
|
|
26
29
|
*
|
|
27
30
|
* Two purposes, both served by the same tick:
|
|
28
31
|
*
|
|
29
|
-
* 1.
|
|
30
|
-
* `oauth_tokens.accessToken` directly (db-query MCP, future MCP servers,
|
|
31
|
-
* `tracker-status`) needs a not-yet-expired value. The 50-min cadence
|
|
32
|
-
* keeps the row ahead of the 1h access-token lifetime.
|
|
33
|
-
* 2. Refresh-token liveness. Atlassian rotates refresh tokens and expires
|
|
32
|
+
* 1. Refresh-token liveness. Atlassian rotates refresh tokens and expires
|
|
34
33
|
* them after ~90 days of inactivity, so silent gaps in usage would kill
|
|
35
|
-
* the integration.
|
|
36
|
-
*
|
|
37
|
-
*
|
|
34
|
+
* the integration. The 12h cadence keeps the refresh token active without
|
|
35
|
+
* rotating it dozens of times per day.
|
|
36
|
+
* 2. Loud failure on boot and during scheduled checks. A dead token surfaces
|
|
37
|
+
* as structured logs plus a Slack alert instead of silently retrying.
|
|
38
|
+
*
|
|
39
|
+
* Access-token freshness is handled reactively by ensureToken callers before
|
|
40
|
+
* Jira/Linear API use.
|
|
38
41
|
*/
|
|
39
|
-
async function runKeepalive(): Promise<void> {
|
|
42
|
+
async function runKeepalive(trigger: "startup" | "interval" | "manual" = "manual"): Promise<void> {
|
|
43
|
+
console.log(`[OAuth Keepalive] Running ${trigger} token refresh check`);
|
|
40
44
|
for (const provider of KEEPALIVE_PROVIDERS) {
|
|
41
45
|
console.log(`[OAuth Keepalive] Running scheduled token refresh for ${provider}...`);
|
|
42
46
|
try {
|
|
@@ -53,6 +57,12 @@ async function runKeepalive(): Promise<void> {
|
|
|
53
57
|
}
|
|
54
58
|
|
|
55
59
|
async function notifySlack(text: string): Promise<void> {
|
|
60
|
+
const channel = process.env.SLACK_ALERTS_CHANNEL;
|
|
61
|
+
if (!channel) {
|
|
62
|
+
console.warn("[OAuth Keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert");
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
|
|
56
66
|
try {
|
|
57
67
|
const { getSlackApp } = await import("../slack/app");
|
|
58
68
|
const app = getSlackApp();
|
|
@@ -61,13 +71,21 @@ async function notifySlack(text: string): Promise<void> {
|
|
|
61
71
|
return;
|
|
62
72
|
}
|
|
63
73
|
await app.client.chat.postMessage({
|
|
64
|
-
channel
|
|
74
|
+
channel,
|
|
65
75
|
text,
|
|
66
76
|
});
|
|
67
|
-
console.log(
|
|
77
|
+
console.log(`[OAuth Keepalive] Slack notification sent to ${channel}`);
|
|
68
78
|
} catch (slackErr) {
|
|
79
|
+
const code =
|
|
80
|
+
typeof slackErr === "object" && slackErr !== null && "code" in slackErr
|
|
81
|
+
? ` code=${String(slackErr.code)}`
|
|
82
|
+
: "";
|
|
83
|
+
const data =
|
|
84
|
+
typeof slackErr === "object" && slackErr !== null && "data" in slackErr
|
|
85
|
+
? ` data=${JSON.stringify(slackErr.data)}`
|
|
86
|
+
: "";
|
|
69
87
|
console.error(
|
|
70
|
-
|
|
88
|
+
`[OAuth Keepalive] Failed to send Slack notification to ${channel}${code}${data}:`,
|
|
71
89
|
slackErr instanceof Error ? slackErr.message : slackErr,
|
|
72
90
|
);
|
|
73
91
|
}
|
|
@@ -87,21 +105,45 @@ export function startOAuthKeepalive(): void {
|
|
|
87
105
|
`[OAuth Keepalive] Starting (interval ${Math.round(KEEPALIVE_INTERVAL_MS / 60_000)}min, buffer ${Math.round(KEEPALIVE_BUFFER_MS / 60_000)}min)`,
|
|
88
106
|
);
|
|
89
107
|
|
|
90
|
-
// Run once after a short delay (let server finish startup)
|
|
91
|
-
setTimeout(() =>
|
|
108
|
+
// Run once after a short delay (let server finish startup).
|
|
109
|
+
startupKeepaliveTimeout = setTimeout(() => {
|
|
110
|
+
startupKeepaliveTimeout = null;
|
|
111
|
+
scheduleKeepaliveRun("startup");
|
|
112
|
+
}, STARTUP_KEEPALIVE_DELAY_MS);
|
|
92
113
|
|
|
93
114
|
keepaliveInterval = setInterval(() => {
|
|
94
|
-
|
|
115
|
+
scheduleKeepaliveRun("interval");
|
|
95
116
|
}, KEEPALIVE_INTERVAL_MS);
|
|
96
117
|
}
|
|
97
118
|
|
|
98
119
|
/**
|
|
99
|
-
* Stop the OAuth keepalive timer.
|
|
120
|
+
* Stop the OAuth keepalive timer and wait for any in-flight refresh to persist.
|
|
100
121
|
*/
|
|
101
|
-
export function stopOAuthKeepalive(): void {
|
|
122
|
+
export async function stopOAuthKeepalive(): Promise<void> {
|
|
123
|
+
if (startupKeepaliveTimeout) {
|
|
124
|
+
clearTimeout(startupKeepaliveTimeout);
|
|
125
|
+
startupKeepaliveTimeout = null;
|
|
126
|
+
}
|
|
127
|
+
|
|
102
128
|
if (keepaliveInterval) {
|
|
103
129
|
clearInterval(keepaliveInterval);
|
|
104
130
|
keepaliveInterval = null;
|
|
105
131
|
console.log("[OAuth Keepalive] Stopped");
|
|
106
132
|
}
|
|
133
|
+
|
|
134
|
+
if (inflightKeepalive) {
|
|
135
|
+
console.log("[OAuth Keepalive] Waiting for in-flight token refresh before shutdown");
|
|
136
|
+
await inflightKeepalive;
|
|
137
|
+
}
|
|
107
138
|
}
|
|
139
|
+
|
|
140
|
+
// ─── Test helpers (exported for unit tests only) ─────────────────────────────
|
|
141
|
+
|
|
142
|
+
export const _test = {
|
|
143
|
+
KEEPALIVE_INTERVAL_MS,
|
|
144
|
+
KEEPALIVE_BUFFER_MS,
|
|
145
|
+
STARTUP_KEEPALIVE_DELAY_MS,
|
|
146
|
+
notifySlack,
|
|
147
|
+
runKeepalive: scheduleKeepaliveRun,
|
|
148
|
+
getInflightKeepalive: () => inflightKeepalive,
|
|
149
|
+
};
|
|
package/src/tests/jira-webhook-lifecycle.test.ts
CHANGED
|
@@ -1,10 +1,19 @@
|
|
|
1
|
-
import { afterAll, beforeAll, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
1
|
+
import { afterAll, beforeAll, beforeEach, describe, expect, mock, spyOn, test } from "bun:test";
|
|
2
2
|
import { unlink } from "node:fs/promises";
|
|
3
3
|
import { closeDb, getDb, initDb } from "../be/db";
|
|
4
4
|
import { upsertOAuthApp } from "../be/db-queries/oauth";
|
|
5
5
|
import { getJiraMetadata, updateJiraMetadata } from "../jira/metadata";
|
|
6
6
|
|
|
7
7
|
const TEST_DB_PATH = "./test-jira-webhook-lifecycle.sqlite";
|
|
8
|
+
const originalSlackAlertsChannel = process.env.SLACK_ALERTS_CHANNEL;
|
|
9
|
+
|
|
10
|
+
function restoreSlackAlertsChannel(): void {
|
|
11
|
+
if (originalSlackAlertsChannel === undefined) {
|
|
12
|
+
delete process.env.SLACK_ALERTS_CHANNEL;
|
|
13
|
+
return;
|
|
14
|
+
}
|
|
15
|
+
process.env.SLACK_ALERTS_CHANNEL = originalSlackAlertsChannel;
|
|
16
|
+
}
|
|
8
17
|
|
|
9
18
|
// Mock the Jira fetch client. Each test installs its own per-call response.
|
|
10
19
|
const jiraFetchMock = mock(
|
|
@@ -38,22 +47,40 @@ beforeAll(() => {
|
|
|
38
47
|
afterAll(async () => {
|
|
39
48
|
delete process.env.JIRA_WEBHOOK_TOKEN;
|
|
40
49
|
delete process.env.MCP_BASE_URL;
|
|
50
|
+
restoreSlackAlertsChannel();
|
|
41
51
|
closeDb();
|
|
42
52
|
await unlink(TEST_DB_PATH).catch(() => {});
|
|
43
53
|
await unlink(`${TEST_DB_PATH}-wal`).catch(() => {});
|
|
44
54
|
await unlink(`${TEST_DB_PATH}-shm`).catch(() => {});
|
|
45
55
|
});
|
|
46
56
|
|
|
47
|
-
const { refreshJiraWebhooks, registerJiraWebhook } = await import(
|
|
57
|
+
const { _test, refreshJiraWebhooks, registerJiraWebhook } = await import(
|
|
58
|
+
"../jira/webhook-lifecycle"
|
|
59
|
+
);
|
|
48
60
|
|
|
49
61
|
beforeEach(() => {
|
|
50
62
|
jiraFetchMock.mockClear();
|
|
63
|
+
restoreSlackAlertsChannel();
|
|
51
64
|
// Reset the webhookIds list each test (and clear metadata writebacks).
|
|
52
65
|
getDb()
|
|
53
66
|
.query("UPDATE oauth_apps SET metadata = ? WHERE provider = 'jira'")
|
|
54
67
|
.run(JSON.stringify({ cloudId: "cloud-1", siteUrl: "https://example.atlassian.net" }));
|
|
55
68
|
});
|
|
56
69
|
|
|
70
|
+
describe("Jira webhook Slack alerts", () => {
|
|
71
|
+
test("skips Slack notification when alerts channel env is unset", async () => {
|
|
72
|
+
delete process.env.SLACK_ALERTS_CHANNEL;
|
|
73
|
+
const warn = spyOn(console, "warn").mockImplementation(() => {});
|
|
74
|
+
|
|
75
|
+
await expect(_test.notifySlack("test alert")).resolves.toBeUndefined();
|
|
76
|
+
|
|
77
|
+
expect(warn).toHaveBeenCalledWith(
|
|
78
|
+
"[Jira webhook keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert",
|
|
79
|
+
);
|
|
80
|
+
warn.mockRestore();
|
|
81
|
+
});
|
|
82
|
+
});
|
|
83
|
+
|
|
57
84
|
describe("registerJiraWebhook", () => {
|
|
58
85
|
test("posts the right body shape and persists webhookId into metadata", async () => {
|
|
59
86
|
jiraFetchMock.mockImplementationOnce(
|
|
package/src/tests/kapso-inbound.test.ts
CHANGED
|
@@ -68,6 +68,13 @@ function fakeReqRes(rawBody: string, headers: Record<string, string>) {
|
|
|
68
68
|
return { req, res, captured };
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
+
async function waitFor(predicate: () => boolean): Promise<void> {
|
|
72
|
+
for (let i = 0; i < 20; i++) {
|
|
73
|
+
if (predicate()) return;
|
|
74
|
+
await new Promise((resolve) => setTimeout(resolve, 5));
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
71
78
|
const KAPSO_PATH = ["api", "integrations", "kapso", "webhook"];
|
|
72
79
|
|
|
73
80
|
beforeAll(() => {
|
|
@@ -226,9 +233,8 @@ describe("handleWebhooks — Kapso HMAC gate", () => {
|
|
|
226
233
|
return new Response(JSON.stringify({ success: true }), { status: 200 });
|
|
227
234
|
}) as typeof fetch;
|
|
228
235
|
|
|
229
|
-
const
|
|
230
|
-
|
|
231
|
-
);
|
|
236
|
+
const messageId = `wamid.HTTP_OK_${crypto.randomUUID()}`;
|
|
237
|
+
const rawBody = JSON.stringify(makePayload({ phoneNumberId: "pn-http", messageId }));
|
|
232
238
|
const { req, res, captured } = fakeReqRes(rawBody, {
|
|
233
239
|
"x-webhook-signature": sign(HMAC_SECRET, rawBody),
|
|
234
240
|
});
|
|
@@ -236,22 +242,37 @@ describe("handleWebhooks — Kapso HMAC gate", () => {
|
|
|
236
242
|
expect(handled).toBe(true);
|
|
237
243
|
expect(captured.status).toBe(200);
|
|
238
244
|
expect(JSON.parse(captured.body)).toMatchObject({ received: true, routing: "task" });
|
|
239
|
-
|
|
245
|
+
await waitFor(
|
|
246
|
+
() =>
|
|
247
|
+
calls.some((call) => call.body.message_id === messageId) &&
|
|
248
|
+
calls.some(
|
|
249
|
+
(call) =>
|
|
250
|
+
(call.body.reaction as { message_id?: string } | undefined)?.message_id === messageId,
|
|
251
|
+
),
|
|
252
|
+
);
|
|
253
|
+
const messageCalls = calls.filter(
|
|
254
|
+
(call) =>
|
|
255
|
+
call.body.message_id === messageId ||
|
|
256
|
+
(call.body.reaction as { message_id?: string } | undefined)?.message_id === messageId,
|
|
257
|
+
);
|
|
258
|
+
expect(messageCalls).toHaveLength(2);
|
|
240
259
|
expect(
|
|
241
|
-
|
|
260
|
+
messageCalls.every(
|
|
261
|
+
(call) => call.url === "https://kapso.test/meta/whatsapp/v24.0/pn-http/messages",
|
|
262
|
+
),
|
|
242
263
|
).toBe(true);
|
|
243
|
-
expect(
|
|
264
|
+
expect(messageCalls.map((call) => call.body)).toContainEqual({
|
|
244
265
|
messaging_product: "whatsapp",
|
|
245
266
|
status: "read",
|
|
246
|
-
message_id:
|
|
267
|
+
message_id: messageId,
|
|
247
268
|
typing_indicator: { type: "text" },
|
|
248
269
|
});
|
|
249
|
-
expect(
|
|
270
|
+
expect(messageCalls.map((call) => call.body)).toContainEqual({
|
|
250
271
|
messaging_product: "whatsapp",
|
|
251
272
|
recipient_type: "individual",
|
|
252
273
|
to: "34679077777",
|
|
253
274
|
type: "reaction",
|
|
254
|
-
reaction: { message_id:
|
|
275
|
+
reaction: { message_id: messageId, emoji: "👀" },
|
|
255
276
|
});
|
|
256
277
|
});
|
|
257
278
|
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { afterAll, beforeAll, beforeEach, describe, expect, mock, spyOn, test } from "bun:test";
|
|
2
|
+
import { unlink } from "node:fs/promises";
|
|
3
|
+
import { closeDb, initDb } from "../be/db";
|
|
4
|
+
import {
|
|
5
|
+
deleteOAuthTokens,
|
|
6
|
+
getOAuthTokens,
|
|
7
|
+
storeOAuthTokens,
|
|
8
|
+
upsertOAuthApp,
|
|
9
|
+
} from "../be/db-queries/oauth";
|
|
10
|
+
import { _test, stopOAuthKeepalive } from "../oauth/keepalive";
|
|
11
|
+
|
|
12
|
+
const TEST_DB_PATH = "./test-oauth-keepalive.sqlite";
|
|
13
|
+
|
|
14
|
+
const originalSlackAlertsChannel = process.env.SLACK_ALERTS_CHANNEL;
|
|
15
|
+
function restoreSlackAlertsChannel(): void {
|
|
16
|
+
if (originalSlackAlertsChannel === undefined) {
|
|
17
|
+
delete process.env.SLACK_ALERTS_CHANNEL;
|
|
18
|
+
return;
|
|
19
|
+
}
|
|
20
|
+
process.env.SLACK_ALERTS_CHANNEL = originalSlackAlertsChannel;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// OAuth app fixture shared by both providers registered in beforeAll.
const testApp = {
  clientId: "test-client-id",
  clientSecret: "test-client-secret",
  authorizeUrl: "https://example.com/oauth/authorize",
  tokenUrl: "https://example.com/oauth/token",
  redirectUri: "http://localhost:3013/callback",
  scopes: "read,write",
};

// Captured once so every hook can undo a test's replacement of globalThis.fetch.
const originalFetch = globalThis.fetch;

// One-time setup: open the test database and register the "linear" and "jira"
// OAuth apps. Jira reuses the fixture but points at its own token URL.
beforeAll(() => {
  initDb(TEST_DB_PATH);
  upsertOAuthApp("linear", testApp);
  upsertOAuthApp("jira", {
    ...testApp,
    tokenUrl: "https://example.com/jira/oauth/token",
  });
});

// Per-test reset: stop any keepalive left over from the previous test, drop
// stored tokens for both providers, and undo fetch / env / mock changes.
beforeEach(async () => {
  await stopOAuthKeepalive();
  deleteOAuthTokens("linear");
  deleteOAuthTokens("jira");
  globalThis.fetch = originalFetch;
  restoreSlackAlertsChannel();
  mock.restore();
});

// Final teardown: stop the keepalive, restore globals, close the database and
// delete the SQLite file together with its -wal / -shm companions. Unlink
// failures are deliberately ignored (the files may not exist).
afterAll(async () => {
  await stopOAuthKeepalive();
  globalThis.fetch = originalFetch;
  restoreSlackAlertsChannel();
  closeDb();
  await unlink(TEST_DB_PATH).catch(() => {});
  await unlink(`${TEST_DB_PATH}-wal`).catch(() => {});
  await unlink(`${TEST_DB_PATH}-shm`).catch(() => {});
});
|
|
61
|
+
|
|
62
|
+
describe("OAuth keepalive", () => {
  // Pins the exported scheduling constants: 12 hours between runs, 10 minutes of refresh buffer.
  test("uses a 12h cadence with a 10m refresh buffer", () => {
    expect(_test.KEEPALIVE_INTERVAL_MS).toBe(12 * 60 * 60 * 1000);
    expect(_test.KEEPALIVE_BUFFER_MS).toBe(10 * 60 * 1000);
  });

  // With SLACK_ALERTS_CHANNEL removed, notifySlack must resolve without a value
  // and log the "skipping alert" warning.
  test("skips Slack notification when alerts channel env is unset", async () => {
    delete process.env.SLACK_ALERTS_CHANNEL;
    // Silence console.warn while still recording its calls.
    const warn = spyOn(console, "warn").mockImplementation(() => {});

    await expect(_test.notifySlack("test alert")).resolves.toBeUndefined();

    expect(warn).toHaveBeenCalledWith(
      "[OAuth Keepalive] SLACK_ALERTS_CHANNEL not set; skipping alert",
    );
  });

  // Starts a keepalive run whose token request is held open by the test, calls
  // stopOAuthKeepalive() while that request is pending, and checks that stop
  // only resolves after the refreshed Jira tokens have been stored.
  test("stopOAuthKeepalive waits for in-flight Jira refresh persistence", async () => {
    // Linear's token is valid for another hour; Jira's expires in one minute.
    storeOAuthTokens("linear", {
      accessToken: "linear-access",
      refreshToken: "linear-refresh",
      expiresAt: new Date(Date.now() + 60 * 60 * 1000).toISOString(),
    });
    storeOAuthTokens("jira", {
      accessToken: "old-jira-access",
      refreshToken: "old-jira-refresh",
      expiresAt: new Date(Date.now() + 60 * 1000).toISOString(),
    });

    // Gate that keeps the mocked token endpoint pending until the test opens it.
    let releaseTokenResponse!: () => void;
    const tokenResponseReady = new Promise<void>((resolve) => {
      releaseTokenResponse = resolve;
    });
    // Signal that the keepalive has actually reached the fetch call.
    let fetchStarted!: () => void;
    const fetchStartedPromise = new Promise<void>((resolve) => {
      fetchStarted = resolve;
    });

    globalThis.fetch = mock(async () => {
      fetchStarted();
      await tokenResponseReady;
      return new Response(
        JSON.stringify({
          access_token: "new-jira-access",
          token_type: "Bearer",
          expires_in: 3600,
          refresh_token: "new-jira-refresh",
        }),
        { status: 200, headers: { "Content-Type": "application/json" } },
      );
    });

    const keepaliveRun = _test.runKeepalive("manual");
    await fetchStartedPromise;

    let stopResolved = false;
    const stopPromise = stopOAuthKeepalive().then(() => {
      stopResolved = true;
    });

    try {
      // Give the stop promise one microtask turn; it must still be pending and
      // the old refresh token must still be the stored one.
      await Promise.resolve();
      expect(stopResolved).toBe(false);
      expect(getOAuthTokens("jira")?.refreshToken).toBe("old-jira-refresh");
    } finally {
      // Always open the gate and drain both promises, even when an assertion
      // above failed. Otherwise the mocked fetch stays pending forever and the
      // next `await stopOAuthKeepalive()` in beforeEach/afterAll would wait on
      // the in-flight refresh, hanging the suite instead of reporting the failure.
      releaseTokenResponse();
      await stopPromise;
      await keepaliveRun;
    }

    expect(stopResolved).toBe(true);
    const tokens = getOAuthTokens("jira");
    expect(tokens?.accessToken).toBe("new-jira-access");
    expect(tokens?.refreshToken).toBe("new-jira-refresh");
  });
});
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, spyOn, test } from "bun:test";
|
|
2
|
+
import { unlink } from "node:fs/promises";
|
|
3
|
+
import {
|
|
4
|
+
cancelTask,
|
|
5
|
+
closeDb,
|
|
6
|
+
completeTask,
|
|
7
|
+
createAgent,
|
|
8
|
+
createTaskExtended,
|
|
9
|
+
failTask,
|
|
10
|
+
getDb,
|
|
11
|
+
initDb,
|
|
12
|
+
} from "../be/db";
|
|
13
|
+
import { telemetry } from "../telemetry";
|
|
14
|
+
|
|
15
|
+
const TEST_DB_PATH = "./test-task-lifecycle-telemetry.sqlite";
|
|
16
|
+
const WORKER_ID = "bbbb0000-0000-4000-8000-000000000002";
|
|
17
|
+
|
|
18
|
+
/**
 * Yields once to the microtask queue by awaiting an already-resolved promise.
 * The tests in this file call it between triggering a task state change and
 * asserting on the recorded telemetry calls.
 *
 * NOTE(review): this awaits a single resolved promise only; it presumably
 * matches how the emitter defers its call — confirm if the deferral changes.
 */
async function flushMicrotasks(): Promise<void> {
  await Promise.resolve();
}
|
|
21
|
+
|
|
22
|
+
/**
 * Deletes the test SQLite database and its `-wal` / `-shm` sidecar files,
 * one after another. Failures are ignored on purpose: the usual cause is that
 * the file does not exist.
 */
async function removeTestDb(): Promise<void> {
  const ignoreFailure = (): void => {
    // File does not exist.
  };
  for (const suffix of ["", "-wal", "-shm"]) {
    await unlink(TEST_DB_PATH + suffix).catch(ignoreFailure);
  }
}
|
|
31
|
+
|
|
32
|
+
// Verifies that the task helpers exported from ../be/db report lifecycle
// changes through telemetry.taskEvent, which is replaced by a recording spy.
describe("task lifecycle telemetry", () => {
  let taskEventSpy: ReturnType<typeof spyOn>;
  // Every (event, props) pair the spy has received, in call order.
  let calls: Array<{ event: string; props: Parameters<typeof telemetry.taskEvent>[1] }>;

  // Each test gets a freshly created database containing a single worker agent.
  beforeEach(async () => {
    closeDb();
    await removeTestDb();
    initDb(TEST_DB_PATH);
    createAgent({ id: WORKER_ID, name: "Telemetry Worker", isLead: false, status: "idle" });

    calls = [];
    // Record calls instead of running the real telemetry implementation.
    taskEventSpy = spyOn(telemetry, "taskEvent").mockImplementation((event, props) => {
      calls.push({ event, props });
    });
  });

  afterEach(async () => {
    taskEventSpy.mockRestore();
    closeDb();
    await removeTestDb();
  });

  test("emits task.created from createTaskExtended after the task is committed", async () => {
    const task = createTaskExtended("create telemetry", {
      agentId: WORKER_ID,
      source: "mcp",
      tags: ["telemetry"],
      priority: 60,
    });

    // Nothing may be reported synchronously, before createTaskExtended returns control.
    expect(calls).toHaveLength(0);

    await flushMicrotasks();

    // Exactly one "created" event carrying the task's id and creation options.
    expect(calls).toEqual([
      {
        event: "created",
        props: {
          taskId: task.id,
          source: "mcp",
          tags: ["telemetry"],
          hasParent: false,
          priority: 60,
        },
      },
    ]);
  });

  test("does not emit task.created when an enclosing transaction rolls back", async () => {
    // Create the task inside a transaction that throws, forcing a rollback.
    const txn = getDb().transaction(() => {
      createTaskExtended("rolled back telemetry", {
        agentId: WORKER_ID,
        source: "mcp",
      });
      throw new Error("rollback");
    });

    expect(() => txn()).toThrow("rollback");

    await flushMicrotasks();

    // The task was never committed, so no event may have been reported.
    expect(calls).toHaveLength(0);
  });

  test("emits terminal lifecycle events from universal status helpers", async () => {
    // --- completed ---
    const completedTask = createTaskExtended("complete telemetry", {
      agentId: WORKER_ID,
      source: "mcp",
    });
    await flushMicrotasks();
    // Discard the "created" event so only the terminal event is inspected.
    calls = [];

    completeTask(completedTask.id, "done");
    await flushMicrotasks();

    expect(calls).toHaveLength(1);
    expect(calls[0]).toMatchObject({
      event: "completed",
      props: { taskId: completedTask.id, agentId: WORKER_ID },
    });
    expect(typeof calls[0]?.props.durationMs).toBe("number");

    // --- failed ---
    const failedTask = createTaskExtended("fail telemetry", {
      agentId: WORKER_ID,
      source: "mcp",
    });
    await flushMicrotasks();
    calls = [];

    failTask(failedTask.id, "nope");
    await flushMicrotasks();

    expect(calls).toHaveLength(1);
    expect(calls[0]).toMatchObject({
      event: "failed",
      props: { taskId: failedTask.id, agentId: WORKER_ID },
    });
    expect(typeof calls[0]?.props.durationMs).toBe("number");

    // --- cancelled ---
    const cancelledTask = createTaskExtended("cancel telemetry", {
      agentId: WORKER_ID,
      source: "api",
    });
    await flushMicrotasks();
    calls = [];

    cancelTask(cancelledTask.id, "not needed");
    await flushMicrotasks();

    expect(calls).toHaveLength(1);
    // The cancelled event additionally carries the task's source and the
    // status it had before cancellation.
    expect(calls[0]).toMatchObject({
      event: "cancelled",
      props: {
        taskId: cancelledTask.id,
        source: "api",
        agentId: WORKER_ID,
        previousStatus: "pending",
      },
    });
    expect(typeof calls[0]?.props.durationMs).toBe("number");
  });
});
|
|
@@ -21,7 +21,13 @@ import {
|
|
|
21
21
|
type ExecutorResult,
|
|
22
22
|
} from "../workflows/executors/base";
|
|
23
23
|
import { ExecutorRegistry } from "../workflows/executors/registry";
|
|
24
|
-
import {
|
|
24
|
+
import {
|
|
25
|
+
SWARM_SCRIPT_DEFAULT_TIMEOUT_MS,
|
|
26
|
+
SWARM_SCRIPT_MAX_TIMEOUT_MS,
|
|
27
|
+
SWARM_SCRIPT_MIN_TIMEOUT_MS,
|
|
28
|
+
SwarmScriptConfigSchema,
|
|
29
|
+
SwarmScriptExecutor,
|
|
30
|
+
} from "../workflows/executors/swarm-script";
|
|
25
31
|
import { interpolate } from "../workflows/template";
|
|
26
32
|
|
|
27
33
|
const TEST_DB_PATH = "./test-workflow-swarm-script.sqlite";
|
|
@@ -141,6 +147,38 @@ beforeEach(() => {
|
|
|
141
147
|
});
|
|
142
148
|
|
|
143
149
|
describe("SwarmScriptExecutor", () => {
|
|
150
|
+
// Schema-level contract for `timeoutMs`: omitted -> runtime default; one step
// outside either bound -> rejected; exactly on either bound -> accepted as is.
test("config schema validates timeoutMs bounds and applies the runtime default", () => {
  const configWith = (timeoutMs: number) => ({ scriptName: "quick", timeoutMs });

  const defaulted = SwarmScriptConfigSchema.parse({ scriptName: "quick" });
  expect(defaulted.timeoutMs).toBe(SWARM_SCRIPT_DEFAULT_TIMEOUT_MS);

  const belowMin = SwarmScriptConfigSchema.safeParse(configWith(SWARM_SCRIPT_MIN_TIMEOUT_MS - 1));
  expect(belowMin.success).toBe(false);
  const aboveMax = SwarmScriptConfigSchema.safeParse(configWith(SWARM_SCRIPT_MAX_TIMEOUT_MS + 1));
  expect(aboveMax.success).toBe(false);

  const atMin = SwarmScriptConfigSchema.parse(configWith(SWARM_SCRIPT_MIN_TIMEOUT_MS));
  expect(atMin.timeoutMs).toBe(SWARM_SCRIPT_MIN_TIMEOUT_MS);
  const atMax = SwarmScriptConfigSchema.parse(configWith(SWARM_SCRIPT_MAX_TIMEOUT_MS));
  expect(atMax.timeoutMs).toBe(SWARM_SCRIPT_MAX_TIMEOUT_MS);
});
|
|
181
|
+
|
|
144
182
|
test("A workflow with one swarm-script node resolves by name + runs + returns result", async () => {
|
|
145
183
|
await saveScript(
|
|
146
184
|
"add-one",
|
|
@@ -250,6 +288,49 @@ describe("SwarmScriptExecutor", () => {
|
|
|
250
288
|
expect(success.status).toBe("success");
|
|
251
289
|
});
|
|
252
290
|
|
|
291
|
+
// A script that returns immediately must succeed when the node config leaves
// `timeoutMs` out entirely.
test("timeoutMs not set — script completes with the default 30s window", async () => {
  await saveScript("quick", `export default async () => ({ done: true });`);
  const executor = new SwarmScriptExecutor(deps);
  const wf = makeWorkflow({ nodes: [] });
  const meta = {
    runId: crypto.randomUUID(),
    stepId: crypto.randomUUID(),
    nodeId: "script",
    workflowId: wf.id,
    dryRun: false,
  };

  const result = await executor.run({
    config: { scriptName: "quick" },
    context: {},
    meta,
  });

  expect(result.status).toBe("success");
  expect(result.output?.result).toEqual({ done: true });
});
|
|
310
|
+
|
|
311
|
+
// A script that sleeps well past its configured timeout must end as a failed
// node with a non-zero exit code.
test("timeoutMs set — a long-running script is killed before it finishes", async () => {
  // Use the smallest timeout the config schema accepts. A value below
  // SWARM_SCRIPT_MIN_TIMEOUT_MS (the previous 300) is rejected by
  // SwarmScriptConfigSchema, so — assuming run() validates config through that
  // schema — the node would fail on validation without ever starting the
  // script, and the assertions below would pass without testing the timeout.
  const timeoutMs = SWARM_SCRIPT_MIN_TIMEOUT_MS;
  // Sleep three times longer than the timeout so the kill clearly comes first.
  const sleepMs = timeoutMs * 3;
  await saveScript(
    "sleeper",
    `export default async () => { await new Promise(r => setTimeout(r, ${sleepMs})); return { done: true }; };`,
  );
  const executor = new SwarmScriptExecutor(deps);
  const wf = makeWorkflow({ nodes: [] });
  const startedAt = Date.now();
  const result = await executor.run({
    config: { scriptName: "sleeper", timeoutMs },
    context: {},
    meta: {
      runId: crypto.randomUUID(),
      stepId: crypto.randomUUID(),
      nodeId: "script",
      workflowId: wf.id,
      dryRun: false,
    },
  });
  const elapsedMs = Date.now() - startedAt;

  expect(result.status).toBe("failed");
  expect(result.output?.exitCode).not.toBe(0);
  // A config rejection or load error returns almost immediately; a real timeout
  // cannot return before the timeout has elapsed. The loose lower bound keeps
  // this robust against timer jitter while still ruling out the instant paths.
  expect(elapsedMs).toBeGreaterThanOrEqual(timeoutMs / 2);
});
|
|
333
|
+
|
|
253
334
|
test("Failure in the script surfaces as a workflow-node failure", async () => {
|
|
254
335
|
await saveScript("throws", `export default async () => { throw new Error("boom"); };`);
|
|
255
336
|
const executor = new SwarmScriptExecutor(deps);
|
|
@@ -1,15 +1,26 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
import { getScript, getScriptVersion } from "../../be/scripts/db";
|
|
3
|
+
import { DEFAULT_SCRIPT_RESOURCES } from "../../scripts-runtime/executors/types";
|
|
3
4
|
import { runScript } from "../../scripts-runtime/loader";
|
|
4
5
|
import type { ExecutorMeta } from "../../types";
|
|
5
6
|
import { BaseExecutor, type ExecutorResult } from "./base";
|
|
6
7
|
|
|
8
|
+
/** Timeout (ms) used when a node's config omits `timeoutMs`; taken from the script runtime's default `wallClockMs`. */
export const SWARM_SCRIPT_DEFAULT_TIMEOUT_MS = DEFAULT_SCRIPT_RESOURCES.wallClockMs;
/** Smallest `timeoutMs` the config schema accepts: one second. */
export const SWARM_SCRIPT_MIN_TIMEOUT_MS = 1_000;
/** Largest `timeoutMs` the config schema accepts: the runtime's default `cpuTimeSec`, converted to milliseconds. */
export const SWARM_SCRIPT_MAX_TIMEOUT_MS = DEFAULT_SCRIPT_RESOURCES.cpuTimeSec * 1_000;

/**
 * Configuration accepted by a swarm-script workflow node.
 *
 * - `scriptName`: non-empty name of the script to run.
 * - `scope`: optional, either "global" or "agent".
 * - `pinHash`: optional non-empty hash string.
 * - `args`: string-keyed record of arbitrary values; defaults to `{}`.
 * - `fsMode`: "none" or "workspace-rw"; defaults to "none".
 * - `timeoutMs`: integer within [SWARM_SCRIPT_MIN_TIMEOUT_MS,
 *   SWARM_SCRIPT_MAX_TIMEOUT_MS]; defaults to SWARM_SCRIPT_DEFAULT_TIMEOUT_MS.
 *
 * NOTE(review): assumes DEFAULT_SCRIPT_RESOURCES.wallClockMs lies inside the
 * min/max range above — confirm against the runtime defaults, since whether a
 * default value is re-validated depends on the zod version in use.
 */
export const SwarmScriptConfigSchema = z.object({
  scriptName: z.string().min(1),
  scope: z.enum(["global", "agent"]).optional(),
  pinHash: z.string().min(1).optional(),
  args: z.record(z.string(), z.unknown()).default({}),
  fsMode: z.enum(["none", "workspace-rw"]).default("none"),
  timeoutMs: z
    .number()
    .int()
    .min(SWARM_SCRIPT_MIN_TIMEOUT_MS)
    .max(SWARM_SCRIPT_MAX_TIMEOUT_MS)
    .default(SWARM_SCRIPT_DEFAULT_TIMEOUT_MS),
});
|
|
14
25
|
|
|
15
26
|
export const SwarmScriptOutputSchema = z.object({
|
|
@@ -61,6 +72,7 @@ export class SwarmScriptExecutor extends BaseExecutor<
|
|
|
61
72
|
args: config.args,
|
|
62
73
|
fsMode: "none",
|
|
63
74
|
agentId: agentId ?? "workflow",
|
|
75
|
+
timeoutMs: config.timeoutMs,
|
|
64
76
|
});
|
|
65
77
|
|
|
66
78
|
const workflowOutput = {
|