@desplega.ai/agent-swarm 1.89.0 → 1.90.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/openapi.json +1 -1
- package/package.json +2 -2
- package/plugin/skills/composio/SKILL.md +138 -63
- package/plugin/skills/composio-gmail/SKILL.md +83 -0
- package/plugin/skills/composio-google-calendar/SKILL.md +81 -0
- package/plugin/skills/composio-google-docs/SKILL.md +71 -0
- package/src/be/db.ts +28 -0
- package/src/be/modelsdev-cache.json +752 -81
- package/src/heartbeat/heartbeat.ts +54 -7
- package/src/http/tasks.ts +2 -0
- package/src/tasks/worker-follow-up.ts +19 -1
- package/src/tests/heartbeat-supersede-resume.test.ts +91 -1
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
assignUnassignedTaskPending,
|
|
3
|
+
backfillSupersedeTaskResumeTaskId,
|
|
3
4
|
cleanupStaleSessions,
|
|
4
5
|
createTaskExtended,
|
|
5
6
|
deleteActiveSession,
|
|
@@ -25,7 +26,7 @@ import {
|
|
|
25
26
|
updateAgentStatus,
|
|
26
27
|
} from "../be/db";
|
|
27
28
|
import { resolveTemplate } from "../prompts/resolver";
|
|
28
|
-
import { createResumeFollowUp } from "../tasks/worker-follow-up";
|
|
29
|
+
import { createResumeFollowUp, getNextResumeGeneration } from "../tasks/worker-follow-up";
|
|
29
30
|
import type { AgentTask } from "../types";
|
|
30
31
|
import { getExecutorRegistry } from "../workflows";
|
|
31
32
|
import { recoverIncompleteRuns } from "../workflows/recovery";
|
|
@@ -60,6 +61,11 @@ const STALE_CLEANUP_THRESHOLD_MINUTES = Number(process.env.HEARTBEAT_STALE_CLEAN
|
|
|
60
61
|
/** Max pool tasks to auto-assign per sweep */
|
|
61
62
|
const MAX_AUTO_ASSIGN_PER_SWEEP = Number(process.env.HEARTBEAT_MAX_AUTO_ASSIGN) || 5;
|
|
62
63
|
|
|
64
|
+
const configuredMaxResumeGenerations = Number(process.env.HEARTBEAT_MAX_RESUME_GENERATIONS);

/**
 * Cap on crash-recovery resume generations. When the next generation of a
 * crashed task would exceed this value, the task is failed for lead triage
 * instead of being resumed again.
 *
 * Taken from HEARTBEAT_MAX_RESUME_GENERATIONS; an unset, non-numeric, or zero
 * value falls back to 3.
 */
export const MAX_RESUME_GENERATIONS = configuredMaxResumeGenerations
  ? configuredMaxResumeGenerations
  : 3;

/** Failure reason passed to `failTask` when the resume cap has been exceeded. */
export const RESUME_BUDGET_EXHAUSTED_REASON = "resume_budget_exhausted";
|
|
68
|
+
|
|
63
69
|
/** Heartbeat checklist interval: how often to check HEARTBEAT.md (default: 30 min) */
|
|
64
70
|
const HEARTBEAT_CHECKLIST_INTERVAL_MS =
|
|
65
71
|
Number(process.env.HEARTBEAT_CHECKLIST_INTERVAL_MS) || 30 * 60 * 1000;
|
|
@@ -98,10 +104,17 @@ export interface HeartbeatFindings {
|
|
|
98
104
|
let heartbeatInterval: ReturnType<typeof setInterval> | null = null;
|
|
99
105
|
let checklistInterval: ReturnType<typeof setInterval> | null = null;
|
|
100
106
|
let isSweeping = false;
|
|
107
|
+
let beforeHeartbeatSupersedeForTests: ((task: AgentTask) => void) | null = null;
|
|
101
108
|
|
|
102
109
|
/** Tasks auto-failed during the reboot sweep, consumed by boot triage */
|
|
103
110
|
let rebootAffectedTasks: Array<{ original: AgentTask; retryTaskId: string | null }> = [];
|
|
104
111
|
|
|
112
|
+
/**
 * Test-only seam: installs (or, when given `null`, removes) a callback that
 * the crash-remediation path invokes with the task right before it tries to
 * supersede it. Tests use it to simulate a worker finishing the task while the
 * heartbeat sweep is in flight.
 *
 * @param hook - Callback to run before the supersede attempt, or `null` to clear it.
 */
export function setBeforeHeartbeatSupersedeForTests(
  hook: ((task: AgentTask) => void) | null,
): void {
  beforeHeartbeatSupersedeForTests = hook;
}
|
|
117
|
+
|
|
105
118
|
// ============================================================================
|
|
106
119
|
// Tier 1: Preflight Gate
|
|
107
120
|
// ============================================================================
|
|
@@ -300,16 +313,40 @@ function remediateCrashedWorkerTask(
|
|
|
300
313
|
return;
|
|
301
314
|
}
|
|
302
315
|
|
|
303
|
-
|
|
316
|
+
const nextResumeGeneration = getNextResumeGeneration(task);
|
|
317
|
+
if (nextResumeGeneration > MAX_RESUME_GENERATIONS) {
|
|
318
|
+
const failed = failTask(task.id, RESUME_BUDGET_EXHAUSTED_REASON);
|
|
319
|
+
if (failed) {
|
|
320
|
+
findings.autoFailedTasks.push({
|
|
321
|
+
taskId: task.id,
|
|
322
|
+
agentId: task.agentId,
|
|
323
|
+
reason: RESUME_BUDGET_EXHAUSTED_REASON,
|
|
324
|
+
});
|
|
325
|
+
if (opts.cleanupActiveSession) deleteActiveSession(task.id);
|
|
326
|
+
console.warn(
|
|
327
|
+
`[Heartbeat] Auto-failed task ${task.id.slice(0, 8)} — ${RESUME_BUDGET_EXHAUSTED_REASON} (${opts.shortLabel})`,
|
|
328
|
+
);
|
|
329
|
+
const remaining = getActiveTaskCount(task.agentId);
|
|
330
|
+
if (remaining === 0) updateAgentStatus(task.agentId, "idle");
|
|
331
|
+
}
|
|
332
|
+
return;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
beforeHeartbeatSupersedeForTests?.(task);
|
|
336
|
+
|
|
304
337
|
const superseded = supersedeTask(task.id, {
|
|
305
338
|
reason: opts.supersedeReason,
|
|
306
339
|
resumeTaskId: null,
|
|
307
340
|
});
|
|
308
|
-
if (!superseded)
|
|
341
|
+
if (!superseded) {
|
|
342
|
+
return;
|
|
343
|
+
}
|
|
309
344
|
|
|
310
345
|
const resume = createResumeFollowUp({ parentId: task.id, reason: "crash_recovery" });
|
|
311
346
|
|
|
312
347
|
if (resume.kind === "created") {
|
|
348
|
+
backfillSupersedeTaskResumeTaskId(task.id, resume.task.id);
|
|
349
|
+
|
|
313
350
|
findings.autoResumedTasks.push({
|
|
314
351
|
taskId: task.id,
|
|
315
352
|
resumeTaskId: resume.task.id,
|
|
@@ -320,10 +357,20 @@ function remediateCrashedWorkerTask(
|
|
|
320
357
|
`[Heartbeat] Auto-superseded task ${task.id.slice(0, 8)} — created resume ${resume.task.id.slice(0, 8)} (${opts.shortLabel})`,
|
|
321
358
|
);
|
|
322
359
|
} else {
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
360
|
+
const reason =
|
|
361
|
+
resume.kind === "skipped"
|
|
362
|
+
? `resume_creation_skipped_${resume.reason}`
|
|
363
|
+
: "resume_creation_skipped_workflow";
|
|
364
|
+
const failed = failTask(task.id, reason);
|
|
365
|
+
if (failed) {
|
|
366
|
+
findings.autoFailedTasks.push({
|
|
367
|
+
taskId: task.id,
|
|
368
|
+
agentId: task.agentId,
|
|
369
|
+
reason,
|
|
370
|
+
});
|
|
371
|
+
}
|
|
372
|
+
console.warn(
|
|
373
|
+
`[Heartbeat] Task ${task.id.slice(0, 8)} failed because no resume was created (${
|
|
327
374
|
resume.kind === "skipped" ? resume.reason : "workflow-skip"
|
|
328
375
|
})`,
|
|
329
376
|
);
|
package/src/http/tasks.ts
CHANGED
|
@@ -2,6 +2,7 @@ import type { IncomingMessage, ServerResponse } from "node:http";
|
|
|
2
2
|
import { ensure } from "@desplega.ai/business-use";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import {
|
|
5
|
+
backfillSupersedeTaskResumeTaskId,
|
|
5
6
|
cancelTask,
|
|
6
7
|
completeTask,
|
|
7
8
|
failTask,
|
|
@@ -905,6 +906,7 @@ export async function handleTasks(
|
|
|
905
906
|
}
|
|
906
907
|
|
|
907
908
|
const resumeTaskId = followUp.task.id;
|
|
909
|
+
backfillSupersedeTaskResumeTaskId(parsed.params.id, resumeTaskId);
|
|
908
910
|
|
|
909
911
|
ensure({
|
|
910
912
|
id: "task.superseded",
|
|
@@ -22,6 +22,20 @@ export const WORKER_LIVENESS_WINDOW_SECONDS = Number(
|
|
|
22
22
|
process.env.WORKER_LIVENESS_WINDOW_SECONDS || "30",
|
|
23
23
|
);
|
|
24
24
|
|
|
25
|
+
/** Prefix of the task tag that records a resume task's generation number. */
export const RESUME_GENERATION_TAG_PREFIX = "resume-generation:";

/**
 * Returns the resume generation encoded in a task's tags.
 *
 * Every tag starting with {@link RESUME_GENERATION_TAG_PREFIX} is inspected and
 * the highest valid value wins. Looking only at the first matching tag would
 * let a malformed or stale entry hide a later, higher generation and thereby
 * reset the retry budget for the task.
 *
 * @param task - Any object exposing the task's `tags`.
 * @returns The highest positive-integer generation found, or 0 when the task
 *   carries no valid generation tag (i.e. it is not an auto-resume).
 */
export function getResumeGeneration(task: Pick<AgentTask, "tags">): number {
  let highest = 0;

  // `?? []` is defensive: a task object without a tags array counts as generation 0.
  for (const tag of task.tags ?? []) {
    if (!tag.startsWith(RESUME_GENERATION_TAG_PREFIX)) continue;

    const parsed = Number(tag.slice(RESUME_GENERATION_TAG_PREFIX.length));
    // Non-integers (NaN, fractions, Infinity) and values <= 0 are ignored.
    if (Number.isInteger(parsed) && parsed > highest) highest = parsed;
  }

  return highest;
}

/**
 * Returns the generation number a resume task created from `parent` should carry.
 *
 * @param parent - The task being resumed.
 * @returns The parent's generation plus one (1 for a task with no generation tag).
 */
export function getNextResumeGeneration(parent: Pick<AgentTask, "tags">): number {
  return getResumeGeneration(parent) + 1;
}
|
|
38
|
+
|
|
25
39
|
function attachmentPointer(a: TaskAttachment): string {
|
|
26
40
|
switch (a.kind) {
|
|
27
41
|
case "url":
|
|
@@ -205,7 +219,11 @@ export function createResumeFollowUp(args: {
|
|
|
205
219
|
].join("\n");
|
|
206
220
|
|
|
207
221
|
const priority = Math.min(100, (parent.priority ?? 50) + 10);
|
|
208
|
-
const tags = [
|
|
222
|
+
const tags = [
|
|
223
|
+
"auto-resume",
|
|
224
|
+
`reason:${args.reason}`,
|
|
225
|
+
`${RESUME_GENERATION_TAG_PREFIX}${getNextResumeGeneration(parent)}`,
|
|
226
|
+
];
|
|
209
227
|
|
|
210
228
|
// Identity-shaped fields (dir, VCS provider/repo/number/url/etc.,
|
|
211
229
|
// outputSchema, slack channel/thread/user, agentmail, mention, contextKey,
|
|
@@ -10,16 +10,29 @@ import { afterAll, beforeAll, beforeEach, describe, expect, test } from "bun:tes
|
|
|
10
10
|
import { unlink } from "node:fs/promises";
|
|
11
11
|
import {
|
|
12
12
|
closeDb,
|
|
13
|
+
completeTask,
|
|
13
14
|
createAgent,
|
|
14
15
|
createTaskExtended,
|
|
15
16
|
getChildTasks,
|
|
16
17
|
getDb,
|
|
18
|
+
getLogsByTaskId,
|
|
17
19
|
getTaskById,
|
|
18
20
|
initDb,
|
|
19
21
|
insertActiveSession,
|
|
20
22
|
startTask,
|
|
21
23
|
} from "../be/db";
|
|
22
|
-
import {
|
|
24
|
+
import {
|
|
25
|
+
createTrackerSync,
|
|
26
|
+
getTrackerSync,
|
|
27
|
+
getTrackerSyncByExternalId,
|
|
28
|
+
} from "../be/db-queries/tracker";
|
|
29
|
+
import {
|
|
30
|
+
codeLevelTriage,
|
|
31
|
+
MAX_RESUME_GENERATIONS,
|
|
32
|
+
RESUME_BUDGET_EXHAUSTED_REASON,
|
|
33
|
+
setBeforeHeartbeatSupersedeForTests,
|
|
34
|
+
} from "../heartbeat/heartbeat";
|
|
35
|
+
import { RESUME_GENERATION_TAG_PREFIX } from "../tasks/worker-follow-up";
|
|
23
36
|
|
|
24
37
|
const TEST_DB_PATH = "./test-heartbeat-supersede-resume.sqlite";
|
|
25
38
|
|
|
@@ -46,6 +59,8 @@ describe("Heartbeat — supersede + resume (DES-523)", () => {
|
|
|
46
59
|
});
|
|
47
60
|
|
|
48
61
|
beforeEach(() => {
|
|
62
|
+
setBeforeHeartbeatSupersedeForTests(null);
|
|
63
|
+
getDb().run("DELETE FROM tracker_sync");
|
|
49
64
|
getDb().run("DELETE FROM agent_tasks");
|
|
50
65
|
getDb().run("DELETE FROM agents");
|
|
51
66
|
getDb().run("DELETE FROM active_sessions");
|
|
@@ -81,7 +96,82 @@ describe("Heartbeat — supersede + resume (DES-523)", () => {
|
|
|
81
96
|
expect(resume.taskType).toBe("resume");
|
|
82
97
|
expect(resume.tags).toContain("auto-resume");
|
|
83
98
|
expect(resume.tags).toContain("reason:crash_recovery");
|
|
99
|
+
expect(resume.tags).toContain(`${RESUME_GENERATION_TAG_PREFIX}1`);
|
|
84
100
|
expect(resume.id).toBe(findings.autoResumedTasks[0]!.resumeTaskId);
|
|
101
|
+
|
|
102
|
+
const supersedeLog = getLogsByTaskId(parent.id).find(
|
|
103
|
+
(log) => log.eventType === "task_superseded",
|
|
104
|
+
);
|
|
105
|
+
expect(supersedeLog).toBeTruthy();
|
|
106
|
+
const metadata = JSON.parse(supersedeLog!.metadata ?? "{}") as { resumeTaskId?: string };
|
|
107
|
+
expect(metadata.resumeTaskId).toBe(resume.id);
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
test("Case A: crash-recovery resume chain stops at the generation cap", async () => {
|
|
111
|
+
const agent = createAgent({ name: "dead-resume-worker", isLead: false, status: "busy" });
|
|
112
|
+
const parent = createTaskExtended("Resume at generation cap", {
|
|
113
|
+
agentId: agent.id,
|
|
114
|
+
taskType: "resume",
|
|
115
|
+
tags: [
|
|
116
|
+
"auto-resume",
|
|
117
|
+
"reason:crash_recovery",
|
|
118
|
+
`${RESUME_GENERATION_TAG_PREFIX}${MAX_RESUME_GENERATIONS}`,
|
|
119
|
+
],
|
|
120
|
+
});
|
|
121
|
+
startTask(parent.id);
|
|
122
|
+
|
|
123
|
+
const oldTime = new Date(Date.now() - 10 * 60 * 1000).toISOString();
|
|
124
|
+
getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [oldTime, parent.id]);
|
|
125
|
+
|
|
126
|
+
const findings = await codeLevelTriage();
|
|
127
|
+
|
|
128
|
+
expect(findings.autoResumedTasks.length).toBe(0);
|
|
129
|
+
expect(findings.autoFailedTasks.length).toBe(1);
|
|
130
|
+
expect(findings.autoFailedTasks[0]!.taskId).toBe(parent.id);
|
|
131
|
+
expect(findings.autoFailedTasks[0]!.reason).toBe(RESUME_BUDGET_EXHAUSTED_REASON);
|
|
132
|
+
|
|
133
|
+
const updatedParent = getTaskById(parent.id);
|
|
134
|
+
expect(updatedParent?.status).toBe("failed");
|
|
135
|
+
expect(updatedParent?.failureReason).toBe(RESUME_BUDGET_EXHAUSTED_REASON);
|
|
136
|
+
expect(getChildTasks(parent.id).length).toBe(0);
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
test("Case A: supersede race does not create a resume child or repoint tracker_sync", async () => {
|
|
140
|
+
const agent = createAgent({ name: "dead-worker-race", isLead: false, status: "busy" });
|
|
141
|
+
const parent = createTaskExtended("Tracked parent that finishes during heartbeat", {
|
|
142
|
+
agentId: agent.id,
|
|
143
|
+
});
|
|
144
|
+
startTask(parent.id);
|
|
145
|
+
|
|
146
|
+
createTrackerSync({
|
|
147
|
+
provider: "linear",
|
|
148
|
+
entityType: "task",
|
|
149
|
+
swarmId: parent.id,
|
|
150
|
+
externalId: "linear-race-issue",
|
|
151
|
+
externalIdentifier: "ENG-637",
|
|
152
|
+
externalUrl: "https://linear.app/test/issue/ENG-637",
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
const oldTime = new Date(Date.now() - 10 * 60 * 1000).toISOString();
|
|
156
|
+
getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [oldTime, parent.id]);
|
|
157
|
+
|
|
158
|
+
setBeforeHeartbeatSupersedeForTests((task) => {
|
|
159
|
+
expect(task.id).toBe(parent.id);
|
|
160
|
+
completeTask(parent.id, "finished by racing worker");
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
const findings = await codeLevelTriage();
|
|
164
|
+
|
|
165
|
+
expect(findings.autoResumedTasks.length).toBe(0);
|
|
166
|
+
expect(findings.autoFailedTasks.length).toBe(0);
|
|
167
|
+
|
|
168
|
+
const updatedParent = getTaskById(parent.id);
|
|
169
|
+
expect(updatedParent?.status).toBe("completed");
|
|
170
|
+
expect(getChildTasks(parent.id).length).toBe(0);
|
|
171
|
+
|
|
172
|
+
expect(getTrackerSync("linear", "task", parent.id)).not.toBeNull();
|
|
173
|
+
const byExternal = getTrackerSyncByExternalId("linear", "task", "linear-race-issue");
|
|
174
|
+
expect(byExternal?.swarmId).toBe(parent.id);
|
|
85
175
|
});
|
|
86
176
|
|
|
87
177
|
// --------------------------------------------------------------------------
|