@desplega.ai/agent-swarm 1.86.0 → 1.87.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +72 -1
- package/package.json +3 -1
- package/src/be/db-queries/tracker.ts +21 -0
- package/src/be/db.ts +235 -14
- package/src/be/migrations/079_task_followup_config.sql +1 -0
- package/src/be/modelsdev-cache.json +77663 -74073
- package/src/cli.tsx +26 -0
- package/src/commands/context-preamble.ts +272 -0
- package/src/commands/e2b.ts +728 -0
- package/src/commands/resume-session.ts +35 -78
- package/src/commands/runner.ts +125 -13
- package/src/e2b/dispatch.ts +429 -0
- package/src/e2b/env.ts +206 -0
- package/src/heartbeat/heartbeat.ts +145 -30
- package/src/heartbeat/templates.ts +11 -7
- package/src/http/session-data.ts +8 -1
- package/src/http/tasks.ts +152 -3
- package/src/jira/sync.ts +4 -4
- package/src/linear/sync.ts +6 -5
- package/src/providers/claude-adapter.ts +10 -76
- package/src/providers/claude-managed-adapter.ts +61 -75
- package/src/providers/codex-adapter.ts +15 -18
- package/src/providers/codex-oauth/auth-json.ts +18 -1
- package/src/providers/codex-oauth/flow.ts +24 -1
- package/src/providers/types.ts +6 -0
- package/src/tasks/worker-follow-up.ts +162 -2
- package/src/telemetry.ts +11 -1
- package/src/tests/claude-adapter.test.ts +5 -27
- package/src/tests/claude-managed-adapter.test.ts +38 -52
- package/src/tests/codex-adapter.test.ts +6 -31
- package/src/tests/codex-oauth.test.ts +149 -3
- package/src/tests/codex-pool.test.ts +14 -3
- package/src/tests/e2b-dispatch.test.ts +330 -0
- package/src/tests/heartbeat-supersede-resume.test.ts +285 -0
- package/src/tests/heartbeat.test.ts +26 -16
- package/src/tests/prompt-template-remaining.test.ts +4 -0
- package/src/tests/resume-session.test.ts +42 -50
- package/src/tests/structured-output.test.ts +69 -0
- package/src/tests/task-completion-idempotency.test.ts +185 -2
- package/src/tests/task-supersede-resume.test.ts +722 -0
- package/src/tests/telemetry-init.test.ts +69 -0
- package/src/tests/vcs-tracking.test.ts +39 -0
- package/src/tools/send-task.ts +12 -1
- package/src/tools/store-progress.ts +2 -2
- package/src/tools/templates.ts +14 -2
- package/src/types.ts +46 -1
- package/src/workflows/executors/agent-task.ts +3 -0
|
@@ -423,10 +423,6 @@ class ClaudeSession implements ProviderSession {
|
|
|
423
423
|
this.config.prompt,
|
|
424
424
|
];
|
|
425
425
|
|
|
426
|
-
if (this.config.resumeSessionId) {
|
|
427
|
-
cmd.push("--resume", this.config.resumeSessionId);
|
|
428
|
-
}
|
|
429
|
-
|
|
430
426
|
if (this.config.additionalArgs?.length) {
|
|
431
427
|
cmd.push(...this.config.additionalArgs);
|
|
432
428
|
}
|
|
@@ -728,78 +724,7 @@ class ClaudeSession implements ProviderSession {
|
|
|
728
724
|
}
|
|
729
725
|
|
|
730
726
|
async waitForCompletion(): Promise<ProviderResult> {
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
// Stale session retry: if process failed because session not found and we used --resume,
|
|
734
|
-
// strip --resume and retry with a fresh session
|
|
735
|
-
if (result.exitCode !== 0 && this.errorTracker.isSessionNotFound()) {
|
|
736
|
-
const hasResume =
|
|
737
|
-
!!this.config.resumeSessionId || (this.config.additionalArgs || []).includes("--resume");
|
|
738
|
-
if (hasResume) {
|
|
739
|
-
console.log(
|
|
740
|
-
`\x1b[33m[${this.config.role}] Session resume failed for task ${this.config.taskId.slice(0, 8)} — retrying without --resume\x1b[0m`,
|
|
741
|
-
);
|
|
742
|
-
|
|
743
|
-
const freshArgs = (this.config.additionalArgs || []).filter((arg, idx, arr) => {
|
|
744
|
-
if (arg === "--resume") return false;
|
|
745
|
-
if (idx > 0 && arr[idx - 1] === "--resume") return false;
|
|
746
|
-
return true;
|
|
747
|
-
});
|
|
748
|
-
|
|
749
|
-
const logDir = this.config.logFile.substring(0, this.config.logFile.lastIndexOf("/"));
|
|
750
|
-
const retryTimestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
|
751
|
-
const retryLogFile = `${logDir}/${retryTimestamp}-retry-${this.config.taskId.slice(0, 8)}.jsonl`;
|
|
752
|
-
|
|
753
|
-
const retryConfig: ProviderSessionConfig = {
|
|
754
|
-
...this.config,
|
|
755
|
-
additionalArgs: freshArgs,
|
|
756
|
-
logFile: retryLogFile,
|
|
757
|
-
resumeSessionId: undefined,
|
|
758
|
-
};
|
|
759
|
-
|
|
760
|
-
// Write new task file for retry
|
|
761
|
-
const taskFilePath = await writeTaskFile(this.taskFilePid, {
|
|
762
|
-
taskId: this.config.taskId,
|
|
763
|
-
agentId: this.config.agentId,
|
|
764
|
-
startedAt: new Date().toISOString(),
|
|
765
|
-
});
|
|
766
|
-
|
|
767
|
-
// Re-stage the system prompt for the retry — the original was unlinked
|
|
768
|
-
// when the first session finished. Same soft-fail semantics: null
|
|
769
|
-
// falls back to the inline --append-system-prompt argv.
|
|
770
|
-
let retrySystemPromptFile: string | null = null;
|
|
771
|
-
if (retryConfig.systemPrompt) {
|
|
772
|
-
const candidate = getSystemPromptFilePath(retryConfig.taskId);
|
|
773
|
-
try {
|
|
774
|
-
await writeFile(candidate, retryConfig.systemPrompt);
|
|
775
|
-
retrySystemPromptFile = candidate;
|
|
776
|
-
} catch (err) {
|
|
777
|
-
console.warn(
|
|
778
|
-
`\x1b[33m[claude]\x1b[0m Failed to stage retry system prompt to ${candidate} (${err}); falling back to --append-system-prompt argv.`,
|
|
779
|
-
);
|
|
780
|
-
}
|
|
781
|
-
}
|
|
782
|
-
|
|
783
|
-
const retrySession = new ClaudeSession(
|
|
784
|
-
retryConfig,
|
|
785
|
-
this.model,
|
|
786
|
-
taskFilePath,
|
|
787
|
-
this.taskFilePid,
|
|
788
|
-
null,
|
|
789
|
-
this.claudeBinaryArgv,
|
|
790
|
-
retrySystemPromptFile,
|
|
791
|
-
);
|
|
792
|
-
|
|
793
|
-
// Forward events from retry to our listeners
|
|
794
|
-
for (const listener of this.listeners) {
|
|
795
|
-
retrySession.onEvent(listener);
|
|
796
|
-
}
|
|
797
|
-
|
|
798
|
-
return retrySession.waitForCompletion();
|
|
799
|
-
}
|
|
800
|
-
}
|
|
801
|
-
|
|
802
|
-
return result;
|
|
727
|
+
return this.completionPromise;
|
|
803
728
|
}
|
|
804
729
|
|
|
805
730
|
async abort(): Promise<void> {
|
|
@@ -812,6 +737,15 @@ export class ClaudeAdapter implements ProviderAdapter {
|
|
|
812
737
|
readonly traits = { hasMcp: true, hasLocalEnvironment: true };
|
|
813
738
|
|
|
814
739
|
async createSession(config: ProviderSessionConfig): Promise<ProviderSession> {
|
|
740
|
+
// Native resume is deprecated. Follow-up continuity is delivered via the
|
|
741
|
+
// context preamble (see src/commands/context-preamble.ts). Any stray
|
|
742
|
+
// resumeSessionId is logged and ignored — we always spawn a fresh session.
|
|
743
|
+
if (config.resumeSessionId) {
|
|
744
|
+
console.warn(
|
|
745
|
+
"[claude-adapter] resumeSessionId ignored — native resume is disabled by deprecation plan",
|
|
746
|
+
);
|
|
747
|
+
}
|
|
748
|
+
|
|
815
749
|
const model = config.model || "opus";
|
|
816
750
|
|
|
817
751
|
const credType = validateClaudeCredentials(config.env || process.env);
|
|
@@ -808,85 +808,71 @@ export class ClaudeManagedAdapter implements ProviderAdapter {
|
|
|
808
808
|
}
|
|
809
809
|
|
|
810
810
|
async createSession(config: ProviderSessionConfig): Promise<ProviderSession> {
|
|
811
|
-
|
|
812
|
-
|
|
811
|
+
// Native resume is deprecated. Follow-up continuity is delivered via the
|
|
812
|
+
// context preamble (see src/commands/context-preamble.ts). Any stray
|
|
813
|
+
// resumeSessionId is logged and ignored — we always create a fresh session.
|
|
814
|
+
if (config.resumeSessionId) {
|
|
815
|
+
console.warn(
|
|
816
|
+
"[claude-managed-adapter] resumeSessionId ignored — native resume is disabled by deprecation plan",
|
|
817
|
+
);
|
|
818
|
+
}
|
|
819
|
+
|
|
813
820
|
const seenEventIds = new Set<string>();
|
|
814
821
|
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
//
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
const createParams: Record<string, unknown> = {
|
|
854
|
-
agent: this.agentId,
|
|
855
|
-
environment_id: this.environmentId,
|
|
856
|
-
title: `Task ${config.taskId}`,
|
|
857
|
-
metadata: {
|
|
858
|
-
swarmAgentId: config.agentId,
|
|
859
|
-
swarmTaskId: config.taskId,
|
|
822
|
+
// Fresh session. Compose the cache-control-annotated user message and
|
|
823
|
+
// open the managed session against the pre-existing agent + env.
|
|
824
|
+
const userMessageContent: BetaManagedAgentsTextBlock[] | null =
|
|
825
|
+
composeManagedUserMessage(config);
|
|
826
|
+
// Phase 4: derive `resources` from `config.vcsRepo` (which the runner
|
|
827
|
+
// copies from `task.vcsRepo` at the spawn site, see
|
|
828
|
+
// src/commands/runner.ts:3296). The SDK contract is
|
|
829
|
+
// `BetaManagedAgentsGitHubRepositoryResourceParams`:
|
|
830
|
+
// { type: 'github_repository', url, authorization_token, checkout?: { type: 'branch', name } }
|
|
831
|
+
// We default `branch` to "main" since `ProviderSessionConfig` only
|
|
832
|
+
// carries the repo identifier as a string.
|
|
833
|
+
//
|
|
834
|
+
// GitHub auth: prefer the operator-side `MANAGED_GITHUB_VAULT_ID`
|
|
835
|
+
// (passed via `vault_ids` on the session — see runbook §"Claude Managed
|
|
836
|
+
// Agents — GitHub access"). If a literal PAT is supplied via
|
|
837
|
+
// `MANAGED_GITHUB_TOKEN`, use that instead. Without either, the SDK's
|
|
838
|
+
// required `authorization_token` field gets an empty string and the
|
|
839
|
+
// operator sees an authentication error from Anthropic — which is
|
|
840
|
+
// strictly better than silently dropping `resources`.
|
|
841
|
+
const createParams: Record<string, unknown> = {
|
|
842
|
+
agent: this.agentId,
|
|
843
|
+
environment_id: this.environmentId,
|
|
844
|
+
title: `Task ${config.taskId}`,
|
|
845
|
+
metadata: {
|
|
846
|
+
swarmAgentId: config.agentId,
|
|
847
|
+
swarmTaskId: config.taskId,
|
|
848
|
+
},
|
|
849
|
+
};
|
|
850
|
+
if (config.vcsRepo) {
|
|
851
|
+
const repoUrl = normalizeRepoUrl(config.vcsRepo);
|
|
852
|
+
const branch = "main"; // ProviderSessionConfig doesn't carry per-task branch info today.
|
|
853
|
+
const githubToken = process.env.MANAGED_GITHUB_TOKEN ?? "";
|
|
854
|
+
createParams.resources = [
|
|
855
|
+
{
|
|
856
|
+
type: "github_repository",
|
|
857
|
+
url: repoUrl,
|
|
858
|
+
authorization_token: githubToken,
|
|
859
|
+
checkout: { type: "branch", name: branch },
|
|
860
860
|
},
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
];
|
|
874
|
-
}
|
|
875
|
-
// Multiple vaults can be linked to a single session — `vault_ids` is an
|
|
876
|
-
// array. The MCP vault holds the static-bearer credential for our
|
|
877
|
-
// `/mcp` endpoint (provisioned by `claude-managed-setup`); the GitHub
|
|
878
|
-
// vault holds the credential used by the `github_repository` resource.
|
|
879
|
-
// Either or both may be unset.
|
|
880
|
-
const vaultIds = [
|
|
881
|
-
process.env.MANAGED_MCP_VAULT_ID,
|
|
882
|
-
process.env.MANAGED_GITHUB_VAULT_ID,
|
|
883
|
-
].filter((v): v is string => !!v && v.length > 0);
|
|
884
|
-
if (vaultIds.length > 0) {
|
|
885
|
-
createParams.vault_ids = Array.from(new Set(vaultIds));
|
|
886
|
-
}
|
|
887
|
-
const created = await Promise.resolve(this.client.beta.sessions.create(createParams));
|
|
888
|
-
sessionId = created.id;
|
|
861
|
+
];
|
|
862
|
+
}
|
|
863
|
+
// Multiple vaults can be linked to a single session — `vault_ids` is an
|
|
864
|
+
// array. The MCP vault holds the static-bearer credential for our
|
|
865
|
+
// `/mcp` endpoint (provisioned by `claude-managed-setup`); the GitHub
|
|
866
|
+
// vault holds the credential used by the `github_repository` resource.
|
|
867
|
+
// Either or both may be unset.
|
|
868
|
+
const vaultIds = [process.env.MANAGED_MCP_VAULT_ID, process.env.MANAGED_GITHUB_VAULT_ID].filter(
|
|
869
|
+
(v): v is string => !!v && v.length > 0,
|
|
870
|
+
);
|
|
871
|
+
if (vaultIds.length > 0) {
|
|
872
|
+
createParams.vault_ids = Array.from(new Set(vaultIds));
|
|
889
873
|
}
|
|
874
|
+
const created = await Promise.resolve(this.client.beta.sessions.create(createParams));
|
|
875
|
+
const sessionId = created.id;
|
|
890
876
|
|
|
891
877
|
return new ClaudeManagedSession(
|
|
892
878
|
this.client,
|
|
@@ -6,7 +6,8 @@
|
|
|
6
6
|
*
|
|
7
7
|
* Phase 1 — factory wiring + skeleton classes.
|
|
8
8
|
* Phase 2 — event stream normalization, CostData, AbortController, log file,
|
|
9
|
-
* AGENTS.md system-prompt injection
|
|
9
|
+
* AGENTS.md system-prompt injection. (Native resume was removed in
|
|
10
|
+
* the 2026-05-28 deprecate-native-resume plan — see context-preamble.ts.)
|
|
10
11
|
* Phase 3 — per-session MCP config builder + model catalogue wiring. The
|
|
11
12
|
* baseline Codex config (`~/.codex/config.toml`) is written at
|
|
12
13
|
* Docker image build time (deferred to Phase 6). For local dev
|
|
@@ -1280,9 +1281,15 @@ export async function createInProcessCodexSession(
|
|
|
1280
1281
|
model: resolvedModel,
|
|
1281
1282
|
};
|
|
1282
1283
|
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1284
|
+
// Native resume is deprecated. Follow-up continuity is delivered via the
|
|
1285
|
+
// context preamble (see src/commands/context-preamble.ts). Any stray
|
|
1286
|
+
// resumeSessionId is logged and ignored — we always start a fresh thread.
|
|
1287
|
+
if (config.resumeSessionId) {
|
|
1288
|
+
console.warn(
|
|
1289
|
+
"[codex-adapter] resumeSessionId ignored — native resume is disabled by deprecation plan",
|
|
1290
|
+
);
|
|
1291
|
+
}
|
|
1292
|
+
const thread = codex.startThread(threadOptions);
|
|
1286
1293
|
|
|
1287
1294
|
return new CodexSession(
|
|
1288
1295
|
thread,
|
|
@@ -1615,20 +1622,10 @@ export class CodexAdapter implements ProviderAdapter {
|
|
|
1615
1622
|
return new CodexSubprocessSession(config, this.skillsDir);
|
|
1616
1623
|
}
|
|
1617
1624
|
|
|
1618
|
-
async canResume(
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
try {
|
|
1623
|
-
const codex = new Codex();
|
|
1624
|
-
// `resumeThread` is synchronous in 0.118.x and returns a Thread handle.
|
|
1625
|
-
// The runner only calls canResume when deciding whether to resume a
|
|
1626
|
-
// task, so we accept the (cheap) handshake cost.
|
|
1627
|
-
codex.resumeThread(sessionId);
|
|
1628
|
-
return true;
|
|
1629
|
-
} catch {
|
|
1630
|
-
return false;
|
|
1631
|
-
}
|
|
1625
|
+
async canResume(_sessionId: string): Promise<boolean> {
|
|
1626
|
+
// Native resume is deprecated; runner no longer threads resumeSessionId
|
|
1627
|
+
// to adapters. Follow-up continuity flows via the context preamble.
|
|
1628
|
+
return false;
|
|
1632
1629
|
}
|
|
1633
1630
|
|
|
1634
1631
|
formatCommand(commandName: string): string {
|
|
@@ -22,14 +22,31 @@
|
|
|
22
22
|
* purposes and doesn't validate it as a separate JWT.
|
|
23
23
|
*/
|
|
24
24
|
|
|
25
|
+
import { extractChatgptUserId } from "./flow.js";
|
|
25
26
|
import type { CodexAuthJson, CodexOAuthCredentials } from "./types.js";
|
|
26
27
|
|
|
27
28
|
export function authJsonToCredentialSelection(auth: CodexAuthJson, slot = 0, total = 1) {
|
|
29
|
+
// Prefer the per-grant `chatgpt_user_id` so two slots authenticated against
|
|
30
|
+
// the same ChatGPT Team workspace get distinct suffixes. Fall back to
|
|
31
|
+
// account_id when the JWT lacks the claim — preserves boot for any
|
|
32
|
+
// unexpected token shape, at the cost of re-introducing the slot-collision
|
|
33
|
+
// bug for that specific slot only. The warn is a deliberate canary.
|
|
34
|
+
const userId = extractChatgptUserId(auth.tokens.access_token);
|
|
35
|
+
const suffixSource = userId ?? auth.tokens.account_id;
|
|
36
|
+
if (!userId) {
|
|
37
|
+
console.warn(
|
|
38
|
+
"[codex-oauth] No chatgpt_user_id in JWT — falling back to account_id for keySuffix derivation. " +
|
|
39
|
+
"If two slots share an account, their suffixes will collide.",
|
|
40
|
+
);
|
|
41
|
+
}
|
|
28
42
|
return {
|
|
43
|
+
// `selected` satisfies the CredentialSelection interface but is never read
|
|
44
|
+
// for CODEX_OAUTH: creds are materialised to ~/.codex/auth.json (not env-injected),
|
|
45
|
+
// and all tracking flows through `keySuffix` + `index` (never `selected`).
|
|
29
46
|
selected: auth.tokens.account_id,
|
|
30
47
|
index: slot,
|
|
31
48
|
total,
|
|
32
|
-
keySuffix:
|
|
49
|
+
keySuffix: suffixSource.slice(-5),
|
|
33
50
|
keyType: "CODEX_OAUTH",
|
|
34
51
|
};
|
|
35
52
|
}
|
|
@@ -85,7 +85,12 @@ export function decodeJwt(token: string): Record<string, unknown> | null {
|
|
|
85
85
|
const parts = token.split(".");
|
|
86
86
|
if (parts.length !== 3) return null;
|
|
87
87
|
const payload = parts[1] ?? "";
|
|
88
|
-
|
|
88
|
+
// Normalize base64url → standard base64 before decoding.
|
|
89
|
+
// JWTs use base64url (RFC 7515): '-' replaces '+', '_' replaces '/', padding stripped.
|
|
90
|
+
// atob() only accepts standard base64; passing raw base64url throws on '-' or '_'.
|
|
91
|
+
const b64 = payload.replace(/-/g, "+").replace(/_/g, "/");
|
|
92
|
+
const padded = b64 + "=".repeat((4 - (b64.length % 4)) % 4);
|
|
93
|
+
const decoded = atob(padded);
|
|
89
94
|
return JSON.parse(decoded);
|
|
90
95
|
} catch {
|
|
91
96
|
return null;
|
|
@@ -172,6 +177,24 @@ export function getAccountId(accessToken: string): string | null {
|
|
|
172
177
|
return typeof accountId === "string" && accountId.length > 0 ? accountId : null;
|
|
173
178
|
}
|
|
174
179
|
|
|
180
|
+
/**
 * Read the `chatgpt_user_id` claim from an access token's OpenAI-auth
 * namespace (the object stored under `JWT_CLAIM_PATH` in the JWT payload).
 *
 * The claim identifies a single OAuth grant (one user on one account). It is
 * not the same thing as `chatgpt_account_id`, which every member of a ChatGPT
 * Team workspace shares. Pooled credentials use it to derive a `keySuffix`
 * that stays unique per slot even when several slots log in to the same Team
 * account.
 *
 * @param accessToken - The JWT access token to inspect.
 * @returns The user id, or `null` when the token cannot be decoded, the
 *   namespace is absent, or the claim is missing, empty, or not a string.
 *   This function never throws for those cases; callers are expected to fall
 *   back to another identifier (typically `chatgpt_account_id`).
 */
export function extractChatgptUserId(accessToken: string): string | null {
  const claims = decodeJwt(accessToken);
  const authNamespace = claims?.[JWT_CLAIM_PATH] as Record<string, unknown> | undefined;
  const candidate = authNamespace?.chatgpt_user_id;

  // Only a non-empty string counts as a usable id.
  if (typeof candidate !== "string") return null;
  if (candidate.length === 0) return null;
  return candidate;
}
|
|
197
|
+
|
|
175
198
|
export async function createAuthorizationFlow(
|
|
176
199
|
originator = "agent-swarm",
|
|
177
200
|
): Promise<{ verifier: string; state: string; url: string }> {
|
package/src/providers/types.ts
CHANGED
|
@@ -89,6 +89,12 @@ export interface ProviderSessionConfig {
|
|
|
89
89
|
apiKey: string;
|
|
90
90
|
cwd: string;
|
|
91
91
|
vcsRepo?: string;
|
|
92
|
+
/**
|
|
93
|
+
* @deprecated Never set by the runner — native session resume was removed in
|
|
94
|
+
* the 2026-05-28 plan. Adapters log + ignore any stray value. Follow-up
|
|
95
|
+
* continuity flows through the context preamble; see
|
|
96
|
+
* `src/commands/context-preamble.ts` and `src/commands/resume-session.ts`.
|
|
97
|
+
*/
|
|
92
98
|
resumeSessionId?: string;
|
|
93
99
|
iteration?: number;
|
|
94
100
|
logFile: string;
|
|
@@ -1,9 +1,27 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
createTaskExtended,
|
|
3
|
+
getActiveTaskCount,
|
|
4
|
+
getAgentById,
|
|
5
|
+
getLeadAgent,
|
|
6
|
+
getTaskAttachments,
|
|
7
|
+
getTaskById,
|
|
8
|
+
} from "../be/db";
|
|
9
|
+
import { repointTrackerSyncBySwarmId } from "../be/db-queries/tracker";
|
|
2
10
|
import { resolveTemplate } from "../prompts/resolver";
|
|
3
|
-
import type { AgentTask, TaskAttachment } from "../types";
|
|
11
|
+
import type { AgentTask, ResumeReason, TaskAttachment } from "../types";
|
|
4
12
|
// Side-effect import: registers task lifecycle templates in the in-memory registry.
|
|
5
13
|
import "../tools/templates";
|
|
6
14
|
|
|
15
|
+
/** Fallback liveness window used when no usable override is configured. */
const DEFAULT_WORKER_LIVENESS_WINDOW_SECONDS = 30;

/**
 * Turn the raw `WORKER_LIVENESS_WINDOW_SECONDS` env value into a number.
 *
 * An unset, blank, or non-numeric value (e.g. `"30s"`) yields the default.
 * Without this guard a non-numeric override would coerce to `NaN`, and every
 * `elapsed < window` comparison against it would be false — silently turning
 * off preferred-worker routing instead of surfacing the misconfiguration.
 *
 * @param raw - The env value as read from `process.env` (may be undefined).
 * @returns The parsed window in seconds, or the 30s default.
 */
function parseWorkerLivenessWindowSeconds(raw: string | undefined): number {
  if (raw === undefined || raw.trim() === "") {
    return DEFAULT_WORKER_LIVENESS_WINDOW_SECONDS;
  }
  const parsed = Number(raw);
  return Number.isNaN(parsed) ? DEFAULT_WORKER_LIVENESS_WINDOW_SECONDS : parsed;
}

/**
 * Liveness window (seconds) for considering a worker "online" enough to
 * pre-assign a resume task. Defaults to 30s; override via the
 * `WORKER_LIVENESS_WINDOW_SECONDS` env var. The worker heartbeats
 * `lastActivityAt` on its agent row at least once per provider tool-call /
 * poll tick, so 30s comfortably covers a healthy worker.
 *
 * Unparseable overrides fall back to the default rather than becoming `NaN`.
 */
export const WORKER_LIVENESS_WINDOW_SECONDS = parseWorkerLivenessWindowSeconds(
  process.env.WORKER_LIVENESS_WINDOW_SECONDS,
);
|
|
24
|
+
|
|
7
25
|
function attachmentPointer(a: TaskAttachment): string {
|
|
8
26
|
switch (a.kind) {
|
|
9
27
|
case "url":
|
|
@@ -36,6 +54,7 @@ export function createWorkerTaskFollowUp(args: {
|
|
|
36
54
|
const { task, status, output, failureReason } = args;
|
|
37
55
|
|
|
38
56
|
if (task.workflowRunId) return null;
|
|
57
|
+
if (task.followUpConfig?.disabled === true) return null;
|
|
39
58
|
|
|
40
59
|
const taskAgent = getAgentById(task.agentId ?? "");
|
|
41
60
|
if (!taskAgent || taskAgent.isLead) return null;
|
|
@@ -45,6 +64,16 @@ export function createWorkerTaskFollowUp(args: {
|
|
|
45
64
|
|
|
46
65
|
const agentName = taskAgent.name || task.agentId?.slice(0, 8) || "Unknown";
|
|
47
66
|
const taskDesc = task.task.slice(0, 200);
|
|
67
|
+
const creatorAgent = task.creatorAgentId
|
|
68
|
+
? `${task.creatorAgentId}${task.creatorAgentId === leadAgent.id ? " (you)" : ""}`
|
|
69
|
+
: "<none>";
|
|
70
|
+
const instructions =
|
|
71
|
+
status === "completed"
|
|
72
|
+
? (task.followUpConfig?.onCompleted ?? "")
|
|
73
|
+
: (task.followUpConfig?.onFailed ?? "");
|
|
74
|
+
const followUpInstructions = instructions
|
|
75
|
+
? `\nAdditional instructions from the task creator:\n${instructions}\n`
|
|
76
|
+
: "";
|
|
48
77
|
|
|
49
78
|
let followUpDescription: string;
|
|
50
79
|
if (status === "completed") {
|
|
@@ -55,7 +84,9 @@ export function createWorkerTaskFollowUp(args: {
|
|
|
55
84
|
const completedResult = resolveTemplate("task.worker.completed", {
|
|
56
85
|
agent_name: agentName,
|
|
57
86
|
task_desc: taskDesc,
|
|
87
|
+
creator_agent: creatorAgent,
|
|
58
88
|
output_summary: outputSummary,
|
|
89
|
+
follow_up_instructions: followUpInstructions,
|
|
59
90
|
task_id: task.id,
|
|
60
91
|
});
|
|
61
92
|
followUpDescription = completedResult.text;
|
|
@@ -64,7 +95,9 @@ export function createWorkerTaskFollowUp(args: {
|
|
|
64
95
|
const failedResult = resolveTemplate("task.worker.failed", {
|
|
65
96
|
agent_name: agentName,
|
|
66
97
|
task_desc: taskDesc,
|
|
98
|
+
creator_agent: creatorAgent,
|
|
67
99
|
failure_reason: reason,
|
|
100
|
+
follow_up_instructions: followUpInstructions,
|
|
68
101
|
task_id: task.id,
|
|
69
102
|
});
|
|
70
103
|
followUpDescription = failedResult.text;
|
|
@@ -80,3 +113,130 @@ export function createWorkerTaskFollowUp(args: {
|
|
|
80
113
|
slackUserId: task.slackUserId,
|
|
81
114
|
});
|
|
82
115
|
}
|
|
116
|
+
|
|
117
|
+
/**
 * Outcome reported by `createResumeFollowUp`, discriminated on `kind`:
 *
 * - `"skipped"` — nothing was created; `reason` says why. (The visible
 *   implementation only produces `"parent_not_found"`.)
 * - `"workflow-skip"` — the parent is a workflow step, so no follow-up was
 *   created; `stepId` is the parent's `workflowRunStepId`.
 * - `"created"` — a resume task was created and is returned as `task`.
 */
export type CreateResumeFollowUpResult =
  | { kind: "skipped"; reason: "parent_not_found" | "lead_not_found" }
  | { kind: "workflow-skip"; stepId: string }
  | { kind: "created"; task: AgentTask };
|
|
122
|
+
|
|
123
|
+
/**
 * Spawn a "resume" follow-up for a parent task that is being superseded
 * (graceful shutdown, context-limit pressure, manual operator action).
 *
 * Workflow carve-out: a parent that is a workflow step (`workflowRunStepId`
 * set) gets no follow-up. The function returns
 * `{ kind: "workflow-skip", stepId }` so the caller can
 * `failTask(parent.id, 'superseded_workflow_task')` and leave recovery to the
 * workflow engine's retry/failure policy.
 *
 * Field inheritance: only the resume-specific options are passed to
 * `createTaskExtended`. Everything else (`dir`, `vcsRepo`/`vcsProvider`/etc.,
 * `outputSchema`, Slack/AgentMail context, `requestedByUserId`, `contextKey`,
 * `followUpConfig`) is inherited through `createTaskExtended`'s
 * `parentTaskId` lookup, keeping a single source of truth.
 *
 * `model` is intentionally NOT inherited: a resume task is routinely claimed
 * by a different worker (and thus a different harness/provider) than the
 * parent, so the parent's concrete provider-specific model would break the
 * child at session-init. The resume task runs on the assignee's own model;
 * see the `model` carve-out comment in `createTaskExtended` (`src/be/db.ts`).
 *
 * Routing: the parent's worker (`parent.agentId`) is pre-assigned when it is
 * not offline, its `lastActivityAt` falls inside
 * `WORKER_LIVENESS_WINDOW_SECONDS`, and it has spare capacity
 * (`getActiveTaskCount < maxTasks`). In every other case the task lands in
 * the unassigned pool for any worker to claim.
 *
 * @param args.parentId - Id of the task being superseded.
 * @param args.reason - Why the parent is being superseded.
 * @returns `"skipped"` when the parent does not exist, `"workflow-skip"` for
 *   workflow-step parents, otherwise `"created"` with the new task.
 */
export function createResumeFollowUp(args: {
  parentId: string;
  reason: ResumeReason;
}): CreateResumeFollowUpResult {
  const { parentId, reason } = args;

  const parent = getTaskById(parentId);
  if (!parent) {
    return { kind: "skipped", reason: "parent_not_found" };
  }

  // Workflow steps recover through the engine's own retry policy.
  const workflowStepId = parent.workflowRunStepId;
  if (workflowStepId) {
    return { kind: "workflow-skip", stepId: workflowStepId };
  }

  // Routing decision. Read-then-create happens in the same DB process, so the
  // race window is small and accepted for v1 per the plan — the
  // unassigned-pool fallback covers it anyway.
  //
  // `graceful_shutdown` always goes to the pool: the parent worker is exiting
  // and will call `closeAgent` (→ offline) moments after the supersede loop.
  // Right now it still looks fresh and has capacity (the parent just
  // terminal-transitioned), so a liveness check would hand the resume task to
  // a dying worker and leave it orphaned in `pending`.
  //
  // The remaining reasons use liveness-aware routing:
  //   - `crash_recovery`: the worker is presumed dead, so a stale
  //     `lastActivityAt` or `status === "offline"` rejects it naturally.
  //   - `context_limits` / `manual_supersede`: the worker is alive and can
  //     carry on with the resume task in a fresh session.
  let preferredAgentId: string | undefined;
  if (parent.agentId && reason !== "graceful_shutdown") {
    const worker = getAgentById(parent.agentId);
    if (worker && worker.status !== "offline") {
      // A missing timestamp is treated as epoch 0, i.e. never fresh.
      const lastSeenMs = worker.lastActivityAt ? Date.parse(worker.lastActivityAt) : 0;
      const windowMs = WORKER_LIVENESS_WINDOW_SECONDS * 1000;
      const isFresh = Number.isFinite(lastSeenMs) && Date.now() - lastSeenMs < windowMs;
      const runningTasks = getActiveTaskCount(worker.id);
      const capacity = worker.maxTasks ?? 1;
      if (isFresh && runningTasks < capacity) {
        preferredAgentId = worker.id;
      }
    }
  }

  // Build the prompt paragraph by paragraph; paragraphs are separated by a
  // blank line.
  const parentSummary = parent.task.slice(0, 200);
  const contextNote = [
    "The full prior context (description, recent tool calls, artifacts) is",
    "prepended to this prompt at dispatch time via the resume context preamble.",
    "Do NOT redo work already completed — extend it.",
  ].join("\n");
  const followUpDescription = [
    "Resume interrupted task.",
    `Parent task: ${parentSummary}`,
    `Reason: ${reason}`,
    contextNote,
  ].join("\n\n");

  // Bump priority by 10 over the parent (default 50), capped at 100.
  const boostedPriority = Math.min(100, (parent.priority ?? 50) + 10);

  // Only resume-specific fields are set here; identity-shaped fields come
  // from the parent via `createTaskExtended`'s parentTaskId block, which
  // deliberately leaves `model` out.
  const created = createTaskExtended(followUpDescription, {
    agentId: preferredAgentId,
    creatorAgentId: parent.creatorAgentId,
    source: "system",
    taskType: "resume",
    tags: ["auto-resume", `reason:${reason}`],
    priority: boostedPriority,
    parentTaskId: parent.id,
  });

  // Move Linear / Jira `tracker_sync` rows from the (now terminal) parent to
  // the resume child. Otherwise outbound completion posts for the resume task
  // cannot find their row, and later inbound webhooks load the terminal
  // parent and create duplicate tasks. One call covers every provider and
  // entity type, and it is a no-op when the parent has no rows.
  const movedRows = repointTrackerSyncBySwarmId(parent.id, created.id);
  if (movedRows > 0) {
    console.log(
      `[ResumeFollowUp] Repointed ${movedRows} tracker_sync row(s) from ${parent.id.slice(0, 8)} → ${created.id.slice(0, 8)}`,
    );
  }

  return { kind: "created", task: created };
}
|
package/src/telemetry.ts
CHANGED
|
@@ -153,6 +153,16 @@ function isCloudDeployment(): boolean {
|
|
|
153
153
|
return raw === "true" || raw === "1";
|
|
154
154
|
}
|
|
155
155
|
|
|
156
|
+
/**
 * Resolve the environment label attached to telemetry events.
 *
 * A non-blank `DESPLEGA_TELEMETRY_ENV` (after trimming) always wins. Without
 * it the label is `"test"` when `NODE_ENV` is exactly `"test"`, and
 * `"production"` for everything else.
 *
 * `NODE_ENV` is deliberately not used as a general default: shipped Bun/npm
 * installs can report "development" even though the operator never picked a
 * telemetry cohort.
 */
function getTelemetryEnvironment(): string {
  const override = process.env.DESPLEGA_TELEMETRY_ENV?.trim();
  if (override) {
    return override;
  }
  return process.env.NODE_ENV === "test" ? "test" : "production";
}
|
|
165
|
+
|
|
156
166
|
/** Fire-and-forget telemetry event. Never throws, never blocks. */
|
|
157
167
|
export function track(options: TrackOptions): void {
|
|
158
168
|
if (!isEnabled() || !installationId) return;
|
|
@@ -177,7 +187,7 @@ export function track(options: TrackOptions): void {
|
|
|
177
187
|
metadata: {
|
|
178
188
|
transport: "https",
|
|
179
189
|
schema_version: 1,
|
|
180
|
-
environment:
|
|
190
|
+
environment: getTelemetryEnvironment(),
|
|
181
191
|
is_cloud: isCloudDeployment(),
|
|
182
192
|
...getOrgIdentity(),
|
|
183
193
|
...options.metadata,
|