@desplega.ai/agent-swarm 1.71.2 → 1.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +3 -2
  2. package/openapi.json +994 -62
  3. package/package.json +2 -1
  4. package/src/be/budget-admission.ts +121 -0
  5. package/src/be/budget-refusal-notify.ts +145 -0
  6. package/src/be/db.ts +488 -5
  7. package/src/be/migrations/044_provider_meta.sql +2 -0
  8. package/src/be/migrations/046_budgets_and_pricing.sql +87 -0
  9. package/src/be/migrations/047_session_costs_cost_source.sql +16 -0
  10. package/src/cli.tsx +22 -1
  11. package/src/commands/claude-managed-setup.ts +687 -0
  12. package/src/commands/codex-login.ts +1 -1
  13. package/src/commands/runner.ts +175 -28
  14. package/src/commands/templates.ts +10 -6
  15. package/src/http/budgets.ts +219 -0
  16. package/src/http/index.ts +6 -0
  17. package/src/http/integrations.ts +134 -0
  18. package/src/http/poll.ts +161 -3
  19. package/src/http/pricing.ts +245 -0
  20. package/src/http/session-data.ts +54 -6
  21. package/src/http/tasks.ts +23 -2
  22. package/src/prompts/base-prompt.ts +103 -73
  23. package/src/prompts/session-templates.ts +43 -0
  24. package/src/providers/claude-adapter.ts +3 -1
  25. package/src/providers/claude-managed-adapter.ts +871 -0
  26. package/src/providers/claude-managed-models.ts +117 -0
  27. package/src/providers/claude-managed-swarm-events.ts +77 -0
  28. package/src/providers/codex-adapter.ts +3 -1
  29. package/src/providers/codex-skill-resolver.ts +10 -0
  30. package/src/providers/codex-swarm-events.ts +20 -161
  31. package/src/providers/devin-adapter.ts +894 -0
  32. package/src/providers/devin-api.ts +207 -0
  33. package/src/providers/devin-playbooks.ts +91 -0
  34. package/src/providers/devin-skill-resolver.ts +113 -0
  35. package/src/providers/index.ts +10 -1
  36. package/src/providers/pi-mono-adapter.ts +3 -1
  37. package/src/providers/swarm-events-shared.ts +262 -0
  38. package/src/providers/types.ts +26 -1
  39. package/src/tests/base-prompt.test.ts +199 -0
  40. package/src/tests/budget-admission.test.ts +339 -0
  41. package/src/tests/budget-claim-gate.test.ts +288 -0
  42. package/src/tests/budget-refusal-notification.test.ts +324 -0
  43. package/src/tests/budgets-routes.test.ts +331 -0
  44. package/src/tests/claude-managed-adapter.test.ts +1301 -0
  45. package/src/tests/claude-managed-setup.test.ts +325 -0
  46. package/src/tests/devin-adapter.test.ts +677 -0
  47. package/src/tests/devin-api.test.ts +339 -0
  48. package/src/tests/integrations-http.test.ts +211 -0
  49. package/src/tests/migration-046-budgets.test.ts +327 -0
  50. package/src/tests/pricing-routes.test.ts +315 -0
  51. package/src/tests/prompt-template-remaining.test.ts +4 -0
  52. package/src/tests/prompt-template-session.test.ts +2 -2
  53. package/src/tests/provider-adapter.test.ts +1 -1
  54. package/src/tests/runner-budget-refused.test.ts +271 -0
  55. package/src/tests/session-costs-codex-recompute.test.ts +386 -0
  56. package/src/tools/poll-task.ts +13 -2
  57. package/src/tools/task-action.ts +92 -2
  58. package/src/tools/templates.ts +29 -0
  59. package/src/types.ts +116 -0
  60. package/src/utils/budget-backoff.ts +34 -0
  61. package/src/utils/credentials.ts +4 -0
  62. package/src/utils/provider-metadata.ts +9 -0
@@ -0,0 +1,117 @@
1
+ /**
2
+ * Pricing table for Anthropic-managed Claude models, mirroring the layout of
3
+ * `src/providers/codex-models.ts`. Rates are USD per million tokens (Mtok)
4
+ * sourced from https://platform.claude.com/docs/en/about-claude/pricing
5
+ * (verified 2026-04-28).
6
+ *
7
+ * The managed-agents API does NOT report dollar cost on the `span.model_request_end`
8
+ * event — only token counts (`input_tokens`, `output_tokens`,
9
+ * `cache_read_input_tokens`, `cache_creation_input_tokens`). Phase 4 of the
10
+ * provider plan computes USD locally via {@link computeClaudeManagedCostUsd},
11
+ * then folds in Anthropic's $0.08/session-hour runtime fee inside the adapter.
12
+ *
13
+ * Bump this file when Anthropic publishes new rates or new models.
14
+ *
15
+ * Cache nomenclature mapping:
16
+ * - `cache_read_input_tokens` → "cache hit" rate (cheapest)
17
+ * - `cache_creation_input_tokens` → "cache write" rate (input × 1.25 for 5m TTL)
18
+ * - regular `input_tokens` (uncached) → standard input rate
19
+ *
20
+ * Anthropic's pricing page lists 5-minute and 1-hour cache TTLs separately;
21
+ * managed-agents currently uses the 5-minute breakpoint by default, which is
22
+ * the rate captured here. If a future SDK release surfaces TTL on the usage
23
+ * payload, refine these by TTL.
24
+ */
25
+
26
+ /** Models supported by the managed-agents surface for the swarm worker. */
27
+ export const CLAUDE_MANAGED_MODELS = [
28
+ "claude-sonnet-4-6",
29
+ "claude-opus-4-7",
30
+ "claude-haiku-4-5",
31
+ ] as const;
32
+
33
+ export type ClaudeManagedModel = (typeof CLAUDE_MANAGED_MODELS)[number];
34
+
35
+ /** Pricing per million tokens (USD). */
36
+ export interface ClaudeManagedModelPricing {
37
+ /** USD per million uncached input tokens. */
38
+ inputPerMillion: number;
39
+ /** USD per million output tokens. */
40
+ outputPerMillion: number;
41
+ /** USD per million tokens read from prompt cache. */
42
+ cacheReadPerMillion: number;
43
+ /** USD per million tokens written to prompt cache (5-minute TTL). */
44
+ cacheWritePerMillion: number;
45
+ }
46
+
47
+ /**
48
+ * Anthropic public list pricing as of 2026-04-28. Source:
49
+ * https://platform.claude.com/docs/en/about-claude/pricing
50
+ *
51
+ * - claude-sonnet-4-6: $3 / $15 / $0.30 / $3.75 (in / out / cache-read / cache-write)
52
+ * - claude-opus-4-7: $15 / $75 / $1.50 / $18.75
53
+ * - claude-haiku-4-5: $1 / $5 / $0.10 / $1.25
54
+ */
55
+ export const CLAUDE_MANAGED_MODEL_PRICING: Record<ClaudeManagedModel, ClaudeManagedModelPricing> = {
56
+ "claude-sonnet-4-6": {
57
+ inputPerMillion: 3.0,
58
+ outputPerMillion: 15.0,
59
+ cacheReadPerMillion: 0.3,
60
+ cacheWritePerMillion: 3.75,
61
+ },
62
+ "claude-opus-4-7": {
63
+ inputPerMillion: 15.0,
64
+ outputPerMillion: 75.0,
65
+ cacheReadPerMillion: 1.5,
66
+ cacheWritePerMillion: 18.75,
67
+ },
68
+ "claude-haiku-4-5": {
69
+ inputPerMillion: 1.0,
70
+ outputPerMillion: 5.0,
71
+ cacheReadPerMillion: 0.1,
72
+ cacheWritePerMillion: 1.25,
73
+ },
74
+ };
75
+
76
+ /**
77
+ * Models we've already warned about — keeps `console.warn` from spamming the
78
+ * worker logs when an old session keeps replaying through `span.model_request_end`
79
+ * events with an unrecognized model string.
80
+ */
81
+ const warnedUnknownModels = new Set<string>();
82
+
83
+ /**
84
+ * Compute USD cost for one Claude managed-agents session, given the SDK's
85
+ * accumulated token counts (each count / 1e6 × its per-million rate).
86
+ *
87
+ * Returns `0` (with a deduplicated `console.warn`) for unknown model strings —
88
+ * we'd rather under-report than make up a number on a typo. The lookup is
89
+ * own-property only, so inherited keys ("toString", "constructor") count as
90
+ * unknown instead of resolving to an `Object.prototype` member (→ `NaN`).
91
+ *
92
+ * Note: the runtime $0.08/session-hour fee is NOT folded in here; the adapter
93
+ * adds it separately from the session's wallclock `durationMs`.
94
+ */
95
+ export function computeClaudeManagedCostUsd(
96
+ model: string,
97
+ inputTokens: number,
98
+ outputTokens: number,
99
+ cacheReadTokens: number,
100
+ cacheWriteTokens: number,
101
+ ): number {
102
+ if (!Object.prototype.hasOwnProperty.call(CLAUDE_MANAGED_MODEL_PRICING, model)) {
103
+ if (!warnedUnknownModels.has(model)) {
104
+ warnedUnknownModels.add(model);
105
+ console.warn(
106
+ `[claude-managed-models] Unknown model "${model}" — returning $0 cost. ` +
107
+ `Add it to CLAUDE_MANAGED_MODEL_PRICING in src/providers/claude-managed-models.ts.`,
108
+ );
109
+ }
110
+ return 0;
111
+ }
112
+ const pricing = CLAUDE_MANAGED_MODEL_PRICING[model as ClaudeManagedModel];
113
+ const inputCost = (inputTokens / 1_000_000) * pricing.inputPerMillion;
114
+ const outputCost = (outputTokens / 1_000_000) * pricing.outputPerMillion;
115
+ const cacheReadCost = (cacheReadTokens / 1_000_000) * pricing.cacheReadPerMillion;
116
+ const cacheWriteCost = (cacheWriteTokens / 1_000_000) * pricing.cacheWritePerMillion;
117
+ return inputCost + outputCost + cacheReadCost + cacheWriteCost;
118
+ }
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Adapter-side swarm lifecycle hooks for the Claude Managed Agents provider.
3
+ *
4
+ * Phase 5 of the managed-agents rollout. Mirrors `codex-swarm-events.ts`:
5
+ * thin wrapper around `swarm-events-shared.ts` that wires the shared
6
+ * throttle/poll/heartbeat scaffolding to the managed-agents cancel pathway.
7
+ *
8
+ * ## Cancel callback semantics
9
+ *
10
+ * When the shared `checkCancelled` poll detects a cancellation, it (1) fires
11
+ * `abortRef.current?.abort()` and (2) invokes our `onCancel` callback. The
12
+ * callback issues `client.beta.sessions.events.send(sessionId, { events:
13
+ * [{ type: "user.interrupt" }] })` followed by
14
+ * `client.beta.sessions.archive(sessionId)`.
15
+ *
16
+ * Both calls are best-effort and `.catch()`-ed: an already-archived or
17
+ * already-terminated session returns errors that we don't want to leak into
18
+ * the event handler (which is supposed to be synchronous and never throw).
19
+ *
20
+ * The adapter's own `abort()` method does the same dance directly — this
21
+ * binding exists so EXTERNAL cancel polls (i.e. another worker process or
22
+ * the runner-side polling layer) can drive the same flow without going
23
+ * through `session.abort()`.
24
+ */
25
+
26
+ import { createSwarmEventHandler, type SwarmEventHandlerOpts } from "./swarm-events-shared";
27
+ import type { ProviderEvent } from "./types";
28
+
29
+ /**
30
+ * Minimal slice of the managed-agents client surface we need to issue
31
+ * `user.interrupt` + archive. Kept as a structural type so this file doesn't
32
+ * have to import the full `ManagedAgentsClient` (and so unit tests can
33
+ * pass in a tiny stub).
34
+ */
35
+ export interface ClaudeManagedCancelClient {
36
+ beta: {
37
+ sessions: {
38
+ archive: (sessionId: string) => unknown;
39
+ events: {
40
+ send: (sessionId: string, params: { events: Array<Record<string, unknown>> }) => unknown;
41
+ };
42
+ };
43
+ };
44
+ }
45
+
46
+ export interface ClaudeManagedSwarmEventHandlerOpts
47
+ extends Omit<SwarmEventHandlerOpts, "onCancel" | "sessionIdFallbackPrefix"> {
48
+ /** The Anthropic SDK client; used to send interrupt + archive on cancel. */
49
+ client: ClaudeManagedCancelClient;
50
+ /** The managed session currently in flight. */
51
+ managedSessionId: string;
52
+ }
53
+
54
+ /**
55
+ * Build a swarm-event handler that drives managed-agents cancel actions on top
56
+ * of the shared scaffolding. `onCancel` sends `user.interrupt` and archives the
57
+ * session; a sync throw or rejection from one call never skips the other.
58
+ */
59
+ export function createClaudeManagedSwarmEventHandler(
60
+ opts: ClaudeManagedSwarmEventHandlerOpts,
61
+ ): (event: ProviderEvent) => void {
62
+ const { client, managedSessionId, ...shared } = opts;
63
+ const bestEffort = (call: () => unknown): void => {
64
+ try {
65
+ void Promise.resolve(call()).catch(() => {});
66
+ } catch { /* synchronous throw (e.g. argument validation): ignore like a rejection */ }
67
+ };
68
+ return createSwarmEventHandler({
69
+ ...shared,
70
+ sessionIdFallbackPrefix: "claude-managed",
71
+ onCancel: () => {
72
+ const interrupt = { events: [{ type: "user.interrupt" }] };
73
+ bestEffort(() => client.beta.sessions.events.send(managedSessionId, interrupt));
74
+ bestEffort(() => client.beta.sessions.archive(managedSessionId));
75
+ },
76
+ });
77
+ }
@@ -408,6 +408,7 @@ class CodexSession implements ProviderSession {
408
408
  numTurns: this.numTurns,
409
409
  model: this.resolvedModel,
410
410
  isError,
411
+ provider: "codex",
411
412
  };
412
413
  }
413
414
 
@@ -468,7 +469,7 @@ class CodexSession implements ProviderSession {
468
469
  switch (event.type) {
469
470
  case "thread.started": {
470
471
  this._sessionId = event.thread_id;
471
- this.emit({ type: "session_init", sessionId: event.thread_id });
472
+ this.emit({ type: "session_init", sessionId: event.thread_id, provider: "codex" });
472
473
  break;
473
474
  }
474
475
  case "turn.started": {
@@ -742,6 +743,7 @@ class CodexSession implements ProviderSession {
742
743
 
743
744
  export class CodexAdapter implements ProviderAdapter {
744
745
  readonly name = "codex";
746
+ readonly traits = { hasMcp: true, hasLocalEnvironment: true };
745
747
 
746
748
  /**
747
749
  * Optional override for the skill resolver's skills directory. When unset,
@@ -30,6 +30,8 @@ import type { ProviderEvent } from "./types";
30
30
  */
31
31
  const SLASH_COMMAND_REGEX = /^\/([a-z0-9:_-]+)(?:\s+(.*))?$/;
32
32
 
33
+ const MAX_SKILL_CHARS = Number(process.env.MAX_SKILL_CHARS) || 100_000;
34
+
33
35
  /**
34
36
  * Resolve the default skills directory for Codex.
35
37
  *
@@ -104,6 +106,14 @@ export async function resolveCodexPrompt(
104
106
  return prompt;
105
107
  }
106
108
 
109
+ if (skillContent.length > MAX_SKILL_CHARS) {
110
+ emit?.({
111
+ type: "raw_stderr",
112
+ content: `[codex] skill resolver: SKILL.md for /${commandName} exceeds ${MAX_SKILL_CHARS} chars (${skillContent.length}), truncating\n`,
113
+ });
114
+ skillContent = skillContent.slice(0, MAX_SKILL_CHARS);
115
+ }
116
+
107
117
  // Assemble the user-request body: trailing args from the slash line (if any),
108
118
  // plus any subsequent lines from the original prompt. Joined with a newline
109
119
  // so `/work-on-task foo\n\nproceed` becomes `foo\n\nproceed`.
@@ -1,10 +1,12 @@
1
1
  /**
2
2
  * Adapter-side swarm lifecycle hooks for the Codex provider.
3
3
  *
4
- * Phase 5 of the codex provider rollout. Mirrors `pi-mono-extension.ts`
5
- * but adapted to the event-stream model: instead of pi-mono's
6
- * `pi.on("tool_call", ...)` extension API, we attach a single listener to
7
- * `CodexSession.onEvent(...)` and react to normalized `ProviderEvent`s.
4
+ * Phase 5 (managed-agents) extracted the throttle/poll/heartbeat scaffolding
5
+ * into `swarm-events-shared.ts`. This file is now a thin pass-through that
6
+ * preserves the codex-specific public API (`createCodexSwarmEventHandler`,
7
+ * `CodexSwarmEventHandlerOpts`) and adopts the shared implementation
8
+ * verbatim — same throttle constants, same `fireAndForget` semantics, same
9
+ * `try/catch` swallow-everything contract.
8
10
  *
9
11
  * ## Two-layer cancellation
10
12
  *
@@ -17,170 +19,27 @@
17
19
  * `AbortController`. This *accelerates* cancellation latency but does NOT
18
20
  * block tool execution — Codex's SDK lacks a preToolUse blocking hook
19
21
  * (unlike pi-mono's `block: true` return value).
20
- *
21
- * The handler MUST be synchronous from the caller's perspective. Every
22
- * fetch is fire-and-forget with `.catch(() => {})` so a single bad request
23
- * never breaks the session. The handler also never throws — `try/catch`
24
- * around the dispatch swallows everything for safety.
25
22
  */
26
23
 
27
- import { checkToolLoop } from "../hooks/tool-loop-detection";
24
+ import { createSwarmEventHandler, type SwarmEventHandlerOpts } from "./swarm-events-shared";
28
25
  import type { ProviderEvent } from "./types";
29
26
 
30
- export interface CodexSwarmEventHandlerOpts {
31
- apiUrl: string;
32
- apiKey: string;
33
- agentId: string;
34
- /** Task currently being worked on. When null, all task-scoped hooks are no-ops. */
35
- taskId: string | null;
36
- /** Mutable reference to the session's per-turn AbortController. */
37
- abortRef: { current: AbortController | null };
38
- }
39
-
40
- /** Throttle windows (ms) keyed by action name. */
41
- const CANCELLATION_THROTTLE_MS = 500;
42
- const HEARTBEAT_THROTTLE_MS = 5_000;
43
- const ACTIVITY_THROTTLE_MS = 5_000;
44
- const CONTEXT_THROTTLE_MS = 30_000;
45
-
46
- function apiHeaders(opts: CodexSwarmEventHandlerOpts): Record<string, string> {
47
- return {
48
- "Content-Type": "application/json",
49
- Authorization: `Bearer ${opts.apiKey}`,
50
- "X-Agent-ID": opts.agentId,
51
- };
52
- }
53
-
54
- function fireAndForget(url: string, init: RequestInit): void {
55
- void fetch(url, init).catch(() => {});
56
- }
27
+ /**
28
+ * Codex-specific opts: the shared opts minus `onCancel` (unused by codex today)
29
+ * and `sessionIdFallbackPrefix` (set to "codex" below). Re-exported so existing
30
+ * call sites (`codex-adapter.ts:303`) keep their import path stable.
31
+ */
32
+ export type CodexSwarmEventHandlerOpts = Omit<
33
+ SwarmEventHandlerOpts,
34
+ "onCancel" | "sessionIdFallbackPrefix"
35
+ >;
57
36
 
58
37
  /** Build the handler. The returned function reacts to normalized events. */
59
38
  export function createCodexSwarmEventHandler(
60
39
  opts: CodexSwarmEventHandlerOpts,
61
40
  ): (event: ProviderEvent) => void {
62
- const lastCall: Record<string, number> = {};
63
- let sessionId: string | undefined;
64
-
65
- const shouldRun = (key: string, throttleMs: number): boolean => {
66
- const now = Date.now();
67
- if (now - (lastCall[key] ?? 0) < throttleMs) return false;
68
- lastCall[key] = now;
69
- return true;
70
- };
71
-
72
- const checkCancelled = (): void => {
73
- const taskId = opts.taskId;
74
- if (!taskId) return;
75
- void (async () => {
76
- try {
77
- const res = await fetch(
78
- `${opts.apiUrl}/cancelled-tasks?taskId=${encodeURIComponent(taskId)}`,
79
- { headers: apiHeaders(opts) },
80
- );
81
- if (!res.ok) return;
82
- const data = (await res.json()) as {
83
- cancelled?: Array<{ id: string; failureReason?: string }>;
84
- };
85
- const isCancelled = data.cancelled?.some((t) => t.id === taskId);
86
- if (isCancelled) {
87
- opts.abortRef.current?.abort();
88
- }
89
- } catch {
90
- // Swallow — fire-and-forget.
91
- }
92
- })();
93
- };
94
-
95
- const checkLoop = (toolName: string, args: unknown): void => {
96
- const taskId = opts.taskId;
97
- if (!taskId) return;
98
- const argRecord = args && typeof args === "object" ? (args as Record<string, unknown>) : {};
99
- void checkToolLoop(taskId, toolName, argRecord)
100
- .then((result) => {
101
- if (result.blocked) {
102
- opts.abortRef.current?.abort();
103
- }
104
- })
105
- .catch(() => {});
106
- };
107
-
108
- return (event: ProviderEvent): void => {
109
- try {
110
- switch (event.type) {
111
- case "session_init": {
112
- sessionId = event.sessionId;
113
- break;
114
- }
115
- case "tool_start": {
116
- if (shouldRun("cancellation", CANCELLATION_THROTTLE_MS)) {
117
- checkCancelled();
118
- }
119
- checkLoop(event.toolName, event.args);
120
- if (opts.taskId && shouldRun("heartbeat", HEARTBEAT_THROTTLE_MS)) {
121
- fireAndForget(
122
- `${opts.apiUrl}/api/active-sessions/heartbeat/${encodeURIComponent(opts.taskId)}`,
123
- { method: "PUT", headers: apiHeaders(opts) },
124
- );
125
- }
126
- if (shouldRun("activity", ACTIVITY_THROTTLE_MS)) {
127
- fireAndForget(
128
- `${opts.apiUrl}/api/agents/${encodeURIComponent(opts.agentId)}/activity`,
129
- { method: "PUT", headers: apiHeaders(opts) },
130
- );
131
- }
132
- break;
133
- }
134
- case "context_usage": {
135
- if (opts.taskId && shouldRun("context-progress", CONTEXT_THROTTLE_MS)) {
136
- fireAndForget(`${opts.apiUrl}/api/tasks/${encodeURIComponent(opts.taskId)}/context`, {
137
- method: "POST",
138
- headers: apiHeaders(opts),
139
- body: JSON.stringify({
140
- eventType: "progress",
141
- sessionId: sessionId ?? `codex-${opts.taskId}`,
142
- contextUsedTokens: event.contextUsedTokens,
143
- contextTotalTokens: event.contextTotalTokens,
144
- contextPercent: event.contextPercent,
145
- }),
146
- });
147
- }
148
- break;
149
- }
150
- case "compaction": {
151
- if (opts.taskId) {
152
- fireAndForget(`${opts.apiUrl}/api/tasks/${encodeURIComponent(opts.taskId)}/context`, {
153
- method: "POST",
154
- headers: apiHeaders(opts),
155
- body: JSON.stringify({
156
- eventType: "compaction",
157
- sessionId: sessionId ?? `codex-${opts.taskId}`,
158
- contextTotalTokens: event.contextTotalTokens,
159
- preCompactTokens: event.preCompactTokens,
160
- compactTrigger: event.compactTrigger,
161
- }),
162
- });
163
- }
164
- break;
165
- }
166
- case "result": {
167
- // Final completion context event mirrors pi-mono's session_shutdown
168
- // POST. The runner separately calls `/api/tasks/{id}/finish`.
169
- if (opts.taskId) {
170
- fireAndForget(`${opts.apiUrl}/api/tasks/${encodeURIComponent(opts.taskId)}/context`, {
171
- method: "POST",
172
- headers: apiHeaders(opts),
173
- body: JSON.stringify({
174
- eventType: "completion",
175
- sessionId: sessionId ?? `codex-${opts.taskId}`,
176
- }),
177
- });
178
- }
179
- break;
180
- }
181
- }
182
- } catch {
183
- // Never throw from the handler — the event loop is hot.
184
- }
185
- };
41
+ return createSwarmEventHandler({
42
+ ...opts,
43
+ sessionIdFallbackPrefix: "codex",
44
+ });
186
45
  }