@desplega.ai/agent-swarm 1.71.2 → 1.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/README.md +3 -2
  2. package/openapi.json +994 -62
  3. package/package.json +2 -1
  4. package/src/be/budget-admission.ts +121 -0
  5. package/src/be/budget-refusal-notify.ts +145 -0
  6. package/src/be/db.ts +488 -5
  7. package/src/be/migrations/044_provider_meta.sql +2 -0
  8. package/src/be/migrations/046_budgets_and_pricing.sql +87 -0
  9. package/src/be/migrations/047_session_costs_cost_source.sql +16 -0
  10. package/src/cli.tsx +22 -1
  11. package/src/commands/claude-managed-setup.ts +687 -0
  12. package/src/commands/codex-login.ts +1 -1
  13. package/src/commands/runner.ts +175 -28
  14. package/src/commands/templates.ts +10 -6
  15. package/src/http/budgets.ts +219 -0
  16. package/src/http/index.ts +6 -0
  17. package/src/http/integrations.ts +134 -0
  18. package/src/http/poll.ts +161 -3
  19. package/src/http/pricing.ts +245 -0
  20. package/src/http/session-data.ts +54 -6
  21. package/src/http/tasks.ts +23 -2
  22. package/src/prompts/base-prompt.ts +103 -73
  23. package/src/prompts/session-templates.ts +43 -0
  24. package/src/providers/claude-adapter.ts +3 -1
  25. package/src/providers/claude-managed-adapter.ts +871 -0
  26. package/src/providers/claude-managed-models.ts +117 -0
  27. package/src/providers/claude-managed-swarm-events.ts +77 -0
  28. package/src/providers/codex-adapter.ts +3 -1
  29. package/src/providers/codex-skill-resolver.ts +10 -0
  30. package/src/providers/codex-swarm-events.ts +20 -161
  31. package/src/providers/devin-adapter.ts +894 -0
  32. package/src/providers/devin-api.ts +207 -0
  33. package/src/providers/devin-playbooks.ts +91 -0
  34. package/src/providers/devin-skill-resolver.ts +113 -0
  35. package/src/providers/index.ts +10 -1
  36. package/src/providers/pi-mono-adapter.ts +3 -1
  37. package/src/providers/swarm-events-shared.ts +262 -0
  38. package/src/providers/types.ts +26 -1
  39. package/src/tests/base-prompt.test.ts +199 -0
  40. package/src/tests/budget-admission.test.ts +339 -0
  41. package/src/tests/budget-claim-gate.test.ts +288 -0
  42. package/src/tests/budget-refusal-notification.test.ts +324 -0
  43. package/src/tests/budgets-routes.test.ts +331 -0
  44. package/src/tests/claude-managed-adapter.test.ts +1301 -0
  45. package/src/tests/claude-managed-setup.test.ts +325 -0
  46. package/src/tests/devin-adapter.test.ts +677 -0
  47. package/src/tests/devin-api.test.ts +339 -0
  48. package/src/tests/integrations-http.test.ts +211 -0
  49. package/src/tests/migration-046-budgets.test.ts +327 -0
  50. package/src/tests/pricing-routes.test.ts +315 -0
  51. package/src/tests/prompt-template-remaining.test.ts +4 -0
  52. package/src/tests/prompt-template-session.test.ts +2 -2
  53. package/src/tests/provider-adapter.test.ts +1 -1
  54. package/src/tests/runner-budget-refused.test.ts +271 -0
  55. package/src/tests/session-costs-codex-recompute.test.ts +386 -0
  56. package/src/tools/poll-task.ts +13 -2
  57. package/src/tools/task-action.ts +92 -2
  58. package/src/tools/templates.ts +29 -0
  59. package/src/types.ts +116 -0
  60. package/src/utils/budget-backoff.ts +34 -0
  61. package/src/utils/credentials.ts +4 -0
  62. package/src/utils/provider-metadata.ts +9 -0
@@ -0,0 +1,871 @@
1
+ /**
2
+ * ClaudeManagedAdapter — harness provider for Anthropic's Managed Agents
3
+ * (`@anthropic-ai/sdk` beta surface).
4
+ *
5
+ * **Phase 3 status**: real session lifecycle. `createSession` opens (or resumes)
6
+ * a managed session, sends the composed user message, streams `events.stream`
7
+ * SSE events, and translates them into the cross-provider `ProviderEvent` union.
8
+ * `abort()` interrupts + archives. `canResume()` retrieves the session and
9
+ * inspects its status.
10
+ *
11
+ * Reference: thoughts/taras/plans/2026-04-28-claude-managed-agents-provider.md
12
+ *
13
+ * ### SDK shape notes (verified against
14
+ * `node_modules/@anthropic-ai/sdk/resources/beta/sessions/*.d.ts` on
15
+ * `@anthropic-ai/sdk@latest` (post-bump)):
16
+ *
17
+ * - Event types are dot-separated tags: `agent.message`, `agent.tool_use`,
18
+ * `agent.mcp_tool_use`, `agent.tool_result`, `agent.mcp_tool_result`,
19
+ * `agent.thinking`, `agent.thread_context_compacted`, `span.model_request_end`,
20
+ * `session.status_running`, `session.status_idle`, `session.status_terminated`,
21
+ * `session.error`, `session.deleted`, etc.
22
+ * - Session status field is `'rescheduling' | 'running' | 'idle' | 'terminated'`.
23
+ * "Archived" is not a status — it's signaled by `archived_at !== null`.
24
+ * `canResume` therefore rejects only `terminated` sessions and sessions whose
25
+ * `archived_at` is non-null.
26
+ * - `events.send` takes `{ events: [...] }` — an array, NOT a single event arg.
27
+ * - `events.stream` returns a `Stream<BetaManagedAgentsStreamSessionEvents>`
28
+ * which is an `AsyncIterable`.
29
+ * - `events.list` returns a `PagePromise` which is also `AsyncIterable` over
30
+ * `BetaManagedAgentsSessionEvent`.
31
+ * - `BetaManagedAgentsTextBlock` does NOT have a `cache_control` field in its
32
+ * TS definition, and the `events.send` endpoint rejects it as an unknown
33
+ * field (`Extra inputs are not permitted`). We therefore send plain text
34
+ * blocks and leave caching to the server; the only thing this adapter
35
+ * controls is the static-prefix shape (see `composeManagedUserMessage`).
36
+ */
37
+
38
+ import Anthropic from "@anthropic-ai/sdk";
39
+ // Real type usages (the Phase 1 imports become non-decorative as of this phase).
40
+ import type { BetaManagedAgentsAgent as Agent } from "@anthropic-ai/sdk/resources/beta/agents";
41
+ import type { BetaEnvironment as Environment } from "@anthropic-ai/sdk/resources/beta/environments";
42
+ import type {
43
+ BetaManagedAgentsAgentMCPToolResultEvent,
44
+ BetaManagedAgentsAgentMCPToolUseEvent,
45
+ BetaManagedAgentsAgentMessageEvent,
46
+ BetaManagedAgentsAgentThinkingEvent,
47
+ BetaManagedAgentsAgentThreadContextCompactedEvent,
48
+ BetaManagedAgentsAgentToolResultEvent,
49
+ BetaManagedAgentsAgentToolUseEvent,
50
+ BetaManagedAgentsSessionErrorEvent,
51
+ BetaManagedAgentsSessionStatusIdleEvent,
52
+ BetaManagedAgentsSessionStatusTerminatedEvent,
53
+ BetaManagedAgentsSpanModelRequestEndEvent,
54
+ BetaManagedAgentsStreamSessionEvents,
55
+ BetaManagedAgentsTextBlock,
56
+ BetaManagedAgentsSession as Session,
57
+ BetaManagedAgentsSessionEvent as SessionEvent,
58
+ } from "@anthropic-ai/sdk/resources/beta/sessions";
59
+ import type { SkillCreateResponse as Skill } from "@anthropic-ai/sdk/resources/beta/skills";
60
+
61
+ import { checkToolLoop } from "../hooks/tool-loop-detection";
62
+ import { scrubSecrets } from "../utils/secret-scrubber";
63
+ import { computeClaudeManagedCostUsd } from "./claude-managed-models";
64
+ import { createClaudeManagedSwarmEventHandler } from "./claude-managed-swarm-events";
65
+ import type {
66
+ CostData,
67
+ ProviderAdapter,
68
+ ProviderEvent,
69
+ ProviderResult,
70
+ ProviderSession,
71
+ ProviderSessionConfig,
72
+ } from "./types";
73
+
74
+ // Reference the short type aliases once at module level. Nothing is actually
75
+ // re-exported here: the `void` expressions below exist only so unused-import
76
+ // lints stay quiet for the `Agent` / `Environment` / `Skill` type imports above.
77
+ void (null as unknown as Agent);
78
+ void (null as unknown as Environment);
79
+ void (null as unknown as Skill);
80
+
81
+ /**
82
+ * Required env vars validated at construction time. Listing them in one place
83
+ * keeps the error messages consistent and makes it easy for Phase 2 (worker
84
+ * bootstrap / docker-entrypoint) to mirror the validation.
85
+ */
86
+ const REQUIRED_ENV_VARS = [
87
+ "ANTHROPIC_API_KEY",
88
+ "MANAGED_AGENT_ID",
89
+ "MANAGED_ENVIRONMENT_ID",
90
+ ] as const;
91
+
92
+ /**
93
+ * Default context window for managed Claude sessions when we don't have a
94
+ * model-specific override. Sized to match Sonnet 4.x (1M extended-context
95
+ * variant). The Phase 4 pricing-table commit will replace this with a
96
+ * per-model lookup.
97
+ */
98
+ const DEFAULT_CONTEXT_TOTAL_TOKENS = 1_000_000;
99
+
100
/**
 * Build the content blocks of the `user.message` event that starts a task.
 *
 * The result always holds exactly two text blocks, in this order:
 *
 *   1. Static prefix — the `[swarm worker]` banner carrying `agentId`, a blank
 *      line, then the composed system prompt. It is derived from `agentId`
 *      and `systemPrompt` only, so two configs that agree on both produce a
 *      byte-identical block no matter what `prompt` is. That stability is
 *      what lets the managed-agents service dedupe / cache it server-side.
 *   2. Task body — a `---` separator followed by `User request:` and the
 *      per-task prompt.
 *
 * Exported (named) so unit tests can assert the static-prefix invariant.
 *
 * NOTE: no `cache_control` marker is attached to either block. An earlier
 * revision set `cache_control: { type: "ephemeral" }` on block #1, but the
 * managed-agents `events.send` endpoint rejects unknown fields with
 * `events.0.content.0.cache_control: Extra inputs are not permitted`.
 * Caching is handled server-side; we only control the static-prefix shape.
 *
 * @param config - The `agentId`, `systemPrompt` and `prompt` of the session.
 * @returns The two text blocks, static prefix first.
 */
export function composeManagedUserMessage(
  config: Pick<ProviderSessionConfig, "agentId" | "systemPrompt" | "prompt">,
): BetaManagedAgentsTextBlock[] {
  const { agentId, systemPrompt, prompt } = config;

  const prefixBlock: BetaManagedAgentsTextBlock = {
    type: "text",
    text: `[swarm worker] agentId=${agentId}\n\n${systemPrompt}`,
  };
  const taskBlock: BetaManagedAgentsTextBlock = {
    type: "text",
    text: `---\n\nUser request:\n${prompt}`,
  };

  return [prefixBlock, taskBlock];
}
133
+
134
/**
 * Normalize the runner's `vcsRepo` to a fully-qualified GitHub HTTPS URL,
 * which is what the managed-agents
 * `BetaManagedAgentsGitHubRepositoryResourceParams.url` field expects.
 *
 * Accepted spellings (surrounding whitespace is ignored):
 *
 *   - `http://…` / `https://…` (scheme matched case-insensitively) — returned
 *     unchanged.
 *   - `git@github.com:owner/repo[.git]` or `ssh://git@github.com/owner/repo`
 *     — rewritten to `https://github.com/owner/repo[.git]`.
 *   - `github.com/owner/repo` (scheme-less host form, optional `www.`) —
 *     prefixed with `https://`.
 *   - `owner/repo` shorthand (leading slashes tolerated) — expanded to
 *     `https://github.com/owner/repo`.
 *
 * The SSH and scheme-less forms previously fell through to the shorthand
 * branch and produced nonsense such as `https://github.com/git@github.com:…`.
 * Results for the `owner/repo` and `http(s)://` forms are unchanged.
 *
 * @param vcsRepo - Repository reference in any of the forms listed above.
 * @returns A `https://github.com/...` URL, or the input URL when it already
 *   carries an HTTP(S) scheme.
 */
export function normalizeRepoUrl(vcsRepo: string): string {
  const repo = vcsRepo.trim();

  // Already a URL — pass through untouched.
  if (/^https?:\/\//i.test(repo)) {
    return repo;
  }

  // scp-like (`git@github.com:owner/repo`) or `ssh://git@github.com/owner/repo`.
  const sshMatch = /^(?:ssh:\/\/)?git@github\.com[:/](.+)$/i.exec(repo);
  if (sshMatch) {
    return `https://github.com/${sshMatch[1]}`;
  }

  // Host given without a scheme. GitHub owner names cannot contain dots, so
  // this can never be mistaken for `owner/repo` shorthand.
  const bareHostMatch = /^(?:www\.)?github\.com\/(.+)$/i.exec(repo);
  if (bareHostMatch) {
    return `https://github.com/${bareHostMatch[1]}`;
  }

  // `owner/repo` shorthand; drop stray leading slashes to avoid `//` in the path.
  return `https://github.com/${repo.replace(/^\/+/, "")}`;
}
147
+
148
/**
 * Create the all-zero `CostData` record a session starts from.
 *
 * Every counter (cost, token buckets, duration, turns) is `0`, `isError` is
 * `false` and `sessionId` is the empty string; only `taskId`, `agentId` and
 * `model` carry real values. The session mutates the returned object in place
 * as `span.model_request_end` events accumulate token counts, so each call
 * returns a fresh object.
 *
 * @param config - Session config supplying `taskId` and `agentId`.
 * @param model - Model identifier recorded on the cost record.
 * @returns A new zeroed `CostData`.
 */
function emptyCost(config: ProviderSessionConfig, model: string): CostData {
  const { taskId, agentId } = config;
  const zeroed: CostData = {
    sessionId: "",
    taskId,
    agentId,
    totalCostUsd: 0,
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheWriteTokens: 0,
    durationMs: 0,
    numTurns: 0,
    model,
    isError: false,
  };
  return zeroed;
}
168
+
169
/**
 * The slice of the Anthropic client that this adapter actually touches.
 *
 * Declared as a standalone interface so unit tests can hand in a small fake
 * instead of the full SDK. Most members may answer either synchronously or
 * through a promise (call sites wrap them in `Promise.resolve`); only
 * `events.send` is required to return a promise.
 */
export interface ManagedAgentsClient {
  beta: {
    sessions: {
      /** Open a new managed session. */
      create: (params: Record<string, unknown>) => Session | Promise<Session>;
      /** Fetch the current state of an existing session. */
      retrieve: (sessionId: string) => Session | Promise<Session>;
      /** Archive a session. */
      archive: (sessionId: string) => Session | Promise<Session>;
      events: {
        /** Send one or more events to the session — always an array. */
        send: (
          sessionId: string,
          params: { events: Array<Record<string, unknown>> },
        ) => Promise<unknown>;
        /** Open the live event stream of a session. */
        stream: (
          sessionId: string,
        ) =>
          | AsyncIterable<BetaManagedAgentsStreamSessionEvents>
          | Promise<AsyncIterable<BetaManagedAgentsStreamSessionEvents>>;
        /** Iterate the recorded event history of a session. */
        list: (
          sessionId: string,
        ) => AsyncIterable<SessionEvent> | Promise<AsyncIterable<SessionEvent>>;
      };
    };
  };
}
197
+
198
/**
 * Running session backed by a managed-agents `Session`. Mirrors `CodexSession`:
 * owns the SSE consumer, the JSONL log file handle, the abort controller, the
 * cost accumulator, and the listener buffer.
 *
 * Lifecycle: the constructor wires up state and immediately kicks off
 * `runSession()` in the background. Exactly one terminal `result` event is
 * emitted and the completion promise is settled exactly once, on one of four
 * paths: clean terminal status, cancellation, stream ended without a terminal
 * status, or unexpected exception.
 */
class ClaudeManagedSession implements ProviderSession {
  private readonly client: ManagedAgentsClient;
  private readonly _sessionId: string;
  /** Content of the initial `user.message`; `null` on resume (nothing is sent). */
  private readonly userMessageContent: BetaManagedAgentsTextBlock[] | null;
  /** All subscribers: the internal swarm handler (if any) plus `onEvent` consumers. */
  private readonly listeners: Array<(event: ProviderEvent) => void> = [];
  /** Events emitted before the first `onEvent` consumer attached; replayed to it. */
  private readonly eventQueue: ProviderEvent[] = [];
  private readonly logFileHandle: ReturnType<ReturnType<typeof Bun.file>["writer"]>;
  private readonly startedAt = Date.now();
  private readonly completionPromise: Promise<ProviderResult>;
  private resolveCompletion!: (result: ProviderResult) => void;
  private readonly abortController = new AbortController();
  /** Event ids already observed (pre-seeded on resume) — used for stream dedup. */
  private readonly seenEventIds: Set<string>;
  private readonly cost: CostData;
  /** Per-task taskId — captured for `checkToolLoop` lookups. */
  private readonly taskId: string | null;
  private aborted = false;
  private settled = false;
  /**
   * Whether an external consumer has subscribed through `onEvent`. Tracked
   * separately from `listeners.length` because the constructor may already
   * have registered the internal swarm handler; without this flag, events
   * emitted before the consumer attaches would reach only that handler and
   * would never be replayed to the consumer.
   */
  private consumerAttached = false;

  /**
   * @param client - Managed-agents client (real SDK or a test fake).
   * @param sessionId - Id of the managed session to drive.
   * @param config - Session config (log file, model, task / API context).
   * @param userMessageContent - Blocks for the initial `user.message`, or
   *   `null` to send nothing (resume).
   * @param seenEventIds - Ids of events already delivered; matching stream
   *   events are skipped.
   */
  constructor(
    client: ManagedAgentsClient,
    sessionId: string,
    config: ProviderSessionConfig,
    userMessageContent: BetaManagedAgentsTextBlock[] | null,
    seenEventIds: Set<string> = new Set(),
  ) {
    this.client = client;
    this._sessionId = sessionId;
    this.userMessageContent = userMessageContent;
    this.seenEventIds = seenEventIds;
    this.cost = emptyCost(config, config.model);
    this.taskId = config.taskId;
    this.logFileHandle = Bun.file(config.logFile).writer();
    this.completionPromise = new Promise<ProviderResult>((resolve) => {
      this.resolveCompletion = resolve;
    });

    // Phase 5: adapter-side swarm hooks. Lower-latency cancellation poll,
    // tool-loop detection (the handler also calls `checkToolLoop` on
    // tool_start; we additionally call it inline below for the blocked-result
    // emit), heartbeat, activity ping, and context-usage reporting. Skipped
    // when there's no task or API context to talk to.
    if (config.taskId && config.apiUrl && config.apiKey) {
      const abortRef = { current: this.abortController };
      const handler = createClaudeManagedSwarmEventHandler({
        apiUrl: config.apiUrl,
        apiKey: config.apiKey,
        agentId: config.agentId,
        taskId: config.taskId,
        abortRef,
        client: this.client,
        managedSessionId: this._sessionId,
      });
      this.listeners.push(handler);
    }

    // Kick the SSE loop asynchronously so the constructor can return.
    void this.runSession();
  }

  get sessionId(): string | undefined {
    return this._sessionId;
  }

  /**
   * Subscribe to provider events. The first subscriber is handed every event
   * emitted so far (in order) before it starts receiving live ones; later
   * subscribers only see live events.
   */
  onEvent(listener: (event: ProviderEvent) => void): void {
    this.listeners.push(listener);
    this.consumerAttached = true;
    // Drain the backlog in one step so it is replayed at most once.
    const backlog = this.eventQueue.splice(0, this.eventQueue.length);
    for (const event of backlog) {
      try {
        listener(event);
      } catch {
        // Bad listener must not kill the session.
      }
    }
  }

  /** Resolves with the final `ProviderResult` once the session has settled. */
  async waitForCompletion(): Promise<ProviderResult> {
    return this.completionPromise;
  }

  /**
   * Idempotent abort. Sets the local flag and fires the abort controller,
   * then sends `user.interrupt` and archives the managed session
   * out-of-band. The controller's signal is not passed to any SDK call; the
   * SSE loop observes it on the next stream event, when the stream ends, or
   * when the stream throws, and that path emits the cancelled `result` event
   * and settles the completion promise.
   */
  async abort(): Promise<void> {
    if (this.aborted) return;
    this.aborted = true;
    this.abortController.abort();
    // Fire-and-forget interrupt + archive. We don't block the caller on these
    // round-trips; the SSE loop (or its catch path) settles the promise.
    void this.client.beta.sessions.events
      .send(this._sessionId, {
        events: [{ type: "user.interrupt" }],
      })
      .catch(() => {
        // Already-archived / already-terminated sessions return errors here.
        // Swallow — the cancel intent is recorded in `aborted`.
      });
    void Promise.resolve(this.client.beta.sessions.archive(this._sessionId)).catch(() => {
      // Same — best-effort.
    });
  }

  /**
   * Central event emit — runs `scrubSecrets` over `raw_log`/`raw_stderr`
   * content before any egress (log file write OR listener dispatch). Mirrors
   * `CodexSession.emit` (codex-adapter.ts:347-374). Kept private; this class
   * is the only emitter.
   *
   * Dispatches to every registered listener and, until an external consumer
   * has attached via `onEvent`, also buffers the event for replay.
   */
  private emit(event: ProviderEvent): void {
    const scrubbed: ProviderEvent =
      event.type === "raw_log" || event.type === "raw_stderr"
        ? { ...event, content: scrubSecrets(event.content) }
        : event;
    try {
      this.logFileHandle.write(
        `${JSON.stringify({ ...scrubbed, timestamp: new Date().toISOString() })}\n`,
      );
    } catch {
      // Log writer failure must not break the event stream.
    }
    for (const listener of this.listeners) {
      try {
        listener(scrubbed);
      } catch {
        // Swallow listener errors.
      }
    }
    if (!this.consumerAttached) {
      // Nobody outside the adapter is listening yet — keep it for replay.
      this.eventQueue.push(scrubbed);
    }
  }

  /** Resolve the completion promise; later calls are ignored. */
  private settle(result: ProviderResult): void {
    if (this.settled) return;
    this.settled = true;
    this.resolveCompletion(result);
  }

  /**
   * Build the terminal `CostData` snapshot.
   *
   * Phase 4 wires real USD pricing:
   *   1. Per-token cost via `computeClaudeManagedCostUsd` (looks up the per-Mtok
   *      rates in `claude-managed-models.ts`).
   *   2. Anthropic's $0.08/session-hour runtime fee — billed continuously by
   *      Anthropic regardless of model usage, so we add it here to surface in
   *      the swarm's per-session cost UI.
   *
   * @param isError - Value recorded in the snapshot's `isError` field.
   */
  private snapshotCost(isError: boolean): CostData {
    const durationMs = Date.now() - this.startedAt;
    const tokenCostUsd = computeClaudeManagedCostUsd(
      this.cost.model,
      this.cost.inputTokens ?? 0,
      this.cost.outputTokens ?? 0,
      this.cost.cacheReadTokens ?? 0,
      this.cost.cacheWriteTokens ?? 0,
    );
    // $0.08 / session-hour. Sandbox runtime is billed by wallclock, so we
    // amortize linearly across the session's `durationMs`.
    const runtimeFeeUsd = (durationMs / 3_600_000) * 0.08;
    return {
      ...this.cost,
      durationMs,
      isError,
      totalCostUsd: tokenCostUsd + runtimeFeeUsd,
    };
  }

  /**
   * Tool-loop detection: fires asynchronously alongside each `tool_start`
   * emit. If `checkToolLoop` reports `blocked: true`, we surface the reason
   * via `raw_stderr` and trigger `abortController.abort()` — the SSE loop
   * then takes its cancellation path, emits the cancelled `result` and
   * settles.
   *
   * Made non-blocking so the SSE for-await loop stays synchronous in the hot
   * path. Errors from `checkToolLoop` (file I/O on `/tmp`) are swallowed —
   * loop detection failure must never kill a real session.
   */
  private runToolLoopCheck(toolName: string, args: unknown): void {
    if (!this.taskId) return;
    const argRecord = args && typeof args === "object" ? (args as Record<string, unknown>) : {};
    void checkToolLoop(this.taskId, toolName, argRecord)
      .then((result) => {
        if (result.blocked) {
          this.emit({
            type: "raw_stderr",
            content: `[claude-managed] Tool-loop detection blocked further calls: ${result.reason ?? "(no reason given)"}\n`,
          });
          this.abortController.abort();
        }
      })
      .catch(() => {});
  }

  /**
   * Translate one Anthropic SSE event into zero-or-more `ProviderEvent`s.
   *
   * @returns `terminal: true` when the caller should stop consuming the
   *   stream (idle / terminated status, or a `session.error` whose retry
   *   status is `terminal`); `isError` flags an error outcome; and
   *   `assistantText` carries the text of an `agent.message`, if any.
   */
  private handleEvent(event: BetaManagedAgentsStreamSessionEvents): {
    terminal: boolean;
    isError: boolean;
    assistantText?: string;
  } {
    // Always raw-log first (mirrors codex-adapter.ts:467).
    this.emit({ type: "raw_log", content: JSON.stringify(event) });

    let assistantText: string | undefined;

    switch (event.type) {
      case "agent.message": {
        const msg = event as BetaManagedAgentsAgentMessageEvent;
        const text = msg.content.map((block) => block.text).join("");
        if (text) {
          this.emit({ type: "message", role: "assistant", content: text });
          assistantText = text;
        }
        return { terminal: false, isError: false, assistantText };
      }
      case "agent.tool_use": {
        const tu = event as BetaManagedAgentsAgentToolUseEvent;
        this.runToolLoopCheck(tu.name, tu.input);
        this.emit({
          type: "tool_start",
          toolCallId: tu.id,
          toolName: tu.name,
          args: tu.input,
        });
        return { terminal: false, isError: false };
      }
      case "agent.mcp_tool_use": {
        const tu = event as BetaManagedAgentsAgentMCPToolUseEvent;
        // MCP tools are reported as `<server>:<tool>`.
        const fqToolName = `${tu.mcp_server_name}:${tu.name}`;
        this.runToolLoopCheck(fqToolName, tu.input);
        this.emit({
          type: "tool_start",
          toolCallId: tu.id,
          toolName: fqToolName,
          args: tu.input,
        });
        return { terminal: false, isError: false };
      }
      case "agent.tool_result": {
        const tr = event as BetaManagedAgentsAgentToolResultEvent;
        this.emit({
          type: "tool_end",
          toolCallId: tr.tool_use_id,
          // We don't have the tool name on the result event itself — the
          // dashboard already keys off `toolCallId` to pair start/end, so
          // passing through an empty string here is fine.
          toolName: "",
          result: { content: tr.content ?? [], isError: tr.is_error ?? false },
        });
        return { terminal: false, isError: false };
      }
      case "agent.mcp_tool_result": {
        const tr = event as BetaManagedAgentsAgentMCPToolResultEvent;
        this.emit({
          type: "tool_end",
          toolCallId: tr.mcp_tool_use_id,
          toolName: "",
          result: { content: tr.content ?? [], isError: tr.is_error ?? false },
        });
        return { terminal: false, isError: false };
      }
      case "agent.thinking": {
        const th = event as BetaManagedAgentsAgentThinkingEvent;
        this.emit({
          type: "custom",
          name: "claude-managed.thinking",
          data: { id: th.id, processedAt: th.processed_at },
        });
        return { terminal: false, isError: false };
      }
      case "agent.thread_context_compacted": {
        // The SDK doesn't currently expose pre/post-compact token counts on
        // this event. Emit a `compaction` ProviderEvent with the values we
        // *do* know; consumers that need richer data can subscribe to
        // `raw_log` for the original payload.
        const _cc = event as BetaManagedAgentsAgentThreadContextCompactedEvent;
        this.emit({
          type: "compaction",
          preCompactTokens: this.cost.inputTokens ?? 0,
          compactTrigger: "auto",
          contextTotalTokens: DEFAULT_CONTEXT_TOTAL_TOKENS,
        });
        return { terminal: false, isError: false };
      }
      case "span.model_request_end": {
        const sp = event as BetaManagedAgentsSpanModelRequestEndEvent;
        const usage = sp.model_usage;
        // Accumulate this request's usage into the running session totals.
        this.cost.inputTokens = (this.cost.inputTokens ?? 0) + usage.input_tokens;
        this.cost.outputTokens = (this.cost.outputTokens ?? 0) + usage.output_tokens;
        this.cost.cacheReadTokens =
          (this.cost.cacheReadTokens ?? 0) + usage.cache_read_input_tokens;
        this.cost.cacheWriteTokens =
          (this.cost.cacheWriteTokens ?? 0) + usage.cache_creation_input_tokens;
        this.cost.numTurns += 1;

        // NOTE(review): `used` is the cumulative input+output total across
        // all requests so far, not the size of the latest request's context —
        // confirm this is the figure the dashboard expects.
        const used = (this.cost.inputTokens ?? 0) + (this.cost.outputTokens ?? 0);
        const total = DEFAULT_CONTEXT_TOTAL_TOKENS;
        this.emit({
          type: "context_usage",
          contextUsedTokens: used,
          contextTotalTokens: total,
          contextPercent: Math.min(100, (used / total) * 100),
          outputTokens: this.cost.outputTokens ?? 0,
        });
        return { terminal: false, isError: false };
      }
      case "session.status_running":
      case "session.status_rescheduled":
      case "span.model_request_start":
      case "session.deleted":
      case "user.message":
      case "user.interrupt":
      case "user.tool_confirmation":
      case "user.custom_tool_result":
      case "agent.custom_tool_use": {
        // No-op for Phase 3. Future phases may surface these as `progress`
        // events (the dashboard tracks status transitions today via
        // `session_init` + `result` only).
        return { terminal: false, isError: false };
      }
      case "session.error": {
        const se = event as BetaManagedAgentsSessionErrorEvent;
        this.emit({
          type: "error",
          message: se.error.message,
          category: "managed_agent_error",
        });
        // Only the `terminal` retry status fully kills the session. Other
        // states (`retrying`, `exhausted`) are non-fatal; we let the stream
        // continue and rely on `status_terminated` / `status_idle` for the
        // terminal hand-off.
        const fatal = se.error.retry_status?.type === "terminal";
        return { terminal: fatal, isError: true };
      }
      case "session.status_terminated": {
        const _t = event as BetaManagedAgentsSessionStatusTerminatedEvent;
        return { terminal: true, isError: true };
      }
      case "session.status_idle": {
        const _i = event as BetaManagedAgentsSessionStatusIdleEvent;
        return { terminal: true, isError: false };
      }
      default: {
        // SDK occasionally adds new event variants. Surface the unknown via
        // raw_log only — already done at the top of the function.
        return { terminal: false, isError: false };
      }
    }
  }

  /**
   * The SSE consumer. Opens the stream BEFORE sending the user message so we
   * never miss the agent's response (race-safe ordering, per the quickstart
   * docs: https://platform.claude.com/docs/en/managed-agents/quickstart).
   *
   * Never rejects: every outcome is reported through a `result` event plus
   * `settle`, and the log writer is closed in `finally`.
   */
  private async runSession(): Promise<void> {
    let lastAssistantText: string | undefined;
    let sawTerminal = false;
    let isError = false;

    /** True once either `abort()` ran or the abort controller fired. */
    const wasCancelled = (): boolean => this.aborted || this.abortController.signal.aborted;

    /** Emit the cancelled `result` and settle with exit code 130. */
    const finishCancelled = (): void => {
      const cost = this.snapshotCost(true);
      this.emit({
        type: "result",
        cost,
        isError: true,
        errorCategory: "cancelled",
      });
      this.settle({
        exitCode: 130,
        sessionId: this._sessionId,
        cost,
        isError: true,
        failureReason: "cancelled",
      });
    };

    try {
      // 1. Open the stream first.
      const stream = await Promise.resolve(
        this.client.beta.sessions.events.stream(this._sessionId),
      );

      // 2. Send the user message (skipped on resume — `userMessageContent`
      //    is null then).
      if (this.userMessageContent) {
        await this.client.beta.sessions.events.send(this._sessionId, {
          events: [
            {
              type: "user.message",
              content: this.userMessageContent as unknown as Record<string, unknown>[],
            },
          ],
        });
      }

      // 3. Emit `session_init` once the session is wired up. Listeners
      //    attached via `onEvent` will see this either immediately (if they
      //    attached pre-emit) or via the queue flush.
      this.emit({
        type: "session_init",
        sessionId: this._sessionId,
        provider: "claude" as const,
        providerMeta: { managed: true },
      });

      // 4. Drain the SSE stream.
      try {
        for await (const event of stream) {
          // Phase 5: external abort (swarm-events poll, tool-loop detection)
          // can fire `abortController.abort()` without crashing the SSE
          // stream. Bail proactively so the cancel path runs.
          if (this.abortController.signal.aborted) {
            throw Object.assign(new Error("aborted"), { name: "AbortError" });
          }
          // Resume dedup: skip events we already saw via `events.list`.
          if (this.seenEventIds.size > 0 && "id" in event && event.id) {
            if (this.seenEventIds.has(event.id)) {
              continue;
            }
            this.seenEventIds.add(event.id);
          }
          const out = this.handleEvent(event);
          if (out.assistantText) {
            lastAssistantText = out.assistantText;
          }
          if (out.terminal) {
            sawTerminal = true;
            isError = out.isError;
            break;
          }
        }
      } catch (err) {
        if (wasCancelled() || (err instanceof Error && err.name === "AbortError")) {
          // Cancellation path — the abort controller fired and we crashed
          // out of the for-await. Emit the cancelled `result` and return.
          finishCancelled();
          return;
        }
        throw err;
      }

      if (!sawTerminal) {
        // 5a. The stream closed quietly after an abort (e.g. the archive
        //     request ended it before another event arrived). That is a
        //     cancellation, not a broken stream.
        if (wasCancelled()) {
          finishCancelled();
          return;
        }

        // 5b. Stream-broken-without-terminal path: surface as an error so
        //     the runner knows the run wasn't a clean idle-completion.
        const cost = this.snapshotCost(true);
        this.emit({
          type: "error",
          message: "Managed-agents SSE stream ended without a terminal status event.",
          category: "stream_ended",
        });
        this.emit({
          type: "result",
          cost,
          isError: true,
          errorCategory: "stream_ended",
        });
        this.settle({
          exitCode: 1,
          sessionId: this._sessionId,
          cost,
          isError: true,
          failureReason: "stream_ended",
        });
        return;
      }

      // 6. Clean terminal. Emit `result` and settle.
      const cost = this.snapshotCost(isError);
      this.emit({
        type: "result",
        cost,
        isError,
        errorCategory: isError ? "terminated" : undefined,
        output: lastAssistantText,
      });
      this.settle({
        exitCode: isError ? 1 : 0,
        sessionId: this._sessionId,
        cost,
        isError,
        output: lastAssistantText,
        failureReason: isError ? "terminated" : undefined,
      });
    } catch (err) {
      // A failure while opening the stream or sending the first message
      // after an abort is a consequence of the cancel (the session was
      // interrupted / archived), so report it as cancelled.
      if (wasCancelled()) {
        finishCancelled();
        return;
      }
      const message = err instanceof Error ? err.message : String(err);
      this.emit({ type: "raw_stderr", content: `[claude-managed] Error: ${message}\n` });
      this.emit({ type: "error", message });
      const cost = this.snapshotCost(true);
      this.emit({ type: "result", cost, isError: true, errorCategory: "exception" });
      this.settle({
        exitCode: 1,
        sessionId: this._sessionId,
        cost,
        isError: true,
        failureReason: message,
      });
    } finally {
      try {
        await this.logFileHandle.end();
      } catch {
        // Ignore log writer cleanup failures.
      }
    }
  }
}
716
+
717
/**
 * Provider adapter that runs swarm tasks as Anthropic-hosted managed-agent
 * sessions.
 *
 * The adapter only opens (or re-attaches to) a session through
 * `client.beta.sessions` and hands it to a `ClaudeManagedSession`; the
 * streaming itself is handled by that session object.
 */
export class ClaudeManagedAdapter implements ProviderAdapter {
  readonly name = "claude-managed";

  // The remote sandbox reaches our `/mcp` endpoint on its own, while the worker
  // process merely relays SSE — so there is MCP, but no local environment
  // (no /workspace, no PM2, no agent-fs, no skills FS).
  readonly traits = { hasMcp: true, hasLocalEnvironment: false };

  /** Anthropic API key. Private on purpose; it must never be logged. */
  private readonly apiKey: string;
  /** ID of the managed agent (created by the `claude-managed-setup` CLI in Phase 2). */
  private readonly agentId: string;
  /** ID of the managed environment (created by the `claude-managed-setup` CLI in Phase 2). */
  private readonly environmentId: string;
  /**
   * Client used for every managed-agents call. Built in the constructor from
   * the API key unless a caller (typically a test) injects one through the
   * `client` constructor option.
   */
  private readonly client: ManagedAgentsClient;

  /**
   * @param opts.client Optional pre-built client; when given, no Anthropic SDK
   *   client is constructed.
   * @throws Error when any variable named in `REQUIRED_ENV_VARS` is unset or empty.
   */
  constructor(opts: { client?: ManagedAgentsClient } = {}) {
    const missing = REQUIRED_ENV_VARS.filter((key) => !process.env[key]);
    if (missing.length > 0) {
      throw new Error(
        `[claude-managed] Missing required env var(s): ${missing.join(", ")}. ` +
          `Run \`bun run src/cli.tsx claude-managed-setup\` to create an Anthropic-side ` +
          `agent + environment and persist their IDs to swarm_config.`,
      );
    }

    const env = process.env;
    this.apiKey = env.ANTHROPIC_API_KEY as string;
    this.agentId = env.MANAGED_AGENT_ID as string;
    this.environmentId = env.MANAGED_ENVIRONMENT_ID as string;

    // The double cast is a deliberate boundary: `ManagedAgentsClient` is a
    // narrower, test-friendly view of the SDK's `client.beta` surface, and
    // TypeScript cannot prove the two line up without every method signature
    // being spelled out on both sides.
    this.client = opts.client
      ? opts.client
      : (new Anthropic({ apiKey: this.apiKey }) as unknown as ManagedAgentsClient);
  }

  /**
   * Opens a managed session for `config`, or re-attaches to an existing one.
   *
   * - With `config.resumeSessionId`: nothing is created and no new
   *   `user.message` is prepared (the agent already has one in flight); the
   *   session is given the ids of already-recorded events so replayed events
   *   can be skipped.
   * - Otherwise: the user message is composed, a session is created against
   *   the configured agent + environment, and the session starts with an
   *   empty seen-event set.
   *
   * @returns The session wrapper for the new or resumed managed session.
   */
  async createSession(config: ProviderSessionConfig): Promise<ProviderSession> {
    const resumeId = config.resumeSessionId;
    if (resumeId) {
      const alreadySeen = await this.collectSeenEventIds(resumeId);
      return new ClaudeManagedSession(this.client, resumeId, config, null, alreadySeen);
    }

    // Compose the message before anything else so a failure here never
    // leaves a freshly created remote session behind.
    const firstMessage: BetaManagedAgentsTextBlock[] | null = composeManagedUserMessage(config);
    const createParams = this.buildCreateParams(config);
    const created = await this.client.beta.sessions.create(createParams);
    return new ClaudeManagedSession(
      this.client,
      created.id,
      config,
      firstMessage,
      new Set<string>(),
    );
  }

  /**
   * Reports whether `sessionId` can be re-attached to: the session must be
   * retrievable, not `terminated`, and not archived (`archived_at` unset).
   * Any error while retrieving it counts as "cannot resume".
   */
  async canResume(sessionId: string): Promise<boolean> {
    try {
      const session = await this.client.beta.sessions.retrieve(sessionId);
      return session.status !== "terminated" && session.archived_at == null;
    } catch {
      return false;
    }
  }

  /** Renders a command name in slash-command form, e.g. `foo` -> `/foo`. */
  formatCommand(commandName: string): string {
    return `/${commandName}`;
  }

  /**
   * Gathers the ids of events already recorded on `sessionId` via
   * `events.list`.
   *
   * Best-effort by design: if listing fails (even part-way through), whatever
   * was collected so far is returned. Per the original design note, the worst
   * outcome is a few duplicate events, which runner-side dedup is expected to
   * absorb.
   */
  private async collectSeenEventIds(sessionId: string): Promise<Set<string>> {
    const seen = new Set<string>();
    try {
      const history = await this.client.beta.sessions.events.list(sessionId);
      for await (const event of history) {
        if ("id" in event && event.id) {
          seen.add(event.id);
        }
      }
    } catch {
      // Intentionally ignored — see the method comment.
    }
    return seen;
  }

  /**
   * Assembles the `sessions.create` payload for a fresh session.
   *
   * `resources` is only present when `config.vcsRepo` is set, and `vault_ids`
   * only when at least one vault env var is non-empty.
   */
  private buildCreateParams(config: ProviderSessionConfig): Record<string, unknown> {
    const params: Record<string, unknown> = {
      agent: this.agentId,
      environment_id: this.environmentId,
      title: `Task ${config.taskId}`,
      metadata: {
        swarmAgentId: config.agentId,
        swarmTaskId: config.taskId,
      },
    };

    if (config.vcsRepo) {
      // Shape follows the SDK's `BetaManagedAgentsGitHubRepositoryResourceParams`:
      //   { type: 'github_repository', url, authorization_token, checkout?: { type: 'branch', name } }
      // The branch is pinned to "main" because `ProviderSessionConfig` carries
      // only the repo identifier, not a per-task branch.
      //
      // Auth: a literal PAT from `MANAGED_GITHUB_TOKEN` is sent when present.
      // When it is absent the required `authorization_token` field is sent as
      // an empty string rather than omitting `resources`, so a missing
      // credential shows up as an authentication error instead of the repo
      // being dropped silently. (Vault-based GitHub auth travels separately via
      // `vault_ids` below.)
      params.resources = [
        {
          type: "github_repository",
          url: normalizeRepoUrl(config.vcsRepo),
          authorization_token: process.env.MANAGED_GITHUB_TOKEN ?? "",
          checkout: { type: "branch", name: "main" },
        },
      ];
    }

    // A session may link several vaults. The MCP vault carries the bearer
    // credential for our `/mcp` endpoint; the GitHub vault carries the
    // credential for the `github_repository` resource. Either may be unset,
    // and both may point at the same vault — hence the Set.
    const vaultIds = new Set<string>();
    for (const candidate of [
      process.env.MANAGED_MCP_VAULT_ID,
      process.env.MANAGED_GITHUB_VAULT_ID,
    ]) {
      if (candidate) {
        vaultIds.add(candidate);
      }
    }
    if (vaultIds.size > 0) {
      params.vault_ids = Array.from(vaultIds);
    }

    return params;
  }
}