@desplega.ai/agent-swarm 1.85.0 → 1.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/README.md +1 -0
  2. package/openapi.json +1 -1
  3. package/package.json +8 -6
  4. package/src/be/db.ts +44 -0
  5. package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
  6. package/src/be/modelsdev-cache.json +152028 -0
  7. package/src/be/modelsdev-cache.ts +46 -0
  8. package/src/be/seed-pricing.ts +7 -44
  9. package/src/cli.tsx +12 -2
  10. package/src/commands/codex-session-runner.ts +132 -0
  11. package/src/commands/credential-wait.ts +2 -2
  12. package/src/commands/provider-credentials.ts +10 -5
  13. package/src/commands/runner.ts +3 -3
  14. package/src/prompts/base-prompt.ts +49 -3
  15. package/src/providers/claude-adapter.ts +83 -2
  16. package/src/providers/claude-managed-models.ts +18 -2
  17. package/src/providers/codex-adapter.ts +417 -97
  18. package/src/providers/codex-models.ts +9 -2
  19. package/src/providers/index.ts +28 -19
  20. package/src/providers/pricing-sources.md +7 -4
  21. package/src/providers/swarm-events-shared.ts +14 -0
  22. package/src/slack/HEURISTICS.md +5 -1
  23. package/src/slack/handlers.test.ts +35 -0
  24. package/src/slack/handlers.ts +79 -2
  25. package/src/tests/base-prompt.test.ts +46 -8
  26. package/src/tests/claude-managed-adapter.test.ts +4 -4
  27. package/src/tests/codex-adapter-otel.test.ts +4 -4
  28. package/src/tests/codex-adapter.test.ts +20 -7
  29. package/src/tests/codex-swarm-events.test.ts +35 -0
  30. package/src/tests/context-window.test.ts +1 -0
  31. package/src/tests/credential-check.test.ts +48 -29
  32. package/src/tests/entrypoint-config-env-export.test.ts +81 -0
  33. package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
  34. package/src/tests/migration-046-budgets.test.ts +6 -5
  35. package/src/tests/pricing-routes.test.ts +6 -5
  36. package/src/tests/provider-adapter.test.ts +10 -10
  37. package/src/tests/provider-command-format.test.ts +4 -4
  38. package/src/tests/session-costs-codex-recompute.test.ts +25 -0
  39. package/src/tools/send-task.ts +30 -9
  40. package/src/utils/context-window.ts +1 -0
  41. package/templates/schedules/daily-blocker-digest/config.json +13 -0
  42. package/templates/schedules/daily-blocker-digest/content.md +150 -0
  43. package/templates/schedules/daily-compounding-reflection/config.json +21 -0
  44. package/templates/schedules/daily-compounding-reflection/content.md +210 -0
  45. package/templates/schedules/daily-hn-briefing/config.json +13 -0
  46. package/templates/schedules/daily-hn-briefing/content.md +97 -0
  47. package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
  48. package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
  49. package/templates/schedules/gtm-weekly-review/config.json +13 -0
  50. package/templates/schedules/gtm-weekly-review/content.md +58 -0
  51. package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
  52. package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
  53. package/templates/schema.ts +26 -0
  54. package/templates/skills/agentmail-sending/config.json +13 -0
  55. package/templates/skills/agentmail-sending/content.md +48 -0
  56. package/templates/skills/artifacts/config.json +13 -0
  57. package/templates/skills/artifacts/content.md +87 -0
  58. package/templates/skills/browser-use-cloud/config.json +13 -0
  59. package/templates/skills/browser-use-cloud/content.md +155 -0
  60. package/templates/skills/desloppify/config.json +13 -0
  61. package/templates/skills/desloppify/content.md +201 -0
  62. package/templates/skills/exa-search/config.json +13 -0
  63. package/templates/skills/exa-search/content.md +106 -0
  64. package/templates/skills/jira-interaction/config.json +13 -0
  65. package/templates/skills/jira-interaction/content.md +252 -0
  66. package/templates/skills/kapso-whatsapp/config.json +13 -0
  67. package/templates/skills/kapso-whatsapp/content.md +369 -0
  68. package/templates/skills/kv-storage/config.json +13 -0
  69. package/templates/skills/kv-storage/content.md +111 -0
  70. package/templates/skills/linear-interaction/config.json +20 -0
  71. package/templates/skills/linear-interaction/content.md +230 -0
  72. package/templates/skills/pages/config.json +18 -0
  73. package/templates/skills/pages/content.md +85 -0
  74. package/templates/skills/profile-corruption-escalation/config.json +13 -0
  75. package/templates/skills/profile-corruption-escalation/content.md +105 -0
  76. package/templates/skills/scheduled-task-resilience/config.json +13 -0
  77. package/templates/skills/scheduled-task-resilience/content.md +95 -0
  78. package/templates/skills/sprite-cli/config.json +13 -0
  79. package/templates/skills/sprite-cli/content.md +133 -0
  80. package/templates/skills/turso-interaction/config.json +13 -0
  81. package/templates/skills/turso-interaction/content.md +192 -0
  82. package/templates/skills/workflow-iterate/config.json +18 -0
  83. package/templates/skills/workflow-iterate/content.md +399 -0
  84. package/templates/skills/workflow-structured-output/config.json +13 -0
  85. package/templates/skills/workflow-structured-output/content.md +101 -0
  86. package/templates/skills/x-api-interactions/config.json +13 -0
  87. package/templates/skills/x-api-interactions/content.md +109 -0
  88. package/templates/workflows/autopilot/config.json +13 -0
  89. package/templates/workflows/autopilot/content.md +58 -0
  90. package/templates/workflows/linear-drain-loop/config.json +21 -0
  91. package/templates/workflows/linear-drain-loop/content.md +72 -0
  92. package/templates/workflows/ralph-loop/config.json +13 -0
  93. package/templates/workflows/ralph-loop/content.md +75 -0
@@ -5,8 +5,11 @@ Operators bumping a rate by hand should also update this file.
5
5
 
6
6
  ## Primary: vendored models.dev snapshot
7
7
 
8
- - **Path**: `ui/src/lib/modelsdev-cache.json`
9
- - **Loaded by**: `src/be/seed-pricing.ts` `seedPricingFromModelsDev()`,
8
+ - **Source-of-truth path**: `src/be/modelsdev-cache.json`
9
+ - **UI compatibility path**: `ui/src/lib/modelsdev-cache.json` symlinks to the
10
+ backend snapshot so existing UI imports keep working.
11
+ - **Loaded by**: `src/be/modelsdev-cache.ts` → `src/be/seed-pricing.ts` →
12
+ `seedPricingFromModelsDev()`,
10
13
  called from `src/server.ts` after `initDb`.
11
14
  - **Projection rules** (see the same module for code-level detail):
12
15
  - Anthropic models → rows under `provider='claude'` AND `provider='claude-managed'`.
@@ -23,7 +26,7 @@ Operators bumping a rate by hand should also update this file.
23
26
  - Run `bun run scripts/refresh-modelsdev-pricing.ts` (Phase 2 — adds the
24
27
  script). It fetches the latest snapshot from models.dev, diffs against
25
28
  the vendored copy, prints a summary, and writes the new file.
26
- - Commit the regenerated `modelsdev-cache.json` together with a bump
29
+ - Commit the regenerated `src/be/modelsdev-cache.json` together with a bump
27
30
  note in the PR description.
28
31
 
29
32
  ## Manual overrides
@@ -47,6 +50,6 @@ no input/output pricing rows at the lookup time, the row is persisted with
47
50
  `costSource='unpriced'` (rather than 'harness'). The UI surfaces this as a
48
51
  yellow badge.
49
52
 
50
- To fix: either add the model to `modelsdev-cache.json` (preferred — the
53
+ To fix: either add the model to `src/be/modelsdev-cache.json` (preferred — the
51
54
  upstream snapshot probably needs refreshing) or add a manual override row via
52
55
  the existing admin route `POST /api/pricing`.
@@ -119,6 +119,13 @@ export function createSwarmEventHandler(
119
119
  };
120
120
  const isCancelled = data.cancelled?.some((t) => t.id === taskId);
121
121
  if (isCancelled) {
122
+ // Log BEFORE aborting so the reason is visible in the worker
123
+ // transcript even when the abort propagates immediately. Without
124
+ // this, cancellations originating from this poll were invisible
125
+ // (only the runner-side poll in runner.ts logs them).
126
+ console.log(
127
+ `[swarm-events] aborting task ${taskId}: cancelled via /cancelled-tasks poll`,
128
+ );
122
129
  opts.abortRef.current?.abort();
123
130
  if (opts.onCancel) {
124
131
  try {
@@ -141,6 +148,13 @@ export function createSwarmEventHandler(
141
148
  void checkToolLoop(taskId, toolName, argRecord)
142
149
  .then((result) => {
143
150
  if (result.blocked) {
151
+ // Surface the loop-detector's reason. Without this log, the abort
152
+ // was indistinguishable from a /cancelled-tasks abort or a runner
153
+ // SIGTERM. `result.reason` already carries the diagnostic detail
154
+ // ("Tool X called 15 times…", "ping-pong between A and B…").
155
+ console.log(
156
+ `[swarm-events] aborting task ${taskId}: tool-loop detected — ${result.reason ?? "unknown reason"}`,
157
+ );
144
158
  opts.abortRef.current?.abort();
145
159
  }
146
160
  })
@@ -29,9 +29,13 @@ When someone @mentions the bot in a thread, the router checks whether a worker a
29
29
 
30
30
  When enabled, thread replies that do NOT @mention the bot are captured, buffered, and batched into a single follow-up task. This allows humans to give multi-message feedback in a thread without needing to @mention the bot each time.
31
31
 
32
+ A thread counts as having swarm activity if **either**:
33
+ - a Slack task is already linked to it via `slackChannelId` + `slackThreadTs` (someone started it by @mentioning the bot), **or**
34
+ - the swarm itself posted the thread's **root message** — i.e. the swarm started the thread with a proactive/standalone message (a notification, status update, or an agent posting unprompted). In this case there is no task row yet, so without this rule the human's reply would require an @mention. The root author is resolved via a one-time `conversations.replies` lookup (cached per thread) that matches our bot specifically — by `user` for normal posts and by `bot_id` for persona-override (username/icon) posts — so threads started by *other* bots are not picked up. The lookup is skipped when a linked task already matches.
35
+
32
36
  ### How it works
33
37
 
34
- 1. A human sends a non-@mention message in a thread where the swarm is already active (has existing tasks)
38
+ 1. A human sends a non-@mention message in a thread where the swarm is already active (see definition above)
35
39
  2. The message enters an in-memory buffer keyed by `channelId:threadTs`
36
40
  3. A debounce timer starts (default 10 seconds)
37
41
  4. Additional messages within the window are appended to the buffer, resetting the timer each time
@@ -6,6 +6,7 @@ import {
6
6
  checkUserAccess,
7
7
  formatFileSize,
8
8
  isBotMessage,
9
+ isSwarmThreadRoot,
9
10
  type UserFilterConfig,
10
11
  } from "./handlers";
11
12
 
@@ -359,3 +360,37 @@ describe("isBotMessage", () => {
359
360
  });
360
361
  });
361
362
  });
363
+
364
+ describe("isSwarmThreadRoot", () => {
365
+ test("matches our bot by user ID (non-persona post)", () => {
366
+ expect(isSwarmThreadRoot({ user: "UBOT123" }, "UBOT123", "B_SWARM")).toBe(true);
367
+ });
368
+
369
+ test("matches our bot by bot_id (persona post omits user)", () => {
370
+ // Posts with username/icon_emoji override carry bot_id but no user field.
371
+ expect(isSwarmThreadRoot({ bot_id: "B_SWARM" }, "UBOT123", "B_SWARM")).toBe(true);
372
+ });
373
+
374
+ test("does NOT match a different bot in the workspace", () => {
375
+ // A thread started by some OTHER bot must not be treated as swarm-started.
376
+ expect(isSwarmThreadRoot({ bot_id: "B_OTHER", user: "UOTHER" }, "UBOT123", "B_SWARM")).toBe(
377
+ false,
378
+ );
379
+ });
380
+
381
+ test("does NOT match a human-started thread", () => {
382
+ expect(isSwarmThreadRoot({ user: "UHUMAN456" }, "UBOT123", "B_SWARM")).toBe(false);
383
+ });
384
+
385
+ test("returns false for an undefined root message", () => {
386
+ expect(isSwarmThreadRoot(undefined, "UBOT123", "B_SWARM")).toBe(false);
387
+ });
388
+
389
+ test("returns false when our bot identity is unknown", () => {
390
+ expect(isSwarmThreadRoot({ bot_id: "B_SWARM", user: "UBOT123" }, null, null)).toBe(false);
391
+ });
392
+
393
+ test("falls back to bot_id when bot user ID is unknown but bot_id is", () => {
394
+ expect(isSwarmThreadRoot({ bot_id: "B_SWARM" }, null, "B_SWARM")).toBe(true);
395
+ });
396
+ });
@@ -178,6 +178,75 @@ interface ThreadMessage {
178
178
  // Cache for bot's own user ID (avoids redundant auth.test calls)
179
179
  let cachedBotUserId: string | null = null;
180
180
 
181
+ // Cache for bot's own bot_id (auth.test). Persona-override messages
182
+ // (username/icon_emoji) carry `bot_id` but not `user`, so this is needed to
183
+ // recognize swarm-authored messages that the `cachedBotUserId` check would miss.
184
+ let cachedBotId: string | null = null;
185
+
186
+ // Cache: `${channelId}:${threadTs}` → whether our swarm bot authored the thread
187
+ // root. A thread's root author never changes, so caching is permanently correct.
188
+ // Bounded to avoid unbounded growth in long-running processes.
189
+ const swarmThreadRootCache = new Map<string, boolean>();
190
+ const SWARM_THREAD_ROOT_CACHE_MAX = 1000;
191
+
192
/**
 * Decide whether a thread's root message was authored by our own swarm bot.
 * Pure (no I/O, no module state); exported so tests can call it directly.
 *
 * Only OUR bot matches, never just "any bot in the workspace":
 *  - a root whose `user` equals `botUserId` matches (non-persona posts);
 *  - a root whose `bot_id` equals `botId` matches (persona posts with a
 *    username/icon_emoji override, which typically omit `user`).
 *
 * @param root - Root message of the thread, or `undefined` when unavailable.
 * @param botUserId - Our bot's user ID, or `null` when it is not known.
 * @param botId - Our bot's `bot_id`, or `null` when it is not known.
 * @returns `true` when at least one known identifier matches the root.
 */
export function isSwarmThreadRoot(
  root: { bot_id?: string; user?: string } | undefined,
  botUserId: string | null,
  botId: string | null,
): boolean {
  if (!root) {
    return false;
  }

  const { user: rootUser, bot_id: rootBotId } = root;
  // An unknown (null/empty) identifier must never match, even against a root
  // that happens to lack the corresponding field.
  const postedAsUser = !!botUserId && rootUser === botUserId;
  const postedAsBot = !!botId && rootBotId === botId;
  return postedAsUser || postedAsBot;
}
211
+
212
/**
 * Returns true if the root message of the given thread was posted by our own
 * swarm bot (a proactive/standalone message the swarm started). Used to treat
 * human replies to swarm-initiated threads as follow-ups that don't require an
 * @mention.
 *
 * Only definitive answers are cached per thread. Two situations return `false`
 * WITHOUT caching, so a later message in the same thread retries the lookup:
 *  - neither of our bot identifiers is known, so no comparison could succeed;
 *  - the `conversations.replies` call throws (the error is logged).
 * Caching `false` in those cases would pin the thread as "not swarm-started"
 * until the entry is evicted, even though the real answer was never obtained.
 *
 * @param client - Slack Web API client used for the `conversations.replies` lookup.
 * @param channelId - Channel containing the thread.
 * @param threadTs - Timestamp of the thread's root message.
 * @returns Whether our bot authored the thread root; `false` when undetermined.
 */
async function wasThreadStartedBySwarm(
  client: WebClient,
  channelId: string,
  threadTs: string,
): Promise<boolean> {
  const key = `${channelId}:${threadTs}`;
  const cached = swarmThreadRootCache.get(key);
  if (cached !== undefined) return cached;

  // With no known identity every match is false by construction; skip the
  // API call and leave the cache untouched so a later attempt can succeed.
  if (!cachedBotUserId && !cachedBotId) return false;

  let startedBySwarm: boolean;
  try {
    const resp = await client.conversations.replies({
      channel: channelId,
      ts: threadTs,
      limit: 1,
      inclusive: true,
    });
    const root = resp.messages?.[0] as { bot_id?: string; user?: string } | undefined;
    startedBySwarm = isSwarmThreadRoot(root, cachedBotUserId, cachedBotId);
  } catch (error) {
    console.error("[Slack] Failed to check whether thread was started by swarm:", error);
    // The lookup failed, so the answer is unknown rather than "no": do not cache.
    return false;
  }

  // Evict oldest entry (insertion-ordered Map) once the cap is reached. Skip
  // eviction when the key is already present (a concurrent lookup for the same
  // thread finished first), since overwriting does not grow the map.
  if (
    !swarmThreadRootCache.has(key) &&
    swarmThreadRootCache.size >= SWARM_THREAD_ROOT_CACHE_MAX
  ) {
    const oldest = swarmThreadRootCache.keys().next().value;
    if (oldest !== undefined) swarmThreadRootCache.delete(oldest);
  }
  swarmThreadRootCache.set(key, startedBySwarm);
  return startedBySwarm;
}
249
+
181
250
  // Cache for user display names
182
251
  const userNameCache = new Map<string, string>();
183
252
 
@@ -346,6 +415,7 @@ export function registerMessageHandler(app: App): void {
346
415
  try {
347
416
  const authResult = await client.auth.test();
348
417
  cachedBotUserId = authResult.user_id as string;
418
+ cachedBotId = (authResult.bot_id as string | undefined) ?? null;
349
419
  } catch (error) {
350
420
  console.error("[Slack] Failed to cache bot user ID:", error);
351
421
  }
@@ -458,8 +528,15 @@ export function registerMessageHandler(app: App): void {
458
528
  );
459
529
  return;
460
530
  }
461
- // Check if this thread has any swarm activity (existing tasks)
462
- const hasSwarmActivity = getAgentWorkingOnThread(msg.channel, msg.thread_ts) !== null;
531
+ // Treat the thread as having swarm activity if either:
532
+ // - a Slack task is already linked to it (someone started it via @mention), or
533
+ // - the swarm itself posted the thread's root message (a proactive/standalone
534
+ // message the swarm started). In the latter case there is no task row yet,
535
+ // so the human's reply would otherwise require an @mention. The Slack lookup
536
+ // is skipped when a task already matches.
537
+ const hasSwarmActivity =
538
+ getAgentWorkingOnThread(msg.channel, msg.thread_ts) !== null ||
539
+ (await wasThreadStartedBySwarm(client, msg.channel, msg.thread_ts));
463
540
 
464
541
  if (hasSwarmActivity) {
465
542
  const threadKey = `${msg.channel}:${msg.thread_ts}`;
@@ -328,16 +328,17 @@ describe("getBasePrompt — truncation", () => {
328
328
  });
329
329
 
330
330
  test("total budget respected — tools truncated before claudeMd", async () => {
331
- // Use soulMd to eat up most of the 150k total budget so that
332
- // truncatable sections (claudeMd, toolsMd) must compete for the remainder.
331
+ // Use soulMd to eat up most of the 120k total budget (lowered from 150k
332
+ // in the Picateclas spawn-OOM fix, 2026-05-28) so that truncatable
333
+ // sections (claudeMd, toolsMd) must compete for the remainder.
333
334
  // soulMd is part of `prompt` which counts toward protectedLength.
334
335
  const baseResult = await getBasePrompt(minimalArgs);
335
336
  const staticLength = baseResult.length; // ~12-13k for static content
336
337
 
337
338
  // Leave exactly enough budget for claudeMd but not toolsMd.
338
- // Total budget = 150k - protectedLength.
339
- // We want: protectedLength ≈ 150k - 18k = 132k, so claudeMd (15k) fits but toolsMd doesn't.
340
- const soulSize = 132_000 - staticLength;
339
+ // Total budget = 120k - protectedLength.
340
+ // We want: protectedLength ≈ 120k - 18k = 102k, so claudeMd (15k) fits but toolsMd doesn't.
341
+ const soulSize = 102_000 - staticLength;
341
342
  const result = await getBasePrompt({
342
343
  ...minimalArgs,
343
344
  soulMd: bigString(Math.max(0, soulSize)),
@@ -354,7 +355,29 @@ describe("getBasePrompt — truncation", () => {
354
355
  expect(hasToolsTruncation || !hasToolsHeader).toBe(true);
355
356
  });
356
357
 
357
- test("repo context never truncated", async () => {
358
+ test("Picateclas spawn-OOM hardening — total prompt stays below MAX_ARG_STRLEN", async () => {
359
+ // Even at the worst-case where every truncatable section maxes out its
360
+ // budget and the repo CLAUDE.md is huge, the final prompt must stay
361
+ // safely below Linux's `MAX_ARG_STRLEN = 131,072` bytes (the per-argv-
362
+ // element kernel limit that bit Picateclas attempts 4-6, 2026-05-28).
363
+ const result = await getBasePrompt({
364
+ ...minimalArgs,
365
+ soulMd: bigString(40_000),
366
+ claudeMd: bigString(40_000),
367
+ toolsMd: bigString(40_000),
368
+ repoContext: {
369
+ claudeMd: bigString(60_000),
370
+ clonePath: "/workspace/repos/big-repo",
371
+ },
372
+ });
373
+ expect(result.length).toBeLessThan(131_072);
374
+ });
375
+
376
+ test("repo CLAUDE.md is capped at REPO_CLAUDE_MD_MAX_CHARS (12 KB) with on-disk pointer", async () => {
377
+ // Picateclas spawn-OOM permanent fix (2026-05-28): repo CLAUDE.md was the
378
+ // single biggest volatile component of the bootstrap argv. It is now
379
+ // truncated to ~12 KB with a footer pointing at the on-disk file, mirroring
380
+ // the same shape as the agent claudeMd / toolsMd caps.
358
381
  const hugeRepoClaudeMd = bigString(30_000);
359
382
  const result = await getBasePrompt({
360
383
  ...minimalArgs,
@@ -363,8 +386,23 @@ describe("getBasePrompt — truncation", () => {
363
386
  clonePath: "/workspace/big-repo",
364
387
  },
365
388
  });
366
- // The full repo content should be present (never truncated)
367
- expect(result).toContain(hugeRepoClaudeMd);
389
+ // The full 30 KB content should NOT survive; it is capped at ~12 KB.
390
+ expect(result).not.toContain(hugeRepoClaudeMd);
391
+ // The truncation footer points at the on-disk path so readers can find
392
+ // the full content.
393
+ expect(result).toContain("[...truncated — see /workspace/big-repo/CLAUDE.md");
394
+ });
395
+
396
+ test("repo CLAUDE.md under the cap is preserved verbatim", async () => {
397
+ const smallRepoClaudeMd = bigString(5_000);
398
+ const result = await getBasePrompt({
399
+ ...minimalArgs,
400
+ repoContext: {
401
+ claudeMd: smallRepoClaudeMd,
402
+ clonePath: "/workspace/small-repo",
403
+ },
404
+ });
405
+ expect(result).toContain(smallRepoClaudeMd);
368
406
  expect(result).not.toContain("[...truncated");
369
407
  });
370
408
  });
@@ -40,14 +40,14 @@ describe("ClaudeManagedAdapter (Phase 1 skeleton)", () => {
40
40
  }
41
41
  });
42
42
 
43
- test("factory returns ClaudeManagedAdapter for 'claude-managed'", () => {
44
- const adapter = createProviderAdapter("claude-managed");
43
+ test("factory returns ClaudeManagedAdapter for 'claude-managed'", async () => {
44
+ const adapter = await createProviderAdapter("claude-managed");
45
45
  expect(adapter).toBeInstanceOf(ClaudeManagedAdapter);
46
46
  expect(adapter.name).toBe("claude-managed");
47
47
  });
48
48
 
49
- test("factory still rejects unknown providers and lists claude-managed", () => {
50
- expect(() => createProviderAdapter("nope")).toThrow(
49
test("factory still rejects unknown providers and lists claude-managed", async () => {
  // `.rejects` produces a promise. It must be awaited, otherwise the test can
  // finish (and pass) before the rejection assertion is evaluated.
  await expect(createProviderAdapter("nope")).rejects.toThrow(
    'Unknown HARNESS_PROVIDER: "nope". Supported: claude, pi, codex, devin, claude-managed',
  );
});
@@ -85,7 +85,7 @@ describe("CodexAdapter spawn env — harness OTEL gate", () => {
85
85
  });
86
86
 
87
87
  test("gate on (SWARM_ENABLE_HARNESS_OTEL) → spawn env carries TRACEPARENT", async () => {
88
- const adapter = new CodexAdapter();
88
+ const adapter = new CodexAdapter({ bypassSubprocess: true });
89
89
  await adapter.createSession(testConfig({ env: { SWARM_ENABLE_HARNESS_OTEL: "1" } }));
90
90
 
91
91
  expect(capturedEnv).toBeDefined();
@@ -93,14 +93,14 @@ describe("CodexAdapter spawn env — harness OTEL gate", () => {
93
93
  });
94
94
 
95
95
  test("gate on via deprecated SWARM_ENABLE_CLAUDE_CODE_OTEL alias → TRACEPARENT injected", async () => {
96
- const adapter = new CodexAdapter();
96
+ const adapter = new CodexAdapter({ bypassSubprocess: true });
97
97
  await adapter.createSession(testConfig({ env: { SWARM_ENABLE_CLAUDE_CODE_OTEL: "1" } }));
98
98
 
99
99
  expect(capturedEnv?.TRACEPARENT).toBe(`00-${TRACE_ID}-${SPAN_ID}-01`);
100
100
  });
101
101
 
102
102
  test("gate off → no TRACEPARENT, existing env wiring intact", async () => {
103
- const adapter = new CodexAdapter();
103
+ const adapter = new CodexAdapter({ bypassSubprocess: true });
104
104
  await adapter.createSession(testConfig({ env: {} }));
105
105
 
106
106
  expect(capturedEnv).toBeDefined();
@@ -112,7 +112,7 @@ describe("CodexAdapter spawn env — harness OTEL gate", () => {
112
112
 
113
113
  test("gate on but unsampled active span → no TRACEPARENT", async () => {
114
114
  getActiveSpanSpy.mockReturnValue(makeSpan({ sampled: false }));
115
- const adapter = new CodexAdapter();
115
+ const adapter = new CodexAdapter({ bypassSubprocess: true });
116
116
  await adapter.createSession(testConfig({ env: { SWARM_ENABLE_HARNESS_OTEL: "1" } }));
117
117
 
118
118
  expect(capturedEnv?.TRACEPARENT).toBeUndefined();
@@ -96,7 +96,7 @@ async function runSessionWithThrowingThread(
96
96
  };
97
97
 
98
98
  try {
99
- const adapter = new CodexAdapter();
99
+ const adapter = new CodexAdapter({ bypassSubprocess: true });
100
100
  const session = await adapter.createSession(config);
101
101
  const emitted: ProviderEvent[] = [];
102
102
  session.onEvent((e) => emitted.push(e));
@@ -175,7 +175,7 @@ async function runSessionWithFakeThread(
175
175
  };
176
176
 
177
177
  try {
178
- const adapter = new CodexAdapter();
178
+ const adapter = new CodexAdapter({ bypassSubprocess: true });
179
179
  const session = await adapter.createSession(config);
180
180
 
181
181
  const emitted: ProviderEvent[] = [];
@@ -575,7 +575,7 @@ describe("CodexSession event mapping", () => {
575
575
  };
576
576
 
577
577
  try {
578
- const adapter = new CodexAdapter();
578
+ const adapter = new CodexAdapter({ bypassSubprocess: true });
579
579
  const config = testConfig({
580
580
  logFile: join(tmpLogDir, "abort.log"),
581
581
  cwd: "",
@@ -612,7 +612,7 @@ describe("CodexSession event mapping", () => {
612
612
 
613
613
  describe("CodexAdapter.canResume", () => {
614
614
  test("returns false for empty / non-string session ids", async () => {
615
- const adapter = new CodexAdapter();
615
+ const adapter = new CodexAdapter({ bypassSubprocess: true });
616
616
  expect(await adapter.canResume("")).toBe(false);
617
617
  // @ts-expect-error: deliberate runtime check for non-string input
618
618
  expect(await adapter.canResume(undefined)).toBe(false);
@@ -631,7 +631,7 @@ describe("CodexAdapter.canResume", () => {
631
631
  ).resumeThread = function resumeThread(): unknown {
632
632
  return { id: "thread-resumed" };
633
633
  };
634
- const adapter = new CodexAdapter();
634
+ const adapter = new CodexAdapter({ bypassSubprocess: true });
635
635
  expect(await adapter.canResume("thread-resumed")).toBe(true);
636
636
 
637
637
  // Failure path
@@ -789,6 +789,10 @@ describe("resolveCodexModel", () => {
789
789
  expect(resolveCodexModel("gpt-5.4-mini")).toBe("gpt-5.4-mini");
790
790
  });
791
791
 
792
+ test("passthrough 'gpt-5.5' → gpt-5.5", () => {
793
+ expect(resolveCodexModel("gpt-5.5")).toBe("gpt-5.5");
794
+ });
795
+
792
796
  test("passthrough 'gpt-5.3-codex' → gpt-5.3-codex", () => {
793
797
  expect(resolveCodexModel("gpt-5.3-codex")).toBe("gpt-5.3-codex");
794
798
  });
@@ -816,6 +820,10 @@ describe("getCodexContextWindow", () => {
816
820
  expect(getCodexContextWindow("gpt-5.4-mini")).toBe(200_000);
817
821
  });
818
822
 
823
+ test("gpt-5.5 → 1_050_000", () => {
824
+ expect(getCodexContextWindow("gpt-5.5")).toBe(1_050_000);
825
+ });
826
+
819
827
  test("gpt-5.3-codex → 1_000_000 (1M context)", () => {
820
828
  expect(getCodexContextWindow("gpt-5.3-codex")).toBe(1_000_000);
821
829
  });
@@ -833,6 +841,11 @@ describe("computeCodexCostUsd", () => {
833
841
  expect(cost).toBeCloseTo(17.5, 4);
834
842
  });
835
843
 
844
+ test("gpt-5.5 with 1M uncached input + 1M output = $5 + $30 = $35", () => {
845
+ const cost = computeCodexCostUsd("gpt-5.5", 1_000_000, 0, 1_000_000);
846
+ expect(cost).toBeCloseTo(35, 4);
847
+ });
848
+
836
849
  test("gpt-5.4 with cached input applies the cached discount", () => {
837
850
  // 1M input, 800k cached → 200k uncached.
838
851
  // 200_000 × $2.50/M = $0.50
@@ -1136,7 +1149,7 @@ async function runSessionWithFakeThreadAndDeps(
1136
1149
  };
1137
1150
 
1138
1151
  try {
1139
- const adapter = new CodexAdapter({ summarizeDeps });
1152
+ const adapter = new CodexAdapter({ summarizeDeps, bypassSubprocess: true });
1140
1153
  const session = await adapter.createSession(config);
1141
1154
  const emitted: ProviderEvent[] = [];
1142
1155
  session.onEvent((e) => emitted.push(e));
@@ -1626,7 +1639,7 @@ describe("CodexSession — rate-limit error preservation", () => {
1626
1639
  };
1627
1640
 
1628
1641
  try {
1629
- const adapter = new CodexAdapter();
1642
+ const adapter = new CodexAdapter({ bypassSubprocess: true });
1630
1643
  const config = testConfig({
1631
1644
  logFile: join(tmpLogDir, "abort-guard.log"),
1632
1645
  cwd: "",
@@ -97,6 +97,41 @@ describe("createCodexSwarmEventHandler", () => {
97
97
  expect(controller.signal.aborted).toBe(true);
98
98
  });
99
99
 
100
+ test("logs the abort reason when /cancelled-tasks reports the task", async () => {
101
+ installFetchStub((url) => {
102
+ if (url.includes("/cancelled-tasks")) {
103
+ return new Response(
104
+ JSON.stringify({ cancelled: [{ id: "task-1", failureReason: "user request" }] }),
105
+ { status: 200 },
106
+ );
107
+ }
108
+ return new Response("{}", { status: 200 });
109
+ });
110
+ const logs: string[] = [];
111
+ const origLog = console.log;
112
+ console.log = (...args: unknown[]) => logs.push(args.map(String).join(" "));
113
+ try {
114
+ const controller = new AbortController();
115
+ const handler = createCodexSwarmEventHandler(
116
+ buildOpts({ abortRef: { current: controller }, taskId: "task-1" }),
117
+ );
118
+ handler({
119
+ type: "tool_start",
120
+ toolCallId: "call-1",
121
+ toolName: "bash",
122
+ args: { command: "sleep 9999" },
123
+ });
124
+ await new Promise((resolve) => setTimeout(resolve, 30));
125
+ } finally {
126
+ console.log = origLog;
127
+ }
128
+ // The log MUST include the literal taskId (not the `${taskId}` template).
129
+ const abortLog = logs.find((l) =>
130
+ l.includes("aborting task task-1: cancelled via /cancelled-tasks poll"),
131
+ );
132
+ expect(abortLog).toBeDefined();
133
+ });
134
+
100
135
  test("throttles the cancellation check across rapid tool_start events", async () => {
101
136
  const { calls } = installFetchStub(
102
137
  () => new Response(JSON.stringify({ cancelled: [] }), { status: 200 }),
@@ -9,6 +9,7 @@ import {
9
9
 
10
10
  describe("getContextWindowSize", () => {
11
11
  test("returns 1M for opus models", () => {
12
+ expect(getContextWindowSize("claude-opus-4-8")).toBe(1_000_000);
12
13
  expect(getContextWindowSize("claude-opus-4-7")).toBe(1_000_000);
13
14
  expect(getContextWindowSize("claude-opus-4-6")).toBe(1_000_000);
14
15
  expect(getContextWindowSize("opus")).toBe(1_000_000);
@@ -315,48 +315,64 @@ describe("checkOpencodeCredentials", () => {
315
315
  describe("checkProviderCredentials dispatcher", () => {
316
316
  const HOME = "/home/worker";
317
317
 
318
- test("dispatches to the right adapter for every supported provider", () => {
319
- expect(checkProviderCredentials("claude", { CLAUDE_CODE_OAUTH_TOKEN: "x" }).ready).toBe(true);
320
- expect(checkProviderCredentials("claude", {}).ready).toBe(false);
318
+ test("dispatches to the right adapter for every supported provider", async () => {
319
+ expect((await checkProviderCredentials("claude", { CLAUDE_CODE_OAUTH_TOKEN: "x" })).ready).toBe(
320
+ true,
321
+ );
322
+ expect((await checkProviderCredentials("claude", {})).ready).toBe(false);
321
323
 
322
324
  expect(
323
- checkProviderCredentials(
324
- "claude-managed",
325
- {
326
- ANTHROPIC_API_KEY: "x",
327
- MANAGED_AGENT_ID: "a",
328
- MANAGED_ENVIRONMENT_ID: "e",
329
- MCP_BASE_URL: "https://x",
330
- },
331
- { homeDir: HOME, fs: noFiles },
325
+ (
326
+ await checkProviderCredentials(
327
+ "claude-managed",
328
+ {
329
+ ANTHROPIC_API_KEY: "x",
330
+ MANAGED_AGENT_ID: "a",
331
+ MANAGED_ENVIRONMENT_ID: "e",
332
+ MCP_BASE_URL: "https://x",
333
+ },
334
+ { homeDir: HOME, fs: noFiles },
335
+ )
332
336
  ).ready,
333
337
  ).toBe(true);
334
338
 
335
- expect(checkProviderCredentials("devin", { DEVIN_API_KEY: "x", DEVIN_ORG_ID: "y" }).ready).toBe(
336
- true,
337
- );
339
+ expect(
340
+ (await checkProviderCredentials("devin", { DEVIN_API_KEY: "x", DEVIN_ORG_ID: "y" })).ready,
341
+ ).toBe(true);
338
342
 
339
343
  expect(
340
- checkProviderCredentials("codex", { OPENAI_API_KEY: "x" }, { homeDir: HOME, fs: noFiles })
341
- .ready,
344
+ (
345
+ await checkProviderCredentials(
346
+ "codex",
347
+ { OPENAI_API_KEY: "x" },
348
+ { homeDir: HOME, fs: noFiles },
349
+ )
350
+ ).ready,
342
351
  ).toBe(true);
343
352
 
344
353
  expect(
345
- checkProviderCredentials("pi", { ANTHROPIC_API_KEY: "x" }, { homeDir: HOME, fs: noFiles })
346
- .ready,
354
+ (
355
+ await checkProviderCredentials(
356
+ "pi",
357
+ { ANTHROPIC_API_KEY: "x" },
358
+ { homeDir: HOME, fs: noFiles },
359
+ )
360
+ ).ready,
347
361
  ).toBe(true);
348
362
 
349
363
  expect(
350
- checkProviderCredentials(
351
- "opencode",
352
- { OPENROUTER_API_KEY: "x" },
353
- { homeDir: HOME, fs: noFiles },
364
+ (
365
+ await checkProviderCredentials(
366
+ "opencode",
367
+ { OPENROUTER_API_KEY: "x" },
368
+ { homeDir: HOME, fs: noFiles },
369
+ )
354
370
  ).ready,
355
371
  ).toBe(true);
356
372
  });
357
373
 
358
- test("throws on unknown provider", () => {
359
- expect(() => checkProviderCredentials("nope", {})).toThrow(/unknown provider/i);
374
test("throws on unknown provider", async () => {
  // `.rejects` produces a promise. It must be awaited, otherwise the test can
  // finish (and pass) before the rejection assertion is evaluated.
  await expect(checkProviderCredentials("nope", {})).rejects.toThrow(/unknown provider/i);
});
361
377
  });
362
378
 
@@ -366,16 +382,16 @@ describe("snapshot: every provider", () => {
366
382
  const HOME = "/home/worker";
367
383
  const providers = ["claude", "claude-managed", "codex", "devin", "opencode", "pi"] as const;
368
384
 
369
- test("fully unset env → ready=false with non-empty missing[] and hint", () => {
385
+ test("fully unset env → ready=false with non-empty missing[] and hint", async () => {
370
386
  for (const p of providers) {
371
- const status = checkProviderCredentials(p, {}, { homeDir: HOME, fs: noFiles });
387
+ const status = await checkProviderCredentials(p, {}, { homeDir: HOME, fs: noFiles });
372
388
  expect(status.ready).toBe(false);
373
389
  expect(status.missing.length).toBeGreaterThan(0);
374
390
  expect(status.hint).toBeTruthy();
375
391
  }
376
392
  });
377
393
 
378
- test("minimum sufficient env → ready=true", () => {
394
+ test("minimum sufficient env → ready=true", async () => {
379
395
  const minimums: Record<string, Record<string, string>> = {
380
396
  claude: { CLAUDE_CODE_OAUTH_TOKEN: "x" },
381
397
  "claude-managed": {
@@ -390,7 +406,10 @@ describe("snapshot: every provider", () => {
390
406
  pi: { ANTHROPIC_API_KEY: "x" },
391
407
  };
392
408
  for (const p of providers) {
393
- const status = checkProviderCredentials(p, minimums[p]!, { homeDir: HOME, fs: noFiles });
409
+ const status = await checkProviderCredentials(p, minimums[p]!, {
410
+ homeDir: HOME,
411
+ fs: noFiles,
412
+ });
394
413
  expect(status.ready).toBe(true);
395
414
  }
396
415
  });