@desplega.ai/agent-swarm 1.85.0 → 1.86.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/openapi.json +1 -1
- package/package.json +8 -6
- package/src/be/db.ts +44 -0
- package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
- package/src/be/modelsdev-cache.json +152028 -0
- package/src/be/modelsdev-cache.ts +46 -0
- package/src/be/seed-pricing.ts +7 -44
- package/src/cli.tsx +12 -2
- package/src/commands/codex-session-runner.ts +132 -0
- package/src/commands/credential-wait.ts +2 -2
- package/src/commands/provider-credentials.ts +10 -5
- package/src/commands/runner.ts +3 -3
- package/src/prompts/base-prompt.ts +49 -3
- package/src/providers/claude-adapter.ts +83 -2
- package/src/providers/claude-managed-models.ts +18 -2
- package/src/providers/codex-adapter.ts +417 -97
- package/src/providers/codex-models.ts +9 -2
- package/src/providers/index.ts +28 -19
- package/src/providers/pricing-sources.md +7 -4
- package/src/providers/swarm-events-shared.ts +14 -0
- package/src/slack/HEURISTICS.md +5 -1
- package/src/slack/handlers.test.ts +35 -0
- package/src/slack/handlers.ts +79 -2
- package/src/tests/base-prompt.test.ts +46 -8
- package/src/tests/claude-managed-adapter.test.ts +4 -4
- package/src/tests/codex-adapter-otel.test.ts +4 -4
- package/src/tests/codex-adapter.test.ts +20 -7
- package/src/tests/codex-swarm-events.test.ts +35 -0
- package/src/tests/context-window.test.ts +1 -0
- package/src/tests/credential-check.test.ts +48 -29
- package/src/tests/entrypoint-config-env-export.test.ts +81 -0
- package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
- package/src/tests/migration-046-budgets.test.ts +6 -5
- package/src/tests/pricing-routes.test.ts +6 -5
- package/src/tests/provider-adapter.test.ts +10 -10
- package/src/tests/provider-command-format.test.ts +4 -4
- package/src/tests/session-costs-codex-recompute.test.ts +25 -0
- package/src/tools/send-task.ts +30 -9
- package/src/utils/context-window.ts +1 -0
- package/templates/schedules/daily-blocker-digest/config.json +13 -0
- package/templates/schedules/daily-blocker-digest/content.md +150 -0
- package/templates/schedules/daily-compounding-reflection/config.json +21 -0
- package/templates/schedules/daily-compounding-reflection/content.md +210 -0
- package/templates/schedules/daily-hn-briefing/config.json +13 -0
- package/templates/schedules/daily-hn-briefing/content.md +97 -0
- package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
- package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
- package/templates/schedules/gtm-weekly-review/config.json +13 -0
- package/templates/schedules/gtm-weekly-review/content.md +58 -0
- package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
- package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
- package/templates/schema.ts +26 -0
- package/templates/skills/agentmail-sending/config.json +13 -0
- package/templates/skills/agentmail-sending/content.md +48 -0
- package/templates/skills/artifacts/config.json +13 -0
- package/templates/skills/artifacts/content.md +87 -0
- package/templates/skills/browser-use-cloud/config.json +13 -0
- package/templates/skills/browser-use-cloud/content.md +155 -0
- package/templates/skills/desloppify/config.json +13 -0
- package/templates/skills/desloppify/content.md +201 -0
- package/templates/skills/exa-search/config.json +13 -0
- package/templates/skills/exa-search/content.md +106 -0
- package/templates/skills/jira-interaction/config.json +13 -0
- package/templates/skills/jira-interaction/content.md +252 -0
- package/templates/skills/kapso-whatsapp/config.json +13 -0
- package/templates/skills/kapso-whatsapp/content.md +369 -0
- package/templates/skills/kv-storage/config.json +13 -0
- package/templates/skills/kv-storage/content.md +111 -0
- package/templates/skills/linear-interaction/config.json +20 -0
- package/templates/skills/linear-interaction/content.md +230 -0
- package/templates/skills/pages/config.json +18 -0
- package/templates/skills/pages/content.md +85 -0
- package/templates/skills/profile-corruption-escalation/config.json +13 -0
- package/templates/skills/profile-corruption-escalation/content.md +105 -0
- package/templates/skills/scheduled-task-resilience/config.json +13 -0
- package/templates/skills/scheduled-task-resilience/content.md +95 -0
- package/templates/skills/sprite-cli/config.json +13 -0
- package/templates/skills/sprite-cli/content.md +133 -0
- package/templates/skills/turso-interaction/config.json +13 -0
- package/templates/skills/turso-interaction/content.md +192 -0
- package/templates/skills/workflow-iterate/config.json +18 -0
- package/templates/skills/workflow-iterate/content.md +399 -0
- package/templates/skills/workflow-structured-output/config.json +13 -0
- package/templates/skills/workflow-structured-output/content.md +101 -0
- package/templates/skills/x-api-interactions/config.json +13 -0
- package/templates/skills/x-api-interactions/content.md +109 -0
- package/templates/workflows/autopilot/config.json +13 -0
- package/templates/workflows/autopilot/content.md +58 -0
- package/templates/workflows/linear-drain-loop/config.json +21 -0
- package/templates/workflows/linear-drain-loop/content.md +72 -0
- package/templates/workflows/ralph-loop/config.json +13 -0
- package/templates/workflows/ralph-loop/content.md +75 -0
|
@@ -5,8 +5,11 @@ Operators bumping a rate by hand should also update this file.
|
|
|
5
5
|
|
|
6
6
|
## Primary: vendored models.dev snapshot
|
|
7
7
|
|
|
8
|
-
- **
|
|
9
|
-
- **
|
|
8
|
+
- **Source-of-truth path**: `src/be/modelsdev-cache.json`
|
|
9
|
+
- **UI compatibility path**: `ui/src/lib/modelsdev-cache.json` symlinks to the
|
|
10
|
+
backend snapshot so existing UI imports keep working.
|
|
11
|
+
- **Loaded by**: `src/be/modelsdev-cache.ts` → `src/be/seed-pricing.ts` →
|
|
12
|
+
`seedPricingFromModelsDev()`,
|
|
10
13
|
called from `src/server.ts` after `initDb`.
|
|
11
14
|
- **Projection rules** (see the same module for code-level detail):
|
|
12
15
|
- Anthropic models → rows under `provider='claude'` AND `provider='claude-managed'`.
|
|
@@ -23,7 +26,7 @@ Operators bumping a rate by hand should also update this file.
|
|
|
23
26
|
- Run `bun run scripts/refresh-modelsdev-pricing.ts` (Phase 2 — adds the
|
|
24
27
|
script). It fetches the latest snapshot from models.dev, diffs against
|
|
25
28
|
the vendored copy, prints a summary, and writes the new file.
|
|
26
|
-
- Commit the regenerated `modelsdev-cache.json` together with a bump
|
|
29
|
+
- Commit the regenerated `src/be/modelsdev-cache.json` together with a bump
|
|
27
30
|
note in the PR description.
|
|
28
31
|
|
|
29
32
|
## Manual overrides
|
|
@@ -47,6 +50,6 @@ no input/output pricing rows at the lookup time, the row is persisted with
|
|
|
47
50
|
`costSource='unpriced'` (rather than 'harness'). The UI surfaces this as a
|
|
48
51
|
yellow badge.
|
|
49
52
|
|
|
50
|
-
To fix: either add the model to `modelsdev-cache.json` (preferred — the
|
|
53
|
+
To fix: either add the model to `src/be/modelsdev-cache.json` (preferred — the
|
|
51
54
|
upstream snapshot probably needs refreshing) or add a manual override row via
|
|
52
55
|
the existing admin route `POST /api/pricing`.
|
|
@@ -119,6 +119,13 @@ export function createSwarmEventHandler(
|
|
|
119
119
|
};
|
|
120
120
|
const isCancelled = data.cancelled?.some((t) => t.id === taskId);
|
|
121
121
|
if (isCancelled) {
|
|
122
|
+
// Log BEFORE aborting so the reason is visible in the worker
|
|
123
|
+
// transcript even when the abort propagates immediately. Without
|
|
124
|
+
// this, cancellations originating from this poll were invisible
|
|
125
|
+
// (only the runner-side poll in runner.ts logs them).
|
|
126
|
+
console.log(
|
|
127
|
+
`[swarm-events] aborting task ${taskId}: cancelled via /cancelled-tasks poll`,
|
|
128
|
+
);
|
|
122
129
|
opts.abortRef.current?.abort();
|
|
123
130
|
if (opts.onCancel) {
|
|
124
131
|
try {
|
|
@@ -141,6 +148,13 @@ export function createSwarmEventHandler(
|
|
|
141
148
|
void checkToolLoop(taskId, toolName, argRecord)
|
|
142
149
|
.then((result) => {
|
|
143
150
|
if (result.blocked) {
|
|
151
|
+
// Surface the loop-detector's reason. Without this log, the abort
|
|
152
|
+
// was indistinguishable from a /cancelled-tasks abort or a runner
|
|
153
|
+
// SIGTERM. `result.reason` already carries the diagnostic detail
|
|
154
|
+
// ("Tool X called 15 times…", "ping-pong between A and B…").
|
|
155
|
+
console.log(
|
|
156
|
+
`[swarm-events] aborting task ${taskId}: tool-loop detected — ${result.reason ?? "unknown reason"}`,
|
|
157
|
+
);
|
|
144
158
|
opts.abortRef.current?.abort();
|
|
145
159
|
}
|
|
146
160
|
})
|
package/src/slack/HEURISTICS.md
CHANGED
|
@@ -29,9 +29,13 @@ When someone @mentions the bot in a thread, the router checks whether a worker a
|
|
|
29
29
|
|
|
30
30
|
When enabled, thread replies that do NOT @mention the bot are captured, buffered, and batched into a single follow-up task. This allows humans to give multi-message feedback in a thread without needing to @mention the bot each time.
|
|
31
31
|
|
|
32
|
+
A thread counts as having swarm activity if **either**:
|
|
33
|
+
- a Slack task is already linked to it via `slackChannelId` + `slackThreadTs` (someone started it by @mentioning the bot), **or**
|
|
34
|
+
- the swarm itself posted the thread's **root message** — i.e. the swarm started the thread with a proactive/standalone message (a notification, status update, or an agent posting unprompted). In this case there is no task row yet, so without this rule the human's reply would require an @mention. The root author is resolved via a one-time `conversations.replies` lookup (cached per thread) that matches our bot specifically — by `user` for normal posts and by `bot_id` for persona-override (username/icon) posts — so threads started by *other* bots are not picked up. The lookup is skipped when a linked task already matches.
|
|
35
|
+
|
|
32
36
|
### How it works
|
|
33
37
|
|
|
34
|
-
1. A human sends a non-@mention message in a thread where the swarm is already active (
|
|
38
|
+
1. A human sends a non-@mention message in a thread where the swarm is already active (see definition above)
|
|
35
39
|
2. The message enters an in-memory buffer keyed by `channelId:threadTs`
|
|
36
40
|
3. A debounce timer starts (default 10 seconds)
|
|
37
41
|
4. Additional messages within the window are appended to the buffer, resetting the timer each time
|
|
@@ -6,6 +6,7 @@ import {
|
|
|
6
6
|
checkUserAccess,
|
|
7
7
|
formatFileSize,
|
|
8
8
|
isBotMessage,
|
|
9
|
+
isSwarmThreadRoot,
|
|
9
10
|
type UserFilterConfig,
|
|
10
11
|
} from "./handlers";
|
|
11
12
|
|
|
@@ -359,3 +360,37 @@ describe("isBotMessage", () => {
|
|
|
359
360
|
});
|
|
360
361
|
});
|
|
361
362
|
});
|
|
363
|
+
|
|
364
|
+
describe("isSwarmThreadRoot", () => {
|
|
365
|
+
test("matches our bot by user ID (non-persona post)", () => {
|
|
366
|
+
expect(isSwarmThreadRoot({ user: "UBOT123" }, "UBOT123", "B_SWARM")).toBe(true);
|
|
367
|
+
});
|
|
368
|
+
|
|
369
|
+
test("matches our bot by bot_id (persona post omits user)", () => {
|
|
370
|
+
// Posts with username/icon_emoji override carry bot_id but no user field.
|
|
371
|
+
expect(isSwarmThreadRoot({ bot_id: "B_SWARM" }, "UBOT123", "B_SWARM")).toBe(true);
|
|
372
|
+
});
|
|
373
|
+
|
|
374
|
+
test("does NOT match a different bot in the workspace", () => {
|
|
375
|
+
// A thread started by some OTHER bot must not be treated as swarm-started.
|
|
376
|
+
expect(isSwarmThreadRoot({ bot_id: "B_OTHER", user: "UOTHER" }, "UBOT123", "B_SWARM")).toBe(
|
|
377
|
+
false,
|
|
378
|
+
);
|
|
379
|
+
});
|
|
380
|
+
|
|
381
|
+
test("does NOT match a human-started thread", () => {
|
|
382
|
+
expect(isSwarmThreadRoot({ user: "UHUMAN456" }, "UBOT123", "B_SWARM")).toBe(false);
|
|
383
|
+
});
|
|
384
|
+
|
|
385
|
+
test("returns false for an undefined root message", () => {
|
|
386
|
+
expect(isSwarmThreadRoot(undefined, "UBOT123", "B_SWARM")).toBe(false);
|
|
387
|
+
});
|
|
388
|
+
|
|
389
|
+
test("returns false when our bot identity is unknown", () => {
|
|
390
|
+
expect(isSwarmThreadRoot({ bot_id: "B_SWARM", user: "UBOT123" }, null, null)).toBe(false);
|
|
391
|
+
});
|
|
392
|
+
|
|
393
|
+
test("falls back to bot_id when bot user ID is unknown but bot_id is", () => {
|
|
394
|
+
expect(isSwarmThreadRoot({ bot_id: "B_SWARM" }, null, "B_SWARM")).toBe(true);
|
|
395
|
+
});
|
|
396
|
+
});
|
package/src/slack/handlers.ts
CHANGED
|
@@ -178,6 +178,75 @@ interface ThreadMessage {
|
|
|
178
178
|
// Cache for bot's own user ID (avoids redundant auth.test calls)
|
|
179
179
|
let cachedBotUserId: string | null = null;
|
|
180
180
|
|
|
181
|
+
// Cache for bot's own bot_id (auth.test). Persona-override messages
|
|
182
|
+
// (username/icon_emoji) carry `bot_id` but not `user`, so this is needed to
|
|
183
|
+
// recognize swarm-authored messages that the `cachedBotUserId` check would miss.
|
|
184
|
+
let cachedBotId: string | null = null;
|
|
185
|
+
|
|
186
|
+
// Cache: `${channelId}:${threadTs}` → whether our swarm bot authored the thread
|
|
187
|
+
// root. A thread's root author never changes, so caching is permanently correct.
|
|
188
|
+
// Bounded to avoid unbounded growth in long-running processes.
|
|
189
|
+
const swarmThreadRootCache = new Map<string, boolean>();
|
|
190
|
+
const SWARM_THREAD_ROOT_CACHE_MAX = 1000;
|
|
191
|
+
|
|
192
|
+
/**
 * Decides whether a thread's root message was authored by this swarm's own
 * Slack bot. Pure (no I/O, no module state) and exported so it can be unit
 * tested directly.
 *
 * Only OUR bot counts — other bots in the workspace never match:
 *   - regular posts are recognised by `root.user` equalling our bot user id;
 *   - persona posts (username/icon_emoji override) are recognised by
 *     `root.bot_id` equalling our bot id, since they typically omit `user`.
 *
 * @param root      The thread's root message, or `undefined` when unavailable.
 * @param botUserId Our bot's user id, or `null` when not known.
 * @param botId     Our bot's bot id, or `null` when not known.
 * @returns `true` only when a known identity matches the root's author fields.
 */
export function isSwarmThreadRoot(
  root: { bot_id?: string; user?: string } | undefined,
  botUserId: string | null,
  botId: string | null,
): boolean {
  if (!root) {
    return false;
  }

  // An unknown (null/empty) identity must never match, even if the
  // corresponding field on the message is missing too.
  const postedByOurUser = Boolean(botUserId) && root.user === botUserId;
  const postedByOurBotId = Boolean(botId) && root.bot_id === botId;

  return postedByOurUser || postedByOurBotId;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Returns true if the root message of the given thread was posted by our own
 * swarm bot (a proactive/standalone message the swarm started). Used to treat
 * human replies to swarm-initiated threads as follow-ups that don't require an
 * @mention.
 *
 * Caching: only CONCLUSIVE answers are stored in `swarmThreadRootCache`. A
 * thread's root author never changes, so a successful lookup can be cached
 * for good. A failed Slack call, or a lookup made before the bot identity is
 * known, proves nothing about the author, so it returns `false` for this
 * call only and is retried on the next message. Caching those would let one
 * transient error permanently disable follow-ups for the thread.
 *
 * @param client    Slack Web API client used for the `conversations.replies` lookup.
 * @param channelId Channel containing the thread.
 * @param threadTs  Timestamp of the thread's root message.
 * @returns Whether our bot authored the thread root; `false` when it cannot
 *          be determined. Never rejects because of a Slack API failure.
 */
async function wasThreadStartedBySwarm(
  client: WebClient,
  channelId: string,
  threadTs: string,
): Promise<boolean> {
  const key = `${channelId}:${threadTs}`;
  const cached = swarmThreadRootCache.get(key);
  if (cached !== undefined) return cached;

  // With no known identity every comparison would come back false. Skip the
  // API call and do not cache, so the thread is re-evaluated once the
  // identity has been resolved.
  if (!cachedBotUserId && !cachedBotId) return false;

  let startedBySwarm: boolean;
  try {
    const resp = await client.conversations.replies({
      channel: channelId,
      ts: threadTs,
      limit: 1,
      inclusive: true,
    });
    const root = resp.messages?.[0] as { bot_id?: string; user?: string } | undefined;
    startedBySwarm = isSwarmThreadRoot(root, cachedBotUserId, cachedBotId);
  } catch (error) {
    console.error("[Slack] Failed to check whether thread was started by swarm:", error);
    // Inconclusive: answer "no" for this message but leave the cache untouched.
    return false;
  }

  // Evict the oldest entry (Map iterates in insertion order) once the cap is
  // reached. Skip eviction if a concurrent lookup already stored this key,
  // since re-setting it does not grow the map.
  if (
    !swarmThreadRootCache.has(key) &&
    swarmThreadRootCache.size >= SWARM_THREAD_ROOT_CACHE_MAX
  ) {
    const oldest = swarmThreadRootCache.keys().next().value;
    if (oldest !== undefined) swarmThreadRootCache.delete(oldest);
  }
  swarmThreadRootCache.set(key, startedBySwarm);
  return startedBySwarm;
}
|
|
249
|
+
|
|
181
250
|
// Cache for user display names
|
|
182
251
|
const userNameCache = new Map<string, string>();
|
|
183
252
|
|
|
@@ -346,6 +415,7 @@ export function registerMessageHandler(app: App): void {
|
|
|
346
415
|
try {
|
|
347
416
|
const authResult = await client.auth.test();
|
|
348
417
|
cachedBotUserId = authResult.user_id as string;
|
|
418
|
+
cachedBotId = (authResult.bot_id as string | undefined) ?? null;
|
|
349
419
|
} catch (error) {
|
|
350
420
|
console.error("[Slack] Failed to cache bot user ID:", error);
|
|
351
421
|
}
|
|
@@ -458,8 +528,15 @@ export function registerMessageHandler(app: App): void {
|
|
|
458
528
|
);
|
|
459
529
|
return;
|
|
460
530
|
}
|
|
461
|
-
//
|
|
462
|
-
|
|
531
|
+
// Treat the thread as having swarm activity if either:
|
|
532
|
+
// - a Slack task is already linked to it (someone started it via @mention), or
|
|
533
|
+
// - the swarm itself posted the thread's root message (a proactive/standalone
|
|
534
|
+
// message the swarm started). In the latter case there is no task row yet,
|
|
535
|
+
// so the human's reply would otherwise require an @mention. The Slack lookup
|
|
536
|
+
// is skipped when a task already matches.
|
|
537
|
+
const hasSwarmActivity =
|
|
538
|
+
getAgentWorkingOnThread(msg.channel, msg.thread_ts) !== null ||
|
|
539
|
+
(await wasThreadStartedBySwarm(client, msg.channel, msg.thread_ts));
|
|
463
540
|
|
|
464
541
|
if (hasSwarmActivity) {
|
|
465
542
|
const threadKey = `${msg.channel}:${msg.thread_ts}`;
|
|
@@ -328,16 +328,17 @@ describe("getBasePrompt — truncation", () => {
|
|
|
328
328
|
});
|
|
329
329
|
|
|
330
330
|
test("total budget respected — tools truncated before claudeMd", async () => {
|
|
331
|
-
// Use soulMd to eat up most of the
|
|
332
|
-
//
|
|
331
|
+
// Use soulMd to eat up most of the 120k total budget (lowered from 150k
|
|
332
|
+
// in the Picateclas spawn-OOM fix, 2026-05-28) so that truncatable
|
|
333
|
+
// sections (claudeMd, toolsMd) must compete for the remainder.
|
|
333
334
|
// soulMd is part of `prompt` which counts toward protectedLength.
|
|
334
335
|
const baseResult = await getBasePrompt(minimalArgs);
|
|
335
336
|
const staticLength = baseResult.length; // ~12-13k for static content
|
|
336
337
|
|
|
337
338
|
// Leave exactly enough budget for claudeMd but not toolsMd.
|
|
338
|
-
// Total budget =
|
|
339
|
-
// We want: protectedLength ≈
|
|
340
|
-
const soulSize =
|
|
339
|
+
// Total budget = 120k - protectedLength.
|
|
340
|
+
// We want: protectedLength ≈ 120k - 18k = 102k, so claudeMd (15k) fits but toolsMd doesn't.
|
|
341
|
+
const soulSize = 102_000 - staticLength;
|
|
341
342
|
const result = await getBasePrompt({
|
|
342
343
|
...minimalArgs,
|
|
343
344
|
soulMd: bigString(Math.max(0, soulSize)),
|
|
@@ -354,7 +355,29 @@ describe("getBasePrompt — truncation", () => {
|
|
|
354
355
|
expect(hasToolsTruncation || !hasToolsHeader).toBe(true);
|
|
355
356
|
});
|
|
356
357
|
|
|
357
|
-
test("
|
|
358
|
+
test("Picateclas spawn-OOM hardening — total prompt stays below MAX_ARG_STRLEN", async () => {
|
|
359
|
+
// Even at the worst-case where every truncatable section maxes out its
|
|
360
|
+
// budget and the repo CLAUDE.md is huge, the final prompt must stay
|
|
361
|
+
// safely below Linux's `MAX_ARG_STRLEN = 131,072` bytes (the per-argv-
|
|
362
|
+
// element kernel limit that bit Picateclas attempts 4-6, 2026-05-28).
|
|
363
|
+
const result = await getBasePrompt({
|
|
364
|
+
...minimalArgs,
|
|
365
|
+
soulMd: bigString(40_000),
|
|
366
|
+
claudeMd: bigString(40_000),
|
|
367
|
+
toolsMd: bigString(40_000),
|
|
368
|
+
repoContext: {
|
|
369
|
+
claudeMd: bigString(60_000),
|
|
370
|
+
clonePath: "/workspace/repos/big-repo",
|
|
371
|
+
},
|
|
372
|
+
});
|
|
373
|
+
expect(result.length).toBeLessThan(131_072);
|
|
374
|
+
});
|
|
375
|
+
|
|
376
|
+
test("repo CLAUDE.md is capped at REPO_CLAUDE_MD_MAX_CHARS (12 KB) with on-disk pointer", async () => {
|
|
377
|
+
// Picateclas spawn-OOM permanent fix (2026-05-28): repo CLAUDE.md was the
|
|
378
|
+
// single biggest volatile component of the bootstrap argv. It is now
|
|
379
|
+
// truncated to ~12 KB with a footer pointing at the on-disk file, mirroring
|
|
380
|
+
// the same shape as the agent claudeMd / toolsMd caps.
|
|
358
381
|
const hugeRepoClaudeMd = bigString(30_000);
|
|
359
382
|
const result = await getBasePrompt({
|
|
360
383
|
...minimalArgs,
|
|
@@ -363,8 +386,23 @@ describe("getBasePrompt — truncation", () => {
|
|
|
363
386
|
clonePath: "/workspace/big-repo",
|
|
364
387
|
},
|
|
365
388
|
});
|
|
366
|
-
// The full
|
|
367
|
-
expect(result).toContain(hugeRepoClaudeMd);
|
|
389
|
+
// The full 30 KB content should NOT survive — capped at ~12 KB.
|
|
390
|
+
expect(result).not.toContain(hugeRepoClaudeMd);
|
|
391
|
+
// The truncation footer points at the on-disk path so readers can find
|
|
392
|
+
// the full content.
|
|
393
|
+
expect(result).toContain("[...truncated — see /workspace/big-repo/CLAUDE.md");
|
|
394
|
+
});
|
|
395
|
+
|
|
396
|
+
test("repo CLAUDE.md under the cap is preserved verbatim", async () => {
|
|
397
|
+
const smallRepoClaudeMd = bigString(5_000);
|
|
398
|
+
const result = await getBasePrompt({
|
|
399
|
+
...minimalArgs,
|
|
400
|
+
repoContext: {
|
|
401
|
+
claudeMd: smallRepoClaudeMd,
|
|
402
|
+
clonePath: "/workspace/small-repo",
|
|
403
|
+
},
|
|
404
|
+
});
|
|
405
|
+
expect(result).toContain(smallRepoClaudeMd);
|
|
368
406
|
expect(result).not.toContain("[...truncated");
|
|
369
407
|
});
|
|
370
408
|
});
|
|
@@ -40,14 +40,14 @@ describe("ClaudeManagedAdapter (Phase 1 skeleton)", () => {
|
|
|
40
40
|
}
|
|
41
41
|
});
|
|
42
42
|
|
|
43
|
-
test("factory returns ClaudeManagedAdapter for 'claude-managed'", () => {
|
|
44
|
-
const adapter = createProviderAdapter("claude-managed");
|
|
43
|
+
test("factory returns ClaudeManagedAdapter for 'claude-managed'", async () => {
|
|
44
|
+
const adapter = await createProviderAdapter("claude-managed");
|
|
45
45
|
expect(adapter).toBeInstanceOf(ClaudeManagedAdapter);
|
|
46
46
|
expect(adapter.name).toBe("claude-managed");
|
|
47
47
|
});
|
|
48
48
|
|
|
49
|
-
test("factory still rejects unknown providers and lists claude-managed", () => {
|
|
50
|
-
expect(
|
|
49
|
+
test("factory still rejects unknown providers and lists claude-managed", async () => {
  // `createProviderAdapter` is async, so the failure arrives as a rejected
  // promise. The `.rejects` matcher is itself asynchronous and must be
  // awaited; otherwise the test can finish before the assertion settles and
  // pass without verifying anything.
  await expect(createProviderAdapter("nope")).rejects.toThrow(
    'Unknown HARNESS_PROVIDER: "nope". Supported: claude, pi, codex, devin, claude-managed',
  );
});
|
|
@@ -85,7 +85,7 @@ describe("CodexAdapter spawn env — harness OTEL gate", () => {
|
|
|
85
85
|
});
|
|
86
86
|
|
|
87
87
|
test("gate on (SWARM_ENABLE_HARNESS_OTEL) → spawn env carries TRACEPARENT", async () => {
|
|
88
|
-
const adapter = new CodexAdapter();
|
|
88
|
+
const adapter = new CodexAdapter({ bypassSubprocess: true });
|
|
89
89
|
await adapter.createSession(testConfig({ env: { SWARM_ENABLE_HARNESS_OTEL: "1" } }));
|
|
90
90
|
|
|
91
91
|
expect(capturedEnv).toBeDefined();
|
|
@@ -93,14 +93,14 @@ describe("CodexAdapter spawn env — harness OTEL gate", () => {
|
|
|
93
93
|
});
|
|
94
94
|
|
|
95
95
|
test("gate on via deprecated SWARM_ENABLE_CLAUDE_CODE_OTEL alias → TRACEPARENT injected", async () => {
|
|
96
|
-
const adapter = new CodexAdapter();
|
|
96
|
+
const adapter = new CodexAdapter({ bypassSubprocess: true });
|
|
97
97
|
await adapter.createSession(testConfig({ env: { SWARM_ENABLE_CLAUDE_CODE_OTEL: "1" } }));
|
|
98
98
|
|
|
99
99
|
expect(capturedEnv?.TRACEPARENT).toBe(`00-${TRACE_ID}-${SPAN_ID}-01`);
|
|
100
100
|
});
|
|
101
101
|
|
|
102
102
|
test("gate off → no TRACEPARENT, existing env wiring intact", async () => {
|
|
103
|
-
const adapter = new CodexAdapter();
|
|
103
|
+
const adapter = new CodexAdapter({ bypassSubprocess: true });
|
|
104
104
|
await adapter.createSession(testConfig({ env: {} }));
|
|
105
105
|
|
|
106
106
|
expect(capturedEnv).toBeDefined();
|
|
@@ -112,7 +112,7 @@ describe("CodexAdapter spawn env — harness OTEL gate", () => {
|
|
|
112
112
|
|
|
113
113
|
test("gate on but unsampled active span → no TRACEPARENT", async () => {
|
|
114
114
|
getActiveSpanSpy.mockReturnValue(makeSpan({ sampled: false }));
|
|
115
|
-
const adapter = new CodexAdapter();
|
|
115
|
+
const adapter = new CodexAdapter({ bypassSubprocess: true });
|
|
116
116
|
await adapter.createSession(testConfig({ env: { SWARM_ENABLE_HARNESS_OTEL: "1" } }));
|
|
117
117
|
|
|
118
118
|
expect(capturedEnv?.TRACEPARENT).toBeUndefined();
|
|
@@ -96,7 +96,7 @@ async function runSessionWithThrowingThread(
|
|
|
96
96
|
};
|
|
97
97
|
|
|
98
98
|
try {
|
|
99
|
-
const adapter = new CodexAdapter();
|
|
99
|
+
const adapter = new CodexAdapter({ bypassSubprocess: true });
|
|
100
100
|
const session = await adapter.createSession(config);
|
|
101
101
|
const emitted: ProviderEvent[] = [];
|
|
102
102
|
session.onEvent((e) => emitted.push(e));
|
|
@@ -175,7 +175,7 @@ async function runSessionWithFakeThread(
|
|
|
175
175
|
};
|
|
176
176
|
|
|
177
177
|
try {
|
|
178
|
-
const adapter = new CodexAdapter();
|
|
178
|
+
const adapter = new CodexAdapter({ bypassSubprocess: true });
|
|
179
179
|
const session = await adapter.createSession(config);
|
|
180
180
|
|
|
181
181
|
const emitted: ProviderEvent[] = [];
|
|
@@ -575,7 +575,7 @@ describe("CodexSession event mapping", () => {
|
|
|
575
575
|
};
|
|
576
576
|
|
|
577
577
|
try {
|
|
578
|
-
const adapter = new CodexAdapter();
|
|
578
|
+
const adapter = new CodexAdapter({ bypassSubprocess: true });
|
|
579
579
|
const config = testConfig({
|
|
580
580
|
logFile: join(tmpLogDir, "abort.log"),
|
|
581
581
|
cwd: "",
|
|
@@ -612,7 +612,7 @@ describe("CodexSession event mapping", () => {
|
|
|
612
612
|
|
|
613
613
|
describe("CodexAdapter.canResume", () => {
|
|
614
614
|
test("returns false for empty / non-string session ids", async () => {
|
|
615
|
-
const adapter = new CodexAdapter();
|
|
615
|
+
const adapter = new CodexAdapter({ bypassSubprocess: true });
|
|
616
616
|
expect(await adapter.canResume("")).toBe(false);
|
|
617
617
|
// @ts-expect-error: deliberate runtime check for non-string input
|
|
618
618
|
expect(await adapter.canResume(undefined)).toBe(false);
|
|
@@ -631,7 +631,7 @@ describe("CodexAdapter.canResume", () => {
|
|
|
631
631
|
).resumeThread = function resumeThread(): unknown {
|
|
632
632
|
return { id: "thread-resumed" };
|
|
633
633
|
};
|
|
634
|
-
const adapter = new CodexAdapter();
|
|
634
|
+
const adapter = new CodexAdapter({ bypassSubprocess: true });
|
|
635
635
|
expect(await adapter.canResume("thread-resumed")).toBe(true);
|
|
636
636
|
|
|
637
637
|
// Failure path
|
|
@@ -789,6 +789,10 @@ describe("resolveCodexModel", () => {
|
|
|
789
789
|
expect(resolveCodexModel("gpt-5.4-mini")).toBe("gpt-5.4-mini");
|
|
790
790
|
});
|
|
791
791
|
|
|
792
|
+
test("passthrough 'gpt-5.5' → gpt-5.5", () => {
|
|
793
|
+
expect(resolveCodexModel("gpt-5.5")).toBe("gpt-5.5");
|
|
794
|
+
});
|
|
795
|
+
|
|
792
796
|
test("passthrough 'gpt-5.3-codex' → gpt-5.3-codex", () => {
|
|
793
797
|
expect(resolveCodexModel("gpt-5.3-codex")).toBe("gpt-5.3-codex");
|
|
794
798
|
});
|
|
@@ -816,6 +820,10 @@ describe("getCodexContextWindow", () => {
|
|
|
816
820
|
expect(getCodexContextWindow("gpt-5.4-mini")).toBe(200_000);
|
|
817
821
|
});
|
|
818
822
|
|
|
823
|
+
test("gpt-5.5 → 1_050_000", () => {
|
|
824
|
+
expect(getCodexContextWindow("gpt-5.5")).toBe(1_050_000);
|
|
825
|
+
});
|
|
826
|
+
|
|
819
827
|
test("gpt-5.3-codex → 1_000_000 (1M context)", () => {
|
|
820
828
|
expect(getCodexContextWindow("gpt-5.3-codex")).toBe(1_000_000);
|
|
821
829
|
});
|
|
@@ -833,6 +841,11 @@ describe("computeCodexCostUsd", () => {
|
|
|
833
841
|
expect(cost).toBeCloseTo(17.5, 4);
|
|
834
842
|
});
|
|
835
843
|
|
|
844
|
+
test("gpt-5.5 with 1M uncached input + 1M output = $5 + $30 = $35", () => {
|
|
845
|
+
const cost = computeCodexCostUsd("gpt-5.5", 1_000_000, 0, 1_000_000);
|
|
846
|
+
expect(cost).toBeCloseTo(35, 4);
|
|
847
|
+
});
|
|
848
|
+
|
|
836
849
|
test("gpt-5.4 with cached input applies the cached discount", () => {
|
|
837
850
|
// 1M input, 800k cached → 200k uncached.
|
|
838
851
|
// 200_000 × $2.50/M = $0.50
|
|
@@ -1136,7 +1149,7 @@ async function runSessionWithFakeThreadAndDeps(
|
|
|
1136
1149
|
};
|
|
1137
1150
|
|
|
1138
1151
|
try {
|
|
1139
|
-
const adapter = new CodexAdapter({ summarizeDeps });
|
|
1152
|
+
const adapter = new CodexAdapter({ summarizeDeps, bypassSubprocess: true });
|
|
1140
1153
|
const session = await adapter.createSession(config);
|
|
1141
1154
|
const emitted: ProviderEvent[] = [];
|
|
1142
1155
|
session.onEvent((e) => emitted.push(e));
|
|
@@ -1626,7 +1639,7 @@ describe("CodexSession — rate-limit error preservation", () => {
|
|
|
1626
1639
|
};
|
|
1627
1640
|
|
|
1628
1641
|
try {
|
|
1629
|
-
const adapter = new CodexAdapter();
|
|
1642
|
+
const adapter = new CodexAdapter({ bypassSubprocess: true });
|
|
1630
1643
|
const config = testConfig({
|
|
1631
1644
|
logFile: join(tmpLogDir, "abort-guard.log"),
|
|
1632
1645
|
cwd: "",
|
|
@@ -97,6 +97,41 @@ describe("createCodexSwarmEventHandler", () => {
|
|
|
97
97
|
expect(controller.signal.aborted).toBe(true);
|
|
98
98
|
});
|
|
99
99
|
|
|
100
|
+
test("logs the abort reason when /cancelled-tasks reports the task", async () => {
|
|
101
|
+
installFetchStub((url) => {
|
|
102
|
+
if (url.includes("/cancelled-tasks")) {
|
|
103
|
+
return new Response(
|
|
104
|
+
JSON.stringify({ cancelled: [{ id: "task-1", failureReason: "user request" }] }),
|
|
105
|
+
{ status: 200 },
|
|
106
|
+
);
|
|
107
|
+
}
|
|
108
|
+
return new Response("{}", { status: 200 });
|
|
109
|
+
});
|
|
110
|
+
const logs: string[] = [];
|
|
111
|
+
const origLog = console.log;
|
|
112
|
+
console.log = (...args: unknown[]) => logs.push(args.map(String).join(" "));
|
|
113
|
+
try {
|
|
114
|
+
const controller = new AbortController();
|
|
115
|
+
const handler = createCodexSwarmEventHandler(
|
|
116
|
+
buildOpts({ abortRef: { current: controller }, taskId: "task-1" }),
|
|
117
|
+
);
|
|
118
|
+
handler({
|
|
119
|
+
type: "tool_start",
|
|
120
|
+
toolCallId: "call-1",
|
|
121
|
+
toolName: "bash",
|
|
122
|
+
args: { command: "sleep 9999" },
|
|
123
|
+
});
|
|
124
|
+
await new Promise((resolve) => setTimeout(resolve, 30));
|
|
125
|
+
} finally {
|
|
126
|
+
console.log = origLog;
|
|
127
|
+
}
|
|
128
|
+
// The log MUST include the literal taskId (not the `${taskId}` template).
|
|
129
|
+
const abortLog = logs.find((l) =>
|
|
130
|
+
l.includes("aborting task task-1: cancelled via /cancelled-tasks poll"),
|
|
131
|
+
);
|
|
132
|
+
expect(abortLog).toBeDefined();
|
|
133
|
+
});
|
|
134
|
+
|
|
100
135
|
test("throttles the cancellation check across rapid tool_start events", async () => {
|
|
101
136
|
const { calls } = installFetchStub(
|
|
102
137
|
() => new Response(JSON.stringify({ cancelled: [] }), { status: 200 }),
|
|
@@ -9,6 +9,7 @@ import {
|
|
|
9
9
|
|
|
10
10
|
describe("getContextWindowSize", () => {
|
|
11
11
|
test("returns 1M for opus models", () => {
|
|
12
|
+
expect(getContextWindowSize("claude-opus-4-8")).toBe(1_000_000);
|
|
12
13
|
expect(getContextWindowSize("claude-opus-4-7")).toBe(1_000_000);
|
|
13
14
|
expect(getContextWindowSize("claude-opus-4-6")).toBe(1_000_000);
|
|
14
15
|
expect(getContextWindowSize("opus")).toBe(1_000_000);
|
|
@@ -315,48 +315,64 @@ describe("checkOpencodeCredentials", () => {
|
|
|
315
315
|
describe("checkProviderCredentials dispatcher", () => {
|
|
316
316
|
const HOME = "/home/worker";
|
|
317
317
|
|
|
318
|
-
test("dispatches to the right adapter for every supported provider", () => {
|
|
319
|
-
expect(checkProviderCredentials("claude", { CLAUDE_CODE_OAUTH_TOKEN: "x" }).ready).toBe(
|
|
320
|
-
|
|
318
|
+
test("dispatches to the right adapter for every supported provider", async () => {
|
|
319
|
+
expect((await checkProviderCredentials("claude", { CLAUDE_CODE_OAUTH_TOKEN: "x" })).ready).toBe(
|
|
320
|
+
true,
|
|
321
|
+
);
|
|
322
|
+
expect((await checkProviderCredentials("claude", {})).ready).toBe(false);
|
|
321
323
|
|
|
322
324
|
expect(
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
325
|
+
(
|
|
326
|
+
await checkProviderCredentials(
|
|
327
|
+
"claude-managed",
|
|
328
|
+
{
|
|
329
|
+
ANTHROPIC_API_KEY: "x",
|
|
330
|
+
MANAGED_AGENT_ID: "a",
|
|
331
|
+
MANAGED_ENVIRONMENT_ID: "e",
|
|
332
|
+
MCP_BASE_URL: "https://x",
|
|
333
|
+
},
|
|
334
|
+
{ homeDir: HOME, fs: noFiles },
|
|
335
|
+
)
|
|
332
336
|
).ready,
|
|
333
337
|
).toBe(true);
|
|
334
338
|
|
|
335
|
-
expect(
|
|
336
|
-
|
|
337
|
-
);
|
|
339
|
+
expect(
|
|
340
|
+
(await checkProviderCredentials("devin", { DEVIN_API_KEY: "x", DEVIN_ORG_ID: "y" })).ready,
|
|
341
|
+
).toBe(true);
|
|
338
342
|
|
|
339
343
|
expect(
|
|
340
|
-
|
|
341
|
-
|
|
344
|
+
(
|
|
345
|
+
await checkProviderCredentials(
|
|
346
|
+
"codex",
|
|
347
|
+
{ OPENAI_API_KEY: "x" },
|
|
348
|
+
{ homeDir: HOME, fs: noFiles },
|
|
349
|
+
)
|
|
350
|
+
).ready,
|
|
342
351
|
).toBe(true);
|
|
343
352
|
|
|
344
353
|
expect(
|
|
345
|
-
|
|
346
|
-
|
|
354
|
+
(
|
|
355
|
+
await checkProviderCredentials(
|
|
356
|
+
"pi",
|
|
357
|
+
{ ANTHROPIC_API_KEY: "x" },
|
|
358
|
+
{ homeDir: HOME, fs: noFiles },
|
|
359
|
+
)
|
|
360
|
+
).ready,
|
|
347
361
|
).toBe(true);
|
|
348
362
|
|
|
349
363
|
expect(
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
364
|
+
(
|
|
365
|
+
await checkProviderCredentials(
|
|
366
|
+
"opencode",
|
|
367
|
+
{ OPENROUTER_API_KEY: "x" },
|
|
368
|
+
{ homeDir: HOME, fs: noFiles },
|
|
369
|
+
)
|
|
354
370
|
).ready,
|
|
355
371
|
).toBe(true);
|
|
356
372
|
});
|
|
357
373
|
|
|
358
|
-
test("throws on unknown provider", () => {
|
|
359
|
-
expect(
|
|
374
|
+
test("throws on unknown provider", async () => {
|
|
375
|
+
expect(checkProviderCredentials("nope", {})).rejects.toThrow(/unknown provider/i);
|
|
360
376
|
});
|
|
361
377
|
});
|
|
362
378
|
|
|
@@ -366,16 +382,16 @@ describe("snapshot: every provider", () => {
|
|
|
366
382
|
const HOME = "/home/worker";
|
|
367
383
|
const providers = ["claude", "claude-managed", "codex", "devin", "opencode", "pi"] as const;
|
|
368
384
|
|
|
369
|
-
test("fully unset env → ready=false with non-empty missing[] and hint", () => {
|
|
385
|
+
test("fully unset env → ready=false with non-empty missing[] and hint", async () => {
|
|
370
386
|
for (const p of providers) {
|
|
371
|
-
const status = checkProviderCredentials(p, {}, { homeDir: HOME, fs: noFiles });
|
|
387
|
+
const status = await checkProviderCredentials(p, {}, { homeDir: HOME, fs: noFiles });
|
|
372
388
|
expect(status.ready).toBe(false);
|
|
373
389
|
expect(status.missing.length).toBeGreaterThan(0);
|
|
374
390
|
expect(status.hint).toBeTruthy();
|
|
375
391
|
}
|
|
376
392
|
});
|
|
377
393
|
|
|
378
|
-
test("minimum sufficient env → ready=true", () => {
|
|
394
|
+
test("minimum sufficient env → ready=true", async () => {
|
|
379
395
|
const minimums: Record<string, Record<string, string>> = {
|
|
380
396
|
claude: { CLAUDE_CODE_OAUTH_TOKEN: "x" },
|
|
381
397
|
"claude-managed": {
|
|
@@ -390,7 +406,10 @@ describe("snapshot: every provider", () => {
|
|
|
390
406
|
pi: { ANTHROPIC_API_KEY: "x" },
|
|
391
407
|
};
|
|
392
408
|
for (const p of providers) {
|
|
393
|
-
const status = checkProviderCredentials(p, minimums[p]!, {
|
|
409
|
+
const status = await checkProviderCredentials(p, minimums[p]!, {
|
|
410
|
+
homeDir: HOME,
|
|
411
|
+
fs: noFiles,
|
|
412
|
+
});
|
|
394
413
|
expect(status.ready).toBe(true);
|
|
395
414
|
}
|
|
396
415
|
});
|