@desplega.ai/agent-swarm 1.85.0 → 1.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/README.md +1 -0
  2. package/openapi.json +1 -1
  3. package/package.json +8 -6
  4. package/src/be/db.ts +44 -0
  5. package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
  6. package/src/be/modelsdev-cache.json +152028 -0
  7. package/src/be/modelsdev-cache.ts +46 -0
  8. package/src/be/seed-pricing.ts +7 -44
  9. package/src/cli.tsx +12 -2
  10. package/src/commands/codex-session-runner.ts +132 -0
  11. package/src/commands/credential-wait.ts +2 -2
  12. package/src/commands/provider-credentials.ts +10 -5
  13. package/src/commands/runner.ts +3 -3
  14. package/src/prompts/base-prompt.ts +49 -3
  15. package/src/providers/claude-adapter.ts +83 -2
  16. package/src/providers/claude-managed-models.ts +18 -2
  17. package/src/providers/codex-adapter.ts +417 -97
  18. package/src/providers/codex-models.ts +9 -2
  19. package/src/providers/index.ts +28 -19
  20. package/src/providers/pricing-sources.md +7 -4
  21. package/src/providers/swarm-events-shared.ts +14 -0
  22. package/src/slack/HEURISTICS.md +5 -1
  23. package/src/slack/handlers.test.ts +35 -0
  24. package/src/slack/handlers.ts +79 -2
  25. package/src/tests/base-prompt.test.ts +46 -8
  26. package/src/tests/claude-managed-adapter.test.ts +4 -4
  27. package/src/tests/codex-adapter-otel.test.ts +4 -4
  28. package/src/tests/codex-adapter.test.ts +20 -7
  29. package/src/tests/codex-swarm-events.test.ts +35 -0
  30. package/src/tests/context-window.test.ts +1 -0
  31. package/src/tests/credential-check.test.ts +48 -29
  32. package/src/tests/entrypoint-config-env-export.test.ts +81 -0
  33. package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
  34. package/src/tests/migration-046-budgets.test.ts +6 -5
  35. package/src/tests/pricing-routes.test.ts +6 -5
  36. package/src/tests/provider-adapter.test.ts +10 -10
  37. package/src/tests/provider-command-format.test.ts +4 -4
  38. package/src/tests/session-costs-codex-recompute.test.ts +25 -0
  39. package/src/tools/send-task.ts +30 -9
  40. package/src/utils/context-window.ts +1 -0
  41. package/templates/schedules/daily-blocker-digest/config.json +13 -0
  42. package/templates/schedules/daily-blocker-digest/content.md +150 -0
  43. package/templates/schedules/daily-compounding-reflection/config.json +21 -0
  44. package/templates/schedules/daily-compounding-reflection/content.md +210 -0
  45. package/templates/schedules/daily-hn-briefing/config.json +13 -0
  46. package/templates/schedules/daily-hn-briefing/content.md +97 -0
  47. package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
  48. package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
  49. package/templates/schedules/gtm-weekly-review/config.json +13 -0
  50. package/templates/schedules/gtm-weekly-review/content.md +58 -0
  51. package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
  52. package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
  53. package/templates/schema.ts +26 -0
  54. package/templates/skills/agentmail-sending/config.json +13 -0
  55. package/templates/skills/agentmail-sending/content.md +48 -0
  56. package/templates/skills/artifacts/config.json +13 -0
  57. package/templates/skills/artifacts/content.md +87 -0
  58. package/templates/skills/browser-use-cloud/config.json +13 -0
  59. package/templates/skills/browser-use-cloud/content.md +155 -0
  60. package/templates/skills/desloppify/config.json +13 -0
  61. package/templates/skills/desloppify/content.md +201 -0
  62. package/templates/skills/exa-search/config.json +13 -0
  63. package/templates/skills/exa-search/content.md +106 -0
  64. package/templates/skills/jira-interaction/config.json +13 -0
  65. package/templates/skills/jira-interaction/content.md +252 -0
  66. package/templates/skills/kapso-whatsapp/config.json +13 -0
  67. package/templates/skills/kapso-whatsapp/content.md +369 -0
  68. package/templates/skills/kv-storage/config.json +13 -0
  69. package/templates/skills/kv-storage/content.md +111 -0
  70. package/templates/skills/linear-interaction/config.json +20 -0
  71. package/templates/skills/linear-interaction/content.md +230 -0
  72. package/templates/skills/pages/config.json +18 -0
  73. package/templates/skills/pages/content.md +85 -0
  74. package/templates/skills/profile-corruption-escalation/config.json +13 -0
  75. package/templates/skills/profile-corruption-escalation/content.md +105 -0
  76. package/templates/skills/scheduled-task-resilience/config.json +13 -0
  77. package/templates/skills/scheduled-task-resilience/content.md +95 -0
  78. package/templates/skills/sprite-cli/config.json +13 -0
  79. package/templates/skills/sprite-cli/content.md +133 -0
  80. package/templates/skills/turso-interaction/config.json +13 -0
  81. package/templates/skills/turso-interaction/content.md +192 -0
  82. package/templates/skills/workflow-iterate/config.json +18 -0
  83. package/templates/skills/workflow-iterate/content.md +399 -0
  84. package/templates/skills/workflow-structured-output/config.json +13 -0
  85. package/templates/skills/workflow-structured-output/content.md +101 -0
  86. package/templates/skills/x-api-interactions/config.json +13 -0
  87. package/templates/skills/x-api-interactions/content.md +109 -0
  88. package/templates/workflows/autopilot/config.json +13 -0
  89. package/templates/workflows/autopilot/content.md +58 -0
  90. package/templates/workflows/linear-drain-loop/config.json +21 -0
  91. package/templates/workflows/linear-drain-loop/content.md +72 -0
  92. package/templates/workflows/ralph-loop/config.json +13 -0
  93. package/templates/workflows/ralph-loop/content.md +75 -0
@@ -0,0 +1,81 @@
1
+ import { describe, expect, test } from "bun:test";
2
+
3
+ /**
4
+ * Tests for the config→env-var export filter in docker-entrypoint.sh.
5
+ *
6
+ * The entrypoint fetches swarm config and writes valid POSIX identifier keys
7
+ * to /tmp/swarm_config.env for sourcing. Keys containing hyphens or other
8
+ * non-identifier characters must be skipped — otherwise `source` interprets
9
+ * them as commands:
10
+ *
11
+ * CF-Access-Client-Id=84853443... → "command not found"
12
+ *
13
+ * This filter mirrors the jq expression in docker-entrypoint.sh so the
14
+ * logic can be verified without a Docker environment.
15
+ */
16
+
17
+ const POSIX_IDENTIFIER = /^[A-Za-z_][A-Za-z0-9_]*$/;
18
+ const DYNAMIC_KEYS = new Set(["codex_oauth", "HARNESS_PROVIDER"]);
19
+
20
+ /** Mirrors the jq filter in docker-entrypoint.sh: drops DYNAMIC_KEYS and any key that is not a POSIX identifier. */
21
+ function filterForEnvExport(
22
+ configs: Array<{ key: string; value: string }>,
23
+ ): Record<string, string> {
24
+ // Object.fromEntries defines own data properties, so an identifier-shaped key
25
+ // such as "__proto__" is kept instead of being swallowed by the prototype setter.
26
+ return Object.fromEntries(
27
+ configs
28
+ .filter(({ key }) => !DYNAMIC_KEYS.has(key) && POSIX_IDENTIFIER.test(key))
29
+ .map(({ key, value }) => [key, value] as const),
30
+ );
31
+ }
32
+
33
+ describe("entrypoint config env export: POSIX identifier filter", () => {
34
+ test("includes valid POSIX identifier keys", () => {
35
+ const result = filterForEnvExport([
36
+ { key: "FOO", value: "bar" },
37
+ { key: "MY_VAR_123", value: "val" },
38
+ { key: "_UNDERSCORE_START", value: "ok" },
39
+ ]);
40
+ expect(result.FOO).toBe("bar");
41
+ expect(result.MY_VAR_123).toBe("val");
42
+ expect(result._UNDERSCORE_START).toBe("ok");
43
+ });
44
+
45
+ test("excludes hyphenated keys (CF-Access-Client-Id pattern)", () => {
46
+ const result = filterForEnvExport([
47
+ { key: "FOO", value: "keep" },
48
+ { key: "CF-Access-Client-Id", value: "secret1" },
49
+ { key: "CF-Access-Client-Secret", value: "secret2" },
50
+ { key: "BAR", value: "keep" },
51
+ ]);
52
+ expect(result.FOO).toBe("keep");
53
+ expect(result.BAR).toBe("keep");
54
+ expect("CF-Access-Client-Id" in result).toBe(false);
55
+ expect("CF-Access-Client-Secret" in result).toBe(false);
56
+ });
57
+
58
+ test("excludes keys starting with a digit", () => {
59
+ const result = filterForEnvExport([
60
+ { key: "VALID", value: "yes" },
61
+ { key: "123_INVALID", value: "no" },
62
+ ]);
63
+ expect(result.VALID).toBe("yes");
64
+ expect("123_INVALID" in result).toBe(false);
65
+ });
66
+
67
+ test("excludes codex_oauth and HARNESS_PROVIDER (existing behaviour)", () => {
68
+ const result = filterForEnvExport([
69
+ { key: "NORMAL", value: "val" },
70
+ { key: "codex_oauth", value: "secret" },
71
+ { key: "HARNESS_PROVIDER", value: "claude" },
72
+ ]);
73
+ expect(result.NORMAL).toBe("val");
74
+ expect("codex_oauth" in result).toBe(false);
75
+ expect("HARNESS_PROVIDER" in result).toBe(false);
76
+ });
77
+
78
+ test("returns empty object for empty configs array", () => {
79
+ expect(filterForEnvExport([])).toEqual({});
80
+ });
81
+ });
@@ -1,11 +1,14 @@
1
1
  import { afterAll, beforeAll, describe, expect, test } from "bun:test";
2
2
  import { unlinkSync } from "node:fs";
3
3
  import {
4
+ cancelTask,
4
5
  closeDb,
5
6
  completeTask,
6
7
  createAgent,
7
8
  createTaskExtended,
9
+ failTask,
8
10
  findCompletedTaskInThread,
11
+ findRecentCancelledTaskInThread,
9
12
  getDb,
10
13
  getTaskById,
11
14
  initDb,
@@ -229,6 +232,168 @@ describe("follow-up re-delegation guard logic", () => {
229
232
  // → Guard does NOT block: first-time delegation is fine
230
233
  });
231
234
 
235
+ test("findRecentCancelledTaskInThread finds tasks with status='cancelled'", () => {
236
+ const agent = createAgent({
237
+ name: "cancel-thread-worker-1",
238
+ isLead: false,
239
+ status: "idle",
240
+ capabilities: [],
241
+ });
242
+ const task = createTaskExtended("cancelled work", {
243
+ agentId: agent.id,
244
+ slackChannelId: "C_CANCEL_1",
245
+ slackThreadTs: "9000.0001",
246
+ });
247
+ cancelTask(task.id, "user cancelled");
248
+
249
+ const result = findRecentCancelledTaskInThread("C_CANCEL_1", "9000.0001", 2880);
250
+ expect(result).not.toBeNull();
251
+ expect(result!.id).toBe(task.id);
252
+ expect(result!.status).toBe("cancelled");
253
+ });
254
+
255
+ test("findRecentCancelledTaskInThread finds failed tasks with 'cancelled' failureReason", () => {
256
+ const agent = createAgent({
257
+ name: "cancel-thread-worker-2",
258
+ isLead: false,
259
+ status: "idle",
260
+ capabilities: [],
261
+ });
262
+ const task = createTaskExtended("aborted work", {
263
+ agentId: agent.id,
264
+ slackChannelId: "C_CANCEL_2",
265
+ slackThreadTs: "9000.0002",
266
+ });
267
+ failTask(task.id, "cancelled");
268
+
269
+ const result = findRecentCancelledTaskInThread("C_CANCEL_2", "9000.0002", 2880);
270
+ expect(result).not.toBeNull();
271
+ expect(result!.id).toBe(task.id);
272
+ expect(result!.failureReason).toBe("cancelled");
273
+ });
274
+
275
+ test("findRecentCancelledTaskInThread finds failed tasks with 'exit 130' failureReason", () => {
276
+ const agent = createAgent({
277
+ name: "cancel-thread-worker-3",
278
+ isLead: false,
279
+ status: "idle",
280
+ capabilities: [],
281
+ });
282
+ const task = createTaskExtended("aborted work via SIGINT", {
283
+ agentId: agent.id,
284
+ slackChannelId: "C_CANCEL_3",
285
+ slackThreadTs: "9000.0003",
286
+ });
287
+ failTask(task.id, "exit 130: aborted by user");
288
+
289
+ const result = findRecentCancelledTaskInThread("C_CANCEL_3", "9000.0003", 2880);
290
+ expect(result).not.toBeNull();
291
+ expect(result!.id).toBe(task.id);
292
+ });
293
+
294
+ test("findRecentCancelledTaskInThread ignores plain failed tasks (no cancellation marker)", () => {
295
+ const agent = createAgent({
296
+ name: "cancel-thread-worker-4",
297
+ isLead: false,
298
+ status: "idle",
299
+ capabilities: [],
300
+ });
301
+ const task = createTaskExtended("genuinely failed work", {
302
+ agentId: agent.id,
303
+ slackChannelId: "C_CANCEL_4",
304
+ slackThreadTs: "9000.0004",
305
+ });
306
+ failTask(task.id, "TypeError: cannot read property of undefined");
307
+
308
+ const result = findRecentCancelledTaskInThread("C_CANCEL_4", "9000.0004", 2880);
309
+ expect(result).toBeNull();
310
+ });
311
+
312
+ test("guard bypasses re-delegation block when cancellation is more recent than completion", () => {
313
+ const channel = "C_BYPASS_1";
314
+ const thread = "10000.0001";
315
+
316
+ // Step 1: An old completed task in the thread
317
+ const completedTask = createTaskExtended("first attempt — completed", {
318
+ agentId: workerAgent.id,
319
+ slackChannelId: channel,
320
+ slackThreadTs: thread,
321
+ });
322
+ completeTask(completedTask.id, "first attempt done");
323
+
324
+ // Backdate to 30 minutes ago so the cancellation is more recent.
325
+ const thirtyMinAgo = new Date(Date.now() - 30 * 60 * 1000).toISOString();
326
+ getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [
327
+ thirtyMinAgo,
328
+ completedTask.id,
329
+ ]);
330
+
331
+ // Step 2: A more-recent cancellation in the same thread
332
+ const cancelledTask = createTaskExtended("second attempt — cancelled mid-work", {
333
+ agentId: workerAgent.id,
334
+ slackChannelId: channel,
335
+ slackThreadTs: thread,
336
+ });
337
+ cancelTask(cancelledTask.id, "cancelled");
338
+
339
+ // Guard checks:
340
+ const recentCompleted = findCompletedTaskInThread(channel, thread, 2880);
341
+ const recentCancelled = findRecentCancelledTaskInThread(channel, thread, 2880);
342
+ expect(recentCompleted).not.toBeNull();
343
+ expect(recentCancelled).not.toBeNull();
344
+
345
+ // The bypass condition: cancellation is more recent than completion.
346
+ const cancelledMoreRecent =
347
+ recentCancelled &&
348
+ new Date(recentCancelled.lastUpdatedAt).getTime() >
349
+ new Date(recentCompleted!.lastUpdatedAt).getTime();
350
+ expect(cancelledMoreRecent).toBe(true);
351
+
352
+ // → Guard does NOT block: re-delegation is allowed.
353
+ });
354
+
355
+ test("guard still blocks when completion is more recent than any cancellation", () => {
356
+ const channel = "C_BYPASS_2";
357
+ const thread = "11000.0001";
358
+
359
+ // Step 1: A cancelled task (older)
360
+ const cancelledTask = createTaskExtended("attempt 1 — cancelled", {
361
+ agentId: workerAgent.id,
362
+ slackChannelId: channel,
363
+ slackThreadTs: thread,
364
+ });
365
+ cancelTask(cancelledTask.id, "cancelled");
366
+
367
+ // Backdate the cancellation to 30 minutes ago
368
+ const thirtyMinAgo = new Date(Date.now() - 30 * 60 * 1000).toISOString();
369
+ getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [
370
+ thirtyMinAgo,
371
+ cancelledTask.id,
372
+ ]);
373
+
374
+ // Step 2: A more-recent completion (the retry succeeded)
375
+ const completedTask = createTaskExtended("attempt 2 — completed", {
376
+ agentId: workerAgent.id,
377
+ slackChannelId: channel,
378
+ slackThreadTs: thread,
379
+ });
380
+ completeTask(completedTask.id, "retry succeeded");
381
+
382
+ // Guard:
383
+ const recentCompleted = findCompletedTaskInThread(channel, thread, 2880);
384
+ const recentCancelled = findRecentCancelledTaskInThread(channel, thread, 2880);
385
+ expect(recentCompleted).not.toBeNull();
386
+ expect(recentCancelled).not.toBeNull();
387
+
388
+ const cancelledMoreRecent =
389
+ recentCancelled &&
390
+ new Date(recentCancelled.lastUpdatedAt).getTime() >
391
+ new Date(recentCompleted!.lastUpdatedAt).getTime();
392
+ expect(cancelledMoreRecent).toBe(false);
393
+
394
+ // → Guard BLOCKS as before: the work was already redone successfully.
395
+ });
396
+
232
397
  test("allows delegation when source task is a follow-up but completed work is outside time window", () => {
233
398
  // Create and complete a worker task, then backdate it
234
399
  const oldWorkerTask = createTaskExtended("old task", {
@@ -134,15 +134,16 @@ describe("migration 046 — budgets and pricing", () => {
134
134
  expect(colMap.get("effective_from")!.pk).toBeGreaterThan(0);
135
135
  });
136
136
 
137
- test("pricing seed has exactly 12 rows (4 models × 3 token_classes), all at effective_from=0", () => {
137
+ test("pricing seed includes every known Codex model/token class at effective_from=0", () => {
138
138
  const db = getDb();
139
- const total = db.prepare<CountRow, []>("SELECT COUNT(*) as cnt FROM pricing").get();
140
- expect(total?.cnt).toBe(12);
139
+ const minimumCodexRows = Object.keys(CODEX_MODEL_PRICING).length * 3;
141
140
 
142
141
  const seedRows = db
143
- .prepare<CountRow, []>("SELECT COUNT(*) as cnt FROM pricing WHERE effective_from = 0")
142
+ .prepare<CountRow, []>(
143
+ "SELECT COUNT(*) as cnt FROM pricing WHERE provider = 'codex' AND effective_from = 0",
144
+ )
144
145
  .get();
145
- expect(seedRows?.cnt).toBe(12);
146
+ expect(seedRows?.cnt ?? 0).toBeGreaterThanOrEqual(minimumCodexRows);
146
147
  });
147
148
 
148
149
  test("every CODEX_MODEL_PRICING entry has rows for input / cached_input / output with matching rates", () => {
@@ -16,6 +16,7 @@ import { closeDb, getDb, getLogsByEventType, initDb } from "../be/db";
16
16
  import { handleCore } from "../http/core";
17
17
  import { handlePricing } from "../http/pricing";
18
18
  import { getPathSegments, parseQueryParams } from "../http/utils";
19
+ import { CODEX_MODEL_PRICING } from "../providers/codex-models";
19
20
 
20
21
  const TEST_DB_PATH = "./test-pricing-routes.sqlite";
21
22
  const API_KEY = "test-pricing-secret-key";
@@ -71,7 +72,7 @@ afterAll(async () => {
71
72
  afterEach(() => {
72
73
  const db = getDb();
73
74
  // Remove every non-seed pricing row so each test starts from the migration
74
- // 044 seed (effective_from=0). The seed uses literal 0 for effective_from.
75
+ // seed rows (effective_from=0). The seed uses literal 0 for effective_from.
75
76
  db.prepare("DELETE FROM pricing WHERE effective_from > 0").run();
76
77
  db.prepare("DELETE FROM agent_log WHERE eventType LIKE 'pricing.%'").run();
77
78
  });
@@ -103,18 +104,18 @@ describe("Phase 6 — /api/pricing REST surface", () => {
103
104
  });
104
105
 
105
106
  describe("read endpoints", () => {
106
- test("GET /api/pricing lists every row including the migration 044 seed", async () => {
107
+ test("GET /api/pricing lists every row including codex seed rows", async () => {
107
108
  const res = await authedFetch(`/api/pricing`);
108
109
  expect(res.status).toBe(200);
109
110
  const body = await res.json();
110
111
  expect(body.rows).toBeInstanceOf(Array);
111
- // Migration 044 seeds 12 codex rows with effective_from=0. They should
112
- // all be present here.
112
+ // Codex seed rows include the migration 046 baseline plus later model
113
+ // backfills. They should all be present here.
113
114
  const seedRows = body.rows.filter(
114
115
  (r: { provider: string; effectiveFrom: number }) =>
115
116
  r.provider === "codex" && r.effectiveFrom === 0,
116
117
  );
117
- expect(seedRows.length).toBe(12);
118
+ expect(seedRows.length).toBe(Object.keys(CODEX_MODEL_PRICING).length * 3);
118
119
  });
119
120
 
120
121
  test("GET /api/pricing/{provider}/{model}/{tokenClass} returns rows latest-first", async () => {
@@ -6,32 +6,32 @@ import { PiMonoAdapter } from "../providers/pi-mono-adapter";
6
6
  import type { CostData, ProviderEvent } from "../providers/types";
7
7
 
8
8
  describe("createProviderAdapter", () => {
9
- test("returns ClaudeAdapter for 'claude'", () => {
10
- const adapter = createProviderAdapter("claude");
9
+ test("returns ClaudeAdapter for 'claude'", async () => {
10
+ const adapter = await createProviderAdapter("claude");
11
11
  expect(adapter).toBeInstanceOf(ClaudeAdapter);
12
12
  expect(adapter.name).toBe("claude");
13
13
  });
14
14
 
15
- test("returns PiMonoAdapter for 'pi'", () => {
16
- const adapter = createProviderAdapter("pi");
15
+ test("returns PiMonoAdapter for 'pi'", async () => {
16
+ const adapter = await createProviderAdapter("pi");
17
17
  expect(adapter).toBeInstanceOf(PiMonoAdapter);
18
18
  expect(adapter.name).toBe("pi");
19
19
  });
20
20
 
21
- test("returns OpencodeAdapter for 'opencode'", () => {
22
- const adapter = createProviderAdapter("opencode");
21
+ test("returns OpencodeAdapter for 'opencode'", async () => {
22
+ const adapter = await createProviderAdapter("opencode");
23
23
  expect(adapter).toBeInstanceOf(OpencodeAdapter);
24
24
  expect(adapter.name).toBe("opencode");
25
25
  });
26
26
 
27
- test("throws for unknown provider", () => {
28
- expect(() => createProviderAdapter("unknown")).toThrow(
27
+ test("throws for unknown provider", async () => { // awaited below: `.rejects` assertions settle asynchronously
28
+ await expect(createProviderAdapter("unknown")).rejects.toThrow(
29
29
  'Unknown HARNESS_PROVIDER: "unknown". Supported: claude, pi, codex, devin, claude-managed, opencode',
30
30
  );
31
31
  });
32
32
 
33
- test("throws for empty string", () => {
34
- expect(() => createProviderAdapter("")).toThrow("Unknown HARNESS_PROVIDER");
33
+ test("throws for empty string", async () => { // awaited below: `.rejects` assertions settle asynchronously
34
+ await expect(createProviderAdapter("")).rejects.toThrow("Unknown HARNESS_PROVIDER");
35
35
  });
36
36
  });
37
37
 
@@ -37,10 +37,10 @@ describe("ProviderAdapter.formatCommand", () => {
37
37
  expect(codex.name).toBe("codex");
38
38
  });
39
39
 
40
- test("createProviderAdapter returns adapters that implement formatCommand", () => {
41
- const claudeAdapter = createProviderAdapter("claude");
42
- const piAdapter = createProviderAdapter("pi");
43
- const codexAdapter = createProviderAdapter("codex");
40
+ test("createProviderAdapter returns adapters that implement formatCommand", async () => {
41
+ const claudeAdapter = await createProviderAdapter("claude");
42
+ const piAdapter = await createProviderAdapter("pi");
43
+ const codexAdapter = await createProviderAdapter("codex");
44
44
  expect(typeof claudeAdapter.formatCommand).toBe("function");
45
45
  expect(typeof piAdapter.formatCommand).toBe("function");
46
46
  expect(typeof codexAdapter.formatCommand).toBe("function");
@@ -153,6 +153,31 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
153
153
  expect(body.cost.totalCostUsd).toBeCloseTo(6.64, 5);
154
154
  });
155
155
 
156
+ test("provider=codex model=gpt-5.5 uses seeded pricing rows instead of falling through to unpriced", async () => {
157
+ const res = await authedFetch(`/api/session-costs`, {
158
+ method: "POST",
159
+ body: JSON.stringify({
160
+ sessionId: "codex-gpt-5-5-regression",
161
+ agentId: testAgent.id,
162
+ totalCostUsd: 0,
163
+ // Mirrors task 1a459c1c-c89c-417a-a60c-6a060ad4a602.
164
+ inputTokens: 3_495_764,
165
+ cacheReadTokens: 3_333_632,
166
+ outputTokens: 8_106,
167
+ model: "gpt-5.5",
168
+ provider: "codex",
169
+ durationMs: 1_000,
170
+ numTurns: 1,
171
+ }),
172
+ });
173
+ expect(res.status).toBe(201);
174
+ const body = (await res.json()) as CreatedCostResponse;
175
+ expect(body.cost.costSource).toBe("pricing-table");
176
+ // uncached = 3,495,764 - 3,333,632 = 162,132
177
+ // cost = (162,132 * 5.0 + 3,333,632 * 0.5 + 8,106 * 30.0) / 1_000_000
178
+ expect(body.cost.totalCostUsd).toBeCloseTo(2.720656, 6);
179
+ });
180
+
156
181
  test("provider=codex but input/output rows missing → 'unpriced', worker value preserved", async () => {
157
182
  // Only seed cached_input. Missing input + output blocks recompute and
158
183
  // Phase 2 tags the row 'unpriced' (no rates means we can't trust harness USD either).
@@ -4,6 +4,7 @@ import * as z from "zod";
4
4
  import {
5
5
  createTaskExtended,
6
6
  findCompletedTaskInThread,
7
+ findRecentCancelledTaskInThread,
7
8
  getActiveTaskCount,
8
9
  getAgentById,
9
10
  getDb,
@@ -192,6 +193,13 @@ export async function sendTaskHandler(
192
193
  // When the source task is a "follow-up" (worker completed/failed notification),
193
194
  // check if there are completed tasks in the same Slack thread recently.
194
195
  // This prevents the cycle: worker completes → follow-up → Lead re-delegates → repeat.
196
+ //
197
+ // Exception: if a MORE RECENT task in the same thread was cancelled (exit 130,
198
+ // status='cancelled', or status='failed' with failureReason containing
199
+ // "cancelled"), bypass the guard. A cancellation means the work was
200
+ // interrupted — re-dispatch is the correct response, not a deduped no-op.
201
+ // Without this bypass, a cancelled worker permanently jams the thread
202
+ // against re-delegation when an earlier completed sibling exists.
195
203
  if (sourceTaskId) {
196
204
  const sourceTask = getTaskById(sourceTaskId);
197
205
  if (
@@ -205,15 +213,28 @@ export async function sendTaskHandler(
205
213
  2880, // 48 hours in minutes
206
214
  );
207
215
  if (recentCompleted) {
208
- const msg = `Blocked: re-delegation from follow-up task in a thread that already has completed work (task ${recentCompleted.id.slice(0, 8)}). The original request was already handled.`;
209
- return {
210
- content: [{ type: "text", text: msg }],
211
- structuredContent: {
212
- yourAgentId: creatorAgentId,
213
- success: false,
214
- message: msg,
215
- },
216
- };
216
+ const recentCancelled = findRecentCancelledTaskInThread(
217
+ sourceTask.slackChannelId,
218
+ sourceTask.slackThreadTs,
219
+ 2880,
220
+ );
221
+ const cancelledMoreRecent =
222
+ recentCancelled &&
223
+ new Date(recentCancelled.lastUpdatedAt).getTime() >
224
+ new Date(recentCompleted.lastUpdatedAt).getTime();
225
+ if (!cancelledMoreRecent) {
226
+ const msg = `Blocked: re-delegation from follow-up task in a thread that already has completed work (task ${recentCompleted.id.slice(0, 8)}). The original request was already handled.`;
227
+ return {
228
+ content: [{ type: "text", text: msg }],
229
+ structuredContent: {
230
+ yourAgentId: creatorAgentId,
231
+ success: false,
232
+ message: msg,
233
+ },
234
+ };
235
+ }
236
+ // else: fall through — the cancellation is more recent than the
237
+ // completion, so re-delegation is legitimate.
217
238
  }
218
239
  }
219
240
  }
@@ -27,6 +27,7 @@ export const CONTEXT_FORMULA = "input-cache-output" as const;
27
27
 
28
28
  const CONTEXT_WINDOW_DEFAULTS: Record<string, number> = {
29
29
  // Anthropic 4.x family
30
+ "claude-opus-4-8": 1_000_000,
30
31
  "claude-opus-4-7": 1_000_000,
31
32
  "claude-opus-4-6": 1_000_000,
32
33
  "claude-opus-4-5": 1_000_000,
@@ -0,0 +1,13 @@
1
+ {
2
+ "kind": "schedule",
3
+ "name": "daily-blocker-digest",
4
+ "displayName": "Daily Blocker Digest",
5
+ "slug": "daily-blocker-digest",
6
+ "title": "Daily Blocker Digest",
7
+ "description": "Ask the lead to summarize stuck work, failing checks, and owner decisions every day.",
8
+ "version": "1.0.0",
9
+ "category": "schedules",
10
+ "placeholders": ["SLACK_CHANNEL_ID", "TIMEZONE"],
11
+ "runAllSeedersCandidate": true,
12
+ "tags": ["operations", "slack", "digest"]
13
+ }
@@ -0,0 +1,150 @@
1
+ # Daily Blocker Digest
2
+
3
+ Ask the lead to summarize stuck work, failing checks, and owner decisions every day.
4
+
5
+ ## Schedule
6
+
7
+ ```json
8
+ {
9
+ "cron": "5 2 * * *",
10
+ "timezone": "UTC",
11
+ "agentRole": "lead",
12
+ "enabled": true
13
+ }
14
+ ```
15
+
16
+ ## Scheduled Task
17
+
18
+ This is the full task prompt the schedule runs on each fire — including the accumulated operational learnings baked into it. Adapt the swarm-specific references (channel IDs, agent names, repo paths) to your environment before enabling.
19
+
20
+ Task Type: Daily Blocker Digest — "Compound Prelude" (unified with PR review)
21
+
22
+ You are Lead. This runs 5 minutes before the compound evolution. Purpose: surface every item claimed to be "awaiting human" so the compound can detect stale-state items (blockers actually resolved but never removed), AND provide the single daily summary of open PRs. Rule from Taras (2026-04-22): verify, don't assume.
23
+
24
+ ---
25
+
26
+ ## Phase 1: Gather Blockers from 4 Sources
27
+
28
+ ### 1A. HEARTBEAT.md "Active Blockers" section
29
+ Read `/workspace/HEARTBEAT.md`. Extract every bullet under "Active Blockers (awaiting Taras)" or similar. Each item is a claim of the form "X is broken/pending".
30
+
31
+ ### 1B. Open PRs across all our repos — with clickable URLs
32
+ Loop over the repo list and gather ALL open PRs with their URL, age, review status, draft flag, labels, author.
33
+
34
+ ```bash
35
+ for repo in desplega-ai/agent-swarm desplega-ai/agent-swarm-landing desplega-ai/landing desplega-ai/landing-labs desplega-ai/qa-use desplega-ai/agent-fs desplega-ai/chat-py desplega-ai/argus desplega-ai/argus-action desplega-ai/ai-toolbox desplega-ai/agent-work; do
36
+ gh pr list --repo "$repo" --state open --json number,title,author,createdAt,url,reviewDecision,isDraft,labels 2>/dev/null | jq --arg repo "$repo" '.[] | . + {repo: $repo}'
37
+ done
38
+ ```
39
+
40
+ Compute `daysOpen` from `createdAt`. Split PRs into buckets:
41
+ - **Dependabot**: author.login == "dependabot" or "app/dependabot" — handled separately at the bottom
42
+ - **Security dependabot**: any dependabot PR with "critical", "high", "security", or "vulnerability" in title or labels — list separately with :shield:
43
+ - **Stale** (60+ days open): :rotating_light: at the top
44
+ - **Aging** (30-59 days): :warning:
45
+ - **Recent** (<30 days): normal listing
46
+
47
+ Format every PR link as: `<URL|repo #NUM>` — always a clickable Slack link, never raw numbers.
48
+
49
+ ### 1C. Tasks awaiting user reply
50
+ Use `db-query`:
51
+ ```sql
52
+ SELECT id, task, slackUserId, createdAt
53
+ FROM agent_tasks
54
+ WHERE slackReplySent = 1
55
+ AND status = 'completed'
56
+ AND requestedByUserId IS NOT NULL
57
+ AND datetime(createdAt) > datetime('now', '-7 days')
58
+ ORDER BY createdAt DESC
59
+ LIMIT 20
60
+ ```
61
+
62
+ ### 1D. Stuck in-flight tasks
63
+ Use `get-tasks` with status=in_progress. Flag any with `lastUpdatedAt` >2h old.
64
+
65
+ ---
66
+
67
+ ## Phase 2: Verify Each Blocker Claim
68
+
69
+ For each claim in 1A, run a quick verification:
70
+ - PR numbers → check if merged (use gh pr view)
71
+ - API/key issues → test the actual API (curl + check response)
72
+ - "awaiting response from X" items → check Slack thread for newer messages
73
+ - Worker-activity claims → check the actual task status
74
+
75
+ Do NOT trust the HEARTBEAT wording. If verification shows the item is resolved, mark it `RESOLVED-STALE` and commit to removing from HEARTBEAT in Phase 4.
76
+
77
+ ---
78
+
79
+ ## Phase 3: Post Unified Digest to Slack
80
+
81
+ Use `slack-post` with channelId `C0A4J7GB0UD`, pinging `<@U08NR6QD6CS>`. Format:
82
+
83
+ ```
84
+ :clipboard: *Daily Blocker Digest + PR Review* — [YYYY-MM-DD]
85
+
86
+ <@U08NR6QD6CS> Here's the combined morning digest.
87
+
88
+ *Awaiting Taras — HEARTBEAT blockers* (N verified real, M stale)
89
+ • <url|repo #NUM> — <title> — [verified: still open]
90
+ • <other item> — [verified: status]
91
+ • ~~<stale item>~~ — RESOLVED-STALE, removed from HEARTBEAT
92
+
93
+ :rotating_light: *STALE PRs (60+ days)*
94
+ 1. <url|repo #NUM> — <title> (X days) — @author
95
+
96
+ :warning: *AGING PRs (30-59 days)*
97
+ 1. <url|repo #NUM> — <title> (X days) — @author
98
+
99
+ *Recent PRs*
100
+ 1. <url|repo #NUM> — <title> (X days) — @author
101
+
102
+ :shield: *Security dependabot (merge soon)*
103
+ • <url|repo #NUM> — <bump text>
104
+
105
+ *Tasks awaiting user reply* (N)
106
+ • <task summary> — from @<userId>
107
+
108
+ *Stuck in-flight* (N, >2h no update)
109
+ • <task id> — <age>
110
+
111
+ ---
112
+ _Also: X dependabot PRs pending (routine dependency bumps)_
113
+ _Stale HEARTBEAT items removed this run: N_
114
+ ```
115
+
116
+ Keep it scannable. Every PR MUST be a clickable `<url|repo #N>` link. If everything is clean, say "All clear — no blockers, no stuck tasks, only routine dependabot churn."
117
+
118
+ ---
119
+
120
+ ## Phase 4: Clean HEARTBEAT.md
121
+
122
+ For each item marked `RESOLVED-STALE`:
123
+ - Remove the line from `/workspace/HEARTBEAT.md`
124
+ - Save a shared memory noting the stale-state catch (permanent receipt for the compound)
125
+
126
+ ---
127
+
128
+ ## Phase 5: Hand-off to Compound
129
+
130
+ Write a memory titled `daily-blocker-digest-YYYY-MM-DD.md` to `/workspace/shared/memory/d454d1a5-4df9-49bd-8a89-e58d6a657dc3/` with:
131
+ - List of all verified blockers (still real) with PR URLs
132
+ - List of RESOLVED-STALE items removed this run
133
+ - Summary counts: total PRs open, stale count, aging count
134
+ - Any patterns noticed ("I keep forgetting X finished shipping on date Y")
135
+
136
+ The compound evolution runs 5 minutes after this. Its Phase 0 reads this memory via `memory-search "daily-blocker-digest"`.
137
+
138
+ ---
139
+
140
+ ## Anti-patterns
141
+
142
+ - ❌ Copying HEARTBEAT verbatim without verifying each line
143
+ - ❌ Raw PR numbers instead of clickable `<url|repo #N>` links
144
+ - ❌ Listing all dependabot PRs inline — collapse into single footer count (except security ones)
145
+ - ❌ Marking things RESOLVED-STALE without evidence
146
+ - ❌ Skipping Phase 4 — if you don't clean HEARTBEAT, the problem recurs tomorrow
147
+
148
+ ## Completion
149
+
150
+ Call `store-progress` with status `completed` and `output` = one-paragraph summary of (a) how many blockers verified real vs stale, (b) PR counts (stale/aging/recent/dependabot), (c) any surprises.