@desplega.ai/agent-swarm 1.85.0 → 1.86.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/openapi.json +1 -1
- package/package.json +8 -6
- package/src/be/db.ts +44 -0
- package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
- package/src/be/modelsdev-cache.json +152028 -0
- package/src/be/modelsdev-cache.ts +46 -0
- package/src/be/seed-pricing.ts +7 -44
- package/src/cli.tsx +12 -2
- package/src/commands/codex-session-runner.ts +132 -0
- package/src/commands/credential-wait.ts +2 -2
- package/src/commands/provider-credentials.ts +10 -5
- package/src/commands/runner.ts +3 -3
- package/src/prompts/base-prompt.ts +49 -3
- package/src/providers/claude-adapter.ts +83 -2
- package/src/providers/claude-managed-models.ts +18 -2
- package/src/providers/codex-adapter.ts +417 -97
- package/src/providers/codex-models.ts +9 -2
- package/src/providers/index.ts +28 -19
- package/src/providers/pricing-sources.md +7 -4
- package/src/providers/swarm-events-shared.ts +14 -0
- package/src/slack/HEURISTICS.md +5 -1
- package/src/slack/handlers.test.ts +35 -0
- package/src/slack/handlers.ts +79 -2
- package/src/tests/base-prompt.test.ts +46 -8
- package/src/tests/claude-managed-adapter.test.ts +4 -4
- package/src/tests/codex-adapter-otel.test.ts +4 -4
- package/src/tests/codex-adapter.test.ts +20 -7
- package/src/tests/codex-swarm-events.test.ts +35 -0
- package/src/tests/context-window.test.ts +1 -0
- package/src/tests/credential-check.test.ts +48 -29
- package/src/tests/entrypoint-config-env-export.test.ts +81 -0
- package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
- package/src/tests/migration-046-budgets.test.ts +6 -5
- package/src/tests/pricing-routes.test.ts +6 -5
- package/src/tests/provider-adapter.test.ts +10 -10
- package/src/tests/provider-command-format.test.ts +4 -4
- package/src/tests/session-costs-codex-recompute.test.ts +25 -0
- package/src/tools/send-task.ts +30 -9
- package/src/utils/context-window.ts +1 -0
- package/templates/schedules/daily-blocker-digest/config.json +13 -0
- package/templates/schedules/daily-blocker-digest/content.md +150 -0
- package/templates/schedules/daily-compounding-reflection/config.json +21 -0
- package/templates/schedules/daily-compounding-reflection/content.md +210 -0
- package/templates/schedules/daily-hn-briefing/config.json +13 -0
- package/templates/schedules/daily-hn-briefing/content.md +97 -0
- package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
- package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
- package/templates/schedules/gtm-weekly-review/config.json +13 -0
- package/templates/schedules/gtm-weekly-review/content.md +58 -0
- package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
- package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
- package/templates/schema.ts +26 -0
- package/templates/skills/agentmail-sending/config.json +13 -0
- package/templates/skills/agentmail-sending/content.md +48 -0
- package/templates/skills/artifacts/config.json +13 -0
- package/templates/skills/artifacts/content.md +87 -0
- package/templates/skills/browser-use-cloud/config.json +13 -0
- package/templates/skills/browser-use-cloud/content.md +155 -0
- package/templates/skills/desloppify/config.json +13 -0
- package/templates/skills/desloppify/content.md +201 -0
- package/templates/skills/exa-search/config.json +13 -0
- package/templates/skills/exa-search/content.md +106 -0
- package/templates/skills/jira-interaction/config.json +13 -0
- package/templates/skills/jira-interaction/content.md +252 -0
- package/templates/skills/kapso-whatsapp/config.json +13 -0
- package/templates/skills/kapso-whatsapp/content.md +369 -0
- package/templates/skills/kv-storage/config.json +13 -0
- package/templates/skills/kv-storage/content.md +111 -0
- package/templates/skills/linear-interaction/config.json +20 -0
- package/templates/skills/linear-interaction/content.md +230 -0
- package/templates/skills/pages/config.json +18 -0
- package/templates/skills/pages/content.md +85 -0
- package/templates/skills/profile-corruption-escalation/config.json +13 -0
- package/templates/skills/profile-corruption-escalation/content.md +105 -0
- package/templates/skills/scheduled-task-resilience/config.json +13 -0
- package/templates/skills/scheduled-task-resilience/content.md +95 -0
- package/templates/skills/sprite-cli/config.json +13 -0
- package/templates/skills/sprite-cli/content.md +133 -0
- package/templates/skills/turso-interaction/config.json +13 -0
- package/templates/skills/turso-interaction/content.md +192 -0
- package/templates/skills/workflow-iterate/config.json +18 -0
- package/templates/skills/workflow-iterate/content.md +399 -0
- package/templates/skills/workflow-structured-output/config.json +13 -0
- package/templates/skills/workflow-structured-output/content.md +101 -0
- package/templates/skills/x-api-interactions/config.json +13 -0
- package/templates/skills/x-api-interactions/content.md +109 -0
- package/templates/workflows/autopilot/config.json +13 -0
- package/templates/workflows/autopilot/content.md +58 -0
- package/templates/workflows/linear-drain-loop/config.json +21 -0
- package/templates/workflows/linear-drain-loop/content.md +72 -0
- package/templates/workflows/ralph-loop/config.json +13 -0
- package/templates/workflows/ralph-loop/content.md +75 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Tests for the config→env-var export filter in docker-entrypoint.sh.
|
|
5
|
+
*
|
|
6
|
+
* The entrypoint fetches swarm config and writes valid POSIX identifier keys
|
|
7
|
+
* to /tmp/swarm_config.env for sourcing. Keys containing hyphens or other
|
|
8
|
+
* non-identifier characters must be skipped — otherwise `source` interprets
|
|
9
|
+
* them as commands:
|
|
10
|
+
*
|
|
11
|
+
* CF-Access-Client-Id=84853443... → "command not found"
|
|
12
|
+
*
|
|
13
|
+
* This filter mirrors the jq expression in docker-entrypoint.sh so the
|
|
14
|
+
* logic can be verified without a Docker environment.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const POSIX_IDENTIFIER = /^[A-Za-z_][A-Za-z0-9_]*$/;
|
|
18
|
+
const DYNAMIC_KEYS = new Set(["codex_oauth", "HARNESS_PROVIDER"]);
|
|
19
|
+
|
|
20
|
+
/** Mirrors the jq filter in docker-entrypoint.sh. */
|
|
21
|
+
function filterForEnvExport(
|
|
22
|
+
configs: Array<{ key: string; value: string }>,
|
|
23
|
+
): Record<string, string> {
|
|
24
|
+
const result: Record<string, string> = {};
|
|
25
|
+
for (const { key, value } of configs) {
|
|
26
|
+
if (DYNAMIC_KEYS.has(key)) continue;
|
|
27
|
+
if (!POSIX_IDENTIFIER.test(key)) continue;
|
|
28
|
+
result[key] = value;
|
|
29
|
+
}
|
|
30
|
+
return result;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
describe("entrypoint config env export: POSIX identifier filter", () => {
|
|
34
|
+
test("includes valid POSIX identifier keys", () => {
|
|
35
|
+
const result = filterForEnvExport([
|
|
36
|
+
{ key: "FOO", value: "bar" },
|
|
37
|
+
{ key: "MY_VAR_123", value: "val" },
|
|
38
|
+
{ key: "_UNDERSCORE_START", value: "ok" },
|
|
39
|
+
]);
|
|
40
|
+
expect(result.FOO).toBe("bar");
|
|
41
|
+
expect(result.MY_VAR_123).toBe("val");
|
|
42
|
+
expect(result._UNDERSCORE_START).toBe("ok");
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
test("excludes hyphenated keys (CF-Access-Client-Id pattern)", () => {
|
|
46
|
+
const result = filterForEnvExport([
|
|
47
|
+
{ key: "FOO", value: "keep" },
|
|
48
|
+
{ key: "CF-Access-Client-Id", value: "secret1" },
|
|
49
|
+
{ key: "CF-Access-Client-Secret", value: "secret2" },
|
|
50
|
+
{ key: "BAR", value: "keep" },
|
|
51
|
+
]);
|
|
52
|
+
expect(result.FOO).toBe("keep");
|
|
53
|
+
expect(result.BAR).toBe("keep");
|
|
54
|
+
expect("CF-Access-Client-Id" in result).toBe(false);
|
|
55
|
+
expect("CF-Access-Client-Secret" in result).toBe(false);
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
test("excludes keys starting with a digit", () => {
|
|
59
|
+
const result = filterForEnvExport([
|
|
60
|
+
{ key: "VALID", value: "yes" },
|
|
61
|
+
{ key: "123_INVALID", value: "no" },
|
|
62
|
+
]);
|
|
63
|
+
expect(result.VALID).toBe("yes");
|
|
64
|
+
expect("123_INVALID" in result).toBe(false);
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
test("excludes codex_oauth and HARNESS_PROVIDER (existing behaviour)", () => {
|
|
68
|
+
const result = filterForEnvExport([
|
|
69
|
+
{ key: "NORMAL", value: "val" },
|
|
70
|
+
{ key: "codex_oauth", value: "secret" },
|
|
71
|
+
{ key: "HARNESS_PROVIDER", value: "claude" },
|
|
72
|
+
]);
|
|
73
|
+
expect(result.NORMAL).toBe("val");
|
|
74
|
+
expect("codex_oauth" in result).toBe(false);
|
|
75
|
+
expect("HARNESS_PROVIDER" in result).toBe(false);
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
test("returns empty object for empty configs array", () => {
|
|
79
|
+
expect(filterForEnvExport([])).toEqual({});
|
|
80
|
+
});
|
|
81
|
+
});
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
|
|
2
2
|
import { unlinkSync } from "node:fs";
|
|
3
3
|
import {
|
|
4
|
+
cancelTask,
|
|
4
5
|
closeDb,
|
|
5
6
|
completeTask,
|
|
6
7
|
createAgent,
|
|
7
8
|
createTaskExtended,
|
|
9
|
+
failTask,
|
|
8
10
|
findCompletedTaskInThread,
|
|
11
|
+
findRecentCancelledTaskInThread,
|
|
9
12
|
getDb,
|
|
10
13
|
getTaskById,
|
|
11
14
|
initDb,
|
|
@@ -229,6 +232,168 @@ describe("follow-up re-delegation guard logic", () => {
|
|
|
229
232
|
// → Guard does NOT block: first-time delegation is fine
|
|
230
233
|
});
|
|
231
234
|
|
|
235
|
+
test("findRecentCancelledTaskInThread finds tasks with status='cancelled'", () => {
|
|
236
|
+
const agent = createAgent({
|
|
237
|
+
name: "cancel-thread-worker-1",
|
|
238
|
+
isLead: false,
|
|
239
|
+
status: "idle",
|
|
240
|
+
capabilities: [],
|
|
241
|
+
});
|
|
242
|
+
const task = createTaskExtended("cancelled work", {
|
|
243
|
+
agentId: agent.id,
|
|
244
|
+
slackChannelId: "C_CANCEL_1",
|
|
245
|
+
slackThreadTs: "9000.0001",
|
|
246
|
+
});
|
|
247
|
+
cancelTask(task.id, "user cancelled");
|
|
248
|
+
|
|
249
|
+
const result = findRecentCancelledTaskInThread("C_CANCEL_1", "9000.0001", 2880);
|
|
250
|
+
expect(result).not.toBeNull();
|
|
251
|
+
expect(result!.id).toBe(task.id);
|
|
252
|
+
expect(result!.status).toBe("cancelled");
|
|
253
|
+
});
|
|
254
|
+
|
|
255
|
+
test("findRecentCancelledTaskInThread finds failed tasks with 'cancelled' failureReason", () => {
|
|
256
|
+
const agent = createAgent({
|
|
257
|
+
name: "cancel-thread-worker-2",
|
|
258
|
+
isLead: false,
|
|
259
|
+
status: "idle",
|
|
260
|
+
capabilities: [],
|
|
261
|
+
});
|
|
262
|
+
const task = createTaskExtended("aborted work", {
|
|
263
|
+
agentId: agent.id,
|
|
264
|
+
slackChannelId: "C_CANCEL_2",
|
|
265
|
+
slackThreadTs: "9000.0002",
|
|
266
|
+
});
|
|
267
|
+
failTask(task.id, "cancelled");
|
|
268
|
+
|
|
269
|
+
const result = findRecentCancelledTaskInThread("C_CANCEL_2", "9000.0002", 2880);
|
|
270
|
+
expect(result).not.toBeNull();
|
|
271
|
+
expect(result!.id).toBe(task.id);
|
|
272
|
+
expect(result!.failureReason).toBe("cancelled");
|
|
273
|
+
});
|
|
274
|
+
|
|
275
|
+
test("findRecentCancelledTaskInThread finds failed tasks with 'exit 130' failureReason", () => {
|
|
276
|
+
const agent = createAgent({
|
|
277
|
+
name: "cancel-thread-worker-3",
|
|
278
|
+
isLead: false,
|
|
279
|
+
status: "idle",
|
|
280
|
+
capabilities: [],
|
|
281
|
+
});
|
|
282
|
+
const task = createTaskExtended("aborted work via SIGINT", {
|
|
283
|
+
agentId: agent.id,
|
|
284
|
+
slackChannelId: "C_CANCEL_3",
|
|
285
|
+
slackThreadTs: "9000.0003",
|
|
286
|
+
});
|
|
287
|
+
failTask(task.id, "exit 130: aborted by user");
|
|
288
|
+
|
|
289
|
+
const result = findRecentCancelledTaskInThread("C_CANCEL_3", "9000.0003", 2880);
|
|
290
|
+
expect(result).not.toBeNull();
|
|
291
|
+
expect(result!.id).toBe(task.id);
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
test("findRecentCancelledTaskInThread ignores plain failed tasks (no cancellation marker)", () => {
|
|
295
|
+
const agent = createAgent({
|
|
296
|
+
name: "cancel-thread-worker-4",
|
|
297
|
+
isLead: false,
|
|
298
|
+
status: "idle",
|
|
299
|
+
capabilities: [],
|
|
300
|
+
});
|
|
301
|
+
const task = createTaskExtended("genuinely failed work", {
|
|
302
|
+
agentId: agent.id,
|
|
303
|
+
slackChannelId: "C_CANCEL_4",
|
|
304
|
+
slackThreadTs: "9000.0004",
|
|
305
|
+
});
|
|
306
|
+
failTask(task.id, "TypeError: cannot read property of undefined");
|
|
307
|
+
|
|
308
|
+
const result = findRecentCancelledTaskInThread("C_CANCEL_4", "9000.0004", 2880);
|
|
309
|
+
expect(result).toBeNull();
|
|
310
|
+
});
|
|
311
|
+
|
|
312
|
+
test("guard bypasses re-delegation block when cancellation is more recent than completion", () => {
|
|
313
|
+
const channel = "C_BYPASS_1";
|
|
314
|
+
const thread = "10000.0001";
|
|
315
|
+
|
|
316
|
+
// Step 1: An old completed task in the thread
|
|
317
|
+
const completedTask = createTaskExtended("first attempt — completed", {
|
|
318
|
+
agentId: workerAgent.id,
|
|
319
|
+
slackChannelId: channel,
|
|
320
|
+
slackThreadTs: thread,
|
|
321
|
+
});
|
|
322
|
+
completeTask(completedTask.id, "first attempt done");
|
|
323
|
+
|
|
324
|
+
// Backdate to 30 minutes ago so the cancellation is more recent.
|
|
325
|
+
const thirtyMinAgo = new Date(Date.now() - 30 * 60 * 1000).toISOString();
|
|
326
|
+
getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [
|
|
327
|
+
thirtyMinAgo,
|
|
328
|
+
completedTask.id,
|
|
329
|
+
]);
|
|
330
|
+
|
|
331
|
+
// Step 2: A more-recent cancellation in the same thread
|
|
332
|
+
const cancelledTask = createTaskExtended("second attempt — cancelled mid-work", {
|
|
333
|
+
agentId: workerAgent.id,
|
|
334
|
+
slackChannelId: channel,
|
|
335
|
+
slackThreadTs: thread,
|
|
336
|
+
});
|
|
337
|
+
cancelTask(cancelledTask.id, "cancelled");
|
|
338
|
+
|
|
339
|
+
// Guard checks:
|
|
340
|
+
const recentCompleted = findCompletedTaskInThread(channel, thread, 2880);
|
|
341
|
+
const recentCancelled = findRecentCancelledTaskInThread(channel, thread, 2880);
|
|
342
|
+
expect(recentCompleted).not.toBeNull();
|
|
343
|
+
expect(recentCancelled).not.toBeNull();
|
|
344
|
+
|
|
345
|
+
// The bypass condition: cancellation is more recent than completion.
|
|
346
|
+
const cancelledMoreRecent =
|
|
347
|
+
recentCancelled &&
|
|
348
|
+
new Date(recentCancelled.lastUpdatedAt).getTime() >
|
|
349
|
+
new Date(recentCompleted!.lastUpdatedAt).getTime();
|
|
350
|
+
expect(cancelledMoreRecent).toBe(true);
|
|
351
|
+
|
|
352
|
+
// → Guard does NOT block: re-delegation is allowed.
|
|
353
|
+
});
|
|
354
|
+
|
|
355
|
+
test("guard still blocks when completion is more recent than any cancellation", () => {
|
|
356
|
+
const channel = "C_BYPASS_2";
|
|
357
|
+
const thread = "11000.0001";
|
|
358
|
+
|
|
359
|
+
// Step 1: A cancelled task (older)
|
|
360
|
+
const cancelledTask = createTaskExtended("attempt 1 — cancelled", {
|
|
361
|
+
agentId: workerAgent.id,
|
|
362
|
+
slackChannelId: channel,
|
|
363
|
+
slackThreadTs: thread,
|
|
364
|
+
});
|
|
365
|
+
cancelTask(cancelledTask.id, "cancelled");
|
|
366
|
+
|
|
367
|
+
// Backdate the cancellation to 30 minutes ago
|
|
368
|
+
const thirtyMinAgo = new Date(Date.now() - 30 * 60 * 1000).toISOString();
|
|
369
|
+
getDb().run("UPDATE agent_tasks SET lastUpdatedAt = ? WHERE id = ?", [
|
|
370
|
+
thirtyMinAgo,
|
|
371
|
+
cancelledTask.id,
|
|
372
|
+
]);
|
|
373
|
+
|
|
374
|
+
// Step 2: A more-recent completion (the retry succeeded)
|
|
375
|
+
const completedTask = createTaskExtended("attempt 2 — completed", {
|
|
376
|
+
agentId: workerAgent.id,
|
|
377
|
+
slackChannelId: channel,
|
|
378
|
+
slackThreadTs: thread,
|
|
379
|
+
});
|
|
380
|
+
completeTask(completedTask.id, "retry succeeded");
|
|
381
|
+
|
|
382
|
+
// Guard:
|
|
383
|
+
const recentCompleted = findCompletedTaskInThread(channel, thread, 2880);
|
|
384
|
+
const recentCancelled = findRecentCancelledTaskInThread(channel, thread, 2880);
|
|
385
|
+
expect(recentCompleted).not.toBeNull();
|
|
386
|
+
expect(recentCancelled).not.toBeNull();
|
|
387
|
+
|
|
388
|
+
const cancelledMoreRecent =
|
|
389
|
+
recentCancelled &&
|
|
390
|
+
new Date(recentCancelled.lastUpdatedAt).getTime() >
|
|
391
|
+
new Date(recentCompleted!.lastUpdatedAt).getTime();
|
|
392
|
+
expect(cancelledMoreRecent).toBe(false);
|
|
393
|
+
|
|
394
|
+
// → Guard BLOCKS as before: the work was already redone successfully.
|
|
395
|
+
});
|
|
396
|
+
|
|
232
397
|
test("allows delegation when source task is a follow-up but completed work is outside time window", () => {
|
|
233
398
|
// Create and complete a worker task, then backdate it
|
|
234
399
|
const oldWorkerTask = createTaskExtended("old task", {
|
|
@@ -134,15 +134,16 @@ describe("migration 046 — budgets and pricing", () => {
|
|
|
134
134
|
expect(colMap.get("effective_from")!.pk).toBeGreaterThan(0);
|
|
135
135
|
});
|
|
136
136
|
|
|
137
|
-
test("pricing seed
|
|
137
|
+
test("pricing seed includes every known Codex model/token class at effective_from=0", () => {
|
|
138
138
|
const db = getDb();
|
|
139
|
-
const
|
|
140
|
-
expect(total?.cnt).toBe(12);
|
|
139
|
+
const minimumCodexRows = Object.keys(CODEX_MODEL_PRICING).length * 3;
|
|
141
140
|
|
|
142
141
|
const seedRows = db
|
|
143
|
-
.prepare<CountRow, []>(
|
|
142
|
+
.prepare<CountRow, []>(
|
|
143
|
+
"SELECT COUNT(*) as cnt FROM pricing WHERE provider = 'codex' AND effective_from = 0",
|
|
144
|
+
)
|
|
144
145
|
.get();
|
|
145
|
-
expect(seedRows?.cnt).
|
|
146
|
+
expect(seedRows?.cnt ?? 0).toBeGreaterThanOrEqual(minimumCodexRows);
|
|
146
147
|
});
|
|
147
148
|
|
|
148
149
|
test("every CODEX_MODEL_PRICING entry has rows for input / cached_input / output with matching rates", () => {
|
|
@@ -16,6 +16,7 @@ import { closeDb, getDb, getLogsByEventType, initDb } from "../be/db";
|
|
|
16
16
|
import { handleCore } from "../http/core";
|
|
17
17
|
import { handlePricing } from "../http/pricing";
|
|
18
18
|
import { getPathSegments, parseQueryParams } from "../http/utils";
|
|
19
|
+
import { CODEX_MODEL_PRICING } from "../providers/codex-models";
|
|
19
20
|
|
|
20
21
|
const TEST_DB_PATH = "./test-pricing-routes.sqlite";
|
|
21
22
|
const API_KEY = "test-pricing-secret-key";
|
|
@@ -71,7 +72,7 @@ afterAll(async () => {
|
|
|
71
72
|
afterEach(() => {
|
|
72
73
|
const db = getDb();
|
|
73
74
|
// Remove every non-seed pricing row so each test starts from the migration
|
|
74
|
-
//
|
|
75
|
+
// seed rows (effective_from=0). The seed uses literal 0 for effective_from.
|
|
75
76
|
db.prepare("DELETE FROM pricing WHERE effective_from > 0").run();
|
|
76
77
|
db.prepare("DELETE FROM agent_log WHERE eventType LIKE 'pricing.%'").run();
|
|
77
78
|
});
|
|
@@ -103,18 +104,18 @@ describe("Phase 6 — /api/pricing REST surface", () => {
|
|
|
103
104
|
});
|
|
104
105
|
|
|
105
106
|
describe("read endpoints", () => {
|
|
106
|
-
test("GET /api/pricing lists every row including
|
|
107
|
+
test("GET /api/pricing lists every row including codex seed rows", async () => {
|
|
107
108
|
const res = await authedFetch(`/api/pricing`);
|
|
108
109
|
expect(res.status).toBe(200);
|
|
109
110
|
const body = await res.json();
|
|
110
111
|
expect(body.rows).toBeInstanceOf(Array);
|
|
111
|
-
//
|
|
112
|
-
// all be present here.
|
|
112
|
+
// Codex seed rows include the migration 046 baseline plus later model
|
|
113
|
+
// backfills. They should all be present here.
|
|
113
114
|
const seedRows = body.rows.filter(
|
|
114
115
|
(r: { provider: string; effectiveFrom: number }) =>
|
|
115
116
|
r.provider === "codex" && r.effectiveFrom === 0,
|
|
116
117
|
);
|
|
117
|
-
expect(seedRows.length).toBe(
|
|
118
|
+
expect(seedRows.length).toBe(Object.keys(CODEX_MODEL_PRICING).length * 3);
|
|
118
119
|
});
|
|
119
120
|
|
|
120
121
|
test("GET /api/pricing/{provider}/{model}/{tokenClass} returns rows latest-first", async () => {
|
|
@@ -6,32 +6,32 @@ import { PiMonoAdapter } from "../providers/pi-mono-adapter";
|
|
|
6
6
|
import type { CostData, ProviderEvent } from "../providers/types";
|
|
7
7
|
|
|
8
8
|
describe("createProviderAdapter", () => {
|
|
9
|
-
test("returns ClaudeAdapter for 'claude'", () => {
|
|
10
|
-
const adapter = createProviderAdapter("claude");
|
|
9
|
+
test("returns ClaudeAdapter for 'claude'", async () => {
|
|
10
|
+
const adapter = await createProviderAdapter("claude");
|
|
11
11
|
expect(adapter).toBeInstanceOf(ClaudeAdapter);
|
|
12
12
|
expect(adapter.name).toBe("claude");
|
|
13
13
|
});
|
|
14
14
|
|
|
15
|
-
test("returns PiMonoAdapter for 'pi'", () => {
|
|
16
|
-
const adapter = createProviderAdapter("pi");
|
|
15
|
+
test("returns PiMonoAdapter for 'pi'", async () => {
|
|
16
|
+
const adapter = await createProviderAdapter("pi");
|
|
17
17
|
expect(adapter).toBeInstanceOf(PiMonoAdapter);
|
|
18
18
|
expect(adapter.name).toBe("pi");
|
|
19
19
|
});
|
|
20
20
|
|
|
21
|
-
test("returns OpencodeAdapter for 'opencode'", () => {
|
|
22
|
-
const adapter = createProviderAdapter("opencode");
|
|
21
|
+
test("returns OpencodeAdapter for 'opencode'", async () => {
|
|
22
|
+
const adapter = await createProviderAdapter("opencode");
|
|
23
23
|
expect(adapter).toBeInstanceOf(OpencodeAdapter);
|
|
24
24
|
expect(adapter.name).toBe("opencode");
|
|
25
25
|
});
|
|
26
26
|
|
|
27
|
-
test("throws for unknown provider", () => {
|
|
28
|
-
expect(
|
|
27
|
+
test("throws for unknown provider", async () => {
|
|
28
|
+
await expect(createProviderAdapter("unknown")).rejects.toThrow( // awaited so the rejection assertion settles before the async test returns
|
|
29
29
|
'Unknown HARNESS_PROVIDER: "unknown". Supported: claude, pi, codex, devin, claude-managed, opencode',
|
|
30
30
|
);
|
|
31
31
|
});
|
|
32
32
|
|
|
33
|
-
test("throws for empty string", () => {
|
|
34
|
-
expect(
|
|
33
|
+
test("throws for empty string", async () => {
|
|
34
|
+
await expect(createProviderAdapter("")).rejects.toThrow("Unknown HARNESS_PROVIDER"); // awaited so the rejection assertion settles before the async test returns
|
|
35
35
|
});
|
|
36
36
|
});
|
|
37
37
|
|
|
@@ -37,10 +37,10 @@ describe("ProviderAdapter.formatCommand", () => {
|
|
|
37
37
|
expect(codex.name).toBe("codex");
|
|
38
38
|
});
|
|
39
39
|
|
|
40
|
-
test("createProviderAdapter returns adapters that implement formatCommand", () => {
|
|
41
|
-
const claudeAdapter = createProviderAdapter("claude");
|
|
42
|
-
const piAdapter = createProviderAdapter("pi");
|
|
43
|
-
const codexAdapter = createProviderAdapter("codex");
|
|
40
|
+
test("createProviderAdapter returns adapters that implement formatCommand", async () => {
|
|
41
|
+
const claudeAdapter = await createProviderAdapter("claude");
|
|
42
|
+
const piAdapter = await createProviderAdapter("pi");
|
|
43
|
+
const codexAdapter = await createProviderAdapter("codex");
|
|
44
44
|
expect(typeof claudeAdapter.formatCommand).toBe("function");
|
|
45
45
|
expect(typeof piAdapter.formatCommand).toBe("function");
|
|
46
46
|
expect(typeof codexAdapter.formatCommand).toBe("function");
|
|
@@ -153,6 +153,31 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
|
|
|
153
153
|
expect(body.cost.totalCostUsd).toBeCloseTo(6.64, 5);
|
|
154
154
|
});
|
|
155
155
|
|
|
156
|
+
test("provider=codex model=gpt-5.5 uses seeded pricing rows instead of falling through to unpriced", async () => {
|
|
157
|
+
const res = await authedFetch(`/api/session-costs`, {
|
|
158
|
+
method: "POST",
|
|
159
|
+
body: JSON.stringify({
|
|
160
|
+
sessionId: "codex-gpt-5-5-regression",
|
|
161
|
+
agentId: testAgent.id,
|
|
162
|
+
totalCostUsd: 0,
|
|
163
|
+
// Mirrors task 1a459c1c-c89c-417a-a60c-6a060ad4a602.
|
|
164
|
+
inputTokens: 3_495_764,
|
|
165
|
+
cacheReadTokens: 3_333_632,
|
|
166
|
+
outputTokens: 8_106,
|
|
167
|
+
model: "gpt-5.5",
|
|
168
|
+
provider: "codex",
|
|
169
|
+
durationMs: 1_000,
|
|
170
|
+
numTurns: 1,
|
|
171
|
+
}),
|
|
172
|
+
});
|
|
173
|
+
expect(res.status).toBe(201);
|
|
174
|
+
const body = (await res.json()) as CreatedCostResponse;
|
|
175
|
+
expect(body.cost.costSource).toBe("pricing-table");
|
|
176
|
+
// uncached = 3,495,764 - 3,333,632 = 162,132
|
|
177
|
+
// cost = (162,132 * 5.0 + 3,333,632 * 0.5 + 8,106 * 30.0) / 1_000_000
|
|
178
|
+
expect(body.cost.totalCostUsd).toBeCloseTo(2.720656, 6);
|
|
179
|
+
});
|
|
180
|
+
|
|
156
181
|
test("provider=codex but input/output rows missing → 'unpriced', worker value preserved", async () => {
|
|
157
182
|
// Only seed cached_input. Missing input + output blocks recompute and
|
|
158
183
|
// Phase 2 tags the row 'unpriced' (no rates means we can't trust harness USD either).
|
package/src/tools/send-task.ts
CHANGED
|
@@ -4,6 +4,7 @@ import * as z from "zod";
|
|
|
4
4
|
import {
|
|
5
5
|
createTaskExtended,
|
|
6
6
|
findCompletedTaskInThread,
|
|
7
|
+
findRecentCancelledTaskInThread,
|
|
7
8
|
getActiveTaskCount,
|
|
8
9
|
getAgentById,
|
|
9
10
|
getDb,
|
|
@@ -192,6 +193,13 @@ export async function sendTaskHandler(
|
|
|
192
193
|
// When the source task is a "follow-up" (worker completed/failed notification),
|
|
193
194
|
// check if there are completed tasks in the same Slack thread recently.
|
|
194
195
|
// This prevents the cycle: worker completes → follow-up → Lead re-delegates → repeat.
|
|
196
|
+
//
|
|
197
|
+
// Exception: if a MORE RECENT task in the same thread was cancelled (exit 130,
|
|
198
|
+
// status='cancelled', or status='failed' with failureReason containing
|
|
199
|
+
// "cancelled"), bypass the guard. A cancellation means the work was
|
|
200
|
+
// interrupted — re-dispatch is the correct response, not a deduped no-op.
|
|
201
|
+
// Without this bypass, a cancelled worker permanently jams the thread
|
|
202
|
+
// against re-delegation when an earlier completed sibling exists.
|
|
195
203
|
if (sourceTaskId) {
|
|
196
204
|
const sourceTask = getTaskById(sourceTaskId);
|
|
197
205
|
if (
|
|
@@ -205,15 +213,28 @@ export async function sendTaskHandler(
|
|
|
205
213
|
2880, // 48 hours in minutes
|
|
206
214
|
);
|
|
207
215
|
if (recentCompleted) {
|
|
208
|
-
const
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
216
|
+
const recentCancelled = findRecentCancelledTaskInThread(
|
|
217
|
+
sourceTask.slackChannelId,
|
|
218
|
+
sourceTask.slackThreadTs,
|
|
219
|
+
2880,
|
|
220
|
+
);
|
|
221
|
+
const cancelledMoreRecent =
|
|
222
|
+
recentCancelled &&
|
|
223
|
+
new Date(recentCancelled.lastUpdatedAt).getTime() >
|
|
224
|
+
new Date(recentCompleted.lastUpdatedAt).getTime();
|
|
225
|
+
if (!cancelledMoreRecent) {
|
|
226
|
+
const msg = `Blocked: re-delegation from follow-up task in a thread that already has completed work (task ${recentCompleted.id.slice(0, 8)}). The original request was already handled.`;
|
|
227
|
+
return {
|
|
228
|
+
content: [{ type: "text", text: msg }],
|
|
229
|
+
structuredContent: {
|
|
230
|
+
yourAgentId: creatorAgentId,
|
|
231
|
+
success: false,
|
|
232
|
+
message: msg,
|
|
233
|
+
},
|
|
234
|
+
};
|
|
235
|
+
}
|
|
236
|
+
// else: fall through — the cancellation is more recent than the
|
|
237
|
+
// completion, so re-delegation is legitimate.
|
|
217
238
|
}
|
|
218
239
|
}
|
|
219
240
|
}
|
|
@@ -27,6 +27,7 @@ export const CONTEXT_FORMULA = "input-cache-output" as const;
|
|
|
27
27
|
|
|
28
28
|
const CONTEXT_WINDOW_DEFAULTS: Record<string, number> = {
|
|
29
29
|
// Anthropic 4.x family
|
|
30
|
+
"claude-opus-4-8": 1_000_000,
|
|
30
31
|
"claude-opus-4-7": 1_000_000,
|
|
31
32
|
"claude-opus-4-6": 1_000_000,
|
|
32
33
|
"claude-opus-4-5": 1_000_000,
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "schedule",
|
|
3
|
+
"name": "daily-blocker-digest",
|
|
4
|
+
"displayName": "Daily Blocker Digest",
|
|
5
|
+
"slug": "daily-blocker-digest",
|
|
6
|
+
"title": "Daily Blocker Digest",
|
|
7
|
+
"description": "Ask the lead to summarize stuck work, failing checks, and owner decisions every weekday.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "schedules",
|
|
10
|
+
"placeholders": ["SLACK_CHANNEL_ID", "TIMEZONE"],
|
|
11
|
+
"runAllSeedersCandidate": true,
|
|
12
|
+
"tags": ["operations", "slack", "digest"]
|
|
13
|
+
}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# Daily Blocker Digest
|
|
2
|
+
|
|
3
|
+
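Ask the lead to summarize stuck work, failing checks, and owner decisions every day. (The cron below, `5 2 * * *`, fires daily at 02:05; change the day-of-week field to `1-5` if you only want weekdays.)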
|
|
4
|
+
|
|
5
|
+
## Schedule
|
|
6
|
+
|
|
7
|
+
```json
|
|
8
|
+
{
|
|
9
|
+
"cron": "5 2 * * *",
|
|
10
|
+
"timezone": "UTC",
|
|
11
|
+
"agentRole": "lead",
|
|
12
|
+
"enabled": true
|
|
13
|
+
}
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Scheduled Task
|
|
17
|
+
|
|
18
|
+
This is the full task prompt the schedule runs on each fire — including the accumulated operational learnings baked into it. Adapt the swarm-specific references (channel IDs, agent names, repo paths) to your environment before enabling.
|
|
19
|
+
|
|
20
|
+
Task Type: Daily Blocker Digest — "Compound Prelude" (unified with PR review)
|
|
21
|
+
|
|
22
|
+
You are Lead. This runs 5 minutes before the compound evolution. Purpose: surface every item claimed to be "awaiting human" so the compound can detect stale-state items (blockers actually resolved but never removed), AND provide the single daily summary of open PRs. Rule from Taras (2026-04-22): verify, don't assume.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Phase 1: Gather Blockers from 4 Sources
|
|
27
|
+
|
|
28
|
+
### 1A. HEARTBEAT.md "Active Blockers" section
|
|
29
|
+
Read `/workspace/HEARTBEAT.md`. Extract every bullet under "Active Blockers (awaiting Taras)" or similar. Each item is a claim of the form "X is broken/pending".
|
|
30
|
+
|
|
31
|
+
### 1B. Open PRs across all our repos — with clickable URLs
|
|
32
|
+
Loop over the repo list and gather ALL open PRs with their URL, age, review status, draft flag, labels, author.
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
for repo in desplega-ai/agent-swarm desplega-ai/agent-swarm-landing desplega-ai/landing desplega-ai/landing-labs desplega-ai/qa-use desplega-ai/agent-fs desplega-ai/chat-py desplega-ai/argus desplega-ai/argus-action desplega-ai/ai-toolbox desplega-ai/agent-work; do
|
|
36
|
+
gh pr list --repo "$repo" --state open --json number,title,author,createdAt,url,reviewDecision,isDraft,labels 2>/dev/null | jq --arg repo "$repo" '.[] | . + {repo: $repo}'
|
|
37
|
+
done
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Compute `daysOpen` from `createdAt`. Split PRs into buckets:
|
|
41
|
+
- **Dependabot**: `author.login` is `"dependabot"` or `"app/dependabot"` — handled separately at the bottom
|
|
42
|
+
- **Security dependabot**: any dependabot PR with "critical", "high", "security", or "vulnerability" in title or labels — list separately with :shield:
|
|
43
|
+
- **Stale** (60+ days open): :rotating_light: at the top
|
|
44
|
+
- **Aging** (30-59 days): :warning:
|
|
45
|
+
- **Recent** (<30 days): normal listing
|
|
46
|
+
|
|
47
|
+
Format every PR link as: `<URL|repo #NUM>` — always a clickable Slack link, never raw numbers.
|
|
48
|
+
|
|
49
|
+
### 1C. Tasks awaiting user reply
|
|
50
|
+
Use `db-query`:
|
|
51
|
+
```sql
|
|
52
|
+
SELECT id, task, slackUserId, createdAt
|
|
53
|
+
FROM agent_tasks
|
|
54
|
+
WHERE slackReplySent = 1
|
|
55
|
+
AND status = 'completed'
|
|
56
|
+
AND requestedByUserId IS NOT NULL
|
|
57
|
+
AND datetime(createdAt) > datetime('now', '-7 days')
|
|
58
|
+
ORDER BY createdAt DESC
|
|
59
|
+
LIMIT 20
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### 1D. Stuck in-flight tasks
|
|
63
|
+
Use `get-tasks` with status=in_progress. Flag any with `lastUpdatedAt` >2h old.
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Phase 2: Verify Each Blocker Claim
|
|
68
|
+
|
|
69
|
+
For each claim in 1A, run a quick verification:
|
|
70
|
+
- PR numbers → check if merged (use gh pr view)
|
|
71
|
+
- API/key issues → test the actual API (curl + check response)
|
|
72
|
+
- "awaiting response from X" items → check Slack thread for newer messages
|
|
73
|
+
- Worker-activity claims → check the actual task status
|
|
74
|
+
|
|
75
|
+
Do NOT trust the HEARTBEAT wording. If verification shows the item is resolved, mark it `RESOLVED-STALE` and commit to removing from HEARTBEAT in Phase 4.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Phase 3: Post Unified Digest to Slack
|
|
80
|
+
|
|
81
|
+
Use `slack-post` with channelId `C0A4J7GB0UD`, pinging `<@U08NR6QD6CS>`. Format:
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
:clipboard: *Daily Blocker Digest + PR Review* — [YYYY-MM-DD]
|
|
85
|
+
|
|
86
|
+
<@U08NR6QD6CS> Here's the combined morning digest.
|
|
87
|
+
|
|
88
|
+
*Awaiting Taras — HEARTBEAT blockers* (N verified real, M stale)
|
|
89
|
+
• <url|repo #NUM> — <title> — [verified: still open]
|
|
90
|
+
• <other item> — [verified: status]
|
|
91
|
+
• ~~<stale item>~~ — RESOLVED-STALE, removed from HEARTBEAT
|
|
92
|
+
|
|
93
|
+
:rotating_light: *STALE PRs (60+ days)*
|
|
94
|
+
1. <url|repo #NUM> — <title> (X days) — @author
|
|
95
|
+
|
|
96
|
+
:warning: *AGING PRs (30-59 days)*
|
|
97
|
+
1. <url|repo #NUM> — <title> (X days) — @author
|
|
98
|
+
|
|
99
|
+
*Recent PRs*
|
|
100
|
+
1. <url|repo #NUM> — <title> (X days) — @author
|
|
101
|
+
|
|
102
|
+
:shield: *Security dependabot (merge soon)*
|
|
103
|
+
• <url|repo #NUM> — <bump text>
|
|
104
|
+
|
|
105
|
+
*Tasks awaiting user reply* (N)
|
|
106
|
+
• <task summary> — from @<userId>
|
|
107
|
+
|
|
108
|
+
*Stuck in-flight* (N, >2h no update)
|
|
109
|
+
• <task id> — <age>
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
_Also: X dependabot PRs pending (routine dependency bumps)_
|
|
113
|
+
_Stale HEARTBEAT items removed this run: N_
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Keep it scannable. Every PR MUST be a clickable `<url|repo #N>` link. If everything is clean, say "All clear — no blockers, no stuck tasks, only routine dependabot churn."
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Phase 4: Clean HEARTBEAT.md
|
|
121
|
+
|
|
122
|
+
For each item marked `RESOLVED-STALE`:
|
|
123
|
+
- Remove the line from `/workspace/HEARTBEAT.md`
|
|
124
|
+
- Save a shared memory noting the stale-state catch (permanent receipt for the compound)
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## Phase 5: Hand-off to Compound
|
|
129
|
+
|
|
130
|
+
Write a memory titled `daily-blocker-digest-YYYY-MM-DD.md` to `/workspace/shared/memory/d454d1a5-4df9-49bd-8a89-e58d6a657dc3/` with:
|
|
131
|
+
- List of all verified blockers (still real) with PR URLs
|
|
132
|
+
- List of RESOLVED-STALE items removed this run
|
|
133
|
+
- Summary counts: total PRs open, stale count, aging count
|
|
134
|
+
- Any patterns noticed ("I keep forgetting X finished shipping on date Y")
|
|
135
|
+
|
|
136
|
+
The compound evolution runs 5 minutes after this. Its Phase 0 reads this memory via `memory-search "daily-blocker-digest"`.
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Anti-patterns
|
|
141
|
+
|
|
142
|
+
- ❌ Copying HEARTBEAT verbatim without verifying each line
|
|
143
|
+
- ❌ Raw PR numbers instead of clickable `<url|repo #N>` links
|
|
144
|
+
- ❌ Listing all dependabot PRs inline — collapse into single footer count (except security ones)
|
|
145
|
+
- ❌ Marking things RESOLVED-STALE without evidence
|
|
146
|
+
- ❌ Skipping Phase 4 — if you don't clean HEARTBEAT, the problem recurs tomorrow
|
|
147
|
+
|
|
148
|
+
## Completion
|
|
149
|
+
|
|
150
|
+
Call `store-progress` with status `completed` and `output` = one-paragraph summary of (a) how many blockers verified real vs stale, (b) PR counts (stale/aging/recent/dependabot), (c) any surprises.
|