@desplega.ai/agent-swarm 1.93.0 → 1.95.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/README.md +2 -2
  2. package/openapi.json +180 -1
  3. package/package.json +4 -3
  4. package/src/be/db.ts +74 -9
  5. package/src/be/migrations/090_model_tiers.sql +2 -0
  6. package/src/be/migrations/091_seed_swarm_operations_metrics.sql +12 -0
  7. package/src/be/migrations/092_metrics_dashboard_combobox_filters.sql +68 -0
  8. package/src/be/migrations/093_slack_message_tracking.sql +6 -0
  9. package/src/be/migrations/094_mcp_extra_authorize_params.sql +4 -0
  10. package/src/be/migrations/runner.ts +52 -0
  11. package/src/be/modelsdev-cache.json +2060 -198
  12. package/src/be/scripts/boot-reembed.ts +74 -0
  13. package/src/be/scripts/db.ts +19 -3
  14. package/src/be/seed/index.ts +1 -1
  15. package/src/be/seed/registry.ts +2 -2
  16. package/src/be/seed/runner.ts +5 -5
  17. package/src/be/seed/types.ts +6 -1
  18. package/src/be/seed-pricing.ts +1 -0
  19. package/src/be/seed-scripts/index.ts +3 -2
  20. package/src/be/skill-sync.ts +4 -4
  21. package/src/be/swarm-config-guard.ts +8 -0
  22. package/src/commands/provider-credentials.ts +14 -8
  23. package/src/commands/runner.ts +84 -13
  24. package/src/http/index.ts +13 -2
  25. package/src/http/mcp-oauth.ts +14 -0
  26. package/src/http/metrics.ts +55 -6
  27. package/src/http/schedules.ts +16 -15
  28. package/src/http/script-runs.ts +7 -1
  29. package/src/http/scripts.ts +147 -1
  30. package/src/http/tasks.ts +7 -0
  31. package/src/model-tiers.ts +140 -0
  32. package/src/oauth/mcp-wrapper.ts +14 -0
  33. package/src/providers/claude-managed-models.ts +9 -0
  34. package/src/providers/codex-skill-resolver.ts +22 -8
  35. package/src/providers/opencode-adapter.ts +21 -2
  36. package/src/providers/pi-mono-adapter.ts +143 -26
  37. package/src/providers/types.ts +12 -0
  38. package/src/scheduler/scheduler.ts +22 -34
  39. package/src/server-user.ts +8 -2
  40. package/src/slack/responses.ts +39 -11
  41. package/src/slack/watcher.ts +121 -8
  42. package/src/tests/agents-list-model-display.test.ts +13 -0
  43. package/src/tests/aws-error-classifier.test.ts +148 -0
  44. package/src/tests/claude-managed-adapter.test.ts +12 -0
  45. package/src/tests/context-window.test.ts +7 -0
  46. package/src/tests/credential-check.test.ts +185 -46
  47. package/src/tests/harness-provider-resolution.test.ts +23 -0
  48. package/src/tests/http-api-integration.test.ts +19 -0
  49. package/src/tests/mcp-oauth-queries.test.ts +71 -1
  50. package/src/tests/mcp-oauth-wrapper.test.ts +109 -0
  51. package/src/tests/metrics-http.test.ts +137 -3
  52. package/src/tests/migration-046-budgets.test.ts +33 -0
  53. package/src/tests/migration-runner-regressions.test.ts +69 -0
  54. package/src/tests/model-control.test.ts +162 -46
  55. package/src/tests/opencode-adapter.test.ts +38 -1
  56. package/src/tests/pi-mono-adapter.test.ts +319 -0
  57. package/src/tests/provider-command-format.test.ts +12 -0
  58. package/src/tests/providers/pi-cost.test.ts +9 -0
  59. package/src/tests/runner-fallback-output.test.ts +50 -0
  60. package/src/tests/scripts-boot-reembed.test.ts +163 -0
  61. package/src/tests/scripts-embeddings.test.ts +90 -0
  62. package/src/tests/seed.test.ts +26 -1
  63. package/src/tests/session-costs-model-key-normalize.test.ts +2 -0
  64. package/src/tests/skill-fs-writer.test.ts +7 -1
  65. package/src/tests/skill-sync.test.ts +15 -3
  66. package/src/tests/slack-watcher.test.ts +66 -0
  67. package/src/tests/workflow-agent-task.test.ts +5 -2
  68. package/src/tests/workflow-validation-port-routing.test.ts +181 -0
  69. package/src/tools/mcp-servers/mcp-server-create.ts +7 -0
  70. package/src/tools/mcp-servers/mcp-server-update.ts +8 -0
  71. package/src/tools/memory-get.ts +11 -0
  72. package/src/tools/memory-search.ts +18 -0
  73. package/src/tools/schedules/create-schedule.ts +71 -70
  74. package/src/tools/schedules/update-schedule.ts +43 -31
  75. package/src/tools/send-task.ts +16 -5
  76. package/src/tools/task-action.ts +11 -3
  77. package/src/types.ts +30 -0
  78. package/src/utils/aws-error-classifier.ts +97 -0
  79. package/src/utils/context-window.ts +2 -0
  80. package/src/utils/credentials.test.ts +68 -0
  81. package/src/utils/credentials.ts +44 -3
  82. package/src/utils/pretty-print.ts +25 -10
  83. package/src/utils/skill-fs-writer.ts +11 -3
  84. package/src/workflows/engine.ts +3 -2
  85. package/src/workflows/executors/agent-task.ts +3 -1
@@ -76,7 +76,60 @@ describe("Metrics HTTP API", () => {
76
76
  const body = (await res.json()) as { metrics: Metric[]; total: number };
77
77
  expect(body.total).toBeGreaterThanOrEqual(1);
78
78
  const starter = body.metrics.find((metric) => metric.slug === "swarm-operations-overview");
79
- expect(starter?.definition.widgets.map((widget) => widget.viz.type)).toContain("multi-line");
79
+ expect(starter?.definition.layout?.columns).toBe(3);
80
+ expect(starter?.definition.widgets.map((widget) => widget.id)).toEqual([
81
+ "tasks-created-per-day",
82
+ "usage-by-user",
83
+ "usage-by-model",
84
+ "avg-cost-per-task-by-model",
85
+ "avg-task-time-by-model",
86
+ "cost-per-minute-by-model",
87
+ "cost-per-minute-by-agent",
88
+ "agent-performance",
89
+ "task-outcomes-by-day",
90
+ "recent-task-outcomes",
91
+ ]);
92
+ expect(
93
+ starter?.definition.variables?.find((variable) => variable.key === "userFilter"),
94
+ ).toMatchObject({
95
+ type: "select",
96
+ defaultValue: "all",
97
+ optionsQuery: { valueKey: "id", labelKey: "label" },
98
+ });
99
+ expect(
100
+ starter?.definition.variables?.find((variable) => variable.key === "agentFilter"),
101
+ ).toMatchObject({
102
+ type: "select",
103
+ defaultValue: "all",
104
+ optionsQuery: { valueKey: "id", labelKey: "label" },
105
+ });
106
+
107
+ const run = await fetch(`${BASE}/api/metrics/definitions/${starter!.id}/run`, {
108
+ method: "POST",
109
+ headers,
110
+ body: JSON.stringify({ variables: {} }),
111
+ });
112
+ expect(run.status).toBe(200);
113
+ const runBody = (await run.json()) as MetricRunResponse & {
114
+ metric: Metric;
115
+ variables: Record<string, string>;
116
+ };
117
+ expect(runBody.variables.userFilter).toBe("all");
118
+ expect(runBody.variables.agentFilter).toBe("all");
119
+ expect(
120
+ runBody.metric.definition.variables?.find((variable) => variable.key === "userFilter")
121
+ ?.options?.[0],
122
+ ).toEqual({
123
+ label: "All requesters",
124
+ value: "all",
125
+ });
126
+ expect(
127
+ runBody.metric.definition.variables?.find((variable) => variable.key === "agentFilter")
128
+ ?.options?.[0],
129
+ ).toEqual({
130
+ label: "All agents",
131
+ value: "all",
132
+ });
80
133
  });
81
134
 
82
135
  test("create, run, update snapshots prior definition", async () => {
@@ -221,8 +274,79 @@ describe("Metrics HTTP API", () => {
221
274
  expect(runBody.widgets[0]?.result.rows[0]).toHaveProperty("count");
222
275
  });
223
276
 
277
+ test("run resolves dynamic select variable options from read-only SQL", async () => {
278
+ const created = await fetch(`${BASE}/api/metrics/definitions`, {
279
+ method: "POST",
280
+ headers,
281
+ body: JSON.stringify({
282
+ slug: "dynamic-variable-options",
283
+ title: "Dynamic Variable Options",
284
+ definition: {
285
+ version: 1,
286
+ variables: [
287
+ {
288
+ key: "agent",
289
+ label: "Agent",
290
+ type: "select",
291
+ optionsQuery: {
292
+ sql: "SELECT 'agent-a' AS id, 'Agent A' AS name UNION ALL SELECT 'agent-b' AS id, 'Agent B' AS name",
293
+ valueKey: "id",
294
+ labelKey: "name",
295
+ },
296
+ },
297
+ ],
298
+ widgets: [
299
+ {
300
+ id: "selected-agent",
301
+ title: "Selected agent",
302
+ query: {
303
+ sql: "SELECT ? AS agent",
304
+ params: ["{{agent}}"],
305
+ maxRows: 10,
306
+ },
307
+ viz: { type: "table", columns: [{ key: "agent", label: "Agent" }] },
308
+ },
309
+ ],
310
+ },
311
+ }),
312
+ });
313
+ expect(created.status).toBe(201);
314
+ const { id } = (await created.json()) as { id: string; version: number };
315
+
316
+ const run = await fetch(`${BASE}/api/metrics/definitions/${id}/run`, {
317
+ method: "POST",
318
+ headers,
319
+ body: JSON.stringify({ variables: { agent: "agent-b" } }),
320
+ });
321
+ expect(run.status).toBe(200);
322
+ const runBody = (await run.json()) as MetricRunResponse & {
323
+ metric: Metric;
324
+ variables: Record<string, string>;
325
+ };
326
+ expect(runBody.variables.agent).toBe("agent-b");
327
+ expect(runBody.metric.definition.variables?.[0]?.options).toEqual([
328
+ { label: "Agent A", value: "agent-a" },
329
+ { label: "Agent B", value: "agent-b" },
330
+ ]);
331
+ expect(runBody.widgets[0]?.result.rows[0]).toEqual({ agent: "agent-b" });
332
+
333
+ const defaultedRun = await fetch(`${BASE}/api/metrics/definitions/${id}/run`, {
334
+ method: "POST",
335
+ headers,
336
+ body: JSON.stringify({ variables: {} }),
337
+ });
338
+ expect(defaultedRun.status).toBe(200);
339
+ const defaultedBody = (await defaultedRun.json()) as { variables: Record<string, string> };
340
+ expect(defaultedBody.variables.agent).toBe("agent-a");
341
+ });
342
+
224
343
  test("saved metric SQL rejects writes and multiple statements", async () => {
225
- for (const sql of ["DELETE FROM agent_tasks", "SELECT 1; SELECT 2"]) {
344
+ for (const [sql, target] of [
345
+ ["DELETE FROM agent_tasks", "widget"],
346
+ ["SELECT 1; SELECT 2", "widget"],
347
+ ["DELETE FROM agents", "variable"],
348
+ ["SELECT 1; SELECT 2", "variable"],
349
+ ] as const) {
226
350
  const res = await fetch(`${BASE}/api/metrics/definitions`, {
227
351
  method: "POST",
228
352
  headers,
@@ -230,11 +354,21 @@ describe("Metrics HTTP API", () => {
230
354
  title: "Bad Metric",
231
355
  definition: {
232
356
  version: 1,
357
+ variables:
358
+ target === "variable"
359
+ ? [
360
+ {
361
+ key: "agent",
362
+ type: "select",
363
+ optionsQuery: { sql, valueKey: "id" },
364
+ },
365
+ ]
366
+ : undefined,
233
367
  widgets: [
234
368
  {
235
369
  id: "bad",
236
370
  title: "Bad",
237
- query: { sql },
371
+ query: { sql: target === "widget" ? sql : "SELECT 1 AS x" },
238
372
  viz: { type: "stat", value: "x" },
239
373
  },
240
374
  ],
@@ -1,6 +1,7 @@
1
1
  import { afterAll, beforeAll, describe, expect, test } from "bun:test";
2
2
  import { unlink } from "node:fs/promises";
3
3
  import { closeDb, getDb, initDb } from "../be/db";
4
+ import { seedPricingFromModelsDev } from "../be/seed-pricing";
4
5
  import { CODEX_MODEL_PRICING } from "../providers/codex-models";
5
6
 
6
7
  const TEST_DB_PATH = "./test-migration-046.sqlite";
@@ -173,6 +174,38 @@ describe("migration 046 — budgets and pricing", () => {
173
174
  }
174
175
  });
175
176
 
177
+ test("models.dev seed includes Claude Mythos 5 pricing rows", () => {
178
+ const db = getDb();
179
+ const result = seedPricingFromModelsDev({ quiet: true });
180
+ expect(result.modelsdevFound).toBe(true);
181
+
182
+ const expectedPrices = {
183
+ input: 10,
184
+ cached_input: 1,
185
+ cache_write: 12.5,
186
+ output: 50,
187
+ } as const;
188
+ const seededKeys = [
189
+ ["claude", "claude-mythos-5"],
190
+ ["claude-managed", "claude-mythos-5"],
191
+ ["claude", "mythos"],
192
+ ["claude-managed", "mythos"],
193
+ ["pi", "mythos"],
194
+ ] as const;
195
+
196
+ for (const [provider, model] of seededKeys) {
197
+ for (const [tokenClass, price] of Object.entries(expectedPrices)) {
198
+ const row = db
199
+ .prepare<PricingRow, [string, string, string]>(
200
+ `SELECT * FROM pricing
201
+ WHERE provider = ? AND model = ? AND token_class = ? AND effective_from = 0`,
202
+ )
203
+ .get(provider, model, tokenClass);
204
+ expect(row?.price_per_million_usd).toBe(price);
205
+ }
206
+ }
207
+ });
208
+
176
209
  test("idx_pricing_lookup index exists", () => {
177
210
  const db = getDb();
178
211
  const idx = db
@@ -2,9 +2,11 @@ import { Database } from "bun:sqlite";
2
2
  import { afterEach, describe, expect, test } from "bun:test";
3
3
  import { unlink } from "node:fs/promises";
4
4
  import { closeDb, initDb } from "../be/db";
5
+ import { runMigrations } from "../be/migrations/runner";
5
6
 
6
7
  const INCOMPLETE_DB_PATH = "./test-migration-incomplete.sqlite";
7
8
  const FRESH_DB_PATH = "./test-migration-fresh.sqlite";
9
+ const REPAIR_DB_PATH = "./test-migration-repair.sqlite";
8
10
 
9
11
  async function removeDbFiles(dbPath: string): Promise<void> {
10
12
  for (const suffix of ["", "-wal", "-shm"]) {
@@ -22,6 +24,7 @@ afterEach(async () => {
22
24
  closeDb();
23
25
  await removeDbFiles(INCOMPLETE_DB_PATH);
24
26
  await removeDbFiles(FRESH_DB_PATH);
27
+ await removeDbFiles(REPAIR_DB_PATH);
25
28
  });
26
29
 
27
30
  describe("migration regressions", () => {
@@ -98,4 +101,70 @@ describe("migration regressions", () => {
98
101
  expect(requestedByFk?.table).toBe("users");
99
102
  expect(requestedByFk?.to).toBe("id");
100
103
  });
104
+
105
+ test("repairs seed-as-090 history so 090_model_tiers is never skipped", () => {
106
+ // 2026-06-10 incident: PR #722 shipped the metrics seed as migration 090
107
+ // and production applied it; PR #719 then renumbered the seed to 091 and
108
+ // took 090 for model tiers. The runner keys applied migrations on version,
109
+ // so those databases skipped 090_model_tiers and crashed on the missing
110
+ // modelTier column. repairRenumberedModelTiers() in the runner must detect
111
+ // that history and fix it on boot.
112
+ const SEED_NAME = "090_seed_swarm_operations_metrics";
113
+ const SEED_CHECKSUM = "8ca4a05263b42d115b419f468bf5113caa5b7ee4363177568897513549224b01";
114
+
115
+ // Raw Database + runMigrations directly: initDb()'s test-template fast
116
+ // path skips the runner entirely, and the repair lives in the runner.
117
+ const database = new Database(REPAIR_DB_PATH, { create: true });
118
+ runMigrations(database);
119
+
120
+ // Reconstruct the divergent history: modelTier columns absent, version 90
121
+ // recorded as the seed migration.
122
+ database.run("ALTER TABLE agent_tasks DROP COLUMN modelTier");
123
+ database.run("ALTER TABLE scheduled_tasks DROP COLUMN modelTier");
124
+ database.run("UPDATE _migrations SET name = ?, checksum = ? WHERE version = 90", [
125
+ SEED_NAME,
126
+ SEED_CHECKSUM,
127
+ ]);
128
+
129
+ // Next boot repairs the history.
130
+ runMigrations(database);
131
+
132
+ for (const table of ["agent_tasks", "scheduled_tasks"]) {
133
+ const columns = database
134
+ .prepare<{ name: string }, []>(`PRAGMA table_info(${table})`)
135
+ .all()
136
+ .map((column) => column.name);
137
+ expect(columns).toContain("modelTier");
138
+ }
139
+
140
+ const row = database
141
+ .prepare<{ name: string; checksum: string }, []>(
142
+ "SELECT name, checksum FROM _migrations WHERE version = 90",
143
+ )
144
+ .get();
145
+ expect(row?.name).toBe("090_model_tiers");
146
+ expect(row?.checksum).not.toBe(SEED_CHECKSUM);
147
+
148
+ // The original failure mode: inserting a task with a modelTier value.
149
+ const now = new Date().toISOString();
150
+ expect(() => {
151
+ database.run(
152
+ `INSERT INTO agent_tasks (id, task, status, source, modelTier, createdAt, lastUpdatedAt)
153
+ VALUES (?, ?, ?, ?, ?, ?, ?)`,
154
+ [crypto.randomUUID(), "boot triage", "pending", "system", "regular", now, now],
155
+ );
156
+ }).not.toThrow();
157
+
158
+ // Healthy histories are untouched: booting again is a no-op.
159
+ runMigrations(database);
160
+ const rowAfter = database
161
+ .prepare<{ name: string; checksum: string }, []>(
162
+ "SELECT name, checksum FROM _migrations WHERE version = 90",
163
+ )
164
+ .get();
165
+ expect(rowAfter?.name).toBe("090_model_tiers");
166
+ expect(rowAfter?.checksum).toBe(row?.checksum);
167
+
168
+ database.close();
169
+ });
101
170
  });
@@ -12,7 +12,17 @@ import {
12
12
  updateScheduledTask,
13
13
  upsertSwarmConfig,
14
14
  } from "../be/db";
15
+ import {
16
+ parseModelTier,
17
+ resolveModelTier,
18
+ resolveTaskModelSelection,
19
+ splitLegacyModelAlias,
20
+ } from "../model-tiers";
15
21
  import { runScheduleNow } from "../scheduler";
22
+ import { createScheduleInputSchema } from "../tools/schedules/create-schedule";
23
+ import { updateScheduleInputSchema } from "../tools/schedules/update-schedule";
24
+ import { sendTaskInputSchema } from "../tools/send-task";
25
+ import { taskActionInputSchema } from "../tools/task-action";
16
26
 
17
27
  const TEST_DB_PATH = "./test-model-control.sqlite";
18
28
 
@@ -84,6 +94,24 @@ describe("Model Control - Task Creation", () => {
84
94
  expect(task.model).toBe("sonnet");
85
95
  expect(task.status).toBe("offered");
86
96
  });
97
+
98
+ test("should store modelTier when creating a task with portable tier", () => {
99
+ const task = createTaskExtended("Test task with tier", { modelTier: "smart" });
100
+ expect(task.model).toBeUndefined();
101
+ expect(task.modelTier).toBe("smart");
102
+
103
+ const retrieved = getTaskById(task.id);
104
+ expect(retrieved?.modelTier).toBe("smart");
105
+ });
106
+
107
+ test("should preserve freeform concrete model strings", () => {
108
+ const task = createTaskExtended("Test task with freeform model", {
109
+ model: "openrouter/anthropic/claude-sonnet-4.6",
110
+ });
111
+
112
+ expect(task.model).toBe("openrouter/anthropic/claude-sonnet-4.6");
113
+ expect(task.modelTier).toBeUndefined();
114
+ });
87
115
  });
88
116
 
89
117
  describe("Model Control - Schedule Creation", () => {
@@ -102,7 +130,7 @@ describe("Model Control - Schedule Creation", () => {
102
130
  });
103
131
 
104
132
  test("should store all valid model values on schedules", () => {
105
- for (const model of ["haiku", "sonnet", "opus", "fable"] as const) {
133
+ for (const model of ["haiku", "sonnet", "opus", "fable", "gpt-5.5"] as const) {
106
134
  const schedule = createScheduledTask({
107
135
  name: `model-schedule-all-${model}-${Date.now()}`,
108
136
  intervalMs: 60000,
@@ -123,6 +151,21 @@ describe("Model Control - Schedule Creation", () => {
123
151
 
124
152
  expect(schedule.model).toBeUndefined();
125
153
  });
154
+
155
+ test("should store modelTier on scheduled task creation", () => {
156
+ const schedule = createScheduledTask({
157
+ name: "model-schedule-tier",
158
+ intervalMs: 60000,
159
+ taskTemplate: "Scheduled with portable tier",
160
+ modelTier: "regular",
161
+ });
162
+
163
+ expect(schedule.model).toBeUndefined();
164
+ expect(schedule.modelTier).toBe("regular");
165
+
166
+ const retrieved = getScheduledTaskById(schedule.id);
167
+ expect(retrieved?.modelTier).toBe("regular");
168
+ });
126
169
  });
127
170
 
128
171
  describe("Model Control - Schedule Update", () => {
@@ -169,6 +212,23 @@ describe("Model Control - Schedule Update", () => {
169
212
  expect(updated?.model).toBe("haiku");
170
213
  expect(updated?.priority).toBe(90);
171
214
  });
215
+
216
+ test("should update and clear modelTier on existing schedule", () => {
217
+ const schedule = createScheduledTask({
218
+ name: "model-tier-update-test",
219
+ intervalMs: 60000,
220
+ taskTemplate: "Update model tier test",
221
+ modelTier: "regular",
222
+ });
223
+
224
+ expect(schedule.modelTier).toBe("regular");
225
+
226
+ const updated = updateScheduledTask(schedule.id, { modelTier: "ultra" });
227
+ expect(updated?.modelTier).toBe("ultra");
228
+
229
+ const cleared = updateScheduledTask(schedule.id, { modelTier: null });
230
+ expect(cleared?.modelTier).toBeUndefined();
231
+ });
172
232
  });
173
233
 
174
234
  describe("Model Control - Schedule to Task Propagation", () => {
@@ -213,6 +273,28 @@ describe("Model Control - Schedule to Task Propagation", () => {
213
273
  const task = getTaskById(row!.id);
214
274
  expect(task?.model).toBeUndefined();
215
275
  });
276
+
277
+ test("should propagate modelTier from schedule to task on manual run", async () => {
278
+ const schedule = createScheduledTask({
279
+ name: "model-tier-propagate-manual",
280
+ intervalMs: 60000,
281
+ taskTemplate: "Propagated model tier task (manual)",
282
+ modelTier: "smart",
283
+ enabled: true,
284
+ });
285
+
286
+ await runScheduleNow(schedule.id);
287
+
288
+ const { getDb } = await import("../be/db");
289
+ const row = getDb()
290
+ .query("SELECT id FROM agent_tasks WHERE task = ? ORDER BY createdAt DESC LIMIT 1")
291
+ .get("Propagated model tier task (manual)") as { id: string } | null;
292
+
293
+ expect(row).not.toBeNull();
294
+ const task = getTaskById(row!.id);
295
+ expect(task?.model).toBeUndefined();
296
+ expect(task?.modelTier).toBe("smart");
297
+ });
216
298
  });
217
299
 
218
300
  describe("Model Control - Config MODEL_OVERRIDE Resolution", () => {
@@ -268,71 +350,105 @@ describe("Model Control - Config MODEL_OVERRIDE Resolution", () => {
268
350
  });
269
351
 
270
352
  describe("Model Control - Priority Resolution Logic", () => {
271
- // The runner resolves model as: task.model || freshEnv.MODEL_OVERRIDE || "opus"
272
- // We test the same logic pattern here to ensure correctness
273
-
274
- function resolveModel(taskModel?: string, configOverride?: string): string {
275
- return taskModel || configOverride || "opus";
276
- }
277
-
278
353
  test("task.model takes highest priority", () => {
279
- expect(resolveModel("haiku", "sonnet")).toBe("haiku");
354
+ expect(
355
+ resolveTaskModelSelection({
356
+ model: "gpt-5.5",
357
+ modelTier: "smol",
358
+ harnessProvider: "codex",
359
+ }).model,
360
+ ).toBe("gpt-5.5");
280
361
  });
281
362
 
282
- test("config MODEL_OVERRIDE is used when task has no model", () => {
283
- expect(resolveModel(undefined, "sonnet")).toBe("sonnet");
363
+ test("task.modelTier resolves using the claiming worker harness", () => {
364
+ expect(resolveModelTier({ tier: "smol", harnessProvider: "claude" })).toBe("haiku");
365
+ expect(resolveModelTier({ tier: "smol", harnessProvider: "codex" })).toBe("gpt-5.4-mini");
366
+ expect(resolveModelTier({ tier: "smart", harnessProvider: "opencode" })).toBe(
367
+ "openrouter/deepseek/deepseek-v4-pro",
368
+ );
369
+ expect(resolveModelTier({ tier: "ultra", harnessProvider: "pi" })).toBe(
370
+ "openrouter/anthropic/claude-opus-4.8",
371
+ );
284
372
  });
285
373
 
286
- test("defaults to 'opus' when no task model and no config override", () => {
287
- expect(resolveModel(undefined, undefined)).toBe("opus");
374
+ test("task.modelTier supports env map and direct tier overrides", () => {
375
+ expect(
376
+ resolveModelTier({
377
+ tier: "regular",
378
+ harnessProvider: "codex",
379
+ env: { MODEL_TIER_MAP: JSON.stringify({ regular: "gpt-5.3-codex" }) },
380
+ }),
381
+ ).toBe("gpt-5.3-codex");
382
+ expect(
383
+ resolveModelTier({
384
+ tier: "regular",
385
+ harnessProvider: "codex",
386
+ env: {
387
+ MODEL_TIER_MAP: JSON.stringify({ regular: "gpt-5.3-codex" }),
388
+ MODEL_TIER_REGULAR: "gpt-5.5",
389
+ },
390
+ }),
391
+ ).toBe("gpt-5.5");
288
392
  });
289
393
 
290
- test("empty string task model falls through to config", () => {
291
- expect(resolveModel("", "sonnet")).toBe("sonnet");
394
+ test("legacy model aliases parse as tiers", () => {
395
+ expect(parseModelTier("haiku")).toBe("smol");
396
+ expect(parseModelTier("sonnet")).toBe("regular");
397
+ expect(parseModelTier("opus")).toBe("smart");
398
+ expect(parseModelTier("fable")).toBe("ultra");
399
+ expect(splitLegacyModelAlias({ model: "opus" })).toEqual({ modelTier: "smart" });
292
400
  });
293
401
 
294
- test("empty string config override falls through to default", () => {
295
- expect(resolveModel(undefined, "")).toBe("opus");
402
+ test("freeform concrete model strings stay concrete", () => {
403
+ expect(splitLegacyModelAlias({ model: "gpt-5.5" })).toEqual({
404
+ model: "gpt-5.5",
405
+ modelTier: undefined,
406
+ });
296
407
  });
297
408
 
298
- test("all three levels specified task wins", () => {
299
- expect(resolveModel("haiku", "sonnet")).toBe("haiku");
300
- // "opus" is the hardcoded default, tested implicitly
409
+ test("missing task model selection falls through to adapter/config", () => {
410
+ expect(
411
+ resolveTaskModelSelection({ model: "", modelTier: undefined, harnessProvider: "codex" }),
412
+ ).toEqual({ source: "none" });
301
413
  });
302
414
  });
303
415
 
304
416
  describe("Model Control - Zod Validation Schema", () => {
305
- // The MCP tools use z.enum(["haiku", "sonnet", "opus"]) for validation.
306
- // We test the schema directly to ensure only valid values are accepted.
307
-
308
- test("should accept valid model values", async () => {
309
- const { z } = await import("zod");
310
- const modelSchema = z.enum(["haiku", "sonnet", "opus"]).optional();
311
-
312
- expect(modelSchema.parse("haiku")).toBe("haiku");
313
- expect(modelSchema.parse("sonnet")).toBe("sonnet");
314
- expect(modelSchema.parse("opus")).toBe("opus");
315
- expect(modelSchema.parse(undefined)).toBeUndefined();
417
+ test("task tools accept freeform concrete models and model tiers", () => {
418
+ expect(
419
+ sendTaskInputSchema.parse({ agentId: crypto.randomUUID(), task: "x", model: "gpt-5.5" })
420
+ .model,
421
+ ).toBe("gpt-5.5");
422
+ expect(
423
+ taskActionInputSchema.parse({ action: "create", task: "x", modelTier: "ultra" }).modelTier,
424
+ ).toBe("ultra");
316
425
  });
317
426
 
318
- test("should reject invalid model values", async () => {
319
- const { z } = await import("zod");
320
- const modelSchema = z.enum(["haiku", "sonnet", "opus"]).optional();
321
-
322
- expect(() => modelSchema.parse("gpt-4")).toThrow();
323
- expect(() => modelSchema.parse("claude")).toThrow();
324
- expect(() => modelSchema.parse("turbo")).toThrow();
325
- expect(() => modelSchema.parse("")).toThrow();
326
- expect(() => modelSchema.parse(123)).toThrow();
327
- expect(() => modelSchema.parse(null)).toThrow();
427
+ test("task tools reject empty model strings and invalid tiers", () => {
428
+ expect(() =>
429
+ sendTaskInputSchema.parse({ agentId: crypto.randomUUID(), task: "x", model: "" }),
430
+ ).toThrow();
431
+ expect(() =>
432
+ taskActionInputSchema.parse({ action: "create", task: "x", modelTier: "massive" }),
433
+ ).toThrow();
328
434
  });
329
435
 
330
436
  test("nullable model schema (update-schedule) should accept null", async () => {
331
- const { z } = await import("zod");
332
- const modelSchema = z.enum(["haiku", "sonnet", "opus"]).nullable().optional();
437
+ expect(updateScheduleInputSchema.shape.model.parse(null)).toBeNull();
438
+ expect(updateScheduleInputSchema.shape.model.parse("gpt-5.5")).toBe("gpt-5.5");
439
+ expect(updateScheduleInputSchema.shape.modelTier.parse(null)).toBeNull();
440
+ expect(updateScheduleInputSchema.shape.modelTier.parse("smol")).toBe("smol");
441
+ });
333
442
 
334
- expect(modelSchema.parse(null)).toBeNull();
335
- expect(modelSchema.parse("haiku")).toBe("haiku");
336
- expect(modelSchema.parse(undefined)).toBeUndefined();
443
+ test("create schedule schema accepts freeform model and modelTier", () => {
444
+ const parsed = createScheduleInputSchema.parse({
445
+ name: "schema-model-tier",
446
+ taskTemplate: "x",
447
+ intervalMs: 60000,
448
+ model: "openrouter/openai/gpt-5.5",
449
+ modelTier: "smart",
450
+ });
451
+ expect(parsed.model).toBe("openrouter/openai/gpt-5.5");
452
+ expect(parsed.modelTier).toBe("smart");
337
453
  });
338
454
  });
@@ -7,7 +7,7 @@
7
7
  */
8
8
 
9
9
  import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
10
- import { writeFileSync } from "node:fs";
10
+ import { mkdirSync, rmSync, writeFileSync } from "node:fs";
11
11
  import { join } from "node:path";
12
12
  import type { Event as OpencodeEvent } from "@opencode-ai/sdk";
13
13
  import type { ProviderEvent, ProviderResult, ProviderSessionConfig } from "../providers/types";
@@ -156,6 +156,15 @@ describe("OpencodeSession — SSE→ProviderEvent mapping", () => {
156
156
  ];
157
157
  const { emitted, result, serverCloseCalls } = await driveSession(events);
158
158
 
159
+ const sessionInit = emitted.find((e) => e.type === "session_init");
160
+ expect(sessionInit).toBeDefined();
161
+ if (sessionInit?.type === "session_init") {
162
+ expect(sessionInit.provider).toBe("opencode");
163
+ expect(sessionInit.harnessVariant).toBe("stock");
164
+ expect(typeof sessionInit.harnessVariantMeta?.version).toBe("string");
165
+ expect((sessionInit.harnessVariantMeta?.version as string).length).toBeGreaterThan(0);
166
+ }
167
+
159
168
  const resultEvent = emitted.find((e) => e.type === "result");
160
169
  expect(resultEvent).toBeDefined();
161
170
  if (resultEvent?.type === "result") {
@@ -605,16 +614,22 @@ describe("OpencodeSession — context_usage emission (phase 9 fix)", () => {
605
614
  // ── DES-300: per-task isolation ────────────────────────────────────────────────
606
615
 
607
616
  describe("OpencodeAdapter — per-task isolation (DES-300)", () => {
617
+ let prevOpencodeSkillsDir: string | undefined;
618
+
608
619
  beforeEach(() => {
620
+ prevOpencodeSkillsDir = process.env.OPENCODE_SKILLS_DIR;
609
621
  lastPromptArgs = undefined;
610
622
  lastCreateOpencodeConfig = undefined;
611
623
  mock.restore();
612
624
  });
613
625
 
614
626
  afterEach(() => {
627
+ if (prevOpencodeSkillsDir === undefined) delete process.env.OPENCODE_SKILLS_DIR;
628
+ else process.env.OPENCODE_SKILLS_DIR = prevOpencodeSkillsDir;
615
629
  // Clean up any written files from tests
616
630
  Bun.$`rm -rf /tmp/opencode-task-1.json /tmp/opencode-data-task-1`.quiet().nothrow();
617
631
  Bun.$`rm -rf /tmp/test/.opencode`.quiet().nothrow();
632
+ rmSync("/tmp/opencode-skills-test", { recursive: true, force: true });
618
633
  });
619
634
 
620
635
  test("session.prompt receives agent=swarm-<taskId>", async () => {
@@ -629,6 +644,28 @@ describe("OpencodeAdapter — per-task isolation (DES-300)", () => {
629
644
  expect(args.body?.agent).toBe("swarm-task-1");
630
645
  });
631
646
 
647
+ test("inlines a leading slash skill before sending prompt", async () => {
648
+ const skillDir = "/tmp/opencode-skills-test/work-on-task";
649
+ mkdirSync(skillDir, { recursive: true });
650
+ writeFileSync(join(skillDir, "SKILL.md"), "Use the task worker procedure.");
651
+ process.env.OPENCODE_SKILLS_DIR = "/tmp/opencode-skills-test";
652
+
653
+ const events: OpencodeEvent[] = [
654
+ { type: "session.idle", properties: { sessionID: "sess-abc-123" } },
655
+ ];
656
+ const cfg = testConfig({
657
+ taskId: "task-1",
658
+ prompt: "/work-on-task task-123\n\nTask body.",
659
+ });
660
+ await driveSession(events, cfg);
661
+
662
+ const args = lastPromptArgs as { body?: { parts?: Array<{ type: string; text: string }> } };
663
+ const text = args.body?.parts?.[0]?.text ?? "";
664
+ expect(text).toStartWith("Use the task worker procedure.");
665
+ expect(text).toContain("User request: task-123\n\nTask body.");
666
+ expect(text).not.toContain("/work-on-task task-123");
667
+ });
668
+
632
669
  test("createOpencode receives config with model, mcp.swarm, and permission", async () => {
633
670
  const events: OpencodeEvent[] = [
634
671
  { type: "session.idle", properties: { sessionID: "sess-abc-123" } },