@desplega.ai/agent-swarm 1.93.0 → 1.95.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/README.md +2 -2
  2. package/openapi.json +180 -1
  3. package/package.json +4 -3
  4. package/src/be/db.ts +74 -9
  5. package/src/be/migrations/090_model_tiers.sql +2 -0
  6. package/src/be/migrations/091_seed_swarm_operations_metrics.sql +12 -0
  7. package/src/be/migrations/092_metrics_dashboard_combobox_filters.sql +68 -0
  8. package/src/be/migrations/093_slack_message_tracking.sql +6 -0
  9. package/src/be/migrations/094_mcp_extra_authorize_params.sql +4 -0
  10. package/src/be/migrations/runner.ts +52 -0
  11. package/src/be/modelsdev-cache.json +2060 -198
  12. package/src/be/scripts/boot-reembed.ts +74 -0
  13. package/src/be/scripts/db.ts +19 -3
  14. package/src/be/seed/index.ts +1 -1
  15. package/src/be/seed/registry.ts +2 -2
  16. package/src/be/seed/runner.ts +5 -5
  17. package/src/be/seed/types.ts +6 -1
  18. package/src/be/seed-pricing.ts +1 -0
  19. package/src/be/seed-scripts/index.ts +3 -2
  20. package/src/be/skill-sync.ts +4 -4
  21. package/src/be/swarm-config-guard.ts +8 -0
  22. package/src/commands/provider-credentials.ts +14 -8
  23. package/src/commands/runner.ts +84 -13
  24. package/src/http/index.ts +13 -2
  25. package/src/http/mcp-oauth.ts +14 -0
  26. package/src/http/metrics.ts +55 -6
  27. package/src/http/schedules.ts +16 -15
  28. package/src/http/script-runs.ts +7 -1
  29. package/src/http/scripts.ts +147 -1
  30. package/src/http/tasks.ts +7 -0
  31. package/src/model-tiers.ts +140 -0
  32. package/src/oauth/mcp-wrapper.ts +14 -0
  33. package/src/providers/claude-managed-models.ts +9 -0
  34. package/src/providers/codex-skill-resolver.ts +22 -8
  35. package/src/providers/opencode-adapter.ts +21 -2
  36. package/src/providers/pi-mono-adapter.ts +143 -26
  37. package/src/providers/types.ts +12 -0
  38. package/src/scheduler/scheduler.ts +22 -34
  39. package/src/server-user.ts +8 -2
  40. package/src/slack/responses.ts +39 -11
  41. package/src/slack/watcher.ts +121 -8
  42. package/src/tests/agents-list-model-display.test.ts +13 -0
  43. package/src/tests/aws-error-classifier.test.ts +148 -0
  44. package/src/tests/claude-managed-adapter.test.ts +12 -0
  45. package/src/tests/context-window.test.ts +7 -0
  46. package/src/tests/credential-check.test.ts +185 -46
  47. package/src/tests/harness-provider-resolution.test.ts +23 -0
  48. package/src/tests/http-api-integration.test.ts +19 -0
  49. package/src/tests/mcp-oauth-queries.test.ts +71 -1
  50. package/src/tests/mcp-oauth-wrapper.test.ts +109 -0
  51. package/src/tests/metrics-http.test.ts +137 -3
  52. package/src/tests/migration-046-budgets.test.ts +33 -0
  53. package/src/tests/migration-runner-regressions.test.ts +69 -0
  54. package/src/tests/model-control.test.ts +162 -46
  55. package/src/tests/opencode-adapter.test.ts +38 -1
  56. package/src/tests/pi-mono-adapter.test.ts +319 -0
  57. package/src/tests/provider-command-format.test.ts +12 -0
  58. package/src/tests/providers/pi-cost.test.ts +9 -0
  59. package/src/tests/runner-fallback-output.test.ts +50 -0
  60. package/src/tests/scripts-boot-reembed.test.ts +163 -0
  61. package/src/tests/scripts-embeddings.test.ts +90 -0
  62. package/src/tests/seed.test.ts +26 -1
  63. package/src/tests/session-costs-model-key-normalize.test.ts +2 -0
  64. package/src/tests/skill-fs-writer.test.ts +7 -1
  65. package/src/tests/skill-sync.test.ts +15 -3
  66. package/src/tests/slack-watcher.test.ts +66 -0
  67. package/src/tests/workflow-agent-task.test.ts +5 -2
  68. package/src/tests/workflow-validation-port-routing.test.ts +181 -0
  69. package/src/tools/mcp-servers/mcp-server-create.ts +7 -0
  70. package/src/tools/mcp-servers/mcp-server-update.ts +8 -0
  71. package/src/tools/memory-get.ts +11 -0
  72. package/src/tools/memory-search.ts +18 -0
  73. package/src/tools/schedules/create-schedule.ts +71 -70
  74. package/src/tools/schedules/update-schedule.ts +43 -31
  75. package/src/tools/send-task.ts +16 -5
  76. package/src/tools/task-action.ts +11 -3
  77. package/src/types.ts +30 -0
  78. package/src/utils/aws-error-classifier.ts +97 -0
  79. package/src/utils/context-window.ts +2 -0
  80. package/src/utils/credentials.test.ts +68 -0
  81. package/src/utils/credentials.ts +44 -3
  82. package/src/utils/pretty-print.ts +25 -10
  83. package/src/utils/skill-fs-writer.ts +11 -3
  84. package/src/workflows/engine.ts +3 -2
  85. package/src/workflows/executors/agent-task.ts +3 -1
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Post-listen backfill: embed scripts that are missing embeddings (e.g. after
3
+ * boot seeding with scriptEmbeddingMode: "skip"). Runs once per boot,
4
+ * async/non-blocking, idempotent, no-op when every non-scratch script already
5
+ * has an embedding row.
6
+ *
7
+ * Mirrors the memory boot-reembed pattern (src/be/memory/boot-reembed.ts).
8
+ */
9
+
10
+ import { getDb } from "@/be/db";
11
+ import type { ScriptScope } from "@/types";
12
+ import { embedScript } from "./embeddings";
13
+
14
+ type ScriptMissingEmbedding = {
15
+ id: string;
16
+ name: string;
17
+ scope: ScriptScope;
18
+ scopeId: string | null;
19
+ source: string;
20
+ description: string;
21
+ intent: string;
22
+ signatureJson: string;
23
+ argsJsonSchema: string | null;
24
+ contentHash: string;
25
+ version: number;
26
+ isScratch: number;
27
+ typeChecked: number;
28
+ fsMode: "none" | "workspace-rw";
29
+ createdByAgentId: string | null;
30
+ createdAt: string;
31
+ updatedAt: string;
32
+ };
33
+
34
+ export async function runBootReembedScripts(): Promise<void> {
35
+ const db = getDb();
36
+
37
+ const missing = db
38
+ .prepare<ScriptMissingEmbedding, []>(
39
+ `SELECT s.* FROM scripts s
40
+ LEFT JOIN script_embeddings e ON e.scriptId = s.id
41
+ WHERE s.isScratch = 0 AND e.scriptId IS NULL`,
42
+ )
43
+ .all();
44
+
45
+ if (missing.length === 0) {
46
+ return;
47
+ }
48
+
49
+ console.log(`[boot-reembed-scripts] starting: ${missing.length} scripts missing embeddings`);
50
+
51
+ let embedded = 0;
52
+ let failed = 0;
53
+
54
+ for (const row of missing) {
55
+ try {
56
+ await embedScript({
57
+ ...row,
58
+ scopeId: row.scopeId ?? null,
59
+ isScratch: row.isScratch === 1,
60
+ typeChecked: row.typeChecked === 1,
61
+ createdByAgentId: row.createdByAgentId ?? null,
62
+ });
63
+ embedded++;
64
+ } catch (err) {
65
+ failed++;
66
+ console.error(
67
+ `[boot-reembed-scripts] failed to embed "${row.name}":`,
68
+ (err as Error).message,
69
+ );
70
+ }
71
+ }
72
+
73
+ console.log(`[boot-reembed-scripts] complete: embedded=${embedded} failed=${failed}`);
74
+ }
@@ -26,6 +26,7 @@ type ScriptWriteArgs = ScriptIdentity & {
26
26
  fsMode?: ScriptFsMode;
27
27
  agentId?: string | null;
28
28
  changeReason?: string | null;
29
+ embeddingMode?: "sync" | "skip";
29
30
  };
30
31
 
31
32
  export type UpsertScriptResult = {
@@ -178,10 +179,11 @@ export function insertScript(args: ScriptWriteArgs): ScriptRecord {
178
179
  * immediately consistent for authored/promoted scripts.
179
180
  */
180
181
  export async function upsertScriptByName(args: ScriptWriteArgs): Promise<UpsertScriptResult> {
182
+ const shouldEmbed = args.embeddingMode !== "skip";
181
183
  const existing = getScript(args);
182
184
  if (!existing) {
183
185
  const script = insertScript(args);
184
- if (!script.isScratch) {
186
+ if (!script.isScratch && shouldEmbed) {
185
187
  await embedScript(script);
186
188
  }
187
189
  return {
@@ -235,7 +237,7 @@ export async function upsertScriptByName(args: ScriptWriteArgs): Promise<UpsertS
235
237
 
236
238
  if (!row) throw new Error("Failed to update script metadata");
237
239
  const script = rowToScript(row);
238
- if (!script.isScratch && (trackedMetadataChanged || promotedFromScratch)) {
240
+ if (!script.isScratch && shouldEmbed && (trackedMetadataChanged || promotedFromScratch)) {
239
241
  await embedScript(script);
240
242
  }
241
243
  return {
@@ -318,7 +320,7 @@ export async function upsertScriptByName(args: ScriptWriteArgs): Promise<UpsertS
318
320
  });
319
321
 
320
322
  const script = txn();
321
- if (!script.isScratch) {
323
+ if (!script.isScratch && shouldEmbed) {
322
324
  await embedScript(script);
323
325
  }
324
326
 
@@ -347,6 +349,11 @@ export function getScript(args: ScriptIdentity): ScriptRecord | null {
347
349
  return row ? rowToScript(row) : null;
348
350
  }
349
351
 
352
+ export function getScriptById(id: string): ScriptRecord | null {
353
+ const row = getDb().prepare<ScriptRow, [string]>("SELECT * FROM scripts WHERE id = ?").get(id);
354
+ return row ? rowToScript(row) : null;
355
+ }
356
+
350
357
  export function getScriptVersion(args: {
351
358
  scriptId: string;
352
359
  version?: number;
@@ -408,6 +415,15 @@ export function listScripts(args?: {
408
415
  .map(rowToScript);
409
416
  }
410
417
 
418
+ export function listScriptVersions(scriptId: string): ScriptVersionRecord[] {
419
+ return getDb()
420
+ .prepare<ScriptVersionRow, [string]>(
421
+ "SELECT * FROM script_versions WHERE scriptId = ? ORDER BY version DESC",
422
+ )
423
+ .all(scriptId)
424
+ .map(rowToScriptVersion);
425
+ }
426
+
411
427
  export function deleteScript(args: ScriptIdentity): boolean {
412
428
  const existing = getScript(args);
413
429
  if (!existing) return false;
@@ -6,4 +6,4 @@
6
6
  export { runAllSeeders, SEEDERS } from "./registry";
7
7
  export { runSeeder, runSeeders } from "./runner";
8
8
  export { getSeedState, recordSeedState } from "./state-db";
9
- export type { SeedAction, Seeder, SeederResult, SeedItem } from "./types";
9
+ export type { SeedAction, Seeder, SeederResult, SeederRunOptions, SeedItem } from "./types";
@@ -9,11 +9,11 @@
9
9
  import { scriptsSeeder } from "../seed-scripts";
10
10
  import { skillsSeeder } from "../seed-skills";
11
11
  import { runSeeders } from "./runner";
12
- import type { Seeder, SeederResult } from "./types";
12
+ import type { Seeder, SeederResult, SeederRunOptions } from "./types";
13
13
 
14
14
  export const SEEDERS: Seeder[] = [scriptsSeeder, skillsSeeder];
15
15
 
16
16
  /** Apply every registered seeder. Called at API boot and by the seed CLI. */
17
- export function runAllSeeders(opts?: { quiet?: boolean }): Promise<SeederResult[]> {
17
+ export function runAllSeeders(opts?: SeederRunOptions): Promise<SeederResult[]> {
18
18
  return runSeeders(SEEDERS, opts);
19
19
  }
@@ -5,7 +5,7 @@
5
5
  */
6
6
 
7
7
  import { getSeedState, recordSeedState } from "./state-db";
8
- import type { Seeder, SeederResult } from "./types";
8
+ import type { Seeder, SeederResult, SeederRunOptions } from "./types";
9
9
 
10
10
  /**
11
11
  * Apply one seeder. Idempotent and version-aware:
@@ -14,7 +14,7 @@ import type { Seeder, SeederResult } from "./types";
14
14
  * - upstream pristine, src same -> no-op
15
15
  * - upstream user-modified -> preserve (never overwrite)
16
16
  */
17
- export async function runSeeder(seeder: Seeder, opts?: { quiet?: boolean }): Promise<SeederResult> {
17
+ export async function runSeeder(seeder: Seeder, opts?: SeederRunOptions): Promise<SeederResult> {
18
18
  const result: SeederResult = {
19
19
  kind: seeder.kind,
20
20
  created: 0,
@@ -31,7 +31,7 @@ export async function runSeeder(seeder: Seeder, opts?: { quiet?: boolean }): Pro
31
31
 
32
32
  // Absent upstream -> create.
33
33
  if (upstream === null) {
34
- await seeder.apply(item, "create");
34
+ await seeder.apply(item, "create", opts);
35
35
  recordSeedState(seeder.kind, item.key, item.contentHash);
36
36
  result.created += 1;
37
37
  continue;
@@ -60,7 +60,7 @@ export async function runSeeder(seeder: Seeder, opts?: { quiet?: boolean }): Pro
60
60
  }
61
61
 
62
62
  // Pristine upstream + changed source -> update to the new source version.
63
- await seeder.apply(item, "update");
63
+ await seeder.apply(item, "update", opts);
64
64
  recordSeedState(seeder.kind, item.key, item.contentHash);
65
65
  result.updated += 1;
66
66
  } catch (err) {
@@ -88,7 +88,7 @@ export async function runSeeder(seeder: Seeder, opts?: { quiet?: boolean }): Pro
88
88
  /** Apply a list of seeders in order. */
89
89
  export async function runSeeders(
90
90
  seeders: Seeder[],
91
- opts?: { quiet?: boolean },
91
+ opts?: SeederRunOptions,
92
92
  ): Promise<SeederResult[]> {
93
93
  const results: SeederResult[] = [];
94
94
  for (const seeder of seeders) {
@@ -35,6 +35,11 @@ export interface SeedItem {
35
35
  readonly contentHash: string;
36
36
  }
37
37
 
38
+ export type SeederRunOptions = {
39
+ quiet?: boolean;
40
+ scriptEmbeddingMode?: "sync" | "skip";
41
+ };
42
+
38
43
  export interface Seeder<TItem extends SeedItem = SeedItem> {
39
44
  /** Kind discriminator — namespaces this seeder's rows in `seed_state`. */
40
45
  readonly kind: string;
@@ -46,7 +51,7 @@ export interface Seeder<TItem extends SeedItem = SeedItem> {
46
51
  */
47
52
  upstreamHash(item: TItem): string | null | Promise<string | null>;
48
53
  /** Create or update the upstream entity so it matches the source definition. */
49
- apply(item: TItem, action: "create" | "update"): void | Promise<void>;
54
+ apply(item: TItem, action: "create" | "update", opts?: SeederRunOptions): void | Promise<void>;
50
55
  }
51
56
 
52
57
  export type SeederResult = {
@@ -68,6 +68,7 @@ const MANUAL_PRICING_OVERRIDES: Array<{
68
68
  */
69
69
  const ANTHROPIC_SHORTNAME_TO_MODELSDEV: Record<string, string> = {
70
70
  fable: "claude-fable-5",
71
+ mythos: "claude-mythos-5",
71
72
  opus: "claude-opus-4-8",
72
73
  sonnet: "claude-sonnet-4-6",
73
74
  haiku: "claude-haiku-4-5",
@@ -21,7 +21,7 @@ import { computeContentHash } from "../db";
21
21
  import { getScript, upsertScriptByName } from "../scripts/db";
22
22
  import { extractArgsJsonSchema } from "../scripts/extract-schema";
23
23
  import { typecheckScript } from "../scripts/typecheck";
24
- import type { Seeder, SeedItem } from "../seed/types";
24
+ import type { Seeder, SeederRunOptions, SeedItem } from "../seed/types";
25
25
  import bootTriageSrc from "./catalog/boot-triage.inline.ts" with { type: "text" };
26
26
  // @ts-expect-error Bun text imports synthesize a default string for this helper.
27
27
  import catalogReportSrc from "./catalog/catalog-report.inline.ts" with { type: "text" };
@@ -234,7 +234,7 @@ export const scriptsSeeder: Seeder<ScriptSeedItem> = {
234
234
  return existing ? existing.contentHash : null;
235
235
  },
236
236
 
237
- async apply(item): Promise<void> {
237
+ async apply(item, _action, opts?: SeederRunOptions): Promise<void> {
238
238
  const { script } = item;
239
239
 
240
240
  const imports = validateScriptImports(script.source);
@@ -260,6 +260,7 @@ export const scriptsSeeder: Seeder<ScriptSeedItem> = {
260
260
  isScratch: false,
261
261
  typeChecked: true,
262
262
  changeReason: "Seeded from the built-in scripts catalog (src/be/seed-scripts)",
263
+ embeddingMode: opts?.scriptEmbeddingMode ?? "sync",
263
264
  });
264
265
  },
265
266
  };
@@ -1,9 +1,8 @@
1
1
  /**
2
2
  * Filesystem sync for skills.
3
3
  *
4
- * Writes installed skills to ~/.claude/skills/<name>/SKILL.md,
5
- * ~/.pi/agent/skills/<name>/SKILL.md, and ~/.codex/skills/<name>/SKILL.md
6
- * so Claude Code, Pi, and Codex discover them natively.
4
+ * Writes installed skills to every local harness skill tree so Claude Code,
5
+ * Pi, Codex, OpenCode, and AGENTS.md-compatible adapters can discover them.
7
6
  *
8
7
  * This runs on the API side — workers call it via POST /api/skills/sync-filesystem.
9
8
  * The actual FS write logic lives in the worker-safe src/utils/skill-fs-writer.ts
@@ -13,6 +12,7 @@
13
12
  import { homedir } from "node:os";
14
13
  import {
15
14
  type SkillFsEntry,
15
+ type SkillHarnessTarget,
16
16
  type SkillSyncResult,
17
17
  writeSkillsToFilesystem,
18
18
  } from "../utils/skill-fs-writer";
@@ -32,7 +32,7 @@ export type { SkillSyncResult };
32
32
  */
33
33
  export function syncSkillsToFilesystem(
34
34
  agentId: string,
35
- harnessType: "claude" | "pi" | "codex" | "all" = "all",
35
+ harnessType: SkillHarnessTarget = "all",
36
36
  homeOverride?: string,
37
37
  ): SkillSyncResult {
38
38
  const skills = getAgentSkills(agentId);
@@ -58,6 +58,14 @@ const VALIDATED_KEYS: Record<string, (value: unknown) => string | null> = {
58
58
  if (["true", "false", "1", "0"].includes(normalized)) return null;
59
59
  return "Invalid SWARM_USE_CLAUDE_BRIDGE value (must be one of: true, false, 1, 0)";
60
60
  },
61
+ // AWS credential mode for the Bedrock path on the pi harness.
62
+ // sdk — AWS SDK default credential chain (env, ~/.aws/*, SSO, IMDS, …)
63
+ // bearer — explicit bearer token via AWS_BEARER_TOKEN_BEDROCK (future/Mantle)
64
+ // When absent the worker infers the mode from MODEL_OVERRIDE (sdk semantics).
65
+ BEDROCK_AUTH_MODE: (value) => {
66
+ if (value === "sdk" || value === "bearer") return null;
67
+ return "Invalid BEDROCK_AUTH_MODE value (must be one of: sdk, bearer)";
68
+ },
61
69
  };
62
70
 
63
71
  export function validateConfigValue(key: string, value: unknown): string | null {
@@ -302,14 +302,20 @@ export async function validateProviderCredentials(provider: string): Promise<Liv
302
302
  }
303
303
  case "pi":
304
304
  case "opencode": {
305
- // pi-mono with MODEL_OVERRIDE=amazon-bedrock/* delegates credential
306
- // resolution to the AWS SDK default chain (env, ~/.aws/*, SSO, IMDS,
307
- // assume-role, …). pi-ai exposes no Bedrock-specific check we could
308
- // call here, and the SDK chain may issue slow IMDS network calls on
309
- // non-EC2 hosts so the live test is a presence check, mirroring the
310
- // codex-OAuth pattern above. Real validation happens at the first
311
- // Bedrock inference call.
312
- if (provider === "pi" && env.MODEL_OVERRIDE?.toLowerCase().startsWith("amazon-bedrock/")) {
305
+ // For the pi Bedrock path, the real credential check is the
306
+ // `ListFoundationModels` probe that `checkProviderCredentials` (the
307
+ // `pi` dynamic-import arm) already ran. That probe result is already
308
+ // in `buildCredStatusReport` — the live-test is a pass-through / no-op
309
+ // so we never issue a second AWS SDK call here (which would drag the
310
+ // SDK into the wrong binary or make slow IMDS calls on non-EC2 hosts).
311
+ // Bedrock mode: explicit BEDROCK_AUTH_MODE=sdk OR
312
+ // absent BEDROCK_AUTH_MODE + amazon-bedrock/ MODEL_OVERRIDE prefix.
313
+ if (
314
+ provider === "pi" &&
315
+ (env.BEDROCK_AUTH_MODE?.toLowerCase() === "sdk" ||
316
+ (env.BEDROCK_AUTH_MODE === undefined &&
317
+ env.MODEL_OVERRIDE?.toLowerCase().startsWith("amazon-bedrock/")))
318
+ ) {
313
319
  return presenceCheckOk();
314
320
  }
315
321
  // Both pi-mono and opencode resolve credentials in the same order:
@@ -2,6 +2,7 @@ import { existsSync, statSync } from "node:fs";
2
2
  import { mkdir, readFile, stat, writeFile } from "node:fs/promises";
3
3
  import { ensure, initialize } from "@desplega.ai/business-use";
4
4
  import type { TemplateResponse } from "../../templates/schema.ts";
5
+ import { resolveTaskModelSelection } from "../model-tiers.ts";
5
6
  import {
6
7
  type Attributes,
7
8
  initOtel,
@@ -350,6 +351,7 @@ async function fetchResolvedEnv(
350
351
  apiKey: string,
351
352
  agentId: string,
352
353
  baseEnv: Record<string, string | undefined> = process.env,
354
+ taskModel?: string,
353
355
  ): Promise<ResolvedEnvResult> {
354
356
  const env: Record<string, string | undefined> = { ...baseEnv };
355
357
 
@@ -382,6 +384,12 @@ async function fetchResolvedEnv(
382
384
 
383
385
  const resolvedProvider = resolveHarnessProvider(env, baseEnv);
384
386
 
387
+ // Effective model: per-task model takes priority over the agent-level
388
+ // MODEL_OVERRIDE from swarm_config. Passed to resolveCredentialPools so
389
+ // the harness × model matrix can exclude incompatible credential vars
390
+ // (e.g. OPENAI_API_KEY when an OpenRouter model is selected on opencode).
391
+ const effectiveModel = taskModel || (env.MODEL_OVERRIDE as string | undefined) || "";
392
+
385
393
  const credentialSelections = await resolveCredentialPools(env, {
386
394
  apiUrl,
387
395
  apiKey,
@@ -393,6 +401,7 @@ async function fetchResolvedEnv(
393
401
  // Use the resolved provider (swarm_config > env) so an operator can flip
394
402
  // the worker's harness from the dashboard without restarting the container.
395
403
  provider: resolvedProvider,
404
+ model: effectiveModel,
396
405
  });
397
406
 
398
407
  return { env, credentialSelections, resolvedProvider };
@@ -427,6 +436,7 @@ const RELOADABLE_ENV_KEYS: ReadonlySet<string> = new Set([
427
436
  "MODEL_OVERRIDE",
428
437
  "AGENT_FS_SHARED_ORG_ID",
429
438
  "SWARM_USE_CLAUDE_BRIDGE",
439
+ "BEDROCK_AUTH_MODE",
430
440
  ]);
431
441
 
432
442
  /**
@@ -867,6 +877,7 @@ export async function ensureTaskFinished(
867
877
  * from the resolved swarm_config value. Falls back to env when omitted.
868
878
  */
869
879
  provider?: ProviderName,
880
+ failureDiagnostics?: string,
870
881
  ): Promise<void> {
871
882
  const headers: Record<string, string> = {
872
883
  "X-Agent-ID": config.agentId,
@@ -883,6 +894,9 @@ export async function ensureTaskFinished(
883
894
 
884
895
  if (status === "failed") {
885
896
  body.failureReason = failureReason || `Claude process exited with code ${exitCode}`;
897
+ if (failureDiagnostics) {
898
+ body.failureReason = `${body.failureReason}\n\n${failureDiagnostics}`;
899
+ }
886
900
  } else if (providerOutput) {
887
901
  const validation = await validateProviderOutputIfNeeded(config, taskId, providerOutput);
888
902
  if (validation.ok) {
@@ -1653,6 +1667,32 @@ async function findBridgeFailureArtifact(cwd: string): Promise<string | undefine
1653
1667
  }
1654
1668
  }
1655
1669
 
1670
+ async function readBridgeFailureTail(
1671
+ artifactPath: string,
1672
+ maxLines = 40,
1673
+ maxChars = 4000,
1674
+ ): Promise<string | undefined> {
1675
+ try {
1676
+ const text = await Bun.file(artifactPath).text();
1677
+ const tail = text.split(/\r?\n/).slice(-maxLines).join("\n").trim();
1678
+ if (!tail) return undefined;
1679
+ return tail.length > maxChars ? tail.slice(-maxChars) : tail;
1680
+ } catch {
1681
+ return undefined;
1682
+ }
1683
+ }
1684
+
1685
+ export async function getBridgeFailureDiagnostics(
1686
+ cwd: string,
1687
+ ): Promise<{ artifactPath: string; paneTail?: string } | undefined> {
1688
+ const artifactPath = await findBridgeFailureArtifact(cwd);
1689
+ if (!artifactPath) return undefined;
1690
+ return {
1691
+ artifactPath,
1692
+ paneTail: await readBridgeFailureTail(artifactPath),
1693
+ };
1694
+ }
1695
+
1656
1696
  async function updateHarnessVariantMeta(
1657
1697
  apiUrl: string,
1658
1698
  apiKey: string,
@@ -2519,6 +2559,7 @@ async function spawnProviderProcess(
2519
2559
  iteration: number;
2520
2560
  taskId?: string;
2521
2561
  model?: string;
2562
+ modelTier?: string;
2522
2563
  resumeSessionId?: string;
2523
2564
  harnessProvider: ProviderName;
2524
2565
  cwd?: string;
@@ -2532,11 +2573,15 @@ async function spawnProviderProcess(
2532
2573
  // Correlation ID for logs/display — always defined
2533
2574
  const effectiveTaskId = realTaskId || crypto.randomUUID();
2534
2575
 
2535
- // Resolve env first so we can use MODEL_OVERRIDE from config
2576
+ // Resolve env first so we can use MODEL_OVERRIDE from config.
2577
+ // Pass opts.model (per-task model) so the credential picker can apply
2578
+ // the harness × model matrix (e.g. exclude OPENAI_API_KEY for OpenRouter models).
2536
2579
  const { env: freshEnv, credentialSelections } = await fetchResolvedEnv(
2537
2580
  opts.apiUrl,
2538
2581
  opts.apiKey,
2539
2582
  opts.agentId,
2583
+ process.env,
2584
+ opts.model,
2540
2585
  );
2541
2586
 
2542
2587
  // Report which key was selected for this task (fire-and-forget)
@@ -2553,7 +2598,14 @@ async function spawnProviderProcess(
2553
2598
  }
2554
2599
 
2555
2600
  const configModel = (freshEnv.MODEL_OVERRIDE as string | undefined) || "";
2556
- const model = opts.model || configModel || "";
2601
+ const taskModelSelection = resolveTaskModelSelection({
2602
+ model: opts.model,
2603
+ modelTier: opts.modelTier,
2604
+ harnessProvider: opts.harnessProvider,
2605
+ env: freshEnv,
2606
+ });
2607
+ const taskModel = taskModelSelection.model || "";
2608
+ const model = taskModel || configModel || "";
2557
2609
 
2558
2610
  // Resolve Codex OAuth pool slot BEFORE building ProviderSessionConfig so we
2559
2611
  // can pass codexSlot through and the adapter writes token refreshes back to
@@ -2644,7 +2696,7 @@ async function spawnProviderProcess(
2644
2696
  );
2645
2697
  const initialModelReport = buildLatestModelReport({
2646
2698
  model,
2647
- taskModel: opts.model,
2699
+ taskModel,
2648
2700
  configModel,
2649
2701
  taskId: realTaskId,
2650
2702
  harnessProvider: opts.harnessProvider,
@@ -2766,6 +2818,17 @@ async function spawnProviderProcess(
2766
2818
  );
2767
2819
  }
2768
2820
 
2821
+ // Structured session-start log for observability (covers all providers)
2822
+ {
2823
+ const variant = event.harnessVariant ?? "unknown";
2824
+ const version =
2825
+ (event.harnessVariantMeta as Record<string, unknown> | undefined)?.version ??
2826
+ "unknown";
2827
+ console.log(
2828
+ `[${opts.role}] [harness] provider=${event.provider ?? opts.harnessProvider} variant=${variant} version=${version} model=${model || "default"}`,
2829
+ );
2830
+ }
2831
+
2769
2832
  // Buffer session start event
2770
2833
  bufferEvent({
2771
2834
  category: "session",
@@ -3342,6 +3405,20 @@ async function checkCompletedProcesses(
3342
3405
  rateLimitedUntil,
3343
3406
  ).catch(() => {});
3344
3407
  }
3408
+ let bridgeDiagnostics: Awaited<ReturnType<typeof getBridgeFailureDiagnostics>> | undefined;
3409
+ if (result.exitCode !== 0 && harnessProvider === "claude" && workingDir) {
3410
+ bridgeDiagnostics = await getBridgeFailureDiagnostics(workingDir);
3411
+ if (bridgeDiagnostics?.artifactPath && result.sessionId) {
3412
+ console.log(`[${role}] Bridge failure artifact found: ${bridgeDiagnostics.artifactPath}`);
3413
+ updateHarnessVariantMeta(apiConfig.apiUrl, apiConfig.apiKey, taskId, result.sessionId, {
3414
+ failureArtifact: bridgeDiagnostics.artifactPath,
3415
+ }).catch((err) => console.warn(`[runner] Failed to update harness variant meta: ${err}`));
3416
+ }
3417
+ }
3418
+ const bridgeFailureDiagnostics =
3419
+ bridgeDiagnostics?.paneTail != null
3420
+ ? `Claude bridge final tmux pane tail (${bridgeDiagnostics.artifactPath}):\n${bridgeDiagnostics.paneTail}`
3421
+ : undefined;
3345
3422
  await ensureTaskFinished(
3346
3423
  apiConfig,
3347
3424
  role,
@@ -3350,6 +3427,7 @@ async function checkCompletedProcesses(
3350
3427
  failureReason,
3351
3428
  result.output,
3352
3429
  harnessProvider,
3430
+ bridgeFailureDiagnostics,
3353
3431
  );
3354
3432
 
3355
3433
  if (result.exitCode === 0 && credentialInfo) {
@@ -3361,16 +3439,6 @@ async function checkCompletedProcesses(
3361
3439
  ).catch(() => {});
3362
3440
  }
3363
3441
 
3364
- if (result.exitCode !== 0 && harnessProvider === "claude" && workingDir && result.sessionId) {
3365
- const artifactPath = await findBridgeFailureArtifact(workingDir);
3366
- if (artifactPath) {
3367
- console.log(`[${role}] Bridge failure artifact found: ${artifactPath}`);
3368
- updateHarnessVariantMeta(apiConfig.apiUrl, apiConfig.apiKey, taskId, result.sessionId, {
3369
- failureArtifact: artifactPath,
3370
- }).catch((err) => console.warn(`[runner] Failed to update harness variant meta: ${err}`));
3371
- }
3372
- }
3373
-
3374
3442
  ensure({
3375
3443
  id: "worker_process_finished",
3376
3444
  flow: "task",
@@ -4391,6 +4459,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
4391
4459
  iteration,
4392
4460
  taskId: task.id,
4393
4461
  model: (task as { model?: string }).model,
4462
+ modelTier: (task as { modelTier?: string }).modelTier,
4394
4463
  harnessProvider: state.harnessProvider,
4395
4464
  cwd: resumeCwd,
4396
4465
  vcsRepo: task.vcsRepo,
@@ -4710,6 +4779,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
4710
4779
 
4711
4780
  // Extract model from task data for per-task model selection
4712
4781
  const taskModel = (trigger.task as { model?: string } | undefined)?.model;
4782
+ const taskModelTier = (trigger.task as { modelTier?: string } | undefined)?.modelTier;
4713
4783
 
4714
4784
  // Detect Slack context for conditional prompt sections
4715
4785
  const taskSlackChannelId = (trigger.task as { slackChannelId?: string } | undefined)
@@ -4852,6 +4922,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
4852
4922
  iteration,
4853
4923
  taskId: trigger.taskId,
4854
4924
  model: taskModel,
4925
+ modelTier: taskModelTier,
4855
4926
  harnessProvider: state.harnessProvider,
4856
4927
  cwd: effectiveCwd,
4857
4928
  vcsRepo: taskVcsRepo,
package/src/http/index.ts CHANGED
@@ -458,10 +458,12 @@ try {
458
458
  // Seed the built-in entity catalog (scripts today; more kinds later) so
459
459
  // `script-search` & co. return useful hits from a fresh DB. Idempotent and
460
460
  // version-aware: a pristine entity updates when its source changes, a
461
- // user-modified one is preserved. See src/be/seed for the framework.
461
+ // user-modified one is preserved. Script embeddings are deferred to a
462
+ // post-listen backfill so boot doesn't block on embedding provider calls.
463
+ // See src/be/seed for the framework.
462
464
  try {
463
465
  const { runAllSeeders } = await import("../be/seed");
464
- await runAllSeeders();
466
+ await runAllSeeders({ scriptEmbeddingMode: "skip" });
465
467
  } catch (err) {
466
468
  console.error("[startup] Failed to seed built-in entities:", err);
467
469
  }
@@ -565,6 +567,15 @@ httpServer
565
567
  .catch((err) => {
566
568
  console.error("[boot-reembed] startup backfill failed (non-fatal):", err);
567
569
  });
570
+
571
+ // Background backfill: embed any scripts that were seeded without embeddings
572
+ // (scriptEmbeddingMode: "skip" during boot). Non-blocking, idempotent, no-op
573
+ // when every non-scratch script already has an embedding.
574
+ import("../be/scripts/boot-reembed")
575
+ .then(({ runBootReembedScripts }) => runBootReembedScripts())
576
+ .catch((err) => {
577
+ console.error("[boot-reembed-scripts] startup backfill failed (non-fatal):", err);
578
+ });
568
579
  })
569
580
  .on("error", (err) => {
570
581
  console.error("HTTP Server Error:", err);
@@ -362,6 +362,19 @@ async function prepareAuthorizeFlow(
362
362
 
363
363
  const scopes = q.scopes ? splitScopes(q.scopes) : client.scopes;
364
364
 
365
+ let extraParams: Record<string, string> | undefined;
366
+ if (server.extraAuthorizeParams) {
367
+ try {
368
+ const parsed = JSON.parse(server.extraAuthorizeParams);
369
+ if (parsed && typeof parsed === "object") {
370
+ extraParams = Object.fromEntries(Object.entries(parsed).map(([k, v]) => [k, String(v)]));
371
+ }
372
+ } catch {
373
+ // Malformed config must never break the authorize flow — log + ignore.
374
+ console.warn(`[mcp-oauth] Ignoring malformed extraAuthorizeParams for server ${mcpServerId}`);
375
+ }
376
+ }
377
+
365
378
  const built = await buildAuthorizeUrl({
366
379
  authorizeUrl: client.authorizeUrl,
367
380
  tokenUrl: client.tokenUrl,
@@ -369,6 +382,7 @@ async function prepareAuthorizeFlow(
369
382
  redirectUri: callbackRedirectUri(),
370
383
  scopes,
371
384
  resource: client.resourceUrl,
385
+ extraParams,
372
386
  });
373
387
 
374
388
  insertMcpOAuthPending({