npm - @desplega.ai/agent-swarm - Versions diffs - 1.74.4 → 1.76.0 - Mend

@desplega.ai/agent-swarm 1.74.4 → 1.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

package/README.md +1 -1
package/openapi.json +1264 -46
package/package.json +2 -2
package/src/be/db.ts +563 -9
package/src/be/memory/edges-store.ts +69 -0
package/src/be/memory/providers/sqlite-store.ts +4 -0
package/src/be/memory/raters/explicit-self.ts +22 -0
package/src/be/memory/raters/implicit-citation.ts +44 -0
package/src/be/memory/raters/llm-client.ts +172 -0
package/src/be/memory/raters/llm-summarizer.ts +218 -0
package/src/be/memory/raters/llm.ts +375 -0
package/src/be/memory/raters/noop.ts +14 -0
package/src/be/memory/raters/registry.ts +86 -0
package/src/be/memory/raters/retrieval.ts +88 -0
package/src/be/memory/raters/run-server-raters.ts +97 -0
package/src/be/memory/raters/store.ts +228 -0
package/src/be/memory/raters/types.ts +101 -0
package/src/be/memory/reranker.ts +32 -2
package/src/be/memory/retrieval-store.ts +116 -0
package/src/be/memory/types.ts +3 -0
package/src/be/migrations/051_memory_posteriors_and_retrieval.sql +67 -0
package/src/be/migrations/052_memory_edges.sql +36 -0
package/src/be/migrations/053_agent_waiting_for_credentials_status.sql +61 -0
package/src/be/migrations/054_agent_harness_provider.sql +21 -0
package/src/be/migrations/055_agent_cred_status.sql +15 -0
package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
package/src/be/migrations/057_inbox_item_state.sql +27 -0
package/src/be/migrations/058_task_templates.sql +31 -0
package/src/be/swarm-config-guard.ts +24 -0
package/src/commands/credential-wait.ts +186 -0
package/src/commands/provider-credentials.ts +434 -0
package/src/commands/runner.ts +253 -21
package/src/hooks/hook.ts +143 -66
package/src/http/agents.ts +191 -1
package/src/http/config.ts +11 -1
package/src/http/core.ts +5 -0
package/src/http/inbox-state.ts +89 -0
package/src/http/index.ts +10 -0
package/src/http/memory.ts +230 -1
package/src/http/sessions.ts +86 -0
package/src/http/status.ts +665 -0
package/src/http/task-templates.ts +51 -0
package/src/http/tasks.ts +85 -5
package/src/http/users.ts +134 -0
package/src/prompts/memories.ts +62 -0
package/src/providers/claude-adapter.ts +22 -0
package/src/providers/claude-managed-adapter.ts +24 -0
package/src/providers/codex-adapter.ts +43 -1
package/src/providers/devin-adapter.ts +18 -0
package/src/providers/index.ts +7 -0
package/src/providers/opencode-adapter.ts +60 -0
package/src/providers/pi-mono-adapter.ts +71 -0
package/src/providers/types.ts +34 -0
package/src/server.ts +2 -0
package/src/slack/handlers.ts +0 -1
package/src/tests/agents-harness-provider.test.ts +333 -0
package/src/tests/credential-check.test.ts +367 -0
package/src/tests/credential-status-api.test.ts +223 -0
package/src/tests/credential-status-routing.test.ts +150 -0
package/src/tests/credential-wait.test.ts +282 -0
package/src/tests/harness-provider-resolution.test.ts +242 -0
package/src/tests/jira-sync.test.ts +1 -1
package/src/tests/memory-edges.test.ts +722 -0
package/src/tests/memory-rate-endpoint.test.ts +330 -0
package/src/tests/memory-rate-tool.test.ts +252 -0
package/src/tests/memory-rater-e2e.test.ts +578 -0
package/src/tests/memory-rater-implicit-citation.test.ts +304 -0
package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
package/src/tests/memory-rater-llm.test.ts +964 -0
package/src/tests/memory-rater-store.test.ts +249 -0
package/src/tests/memory-reranker.test.ts +161 -2
package/src/tests/migration-runner-regressions.test.ts +17 -2
package/src/tests/mocks/mock-llm-rater-client.ts +35 -0
package/src/tests/run-server-raters.test.ts +291 -0
package/src/tests/sessions.test.ts +141 -0
package/src/tests/status.test.ts +843 -0
package/src/tests/stop-hook-task-resolution.test.ts +98 -0
package/src/tests/template-recommendations.test.ts +148 -0
package/src/tests/tool-annotations.test.ts +2 -2
package/src/tests/use-dismissible-card.test.ts +140 -0
package/src/tools/memory-rate.ts +166 -0
package/src/tools/memory-search.ts +18 -0
package/src/tools/store-progress.ts +37 -0
package/src/tools/swarm-config/set-config.ts +17 -1
package/src/tools/tool-config.ts +1 -0
package/src/types.ts +122 -1
package/src/utils/harness-provider.ts +32 -0
package/tsconfig.json +0 -2

package/src/be/migrations/054_agent_harness_provider.sql ADDED Viewed

@@ -0,0 +1,21 @@
+-- 054_agent_harness_provider.sql
+--
+-- Phase 1.5 of the cloud-personalization plan
+-- (thoughts/taras/plans/2026-05-08-cloud-personalization-phases-1-4.md).
+--
+-- Add a first-class `harness_provider` column on `agents` so each agent's
+-- harness (claude / codex / pi / devin / claude-managed / opencode) is
+-- queryable per-row, independent of `process.env.HARNESS_PROVIDER` at
+-- worker boot.
+--
+-- Workers push their `HARNESS_PROVIDER` value on registration; an operator
+-- can later re-assign via `PATCH /api/agents/:id/harness-provider`. The
+-- worker itself does NOT yet react in real time — picked up on next worker
+-- restart. Full per-agent harness with dynamic adapter loading lives in
+-- Linear DES-359.
+--
+-- Forward-only. NULL default = backward-compat for already-registered
+-- agents (their column stays NULL until they re-register or an operator
+-- patches it).
+--
+-- No CHECK constraint is declared on the column, so at the SQL level any
+-- text value (or NULL) is accepted.
+ALTER TABLE agents ADD COLUMN harness_provider TEXT NULL;

package/src/be/migrations/055_agent_cred_status.sql ADDED Viewed

@@ -0,0 +1,15 @@
+-- 055_agent_cred_status.sql
+--
+-- Worker-self-reported credential snapshot. Pairs with `harness_provider`
+-- (054): the JSON describes the agent's creds for whichever harness that
+-- agent runs. NULL = unreported (worker hasn't booted yet, or
+-- CRED_CHECK_DISABLE=1 was set).
+--
+-- The existing `credentialMissing` column (053) stays. This one is additive
+-- and carries the full snapshot (ready, missing, satisfiedBy, hint,
+-- liveTest, reportedAt, reportKind). Once `cred_status.missing` is proven
+-- across deploys, `credentialMissing` can be retired in a later migration.
+--
+-- Forward-only.
+--
+-- The column is plain TEXT with no constraint, so SQLite does not verify
+-- that the stored value is well-formed JSON.
+ALTER TABLE agents ADD COLUMN cred_status TEXT;

package/src/be/migrations/056_drop_agent_tasks_source_check.sql ADDED Viewed

@@ -0,0 +1,139 @@
+-- Drop the SQL CHECK constraint on agent_tasks.source.
+-- The Zod layer (`AgentTaskSourceSchema` in src/types.ts) is now the single
+-- source of truth for the allowed enum, so adding a new source no longer
+-- requires a forward-only migration. This makes future source additions
+-- (Phase 1 of the UI chat/session experience plan) cheap.
+--
+-- SQLite cannot ALTER a CHECK constraint in place; we follow the table-rebuild
+-- pattern from migration 043_jira_source.sql verbatim, minus the CHECK clause
+-- on `source`. All other columns, defaults, indexes, and FKs are preserved
+-- exactly. No data migration — existing rows remain valid.
+--
+-- INSERT uses an explicit column list (no `SELECT *`) to be robust against
+-- column-order drift between SQLite versions and against post-043 ALTERs
+-- (migration 044 added `provider` and `providerMeta`).
+--
+-- Step 1: switch FK enforcement off for the duration of the swap.
+-- NOTE(review): SQLite treats `PRAGMA foreign_keys` as a no-op while a
+-- transaction is open — confirm the migration runner executes this file
+-- outside BEGIN/COMMIT, otherwise enforcement stays on during DROP TABLE.
+PRAGMA foreign_keys=off;
+-- Step 2: create the replacement table. `source` keeps NOT NULL and its
+-- 'mcp' default but carries no CHECK clause.
+CREATE TABLE agent_tasks_new (
+  id TEXT PRIMARY KEY,
+  agentId TEXT,
+  creatorAgentId TEXT,
+  task TEXT NOT NULL,
+  status TEXT NOT NULL DEFAULT 'pending',
+  source TEXT NOT NULL DEFAULT 'mcp',
+  taskType TEXT,
+  tags TEXT DEFAULT '[]',
+  priority INTEGER DEFAULT 50,
+  dependsOn TEXT DEFAULT '[]',
+  offeredTo TEXT,
+  offeredAt TEXT,
+  acceptedAt TEXT,
+  rejectionReason TEXT,
+  slackChannelId TEXT,
+  slackThreadTs TEXT,
+  slackUserId TEXT,
+  mentionMessageId TEXT,
+  mentionChannelId TEXT,
+  vcsProvider TEXT,
+  vcsRepo TEXT,
+  vcsEventType TEXT,
+  vcsNumber INTEGER,
+  vcsCommentId INTEGER,
+  vcsAuthor TEXT,
+  vcsUrl TEXT,
+  parentTaskId TEXT,
+  claudeSessionId TEXT,
+  agentmailInboxId TEXT,
+  agentmailMessageId TEXT,
+  agentmailThreadId TEXT,
+  model TEXT,
+  scheduleId TEXT,
+  workflowRunId TEXT REFERENCES workflow_runs(id),
+  workflowRunStepId TEXT REFERENCES workflow_run_steps(id),
+  createdAt TEXT NOT NULL,
+  lastUpdatedAt TEXT NOT NULL,
+  finishedAt TEXT,
+  failureReason TEXT,
+  output TEXT,
+  progress TEXT,
+  notifiedAt TEXT,
+  dir TEXT,
+  outputSchema TEXT,
+  compactionCount INTEGER DEFAULT 0,
+  peakContextPercent REAL,
+  totalContextTokensUsed INTEGER,
+  contextWindowSize INTEGER,
+  was_paused INTEGER NOT NULL DEFAULT 0,
+  credentialKeySuffix TEXT,
+  credentialKeyType TEXT,
+  requestedByUserId TEXT REFERENCES users(id),
+  vcsInstallationId INTEGER,
+  vcsNodeId TEXT,
+  slackReplySent INTEGER DEFAULT 0,
+  swarmVersion TEXT,
+  contextKey TEXT,
+  provider TEXT,
+  providerMeta TEXT
+);
+-- Step 3: copy every row across. The target column list and the SELECT list
+-- below name the same columns in the same order.
+INSERT INTO agent_tasks_new (
+  id, agentId, creatorAgentId, task, status, source, taskType, tags,
+  priority, dependsOn, offeredTo, offeredAt, acceptedAt, rejectionReason,
+  slackChannelId, slackThreadTs, slackUserId,
+  mentionMessageId, mentionChannelId,
+  vcsProvider, vcsRepo, vcsEventType, vcsNumber, vcsCommentId, vcsAuthor, vcsUrl,
+  parentTaskId, claudeSessionId,
+  agentmailInboxId, agentmailMessageId, agentmailThreadId,
+  model, scheduleId, workflowRunId, workflowRunStepId,
+  createdAt, lastUpdatedAt, finishedAt, failureReason, output, progress, notifiedAt,
+  dir, outputSchema, compactionCount, peakContextPercent,
+  totalContextTokensUsed, contextWindowSize, was_paused,
+  credentialKeySuffix, credentialKeyType, requestedByUserId,
+  vcsInstallationId, vcsNodeId, slackReplySent, swarmVersion, contextKey,
+  provider, providerMeta
+)
+SELECT
+  id, agentId, creatorAgentId, task, status, source, taskType, tags,
+  priority, dependsOn, offeredTo, offeredAt, acceptedAt, rejectionReason,
+  slackChannelId, slackThreadTs, slackUserId,
+  mentionMessageId, mentionChannelId,
+  vcsProvider, vcsRepo, vcsEventType, vcsNumber, vcsCommentId, vcsAuthor, vcsUrl,
+  parentTaskId, claudeSessionId,
+  agentmailInboxId, agentmailMessageId, agentmailThreadId,
+  model, scheduleId, workflowRunId, workflowRunStepId,
+  createdAt, lastUpdatedAt, finishedAt, failureReason, output, progress, notifiedAt,
+  dir, outputSchema, compactionCount, peakContextPercent,
+  totalContextTokensUsed, contextWindowSize, was_paused,
+  credentialKeySuffix, credentialKeyType, requestedByUserId,
+  vcsInstallationId, vcsNodeId, slackReplySent, swarmVersion, contextKey,
+  provider, providerMeta
+FROM agent_tasks;
+-- Step 4: swap the tables. Dropping the old table also drops its indexes,
+-- which is why they are recreated below.
+-- NOTE(review): triggers attached to agent_tasks would be dropped too and are
+-- not recreated in this file — confirm none exist.
+DROP TABLE agent_tasks;
+ALTER TABLE agent_tasks_new RENAME TO agent_tasks;
+-- Recreate every index that existed on agent_tasks (mirrors 043 + later additions):
+--   001/004/006/009/026: agentId, status, offeredTo, taskType, agentmailThreadId, scheduleId, workflowRunId
+--   031: requestedByUserId (partial)
+--   034: parentTaskId
+--   037: swarmVersion
+--   040: composite (slackChannelId, slackThreadTs, status)
+--   042: contextKey + (contextKey, status) composite
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_agentId ON agent_tasks(agentId);
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_status ON agent_tasks(status);
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_offeredTo ON agent_tasks(offeredTo);
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_taskType ON agent_tasks(taskType);
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_agentmailThreadId ON agent_tasks(agentmailThreadId);
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_schedule_id ON agent_tasks(scheduleId);
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_workflow_run ON agent_tasks(workflowRunId);
+CREATE INDEX IF NOT EXISTS idx_tasks_requested_by ON agent_tasks(requestedByUserId) WHERE requestedByUserId IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_parentTaskId ON agent_tasks(parentTaskId);
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_swarmVersion ON agent_tasks(swarmVersion);
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_slack_thread
+  ON agent_tasks(slackChannelId, slackThreadTs, status);
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_context_key
+  ON agent_tasks(contextKey);
+CREATE INDEX IF NOT EXISTS idx_agent_tasks_context_key_status
+  ON agent_tasks(contextKey, status);
+-- Step 5: restore FK enforcement now that the swap is complete.
+PRAGMA foreign_keys=on;

package/src/be/migrations/057_inbox_item_state.sql ADDED Viewed

@@ -0,0 +1,27 @@
+-- Inbox item state — per-user dismiss/snooze/done state for action-items inbox
+-- buckets (approval, credential_missing, broken_task, to_read, to_start_template).
+--
+-- itemType is enforced via Zod (`InboxItemTypeSchema` in src/types.ts), not a
+-- SQL CHECK constraint — Phase 1 lesson, lets us extend the enum without a
+-- forward-only migration. Direct SQL inserts can bypass; the HTTP layer
+-- (`PATCH /api/inbox-state`) is the only sanctioned writer.
+--
+-- itemId references the underlying entity (task id, approval-request id,
+-- agent id, template id, …) but is left as a free TEXT column rather than a
+-- typed FK because itemType disambiguates which table it points at.
+--
+-- `status` also has no CHECK constraint; only its 'open' default is defined
+-- at the SQL level.
+--
+-- UNIQUE(userId, itemType, itemId) allows at most one state row per user and
+-- item. Rows are removed with their user (ON DELETE CASCADE) when foreign-key
+-- enforcement is on.
+--
+-- NOTE(review): the timestamp defaults use datetime('now'), which produces
+-- 'YYYY-MM-DD HH:MM:SS' in UTC — confirm that writers setting these columns
+-- explicitly use the same format, or string comparisons will be inconsistent.
+CREATE TABLE IF NOT EXISTS inbox_item_state (
+  id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
+  userId TEXT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
+  itemType TEXT NOT NULL,
+  itemId TEXT NOT NULL,
+  status TEXT NOT NULL DEFAULT 'open',
+  snoozeUntil TEXT,
+  dismissedAt TEXT,
+  doneAt TEXT,
+  createdAt TEXT NOT NULL DEFAULT (datetime('now')),
+  lastUpdatedAt TEXT NOT NULL DEFAULT (datetime('now')),
+  UNIQUE(userId, itemType, itemId)
+);
+-- Lookup index on (userId, status).
+CREATE INDEX IF NOT EXISTS idx_inbox_item_state_userId_status
+  ON inbox_item_state(userId, status);

package/src/be/migrations/058_task_templates.sql ADDED Viewed

@@ -0,0 +1,31 @@
+-- Task templates — "To start" bucket starters. Polymorphic from day one
+-- (kind = 'task' | 'workflow' | 'schedule') so v2 can register workflow /
+-- schedule starters without a follow-up migration. v1 only inserts/reads
+-- kind='task' rows; the schema is shaped for v2.
+--
+-- The `prompt` column is NOT NULL only because v1 only ever seeds task rows;
+-- a future migration can relax that when workflow/schedule starters land
+-- (workflows carry workflowId in `payload`, schedules carry cron + prompt).
+--
+-- Table name kept as `task_templates` for v1 to match existing references
+-- across the plan; v2 may rename to `quick_starts` if non-task kinds graduate.
+CREATE TABLE IF NOT EXISTS task_templates (
+  id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
+  title TEXT NOT NULL,
+  description TEXT NOT NULL,
+  prompt TEXT NOT NULL,
+  kind TEXT NOT NULL DEFAULT 'task' CHECK(kind IN ('task','workflow','schedule')),
+  payload TEXT NOT NULL DEFAULT '{}',
+  category TEXT,
+  tags TEXT NOT NULL DEFAULT '[]',
+  createdAt TEXT NOT NULL DEFAULT (datetime('now'))
+);
+CREATE INDEX IF NOT EXISTS idx_task_templates_kind ON task_templates(kind);
+-- Seed the v1 starters. Columns omitted from the list (id, kind, payload,
+-- createdAt) take their defaults, so every seed row is kind='task' with a
+-- random id.
+-- NOTE(review): unlike the CREATE statements above, this INSERT is not
+-- idempotent — no unique key covers `title`, so executing it a second time
+-- would duplicate all five rows. It relies on the migration being applied
+-- exactly once.
+INSERT INTO task_templates (title, description, prompt, category, tags) VALUES
+  ('Refactor a file', 'Improve a file without changing behavior', 'Refactor the file at <path> for readability while preserving behavior. Run typecheck + tests after.', 'engineering', '["refactor"]'),
+  ('Investigate a bug', 'Reproduce, root-cause, and propose a fix', 'Investigate the following bug: <symptom>. Reproduce locally, identify the root cause, and propose a fix.', 'engineering', '["debug"]'),
+  ('Open a PR', 'Create a PR for the current branch', 'Open a PR from the current branch with a clear summary and test plan.', 'git', '["git","pr"]'),
+  ('Write tests for X', 'Cover an under-tested module', 'Write unit tests for <module>. Aim for ~80% line coverage.', 'engineering', '["test"]'),
+  ('Daily triage', 'Review failed tasks + pending approvals', 'Triage the action-items inbox: dismiss noise, escalate blockers, summarize unread sessions.', 'ops', '["triage"]');

package/src/be/swarm-config-guard.ts CHANGED Viewed

@@ -1,3 +1,5 @@
+import { ProviderNameSchema } from "../types";
 /**
  * Guards against storing reserved keys in the swarm_config table.
  *
@@ -23,3 +25,25 @@ export function reservedKeyError(key: string): Error {
       `Set it as an environment variable instead.`,
   );
 }
+/**
+ * Per-key value validators applied to `upsertSwarmConfig` writes made via
+ * the HTTP config API. Register a key here when an invalid value would
+ * silently break workers (e.g. typo'd HARNESS_PROVIDER would fall back to
+ * "claude" with only a console.warn — the operator wouldn't see why their
+ * config was ignored).
+ *
+ * Each validator returns a human-readable error string when the value is
+ * invalid, or `null` when it passes.
+ */
+const VALIDATED_KEYS: Record<string, (value: unknown) => string | null> = {
+  HARNESS_PROVIDER(value) {
+    if (ProviderNameSchema.safeParse(value).success) {
+      return null;
+    }
+    const allowed = ProviderNameSchema.options.join(", ");
+    return `Invalid HARNESS_PROVIDER value (must be one of: ${allowed})`;
+  },
+};
+/**
+ * Run the validator registered for `key`, if there is one. The key is
+ * upper-cased before the lookup, so matching is case-insensitive.
+ *
+ * @returns a human-readable error string when the value is invalid, or
+ *   `null` when the key has no validator or the value passes.
+ */
+export function validateConfigValue(key: string, value: unknown): string | null {
+  const check = VALIDATED_KEYS[key.toUpperCase()];
+  if (!check) {
+    return null;
+  }
+  return check(value);
+}

package/src/commands/credential-wait.ts ADDED Viewed

@@ -0,0 +1,186 @@
+/**
+ * Worker-side credential wait loop.
+ *
+ * Runs once at boot, *after* the worker has registered with the API
+ * (`POST /api/agents`). The flow is:
+ *
+ *   1. Calls `checkProviderCredentials(provider, env)` on the initial env
+ *      (`process.env` by default) — if ready, returns immediately.
+ *   2. Otherwise, on every attempt, invokes `onTick(status, attempt)` so
+ *      callers can report state to the API, and logs a
+ *      `[boot] waiting for …` line.
+ *   3. Sleeps with exponential backoff (2s → 30s by default; both the
+ *      initial delay and the cap are configurable).
+ *   4. Calls the caller-provided `refreshEnv()` (typically
+ *      `fetchResolvedEnv` from runner.ts) to pull `swarm_config` keys into
+ *      `process.env`. A failed refresh is logged and retried on the next
+ *      attempt.
+ *   5. Re-checks against `process.env`; if ready, fires a final `onTick`
+ *      and returns.
+ *   6. If `BOOT_MAX_WAIT_SECONDS` is set and exceeded, throws a
+ *      `BootMaxWaitExceededError` so the runner can exit with a distinct
+ *      code. Default 0 = wait forever.
+ *
+ * Why TS-level wait instead of bash-level fail-fast: workers running under
+ * `restart: unless-stopped` would otherwise loop the container forever when
+ * a credential is set via `swarm_config` after the first boot, because the
+ * entrypoint hard-exits before the process can refresh.
+ */
+import type { CredCheckOptions, CredStatus } from "../providers/types";
+import { checkProviderCredentials } from "./provider-credentials";
+/**
+ * Exit code distinct from generic failures so monitoring can distinguish
+ * "config never arrived" from worker process crashes. Matches sysexits(3)'s
+ * `EX_CONFIG`.
+ */
+export const EX_CONFIG = 78;
+/**
+ * Raised by the wait loop when a configured maximum boot wait has elapsed
+ * and credentials are still missing. Carries the elapsed time and the last
+ * observed credential status.
+ */
+export class BootMaxWaitExceededError extends Error {
+  /** Seconds spent waiting when the limit was hit. */
+  public readonly elapsedSeconds: number;
+  /** The most recent (not ready) credential status. */
+  public readonly lastStatus: CredStatus;
+
+  constructor(elapsedSeconds: number, lastStatus: CredStatus) {
+    // An empty `missing` list joins to "", which falls through to "(unknown)".
+    const stillMissing = lastStatus.missing.join(", ") || "(unknown)";
+    const waited = elapsedSeconds.toFixed(1);
+    super(
+      `Boot wait exceeded BOOT_MAX_WAIT_SECONDS (${waited}s). Still missing: ${stillMissing}.`,
+    );
+    this.elapsedSeconds = elapsedSeconds;
+    this.lastStatus = lastStatus;
+    this.name = "BootMaxWaitExceededError";
+  }
+}
+/** Options accepted by `awaitCredentials`. */
+export interface AwaitCredentialsOptions {
+  /** Harness provider name — picks the predicate to run. */
+  provider: string;
+  /**
+   * Pull latest swarm_config values into env. Resolves to the merged env.
+   * Every entry of the result is copied into `process.env`; entries whose
+   * value is `undefined` are deleted from it.
+   */
+  refreshEnv: () => Promise<Record<string, string | undefined>>;
+  /**
+   * Callback invoked on every tick — Phase 3 wires this to the status-report
+   * API. Called once per waiting attempt with the not-ready status, and once
+   * more with the ready status after a wait. It is not called when
+   * credentials are already present on the first check.
+   */
+  onTick?: (status: CredStatus, attempt: number) => void;
+  /**
+   * Override env source (defaults to `process.env`). Used for the first
+   * credential check and for reading the `BOOT_*` backoff settings; checks
+   * made after a refresh read `process.env`.
+   */
+  initialEnv?: Record<string, string | undefined>;
+  /** Sleep helper override for tests. */
+  sleep?: (ms: number) => Promise<void>;
+  /** Clock override for tests (returns ms epoch). */
+  now?: () => number;
+  /** Forwarded to `checkProviderCredentials` (file-presence injection for codex/pi/opencode). */
+  credCheckOptions?: CredCheckOptions;
+  /** Override the default backoff config (else read from env). */
+  backoff?: {
+    /** Delay before the first retry, in milliseconds. */
+    initialMs?: number;
+    /** Upper bound applied when the delay is doubled, in milliseconds. */
+    maxMs?: number;
+    /** Give up after this many seconds; values <= 0 disable the limit. */
+    maxWaitSeconds?: number;
+  };
+  /** Logger override (defaults to console.log). */
+  log?: (line: string) => void;
+}
+/** Backoff settings with every field filled in (override, env, or default). */
+interface ResolvedBackoff {
+  /** Delay before the first retry, in milliseconds. */
+  initialMs: number;
+  /** Cap applied to the doubled delay, in milliseconds. */
+  maxMs: number;
+  /** Maximum total wait in seconds; 0 means no limit. */
+  maxWaitSeconds: number;
+}
+/**
+ * Resolve the backoff settings for the credential wait loop.
+ *
+ * For each field an explicit `override` value wins as-is; otherwise the
+ * value is read from `env` (`BOOT_INITIAL_BACKOFF_MS`, `BOOT_MAX_BACKOFF_MS`,
+ * `BOOT_MAX_WAIT_SECONDS`). Unset, blank, non-numeric, or negative env
+ * values fall back to the defaults (2000 ms, 30000 ms, 0 s).
+ *
+ * @param override caller-supplied settings, taken verbatim when present
+ * @param env environment to read the `BOOT_*` variables from
+ * @returns fully populated backoff settings
+ */
+function resolveBackoff(
+  override: AwaitCredentialsOptions["backoff"],
+  env: Record<string, string | undefined>,
+): ResolvedBackoff {
+  const parseNonNegative = (
+    raw: string | undefined,
+    fallback: number,
+    allowZero: boolean,
+  ): number => {
+    // `Number("")` and `Number("   ")` are both 0, so blank values must be
+    // rejected before conversion or they would silently become a zero delay.
+    const text = raw?.trim();
+    if (!text) return fallback;
+    const n = Number(text);
+    if (!Number.isFinite(n) || n < 0) return fallback;
+    return n === 0 && !allowZero ? fallback : n;
+  };
+  return {
+    // A zero delay would make the wait loop spin without any backoff
+    // (0 * 2 stays 0, and min(x, 0) is 0), so zero is refused for both
+    // delays when they come from the environment.
+    initialMs: override?.initialMs ?? parseNonNegative(env.BOOT_INITIAL_BACKOFF_MS, 2000, false),
+    maxMs: override?.maxMs ?? parseNonNegative(env.BOOT_MAX_BACKOFF_MS, 30000, false),
+    // 0 = wait forever — the runner can override with a finite ceiling per
+    // worker if monitoring wants a "config never arrived" signal.
+    maxWaitSeconds: override?.maxWaitSeconds ?? parseNonNegative(env.BOOT_MAX_WAIT_SECONDS, 0, true),
+  };
+}
+/**
+ * Mirror a refreshed env object into `process.env`: defined values are
+ * assigned, and keys whose value is `undefined` are removed.
+ */
+function applyEnvUpdates(refreshed: Record<string, string | undefined>): void {
+  Object.keys(refreshed).forEach((name) => {
+    const next = refreshed[name];
+    if (next !== undefined) {
+      process.env[name] = next;
+      return;
+    }
+    delete process.env[name];
+  });
+}
+/**
+ * Block until the worker's harness has its credentials.
+ *
+ * The first check reads `opts.initialEnv` (default `process.env`). Every
+ * later check reads `process.env`, after the result of `opts.refreshEnv()`
+ * has been copied into it.
+ *
+ * `opts.onTick` is best-effort: a synchronous throw or a rejected promise
+ * from it is logged and never interrupts the wait.
+ *
+ * @param opts provider name, env refresh hook, and optional test/tuning hooks
+ * @returns the final `CredStatus` (always `ready: true`) once satisfied. The
+ *   caller is then free to start the polling loop.
+ * @throws BootMaxWaitExceededError when a positive max wait is configured
+ *   and the elapsed time reaches it while credentials are still missing.
+ */
+export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<CredStatus> {
+  const sleep = opts.sleep ?? ((ms: number) => Bun.sleep(ms));
+  const now = opts.now ?? (() => Date.now());
+  const log = opts.log ?? ((line: string) => console.log(line));
+  const initialEnv = opts.initialEnv ?? process.env;
+  const backoff = resolveBackoff(opts.backoff, initialEnv);
+  // onTick failures must never break the wait loop — they're just
+  // best-effort status reporting. The callback is typed as returning void,
+  // but an async function is assignable to that type, so a returned promise
+  // is also given a rejection handler instead of surfacing as an unhandled
+  // rejection.
+  const reportTick = (current: CredStatus, tickAttempt: number): void => {
+    const onTickFailed = (err: unknown): void => {
+      log(`[boot] onTick error (non-fatal): ${err}`);
+    };
+    try {
+      const outcome: unknown = opts.onTick?.(current, tickAttempt);
+      void Promise.resolve(outcome).catch(onTickFailed);
+    } catch (err) {
+      onTickFailed(err);
+    }
+  };
+  // Fast path: already satisfied at boot.
+  let status = checkProviderCredentials(opts.provider, initialEnv, opts.credCheckOptions);
+  if (status.ready) {
+    log(`[boot] credentials ready (provider=${opts.provider}, satisfiedBy=${status.satisfiedBy})`);
+    return status;
+  }
+  const start = now();
+  let attempt = 0;
+  let delayMs = backoff.initialMs;
+  while (!status.ready) {
+    attempt += 1;
+    // Notify the caller (Phase 3 reports waiting_for_credentials to the API).
+    reportTick(status, attempt);
+    log(
+      `[boot] waiting for ${status.missing.join(", ") || "credentials"} ` +
+        `(attempt ${attempt}, retry in ${delayMs}ms)${status.hint ? ` — ${status.hint}` : ""}`,
+    );
+    await sleep(delayMs);
+    // Refresh env from swarm_config (the whole point of the loop — the
+    // server may have just been told about a credential).
+    try {
+      const refreshed = await opts.refreshEnv();
+      applyEnvUpdates(refreshed);
+    } catch (err) {
+      // Don't crash on a transient refresh failure; just retry on the next tick.
+      log(`[boot] env refresh failed (non-fatal): ${err}`);
+    }
+    status = checkProviderCredentials(opts.provider, process.env, opts.credCheckOptions);
+    if (!status.ready) {
+      // Exponential backoff with cap.
+      delayMs = Math.min(delayMs * 2, backoff.maxMs);
+      // The limit is only evaluated after a failed re-check, so the actual
+      // wait can overshoot it by up to one backoff delay.
+      if (backoff.maxWaitSeconds > 0) {
+        const elapsedSec = (now() - start) / 1000;
+        if (elapsedSec >= backoff.maxWaitSeconds) {
+          throw new BootMaxWaitExceededError(elapsedSec, status);
+        }
+      }
+    }
+  }
+  log(
+    `[boot] credentials ready (provider=${opts.provider}, satisfiedBy=${status.satisfiedBy}, attempts=${attempt})`,
+  );
+  // Final tick so callers can clear the waiting state. Failures are logged
+  // like the in-loop ones rather than silently dropped.
+  reportTick(status, attempt);
+  return status;
+}