npm - @runuai/host - Versions diffs - 0.1.0 - Mend

@runuai/host 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/LICENSE +21 -0
package/README.md +91 -0
package/bin/uai-host.mjs +14 -0
package/db/migrations/0000_host_tasks.sql +12 -0
package/db/migrations/0001_host_ui.sql +11 -0
package/db/migrations/0002_host_github_tokens.sql +8 -0
package/db/migrations/0003_host_ssh_keys.sql +8 -0
package/db/migrations/0004_host_owner_name.sql +1 -0
package/db/migrations/meta/_journal.json +41 -0
package/db/schema.ts +82 -0
package/images/standard/Dockerfile +232 -0
package/images/standard/README.md +122 -0
package/images/standard/container/code-server-settings.json +36 -0
package/images/standard/container/uai-init +215 -0
package/images/standard/tool-versions +2 -0
package/lib/agent.ts +292 -0
package/lib/agents/claude.ts +343 -0
package/lib/agents/codex.ts +522 -0
package/lib/agents/factory.ts +34 -0
package/lib/agents/mock.ts +133 -0
package/lib/agents/proc.ts +172 -0
package/lib/agents/registry.ts +109 -0
package/lib/agents/types.ts +133 -0
package/lib/attachments.ts +46 -0
package/lib/cloud-state.ts +56 -0
package/lib/command-db.ts +278 -0
package/lib/db.ts +68 -0
package/lib/env.ts +140 -0
package/lib/git-diff.ts +370 -0
package/lib/git-identity.ts +65 -0
package/lib/github-tokens.ts +321 -0
package/lib/orchestrator.ts +975 -0
package/lib/preview-ports.ts +85 -0
package/lib/repo-clone.ts +127 -0
package/lib/runtime-state.ts +120 -0
package/lib/secrets.ts +71 -0
package/lib/ssh.ts +186 -0
package/lib/standard-image.ts +152 -0
package/lib/task-diff.ts +113 -0
package/lib/task-status.ts +46 -0
package/lib/transcript.ts +30 -0
package/lib/ulid.ts +7 -0
package/package.json +85 -0
package/scripts/agent/_common.sh +248 -0
package/scripts/agent/task-down.sh +113 -0
package/scripts/agent/task-status.sh +54 -0
package/scripts/agent/task-up.sh +457 -0
package/scripts/install/darwin.ts +167 -0
package/scripts/install/linux.ts +115 -0
package/scripts/install/types.ts +35 -0
package/scripts/install/util.ts +39 -0
package/scripts/install/win.ts +130 -0
package/src/cli.ts +445 -0
package/src/index.ts +375 -0
package/src/load-env.ts +52 -0
package/src/main.ts +1156 -0
package/src/paths.ts +64 -0
package/src/protocol.ts +413 -0
package/src/ui/server.ts +343 -0
package/src/ui/types.ts +78 -0
package/ui/app.js +264 -0
package/ui/index.html +55 -0
package/ui/style.css +359 -0
package/ui/uai-logo-black.svg +9 -0

package/lib/orchestrator.ts ADDED Viewed

@@ -0,0 +1,975 @@
+/**
+ * Orchestrator — the chat channel runtime.
+ *
+ * One **channel** per task. A channel holds an `AgentSession` per roster
+ * agent and emits protocol events for the cloud side to persist, stream,
+ * and route. It is the in-process stand-in for what becomes the host
+ * agent process in the hosted product (docs/hosted-architecture.md).
+ *
+ * Responsibilities:
+ *   - Lazily build a channel: load the project roster, spawn sessions.
+ *   - Deliver cloud-routed messages into live agent sessions.
+ *   - Emit typed HostEvents for agent output, tools, permissions, exits.
+ *
+ * Module singleton, stashed on globalThis so Next.js HMR reuses it
+ * instead of leaking a second orchestrator.
+ */
+import { spawnSync } from "node:child_process";
+import { existsSync, readFileSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import { inArray } from "drizzle-orm";
+import { getDb, schema } from "./db";
+import { mockAgentFactory } from "./agents/mock";
+import { realAgentFactory } from "./agents/factory";
+import {
+  type AgentEvent,
+  type AgentSession,
+  type AgentSessionFactory,
+  type Roster,
+  type RosterAgent,
+} from "./agents/types";
+import { ACTIVE_STATUSES } from "./task-status";
+import { getHostTask, upsertHostTask } from "./runtime-state";
+import { setupTaskGithub } from "./github-tokens";
+import { setupTaskGitIdentity } from "./git-identity";
+import type { ChannelEnsureInput, HostEvent } from "../src/protocol";
+/** Callback invoked with each `HostEvent` the orchestrator emits; registered via `subscribeHostEvents`. */
+export type HostEventSubscriber = (event: HostEvent) => void;
+// ---------------------------------------------------------------------------
+// Channel — one task's live conversation.
+// ---------------------------------------------------------------------------
+/** In-memory state for one task's channel: roster, live sessions and per-agent prompts. */
+interface Channel {
+  /** Id of the task this channel belongs to (also its key in the orchestrator's channel map). */
+  taskId: string;
+  /** Agents taking part in this channel, taken from the registered channel spec. */
+  roster: Roster;
+  /** Live agent sessions keyed by agent id; empty until the sessions are spawned. */
+  sessions: Map<string, AgentSession>;
+  /** Name of the task container passed to the session factory. Kept on the
+   *  channel so sessions can be started lazily (and respawned) later. */
+  containerName: string;
+  /** Per-agent system preamble (channel briefing + assembled persona). */
+  preambles: Map<string, string>;
+  /** Per-agent first-turn message — only agents whose `initialPrompt` is
+   *  non-empty have an entry. Delivered once at container-ready. */
+  firstTurns: Map<string, string>;
+  /** Guards against spawning the roster's sessions more than once. Set by
+   *  the orchestrator when it starts them, which happens lazily on the
+   *  first delivery or `ensureStarted` call made while the task is running. */
+  sessionsStarted: boolean;
+  /** Per-agent respawn counter — bounded so a broken agent can't
+   *  loop forever rewriting its config. */
+  respawns: Map<string, number>;
+}
+/** Hard cap on automatic respawns per agent per channel lifetime. The
+ *  counter is never reset, so this bounds the total, not a streak. */
+const MAX_RESPAWNS_PER_AGENT = 5;
+/** Patterns in an agent's error message that mean "config was
+ *  unlinked between runs" — repair-and-respawn covers the common
+ *  Docker-Desktop-macOS race where Claude's atomic writes briefly
+ *  leave .claude.json missing. Matching any one pattern is enough; the
+ *  second is deliberately broad and matches any message that mentions a
+ *  `/.claude.json` path. */
+const CLAUDE_CONFIG_MISSING_PATTERNS = [
+  /Claude configuration file not found/i,
+  /\/\.claude\.json/i,
+];
+/**
+ * Runtime for every task channel on this host: builds channels from
+ * registered specs, lazily spawns one agent session per roster agent,
+ * routes inbound messages into those sessions and republishes agent
+ * output as `HostEvent`s to subscribers.
+ *
+ * Work that is intentionally not awaited (sends, interrupts, event
+ * handling, `/retry-gh`) always has a rejection handler attached, so a
+ * failing adapter is logged instead of surfacing as an unhandled
+ * rejection.
+ */
+class Orchestrator {
+  /** Live channels keyed by task id. */
+  private readonly channels = new Map<string, Channel>();
+  /** Channel specs registered via `registerChannelSpec`, keyed by task id. */
+  private readonly channelSpecs = new Map<string, ChannelEnsureInput>();
+  private readonly hostSubscribers = new Set<HostEventSubscriber>();
+  /** Session spawns currently in flight. Concurrent callers share the same
+   *  promise instead of double-spawning or observing a half-built map. */
+  private readonly sessionStarts = new Map<Channel, Promise<boolean>>();
+  constructor(private readonly factory: AgentSessionFactory) {}
+  // -- subscriptions --------------------------------------------------------
+  /**
+   * Register a subscriber for every host event.
+   * @returns a function that removes the subscriber again.
+   */
+  subscribeHostEvents(fn: HostEventSubscriber): () => void {
+    this.hostSubscribers.add(fn);
+    return () => {
+      this.hostSubscribers.delete(fn);
+    };
+  }
+  /** Emit a host-originated system note into a task's channel (ADR-027). */
+  emitSystemNote(taskId: string, text: string): void {
+    this.emitHost({ kind: "system.note", taskId, text });
+  }
+  /** Fan an event out to every subscriber, isolating subscriber failures. */
+  private emitHost(event: HostEvent): void {
+    for (const fn of this.hostSubscribers) {
+      try {
+        fn(event);
+      } catch {
+        // A broken cloud subscriber must not take the channel down.
+      }
+    }
+  }
+  /** Render an unknown thrown value as text for logs and exit reasons. */
+  private errorText(err: unknown): string {
+    return err instanceof Error ? err.message : String(err);
+  }
+  /** Log the failure of work that was deliberately left un-awaited. */
+  private logBackgroundFailure(what: string, err: unknown): void {
+    console.error(`[orchestrator] ${what} failed: ${this.errorText(err)}`);
+  }
+  /** Send `text` to a session without waiting for the turn to finish. */
+  private sendDetached(
+    session: AgentSession,
+    agentId: string,
+    text: string,
+  ): void {
+    void Promise.resolve(session.send(text)).catch((err: unknown) => {
+      this.logBackgroundFailure(`send to ${agentId}`, err);
+    });
+  }
+  /** Run `handleAgentEvent` detached, logging (not leaking) its rejection. */
+  private dispatchAgentEvent(
+    channel: Channel,
+    agentId: string,
+    event: AgentEvent,
+  ): void {
+    void this.handleAgentEvent(channel, agentId, event).catch(
+      (err: unknown) => {
+        this.logBackgroundFailure(
+          `handling "${event.type}" event from ${agentId}`,
+          err,
+        );
+      },
+    );
+  }
+  // -- channel lifecycle ----------------------------------------------------
+  /** Remember the spec a channel for `spec.taskId` will be built from. */
+  registerChannelSpec(spec: ChannelEnsureInput): void {
+    this.channelSpecs.set(spec.taskId, spec);
+  }
+  /**
+   * Return the task's channel, building it from the registered spec on
+   * first use. Returns `null` when no spec has been registered. Building
+   * a channel only assembles prompts; no session is spawned here.
+   */
+  private async getOrCreateChannel(taskId: string): Promise<Channel | null> {
+    const existing = this.channels.get(taskId);
+    if (existing) return existing;
+    const spec = this.channelSpecs.get(taskId);
+    if (!spec) return null;
+    const roster = spec.agents;
+    const preambles = new Map<string, string>();
+    const firstTurns = new Map<string, string>();
+    for (const agent of roster) {
+      preambles.set(
+        agent.id,
+        buildSystemPreamble(
+          roster,
+          agent,
+          spec.projects,
+          spec.globalContext,
+          spec.workspacePath,
+          spec.branch,
+        ),
+      );
+      // ADR-022: any agent with a non-empty initialPrompt opens a first
+      // turn at container-ready. The rest stay silent until addressed.
+      const firstTurn = assembleFirstTurnPrompt(
+        spec.projects,
+        spec.globalContext,
+        agent,
+      );
+      if (firstTurn !== null) firstTurns.set(agent.id, firstTurn);
+    }
+    const channel: Channel = {
+      taskId,
+      roster,
+      sessions: new Map(),
+      containerName: `task-${taskId.toLowerCase()}-app-1`,
+      preambles,
+      firstTurns,
+      sessionsStarted: false,
+      respawns: new Map(),
+    };
+    this.channels.set(taskId, channel);
+    return channel;
+  }
+  /**
+   * Spawn the channel's agent sessions, lazily and exactly once.
+   *
+   * The real adapters `docker exec` into the task container, which only
+   * exists after `task-up` has run `docker compose up`. So sessions are
+   * gated on the task being `running` — calling this earlier is a no-op
+   * that returns `false`. Once the task is running, the first call
+   * spawns every roster agent's session; callers arriving while that
+   * spawn is still in flight wait for the same promise, so they never
+   * see a partially filled session map.
+   *
+   * Returns whether sessions are ready. Rejects when a session could not
+   * be created; the channel is then left un-started so a later call can
+   * retry.
+   */
+  private async ensureSessions(channel: Channel): Promise<boolean> {
+    if (channel.sessionsStarted) return true;
+    const inFlight = this.sessionStarts.get(channel);
+    if (inFlight) return inFlight;
+    const task = getHostTask(channel.taskId);
+    if (!task || task.statusMirror !== "running") return false;
+    // Set the task creator's git author identity in the container (ADR-029).
+    // The SSH key itself is installed earlier by task-up.sh (host clone +
+    // container), using the creator's per-user key. Best-effort.
+    setupTaskGitIdentity(channel.taskId, task.ownerName, task.ownerEmail);
+    // Everything up to here ran synchronously, so registering the promise
+    // now closes the window in which a second caller could start a spawn.
+    const start = this.spawnSessions(channel).finally(() => {
+      this.sessionStarts.delete(channel);
+    });
+    this.sessionStarts.set(channel, start);
+    return start;
+  }
+  /**
+   * Create one session per roster agent, wire its events, then deliver
+   * the first-turn prompts. On failure the sessions created so far are
+   * closed again and the error is rethrown.
+   */
+  private async spawnSessions(channel: Channel): Promise<boolean> {
+    try {
+      for (const agent of channel.roster) {
+        const session = await this.factory.create({
+          taskId: channel.taskId,
+          agent,
+          containerName: channel.containerName,
+          systemPreamble: channel.preambles.get(agent.id) ?? "",
+        });
+        channel.sessions.set(agent.id, session);
+        session.onEvent((event) => {
+          this.dispatchAgentEvent(channel, agent.id, event);
+        });
+      }
+    } catch (err) {
+      // Don't leave a half-spawned roster behind; the next call retries.
+      await this.closeSessions(channel);
+      throw err;
+    }
+    if (this.channels.get(channel.taskId) !== channel) {
+      // The channel was torn down while we were spawning — nobody will
+      // ever close these sessions unless we do it here.
+      await this.closeSessions(channel);
+      return false;
+    }
+    channel.sessionsStarted = true;
+    // Container is ready: deliver each agent's first-turn prompt (ADR-022).
+    // Only agents whose `initialPrompt` was non-empty have an entry.
+    for (const agent of channel.roster) {
+      const firstTurn = channel.firstTurns.get(agent.id);
+      if (firstTurn === undefined) continue;
+      const session = channel.sessions.get(agent.id);
+      if (session) this.sendDetached(session, agent.id, firstTurn);
+    }
+    return true;
+  }
+  /**
+   * Close every session currently held by the channel and empty the map.
+   * Never throws; the errors raised by individual `close()` calls are
+   * returned so the caller decides whether to surface them.
+   */
+  private async closeSessions(channel: Channel): Promise<unknown[]> {
+    const failures: unknown[] = [];
+    // Snapshot first: the map may be touched while we await each close.
+    for (const session of [...channel.sessions.values()]) {
+      try {
+        await session.close();
+      } catch (err) {
+        failures.push(err);
+      }
+    }
+    channel.sessions.clear();
+    return failures;
+  }
+  /**
+   * Ensure the channel exists and, if the task is `running`, its agent
+   * sessions are spawned. Idempotent — the task detail page's poll
+   * calls this so agents auto-start the moment the task is running,
+   * without the human having to send a message first.
+   */
+  async ensureStarted(taskId: string): Promise<void> {
+    const channel = await this.getOrCreateChannel(taskId);
+    if (channel) await this.ensureSessions(channel);
+  }
+  // -- inbound: a human (or routed peer) message ----------------------------
+  /**
+   * Deliver a cloud-routed message into one live agent session.
+   *
+   * Resolves as soon as the message has been handed to the session; it
+   * does not wait for the agent's turn. Returns `{ ok: false }` when the
+   * task has no registered spec, is not running, or has no such agent.
+   */
+  async deliver(
+    taskId: string,
+    agentId: string,
+    text: string,
+  ): Promise<{ ok: true } | { ok: false; error: string }> {
+    // Slash command: re-run GitHub auth setup for this task (ADR-027). Handled
+    // before sessions so it works even when an agent isn't ready.
+    if (text.trim() === "/retry-gh") {
+      void this.handleRetryGh(taskId).catch((err: unknown) => {
+        this.logBackgroundFailure(`/retry-gh for task ${taskId}`, err);
+      });
+      return { ok: true };
+    }
+    const channel = await this.getOrCreateChannel(taskId);
+    if (!channel) return { ok: false, error: "task or project not found" };
+    const ready = await this.ensureSessions(channel);
+    if (!ready) {
+      return { ok: false, error: "task is not running" };
+    }
+    const session = channel.sessions.get(agentId);
+    if (!session) return { ok: false, error: `no such agent: ${agentId}` };
+    this.sendDetached(session, agentId, rewriteAttachmentRefs(text));
+    return { ok: true };
+  }
+  /** Interrupt an agent's current turn (ESC). Returns `{ ok: false }` when the
+   *  task/agent has no live session (nothing to stop). */
+  async interrupt(
+    taskId: string,
+    agentId: string,
+  ): Promise<{ ok: true } | { ok: false; error: string }> {
+    const session = this.channels.get(taskId)?.sessions.get(agentId);
+    if (!session) return { ok: false, error: "no active session" };
+    void Promise.resolve(session.interrupt()).catch((err: unknown) => {
+      this.logBackgroundFailure(`interrupt of ${agentId}`, err);
+    });
+    this.emitSystemNote(taskId, `Stopped @${agentId}.`);
+    return { ok: true };
+  }
+  /** `/retry-gh` handler: re-mint + inject the task owner's GitHub token.
+   *  A throwing setup is logged and reported like a failed one. */
+  private async handleRetryGh(taskId: string): Promise<void> {
+    const owner = getHostTask(taskId)?.ownerUserId;
+    if (!owner) {
+      this.emitSystemNote(taskId, "gh: no owner recorded for this task.");
+      return;
+    }
+    this.emitSystemNote(taskId, "gh: retrying authentication…");
+    let ok = false;
+    try {
+      ok = await setupTaskGithub(taskId, owner);
+    } catch (err) {
+      this.logBackgroundFailure(`GitHub setup for task ${taskId}`, err);
+    }
+    this.emitSystemNote(
+      taskId,
+      ok
+        ? "gh: authentication restored."
+        : "gh: still not connected — reconnect GitHub on Account, then try again.",
+    );
+  }
+  // -- outbound: events from an agent session -------------------------------
+  /**
+   * Translate one adapter-level `AgentEvent` into the matching
+   * `HostEvent`. `turn_complete` and `exit` produce no host event.
+   */
+  private async handleAgentEvent(
+    channel: Channel,
+    agentId: string,
+    event: AgentEvent,
+  ): Promise<void> {
+    switch (event.type) {
+      case "message_delta": {
+        this.emitHost({
+          kind: "agent.message_delta",
+          taskId: channel.taskId,
+          agentId,
+          chunk: event.text,
+        });
+        break;
+      }
+      case "message_complete": {
+        this.emitHost({
+          kind: "agent.message_complete",
+          taskId: channel.taskId,
+          agentId,
+          fullText: event.text,
+          mentions: parseMentions(event.text, channel.roster),
+        });
+        break;
+      }
+      case "tool_call": {
+        this.emitHost({
+          kind: "agent.tool_call",
+          taskId: channel.taskId,
+          agentId,
+          tool: event.title,
+          meta: { detail: event.detail, toolId: event.id },
+        });
+        break;
+      }
+      case "permission_request": {
+        this.emitHost({
+          kind: "agent.permission_request",
+          taskId: channel.taskId,
+          agentId,
+          requestId: event.id,
+          meta: {
+            title: event.title,
+            detail: event.detail,
+            requestId: event.id,
+          },
+        });
+        break;
+      }
+      case "peer_message": {
+        // A peer message is published as a completed message that
+        // @-mentions its target.
+        const text = `@${event.toAgentId} ${event.text}`;
+        this.emitHost({
+          kind: "agent.message_complete",
+          taskId: channel.taskId,
+          agentId,
+          fullText: text,
+          mentions: [event.toAgentId],
+        });
+        break;
+      }
+      case "error": {
+        // Claude under load (especially Docker Desktop macOS) occasionally
+        // unlinks ~/.claude.json mid-write during atomic config rewrites,
+        // and a concurrent claude spawn lands during the gap and exits 0
+        // with "Claude configuration file not found". When we recognise
+        // that pattern, repair the in-container config + respawn the
+        // session so the user keeps working instead of staring at a dead
+        // chat.
+        const agent = channel.roster.find((a) => a.id === agentId);
+        const isClaude = agent?.kind === "claude";
+        const configMissing = CLAUDE_CONFIG_MISSING_PATTERNS.some((re) =>
+          re.test(event.message),
+        );
+        if (isClaude && configMissing) {
+          await this.recoverClaudeAgent(channel, agentId);
+          break;
+        }
+        this.emitHost({
+          kind: "agent.exit",
+          taskId: channel.taskId,
+          agentId,
+          reason: event.message,
+        });
+        break;
+      }
+      case "turn_complete":
+      case "exit":
+        break;
+    }
+  }
+  // -- session recovery -----------------------------------------------------
+  /**
+   * Re-copy the host's `~/.claude.json` into the task container and
+   * respawn the agent's session. Bounded by MAX_RESPAWNS_PER_AGENT so
+   * a permanently-broken setup doesn't loop forever.
+   *
+   * Every outcome is reported as an `agent.exit` event whose `reason`
+   * says what happened: restarted, gave up, or respawn failed.
+   */
+  private async recoverClaudeAgent(
+    channel: Channel,
+    agentId: string,
+  ): Promise<void> {
+    const tries = (channel.respawns.get(agentId) ?? 0) + 1;
+    channel.respawns.set(agentId, tries);
+    if (tries > MAX_RESPAWNS_PER_AGENT) {
+      this.emitHost({
+        kind: "agent.exit",
+        taskId: channel.taskId,
+        agentId,
+        reason:
+          `Claude exited with "configuration file not found" ${tries - 1} ` +
+          "times in a row. Giving up automatic recovery — recreate the " +
+          "task to start fresh.",
+      });
+      return;
+    }
+    const restored = repairClaudeConfigInContainer(channel.containerName);
+    // Tear the dead session down and build a fresh one in its place.
+    const old = channel.sessions.get(agentId);
+    if (old) {
+      try {
+        await old.close();
+      } catch {
+        // Already exited — close is idempotent for our adapters.
+      }
+    }
+    const agent = channel.roster.find((a) => a.id === agentId);
+    if (!agent) return;
+    let session: AgentSession;
+    try {
+      session = await this.factory.create({
+        taskId: channel.taskId,
+        agent,
+        containerName: channel.containerName,
+        systemPreamble: channel.preambles.get(agentId) ?? "",
+      });
+    } catch (err) {
+      // Tell the channel the agent is gone rather than failing silently.
+      this.emitHost({
+        kind: "agent.exit",
+        taskId: channel.taskId,
+        agentId,
+        reason: `${agentId} could not be restarted after its config file went missing: ${this.errorText(err)}`,
+      });
+      return;
+    }
+    channel.sessions.set(agentId, session);
+    session.onEvent((event) => {
+      this.dispatchAgentEvent(channel, agentId, event);
+    });
+    const message = restored
+      ? `${agentId} restarted — config file was missing in the container, restored from the host.`
+      : `${agentId} restarted — config file was missing in the container (host copy not found; spawned anyway).`;
+    this.emitHost({
+      kind: "agent.exit",
+      taskId: channel.taskId,
+      agentId,
+      reason: message,
+    });
+  }
+  // -- permission resolution ------------------------------------------------
+  /**
+   * Forward the human's decision on a permission request to the agent.
+   * @returns `false` when the task or agent has no live session.
+   */
+  async resolvePermission(
+    taskId: string,
+    agentId: string,
+    requestId: string,
+    decision: "accept" | "decline",
+  ): Promise<boolean> {
+    const channel = this.channels.get(taskId);
+    const session = channel?.sessions.get(agentId);
+    if (!channel || !session) return false;
+    await session.resolvePermission(requestId, decision);
+    return true;
+  }
+  /**
+   * Tear a channel down (task killed). Every session gets a `close()`
+   * attempt and the channel is always unregistered; if any close failed,
+   * the first failure is rethrown once cleanup is complete.
+   */
+  async closeChannel(taskId: string): Promise<void> {
+    const ch = this.channels.get(taskId);
+    if (!ch) return;
+    // Unregister first so no new message is routed into a closing session.
+    this.channels.delete(taskId);
+    const failures = await this.closeSessions(ch);
+    if (failures.length > 0) throw failures[0];
+  }
+}
+// ---------------------------------------------------------------------------
+// `@mention` addressing.
+// ---------------------------------------------------------------------------
+/**
+ * List the roster agents that `text` addresses with an `@mention`.
+ *
+ * Ids come back in order of first appearance, each at most once; tokens
+ * that are not roster ids are ignored, so the result is [] when nobody
+ * is mentioned. One optional space after the `@` is accepted
+ * (`@ codex`) because agents sometimes emit that.
+ */
+export function parseMentions(text: string, roster: Roster): string[] {
+  const rosterIds = new Set(roster.map((agent) => agent.id));
+  // A Set iterates in insertion order, which gives first-appearance
+  // ordering and de-duplication in one go.
+  const found = new Set<string>();
+  const mentionPattern = /@ ?([A-Za-z0-9_-]+)/g;
+  for (
+    let hit = mentionPattern.exec(text);
+    hit !== null;
+    hit = mentionPattern.exec(text)
+  ) {
+    const candidate = hit[1];
+    if (candidate && rosterIds.has(candidate)) found.add(candidate);
+  }
+  return [...found];
+}
+/**
+ * Point cloud attachment URLs (`…/api/tasks/<id>/uploads/<file>`) at the
+ * file's in-container workspace path instead, so the agent can read it
+ * directly with its own tools (ADR-015 attachments live in the workspace,
+ * mounted at /workspace). The cloud URL would be Clerk-gated and
+ * unreachable from inside the container. Both absolute URLs and bare
+ * `/api/...` paths are rewritten; all other text is returned untouched.
+ */
+export function rewriteAttachmentRefs(text: string): string {
+  const attachmentUrl =
+    /(?:https?:\/\/[^/\s)\]]+)?\/api\/tasks\/[^/\s)\]]+\/uploads\/([A-Za-z0-9._-]+)/g;
+  const toWorkspacePath = (_whole: string, file: string): string =>
+    `/workspace/.uai/attachments/${file}`;
+  return text.replace(attachmentUrl, toWorkspacePath);
+}
+/**
+ * Resolve who an inbound human message is addressed to: the explicit
+ * `@mentions`, else `fallbackAgentId` (when it names a roster agent),
+ * else the first roster agent. Returns [] only for an empty roster.
+ */
+export function resolveAddressing(
+  text: string,
+  roster: Roster,
+  fallbackAgentId?: string,
+): string[] {
+  const explicit = parseMentions(text, roster);
+  if (explicit.length > 0) return explicit;
+  if (fallbackAgentId) {
+    const fallbackIsOnRoster = roster.some(
+      (agent) => agent.id === fallbackAgentId,
+    );
+    if (fallbackIsOnRoster) return [fallbackAgentId];
+  }
+  const [head] = roster;
+  if (!head) return [];
+  return [head.id];
+}
+/**
+ * Re-copy the host's `~/.claude.json` into the task container at
+ * `/home/node/.claude.json`. Used when an agent exited with
+ * "configuration file not found" — Claude under Docker Desktop macOS
+ * sometimes unlinks its own config mid-write, and a concurrent spawn
+ * lands in the gap. Restoring from the host is the same recovery
+ * `task-up.sh` does at first launch.
+ *
+ * Every failure is logged with the `[orchestrator] repair:` prefix,
+ * including the case where `docker` itself could not be started.
+ *
+ * @param containerName container to `docker exec` into.
+ * @returns true when the file actually made it into the container.
+ */
+function repairClaudeConfigInContainer(containerName: string): boolean {
+  // Operator can override the source path via UAI_OWNER_HOME so the
+  // server doesn't have to guess. Useful when Next.js's dev process
+  // env diverges from `os.homedir()` for any reason.
+  const home = process.env.UAI_OWNER_HOME || homedir();
+  const hostConfig = join(home, ".claude.json");
+  if (!existsSync(hostConfig)) {
+    console.error(
+      `[orchestrator] repair: host .claude.json not found at ${hostConfig} ` +
+        `(homedir=${homedir()}, UAI_OWNER_HOME=${
+          process.env.UAI_OWNER_HOME ?? "<unset>"
+        })`,
+    );
+    return false;
+  }
+  // Why not `docker cp`: when a stuck claude process still has the
+  // in-container file open it fails with `Error response from daemon:
+  // unlinkat …: device or resource busy` and the new bytes never land.
+  // Instead the host file's bytes are piped over stdin into a shell
+  // running inside the container (see the command below).
+  let content: Buffer;
+  try {
+    content = readFileSync(hostConfig);
+  } catch (err) {
+    console.error(
+      `[orchestrator] repair: read host .claude.json failed: ${
+        err instanceof Error ? err.message : String(err)
+      }`,
+    );
+    return false;
+  }
+  // Clear the existing dentry before writing. Docker Desktop macOS
+  // occasionally ends up with a stale dentry where `ls -la` shows
+  // `-????????? ?` and `open(O_CREAT)` fails with "Directory
+  // nonexistent" — the file appears to be there but every syscall
+  // bounces. `rm -f` punches through it (or no-ops on a genuine
+  // missing file). Then `cat >` creates the fresh file from stdin. Both
+  // steps run in one `sh -c`, and `&&` skips the write if the `rm` fails.
+  const write = spawnSync(
+    "docker",
+    [
+      "exec",
+      "-i",
+      containerName,
+      "sh",
+      "-c",
+      "rm -f /home/node/.claude.json && cat > /home/node/.claude.json",
+    ],
+    { input: content, encoding: "buffer" },
+  );
+  if (write.status !== 0) {
+    const stderr =
+      write.stderr instanceof Buffer
+        ? write.stderr.toString("utf8")
+        : String(write.stderr ?? "");
+    // When docker could not be spawned at all (e.g. not on PATH) there is
+    // no stderr and no exit status — the reason is in `write.error`.
+    const detail =
+      stderr.trim() ||
+      write.error?.message ||
+      (write.signal
+        ? `terminated by ${write.signal}`
+        : `exit status ${String(write.status)}`);
+    console.error(
+      `[orchestrator] repair: in-place write to .claude.json failed: ${detail}`,
+    );
+    return false;
+  }
+  return true;
+}
+/** A project as carried in a channel spec — one repo + its prompt. */
+type ChannelProject = { slug: string; defaultPrompt: string };
+/**
+ * Join the selected projects' `defaultPrompt`s into one block of text:
+ * each is trimmed, blank ones are skipped, and the rest are separated
+ * by a blank line in the order given (the spec already orders them).
+ * This is the leading layer of the ADR-022 prompt-assembly chain.
+ */
+function concatProjectPrompts(projects: ChannelProject[]): string {
+  const prompts: string[] = [];
+  for (const project of projects) {
+    const prompt = project.defaultPrompt.trim();
+    if (prompt.length > 0) prompts.push(prompt);
+  }
+  return prompts.join("\n\n");
+}
+/**
+ * Build the message an agent receives as its first turn, following the
+ * ADR-022 prompt-assembly chain. The layers, in order, are:
+ *
+ *   1. every project's `defaultPrompt`, concatenated in position order
+ *   2. `globalContext`       — the task-level mission, if present
+ *   3. `agent.defaultPrompt` — the agent's persona, if present
+ *   4. `agent.initialPrompt` — this-turn instructions
+ *
+ * Each layer is trimmed, empty layers are dropped, and the remainder is
+ * joined with a blank line.
+ *
+ * Returns `null` when the agent has no non-empty `initialPrompt` — such
+ * agents stay silent until addressed. There is no "first speaker" /
+ * coder rule; any agent with an `initialPrompt` opens a first turn.
+ */
+export function assembleFirstTurnPrompt(
+  projects: ChannelProject[],
+  globalContext: string | undefined,
+  agent: RosterAgent,
+): string | null {
+  const turnInstructions = (agent.initialPrompt ?? "").trim();
+  if (turnInstructions === "") return null;
+  const layers: string[] = [];
+  const optionalLayers = [
+    concatProjectPrompts(projects),
+    (globalContext ?? "").trim(),
+    (agent.defaultPrompt ?? "").trim(),
+  ];
+  for (const layer of optionalLayers) {
+    if (layer !== "") layers.push(layer);
+  }
+  layers.push(turnInstructions);
+  return layers.join("\n\n");
+}
+/**
+ * Build the system preamble an agent gets on session start. It opens
+ * with how uai's channel works — agents must know to hand off via
+ * `@mention`, since there is no `peer` command / shared tmux any more
+ * (ADR-008) — then appends the project context and the agent's persona.
+ *
+ * The persona / mission layers (project defaultPrompts, globalContext,
+ * agent.defaultPrompt) live in the always-on system prompt so they apply
+ * to every turn; only `initialPrompt` is delivered as the first user
+ * turn (see `assembleFirstTurnPrompt`).
+ *
+ * @param roster every agent in the channel; all of them (including
+ *   `agent` itself) are listed in the briefing as `@id (label)`.
+ * @param agent the agent this preamble is for; only its `defaultPrompt`
+ *   is read here.
+ * @param projects projects mounted in the task; each contributes one
+ *   workspace-layout line and its `defaultPrompt`.
+ * @param globalContext task-level mission text, if any.
+ * @param workspacePath directory the briefing names as the shell's
+ *   starting point and as the parent of each project directory.
+ * @param taskBranch branch name quoted in the layout and push instructions.
+ * @returns the channel briefing, followed by `---` and the context layers
+ *   when at least one of them is non-empty.
+ */
+export function buildSystemPreamble(
+  roster: Roster,
+  agent: RosterAgent,
+  projects: ChannelProject[],
+  globalContext: string | undefined,
+  workspacePath: string,
+  taskBranch: string,
+): string {
+  // Despite the name, this lists the whole roster — the receiving agent too.
+  const others = roster.map((a) => `@${a.id} (${a.label})`).join(", ");
+  const projectLines =
+    projects.length === 0
+      ? ["(none mounted)"]
+      : projects.map(
+          (p) =>
+            `- \`${workspacePath}/${p.slug}\` — git worktree on \`${taskBranch}\``,
+        );
+  const comms = [
+    "## uai task channel",
+    "",
+    "You are one agent in a uai task chat channel, shared with the human",
+    "and the other agents. To hand work to or ask another agent, mention",
+    "it by id at the start of a line — e.g. `@codex please review the",
+    "diff`. uai routes that message into that agent's input.",
+    "",
+    `Agents in this channel: ${others}.`,
+    "",
+    "An agent only receives a message when it is explicitly @-mentioned",
+    "(or addressed by the human) — so always @-mention the agent you mean.",
+    "There is NO `peer` command and no shared tmux session; hand-offs are",
+    "just @-mentions in your replies.",
+    "",
+    "Because your input is only what you're addressed, you may be missing",
+    "context from messages between the human and the other agents. The full",
+    "channel transcript — every message + who wrote it (no tool calls) — is",
+    "logged at `/workspace/.uai/chat.md`. Read it whenever you need that",
+    "context (e.g. the human shared a file or instruction with another",
+    "agent); it's appended live, so re-read it for the latest.",
+    "",
+    "Hand off when you finish your part of the work. When you've made",
+    "and committed your changes, or completed a review, end your reply by",
+    "@-mentioning the agent who should act next and telling them what you",
+    "did and what you need (e.g. `@codex changes committed on <branch> —",
+    "please review`, or `@claude review done, N issues to fix`). Never",
+    "stop silently — if genuinely no agent needs to act, say so to the",
+    "human.",
+    "",
+    "## Workspace layout",
+    "",
+    `Your shell starts in \`${workspacePath}\` (the task workspace).`,
+    "That directory is **not** itself a git repo — it holds one git",
+    "worktree per project this task spans. Every project below is on the",
+    "same task branch. To run git commands, **`cd` into one of the",
+    "project directories first**:",
+    "",
+    ...projectLines,
+    "",
+    `The task branch is \`${taskBranch}\`. Push with \`git push -u origin`,
+    `${taskBranch}\` from inside the project, then open a PR with \`gh pr`,
+    "create` (the container has gh authenticated). For multi-project",
+    "tasks, each project's PR is independent — open one per project whose",
+    "worktree you actually changed.",
+    "",
+    "The `.uai/` directory under each task is uai's own scaffolding",
+    "(rendered Dockerfile, compose file, container scripts) — it is NOT",
+    "part of the project. Never review, edit, stage, commit, or flag it;",
+    "treat it as ignored, even though git may show it as untracked.",
+    "",
+    "## Commit policy",
+    "",
+    "Commits are SSH-signed automatically (git is configured for it) — do",
+    "not disable or override signing. Do NOT add any `Co-Authored-By:`",
+    "trailers to commit messages, and do NOT add 'Generated with …' or any",
+    "tool/agent attribution footer to commit messages or PR/issue bodies.",
+    "Write commit messages and PR descriptions plainly, as the author, with",
+    "no agent attribution.",
+  ].join("\n");
+  // Persona / mission layers, always-on so they apply to every turn:
+  // project context + task globalContext + this agent's persona. The
+  // per-turn instructions (initialPrompt) are delivered separately.
+  const context = [
+    concatProjectPrompts(projects),
+    (globalContext ?? "").trim(),
+    (agent.defaultPrompt ?? "").trim(),
+  ]
+    .filter((p) => p.length > 0)
+    .join("\n\n");
+  return context.length > 0 ? `${comms}\n\n---\n\n${context}` : comms;
+}
+// ---------------------------------------------------------------------------
+// Singleton — survives Next.js HMR by living on globalThis.
+// ---------------------------------------------------------------------------
+/** `globalThis`, viewed as the bag holding the orchestrator singleton state. */
+const globalForOrchestrator = globalThis as unknown as {
+  __uaiOrchestrator?: Orchestrator;
+  __uaiRecoverRan?: boolean;
+};
+/**
+ * Return the process-wide orchestrator, creating it on first use.
+ *
+ * `UAI_AGENTS=real` drives the real Claude/Codex CLIs inside the task
+ * containers; anything else uses the mock (no Docker / no CLI needed —
+ * the default until the adapters are verified on a host).
+ *
+ * The first call in a process also starts boot-time recovery of running
+ * tasks, without waiting for it to finish.
+ */
+export function getOrchestrator(): Orchestrator {
+  let orchestrator = globalForOrchestrator.__uaiOrchestrator;
+  if (!orchestrator) {
+    const useRealAgents = process.env.UAI_AGENTS === "real";
+    orchestrator = new Orchestrator(
+      useRealAgents ? realAgentFactory : mockAgentFactory,
+    );
+    globalForOrchestrator.__uaiOrchestrator = orchestrator;
+  }
+  if (!globalForOrchestrator.__uaiRecoverRan) {
+    // Mark first, then start: recovery must run at most once per process.
+    globalForOrchestrator.__uaiRecoverRan = true;
+    // Fire-and-forget — the orchestrator is usable while recovery runs.
+    void recoverRunningTasks();
+  }
+  return orchestrator;
+}
+// ---------------------------------------------------------------------------
+// Boot-time recovery
+//
+// When uai starts after a machine reboot (or a hard `pnpm dev` restart)
+// task containers are gone but the DB still claims status='running'.
+// Walk the active rows and reconcile each against actual Docker state:
+//
+//   container up                  → re-discover the host port, keep
+//                                   the row at `running`.
+//   container exited (preserved)  → `docker start` it, re-run uai-init,
+//                                   discover the fresh port, keep
+//                                   `running`. AI sessions respawn on
+//                                   the next ensureSessions() call.
+//   container gone, worktree on   → mark `stopped`. User hits Resume,
+//   disk                            which runs task-up against the same
+//                                   id (task-up is idempotent).
+//   container gone, worktree gone → mark `error`. Data loss.
+//
+// Idempotent per process — guarded by `__uaiRecoverRan` on globalThis
+// so a Next.js HMR rebuild doesn't trigger it twice.
+// ---------------------------------------------------------------------------
+/**
+ * The fields read from one `docker ps --all --format '{{json .}}'`
+ * record. Names is unique per task because we scope by compose-project
+ * label (set via `-p task-<id>` at task-up).
+ */
+interface DockerPs {
+  Names: string;
+  State: string; // "running" | "exited" | "created" | "paused"
+}
+/**
+ * Lists every container (running or not) that carries `label`.
+ *
+ * @param label  A `key=value` label filter passed to `docker ps --filter`.
+ * @returns One entry per JSON line docker printed. Returns `[]` when no
+ *   container matches AND when `docker ps` itself fails — the failure
+ *   case is logged so it can be told apart from a genuinely empty list.
+ */
+function dockerListContainersByLabel(label: string): DockerPs[] {
+  const res = spawnSync(
+    "docker",
+    ["ps", "--all", "--filter", `label=${label}`, "--format", "{{json .}}"],
+    { encoding: "utf8" },
+  );
+  if (res.status !== 0) {
+    // `res.error` is set (and stderr is null) when docker could not be
+    // spawned at all; otherwise stderr carries the CLI's own message.
+    const detail = res.error?.message ?? (res.stderr ?? "").trim();
+    console.error(
+      `[orchestrator] docker ps --filter label=${label} failed: ${detail}`,
+    );
+    return [];
+  }
+  const containers: DockerPs[] = [];
+  for (const rawLine of res.stdout.split("\n")) {
+    const line = rawLine.trim();
+    if (!line) continue;
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(line);
+    } catch {
+      // Not JSON — skip the line rather than abort the whole listing.
+      continue;
+    }
+    // Only object records can be container entries; drop `null`,
+    // numbers, strings, etc. instead of casting them blindly.
+    if (typeof parsed === "object" && parsed !== null) {
+      containers.push(parsed as DockerPs);
+    }
+  }
+  return containers;
+}
+/**
+ * Runs `docker start <containerName>` synchronously.
+ *
+ * @param containerName  Name of the container to start.
+ * @returns `true` when docker exited with status 0; `false` otherwise
+ *   (the failure reason is logged).
+ */
+function dockerStart(containerName: string): boolean {
+  const res = spawnSync("docker", ["start", containerName], {
+    encoding: "utf8",
+  });
+  if (res.status === 0) return true;
+  // When the docker binary cannot be spawned (e.g. ENOENT) spawnSync
+  // reports it via `res.error` and leaves stderr null, so calling
+  // `.trim()` on stderr unguarded would throw instead of returning false.
+  const reason = res.error?.message ?? (res.stderr ?? "").trim();
+  console.error(
+    `[orchestrator] docker start ${containerName} failed: ${reason}`,
+  );
+  return false;
+}
+/**
+ * Looks up the host port that `containerPort` is published on, via
+ * `docker port <containerName> <containerPort>`.
+ *
+ * @param containerName  Container to query.
+ * @param containerPort  Port number inside the container.
+ * @returns The host port from the first output line, or `null` when
+ *   docker fails or the line does not end in a valid port (1–65535).
+ */
+function dockerPort(
+  containerName: string,
+  containerPort: number,
+): number | null {
+  const res = spawnSync(
+    "docker",
+    ["port", containerName, String(containerPort)],
+    { encoding: "utf8" },
+  );
+  if (res.status !== 0) return null;
+  // Output: "127.0.0.1:32785\n"
+  const firstLine = res.stdout.split("\n")[0]?.trim() ?? "";
+  // Require digits after the last colon: `Number("")` is 0, so a bare
+  // "host:" would otherwise be reported as port 0.
+  const match = /:(\d{1,5})$/.exec(firstLine);
+  if (!match) return null;
+  const port = Number(match[1]);
+  return port >= 1 && port <= 65535 ? port : null;
+}
+/**
+ * Runs `docker exec <containerName> <cmd...>` synchronously.
+ *
+ * @param containerName  Container to run the command in.
+ * @param cmd            Command and its arguments, one array entry each.
+ * @returns `true` only when the process exited with status 0.
+ */
+function dockerExec(containerName: string, cmd: string[]): boolean {
+  const argv = ["exec", containerName].concat(cmd);
+  const { status } = spawnSync("docker", argv, { encoding: "utf8" });
+  return status === 0;
+}
+/**
+ * Boot-time recovery entry point: loads every task row whose
+ * `statusMirror` is one of `ACTIVE_STATUSES` and reconciles them one at
+ * a time through `recoverOneTask`.
+ *
+ * Never rejects — a failure on one task is logged and the loop moves
+ * on; a failure outside the loop is logged as a top-level failure.
+ */
+async function recoverRunningTasks(): Promise<void> {
+  // Log the message for Error instances, the raw value otherwise.
+  const reasonOf = (failure: unknown): unknown =>
+    failure instanceof Error ? failure.message : failure;
+  try {
+    const activeTasks = getDb()
+      .select()
+      .from(schema.hostTasks)
+      .where(inArray(schema.hostTasks.statusMirror, [...ACTIVE_STATUSES]))
+      .all();
+    const total = activeTasks.length;
+    if (total > 0) {
+      console.log(
+        `[orchestrator] recovery: scanning ${total} active task row(s)`,
+      );
+      for (let i = 0; i < total; i += 1) {
+        const row = activeTasks[i];
+        try {
+          await recoverOneTask(row);
+        } catch (failure) {
+          console.error(
+            `[orchestrator] recovery: ${row.taskId} failed:`,
+            reasonOf(failure),
+          );
+        }
+      }
+    }
+  } catch (failure) {
+    console.error(
+      "[orchestrator] recovery: top-level failure",
+      reasonOf(failure),
+    );
+  }
+}
+/**
+ * Reconciles a single active task row with what Docker reports for its
+ * compose project:
+ *
+ *   no compose project on the row → leave the row untouched.
+ *   no containers found           → `stopped` if the worktree path
+ *                                   exists on disk, otherwise `error`.
+ *   some container running        → refresh the code-server host port
+ *                                   when it changed.
+ *   containers present, none up   → `docker start`, re-run uai-init,
+ *                                   store the freshly discovered port.
+ *
+ * @param task  The `hostTasks` row to reconcile.
+ */
+async function recoverOneTask(
+  task: typeof schema.hostTasks.$inferSelect,
+): Promise<void> {
+  const { taskId, composeProject } = task;
+  // We never wrote a compose project name for this row — must be a row
+  // stuck in `queued`/`starting` from before task-up got past step 2.
+  // Leave it; the user can recreate.
+  if (!composeProject) return;
+  const appContainer = `${composeProject}-app-1`;
+  const projectContainers = dockerListContainersByLabel(
+    `com.docker.compose.project=${composeProject}`,
+  );
+  if (projectContainers.length === 0) {
+    // Container gone. Whether the worktree is still on disk decides if
+    // a resume is viable.
+    const worktreePresent = task.worktreePath
+      ? existsSync(task.worktreePath)
+      : false;
+    const next = worktreePresent ? "stopped" : "error";
+    const keptProject = next === "error" ? null : composeProject;
+    db_setStatus(taskId, next, {
+      codeServerPort: null,
+      previewPorts: "[]",
+      composeProject: keptProject,
+    });
+    console.log(
+      `[orchestrator] recovery: ${taskId} -> ${next} (container gone, ` +
+        `worktree ${worktreePresent ? "present" : "gone"})`,
+    );
+    return;
+  }
+  const anyRunning =
+    projectContainers.find((c) => c.State === "running") !== undefined;
+  if (anyRunning) {
+    // Container is up. Re-discover the host port in case Docker
+    // remapped it across restarts (it usually does for ephemeral
+    // bindings).
+    const livePort = dockerPort(appContainer, 8080);
+    const portChanged = Boolean(livePort) && livePort !== task.codeServerPort;
+    if (portChanged) {
+      db_setRuntime(taskId, { codeServerPort: livePort });
+      console.log(`[orchestrator] recovery: ${taskId} ports refreshed`);
+    }
+    return;
+  }
+  // Container exists but is not running. Bring it back up + re-launch
+  // the in-container init (deps + code-server).
+  console.log(
+    `[orchestrator] recovery: ${taskId} starting exited container ${appContainer}`,
+  );
+  const started = dockerStart(appContainer);
+  if (!started) {
+    db_setStatus(taskId, "stopped", {
+      codeServerPort: null,
+      previewPorts: "[]",
+    });
+    return;
+  }
+  const initOk = dockerExec(appContainer, ["/usr/local/bin/uai-init"]);
+  if (!initOk) {
+    console.warn(
+      `[orchestrator] recovery: ${taskId} uai-init failed; container is up but Editor may be down`,
+    );
+  }
+  // Store whatever port docker reports now — null when lookup fails.
+  const resumedPort = dockerPort(appContainer, 8080);
+  db_setRuntime(taskId, { codeServerPort: resumedPort });
+  console.log(
+    `[orchestrator] recovery: ${taskId} resumed (port ${resumedPort ?? "?"})`,
+  );
+}
+/**
+ * Upserts `taskId` with a new `statusMirror` plus any extra columns.
+ * `status` always wins over a `statusMirror` key present in `extras`.
+ */
+function db_setStatus(
+  taskId: string,
+  status: string,
+  extras: Partial<typeof schema.hostTasks.$inferInsert>,
+): void {
+  const patch = { ...extras, statusMirror: status };
+  db_setRuntime(taskId, patch);
+}
+/**
+ * Upserts `taskId` with the given columns, passing `extras` straight
+ * through to `upsertHostTask`.
+ */
+function db_setRuntime(
+  taskId: string,
+  extras: Partial<typeof schema.hostTasks.$inferInsert>,
+): void {
+  upsertHostTask(taskId, extras);
+}
+export type { Orchestrator };