npm - bloby-bot - Versions diffs - 0.70.1 → 0.70.5 - Mend

bloby-bot 0.70.1 → 0.70.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/dist-bloby/assets/{bloby-Dfx1hOe-.js → bloby-CXmOcb1r.js} +4 -4
package/dist-bloby/assets/{highlighted-body-OFNGDK62-BzAQnLnm.js → highlighted-body-OFNGDK62-D7cU1Y-Z.js} +1 -1
package/dist-bloby/assets/mermaid-GHXKKRXX-D5YxphBn.js +1 -0
package/dist-bloby/bloby.html +1 -1
package/package.json +4 -3
package/scripts/install +42 -4
package/scripts/install.ps1 +3 -3
package/scripts/install.sh +35 -7
package/scripts/postinstall.js +17 -2
package/supervisor/chat/bloby-main.tsx +14 -10
package/supervisor/harnesses/claude.ts +3 -0
package/supervisor/harnesses/codex.ts +927 -230
package/vite.bloby.config.ts +0 -8
package/worker/prompts/bloby-system-prompt-codex.txt +15 -15
package/workspace/client/index.html +2 -2
package/workspace/client/public/manifest.json +16 -4
package/workspace/client/public/morphy-icon-apple-180.png +0 -0
package/workspace/client/public/morphy-icon-maskable-192.png +0 -0
package/workspace/client/public/morphy-icon-maskable-512.png +0 -0
package/dist-bloby/assets/mermaid-GHXKKRXX-CHPqkcxH.js +0 -1

package/supervisor/harnesses/codex.ts CHANGED Viewed

@@ -8,7 +8,8 @@
  *
  * Lifecycle per live conversation:
  *   spawn → initialize → initialized → thread/start → turn/start (per
- *   user message; turn/steer to inject mid-turn) → turn/completed → idle
+ *   user message; messages arriving mid-turn are QUEUED and each gets its
+ *   own turn — see queueOrStart) → turn/completed → idle
  *   → endConversation → turn/interrupt (if needed) → kill subprocess
  *
  * Lifecycle per one-shot query: same as above, but the subprocess is killed
@@ -19,15 +20,22 @@
  * passed as `effort` on `turn/start`.
  *
  * Notes on parity with Claude harness:
- *   - System prompt → `baseInstructions` on `thread/start`
- *   - Sub-agents → not implemented (Codex has Skills, different model)
- *   - MCP servers → not wired yet (Codex has its own MCP layer)
- *   - Mid-turn input uses `turn/steer` (better than Claude's queue)
+ *   - System prompt → `developerInstructions` on `thread/start` (ADDITIVE —
+ *     `baseInstructions` would override codex's native apply_patch/shell spec)
+ *   - Mid-turn input is queued, one turn per message — same one-bot:response-
+ *     per-push contract the channel routing FIFO depends on. (`turn/steer`
+ *     merges inputs into the in-flight turn and breaks that contract.)
+ *   - Sub-agents → not implemented (codex collab tools are still experimental;
+ *     the collabAgentToolCall handlers below light up if that ever ships)
+ *   - MCP servers → MCP.json translated to `-c mcp_servers.*` spawn flags
+ *   - Pre-warm → spawn+initialize cached ahead of time (thread/start is issued
+ *     at claim time, so the warm process is reusable for any model/instructions)
  */
 import { spawn, type ChildProcessWithoutNullStreams } from 'child_process';
 import { createRequire } from 'module';
 import readline from 'readline';
+import crypto from 'crypto';
 import fs from 'fs';
 import path from 'path';
 import { log } from '../../shared/logger.js';
@@ -54,6 +62,21 @@ const VALID_EFFORTS = new Set(['none', 'minimal', 'low', 'medium', 'high', 'xhig
  * keeps emitting events and is never killed; only true silence trips recovery.
  */
 const TURN_WATCHDOG_MS = 5 * 60_000;
+/**
+ * Hard WALL-CLOCK cap for one-shot turns (pulse/cron, customer WhatsApp) —
+ * codex has no maxTurns equivalent, and the idle watchdog above never trips on
+ * an actively-emitting runaway turn (tool ping-pong). Claude one-shots abort at
+ * 5 minutes; mirror that so a runaway can't pin a customer slot forever.
+ *
+ * Used as the setTimeout delay in armHardTurnTimer, which arms at most one
+ * timer per conversation and does nothing for non-one-shot conversations.
+ */
+const ONE_SHOT_MAX_TURN_MS = 5 * 60_000; // 300_000 ms = 5 minutes
+/**
+ * Micro-batch window for bot:token. Codex streams true per-word deltas — an
+ * order of magnitude more WS frames per reply than claude's per-block stream.
+ * Coalescing at ~60ms keeps the stream visually smooth while cutting frame
+ * volume. Every non-token event flushes first, so ordering (and the
+ * bot:response === streamed-text invariant) is preserved.
+ *
+ * Used as the setTimeout delay in emitToken; the window starts at the first
+ * token buffered after a flush and is not extended by later tokens.
+ */
+const TOKEN_FLUSH_MS = 60; // milliseconds
 /**
  * Resolve the `codex` binary. We don't trust $PATH because Bloby may be
@@ -160,6 +183,19 @@ function parseModelString(model: string): { id: string; effort?: string } {
 type RpcResult<T = any> = { id: number; result?: T; error?: { code?: number; message: string } };
 type RpcNotification = { method: string; params?: any };
+/** Distinguishes a JSON-RPC error RESPONSE (server alive, request rejected)
+ *  from transport failures (timeout / closed / process exit) — turn/start
+ *  error containment keeps the conversation alive only for the former.
+ *
+ *  `rpcError` builds a plain Error and tags it with one of these kinds in an
+ *  ad-hoc `rpcKind` property, so callers can branch on the failure class
+ *  (see isServerRejection) instead of parsing the message text. */
+type RpcErrorKind = 'rpc-error' | 'timeout' | 'closed';
+function rpcError(message: string, kind: RpcErrorKind): Error {
+  const err = new Error(message);
+  (err as any).rpcKind = kind; // rpcKind is not a declared Error field, hence the `any` cast
+  return err;
+}
+function isServerRejection(err: any): boolean { // true only for errors tagged 'rpc-error' by rpcError
+  return err?.rpcKind === 'rpc-error'; // optional chaining: null/undefined or untagged errors yield false
+}
 interface PendingRequest {
   resolve: (value: any) => void;
   reject: (err: Error) => void;
@@ -171,7 +207,7 @@ class CodexRpc {
   private pending = new Map<number, PendingRequest>();
   private nextId = 1;
   private notificationHandler: (n: RpcNotification) => void = () => {};
-  private closeHandler: (code: number | null) => void = () => {};
+  private closeHandler: (code: number | null, errMsg?: string) => void = () => {};
   private closed = false;
   private stderrBuf = '';
@@ -189,13 +225,18 @@ class CodexRpc {
     this.proc.on('exit', (code) => {
       if (this.closed) return;
       this.closed = true;
-      const err = new Error(`codex app-server exited (code=${code}). Stderr tail:\n${this.stderrBuf.trim().slice(-1000)}`);
+      const errMsg = `codex app-server exited (code=${code}). Stderr tail:\n${this.stderrBuf.trim().slice(-1000)}`;
+      const err = rpcError(errMsg, 'closed');
       for (const p of this.pending.values()) {
         clearTimeout(p.timer);
         p.reject(err);
       }
       this.pending.clear();
-      this.closeHandler(code);
+      // Pass the composed exit error along — mid-turn there are usually no
+      // pending requests, so this is the ONLY way crash detail reaches the
+      // conversation (the watchdog would otherwise ghost-fire 5 min later
+      // with a misleading "timed out" message).
+      this.closeHandler(code, errMsg);
     });
     this.proc.on('error', (err) => {
@@ -204,17 +245,22 @@ class CodexRpc {
       log.warn(`[codex-rpc] spawn error: ${err.message}`);
       for (const p of this.pending.values()) {
         clearTimeout(p.timer);
-        p.reject(err);
+        p.reject(rpcError(err.message, 'closed'));
       }
       this.pending.clear();
-      this.closeHandler(null);
+      this.closeHandler(null, `codex app-server failed to spawn: ${err.message}`);
     });
   }
   onNotification(handler: (n: RpcNotification) => void): void { this.notificationHandler = handler; }
-  onClose(handler: (code: number | null) => void): void { this.closeHandler = handler; }
+  onClose(handler: (code: number | null, errMsg?: string) => void): void { this.closeHandler = handler; }
   private onLine(line: string): void {
+    // close() already rejected everything pending; late stdout (buffered deltas,
+    // a turn/completed that landed the same instant as teardown) must not
+    // re-enter the event pipeline — a post-teardown turn/completed could drain
+    // pendingInputs into a dead rpc and wedge agentQueryActive forever.
+    if (this.closed) return;
     if (!line.trim()) return;
     let msg: any;
     try { msg = JSON.parse(line); } catch {
@@ -237,7 +283,7 @@ class CodexRpc {
       if (!pending) return;
       this.pending.delete(msg.id);
       clearTimeout(pending.timer);
-      if (msg.error) pending.reject(new Error(msg.error.message || 'RPC error'));
+      if (msg.error) pending.reject(rpcError(msg.error.message || 'RPC error', 'rpc-error'));
       else pending.resolve(msg.result);
       return;
     }
@@ -288,12 +334,12 @@ class CodexRpc {
   }
   request<T = any>(method: string, params?: any, timeoutMs = REQUEST_TIMEOUT_MS): Promise<T> {
-    if (this.closed || !this.proc) return Promise.reject(new Error('RPC connection closed'));
+    if (this.closed || !this.proc) return Promise.reject(rpcError('RPC connection closed', 'closed'));
     const id = this.nextId++;
     return new Promise<T>((resolve, reject) => {
       const timer = setTimeout(() => {
         this.pending.delete(id);
-        reject(new Error(`codex app-server: ${method} timed out after ${timeoutMs}ms`));
+        reject(rpcError(`codex app-server: ${method} timed out after ${timeoutMs}ms`, 'timeout'));
       }, timeoutMs);
       this.pending.set(id, { resolve, reject, timer });
       try {
@@ -301,7 +347,7 @@ class CodexRpc {
       } catch (err: any) {
         this.pending.delete(id);
         clearTimeout(timer);
-        reject(err);
+        reject(rpcError(err.message, 'closed'));
       }
     });
   }
@@ -320,7 +366,7 @@ class CodexRpc {
     this.closed = true;
     for (const p of this.pending.values()) {
       clearTimeout(p.timer);
-      p.reject(new Error('RPC connection closed'));
+      p.reject(rpcError('RPC connection closed', 'closed'));
     }
     this.pending.clear();
     const proc = this.proc;
@@ -339,11 +385,20 @@ class CodexRpc {
 /* ── Per-conversation state ────────────────────────────────────────────── */
+interface QueuedInput { // one pushed message parked in pendingInputs until it is started as its own turn
+  content: string; // message text, handed to startTurn as `content`
+  savedFiles?: SavedFile[]; // optional; forwarded unchanged to startTurn
+  attachments?: AgentAttachment[]; // optional; forwarded unchanged to startTurn
+}
 interface CodexConversation {
   id: string;
   rpc: CodexRpc;
   threadId: string;
   effort?: string;
+  /** Original model string (with effort suffix) + names — kept for the post-teardown re-warm. */
+  model: string;
+  names?: { botName: string; humanName: string };
   onMessage: OnAgentMessage;
   /** Currently in-flight turn id (set on `turn/started`, cleared on `turn/completed`). */
   currentTurnId: string | null;
@@ -352,18 +407,32 @@ interface CodexConversation {
   currentMsgItemId: string | null;
   /** Streaming text accumulator for the current turn's agentMessage items. */
   fullText: string;
+  /** Per-itemId delta accumulation for the current turn. item/completed is the
+   *  authoritative final text per the docs — delta concatenation is not guaranteed
+   *  to match it, and a later agentMessage item may complete with NO deltas at all.
+   *  This map lets item/completed emit exactly the missing remainder per item. */
+  itemTexts: Map<string, string>;
   /** Tools/items used during the current turn, for the bot:turn-complete payload. */
   usedFileTools: boolean;
-  /**
-   * Queue of messages submitted via `pushMessage` that arrived while no turn
-   * was active *and* we hadn't yet returned from the previous turn — almost
-   * always empty, but covers a tight push-during-completed race.
-   */
-  pendingInputs: string[];
-  /** True once the harness has emitted the per-turn completion event. */
+  /** Messages pushed while a turn is active. Each is drained into its OWN turn on
+   *  turn/completed — one bot:response per push, mirroring claude's input queue.
+   *  (turn/steer would merge them into the in-flight turn and desync the channel
+   *  routing FIFO, which enqueues one target per push.) */
+  pendingInputs: QueuedInput[];
+  /** True while a turn is being processed. */
   busy: boolean;
   /** True for one-shot queries — the conversation ends after the first turn completes. */
   oneShot: boolean;
+  /** True once bot:error fired for the current turn — dedups the double-emit the
+   *  app-server produces on hard failures (a non-retry `error` notification followed
+   *  by `turn/completed {status:'failed'}` with the same message; live-verified). */
+  errorEmitted: boolean;
+  /** True once bot:done fired for this one-shot — teardown uses it to guarantee
+   *  exactly one bot:done on EVERY terminal path (slot-freeing consumers rely on it). */
+  doneEmitted: boolean;
+  /** Non-retry `error` notification stashed while a turn is active; surfaced by
+   *  turn/completed {failed} (its TurnError is authoritative, the stash is fallback). */
+  stashedError: { message: string; info: any } | null;
   /**
    * Latest context occupancy from `thread/tokenUsage/updated` (codex does NOT
    * report usage on `turn/completed` — Turn has no usage field). Emitted on
@@ -373,13 +442,127 @@ interface CodexConversation {
   lastContextWindow: number;
   /** Active per-turn watchdog timer (see TURN_WATCHDOG_MS). */
   turnWatchdog: NodeJS.Timeout | null;
+  /** Hard wall-clock cap for one-shot turns (see ONE_SHOT_MAX_TURN_MS). Non-resetting. */
+  hardTurnTimer: NodeJS.Timeout | null;
+  /** bot:token micro-batch buffer (see TOKEN_FLUSH_MS). */
+  tokenBuf: string;
+  tokenFlushTimer: NodeJS.Timeout | null;
 }
 const conversations = new Map<string, CodexConversation>();
-/* ── Helpers ───────────────────────────────────────────────────────────── */
+/** One-shot queries in their init window (token refresh + spawn + initialize +
+ *  thread/start) — not yet in `conversations`, so anyConversationBusy() can't see
+ *  them. Without this, a queued self-update / backend restart can fire mid-launch
+ *  and kill the just-spawning query. Mirrors claude's activeQueries registration. */
+const inFlightOneShots = new Set<string>();
+/* ── Event emission helpers ────────────────────────────────────────────── */
+function flushTokens(conv: CodexConversation): void { // emit everything buffered as ONE bot:token, immediately
+  if (conv.tokenFlushTimer) {
+    clearTimeout(conv.tokenFlushTimer); // cancel the scheduled flush — this call supersedes it
+    conv.tokenFlushTimer = null;
+  }
+  if (conv.tokenBuf) { // empty buffer: no event is emitted
+    const token = conv.tokenBuf;
+    conv.tokenBuf = ''; // cleared BEFORE the callback, so the buffer is already empty while onMessage runs
+    conv.onMessage('bot:token', { conversationId: conv.id, token });
+  }
+}
+/** Drop buffered tokens WITHOUT emitting. Teardown must discard, not flush:
+ *  on user-stop the frontend has already cleared its stream state, and a stray
+ *  trailing bot:token would re-open streaming with no bot:idle ever coming —
+ *  then corrupt the next turn's committedTextLength accounting.
+ *
+ *  Also cancels any pending flush timer, so no delayed flush fires afterwards.
+ *  Same shape as flushTokens minus the onMessage call. */
+function discardTokens(conv: CodexConversation): void {
+  if (conv.tokenFlushTimer) {
+    clearTimeout(conv.tokenFlushTimer);
+    conv.tokenFlushTimer = null;
+  }
+  conv.tokenBuf = ''; // unconditional reset; nothing is sent to onMessage
+}
+function emitToken(conv: CodexConversation, token: string): void { // buffer a token; flushTokens emits the batch later
+  conv.tokenBuf += token;
+  if (!conv.tokenFlushTimer) { // at most one timer per batch — later tokens ride the one already armed
+    conv.tokenFlushTimer = setTimeout(() => {
+      conv.tokenFlushTimer = null; // mark as fired so the next token arms a fresh timer
+      flushTokens(conv);
+    }, TOKEN_FLUSH_MS);
+  }
+}
+/** Emit any non-token event, flushing buffered tokens first so ordering is
+ *  preserved (bot:tool commits the stream bubble; bot:response must equal the
+ *  streamed text the frontend already saw).
+ *
+ *  @param conv  Conversation whose token buffer is flushed and whose onMessage receives the event.
+ *  @param type  Event name, passed through to conv.onMessage unchanged.
+ *  @param data  Event payload, passed through to conv.onMessage unchanged. */
+function emitEvent(conv: CodexConversation, type: string, data: any): void {
+  flushTokens(conv); // any buffered bot:token goes out before this event
+  conv.onMessage(type, data);
+}
+/** Kinds we can branch on from CodexErrorInfo (string variant or `{ kind: {...} }`).
+ *
+ *  @returns The string itself for the string variant; the FIRST own key for a
+ *    non-null object; undefined for anything else (null, undefined, numbers, …)
+ *    and for an object with no keys. */
+function errorInfoKind(info: any): string | undefined {
+  if (typeof info === 'string') return info;
+  if (info && typeof info === 'object') return Object.keys(info)[0]; // `info &&` excludes null, whose typeof is also 'object'
+  return undefined;
+}
+/**
+ * Map codex's structured error onto a message that gives BLOBY's remedy.
+ * The raw upstream text actively misleads ("Please log out and sign in again"
+ * points at the codex CLI, not the dashboard re-auth flow).
+ *
+ * @param message  Raw upstream error text; 'Codex error' is substituted when it is empty or absent.
+ * @param info  Structured error info; its kind (via errorInfoKind) selects the mapping below.
+ * @param additionalDetails  Extra detail text, appended on a new line ONLY in
+ *   the default (unmapped kind) branch; mapped kinds ignore it.
+ * @returns The user-facing message. Only the usage-limit and default branches
+ *   include the raw upstream text; the other mapped kinds replace it entirely.
+ */
+function humanizeCodexError(message?: string, info?: any, additionalDetails?: string | null): string {
+  const raw = message || 'Codex error';
+  switch (errorInfoKind(info)) {
+    case 'unauthorized':
+      return 'Codex session expired or unauthorized. Re-authenticate from the dashboard.';
+    case 'usageLimitExceeded':
+      return `Codex usage limit reached — ${raw}`;
+    case 'contextWindowExceeded':
+      return 'The conversation outgrew the model context window. Send your message again to continue in a fresh session.';
+    case 'serverOverloaded':
+      return 'OpenAI servers are overloaded right now — try again in a moment.';
+    case 'httpConnectionFailed':
+    case 'responseStreamConnectionFailed':
+    case 'responseStreamDisconnected': { // three connection-failure kinds share one message
+      const status = (Object.values(info || {})[0] as any)?.httpStatusCode; // read from the first value of info; undefined when it carries no httpStatusCode
+      return `Connection to OpenAI failed${status ? ` (HTTP ${status})` : ''} — try again in a moment.`;
+    }
+    default:
+      return additionalDetails ? `${raw}\n${additionalDetails}` : raw;
+  }
+}
+/** Emit bot:error exactly once per turn (see errorEmitted).
+ *
+ *  The message is passed through humanizeCodexError before anything else, so
+ *  the suppression log line also shows the humanized text (first 120 chars).
+ *  The errorEmitted flag set here is cleared again by resetTurnState. */
+function emitError(conv: CodexConversation, message?: string, info?: any, additionalDetails?: string | null): void {
+  const friendly = humanizeCodexError(message, info, additionalDetails);
+  if (conv.errorEmitted) { // an error was already surfaced for this turn — log and drop this one
+    log.info(`[codex] suppressing duplicate bot:error for conv=${conv.id}: ${friendly.slice(0, 120)}`);
+    return;
+  }
+  conv.errorEmitted = true;
+  emitEvent(conv, 'bot:error', { conversationId: conv.id, error: friendly }); // emitEvent flushes buffered tokens first
+}
+/** Emit bot:done exactly once per one-shot (see doneEmitted).
+ *
+ *  Repeat calls are silent no-ops. The payload carries the conversation id and
+ *  the current usedFileTools flag. */
+function emitDone(conv: CodexConversation): void {
+  if (conv.doneEmitted) return;
+  conv.doneEmitted = true; // set before emitting, so a repeat call returns early above
+  emitEvent(conv, 'bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
+}
+/* ── Input building ────────────────────────────────────────────────────── */
+/** mediaTypes whose content we inline into the prompt — codex has no document
+ *  input type (verified against 0.138 UserInput), so this is the closest we get
+ *  to claude's native base64 document ingestion for text-like files.
+ *
+ *  The pattern is anchored at the start only: any `text/*` type, or an
+ *  `application/` type whose subtype begins with one of the listed names. */
+const INLINE_TEXT_MEDIA = /^(text\/|application\/(json|xml|yaml|x-yaml|toml|csv|javascript|typescript))/;
+const INLINE_TEXT_MAX_BYTES = 48_000; // per-attachment cap, compared against an estimated decoded size (base64 length × 0.75)
+const INLINE_TEXT_TOTAL_BUDGET = 96_000; // combined cap across all attachments inlined into one prompt
-function buildUserInput(text: string, savedFiles?: SavedFile[]): Array<Record<string, any>> {
+function buildUserInput(text: string, savedFiles?: SavedFile[], attachments?: AgentAttachment[]): Array<Record<string, any>> {
   const input: Array<Record<string, any>> = [];
   let promptText = text || '(attached files)';
@@ -387,19 +570,59 @@ function buildUserInput(text: string, savedFiles?: SavedFile[]): Array<Record<st
     const lines = savedFiles.map((f) => `- ${f.name} -> ${f.relPath}`);
     promptText += `\n\n[Attached files saved to disk]\n${lines.join('\n')}\nYou can read or reference these files using the paths above (relative to your cwd).`;
   }
+  // Inline text-like attachments (size-capped) so the model can answer about
+  // their contents immediately instead of shelling out to read them from disk.
+  if (attachments?.length) {
+    let budget = INLINE_TEXT_TOTAL_BUDGET;
+    for (const att of attachments) {
+      if (att.type !== 'file' || !INLINE_TEXT_MEDIA.test(att.mediaType || '')) continue;
+      const approxBytes = Math.floor((att.data?.length || 0) * 0.75);
+      if (approxBytes === 0 || approxBytes > INLINE_TEXT_MAX_BYTES || approxBytes > budget) continue;
+      try {
+        const decoded = Buffer.from(att.data, 'base64').toString('utf-8');
+        budget -= approxBytes;
+        promptText += `\n\n[Attached file content: ${att.name}]\n\`\`\`\n${decoded}\n\`\`\``;
+      } catch {}
+    }
+  }
   input.push({ type: 'text', text: promptText });
   // Codex understands `localImage` (path on disk) — Bloby's file-saver already
   // wrote attachments to disk, so we just point at the absolute path.
+  const savedImageCounts = new Map<string, number>();
   if (savedFiles?.length) {
     for (const f of savedFiles) {
-      if (f.type === 'image') input.push({ type: 'localImage', path: f.absPath });
+      if (f.type === 'image') {
+        input.push({ type: 'localImage', path: f.absPath });
+        savedImageCounts.set(f.name, (savedImageCounts.get(f.name) || 0) + 1);
+      }
+    }
+  }
+  // Fallback for image attachments that never made it to disk (file-saver
+  // failure): inline as a data URL so the agent still SEES the image — claude
+  // always gets the base64 inline, codex shouldn't silently go blind.
+  // Match by COUNT, not name presence: WhatsApp multi-image pushes share one
+  // attachment name, and each saved file covers exactly one of them.
+  if (attachments?.length) {
+    for (const att of attachments) {
+      if (att.type !== 'image' || !att.data) continue;
+      const remaining = savedImageCounts.get(att.name) || 0;
+      if (remaining > 0) {
+        savedImageCounts.set(att.name, remaining - 1);
+        continue;
+      }
+      input.push({ type: 'image', url: `data:${att.mediaType};base64,${att.data}` });
     }
   }
   return input;
 }
+/* ── Turn lifecycle ────────────────────────────────────────────────────── */
 function clearTurnWatchdog(conv: CodexConversation): void {
   if (conv.turnWatchdog) {
     clearTimeout(conv.turnWatchdog);
@@ -407,6 +630,13 @@ function clearTurnWatchdog(conv: CodexConversation): void {
   }
 }
+function clearHardTurnTimer(conv: CodexConversation): void { // cancel the one-shot hard cap if armed; no-op otherwise
+  if (conv.hardTurnTimer) {
+    clearTimeout(conv.hardTurnTimer);
+    conv.hardTurnTimer = null; // null lets armHardTurnTimer arm a new timer later
+  }
+}
 /**
  * Arm the per-turn watchdog. On fire, unstick the conversation the same way a
  * real `turn/completed` would (so the dashboard, `anyConversationBusy`, and the
@@ -420,11 +650,15 @@ function armTurnWatchdog(conv: CodexConversation): void {
     log.warn(`[codex] turn watchdog fired (${TURN_WATCHDOG_MS}ms) — conv=${conv.id}; unsticking + tearing down`);
     conv.busy = false;
     conv.currentTurnId = null;
-    conv.onMessage('bot:error', { conversationId: conv.id, error: 'Codex turn timed out — no response from app-server.' });
+    // Prefer a stashed non-retry error: if the server wedged right after
+    // emitting it (the turn/completed that normally surfaces it never came),
+    // the stash carries the real cause, and emitError maps its info onto the
+    // matching remedy text (via humanizeCodexError).
+    const stash = conv.stashedError;
+    emitError(conv, stash?.message || 'Codex turn timed out — no response from app-server.', stash?.info);
     if (conv.oneShot) {
-      conv.onMessage('bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
+      emitDone(conv);
     } else {
-      conv.onMessage('bot:turn-complete', {
+      emitEvent(conv, 'bot:turn-complete', {
         conversationId: conv.id,
         usedFileTools: conv.usedFileTools,
         contextTokens: conv.lastContextTokens || 0,
@@ -436,60 +670,98 @@ function armTurnWatchdog(conv: CodexConversation): void {
   }, TURN_WATCHDOG_MS);
 }
-async function startTurn(conv: CodexConversation, content: string, savedFiles?: SavedFile[]): Promise<void> {
-  const input = buildUserInput(content, savedFiles);
-  conv.busy = true;
+/** Hard wall-clock cap for one-shot turns — see ONE_SHOT_MAX_TURN_MS.
+ *
+ *  No-op for non-one-shot conversations and when a timer is already armed, so
+ *  repeat calls never restart the clock. On fire it: requests turn/interrupt
+ *  for the in-flight turn (if its id is known), marks the conversation idle,
+ *  emits a bot:error naming the limit, and tears the conversation down. */
+function armHardTurnTimer(conv: CodexConversation): void {
+  if (!conv.oneShot || conv.hardTurnTimer) return;
+  conv.hardTurnTimer = setTimeout(() => {
+    conv.hardTurnTimer = null;
+    log.warn(`[codex] one-shot hard cap fired (${ONE_SHOT_MAX_TURN_MS}ms) — conv=${conv.id}; interrupting`);
+    if (conv.currentTurnId) {
+      void conv.rpc.request('turn/interrupt', { threadId: conv.threadId, turnId: conv.currentTurnId }).catch(() => {}); // fire-and-forget: not awaited, rejection deliberately swallowed
+    }
+    conv.busy = false;
+    conv.currentTurnId = null;
+    emitError(conv, `Codex query hit the ${Math.round(ONE_SHOT_MAX_TURN_MS / 60_000)}-minute limit and was stopped.`); // ms → whole minutes for the message
+    teardownConversation(conv.id); // emits the guaranteed bot:done
+  }, ONE_SHOT_MAX_TURN_MS);
+}
+function resetTurnState(conv: CodexConversation): void {
   conv.fullText = '';
+  conv.currentMsgItemId = null;
+  conv.itemTexts = new Map();
   conv.usedFileTools = false;
-  conv.onMessage('bot:typing', { conversationId: conv.id });
+  conv.errorEmitted = false;
+  conv.stashedError = null;
+}
+async function startTurn(conv: CodexConversation, content: string, savedFiles?: SavedFile[], attachments?: AgentAttachment[]): Promise<void> {
+  const input = buildUserInput(content, savedFiles, attachments);
+  conv.busy = true;
+  resetTurnState(conv);
+  emitEvent(conv, 'bot:typing', { conversationId: conv.id });
   armTurnWatchdog(conv);
+  armHardTurnTimer(conv);
   try {
     const params: Record<string, any> = { threadId: conv.threadId, input };
     if (conv.effort) params.effort = conv.effort;
-    // turn/start resolves immediately with { turn }; seize the id now so a
-    // pushMessage arriving before the turn/started notification can steer
-    // instead of starting a second turn.
+    // turn/start resolves immediately with { turn }; seize the id now so an
+    // interrupt arriving before the turn/started notification still has a target.
     const res = await conv.rpc.request<{ turn?: { id?: string } }>('turn/start', params);
     if (res?.turn?.id) conv.currentTurnId = res.turn.id;
   } catch (err: any) {
     clearTurnWatchdog(conv);
     conv.busy = false;
     conv.currentTurnId = null;
-    conv.onMessage('bot:error', { conversationId: conv.id, error: `turn/start failed: ${err.message}` });
-    // turn/start produced no turn, so no turn/completed will arrive to clear the supervisor's
-    // agentQueryActive (set on bot:typing above). Left as-is, that wedges true forever:
-    // backend auto-heal is deferred indefinitely and chat is stuck showing "typing". Tear the
-    // conversation down so bot:conversation-ended fires (which, unlike bot:turn-complete, does
-    // NOT trigger a backend restart) — the next user message cold-starts a fresh thread.
-    teardownConversation(conv.id);
+    emitError(conv, `turn/start failed: ${err.message}`);
+    // turn/start produced no turn, so no turn/completed will arrive to clear the
+    // supervisor's agentQueryActive (set on bot:typing above).
+    //
+    // - One-shots and transport failures (timeout / process exit): tear down. A
+    //   60s timeout most plausibly means a hung app-server — keeping the conv
+    //   would just hang the next message too. teardown emits bot:conversation-
+    //   ended (clears agentQueryActive) and, for one-shots, the guaranteed bot:done.
+    // - A fast JSON-RPC REJECTION on a live conv means the server is alive (bad
+    //   effort value, transient thread error): keep the thread — its server-side
+    //   context (files read, compacted history) survives, matching claude's
+    //   per-turn error containment. bot:turn-complete unsticks the supervisor.
+    if (conv.oneShot || !isServerRejection(err)) {
+      teardownConversation(conv.id);
+    } else {
+      emitEvent(conv, 'bot:turn-complete', {
+        conversationId: conv.id,
+        usedFileTools: false,
+        contextTokens: conv.lastContextTokens || 0,
+        contextWindow: conv.lastContextWindow || 0,
+        idle: conv.pendingInputs.length === 0,
+      });
+      // Keep draining queued messages — each failure surfaces its own error,
+      // and the finite queue guarantees termination.
+      const next = conv.pendingInputs.shift();
+      if (next !== undefined) void startTurn(conv, next.content, next.savedFiles, next.attachments);
+    }
   }
 }
-async function steerOrQueue(conv: CodexConversation, content: string, savedFiles?: SavedFile[]): Promise<void> {
-  if (!conv.currentTurnId) {
-    // No active turn — start a fresh one.
-    await startTurn(conv, content, savedFiles);
+/**
+ * Queue-or-start: the codex side of claude's async input queue. While a turn
+ * is active, pushes are queued and drained ONE TURN PER MESSAGE on
+ * turn/completed — preserving the one-bot:response-per-push contract that
+ * channels/manager.ts's routing FIFO depends on (it enqueues exactly one
+ * routing target per push and consumes one per bot:response).
+ */
+function queueOrStart(conv: CodexConversation, content: string, savedFiles?: SavedFile[], attachments?: AgentAttachment[]): void {
+  if (conv.busy || conv.currentTurnId) {
+    conv.pendingInputs.push({ content, savedFiles, attachments });
+    // Mirror claude's pushMessage: every accepted push shows typing immediately.
+    emitEvent(conv, 'bot:typing', { conversationId: conv.id });
     return;
   }
-  // Active turn — inject mid-flight.
-  const input = buildUserInput(content, savedFiles);
-  try {
-    const res = await conv.rpc.request<{ turnId?: string }>('turn/steer', {
-      threadId: conv.threadId,
-      expectedTurnId: conv.currentTurnId,
-      input,
-    });
-    if (res?.turnId) conv.currentTurnId = res.turnId;
-    conv.onMessage('bot:typing', { conversationId: conv.id });
-  } catch (err: any) {
-    // expectedTurnId mismatch most likely means the turn just finished —
-    // retry as a fresh turn.
-    log.warn(`[codex] turn/steer failed (${err.message}); falling back to turn/start`);
-    if (!conv.currentTurnId) await startTurn(conv, content, savedFiles);
-    else conv.pendingInputs.push(content);
-  }
+  void startTurn(conv, content, savedFiles, attachments);
 }
+/* ── Notification handling ─────────────────────────────────────────────── */
 function handleNotification(conv: CodexConversation, n: { method: string; params?: any }): void {
   const p = n.params || {};
   // Any notification for this conv proves the app-server is alive and working —
@@ -498,9 +770,7 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
   switch (n.method) {
     case 'turn/started': {
       conv.currentTurnId = p.turn?.id || null;
-      conv.fullText = '';
-      conv.currentMsgItemId = null;
-      conv.usedFileTools = false;
+      resetTurnState(conv);
       break;
     }
@@ -511,52 +781,72 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
       // On a new itemId, insert a paragraph break so they don't run together (mirrors claude.ts).
       if (p.itemId && conv.currentMsgItemId && p.itemId !== conv.currentMsgItemId && conv.fullText && !conv.fullText.endsWith('\n')) {
         conv.fullText += '\n\n';
-        conv.onMessage('bot:token', { conversationId: conv.id, token: '\n\n' });
+        emitToken(conv, '\n\n');
+      }
+      if (p.itemId) {
+        conv.currentMsgItemId = p.itemId;
+        conv.itemTexts.set(p.itemId, (conv.itemTexts.get(p.itemId) || '') + delta);
       }
-      if (p.itemId) conv.currentMsgItemId = p.itemId;
       conv.fullText += delta;
-      conv.onMessage('bot:token', { conversationId: conv.id, token: delta });
+      emitToken(conv, delta);
       break;
     }
     case 'item/started': {
       const item = p.item || {};
-      // Surface tool-like items so the dashboard can show activity.
+      // Surface tool-like items so the dashboard can show activity. Names use
+      // claude's tool vocabulary (Bash/Edit/WebSearch/mcp__server__tool) so one
+      // UI label map serves both harnesses.
       switch (item.type) {
         case 'commandExecution':
-          conv.onMessage('bot:tool', {
+          emitEvent(conv, 'bot:tool', {
             conversationId: conv.id,
-            name: 'shell',
+            name: 'Bash',
             input: { command: item.command || item.commandLine || '' },
           });
           break;
         case 'mcpToolCall':
           // ThreadItem.mcpToolCall fields are `server` + `tool` (no toolName/name/input).
-          conv.onMessage('bot:tool', {
+          emitEvent(conv, 'bot:tool', {
             conversationId: conv.id,
-            name: item.tool ? (item.server ? `${item.server}/${item.tool}` : item.tool) : 'mcp_tool',
+            name: item.tool ? (item.server ? `mcp__${item.server}__${item.tool}` : item.tool) : 'mcp_tool',
             input: item.arguments || {},
           });
           break;
         case 'fileChange':
           conv.usedFileTools = true;
-          conv.onMessage('bot:tool', {
+          emitEvent(conv, 'bot:tool', {
             conversationId: conv.id,
-            name: 'file_change',
+            name: 'Edit',
             input: { changes: (item.changes || []).map((c: any) => c.path).filter(Boolean) },
           });
           break;
         case 'webSearch':
-          conv.onMessage('bot:tool', {
+          emitEvent(conv, 'bot:tool', {
             conversationId: conv.id,
-            name: 'web_search',
+            name: 'WebSearch',
             input: { query: item.query || '' },
           });
           break;
+        case 'reasoning':
+          // Activity signal during the (often long) thinking phase — without it
+          // the chat shows bare typing dots with zero events for the entire
+          // stretch and turns feel hung. The chat hooks dedup repeated
+          // same-name running tools, and channel chunk-flushes no-op on an
+          // empty buffer, so this is purely additive. Reasoning TEXT is never
+          // emitted as bot:token — that would corrupt the bot:response ===
+          // streamed-text contract and leak reasoning to channels.
+          emitEvent(conv, 'bot:tool', {
+            conversationId: conv.id,
+            name: 'thinking',
+            status: 'running',
+          });
+          break;
         case 'collabAgentToolCall':
-          // Codex's collaborating sub-agents (rarely enabled) → Bloby's sub-agent UX.
+          // Codex's collaborating sub-agents (experimental, not enabled by our
+          // initialize) → Bloby's sub-agent UX, if it ever lights up.
           if (item.tool === 'spawnAgent') {
-            conv.onMessage('bot:task-created', {
+            emitEvent(conv, 'bot:task-created', {
               conversationId: conv.id,
               taskId: item.id,
               description: item.prompt || 'sub-agent',
@@ -564,24 +854,70 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
             });
           }
           break;
-        // userMessage / agentMessage / reasoning — no tool-style event.
+        // userMessage / agentMessage — no tool-style event.
       }
       break;
     }
+    case 'turn/plan/updated': {
+      // Codex's native planning (its TodoWrite equivalent). Emit it in claude's
+      // TodoWrite shape so plans drive the same activity label + bubble/chunk
+      // boundaries — without this codex feels opaque exactly on the big agentic
+      // asks where claude feels alive. Statuses are camelCase in 0.138
+      // (pending | inProgress | completed) — normalize to claude's snake_case.
+      const steps = Array.isArray(p.plan) ? p.plan : [];
+      emitEvent(conv, 'bot:tool', {
+        conversationId: conv.id,
+        name: 'TodoWrite',
+        input: {
+          todos: steps.map((s: any) => ({
+            content: s.step || '',
+            status: s.status === 'inProgress' ? 'in_progress' : (s.status || 'pending'),
+          })),
+          ...(p.explanation ? { explanation: p.explanation } : {}),
+        },
+      });
+      break;
+    }
     case 'item/completed': {
       const item = p.item || {};
       if (item.type === 'fileChange') conv.usedFileTools = true;
-      // If a final agentMessage arrives without preceding deltas (rare), grab it now.
-      if (item.type === 'agentMessage' && !conv.fullText) {
-        const text = (item.content || []).map((c: any) => c.text || '').join('') || item.text || '';
-        if (text) {
-          conv.fullText = text;
-          conv.onMessage('bot:token', { conversationId: conv.id, token: text });
+      // item/completed carries the AUTHORITATIVE final text for an agentMessage.
+      // Reconcile per-itemId: an item that never streamed deltas (or whose final
+      // text extends beyond the concatenated deltas) gets the remainder emitted
+      // here — previously a turn-level `!conv.fullText` gate silently dropped any
+      // non-delta'd message once ANYTHING had streamed.
+      if (item.type === 'agentMessage') {
+        const finalText: string = (typeof item.text === 'string' && item.text)
+          ? item.text
+          : ((item.content || []).map((c: any) => c.text || '').join(''));
+        if (finalText) {
+          const itemId: string | undefined = item.id;
+          const streamed = itemId ? (conv.itemTexts.get(itemId) || '') : conv.fullText;
+          let remainder = '';
+          if (!streamed) {
+            remainder = finalText;
+          } else if (finalText.length > streamed.length && finalText.startsWith(streamed)) {
+            remainder = finalText.slice(streamed.length);
+          }
+          if (remainder) {
+            // Same paragraph-break rule as the delta path for a fresh item.
+            if (!streamed && conv.fullText && !conv.fullText.endsWith('\n')) {
+              conv.fullText += '\n\n';
+              emitToken(conv, '\n\n');
+            }
+            conv.fullText += remainder;
+            emitToken(conv, remainder);
+            if (itemId) {
+              conv.itemTexts.set(itemId, finalText);
+              conv.currentMsgItemId = itemId;
+            }
+          }
         }
       }
       if (item.type === 'collabAgentToolCall' && item.tool === 'spawnAgent') {
-        conv.onMessage('bot:task-done', {
+        emitEvent(conv, 'bot:task-done', {
           conversationId: conv.id,
           taskId: item.id,
           status: item.status,
@@ -593,11 +929,14 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
     case 'thread/tokenUsage/updated': {
       // Codex's only token-usage signal. ThreadTokenUsage = { total, last, modelContextWindow };
-      // `last` is the current prompt occupancy (mirrors Claude's input+cacheRead+cacheCreation),
-      // the right basis for the recycle compare in supervisor/index.ts (fraction*window, not lifetime).
+      // `last.inputTokens` is the current prompt occupancy — the right basis for
+      // the recycle compare in supervisor/index.ts (fraction*window, not lifetime).
+      // NB: codex's cachedInputTokens is a SUBSET of inputTokens (unlike
+      // Anthropic's disjoint cache counters) — summing them double-counts the
+      // cache and recycles sessions at ~35% real occupancy.
       const tu = p.tokenUsage || {};
       const last = tu.last || {};
-      conv.lastContextTokens = (last.inputTokens || 0) + (last.cachedInputTokens || 0);
+      conv.lastContextTokens = last.inputTokens || 0;
       if (typeof tu.modelContextWindow === 'number' && tu.modelContextWindow > 0) {
         conv.lastContextWindow = tu.modelContextWindow;
       }
@@ -609,29 +948,41 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
       const turnError = p.turn?.error;
       clearTurnWatchdog(conv);
+      clearHardTurnTimer(conv);
       conv.currentTurnId = null;
       conv.busy = false;
+      let failureKind: string | undefined;
       if (status === 'failed') {
-        conv.onMessage('bot:error', {
-          conversationId: conv.id,
-          error: turnError?.message || 'Codex turn failed.',
-        });
+        // Preserve streamed partials exactly like claude: if text already reached
+        // the user, commit it as the reply (the frontend's bot:error handler
+        // ERASES the uncommitted stream buffer); surface bot:error only when the
+        // turn died before producing anything.
+        const message = turnError?.message || conv.stashedError?.message || 'Codex turn failed.';
+        const info = turnError?.codexErrorInfo ?? conv.stashedError?.info;
+        failureKind = errorInfoKind(info);
+        if (conv.fullText) {
+          log.warn(`[codex] turn failed after streaming ${conv.fullText.length} chars (${message.slice(0, 160)}) — preserving partial as the reply`);
+          emitEvent(conv, 'bot:response', { conversationId: conv.id, content: conv.fullText });
+        } else {
+          emitError(conv, message, info, turnError?.additionalDetails);
+        }
       } else if (status === 'interrupted') {
         // Interrupted turns carry no final answer — stay silent.
       } else if (conv.fullText) {
-        conv.onMessage('bot:response', { conversationId: conv.id, content: conv.fullText });
+        emitEvent(conv, 'bot:response', { conversationId: conv.id, content: conv.fullText });
       }
+      conv.stashedError = null;
       if (conv.oneShot) {
-        conv.onMessage('bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
+        emitDone(conv);
         teardownConversation(conv.id);
       } else {
         // Context-size signal for the orchestrator's proactive session recycling,
         // sourced from the cached `thread/tokenUsage/updated` values above. 0 if codex
         // never sent one this turn → falls back to codex's own in-thread auto-compaction.
         const idle = conv.pendingInputs.length === 0;
-        conv.onMessage('bot:turn-complete', {
+        emitEvent(conv, 'bot:turn-complete', {
           conversationId: conv.id,
           usedFileTools: conv.usedFileTools,
           contextTokens: conv.lastContextTokens || 0,
@@ -639,9 +990,30 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
           idle,
         });
-        // Drain any messages that were submitted while we were busy.
+        // An unauthorized/context-wall thread can't run further turns — tear it
+        // down so the NEXT message pre-flights cleanly (friendly dashboard
+        // message / fresh thread) instead of repeating the same failure forever.
+        if (failureKind === 'unauthorized' || failureKind === 'contextWindowExceeded') {
+          // Every queued message got bot:typing on push — give each a terminal
+          // event too, or it vanishes without feedback. (Direct emit: the
+          // per-turn errorEmitted dedup must not swallow these.)
+          const reason = failureKind === 'unauthorized' ? 'authentication expired' : 'context window full';
+          for (const dropped of conv.pendingInputs.splice(0)) {
+            const snippet = dropped.content.slice(0, 60) + (dropped.content.length > 60 ? '…' : '');
+            emitEvent(conv, 'bot:error', {
+              conversationId: conv.id,
+              error: `Codex session ended (${reason}) before your message "${snippet}" could run — please resend it.`,
+            });
+          }
+          log.warn(`[codex] tearing down conv=${conv.id} after ${failureKind} so the next message starts clean`);
+          teardownConversation(conv.id);
+          break;
+        }
+        // Drain any messages that were queued while we were busy — each gets
+        // its own turn (and thus its own bot:response).
         const next = conv.pendingInputs.shift();
-        if (next !== undefined) void startTurn(conv, next);
+        if (next !== undefined) void startTurn(conv, next.content, next.savedFiles, next.attachments);
       }
       break;
     }
@@ -653,8 +1025,19 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
         log.info(`[codex] transient error (will retry): ${p.error?.message || 'unknown'}`);
         break;
       }
-      const errMsg = p.error?.message || 'Codex error notification';
-      conv.onMessage('bot:error', { conversationId: conv.id, error: errMsg });
+      // Hard failures emit BOTH this notification AND turn/completed{failed}
+      // with the same TurnError (live-verified on 0.138.0). While a turn is
+      // active, stash it and let turn/completed be the single surface — its
+      // TurnError is authoritative per the protocol. Outside a turn there is
+      // no turn/completed coming, so emit directly.
+      const message = p.error?.message || 'Codex error notification';
+      const info = p.error?.codexErrorInfo;
+      if (conv.busy) {
+        log.info(`[codex] stashing mid-turn error for turn/completed: ${message.slice(0, 160)}`);
+        conv.stashedError = { message, info };
+      } else {
+        emitError(conv, message, info, p.error?.additionalDetails);
+      }
       break;
     }
@@ -672,13 +1055,110 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
   }
 }
+/* ── Conversation teardown ─────────────────────────────────────────────── */
 function teardownConversation(conversationId: string): void {
   const conv = conversations.get(conversationId);
   if (!conv) return;
   clearTurnWatchdog(conv);
+  clearHardTurnTimer(conv);
   conversations.delete(conversationId);
+  discardTokens(conv);
   try { conv.rpc.close(); } catch {}
+  // bot:done guarantee: slot-freeing consumers (WhatsApp activeAgents, the
+  // scheduler) resolve ONLY on bot:done — claude guarantees it in a finally,
+  // so every codex terminal path (stop, settings:save teardown, init failure,
+  // crash) must too, or each failed customer message pins a slot until restart.
+  if (conv.oneShot) emitDone(conv);
   conv.onMessage('bot:conversation-ended', { conversationId });
+  // Re-warm for the next live conversation (mirrors claude's finally).
+  if (!conv.oneShot) void warmUpForLiveConversation(conv.model, conv.names);
+}
+/* ── Spawn + initialize (with pre-warm pool) ───────────────────────────── */
+/**
+ * Pre-warmed app-server: spawned + initialize handshake done. `thread/start`
+ * is deliberately NOT issued at warm time — it's a single fast local RPC and
+ * deferring it means the warm process is claimable for ANY model/instructions
+ * (claude's warmer must bake the full options in and misses whenever
+ * recentMessages differ; this design sidesteps that). Keyed on the MCP spawn
+ * flags, the only thing fixed at spawn time.
+ */
+interface CodexWarmEntry {
+  key: string; // warmKeyFor() hash of the MCP spawn flags this process was started with
+  rpc: CodexRpc; // connection that is cached only after its `initialize` request resolved
+}
+let codexWarm: CodexWarmEntry | null = null; // at most one cached warm process; null when none is available
+let codexWarmInflight: Promise<void> | null = null; // pending warm-up, handed to concurrent callers instead of spawning again
+/**
+ * Derive the cache key for a warm app-server: the hex SHA-256 digest of the
+ * JSON-serialized MCP spawn flags. Identical flag lists (same order) yield the
+ * same key; any difference yields a different one.
+ */
+function warmKeyFor(mcpArgs: string[]): string {
+  const serializedFlags = JSON.stringify(mcpArgs);
+  const hasher = crypto.createHash('sha256');
+  hasher.update(serializedFlags);
+  return hasher.digest('hex');
+}
+/**
+ * Take ownership of the cached warm app-server, if there is one and it was
+ * spawned with the MCP flags identified by `key`.
+ *
+ * The cache slot is emptied in every case where an entry existed: a matching
+ * entry is handed to the caller, a mismatching one is closed and dropped.
+ *
+ * @param key Expected warmKeyFor() hash of the current MCP spawn flags.
+ * @returns The warm rpc on a key match, otherwise null.
+ */
+function claimWarmRpc(key: string): CodexRpc | null {
+  const warm = codexWarm;
+  if (warm === null) return null;
+  if (warm.key === key) {
+    codexWarm = null;
+    log.info('[codex] claimed pre-warmed app-server');
+    return warm.rpc;
+  }
+  // MCP config changed since warm time — the flags are baked into the spawn,
+  // so the stale process cannot be reused.
+  try { warm.rpc.close(); } catch {}
+  codexWarm = null;
+  return null;
+}
+/**
+ * Close and forget the cached warm app-server, if any. Errors from close()
+ * are ignored; the cache slot is cleared regardless.
+ */
+function discardCodexWarmup(): void {
+  const stale = codexWarm;
+  if (!stale) return;
+  try { stale.rpc.close(); } catch {}
+  codexWarm = null;
+}
+/**
+ * Spawn and initialize a spare app-server in the background so the next live
+ * conversation can claim it instead of paying for a cold start.
+ *
+ * Best-effort: when there is no access token, or the spawn/handshake fails,
+ * nothing is cached and the failure is logged rather than thrown. Concurrent
+ * calls share the single in-flight attempt, and a cached process whose key
+ * already matches the current MCP flags is left alone.
+ *
+ * @param _model Unused — `thread/start` is deferred until claim time, so the
+ *   warm process is not tied to a model.
+ * @param _names Unused; accepted so callers can pass their conversation names.
+ * @returns A promise that settles once the warm-up attempt has finished.
+ */
+export async function warmUpForLiveConversation(
+  _model: string,
+  _names?: { botName: string; humanName: string },
+): Promise<void> {
+  if (codexWarmInflight) return codexWarmInflight;
+  const mcpArgs = buildMcpConfigArgs();
+  const key = warmKeyFor(mcpArgs);
+  if (codexWarm?.key === key) return;
+  codexWarmInflight = (async () => {
+    // Tracked outside the try so the failure path can still reach a process
+    // that was spawned but never made it into the cache.
+    let uncached: CodexRpc | null = null;
+    try {
+      const token = await getCodexAccessToken();
+      if (!token) return; // not authed — nothing to warm
+      if (codexWarm && codexWarm.key !== key) discardCodexWarmup();
+      const rpc = new CodexRpc();
+      uncached = rpc;
+      rpc.start(mcpArgs);
+      rpc.onClose(() => {
+        // Warm process died on its own — drop it from the cache so a claim
+        // never hands out a dead rpc.
+        if (codexWarm?.rpc === rpc) codexWarm = null;
+      });
+      await rpc.request('initialize', { clientInfo: CLIENT_INFO });
+      rpc.notify('initialized', {});
+      codexWarm = { key, rpc };
+      uncached = null; // ownership moved to the cache
+      log.ok('[codex] app-server pre-warmed');
+    } catch (err: any) {
+      // Nobody else holds a reference to a process that failed its handshake;
+      // close it here or the subprocess outlives the failed warm-up (the cold
+      // spawn path closes its rpc on failure for the same reason).
+      if (uncached) {
+        try { uncached.close(); } catch {}
+      }
+      log.warn(`[codex] pre-warm skipped: ${err?.message || err}`);
+    } finally {
+      codexWarmInflight = null;
+    }
+  })();
+  return codexWarmInflight;
+}
+interface SpawnOpts {
+  oneShot: boolean;
+  /** Customer-facing runs (supportPrompt personas) get NO workspace skills —
+   *  mirrors claude's `skills: supportPrompt ? [] : ...` gate so internal ops
+   *  skills can't leak into the customer context. */
+  wantSkills: boolean;
+  names?: { botName: string; humanName: string };
 }
 async function spawnAndInitialize(
@@ -686,7 +1166,7 @@ async function spawnAndInitialize(
   model: string,
   onMessage: OnAgentMessage,
   instructions: string,
-  oneShot: boolean,
+  opts: SpawnOpts,
 ): Promise<CodexConversation | null> {
   // Pre-flight: confirm we have valid OAuth tokens before spending time spawning.
   const token = await getCodexAccessToken();
@@ -698,85 +1178,202 @@ async function spawnAndInitialize(
     return null;
   }
-  const { id: modelId, effort } = parseModelString(model);
-  const rpc = new CodexRpc();
-  rpc.start(buildMcpConfigArgs());
+  const { id: parsedModelId, effort: parsedEffort } = parseModelString(model);
+  const mcpArgs = buildMcpConfigArgs();
+  // One attempt with the pre-warmed process (live convs only — one-shots churn
+  // too fast to be worth re-warming for), falling back to a cold spawn if the
+  // warm one fails its thread/start.
+  const attempts: Array<'warm' | 'cold'> = [];
+  if (!opts.oneShot && codexWarm) attempts.push('warm');
+  attempts.push('cold');
+  for (const attempt of attempts) {
+    let rpc: CodexRpc;
+    if (attempt === 'warm') {
+      const claimed = claimWarmRpc(warmKeyFor(mcpArgs));
+      if (!claimed) continue;
+      rpc = claimed;
+    } else {
+      rpc = new CodexRpc();
+      rpc.start(mcpArgs);
+    }
-  const conv: CodexConversation = {
-    id: conversationId,
-    rpc,
-    threadId: '',
-    effort,
-    onMessage,
-    currentTurnId: null,
-    currentMsgItemId: null,
-    fullText: '',
-    usedFileTools: false,
-    pendingInputs: [],
-    busy: false,
-    oneShot,
-    lastContextTokens: 0,
-    lastContextWindow: 0,
-    turnWatchdog: null,
-  };
+    const conv: CodexConversation = {
+      id: conversationId,
+      rpc,
+      threadId: '',
+      effort: parsedEffort,
+      model,
+      names: opts.names,
+      onMessage,
+      currentTurnId: null,
+      currentMsgItemId: null,
+      fullText: '',
+      itemTexts: new Map(),
+      usedFileTools: false,
+      pendingInputs: [],
+      busy: false,
+      oneShot: opts.oneShot,
+      errorEmitted: false,
+      doneEmitted: false,
+      stashedError: null,
+      lastContextTokens: 0,
+      lastContextWindow: 0,
+      turnWatchdog: null,
+      hardTurnTimer: null,
+      tokenBuf: '',
+      tokenFlushTimer: null,
+    };
-  rpc.onNotification((n) => handleNotification(conv, n));
-  rpc.onClose(() => {
-    if (conversations.get(conversationId) === conv) {
+    rpc.onNotification((n) => handleNotification(conv, n));
+    rpc.onClose((code, errMsg) => {
+      if (conversations.get(conversationId) !== conv) return;
+      // App-server died out from under a live conversation. Surface it NOW —
+      // without this the user stares at frozen typing dots until the leaked
+      // watchdog ghost-fires 5 minutes later with a misleading "timed out"
+      // (and that orphaned watchdog could tear down a successor conversation
+      // started under the same convId in the meantime).
+      clearTurnWatchdog(conv);
+      clearHardTurnTimer(conv);
       conversations.delete(conversationId);
+      flushTokens(conv);
+      if (conv.busy) {
+        conv.busy = false;
+        conv.currentTurnId = null;
+        emitError(conv, errMsg || `codex app-server exited unexpectedly (code=${code}).`);
+        if (!conv.oneShot) {
+          emitEvent(conv, 'bot:turn-complete', {
+            conversationId: conv.id,
+            usedFileTools: conv.usedFileTools,
+            contextTokens: conv.lastContextTokens || 0,
+            contextWindow: conv.lastContextWindow || 0,
+            idle: true,
+          });
+        }
+      }
+      if (conv.oneShot) emitDone(conv);
       onMessage('bot:conversation-ended', { conversationId });
+      if (!conv.oneShot) void warmUpForLiveConversation(conv.model, conv.names);
+    });
+    try {
+      log.info(`[codex] init conversation ${conversationId} (model=${parsedModelId}${parsedEffort ? `, effort=${parsedEffort}` : ''}, ${attempt})`);
+      if (attempt === 'cold') {
+        await rpc.request('initialize', { clientInfo: CLIENT_INFO });
+        rpc.notify('initialized', {});
+      }
+      // Validate/repair the configured model + effort against the live catalog —
+      // a stale model id or retired effort tier otherwise fails the first message
+      // with a cryptic teardown (codex has no config-level model migrations).
+      const { modelId, effort } = await validateModelSelection(rpc, parsedModelId, parsedEffort);
+      conv.effort = effort;
+      // Context auto-compaction is ON by default in the codex app-server: when the
+      // thread's token count crosses the model's threshold it compacts history in
+      // place (emitting a `contextCompaction` item) and continues — no flag needed
+      // here. A manual trigger also exists (`thread/compact/start`) if we ever want
+      // to force it from the UI.
+      const startResult = await rpc.request<{ thread: { id: string } }>('thread/start', {
+        cwd: WORKSPACE_DIR,
+        model: modelId,
+        // Bloby's persona/workflow prompt rides developerInstructions (ADDITIVE),
+        // NOT baseInstructions. baseInstructions fully OVERRIDES codex's native base
+        // prompt — which carries the apply_patch FREEFORM spec + shell protocol the
+        // model needs to edit files. Leaving baseInstructions unset keeps that native
+        // scaffolding; developerInstructions layers Bloby's persona on top of it.
+        developerInstructions: instructions,
+        personality: 'pragmatic',
+        // Bloby's posture matches Claude's bypassPermissions — the bot is
+        // running on the user's own machine with their full consent. Skip the
+        // approval prompts and give it write access to the workspace + beyond.
+        approvalPolicy: 'never',
+        sandbox: 'danger-full-access',
+      });
+      conv.threadId = startResult.thread.id;
+      conversations.set(conversationId, conv);
+      log.ok(`[codex] thread started ${conv.threadId}`);
+      // Prime codex's per-thread skill cache with the workspace skills
+      // directory. Without this, codex only sees its system-scope skills and
+      // never discovers anything Bloby ships in `workspace/skills/*`. Fire and
+      // forget — failure here just means workspace skills won't be auto-routable
+      // for this thread, but the agent can still read SKILL.md files directly.
+      // Customer-facing personas skip it entirely (see SpawnOpts.wantSkills).
+      if (opts.wantSkills) primeWorkspaceSkills(rpc);
+      return conv;
+    } catch (err: any) {
+      rpc.close();
+      if (attempt === 'warm') {
+        log.warn(`[codex] warm claim failed (${err.message}) — retrying with a cold spawn`);
+        continue;
+      }
+      onMessage('bot:error', { conversationId, error: `Failed to initialize Codex: ${err.message}` });
+      return null;
     }
-  });
+  }
+  return null; // unreachable — 'cold' attempt always returns/errors above
+}
-  try {
-    log.info(`[codex] init conversation ${conversationId} (model=${modelId}${effort ? `, effort=${effort}` : ''})`);
-    await rpc.request('initialize', { clientInfo: CLIENT_INFO });
-    rpc.notify('initialized', {});
-    // Context auto-compaction is ON by default in the codex app-server: when the
-    // thread's token count crosses the model's threshold it compacts history in
-    // place (emitting a `contextCompaction` item) and continues — no flag needed
-    // here. A manual trigger also exists (`thread/compact/start`) if we ever want
-    // to force it from the UI.
-    const startResult = await rpc.request<{ thread: { id: string } }>('thread/start', {
-      cwd: WORKSPACE_DIR,
-      model: modelId,
-      // Bloby's persona/workflow prompt rides developerInstructions (ADDITIVE),
-      // NOT baseInstructions. baseInstructions fully OVERRIDES codex's native base
-      // prompt — which carries the apply_patch FREEFORM spec + shell protocol the
-      // model needs to edit files. Leaving baseInstructions unset keeps that native
-      // scaffolding; developerInstructions layers Bloby's persona on top of it.
-      developerInstructions: instructions,
-      personality: 'pragmatic',
-      // Bloby's posture matches Claude's bypassPermissions — the bot is
-      // running on the user's own machine with their full consent. Skip the
-      // approval prompts and give it write access to the workspace + beyond.
-      approvalPolicy: 'never',
-      sandbox: 'danger-full-access',
-    });
-    conv.threadId = startResult.thread.id;
-    conversations.set(conversationId, conv);
-    log.ok(`[codex] thread started ${conv.threadId}`);
-    // Prime codex's per-thread skill cache with the workspace skills
-    // directory. Without this, codex only sees its system-scope skills and
-    // never discovers anything Bloby ships in `workspace/skills/*`. Fire and
-    // forget — failure here just means workspace skills won't be auto-routable
-    // for this thread, but the agent can still read SKILL.md files directly.
-    primeWorkspaceSkills(rpc);
+/* ── Model catalog validation ──────────────────────────────────────────── */
+interface ValidatedModel { modelId: string; effort?: string }
+const modelValidationCache = new Map<string, ValidatedModel>();
-    return conv;
+/**
+ * Validate the configured model id + effort against `model/list` (local
+ * catalog, one fast RPC — cached per model string for the process lifetime).
+ * Unknown model → repair to the catalog default with a logged warning instead
+ * of letting thread/start or turn/start fail with a cryptic teardown. Unknown
+ * effort → the model's defaultReasoningEffort. If model/list itself fails,
+ * proceed unvalidated (today's behavior).
+ */
+async function validateModelSelection(rpc: CodexRpc, modelId: string, effort?: string): Promise<ValidatedModel> {
+  const cacheKey = `${modelId}:${effort || ''}`;
+  const cached = modelValidationCache.get(cacheKey);
+  if (cached) return cached;
+  let result: ValidatedModel = { modelId, effort };
+  try {
+    const res = await rpc.request<{ data?: any[] }>('model/list', { includeHidden: true });
+    const catalog = Array.isArray(res?.data) ? res.data : [];
+    if (catalog.length) {
+      let entry = catalog.find((m: any) => m.id === modelId || m.model === modelId);
+      if (!entry) {
+        const fallback = catalog.find((m: any) => m.isDefault) || catalog[0];
+        log.warn(`[codex] model "${modelId}" not in catalog — falling back to "${fallback.id}"`);
+        entry = fallback;
+        result.modelId = entry.id;
+        result.effort = undefined; // stale effort may not apply to the fallback
+      }
+      const supported: string[] = (entry.supportedReasoningEfforts || [])
+        .map((o: any) => (typeof o === 'string' ? o : o?.reasoningEffort))
+        .filter(Boolean);
+      if (result.effort && supported.length && !supported.includes(result.effort)) {
+        const repaired = entry.defaultReasoningEffort || undefined;
+        log.warn(`[codex] effort "${result.effort}" not supported by ${result.modelId} (supported: ${supported.join(', ')}) — using ${repaired || 'server default'}`);
+        result.effort = repaired;
+      }
+      // Only cache results actually validated against a catalog — caching the
+      // passthrough on a failed/empty model/list would disable the auto-repair
+      // for this model string for the whole process lifetime.
+      modelValidationCache.set(cacheKey, result);
+    }
   } catch (err: any) {
-    rpc.close();
-    onMessage('bot:error', { conversationId, error: `Failed to initialize Codex: ${err.message}` });
-    return null;
+    log.warn(`[codex] model/list validation skipped: ${err.message}`);
   }
+  return result;
 }
 // Codex discovers "repo"-scope skills under `<cwd>/.codex/skills` (verified
-// against 0.135.0 — a bare `<cwd>/skills` is NOT scanned, and `skills/list`
-// has no extra-root param). Bloby keeps the canonical skills in
-// `workspace/skills/<name>`, so we mirror each one into `.codex/skills/<name>`
-// as a symlink — single source of truth, discoverable by codex's native router.
+// against 0.135.0 — a bare `<cwd>/skills` is NOT scanned). Bloby keeps the
+// canonical skills in `workspace/skills/<name>`, so we mirror each one into
+// `.codex/skills/<name>` as a symlink — single source of truth, discoverable
+// by codex's native router. 0.138 added `skills/extraRoots/set` which could
+// replace this mirror — deliberately NOT adopted yet: the mirror is
+// e2e-verified working and the swap buys no user-visible change.
 // (Each SKILL.md needs YAML frontmatter or codex rejects it — see SKILL_FORMAT_MIGRATION.md.)
 const CODEX_SKILLS_ROOT = path.join(WORKSPACE_DIR, '.codex', 'skills');
@@ -826,12 +1423,25 @@ function toToml(v: any): string {
   return JSON.stringify(String(v)); // TOML basic string — JSON escaping is compatible
 }
+/** codex's config layer requires STRING values for args items, env values and
+ *  http_headers — a numeric `"PORT": 3000` in MCP.json (fine on claude) would
+ *  otherwise kill the app-server at config load, bricking EVERY codex spawn.
+ *  Coerce so non-string JSON values degrade to their string form instead.
+ *
+ *  Renders `arr` as a TOML inline array whose items are all basic strings. */
+function toTomlStringArray(arr: any[]): string {
+  const quotedItems = arr.map((item) => JSON.stringify(String(item)));
+  return '[' + quotedItems.join(',') + ']';
+}
+/** Renders `obj` as a TOML inline table with quoted keys and every value
+ *  coerced to a basic string (same coercion as the string-array helper). */
+function toTomlStringMap(obj: Record<string, any>): string {
+  const pairs: string[] = [];
+  for (const [name, value] of Object.entries(obj)) {
+    pairs.push(`${JSON.stringify(name)}=${JSON.stringify(String(value))}`);
+  }
+  return `{${pairs.join(',')}}`;
+}
 /**
  * Translate MCP.json into `codex app-server -c mcp_servers.<name>.<field>=<toml>`
  * spawn flags. Codex sources MCP from its own config layer rather than a per-query
  * param (verified against 0.135.0: a `-c mcp_servers.X.command=...` override shows
- * up in both mcpServerStatus/list and config/read). Only the stdio fields Bloby
- * uses (command/args/env) are translated; names must be TOML-bare-key safe.
+ * up in both mcpServerStatus/list and config/read). Stdio entries translate
+ * command/args/env; url entries (streamable HTTP — the Claude SDK's http/sse
+ * form) translate url + headers→http_headers + bearer_token_env_var, supported
+ * natively by the 0.138 binary. Names must be TOML-bare-key safe.
  */
 function buildMcpConfigArgs(): string[] {
   const servers = loadMcpServersForCodex();
@@ -840,10 +1450,22 @@ function buildMcpConfigArgs(): string[] {
   for (const [name, cfg] of Object.entries(servers)) {
     if (!/^[A-Za-z0-9_-]+$/.test(name)) { log.warn(`[codex] skipping MCP server "${name}" — name not TOML-bare-key safe`); continue; }
     const c: any = cfg || {};
-    if (!c.command) { log.warn(`[codex] skipping MCP server "${name}" — no command`); continue; }
-    args.push('-c', `mcp_servers.${name}.command=${toToml(c.command)}`);
-    if (Array.isArray(c.args) && c.args.length) args.push('-c', `mcp_servers.${name}.args=${toToml(c.args)}`);
-    if (c.env && typeof c.env === 'object' && Object.keys(c.env).length) args.push('-c', `mcp_servers.${name}.env=${toToml(c.env)}`);
+    if (c.command) {
+      args.push('-c', `mcp_servers.${name}.command=${toToml(c.command)}`);
+      if (Array.isArray(c.args) && c.args.length) args.push('-c', `mcp_servers.${name}.args=${toTomlStringArray(c.args)}`);
+      if (c.env && typeof c.env === 'object' && Object.keys(c.env).length) args.push('-c', `mcp_servers.${name}.env=${toTomlStringMap(c.env)}`);
+    } else if (typeof c.url === 'string' && c.url) {
+      args.push('-c', `mcp_servers.${name}.url=${toToml(c.url)}`);
+      if (c.headers && typeof c.headers === 'object' && Object.keys(c.headers).length) {
+        args.push('-c', `mcp_servers.${name}.http_headers=${toTomlStringMap(c.headers)}`);
+      }
+      if (typeof c.bearer_token_env_var === 'string' && c.bearer_token_env_var) {
+        args.push('-c', `mcp_servers.${name}.bearer_token_env_var=${toToml(c.bearer_token_env_var)}`);
+      }
+    } else {
+      log.warn(`[codex] skipping MCP server "${name}" — no command or url`);
+      continue;
+    }
     wired++;
   }
   if (wired) log.info(`[codex] wiring ${wired} MCP server(s) from MCP.json via -c overrides`);
@@ -860,12 +1482,11 @@ export function isConversationBusy(conversationId: string): boolean {
   return conversations.get(conversationId)?.busy ?? false;
 }
-/** True if ANY live conversation in this harness is mid-turn. Used by the supervisor to defer
- *  backend restarts during channel/Alexa turns (which don't set the dashboard's agentQueryActive). */
-/** Codex one-shot queries (startBlobyAgentQuery) reuse the conversations map and set conv.busy via
- *  startTurn, so anyConversationBusy() already covers them — nothing extra to track here. */
+/** True while any one-shot is in its init window (token refresh + spawn +
+ *  initialize + thread/start). Once the conv registers in `conversations` and
+ *  startTurn sets busy, anyConversationBusy() takes over seamlessly. */
 export function anyOneShotActive(): boolean {
-  return false;
+  return inFlightOneShots.size > 0;
 }
 export function anyConversationBusy(): boolean {
@@ -881,15 +1502,31 @@ export async function startConversation(
   recentMessages?: RecentMessage[],
 ): Promise<boolean> {
   if (conversations.has(conversationId)) endConversation(conversationId);
+  // Typing dots NOW — the supervisor awaits this whole handshake before the
+  // first pushMessage can run, and without an early signal the user stares at
+  // a dead chat through spawn + initialize + thread/start on every session
+  // start (boot, clear-context, every proactive recycle).
+  onMessage('bot:typing', { conversationId });
   const baseInstructions = await assembleBaseInstructions(names, recentMessages);
-  const conv = await spawnAndInitialize(conversationId, model, onMessage, baseInstructions, false);
-  return !!conv;
+  const conv = await spawnAndInitialize(conversationId, model, onMessage, baseInstructions, {
+    oneShot: false,
+    wantSkills: true,
+    names,
+  });
+  if (!conv) {
+    // bot:typing above set the supervisor's agentQueryActive; bot:error alone
+    // doesn't clear it — conversation-ended does (and is safe for a conv that
+    // never registered).
+    onMessage('bot:conversation-ended', { conversationId });
+    return false;
+  }
+  return true;
 }
 export function pushMessage(
   conversationId: string,
   content: string,
-  _attachments?: AgentAttachment[],
+  attachments?: AgentAttachment[],
   savedFiles?: SavedFile[],
 ): boolean {
   const conv = conversations.get(conversationId);
@@ -897,7 +1534,7 @@ export function pushMessage(
     log.warn(`[codex] pushMessage: no live conversation ${conversationId}`);
     return false;
   }
-  void steerOrQueue(conv, content, savedFiles);
+  queueOrStart(conv, content, savedFiles, attachments);
   return true;
 }
@@ -916,18 +1553,14 @@ export function endConversation(conversationId: string): void {
 export function endAllConversations(): void {
   for (const id of Array.from(conversations.keys())) endConversation(id);
+  // The pre-warmed app-server may hold pre-re-auth state — drop it (a fresh
+  // warm-up fires from the teardown re-warm path with current credentials).
+  discardCodexWarmup();
 }
 export async function stopSubAgentTask(_conversationId: string, _taskId: string): Promise<void> {
-  // Codex doesn't expose Claude-style sub-agent tasks. No-op for now.
-}
-export async function warmUpForLiveConversation(
-  _model: string,
-  _names?: { botName: string; humanName: string },
-): Promise<void> {
-  // No subprocess pre-warming yet — `codex app-server` startup is fast enough
-  // (~hundreds of ms). Re-evaluate if it becomes noticeable on the Pi.
+  // Codex doesn't expose Claude-style sub-agent tasks (collab tools are still
+  // experimental and not enabled by our initialize). No-op.
 }
 export async function startBlobyAgentQuery(
@@ -935,24 +1568,45 @@ export async function startBlobyAgentQuery(
   prompt: string,
   model: string,
   onMessage: OnAgentMessage,
-  _attachments?: AgentAttachment[],
+  attachments?: AgentAttachment[],
   savedFiles?: SavedFile[],
   names?: { botName: string; humanName: string },
   recentMessages?: RecentMessage[],
   supportPrompt?: string,
-  _maxTurns?: number,
+  _maxTurns?: number, // no codex equivalent — bounded by ONE_SHOT_MAX_TURN_MS instead
 ): Promise<void> {
-  if (conversations.has(conversationId)) endConversation(conversationId);
-  const baseInstructions = supportPrompt
-    ? supportPrompt
-    : await assembleBaseInstructions(names, recentMessages);
-  const conv = await spawnAndInitialize(conversationId, model, onMessage, baseInstructions, true);
-  if (!conv) return;
-  await startTurn(conv, prompt, savedFiles);
+  inFlightOneShots.add(conversationId);
+  try {
+    if (conversations.has(conversationId)) endConversation(conversationId);
+    onMessage('bot:typing', { conversationId });
+    // Support personas ride supportPrompt as the FULL instructions (SCRIPT.md
+    // governs them) — but the per-customer message buffer must still be
+    // appended, exactly like claude: every one-shot is a fresh thread, so
+    // recentMessages IS the agent's only memory of the conversation.
+    let baseInstructions = supportPrompt ?? await assembleBaseInstructions(names, recentMessages);
+    if (supportPrompt && recentMessages?.length) {
+      baseInstructions += `\n\n---\n# Recent Conversation\n${formatConversationHistory(recentMessages)}`;
+    }
+    const conv = await spawnAndInitialize(conversationId, model, onMessage, baseInstructions, {
+      oneShot: true,
+      wantSkills: !supportPrompt,
+      names,
+    });
+    if (!conv) {
+      // Init failed (no auth / spawn / thread-start error — bot:error already
+      // emitted). bot:done frees the caller's slot; without it each failed
+      // customer message pins one of the 5 WhatsApp agent slots until restart.
+      onMessage('bot:done', { conversationId, usedFileTools: false });
+      return;
+    }
+    await startTurn(conv, prompt, savedFiles, attachments);
+  } finally {
+    inFlightOneShots.delete(conversationId);
+  }
 }
 export function stopBlobyAgentQuery(conversationId: string): void {
-  endConversation(conversationId);
+  endConversation(conversationId); // teardown guarantees the one-shot's bot:done
 }
 // ── Workspace agent endpoint (POST /api/agent/query) ──────────────────────
@@ -996,49 +1650,82 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
   let resolvedThreadId = req.sessionId || '';
   let resolveTurn: (() => void) | null = null;
   let turnError: string | null = null;
+  const itemTexts = new Map<string, string>();
   const turnDone = new Promise<void>((r) => { resolveTurn = r; });
   rpc.onNotification((n) => {
     const p = n.params || {};
     switch (n.method) {
       case 'item/agentMessage/delta': {
-        if (typeof p.delta === 'string') fullText += p.delta;
+        if (typeof p.delta === 'string') {
+          fullText += p.delta;
+          if (p.itemId) itemTexts.set(p.itemId, (itemTexts.get(p.itemId) || '') + p.delta);
+        }
         break;
       }
       case 'item/started': {
         const item = p.item || {};
-        if (item.type === 'commandExecution') usedTools.add('shell');
-        else if (item.type === 'mcpToolCall') usedTools.add(item.tool || 'mcp_tool');
-        else if (item.type === 'fileChange') { usedTools.add('file_change'); usedFileTools = true; }
-        else if (item.type === 'webSearch') usedTools.add('web_search');
+        // Same claude-vocabulary normalization as the live path — agent-api
+        // callers see the identical toolsUsed names on both harnesses.
+        if (item.type === 'commandExecution') usedTools.add('Bash');
+        else if (item.type === 'mcpToolCall') usedTools.add(item.tool ? (item.server ? `mcp__${item.server}__${item.tool}` : item.tool) : 'mcp_tool');
+        else if (item.type === 'fileChange') { usedTools.add('Edit'); usedFileTools = true; }
+        else if (item.type === 'webSearch') usedTools.add('WebSearch');
         break;
       }
       case 'item/completed': {
         const item = p.item || {};
         if (item.type === 'fileChange') usedFileTools = true;
-        if (item.type === 'agentMessage' && !fullText) {
-          const text = (item.content || []).map((c: any) => c.text || '').join('') || item.text || '';
-          if (text) fullText = text;
+        // item/completed is authoritative per item — same per-itemId
+        // reconciliation as the live path: append items that never streamed
+        // deltas, and the remainder when the final text extends past them.
+        if (item.type === 'agentMessage' && item.id) {
+          const finalText: string = (typeof item.text === 'string' && item.text)
+            ? item.text
+            : ((item.content || []).map((c: any) => c.text || '').join(''));
+          if (finalText) {
+            const streamed = itemTexts.get(item.id) || '';
+            let remainder = '';
+            if (!streamed) {
+              remainder = finalText;
+            } else if (finalText.length > streamed.length && finalText.startsWith(streamed)) {
+              remainder = finalText.slice(streamed.length);
+            }
+            if (remainder) {
+              if (!streamed && fullText && !fullText.endsWith('\n')) fullText += '\n\n';
+              fullText += remainder;
+              itemTexts.set(item.id, finalText);
+            }
+          }
         }
         break;
       }
       case 'turn/completed': {
         const status = p.turn?.status || 'completed';
         if (status === 'failed') {
-          turnError = p.turn?.error?.message || 'Codex turn failed.';
+          turnError = humanizeCodexError(p.turn?.error?.message, p.turn?.error?.codexErrorInfo, p.turn?.error?.additionalDetails);
         }
         resolveTurn?.();
         break;
       }
       case 'error': {
         if (p.willRetry) break; // transient — codex retries itself
-        turnError = p.error?.message || 'Codex error';
+        turnError = humanizeCodexError(p.error?.message || 'Codex error', p.error?.codexErrorInfo, p.error?.additionalDetails);
         resolveTurn?.();
         break;
       }
     }
   });
+  // App-server crash mid-query: without this, no notification ever resolves
+  // turnDone and the caller burns the FULL timeout before getting a generic
+  // "timed out" — surface the real exit error immediately instead.
+  rpc.onClose((code, errMsg) => {
+    if (!turnError) turnError = errMsg || `codex app-server exited unexpectedly (code=${code}).`;
+    resolveTurn?.();
+  });
   const timeoutHandle = setTimeout(() => {
     if (!turnError) turnError = `Query timed out after ${timeout}ms.`;
     resolveTurn?.();
@@ -1049,31 +1736,41 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
     await rpc.request('initialize', { clientInfo: CLIENT_INFO });
     rpc.notify('initialized', {});
+    // Same stale-model/effort auto-repair as spawnAndInitialize — agent-api is
+    // otherwise the one codex entry point that fails raw on a retired model.
+    const validated = await validateModelSelection(rpc, model, effort);
+    model = validated.modelId;
+    effort = validated.effort;
+    // Same execution posture on EVERY thread path — resume included.
+    // ThreadResumeParams accepts all of these in 0.138; without them a resumed
+    // session silently dropped the caller's systemPrompt and fell back to
+    // config-default sandbox/approval settings.
+    const threadOverrides = {
+      cwd: WORKSPACE_DIR,
+      model,
+      ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
+      personality: 'pragmatic',
+      approvalPolicy: 'never',
+      sandbox: 'danger-full-access',
+    };
     if (req.sessionId) {
       // Resume an existing thread (if codex still has it). Caller must accept
       // failure here — we fall back to a fresh thread.
       try {
-        const r = await rpc.request<{ thread: { id: string } }>('thread/resume', { threadId: req.sessionId });
+        const r = await rpc.request<{ thread: { id: string } }>('thread/resume', {
+          threadId: req.sessionId,
+          ...threadOverrides,
+        });
         resolvedThreadId = r.thread.id;
       } catch (err: any) {
         log.warn(`[codex/agent-api] thread/resume failed (${err.message}); starting fresh thread`);
-        const r = await rpc.request<{ thread: { id: string } }>('thread/start', {
-          cwd: WORKSPACE_DIR,
-          model,
-          ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
-          approvalPolicy: 'never',
-          sandbox: 'danger-full-access',
-        });
+        const r = await rpc.request<{ thread: { id: string } }>('thread/start', threadOverrides);
         resolvedThreadId = r.thread.id;
       }
     } else {
-      const r = await rpc.request<{ thread: { id: string } }>('thread/start', {
-        cwd: WORKSPACE_DIR,
-        model,
-        ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
-        approvalPolicy: 'never',
-        sandbox: 'danger-full-access',
-      });
+      const r = await rpc.request<{ thread: { id: string } }>('thread/start', threadOverrides);
       resolvedThreadId = r.thread.id;
     }