npm - bloby-bot - Versions diffs - 0.53.10 → 0.54.11 - Mend

bloby-bot 0.53.10 → 0.54.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/package.json +2 -2
package/shared/config.ts +5 -0
package/supervisor/backend.ts +29 -4
package/supervisor/channels/manager.ts +81 -19
package/supervisor/channels/types.ts +5 -0
package/supervisor/chat/src/components/Chat/EnvForm.tsx +2 -1
package/supervisor/harnesses/claude.ts +12 -2
package/supervisor/harnesses/codex.ts +289 -43
package/supervisor/harnesses/pi/index.ts +8 -1
package/supervisor/index.ts +126 -7
package/worker/prompts/bloby-system-prompt-codex.txt +778 -0
package/worker/prompts/bloby-system-prompt-pi.txt +778 -0
package/worker/prompts/prompt-assembler.ts +49 -14
package/workspace/skills/alexa/SKILL.md +5 -0
package/workspace/skills/mac/SKILL.md +5 -0
package/workspace/skills/plaud/SKILL.md +5 -0
package/workspace/skills/whatsapp/SKILL.md +30 -2

package/supervisor/harnesses/codex.ts CHANGED

@@ -42,7 +42,17 @@ export type { RecentMessage, AgentAttachment };
 const CLIENT_INFO = { name: 'bloby', title: 'Bloby', version: '1' };
 const REQUEST_TIMEOUT_MS = 60_000;
-const VALID_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh']);
+const VALID_EFFORTS = new Set(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']);
+/**
+ * Per-turn IDLE watchdog. `turn/completed` is a NON-guaranteed notification — if
+ * the app-server stalls mid-turn without exiting, the RPC `exit` handler never
+ * fires and `busy` stays true forever (live: wedges the dashboard + defers
+ * backend restarts; one-shot: pins the WhatsApp/scheduler slot since bot:done
+ * never arrives). This is an IDLE timeout, reset on every notification for the
+ * conversation — a legitimately long turn (deep reasoning, many tool calls)
+ * keeps emitting events and is never killed; only true silence trips recovery.
+ */
+const TURN_WATCHDOG_MS = 5 * 60_000;
 /**
  * Resolve the `codex` binary. We don't trust $PATH because Bloby may be
@@ -115,7 +125,7 @@ async function assembleBaseInstructions(
   recentMessages?: RecentMessage[],
 ): Promise<string> {
   const memoryFiles = readMemoryFiles();
-  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
+  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName, 'codex');
   let prompt = basePrompt;
   prompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
@@ -164,8 +174,8 @@ class CodexRpc {
   private closed = false;
   private stderrBuf = '';
-  start(): void {
-    this.proc = spawn(resolveCodexBin(), ['app-server'], { stdio: ['pipe', 'pipe', 'pipe'] });
+  start(extraArgs: string[] = []): void {
+    this.proc = spawn(resolveCodexBin(), ['app-server', ...extraArgs], { stdio: ['pipe', 'pipe', 'pipe'] });
     const rl = readline.createInterface({ input: this.proc.stdout });
     rl.on('line', (line) => this.onLine(line));
@@ -238,14 +248,30 @@ class CodexRpc {
   }
   private handleServerRequest(msg: { id: number; method: string; params?: any }): void {
-    const isApproval = msg.method.endsWith('/requestApproval');
-    if (isApproval) {
-      log.info(`[codex-rpc] auto-accepting server request: ${msg.method}`);
-      this.respond(msg.id, 'acceptForSession');
-      return;
+    // Responses are OBJECTS, not bare strings: CommandExecution/FileChange approval
+    // responses are `{ decision }` (CommandExecutionApprovalDecision / FileChangeApprovalDecision),
+    // and the legacy v1 aliases take `{ decision }` with the ReviewDecision enum.
+    // (None of these fire under approvalPolicy:'never' + danger-full-access, but reply
+    // correctly so an edge-case request can't stall the turn with a malformed reply.)
+    switch (msg.method) {
+      case 'item/commandExecution/requestApproval':
+      case 'item/fileChange/requestApproval':
+        log.info(`[codex-rpc] auto-accepting ${msg.method}`);
+        this.respond(msg.id, { decision: 'acceptForSession' });
+        return;
+      case 'execCommandApproval':
+      case 'applyPatchApproval':
+        log.info(`[codex-rpc] auto-accepting (legacy) ${msg.method}`);
+        this.respond(msg.id, { decision: 'approved_for_session' });
+        return;
+      // account/chatgptAuthTokens/refresh is only used by client-managed-token
+      // clients; Bloby authenticates via chatgpt OAuth and the app-server refreshes
+      // ~/.codex/auth.json itself, so this server-request never fires for us. Decline
+      // cleanly (a stale-credential edge would surface as a normal turn error instead).
+      default:
+        log.warn(`[codex-rpc] unhandled server request ${msg.method} — replying -32601`);
+        this.respondError(msg.id, -32601, `Method ${msg.method} not implemented by Bloby client`);
     }
-    log.warn(`[codex-rpc] unhandled server request ${msg.method} — replying with error`);
-    this.respondError(msg.id, -32601, `Method ${msg.method} not implemented by Bloby client`);
   }
   private respond(id: number, result: any): void {
@@ -296,8 +322,16 @@ class CodexRpc {
       p.reject(new Error('RPC connection closed'));
     }
     this.pending.clear();
-    try { this.proc?.stdin.end(); } catch {}
-    try { this.proc?.kill('SIGTERM'); } catch {}
+    const proc = this.proc;
+    try { proc?.stdin.end(); } catch {}
+    try { proc?.kill('SIGTERM'); } catch {}
+    // Escalate to SIGKILL if the app-server ignores SIGTERM (no true leak today
+    // since SIGTERM reaps it, but a SIGTERM-ignoring build would otherwise survive).
+    if (proc) {
+      const t = setTimeout(() => { try { proc.kill('SIGKILL'); } catch {} }, 2000);
+      if (typeof t.unref === 'function') t.unref();
+      proc.once('exit', () => clearTimeout(t));
+    }
     this.proc = null;
   }
 }
@@ -312,6 +346,9 @@ interface CodexConversation {
   onMessage: OnAgentMessage;
   /** Currently in-flight turn id (set on `turn/started`, cleared on `turn/completed`). */
   currentTurnId: string | null;
+  /** itemId of the agentMessage currently streaming — used to insert a paragraph
+   *  break when a turn emits multiple separate agentMessage items. */
+  currentMsgItemId: string | null;
   /** Streaming text accumulator for the current turn's agentMessage items. */
   fullText: string;
   /** Tools/items used during the current turn, for the bot:turn-complete payload. */
@@ -326,6 +363,15 @@ interface CodexConversation {
   busy: boolean;
   /** True for one-shot queries — the conversation ends after the first turn completes. */
   oneShot: boolean;
+  /**
+   * Latest context occupancy from `thread/tokenUsage/updated` (codex does NOT
+   * report usage on `turn/completed` — Turn has no usage field). Emitted on
+   * `bot:turn-complete` so the orchestrator's proactive recycling can fire.
+   */
+  lastContextTokens: number;
+  lastContextWindow: number;
+  /** Active per-turn watchdog timer (see TURN_WATCHDOG_MS). */
+  turnWatchdog: NodeJS.Timeout | null;
 }
 const conversations = new Map<string, CodexConversation>();
@@ -353,17 +399,59 @@ function buildUserInput(text: string, savedFiles?: SavedFile[]): Array<Record<st
   return input;
 }
+function clearTurnWatchdog(conv: CodexConversation): void {
+  if (conv.turnWatchdog) {
+    clearTimeout(conv.turnWatchdog);
+    conv.turnWatchdog = null;
+  }
+}
+/**
+ * Arm the per-turn watchdog. On fire, unstick the conversation the same way a
+ * real `turn/completed` would (so the dashboard, `anyConversationBusy`, and the
+ * channel slot all release), then tear the conversation down — the next message
+ * cold-starts a fresh thread.
+ */
+function armTurnWatchdog(conv: CodexConversation): void {
+  clearTurnWatchdog(conv);
+  conv.turnWatchdog = setTimeout(() => {
+    conv.turnWatchdog = null;
+    log.warn(`[codex] turn watchdog fired (${TURN_WATCHDOG_MS}ms) — conv=${conv.id}; unsticking + tearing down`);
+    conv.busy = false;
+    conv.currentTurnId = null;
+    conv.onMessage('bot:error', { conversationId: conv.id, error: 'Codex turn timed out — no response from app-server.' });
+    if (conv.oneShot) {
+      conv.onMessage('bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
+    } else {
+      conv.onMessage('bot:turn-complete', {
+        conversationId: conv.id,
+        usedFileTools: conv.usedFileTools,
+        contextTokens: conv.lastContextTokens || 0,
+        contextWindow: conv.lastContextWindow || 0,
+        idle: true,
+      });
+    }
+    teardownConversation(conv.id);
+  }, TURN_WATCHDOG_MS);
+}
 async function startTurn(conv: CodexConversation, content: string, savedFiles?: SavedFile[]): Promise<void> {
   const input = buildUserInput(content, savedFiles);
   conv.busy = true;
   conv.fullText = '';
   conv.usedFileTools = false;
   conv.onMessage('bot:typing', { conversationId: conv.id });
+  armTurnWatchdog(conv);
   try {
     const params: Record<string, any> = { threadId: conv.threadId, input };
     if (conv.effort) params.effort = conv.effort;
-    await conv.rpc.request('turn/start', params);
+    // turn/start resolves immediately with { turn }; seize the id now so a
+    // pushMessage arriving before the turn/started notification can steer
+    // instead of starting a second turn.
+    const res = await conv.rpc.request<{ turn?: { id?: string } }>('turn/start', params);
+    if (res?.turn?.id) conv.currentTurnId = res.turn.id;
   } catch (err: any) {
+    clearTurnWatchdog(conv);
     conv.busy = false;
     conv.currentTurnId = null;
     conv.onMessage('bot:error', { conversationId: conv.id, error: `turn/start failed: ${err.message}` });
@@ -385,11 +473,12 @@ async function steerOrQueue(conv: CodexConversation, content: string, savedFiles
   // Active turn — inject mid-flight.
   const input = buildUserInput(content, savedFiles);
   try {
-    await conv.rpc.request('turn/steer', {
+    const res = await conv.rpc.request<{ turnId?: string }>('turn/steer', {
       threadId: conv.threadId,
       expectedTurnId: conv.currentTurnId,
       input,
     });
+    if (res?.turnId) conv.currentTurnId = res.turnId;
     conv.onMessage('bot:typing', { conversationId: conv.id });
   } catch (err: any) {
     // expectedTurnId mismatch most likely means the turn just finished —
@@ -402,10 +491,14 @@ async function steerOrQueue(conv: CodexConversation, content: string, savedFiles
 function handleNotification(conv: CodexConversation, n: { method: string; params?: any }): void {
   const p = n.params || {};
+  // Any notification for this conv proves the app-server is alive and working —
+  // reset the idle watchdog so a long-but-active turn isn't torn down.
+  if (conv.turnWatchdog) armTurnWatchdog(conv);
   switch (n.method) {
     case 'turn/started': {
       conv.currentTurnId = p.turn?.id || null;
       conv.fullText = '';
+      conv.currentMsgItemId = null;
       conv.usedFileTools = false;
       break;
     }
@@ -413,6 +506,13 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
     case 'item/agentMessage/delta': {
       const delta: string = p.delta || '';
       if (!delta) break;
+      // A turn can emit multiple agentMessage items (commentary then final_answer).
+      // On a new itemId, insert a paragraph break so they don't run together (mirrors claude.ts).
+      if (p.itemId && conv.currentMsgItemId && p.itemId !== conv.currentMsgItemId && conv.fullText && !conv.fullText.endsWith('\n')) {
+        conv.fullText += '\n\n';
+        conv.onMessage('bot:token', { conversationId: conv.id, token: '\n\n' });
+      }
+      if (p.itemId) conv.currentMsgItemId = p.itemId;
       conv.fullText += delta;
       conv.onMessage('bot:token', { conversationId: conv.id, token: delta });
       break;
@@ -430,10 +530,11 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
           });
           break;
         case 'mcpToolCall':
+          // ThreadItem.mcpToolCall fields are `server` + `tool` (no toolName/name/input).
           conv.onMessage('bot:tool', {
             conversationId: conv.id,
-            name: item.toolName || item.name || 'mcp_tool',
-            input: item.arguments || item.input || {},
+            name: item.tool ? (item.server ? `${item.server}/${item.tool}` : item.tool) : 'mcp_tool',
+            input: item.arguments || {},
           });
           break;
         case 'fileChange':
@@ -451,6 +552,17 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
             input: { query: item.query || '' },
           });
           break;
+        case 'collabAgentToolCall':
+          // Codex's collaborating sub-agents (rarely enabled) → Bloby's sub-agent UX.
+          if (item.tool === 'spawnAgent') {
+            conv.onMessage('bot:task-created', {
+              conversationId: conv.id,
+              taskId: item.id,
+              description: item.prompt || 'sub-agent',
+              type: 'collab',
+            });
+          }
+          break;
         // userMessage / agentMessage / reasoning — no tool-style event.
       }
       break;
@@ -467,6 +579,27 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
           conv.onMessage('bot:token', { conversationId: conv.id, token: text });
         }
       }
+      if (item.type === 'collabAgentToolCall' && item.tool === 'spawnAgent') {
+        conv.onMessage('bot:task-done', {
+          conversationId: conv.id,
+          taskId: item.id,
+          status: item.status,
+          summary: item.prompt || '',
+        });
+      }
+      break;
+    }
+    case 'thread/tokenUsage/updated': {
+      // Codex's only token-usage signal. ThreadTokenUsage = { total, last, modelContextWindow };
+      // `last` is the current prompt occupancy (mirrors Claude's input+cacheRead+cacheCreation),
+      // the right basis for the recycle compare in supervisor/index.ts (fraction*window, not lifetime).
+      const tu = p.tokenUsage || {};
+      const last = tu.last || {};
+      conv.lastContextTokens = (last.inputTokens || 0) + (last.cachedInputTokens || 0);
+      if (typeof tu.modelContextWindow === 'number' && tu.modelContextWindow > 0) {
+        conv.lastContextWindow = tu.modelContextWindow;
+      }
       break;
     }
@@ -474,14 +607,17 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
       const status: string = p.turn?.status || 'completed';
       const turnError = p.turn?.error;
+      clearTurnWatchdog(conv);
       conv.currentTurnId = null;
       conv.busy = false;
-      if (status === 'failed' || status === 'systemError') {
+      if (status === 'failed') {
         conv.onMessage('bot:error', {
           conversationId: conv.id,
           error: turnError?.message || 'Codex turn failed.',
         });
+      } else if (status === 'interrupted') {
+        // Interrupted turns carry no final answer — stay silent.
       } else if (conv.fullText) {
         conv.onMessage('bot:response', { conversationId: conv.id, content: conv.fullText });
       }
@@ -490,16 +626,17 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
         conv.onMessage('bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
         teardownConversation(conv.id);
       } else {
-        // Context-size signal for the orchestrator's proactive session recycling.
-        // The codex app-server reports token usage on turn/completed; field names vary
-        // across versions, so probe defensively (0 if absent → falls back to codex's
-        // own built-in auto-compaction).
-        const tu: any = p.turn?.usage || p.usage || {};
-        const contextTokens = tu.input_tokens ?? tu.inputTokens ?? tu.total_tokens ?? tu.totalTokens ?? tu.tokens ?? 0;
-        const contextWindow = tu.context_window ?? tu.contextWindow ?? 0;
-        // idle = no message queued behind this turn (the drain happens just below).
+        // Context-size signal for the orchestrator's proactive session recycling,
+        // sourced from the cached `thread/tokenUsage/updated` values above. 0 if codex
+        // never sent one this turn → falls back to codex's own in-thread auto-compaction.
         const idle = conv.pendingInputs.length === 0;
-        conv.onMessage('bot:turn-complete', { conversationId: conv.id, usedFileTools: conv.usedFileTools, contextTokens, contextWindow, idle });
+        conv.onMessage('bot:turn-complete', {
+          conversationId: conv.id,
+          usedFileTools: conv.usedFileTools,
+          contextTokens: conv.lastContextTokens || 0,
+          contextWindow: conv.lastContextWindow || 0,
+          idle,
+        });
         // Drain any messages that were submitted while we were busy.
         const next = conv.pendingInputs.shift();
@@ -509,19 +646,35 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
     }
     case 'error': {
+      // ErrorNotification carries willRetry — codex will retry transient errors
+      // itself; don't surface those as a hard bot:error before the retry lands.
+      if (p.willRetry) {
+        log.info(`[codex] transient error (will retry): ${p.error?.message || 'unknown'}`);
+        break;
+      }
       const errMsg = p.error?.message || 'Codex error notification';
       conv.onMessage('bot:error', { conversationId: conv.id, error: errMsg });
       break;
     }
-    // thread/started, thread/status/changed, mcpServer/startupStatus/updated,
-    // remoteControl/status/changed — informational, no-op for the dashboard.
+    case 'mcpServer/startupStatus/updated': {
+      // Surface MCP servers (from MCP.json → -c overrides) that fail to start,
+      // so a misconfigured server is visible instead of silently absent.
+      if (p.status === 'failed' || p.status === 'cancelled') {
+        log.warn(`[codex] MCP server "${p.name}" ${p.status}${p.error ? `: ${p.error}` : ''}`);
+      }
+      break;
+    }
+    // thread/started, thread/status/changed, remoteControl/status/changed —
+    // informational, no-op for the dashboard.
   }
 }
 function teardownConversation(conversationId: string): void {
   const conv = conversations.get(conversationId);
   if (!conv) return;
+  clearTurnWatchdog(conv);
   conversations.delete(conversationId);
   try { conv.rpc.close(); } catch {}
   conv.onMessage('bot:conversation-ended', { conversationId });
@@ -531,7 +684,7 @@ async function spawnAndInitialize(
   conversationId: string,
   model: string,
   onMessage: OnAgentMessage,
-  baseInstructions: string,
+  instructions: string,
   oneShot: boolean,
 ): Promise<CodexConversation | null> {
   // Pre-flight: confirm we have valid OAuth tokens before spending time spawning.
@@ -546,7 +699,7 @@ async function spawnAndInitialize(
   const { id: modelId, effort } = parseModelString(model);
   const rpc = new CodexRpc();
-  rpc.start();
+  rpc.start(buildMcpConfigArgs());
   const conv: CodexConversation = {
     id: conversationId,
@@ -555,11 +708,15 @@ async function spawnAndInitialize(
     effort,
     onMessage,
     currentTurnId: null,
+    currentMsgItemId: null,
     fullText: '',
     usedFileTools: false,
     pendingInputs: [],
     busy: false,
     oneShot,
+    lastContextTokens: 0,
+    lastContextWindow: 0,
+    turnWatchdog: null,
   };
   rpc.onNotification((n) => handleNotification(conv, n));
@@ -582,7 +739,13 @@ async function spawnAndInitialize(
     const startResult = await rpc.request<{ thread: { id: string } }>('thread/start', {
       cwd: WORKSPACE_DIR,
       model: modelId,
-      baseInstructions,
+      // Bloby's persona/workflow prompt rides developerInstructions (ADDITIVE),
+      // NOT baseInstructions. baseInstructions fully OVERRIDES codex's native base
+      // prompt — which carries the apply_patch FREEFORM spec + shell protocol the
+      // model needs to edit files. Leaving baseInstructions unset keeps that native
+      // scaffolding; developerInstructions layers Bloby's persona on top of it.
+      developerInstructions: instructions,
+      personality: 'pragmatic',
       // Bloby's posture matches Claude's bypassPermissions — the bot is
       // running on the user's own machine with their full consent. Skip the
       // approval prompts and give it write access to the workspace + beyond.
@@ -609,27 +772,109 @@ async function spawnAndInitialize(
 }
 const SKILLS_DIR = path.join(WORKSPACE_DIR, 'skills');
+// Codex discovers "repo"-scope skills under `<cwd>/.codex/skills` (verified
+// against 0.135.0 — a bare `<cwd>/skills` is NOT scanned, and `skills/list`
+// has no extra-root param). Bloby keeps the canonical skills in
+// `workspace/skills/<name>`, so we mirror each one into `.codex/skills/<name>`
+// as a symlink — single source of truth, discoverable by codex's native router.
+// (Each SKILL.md needs YAML frontmatter or codex rejects it — see SKILL_FORMAT_MIGRATION.md.)
+const CODEX_SKILLS_ROOT = path.join(WORKSPACE_DIR, '.codex', 'skills');
+/** Mirror workspace/skills/<name> → workspace/.codex/skills/<name> as symlinks (idempotent). */
+function syncCodexSkillRoot(): void {
+  let names: string[] = [];
+  try {
+    names = fs.readdirSync(SKILLS_DIR, { withFileTypes: true })
+      .filter((e) => e.isDirectory() || e.isSymbolicLink())
+      .map((e) => e.name);
+  } catch {
+    return; // no skills dir — nothing to mirror
+  }
+  try { fs.mkdirSync(CODEX_SKILLS_ROOT, { recursive: true }); } catch {}
+  for (const name of names) {
+    const target = path.join(SKILLS_DIR, name);
+    const link = path.join(CODEX_SKILLS_ROOT, name);
+    try {
+      const cur = fs.existsSync(link) ? fs.realpathSync(link) : null;
+      if (cur === fs.realpathSync(target)) continue; // already correct
+      try { fs.rmSync(link, { recursive: true, force: true }); } catch {}
+      fs.symlinkSync(target, link, 'dir');
+    } catch (err: any) {
+      log.warn(`[codex] could not mirror skill "${name}" into .codex/skills: ${err.message}`);
+    }
+  }
+}
 function primeWorkspaceSkills(rpc: CodexRpc): void {
+  syncCodexSkillRoot();
   rpc.request('skills/list', {
     cwds: [WORKSPACE_DIR],
     forceReload: true,
-    perCwdExtraUserRoots: [{
-      cwd: WORKSPACE_DIR,
-      extraUserRoots: [SKILLS_DIR],
-    }],
   }).then((result: any) => {
     const entry = result?.data?.[0];
     const all = entry?.skills ?? [];
-    const workspace = all.filter((s: any) => s.scope !== 'system');
+    const repo = all.filter((s: any) => s.scope === 'repo');
     const errors = entry?.errors ?? [];
-    log.ok(`[codex] skills primed: ${workspace.length} workspace, ${all.length - workspace.length} system${errors.length ? `, ${errors.length} rejected` : ''}`);
+    log.ok(`[codex] skills primed: ${repo.length} workspace (repo), ${all.length - repo.length} user/system${errors.length ? `, ${errors.length} rejected` : ''}`);
     for (const err of errors) log.warn(`[codex] skill load error: ${err.path} — ${err.message}`);
   }).catch((err: any) => {
     log.warn(`[codex] skills/list failed: ${err.message}`);
   });
 }
+/* ── MCP wiring ────────────────────────────────────────────────────────── */
+const MCP_CONFIG_FILE = path.join(WORKSPACE_DIR, 'MCP.json');
+/**
+ * Load MCP servers from workspace/MCP.json (the same file the Claude harness
+ * reads). Accepts the canonical unwrapped map `{ name: { command, args, env } }`,
+ * a `{ mcpServers: {...} }` wrapper, or a legacy array of single-key maps.
+ * Returns {} when absent/invalid — so this is a no-op until the user populates MCP.json.
+ */
+function loadMcpServersForCodex(): Record<string, any> {
+  try {
+    const raw = JSON.parse(fs.readFileSync(MCP_CONFIG_FILE, 'utf-8'));
+    let map: any = raw;
+    if (raw && typeof raw === 'object' && raw.mcpServers && typeof raw.mcpServers === 'object') map = raw.mcpServers;
+    else if (Array.isArray(raw)) map = Object.assign({}, ...raw);
+    if (map && typeof map === 'object' && !Array.isArray(map)) return map;
+  } catch {}
+  return {};
+}
+/** Serialize a JS value as a TOML literal for a `-c key=value` override. */
+function toToml(v: any): string {
+  if (Array.isArray(v)) return `[${v.map(toToml).join(',')}]`;
+  if (v && typeof v === 'object') return `{${Object.entries(v).map(([k, val]) => `${JSON.stringify(k)}=${toToml(val)}`).join(',')}}`;
+  if (typeof v === 'number' || typeof v === 'boolean') return String(v);
+  return JSON.stringify(String(v)); // TOML basic string — JSON escaping is compatible
+}
+/**
+ * Translate MCP.json into `codex app-server -c mcp_servers.<name>.<field>=<toml>`
+ * spawn flags. Codex sources MCP from its own config layer rather than a per-query
+ * param (verified against 0.135.0: a `-c mcp_servers.X.command=...` override shows
+ * up in both mcpServerStatus/list and config/read). Only the stdio fields Bloby
+ * uses (command/args/env) are translated; names must be TOML-bare-key safe.
+ */
+function buildMcpConfigArgs(): string[] {
+  const servers = loadMcpServersForCodex();
+  const args: string[] = [];
+  let wired = 0;
+  for (const [name, cfg] of Object.entries(servers)) {
+    if (!/^[A-Za-z0-9_-]+$/.test(name)) { log.warn(`[codex] skipping MCP server "${name}" — name not TOML-bare-key safe`); continue; }
+    const c: any = cfg || {};
+    if (!c.command) { log.warn(`[codex] skipping MCP server "${name}" — no command`); continue; }
+    args.push('-c', `mcp_servers.${name}.command=${toToml(c.command)}`);
+    if (Array.isArray(c.args) && c.args.length) args.push('-c', `mcp_servers.${name}.args=${toToml(c.args)}`);
+    if (c.env && typeof c.env === 'object' && Object.keys(c.env).length) args.push('-c', `mcp_servers.${name}.env=${toToml(c.env)}`);
+    wired++;
+  }
+  if (wired) log.info(`[codex] wiring ${wired} MCP server(s) from MCP.json via -c overrides`);
+  return args;
+}
 /* ── Harness implementation ────────────────────────────────────────────── */
 export function hasConversation(conversationId: string): boolean {
@@ -762,7 +1007,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
   const timeout = Math.min(Math.max(req.timeout || 120_000, 5_000), 300_000);
   const rpc = new CodexRpc();
-  rpc.start();
+  rpc.start(buildMcpConfigArgs());
   let fullText = '';
   const usedTools = new Set<string>();
@@ -782,7 +1027,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
       case 'item/started': {
         const item = p.item || {};
         if (item.type === 'commandExecution') usedTools.add('shell');
-        else if (item.type === 'mcpToolCall') usedTools.add(item.toolName || item.name || 'mcp_tool');
+        else if (item.type === 'mcpToolCall') usedTools.add(item.tool || 'mcp_tool');
         else if (item.type === 'fileChange') { usedTools.add('file_change'); usedFileTools = true; }
         else if (item.type === 'webSearch') usedTools.add('web_search');
         break;
@@ -798,13 +1043,14 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
       }
       case 'turn/completed': {
         const status = p.turn?.status || 'completed';
-        if (status === 'failed' || status === 'systemError') {
+        if (status === 'failed') {
           turnError = p.turn?.error?.message || 'Codex turn failed.';
         }
         resolveTurn?.();
         break;
       }
       case 'error': {
+        if (p.willRetry) break; // transient — codex retries itself
         turnError = p.error?.message || 'Codex error';
         resolveTurn?.();
         break;
@@ -833,7 +1079,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
         const r = await rpc.request<{ thread: { id: string } }>('thread/start', {
           cwd: WORKSPACE_DIR,
           model,
-          ...(req.systemPrompt ? { baseInstructions: req.systemPrompt } : {}),
+          ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
           approvalPolicy: 'never',
           sandbox: 'danger-full-access',
         });
@@ -843,7 +1089,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
       const r = await rpc.request<{ thread: { id: string } }>('thread/start', {
         cwd: WORKSPACE_DIR,
         model,
-        ...(req.systemPrompt ? { baseInstructions: req.systemPrompt } : {}),
+        ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
         approvalPolicy: 'never',
         sandbox: 'danger-full-access',
       });

package/supervisor/harnesses/pi/index.ts CHANGED

@@ -107,7 +107,7 @@ async function buildSystemPrompt(
   recentMessages?: RecentMessage[],
 ): Promise<string> {
   const memoryFiles = readMemoryFiles();
-  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
+  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName, 'pi');
   let systemPrompt = basePrompt;
   systemPrompt += LIVE_CONVERSATION_HINT;
   systemPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
@@ -369,6 +369,12 @@ export async function startBlobyAgentQuery(
   const abortController = new AbortController();
   activeQueries.set(conversationId, abortController);
+  // Hard watchdog — a hung provider stream would otherwise pin this query forever (finally never
+  // runs, bot:done never fires). Abort after 5 min; cleared in the finally on normal completion.
+  const watchdog = setTimeout(() => {
+    log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
+    abortController.abort();
+  }, 300_000);
   let systemPrompt: string;
   if (supportPrompt) {
@@ -425,6 +431,7 @@ export async function startBlobyAgentQuery(
       onMessage('bot:error', { conversationId, error: err?.message || String(err) });
     }
   } finally {
+    clearTimeout(watchdog);
     activeQueries.delete(conversationId);
     const FILE_TOOL_NAMES = ['Write', 'Edit', 'write', 'edit'];
     const usedFileTools = FILE_TOOL_NAMES.some((t) => usedTools.has(t));