npm - bloby-bot - Versions diffs - 0.53.10 → 0.54.10 - Mend

bloby-bot 0.53.10 → 0.54.10

This diff shows the differences between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.

Files changed (14) hide show

package/package.json +2 -2
package/shared/config.ts +5 -0
package/supervisor/backend.ts +29 -4
package/supervisor/channels/manager.ts +81 -19
package/supervisor/channels/types.ts +5 -0
package/supervisor/chat/src/components/Chat/EnvForm.tsx +2 -1
package/supervisor/harnesses/claude.ts +12 -2
package/supervisor/harnesses/codex.ts +117 -22
package/supervisor/harnesses/pi/index.ts +8 -1
package/supervisor/index.ts +104 -7
package/worker/prompts/bloby-system-prompt-codex.txt +778 -0
package/worker/prompts/bloby-system-prompt-pi.txt +778 -0
package/worker/prompts/prompt-assembler.ts +49 -14
package/workspace/skills/whatsapp/SKILL.md +25 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bloby-bot",
-  "version": "0.53.10",
+  "version": "0.54.10",
   "releaseNotes": [
     "1. New Morphy animation system: config-driven sprites loaded from /morphy/*.json",
     "2. Swapped teleporting (splash) and headphones (bubble + chat) to the new format",
@@ -61,7 +61,7 @@
     "@streamdown/code": "^1.1.1",
     "@tailwindcss/vite": "^4.2.0",
     "@vitejs/plugin-react": "^6.0.1",
-    "@whiskeysockets/baileys": "^7.0.0-rc11",
+    "@whiskeysockets/baileys": "7.0.0-rc13",
     "better-sqlite3": "^12.6.2",
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",

package/shared/config.ts CHANGED Viewed

@@ -11,6 +11,11 @@ export interface ChannelConfig {
   skill?: string;
   /** Opt-in: process messages in group chats (default false). Channel mode ignores this. */
   allowGroups?: boolean;
+  /** Assistant mode only. When false (default) ONLY the account owner can trigger the agent
+   *  with `@botname`. When true, ANYONE who tags the bot (in a DM or group) can drive it.
+   *  DANGER: the triggerer gains control of an agent that can run Bash, edit files, etc.
+   *  Only enable for fully trusted shared use (e.g. a partner). See the WhatsApp SKILL.md. */
+  allowOthersToTrigger?: boolean;
 }
 export interface AlexaChannelConfig {

package/supervisor/backend.ts CHANGED Viewed

@@ -14,6 +14,15 @@ let intentionallyStopped = false;
 let gaveUp = false;
 const MAX_RESTARTS = 3;
 const STABLE_THRESHOLD = 30_000; // 30s — if backend ran this long, it wasn't a crash loop
+// Rolling-window backstop: the 30s "stable" rule resets the consecutive counter, so a backend
+// that crashes every ~35s would otherwise restart forever. Give up if it crashes too often within
+// the window (kept generous so a legitimately long-running-then-crashing backend isn't penalized).
+let crashTimes: number[] = [];
+const CRASH_WINDOW_MS = 5 * 60_000; // 5 min
+const CRASH_WINDOW_MAX = 6;         // > this many crashes in the window → give up regardless of the 30s reset
+// Called once when the backend gives up, so the supervisor can tell the live chat (which exists
+// precisely so the user can be told to fix it). Set via setBackendGiveUpHandler; logging-only default.
+let onGiveUp: (() => void) | null = null;
 /** Extra env vars injected into every backend spawn (e.g. BLOBY_AGENT_SECRET) */
 let extraEnv: Record<string, string> = {};
@@ -23,6 +32,12 @@ export function setBackendEnv(env: Record<string, string>): void {
   extraEnv = { ...extraEnv, ...env };
 }
+/** Register a callback fired once when the backend gives up (crash-looped past the limits).
+ *  The supervisor wires this to broadcast a chat event so the user is told to fix it. */
+export function setBackendGiveUpHandler(fn: () => void): void {
+  onGiveUp = fn;
+}
 const LOG_FILE = path.join(WORKSPACE_DIR, '.backend.log');
 export function getBackendPort(basePort: number): number {
@@ -101,18 +116,24 @@ export function spawnBackend(port: number): ChildProcess {
     // Any unexpected exit (crash, SIGTERM, OOM, null code) — restart
     log.warn(`Backend exited unexpectedly (code ${code})`);
-    // If backend was alive for >30s, it's not a crash loop — reset counter
-    if (Date.now() - lastSpawnTime > STABLE_THRESHOLD) {
+    // Track crashes in a rolling window (backstop for the 30s-reset crash-loop hole).
+    const now = Date.now();
+    crashTimes = crashTimes.filter((t) => now - t < CRASH_WINDOW_MS);
+    crashTimes.push(now);
+    const windowExceeded = crashTimes.length > CRASH_WINDOW_MAX;
+    // If backend was alive for >30s, it's not a crash loop — reset the consecutive counter.
+    if (now - lastSpawnTime > STABLE_THRESHOLD) {
       restarts = 0;
     }
-    if (restarts < MAX_RESTARTS) {
+    if (!windowExceeded && restarts < MAX_RESTARTS) {
       restarts++;
       const delay = Math.min(1000 * restarts, 5000);
       log.info(`Restarting backend (${restarts}/${MAX_RESTARTS}, delay ${delay}ms)...`);
       setTimeout(() => spawnBackend(port), delay);
     } else {
       gaveUp = true;
-      log.error('Backend failed too many times. Use Bloby chat to debug.');
+      log.error(`Backend failed too many times${windowExceeded ? ` (${crashTimes.length} crashes in ${CRASH_WINDOW_MS / 60000}min)` : ''}. Use Bloby chat to debug.`);
+      try { onGiveUp?.(); } catch {}
     }
   });
@@ -212,4 +233,8 @@ export function isBackendStopping(): boolean {
 export function resetBackendRestarts(): void {
   restarts = 0;
+  // A deliberate restart (file edit, user fix, scheduler) is a fresh attempt — clear the rolling
+  // crash window too so a just-fixed backend gets a clean slate (deliberate stops never record a
+  // crash, so this only matters right after a give-up + fix).
+  crashTimes = [];
 }

package/supervisor/channels/manager.ts CHANGED Viewed

@@ -83,6 +83,11 @@ interface DebounceEntry {
 /** Per-conversation accumulator for streaming bot text → WhatsApp. */
 export interface WaStreamState {
   chunkBuf: string;
+  /** True once the CURRENT turn has consumed its routing target (via `bot:response` or
+   *  `bot:error`). Reset on `bot:turn-complete`. Guards the turn-complete safety-net drain so a
+   *  normal turn — which already consumed on `bot:response` — never double-drains and eats the
+   *  NEXT queued message's target (the root cause of chat↔WhatsApp bleed and DM-answered-in-group). */
+  consumedThisTurn?: boolean;
 }
 /** Agent-turn events that carry per-turn content. Broadcast only for dashboard surfaces
@@ -489,6 +494,7 @@ export class ChannelManager {
     if (type === 'bot:response' && eventData?.content && convId) {
       const target = this.consumeRoute(convId);
+      state.consumedThisTurn = true;
       const remaining = state.chunkBuf.trim();
       state.chunkBuf = '';
@@ -514,20 +520,46 @@ export class ChannelManager {
       return;
     }
-    // Turn ended (or errored) without a bot:response — drain the head entry so it
-    // doesn't bleed into the next turn's reply. The SDK guarantees one response per
-    // pushed input; this safety net covers aborts, empty turns, and provider errors.
-    if ((type === 'bot:turn-complete' || type === 'bot:error') && convId) {
-      const head = this.peekRoute(convId);
-      if (head) {
+    // Turn errored without a usable reply — drain THIS turn's route so it can't bleed into the
+    // next turn. Guarded by `consumedThisTurn`: if `bot:response` already fired this turn it
+    // consumed the target, so we must NOT drain again.
+    if (type === 'bot:error' && convId) {
+      if (!state.consumedThisTurn) {
         const dropped = this.consumeRoute(convId);
-        log.warn(`[channels] ${type} without bot:response — dropping pending route (surface=${dropped?.surface}, to=${dropped?.waSendTo || 'none'})`);
-        if (dropped?.surface === 'alexa') {
-          const alexa = this.providers.get('alexa') as AlexaChannel | undefined;
-          alexa?.rejectHead(convId, type);
+        if (dropped) {
+          log.warn(`[channels] bot:error without bot:response — dropping pending route (surface=${dropped.surface}, to=${dropped.waSendTo || 'none'})`);
+          if (dropped.surface === 'alexa') {
+            const alexa = this.providers.get('alexa') as AlexaChannel | undefined;
+            alexa?.rejectHead(convId, type);
+          }
         }
+        state.consumedThisTurn = true;
       }
       state.chunkBuf = '';
+      return;
+    }
+    // Turn finished. Drain the head ONLY if this turn never consumed its route — i.e. a true
+    // empty turn (no `bot:response`, no `bot:error`; see harness claude.ts which always emits
+    // `bot:turn-complete` after every result). A normal turn already consumed on `bot:response`,
+    // so draining here would eat the NEXT queued message's target and every later reply would
+    // land on the wrong surface (chat↔WhatsApp bleed, DM answered in a group). Reset the per-turn
+    // flag afterwards so the next turn starts clean. Turns are strictly sequential per conversation
+    // (single input queue, one `result` at a time), so this per-conversation flag is race-free.
+    if (type === 'bot:turn-complete' && convId) {
+      if (!state.consumedThisTurn) {
+        const head = this.peekRoute(convId);
+        if (head) {
+          const dropped = this.consumeRoute(convId);
+          log.warn(`[channels] turn-complete without bot:response — dropping pending route (surface=${dropped?.surface}, to=${dropped?.waSendTo || 'none'})`);
+          if (dropped?.surface === 'alexa') {
+            const alexa = this.providers.get('alexa') as AlexaChannel | undefined;
+            alexa?.rejectHead(convId, type);
+          }
+        }
+      }
+      state.consumedThisTurn = false;
+      state.chunkBuf = '';
     }
   }
@@ -556,6 +588,22 @@ export class ChannelManager {
     return config.channels?.[channel];
   }
+  /** Robust "is this the account owner's own self-chat?" check.
+   *
+   *  Keys purely on JID equality (`isSelfChat` — the chat resolves to the owner's OWN number,
+   *  computed in whatsapp.ts from `ownPhoneJid` + LID translation). This is authoritative and is
+   *  UNAFFECTED by Baileys' `fromMe` decode regressions (e.g. the rc11→rc13 protocolMessage
+   *  `fromMe=false` bug): the chat JID of a self-message is still the owner's own number even when
+   *  `fromMe` decodes wrong. So we deliberately do NOT also require `fromMe` (which the old gate
+   *  did — that's what silently dropped self-messages under rc11).
+   *
+   *  We also deliberately do NOT treat `fromMe` alone as self-chat: `fromMe` is true for the owner
+   *  messaging a CONTACT from a linked device too, so a `fromMe`-based OR would misroute those into
+   *  the admin brain. Only own-number JID equality is safe and false-positive-free. */
+  private isOwnerSelfChat(isSelfChat: boolean, isGroup: boolean): boolean {
+    return !isGroup && isSelfChat;
+  }
   /** Handle an incoming message from any channel — debounces rapid messages from the same sender.
    *
    * Per-mode behavior is decided here. To add a new mode: extend the gating block below
@@ -586,26 +634,36 @@ export class ChannelManager {
       if (!channelConfig.allowGroups) return;
     }
+    // Owner self-chat — JID-based, immune to Baileys `fromMe` decode regressions.
+    const selfChat = this.isOwnerSelfChat(isSelfChat, isGroup);
     // ── Channel mode: ONLY respond to self-chat ──
     if (mode === 'channel') {
-      if (!fromMe || !isSelfChat) return;
+      if (!selfChat) return;
     }
-    // ── Business mode: filter outgoing (except self-chat) ──
-    if (mode === 'business' && fromMe && !isSelfChat) return;
+    // ── Business mode: filter outgoing to others (your messages to customers, not self-chat) ──
+    if (mode === 'business' && fromMe && !selfChat) return;
     // ── Assistant mode ──
     // Self-chat: falls through to debounce (processed as admin)
-    // Others' messages or my untriggered messages: store for context, don't invoke
-    // My messages with @botname trigger: falls through to debounce → agent
-    if (mode === 'assistant' && !(fromMe && isSelfChat)) {
+    // Others' messages or untriggered messages: store for context, don't invoke
+    // Triggered messages: fall through to debounce → agent (owner always; others only if opted-in)
+    if (mode === 'assistant' && !selfChat) {
       // Store every message for context (both mine and theirs) — keyed by the chat (group or 1:1)
       this.storeAssistantContext(channel, chatJid, senderName, text, fromMe);
-      // Only continue if it's me AND the message contains the trigger
+      // Trigger must be present.
       const botName = loadConfig().username || 'bloby';
       const triggerPattern = new RegExp(`(?:^|\\n)\\s*@${botName}[:\\s]`, 'i');
-      if (!fromMe || !triggerPattern.test(text)) return;
+      if (!triggerPattern.test(text)) return;
+      // Who may drive the agent? By default ONLY the account owner (fromMe). When the channel is
+      // explicitly configured with `allowOthersToTrigger`, anyone who tags the bot can — a
+      // deliberately dangerous shared-control mode (the triggerer gets an agent with Bash, file
+      // access, etc.; see the WhatsApp SKILL.md disclaimer).
+      const allowOthers = channelConfig.allowOthersToTrigger === true;
+      if (!fromMe && !allowOthers) return;
       // Falls through to debounce → flushDebounce → handleAssistantMessage
     }
@@ -660,8 +718,12 @@ export class ChannelManager {
     // Reply identifier — strip JID suffix to get a stable id (phone for 1:1, group hash for groups)
     const chatId = chatJid.replace(/@.*/, '');
+    // Owner self-chat — JID-based, immune to Baileys `fromMe` decode regressions (matches the
+    // gate in handleInboundMessage so a self-message can't pass one check and fail the other).
+    const selfChat = this.isOwnerSelfChat(isSelfChat, isGroup);
     // Route based on mode and role
-    if (mode === 'channel' || (mode === 'business' && fromMe && isSelfChat) || (mode === 'assistant' && fromMe && isSelfChat)) {
+    if (mode === 'channel' || (mode === 'business' && selfChat) || (mode === 'assistant' && selfChat)) {
       // Admin (self-chat in any mode)
       const message: InboundMessage = {
         channel,

package/supervisor/channels/types.ts CHANGED Viewed

@@ -15,6 +15,11 @@ export interface ChannelConfig {
   skill?: string;
   /** Opt-in: process messages in group chats (default false). Channel mode ignores this. */
   allowGroups?: boolean;
+  /** Assistant mode only. When false (default) ONLY the account owner (fromMe) can trigger
+   *  the agent with `@botname`. When true, ANYONE who tags the bot (DM or group) can drive it.
+   *  DANGER: the triggerer gains full control of an agent that can run Bash, edit files, etc.
+   *  Enable only for fully trusted shared use. See the WhatsApp SKILL.md disclaimer. */
+  allowOthersToTrigger?: boolean;
 }
 export interface InboundMessageAttachment {

package/supervisor/chat/src/components/Chat/EnvForm.tsx CHANGED Viewed

@@ -1,5 +1,6 @@
 import { useState } from 'react';
 import { Check, KeyRound, Loader2, AlertCircle } from 'lucide-react';
+import { authFetch } from '../../lib/auth';
 export interface EnvField {
   name: string;
@@ -34,7 +35,7 @@ export default function EnvForm({ group }: Props) {
     setErrorMsg('');
     try {
-      const res = await fetch('/api/env', {
+      const res = await authFetch('/api/env', {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify({ vars: Object.fromEntries(filled) }),

package/supervisor/harnesses/claude.ts CHANGED Viewed

@@ -191,7 +191,7 @@ async function buildConversationOptions(
   recentMessages?: RecentMessage[],
 ): Promise<Omit<Options, 'abortController' | 'stderr'>> {
   const memoryFiles = readMemoryFiles();
-  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
+  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName, 'claude');
   let systemPrompt = basePrompt;
   systemPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
@@ -594,7 +594,7 @@ export async function startBlobyAgentQuery(
   if (supportPrompt) {
     enrichedPrompt = supportPrompt;
   } else {
-    const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
+    const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName, 'claude');
     enrichedPrompt = basePrompt;
     enrichedPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
@@ -614,6 +614,15 @@ export async function startBlobyAgentQuery(
   activeQueries.set(conversationId, { abortController });
+  // Hard watchdog: a hung CLI subprocess (network stall, stuck MCP) would otherwise leave the
+  // `for await` loop pending forever — the finally never runs, bot:done never fires, and the
+  // caller's per-conversation slot (WhatsApp activeAgents / scheduler) is pinned for good. Abort
+  // after 5 min so the finally always emits bot:done. Cleared on normal completion.
+  const watchdog = setTimeout(() => {
+    log.warn(`[bloby-agent] One-shot query timed out (5m) — aborting conv=${conversationId}`);
+    abortController.abort();
+  }, 300_000);
   let fullText = '';
   const usedTools = new Set<string>();
   let stderrBuf = '';
@@ -705,6 +714,7 @@ export async function startBlobyAgentQuery(
       onMessage('bot:error', { conversationId, error: errMsg });
     }
   } finally {
+    clearTimeout(watchdog);
     activeQueries.delete(conversationId);
     const FILE_TOOLS = ['Write', 'Edit', 'MultiEdit', 'NotebookEdit'];
     const usedFileTools = FILE_TOOLS.some((t) => usedTools.has(t));

package/supervisor/harnesses/codex.ts CHANGED Viewed

@@ -42,7 +42,15 @@ export type { RecentMessage, AgentAttachment };
 const CLIENT_INFO = { name: 'bloby', title: 'Bloby', version: '1' };
 const REQUEST_TIMEOUT_MS = 60_000;
-const VALID_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh']);
+const VALID_EFFORTS = new Set(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']);
+/**
+ * Per-turn watchdog. `turn/completed` is a NON-guaranteed notification — if the
+ * app-server stalls mid-turn without exiting, the RPC `exit` handler never fires
+ * and `busy` stays true forever (live: wedges the dashboard + defers backend
+ * restarts; one-shot: pins the WhatsApp/scheduler slot since bot:done never
+ * arrives). Claude's one-shot path has the same 5-min guard. Mirrors it here.
+ */
+const TURN_WATCHDOG_MS = 5 * 60_000;
 /**
  * Resolve the `codex` binary. We don't trust $PATH because Bloby may be
@@ -115,7 +123,7 @@ async function assembleBaseInstructions(
   recentMessages?: RecentMessage[],
 ): Promise<string> {
   const memoryFiles = readMemoryFiles();
-  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
+  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName, 'codex');
   let prompt = basePrompt;
   prompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
@@ -326,6 +334,15 @@ interface CodexConversation {
   busy: boolean;
   /** True for one-shot queries — the conversation ends after the first turn completes. */
   oneShot: boolean;
+  /**
+   * Latest context occupancy from `thread/tokenUsage/updated` (codex does NOT
+   * report usage on `turn/completed` — Turn has no usage field). Emitted on
+   * `bot:turn-complete` so the orchestrator's proactive recycling can fire.
+   */
+  lastContextTokens: number;
+  lastContextWindow: number;
+  /** Active per-turn watchdog timer (see TURN_WATCHDOG_MS). */
+  turnWatchdog: NodeJS.Timeout | null;
 }
 const conversations = new Map<string, CodexConversation>();
@@ -353,17 +370,59 @@ function buildUserInput(text: string, savedFiles?: SavedFile[]): Array<Record<st
   return input;
 }
+function clearTurnWatchdog(conv: CodexConversation): void {
+  if (conv.turnWatchdog) {
+    clearTimeout(conv.turnWatchdog);
+    conv.turnWatchdog = null;
+  }
+}
+/**
+ * Arm the per-turn watchdog. On fire, unstick the conversation the same way a
+ * real `turn/completed` would (so the dashboard, `anyConversationBusy`, and the
+ * channel slot all release), then tear the conversation down — the next message
+ * cold-starts a fresh thread.
+ */
+function armTurnWatchdog(conv: CodexConversation): void {
+  clearTurnWatchdog(conv);
+  conv.turnWatchdog = setTimeout(() => {
+    conv.turnWatchdog = null;
+    log.warn(`[codex] turn watchdog fired (${TURN_WATCHDOG_MS}ms) — conv=${conv.id}; unsticking + tearing down`);
+    conv.busy = false;
+    conv.currentTurnId = null;
+    conv.onMessage('bot:error', { conversationId: conv.id, error: 'Codex turn timed out — no response from app-server.' });
+    if (conv.oneShot) {
+      conv.onMessage('bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
+    } else {
+      conv.onMessage('bot:turn-complete', {
+        conversationId: conv.id,
+        usedFileTools: conv.usedFileTools,
+        contextTokens: conv.lastContextTokens || 0,
+        contextWindow: conv.lastContextWindow || 0,
+        idle: true,
+      });
+    }
+    teardownConversation(conv.id);
+  }, TURN_WATCHDOG_MS);
+}
 async function startTurn(conv: CodexConversation, content: string, savedFiles?: SavedFile[]): Promise<void> {
   const input = buildUserInput(content, savedFiles);
   conv.busy = true;
   conv.fullText = '';
   conv.usedFileTools = false;
   conv.onMessage('bot:typing', { conversationId: conv.id });
+  armTurnWatchdog(conv);
   try {
     const params: Record<string, any> = { threadId: conv.threadId, input };
     if (conv.effort) params.effort = conv.effort;
-    await conv.rpc.request('turn/start', params);
+    // turn/start resolves immediately with { turn }; seize the id now so a
+    // pushMessage arriving before the turn/started notification can steer
+    // instead of starting a second turn.
+    const res = await conv.rpc.request<{ turn?: { id?: string } }>('turn/start', params);
+    if (res?.turn?.id) conv.currentTurnId = res.turn.id;
   } catch (err: any) {
+    clearTurnWatchdog(conv);
     conv.busy = false;
     conv.currentTurnId = null;
     conv.onMessage('bot:error', { conversationId: conv.id, error: `turn/start failed: ${err.message}` });
@@ -385,11 +444,12 @@ async function steerOrQueue(conv: CodexConversation, content: string, savedFiles
   // Active turn — inject mid-flight.
   const input = buildUserInput(content, savedFiles);
   try {
-    await conv.rpc.request('turn/steer', {
+    const res = await conv.rpc.request<{ turnId?: string }>('turn/steer', {
       threadId: conv.threadId,
       expectedTurnId: conv.currentTurnId,
       input,
     });
+    if (res?.turnId) conv.currentTurnId = res.turnId;
     conv.onMessage('bot:typing', { conversationId: conv.id });
   } catch (err: any) {
     // expectedTurnId mismatch most likely means the turn just finished —
@@ -430,10 +490,11 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
           });
           break;
         case 'mcpToolCall':
+          // ThreadItem.mcpToolCall fields are `server` + `tool` (no toolName/name/input).
           conv.onMessage('bot:tool', {
             conversationId: conv.id,
-            name: item.toolName || item.name || 'mcp_tool',
-            input: item.arguments || item.input || {},
+            name: item.tool ? (item.server ? `${item.server}/${item.tool}` : item.tool) : 'mcp_tool',
+            input: item.arguments || {},
           });
           break;
         case 'fileChange':
@@ -470,18 +531,34 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
       break;
     }
+    case 'thread/tokenUsage/updated': {
+      // Codex's only token-usage signal. ThreadTokenUsage = { total, last, modelContextWindow };
+      // `last` is the current prompt occupancy (mirrors Claude's input+cacheRead+cacheCreation),
+      // the right basis for the recycle compare in supervisor/index.ts (fraction*window, not lifetime).
+      const tu = p.tokenUsage || {};
+      const last = tu.last || {};
+      conv.lastContextTokens = (last.inputTokens || 0) + (last.cachedInputTokens || 0);
+      if (typeof tu.modelContextWindow === 'number' && tu.modelContextWindow > 0) {
+        conv.lastContextWindow = tu.modelContextWindow;
+      }
+      break;
+    }
     case 'turn/completed': {
       const status: string = p.turn?.status || 'completed';
       const turnError = p.turn?.error;
+      clearTurnWatchdog(conv);
       conv.currentTurnId = null;
       conv.busy = false;
-      if (status === 'failed' || status === 'systemError') {
+      if (status === 'failed') {
         conv.onMessage('bot:error', {
           conversationId: conv.id,
           error: turnError?.message || 'Codex turn failed.',
         });
+      } else if (status === 'interrupted') {
+        // Interrupted turns carry no final answer — stay silent.
       } else if (conv.fullText) {
         conv.onMessage('bot:response', { conversationId: conv.id, content: conv.fullText });
       }
@@ -490,16 +567,17 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
         conv.onMessage('bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
         teardownConversation(conv.id);
       } else {
-        // Context-size signal for the orchestrator's proactive session recycling.
-        // The codex app-server reports token usage on turn/completed; field names vary
-        // across versions, so probe defensively (0 if absent → falls back to codex's
-        // own built-in auto-compaction).
-        const tu: any = p.turn?.usage || p.usage || {};
-        const contextTokens = tu.input_tokens ?? tu.inputTokens ?? tu.total_tokens ?? tu.totalTokens ?? tu.tokens ?? 0;
-        const contextWindow = tu.context_window ?? tu.contextWindow ?? 0;
-        // idle = no message queued behind this turn (the drain happens just below).
+        // Context-size signal for the orchestrator's proactive session recycling,
+        // sourced from the cached `thread/tokenUsage/updated` values above. 0 if codex
+        // never sent one this turn → falls back to codex's own in-thread auto-compaction.
         const idle = conv.pendingInputs.length === 0;
-        conv.onMessage('bot:turn-complete', { conversationId: conv.id, usedFileTools: conv.usedFileTools, contextTokens, contextWindow, idle });
+        conv.onMessage('bot:turn-complete', {
+          conversationId: conv.id,
+          usedFileTools: conv.usedFileTools,
+          contextTokens: conv.lastContextTokens || 0,
+          contextWindow: conv.lastContextWindow || 0,
+          idle,
+        });
         // Drain any messages that were submitted while we were busy.
         const next = conv.pendingInputs.shift();
@@ -509,6 +587,12 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
     }
     case 'error': {
+      // ErrorNotification carries willRetry — codex will retry transient errors
+      // itself; don't surface those as a hard bot:error before the retry lands.
+      if (p.willRetry) {
+        log.info(`[codex] transient error (will retry): ${p.error?.message || 'unknown'}`);
+        break;
+      }
       const errMsg = p.error?.message || 'Codex error notification';
       conv.onMessage('bot:error', { conversationId: conv.id, error: errMsg });
       break;
@@ -522,6 +606,7 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
 function teardownConversation(conversationId: string): void {
   const conv = conversations.get(conversationId);
   if (!conv) return;
+  clearTurnWatchdog(conv);
   conversations.delete(conversationId);
   try { conv.rpc.close(); } catch {}
   conv.onMessage('bot:conversation-ended', { conversationId });
@@ -531,7 +616,7 @@ async function spawnAndInitialize(
   conversationId: string,
   model: string,
   onMessage: OnAgentMessage,
-  baseInstructions: string,
+  instructions: string,
   oneShot: boolean,
 ): Promise<CodexConversation | null> {
   // Pre-flight: confirm we have valid OAuth tokens before spending time spawning.
@@ -560,6 +645,9 @@ async function spawnAndInitialize(
     pendingInputs: [],
     busy: false,
     oneShot,
+    lastContextTokens: 0,
+    lastContextWindow: 0,
+    turnWatchdog: null,
   };
   rpc.onNotification((n) => handleNotification(conv, n));
@@ -582,7 +670,13 @@ async function spawnAndInitialize(
     const startResult = await rpc.request<{ thread: { id: string } }>('thread/start', {
       cwd: WORKSPACE_DIR,
       model: modelId,
-      baseInstructions,
+      // Bloby's persona/workflow prompt rides developerInstructions (ADDITIVE),
+      // NOT baseInstructions. baseInstructions fully OVERRIDES codex's native base
+      // prompt — which carries the apply_patch FREEFORM spec + shell protocol the
+      // model needs to edit files. Leaving baseInstructions unset keeps that native
+      // scaffolding; developerInstructions layers Bloby's persona on top of it.
+      developerInstructions: instructions,
+      personality: 'pragmatic',
       // Bloby's posture matches Claude's bypassPermissions — the bot is
       // running on the user's own machine with their full consent. Skip the
       // approval prompts and give it write access to the workspace + beyond.
@@ -782,7 +876,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
       case 'item/started': {
         const item = p.item || {};
         if (item.type === 'commandExecution') usedTools.add('shell');
-        else if (item.type === 'mcpToolCall') usedTools.add(item.toolName || item.name || 'mcp_tool');
+        else if (item.type === 'mcpToolCall') usedTools.add(item.tool || 'mcp_tool');
         else if (item.type === 'fileChange') { usedTools.add('file_change'); usedFileTools = true; }
         else if (item.type === 'webSearch') usedTools.add('web_search');
         break;
@@ -798,13 +892,14 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
       }
       case 'turn/completed': {
         const status = p.turn?.status || 'completed';
-        if (status === 'failed' || status === 'systemError') {
+        if (status === 'failed') {
           turnError = p.turn?.error?.message || 'Codex turn failed.';
         }
         resolveTurn?.();
         break;
       }
       case 'error': {
+        if (p.willRetry) break; // transient — codex retries itself
         turnError = p.error?.message || 'Codex error';
         resolveTurn?.();
         break;
@@ -833,7 +928,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
         const r = await rpc.request<{ thread: { id: string } }>('thread/start', {
           cwd: WORKSPACE_DIR,
           model,
-          ...(req.systemPrompt ? { baseInstructions: req.systemPrompt } : {}),
+          ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
           approvalPolicy: 'never',
           sandbox: 'danger-full-access',
         });
@@ -843,7 +938,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
       const r = await rpc.request<{ thread: { id: string } }>('thread/start', {
         cwd: WORKSPACE_DIR,
         model,
-        ...(req.systemPrompt ? { baseInstructions: req.systemPrompt } : {}),
+        ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
         approvalPolicy: 'never',
         sandbox: 'danger-full-access',
       });

package/supervisor/harnesses/pi/index.ts CHANGED Viewed

@@ -107,7 +107,7 @@ async function buildSystemPrompt(
   recentMessages?: RecentMessage[],
 ): Promise<string> {
   const memoryFiles = readMemoryFiles();
-  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
+  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName, 'pi');
   let systemPrompt = basePrompt;
   systemPrompt += LIVE_CONVERSATION_HINT;
   systemPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
@@ -369,6 +369,12 @@ export async function startBlobyAgentQuery(
   const abortController = new AbortController();
   activeQueries.set(conversationId, abortController);
+  // Hard watchdog — a hung provider stream would otherwise pin this query forever (finally never
+  // runs, bot:done never fires). Abort after 5 min; cleared in the finally on normal completion.
+  const watchdog = setTimeout(() => {
+    log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
+    abortController.abort();
+  }, 300_000);
   let systemPrompt: string;
   if (supportPrompt) {
@@ -425,6 +431,7 @@ export async function startBlobyAgentQuery(
       onMessage('bot:error', { conversationId, error: err?.message || String(err) });
     }
   } finally {
+    clearTimeout(watchdog);
     activeQueries.delete(conversationId);
     const FILE_TOOL_NAMES = ['Write', 'Edit', 'write', 'edit'];
     const usedFileTools = FILE_TOOL_NAMES.some((t) => usedTools.has(t));