bloby-bot 0.53.10 → 0.54.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.53.10",
3
+ "version": "0.54.10",
4
4
  "releaseNotes": [
5
5
  "1. New Morphy animation system: config-driven sprites loaded from /morphy/*.json",
6
6
  "2. Swapped teleporting (splash) and headphones (bubble + chat) to the new format",
@@ -61,7 +61,7 @@
61
61
  "@streamdown/code": "^1.1.1",
62
62
  "@tailwindcss/vite": "^4.2.0",
63
63
  "@vitejs/plugin-react": "^6.0.1",
64
- "@whiskeysockets/baileys": "^7.0.0-rc11",
64
+ "@whiskeysockets/baileys": "7.0.0-rc13",
65
65
  "better-sqlite3": "^12.6.2",
66
66
  "class-variance-authority": "^0.7.1",
67
67
  "clsx": "^2.1.1",
package/shared/config.ts CHANGED
@@ -11,6 +11,11 @@ export interface ChannelConfig {
11
11
  skill?: string;
12
12
  /** Opt-in: process messages in group chats (default false). Channel mode ignores this. */
13
13
  allowGroups?: boolean;
14
+ /** Assistant mode only. When false (default) ONLY the account owner can trigger the agent
15
+ * with `@botname`. When true, ANYONE who tags the bot (in a DM or group) can drive it.
16
+ * DANGER: the triggerer gains control of an agent that can run Bash, edit files, etc.
17
+ * Only enable for fully trusted shared use (e.g. a partner). See the WhatsApp SKILL.md. */
18
+ allowOthersToTrigger?: boolean;
14
19
  }
15
20
 
16
21
  export interface AlexaChannelConfig {
@@ -14,6 +14,15 @@ let intentionallyStopped = false;
14
14
  let gaveUp = false;
15
15
  const MAX_RESTARTS = 3;
16
16
  const STABLE_THRESHOLD = 30_000; // 30s — if backend ran this long, it wasn't a crash loop
17
+ // Rolling-window backstop: the 30s "stable" rule resets the consecutive counter, so a backend
18
+ // that crashes every ~35s would otherwise restart forever. Give up if it crashes too often within
19
+ // the window (kept generous so a legitimately long-running-then-crashing backend isn't penalized).
20
+ let crashTimes: number[] = [];
21
+ const CRASH_WINDOW_MS = 5 * 60_000; // 5 min
22
+ const CRASH_WINDOW_MAX = 6; // > this many crashes in the window → give up regardless of the 30s reset
23
+ // Called once when the backend gives up, so the supervisor can tell the live chat (which exists
24
+ // precisely so the user can be told to fix it). Set via setBackendGiveUpHandler; logging-only default.
25
+ let onGiveUp: (() => void) | null = null;
17
26
 
18
27
  /** Extra env vars injected into every backend spawn (e.g. BLOBY_AGENT_SECRET) */
19
28
  let extraEnv: Record<string, string> = {};
@@ -23,6 +32,12 @@ export function setBackendEnv(env: Record<string, string>): void {
23
32
  extraEnv = { ...extraEnv, ...env };
24
33
  }
25
34
 
35
+ /** Register a callback fired once when the backend gives up (crash-looped past the limits).
36
+ * The supervisor wires this to broadcast a chat event so the user is told to fix it. */
37
+ export function setBackendGiveUpHandler(fn: () => void): void {
38
+ onGiveUp = fn;
39
+ }
40
+
26
41
  const LOG_FILE = path.join(WORKSPACE_DIR, '.backend.log');
27
42
 
28
43
  export function getBackendPort(basePort: number): number {
@@ -101,18 +116,24 @@ export function spawnBackend(port: number): ChildProcess {
101
116
 
102
117
  // Any unexpected exit (crash, SIGTERM, OOM, null code) — restart
103
118
  log.warn(`Backend exited unexpectedly (code ${code})`);
104
- // If backend was alive for >30s, it's not a crash loop — reset counter
105
- if (Date.now() - lastSpawnTime > STABLE_THRESHOLD) {
119
+ // Track crashes in a rolling window (backstop for the 30s-reset crash-loop hole).
120
+ const now = Date.now();
121
+ crashTimes = crashTimes.filter((t) => now - t < CRASH_WINDOW_MS);
122
+ crashTimes.push(now);
123
+ const windowExceeded = crashTimes.length > CRASH_WINDOW_MAX;
124
+ // If backend was alive for >30s, it's not a crash loop — reset the consecutive counter.
125
+ if (now - lastSpawnTime > STABLE_THRESHOLD) {
106
126
  restarts = 0;
107
127
  }
108
- if (restarts < MAX_RESTARTS) {
128
+ if (!windowExceeded && restarts < MAX_RESTARTS) {
109
129
  restarts++;
110
130
  const delay = Math.min(1000 * restarts, 5000);
111
131
  log.info(`Restarting backend (${restarts}/${MAX_RESTARTS}, delay ${delay}ms)...`);
112
132
  setTimeout(() => spawnBackend(port), delay);
113
133
  } else {
114
134
  gaveUp = true;
115
- log.error('Backend failed too many times. Use Bloby chat to debug.');
135
+ log.error(`Backend failed too many times${windowExceeded ? ` (${crashTimes.length} crashes in ${CRASH_WINDOW_MS / 60000}min)` : ''}. Use Bloby chat to debug.`);
136
+ try { onGiveUp?.(); } catch {}
116
137
  }
117
138
  });
118
139
 
@@ -212,4 +233,8 @@ export function isBackendStopping(): boolean {
212
233
 
213
234
  export function resetBackendRestarts(): void {
214
235
  restarts = 0;
236
+ // A deliberate restart (file edit, user fix, scheduler) is a fresh attempt — clear the rolling
237
+ // crash window too so a just-fixed backend gets a clean slate (deliberate stops never record a
238
+ // crash, so this only matters right after a give-up + fix).
239
+ crashTimes = [];
215
240
  }
@@ -83,6 +83,11 @@ interface DebounceEntry {
83
83
  /** Per-conversation accumulator for streaming bot text → WhatsApp. */
84
84
  export interface WaStreamState {
85
85
  chunkBuf: string;
86
+ /** True once the CURRENT turn has consumed its routing target (via `bot:response` or
87
+ * `bot:error`). Reset on `bot:turn-complete`. Guards the turn-complete safety-net drain so a
88
+ * normal turn — which already consumed on `bot:response` — never double-drains and eats the
89
+ * NEXT queued message's target (the root cause of chat↔WhatsApp bleed and DM-answered-in-group). */
90
+ consumedThisTurn?: boolean;
86
91
  }
87
92
 
88
93
  /** Agent-turn events that carry per-turn content. Broadcast only for dashboard surfaces
@@ -489,6 +494,7 @@ export class ChannelManager {
489
494
 
490
495
  if (type === 'bot:response' && eventData?.content && convId) {
491
496
  const target = this.consumeRoute(convId);
497
+ state.consumedThisTurn = true;
492
498
  const remaining = state.chunkBuf.trim();
493
499
  state.chunkBuf = '';
494
500
 
@@ -514,20 +520,46 @@ export class ChannelManager {
514
520
  return;
515
521
  }
516
522
 
517
- // Turn ended (or errored) without a bot:response — drain the head entry so it
518
- // doesn't bleed into the next turn's reply. The SDK guarantees one response per
519
- // pushed input; this safety net covers aborts, empty turns, and provider errors.
520
- if ((type === 'bot:turn-complete' || type === 'bot:error') && convId) {
521
- const head = this.peekRoute(convId);
522
- if (head) {
523
+ // Turn errored without a usable reply — drain THIS turn's route so it can't bleed into the
524
+ // next turn. Guarded by `consumedThisTurn`: if `bot:response` already fired this turn it
525
+ // consumed the target, so we must NOT drain again.
526
+ if (type === 'bot:error' && convId) {
527
+ if (!state.consumedThisTurn) {
523
528
  const dropped = this.consumeRoute(convId);
524
- log.warn(`[channels] ${type} without bot:response — dropping pending route (surface=${dropped?.surface}, to=${dropped?.waSendTo || 'none'})`);
525
- if (dropped?.surface === 'alexa') {
526
- const alexa = this.providers.get('alexa') as AlexaChannel | undefined;
527
- alexa?.rejectHead(convId, type);
529
+ if (dropped) {
530
+ log.warn(`[channels] bot:error without bot:response — dropping pending route (surface=${dropped.surface}, to=${dropped.waSendTo || 'none'})`);
531
+ if (dropped.surface === 'alexa') {
532
+ const alexa = this.providers.get('alexa') as AlexaChannel | undefined;
533
+ alexa?.rejectHead(convId, type);
534
+ }
528
535
  }
536
+ state.consumedThisTurn = true;
529
537
  }
530
538
  state.chunkBuf = '';
539
+ return;
540
+ }
541
+
542
+ // Turn finished. Drain the head ONLY if this turn never consumed its route — i.e. a true
543
+ // empty turn (no `bot:response`, no `bot:error`; see harness claude.ts which always emits
544
+ // `bot:turn-complete` after every result). A normal turn already consumed on `bot:response`,
545
+ // so draining here would eat the NEXT queued message's target and every later reply would
546
+ // land on the wrong surface (chat↔WhatsApp bleed, DM answered in a group). Reset the per-turn
547
+ // flag afterwards so the next turn starts clean. Turns are strictly sequential per conversation
548
+ // (single input queue, one `result` at a time), so this per-conversation flag is race-free.
549
+ if (type === 'bot:turn-complete' && convId) {
550
+ if (!state.consumedThisTurn) {
551
+ const head = this.peekRoute(convId);
552
+ if (head) {
553
+ const dropped = this.consumeRoute(convId);
554
+ log.warn(`[channels] turn-complete without bot:response — dropping pending route (surface=${dropped?.surface}, to=${dropped?.waSendTo || 'none'})`);
555
+ if (dropped?.surface === 'alexa') {
556
+ const alexa = this.providers.get('alexa') as AlexaChannel | undefined;
557
+ alexa?.rejectHead(convId, type);
558
+ }
559
+ }
560
+ }
561
+ state.consumedThisTurn = false;
562
+ state.chunkBuf = '';
531
563
  }
532
564
  }
533
565
 
@@ -556,6 +588,22 @@ export class ChannelManager {
556
588
  return config.channels?.[channel];
557
589
  }
558
590
 
591
+ /** Robust "is this the account owner's own self-chat?" check.
592
+ *
593
+ * Keys purely on JID equality (`isSelfChat` — the chat resolves to the owner's OWN number,
594
+ * computed in whatsapp.ts from `ownPhoneJid` + LID translation). This is authoritative and is
595
+ * UNAFFECTED by Baileys' `fromMe` decode regressions (e.g. the rc11→rc13 protocolMessage
596
+ * `fromMe=false` bug): the chat JID of a self-message is still the owner's own number even when
597
+ * `fromMe` decodes wrong. So we deliberately do NOT also require `fromMe` (which the old gate
598
+ * did — that's what silently dropped self-messages under rc11).
599
+ *
600
+ * We also deliberately do NOT treat `fromMe` alone as self-chat: `fromMe` is true for the owner
601
+ * messaging a CONTACT from a linked device too, so a `fromMe`-based OR would misroute those into
602
+ * the admin brain. Only own-number JID equality is safe and false-positive-free. */
603
+ private isOwnerSelfChat(isSelfChat: boolean, isGroup: boolean): boolean {
604
+ return !isGroup && isSelfChat;
605
+ }
606
+
559
607
  /** Handle an incoming message from any channel — debounces rapid messages from the same sender.
560
608
  *
561
609
  * Per-mode behavior is decided here. To add a new mode: extend the gating block below
@@ -586,26 +634,36 @@ export class ChannelManager {
586
634
  if (!channelConfig.allowGroups) return;
587
635
  }
588
636
 
637
+ // Owner self-chat — JID-based, immune to Baileys `fromMe` decode regressions.
638
+ const selfChat = this.isOwnerSelfChat(isSelfChat, isGroup);
639
+
589
640
  // ── Channel mode: ONLY respond to self-chat ──
590
641
  if (mode === 'channel') {
591
- if (!fromMe || !isSelfChat) return;
642
+ if (!selfChat) return;
592
643
  }
593
644
 
594
- // ── Business mode: filter outgoing (except self-chat) ──
595
- if (mode === 'business' && fromMe && !isSelfChat) return;
645
+ // ── Business mode: filter outgoing to others (your messages to customers, not self-chat) ──
646
+ if (mode === 'business' && fromMe && !selfChat) return;
596
647
 
597
648
  // ── Assistant mode ──
598
649
  // Self-chat: falls through to debounce (processed as admin)
599
- // Others' messages or my untriggered messages: store for context, don't invoke
600
- // My messages with @botname trigger: falls through to debounce → agent
601
- if (mode === 'assistant' && !(fromMe && isSelfChat)) {
650
+ // Others' messages or untriggered messages: store for context, don't invoke
651
+ // Triggered messages: fall through to debounce → agent (owner always; others only if opted-in)
652
+ if (mode === 'assistant' && !selfChat) {
602
653
  // Store every message for context (both mine and theirs) — keyed by the chat (group or 1:1)
603
654
  this.storeAssistantContext(channel, chatJid, senderName, text, fromMe);
604
655
 
605
- // Only continue if it's me AND the message contains the trigger
656
+ // Trigger must be present.
606
657
  const botName = loadConfig().username || 'bloby';
607
658
  const triggerPattern = new RegExp(`(?:^|\\n)\\s*@${botName}[:\\s]`, 'i');
608
- if (!fromMe || !triggerPattern.test(text)) return;
659
+ if (!triggerPattern.test(text)) return;
660
+
661
+ // Who may drive the agent? By default ONLY the account owner (fromMe). When the channel is
662
+ // explicitly configured with `allowOthersToTrigger`, anyone who tags the bot can — a
663
+ // deliberately dangerous shared-control mode (the triggerer gets an agent with Bash, file
664
+ // access, etc.; see the WhatsApp SKILL.md disclaimer).
665
+ const allowOthers = channelConfig.allowOthersToTrigger === true;
666
+ if (!fromMe && !allowOthers) return;
609
667
  // Falls through to debounce → flushDebounce → handleAssistantMessage
610
668
  }
611
669
 
@@ -660,8 +718,12 @@ export class ChannelManager {
660
718
  // Reply identifier — strip JID suffix to get a stable id (phone for 1:1, group hash for groups)
661
719
  const chatId = chatJid.replace(/@.*/, '');
662
720
 
721
+ // Owner self-chat — JID-based, immune to Baileys `fromMe` decode regressions (matches the
722
+ // gate in handleInboundMessage so a self-message can't pass one check and fail the other).
723
+ const selfChat = this.isOwnerSelfChat(isSelfChat, isGroup);
724
+
663
725
  // Route based on mode and role
664
- if (mode === 'channel' || (mode === 'business' && fromMe && isSelfChat) || (mode === 'assistant' && fromMe && isSelfChat)) {
726
+ if (mode === 'channel' || (mode === 'business' && selfChat) || (mode === 'assistant' && selfChat)) {
665
727
  // Admin (self-chat in any mode)
666
728
  const message: InboundMessage = {
667
729
  channel,
@@ -15,6 +15,11 @@ export interface ChannelConfig {
15
15
  skill?: string;
16
16
  /** Opt-in: process messages in group chats (default false). Channel mode ignores this. */
17
17
  allowGroups?: boolean;
18
+ /** Assistant mode only. When false (default) ONLY the account owner (fromMe) can trigger
19
+ * the agent with `@botname`. When true, ANYONE who tags the bot (DM or group) can drive it.
20
+ * DANGER: the triggerer gains full control of an agent that can run Bash, edit files, etc.
21
+ * Enable only for fully trusted shared use. See the WhatsApp SKILL.md disclaimer. */
22
+ allowOthersToTrigger?: boolean;
18
23
  }
19
24
 
20
25
  export interface InboundMessageAttachment {
@@ -1,5 +1,6 @@
1
1
  import { useState } from 'react';
2
2
  import { Check, KeyRound, Loader2, AlertCircle } from 'lucide-react';
3
+ import { authFetch } from '../../lib/auth';
3
4
 
4
5
  export interface EnvField {
5
6
  name: string;
@@ -34,7 +35,7 @@ export default function EnvForm({ group }: Props) {
34
35
  setErrorMsg('');
35
36
 
36
37
  try {
37
- const res = await fetch('/api/env', {
38
+ const res = await authFetch('/api/env', {
38
39
  method: 'POST',
39
40
  headers: { 'Content-Type': 'application/json' },
40
41
  body: JSON.stringify({ vars: Object.fromEntries(filled) }),
@@ -191,7 +191,7 @@ async function buildConversationOptions(
191
191
  recentMessages?: RecentMessage[],
192
192
  ): Promise<Omit<Options, 'abortController' | 'stderr'>> {
193
193
  const memoryFiles = readMemoryFiles();
194
- const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
194
+ const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName, 'claude');
195
195
  let systemPrompt = basePrompt;
196
196
  systemPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
197
197
 
@@ -594,7 +594,7 @@ export async function startBlobyAgentQuery(
594
594
  if (supportPrompt) {
595
595
  enrichedPrompt = supportPrompt;
596
596
  } else {
597
- const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
597
+ const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName, 'claude');
598
598
  enrichedPrompt = basePrompt;
599
599
  enrichedPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
600
600
 
@@ -614,6 +614,15 @@ export async function startBlobyAgentQuery(
614
614
 
615
615
  activeQueries.set(conversationId, { abortController });
616
616
 
617
+ // Hard watchdog: a hung CLI subprocess (network stall, stuck MCP) would otherwise leave the
618
+ // `for await` loop pending forever — the finally never runs, bot:done never fires, and the
619
+ // caller's per-conversation slot (WhatsApp activeAgents / scheduler) is pinned for good. Abort
620
+ // after 5 min so the finally always emits bot:done. Cleared on normal completion.
621
+ const watchdog = setTimeout(() => {
622
+ log.warn(`[bloby-agent] One-shot query timed out (5m) — aborting conv=${conversationId}`);
623
+ abortController.abort();
624
+ }, 300_000);
625
+
617
626
  let fullText = '';
618
627
  const usedTools = new Set<string>();
619
628
  let stderrBuf = '';
@@ -705,6 +714,7 @@ export async function startBlobyAgentQuery(
705
714
  onMessage('bot:error', { conversationId, error: errMsg });
706
715
  }
707
716
  } finally {
717
+ clearTimeout(watchdog);
708
718
  activeQueries.delete(conversationId);
709
719
  const FILE_TOOLS = ['Write', 'Edit', 'MultiEdit', 'NotebookEdit'];
710
720
  const usedFileTools = FILE_TOOLS.some((t) => usedTools.has(t));
@@ -42,7 +42,15 @@ export type { RecentMessage, AgentAttachment };
42
42
 
43
43
  const CLIENT_INFO = { name: 'bloby', title: 'Bloby', version: '1' };
44
44
  const REQUEST_TIMEOUT_MS = 60_000;
45
- const VALID_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh']);
45
+ const VALID_EFFORTS = new Set(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']);
46
+ /**
47
+ * Per-turn watchdog. `turn/completed` is a NON-guaranteed notification — if the
48
+ * app-server stalls mid-turn without exiting, the RPC `exit` handler never fires
49
+ * and `busy` stays true forever (live: wedges the dashboard + defers backend
50
+ * restarts; one-shot: pins the WhatsApp/scheduler slot since bot:done never
51
+ * arrives). Claude's one-shot path has the same 5-min guard. Mirrors it here.
52
+ */
53
+ const TURN_WATCHDOG_MS = 5 * 60_000;
46
54
 
47
55
  /**
48
56
  * Resolve the `codex` binary. We don't trust $PATH because Bloby may be
@@ -115,7 +123,7 @@ async function assembleBaseInstructions(
115
123
  recentMessages?: RecentMessage[],
116
124
  ): Promise<string> {
117
125
  const memoryFiles = readMemoryFiles();
118
- const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
126
+ const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName, 'codex');
119
127
  let prompt = basePrompt;
120
128
  prompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
121
129
 
@@ -326,6 +334,15 @@ interface CodexConversation {
326
334
  busy: boolean;
327
335
  /** True for one-shot queries — the conversation ends after the first turn completes. */
328
336
  oneShot: boolean;
337
+ /**
338
+ * Latest context occupancy from `thread/tokenUsage/updated` (codex does NOT
339
+ * report usage on `turn/completed` — Turn has no usage field). Emitted on
340
+ * `bot:turn-complete` so the orchestrator's proactive recycling can fire.
341
+ */
342
+ lastContextTokens: number;
343
+ lastContextWindow: number;
344
+ /** Active per-turn watchdog timer (see TURN_WATCHDOG_MS). */
345
+ turnWatchdog: NodeJS.Timeout | null;
329
346
  }
330
347
 
331
348
  const conversations = new Map<string, CodexConversation>();
@@ -353,17 +370,59 @@ function buildUserInput(text: string, savedFiles?: SavedFile[]): Array<Record<st
353
370
  return input;
354
371
  }
355
372
 
373
+ function clearTurnWatchdog(conv: CodexConversation): void {
374
+ if (conv.turnWatchdog) {
375
+ clearTimeout(conv.turnWatchdog);
376
+ conv.turnWatchdog = null;
377
+ }
378
+ }
379
+
380
+ /**
381
+ * Arm the per-turn watchdog. On fire, unstick the conversation the same way a
382
+ * real `turn/completed` would (so the dashboard, `anyConversationBusy`, and the
383
+ * channel slot all release), then tear the conversation down — the next message
384
+ * cold-starts a fresh thread.
385
+ */
386
+ function armTurnWatchdog(conv: CodexConversation): void {
387
+ clearTurnWatchdog(conv);
388
+ conv.turnWatchdog = setTimeout(() => {
389
+ conv.turnWatchdog = null;
390
+ log.warn(`[codex] turn watchdog fired (${TURN_WATCHDOG_MS}ms) — conv=${conv.id}; unsticking + tearing down`);
391
+ conv.busy = false;
392
+ conv.currentTurnId = null;
393
+ conv.onMessage('bot:error', { conversationId: conv.id, error: 'Codex turn timed out — no response from app-server.' });
394
+ if (conv.oneShot) {
395
+ conv.onMessage('bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
396
+ } else {
397
+ conv.onMessage('bot:turn-complete', {
398
+ conversationId: conv.id,
399
+ usedFileTools: conv.usedFileTools,
400
+ contextTokens: conv.lastContextTokens || 0,
401
+ contextWindow: conv.lastContextWindow || 0,
402
+ idle: true,
403
+ });
404
+ }
405
+ teardownConversation(conv.id);
406
+ }, TURN_WATCHDOG_MS);
407
+ }
408
+
356
409
  async function startTurn(conv: CodexConversation, content: string, savedFiles?: SavedFile[]): Promise<void> {
357
410
  const input = buildUserInput(content, savedFiles);
358
411
  conv.busy = true;
359
412
  conv.fullText = '';
360
413
  conv.usedFileTools = false;
361
414
  conv.onMessage('bot:typing', { conversationId: conv.id });
415
+ armTurnWatchdog(conv);
362
416
  try {
363
417
  const params: Record<string, any> = { threadId: conv.threadId, input };
364
418
  if (conv.effort) params.effort = conv.effort;
365
- await conv.rpc.request('turn/start', params);
419
+ // turn/start resolves immediately with { turn }; seize the id now so a
420
+ // pushMessage arriving before the turn/started notification can steer
421
+ // instead of starting a second turn.
422
+ const res = await conv.rpc.request<{ turn?: { id?: string } }>('turn/start', params);
423
+ if (res?.turn?.id) conv.currentTurnId = res.turn.id;
366
424
  } catch (err: any) {
425
+ clearTurnWatchdog(conv);
367
426
  conv.busy = false;
368
427
  conv.currentTurnId = null;
369
428
  conv.onMessage('bot:error', { conversationId: conv.id, error: `turn/start failed: ${err.message}` });
@@ -385,11 +444,12 @@ async function steerOrQueue(conv: CodexConversation, content: string, savedFiles
385
444
  // Active turn — inject mid-flight.
386
445
  const input = buildUserInput(content, savedFiles);
387
446
  try {
388
- await conv.rpc.request('turn/steer', {
447
+ const res = await conv.rpc.request<{ turnId?: string }>('turn/steer', {
389
448
  threadId: conv.threadId,
390
449
  expectedTurnId: conv.currentTurnId,
391
450
  input,
392
451
  });
452
+ if (res?.turnId) conv.currentTurnId = res.turnId;
393
453
  conv.onMessage('bot:typing', { conversationId: conv.id });
394
454
  } catch (err: any) {
395
455
  // expectedTurnId mismatch most likely means the turn just finished —
@@ -430,10 +490,11 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
430
490
  });
431
491
  break;
432
492
  case 'mcpToolCall':
493
+ // ThreadItem.mcpToolCall fields are `server` + `tool` (no toolName/name/input).
433
494
  conv.onMessage('bot:tool', {
434
495
  conversationId: conv.id,
435
- name: item.toolName || item.name || 'mcp_tool',
436
- input: item.arguments || item.input || {},
496
+ name: item.tool ? (item.server ? `${item.server}/${item.tool}` : item.tool) : 'mcp_tool',
497
+ input: item.arguments || {},
437
498
  });
438
499
  break;
439
500
  case 'fileChange':
@@ -470,18 +531,34 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
470
531
  break;
471
532
  }
472
533
 
534
+ case 'thread/tokenUsage/updated': {
535
+ // Codex's only token-usage signal. ThreadTokenUsage = { total, last, modelContextWindow };
536
+ // `last` is the current prompt occupancy (mirrors Claude's input+cacheRead+cacheCreation),
537
+ // the right basis for the recycle compare in supervisor/index.ts (fraction*window, not lifetime).
538
+ const tu = p.tokenUsage || {};
539
+ const last = tu.last || {};
540
+ conv.lastContextTokens = (last.inputTokens || 0) + (last.cachedInputTokens || 0);
541
+ if (typeof tu.modelContextWindow === 'number' && tu.modelContextWindow > 0) {
542
+ conv.lastContextWindow = tu.modelContextWindow;
543
+ }
544
+ break;
545
+ }
546
+
473
547
  case 'turn/completed': {
474
548
  const status: string = p.turn?.status || 'completed';
475
549
  const turnError = p.turn?.error;
476
550
 
551
+ clearTurnWatchdog(conv);
477
552
  conv.currentTurnId = null;
478
553
  conv.busy = false;
479
554
 
480
- if (status === 'failed' || status === 'systemError') {
555
+ if (status === 'failed') {
481
556
  conv.onMessage('bot:error', {
482
557
  conversationId: conv.id,
483
558
  error: turnError?.message || 'Codex turn failed.',
484
559
  });
560
+ } else if (status === 'interrupted') {
561
+ // Interrupted turns carry no final answer — stay silent.
485
562
  } else if (conv.fullText) {
486
563
  conv.onMessage('bot:response', { conversationId: conv.id, content: conv.fullText });
487
564
  }
@@ -490,16 +567,17 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
490
567
  conv.onMessage('bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
491
568
  teardownConversation(conv.id);
492
569
  } else {
493
- // Context-size signal for the orchestrator's proactive session recycling.
494
- // The codex app-server reports token usage on turn/completed; field names vary
495
- // across versions, so probe defensively (0 if absent → falls back to codex's
496
- // own built-in auto-compaction).
497
- const tu: any = p.turn?.usage || p.usage || {};
498
- const contextTokens = tu.input_tokens ?? tu.inputTokens ?? tu.total_tokens ?? tu.totalTokens ?? tu.tokens ?? 0;
499
- const contextWindow = tu.context_window ?? tu.contextWindow ?? 0;
500
- // idle = no message queued behind this turn (the drain happens just below).
570
+ // Context-size signal for the orchestrator's proactive session recycling,
571
+ // sourced from the cached `thread/tokenUsage/updated` values above. 0 if codex
572
+ // never sent one this turn → falls back to codex's own in-thread auto-compaction.
501
573
  const idle = conv.pendingInputs.length === 0;
502
- conv.onMessage('bot:turn-complete', { conversationId: conv.id, usedFileTools: conv.usedFileTools, contextTokens, contextWindow, idle });
574
+ conv.onMessage('bot:turn-complete', {
575
+ conversationId: conv.id,
576
+ usedFileTools: conv.usedFileTools,
577
+ contextTokens: conv.lastContextTokens || 0,
578
+ contextWindow: conv.lastContextWindow || 0,
579
+ idle,
580
+ });
503
581
 
504
582
  // Drain any messages that were submitted while we were busy.
505
583
  const next = conv.pendingInputs.shift();
@@ -509,6 +587,12 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
509
587
  }
510
588
 
511
589
  case 'error': {
590
+ // ErrorNotification carries willRetry — codex will retry transient errors
591
+ // itself; don't surface those as a hard bot:error before the retry lands.
592
+ if (p.willRetry) {
593
+ log.info(`[codex] transient error (will retry): ${p.error?.message || 'unknown'}`);
594
+ break;
595
+ }
512
596
  const errMsg = p.error?.message || 'Codex error notification';
513
597
  conv.onMessage('bot:error', { conversationId: conv.id, error: errMsg });
514
598
  break;
@@ -522,6 +606,7 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
522
606
  function teardownConversation(conversationId: string): void {
523
607
  const conv = conversations.get(conversationId);
524
608
  if (!conv) return;
609
+ clearTurnWatchdog(conv);
525
610
  conversations.delete(conversationId);
526
611
  try { conv.rpc.close(); } catch {}
527
612
  conv.onMessage('bot:conversation-ended', { conversationId });
@@ -531,7 +616,7 @@ async function spawnAndInitialize(
531
616
  conversationId: string,
532
617
  model: string,
533
618
  onMessage: OnAgentMessage,
534
- baseInstructions: string,
619
+ instructions: string,
535
620
  oneShot: boolean,
536
621
  ): Promise<CodexConversation | null> {
537
622
  // Pre-flight: confirm we have valid OAuth tokens before spending time spawning.
@@ -560,6 +645,9 @@ async function spawnAndInitialize(
560
645
  pendingInputs: [],
561
646
  busy: false,
562
647
  oneShot,
648
+ lastContextTokens: 0,
649
+ lastContextWindow: 0,
650
+ turnWatchdog: null,
563
651
  };
564
652
 
565
653
  rpc.onNotification((n) => handleNotification(conv, n));
@@ -582,7 +670,13 @@ async function spawnAndInitialize(
582
670
  const startResult = await rpc.request<{ thread: { id: string } }>('thread/start', {
583
671
  cwd: WORKSPACE_DIR,
584
672
  model: modelId,
585
- baseInstructions,
673
+ // Bloby's persona/workflow prompt rides developerInstructions (ADDITIVE),
674
+ // NOT baseInstructions. baseInstructions fully OVERRIDES codex's native base
675
+ // prompt — which carries the apply_patch FREEFORM spec + shell protocol the
676
+ // model needs to edit files. Leaving baseInstructions unset keeps that native
677
+ // scaffolding; developerInstructions layers Bloby's persona on top of it.
678
+ developerInstructions: instructions,
679
+ personality: 'pragmatic',
586
680
  // Bloby's posture matches Claude's bypassPermissions — the bot is
587
681
  // running on the user's own machine with their full consent. Skip the
588
682
  // approval prompts and give it write access to the workspace + beyond.
@@ -782,7 +876,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
782
876
  case 'item/started': {
783
877
  const item = p.item || {};
784
878
  if (item.type === 'commandExecution') usedTools.add('shell');
785
- else if (item.type === 'mcpToolCall') usedTools.add(item.toolName || item.name || 'mcp_tool');
879
+ else if (item.type === 'mcpToolCall') usedTools.add(item.tool || 'mcp_tool');
786
880
  else if (item.type === 'fileChange') { usedTools.add('file_change'); usedFileTools = true; }
787
881
  else if (item.type === 'webSearch') usedTools.add('web_search');
788
882
  break;
@@ -798,13 +892,14 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
798
892
  }
799
893
  case 'turn/completed': {
800
894
  const status = p.turn?.status || 'completed';
801
- if (status === 'failed' || status === 'systemError') {
895
+ if (status === 'failed') {
802
896
  turnError = p.turn?.error?.message || 'Codex turn failed.';
803
897
  }
804
898
  resolveTurn?.();
805
899
  break;
806
900
  }
807
901
  case 'error': {
902
+ if (p.willRetry) break; // transient — codex retries itself
808
903
  turnError = p.error?.message || 'Codex error';
809
904
  resolveTurn?.();
810
905
  break;
@@ -833,7 +928,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
833
928
  const r = await rpc.request<{ thread: { id: string } }>('thread/start', {
834
929
  cwd: WORKSPACE_DIR,
835
930
  model,
836
- ...(req.systemPrompt ? { baseInstructions: req.systemPrompt } : {}),
931
+ ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
837
932
  approvalPolicy: 'never',
838
933
  sandbox: 'danger-full-access',
839
934
  });
@@ -843,7 +938,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
843
938
  const r = await rpc.request<{ thread: { id: string } }>('thread/start', {
844
939
  cwd: WORKSPACE_DIR,
845
940
  model,
846
- ...(req.systemPrompt ? { baseInstructions: req.systemPrompt } : {}),
941
+ ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
847
942
  approvalPolicy: 'never',
848
943
  sandbox: 'danger-full-access',
849
944
  });
@@ -107,7 +107,7 @@ async function buildSystemPrompt(
107
107
  recentMessages?: RecentMessage[],
108
108
  ): Promise<string> {
109
109
  const memoryFiles = readMemoryFiles();
110
- const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
110
+ const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName, 'pi');
111
111
  let systemPrompt = basePrompt;
112
112
  systemPrompt += LIVE_CONVERSATION_HINT;
113
113
  systemPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
@@ -369,6 +369,12 @@ export async function startBlobyAgentQuery(
369
369
 
370
370
  const abortController = new AbortController();
371
371
  activeQueries.set(conversationId, abortController);
372
+ // Hard watchdog — a hung provider stream would otherwise pin this query forever (finally never
373
+ // runs, bot:done never fires). Abort after 5 min; cleared in the finally on normal completion.
374
+ const watchdog = setTimeout(() => {
375
+ log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
376
+ abortController.abort();
377
+ }, 300_000);
372
378
 
373
379
  let systemPrompt: string;
374
380
  if (supportPrompt) {
@@ -425,6 +431,7 @@ export async function startBlobyAgentQuery(
425
431
  onMessage('bot:error', { conversationId, error: err?.message || String(err) });
426
432
  }
427
433
  } finally {
434
+ clearTimeout(watchdog);
428
435
  activeQueries.delete(conversationId);
429
436
  const FILE_TOOL_NAMES = ['Write', 'Edit', 'write', 'edit'];
430
437
  const usedFileTools = FILE_TOOL_NAMES.some((t) => usedTools.has(t));