bloby-bot 0.70.4 → 0.70.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,8 @@
8
8
  *
9
9
  * Lifecycle per live conversation:
10
10
  * spawn → initialize → initialized → thread/start → turn/start (per
11
- * user message; turn/steer to inject mid-turn) → turn/completed → idle
11
+ * user message; messages arriving mid-turn are QUEUED and each gets its
12
+ * own turn — see queueOrStart) → turn/completed → idle
12
13
  * → endConversation → turn/interrupt (if needed) → kill subprocess
13
14
  *
14
15
  * Lifecycle per one-shot query: same as above, but the subprocess is killed
@@ -19,15 +20,22 @@
19
20
  * passed as `effort` on `turn/start`.
20
21
  *
21
22
  * Notes on parity with Claude harness:
22
- * - System prompt → `baseInstructions` on `thread/start`
23
- * - Sub-agents not implemented (Codex has Skills, different model)
24
- * - MCP servers not wired yet (Codex has its own MCP layer)
25
- * - Mid-turn input uses `turn/steer` (better than Claude's queue)
23
+ * - System prompt → `developerInstructions` on `thread/start` (ADDITIVE —
24
+ * `baseInstructions` would override codex's native apply_patch/shell spec)
25
+ * - Mid-turn input is queued, one turn per message — same one-bot:response-
26
+ * per-push contract the channel routing FIFO depends on. (`turn/steer`
27
+ * merges inputs into the in-flight turn and breaks that contract.)
28
+ * - Sub-agents → not implemented (codex collab tools are still experimental;
29
+ * the collabAgentToolCall handlers below light up if that ever ships)
30
+ * - MCP servers → MCP.json translated to `-c mcp_servers.*` spawn flags
31
+ * - Pre-warm → spawn+initialize cached ahead of time (thread/start is issued
32
+ * at claim time, so the warm process is reusable for any model/instructions)
26
33
  */
27
34
 
28
35
  import { spawn, type ChildProcessWithoutNullStreams } from 'child_process';
29
36
  import { createRequire } from 'module';
30
37
  import readline from 'readline';
38
+ import crypto from 'crypto';
31
39
  import fs from 'fs';
32
40
  import path from 'path';
33
41
  import { log } from '../../shared/logger.js';
@@ -54,6 +62,21 @@ const VALID_EFFORTS = new Set(['none', 'minimal', 'low', 'medium', 'high', 'xhig
54
62
  * keeps emitting events and is never killed; only true silence trips recovery.
55
63
  */
56
64
  const TURN_WATCHDOG_MS = 5 * 60_000;
65
+ /**
66
+ * Hard WALL-CLOCK cap for one-shot turns (pulse/cron, customer WhatsApp) —
67
+ * codex has no maxTurns equivalent, and the idle watchdog above never trips on
68
+ * an actively-emitting runaway turn (tool ping-pong). Claude one-shots abort at
69
+ * 5 minutes; mirror that so a runaway can't pin a customer slot forever.
70
+ */
71
+ const ONE_SHOT_MAX_TURN_MS = 5 * 60_000;
72
+ /**
73
+ * Micro-batch window for bot:token. Codex streams true per-word deltas — an
74
+ * order of magnitude more WS frames per reply than claude's per-block stream.
75
+ * Coalescing at ~60ms keeps the stream visually smooth while cutting frame
76
+ * volume. Every non-token event flushes first, so ordering (and the
77
+ * bot:response === streamed-text invariant) is preserved.
78
+ */
79
+ const TOKEN_FLUSH_MS = 60;
57
80
 
58
81
  /**
59
82
  * Resolve the `codex` binary. We don't trust $PATH because Bloby may be
@@ -160,6 +183,19 @@ function parseModelString(model: string): { id: string; effort?: string } {
160
183
  type RpcResult<T = any> = { id: number; result?: T; error?: { code?: number; message: string } };
161
184
  type RpcNotification = { method: string; params?: any };
162
185
 
186
+ /** Distinguishes a JSON-RPC error RESPONSE (server alive, request rejected)
187
+ * from transport failures (timeout / closed / process exit) — turn/start
188
+ * error containment keeps the conversation alive only for the former. */
189
+ type RpcErrorKind = 'rpc-error' | 'timeout' | 'closed';
190
+ function rpcError(message: string, kind: RpcErrorKind): Error {
191
+ const err = new Error(message);
192
+ (err as any).rpcKind = kind;
193
+ return err;
194
+ }
195
+ function isServerRejection(err: any): boolean {
196
+ return err?.rpcKind === 'rpc-error';
197
+ }
198
+
163
199
  interface PendingRequest {
164
200
  resolve: (value: any) => void;
165
201
  reject: (err: Error) => void;
@@ -171,7 +207,7 @@ class CodexRpc {
171
207
  private pending = new Map<number, PendingRequest>();
172
208
  private nextId = 1;
173
209
  private notificationHandler: (n: RpcNotification) => void = () => {};
174
- private closeHandler: (code: number | null) => void = () => {};
210
+ private closeHandler: (code: number | null, errMsg?: string) => void = () => {};
175
211
  private closed = false;
176
212
  private stderrBuf = '';
177
213
 
@@ -189,13 +225,18 @@ class CodexRpc {
189
225
  this.proc.on('exit', (code) => {
190
226
  if (this.closed) return;
191
227
  this.closed = true;
192
- const err = new Error(`codex app-server exited (code=${code}). Stderr tail:\n${this.stderrBuf.trim().slice(-1000)}`);
228
+ const errMsg = `codex app-server exited (code=${code}). Stderr tail:\n${this.stderrBuf.trim().slice(-1000)}`;
229
+ const err = rpcError(errMsg, 'closed');
193
230
  for (const p of this.pending.values()) {
194
231
  clearTimeout(p.timer);
195
232
  p.reject(err);
196
233
  }
197
234
  this.pending.clear();
198
- this.closeHandler(code);
235
+ // Pass the composed exit error along — mid-turn there are usually no
236
+ // pending requests, so this is the ONLY way crash detail reaches the
237
+ // conversation (the watchdog would otherwise ghost-fire 5 min later
238
+ // with a misleading "timed out" message).
239
+ this.closeHandler(code, errMsg);
199
240
  });
200
241
 
201
242
  this.proc.on('error', (err) => {
@@ -204,17 +245,22 @@ class CodexRpc {
204
245
  log.warn(`[codex-rpc] spawn error: ${err.message}`);
205
246
  for (const p of this.pending.values()) {
206
247
  clearTimeout(p.timer);
207
- p.reject(err);
248
+ p.reject(rpcError(err.message, 'closed'));
208
249
  }
209
250
  this.pending.clear();
210
- this.closeHandler(null);
251
+ this.closeHandler(null, `codex app-server failed to spawn: ${err.message}`);
211
252
  });
212
253
  }
213
254
 
214
255
  onNotification(handler: (n: RpcNotification) => void): void { this.notificationHandler = handler; }
215
- onClose(handler: (code: number | null) => void): void { this.closeHandler = handler; }
256
+ onClose(handler: (code: number | null, errMsg?: string) => void): void { this.closeHandler = handler; }
216
257
 
217
258
  private onLine(line: string): void {
259
+ // close() already rejected everything pending; late stdout (buffered deltas,
260
+ // a turn/completed that landed the same instant as teardown) must not
261
+ // re-enter the event pipeline — a post-teardown turn/completed could drain
262
+ // pendingInputs into a dead rpc and wedge agentQueryActive forever.
263
+ if (this.closed) return;
218
264
  if (!line.trim()) return;
219
265
  let msg: any;
220
266
  try { msg = JSON.parse(line); } catch {
@@ -237,7 +283,7 @@ class CodexRpc {
237
283
  if (!pending) return;
238
284
  this.pending.delete(msg.id);
239
285
  clearTimeout(pending.timer);
240
- if (msg.error) pending.reject(new Error(msg.error.message || 'RPC error'));
286
+ if (msg.error) pending.reject(rpcError(msg.error.message || 'RPC error', 'rpc-error'));
241
287
  else pending.resolve(msg.result);
242
288
  return;
243
289
  }
@@ -288,12 +334,12 @@ class CodexRpc {
288
334
  }
289
335
 
290
336
  request<T = any>(method: string, params?: any, timeoutMs = REQUEST_TIMEOUT_MS): Promise<T> {
291
- if (this.closed || !this.proc) return Promise.reject(new Error('RPC connection closed'));
337
+ if (this.closed || !this.proc) return Promise.reject(rpcError('RPC connection closed', 'closed'));
292
338
  const id = this.nextId++;
293
339
  return new Promise<T>((resolve, reject) => {
294
340
  const timer = setTimeout(() => {
295
341
  this.pending.delete(id);
296
- reject(new Error(`codex app-server: ${method} timed out after ${timeoutMs}ms`));
342
+ reject(rpcError(`codex app-server: ${method} timed out after ${timeoutMs}ms`, 'timeout'));
297
343
  }, timeoutMs);
298
344
  this.pending.set(id, { resolve, reject, timer });
299
345
  try {
@@ -301,7 +347,7 @@ class CodexRpc {
301
347
  } catch (err: any) {
302
348
  this.pending.delete(id);
303
349
  clearTimeout(timer);
304
- reject(err);
350
+ reject(rpcError(err.message, 'closed'));
305
351
  }
306
352
  });
307
353
  }
@@ -320,7 +366,7 @@ class CodexRpc {
320
366
  this.closed = true;
321
367
  for (const p of this.pending.values()) {
322
368
  clearTimeout(p.timer);
323
- p.reject(new Error('RPC connection closed'));
369
+ p.reject(rpcError('RPC connection closed', 'closed'));
324
370
  }
325
371
  this.pending.clear();
326
372
  const proc = this.proc;
@@ -339,11 +385,20 @@ class CodexRpc {
339
385
 
340
386
  /* ── Per-conversation state ────────────────────────────────────────────── */
341
387
 
388
+ interface QueuedInput {
389
+ content: string;
390
+ savedFiles?: SavedFile[];
391
+ attachments?: AgentAttachment[];
392
+ }
393
+
342
394
  interface CodexConversation {
343
395
  id: string;
344
396
  rpc: CodexRpc;
345
397
  threadId: string;
346
398
  effort?: string;
399
+ /** Original model string (with effort suffix) + names — kept for the post-teardown re-warm. */
400
+ model: string;
401
+ names?: { botName: string; humanName: string };
347
402
  onMessage: OnAgentMessage;
348
403
  /** Currently in-flight turn id (set on `turn/started`, cleared on `turn/completed`). */
349
404
  currentTurnId: string | null;
@@ -352,18 +407,32 @@ interface CodexConversation {
352
407
  currentMsgItemId: string | null;
353
408
  /** Streaming text accumulator for the current turn's agentMessage items. */
354
409
  fullText: string;
410
+ /** Per-itemId delta accumulation for the current turn. item/completed is the
411
+ * authoritative final text per the docs — delta concatenation is not guaranteed
412
+ * to match it, and a later agentMessage item may complete with NO deltas at all.
413
+ * This map lets item/completed emit exactly the missing remainder per item. */
414
+ itemTexts: Map<string, string>;
355
415
  /** Tools/items used during the current turn, for the bot:turn-complete payload. */
356
416
  usedFileTools: boolean;
357
- /**
358
- * Queue of messages submitted via `pushMessage` that arrived while no turn
359
- * was active *and* we hadn't yet returned from the previous turn — almost
360
- * always empty, but covers a tight push-during-completed race.
361
- */
362
- pendingInputs: string[];
363
- /** True once the harness has emitted the per-turn completion event. */
417
+ /** Messages pushed while a turn is active. Each is drained into its OWN turn on
418
+ * turn/completed — one bot:response per push, mirroring claude's input queue.
419
+ * (turn/steer would merge them into the in-flight turn and desync the channel
420
+ * routing FIFO, which enqueues one target per push.) */
421
+ pendingInputs: QueuedInput[];
422
+ /** True while a turn is being processed. */
364
423
  busy: boolean;
365
424
  /** True for one-shot queries — the conversation ends after the first turn completes. */
366
425
  oneShot: boolean;
426
+ /** True once bot:error fired for the current turn — dedups the double-emit the
427
+ * app-server produces on hard failures (a non-retry `error` notification followed
428
+ * by `turn/completed {status:'failed'}` with the same message; live-verified). */
429
+ errorEmitted: boolean;
430
+ /** True once bot:done fired for this one-shot — teardown uses it to guarantee
431
+ * exactly one bot:done on EVERY terminal path (slot-freeing consumers rely on it). */
432
+ doneEmitted: boolean;
433
+ /** Non-retry `error` notification stashed while a turn is active; surfaced by
434
+ * turn/completed {failed} (its TurnError is authoritative, the stash is fallback). */
435
+ stashedError: { message: string; info: any } | null;
367
436
  /**
368
437
  * Latest context occupancy from `thread/tokenUsage/updated` (codex does NOT
369
438
  * report usage on `turn/completed` — Turn has no usage field). Emitted on
@@ -373,13 +442,127 @@ interface CodexConversation {
373
442
  lastContextWindow: number;
374
443
  /** Active per-turn watchdog timer (see TURN_WATCHDOG_MS). */
375
444
  turnWatchdog: NodeJS.Timeout | null;
445
+ /** Hard wall-clock cap for one-shot turns (see ONE_SHOT_MAX_TURN_MS). Non-resetting. */
446
+ hardTurnTimer: NodeJS.Timeout | null;
447
+ /** bot:token micro-batch buffer (see TOKEN_FLUSH_MS). */
448
+ tokenBuf: string;
449
+ tokenFlushTimer: NodeJS.Timeout | null;
376
450
  }
377
451
 
378
452
  const conversations = new Map<string, CodexConversation>();
379
453
 
380
- /* ── Helpers ───────────────────────────────────────────────────────────── */
454
+ /** One-shot queries in their init window (token refresh + spawn + initialize +
455
+ * thread/start) — not yet in `conversations`, so anyConversationBusy() can't see
456
+ * them. Without this, a queued self-update / backend restart can fire mid-launch
457
+ * and kill the just-spawning query. Mirrors claude's activeQueries registration. */
458
+ const inFlightOneShots = new Set<string>();
459
+
460
+ /* ── Event emission helpers ────────────────────────────────────────────── */
461
+
462
+ function flushTokens(conv: CodexConversation): void {
463
+ if (conv.tokenFlushTimer) {
464
+ clearTimeout(conv.tokenFlushTimer);
465
+ conv.tokenFlushTimer = null;
466
+ }
467
+ if (conv.tokenBuf) {
468
+ const token = conv.tokenBuf;
469
+ conv.tokenBuf = '';
470
+ conv.onMessage('bot:token', { conversationId: conv.id, token });
471
+ }
472
+ }
473
+
474
+ /** Drop buffered tokens WITHOUT emitting. Teardown must discard, not flush:
475
+ * on user-stop the frontend has already cleared its stream state, and a stray
476
+ * trailing bot:token would re-open streaming with no bot:idle ever coming —
477
+ * then corrupt the next turn's committedTextLength accounting. */
478
+ function discardTokens(conv: CodexConversation): void {
479
+ if (conv.tokenFlushTimer) {
480
+ clearTimeout(conv.tokenFlushTimer);
481
+ conv.tokenFlushTimer = null;
482
+ }
483
+ conv.tokenBuf = '';
484
+ }
485
+
486
+ function emitToken(conv: CodexConversation, token: string): void {
487
+ conv.tokenBuf += token;
488
+ if (!conv.tokenFlushTimer) {
489
+ conv.tokenFlushTimer = setTimeout(() => {
490
+ conv.tokenFlushTimer = null;
491
+ flushTokens(conv);
492
+ }, TOKEN_FLUSH_MS);
493
+ }
494
+ }
495
+
496
+ /** Emit any non-token event, flushing buffered tokens first so ordering is
497
+ * preserved (bot:tool commits the stream bubble; bot:response must equal the
498
+ * streamed text the frontend already saw). */
499
+ function emitEvent(conv: CodexConversation, type: string, data: any): void {
500
+ flushTokens(conv);
501
+ conv.onMessage(type, data);
502
+ }
503
+
504
+ /** Kinds we can branch on from CodexErrorInfo (string variant or `{ kind: {...} }`). */
505
+ function errorInfoKind(info: any): string | undefined {
506
+ if (typeof info === 'string') return info;
507
+ if (info && typeof info === 'object') return Object.keys(info)[0];
508
+ return undefined;
509
+ }
510
+
511
+ /**
512
+ * Map codex's structured error onto a message that gives BLOBY's remedy.
513
+ * The raw upstream text actively misleads ("Please log out and sign in again"
514
+ * points at the codex CLI, not the dashboard re-auth flow).
515
+ */
516
+ function humanizeCodexError(message?: string, info?: any, additionalDetails?: string | null): string {
517
+ const raw = message || 'Codex error';
518
+ switch (errorInfoKind(info)) {
519
+ case 'unauthorized':
520
+ return 'Codex session expired or unauthorized. Re-authenticate from the dashboard.';
521
+ case 'usageLimitExceeded':
522
+ return `Codex usage limit reached — ${raw}`;
523
+ case 'contextWindowExceeded':
524
+ return 'The conversation outgrew the model context window. Send your message again to continue in a fresh session.';
525
+ case 'serverOverloaded':
526
+ return 'OpenAI servers are overloaded right now — try again in a moment.';
527
+ case 'httpConnectionFailed':
528
+ case 'responseStreamConnectionFailed':
529
+ case 'responseStreamDisconnected': {
530
+ const status = (Object.values(info || {})[0] as any)?.httpStatusCode;
531
+ return `Connection to OpenAI failed${status ? ` (HTTP ${status})` : ''} — try again in a moment.`;
532
+ }
533
+ default:
534
+ return additionalDetails ? `${raw}\n${additionalDetails}` : raw;
535
+ }
536
+ }
537
+
538
+ /** Emit bot:error exactly once per turn (see errorEmitted). */
539
+ function emitError(conv: CodexConversation, message?: string, info?: any, additionalDetails?: string | null): void {
540
+ const friendly = humanizeCodexError(message, info, additionalDetails);
541
+ if (conv.errorEmitted) {
542
+ log.info(`[codex] suppressing duplicate bot:error for conv=${conv.id}: ${friendly.slice(0, 120)}`);
543
+ return;
544
+ }
545
+ conv.errorEmitted = true;
546
+ emitEvent(conv, 'bot:error', { conversationId: conv.id, error: friendly });
547
+ }
548
+
549
+ /** Emit bot:done exactly once per one-shot (see doneEmitted). */
550
+ function emitDone(conv: CodexConversation): void {
551
+ if (conv.doneEmitted) return;
552
+ conv.doneEmitted = true;
553
+ emitEvent(conv, 'bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
554
+ }
555
+
556
+ /* ── Input building ────────────────────────────────────────────────────── */
557
+
558
+ /** mediaTypes whose content we inline into the prompt — codex has no document
559
+ * input type (verified against 0.138 UserInput), so this is the closest we get
560
+ * to claude's native base64 document ingestion for text-like files. */
561
+ const INLINE_TEXT_MEDIA = /^(text\/|application\/(json|xml|yaml|x-yaml|toml|csv|javascript|typescript))/;
562
+ const INLINE_TEXT_MAX_BYTES = 48_000;
563
+ const INLINE_TEXT_TOTAL_BUDGET = 96_000;
381
564
 
382
- function buildUserInput(text: string, savedFiles?: SavedFile[]): Array<Record<string, any>> {
565
+ function buildUserInput(text: string, savedFiles?: SavedFile[], attachments?: AgentAttachment[]): Array<Record<string, any>> {
383
566
  const input: Array<Record<string, any>> = [];
384
567
 
385
568
  let promptText = text || '(attached files)';
@@ -387,19 +570,59 @@ function buildUserInput(text: string, savedFiles?: SavedFile[]): Array<Record<st
387
570
  const lines = savedFiles.map((f) => `- ${f.name} -> ${f.relPath}`);
388
571
  promptText += `\n\n[Attached files saved to disk]\n${lines.join('\n')}\nYou can read or reference these files using the paths above (relative to your cwd).`;
389
572
  }
573
+
574
+ // Inline text-like attachments (size-capped) so the model can answer about
575
+ // their contents immediately instead of shelling out to read them from disk.
576
+ if (attachments?.length) {
577
+ let budget = INLINE_TEXT_TOTAL_BUDGET;
578
+ for (const att of attachments) {
579
+ if (att.type !== 'file' || !INLINE_TEXT_MEDIA.test(att.mediaType || '')) continue;
580
+ const approxBytes = Math.floor((att.data?.length || 0) * 0.75);
581
+ if (approxBytes === 0 || approxBytes > INLINE_TEXT_MAX_BYTES || approxBytes > budget) continue;
582
+ try {
583
+ const decoded = Buffer.from(att.data, 'base64').toString('utf-8');
584
+ budget -= approxBytes;
585
+ promptText += `\n\n[Attached file content: ${att.name}]\n\`\`\`\n${decoded}\n\`\`\``;
586
+ } catch {}
587
+ }
588
+ }
589
+
390
590
  input.push({ type: 'text', text: promptText });
391
591
 
392
592
  // Codex understands `localImage` (path on disk) — Bloby's file-saver already
393
593
  // wrote attachments to disk, so we just point at the absolute path.
594
+ const savedImageCounts = new Map<string, number>();
394
595
  if (savedFiles?.length) {
395
596
  for (const f of savedFiles) {
396
- if (f.type === 'image') input.push({ type: 'localImage', path: f.absPath });
597
+ if (f.type === 'image') {
598
+ input.push({ type: 'localImage', path: f.absPath });
599
+ savedImageCounts.set(f.name, (savedImageCounts.get(f.name) || 0) + 1);
600
+ }
601
+ }
602
+ }
603
+
604
+ // Fallback for image attachments that never made it to disk (file-saver
605
+ // failure): inline as a data URL so the agent still SEES the image — claude
606
+ // always gets the base64 inline, codex shouldn't silently go blind.
607
+ // Match by COUNT, not name presence: WhatsApp multi-image pushes share one
608
+ // attachment name, and each saved file covers exactly one of them.
609
+ if (attachments?.length) {
610
+ for (const att of attachments) {
611
+ if (att.type !== 'image' || !att.data) continue;
612
+ const remaining = savedImageCounts.get(att.name) || 0;
613
+ if (remaining > 0) {
614
+ savedImageCounts.set(att.name, remaining - 1);
615
+ continue;
616
+ }
617
+ input.push({ type: 'image', url: `data:${att.mediaType};base64,${att.data}` });
397
618
  }
398
619
  }
399
620
 
400
621
  return input;
401
622
  }
402
623
 
624
+ /* ── Turn lifecycle ────────────────────────────────────────────────────── */
625
+
403
626
  function clearTurnWatchdog(conv: CodexConversation): void {
404
627
  if (conv.turnWatchdog) {
405
628
  clearTimeout(conv.turnWatchdog);
@@ -407,6 +630,13 @@ function clearTurnWatchdog(conv: CodexConversation): void {
407
630
  }
408
631
  }
409
632
 
633
+ function clearHardTurnTimer(conv: CodexConversation): void {
634
+ if (conv.hardTurnTimer) {
635
+ clearTimeout(conv.hardTurnTimer);
636
+ conv.hardTurnTimer = null;
637
+ }
638
+ }
639
+
410
640
  /**
411
641
  * Arm the per-turn watchdog. On fire, unstick the conversation the same way a
412
642
  * real `turn/completed` would (so the dashboard, `anyConversationBusy`, and the
@@ -420,11 +650,15 @@ function armTurnWatchdog(conv: CodexConversation): void {
420
650
  log.warn(`[codex] turn watchdog fired (${TURN_WATCHDOG_MS}ms) — conv=${conv.id}; unsticking + tearing down`);
421
651
  conv.busy = false;
422
652
  conv.currentTurnId = null;
423
- conv.onMessage('bot:error', { conversationId: conv.id, error: 'Codex turn timed out no response from app-server.' });
653
+ // Prefer a stashed non-retry error: if the server wedged right after
654
+ // emitting it (the turn/completed that normally surfaces it never came),
655
+ // the stash carries the real cause + the M4 remedy mapping.
656
+ const stash = conv.stashedError;
657
+ emitError(conv, stash?.message || 'Codex turn timed out — no response from app-server.', stash?.info);
424
658
  if (conv.oneShot) {
425
- conv.onMessage('bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
659
+ emitDone(conv);
426
660
  } else {
427
- conv.onMessage('bot:turn-complete', {
661
+ emitEvent(conv, 'bot:turn-complete', {
428
662
  conversationId: conv.id,
429
663
  usedFileTools: conv.usedFileTools,
430
664
  contextTokens: conv.lastContextTokens || 0,
@@ -436,60 +670,98 @@ function armTurnWatchdog(conv: CodexConversation): void {
436
670
  }, TURN_WATCHDOG_MS);
437
671
  }
438
672
 
439
- async function startTurn(conv: CodexConversation, content: string, savedFiles?: SavedFile[]): Promise<void> {
440
- const input = buildUserInput(content, savedFiles);
441
- conv.busy = true;
673
+ /** Hard wall-clock cap for one-shot turns — see ONE_SHOT_MAX_TURN_MS. */
674
+ function armHardTurnTimer(conv: CodexConversation): void {
675
+ if (!conv.oneShot || conv.hardTurnTimer) return;
676
+ conv.hardTurnTimer = setTimeout(() => {
677
+ conv.hardTurnTimer = null;
678
+ log.warn(`[codex] one-shot hard cap fired (${ONE_SHOT_MAX_TURN_MS}ms) — conv=${conv.id}; interrupting`);
679
+ if (conv.currentTurnId) {
680
+ void conv.rpc.request('turn/interrupt', { threadId: conv.threadId, turnId: conv.currentTurnId }).catch(() => {});
681
+ }
682
+ conv.busy = false;
683
+ conv.currentTurnId = null;
684
+ emitError(conv, `Codex query hit the ${Math.round(ONE_SHOT_MAX_TURN_MS / 60_000)}-minute limit and was stopped.`);
685
+ teardownConversation(conv.id); // emits the guaranteed bot:done
686
+ }, ONE_SHOT_MAX_TURN_MS);
687
+ }
688
+
689
+ function resetTurnState(conv: CodexConversation): void {
442
690
  conv.fullText = '';
691
+ conv.currentMsgItemId = null;
692
+ conv.itemTexts = new Map();
443
693
  conv.usedFileTools = false;
444
- conv.onMessage('bot:typing', { conversationId: conv.id });
694
+ conv.errorEmitted = false;
695
+ conv.stashedError = null;
696
+ }
697
+
698
+ async function startTurn(conv: CodexConversation, content: string, savedFiles?: SavedFile[], attachments?: AgentAttachment[]): Promise<void> {
699
+ const input = buildUserInput(content, savedFiles, attachments);
700
+ conv.busy = true;
701
+ resetTurnState(conv);
702
+ emitEvent(conv, 'bot:typing', { conversationId: conv.id });
445
703
  armTurnWatchdog(conv);
704
+ armHardTurnTimer(conv);
446
705
  try {
447
706
  const params: Record<string, any> = { threadId: conv.threadId, input };
448
707
  if (conv.effort) params.effort = conv.effort;
449
- // turn/start resolves immediately with { turn }; seize the id now so a
450
- // pushMessage arriving before the turn/started notification can steer
451
- // instead of starting a second turn.
708
+ // turn/start resolves immediately with { turn }; seize the id now so an
709
+ // interrupt arriving before the turn/started notification still has a target.
452
710
  const res = await conv.rpc.request<{ turn?: { id?: string } }>('turn/start', params);
453
711
  if (res?.turn?.id) conv.currentTurnId = res.turn.id;
454
712
  } catch (err: any) {
455
713
  clearTurnWatchdog(conv);
456
714
  conv.busy = false;
457
715
  conv.currentTurnId = null;
458
- conv.onMessage('bot:error', { conversationId: conv.id, error: `turn/start failed: ${err.message}` });
459
- // turn/start produced no turn, so no turn/completed will arrive to clear the supervisor's
460
- // agentQueryActive (set on bot:typing above). Left as-is, that wedges true forever:
461
- // backend auto-heal is deferred indefinitely and chat is stuck showing "typing". Tear the
462
- // conversation down so bot:conversation-ended fires (which, unlike bot:turn-complete, does
463
- // NOT trigger a backend restart) — the next user message cold-starts a fresh thread.
464
- teardownConversation(conv.id);
716
+ emitError(conv, `turn/start failed: ${err.message}`);
717
+ // turn/start produced no turn, so no turn/completed will arrive to clear the
718
+ // supervisor's agentQueryActive (set on bot:typing above).
719
+ //
720
+ // - One-shots and transport failures (timeout / process exit): tear down. A
721
+ // 60s timeout most plausibly means a hung app-server — keeping the conv
722
+ // would just hang the next message too. teardown emits bot:conversation-
723
+ // ended (clears agentQueryActive) and, for one-shots, the guaranteed bot:done.
724
+ // - A fast JSON-RPC REJECTION on a live conv means the server is alive (bad
725
+ // effort value, transient thread error): keep the thread — its server-side
726
+ // context (files read, compacted history) survives, matching claude's
727
+ // per-turn error containment. bot:turn-complete unsticks the supervisor.
728
+ if (conv.oneShot || !isServerRejection(err)) {
729
+ teardownConversation(conv.id);
730
+ } else {
731
+ emitEvent(conv, 'bot:turn-complete', {
732
+ conversationId: conv.id,
733
+ usedFileTools: false,
734
+ contextTokens: conv.lastContextTokens || 0,
735
+ contextWindow: conv.lastContextWindow || 0,
736
+ idle: conv.pendingInputs.length === 0,
737
+ });
738
+ // Keep draining queued messages — each failure surfaces its own error,
739
+ // and the finite queue guarantees termination.
740
+ const next = conv.pendingInputs.shift();
741
+ if (next !== undefined) void startTurn(conv, next.content, next.savedFiles, next.attachments);
742
+ }
465
743
  }
466
744
  }
467
745
 
468
- async function steerOrQueue(conv: CodexConversation, content: string, savedFiles?: SavedFile[]): Promise<void> {
469
- if (!conv.currentTurnId) {
470
- // No active turn start a fresh one.
471
- await startTurn(conv, content, savedFiles);
746
+ /**
747
+ * Queue-or-start: the codex side of claude's async input queue. While a turn
748
+ * is active, pushes are queued and drained ONE TURN PER MESSAGE on
749
+ * turn/completed — preserving the one-bot:response-per-push contract that
750
+ * channels/manager.ts's routing FIFO depends on (it enqueues exactly one
751
+ * routing target per push and consumes one per bot:response).
752
+ */
753
+ function queueOrStart(conv: CodexConversation, content: string, savedFiles?: SavedFile[], attachments?: AgentAttachment[]): void {
754
+ if (conv.busy || conv.currentTurnId) {
755
+ conv.pendingInputs.push({ content, savedFiles, attachments });
756
+ // Mirror claude's pushMessage: every accepted push shows typing immediately.
757
+ emitEvent(conv, 'bot:typing', { conversationId: conv.id });
472
758
  return;
473
759
  }
474
- // Active turn — inject mid-flight.
475
- const input = buildUserInput(content, savedFiles);
476
- try {
477
- const res = await conv.rpc.request<{ turnId?: string }>('turn/steer', {
478
- threadId: conv.threadId,
479
- expectedTurnId: conv.currentTurnId,
480
- input,
481
- });
482
- if (res?.turnId) conv.currentTurnId = res.turnId;
483
- conv.onMessage('bot:typing', { conversationId: conv.id });
484
- } catch (err: any) {
485
- // expectedTurnId mismatch most likely means the turn just finished —
486
- // retry as a fresh turn.
487
- log.warn(`[codex] turn/steer failed (${err.message}); falling back to turn/start`);
488
- if (!conv.currentTurnId) await startTurn(conv, content, savedFiles);
489
- else conv.pendingInputs.push(content);
490
- }
760
+ void startTurn(conv, content, savedFiles, attachments);
491
761
  }
492
762
 
763
+ /* ── Notification handling ─────────────────────────────────────────────── */
764
+
493
765
  function handleNotification(conv: CodexConversation, n: { method: string; params?: any }): void {
494
766
  const p = n.params || {};
495
767
  // Any notification for this conv proves the app-server is alive and working —
@@ -498,9 +770,7 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
498
770
  switch (n.method) {
499
771
  case 'turn/started': {
500
772
  conv.currentTurnId = p.turn?.id || null;
501
- conv.fullText = '';
502
- conv.currentMsgItemId = null;
503
- conv.usedFileTools = false;
773
+ resetTurnState(conv);
504
774
  break;
505
775
  }
506
776
 
@@ -511,52 +781,72 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
511
781
  // On a new itemId, insert a paragraph break so they don't run together (mirrors claude.ts).
512
782
  if (p.itemId && conv.currentMsgItemId && p.itemId !== conv.currentMsgItemId && conv.fullText && !conv.fullText.endsWith('\n')) {
513
783
  conv.fullText += '\n\n';
514
- conv.onMessage('bot:token', { conversationId: conv.id, token: '\n\n' });
784
+ emitToken(conv, '\n\n');
785
+ }
786
+ if (p.itemId) {
787
+ conv.currentMsgItemId = p.itemId;
788
+ conv.itemTexts.set(p.itemId, (conv.itemTexts.get(p.itemId) || '') + delta);
515
789
  }
516
- if (p.itemId) conv.currentMsgItemId = p.itemId;
517
790
  conv.fullText += delta;
518
- conv.onMessage('bot:token', { conversationId: conv.id, token: delta });
791
+ emitToken(conv, delta);
519
792
  break;
520
793
  }
521
794
 
522
795
  case 'item/started': {
523
796
  const item = p.item || {};
524
- // Surface tool-like items so the dashboard can show activity.
797
+ // Surface tool-like items so the dashboard can show activity. Names use
798
+ // claude's tool vocabulary (Bash/Edit/WebSearch/mcp__server__tool) so one
799
+ // UI label map serves both harnesses.
525
800
  switch (item.type) {
526
801
  case 'commandExecution':
527
- conv.onMessage('bot:tool', {
802
+ emitEvent(conv, 'bot:tool', {
528
803
  conversationId: conv.id,
529
- name: 'shell',
804
+ name: 'Bash',
530
805
  input: { command: item.command || item.commandLine || '' },
531
806
  });
532
807
  break;
533
808
  case 'mcpToolCall':
534
809
  // ThreadItem.mcpToolCall fields are `server` + `tool` (no toolName/name/input).
535
- conv.onMessage('bot:tool', {
810
+ emitEvent(conv, 'bot:tool', {
536
811
  conversationId: conv.id,
537
- name: item.tool ? (item.server ? `${item.server}/${item.tool}` : item.tool) : 'mcp_tool',
812
+ name: item.tool ? (item.server ? `mcp__${item.server}__${item.tool}` : item.tool) : 'mcp_tool',
538
813
  input: item.arguments || {},
539
814
  });
540
815
  break;
541
816
  case 'fileChange':
542
817
  conv.usedFileTools = true;
543
- conv.onMessage('bot:tool', {
818
+ emitEvent(conv, 'bot:tool', {
544
819
  conversationId: conv.id,
545
- name: 'file_change',
820
+ name: 'Edit',
546
821
  input: { changes: (item.changes || []).map((c: any) => c.path).filter(Boolean) },
547
822
  });
548
823
  break;
549
824
  case 'webSearch':
550
- conv.onMessage('bot:tool', {
825
+ emitEvent(conv, 'bot:tool', {
551
826
  conversationId: conv.id,
552
- name: 'web_search',
827
+ name: 'WebSearch',
553
828
  input: { query: item.query || '' },
554
829
  });
555
830
  break;
831
+ case 'reasoning':
832
+ // Activity signal during the (often long) thinking phase — without it
833
+ // the chat shows bare typing dots with zero events for the entire
834
+ // stretch and turns feel hung. The chat hooks dedup repeated
835
+ // same-name running tools, and channel chunk-flushes no-op on an
836
+ // empty buffer, so this is purely additive. Reasoning TEXT is never
837
+ // emitted as bot:token — that would corrupt the bot:response ===
838
+ // streamed-text contract and leak reasoning to channels.
839
+ emitEvent(conv, 'bot:tool', {
840
+ conversationId: conv.id,
841
+ name: 'thinking',
842
+ status: 'running',
843
+ });
844
+ break;
556
845
  case 'collabAgentToolCall':
557
- // Codex's collaborating sub-agents (rarely enabled) → Bloby's sub-agent UX.
846
+ // Codex's collaborating sub-agents (experimental, not enabled by our
847
+ // initialize) → Bloby's sub-agent UX, if it ever lights up.
558
848
  if (item.tool === 'spawnAgent') {
559
- conv.onMessage('bot:task-created', {
849
+ emitEvent(conv, 'bot:task-created', {
560
850
  conversationId: conv.id,
561
851
  taskId: item.id,
562
852
  description: item.prompt || 'sub-agent',
@@ -564,24 +854,70 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
564
854
  });
565
855
  }
566
856
  break;
567
- // userMessage / agentMessage / reasoning — no tool-style event.
857
+ // userMessage / agentMessage — no tool-style event.
568
858
  }
569
859
  break;
570
860
  }
571
861
 
862
+ case 'turn/plan/updated': {
863
+ // Codex's native planning (its TodoWrite equivalent). Emit it in claude's
864
+ // TodoWrite shape so plans drive the same activity label + bubble/chunk
865
+ // boundaries — without this codex feels opaque exactly on the big agentic
866
+ // asks where claude feels alive. Statuses are camelCase in 0.138
867
+ // (pending | inProgress | completed) — normalize to claude's snake_case.
868
+ const steps = Array.isArray(p.plan) ? p.plan : [];
869
+ emitEvent(conv, 'bot:tool', {
870
+ conversationId: conv.id,
871
+ name: 'TodoWrite',
872
+ input: {
873
+ todos: steps.map((s: any) => ({
874
+ content: s.step || '',
875
+ status: s.status === 'inProgress' ? 'in_progress' : (s.status || 'pending'),
876
+ })),
877
+ ...(p.explanation ? { explanation: p.explanation } : {}),
878
+ },
879
+ });
880
+ break;
881
+ }
882
+
572
883
  case 'item/completed': {
573
884
  const item = p.item || {};
574
885
  if (item.type === 'fileChange') conv.usedFileTools = true;
575
- // If a final agentMessage arrives without preceding deltas (rare), grab it now.
576
- if (item.type === 'agentMessage' && !conv.fullText) {
577
- const text = (item.content || []).map((c: any) => c.text || '').join('') || item.text || '';
578
- if (text) {
579
- conv.fullText = text;
580
- conv.onMessage('bot:token', { conversationId: conv.id, token: text });
886
+ // item/completed carries the AUTHORITATIVE final text for an agentMessage.
887
+ // Reconcile per-itemId: an item that never streamed deltas (or whose final
888
+ // text extends beyond the concatenated deltas) gets the remainder emitted
889
+ // here — previously a turn-level `!conv.fullText` gate silently dropped any
890
+ // non-delta'd message once ANYTHING had streamed.
891
+ if (item.type === 'agentMessage') {
892
+ const finalText: string = (typeof item.text === 'string' && item.text)
893
+ ? item.text
894
+ : ((item.content || []).map((c: any) => c.text || '').join(''));
895
+ if (finalText) {
896
+ const itemId: string | undefined = item.id;
897
+ const streamed = itemId ? (conv.itemTexts.get(itemId) || '') : conv.fullText;
898
+ let remainder = '';
899
+ if (!streamed) {
900
+ remainder = finalText;
901
+ } else if (finalText.length > streamed.length && finalText.startsWith(streamed)) {
902
+ remainder = finalText.slice(streamed.length);
903
+ }
904
+ if (remainder) {
905
+ // Same paragraph-break rule as the delta path for a fresh item.
906
+ if (!streamed && conv.fullText && !conv.fullText.endsWith('\n')) {
907
+ conv.fullText += '\n\n';
908
+ emitToken(conv, '\n\n');
909
+ }
910
+ conv.fullText += remainder;
911
+ emitToken(conv, remainder);
912
+ if (itemId) {
913
+ conv.itemTexts.set(itemId, finalText);
914
+ conv.currentMsgItemId = itemId;
915
+ }
916
+ }
581
917
  }
582
918
  }
583
919
  if (item.type === 'collabAgentToolCall' && item.tool === 'spawnAgent') {
584
- conv.onMessage('bot:task-done', {
920
+ emitEvent(conv, 'bot:task-done', {
585
921
  conversationId: conv.id,
586
922
  taskId: item.id,
587
923
  status: item.status,
@@ -593,11 +929,14 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
593
929
 
594
930
  case 'thread/tokenUsage/updated': {
595
931
  // Codex's only token-usage signal. ThreadTokenUsage = { total, last, modelContextWindow };
596
- // `last` is the current prompt occupancy (mirrors Claude's input+cacheRead+cacheCreation),
597
- // the right basis for the recycle compare in supervisor/index.ts (fraction*window, not lifetime).
932
+ // `last.inputTokens` is the current prompt occupancy — the right basis for
933
+ // the recycle compare in supervisor/index.ts (fraction*window, not lifetime).
934
+ // NB: codex's cachedInputTokens is a SUBSET of inputTokens (unlike
935
+ // Anthropic's disjoint cache counters) — summing them double-counts the
936
+ // cache and recycles sessions at ~35% real occupancy.
598
937
  const tu = p.tokenUsage || {};
599
938
  const last = tu.last || {};
600
- conv.lastContextTokens = (last.inputTokens || 0) + (last.cachedInputTokens || 0);
939
+ conv.lastContextTokens = last.inputTokens || 0;
601
940
  if (typeof tu.modelContextWindow === 'number' && tu.modelContextWindow > 0) {
602
941
  conv.lastContextWindow = tu.modelContextWindow;
603
942
  }
@@ -609,29 +948,41 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
609
948
  const turnError = p.turn?.error;
610
949
 
611
950
  clearTurnWatchdog(conv);
951
+ clearHardTurnTimer(conv);
612
952
  conv.currentTurnId = null;
613
953
  conv.busy = false;
614
954
 
955
+ let failureKind: string | undefined;
615
956
  if (status === 'failed') {
616
- conv.onMessage('bot:error', {
617
- conversationId: conv.id,
618
- error: turnError?.message || 'Codex turn failed.',
619
- });
957
+ // Preserve streamed partials exactly like claude: if text already reached
958
+ // the user, commit it as the reply (the frontend's bot:error handler
959
+ // ERASES the uncommitted stream buffer); surface bot:error only when the
960
+ // turn died before producing anything.
961
+ const message = turnError?.message || conv.stashedError?.message || 'Codex turn failed.';
962
+ const info = turnError?.codexErrorInfo ?? conv.stashedError?.info;
963
+ failureKind = errorInfoKind(info);
964
+ if (conv.fullText) {
965
+ log.warn(`[codex] turn failed after streaming ${conv.fullText.length} chars (${message.slice(0, 160)}) — preserving partial as the reply`);
966
+ emitEvent(conv, 'bot:response', { conversationId: conv.id, content: conv.fullText });
967
+ } else {
968
+ emitError(conv, message, info, turnError?.additionalDetails);
969
+ }
620
970
  } else if (status === 'interrupted') {
621
971
  // Interrupted turns carry no final answer — stay silent.
622
972
  } else if (conv.fullText) {
623
- conv.onMessage('bot:response', { conversationId: conv.id, content: conv.fullText });
973
+ emitEvent(conv, 'bot:response', { conversationId: conv.id, content: conv.fullText });
624
974
  }
975
+ conv.stashedError = null;
625
976
 
626
977
  if (conv.oneShot) {
627
- conv.onMessage('bot:done', { conversationId: conv.id, usedFileTools: conv.usedFileTools });
978
+ emitDone(conv);
628
979
  teardownConversation(conv.id);
629
980
  } else {
630
981
  // Context-size signal for the orchestrator's proactive session recycling,
631
982
  // sourced from the cached `thread/tokenUsage/updated` values above. 0 if codex
632
983
  // never sent one this turn → falls back to codex's own in-thread auto-compaction.
633
984
  const idle = conv.pendingInputs.length === 0;
634
- conv.onMessage('bot:turn-complete', {
985
+ emitEvent(conv, 'bot:turn-complete', {
635
986
  conversationId: conv.id,
636
987
  usedFileTools: conv.usedFileTools,
637
988
  contextTokens: conv.lastContextTokens || 0,
@@ -639,9 +990,30 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
639
990
  idle,
640
991
  });
641
992
 
642
- // Drain any messages that were submitted while we were busy.
993
+ // An unauthorized/context-wall thread can't run further turns — tear it
994
+ // down so the NEXT message pre-flights cleanly (friendly dashboard
995
+ // message / fresh thread) instead of repeating the same failure forever.
996
+ if (failureKind === 'unauthorized' || failureKind === 'contextWindowExceeded') {
997
+ // Every queued message got bot:typing on push — give each a terminal
998
+ // event too, or it vanishes without feedback. (Direct emit: the
999
+ // per-turn errorEmitted dedup must not swallow these.)
1000
+ const reason = failureKind === 'unauthorized' ? 'authentication expired' : 'context window full';
1001
+ for (const dropped of conv.pendingInputs.splice(0)) {
1002
+ const snippet = dropped.content.slice(0, 60) + (dropped.content.length > 60 ? '…' : '');
1003
+ emitEvent(conv, 'bot:error', {
1004
+ conversationId: conv.id,
1005
+ error: `Codex session ended (${reason}) before your message "${snippet}" could run — please resend it.`,
1006
+ });
1007
+ }
1008
+ log.warn(`[codex] tearing down conv=${conv.id} after ${failureKind} so the next message starts clean`);
1009
+ teardownConversation(conv.id);
1010
+ break;
1011
+ }
1012
+
1013
+ // Drain any messages that were queued while we were busy — each gets
1014
+ // its own turn (and thus its own bot:response).
643
1015
  const next = conv.pendingInputs.shift();
644
- if (next !== undefined) void startTurn(conv, next);
1016
+ if (next !== undefined) void startTurn(conv, next.content, next.savedFiles, next.attachments);
645
1017
  }
646
1018
  break;
647
1019
  }
@@ -653,8 +1025,19 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
653
1025
  log.info(`[codex] transient error (will retry): ${p.error?.message || 'unknown'}`);
654
1026
  break;
655
1027
  }
656
- const errMsg = p.error?.message || 'Codex error notification';
657
- conv.onMessage('bot:error', { conversationId: conv.id, error: errMsg });
1028
+ // Hard failures emit BOTH this notification AND turn/completed{failed}
1029
+ // with the same TurnError (live-verified on 0.138.0). While a turn is
1030
+ // active, stash it and let turn/completed be the single surface — its
1031
+ // TurnError is authoritative per the protocol. Outside a turn there is
1032
+ // no turn/completed coming, so emit directly.
1033
+ const message = p.error?.message || 'Codex error notification';
1034
+ const info = p.error?.codexErrorInfo;
1035
+ if (conv.busy) {
1036
+ log.info(`[codex] stashing mid-turn error for turn/completed: ${message.slice(0, 160)}`);
1037
+ conv.stashedError = { message, info };
1038
+ } else {
1039
+ emitError(conv, message, info, p.error?.additionalDetails);
1040
+ }
658
1041
  break;
659
1042
  }
660
1043
 
@@ -672,13 +1055,110 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
672
1055
  }
673
1056
  }
674
1057
 
1058
+ /* ── Conversation teardown ─────────────────────────────────────────────── */
1059
+
675
1060
  function teardownConversation(conversationId: string): void {
676
1061
  const conv = conversations.get(conversationId);
677
1062
  if (!conv) return;
678
1063
  clearTurnWatchdog(conv);
1064
+ clearHardTurnTimer(conv);
679
1065
  conversations.delete(conversationId);
1066
+ discardTokens(conv);
680
1067
  try { conv.rpc.close(); } catch {}
1068
+ // bot:done guarantee: slot-freeing consumers (WhatsApp activeAgents, the
1069
+ // scheduler) resolve ONLY on bot:done — claude guarantees it in a finally,
1070
+ // so every codex terminal path (stop, settings:save teardown, init failure,
1071
+ // crash) must too, or each failed customer message pins a slot until restart.
1072
+ if (conv.oneShot) emitDone(conv);
681
1073
  conv.onMessage('bot:conversation-ended', { conversationId });
1074
+ // Re-warm for the next live conversation (mirrors claude's finally).
1075
+ if (!conv.oneShot) void warmUpForLiveConversation(conv.model, conv.names);
1076
+ }
1077
+
1078
+ /* ── Spawn + initialize (with pre-warm pool) ───────────────────────────── */
1079
+
1080
/**
 * A pre-warmed app-server: the process has been spawned and the `initialize`
 * handshake is complete. `thread/start` is intentionally NOT sent while
 * warming — it is a single fast local RPC, and postponing it keeps the warm
 * process claimable for ANY model/instructions (claude's warmer has to bake
 * the full options in and misses whenever recentMessages differ; this design
 * avoids that). The entry is keyed on the MCP spawn flags, the only thing
 * fixed at spawn time.
 */
interface CodexWarmEntry {
  /** Key derived from the MCP spawn flags the process was started with. */
  key: string;
  /** Connection to the spawned, initialized app-server. */
  rpc: CodexRpc;
}

/** The cached warm app-server, or null when none is available. */
let codexWarm: CodexWarmEntry | null = null;
/** The warm-up currently in progress, or null when none is running. */
let codexWarmInflight: Promise<void> | null = null;
1095
+
1096
/** Cache key for a set of MCP spawn flags: hex SHA-256 of their JSON form. */
function warmKeyFor(mcpArgs: string[]): string {
  const serialized = JSON.stringify(mcpArgs);
  const hasher = crypto.createHash('sha256');
  hasher.update(serialized);
  return hasher.digest('hex');
}
1099
+
1100
/**
 * Take ownership of the cached warm app-server if it was spawned with the
 * given key. The cache slot is emptied either way; a process spawned with a
 * different key is closed and null is returned.
 */
function claimWarmRpc(key: string): CodexRpc | null {
  const entry = codexWarm;
  if (!entry) return null;

  const usable = entry.key === key;
  if (!usable) {
    // MCP config changed since warm time — the flags are baked into the spawn.
    try {
      entry.rpc.close();
    } catch {
      // Best-effort close.
    }
  }
  codexWarm = null;
  if (!usable) return null;

  log.info('[codex] claimed pre-warmed app-server');
  return entry.rpc;
}
1113
+
1114
/** Close and forget the cached warm app-server, if there is one. */
function discardCodexWarmup(): void {
  if (!codexWarm) return;
  try {
    codexWarm.rpc.close();
  } catch {
    // Best-effort close.
  }
  codexWarm = null;
}
1120
+
1121
/**
 * Spawn an app-server and complete its `initialize` handshake ahead of time so
 * a later live conversation can claim it instead of paying for a cold start.
 *
 * - Concurrent calls share the single in-flight warm-up.
 * - No-op when a warm process with the current MCP key is already cached, or
 *   when no access token is available.
 * - Never rejects: failures are logged and the warm-up is simply skipped.
 *
 * @param _model Unused — `thread/start` is deferred to claim time, so the warm
 *   process does not depend on the model.
 * @param _names Unused, for the same reason.
 */
export async function warmUpForLiveConversation(
  _model: string,
  _names?: { botName: string; humanName: string },
): Promise<void> {
  if (codexWarmInflight) return codexWarmInflight;
  const mcpArgs = buildMcpConfigArgs();
  const key = warmKeyFor(mcpArgs);
  if (codexWarm?.key === key) return;

  codexWarmInflight = (async () => {
    // Tracks a spawned-but-not-yet-cached process so the failure path can
    // close it instead of leaking the subprocess.
    let pending: CodexRpc | null = null;
    try {
      const token = await getCodexAccessToken();
      if (!token) return; // not authed — nothing to warm
      if (codexWarm && codexWarm.key !== key) discardCodexWarmup();
      const rpc = new CodexRpc();
      pending = rpc;
      rpc.start(mcpArgs);
      rpc.onClose(() => {
        // Warm process died on its own — drop it from the cache so a claim
        // never hands out a dead rpc.
        if (codexWarm?.rpc === rpc) codexWarm = null;
      });
      await rpc.request('initialize', { clientInfo: CLIENT_INFO });
      rpc.notify('initialized', {});
      codexWarm = { key, rpc };
      pending = null; // ownership handed to the cache
      log.ok('[codex] app-server pre-warmed');
    } catch (err: any) {
      // A failed handshake must not leave a half-initialized app-server
      // running (mirrors the rpc.close() in spawnAndInitialize's catch).
      if (pending) {
        try {
          pending.close();
        } catch {
          // Best-effort close.
        }
      }
      log.warn(`[codex] pre-warm skipped: ${err?.message || err}`);
    } finally {
      codexWarmInflight = null;
    }
  })();
  return codexWarmInflight;
}
1154
+
1155
/** Per-caller options for spawnAndInitialize. */
interface SpawnOpts {
  /** Stored on the conversation as its `oneShot` flag. */
  oneShot: boolean;
  /**
   * Whether to prime the thread with the workspace skills. Customer-facing
   * runs (supportPrompt personas) get NO workspace skills — mirrors claude's
   * `skills: supportPrompt ? [] : ...` gate so internal ops skills can't leak
   * into the customer context.
   */
  wantSkills: boolean;
  /** Bot/human display names, stored on the conversation. */
  names?: { botName: string; humanName: string };
}
683
1163
 
684
1164
  async function spawnAndInitialize(
@@ -686,7 +1166,7 @@ async function spawnAndInitialize(
686
1166
  model: string,
687
1167
  onMessage: OnAgentMessage,
688
1168
  instructions: string,
689
- oneShot: boolean,
1169
+ opts: SpawnOpts,
690
1170
  ): Promise<CodexConversation | null> {
691
1171
  // Pre-flight: confirm we have valid OAuth tokens before spending time spawning.
692
1172
  const token = await getCodexAccessToken();
@@ -698,85 +1178,202 @@ async function spawnAndInitialize(
698
1178
  return null;
699
1179
  }
700
1180
 
701
- const { id: modelId, effort } = parseModelString(model);
702
- const rpc = new CodexRpc();
703
- rpc.start(buildMcpConfigArgs());
1181
+ const { id: parsedModelId, effort: parsedEffort } = parseModelString(model);
1182
+ const mcpArgs = buildMcpConfigArgs();
1183
+
1184
+ // One attempt with the pre-warmed process (live convs only — one-shots churn
1185
+ // too fast to be worth re-warming for), falling back to a cold spawn if the
1186
+ // warm one fails its thread/start.
1187
+ const attempts: Array<'warm' | 'cold'> = [];
1188
+ if (!opts.oneShot && codexWarm) attempts.push('warm');
1189
+ attempts.push('cold');
1190
+
1191
+ for (const attempt of attempts) {
1192
+ let rpc: CodexRpc;
1193
+ if (attempt === 'warm') {
1194
+ const claimed = claimWarmRpc(warmKeyFor(mcpArgs));
1195
+ if (!claimed) continue;
1196
+ rpc = claimed;
1197
+ } else {
1198
+ rpc = new CodexRpc();
1199
+ rpc.start(mcpArgs);
1200
+ }
704
1201
 
705
- const conv: CodexConversation = {
706
- id: conversationId,
707
- rpc,
708
- threadId: '',
709
- effort,
710
- onMessage,
711
- currentTurnId: null,
712
- currentMsgItemId: null,
713
- fullText: '',
714
- usedFileTools: false,
715
- pendingInputs: [],
716
- busy: false,
717
- oneShot,
718
- lastContextTokens: 0,
719
- lastContextWindow: 0,
720
- turnWatchdog: null,
721
- };
1202
+ const conv: CodexConversation = {
1203
+ id: conversationId,
1204
+ rpc,
1205
+ threadId: '',
1206
+ effort: parsedEffort,
1207
+ model,
1208
+ names: opts.names,
1209
+ onMessage,
1210
+ currentTurnId: null,
1211
+ currentMsgItemId: null,
1212
+ fullText: '',
1213
+ itemTexts: new Map(),
1214
+ usedFileTools: false,
1215
+ pendingInputs: [],
1216
+ busy: false,
1217
+ oneShot: opts.oneShot,
1218
+ errorEmitted: false,
1219
+ doneEmitted: false,
1220
+ stashedError: null,
1221
+ lastContextTokens: 0,
1222
+ lastContextWindow: 0,
1223
+ turnWatchdog: null,
1224
+ hardTurnTimer: null,
1225
+ tokenBuf: '',
1226
+ tokenFlushTimer: null,
1227
+ };
722
1228
 
723
- rpc.onNotification((n) => handleNotification(conv, n));
724
- rpc.onClose(() => {
725
- if (conversations.get(conversationId) === conv) {
1229
+ rpc.onNotification((n) => handleNotification(conv, n));
1230
+ rpc.onClose((code, errMsg) => {
1231
+ if (conversations.get(conversationId) !== conv) return;
1232
+ // App-server died out from under a live conversation. Surface it NOW —
1233
+ // without this the user stares at frozen typing dots until the leaked
1234
+ // watchdog ghost-fires 5 minutes later with a misleading "timed out"
1235
+ // (and that orphaned watchdog could tear down a successor conversation
1236
+ // started under the same convId in the meantime).
1237
+ clearTurnWatchdog(conv);
1238
+ clearHardTurnTimer(conv);
726
1239
  conversations.delete(conversationId);
1240
+ flushTokens(conv);
1241
+ if (conv.busy) {
1242
+ conv.busy = false;
1243
+ conv.currentTurnId = null;
1244
+ emitError(conv, errMsg || `codex app-server exited unexpectedly (code=${code}).`);
1245
+ if (!conv.oneShot) {
1246
+ emitEvent(conv, 'bot:turn-complete', {
1247
+ conversationId: conv.id,
1248
+ usedFileTools: conv.usedFileTools,
1249
+ contextTokens: conv.lastContextTokens || 0,
1250
+ contextWindow: conv.lastContextWindow || 0,
1251
+ idle: true,
1252
+ });
1253
+ }
1254
+ }
1255
+ if (conv.oneShot) emitDone(conv);
727
1256
  onMessage('bot:conversation-ended', { conversationId });
1257
+ if (!conv.oneShot) void warmUpForLiveConversation(conv.model, conv.names);
1258
+ });
1259
+
1260
+ try {
1261
+ log.info(`[codex] init conversation ${conversationId} (model=${parsedModelId}${parsedEffort ? `, effort=${parsedEffort}` : ''}, ${attempt})`);
1262
+ if (attempt === 'cold') {
1263
+ await rpc.request('initialize', { clientInfo: CLIENT_INFO });
1264
+ rpc.notify('initialized', {});
1265
+ }
1266
+
1267
+ // Validate/repair the configured model + effort against the live catalog —
1268
+ // a stale model id or retired effort tier otherwise fails the first message
1269
+ // with a cryptic teardown (codex has no config-level model migrations).
1270
+ const { modelId, effort } = await validateModelSelection(rpc, parsedModelId, parsedEffort);
1271
+ conv.effort = effort;
1272
+
1273
+ // Context auto-compaction is ON by default in the codex app-server: when the
1274
+ // thread's token count crosses the model's threshold it compacts history in
1275
+ // place (emitting a `contextCompaction` item) and continues — no flag needed
1276
+ // here. A manual trigger also exists (`thread/compact/start`) if we ever want
1277
+ // to force it from the UI.
1278
+ const startResult = await rpc.request<{ thread: { id: string } }>('thread/start', {
1279
+ cwd: WORKSPACE_DIR,
1280
+ model: modelId,
1281
+ // Bloby's persona/workflow prompt rides developerInstructions (ADDITIVE),
1282
+ // NOT baseInstructions. baseInstructions fully OVERRIDES codex's native base
1283
+ // prompt — which carries the apply_patch FREEFORM spec + shell protocol the
1284
+ // model needs to edit files. Leaving baseInstructions unset keeps that native
1285
+ // scaffolding; developerInstructions layers Bloby's persona on top of it.
1286
+ developerInstructions: instructions,
1287
+ personality: 'pragmatic',
1288
+ // Bloby's posture matches Claude's bypassPermissions — the bot is
1289
+ // running on the user's own machine with their full consent. Skip the
1290
+ // approval prompts and give it write access to the workspace + beyond.
1291
+ approvalPolicy: 'never',
1292
+ sandbox: 'danger-full-access',
1293
+ });
1294
+ conv.threadId = startResult.thread.id;
1295
+ conversations.set(conversationId, conv);
1296
+ log.ok(`[codex] thread started ${conv.threadId}`);
1297
+
1298
+ // Prime codex's per-thread skill cache with the workspace skills
1299
+ // directory. Without this, codex only sees its system-scope skills and
1300
+ // never discovers anything Bloby ships in `workspace/skills/*`. Fire and
1301
+ // forget — failure here just means workspace skills won't be auto-routable
1302
+ // for this thread, but the agent can still read SKILL.md files directly.
1303
+ // Customer-facing personas skip it entirely (see SpawnOpts.wantSkills).
1304
+ if (opts.wantSkills) primeWorkspaceSkills(rpc);
1305
+
1306
+ return conv;
1307
+ } catch (err: any) {
1308
+ rpc.close();
1309
+ if (attempt === 'warm') {
1310
+ log.warn(`[codex] warm claim failed (${err.message}) — retrying with a cold spawn`);
1311
+ continue;
1312
+ }
1313
+ onMessage('bot:error', { conversationId, error: `Failed to initialize Codex: ${err.message}` });
1314
+ return null;
728
1315
  }
729
- });
1316
+ }
1317
+ return null; // unreachable — 'cold' attempt always returns/errors above
1318
+ }
730
1319
 
731
- try {
732
- log.info(`[codex] init conversation ${conversationId} (model=${modelId}${effort ? `, effort=${effort}` : ''})`);
733
- await rpc.request('initialize', { clientInfo: CLIENT_INFO });
734
- rpc.notify('initialized', {});
735
- // Context auto-compaction is ON by default in the codex app-server: when the
736
- // thread's token count crosses the model's threshold it compacts history in
737
- // place (emitting a `contextCompaction` item) and continues — no flag needed
738
- // here. A manual trigger also exists (`thread/compact/start`) if we ever want
739
- // to force it from the UI.
740
- const startResult = await rpc.request<{ thread: { id: string } }>('thread/start', {
741
- cwd: WORKSPACE_DIR,
742
- model: modelId,
743
- // Bloby's persona/workflow prompt rides developerInstructions (ADDITIVE),
744
- // NOT baseInstructions. baseInstructions fully OVERRIDES codex's native base
745
- // prompt — which carries the apply_patch FREEFORM spec + shell protocol the
746
- // model needs to edit files. Leaving baseInstructions unset keeps that native
747
- // scaffolding; developerInstructions layers Bloby's persona on top of it.
748
- developerInstructions: instructions,
749
- personality: 'pragmatic',
750
- // Bloby's posture matches Claude's bypassPermissions — the bot is
751
- // running on the user's own machine with their full consent. Skip the
752
- // approval prompts and give it write access to the workspace + beyond.
753
- approvalPolicy: 'never',
754
- sandbox: 'danger-full-access',
755
- });
756
- conv.threadId = startResult.thread.id;
757
- conversations.set(conversationId, conv);
758
- log.ok(`[codex] thread started ${conv.threadId}`);
759
-
760
- // Prime codex's per-thread skill cache with the workspace skills
761
- // directory. Without this, codex only sees its system-scope skills and
762
- // never discovers anything Bloby ships in `workspace/skills/*`. Fire and
763
- // forget — failure here just means workspace skills won't be auto-routable
764
- // for this thread, but the agent can still read SKILL.md files directly.
765
- primeWorkspaceSkills(rpc);
1320
+ /* ── Model catalog validation ──────────────────────────────────────────── */
1321
+
1322
/** A model id plus optional reasoning effort to use for a thread. */
interface ValidatedModel { modelId: string; effort?: string }
/** Catalog-validated selections keyed by `${modelId}:${effort}`; never evicted. */
const modelValidationCache = new Map<string, ValidatedModel>();

/**
 * Validate the configured model id + effort against `model/list` (local
 * catalog, one fast RPC — cached per model string for the process lifetime).
 *
 * - Unknown model → repaired to the catalog default (or first entry) with a
 *   logged warning, and the effort is cleared because it may not apply.
 * - Unsupported effort → the model's `defaultReasoningEffort` (or none).
 * - `model/list` fails or returns an empty catalog → the input is passed
 *   through unvalidated and NOT cached, so a later call can still repair it.
 *
 * NOTE(review): only a single `model/list` response is consulted — confirm the
 * catalog is never paginated, otherwise a valid model on a later page would be
 * "repaired" to the default.
 *
 * @param rpc Initialized app-server connection used for the `model/list` call.
 * @param modelId Configured model id.
 * @param effort Configured reasoning effort, if any.
 * @returns The (possibly repaired) model id and effort. Never rejects on a
 *   failed `model/list`.
 */
async function validateModelSelection(rpc: CodexRpc, modelId: string, effort?: string): Promise<ValidatedModel> {
  const cacheKey = `${modelId}:${effort || ''}`;
  const cached = modelValidationCache.get(cacheKey);
  if (cached) return cached;

  const result: ValidatedModel = { modelId, effort };
  try {
    const res = await rpc.request<{ data?: any[] }>('model/list', { includeHidden: true });
    const catalog = Array.isArray(res?.data) ? res.data : [];
    if (catalog.length) {
      let entry = catalog.find((m: any) => m.id === modelId || m.model === modelId);
      if (!entry) {
        const fallback = catalog.find((m: any) => m.isDefault) || catalog[0];
        log.warn(`[codex] model "${modelId}" not in catalog — falling back to "${fallback.id}"`);
        entry = fallback;
        result.modelId = entry.id;
        result.effort = undefined; // stale effort may not apply to the fallback
      }
      // Effort options may be plain strings or `{ reasoningEffort }` objects.
      const supported: string[] = (entry.supportedReasoningEfforts || [])
        .map((o: any) => (typeof o === 'string' ? o : o?.reasoningEffort))
        .filter(Boolean);
      if (result.effort && supported.length && !supported.includes(result.effort)) {
        const repaired = entry.defaultReasoningEffort || undefined;
        log.warn(`[codex] effort "${result.effort}" not supported by ${result.modelId} (supported: ${supported.join(', ')}) — using ${repaired || 'server default'}`);
        result.effort = repaired;
      }
      // Only cache results actually validated against a catalog — caching the
      // passthrough on a failed/empty model/list would disable the auto-repair
      // for this model string for the whole process lifetime.
      modelValidationCache.set(cacheKey, result);
    }
  } catch (err: any) {
    // Guarded access: a null/undefined rejection must not throw from inside
    // the handler, or the "proceed unvalidated" fallback itself would fail.
    log.warn(`[codex] model/list validation skipped: ${err?.message || err}`);
  }
  return result;
}
774
1369
 
775
1370
  // Codex discovers "repo"-scope skills under `<cwd>/.codex/skills` (verified
776
- // against 0.135.0 — a bare `<cwd>/skills` is NOT scanned, and `skills/list`
777
- // has no extra-root param). Bloby keeps the canonical skills in
778
- // `workspace/skills/<name>`, so we mirror each one into `.codex/skills/<name>`
779
- // as a symlink — single source of truth, discoverable by codex's native router.
1371
+ // against 0.135.0 — a bare `<cwd>/skills` is NOT scanned). Bloby keeps the
1372
+ // canonical skills in `workspace/skills/<name>`, so we mirror each one into
1373
+ // `.codex/skills/<name>` as a symlink — single source of truth, discoverable
1374
+ // by codex's native router. 0.138 added `skills/extraRoots/set` which could
1375
+ // replace this mirror — deliberately NOT adopted yet: the mirror is
1376
+ // e2e-verified working and the swap buys no user-visible change.
780
1377
  // (Each SKILL.md needs YAML frontmatter or codex rejects it — see SKILL_FORMAT_MIGRATION.md.)
781
1378
  const CODEX_SKILLS_ROOT = path.join(WORKSPACE_DIR, '.codex', 'skills');
782
1379
 
@@ -826,12 +1423,25 @@ function toToml(v: any): string {
826
1423
  return JSON.stringify(String(v)); // TOML basic string — JSON escaping is compatible
827
1424
  }
828
1425
 
1426
+ /** codex's config layer requires STRING values for args items, env values and
1427
+ * http_headers — a numeric `"PORT": 3000` in MCP.json (fine on claude) would
1428
+ * otherwise kill the app-server at config load, bricking EVERY codex spawn.
1429
+ * Coerce so non-string JSON values degrade to their string form instead. */
1430
+ function toTomlStringArray(arr: any[]): string {
1431
+ return `[${arr.map((v) => JSON.stringify(String(v))).join(',')}]`;
1432
+ }
1433
+ function toTomlStringMap(obj: Record<string, any>): string {
1434
+ return `{${Object.entries(obj).map(([k, v]) => `${JSON.stringify(k)}=${JSON.stringify(String(v))}`).join(',')}}`;
1435
+ }
1436
+
829
1437
  /**
830
1438
  * Translate MCP.json into `codex app-server -c mcp_servers.<name>.<field>=<toml>`
831
1439
  * spawn flags. Codex sources MCP from its own config layer rather than a per-query
832
1440
  * param (verified against 0.135.0: a `-c mcp_servers.X.command=...` override shows
833
- * up in both mcpServerStatus/list and config/read). Only the stdio fields Bloby
834
- * uses (command/args/env) are translated; names must be TOML-bare-key safe.
1441
+ * up in both mcpServerStatus/list and config/read). Stdio entries translate
1442
+ * command/args/env; url entries (streamable HTTP — the Claude SDK's http/sse
1443
+ * form) translate url + headers→http_headers + bearer_token_env_var, supported
1444
+ * natively by the 0.138 binary. Names must be TOML-bare-key safe.
835
1445
  */
836
1446
  function buildMcpConfigArgs(): string[] {
837
1447
  const servers = loadMcpServersForCodex();
@@ -840,10 +1450,22 @@ function buildMcpConfigArgs(): string[] {
840
1450
  for (const [name, cfg] of Object.entries(servers)) {
841
1451
  if (!/^[A-Za-z0-9_-]+$/.test(name)) { log.warn(`[codex] skipping MCP server "${name}" — name not TOML-bare-key safe`); continue; }
842
1452
  const c: any = cfg || {};
843
- if (!c.command) { log.warn(`[codex] skipping MCP server "${name}" — no command`); continue; }
844
- args.push('-c', `mcp_servers.${name}.command=${toToml(c.command)}`);
845
- if (Array.isArray(c.args) && c.args.length) args.push('-c', `mcp_servers.${name}.args=${toToml(c.args)}`);
846
- if (c.env && typeof c.env === 'object' && Object.keys(c.env).length) args.push('-c', `mcp_servers.${name}.env=${toToml(c.env)}`);
1453
+ if (c.command) {
1454
+ args.push('-c', `mcp_servers.${name}.command=${toToml(c.command)}`);
1455
+ if (Array.isArray(c.args) && c.args.length) args.push('-c', `mcp_servers.${name}.args=${toTomlStringArray(c.args)}`);
1456
+ if (c.env && typeof c.env === 'object' && Object.keys(c.env).length) args.push('-c', `mcp_servers.${name}.env=${toTomlStringMap(c.env)}`);
1457
+ } else if (typeof c.url === 'string' && c.url) {
1458
+ args.push('-c', `mcp_servers.${name}.url=${toToml(c.url)}`);
1459
+ if (c.headers && typeof c.headers === 'object' && Object.keys(c.headers).length) {
1460
+ args.push('-c', `mcp_servers.${name}.http_headers=${toTomlStringMap(c.headers)}`);
1461
+ }
1462
+ if (typeof c.bearer_token_env_var === 'string' && c.bearer_token_env_var) {
1463
+ args.push('-c', `mcp_servers.${name}.bearer_token_env_var=${toToml(c.bearer_token_env_var)}`);
1464
+ }
1465
+ } else {
1466
+ log.warn(`[codex] skipping MCP server "${name}" — no command or url`);
1467
+ continue;
1468
+ }
847
1469
  wired++;
848
1470
  }
849
1471
  if (wired) log.info(`[codex] wiring ${wired} MCP server(s) from MCP.json via -c overrides`);
@@ -860,12 +1482,11 @@ export function isConversationBusy(conversationId: string): boolean {
860
1482
  return conversations.get(conversationId)?.busy ?? false;
861
1483
  }
862
1484
 
863
- /** True if ANY live conversation in this harness is mid-turn. Used by the supervisor to defer
864
- * backend restarts during channel/Alexa turns (which don't set the dashboard's agentQueryActive). */
865
- /** Codex one-shot queries (startBlobyAgentQuery) reuse the conversations map and set conv.busy via
866
- * startTurn, so anyConversationBusy() already covers them — nothing extra to track here. */
1485
+ /** True while any one-shot is in its init window (token refresh + spawn +
1486
+ * initialize + thread/start). Once the conv registers in `conversations` and
1487
+ * startTurn sets busy, anyConversationBusy() takes over seamlessly. */
867
1488
  export function anyOneShotActive(): boolean {
868
- return false;
1489
+ return inFlightOneShots.size > 0;
869
1490
  }
870
1491
 
871
1492
  export function anyConversationBusy(): boolean {
@@ -881,15 +1502,31 @@ export async function startConversation(
881
1502
  recentMessages?: RecentMessage[],
882
1503
  ): Promise<boolean> {
883
1504
  if (conversations.has(conversationId)) endConversation(conversationId);
1505
+ // Typing dots NOW — the supervisor awaits this whole handshake before the
1506
+ // first pushMessage can run, and without an early signal the user stares at
1507
+ // a dead chat through spawn + initialize + thread/start on every session
1508
+ // start (boot, clear-context, every proactive recycle).
1509
+ onMessage('bot:typing', { conversationId });
884
1510
  const baseInstructions = await assembleBaseInstructions(names, recentMessages);
885
- const conv = await spawnAndInitialize(conversationId, model, onMessage, baseInstructions, false);
886
- return !!conv;
1511
+ const conv = await spawnAndInitialize(conversationId, model, onMessage, baseInstructions, {
1512
+ oneShot: false,
1513
+ wantSkills: true,
1514
+ names,
1515
+ });
1516
+ if (!conv) {
1517
+ // bot:typing above set the supervisor's agentQueryActive; bot:error alone
1518
+ // doesn't clear it — conversation-ended does (and is safe for a conv that
1519
+ // never registered).
1520
+ onMessage('bot:conversation-ended', { conversationId });
1521
+ return false;
1522
+ }
1523
+ return true;
887
1524
  }
888
1525
 
889
1526
  export function pushMessage(
890
1527
  conversationId: string,
891
1528
  content: string,
892
- _attachments?: AgentAttachment[],
1529
+ attachments?: AgentAttachment[],
893
1530
  savedFiles?: SavedFile[],
894
1531
  ): boolean {
895
1532
  const conv = conversations.get(conversationId);
@@ -897,7 +1534,7 @@ export function pushMessage(
897
1534
  log.warn(`[codex] pushMessage: no live conversation ${conversationId}`);
898
1535
  return false;
899
1536
  }
900
- void steerOrQueue(conv, content, savedFiles);
1537
+ queueOrStart(conv, content, savedFiles, attachments);
901
1538
  return true;
902
1539
  }
903
1540
 
@@ -916,18 +1553,14 @@ export function endConversation(conversationId: string): void {
916
1553
 
917
1554
  export function endAllConversations(): void {
918
1555
  for (const id of Array.from(conversations.keys())) endConversation(id);
1556
+ // The pre-warmed app-server may hold pre-re-auth state — drop it (a fresh
1557
+ // warm-up fires from the teardown re-warm path with current credentials).
1558
+ discardCodexWarmup();
919
1559
  }
920
1560
 
921
1561
  export async function stopSubAgentTask(_conversationId: string, _taskId: string): Promise<void> {
922
- // Codex doesn't expose Claude-style sub-agent tasks. No-op for now.
923
- }
924
-
925
- export async function warmUpForLiveConversation(
926
- _model: string,
927
- _names?: { botName: string; humanName: string },
928
- ): Promise<void> {
929
- // No subprocess pre-warming yet — `codex app-server` startup is fast enough
930
- // (~hundreds of ms). Re-evaluate if it becomes noticeable on the Pi.
1562
+ // Codex doesn't expose Claude-style sub-agent tasks (collab tools are still
1563
+ // experimental and not enabled by our initialize). No-op.
931
1564
  }
932
1565
 
933
1566
  export async function startBlobyAgentQuery(
@@ -935,24 +1568,45 @@ export async function startBlobyAgentQuery(
935
1568
  prompt: string,
936
1569
  model: string,
937
1570
  onMessage: OnAgentMessage,
938
- _attachments?: AgentAttachment[],
1571
+ attachments?: AgentAttachment[],
939
1572
  savedFiles?: SavedFile[],
940
1573
  names?: { botName: string; humanName: string },
941
1574
  recentMessages?: RecentMessage[],
942
1575
  supportPrompt?: string,
943
- _maxTurns?: number,
1576
+ _maxTurns?: number, // no codex equivalent — bounded by ONE_SHOT_MAX_TURN_MS instead
944
1577
  ): Promise<void> {
945
- if (conversations.has(conversationId)) endConversation(conversationId);
946
- const baseInstructions = supportPrompt
947
- ? supportPrompt
948
- : await assembleBaseInstructions(names, recentMessages);
949
- const conv = await spawnAndInitialize(conversationId, model, onMessage, baseInstructions, true);
950
- if (!conv) return;
951
- await startTurn(conv, prompt, savedFiles);
1578
+ inFlightOneShots.add(conversationId);
1579
+ try {
1580
+ if (conversations.has(conversationId)) endConversation(conversationId);
1581
+ onMessage('bot:typing', { conversationId });
1582
+ // Support personas ride supportPrompt as the FULL instructions (SCRIPT.md
1583
+ // governs them) — but the per-customer message buffer must still be
1584
+ // appended, exactly like claude: every one-shot is a fresh thread, so
1585
+ // recentMessages IS the agent's only memory of the conversation.
1586
+ let baseInstructions = supportPrompt ?? await assembleBaseInstructions(names, recentMessages);
1587
+ if (supportPrompt && recentMessages?.length) {
1588
+ baseInstructions += `\n\n---\n# Recent Conversation\n${formatConversationHistory(recentMessages)}`;
1589
+ }
1590
+ const conv = await spawnAndInitialize(conversationId, model, onMessage, baseInstructions, {
1591
+ oneShot: true,
1592
+ wantSkills: !supportPrompt,
1593
+ names,
1594
+ });
1595
+ if (!conv) {
1596
+ // Init failed (no auth / spawn / thread-start error — bot:error already
1597
+ // emitted). bot:done frees the caller's slot; without it each failed
1598
+ // customer message pins one of the 5 WhatsApp agent slots until restart.
1599
+ onMessage('bot:done', { conversationId, usedFileTools: false });
1600
+ return;
1601
+ }
1602
+ await startTurn(conv, prompt, savedFiles, attachments);
1603
+ } finally {
1604
+ inFlightOneShots.delete(conversationId);
1605
+ }
952
1606
  }
953
1607
 
954
1608
  export function stopBlobyAgentQuery(conversationId: string): void {
955
- endConversation(conversationId);
1609
+ endConversation(conversationId); // teardown guarantees the one-shot's bot:done
956
1610
  }
957
1611
 
958
1612
  // ── Workspace agent endpoint (POST /api/agent/query) ──────────────────────
@@ -996,49 +1650,82 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
996
1650
  let resolvedThreadId = req.sessionId || '';
997
1651
  let resolveTurn: (() => void) | null = null;
998
1652
  let turnError: string | null = null;
1653
+ const itemTexts = new Map<string, string>();
1654
+
999
1655
  const turnDone = new Promise<void>((r) => { resolveTurn = r; });
1000
1656
 
1001
1657
  rpc.onNotification((n) => {
1002
1658
  const p = n.params || {};
1003
1659
  switch (n.method) {
1004
1660
  case 'item/agentMessage/delta': {
1005
- if (typeof p.delta === 'string') fullText += p.delta;
1661
+ if (typeof p.delta === 'string') {
1662
+ fullText += p.delta;
1663
+ if (p.itemId) itemTexts.set(p.itemId, (itemTexts.get(p.itemId) || '') + p.delta);
1664
+ }
1006
1665
  break;
1007
1666
  }
1008
1667
  case 'item/started': {
1009
1668
  const item = p.item || {};
1010
- if (item.type === 'commandExecution') usedTools.add('shell');
1011
- else if (item.type === 'mcpToolCall') usedTools.add(item.tool || 'mcp_tool');
1012
- else if (item.type === 'fileChange') { usedTools.add('file_change'); usedFileTools = true; }
1013
- else if (item.type === 'webSearch') usedTools.add('web_search');
1669
+ // Same claude-vocabulary normalization as the live path — agent-api
1670
+ // callers see the identical toolsUsed names on both harnesses.
1671
+ if (item.type === 'commandExecution') usedTools.add('Bash');
1672
+ else if (item.type === 'mcpToolCall') usedTools.add(item.tool ? (item.server ? `mcp__${item.server}__${item.tool}` : item.tool) : 'mcp_tool');
1673
+ else if (item.type === 'fileChange') { usedTools.add('Edit'); usedFileTools = true; }
1674
+ else if (item.type === 'webSearch') usedTools.add('WebSearch');
1014
1675
  break;
1015
1676
  }
1016
1677
  case 'item/completed': {
1017
1678
  const item = p.item || {};
1018
1679
  if (item.type === 'fileChange') usedFileTools = true;
1019
- if (item.type === 'agentMessage' && !fullText) {
1020
- const text = (item.content || []).map((c: any) => c.text || '').join('') || item.text || '';
1021
- if (text) fullText = text;
1680
+ // item/completed is authoritative per item — same per-itemId
1681
+ // reconciliation as the live path: append items that never streamed
1682
+ // deltas, and the remainder when the final text extends past them.
1683
+ if (item.type === 'agentMessage' && item.id) {
1684
+ const finalText: string = (typeof item.text === 'string' && item.text)
1685
+ ? item.text
1686
+ : ((item.content || []).map((c: any) => c.text || '').join(''));
1687
+ if (finalText) {
1688
+ const streamed = itemTexts.get(item.id) || '';
1689
+ let remainder = '';
1690
+ if (!streamed) {
1691
+ remainder = finalText;
1692
+ } else if (finalText.length > streamed.length && finalText.startsWith(streamed)) {
1693
+ remainder = finalText.slice(streamed.length);
1694
+ }
1695
+ if (remainder) {
1696
+ if (!streamed && fullText && !fullText.endsWith('\n')) fullText += '\n\n';
1697
+ fullText += remainder;
1698
+ itemTexts.set(item.id, finalText);
1699
+ }
1700
+ }
1022
1701
  }
1023
1702
  break;
1024
1703
  }
1025
1704
  case 'turn/completed': {
1026
1705
  const status = p.turn?.status || 'completed';
1027
1706
  if (status === 'failed') {
1028
- turnError = p.turn?.error?.message || 'Codex turn failed.';
1707
+ turnError = humanizeCodexError(p.turn?.error?.message, p.turn?.error?.codexErrorInfo, p.turn?.error?.additionalDetails);
1029
1708
  }
1030
1709
  resolveTurn?.();
1031
1710
  break;
1032
1711
  }
1033
1712
  case 'error': {
1034
1713
  if (p.willRetry) break; // transient — codex retries itself
1035
- turnError = p.error?.message || 'Codex error';
1714
+ turnError = humanizeCodexError(p.error?.message || 'Codex error', p.error?.codexErrorInfo, p.error?.additionalDetails);
1036
1715
  resolveTurn?.();
1037
1716
  break;
1038
1717
  }
1039
1718
  }
1040
1719
  });
1041
1720
 
1721
+ // App-server crash mid-query: without this, no notification ever resolves
1722
+ // turnDone and the caller burns the FULL timeout before getting a generic
1723
+ // "timed out" — surface the real exit error immediately instead.
1724
+ rpc.onClose((code, errMsg) => {
1725
+ if (!turnError) turnError = errMsg || `codex app-server exited unexpectedly (code=${code}).`;
1726
+ resolveTurn?.();
1727
+ });
1728
+
1042
1729
  const timeoutHandle = setTimeout(() => {
1043
1730
  if (!turnError) turnError = `Query timed out after ${timeout}ms.`;
1044
1731
  resolveTurn?.();
@@ -1049,31 +1736,41 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
1049
1736
  await rpc.request('initialize', { clientInfo: CLIENT_INFO });
1050
1737
  rpc.notify('initialized', {});
1051
1738
 
1739
+ // Same stale-model/effort auto-repair as spawnAndInitialize — agent-api is
1740
+ // otherwise the one codex entry point that fails raw on a retired model.
1741
+ const validated = await validateModelSelection(rpc, model, effort);
1742
+ model = validated.modelId;
1743
+ effort = validated.effort;
1744
+
1745
+ // Same execution posture on EVERY thread path — resume included.
1746
+ // ThreadResumeParams accepts all of these in 0.138; without them a resumed
1747
+ // session silently dropped the caller's systemPrompt and fell back to
1748
+ // config-default sandbox/approval settings.
1749
+ const threadOverrides = {
1750
+ cwd: WORKSPACE_DIR,
1751
+ model,
1752
+ ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
1753
+ personality: 'pragmatic',
1754
+ approvalPolicy: 'never',
1755
+ sandbox: 'danger-full-access',
1756
+ };
1757
+
1052
1758
  if (req.sessionId) {
1053
1759
  // Resume an existing thread (if codex still has it). Caller must accept
1054
1760
  // failure here — we fall back to a fresh thread.
1055
1761
  try {
1056
- const r = await rpc.request<{ thread: { id: string } }>('thread/resume', { threadId: req.sessionId });
1762
+ const r = await rpc.request<{ thread: { id: string } }>('thread/resume', {
1763
+ threadId: req.sessionId,
1764
+ ...threadOverrides,
1765
+ });
1057
1766
  resolvedThreadId = r.thread.id;
1058
1767
  } catch (err: any) {
1059
1768
  log.warn(`[codex/agent-api] thread/resume failed (${err.message}); starting fresh thread`);
1060
- const r = await rpc.request<{ thread: { id: string } }>('thread/start', {
1061
- cwd: WORKSPACE_DIR,
1062
- model,
1063
- ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
1064
- approvalPolicy: 'never',
1065
- sandbox: 'danger-full-access',
1066
- });
1769
+ const r = await rpc.request<{ thread: { id: string } }>('thread/start', threadOverrides);
1067
1770
  resolvedThreadId = r.thread.id;
1068
1771
  }
1069
1772
  } else {
1070
- const r = await rpc.request<{ thread: { id: string } }>('thread/start', {
1071
- cwd: WORKSPACE_DIR,
1072
- model,
1073
- ...(req.systemPrompt ? { developerInstructions: req.systemPrompt } : {}),
1074
- approvalPolicy: 'never',
1075
- sandbox: 'danger-full-access',
1076
- });
1773
+ const r = await rpc.request<{ thread: { id: string } }>('thread/start', threadOverrides);
1077
1774
  resolvedThreadId = r.thread.id;
1078
1775
  }
1079
1776