npm - bloby-bot - Versions diffs - 0.70.8 → 0.70.10 - Mend

bloby-bot 0.70.8 → 0.70.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/dist-bloby/assets/{bloby-CXmOcb1r.js → bloby-DSNB0g4w.js} +4 -4
package/dist-bloby/assets/{globals-DpO5tO92.js → globals-B3cTbITX.js} +1 -1
package/dist-bloby/assets/{highlighted-body-OFNGDK62-D7cU1Y-Z.js → highlighted-body-OFNGDK62-BLforpkr.js} +1 -1
package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +1 -0
package/dist-bloby/assets/{onboard-B96ELhXn.js → onboard-Dn2Ws_G2.js} +1 -1
package/dist-bloby/bloby.html +2 -2
package/dist-bloby/onboard.html +2 -2
package/package.json +1 -1
package/scripts/sync-pi-models.ts +37 -6
package/supervisor/chat/OnboardWizard.tsx +4 -4
package/supervisor/harnesses/pi/async-queue.ts +7 -11
package/supervisor/harnesses/pi/index.ts +475 -73
package/supervisor/harnesses/pi/models-catalog.generated.ts +840 -210
package/supervisor/harnesses/pi/providers/humanize-error.ts +125 -0
package/supervisor/harnesses/pi/providers/retry.ts +87 -0
package/supervisor/harnesses/pi/providers/stream-anthropic.ts +73 -11
package/supervisor/harnesses/pi/providers/stream-google.ts +15 -5
package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +55 -19
package/supervisor/harnesses/pi/providers/types.ts +26 -1
package/supervisor/harnesses/pi/session.ts +179 -73
package/supervisor/harnesses/pi/sub-providers.ts +30 -1
package/supervisor/harnesses/pi/test-completion.ts +8 -2
package/supervisor/harnesses/pi/tools/registry.ts +25 -9
package/supervisor/harnesses/pi/tools/task.ts +108 -0
package/supervisor/harnesses/pi/tools/types.ts +15 -0
package/supervisor/index.ts +11 -10
package/supervisor/public/morphy_sad.mov +0 -0
package/supervisor/public/morphy_sad.webm +0 -0
package/supervisor/shell.ts +1 -1
package/supervisor/workspace-guard.js +1 -1
package/workspace/client/public/morphy_bounce.mov +0 -0
package/workspace/client/public/morphy_bounce.webm +0 -0
package/workspace/client/public/morphy_hi.mov +0 -0
package/workspace/client/public/morphy_hi.webm +0 -0
package/workspace/client/src/App.tsx +5 -3
package/dist-bloby/assets/mermaid-GHXKKRXX-D5YxphBn.js +0 -1
package/supervisor/public/what-happened.mp4 +0 -0
package/supervisor/public/what-happened.webm +0 -0

package/supervisor/harnesses/pi/index.ts CHANGED

@@ -6,14 +6,18 @@
  * matches the Claude harness so the dispatcher needs no provider-specific
  * code.
  *
- * Phase 1 scope: live conversation + one-shot text only (no tools). The
+ * Live conversations run the full tool loop (session.ts); one-shots are still
+ * tool-less (audit Phase C will route them through createPiSession). The
  * non-blocking feel — user keeps typing while the model is still answering —
- * comes from the same `AsyncQueue` pattern Claude uses; see `async-queue.ts`.
+ * comes from the same `AsyncQueue` pattern Claude uses (one message per turn);
+ * see `async-queue.ts` and PI-PARITY-AUDIT-2026-06-11.md.
  */
 import { log } from '../../../shared/logger.js';
 import { WORKSPACE_DIR } from '../../../shared/paths.js';
 import type { SavedFile } from '../../file-saver.js';
 import { assembleSystemPrompt } from '../../../worker/prompts/prompt-assembler.js';
+import { buildAgents } from '../../agents/index.js';
+import crypto from 'crypto';
 import fs from 'fs';
 import path from 'path';
 import type {
@@ -26,12 +30,13 @@ export type { RecentMessage, AgentAttachment };
 import { buildSkillsIndex } from '../skills.js';
 import { createAsyncQueue, type AsyncQueue } from './async-queue.js';
-import { createPiSession, type PiSessionEvent } from './session.js';
-import { getPiSubProvider } from './sub-providers.js';
+import { createPiSession, type PiSessionEvent, type PiSessionAuth } from './session.js';
+import { getPiSubProvider, getCatalogModel } from './sub-providers.js';
 import { readPiAuth } from './auth-storage.js';
 import { streamProvider } from './providers/stream.js';
 import type { PiMessage } from './providers/types.js';
 import { toolDefsForProvider } from './tools/registry.js';
+import type { PiTaskHost } from './tools/types.js';
 // ── Live conversation state ────────────────────────────────────────────────
@@ -41,11 +46,70 @@ interface LiveConversation {
   abortController: AbortController;
   onMessage: (type: string, data: any) => void;
   busy: boolean;
+  /** Messages pushed but not yet completed (1 turn-complete per message) — mirrors
+   *  claude.ts pendingCount. idle:true on turn-complete only when this hits 0, so
+   *  the supervisor's session recycling never fires with a message still queued. */
+  pendingCount: number;
+  /** 60ms micro-batcher for bot:token — collapses per-delta WS frame floods. */
+  batcher: TokenBatcher;
+  /** Running background sub-agent tasks (Phase B). While non-empty, the
+   *  conversation reports idle:false (recycling deferred) and counts as busy
+   *  (backend restarts / self-updates deferred) so a task is never killed
+   *  mid-flight by housekeeping. */
+  tasks: Map<string, RunningTask>;
+  /** Set when a completed background task used file tools — OR'd into the next
+   *  bot:turn-complete (the continuation turn) so the backend restarts right
+   *  after the user hears "Done!", mirroring claude's usedTools capture of
+   *  sub-agent tool_use blocks. */
+  taskUsedFileTools: boolean;
   loopDone: Promise<void> | null;
 }
+interface RunningTask {
+  id: string;
+  description: string;
+  subagentType: string;
+  abortController: AbortController;
+  /** True when stopped via user:stop-task or conversation teardown. */
+  stopped: boolean;
+  startedAt: number;
+}
 const liveConversations = new Map<string, LiveConversation>();
+/**
+ * Micro-batch streamed deltas into ~60ms bot:token frames (house standard
+ * from the codex parity pass — an order-of-magnitude WS frame reduction with
+ * no visible change in streaming feel). Callers MUST flush() before emitting
+ * any non-token event so ordering and the streamed-text == bot:response
+ * contract are preserved; discard() on teardown drops post-abort stragglers.
+ */
+interface TokenBatcher {
+  add(delta: string): void;
+  flush(): void;
+  discard(): void;
+}
+function createTokenBatcher(emit: (text: string) => void, intervalMs = 60): TokenBatcher {
+  let buf = '';
+  let timer: NodeJS.Timeout | null = null;
+  const flush = () => {
+    if (timer) { clearTimeout(timer); timer = null; }
+    if (buf) { const out = buf; buf = ''; emit(out); }
+  };
+  return {
+    add(delta: string) {
+      buf += delta;
+      if (!timer) timer = setTimeout(flush, intervalMs);
+    },
+    flush,
+    discard() {
+      if (timer) { clearTimeout(timer); timer = null; }
+      buf = '';
+    },
+  };
+}
 export function hasConversation(conversationId: string): boolean {
   return liveConversations.has(conversationId);
 }
@@ -100,7 +164,7 @@ You are running in a streaming chat where the user can keep typing while you wor
 - Before kicking off a multi-step task, say one short line acknowledging it ("On it, looking at the widget now.").
 - Between tool calls on long tasks, drop a brief progress note ("Found the file, checking the layout next.") so the user knows you're still working.
-- If a new user message arrives while you're mid-task, you'll see it as a fresh user-role message in the conversation history. Answer it briefly inline, mention you're still working on the main task, then continue.
+- Messages the user sends while you're working are queued and delivered to you one at a time after the current task finishes — each gets its own answer, so never assume you missed one.
 - Final answers should be concise and concrete.`;
 async function buildSystemPrompt(
@@ -133,29 +197,232 @@ async function buildSystemPrompt(
   return systemPrompt;
 }
-/** Resolve sub-provider, base url, api key, model id from saved pi-auth.json. */
-function resolveAuth(): {
-  ok: true;
-  flavor: ReturnType<typeof getPiSubProvider> extends undefined ? never : NonNullable<ReturnType<typeof getPiSubProvider>>['flavor'];
-  modelId: string;
-  baseUrl: string;
-  apiKey: string;
-} | { ok: false; error: string } {
-  const auth = readPiAuth();
-  if (!auth) return { ok: false, error: 'Bloby provider is not configured. Run the onboarding wizard.' };
-  const sub = getPiSubProvider(auth.subProvider);
-  if (!sub) return { ok: false, error: `Unknown sub-provider in pi-auth.json: ${auth.subProvider}` };
-  const baseUrl = (auth.baseUrl || sub.baseUrl || '').replace(/\/+$/, '');
+/**
+ * Resolve the full provider auth bundle from saved pi-auth.json: sub-provider
+ * flavor, base url, api key, model id, plus catalog metadata (per-model output
+ * cap, context window) and the sub-provider's max-tokens field quirk.
+ *
+ * Called at session/one-shot start AND re-called on every live provider round
+ * via the session's getAuth thunk — so fixing a revoked key or switching
+ * models in the wizard heals a live conversation on its very next round.
+ */
+function resolveAuth(): { ok: true; auth: PiSessionAuth } | { ok: false; error: string } {
+  const saved = readPiAuth();
+  if (!saved) return { ok: false, error: 'Bloby provider is not configured. Run the onboarding wizard.' };
+  const sub = getPiSubProvider(saved.subProvider);
+  if (!sub) return { ok: false, error: `Unknown sub-provider in pi-auth.json: ${saved.subProvider}` };
+  const baseUrl = (saved.baseUrl || sub.baseUrl || '').replace(/\/+$/, '');
   if (!baseUrl) return { ok: false, error: `No base URL configured for ${sub.id}` };
-  const modelId = auth.modelId || sub.defaultModel || '';
+  const modelId = saved.modelId || sub.defaultModel || '';
   if (!modelId) return { ok: false, error: `No model selected for ${sub.id}` };
-  if (sub.needsApiKey && !auth.apiKey) return { ok: false, error: `Missing API key for ${sub.id}` };
+  if (sub.needsApiKey && !saved.apiKey) return { ok: false, error: `Missing API key for ${sub.id}` };
+  const catalog = getCatalogModel(sub.id, modelId);
+  // Effective window reported to the supervisor's recycler. Two corrections
+  // over the raw catalog figure (audit review F1):
+  // 1. Anthropic catalog windows can reflect the 1M-context beta; without the
+  //    beta header (we don't send it) the real window is 200k.
+  // 2. Since every request reserves max_tokens of output budget, providers
+  //    enforce input + max_tokens <= window — the usable INPUT ceiling is
+  //    window - maxOutputTokens. Reporting the raw window would put the 70%
+  //    recycle threshold ABOVE that ceiling (e.g. 140k > 200k-64k=136k on
+  //    claude-haiku-4-5) and the recycler could never preempt the wall.
+  let contextWindow = catalog?.contextWindow;
+  if (contextWindow && sub.flavor === 'anthropic-messages') {
+    contextWindow = Math.min(contextWindow, 200_000);
+  }
+  if (contextWindow && catalog?.maxOutputTokens) {
+    contextWindow = Math.max(0, contextWindow - catalog.maxOutputTokens);
+  }
   return {
     ok: true,
-    flavor: sub.flavor,
-    modelId,
-    baseUrl,
-    apiKey: auth.apiKey || '',
+    auth: {
+      flavor: sub.flavor,
+      modelId,
+      baseUrl,
+      apiKey: saved.apiKey || '',
+      maxOutputTokens: catalog?.maxOutputTokens,
+      maxTokensField: sub.maxTokensField,
+      includeStreamUsage: sub.noStreamUsage ? false : undefined,
+      contextWindow,
+    },
+  };
+}
+// ── Background sub-agents (Phase B — audit D4-1) ───────────────────────────
+/** Inject a system-originated message into the parent's queue (task completion).
+ *  Mirrors the Claude SDK's self-prompted continuation turn: no routing target
+ *  is enqueued (channelManager only wraps USER pushes), so the continuation's
+ *  bot:response meets an empty routing FIFO and falls through to the dashboard
+ *  broadcast — exactly claude's behavior. pendingCount/busy are maintained so
+ *  idle stays accurate and the recycler can't fire mid-continuation. No
+ *  bot:typing (claude's continuation turns emit none either). */
+function pushSyntheticMessage(conv: LiveConversation, text: string): void {
+  conv.busy = true;
+  conv.pendingCount += 1;
+  conv.inputQueue.push({ role: 'user', content: [{ type: 'text', text }] });
+}
+/** coder.txt advertises the claude toolset ("Read, Write, Edit, Bash, Glob,
+ *  Grep") — swap in the child's REAL pi toolset so the sub-agent never chases
+ *  tools it doesn't have (audit D4-4). claude keeps its richer line. */
+function rewriteToolAccessLine(prompt: string, toolNames: string[]): string {
+  return prompt.replace(/You have full tool access:[^\n]*/i, `You have full tool access: ${toolNames.join(', ')}.`);
+}
+/** Compact human-readable descriptor of a child tool call for bot:task-progress. */
+function toolCallSummary(name: string, input: any): string {
+  const tail = (p: any) => (typeof p === 'string' ? p.split('/').slice(-2).join('/') : '');
+  switch (name.toLowerCase()) {
+    case 'bash': return `Bash: ${String(input?.description || input?.command || '').slice(0, 80)}`;
+    case 'read': return `Reading ${tail(input?.file_path)}`;
+    case 'write': return `Writing ${tail(input?.file_path)}`;
+    case 'edit': return `Editing ${tail(input?.file_path)}`;
+    default: return name;
+  }
+}
+/**
+ * Per-conversation task host: spawns an in-process child `createPiSession`
+ * per Task call, translates child events into the `bot:task-*` vocabulary
+ * (payload fields exactly as claude.ts:443-484 emits them), and injects the
+ * completion back into the parent's queue for the "Done!" continuation turn.
+ */
+function createTaskHost(conv: LiveConversation, getAuth: () => PiSessionAuth): PiTaskHost {
+  return {
+    spawn(req) {
+      const agents = buildAgents();
+      const cfg = agents[req.subagentType];
+      if (!cfg) {
+        return {
+          ok: false,
+          error: `Unknown subagent_type "${req.subagentType}". Available: ${Object.keys(agents).join(', ') || 'none'}.`,
+        };
+      }
+      const taskId = crypto.randomUUID().slice(0, 8);
+      const abortController = new AbortController();
+      const task: RunningTask = {
+        id: taskId,
+        description: req.description,
+        subagentType: req.subagentType,
+        abortController,
+        stopped: false,
+        startedAt: Date.now(),
+      };
+      conv.tasks.set(taskId, task);
+      // Honor the agent config's tool restrictions (claude applies these via
+      // the SDK's tools/disallowedTools options — e.g. a future researcher
+      // agent with disallowedTools: ['Write','Edit']).
+      let childTools = toolDefsForProvider({ forSubagent: true });
+      if (Array.isArray(cfg.tools) && cfg.tools.length > 0) {
+        childTools = childTools.filter((t) => cfg.tools.includes(t.name));
+      }
+      if (Array.isArray(cfg.disallowedTools) && cfg.disallowedTools.length > 0) {
+        childTools = childTools.filter((t) => !cfg.disallowedTools.includes(t.name));
+      }
+      const systemPrompt = rewriteToolAccessLine(String(cfg.prompt || ''), childTools.map((t) => t.name));
+      let summaryText = '';
+      let errorText = '';
+      let usedFileTools = false;
+      let toolUses = 0;
+      let lastUsage: { inputTokens?: number; outputTokens?: number; cacheReadTokens?: number; cacheCreationTokens?: number } | undefined;
+      const session = createPiSession({
+        getAuth,
+        systemPrompt,
+        tools: childTools,
+        cwd: WORKSPACE_DIR,
+        abortController,
+        maxToolRounds: typeof cfg.maxTurns === 'number' ? cfg.maxTurns : 50,
+        onEvent: (evt: PiSessionEvent) => {
+          switch (evt.type) {
+            case 'tool_use':
+              toolUses += 1;
+              conv.batcher.flush();
+              conv.onMessage('bot:task-progress', {
+                conversationId: conv.id,
+                taskId,
+                summary: toolCallSummary(evt.name, evt.input),
+                lastTool: evt.name,
+                usage: { tool_uses: toolUses, duration_ms: Date.now() - task.startedAt },
+              });
+              break;
+            case 'text_end':
+              summaryText = evt.text;
+              break;
+            case 'error':
+              errorText = evt.error;
+              break;
+            case 'turn_complete':
+              usedFileTools = usedFileTools || evt.usedFileTools;
+              if (evt.usage) lastUsage = evt.usage;
+              break;
+          }
+        },
+      });
+      const queue = createAsyncQueue<PiMessage>();
+      queue.push({ role: 'user', content: [{ type: 'text', text: req.prompt }] });
+      queue.end();
+      log.info(`[pi/task] ──── SUB-AGENT STARTED ──── id=${taskId} type=${req.subagentType} "${req.description}"`);
+      // Task events bypass translateAndEmit, so flush the token batcher first —
+      // bot:task-created COMMITS the dashboard stream buffer (useBlobyChat),
+      // and a batch flushed after it would mis-slice committedTextLength.
+      conv.batcher.flush();
+      conv.onMessage('bot:task-created', {
+        conversationId: conv.id,
+        taskId,
+        description: req.description,
+        type: req.subagentType,
+      });
+      void (async () => {
+        try {
+          await session.run(queue);
+        } catch (err: any) {
+          errorText = errorText || err?.message || String(err);
+        } finally {
+          conv.tasks.delete(taskId);
+          const status = task.stopped ? 'stopped' : (errorText && !summaryText ? 'failed' : 'completed');
+          const summary = summaryText || errorText || '(the agent produced no output)';
+          const u = lastUsage;
+          const totalTokens = u
+            ? (u.inputTokens || 0) + (u.outputTokens || 0) + (u.cacheReadTokens || 0) + (u.cacheCreationTokens || 0)
+            : 0;
+          log.info(
+            `[pi/task] ──── SUB-AGENT ${status.toUpperCase()} ──── id=${taskId} ` +
+            `tools=${toolUses} ${Math.round((Date.now() - task.startedAt) / 1000)}s summary=${summary.slice(0, 160)}`,
+          );
+          conv.batcher.flush();
+          conv.onMessage('bot:task-done', {
+            conversationId: conv.id,
+            taskId,
+            status,
+            summary,
+            usage: { tool_uses: toolUses, duration_ms: Date.now() - task.startedAt, total_tokens: totalTokens },
+          });
+          if (usedFileTools) conv.taskUsedFileTools = true;
+          // Drive the user-facing continuation turn — unless the conversation
+          // itself is gone (ended/recycled), in which case the report dies with
+          // it (claude parity: the SDK subprocess dies too).
+          if (liveConversations.get(conv.id) === conv && !conv.abortController.signal.aborted) {
+            const note = task.stopped
+              ? `[System: the background task "${req.description}" was stopped by the user. Acknowledge that briefly in your own voice — never mention agents, tasks, or system messages.]`
+              : `[System: background task "${req.description}" ${status}.]\n\nResult summary:\n${summary}\n\nRelay the outcome to the user concisely in your own voice (never mention agents, tasks, ids, or system messages). If it failed, say what went wrong and offer a next step.`;
+            pushSyntheticMessage(conv, note);
+          }
+        }
+      })();
+      return { ok: true, taskId };
+    },
   };
 }
@@ -208,14 +475,14 @@ export async function startConversation(
     endConversation(conversationId);
   }
-  const auth = resolveAuth();
-  if (!auth.ok) {
-    log.warn(`[pi/conversation] Cannot start: ${auth.error}`);
-    onMessage('bot:error', { conversationId, error: auth.error });
+  const resolved = resolveAuth();
+  if (!resolved.ok) {
+    log.warn(`[pi/conversation] Cannot start: ${resolved.error}`);
+    onMessage('bot:error', { conversationId, error: resolved.error });
     return false;
   }
-  log.info(`[pi/conversation] Sub-provider: ${auth.flavor} · model: ${auth.modelId}`);
+  log.info(`[pi/conversation] Sub-provider: ${resolved.auth.flavor} · model: ${resolved.auth.modelId}`);
   const systemPrompt = await buildSystemPrompt(names, recentMessages);
   log.info(`[pi/conversation] System prompt: ${systemPrompt.length} chars`);
@@ -229,19 +496,31 @@ export async function startConversation(
     abortController,
     onMessage,
     busy: false,
+    pendingCount: 0,
+    batcher: createTokenBatcher((text) => onMessage('bot:token', { conversationId, token: text })),
+    tasks: new Map(),
+    taskUsedFileTools: false,
     loopDone: null,
   };
   liveConversations.set(conversationId, conv);
+  // Re-resolve auth on every provider round so a key/model fix in the wizard
+  // applies to the next round with full history intact (audit D6-8). Falls
+  // back to the last good bundle if pi-auth.json turns unreadable mid-session.
+  let currentAuth: PiSessionAuth = resolved.auth;
+  const getAuth = (): PiSessionAuth => {
+    const fresh = resolveAuth();
+    if (fresh.ok) currentAuth = fresh.auth;
+    return currentAuth;
+  };
   const session = createPiSession({
-    flavor: auth.flavor,
-    modelId: auth.modelId,
-    baseUrl: auth.baseUrl,
-    apiKey: auth.apiKey,
+    getAuth,
     systemPrompt,
     tools: toolDefsForProvider(),
     cwd: WORKSPACE_DIR,
     abortController,
+    taskHost: createTaskHost(conv, getAuth),
     onEvent: (evt: PiSessionEvent) => {
       translateAndEmit(conv, evt);
     },
@@ -258,6 +537,10 @@ export async function startConversation(
       }
     } finally {
       log.info(`[pi/conversation] Cleaning up conversation ${conversationId}`);
+      // Drop any unflushed token stragglers — at teardown the turn is either
+      // complete (already flushed before turn_complete) or aborted (tokens
+      // from an aborted stream must not surface after the fact).
+      conv.batcher.discard();
       liveConversations.delete(conversationId);
       onMessage('bot:conversation-ended', { conversationId });
     }
@@ -268,28 +551,86 @@ export async function startConversation(
 /** Map session-level events back into bloby's `bot:*` vocabulary. */
 function translateAndEmit(conv: LiveConversation, evt: PiSessionEvent) {
+  if (evt.type === 'text_delta') {
+    conv.batcher.add(evt.delta);
+    return;
+  }
+  // Any non-token event flushes the batch first — ordering (tokens before the
+  // tool chip / final response) and the streamed-text == bot:response
+  // invariant both depend on it.
+  conv.batcher.flush();
   switch (evt.type) {
     case 'turn_started':
       // No bloby event for this — `bot:typing` is already emitted by pushMessage().
       break;
-    case 'text_delta':
-      conv.onMessage('bot:token', { conversationId: conv.id, token: evt.delta });
-      break;
     case 'text_end':
       conv.onMessage('bot:response', { conversationId: conv.id, content: evt.text });
       break;
-    case 'tool_use':
-      conv.onMessage('bot:tool', { conversationId: conv.id, name: evt.name, input: evt.input });
+    case 'tool_use': {
+      // House vocabulary: claude's delegation tool is named Task; the pi
+      // prompt's 'Agent' alias resolves to the same tool — normalize the
+      // event so consumers see one name.
+      const toolName = evt.name === 'Agent' || evt.name === 'agent' ? 'Task' : evt.name;
+      conv.onMessage('bot:tool', { conversationId: conv.id, name: toolName, input: evt.input });
       break;
-    case 'turn_complete':
-      conv.busy = false;
-      conv.onMessage('bot:turn-complete', { conversationId: conv.id, usedFileTools: evt.usedFileTools });
-      log.info(`[pi/conversation] ──── TURN COMPLETE ────  busy=false`);
+    }
+    case 'tool_result':
+      // Not surfaced yet (Phase D: translate to a bot:tool progress pulse).
       break;
-    case 'error':
+    case 'turn_complete': {
       conv.busy = false;
-      conv.onMessage('bot:error', { conversationId: conv.id, error: evt.error });
+      // One turn-complete per pushed message (D1-1 restored that invariant);
+      // idle gates the supervisor's proactive recycling so it never fires with
+      // a message still queued OR a background task still running — recycling
+      // mid-task would kill the task (claude has the same teardown semantics,
+      // but its idle flag doesn't guard tasks; this is strictly safer).
+      conv.pendingCount = Math.max(0, conv.pendingCount - 1);
+      const idle = conv.pendingCount === 0 && conv.tasks.size === 0;
+      // A finished background task's file edits restart the backend on the
+      // very next turn boundary (the continuation turn) — claude captures
+      // sub-agent tool_use blocks into the parent's usedTools the same way.
+      const usedFileTools = evt.usedFileTools || conv.taskUsedFileTools;
+      conv.taskUsedFileTools = false;
+      // Prompt occupancy of the last provider round — input + cache reads +
+      // cache writes, exactly claude.ts's contextTokens math. Output tokens
+      // are NOT added (claude doesn't either; the recycler's 70% threshold
+      // absorbs the next-turn growth).
+      const contextTokens = evt.usage
+        ? (evt.usage.inputTokens || 0) + (evt.usage.cacheReadTokens || 0) + (evt.usage.cacheCreationTokens || 0)
+        : 0;
+      conv.onMessage('bot:turn-complete', {
+        conversationId: conv.id,
+        usedFileTools,
+        contextTokens,
+        contextWindow: evt.contextWindow || 0,
+        idle,
+      });
+      log.info(`[pi/conversation] ──── TURN COMPLETE ────  busy=false ctx=${contextTokens}/${evt.contextWindow || 'n/a'} idle=${idle} tasks=${conv.tasks.size}`);
       break;
+    }
+    case 'error': {
+      // busy is NOT cleared here (audit D1-9): turn_complete is the single
+      // busy=false site and the session guarantees it on every non-aborted
+      // turn; an aborted/fatal path is torn down via bot:conversation-ended.
+      const fatal = evt.kind === 'auth' || evt.kind === 'context-overflow';
+      const remedy = evt.kind === 'context-overflow'
+        ? ' Starting a fresh session — send your message again to continue.'
+        : evt.kind === 'auth'
+          ? ' I\'ll reconnect with the new key as soon as it\'s saved.'
+          : '';
+      conv.onMessage('bot:error', { conversationId: conv.id, error: `${evt.error}${remedy}` });
+      if (fatal) {
+        // Unrecoverable for this session (audit D6-4): an over-window history
+        // would re-fail on every future turn, and a dead key has no business
+        // keeping the loop alive. Tear down — the finally emits
+        // bot:conversation-ended (routes + flags clear) and the next user
+        // message cold-starts a fresh session with re-injected history.
+        log.warn(`[pi/conversation] Fatal provider error (${evt.kind}) — recycling session ${conv.id}`);
+        endConversation(conv.id);
+      }
+      break;
+    }
   }
 }
@@ -305,8 +646,9 @@ export function pushMessage(
     return false;
   }
-  log.info(`[pi/conversation] ──── PUSH MESSAGE ──── busy=${conv.busy}`);
+  log.info(`[pi/conversation] ──── PUSH MESSAGE ──── busy=${conv.busy} pending=${conv.pendingCount + 1}`);
   conv.busy = true;
+  conv.pendingCount += 1;
   conv.inputQueue.push(buildUserMessage(content, attachments, savedFiles));
   conv.onMessage('bot:typing', { conversationId });
   return true;
@@ -317,6 +659,15 @@ export function endConversation(conversationId: string): void {
   if (!conv) return;
   log.info(`[pi/conversation] ──── ENDING CONVERSATION ${conversationId} ────`);
+  // Background tasks die with the conversation (claude parity — the SDK
+  // subprocess takes its tasks down too). Their finallys still emit
+  // bot:task-done {status:'stopped'} so dashboard task cards don't spin
+  // forever; the completion injection is skipped (conv gone).
+  for (const task of conv.tasks.values()) {
+    task.stopped = true;
+    task.abortController.abort();
+  }
+  conv.batcher.discard();
   conv.inputQueue.end();
   conv.abortController.abort();
   liveConversations.delete(conversationId);
@@ -326,16 +677,29 @@ export function isConversationBusy(conversationId: string): boolean {
   return liveConversations.get(conversationId)?.busy || false;
 }
-/** True if ANY live conversation in this harness is mid-turn. Used by the supervisor to defer
- *  backend restarts during channel/Alexa turns (which don't set the dashboard's agentQueryActive). */
+/** True if ANY live conversation in this harness is mid-turn OR has a background
+ *  sub-agent running. Used by the supervisor to defer backend restarts and
+ *  self-updates — a restart mid-task would kill the task's work in flight. */
 export function anyConversationBusy(): boolean {
-  for (const c of liveConversations.values()) if (c.busy) return true;
+  for (const c of liveConversations.values()) {
+    if (c.busy || c.tasks.size > 0) return true;
+  }
   return false;
 }
-/** Pi has no sub-agents yet; provided for interface compatibility. */
-export async function stopSubAgentTask(_conversationId: string, _taskId: string): Promise<void> {
-  // no-op for Phase 1
+/** Stop a specific background sub-agent task (dashboard user:stop-task). The
+ *  child's teardown emits bot:task-done {status:'stopped'} and injects a brief
+ *  acknowledgement turn into the parent. */
+export async function stopSubAgentTask(conversationId: string, taskId: string): Promise<void> {
+  const conv = liveConversations.get(conversationId);
+  const task = conv?.tasks.get(taskId);
+  if (!task) {
+    log.warn(`[pi/task] Cannot stop task ${taskId} — not running in conversation ${conversationId}`);
+    return;
+  }
+  log.info(`[pi/task] Stopping sub-agent task ${taskId}`);
+  task.stopped = true;
+  task.abortController.abort();
 }
 /** Pi has no pre-warm step (no subprocess), but the interface requires this. */
@@ -373,21 +737,21 @@ export async function startBlobyAgentQuery(
   supportPrompt?: string,
   _maxTurns?: number,
 ): Promise<void> {
-  const auth = resolveAuth();
-  if (!auth.ok) {
-    onMessage('bot:error', { conversationId, error: auth.error });
+  const resolved = resolveAuth();
+  if (!resolved.ok) {
+    onMessage('bot:error', { conversationId, error: resolved.error });
+    // bot:done frees the caller's slot (WhatsApp activeAgents / scheduler) — without it
+    // each distinct customer hitting this path pins one of the 5 concurrent slots until
+    // supervisor restart (audit D3-2; mirrors claude.ts:620).
+    onMessage('bot:done', { conversationId, usedFileTools: false });
     return;
   }
+  const auth = resolved.auth;
-  const abortController = new AbortController();
-  activeQueries.set(conversationId, abortController);
-  // Hard watchdog — a hung provider stream would otherwise pin this query forever (finally never
-  // runs, bot:done never fires). Abort after 5 min; cleared in the finally on normal completion.
-  const watchdog = setTimeout(() => {
-    log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
-    abortController.abort();
-  }, 300_000);
+  // Build the prompt BEFORE registering in activeQueries / arming the watchdog
+  // (claude.ts ordering): if anything in here ever rejected after registration,
+  // the entry would leak forever — anyOneShotActive() stuck true defers every
+  // backend restart/self-update, and the caller's slot never frees.
   let systemPrompt: string;
   if (supportPrompt) {
     systemPrompt = supportPrompt;
@@ -398,11 +762,23 @@ export async function startBlobyAgentQuery(
   const messages: PiMessage[] = recentToPiMessages(recentMessages);
   messages.push(buildUserMessage(prompt, attachments, savedFiles));
+  const abortController = new AbortController();
+  activeQueries.set(conversationId, abortController);
+  // Hard watchdog — a hung provider stream would otherwise pin this query forever (finally never
+  // runs, bot:done never fires). Abort after 5 min; cleared in the finally on normal completion.
+  const watchdog = setTimeout(() => {
+    log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
+    abortController.abort();
+  }, 300_000);
   onMessage('bot:typing', { conversationId });
   let accumulated = '';
   const usedTools = new Set<string>();
-  let errored = false;
+  // Errors are stashed, not emitted inline — at the end, partial text wins
+  // over the error bubble (audit D3-5/D6-2, claude.ts:730-737 precedence).
+  let errorMsg: string | null = null;
+  const batcher = createTokenBatcher((text) => onMessage('bot:token', { conversationId, token: text }));
   try {
     const stream = streamProvider(auth.flavor, {
@@ -411,6 +787,9 @@ export async function startBlobyAgentQuery(
       apiKey: auth.apiKey,
       systemPrompt,
       messages,
+      maxOutputTokens: auth.maxOutputTokens,
+      maxTokensField: auth.maxTokensField,
+      includeStreamUsage: auth.includeStreamUsage,
       signal: abortController.signal,
     });
@@ -419,30 +798,46 @@ export async function startBlobyAgentQuery(
       switch (evt.type) {
         case 'text_delta':
           accumulated += evt.delta;
-          onMessage('bot:token', { conversationId, token: evt.delta });
+          batcher.add(evt.delta);
           break;
         case 'text_end':
+          batcher.flush();
           accumulated = evt.text;
           break;
         case 'tool_use':
+          batcher.flush();
           usedTools.add(evt.name);
           onMessage('bot:tool', { conversationId, name: evt.name, input: evt.input });
           break;
         case 'error':
-          errored = true;
-          onMessage('bot:error', { conversationId, error: evt.error });
+          batcher.flush();
+          errorMsg = evt.error;
           break;
       }
     }
-    if (accumulated && !errored) {
-      onMessage('bot:response', { conversationId, content: accumulated });
+    // Abort guard (audit D3-8): a watchdog-aborted run must not surface a
+    // truncated reply — a stopped pulse could otherwise still fire <Message>
+    // pushes with half-finished content.
+    if (!abortController.signal.aborted) {
+      batcher.flush();
+      if (accumulated) {
+        onMessage('bot:response', { conversationId, content: accumulated });
+      } else if (errorMsg) {
+        onMessage('bot:error', { conversationId, error: errorMsg });
+      }
     }
   } catch (err: any) {
     if (!abortController.signal.aborted) {
       log.warn(`[pi/bloby-agent] one-shot error: ${err?.message || err}`);
-      onMessage('bot:error', { conversationId, error: err?.message || String(err) });
+      batcher.flush();
+      if (accumulated) {
+        onMessage('bot:response', { conversationId, content: accumulated });
+      } else {
+        onMessage('bot:error', { conversationId, error: err?.message || String(err) });
+      }
     }
   } finally {
+    batcher.discard();
     clearTimeout(watchdog);
     activeQueries.delete(conversationId);
     const FILE_TOOL_NAMES = ['Write', 'Edit', 'write', 'edit'];
@@ -462,8 +857,9 @@ export function stopBlobyAgentQuery(conversationId: string): void {
 // ── Workspace agent endpoint (POST /api/agent/query) ──────────────────────
 export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryResult> {
-  const auth = resolveAuth();
-  if (!auth.ok) return { ok: false, error: auth.error };
+  const resolved = resolveAuth();
+  if (!resolved.ok) return { ok: false, error: resolved.error };
+  const auth = resolved.auth;
   const timeout = Math.min(Math.max(req.timeout || 120_000, 5_000), 300_000);
   const abortController = new AbortController();
@@ -487,6 +883,9 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
       apiKey: auth.apiKey,
       systemPrompt,
       messages,
+      maxOutputTokens: auth.maxOutputTokens,
+      maxTokensField: auth.maxTokensField,
+      includeStreamUsage: auth.includeStreamUsage,
       signal: abortController.signal,
     });
@@ -517,7 +916,10 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
     clearTimeout(timeoutHandle);
   }
-  if (errored) return { ok: false, error: errorMsg || 'Agent query failed' };
+  // Partial-text precedence (claude parity, audit D6-2): if the model streamed
+  // anything before failing, return it as a successful (truncated) response —
+  // claude's runAgentQuery only reports the error when nothing streamed.
+  if (errored && !fullText) return { ok: false, error: errorMsg || 'Agent query failed' };
   const usedFileTools = ['Write', 'Edit', 'write', 'edit'].some((t) => usedTools.has(t));
   return { ok: true, response: fullText, toolsUsed: Array.from(usedTools), usedFileTools };